From a7845feded690292d405512fc5a0f9c98ac831ae Mon Sep 17 00:00:00 2001 From: Goober5000 Date: Sat, 4 Apr 2026 22:44:37 -0400 Subject: [PATCH] Fix multipart upload reliability: retry finish, handle 504, increase verify timeout Large uploads fail when the reverse proxy times out during chunk reassembly, returning 504. The client treated this as failure and retried finish with no server-state verification, causing cascading 500 errors when the server had already completed and deleted the chunks. Changes: - Add multiupload/finish to the GatewayTimeout hack so 504 is treated as tentative success (same pattern as mod/release endpoints) - Add bounded retry loop (max 3) to Finish() with linearly increasing backoff (2s, 4s, 6s); between retries, call multiupload/start to check the server's done flag via new CheckUploadDone() helper before blindly retrying - Increase verify_part timeout from 45s to 120s to handle disk I/O contention during parallel uploads - Remove premature completed=true assignment in Upload(); Finish() now manages the flag internally Co-Authored-By: Claude Opus 4.6 (1M context) --- Knossos.NET/Models/Nebula.cs | 101 ++++++++++++++++++++++++++--------- 1 file changed, 75 insertions(+), 26 deletions(-) diff --git a/Knossos.NET/Models/Nebula.cs b/Knossos.NET/Models/Nebula.cs index 8c47bdc7..ae4b0746 100644 --- a/Knossos.NET/Models/Nebula.cs +++ b/Knossos.NET/Models/Nebula.cs @@ -804,9 +804,9 @@ private enum ApiMethod return await ApiCall(resourceUrl, data, needsLogIn, timeoutSeconds, method); } /* Upload/Update/delete Mod Timeout Hack */ - if(response.StatusCode.ToString() == "GatewayTimeout" && (resourceUrl == "mod/release" || resourceUrl == "mod/release/update" || resourceUrl == "mod/release/delete")) + if(response.StatusCode.ToString() == "GatewayTimeout" && (resourceUrl == "mod/release" || resourceUrl == "mod/release/update" || resourceUrl == "mod/release/delete" || resourceUrl == "multiupload/finish")) { - Log.Add(Log.LogSeverity.Warning, "Nebula.ApiCall(" + resourceUrl + 
")", "During mod/release request a GatewayTimeout was recieved. This is a known issue with Nebula and while Knet handles this" + + Log.Add(Log.LogSeverity.Warning, "Nebula.ApiCall(" + resourceUrl + ")", "A GatewayTimeout was received. This is a known issue with Nebula and while Knet handles this" + " as a success there is not an actual way to know if the api call was really successfull."); var reply = new ApiReply(); reply.result = true; @@ -1844,14 +1844,12 @@ public async Task Upload() } }); - completed = true; + if (cancellationTokenSource.IsCancellationRequested) + throw new TaskCanceledException(); if (progressCallback != null) progressCallback.Invoke("Verifying Upload...", maxProgress, maxProgress); - if (cancellationTokenSource.IsCancellationRequested) - throw new TaskCanceledException(); - int attempt = 1; do { @@ -1860,10 +1858,11 @@ public async Task Upload() if (verified && progressCallback != null) progressCallback.Invoke("Verify: " + verified, maxProgress, maxProgress); - if (!verified && progressCallback != null && attempt <= maxUploadRetries) + if (!verified && attempt <= maxUploadRetries) { Log.Add(Log.LogSeverity.Warning, "Nebula.Upload", "File failed nebula upload verify, retrying: " + fileFullPath); - progressCallback.Invoke("Verify: Failed, Retrying... Retry #" + attempt, maxProgress, maxProgress); + if (progressCallback != null) + progressCallback.Invoke("Verify: Failed, Retrying... 
Retry #" + attempt, maxProgress, maxProgress); await Task.Delay(2000); } @@ -1875,37 +1874,87 @@ public async Task Upload() /// /// Call to complete the upload process /// Nebula will check the complete file checksum here + /// Retries with linearly increasing backoff (2s, 4s, 6s), checking server state between attempts /// /// true if everything is fine, false otherwise private async Task Finish() { - var data = new MultipartFormDataContent() - { - { new StringContent(fileChecksum!), "id" }, - { new StringContent(fileChecksum!), "checksum" }, - { new StringContent("None"), "content_checksum" }, - { new StringContent("None"), "vp_checksum" } - }; + const int maxFinishRetries = 3; - var reply = await ApiCall("multiupload/finish", data, true, 160); - if (reply.HasValue) + for (int attempt = 1; attempt <= maxFinishRetries; attempt++) { - if (!reply.Value.result) + var data = new MultipartFormDataContent() { - Log.Add(Log.LogSeverity.Error, "MultipartUploader.Finish", "Unable to multi part upload process to Nebula. Reason: " + reply.Value.reason); - if (progressCallback != null) - progressCallback.Invoke("Verify: " + reply.Value.reason, 0, 1); + { new StringContent(fileChecksum!), "id" }, + { new StringContent(fileChecksum!), "checksum" }, + { new StringContent("None"), "content_checksum" }, + { new StringContent("None"), "vp_checksum" } + }; + + var reply = await ApiCall("multiupload/finish", data, true, 160); + if (reply.HasValue) + { + if (reply.Value.result) + { + Log.Add(Log.LogSeverity.Information, "MultipartUploader.Finish", "Multiupload: File uploaded to Nebula! " + fileFullPath); + completed = true; + return true; + } + else + { + Log.Add(Log.LogSeverity.Error, "MultipartUploader.Finish", "Unable to complete multipart upload to Nebula. Reason: " + reply.Value.reason); + if (progressCallback != null) + progressCallback.Invoke("Verify: " + reply.Value.reason, 0, 1); + completed = false; + return false; // Definitive failure (e.g. 
checksum mismatch) — don't retry + } } - else + + // reply is null — network error, client timeout, or similar + Log.Add(Log.LogSeverity.Warning, "MultipartUploader.Finish", + "Finish attempt " + attempt + " of " + maxFinishRetries + " got no valid reply. Checking server state..."); + + await Task.Delay(2000 * attempt); // Linear backoff: 2s, 4s, 6s + + // Check if the server actually completed the upload + if (await CheckUploadDone()) { - Log.Add(Log.LogSeverity.Information, "MultipartUploader.Finish", "Multiupload: File uploaded to Nebula! " + fileFullPath); + Log.Add(Log.LogSeverity.Information, "MultipartUploader.Finish", + "Server confirmed upload is complete after finish attempt " + attempt); + completed = true; + return true; + } + + if (attempt < maxFinishRetries) + { + Log.Add(Log.LogSeverity.Warning, "MultipartUploader.Finish", + "Server says upload not done. Retrying finish (attempt " + (attempt + 1) + ")..."); } - completed = reply.Value.result; - return reply.Value.result; } + + Log.Add(Log.LogSeverity.Error, "MultipartUploader.Finish", + "All finish attempts exhausted for " + fileFullPath); + completed = false; return false; } + /// + /// Calls multiupload/start to check if the server has already marked this upload as done. + /// Used to verify whether a timed-out finish call actually succeeded server-side. 
+ /// + private async Task CheckUploadDone() + { + var data = new MultipartFormDataContent() + { + { new StringContent(fileChecksum!), "id" }, + { new StringContent(fileLenght.ToString()), "size" }, + { new StringContent(fileParts.Count().ToString()), "parts" } + }; + + var reply = await ApiCall("multiupload/start", data, true); + return reply.HasValue && reply.Value.done; + } + /// /// Starts the file upload process /// Here we pass file checksum, file size and number of parts to Nebula @@ -2076,7 +2125,7 @@ public async Task Verify() { new StringContent(partChecksum), "checksum" }, }; - var reply = await ApiCall("multiupload/verify_part", data, true); + var reply = await ApiCall("multiupload/verify_part", data, true, 120); if (reply.HasValue) { if (!reply.Value.result)