From 104d99856b0f99c540c8cc1ea674a8f249114a01 Mon Sep 17 00:00:00 2001 From: Noi Date: Thu, 2 Mar 2023 09:58:15 -0800 Subject: [PATCH 1/4] Prevent unnecessary disconnection when user list fetch takes too long --- BackgroundServices/AutoUserDownload.cs | 6 ++++-- BirthdayBot.csproj | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/BackgroundServices/AutoUserDownload.cs b/BackgroundServices/AutoUserDownload.cs index dffcd6f..9e4f882 100644 --- a/BackgroundServices/AutoUserDownload.cs +++ b/BackgroundServices/AutoUserDownload.cs @@ -32,9 +32,11 @@ class AutoUserDownload : BackgroundService { var guild = ShardInstance.DiscordClient.GetGuild((ulong)item); if (guild == null) continue; // A guild disappeared...? - await guild.DownloadUsersAsync().ConfigureAwait(false); // We're already on a seperate thread, no need to use Task.Run - await Task.Delay(200, CancellationToken.None).ConfigureAwait(false); // Must delay, or else it seems to hang... + await guild.DownloadUsersAsync(); // We're already on a seperate thread, no need to use Task.Run + await Task.Delay(200, CancellationToken.None); // Must delay, or else it seems to hang... processed++; + + if (processed >= 150) break; // take a break (don't get killed by ShardManager for taking too long due to quantity) } if (processed > 25) Log($"Explicit user list request processed for {processed} guild(s)."); diff --git a/BirthdayBot.csproj b/BirthdayBot.csproj index bfda17f..9c81cf5 100644 --- a/BirthdayBot.csproj +++ b/BirthdayBot.csproj @@ -22,7 +22,7 @@ - + all From 1fba7ff708c4093d4a40bb05d240fbf3829bdb95 Mon Sep 17 00:00:00 2001 From: Noi Date: Sat, 4 Mar 2023 11:27:31 -0800 Subject: [PATCH 2/4] stopgap fixes --- BackgroundServices/AutoUserDownload.cs | 33 +++++++++++++++++++------- ShardManager.cs | 8 +++---- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/BackgroundServices/AutoUserDownload.cs b/BackgroundServices/AutoUserDownload.cs index 9e4f882..30bdc75 100644 --- a/BackgroundServices/AutoUserDownload.cs +++ b/BackgroundServices/AutoUserDownload.cs @@ -8,6 +8,8 @@ namespace BirthdayBot.BackgroundServices; class AutoUserDownload : BackgroundService { public AutoUserDownload(ShardInstance instance) : base(instance) { } + private static readonly HashSet _failedDownloads = new(); + public override async Task OnTick(int tickCount, CancellationToken token) { // Take action if a guild's cache is incomplete... var incompleteCaches = ShardInstance.DiscordClient.Guilds.Where(g => !g.HasAllMembers).Select(g => g.Id).ToHashSet(); @@ -16,9 +18,13 @@ class AutoUserDownload : BackgroundService { try { await DbConcurrentOperationsLock.WaitAsync(token); using var db = new BotDatabaseContext(); - mustFetch = db.UserEntries.AsNoTracking() - .Where(e => incompleteCaches.Contains(e.GuildId)).Select(e => e.GuildId).Distinct() - .ToList(); + lock (_failedDownloads) + mustFetch = db.UserEntries.AsNoTracking() + .Where(e => incompleteCaches.Contains(e.GuildId)) + .Select(e => e.GuildId) + .Distinct() + .Where(e => !_failedDownloads.Contains(e)) + .ToList(); } finally { try { DbConcurrentOperationsLock.Release(); @@ -26,19 +32,30 @@ class AutoUserDownload : BackgroundService { } var processed = 0; + var processStartTime = DateTimeOffset.UtcNow; foreach (var item in mustFetch) { // May cause a disconnect in certain situations. Cancel all further attempts until the next pass if it happens. if (ShardInstance.DiscordClient.ConnectionState != ConnectionState.Connected) break; - var guild = ShardInstance.DiscordClient.GetGuild((ulong)item); + var guild = ShardInstance.DiscordClient.GetGuild(item); if (guild == null) continue; // A guild disappeared...? - await guild.DownloadUsersAsync(); // We're already on a seperate thread, no need to use Task.Run - await Task.Delay(200, CancellationToken.None); // Must delay, or else it seems to hang... + + const int singleTimeout = 500; + var dl = guild.DownloadUsersAsync(); // We're already on a seperate thread, no need to use Task.Run (?) + dl.Wait(singleTimeout * 1000, token); + if (!dl.IsCompletedSuccessfully) { + Log($"Giving up on {guild.Id} after {singleTimeout} seconds. (Total members: {guild.MemberCount})"); + lock (_failedDownloads) _failedDownloads.Add(guild.Id); + } processed++; - if (processed >= 150) break; // take a break (don't get killed by ShardManager for taking too long due to quantity) + if (Math.Abs((DateTimeOffset.UtcNow - processStartTime).TotalMinutes) > 2) { + Log("Break time!"); + // take a break (don't get killed by ShardManager for taking too long due to quantity) + break; + } } - if (processed > 25) Log($"Explicit user list request processed for {processed} guild(s)."); + if (processed > 20) Log($"Explicit user list request processed for {processed} guild(s)."); } } diff --git a/ShardManager.cs b/ShardManager.cs index ff30a64..420fa49 100644 --- a/ShardManager.cs +++ b/ShardManager.cs @@ -144,10 +144,10 @@ class ShardManager : IDisposable { shardStatuses.AppendLine(); - if (lastRun > DeadShardThreshold) { - shardStatuses.AppendLine($"Shard {i:00} marked for disposal."); - deadShards.Add(i); - } + //if (lastRun > DeadShardThreshold) { + // shardStatuses.AppendLine($"Shard {i:00} marked for disposal."); + // deadShards.Add(i); + //} } Log(shardStatuses.ToString().TrimEnd()); From 29f2eeb31ebfce0d0038ecaaa1fb02be4056ec74 Mon Sep 17 00:00:00 2001 From: Noi Date: Sun, 5 Mar 2023 22:57:09 -0800 Subject: [PATCH 3/4] Skip user download in guilds where the method hangs A workaround for an apparent bug in Discord.Net's `SocketGuild.DownloadUsersAsync()` method. Conclusions made while writing and testing this workaround: * The guilds it hangs on are seemingly random. * Guild size does not matter. * If a download hangs, it never recovers within 2 hours, probably more. * Trying a guild again never works. It just hangs again. --- BackgroundServices/AutoUserDownload.cs | 33 +++++++++++++++----------- ShardManager.cs | 10 ++++---- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/BackgroundServices/AutoUserDownload.cs b/BackgroundServices/AutoUserDownload.cs index 30bdc75..6e8f0c5 100644 --- a/BackgroundServices/AutoUserDownload.cs +++ b/BackgroundServices/AutoUserDownload.cs @@ -9,10 +9,14 @@ class AutoUserDownload : BackgroundService { public AutoUserDownload(ShardInstance instance) : base(instance) { } private static readonly HashSet _failedDownloads = new(); + private static readonly TimeSpan _singleDlTimeout = ShardManager.DeadShardThreshold / 3; public override async Task OnTick(int tickCount, CancellationToken token) { // Take action if a guild's cache is incomplete... - var incompleteCaches = ShardInstance.DiscordClient.Guilds.Where(g => !g.HasAllMembers).Select(g => g.Id).ToHashSet(); + var incompleteCaches = ShardInstance.DiscordClient.Guilds + .Where(g => !g.HasAllMembers) + .Select(g => g.Id) + .ToHashSet(); // ...and if the guild contains any user data IEnumerable mustFetch; try { @@ -34,28 +38,29 @@ class AutoUserDownload : BackgroundService { var processed = 0; var processStartTime = DateTimeOffset.UtcNow; foreach (var item in mustFetch) { - // May cause a disconnect in certain situations. Cancel all further attempts until the next pass if it happens. + // May cause a disconnect in certain situations. Make no further attempts until the next pass if it happens. if (ShardInstance.DiscordClient.ConnectionState != ConnectionState.Connected) break; var guild = ShardInstance.DiscordClient.GetGuild(item); if (guild == null) continue; // A guild disappeared...? - const int singleTimeout = 500; - var dl = guild.DownloadUsersAsync(); // We're already on a seperate thread, no need to use Task.Run (?) - dl.Wait(singleTimeout * 1000, token); - if (!dl.IsCompletedSuccessfully) { - Log($"Giving up on {guild.Id} after {singleTimeout} seconds. (Total members: {guild.MemberCount})"); - lock (_failedDownloads) _failedDownloads.Add(guild.Id); - } + await Task.Delay(200, CancellationToken.None); // Delay a bit (reduces the possibility of hanging, somehow). processed++; - - if (Math.Abs((DateTimeOffset.UtcNow - processStartTime).TotalMinutes) > 2) { - Log("Break time!"); - // take a break (don't get killed by ShardManager for taking too long due to quantity) + var dl = guild.DownloadUsersAsync(); + dl.Wait((int)_singleDlTimeout.TotalMilliseconds / 2, token); + if (dl.IsFaulted) { + Log("Exception thrown by download task: " + dl.Exception); break; + } else if (!dl.IsCompletedSuccessfully) { + Log($"Hang on processing {guild.Id}. Skipping."); + lock (_failedDownloads) _failedDownloads.Add(guild.Id); + continue; } + + // Prevent unnecessary disconnections by ShardManager if we're taking too long + if (DateTimeOffset.UtcNow - processStartTime > _singleDlTimeout) break; } - if (processed > 20) Log($"Explicit user list request processed for {processed} guild(s)."); + if (processed > 10) Log($"Member list downloads handled for {processed} guilds."); } } diff --git a/ShardManager.cs b/ShardManager.cs index 420fa49..2732204 100644 --- a/ShardManager.cs +++ b/ShardManager.cs @@ -25,7 +25,7 @@ class ShardManager : IDisposable { /// Amount of time without a completed background service run before a shard instance /// is considered "dead" and tasked to be removed. /// - private static readonly TimeSpan DeadShardThreshold = new(0, 20, 0); + public static readonly TimeSpan DeadShardThreshold = new(0, 20, 0); /// /// A dictionary with shard IDs as its keys and shard instances as its values. @@ -144,10 +144,10 @@ class ShardManager : IDisposable { shardStatuses.AppendLine(); - //if (lastRun > DeadShardThreshold) { - // shardStatuses.AppendLine($"Shard {i:00} marked for disposal."); - // deadShards.Add(i); - //} + if (lastRun > DeadShardThreshold) { + shardStatuses.AppendLine($"Shard {i:00} marked for disposal."); + deadShards.Add(i); + } } Log(shardStatuses.ToString().TrimEnd()); From c893b317f0c2daf813fb7f2c2a60abe0ce5f784e Mon Sep 17 00:00:00 2001 From: Noi Date: Sun, 5 Mar 2023 23:05:37 -0800 Subject: [PATCH 4/4] Fix logged message's grammar; bump version --- BackgroundServices/AutoUserDownload.cs | 2 +- BirthdayBot.csproj | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/BackgroundServices/AutoUserDownload.cs b/BackgroundServices/AutoUserDownload.cs index 6e8f0c5..c4cb47c 100644 --- a/BackgroundServices/AutoUserDownload.cs +++ b/BackgroundServices/AutoUserDownload.cs @@ -52,7 +52,7 @@ class AutoUserDownload : BackgroundService { Log("Exception thrown by download task: " + dl.Exception); break; } else if (!dl.IsCompletedSuccessfully) { - Log($"Hang on processing {guild.Id}. Skipping."); + Log($"Task for guild {guild.Id} is unresponsive. Skipping guild. Members: {guild.MemberCount}. Name: {guild.Name}."); lock (_failedDownloads) _failedDownloads.Add(guild.Id); continue; } diff --git a/BirthdayBot.csproj b/BirthdayBot.csproj index 9c81cf5..8b6d6bc 100644 --- a/BirthdayBot.csproj +++ b/BirthdayBot.csproj @@ -5,7 +5,7 @@ net6.0 enable enable - 3.4.7 + 3.4.8 NoiTheCat