Set up useful exit codes

Additionally, updated style on affected files and cleaned up certain parts of the code in each.
This commit is contained in:
Noi 2021-10-18 00:26:25 -07:00
parent 6f34fbe657
commit fdffa5425c
2 changed files with 249 additions and 272 deletions

View file

@ -2,19 +2,20 @@
using System;
using System.Threading.Tasks;
namespace BirthdayBot
{
class Program
{
namespace BirthdayBot;
class Program {
private static ShardManager _bot;
public static DateTimeOffset BotStartTime { get; private set; }
public static DateTimeOffset BotStartTime { get; } = DateTimeOffset.UtcNow;
static async Task Main()
{
BotStartTime = DateTimeOffset.UtcNow;
static async Task Main() {
var cfg = new Configuration();
try {
await Database.DoInitialDatabaseSetupAsync();
} catch (Npgsql.NpgsqlException e) {
Console.WriteLine("Error when attempting to connect to database: " + e.Message);
Environment.Exit(1);
}
Console.CancelKeyPress += OnCancelKeyPressed;
_bot = new ShardManager(cfg);
@ -25,34 +26,30 @@ namespace BirthdayBot
/// <summary>
/// Sends a formatted message to console. The message is split on line breaks and each
/// resulting line is written separately, prefixed with the same UTC timestamp and source tag.
/// </summary>
/// <param name="source">Label written in square brackets before each line.</param>
/// <param name="message">Text to write; may contain "\r\n" or "\n" line breaks.</param>
public static void Log(string source, string message)
{
public static void Log(string source, string message) {
// Captured once so that every line of a multi-line message carries the same timestamp.
var ts = DateTime.UtcNow;
// Both CRLF and LF are accepted as line separators.
var ls = new string[] { "\r\n", "\n" };
// StringSplitOptions.None keeps empty entries, so blank lines in the message are still printed.
foreach (var item in message.Split(ls, StringSplitOptions.None))
Console.WriteLine($"{ts:u} [{source}] {item}");
}
/// <summary>
/// Handler attached to <c>Console.CancelKeyPress</c> in <c>Main</c>. Logs that the cancel key
/// was captured and then starts the shutdown sequence through <c>ProgramStop</c>.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; its <c>Cancel</c> flag is set to true here.</param>
private static void OnCancelKeyPressed(object sender, ConsoleCancelEventArgs e)
{
private static void OnCancelKeyPressed(object sender, ConsoleCancelEventArgs e) {
// Marks the key press as handled so the process is not terminated by the default behavior;
// the exit is performed by ProgramStop instead.
e.Cancel = true;
Log("Shutdown", "Captured cancel key; sending shutdown.");
ProgramStop();
}
// Set once shutdown has begun so that later calls to ProgramStop return immediately.
private static bool _stopping = false;
/// <summary>
/// Shuts the program down: disposes the bot on a separate task, waits up to 90 seconds for
/// that to finish, and then exits the process. Calls made after the first are ignored.
/// </summary>
public static void ProgramStop()
{
public static void ProgramStop() {
// NOTE(review): this check-then-set is not synchronized. ProgramStop is reachable from both the
// cancel-key handler and the shard manager's loop — confirm two callers cannot race past it.
if (_stopping) return;
_stopping = true;
Log("Shutdown", "Commencing shutdown...");

// Dispose runs on another task so that the wait below can be bounded by a timeout.
var dispose = Task.Run(_bot.Dispose);
if (!dispose.Wait(90000))
{
if (!dispose.Wait(90000)) {
Log("Shutdown", "Normal shutdown has not concluded after 90 seconds. Will force quit.");
// Added to (not assigned over) any exit code already set, so an earlier code is still visible.
Environment.ExitCode += 0x200;
}
Environment.Exit(0);
}
// Exits with whatever exit code has been accumulated in Environment.ExitCode.
Environment.Exit(Environment.ExitCode);
}
}

View file

@ -10,19 +10,17 @@ using System.Threading;
using System.Threading.Tasks;
using static BirthdayBot.UserInterface.CommandsCommon;
namespace BirthdayBot
{
namespace BirthdayBot;
/// <summary>
/// The highest level part of this bot:
/// Starts up, looks over, and manages shard instances while containing common resources
/// and providing common functions for all existing shards.
/// More or less the main class for the program. Handles individual shards and provides frequent
/// status reports regarding the overall health of the application.
/// </summary>
class ShardManager : IDisposable
{
class ShardManager : IDisposable {
/// <summary>
/// Number of seconds between each time the manager's watchdog task runs, in seconds.
/// Interval between each run of the status task, in seconds.
/// </summary>
private const int WatchdogInterval = 90;
private const int StatusInterval = 90;
/// <summary>
/// Number of shards allowed to be destroyed before forcing the program to close.
@ -37,7 +35,8 @@ namespace BirthdayBot
/// <summary>
/// Amount of time without a completed background service run before a shard instance
/// is considered "dead" and tasked to be removed.
/// is considered "dead" and tasked to be removed. A fraction of this value is also used
/// to determine when a shard is "slow".
/// </summary>
private static readonly TimeSpan DeadShardThreshold = new(0, 20, 0);
@ -49,24 +48,21 @@ namespace BirthdayBot
/// </summary>
private readonly Dictionary<int, ShardInstance> _shards;
// Commonly used command handler instances
private readonly Dictionary<string, CommandHandler> _dispatchCommands;
private readonly UserCommands _cmdsUser;
private readonly ListingCommands _cmdsListing;
private readonly HelpInfoCommands _cmdsHelp;
private readonly ManagerCommands _cmdsMods;
// Watchdog stuff
private readonly Task _watchdogTask;
private readonly CancellationTokenSource _watchdogCancel;
private readonly Task _statusTask;
private readonly CancellationTokenSource _mainCancel;
private int _destroyedShards = 0;
internal Configuration Config { get; }
public ShardManager(Configuration cfg)
{
public ShardManager(Configuration cfg) {
var ver = System.Reflection.Assembly.GetExecutingAssembly().GetName().Version;
Log($"Birthday Bot v{ver.ToString(3)} is starting...");
Log($"Birthday Bot v{ver!.ToString(3)} is starting...");
Config = cfg;
@ -81,39 +77,35 @@ namespace BirthdayBot
_cmdsMods = new ManagerCommands(cfg, _cmdsUser.Commands);
foreach (var item in _cmdsMods.Commands) _dispatchCommands.Add(item.Item1, item.Item2);
// Allocate shards based on configuration
_shards = new Dictionary<int, ShardInstance>();
// Create only the specified shards as needed by this instance
for (int i = Config.ShardStart; i < (Config.ShardStart + Config.ShardAmount); i++)
{
for (int i = Config.ShardStart; i < (Config.ShardStart + Config.ShardAmount); i++) {
_shards.Add(i, null);
}
// Start watchdog
_watchdogCancel = new CancellationTokenSource();
_watchdogTask = Task.Factory.StartNew(WatchdogLoop, _watchdogCancel.Token,
// Start status reporting thread
_mainCancel = new CancellationTokenSource();
_statusTask = Task.Factory.StartNew(StatusLoop, _mainCancel.Token,
TaskCreationOptions.LongRunning, TaskScheduler.Default);
}
/// <summary>
/// Stops the manager: cancels the background loop and waits a bounded time for it, then
/// disposes every active shard in parallel and waits a bounded time for those to finish.
/// A warning is logged, and shutdown continues, whenever one of the waits times out.
/// </summary>
public void Dispose()
{
_watchdogCancel.Cancel();
_watchdogTask.Wait(5000);
if (!_watchdogTask.IsCompleted)
Log("Warning: Shard status watcher has not ended in time. Continuing...");
public void Dispose() {
// Signal the background loop to stop, then give it a limited time to wind down.
// NOTE(review): the task is created with Task.Factory.StartNew over an async method (see the
// constructor) — confirm this wait observes the end of the loop and not just its first await.
_mainCancel.Cancel();
_statusTask.Wait(10000);
if (!_statusTask.IsCompleted)
Log("Warning: Main thread did not cleanly finish up in time. Continuing...");

Log("Shutting down all shards...");
// Each shard's Dispose is started on its own task so that they run concurrently.
var shardDisposes = new List<Task>();
foreach (var item in _shards)
{
foreach (var item in _shards) {
// Slots hold null when no shard instance is currently assigned to that ID.
if (item.Value == null) continue;
shardDisposes.Add(Task.Run(item.Value.Dispose));
}
if (!Task.WhenAll(shardDisposes).Wait(60000))
{
Log("Warning: All shards did not properly stop after 60 seconds. Continuing...");
if (!Task.WhenAll(shardDisposes).Wait(30000)) {
Log("Warning: Not all shards terminated cleanly after 30 seconds. Continuing...");
}

Log($"Shutdown complete. Bot uptime: {Common.BotUptime}");
Log($"Uptime: {Common.BotUptime}");
}
/// <summary>
/// Writes a message to the program log, using this class's name as the source tag.
/// </summary>
/// <param name="message">Text to log.</param>
private void Log(string message) {
    Program.Log(nameof(ShardManager), message);
}
@ -121,12 +113,10 @@ namespace BirthdayBot
/// <summary>
/// Creates and sets up a new shard instance.
/// </summary>
private async Task<ShardInstance> InitializeShard(int shardId)
{
private async Task<ShardInstance> InitializeShard(int shardId) {
ShardInstance newInstance;
var clientConf = new DiscordSocketConfig()
{
var clientConf = new DiscordSocketConfig() {
ShardId = shardId,
TotalShards = Config.ShardTotal,
LogLevel = LogSeverity.Info,
@ -141,36 +131,55 @@ namespace BirthdayBot
return newInstance;
}
private async Task WatchdogLoop()
{
try
{
while (!_watchdogCancel.IsCancellationRequested)
{
#region Status checking and display
/// <summary>
/// Immutable snapshot of one shard's state, gathered once per status report.
/// Populated with an object initializer; values cannot be changed afterwards.
/// </summary>
private readonly struct GuildStatusData {
    /// <summary>Number of guilds reported by the shard's client when the snapshot was taken.</summary>
    public int GuildCount { get; init; }

    /// <summary>Time elapsed between the shard's last background run and the snapshot.</summary>
    public TimeSpan LastTaskRunTime { get; init; }

    /// <summary>Name of the service the shard reported as currently executing, or null if none.</summary>
    public string? ExecutingTask { get; init; }
}
/// <summary>
/// Builds a one-line, space-separated listing of the given IDs for the status log.
/// </summary>
/// <param name="guildList">
/// IDs to list, each formatted with at least two digits. At the call sites visible in this file
/// these are shard IDs. Enumerated exactly once.
/// </param>
/// <param name="guildInfo">
/// Per-ID status data. Only consulted when <paramref name="showDetail"/> is true, in which case
/// every listed ID must be present as a key.
/// </param>
/// <param name="showDetail">
/// When true, each ID is followed by "[NNNs]" (whole seconds since its last task run), with the
/// name of the executing task inside the brackets when one is set.
/// </param>
/// <returns>The formatted listing, or "--" when there are no IDs.</returns>
private string StatusDisplay(IEnumerable<int> guildList, Dictionary<int, GuildStatusData> guildInfo, bool showDetail) {
    var entries = new List<string>();
    foreach (var id in guildList) {
        var entry = id.ToString("00");
        if (showDetail) {
            // Single lookup per ID; the data is reused for both parts of the detail text.
            var info = guildInfo[id];
            entry += $"[{Math.Floor(info.LastTaskRunTime.TotalSeconds):000}s";
            if (info.ExecutingTask != null)
                entry += $" {info.ExecutingTask}";
            entry += "]";
        }
        entries.Add(entry);
    }

    // Joining avoids the append-then-trim handling of the separator.
    return entries.Count == 0 ? "--" : string.Join(" ", entries);
}
private async Task StatusLoop() {
try {
while (!_mainCancel.IsCancellationRequested) {
Log($"Bot uptime: {Common.BotUptime}");
// Iterate through shard list, extract data
var guildInfo = new Dictionary<int, (int, TimeSpan, string)>();
var guildInfo = new Dictionary<int, GuildStatusData>();
var now = DateTimeOffset.UtcNow;
var nullShards = new List<int>();
foreach (var item in _shards)
{
if (item.Value == null)
{
foreach (var item in _shards) {
if (item.Value == null) {
nullShards.Add(item.Key);
continue;
}
var shard = item.Value;
var guildCount = shard.DiscordClient.Guilds.Count;
var lastRun = now - shard.LastBackgroundRun;
var lastExec = shard.CurrentExecutingService ?? "null";
guildInfo[item.Key] = (guildCount, lastRun, lastExec);
guildInfo[item.Key] = new GuildStatusData {
GuildCount = shard.DiscordClient.Guilds.Count,
LastTaskRunTime = now - shard.LastBackgroundRun,
ExecutingTask = shard.CurrentExecutingService
};
}
// Process info
var guildCounts = guildInfo.Select(i => i.Value.Item1);
var guildCounts = guildInfo.Select(i => i.Value.GuildCount);
var guildTotal = guildCounts.Sum();
var guildAverage = guildCounts.Any() ? guildCounts.Average() : 0;
Log($"Currently in {guildTotal} guilds. Average shard load: {guildAverage:0.0}.");
@ -179,77 +188,48 @@ namespace BirthdayBot
var goodShards = new List<int>();
var badShards = new List<int>(); // shards with low connection score OR long time since last work
var deadShards = new List<int>(); // shards to destroy and reinitialize
foreach (var item in guildInfo)
{
var lastRun = item.Value.Item2;
foreach (var item in guildInfo) {
var lastRun = item.Value.LastTaskRunTime;
if (lastRun > DeadShardThreshold / 3)
{
if (lastRun > DeadShardThreshold / 3) {
badShards.Add(item.Key);
// Consider a shard dead after a long span without background activity
if (lastRun > DeadShardThreshold)
deadShards.Add(item.Key);
}
else
{
} else {
goodShards.Add(item.Key);
}
}
string statusDisplay(IEnumerable<int> list, bool detailedInfo)
{
if (!list.Any()) return "--";
var result = new StringBuilder();
foreach (var item in list)
{
result.Append(item.ToString("00") + " ");
if (detailedInfo)
{
result.Remove(result.Length - 1, 1);
result.Append($"[{Math.Floor(guildInfo[item].Item2.TotalSeconds):000}s");
result.Append($" {guildInfo[item].Item3}] ");
}
}
if (result.Length > 0) result.Remove(result.Length - 1, 1);
return result.ToString();
}
Log("Stable shards: " + statusDisplay(goodShards, false));
if (badShards.Count > 0) Log("Unstable shards: " + statusDisplay(badShards, true));
if (deadShards.Count > 0) Log("Shards to be restarted: " + statusDisplay(deadShards, false));
if (nullShards.Count > 0) Log("Inactive shards: " + statusDisplay(nullShards, false));
Log("Online: " + StatusDisplay(goodShards, guildInfo, false));
if (badShards.Count > 0) Log("Slow: " + StatusDisplay(badShards, guildInfo, true));
if (deadShards.Count > 0) Log("Dead: " + StatusDisplay(deadShards, guildInfo, false));
if (nullShards.Count > 0) Log("Offline: " + StatusDisplay(nullShards, guildInfo, false));
// Remove dead shards
foreach (var dead in deadShards) {
// TODO investigate - has this been hanging here?
_shards[dead].Dispose();
_shards[dead] = null;
_destroyedShards++;
}
if (Config.QuitOnFails && _destroyedShards > MaxDestroyedShards)
{
if (Config.QuitOnFails && _destroyedShards > MaxDestroyedShards) {
Environment.ExitCode = 0x04;
Program.ProgramStop();
}
else
{
} else {
// Start up any missing shards
int startAllowance = MaxConcurrentOperations;
foreach (var id in nullShards)
{
foreach (var id in nullShards) {
// To avoid possible issues with resources strained over so many shards starting at once,
// initialization is spread out by only starting a few at a time.
if (startAllowance-- > 0)
{
if (startAllowance-- > 0) {
_shards[id] = await InitializeShard(id).ConfigureAwait(false);
}
else break;
} else break;
}
}
// All done for now
await Task.Delay(WatchdogInterval * 1000, _watchdogCancel.Token).ConfigureAwait(false);
}
}
catch (TaskCanceledException) { }
await Task.Delay(StatusInterval * 1000, _mainCancel.Token).ConfigureAwait(false);
}
} catch (TaskCanceledException) { }
}
#endregion
}