stream Redstone replies via sendMessageDraft + fix self-killing server_down

Two related changes to how Redstone behaves in chat. Streaming: replies now animate word-by-word into Telegram using the new sendMessageDraft endpoint (Bot API 9.5, March 2026) via the @grammyjs/stream plugin, with @grammyjs/auto-retry on the API layer to swallow 429s transparently. The previous editMessageText-based approach is gone — sendMessageDraft is designed for this and animates natively on the client without hitting the 1/sec-per-chat edit limit. Pace lives in STREAM_WORD_DELAY_MS=50; tunable in one spot. server_down footgun: the MCP tool was running `compose down` with no service arg, which tore down the whole project — including the bot container running the tool call, which then got SIGTERM mid-conversation and didn't come back (unless-stopped doesn't restart on a clean compose down). Behaviour now matches the existing /stop slash command: compose stop minecraft autossh, leaving the bot up. Pass an explicit service to stop just that one. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-12 08:47:35 +02:00
parent 7976de7da4
commit cab5337e3f
4 changed files with 45 additions and 5 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -26,6 +26,7 @@ portforward.pid
 bot.token
 bot/node_modules/
 bot/bun.lockb
 bot/bun.lock
 bot/data/
 # Google Generative Language API key
--- a/bot/bot.ts
+++ b/bot/bot.ts
@@ -1,4 +1,8 @@
 import { Bot, Context, GrammyError, HttpError, InlineKeyboard } from "grammy";
 import { autoRetry } from "@grammyjs/auto-retry";
 import { stream, type StreamFlavor } from "@grammyjs/stream";
 type BotContext = StreamFlavor<Context>;
 import { Database } from "bun:sqlite";
 import { readFileSync, mkdirSync } from "node:fs";
 import { dirname } from "node:path";
@@ -314,7 +318,14 @@ async function maybeSendSticker(ctx: Context, event: string) {
 }
 // ---- Bot ----
-const bot = new Bot(TOKEN);
+const bot = new Bot<BotContext>(TOKEN);
 // API-layer transformer: auto-retry handles 429 Too Many Requests from
 // Telegram automatically (waits the retry_after window then re-sends), so
 // individual call sites do not need their own rate-limit handling.
 bot.api.config.use(autoRetry());
 // Middleware: the stream plugin uses the new sendMessageDraft endpoint
 // (Bot API 9.5, March 2026) to natively animate progressively-revealed text
 // on the client — feels like an LLM typing into chat. Presumably this is what
 // provides ctx.replyWithStream on BotContext (StreamFlavor) — confirm against
 // the plugin docs.
 bot.use(stream());
 // Capture incoming + outgoing for the audit log; track usernames + seen users.
 bot.use(async (ctx, next) => {
@@ -1205,8 +1216,16 @@ bot.on("message:text", async (ctx) => {
  }
  const out = trim(reply, 3500);
-  const sent = await ctx.reply(out, { reply_parameters: { message_id: ctx.message.message_id } });
+  // Stream the reply word-by-word so the message animates into the chat like
-  Q.insMsg.run(chatId, sent.message_id, null, "out", "text", out, ctx.message.message_id);
+  // an LLM typing. @grammyjs/stream uses sendMessageDraft (Bot API 9.5) to push
  // each delta as a native, animated draft and finalizes with sendMessage.
  const messages = await ctx.replyWithStream(streamWords(out), {}, {
    reply_parameters: { message_id: ctx.message.message_id },
  });
  const finalMessage = messages[messages.length - 1];
  if (finalMessage) {
    Q.insMsg.run(chatId, finalMessage.message_id, null, "out", "text", out, ctx.message.message_id);
  }
  // Emoji → sticker swap. Scan the reply for an emoji that has a matching
  // sticker in the library and, if found, fire it as a follow-up. We don't
@@ -1224,6 +1243,19 @@ bot.on("message:text", async (ctx) => {
  }
 });
 // Word-by-word delta generator for ctx.replyWithStream. Each yielded chunk is
 // one word together with the whitespace that follows it (a leading whitespace
 // run is emitted as its own chunk), so concatenating the chunks reproduces
 // `text` exactly. An empty string yields nothing.
 //
 // STREAM_WORD_DELAY_MS is the pause *between* consecutive chunks: ~50ms per
 // word gives a brisk LLM-typing feel without dragging on long replies. Set it
 // to 0 to disable pacing. No pause is taken before the first chunk or after
 // the last one, so the stream finishes as soon as the final word is yielded.
 const STREAM_WORD_DELAY_MS = 50;
 async function* streamWords(text: string): AsyncGenerator<string> {
   // Keep separators attached to the preceding word; emitting whitespace as a
   // separate delta would double the number of pauses per word.
   const chunks = text.match(/\S+\s*|\s+/g) ?? [];
   let first = true;
   for (const chunk of chunks) {
     if (!first && STREAM_WORD_DELAY_MS > 0) {
       await new Promise<void>((resolve) => setTimeout(resolve, STREAM_WORD_DELAY_MS));
     }
     first = false;
     yield chunk;
   }
 }
 // Walk the reply text and return the file_id of the first sticker-library
 // emoji that appears in it, or null if none match. We rank by where the emoji
 // occurs in the text (earliest wins) rather than by sticker frequency, so the
--- a/bot/package.json
+++ b/bot/package.json
@@ -7,6 +7,8 @@
    "start": "bun run bot.ts"
  },
  "dependencies": {
    "@grammyjs/auto-retry": "^2.0.2",
    "@grammyjs/stream": "^1.0.1",
    "grammy": "^1.30.0"
  },
  "devDependencies": {
--- a/mcp/lib/minecraft-tools.ts
+++ b/mcp/lib/minecraft-tools.ts
@@ -49,10 +49,15 @@ export const minecraftTools: Tool<z.ZodRawShape>[] = [
  // server_down: stops services through mc.compose("stop", …). The optional
  // `service` parameter selects a single service; without it the handler stops
  // the two services named by mc.config.serverSvc and mc.config.pfSvc.
  tool({
    name: "server_down",
    title: "Stop services",
-    description: "docker compose down. Pass a service to stop just one (uses `stop` instead of `down`).",
+    description: "Stop the minecraft + autossh services. The bot container is left running on purpose — a full `compose down` would shut the bot off mid-conversation. Pass a specific service to stop just that one.",
    parameters: { service: z.string().optional() },
    handler: async ({ service }, { mc }) => {
-      const r = service ? await mc.compose("stop", service) : await mc.compose("down");
+      // Without a service arg, stop only minecraft + autossh and leave the bot
      // running. `compose down` would tear down everything including the bot,
      // which kills the very process executing this tool call.
      // The check is a truthiness test, so an empty-string `service` is treated
      // the same as an omitted one and takes the default branch.
      const r = service
        ? await mc.compose("stop", service)
        : await mc.compose("stop", mc.config.serverSvc, mc.config.pfSvc);
      // Result shape: ok/code are passed through from the compose call; output
      // is r.out and r.err (presumably stdout/stderr — confirm in mc.compose)
      // joined by a newline with empty parts dropped, trimmed, and replaced by
      // "(no output)" when nothing remains.
      return { ok: r.ok, code: r.code, output: [r.out, r.err].filter(Boolean).join("\n").trim() || "(no output)" };
    },
  }),