diff --git a/bot/bot.ts b/bot/bot.ts index 2038bda..ce32023 100644 --- a/bot/bot.ts +++ b/bot/bot.ts @@ -1210,10 +1210,11 @@ function redstonePersona(user: User, chatType: string, chatId: number, srv: stri "- Minecraft server: check status (server_status), start/stop/restart services (server_up/down/restart), tail logs (server_logs), look up players in the bot's database (players_list/player_get).", "- Admin-only tools (the caller's role is shown in the context block): run RCON commands (rcon), trigger a world backup (backup), and modify player records (player_upsert/set_*/remove, seen_list). If a non-admin asks for one, politely refuse and don't call it.", "- Minecraft wiki: look things up on minecraft.wiki and surface images into the chat.", - "- For 'how do I craft X?' / 'recipe for X' questions: call wiki_recipe(title=…) directly (use wiki_search first only if you're unsure of the exact page title). wiki_recipe returns the ingredient list (e.g. 'Block of Iron + Iron Ingot') AND the page's main item thumbnail URL. Then call tg_send_photo with that thumbnail_url and a caption like 'Anvil — Block of Iron ×3 + Iron Ingot' — visual + the recipe in one go. Minecraft Wiki does NOT host standalone recipe images (recipes are rendered live as HTML grids), so don't waste a turn calling wiki_page_images looking for one.", + "- For 'recipe for X' / 'how do I craft X' / 'send me the recipe image': call wiki_recipe_image(title=…, chat=) — ONE call that composites a real 3×3 grid PNG from the wiki's slot sprites and uploads it straight to the chat. Returns {sent:true,message_id} when it's already in the chat — you do NOT need a separate tg_send_photo turn afterwards. Only add a short text reply if you want to caption the recipe with extra context.", + "- If wiki_recipe_image fails (e.g. 
'no 3×3 crafting recipe' — the item is smelted, brewed, or only obtained from loot), use wiki_recipe to fetch the ingredient text, then tg_send_photo with the page's thumbnail_url as a fallback so SOMETHING visual lands.", "- For 'what is X?' / general lookup: wiki_page for the intro extract + thumbnail. Then a short text reply, optionally + tg_send_photo of the thumbnail.", - "- For specific images that aren't the page's main thumbnail (block variants, GUI screenshots, charts): wiki_page_images with an optional filename filter.", - "- IMAGE DELIVERY IS MANDATORY when the user asks for a picture / image / recipe / what something looks like. You MUST end the turn by calling tg_send_photo with a real URL — never describe the image in words and stop. If wiki_recipe / wiki_page returns no thumbnail at all, say so plainly instead of inventing one.", + "- For specific non-thumbnail images (block variants, GUI screenshots, charts): wiki_page_images with an optional filename filter.", + "- IMAGE DELIVERY IS MANDATORY when the user asks for a picture / image / recipe / what something looks like. End the turn with a tool call that actually puts a photo into the chat (wiki_recipe_image OR tg_send_photo). Never describe an image in words and stop.", "- When you use a tool, you usually don't need a separate text reply unless you're adding context — the tool's effect IS the response. 
/**
 * Normalise an image/link URL taken from wiki HTML into an absolute URL.
 *
 * - `//host/path`  → `https://host/path` (protocol-relative)
 * - `/path`        → `${WIKI_BASE}/path` (site-relative)
 * - anything else is returned untouched.
 */
function absolutize(url: string): string {
  // Protocol-relative has to be tested first: "//host" also begins with "/".
  const protocolRelative = url.startsWith("//");
  if (protocolRelative) {
    return "https:" + url;
  }
  return url.startsWith("/") ? WIKI_BASE + url : url;
}
/**
 * Download `url` and return the response body as a Buffer.
 *
 * The request carries the `UA` string as its User-Agent header and is aborted
 * via `AbortSignal.timeout(TIMEOUT_MS)`.
 *
 * @param url Absolute URL to fetch.
 * @returns The full response body.
 * @throws Error with message `${url} → ${status}` when the response is not ok;
 *         errors raised by `fetch` itself (including the timeout abort) propagate.
 */
async function fetchBytes(url: string): Promise<Buffer> {
  const res = await fetch(url, {
    headers: { "User-Agent": UA },
    signal: AbortSignal.timeout(TIMEOUT_MS),
  });
  if (!res.ok) throw new Error(`${url} → ${res.status}`);
  // arrayBuffer() buffers the whole body; Buffer.from wraps it for callers
  // that hand the bytes to Buffer-based APIs.
  return Buffer.from(await res.arrayBuffer());
}
+ const unique = new Set(); + for (const row of grid) for (const u of row) if (u) unique.add(u); + if (result) unique.add(result); + const blobs: Record = {}; + await Promise.all( + [...unique].map(async (u) => { + blobs[u] = await fetchBytes(u); + }), + ); + + const gridW = CELL * 3 + GAP * 2; + const gridH = CELL * 3 + GAP * 2; + const totalW = PAD + gridW + (result ? PAD + ARROW_W + PAD + CELL : 0) + PAD; + const totalH = PAD + gridH + PAD; + + // Resize every needed sprite to CELL×CELL up front, with nearest-neighbour so + // the 16-pixel inventory icons stay crisp (no antialiasing). Sharp's input + // is the original buffer; outputs are 64×64 PNG buffers. + const sized: Record = {}; + await Promise.all( + [...unique].map(async (u) => { + sized[u] = await sharp(blobs[u]).resize(CELL, CELL, { kernel: "nearest" }).png().toBuffer(); + }), + ); + + // Arrow rendered as SVG → PNG. + const arrowSvg = Buffer.from( + `` + + `` + + ``, + ); + const arrowPng = await sharp(arrowSvg).png().toBuffer(); + + const composites: { input: Buffer; top: number; left: number }[] = []; + // Slot backgrounds (slightly darker squares to look like inventory cells). 
tool({
  name: "wiki_recipe_image",
  title: "Send a Minecraft crafting recipe image to a chat",
  description:
    "Generate a 3×3 crafting-grid image of the recipe (with the result item beside an arrow) and send it directly to a Telegram chat. Use this for 'send me the recipe' / 'image of recipe' requests instead of chaining wiki_recipe + tg_send_photo — minecraft.wiki has no standalone recipe PNG, so this tool composites one from the per-slot ingredient sprites. Returns { sent: true, message_id } on success. Fails if the page has no 3×3 crafting recipe (smelting, brewing, smithing all use different mcui widgets).",
  parameters: {
    title: z.string().min(1).describe("Page title — e.g. 'Anvil', 'Beacon', 'Crafting Table'"),
    chat: z.union([z.number().int(), z.string()]).describe("Telegram chat id to send the photo into"),
    // The description now states the default the handler actually applies.
    caption: z
      .string()
      .max(1024)
      .optional()
      .describe("Optional caption — defaults to '<page title> — crafting recipe' if omitted"),
  },
  // Flow: fetch rendered page HTML → extract the crafting grid → composite a
  // PNG → upload the bytes to the chat. Early failures are reported as
  // `{ error }` results; exceptions from compositing or the upload propagate.
  handler: async ({ title, chat, caption }, { tg }) => {
    type ParseRes = { parse?: { title?: string; text?: string } };
    // NOTE(review): the type argument is presumed — ParseRes is declared here
    // and otherwise unused; confirm wikiGet is generic over its response shape.
    const data = await wikiGet<ParseRes>({
      action: "parse",
      page: title,
      prop: "text",
      redirects: "1",
      disableeditsection: "1",
    });
    const html = data.parse?.text ?? "";
    if (!html) return { error: `no wiki page titled '${title}'` };

    // Prefer the title echoed back by the API (redirects are requested above),
    // falling back to what the caller typed.
    const resolved = data.parse?.title ?? title;
    const parsed = parseCraftingGrid(html);
    if (!parsed) {
      return {
        error:
          `no 3×3 crafting recipe on '${resolved}' (page may use smelting / brewing / smithing, or item is uncraftable)`,
      };
    }

    const png = await composeRecipePng(parsed.grid, parsed.result);
    const sent = await tg.sendPhotoBytes({
      chat_id: chat,
      bytes: png,
      // Collapse every run of characters outside [A-Za-z0-9._-] to "_" for the upload filename.
      filename: `${resolved.replace(/[^A-Za-z0-9._-]+/g, "_")}_recipe.png`,
      caption: caption ?? `${resolved} — crafting recipe`,
    });
    return { sent: true, message_id: sent.message_id, title: resolved };
  },
}),
/**
 * Upload in-memory image bytes as a Telegram photo (multipart/form-data POST
 * to `${base}/sendPhoto`).
 *
 * Optional fields are only added to the form when they are not `undefined`;
 * `reply_to_message_id` is sent as a JSON-encoded `reply_parameters` field.
 * The photo part is always labelled `image/png`.
 *
 * @returns The `result` object of the API reply (carries `message_id`).
 * @throws Error when the reply is not JSON, when `ok` is false, or when an
 *         `ok` reply carries no `result`. Errors from `fetch` (including the
 *         30 s timeout abort) propagate unchanged.
 */
async sendPhotoBytes({ chat_id, bytes, filename = "photo.png", caption, parse_mode, reply_to_message_id }) {
  type SendPhotoReply = {
    ok: boolean;
    result?: { message_id: number };
    description?: string;
    error_code?: number;
  };

  const fd = new FormData();
  fd.set("chat_id", String(chat_id));
  if (caption !== undefined) fd.set("caption", caption);
  if (parse_mode !== undefined) fd.set("parse_mode", parse_mode);
  if (reply_to_message_id !== undefined) {
    fd.set("reply_parameters", JSON.stringify({ message_id: reply_to_message_id }));
  }
  fd.set("photo", new Blob([bytes], { type: "image/png" }), filename);

  const res = await fetch(`${base}/sendPhoto`, {
    method: "POST",
    body: fd,
    signal: AbortSignal.timeout(30_000),
  });

  // A proxy or gateway failure can answer with a non-JSON body; report the
  // HTTP status instead of letting a bare JSON parse error escape.
  let data: SendPhotoReply;
  try {
    data = (await res.json()) as SendPhotoReply;
  } catch {
    throw new Error(`sendPhoto(bytes): HTTP ${res.status} with non-JSON response body`);
  }
  if (!data.ok) throw new Error(`sendPhoto(bytes): ${data.description} (code ${data.error_code})`);
  // Guard instead of a non-null assertion so a malformed success reply fails
  // here, not later at `sent.message_id`.
  if (!data.result) throw new Error("sendPhoto(bytes): response marked ok but carried no result");
  return data.result;
},