Created
February 20, 2026 14:42
-
-
Save duarteocarmo/df138650dd1a245d700381832d9e573f to your computer and use it in GitHub Desktop.
LLM Response Annotation Tool
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // app.tsx — LLM Response Annotation Tool | |
| // bun run app.tsx | |
| const html = `<!DOCTYPE html> | |
| <html lang="en"> | |
| <head> | |
| <meta charset="utf-8" /> | |
| <meta name="viewport" content="width=device-width, initial-scale=1" /> | |
| <title>Annotate</title> | |
| <link href="https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;500;600&family=IBM+Plex+Sans:wght@400;500;600&display=swap" rel="stylesheet" /> | |
| <style> | |
| * { margin: 0; padding: 0; box-sizing: border-box; } | |
| html { font-size: 14px; } | |
| body { | |
| font-family: 'IBM Plex Sans', sans-serif; | |
| background: #fafaf8; | |
| color: #1a1a1a; | |
| line-height: 1.5; | |
| } | |
| /* ── Top bar ── */ | |
| .topbar { | |
| display: flex; | |
| align-items: center; | |
| justify-content: space-between; | |
| padding: 0.6rem 1.2rem; | |
| border-bottom: 1px solid #ddd; | |
| background: #fff; | |
| position: sticky; | |
| top: 0; | |
| z-index: 10; | |
| } | |
| .topbar-left { display: flex; align-items: center; gap: 1rem; } | |
| .topbar h1 { | |
| font-size: 0.85rem; | |
| font-weight: 600; | |
| letter-spacing: -0.01em; | |
| } | |
| .topbar .count { | |
| font-family: 'IBM Plex Mono', monospace; | |
| font-size: 0.7rem; | |
| color: #888; | |
| } | |
| .topbar-actions { display: flex; gap: 0.4rem; } | |
| /* ── Buttons ── */ | |
| .btn { | |
| font-family: 'IBM Plex Mono', monospace; | |
| font-size: 0.68rem; | |
| padding: 0.35rem 0.7rem; | |
| border: 1px solid #ccc; | |
| border-radius: 3px; | |
| background: #fff; | |
| color: #555; | |
| cursor: pointer; | |
| } | |
| .btn:hover { border-color: #999; color: #1a1a1a; } | |
| .btn-primary { background: #1a1a1a; color: #fff; border-color: #1a1a1a; } | |
| .btn-primary:hover { background: #333; } | |
| .btn-danger:hover { color: #c44; border-color: #c44; } | |
| /* ── Input area ── */ | |
| .input-bar { | |
| padding: 0.8rem 1.2rem; | |
| border-bottom: 1px solid #ddd; | |
| background: #fff; | |
| display: flex; | |
| gap: 0.6rem; | |
| align-items: flex-start; | |
| } | |
| .input-bar textarea { | |
| flex: 1; | |
| min-height: 60px; | |
| padding: 0.5rem 0.6rem; | |
| border: 1px solid #ddd; | |
| border-radius: 3px; | |
| font-family: 'IBM Plex Mono', monospace; | |
| font-size: 0.78rem; | |
| line-height: 1.5; | |
| resize: vertical; | |
| outline: none; | |
| background: #fafaf8; | |
| } | |
| .input-bar textarea:focus { border-color: #999; } | |
| .input-bar textarea::placeholder { color: #bbb; } | |
| .input-right { | |
| display: flex; | |
| flex-direction: column; | |
| gap: 0.4rem; | |
| min-width: 140px; | |
| } | |
| .input-right input { | |
| padding: 0.4rem 0.5rem; | |
| border: 1px solid #ddd; | |
| border-radius: 3px; | |
| font-family: 'IBM Plex Mono', monospace; | |
| font-size: 0.72rem; | |
| outline: none; | |
| background: #fafaf8; | |
| } | |
| .input-right input:focus { border-color: #999; } | |
| .input-right input::placeholder { color: #bbb; } | |
| /* ── Main workspace ── */ | |
| .workspace { | |
| display: grid; | |
| grid-template-columns: 1fr 420px; | |
| height: calc(100vh - 110px); | |
| overflow: hidden; | |
| } | |
| /* ── Left: response ── */ | |
| .response-pane { | |
| overflow-y: auto; | |
| padding: 1.5rem 2rem; | |
| border-right: 1px solid #ddd; | |
| } | |
| .response-pane .question-label { | |
| font-family: 'IBM Plex Mono', monospace; | |
| font-size: 0.7rem; | |
| color: #888; | |
| margin-bottom: 0.3rem; | |
| text-transform: uppercase; | |
| letter-spacing: 0.04em; | |
| } | |
| .response-pane .question-text { | |
| font-size: 0.9rem; | |
| font-weight: 500; | |
| margin-bottom: 1rem; | |
| padding-bottom: 0.8rem; | |
| border-bottom: 1px solid #eee; | |
| } | |
| /* markdown */ | |
| .md h1, .md h2, .md h3 { font-weight: 600; margin: 1em 0 0.4em; } | |
| .md h1 { font-size: 1.3rem; } | |
| .md h2 { font-size: 1.1rem; } | |
| .md h3 { font-size: 1rem; } | |
| .md h1:first-child, .md h2:first-child, .md h3:first-child { margin-top: 0; } | |
| .md p { margin: 0.5em 0; } | |
| .md ul, .md ol { margin: 0.4em 0; padding-left: 1.4em; } | |
| .md li { margin: 0.15em 0; } | |
| .md code { | |
| font-family: 'IBM Plex Mono', monospace; | |
| font-size: 0.85em; | |
| background: #f0eeeb; | |
| padding: 0.1em 0.35em; | |
| border-radius: 2px; | |
| } | |
| .md pre { | |
| background: #f0eeeb; | |
| border-radius: 3px; | |
| padding: 0.8rem; | |
| overflow-x: auto; | |
| margin: 0.6em 0; | |
| } | |
| .md pre code { background: none; padding: 0; } | |
| .md blockquote { | |
| border-left: 2px solid #ccc; | |
| padding-left: 0.8rem; | |
| color: #666; | |
| margin: 0.6em 0; | |
| } | |
| .md a { color: #1a6dd4; } | |
| .md strong { font-weight: 600; } | |
| /* ── Right: rubric ── */ | |
| .rubric-pane { | |
| overflow-y: auto; | |
| padding: 1rem 1.2rem; | |
| background: #fff; | |
| } | |
| .rubric-header { | |
| display: flex; | |
| align-items: center; | |
| justify-content: space-between; | |
| margin-bottom: 0.8rem; | |
| padding-bottom: 0.5rem; | |
| border-bottom: 1px solid #eee; | |
| } | |
| .rubric-header .score-total { | |
| font-family: 'IBM Plex Mono', monospace; | |
| font-size: 0.75rem; | |
| color: #888; | |
| } | |
| .rubric-header .score-total strong { | |
| color: #1a1a1a; | |
| } | |
| .criterion { | |
| margin-bottom: 0.9rem; | |
| padding-bottom: 0.9rem; | |
| border-bottom: 1px solid #f0f0f0; | |
| } | |
| .criterion:last-of-type { border-bottom: none; margin-bottom: 0; } | |
| .criterion-name { | |
| font-size: 0.8rem; | |
| font-weight: 600; | |
| margin-bottom: 0.4rem; | |
| } | |
| .level-row { | |
| display: flex; | |
| align-items: flex-start; | |
| gap: 0.5rem; | |
| margin-bottom: 0.2rem; | |
| padding: 0.25rem 0.35rem; | |
| border-radius: 3px; | |
| cursor: pointer; | |
| transition: background 0.1s; | |
| } | |
| .level-row:hover { background: #f5f5f3; } | |
| .level-row.selected { background: #eef5ee; } | |
| .level-score { | |
| font-family: 'IBM Plex Mono', monospace; | |
| font-size: 0.72rem; | |
| font-weight: 600; | |
| min-width: 18px; | |
| height: 18px; | |
| display: flex; | |
| align-items: center; | |
| justify-content: center; | |
| border-radius: 2px; | |
| border: 1px solid #ddd; | |
| background: #fff; | |
| color: #999; | |
| flex-shrink: 0; | |
| margin-top: 1px; | |
| } | |
| .level-row.selected .level-score { | |
| background: #2a7d2a; | |
| border-color: #2a7d2a; | |
| color: #fff; | |
| } | |
| .level-desc { | |
| font-size: 0.72rem; | |
| color: #666; | |
| line-height: 1.4; | |
| } | |
| .level-row.selected .level-desc { | |
| color: #1a1a1a; | |
| font-weight: 500; | |
| } | |
| /* notes */ | |
| .notes-section { margin-top: 0.8rem; padding-top: 0.8rem; border-top: 1px solid #eee; } | |
| .notes-section textarea { | |
| width: 100%; | |
| min-height: 45px; | |
| padding: 0.4rem 0.5rem; | |
| border: 1px solid #ddd; | |
| border-radius: 3px; | |
| font-family: 'IBM Plex Mono', monospace; | |
| font-size: 0.72rem; | |
| resize: vertical; | |
| outline: none; | |
| background: #fafaf8; | |
| } | |
| .notes-section textarea:focus { border-color: #999; } | |
| .notes-section textarea::placeholder { color: #bbb; } | |
| /* ── Empty state ── */ | |
| .empty { | |
| display: flex; | |
| align-items: center; | |
| justify-content: center; | |
| height: 100%; | |
| color: #bbb; | |
| font-size: 0.85rem; | |
| } | |
| /* ── Card list (when multiple) ── */ | |
| .card-list { | |
| border-bottom: 1px solid #ddd; | |
| background: #fff; | |
| display: flex; | |
| gap: 0; | |
| overflow-x: auto; | |
| } | |
| .card-tab { | |
| font-family: 'IBM Plex Mono', monospace; | |
| font-size: 0.68rem; | |
| padding: 0.45rem 0.8rem; | |
| border: none; | |
| background: none; | |
| color: #888; | |
| cursor: pointer; | |
| white-space: nowrap; | |
| border-bottom: 2px solid transparent; | |
| } | |
| .card-tab:hover { color: #1a1a1a; } | |
| .card-tab.active { | |
| color: #1a1a1a; | |
| border-bottom-color: #1a1a1a; | |
| } | |
| .card-tab .tab-score { | |
| margin-left: 0.3rem; | |
| color: #aaa; | |
| } | |
| .card-tab.active .tab-score { color: #666; } | |
| /* scrollbar */ | |
| .response-pane::-webkit-scrollbar, .rubric-pane::-webkit-scrollbar { width: 4px; } | |
| .response-pane::-webkit-scrollbar-thumb, .rubric-pane::-webkit-scrollbar-thumb { background: #ddd; border-radius: 2px; } | |
| </style> | |
| </head> | |
| <body> | |
| <div id="root"></div> | |
| <script type="module"> | |
| import { createElement as h, useState, useEffect, useCallback, useRef, Fragment } from "https://esm.sh/react@18"; | |
| import { createRoot } from "https://esm.sh/react-dom@18/client"; | |
| import { marked } from "https://esm.sh/marked@12"; | |
| const RUBRIC = [ | |
| { | |
| key: "relevance", name: "Relevance & Focus", | |
| levels: [ | |
| "Answers a different question or fills space with irrelevant content", | |
| "Buries the answer under generic definitions or drifts into tangents", | |
| "Addresses the topic clearly but includes minor fluff", | |
| "Direct answer, no throat-clearing, every sentence adds value" | |
| ] | |
| }, | |
| { | |
| key: "accuracy", name: "Accuracy", | |
| levels: [ | |
| "Completely incorrect or contains hallucinated details", | |
| "Partially correct but includes misleading claims", | |
| "Mostly correct but some poorly worded claims", | |
| "Factually flawless, identifies edge cases and limitations" | |
| ] | |
| }, | |
| { | |
| key: "currency", name: "Industry Currency", | |
| levels: [ | |
| "Outdated, generic, or textbook examples", | |
| "Mentions recent concepts but applies them superficially", | |
| "References current trends but lacks insider nuance", | |
| "Reflects concepts used at top companies with specific examples" | |
| ] | |
| }, | |
| { | |
| key: "actionability", name: "Actionability", | |
| levels: [ | |
| "Abstract principles only, no concrete steps", | |
| "Some useful pointers but too abstract to act on", | |
| "Workable approach with concrete steps or examples", | |
| "Clear, actionable guidance the reader could follow immediately" | |
| ] | |
| }, | |
| { | |
| key: "clarity", name: "Cognitive Clarity", | |
| levels: [ | |
| "Conflicting advice unresolved, jargon undefined, circular logic", | |
| "Concepts defined but narrative is disjointed", | |
| "Generally clear, jargon mostly explained", | |
| "Resolves competing suggestions, logic flows seamlessly" | |
| ] | |
| }, | |
| { | |
| key: "tone", name: "Tone & Persona", | |
| levels: [ | |
| "Robotic or sycophantic, uses AI-isms, lectures the user", | |
| "Safe & generic, reads like Wikipedia or customer service", | |
| "Professional and objective, avoids most filler", | |
| "Expert peer: direct, confident, zero fluff or sycophancy" | |
| ] | |
| }, | |
| { | |
| key: "selfawareness", name: "Self-Awareness", | |
| levels: [ | |
| "One-size-fits-all answer with false confidence", | |
| "Acknowledges some ambiguity but doesn't help resolve it", | |
| "Notes context-dependence and explains key factors", | |
| "Acknowledges limits, explains context, invites reader to narrow down" | |
| ] | |
| }, | |
| { | |
| key: "attribution", name: "Attribution", | |
| levels: [ | |
| "No named sources, experts, or citations of any kind", | |
| "Vague sources: 'studies show,' 'experts say'", | |
| "Named but niche or tangential sources", | |
| "Trusted, domain-matched authority with specific citations" | |
| ] | |
| }, | |
| { | |
| key: "sourcecredibility", name: "Source Credibility", | |
| levels: [ | |
| "No sources, or sources are fabricated/misattributed", | |
| "Sources exist but are irrelevant, non-credible, or cited but not actually used", | |
| "Credible and relevant sources but coverage is thin or attribution is loose", | |
| "Credible, relevant sources that meaningfully support the answer; correct attribution" | |
| ] | |
| }, | |
| { | |
| key: "scannability", name: "Scannability & Formatting", | |
| levels: [ | |
| "Broken formatting or completely unstructured wall of text", | |
| "Long answer (2000+ chars) with no headers or bullet points", | |
| "Uses either headers or bullet points to organize; short answers default here", | |
| "Well-organized with both headers and bullet points for easy scanning" | |
| ] | |
| } | |
| ]; | |
| const STORAGE_KEY = "llm-annotations-v1"; | |
/**
 * Read the saved annotation list from localStorage.
 *
 * @returns {Array} The stored annotations, or an empty array when nothing is
 *   stored, the stored value is not valid JSON, or it is not an array.
 */
function load() {
  try {
    const parsed = JSON.parse(localStorage.getItem(STORAGE_KEY));
    // App calls .map/.find/.length on this value, so a stored value that
    // parses to a non-array (object, string, number) must not be returned.
    return Array.isArray(parsed) ? parsed : [];
  } catch (err) {
    // Corrupt JSON or inaccessible storage: start empty, but leave a trace.
    console.warn("Could not load saved annotations:", err);
    return [];
  }
}
/**
 * Persist the annotation list to localStorage as JSON.
 *
 * @param {Array} data - The annotations to store.
 */
function save(data) {
  try {
    localStorage.setItem(STORAGE_KEY, JSON.stringify(data));
  } catch (err) {
    // setItem throws when the quota is exceeded or storage is unavailable.
    // This runs inside a React effect, so keep the in-memory session alive
    // and report the failure instead of letting the exception escape.
    console.warn("Could not save annotations:", err);
  }
}
| marked.setOptions({ breaks: true, gfm: true }); | |
| function App() { | |
| const [annotations, setAnnotations] = useState(() => load()); | |
| const [activeId, setActiveId] = useState(null); | |
| const [inputText, setInputText] = useState(""); | |
| const [questionText, setQuestionText] = useState(""); | |
| const textareaRef = useRef(null); | |
| useEffect(() => { save(annotations); }, [annotations]); | |
| // auto-select first if none active | |
| useEffect(() => { | |
| if (activeId === null && annotations.length > 0) setActiveId(annotations[0].id); | |
| }, [annotations.length]); | |
| const active = annotations.find(a => a.id === activeId) || null; | |
// Turn the current input fields into a new annotation, put it at the front
// of the list, select it, and reset both inputs. Does nothing when the
// response text is blank.
function addAnnotation() {
  const response = inputText.trim();
  if (response === "") return;
  const question = questionText.trim();
  const entry = {
    id: Date.now(),
    text: response,
    question: question === "" ? null : question,
    scores: {},
    notes: "",
    createdAt: new Date().toISOString()
  };
  setAnnotations(prev => [entry].concat(prev));
  setActiveId(entry.id);
  setInputText("");
  setQuestionText("");
}
// Merge the given fields into the currently selected annotation, leaving
// every other annotation untouched.
function updateActive(patch) {
  setAnnotations(prev =>
    prev.map(item => {
      if (item.id !== activeId) return item;
      return Object.assign({}, item, patch);
    })
  );
}
// Record a score for one rubric criterion on the selected annotation.
// Picking the level that is already selected clears it (stored as null).
function setScore(key, value) {
  if (active === null) return;
  const current = { ...active.scores };
  const toggledOff = current[key] === value;
  updateActive({ scores: { ...current, [key]: toggledOff ? null : value } });
}
// Remove one annotation. If it was the selected one, select the first
// remaining annotation, or nothing when the list becomes empty.
function deleteAnnotation(id) {
  // Keep the updater pure: React may invoke updater functions more than
  // once, so the selection change is done outside of it.
  setAnnotations(prev => prev.filter(a => a.id !== id));
  if (activeId !== id) return;
  const remaining = annotations.filter(a => a.id !== id);
  setActiveId(remaining.length > 0 ? remaining[0].id : null);
}
// Hand the given text to the browser as a file download.
function downloadText(content, mimeType, filename) {
  const url = URL.createObjectURL(new Blob([content], { type: mimeType }));
  Object.assign(document.createElement("a"), { href: url, download: filename }).click();
  // Release the object URL on a later task instead of synchronously after
  // click(), so the browser has a chance to start reading the blob first.
  setTimeout(() => URL.revokeObjectURL(url), 0);
}
// Download every annotation, unmodified, as pretty-printed JSON.
function exportJSON() {
  downloadText(JSON.stringify(annotations, null, 2), "application/json", "annotations.json");
}
// Download one CSV row per annotation: id, question, creation time, one
// column per rubric criterion, the total of the scored criteria, the notes,
// and the first 500 characters of the response text.
function exportCSV() {
  // Wrap a value in double quotes, doubling any embedded quotes.
  const quote = s => '"' + String(s || "").replace(/"/g, '""') + '"';
  const headers = ["id", "question", "created_at", ...RUBRIC.map(r => r.key), "total", "notes", "response_text"];
  const rows = annotations.map(a => {
    // Unscored (missing or null) criteria become empty cells.
    const scores = RUBRIC.map(r => a.scores?.[r.key] ?? "");
    const total = scores.filter(s => s !== "").reduce((acc, v) => acc + v, 0);
    return [a.id, quote(a.question), a.createdAt, ...scores, total, quote(a.notes), quote(a.text?.substring(0, 500))].join(",");
  });
  // The doubled backslash is deliberate: this script is embedded in a
  // template literal, which turns it into a single backslash-n escape.
  downloadText([headers.join(","), ...rows].join("\\n"), "text/csv", "annotations.csv");
}
// Submit the inputs when Cmd+Enter or Ctrl+Enter is pressed.
function handleKeyDown(e) {
  const hasModifier = e.metaKey || e.ctrlKey;
  if (!hasModifier || e.key !== "Enter") return;
  e.preventDefault();
  addAnnotation();
}
// Label shown on a tab: the sum of the chosen scores over the maximum
// possible (3 per rubric criterion), or an empty string when nothing has
// been scored yet.
function tabScore(a) {
  let sum = 0;
  let count = 0;
  for (const value of Object.values(a.scores || {})) {
    if (value === null || value === undefined) continue;
    sum += value;
    count += 1;
  }
  return count === 0 ? "" : sum + "/" + (RUBRIC.length * 3);
}
| // total score for active | |
| const activeScored = active ? Object.values(active.scores || {}).filter(v => v !== null && v !== undefined) : []; | |
| const activeTotal = activeScored.reduce((s, v) => s + v, 0); | |
| return h(Fragment, null, | |
| // top bar | |
| h("div", { className: "topbar" }, | |
| h("div", { className: "topbar-left" }, | |
| h("h1", null, "Annotate"), | |
| h("span", { className: "count" }, annotations.length + " responses") | |
| ), | |
| h("div", { className: "topbar-actions" }, | |
| annotations.length > 0 ? h("button", { className: "btn", onClick: exportJSON }, "JSON") : null, | |
| annotations.length > 0 ? h("button", { className: "btn", onClick: exportCSV }, "CSV") : null, | |
| annotations.length > 0 ? h("button", { className: "btn btn-danger", onClick: () => { if (confirm("Delete all?")) { setAnnotations([]); setActiveId(null); } } }, "Clear") : null | |
| ) | |
| ), | |
| // input bar | |
| h("div", { className: "input-bar" }, | |
| h("textarea", { | |
| ref: textareaRef, | |
| value: inputText, | |
| onChange: e => setInputText(e.target.value), | |
| onKeyDown: handleKeyDown, | |
| placeholder: "Paste LLM response (markdown)..." | |
| }), | |
| h("div", { className: "input-right" }, | |
| h("input", { | |
| type: "text", | |
| value: questionText, | |
| onChange: e => setQuestionText(e.target.value), | |
| onKeyDown: handleKeyDown, | |
| placeholder: "Question (optional)" | |
| }), | |
| h("button", { className: "btn btn-primary", onClick: addAnnotation, disabled: !inputText.trim() }, "Add") | |
| ) | |
| ), | |
| // tabs for multiple annotations | |
| annotations.length > 1 ? h("div", { className: "card-list" }, | |
| annotations.map(a => | |
| h("button", { | |
| key: a.id, | |
| className: "card-tab" + (a.id === activeId ? " active" : ""), | |
| onClick: () => setActiveId(a.id) | |
| }, | |
| "#" + String(a.id).slice(-4), | |
| a.question ? " " + a.question.substring(0, 20) : "", | |
| h("span", { className: "tab-score" }, tabScore(a)) | |
| ) | |
| ) | |
| ) : null, | |
| // workspace | |
| active | |
| ? h("div", { className: "workspace" }, | |
| // left: rendered response | |
| h("div", { className: "response-pane" }, | |
| active.question ? h(Fragment, null, | |
| h("div", { className: "question-label" }, "Question"), | |
| h("div", { className: "question-text" }, active.question) | |
| ) : null, | |
| h("div", { className: "md", dangerouslySetInnerHTML: { __html: marked.parse(active.text || "") } }) | |
| ), | |
| // right: rubric | |
| h("div", { className: "rubric-pane" }, | |
| h("div", { className: "rubric-header" }, | |
| h("span", { style: { fontSize: "0.75rem", fontWeight: 600 } }, "Rubric"), | |
| h("span", { className: "score-total" }, | |
| activeScored.length > 0 | |
| ? h(Fragment, null, h("strong", null, activeTotal), " / " + (RUBRIC.length * 3)) | |
| : "not scored" | |
| ) | |
| ), | |
| RUBRIC.map(criterion => | |
| h("div", { key: criterion.key, className: "criterion" }, | |
| h("div", { className: "criterion-name" }, criterion.name), | |
| criterion.levels.map((desc, i) => | |
| h("div", { | |
| key: i, | |
| className: "level-row" + (active.scores?.[criterion.key] === i ? " selected" : ""), | |
| onClick: () => setScore(criterion.key, i) | |
| }, | |
| h("span", { className: "level-score" }, i), | |
| h("span", { className: "level-desc" }, desc) | |
| ) | |
| ) | |
| ) | |
| ), | |
| // notes | |
| h("div", { className: "notes-section" }, | |
| h("textarea", { | |
| placeholder: "Notes...", | |
| value: active.notes || "", | |
| onChange: e => updateActive({ notes: e.target.value }) | |
| }) | |
| ), | |
| // delete this one | |
| h("div", { style: { marginTop: "0.6rem", textAlign: "right" } }, | |
| h("button", { | |
| className: "btn btn-danger", | |
| onClick: () => deleteAnnotation(active.id) | |
| }, "Delete this response") | |
| ) | |
| ) | |
| ) | |
| : h("div", { className: "workspace" }, | |
| h("div", { className: "empty", style: { gridColumn: "1 / -1" } }, "Paste a response above to start annotating") | |
| ) | |
| ); | |
| } | |
| createRoot(document.getElementById("root")).render(h(App)); | |
| </script> | |
| </body> | |
| </html>`; | |
// Serve the single-page app for every request. The charset is declared in
// the header so the page's encoding does not depend on the meta tag alone.
const server = Bun.serve({
  port: 3000,
  fetch: () => new Response(html, { headers: { "Content-Type": "text/html; charset=utf-8" } }),
});
// Print the port the server actually bound to, so the message cannot drift
// from the configuration above.
console.log("http://localhost:" + server.port);
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.