目标读者:想学习如何写测试的开发者
文件:tests/memory-gate.test.mjs
// 1. 导入依赖
import test from "node:test";
import assert from "node:assert/strict";
import { MemoryGateAnalyzer } from "../dist/memory-gate/analyzer.js";
import { MEMORY_GATE_SYSTEM_PROMPT } from "../dist/memory-gate/prompt.js";
// 2. 辅助函数
/**
 * Builds a silent logger stub for use in tests.
 *
 * @returns {{debug: Function, info: Function, warn: Function, error: Function}}
 *   A new object whose four log-level methods accept any arguments and do nothing.
 */
function createLogger() {
  const levels = ["debug", "info", "warn", "error"];
  // One independent no-op per level, in the same key order as the level list.
  return Object.fromEntries(levels.map((level) => [level, () => {}]));
}
// 3. 测试用例(按功能分组)
test("测试描述 1", async () => { ... });
test("测试描述 2", () => { ... });
// ...

test("MemoryGateAnalyzer consumes structured object output from LLMService", async () => {
// ========== 1. 准备阶段(Arrange)==========
const calls = []; // 用于捕获 LLM 调用参数
// 创建一个"假"的 LLM 服务(Mock)
const llmService = {
async generateObject(params) {
calls.push(params); // 记录调用参数
// 返回预设的响应(不真的调用 LLM)
return {
decision: "UPDATE_USER",
reason: "stable collaboration preference",
candidate_fact: "prefers direct technical feedback",
};
},
};
// 创建被测试的对象
const analyzer = new MemoryGateAnalyzer(llmService, createLogger());
// ========== 2. 执行阶段(Act)==========
const result = await analyzer.analyze({
recentMessages: [
{
role: "user",
message: "I want direct feedback, not sugar coating.",
timestamp: Date.now() - 1000,
},
],
currentUserMessage: "Please be direct when reviewing my code.",
currentAgentReply: "I will keep feedback concise and direct.",
});
// ========== 3. 断言阶段(Assert)==========
// 断言 1:返回结果符合预期
assert.deepEqual(result, {
decision: "UPDATE_USER",
reason: "stable collaboration preference",
candidateFact: "prefers direct technical feedback", // 注意:下划线变驼峰
});
// 断言 2:LLM 被调用了一次
assert.equal(calls.length, 1);
// 断言 3:使用了正确的 system prompt
assert.match(
calls[0].systemPrompt,
/memory_gate/,
"expected analyzer to use the memory_gate system prompt"
);
});

| 要点 | 说明 |
|---|---|
| Mock/Stub | 用假的 LLMService 代替真实的,测试更快更稳定 |
| Arrange-Act-Assert | 测试的三段式结构:准备→执行→验证 |
| 断言的粒度 | 不只验证返回值,还验证调用次数、参数内容 |
test("memory_gate system prompt includes explicit routing guidance for USER, MEMORY, SOUL, IDENTITY, and TOOLS", () => {
// 这个测试检查 prompt 中是否包含关键指令
// 目的是防止 prompt 修改时意外删除重要指导
// 负面检查:prompt 不应该硬编码特定助手名字
assert.doesNotMatch(MEMORY_GATE_SYSTEM_PROMPT, /Lia/);
// 正面检查:必须包含各类路由指导
assert.match(MEMORY_GATE_SYSTEM_PROMPT, /UPDATE_USER[\s\S]*language/i);
assert.match(MEMORY_GATE_SYSTEM_PROMPT, /UPDATE_SOUL[\s\S]*behavioral principle/i);
assert.match(MEMORY_GATE_SYSTEM_PROMPT, /even if proposed by the user/i);
assert.match(MEMORY_GATE_SYSTEM_PROMPT, /general manner[\s\S]*UPDATE_SOUL/i);
assert.match(MEMORY_GATE_SYSTEM_PROMPT, /UPDATE_IDENTITY[\s\S]*name/i);
assert.match(MEMORY_GATE_SYSTEM_PROMPT, /UPDATE_TOOLS[\s\S]*local tool/i);
assert.match(MEMORY_GATE_SYSTEM_PROMPT, /ssh[\s\S]*alias|alias[\s\S]*ssh/i);
// ... 还有 20+ 个类似检查
});

| 要点 | 说明 |
|---|---|
| 回归测试 | 确保代码修改不会破坏已有功能 |
| 正则匹配 | 用 [\s\S]* 匹配跨行内容,用 i 表示忽略大小写 |
| 测试的脆弱性 | 这类测试很脆弱,prompt 措辞一变就挂,要权衡使用 |
// 更好的方式:只测试关键结构,不测试具体措辞
test("memory_gate prompt includes required routing sections", () => {
const requiredSections = [
{ name: "USER routing", pattern: /UPDATE_USER/ },
{ name: "SOUL routing", pattern: /UPDATE_SOUL/ },
{ name: "IDENTITY routing", pattern: /UPDATE_IDENTITY/ },
{ name: "TOOLS routing", pattern: /UPDATE_TOOLS/ },
{ name: "NO_WRITE guidance", pattern: /NO_WRITE/ },
];
for (const section of requiredSections) {
assert.match(
MEMORY_GATE_SYSTEM_PROMPT,
section.pattern,
`Missing ${section.name} in prompt`
);
}
});

test("MemoryGateAnalyzer accepts UPDATE_TOOLS responses from the LLM", async () => {
// 测试 LLM 返回 UPDATE_TOOLS 决策时的处理
const analyzer = new MemoryGateAnalyzer(
{
async generateObject() {
return {
decision: "UPDATE_TOOLS",
reason: "local ssh alias mapping",
candidate_fact: "home-server SSH alias refers to devbox.internal",
};
},
},
createLogger()
);
const result = await analyzer.analyze({
recentMessages: [],
currentUserMessage: "记一下 home-server 其实是 devbox.internal。", // 中文输入
currentAgentReply: "收到,以后 home-server 这个别名指向 devbox.internal。",
});
// 验证决策被正确传递
assert.deepEqual(result, {
decision: "UPDATE_TOOLS",
reason: "local ssh alias mapping",
candidateFact: "home-server SSH alias refers to devbox.internal",
});
});

| 要点 | 说明 |
|---|---|
| 等价类覆盖 | 为每种可能的决策类型(UPDATE_USER, UPDATE_TOOLS, NO_WRITE 等)各写一个用例 |
| 国际化 | 用中文输入测试系统是否能正确处理非英语场景 |
test("memory_gate prompt asks for a canonical concise English candidate fact", () => {
// 测试 buildPrompt 方法生成的 prompt 结构
const analyzer = new MemoryGateAnalyzer(
{
async generateObject() {
return { decision: "NO_WRITE", reason: "noop" };
},
},
createLogger()
);
const prompt = analyzer.buildPrompt({
recentMessages: [],
currentUserMessage: "之后默认说中文。",
currentAgentReply: "好,默认中文。",
});
// 验证 prompt 包含关键指令
assert.match(prompt, /canonical/i); // 要求规范化表达
assert.match(prompt, /english/i); // 要求英文
assert.match(prompt, /candidate_fact/i); // 要求输出候选事实
});

| 要点 | 说明 |
|---|---|
| 测试私有方法 | 通过 analyzer.buildPrompt() 测试内部逻辑 |
| 内容检查 | 验证生成的 prompt 包含必要的指导词 |
F - Fast(快速) : 用 Mock 避免真实 LLM 调用
I - Independent(独立): 每个测试不依赖其他测试
R - Repeatable(可重复): 任何环境运行结果一致
S - Self-validating(自验证): 自动判断通过/失败
T - Timely(及时) : 与代码一起编写
          /\
         /  \
       / E2E \            <- 端到端测试(少量)
      /--------\
    / Integration\        <- 集成测试(中等)
   /--------------\
 /    Unit Tests    \     <- 单元测试(大量)<- 我们现在写的
/--------------------\
当前的 memory_gate 测试属于 单元测试 层。
// ❌ 差的命名
test("analyzer works", () => {});
// ✅ 好的命名:描述行为,不是实现
test("MemoryGateAnalyzer returns UPDATE_USER for stable preferences", () => {});
test("MemoryGateAnalyzer rejects temporary mood changes", () => {});

假设师兄要添加一个 UPDATE_MEMORY 的特殊场景(失败经验记录):
// src/memory-gate/analyzer.ts 中的新逻辑
// 如果用户说 "之前用 X 方法失败了,不要用",
// 应该 UPDATE_MEMORY 记录这个失败经验

test("MemoryGateAnalyzer records failed approaches as UPDATE_MEMORY", async () => {
const analyzer = new MemoryGateAnalyzer(
{
async generateObject() {
// 先让 LLM 返回期望的结果
return {
decision: "UPDATE_MEMORY",
reason: "failed approach should be remembered",
candidate_fact: "using Python threading for CPU-bound tasks failed",
};
},
},
createLogger()
);
const result = await analyzer.analyze({
recentMessages: [],
currentUserMessage: "之前用多线程处理计算任务卡死了,别推荐这个。",
currentAgentReply: "了解了,以后不推荐用多线程处理 CPU 密集型任务。",
});
assert.equal(result.decision, "UPDATE_MEMORY");
assert.match(result.candidateFact, /threading|failed/i);
});

npm test

如果是真的 LLM 调用,可能返回不同的决策,这时候需要:
- 调整 prompt
- 或者调整测试的预期
test("MemoryGateAnalyzer handles all decision types", async () => {
  // Table-driven cases: each entry pairs a user message with the canned LLM
  // response and the decision the analyzer is expected to return for it.
  const testCases = [
    {
      name: "user preference",
      input: { currentUserMessage: "I prefer dark mode" },
      mockResponse: { decision: "UPDATE_USER", candidate_fact: "prefers dark mode" },
      expected: { decision: "UPDATE_USER" },
    },
    {
      name: "identity change",
      input: { currentUserMessage: "Call me Bob" },
      mockResponse: { decision: "UPDATE_IDENTITY", candidate_fact: "Name is Bob" },
      expected: { decision: "UPDATE_IDENTITY" },
    },
    {
      name: "small talk",
      input: { currentUserMessage: "How are you?" },
      mockResponse: { decision: "NO_WRITE", reason: "small talk" },
      expected: { decision: "NO_WRITE" },
    },
  ];

  for (const tc of testCases) {
    // A fresh analyzer per case; its stubbed LLM returns that case's response.
    const analyzer = new MemoryGateAnalyzer(
      { async generateObject() { return tc.mockResponse; } },
      createLogger()
    );
    const result = await analyzer.analyze({
      recentMessages: [],
      currentUserMessage: tc.input.currentUserMessage,
      currentAgentReply: "OK",
    });
    // The case name in the message identifies which table row failed.
    assert.equal(result.decision, tc.expected.decision,
      `Failed for case: ${tc.name}`);
  }
});

import { beforeEach, afterEach } from "node:test";

let analyzer;
let calls;

// Setup that runs before every test. `before` would run only once for the
// whole file, so recorded calls would accumulate across tests; `beforeEach`
// gives each test a fresh array.
beforeEach(() => {
  calls = [];
});

// Cleanup that runs after every test.
afterEach(() => {
  calls = null;
});
test("test 1", async () => { ... });
test("test 2", async () => { ... });

test("MemoryGateAnalyzer handles LLM errors gracefully", async () => {
const analyzer = new MemoryGateAnalyzer(
{
async generateObject() {
throw new Error("LLM provider timeout");
},
},
createLogger()
);
// 验证抛出异常
await assert.rejects(
async () => {
await analyzer.analyze({
recentMessages: [],
currentUserMessage: "test",
currentAgentReply: "test",
});
},
/LLM provider timeout/
);
});

# 使用 Node.js 内置覆盖率
node --test --experimental-test-coverage tests/*.test.mjs

输出示例:
ℹ ---------------------------------------
ℹ file | line %
ℹ ---------------------------------------
ℹ src/memory-gate/analyzer.ts | 94.23%
ℹ src/memory-gate/prompt.ts | 100.00%
ℹ ---------------------------------------
写一个新功能时,问自己:
- 正常路径:功能按预期工作吗?
- 边界情况:空输入、极大值、特殊字符?
- 错误处理:网络失败、LLM 返回格式错误?
- 所有分支:if/else 每个分支都测试了吗?
- 回归防护:改代码会不会破坏已有功能?
- Node.js Test Runner 官方文档 https://nodejs.org/api/test.html
- Testing JavaScript (Kent C. Dodds)
  - 概念:Mock、Stub、Spy 的区别
- 单元测试准则 (GitHub - microsoft/code-with-engineering-playbook)
  - 命名规范
  - 结构组织
import test from "node:test";
import assert from "node:assert/strict";
import { ModuleUnderTest } from "../path/to/module.js";
// 辅助函数
/**
 * Creates a fresh set of stand-in collaborators for the module under test.
 *
 * @returns {{logger: object, service: object}}
 *   `logger` exposes no-op `debug`/`info`/`warn`/`error` methods;
 *   `service.call()` resolves to a new empty object.
 */
function createMockDependencies() {
  const levels = ["debug", "info", "warn", "error"];
  const logger = Object.fromEntries(levels.map((level) => [level, () => {}]));
  const service = {
    call: async () => ({}),
  };
  return { logger, service };
}
test("ModuleUnderTest does X when Y", async () => {
// Arrange
const deps = createMockDependencies();
const instance = new ModuleUnderTest(deps);
// Act
const result = await instance.method(input);
// Assert
assert.equal(result.property, expectedValue);
});

文档版本: 1.0
作者: Lia for Sirocco