From 1119ae2843c7e841aeaf8b6f4dacb5f52d5358fc Mon Sep 17 00:00:00 2001 From: Rohit Ghumare Date: Tue, 14 Apr 2026 10:11:46 +0100 Subject: [PATCH 1/4] feat: add pre-flight discipline rule (karpathy principles) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Encodes the four upstream-failure preventions Karpathy named: silent assumptions, overcomplicated diffs, drive-by edits, vague success criteria. Self-correction catches mistakes after the fact - this catches them before. Adds rules/pre-flight-discipline.mdc (alwaysApply: true) and a §1b cross-reference in SKILL.md so it sits next to self-correction in the narrative. No changes to existing rules or agents. Adapted from forrestchang/andrej-karpathy-skills (MIT) with light reframing to match pro-workflow's voice and structure. --- rules/pre-flight-discipline.mdc | 62 +++++++++++++++++++++++++++++++++ skills/pro-workflow/SKILL.md | 26 ++++++++++++++ 2 files changed, 88 insertions(+) create mode 100644 rules/pre-flight-discipline.mdc diff --git a/rules/pre-flight-discipline.mdc b/rules/pre-flight-discipline.mdc new file mode 100644 index 0000000..ee5ba6e --- /dev/null +++ b/rules/pre-flight-discipline.mdc @@ -0,0 +1,62 @@ +--- +description: Pre-flight discipline - prevent silent assumptions, scope creep, and drive-by edits before they happen +alwaysApply: true +--- + +Quality gates and self-correction catch mistakes after the fact. These rules prevent the upstream failures. + +## 1. Surface, don't assume + +- State assumptions explicitly. If uncertain, ask before coding. +- If the request has multiple interpretations, present them - never pick silently. +- If a simpler approach exists than what was asked, say so. +- If something is unclear, stop. Name what's confusing. Ask. + +## 2. Minimum viable code + +- No features beyond what was asked. +- No abstractions for single-use code. +- No "flexibility" or configurability that wasn't requested. 
+- No error handling for scenarios that cannot happen. +- If the diff is 200 lines and 50 would do, rewrite it. + +Senior-engineer test: would they call this overcomplicated? If yes, simplify before showing. + +## 3. Stay in your lane + +Every changed line must trace to the user's request. + +- Don't "improve" adjacent code, comments, or formatting. +- Don't refactor things that aren't broken. +- Match existing style even if you'd write it differently. +- Notice unrelated dead code? Mention it. Don't delete it. + +When your changes orphan something: +- Remove imports/symbols that *your* edit made unused. +- Leave pre-existing dead code alone unless asked. + +## 4. Verifiable goals over imperatives + +Convert tasks into verification loops: + +| Imperative | Verifiable goal | +|------------|-----------------| +| "Add validation" | "Write tests for invalid inputs, then make them pass" | +| "Fix the bug" | "Write a failing test that reproduces it, then make it pass" | +| "Refactor X" | "Tests pass before and after; behavior unchanged" | + +For multi-step work, plan as `step → verify`: + +``` +1. [step] → verify: [check] +2. [step] → verify: [check] +3. [step] → verify: [check] +``` + +Strong success criteria let the loop run independently. "Make it work" requires constant re-clarification. + +--- + +**Tradeoff:** These rules bias toward caution over speed. For trivial fixes (typos, one-liners, obvious renames), use judgment - not every change needs the full rigor. + +**Source:** Adapted from [Andrej Karpathy's observations](https://x.com/karpathy/status/2015883857489522876) on LLM coding pitfalls, via [forrestchang/andrej-karpathy-skills](https://github.com/forrestchang/andrej-karpathy-skills) (MIT). diff --git a/skills/pro-workflow/SKILL.md b/skills/pro-workflow/SKILL.md index 46d0601..fa9f333 100644 --- a/skills/pro-workflow/SKILL.md +++ b/skills/pro-workflow/SKILL.md @@ -62,6 +62,32 @@ Should I add this? --- +## 1b. 
Pre-Flight Discipline + +**Self-correction catches mistakes after the fact. This catches them before.** + +Karpathy's [observations on LLM coding pitfalls](https://x.com/karpathy/status/2015883857489522876) name the upstream failures: silent assumptions, overcomplicated diffs, drive-by edits, vague success criteria. Four rules prevent each one. + +| Rule | Prevents | +|------|----------| +| **Surface, don't assume** | Wrong interpretation, hidden confusion, missing tradeoffs | +| **Minimum viable code** | 200-line diffs that should be 50, speculative abstractions | +| **Stay in your lane** | Drive-by refactors, "improvements" to adjacent code | +| **Verifiable goals** | Endless re-clarification, "make it work" loops | + +Full rules in `rules/pre-flight-discipline.mdc` (`alwaysApply: true`). Pairs with self-correction: pre-flight stops the mistake, self-correction captures the lesson when one slips through. + +### Add to CLAUDE.md + +```markdown +## Pre-Flight Discipline +Before coding: state assumptions, present ambiguity, push back if simpler exists. +Every changed line traces to the request - no drive-by edits. +Convert imperatives to verifiable goals: "fix bug" → "failing test → make it pass". +``` + +--- + ## 2. Parallel Sessions with Worktrees **Zero dead time.** While one Claude thinks, work on something else. From 565eb199bd66ed604815e0f0704fd8777255a846 Mon Sep 17 00:00:00 2001 From: Rohit Ghumare Date: Fri, 8 May 2026 12:15:25 +0100 Subject: [PATCH 2/4] feat(v3.3.0): wiki knowledge base + auto-research loop + multi-LLM council Persistent knowledge plane on top of self-correction memory. 
Skills (5 new): - wiki-builder: 9 flavors, FTS5 shadow index, --scope global|project - wiki-query: BM25 retrieval with snippet, related, show subcommands - wiki-research-loop: budget-capped BFS driver, pluggable source fetchers (web/arxiv/github + custom), convergence detection, kill-switch - llm-council: provider-agnostic 3-phase deliberation (Anthropic/OpenAI/OpenRouter/Fireworks/custom OpenAI-compat) - survey-generator: provider-agnostic literature survey, output to wiki page Scripts: - embed-wiki.js: optional embeddings via OpenAI/Voyage; hybrid BM25+vector+RRF - research-tick.js: cron-driven single-iteration runner - learn-capture: parses Wiki: for wiki-scoped learnings - prompt-submit: auto-injects top-3 wiki hits when prompt matches index - session-start: lists registered wikis on session boot - file-changed: edits inside wiki tree auto-enqueue verify seeds Schema: - wikis, wiki_pages (+FTS5), wiki_sources, wiki_claims, wiki_seeds, wiki_embeddings, learnings_wiki Commands: - /wiki unified entry: init, list, info, page, reindex, ask, related, show, seed, research, seeds, cancel, status, embed, hybrid, council, survey - /doctor extended with wiki KB + council provider sections Build: - schema.sql now copied into dist/ on build, eliminating silent inline-fallback hazard --- README.md | 26 +- commands/doctor.md | 16 + commands/wiki.md | 83 +++++ package.json | 6 +- scripts/embed-wiki.js | 122 ++++++ scripts/file-changed.js | 24 ++ scripts/learn-capture.js | 5 +- scripts/prompt-submit.js | 11 + scripts/research-tick.js | 70 ++++ scripts/session-start.js | 11 + skills/learn-rule/SKILL.md | 11 + skills/llm-council/SKILL.md | 86 +++++ skills/llm-council/scripts/council.js | 278 ++++++++++++++ skills/survey-generator/SKILL.md | 131 +++++++ .../survey-generator/scripts/build-survey.js | 205 ++++++++++ .../templates/research_bundle.template.json | 61 +++ skills/wiki-builder/SKILL.md | 105 ++++++ skills/wiki-builder/agents/openai.yaml | 5 + 
.../wiki-builder/references/wiki-flavors.md | 109 ++++++ skills/wiki-builder/scripts/init_wiki.sh | 85 +++++ skills/wiki-builder/scripts/wiki-cli.js | 224 +++++++++++ skills/wiki-builder/templates/index.md | 21 ++ .../wiki-builder/templates/maintenance-log.md | 5 + .../templates/prompts/compile-concept-page.md | 15 + .../templates/prompts/compile-index.md | 16 + .../templates/prompts/compile-source-page.md | 16 + .../templates/prompts/lint-wiki.md | 12 + .../templates/prompts/query-and-file.md | 11 + skills/wiki-builder/templates/sources.md | 8 + skills/wiki-builder/templates/wiki.config.md | 56 +++ skills/wiki-query/SKILL.md | 72 ++++ skills/wiki-query/scripts/query.js | 110 ++++++ skills/wiki-research-loop/SKILL.md | 144 +++++++ .../scripts/research-loop.js | 352 ++++++++++++++++++ .../scripts/source-fetchers/arxiv.js | 48 +++ .../scripts/source-fetchers/github.js | 48 +++ .../scripts/source-fetchers/web.js | 64 ++++ src/db/index.ts | 69 +--- src/db/schema.sql | 104 ++++++ src/db/store.ts | 250 ++++++++++++- src/search/embeddings.ts | 136 +++++++ 41 files changed, 3159 insertions(+), 72 deletions(-) create mode 100644 commands/wiki.md create mode 100755 scripts/embed-wiki.js create mode 100644 scripts/research-tick.js create mode 100644 skills/llm-council/SKILL.md create mode 100755 skills/llm-council/scripts/council.js create mode 100644 skills/survey-generator/SKILL.md create mode 100755 skills/survey-generator/scripts/build-survey.js create mode 100644 skills/survey-generator/templates/research_bundle.template.json create mode 100644 skills/wiki-builder/SKILL.md create mode 100644 skills/wiki-builder/agents/openai.yaml create mode 100644 skills/wiki-builder/references/wiki-flavors.md create mode 100755 skills/wiki-builder/scripts/init_wiki.sh create mode 100755 skills/wiki-builder/scripts/wiki-cli.js create mode 100644 skills/wiki-builder/templates/index.md create mode 100644 skills/wiki-builder/templates/maintenance-log.md create mode 100644 
skills/wiki-builder/templates/prompts/compile-concept-page.md create mode 100644 skills/wiki-builder/templates/prompts/compile-index.md create mode 100644 skills/wiki-builder/templates/prompts/compile-source-page.md create mode 100644 skills/wiki-builder/templates/prompts/lint-wiki.md create mode 100644 skills/wiki-builder/templates/prompts/query-and-file.md create mode 100644 skills/wiki-builder/templates/sources.md create mode 100644 skills/wiki-builder/templates/wiki.config.md create mode 100644 skills/wiki-query/SKILL.md create mode 100755 skills/wiki-query/scripts/query.js create mode 100644 skills/wiki-research-loop/SKILL.md create mode 100755 skills/wiki-research-loop/scripts/research-loop.js create mode 100644 skills/wiki-research-loop/scripts/source-fetchers/arxiv.js create mode 100644 skills/wiki-research-loop/scripts/source-fetchers/github.js create mode 100644 skills/wiki-research-loop/scripts/source-fetchers/web.js create mode 100644 src/search/embeddings.ts diff --git a/README.md b/README.md index f20f945..a8ec2c1 100644 --- a/README.md +++ b/README.md @@ -13,8 +13,8 @@

Your Claude Code gets smarter every session.

- Self-correcting memory that compounds over 50+ sessions. You correct Claude once — it never makes the same mistake again.
- 24 skills8 agents21 commands29 hook scripts across 24 events
+ Self-correcting memory that compounds over 50+ sessions. You correct Claude once — it never makes the same mistake again. Persistent research wikis indexed in FTS5 surface relevant prior work the moment you ask. Auto-research loop grows the knowledge base while you sleep.
+ 29 skills8 agents22 commands31 hook scripts across 24 events
Works with Claude Code, Cursor, and 32+ agents via SkillKit.

@@ -75,6 +75,20 @@ cd ~/.claude/plugins/*/pro-workflow && npm install && npm run build --- +## What's New in v3.3 + +Persistent knowledge plane on top of the self-correction memory. + +- **Wiki Builder** — Persistent research wikis on disk + SQLite FTS5 shadow index. 9 flavors: research, paper, domain, product, person, organization, project, codebase, incident. `/wiki init`, `/wiki page`, `/wiki reindex`. +- **Wiki Query** — BM25 retrieval across wiki pages. `/wiki ask "<query>"` returns top-K with citations. `related` and `show` subcommands. Auto-loads top-3 hits on UserPromptSubmit when prompt mentions indexed topics. +- **Wiki-scoped learnings** — `[LEARN] ... Wiki: <slug>` binds a rule to one wiki, no cross-project pollution. +- **Auto-research loop** — Budget-capped BFS driver. Pluggable source fetchers (web/arXiv/GitHub + custom). Convergence detection, kill-switch, depth caps. `/wiki seed`, `/wiki research`, `/wiki seeds`, `/wiki cancel`, `/wiki status`. +- **Hybrid retrieval (optional)** — sqlite-vec compatible embeddings via OpenAI or Voyage. RRF fusion of BM25 + vector. `/wiki embed`, `/wiki hybrid`. Degrades cleanly to BM25-only when no embedding key set. +- **LLM Council** — Provider-agnostic 3-phase deliberation (Anthropic/OpenAI/OpenRouter/Fireworks/custom OpenAI-compat). Persists transcript as a wiki page when `--wiki` is passed. `/wiki council`. +- **Survey Generator** — Provider-agnostic literature survey artifact. Output target = wiki markdown page (not standalone HTML). Bibliography rows append to `sources.md` deduped. `/wiki survey`. +- **Reactive triggers** — Edits inside a wiki tree auto-enqueue verify-seeds. Cron-tick script (`scripts/research-tick.js`) runs one iteration of the oldest opted-in wiki with pending seeds. +- **Schema additions** — `wikis`, `wiki_pages` (+ FTS5), `wiki_sources`, `wiki_claims`, `wiki_seeds`, `wiki_embeddings`, `learnings_wiki`.
+ ## What's New in v3.2 - **LLM Gates** — First plugin with `type: "prompt"` hooks for AI-powered commit validation and secret detection @@ -94,15 +108,19 @@ cd ~/.claude/plugins/*/pro-workflow && npm install && npm run build | Feature | Pro Workflow | [Superpowers](https://github.com/obra/superpowers) | [ECC](https://github.com/affaan-m/everything-claude-code) | [gstack](https://github.com/garrytan/gstack) | [GSD](https://github.com/gsd-build/get-shit-done) | |---------|:-----------:|:-----------:|:---:|:------:|:---:| | Self-correcting memory (SQLite + FTS5) | **Yes** | No | No | No | No | +| Persistent research wikis (FTS5-indexed) | **Yes** | No | No | No | No | +| Auto-research loop (budget-capped BFS) | **Yes** | No | No | No | No | +| Hybrid retrieval (BM25 + vector + RRF) | **Yes** | No | No | No | No | +| Multi-provider LLM council | **Yes** | No | No | No | No | | LLM-powered hooks (`type: "prompt"`) | **Yes** | No | No | No | No | | Permission denial analysis | **Yes** | No | No | No | No | | Compaction-aware state preservation | **Yes** | No | No | No | No | | Cost tracking and budget alerts | **Yes** | No | No | No | No | | MCP overhead auditing | **Yes** | No | No | No | No | | Cross-agent (32+ agents via SkillKit) | **Yes** | No | Some | No | No | -| Skills | 24 | 14 | 140+ | 18+ | 0 | +| Skills | 29 | 14 | 140+ | 18+ | 0 | | Agents | 8 | 5 | 36 | 0 | 18 | -| Commands | 21 | 3 | 60+ | 5+ | 57 | +| Commands | 22 | 3 | 60+ | 5+ | 57 | | Hook Events | 24 | 8 | 18 | 0 | 0 | --- diff --git a/commands/doctor.md b/commands/doctor.md index aa51298..98686f7 100644 --- a/commands/doctor.md +++ b/commands/doctor.md @@ -60,6 +60,22 @@ cat ~/.claude/settings.json 2>/dev/null | head -3 - Check for conflicting settings between project and user level - Verify permission rules are correct +### 8. 
Wiki Knowledge Base +```bash +node $PRO_WORKFLOW_ROOT/skills/wiki-builder/scripts/wiki-cli.js list 2>/dev/null +node $PRO_WORKFLOW_ROOT/skills/wiki-research-loop/scripts/research-loop.js status 2>/dev/null +ls -1 ~/.pro-workflow/STOP 2>/dev/null && echo "KILL SWITCH ACTIVE — no research runs" +tail -5 ~/.pro-workflow/tick.log 2>/dev/null +``` +- List of wikis · seed counts per status · kill-switch state · last cron-tick activity +- Embeddings: `OPENAI_API_KEY`/`VOYAGE_API_KEY` set? Hybrid search uses provider; otherwise BM25-only. + +### 9. Council Providers +```bash +node $PRO_WORKFLOW_ROOT/skills/llm-council/scripts/council.js providers 2>/dev/null +``` +- Shows which provider env vars are set (Anthropic/OpenAI/OpenRouter/Fireworks/custom). + ## Quick Fixes | Issue | Fix | diff --git a/commands/wiki.md b/commands/wiki.md new file mode 100644 index 0000000..fe4eef7 --- /dev/null +++ b/commands/wiki.md @@ -0,0 +1,83 @@ +# /wiki — Persistent Research Wikis + +Build, query, and maintain long-lived knowledge bases. Each wiki = markdown folder + SQLite FTS5 shadow index. Survives sessions, indexes auto-load on `SessionStart`. 
+ +## Subcommands + +| Subcommand | Action | +|------------|--------| +| `/wiki init <slug> --title "X" [--flavor research] [--scope global\|project]` | Scaffold + register a new wiki | +| `/wiki list [--scope global\|project]` | List registered wikis | +| `/wiki info <slug>` | Show wiki metadata + page count | +| `/wiki page <slug> <rel_path> [--title "X"] [--type concept\|paper\|...]` | Upsert a markdown page into the FTS index | +| `/wiki reindex <slug>` | Walk `wiki/` and re-index all `*.md` | +| `/wiki ask "<query>" [--wiki <slug>] [--limit N]` | BM25 search over wiki pages | +| `/wiki related <slug> <rel_path>` | Find adjacent pages by reusing title+summary as query | +| `/wiki show <slug> <rel_path>` | Print a page from the index | +| `/wiki seed <slug> "<query>" [--depth 0]` | Enqueue a research seed | +| `/wiki research <slug> [--max-pages 5] [--budget-usd 0.50] [--fetchers web,arxiv,github]` | Run the auto-research loop | +| `/wiki seeds <slug> [--status pending\|active\|done\|failed]` | List queued seeds | +| `/wiki cancel <slug>` | Mark all pending/active seeds as failed | +| `/wiki status` | Cross-wiki seed counts + kill-switch state | +| `/wiki embed [<slug>] [--limit N]` | Compute embeddings for indexed pages (needs `OPENAI_API_KEY` or `VOYAGE_API_KEY`) | +| `/wiki hybrid "<query>" [--wiki <slug>]` | Hybrid retrieval: BM25 + vector + RRF | +| `/wiki council "<query>" --wiki <slug>` | Run llm-council; persist transcript as a wiki page | +| `/wiki survey --bundle <path> --wiki <slug>` | Generate literature survey from a research_bundle.json | + +## Routes to skills + +- `init / list / info / page / reindex` → `wiki-builder` +- `ask / related / show / hybrid` → `wiki-query` (+ `embed-wiki.js` for hybrid) +- `seed / research / seeds / cancel / status` → `wiki-research-loop` +- `council` → `llm-council` +- `survey` → `survey-generator` +- `embed` → `scripts/embed-wiki.js` + +## Storage + +- **Global** (default): `~/.pro-workflow/wikis/<slug>/` +- **Project**: `<project>/.claude/wikis/<slug>/` via `--scope project` + +Both indexed in `~/.pro-workflow/data.db`.
+ +## Flavors + +`research`, `paper`, `domain`, `product`, `person`, `organization`, `project`, `codebase`, `incident`. See wiki-builder skill for layout per flavor. + +## Examples + +``` +/wiki init agent-memory --title "Agent Memory" --flavor research +/wiki page agent-memory wiki/concepts/episodic-memory.md --type concept +/wiki ask "what is episodic memory" --wiki agent-memory +/wiki related agent-memory wiki/concepts/episodic-memory.md +``` + +## Auto-research (Phase 3.3.1+) + +Loop is opt-in per-wiki via `wiki.config.md`: + +```yaml +auto_research: + enabled: true + max_pages_per_run: 5 + max_depth: 3 + budget_usd: 0.50 +``` + +Phase 3.3.0 ships the index + manual page workflow. Loop driver lands next. + +## Cited recall + +Every answer that uses a wiki hit must cite as: + +``` +[wiki:<slug>] — `<rel_path>` +``` + +No paraphrase without citation. + +## Related + +- `/learn` (and `learn-rule`) accepts `--wiki <slug>` to scope a learning rule to one wiki. +- UserPromptSubmit auto-loads top-3 wiki hits when the prompt matches indexed content; SessionStart lists the registered wikis.
diff --git a/package.json b/package.json index b5de6f9..4bf6ef7 100644 --- a/package.json +++ b/package.json @@ -1,11 +1,11 @@ { "name": "pro-workflow", - "version": "3.1.0", - "description": "Complete AI coding workflow system with orchestration patterns, cross-agent support, reference guides, and searchable learnings", + "version": "3.3.0", + "description": "Complete AI coding workflow system with orchestration patterns, cross-agent support, reference guides, searchable learnings, and persistent research wikis", "main": "dist/index.js", "types": "dist/index.d.ts", "scripts": { - "build": "tsc", + "build": "tsc && cp src/db/schema.sql dist/db/schema.sql", "clean": "rm -rf dist", "prepublishOnly": "npm run build", "db:init": "node dist/db/index.js" diff --git a/scripts/embed-wiki.js b/scripts/embed-wiki.js new file mode 100755 index 0000000..08930d5 --- /dev/null +++ b/scripts/embed-wiki.js @@ -0,0 +1,122 @@ +#!/usr/bin/env node +const fs = require('fs'); +const path = require('path'); + +const PRO_WORKFLOW_ROOT = path.resolve(__dirname, '..'); + +function getStore() { + const distPath = path.join(PRO_WORKFLOW_ROOT, 'dist', 'db', 'store.js'); + if (!fs.existsSync(distPath)) { console.error('build store first'); process.exit(1); } + return require(distPath).createStore(); +} + +function getEmbedHelpers() { + const distPath = path.join(PRO_WORKFLOW_ROOT, 'dist', 'search', 'embeddings.js'); + if (!fs.existsSync(distPath)) { console.error('build embeddings first'); process.exit(1); } + return require(distPath); +} + +function parseArgs(argv) { + const out = { _: [] }; + for (let i = 0; i < argv.length; i++) { + const a = argv[i]; + if (a.startsWith('--')) { const k = a.slice(2); const n = argv[i+1]; if (n && !n.startsWith('--')) { out[k] = n; i++; } else out[k] = true; } + else out._.push(a); + } + return out; +} + +async function cmdAll(args) { + const slug = args._[0]; + const helpers = getEmbedHelpers(); + const provider = helpers.getEmbeddingProvider(); + if 
(!provider) { + console.error('No embedding provider env set. OPENAI_API_KEY or VOYAGE_API_KEY required.'); + process.exit(2); + } + const store = getStore(); + try { + const pages = slug ? store.listWikiPages(slug) : store.db.prepare('SELECT * FROM wiki_pages').all(); + const limit = parseInt(args.limit, 10) || 200; + const todo = []; + for (const p of pages.slice(0, limit)) { + const has = store.db.prepare('SELECT 1 FROM wiki_embeddings WHERE page_id = ? AND model = ?').get(p.id, `${provider.name}:${provider.model}`); + if (has && !args.force) continue; + todo.push(p); + } + if (!todo.length) { console.log(JSON.stringify({ embedded: 0, provider: `${provider.name}:${provider.model}`, message: 'all up-to-date' })); return; } + console.error(`[embed] ${todo.length} pages → ${provider.name}:${provider.model}`); + + const batchSize = 16; + let done = 0; + for (let i = 0; i < todo.length; i += batchSize) { + const batch = todo.slice(i, i + batchSize); + const inputs = batch.map(p => `${p.title}\n\n${(p.content || '').slice(0, 8000)}`); + const vectors = await provider.embed(inputs); + for (let j = 0; j < batch.length; j++) { + helpers.upsertEmbedding(store.db, batch[j].id, provider, vectors[j]); + done++; + } + console.error(`[embed] ${done}/${todo.length}`); + } + console.log(JSON.stringify({ embedded: done, provider: `${provider.name}:${provider.model}` })); + } finally { store.close(); } +} + +async function cmdSearch(args) { + const query = args._[0]; + if (!query) { console.error('search: query required'); process.exit(1); } + const helpers = getEmbedHelpers(); + const provider = helpers.getEmbeddingProvider(); + if (!provider) { console.error('No embedding provider env'); process.exit(2); } + const store = getStore(); + try { + const [qv] = await provider.embed([query]); + const limit = parseInt(args.limit, 10) || 10; + + const vectorHits = helpers.vectorSearch(store.db, qv, { wikiSlug: args.wiki, limit }); + const bm25Hits = store.searchWiki(query, { wikiSlug: 
args.wiki, limit, loose: true }); + + if (args.mode === 'vector') { + console.log(JSON.stringify(vectorHits, null, 2)); + return; + } + if (args.mode === 'bm25') { + console.log(JSON.stringify(bm25Hits, null, 2)); + return; + } + + // hybrid via RRF + const fused = helpers.reciprocalRankFusion( + [vectorHits.map(v => ({ page_id: v.page_id })), bm25Hits.map(h => ({ page_id: h.page_id }))], + (x) => String(x.page_id), + ); + const byId = new Map(); + for (const h of bm25Hits) byId.set(h.page_id, h); + for (const v of vectorHits) if (!byId.has(v.page_id)) { + const row = store.db.prepare('SELECT id AS page_id, wiki_slug, rel_path, title, summary FROM wiki_pages WHERE id = ?').get(v.page_id); + if (row) byId.set(v.page_id, { ...row, rank: -v.similarity, snippet: '' }); + } + const out = fused.slice(0, limit).map(f => ({ ...byId.get(parseInt(f.key, 10)), rrf_score: f.score })); + console.log(JSON.stringify(out, null, 2)); + } finally { store.close(); } +} + +function usage() { + console.error(`Usage: + embed-wiki.js all [<slug>] [--limit 200] [--force] + embed-wiki.js search "<query>" [--wiki slug] [--limit 10] [--mode hybrid|vector|bm25]`); + process.exit(1); +} + +async function main() { + const [, , cmd, ...rest] = process.argv; + const args = parseArgs(rest); + switch (cmd) { + case 'all': await cmdAll(args); break; + case 'search': await cmdSearch(args); break; + default: usage(); + } +} + +main().catch(e => { console.error(e); process.exit(1); }); diff --git a/scripts/file-changed.js b/scripts/file-changed.js index e3ed041..0e9e26d 100755 --- a/scripts/file-changed.js +++ b/scripts/file-changed.js @@ -24,6 +24,30 @@ process.stdin.on('end', () => { const isImportant = importantPatterns.some(p => p.test(filePath)); + // Reactive wiki seed enqueue: edits inside a wiki/ tree spawn a verify seed. 
+ const wikiMatch = filePath.match(/(?:^|\/)\.claude\/wikis\/([^/]+)\/wiki\/.+\.md$/) || + filePath.match(/(?:^|\/)\.pro-workflow\/wikis\/([^/]+)\/wiki\/.+\.md$/); + if (wikiMatch) { + try { + const path2 = require('path'); + const fs2 = require('fs'); + const distPath = path2.join(__dirname, '..', 'dist', 'db', 'store.js'); + if (fs2.existsSync(distPath)) { + const { createStore } = require(distPath); + const store = createStore(); + try { + const slug = wikiMatch[1]; + const w = store.getWiki(slug); + if (w) { + const rel = path2.relative(w.root_path, filePath); + store.enqueueSeed({ wiki_slug: slug, query: `verify edits in ${rel}`, depth: 0 }); + console.error(`[ProWorkflow] enqueued verify seed for ${slug}/${rel}`); + } + } finally { store.close(); } + } + } catch (e) { /* never break the hook */ } + } + if (isImportant) { console.error('[ProWorkflow] Important config file changed: ' + filePath); diff --git a/scripts/learn-capture.js b/scripts/learn-capture.js index 182edd6..8183402 100755 --- a/scripts/learn-capture.js +++ b/scripts/learn-capture.js @@ -25,7 +25,7 @@ async function main() { return; } - const regex = /\[LEARN\]\s*([\w][\w\s-]*?)\s*:\s*(.+?)(?:\nMistake:\s*(.+?))?(?:\nCorrection:\s*(.+?))?(?=\n\[LEARN\]|\n\n|$)/gim; + const regex = /\[LEARN\]\s*([\w][\w\s-]*?)\s*:\s*(.+?)(?:\nMistake:\s*(.+?))?(?:\nCorrection:\s*(.+?))?(?:\nWiki:\s*([A-Za-z0-9_-]+))?(?=\n\[LEARN\]|\n\n|$)/gim; let match; let store = null; @@ -40,13 +40,14 @@ async function main() { if (!store) break; const projectDir = process.env.CLAUDE_PROJECT_DIR || ''; + const wikiSlug = match[5]?.trim() || undefined; store.addLearning({ project: projectDir ? 
path.basename(projectDir) : null, category: match[1].trim(), rule: match[2].trim(), mistake: match[3]?.trim() || null, correction: match[4]?.trim() || null, - }); + }, wikiSlug); count++; } diff --git a/scripts/prompt-submit.js b/scripts/prompt-submit.js index 203f72f..e82c232 100644 --- a/scripts/prompt-submit.js +++ b/scripts/prompt-submit.js @@ -86,6 +86,17 @@ async function main() { store.updateSessionCounts(sessionId, 0, isCorrection ? 1 : 0, 1); sessionUpdated = true; } + + if (typeof store.searchWiki === 'function' && prompt.split(/\s+/).length >= 3) { + const hits = store.searchWiki(prompt, { limit: 3, loose: true }); + if (hits.length > 0) { + log(`[ProWorkflow] ${hits.length} relevant wiki page(s):`); + for (const h of hits) { + log(` - ${h.wiki_slug} · ${h.rel_path} — ${h.title}`); + } + log(' (use /wiki ask "<query>" --wiki <slug> for full retrieval)'); + } + } } catch (e) { // DB error, fall back to file-based } finally { diff --git a/scripts/research-tick.js b/scripts/research-tick.js new file mode 100644 index 0000000..3eae3ac --- /dev/null +++ b/scripts/research-tick.js @@ -0,0 +1,70 @@ +#!/usr/bin/env node +const fs = require('fs'); +const path = require('path'); +const os = require('os'); +const { spawnSync } = require('child_process'); + +const PRO_WORKFLOW_ROOT = path.resolve(__dirname, '..'); +const STOP_FILE = path.join(os.homedir(), '.pro-workflow', 'STOP'); +const LOOP_SCRIPT = path.join(PRO_WORKFLOW_ROOT, 'skills', 'wiki-research-loop', 'scripts', 'research-loop.js'); +const TICK_LOG = path.join(os.homedir(), '.pro-workflow', 'tick.log'); + +function getStore() { + const distPath = path.join(PRO_WORKFLOW_ROOT, 'dist', 'db', 'store.js'); + if (!fs.existsSync(distPath)) { console.error('build store first'); process.exit(1); } + return require(distPath).createStore(); +} + +function readWikiConfig(rootPath) { + const cfgPath = path.join(rootPath, 'wiki.config.md'); + if (!fs.existsSync(cfgPath)) return {}; + const raw = 
fs.readFileSync(cfgPath, 'utf8'); + const m = raw.match(/^---\s*\n([\s\S]*?)\n---/); + if (!m) return {}; + const obj = {}; let nested = null; + for (const line of m[1].split('\n')) { + if (!line.trim()) continue; + const indent = line.match(/^(\s*)/)[1].length; + const trimmed = line.trim(); + const kv = trimmed.match(/^([A-Za-z_]+):\s*(.*)$/); + if (!kv) continue; + const k = kv[1], v = kv[2]; + const parsed = v === '' ? {} : (v === 'true' ? true : v === 'false' ? false : (/^-?\d+(\.\d+)?$/.test(v) ? Number(v) : v)); + if (indent === 0) { obj[k] = parsed; nested = (typeof parsed === 'object') ? obj[k] : null; } + else if (nested) nested[k] = parsed; + } + return obj; +} + +function appendLog(line) { + fs.mkdirSync(path.dirname(TICK_LOG), { recursive: true }); + fs.appendFileSync(TICK_LOG, `[${new Date().toISOString()}] ${line}\n`); +} + +function tick() { + if (fs.existsSync(STOP_FILE)) { appendLog('skip: STOP file present'); return { skipped: 'stop' }; } + const store = getStore(); + let target = null; + try { + const wikis = store.listWikis(); + for (const w of wikis) { + const cfg = readWikiConfig(w.root_path); + const auto = cfg.auto_research || {}; + if (!auto.enabled) continue; + const pending = store.db.prepare(`SELECT COUNT(*) AS n FROM wiki_seeds WHERE wiki_slug = ? 
AND status = 'pending'`).get(w.slug); + if (!pending || !pending.n) continue; + target = w; break; + } + } finally { store.close(); } + + if (!target) { appendLog('skip: no opted-in wiki with pending seeds'); return { skipped: 'no-target' }; } + + appendLog(`tick: running ${target.slug}`); + const r = spawnSync('node', [LOOP_SCRIPT, 'run', target.slug, '--max-pages', '1'], { encoding: 'utf8' }); + appendLog(`tick: ${target.slug} exit=${r.status}`); + if (r.stderr) appendLog(`stderr: ${r.stderr.slice(0, 500)}`); + return { ran: target.slug, exit: r.status }; +} + +const result = tick(); +console.log(JSON.stringify(result)); diff --git a/scripts/session-start.js b/scripts/session-start.js index 72b3a1b..89462bf 100644 --- a/scripts/session-start.js +++ b/scripts/session-start.js @@ -66,6 +66,17 @@ async function main() { log(`[ProWorkflow] Previous session: ${lastSession.started_at.split('T')[0]} (${lastSession.edit_count} edits, ${lastSession.corrections_count} corrections)`); } } + + if (typeof store.listWikis === 'function') { + const wikis = store.listWikis(); + if (wikis.length > 0) { + log(`[ProWorkflow] ${wikis.length} wiki(s) available:`); + wikis.slice(0, 5).forEach(w => { + log(` - ${w.slug} (${w.flavor}, ${w.scope})`); + }); + if (wikis.length > 5) log(` ... and ${wikis.length - 5} more`); + } + } } catch (e) { log(`[ProWorkflow] DB error: ${e.message}`); } finally { diff --git a/skills/learn-rule/SKILL.md b/skills/learn-rule/SKILL.md index 3b85c1d..738af44 100644 --- a/skills/learn-rule/SKILL.md +++ b/skills/learn-rule/SKILL.md @@ -26,6 +26,17 @@ Mistake: What went wrong Correction: How it was fixed ``` +### Wiki-scoped rules + +Append `Wiki: <slug>` to bind the rule to a single pro-workflow wiki. The rule loads only when that wiki is in scope, avoiding cross-project pollution: + +``` +[LEARN] Editing: Cite a sources.md row before adding any wiki claim. 
+Wiki: agent-memory +``` + +The capture hook auto-detects `Wiki: <slug>` and links the learning to that wiki via `learnings_wiki`. + ## Categories | Category | Examples | diff --git a/skills/llm-council/SKILL.md b/skills/llm-council/SKILL.md new file mode 100644 index 0000000..387a7df --- /dev/null +++ b/skills/llm-council/SKILL.md @@ -0,0 +1,86 @@ +--- +name: llm-council +description: Provider-agnostic multi-LLM deliberation. Three phases — independent responses, cross-model anonymized ranking, chairman synthesis. Provider config from env (OPENAI/ANTHROPIC/FIREWORKS/OPENROUTER/custom OpenAI-compatible base URL). Persists transcript to a wiki page when --wiki <slug> is passed. Use when the user wants multiple AI perspectives, consensus-building, or the "LLM Council" approach for high-stakes reviews, plan critique, or contested learning rules. +allowed-tools: Read, Write, Bash, AskUserQuestion +--- + +# LLM Council + +Karpathy's LLM Council pattern, provider-agnostic. dair-academy's version hardcoded Fireworks; ours reads any OpenAI-compatible endpoint via env. + +## When to use + +- High-stakes plan review (`/plan` crosses N-file threshold) +- Conflicting learning-rules → re-resolve via vote +- User invokes `/council "<query>"` or `/wiki council` +- Architecture decisions where you want multiple viewpoints captured +- Persisting deliberation as a wiki page for future reference + +## Three phases + +1. **Independent**: each model answers in parallel +2. **Ranking**: each model ranks anonymized peer responses +3. **Synthesis**: chairman model reads all responses + rankings → final answer + +## Provider config + +Provider chosen via env. 
First-match wins: + +| Env var | Provider | Default base URL | +|---------|----------|------------------| +| `ANTHROPIC_API_KEY` | Anthropic | `https://api.anthropic.com` | +| `OPENAI_API_KEY` | OpenAI | `https://api.openai.com/v1` | +| `OPENROUTER_API_KEY` | OpenRouter | `https://openrouter.ai/api/v1` | +| `FIREWORKS_API_KEY` | Fireworks | `https://api.fireworks.ai/inference/v1` | +| `LLM_COUNCIL_BASE_URL` + `LLM_COUNCIL_API_KEY` | Custom OpenAI-compat | (user-supplied) | + +Override per-run with `--provider openai|anthropic|openrouter|fireworks|custom`. + +Default model rosters per provider live in `scripts/council.js` and can be overridden via `--models` CSV and `--chairman <id>`. + +## Commands + +``` +node $SKILL_ROOT/scripts/council.js run "<query>" [--models id1,id2,id3] [--chairman id] [--provider <name>] [--wiki <slug>] +node $SKILL_ROOT/scripts/council.js providers +node $SKILL_ROOT/scripts/council.js show <session-id> +``` + +`--wiki <slug>` writes the full transcript to `<wiki>/derived/council/<session-id>.md` and registers it in the wiki page index through the store's `upsertWikiPage` call so it shows in FTS5 search. + +## Output + +Each session writes: + +``` +~/.pro-workflow/council/<session-id>/ +├── config.json # query, models, chairman, provider +├── phase1_responses.json # raw API responses per model +├── phase2_rankings.json # anonymized ranking outputs +├── phase3_synthesis.txt # chairman's final answer +└── final_output.md # human-readable bundle +``` + +Console prints the markdown bundle. Pipe to `pbcopy` / `tee` as needed. + +## Hard rules + +1. Never skip the ranking phase. It's the core of the council pattern. +2. Save raw responses to disk verbatim. No summarization in storage. +3. Anonymize responses for ranking — models see `Response A/B/C/...`, not peer names. +4. The chairman sees both real names AND rankings. +5. Display all three phases to the user. No phase elision. + +## Cost awareness + +The script logs per-call latency + tokens on supported providers. 
Multiply by your provider rate to estimate. The number of calls grows linearly — `2 × len(models) + 1` (one answer and one ranking per model, plus the chairman) — but every ranking prompt contains all responses, so token cost grows roughly with `len(models)^2`. + +Default council size: 3-5 models. More models = quadratically more ranking tokens. + +## Use with wiki + +``` +/wiki council agent-memory "should we adopt episodic memory in our agents?" +``` + +Loads `agent-memory` wiki context as system prompt prefix, runs council, persists transcript as `wiki/derived/council/<id>.md`. The transcript becomes searchable via `/wiki ask`. diff --git a/skills/llm-council/scripts/council.js b/skills/llm-council/scripts/council.js new file mode 100755 index 0000000..250f23d --- /dev/null +++ b/skills/llm-council/scripts/council.js @@ -0,0 +1,278 @@ +#!/usr/bin/env node +const fs = require('fs'); +const path = require('path'); +const os = require('os'); +const https = require('https'); + +const PRO_WORKFLOW_ROOT = path.resolve(__dirname, '..', '..', '..'); +const COUNCIL_ROOT = path.join(os.homedir(), '.pro-workflow', 'council'); + +const PROVIDERS = { + anthropic: { + envKey: 'ANTHROPIC_API_KEY', + baseUrl: 'https://api.anthropic.com', + defaultModels: ['claude-opus-4-7', 'claude-sonnet-4-6', 'claude-haiku-4-5-20251001'], + defaultChairman: 'claude-opus-4-7', + call: callAnthropic, + }, + openai: { + envKey: 'OPENAI_API_KEY', + baseUrl: 'https://api.openai.com/v1', + defaultModels: ['gpt-4o', 'gpt-4o-mini', 'o3-mini'], + defaultChairman: 'gpt-4o', + call: callOpenAICompat, + }, + openrouter: { + envKey: 'OPENROUTER_API_KEY', + baseUrl: 'https://openrouter.ai/api/v1', + defaultModels: ['anthropic/claude-opus-4', 'openai/gpt-4o', 'google/gemini-2.0-flash'], + defaultChairman: 'anthropic/claude-opus-4', + call: callOpenAICompat, + }, + fireworks: { + envKey: 'FIREWORKS_API_KEY', + baseUrl: 'https://api.fireworks.ai/inference/v1', + defaultModels: [ + 'accounts/fireworks/models/glm-5', + 'accounts/fireworks/models/deepseek-v3p2', + 'accounts/fireworks/models/kimi-k2p5', + ], + defaultChairman: 
'accounts/fireworks/models/glm-5', + call: callOpenAICompat, + }, + custom: { + envKey: 'LLM_COUNCIL_API_KEY', + baseUrl: process.env.LLM_COUNCIL_BASE_URL || '', + defaultModels: (process.env.LLM_COUNCIL_MODELS || '').split(',').filter(Boolean), + defaultChairman: process.env.LLM_COUNCIL_CHAIRMAN || '', + call: callOpenAICompat, + }, +}; + +function pickProvider(arg) { + if (arg && PROVIDERS[arg]) return arg; + for (const [name, p] of Object.entries(PROVIDERS)) { + if (process.env[p.envKey]) return name; + } + return null; +} + +function postJSON(urlStr, body, headers) { + return new Promise((resolve, reject) => { + const url = new URL(urlStr); + const data = JSON.stringify(body); + const req = https.request({ + hostname: url.hostname, + path: url.pathname + url.search, + method: 'POST', + headers: { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(data), ...headers }, + }, res => { + let chunks = ''; + res.on('data', c => { chunks += c; }); + res.on('end', () => resolve({ status: res.statusCode, body: chunks })); + }); + req.on('error', reject); + req.write(data); + req.end(); + }); +} + +async function callOpenAICompat(provider, model, system, user) { + const start = Date.now(); + const url = `${provider.baseUrl}/chat/completions`; + const res = await postJSON(url, { + model, + messages: [{ role: 'system', content: system }, { role: 'user', content: user }], + max_tokens: 4000, + temperature: 1, + }, { Authorization: `Bearer ${process.env[provider.envKey]}` }); + const elapsed = Date.now() - start; + if (res.status >= 400) return { success: false, content: `[ERROR ${res.status}: ${res.body.slice(0, 300)}]`, model, latency_ms: elapsed }; + let data; + try { data = JSON.parse(res.body); } catch (e) { return { success: false, content: `[parse-error]`, model, latency_ms: elapsed }; } + const content = data.choices?.[0]?.message?.content || ''; + return { success: true, content, model, latency_ms: elapsed, tokens: data.usage || {} }; +} + +async 
function callAnthropic(provider, model, system, user) { + const start = Date.now(); + const url = `${provider.baseUrl}/v1/messages`; + const res = await postJSON(url, { + model, + max_tokens: 4000, + system, + messages: [{ role: 'user', content: user }], + }, { + 'x-api-key': process.env[provider.envKey], + 'anthropic-version': '2023-06-01', + }); + const elapsed = Date.now() - start; + if (res.status >= 400) return { success: false, content: `[ERROR ${res.status}: ${res.body.slice(0, 300)}]`, model, latency_ms: elapsed }; + let data; + try { data = JSON.parse(res.body); } catch { return { success: false, content: '[parse-error]', model, latency_ms: elapsed }; } + const content = (data.content || []).map(b => b.text || '').join(''); + return { success: true, content, model, latency_ms: elapsed, tokens: data.usage || {} }; +} + +function parseArgs(argv) { + const out = { _: [] }; + for (let i = 0; i < argv.length; i++) { + const a = argv[i]; + if (a.startsWith('--')) { + const key = a.slice(2); + const next = argv[i + 1]; + if (next && !next.startsWith('--')) { out[key] = next; i++; } + else out[key] = true; + } else out._.push(a); + } + return out; +} + +function ts() { return new Date().toISOString().replace(/[:.]/g, '-'); } + +function persistToWiki(slug, sessionId, output) { + const distPath = path.join(PRO_WORKFLOW_ROOT, 'dist', 'db', 'store.js'); + if (!fs.existsSync(distPath)) return null; + const { createStore } = require(distPath); + const store = createStore(); + try { + const wiki = store.getWiki(slug); + if (!wiki) return null; + const relPath = path.join('derived', 'council', `${sessionId}.md`); + const fileAbs = path.join(wiki.root_path, relPath); + fs.mkdirSync(path.dirname(fileAbs), { recursive: true }); + fs.writeFileSync(fileAbs, output); + store.upsertWikiPage({ + wiki_slug: slug, + rel_path: relPath, + title: `Council session ${sessionId}`, + summary: output.slice(0, 500), + content: output, + page_type: 'council', + content_hash: null, + }); + 
return fileAbs; + } finally { store.close(); } +} + +async function cmdRun(args) { + const query = args._[0]; + if (!query) { console.error('run: query required'); process.exit(1); } + const providerName = pickProvider(args.provider); + if (!providerName) { console.error('No provider env var set. Try ANTHROPIC_API_KEY or OPENAI_API_KEY.'); process.exit(2); } + const provider = PROVIDERS[providerName]; + if (!provider.baseUrl) { console.error(`provider ${providerName} requires LLM_COUNCIL_BASE_URL`); process.exit(2); } + + const models = (args.models ? String(args.models).split(',') : provider.defaultModels).filter(Boolean); + const chairman = args.chairman || provider.defaultChairman; + if (!models.length) { console.error('no models — pass --models'); process.exit(2); } + if (!chairman) { console.error('no chairman — pass --chairman'); process.exit(2); } + + const sessionId = ts(); + const sessionDir = path.join(COUNCIL_ROOT, sessionId); + fs.mkdirSync(sessionDir, { recursive: true }); + + fs.writeFileSync(path.join(sessionDir, 'config.json'), JSON.stringify({ query, models, chairman, provider: providerName }, null, 2)); + + // Phase 1 + const sysIndep = 'You are participating in an LLM council deliberation. Provide your best, most thoughtful response to the query. Be comprehensive but focused.'; + const phase1Entries = await Promise.all(models.map(m => provider.call(provider, m, sysIndep, query))); + const phase1 = Object.fromEntries(models.map((m, i) => [m, phase1Entries[i]])); + fs.writeFileSync(path.join(sessionDir, 'phase1_responses.json'), JSON.stringify(phase1, null, 2)); + + // Phase 2 + const labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G'].slice(0, models.length); + const labelOf = Object.fromEntries(models.map((m, i) => [m, labels[i]])); + const anon = models.map(m => `=== Response ${labelOf[m]} ===\n${phase1[m].content}`).join('\n\n'); + const sysRank = (own) => `You are ranking AI responses objectively. 
Your own response is labeled '${own}'.`; + const userRank = `QUERY:\n${query}\n\nRESPONSES:\n${anon}\n\nRank from BEST to WORST. Format:\nRANKINGS:\n1. [Letter] - [reason]\n2. [Letter] - [reason]\n...`; + const phase2Entries = await Promise.all(models.map(m => provider.call(provider, m, sysRank(labelOf[m]), userRank))); + const phase2 = { label_of: labelOf, rankings: Object.fromEntries(models.map((m, i) => [m, phase2Entries[i]])) }; + fs.writeFileSync(path.join(sessionDir, 'phase2_rankings.json'), JSON.stringify(phase2, null, 2)); + + // Phase 3 + const responsesText = models.map(m => `=== ${labelOf[m]}: ${m} ===\n${phase1[m].content}`).join('\n\n'); + const rankingsText = models.map(m => `[${m}'s Rankings]\n${phase2.rankings[m].content}`).join('\n\n'); + const sysSynth = 'You are the Chairman of an LLM Council. Synthesize multiple AI perspectives into a definitive, comprehensive response.'; + const userSynth = `ORIGINAL QUERY:\n${query}\n\nINDIVIDUAL RESPONSES:\n${responsesText}\n\nMODEL RANKINGS:\n${rankingsText}\n\nProduce the FINAL SYNTHESIS:`; + const synth = await provider.call(provider, chairman, sysSynth, userSynth); + fs.writeFileSync(path.join(sessionDir, 'phase3_synthesis.txt'), synth.content); + + // Render + const out = []; + out.push(`# LLM Council Deliberation`); + out.push(`Session: ${sessionId} · Provider: ${providerName}`); + out.push(''); + out.push(`**Query:** ${query}`); + out.push(`**Council:** ${models.join(', ')}`); + out.push(`**Chairman:** ${chairman}`); + out.push(''); + out.push('## Phase 1 — Individual responses'); + for (const m of models) { + out.push(`### [${labelOf[m]}] ${m} (${phase1[m].latency_ms}ms)`); + out.push(phase1[m].content); + out.push(''); + } + out.push('## Phase 2 — Cross-model rankings'); + for (const m of models) { + out.push(`### ${m}`); + out.push(phase2.rankings[m].content); + out.push(''); + } + out.push('## Phase 3 — Chairman synthesis'); + out.push(`### ${chairman}`); + out.push(synth.content); + + const md = 
out.join('\n'); + fs.writeFileSync(path.join(sessionDir, 'final_output.md'), md); + + if (args.wiki) { + const wikiPath = persistToWiki(args.wiki, sessionId, md); + if (wikiPath) console.error(`[council] persisted to ${wikiPath}`); + else console.error(`[council] wiki ${args.wiki} not found, skipping persist`); + } + + console.log(md); +} + +function cmdProviders() { + const rows = Object.entries(PROVIDERS).map(([name, p]) => ({ + name, + env_var: p.envKey, + has_key: !!process.env[p.envKey], + base_url: p.baseUrl || '(unset)', + default_models: p.defaultModels, + default_chairman: p.defaultChairman, + })); + console.log(JSON.stringify(rows, null, 2)); +} + +function cmdShow(args) { + const id = args._[0]; + if (!id) { console.error('show: session-id required'); process.exit(1); } + const dir = path.join(COUNCIL_ROOT, id); + const file = path.join(dir, 'final_output.md'); + if (!fs.existsSync(file)) { console.error('session not found'); process.exit(1); } + console.log(fs.readFileSync(file, 'utf8')); +} + +function usage() { + console.error(`Usage: + council.js run "<query>" [--models id1,id2,id3] [--chairman id] [--provider name] [--wiki slug] + council.js providers + council.js show <session-id>`); + process.exit(1); +} + +async function main() { + const [, , cmd, ...rest] = process.argv; + const args = parseArgs(rest); + switch (cmd) { + case 'run': await cmdRun(args); break; + case 'providers': cmdProviders(); break; + case 'show': cmdShow(args); break; + default: usage(); + } +} + +main().catch(e => { console.error(e); process.exit(1); }); diff --git a/skills/survey-generator/SKILL.md b/skills/survey-generator/SKILL.md new file mode 100644 index 0000000..1495d67 --- /dev/null +++ b/skills/survey-generator/SKILL.md @@ -0,0 +1,131 @@ +--- +name: survey-generator +description: Compile a structured literature survey on any AI/ML topic. 
Agent curates a research bundle (taxonomy + sections + bibliography of real papers) from a public anchor resource, then a chosen LLM generates the survey artifact. Output target is a wiki page (markdown), not a one-off HTML — survey lands in `<wiki>/derived/surveys/<slug>.md` with full bibliography rows in `sources.md`. Provider-agnostic (Anthropic/OpenAI/OpenRouter/Fireworks/custom OpenAI-compat). Use when the user asks for a "survey", "literature review", "lit review", or "deep dive" on a technical topic. +allowed-tools: Read, Write, Bash, WebFetch, AskUserQuestion +--- + +# Survey Generator + +Provider-agnostic literature-survey artifact generator. Output flows into a pro-workflow wiki, not a standalone HTML file — survives sessions and indexes for FTS5 retrieval. + +## Diff vs dair-academy version + +| dair | pro-workflow | +|------|--------------| +| Hardcoded Kimi K2.6 on Fireworks | Provider-agnostic (Anthropic/OpenAI/OpenRouter/Fireworks/custom) | +| Output = single-file HTML with inline SVG | Output = wiki markdown page + bibliography rows in `sources.md` | +| One-off artifact, no follow-up | Persists in FTS5 index; reused by `wiki-research-loop` | +| Manual run only | Composable with `/wiki research` for auto-bibliography expansion | + +## When to use + +- "Survey on <topic>" / "lit review on <topic>" +- Onboarding a new domain — generate the map-of-the-field +- After a wiki has 10-30 sources, compile a synthesis page over them +- Pre-step before `/wiki research` runs: gives the loop a high-quality seed bundle + +## Inputs + +| Input | Required | Description | +|-------|----------|-------------| +| `topic` | yes | "Reasoning Models", "Agentic Engineering" | +| `source_url` | yes | Public anchor: arXiv survey, GitHub awesome-list, canonical blog post | +| `--wiki <slug>` | yes | Target wiki for the artifact | +| `--bibliography-size N` | no | Default 20. 
40-50 comprehensive, 80-100 exhaustive | +| `--section-count N` | no | Default 6-10 numbered sections | +| `--provider name` | no | Override provider (default: first env var found) | +| `--model id` | no | Override model | + +## Workflow (the agent runs these in order) + +### Step 1 — Read the anchor + +`WebFetch source_url`. Extract subtopics + cited papers. For GitHub awesome-lists, walk README + linked papers files. For arXiv survey PDFs, use abstract + ToC. + +### Step 2 — Build research_bundle.json + +Use `templates/research_bundle.template.json` as scaffold. Required keys: + +```json +{ + "topic": "...", + "anchor_source": "...", + "abstract_hints": ["..."], + "taxonomy": [{"branch": "...", "children": [{"name": "...", "description": "..."}]}], + "sections": [{"title": "...", "guidance": "...", "papers": ["key1","key2"]}], + "bibliography": [{"key": "author-year-shortname", "authors": "...", "year": 2024, "title": "...", "venue": "...", "summary": "..."}] +} +``` + +**Hard rules:** +- Every paper in `bibliography` must be real. No invented entries. +- Every `key` referenced in `sections[].papers` must exist in `bibliography`. +- 4-8 taxonomy branches, 2-4 children each. +- 6-10 numbered sections covering: introduction → foundations → methods → evaluation → open problems. + +### Step 3 — Run the generator + +```bash +node $SKILL_ROOT/scripts/build-survey.js \ + --bundle <path-to-research_bundle.json> \ + --wiki <slug> \ + [--provider anthropic|openai|openrouter|fireworks|custom] \ + [--model <id>] +``` + +Generator: +1. Reads bundle. +2. Sends to LLM with strict markdown spec (numbered sections, inline `[^paper-key]` citations, no HTML). +3. Writes output to `<wiki>/derived/surveys/<topic-slug>.md`. +4. Appends bibliography rows to `<wiki>/sources.md` (deduped by key). +5. Calls `wiki-cli.js page` to upsert into FTS5 index. + +### Step 4 — Iterate + +If prose is thin: tighten `sections[].guidance` and rerun. 
The output filename is versioned automatically (`<slug>-v2.md`, `<slug>-v3.md`). + +To compare providers: + +```bash +node build-survey.js --bundle bundle.json --wiki agent-memory --provider openai --model gpt-4o +node build-survey.js --bundle bundle.json --wiki agent-memory --provider anthropic --model claude-opus-4-7 +``` + +Each writes a separate versioned file; diff them. + +## Output structure + +``` +<wiki-root>/ +├── sources.md # bibliography rows appended (deduped) +└── derived/surveys/ + └── <topic-slug>-v1.md # the survey + # title (h1) + # ## 1. Introduction + # ## 2. Foundations + # ... + # ## References + # [^paper-key] author year. title. venue. +``` + +## Hard rules + +1. Never invent bibliography entries — every paper must be a real work with venue. +2. Every section's `papers` array references keys in `bibliography`. +3. Output is markdown ONLY. No HTML, no inline SVG, no JS. +4. Bibliography rows in `sources.md` are table rows with ids of the form `src-bib-<paper-key>`; reruns skip ids that are already present. +5. Iterate on inputs (`research_bundle.json`), not on the generated output. +6. Provider+model selection is the user's call — never hardcode. 
+ +## Composing with research loop + +``` +/wiki init reasoning-models --title "Reasoning Models" --flavor research +# Manually compile a research_bundle.json +node skills/survey-generator/scripts/build-survey.js --bundle bundle.json --wiki reasoning-models +# Now the wiki has a structured survey + 50 bibliography rows +# Enable auto-research to expand: +# (edit reasoning-models/wiki.config.md, set auto_research.enabled: true) +node skills/wiki-research-loop/scripts/research-loop.js seed reasoning-models "chain-of-thought failure modes" --depth 0 +node skills/wiki-research-loop/scripts/research-loop.js run reasoning-models +``` diff --git a/skills/survey-generator/scripts/build-survey.js b/skills/survey-generator/scripts/build-survey.js new file mode 100755 index 0000000..1ec554a --- /dev/null +++ b/skills/survey-generator/scripts/build-survey.js @@ -0,0 +1,205 @@ +#!/usr/bin/env node +const fs = require('fs'); +const path = require('path'); +const https = require('https'); +const { execFileSync } = require('child_process'); + +const PRO_WORKFLOW_ROOT = path.resolve(__dirname, '..', '..', '..'); +const COUNCIL = path.join(PRO_WORKFLOW_ROOT, 'skills', 'llm-council', 'scripts', 'council.js'); + +function parseArgs(argv) { + const out = { _: [] }; + for (let i = 0; i < argv.length; i++) { + const a = argv[i]; + if (a.startsWith('--')) { + const key = a.slice(2); + const next = argv[i + 1]; + if (next && !next.startsWith('--')) { out[key] = next; i++; } + else out[key] = true; + } else out._.push(a); + } + return out; +} + +function die(msg) { console.error(`[survey] ${msg}`); process.exit(1); } + +function getStore() { + const distPath = path.join(PRO_WORKFLOW_ROOT, 'dist', 'db', 'store.js'); + if (!fs.existsSync(distPath)) die(`built store missing at ${distPath}. 
Run npm run build`); + return require(distPath).createStore(); +} + +function postJSON(urlStr, body, headers) { + return new Promise((resolve, reject) => { + const url = new URL(urlStr); + const data = JSON.stringify(body); + const req = https.request({ + hostname: url.hostname, + path: url.pathname + url.search, + method: 'POST', + headers: { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(data), ...headers }, + }, res => { + let chunks = ''; + res.on('data', c => { chunks += c; }); + res.on('end', () => resolve({ status: res.statusCode, body: chunks })); + }); + req.on('error', reject); + req.write(data); + req.end(); + }); +} + +const PROVIDER_DEFAULTS = { + anthropic: { envKey: 'ANTHROPIC_API_KEY', baseUrl: 'https://api.anthropic.com', model: 'claude-opus-4-7' }, + openai: { envKey: 'OPENAI_API_KEY', baseUrl: 'https://api.openai.com/v1', model: 'gpt-4o' }, + openrouter: { envKey: 'OPENROUTER_API_KEY', baseUrl: 'https://openrouter.ai/api/v1', model: 'anthropic/claude-opus-4' }, + fireworks: { envKey: 'FIREWORKS_API_KEY', baseUrl: 'https://api.fireworks.ai/inference/v1', model: 'accounts/fireworks/models/kimi-k2p5' }, + custom: { envKey: 'LLM_COUNCIL_API_KEY', baseUrl: process.env.LLM_COUNCIL_BASE_URL || '', model: process.env.LLM_COUNCIL_CHAIRMAN || '' }, +}; + +function pickProvider(arg) { + if (arg && PROVIDER_DEFAULTS[arg]) return arg; + for (const [name, p] of Object.entries(PROVIDER_DEFAULTS)) if (process.env[p.envKey]) return name; + return null; +} + +async function callProvider(providerName, model, system, user, maxTokens) { + const p = PROVIDER_DEFAULTS[providerName]; + if (!process.env[p.envKey]) die(`${p.envKey} not set`); + if (providerName === 'anthropic') { + const res = await postJSON(`${p.baseUrl}/v1/messages`, { + model, max_tokens: maxTokens, system, messages: [{ role: 'user', content: user }], + }, { 'x-api-key': process.env[p.envKey], 'anthropic-version': '2023-06-01' }); + if (res.status >= 400) die(`anthropic error 
${res.status}: ${res.body.slice(0, 300)}`); + const data = JSON.parse(res.body); + return (data.content || []).map(b => b.text || '').join(''); + } + const res = await postJSON(`${p.baseUrl}/chat/completions`, { + model, max_tokens: maxTokens, temperature: 0.7, + messages: [{ role: 'system', content: system }, { role: 'user', content: user }], + }, { Authorization: `Bearer ${process.env[p.envKey]}` }); + if (res.status >= 400) die(`${providerName} error ${res.status}: ${res.body.slice(0, 300)}`); + const data = JSON.parse(res.body); + return data.choices?.[0]?.message?.content || ''; +} + +function slugify(s) { return s.toLowerCase().replace(/[^a-z0-9]+/g, '-').replace(/^-|-$/g, '').slice(0, 60); } + +function appendBibliographyToSources(wikiRoot, bibliography) { + const file = path.join(wikiRoot, 'sources.md'); + let existing = ''; + if (fs.existsSync(file)) existing = fs.readFileSync(file, 'utf8'); + const seenKeys = new Set(); + for (const m of existing.matchAll(/\| (src-bib-[a-z0-9-]+) \|/g)) seenKeys.add(m[1]); + + const newRows = []; + for (const b of bibliography) { + const id = `src-bib-${slugify(b.key)}`; + if (seenKeys.has(id)) continue; + const url = b.url || (b.venue && b.venue.startsWith('arXiv:') ? `https://arxiv.org/abs/${b.venue.slice(6)}` : ''); + newRows.push(`| ${id} | paper | ${url} | ${b.title.replace(/\|/g, '\\|')} | ${b.key} | ${new Date().toISOString().slice(0, 10)} |`); + } + if (!newRows.length) return 0; + if (!existing.includes('| id | type |')) { + fs.writeFileSync(file, existing + (existing.endsWith('\n') ? 
'' : '\n') + newRows.join('\n') + '\n'); + } else { + fs.writeFileSync(file, existing.trimEnd() + '\n' + newRows.join('\n') + '\n'); + } + return newRows.length; +} + +function nextVersion(dir, baseSlug) { + if (!fs.existsSync(dir)) return 1; + const re = new RegExp(`^${baseSlug}-v(\\d+)\\.md$`); + let max = 0; + for (const f of fs.readdirSync(dir)) { + const m = f.match(re); + if (m) max = Math.max(max, parseInt(m[1], 10)); + } + return max + 1; +} + +function buildPrompt(bundle) { + return `Compile a literature survey on the topic "${bundle.topic}" using ONLY the bibliography provided. + +Output strict markdown: +- H1 = topic title +- Numbered H2 sections following the provided sections list +- Inline citations as [^${'<paper-key>'}] referencing entries from the bibliography +- A "## References" section at the end listing every cited [^key] with: key, authors, year, title, venue, one-sentence summary +- No HTML, no SVG, no inline images +- ~600-1200 words per section, scaled by bibliography size +- For each section, weave together the papers in section[].papers; do not just list them + +Bibliography (USE THESE KEYS EXACTLY): +${JSON.stringify(bundle.bibliography, null, 2)} + +Sections to produce in order: +${JSON.stringify(bundle.sections, null, 2)} + +Anchor (context only, do not cite): +${bundle.anchor_source || ''} + +Hard rules: +- Cite real papers from the bibliography only. Do not invent. +- Every section that lists papers MUST cite each one at least once. +- Use [^paper-key] for inline citations. The References section reuses these keys. 
+- Do not write any prose under the H1; start sections immediately.`; +} + +async function cmdRun(args) { + const bundlePath = args.bundle; + const slug = args.wiki; + if (!bundlePath || !slug) die('usage: build-survey.js --bundle <path> --wiki <slug> [--provider name] [--model id]'); + if (!fs.existsSync(bundlePath)) die(`bundle not found: ${bundlePath}`); + + const bundle = JSON.parse(fs.readFileSync(bundlePath, 'utf8')); + if (!bundle.topic || !Array.isArray(bundle.bibliography)) die('bundle missing topic or bibliography[]'); + + const providerName = pickProvider(args.provider); + if (!providerName) die('no provider env var set'); + const model = args.model || PROVIDER_DEFAULTS[providerName].model; + if (!model) die('no model — pass --model'); + + const store = getStore(); + let wiki; + try { wiki = store.getWiki(slug); } finally { store.close(); } + if (!wiki) die(`unknown wiki: ${slug}`); + + console.error(`[survey] generating with ${providerName}:${model} for wiki ${slug}`); + const md = await callProvider(providerName, model, 'You are a careful technical-writing assistant generating a literature survey.', buildPrompt(bundle), 16000); + + const surveysDir = path.join(wiki.root_path, 'derived', 'surveys'); + fs.mkdirSync(surveysDir, { recursive: true }); + const baseSlug = slugify(bundle.topic); + const v = nextVersion(surveysDir, baseSlug); + const fileName = `${baseSlug}-v${v}.md`; + const fileAbs = path.join(surveysDir, fileName); + fs.writeFileSync(fileAbs, md); + + const added = appendBibliographyToSources(wiki.root_path, bundle.bibliography); + console.error(`[survey] wrote ${fileAbs}`); + console.error(`[survey] appended ${added} new bibliography rows to sources.md`); + + // Index via wiki-cli + const wikiCli = path.join(PRO_WORKFLOW_ROOT, 'skills', 'wiki-builder', 'scripts', 'wiki-cli.js'); + const relPath = path.relative(wiki.root_path, fileAbs); + try { + execFileSync('node', [wikiCli, 'page', slug, relPath, '--type', 'survey'], { stdio: 'inherit' 
}); + } catch (e) { + console.error('[survey] wiki-cli page failed:', e.message); + } + console.log(JSON.stringify({ slug, file: fileAbs, version: v, bibliography_added: added }, null, 2)); +} + +async function main() { + const [, , ...rest] = process.argv; + const args = parseArgs(rest); + if (rest.length === 0 || args.help) { + console.error('Usage: build-survey.js --bundle <path> --wiki <slug> [--provider anthropic|openai|openrouter|fireworks|custom] [--model id]'); + process.exit(1); + } + await cmdRun(args); +} + +main().catch(e => { console.error(e); process.exit(1); }); diff --git a/skills/survey-generator/templates/research_bundle.template.json b/skills/survey-generator/templates/research_bundle.template.json new file mode 100644 index 0000000..c9ce98d --- /dev/null +++ b/skills/survey-generator/templates/research_bundle.template.json @@ -0,0 +1,61 @@ +{ + "topic": "<concise survey topic, e.g. 'Reasoning Models'>", + "anchor_source": "<URL of the public anchor: arXiv survey, awesome-list, canonical blog post>", + "abstract_hints": [ + "<one bullet on the core motivation>", + "<one bullet on the key contributions to highlight>", + "<one bullet on the open questions to surface in the conclusion>" + ], + "taxonomy": [ + { + "branch": "<top-level category 1>", + "description": "<one sentence>", + "children": [ + {"name": "<sub-area>", "description": "<one sentence>"}, + {"name": "<sub-area>", "description": "<one sentence>"} + ] + } + ], + "sections": [ + { + "n": 1, + "title": "Introduction", + "guidance": "Frame the topic, state why it matters now, preview the taxonomy.", + "papers": [] + }, + { + "n": 2, + "title": "Foundations", + "guidance": "Cover the prerequisite concepts and earliest-cited papers.", + "papers": ["author1-year-key", "author2-year-key"] + }, + { + "n": 3, + "title": "Methods", + "guidance": "Group method papers by taxonomy branch.", + "papers": [] + }, + { + "n": 4, + "title": "Evaluation", + "guidance": "Benchmarks, evaluation protocols, 
contested measurements.", + "papers": [] + }, + { + "n": 5, + "title": "Open Problems", + "guidance": "What remains unsolved, what the field disagrees on.", + "papers": [] + } + ], + "bibliography": [ + { + "key": "author1-year-shortname", + "authors": "Last, F., Other, A.", + "year": 2024, + "title": "<paper title>", + "venue": "<conference/journal/arXiv:NNNN.NNNNN>", + "summary": "<one to two sentence summary of contribution>" + } + ] +} diff --git a/skills/wiki-builder/SKILL.md b/skills/wiki-builder/SKILL.md new file mode 100644 index 0000000..e0c0c42 --- /dev/null +++ b/skills/wiki-builder/SKILL.md @@ -0,0 +1,105 @@ +--- +name: wiki-builder +description: Start, structure, and grow a persistent research wiki indexed in pro-workflow's SQLite knowledge base. Each wiki is a folder of markdown pages with provenance, plus a shadow FTS5 index so any session can recall it. Use when the user says "start a wiki", "add to wiki", "compile a page", "wiki on X", or wants a long-lived knowledge base on a topic, paper, product, person, project, or codebase. +--- + +# Wiki Builder + +Persistent knowledge base for any topic. Markdown on disk + SQLite FTS5 shadow index. + +## When to use + +- "Start a wiki on <topic>" +- "Add this paper / link / note to the <slug> wiki" +- "Compile a concept page on X in <slug>" +- "What does the <slug> wiki say about Y?" (delegates to wiki-query) +- "List my wikis" + +## Locations + +- **Global**: `~/.pro-workflow/wikis/<slug>/` — default, never committed +- **Project**: `<project>/.claude/wikis/<slug>/` — pass `--scope project`, committable + +Both register in the same `~/.pro-workflow/data.db`. 
+ +## Flavors + +| Flavor | Use for | +|--------|---------| +| `research` | ongoing topic exploration | +| `paper` | one-paper deep dive | +| `domain` | broad subject area | +| `product` | product/tool KB | +| `person` | researcher/founder dossier | +| `organization` | company/lab profile | +| `project` | internal project KB | +| `codebase` | symbol/file-aware KB tied to a repo | +| `incident` | post-mortem KB | + +## Layout + +``` +<slug>/ +├── wiki.config.md # purpose, audience, page types, style, auto_research block +├── raw/ # untouched source material (PDFs, scrapes, transcripts) +├── wiki/ +│ └── index.md # entry point, hand-curated TOC +├── derived/ # generated artifacts (surveys, charts, summaries) +├── prompts/ # per-task prompts (compile-page, lint, query) +├── logs/maintenance-log.md +└── sources.md # one row per source: id | url | title | hash | fetched_at +``` + +Flavor adds folders: `wiki/papers`, `wiki/concepts`, `wiki/people`, `wiki/products`, `wiki/timelines`, `wiki/questions`. + +## CLI surface + +``` +node $SKILL_ROOT/scripts/wiki-cli.js init <slug> --title "X" --flavor research [--scope project] [--root <path>] +node $SKILL_ROOT/scripts/wiki-cli.js list +node $SKILL_ROOT/scripts/wiki-cli.js page <slug> <rel-path> --title "X" [--type concept|paper|person|...] [--from-file path] +node $SKILL_ROOT/scripts/wiki-cli.js reindex <slug> +node $SKILL_ROOT/scripts/wiki-cli.js info <slug> +``` + +`init` runs `init_wiki.sh` (mirrors dair layout) AND registers the wiki in SQLite. `page` writes markdown + upserts FTS row. + +## Workflow when invoked + +1. Resolve action (init / ingest / compile / list / reindex / info). +2. Read `wiki.config.md` of the target wiki before any compile. +3. Every claim that lands in `wiki/` must cite a row in `sources.md` (one citation = one source row). +4. After page write, call `wiki-cli.js page` so FTS index stays in sync. +5. Append a one-line entry to `logs/maintenance-log.md` per change. +6. 
Update `wiki/index.md` if new top-level page. + +## Quality bar + +- First page useful immediately, not stub. +- Stable slug filenames (`tool-use-benchmarks.md`, not `2026-05-08-notes.md`). +- Separate raw source from compiled interpretation. +- Cross-link related pages in same wiki via relative links. +- Mark speculation with `> SPECULATION:` block. +- No duplicate summaries — link existing page instead. +- Generated pages stay navigable for future agents. + +## Privacy + +Wikis with `private: true` in config never get fetched from web sources by `wiki-research-loop`. Local raw/ only. + +## Auto-research opt-in + +Phase 3.3.0 ships builder + query only. Loop arrives in 3.3.1. To prep, `wiki.config.md` may include: + +```yaml +auto_research: + enabled: false # flip in 3.3.1 + max_pages_per_run: 5 + max_depth: 3 + budget_usd: 0.50 + fetchers: [web, arxiv, github] +``` + +## Templates + +See `templates/` for `wiki.config.md`, `index.md`, prompt files. `init_wiki.sh` copies these into the new wiki root. diff --git a/skills/wiki-builder/agents/openai.yaml b/skills/wiki-builder/agents/openai.yaml new file mode 100644 index 0000000..2614919 --- /dev/null +++ b/skills/wiki-builder/agents/openai.yaml @@ -0,0 +1,5 @@ +interface: + display_name: "Wiki Builder" + short_description: "Persistent research wikis indexed in pro-workflow's SQLite FTS5 store" + brand_color: "#1e1e2e" + default_prompt: "Use $wiki-builder to start a new persistent research wiki. Pages auto-index for FTS5 retrieval." diff --git a/skills/wiki-builder/references/wiki-flavors.md b/skills/wiki-builder/references/wiki-flavors.md new file mode 100644 index 0000000..f3e28c9 --- /dev/null +++ b/skills/wiki-builder/references/wiki-flavors.md @@ -0,0 +1,109 @@ +# Wiki Flavors + +Starting points only. Each wiki's `wiki.config.md` overrides any structure listed here. + +## Research + +Ongoing topic with many source types. 
+ +- `wiki/index.md` — overview + navigation +- `wiki/maps/research-map.md` — conceptual map +- `wiki/concepts/<concept>.md` — durable ideas +- `wiki/sources/<source>.md` — important source writeups +- `wiki/questions/<question>.md` — open investigations +- `derived/briefs/` — synthesis memos + +## Paper + +A paper, paper cluster, or literature review. + +- `wiki/index.md` — paper-set overview +- `wiki/papers/<paper-slug>.md` — individual papers +- `wiki/concepts/<concept>.md` — reusable technical ideas +- `wiki/comparisons/<topic>.md` — cross-paper comparisons +- `wiki/questions/<question>.md` — research gaps + +Paper pages cover: problem, method, results, limitations, implementation notes, related papers. + +## Domain + +Tracking an entire field. + +- `wiki/index.md` — high-level map +- `wiki/landscape.md` — actors, concepts, tools, debates +- `wiki/timelines/<topic>.md` — historical development +- `wiki/glossary.md` — terms +- `wiki/questions/<question>.md` — active uncertainties + +## Product + +Products, tools, APIs, platforms. + +- `wiki/index.md` — product summary +- `wiki/features/<feature>.md` — feature pages +- `wiki/use-cases/<use-case>.md` — applied workflows +- `wiki/competitors/<competitor>.md` — alternatives +- `wiki/questions/<question>.md` — evaluation gaps + +Distinguish documented behavior, observed behavior, pricing/availability, limitations, integration notes. + +## Person + +Researcher, founder, writer, public expert. + +- `wiki/index.md` — profile + navigation +- `wiki/work/<work-slug>.md` — papers, talks, posts, projects, artifacts +- `wiki/themes/<theme>.md` — recurring ideas +- `wiki/timeline.md` — dated milestones +- `wiki/questions/<question>.md` — unresolved context + +Source-grounded language only. No unsupported biographical claims. + +## Organization + +Labs, companies, communities, institutions. 
+ +- `wiki/index.md` — overview +- `wiki/projects/<project>.md` — important initiatives +- `wiki/people/<person>.md` — relevant people +- `wiki/timeline.md` — milestones +- `wiki/strategy.md` — source-grounded strategic analysis + +Separate facts from interpretation, especially for strategy. + +## Project + +Internal build, research initiative, course, content project. + +- `wiki/index.md` — status + navigation +- `wiki/decisions/<decision>.md` — important choices +- `wiki/specs/<spec>.md` — requirements +- `wiki/notes/<note>.md` — working notes +- `derived/briefs/` — summaries + handoffs + +Project wikis make current state obvious to the next agent. + +## Codebase (pro-workflow extension) + +Symbol/file-aware KB tied to a repo. + +- `wiki/index.md` — module map +- `wiki/modules/<module>.md` — per-module deep dive +- `wiki/symbols/<symbol>.md` — high-traffic types/functions +- `wiki/decisions/<decision>.md` — ADR-style entries +- `wiki/runbooks/<flow>.md` — operational sequences +- `wiki/questions/<question>.md` — open architecture questions + +Pages link to file paths; reindex on git pull. Pair with `--scope project` for committable form. + +## Incident (pro-workflow extension) + +Post-mortem KB. + +- `wiki/index.md` — incident roster +- `wiki/timeline/<incident>.md` — minute-by-minute +- `wiki/signals/<signal>.md` — early-warning patterns +- `wiki/fixes/<fix>.md` — applied remedies +- `wiki/questions/<question>.md` — what we still don't know + +Pin status (`active | resolved | recurring`). Cross-link to runbooks if a `codebase` wiki exists. 
diff --git a/skills/wiki-builder/scripts/init_wiki.sh b/skills/wiki-builder/scripts/init_wiki.sh new file mode 100755 index 0000000..94e6794 --- /dev/null +++ b/skills/wiki-builder/scripts/init_wiki.sh @@ -0,0 +1,85 @@ +#!/usr/bin/env bash +set -euo pipefail + +slug="" +title="" +flavor="research" +scope="global" +root="" + +usage() { + cat <<EOF +Usage: init_wiki.sh <slug> --title "<title>" [--flavor <flavor>] [--scope global|project] [--root <path>] + +Flavors: research paper domain product person organization project codebase incident +EOF +} + +if [ $# -lt 1 ]; then usage; exit 1; fi +slug="$1"; shift + +while [ $# -gt 0 ]; do + case "$1" in + --title) title="$2"; shift 2 ;; + --flavor) flavor="$2"; shift 2 ;; + --scope) scope="$2"; shift 2 ;; + --root) root="$2"; shift 2 ;; + -h|--help) usage; exit 0 ;; + *) echo "Unknown arg: $1" >&2; usage; exit 1 ;; + esac +done + +if [ -z "$title" ]; then echo "--title required" >&2; exit 1; fi + +if [ -z "$root" ]; then + if [ "$scope" = "project" ]; then + project_dir="${CLAUDE_PROJECT_DIR:-$PWD}" + root="$project_dir/.claude/wikis" + else + root="${WIKI_ROOT:-$HOME/.pro-workflow/wikis}" + fi +fi + +dest="$root/$slug" +if [ -d "$dest" ]; then + echo "Wiki already exists: $dest" >&2 + exit 2 +fi + +mkdir -p "$dest"/{raw,wiki,derived,prompts,logs} + +case "$flavor" in + paper) mkdir -p "$dest/wiki/sections" ;; + domain|research) mkdir -p "$dest/wiki"/{concepts,papers,questions} ;; + product) mkdir -p "$dest/wiki"/{features,decisions,issues} ;; + person|organization) mkdir -p "$dest/wiki"/{publications,timelines} ;; + project) mkdir -p "$dest/wiki"/{decisions,runbooks,questions} ;; + codebase) mkdir -p "$dest/wiki"/{modules,symbols,decisions} ;; + incident) mkdir -p "$dest/wiki"/{timeline,signals,fixes} ;; +esac + +skill_dir="$(cd "$(dirname "$0")"/.. 
&& pwd)" +templates="$skill_dir/templates" + +render() { + local src="$1" dst="$2" + sed -e "s|{{SLUG}}|$(printf '%s' "$slug" | sed 's/[\\&|]/\\&/g')|g" \ + -e "s|{{TITLE}}|$(printf '%s' "$title" | sed 's/[\\&|]/\\&/g')|g" \ + -e "s|{{FLAVOR}}|$flavor|g" \ + -e "s|{{SCOPE}}|$scope|g" \ + -e "s|{{TODAY}}|$(date -u +%Y-%m-%d)|g" \ + "$src" > "$dst" +} + +render "$templates/wiki.config.md" "$dest/wiki.config.md" +render "$templates/index.md" "$dest/wiki/index.md" +render "$templates/sources.md" "$dest/sources.md" +render "$templates/maintenance-log.md" "$dest/logs/maintenance-log.md" + +for p in compile-index compile-source-page compile-concept-page query-and-file lint-wiki; do + if [ -f "$templates/prompts/$p.md" ]; then + render "$templates/prompts/$p.md" "$dest/prompts/$p.md" + fi +done + +echo "$dest" diff --git a/skills/wiki-builder/scripts/wiki-cli.js b/skills/wiki-builder/scripts/wiki-cli.js new file mode 100755 index 0000000..d7646a2 --- /dev/null +++ b/skills/wiki-builder/scripts/wiki-cli.js @@ -0,0 +1,224 @@ +#!/usr/bin/env node +const fs = require('fs'); +const path = require('path'); +const os = require('os'); +const crypto = require('crypto'); +const { execFileSync } = require('child_process'); + +const PRO_WORKFLOW_ROOT = path.resolve(__dirname, '..', '..', '..'); + +function getStore() { + const distPath = path.join(PRO_WORKFLOW_ROOT, 'dist', 'db', 'store.js'); + if (!fs.existsSync(distPath)) { + die(`Built store missing at ${distPath}. 
Run: cd ${PRO_WORKFLOW_ROOT} && npm install && npm run build`); + } + const mod = require(distPath); + if (typeof mod.createStore !== 'function') die('createStore not exported'); + return mod.createStore(); +} + +function die(msg) { + console.error(`[wiki] ${msg}`); + process.exit(1); +} + +function parseArgs(argv) { + const out = { _: [] }; + for (let i = 0; i < argv.length; i++) { + const a = argv[i]; + if (a.startsWith('--')) { + const key = a.slice(2); + const next = argv[i + 1]; + if (next && !next.startsWith('--')) { out[key] = next; i++; } + else out[key] = true; + } else { + out._.push(a); + } + } + return out; +} + +function defaultRoot(scope) { + if (scope === 'project') { + const proj = process.env.CLAUDE_PROJECT_DIR || process.cwd(); + return path.join(proj, '.claude', 'wikis'); + } + return process.env.WIKI_ROOT || path.join(os.homedir(), '.pro-workflow', 'wikis'); +} + +function sha256(s) { + return crypto.createHash('sha256').update(s).digest('hex').slice(0, 16); +} + +function cmdInit(args) { + const slug = args._[0]; + if (!slug) die('init: slug required'); + const title = args.title || slug; + const flavor = args.flavor || 'research'; + const scope = args.scope || 'global'; + const root = args.root || defaultRoot(scope); + + const initSh = path.join(__dirname, 'init_wiki.sh'); + const dest = execFileSync('bash', [initSh, slug, '--title', title, '--flavor', flavor, '--scope', scope, '--root', root], { encoding: 'utf8' }).trim(); + + const store = getStore(); + try { + store.upsertWiki({ slug, title, flavor, root_path: dest, scope }); + } finally { + store.close(); + } + console.log(JSON.stringify({ slug, title, flavor, scope, root_path: dest }, null, 2)); +} + +function cmdList(args) { + const store = getStore(); + try { + const wikis = store.listWikis(args.scope); + if (args.json) { console.log(JSON.stringify(wikis, null, 2)); return; } + if (!wikis.length) { console.log('(no wikis)'); return; } + for (const w of wikis) { + 
console.log(`${w.slug.padEnd(24)} ${w.flavor.padEnd(12)} ${w.scope.padEnd(8)} ${w.root_path}`); + } + } finally { + store.close(); + } +} + +function cmdInfo(args) { + const slug = args._[0]; + if (!slug) die('info: slug required'); + const store = getStore(); + try { + const wiki = store.getWiki(slug); + if (!wiki) die(`unknown wiki: ${slug}`); + const pages = store.listWikiPages(slug); + console.log(JSON.stringify({ wiki, page_count: pages.length, pages: pages.map(p => p.rel_path) }, null, 2)); + } finally { + store.close(); + } +} + +function cmdPage(args) { + const slug = args._[0]; + const relPath = args._[1]; + if (!slug || !relPath) die('page: slug and rel-path required'); + + const store = getStore(); + try { + const wiki = store.getWiki(slug); + if (!wiki) die(`unknown wiki: ${slug}. Run: wiki-cli.js init ${slug} --title "..."`); + + const fileAbs = path.join(wiki.root_path, relPath); + let content = ''; + if (args['from-file']) { + content = fs.readFileSync(args['from-file'], 'utf8'); + fs.mkdirSync(path.dirname(fileAbs), { recursive: true }); + fs.writeFileSync(fileAbs, content); + } else if (fs.existsSync(fileAbs)) { + content = fs.readFileSync(fileAbs, 'utf8'); + } else { + die(`page file does not exist: ${fileAbs}. 
Pass --from-file or write the file first.`); + } + + const title = args.title || extractTitle(content) || path.basename(relPath, '.md'); + const summary = args.summary || extractSummary(content); + const pageType = args.type || inferType(relPath); + + const row = store.upsertWikiPage({ + wiki_slug: slug, + rel_path: relPath, + title, + summary, + content, + page_type: pageType, + content_hash: sha256(content), + }); + console.log(JSON.stringify({ id: row.id, wiki_slug: slug, rel_path: relPath, title, page_type: pageType }, null, 2)); + } finally { + store.close(); + } +} + +function cmdReindex(args) { + const slug = args._[0]; + if (!slug) die('reindex: slug required'); + const store = getStore(); + try { + const wiki = store.getWiki(slug); + if (!wiki) die(`unknown wiki: ${slug}`); + const wikiDir = path.join(wiki.root_path, 'wiki'); + if (!fs.existsSync(wikiDir)) die(`no wiki/ folder at ${wikiDir}`); + + let count = 0; + walk(wikiDir).forEach(abs => { + if (!abs.endsWith('.md')) return; + const rel = path.relative(wiki.root_path, abs); + const content = fs.readFileSync(abs, 'utf8'); + store.upsertWikiPage({ + wiki_slug: slug, + rel_path: rel, + title: extractTitle(content) || path.basename(rel, '.md'), + summary: extractSummary(content), + content, + page_type: inferType(rel), + content_hash: sha256(content), + }); + count++; + }); + console.log(JSON.stringify({ slug, indexed: count }, null, 2)); + } finally { + store.close(); + } +} + +function walk(dir) { + const out = []; + for (const entry of fs.readdirSync(dir, { withFileTypes: true })) { + const abs = path.join(dir, entry.name); + if (entry.isDirectory()) out.push(...walk(abs)); + else out.push(abs); + } + return out; +} + +function extractTitle(md) { + const m = md.match(/^#\s+(.+)$/m); + return m ? 
m[1].trim() : null; +} + +function extractSummary(md) { + const stripped = md.replace(/^---[\s\S]*?---\s*/m, '').replace(/^#.*\n/m, '').trim(); + const para = stripped.split(/\n\n/)[0] || ''; + return para.slice(0, 500) || null; +} + +function inferType(relPath) { + const parts = relPath.split(path.sep); + if (parts[0] === 'wiki' && parts.length >= 3) return parts[1].replace(/s$/, ''); + return null; +} + +function usage() { + console.error(`Usage: + wiki-cli.js init <slug> --title "X" [--flavor research] [--scope global|project] [--root path] + wiki-cli.js list [--scope global|project] [--json] + wiki-cli.js info <slug> + wiki-cli.js page <slug> <rel-path> [--title "X"] [--type concept|paper|...] [--from-file path] + wiki-cli.js reindex <slug>`); + process.exit(1); +} + +function main() { + const [, , cmd, ...rest] = process.argv; + const args = parseArgs(rest); + switch (cmd) { + case 'init': return cmdInit(args); + case 'list': return cmdList(args); + case 'info': return cmdInfo(args); + case 'page': return cmdPage(args); + case 'reindex': return cmdReindex(args); + default: usage(); + } +} + +main(); diff --git a/skills/wiki-builder/templates/index.md b/skills/wiki-builder/templates/index.md new file mode 100644 index 0000000..0c5c228 --- /dev/null +++ b/skills/wiki-builder/templates/index.md @@ -0,0 +1,21 @@ +# {{TITLE}} + +> {{FLAVOR}} wiki · created {{TODAY}} + +A persistent knowledge base on **{{TITLE}}**. + +## Sections + +_Add links to compiled pages here as you build them._ + +## Open questions + +_List unresolved threads. New seeds for the research loop start here._ + +## Sources + +See [sources.md](../sources.md). + +## Maintenance log + +See [logs/maintenance-log.md](../logs/maintenance-log.md). 
diff --git a/skills/wiki-builder/templates/maintenance-log.md b/skills/wiki-builder/templates/maintenance-log.md new file mode 100644 index 0000000..7af7052 --- /dev/null +++ b/skills/wiki-builder/templates/maintenance-log.md @@ -0,0 +1,5 @@ +# Maintenance Log — {{TITLE}} + +One line per change. Newest on top. + +- {{TODAY}} init: scaffolded wiki ({{FLAVOR}}, {{SCOPE}}) diff --git a/skills/wiki-builder/templates/prompts/compile-concept-page.md b/skills/wiki-builder/templates/prompts/compile-concept-page.md new file mode 100644 index 0000000..0471832 --- /dev/null +++ b/skills/wiki-builder/templates/prompts/compile-concept-page.md @@ -0,0 +1,15 @@ +# Compile concept page — {{TITLE}} + +Synthesize a single idea across multiple sources. Output `wiki/concepts/<slug>.md`: + +1. **One-line definition** that future-you will trust. +2. **Why it matters** — when this concept changes a decision. +3. **Variants / related concepts** with cross-links. +4. **Evidence** — bulleted claims `[^src-id]`, multi-source where possible. +5. **Counter-evidence** — claims that contradict, also cited. +6. **Status**: `stable | contested | speculative`. + +Rules: +- A concept page that cites only one source is suspect; flag in the page header. +- Prefer multi-source synthesis over single-source restatement. +- Link to relevant `paper/<key>.md` pages instead of restating the paper. diff --git a/skills/wiki-builder/templates/prompts/compile-index.md b/skills/wiki-builder/templates/prompts/compile-index.md new file mode 100644 index 0000000..14e2a29 --- /dev/null +++ b/skills/wiki-builder/templates/prompts/compile-index.md @@ -0,0 +1,16 @@ +# Compile index — {{TITLE}} + +Refresh `wiki/index.md` so it stays the entry point. + +1. List sections by type (concepts, papers, questions...). +2. Within each section, link pages alphabetically by slug. +3. Surface 3-5 "open questions" pulled from `question/` pages with status `open`. +4. Top of file: 2-line orientation paragraph. +5. 
Bottom: link to `sources.md` and `logs/maintenance-log.md`. + +Never delete user prose at the top; only update the generated section bounded by: +``` +<!-- BEGIN GENERATED INDEX --> +... +<!-- END GENERATED INDEX --> +``` diff --git a/skills/wiki-builder/templates/prompts/compile-source-page.md b/skills/wiki-builder/templates/prompts/compile-source-page.md new file mode 100644 index 0000000..47a76cd --- /dev/null +++ b/skills/wiki-builder/templates/prompts/compile-source-page.md @@ -0,0 +1,16 @@ +# Compile source page — {{TITLE}} + +Given a single source (paper, blog, video transcript, doc), produce `wiki/<type>/<slug>.md` with: + +1. **Front-matter**: title, source_id, page_type, last_verified. +2. **One-paragraph TL;DR** in plain language. +3. **Key claims** as bulleted list, each suffixed `[^src-id]`. +4. **Method / argument summary** — what the source actually does, not editorial. +5. **Open questions raised** — feed back into wiki seeds. +6. **Cross-links** — relative links to existing pages in this wiki when topics overlap. + +Rules: +- Never paraphrase without citing. +- Never copy long verbatim quotes. Two sentences max per quote. +- If source is paywalled or `private: true`, skip web verification. +- Mark inferences with `> SPECULATION:`. diff --git a/skills/wiki-builder/templates/prompts/lint-wiki.md b/skills/wiki-builder/templates/prompts/lint-wiki.md new file mode 100644 index 0000000..65c92d5 --- /dev/null +++ b/skills/wiki-builder/templates/prompts/lint-wiki.md @@ -0,0 +1,12 @@ +# Lint wiki — {{TITLE}} + +Audit the wiki and report (do not auto-fix unless asked): + +1. **Orphan pages** — pages not linked from `wiki/index.md` or any other page. +2. **Broken cross-links** — relative links pointing to missing files. +3. **Uncited claims** — paragraphs in `wiki/` with no `[^src-id]` reference. +4. **Stale claims** — `last_verified_at` older than 90 days. +5. **Duplicate summaries** — pages whose first paragraph is >80% similar to another's. +6. 
**Missing sources rows** — `[^src-NNN]` cited but `src-NNN` not in `sources.md`. + +Output a report under `derived/lint-<YYYY-MM-DD>.md` (date of the lint run). Do not modify `wiki/` content. diff --git a/skills/wiki-builder/templates/prompts/query-and-file.md b/skills/wiki-builder/templates/prompts/query-and-file.md new file mode 100644 index 0000000..d8655bf --- /dev/null +++ b/skills/wiki-builder/templates/prompts/query-and-file.md @@ -0,0 +1,11 @@ +# Query and file — {{TITLE}} + +When a user asks a question that the wiki should answer: + +1. Search the wiki via `wiki-query` (FTS5). +2. If a page already covers the answer, cite it directly. Do not duplicate. +3. If coverage is partial, draft an addendum to the existing page. +4. If no coverage, create the appropriate page type (concept/paper/question/...). +5. If still uncertain, file a `wiki/questions/<slug>.md` and append it as a seed for the research loop (Phase 3.3.1+). + +Always echo the answer with citations. Never fabricate citations. diff --git a/skills/wiki-builder/templates/sources.md b/skills/wiki-builder/templates/sources.md new file mode 100644 index 0000000..37073cd --- /dev/null +++ b/skills/wiki-builder/templates/sources.md @@ -0,0 +1,8 @@ +# Sources — {{TITLE}} + +Every claim in `wiki/` cites a row from this table by `[^id]`. + +| id | type | url / path | title | hash | fetched_at | +|----|------|------------|-------|------|------------| + +<!-- Append new rows under the header. id format: src-NNN. 
--> diff --git a/skills/wiki-builder/templates/wiki.config.md b/skills/wiki-builder/templates/wiki.config.md new file mode 100644 index 0000000..51f633d --- /dev/null +++ b/skills/wiki-builder/templates/wiki.config.md @@ -0,0 +1,56 @@ +--- +slug: {{SLUG}} +title: {{TITLE}} +flavor: {{FLAVOR}} +scope: {{SCOPE}} +created_at: {{TODAY}} +private: false +auto_research: + enabled: false + max_pages_per_run: 5 + max_depth: 3 + budget_usd: 0.50 + fetchers: [web, arxiv, github] +--- + +# {{TITLE}} — Wiki Config + +## Purpose + +Why this wiki exists. Who reads it. What questions it answers. + +## Audience + +Primary: <author / future-you / team> +Secondary: <other agents that might query this wiki> + +## Page types + +- `wiki/concepts/<slug>.md` — durable explanation of a single idea +- `wiki/papers/<key>.md` — one-paper deep dive (key = author-year-shortname) +- `wiki/questions/<slug>.md` — open question, links to claims/papers that bear on it +- `wiki/notes/<date>-<slug>.md` — dated raw thinking, may be promoted to concept later + +Adjust per flavor. + +## Style rules + +- Plain markdown, no HTML. +- Headings: H1 = page title, H2 = top-level section, H3 = subsection. +- Citations inline as `[^src-id]` referencing `sources.md` row. +- Speculation marked `> SPECULATION:` blockquote. +- Each page ≤ 1500 words; split if longer. + +## Update workflow + +1. Land raw material in `raw/` (PDFs, scrapes, transcripts). +2. Add a row to `sources.md` (id, type, url / path, title, hash, fetched_at). +3. Compile `wiki/<type>/<slug>.md` citing those sources. +4. Cross-link from `wiki/index.md`. +5. Run `wiki-cli.js page` to update the FTS index. +6. Append a one-line entry to `logs/maintenance-log.md`. + +## Auto-research + +Loop is opt-in. Set `auto_research.enabled: true` once Phase 3.3.1 ships. +Budget caps are enforced; loop halts on cap or convergence. 
diff --git a/skills/wiki-query/SKILL.md b/skills/wiki-query/SKILL.md new file mode 100644 index 0000000..bc854bb --- /dev/null +++ b/skills/wiki-query/SKILL.md @@ -0,0 +1,72 @@ +--- +name: wiki-query +description: Query pro-workflow wikis via SQLite FTS5 BM25 retrieval. Returns top-K passages with citations. Use when answering a question that any of the user's wikis already covers, when the user says "what does the wiki say about X", "ask wiki", "search wikis", or before drafting a new wiki page (to avoid duplication). +--- + +# Wiki Query + +FTS5 BM25 retrieval over wiki pages indexed by `wiki-builder`. + +## When to use + +- Before writing any new wiki page → check coverage first +- User asks a domain question that may already live in a wiki +- "Ask the <slug> wiki: <question>" +- Verifying citations before quoting a claim +- `SessionStart` auto-load when prompt matches a known wiki topic + +## Commands + +``` +node $SKILL_ROOT/scripts/query.js search "<query>" [--wiki <slug>] [--limit 10] [--json] +node $SKILL_ROOT/scripts/query.js related <slug> <rel-path> [--limit 5] +node $SKILL_ROOT/scripts/query.js show <slug> <rel-path> +``` + +`search` with no `--wiki` ranks across all wikis. `related` finds adjacent pages by reusing the page's title + summary as the query. + +## Output + +JSON-friendly. Each hit: + +``` +{ + "page_id": 12, + "wiki_slug": "agent-memory", + "rel_path": "wiki/concepts/episodic-memory.md", + "title": "Episodic Memory", + "snippet": "... [time-stamped] traces, distinct from semantic ...", + "rank": -3.21 +} +``` + +Lower (more negative) rank = better BM25 match. + +## Citing back + +Every wiki hit must be cited as: + +``` +[wiki:<slug>] <title> — `<rel_path>` +``` + +Do not paraphrase a hit without showing the source. 
+ +## SessionStart integration + +When `pro-workflow`'s SessionStart hook detects wiki-relevant terms in the user prompt, it runs `query.js search "<prompt>" --limit 3` and injects top hits into the session as a hint: + +``` +[wiki-query] 3 relevant pages: +- agent-memory · wiki/concepts/episodic-memory.md +- agent-memory · wiki/papers/park-2023-generative-agents.md +- ... +``` + +Helps Claude recall existing knowledge instead of redoing research. + +## Limits (Phase 3.3.0) + +- BM25 only. Vector search arrives 3.3.2 with sqlite-vec. +- No re-ranking. MMR diversity arrives with the research loop in 3.3.1. +- Snippet window is 16 tokens around match; `query.js` does not yet accept a flag to tune it. diff --git a/skills/wiki-query/scripts/query.js b/skills/wiki-query/scripts/query.js new file mode 100755 index 0000000..a650045 --- /dev/null +++ b/skills/wiki-query/scripts/query.js @@ -0,0 +1,110 @@ +#!/usr/bin/env node +const fs = require('fs'); +const path = require('path'); + +const PRO_WORKFLOW_ROOT = path.resolve(__dirname, '..', '..', '..'); + +function getStore() { + const distPath = path.join(PRO_WORKFLOW_ROOT, 'dist', 'db', 'store.js'); + if (!fs.existsSync(distPath)) { + console.error(`[wiki-query] built store missing at ${distPath}. 
Run: cd ${PRO_WORKFLOW_ROOT} && npm install && npm run build`); + process.exit(1); + } + return require(distPath).createStore(); +} + +function parseArgs(argv) { + const out = { _: [] }; + for (let i = 0; i < argv.length; i++) { + const a = argv[i]; + if (a.startsWith('--')) { + const key = a.slice(2); + const next = argv[i + 1]; + if (next && !next.startsWith('--')) { out[key] = next; i++; } + else out[key] = true; + } else { + out._.push(a); + } + } + return out; +} + +function cmdSearch(args) { + const query = args._[0]; + if (!query) { console.error('search: query required'); process.exit(1); } + const limit = parseInt(args.limit, 10) || 10; + const store = getStore(); + try { + const hits = store.searchWiki(query, { wikiSlug: args.wiki, limit }); + if (args.json) { + console.log(JSON.stringify(hits, null, 2)); + } else if (!hits.length) { + console.log('(no matches)'); + } else { + for (const h of hits) { + console.log(`${h.wiki_slug} · ${h.rel_path} [${h.rank.toFixed(2)}]`); + console.log(` ${h.title}`); + if (h.snippet) console.log(` ${h.snippet.replace(/\n/g, ' ')}`); + } + } + } finally { + store.close(); + } +} + +function cmdRelated(args) { + const slug = args._[0]; + const relPath = args._[1]; + if (!slug || !relPath) { console.error('related: slug and rel-path required'); process.exit(1); } + const limit = parseInt(args.limit, 10) || 5; + const store = getStore(); + try { + const page = store.getWikiPage(slug, relPath); + if (!page) { console.error(`page not found: ${slug}/${relPath}`); process.exit(1); } + const seed = [page.title, page.summary].filter(Boolean).join(' '); + const hits = store.searchWiki(seed, { wikiSlug: slug, limit: limit + 1 }) + .filter(h => h.rel_path !== relPath) + .slice(0, limit); + if (args.json) console.log(JSON.stringify(hits, null, 2)); + else if (!hits.length) console.log('(no related pages)'); + else hits.forEach(h => console.log(`${h.rel_path} ${h.title} [${h.rank.toFixed(2)}]`)); + } finally { + store.close(); + } +} + 
+function cmdShow(args) { + const slug = args._[0]; + const relPath = args._[1]; + if (!slug || !relPath) { console.error('show: slug and rel-path required'); process.exit(1); } + const store = getStore(); + try { + const page = store.getWikiPage(slug, relPath); + if (!page) { console.error('page not found'); process.exit(1); } + if (args.json) console.log(JSON.stringify({ ...page, content: undefined, content_preview: page.content?.slice(0, 1000) }, null, 2)); + else console.log(page.content || ''); + } finally { + store.close(); + } +} + +function usage() { + console.error(`Usage: + query.js search "<query>" [--wiki <slug>] [--limit 10] [--json] + query.js related <slug> <rel-path> [--limit 5] [--json] + query.js show <slug> <rel-path> [--json]`); + process.exit(1); +} + +function main() { + const [, , cmd, ...rest] = process.argv; + const args = parseArgs(rest); + switch (cmd) { + case 'search': return cmdSearch(args); + case 'related': return cmdRelated(args); + case 'show': return cmdShow(args); + default: usage(); + } +} + +main(); diff --git a/skills/wiki-research-loop/SKILL.md b/skills/wiki-research-loop/SKILL.md new file mode 100644 index 0000000..681e82c --- /dev/null +++ b/skills/wiki-research-loop/SKILL.md @@ -0,0 +1,144 @@ +--- +name: wiki-research-loop +description: Auto-grow a pro-workflow wiki by running a budget-capped BFS research loop over pluggable source fetchers (web, arXiv, GitHub). Each iteration pops a seed from the queue, fetches sources, drafts a wiki page, dedupes claims against existing pages, enqueues follow-up seeds. Halts on budget cap, depth cap, or convergence. Use when the user says "research <topic>", "grow the <slug> wiki", "auto-research", or wants a knowledge base that builds itself overnight. +--- + +# Wiki Research Loop + +Driver that turns a wiki into an auto-grown knowledge base. Layers on top of `wiki-builder` and `wiki-query`. 
+ +## Loop semantics + +``` +seed-queue (pending) → next-seed + → fetch sources via plugins (web | arxiv | github) + → extract claims + → dedupe vs index (FTS5; later vector via 3.3.2) + → compile new page or amend existing + → upsert page (auto-FTS-index) + → enqueue follow-up seeds (max-depth gate) + → mark seed done + → if budget OR convergence OR kill-switch → halt +``` + +## Halt conditions (any one trips) + +- `budget_usd` exceeded (loop tracks per-fetcher cost estimate) +- `max_pages_per_run` written +- `max_depth` reached on every active branch +- 3 consecutive pages add < 5 % new claims (convergence) +- File `~/.pro-workflow/STOP` exists (operator kill-switch) +- `wiki.config.md` `auto_research.enabled: false` +- Wiki `private: true` AND any non-local fetcher selected + +## Commands + +``` +node $SKILL_ROOT/scripts/research-loop.js run <slug> [--max-pages N] [--max-depth N] [--budget-usd 0.50] [--fetchers web,arxiv,github] +node $SKILL_ROOT/scripts/research-loop.js seed <slug> "<query>" [--depth 0] [--parent-id N] +node $SKILL_ROOT/scripts/research-loop.js seeds <slug> [--status pending|active|done|failed] +node $SKILL_ROOT/scripts/research-loop.js cancel <slug> +node $SKILL_ROOT/scripts/research-loop.js status +``` + +CLI flags override `wiki.config.md` for one run only. + +## Source fetchers + +Pluggable. Each lives at `scripts/source-fetchers/<name>.js`. Interface: + +```js +module.exports = { + name: 'web', + match: (q) => true, // is this fetcher useful? + estimateCost: (q) => ({ usd: 0, tokens: 0 }), + fetch: async (q, opts) => [ // returns RawDoc[] + { url, title, content, fetched_at } + ] +}; +``` + +Built-in: +- **`web.js`** — Fetches via the user's available `WebFetch` tool through a stdin/stdout shim. Treats result as plain text/markdown. +- **`arxiv.js`** — `https://export.arxiv.org/api/query` (free, public, no key). Returns abstract + metadata. 
+- **`github.js`** — `https://api.github.com/search/repositories` + README pull (uses `GH_TOKEN` if set, otherwise unauthenticated rate limit). + +Drop a new file in `~/.pro-workflow/fetchers/<name>.js` to add a custom fetcher. Loaded at startup if present. + +## Budget enforcement + +Pre-iteration: sum `estimateCost` across selected fetchers. If projected cumulative cost would exceed `budget_usd`, halt. + +Post-iteration: track tokens used by the LLM compile step (Anthropic/OpenAI passthrough). Hard-kill on overrun. + +Per-fetcher overrides via env: `WIKI_LOOP_BUDGET_USD`, `WIKI_LOOP_MAX_PAGES`, `WIKI_LOOP_MAX_DEPTH`. + +## Seed queue + +SQLite-backed via `wiki_seeds` table: + +| field | meaning | +|-------|---------| +| `query` | natural-language seed | +| `status` | `pending` → `active` → `done`\|`failed` | +| `parent_id` | seed that produced this one | +| `depth` | BFS depth from root | + +Loop pops by `(depth ASC, created_at ASC)` so it explores breadth-first. + +## Convergence detection + +After each compiled page, compute Jaccard overlap of claim-text tokens vs the prior 3 pages. If `< 5 %` novel content for 3 consecutive pages, halt and report `converged`. + +## Kill switch + +``` +touch ~/.pro-workflow/STOP +``` + +Loop checks per-iteration and halts gracefully. Remove file to resume next run. + +## Privacy guard + +If `wiki.config.md` has `private: true`, the loop refuses any non-local fetcher and emits a warning. Only `raw/` ingestion via manual seeds is allowed. + +## Reactive trigger (Phase 3.3.4) + +`scripts/file-watcher.js` watches `wiki/<slug>/wiki/**/*.md`. On user-edited claim, enqueues a verification seed (`verify: <claim>`) at depth 0. Wired through pro-workflow's `file-watcher.js` hook. + +## Cron tick (Phase 3.3.4) + +`scripts/research-tick.js` is launchable from any cron-style runner. Picks the oldest opted-in wiki with pending seeds and runs a single iteration. Hook event: `pro-workflow:research-tick`. 
+ +## Output + +Each run writes: + +``` +<wiki-root>/logs/research-<UTC-timestamp>.md # human-readable run log +<wiki-root>/derived/run-<UTC-timestamp>.json # structured stats +``` + +Run log lines: + +``` +[2026-05-08T10:42Z] seed-3 (depth=1) "memory consolidation in agents" + fetcher=arxiv hits=3 + fetcher=web hits=2 + compiled wiki/concepts/memory-consolidation.md (claims=7, novel=4) + enqueued 2 follow-up seeds + cost so far: $0.04 / $0.50 +``` + +## Integration with `wiki-query` + +Every compiled page goes through `wiki-cli.js page` so FTS5 stays consistent. The dedupe step calls `searchWiki` with the candidate claim text to find near-duplicates. + +## Status (Phase 3.3.1) + +Ships: loop driver, seed queue, web/arxiv/github fetchers, budget caps, convergence detector, kill-switch, manual `run` command. + +Defers: +- Vector dedupe (Phase 3.3.2 via sqlite-vec) +- LLM-judged claim novelty (current = Jaccard token overlap) +- Cron + reactive (Phase 3.3.4) diff --git a/skills/wiki-research-loop/scripts/research-loop.js b/skills/wiki-research-loop/scripts/research-loop.js new file mode 100755 index 0000000..d5fac5c --- /dev/null +++ b/skills/wiki-research-loop/scripts/research-loop.js @@ -0,0 +1,352 @@ +#!/usr/bin/env node +const fs = require('fs'); +const path = require('path'); +const os = require('os'); +const crypto = require('crypto'); + +const PRO_WORKFLOW_ROOT = path.resolve(__dirname, '..', '..', '..'); +const SKILL_ROOT = path.resolve(__dirname, '..'); +const STOP_FILE = path.join(os.homedir(), '.pro-workflow', 'STOP'); + +function getStore() { + const distPath = path.join(PRO_WORKFLOW_ROOT, 'dist', 'db', 'store.js'); + if (!fs.existsSync(distPath)) { + die(`built store missing at ${distPath}. 
Run: cd ${PRO_WORKFLOW_ROOT} && npm install && npm run build`); + } + return require(distPath).createStore(); +} + +function die(msg) { console.error(`[research-loop] ${msg}`); process.exit(1); } +function log(msg) { console.error(`[research-loop] ${msg}`); } + +function parseArgs(argv) { + const out = { _: [] }; + for (let i = 0; i < argv.length; i++) { + const a = argv[i]; + if (a.startsWith('--')) { + const key = a.slice(2); + const next = argv[i + 1]; + if (next && !next.startsWith('--')) { out[key] = next; i++; } + else out[key] = true; + } else out._.push(a); + } + return out; +} + +function loadFetchers(names) { + const fetchers = {}; + const dirs = [ + path.join(SKILL_ROOT, 'scripts', 'source-fetchers'), + path.join(os.homedir(), '.pro-workflow', 'fetchers'), + ]; + for (const dir of dirs) { + if (!fs.existsSync(dir)) continue; + for (const f of fs.readdirSync(dir)) { + if (!f.endsWith('.js')) continue; + const name = path.basename(f, '.js'); + if (names && !names.includes(name)) continue; + try { + fetchers[name] = require(path.join(dir, f)); + } catch (e) { + log(`failed to load fetcher ${name}: ${e.message}`); + } + } + } + return fetchers; +} + +function readWikiConfig(rootPath) { + const cfgPath = path.join(rootPath, 'wiki.config.md'); + if (!fs.existsSync(cfgPath)) return {}; + const raw = fs.readFileSync(cfgPath, 'utf8'); + const m = raw.match(/^---\s*\n([\s\S]*?)\n---/); + if (!m) return {}; + const obj = {}; + let nested = null; + for (const line of m[1].split('\n')) { + if (!line.trim()) continue; + const indent = line.match(/^(\s*)/)[1].length; + const trimmed = line.trim(); + const kv = trimmed.match(/^([A-Za-z_]+):\s*(.*)$/); + if (!kv) continue; + const k = kv[1], v = kv[2]; + if (indent === 0) { + if (v === '') { obj[k] = {}; nested = obj[k]; } + else { obj[k] = parseScalar(v); nested = null; } + } else if (nested) { + nested[k] = parseScalar(v); + } + } + return obj; +} + +function parseScalar(v) { + if (/^\[.*\]$/.test(v)) return v.slice(1, 
-1).split(',').map(s => s.trim()).filter(Boolean); + if (v === 'true') return true; + if (v === 'false') return false; + if (/^-?\d+(\.\d+)?$/.test(v)) return Number(v); + return v; +} + +function tokenize(text) { + return new Set((text.toLowerCase().match(/[a-z0-9_]{4,}/g) || [])); +} + +function jaccardNovelty(newText, prevTexts) { + const a = tokenize(newText); + if (a.size === 0) return 1; + const b = new Set(); + for (const p of prevTexts) tokenize(p).forEach(t => b.add(t)); + if (b.size === 0) return 1; + let overlap = 0; + for (const t of a) if (b.has(t)) overlap++; + return 1 - (overlap / a.size); +} + +function slugify(s) { + return s.toLowerCase().replace(/[^a-z0-9]+/g, '-').replace(/^-|-$/g, '').slice(0, 60) || 'page'; +} + +function compilePage(seed, docs, prevPages) { + const claims = []; + const seen = new Set(); + for (const d of docs) { + const text = d.content || ''; + for (const sentence of text.split(/(?<=[.!?])\s+/).slice(0, 8)) { + const trimmed = sentence.trim(); + if (trimmed.length < 40 || trimmed.length > 400) continue; + const key = trimmed.toLowerCase().slice(0, 80); + if (seen.has(key)) continue; + seen.add(key); + claims.push({ text: trimmed, source: d.url || d.title || 'unknown' }); + } + } + if (!claims.length) return null; + + const novelty = jaccardNovelty(claims.map(c => c.text).join(' '), prevPages.map(p => p.content || '')); + + const lines = []; + lines.push(`# ${seed.query}`); + lines.push(''); + lines.push(`> seed-${seed.id} · depth ${seed.depth} · novelty ${(novelty * 100).toFixed(0)}%`); + lines.push(''); + lines.push('## Sources'); + lines.push(''); + for (const d of docs) { + lines.push(`- [${d.title || d.url}](${d.url || ''})`); + } + lines.push(''); + lines.push('## Claims'); + lines.push(''); + for (const [i, c] of claims.entries()) { + lines.push(`- ${c.text} [^src-${i + 1}]`); + } + lines.push(''); + lines.push('## Open follow-ups'); + lines.push(''); + lines.push('_Auto-extracted; review and prune._'); + + return { 
content: lines.join('\n'), claims, novelty }; +} + +function deriveFollowUps(seed, page) { + const queries = new Set(); + for (const c of page.claims) { + const m = c.text.match(/\b([A-Z][a-zA-Z]{3,})\b/g); + if (m) for (const term of m.slice(0, 2)) queries.add(`${term} in ${seed.query}`); + } + return Array.from(queries).slice(0, 3); +} + +async function runOne(slug, args) { + if (fs.existsSync(STOP_FILE)) { + log('STOP file present — aborting'); + return { halted: 'kill-switch' }; + } + const store = getStore(); + try { + const wiki = store.getWiki(slug); + if (!wiki) die(`unknown wiki: ${slug}`); + const cfg = readWikiConfig(wiki.root_path); + const auto = cfg.auto_research || {}; + const enabled = !!(auto.enabled || args.force); + if (!enabled) { log(`auto_research.enabled is false in ${slug}/wiki.config.md (use --force to override)`); return { halted: 'disabled' }; } + + const isPrivate = !!(cfg.private); + const fetcherNames = (args.fetchers ? String(args.fetchers).split(',') : auto.fetchers) || ['web', 'arxiv', 'github']; + if (isPrivate && fetcherNames.some(n => n !== 'local')) { + log(`wiki ${slug} is private — refusing non-local fetchers`); + return { halted: 'private' }; + } + + const maxPages = parseInt(args['max-pages'] || process.env.WIKI_LOOP_MAX_PAGES || auto.max_pages_per_run || 5, 10); + const maxDepth = parseInt(args['max-depth'] || process.env.WIKI_LOOP_MAX_DEPTH || auto.max_depth || 3, 10); + const budget = parseFloat(args['budget-usd'] || process.env.WIKI_LOOP_BUDGET_USD || auto.budget_usd || 0.50); + + const fetchers = loadFetchers(fetcherNames); + if (Object.keys(fetchers).length === 0) die(`no usable fetchers among: ${fetcherNames.join(',')}`); + + const ts = new Date().toISOString().replace(/[:.]/g, '-'); + const logFile = path.join(wiki.root_path, 'logs', `research-${ts}.md`); + const stats = { slug, started: ts, pages: 0, cost_usd: 0, halted: null, log: [] }; + fs.mkdirSync(path.dirname(logFile), { recursive: true }); + + const prevPages 
= store.listWikiPages(slug); + let convergeStreak = 0; + + while (stats.pages < maxPages) { + if (fs.existsSync(STOP_FILE)) { stats.halted = 'kill-switch'; break; } + const seed = store.nextPendingSeed(slug); + if (!seed) { stats.halted = 'queue-empty'; break; } + if (seed.depth > maxDepth) { store.setSeedStatus(seed.id, 'done'); continue; } + store.setSeedStatus(seed.id, 'active'); + + const docs = []; + for (const [name, fetcher] of Object.entries(fetchers)) { + try { + if (!fetcher.match(seed.query)) continue; + const cost = fetcher.estimateCost ? fetcher.estimateCost(seed.query) : { usd: 0 }; + if (stats.cost_usd + (cost.usd || 0) > budget) { stats.halted = 'budget'; break; } + const hits = await fetcher.fetch(seed.query, { limit: 3 }); + docs.push(...hits); + stats.cost_usd += cost.usd || 0; + stats.log.push(`[${new Date().toISOString()}] seed-${seed.id} fetcher=${name} hits=${hits.length}`); + } catch (e) { + stats.log.push(`[${new Date().toISOString()}] seed-${seed.id} fetcher=${name} ERROR ${e.message}`); + } + } + if (stats.halted === 'budget') break; + + const compiled = compilePage(seed, docs, prevPages); + if (!compiled) { + store.setSeedStatus(seed.id, 'failed'); + stats.log.push(`[${new Date().toISOString()}] seed-${seed.id} no usable claims`); + continue; + } + + const relPath = path.join('wiki', 'questions', `${slugify(seed.query)}.md`); + const fileAbs = path.join(wiki.root_path, relPath); + fs.mkdirSync(path.dirname(fileAbs), { recursive: true }); + fs.writeFileSync(fileAbs, compiled.content); + + const row = store.upsertWikiPage({ + wiki_slug: slug, + rel_path: relPath, + title: seed.query, + summary: compiled.content.slice(0, 500), + content: compiled.content, + page_type: 'question', + content_hash: crypto.createHash('sha256').update(compiled.content).digest('hex').slice(0, 16), + }); + prevPages.push(row); + stats.pages++; + stats.log.push(`[${new Date().toISOString()}] seed-${seed.id} compiled ${relPath} 
novelty=${compiled.novelty.toFixed(2)}`); + + if (compiled.novelty < 0.05) convergeStreak++; + else convergeStreak = 0; + if (convergeStreak >= 3) { stats.halted = 'converged'; break; } + + const followUps = deriveFollowUps(seed, compiled); + for (const q of followUps) { + if (seed.depth + 1 > maxDepth) continue; + store.enqueueSeed({ wiki_slug: slug, query: q, parent_id: seed.id, depth: seed.depth + 1 }); + } + store.setSeedStatus(seed.id, 'done'); + } + + fs.writeFileSync(logFile, ['# Research run ' + ts, '', ...stats.log].join('\n')); + fs.writeFileSync(path.join(wiki.root_path, 'derived', `run-${ts}.json`), JSON.stringify(stats, null, 2)); + return stats; + } finally { + store.close(); + } +} + +function cmdSeed(args) { + const slug = args._[0]; + const query = args._[1]; + if (!slug || !query) die('seed: slug and query required'); + const store = getStore(); + try { + const wiki = store.getWiki(slug); + if (!wiki) die(`unknown wiki: ${slug}`); + const seed = store.enqueueSeed({ + wiki_slug: slug, + query, + depth: parseInt(args.depth, 10) || 0, + parent_id: args['parent-id'] ? parseInt(args['parent-id'], 10) : null, + }); + console.log(JSON.stringify(seed, null, 2)); + } finally { store.close(); } +} + +function cmdSeeds(args) { + const slug = args._[0]; + if (!slug) die('seeds: slug required'); + const store = getStore(); + try { + const status = args.status; + const where = status ? `WHERE wiki_slug = ? AND status = ?` : `WHERE wiki_slug = ?`; + const stmt = store.db.prepare(`SELECT * FROM wiki_seeds ${where} ORDER BY depth ASC, created_at ASC`); + const rows = status ? stmt.all(slug, status) : stmt.all(slug); + console.log(JSON.stringify(rows, null, 2)); + } finally { store.close(); } +} + +function cmdCancel(args) { + const slug = args._[0]; + if (!slug) die('cancel: slug required'); + const store = getStore(); + try { + const stmt = store.db.prepare(`UPDATE wiki_seeds SET status='failed' WHERE wiki_slug=? 
AND status IN ('pending','active')`); + const r = stmt.run(slug); + console.log(JSON.stringify({ slug, cancelled: r.changes }, null, 2)); + } finally { store.close(); } +} + +function cmdStatus() { + const store = getStore(); + try { + const rows = store.db.prepare(` + SELECT wiki_slug, + SUM(CASE WHEN status='pending' THEN 1 ELSE 0 END) AS pending, + SUM(CASE WHEN status='active' THEN 1 ELSE 0 END) AS active, + SUM(CASE WHEN status='done' THEN 1 ELSE 0 END) AS done, + SUM(CASE WHEN status='failed' THEN 1 ELSE 0 END) AS failed + FROM wiki_seeds GROUP BY wiki_slug + `).all(); + console.log(JSON.stringify({ kill_switch: fs.existsSync(STOP_FILE), wikis: rows }, null, 2)); + } finally { store.close(); } +} + +async function cmdRun(args) { + const slug = args._[0]; + if (!slug) die('run: slug required'); + const stats = await runOne(slug, args); + console.log(JSON.stringify(stats, null, 2)); +} + +function usage() { + console.error(`Usage: + research-loop.js run <slug> [--max-pages 5] [--max-depth 3] [--budget-usd 0.50] [--fetchers web,arxiv,github] [--force] + research-loop.js seed <slug> "<query>" [--depth 0] [--parent-id N] + research-loop.js seeds <slug> [--status pending|active|done|failed] + research-loop.js cancel <slug> + research-loop.js status`); + process.exit(1); +} + +async function main() { + const [, , cmd, ...rest] = process.argv; + const args = parseArgs(rest); + switch (cmd) { + case 'run': await cmdRun(args); break; + case 'seed': cmdSeed(args); break; + case 'seeds': cmdSeeds(args); break; + case 'cancel': cmdCancel(args); break; + case 'status': cmdStatus(); break; + default: usage(); + } +} + +main().catch(e => { console.error(e); process.exit(1); }); diff --git a/skills/wiki-research-loop/scripts/source-fetchers/arxiv.js b/skills/wiki-research-loop/scripts/source-fetchers/arxiv.js new file mode 100644 index 0000000..6a730cb --- /dev/null +++ b/skills/wiki-research-loop/scripts/source-fetchers/arxiv.js @@ -0,0 +1,48 @@ +const https = 
require('https'); + +function httpsGet(url) { + return new Promise((resolve, reject) => { + https.get(url, { headers: { 'User-Agent': 'pro-workflow/wiki-research-loop' } }, res => { + if (res.statusCode >= 300 && res.statusCode < 400 && res.headers.location) { + return httpsGet(res.headers.location).then(resolve, reject); + } + let data = ''; + res.on('data', c => { data += c; }); + res.on('end', () => resolve({ status: res.statusCode, body: data })); + }).on('error', reject); + }); +} + +function extractTag(xml, tag) { + const re = new RegExp(`<${tag}[^>]*>([\\s\\S]*?)<\\/${tag}>`, 'g'); + const out = []; + let m; + while ((m = re.exec(xml)) !== null) out.push(m[1].trim()); + return out; +} + +function parseEntries(xml) { + const entries = extractTag(xml, 'entry'); + return entries.map(entry => { + const title = (extractTag(entry, 'title')[0] || '').replace(/\s+/g, ' ').trim(); + const summary = (extractTag(entry, 'summary')[0] || '').replace(/\s+/g, ' ').trim(); + const idMatch = entry.match(/<id>([\s\S]*?)<\/id>/); + const url = idMatch ? 
idMatch[1].trim() : null; + const published = (entry.match(/<published>([\s\S]*?)<\/published>/) || [])[1] || null; + return { title, content: summary, url, fetched_at: new Date().toISOString(), published }; + }); +} + +module.exports = { + name: 'arxiv', + match: () => true, + estimateCost: () => ({ usd: 0, tokens: 0 }), + async fetch(query, opts = {}) { + const limit = opts.limit || 3; + const q = encodeURIComponent(query); + const url = `https://export.arxiv.org/api/query?search_query=all:${q}&start=0&max_results=${limit}`; + const res = await httpsGet(url); + if (res.status !== 200) return []; + return parseEntries(res.body); + } +}; diff --git a/skills/wiki-research-loop/scripts/source-fetchers/github.js b/skills/wiki-research-loop/scripts/source-fetchers/github.js new file mode 100644 index 0000000..d2ca873 --- /dev/null +++ b/skills/wiki-research-loop/scripts/source-fetchers/github.js @@ -0,0 +1,48 @@ +const https = require('https'); + +function httpsGet(url, headers = {}) { + return new Promise((resolve, reject) => { + const opts = { headers: { 'User-Agent': 'pro-workflow/wiki-research-loop', Accept: 'application/vnd.github+json', ...headers } }; + https.get(url, opts, res => { + if (res.statusCode >= 300 && res.statusCode < 400 && res.headers.location) { + return httpsGet(res.headers.location, headers).then(resolve, reject); + } + let data = ''; + res.on('data', c => { data += c; }); + res.on('end', () => resolve({ status: res.statusCode, body: data })); + }).on('error', reject); + }); +} + +function authHeader() { + const tok = process.env.GH_TOKEN || process.env.GITHUB_TOKEN; + return tok ? 
{ Authorization: `Bearer ${tok}` } : {}; +} + +module.exports = { + name: 'github', + match: () => true, + estimateCost: () => ({ usd: 0, tokens: 0 }), + async fetch(query, opts = {}) { + const limit = opts.limit || 3; + const url = `https://api.github.com/search/repositories?q=${encodeURIComponent(query)}&sort=stars&per_page=${limit}`; + const res = await httpsGet(url, authHeader()); + if (res.status !== 200) return []; + let json; + try { json = JSON.parse(res.body); } catch { return []; } + const items = json.items || []; + const out = []; + for (const r of items) { + const desc = r.description || ''; + const stars = r.stargazers_count || 0; + const summary = `${desc} (${stars}★, ${r.language || 'unknown'})`; + out.push({ + title: r.full_name, + content: summary, + url: r.html_url, + fetched_at: new Date().toISOString(), + }); + } + return out; + } +}; diff --git a/skills/wiki-research-loop/scripts/source-fetchers/web.js b/skills/wiki-research-loop/scripts/source-fetchers/web.js new file mode 100644 index 0000000..62e44ea --- /dev/null +++ b/skills/wiki-research-loop/scripts/source-fetchers/web.js @@ -0,0 +1,64 @@ +const https = require('https'); +const { URL } = require('url'); + +function httpsGet(url, headers = {}) { + return new Promise((resolve, reject) => { + const u = new URL(url); + const opts = { + hostname: u.hostname, + path: u.pathname + u.search, + method: 'GET', + headers: { + 'User-Agent': 'Mozilla/5.0 (compatible; pro-workflow/wiki-research-loop)', + Accept: 'text/html,application/xhtml+xml', + ...headers, + }, + }; + https.get(opts, res => { + if (res.statusCode >= 300 && res.statusCode < 400 && res.headers.location) { + const loc = res.headers.location.startsWith('http') ? 
res.headers.location : `https://${u.hostname}${res.headers.location}`; + return httpsGet(loc, headers).then(resolve, reject); + } + let data = ''; + res.on('data', c => { data += c; }); + res.on('end', () => resolve({ status: res.statusCode, body: data })); + }).on('error', reject); + }); +} + +function stripTags(html) { + return html.replace(/<[^>]+>/g, ' ').replace(/&[a-z]+;/gi, ' ').replace(/\s+/g, ' ').trim(); +} + +function extractDuckDuckGoLite(html, limit) { + const out = []; + const linkRe = /<a[^>]+class="result-link"[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/g; + const snipRe = /<td[^>]+class="result-snippet"[^>]*>([\s\S]*?)<\/td>/g; + const links = []; + let m; + while ((m = linkRe.exec(html)) !== null) links.push({ url: m[1], title: stripTags(m[2]) }); + const snippets = []; + while ((m = snipRe.exec(html)) !== null) snippets.push(stripTags(m[1])); + for (let i = 0; i < Math.min(limit, links.length); i++) { + out.push({ + url: links[i].url, + title: links[i].title, + content: snippets[i] || '', + fetched_at: new Date().toISOString(), + }); + } + return out; +} + +module.exports = { + name: 'web', + match: () => true, + estimateCost: () => ({ usd: 0, tokens: 0 }), + async fetch(query, opts = {}) { + const limit = opts.limit || 3; + const url = `https://lite.duckduckgo.com/lite/?q=${encodeURIComponent(query)}`; + const res = await httpsGet(url); + if (res.status !== 200) return []; + return extractDuckDuckGoLite(res.body, limit); + } +}; diff --git a/src/db/index.ts b/src/db/index.ts index 7232bfd..094d443 100644 --- a/src/db/index.ts +++ b/src/db/index.ts @@ -28,67 +28,16 @@ export function initializeDatabase(dbPath: string = DEFAULT_DB_PATH): Database.D db.pragma('journal_mode = WAL'); db.pragma('foreign_keys = ON'); - const schemaPath = path.join(__dirname, 'schema.sql'); - - if (fs.existsSync(schemaPath)) { - const schema = fs.readFileSync(schemaPath, 'utf8'); - db.exec(schema); - } else { - db.exec(` - CREATE TABLE IF NOT EXISTS learnings ( - id 
INTEGER PRIMARY KEY AUTOINCREMENT, - created_at TEXT DEFAULT (datetime('now')), - project TEXT, - category TEXT NOT NULL, - rule TEXT NOT NULL, - mistake TEXT, - correction TEXT, - times_applied INTEGER DEFAULT 0 - ); - - CREATE VIRTUAL TABLE IF NOT EXISTS learnings_fts USING fts5( - category, - rule, - mistake, - correction, - content=learnings, - content_rowid=id - ); - - CREATE TRIGGER IF NOT EXISTS learnings_ai AFTER INSERT ON learnings BEGIN - INSERT INTO learnings_fts(rowid, category, rule, mistake, correction) - VALUES (new.id, new.category, new.rule, new.mistake, new.correction); - END; - - CREATE TRIGGER IF NOT EXISTS learnings_ad AFTER DELETE ON learnings BEGIN - INSERT INTO learnings_fts(learnings_fts, rowid, category, rule, mistake, correction) - VALUES ('delete', old.id, old.category, old.rule, old.mistake, old.correction); - END; - - CREATE TRIGGER IF NOT EXISTS learnings_au AFTER UPDATE ON learnings BEGIN - INSERT INTO learnings_fts(learnings_fts, rowid, category, rule, mistake, correction) - VALUES ('delete', old.id, old.category, old.rule, old.mistake, old.correction); - INSERT INTO learnings_fts(rowid, category, rule, mistake, correction) - VALUES (new.id, new.category, new.rule, new.mistake, new.correction); - END; - - CREATE TABLE IF NOT EXISTS sessions ( - id TEXT PRIMARY KEY, - project TEXT, - started_at TEXT DEFAULT (datetime('now')), - ended_at TEXT, - edit_count INTEGER DEFAULT 0, - corrections_count INTEGER DEFAULT 0, - prompts_count INTEGER DEFAULT 0 - ); - - CREATE INDEX IF NOT EXISTS idx_learnings_category ON learnings(category); - CREATE INDEX IF NOT EXISTS idx_learnings_project ON learnings(project); - CREATE INDEX IF NOT EXISTS idx_learnings_created_at ON learnings(created_at); - CREATE INDEX IF NOT EXISTS idx_sessions_project ON sessions(project); - CREATE INDEX IF NOT EXISTS idx_sessions_started_at ON sessions(started_at); - `); + const candidates = [ + path.join(__dirname, 'schema.sql'), + path.join(__dirname, '..', '..', 'src', 
'db', 'schema.sql'), + ]; + const schemaPath = candidates.find(p => fs.existsSync(p)); + if (!schemaPath) { + throw new Error(`pro-workflow: schema.sql not found. Tried: ${candidates.join(', ')}. Run: npm run build`); } + const schema = fs.readFileSync(schemaPath, 'utf8'); + db.exec(schema); return db; } diff --git a/src/db/schema.sql b/src/db/schema.sql index bcf9e15..1325549 100644 --- a/src/db/schema.sql +++ b/src/db/schema.sql @@ -58,3 +58,107 @@ CREATE INDEX IF NOT EXISTS idx_learnings_project ON learnings(project); CREATE INDEX IF NOT EXISTS idx_learnings_created_at ON learnings(created_at); CREATE INDEX IF NOT EXISTS idx_sessions_project ON sessions(project); CREATE INDEX IF NOT EXISTS idx_sessions_started_at ON sessions(started_at); + +-- Wiki knowledge base (Phase 3.3.0) +CREATE TABLE IF NOT EXISTS wikis ( + slug TEXT PRIMARY KEY, + title TEXT NOT NULL, + flavor TEXT NOT NULL DEFAULT 'research', + root_path TEXT NOT NULL, + scope TEXT NOT NULL DEFAULT 'global', + auto_research INTEGER NOT NULL DEFAULT 0, + private INTEGER NOT NULL DEFAULT 0, + created_at TEXT DEFAULT (datetime('now')), + updated_at TEXT DEFAULT (datetime('now')) +); + +CREATE TABLE IF NOT EXISTS wiki_pages ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + wiki_slug TEXT NOT NULL REFERENCES wikis(slug) ON DELETE CASCADE, + rel_path TEXT NOT NULL, + title TEXT NOT NULL, + summary TEXT, + content TEXT, + page_type TEXT, + content_hash TEXT, + updated_at TEXT DEFAULT (datetime('now')), + UNIQUE(wiki_slug, rel_path) +); + +CREATE TABLE IF NOT EXISTS wiki_sources ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + wiki_slug TEXT NOT NULL REFERENCES wikis(slug) ON DELETE CASCADE, + url TEXT, + title TEXT, + fetched_at TEXT DEFAULT (datetime('now')), + content_hash TEXT, + fetcher TEXT, + UNIQUE(wiki_slug, content_hash) +); + +CREATE TABLE IF NOT EXISTS wiki_claims ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + page_id INTEGER NOT NULL REFERENCES wiki_pages(id) ON DELETE CASCADE, + source_id INTEGER REFERENCES 
wiki_sources(id) ON DELETE SET NULL, + text TEXT NOT NULL, + confidence REAL DEFAULT 0.8, + last_verified_at TEXT DEFAULT (datetime('now')) +); + +CREATE TABLE IF NOT EXISTS wiki_seeds ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + wiki_slug TEXT NOT NULL REFERENCES wikis(slug) ON DELETE CASCADE, + query TEXT NOT NULL, + status TEXT NOT NULL DEFAULT 'pending', + parent_id INTEGER REFERENCES wiki_seeds(id) ON DELETE SET NULL, + depth INTEGER NOT NULL DEFAULT 0, + created_at TEXT DEFAULT (datetime('now')) +); + +CREATE VIRTUAL TABLE IF NOT EXISTS wiki_pages_fts USING fts5( + title, + summary, + content, + content=wiki_pages, + content_rowid=id +); + +CREATE TRIGGER IF NOT EXISTS wiki_pages_ai AFTER INSERT ON wiki_pages BEGIN + INSERT INTO wiki_pages_fts(rowid, title, summary, content) + VALUES (new.id, new.title, new.summary, new.content); +END; + +CREATE TRIGGER IF NOT EXISTS wiki_pages_ad AFTER DELETE ON wiki_pages BEGIN + INSERT INTO wiki_pages_fts(wiki_pages_fts, rowid, title, summary, content) + VALUES ('delete', old.id, old.title, old.summary, old.content); +END; + +CREATE TRIGGER IF NOT EXISTS wiki_pages_au AFTER UPDATE ON wiki_pages BEGIN + INSERT INTO wiki_pages_fts(wiki_pages_fts, rowid, title, summary, content) + VALUES ('delete', old.id, old.title, old.summary, old.content); + INSERT INTO wiki_pages_fts(rowid, title, summary, content) + VALUES (new.id, new.title, new.summary, new.content); +END; + +CREATE INDEX IF NOT EXISTS idx_wiki_pages_slug ON wiki_pages(wiki_slug); +CREATE INDEX IF NOT EXISTS idx_wiki_pages_type ON wiki_pages(page_type); +CREATE INDEX IF NOT EXISTS idx_wiki_seeds_status ON wiki_seeds(wiki_slug, status); +CREATE INDEX IF NOT EXISTS idx_wiki_claims_page ON wiki_claims(page_id); + +-- Embeddings (Phase 3.3.2). Stored as float32 BLOB. Hybrid retrieval = BM25 + vector + RRF. +-- sqlite-vec is optional; if not loaded, this table degrades to dot-product in JS. 
+CREATE TABLE IF NOT EXISTS wiki_embeddings ( + page_id INTEGER PRIMARY KEY REFERENCES wiki_pages(id) ON DELETE CASCADE, + model TEXT NOT NULL, + dim INTEGER NOT NULL, + vector BLOB NOT NULL, + computed_at TEXT DEFAULT (datetime('now')) +); +CREATE INDEX IF NOT EXISTS idx_wiki_embeddings_model ON wiki_embeddings(model); + +-- Wiki-scoped learning rules +CREATE TABLE IF NOT EXISTS learnings_wiki ( + learning_id INTEGER PRIMARY KEY REFERENCES learnings(id) ON DELETE CASCADE, + wiki_slug TEXT NOT NULL REFERENCES wikis(slug) ON DELETE CASCADE +); +CREATE INDEX IF NOT EXISTS idx_learnings_wiki_slug ON learnings_wiki(wiki_slug); diff --git a/src/db/store.ts b/src/db/store.ts index 11c1e69..0cd7e14 100644 --- a/src/db/store.ts +++ b/src/db/store.ts @@ -22,13 +22,64 @@ export interface Session { prompts_count: number; } +export type WikiFlavor = + | 'research' | 'paper' | 'domain' | 'product' | 'person' + | 'organization' | 'project' | 'codebase' | 'incident'; + +export type WikiScope = 'global' | 'project'; + +export interface Wiki { + slug: string; + title: string; + flavor: WikiFlavor; + root_path: string; + scope: WikiScope; + auto_research: number; + private: number; + created_at: string; + updated_at: string; +} + +export interface WikiPage { + id: number; + wiki_slug: string; + rel_path: string; + title: string; + summary: string | null; + content: string | null; + page_type: string | null; + content_hash: string | null; + updated_at: string; +} + +export interface WikiSearchHit { + page_id: number; + wiki_slug: string; + rel_path: string; + title: string; + summary: string | null; + snippet: string; + rank: number; +} + +export interface WikiSeed { + id: number; + wiki_slug: string; + query: string; + status: 'pending' | 'active' | 'done' | 'failed'; + parent_id: number | null; + depth: number; + created_at: string; +} + export interface Store { db: Database.Database; close: () => void; - addLearning: (learning: Omit<Learning, 'id' | 'created_at' | 
'times_applied'>) => Learning; + addLearning: (learning: Omit<Learning, 'id' | 'created_at' | 'times_applied'>, wikiSlug?: string) => Learning; getLearning: (id: number) => Learning | undefined; getAllLearnings: (project?: string) => Learning[]; + getLearningsByWiki: (wikiSlug: string) => Learning[]; updateLearning: (id: number, updates: Partial<Learning>) => boolean; deleteLearning: (id: number) => boolean; incrementTimesApplied: (id: number) => void; @@ -38,6 +89,21 @@ export interface Store { getSession: (id: string) => Session | undefined; updateSessionCounts: (id: string, edits?: number, corrections?: number, prompts?: number) => void; getRecentSessions: (limit?: number) => Session[]; + + // Wiki KB + upsertWiki: (wiki: Pick<Wiki, 'slug' | 'title' | 'flavor' | 'root_path'> & Partial<Wiki>) => Wiki; + getWiki: (slug: string) => Wiki | undefined; + listWikis: (scope?: WikiScope) => Wiki[]; + deleteWiki: (slug: string) => boolean; + + upsertWikiPage: (page: Omit<WikiPage, 'id' | 'updated_at'>) => WikiPage; + getWikiPage: (wikiSlug: string, relPath: string) => WikiPage | undefined; + listWikiPages: (wikiSlug: string) => WikiPage[]; + searchWiki: (query: string, opts?: { wikiSlug?: string; limit?: number; loose?: boolean }) => WikiSearchHit[]; + + enqueueSeed: (seed: Omit<WikiSeed, 'id' | 'created_at' | 'status'> & { status?: WikiSeed['status'] }) => WikiSeed; + nextPendingSeed: (wikiSlug: string) => WikiSeed | undefined; + setSeedStatus: (id: number, status: WikiSeed['status']) => void; } export function createStore(dbPath: string = getDefaultDbPath()): Store { @@ -102,11 +168,86 @@ export function createStore(dbPath: string = getDefaultDbPath()): Store { SELECT * FROM sessions ORDER BY started_at DESC LIMIT ? 
`); + const upsertWikiStmt = db.prepare(` + INSERT INTO wikis (slug, title, flavor, root_path, scope, auto_research, private) + VALUES (@slug, @title, @flavor, @root_path, @scope, @auto_research, @private) + ON CONFLICT(slug) DO UPDATE SET + title = excluded.title, + flavor = excluded.flavor, + root_path = excluded.root_path, + scope = excluded.scope, + auto_research = excluded.auto_research, + private = excluded.private, + updated_at = datetime('now') + `); + const getWikiStmt = db.prepare(`SELECT * FROM wikis WHERE slug = ?`); + const listWikisStmt = db.prepare(`SELECT * FROM wikis ORDER BY updated_at DESC`); + const listWikisByScopeStmt = db.prepare(`SELECT * FROM wikis WHERE scope = ? ORDER BY updated_at DESC`); + const deleteWikiStmt = db.prepare(`DELETE FROM wikis WHERE slug = ?`); + + const upsertWikiPageStmt = db.prepare(` + INSERT INTO wiki_pages (wiki_slug, rel_path, title, summary, content, page_type, content_hash) + VALUES (@wiki_slug, @rel_path, @title, @summary, @content, @page_type, @content_hash) + ON CONFLICT(wiki_slug, rel_path) DO UPDATE SET + title = excluded.title, + summary = excluded.summary, + content = excluded.content, + page_type = excluded.page_type, + content_hash = excluded.content_hash, + updated_at = datetime('now') + RETURNING id + `); + const getWikiPageStmt = db.prepare(`SELECT * FROM wiki_pages WHERE wiki_slug = ? AND rel_path = ?`); + const getWikiPageByIdStmt = db.prepare(`SELECT * FROM wiki_pages WHERE id = ?`); + const listWikiPagesStmt = db.prepare(`SELECT * FROM wiki_pages WHERE wiki_slug = ? 
ORDER BY updated_at DESC`); + + const searchWikiAllStmt = db.prepare(` + SELECT p.id AS page_id, p.wiki_slug, p.rel_path, p.title, p.summary, + snippet(wiki_pages_fts, 2, '[', ']', '...', 16) AS snippet, + bm25(wiki_pages_fts) AS rank + FROM wiki_pages_fts + JOIN wiki_pages p ON p.id = wiki_pages_fts.rowid + WHERE wiki_pages_fts MATCH @q + ORDER BY rank + LIMIT @limit + `); + const searchWikiScopedStmt = db.prepare(` + SELECT p.id AS page_id, p.wiki_slug, p.rel_path, p.title, p.summary, + snippet(wiki_pages_fts, 2, '[', ']', '...', 16) AS snippet, + bm25(wiki_pages_fts) AS rank + FROM wiki_pages_fts + JOIN wiki_pages p ON p.id = wiki_pages_fts.rowid + WHERE wiki_pages_fts MATCH @q AND p.wiki_slug = @slug + ORDER BY rank + LIMIT @limit + `); + + const enqueueSeedStmt = db.prepare(` + INSERT INTO wiki_seeds (wiki_slug, query, status, parent_id, depth) + VALUES (@wiki_slug, @query, @status, @parent_id, @depth) + RETURNING * + `); + const nextPendingSeedStmt = db.prepare(` + SELECT * FROM wiki_seeds WHERE wiki_slug = ? AND status = 'pending' + ORDER BY depth ASC, created_at ASC LIMIT 1 + `); + const setSeedStatusStmt = db.prepare(`UPDATE wiki_seeds SET status = ? WHERE id = ?`); + + const linkLearningWikiStmt = db.prepare(` + INSERT OR REPLACE INTO learnings_wiki (learning_id, wiki_slug) VALUES (?, ?) + `); + const learningsByWikiStmt = db.prepare(` + SELECT l.* FROM learnings l + JOIN learnings_wiki lw ON lw.learning_id = l.id + WHERE lw.wiki_slug = ? + ORDER BY l.created_at DESC + `); + return { db, close: () => db.close(), - addLearning(learning) { + addLearning(learning, wikiSlug) { const result = addLearningStmt.run({ project: learning.project ?? null, category: learning.category, @@ -114,7 +255,11 @@ export function createStore(dbPath: string = getDefaultDbPath()): Store { mistake: learning.mistake ?? null, correction: learning.correction ?? 
null, }); - return getLearningStmt.get(result.lastInsertRowid) as Learning; + const row = getLearningStmt.get(result.lastInsertRowid) as Learning; + if (wikiSlug) { + linkLearningWikiStmt.run(row.id, wikiSlug); + } + return row; }, getLearning(id) { @@ -168,5 +313,104 @@ export function createStore(dbPath: string = getDefaultDbPath()): Store { getRecentSessions(limit = 10) { return getRecentSessionsStmt.all(limit) as Session[]; }, + + getLearningsByWiki(wikiSlug) { + return learningsByWikiStmt.all(wikiSlug) as Learning[]; + }, + + upsertWiki(wiki) { + upsertWikiStmt.run({ + slug: wiki.slug, + title: wiki.title, + flavor: wiki.flavor, + root_path: wiki.root_path, + scope: wiki.scope ?? 'global', + auto_research: wiki.auto_research ?? 0, + private: wiki.private ?? 0, + }); + return getWikiStmt.get(wiki.slug) as Wiki; + }, + + getWiki(slug) { + return getWikiStmt.get(slug) as Wiki | undefined; + }, + + listWikis(scope) { + if (scope) return listWikisByScopeStmt.all(scope) as Wiki[]; + return listWikisStmt.all() as Wiki[]; + }, + + deleteWiki(slug) { + return deleteWikiStmt.run(slug).changes > 0; + }, + + upsertWikiPage(page) { + const row = upsertWikiPageStmt.get({ + wiki_slug: page.wiki_slug, + rel_path: page.rel_path, + title: page.title, + summary: page.summary ?? null, + content: page.content ?? null, + page_type: page.page_type ?? null, + content_hash: page.content_hash ?? null, + }) as { id: number }; + return getWikiPageByIdStmt.get(row.id) as WikiPage; + }, + + getWikiPage(wikiSlug, relPath) { + return getWikiPageStmt.get(wikiSlug, relPath) as WikiPage | undefined; + }, + + listWikiPages(wikiSlug) { + return listWikiPagesStmt.all(wikiSlug) as WikiPage[]; + }, + + searchWiki(query, opts = {}) { + const limit = opts.limit ?? 10; + const q = sanitizeFtsQuery(query, opts.loose); + if (!q) return []; + const rows = opts.wikiSlug + ? 
searchWikiScopedStmt.all({ q, slug: opts.wikiSlug, limit }) + : searchWikiAllStmt.all({ q, limit }); + return rows as WikiSearchHit[]; + }, + + enqueueSeed(seed) { + return enqueueSeedStmt.get({ + wiki_slug: seed.wiki_slug, + query: seed.query, + status: seed.status ?? 'pending', + parent_id: seed.parent_id ?? null, + depth: seed.depth, + }) as WikiSeed; + }, + + nextPendingSeed(wikiSlug) { + return nextPendingSeedStmt.get(wikiSlug) as WikiSeed | undefined; + }, + + setSeedStatus(id, status) { + setSeedStatusStmt.run(status, id); + }, }; } + +const STOPWORDS = new Set([ + 'a', 'an', 'the', 'and', 'or', 'of', 'to', 'in', 'on', 'for', 'with', + 'is', 'it', 'this', 'that', 'be', 'as', 'at', 'by', 'are', 'was', 'were', + 'what', 'which', 'how', 'why', 'when', 'where', 'who', 'about', + 'explain', 'tell', 'show', 'find', 'do', 'does', 'use', 'using' +]); + +function sanitizeFtsQuery(input: string, loose = false): string { + const trimmed = input.trim(); + if (!trimmed) return ''; + const tokens = trimmed.split(/\s+/) + .map(t => t.replace(/[^A-Za-z0-9_]/g, '').toLowerCase()) + .filter(t => t.length >= 2 && !STOPWORDS.has(t)); + if (!tokens.length) return ''; + if (loose) { + return tokens.map(t => `${t}*`).join(' OR '); + } + return tokens.map(t => `"${t}"`).join(' '); +} diff --git a/src/search/embeddings.ts b/src/search/embeddings.ts new file mode 100644 index 0000000..91f002d --- /dev/null +++ b/src/search/embeddings.ts @@ -0,0 +1,136 @@ +import Database from 'better-sqlite3'; +import * as https from 'https'; + +export interface EmbeddingProvider { + name: string; + model: string; + dim: number; + embed(texts: string[]): Promise<Float32Array[]>; +} + +function pickProvider(): EmbeddingProvider | null { + if (process.env.OPENAI_API_KEY) return openai(); + if (process.env.VOYAGE_API_KEY) return voyage(); + return null; +} + +function postJSON(urlStr: string, body: unknown, headers: Record<string, string>): Promise<{ status: number; body: string }> { + return new 
Promise((resolve, reject) => { + const url = new URL(urlStr); + const data = JSON.stringify(body); + const req = https.request({ + hostname: url.hostname, + path: url.pathname + url.search, + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'Content-Length': Buffer.byteLength(data), + ...headers, + }, + }, res => { + let chunks = ''; + res.on('data', c => { chunks += c; }); + res.on('end', () => resolve({ status: res.statusCode || 0, body: chunks })); + }); + req.on('error', reject); + req.write(data); + req.end(); + }); +} + +function openai(): EmbeddingProvider { + const model = process.env.PROWORKFLOW_EMBED_MODEL || 'text-embedding-3-small'; + const dim = model === 'text-embedding-3-large' ? 3072 : 1536; + return { + name: 'openai', model, dim, + async embed(texts) { + const res = await postJSON('https://api.openai.com/v1/embeddings', { input: texts, model }, { Authorization: `Bearer ${process.env.OPENAI_API_KEY}` }); + if (res.status >= 400) throw new Error(`openai embeddings ${res.status}: ${res.body.slice(0, 200)}`); + const data = JSON.parse(res.body); + return data.data.map((d: { embedding: number[] }) => Float32Array.from(d.embedding)); + }, + }; +} + +function voyage(): EmbeddingProvider { + const model = process.env.PROWORKFLOW_EMBED_MODEL || 'voyage-3'; + return { + name: 'voyage', model, dim: 1024, + async embed(texts) { + const res = await postJSON('https://api.voyageai.com/v1/embeddings', { input: texts, model }, { Authorization: `Bearer ${process.env.VOYAGE_API_KEY}` }); + if (res.status >= 400) throw new Error(`voyage embeddings ${res.status}: ${res.body.slice(0, 200)}`); + const data = JSON.parse(res.body); + return data.data.map((d: { embedding: number[] }) => Float32Array.from(d.embedding)); + }, + }; +} + +export function getEmbeddingProvider(): EmbeddingProvider | null { + return pickProvider(); +} + +function f32ToBlob(v: Float32Array): Buffer { + return Buffer.from(v.buffer, v.byteOffset, v.byteLength); +} + +function 
blobToF32(buf: Buffer): Float32Array { + return new Float32Array(buf.buffer, buf.byteOffset, buf.byteLength / 4); +} + +export function upsertEmbedding(db: Database.Database, pageId: number, provider: EmbeddingProvider, vector: Float32Array): void { + if (vector.length !== provider.dim) throw new Error(`dim mismatch: ${vector.length} vs ${provider.dim}`); + db.prepare(` + INSERT INTO wiki_embeddings (page_id, model, dim, vector) + VALUES (?, ?, ?, ?) + ON CONFLICT(page_id) DO UPDATE SET + model = excluded.model, + dim = excluded.dim, + vector = excluded.vector, + computed_at = datetime('now') + `).run(pageId, `${provider.name}:${provider.model}`, provider.dim, f32ToBlob(vector)); +} + +function cosine(a: Float32Array, b: Float32Array): number { + let dot = 0, na = 0, nb = 0; + const len = Math.min(a.length, b.length); + for (let i = 0; i < len; i++) { + dot += a[i] * b[i]; + na += a[i] * a[i]; + nb += b[i] * b[i]; + } + if (na === 0 || nb === 0) return 0; + return dot / (Math.sqrt(na) * Math.sqrt(nb)); +} + +export interface VectorHit { + page_id: number; + similarity: number; +} + +export function vectorSearch(db: Database.Database, queryVec: Float32Array, opts: { wikiSlug?: string; limit?: number } = {}): VectorHit[] { + const limit = opts.limit ?? 10; + const sql = opts.wikiSlug + ? `SELECT e.page_id, e.vector FROM wiki_embeddings e JOIN wiki_pages p ON p.id = e.page_id WHERE p.wiki_slug = ?` + : `SELECT e.page_id, e.vector FROM wiki_embeddings e`; + const rows = opts.wikiSlug ? 
db.prepare(sql).all(opts.wikiSlug) : db.prepare(sql).all(); + const scored: VectorHit[] = []; + for (const r of rows as { page_id: number; vector: Buffer }[]) { + const v = blobToF32(r.vector); + scored.push({ page_id: r.page_id, similarity: cosine(queryVec, v) }); + } + scored.sort((a, b) => b.similarity - a.similarity); + return scored.slice(0, limit); +} + +export function reciprocalRankFusion<T>(lists: T[][], keyFn: (x: T) => string, k = 60): { key: string; score: number }[] { + const scores = new Map<string, number>(); + for (const list of lists) { + list.forEach((item, i) => { + const key = keyFn(item); + scores.set(key, (scores.get(key) || 0) + 1 / (k + i + 1)); + }); + } + return Array.from(scores.entries()) + .map(([key, score]) => ({ key, score })) + .sort((a, b) => b.score - a.score); +} From ad2e3f2045abab1fc60bc9ac71d0d3eab7935b35 Mon Sep 17 00:00:00 2001 From: Rohit Ghumare <ghumare64@gmail.com> Date: Fri, 8 May 2026 13:03:26 +0100 Subject: [PATCH 3/4] fix(coderabbit): address 21 majors + 2 critical PR review findings Critical (path traversal): - init_wiki.sh: validate slug as ^[a-z0-9][a-z0-9-]*$ before touching disk - wiki-cli.js: resolve and verify rel-path stays inside wiki.root_path Major: - package.json: cross-platform schema copy via Node (not cp) - src/search/embeddings.ts: cosine returns 0 on dim mismatch; vectorSearch filters by dim in SQL; postJSON 30s timeout - web.js / github.js / arxiv.js: redirect cap (5), 15s timeout, URL-base resolution for relative Location headers, drain redirect responses - src/db/index.ts: close DB on schema load/exec failure - src/db/store.ts: * claimPendingSeed (UPDATE...RETURNING) atomic select+claim * addLearning + linkLearningWiki wrapped in db.transaction * sanitizeFtsQuery replaces punctuation with spaces (preserves gpt-4o, wiki-builder, store.ts as separate tokens) - council.js: postJSON 120s timeout; phase 1+2 use Promise.allSettled so one provider failure no longer aborts the run; rel_path uses 
path.posix.join - build-survey.js: postJSON 180s timeout; bibliography validates non-empty key+title before generation; die() on wiki-cli failure - init_wiki.sh: explicit allowlist for --scope and --flavor - research-tick.js: spawnSync 10min timeout + SIGKILL + error/signal logging - research-loop.js: * front matter parser accepts CRLF * derived/ created via mkdirSync before writing run-*.json * try/finally guarantees seeds never stuck in 'active' * uses claimPendingSeed; budget halt re-marks seed pending for retry - survey-generator SKILL.md: ID format aligned with generator output (src-bib-<slug>) --- package.json | 2 +- scripts/research-tick.js | 10 +- skills/llm-council/scripts/council.js | 16 ++- skills/survey-generator/SKILL.md | 2 +- .../survey-generator/scripts/build-survey.js | 9 +- skills/wiki-builder/scripts/init_wiki.sh | 15 +++ skills/wiki-builder/scripts/wiki-cli.js | 6 +- .../scripts/research-loop.js | 121 ++++++++++-------- .../scripts/source-fetchers/arxiv.js | 12 +- .../scripts/source-fetchers/github.js | 12 +- .../scripts/source-fetchers/web.js | 14 +- src/db/index.ts | 23 ++-- src/db/store.ts | 49 +++++-- src/search/embeddings.ts | 13 +- 14 files changed, 199 insertions(+), 105 deletions(-) diff --git a/package.json b/package.json index 4e092c1..bd80e88 100644 --- a/package.json +++ b/package.json @@ -5,7 +5,7 @@ "main": "dist/index.js", "types": "dist/index.d.ts", "scripts": { - "build": "tsc && cp src/db/schema.sql dist/db/schema.sql", + "build": "tsc && node -e \"const fs=require('fs');fs.mkdirSync('dist/db',{recursive:true});fs.copyFileSync('src/db/schema.sql','dist/db/schema.sql')\"", "clean": "rm -rf dist", "prepublishOnly": "npm run build", "db:init": "node dist/db/index.js" diff --git a/scripts/research-tick.js b/scripts/research-tick.js index 3eae3ac..8e8bb96 100644 --- a/scripts/research-tick.js +++ b/scripts/research-tick.js @@ -60,10 +60,16 @@ function tick() { if (!target) { appendLog('skip: no opted-in wiki with pending seeds'); 
return { skipped: 'no-target' }; } appendLog(`tick: running ${target.slug}`); - const r = spawnSync('node', [LOOP_SCRIPT, 'run', target.slug, '--max-pages', '1'], { encoding: 'utf8' }); + const r = spawnSync('node', [LOOP_SCRIPT, 'run', target.slug, '--max-pages', '1'], { + encoding: 'utf8', + timeout: 10 * 60 * 1000, + killSignal: 'SIGKILL', + }); + if (r.error) appendLog(`error: ${r.error.message}`); + if (r.signal) appendLog(`signal: ${r.signal}`); appendLog(`tick: ${target.slug} exit=${r.status}`); if (r.stderr) appendLog(`stderr: ${r.stderr.slice(0, 500)}`); - return { ran: target.slug, exit: r.status }; + return { ran: target.slug, exit: r.status, error: r.error?.message, signal: r.signal }; } const result = tick(); diff --git a/skills/llm-council/scripts/council.js b/skills/llm-council/scripts/council.js index 250f23d..c34d0e7 100755 --- a/skills/llm-council/scripts/council.js +++ b/skills/llm-council/scripts/council.js @@ -57,7 +57,7 @@ function pickProvider(arg) { return null; } -function postJSON(urlStr, body, headers) { +function postJSON(urlStr, body, headers, timeoutMs = 120000) { return new Promise((resolve, reject) => { const url = new URL(urlStr); const data = JSON.stringify(body); @@ -71,6 +71,7 @@ function postJSON(urlStr, body, headers) { res.on('data', c => { chunks += c; }); res.on('end', () => resolve({ status: res.statusCode, body: chunks })); }); + req.setTimeout(timeoutMs, () => req.destroy(new Error('council request timeout'))); req.on('error', reject); req.write(data); req.end(); @@ -138,7 +139,7 @@ function persistToWiki(slug, sessionId, output) { try { const wiki = store.getWiki(slug); if (!wiki) return null; - const relPath = path.join('derived', 'council', `${sessionId}.md`); + const relPath = path.posix.join('derived', 'council', `${sessionId}.md`); const fileAbs = path.join(wiki.root_path, relPath); fs.mkdirSync(path.dirname(fileAbs), { recursive: true }); fs.writeFileSync(fileAbs, output); @@ -174,9 +175,15 @@ async function 
cmdRun(args) { fs.writeFileSync(path.join(sessionDir, 'config.json'), JSON.stringify({ query, models, chairman, provider: providerName }, null, 2)); + function settledToEntry(model, settled) { + if (settled.status === 'fulfilled') return settled.value; + return { success: false, content: `[ERROR: ${settled.reason?.message || settled.reason}]`, model, latency_ms: 0 }; + } + // Phase 1 const sysIndep = 'You are participating in an LLM council deliberation. Provide your best, most thoughtful response to the query. Be comprehensive but focused.'; - const phase1Entries = await Promise.all(models.map(m => provider.call(provider, m, sysIndep, query))); + const phase1Settled = await Promise.allSettled(models.map(m => provider.call(provider, m, sysIndep, query))); + const phase1Entries = phase1Settled.map((s, i) => settledToEntry(models[i], s)); const phase1 = Object.fromEntries(models.map((m, i) => [m, phase1Entries[i]])); fs.writeFileSync(path.join(sessionDir, 'phase1_responses.json'), JSON.stringify(phase1, null, 2)); @@ -186,7 +193,8 @@ async function cmdRun(args) { const anon = models.map(m => `=== Response ${labelOf[m]} ===\n${phase1[m].content}`).join('\n\n'); const sysRank = (own) => `You are ranking AI responses objectively. Your own response is labeled '${own}'.`; const userRank = `QUERY:\n${query}\n\nRESPONSES:\n${anon}\n\nRank from BEST to WORST. Format:\nRANKINGS:\n1. [Letter] - [reason]\n2. 
[Letter] - [reason]\n...`; - const phase2Entries = await Promise.all(models.map(m => provider.call(provider, m, sysRank(labelOf[m]), userRank))); + const phase2Settled = await Promise.allSettled(models.map(m => provider.call(provider, m, sysRank(labelOf[m]), userRank))); + const phase2Entries = phase2Settled.map((s, i) => settledToEntry(models[i], s)); const phase2 = { label_of: labelOf, rankings: Object.fromEntries(models.map((m, i) => [m, phase2Entries[i]])) }; fs.writeFileSync(path.join(sessionDir, 'phase2_rankings.json'), JSON.stringify(phase2, null, 2)); diff --git a/skills/survey-generator/SKILL.md b/skills/survey-generator/SKILL.md index 1495d67..fe2d66d 100644 --- a/skills/survey-generator/SKILL.md +++ b/skills/survey-generator/SKILL.md @@ -113,7 +113,7 @@ Each writes a separate versioned file; diff them. 1. Never invent bibliography entries — every paper must be a real work with venue. 2. Every section's `papers` array references keys in `bibliography`. 3. Output is markdown ONLY. No HTML, no inline SVG, no JS. -4. Bibliography rows in `sources.md` use the same `[^src-NNN]` format as the rest of the wiki. +4. Bibliography rows in `sources.md` use the slug-style id `src-bib-<slug>` (derived from the bibliography `key`); cite as `[^src-bib-<slug>]`. Manual non-bibliography sources continue to use `src-NNN`. 5. Iterate on inputs (`research_bundle.json`), not on the generated output. 6. Provider+model selection is the user's call — never hardcode. 
diff --git a/skills/survey-generator/scripts/build-survey.js b/skills/survey-generator/scripts/build-survey.js index 1ec554a..7ce9c35 100755 --- a/skills/survey-generator/scripts/build-survey.js +++ b/skills/survey-generator/scripts/build-survey.js @@ -29,7 +29,7 @@ function getStore() { return require(distPath).createStore(); } -function postJSON(urlStr, body, headers) { +function postJSON(urlStr, body, headers, timeoutMs = 180000) { return new Promise((resolve, reject) => { const url = new URL(urlStr); const data = JSON.stringify(body); @@ -43,6 +43,7 @@ function postJSON(urlStr, body, headers) { res.on('data', c => { chunks += c; }); res.on('end', () => resolve({ status: res.statusCode, body: chunks })); }); + req.setTimeout(timeoutMs, () => req.destroy(new Error('survey request timeout'))); req.on('error', reject); req.write(data); req.end(); @@ -155,6 +156,10 @@ async function cmdRun(args) { const bundle = JSON.parse(fs.readFileSync(bundlePath, 'utf8')); if (!bundle.topic || !Array.isArray(bundle.bibliography)) die('bundle missing topic or bibliography[]'); + const invalid = bundle.bibliography.find( + b => !b || typeof b.key !== 'string' || !b.key.trim() || typeof b.title !== 'string' || !b.title.trim() + ); + if (invalid) die('bundle bibliography[] entries must include non-empty string key and title'); const providerName = pickProvider(args.provider); if (!providerName) die('no provider env var set'); @@ -187,7 +192,7 @@ async function cmdRun(args) { try { execFileSync('node', [wikiCli, 'page', slug, relPath, '--type', 'survey'], { stdio: 'inherit' }); } catch (e) { - console.error('[survey] wiki-cli page failed:', e.message); + die(`wiki-cli page failed: ${e.message}`); } console.log(JSON.stringify({ slug, file: fileAbs, version: v, bibliography_added: added }, null, 2)); } diff --git a/skills/wiki-builder/scripts/init_wiki.sh b/skills/wiki-builder/scripts/init_wiki.sh index 94e6794..c9a7cf1 100755 --- a/skills/wiki-builder/scripts/init_wiki.sh +++ 
b/skills/wiki-builder/scripts/init_wiki.sh @@ -18,6 +18,11 @@ EOF if [ $# -lt 1 ]; then usage; exit 1; fi slug="$1"; shift +if ! [[ "$slug" =~ ^[a-z0-9][a-z0-9-]*$ ]]; then + echo "Invalid slug: lowercase letters, digits, hyphens only (must start alphanumeric)" >&2 + exit 1 +fi + while [ $# -gt 0 ]; do case "$1" in --title) title="$2"; shift 2 ;; @@ -31,6 +36,16 @@ done if [ -z "$title" ]; then echo "--title required" >&2; exit 1; fi +case "$scope" in + global|project) ;; + *) echo "Invalid --scope: $scope (expected global|project)" >&2; exit 1 ;; +esac + +case "$flavor" in + research|paper|domain|product|person|organization|project|codebase|incident) ;; + *) echo "Invalid --flavor: $flavor" >&2; exit 1 ;; +esac + if [ -z "$root" ]; then if [ "$scope" = "project" ]; then project_dir="${CLAUDE_PROJECT_DIR:-$PWD}" diff --git a/skills/wiki-builder/scripts/wiki-cli.js b/skills/wiki-builder/scripts/wiki-cli.js index d7646a2..eb7b57d 100755 --- a/skills/wiki-builder/scripts/wiki-cli.js +++ b/skills/wiki-builder/scripts/wiki-cli.js @@ -108,7 +108,11 @@ function cmdPage(args) { const wiki = store.getWiki(slug); if (!wiki) die(`unknown wiki: ${slug}. 
Run: wiki-cli.js init ${slug} --title "..."`); - const fileAbs = path.join(wiki.root_path, relPath); + const rootAbs = path.resolve(wiki.root_path); + const fileAbs = path.resolve(wiki.root_path, relPath); + if (fileAbs !== rootAbs && !fileAbs.startsWith(rootAbs + path.sep)) { + die(`rel-path escapes wiki root: ${relPath}`); + } let content = ''; if (args['from-file']) { content = fs.readFileSync(args['from-file'], 'utf8'); diff --git a/skills/wiki-research-loop/scripts/research-loop.js b/skills/wiki-research-loop/scripts/research-loop.js index d5fac5c..df622aa 100755 --- a/skills/wiki-research-loop/scripts/research-loop.js +++ b/skills/wiki-research-loop/scripts/research-loop.js @@ -59,11 +59,11 @@ function readWikiConfig(rootPath) { const cfgPath = path.join(rootPath, 'wiki.config.md'); if (!fs.existsSync(cfgPath)) return {}; const raw = fs.readFileSync(cfgPath, 'utf8'); - const m = raw.match(/^---\s*\n([\s\S]*?)\n---/); + const m = raw.match(/^---\s*\r?\n([\s\S]*?)\r?\n---/); if (!m) return {}; const obj = {}; let nested = null; - for (const line of m[1].split('\n')) { + for (const line of m[1].split(/\r?\n/)) { if (!line.trim()) continue; const indent = line.match(/^(\s*)/)[1].length; const trimmed = line.trim(); @@ -196,66 +196,81 @@ async function runOne(slug, args) { while (stats.pages < maxPages) { if (fs.existsSync(STOP_FILE)) { stats.halted = 'kill-switch'; break; } - const seed = store.nextPendingSeed(slug); + const seed = store.claimPendingSeed(slug); if (!seed) { stats.halted = 'queue-empty'; break; } if (seed.depth > maxDepth) { store.setSeedStatus(seed.id, 'done'); continue; } - store.setSeedStatus(seed.id, 'active'); - - const docs = []; - for (const [name, fetcher] of Object.entries(fetchers)) { - try { - if (!fetcher.match(seed.query)) continue; - const cost = fetcher.estimateCost ? 
fetcher.estimateCost(seed.query) : { usd: 0 }; - if (stats.cost_usd + (cost.usd || 0) > budget) { stats.halted = 'budget'; break; } - const hits = await fetcher.fetch(seed.query, { limit: 3 }); - docs.push(...hits); - stats.cost_usd += cost.usd || 0; - stats.log.push(`[${new Date().toISOString()}] seed-${seed.id} fetcher=${name} hits=${hits.length}`); - } catch (e) { - stats.log.push(`[${new Date().toISOString()}] seed-${seed.id} fetcher=${name} ERROR ${e.message}`); + + let finalStatus = 'done'; + let shouldBreak = false; + try { + const docs = []; + for (const [name, fetcher] of Object.entries(fetchers)) { + try { + if (!fetcher.match(seed.query)) continue; + const cost = fetcher.estimateCost ? fetcher.estimateCost(seed.query) : { usd: 0 }; + if (stats.cost_usd + (cost.usd || 0) > budget) { + stats.halted = 'budget'; + finalStatus = 'pending'; + shouldBreak = true; + break; + } + const hits = await fetcher.fetch(seed.query, { limit: 3 }); + docs.push(...hits); + stats.cost_usd += cost.usd || 0; + stats.log.push(`[${new Date().toISOString()}] seed-${seed.id} fetcher=${name} hits=${hits.length}`); + } catch (e) { + stats.log.push(`[${new Date().toISOString()}] seed-${seed.id} fetcher=${name} ERROR ${e.message}`); + } } - } - if (stats.halted === 'budget') break; + if (shouldBreak) continue; - const compiled = compilePage(seed, docs, prevPages); - if (!compiled) { - store.setSeedStatus(seed.id, 'failed'); - stats.log.push(`[${new Date().toISOString()}] seed-${seed.id} no usable claims`); - continue; - } + const compiled = compilePage(seed, docs, prevPages); + if (!compiled) { + finalStatus = 'failed'; + stats.log.push(`[${new Date().toISOString()}] seed-${seed.id} no usable claims`); + continue; + } - const relPath = path.join('wiki', 'questions', `${slugify(seed.query)}.md`); - const fileAbs = path.join(wiki.root_path, relPath); - fs.mkdirSync(path.dirname(fileAbs), { recursive: true }); - fs.writeFileSync(fileAbs, compiled.content); - - const row = 
store.upsertWikiPage({ - wiki_slug: slug, - rel_path: relPath, - title: seed.query, - summary: compiled.content.slice(0, 500), - content: compiled.content, - page_type: 'question', - content_hash: crypto.createHash('sha256').update(compiled.content).digest('hex').slice(0, 16), - }); - prevPages.push(row); - stats.pages++; - stats.log.push(`[${new Date().toISOString()}] seed-${seed.id} compiled ${relPath} novelty=${compiled.novelty.toFixed(2)}`); - - if (compiled.novelty < 0.05) convergeStreak++; - else convergeStreak = 0; - if (convergeStreak >= 3) { stats.halted = 'converged'; break; } - - const followUps = deriveFollowUps(seed, compiled); - for (const q of followUps) { - if (seed.depth + 1 > maxDepth) continue; - store.enqueueSeed({ wiki_slug: slug, query: q, parent_id: seed.id, depth: seed.depth + 1 }); + const relPath = path.join('wiki', 'questions', `${slugify(seed.query)}.md`); + const fileAbs = path.join(wiki.root_path, relPath); + fs.mkdirSync(path.dirname(fileAbs), { recursive: true }); + fs.writeFileSync(fileAbs, compiled.content); + + const row = store.upsertWikiPage({ + wiki_slug: slug, + rel_path: relPath, + title: seed.query, + summary: compiled.content.slice(0, 500), + content: compiled.content, + page_type: 'question', + content_hash: crypto.createHash('sha256').update(compiled.content).digest('hex').slice(0, 16), + }); + prevPages.push(row); + stats.pages++; + stats.log.push(`[${new Date().toISOString()}] seed-${seed.id} compiled ${relPath} novelty=${compiled.novelty.toFixed(2)}`); + + if (compiled.novelty < 0.05) convergeStreak++; + else convergeStreak = 0; + if (convergeStreak >= 3) { stats.halted = 'converged'; shouldBreak = true; } + + const followUps = deriveFollowUps(seed, compiled); + for (const q of followUps) { + if (seed.depth + 1 > maxDepth) continue; + store.enqueueSeed({ wiki_slug: slug, query: q, parent_id: seed.id, depth: seed.depth + 1 }); + } + } catch (e) { + finalStatus = 'failed'; + stats.log.push(`[${new Date().toISOString()}] 
seed-${seed.id} ERROR ${e.message}`); + } finally { + store.setSeedStatus(seed.id, finalStatus); } - store.setSeedStatus(seed.id, 'done'); + if (shouldBreak) break; } fs.writeFileSync(logFile, ['# Research run ' + ts, '', ...stats.log].join('\n')); - fs.writeFileSync(path.join(wiki.root_path, 'derived', `run-${ts}.json`), JSON.stringify(stats, null, 2)); + const derivedDir = path.join(wiki.root_path, 'derived'); + fs.mkdirSync(derivedDir, { recursive: true }); + fs.writeFileSync(path.join(derivedDir, `run-${ts}.json`), JSON.stringify(stats, null, 2)); return stats; } finally { store.close(); diff --git a/skills/wiki-research-loop/scripts/source-fetchers/arxiv.js b/skills/wiki-research-loop/scripts/source-fetchers/arxiv.js index 6a730cb..4186697 100644 --- a/skills/wiki-research-loop/scripts/source-fetchers/arxiv.js +++ b/skills/wiki-research-loop/scripts/source-fetchers/arxiv.js @@ -1,15 +1,19 @@ const https = require('https'); -function httpsGet(url) { +function httpsGet(url, redirects = 0) { return new Promise((resolve, reject) => { - https.get(url, { headers: { 'User-Agent': 'pro-workflow/wiki-research-loop' } }, res => { + if (redirects > 5) return reject(new Error('Too many redirects')); + const req = https.get(url, { headers: { 'User-Agent': 'pro-workflow/wiki-research-loop' } }, res => { if (res.statusCode >= 300 && res.statusCode < 400 && res.headers.location) { - return httpsGet(res.headers.location).then(resolve, reject); + res.resume(); + return httpsGet(res.headers.location, redirects + 1).then(resolve, reject); } let data = ''; res.on('data', c => { data += c; }); res.on('end', () => resolve({ status: res.statusCode, body: data })); - }).on('error', reject); + }); + req.setTimeout(15000, () => req.destroy(new Error('arxiv fetch timeout'))); + req.on('error', reject); }); } diff --git a/skills/wiki-research-loop/scripts/source-fetchers/github.js b/skills/wiki-research-loop/scripts/source-fetchers/github.js index d2ca873..e909d97 100644 --- 
a/skills/wiki-research-loop/scripts/source-fetchers/github.js +++ b/skills/wiki-research-loop/scripts/source-fetchers/github.js @@ -1,16 +1,20 @@ const https = require('https'); -function httpsGet(url, headers = {}) { +function httpsGet(url, headers = {}, redirects = 0) { return new Promise((resolve, reject) => { + if (redirects > 5) return reject(new Error('Too many redirects')); const opts = { headers: { 'User-Agent': 'pro-workflow/wiki-research-loop', Accept: 'application/vnd.github+json', ...headers } }; - https.get(url, opts, res => { + const req = https.get(url, opts, res => { if (res.statusCode >= 300 && res.statusCode < 400 && res.headers.location) { - return httpsGet(res.headers.location, headers).then(resolve, reject); + res.resume(); + return httpsGet(res.headers.location, headers, redirects + 1).then(resolve, reject); } let data = ''; res.on('data', c => { data += c; }); res.on('end', () => resolve({ status: res.statusCode, body: data })); - }).on('error', reject); + }); + req.setTimeout(15000, () => req.destroy(new Error('github fetch timeout'))); + req.on('error', reject); }); } diff --git a/skills/wiki-research-loop/scripts/source-fetchers/web.js b/skills/wiki-research-loop/scripts/source-fetchers/web.js index 62e44ea..b9b8e7d 100644 --- a/skills/wiki-research-loop/scripts/source-fetchers/web.js +++ b/skills/wiki-research-loop/scripts/source-fetchers/web.js @@ -1,8 +1,9 @@ const https = require('https'); const { URL } = require('url'); -function httpsGet(url, headers = {}) { +function httpsGet(url, headers = {}, redirects = 0) { return new Promise((resolve, reject) => { + if (redirects > 5) return reject(new Error('Too many redirects')); const u = new URL(url); const opts = { hostname: u.hostname, @@ -14,15 +15,18 @@ function httpsGet(url, headers = {}) { ...headers, }, }; - https.get(opts, res => { + const req = https.get(opts, res => { if (res.statusCode >= 300 && res.statusCode < 400 && res.headers.location) { - const loc = 
res.headers.location.startsWith('http') ? res.headers.location : `https://${u.hostname}${res.headers.location}`; - return httpsGet(loc, headers).then(resolve, reject); + res.resume(); + const loc = new URL(res.headers.location, u).toString(); + return httpsGet(loc, headers, redirects + 1).then(resolve, reject); } let data = ''; res.on('data', c => { data += c; }); res.on('end', () => resolve({ status: res.statusCode, body: data })); - }).on('error', reject); + }); + req.setTimeout(15000, () => req.destroy(new Error('web fetch timeout'))); + req.on('error', reject); }); } diff --git a/src/db/index.ts b/src/db/index.ts index 094d443..eb7f26e 100644 --- a/src/db/index.ts +++ b/src/db/index.ts @@ -28,16 +28,21 @@ export function initializeDatabase(dbPath: string = DEFAULT_DB_PATH): Database.D db.pragma('journal_mode = WAL'); db.pragma('foreign_keys = ON'); - const candidates = [ - path.join(__dirname, 'schema.sql'), - path.join(__dirname, '..', '..', 'src', 'db', 'schema.sql'), - ]; - const schemaPath = candidates.find(p => fs.existsSync(p)); - if (!schemaPath) { - throw new Error(`pro-workflow: schema.sql not found. Tried: ${candidates.join(', ')}. Run: npm run build`); + try { + const candidates = [ + path.join(__dirname, 'schema.sql'), + path.join(__dirname, '..', '..', 'src', 'db', 'schema.sql'), + ]; + const schemaPath = candidates.find(p => fs.existsSync(p)); + if (!schemaPath) { + throw new Error(`pro-workflow: schema.sql not found. Tried: ${candidates.join(', ')}. 
Run: npm run build`); + } + const schema = fs.readFileSync(schemaPath, 'utf8'); + db.exec(schema); + } catch (err) { + db.close(); + throw err; } - const schema = fs.readFileSync(schemaPath, 'utf8'); - db.exec(schema); return db; } diff --git a/src/db/store.ts b/src/db/store.ts index 0cd7e14..1560347 100644 --- a/src/db/store.ts +++ b/src/db/store.ts @@ -103,6 +103,7 @@ export interface Store { enqueueSeed: (seed: Omit<WikiSeed, 'id' | 'created_at' | 'status'> & { status?: WikiSeed['status'] }) => WikiSeed; nextPendingSeed: (wikiSlug: string) => WikiSeed | undefined; + claimPendingSeed: (wikiSlug: string) => WikiSeed | undefined; setSeedStatus: (id: number, status: WikiSeed['status']) => void; } @@ -231,6 +232,17 @@ export function createStore(dbPath: string = getDefaultDbPath()): Store { SELECT * FROM wiki_seeds WHERE wiki_slug = ? AND status = 'pending' ORDER BY depth ASC, created_at ASC LIMIT 1 `); + const claimPendingSeedStmt = db.prepare(` + UPDATE wiki_seeds + SET status = 'active' + WHERE id = ( + SELECT id FROM wiki_seeds + WHERE wiki_slug = ? AND status = 'pending' + ORDER BY depth ASC, created_at ASC + LIMIT 1 + ) + RETURNING * + `); const setSeedStatusStmt = db.prepare(`UPDATE wiki_seeds SET status = ? WHERE id = ?`); const linkLearningWikiStmt = db.prepare(` @@ -243,23 +255,25 @@ export function createStore(dbPath: string = getDefaultDbPath()): Store { ORDER BY l.created_at DESC `); + const addLearningTx = db.transaction((learning: Omit<Learning, 'id' | 'created_at' | 'times_applied'>, wikiSlug?: string) => { + const result = addLearningStmt.run({ + project: learning.project ?? null, + category: learning.category, + rule: learning.rule, + mistake: learning.mistake ?? null, + correction: learning.correction ?? 
null, + }); + const row = getLearningStmt.get(result.lastInsertRowid) as Learning; + if (wikiSlug) linkLearningWikiStmt.run(row.id, wikiSlug); + return row; + }); + return { db, close: () => db.close(), addLearning(learning, wikiSlug) { - const result = addLearningStmt.run({ - project: learning.project ?? null, - category: learning.category, - rule: learning.rule, - mistake: learning.mistake ?? null, - correction: learning.correction ?? null, - }); - const row = getLearningStmt.get(result.lastInsertRowid) as Learning; - if (wikiSlug) { - linkLearningWikiStmt.run(row.id, wikiSlug); - } - return row; + return addLearningTx(learning, wikiSlug); }, getLearning(id) { @@ -389,6 +403,10 @@ export function createStore(dbPath: string = getDefaultDbPath()): Store { return nextPendingSeedStmt.get(wikiSlug) as WikiSeed | undefined; }, + claimPendingSeed(wikiSlug) { + return claimPendingSeedStmt.get(wikiSlug) as WikiSeed | undefined; + }, + setSeedStatus(id, status) { setSeedStatusStmt.run(status, id); }, @@ -405,8 +423,11 @@ const STOPWORDS = new Set([ function sanitizeFtsQuery(input: string, loose = false): string { const trimmed = input.trim(); if (!trimmed) return ''; - const tokens = trimmed.split(/\s+/) - .map(t => t.replace(/[^A-Za-z0-9_]/g, '').toLowerCase()) + const tokens = trimmed + .toLowerCase() + .replace(/[^a-z0-9_]+/g, ' ') + .trim() + .split(/\s+/) .filter(t => t.length >= 2 && !STOPWORDS.has(t)); if (!tokens.length) return ''; if (loose) { diff --git a/src/search/embeddings.ts b/src/search/embeddings.ts index 91f002d..fd16402 100644 --- a/src/search/embeddings.ts +++ b/src/search/embeddings.ts @@ -32,6 +32,7 @@ function postJSON(urlStr: string, body: unknown, headers: Record<string, string> res.on('data', c => { chunks += c; }); res.on('end', () => resolve({ status: res.statusCode || 0, body: chunks })); }); + req.setTimeout(30000, () => req.destroy(new Error('embedding request timeout'))); req.on('error', reject); req.write(data); req.end(); @@ -91,9 +92,9 @@ 
export function upsertEmbedding(db: Database.Database, pageId: number, provider: } function cosine(a: Float32Array, b: Float32Array): number { + if (a.length !== b.length) return 0; let dot = 0, na = 0, nb = 0; - const len = Math.min(a.length, b.length); - for (let i = 0; i < len; i++) { + for (let i = 0; i < a.length; i++) { dot += a[i] * b[i]; na += a[i] * a[i]; nb += b[i] * b[i]; @@ -109,13 +110,15 @@ export interface VectorHit { export function vectorSearch(db: Database.Database, queryVec: Float32Array, opts: { wikiSlug?: string; limit?: number } = {}): VectorHit[] { const limit = opts.limit ?? 10; + const dim = queryVec.length; const sql = opts.wikiSlug - ? `SELECT e.page_id, e.vector FROM wiki_embeddings e JOIN wiki_pages p ON p.id = e.page_id WHERE p.wiki_slug = ?` - : `SELECT e.page_id, e.vector FROM wiki_embeddings e`; - const rows = opts.wikiSlug ? db.prepare(sql).all(opts.wikiSlug) : db.prepare(sql).all(); + ? `SELECT e.page_id, e.vector FROM wiki_embeddings e JOIN wiki_pages p ON p.id = e.page_id WHERE p.wiki_slug = ? AND e.dim = ?` + : `SELECT e.page_id, e.vector FROM wiki_embeddings e WHERE e.dim = ?`; + const rows = opts.wikiSlug ? 
db.prepare(sql).all(opts.wikiSlug, dim) : db.prepare(sql).all(dim); const scored: VectorHit[] = []; for (const r of rows as { page_id: number; vector: Buffer }[]) { const v = blobToF32(r.vector); + if (v.length !== dim) continue; scored.push({ page_id: r.page_id, similarity: cosine(queryVec, v) }); } scored.sort((a, b) => b.similarity - a.similarity); From a6d4b3a16f5554ce5b14e18e83c0fc5bb57dd1a3 Mon Sep 17 00:00:00 2001 From: Rohit Ghumare <ghumare64@gmail.com> Date: Sat, 9 May 2026 11:13:49 +0100 Subject: [PATCH 4/4] fix(coderabbit-r2): citation alignment, bib validation, sources header, fence languages, fetcher safety, slug-collision guard, README + infographic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit build-survey.js: - Citation IDs now align with sources.md row IDs: prompt receives bibliography entries with citation_id field (src-bib-<slug>) and sections with paper_citation_ids. References reuse citation_id. - Validate bibliography uniqueness; validate sections[].papers references exist in bibliography before any model call. - sources.md gets a proper markdown table header + separator when the file is empty or lacks the header row. survey SKILL.md: - Fence languages: ```text for tree, ```bash for command examples (markdownlint MD040). - Reference example shows src-bib-<slug> citation form. source-fetchers: - arxiv/github/web all wrap fetch in try/catch returning [] on error so transient failures don't abort the loop. - All fetchers use opts.limit ?? 3 (preserves explicit 0 if ever passed; previous || coerced 0 to 3). - web.js: chooses http vs https client based on url.protocol on every call (fixes redirects from https → http). - web.js: bounded body (4 MiB MAX_BODY_BYTES) + 30s body-read deadline; both clean up listeners + destroy socket on overflow or timeout to prevent leaks. 
src/db/store.ts + wiki-cli.js: - upsertWiki refuses to register a slug that already exists at a different (scope, root_path); raises a clean error so two repos with the same project-scope slug don't silently overwrite each other's registration. - wiki-cli.js init wraps the upsert in try/catch so the user sees a friendly error instead of a stack trace. src/db/schema.sql: - Comment documents that wiki identity is enforced at the application layer for FK simplicity. README.md: - Adds 60-second wiki tour after Install (init → page → ask → seed → research → embed → hybrid → council). - Counts updated to 33 skills / 22 commands / 37 hook scripts to reflect upstream merges (token-efficiency etc.) plus v3.3 work. docs/infographic.html: - Version display v3.2 → v3.3. - Title block + components grid + section 10 reflect the wiki knowledge plane and the new counts. --- README.md | 34 +++++++++++- docs/infographic.html | 37 +++++++------ skills/survey-generator/SKILL.md | 6 +-- .../survey-generator/scripts/build-survey.js | 53 ++++++++++++++----- skills/wiki-builder/scripts/wiki-cli.js | 2 + .../scripts/source-fetchers/arxiv.js | 12 +++-- .../scripts/source-fetchers/github.js | 34 ++++++------ .../scripts/source-fetchers/web.js | 43 +++++++++++---- src/db/schema.sql | 4 ++ src/db/store.ts | 10 +++- 10 files changed, 170 insertions(+), 65 deletions(-) diff --git a/README.md b/README.md index a8ec2c1..01dd5b2 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ <p align="center"> Self-correcting memory that compounds over 50+ sessions. You correct Claude once — it never makes the same mistake again. Persistent research wikis indexed in FTS5 surface relevant prior work the moment you ask. 
Auto-research loop grows the knowledge base while you sleep.<br/> - <b>29 skills</b> • <b>8 agents</b> • <b>22 commands</b> • <b>31 hook scripts across 24 events</b><br/> + <b>33 skills</b> • <b>8 agents</b> • <b>22 commands</b> • <b>37 hook scripts across 24 events</b><br/> Works with <b>Claude Code</b>, <b>Cursor</b>, and <b>32+ agents</b> via SkillKit. </p> @@ -75,6 +75,36 @@ cd ~/.claude/plugins/*/pro-workflow && npm install && npm run build --- +## Wiki Knowledge Base — 60-second tour + +Auto-grow a persistent FTS5-indexed research wiki next to your code: + +```bash +# Scaffold +/wiki init agent-memory --title "Agent Memory" --flavor research + +# Add a page (any .md you wrote — file gets FTS-indexed) +/wiki page agent-memory wiki/concepts/episodic-memory.md --type concept + +# Ask the wiki anything +/wiki ask "what is episodic memory" --wiki agent-memory + +# Auto-grow it: queue a seed, run the loop, watch it write pages + enqueue follow-ups +/wiki seed agent-memory "memory consolidation in agents" +/wiki research agent-memory --max-pages 5 --budget-usd 0.50 + +# Hybrid retrieval (BM25 + vector RRF) — needs OPENAI_API_KEY or VOYAGE_API_KEY +/wiki embed agent-memory +/wiki hybrid "consolidation patterns" --wiki agent-memory + +# Multi-LLM deliberation, transcript persisted as a wiki page +/wiki council "should we adopt episodic memory?" --wiki agent-memory +``` + +Every wiki hit auto-loads on `UserPromptSubmit` when prompts mention indexed topics. Kill-switch: `touch ~/.pro-workflow/STOP`. + +--- + ## What's New in v3.3 Persistent knowledge plane on top of the self-correction memory. @@ -118,7 +148,7 @@ Persistent knowledge plane on top of the self-correction memory. 
| Cost tracking and budget alerts | **Yes** | No | No | No | No | | MCP overhead auditing | **Yes** | No | No | No | No | | Cross-agent (32+ agents via SkillKit) | **Yes** | No | Some | No | No | -| Skills | 29 | 14 | 140+ | 18+ | 0 | +| Skills | 33 | 14 | 140+ | 18+ | 0 | | Agents | 8 | 5 | 36 | 0 | 18 | | Commands | 22 | 3 | 60+ | 5+ | 57 | | Hook Events | 24 | 8 | 18 | 0 | 0 | diff --git a/docs/infographic.html b/docs/infographic.html index 850b825..121eaf7 100644 --- a/docs/infographic.html +++ b/docs/infographic.html @@ -680,7 +680,7 @@ <div class="header-sub" style="margin-left: 18px;">github.com/rohitg00/pro-workflow · <a href="https://rohitg00-pro-workflow.mintlify.app/" style="color:var(--anthro-coral); text-decoration:none;">docs</a></div> </div> <div class="header-right"> - <div class="star-count">v3.2</div> + <div class="star-count">v3.3</div> <div class="star-label">Latest Release</div> </div> </div> @@ -690,8 +690,8 @@ <div class="title">Pro Workflow</div> <div class="subtitle"> Battle-tested Claude Code patterns from power users who ship production code daily. - Self-correcting memory, parallel worktrees, quality gates, and the 80/20 AI coding ratio — - 24 Skills, 8 Agents, 24 Hooks, 21 Commands, 3 Contexts, 7 Guides. + Self-correcting memory, persistent FTS5-indexed wikis, auto-research loop, multi-LLM council, parallel worktrees, quality gates — + 33 Skills, 8 Agents, 24 Hook events, 22 Commands, 3 Contexts, 7 Guides. 
</div> </div> @@ -751,10 +751,10 @@ <div class="section-num">03 Components</div> <div class="section-title">Everything Included</div> <div class="comp-grid" style="grid-template-columns: repeat(4, 1fr);"> - <div class="comp-item"><div class="comp-num">24</div><div class="comp-label">Skills</div></div> + <div class="comp-item"><div class="comp-num">33</div><div class="comp-label">Skills</div></div> <div class="comp-item"><div class="comp-num">8</div><div class="comp-label">Agents</div></div> - <div class="comp-item"><div class="comp-num">24</div><div class="comp-label">Hooks</div></div> - <div class="comp-item"><div class="comp-num">21</div><div class="comp-label">Commands</div></div> + <div class="comp-item"><div class="comp-num">24</div><div class="comp-label">Hook events</div></div> + <div class="comp-item"><div class="comp-num">22</div><div class="comp-label">Commands</div></div> <div class="comp-item"><div class="comp-num">3</div><div class="comp-label">Contexts</div></div> <div class="comp-item"><div class="comp-num">7</div><div class="comp-label">Rules</div></div> <div class="comp-item"><div class="comp-num">29</div><div class="comp-label">Scripts</div></div> @@ -1161,13 +1161,18 @@ <!-- 10 SKILLS --> <div> <div class="section-num">10 Skills Library</div> - <div class="section-title">24 Skills</div> + <div class="section-title">33 Skills</div> <table> <tr><th>Skill</th><th>What It Does</th></tr> <tr><td><strong>pro-workflow</strong></td><td>Core 8 patterns</td></tr> + <tr style="background:var(--anthro-light-coral);"><td><strong>wiki-builder</strong></td><td>Persistent FTS5-indexed research wikis (9 flavors)</td></tr> + <tr style="background:var(--anthro-light-coral);"><td><strong>wiki-query</strong></td><td>BM25 retrieval, snippets, related/show</td></tr> + <tr style="background:var(--anthro-light-coral);"><td><strong>wiki-research-loop</strong></td><td>Budget-capped BFS auto-research over web/arXiv/GH</td></tr> + <tr 
style="background:var(--anthro-light-coral);"><td><strong>llm-council</strong></td><td>Provider-agnostic 3-phase multi-LLM deliberation</td></tr> + <tr style="background:var(--anthro-light-coral);"><td><strong>survey-generator</strong></td><td>Provider-agnostic literature survey to wiki page</td></tr> <tr><td><strong>smart-commit</strong></td><td>Quality gates + conventional commits</td></tr> <tr><td><strong>wrap-up</strong></td><td>Session ritual with learning capture</td></tr> - <tr><td><strong>learn-rule</strong></td><td>Persist corrections to memory</td></tr> + <tr><td><strong>learn-rule</strong></td><td>Persist corrections to memory (wiki-scoped)</td></tr> <tr><td><strong>parallel-worktrees</strong></td><td>Git worktree setup for zero dead time</td></tr> <tr><td><strong>replay-learnings</strong></td><td>Surface relevant past patterns</td></tr> <tr><td><strong>session-handoff</strong></td><td>Resume docs for next session</td></tr> @@ -1175,15 +1180,15 @@ <tr><td><strong>deslop</strong></td><td>Remove AI code slop before commit</td></tr> <tr><td><strong>context-optimizer</strong></td><td>Token management and MCP audit</td></tr> <tr><td><strong>orchestrate</strong></td><td>Multi-phase dev workflow</td></tr> - <tr style="background:var(--anthro-light-coral);"><td><strong>permission-tuner</strong></td><td>Optimize allow/deny rules</td></tr> - <tr style="background:var(--anthro-light-coral);"><td><strong>compact-guard</strong></td><td>Protect context through compaction</td></tr> - <tr style="background:var(--anthro-light-coral);"><td><strong>cost-tracker</strong></td><td>Session cost awareness + budgets</td></tr> - <tr style="background:var(--anthro-light-coral);"><td><strong>auto-setup</strong></td><td>Auto-configure quality gates</td></tr> - <tr style="background:var(--anthro-light-coral);"><td><strong>mcp-audit</strong></td><td>MCP overhead analysis</td></tr> - <tr style="background:var(--anthro-light-coral);"><td><strong>llm-gate</strong></td><td>AI-powered 
quality gates</td></tr> - <tr style="background:var(--anthro-light-coral);"><td><strong>file-watcher</strong></td><td>Reactive file change workflows</td></tr> + <tr><td><strong>permission-tuner</strong></td><td>Optimize allow/deny rules</td></tr> + <tr><td><strong>compact-guard</strong></td><td>Protect context through compaction</td></tr> + <tr><td><strong>cost-tracker</strong></td><td>Session cost awareness + budgets</td></tr> + <tr><td><strong>auto-setup</strong></td><td>Auto-configure quality gates</td></tr> + <tr><td><strong>mcp-audit</strong></td><td>MCP overhead analysis</td></tr> + <tr><td><strong>llm-gate</strong></td><td>AI-powered quality gates</td></tr> + <tr><td><strong>file-watcher</strong></td><td>Reactive file change workflows</td></tr> </table> - <div style="font-size:10px; color:var(--anthro-muted); margin-top:6px;">+ 7 more: context-engineering, batch-orchestration, thoroughness-scoring, sprint-status, agent-teams, safe-mode</div> + <div style="font-size:10px; color:var(--anthro-muted); margin-top:6px;">Coral rows = new in v3.3 (knowledge plane). + 10 more: context-engineering, batch-orchestration, thoroughness-scoring, sprint-status, agent-teams, safe-mode, token-efficiency, anti-sycophancy, tool-call-budget, design-rules</div> </div> <!-- 11 MODEL SELECTION --> diff --git a/skills/survey-generator/SKILL.md b/skills/survey-generator/SKILL.md index fe2d66d..d049df0 100644 --- a/skills/survey-generator/SKILL.md +++ b/skills/survey-generator/SKILL.md @@ -95,7 +95,7 @@ Each writes a separate versioned file; diff them. ## Output structure -``` +```text <wiki-root>/ ├── sources.md # bibliography rows appended (deduped) └── derived/surveys/ @@ -105,7 +105,7 @@ Each writes a separate versioned file; diff them. # ## 2. Foundations # ... # ## References - # [^paper-key] author year. title. venue. + # [^src-bib-<slug>] author year. title. venue. ``` ## Hard rules @@ -119,7 +119,7 @@ Each writes a separate versioned file; diff them. 
## Composing with research loop -``` +```bash /wiki init reasoning-models --title "Reasoning Models" --flavor research # Manually compile a research_bundle.json node skills/survey-generator/scripts/build-survey.js --bundle bundle.json --wiki reasoning-models diff --git a/skills/survey-generator/scripts/build-survey.js b/skills/survey-generator/scripts/build-survey.js index 7ce9c35..fb7643f 100755 --- a/skills/survey-generator/scripts/build-survey.js +++ b/skills/survey-generator/scripts/build-survey.js @@ -86,6 +86,10 @@ async function callProvider(providerName, model, system, user, maxTokens) { function slugify(s) { return s.toLowerCase().replace(/[^a-z0-9]+/g, '-').replace(/^-|-$/g, '').slice(0, 60); } +function bibCitationId(key) { + return `src-bib-${slugify(key)}`; +} + function appendBibliographyToSources(wikiRoot, bibliography) { const file = path.join(wikiRoot, 'sources.md'); let existing = ''; @@ -95,14 +99,18 @@ function appendBibliographyToSources(wikiRoot, bibliography) { const newRows = []; for (const b of bibliography) { - const id = `src-bib-${slugify(b.key)}`; + const id = bibCitationId(b.key); if (seenKeys.has(id)) continue; const url = b.url || (b.venue && b.venue.startsWith('arXiv:') ? `https://arxiv.org/abs/${b.venue.slice(6)}` : ''); newRows.push(`| ${id} | paper | ${url} | ${b.title.replace(/\|/g, '\\|')} | ${b.key} | ${new Date().toISOString().slice(0, 10)} |`); } if (!newRows.length) return 0; - if (!existing.includes('| id | type |')) { - fs.writeFileSync(file, existing + (existing.endsWith('\n') ? '' : '\n') + newRows.join('\n') + '\n'); + + const tableHeader = '| id | type | url | title | key | added_at |\n| --- | --- | --- | --- | --- | --- |'; + const hasHeader = existing.includes('| id | type |'); + if (!hasHeader) { + const prefix = existing.length ? (existing.endsWith('\n') ? 
existing : existing + '\n') : ''; + fs.writeFileSync(file, `${prefix}${tableHeader}\n${newRows.join('\n')}\n`); } else { fs.writeFileSync(file, existing.trimEnd() + '\n' + newRows.join('\n') + '\n'); } @@ -121,30 +129,35 @@ function nextVersion(dir, baseSlug) { } function buildPrompt(bundle) { + const bibWithIds = bundle.bibliography.map(b => ({ ...b, citation_id: bibCitationId(b.key) })); + const sectionsWithIds = bundle.sections.map(s => ({ + ...s, + paper_citation_ids: (s.papers || []).map(k => bibCitationId(k)), + })); return `Compile a literature survey on the topic "${bundle.topic}" using ONLY the bibliography provided. Output strict markdown: - H1 = topic title - Numbered H2 sections following the provided sections list -- Inline citations as [^${'<paper-key>'}] referencing entries from the bibliography -- A "## References" section at the end listing every cited [^key] with: key, authors, year, title, venue, one-sentence summary +- Inline citations as [^citation_id] using the EXACT citation_id from the bibliography below (e.g., [^src-bib-park-2023-generative-agents]) +- A "## References" section at the end listing every cited [^citation_id] with: citation_id, authors, year, title, venue, one-sentence summary - No HTML, no SVG, no inline images - ~600-1200 words per section, scaled by bibliography size -- For each section, weave together the papers in section[].papers; do not just list them +- For each section, weave together the papers in section.paper_citation_ids; do not just list them -Bibliography (USE THESE KEYS EXACTLY): -${JSON.stringify(bundle.bibliography, null, 2)} +Bibliography (USE THE citation_id FIELD EXACTLY for inline citations): +${JSON.stringify(bibWithIds, null, 2)} -Sections to produce in order: -${JSON.stringify(bundle.sections, null, 2)} +Sections to produce in order (use paper_citation_ids for citations): +${JSON.stringify(sectionsWithIds, null, 2)} Anchor (context only, do not cite): ${bundle.anchor_source || ''} Hard rules: - Cite 
real papers from the bibliography only. Do not invent. -- Every section that lists papers MUST cite each one at least once. -- Use [^paper-key] for inline citations. The References section reuses these keys. +- Every section that lists papers MUST cite each one at least once via its citation_id. +- Use [^citation_id] for inline citations. The References section reuses these citation_id values. - Do not write any prose under the H1; start sections immediately.`; } @@ -161,6 +174,22 @@ async function cmdRun(args) { ); if (invalid) die('bundle bibliography[] entries must include non-empty string key and title'); + const bibKeys = new Set(); + for (const b of bundle.bibliography) { + if (bibKeys.has(b.key)) die(`duplicate bibliography key: ${b.key}`); + bibKeys.add(b.key); + } + + if (Array.isArray(bundle.sections)) { + for (const [i, s] of bundle.sections.entries()) { + if (!Array.isArray(s.papers)) continue; + for (const k of s.papers) { + if (typeof k !== 'string' || !k.trim()) die(`sections[${i}].papers contains non-string entry`); + if (!bibKeys.has(k)) die(`sections[${i}].papers references unknown bibliography key: ${k}`); + } + } + } + const providerName = pickProvider(args.provider); if (!providerName) die('no provider env var set'); const model = args.model || PROVIDER_DEFAULTS[providerName].model; diff --git a/skills/wiki-builder/scripts/wiki-cli.js b/skills/wiki-builder/scripts/wiki-cli.js index eb7b57d..2814ea9 100755 --- a/skills/wiki-builder/scripts/wiki-cli.js +++ b/skills/wiki-builder/scripts/wiki-cli.js @@ -64,6 +64,8 @@ function cmdInit(args) { const store = getStore(); try { store.upsertWiki({ slug, title, flavor, root_path: dest, scope }); + } catch (e) { + die(e.message); } finally { store.close(); } diff --git a/skills/wiki-research-loop/scripts/source-fetchers/arxiv.js b/skills/wiki-research-loop/scripts/source-fetchers/arxiv.js index 4186697..bb1c873 100644 --- a/skills/wiki-research-loop/scripts/source-fetchers/arxiv.js +++ 
b/skills/wiki-research-loop/scripts/source-fetchers/arxiv.js @@ -42,11 +42,15 @@ module.exports = { match: () => true, estimateCost: () => ({ usd: 0, tokens: 0 }), async fetch(query, opts = {}) { - const limit = opts.limit || 3; + const limit = opts.limit ?? 3; const q = encodeURIComponent(query); const url = `https://export.arxiv.org/api/query?search_query=all:${q}&start=0&max_results=${limit}`; - const res = await httpsGet(url); - if (res.status !== 200) return []; - return parseEntries(res.body); + try { + const res = await httpsGet(url); + if (res.status !== 200) return []; + return parseEntries(res.body); + } catch { + return []; + } } }; diff --git a/skills/wiki-research-loop/scripts/source-fetchers/github.js b/skills/wiki-research-loop/scripts/source-fetchers/github.js index e909d97..b0248d4 100644 --- a/skills/wiki-research-loop/scripts/source-fetchers/github.js +++ b/skills/wiki-research-loop/scripts/source-fetchers/github.js @@ -28,25 +28,25 @@ module.exports = { match: () => true, estimateCost: () => ({ usd: 0, tokens: 0 }), async fetch(query, opts = {}) { - const limit = opts.limit || 3; + const limit = opts.limit ?? 
3; const url = `https://api.github.com/search/repositories?q=${encodeURIComponent(query)}&sort=stars&per_page=${limit}`; - const res = await httpsGet(url, authHeader()); - if (res.status !== 200) return []; - let json; - try { json = JSON.parse(res.body); } catch { return []; } - const items = json.items || []; - const out = []; - for (const r of items) { - const desc = r.description || ''; - const stars = r.stargazers_count || 0; - const summary = `${desc} (${stars}★, ${r.language || 'unknown'})`; - out.push({ - title: r.full_name, - content: summary, - url: r.html_url, - fetched_at: new Date().toISOString(), + try { + const res = await httpsGet(url, authHeader()); + if (res.status !== 200) return []; + const json = JSON.parse(res.body); + const items = json.items || []; + return items.map(r => { + const desc = r.description || ''; + const stars = r.stargazers_count || 0; + return { + title: r.full_name, + content: `${desc} (${stars}★, ${r.language || 'unknown'})`, + url: r.html_url, + fetched_at: new Date().toISOString(), + }; }); + } catch { + return []; } - return out; } }; diff --git a/skills/wiki-research-loop/scripts/source-fetchers/web.js b/skills/wiki-research-loop/scripts/source-fetchers/web.js index b9b8e7d..2f22dba 100644 --- a/skills/wiki-research-loop/scripts/source-fetchers/web.js +++ b/skills/wiki-research-loop/scripts/source-fetchers/web.js @@ -1,12 +1,18 @@ const https = require('https'); +const http = require('http'); const { URL } = require('url'); -function httpsGet(url, headers = {}, redirects = 0) { +const MAX_BODY_BYTES = 4 * 1024 * 1024; +const BODY_DEADLINE_MS = 30000; + +function httpGet(url, headers = {}, redirects = 0) { return new Promise((resolve, reject) => { if (redirects > 5) return reject(new Error('Too many redirects')); const u = new URL(url); + const client = u.protocol === 'http:' ? 
http : https; const opts = { hostname: u.hostname, + port: u.port || undefined, path: u.pathname + u.search, method: 'GET', headers: { @@ -15,15 +21,28 @@ function httpsGet(url, headers = {}, redirects = 0) { ...headers, }, }; - const req = https.get(opts, res => { + const req = client.get(opts, res => { if (res.statusCode >= 300 && res.statusCode < 400 && res.headers.location) { res.resume(); const loc = new URL(res.headers.location, u).toString(); - return httpsGet(loc, headers, redirects + 1).then(resolve, reject); + return httpGet(loc, headers, redirects + 1).then(resolve, reject); } - let data = ''; - res.on('data', c => { data += c; }); - res.on('end', () => resolve({ status: res.statusCode, body: data })); + const chunks = []; + let received = 0; + let bodyTimer = null; + const cleanup = () => { + if (bodyTimer) clearTimeout(bodyTimer); + res.removeAllListeners(); + }; + const fail = (err) => { cleanup(); res.destroy(); reject(err); }; + bodyTimer = setTimeout(() => fail(new Error('body read deadline exceeded')), BODY_DEADLINE_MS); + res.on('data', c => { + received += c.length; + if (received > MAX_BODY_BYTES) return fail(new Error(`body exceeds ${MAX_BODY_BYTES} bytes`)); + chunks.push(c); + }); + res.on('end', () => { cleanup(); resolve({ status: res.statusCode, body: Buffer.concat(chunks).toString('utf8') }); }); + res.on('error', fail); }); req.setTimeout(15000, () => req.destroy(new Error('web fetch timeout'))); req.on('error', reject); @@ -59,10 +78,14 @@ module.exports = { match: () => true, estimateCost: () => ({ usd: 0, tokens: 0 }), async fetch(query, opts = {}) { - const limit = opts.limit || 3; + const limit = opts.limit ?? 
3; const url = `https://lite.duckduckgo.com/lite/?q=${encodeURIComponent(query)}`; - const res = await httpsGet(url); - if (res.status !== 200) return []; - return extractDuckDuckGoLite(res.body, limit); + try { + const res = await httpGet(url); + if (res.status !== 200) return []; + return extractDuckDuckGoLite(res.body, limit); + } catch { + return []; + } } }; diff --git a/src/db/schema.sql b/src/db/schema.sql index 1325549..a47e8c1 100644 --- a/src/db/schema.sql +++ b/src/db/schema.sql @@ -60,6 +60,10 @@ CREATE INDEX IF NOT EXISTS idx_sessions_project ON sessions(project); CREATE INDEX IF NOT EXISTS idx_sessions_started_at ON sessions(started_at); -- Wiki knowledge base (Phase 3.3.0) +-- slug is the natural id used everywhere (FKs, CLI, hooks). To avoid silent +-- overwrites when two wikis share a slug across different (scope, root_path) +-- locations, upsertWiki() guards on those columns at the application layer +-- and refuses to overwrite a registration that points at a different location. CREATE TABLE IF NOT EXISTS wikis ( slug TEXT PRIMARY KEY, title TEXT NOT NULL, diff --git a/src/db/store.ts b/src/db/store.ts index 1560347..dab77e9 100644 --- a/src/db/store.ts +++ b/src/db/store.ts @@ -333,12 +333,20 @@ export function createStore(dbPath: string = getDefaultDbPath()): Store { }, upsertWiki(wiki) { + const scope = wiki.scope ?? 'global'; + const existing = getWikiStmt.get(wiki.slug) as Wiki | undefined; + if (existing && (existing.scope !== scope || existing.root_path !== wiki.root_path)) { + throw new Error( + `wiki slug "${wiki.slug}" already registered at ${existing.scope}:${existing.root_path}; ` + + `pick a different slug or delete the existing registration first` + ); + } upsertWikiStmt.run({ slug: wiki.slug, title: wiki.title, flavor: wiki.flavor, root_path: wiki.root_path, - scope: wiki.scope ?? 'global', + scope, auto_research: wiki.auto_research ?? 0, private: wiki.private ?? 0, });