diff --git a/universal-refiner/src/core/dashboard.html b/universal-refiner/src/core/dashboard.html index a989acc..f1175b6 100644 --- a/universal-refiner/src/core/dashboard.html +++ b/universal-refiner/src/core/dashboard.html @@ -83,6 +83,7 @@

PROMPT🧠 COMMIT INTELLIGENCE +
Selected Project
@@ -168,6 +169,35 @@

Provider Metrics

+ + + @@ -190,12 +220,38 @@

Provider Metrics

document.querySelectorAll('.nav-item').forEach(n => n.classList.remove('active')); event.target.classList.add('active'); - const titles = { 'stream': 'Global Intelligence Stream', 'intelligence': 'Commit Intelligence', 'learning': 'Learning Layer', 'library': 'Prompt Library', 'health': 'Provider Health' }; + const titles = { 'stream': 'Global Intelligence Stream', 'intelligence': 'Commit Intelligence', 'learning': 'Learning Layer', 'library': 'Prompt Library', 'tournaments': 'A/B Tournaments', 'health': 'Provider Health' }; document.getElementById('view-title').textContent = titles[viewId]; currentView = viewId; refreshData(); } + async function runTournament() { + const baseline = document.getElementById('t-baseline').value; + const variantA = document.getElementById('t-variant-a').value; + const variantB = document.getElementById('t-variant-b').value; + if (!baseline || !variantA || !variantB) { + document.getElementById('tournament-run-status').textContent = "All three prompts are required."; + return; + } + document.getElementById('tournament-run-status').textContent = "Running heuristic evaluation..."; + try { + const res = await fetch(`/api/tournaments/run?project=${encodeURIComponent(currentProject)}`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ baseline, variantA, variantB }) + }); + if (res.ok) { + document.getElementById('tournament-run-status').innerHTML = `Tournament complete!`; + refreshData(); + } else { + document.getElementById('tournament-run-status').textContent = "Failed to run tournament."; + } + } catch(e) { + document.getElementById('tournament-run-status').textContent = "Error: " + e.message; + } + } + async function refreshData() { // 1. Refresh Basic State const stateRes = await fetch(`/api/state?project=${encodeURIComponent(currentProject)}`); @@ -313,6 +369,46 @@

Provider Metrics

`).join('') || '

Prompt library is currently empty.

'; } + if (currentView === 'tournaments') { + const res = await fetch(`/api/tournaments?project=${encodeURIComponent(currentProject)}`); + const data = await res.json(); + document.getElementById('tournament-history').innerHTML = data.map(t => { + let details = {}; + try { details = JSON.parse(t.details_json); } catch(e) {} + + let winnerHtml = `WINNER: ${t.winner_observed}`; + if (t.winner_observed === 'tie') { + winnerHtml = `TIE`; + } + + let scoreA = details.variantA?.evaluation?.heuristicScore || 0; + let scoreB = details.variantB?.evaluation?.heuristicScore || 0; + + return ` +
+
+ Tournament ${t.id.substring(0,8)}... + ${winnerHtml} +
+
Created: ${new Date(t.created_at).toLocaleString()}
+ +
BASELINE: ${escapeHtml(t.baseline_prompt)}
+ +
+
+
VARIANT A (Score: ${scoreA})
+
${escapeHtml(t.variant_a)}
+
+
+
VARIANT B (Score: ${scoreB})
+
${escapeHtml(t.variant_b)}
+
+
+
+ `; + }).join('') || '

No tournament history found.

'; + } + if (currentView === 'health') { const res = await fetch(`/api/health?project=${encodeURIComponent(currentProject)}`); const health = await res.json(); diff --git a/universal-refiner/src/core/dashboard.ts b/universal-refiner/src/core/dashboard.ts index e4a3102..52a0868 100644 --- a/universal-refiner/src/core/dashboard.ts +++ b/universal-refiner/src/core/dashboard.ts @@ -13,6 +13,8 @@ import { ConfigManager } from "./config.js"; import { TimelineProvider } from "../history/timeline.js"; import { EventStore } from "../history/event-store.js"; import { AutoPilotStatus } from "./autopilot-status.js"; +import { createABEvaluationRecord } from "../evaluation/prompt-evaluator.js"; +import { randomUUID } from "crypto"; const __dirname = path.dirname(fileURLToPath(import.meta.url)); @@ -393,6 +395,69 @@ export class CommandCenterDashboard { } }); + app.get("/api/tournaments", async (c) => { + const selectedPath = this.resolveSelectedPath(c.req.query("project")); + try { + const repoId = EventStore.getInstance().ensureRepository(selectedPath).id; + const tournaments = EventStore.getInstance().getTournaments(repoId); + return c.json(tournaments); + } catch (error) { + this.logRouteError("api/tournaments", error, selectedPath); + return c.json({ error: "Failed to fetch tournaments" }, 500); + } + }); + + app.post("/api/tournaments/run", async (c) => { + const selectedPath = this.resolveSelectedPath(c.req.query("project")); + try { + if (!isSameOriginRequest(c.req.header("origin"), c.req.url)) { + return c.json({ error: "Cross-origin tournament requests are not allowed" }, 403); + } + if (!isJsonContentType(c.req.header("content-type"))) { + return c.json({ error: "Tournament requests must use application/json" }, 415); + } + + let body: { baseline?: unknown; variantA?: unknown; variantB?: unknown }; + try { + body = await c.req.json() as { baseline?: unknown; variantA?: unknown; variantB?: unknown }; + } catch { + return c.json({ error: "Tournament request body must be valid JSON" }, 400); + } + + const { baseline, variantA, variantB } = body; + if ( + typeof baseline !== "string" || baseline.trim().length === 0 || + typeof variantA !== "string" || variantA.trim().length === 0 || + typeof variantB !== "string" || variantB.trim().length === 0 + ) { + return c.json({ error: "Tournament baseline, variantA, and variantB must be non-empty strings" }, 400); + } + + const experiment = createABEvaluationRecord({ + experimentId: `exp_${randomUUID()}`, + baselinePrompt: baseline, + variantA: { id: "A", prompt: variantA }, + variantB: { id: "B", prompt: variantB } + }); + + const repoId = EventStore.getInstance().ensureRepository(selectedPath).id; + EventStore.getInstance().recordTournament({ + id: experiment.experimentId, + repo_id: repoId, + baseline_prompt: baseline, + variant_a: variantA, + variant_b: variantB, + winner_observed: experiment.heuristicPreference, + details_json: JSON.stringify(experiment) + }); + + return c.json(experiment); + } catch (error) { + this.logRouteError("api/tournaments/run", error, selectedPath); + return c.json({ error: "Failed to run tournament" }, 500); + } + }); + app.get("/api/events", async (c) => { try { return streamSSE(c, async (stream) => { diff --git a/universal-refiner/src/history/event-store.ts b/universal-refiner/src/history/event-store.ts index 90736c4..62bd4e8 100644 --- a/universal-refiner/src/history/event-store.ts +++ b/universal-refiner/src/history/event-store.ts @@ -403,6 +403,43 @@ export class EventStore { ); } + recordTournament(tournament: { + id: string; + repo_id?: string | null; + baseline_prompt: string; + variant_a: string; + variant_b: string; + winner_observed: string; + details_json: string; + }) { + const now = new Date().toISOString(); + const stmt = this.db.prepare(` + INSERT INTO tournaments ( + id, repo_id, baseline_prompt, variant_a, variant_b, winner_observed, details_json, created_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?) + `); + stmt.run( + tournament.id, + tournament.repo_id || null, + tournament.baseline_prompt, + tournament.variant_a, + tournament.variant_b, + tournament.winner_observed, + tournament.details_json, + now + ); + } + + getTournaments(repoId: string, limit = 50) { + const stmt = this.db.prepare(` + SELECT * FROM tournaments + WHERE repo_id = ? OR repo_id IS NULL + ORDER BY created_at DESC + LIMIT ? + `); + return stmt.all(repoId, limit); + } + recordTemplate(template: { id: string; repo_id: string; diff --git a/universal-refiner/src/history/schema.ts b/universal-refiner/src/history/schema.ts index 2d012b2..77d5d30 100644 --- a/universal-refiner/src/history/schema.ts +++ b/universal-refiner/src/history/schema.ts @@ -154,4 +154,15 @@ CREATE TABLE IF NOT EXISTS prompt_template_links ( commit_id TEXT, lesson_id TEXT ); + +CREATE TABLE IF NOT EXISTS tournaments ( + id TEXT PRIMARY KEY, + repo_id TEXT, + baseline_prompt TEXT NOT NULL, + variant_a TEXT NOT NULL, + variant_b TEXT NOT NULL, + winner_observed TEXT NOT NULL, + details_json TEXT NOT NULL DEFAULT '{}', + created_at TEXT NOT NULL +); `; diff --git a/universal-refiner/tests/dashboard-api.test.ts b/universal-refiner/tests/dashboard-api.test.ts index 4c16178..acfb81e 100644 --- a/universal-refiner/tests/dashboard-api.test.ts +++ b/universal-refiner/tests/dashboard-api.test.ts @@ -123,6 +123,49 @@ describe("dashboard review and health APIs", () => { expect(invalidJson.status).toBe(400); }); + it("runs and persists prompt tournaments through same-origin JSON requests", async () => { + const app = CommandCenterDashboard.createApp(repoDir); + const response = await app.request("/api/tournaments/run", { + method: "POST", + headers: { "content-type": "application/json", origin: "http://localhost" }, + body: JSON.stringify({ + baseline: "Fix failing tests", + variantA: "Fix failing tests with regression coverage and verification", + variantB: "Fix tests", + }), + }); + + expect(response.status).toBe(200); + const experiment = await response.json() as any; + expect(experiment.experimentId).toMatch(/^exp_/); + expect(experiment.heuristicPreference).toBe("A"); + + const listResponse = await app.request("/api/tournaments"); + const tournaments = await listResponse.json() as any[]; + expect(listResponse.status).toBe(200); + expect(tournaments).toEqual([ + expect.objectContaining({ + id: experiment.experimentId, + baseline_prompt: "Fix failing tests", + winner_observed: "A", + }), + ]); + }); + + it("rejects unsafe or malformed tournament mutations", async () => { + const app = CommandCenterDashboard.createApp(repoDir); + const request = (body: string, headers: Record = { "content-type": "application/json", origin: "http://localhost" }) => + app.request("/api/tournaments/run", { method: "POST", headers, body }); + + expect((await request(JSON.stringify({ baseline: "x", variantA: "y", variantB: "z" }), { + "content-type": "application/json", + origin: "https://attacker.example", + })).status).toBe(403); + expect((await request("{}", { origin: "http://localhost" })).status).toBe(415); + expect((await request("{")).status).toBe(400); + expect((await request(JSON.stringify({ baseline: "x", variantA: " ", variantB: "z" }))).status).toBe(400); + }); + it("returns sanitized semantic provider and runtime health", async () => { fs.writeFileSync(path.join(repoDir, ".universal-refiner.json"), JSON.stringify({ semantic: { @@ -177,6 +220,8 @@ describe("dashboard review and health APIs", () => { expect(html).toContain("reviewCandidate"); expect(html).toContain("Approve"); expect(html).toContain("Reject"); + expect(html).toContain("Run A/B Prompt Tournament"); + expect(html).toContain("/api/tournaments"); expect(html).toContain("PROVIDER HEALTH"); expect(html).toContain("/api/health"); }); diff --git a/universal-refiner/tests/dashboard-coverage.test.ts b/universal-refiner/tests/dashboard-coverage.test.ts index fa07659..9963de8 100644 --- a/universal-refiner/tests/dashboard-coverage.test.ts +++ b/universal-refiner/tests/dashboard-coverage.test.ts @@ -139,6 +139,7 @@ describe("dashboard deterministic fallbacks", () => { ["/api/commits", () => vi.spyOn(EventStore, "getInstance").mockImplementationOnce(() => { throw new Error("commit secret"); })], ["/api/lessons", () => vi.spyOn(EventStore, "getInstance").mockImplementationOnce(() => { throw new Error("lesson secret"); })], ["/api/templates", () => vi.spyOn(EventStore, "getInstance").mockImplementationOnce(() => { throw new Error("template secret"); })], + ["/api/tournaments", () => vi.spyOn(EventStore, "getInstance").mockImplementationOnce(() => { throw new Error("tournament secret"); })], ["/api/health", () => vi.spyOn(CommandCenterDashboard as any, "buildHealth").mockImplementationOnce(() => { throw new Error("health secret"); })], ["/", () => vi.spyOn(CommandCenterDashboard as any, "buildState").mockRejectedValueOnce("root failure")], ]; @@ -151,6 +152,24 @@ describe("dashboard deterministic fallbacks", () => { expect(RuntimeLogger.error).toBeDefined(); }); + it("returns sanitized tournament mutation failures", async () => { + const app = CommandCenterDashboard.createApp(directory); + vi.spyOn(EventStore, "getInstance").mockImplementationOnce(() => { throw new Error("tournament write secret"); }); + + const response = await app.request("/api/tournaments/run", { + method: "POST", + headers: { "content-type": "application/json", origin: "http://localhost" }, + body: JSON.stringify({ + baseline: "baseline", + variantA: "variant a", + variantB: "variant b", + }), + }); + + expect(response.status).toBe(500); + expect(await response.json()).toEqual({ error: "Failed to run tournament" }); + }); + it("renders an Error without a stack in the root failure page", async () => { const app = CommandCenterDashboard.createApp(directory); const error = new Error("root message"); diff --git a/universal-refiner/tests/dashboard-routes.test.ts b/universal-refiner/tests/dashboard-routes.test.ts index 077b5c7..a87060d 100644 --- a/universal-refiner/tests/dashboard-routes.test.ts +++ b/universal-refiner/tests/dashboard-routes.test.ts @@ -26,7 +26,7 @@ describe("dashboard route coverage", () => { fs.rmSync(testDir, { recursive: true, force: true }); }); - it("serves state, timeline, commits, lessons, templates, health, and HTML", async () => { + it("serves state, timeline, commits, lessons, templates, tournaments, health, and HTML", async () => { const repoId = store.ensureRepository(repoDir).id; store.recordPrompt({ id: "prompt", repo_id: repoId, client: "test", raw_prompt: "Implement feature" }); store.recordCommit({ @@ -55,9 +55,18 @@ describe("dashboard route coverage", () => { source_type: "test", success_score: 80, }); + store.recordTournament({ + id: "tournament", + repo_id: repoId, + baseline_prompt: "baseline", + variant_a: "variant a", + variant_b: "variant b", + winner_observed: "A", + details_json: "{}", + }); const app = CommandCenterDashboard.createApp(repoDir); - for (const route of ["/api/state", "/api/timeline", "/api/commits", "/api/lessons", "/api/templates", "/api/health", "/"]) { + for (const route of ["/api/state", "/api/timeline", "/api/commits", "/api/lessons", "/api/templates", "/api/tournaments", "/api/health", "/"]) { const response = await app.request(route); expect(response.status, route).toBe(200); } diff --git a/universal-refiner/tests/history.test.ts b/universal-refiner/tests/history.test.ts index c2fc274..07256c4 100644 --- a/universal-refiner/tests/history.test.ts +++ b/universal-refiner/tests/history.test.ts @@ -286,6 +286,43 @@ describe("EventStore", () => { }); }); + it("records and lists prompt tournament evaluations for a repository", () => { + const store = EventStore.getInstance(); + store.recordTournament({ + id: "tournament-1", + repo_id: "repo", + baseline_prompt: "Fix the failing tests", + variant_a: "Fix the failing tests with regression coverage", + variant_b: "Fix tests", + winner_observed: "A", + details_json: "{\"winner\":\"A\"}", + }); + store.recordTournament({ + id: "tournament-2", + repo_id: null, + baseline_prompt: "Global baseline", + variant_a: "Global A", + variant_b: "Global B", + winner_observed: "tie", + details_json: "{}", + }); + + expect(store.getTournaments("repo", 10)).toEqual(expect.arrayContaining([ + expect.objectContaining({ + id: "tournament-1", + repo_id: "repo", + baseline_prompt: "Fix the failing tests", + winner_observed: "A", + }), + expect.objectContaining({ + id: "tournament-2", + repo_id: null, + winner_observed: "tie", + }), + ])); + expect(store.getTournaments("repo", 1)).toHaveLength(1); + }); + it("backs up and restores a verified database", async () => { const store = EventStore.getInstance(); store.recordEvent({ id: "before-backup", event_type: "test", summary: "persist me" }); diff --git a/universal-refiner/tests/tournaments.test.ts b/universal-refiner/tests/tournaments.test.ts new file mode 100644 index 0000000..027e1d5 --- /dev/null +++ b/universal-refiner/tests/tournaments.test.ts @@ -0,0 +1,55 @@ +import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { EventStore } from "../src/history/event-store.js"; +import { createABEvaluationRecord } from "../src/evaluation/prompt-evaluator.js"; + +describe("A/B Prompt Tournaments", () => { + let store: EventStore; + let testDir: string; + + beforeEach(() => { + testDir = fs.mkdtempSync(path.join(os.tmpdir(), "prompt-tournaments-")); + process.env.PROMPT_REFINER_GLOBAL_DIR = path.join(testDir, "global"); + (EventStore as unknown as { instance: EventStore | null }).instance = null; + store = EventStore.getInstance(); + }); + + afterEach(() => { + store.close(); + (EventStore as unknown as { instance: EventStore | null }).instance = null; + delete process.env.PROMPT_REFINER_GLOBAL_DIR; + fs.rmSync(testDir, { recursive: true, force: true }); + }); + + it("should evaluate and record an A/B tournament correctly", () => { + const baseline = "Write a test."; + const variantA = "Write a unit test for the login function verifying the 404 response."; + const variantB = "Test login."; + + const experiment = createABEvaluationRecord({ + experimentId: "exp_test_123", + baselinePrompt: baseline, + variantA: { id: "A", prompt: variantA }, + variantB: { id: "B", prompt: variantB } + }); + + const winner = experiment.heuristicPreference; + + store.recordTournament({ + id: experiment.experimentId, + repo_id: "test-repo", + baseline_prompt: baseline, + variant_a: variantA, + variant_b: variantB, + winner_observed: winner, + details_json: JSON.stringify(experiment) + }); + + const results = store.getTournaments("test-repo"); + expect(results).toHaveLength(1); + expect(results[0].id).toBe("exp_test_123"); + expect(results[0].winner_observed).toBe(winner); + }); +});