Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 97 additions & 1 deletion universal-refiner/src/core/dashboard.html
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ <h1 style="margin:0; font-size: 1rem; letter-spacing: -0.025em;">PROMPT<span sty
<div class="nav-item" onclick="switchView('intelligence')">🧠 COMMIT INTELLIGENCE</div>
<div class="nav-item" onclick="switchView('learning')">📖 LEARNING LAYER</div>
<div class="nav-item" onclick="switchView('library')">📚 PROMPT LIBRARY</div>
<div class="nav-item" onclick="switchView('tournaments')">🏆 TOURNAMENTS</div>
<div class="nav-item" onclick="switchView('health')">PROVIDER HEALTH</div>

<div style="margin-top: 2rem; font-size: 0.6rem; color: var(--dim); text-transform: uppercase; letter-spacing: 0.1em; padding-left: 0.75rem;">Selected Project</div>
Expand Down Expand Up @@ -168,6 +169,35 @@ <h2>Provider Metrics</h2>
</div>
</div>
</div>

<!-- VIEW: TOURNAMENTS
     Form for submitting a baseline prompt plus two variants (handled by
     runTournament()), followed by a history list that the tournaments branch
     of refreshData() fills in. Each label is tied to its textarea via
     for/id so clicking the label focuses the field and screen readers
     announce the field name. -->
<div id="view-tournaments" class="main-view hidden">
  <div class="grid">
    <div class="card col-12">
      <h2>Run A/B Prompt Tournament</h2>
      <div style="display: flex; gap: 1rem; margin-bottom: 1rem;">
        <div style="flex: 1;">
          <label for="t-baseline" style="display:block; margin-bottom: 4px; color: var(--dim);">Baseline Prompt</label>
          <textarea id="t-baseline" style="width: 100%; height: 100px; background: rgba(0,0,0,0.2); border: 1px solid rgba(255,255,255,0.1); color: var(--text); padding: 8px; font-family: monospace;"></textarea>
        </div>
        <div style="flex: 1;">
          <label for="t-variant-a" style="display:block; margin-bottom: 4px; color: var(--dim);">Variant A</label>
          <textarea id="t-variant-a" style="width: 100%; height: 100px; background: rgba(0,0,0,0.2); border: 1px solid rgba(255,255,255,0.1); color: var(--text); padding: 8px; font-family: monospace;"></textarea>
        </div>
        <div style="flex: 1;">
          <label for="t-variant-b" style="display:block; margin-bottom: 4px; color: var(--dim);">Variant B</label>
          <textarea id="t-variant-b" style="width: 100%; height: 100px; background: rgba(0,0,0,0.2); border: 1px solid rgba(255,255,255,0.1); color: var(--text); padding: 8px; font-family: monospace;"></textarea>
        </div>
      </div>
      <!-- type="button" keeps this from acting as a submit control if the markup is ever wrapped in a form. -->
      <button type="button" onclick="runTournament()" style="background: var(--accent); color: #fff; border: none; padding: 8px 16px; cursor: pointer; border-radius: 4px; font-weight: bold;">Run Heuristic Tournament</button>
      <!-- aria-live lets assistive technology announce progress and error messages written here by runTournament(). -->
      <div id="tournament-run-status" aria-live="polite" style="margin-top: 8px; font-size: 0.8rem; color: var(--dim);"></div>
    </div>
    <div class="card col-12">
      <h2>Tournament History</h2>
      <div id="tournament-history"></div>
    </div>
  </div>
</div>
</div>

<script id="dashboard-data" type="application/json">{{STATE_JSON}}</script>
Expand All @@ -190,12 +220,38 @@ <h2>Provider Metrics</h2>
document.querySelectorAll('.nav-item').forEach(n => n.classList.remove('active'));
event.target.classList.add('active');

const titles = { 'stream': 'Global Intelligence Stream', 'intelligence': 'Commit Intelligence', 'learning': 'Learning Layer', 'library': 'Prompt Library', 'health': 'Provider Health' };
const titles = { 'stream': 'Global Intelligence Stream', 'intelligence': 'Commit Intelligence', 'learning': 'Learning Layer', 'library': 'Prompt Library', 'tournaments': 'A/B Tournaments', 'health': 'Provider Health' };
document.getElementById('view-title').textContent = titles[viewId];
currentView = viewId;
refreshData();
}

/**
 * Reads the three prompts from the tournament form, POSTs them to
 * /api/tournaments/run for the currently selected project, and reports
 * progress and outcome in the #tournament-run-status element.
 *
 * On success the dashboard data is refreshed so the new tournament shows up
 * in the history list. On failure the server's `error` message (when the
 * response carries one) is appended to the status text.
 */
async function runTournament() {
  const statusEl = document.getElementById('tournament-run-status');
  const baseline = document.getElementById('t-baseline').value;
  const variantA = document.getElementById('t-variant-a').value;
  const variantB = document.getElementById('t-variant-b').value;

  // Apply the same rule the endpoint enforces (non-empty after trimming) so
  // whitespace-only input is caught here with a specific message instead of
  // coming back as a generic server failure.
  if (!baseline.trim() || !variantA.trim() || !variantB.trim()) {
    statusEl.textContent = "All three prompts are required.";
    return;
  }

  statusEl.textContent = "Running heuristic evaluation...";
  try {
    const res = await fetch(`/api/tournaments/run?project=${encodeURIComponent(currentProject)}`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ baseline, variantA, variantB })
    });
    if (res.ok) {
      statusEl.innerHTML = `<span style="color:#22c55e">Tournament complete!</span>`;
      refreshData();
    } else {
      // Error responses are expected to be JSON of the form { error: string };
      // fall back to the generic text when the body is missing or not JSON.
      let reason = "";
      try {
        const payload = await res.json();
        if (payload && typeof payload.error === 'string') reason = payload.error;
      } catch {
        // Unparseable error body: keep the generic message below.
      }
      // textContent (not innerHTML) so the server-provided text is never parsed as markup.
      statusEl.textContent = reason ? `Failed to run tournament: ${reason}` : "Failed to run tournament.";
    }
  } catch(e) {
    statusEl.textContent = "Error: " + e.message;
  }
}

async function refreshData() {
// 1. Refresh Basic State
const stateRes = await fetch(`/api/state?project=${encodeURIComponent(currentProject)}`);
Expand Down Expand Up @@ -313,6 +369,46 @@ <h2>Provider Metrics</h2>
`).join('') || '<p style="color:var(--dim)">Prompt library is currently empty.</p>';
}

// Tournament view: fetch the stored tournaments for the selected project and
// render one card per tournament into #tournament-history.
if (currentView === 'tournaments') {
  const historyEl = document.getElementById('tournament-history');
  const res = await fetch(`/api/tournaments?project=${encodeURIComponent(currentProject)}`);
  // A failed request returns a JSON object rather than an array; calling
  // .map on it would throw, so only arrays from successful responses are rendered.
  const data = res.ok ? await res.json() : null;
  if (!Array.isArray(data)) {
    historyEl.innerHTML = '<p style="color:var(--dim)">Failed to load tournament history.</p>';
  } else {
    historyEl.innerHTML = data.map(t => {
      let details = {};
      // details_json is optional context; a malformed value just means scores show as 0.
      try { details = JSON.parse(t.details_json); } catch(e) {}

      // Every value interpolated into the markup below is either escaped or
      // coerced to a number, because the result is assigned to innerHTML.
      const winner = String(t.winner_observed);
      let winnerHtml = `<span class="badge" style="background: rgba(34, 197, 94, 0.15); color: #86efac; border-color: rgba(34, 197, 94, 0.3);">WINNER: ${escapeHtml(winner)}</span>`;
      if (winner === 'tie') {
        winnerHtml = `<span class="badge" style="background: rgba(255, 255, 255, 0.15); color: #fff; border-color: rgba(255, 255, 255, 0.3);">TIE</span>`;
      }

      // `details?.` guards against a details_json that parses to null.
      const scoreA = Number(details?.variantA?.evaluation?.heuristicScore) || 0;
      const scoreB = Number(details?.variantB?.evaluation?.heuristicScore) || 0;

      // Accent colour for variant A, pink for variant B, dim for anything else (e.g. a tie).
      const borderColor = winner === 'A' ? 'var(--accent)' : winner === 'B' ? '#f472b6' : 'var(--dim)';

      return `
        <div class="card" style="border-top: 2px solid ${borderColor}; margin-bottom: 1rem;">
          <div style="display:flex; justify-content:space-between; align-items:center; margin-bottom: 8px;">
            <strong>Tournament ${escapeHtml(String(t.id).substring(0,8))}...</strong>
            ${winnerHtml}
          </div>
          <div style="font-size: 0.8rem; color: var(--dim); margin-bottom: 8px;">Created: ${new Date(t.created_at).toLocaleString()}</div>

          <div style="font-size: 0.85rem; color: var(--dim); margin-bottom: 4px;"><strong>BASELINE:</strong> ${escapeHtml(t.baseline_prompt)}</div>

          <div style="display: flex; gap: 1rem; margin-top: 10px;">
            <div style="flex:1; padding: 10px; background: rgba(0,0,0,0.2); border-left: 2px solid var(--accent); border-radius: 4px;">
              <div style="margin-bottom: 6px;"><strong>VARIANT A</strong> (Score: ${scoreA})</div>
              <pre style="font-size: 0.75rem; white-space: pre-wrap;">${escapeHtml(t.variant_a)}</pre>
            </div>
            <div style="flex:1; padding: 10px; background: rgba(0,0,0,0.2); border-left: 2px solid #f472b6; border-radius: 4px;">
              <div style="margin-bottom: 6px;"><strong>VARIANT B</strong> (Score: ${scoreB})</div>
              <pre style="font-size: 0.75rem; white-space: pre-wrap;">${escapeHtml(t.variant_b)}</pre>
            </div>
          </div>
        </div>
      `;
    }).join('') || '<p style="color:var(--dim)">No tournament history found.</p>';
  }
}

if (currentView === 'health') {
const res = await fetch(`/api/health?project=${encodeURIComponent(currentProject)}`);
const health = await res.json();
Expand Down
65 changes: 65 additions & 0 deletions universal-refiner/src/core/dashboard.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ import { ConfigManager } from "./config.js";
import { TimelineProvider } from "../history/timeline.js";
import { EventStore } from "../history/event-store.js";
import { AutoPilotStatus } from "./autopilot-status.js";
import { createABEvaluationRecord } from "../evaluation/prompt-evaluator.js";
import { randomUUID } from "crypto";

const __dirname = path.dirname(fileURLToPath(import.meta.url));

Expand Down Expand Up @@ -393,6 +395,69 @@ export class CommandCenterDashboard {
}
});

// GET /api/tournaments — lists the stored tournaments for the project named
// by the `project` query parameter. Any failure is logged through
// logRouteError and answered with a fixed 500 message.
app.get("/api/tournaments", async (c) => {
  const projectPath = this.resolveSelectedPath(c.req.query("project"));
  try {
    const { id: repositoryId } = EventStore.getInstance().ensureRepository(projectPath);
    return c.json(EventStore.getInstance().getTournaments(repositoryId));
  } catch (failure) {
    this.logRouteError("api/tournaments", failure, projectPath);
    return c.json({ error: "Failed to fetch tournaments" }, 500);
  }
});

// POST /api/tournaments/run — evaluates a baseline prompt against two
// variants and persists the result for the project named by the `project`
// query parameter.
//
// Responses:
//   200 — the experiment record returned by createABEvaluationRecord
//   400 — body is not valid JSON, or baseline/variantA/variantB are not
//         non-empty (after trimming) strings
//   403 — request is not same-origin
//   415 — content type is not JSON
//   500 — any unexpected failure (logged via logRouteError; fixed message)
app.post("/api/tournaments/run", async (c) => {
  const selectedPath = this.resolveSelectedPath(c.req.query("project"));
  try {
    if (!isSameOriginRequest(c.req.header("origin"), c.req.url)) {
      return c.json({ error: "Cross-origin tournament requests are not allowed" }, 403);
    }
    if (!isJsonContentType(c.req.header("content-type"))) {
      return c.json({ error: "Tournament requests must use application/json" }, 415);
    }

    let body: unknown;
    try {
      body = await c.req.json();
    } catch {
      return c.json({ error: "Tournament request body must be valid JSON" }, 400);
    }

    // `null` is valid JSON but cannot be destructured; treat it (and any other
    // non-object payload) as an empty object so it fails field validation with
    // a 400 instead of throwing into the 500 handler.
    const fields: { baseline?: unknown; variantA?: unknown; variantB?: unknown } =
      typeof body === "object" && body !== null ? body : {};

    const { baseline, variantA, variantB } = fields;
    if (
      typeof baseline !== "string" || baseline.trim().length === 0 ||
      typeof variantA !== "string" || variantA.trim().length === 0 ||
      typeof variantB !== "string" || variantB.trim().length === 0
    ) {
      return c.json({ error: "Tournament baseline, variantA, and variantB must be non-empty strings" }, 400);
    }

    const experiment = createABEvaluationRecord({
      experimentId: `exp_${randomUUID()}`,
      baselinePrompt: baseline,
      variantA: { id: "A", prompt: variantA },
      variantB: { id: "B", prompt: variantB }
    });

    const repoId = EventStore.getInstance().ensureRepository(selectedPath).id;
    EventStore.getInstance().recordTournament({
      id: experiment.experimentId,
      repo_id: repoId,
      baseline_prompt: baseline,
      variant_a: variantA,
      variant_b: variantB,
      winner_observed: experiment.heuristicPreference,
      // Keep the full experiment so the dashboard can read per-variant scores later.
      details_json: JSON.stringify(experiment)
    });

    return c.json(experiment);
  } catch (error) {
    this.logRouteError("api/tournaments/run", error, selectedPath);
    return c.json({ error: "Failed to run tournament" }, 500);
  }
});

app.get("/api/events", async (c) => {
try {
return streamSSE(c, async (stream) => {
Expand Down
37 changes: 37 additions & 0 deletions universal-refiner/src/history/event-store.ts
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,43 @@ export class EventStore {
);
}

/**
 * Inserts one row into the `tournaments` table.
 *
 * `created_at` is set to the current time as an ISO-8601 string. A missing
 * or otherwise falsy `repo_id` is stored as NULL.
 *
 * @param tournament Column values for the new row.
 */
recordTournament(tournament: {
  id: string;
  repo_id?: string | null;
  baseline_prompt: string;
  variant_a: string;
  variant_b: string;
  winner_observed: string;
  details_json: string;
}) {
  const createdAt = new Date().toISOString();
  const insert = this.db.prepare(`
INSERT INTO tournaments (
id, repo_id, baseline_prompt, variant_a, variant_b, winner_observed, details_json, created_at
) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
`);
  // Values are listed in the same order as the column list above.
  const row = [
    tournament.id,
    tournament.repo_id || null,
    tournament.baseline_prompt,
    tournament.variant_a,
    tournament.variant_b,
    tournament.winner_observed,
    tournament.details_json,
    createdAt,
  ];
  insert.run(...row);
}

/**
 * Returns tournament rows, newest first by `created_at`.
 *
 * Rows match when their `repo_id` equals `repoId` or is NULL.
 *
 * @param repoId Repository id to match.
 * @param limit Maximum number of rows to return (default 50).
 */
getTournaments(repoId: string, limit = 50) {
  const query = `
SELECT * FROM tournaments
WHERE repo_id = ? OR repo_id IS NULL
ORDER BY created_at DESC
LIMIT ?
`;
  return this.db.prepare(query).all(repoId, limit);
}

recordTemplate(template: {
id: string;
repo_id: string;
Expand Down
11 changes: 11 additions & 0 deletions universal-refiner/src/history/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -154,4 +154,15 @@ CREATE TABLE IF NOT EXISTS prompt_template_links (
commit_id TEXT,
lesson_id TEXT
);

CREATE TABLE IF NOT EXISTS tournaments (
id TEXT PRIMARY KEY,
repo_id TEXT,
baseline_prompt TEXT NOT NULL,
variant_a TEXT NOT NULL,
variant_b TEXT NOT NULL,
winner_observed TEXT NOT NULL,
details_json TEXT NOT NULL DEFAULT '{}',
created_at TEXT NOT NULL
);
`;
45 changes: 45 additions & 0 deletions universal-refiner/tests/dashboard-api.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,49 @@ describe("dashboard review and health APIs", () => {
expect(invalidJson.status).toBe(400);
});

// Happy path: a same-origin JSON POST runs the tournament, and a follow-up
// GET lists exactly that one persisted record.
it("runs and persists prompt tournaments through same-origin JSON requests", async () => {
  const dashboard = CommandCenterDashboard.createApp(repoDir);
  const prompts = {
    baseline: "Fix failing tests",
    variantA: "Fix failing tests with regression coverage and verification",
    variantB: "Fix tests",
  };

  const runResponse = await dashboard.request("/api/tournaments/run", {
    method: "POST",
    headers: { "content-type": "application/json", origin: "http://localhost" },
    body: JSON.stringify(prompts),
  });
  expect(runResponse.status).toBe(200);

  const result = await runResponse.json() as any;
  expect(result.experimentId).toMatch(/^exp_/);
  expect(result.heuristicPreference).toBe("A");

  const historyResponse = await dashboard.request("/api/tournaments");
  const history = await historyResponse.json() as any[];
  expect(historyResponse.status).toBe(200);

  const persistedRecord = expect.objectContaining({
    id: result.experimentId,
    baseline_prompt: "Fix failing tests",
    winner_observed: "A",
  });
  expect(history).toEqual([persistedRecord]);
});

// Each rejected request maps to one guard in the run endpoint: foreign
// origin (403), missing JSON content type (415), unparseable body (400),
// and a whitespace-only prompt (400).
it("rejects unsafe or malformed tournament mutations", async () => {
  const dashboard = CommandCenterDashboard.createApp(repoDir);
  const post = (
    body: string,
    headers: Record<string, string> = { "content-type": "application/json", origin: "http://localhost" },
  ) => dashboard.request("/api/tournaments/run", { method: "POST", headers, body });

  const crossOrigin = await post(
    JSON.stringify({ baseline: "x", variantA: "y", variantB: "z" }),
    { "content-type": "application/json", origin: "https://attacker.example" },
  );
  expect(crossOrigin.status).toBe(403);

  const missingContentType = await post("{}", { origin: "http://localhost" });
  expect(missingContentType.status).toBe(415);

  const truncatedJson = await post("{");
  expect(truncatedJson.status).toBe(400);

  const blankVariant = await post(JSON.stringify({ baseline: "x", variantA: " ", variantB: "z" }));
  expect(blankVariant.status).toBe(400);
});

it("returns sanitized semantic provider and runtime health", async () => {
fs.writeFileSync(path.join(repoDir, ".universal-refiner.json"), JSON.stringify({
semantic: {
Expand Down Expand Up @@ -177,6 +220,8 @@ describe("dashboard review and health APIs", () => {
expect(html).toContain("reviewCandidate");
expect(html).toContain("Approve");
expect(html).toContain("Reject");
expect(html).toContain("Run A/B Prompt Tournament");
expect(html).toContain("/api/tournaments");
expect(html).toContain("PROVIDER HEALTH");
expect(html).toContain("/api/health");
});
Expand Down
19 changes: 19 additions & 0 deletions universal-refiner/tests/dashboard-coverage.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ describe("dashboard deterministic fallbacks", () => {
["/api/commits", () => vi.spyOn(EventStore, "getInstance").mockImplementationOnce(() => { throw new Error("commit secret"); })],
["/api/lessons", () => vi.spyOn(EventStore, "getInstance").mockImplementationOnce(() => { throw new Error("lesson secret"); })],
["/api/templates", () => vi.spyOn(EventStore, "getInstance").mockImplementationOnce(() => { throw new Error("template secret"); })],
["/api/tournaments", () => vi.spyOn(EventStore, "getInstance").mockImplementationOnce(() => { throw new Error("tournament secret"); })],
["/api/health", () => vi.spyOn(CommandCenterDashboard as any, "buildHealth").mockImplementationOnce(() => { throw new Error("health secret"); })],
["/", () => vi.spyOn(CommandCenterDashboard as any, "buildState").mockRejectedValueOnce("root failure")],
];
Expand All @@ -151,6 +152,24 @@ describe("dashboard deterministic fallbacks", () => {
expect(RuntimeLogger.error).toBeDefined();
});

// When the store throws, the endpoint must answer with its fixed 500 payload
// rather than the underlying error text.
it("returns sanitized tournament mutation failures", async () => {
  const dashboard = CommandCenterDashboard.createApp(directory);
  vi.spyOn(EventStore, "getInstance").mockImplementationOnce(() => {
    throw new Error("tournament write secret");
  });

  const requestBody = JSON.stringify({
    baseline: "baseline",
    variantA: "variant a",
    variantB: "variant b",
  });
  const failed = await dashboard.request("/api/tournaments/run", {
    method: "POST",
    headers: { "content-type": "application/json", origin: "http://localhost" },
    body: requestBody,
  });

  expect(failed.status).toBe(500);
  const payload = await failed.json();
  expect(payload).toEqual({ error: "Failed to run tournament" });
});

it("renders an Error without a stack in the root failure page", async () => {
const app = CommandCenterDashboard.createApp(directory);
const error = new Error("root message");
Expand Down
13 changes: 11 additions & 2 deletions universal-refiner/tests/dashboard-routes.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ describe("dashboard route coverage", () => {
fs.rmSync(testDir, { recursive: true, force: true });
});

it("serves state, timeline, commits, lessons, templates, health, and HTML", async () => {
it("serves state, timeline, commits, lessons, templates, tournaments, health, and HTML", async () => {
const repoId = store.ensureRepository(repoDir).id;
store.recordPrompt({ id: "prompt", repo_id: repoId, client: "test", raw_prompt: "Implement feature" });
store.recordCommit({
Expand Down Expand Up @@ -55,9 +55,18 @@ describe("dashboard route coverage", () => {
source_type: "test",
success_score: 80,
});
store.recordTournament({
id: "tournament",
repo_id: repoId,
baseline_prompt: "baseline",
variant_a: "variant a",
variant_b: "variant b",
winner_observed: "A",
details_json: "{}",
});
const app = CommandCenterDashboard.createApp(repoDir);

for (const route of ["/api/state", "/api/timeline", "/api/commits", "/api/lessons", "/api/templates", "/api/health", "/"]) {
for (const route of ["/api/state", "/api/timeline", "/api/commits", "/api/lessons", "/api/templates", "/api/tournaments", "/api/health", "/"]) {
const response = await app.request(route);
expect(response.status, route).toBe(200);
}
Expand Down
Loading