OpenAI Prompt Caching Playground

Here’s the single, ready-to-run HTML file with both patches combined:

  • Longer, roughly 2× instructions & docs (so the static prompt prefix exceeds the 1024-token minimum required for caching to engage).
  • Expandable JSON sections showing the exact request payload (without your API key) and the raw response.

html

OpenAI Prompt Caching Playground :root { --bg:#0d1117; --panel:#161b22; --ink:#e6edf3; --muted:#9da7b3; --accent:#3fb950; --warn:#f0883e; --border:#30363d; } html,body {background:var(--bg); color:var(--ink); font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, "Apple Color Emoji", "Segoe UI Emoji"; margin:0; padding:0;} header {padding:16px 20px; border-bottom:1px solid var(--border); position:sticky; top:0; background:rgba(13,17,23,.9); backdrop-filter: blur(6px);} h1 {font-size:18px; margin:0 0 6px;} .sub {color:var(--muted); font-size:13px;} .wrap {display:grid; grid-template-columns: 360px 1fr; gap:16px; padding:16px;} .panel {background:var(--panel); border:1px solid var(--border); border-radius:10px; padding:14px;} .panel h2 {font-size:14px; margin:0 0 10px; color:#c9d1d9; letter-spacing:.2px;} textarea, input, select {background:#0b0f14; color:var(--ink); border:1px solid var(--border); border-radius:8px; padding:8px; width:100%; box-sizing:border-box; font: inherit;} textarea {min-height:110px; resize:vertical; line-height:1.35;} .row {display:flex; gap:8px; align-items:center;} .row > * {flex:1;} .row .shrink {flex:0 0 auto;} .btn {background:#21262d; border:1px solid var(--border); padding:8px 10px; border-radius:8px; cursor:pointer; color:var(--ink);} .btn:hover {border-color:#6e7681;} .btn.primary {background:var(--accent); color:#031d0b; border-color:#2ea043; font-weight:600;} .btn.ghost {background:transparent;} .grid-2 {display:grid; grid-template-columns:1fr 1fr; gap:8px;} .stats {display:grid; grid-template-columns: repeat(6, minmax(90px, 1fr)); gap:8px; margin-top:8px;} .stat {background:#0b0f14; border:1px dashed var(--border); border-radius:8px; padding:8px;} .stat .k {font-size:11px; color:var(--muted);} .stat .v {font-size:16px; font-weight:700; margin-top:4px;} .badge {display:inline-block; padding:2px 8px; border-radius:999px; font-size:12px; border:1px solid var(--border); background:#0b0f14; 
color:var(--muted);} .badge.hit {background:rgba(63,185,80,.12); color:#56d364; border-color:#2ea043;} .badge.miss {background:rgba(240,136,62,.12); color:#f0883e; border-color:#8a5700;} .log {font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; font-size:12px; white-space:pre-wrap; background:#0b0f14; border:1px solid var(--border); border-radius:8px; padding:10px; max-height:260px; overflow:auto;} .out {background:#0b0f14; border:1px solid var(--border); border-radius:8px; padding:10px; min-height:140px; font-family: ui-monospace, Menlo, Consolas, monospace; white-space:pre-wrap;} /* Raw JSON sections */ .code { background:#0b0f14; border:1px solid var(--border); border-radius:8px; padding:10px; font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; font-size:12px; line-height:1.5; white-space:pre-wrap; max-height:260px; overflow:auto; } details summary { cursor:pointer; user-select:none; } details { margin:8px 0; } .footer-note {color:var(--muted); font-size:12px;} .inline {display:inline-flex; align-items:center; gap:8px;} .sep {height:1px; background:var(--border); margin:10px 0;} @media (max-width: 980px){ .wrap{grid-template-columns:1fr;} } Prompt Caching Playground Optimize your prompt structure and observe cache hits. Cache engages at ≥ 1024 tokens, with cacheable chunks in 128-token increments (e.g., 1024, 1152, 1280…). Setup Chat Completions API Responses API (supports prompt_cache_key) gpt-4o-mini gpt-4o gpt-4.1-mini Load demo scenario… A) Static instructions + Doc A + Q1 A2) Same instructions + Doc A + Q2 B) Same instructions + Doc B + Q1 Load Repeat instructions Repeat doc Use repeats to push your prompt over 1024 tokens (and observe cached increments). 
Prompt Parts Initial Instructions (static; put these first for best cache hit rate) Document Context (semi-static; keep above user input) User Question (dynamic; place last) Send Send Variant (new Q) Clear Log Copy Log Tip: Try A → A2 (same prefix, new question) to see cache hits; then A → B to see instruction-only prefix hits. Result cache status latency timestamp Prompt Tokens– Cached Tokens– Cache Hit %– Completion Tokens– Total Tokens– Model– Endpoint – prompt_cache_key – Response will appear here… Raw JSON Most recent request JSON — Most recent response JSON — Run Log /** Minimal, in-page "localStorage prompt" API key flow */ function getAPIKey() { let apiKey = localStorage.getItem('openai_api_key'); if (!apiKey) { apiKey = prompt('Please enter your OpenAI API Key:'); if (apiKey) { localStorage.setItem('openai_api_key', apiKey); } } return apiKey; } /** Helpers */ const $ = (id) => document.getElementById(id); function nowISO() { const d = new Date(); return d.toLocaleString(undefined, {hour12:false}) + ' (' + d.toISOString() + ')'; } function logLine(txt) { const el = $('runLog'); el.textContent += txt + '\n'; el.scrollTop = el.scrollHeight; } function setBadge(el, text, kind) { el.textContent = text; el.classList.remove('hit','miss'); if (kind) el.classList.add(kind); } function pretty(obj) { try { return JSON.stringify(obj, null, 2); } catch { return String(obj); } } function setReqJson(url, payload) { // Do NOT include headers / API key. Show only URL + payload. const snapshot = { url, payload }; $('reqJson').textContent = pretty(snapshot); } function setResJson(data) { $('resJson').textContent = pretty(data); } /** Demo scenarios (2× length) */ const demo = { instructions: `You are a highly concise assistant. Always answer in 1–3 sentences unless explicitly asked for more. Use plain language. If the user asks for code, include only code unless told otherwise. General rules: - If asked to compare, list key differences as short bullets. 
- If a calculation is needed, show the equation then the result. - If a definition is requested, give a crisp one-liner first. - Prefer step-by-step logic but keep it terse; avoid filler. - Cite assumptions when they influence the answer. Style guide: - Avoid hedging like “it seems.” Be direct when evidence supports it. - Use simple words; avoid jargon unless the question is technical. - Bullets over paragraphs when listing 3+ items. - For numbers: include units and orders of magnitude when relevant. Math & formatting: - Show one compact formula, then compute. - Round sensibly; default to 2–3 sig figs unless precision matters. - Monospace code blocks for code only; no prose inside code fences. Safety rails: - Decline disallowed content with a brief reason and a safer alternative. - Don’t fabricate citations, links, or data you can’t verify. Edge cases: - If the question is ambiguous, answer the most common interpretation and note the alternative in one line. - If insufficient data, state what’s missing and provide a minimal actionable next step. Examples (for length and reference): Q: Define latency vs throughput. A: Latency is per-request delay; throughput is requests/second. They trade off: batching raises throughput but can add latency. Q: Summarize a spec into 3 bullets. A: Goal, core objects, critical risks (1 line each). Q: Show a quick ROI calc. A: ROI = (benefit − cost)/cost. With $50k benefit and $n20k cost, ROI = (50−20)/20 = 1.5 = 150%. When code is requested: - Provide a minimal runnable snippet. - Include a 1-line comment on how to run or integrate. Tools you could hypothetically use (for realism in token length): web.run (search, open, click), python (analysis), image_query (images for people/places). These tool names are illustrative for prompt length; they don’t execute here. Glossary (compact): - CRDT: conflict-free replicated data type. - Event sourcing: store events then derive state from them. 
Extended guidance filler to lift token count: ${'Guideline: Prefer clear, literal phrasing; keep answers scoped; surface assumptions explicitly; show one example when helpful.\n'.repeat(60)} (End of static instructions.)`, docA: `Product Spec: Nimbus Notes - Goal: Lightning-fast note capture with offline-first sync. - Core objects: Notebook, Note, Tag, Attachment. - Sync model: CRDT-based; conflict-free merges. - Pricing: Free; Pro at $n4/month with 10GB attachments. - Roadmap Q4: iOS widgets, Android share sheet revamp, web clipper. Architecture notes: - Local-first write path, background sync, deterministic conflict resolution. - Indexing: inverted index on device; server compaction nightly. - Export: Markdown + attachments in a flat bundle. Risks: - Complex merge semantics for rich text. - Battery usage on large notebooks during background sync. Extra long filler to increase tokens: ${'• Feature: ' + 'rich text, backlinks, slash commands, export to Markdown.\n'.repeat(80)}`, docB: `Product Spec: Zephyr Tasks - Goal: Kanban-like tasking for small teams, built on ActivityPub. - Core objects: Board, Column, Card, Checklist, Comment. - Sync model: Event-sourced with snapshotting. - Pricing: Free; Team at $n6/user/month, custom SSO. - Roadmap Q4: Gantt view, email in, calendar sync. Architecture notes: - Federated updates via ActivityPub; per-tenant queues. - Snapshot intervals tuned to active column size. - Automation hooks for webhooks and email ingestion. Risks: - Federation consistency and moderation boundaries. - Notification overload without good defaults. 
Extra long filler to increase tokens: ${'• Capability: ' + 'labels, due dates, swimlanes, WIP limits, automations.\n'.repeat(80)}`, q1A: `What are the minimum moving parts I need to build first MVP?`, q2A: `What are the top three risks for this approach?`, q1B: `What metrics should we track in the first month?`, }; function loadScenario(which) { if (which === 'A') { $('instructions').value = demo.instructions; $('doc').value = demo.docA; $('question').value = demo.q1A; $('scenarioName').value = 'A (Instr + Doc A + Q1)'; } else if (which === 'A2') { $('instructions').value = demo.instructions; $('doc').value = demo.docA; $('question').value = demo.q2A; $('scenarioName').value = 'A2 (Instr + Doc A + Q2)'; } else if (which === 'B') { $('instructions').value = demo.instructions; $('doc').value = demo.docB; $('question').value = demo.q1B; $('scenarioName').value = 'B (Instr + Doc B + Q1)'; } else { $('instructions').value = demo.instructions; $('doc').value = demo.docA; $('question').value = demo.q1A; $('scenarioName').value = 'Starter'; } } $('loadScenarioBtn').addEventListener('click', () => loadScenario($('scenarioPicker').value)); /** Build messages with static-first ordering (to maximize cache hits) */ function buildMessages() { const instr = $('instructions').value; const doc = $('doc').value; const question = $('question').value; const repI = Math.max(1, parseInt($('repeatInstructions').value || '1', 10)); const repD = Math.max(1, parseInt($('repeatDoc').value || '1', 10)); const instrRepeated = Array.from({length:repI}).map(() => instr).join('\n\n'); const docRepeated = Array.from({length:repD}).map((_,i) => `# Document Copy ${i+1}\n` + doc).join('\n\n'); const system = instrRepeated.trim(); const user = (`Document Context:\n${docRepeated}\n\nUser Question:\n${question}`).trim(); const messages = [ {role:'system', content: system}, {role:'user', content: user}, ]; return {messages, system, user}; } /** Core send function (supports Chat Completions and Responses 
API) */ async function send(kind) { const apiKey = getAPIKey(); if (!apiKey) return; // Optionally mutate the user question for the "variant" button if (kind === 'variant') { $('question').value = $('question').value + ' (Please keep it to 3 bullets.)'; } const endpointSel = $('endpoint').value; const model = $('model').value; const pck = $('cacheKey').value.trim() || null; const label = $('scenarioName').value.trim() || '(unnamed)'; const {messages} = buildMessages(); let url, payload, headers = { 'Content-Type': 'application/json', 'Authorization': `Bearer ${apiKey}`, }; if (endpointSel === 'responses') { url = 'https://api.openai.com/v1/responses'; payload = { model, messages, prompt_cache_key: pck || undefined, temperature: 0.2, }; } else { url = 'https://api.openai.com/v1/chat/completions'; payload = { model, messages, temperature: 0.2, }; } // Show outgoing request snapshot (no headers / key) and reset response pane setReqJson(url, payload); $('resJson').textContent = '—'; // UI pre-state setBadge($('hitBadge'), '…', null); setBadge($('timeBadge'), '–', null); setBadge($('tsBadge'), new Date().toLocaleTimeString(), null); $('output').textContent = 'Waiting for response…'; $('promptTokens').textContent = '–'; $('cachedTokens').textContent = '–'; $('hitPct').textContent = '–'; $('completionTokens').textContent = '–'; $('totalTokens').textContent = '–'; $('modelEcho').textContent = model; $('endpointEcho').textContent = endpointSel; $('pckEcho').textContent = pck || '—'; const t0 = performance.now(); let resp, data, ok = false, textOut = '', usage = {}, cached = 0; try { resp = await fetch(url, {method:'POST', headers, body: JSON.stringify(payload)}); const t1 = performance.now(); const latencyMs = Math.round(t1 - t0); ok = resp.ok; data = await resp.json(); setResJson(data); // Extract output text + usage across both APIs if (endpointSel === 'responses') { textOut = data.output_text ?? 
''; if (!textOut && Array.isArray(data.output)) { textOut = data.output.map(o => { if (o?.content) { return o.content.map(c => (c.type === 'output_text' && c.text) ? c.text : '').join(''); } return ''; }).join('').trim(); } usage = data.usage || {}; } else { textOut = (data.choices && data.choices[0] && data.choices[0].message && data.choices[0].message.content) || ''; usage = data.usage || {}; } // Pull cached_tokens if present const promptDetails = usage.prompt_tokens_details || {}; cached = (typeof promptDetails.cached_tokens === 'number') ? promptDetails.cached_tokens : 0; // Display stats const pt = usage.prompt_tokens ?? '—'; const ct = usage.completion_tokens ?? '—'; const tt = usage.total_tokens ?? '—'; $('promptTokens').textContent = pt; $('cachedTokens').textContent = cached; $('completionTokens').textContent = ct; $('totalTokens').textContent = tt; // Cache hit badge logic let hitBadgeText = (cached && cached > 0) ? `cache hit: ${cached}` : 'cache miss'; let hitKind = (cached && cached > 0) ? 'hit' : 'miss'; setBadge($('hitBadge'), hitBadgeText, hitKind); // Cache hit % let hitPct = (typeof pt === 'number' && pt > 0 && typeof cached === 'number') ? ((cached/pt)*100).toFixed(1) + '%' : '—'; $('hitPct').textContent = hitPct; setBadge($('timeBadge'), `${latencyMs} ms`, null); setBadge($('tsBadge'), new Date().toLocaleTimeString(), null); // Show response text $('output').textContent = textOut || JSON.stringify(data, null, 2); // Log line (concise) const ts = nowISO(); logLine(`[${ts}] label="${label}" endpoint=${endpointSel} model=${model}` + (pck ? 
` pck="${pck}"` : '') + ` | latency=${latencyMs}ms tokens: prompt=${pt} cached=${cached} completion=${ct} total=${tt} | ` + `hit=${cached>0} | Q="${$('question').value.trim().slice(0,80)}"`); } catch (e) { $('output').textContent = 'Error: ' + (e?.message || e); setResJson({ error: String(e) }); setBadge($('hitBadge'), 'error', 'miss'); logLine(`[${nowISO()}] ERROR ${e?.message || e}`); } } /** UI wires */ $('sendBtn').addEventListener('click', () => send('normal')); $('sendVariantBtn').addEventListener('click', () => send('variant')); $('clearLogBtn').addEventListener('click', () => { $('runLog').textContent=''; }); $('copyLogBtn').addEventListener('click', async () => { await navigator.clipboard.writeText($('runLog').textContent); alert('Log copied to clipboard.'); }); /** Initialize with starter scenario + session log */ function init() { loadScenario('starter'); logLine(`[${nowISO()}] Session started. Try A → A2 for a cache hit (same prefix, new question). Then A → B to see instruction-only prefix hits. Caches typically persist ~5–10 minutes idle (sometimes up to ~1 hour off-peak).`); } init();

If you want, I can also add a quick “estimated tokens” counter client-side (pre-send) to help you target ≥1024 tokens before making a call.