Dockerized Codex shim: translates OpenAI /v1/responses requests into /v1/chat/completions calls for LM Studio (standalone)
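Codex speaks the OpenAI Responses API (/v1/responses), while LM Studio serves the OpenAI Chat Completions API (/v1/chat/completions); the shim sits between the two and translates each request and reply. Roughly, the mapping looks like this (illustrative payloads, not exact wire captures; "local" and the prompt are placeholders):

# Codex -> shim (Responses API shape)
#   POST /v1/responses
#   {"model":"local","input":"Write a haiku","stream":false}
#
# Shim -> LM Studio (Chat Completions shape)
#   POST /v1/chat/completions
#   {"model":"local","messages":[{"role":"user","content":"Write a haiku"}]}
#
# Shim -> Codex (minimal Responses object built from the chat answer)
#   {"id":"resp_...","object":"response","output":[{"type":"output_text","text":"..."}],"usage":{...}}

The one-liner below runs the whole thing in a throwaway node:20-alpine container.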
docker run -d \
  --name codex-shim \
  -p 3030:3030 \
  -e LMSTUDIO_BASE="http://192.168.0.25:9999/v1" \
  node:20-alpine \
  sh -c "
npm init -y >/dev/null &&
npm i express &&
cat <<'EOF' > server.mjs
// Node 18+ provides fetch and TextDecoder globally, so express is the only dependency.
// The .mjs extension lets Node treat the file as an ES module without editing package.json.
import express from 'express';
const app = express();
app.use(express.json({limit:'2mb'}));
const VLLM = process.env.LMSTUDIO_BASE || 'http://192.168.0.25:9999/v1';
const PORT = process.env.PORT || 3030;
function toMsgs(body){
  if(Array.isArray(body?.messages)) return body.messages;
  if(typeof body?.input==='string') return [{role:'user',content:body.input}];
  return [{role:'user',content:'Hello'}];
}
app.post('/v1/responses', async (req,res)=>{
  const {model='local',stream=false,max_tokens,temperature} = req.body||{};
  const msgs = toMsgs(req.body);
  if(!stream){
    const r = await fetch(VLLM+'/chat/completions',{method:'POST',
      headers:{'content-type':'application/json'},
      body:JSON.stringify({model,messages:msgs,max_tokens,temperature})});
    const j = await r.json(); const text=j?.choices?.[0]?.message?.content||'';
    const now=Math.floor(Date.now()/1000);
    return res.json({id:'resp_'+now,object:'response',created:now,model,
      output:[{type:'output_text',text}],usage:j?.usage||{}});
  }
  res.setHeader('Content-Type','text/event-stream');
  res.setHeader('Cache-Control','no-cache');
  const up=await fetch(VLLM+'/chat/completions',{method:'POST',
    headers:{'content-type':'application/json'},
    body:JSON.stringify({model,messages:msgs,stream:true,max_tokens,temperature})});
  const reader=up.body.getReader(); let full='';
  // Concatenation instead of a template literal: the host shell would otherwise
  // expand it inside the double-quoted sh -c string.
  const send=(e,d)=>res.write('event: '+e+'\ndata: '+JSON.stringify(d)+'\n\n');
  const ts=Math.floor(Date.now()/1000);
  send('response.created',{id:'resp_'+ts,created:ts,model});
  while(true){
    const {done,value}=await reader.read(); if(done) break;
    const chunk=new TextDecoder().decode(value);
    for(const line of chunk.split('\n')){
      if(!line.trim().startsWith('data:')) continue;
      const payload=line.trim().slice(5).trim(); if(payload==='[DONE]') continue;
      try{
        const j=JSON.parse(payload);
        const d=j?.choices?.[0]?.delta?.content||j?.choices?.[0]?.message?.content||'';
        if(d){full+=d;send('response.output_text.delta',{delta:d});}
      }catch{}
    }
  }
  send('response.completed',{id:'resp_'+ts,model,output:[{type:'output_text',text:full}]});
  res.end();
});
app.get('/v1/models',async(_req,res)=>{
  const r=await fetch(VLLM+'/models'); res.send(await r.text());
});
app.listen(PORT,()=>console.log('Shim ready on :'+PORT));
EOF
node server.mjs
"
The same shim as a standalone server.js, for running directly with Node outside of Docker:
// Minimal Responses→ChatCompletions shim for Codex↔LM Studio
// Run: node server.js  (ESM — set "type": "module" in package.json, or rename to server.mjs)
// Node 18+ provides fetch globally, so express is the only dependency.
import express from "express";
const app = express();
app.use(express.json({ limit: "2mb" }));
const VLLM = process.env.LMSTUDIO_BASE || "http://192.168.0.25:9999/v1";
// Helper to coerce request into chat messages
function toMessages(body) {
  if (Array.isArray(body?.messages)) return body.messages;
  if (typeof body?.input === "string") {
    return [{ role: "user", content: body.input }];
  }
  if (Array.isArray(body?.input) && body.input.length) {
    // If input is array of text parts, join them
    const text = body.input.map(p => (typeof p === "string" ? p : p?.text || "")).join("");
    return [{ role: "user", content: text }];
  }
  // Fallback
  return [{ role: "user", content: "Hello" }];
}
// Health
app.get("/v1/models", async (_req, res) => {
  const r = await fetch(`${VLLM}/models`);
  const j = await r.json();
  res.json(j);
});
// NON-STREAM: /v1/responses → one-shot chat completion
app.post("/v1/responses", async (req, res) => {
  const { model = "local", stream = false, max_tokens, temperature } = req.body || {};
  const messages = toMessages(req.body);
  if (!stream) {
    const r = await fetch(`${VLLM}/chat/completions`, {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({ model, messages, max_tokens, temperature })
    });
    const j = await r.json();
    const text = j?.choices?.[0]?.message?.content ?? "";
    // Minimal Responses object
    const now = Math.floor(Date.now() / 1000);
    return res.json({
      id: `resp_${now}`,
      object: "response",
      created: now,
      model,
      output: [{ type: "output_text", text }],
      usage: j?.usage || { input_tokens: 0, output_tokens: 0, total_tokens: 0 },
    });
  }
  // STREAM mode → SSE that Codex expects (delta + completed)
  res.setHeader("Content-Type", "text/event-stream; charset=utf-8");
  res.setHeader("Cache-Control", "no-cache, no-transform");
  res.setHeader("Connection", "keep-alive");
  const upstream = await fetch(`${VLLM}/chat/completions`, {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify({ model, messages, stream: true, max_tokens, temperature })
  });
  let fullText = "";
  const reader = upstream.body.getReader();
  // Helper to send SSE events Codex looks for
  const send = (event, data) => {
    res.write(`event: ${event}\n`);
    res.write(`data: ${JSON.stringify(data)}\n\n`);
  };
  // Open: Created
  const ts = Math.floor(Date.now() / 1000);
  send("response.created", { id: `resp_${ts}`, created: ts, model });
  // Pump upstream chunks (OpenAI chat stream) and emit as output_text.delta
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    const chunk = new TextDecoder().decode(value);
    // Parse each line that starts with `data: { ... }`
    for (const line of chunk.split("\n")) {
      const m = line.trim();
      if (!m.startsWith("data:")) continue;
      const payload = m.slice(5).trim();
      if (payload === "[DONE]") continue;
      try {
        const j = JSON.parse(payload);
        const delta = j?.choices?.[0]?.delta?.content || j?.choices?.[0]?.message?.content || "";
        if (delta) {
          fullText += delta;
          send("response.output_text.delta", { delta });
        }
      } catch {
        // ignore parse errors
      }
    }
  }
  // Completed event (Codex waits for this)
  send("response.completed", {
    id: `resp_${ts}`,
    model,
    output: [{ type: "output_text", text: fullText }]
  });
  res.end();
});
const PORT = process.env.PORT || 3030;
app.listen(PORT, () => {
  console.log(`OpenAI Responses shim listening on http://127.0.0.1:${PORT}/v1`);
});
/**
 * Run it:
 * ## /serving/openai-shim/
 * npm init -y
 * npm pkg set type=module   # the file uses ESM imports
 * npm i express
 * node server.js
 *
 * ## Then point Codex to the shim:
 * export OPENAI_BASE_URL="http://127.0.0.1:3030/v1"
 * export OPENAI_API_KEY="local-dev"
 * codex "Write a bash script that prints hello"
 *
 * LM Studio will now log /v1/chat/completions requests coming from the shim, while Codex happily uses /v1/responses.
 */
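To check the streaming path end to end (the one Codex exercises), a curl sketch like the following should show the three event types the shim emits; host, port, and model id are the defaults assumed above:

# Streaming request; -N disables curl's output buffering so SSE events appear as they arrive
curl -N http://127.0.0.1:3030/v1/responses \
  -H 'content-type: application/json' \
  -d '{"model":"local","input":"Write a haiku about containers.","stream":true}'
# Expected SSE events, in order:
#   event: response.created
#   event: response.output_text.delta   (repeated; each carries data: {"delta":"..."})
#   event: response.completed           (carries the full concatenated text)

Since /v1/models is proxied straight through, curl -s http://127.0.0.1:3030/v1/models is a cheap way to confirm that both the shim and LM Studio are reachable.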