Dockerized Codex shim: translates /v1/responses requests into /v1/chat/completions (standalone)
docker run -d \
--name codex-shim \
-p 3030:3030 \
-e LMSTUDIO_BASE="http://192.168.0.25:9999/v1" \
node:20-alpine \
sh -c "
# node:20-alpine already ships node and npm; type=module lets server.js use ESM imports
npm init -y >/dev/null &&
npm pkg set type=module &&
npm i express &&
cat <<'EOF' > server.js
import express from 'express';
import { TextDecoder } from 'util';
// Node 20's built-in fetch is used; its response body is a web ReadableStream with getReader()
const app = express();
app.use(express.json({limit:'2mb'}));
const VLLM = process.env.LMSTUDIO_BASE || 'http://192.168.0.25:9999/v1';
const PORT = process.env.PORT || 3030;
function toMsgs(body){
if(Array.isArray(body?.messages)) return body.messages;
if(typeof body?.input==='string') return [{role:'user',content:body.input}];
return [{role:'user',content:'Hello'}];
}
app.post('/v1/responses', async (req,res)=>{
const {model='local',stream=false,max_tokens,temperature} = req.body||{};
const msgs = toMsgs(req.body);
if(!stream){
const r = await fetch(VLLM+'/chat/completions',{method:'POST',
headers:{'content-type':'application/json'},
body:JSON.stringify({model,messages:msgs,max_tokens,temperature})});
const j = await r.json(); const text=j?.choices?.[0]?.message?.content||'';
const now=Math.floor(Date.now()/1000);
return res.json({id:'resp_'+now,object:'response',created:now,model,
output:[{type:'output_text',text}],usage:j?.usage||{}});
}
res.setHeader('Content-Type','text/event-stream');
res.setHeader('Cache-Control','no-cache');
const up=await fetch(VLLM+'/chat/completions',{method:'POST',
headers:{'content-type':'application/json'},
body:JSON.stringify({model,messages:msgs,stream:true,max_tokens,temperature})});
const reader=up.body.getReader(); let full='';
// plain concatenation here: no backticks or dollar signs for the host shell's double quotes to expand
const send=(e,d)=>res.write('event: '+e+'\ndata: '+JSON.stringify(d)+'\n\n');
const ts=Math.floor(Date.now()/1000);
send('response.created',{id:'resp_'+ts,created:ts,model});
while(true){
const {done,value}=await reader.read(); if(done) break;
const chunk=new TextDecoder().decode(value);
for(const line of chunk.split('\\n')){
if(!line.trim().startsWith('data:')) continue;
const payload=line.trim().slice(5).trim(); if(payload==='[DONE]') continue;
try{const j=JSON.parse(payload);
const d=j?.choices?.[0]?.delta?.content||j?.choices?.[0]?.message?.content||'';
if(d){full+=d;send('response.output_text.delta',{delta:d});}
}catch{}
}
}
send('response.completed',{id:'resp_'+ts,model,output:[{type:'output_text',text:full}]});
res.end();
});
app.get('/v1/models',async(_req,res)=>{
const r=await fetch(VLLM+'/models'); res.send(await r.text());
});
app.listen(PORT,()=>console.log('Shim ready on :'+PORT));
EOF
node server.js
"
// Minimal Responses→ChatCompletions shim for Codex↔LM Studio
// Run: node server.js
import express from "express";
// Node 18+ provides a global fetch whose response body is a web ReadableStream,
// so body.getReader() below works without pulling in node-fetch.
const app = express();
app.use(express.json({ limit: "2mb" }));
const VLLM = process.env.LMSTUDIO_BASE || "http://192.168.0.25:9999/v1";
// Helper to coerce request into chat messages
function toMessages(body) {
if (Array.isArray(body?.messages)) return body.messages;
if (typeof body?.input === "string") {
return [{ role: "user", content: body.input }];
}
if (Array.isArray(body?.input) && body.input.length) {
// If input is array of text parts, join them
const text = body.input.map(p => (typeof p === "string" ? p : p?.text || "")).join("");
return [{ role: "user", content: text }];
}
// Fallback
return [{ role: "user", content: "Hello" }];
}
// Health
app.get("/v1/models", async (_req, res) => {
const r = await fetch(`${VLLM}/models`);
const j = await r.json();
res.json(j);
});
// NON-STREAM: /v1/responses → one-shot chat completion
app.post("/v1/responses", async (req, res) => {
const { model = "local", stream = false, max_tokens, temperature } = req.body || {};
const messages = toMessages(req.body);
if (!stream) {
const r = await fetch(`${VLLM}/chat/completions`, {
method: "POST",
headers: { "content-type": "application/json" },
body: JSON.stringify({ model, messages, max_tokens, temperature })
});
const j = await r.json();
const text = j?.choices?.[0]?.message?.content ?? "";
// Minimal Responses object
const now = Math.floor(Date.now() / 1000);
return res.json({
id: `resp_${now}`,
object: "response",
created: now,
model,
output: [{ type: "output_text", text }],
usage: j?.usage || { input_tokens: 0, output_tokens: 0, total_tokens: 0 },
});
}
// STREAM mode → SSE that Codex expects (delta + completed)
res.setHeader("Content-Type", "text/event-stream; charset=utf-8");
res.setHeader("Cache-Control", "no-cache, no-transform");
res.setHeader("Connection", "keep-alive");
const upstream = await fetch(`${VLLM}/chat/completions`, {
method: "POST",
headers: { "content-type": "application/json" },
body: JSON.stringify({ model, messages, stream: true, max_tokens, temperature })
});
let fullText = "";
const reader = upstream.body.getReader();
// Helper to send SSE events Codex looks for
const send = (event, data) => {
res.write(`event: ${event}\n`);
res.write(`data: ${JSON.stringify(data)}\n\n`);
};
// Open: Created
const ts = Math.floor(Date.now() / 1000);
send("response.created", { id: `resp_${ts}`, created: ts, model });
// Pump upstream chunks (OpenAI chat stream) and emit as output_text.delta
while (true) {
const { done, value } = await reader.read();
if (done) break;
const chunk = new TextDecoder().decode(value);
// Parse each line that starts with `data: { ... }`
for (const line of chunk.split("\n")) {
const m = line.trim();
if (!m.startsWith("data:")) continue;
const payload = m.slice(5).trim();
if (payload === "[DONE]") continue;
try {
const j = JSON.parse(payload);
const delta = j?.choices?.[0]?.delta?.content || j?.choices?.[0]?.message?.content || "";
if (delta) {
fullText += delta;
send("response.output_text.delta", { delta });
}
} catch {
// ignore parse errors
}
}
}
// Completed event (Codex waits for this)
send("response.completed", {
id: `resp_${ts}`,
model,
output: [{ type: "output_text", text: fullText }]
});
res.end();
});
const PORT = process.env.PORT || 3030;
app.listen(PORT, () => {
console.log(`OpenAI Responses shim listening on http://127.0.0.1:${PORT}/v1`);
});
/**
* Run it:
* ## /serving/openai-shim/
* npm init -y
* npm pkg set type=module   # ESM imports need "type": "module" (or rename the file to server.mjs)
* npm i express
* node server.js
*
* ## Then point Codex to the shim:
* export OPENAI_BASE_URL="http://127.0.0.1:3030/v1"
* export OPENAI_API_KEY="local-dev"
* codex "Write a bash script that prints hello"
*
* LM Studio will now log /v1/chat/completions requests coming from the shim, while Codex happily uses /v1/responses.
*/
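
For the streaming path, the shim emits response.created, response.output_text.delta and response.completed as SSE events, which a small client can consume directly. The following is a sketch, assuming the shim is listening on 127.0.0.1:3030 and that the "local" model name is acceptable to the backend; the SSE parsing is deliberately minimal (it splits on blank-line frame boundaries and ignores everything else).

// stream-client.mjs — consume the shim's SSE stream and print deltas as they arrive (Node 18+)
const res = await fetch('http://127.0.0.1:3030/v1/responses', {
  method: 'POST',
  headers: { 'content-type': 'application/json' },
  body: JSON.stringify({ model: 'local', input: 'Count from 1 to 5.', stream: true })
});
const reader = res.body.getReader();
const decoder = new TextDecoder();
let buf = '';
while (true) {
  const { done, value } = await reader.read();
  if (done) break;
  buf += decoder.decode(value, { stream: true });
  // SSE frames end with a blank line; keep any trailing partial frame in the buffer
  const frames = buf.split('\n\n');
  buf = frames.pop() ?? '';
  for (const frame of frames) {
    const event = /^event: (.+)$/m.exec(frame)?.[1];
    const data = /^data: (.+)$/m.exec(frame)?.[1];
    if (event === 'response.output_text.delta' && data) {
      process.stdout.write(JSON.parse(data).delta);
    } else if (event === 'response.completed') {
      process.stdout.write('\n[done]\n');
    }
  }
}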