Dockerized Codex shim: translates /v1/responses requests into /v1/chat/completions (standalone)
docker run -d \
--name codex-shim \
-p 3030:3030 \
-e LMSTUDIO_BASE="http://192.168.0.25:9999/v1" \
node:20-alpine \
sh -c "
# node:20-alpine already ships node and npm; type=module lets server.js use ESM imports
npm init -y >/dev/null &&
npm pkg set type=module &&
npm i express &&
cat <<'EOF' > server.js
import express from 'express';
import { TextDecoder } from 'util';
// Node 20's built-in fetch is used; its response body is a web ReadableStream with getReader()
const app = express();
app.use(express.json({limit:'2mb'}));
const VLLM = process.env.LMSTUDIO_BASE || 'http://192.168.0.25:9999/v1';
const PORT = process.env.PORT || 3030;
function toMsgs(body){
if(Array.isArray(body?.messages)) return body.messages;
if(typeof body?.input==='string') return [{role:'user',content:body.input}];
return [{role:'user',content:'Hello'}];
}
app.post('/v1/responses', async (req,res)=>{
const {model='local',stream=false,max_tokens,temperature} = req.body||{};
const msgs = toMsgs(req.body);
if(!stream){
const r = await fetch(VLLM+'/chat/completions',{method:'POST',
headers:{'content-type':'application/json'},
body:JSON.stringify({model,messages:msgs,max_tokens,temperature})});
const j = await r.json(); const text=j?.choices?.[0]?.message?.content||'';
const now=Math.floor(Date.now()/1000);
return res.json({id:'resp_'+now,object:'response',created:now,model,
output:[{type:'output_text',text}],usage:j?.usage||{}});
}
res.setHeader('Content-Type','text/event-stream');
res.setHeader('Cache-Control','no-cache');
const up=await fetch(VLLM+'/chat/completions',{method:'POST',
headers:{'content-type':'application/json'},
body:JSON.stringify({model,messages:msgs,stream:true,max_tokens,temperature})});
const reader=up.body.getReader(); let full='';
// plain concatenation here: no backticks or dollar signs for the host shell's double quotes to expand
const send=(e,d)=>res.write('event: '+e+'\ndata: '+JSON.stringify(d)+'\n\n');
const ts=Math.floor(Date.now()/1000);
send('response.created',{id:'resp_'+ts,created:ts,model});
while(true){
const {done,value}=await reader.read(); if(done) break;
const chunk=new TextDecoder().decode(value);
for(const line of chunk.split('\\n')){
if(!line.trim().startsWith('data:')) continue;
const payload=line.trim().slice(5).trim(); if(payload==='[DONE]') continue;
try{const j=JSON.parse(payload);
const d=j?.choices?.[0]?.delta?.content||j?.choices?.[0]?.message?.content||'';
if(d){full+=d;send('response.output_text.delta',{delta:d});}
}catch{}
}
}
send('response.completed',{id:'resp_'+ts,model,output:[{type:'output_text',text:full}]});
res.end();
});
app.get('/v1/models',async(_req,res)=>{
const r=await fetch(VLLM+'/models'); res.send(await r.text());
});
app.listen(PORT,()=>console.log('Shim ready on :'+PORT));
EOF
node server.js
"
// Minimal Responses→ChatCompletions shim for Codex↔LM Studio
// Run: node server.js
import express from "express";
// Node 18+ provides a global fetch whose response body is a web ReadableStream,
// so body.getReader() below works without pulling in node-fetch.
const app = express();
app.use(express.json({ limit: "2mb" }));
const VLLM = process.env.LMSTUDIO_BASE || "http://192.168.0.25:9999/v1";
// Helper to coerce request into chat messages
function toMessages(body) {
if (Array.isArray(body?.messages)) return body.messages;
if (typeof body?.input === "string") {
return [{ role: "user", content: body.input }];
}
if (Array.isArray(body?.input) && body.input.length) {
// If input is array of text parts, join them
const text = body.input.map(p => (typeof p === "string" ? p : p?.text || "")).join("");
return [{ role: "user", content: text }];
}
// Fallback
return [{ role: "user", content: "Hello" }];
}
// Health
app.get("/v1/models", async (_req, res) => {
const r = await fetch(`${VLLM}/models`);
const j = await r.json();
res.json(j);
});
// NON-STREAM: /v1/responses → one-shot chat completion
app.post("/v1/responses", async (req, res) => {
const { model = "local", stream = false, max_tokens, temperature } = req.body || {};
const messages = toMessages(req.body);
if (!stream) {
const r = await fetch(`${VLLM}/chat/completions`, {
method: "POST",
headers: { "content-type": "application/json" },
body: JSON.stringify({ model, messages, max_tokens, temperature })
});
const j = await r.json();
const text = j?.choices?.[0]?.message?.content ?? "";
// Minimal Responses object
const now = Math.floor(Date.now() / 1000);
return res.json({
id: `resp_${now}`,
object: "response",
created: now,
model,
output: [{ type: "output_text", text }],
usage: j?.usage || { input_tokens: 0, output_tokens: 0, total_tokens: 0 },
});
}
// STREAM mode → SSE that Codex expects (delta + completed)
res.setHeader("Content-Type", "text/event-stream; charset=utf-8");
res.setHeader("Cache-Control", "no-cache, no-transform");
res.setHeader("Connection", "keep-alive");
const upstream = await fetch(`${VLLM}/chat/completions`, {
method: "POST",
headers: { "content-type": "application/json" },
body: JSON.stringify({ model, messages, stream: true, max_tokens, temperature })
});
let fullText = "";
const reader = upstream.body.getReader();
// Helper to send SSE events Codex looks for
const send = (event, data) => {
res.write(`event: ${event}\n`);
res.write(`data: ${JSON.stringify(data)}\n\n`);
};
// Open: Created
const ts = Math.floor(Date.now() / 1000);
send("response.created", { id: `resp_${ts}`, created: ts, model });
// Pump upstream chunks (OpenAI chat stream) and emit as output_text.delta
while (true) {
const { done, value } = await reader.read();
if (done) break;
const chunk = new TextDecoder().decode(value);
// Parse each line that starts with `data: { ... }`
for (const line of chunk.split("\n")) {
const m = line.trim();
if (!m.startsWith("data:")) continue;
const payload = m.slice(5).trim();
if (payload === "[DONE]") continue;
try {
const j = JSON.parse(payload);
const delta = j?.choices?.[0]?.delta?.content || j?.choices?.[0]?.message?.content || "";
if (delta) {
fullText += delta;
send("response.output_text.delta", { delta });
}
} catch {
// ignore parse errors
}
}
}
// Completed event (Codex waits for this)
send("response.completed", {
id: `resp_${ts}`,
model,
output: [{ type: "output_text", text: fullText }]
});
res.end();
});
const PORT = process.env.PORT || 3030;
app.listen(PORT, () => {
console.log(`OpenAI Responses shim listening on http://127.0.0.1:${PORT}/v1`);
});
/**
* Run it:
* ## /serving/openai-shim/
* npm init -y
* npm pkg set type=module   # ESM imports need "type": "module" (or rename the file to server.mjs)
* npm i express
* node server.js
*
* ## Then point Codex to the shim:
* export OPENAI_BASE_URL="http://127.0.0.1:3030/v1"
* export OPENAI_API_KEY="local-dev"
* codex "Write a bash script that prints hello"
*
* LM Studio will now log /v1/chat/completions requests coming from the shim, while Codex happily uses /v1/responses.
*/
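
For the streaming path, the shim emits response.created, response.output_text.delta and response.completed as SSE events, which a small client can consume directly. The following is a sketch, assuming the shim is listening on 127.0.0.1:3030 and that the "local" model name is acceptable to the backend; the SSE parsing is deliberately minimal (it splits on blank-line frame boundaries and ignores everything else).

// stream-client.mjs — consume the shim's SSE stream and print deltas as they arrive (Node 18+)
const res = await fetch('http://127.0.0.1:3030/v1/responses', {
  method: 'POST',
  headers: { 'content-type': 'application/json' },
  body: JSON.stringify({ model: 'local', input: 'Count from 1 to 5.', stream: true })
});
const reader = res.body.getReader();
const decoder = new TextDecoder();
let buf = '';
while (true) {
  const { done, value } = await reader.read();
  if (done) break;
  buf += decoder.decode(value, { stream: true });
  // SSE frames end with a blank line; keep any trailing partial frame in the buffer
  const frames = buf.split('\n\n');
  buf = frames.pop() ?? '';
  for (const frame of frames) {
    const event = /^event: (.+)$/m.exec(frame)?.[1];
    const data = /^data: (.+)$/m.exec(frame)?.[1];
    if (event === 'response.output_text.delta' && data) {
      process.stdout.write(JSON.parse(data).delta);
    } else if (event === 'response.completed') {
      process.stdout.write('\n[done]\n');
    }
  }
}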