Created
August 24, 2025 20:26
-
-
Save richardhundt/8a6b200e9f062e21aee6f4ee7c51c4c3 to your computer and use it in GitHub Desktop.
glm-4.5-codec
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class GLM_45_Codec:
    """
    GLM-4.5 codec that implements the ProviderCodec protocol.

    This codec includes the GLM-4.5 jinja template inlined and handles
    the specific response format used by GLM models. It includes stateful
    parsing for streaming to handle tool calls intelligently.

    Streaming state machine: plain text passes through `process_streaming_chunk`
    untouched; once a `<tool_call>` opening tag is seen, text is buffered until
    the matching `</tool_call>` arrives, at which point the call is signalled
    via the returned flag. The full raw stream is also accumulated for a final
    `decode` pass.
    """

    # GLM-4.5 chat template, inlined verbatim. NOTE: whitespace inside this
    # string is significant to the rendered prompt -- edit with care.
    TEMPLATE_STR = """[gMASK]<sop>
{%- if tools -%}
<|system|>
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{% for tool in tools %}
{{ tool | tojson|string }}
{% endfor %}
</tools>
For each function call, output the function name and arguments within the following XML format:
<tool_call>{function-name}
<arg_key>{arg-key-1}</arg_key>
<arg_value>{arg-value-1}</arg_value>
<arg_key>{arg-key-2}</arg_key>
<arg_value>{arg-value-2}</arg_value>
...
</tool_call>{%- endif -%}
{%- macro visible_text(content) -%}
{%- if content is string -%}
{{- content }}
{%- elif content is iterable and content is not mapping -%}
{%- for item in content -%}
{%- if item is mapping and item.type == 'text' -%}
{{- item.text }}
{%- elif item is string -%}
{{- item }}
{%- endif -%}
{%- endfor -%}
{%- else -%}
{{- content }}
{%- endif -%}
{%- endmacro -%}
{%- set ns = namespace(last_user_index=-1) %}
{%- for m in messages %}
{%- if m.role == 'user' %}
{% set ns.last_user_index = loop.index0 -%}
{%- endif %}
{%- endfor %}
{% for m in messages %}
{%- if m.role == 'user' -%}<|user|>
{% set content = visible_text(m.content) %}{{ content }}
{{- '/nothink' if (enable_thinking is defined and not enable_thinking) else '' -}}
{%- elif m.role == 'assistant' -%}
<|assistant|>
{%- set reasoning_content = '' %}
{%- set content = visible_text(m.content) or '' %}
{%- if m.reasoning_content is string %}
{%- set reasoning_content = m.reasoning_content %}
{%- endif %}
{%- if loop.index0 > ns.last_user_index and reasoning_content -%}
{{ '\n<think>' + reasoning_content + '</think>'}}
{%- else -%}
{{ '\n<think></think>' }}
{%- endif -%}
{%- if content -%}
{{ '\n' + content }}
{%- endif -%}
{% if m.tool_calls %}
{% for tc in m.tool_calls %}
{%- if tc.function %}
{%- set tc = tc.function %}
{%- endif %}
{{ '\n<tool_call>' + tc.name }}
{% set _args = tc.arguments %}
{% for k, v in _args.items() %}
<arg_key>{{ k }}</arg_key>
<arg_value>{{ v | tojson|string if v is not string else v }}</arg_value>
{% endfor %}
</tool_call>{% endfor %}
{% endif %}
{%- elif m.role == 'tool' -%}
{%- if m.content is string -%}
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
{{- '<|observation|>' }}
{%- endif %}
{{- '\n<tool_response>\n' }}
{{- m.content }}
{{- '\n</tool_response>' }}
{%- else -%}
<|observation|>{% for tr in m.content %}
<tool_response>
{{ tr.output if tr.output is defined else tr }}
</tool_response>{% endfor -%}
{% endif -%}
{%- elif m.role == 'system' -%}
<|system|>
{{ visible_text(m.content) }}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
<|assistant|>{{- '\n<think></think>' if (enable_thinking is defined and not enable_thinking) else '' -}}
{%- endif -%}"""

    def __init__(self):
        """Initialize the GLM-4.5 codec and its streaming parser state."""
        self._template = jinja2.Template(self.TEMPLATE_STR)
        # State for streaming tool call parsing
        self._in_tool_call = False      # True while inside an open <tool_call> span
        self._tool_call_buffer = ""     # partial tool-call text awaiting </tool_call>
        self._accumulated_content = ""  # every extracted chunk seen this stream

    def reset_state(self):
        """Reset the streaming state for a new conversation."""
        self._in_tool_call = False
        self._tool_call_buffer = ""
        self._accumulated_content = ""

    def process_streaming_chunk(self, chunk: str) -> tuple[str, bool]:
        """
        Process a streaming chunk with state machine logic for tool calls.

        Args:
            chunk: The streaming JSON chunk from the GLM API (or plain text)

        Returns:
            tuple of (content_to_emit, tool_call_completed). Text belonging to
            a tool call is withheld from the emitted content; the flag is True
            when at least one </tool_call> closed in this chunk.

        FIX vs. original: a <tool_call>...</tool_call> pair fully contained in
        a single chunk is now detected immediately (the original only checked
        for the closing tag on *subsequent* chunks), and content following a
        completed call is re-scanned for further tool calls. A JSON chunk that
        parses to a non-dict (e.g. a bare number) no longer raises TypeError.
        NOTE(review): an opening/closing tag split across chunk boundaries
        (e.g. "<tool" + "_call>") is still not detected -- confirm the GLM
        server never splits tags before relying on that.
        """
        try:
            parsed = json.loads(chunk)
        except json.JSONDecodeError:
            # Not valid JSON: treat the raw chunk as plain text.
            content = chunk
        else:
            # Extract the text payload from the GLM streaming envelope;
            # guard shapes so scalar/array JSON can't raise TypeError.
            content = ""
            if isinstance(parsed, dict) and parsed.get("choices"):
                choice = parsed["choices"][0]
                if isinstance(choice, dict) and "text" in choice:
                    content = choice["text"]
            if not content:
                return "", False

        # Accumulate all content for final parsing.
        self._accumulated_content += content

        emitted = []        # plain-text pieces to pass through
        completed = False   # did any tool call close in this chunk?
        pending = content
        while True:
            if self._in_tool_call:
                self._tool_call_buffer += pending
                pending = ""
                if "</tool_call>" not in self._tool_call_buffer:
                    # Still accumulating the tool call; emit nothing yet.
                    break
                # Tool call complete: drop the buffered call text and keep
                # scanning whatever followed the closing tag.
                _, pending = self._tool_call_buffer.split("</tool_call>", 1)
                self._in_tool_call = False
                self._tool_call_buffer = ""
                completed = True
            elif "<tool_call>" in pending:
                # Entering a tool call: pass through the text before it,
                # buffer from the tag onward.
                before, rest = pending.split("<tool_call>", 1)
                emitted.append(before)
                self._in_tool_call = True
                self._tool_call_buffer = "<tool_call>"
                pending = rest
            else:
                # Normal content, not in a tool call.
                emitted.append(pending)
                break
        return "".join(emitted), completed

    def get_accumulated_content(self) -> str:
        """Get the accumulated content from streaming."""
        return self._accumulated_content

    def encode(self, messages: "list[Message]", options: "dict[str, Any] | None" = None) -> str:
        """
        Encode messages and options into a GLM-4.5 formatted prompt string.

        Args:
            messages: List of Message objects to encode
            options: Optional parameters for template rendering; "tools" is
                passed through, and any "template_*" key is forwarded to the
                template with the prefix removed.

        Returns:
            Formatted prompt string ready for the GLM-4.5 model
        """
        # Convert Message objects to dicts for jinja2.
        message_dicts = []
        for msg in messages:
            msg_dict: "dict[str, Any]" = {
                "role": msg.role,
                "content": msg.content,
            }
            # Handle tool calls. The template iterates tc.arguments with
            # .items() -- assumes arguments is already a dict, TODO confirm.
            if msg.tool_calls:
                msg_dict["tool_calls"] = [
                    {
                        "function": {
                            "name": tc.function.name,
                            "arguments": tc.function.arguments
                        }
                    }
                    for tc in msg.tool_calls
                ]
            # Handle tool call id for tool responses.
            if msg.tool_call_id:
                msg_dict["tool_call_id"] = msg.tool_call_id
            message_dicts.append(msg_dict)

        # Template variables; thinking is enabled by default.
        template_vars = {
            "messages": message_dicts,
            "add_generation_prompt": True,
            "enable_thinking": True,
        }
        # Add tools if provided in options.
        if options and "tools" in options:
            template_vars["tools"] = options["tools"]
        # Add any other template variables from options.
        if options:
            for key, value in options.items():
                if key.startswith("template_"):
                    template_vars[key[9:]] = value  # Remove "template_" prefix
        return self._template.render(**template_vars)

    def decode(self, raw_response: str, model: str, start_time: datetime) -> "Response":
        """
        Decode a raw GLM-4.5 response string into a Response object.

        Args:
            raw_response: Raw response string from the GLM-4.5 model
            model: Model name for the response
            start_time: Start time of the request (naive or tz-aware)

        Returns:
            Response object with parsed content and tool calls

        FIX vs. original: non-dict JSON responses no longer raise TypeError;
        stray "</think>" tags left by malformed sequences are now removed;
        duration math tolerates a tz-aware start_time.
        """
        content = raw_response.strip()
        try:
            # Responses may arrive as an API-style JSON envelope; unwrap the
            # text payload from the first format that matches.
            data = json.loads(content)
        except json.JSONDecodeError:
            # Not JSON, treat as plain text.
            content = raw_response
        else:
            if isinstance(data, dict):
                if data.get("choices"):
                    choice = data["choices"][0]
                    if "text" in choice:
                        content = choice["text"]
                    elif "message" in choice and "content" in choice["message"]:
                        content = choice["message"]["content"]
                elif "content" in data:
                    content = data["content"]
                elif "text" in data:
                    content = data["text"]
                elif "response" in data:
                    content = data["response"]
                else:
                    content = raw_response
            else:
                # JSON but not an object (e.g. bare scalar/array): fall back.
                content = raw_response

        # Parse tool calls if present (GLM XML-ish format).
        tool_calls = []
        tool_call_pattern = r'<tool_call>([^\s<\n]+)\s*((?:<arg_key>.*?</arg_key>\s*<arg_value>.*?</arg_value>\s*)*)</tool_call>'
        if "<tool_call>" in content:
            for tool_name, args_str in re.findall(tool_call_pattern, content, re.DOTALL):
                arg_pattern = r'<arg_key>(.*?)</arg_key>\s*<arg_value>(.*?)</arg_value>'
                arguments = {}
                for key, value in re.findall(arg_pattern, args_str, re.DOTALL):
                    key = key.strip()
                    value = value.strip()
                    # Values may be JSON-encoded; fall back to the raw string.
                    try:
                        arguments[key] = json.loads(value)
                    except json.JSONDecodeError:
                        arguments[key] = value
                tool_calls.append(ToolCall(
                    function=ToolCallFunction(
                        name=tool_name.strip(),
                        arguments=arguments
                    )
                ))
            # Remove tool calls from content for cleaner display.
            content = re.sub(tool_call_pattern, '', content, flags=re.DOTALL).strip()

        # Strip GLM thinking tags: drop paired <think>...</think> blocks
        # (reasoning is not part of the visible reply), then any orphaned
        # closing tags left behind by malformed runs like "</think></think>".
        # An unterminated "<think>..." (truncated stream) is left untouched.
        if "<think>" in content or "</think>" in content:
            content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
            content = content.replace('</think>', '')
            content = content.strip()

        return Response(
            model=model,
            created_at=start_time,
            done=True,
            message=Message(
                role="assistant",
                content=content,
                tool_calls=tool_calls if tool_calls else None
            ),
            # Use start_time's tzinfo so aware start times don't raise on
            # naive-minus-aware subtraction; duration is in milliseconds.
            total_duration=int((datetime.now(start_time.tzinfo) - start_time).total_seconds() * 1000)
        )
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
And here's the streaming dispatch loop: