Skip to content

Instantly share code, notes, and snippets.

@richardhundt
Created August 24, 2025 20:26
Show Gist options
  • Select an option

  • Save richardhundt/8a6b200e9f062e21aee6f4ee7c51c4c3 to your computer and use it in GitHub Desktop.

Select an option

Save richardhundt/8a6b200e9f062e21aee6f4ee7c51c4c3 to your computer and use it in GitHub Desktop.
glm-4.5-codec
class GLM_45_Codec:
    """
    GLM-4.5 codec that implements the ProviderCodec protocol.

    This codec includes the GLM-4.5 jinja template inlined and handles
    the specific response format used by GLM models. It includes stateful
    parsing for streaming to handle tool calls intelligently.
    """

    def __init__(self):
        """Compile the inlined prompt template and start with clean stream state."""
        # Compile the chat template once per codec instance.
        self._template = jinja2.Template(self.TEMPLATE_STR)
        # Initialize the streaming tool-call parser state
        # (same attributes reset_state manages between streams).
        self.reset_state()
# GLM-4.5 chat template (Jinja2), inlined so the codec carries no external
# template-file dependency. It renders: an optional <|system|> block listing
# tool signatures inside <tools></tools>, each conversation turn by role,
# <think>...</think> reasoning sections for assistant turns, and tool calls
# as <tool_call>name + <arg_key>/<arg_value> pairs. The string must match
# the model's expected prompt format exactly -- do not reflow or re-indent.
TEMPLATE_STR = """[gMASK]<sop>
{%- if tools -%}
<|system|>
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{% for tool in tools %}
{{ tool | tojson|string }}
{% endfor %}
</tools>
For each function call, output the function name and arguments within the following XML format:
<tool_call>{function-name}
<arg_key>{arg-key-1}</arg_key>
<arg_value>{arg-value-1}</arg_value>
<arg_key>{arg-key-2}</arg_key>
<arg_value>{arg-value-2}</arg_value>
...
</tool_call>{%- endif -%}
{%- macro visible_text(content) -%}
{%- if content is string -%}
{{- content }}
{%- elif content is iterable and content is not mapping -%}
{%- for item in content -%}
{%- if item is mapping and item.type == 'text' -%}
{{- item.text }}
{%- elif item is string -%}
{{- item }}
{%- endif -%}
{%- endfor -%}
{%- else -%}
{{- content }}
{%- endif -%}
{%- endmacro -%}
{%- set ns = namespace(last_user_index=-1) %}
{%- for m in messages %}
{%- if m.role == 'user' %}
{% set ns.last_user_index = loop.index0 -%}
{%- endif %}
{%- endfor %}
{% for m in messages %}
{%- if m.role == 'user' -%}<|user|>
{% set content = visible_text(m.content) %}{{ content }}
{{- '/nothink' if (enable_thinking is defined and not enable_thinking) else '' -}}
{%- elif m.role == 'assistant' -%}
<|assistant|>
{%- set reasoning_content = '' %}
{%- set content = visible_text(m.content) or '' %}
{%- if m.reasoning_content is string %}
{%- set reasoning_content = m.reasoning_content %}
{%- endif %}
{%- if loop.index0 > ns.last_user_index and reasoning_content -%}
{{ '\n<think>' + reasoning_content + '</think>'}}
{%- else -%}
{{ '\n<think></think>' }}
{%- endif -%}
{%- if content -%}
{{ '\n' + content }}
{%- endif -%}
{% if m.tool_calls %}
{% for tc in m.tool_calls %}
{%- if tc.function %}
{%- set tc = tc.function %}
{%- endif %}
{{ '\n<tool_call>' + tc.name }}
{% set _args = tc.arguments %}
{% for k, v in _args.items() %}
<arg_key>{{ k }}</arg_key>
<arg_value>{{ v | tojson|string if v is not string else v }}</arg_value>
{% endfor %}
</tool_call>{% endfor %}
{% endif %}
{%- elif m.role == 'tool' -%}
{%- if m.content is string -%}
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
{{- '<|observation|>' }}
{%- endif %}
{{- '\n<tool_response>\n' }}
{{- m.content }}
{{- '\n</tool_response>' }}
{%- else -%}
<|observation|>{% for tr in m.content %}
<tool_response>
{{ tr.output if tr.output is defined else tr }}
</tool_response>{% endfor -%}
{% endif -%}
{%- elif m.role == 'system' -%}
<|system|>
{{ visible_text(m.content) }}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
<|assistant|>{{- '\n<think></think>' if (enable_thinking is defined and not enable_thinking) else '' -}}
{%- endif -%}"""
def reset_state(self):
"""Reset the streaming state for a new conversation."""
self._in_tool_call = False
self._tool_call_buffer = ""
self._accumulated_content = ""
def process_streaming_chunk(self, chunk: str) -> tuple[str, bool]:
"""
Process a streaming chunk with state machine logic for tool calls.
Args:
chunk: The streaming JSON chunk from GLM API
Returns:
tuple of (content_to_emit, is_complete_tool_call)
"""
try:
# Parse the GLM streaming chunk format
chunk_data = json.loads(chunk)
# Extract text content from GLM format
content = ""
if "choices" in chunk_data and chunk_data["choices"]:
choice = chunk_data["choices"][0]
if "text" in choice:
content = choice["text"]
if not content:
return "", False
except (json.JSONDecodeError, KeyError):
# If it's not valid JSON, treat as plain text
content = chunk
# Accumulate all content for final parsing
self._accumulated_content += content
# Check if we're entering a tool call
if not self._in_tool_call and "<tool_call>" in content:
# Split the content at the tool call start
parts = content.split("<tool_call>", 1)
content_before = parts[0]
tool_call_start = "<tool_call>" + parts[1] if len(parts) > 1 else "<tool_call>"
self._in_tool_call = True
self._tool_call_buffer = tool_call_start
# Return only the content before the tool call
return content_before, False
# If we're in a tool call, accumulate in buffer
elif self._in_tool_call:
self._tool_call_buffer += content
# Check if we've completed the tool call
if "</tool_call>" in self._tool_call_buffer:
# Split at the closing tag
parts = self._tool_call_buffer.split("</tool_call>", 1)
complete_tool_call = parts[0] + "</tool_call>"
content_after = parts[1] if len(parts) > 1 else ""
self._in_tool_call = False
self._tool_call_buffer = ""
# Return the content after the tool call (if any)
return content_after, True
else:
# Still accumulating tool call, don't emit anything
return "", False
# Normal content, not in a tool call
else:
return content, False
def get_accumulated_content(self) -> str:
"""Get the accumulated content from streaming."""
return self._accumulated_content
def encode(self, messages: list[Message], options: dict[str, Any] | None = None) -> str:
    """
    Encode messages and options into a GLM-4.5 formatted prompt string.

    Args:
        messages: List of Message objects to encode
        options: Optional parameters for template rendering

    Returns:
        Formatted prompt string ready for GLM-4.5 model
    """
    opts = options or {}

    # Convert Message objects into plain dicts the jinja template can walk.
    rendered_messages = []
    for message in messages:
        entry: dict[str, Any] = {
            "role": message.role,
            "content": message.content,
        }
        # Tool calls are re-wrapped in the {"function": {...}} shape the
        # template's tc.function branch expects.
        if message.tool_calls:
            entry["tool_calls"] = [
                {
                    "function": {
                        "name": call.function.name,
                        "arguments": call.function.arguments,
                    }
                }
                for call in message.tool_calls
            ]
        # Tool responses keep their originating call id.
        if message.tool_call_id:
            entry["tool_call_id"] = message.tool_call_id
        rendered_messages.append(entry)

    # Base template variables: always request a generation prompt with
    # thinking enabled.
    template_vars: dict[str, Any] = {
        "messages": rendered_messages,
        "add_generation_prompt": True,
        "enable_thinking": True,
    }
    if "tools" in opts:
        template_vars["tools"] = opts["tools"]
    # Options prefixed with "template_" are forwarded as template variables
    # with the prefix stripped.
    prefix = "template_"
    for key, value in opts.items():
        if key.startswith(prefix):
            template_vars[key[len(prefix):]] = value

    return self._template.render(**template_vars)
def decode(self, raw_response: str, model: str, start_time: datetime) -> Response:
    """
    Decode raw GLM-4.5 response string into Response object.

    Unwraps several possible JSON envelopes, extracts <tool_call> blocks
    into ToolCall objects, strips tool-call markup and <think> sections
    from the visible content, and packages the result as a Response.

    Args:
        raw_response: Raw response string from GLM-4.5 model
        model: Model name for the response
        start_time: Start time of the request

    Returns:
        Response object with parsed content and tool calls
    """
    content = raw_response.strip()
    try:
        # Try to parse as JSON first (common for API responses)
        # NOTE(review): a top-level JSON array or number would raise
        # TypeError on the `in` checks below (only JSONDecodeError is
        # caught) -- confirm upstream always sends JSON objects.
        data = json.loads(content)
        # Handle different response formats: completions-style
        # choices[0].text, chat-style choices[0].message.content, or a
        # flat content/text/response key.
        if "choices" in data and data["choices"]:
            choice = data["choices"][0]
            if "text" in choice:
                content = choice["text"]
            elif "message" in choice and "content" in choice["message"]:
                content = choice["message"]["content"]
        elif "content" in data:
            content = data["content"]
        elif "text" in data:
            content = data["text"]
        elif "response" in data:
            content = data["response"]
        else:
            # Fallback to raw response
            content = raw_response
    except json.JSONDecodeError:
        # Not JSON, treat as plain text
        content = raw_response
    # Parse tool calls if present (GLM format)
    tool_calls: list[ToolCall] = []
    if "<tool_call>" in content:
        # Extract tool calls from GLM format: the name runs to the first
        # whitespace/'<', followed by zero or more <arg_key>/<arg_value>
        # pairs (matching the inlined template's output shape).
        tool_call_pattern = r'<tool_call>([^\s<\n]+)\s*((?:<arg_key>.*?</arg_key>\s*<arg_value>.*?</arg_value>\s*)*)</tool_call>'
        matches = re.findall(tool_call_pattern, content, re.DOTALL)
        for tool_name, args_str in matches:
            # Parse arguments out of the key/value pair block
            arg_pattern = r'<arg_key>(.*?)</arg_key>\s*<arg_value>(.*?)</arg_value>'
            arg_matches = re.findall(arg_pattern, args_str, re.DOTALL)
            arguments = {}
            for key, value in arg_matches:
                key = key.strip()
                value = value.strip()
                # Try to parse JSON values; non-JSON stays a plain string
                try:
                    arguments[key] = json.loads(value)
                except json.JSONDecodeError:
                    arguments[key] = value
            tool_calls.append(ToolCall(
                function=ToolCallFunction(
                    name=tool_name.strip(),
                    arguments=arguments
                )
            ))
        # Remove tool calls from content for cleaner display
        content = re.sub(tool_call_pattern, '', content, flags=re.DOTALL).strip()
    # Clean up GLM thinking tags -- the pass order matters: empty pairs
    # first, then collapse doubled closers, then any remaining pairs.
    if "<think>" in content:
        # First pass: remove empty think tags
        content = re.sub(r'<think>\s*</think>', '', content, flags=re.DOTALL)
        # Second pass: collapse doubled/malformed closers like
        # "</think>\n</think>" down to a single "</think>"
        content = re.sub(r'</think>\s*</think>', '</think>', content, flags=re.DOTALL)
        # Third pass: remove any remaining whitespace-only think tags
        content = re.sub(r'<think>\s+</think>', '', content, flags=re.DOTALL)
        # Final cleanup: drop any remaining think sections entirely
        # (reasoning text is not part of the visible reply)
        content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
        content = content.strip()
    return Response(
        model=model,
        created_at=start_time,
        done=True,
        message=Message(
            role="assistant",
            content=content,
            tool_calls=tool_calls if tool_calls else None
        ),
        # NOTE(review): computed in milliseconds; confirm that
        # Response.total_duration expects ms (Ollama's wire format,
        # which this shape resembles, uses nanoseconds).
        total_duration=int((datetime.now() - start_time).total_seconds() * 1000)
    )
@richardhundt
Copy link
Author

richardhundt commented Aug 24, 2025

And here's the streaming dispatch loop:

            # ...
            encoded = codec.encode(messages, options) # send it through the jinja template
            request_data = {
                "prompt": encoded, 
                "stream": True,
            }
            # ...
            async with client.stream("POST", endpoint, json=request_data) as response:
                response.raise_for_status()

                model_name = request_data["model"]

                # Reset codec state for new stream
                self._codec.reset_state()

                async for line in response.aiter_lines():
                    logger.debug(f"Received streaming line: {repr(line)}")

                    if not line.strip():
                        continue

                    assert line.startswith("data: ")
                    data_str = line[6:]  # Remove "data: " prefix
                    logger.debug(f"SSE data: {repr(data_str)}")

                    if data_str.strip() == "[DONE]":
                        # Stream is complete, parse accumulated content for tool calls
                        break

                    content_to_emit, is_complete_tool_call = self._codec.process_streaming_chunk(data_str)
                    if content_to_emit:
                        logger.debug(f"Yielding content from codec: {repr(content_to_emit)}")
                        yield Response(
                            model=model_name,
                            created_at=start_time,
                            done=False,
                            message=Message(
                                role="assistant",
                                content=content_to_emit,
                                tool_calls=[]
                            ),
                            total_duration=None
                        )

                # Parse the complete accumulated content using the codec to extract tool calls
                final_accumulated_content = self._codec.get_accumulated_content()

                if final_accumulated_content:
                    final_response = self._codec.decode(final_accumulated_content, model_name, start_time)
                    yield final_response

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment