Skip to content

Instantly share code, notes, and snippets.

@alt-glitch
Created September 17, 2025 09:27
Show Gist options
  • Select an option

  • Save alt-glitch/adbdde2c806ad40e9f2e96c948554c9f to your computer and use it in GitHub Desktop.

Select an option

Save alt-glitch/adbdde2c806ad40e9f2e96c948554c9f to your computer and use it in GitHub Desktop.
Reverse-text environment for `verifiers`, modified to work with ClaudeCodeEnv
from datasets import load_dataset
import verifiers as vf
def load_environment(num_train_examples=2, num_eval_examples=1, **kwargs):
    """Build the text-reversal environment on top of ``vf.ClaudeCodeEnv``.

    Every example's ``question`` is the ``text`` field of a row from the
    ``agentlans/wikipedia-paragraphs`` train split, and its ``answer`` is that
    same text reversed character by character.

    Args:
        num_train_examples: Number of leading rows used as the training set.
        num_eval_examples: Number of rows, taken immediately after the
            training rows, used as the evaluation set.
        **kwargs: Accepted so callers may pass extra options; not used here.

    Returns:
        The configured ``vf.ClaudeCodeEnv`` instance.
    """
    # Imported once per environment build rather than on every reward call.
    from difflib import SequenceMatcher

    total_examples = num_train_examples + num_eval_examples

    # Select the rows that are actually needed *before* mapping, so only those
    # paragraphs are reversed instead of the entire split.
    dataset = load_dataset("agentlans/wikipedia-paragraphs", split="train")
    dataset = dataset.select(range(total_examples)).map(  # type: ignore
        lambda x: {"question": x["text"], "answer": x["text"][::-1]}
    )
    train_dataset = dataset.select(range(num_train_examples))  # type: ignore
    eval_dataset = dataset.select(  # type: ignore
        range(num_train_examples, total_examples)
    )

    parser = vf.XMLParser(["think", "answer"], answer_field="answer")
    # NOTE: the continuation lines of this literal must stay at column 0;
    # any leading whitespace would become part of the prompt text.
    system_prompt = f"""Reverse the given text.
Respond in the following format:
{parser.get_format_str()}"""

    def lcs_reward_func(completion, answer, **kwargs) -> float:
        """Score the parsed completion against the expected reversed text.

        Returns the ``difflib.SequenceMatcher`` similarity ratio (a float in
        [0, 1]) between the parsed answer and ``answer``. A completion with no
        parseable answer is scored as the empty string.
        """
        response = parser.parse_answer(completion) or ""
        # autojunk=False: with the default heuristic, characters that are
        # frequent in a 200+ character target are dropped from the match
        # index, which can badly under-score near-correct long reversals.
        return SequenceMatcher(None, response, answer, autojunk=False).ratio()

    rubric = vf.Rubric(
        parser=parser,
        funcs=[
            lcs_reward_func,
            parser.get_format_reward_func(),
        ],
        # Listed in the same order as ``funcs`` above.
        weights=[1.0, 0.2],
    )
    vf_env = vf.ClaudeCodeEnv(
        dataset=train_dataset,
        eval_dataset=eval_dataset,  # type: ignore
        system_prompt=system_prompt,
        parser=parser,
        rubric=rubric,
    )
    return vf_env
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment