Skip to content

Instantly share code, notes, and snippets.

@murphyk
Created November 23, 2025 19:32
Show Gist options
  • Select an option

  • Save murphyk/41a15824378143222bd29235727680d6 to your computer and use it in GitHub Desktop.

Select an option

Save murphyk/41a15824378143222bd29235727680d6 to your computer and use it in GitHub Desktop.
Interactive Tic-Tac-Toe (TTT) in Tinker
import os
from getpass import getpass

# The published snippet had the key redacted, leaving `os.environ[...] = #`,
# which is a SyntaxError. Instead of pasting a secret into source, reuse a key
# that is already exported in the environment and only prompt for it (without
# echoing) when it is missing.
if not os.environ.get("TINKER_API_KEY"):
    os.environ["TINKER_API_KEY"] = getpass("TINKER_API_KEY: ")
import asyncio
from datetime import datetime
import chz
from tinker_cookbook import cli_utils, model_info
from tinker_cookbook.recipes.multiplayer_rl.text_arena.env import TwoPlayerTextArenaDatasetBuilder
from tinker_cookbook.rl import train
from tinker_cookbook import renderers, tokenizer_utils
import nest_asyncio
nest_asyncio.apply()
@chz.chz
class CLIConfig:
    """Settings for the interactive TextArena walkthrough in this file.

    Only ``model_name``, ``renderer_name``, ``game_name``, ``batch_size``,
    ``num_train_datapoints`` and ``num_test_datapoints`` are read by the code
    visible in this file; the remaining fields are declared but not consumed
    here.
    """

    # Model identifier; used for the dataset builder, the renderer lookup and
    # the tokenizer.
    model_name: str = "Qwen/Qwen3-4B-Instruct-2507"
    # Optional renderer override. When None, the script falls back to
    # model_info.get_recommended_renderer_name(model_name).
    renderer_name: str | None = None
    # Game id handed to TwoPlayerTextArenaDatasetBuilder.
    game_name: str = "TicTacToe-v0"
    # Deliberately tiny for interactive poking; the trailing numbers look like
    # the full-scale values -- TODO confirm.
    batch_size: int = 2  # 512
    num_train_datapoints: int = 4  # 131072
    num_test_datapoints: int = 128

    # Not read by the code visible in this file.
    learning_rate: float = 3e-5
    max_tokens: int = 64
    eval_every: int = 5
    save_every: int = 20
    wandb_project: str | None = None
    wandb_name: str | None = None
    log_path: str | None = None
# Interactive walkthrough: build the two-player TextArena dataset, pull a single
# environment out of the first batch, and submit one hand-written move.
# The statements below use top-level `await`, so they must run somewhere that
# permits it (e.g. an IPython/Jupyter cell); a plain `python file.py` rejects it.
cli_config = CLIConfig()
model_name = cli_config.model_name
# Use the explicit renderer if one was configured, otherwise the one
# recommended for the model.
renderer_name = cli_config.renderer_name or model_info.get_recommended_renderer_name(
cli_config.model_name
)
dataset_builder = TwoPlayerTextArenaDatasetBuilder(
batch_size=cli_config.batch_size,
model_name=model_name,
game_name=cli_config.game_name,
num_train_datapoints=cli_config.num_train_datapoints,
num_test_datapoints=cli_config.num_test_datapoints,
renderer_name=renderer_name,
)
# Calling the builder is awaited and unpacks into a (train, test) pair; the
# second element is presumably optional, going by its name -- TODO confirm.
dataset, maybe_test_dataset = await dataset_builder()
# Take the first entry of batch 0 and ask it to create its environments.
batch = dataset.get_batch(0)
envs_builder = batch[0]
envs = await envs_builder.make_envs()
# Only the first environment is driven from here on.
env = envs[0]
# `stop` is returned alongside the observation but is not used afterwards.
obs, stop = await env.initial_observation()
print(obs)
tokenizer = tokenizer_utils.get_tokenizer(cli_config.model_name)
print(tokenizer.decode(obs.to_ints())) # decoded observation text: the TTT board
# Hand-written move; presumably the bracketed form selects cell 0 -- TODO confirm
# against the game's expected action format.
action = '[0]'
# NOTE(review): the move is encoded with the raw tokenizer (special tokens
# enabled) rather than through the renderer; confirm that env.step accepts
# tokens in this form.
action_tokens = tokenizer.encode(action, add_special_tokens=True)
# NOTE(review): the author reports that this await never returns. Presumably a
# two-player env waits for the opponent's environment (e.g. envs[1]) to move
# before step() resolves, so driving only envs[0] blocks forever -- TODO
# confirm against the env implementation.
res = await env.step(action=action_tokens)
print(res) # hangs (per the author: never reached because the await above blocks)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment