Created
November 23, 2025 19:32
-
-
Save murphyk/41a15824378143222bd29235727680d6 to your computer and use it in GitHub Desktop.
Interactive tic-tac-toe (TTT) in Tinker
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os

# The key's value was elided in the published snippet (the assignment had no
# right-hand side). Read it from the environment instead of hard-coding a
# secret in source, and fail early with a clear message when it is missing.
if not os.environ.get("TINKER_API_KEY"):
    raise RuntimeError(
        "TINKER_API_KEY is not set; export it in the environment before running."
    )

import asyncio
from datetime import datetime

import chz
from tinker_cookbook import cli_utils, model_info
from tinker_cookbook.recipes.multiplayer_rl.text_arena.env import TwoPlayerTextArenaDatasetBuilder
from tinker_cookbook.rl import train
from tinker_cookbook import renderers, tokenizer_utils

import nest_asyncio

# Patch asyncio so an event loop can be re-entered; this matters when the
# code runs inside an environment that already has a loop running
# (e.g. a notebook kernel).
nest_asyncio.apply()
@chz.chz
class CLIConfig:
    """Settings for the interactive two-player TextArena experiment.

    Every field carries a default, so the config is built with ``CLIConfig()``.
    """

    # Model identifier; also used to look up the tokenizer and, when
    # ``renderer_name`` is None, the recommended renderer.
    model_name: str = "Qwen/Qwen3-4B-Instruct-2507"
    # Explicit renderer override; None means "use the recommended one".
    renderer_name: str | None = None
    # Game identifier handed to the dataset builder.
    game_name: str = "TicTacToe-v0"

    # Dataset sizing, forwarded to the dataset builder.
    # NOTE(review): the trailing values (512, 131072) look like the full-scale
    # settings these were shrunk from for interactive use -- confirm.
    batch_size: int = 2  # 512
    num_train_datapoints: int = 4  # 131072
    num_test_datapoints: int = 128

    # The remaining fields are not read by the visible script.
    # NOTE(review): presumably consumed by a training entry point -- confirm.
    learning_rate: float = 3e-5
    max_tokens: int = 64
    eval_every: int = 5
    save_every: int = 20
    wandb_project: str | None = None
    wandb_name: str | None = None
    log_path: str | None = None
async def play_opening_move(
    cli_config: "CLIConfig",
    action: str = "[0]",
    step_timeout: float | None = 30.0,
):
    """Build one game environment, show the initial board, and play one move.

    Args:
        cli_config: Experiment settings (model, renderer, game, dataset sizes).
        action: Text of the move to submit for the first environment.
        step_timeout: Seconds to wait for ``env.step`` before giving up.
            ``None`` waits indefinitely (the original behaviour).

    Returns:
        A ``(env, step_result)`` tuple so the caller can keep interacting.

    Raises:
        TimeoutError: If ``env.step`` does not finish within ``step_timeout``.
    """
    model_name = cli_config.model_name
    renderer_name = cli_config.renderer_name or model_info.get_recommended_renderer_name(
        model_name
    )
    dataset_builder = TwoPlayerTextArenaDatasetBuilder(
        batch_size=cli_config.batch_size,
        model_name=model_name,
        game_name=cli_config.game_name,
        num_train_datapoints=cli_config.num_train_datapoints,
        num_test_datapoints=cli_config.num_test_datapoints,
        renderer_name=renderer_name,
    )
    # The builder returns (train dataset, optional test dataset); only the
    # training dataset is needed here.
    dataset, _maybe_test_dataset = await dataset_builder()

    # First env-group builder of the first batch, then the first env it makes.
    envs_builder = dataset.get_batch(0)[0]
    envs = await envs_builder.make_envs()
    env = envs[0]

    obs, _stop = await env.initial_observation()
    print(obs)
    tokenizer = tokenizer_utils.get_tokenizer(model_name)
    print(tokenizer.decode(obs.to_ints()))  # TTT board

    action_tokens = tokenizer.encode(action, add_special_tokens=True)
    try:
        # Without a bound this call was observed to hang forever.
        # NOTE(review): presumably step() waits for the second player's env
        # (envs[1]) to move, and nothing drives it here -- confirm against
        # the TextArena env implementation.
        res = await asyncio.wait_for(env.step(action=action_tokens), timeout=step_timeout)
    except asyncio.TimeoutError:
        raise TimeoutError(
            f"env.step did not return within {step_timeout} seconds; "
            "the environment may be waiting for the other player to move"
        ) from None
    print(res)
    return env, res


cli_config = CLIConfig()
# asyncio.run keeps this valid as a plain script (top-level ``await`` only
# parses in IPython/Jupyter); inside an already-running loop it relies on
# nest_asyncio having been applied beforehand.
env, res = asyncio.run(play_opening_move(cli_config))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment