# List everything in /usr/bin whose name starts with "python".
ls /usr/bin/python*
# Uninstall the python3.5 package (its configuration files are kept).
sudo apt-get remove python3.5
# Same removal, also dropping auto-installed dependencies that are no longer needed.
sudo apt-get remove --auto-remove python3.5
# Purge the package: like remove, but its configuration files are deleted too.
sudo apt-get purge python3.5
| """ | |
| Simple RL training script for teaching a model to add. | |
| Demonstrates REINFORCE and GRPO algorithms in a minimal implementation. | |
| If you want to run this script, put it inside of nanochat/scripts/ and run it with: | |
| python -m scripts.simple_rl | |
| First add "matplotlib>=3.9.0" to pyproject.toml and run 'uv sync' | |
| I wrote a separate script to download the weights for the model: |
| # train_grpo.py | |
| # | |
| # See https://github.com/willccbb/verifiers for ongoing developments | |
| # | |
| """ | |
| citation: | |
| @misc{brown2025grpodemo, | |
| title={Granular Format Rewards for Eliciting Mathematical Reasoning Capabilities in Small Language Models}, | |
| author={Brown, William}, |
| #!/bin/bash | |
| # This script initializes a GPU machine to start a vLLM server | |
| # Ensure the script is run as root | |
| if [[ $EUID -ne 0 ]]; then | |
| echo "This script must be run as root" | |
| exit 1 | |
| fi |
| import torch | |
| import os | |
| import argparse | |
| import matplotlib.pyplot as plt | |
| from tqdm import tqdm | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| import seaborn as sns | |
| def get_parser(): |
| # Taken from https://johanwind.github.io/2023/03/23/rwkv_details.html. | |
| # I've added additional comments and restructured it a tiny bit, which makes it clearer for me. | |
| import numpy as np | |
| from torch import load as torch_load # Only for loading the model weights | |
| from tokenizers import Tokenizer | |
| exp = np.exp | |
| layer_norm = lambda x, w, b : (x - np.mean(x)) / np.std(x) * w + b | |
| sigmoid = lambda x : 1/(1 + exp(-x)) |
| """ | |
| Download the latest wiki dump files for a language. | |
| If from_date is passed, the latest dump before that date is downloaded instead. | |
| """ | |
| import requests | |
| from datetime import datetime | |
| from datetime import timedelta | |
| from string import Template | |
| from tqdm import tqdm |
| import os | |
| import json | |
| from tqdm import tqdm | |
| import torch | |
| import torch.nn.functional as F | |
| from transformers import AutoTokenizer, AutoModel | |
| from elasticsearch import Elasticsearch | |
| class JsonlCollectionIterator: |
| mod ml_thread; | |
| use gdnative::prelude::{godot_print, methods, Method, NativeClass, Node as GDNode, InitHandle, godot_init}; | |
| use ml_thread::start_language_model_thread; | |
| use std::sync::mpsc::{channel, Receiver, RecvError, Sender, SendError}; | |
| const MAX_INPUT_LENGTH: usize = 512; | |
| const BATCH_SIZE: usize = 1; |
# ==== Non-communicating processes
# Layout: 4x 1 chip per process. The two *_BOUNDS values are shared by all
# processes; only TPU_VISIBLE_DEVICES differs per process
# ("0" here; "1", "2", "3" for the others).
os.environ.update(
    {
        "TPU_CHIPS_PER_PROCESS_BOUNDS": "1,1,1",
        "TPU_PROCESS_BOUNDS": "1,1,1",
        "TPU_VISIBLE_DEVICES": "0",
    }
)
# 1-liner for bash: TPU_CHIPS_PER_PROCESS_BOUNDS=1,1,1 TPU_PROCESS_BOUNDS=1,1,1 TPU_VISIBLE_DEVICES=0 TPU_MESH_CONTROLLER_ADDRESS=localhost:8476 TPU_MESH_CONTROLLER_PORT=8476