Last active
February 18, 2026 16:13
-
-
Save alonsosilvaallende/36f2c7e9720dc8b8225dae082f58d046 to your computer and use it in GitHub Desktop.
from_outlines_core
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "id": "093b1977-dc46-4508-b109-17ea5315877b", | |
| "metadata": {}, | |
| "source": [ | |
| "We already have the vocabulary from a model thanks to the script you gave me, so we don't need this cell:" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "id": "ec12bffc-60dd-48e3-8661-0110d61109c3", | |
| "metadata": { | |
| "execution": { | |
| "iopub.execute_input": "2026-02-18T15:55:12.360538Z", | |
| "iopub.status.busy": "2026-02-18T15:55:12.360027Z", | |
| "iopub.status.idle": "2026-02-18T15:55:12.807528Z", | |
| "shell.execute_reply": "2026-02-18T15:55:12.806416Z", | |
| "shell.execute_reply.started": "2026-02-18T15:55:12.360490Z" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import outlines_core\n", | |
| "\n", | |
| "model_id = \"Qwen/Qwen3-0.6B\"\n", | |
| "vocabulary = outlines_core.Vocabulary.from_pretrained(model_id)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "22d884e2-c189-4e0b-923d-5b1fce9282c1", | |
| "metadata": {}, | |
| "source": [ | |
| "This is all we need from `outlines_core`:" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "id": "afa6ee68-3f32-49fb-875f-a281a33e074c", | |
| "metadata": { | |
| "execution": { | |
| "iopub.execute_input": "2026-02-18T15:55:30.593216Z", | |
| "iopub.status.busy": "2026-02-18T15:55:30.592715Z", | |
| "iopub.status.idle": "2026-02-18T15:55:30.642589Z", | |
| "shell.execute_reply": "2026-02-18T15:55:30.641631Z", | |
| "shell.execute_reply.started": "2026-02-18T15:55:30.593170Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Index {\n", | |
| " initial_state: 64,\n", | |
| " final_states: {\n", | |
| " 112,\n", | |
| " },\n", | |
| " transitions: {\n", | |
| " 112: {\n", | |
| " 151645: 112,\n", | |
| " },\n", | |
| " 64: {\n", | |
| " 54: 128,\n", | |
| " 1639: 80,\n", | |
| " 10234: 96,\n", | |
| " },\n", | |
| " 80: {\n", | |
| " 88: 96,\n", | |
| " },\n", | |
| " 128: {\n", | |
| " 8503: 96,\n", | |
| " 71: 80,\n", | |
| " },\n", | |
| " 96: {\n", | |
| " 220: 112,\n", | |
| " },\n", | |
| " },\n", | |
| " eos_token_id: 151645,\n", | |
| " vocab_size: 151656,\n", | |
| "}" | |
| ] | |
| }, | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "index = outlines_core.Index(\"Why \", vocabulary)\n", | |
| "index" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "50ebabed-c20f-4fea-867f-d5cea195b3ad", | |
| "metadata": {}, | |
| "source": [ | |
| "In reality, we only need the transitions (I can deduce the other info):" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "id": "d744f7b5-6e58-434b-bdef-e19668d1d05b", | |
| "metadata": { | |
| "execution": { | |
| "iopub.execute_input": "2026-02-18T15:56:14.351757Z", | |
| "iopub.status.busy": "2026-02-18T15:56:14.351001Z", | |
| "iopub.status.idle": "2026-02-18T15:56:14.360753Z", | |
| "shell.execute_reply": "2026-02-18T15:56:14.358920Z", | |
| "shell.execute_reply.started": "2026-02-18T15:56:14.351691Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "{112: {151645: 112},\n", | |
| " 64: {54: 128, 1639: 80, 10234: 96},\n", | |
| " 80: {88: 96},\n", | |
| " 128: {8503: 96, 71: 80},\n", | |
| " 96: {220: 112}}" | |
| ] | |
| }, | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "index.get_transitions()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "8ade5c02-d253-402f-b508-0388e9808c59", | |
| "metadata": {}, | |
| "source": [ | |
| "The transitions are a nested dictionary of the form `{state: {token: next_state}}`." | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "b5044b68-bd66-4e8e-8571-e15269049d12", | |
| "metadata": {}, | |
| "source": [ | |
| "## This is just to make the transitions 'prettier'" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "id": "fb075839-f549-4fb6-9819-4731428513fc", | |
| "metadata": { | |
| "execution": { | |
| "iopub.execute_input": "2026-02-18T15:58:13.649553Z", | |
| "iopub.status.busy": "2026-02-18T15:58:13.649093Z", | |
| "iopub.status.idle": "2026-02-18T15:58:13.658346Z", | |
| "shell.execute_reply": "2026-02-18T15:58:13.656443Z", | |
| "shell.execute_reply.started": "2026-02-18T15:58:13.649512Z" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def my_recursive(\n", | |
| "    state: int,\n", | |
| "    index: outlines_core.Index,\n", | |
| "    mapping: dict[int, int],\n", | |
| "    visited: set[int],\n", | |
| "    final_states: set[int],\n", | |
| ") -> None:\n", | |
| "    \"\"\"Number every non-final state reachable from `state`, depth first.\n", | |
| "\n", | |
| "    Final states are never entered and never numbered here, so a state that\n", | |
| "    can only be reached through a final state is not discovered.\n", | |
| "\n", | |
| "    Args:\n", | |
| "        state: State to start from. It is marked as visited but not numbered\n", | |
| "            here; the caller is expected to have numbered it already.\n", | |
| "        index: Index whose `get_transitions()` returns\n", | |
| "            `{state: {token: next_state}}`.\n", | |
| "        mapping: Updated in place; each newly discovered state receives\n", | |
| "            `len(mapping)` as its new number.\n", | |
| "        visited: Updated in place with every state that has been expanded.\n", | |
| "        final_states: States to skip.\n", | |
| "    \"\"\"\n", | |
| "    if state in final_states:\n", | |
| "        return\n", | |
| "    # Read the table once instead of once per visited state.\n", | |
| "    transitions = index.get_transitions()\n", | |
| "    visited.add(state)\n", | |
| "    # An explicit stack of edge iterators replaces the call stack, so a long\n", | |
| "    # chain of states cannot hit Python's recursion limit. States are numbered\n", | |
| "    # in the same order as a recursive walk would number them.\n", | |
| "    pending = [iter(transitions.get(state, {}).values())]\n", | |
| "    while pending:\n", | |
| "        for new_state in pending[-1]:\n", | |
| "            if new_state in final_states:\n", | |
| "                continue  # Skip final states entirely\n", | |
| "            if new_state not in mapping:\n", | |
| "                mapping[new_state] = len(mapping)\n", | |
| "            if new_state not in visited:\n", | |
| "                visited.add(new_state)\n", | |
| "                # Descend now; the parent's iterator resumes afterwards.\n", | |
| "                pending.append(iter(transitions.get(new_state, {}).values()))\n", | |
| "                break\n", | |
| "        else:\n", | |
| "            pending.pop()  # every edge of this state has been handled" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "id": "8dadfe8d-e228-4ebb-b7f6-cbcdd293dc95", | |
| "metadata": { | |
| "execution": { | |
| "iopub.execute_input": "2026-02-18T15:58:19.521604Z", | |
| "iopub.status.busy": "2026-02-18T15:58:19.521152Z", | |
| "iopub.status.idle": "2026-02-18T15:58:19.529955Z", | |
| "shell.execute_reply": "2026-02-18T15:58:19.528246Z", | |
| "shell.execute_reply.started": "2026-02-18T15:58:19.521562Z" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def get_state_mapping(index: outlines_core.Index) -> dict[int, int]:\n", | |
| "    \"\"\"Map the index's state ids to small consecutive integers.\n", | |
| "\n", | |
| "    Non-final states are numbered from 0 in the order `my_recursive`\n", | |
| "    discovers them, starting at the initial state. Final states receive the\n", | |
| "    highest numbers. Every state that appears in the transition table, as a\n", | |
| "    source or as a target, ends up with a number.\n", | |
| "\n", | |
| "    Args:\n", | |
| "        index: Index to renumber.\n", | |
| "\n", | |
| "    Returns:\n", | |
| "        A dict `{original_state: new_state}`.\n", | |
| "    \"\"\"\n", | |
| "    transitions = index.get_transitions()\n", | |
| "    initial_state = index.get_initial_state()\n", | |
| "    final_states = set(index.get_final_states())\n", | |
| "    mapping = {}\n", | |
| "    visited = set()\n", | |
| "    # Start from initial state (mapped to 0)\n", | |
| "    if initial_state not in final_states:\n", | |
| "        mapping[initial_state] = 0\n", | |
| "        my_recursive(initial_state, index, mapping, visited, final_states)\n", | |
| "    # my_recursive never walks through a final state, so states that sit\n", | |
| "    # behind one are still missing; number them too, in ascending id order.\n", | |
| "    all_states = set(transitions).union(*(row.values() for row in transitions.values()))\n", | |
| "    for state in sorted(all_states - final_states):\n", | |
| "        if state not in mapping:\n", | |
| "            mapping[state] = len(mapping)\n", | |
| "            my_recursive(state, index, mapping, visited, final_states)\n", | |
| "    # End with final states (mapped at the end). Count the states that were\n", | |
| "    # actually numbered rather than the rows of the transition table, so a\n", | |
| "    # final state can never share a number with a non-final one. Sorting\n", | |
| "    # makes the result independent of set iteration order.\n", | |
| "    num_states = len(mapping) + len(final_states)\n", | |
| "    for i, final_state in enumerate(sorted(final_states)):\n", | |
| "        mapping[final_state] = num_states - (i + 1)\n", | |
| "    return mapping" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "id": "92f4f768-9ab1-42a0-8b6f-19555abf7ded", | |
| "metadata": { | |
| "execution": { | |
| "iopub.execute_input": "2026-02-18T15:58:25.853038Z", | |
| "iopub.status.busy": "2026-02-18T15:58:25.852579Z", | |
| "iopub.status.idle": "2026-02-18T15:58:25.861045Z", | |
| "shell.execute_reply": "2026-02-18T15:58:25.859315Z", | |
| "shell.execute_reply.started": "2026-02-18T15:58:25.852996Z" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def get_dfa(index: outlines_core.Index) -> dict[int, dict[int, int]]:\n", | |
| "    \"\"\"Return the transitions of `index` with the states renumbered.\n", | |
| "\n", | |
| "    States are renamed with `get_state_mapping`. Rows that belong to final\n", | |
| "    states are left out of the result; final states still appear as targets.\n", | |
| "\n", | |
| "    Args:\n", | |
| "        index: Index whose transitions are rewritten.\n", | |
| "\n", | |
| "    Returns:\n", | |
| "        A dict `{state: {token: next_state}}` that uses the new numbers.\n", | |
| "    \"\"\"\n", | |
| "    mapping = get_state_mapping(index)\n", | |
| "    # Read once instead of on every loop iteration.\n", | |
| "    final_states = index.get_final_states()\n", | |
| "    dfa = {}\n", | |
| "    for state, transitions in index.get_transitions().items():\n", | |
| "        if state in final_states:\n", | |
| "            continue  # dropped rows are not remapped at all\n", | |
| "        dfa[mapping[state]] = {\n", | |
| "            token: mapping[new_state] for token, new_state in transitions.items()\n", | |
| "        }\n", | |
| "    return dfa" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "b3afd24b-fd37-43fa-b1cd-0266de321e1d", | |
| "metadata": {}, | |
| "source": [ | |
| "I consider this:" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "id": "3b08fbb1-ca69-4c7f-8870-ae3744355ec3", | |
| "metadata": { | |
| "execution": { | |
| "iopub.execute_input": "2026-02-18T15:58:28.185285Z", | |
| "iopub.status.busy": "2026-02-18T15:58:28.184789Z", | |
| "iopub.status.idle": "2026-02-18T15:58:28.192492Z", | |
| "shell.execute_reply": "2026-02-18T15:58:28.190267Z", | |
| "shell.execute_reply.started": "2026-02-18T15:58:28.185242Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "{0: {54: 1, 1639: 3, 10234: 2}, 3: {88: 2}, 1: {8503: 2, 71: 3}, 2: {220: 4}}\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print(get_dfa(index))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "568145ca-3197-47da-b8dc-bdcf910808cb", | |
| "metadata": {}, | |
| "source": [ | |
| "prettier than this:" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "id": "1aba4457-29c1-4cb0-935d-283db83e0449", | |
| "metadata": { | |
| "execution": { | |
| "iopub.execute_input": "2026-02-18T15:58:30.189048Z", | |
| "iopub.status.busy": "2026-02-18T15:58:30.188546Z", | |
| "iopub.status.idle": "2026-02-18T15:58:30.197936Z", | |
| "shell.execute_reply": "2026-02-18T15:58:30.196146Z", | |
| "shell.execute_reply.started": "2026-02-18T15:58:30.189003Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "{112: {151645: 112},\n", | |
| " 64: {54: 128, 1639: 80, 10234: 96},\n", | |
| " 80: {88: 96},\n", | |
| " 128: {8503: 96, 71: 80},\n", | |
| " 96: {220: 112}}" | |
| ] | |
| }, | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "index.get_transitions()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "89816d12-5e25-46da-859c-81f22423ad57", | |
| "metadata": {}, | |
| "source": [ | |
| "but that's just me." | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "8803c412-9e9d-4473-9e7f-ab377a3840a4", | |
| "metadata": {}, | |
| "source": [ | |
| "The mapping from `outlines_core` states to the prettier states is:\n", | |
| "```\n", | |
| "64 -> 0\n", | |
| "128 -> 1\n", | |
| "96 -> 2\n", | |
| "80 -> 3\n", | |
| "112 -> 4\n", | |
| "```" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3 (ipykernel)", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.12.4" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 5 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment