Last active
December 8, 2025 14:32
-
-
Save monday8am/a1182a0d2ce0320959f053841ec35ddd to your computer and use it in GitHub Desktop.
LLMConversion.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "nbformat": 4, | |
| "nbformat_minor": 0, | |
| "metadata": { | |
| "colab": { | |
| "provenance": [], | |
| "mount_file_id": "1u2VhWA8D-Y3DYU8xyCCP6cKvGgQNFOrw", | |
| "authorship_tag": "ABX9TyON/8O//qj+XPp8GNldI8S9", | |
| "include_colab_link": true | |
| }, | |
| "kernelspec": { | |
| "name": "python3", | |
| "display_name": "Python 3" | |
| }, | |
| "language_info": { | |
| "name": "python" | |
| } | |
| }, | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "view-in-github", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "<a href=\"https://colab.research.google.com/gist/monday8am/a1182a0d2ce0320959f053841ec35ddd/llmconversion.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# STEP 1 — Install dependencies\n", | |
| "!pip uninstall -y tensorflow tf-nightly tb-nightly keras-nightly ai-edge-litert-nightly ai-edge-quantizer-nightly google-ai-generativelanguage grpcio-status protobuf\n", | |
| "!pip install \"protobuf>=6.31.1,<8.0.0\"\n", | |
| "!pip install grpcio-tools # Includes compatible protoc\n", | |
| "!pip install ai-edge-torch\n", | |
| "!pip install ai-edge-litert flatbuffers\n", | |
| "\n", | |
| "print(\"✅ Dependencies installed\")\n", | |
| "print(\"⚠️ Now go to: Runtime → Restart Session\")\n", | |
| "print(\"⚠️ Then continue with the rest of the notebook\")" | |
| ], | |
| "metadata": { | |
| "id": "3d_7msTkCMTy", | |
| "collapsed": true | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# -----------------------------------------------------------------------------\n", | |
| "# Configuration\n", | |
| "# -----------------------------------------------------------------------------\n", | |
| "\n", | |
| "# Model types\n", | |
| "QWEN3 = \"qwen3\"\n", | |
| "QWEN2P5 = \"qwen2.5\"\n", | |
| "HAMMER2P1 = \"hammer2.1\"\n", | |
| "\n", | |
| "# Hugging Face path\n", | |
| "CHECKPOINT_ID = \"MadeAgents/Hammer2.1-1.5b\"\n", | |
| "\n", | |
| "MODEL_NAME = HAMMER2P1\n", | |
| "MODEL_SIZE = \"1.5b\"  # must match CHECKPOINT_ID variant (Hammer2.1-1.5b)\n", | |
| "QUANTIZATION = \"dynamic_int8\"\n", | |
| "CONTEXT_LENGTH = 4096\n", | |
| "OUTPUT_PREFIX = f\"{MODEL_NAME}_{MODEL_SIZE}\" # e.g., qwen3_0.6b\n", | |
| "\n", | |
| "# Derived paths\n", | |
| "OUTPUT_TFLITE = f\"/content/{OUTPUT_PREFIX}_q8_ekv{CONTEXT_LENGTH}.tflite\"\n", | |
| "OUTPUT_SPM = f\"/content/{OUTPUT_PREFIX}.spm.model\"\n", | |
| "OUTPUT_LITERT = f\"/content/{OUTPUT_PREFIX}_q8_ekv{CONTEXT_LENGTH}.litertlm\"\n", | |
| "OUTPUT_TASK = f\"/content/{OUTPUT_PREFIX}_q8_ekv{CONTEXT_LENGTH}.task\"" | |
| ], | |
| "metadata": { | |
| "id": "q7neigFUzv7v" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# STEP 2 — Clone and install ai-edge-torch repo (examples and converters)\n", | |
| "!git clone https://github.com/google-ai-edge/ai-edge-torch.git\n", | |
| "!pip install -e ./ai-edge-torch\n", | |
| "\n", | |
| "print(f\"✅ Installed ai-edge-torch with native {MODEL_NAME} support\")" | |
| ], | |
| "metadata": { | |
| "id": "gwFGIP0pCV2j", | |
| "collapsed": true | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# STEP 3 — Download checkpoint from Hugging Face\n", | |
| "from huggingface_hub import snapshot_download\n", | |
| "checkpoint_path = snapshot_download(CHECKPOINT_ID)\n", | |
| "print(f\"✅ Checkpoint downloaded to: {checkpoint_path}\")" | |
| ], | |
| "metadata": { | |
| "id": "rmzguJScC7wv" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# STEP 4 — Convert to TFLite with quantization\n", | |
| "hammer_converter = \"/content/ai-edge-torch/ai_edge_torch/generative/examples/hammer/convert_to_tflite.py\"\n", | |
| "qwen3_converter = \"/content/ai-edge-torch/ai_edge_torch/generative/examples/qwen/convert_v3_to_tflite.py\"\n", | |
| "\n", | |
| "if MODEL_NAME == HAMMER2P1:\n", | |
| " print(\"✅ Using Hammer-specific converter\")\n", | |
| " converter_script = hammer_converter\n", | |
| "else:\n", | |
| " print(\"✅ Using Qwen3-specific converter\")\n", | |
| " converter_script = qwen3_converter\n", | |
| "\n", | |
| "!python {converter_script} \\\n", | |
| " --checkpoint_path \"{checkpoint_path}\" \\\n", | |
| " --model_size \"{MODEL_SIZE}\" \\\n", | |
| " --output_path \"/content/\" \\\n", | |
| " --output_name_prefix \"{OUTPUT_PREFIX}\" \\\n", | |
| " --kv_cache_max_len {CONTEXT_LENGTH} \\\n", | |
| " --quantize \"{QUANTIZATION}\"" | |
| ], | |
| "metadata": { | |
| "id": "tGMJ5sTFDDmB", | |
| "collapsed": true | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# STEP 5 — Convert tokenizer to SentencePiece format\n", | |
| "%cd /content/ai-edge-torch/ai_edge_torch/generative/tools/\n", | |
| "\n", | |
| "!python tokenizer_to_sentencepiece.py \\\n", | |
| " --checkpoint=\"{CHECKPOINT_ID}\" \\\n", | |
| " --output_path=\"{OUTPUT_SPM}\" \\\n", | |
| " --normalize_tokens=decode \\\n", | |
| " --num_pairs_to_verify=300" | |
| ], | |
| "metadata": { | |
| "id": "8ObMiABzMTB3", | |
| "collapsed": true | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# STEP 6 — Build .litertlm using high-level API\n", | |
| "%cd /content/ai-edge-torch/\n", | |
| "\n", | |
| "import os\n", | |
| "import json\n", | |
| "import tempfile\n", | |
| "from ai_edge_torch.generative.utilities import litertlm_builder\n", | |
| "\n", | |
| "print(f\"🏗️ Building .litertlm bundle with {MODEL_NAME} metadata...\")\n", | |
| "\n", | |
| "# Load Jinja chat template from checkpoint\n", | |
| "jinja_template = None\n", | |
| "tokenizer_config_path = os.path.join(checkpoint_path, \"tokenizer_config.json\")\n", | |
| "if os.path.exists(tokenizer_config_path):\n", | |
| " with open(tokenizer_config_path, 'r') as f:\n", | |
| " config = json.load(f)\n", | |
| " jinja_template = config.get('chat_template')\n", | |
| " if jinja_template:\n", | |
| " print(f\"✅ Loaded Jinja template from checkpoint ({len(jinja_template)} chars)\")\n", | |
| " else:\n", | |
| " print(\"⚠️ No chat_template found in tokenizer_config.json\")\n", | |
| "else:\n", | |
| " print(\"⚠️ tokenizer_config.json not found\")\n", | |
| "\n", | |
| "\n", | |
| "# Map the string MODEL_NAME to the correct LlmModelType\n", | |
| "MODEL_TO_LLM_TYPE = {\n", | |
| " \"hammer2.1\": litertlm_builder.litertlm_builder.LlmModelType.GENERIC,\n", | |
| " \"qwen2.5\": litertlm_builder.litertlm_builder.LlmModelType.QWEN2P5,\n", | |
| " \"qwen3\": litertlm_builder.litertlm_builder.LlmModelType.QWEN3,\n", | |
| "}\n", | |
| "\n", | |
| "# Fallback: if unknown model name, default to generic one\n", | |
| "ll_model_type = MODEL_TO_LLM_TYPE.get(\n", | |
| " MODEL_NAME.lower(),\n", | |
| " litertlm_builder.litertlm_builder.LlmModelType.GENERIC\n", | |
| ")\n", | |
| "\n", | |
| "print(f\"{ll_model_type} model type selected!\")\n", | |
| "\n", | |
| "# Build bundle using high-level API (PR #858)\n", | |
| "with tempfile.TemporaryDirectory() as workdir:\n", | |
| " litertlm_builder.build_litertlm(\n", | |
| " tflite_model_path=OUTPUT_TFLITE,\n", | |
| " workdir=workdir,\n", | |
| " output_path=\"/content/\",\n", | |
| " context_length=CONTEXT_LENGTH,\n", | |
| " model_prompt_prefix=None,\n", | |
| " model_prompt_suffix=None,\n", | |
| " user_prompt_prefix=None,\n", | |
| " user_prompt_suffix=None,\n", | |
| " tokenizer_model_path=OUTPUT_SPM,\n", | |
| " hf_tokenizer_model_path=None,\n", | |
| " start_token=\"<|im_start|>\",\n", | |
| " stop_tokens=[\"<|im_end|>\", \"<|endoftext|>\"],\n", | |
| " llm_model_type=ll_model_type,\n", | |
| " jinja_prompt_template=jinja_template,\n", | |
| " )\n", | |
| "\n", | |
| "print(f\"✅ Bundle built with {ll_model_type} metadata and Jinja template\")\n", | |
| "\n", | |
| "if os.path.exists(OUTPUT_LITERT):\n", | |
| " size_mb = os.path.getsize(OUTPUT_LITERT) / (1024 ** 2)\n", | |
| " print(f\"🎉 Success! Bundle created ({size_mb:.2f} MB)\")\n", | |
| "else:\n", | |
| " print(\"❌ Conversion failed — check logs above\")" | |
| ], | |
| "metadata": { | |
| "id": "LBs1pO82ZUuB", | |
| "collapsed": true | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# STEP 6.4 — Install MediaPipe and compatible libs\n", | |
| "!pip uninstall -y protobuf ai-edge-torch tf-nightly tb-nightly keras-nightly ai-edge-litert-nightly ai-edge-quantizer-nightly\n", | |
| "!pip install \"protobuf>=4.25.3,<5\"\n", | |
| "!pip install mediapipe-nightly\n", | |
| "\n", | |
| "print(\"✅ STEP 6.4 ready - Restart runtime and create .task bundle\")" | |
| ], | |
| "metadata": { | |
| "id": "_SQGJJKHcwWa" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# STEP 6.5 — Build .task bundle using MediaPipe\n", | |
| "import os\n", | |
| "import mediapipe as mp\n", | |
| "from mediapipe.tasks.python.genai import bundler\n", | |
| "\n", | |
| "# Configure the bundle\n", | |
| "# Hammer2.1 uses Qwen2.5's tokenizer with ChatML format\n", | |
| "task_config = bundler.BundleConfig(\n", | |
| " tflite_model=OUTPUT_TFLITE,\n", | |
| " tokenizer_model=OUTPUT_SPM,\n", | |
| " start_token=\"<|im_start|>\",\n", | |
| " stop_tokens=[\"<|im_end|>\", \"<|endoftext|>\"],\n", | |
| " output_filename=OUTPUT_TASK,\n", | |
| " enable_bytes_to_unicode_mapping=False, # Must be False for Qwen-based tokenizers converted to SentencePiece\n", | |
| ")\n", | |
| "\n", | |
| "# Build the bundle\n", | |
| "bundler.create_bundle(task_config)\n", | |
| "\n", | |
| "if os.path.exists(OUTPUT_TASK):\n", | |
| " size_mb = os.path.getsize(OUTPUT_TASK) / (1024 ** 2)\n", | |
| " print(f\"🎉 Success! MediaPipe .task bundle created ({size_mb:.2f} MB)\")\n", | |
| " print(f\"📦 Output: {OUTPUT_TASK}\")\n", | |
| " print(f\"✅ Use with MediaPipe LLM Inference API on Android/iOS/Web\")\n", | |
| "else:\n", | |
| " print(\"❌ MediaPipe bundle creation failed — check logs above\")" | |
| ], | |
| "metadata": { | |
| "id": "lLZNl4W3Ljn8" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# STEP 7 — Define output file\n", | |
| "\n", | |
| "%cd /content\n", | |
| "OUTPUT_FILE = None\n", | |
| "if MODEL_NAME == HAMMER2P1:\n", | |
| " print(\"✅ Using Hammer-specific output file\")\n", | |
| " OUTPUT_FILE = OUTPUT_TASK\n", | |
| "else:\n", | |
| " print(\"✅ Using Qwen3-specific output file\")\n", | |
| " OUTPUT_FILE = OUTPUT_LITERT\n" | |
| ], | |
| "metadata": { | |
| "id": "86rY4N4bgvpX" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# STEP 7.1 — Download model\n", | |
| "\n", | |
| "if os.path.exists(OUTPUT_FILE):\n", | |
| " from google.colab import files\n", | |
| " files.download(OUTPUT_FILE)\n", | |
| " print(f\"📥 Downloaded: {OUTPUT_FILE}\")\n", | |
| "else:\n", | |
| " print(\"❌ Cannot download - file not found\")" | |
| ], | |
| "metadata": { | |
| "id": "aAVRlOe7OoD4" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# STEP 7.5 — Save model in Drive\n", | |
| "from google.colab import drive\n", | |
| "drive.mount('/content/drive')\n", | |
| "\n", | |
| "# Copy to Drive\n", | |
| "!cp {OUTPUT_FILE} /content/drive/MyDrive/\n", | |
| "\n", | |
| "print(f\"✅ Saved {OUTPUT_FILE} to Google Drive\")" | |
| ], | |
| "metadata": { | |
| "id": "AkXIG1rX7VZ7" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| } | |
| ] | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment