{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"mount_file_id": "1u2VhWA8D-Y3DYU8xyCCP6cKvGgQNFOrw",
"authorship_tag": "ABX9TyON/8O//qj+XPp8GNldI8S9",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/monday8am/a1182a0d2ce0320959f053841ec35ddd/llmconversion.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"source": [
"# STEP 1 — Install dependencies\n",
"!pip uninstall -y tensorflow tf-nightly tb-nightly keras-nightly ai-edge-litert-nightly ai-edge-quantizer-nightly google-ai-generativelanguage grpcio-status protobuf\n",
"!pip install protobuf>=6.31.1,<8.0.0\"\n",
"!pip install grpcio-tools # Includes compatible protoc\n",
"!pip install ai-edge-torch\n",
"!pip install ai-edge-litert flatbuffers\n",
"\n",
"print(\"✅ Dependencies installed\")\n",
"print(\"⚠️ Now go to: Runtime → Restart Session\")\n",
"print(\"⚠️ Then continue with the rest of the notebook\")"
],
"metadata": {
"id": "3d_7msTkCMTy",
"collapsed": true
},
"execution_count": null,
"outputs": []
},
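{
"cell_type": "code",
"source": [
"# STEP 1.1 (optional sanity check, added for illustration): after restarting\n",
"# the session, confirm the pinned packages resolved before continuing.\n",
"import importlib.metadata as md\n",
"\n",
"for pkg in [\"protobuf\", \"ai-edge-torch\", \"ai-edge-litert\", \"flatbuffers\"]:\n",
"    try:\n",
"        print(f\"{pkg}: {md.version(pkg)}\")\n",
"    except md.PackageNotFoundError:\n",
"        print(f\"{pkg}: not installed, re-run STEP 1\")"
],
"metadata": {},
"execution_count": null,
"outputs": []
},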
{
"cell_type": "code",
"source": [
"# -----------------------------------------------------------------------------\n",
"# Configuration\n",
"# -----------------------------------------------------------------------------\n",
"\n",
"# Model types\n",
"QWEN3 = \"qwen3\"\n",
"QWEN2P5 = \"qwen2.5\"\n",
"HAMMER2P1 = \"hammer2.1\"\n",
"\n",
"# Hugging Face path\n",
"CHECKPOINT_ID = \"MadeAgents/Hammer2.1-1.5b\"\n",
"\n",
"MODEL_NAME = HAMMER2P1\n",
"MODEL_SIZE = \"0.5b\"\n",
"QUANTIZATION = \"dynamic_int8\"\n",
"CONTEXT_LENGTH = 4096\n",
"OUTPUT_PREFIX = f\"{MODEL_NAME}_{MODEL_SIZE}\" # e.g., qwen3_0.6b\n",
"\n",
"# Derived paths\n",
"OUTPUT_TFLITE = f\"/content/{OUTPUT_PREFIX}_q8_ekv{CONTEXT_LENGTH}.tflite\"\n",
"OUTPUT_SPM = f\"/content/{OUTPUT_PREFIX}.spm.model\"\n",
"OUTPUT_LITERT = f\"/content/{OUTPUT_PREFIX}_q8_ekv{CONTEXT_LENGTH}.litertlm\"\n",
"OUTPUT_TASK = f\"/content/{OUTPUT_PREFIX}_q8_ekv{CONTEXT_LENGTH}.task\""
],
"metadata": {
"id": "q7neigFUzv7v"
},
"execution_count": null,
"outputs": []
},
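{
"cell_type": "code",
"source": [
"# Optional guard (added for illustration): the converter's --model_size flag\n",
"# must match the downloaded checkpoint, so catch a mismatch here rather than\n",
"# after a long conversion run. Assumes the HF repo name embeds the size.\n",
"assert MODEL_SIZE.lower() in CHECKPOINT_ID.lower(), (\n",
"    f\"MODEL_SIZE={MODEL_SIZE!r} does not match CHECKPOINT_ID={CHECKPOINT_ID!r}\"\n",
")\n",
"print(f\"✅ Config looks consistent: {CHECKPOINT_ID} @ {MODEL_SIZE}\")"
],
"metadata": {},
"execution_count": null,
"outputs": []
},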
{
"cell_type": "code",
"source": [
"# STEP 2 — Clone and install ai-edge-torch repo (examples and converters)\n",
"!git clone https://github.com/google-ai-edge/ai-edge-torch.git\n",
"!pip install -e ./ai-edge-torch\n",
"\n",
"print(f\"✅ Installed ai-edge-torch with native {MODEL_NAME} support\")"
],
"metadata": {
"id": "gwFGIP0pCV2j",
"collapsed": true
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# STEP 3 — Download checkpoint from Hugging Face\n",
"from huggingface_hub import snapshot_download\n",
"checkpoint_path = snapshot_download(CHECKPOINT_ID)\n",
"print(f\"✅ Checkpoint downloaded to: {checkpoint_path}\")"
],
"metadata": {
"id": "rmzguJScC7wv"
},
"execution_count": null,
"outputs": []
},
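{
"cell_type": "code",
"source": [
"# Optional inspection (added for illustration): list the downloaded files.\n",
"# The converter expects the usual Hugging Face layout (config.json,\n",
"# *.safetensors weights, tokenizer.json / tokenizer_config.json).\n",
"import os\n",
"\n",
"for name in sorted(os.listdir(checkpoint_path)):\n",
"    path = os.path.join(checkpoint_path, name)\n",
"    if os.path.isfile(path):\n",
"        print(f\"{os.path.getsize(path) / (1024 ** 2):8.1f} MB  {name}\")"
],
"metadata": {},
"execution_count": null,
"outputs": []
},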
{
"cell_type": "code",
"source": [
"# STEP 4 — Convert to TFLite with quantization\n",
"hammer_converter = \"/content/ai-edge-torch/ai_edge_torch/generative/examples/hammer/convert_to_tflite.py\"\n",
"qwen3_converter = \"/content/ai-edge-torch/ai_edge_torch/generative/examples/qwen/convert_v3_to_tflite.py\"\n",
"\n",
"if MODEL_NAME == HAMMER2P1:\n",
" print(\"✅ Using Hammer-specific converter\")\n",
" converter_script = hammer_converter\n",
"else:\n",
" print(\"✅ Using Qwen3-specific converter\")\n",
" converter_script = qwen3_converter\n",
"\n",
"!python {converter_script} \\\n",
" --checkpoint_path \"{checkpoint_path}\" \\\n",
" --model_size \"{MODEL_SIZE}\" \\\n",
" --output_path \"/content/\" \\\n",
" --output_name_prefix \"{OUTPUT_PREFIX}\" \\\n",
" --kv_cache_max_len {CONTEXT_LENGTH} \\\n",
" --quantize \"{QUANTIZATION}\""
],
"metadata": {
"id": "tGMJ5sTFDDmB",
"collapsed": true
},
"execution_count": null,
"outputs": []
},
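{
"cell_type": "code",
"source": [
"# Optional verification (added for illustration): the file name built in the\n",
"# Configuration cell assumes the converter's output naming convention, so\n",
"# fail fast here if the two disagree.\n",
"import os\n",
"\n",
"if os.path.exists(OUTPUT_TFLITE):\n",
"    size_mb = os.path.getsize(OUTPUT_TFLITE) / (1024 ** 2)\n",
"    print(f\"✅ TFLite model ready ({size_mb:.2f} MB): {OUTPUT_TFLITE}\")\n",
"else:\n",
"    print(\"❌ Expected output not found; .tflite files in /content:\")\n",
"    print([f for f in os.listdir(\"/content\") if f.endswith(\".tflite\")])"
],
"metadata": {},
"execution_count": null,
"outputs": []
},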
{
"cell_type": "code",
"source": [
"# STEP 5 — Convert tokenizer to SentencePiece format\n",
"%cd /content/ai-edge-torch/ai_edge_torch/generative/tools/\n",
"\n",
"!python tokenizer_to_sentencepiece.py \\\n",
" --checkpoint=\"{CHECKPOINT_ID}\" \\\n",
" --output_path=\"{OUTPUT_SPM}\" \\\n",
" --normalize_tokens=decode \\\n",
" --num_pairs_to_verify=300"
],
"metadata": {
"id": "8ObMiABzMTB3",
"collapsed": true
},
"execution_count": null,
"outputs": []
},
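{
"cell_type": "code",
"source": [
"# Optional round-trip check (added for illustration, assumes the sentencepiece\n",
"# package is available): encode and decode a sample string with the exported\n",
"# .spm model. With --normalize_tokens=decode the decoded text should read\n",
"# normally, though BPE round trips are not guaranteed to be byte-exact.\n",
"import sentencepiece as spm\n",
"\n",
"sp = spm.SentencePieceProcessor(model_file=OUTPUT_SPM)\n",
"sample = \"Hello, on-device function calling!\"\n",
"ids = sp.encode(sample)\n",
"print(f\"{len(ids)} tokens -> {sp.decode(ids)!r}\")"
],
"metadata": {},
"execution_count": null,
"outputs": []
},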
{
"cell_type": "code",
"source": [
"# STEP 6 — Build .litertlm using high-level API\n",
"%cd /content/ai-edge-torch/\n",
"\n",
"import os\n",
"import json\n",
"import tempfile\n",
"from ai_edge_torch.generative.utilities import litertlm_builder\n",
"\n",
"print(f\"🏗️ Building .litertlm bundle with {MODEL_NAME} metadata...\")\n",
"\n",
"# Load Jinja chat template from checkpoint\n",
"jinja_template = None\n",
"tokenizer_config_path = os.path.join(checkpoint_path, \"tokenizer_config.json\")\n",
"if os.path.exists(tokenizer_config_path):\n",
" with open(tokenizer_config_path, 'r') as f:\n",
" config = json.load(f)\n",
" jinja_template = config.get('chat_template')\n",
" if jinja_template:\n",
" print(f\"✅ Loaded Jinja template from checkpoint ({len(jinja_template)} chars)\")\n",
" else:\n",
" print(\"⚠️ No chat_template found in tokenizer_config.json\")\n",
"else:\n",
" print(\"⚠️ tokenizer_config.json not found\")\n",
"\n",
"\n",
"# Map the string MODEL_NAME to the correct LlmModelType\n",
"MODEL_TO_LLM_TYPE = {\n",
" \"hammer2.1\": litertlm_builder.litertlm_builder.LlmModelType.GENERIC,\n",
" \"qwen2.5\": litertlm_builder.litertlm_builder.LlmModelType.QWEN2P5,\n",
" \"qwen3\": litertlm_builder.litertlm_builder.LlmModelType.QWEN3,\n",
"}\n",
"\n",
"# Fallback: if unknown model name, default to generic one\n",
"ll_model_type = MODEL_TO_LLM_TYPE.get(\n",
" MODEL_NAME.lower(),\n",
" litertlm_builder.litertlm_builder.LlmModelType.GENERIC\n",
")\n",
"\n",
"print(f\"{ll_model_type} model type selected!\")\n",
"\n",
"# Build bundle using high-level API (PR #858)\n",
"with tempfile.TemporaryDirectory() as workdir:\n",
" litertlm_builder.build_litertlm(\n",
" tflite_model_path=OUTPUT_TFLITE,\n",
" workdir=workdir,\n",
" output_path=\"/content/\",\n",
" context_length=CONTEXT_LENGTH,\n",
" model_prompt_prefix=None,\n",
" model_prompt_suffix=None,\n",
" user_prompt_prefix=None,\n",
" user_prompt_suffix=None,\n",
" tokenizer_model_path=OUTPUT_SPM,\n",
" hf_tokenizer_model_path=None,\n",
" start_token=\"<|im_start|>\",\n",
" stop_tokens=[\"<|im_end|>\", \"<|endoftext|>\"],\n",
" llm_model_type=ll_model_type,\n",
" jinja_prompt_template=jinja_template,\n",
" )\n",
"\n",
"print(f\"✅ Bundle built with {ll_model_type} metadata and Jinja template\")\n",
"\n",
"if os.path.exists(OUTPUT_LITERT):\n",
" size_mb = os.path.getsize(OUTPUT_LITERT) / (1024 ** 2)\n",
" print(f\"🎉 Success! Bundle created ({size_mb:.2f} MB)\")\n",
"else:\n",
" print(\"❌ Conversion failed — check logs above\")"
],
"metadata": {
"id": "LBs1pO82ZUuB",
"collapsed": true
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# STEP 6.4 — Install MediaPipe and compatible libs\n",
"!pip uninstall -y protobuf ai-edge-torch tf-nightly tb-nightly keras-nightly ai-edge-litert-nightly ai-edge-quantizer-nightly\n",
"!pip install \"protobuf>=4.25.3,<5\"\n",
"!pip install mediapipe-nightly\n",
"\n",
"print(\"✅ STEP 6.4 ready - Restart runtime and create .task bundle\")"
],
"metadata": {
"id": "_SQGJJKHcwWa"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# STEP 6.5 — Build .task bundle using MediaPipe\n",
"import os\n",
"import mediapipe as mp\n",
"from mediapipe.tasks.python.genai import bundler\n",
"\n",
"# Configure the bundle\n",
"# Hammer2.1 uses Qwen2.5's tokenizer with ChatML format\n",
"task_config = bundler.BundleConfig(\n",
" tflite_model=OUTPUT_TFLITE,\n",
" tokenizer_model=OUTPUT_SPM,\n",
" start_token=\"<|im_start|>\",\n",
" stop_tokens=[\"<|im_end|>\", \"<|endoftext|>\"],\n",
" output_filename=OUTPUT_TASK,\n",
" enable_bytes_to_unicode_mapping=False, # Required for Qwen-based tokenizers\n",
")\n",
"\n",
"# Build the bundle\n",
"bundler.create_bundle(task_config)\n",
"\n",
"if os.path.exists(OUTPUT_TASK):\n",
" size_mb = os.path.getsize(OUTPUT_TASK) / (1024 ** 2)\n",
" print(f\"🎉 Success! MediaPipe .task bundle created ({size_mb:.2f} MB)\")\n",
" print(f\"📦 Output: {OUTPUT_TASK}\")\n",
" print(f\"✅ Use with MediaPipe LLM Inference API on Android/iOS/Web\")\n",
"else:\n",
" print(\"❌ MediaPipe bundle creation failed — check logs above\")"
],
"metadata": {
"id": "lLZNl4W3Ljn8"
},
"execution_count": null,
"outputs": []
},
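{
"cell_type": "code",
"source": [
"# Illustration only (added, not part of the conversion): the start/stop tokens\n",
"# above follow the ChatML layout used by Qwen2.5-family tokenizers, which\n",
"# Hammer2.1 inherits. This is roughly the prompt shape the runtime wraps\n",
"# around each turn.\n",
"chatml_example = (\n",
"    \"<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n\"\n",
"    \"<|im_start|>user\\nWhat's the weather in Berlin?<|im_end|>\\n\"\n",
"    \"<|im_start|>assistant\\n\"\n",
")\n",
"print(chatml_example)"
],
"metadata": {},
"execution_count": null,
"outputs": []
},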
{
"cell_type": "code",
"source": [
"# STEP 7 — Define output file\n",
"\n",
"%cd /content\n",
"OUTPUT_FILE = None\n",
"if MODEL_NAME == HAMMER2P1:\n",
" print(\"✅ Using Hammer-specific output file\")\n",
" OUTPUT_FILE = OUTPUT_TASK\n",
"else:\n",
" print(\"✅ Using Qwen3-specific converter\")\n",
" OUTPUT_FILE = OUTPUT_LITERT\n"
],
"metadata": {
"id": "86rY4N4bgvpX"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# STEP 7.1 — Download model\n",
"\n",
"if os.path.exists(OUTPUT_FILE):\n",
" from google.colab import files\n",
" files.download(OUTPUT_FILE)\n",
" print(f\"📥 Downloaded: {OUTPUT_FILE}\")\n",
"else:\n",
" print(\"❌ Cannot download - file not found\")"
],
"metadata": {
"id": "aAVRlOe7OoD4"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# STEP 7.5 — Save model in Drive\n",
"from google.colab import drive\n",
"drive.mount('/content/drive')\n",
"\n",
"# Copy to Drive\n",
"!cp {OUTPUT_FILE} /content/drive/MyDrive/\n",
"\n",
"print(f\"✅ Saved {OUTPUT_FILE} to Google Drive\")"
],
"metadata": {
"id": "AkXIG1rX7VZ7"
},
"execution_count": null,
"outputs": []
}
]
}