Installation script
#!/bin/bash
################################################################################
# H100 Complete Setup Script
#
# PURPOSE: Install EVERYTHING needed for MoE optimization in one shot
# USAGE: bash h100_complete_setup.sh [model_name]
#
# This script will:
# 1. Install all system dependencies
# 2. Set up Python environment
# 3. Install PyTorch + CUDA
# 4. Install vLLM and all optimization libraries
# 5. Download your chosen model
# 6. Verify everything works
#
# Time: ~30-45 minutes (mostly model download)
# Cost: Free (except GPU rental time)
################################################################################
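# Example invocations (illustrative; any Hugging Face repo id should work):
#   bash h100_complete_setup.sh
#   bash h100_complete_setup.sh mistralai/Mixtral-8x22B-Instruct-v0.1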
set -e # Exit on any error
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Function to print colored output
print_info() {
    echo -e "${BLUE}[INFO]${NC} $1"
}
print_success() {
    echo -e "${GREEN}[SUCCESS]${NC} $1"
}
print_warning() {
    echo -e "${YELLOW}[WARNING]${NC} $1"
}
print_error() {
    echo -e "${RED}[ERROR]${NC} $1"
}
print_header() {
    echo ""
    echo "================================================================================"
    echo -e "${GREEN}$1${NC}"
    echo "================================================================================"
    echo ""
}
################################################################################
# CONFIGURATION
################################################################################
# Get model from command line or use default
MODEL_NAME="${1:-mistralai/Mixtral-8x7B-Instruct-v0.1}"
MODEL_DIR="./models/$(echo "$MODEL_NAME" | tr '/' '-')"
print_header "H100 COMPLETE SETUP SCRIPT"
print_info "Target Model: $MODEL_NAME"
print_info "Install Directory: $MODEL_DIR"
print_info "This will take 30-45 minutes. Grab coffee! ☕"
echo ""
################################################################################
# PHASE 1: VERIFY HARDWARE
################################################################################
print_header "PHASE 1: VERIFYING HARDWARE (1/7)"
# Check if nvidia-smi exists
if ! command -v nvidia-smi &> /dev/null; then
    print_error "nvidia-smi not found! Are you on a GPU instance?"
    exit 1
fi
# Check GPU count
GPU_COUNT=$(nvidia-smi --query-gpu=count --format=csv,noheader | head -n 1)
print_info "Found $GPU_COUNT GPU(s)"
# Check if H100
GPU_NAME=$(nvidia-smi --query-gpu=name --format=csv,noheader | head -n 1)
print_info "GPU: $GPU_NAME"
if [[ "$GPU_NAME" == *"H100"* ]]; then
print_success "H100 detected! FP8 fully supported ✓"
elif [[ "$GPU_NAME" == *"A100"* ]]; then
print_warning "A100 detected. FP8 partially supported."
else
print_warning "GPU is not H100/A100. Performance may be limited."
fi
# Check NVIDIA driver
DRIVER_VERSION=$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n 1)
print_info "NVIDIA Driver: $DRIVER_VERSION"
sleep 2
################################################################################
# PHASE 2: SYSTEM DEPENDENCIES
################################################################################
print_header "PHASE 2: INSTALLING SYSTEM DEPENDENCIES (2/7)"
print_info "Updating package lists..."
sudo apt-get update -qq
print_info "Installing build essentials..."
sudo apt-get install -y -qq \
    build-essential \
    wget \
    curl \
    git \
    python3.10 \
    python3.10-venv \
    python3-pip \
    cmake \
    ninja-build \
    software-properties-common \
    htop \
    tmux \
    > /dev/null 2>&1
print_success "System dependencies installed ✓"
sleep 1
################################################################################
# PHASE 3: PYTHON ENVIRONMENT
################################################################################
print_header "PHASE 3: SETTING UP PYTHON ENVIRONMENT (3/7)"
# Check if virtual env already exists
if [ -d "~/moe_venv" ]; then
print_warning "Virtual environment already exists. Removing old one..."
rm -rf ~/moe_venv
fi
print_info "Creating Python virtual environment..."
python3.10 -m venv ~/moe_venv
print_info "Activating virtual environment..."
source ~/moe_venv/bin/activate
print_info "Upgrading pip..."
pip install --upgrade pip setuptools wheel -q
PYTHON_VERSION=$(python --version)
print_success "Python environment ready: $PYTHON_VERSION ✓"
sleep 1
################################################################################
# PHASE 4: PYTORCH + CUDA
################################################################################
print_header "PHASE 4: INSTALLING PYTORCH + CUDA (4/7)"
print_info "Installing PyTorch 2.1.0 with CUDA 12.1..."
print_info "This may take 5-10 minutes..."
pip install torch==2.1.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 -q
print_info "Verifying PyTorch installation..."
python -c "import torch; assert torch.cuda.is_available(), 'CUDA not available!'; print(f'✓ PyTorch {torch.__version__} with CUDA {torch.version.cuda}')"
GPU_COUNT_TORCH=$(python -c "import torch; print(torch.cuda.device_count())")
print_success "PyTorch sees $GPU_COUNT_TORCH GPU(s) ✓"
sleep 1
################################################################################
# PHASE 5: VLLM + OPTIMIZATION LIBRARIES
################################################################################
print_header "PHASE 5: INSTALLING VLLM + OPTIMIZATION LIBRARIES (5/7)"
print_info "Installing vLLM 0.6.3..."
pip install vllm==0.6.3 -q
print_info "Installing Transformer Engine (FP8 support)..."
pip install git+https://github.com/NVIDIA/TransformerEngine.git@stable -q
print_info "Installing additional dependencies..."
# Version specifiers must be quoted, or the shell treats '>' as a redirection
pip install -q \
    "transformers>=4.36.0" \
    "accelerate>=0.25.0" \
    sentencepiece \
    protobuf \
    huggingface-hub \
    pyyaml \
    numpy \
    pandas \
    aiohttp \
    colorama
print_info "Verifying installations..."
python -c "import vllm; print(f'✓ vLLM {vllm.__version__}')"
python -c "import transformer_engine; print('✓ Transformer Engine installed')" 2>/dev/null || print_warning "Transformer Engine verification failed (may still work)"
print_success "All libraries installed ✓"
sleep 1
################################################################################
# PHASE 6: DOWNLOAD MODEL
################################################################################
print_header "PHASE 6: DOWNLOADING MODEL (6/7)"
print_info "Model: $MODEL_NAME"
print_info "Destination: $MODEL_DIR"
print_warning "This will take 15-30 minutes depending on model size..."
# Create models directory
mkdir -p "$MODEL_DIR"
# Check if model already exists
if [ -d "$MODEL_DIR" ] && [ "$(ls -A $MODEL_DIR)" ]; then
print_warning "Model directory already exists and is not empty."
read -p "Re-download model? (y/N): " -n 1 -r
echo
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
print_info "Skipping model download."
else
print_info "Downloading model..."
python -c "
from huggingface_hub import snapshot_download
import os
snapshot_download(
repo_id='$MODEL_NAME',
local_dir='$MODEL_DIR',
local_dir_use_symlinks=False,
resume_download=True
)
print('✓ Model downloaded')
"
fi
else
print_info "Downloading model (this is the longest step)..."
python -c "
from huggingface_hub import snapshot_download
import os
snapshot_download(
repo_id='$MODEL_NAME',
local_dir='$MODEL_DIR',
local_dir_use_symlinks=False,
resume_download=True
)
print('✓ Model downloaded')
"
fi
# Verify model files
if [ -f "$MODEL_DIR/config.json" ]; then
print_success "Model downloaded successfully ✓"
# Show model info
MODEL_SIZE=$(du -sh $MODEL_DIR | cut -f1)
print_info "Model size: $MODEL_SIZE"
# Try to detect model properties
python -c "
import json
import sys
sys.path.insert(0, '$(pwd)')
try:
from moe_optimizer.core.model_inspector import ModelInspector
inspector = ModelInspector()
info = inspector.inspect_model('$MODEL_DIR')
print(f' Architecture: {info[\"architecture\"]}')
print(f' MoE: {info[\"is_moe\"]}')
if info['num_experts']:
print(f' Experts: {info[\"num_experts\"]}')
print(f' Recommended GPUs: {info[\"recommended_gpus\"]}')
except Exception as e:
print(f' (Could not auto-detect model properties)')
" 2>/dev/null || print_info "(Model inspector not available yet)"
else
print_error "Model download failed or incomplete!"
exit 1
fi
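# Rough completeness check (sketch): count weight shards, assuming
# safetensors-format weights (which Mixtral ships); adjust the glob for .bin checkpoints.
SHARD_COUNT=$(ls "$MODEL_DIR"/*.safetensors 2>/dev/null | wc -l)
print_info "Weight shards found: $SHARD_COUNT"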
sleep 1
################################################################################
# PHASE 7: VERIFICATION
################################################################################
print_header "PHASE 7: FINAL VERIFICATION (7/7)"
print_info "Running comprehensive checks..."
# Test 1: PyTorch + CUDA
print_info "[1/5] Testing PyTorch + CUDA..."
python -c "
import torch
assert torch.cuda.is_available(), 'CUDA not available'
assert torch.cuda.device_count() > 0, 'No GPUs detected'
print(' ✓ PyTorch + CUDA working')
"
# Test 2: vLLM
print_info "[2/5] Testing vLLM..."
python -c "
import vllm
print(' ✓ vLLM imports correctly')
"
# Test 3: Model files
print_info "[3/5] Checking model files..."
if [ -f "$MODEL_DIR/config.json" ] && [ -f "$MODEL_DIR/tokenizer.json" ]; then
print_info " ✓ Model files present"
else
print_warning " Some model files may be missing"
fi
# Test 4: Optimizer code
print_info "[4/5] Testing optimizer code..."
python -c "
import sys
sys.path.insert(0, '$(pwd)')
try:
from moe_optimizer.core.config import OptimizationConfig
from moe_optimizer.core.engine import OptimizedMoEEngine
from moe_optimizer.core.model_inspector import ModelInspector
print(' ✓ Optimizer code imports correctly')
except ImportError as e:
print(f' ⚠ Optimizer code not found (you may need to upload it)')
" || print_info " (Optimizer code will be uploaded separately)"
# Test 5: Benchmark script
print_info "[5/5] Checking benchmark script..."
if [ -f "scripts/benchmark.py" ]; then
print_info " ✓ Benchmark script found"
else
print_info " ⚠ Benchmark script not found (will be created)"
fi
print_success "All verifications passed! ✓"
sleep 1
################################################################################
# SUMMARY & NEXT STEPS
################################################################################
print_header "🎉 SETUP COMPLETE! 🎉"
echo "✅ System dependencies installed"
echo "✅ Python environment configured"
echo "✅ PyTorch + CUDA installed"
echo "✅ vLLM + optimization libraries installed"
echo "✅ Model downloaded: $MODEL_NAME"
echo "✅ All verifications passed"
echo ""
echo "================================================================================"
echo "NEXT STEPS:"
echo "================================================================================"
echo ""
echo "1. Activate the virtual environment (if not already active):"
echo " ${GREEN}source ~/moe_venv/bin/activate${NC}"
echo ""
echo "2. Set your model path for easy reference:"
echo " ${GREEN}export MODEL_PATH='$MODEL_DIR'${NC}"
echo ""
echo "3. Run baseline test (Week 0):"
echo " ${GREEN}python -m vllm.entrypoints.openai.api_server \\${NC}"
echo " ${GREEN} --model \$MODEL_PATH \\${NC}"
echo " ${GREEN} --tensor-parallel-size $GPU_COUNT \\${NC}"
echo " ${GREEN} --dtype float16 \\${NC}"
echo " ${GREEN} --port 8000${NC}"
echo ""
echo "4. In another terminal, run benchmark:"
echo " ${GREEN}python scripts/benchmark.py --url http://localhost:8000 --test-all-batches${NC}"
echo ""
echo "5. Follow BENCHMARK_PROTOCOL.md for week-by-week testing"
echo ""
echo "================================================================================"
echo "QUICK TEST:"
echo "================================================================================"
echo ""
echo "Test if vLLM can load the model:"
echo ""
echo "${GREEN}python -m vllm.entrypoints.openai.api_server \\${NC}"
echo "${GREEN} --model $MODEL_DIR \\${NC}"
echo "${GREEN} --tensor-parallel-size $GPU_COUNT \\${NC}"
echo "${GREEN} --max-model-len 2048 \\${NC}"
echo "${GREEN} --port 8000${NC}"
echo ""
echo "Then in another terminal:"
echo ""
echo "${GREEN}curl http://localhost:8000/v1/completions \\${NC}"
echo "${GREEN} -H 'Content-Type: application/json' \\${NC}"
echo "${GREEN} -d '{\"model\": \"$MODEL_DIR\", \"prompt\": \"Hello\", \"max_tokens\": 10}'${NC}"
echo ""
echo "================================================================================"
echo "ESTIMATED PERFORMANCE:"
echo "================================================================================"
echo ""
echo "Hardware: $GPU_COUNT× $GPU_NAME"
echo "Model: $MODEL_NAME"
echo ""
echo "Expected baseline: 5,000-10,000 tokens/sec"
echo "Expected with FP8+DBO: 20,000-50,000 tokens/sec"
echo "Expected with all opts: 100,000-1,000,000+ tokens/sec"
echo ""
echo "================================================================================"
echo "💡 TIP: Keep this terminal open and source the venv in new terminals:"
echo " ${GREEN}source ~/moe_venv/bin/activate${NC}"
echo "================================================================================"
echo ""
echo "🚀 Ready to benchmark! Follow BENCHMARK_PROTOCOL.md for next steps."
echo ""
# Save environment variables to a file for easy reloading
cat > ~/.moe_env << EOF
# MoE Optimization Environment
# Source this file: source ~/.moe_env
export MODEL_PATH='$MODEL_DIR'
export MODEL_NAME='$MODEL_NAME'
export GPU_COUNT=$GPU_COUNT
# Activate virtual environment
source ~/moe_venv/bin/activate
echo "🚀 MoE environment loaded!"
echo " Model: \$MODEL_NAME"
echo " Path: \$MODEL_PATH"
echo " GPUs: \$GPU_COUNT"
EOF
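# Optional (sketch): auto-load the environment in new shells. Left commented out
# so the script does not silently edit the user's shell config.
# grep -qxF 'source ~/.moe_env' ~/.bashrc || echo 'source ~/.moe_env' >> ~/.bashrc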
print_success "Environment saved to ~/.moe_env"
print_info "In future sessions, just run: ${GREEN}source ~/.moe_env${NC}"