@HDCharles
Last active November 19, 2025 16:13
Demo AWQ Mapping vs Targets
"""
Test to verify what happens when layers are in mappings but not in targets for AWQ.
Adapted from https://github.com/vllm-project/llm-compressor/tree/main/examples/awq
"""
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from llmcompressor import oneshot
from llmcompressor.modifiers.awq import AWQModifier
# Select model and load it - using TinyLlama for faster testing
MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
# Save original weights for comparison
print("=" * 80)
print("Saving original weights for comparison...")
print("=" * 80)
# Let's check layer 0 (will be in targets) and layer 10 (will NOT be in targets)
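# Note: AWQ smoothing divides the smooth layer's weights (here input_layernorm) by a
# per-channel scale and multiplies the mapped balance layers (q/k/v_proj) by the same
# scale, so a change in BOTH is the signature that smoothing touched a layer.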
layer0_qproj_weight = model.model.layers[0].self_attn.q_proj.weight.data.clone()
layer0_input_ln_weight = model.model.layers[0].input_layernorm.weight.data.clone()
layer10_qproj_weight = model.model.layers[10].self_attn.q_proj.weight.data.clone()
layer10_input_ln_weight = model.model.layers[10].input_layernorm.weight.data.clone()
print(f"Layer 0 q_proj weight norm: {layer0_qproj_weight.norm():.6f}")
print(f"Layer 0 input_layernorm weight norm: {layer0_input_ln_weight.norm():.6f}")
print(f"Layer 10 q_proj weight norm: {layer10_qproj_weight.norm():.6f}")
print(f"Layer 10 input_layernorm weight norm: {layer10_input_ln_weight.norm():.6f}")
print()
# Select calibration dataset.
DATASET_ID = "HuggingFaceH4/ultrachat_200k"
DATASET_SPLIT = "train_sft"
# Using fewer samples for faster testing
NUM_CALIBRATION_SAMPLES = 16
MAX_SEQUENCE_LENGTH = 512
# Load dataset and preprocess.
ds = load_dataset(DATASET_ID, split=f"{DATASET_SPLIT}[:{NUM_CALIBRATION_SAMPLES}]")
ds = ds.shuffle(seed=42)
def preprocess(example):
    return {
        "text": tokenizer.apply_chat_template(
            example["messages"],
            tokenize=False,
        )
    }
ds = ds.map(preprocess)
# Tokenize inputs.
def tokenize(sample):
    return tokenizer(
        sample["text"],
        padding=False,
        max_length=MAX_SEQUENCE_LENGTH,
        truncation=True,
        add_special_tokens=False,
    )
ds = ds.map(tokenize, remove_columns=ds.column_names)
# Configure the quantization algorithm to run.
# Key test:
# - Mappings (architecture defaults) cover BOTH layer 0 and layer 10
# - Targets only match layers 0-9 (NOT layer 10)
print("=" * 80)
print("Configuring AWQ:")
print(" - Mappings: default LlamaForCausalLM mappings (cover ALL layers, including layer 10)")
print(" - Targets: only match layers 0-9 (layer 10 excluded from quantization)")
print(" - This tests: layer 10 in mappings but NOT in targets")
print("=" * 80)
print()
from llmcompressor.modifiers.awq.mappings import get_layer_mappings_from_architecture
# Get default mappings for LlamaForCausalLM (will cover all layers)
default_mappings = get_layer_mappings_from_architecture("LlamaForCausalLM")
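# For reference: in llm-compressor's default Llama mappings, each AWQMapping pairs a
# smoothing layer (input_layernorm, v_proj, post_attention_layernorm, up_proj) with the
# linear layers that consume its output, and the patterns are layer-index-agnostic
# (e.g. "re:.*input_layernorm$"), so they resolve in EVERY decoder layer, including layer 10.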
recipe = [
    AWQModifier(
        mappings=default_mappings,  # This will smooth ALL layers, including layer 10
        config_groups={
            "group_0": {
                # Only quantize Linear layers in layers 0-9, NOT layer 10.
                # The patterns match q_proj, k_proj, v_proj, o_proj, gate_proj,
                # up_proj, and down_proj in layers 0-9 only.
                "targets": [
                    "re:model.layers.[0-9].self_attn.(q|k|v|o)_proj$",
                    "re:model.layers.[0-9].mlp.(gate|up|down)_proj$",
                ],
                "weights": {
                    "num_bits": 4,
                    "type": "int",
                    "symmetric": False,
                    "strategy": "group",
                    "group_size": 128,
                },
            }
        },
        ignore=["lm_head"],  # Only ignore lm_head, not layer 10
    ),
]
# Apply algorithms.
oneshot(
    model=model,
    dataset=ds,
    recipe=recipe,
    max_seq_length=MAX_SEQUENCE_LENGTH,
    num_calibration_samples=NUM_CALIBRATION_SAMPLES,
)
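# If mappings and targets are handled independently, oneshot will have applied AWQ
# smoothing for every resolved mapping (all decoder layers) but quantized only the
# modules matched by config_groups["group_0"]["targets"] (layers 0-9).
# The checks below verify this.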
# Check results
print("\n\n")
print("=" * 80)
print("RESULTS: Checking if weights changed and quantization status")
print("=" * 80)
# Check if weights changed
layer0_qproj_changed = not torch.allclose(
    model.model.layers[0].self_attn.q_proj.weight.data.cpu(),
    layer0_qproj_weight.cpu(),
    rtol=1e-5,
)
layer0_ln_changed = not torch.allclose(
    model.model.layers[0].input_layernorm.weight.data.cpu(),
    layer0_input_ln_weight.cpu(),
    rtol=1e-5,
)
layer10_qproj_changed = not torch.allclose(
    model.model.layers[10].self_attn.q_proj.weight.data.cpu(),
    layer10_qproj_weight.cpu(),
    rtol=1e-5,
)
layer10_ln_changed = not torch.allclose(
    model.model.layers[10].input_layernorm.weight.data.cpu(),
    layer10_input_ln_weight.cpu(),
    rtol=1e-5,
)
print("\nWeight changes:")
print(f" Layer 0 q_proj changed: {layer0_qproj_changed}")
print(f" Layer 0 input_layernorm changed: {layer0_ln_changed}")
print(f" Layer 10 q_proj changed: {layer10_qproj_changed}")
print(f" Layer 10 input_layernorm changed: {layer10_ln_changed}")
# Check quantization status
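# llm-compressor (via compressed-tensors) attaches quantization parameters such as
# "weight_scale" to the modules it quantizes, so hasattr() is a simple proxy for
# "was this module matched by the targets".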
layer0_has_scale = hasattr(model.model.layers[0].self_attn.q_proj, "weight_scale")
layer10_has_scale = hasattr(model.model.layers[10].self_attn.q_proj, "weight_scale")
print("\nQuantization status:")
print(f" Layer 0 q_proj has weight_scale (quantized): {layer0_has_scale}")
print(f" Layer 10 q_proj has weight_scale (quantized): {layer10_has_scale}")
print("\n" + "=" * 80)
print("INTERPRETATION")
print("=" * 80)
if layer10_ln_changed and layer10_qproj_changed:
    print("\n✓ CONFIRMED: Layer 10 (in mappings but not in targets) WAS SMOOTHED")
    print("  - Both input_layernorm and q_proj weights were modified")
    if layer10_has_scale:
        print("  ✗ Layer 10 was ALSO quantized (unexpected!)")
    else:
        print("  ✓ Layer 10 was NOT quantized (as expected)")
else:
    print("\n✗ Layer 10 was NOT smoothed (unexpected)")
    print(f"  - input_layernorm changed: {layer10_ln_changed}")
    print(f"  - q_proj changed: {layer10_qproj_changed}")
if layer0_has_scale:
    print("\n✓ Layer 0 (in targets) was quantized as expected")
else:
    print("\n✗ Layer 0 was NOT quantized (unexpected!)")
print("\n" + "=" * 80)
@HDCharles (Author) commented with the run output:

 ================================================================================
 RESULTS: Checking if weights changed and quantization status
 ================================================================================

 Weight changes:
   Layer 0 q_proj changed: True
   Layer 0 input_layernorm changed: True
   Layer 10 q_proj changed: True
   Layer 10 input_layernorm changed: True

 Quantization status:
   Layer 0 q_proj has weight_scale (quantized): True
   Layer 10 q_proj has weight_scale (quantized): False

 ================================================================================
 INTERPRETATION
 ================================================================================

 ✓ CONFIRMED: Layer 10 (in mappings but not in targets) WAS SMOOTHED
   - Both input_layernorm and q_proj weights were modified
   ✓ Layer 10 was NOT quantized (as expected)

 ✓ Layer 0 (in targets) was quantized as expected

 ================================================================================
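
For completeness, a minimal sketch (untested; assuming AWQMapping in llmcompressor.modifiers.awq.mappings takes a smooth-layer pattern and a list of balance-layer patterns) of how smoothing could be kept aligned with the targets by restricting the mappings to the same layer range instead of using the architecture defaults:

from llmcompressor.modifiers.awq.mappings import AWQMapping

# Hypothetical restricted mappings: same single-digit layer range as the
# quantization targets above (layers 0-9 only).
PREFIX = "re:model.layers.[0-9]."

restricted_mappings = [
    AWQMapping(PREFIX + "input_layernorm", [
        PREFIX + "self_attn.q_proj",
        PREFIX + "self_attn.k_proj",
        PREFIX + "self_attn.v_proj",
    ]),
    AWQMapping(PREFIX + "self_attn.v_proj", [PREFIX + "self_attn.o_proj"]),
    AWQMapping(PREFIX + "post_attention_layernorm", [
        PREFIX + "mlp.gate_proj",
        PREFIX + "mlp.up_proj",
    ]),
    AWQMapping(PREFIX + "mlp.up_proj", [PREFIX + "mlp.down_proj"]),
]

# Passing mappings=restricted_mappings to the AWQModifier above should leave
# layer 10's weights untouched during smoothing as well.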
