Skip to content

Instantly share code, notes, and snippets.

@HDCharles
Last active June 12, 2025 20:36
Show Gist options
  • Select an option

  • Save HDCharles/03903b2612c727c39cd11a47594c66b0 to your computer and use it in GitHub Desktop.

Select an option

Save HDCharles/03903b2612c727c39cd11a47594c66b0 to your computer and use it in GitHub Desktop.
_________________ TestAutoQuant.test_autoquant_compile_12_cuda _________________
a = (<test_integration.TestAutoQuant testMethod=test_autoquant_compile_12_cuda>,)
kw = {}
@wraps(func)
def standalone_func(*a, **kw):
> return func(*(a + p.args), **p.kwargs, **kw)
/opt/conda/envs/venv/lib/python3.9/site-packages/parameterized/parameterized.py:620:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
test/integration/test_integration.py:1647: in test_autoquant_compile
out2 = mod(example_input)
/opt/conda/envs/venv/lib/python3.9/site-packages/torch/_dynamo/eval_frame.py:372: in __call__
return super().__call__(*args, **kwargs)
/opt/conda/envs/venv/lib/python3.9/site-packages/torch/nn/modules/module.py:1767: in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
/opt/conda/envs/venv/lib/python3.9/site-packages/torch/nn/modules/module.py:1778: in _call_impl
return forward_call(*args, **kwargs)
/opt/conda/envs/venv/lib/python3.9/site-packages/torch/_dynamo/eval_frame.py:699: in compile_wrapper
return fn(*args, **kwargs)
/opt/conda/envs/venv/lib/python3.9/site-packages/torch/_dynamo/external_utils.py:68: in inner
@functools.wraps(fn)
/opt/conda/envs/venv/lib/python3.9/site-packages/torch/_dynamo/eval_frame.py:893: in _fn
return fn(*args, **kwargs)
/opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/aot_autograd.py:1231: in forward
return compiled_fn(full_args)
/opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py:344: in runtime_wrapper
all_outs = call_func_at_runtime_with_args(
/opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/utils.py:126: in call_func_at_runtime_with_args
out = normalize_as_list(f(args))
/opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/utils.py:100: in g
return f(*args)
/opt/conda/envs/venv/lib/python3.9/site-packages/torch/autograd/function.py:579: in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
/opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py:2034: in forward
fw_outs = call_func_at_runtime_with_args(
/opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/utils.py:126: in call_func_at_runtime_with_args
out = normalize_as_list(f(args))
/opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py:529: in wrapper
return compiled_fn(runtime_args)
/opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py:689: in inner_fn
unwrapped_outs = compiled_fn(unwrapped_args)
/opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py:723: in inner_fn
outs = compiled_fn(args)
/opt/conda/envs/venv/lib/python3.9/site-packages/torch/_inductor/output_code.py:583: in __call__
return self.current_callable(inputs)
/opt/conda/envs/venv/lib/python3.9/site-packages/torch/_inductor/utils.py:2665: in run
out = model(new_inputs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
args = []

    def call(args):
        primals_1, primals_2, primals_3, primals_4, primals_5 = args
        args.clear()
        assert_size_stride(primals_1, (1, 128), (128, 1))
        assert_size_stride(primals_2, (128, 128), (128, 1))
        assert_size_stride(primals_3, (128, ), (1, ))
        assert_size_stride(primals_4, (128, ), (1, ))
        assert_size_stride(primals_5, (128, ), (1, ))
        with torch.cuda._DeviceGuard(0):
            torch.cuda.set_device(0)
            buf0 = empty_strided_cuda((1, ), (1, ), torch.float16)
            buf1 = empty_strided_cuda((1, ), (1, ), torch.float16)
            buf2 = empty_strided_cuda((1, 128), (128, 1), torch.int8)
            # Topologically Sorted Source Nodes: [input_1, input_2], Original ATen: [aten.relu, aten.amin, aten.amax, aten.reciprocal, aten.mul, aten.add, aten.clamp, aten._to_copy]
            stream0 = get_raw_stream(0)
            triton_per_fused__to_copy_add_amax_amin_clamp_mul_reciprocal_relu_0.run(primals_1, buf0, buf1, buf2, 1, 128, stream=stream0)
            del primals_1
            buf3 = empty_strided_cuda((1, 128), (128, 1), torch.int32)
            # Topologically Sorted Source Nodes: [input_1, input_2], Original ATen: [aten.relu, aten.reciprocal, aten.mul, aten.add, aten.clamp, aten._to_copy, aten.view, aten._int_mm]
>           extern_kernels._int_mm(buf2, reinterpret_tensor(primals_2, (128, 128), (1, 128), 0), out=buf3)
E           RuntimeError: self.size(0) needs to be greater than 16, but got 1

/tmp/torchinductor_root/gs/cgskb5wnh5ly6ocvujqu2hbviwbq2y63wkkdik25qi5cs4pd4354.py:207: RuntimeError
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.