Last active
June 12, 2025 20:36
-
-
Save HDCharles/03903b2612c727c39cd11a47594c66b0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| _________________ TestAutoQuant.test_autoquant_compile_12_cuda _________________ | |
| a = (<test_integration.TestAutoQuant testMethod=test_autoquant_compile_12_cuda>,) | |
| kw = {} | |
| @wraps(func) | |
| def standalone_func(*a, **kw): | |
| > return func(*(a + p.args), **p.kwargs, **kw) | |
| /opt/conda/envs/venv/lib/python3.9/site-packages/parameterized/parameterized.py:620: | |
| _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ | |
| test/integration/test_integration.py:1647: in test_autoquant_compile | |
| out2 = mod(example_input) | |
| /opt/conda/envs/venv/lib/python3.9/site-packages/torch/_dynamo/eval_frame.py:372: in __call__ | |
| return super().__call__(*args, **kwargs) | |
| /opt/conda/envs/venv/lib/python3.9/site-packages/torch/nn/modules/module.py:1767: in _wrapped_call_impl | |
| return self._call_impl(*args, **kwargs) | |
| /opt/conda/envs/venv/lib/python3.9/site-packages/torch/nn/modules/module.py:1778: in _call_impl | |
| return forward_call(*args, **kwargs) | |
| /opt/conda/envs/venv/lib/python3.9/site-packages/torch/_dynamo/eval_frame.py:699: in compile_wrapper | |
| return fn(*args, **kwargs) | |
| /opt/conda/envs/venv/lib/python3.9/site-packages/torch/_dynamo/external_utils.py:68: in inner | |
| @functools.wraps(fn) | |
| /opt/conda/envs/venv/lib/python3.9/site-packages/torch/_dynamo/eval_frame.py:893: in _fn | |
| return fn(*args, **kwargs) | |
| /opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/aot_autograd.py:1231: in forward | |
| return compiled_fn(full_args) | |
| /opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py:344: in runtime_wrapper | |
| all_outs = call_func_at_runtime_with_args( | |
| /opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/utils.py:126: in call_func_at_runtime_with_args | |
| out = normalize_as_list(f(args)) | |
| /opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/utils.py:100: in g | |
| return f(*args) | |
| /opt/conda/envs/venv/lib/python3.9/site-packages/torch/autograd/function.py:579: in apply | |
| return super().apply(*args, **kwargs) # type: ignore[misc] | |
| /opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py:2034: in forward | |
| fw_outs = call_func_at_runtime_with_args( | |
| /opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/utils.py:126: in call_func_at_runtime_with_args | |
| out = normalize_as_list(f(args)) | |
| /opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py:529: in wrapper | |
| return compiled_fn(runtime_args) | |
| /opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py:689: in inner_fn | |
| unwrapped_outs = compiled_fn(unwrapped_args) | |
| /opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py:723: in inner_fn | |
| outs = compiled_fn(args) | |
| /opt/conda/envs/venv/lib/python3.9/site-packages/torch/_inductor/output_code.py:583: in __call__ | |
| return self.current_callable(inputs) | |
| /opt/conda/envs/venv/lib/python3.9/site-packages/torch/_inductor/utils.py:2665: in run | |
| out = model(new_inputs) | |
| _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ | |
| args = [] | |
| def call(args): | |
| primals_1, primals_2, primals_3, primals_4, primals_5 = args | |
| args.clear() | |
| assert_size_stride(primals_1, (1, 128), (128, 1)) | |
| assert_size_stride(primals_2, (128, 128), (128, 1)) | |
| assert_size_stride(primals_3, (128, ), (1, )) | |
| assert_size_stride(primals_4, (128, ), (1, )) | |
| assert_size_stride(primals_5, (128, ), (1, )) | |
| with torch.cuda._DeviceGuard(0): | |
| torch.cuda.set_device(0) | |
| buf0 = empty_strided_cuda((1, ), (1, ), torch.float16) | |
| buf1 = empty_strided_cuda((1, ), (1, ), torch.float16) | |
| buf2 = empty_strided_cuda((1, 128), (128, 1), torch.int8) | |
| # Topologically Sorted Source Nodes: [input_1, input_2], Original ATen: [aten.relu, aten.amin, aten.amax, aten.reciprocal, aten.mul, aten.add, aten.clamp, aten._to_copy] | |
| stream0 = get_raw_stream(0) | |
| triton_per_fused__to_copy_add_amax_amin_clamp_mul_reciprocal_relu_0.run(primals_1, buf0, buf1, buf2, 1, 128, stream=stream0) | |
| del primals_1 | |
| buf3 = empty_strided_cuda((1, 128), (128, 1), torch.int32) | |
| # Topologically Sorted Source Nodes: [input_1, input_2], Original ATen: [aten.relu, aten.reciprocal, aten.mul, aten.add, aten.clamp, aten._to_copy, aten.view, aten._int_mm] | |
| > extern_kernels._int_mm(buf2, reinterpret_tensor(primals_2, (128, 128), (1, 128), 0), out=buf3) | |
| E RuntimeError: self.size(0) needs to be greater than 16, but got 1 | |
| /tmp/torchinductor_root/gs/cgskb5wnh5ly6ocvujqu2hbviwbq2y63wkkdik25qi5cs4pd4354.py:207: RuntimeError |
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.