HDCharles · June 12, 2025 20:36
diff --git a/gistfile1.txt b/gistfile1.txt
 _________________ TestAutoQuant.test_autoquant_compile_12_cuda _________________

 a = (<test_integration.TestAutoQuant testMethod=test_autoquant_compile_12_cuda>,)
 kw = {}

    @wraps(func)
    def standalone_func(*a, **kw):
 >       return func(*(a + p.args), **p.kwargs, **kw)

 /opt/conda/envs/venv/lib/python3.9/site-packages/parameterized/parameterized.py:620: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 test/integration/test_integration.py:1647: in test_autoquant_compile
    out2 = mod(example_input)
 /opt/conda/envs/venv/lib/python3.9/site-packages/torch/_dynamo/eval_frame.py:372: in __call__
    return super().__call__(*args, **kwargs)
 /opt/conda/envs/venv/lib/python3.9/site-packages/torch/nn/modules/module.py:1767: in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
 /opt/conda/envs/venv/lib/python3.9/site-packages/torch/nn/modules/module.py:1778: in _call_impl
    return forward_call(*args, **kwargs)
 /opt/conda/envs/venv/lib/python3.9/site-packages/torch/_dynamo/eval_frame.py:699: in compile_wrapper
    return fn(*args, **kwargs)
 /opt/conda/envs/venv/lib/python3.9/site-packages/torch/_dynamo/external_utils.py:68: in inner
    @functools.wraps(fn)
 /opt/conda/envs/venv/lib/python3.9/site-packages/torch/_dynamo/eval_frame.py:893: in _fn
    return fn(*args, **kwargs)
 /opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/aot_autograd.py:1231: in forward
    return compiled_fn(full_args)
 /opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py:344: in runtime_wrapper
    all_outs = call_func_at_runtime_with_args(
 /opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/utils.py:126: in call_func_at_runtime_with_args
    out = normalize_as_list(f(args))
 /opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/utils.py:100: in g
    return f(*args)
 /opt/conda/envs/venv/lib/python3.9/site-packages/torch/autograd/function.py:579: in apply
    return super().apply(*args, **kwargs)  # type: ignore[misc]
 /opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py:2034: in forward
    fw_outs = call_func_at_runtime_with_args(
 /opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/utils.py:126: in call_func_at_runtime_with_args
    out = normalize_as_list(f(args))
 /opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py:529: in wrapper
    return compiled_fn(runtime_args)
 /opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py:689: in inner_fn
    unwrapped_outs = compiled_fn(unwrapped_args)
 /opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py:723: in inner_fn
    outs = compiled_fn(args)
 /opt/conda/envs/venv/lib/python3.9/site-packages/torch/_inductor/output_code.py:583: in __call__
    return self.current_callable(inputs)
 /opt/conda/envs/venv/lib/python3.9/site-packages/torch/_inductor/utils.py:2665: in run
    out = model(new_inputs)
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

 args = []

    def call(args):
        primals_1, primals_2, primals_3, primals_4, primals_5 = args
        args.clear()
        assert_size_stride(primals_1, (1, 128), (128, 1))
        assert_size_stride(primals_2, (128, 128), (128, 1))
        assert_size_stride(primals_3, (128, ), (1, ))
        assert_size_stride(primals_4, (128, ), (1, ))
        assert_size_stride(primals_5, (128, ), (1, ))
        with torch.cuda._DeviceGuard(0):
            torch.cuda.set_device(0)
            buf0 = empty_strided_cuda((1, ), (1, ), torch.float16)
            buf1 = empty_strided_cuda((1, ), (1, ), torch.float16)
            buf2 = empty_strided_cuda((1, 128), (128, 1), torch.int8)
            # Topologically Sorted Source Nodes: [input_1, input_2], Original ATen: [aten.relu, aten.amin, aten.amax, aten.reciprocal, aten.mul, aten.add, aten.clamp, aten._to_copy]
            stream0 = get_raw_stream(0)
            triton_per_fused__to_copy_add_amax_amin_clamp_mul_reciprocal_relu_0.run(primals_1, buf0, buf1, buf2, 1, 128, stream=stream0)
            del primals_1
            buf3 = empty_strided_cuda((1, 128), (128, 1), torch.int32)
            # Topologically Sorted Source Nodes: [input_1, input_2], Original ATen: [aten.relu, aten.reciprocal, aten.mul, aten.add, aten.clamp, aten._to_copy, aten.view, aten._int_mm]
 >           extern_kernels._int_mm(buf2, reinterpret_tensor(primals_2, (128, 128), (1, 128), 0), out=buf3)
 E           RuntimeError: self.size(0) needs to be greater than 16, but got 1

 /tmp/torchinductor_root/gs/cgskb5wnh5ly6ocvujqu2hbviwbq2y63wkkdik25qi5cs4pd4354.py:207: RuntimeError
	_________________ TestAutoQuant.test_autoquant_compile_12_cuda _________________

	a = (<test_integration.TestAutoQuant testMethod=test_autoquant_compile_12_cuda>,)
	kw = {}

	@wraps(func)
	def standalone_func(*a, **kw):
	> return func(*(a + p.args), **p.kwargs, **kw)

	/opt/conda/envs/venv/lib/python3.9/site-packages/parameterized/parameterized.py:620:
	_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
	test/integration/test_integration.py:1647: in test_autoquant_compile
	out2 = mod(example_input)
	/opt/conda/envs/venv/lib/python3.9/site-packages/torch/_dynamo/eval_frame.py:372: in __call__
	return super().__call__(*args, **kwargs)
	/opt/conda/envs/venv/lib/python3.9/site-packages/torch/nn/modules/module.py:1767: in _wrapped_call_impl
	return self._call_impl(*args, **kwargs)
	/opt/conda/envs/venv/lib/python3.9/site-packages/torch/nn/modules/module.py:1778: in _call_impl
	return forward_call(*args, **kwargs)
	/opt/conda/envs/venv/lib/python3.9/site-packages/torch/_dynamo/eval_frame.py:699: in compile_wrapper
	return fn(*args, **kwargs)
	/opt/conda/envs/venv/lib/python3.9/site-packages/torch/_dynamo/external_utils.py:68: in inner
	@functools.wraps(fn)
	/opt/conda/envs/venv/lib/python3.9/site-packages/torch/_dynamo/eval_frame.py:893: in _fn
	return fn(*args, **kwargs)
	/opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/aot_autograd.py:1231: in forward
	return compiled_fn(full_args)
	/opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py:344: in runtime_wrapper
	all_outs = call_func_at_runtime_with_args(
	/opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/utils.py:126: in call_func_at_runtime_with_args
	out = normalize_as_list(f(args))
	/opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/utils.py:100: in g
	return f(*args)
	/opt/conda/envs/venv/lib/python3.9/site-packages/torch/autograd/function.py:579: in apply
	return super().apply(*args, **kwargs)  # type: ignore[misc]
	/opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py:2034: in forward
	fw_outs = call_func_at_runtime_with_args(
	/opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/utils.py:126: in call_func_at_runtime_with_args
	out = normalize_as_list(f(args))
	/opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py:529: in wrapper
	return compiled_fn(runtime_args)
	/opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py:689: in inner_fn
	unwrapped_outs = compiled_fn(unwrapped_args)
	/opt/conda/envs/venv/lib/python3.9/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py:723: in inner_fn
	outs = compiled_fn(args)
	/opt/conda/envs/venv/lib/python3.9/site-packages/torch/_inductor/output_code.py:583: in __call__
	return self.current_callable(inputs)
	/opt/conda/envs/venv/lib/python3.9/site-packages/torch/_inductor/utils.py:2665: in run
	out = model(new_inputs)
	_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

	args = []

	def call(args):
	primals_1, primals_2, primals_3, primals_4, primals_5 = args
	args.clear()
	assert_size_stride(primals_1, (1, 128), (128, 1))
	assert_size_stride(primals_2, (128, 128), (128, 1))
	assert_size_stride(primals_3, (128, ), (1, ))
	assert_size_stride(primals_4, (128, ), (1, ))
	assert_size_stride(primals_5, (128, ), (1, ))
	with torch.cuda._DeviceGuard(0):
	torch.cuda.set_device(0)
	buf0 = empty_strided_cuda((1, ), (1, ), torch.float16)
	buf1 = empty_strided_cuda((1, ), (1, ), torch.float16)
	buf2 = empty_strided_cuda((1, 128), (128, 1), torch.int8)
	# Topologically Sorted Source Nodes: [input_1, input_2], Original ATen: [aten.relu, aten.amin, aten.amax, aten.reciprocal, aten.mul, aten.add, aten.clamp, aten._to_copy]
	stream0 = get_raw_stream(0)
	triton_per_fused__to_copy_add_amax_amin_clamp_mul_reciprocal_relu_0.run(primals_1, buf0, buf1, buf2, 1, 128, stream=stream0)
	del primals_1
	buf3 = empty_strided_cuda((1, 128), (128, 1), torch.int32)
	# Topologically Sorted Source Nodes: [input_1, input_2], Original ATen: [aten.relu, aten.reciprocal, aten.mul, aten.add, aten.clamp, aten._to_copy, aten.view, aten._int_mm]
	> extern_kernels._int_mm(buf2, reinterpret_tensor(primals_2, (128, 128), (1, 128), 0), out=buf3)
	E RuntimeError: self.size(0) needs to be greater than 16, but got 1

	/tmp/torchinductor_root/gs/cgskb5wnh5ly6ocvujqu2hbviwbq2y63wkkdik25qi5cs4pd4354.py:207: RuntimeError
No results found