Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save skimo-openhub/dcfa7fd414e3487bb8df2a3cc84f1f3e to your computer and use it in GitHub Desktop.

Select an option

Save skimo-openhub/dcfa7fd414e3487bb8df2a3cc84f1f3e to your computer and use it in GitHub Desktop.
// Compile and run the "matmul" entry of the TC returned by makeMatmulTc(),
// using an explicit set of CUDA mapping options.
auto TC = makeMatmulTc();

// Start from makeNaiveMappingOptions() and override each knob explicitly.
// The setter order is kept exactly as given.
auto mappingOptions =
    tc::CudaMappingOptions::makeNaiveMappingOptions()
        // Outer schedule settings.
        .outerScheduleFusionStrategy(tc::FusionStrategy::Max)
        .outerScheduleAllowSkewing(false)
        .outerSchedulePositiveOrthant(true)
        // Intra-tile schedule settings.
        .intraTileScheduleFusionStrategy(tc::FusionStrategy::Min)
        .intraTileScheduleAllowSkewing(false)
        .intraTileSchedulePositiveOrthant(true)
        .fixParametersBeforeScheduling(false)
        // Tiling and unrolling.
        .tile(56, 32, 4, 14, 16)
        .unroll(16)
        .tileImperfectlyNested(false)
        .matchLibraryCalls(false)
        // Thread/block mapping.
        .mapToThreads(4, 128)
        .mapToBlocks(1, 32, 32)
        // Memory-related switches.
        .useSharedMemory(false)
        .usePrivateMemory(true)
        .unrollCopyShared(false)
        .useReadOnlyCache(false);

// Problem sizes: A is created with sizes {N, K} and B with sizes {K, M}.
uint32_t N = 500;
uint32_t K = 400;
uint32_t M = 100;

// Randomly filled float tensors created through the ATen CUDA type.
at::Tensor A = at::CUDA(at::kFloat).rand({N, K});
at::Tensor B = at::CUDA(at::kFloat).rand({K, M});
std::vector<at::Tensor> inputs{A, B};

// Compile for the CUDA backend, prepare the outputs, then run.
auto executor = tc::aten::compile<tc::CudaBackend>(
    TC, "matmul", inputs, mappingOptions);
auto outputs = tc::aten::prepareOutputs(TC, "matmul", inputs);
tc::aten::run(*executor, inputs, outputs);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment