From c5f6c1d16dbe964dc23f0f8c89fa7dc19100ff59 Mon Sep 17 00:00:00 2001
From: Felix Mathew
Date: Wed, 4 Mar 2026 12:38:30 -0600
Subject: [PATCH 1/2] Fix inference extension build by including
 torch/nn/functional.h explicitly

---
 src/layers/extensions/inference/impl.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/layers/extensions/inference/impl.cpp b/src/layers/extensions/inference/impl.cpp
index 6419fff..9d37389 100644
--- a/src/layers/extensions/inference/impl.cpp
+++ b/src/layers/extensions/inference/impl.cpp
@@ -1,6 +1,7 @@
 // Copyright (c) Microsoft Corporation.
 // Licensed under the MIT License.
 
+#include <torch/nn/functional.h>
 #include "def.h"
 
 namespace F = torch::nn::functional;

From b1e1d69064c3b507ba0de64f06ea01aa87a50fc3 Mon Sep 17 00:00:00 2001
From: Felix Mathew
Date: Wed, 4 Mar 2026 13:09:39 -0600
Subject: [PATCH 2/2] Fix Windows CUDA inference build by avoiding
 torch/extension.h in CUDA TU

---
 src/layers/extensions/inference/common.h  |  2 +-
 src/layers/extensions/inference/def.h     |  2 +-
 src/layers/extensions/inference/kernel.cu | 18 ++++++++++--------
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/src/layers/extensions/inference/common.h b/src/layers/extensions/inference/common.h
index 7a8bfed..e42ae52 100644
--- a/src/layers/extensions/inference/common.h
+++ b/src/layers/extensions/inference/common.h
@@ -6,7 +6,7 @@
 #include <cuda.h>
 #include <cuda_runtime.h>
 #include <cuda_fp16.h>
-#include <torch/extension.h>
+#include <torch/types.h>
 
 // T maybe vector type, and may be different from t.dtype
 template <typename T>
diff --git a/src/layers/extensions/inference/def.h b/src/layers/extensions/inference/def.h
index 17e7782..19ecc92 100644
--- a/src/layers/extensions/inference/def.h
+++ b/src/layers/extensions/inference/def.h
@@ -1,7 +1,7 @@
 // Copyright (c) Microsoft Corporation.
 // Licensed under the MIT License.
 
-#include <torch/extension.h>
+#include <torch/types.h>
 
 std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
 process_with_mask_cuda(const torch::Tensor& y, const torch::Tensor& scales, const torch::Tensor& means,
diff --git a/src/layers/extensions/inference/kernel.cu b/src/layers/extensions/inference/kernel.cu
index 41ebc78..a855d85 100644
--- a/src/layers/extensions/inference/kernel.cu
+++ b/src/layers/extensions/inference/kernel.cu
@@ -6,7 +6,9 @@
 #include <cuda.h>
 #include <cuda_runtime.h>
 #include <cuda_fp16.h>
-#include <torch/extension.h>
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <torch/types.h>
 #include "common.h"
 #include "def.h"
 
@@ -121,10 +123,10 @@
 std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
 process_with_mask_cuda(const torch::Tensor& y, const torch::Tensor& scales, const torch::Tensor& means,
                        const torch::Tensor& mask, const float force_zero_thres) {
-  auto y_res = torch::empty_like(y);
-  auto y_q = torch::empty_like(y);
-  auto y_hat = torch::empty_like(y);
-  auto s_hat = torch::empty_like(y);
+  auto y_res = at::empty_like(y);
+  auto y_q = at::empty_like(y);
+  auto y_hat = at::empty_like(y);
+  auto s_hat = at::empty_like(y);
 
   if (y.dtype() == torch::kFloat32) {
     process_with_mask_dispatcher(y_res, y_q, y_hat, s_hat, y, scales, means,
@@ -855,7 +857,7 @@ __forceinline__ void round_and_to_int8_dispatcher(torch::Tensor& z, torch::Tenso
 }
 
 torch::Tensor round_and_to_int8_cuda(torch::Tensor& z) {
-  auto z_int8 = torch::empty_like(z, at::TensorOptions().dtype(torch::kInt8));
+  auto z_int8 = at::empty_like(z, at::TensorOptions().dtype(torch::kInt8));
   if (z.dtype() == torch::kFloat32) {
     round_and_to_int8_dispatcher(z, z_int8);
   } else if (z.dtype() == torch::kFloat16) {
@@ -900,7 +902,7 @@ __forceinline__ void clamp_reciprocal_with_quant_dispatcher(torch::Tensor& q_dec
 }
 
 torch::Tensor clamp_reciprocal_with_quant_cuda(const torch::Tensor& q_dec, torch::Tensor& y, const float min_val) {
-  auto q_dec_clamp = torch::empty_like(q_dec);
+  auto q_dec_clamp = at::empty_like(q_dec);
   if (q_dec.dtype() == torch::kFloat32) {
     clamp_reciprocal_with_quant_dispatcher(q_dec_clamp, q_dec, y, min_val);
   } else if (q_dec.dtype() == torch::kFloat16) {
@@ -1123,7 +1125,7 @@ torch::Tensor bias_wsilu_depthwise_conv2d_cuda(const torch::Tensor& x, const tor
   const int H = x_shape[2];
   const int W = x_shape[3];
 
-  auto out = torch::empty_like(x);
+  auto out = at::empty_like(x);
 
   const int BLOCK_SIZE = 32;
   const int THREAD_NUM_X = 16;