diff --git a/src/infiniop/ops/causal_softmax/moore/causal_softmax_kernel.h b/src/infiniop/ops/causal_softmax/moore/causal_softmax_kernel.h index 19d364552..111e7cd4c 100644 --- a/src/infiniop/ops/causal_softmax/moore/causal_softmax_kernel.h +++ b/src/infiniop/ops/causal_softmax/moore/causal_softmax_kernel.h @@ -28,7 +28,7 @@ __device__ void causalSoftmaxKernel( // 1 | * * * ... * * | // 2 | * * * ... * * * | // height: 3 col_id-> - if (width + blockIdx.x >= threadIdx.x + height) { + if (width + blockIdx.x >= col + height) { if constexpr (std::is_same_v || std::is_same_v) { /* * MUSA does not support CUDA's native `hexp` function.