Fix: conv2dreduce_kernel_ per-batch input offset (nchunck must be h * hchunk, not n * hchunk)

2023-04-24 02:30:41 +08:00 · 2023-04-24 02:30:41 +08:00 · c1275cddb6
parent 51cc042f56
commit c1275cddb6
1 changed file with 1 addition and 1 deletion
--- a/src/kernels/cuda/conv2dreduce.cu
+++ b/src/kernels/cuda/conv2dreduce.cu
@@ -16,7 +16,7 @@ __global__ void conv2dreduce_kernel_(float *__restrict__ input,
    int nid = blockIdx.x, fid = blockIdx.y;
    int hid = threadIdx.x, wid = threadIdx.y;
    const int fchunck = r * s, wchunk = f * fchunck, hchunk = w * wchunk,
-              nchunck = n * hchunk;
+              nchunck = h * hchunk;
    float *nfinput = input + nid * nchunck + fid * fchunck;
    if (nid < n && fid < f && hid < oh && wid < ow) {
        float imm = 0.0;