Revert the grid-size computation: the original code used blocks=2*(..)/threads; this commit reverts it to blocks=(..)/threads

2018-02-06 21:22:34 +01:00 · 2018-02-06 21:22:34 +01:00 · a96d481b8b
parent 3574960791
commit a96d481b8b
1 changed files with 3 additions and 3 deletions
--- a/src/lib/Dirac/lbfgs.c
+++ b/src/lib/Dirac/lbfgs.c
@@ -102,7 +102,7 @@ pipeline_slave_code_b(void *data)
    checkCudaError(err,__FILE__,__LINE__);
  } else if (dp->status[tid]==PT_DO_CCOST) {
   /* divide total baselines by 2 */
-   int BlocksPerGrid= 2*(dp->Nbase[tid]+dp->lmdata[tid]->ThreadsPerBlock-1)/dp->lmdata[tid]->ThreadsPerBlock;
+   int BlocksPerGrid=(dp->Nbase[tid]+dp->lmdata[tid]->ThreadsPerBlock-1)/dp->lmdata[tid]->ThreadsPerBlock;
   int  boff=dp->boff[tid];
   /* copy the current solution to device */
   err=cudaMemcpy(dp->cpp[tid], dp->lmdata[tid]->p, m*sizeof(double), cudaMemcpyHostToDevice);
@@ -901,12 +901,12 @@ lbfgs_fit_common(
  /* parameters per thread (GPU) */
  int Nparm=(m+2-1)/2;
  /* find number of blocks */
-  int BlocksPerGrid = 2* (Nparm+ThreadsPerBlock-1)/ThreadsPerBlock;
+  int BlocksPerGrid =(Nparm+ThreadsPerBlock-1)/ThreadsPerBlock;
  ci=0;
  int nth;
  for (nth=0; nth<2; nth++) {
   threaddata[nth].ThreadsPerBlock=ThreadsPerBlock;
-   threaddata[nth].BlocksPerGrid= 2*BlocksPerGrid;
+   threaddata[nth].BlocksPerGrid=BlocksPerGrid;
   threaddata[nth].card=nth;
   threaddata[nth].Nbase=dp->Nbase;
   threaddata[nth].tilesz=dp->tilesz;