From a96d481b8b23d885075a41c4ad269b56b618d9ab Mon Sep 17 00:00:00 2001 From: Sarod Yatawatta Date: Tue, 6 Feb 2018 21:22:34 +0100 Subject: [PATCH] original blocks=2*(..)/threads, reverting to blocks=(..)/threads --- src/lib/Dirac/lbfgs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lib/Dirac/lbfgs.c b/src/lib/Dirac/lbfgs.c index 776101d..d8ac3cc 100644 --- a/src/lib/Dirac/lbfgs.c +++ b/src/lib/Dirac/lbfgs.c @@ -102,7 +102,7 @@ pipeline_slave_code_b(void *data) checkCudaError(err,__FILE__,__LINE__); } else if (dp->status[tid]==PT_DO_CCOST) { /* divide total baselines by 2 */ - int BlocksPerGrid= 2*(dp->Nbase[tid]+dp->lmdata[tid]->ThreadsPerBlock-1)/dp->lmdata[tid]->ThreadsPerBlock; + int BlocksPerGrid=(dp->Nbase[tid]+dp->lmdata[tid]->ThreadsPerBlock-1)/dp->lmdata[tid]->ThreadsPerBlock; int boff=dp->boff[tid]; /* copy the current solution to device */ err=cudaMemcpy(dp->cpp[tid], dp->lmdata[tid]->p, m*sizeof(double), cudaMemcpyHostToDevice); @@ -901,12 +901,12 @@ lbfgs_fit_common( /* parameters per thread (GPU) */ int Nparm=(m+2-1)/2; /* find number of blocks */ - int BlocksPerGrid = 2* (Nparm+ThreadsPerBlock-1)/ThreadsPerBlock; + int BlocksPerGrid =(Nparm+ThreadsPerBlock-1)/ThreadsPerBlock; ci=0; int nth; for (nth=0; nth<2; nth++) { threaddata[nth].ThreadsPerBlock=ThreadsPerBlock; - threaddata[nth].BlocksPerGrid= 2*BlocksPerGrid; + threaddata[nth].BlocksPerGrid=BlocksPerGrid; threaddata[nth].card=nth; threaddata[nth].Nbase=dp->Nbase; threaddata[nth].tilesz=dp->tilesz;