The original code used blocks=2*(..)/threads; reverting to blocks=(..)/threads

This commit is contained in:
Sarod Yatawatta 2018-02-06 21:22:34 +01:00
parent 3574960791
commit a96d481b8b
1 changed file with 3 additions and 3 deletions

View File

@@ -102,7 +102,7 @@ pipeline_slave_code_b(void *data)
checkCudaError(err,__FILE__,__LINE__);
} else if (dp->status[tid]==PT_DO_CCOST) {
/* divide total baselines by 2 */
int BlocksPerGrid= 2*(dp->Nbase[tid]+dp->lmdata[tid]->ThreadsPerBlock-1)/dp->lmdata[tid]->ThreadsPerBlock;
int BlocksPerGrid=(dp->Nbase[tid]+dp->lmdata[tid]->ThreadsPerBlock-1)/dp->lmdata[tid]->ThreadsPerBlock;
int boff=dp->boff[tid];
/* copy the current solution to device */
err=cudaMemcpy(dp->cpp[tid], dp->lmdata[tid]->p, m*sizeof(double), cudaMemcpyHostToDevice);
@@ -901,12 +901,12 @@ lbfgs_fit_common(
/* parameters per thread (GPU) */
int Nparm=(m+2-1)/2;
/* find number of blocks */
int BlocksPerGrid = 2* (Nparm+ThreadsPerBlock-1)/ThreadsPerBlock;
int BlocksPerGrid =(Nparm+ThreadsPerBlock-1)/ThreadsPerBlock;
ci=0;
int nth;
for (nth=0; nth<2; nth++) {
threaddata[nth].ThreadsPerBlock=ThreadsPerBlock;
threaddata[nth].BlocksPerGrid= 2*BlocksPerGrid;
threaddata[nth].BlocksPerGrid=BlocksPerGrid;
threaddata[nth].card=nth;
threaddata[nth].Nbase=dp->Nbase;
threaddata[nth].tilesz=dp->tilesz;