fixed compile issues for both CPU/GPU versions

This commit is contained in:
Sarod Yatawatta 2018-02-07 13:34:18 +01:00
parent a7464acc2d
commit d6ee3966b9
7 changed files with 37 additions and 55 deletions

View File

@ -404,6 +404,35 @@ typedef struct thread_data_findsumprod_ {
float sum2; /* y^T |x| */
} thread_data_findsumprod_t;
/******* some common routines that need to be moved - FIXME **********/
/****************************** readsky.c ****************************/
/* rearranges coherencies for GPU use later */
/* barr: 2*Nbase x 1
* coh: M*Nbase*4 x 1 complex
* ddcoh: M*Nbase*8 x 1
* ddbase: 2*Nbase x 1 (sta1,sta2) = -1 if flagged
* */
extern int
rearrange_coherencies(int Nbase, baseline_t *barr, complex double *coh, double *ddcoh, short *ddbase, int M, int Nt);
/* rearranges baselines for GPU use later */
/* barr: 2*Nbase x 1
* ddbase: 2*Nbase x 1
* */
extern int
rearrange_baselines(int Nbase, baseline_t *barr, short *ddbase, int Nt);
/* count how many baselines contribute to each station */
extern int
count_baselines(int Nbase, int N, float *iw, short *ddbase, int Nt);
/* initialize array b (size Nx1) to given value a */
#ifdef USE_MIC
__attribute__ ((target(MIC)))
#endif
extern void
setweights(int N, double *b, double a, int Nt);
#ifdef __cplusplus
} /* extern "C" */
#endif

View File

@ -1935,32 +1935,6 @@ double ph_ra0, double ph_dec0, double ph_freq0, double *longitude, double *latit
extern int
precess_source_locations(double jd_tdb, clus_source_t *carr, int M, double *ra_beam, double *dec_beam, int Nt);
/****************************** predict_withbeam_gpu.c ****************************/
/* if dobeam==0, beam calculation is off */
extern int
precalculate_coherencies_withbeam_gpu(double *u, double *v, double *w, complex double *x, int N,
int Nbase, baseline_t *barr, clus_source_t *carr, int M, double freq0, double fdelta, double tdelta, double dec0, double uvmin, double uvmax,
double ph_ra0, double ph_dec0, double ph_freq0, double *longitude, double *latitude, double *time_utc, int tileze, int *Nelem, double **xx, double **yy, double **zz, int dobeam, int Nt);
extern int
predict_visibilities_multifreq_withbeam_gpu(double *u,double *v,double *w,double *x,int N,int Nbase,int tilesz,baseline_t *barr, clus_source_t *carr, int M,double *freqs,int Nchan, double fdelta,double tdelta, double dec0,
double ph_ra0, double ph_dec0, double ph_freq0, double *longitude, double *latitude, double *time_utc,int *Nelem, double **xx, double **yy, double **zz, int dobeam, int Nt, int add_to_data);
/****************************** predict_model.cu ****************************/
extern void
cudakernel_array_beam(int N, int T, int K, int F, float *freqs, float *longitude, float *latitude,
double *time_utc, int *Nelem, float **xx, float **yy, float **zz, float *ra, float *dec, float ph_ra0, float ph_dec0, float ph_freq0, float *beam);
extern void
cudakernel_coherencies(int B, int N, int T, int K, int F, float *u, float *v, float *w,baseline_t *barr, float *freqs, float *beam, float *ll, float *mm, float *nn, float *sI,
unsigned char *stype, float *sI0, float *f0, float *spec_idx, float *spec_idx1, float *spec_idx2, int **exs, float deltaf, float deltat, float dec0, float *coh,int dobeam);
extern void
cudakernel_convert_time(int T, double *time_utc);
#ifdef __cplusplus
} /* extern "C" */
#endif

View File

@ -23,7 +23,7 @@ INCLUDES= -I. -I$(CUDAINC) -I$(NVML_INC)
LIBPATH= $(CUDALIB)
OBJECTS=lmfit.o lbfgs.o myblas.o mderiv.o clmfit.o clmfit_nocuda.o residual.o barrier.o robust.o robustlm.o oslmfit.o mderiv_fl.o clmfit_fl.o updatenu.o robust_lbfgs_nocuda.o robust_fl.o manifold_fl.o rtr_solve_cuda.o rtr_solve_robust_cuda.o manifold_average.o consensus_poly.o rtr_solve_robust_cuda_admm.o rtr_solve_robust_admm.o admm_solve.o load_balance.o
OBJECTS=lmfit.o lbfgs.o myblas.o mderiv.o clmfit.o clmfit_nocuda.o barrier.o robust.o robustlm.o oslmfit.o mderiv_fl.o clmfit_fl.o updatenu.o robust_lbfgs_nocuda.o robust_fl.o manifold_fl.o rtr_solve_cuda.o rtr_solve_robust_cuda.o manifold_average.o consensus_poly.o rtr_solve_robust_cuda_admm.o rtr_solve_robust_admm.o admm_solve.o load_balance.o
default:libdirac.a
@ -39,8 +39,6 @@ clmfit.o:clmfit.c
$(CC) $(CFLAGS) $(INCLUDES) $(GLIBI) -c $<
clmfit_nocuda.o:clmfit_nocuda.c
$(CC) $(CFLAGS) $(INCLUDES) $(GLIBI) -c $<
residual.o:residual.c
$(CC) $(CFLAGS) $(INCLUDES) $(GLIBI) -c $<
barrier.o:barrier.c
$(CC) $(CFLAGS) $(INCLUDES) $(GLIBI) -c $<
robustlm.o:robustlm.c

View File

@ -26,7 +26,7 @@ INCLUDES= -I. -I$(CUDAINC) -I$(NVML_INC) -I../Dirac
LIBPATH= $(CUDALIB)
OBJECTS=readsky.o predict.o transforms.o stationbeam.o predict_withbeam.o predict_withbeam_gpu.o predict_model.o predict_model_device.o
OBJECTS=readsky.o predict.o residual.o transforms.o stationbeam.o predict_withbeam.o predict_withbeam_gpu.o predict_model.o
default:libradio.a
@ -34,6 +34,8 @@ readsky.o:readsky.c
$(CC) $(CFLAGS) $(INCLUDES) $(GLIBI) -c $<
predict.o:predict.c
$(CC) $(CFLAGS) $(INCLUDES) $(GLIBI) -c $<
residual.o:residual.c
$(CC) $(CFLAGS) $(INCLUDES) $(GLIBI) -c $<
transforms.o:transforms.c
$(CC) $(CFLAGS) $(INCLUDES) $(GLIBI) -c $<
stationbeam.o:stationbeam.c
@ -42,11 +44,8 @@ predict_withbeam.o:predict_withbeam.c
$(CC) $(CFLAGS) $(INCLUDES) $(GLIBI) -c $<
predict_withbeam_gpu.o:predict_withbeam_gpu.c
$(CC) $(CFLAGS) $(INCLUDES) $(GLIBI) -c $<
## for dynamic parallelism, two stage compilation
predict_model.o:predict_model_device.o
$(NVCC) $(NVCFLAGS) -lineinfo -dlink $(INCLUDES) $(GLIBI) -o $@ $<
predict_model_device.o:predict_model.cu
$(NVCC) $(NVCFLAGS) -lineinfo -rdc=true $(INCLUDES) $(GLIBI) -o $@ -c $<
predict_model.o:predict_model.cu
$(NVCC) $(NVCFLAGS) $(INCLUDES) $(GLIBI) -o $@ -c $<
RANLIB=ranlib

View File

@ -212,30 +212,10 @@ precalculate_coherencies(double *u, double *v, double *w, complex double *x, int
ddcoh: M*Nbase*8 x 1
ddbase: 2*Nbase x 1 (sta1,sta2) = -1 if flagged
*/
extern int
rearrange_coherencies(int Nbase, baseline_t *barr, complex double *coh, double *ddcoh, short *ddbase, int M, int Nt);
/* ddbase: 3*Nbase x 1 (sta1,sta2,flag) */
extern int
rearrange_coherencies2(int Nbase, baseline_t *barr, complex double *coh, double *ddcoh, short *ddbase, int M, int Nt);
/* rearranges baselines for GPU use later */
/* barr: 2*Nbase x 1
ddbase: 2*Nbase x 1
*/
extern int
rearrange_baselines(int Nbase, baseline_t *barr, short *ddbase, int Nt);
/* count how many baselines contribute to each station */
extern int
count_baselines(int Nbase, int N, float *iw, short *ddbase, int Nt);
/* initialize array b (size Nx1) to given value a */
#ifdef USE_MIC
__attribute__ ((target(MIC)))
#endif
extern void
setweights(int N, double *b, double a, int Nt);
/* update baseline flags, also make data zero if flagged
this is needed for solving (calculate error) ignore flagged data */
/* Nbase: total actual data points = Nbase x tilesz

View File

@ -24,6 +24,7 @@
#include <stdlib.h>
#include <pthread.h>
#include "Radio.h"
#include "Dirac.h"
/******************** shapelet stuff **********************/
/* evaluate Hermite polynomial value using recursion

View File

@ -19,6 +19,7 @@
#include "Dirac.h"
#include "Radio.h"
//#define CUDA_DEBUG