diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8d2dcda --- /dev/null +++ b/.gitignore @@ -0,0 +1,25 @@ +*/*.png +*/*.out +*/*.output +*/*.solutions +*/*.dot +test/sm.ms/ +test/nvprof-resultaten/ +src/MS/sagecal +test/analysis-*.txt +test/extended_*.* + +*/*/*.a +*/*/*.o +*/*/*.swp +*/*/*.swo +*/*/*.out +*/*/*.output + +*/*/*/*.o +*/*/*/*.a +*/*/*/*.swp +*/*/*/*.swo +*/*/*/*.out +*/*/*/*.output + diff --git a/src/MS/Makefile b/src/MS/Makefile index cd2fe2a..a84825a 100644 --- a/src/MS/Makefile +++ b/src/MS/Makefile @@ -1,9 +1,11 @@ OUTPUT= CXX=g++ -# CXXFLAGS=-O3 -Wall -g -pg -std=c++11 #-pg #-fnostack-protector -# Extra arguments for making callgraphs. -CXXFLAGS=-O3 -Wall -g -pg -std=c++11 -ansi -fPIC -fpermissive -fno-omit-frame-pointer -DNDEBUG -fno-inline-functions -fno-inline-functions-called-once -fno-optimize-sibling-calls + +CXXFLAGS=-O3 -Wall -g -pg #-fnostack-protector +# CASA_LIBDIR=-L/cm/shared/package/casacore/v2.1.0-gcc-4.9.3/lib -L/cm/shared/package/cfitsio/3380-gcc-4.9.3/lib -L/cm/shared/package/lapack/3.6.0-gcc-4.9.3/lib64 CASA_LIBDIR=-L/cm/shared/package/casacore/v2.3.0-gcc-4.9.3/lib -L/cm/shared/package/cfitsio/3380-gcc-4.9.3/lib -L/cm/shared/package/lapack/3.6.0-gcc-4.9.3/lib64 +# CASA_INCDIR=-I/cm/shared/package/casacore/v2.1.0-g++-4.9.3/include -I/cm/shared/package/casacore/v2.1.0-g++-4.9.3/include/casacore + CASA_INCDIR=-I/cm/shared/package/casacore/v2.3.0-gcc-4.9.3/include -I/cm/shared/package/casacore/v2.3.0-gcc-4.9.3/include/casacore CASA_LIBS=-lcasa_casa -lcasa_tables -lcasa_measures -lcasa_ms -lcfitsio #LAPACK=-llapack -lblas @@ -34,4 +36,5 @@ data.o:data.cpp data.h sagecal:$(OBJECTS) ../lib/Radio/libsagecal.a ../lib/Dirac/libdirac.a $(CXX) $(CXXFLAGS) $(LDFLAGS) $(INCLUDES) $(GLIBI) $(LIBPATH) -o $@ $(OBJECTS) $(MY_LIBS) $(LAPACK) $(CASA_LIBS) $(GLIBL) clean: - rm *.o *.tmp *.fits + rm *.o *.tmp *.fits *.swp *.swo *.o *.output + diff --git a/src/MS/Makefile.gpu b/src/MS/Makefile.gpu index aea5e14..e6ffe6e 100644 --- a/src/MS/Makefile.gpu +++ b/src/MS/Makefile.gpu @@ -1,11 +1,10 @@ OUTPUT= CXX=g++ -CXXFLAGS=-O3 -Wall -g -DHAVE_CUDA -# CXXFLAGS=-O3 -Wall -g -DHAVE_CUDA -DONE_GPU -CASA_LIBDIR=-L/cm/shared/package/casacore/v2.1.0-gcc-4.9.3/lib -L/cm/shared/package/cfitsio/3380-gcc-4.9.3/lib -L/cm/shared/package/lapack/3.6.0-gcc-4.9.3/lib64 -CASA_INCDIR=-I/cm/shared/package/casacore/v2.1.0-g++-4.9.3/include -I/cm/shared/package/casacore/v2.1.0-g++-4.9.3/include/casacore +CXXFLAGS=-O3 -Wall -g -DHAVE_CUDA -pg -std=c++11 +CASA_LIBDIR=-L/cm/shared/package/casacore/v2.3.0-gcc-4.9.3/lib -L/cm/shared/package/cfitsio/3380-gcc-4.9.3/lib -L/cm/shared/package/lapack/3.6.0-gcc-4.9.3/lib64 +CASA_INCDIR=-I/cm/shared/package/casacore/v2.3.0-gcc-4.9.3/include -I/cm/shared/package/casacore/v2.3.0-gcc-4.9.3/include/casacore CASA_LIBS=-lcasa_casa -lcasa_tables -lcasa_measures -lcasa_ms -lcfitsio -#LAPACK=-llapack -lblas +# LAPACK=-llapack -lblas LAPACK=-lopenblas -lgfortran -lpthread LAPACK_DIR=/cm/shared/apps/openblas/0.2.8/lib #LAPACK_DIR=/usr/lib/atlas/sse/ diff --git a/src/MS/lib b/src/MS/lib deleted file mode 120000 index dc598c5..0000000 --- a/src/MS/lib +++ /dev/null @@ -1 +0,0 @@ -../lib \ No newline at end of file diff --git a/src/MS/main.cpp b/src/MS/main.cpp index 680a50d..da0c711 100644 --- a/src/MS/main.cpp +++ b/src/MS/main.cpp @@ -237,11 +237,7 @@ main(int argc, char **argv) { Data::readMSlist(Data::MSlist,&msnames); } if (Data::TableName) { - if (!doBeam) { - Data::readAuxData(Data::TableName,&iodata); - } else { Data::readAuxData(Data::TableName,&iodata,&beam); - } cout<<"Only one MS"<more()) { start_time = time(0); if (iodata.Nms==1) { - if (!doBeam) { - Data::loadData(msitr[0]->table(),iodata,&iodata.fratio); - } else { Data::loadData(msitr[0]->table(),iodata,beam,&iodata.fratio); - } } else { Data::loadDataList(msitr,iodata,&iodata.fratio); } @@ -450,15 +443,9 @@ main(int argc, char **argv) { preset_flags_and_data(iodata.Nbase*iodata.tilesz,iodata.flag,barr,iodata.x,Data::Nt); /* if data is being whitened, whiten x here, no need for a copy because we use xo for residual calculation */ - if (Data::whiten) { - whiten_data(iodata.Nbase*iodata.tilesz,iodata.x,iodata.u,iodata.v,iodata.freq0,Data::Nt); - } /* precess source locations (also beam pointing) from J2000 to JAPP if we do any beam predictions, using first time slot as epoch */ - if (doBeam && !sources_precessed) { - precess_source_locations(beam.time_utc[iodata.tilesz/2],carr,M,&beam.p_ra0,&beam.p_dec0,Data::Nt); - sources_precessed=1; - } + // sources_precessed=1; @@ -468,6 +455,9 @@ main(int argc, char **argv) { #ifdef HAVE_CUDA precalculate_coherencies_withbeam_gpu(iodata.u,iodata.v,iodata.w,coh,iodata.N,iodata.Nbase*iodata.tilesz,barr,carr,M,iodata.freq0,iodata.deltaf,iodata.deltat,iodata.dec0,Data::min_uvcut,Data::max_uvcut, beam.p_ra0,beam.p_dec0,iodata.freq0,beam.sx,beam.sy,beam.time_utc,iodata.tilesz,beam.Nelem,beam.xx,beam.yy,beam.zz,doBeam,Data::Nt); +#endif +#ifndef HAVE_CUDA + precalculate_coherencies(iodata.u,iodata.v,iodata.w,coh,iodata.N,iodata.Nbase*iodata.tilesz,barr,carr,M,iodata.freq0,iodata.deltaf,iodata.deltat,iodata.dec0,Data::min_uvcut,Data::max_uvcut,Data::Nt); #endif /****************** end calibration **************************/ /****************** begin diagnostics ************************/ @@ -476,7 +466,17 @@ main(int argc, char **argv) { predict_visibilities_multifreq_withbeam_gpu(iodata.u,iodata.v,iodata.w,iodata.xo,iodata.N,iodata.Nbase,iodata.tilesz,barr,carr,M,iodata.freqs,iodata.Nchan,iodata.deltaf,iodata.deltat,iodata.dec0, beam.p_ra0,beam.p_dec0,iodata.freq0,beam.sx,beam.sy,beam.time_utc,beam.Nelem,beam.xx,beam.yy,beam.zz,doBeam,Data::Nt,(Data::DoSim>1?1:0)); #endif - } +#ifndef HAVE_CUDA + precalculate_coherencies_withbeam(iodata.u,iodata.v,iodata.w,coh,iodata.N,iodata.Nbase*iodata.tilesz,barr,carr,M,iodata.freq0,iodata.deltaf,iodata.deltat,iodata.dec0,Data::min_uvcut,Data::max_uvcut, + beam.p_ra0,beam.p_dec0,iodata.freq0,beam.sx,beam.sy,beam.time_utc,iodata.tilesz,beam.Nelem,beam.xx,beam.yy,beam.zz,Data::Nt); +#endif +} + +#ifdef HAVE_CUDA + cudaDeviceSynchronize(); + cudaProfilerStop(); + exit(0); +#endif tilex+=iodata.tilesz; /* print solutions to file */ @@ -496,10 +496,6 @@ main(int argc, char **argv) { for(int cm=0; cm>>(N,T,K,F,freqs,longitude,latitude,time_utc,Nelem,xx,yy,zz,ra,dec,ph_ra0,ph_dec0,ph_freq0,beam,buffer); @@ -786,6 +788,7 @@ cudakernel_coherencies(int B, int N, int T, int K, int F, float *u, float *v, fl /* spawn threads to handle baselines, these threads will spawn threads for sources */ int ThreadsPerBlock=DEFAULT_TH_PER_BK; + // int ThreadsPerBlock=16; /* note: make sure we do not exceed max no of blocks available, otherwise (too many baselines, loop over source id) */ int BlocksPerGrid= 2*(B+ThreadsPerBlock-1)/ThreadsPerBlock; diff --git a/src/lib/Radio/predict_withbeam_gpu.c b/src/lib/Radio/predict_withbeam_gpu.c index 9be9764..d69da91 100644 --- a/src/lib/Radio/predict_withbeam_gpu.c +++ b/src/lib/Radio/predict_withbeam_gpu.c @@ -122,7 +122,15 @@ precalcoh_threadfn(void *data) { float *ud,*vd,*wd,*cohd; baseline_t *barrd; float *freqsd; - float *longd,*latd; double *timed; + + float *longd,*latd; double *timed, *copyoftimed; + // This is needed to write times to file. + // Because of the convert_time applied to timed, we cannot use t->time_utc as input to fwrite. + if ((copyoftimed=(double*)calloc((size_t) t->tilesz, sizeof(double)))==0) { + fprintf(stderr,"%s: %d: no free memory\n",__FILE__,__LINE__); + exit(1); + } + int *Nelemd; float **xx_p,**yy_p,**zz_p; float **xxd,**yyd,**zzd; @@ -151,6 +159,9 @@ precalcoh_threadfn(void *data) { /* convert time jd to GMST angle */ cudakernel_convert_time(t->tilesz,timed); + // Fill copyoftimed. + err=cudaMemcpy((double*)copyoftimed, timed, t->tilesz*sizeof(double), cudaMemcpyDeviceToHost); + err=cudaMemcpy(Nelemd, t->Nelem, t->N*sizeof(int), cudaMemcpyHostToDevice); checkCudaError(err,__FILE__,__LINE__); @@ -161,7 +172,6 @@ precalcoh_threadfn(void *data) { exit(1); } - /* jagged arrays for element locations */ err=cudaMalloc((void**)&xxd, t->N*sizeof(int*)); checkCudaError(err,__FILE__,__LINE__); @@ -204,8 +214,10 @@ precalcoh_threadfn(void *data) { err=cudaMemcpy(zzd, zz_p, t->N*sizeof(int*), cudaMemcpyHostToDevice); checkCudaError(err,__FILE__,__LINE__); - float *beamd; + /* temp host storage for beam */ + float *tempbeam; + float *lld,*mmd,*nnd,*sId,*rad,*decd; unsigned char *styped; float *sI0d,*f0d,*spec_idxd,*spec_idx1d,*spec_idx2d; @@ -216,7 +228,6 @@ precalcoh_threadfn(void *data) { err=cudaMalloc((void**)&beamd, t->N*t->tilesz*t->carr[ncl].N*t->Nf*sizeof(float)); checkCudaError(err,__FILE__,__LINE__); - /* copy cluster details to GPU */ err=cudaMalloc((void**)&styped, t->carr[ncl].N*sizeof(unsigned char)); checkCudaError(err,__FILE__,__LINE__); @@ -286,14 +297,119 @@ precalcoh_threadfn(void *data) { } + + if ((tempbeam=(float*)calloc((size_t) t->N*t->tilesz*t->carr[ncl].N*t->Nf,sizeof(float)))==0) { + fprintf(stderr,"%s: %d: no free memory\n",__FILE__,__LINE__); + exit(1); + } + /* now copy pointer locations to device */ err=cudaMemcpy(dev_p, host_p, t->carr[ncl].N*sizeof(int*), cudaMemcpyHostToDevice); checkCudaError(err,__FILE__,__LINE__); + FILE *t_N; + t_N=fopen("t_N.bin","wb"); + fwrite(&t->N, sizeof(int), sizeof(t->N)/sizeof(int), t_N); + fclose(t_N); + + FILE *t_tilesz; + t_tilesz=fopen("t_tilesz.bin","wb"); + fwrite(&t->tilesz, sizeof(int), sizeof(t->tilesz)/sizeof(int), t_tilesz); + fclose(t_tilesz); + + FILE *t_carr_ncl_N; + t_carr_ncl_N=fopen("t_carr_ncl_N.bin","wb"); + fwrite(&t->carr[ncl].N, sizeof(int), sizeof(t->carr[ncl].N)/sizeof(int), t_carr_ncl_N); + fclose(t_carr_ncl_N); + + FILE *t_Nf; + t_Nf=fopen("t_Nf.bin","wb"); + fwrite(&t->Nf, sizeof(int), sizeof(t->Nf)/sizeof(int), t_Nf); + fclose(t_Nf); + + FILE *freq_sd; + freq_sd=fopen("freq_sd.bin","wb"); + fwrite(t->freqs, sizeof(double), t->Nf, freq_sd); + fclose(freq_sd); + + FILE *long_d; + long_d=fopen("long_d.bin","wb"); + fwrite(t->longitude, sizeof(double), t->N, long_d); + fclose(long_d); + + FILE *lat_d; + lat_d=fopen("lat_d.bin","wb"); + fwrite(t->latitude, sizeof(double), t->N, lat_d); + fclose(lat_d); + + FILE *time_d; + time_d=fopen("time_d.bin","wb"); + fwrite(©oftimed, sizeof(double), t->tilesz, time_d); + fclose(time_d); + + FILE *Nelem_d; + Nelem_d=fopen("Nelem_d.bin","wb"); + fwrite(t->Nelem, sizeof(int), t->N, Nelem_d); + fclose(Nelem_d); + + FILE *xx_d; + xx_d=fopen("xx_d.bin","wb"); + int j; + for (j=0; jN; j++){ + fwrite(t->xx[j], sizeof(double), t->Nelem[j], xx_d); + } + fclose(xx_d); + + FILE *yy_d; + yy_d=fopen("yy_d.bin","wb"); + for (j=0; jN; j++){ + fwrite(t->yy[j], sizeof(double), t->Nelem[j], yy_d); + } + fclose(yy_d); + + FILE *zz_d; + zz_d=fopen("zz_d.bin","wb"); + for (j=0; jN; j++){ + fwrite(t->zz[j], sizeof(double), t->Nelem[j], zz_d); + } + fclose(zz_d); + + FILE *ra_d; + ra_d=fopen("ra_d.bin","wb"); + fwrite(t->carr[ncl].ra, t->carr[ncl].N, sizeof(double), ra_d); + fclose(ra_d); + + FILE *dec_d; + dec_d=fopen("dec_d.bin","wb"); + fwrite(t->carr[ncl].dec, t->carr[ncl].N, sizeof(double), dec_d); + fclose(dec_d); + + FILE *t_ph_ra0; + t_ph_ra0=fopen("t_ph_ra0.bin","wb"); + fwrite((double *)&t->ph_ra0, 1, sizeof(double), t_ph_ra0); + fclose(t_ph_ra0); + + FILE *t_ph_dec0; + t_ph_dec0=fopen("t_ph_dec0.bin","wb"); + fwrite((double *)&t->ph_dec0, 1, sizeof(double), t_ph_dec0); + fclose(t_ph_dec0); + + FILE *t_ph_freq0; + t_ph_freq0=fopen("t_ph_freq0.bin","wb"); + fwrite((double *)&t->ph_freq0, 1, sizeof(double), t_ph_freq0); + fclose(t_ph_freq0); /* now calculate beam for all sources in this cluster */ cudakernel_array_beam(t->N,t->tilesz,t->carr[ncl].N,t->Nf,freqsd,longd,latd,timed,Nelemd,xxd,yyd,zzd,rad,decd,(float)t->ph_ra0,(float)t->ph_dec0,(float)t->ph_freq0,beamd); + FILE *beam_d; + beam_d=fopen("beam_d.bin","wb"); + err=cudaMemcpy((float*)tempbeam, beamd, t->N*t->tilesz*t->carr[ncl].N*t->Nf*sizeof(float), cudaMemcpyDeviceToHost); + + for (j=0; jN*t->tilesz*t->carr[ncl].N*t->Nf; j++){ + fwrite(&tempbeam[j], sizeof(float), 1, beam_d); + } + fclose(beam_d); /* calculate coherencies for all sources in this cluster, add them up */ cudakernel_coherencies(t->Nbase,t->N,t->tilesz,t->carr[ncl].N,t->Nf,ud,vd,wd,barrd,freqsd,beamd, @@ -387,6 +503,8 @@ precalcoh_threadfn(void *data) { /******************* end loop over clusters **************************/ free(tempcoh); + free(tempbeam); + free(copyoftimed); /* free memory */ err=cudaFree(ud); diff --git a/src/lib/Radio/reserve/radio-reserve.h b/src/lib/Radio/reserve/radio-reserve.h deleted file mode 100644 index 2ae4ce7..0000000 --- a/src/lib/Radio/reserve/radio-reserve.h +++ /dev/null @@ -1,470 +0,0 @@ -/* - * - Copyright (C) 2006-2008 Sarod Yatawatta - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - $Id$ - */ -#ifndef SAGECAL_H -#define SAGECAL_H -#ifdef __cplusplus - extern "C" { -#endif - -#include - -/* structures to store extra source info for extended sources */ -typedef struct exinfo_gaussian_ { - double eX,eY,eP; /* major,minor,PA */ - - double cxi,sxi,cphi,sphi; /* projection of [0,0,1] to [l,m,n] */ - int use_projection; -} exinfo_gaussian; - -typedef struct exinfo_disk_ { - double eX; /* diameter */ - - double cxi,sxi,cphi,sphi; /* projection of [0,0,1] to [l,m,n] */ - int use_projection; -} exinfo_disk; - -typedef struct exinfo_ring_ { - double eX; /* diameter */ - - double cxi,sxi,cphi,sphi; /* projection of [0,0,1] to [l,m,n] */ - int use_projection; -} exinfo_ring; - -typedef struct exinfo_shapelet_ { - int n0; /* model order, no of modes=n0*n0 */ - double beta; /* scale*/ - double *modes; /* array of n0*n0 x 1 values */ - double eX,eY,eP; /* linear transform parameters */ - - double cxi,sxi,cphi,sphi; /* projection of [0,0,1] to [l,m,n] */ - int use_projection; -} exinfo_shapelet; - - -/* when to project l,m coordinates */ -#ifndef PROJ_CUT -#define PROJ_CUT 0.998 -#endif - - -/* struct for a cluster GList item */ -typedef struct clust_t_{ - int id; /* cluster id */ - int nchunk; /* no of chunks the data is divided for solving */ - GList *slist; /* list of sources in this cluster (string)*/ -} clust_t; - -typedef struct clust_n_{ - char *name; /* source name (string)*/ -} clust_n; - -/* struct to store source info in hash table */ -typedef struct sinfo_t_ { - double ll,mm,ra,dec,sI[4]; /* sI:4x1 for I,Q,U,V, note sI is updated for central freq (ra,dec) for Az,El */ - unsigned char stype; /* source type */ - void *exdata; /* pointer to carry additional data, if needed */ - double sI0[4],f0,spec_idx,spec_idx1,spec_idx2; /* for multi channel data, original sI,Q,U,V, f0 and spectral index */ -} sinfo_t; - -/* struct for array of the sky model, with clusters */ -typedef struct clus_source_t_ { - int N; /* no of source in this cluster */ - int id; /* cluster id */ - double *ll,*mm,*nn,*sI,*sQ,*sU,*sV; /* arrays Nx1 of source info, note: sI is at reference freq of data */ - /* nn=sqrt(1-ll^2-mm^2)-1 */ - double *ra,*dec; /* arrays Nx1 for Az,El calculation */ - unsigned char *stype; /* source type array Nx1 */ - void **ex; /* array for extra source information Nx1 */ - - int nchunk; /* no of chunks the data is divided for solving */ - int *p; /* array nchunkx1 points to parameter array indices */ - - - double *sI0,*sQ0,*sU0,*sV0,*f0,*spec_idx,*spec_idx1,*spec_idx2; /* for multi channel data, original sI, f0 and spectral index */ -} clus_source_t; - -/* strutct to store baseline to station mapping */ -typedef struct baseline_t_ { - int sta1,sta2; - unsigned char flag; /* if this baseline is flagged, set to 1, otherwise 0: - special case: 2 if baseline is not used in solution, but will be - subtracted */ -} baseline_t; - - -/****************************** readsky.c ****************************/ -/* read sky/cluster files, - carr: return array size Mx1 of clusters - M : no of clusters - freq0: obs frequency Hz - ra0,dec0 : ra,dec of phase center (radians) - format: 0: LSM, 1: LSM with 3 order spec index - each element has source infor for that cluster */ -extern int -read_sky_cluster(const char *skymodel, const char *clusterfile, clus_source_t **carr, int *M, double freq0, double ra0, double dec0,int format); - -/* read solution file, only a set of solutions and load to p - sfp: solution file pointer - p: solutions vector Mt x 1 - carr: for getting correct offset in p - N : stations - M : clusters -*/ -extern int -read_solutions(FILE *sfp,double *p,clus_source_t *carr,int N,int M); - -/* set ignlist[ci]=1 if - cluster id 'cid' is mentioned in ignfile and carr[ci].id==cid -*/ -extern int -update_ignorelist(const char *ignfile, int *ignlist, int M, clus_source_t *carr); - -/* read ADMM regularization factor per cluster from text file, format: - cluster_id hybrid_parameter admm_rho - ... - ... - (M values) - and store it in array arho : size Mtx1, taking into account the hybrid parameter - also in array arhoslave : size Mx1, without taking hybrid params into account - - admm_rho : can be 0 to ignore consensus, just normal calibration -*/ - -extern int -read_arho_fromfile(const char *admm_rho_file,int Mt,double *arho, int M, double *arhoslave); - -/****************************** predict.c ****************************/ -/************* extended source contributions ************/ -extern complex double -shapelet_contrib(void*dd, double u, double v, double w); - -extern complex double -gaussian_contrib(void*dd, double u, double v, double w); - -extern complex double -ring_contrib(void*dd, double u, double v, double w); - -extern complex double -disk_contrib(void*dd, double u, double v, double w); - - -/* time smearing TMS eq. 6.80 for EW-array formula - note u,v,w: meter/c so multiply by freq. to get wavelength - ll,mm: source - dec0: phase center declination - tdelta: integration time */ -extern double -time_smear(double ll,double mm,double dec0,double tdelta,double u,double v,double w,double freq0); - -/* predict visibilities - u,v,w: u,v,w coordinates (wavelengths) size Nbase*tilesz x 1 - u,v,w are ordered with baselines, timeslots - x: data to write size Nbase*8*tileze x 1 - ordered by XX(re,im),XY(re,im),YX(re,im), YY(re,im), baseline, timeslots - N: no of stations - Nbase: no of baselines - tilesz: tile size - barr: baseline to station map, size Nbase*tilesz x 1 - carr: sky model/cluster info size Mx1 of clusters - M: no of clusters - freq0: frequency - fdelta: bandwidth for freq smearing - tdelta: integration time for time smearing - dec0: declination for time smearing - Nt: no of threads -*/ -extern int -predict_visibilities(double *u, double *v, double *w, double *x, int N, - int Nbase, int tilesz, baseline_t *barr, clus_source_t *carr, int M, double freq0, double fdelta, double tdelta, double dec0, int Nt); - - -/* precalculate cluster coherencies - u,v,w: u,v,w coordinates (wavelengths) size Nbase*tilesz x 1 - u,v,w are ordered with baselines, timeslots - x: coherencies size Nbase*4*Mx 1 - ordered by XX(re,im),XY(re,im),YX(re,im), YY(re,im), baseline, timeslots - N: no of stations - Nbase: no of baselines (including more than one tile) - barr: baseline to station map, size Nbase*tilesz x 1 - carr: sky model/cluster info size Mx1 of clusters - M: no of clusters - freq0: frequency - fdelta: bandwidth for freq smearing - tdelta: integration time for time smearing - dec0: declination for time smearing - uvmin: baseline length sqrt(u^2+v^2) below which not to include in solution - uvmax: baseline length higher than this not included in solution - Nt: no of threads - - NOTE: prediction is done for all baselines, even flagged ones - and flags are set to 2 for baselines lower than uvcut -*/ -extern int -precalculate_coherencies(double *u, double *v, double *w, complex double *x, int N, - int Nbase, baseline_t *barr, clus_source_t *carr, int M, double freq0, double fdelta, double tdelta, double dec0, double uvmin, double uvmax, int Nt); - - - -/* rearranges coherencies for GPU use later */ -/* barr: 2*Nbase x 1 - coh: M*Nbase*4 x 1 complex - ddcoh: M*Nbase*8 x 1 - ddbase: 2*Nbase x 1 (sta1,sta2) = -1 if flagged -*/ -extern int -rearrange_coherencies(int Nbase, baseline_t *barr, complex double *coh, double *ddcoh, short *ddbase, int M, int Nt); -/* ddbase: 3*Nbase x 1 (sta1,sta2,flag) */ -extern int -rearrange_coherencies2(int Nbase, baseline_t *barr, complex double *coh, double *ddcoh, short *ddbase, int M, int Nt); - -/* rearranges baselines for GPU use later */ -/* barr: 2*Nbase x 1 - ddbase: 2*Nbase x 1 -*/ -extern int -rearrange_baselines(int Nbase, baseline_t *barr, short *ddbase, int Nt); - -/* cont how many baselines contribute to each station */ -extern int -count_baselines(int Nbase, int N, float *iw, short *ddbase, int Nt); - -/* initialize array b (size Nx1) to given value a */ -#ifdef USE_MIC -__attribute__ ((target(MIC))) -#endif -extern void -setweights(int N, double *b, double a, int Nt); - -/* update baseline flags, also make data zero if flagged - this is needed for solving (calculate error) ignore flagged data */ -/* Nbase: total actual data points = Nbasextilesz - flag: flag array Nbasex1 - barr: baseline array Nbasex1 - x: data Nbase*8 x 1 ( 8 value per baseline ) - Nt: no of threads -*/ -extern int -preset_flags_and_data(int Nbase, double *flag, baseline_t *barr, double *x, int Nt); - -/* generte baselines -> sta1,sta2 pairs for later use */ -/* barr: Nbasextilesz - N : stations - Nt : threads -*/ -extern int -generate_baselines(int Nbase, int tilesz, int N, baseline_t *barr,int Nt); - -/* convert types */ -/* both arrays size nx1 - Nt: no of threads -*/ -extern int -double_to_float(float *farr, double *darr,int n, int Nt); -extern int -float_to_double(double *darr, float *farr,int n, int Nt); - -/* create a vector with 1's at flagged data points */ -/* - ddbase: 3*Nbase x 1 (sta1,sta2,flag) - x: 8*Nbase (set to 0's and 1's) -*/ -extern int -create_onezerovec(int Nbase, short *ddbase, float *x, int Nt); - -/* - find sum1=sum(|x|), and sum2=y^T |x| - x,y: nx1 arrays -*/ -extern int -find_sumproduct(int N, float *x, float *y, float *sum1, float *sum2, int Nt); - -/****************************** transforms.c ****************************/ -#ifndef ASEC2RAD -#define ASEC2RAD 4.848136811095359935899141e-6 -#endif - -/* - convert xyz ITRF 2000 coords (m) to - long,lat, (rad) height (m) - References: -*/ -extern int -xyz2llh(double *x, double *y, double *z, double *longitude, double *latitude, double *height, int N); - -/* convert ra,dec to az,el - ra,dec: radians - longitude,latitude: rad,rad - time_jd: JD days - - az,el: output rad,rad - -References: Darin C. Koblick MATLAB code, based on - % Fundamentals of Astrodynamics and Applications - % D. Vallado, Second Edition - % Example 3-5. Finding Local Siderial Time (pg. 192) - % Algorithm 28: AzElToRaDec (pg. 259) -*/ -extern int -radec2azel(double ra, double dec, double longitude, double latitude, double time_jd, double *az, double *el); - -/* convert time to Greenwitch Mean Sideral Angle (deg) - time_jd : JD days - thetaGMST : GMST angle (deg) -*/ -extern int -jd2gmst(double time_jd, double *thetaGMST); - - -/* convert ra,dec to az,el - ra,dec: radians - longitude,latitude: rad,rad - thetaGMST : GMST angle (deg) - - az,el: output rad,rad - -*/ -extern int -radec2azel_gmst(double ra, double dec, double longitude, double latitude, double thetaGMST, double *az, double *el); - - - -/* given the epoch jd_tdb2, - calculate rotation matrix params needed to precess from J2000 - NOVAS (Naval Observatory Vector Astronomy Software) - PURPOSE: - Precesses equatorial rectangular coordinates from one epoch to - another. One of the two epochs must be J2000.0. The coordinates - are referred to the mean dynamical equator and equinox of the two - respective epochs. - - REFERENCES: - Explanatory Supplement To The Astronomical Almanac, pp. 103-104. - Capitaine, N. et al. (2003), Astronomy And Astrophysics 412, - pp. 567-586. - Hilton, J. L. et al. (2006), IAU WG report, Celest. Mech., 94, - pp. 351-367. - -*/ -extern int -get_precession_params(double jd_tdb2, double Tr[9]); -/* precess ra0,dec0 at J2000 - to ra,dec at epoch given by transform Tr - using NOVAS library */ -extern int -precession(double ra0, double dec0, double Tr[9], double *ra, double *dec); - -/****************************** stationbeam.c ****************************/ -/* - ra,dec: source direction (rad) - ra0,dec0: beam center (rad) - f: frequency (Hz) - f0: beam forming frequency (Hz) - - longitude,latitude : Nx1 array of station positions (rad,rad) - time_jd: JD (day) time - Nelem : Nx1 array, no. of elements used in each station - x,y,z: Nx1 pointer arrays to station positions, each station has Nelem[]x1 arrays - - beamgain: Nx1 array of station beam gain along the source direction -*/ -extern int -arraybeam(double ra, double dec, double ra0, double dec0, double f, double f0, int N, double *longitude, double *latitude, double time_jd, int *Nelem, double **x, double **y, double **z, double *beamgain); - - -/****************************** predict_withbeam.c ****************************/ -/* precalculate cluster coherencies - u,v,w: u,v,w coordinates (wavelengths) size Nbase*tilesz x 1 - u,v,w are ordered with baselines, timeslots - x: coherencies size Nbase*4*Mx 1 - ordered by XX(re,im),XY(re,im),YX(re,im), YY(re,im), baseline, timeslots - N: no of stations - Nbase: total no of baselines (including more than one tile or timeslot) - barr: baseline to station map, size Nbase*tilesz x 1 - carr: sky model/cluster info size Mx1 of clusters - M: no of clusters - freq0: frequency - fdelta: bandwidth for freq smearing - tdelta: integration time for time smearing - dec0: declination for time smearing - uvmin: baseline length sqrt(u^2+v^2) below which not to include in solution - uvmax: baseline length higher than this not included in solution - - Station beam specific parameters - ph_ra0,ph_dec0: beam pointing rad,rad - ph_freq0: beam reference freq - longitude,latitude: Nx1 arrays (rad,rad) station locations - time_utc: JD (day) : tilesz x 1 - tilesz: how many tiles: == unique time_utc - Nelem: Nx1 array, size of stations (elements) - xx,yy,zz: Nx1 arrays of station element locations arrays xx[],yy[],zz[] - Nt: no of threads - - NOTE: prediction is done for all baselines, even flagged ones - and flags are set to 2 for baselines lower than uvcut -*/ - -extern int -precalculate_coherencies_withbeam(double *u, double *v, double *w, complex double *x, int N, - int Nbase, baseline_t *barr, clus_source_t *carr, int M, double freq0, double fdelta, double tdelta, double dec0, double uvmin, double uvmax, - double ph_ra0, double ph_dec0, double ph_freq0, double *longitude, double *latitude, double *time_utc, int tileze, int *Nelem, double **xx, double **yy, double **zz, int Nt); - - -extern int -predict_visibilities_multifreq_withbeam(double *u,double *v,double *w,double *x,int N,int Nbase,int tilesz,baseline_t *barr, clus_source_t *carr, int M,double *freqs,int Nchan, double fdelta,double tdelta, double dec0, - double ph_ra0, double ph_dec0, double ph_freq0, double *longitude, double *latitude, double *time_utc,int *Nelem, double **xx, double **yy, double **zz, int Nt, int add_to_data); - -extern int -calculate_residuals_multifreq_withbeam(double *u,double *v,double *w,double *p,double *x,int N,int Nbase,int tilesz,baseline_t *barr, clus_source_t *carr, int M,double *freqs,int Nchan, double fdelta,double tdelta,double dec0, -double ph_ra0, double ph_dec0, double ph_freq0, double *longitude, double *latitude, double *time_utc,int *Nelem, double **xx, double **yy, double **zz, int Nt, int ccid, double rho, int phase_only); - - -/* change epoch of soure ra,dec from J2000 to JAPP */ -/* also the beam pointing ra_beam,dec_beam */ -extern int -precess_source_locations(double jd_tdb, clus_source_t *carr, int M, double *ra_beam, double *dec_beam, int Nt); - -/****************************** predict_withbeam_gpu.c ****************************/ -/* if dobeam==0, beam calculation is off */ -extern int -precalculate_coherencies_withbeam_gpu(double *u, double *v, double *w, complex double *x, int N, - int Nbase, baseline_t *barr, clus_source_t *carr, int M, double freq0, double fdelta, double tdelta, double dec0, double uvmin, double uvmax, - double ph_ra0, double ph_dec0, double ph_freq0, double *longitude, double *latitude, double *time_utc, int tileze, int *Nelem, double **xx, double **yy, double **zz, int dobeam, int Nt); - -extern int -predict_visibilities_multifreq_withbeam_gpu(double *u,double *v,double *w,double *x,int N,int Nbase,int tilesz,baseline_t *barr, clus_source_t *carr, int M,double *freqs,int Nchan, double fdelta,double tdelta, double dec0, - double ph_ra0, double ph_dec0, double ph_freq0, double *longitude, double *latitude, double *time_utc,int *Nelem, double **xx, double **yy, double **zz, int dobeam, int Nt, int add_to_data); - - - -/****************************** predict_model.cu ****************************/ -extern void -cudakernel_array_beam(int N, int T, int K, int F, float *freqs, float *longitude, float *latitude, - double *time_utc, int *Nelem, float **xx, float **yy, float **zz, float *ra, float *dec, float ph_ra0, float ph_dec0, float ph_freq0, float *beam); - - -extern void -cudakernel_coherencies(int B, int N, int T, int K, int F, float *u, float *v, float *w,baseline_t *barr, float *freqs, float *beam, float *ll, float *mm, float *nn, float *sI, - unsigned char *stype, float *sI0, float *f0, float *spec_idx, float *spec_idx1, float *spec_idx2, int **exs, float deltaf, float deltat, float dec0, float *coh,int dobeam); - - -extern void -cudakernel_convert_time(int T, double *time_utc); -#ifdef __cplusplus - } /* extern "C" */ -#endif diff --git a/test/Generate_sources.py b/test/Generate_sources.py index 6d45968..742aee2 100644 --- a/test/Generate_sources.py +++ b/test/Generate_sources.py @@ -11,7 +11,7 @@ import warnings number_of_parameters = 19 number_of_digits_for_sources = 5 -number_of_sources = 5e3 +number_of_sources = 10000 try: assert number_of_sources < 10**number_of_digits_for_sources except AssertionError: @@ -19,9 +19,39 @@ except AssertionError: print("Sorry, number of sources too large, reset to {0}".format(number_of_sources)) -# For the declinations I have chosen sources between 45 and 90 degrees declination. -decl_low = 45 -decl_high = 90 +# Best to center sources around 3C196 if using the sm.ms measurement set with a +# four degree tolerance +tol_seconds_of_decl = 3600 * 10 +tol_seconds_of_RA = tol_seconds_of_decl * 24/360 + +RA_hours_3C196 = 8 +RA_minutes_3C196 = 13 +RA_seconds_3C196 = 35.981540 + +RA_seconds_3C196 = (RA_hours_3C196 * 60 + RA_minutes_3C196) *60 + RA_seconds_3C196 + +RA_seconds_low = RA_seconds_3C196 - tol_seconds_of_RA +RA_minutes_low, RA_seconds_low = divmod(RA_seconds_low, 60) +RA_hours_low, RA_minutes_low = divmod(RA_minutes_low, 60) + +RA_seconds_high = RA_seconds_3C196 + tol_seconds_of_RA +RA_minutes_high, RA_seconds_high = divmod(RA_seconds_high, 60) +RA_hours_high, RA_minutes_high = divmod(RA_minutes_high, 60) + +decl_degrees_3C196 = 48 +decl_minutes_3C196 = 12 +decl_seconds_3C196 = 59.174770 + +decl_seconds_3C196 = (decl_degrees_3C196 * 60 + decl_minutes_3C196) *60 + decl_seconds_3C196 + +decl_seconds_low = decl_seconds_3C196 - tol_seconds_of_decl +decl_minutes_low, decl_seconds_low = divmod(decl_seconds_low, 60) +decl_degrees_low, decl_minutes_low = divmod(decl_minutes_low, 60) + +decl_seconds_high = decl_seconds_3C196 + tol_seconds_of_decl +decl_minutes_high, decl_seconds_high = divmod(decl_seconds_high, 60) +decl_degrees_high, decl_minutes_high = divmod(decl_minutes_high, 60) + I_low = 10 I_high = 100 @@ -48,13 +78,17 @@ with warnings.catch_warnings(): sources_parameters.name[source_name] = 'P' + str(source_name).zfill(number_of_digits_for_sources) # Right ascension can have all values. - sources_parameters.rah = np.random.randint(0, 24, size=number_of_sources) - sources_parameters.ram = np.random.randint(0, 59, size=number_of_sources) - sources_parameters.ras = 60 * np.random.rand(number_of_sources) + # sources_parameters.rah = np.random.randint(ra_low, ra_high, size=number_of_sources) + # sources_parameters.ram = np.random.randint(0, 59, size=number_of_sources) + # sources_parameters.ras = 60 * np.random.rand(number_of_sources) - sources_parameters.dad = np.random.randint(decl_low, decl_high, size=number_of_sources) - sources_parameters.dam = np.random.randint(0, 59, size=number_of_sources) - sources_parameters.das = 60 * np.random.rand(number_of_sources) + sources_parameters.rah = (RA_hours_high - RA_hours_low) * np.random.random_sample(number_of_sources) + RA_hours_low + sources_parameters.ram = (RA_minutes_high - RA_minutes_low) * np.random.random_sample(number_of_sources) + RA_minutes_low + sources_parameters.ras = (RA_seconds_high - RA_seconds_low) * np.random.random_sample(number_of_sources) + RA_seconds_low + + sources_parameters.dad = (decl_degrees_high - decl_degrees_low) * np.random.random_sample(number_of_sources) + decl_degrees_low + sources_parameters.dam = (decl_minutes_high - decl_minutes_low) * np.random.random_sample(number_of_sources) + decl_minutes_low + sources_parameters.das = (decl_seconds_high - decl_seconds_low) * np.random.random_sample(number_of_sources) + decl_seconds_low sources_parameters.I = (I_high - I_low) * np.random.rand(number_of_sources) + I_low sources_parameters.Q = 0 @@ -67,25 +101,40 @@ with warnings.catch_warnings(): sources_parameters.sp2 = 0 sources_parameters.RM = 0 - sources_parameters.extX = 0 - sources_parameters.extY = 0 - sources_parameters.pos_angle = 0. + + # Let's make the first half of the sources extended. + first_half = int(number_of_sources/2) + # Make major axes with a maximum of 0.1 degrees. + major_axes_in_degrees = 0.1 * np.random.rand(first_half) + semi_major_axes_in_radians = major_axes_in_degrees * np.pi/360. + # For convenience, I'll just construct semi-minor axes half the size of the semi-major axes. + semi_minor_axes_in_radians = semi_major_axes_in_radians/2. + sources_parameters.extX[0: first_half] = semi_major_axes_in_radians + sources_parameters.extX[first_half: number_of_sources] = 0 + sources_parameters.extY[0: first_half] = semi_minor_axes_in_radians + sources_parameters.extY[first_half: number_of_sources] = 0 + sources_parameters.pos_angle[0: first_half] = 360. * np.random.rand(first_half) + sources_parameters.pos_angle[first_half: number_of_sources] = 0 + sources_parameters.freq0 = 143000000.0 -print([elem for elem in sources_parameters[100]]) +# print([elem for elem in sources_parameters[100]]) -sources_parameters.tofile("extended_source_list_using_tofile.txt", sep='\n') +# sources_parameters.tofile("extended_source_list_using_tofile.txt", sep='\n') -with open("extended_source_list.txt", 'wb') as f: +with open("extended_source_list_centered_on_3C196.txt", 'wb') as f: f.write(b"## From Generate_sources.py by Hanno Spreeuw.\n") f.write(b"## Generates point sources at random positions with random brighnesses within some range.\n") f.write(b"## this is an LSM text (hms/dms) file\n") f.write(b"## fields are (where h:m:s is RA, d:m:s is Dec):\n") f.write(b"## name h m s d m s I Q U V spectral_index0 spectral_index1 spectral_index2 " + b"RM extent_X(rad) extent_Y(rad) pos_angle(rad) freq0\n") + f.write(b"\n") np.savetxt(f, sources_parameters, fmt=formats_reformatted) # Now write the cluster file -with open("extended_source_list.cluster", 'wb') as f: - np.savetxt(f, (sources_parameters.name).reshape(1, sources_parameters.name.shape[0]), fmt='%s', delimiter=' ') +# First add '1' and '1' to indicate the cluster id and chunk size. +cluster_array = np.concatenate((np.array(['1', '1']), sources_parameters.name)) +with open("extended_source_list_centered_on_3C196.txt.cluster", 'wb') as f: + np.savetxt(f, (cluster_array).reshape(1, cluster_array.shape[0]), fmt='%s', delimiter=' ') diff --git a/test/dosage.sh b/test/dosage.sh index ecaddfe..365aac5 100755 --- a/test/dosage.sh +++ b/test/dosage.sh @@ -1,2 +1,2 @@ # Before running this, untar sm.ms.tar and build sagecal -../src/MS/sagecal -d sm.ms -s 3c196.sky.txt -c 3c196.sky.txt.cluster -n 4 -t 10 -p sm.ms.solutions -e 4 -g 2 -l 10 -m 7 -x 30 -F 1 -j 5 -k -1 -B 1 -W 0 > sm.ms.output +../src/MS/sagecal -d sm.ms -s extended_source_list.txt -c extended_source_list.txt.cluster -n 4 -t 10 -p sm.ms.solutions -e 4 -g 2 -l 10 -m 7 -x 30 -F 1 -j 2 -k -1 -B 1 -W 0 > sm.ms.output