cleaning up LBFGS related code, removed duplicates
This commit is contained in:
parent
c366e18977
commit
fa1448be01
|
@ -175,7 +175,7 @@ which can be used to count the row number. It will keep repeating this, for each
|
||||||
The rows 0 to 7 belong to the solutions for the 1st station. The rows 8 to 15 for the 2nd station and so on.
|
The rows 0 to 7 belong to the solutions for the 1st station. The rows 8 to 15 for the 2nd station and so on.
|
||||||
Each 8 rows of any given column represent the 8 values of a 2x2 Jones matrix. Lets say these are ```S0,S1,S2,S3,S4,S5,S6``` and ```S7```. Then the Jones matrix is ```[S0+j*S1, S4+j*S5; S2+j*S3, S6+j*S7]``` (the ';' denotes the 1st row of the 2x2 matrix).
|
Each 8 rows of any given column represent the 8 values of a 2x2 Jones matrix. Lets say these are ```S0,S1,S2,S3,S4,S5,S6``` and ```S7```. Then the Jones matrix is ```[S0+j*S1, S4+j*S5; S2+j*S3, S6+j*S7]``` (the ';' denotes the 1st row of the 2x2 matrix).
|
||||||
|
|
||||||
When a luster has a chunk size > 1, there will be more than 1 solution per given time interval.
|
When a cluster has a chunk size > 1, there will be more than 1 solution per given time interval.
|
||||||
So for this cluster, there will be more than 1 column in the solution file, the exact number of columns being equal to the chunk size.
|
So for this cluster, there will be more than 1 column in the solution file, the exact number of columns being equal to the chunk size.
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -8,7 +8,6 @@ set (objects
|
||||||
manifold_average
|
manifold_average
|
||||||
mdl
|
mdl
|
||||||
myblas
|
myblas
|
||||||
robust_lbfgs_nocuda
|
|
||||||
robustlm
|
robustlm
|
||||||
rtr_solve_robust_admm
|
rtr_solve_robust_admm
|
||||||
updatenu
|
updatenu
|
||||||
|
@ -50,6 +49,7 @@ else()
|
||||||
lmfit_nocuda
|
lmfit_nocuda
|
||||||
consensus_poly
|
consensus_poly
|
||||||
lbfgs_nocuda
|
lbfgs_nocuda
|
||||||
|
robust_lbfgs_nocuda
|
||||||
rtr_solve
|
rtr_solve
|
||||||
rtr_solve_robust
|
rtr_solve_robust
|
||||||
)
|
)
|
||||||
|
|
|
@ -139,8 +139,27 @@ find_sumproduct(int N, float *x, float *y, float *sum1, float *sum2, int Nt);
|
||||||
/****************************** lbfgs.c ****************************/
|
/****************************** lbfgs.c ****************************/
|
||||||
/****************************** lbfgs_nocuda.c ****************************/
|
/****************************** lbfgs_nocuda.c ****************************/
|
||||||
/* LBFGS routines */
|
/* LBFGS routines */
|
||||||
/* func: vector function to minimize, actual cost minimized is ||func-x||^2
|
|
||||||
NOTE: gradient function given seperately
|
#ifndef HAVE_CUDA
|
||||||
|
/* line search */
|
||||||
|
/* func: scalar function
|
||||||
|
xk: parameter values size m x 1 (at which step is calculated)
|
||||||
|
pk: step direction size m x 1 (x(k+1)=x(k)+alphak * pk)
|
||||||
|
alpha1: initial value for step
|
||||||
|
sigma,rho,t1,t2,t3: line search parameters (from Fletcher)
|
||||||
|
m: size or parameter vector
|
||||||
|
step: step size for differencing
|
||||||
|
adata: additional data passed to the function
|
||||||
|
*/
|
||||||
|
extern double
|
||||||
|
linesearch(
|
||||||
|
double (*func)(double *p, int m, void *adata),
|
||||||
|
double *xk, double *pk, double alpha1, double sigma, double rho, double t1, double t2, double t3, int m, double step, void *adata);
|
||||||
|
|
||||||
|
/* cost function : return a scalar cost, input : p (mx1) parameters, m: no. of params, adata: additional data
|
||||||
|
grad function: return gradient (mx1): input : p (mx1) parameters, g (mx1) gradient vector, m: no. of params, adata: additional data
|
||||||
|
*/
|
||||||
|
/*
|
||||||
p: parameters m x 1 (used as initial value, output final value)
|
p: parameters m x 1 (used as initial value, output final value)
|
||||||
x: data n x 1
|
x: data n x 1
|
||||||
itmax: max iterations
|
itmax: max iterations
|
||||||
|
@ -153,8 +172,20 @@ __attribute__ ((target(MIC)))
|
||||||
#endif
|
#endif
|
||||||
extern int
|
extern int
|
||||||
lbfgs_fit(
|
lbfgs_fit(
|
||||||
void (*func)(double *p, double *hx, int m, int n, void *adata),
|
double (*cost_func)(double *p, int m, void *adata),
|
||||||
|
void (*grad_func)(double *p, double *g, int m, void *adata),
|
||||||
|
void (*change_batch)(int iter, void *adata),
|
||||||
|
double *p, int m, int itmax, int M, void *adata);
|
||||||
|
#endif /* !HAVE_CUDA */
|
||||||
|
|
||||||
|
#ifdef HAVE_CUDA
|
||||||
|
extern int
|
||||||
|
lbfgs_fit(
|
||||||
|
double *p, double *x, int m, int n, int itmax, int M, int gpu_threads, void *adata);
|
||||||
|
extern int
|
||||||
|
lbfgs_fit_robust_cuda(
|
||||||
double *p, double *x, int m, int n, int itmax, int lbfgs_m, int gpu_threads, void *adata);
|
double *p, double *x, int m, int n, int itmax, int lbfgs_m, int gpu_threads, void *adata);
|
||||||
|
#endif /* HAVE_CUDA */
|
||||||
|
|
||||||
/****************************** robust_lbfgs_nocuda.c ****************************/
|
/****************************** robust_lbfgs_nocuda.c ****************************/
|
||||||
typedef struct thread_data_logf_t_ {
|
typedef struct thread_data_logf_t_ {
|
||||||
|
@ -164,22 +195,21 @@ typedef struct thread_data_logf_t_ {
|
||||||
int start,end;
|
int start,end;
|
||||||
double sum;
|
double sum;
|
||||||
} thread_data_logf_t;
|
} thread_data_logf_t;
|
||||||
|
|
||||||
/* robust_nu: nu in T distribution */
|
/* robust_nu: nu in T distribution */
|
||||||
|
|
||||||
#ifdef USE_MIC
|
#ifdef USE_MIC
|
||||||
__attribute__ ((target(MIC)))
|
__attribute__ ((target(MIC)))
|
||||||
#endif
|
#endif
|
||||||
extern int
|
extern int
|
||||||
lbfgs_fit_robust(
|
lbfgs_fit_robust_wrapper(
|
||||||
void (*func)(double *p, double *hx, int m, int n, void *adata),
|
|
||||||
double *p, double *x, int m, int n, int itmax, int lbfgs_m, int gpu_threads,
|
double *p, double *x, int m, int n, int itmax, int lbfgs_m, int gpu_threads,
|
||||||
void *adata);
|
void *adata);
|
||||||
#ifdef HAVE_CUDA
|
|
||||||
extern int
|
extern int
|
||||||
lbfgs_fit_robust_cuda(
|
lbfgs_fit_wrapper(
|
||||||
void (*func)(double *p, double *hx, int m, int n, void *adata),
|
double *p, double *x, int m, int n, int itmax, int lbfgs_m, int gpu_threads,
|
||||||
double *p, double *x, int m, int n, int itmax, int lbfgs_m, int gpu_threads, void *adata);
|
void *adata);
|
||||||
#endif
|
|
||||||
|
|
||||||
/****************************** mderiv.cu ****************************/
|
/****************************** mderiv.cu ****************************/
|
||||||
/* cuda driver for kernel */
|
/* cuda driver for kernel */
|
||||||
|
@ -1300,6 +1330,15 @@ sagefit_visibilities_admm_dual_pt_flt(double *u, double *v, double *w, double *x
|
||||||
extern void
|
extern void
|
||||||
openblas_set_num_threads(int num_threads);
|
openblas_set_num_threads(int num_threads);
|
||||||
|
|
||||||
|
/****************************** lmfit.c ****************************/
|
||||||
|
/****************************** lmfit_nocuda.c ****************************/
|
||||||
|
/* minimization (or vector cost) function (multithreaded) */
|
||||||
|
/* p: size mx1 parameters
|
||||||
|
x: size nx1 model being calculated
|
||||||
|
data: extra info needed */
|
||||||
|
extern void
|
||||||
|
minimize_viz_full_pth(double *p, double *x, int m, int n, void *data);
|
||||||
|
|
||||||
/********* solver modes *********/
|
/********* solver modes *********/
|
||||||
#define SM_LM_LBFGS 1
|
#define SM_LM_LBFGS 1
|
||||||
#define SM_OSLM_LBFGS 0
|
#define SM_OSLM_LBFGS 0
|
||||||
|
@ -1381,15 +1420,13 @@ bfgsfit_visibilities(double *u, double *v, double *w, double *x, int N,
|
||||||
int Nbase, int tilesz, baseline_t *barr, clus_source_t *carr, complex double *coh, int M, int Mt, double freq0, double fdelta, double *pp, double uvmin, int Nt, int max_lbfgs, int lbfgs_m, int gpu_threads, int solver_mode, double mean_nu, double *res_0, double *res_1);
|
int Nbase, int tilesz, baseline_t *barr, clus_source_t *carr, complex double *coh, int M, int Mt, double freq0, double fdelta, double *pp, double uvmin, int Nt, int max_lbfgs, int lbfgs_m, int gpu_threads, int solver_mode, double mean_nu, double *res_0, double *res_1);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef HAVE_CUDA
|
||||||
extern int
|
extern int
|
||||||
bfgsfit_visibilities_gpu(double *u, double *v, double *w, double *x, int N,
|
bfgsfit_visibilities_gpu(double *u, double *v, double *w, double *x, int N,
|
||||||
int Nbase, int tilesz, baseline_t *barr, clus_source_t *carr, complex double *coh, int M, int Mt, double freq0, double fdelta, double *pp, double uvmin, int Nt, int max_lbfgs, int lbfgs_m, int gpu_threads, int solver_mode, double mean_nu, double *res_0, double *res_1);
|
int Nbase, int tilesz, baseline_t *barr, clus_source_t *carr, complex double *coh, int M, int Mt, double freq0, double fdelta, double *pp, double uvmin, int Nt, int max_lbfgs, int lbfgs_m, int gpu_threads, int solver_mode, double mean_nu, double *res_0, double *res_1);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef HAVE_CUDA
|
|
||||||
/* data struct shared by all threads */
|
/* data struct shared by all threads */
|
||||||
typedef struct gb_data_ {
|
typedef struct gb_data_ {
|
||||||
int status[2]; /* 0: do nothing,
|
int status[2]; /* 0: do nothing,
|
||||||
|
@ -1492,75 +1529,6 @@ extern int
|
||||||
sagefit_visibilities_dual_pt_flt(double *u, double *v, double *w, double *x, int N,
|
sagefit_visibilities_dual_pt_flt(double *u, double *v, double *w, double *x, int N,
|
||||||
int Nbase, int tilesz, baseline_t *barr, clus_source_t *carr, complex double *coh, int M, int Mt, double freq0, double fdelta, double *pp, double uvmin, int Nt, int max_emiter, int max_iter, int max_lbfgs, int lbfgs_m, int gpu_threads, int linsolv,int solver_mode, double nulow, double nuhigh, int randomize, double *mean_nu, double *res_0, double *res_1);
|
int Nbase, int tilesz, baseline_t *barr, clus_source_t *carr, complex double *coh, int M, int Mt, double freq0, double fdelta, double *pp, double uvmin, int Nt, int max_emiter, int max_iter, int max_lbfgs, int lbfgs_m, int gpu_threads, int linsolv,int solver_mode, double nulow, double nuhigh, int randomize, double *mean_nu, double *res_0, double *res_1);
|
||||||
|
|
||||||
/****************************** stationbeam.c ****************************/
|
|
||||||
/*
|
|
||||||
ra,dec: source direction (rad)
|
|
||||||
ra0,dec0: beam center (rad)
|
|
||||||
f: frequency (Hz)
|
|
||||||
f0: beam forming frequency (Hz)
|
|
||||||
|
|
||||||
longitude,latitude : Nx1 array of station positions (rad,rad)
|
|
||||||
time_jd: JD (day) time
|
|
||||||
Nelem : Nx1 array, no. of elements used in each station
|
|
||||||
x,y,z: Nx1 pointer arrays to station positions, each station has Nelem[]x1 arrays
|
|
||||||
|
|
||||||
beamgain: Nx1 array of station beam gain along the source direction
|
|
||||||
*/
|
|
||||||
extern int
|
|
||||||
arraybeam(double ra, double dec, double ra0, double dec0, double f, double f0, int N, double *longitude, double *latitude, double time_jd, int *Nelem, double **x, double **y, double **z, double *beamgain);
|
|
||||||
|
|
||||||
|
|
||||||
/****************************** predict_withbeam.c ****************************/
|
|
||||||
/* precalculate cluster coherencies
|
|
||||||
u,v,w: u,v,w coordinates (wavelengths) size Nbase*tilesz x 1
|
|
||||||
u,v,w are ordered with baselines, timeslots
|
|
||||||
x: coherencies size Nbase*4*Mx 1
|
|
||||||
ordered by XX(re,im),XY(re,im),YX(re,im), YY(re,im), baseline, timeslots
|
|
||||||
N: no of stations
|
|
||||||
Nbase: total no of baselines (including more than one tile or timeslot)
|
|
||||||
barr: baseline to station map, size Nbase*tilesz x 1
|
|
||||||
carr: sky model/cluster info size Mx1 of clusters
|
|
||||||
M: no of clusters
|
|
||||||
freq0: frequency
|
|
||||||
fdelta: bandwidth for freq smearing
|
|
||||||
tdelta: integration time for time smearing
|
|
||||||
dec0: declination for time smearing
|
|
||||||
uvmin: baseline length sqrt(u^2+v^2) below which not to include in solution
|
|
||||||
uvmax: baseline length higher than this not included in solution
|
|
||||||
|
|
||||||
Station beam specific parameters
|
|
||||||
ph_ra0,ph_dec0: beam pointing rad,rad
|
|
||||||
ph_freq0: beam reference freq
|
|
||||||
longitude,latitude: Nx1 arrays (rad,rad) station locations
|
|
||||||
time_utc: JD (day) : tilesz x 1
|
|
||||||
tilesz: how many tiles: == unique time_utc
|
|
||||||
Nelem: Nx1 array, size of stations (elements)
|
|
||||||
xx,yy,zz: Nx1 arrays of station element locations arrays xx[],yy[],zz[]
|
|
||||||
Nt: no of threads
|
|
||||||
|
|
||||||
NOTE: prediction is done for all baselines, even flagged ones
|
|
||||||
and flags are set to 2 for baselines lower than uvcut
|
|
||||||
*/
|
|
||||||
|
|
||||||
extern int
|
|
||||||
precalculate_coherencies_withbeam(double *u, double *v, double *w, complex double *x, int N,
|
|
||||||
int Nbase, baseline_t *barr, clus_source_t *carr, int M, double freq0, double fdelta, double tdelta, double dec0, double uvmin, double uvmax,
|
|
||||||
double ph_ra0, double ph_dec0, double ph_freq0, double *longitude, double *latitude, double *time_utc, int tileze, int *Nelem, double **xx, double **yy, double **zz, int Nt);
|
|
||||||
|
|
||||||
|
|
||||||
extern int
|
|
||||||
predict_visibilities_multifreq_withbeam(double *u,double *v,double *w,double *x,int N,int Nbase,int tilesz,baseline_t *barr, clus_source_t *carr, int M,double *freqs,int Nchan, double fdelta,double tdelta, double dec0,
|
|
||||||
double ph_ra0, double ph_dec0, double ph_freq0, double *longitude, double *latitude, double *time_utc,int *Nelem, double **xx, double **yy, double **zz, int Nt, int add_to_data);
|
|
||||||
|
|
||||||
extern int
|
|
||||||
calculate_residuals_multifreq_withbeam(double *u,double *v,double *w,double *p,double *x,int N,int Nbase,int tilesz,baseline_t *barr, clus_source_t *carr, int M,double *freqs,int Nchan, double fdelta,double tdelta,double dec0,
|
|
||||||
double ph_ra0, double ph_dec0, double ph_freq0, double *longitude, double *latitude, double *time_utc,int *Nelem, double **xx, double **yy, double **zz, int Nt, int ccid, double rho, int phase_only);
|
|
||||||
|
|
||||||
|
|
||||||
/* change epoch of soure ra,dec from J2000 to JAPP */
|
|
||||||
/* also the beam pointing ra_beam,dec_beam */
|
|
||||||
extern int
|
|
||||||
precess_source_locations(double jd_tdb, clus_source_t *carr, int M, double *ra_beam, double *dec_beam, int Nt);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} /* extern "C" */
|
} /* extern "C" */
|
||||||
|
|
|
@ -21,7 +21,7 @@ INCLUDES=-I.
|
||||||
LIBPATH= $(CUDALIB)
|
LIBPATH= $(CUDALIB)
|
||||||
|
|
||||||
|
|
||||||
OBJECTS=lmfit.o lbfgs.o myblas.o mderiv.o clmfit.o clmfit_nocuda.o barrier.o robust.o robustlm.o oslmfit.o mderiv_fl.o clmfit_fl.o updatenu.o robust_lbfgs_nocuda.o robust_fl.o manifold_fl.o rtr_solve_cuda.o rtr_solve_robust_cuda.o manifold_average.o consensus_poly.o rtr_solve_robust_cuda_admm.o rtr_solve_robust_admm.o admm_solve.o load_balance.o mdl.o
|
OBJECTS=lmfit.o lbfgs.o myblas.o mderiv.o clmfit.o clmfit_nocuda.o barrier.o robust.o robustlm.o oslmfit.o mderiv_fl.o clmfit_fl.o updatenu.o robust_fl.o manifold_fl.o rtr_solve_cuda.o rtr_solve_robust_cuda.o manifold_average.o consensus_poly.o rtr_solve_robust_cuda_admm.o rtr_solve_robust_admm.o admm_solve.o load_balance.o mdl.o
|
||||||
|
|
||||||
|
|
||||||
default:libdirac.a
|
default:libdirac.a
|
||||||
|
@ -47,8 +47,6 @@ robust_fl.o:robust_fl.cu
|
||||||
$(NVCC) $(NVCFLAGS) $(INCLUDES) $(GLIBI) -c $<
|
$(NVCC) $(NVCFLAGS) $(INCLUDES) $(GLIBI) -c $<
|
||||||
oslmfit.o:oslmfit.c
|
oslmfit.o:oslmfit.c
|
||||||
$(CC) $(CFLAGS) $(INCLUDES) $(GLIBI) -c $<
|
$(CC) $(CFLAGS) $(INCLUDES) $(GLIBI) -c $<
|
||||||
robust_lbfgs_nocuda.o:robust_lbfgs_nocuda.c
|
|
||||||
$(CC) $(CFLAGS) $(INCLUDES) $(GLIBI) -c $<
|
|
||||||
clmfit_fl.o:clmfit_fl.c
|
clmfit_fl.o:clmfit_fl.c
|
||||||
$(CC) $(CFLAGS) $(INCLUDES) $(GLIBI) -c $<
|
$(CC) $(CFLAGS) $(INCLUDES) $(GLIBI) -c $<
|
||||||
updatenu.o:updatenu.c
|
updatenu.o:updatenu.c
|
||||||
|
|
|
@ -213,174 +213,6 @@ mylm_fit_single_pth(double *p, double *x, int m, int n, void *data) {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* worker thread function for prediction */
|
|
||||||
static void *
|
|
||||||
predict_threadfn_withgain_full(void *data) {
|
|
||||||
thread_data_base_t *t=(thread_data_base_t*)data;
|
|
||||||
|
|
||||||
int ci,cm,sta1,sta2;
|
|
||||||
double *pm;
|
|
||||||
complex double C[4],G1[4],G2[4],T1[4],T2[4];
|
|
||||||
int M=(t->M);
|
|
||||||
int Ntilebase=(t->Nbase)*(t->tilesz);
|
|
||||||
int px;
|
|
||||||
|
|
||||||
for (ci=0; ci<t->Nb; ci++) {
|
|
||||||
/* iterate over the sky model and calculate contribution */
|
|
||||||
/* for this x[8*ci:8*(ci+1)-1] */
|
|
||||||
memset(&(t->x[8*ci]),0,sizeof(double)*8);
|
|
||||||
|
|
||||||
/* if this baseline is flagged, we do not compute */
|
|
||||||
if (!t->barr[ci+t->boff].flag) {
|
|
||||||
|
|
||||||
/* stations for this baseline */
|
|
||||||
sta1=t->barr[ci+t->boff].sta1;
|
|
||||||
sta2=t->barr[ci+t->boff].sta2;
|
|
||||||
for (cm=0; cm<M; cm++) { /* clusters */
|
|
||||||
/* gains for this cluster, for sta1,sta2 */
|
|
||||||
/* depending on the baseline index, and cluster chunk size,
|
|
||||||
select proper parameter addres */
|
|
||||||
/* depending on the chunk size and the baseline index,
|
|
||||||
select right set of parameters
|
|
||||||
data x=[0,........,Nbase*tilesz]
|
|
||||||
divided into nchunk chunks
|
|
||||||
p[0] -> x[0.....Nbase*tilesz/nchunk-1]
|
|
||||||
p[1] -> x[Nbase*tilesz/nchunk......2*Nbase*tilesz-1]
|
|
||||||
....
|
|
||||||
p[last] -> x[(nchunk-1)*Nbase*tilesz/nchunk......Nbase*tilesz]
|
|
||||||
|
|
||||||
so given bindex, right p[] is bindex/((Nbase*tilesz+nchunk-1)/nchunk)
|
|
||||||
*/
|
|
||||||
|
|
||||||
px=(ci+t->boff)/((Ntilebase+t->carr[cm].nchunk-1)/t->carr[cm].nchunk);
|
|
||||||
//pm=&(t->p[cm*8*N]);
|
|
||||||
pm=&(t->p[t->carr[cm].p[px]]);
|
|
||||||
G1[0]=(pm[sta1*8])+_Complex_I*(pm[sta1*8+1]);
|
|
||||||
G1[1]=(pm[sta1*8+2])+_Complex_I*(pm[sta1*8+3]);
|
|
||||||
G1[2]=(pm[sta1*8+4])+_Complex_I*(pm[sta1*8+5]);
|
|
||||||
G1[3]=(pm[sta1*8+6])+_Complex_I*(pm[sta1*8+7]);
|
|
||||||
G2[0]=(pm[sta2*8])+_Complex_I*(pm[sta2*8+1]);
|
|
||||||
G2[1]=(pm[sta2*8+2])+_Complex_I*(pm[sta2*8+3]);
|
|
||||||
G2[2]=(pm[sta2*8+4])+_Complex_I*(pm[sta2*8+5]);
|
|
||||||
G2[3]=(pm[sta2*8+6])+_Complex_I*(pm[sta2*8+7]);
|
|
||||||
|
|
||||||
|
|
||||||
/* use pre calculated values */
|
|
||||||
C[0]=t->coh[4*M*ci+4*cm];
|
|
||||||
C[1]=t->coh[4*M*ci+4*cm+1];
|
|
||||||
C[2]=t->coh[4*M*ci+4*cm+2];
|
|
||||||
C[3]=t->coh[4*M*ci+4*cm+3];
|
|
||||||
|
|
||||||
/* form G1*C*G2' */
|
|
||||||
/* T1=G1*C */
|
|
||||||
amb(G1,C,T1);
|
|
||||||
/* T2=T1*G2' */
|
|
||||||
ambt(T1,G2,T2);
|
|
||||||
|
|
||||||
/* add to baseline visibilities */
|
|
||||||
t->x[8*ci]+=creal(T2[0]);
|
|
||||||
t->x[8*ci+1]+=cimag(T2[0]);
|
|
||||||
t->x[8*ci+2]+=creal(T2[1]);
|
|
||||||
t->x[8*ci+3]+=cimag(T2[1]);
|
|
||||||
t->x[8*ci+4]+=creal(T2[2]);
|
|
||||||
t->x[8*ci+5]+=cimag(T2[2]);
|
|
||||||
t->x[8*ci+6]+=creal(T2[3]);
|
|
||||||
t->x[8*ci+7]+=cimag(T2[3]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* minimization function (multithreaded) */
|
|
||||||
/* p: size mx1 parameters
|
|
||||||
x: size nx1 data calculated
|
|
||||||
data: extra info needed */
|
|
||||||
static void
|
|
||||||
minimize_viz_full_pth(double *p, double *x, int m, int n, void *data) {
|
|
||||||
|
|
||||||
me_data_t *dp=(me_data_t*)data;
|
|
||||||
/* u,v,w : size Nbase*tilesz x 1 x: size Nbase*8*tilesz x 1 */
|
|
||||||
/* barr: size Nbase*tilesz x 1 carr: size Mx1 */
|
|
||||||
/* pp: size 8*N*M x 1 */
|
|
||||||
/* pm: size Mx1 of double */
|
|
||||||
|
|
||||||
int nth,nth1,ci;
|
|
||||||
|
|
||||||
/* no of threads */
|
|
||||||
int Nt=(dp->Nt);
|
|
||||||
int Nthb0,Nthb;
|
|
||||||
pthread_attr_t attr;
|
|
||||||
pthread_t *th_array;
|
|
||||||
thread_data_base_t *threaddata;
|
|
||||||
|
|
||||||
int Nbase1=(dp->Nbase)*(dp->tilesz);
|
|
||||||
|
|
||||||
/* calculate min baselines a thread can handle */
|
|
||||||
//Nthb0=ceil((double)Nbase1/(double)Nt);
|
|
||||||
Nthb0=(Nbase1+Nt-1)/Nt;
|
|
||||||
|
|
||||||
/* setup threads */
|
|
||||||
pthread_attr_init(&attr);
|
|
||||||
pthread_attr_setdetachstate(&attr,PTHREAD_CREATE_JOINABLE);
|
|
||||||
|
|
||||||
if ((th_array=(pthread_t*)malloc((size_t)Nt*sizeof(pthread_t)))==0) {
|
|
||||||
fprintf(stderr,"%s: %d: No free memory\n",__FILE__,__LINE__);
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
if ((threaddata=(thread_data_base_t*)malloc((size_t)Nt*sizeof(thread_data_base_t)))==0) {
|
|
||||||
fprintf(stderr,"%s: %d: No free memory\n",__FILE__,__LINE__);
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* iterate over threads, allocating baselines per thread */
|
|
||||||
ci=0;
|
|
||||||
for (nth=0; nth<Nt && ci<Nbase1; nth++) {
|
|
||||||
/* this thread will handle baselines [ci:min(Nbase1-1,ci+Nthb0-1)] */
|
|
||||||
/* determine actual no. of baselines */
|
|
||||||
if (ci+Nthb0<Nbase1) {
|
|
||||||
Nthb=Nthb0;
|
|
||||||
} else {
|
|
||||||
Nthb=Nbase1-ci;
|
|
||||||
}
|
|
||||||
|
|
||||||
threaddata[nth].boff=ci;
|
|
||||||
threaddata[nth].Nb=Nthb;
|
|
||||||
threaddata[nth].barr=dp->barr;
|
|
||||||
threaddata[nth].u=&(dp->u[ci]);
|
|
||||||
threaddata[nth].v=&(dp->v[ci]);
|
|
||||||
threaddata[nth].w=&(dp->w[ci]);
|
|
||||||
threaddata[nth].carr=dp->carr;
|
|
||||||
threaddata[nth].M=dp->M;
|
|
||||||
threaddata[nth].x=&(x[8*ci]);
|
|
||||||
threaddata[nth].p=p;
|
|
||||||
threaddata[nth].N=dp->N;
|
|
||||||
threaddata[nth].Nbase=dp->Nbase;
|
|
||||||
threaddata[nth].tilesz=dp->tilesz;
|
|
||||||
threaddata[nth].coh=&(dp->coh[4*(dp->M)*ci]);
|
|
||||||
|
|
||||||
//printf("thread %d predict data from %d baselines %d\n",nth,8*ci,Nthb);
|
|
||||||
pthread_create(&th_array[nth],&attr,predict_threadfn_withgain_full,(void*)(&threaddata[nth]));
|
|
||||||
/* next baseline set */
|
|
||||||
ci=ci+Nthb;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* now wait for threads to finish */
|
|
||||||
for(nth1=0; nth1<nth; nth1++) {
|
|
||||||
pthread_join(th_array[nth1],NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
pthread_attr_destroy(&attr);
|
|
||||||
|
|
||||||
|
|
||||||
free(th_array);
|
|
||||||
free(threaddata);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/******************** CUP version *****************************/
|
/******************** CUP version *****************************/
|
||||||
|
|
||||||
|
|
|
@ -286,7 +286,7 @@ mult_hessian(int m, double *pk, double *gk, double *s, double *y, double *rho, i
|
||||||
|
|
||||||
/* cubic interpolation in interval [a,b] (a>b is possible)
|
/* cubic interpolation in interval [a,b] (a>b is possible)
|
||||||
to find step that minimizes cost function */
|
to find step that minimizes cost function */
|
||||||
/* func: vector function
|
/*
|
||||||
xk: parameter values size m x 1 (at which step is calculated)
|
xk: parameter values size m x 1 (at which step is calculated)
|
||||||
pk: step direction size m x 1 (x(k+1)=x(k)+alphak * pk)
|
pk: step direction size m x 1 (x(k+1)=x(k)+alphak * pk)
|
||||||
a/b: interval for interpolation
|
a/b: interval for interpolation
|
||||||
|
@ -299,7 +299,6 @@ mult_hessian(int m, double *pk, double *gk, double *s, double *y, double *rho, i
|
||||||
*/
|
*/
|
||||||
static double
|
static double
|
||||||
cubic_interp(
|
cubic_interp(
|
||||||
void (*func)(double *p, double *hx, int m, int n, void *adata),
|
|
||||||
double *xk, double *pk, double a, double b, double *x, double *xp, double *xo, int m, int n, double step, void *adata, th_pipeline *tp, gbdata_b *tpg) {
|
double *xk, double *pk, double a, double b, double *x, double *xp, double *xo, int m, int n, double step, void *adata, th_pipeline *tp, gbdata_b *tpg) {
|
||||||
|
|
||||||
double f0,f1,f0d,f1d; /* function values and derivatives at a,b */
|
double f0,f1,f0d,f1d; /* function values and derivatives at a,b */
|
||||||
|
@ -308,10 +307,6 @@ cubic_interp(
|
||||||
|
|
||||||
my_dcopy(m,xk,1,xp,1); /* xp<=xk */
|
my_dcopy(m,xk,1,xp,1); /* xp<=xk */
|
||||||
my_daxpy(m,pk,a,xp); /* xp<=xp+(a)*pk */
|
my_daxpy(m,pk,a,xp); /* xp<=xp+(a)*pk */
|
||||||
// func(xp,x,m,n,adata);
|
|
||||||
// my_daxpy(n,xo,-1.0,x);
|
|
||||||
// f0=my_dnrm2(n,x);
|
|
||||||
// f0*=f0;
|
|
||||||
sync_barrier(&(tp->gate1));
|
sync_barrier(&(tp->gate1));
|
||||||
tpg->lmdata[0]->p=tpg->lmdata[1]->p=xp;
|
tpg->lmdata[0]->p=tpg->lmdata[1]->p=xp;
|
||||||
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
||||||
|
@ -323,9 +318,6 @@ cubic_interp(
|
||||||
|
|
||||||
/* grad(phi_0): evaluate at -step and +step */
|
/* grad(phi_0): evaluate at -step and +step */
|
||||||
my_daxpy(m,pk,step,xp); /* xp<=xp+(a+step)*pk */
|
my_daxpy(m,pk,step,xp); /* xp<=xp+(a+step)*pk */
|
||||||
// func(xp,x,m,n,adata);
|
|
||||||
// my_daxpy(n,xo,-1.0,x);
|
|
||||||
// p01=my_dnrm2(n,x);
|
|
||||||
sync_barrier(&(tp->gate1));
|
sync_barrier(&(tp->gate1));
|
||||||
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
||||||
sync_barrier(&(tp->gate2));
|
sync_barrier(&(tp->gate2));
|
||||||
|
@ -336,9 +328,6 @@ cubic_interp(
|
||||||
|
|
||||||
|
|
||||||
my_daxpy(m,pk,-2.0*step,xp); /* xp<=xp+(a-step)*pk */
|
my_daxpy(m,pk,-2.0*step,xp); /* xp<=xp+(a-step)*pk */
|
||||||
// func(xp,x,m,n,adata);
|
|
||||||
// my_daxpy(n,xo,-1.0,x);
|
|
||||||
// p02=my_dnrm2(n,x);
|
|
||||||
sync_barrier(&(tp->gate1));
|
sync_barrier(&(tp->gate1));
|
||||||
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
||||||
sync_barrier(&(tp->gate2));
|
sync_barrier(&(tp->gate2));
|
||||||
|
@ -348,16 +337,9 @@ cubic_interp(
|
||||||
sync_barrier(&(tp->gate2));
|
sync_barrier(&(tp->gate2));
|
||||||
|
|
||||||
|
|
||||||
// f0d=(p01*p01-p02*p02)/(2.0*step);
|
|
||||||
f0d=(p01-p02)/(2.0*step);
|
f0d=(p01-p02)/(2.0*step);
|
||||||
|
|
||||||
//my_dcopy(m,xk,1,xp,1); /* xp<=xk: NOT NEEDED because already xp=xk+(a-step)*pk */
|
|
||||||
//my_daxpy(m,pk,b,xp); /* xp<=xp+(b)*pk */
|
|
||||||
my_daxpy(m,pk,-a+step+b,xp); /* xp<=xp+(b)*pk */
|
my_daxpy(m,pk,-a+step+b,xp); /* xp<=xp+(b)*pk */
|
||||||
// func(xp,x,m,n,adata);
|
|
||||||
// my_daxpy(n,xo,-1.0,x);
|
|
||||||
// f1=my_dnrm2(n,x);
|
|
||||||
// f1*=f1;
|
|
||||||
sync_barrier(&(tp->gate1));
|
sync_barrier(&(tp->gate1));
|
||||||
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
||||||
sync_barrier(&(tp->gate2));
|
sync_barrier(&(tp->gate2));
|
||||||
|
@ -369,9 +351,6 @@ cubic_interp(
|
||||||
|
|
||||||
/* grad(phi_1): evaluate at -step and +step */
|
/* grad(phi_1): evaluate at -step and +step */
|
||||||
my_daxpy(m,pk,step,xp); /* xp<=xp+(b+step)*pk */
|
my_daxpy(m,pk,step,xp); /* xp<=xp+(b+step)*pk */
|
||||||
// func(xp,x,m,n,adata);
|
|
||||||
// my_daxpy(n,xo,-1.0,x);
|
|
||||||
// p01=my_dnrm2(n,x);
|
|
||||||
sync_barrier(&(tp->gate1));
|
sync_barrier(&(tp->gate1));
|
||||||
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
||||||
sync_barrier(&(tp->gate2));
|
sync_barrier(&(tp->gate2));
|
||||||
|
@ -382,9 +361,6 @@ cubic_interp(
|
||||||
|
|
||||||
|
|
||||||
my_daxpy(m,pk,-2.0*step,xp); /* xp<=xp+(b-step)*pk */
|
my_daxpy(m,pk,-2.0*step,xp); /* xp<=xp+(b-step)*pk */
|
||||||
// func(xp,x,m,n,adata);
|
|
||||||
// my_daxpy(n,xo,-1.0,x);
|
|
||||||
// p02=my_dnrm2(n,x);
|
|
||||||
sync_barrier(&(tp->gate1));
|
sync_barrier(&(tp->gate1));
|
||||||
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
||||||
sync_barrier(&(tp->gate2));
|
sync_barrier(&(tp->gate2));
|
||||||
|
@ -394,7 +370,6 @@ cubic_interp(
|
||||||
sync_barrier(&(tp->gate2));
|
sync_barrier(&(tp->gate2));
|
||||||
|
|
||||||
|
|
||||||
// f1d=(p01*p01-p02*p02)/(2.0*step);
|
|
||||||
f1d=(p01-p02)/(2.0*step);
|
f1d=(p01-p02)/(2.0*step);
|
||||||
|
|
||||||
|
|
||||||
|
@ -417,13 +392,7 @@ cubic_interp(
|
||||||
fz0=f0+f1;
|
fz0=f0+f1;
|
||||||
} else {
|
} else {
|
||||||
/* evaluate function for this root */
|
/* evaluate function for this root */
|
||||||
//my_dcopy(m,xk,1,xp,1); /* xp<=xk: NOT NEEDED because already xp=xk+(b-step)*pk */
|
|
||||||
//my_daxpy(m,pk,a+z0*(b-a),xp); /* xp<=xp+(a+z0(b-a))*pk */
|
|
||||||
my_daxpy(m,pk,-b+step+a+z0*(b-a),xp); /* xp<=xp+(a+z0(b-a))*pk */
|
my_daxpy(m,pk,-b+step+a+z0*(b-a),xp); /* xp<=xp+(a+z0(b-a))*pk */
|
||||||
// func(xp,x,m,n,adata);
|
|
||||||
// my_daxpy(n,xo,-1.0,x);
|
|
||||||
// fz0=my_dnrm2(n,x);
|
|
||||||
// fz0*=fz0;
|
|
||||||
sync_barrier(&(tp->gate1));
|
sync_barrier(&(tp->gate1));
|
||||||
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
||||||
sync_barrier(&(tp->gate2));
|
sync_barrier(&(tp->gate2));
|
||||||
|
@ -460,7 +429,7 @@ cubic_interp(
|
||||||
|
|
||||||
/*************** Fletcher line search **********************************/
|
/*************** Fletcher line search **********************************/
|
||||||
/* zoom function for line search */
|
/* zoom function for line search */
|
||||||
/* func: vector function
|
/*
|
||||||
xk: parameter values size m x 1 (at which step is calculated)
|
xk: parameter values size m x 1 (at which step is calculated)
|
||||||
pk: step direction size m x 1 (x(k+1)=x(k)+alphak * pk)
|
pk: step direction size m x 1 (x(k+1)=x(k)+alphak * pk)
|
||||||
a/b: bracket interval [a,b] (a>b) is possible
|
a/b: bracket interval [a,b] (a>b) is possible
|
||||||
|
@ -475,7 +444,6 @@ cubic_interp(
|
||||||
*/
|
*/
|
||||||
static double
|
static double
|
||||||
linesearch_zoom(
|
linesearch_zoom(
|
||||||
void (*func)(double *p, double *hx, int m, int n, void *adata),
|
|
||||||
double *xk, double *pk, double a, double b, double *x, double *xp, double phi_0, double gphi_0, double sigma, double rho, double t1, double t2, double t3, double *xo, int m, int n, double step, void *adata, th_pipeline *tp, gbdata_b *tpg) {
|
double *xk, double *pk, double a, double b, double *x, double *xp, double phi_0, double gphi_0, double sigma, double rho, double t1, double t2, double t3, double *xo, int m, int n, double step, void *adata, th_pipeline *tp, gbdata_b *tpg) {
|
||||||
|
|
||||||
double alphaj,phi_j,phi_aj;
|
double alphaj,phi_j,phi_aj;
|
||||||
|
@ -490,17 +458,12 @@ linesearch_zoom(
|
||||||
/* choose alphaj from [a+t2(b-a),b-t3(b-a)] */
|
/* choose alphaj from [a+t2(b-a),b-t3(b-a)] */
|
||||||
p01=aj+t2*(bj-aj);
|
p01=aj+t2*(bj-aj);
|
||||||
p02=bj-t3*(bj-aj);
|
p02=bj-t3*(bj-aj);
|
||||||
alphaj=cubic_interp(func,xk,pk,p01,p02,x,xp,xo,m,n,step,adata,tp,tpg);
|
alphaj=cubic_interp(xk,pk,p01,p02,x,xp,xo,m,n,step,adata,tp,tpg);
|
||||||
//printf("cubic intep [%lf,%lf]->%lf\n",p01,p02,alphaj);
|
//printf("cubic intep [%lf,%lf]->%lf\n",p01,p02,alphaj);
|
||||||
|
|
||||||
/* evaluate phi(alphaj) */
|
/* evaluate phi(alphaj) */
|
||||||
my_dcopy(m,xk,1,xp,1); /* xp<=xk */
|
my_dcopy(m,xk,1,xp,1); /* xp<=xk */
|
||||||
my_daxpy(m,pk,alphaj,xp); /* xp<=xp+(alphaj)*pk */
|
my_daxpy(m,pk,alphaj,xp); /* xp<=xp+(alphaj)*pk */
|
||||||
// func(xp,x,m,n,adata);
|
|
||||||
/* calculate x<=x-xo */
|
|
||||||
// my_daxpy(n,xo,-1.0,x);
|
|
||||||
// phi_j=my_dnrm2(n,x);
|
|
||||||
// phi_j*=phi_j;
|
|
||||||
sync_barrier(&(tp->gate1));
|
sync_barrier(&(tp->gate1));
|
||||||
tpg->lmdata[0]->p=tpg->lmdata[1]->p=xp;
|
tpg->lmdata[0]->p=tpg->lmdata[1]->p=xp;
|
||||||
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
||||||
|
@ -512,14 +475,7 @@ linesearch_zoom(
|
||||||
|
|
||||||
|
|
||||||
/* evaluate phi(aj) */
|
/* evaluate phi(aj) */
|
||||||
//my_dcopy(m,xk,1,xp,1); /* xp<=xk: NOT NEEDED because already xp = xk+alphaj*pk */
|
|
||||||
//my_daxpy(m,pk,aj,xp); /* xp<=xp+(aj)*pk */
|
|
||||||
my_daxpy(m,pk,-alphaj+aj,xp); /* xp<=xp+(aj)*pk */
|
my_daxpy(m,pk,-alphaj+aj,xp); /* xp<=xp+(aj)*pk */
|
||||||
// func(xp,x,m,n,adata);
|
|
||||||
/* calculate x<=x-xo */
|
|
||||||
// my_daxpy(n,xo,-1.0,x);
|
|
||||||
// phi_aj=my_dnrm2(n,x);
|
|
||||||
// phi_aj*=phi_aj;
|
|
||||||
sync_barrier(&(tp->gate1));
|
sync_barrier(&(tp->gate1));
|
||||||
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
||||||
sync_barrier(&(tp->gate2));
|
sync_barrier(&(tp->gate2));
|
||||||
|
@ -533,13 +489,7 @@ linesearch_zoom(
|
||||||
bj=alphaj; /* aj unchanged */
|
bj=alphaj; /* aj unchanged */
|
||||||
} else {
|
} else {
|
||||||
/* evaluate grad(alphaj) */
|
/* evaluate grad(alphaj) */
|
||||||
//my_dcopy(m,xk,1,xp,1); /* xp<=xk: NOT NEEDED because already xp = xk+aj*pk */
|
|
||||||
//my_daxpy(m,pk,alphaj+step,xp); /* xp<=xp+(alphaj+step)*pk */
|
|
||||||
my_daxpy(m,pk,-aj+alphaj+step,xp); /* xp<=xp+(alphaj+step)*pk */
|
my_daxpy(m,pk,-aj+alphaj+step,xp); /* xp<=xp+(alphaj+step)*pk */
|
||||||
// func(xp,x,m,n,adata);
|
|
||||||
/* calculate x<=x-xo */
|
|
||||||
// my_daxpy(n,xo,-1.0,x);
|
|
||||||
// p01=my_dnrm2(n,x);
|
|
||||||
sync_barrier(&(tp->gate1));
|
sync_barrier(&(tp->gate1));
|
||||||
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
||||||
sync_barrier(&(tp->gate2));
|
sync_barrier(&(tp->gate2));
|
||||||
|
@ -550,10 +500,6 @@ linesearch_zoom(
|
||||||
|
|
||||||
|
|
||||||
my_daxpy(m,pk,-2.0*step,xp); /* xp<=xp+(alphaj-step)*pk */
|
my_daxpy(m,pk,-2.0*step,xp); /* xp<=xp+(alphaj-step)*pk */
|
||||||
// func(xp,x,m,n,adata);
|
|
||||||
/* calculate x<=x-xo */
|
|
||||||
// my_daxpy(n,xo,-1.0,x);
|
|
||||||
// p02=my_dnrm2(n,x);
|
|
||||||
sync_barrier(&(tp->gate1));
|
sync_barrier(&(tp->gate1));
|
||||||
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
||||||
sync_barrier(&(tp->gate2));
|
sync_barrier(&(tp->gate2));
|
||||||
|
@ -563,7 +509,6 @@ linesearch_zoom(
|
||||||
sync_barrier(&(tp->gate2));
|
sync_barrier(&(tp->gate2));
|
||||||
|
|
||||||
|
|
||||||
// gphi_j=(p01*p01-p02*p02)/(2.0*step);
|
|
||||||
gphi_j=(p01-p02)/(2.0*step);
|
gphi_j=(p01-p02)/(2.0*step);
|
||||||
|
|
||||||
/* termination due to roundoff/other errors pp. 38, Fletcher */
|
/* termination due to roundoff/other errors pp. 38, Fletcher */
|
||||||
|
@ -601,7 +546,7 @@ linesearch_zoom(
|
||||||
|
|
||||||
|
|
||||||
/* line search */
|
/* line search */
|
||||||
/* func: vector function
|
/*
|
||||||
xk: parameter values size m x 1 (at which step is calculated)
|
xk: parameter values size m x 1 (at which step is calculated)
|
||||||
pk: step direction size m x 1 (x(k+1)=x(k)+alphak * pk)
|
pk: step direction size m x 1 (x(k+1)=x(k)+alphak * pk)
|
||||||
alpha1: initial value for step
|
alpha1: initial value for step
|
||||||
|
@ -613,7 +558,6 @@ linesearch_zoom(
|
||||||
*/
|
*/
|
||||||
static double
|
static double
|
||||||
linesearch(
|
linesearch(
|
||||||
void (*func)(double *p, double *hx, int m, int n, void *adata),
|
|
||||||
double *xk, double *pk, double alpha1, double sigma, double rho, double t1, double t2, double t3, double *xo, int m, int n, double step, void *adata, th_pipeline *tp, gbdata_b *tpg) {
|
double *xk, double *pk, double alpha1, double sigma, double rho, double t1, double t2, double t3, double *xo, int m, int n, double step, void *adata, th_pipeline *tp, gbdata_b *tpg) {
|
||||||
/* phi(alpha)=f(xk+alpha pk)
|
/* phi(alpha)=f(xk+alpha pk)
|
||||||
for vector function func
|
for vector function func
|
||||||
|
@ -642,11 +586,6 @@ linesearch(
|
||||||
|
|
||||||
alphak=1.0;
|
alphak=1.0;
|
||||||
/* evaluate phi_0 and grad(phi_0) */
|
/* evaluate phi_0 and grad(phi_0) */
|
||||||
//func(xk,x,m,n,adata);
|
|
||||||
//my_daxpy(n,xo,-1.0,x);
|
|
||||||
//phi_0=my_dnrm2(n,x);
|
|
||||||
//phi_0*=phi_0;
|
|
||||||
//printf("CPU cost=%lf\n",phi_0);
|
|
||||||
sync_barrier(&(tp->gate1));
|
sync_barrier(&(tp->gate1));
|
||||||
tpg->lmdata[0]->p=tpg->lmdata[1]->p=xk;
|
tpg->lmdata[0]->p=tpg->lmdata[1]->p=xk;
|
||||||
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
||||||
|
@ -663,10 +602,6 @@ linesearch(
|
||||||
my_dcopy(m,xk,1,xp,1); /* xp<=xk */
|
my_dcopy(m,xk,1,xp,1); /* xp<=xk */
|
||||||
my_daxpy(m,pk,step,xp); /* xp<=xp+(0.0+step)*pk */
|
my_daxpy(m,pk,step,xp); /* xp<=xp+(0.0+step)*pk */
|
||||||
|
|
||||||
//func(xp,x,m,n,adata);
|
|
||||||
/* calculate x<=x-xo */
|
|
||||||
//my_daxpy(n,xo,-1.0,x);
|
|
||||||
//p01=my_dnrm2(n,x);
|
|
||||||
sync_barrier(&(tp->gate1));
|
sync_barrier(&(tp->gate1));
|
||||||
tpg->lmdata[0]->p=tpg->lmdata[1]->p=xp;
|
tpg->lmdata[0]->p=tpg->lmdata[1]->p=xp;
|
||||||
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
||||||
|
@ -677,10 +612,6 @@ linesearch(
|
||||||
sync_barrier(&(tp->gate2));
|
sync_barrier(&(tp->gate2));
|
||||||
|
|
||||||
my_daxpy(m,pk,-2.0*step,xp); /* xp<=xp+(0.0-step)*pk */
|
my_daxpy(m,pk,-2.0*step,xp); /* xp<=xp+(0.0-step)*pk */
|
||||||
//func(xp,x,m,n,adata);
|
|
||||||
/* calculate x<=x-xo */
|
|
||||||
//my_daxpy(n,xo,-1.0,x);
|
|
||||||
//p02=my_dnrm2(n,x);
|
|
||||||
sync_barrier(&(tp->gate1));
|
sync_barrier(&(tp->gate1));
|
||||||
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
||||||
sync_barrier(&(tp->gate2));
|
sync_barrier(&(tp->gate2));
|
||||||
|
@ -690,7 +621,6 @@ linesearch(
|
||||||
sync_barrier(&(tp->gate2));
|
sync_barrier(&(tp->gate2));
|
||||||
|
|
||||||
|
|
||||||
// gphi_0=(p01*p01-p02*p02)/(2.0*step);
|
|
||||||
gphi_0=(p01-p02)/(2.0*step);
|
gphi_0=(p01-p02)/(2.0*step);
|
||||||
|
|
||||||
|
|
||||||
|
@ -719,11 +649,6 @@ linesearch(
|
||||||
/* evalualte phi(alpha(i))=f(xk+alphai pk) */
|
/* evalualte phi(alpha(i))=f(xk+alphai pk) */
|
||||||
my_dcopy(m,xk,1,xp,1); /* xp<=xk */
|
my_dcopy(m,xk,1,xp,1); /* xp<=xk */
|
||||||
my_daxpy(m,pk,alphai,xp); /* xp<=xp+alphai*pk */
|
my_daxpy(m,pk,alphai,xp); /* xp<=xp+alphai*pk */
|
||||||
// func(xp,x,m,n,adata);
|
|
||||||
/* calculate x<=x-xo */
|
|
||||||
// my_daxpy(n,xo,-1.0,x);
|
|
||||||
// phi_alphai=my_dnrm2(n,x);
|
|
||||||
// phi_alphai*=phi_alphai;
|
|
||||||
sync_barrier(&(tp->gate1));
|
sync_barrier(&(tp->gate1));
|
||||||
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
||||||
sync_barrier(&(tp->gate2));
|
sync_barrier(&(tp->gate2));
|
||||||
|
@ -743,7 +668,7 @@ linesearch(
|
||||||
|
|
||||||
if ((phi_alphai>phi_0+alphai*gphi_0) || (ci>1 && phi_alphai>=phi_alphai1)) {
|
if ((phi_alphai>phi_0+alphai*gphi_0) || (ci>1 && phi_alphai>=phi_alphai1)) {
|
||||||
/* ai=alphai1, bi=alphai bracket */
|
/* ai=alphai1, bi=alphai bracket */
|
||||||
alphak=linesearch_zoom(func,xk,pk,alphai1,alphai,x,xp,phi_0,gphi_0,sigma,rho,t1,t2,t3,xo,m,n,step,adata,tp,tpg);
|
alphak=linesearch_zoom(xk,pk,alphai1,alphai,x,xp,phi_0,gphi_0,sigma,rho,t1,t2,t3,xo,m,n,step,adata,tp,tpg);
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
printf("Linesearch : Condition 1 met\n");
|
printf("Linesearch : Condition 1 met\n");
|
||||||
#endif
|
#endif
|
||||||
|
@ -751,13 +676,7 @@ linesearch(
|
||||||
}
|
}
|
||||||
|
|
||||||
/* evaluate grad(phi(alpha(i))) */
|
/* evaluate grad(phi(alpha(i))) */
|
||||||
//my_dcopy(m,xk,1,xp,1); /* NOT NEEDED here because already xp=xk+alphai*pk */
|
|
||||||
//my_daxpy(m,pk,alphai+step,xp); /* xp<=xp+(alphai+step)*pk */
|
|
||||||
my_daxpy(m,pk,step,xp); /* xp<=xp+(alphai+step)*pk */
|
my_daxpy(m,pk,step,xp); /* xp<=xp+(alphai+step)*pk */
|
||||||
// func(xp,x,m,n,adata);
|
|
||||||
/* calculate x<=x-xo */
|
|
||||||
// my_daxpy(n,xo,-1.0,x);
|
|
||||||
// p01=my_dnrm2(n,x);
|
|
||||||
sync_barrier(&(tp->gate1));
|
sync_barrier(&(tp->gate1));
|
||||||
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
||||||
sync_barrier(&(tp->gate2));
|
sync_barrier(&(tp->gate2));
|
||||||
|
@ -767,10 +686,6 @@ linesearch(
|
||||||
sync_barrier(&(tp->gate2));
|
sync_barrier(&(tp->gate2));
|
||||||
|
|
||||||
my_daxpy(m,pk,-2.0*step,xp); /* xp<=xp+(alphai-step)*pk */
|
my_daxpy(m,pk,-2.0*step,xp); /* xp<=xp+(alphai-step)*pk */
|
||||||
// func(xp,x,m,n,adata);
|
|
||||||
/* calculate x<=x-xo */
|
|
||||||
// my_daxpy(n,xo,-1.0,x);
|
|
||||||
// p02=my_dnrm2(n,x);
|
|
||||||
sync_barrier(&(tp->gate1));
|
sync_barrier(&(tp->gate1));
|
||||||
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
tpg->status[0]=tpg->status[1]=PT_DO_CCOST;
|
||||||
sync_barrier(&(tp->gate2));
|
sync_barrier(&(tp->gate2));
|
||||||
|
@ -780,7 +695,6 @@ linesearch(
|
||||||
sync_barrier(&(tp->gate2));
|
sync_barrier(&(tp->gate2));
|
||||||
|
|
||||||
|
|
||||||
// gphi_i=(p01*p01-p02*p02)/(2.0*step);
|
|
||||||
gphi_i=(p01-p02)/(2.0*step);
|
gphi_i=(p01-p02)/(2.0*step);
|
||||||
|
|
||||||
if (fabs(gphi_i)<=-sigma*gphi_0) {
|
if (fabs(gphi_i)<=-sigma*gphi_0) {
|
||||||
|
@ -793,7 +707,7 @@ linesearch(
|
||||||
|
|
||||||
if (gphi_i>=0) {
|
if (gphi_i>=0) {
|
||||||
/* ai=alphai, bi=alphai1 bracket */
|
/* ai=alphai, bi=alphai1 bracket */
|
||||||
alphak=linesearch_zoom(func,xk,pk,alphai,alphai1,x,xp,phi_0,gphi_0,sigma,rho,t1,t2,t3,xo,m,n,step,adata,tp,tpg);
|
alphak=linesearch_zoom(xk,pk,alphai,alphai1,x,xp,phi_0,gphi_0,sigma,rho,t1,t2,t3,xo,m,n,step,adata,tp,tpg);
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
printf("Linesearch : Condition 3 met\n");
|
printf("Linesearch : Condition 3 met\n");
|
||||||
#endif
|
#endif
|
||||||
|
@ -809,7 +723,7 @@ linesearch(
|
||||||
/* choose by interpolation in [2*alphai-alphai1,min(mu,alphai+t1*(alphai-alphai1)] */
|
/* choose by interpolation in [2*alphai-alphai1,min(mu,alphai+t1*(alphai-alphai1)] */
|
||||||
p01=2.0*alphai-alphai1;
|
p01=2.0*alphai-alphai1;
|
||||||
p02=MIN(mu,alphai+t1*(alphai-alphai1));
|
p02=MIN(mu,alphai+t1*(alphai-alphai1));
|
||||||
alphai=cubic_interp(func,xk,pk,p01,p02,x,xp,xo,m,n,step,adata,tp,tpg);
|
alphai=cubic_interp(xk,pk,p01,p02,x,xp,xo,m,n,step,adata,tp,tpg);
|
||||||
//printf("cubic interp [%lf,%lf]->%lf\n",p01,p02,alphai);
|
//printf("cubic interp [%lf,%lf]->%lf\n",p01,p02,alphai);
|
||||||
}
|
}
|
||||||
phi_alphai1=phi_alphai;
|
phi_alphai1=phi_alphai;
|
||||||
|
@ -832,7 +746,6 @@ linesearch(
|
||||||
/* note M here is LBFGS memory size */
|
/* note M here is LBFGS memory size */
|
||||||
static int
|
static int
|
||||||
lbfgs_fit_common(
|
lbfgs_fit_common(
|
||||||
void (*func)(double *p, double *hx, int m, int n, void *adata),
|
|
||||||
double *p, double *x, int m, int n, int itmax, int M, int gpu_threads, int do_robust, void *adata) {
|
double *p, double *x, int m, int n, int itmax, int M, int gpu_threads, int do_robust, void *adata) {
|
||||||
|
|
||||||
double *gk; /* gradients at both k+1 and k iter */
|
double *gk; /* gradients at both k+1 and k iter */
|
||||||
|
@ -987,14 +900,6 @@ lbfgs_fit_common(
|
||||||
sync_barrier(&(tp.gate1));
|
sync_barrier(&(tp.gate1));
|
||||||
tpg.status[0]=tpg.status[1]=PT_DO_NOTHING;
|
tpg.status[0]=tpg.status[1]=PT_DO_NOTHING;
|
||||||
sync_barrier(&(tp.gate2));
|
sync_barrier(&(tp.gate2));
|
||||||
// for (ci=0; ci<20; ci++) {
|
|
||||||
// printf("GPU %d %lf\n",ci,gk[ci]);
|
|
||||||
// }
|
|
||||||
/* gradient gk=grad(f)_k */
|
|
||||||
// func_grad(func,xk,gk,x,m,n,step,gpu_threads,adata);
|
|
||||||
// for (ci=0; ci<20; ci++) {
|
|
||||||
// printf("CPU %d %lf\n",ci,gk[ci]);
|
|
||||||
// }
|
|
||||||
|
|
||||||
double gradnrm=my_dnrm2(m,gk);
|
double gradnrm=my_dnrm2(m,gk);
|
||||||
/* if gradient is too small, no need to solve, so stop */
|
/* if gradient is too small, no need to solve, so stop */
|
||||||
|
@ -1025,7 +930,7 @@ lbfgs_fit_common(
|
||||||
/* linesearch to find step length */
|
/* linesearch to find step length */
|
||||||
/* parameters alpha1=10.0,sigma=0.1, rho=0.01, t1=9, t2=0.1, t3=0.5 */
|
/* parameters alpha1=10.0,sigma=0.1, rho=0.01, t1=9, t2=0.1, t3=0.5 */
|
||||||
/* FIXME: update paramters for GPU gradient */
|
/* FIXME: update paramters for GPU gradient */
|
||||||
alphak=linesearch(func,xk,pk,10.0,0.1,0.01,9,0.1,0.5,x,m,n,step,adata,&tp, &tpg);
|
alphak=linesearch(xk,pk,10.0,0.1,0.01,9,0.1,0.5,x,m,n,step,adata,&tp, &tpg);
|
||||||
/* check if step size is too small, or nan, then stop */
|
/* check if step size is too small, or nan, then stop */
|
||||||
if (!isnormal(alphak) || fabs(alphak)<CLM_EPSILON) {
|
if (!isnormal(alphak) || fabs(alphak)<CLM_EPSILON) {
|
||||||
break;
|
break;
|
||||||
|
@ -1053,7 +958,6 @@ lbfgs_fit_common(
|
||||||
tpg.status[0]=tpg.status[1]=PT_DO_NOTHING;
|
tpg.status[0]=tpg.status[1]=PT_DO_NOTHING;
|
||||||
sync_barrier(&(tp.gate2));
|
sync_barrier(&(tp.gate2));
|
||||||
|
|
||||||
// func_grad(func,xk1,gk,x,m,n,step,gpu_threads,adata);
|
|
||||||
/* yk=yk+gk1 */
|
/* yk=yk+gk1 */
|
||||||
my_daxpy(m,gk,1.0,&y[cm]);
|
my_daxpy(m,gk,1.0,&y[cm]);
|
||||||
|
|
||||||
|
@ -1107,16 +1011,15 @@ lbfgs_fit_common(
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* wrapper functions */
|
||||||
int
|
int
|
||||||
lbfgs_fit(
|
lbfgs_fit(
|
||||||
void (*func)(double *p, double *hx, int m, int n, void *adata),
|
|
||||||
double *p, double *x, int m, int n, int itmax, int M, int gpu_threads, void *adata) {
|
double *p, double *x, int m, int n, int itmax, int M, int gpu_threads, void *adata) {
|
||||||
return lbfgs_fit_common(func, p, x, m, n, itmax, M, gpu_threads, 0, adata);
|
return lbfgs_fit_common(p, x, m, n, itmax, M, gpu_threads, 0, adata);
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
lbfgs_fit_robust_cuda(
|
lbfgs_fit_robust_cuda(
|
||||||
void (*func)(double *p, double *hx, int m, int n, void *adata),
|
|
||||||
double *p, double *x, int m, int n, int itmax, int M, int gpu_threads, void *adata) {
|
double *p, double *x, int m, int n, int itmax, int M, int gpu_threads, void *adata) {
|
||||||
return lbfgs_fit_common(func, p, x, m, n, itmax, M, gpu_threads, 1, adata);
|
return lbfgs_fit_common(p, x, m, n, itmax, M, gpu_threads, 1, adata);
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,282 +21,6 @@
|
||||||
#include "Dirac.h"
|
#include "Dirac.h"
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
|
|
||||||
|
|
||||||
/**** repeated code here ********************/
|
|
||||||
/* Jones matrix multiplication
|
|
||||||
C=A*B
|
|
||||||
*/
|
|
||||||
#ifdef USE_MIC
|
|
||||||
__attribute__ ((target(MIC)))
|
|
||||||
#endif
|
|
||||||
static void
|
|
||||||
amb(complex double * __restrict a, complex double * __restrict b, complex double * __restrict c) {
|
|
||||||
c[0]=a[0]*b[0]+a[1]*b[2];
|
|
||||||
c[1]=a[0]*b[1]+a[1]*b[3];
|
|
||||||
c[2]=a[2]*b[0]+a[3]*b[2];
|
|
||||||
c[3]=a[2]*b[1]+a[3]*b[3];
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Jones matrix multiplication
|
|
||||||
C=A*B^H
|
|
||||||
*/
|
|
||||||
#ifdef USE_MIC
|
|
||||||
__attribute__ ((target(MIC)))
|
|
||||||
#endif
|
|
||||||
static void
|
|
||||||
ambt(complex double * __restrict a, complex double * __restrict b, complex double * __restrict c) {
|
|
||||||
c[0]=a[0]*conj(b[0])+a[1]*conj(b[1]);
|
|
||||||
c[1]=a[0]*conj(b[2])+a[1]*conj(b[3]);
|
|
||||||
c[2]=a[2]*conj(b[0])+a[3]*conj(b[1]);
|
|
||||||
c[3]=a[2]*conj(b[2])+a[3]*conj(b[3]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**** end repeated code ********************/
|
|
||||||
|
|
||||||
/* worker thread for a cpu */
|
|
||||||
#ifdef USE_MIC
|
|
||||||
__attribute__ ((target(MIC)))
|
|
||||||
#endif
|
|
||||||
static void *
|
|
||||||
cpu_calc_deriv(void *adata) {
|
|
||||||
thread_data_grad_t *t=(thread_data_grad_t*)adata;
|
|
||||||
|
|
||||||
int ci,nb;
|
|
||||||
int stc,stoff,stm,sta1,sta2;
|
|
||||||
int N=t->N; /* stations */
|
|
||||||
int M=t->M; /* clusters */
|
|
||||||
int Nbase=(t->Nbase)*(t->tilesz);
|
|
||||||
|
|
||||||
|
|
||||||
complex double xr[4]; /* residuals */
|
|
||||||
complex double G1[4],G2[4],C[4],T1[4],T2[4];
|
|
||||||
double pp[8];
|
|
||||||
complex double csum;
|
|
||||||
int cli,tpchunk,pstart,nchunk,tilesperchunk,stci,ttile,tptile,poff;
|
|
||||||
|
|
||||||
/* iterate over each paramter */
|
|
||||||
for (ci=t->g_start; ci<=t->g_end; ++ci) {
|
|
||||||
t->g[ci]=0.0;
|
|
||||||
/* find station and parameter corresponding to this value of ci */
|
|
||||||
/* this parameter should correspond to the right baseline (x tilesz)
|
|
||||||
to contribute to the derivative */
|
|
||||||
cli=0;
|
|
||||||
while((cli<M) && (ci<t->carr[cli].p[0] || ci>t->carr[cli].p[0]+8*N*t->carr[cli].nchunk-1)) {
|
|
||||||
cli++;
|
|
||||||
}
|
|
||||||
/* now either cli>=M: cluster not found
|
|
||||||
or cli<M and cli is the right cluster */
|
|
||||||
if (cli==M && ci>=t->carr[cli-1].p[0] && ci<=t->carr[cli-1].p[0]+8*N*t->carr[cli-1].nchunk-1) {
|
|
||||||
cli--;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cli<M) {
|
|
||||||
/* right parameter offset */
|
|
||||||
stci=ci-t->carr[cli].p[0];
|
|
||||||
|
|
||||||
stc=(stci%(8*N))/8; /* 0..N-1 */
|
|
||||||
/* make sure this baseline contribute to this parameter */
|
|
||||||
tpchunk=stci/(8*N);
|
|
||||||
nchunk=t->carr[cli].nchunk;
|
|
||||||
pstart=t->carr[cli].p[0];
|
|
||||||
tilesperchunk=(t->tilesz+nchunk-1)/nchunk;
|
|
||||||
|
|
||||||
|
|
||||||
/* iterate over all baselines and accumulate sum */
|
|
||||||
for (nb=0; nb<Nbase; ++nb) {
|
|
||||||
/* which tile is this ? */
|
|
||||||
ttile=nb/t->Nbase;
|
|
||||||
/* which chunk this tile belongs to */
|
|
||||||
tptile=ttile/tilesperchunk;
|
|
||||||
/* now tptile has to match tpchunk, otherwise ignore calculation */
|
|
||||||
if (tptile==tpchunk) {
|
|
||||||
|
|
||||||
sta1=t->barr[nb].sta1;
|
|
||||||
sta2=t->barr[nb].sta2;
|
|
||||||
|
|
||||||
if (((stc==sta1)||(stc==sta2))&& !t->barr[nb].flag) {
|
|
||||||
/* this baseline has a contribution */
|
|
||||||
/* which paramter of this station */
|
|
||||||
stoff=(stci%(8*N))%8; /* 0..7 */
|
|
||||||
/* which cluster */
|
|
||||||
stm=cli; /* 0..M-1 */
|
|
||||||
|
|
||||||
/* exact expression for derivative
|
|
||||||
2 real( vec^H(residual_this_baseline)
|
|
||||||
* vec(-J_{pm}C_{pqm} J_{qm}^H)
|
|
||||||
where m: chosen cluster
|
|
||||||
J_{pm},J_{qm} Jones matrices for baseline p-q
|
|
||||||
depending on the parameter, J ==> E
|
|
||||||
E: zero matrix, except 1 at location of m
|
|
||||||
|
|
||||||
residual : in x[8*nb:8*nb+7]
|
|
||||||
C coh: in coh[8*M*nb+m*8:8*M*nb+m*8+7] (double storage)
|
|
||||||
coh[4*M*nb+4*m:4*M*nb+4*m+3] (complex storage)
|
|
||||||
J_p,J_q: in p[sta1*8+m*8*N: sta1*8+m*8*N+7]
|
|
||||||
and p[sta2*8+m*8*N: sta2*8+m*8*N+ 7]
|
|
||||||
*/
|
|
||||||
/* read in residual vector, conjugated */
|
|
||||||
xr[0]=(t->x[nb*8])-_Complex_I*(t->x[nb*8+1]);
|
|
||||||
xr[1]=(t->x[nb*8+2])-_Complex_I*(t->x[nb*8+3]);
|
|
||||||
xr[2]=(t->x[nb*8+4])-_Complex_I*(t->x[nb*8+5]);
|
|
||||||
xr[3]=(t->x[nb*8+6])-_Complex_I*(t->x[nb*8+7]);
|
|
||||||
|
|
||||||
/* read in coherency */
|
|
||||||
C[0]=t->coh[4*M*nb+4*stm];
|
|
||||||
C[1]=t->coh[4*M*nb+4*stm+1];
|
|
||||||
C[2]=t->coh[4*M*nb+4*stm+2];
|
|
||||||
C[3]=t->coh[4*M*nb+4*stm+3];
|
|
||||||
|
|
||||||
memset(pp,0,sizeof(double)*8);
|
|
||||||
if (stc==sta1) {
|
|
||||||
/* this station parameter gradient */
|
|
||||||
pp[stoff]=1.0;
|
|
||||||
memset(G1,0,sizeof(complex double)*4);
|
|
||||||
G1[0]=pp[0]+_Complex_I*pp[1];
|
|
||||||
G1[1]=pp[2]+_Complex_I*pp[3];
|
|
||||||
G1[2]=pp[4]+_Complex_I*pp[5];
|
|
||||||
G1[3]=pp[6]+_Complex_I*pp[7];
|
|
||||||
poff=pstart+tpchunk*8*N+sta2*8;
|
|
||||||
G2[0]=(t->p[poff])+_Complex_I*(t->p[poff+1]);
|
|
||||||
G2[1]=(t->p[poff+2])+_Complex_I*(t->p[poff+3]);
|
|
||||||
G2[2]=(t->p[poff+4])+_Complex_I*(t->p[poff+4]);
|
|
||||||
G2[3]=(t->p[poff+6])+_Complex_I*(t->p[poff+7]);
|
|
||||||
|
|
||||||
} else if (stc==sta2) {
|
|
||||||
memset(G2,0,sizeof(complex double)*4);
|
|
||||||
pp[stoff]=1.0;
|
|
||||||
G2[0]=pp[0]+_Complex_I*pp[1];
|
|
||||||
G2[1]=pp[2]+_Complex_I*pp[3];
|
|
||||||
G2[2]=pp[4]+_Complex_I*pp[5];
|
|
||||||
G2[3]=pp[6]+_Complex_I*pp[7];
|
|
||||||
poff=pstart+tpchunk*8*N+sta1*8;
|
|
||||||
G1[0]=(t->p[poff])+_Complex_I*(t->p[poff+1]);
|
|
||||||
G1[1]=(t->p[poff+2])+_Complex_I*(t->p[poff+3]);
|
|
||||||
G1[2]=(t->p[poff+4])+_Complex_I*(t->p[poff+5]);
|
|
||||||
G1[3]=(t->p[poff+6])+_Complex_I*(t->p[poff+7]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* T1=G1*C */
|
|
||||||
amb(G1,C,T1);
|
|
||||||
/* T2=T1*G2' */
|
|
||||||
ambt(T1,G2,T2);
|
|
||||||
|
|
||||||
/* calculate product xr*vec(J_p C J_q^H ) */
|
|
||||||
csum=xr[0]*T2[0];
|
|
||||||
csum+=xr[1]*T2[1];
|
|
||||||
csum+=xr[2]*T2[2];
|
|
||||||
csum+=xr[3]*T2[3];
|
|
||||||
|
|
||||||
/* accumulate sum */
|
|
||||||
t->g[ci]+=-2.0*creal(csum);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef USE_MIC
|
|
||||||
__attribute__ ((target(MIC)))
|
|
||||||
#endif
|
|
||||||
static int
|
|
||||||
func_grad(
|
|
||||||
void (*func)(double *p, double *hx, int m, int n, void *adata),
|
|
||||||
double *p, double *g, double *xo, int m, int n, double step, void *adata) {
|
|
||||||
/* gradient for each parameter is
|
|
||||||
(||func(p+step*e_i)-x||^2-||func(p-step*e_i)-x||^2)/2*step
|
|
||||||
i=0,...,m-1 for all parameters
|
|
||||||
e_i: unit vector, 1 only at i-th location
|
|
||||||
*/
|
|
||||||
|
|
||||||
double *x; /* array to store residual */
|
|
||||||
int ci;
|
|
||||||
me_data_t *dp=(me_data_t*)adata;
|
|
||||||
|
|
||||||
int Nt=dp->Nt;
|
|
||||||
|
|
||||||
pthread_attr_t attr;
|
|
||||||
pthread_t *th_array;
|
|
||||||
thread_data_grad_t *threaddata;
|
|
||||||
|
|
||||||
|
|
||||||
if ((x=(double*)calloc((size_t)n,sizeof(double)))==0) {
|
|
||||||
#ifndef USE_MIC
|
|
||||||
fprintf(stderr,"%s: %d: no free memory\n",__FILE__,__LINE__);
|
|
||||||
#endif
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
/* evaluate func once, store in x, and create threads */
|
|
||||||
/* and calculate the residual x=xo-func */
|
|
||||||
func(p,x,m,n,adata);
|
|
||||||
/* calculate x<=x-xo */
|
|
||||||
my_daxpy(n,xo,-1.0,x);
|
|
||||||
|
|
||||||
/* setup threads */
|
|
||||||
pthread_attr_init(&attr);
|
|
||||||
pthread_attr_setdetachstate(&attr,PTHREAD_CREATE_JOINABLE);
|
|
||||||
|
|
||||||
if ((th_array=(pthread_t*)malloc((size_t)Nt*sizeof(pthread_t)))==0) {
|
|
||||||
#ifndef USE_MIC
|
|
||||||
fprintf(stderr,"%s: %d: No free memory\n",__FILE__,__LINE__);
|
|
||||||
#endif
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
if ((threaddata=(thread_data_grad_t*)malloc((size_t)Nt*sizeof(thread_data_grad_t)))==0) {
|
|
||||||
#ifndef USE_MIC
|
|
||||||
fprintf(stderr,"%s: %d: No free memory\n",__FILE__,__LINE__);
|
|
||||||
#endif
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
int nth,nth1,Nparm;
|
|
||||||
|
|
||||||
/* parameters per thread */
|
|
||||||
Nparm=(m+Nt-1)/Nt;
|
|
||||||
|
|
||||||
/* each thread will calculate derivative of part of
|
|
||||||
parameters */
|
|
||||||
ci=0;
|
|
||||||
for (nth=0; nth<Nt; nth++) {
|
|
||||||
threaddata[nth].Nbase=dp->Nbase;
|
|
||||||
threaddata[nth].tilesz=dp->tilesz;
|
|
||||||
threaddata[nth].barr=dp->barr;
|
|
||||||
threaddata[nth].carr=dp->carr;
|
|
||||||
threaddata[nth].M=dp->M;
|
|
||||||
threaddata[nth].N=dp->N;
|
|
||||||
threaddata[nth].coh=dp->coh;
|
|
||||||
threaddata[nth].m=m;
|
|
||||||
threaddata[nth].n=n;
|
|
||||||
threaddata[nth].x=x;
|
|
||||||
threaddata[nth].p=p;
|
|
||||||
threaddata[nth].g=g;
|
|
||||||
threaddata[nth].g_start=ci;
|
|
||||||
threaddata[nth].g_end=ci+Nparm-1;
|
|
||||||
if (threaddata[nth].g_end>=m) {
|
|
||||||
threaddata[nth].g_end=m-1;
|
|
||||||
}
|
|
||||||
ci=ci+Nparm;
|
|
||||||
pthread_create(&th_array[nth],&attr,cpu_calc_deriv,(void*)(&threaddata[nth]));
|
|
||||||
}
|
|
||||||
|
|
||||||
/* now wait for threads to finish */
|
|
||||||
for(nth1=0; nth1<nth; nth1++) {
|
|
||||||
pthread_join(th_array[nth1],NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
pthread_attr_destroy(&attr);
|
|
||||||
|
|
||||||
free(th_array);
|
|
||||||
free(threaddata);
|
|
||||||
|
|
||||||
free(x);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* use algorithm 9.1 to compute pk=Hk gk */
|
/* use algorithm 9.1 to compute pk=Hk gk */
|
||||||
/* pk,gk: size m x 1
|
/* pk,gk: size m x 1
|
||||||
s, y: size mM x 1
|
s, y: size mM x 1
|
||||||
|
@ -381,24 +105,18 @@ mult_hessian(int m, double *pk, double *gk, double *s, double *y, double *rho, i
|
||||||
|
|
||||||
/* cubic interpolation in interval [a,b] (a>b is possible)
|
/* cubic interpolation in interval [a,b] (a>b is possible)
|
||||||
to find step that minimizes cost function */
|
to find step that minimizes cost function */
|
||||||
/* func: vector function
|
/* func: scalar function
|
||||||
xk: parameter values size m x 1 (at which step is calculated)
|
xk: parameter values size m x 1 (at which step is calculated)
|
||||||
pk: step direction size m x 1 (x(k+1)=x(k)+alphak * pk)
|
pk: step direction size m x 1 (x(k+1)=x(k)+alphak * pk)
|
||||||
a/b: interval for interpolation
|
a/b: interval for interpolation
|
||||||
x: size n x 1 (storage)
|
|
||||||
xp: size m x 1 (storage)
|
xp: size m x 1 (storage)
|
||||||
xo: observed data size n x 1
|
|
||||||
n: size of vector function
|
|
||||||
step: step size for differencing
|
step: step size for differencing
|
||||||
adata: additional data passed to the function
|
adata: additional data passed to the function
|
||||||
*/
|
*/
|
||||||
#ifdef USE_MIC
|
|
||||||
__attribute__ ((target(MIC)))
|
|
||||||
#endif
|
|
||||||
static double
|
static double
|
||||||
cubic_interp(
|
cubic_interp(
|
||||||
void (*func)(double *p, double *hx, int m, int n, void *adata),
|
double (*func)(double *p, int m, void *adata),
|
||||||
double *xk, double *pk, double a, double b, double *x, double *xp, double *xo, int m, int n, double step, void *adata) {
|
double *xk, double *pk, double a, double b, double *xp, int m, double step, void *adata) {
|
||||||
|
|
||||||
double f0,f1,f0d,f1d; /* function values and derivatives at a,b */
|
double f0,f1,f0d,f1d; /* function values and derivatives at a,b */
|
||||||
double p01,p02,z0,fz0;
|
double p01,p02,z0,fz0;
|
||||||
|
@ -406,38 +124,24 @@ cubic_interp(
|
||||||
|
|
||||||
my_dcopy(m,xk,1,xp,1); /* xp<=xk */
|
my_dcopy(m,xk,1,xp,1); /* xp<=xk */
|
||||||
my_daxpy(m,pk,a,xp); /* xp<=xp+(a)*pk */
|
my_daxpy(m,pk,a,xp); /* xp<=xp+(a)*pk */
|
||||||
func(xp,x,m,n,adata);
|
f0=func(xp,m,adata);
|
||||||
my_daxpy(n,xo,-1.0,x);
|
|
||||||
f0=my_dnrm2(n,x);
|
|
||||||
f0*=f0;
|
|
||||||
/* grad(phi_0): evaluate at -step and +step */
|
/* grad(phi_0): evaluate at -step and +step */
|
||||||
my_daxpy(m,pk,step,xp); /* xp<=xp+(a+step)*pk */
|
my_daxpy(m,pk,step,xp); /* xp<=xp+(a+step)*pk */
|
||||||
func(xp,x,m,n,adata);
|
p01=func(xp,m,adata);
|
||||||
my_daxpy(n,xo,-1.0,x);
|
|
||||||
p01=my_dnrm2(n,x);
|
|
||||||
my_daxpy(m,pk,-2.0*step,xp); /* xp<=xp+(a-step)*pk */
|
my_daxpy(m,pk,-2.0*step,xp); /* xp<=xp+(a-step)*pk */
|
||||||
func(xp,x,m,n,adata);
|
p02=func(xp,m,adata);
|
||||||
my_daxpy(n,xo,-1.0,x);
|
f0d=(p01-p02)/(2.0*step);
|
||||||
p02=my_dnrm2(n,x);
|
|
||||||
f0d=(p01*p01-p02*p02)/(2.0*step);
|
|
||||||
|
|
||||||
//my_dcopy(m,xk,1,xp,1); /* xp<=xk: NOT NEEDED because already xp=xk+(a-step)*pk */
|
//FIXME my_dcopy(m,xk,1,xp,1); /* not necessary because xp=xk+(a-step)*pk */
|
||||||
//my_daxpy(m,pk,b,xp); /* xp<=xp+(b)*pk */
|
//FIXME my_daxpy(m,pk,b,xp); /* xp<=xp+(b)*pk */
|
||||||
my_daxpy(m,pk,-a+step+b,xp); /* xp<=xp+(b)*pk */
|
my_daxpy(m,pk,-a+step+b,xp); /* xp<=xp+(b)*pk */
|
||||||
func(xp,x,m,n,adata);
|
f1=func(xp,m,adata);
|
||||||
my_daxpy(n,xo,-1.0,x);
|
|
||||||
f1=my_dnrm2(n,x);
|
|
||||||
f1*=f1;
|
|
||||||
/* grad(phi_1): evaluate at -step and +step */
|
/* grad(phi_1): evaluate at -step and +step */
|
||||||
my_daxpy(m,pk,step,xp); /* xp<=xp+(b+step)*pk */
|
my_daxpy(m,pk,step,xp); /* xp<=xp+(b+step)*pk */
|
||||||
func(xp,x,m,n,adata);
|
p01=func(xp,m,adata);
|
||||||
my_daxpy(n,xo,-1.0,x);
|
|
||||||
p01=my_dnrm2(n,x);
|
|
||||||
my_daxpy(m,pk,-2.0*step,xp); /* xp<=xp+(b-step)*pk */
|
my_daxpy(m,pk,-2.0*step,xp); /* xp<=xp+(b-step)*pk */
|
||||||
func(xp,x,m,n,adata);
|
p02=func(xp,m,adata);
|
||||||
my_daxpy(n,xo,-1.0,x);
|
f1d=(p01-p02)/(2.0*step);
|
||||||
p02=my_dnrm2(n,x);
|
|
||||||
f1d=(p01*p01-p02*p02)/(2.0*step);
|
|
||||||
|
|
||||||
|
|
||||||
//printf("Interp a,f(a),f'(a): (%lf,%lf,%lf) (%lf,%lf,%lf)\n",a,f0,f0d,b,f1,f1d);
|
//printf("Interp a,f(a),f'(a): (%lf,%lf,%lf) (%lf,%lf,%lf)\n",a,f0,f0d,b,f1,f1d);
|
||||||
|
@ -459,13 +163,10 @@ cubic_interp(
|
||||||
fz0=f0+f1;
|
fz0=f0+f1;
|
||||||
} else {
|
} else {
|
||||||
/* evaluate function for this root */
|
/* evaluate function for this root */
|
||||||
//my_dcopy(m,xk,1,xp,1); /* xp<=xk: NOT NEEDED because already xp=xk+(b-step)*pk */
|
//FIXME my_dcopy(m,xk,1,xp,1); /* not necessary because xp=xk+(b-step)*pk */
|
||||||
//my_daxpy(m,pk,a+z0*(b-a),xp); /* xp<=xp+(a+z0(b-a))*pk */
|
//my_daxpy(m,pk,a+z0*(b-a),xp); /* xp<=xp+(z0)*pk */
|
||||||
my_daxpy(m,pk,-b+step+a+z0*(b-a),xp); /* xp<=xp+(a+z0(b-a))*pk */
|
my_daxpy(m,pk,-b+step+a+z0*(b-a),xp); /* xp<=xp+(a+z0*(b-a))*pk */
|
||||||
func(xp,x,m,n,adata);
|
fz0=func(xp,m,adata);
|
||||||
my_daxpy(n,xo,-1.0,x);
|
|
||||||
fz0=my_dnrm2(n,x);
|
|
||||||
fz0*=fz0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* now choose between f0,f1,fz0,fz1 */
|
/* now choose between f0,f1,fz0,fz1 */
|
||||||
|
@ -496,26 +197,21 @@ cubic_interp(
|
||||||
|
|
||||||
/*************** Fletcher line search **********************************/
|
/*************** Fletcher line search **********************************/
|
||||||
/* zoom function for line search */
|
/* zoom function for line search */
|
||||||
/* func: vector function
|
/* func: scalar function
|
||||||
xk: parameter values size m x 1 (at which step is calculated)
|
xk: parameter values size m x 1 (at which step is calculated)
|
||||||
pk: step direction size m x 1 (x(k+1)=x(k)+alphak * pk)
|
pk: step direction size m x 1 (x(k+1)=x(k)+alphak * pk)
|
||||||
a/b: bracket interval [a,b] (a>b) is possible
|
a/b: bracket interval [a,b] (a>b) is possible
|
||||||
x: size n x 1 (storage)
|
|
||||||
xp: size m x 1 (storage)
|
xp: size m x 1 (storage)
|
||||||
phi_0: phi(0)
|
phi_0: phi(0)
|
||||||
gphi_0: grad(phi(0))
|
gphi_0: grad(phi(0))
|
||||||
xo: observed data size n x 1
|
sigma,rho,t1,t2,t3: line search parameters (from Fletcher)
|
||||||
n: size of vector function
|
|
||||||
step: step size for differencing
|
step: step size for differencing
|
||||||
adata: additional data passed to the function
|
adata: additional data passed to the function
|
||||||
*/
|
*/
|
||||||
#ifdef USE_MIC
|
|
||||||
__attribute__ ((target(MIC)))
|
|
||||||
#endif
|
|
||||||
static double
|
static double
|
||||||
linesearch_zoom(
|
linesearch_zoom(
|
||||||
void (*func)(double *p, double *hx, int m, int n, void *adata),
|
double (*func)(double *p, int m, void *adata),
|
||||||
double *xk, double *pk, double a, double b, double *x, double *xp, double phi_0, double gphi_0, double sigma, double rho, double t1, double t2, double t3, double *xo, int m, int n, double step, void *adata) {
|
double *xk, double *pk, double a, double b, double *xp, double phi_0, double gphi_0, double sigma, double rho, double t1, double t2, double t3, int m, double step, void *adata) {
|
||||||
|
|
||||||
double alphaj,phi_j,phi_aj;
|
double alphaj,phi_j,phi_aj;
|
||||||
double gphi_j,p01,p02,aj,bj;
|
double gphi_j,p01,p02,aj,bj;
|
||||||
|
@ -529,46 +225,31 @@ linesearch_zoom(
|
||||||
/* choose alphaj from [a+t2(b-a),b-t3(b-a)] */
|
/* choose alphaj from [a+t2(b-a),b-t3(b-a)] */
|
||||||
p01=aj+t2*(bj-aj);
|
p01=aj+t2*(bj-aj);
|
||||||
p02=bj-t3*(bj-aj);
|
p02=bj-t3*(bj-aj);
|
||||||
alphaj=cubic_interp(func,xk,pk,p01,p02,x,xp,xo,m,n,step,adata);
|
alphaj=cubic_interp(func,xk,pk,p01,p02,xp,m,step,adata);
|
||||||
//printf("cubic intep [%lf,%lf]->%lf\n",p01,p02,alphaj);
|
//printf("cubic intep [%lf,%lf]->%lf\n",p01,p02,alphaj);
|
||||||
|
|
||||||
/* evaluate phi(alphaj) */
|
/* evaluate phi(alphaj) */
|
||||||
my_dcopy(m,xk,1,xp,1); /* xp<=xk */
|
my_dcopy(m,xk,1,xp,1); /* xp<=xk */
|
||||||
my_daxpy(m,pk,alphaj,xp); /* xp<=xp+(alphaj)*pk */
|
my_daxpy(m,pk,alphaj,xp); /* xp<=xp+(alphaj)*pk */
|
||||||
func(xp,x,m,n,adata);
|
phi_j=func(xp,m,adata);
|
||||||
/* calculate x<=x-xo */
|
|
||||||
my_daxpy(n,xo,-1.0,x);
|
|
||||||
phi_j=my_dnrm2(n,x);
|
|
||||||
phi_j*=phi_j;
|
|
||||||
|
|
||||||
/* evaluate phi(aj) */
|
/* evaluate phi(aj) */
|
||||||
//my_dcopy(m,xk,1,xp,1); /* xp<=xk: NOT NEEDED because already xp = xk+alphaj*pk */
|
//FIXME my_dcopy(m,xk,1,xp,1); /* xp<=xk : not necessary because already copied */
|
||||||
//my_daxpy(m,pk,aj,xp); /* xp<=xp+(aj)*pk */
|
//FIXME my_daxpy(m,pk,aj,xp); /* xp<=xp+(aj)*pk */
|
||||||
my_daxpy(m,pk,-alphaj+aj,xp); /* xp<=xp+(aj)*pk */
|
my_daxpy(m,pk,-alphaj+aj,xp); /* xp<=xp+(aj)*pk */
|
||||||
func(xp,x,m,n,adata);
|
phi_aj=func(xp,m,adata);
|
||||||
/* calculate x<=x-xo */
|
|
||||||
my_daxpy(n,xo,-1.0,x);
|
|
||||||
phi_aj=my_dnrm2(n,x);
|
|
||||||
phi_aj*=phi_aj;
|
|
||||||
|
|
||||||
|
|
||||||
if ((phi_j>phi_0+rho*alphaj*gphi_0) || phi_j>=phi_aj) {
|
if ((phi_j>phi_0+rho*alphaj*gphi_0) || phi_j>=phi_aj) {
|
||||||
bj=alphaj; /* aj unchanged */
|
bj=alphaj; /* aj unchanged */
|
||||||
} else {
|
} else {
|
||||||
/* evaluate grad(alphaj) */
|
/* evaluate grad(alphaj) */
|
||||||
//my_dcopy(m,xk,1,xp,1); /* xp<=xk: NOT NEEDED because already xp = xk+aj*pk */
|
//FIXME my_dcopy(m,xk,1,xp,1); /* xp<=xk */
|
||||||
//my_daxpy(m,pk,alphaj+step,xp); /* xp<=xp+(alphaj+step)*pk */
|
//FIXME my_daxpy(m,pk,alphaj+step,xp); /* xp<=xp+(alphaj+step)*pk */
|
||||||
my_daxpy(m,pk,-aj+alphaj+step,xp); /* xp<=xp+(alphaj+step)*pk */
|
my_daxpy(m,pk,-aj+alphaj+step,xp); /* xp<=xp+(alphaj+step)*pk */
|
||||||
func(xp,x,m,n,adata);
|
p01=func(xp,m,adata);
|
||||||
/* calculate x<=x-xo */
|
|
||||||
my_daxpy(n,xo,-1.0,x);
|
|
||||||
p01=my_dnrm2(n,x);
|
|
||||||
my_daxpy(m,pk,-2.0*step,xp); /* xp<=xp+(alphaj-step)*pk */
|
my_daxpy(m,pk,-2.0*step,xp); /* xp<=xp+(alphaj-step)*pk */
|
||||||
func(xp,x,m,n,adata);
|
p02=func(xp,m,adata);
|
||||||
/* calculate x<=x-xo */
|
gphi_j=(p01-p02)/(2.0*step);
|
||||||
my_daxpy(n,xo,-1.0,x);
|
|
||||||
p02=my_dnrm2(n,x);
|
|
||||||
gphi_j=(p01*p01-p02*p02)/(2.0*step);
|
|
||||||
|
|
||||||
/* termination due to roundoff/other errors pp. 38, Fletcher */
|
/* termination due to roundoff/other errors pp. 38, Fletcher */
|
||||||
if ((aj-alphaj)*gphi_j<=step) {
|
if ((aj-alphaj)*gphi_j<=step) {
|
||||||
|
@ -605,29 +286,25 @@ linesearch_zoom(
|
||||||
|
|
||||||
|
|
||||||
/* line search */
|
/* line search */
|
||||||
/* func: vector function
|
/* func: scalar function
|
||||||
xk: parameter values size m x 1 (at which step is calculated)
|
xk: parameter values size m x 1 (at which step is calculated)
|
||||||
pk: step direction size m x 1 (x(k+1)=x(k)+alphak * pk)
|
pk: step direction size m x 1 (x(k+1)=x(k)+alphak * pk)
|
||||||
alpha1: initial value for step
|
alpha1: initial value for step
|
||||||
sigma,rho,t1,t2,t3: line search parameters (from Fletcher)
|
sigma,rho,t1,t2,t3: line search parameters (from Fletcher)
|
||||||
xo: observed data size n x 1
|
m: size or parameter vector
|
||||||
n: size of vector function
|
|
||||||
step: step size for differencing
|
step: step size for differencing
|
||||||
adata: additional data passed to the function
|
adata: additional data passed to the function
|
||||||
*/
|
*/
|
||||||
#ifdef USE_MIC
|
double
|
||||||
__attribute__ ((target(MIC)))
|
|
||||||
#endif
|
|
||||||
static double
|
|
||||||
linesearch(
|
linesearch(
|
||||||
void (*func)(double *p, double *hx, int m, int n, void *adata),
|
double (*func)(double *p, int m, void *adata),
|
||||||
double *xk, double *pk, double alpha1, double sigma, double rho, double t1, double t2, double t3, double *xo, int m, int n, double step, void *adata) {
|
double *xk, double *pk, double alpha1, double sigma, double rho, double t1, double t2, double t3, int m, double step, void *adata) {
|
||||||
|
|
||||||
/* phi(alpha)=f(xk+alpha pk)
|
/* phi(alpha)=f(xk+alpha pk)
|
||||||
for vector function func
|
for vector function func
|
||||||
f(xk) =||func(xk)||^2 */
|
f(xk) =||func(xk)||^2 */
|
||||||
|
|
||||||
double *x,*xp;
|
double *xp;
|
||||||
double alphai,alphai1;
|
double alphai,alphai1;
|
||||||
double phi_0,phi_alphai,phi_alphai1;
|
double phi_0,phi_alphai,phi_alphai1;
|
||||||
double p01,p02;
|
double p01,p02;
|
||||||
|
@ -639,54 +316,36 @@ linesearch(
|
||||||
|
|
||||||
int ci;
|
int ci;
|
||||||
|
|
||||||
if ((x=(double*)calloc((size_t)n,sizeof(double)))==0) {
|
|
||||||
#ifndef USE_MIC
|
|
||||||
fprintf(stderr,"%s: %d: no free memory\n",__FILE__,__LINE__);
|
|
||||||
#endif
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
if ((xp=(double*)calloc((size_t)m,sizeof(double)))==0) {
|
if ((xp=(double*)calloc((size_t)m,sizeof(double)))==0) {
|
||||||
#ifndef USE_MIC
|
|
||||||
fprintf(stderr,"%s: %d: no free memory\n",__FILE__,__LINE__);
|
fprintf(stderr,"%s: %d: no free memory\n",__FILE__,__LINE__);
|
||||||
#endif
|
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
alphak=1.0;
|
alphak=1.0;
|
||||||
/* evaluate phi_0 and grad(phi_0) */
|
/* evaluate phi_0 and grad(phi_0) */
|
||||||
func(xk,x,m,n,adata);
|
phi_0=func(xk,m,adata);
|
||||||
my_daxpy(n,xo,-1.0,x);
|
|
||||||
phi_0=my_dnrm2(n,x);
|
|
||||||
phi_0*=phi_0;
|
|
||||||
/* select tolarance 1/100 of current function value */
|
/* select tolarance 1/100 of current function value */
|
||||||
tol=MIN(0.01*phi_0,1e-6);
|
tol=MIN(0.01*phi_0,1e-6);
|
||||||
|
|
||||||
/* grad(phi_0): evaluate at -step and +step */
|
/* grad(phi_0): evaluate at -step and +step */
|
||||||
my_dcopy(m,xk,1,xp,1); /* xp<=xk */
|
my_dcopy(m,xk,1,xp,1); /* xp<=xk */
|
||||||
my_daxpy(m,pk,step,xp); /* xp<=xp+(0.0+step)*pk */
|
my_daxpy(m,pk,step,xp); /* xp<=xp+(0.0+step)*pk */
|
||||||
func(xp,x,m,n,adata);
|
p01=func(xp,m,adata);
|
||||||
/* calculate x<=x-xo */
|
|
||||||
my_daxpy(n,xo,-1.0,x);
|
|
||||||
p01=my_dnrm2(n,x);
|
|
||||||
my_daxpy(m,pk,-2.0*step,xp); /* xp<=xp+(0.0-step)*pk */
|
my_daxpy(m,pk,-2.0*step,xp); /* xp<=xp+(0.0-step)*pk */
|
||||||
func(xp,x,m,n,adata);
|
p02=func(xp,m,adata);
|
||||||
/* calculate x<=x-xo */
|
gphi_0=(p01-p02)/(2.0*step);
|
||||||
my_daxpy(n,xo,-1.0,x);
|
|
||||||
p02=my_dnrm2(n,x);
|
|
||||||
gphi_0=(p01*p01-p02*p02)/(2.0*step);
|
|
||||||
|
|
||||||
/* estimate for mu */
|
/* estimate for mu */
|
||||||
/* mu = (tol-phi_0)/(rho gphi_0) */
|
/* mu = (tol-phi_0)/(rho gphi_0) */
|
||||||
mu=(tol-phi_0)/(rho*gphi_0);
|
mu=(tol-phi_0)/(rho*gphi_0);
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
printf("mu=%lf, alpha1=%lf\n",mu,alpha1);
|
printf("deltaphi=%lf, mu=%lf, alpha1=%lf\n",gphi_0,mu,alpha1);
|
||||||
#endif
|
#endif
|
||||||
/* catch if not finite (deltaphi=0 or nan) */
|
/* catch if not finite (deltaphi=0 or nan) */
|
||||||
if (!isnormal(mu)) {
|
if (!isnormal(mu)) {
|
||||||
free(x);
|
|
||||||
free(xp);
|
free(xp);
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
printf("line interval too small\n");
|
printf("line interval too small\n");
|
||||||
#endif
|
#endif
|
||||||
return mu;
|
return mu;
|
||||||
}
|
}
|
||||||
|
@ -699,11 +358,7 @@ linesearch(
|
||||||
/* evalualte phi(alpha(i))=f(xk+alphai pk) */
|
/* evalualte phi(alpha(i))=f(xk+alphai pk) */
|
||||||
my_dcopy(m,xk,1,xp,1); /* xp<=xk */
|
my_dcopy(m,xk,1,xp,1); /* xp<=xk */
|
||||||
my_daxpy(m,pk,alphai,xp); /* xp<=xp+alphai*pk */
|
my_daxpy(m,pk,alphai,xp); /* xp<=xp+alphai*pk */
|
||||||
func(xp,x,m,n,adata);
|
phi_alphai=func(xp,m,adata);
|
||||||
/* calculate x<=x-xo */
|
|
||||||
my_daxpy(n,xo,-1.0,x);
|
|
||||||
phi_alphai=my_dnrm2(n,x);
|
|
||||||
phi_alphai*=phi_alphai;
|
|
||||||
|
|
||||||
if (phi_alphai<tol) {
|
if (phi_alphai<tol) {
|
||||||
alphak=alphai;
|
alphak=alphai;
|
||||||
|
@ -715,7 +370,7 @@ linesearch(
|
||||||
|
|
||||||
if ((phi_alphai>phi_0+alphai*gphi_0) || (ci>1 && phi_alphai>=phi_alphai1)) {
|
if ((phi_alphai>phi_0+alphai*gphi_0) || (ci>1 && phi_alphai>=phi_alphai1)) {
|
||||||
/* ai=alphai1, bi=alphai bracket */
|
/* ai=alphai1, bi=alphai bracket */
|
||||||
alphak=linesearch_zoom(func,xk,pk,alphai1,alphai,x,xp,phi_0,gphi_0,sigma,rho,t1,t2,t3,xo,m,n,step,adata);
|
alphak=linesearch_zoom(func,xk,pk,alphai1,alphai,xp,phi_0,gphi_0,sigma,rho,t1,t2,t3,m,step,adata);
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
printf("Linesearch : Condition 1 met\n");
|
printf("Linesearch : Condition 1 met\n");
|
||||||
#endif
|
#endif
|
||||||
|
@ -723,19 +378,14 @@ linesearch(
|
||||||
}
|
}
|
||||||
|
|
||||||
/* evaluate grad(phi(alpha(i))) */
|
/* evaluate grad(phi(alpha(i))) */
|
||||||
//my_dcopy(m,xk,1,xp,1); /* NOT NEEDED here because already xp=xk+alphai*pk */
|
//FIXME my_dcopy(m,xk,1,xp,1); /* NOT NEEDED here?? xp<=xk */
|
||||||
//my_daxpy(m,pk,alphai+step,xp); /* xp<=xp+(alphai+step)*pk */
|
//FIXME my_daxpy(m,pk,alphai+step,xp); /* xp<=xp+(alphai+step)*pk */
|
||||||
|
/* note that xp already is xk+alphai. pk, so only add the missing term */
|
||||||
my_daxpy(m,pk,step,xp); /* xp<=xp+(alphai+step)*pk */
|
my_daxpy(m,pk,step,xp); /* xp<=xp+(alphai+step)*pk */
|
||||||
func(xp,x,m,n,adata);
|
p01=func(xp,m,adata);
|
||||||
/* calculate x<=x-xo */
|
|
||||||
my_daxpy(n,xo,-1.0,x);
|
|
||||||
p01=my_dnrm2(n,x);
|
|
||||||
my_daxpy(m,pk,-2.0*step,xp); /* xp<=xp+(alphai-step)*pk */
|
my_daxpy(m,pk,-2.0*step,xp); /* xp<=xp+(alphai-step)*pk */
|
||||||
func(xp,x,m,n,adata);
|
p02=func(xp,m,adata);
|
||||||
/* calculate x<=x-xo */
|
gphi_i=(p01-p02)/(2.0*step);
|
||||||
my_daxpy(n,xo,-1.0,x);
|
|
||||||
p02=my_dnrm2(n,x);
|
|
||||||
gphi_i=(p01*p01-p02*p02)/(2.0*step);
|
|
||||||
|
|
||||||
if (fabs(gphi_i)<=-sigma*gphi_0) {
|
if (fabs(gphi_i)<=-sigma*gphi_0) {
|
||||||
alphak=alphai;
|
alphak=alphai;
|
||||||
|
@ -747,7 +397,7 @@ linesearch(
|
||||||
|
|
||||||
if (gphi_i>=0) {
|
if (gphi_i>=0) {
|
||||||
/* ai=alphai, bi=alphai1 bracket */
|
/* ai=alphai, bi=alphai1 bracket */
|
||||||
alphak=linesearch_zoom(func,xk,pk,alphai,alphai1,x,xp,phi_0,gphi_0,sigma,rho,t1,t2,t3,xo,m,n,step,adata);
|
alphak=linesearch_zoom(func,xk,pk,alphai,alphai1,xp,phi_0,gphi_0,sigma,rho,t1,t2,t3,m,step,adata);
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
printf("Linesearch : Condition 3 met\n");
|
printf("Linesearch : Condition 3 met\n");
|
||||||
#endif
|
#endif
|
||||||
|
@ -763,7 +413,7 @@ linesearch(
|
||||||
/* choose by interpolation in [2*alphai-alphai1,min(mu,alphai+t1*(alphai-alphai1)] */
|
/* choose by interpolation in [2*alphai-alphai1,min(mu,alphai+t1*(alphai-alphai1)] */
|
||||||
p01=2.0*alphai-alphai1;
|
p01=2.0*alphai-alphai1;
|
||||||
p02=MIN(mu,alphai+t1*(alphai-alphai1));
|
p02=MIN(mu,alphai+t1*(alphai-alphai1));
|
||||||
alphai=cubic_interp(func,xk,pk,p01,p02,x,xp,xo,m,n,step,adata);
|
alphai=cubic_interp(func,xk,pk,p01,p02,xp,m,step,adata);
|
||||||
//printf("cubic interp [%lf,%lf]->%lf\n",p01,p02,alphai);
|
//printf("cubic interp [%lf,%lf]->%lf\n",p01,p02,alphai);
|
||||||
}
|
}
|
||||||
phi_alphai1=phi_alphai;
|
phi_alphai1=phi_alphai;
|
||||||
|
@ -773,7 +423,6 @@ linesearch(
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
free(x);
|
|
||||||
free(xp);
|
free(xp);
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
printf("Step size=%g\n",alphak);
|
printf("Step size=%g\n",alphak);
|
||||||
|
@ -782,10 +431,26 @@ linesearch(
|
||||||
}
|
}
|
||||||
/*************** END Fletcher line search **********************************/
|
/*************** END Fletcher line search **********************************/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* cost function : return a scalar cost, input : p (mx1) parameters, m: no. of params, adata: additional data
|
||||||
|
grad function: return gradient (mx1): input : p (mx1) parameters, g (mx1) gradient vector, m: no. of params, adata: additional data
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
p: parameters m x 1 (used as initial value, output final value)
|
||||||
|
itmax: max iterations
|
||||||
|
M: BFGS memory size
|
||||||
|
adata: additional data
|
||||||
|
*/
|
||||||
|
|
||||||
int
|
int
|
||||||
lbfgs_fit(
|
lbfgs_fit(
|
||||||
void (*func)(double *p, double *hx, int m, int n, void *adata),
|
double (*cost_func)(double *p, int m, void *adata),
|
||||||
double *p, double *x, int m, int n, int itmax, int M, int gpu_threads, void *adata) {
|
void (*grad_func)(double *p, double *g, int m, void *adata),
|
||||||
|
/* the following is the mini batch function, called per eath iteration
|
||||||
|
to change the input/output data used for the next iteration, if =NULL, full batch mode */
|
||||||
|
void (*change_batch)(int iter, void *adata), /* iter= iteration number */
|
||||||
|
double *p, int m, int itmax, int M, void *adata) {
|
||||||
|
|
||||||
double *gk; /* gradients at both k+1 and k iter */
|
double *gk; /* gradients at both k+1 and k iter */
|
||||||
double *xk1,*xk; /* parameters at k+1 and k iter */
|
double *xk1,*xk; /* parameters at k+1 and k iter */
|
||||||
|
@ -799,56 +464,45 @@ lbfgs_fit(
|
||||||
|
|
||||||
|
|
||||||
if ((gk=(double*)calloc((size_t)m,sizeof(double)))==0) {
|
if ((gk=(double*)calloc((size_t)m,sizeof(double)))==0) {
|
||||||
#ifndef USE_MIC
|
|
||||||
fprintf(stderr,"%s: %d: no free memory\n",__FILE__,__LINE__);
|
fprintf(stderr,"%s: %d: no free memory\n",__FILE__,__LINE__);
|
||||||
#endif
|
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
if ((xk1=(double*)calloc((size_t)m,sizeof(double)))==0) {
|
if ((xk1=(double*)calloc((size_t)m,sizeof(double)))==0) {
|
||||||
#ifndef USE_MIC
|
|
||||||
fprintf(stderr,"%s: %d: no free memory\n",__FILE__,__LINE__);
|
fprintf(stderr,"%s: %d: no free memory\n",__FILE__,__LINE__);
|
||||||
#endif
|
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
if ((xk=(double*)calloc((size_t)m,sizeof(double)))==0) {
|
if ((xk=(double*)calloc((size_t)m,sizeof(double)))==0) {
|
||||||
#ifndef USE_MIC
|
|
||||||
fprintf(stderr,"%s: %d: no free memory\n",__FILE__,__LINE__);
|
fprintf(stderr,"%s: %d: no free memory\n",__FILE__,__LINE__);
|
||||||
#endif
|
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((pk=(double*)calloc((size_t)m,sizeof(double)))==0) {
|
if ((pk=(double*)calloc((size_t)m,sizeof(double)))==0) {
|
||||||
#ifndef USE_MIC
|
|
||||||
fprintf(stderr,"%s: %d: no free memory\n",__FILE__,__LINE__);
|
fprintf(stderr,"%s: %d: no free memory\n",__FILE__,__LINE__);
|
||||||
#endif
|
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* storage size mM x 1*/
|
/* storage size mM x 1*/
|
||||||
if ((s=(double*)calloc((size_t)m*M,sizeof(double)))==0) {
|
if ((s=(double*)calloc((size_t)m*M,sizeof(double)))==0) {
|
||||||
#ifndef USE_MIC
|
|
||||||
fprintf(stderr,"%s: %d: no free memory\n",__FILE__,__LINE__);
|
fprintf(stderr,"%s: %d: no free memory\n",__FILE__,__LINE__);
|
||||||
#endif
|
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
if ((y=(double*)calloc((size_t)m*M,sizeof(double)))==0) {
|
if ((y=(double*)calloc((size_t)m*M,sizeof(double)))==0) {
|
||||||
#ifndef USE_MIC
|
|
||||||
fprintf(stderr,"%s: %d: no free memory\n",__FILE__,__LINE__);
|
fprintf(stderr,"%s: %d: no free memory\n",__FILE__,__LINE__);
|
||||||
#endif
|
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
if ((rho=(double*)calloc((size_t)M,sizeof(double)))==0) {
|
if ((rho=(double*)calloc((size_t)M,sizeof(double)))==0) {
|
||||||
#ifndef USE_MIC
|
|
||||||
fprintf(stderr,"%s: %d: no free memory\n",__FILE__,__LINE__);
|
fprintf(stderr,"%s: %d: no free memory\n",__FILE__,__LINE__);
|
||||||
#endif
|
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* initial value for params xk=p */
|
/* initial value for params xk=p */
|
||||||
my_dcopy(m,p,1,xk,1);
|
my_dcopy(m,p,1,xk,1);
|
||||||
|
if (change_batch) {
|
||||||
|
change_batch(0,adata);
|
||||||
|
}
|
||||||
/* gradient gk=grad(f)_k */
|
/* gradient gk=grad(f)_k */
|
||||||
func_grad(func,xk,gk,x,m,n,step,adata);
|
grad_func(xk,gk,m,adata);
|
||||||
double gradnrm=my_dnrm2(m,gk);
|
double gradnrm=my_dnrm2(m,gk);
|
||||||
/* if gradient is too small, no need to solve, so stop */
|
/* if gradient is too small, no need to solve, so stop */
|
||||||
if (gradnrm<CLM_STOP_THRESH) {
|
if (gradnrm<CLM_STOP_THRESH) {
|
||||||
|
@ -877,15 +531,14 @@ lbfgs_fit(
|
||||||
|
|
||||||
/* linesearch to find step length */
|
/* linesearch to find step length */
|
||||||
/* parameters alpha1=10.0,sigma=0.1, rho=0.01, t1=9, t2=0.1, t3=0.5 */
|
/* parameters alpha1=10.0,sigma=0.1, rho=0.01, t1=9, t2=0.1, t3=0.5 */
|
||||||
alphak=linesearch(func,xk,pk,10.0,0.1,0.01,9,0.1,0.5,x,m,n,step,adata);
|
alphak=linesearch(cost_func,xk,pk,10.0,0.1,0.01,9,0.1,0.5,m,step,adata);
|
||||||
/* parameters c1=1e-4 c2=0.9, alpha1=1.0, alphamax=10.0, step (for alpha)=1e-4*/
|
|
||||||
//alphak=linesearch_nw(func,xk,pk,1.0,10.0,1e-4,0.9,x,m,n,1e-4,adata);
|
/* Armijo line search */
|
||||||
//alphak=1.0;
|
///alphak=linesearch_backtrack(cost_func,xk,pk,gk,m,1.0,adata);
|
||||||
/* check if step size is too small, or nan, then stop */
|
/* check if step size is too small, or nan, then stop */
|
||||||
if (!isnormal(alphak) || fabs(alphak)<CLM_EPSILON) {
|
if (!isnormal(alphak) || fabs(alphak)<CLM_EPSILON) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* update parameters xk1=xk+alpha_k *pk */
|
/* update parameters xk1=xk+alpha_k *pk */
|
||||||
my_dcopy(m,xk,1,xk1,1);
|
my_dcopy(m,xk,1,xk1,1);
|
||||||
my_daxpy(m,pk,alphak,xk1);
|
my_daxpy(m,pk,alphak,xk1);
|
||||||
|
@ -900,7 +553,12 @@ lbfgs_fit(
|
||||||
my_dscal(m,-1.0,&y[cm]);
|
my_dscal(m,-1.0,&y[cm]);
|
||||||
|
|
||||||
/* update gradient */
|
/* update gradient */
|
||||||
func_grad(func,xk1,gk,x,m,n,step,adata);
|
if (change_batch) {
|
||||||
|
change_batch(ck+1,adata);
|
||||||
|
}
|
||||||
|
|
||||||
|
grad_func(xk1,gk,m,adata);
|
||||||
|
gradnrm=my_dnrm2(m,gk);
|
||||||
/* yk=yk+gk1 */
|
/* yk=yk+gk1 */
|
||||||
my_daxpy(m,gk,1.0,&y[cm]);
|
my_daxpy(m,gk,1.0,&y[cm]);
|
||||||
|
|
||||||
|
@ -920,15 +578,21 @@ lbfgs_fit(
|
||||||
} else {
|
} else {
|
||||||
cm=ci=0;
|
cm=ci=0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef DEBUG
|
||||||
|
printf("iter %d alpha=%g ||grad||=%g\n",ck,alphak,gradnrm);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* copy back solution to p */
|
/* copy back solution to p */
|
||||||
my_dcopy(m,xk,1,p,1);
|
my_dcopy(m,xk,1,p,1);
|
||||||
|
|
||||||
/* for (ci=0; ci<m; ci++) {
|
#ifdef DEBUG
|
||||||
|
for (ci=0; ci<m; ci++) {
|
||||||
printf("grad %d=%lf\n",ci,gk[ci]);
|
printf("grad %d=%lf\n",ci,gk[ci]);
|
||||||
} */
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
free(gk);
|
free(gk);
|
||||||
free(xk1);
|
free(xk1);
|
||||||
|
|
|
@ -331,9 +331,9 @@ predict_threadfn_withgain_full(void *data) {
|
||||||
|
|
||||||
/* minimization function (multithreaded) */
|
/* minimization function (multithreaded) */
|
||||||
/* p: size mx1 parameters
|
/* p: size mx1 parameters
|
||||||
x: size nx1 data calculated
|
x: size nx1 model being calculated
|
||||||
data: extra info needed */
|
data: extra info needed */
|
||||||
static void
|
void
|
||||||
minimize_viz_full_pth(double *p, double *x, int m, int n, void *data) {
|
minimize_viz_full_pth(double *p, double *x, int m, int n, void *data) {
|
||||||
|
|
||||||
me_data_t *dp=(me_data_t*)data;
|
me_data_t *dp=(me_data_t*)data;
|
||||||
|
@ -985,10 +985,10 @@ sagefit_visibilities_dual_pt(double *u, double *v, double *w, double *x, int N,
|
||||||
/* use LBFGS */
|
/* use LBFGS */
|
||||||
if (solver_mode==2) {
|
if (solver_mode==2) {
|
||||||
lmdata0.robust_nu=robust_nu0;
|
lmdata0.robust_nu=robust_nu0;
|
||||||
lbfgs_fit_robust_cuda(minimize_viz_full_pth, p, x, m, n, max_lbfgs, lbfgs_m, gpu_threads, (void*)&lmdata0);
|
lbfgs_fit_robust_cuda(p, x, m, n, max_lbfgs, lbfgs_m, gpu_threads, (void*)&lmdata0);
|
||||||
/* also print robust nu to output */
|
/* also print robust nu to output */
|
||||||
} else {
|
} else {
|
||||||
lbfgs_fit(minimize_viz_full_pth, p, x, m, n, max_lbfgs, lbfgs_m, gpu_threads, (void*)&lmdata0);
|
lbfgs_fit(p, x, m, n, max_lbfgs, lbfgs_m, gpu_threads, (void*)&lmdata0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1380,9 +1380,9 @@ sagefit_visibilities_dual_pt_one_gpu(double *u, double *v, double *w, double *x,
|
||||||
/* use LBFGS */
|
/* use LBFGS */
|
||||||
if (solver_mode==2 || solver_mode==3) {
|
if (solver_mode==2 || solver_mode==3) {
|
||||||
lmdata.robust_nu=robust_nu0;
|
lmdata.robust_nu=robust_nu0;
|
||||||
lbfgs_fit_robust_cuda(minimize_viz_full_pth, p, x, m, n, max_lbfgs, lbfgs_m, gpu_threads, (void*)&lmdata);
|
lbfgs_fit_robust_cuda(p, x, m, n, max_lbfgs, lbfgs_m, gpu_threads, (void*)&lmdata);
|
||||||
} else {
|
} else {
|
||||||
lbfgs_fit(minimize_viz_full_pth, p, x, m, n, max_lbfgs, lbfgs_m, gpu_threads, (void*)&lmdata);
|
lbfgs_fit(p, x, m, n, max_lbfgs, lbfgs_m, gpu_threads, (void*)&lmdata);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* final residual calculation */
|
/* final residual calculation */
|
||||||
|
@ -1457,9 +1457,9 @@ bfgsfit_visibilities_gpu(double *u, double *v, double *w, double *x, int N,
|
||||||
/* use LBFGS */
|
/* use LBFGS */
|
||||||
if (solver_mode==2 || solver_mode==3) {
|
if (solver_mode==2 || solver_mode==3) {
|
||||||
lmdata.robust_nu=mean_nu;
|
lmdata.robust_nu=mean_nu;
|
||||||
lbfgs_fit_robust_cuda(minimize_viz_full_pth, p, x, m, n, max_lbfgs, lbfgs_m, gpu_threads, (void*)&lmdata);
|
lbfgs_fit_robust_cuda(p, x, m, n, max_lbfgs, lbfgs_m, gpu_threads, (void*)&lmdata);
|
||||||
} else {
|
} else {
|
||||||
lbfgs_fit(minimize_viz_full_pth, p, x, m, n, max_lbfgs, lbfgs_m, gpu_threads, (void*)&lmdata);
|
lbfgs_fit(p, x, m, n, max_lbfgs, lbfgs_m, gpu_threads, (void*)&lmdata);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* final residual calculation */
|
/* final residual calculation */
|
||||||
|
@ -2145,13 +2145,13 @@ printf("1: %lf -> %lf\n\n\n",info0[0],info0[1]);
|
||||||
/* use LBFGS */
|
/* use LBFGS */
|
||||||
if (solver_mode==SM_RLM_RLBFGS || solver_mode==SM_OSLM_OSRLM_RLBFGS || solver_mode==SM_RTR_OSRLM_RLBFGS || solver_mode==SM_NSD_RLBFGS) {
|
if (solver_mode==SM_RLM_RLBFGS || solver_mode==SM_OSLM_OSRLM_RLBFGS || solver_mode==SM_RTR_OSRLM_RLBFGS || solver_mode==SM_NSD_RLBFGS) {
|
||||||
lmdata0.robust_nu=robust_nu0;
|
lmdata0.robust_nu=robust_nu0;
|
||||||
lbfgs_fit_robust_cuda(minimize_viz_full_pth, p, x, m, n, max_lbfgs, lbfgs_m, gpu_threads, (void*)&lmdata0);
|
lbfgs_fit_robust_cuda(p, x, m, n, max_lbfgs, lbfgs_m, gpu_threads, (void*)&lmdata0);
|
||||||
} else {
|
} else {
|
||||||
lbfgs_fit(minimize_viz_full_pth, p, x, m, n, max_lbfgs, lbfgs_m, gpu_threads, (void*)&lmdata0);
|
lbfgs_fit(p, x, m, n, max_lbfgs, lbfgs_m, gpu_threads, (void*)&lmdata0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* final residual calculation */
|
/* final residual calculation: FIXME: possible GPU accel here? */
|
||||||
minimize_viz_full_pth(p, xsub, m, n, (void*)&lmdata0);
|
minimize_viz_full_pth(p, xsub, m, n, (void*)&lmdata0);
|
||||||
my_daxpy(n, xsub, -1.0, x);
|
my_daxpy(n, xsub, -1.0, x);
|
||||||
|
|
||||||
|
|
|
@ -688,7 +688,7 @@ predict_threadfn_withgain_full(void *data) {
|
||||||
#ifdef USE_MIC
|
#ifdef USE_MIC
|
||||||
__attribute__ ((target(MIC)))
|
__attribute__ ((target(MIC)))
|
||||||
#endif
|
#endif
|
||||||
static void
|
void
|
||||||
minimize_viz_full_pth(double *p, double *x, int m, int n, void *data) {
|
minimize_viz_full_pth(double *p, double *x, int m, int n, void *data) {
|
||||||
|
|
||||||
me_data_t *dp=(me_data_t*)data;
|
me_data_t *dp=(me_data_t*)data;
|
||||||
|
@ -1023,9 +1023,9 @@ printf("residual init=%lf final=%lf\n\n",init_res,final_res);
|
||||||
/* use LBFGS */
|
/* use LBFGS */
|
||||||
if (solver_mode==SM_OSLM_OSRLM_RLBFGS || solver_mode==SM_RLM_RLBFGS || solver_mode==SM_RTR_OSRLM_RLBFGS || solver_mode==SM_NSD_RLBFGS) {
|
if (solver_mode==SM_OSLM_OSRLM_RLBFGS || solver_mode==SM_RLM_RLBFGS || solver_mode==SM_RTR_OSRLM_RLBFGS || solver_mode==SM_NSD_RLBFGS) {
|
||||||
lmdata.robust_nu=robust_nu0;
|
lmdata.robust_nu=robust_nu0;
|
||||||
lbfgs_fit_robust(minimize_viz_full_pth, p, x, m, n, max_lbfgs, lbfgs_m, gpu_threads, (void*)&lmdata);
|
lbfgs_fit_robust_wrapper(p, x, m, n, max_lbfgs, lbfgs_m, gpu_threads, (void*)&lmdata);
|
||||||
} else {
|
} else {
|
||||||
lbfgs_fit(minimize_viz_full_pth, p, x, m, n, max_lbfgs, lbfgs_m, gpu_threads, (void*)&lmdata);
|
lbfgs_fit_wrapper(p, x, m, n, max_lbfgs, lbfgs_m, gpu_threads, (void*)&lmdata);
|
||||||
}
|
}
|
||||||
#ifdef USE_MIC
|
#ifdef USE_MIC
|
||||||
lmdata.Nt=Nt; /* reset threads for MIC */
|
lmdata.Nt=Nt; /* reset threads for MIC */
|
||||||
|
@ -1181,9 +1181,9 @@ bfgsfit_visibilities(double *u, double *v, double *w, double *x, int N,
|
||||||
/* use LBFGS */
|
/* use LBFGS */
|
||||||
if (solver_mode==2 || solver_mode==3) {
|
if (solver_mode==2 || solver_mode==3) {
|
||||||
lmdata.robust_nu=mean_nu;
|
lmdata.robust_nu=mean_nu;
|
||||||
lbfgs_fit_robust(minimize_viz_full_pth, p, x, m, n, max_lbfgs, lbfgs_m, gpu_threads, (void*)&lmdata);
|
lbfgs_fit_robust_wrapper(p, x, m, n, max_lbfgs, lbfgs_m, gpu_threads, (void*)&lmdata);
|
||||||
} else {
|
} else {
|
||||||
lbfgs_fit(minimize_viz_full_pth, p, x, m, n, max_lbfgs, lbfgs_m, gpu_threads, (void*)&lmdata);
|
lbfgs_fit_wrapper(p, x, m, n, max_lbfgs, lbfgs_m, gpu_threads, (void*)&lmdata);
|
||||||
}
|
}
|
||||||
#ifdef USE_MIC
|
#ifdef USE_MIC
|
||||||
lmdata.Nt=Nt; /* reset threads for MIC */
|
lmdata.Nt=Nt; /* reset threads for MIC */
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue