00001
00002
00009 #include <stdio.h>
00010 #include <stdlib.h>
00011 #include <math.h>
00012 #include <sys/resource.h>
00013 #include <time.h>
00014 #include <complex.h>
00015
00016 #include "nfft3.h"
00017 #include "util.h"
00018
00027 #define DGT_PRE_CEXP (1U<< 0)
00028
00038 #define FGT_NDFT (1U<< 1)
00039
00048 #define FGT_APPROX_B (1U<< 2)
00049
00051 typedef struct
00052 {
00053 int N;
00054 int M;
00056 double complex *alpha;
00057 double complex *f;
00059 unsigned flags;
00062 double complex sigma;
00064 double *x;
00065 double *y;
00067 double complex *pre_cexp;
00069 int n;
00070 double p;
00072 double complex *b;
00074 nfft_plan *nplan1;
00075 nfft_plan *nplan2;
00077 } fgt_plan;
00078
00086 void dgt_trafo(fgt_plan *ths)
00087 {
00088 int j,k,l;
00089
00090 for(j=0; j<ths->M; j++)
00091 ths->f[j] = 0;
00092
00093 if(ths->flags & DGT_PRE_CEXP)
00094 for(j=0,l=0; j<ths->M; j++)
00095 for(k=0; k<ths->N; k++,l++)
00096 ths->f[j] += ths->alpha[k]*ths->pre_cexp[l];
00097 else
00098 for(j=0; j<ths->M; j++)
00099 for(k=0; k<ths->N; k++)
00100 ths->f[j] += ths->alpha[k]*cexp(-ths->sigma*(ths->y[j]-ths->x[k])*
00101 (ths->y[j]-ths->x[k]));
00102 }
00103
00111 void fgt_trafo(fgt_plan *ths)
00112 {
00113 int l;
00114
00115 if(ths->flags & FGT_NDFT)
00116 {
00117 ndft_adjoint(ths->nplan1);
00118
00119 for(l=0; l<ths->n; l++)
00120 ths->nplan1->f_hat[l] *= ths->b[l];
00121
00122 ndft_trafo(ths->nplan2);
00123 }
00124 else
00125 {
00126 nfft_adjoint(ths->nplan1);
00127
00128 for(l=0; l<ths->n; l++)
00129 ths->nplan1->f_hat[l] *= ths->b[l];
00130
00131 nfft_trafo(ths->nplan2);
00132 }
00133 }
00134
00149 void fgt_init_guru(fgt_plan *ths, int N, int M, double complex sigma, int n,
00150 double p, int m, unsigned flags)
00151 {
00152 int j,n_fftw;
00153 fftw_plan fplan;
00154
00155 ths->M = M;
00156 ths->N = N;
00157 ths->sigma = sigma;
00158 ths->flags = flags;
00159
00160 ths->x = (double*)fftw_malloc(ths->N*sizeof(double));
00161 ths->y = (double*)fftw_malloc(ths->M*sizeof(double));
00162 ths->alpha = (double complex*)fftw_malloc(ths->N*sizeof(double complex));
00163 ths->f = (double complex*)fftw_malloc(ths->M*sizeof(double complex));
00164
00165 ths->n = n;
00166 ths->p = p;
00167
00168 ths->b = (double complex*)fftw_malloc(ths->n*sizeof(double complex));
00169
00170 ths->nplan1 = (nfft_plan*) fftw_malloc(sizeof(nfft_plan));
00171 ths->nplan2 = (nfft_plan*) fftw_malloc(sizeof(nfft_plan));
00172
00173 n_fftw=nfft_next_power_of_2(2*ths->n);
00174
00175 nfft_init_guru(ths->nplan1, 1, &(ths->n), ths->N, &n_fftw, m, PRE_PHI_HUT|
00176 PRE_PSI| MALLOC_X| MALLOC_F_HAT| FFTW_INIT, FFTW_MEASURE);
00177 nfft_init_guru(ths->nplan2, 1, &(ths->n), ths->M, &n_fftw, m, PRE_PHI_HUT|
00178 PRE_PSI| MALLOC_X| FFTW_INIT, FFTW_MEASURE);
00179
00180 ths->nplan1->f = ths->alpha;
00181 ths->nplan2->f_hat = ths->nplan1->f_hat;
00182 ths->nplan2->f = ths->f;
00183
00184 if(ths->flags & FGT_APPROX_B)
00185 {
00186 fplan = fftw_plan_dft_1d(ths->n, ths->b, ths->b, FFTW_FORWARD,
00187 FFTW_MEASURE);
00188
00189 for(j=0; j<ths->n; j++)
00190 ths->b[j] = cexp(-ths->p*ths->p*ths->sigma*(j-ths->n/2)*(j-ths->n/2)/
00191 ((double)ths->n*ths->n)) / ths->n;
00192
00193 nfft_fftshift_complex(ths->b, 1, &ths->n);
00194 fftw_execute(fplan);
00195 nfft_fftshift_complex(ths->b, 1, &ths->n);
00196
00197 fftw_destroy_plan(fplan);
00198 }
00199 else
00200 {
00201 for(j=0; j<ths->n; j++)
00202 ths->b[j] = 1.0/ths->p * csqrt(PI/ths->sigma)*
00203 cexp(-PI*PI*(j-ths->n/2)*(j-ths->n/2)/
00204 (ths->p*ths->p*ths->sigma));
00205 }
00206 }
00207
00219 void fgt_init(fgt_plan *ths, int N, int M, double complex sigma, double eps)
00220 {
00221 double p;
00222 int n;
00223
00224 p=0.5+sqrt(-log(eps)/creal(sigma));
00225 if(p<1)
00226 p=1;
00227
00228 n=2*((int)ceil(p*cabs(sigma)/PI * sqrt(-log(eps)/creal(sigma))));
00229
00230 if(N*M<=((int)(1U<<20)))
00231 fgt_init_guru(ths, N, M, sigma, n, p, 7, DGT_PRE_CEXP);
00232 else
00233 fgt_init_guru(ths, N, M, sigma, n, p, 7, 0);
00234 }
00235
00242 void fgt_init_node_dependent(fgt_plan *ths)
00243 {
00244 int j,k,l;
00245
00246 if(ths->flags & DGT_PRE_CEXP)
00247 {
00248 ths->pre_cexp=(double complex*)fftw_malloc(ths->M*ths->N*
00249 sizeof(double complex));
00250
00251 for(j=0,l=0; j<ths->M; j++)
00252 for(k=0; k<ths->N; k++,l++)
00253 ths->pre_cexp[l]=cexp(-ths->sigma*(ths->y[j]-ths->x[k])*
00254 (ths->y[j]-ths->x[k]));
00255 }
00256
00257 for(j=0; j<ths->nplan1->M_total; j++)
00258 ths->nplan1->x[j] = ths->x[j]/ths->p;
00259 for(j=0; j<ths->nplan2->M_total; j++)
00260 ths->nplan2->x[j] = ths->y[j]/ths->p;
00261
00262 if(ths->nplan1->nfft_flags & PRE_PSI)
00263 nfft_precompute_psi(ths->nplan1);
00264 if(ths->nplan2->nfft_flags & PRE_PSI)
00265 nfft_precompute_psi(ths->nplan2);
00266 }
00267
00274 void fgt_finalize(fgt_plan *ths)
00275 {
00276 nfft_finalize(ths->nplan2);
00277 nfft_finalize(ths->nplan1);
00278
00279 fftw_free(ths->nplan2);
00280 fftw_free(ths->nplan1);
00281
00282 fftw_free(ths->b);
00283
00284 fftw_free(ths->f);
00285 fftw_free(ths->y);
00286
00287 fftw_free(ths->alpha);
00288 fftw_free(ths->x);
00289 }
00290
00297 void fgt_test_init_rand(fgt_plan *ths)
00298 {
00299 int j,k;
00300
00301 for(k=0; k<ths->N; k++)
00302 ths->x[k] = (double)rand()/(2.0*RAND_MAX)-1.0/4.0;
00303
00304 for(j=0; j<ths->M; j++)
00305 ths->y[j] = (double)rand()/(2.0*RAND_MAX)-1.0/4.0;
00306
00307 for(k=0; k<ths->N; k++)
00308 ths->alpha[k] = (double)rand()/(RAND_MAX)-1.0/2.0
00309 + I*(double)rand()/(RAND_MAX)-I/2.0;
00310 }
00311
00320 double fgt_test_measure_time(fgt_plan *ths, unsigned dgt)
00321 {
00322 int r;
00323 double t_out,t;
00324 double tau=0.01;
00325
00326 t_out=0;
00327 r=0;
00328 while(t_out<tau)
00329 {
00330 r++;
00331 if(dgt)
00332 {
00333 t=nfft_second();
00334 dgt_trafo(ths);
00335 }
00336 else
00337 {
00338 t=nfft_second();
00339 fgt_trafo(ths);
00340 }
00341 t_out+=nfft_second()-t;
00342 }
00343 t_out/=r;
00344
00345 return t_out;
00346 }
00347
00357 void fgt_test_simple(int N, int M, double complex sigma, double eps)
00358 {
00359 fgt_plan my_plan;
00360 double complex *swap_dgt;
00361
00362 fgt_init(&my_plan, N, M, sigma, eps);
00363 swap_dgt = (double complex*)fftw_malloc(my_plan.M*sizeof(double complex));
00364
00365 fgt_test_init_rand(&my_plan);
00366 fgt_init_node_dependent(&my_plan);
00367
00368 NFFT_SWAP_complex(swap_dgt,my_plan.f);
00369 dgt_trafo(&my_plan);
00370 nfft_vpr_complex(my_plan.f,my_plan.M,"discrete gauss transform");
00371 NFFT_SWAP_complex(swap_dgt,my_plan.f);
00372
00373 fgt_trafo(&my_plan);
00374 nfft_vpr_complex(my_plan.f,my_plan.M,"fast gauss transform");
00375
00376 printf("\n relative error: %1.3e\n", nfft_error_l_infty_1_complex(swap_dgt,
00377 my_plan.f, my_plan.M, my_plan.alpha, my_plan.N));
00378
00379 fftw_free(swap_dgt);
00380 fgt_finalize(&my_plan);
00381 }
00382
00392 void fgt_test_andersson()
00393 {
00394 fgt_plan my_plan;
00395 double complex *swap_dgt;
00396 int N;
00397
00398 double complex sigma=4*(138+I*100);
00399 int n=128;
00400 int N_dgt_pre_exp=(int)(1U<<11);
00401 int N_dgt=(int)(1U<<19);
00402
00403 printf("n=%d, sigma=%1.3e+i%1.3e\n",n,creal(sigma),cimag(sigma));
00404
00405 for(N=((int)(1U<<6)); N<((int)(1U<<22)); N=N<<1)
00406 {
00407 printf("$%d$\t & ",N);
00408
00409 if(N<N_dgt_pre_exp)
00410 fgt_init_guru(&my_plan, N, N, sigma, n, 1, 7, DGT_PRE_CEXP);
00411 else
00412 fgt_init_guru(&my_plan, N, N, sigma, n, 1, 7, 0);
00413
00414 swap_dgt = (double complex*)fftw_malloc(my_plan.M*
00415 sizeof(double complex));
00416
00417 fgt_test_init_rand(&my_plan);
00418
00419 fgt_init_node_dependent(&my_plan);
00420
00421 if(N<N_dgt)
00422 {
00423 NFFT_SWAP_complex(swap_dgt,my_plan.f);
00424 if(N<N_dgt_pre_exp)
00425 my_plan.flags^=DGT_PRE_CEXP;
00426
00427 printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 1));
00428 if(N<N_dgt_pre_exp)
00429 my_plan.flags^=DGT_PRE_CEXP;
00430 NFFT_SWAP_complex(swap_dgt,my_plan.f);
00431 }
00432 else
00433 printf("\t\t & ");
00434
00435 if(N<N_dgt_pre_exp)
00436 printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 1));
00437 else
00438 printf("\t\t & ");
00439
00440 my_plan.flags^=FGT_NDFT;
00441 printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 0));
00442 my_plan.flags^=FGT_NDFT;
00443
00444 printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 0));
00445
00446 printf("$%1.1e$\t \\\\ \n",
00447 nfft_error_l_infty_1_complex(swap_dgt, my_plan.f, my_plan.M,
00448 my_plan.alpha, my_plan.N));
00449 fflush(stdout);
00450
00451 fftw_free(swap_dgt);
00452 fgt_finalize(&my_plan);
00453 fftw_cleanup();
00454 }
00455 }
00456
00463 void fgt_test_error()
00464 {
00465 fgt_plan my_plan;
00466 double complex *swap_dgt;
00467 int n,mi;
00468
00469 double complex sigma=4*(138+I*100);
00470 int N=1000;
00471 int M=1000;
00472 int m[2]={7,3};
00473
00474 printf("N=%d;\tM=%d;\nsigma=%1.3e+i*%1.3e;\n",N,M,creal(sigma),cimag(sigma));
00475 printf("error=[\n");
00476
00477 swap_dgt = (double complex*)fftw_malloc(M*sizeof(double complex));
00478
00479 for(n=8; n<=128; n+=4)
00480 {
00481 printf("%d\t",n);
00482 for(mi=0;mi<2;mi++)
00483 {
00484 fgt_init_guru(&my_plan, N, M, sigma, n, 1, m[mi], 0);
00485 fgt_test_init_rand(&my_plan);
00486 fgt_init_node_dependent(&my_plan);
00487
00488 NFFT_SWAP_complex(swap_dgt,my_plan.f);
00489 dgt_trafo(&my_plan);
00490 NFFT_SWAP_complex(swap_dgt,my_plan.f);
00491
00492 fgt_trafo(&my_plan);
00493
00494 printf("%1.3e\t", nfft_error_l_infty_1_complex(swap_dgt, my_plan.f,
00495 my_plan.M, my_plan.alpha, my_plan.N));
00496 fflush(stdout);
00497
00498 fgt_finalize(&my_plan);
00499 fftw_cleanup();
00500 }
00501 printf("\n");
00502 }
00503 printf("];\n");
00504
00505 fftw_free(swap_dgt);
00506 }
00507
00514 void fgt_test_error_p()
00515 {
00516 fgt_plan my_plan;
00517 double complex *swap_dgt;
00518 int n,pi;
00519
00520 double complex sigma=20+40I;
00521 int N=1000;
00522 int M=1000;
00523 double p[3]={1,1.5,2};
00524
00525 printf("N=%d;\tM=%d;\nsigma=%1.3e+i*%1.3e;\n",N,M,creal(sigma),cimag(sigma));
00526 printf("error=[\n");
00527
00528 swap_dgt = (double complex*)fftw_malloc(M*sizeof(double complex));
00529
00530 for(n=8; n<=128; n+=4)
00531 {
00532 printf("%d\t",n);
00533 for(pi=0;pi<3;pi++)
00534 {
00535 fgt_init_guru(&my_plan, N, M, sigma, n, p[pi], 7, 0);
00536 fgt_test_init_rand(&my_plan);
00537 fgt_init_node_dependent(&my_plan);
00538
00539 NFFT_SWAP_complex(swap_dgt,my_plan.f);
00540 dgt_trafo(&my_plan);
00541 NFFT_SWAP_complex(swap_dgt,my_plan.f);
00542
00543 fgt_trafo(&my_plan);
00544
00545 printf("%1.3e\t", nfft_error_l_infty_1_complex(swap_dgt, my_plan.f,
00546 my_plan.M, my_plan.alpha, my_plan.N));
00547 fflush(stdout);
00548
00549 fgt_finalize(&my_plan);
00550 fftw_cleanup();
00551 }
00552 printf("\n");
00553 }
00554 printf("];\n");
00555 }
00556
00562 int main(int argc,char **argv)
00563 {
00564 if(argc!=2)
00565 {
00566 fprintf(stderr,"fastgauss type\n");
00567 fprintf(stderr," type\n");
00568 fprintf(stderr," 0 - Simple test.\n");
00569 fprintf(stderr," 1 - Compares accuracy and execution time.\n");
00570 fprintf(stderr," Pipe to output_andersson.tex\n");
00571 fprintf(stderr," 2 - Compares accuracy.\n");
00572 fprintf(stderr," Pipe to output_error.m\n");
00573 fprintf(stderr," 3 - Compares accuracy.\n");
00574 fprintf(stderr," Pipe to output_error_p.m\n");
00575 return -1;
00576 }
00577
00578 if(atoi(argv[1])==0)
00579 fgt_test_simple(10, 10, 5+3*I, 0.001);
00580
00581 if(atoi(argv[1])==1)
00582 fgt_test_andersson();
00583
00584 if(atoi(argv[1])==2)
00585 fgt_test_error();
00586
00587 if(atoi(argv[1])==3)
00588 fgt_test_error_p();
00589
00590 return 1;
00591 }
00592