NFFT Logo 3.0 API Reference
Main Page | Modules | Data Structures | Directories | File List | Data Fields | Globals

fastgauss.c

00001 /* $Id$ */
00002 
00009 #include <stdio.h>
00010 #include <stdlib.h>
00011 #include <math.h>
00012 #include <sys/resource.h>
00013 #include <time.h>
00014 #include <complex.h>
00015 
00016 #include "nfft3.h"
00017 #include "util.h"
00018 
00027 #define DGT_PRE_CEXP     (1U<< 0)
00028 
00038 #define FGT_NDFT         (1U<< 1)
00039 
00048 #define FGT_APPROX_B     (1U<< 2)
00049 
00051 typedef struct
00052 {
00053   int N;                                
00054   int M;                                
00056   double complex *alpha;                
00057   double complex *f;                    
00059   unsigned flags;                       
00062   double complex sigma;                 
00064   double *x;                            
00065   double *y;                            
00067   double complex *pre_cexp;             
00069   int n;                                
00070   double p;                             
00072   double complex *b;                    
00074   nfft_plan *nplan1;                    
00075   nfft_plan *nplan2;                    
00077 } fgt_plan;
00078 
00086 void dgt_trafo(fgt_plan *ths)
00087 {
00088   int j,k,l;
00089   
00090   for(j=0; j<ths->M; j++)
00091     ths->f[j] = 0;
00092   
00093   if(ths->flags & DGT_PRE_CEXP)
00094     for(j=0,l=0; j<ths->M; j++)
00095       for(k=0; k<ths->N; k++,l++)
00096         ths->f[j] += ths->alpha[k]*ths->pre_cexp[l];
00097   else
00098     for(j=0; j<ths->M; j++)
00099       for(k=0; k<ths->N; k++)
00100         ths->f[j] += ths->alpha[k]*cexp(-ths->sigma*(ths->y[j]-ths->x[k])*
00101           (ths->y[j]-ths->x[k]));
00102 }
00103 
00111 void fgt_trafo(fgt_plan *ths)
00112 {
00113   int l;
00114 
00115   if(ths->flags & FGT_NDFT)
00116     {
00117       ndft_adjoint(ths->nplan1);
00118 
00119       for(l=0; l<ths->n; l++)
00120         ths->nplan1->f_hat[l] *= ths->b[l];
00121   
00122       ndft_trafo(ths->nplan2);
00123     }
00124   else
00125     {
00126       nfft_adjoint(ths->nplan1);
00127 
00128       for(l=0; l<ths->n; l++)
00129         ths->nplan1->f_hat[l] *= ths->b[l];
00130   
00131       nfft_trafo(ths->nplan2);
00132     }
00133 }
00134 
00149 void fgt_init_guru(fgt_plan *ths, int N, int M, double complex sigma, int n,
00150        double p, int m, unsigned flags)
00151 {
00152   int j,n_fftw;
00153   fftw_plan fplan;
00154 
00155   ths->M = M;
00156   ths->N = N;
00157   ths->sigma = sigma;
00158   ths->flags = flags;
00159 
00160   ths->x = (double*)fftw_malloc(ths->N*sizeof(double));
00161   ths->y = (double*)fftw_malloc(ths->M*sizeof(double));
00162   ths->alpha = (double complex*)fftw_malloc(ths->N*sizeof(double complex));
00163   ths->f = (double complex*)fftw_malloc(ths->M*sizeof(double complex));
00164 
00165   ths->n = n;
00166   ths->p = p;
00167 
00168   ths->b = (double complex*)fftw_malloc(ths->n*sizeof(double complex));
00169 
00170   ths->nplan1 = (nfft_plan*) fftw_malloc(sizeof(nfft_plan));
00171   ths->nplan2 = (nfft_plan*) fftw_malloc(sizeof(nfft_plan));
00172 
00173   n_fftw=nfft_next_power_of_2(2*ths->n);
00174 
00175   nfft_init_guru(ths->nplan1, 1, &(ths->n), ths->N, &n_fftw, m, PRE_PHI_HUT|
00176                  PRE_PSI| MALLOC_X| MALLOC_F_HAT| FFTW_INIT, FFTW_MEASURE);
00177   nfft_init_guru(ths->nplan2, 1, &(ths->n), ths->M, &n_fftw, m, PRE_PHI_HUT|
00178                  PRE_PSI| MALLOC_X| FFTW_INIT, FFTW_MEASURE);
00179 
00180   ths->nplan1->f = ths->alpha;
00181   ths->nplan2->f_hat = ths->nplan1->f_hat;
00182   ths->nplan2->f = ths->f;
00183 
00184   if(ths->flags & FGT_APPROX_B)
00185     {
00186       fplan = fftw_plan_dft_1d(ths->n, ths->b, ths->b, FFTW_FORWARD,
00187                                FFTW_MEASURE);
00188 
00189       for(j=0; j<ths->n; j++)
00190   ths->b[j] = cexp(-ths->p*ths->p*ths->sigma*(j-ths->n/2)*(j-ths->n/2)/
00191                           ((double)ths->n*ths->n)) / ths->n;
00192       
00193       nfft_fftshift_complex(ths->b, 1, &ths->n);  
00194       fftw_execute(fplan);
00195       nfft_fftshift_complex(ths->b, 1, &ths->n);
00196       
00197       fftw_destroy_plan(fplan);
00198     }
00199   else
00200     {
00201       for(j=0; j<ths->n; j++)
00202   ths->b[j] = 1.0/ths->p * csqrt(PI/ths->sigma)*
00203     cexp(-PI*PI*(j-ths->n/2)*(j-ths->n/2)/
00204          (ths->p*ths->p*ths->sigma));
00205     }
00206 }
00207 
00219 void fgt_init(fgt_plan *ths, int N, int M, double complex sigma, double eps)
00220 {
00221   double p;
00222   int n;
00223 
00224   p=0.5+sqrt(-log(eps)/creal(sigma));
00225   if(p<1)
00226     p=1;
00227 
00228   n=2*((int)ceil(p*cabs(sigma)/PI * sqrt(-log(eps)/creal(sigma))));
00229 
00230   if(N*M<=((int)(1U<<20)))
00231     fgt_init_guru(ths, N, M, sigma, n, p, 7, DGT_PRE_CEXP);
00232   else
00233     fgt_init_guru(ths, N, M, sigma, n, p, 7, 0);
00234 }
00235 
00242 void fgt_init_node_dependent(fgt_plan *ths)
00243 {
00244   int j,k,l;
00245 
00246   if(ths->flags & DGT_PRE_CEXP)
00247    {
00248      ths->pre_cexp=(double complex*)fftw_malloc(ths->M*ths->N*
00249             sizeof(double complex));
00250 
00251      for(j=0,l=0; j<ths->M; j++)
00252        for(k=0; k<ths->N; k++,l++)
00253          ths->pre_cexp[l]=cexp(-ths->sigma*(ths->y[j]-ths->x[k])*
00254                                 (ths->y[j]-ths->x[k]));
00255    }
00256 
00257   for(j=0; j<ths->nplan1->M_total; j++)
00258     ths->nplan1->x[j] = ths->x[j]/ths->p;
00259   for(j=0; j<ths->nplan2->M_total; j++)
00260     ths->nplan2->x[j] = ths->y[j]/ths->p;
00261   
00262   if(ths->nplan1->nfft_flags & PRE_PSI)
00263     nfft_precompute_psi(ths->nplan1);
00264   if(ths->nplan2->nfft_flags & PRE_PSI)
00265     nfft_precompute_psi(ths->nplan2);
00266 }
00267 
00274 void fgt_finalize(fgt_plan *ths)
00275 {
00276   nfft_finalize(ths->nplan2);
00277   nfft_finalize(ths->nplan1);
00278 
00279   fftw_free(ths->nplan2);
00280   fftw_free(ths->nplan1);
00281 
00282   fftw_free(ths->b);
00283 
00284   fftw_free(ths->f);
00285   fftw_free(ths->y);
00286 
00287   fftw_free(ths->alpha);
00288   fftw_free(ths->x);
00289 }
00290 
00297 void fgt_test_init_rand(fgt_plan *ths)
00298 {
00299   int j,k;
00300 
00301   for(k=0; k<ths->N; k++)
00302     ths->x[k] = (double)rand()/(2.0*RAND_MAX)-1.0/4.0;
00303 
00304   for(j=0; j<ths->M; j++)
00305     ths->y[j] = (double)rand()/(2.0*RAND_MAX)-1.0/4.0;
00306 
00307   for(k=0; k<ths->N; k++)
00308     ths->alpha[k] =   (double)rand()/(RAND_MAX)-1.0/2.0
00309             + I*(double)rand()/(RAND_MAX)-I/2.0;
00310 }
00311 
00320 double fgt_test_measure_time(fgt_plan *ths, unsigned dgt)
00321 {
00322   int r;
00323   double t_out,t; 
00324   double tau=0.01;
00325 
00326   t_out=0;
00327   r=0; 
00328   while(t_out<tau)
00329     {
00330       r++;
00331       if(dgt)
00332         {
00333           t=nfft_second();
00334           dgt_trafo(ths);
00335         }
00336       else
00337         {
00338           t=nfft_second();
00339           fgt_trafo(ths);
00340         }
00341       t_out+=nfft_second()-t;
00342     }
00343   t_out/=r;
00344 
00345   return t_out;
00346 }
00347 
00357 void fgt_test_simple(int N, int M, double complex sigma, double eps)
00358 {
00359   fgt_plan my_plan;
00360   double complex *swap_dgt;
00361      
00362   fgt_init(&my_plan, N, M, sigma, eps);
00363   swap_dgt = (double complex*)fftw_malloc(my_plan.M*sizeof(double complex));
00364 
00365   fgt_test_init_rand(&my_plan);
00366   fgt_init_node_dependent(&my_plan);   
00367 
00368   NFFT_SWAP_complex(swap_dgt,my_plan.f);
00369   dgt_trafo(&my_plan);
00370   nfft_vpr_complex(my_plan.f,my_plan.M,"discrete gauss transform");
00371   NFFT_SWAP_complex(swap_dgt,my_plan.f);
00372 
00373   fgt_trafo(&my_plan);
00374   nfft_vpr_complex(my_plan.f,my_plan.M,"fast gauss transform");
00375 
00376   printf("\n relative error: %1.3e\n", nfft_error_l_infty_1_complex(swap_dgt,
00377          my_plan.f, my_plan.M, my_plan.alpha, my_plan.N));
00378 
00379   fftw_free(swap_dgt);
00380   fgt_finalize(&my_plan);
00381 }
00382 
00392 void fgt_test_andersson()
00393 {
00394   fgt_plan my_plan;
00395   double complex *swap_dgt;
00396   int N;
00397 
00398   double complex sigma=4*(138+I*100);
00399   int n=128;
00400   int N_dgt_pre_exp=(int)(1U<<11);
00401   int N_dgt=(int)(1U<<19);
00402 
00403   printf("n=%d, sigma=%1.3e+i%1.3e\n",n,creal(sigma),cimag(sigma));
00404 
00405   for(N=((int)(1U<<6)); N<((int)(1U<<22)); N=N<<1)
00406     {
00407       printf("$%d$\t & ",N);
00408 
00409       if(N<N_dgt_pre_exp)
00410         fgt_init_guru(&my_plan, N, N, sigma, n, 1, 7, DGT_PRE_CEXP);
00411       else
00412         fgt_init_guru(&my_plan, N, N, sigma, n, 1, 7, 0);
00413 
00414       swap_dgt = (double complex*)fftw_malloc(my_plan.M*
00415                 sizeof(double complex));
00416 
00417       fgt_test_init_rand(&my_plan);
00418       
00419       fgt_init_node_dependent(&my_plan);      
00420 
00421       if(N<N_dgt)
00422   {
00423           NFFT_SWAP_complex(swap_dgt,my_plan.f);
00424           if(N<N_dgt_pre_exp)
00425             my_plan.flags^=DGT_PRE_CEXP;
00426  
00427     printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 1));
00428           if(N<N_dgt_pre_exp)
00429             my_plan.flags^=DGT_PRE_CEXP;
00430           NFFT_SWAP_complex(swap_dgt,my_plan.f);  
00431   }
00432       else
00433   printf("\t\t & ");  
00434 
00435       if(N<N_dgt_pre_exp)
00436   printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 1));
00437       else
00438   printf("\t\t & ");  
00439 
00440       my_plan.flags^=FGT_NDFT;
00441       printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 0));
00442       my_plan.flags^=FGT_NDFT;
00443 
00444       printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 0));
00445 
00446       printf("$%1.1e$\t \\\\ \n",
00447        nfft_error_l_infty_1_complex(swap_dgt, my_plan.f, my_plan.M,
00448             my_plan.alpha, my_plan.N));
00449       fflush(stdout);
00450 
00451       fftw_free(swap_dgt);
00452       fgt_finalize(&my_plan);
00453       fftw_cleanup();
00454     }
00455 }
00456 
00463 void fgt_test_error()
00464 {
00465   fgt_plan my_plan;
00466   double complex *swap_dgt;
00467   int n,mi;
00468 
00469   double complex sigma=4*(138+I*100);
00470   int N=1000;
00471   int M=1000;
00472   int m[2]={7,3};
00473   
00474   printf("N=%d;\tM=%d;\nsigma=%1.3e+i*%1.3e;\n",N,M,creal(sigma),cimag(sigma));
00475   printf("error=[\n");
00476 
00477   swap_dgt = (double complex*)fftw_malloc(M*sizeof(double complex));
00478 
00479   for(n=8; n<=128; n+=4)
00480     {
00481       printf("%d\t",n);
00482       for(mi=0;mi<2;mi++)
00483         {
00484           fgt_init_guru(&my_plan, N, M, sigma, n, 1, m[mi], 0);
00485           fgt_test_init_rand(&my_plan);    
00486           fgt_init_node_dependent(&my_plan);    
00487 
00488           NFFT_SWAP_complex(swap_dgt,my_plan.f);
00489           dgt_trafo(&my_plan);
00490           NFFT_SWAP_complex(swap_dgt,my_plan.f);
00491 
00492           fgt_trafo(&my_plan);
00493 
00494           printf("%1.3e\t", nfft_error_l_infty_1_complex(swap_dgt, my_plan.f,
00495                  my_plan.M, my_plan.alpha, my_plan.N));
00496           fflush(stdout);
00497 
00498           fgt_finalize(&my_plan);
00499           fftw_cleanup();
00500         }
00501       printf("\n");
00502     }
00503   printf("];\n");
00504 
00505   fftw_free(swap_dgt);
00506 }
00507 
00514 void fgt_test_error_p()
00515 {
00516   fgt_plan my_plan;
00517   double complex *swap_dgt;
00518   int n,pi;
00519 
00520   double complex sigma=20+40I;
00521   int N=1000;
00522   int M=1000;
00523   double p[3]={1,1.5,2};
00524   
00525   printf("N=%d;\tM=%d;\nsigma=%1.3e+i*%1.3e;\n",N,M,creal(sigma),cimag(sigma));
00526   printf("error=[\n");
00527 
00528   swap_dgt = (double complex*)fftw_malloc(M*sizeof(double complex));
00529 
00530   for(n=8; n<=128; n+=4)
00531     {
00532       printf("%d\t",n);
00533       for(pi=0;pi<3;pi++)
00534         {
00535           fgt_init_guru(&my_plan, N, M, sigma, n, p[pi], 7, 0);
00536           fgt_test_init_rand(&my_plan);    
00537           fgt_init_node_dependent(&my_plan);    
00538 
00539           NFFT_SWAP_complex(swap_dgt,my_plan.f);
00540           dgt_trafo(&my_plan);
00541           NFFT_SWAP_complex(swap_dgt,my_plan.f);
00542 
00543           fgt_trafo(&my_plan);
00544 
00545           printf("%1.3e\t", nfft_error_l_infty_1_complex(swap_dgt, my_plan.f,
00546                  my_plan.M, my_plan.alpha, my_plan.N));
00547           fflush(stdout);
00548 
00549           fgt_finalize(&my_plan);
00550           fftw_cleanup();
00551         }
00552       printf("\n");
00553     }
00554   printf("];\n");  
00555 }
00556 
00562 int main(int argc,char **argv)
00563 {
00564   if(argc!=2)
00565     {
00566       fprintf(stderr,"fastgauss type\n");
00567       fprintf(stderr," type\n");
00568       fprintf(stderr,"  0 - Simple test.\n");
00569       fprintf(stderr,"  1 - Compares accuracy and execution time.\n");
00570       fprintf(stderr,"      Pipe to output_andersson.tex\n");
00571       fprintf(stderr,"  2 - Compares accuracy.\n");
00572       fprintf(stderr,"      Pipe to output_error.m\n");
00573       fprintf(stderr,"  3 - Compares accuracy.\n");
00574       fprintf(stderr,"      Pipe to output_error_p.m\n");
00575       return -1;
00576     }
00577 
00578   if(atoi(argv[1])==0)
00579     fgt_test_simple(10, 10, 5+3*I, 0.001);
00580 
00581   if(atoi(argv[1])==1)
00582     fgt_test_andersson();
00583 
00584   if(atoi(argv[1])==2)
00585     fgt_test_error();
00586 
00587   if(atoi(argv[1])==3)
00588     fgt_test_error_p();
00589 
00590   return 1;
00591 }
00592 /* \} */

Generated on 1 Nov 2006 by Doxygen 1.4.4