00001
00008 #include <stdlib.h>
00009 #include <complex.h>
00010 #include <math.h>
00011
00012 #include "util.h"
00013 #include "nfft3.h"
00014 #include "fastsum.h"
00015
00022 double fak(int n)
00023 {
00024 if (n<=1) return 1.0;
00025 else return (double)n*fak(n-1);
00026 }
00027
00029 double binom(int n, int m)
00030 {
00031 return fak(n)/fak(m)/fak(n-m);
00032 }
00033
00035 double BasisPoly(int m, int r, double xx)
00036 {
00037 int k;
00038 double sum=0.0;
00039
00040 for (k=0; k<=m-r; k++) {
00041 sum+=binom(m+k,k)*pow((xx+1.0)/2.0,(double)k);
00042 }
00043 return sum*pow((xx+1.0),(double)r)*pow(1.0-xx,(double)(m+1))/(1<<(m+1))/fak(r);
00044 }
00045
00047 double regkern(complex (*kernel)(double , int , const double *), double xx, int p, const double *param, double a, double b)
00048 {
00049 int r;
00050 double sum=0.0;
00051
00052 if (xx<-0.5)
00053 xx=-0.5;
00054 if (xx>0.5)
00055 xx=0.5;
00056 if ((xx>=-0.5+b && xx<=-a) || (xx>=a && xx<=0.5-b)) {
00057 return kernel(xx,0,param);
00058 }
00059 else if (xx<-0.5+b) {
00060 sum=(kernel(-0.5,0,param)+kernel(0.5,0,param))/2.0
00061 *BasisPoly(p-1,0,2.0*xx/b+(1.0-b)/b);
00062 for (r=0; r<p; r++) {
00063 sum+=pow(-b/2.0,(double)r)
00064 *kernel(-0.5+b,r,param)
00065 *BasisPoly(p-1,r,-2.0*xx/b+(b-1.0)/b);
00066 }
00067 return sum;
00068 }
00069 else if ((xx>-a) && (xx<a)) {
00070 for (r=0; r<p; r++) {
00071 sum+=pow(a,(double)r)
00072 *( kernel(-a,r,param)*BasisPoly(p-1,r,xx/a)
00073 +kernel( a,r,param)*BasisPoly(p-1,r,-xx/a)*(r & 1 ? -1 : 1));
00074 }
00075 return sum;
00076 }
00077 else if (xx>0.5-b) {
00078 sum=(kernel(-0.5,0,param)+kernel(0.5,0,param))/2.0
00079 *BasisPoly(p-1,0,-2.0*xx/b+(1.0-b)/b);
00080 for (r=0; r<p; r++) {
00081 sum+=pow(b/2.0,(double)r)
00082 *kernel(0.5-b,r,param)
00083 *BasisPoly(p-1,r,2.0*xx/b-(1.0-b)/b);
00084 }
00085 return sum;
00086 }
00087 return kernel(xx,0,param);
00088 }
00089
00093 double regkern1(complex (*kernel)(double , int , const double *), double xx, int p, const double *param, double a, double b)
00094 {
00095 int r;
00096 double sum=0.0;
00097
00098 if (xx<-0.5)
00099 xx=-0.5;
00100 if (xx>0.5)
00101 xx=0.5;
00102 if ((xx>=-0.5+b && xx<=-a) || (xx>=a && xx<=0.5-b))
00103 {
00104 return kernel(xx,0,param);
00105 }
00106 else if ((xx>-a) && (xx<a))
00107 {
00108 for (r=0; r<p; r++) {
00109 sum+=pow(a,(double)r)
00110 *( kernel(-a,r,param)*BasisPoly(p-1,r,xx/a)
00111 +kernel( a,r,param)*BasisPoly(p-1,r,-xx/a)*(r & 1 ? -1 : 1));
00112 }
00113 return sum;
00114 }
00115 else if (xx<-0.5+b)
00116 {
00117 for (r=0; r<p; r++) {
00118 sum+=pow(b,(double)r)
00119 *( kernel(0.5-b,r,param)*BasisPoly(p-1,r,(xx+0.5)/b)
00120 +kernel(-0.5+b,r,param)*BasisPoly(p-1,r,-(xx+0.5)/b)*(r & 1 ? -1 : 1));
00121 }
00122 return sum;
00123 }
00124 else if (xx>0.5-b)
00125 {
00126 for (r=0; r<p; r++) {
00127 sum+=pow(b,(double)r)
00128 *( kernel(0.5-b,r,param)*BasisPoly(p-1,r,(xx-0.5)/b)
00129 +kernel(-0.5+b,r,param)*BasisPoly(p-1,r,-(xx-0.5)/b)*(r & 1 ? -1 : 1));
00130 }
00131 return sum;
00132 }
00133 return kernel(xx,0,param);
00134 }
00135
00137 double regkern2(complex (*kernel)(double , int , const double *), double xx, int p, const double *param, double a, double b)
00138 {
00139 int r;
00140 double sum=0.0;
00141
00142 xx=fabs(xx);
00143
00144 if (xx>0.5) {
00145 for (r=0; r<p; r++) {
00146 sum+=pow(b,(double)r)*kernel(0.5-b,r,param)
00147 *(BasisPoly(p-1,r,0)+BasisPoly(p-1,r,0));
00148 }
00149 return sum;
00150 }
00151 else if ((a<=xx) && (xx<=0.5-b)) {
00152 return kernel(xx,0,param);
00153 }
00154 else if (xx<a) {
00155 for (r=0; r<p; r++) {
00156 sum+=pow(-a,(double)r)*kernel(a,r,param)
00157 *(BasisPoly(p-1,r,xx/a)+BasisPoly(p-1,r,-xx/a));
00158 }
00159 return sum;
00160 }
00161 else if ((0.5-b<xx) && (xx<=0.5)) {
00162 for (r=0; r<p; r++) {
00163 sum+=pow(b,(double)r)*kernel(0.5-b,r,param)
00164 *(BasisPoly(p-1,r,(xx-0.5)/b)+BasisPoly(p-1,r,-(xx-0.5)/b));
00165 }
00166 return sum;
00167 }
00168 return 0.0;
00169 }
00170
00174 double regkern3(complex (*kernel)(double , int , const double *), double xx, int p, const double *param, double a, double b)
00175 {
00176 int r;
00177 double sum=0.0;
00178
00179 xx=fabs(xx);
00180
00181 if (xx>=0.5) {
00182
00183 xx=0.5;
00184 }
00185
00186 if ((a<=xx) && (xx<=0.5-b)) {
00187 return kernel(xx,0,param);
00188 }
00189 else if (xx<a) {
00190 for (r=0; r<p; r++) {
00191 sum+=pow(-a,(double)r)*kernel(a,r,param)
00192 *(BasisPoly(p-1,r,xx/a)+BasisPoly(p-1,r,-xx/a));
00193 }
00194
00195 return sum;
00196 }
00197 else if ((0.5-b<xx) && (xx<=0.5)) {
00198 sum=kernel(0.5,0,param)*BasisPoly(p-1,0,-2.0*xx/b+(1.0-b)/b);
00199
00200 for (r=0; r<p; r++) {
00201 sum+=pow(b/2.0,(double)r)
00202 *kernel(0.5-b,r,param)
00203 *BasisPoly(p-1,r,2.0*xx/b-(1.0-b)/b);
00204 }
00205 return sum;
00206 }
00207 printf("hier ");
00208 return 0.0;
00209 }
00210
00212 double kubintkern(double x, double *Add, int Ad, double a)
00213 {
00214 double c;
00215 int r;
00216 double f0,f1,f2,f3,c1,c2,c3,c4;
00217 c=x*Ad/a;
00218 r=c; r=abs(r);
00219 if (r==0) {f0=Add[r+1];f1=Add[r];f2=Add[r+1];f3=Add[r+2];}
00220 else { f0=Add[r-1];f1=Add[r];f2=Add[r+1];f3=Add[r+2];}
00221 c=fabs(c);
00222 c1=c-r;
00223 c2=c1+1.0;
00224 c3=c1-1.0;
00225 c4=c1-2.0;
00226
00227
00228 return(-f0*c1*c3*c4/6.0+f1*c2*c3*c4/2.0-f2*c2*c1*c4/2.0+f3*c2*c1*c3/6.0);
00229 }
00230
00232 double kubintkern1(double x, double *Add, int Ad, double a)
00233 {
00234 double c;
00235 int r;
00236 double f0,f1,f2,f3,c1,c2,c3,c4;
00237 Add+=2;
00238 c=(x+a)*Ad/2/a;
00239 r=c; r=abs(r);
00240
00241
00242 { f0=Add[r-1];f1=Add[r];f2=Add[r+1];f3=Add[r+2];}
00243 c=fabs(c);
00244 c1=c-r;
00245 c2=c1+1.0;
00246 c3=c1-1.0;
00247 c4=c1-2.0;
00248
00249
00250 return(-f0*c1*c3*c4/6.0+f1*c2*c3*c4/2.0-f2*c2*c1*c4/2.0+f3*c2*c1*c3/6.0);
00251 }
00252
00254 void quicksort(int d, int t, double *x, complex *alpha, int N)
00255 {
00256 int lpos=0;
00257 int rpos=N-1;
00258
00259 double pivot=x[(N/2)*d+t];
00260
00261 int k;
00262 double temp1;
00263 complex temp2;
00264
00265 while (lpos<=rpos)
00266 {
00267 while (x[lpos*d+t]<pivot)
00268 lpos++;
00269 while (x[rpos*d+t]>pivot)
00270 rpos--;
00271 if (lpos<=rpos)
00272 {
00273 for (k=0; k<d; k++)
00274 {
00275 temp1=x[lpos*d+k];
00276 x[lpos*d+k]=x[rpos*d+k];
00277 x[rpos*d+k]=temp1;
00278 }
00279 temp2=alpha[lpos];
00280 alpha[lpos]=alpha[rpos];
00281 alpha[rpos]=temp2;
00282
00283 lpos++;
00284 rpos--;
00285 }
00286 }
00287 if (0<rpos)
00288 quicksort(d,t,x,alpha,rpos+1);
00289 if (lpos<N-1)
00290 quicksort(d,t,x+lpos*d,alpha+lpos,N-lpos);
00291 }
00292
00294 void BuildTree(int d, int t, double *x, complex *alpha, int N)
00295 {
00296 if (N>1)
00297 {
00298 int m=N/2;
00299
00300 quicksort(d,t,x,alpha,N);
00301
00302 BuildTree(d, (t+1)%d, x, alpha, m);
00303 BuildTree(d, (t+1)%d, x+(m+1)*d, alpha+(m+1), N-m-1);
00304 }
00305 }
00306
00308 complex SearchTree(int d, int t, double *x, complex *alpha, double *xmin, double *xmax, int N, complex (*kernel)(double , int , const double *), const double *param, int Ad, double *Add, int p, unsigned flags)
00309 {
00310 int m=N/2;
00311 double Min=xmin[t], Max=xmax[t], Median=x[m*d+t];
00312 double a=fabs(Max-Min)/2;
00313 int l;
00314 int E=0;
00315 double r;
00316 complex result=0.0;
00317
00318 if (N==0)
00319 return 0.0;
00320
00321 if (Min>Median)
00322 result += SearchTree(d,(t+1)%d,x+(m+1)*d,alpha+(m+1),xmin,xmax,N-m-1,kernel,param,Ad,Add,p,flags);
00323 else if (Max<Median)
00324 result += SearchTree(d,(t+1)%d,x,alpha,xmin,xmax,m,kernel,param,Ad,Add,p,flags);
00325 else
00326 {
00327 E=0;
00328 for (l=0; l<d; l++)
00329 {
00330 if ( x[m*d+l]>xmin[l] && x[m*d+l]<xmax[l] )
00331 E++;
00332 }
00333 if (E==d)
00334 {
00335 if (d==1)
00336 r = xmin[0]+a-x[m];
00337 else
00338 {
00339 r=0.0;
00340 for (l=0; l<d; l++)
00341 r+=(xmin[l]+a-x[m*d+l])*(xmin[l]+a-x[m*d+l]);
00342 r=sqrt(r);
00343 }
00344 if (fabs(r)<a)
00345 {
00346 result += alpha[m]*kernel(r,0,param);
00347 if (d==1)
00348 {
00349 if (flags & EXACT_NEARFIELD)
00350 result -= alpha[m]*regkern1(kernel,r,p,param,a,1.0/16.0);
00351 else
00352 result -= alpha[m]*kubintkern1(r,Add,Ad,a);
00353 }
00354 else
00355 {
00356 if (flags & EXACT_NEARFIELD)
00357 result -= alpha[m]*regkern(kernel,r,p,param,a,1.0/16.0);
00358 else
00359 result -= alpha[m]*kubintkern(r,Add,Ad,a);
00360 }
00361 }
00362 }
00363 result += SearchTree(d,(t+1)%d,x+(m+1)*d,alpha+(m+1),xmin,xmax,N-m-1,kernel,param,Ad,Add,p,flags);
00364 result += SearchTree(d,(t+1)%d,x,alpha,xmin,xmax,m,kernel,param,Ad,Add,p,flags);
00365 }
00366 return result;
00367 }
00368
00370 void fastsum_init_guru(fastsum_plan *ths, int d, int N_total, int M_total, complex (*kernel)(), double *param, unsigned flags, int nn, int m, int p, double eps_I, double eps_B)
00371 {
00372 int t;
00373 int N[d], n[d];
00374 int n_total;
00375
00376 ths->d = d;
00377
00378 ths->N_total = N_total;
00379 ths->M_total = M_total;
00380
00381 ths->x = (double *)malloc(d*N_total*(sizeof(double)));
00382 ths->alpha = (complex *)malloc(N_total*(sizeof(complex)));
00383
00384 ths->y = (double *)malloc(d*M_total*(sizeof(double)));
00385 ths->f = (complex *)malloc(M_total*(sizeof(complex)));
00386
00387 ths->kernel = kernel;
00388 ths->kernel_param = param;
00389
00390 ths->flags = flags;
00391
00392 ths->p = p;
00393 ths->eps_I = eps_I;
00394 ths->eps_B = eps_B;
00397 if (!(ths->flags & EXACT_NEARFIELD))
00398 {
00399 if (ths->d==1)
00400 {
00401 ths->Ad = 4*(ths->p)*(ths->p);
00402 ths->Add = (double *)malloc((ths->Ad+5)*(sizeof(double)));
00403 }
00404 else
00405 {
00406 ths->Ad = 2*(ths->p)*(ths->p);
00407 ths->Add = (double *)malloc((ths->Ad+3)*(sizeof(double)));
00408 }
00409 }
00410
00412 ths->n = nn;
00413 for (t=0; t<d; t++)
00414 {
00415 N[t] = nn;
00416 n[t] = 2*nn;
00417 }
00418 nfft_init_guru(&(ths->mv1), d, N, N_total, n, m,
00419 PRE_PHI_HUT| PRE_PSI| MALLOC_X | MALLOC_F_HAT| MALLOC_F| FFTW_INIT | FFT_OUT_OF_PLACE,
00420 FFTW_MEASURE| FFTW_DESTROY_INPUT);
00421 nfft_init_guru(&(ths->mv2), d, N, M_total, n, m,
00422 PRE_PHI_HUT| PRE_PSI| MALLOC_X | MALLOC_F_HAT| MALLOC_F| FFTW_INIT | FFT_OUT_OF_PLACE,
00423 FFTW_MEASURE| FFTW_DESTROY_INPUT);
00424
00426 n_total = 1;
00427 for (t=0; t<d; t++)
00428 n_total *= nn;
00429
00430 ths->b = (fftw_complex *)fftw_malloc(n_total*sizeof(fftw_complex));
00431 ths->fft_plan = fftw_plan_dft(d,N,ths->b,ths->b,FFTW_FORWARD,FFTW_ESTIMATE);
00432
00433 }
00434
00436 void fastsum_finalize(fastsum_plan *ths)
00437 {
00438 free(ths->x);
00439 free(ths->alpha);
00440 free(ths->y);
00441 free(ths->f);
00442
00443 if (!(ths->flags & EXACT_NEARFIELD))
00444 free(ths->Add);
00445
00446 nfft_finalize(&(ths->mv1));
00447 nfft_finalize(&(ths->mv2));
00448
00449 fftw_destroy_plan(ths->fft_plan);
00450 fftw_free(ths->b);
00451 }
00452
00454 void fastsum_exact(fastsum_plan *ths)
00455 {
00456 int j,k;
00457 int t;
00458 double r;
00459
00460 for (j=0; j<ths->M_total; j++)
00461 {
00462 ths->f[j]=0.0;
00463 for (k=0; k<ths->N_total; k++)
00464 {
00465 if (ths->d==1)
00466 r = ths->y[j] - ths->x[k];
00467 else
00468 {
00469 r=0.0;
00470 for (t=0; t<ths->d; t++)
00471 r += (ths->y[j*ths->d+t]-ths->x[k*ths->d+t])*(ths->y[j*ths->d+t]-ths->x[k*ths->d+t]);
00472 r=sqrt(r);
00473 }
00474 ths->f[j] += ths->alpha[k] * ths->kernel(r,0,ths->kernel_param);
00475 }
00476 }
00477 }
00478
00480 void fastsum_precompute(fastsum_plan *ths)
00481 {
00482 int j,k,t;
00483 int n_total;
00484
00486 BuildTree(ths->d,0,ths->x,ths->alpha,ths->N_total);
00487
00489 if (!(ths->flags & EXACT_NEARFIELD))
00490 {
00491 if (ths->d==1)
00492 for (k=-ths->Ad/2-2; k <= ths->Ad/2+2; k++)
00493 ths->Add[k+ths->Ad/2+2] = regkern1(ths->kernel, ths->eps_I*(double)k/ths->Ad*2, ths->p, ths->kernel_param, ths->eps_I, ths->eps_B);
00494 else
00495 for (k=0; k <= ths->Ad+2; k++)
00496 ths->Add[k] = regkern3(ths->kernel, ths->eps_I*(double)k/ths->Ad, ths->p, ths->kernel_param, ths->eps_I, ths->eps_B);
00497 }
00498
00500 for (k=0; k<ths->mv1.M_total; k++)
00501 for (t=0; t<ths->mv1.d; t++)
00502 ths->mv1.x[ths->mv1.d*k+t] = - ths->x[ths->mv1.d*k+t];
00503
00505 if(ths->mv1.nfft_flags & PRE_LIN_PSI)
00506 nfft_precompute_lin_psi(&(ths->mv1));
00507
00508 if(ths->mv1.nfft_flags & PRE_PSI)
00509 nfft_precompute_psi(&(ths->mv1));
00510
00511 if(ths->mv1.nfft_flags & PRE_FULL_PSI)
00512 nfft_precompute_full_psi(&(ths->mv1));
00513
00515 for(k=0; k<ths->mv1.M_total;k++)
00516 ths->mv1.f[k] = ths->alpha[k];
00517
00519 for (j=0; j<ths->mv2.M_total; j++)
00520 for (t=0; t<ths->mv2.d; t++)
00521 ths->mv2.x[ths->mv2.d*j+t] = - ths->y[ths->mv2.d*j+t];
00522
00524 if(ths->mv2.nfft_flags & PRE_LIN_PSI)
00525 nfft_precompute_lin_psi(&(ths->mv2));
00526
00527 if(ths->mv2.nfft_flags & PRE_PSI)
00528 nfft_precompute_psi(&(ths->mv2));
00529
00530 if(ths->mv2.nfft_flags & PRE_FULL_PSI)
00531 nfft_precompute_full_psi(&(ths->mv2));
00532
00533
00535 n_total = 1;
00536 for (t=0; t<ths->d; t++)
00537 n_total *= ths->n;
00538
00539 for (j=0; j<n_total; j++)
00540 {
00541 if (ths->d==1)
00542 ths->b[j] = regkern1(ths->kernel, (double)j / (ths->n) - 0.5, ths->p, ths->kernel_param, ths->eps_I, ths->eps_B)/n_total;
00543 else
00544 {
00545 k=j;
00546 ths->b[j]=0.0;
00547 for (t=0; t<ths->d; t++)
00548 {
00549 ths->b[j] += ((double)(k % (ths->n)) / (ths->n) - 0.5) * ((double)(k % (ths->n)) / (ths->n) - 0.5);
00550 k = k / (ths->n);
00551 }
00552 ths->b[j] = regkern3(ths->kernel, sqrt(ths->b[j]), ths->p, ths->kernel_param, ths->eps_I, ths->eps_B)/n_total;
00553 }
00554 }
00555
00556 nfft_fftshift_complex(ths->b, ths->mv1.d, ths->mv1.N);
00557 fftw_execute(ths->fft_plan);
00558 nfft_fftshift_complex(ths->b, ths->mv1.d, ths->mv1.N);
00559
00560 }
00561
00563 void fastsum_trafo(fastsum_plan *ths)
00564 {
00565 int j,k,t;
00566 double *ymin, *ymax;
00568 ymin = (double *)malloc(ths->d*(sizeof(double)));
00569 ymax = (double *)malloc(ths->d*(sizeof(double)));
00570
00572 nfft_adjoint(&(ths->mv1));
00573
00575 for (k=0; k<ths->mv2.N_total; k++)
00576 ths->mv2.f_hat[k] = ths->b[k] * ths->mv1.f_hat[k];
00577
00579 nfft_trafo(&(ths->mv2));
00580
00582 for (j=0; j<ths->M_total; j++)
00583 {
00584 for (t=0; t<ths->d; t++)
00585 {
00586 ymin[t] = ths->y[ths->d*j+t] - ths->eps_I;
00587 ymax[t] = ths->y[ths->d*j+t] + ths->eps_I;
00588 }
00589 ths->f[j] = ths->mv2.f[j] + SearchTree(ths->d,0, ths->x, ths->alpha, ymin, ymax, ths->N_total, ths->kernel, ths->kernel_param, ths->Ad, ths->Add, ths->p, ths->flags);
00590
00591
00592 }
00593 }
00594
00595
00596