00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00028 #include <stdlib.h>
00029 #include <math.h>
00030 #include <complex.h>
00031
00032 #include "util.h"
00033 #include "nfft3.h"
00034 #include "fastsum.h"
00035
00042 double fak(int n)
00043 {
00044 if (n<=1) return 1.0;
00045 else return (double)n*fak(n-1);
00046 }
00047
00049 double binom(int n, int m)
00050 {
00051 return fak(n)/fak(m)/fak(n-m);
00052 }
00053
00055 double BasisPoly(int m, int r, double xx)
00056 {
00057 int k;
00058 double sum=0.0;
00059
00060 for (k=0; k<=m-r; k++) {
00061 sum+=binom(m+k,k)*pow((xx+1.0)/2.0,(double)k);
00062 }
00063 return sum*pow((xx+1.0),(double)r)*pow(1.0-xx,(double)(m+1))/(1<<(m+1))/fak(r);
00064 }
00065
00067 double regkern(double _Complex (*kernel)(double , int , const double *), double xx, int p, const double *param, double a, double b)
00068 {
00069 int r;
00070 double sum=0.0;
00071
00072 if (xx<-0.5)
00073 xx=-0.5;
00074 if (xx>0.5)
00075 xx=0.5;
00076 if ((xx>=-0.5+b && xx<=-a) || (xx>=a && xx<=0.5-b)) {
00077 return kernel(xx,0,param);
00078 }
00079 else if (xx<-0.5+b) {
00080 sum=(kernel(-0.5,0,param)+kernel(0.5,0,param))/2.0
00081 *BasisPoly(p-1,0,2.0*xx/b+(1.0-b)/b);
00082 for (r=0; r<p; r++) {
00083 sum+=pow(-b/2.0,(double)r)
00084 *kernel(-0.5+b,r,param)
00085 *BasisPoly(p-1,r,-2.0*xx/b+(b-1.0)/b);
00086 }
00087 return sum;
00088 }
00089 else if ((xx>-a) && (xx<a)) {
00090 for (r=0; r<p; r++) {
00091 sum+=pow(a,(double)r)
00092 *( kernel(-a,r,param)*BasisPoly(p-1,r,xx/a)
00093 +kernel( a,r,param)*BasisPoly(p-1,r,-xx/a)*(r & 1 ? -1 : 1));
00094 }
00095 return sum;
00096 }
00097 else if (xx>0.5-b) {
00098 sum=(kernel(-0.5,0,param)+kernel(0.5,0,param))/2.0
00099 *BasisPoly(p-1,0,-2.0*xx/b+(1.0-b)/b);
00100 for (r=0; r<p; r++) {
00101 sum+=pow(b/2.0,(double)r)
00102 *kernel(0.5-b,r,param)
00103 *BasisPoly(p-1,r,2.0*xx/b-(1.0-b)/b);
00104 }
00105 return sum;
00106 }
00107 return kernel(xx,0,param);
00108 }
00109
00113 double regkern1(double _Complex (*kernel)(double , int , const double *), double xx, int p, const double *param, double a, double b)
00114 {
00115 int r;
00116 double sum=0.0;
00117
00118 if (xx<-0.5)
00119 xx=-0.5;
00120 if (xx>0.5)
00121 xx=0.5;
00122 if ((xx>=-0.5+b && xx<=-a) || (xx>=a && xx<=0.5-b))
00123 {
00124 return kernel(xx,0,param);
00125 }
00126 else if ((xx>-a) && (xx<a))
00127 {
00128 for (r=0; r<p; r++) {
00129 sum+=pow(a,(double)r)
00130 *( kernel(-a,r,param)*BasisPoly(p-1,r,xx/a)
00131 +kernel( a,r,param)*BasisPoly(p-1,r,-xx/a)*(r & 1 ? -1 : 1));
00132 }
00133 return sum;
00134 }
00135 else if (xx<-0.5+b)
00136 {
00137 for (r=0; r<p; r++) {
00138 sum+=pow(b,(double)r)
00139 *( kernel(0.5-b,r,param)*BasisPoly(p-1,r,(xx+0.5)/b)
00140 +kernel(-0.5+b,r,param)*BasisPoly(p-1,r,-(xx+0.5)/b)*(r & 1 ? -1 : 1));
00141 }
00142 return sum;
00143 }
00144 else if (xx>0.5-b)
00145 {
00146 for (r=0; r<p; r++) {
00147 sum+=pow(b,(double)r)
00148 *( kernel(0.5-b,r,param)*BasisPoly(p-1,r,(xx-0.5)/b)
00149 +kernel(-0.5+b,r,param)*BasisPoly(p-1,r,-(xx-0.5)/b)*(r & 1 ? -1 : 1));
00150 }
00151 return sum;
00152 }
00153 return kernel(xx,0,param);
00154 }
00155
00157 double regkern2(double _Complex (*kernel)(double , int , const double *), double xx, int p, const double *param, double a, double b)
00158 {
00159 int r;
00160 double sum=0.0;
00161
00162 xx=fabs(xx);
00163
00164 if (xx>0.5) {
00165 for (r=0; r<p; r++) {
00166 sum+=pow(b,(double)r)*kernel(0.5-b,r,param)
00167 *(BasisPoly(p-1,r,0)+BasisPoly(p-1,r,0));
00168 }
00169 return sum;
00170 }
00171 else if ((a<=xx) && (xx<=0.5-b)) {
00172 return kernel(xx,0,param);
00173 }
00174 else if (xx<a) {
00175 for (r=0; r<p; r++) {
00176 sum+=pow(-a,(double)r)*kernel(a,r,param)
00177 *(BasisPoly(p-1,r,xx/a)+BasisPoly(p-1,r,-xx/a));
00178 }
00179 return sum;
00180 }
00181 else if ((0.5-b<xx) && (xx<=0.5)) {
00182 for (r=0; r<p; r++) {
00183 sum+=pow(b,(double)r)*kernel(0.5-b,r,param)
00184 *(BasisPoly(p-1,r,(xx-0.5)/b)+BasisPoly(p-1,r,-(xx-0.5)/b));
00185 }
00186 return sum;
00187 }
00188 return 0.0;
00189 }
00190
00194 double regkern3(double _Complex (*kernel)(double , int , const double *), double xx, int p, const double *param, double a, double b)
00195 {
00196 int r;
00197 double sum=0.0;
00198
00199 xx=fabs(xx);
00200
00201 if (xx>=0.5) {
00202
00203 xx=0.5;
00204 }
00205
00206 if ((a<=xx) && (xx<=0.5-b)) {
00207 return kernel(xx,0,param);
00208 }
00209 else if (xx<a) {
00210 for (r=0; r<p; r++) {
00211 sum+=pow(-a,(double)r)*kernel(a,r,param)
00212 *(BasisPoly(p-1,r,xx/a)+BasisPoly(p-1,r,-xx/a));
00213 }
00214
00215 return sum;
00216 }
00217 else if ((0.5-b<xx) && (xx<=0.5)) {
00218 sum=kernel(0.5,0,param)*BasisPoly(p-1,0,-2.0*xx/b+(1.0-b)/b);
00219
00220 for (r=0; r<p; r++) {
00221 sum+=pow(b/2.0,(double)r)
00222 *kernel(0.5-b,r,param)
00223 *BasisPoly(p-1,r,2.0*xx/b-(1.0-b)/b);
00224 }
00225 return sum;
00226 }
00227 printf("hier ");
00228 return 0.0;
00229 }
00230
00232 double kubintkern(double x, double *Add, int Ad, double a)
00233 {
00234 double c;
00235 int r;
00236 double f0,f1,f2,f3,c1,c2,c3,c4;
00237 c=x*Ad/a;
00238 r=c; r=abs(r);
00239 if (r==0) {f0=Add[r+1];f1=Add[r];f2=Add[r+1];f3=Add[r+2];}
00240 else { f0=Add[r-1];f1=Add[r];f2=Add[r+1];f3=Add[r+2];}
00241 c=fabs(c);
00242 c1=c-r;
00243 c2=c1+1.0;
00244 c3=c1-1.0;
00245 c4=c1-2.0;
00246
00247
00248 return(-f0*c1*c3*c4/6.0+f1*c2*c3*c4/2.0-f2*c2*c1*c4/2.0+f3*c2*c1*c3/6.0);
00249 }
00250
00252 double kubintkern1(double x, double *Add, int Ad, double a)
00253 {
00254 double c;
00255 int r;
00256 double f0,f1,f2,f3,c1,c2,c3,c4;
00257 Add+=2;
00258 c=(x+a)*Ad/2/a;
00259 r=c; r=abs(r);
00260
00261
00262 { f0=Add[r-1];f1=Add[r];f2=Add[r+1];f3=Add[r+2];}
00263 c=fabs(c);
00264 c1=c-r;
00265 c2=c1+1.0;
00266 c3=c1-1.0;
00267 c4=c1-2.0;
00268
00269
00270 return(-f0*c1*c3*c4/6.0+f1*c2*c3*c4/2.0-f2*c2*c1*c4/2.0+f3*c2*c1*c3/6.0);
00271 }
00272
00274 void quicksort(int d, int t, double *x, double _Complex *alpha, int N)
00275 {
00276 int lpos=0;
00277 int rpos=N-1;
00278
00279 double pivot=x[(N/2)*d+t];
00280
00281 int k;
00282 double temp1;
00283 double _Complex temp2;
00284
00285 while (lpos<=rpos)
00286 {
00287 while (x[lpos*d+t]<pivot)
00288 lpos++;
00289 while (x[rpos*d+t]>pivot)
00290 rpos--;
00291 if (lpos<=rpos)
00292 {
00293 for (k=0; k<d; k++)
00294 {
00295 temp1=x[lpos*d+k];
00296 x[lpos*d+k]=x[rpos*d+k];
00297 x[rpos*d+k]=temp1;
00298 }
00299 temp2=alpha[lpos];
00300 alpha[lpos]=alpha[rpos];
00301 alpha[rpos]=temp2;
00302
00303 lpos++;
00304 rpos--;
00305 }
00306 }
00307 if (0<rpos)
00308 quicksort(d,t,x,alpha,rpos+1);
00309 if (lpos<N-1)
00310 quicksort(d,t,x+lpos*d,alpha+lpos,N-lpos);
00311 }
00312
00314 void BuildTree(int d, int t, double *x, double _Complex *alpha, int N)
00315 {
00316 if (N>1)
00317 {
00318 int m=N/2;
00319
00320 quicksort(d,t,x,alpha,N);
00321
00322 BuildTree(d, (t+1)%d, x, alpha, m);
00323 BuildTree(d, (t+1)%d, x+(m+1)*d, alpha+(m+1), N-m-1);
00324 }
00325 }
00326
00328 double _Complex SearchTree(int d, int t, double *x, double _Complex *alpha, double *xmin, double *xmax, int N, double _Complex (*kernel)(double , int , const double *), const double *param, int Ad, double *Add, int p, unsigned flags)
00329 {
00330 int m=N/2;
00331 double Min=xmin[t], Max=xmax[t], Median=x[m*d+t];
00332 double a=fabs(Max-Min)/2;
00333 int l;
00334 int E=0;
00335 double r;
00336 double _Complex result=0.0;
00337
00338 if (N==0)
00339 return 0.0;
00340
00341 if (Min>Median)
00342 result += SearchTree(d,(t+1)%d,x+(m+1)*d,alpha+(m+1),xmin,xmax,N-m-1,kernel,param,Ad,Add,p,flags);
00343 else if (Max<Median)
00344 result += SearchTree(d,(t+1)%d,x,alpha,xmin,xmax,m,kernel,param,Ad,Add,p,flags);
00345 else
00346 {
00347 E=0;
00348 for (l=0; l<d; l++)
00349 {
00350 if ( x[m*d+l]>xmin[l] && x[m*d+l]<xmax[l] )
00351 E++;
00352 }
00353 if (E==d)
00354 {
00355 if (d==1)
00356 r = xmin[0]+a-x[m];
00357 else
00358 {
00359 r=0.0;
00360 for (l=0; l<d; l++)
00361 r+=(xmin[l]+a-x[m*d+l])*(xmin[l]+a-x[m*d+l]);
00362 r=sqrt(r);
00363 }
00364 if (fabs(r)<a)
00365 {
00366 result += alpha[m]*kernel(r,0,param);
00367 if (d==1)
00368 {
00369 if (flags & EXACT_NEARFIELD)
00370 result -= alpha[m]*regkern1(kernel,r,p,param,a,1.0/16.0);
00371 else
00372 result -= alpha[m]*kubintkern1(r,Add,Ad,a);
00373 }
00374 else
00375 {
00376 if (flags & EXACT_NEARFIELD)
00377 result -= alpha[m]*regkern(kernel,r,p,param,a,1.0/16.0);
00378 else
00379 result -= alpha[m]*kubintkern(r,Add,Ad,a);
00380 }
00381 }
00382 }
00383 result += SearchTree(d,(t+1)%d,x+(m+1)*d,alpha+(m+1),xmin,xmax,N-m-1,kernel,param,Ad,Add,p,flags);
00384 result += SearchTree(d,(t+1)%d,x,alpha,xmin,xmax,m,kernel,param,Ad,Add,p,flags);
00385 }
00386 return result;
00387 }
00388
00390 void fastsum_init_guru(fastsum_plan *ths, int d, int N_total, int M_total, double _Complex (*kernel)(), double *param, unsigned flags, int nn, int m, int p, double eps_I, double eps_B)
00391 {
00392 int t;
00393 int N[d], n[d];
00394 int n_total;
00395
00396 ths->d = d;
00397
00398 ths->N_total = N_total;
00399 ths->M_total = M_total;
00400
00401 ths->x = (double *)nfft_malloc(d*N_total*(sizeof(double)));
00402 ths->alpha = (double _Complex *)nfft_malloc(N_total*(sizeof(double _Complex)));
00403
00404 ths->y = (double *)nfft_malloc(d*M_total*(sizeof(double)));
00405 ths->f = (double _Complex *)nfft_malloc(M_total*(sizeof(double _Complex)));
00406
00407 ths->kernel = kernel;
00408 ths->kernel_param = param;
00409
00410 ths->flags = flags;
00411
00412 ths->p = p;
00413 ths->eps_I = eps_I;
00414 ths->eps_B = eps_B;
00417 if (!(ths->flags & EXACT_NEARFIELD))
00418 {
00419 if (ths->d==1)
00420 {
00421 ths->Ad = 4*(ths->p)*(ths->p);
00422 ths->Add = (double *)nfft_malloc((ths->Ad+5)*(sizeof(double)));
00423 }
00424 else
00425 {
00426 ths->Ad = 2*(ths->p)*(ths->p);
00427 ths->Add = (double *)nfft_malloc((ths->Ad+3)*(sizeof(double)));
00428 }
00429 }
00430
00432 ths->n = nn;
00433 for (t=0; t<d; t++)
00434 {
00435 N[t] = nn;
00436 n[t] = 2*nn;
00437 }
00438 nfft_init_guru(&(ths->mv1), d, N, N_total, n, m,
00439 PRE_PHI_HUT| PRE_PSI| MALLOC_X | MALLOC_F_HAT| MALLOC_F| FFTW_INIT | FFT_OUT_OF_PLACE,
00440 FFTW_MEASURE| FFTW_DESTROY_INPUT);
00441 nfft_init_guru(&(ths->mv2), d, N, M_total, n, m,
00442 PRE_PHI_HUT| PRE_PSI| MALLOC_X | MALLOC_F_HAT| MALLOC_F| FFTW_INIT | FFT_OUT_OF_PLACE,
00443 FFTW_MEASURE| FFTW_DESTROY_INPUT);
00444
00446 n_total = 1;
00447 for (t=0; t<d; t++)
00448 n_total *= nn;
00449
00450 ths->b = (fftw_complex *)nfft_malloc(n_total*sizeof(fftw_complex));
00451 ths->fft_plan = fftw_plan_dft(d,N,ths->b,ths->b,FFTW_FORWARD,FFTW_ESTIMATE);
00452
00453 }
00454
00456 void fastsum_finalize(fastsum_plan *ths)
00457 {
00458 nfft_free(ths->x);
00459 nfft_free(ths->alpha);
00460 nfft_free(ths->y);
00461 nfft_free(ths->f);
00462
00463 if (!(ths->flags & EXACT_NEARFIELD))
00464 nfft_free(ths->Add);
00465
00466 nfft_finalize(&(ths->mv1));
00467 nfft_finalize(&(ths->mv2));
00468
00469 fftw_destroy_plan(ths->fft_plan);
00470 nfft_free(ths->b);
00471 }
00472
00474 void fastsum_exact(fastsum_plan *ths)
00475 {
00476 int j,k;
00477 int t;
00478 double r;
00479
00480 for (j=0; j<ths->M_total; j++)
00481 {
00482 ths->f[j]=0.0;
00483 for (k=0; k<ths->N_total; k++)
00484 {
00485 if (ths->d==1)
00486 r = ths->y[j] - ths->x[k];
00487 else
00488 {
00489 r=0.0;
00490 for (t=0; t<ths->d; t++)
00491 r += (ths->y[j*ths->d+t]-ths->x[k*ths->d+t])*(ths->y[j*ths->d+t]-ths->x[k*ths->d+t]);
00492 r=sqrt(r);
00493 }
00494 ths->f[j] += ths->alpha[k] * ths->kernel(r,0,ths->kernel_param);
00495 }
00496 }
00497 }
00498
00500 void fastsum_precompute(fastsum_plan *ths)
00501 {
00502 int j,k,t;
00503 int n_total;
00504
00506 BuildTree(ths->d,0,ths->x,ths->alpha,ths->N_total);
00507
00509 if (!(ths->flags & EXACT_NEARFIELD))
00510 {
00511 if (ths->d==1)
00512 for (k=-ths->Ad/2-2; k <= ths->Ad/2+2; k++)
00513 ths->Add[k+ths->Ad/2+2] = regkern1(ths->kernel, ths->eps_I*(double)k/ths->Ad*2, ths->p, ths->kernel_param, ths->eps_I, ths->eps_B);
00514 else
00515 for (k=0; k <= ths->Ad+2; k++)
00516 ths->Add[k] = regkern3(ths->kernel, ths->eps_I*(double)k/ths->Ad, ths->p, ths->kernel_param, ths->eps_I, ths->eps_B);
00517 }
00518
00520 for (k=0; k<ths->mv1.M_total; k++)
00521 for (t=0; t<ths->mv1.d; t++)
00522 ths->mv1.x[ths->mv1.d*k+t] = - ths->x[ths->mv1.d*k+t];
00523
00525 if(ths->mv1.nfft_flags & PRE_LIN_PSI)
00526 nfft_precompute_lin_psi(&(ths->mv1));
00527
00528 if(ths->mv1.nfft_flags & PRE_PSI)
00529 nfft_precompute_psi(&(ths->mv1));
00530
00531 if(ths->mv1.nfft_flags & PRE_FULL_PSI)
00532 nfft_precompute_full_psi(&(ths->mv1));
00533
00535 for(k=0; k<ths->mv1.M_total;k++)
00536 ths->mv1.f[k] = ths->alpha[k];
00537
00539 for (j=0; j<ths->mv2.M_total; j++)
00540 for (t=0; t<ths->mv2.d; t++)
00541 ths->mv2.x[ths->mv2.d*j+t] = - ths->y[ths->mv2.d*j+t];
00542
00544 if(ths->mv2.nfft_flags & PRE_LIN_PSI)
00545 nfft_precompute_lin_psi(&(ths->mv2));
00546
00547 if(ths->mv2.nfft_flags & PRE_PSI)
00548 nfft_precompute_psi(&(ths->mv2));
00549
00550 if(ths->mv2.nfft_flags & PRE_FULL_PSI)
00551 nfft_precompute_full_psi(&(ths->mv2));
00552
00553
00555 n_total = 1;
00556 for (t=0; t<ths->d; t++)
00557 n_total *= ths->n;
00558
00559 for (j=0; j<n_total; j++)
00560 {
00561 if (ths->d==1)
00562 ths->b[j] = regkern1(ths->kernel, (double)j / (ths->n) - 0.5, ths->p, ths->kernel_param, ths->eps_I, ths->eps_B)/n_total;
00563 else
00564 {
00565 k=j;
00566 ths->b[j]=0.0;
00567 for (t=0; t<ths->d; t++)
00568 {
00569 ths->b[j] += ((double)(k % (ths->n)) / (ths->n) - 0.5) * ((double)(k % (ths->n)) / (ths->n) - 0.5);
00570 k = k / (ths->n);
00571 }
00572 ths->b[j] = regkern3(ths->kernel, sqrt(ths->b[j]), ths->p, ths->kernel_param, ths->eps_I, ths->eps_B)/n_total;
00573 }
00574 }
00575
00576 nfft_fftshift_complex(ths->b, ths->mv1.d, ths->mv1.N);
00577 fftw_execute(ths->fft_plan);
00578 nfft_fftshift_complex(ths->b, ths->mv1.d, ths->mv1.N);
00579
00580 }
00581
00583 void fastsum_trafo(fastsum_plan *ths)
00584 {
00585 int j,k,t;
00586 double *ymin, *ymax;
00588 ymin = (double *)nfft_malloc(ths->d*(sizeof(double)));
00589 ymax = (double *)nfft_malloc(ths->d*(sizeof(double)));
00590
00592 nfft_adjoint(&(ths->mv1));
00593
00595 for (k=0; k<ths->mv2.N_total; k++)
00596 ths->mv2.f_hat[k] = ths->b[k] * ths->mv1.f_hat[k];
00597
00599 nfft_trafo(&(ths->mv2));
00600
00602 for (j=0; j<ths->M_total; j++)
00603 {
00604 for (t=0; t<ths->d; t++)
00605 {
00606 ymin[t] = ths->y[ths->d*j+t] - ths->eps_I;
00607 ymax[t] = ths->y[ths->d*j+t] + ths->eps_I;
00608 }
00609 ths->f[j] = ths->mv2.f[j] + SearchTree(ths->d,0, ths->x, ths->alpha, ymin, ymax, ths->N_total, ths->kernel, ths->kernel_param, ths->Ad, ths->Add, ths->p, ths->flags);
00610
00611
00612 }
00613 }
00614
00615
00616