00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00027 #include "config.h"
00028
00029 #include <stdlib.h>
00030 #include <string.h>
00031 #include <stdbool.h>
00032 #include <math.h>
00033 #include <complex.h>
00034
00035 #include "nfft3.h"
00036 #include "util.h"
00037 #include "infft.h"
00038
00039
00040
00042 #define K_START_TILDE(x,y) (NFFT_MAX(NFFT_MIN(x,y-2),0))
00043
00045 #define K_END_TILDE(x,y) NFFT_MIN(x,y-1)
00046
00048 #define FIRST_L(x,y) (LRINT(floor((x)/(double)y)))
00049
00051 #define LAST_L(x,y) (LRINT(ceil(((x)+1)/(double)y))-1)
00052
00053 #define N_TILDE(y) (y-1)
00054
00055 #define IS_SYMMETRIC(x,y,z) (x >= ((y-1.0)/z))
00056
00057 #define FPT_BREAK_EVEN 4
00058
00062 typedef struct fpt_step_
00063 {
00064 bool stable;
00067 int Ns;
00068 int ts;
00069 double **a11,**a12,**a21,**a22;
00070 double *g;
00071 } fpt_step;
00072
00076 typedef struct fpt_data_
00077 {
00078 fpt_step **steps;
00079 int k_start;
00080 double *alphaN;
00081 double *betaN;
00082 double *gammaN;
00083 double alpha_0;
00084 double beta_0;
00085 double gamma_m1;
00086
00087 double *alpha;
00088 double *beta;
00089 double *gamma;
00090 } fpt_data;
00091
00095 typedef struct fpt_set_s_
00096 {
00097 int flags;
00098 int M;
00099 int N;
00101 int t;
00102 fpt_data *dpt;
00103 double **xcvecs;
00106 double *xc;
00107 double _Complex *temp;
00108 double _Complex *work;
00109 double _Complex *result;
00110 double _Complex *vec3;
00111 double _Complex *vec4;
00112 double _Complex *z;
00113 fftw_plan *plans_dct3;
00115 fftw_plan *plans_dct2;
00117 fftw_r2r_kind *kinds;
00119 fftw_r2r_kind *kindsr;
00122 int *lengths;
00124
00125 double *xc_slow;
00126 } fpt_set_s;
00127
00128 static inline void abuvxpwy(double a, double b, double _Complex* u, double _Complex* x,
00129 double* v, double _Complex* y, double* w, int n)
00130 {
00131 int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00132 double *v_ptr = v, *w_ptr = w;
00133 for (l = 0; l < n; l++)
00134 *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
00135 }
00136
00137 #define ABUVXPWY_SYMMETRIC(NAME,S1,S2) \
00138 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
00139 double* v, double _Complex* y, double* w, int n) \
00140 { \
00141 const int n2 = n>>1; \
00142 int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
00143 double *v_ptr = v, *w_ptr = w; \
00144 for (l = 0; l < n2; l++) \
00145 *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
00146 v_ptr--; w_ptr--; \
00147 for (l = 0; l < n2; l++) \
00148 *u_ptr++ = a * (b * S1 * (*v_ptr--) * (*x_ptr++) + S2 * (*w_ptr--) * (*y_ptr++)); \
00149 }
00150
00151 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric1,1.0,-1.0)
00152 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric2,-1.0,1.0)
00153
00154 #define ABUVXPWY_SYMMETRIC_1(NAME,S1) \
00155 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
00156 double* v, double _Complex* y, int n, double *xx) \
00157 { \
00158 const int n2 = n>>1; \
00159 int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
00160 double *v_ptr = v, *xx_ptr = xx; \
00161 for (l = 0; l < n2; l++, v_ptr++) \
00162 *u_ptr++ = a * (b * (*v_ptr) * (*x_ptr++) + ((*v_ptr)*(1.0+*xx_ptr++)) * (*y_ptr++)); \
00163 v_ptr--; \
00164 for (l = 0; l < n2; l++, v_ptr--) \
00165 *u_ptr++ = a * (b * S1 * (*v_ptr) * (*x_ptr++) + (S1 * (*v_ptr) * (1.0+*xx_ptr++)) * (*y_ptr++)); \
00166 }
00167
00168 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_1,1.0)
00169 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_2,-1.0)
00170
00171 #define ABUVXPWY_SYMMETRIC_2(NAME,S1) \
00172 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
00173 double _Complex* y, double* w, int n, double *xx) \
00174 { \
00175 const int n2 = n>>1; \
00176 int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
00177 double *w_ptr = w, *xx_ptr = xx; \
00178 for (l = 0; l < n2; l++, w_ptr++) \
00179 *u_ptr++ = a * (b * (*w_ptr/(1.0+*xx_ptr++)) * (*x_ptr++) + (*w_ptr) * (*y_ptr++)); \
00180 w_ptr--; \
00181 for (l = 0; l < n2; l++, w_ptr--) \
00182 *u_ptr++ = a * (b * (S1 * (*w_ptr)/(1.0+*xx_ptr++) ) * (*x_ptr++) + S1 * (*w_ptr) * (*y_ptr++)); \
00183 }
00184
00185 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_1,1.0)
00186 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_2,-1.0)
00187
00188 static inline void auvxpwy(double a, double _Complex* u, double _Complex* x, double* v,
00189 double _Complex* y, double* w, int n)
00190 {
00191 int l;
00192 double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00193 double *v_ptr = v, *w_ptr = w;
00194 for (l = n; l > 0; l--)
00195 *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
00196 }
00197
00198 static inline void auvxpwy_symmetric(double a, double _Complex* u, double _Complex* x,
00199 double* v, double _Complex* y, double* w, int n)
00200 {
00201 const int n2 = n>>1; \
00202 int l;
00203 double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00204 double *v_ptr = v, *w_ptr = w;
00205 for (l = n2; l > 0; l--)
00206 *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
00207 v_ptr--; w_ptr--;
00208 for (l = n2; l > 0; l--)
00209 *u_ptr++ = a * ((*v_ptr--) * (*x_ptr++) - (*w_ptr--) * (*y_ptr++));
00210 }
00211
00212 static inline void auvxpwy_symmetric_1(double a, double _Complex* u, double _Complex* x,
00213 double* v, double _Complex* y, double* w, int n, double *xx)
00214 {
00215 const int n2 = n>>1; \
00216 int l;
00217 double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00218 double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;
00219 for (l = n2; l > 0; l--, xx_ptr++)
00220 *u_ptr++ = a * (((*v_ptr++)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)*(1.0+*xx_ptr)) * (*y_ptr++));
00221 v_ptr--; w_ptr--;
00222 for (l = n2; l > 0; l--, xx_ptr++)
00223 *u_ptr++ = a * (-((*v_ptr--)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)*(1.0+*xx_ptr)) * (*y_ptr++));
00224 }
00225
00226 static inline void auvxpwy_symmetric_2(double a, double _Complex* u, double _Complex* x,
00227 double* v, double _Complex* y, double* w, int n, double *xx)
00228 {
00229 const int n2 = n>>1; \
00230 int l;
00231 double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00232 double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;
00233 for (l = n2; l > 0; l--, xx_ptr++)
00234 *u_ptr++ = a * (((*v_ptr++)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)/(1.0+*xx_ptr)) * (*y_ptr++));
00235 v_ptr--; w_ptr--;
00236 for (l = n2; l > 0; l--, xx_ptr++)
00237 *u_ptr++ = a * (-((*v_ptr--)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)/(1.0+*xx_ptr)) * (*y_ptr++));
00238 }
00239
00240 #define FPT_DO_STEP(NAME,M1_FUNCTION,M2_FUNCTION) \
00241 static inline void NAME(double _Complex *a, double _Complex *b, double *a11, double *a12, \
00242 double *a21, double *a22, double g, int tau, fpt_set set) \
00243 { \
00244 \
00245 int length = 1<<(tau+1); \
00246 \
00247 double norm = 1.0/(length<<1); \
00248 \
00249 \
00250 a[0] *= 2.0; \
00251 b[0] *= 2.0; \
00252 \
00253 \
00254 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
00255 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
00256 \
00257 \
00258 \
00259 \
00260 \
00261 \
00262 \
00263 \
00264 if (g == 0.0) \
00265 { \
00266 \
00267 \
00268 M2_FUNCTION(norm,b,b,a22,a,a21,length); \
00269 } \
00270 else \
00271 { \
00272 \
00273 \
00274 M2_FUNCTION(norm,set->z,b,a22,a,a21,length); \
00275 M1_FUNCTION(norm*g,a,a,a11,b,a12,length); \
00276 memcpy(b,set->z,length*sizeof(double _Complex)); \
00277 \
00278 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
00279 \
00280 a[0] *= 0.5; \
00281 } \
00282 \
00283 \
00284 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
00285 \
00286 b[0] *= 0.5; \
00287 }
00288
00289 FPT_DO_STEP(fpt_do_step,auvxpwy,auvxpwy)
00290 FPT_DO_STEP(fpt_do_step_symmetric,auvxpwy_symmetric,auvxpwy_symmetric)
00291
00292
00293
00294 static inline void fpt_do_step_symmetric_u(double _Complex *a, double _Complex *b,
00295 double *a11, double *a12, double *a21, double *a22, double *x,
00296 double gamma, int tau, fpt_set set)
00297 {
00299 int length = 1<<(tau+1);
00301 double norm = 1.0/(length<<1);
00302
00303 UNUSED(a21); UNUSED(a22);
00304
00305
00306 a[0] *= 2.0;
00307 b[0] *= 2.0;
00308
00309
00310 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00311 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00312
00313
00314
00315
00316
00317
00318
00319
00320 if (gamma == 0.0)
00321 {
00322
00323
00324 auvxpwy_symmetric_1(norm,b,b,a12,a,a11,length,x);
00325 }
00326 else
00327 {
00328
00329
00330 auvxpwy_symmetric_1(norm,set->z,b,a12,a,a11,length,x);
00331 auvxpwy_symmetric(norm*gamma,a,a,a11,b,a12,length);
00332 memcpy(b,set->z,length*sizeof(double _Complex));
00333
00334 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00335
00336 a[0] *= 0.5;
00337 }
00338
00339
00340 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00341
00342 b[0] *= 0.5;
00343 }
00344
00345 static inline void fpt_do_step_symmetric_l(double _Complex *a, double _Complex *b,
00346 double *a11, double *a12, double *a21, double *a22, double *x, double gamma, int tau, fpt_set set)
00347 {
00349 int length = 1<<(tau+1);
00351 double norm = 1.0/(length<<1);
00352
00353
00354 a[0] *= 2.0;
00355 b[0] *= 2.0;
00356
00357 UNUSED(a11); UNUSED(a12);
00358
00359
00360 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00361 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00362
00363
00364
00365
00366
00367
00368
00369
00370 if (gamma == 0.0)
00371 {
00372
00373
00374 auvxpwy_symmetric(norm,b,b,a22,a,a21,length);
00375 }
00376 else
00377 {
00378
00379
00380 auvxpwy_symmetric(norm,set->z,b,a22,a,a21,length);
00381 auvxpwy_symmetric_2(norm*gamma,a,a,a21,b,a22,length,x);
00382 memcpy(b,set->z,length*sizeof(double _Complex));
00383
00384 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00385
00386 a[0] *= 0.5;
00387 }
00388
00389
00390 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00391
00392 b[0] *= 0.5;
00393 }
00394
00395 #define FPT_DO_STEP_TRANSPOSED(NAME,M1_FUNCTION,M2_FUNCTION) \
00396 static inline void NAME(double _Complex *a, double _Complex *b, double *a11, \
00397 double *a12, double *a21, double *a22, double g, int tau, fpt_set set) \
00398 { \
00399 \
00400 int length = 1<<(tau+1); \
00401 \
00402 double norm = 1.0/(length<<1); \
00403 \
00404 \
00405 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
00406 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
00407 \
00408 \
00409 M1_FUNCTION(norm,g,set->z,a,a11,b,a21,length); \
00410 M2_FUNCTION(norm,g,b,a,a12,b,a22,length); \
00411 memcpy(a,set->z,length*sizeof(double _Complex)); \
00412 \
00413 \
00414 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
00415 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
00416 }
00417
00418 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t,abuvxpwy,abuvxpwy)
00419 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric,abuvxpwy_symmetric1,abuvxpwy_symmetric2)
00420
00421
00422
00423
00424 static inline void fpt_do_step_t_symmetric_u(double _Complex *a,
00425 double _Complex *b, double *a11, double *a12, double *x,
00426 double gamma, int tau, fpt_set set)
00427 {
00429 int length = 1<<(tau+1);
00431 double norm = 1.0/(length<<1);
00432
00433
00434 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00435 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00436
00437
00438 abuvxpwy_symmetric1_1(norm,gamma,set->z,a,a11,b,length,x);
00439 abuvxpwy_symmetric1_2(norm,gamma,b,a,a12,b,length,x);
00440 memcpy(a,set->z,length*sizeof(double _Complex));
00441
00442
00443 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00444 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00445 }
00446
00447 static inline void fpt_do_step_t_symmetric_l(double _Complex *a,
00448 double _Complex *b, double *a21, double *a22,
00449 double *x, double gamma, int tau, fpt_set set)
00450 {
00452 int length = 1<<(tau+1);
00454 double norm = 1.0/(length<<1);
00455
00456
00457 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00458 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00459
00460
00461 abuvxpwy_symmetric2_2(norm,gamma,set->z,a,b,a21,length,x);
00462 abuvxpwy_symmetric2_1(norm,gamma,b,a,b,a22,length,x);
00463 memcpy(a,set->z,length*sizeof(double _Complex));
00464
00465
00466 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00467 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00468 }
00469
00470
00471 static void eval_clenshaw(const double *x, double *y, int size, int k, const double *alpha,
00472 const double *beta, const double *gamma)
00473 {
00474
00475
00476
00477 int i,j;
00478 double a,b,x_val_act,a_old;
00479 const double *x_act;
00480 double *y_act;
00481 const double *alpha_act, *beta_act, *gamma_act;
00482
00483
00484 x_act = x;
00485 y_act = y;
00486 for (i = 0; i < size; i++)
00487 {
00488 a = 1.0;
00489 b = 0.0;
00490 x_val_act = *x_act;
00491
00492 if (k == 0)
00493 {
00494 *y_act = 1.0;
00495 }
00496 else
00497 {
00498 alpha_act = &(alpha[k]);
00499 beta_act = &(beta[k]);
00500 gamma_act = &(gamma[k]);
00501 for (j = k; j > 1; j--)
00502 {
00503 a_old = a;
00504 a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00505 b = a_old*(*gamma_act);
00506 alpha_act--;
00507 beta_act--;
00508 gamma_act--;
00509 }
00510 *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00511 }
00512 x_act++;
00513 y_act++;
00514 }
00515 }
00516
00517 static void eval_clenshaw2(const double *x, double *z, double *y, int size1, int size, int k, const double *alpha,
00518 const double *beta, const double *gamma)
00519 {
00520
00521
00522
00523 int i,j;
00524 double a,b,x_val_act,a_old;
00525 const double *x_act;
00526 double *y_act, *z_act;
00527 const double *alpha_act, *beta_act, *gamma_act;
00528
00529
00530 x_act = x;
00531 y_act = y;
00532 z_act = z;
00533 for (i = 0; i < size; i++)
00534 {
00535 a = 1.0;
00536 b = 0.0;
00537 x_val_act = *x_act;
00538
00539 if (k == 0)
00540 {
00541 *y_act = 1.0;
00542 *z_act = 0.0;
00543 }
00544 else
00545 {
00546 alpha_act = &(alpha[k]);
00547 beta_act = &(beta[k]);
00548 gamma_act = &(gamma[k]);
00549 for (j = k; j > 1; j--)
00550 {
00551 a_old = a;
00552 a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00553 b = a_old*(*gamma_act);
00554 alpha_act--;
00555 beta_act--;
00556 gamma_act--;
00557 }
00558 if (i < size1)
00559 *z_act = a;
00560 *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00561 }
00562
00563 x_act++;
00564 y_act++;
00565 z_act++;
00566 }
00567
00568
00569
00570
00571 }
00572
00573 static int eval_clenshaw_thresh2(const double *x, double *z, double *y, int size, int k,
00574 const double *alpha, const double *beta, const double *gamma, const
00575 double threshold)
00576 {
00577
00578
00579
00580 int i,j;
00581 double a,b,x_val_act,a_old;
00582 const double *x_act;
00583 double *y_act, *z_act;
00584 const double *alpha_act, *beta_act, *gamma_act;
00585
00586
00587 x_act = x;
00588 y_act = y;
00589 z_act = z;
00590 for (i = 0; i < size; i++)
00591 {
00592 a = 1.0;
00593 b = 0.0;
00594 x_val_act = *x_act;
00595
00596 if (k == 0)
00597 {
00598 *y_act = 1.0;
00599 *z_act = 0.0;
00600 }
00601 else
00602 {
00603 alpha_act = &(alpha[k]);
00604 beta_act = &(beta[k]);
00605 gamma_act = &(gamma[k]);
00606 for (j = k; j > 1; j--)
00607 {
00608 a_old = a;
00609 a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00610 b = a_old*(*gamma_act);
00611 alpha_act--;
00612 beta_act--;
00613 gamma_act--;
00614 }
00615 *z_act = a;
00616 *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00617 if (fabs(*y_act) > threshold)
00618 {
00619 return 1;
00620 }
00621 }
00622 x_act++;
00623 y_act++;
00624 z_act++;
00625 }
00626 return 0;
00627 }
00628
00629 static inline void eval_sum_clenshaw_fast(const int N, const int M,
00630 const double _Complex *a, const double *x, double _Complex *y,
00631 const double *alpha, const double *beta, const double *gamma,
00632 const double lambda)
00633 {
00634 int j,k;
00635 double _Complex tmp1, tmp2, tmp3;
00636 double xc;
00637
00638 if (N == 0)
00639 for (j = 0; j <= M; j++)
00640 y[j] = a[0];
00641 else
00642 {
00643 for (j = 0; j <= M; j++)
00644 {
00645 #if 0
00646 xc = x[j];
00647 tmp2 = a[N];
00648 tmp1 = a[N-1] + (alpha[N-1] * xc + beta[N-1])*tmp2;
00649 for (k = N-1; k > 0; k--)
00650 {
00651 tmp3 = a[k-1]
00652 + (alpha[k-1] * xc + beta[k-1]) * tmp1
00653 + gamma[k] * tmp2;
00654 tmp2 = tmp1;
00655 tmp1 = tmp3;
00656 }
00657 y[j] = lambda * tmp1;
00658 #else
00659 xc = x[j];
00660 tmp1 = a[N-1];
00661 tmp2 = a[N];
00662 for (k = N-1; k > 0; k--)
00663 {
00664 tmp3 = a[k-1] + tmp2 * gamma[k];
00665 tmp2 *= (alpha[k] * xc + beta[k]);
00666 tmp2 += tmp1;
00667 tmp1 = tmp3;
00668 }
00669 tmp2 *= (alpha[0] * xc + beta[0]);
00670 y[j] = lambda * (tmp2 + tmp1);
00671 #endif
00672 }
00673 }
00674 }
00675
00694 static void eval_sum_clenshaw_transposed(int N, int M, double _Complex* a, double *x,
00695 double _Complex *y, double _Complex *temp, double *alpha, double *beta, double *gamma,
00696 double lambda)
00697 {
00698 int j,k;
00699 double _Complex* it1 = temp;
00700 double _Complex* it2 = y;
00701 double _Complex aux;
00702
00703
00704 a[0] = 0.0;
00705 for (j = 0; j <= M; j++)
00706 {
00707 it2[j] = lambda * y[j];
00708 a[0] += it2[j];
00709 }
00710
00711 if (N > 0)
00712 {
00713
00714 a[1] = 0.0;
00715 for (j = 0; j <= M; j++)
00716 {
00717 it1[j] = it2[j];
00718 it2[j] = it2[j] * (alpha[0] * x[j] + beta[0]);
00719 a[1] += it2[j];
00720 }
00721
00722 for (k = 2; k <= N; k++)
00723 {
00724 a[k] = 0.0;
00725 for (j = 0; j <= M; j++)
00726 {
00727 aux = it1[j];
00728 it1[j] = it2[j];
00729 it2[j] = it2[j]*(alpha[k-1] * x[j] + beta[k-1]) + gamma[k-1] * aux;
00730 a[k] += it2[j];
00731 }
00732 }
00733 }
00734 }
00735
00736
00737 fpt_set fpt_init(const int M, const int t, const unsigned int flags)
00738 {
00740 int plength;
00742 int tau;
00744 int m;
00745 int k;
00746
00747
00748 fpt_set_s *set = (fpt_set_s*)nfft_malloc(sizeof(fpt_set_s));
00749
00750
00751 set->flags = flags;
00752
00753 set->M = M;
00754 set->t = t;
00755 set->N = 1<<t;
00756
00757
00758 set->dpt = (fpt_data*) nfft_malloc(M*sizeof(fpt_data));
00759
00760
00761 for (m = 0; m < set->M; m++)
00762 set->dpt[m].steps = 0;
00763
00764
00765
00766
00767
00768
00769
00770
00771
00772 set->xcvecs = (double**) nfft_malloc((set->t)*sizeof(double*));
00773
00774
00775 plength = 4;
00776 for (tau = 1; tau < t+1; tau++)
00777 {
00778
00779 set->xcvecs[tau-1] = (double*) nfft_malloc(plength*sizeof(double));
00780 for (k = 0; k < plength; k++)
00781 {
00782 set->xcvecs[tau-1][k] = cos(((k+0.5)*PI)/plength);
00783 }
00784 plength = plength << 1;
00785 }
00786
00788 set->work = (double _Complex*) nfft_malloc((2*set->N)*sizeof(double _Complex));
00789 set->result = (double _Complex*) nfft_malloc((2*set->N)*sizeof(double _Complex));
00790
00791
00792 if (!(set->flags & FPT_NO_FAST_ALGORITHM))
00793 {
00795 set->vec3 = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
00796 set->vec4 = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
00797 set->z = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
00798
00800 set->plans_dct3 = (fftw_plan*) nfft_malloc(sizeof(fftw_plan)*(set->t));
00801 set->plans_dct2 = (fftw_plan*) nfft_malloc(sizeof(fftw_plan)*(set->t));
00802 set->kinds = (fftw_r2r_kind*) nfft_malloc(2*sizeof(fftw_r2r_kind));
00803 set->kinds[0] = FFTW_REDFT01;
00804 set->kinds[1] = FFTW_REDFT01;
00805 set->kindsr = (fftw_r2r_kind*) nfft_malloc(2*sizeof(fftw_r2r_kind));
00806 set->kindsr[0] = FFTW_REDFT10;
00807 set->kindsr[1] = FFTW_REDFT10;
00808 set->lengths = (int*) nfft_malloc((set->t)*sizeof(int));
00809 for (tau = 0, plength = 4; tau < set->t; tau++, plength<<=1)
00810 {
00811 set->lengths[tau] = plength;
00812 set->plans_dct3[tau] =
00813 fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL,
00814 2, 1, (double*)set->result, NULL, 2, 1, set->kinds,
00815 0);
00816 set->plans_dct2[tau] =
00817 fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL,
00818 2, 1, (double*)set->result, NULL, 2, 1,set->kindsr,
00819 0);
00820 }
00821 nfft_free(set->lengths);
00822 nfft_free(set->kinds);
00823 nfft_free(set->kindsr);
00824 set->lengths = NULL;
00825 set->kinds = NULL;
00826 set->kindsr = NULL;
00827 }
00828
00829 if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
00830 {
00831 set->xc_slow = (double*) nfft_malloc((set->N+1)*sizeof(double));
00832 set->temp = (double _Complex*) nfft_malloc((set->N+1)*sizeof(double _Complex));
00833 }
00834
00835
00836 return set;
00837 }
00838
00839 void fpt_precompute(fpt_set set, const int m, double *alpha, double *beta,
00840 double *gam, int k_start, const double threshold)
00841 {
00842
00843 int tau;
00844 int l;
00845 int plength;
00847 int degree;
00849 int firstl;
00850 int lastl;
00851 int plength_stab;
00853 int degree_stab;
00855 double *a11;
00857 double *a12;
00859 double *a21;
00861 double *a22;
00863 const double *calpha;
00864 const double *cbeta;
00865 const double *cgamma;
00866 int needstab = 0;
00867 int k_start_tilde;
00868 int N_tilde;
00869 int clength;
00870 int clength_1;
00871 int clength_2;
00872 int t_stab, N_stab;
00873 fpt_data *data;
00874
00875
00876 data = &(set->dpt[m]);
00877
00878
00879 if (data->steps != NULL)
00880 return;
00881
00882
00883 data->k_start = k_start;
00884
00885
00886 if (!(set->flags & FPT_NO_FAST_ALGORITHM))
00887 {
00888
00889 data->alphaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
00890 data->betaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
00891 data->gammaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
00892
00893 for (tau = 2; tau <= set->t; tau++)
00894 {
00895
00896 data->alphaN[tau-2] = alpha[1<<tau];
00897 data->betaN[tau-2] = beta[1<<tau];
00898 data->gammaN[tau-2] = gam[1<<tau];
00899 }
00900
00901 data->alpha_0 = alpha[1];
00902 data->beta_0 = beta[1];
00903 data->gamma_m1 = gam[0];
00904
00905 k_start_tilde = K_START_TILDE(data->k_start,nfft_next_power_of_2(data->k_start)
00906 );
00907 N_tilde = N_TILDE(set->N);
00908
00909
00910 data->steps = (fpt_step**) nfft_malloc(sizeof(fpt_step*)*set->t);
00911
00912
00913 plength = 4;
00914 for (tau = 1; tau < set->t; tau++)
00915 {
00916
00917 degree = plength>>1;
00918
00919 firstl = FIRST_L(k_start_tilde,plength);
00920
00921 lastl = LAST_L(N_tilde,plength);
00922
00923
00924
00925 data->steps[tau] = (fpt_step*) nfft_malloc(sizeof(fpt_step)
00926 * (lastl+1));
00927
00928
00929 for (l = firstl; l <= lastl; l++)
00930 {
00931 if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
00932 {
00933
00934
00935 clength = plength/2;
00936 }
00937 else
00938 {
00939 clength = plength;
00940 }
00941
00942
00943 a11 = (double*) nfft_malloc(sizeof(double)*clength);
00944 a12 = (double*) nfft_malloc(sizeof(double)*clength);
00945 a21 = (double*) nfft_malloc(sizeof(double)*clength);
00946 a22 = (double*) nfft_malloc(sizeof(double)*clength);
00947
00948
00949
00950
00951
00952 calpha = &(alpha[plength*l+1+1]);
00953 cbeta = &(beta[plength*l+1+1]);
00954 cgamma = &(gam[plength*l+1+1]);
00955
00956 if (set->flags & FPT_NO_STABILIZATION)
00957 {
00958
00959 calpha--;
00960 cbeta--;
00961 cgamma--;
00962 eval_clenshaw2(set->xcvecs[tau-1], a11, a21, clength, clength, degree-1, calpha, cbeta,
00963 cgamma);
00964 eval_clenshaw2(set->xcvecs[tau-1], a12, a22, clength, clength, degree, calpha, cbeta,
00965 cgamma);
00966 needstab = 0;
00967 }
00968 else
00969 {
00970 calpha--;
00971 cbeta--;
00972 cgamma--;
00973
00974 needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a11, a21, clength,
00975 degree-1, calpha, cbeta, cgamma, threshold);
00976 if (needstab == 0)
00977 {
00978
00979 needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a12, a22, clength,
00980 degree, calpha, cbeta, cgamma, threshold);
00981 }
00982 }
00983
00984
00985 if (needstab == 0)
00986 {
00987 data->steps[tau][l].a11 = (double**) nfft_malloc(sizeof(double*));
00988 data->steps[tau][l].a12 = (double**) nfft_malloc(sizeof(double*));
00989 data->steps[tau][l].a21 = (double**) nfft_malloc(sizeof(double*));
00990 data->steps[tau][l].a22 = (double**) nfft_malloc(sizeof(double*));
00991 data->steps[tau][l].g = (double*) nfft_malloc(sizeof(double));
00992
00993 data->steps[tau][l].a11[0] = a11;
00994 data->steps[tau][l].a12[0] = a12;
00995 data->steps[tau][l].a21[0] = a21;
00996 data->steps[tau][l].a22[0] = a22;
00997 data->steps[tau][l].g[0] = gam[plength*l+1+1];
00998 data->steps[tau][l].stable = true;
00999 }
01000 else
01001 {
01002
01003 degree_stab = degree*(2*l+1);
01004 nfft_next_power_of_2_exp((l+1)*(1<<(tau+1)),&N_stab,&t_stab);
01005
01006
01007 nfft_free(a11);
01008 nfft_free(a12);
01009 nfft_free(a21);
01010 nfft_free(a22);
01011
01012 data->steps[tau][l].a11 = (double**) nfft_malloc(sizeof(double*));
01013 data->steps[tau][l].a12 = (double**)nfft_malloc(sizeof(double*));
01014 data->steps[tau][l].a21 = (double**) nfft_malloc(sizeof(double*));
01015 data->steps[tau][l].a22 = (double**) nfft_malloc(sizeof(double*));
01016 data->steps[tau][l].g = (double*) nfft_malloc(sizeof(double));
01017
01018 plength_stab = N_stab;
01019
01020 if (set->flags & FPT_AL_SYMMETRY)
01021 {
01022 if (m <= 1)
01023 {
01024
01025 clength_1 = plength_stab;
01026 clength_2 = plength_stab;
01027
01028 a11 = (double*) nfft_malloc(sizeof(double)*clength_1);
01029 a12 = (double*) nfft_malloc(sizeof(double)*clength_1);
01030 a21 = (double*) nfft_malloc(sizeof(double)*clength_2);
01031 a22 = (double*) nfft_malloc(sizeof(double)*clength_2);
01032
01033 calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
01034 eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1,
01035 clength_2, degree_stab-1, calpha, cbeta, cgamma);
01036 eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1,
01037 clength_2, degree_stab+0, calpha, cbeta, cgamma);
01038 }
01039 else
01040 {
01041 clength = plength_stab/2;
01042 if (m%2 == 0)
01043 {
01044 a11 = (double*) nfft_malloc(sizeof(double)*clength);
01045 a12 = (double*) nfft_malloc(sizeof(double)*clength);
01046 a21 = 0;
01047 a22 = 0;
01048 calpha = &(alpha[2]); cbeta = &(beta[2]); cgamma = &(gam[2]);
01049 eval_clenshaw(set->xcvecs[t_stab-2], a11, clength,
01050 degree_stab-2, calpha, cbeta, cgamma);
01051 eval_clenshaw(set->xcvecs[t_stab-2], a12, clength,
01052 degree_stab-1, calpha, cbeta, cgamma);
01053 }
01054 else
01055 {
01056 a11 = 0;
01057 a12 = 0;
01058 a21 = (double*) nfft_malloc(sizeof(double)*clength);
01059 a22 = (double*) nfft_malloc(sizeof(double)*clength);
01060 calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
01061 eval_clenshaw(set->xcvecs[t_stab-2], a21, clength,
01062 degree_stab-1,calpha, cbeta, cgamma);
01063 eval_clenshaw(set->xcvecs[t_stab-2], a22, clength,
01064 degree_stab+0, calpha, cbeta, cgamma);
01065 }
01066 }
01067 }
01068 else
01069 {
01070 clength_1 = plength_stab;
01071 clength_2 = plength_stab;
01072 a11 = (double*) nfft_malloc(sizeof(double)*clength_1);
01073 a12 = (double*) nfft_malloc(sizeof(double)*clength_1);
01074 a21 = (double*) nfft_malloc(sizeof(double)*clength_2);
01075 a22 = (double*) nfft_malloc(sizeof(double)*clength_2);
01076 calpha = &(alpha[2]);
01077 cbeta = &(beta[2]);
01078 cgamma = &(gam[2]);
01079 calpha--;
01080 cbeta--;
01081 cgamma--;
01082 eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1, clength_2, degree_stab-1,
01083 calpha, cbeta, cgamma);
01084 eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1, clength_2, degree_stab+0,
01085 calpha, cbeta, cgamma);
01086
01087 }
01088 data->steps[tau][l].a11[0] = a11;
01089 data->steps[tau][l].a12[0] = a12;
01090 data->steps[tau][l].a21[0] = a21;
01091 data->steps[tau][l].a22[0] = a22;
01092
01093 data->steps[tau][l].g[0] = gam[1+1];
01094 data->steps[tau][l].stable = false;
01095 data->steps[tau][l].ts = t_stab;
01096 data->steps[tau][l].Ns = N_stab;
01097 }
01098 }
01100 plength = plength << 1;
01101 }
01102 }
01103
01104 if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
01105 {
01106
01107 if (set->flags & FPT_PERSISTENT_DATA)
01108 {
01109 data->alpha = (double*) alpha;
01110 data->beta = (double*) beta;
01111 data->gamma = (double*) gam;
01112 }
01113 else
01114 {
01115 data->alpha = (double*) nfft_malloc((set->N+1)*sizeof(double));
01116 data->beta = (double*) nfft_malloc((set->N+1)*sizeof(double));
01117 data->gamma = (double*) nfft_malloc((set->N+1)*sizeof(double));
01118 memcpy(data->alpha,alpha,(set->N+1)*sizeof(double));
01119 memcpy(data->beta,beta,(set->N+1)*sizeof(double));
01120 memcpy(data->gamma,gam,(set->N+1)*sizeof(double));
01121 }
01122 }
01123 }
01124
01125 void dpt_trafo(fpt_set set, const int m, const double _Complex *x, double _Complex *y,
01126 const int k_end, const unsigned int flags)
01127 {
01128 int j;
01129 fpt_data *data = &(set->dpt[m]);
01130 int Nk;
01131 int tk;
01132 double norm;
01133
01134 nfft_next_power_of_2_exp(k_end+1,&Nk,&tk);
01135 norm = 2.0/(Nk<<1);
01136
01137 if (set->flags & FPT_NO_DIRECT_ALGORITHM)
01138 {
01139 return;
01140 }
01141
01142 if (flags & FPT_FUNCTION_VALUES)
01143 {
01144
01145 for (j = 0; j <= k_end; j++)
01146 {
01147 set->xc_slow[j] = cos((PI*(j+0.5))/(k_end+1));
01148 }
01149
01150 memset(set->result,0U,data->k_start*sizeof(double _Complex));
01151 memcpy(&set->result[data->k_start],x,(k_end-data->k_start+1)*sizeof(double _Complex));
01152
01153
01154
01155
01156 eval_sum_clenshaw_fast(k_end, k_end, set->result, set->xc_slow,
01157 y, &data->alpha[1], &data->beta[1], &data->gamma[1], data->gamma_m1);
01158 }
01159 else
01160 {
01161 memset(set->temp,0U,data->k_start*sizeof(double _Complex));
01162 memcpy(&set->temp[data->k_start],x,(k_end-data->k_start+1)*sizeof(double _Complex));
01163
01164 eval_sum_clenshaw_fast(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
01165 set->result, &data->alpha[1], &data->beta[1], &data->gamma[1],
01166 data->gamma_m1);
01167
01168 fftw_execute_r2r(set->plans_dct2[tk-2],(double*)set->result,
01169 (double*)set->result);
01170
01171 set->result[0] *= 0.5;
01172 for (j = 0; j < Nk; j++)
01173 {
01174 set->result[j] *= norm;
01175 }
01176
01177 memcpy(y,set->result,(k_end+1)*sizeof(double _Complex));
01178 }
01179 }
01180
01181 void fpt_trafo(fpt_set set, const int m, const double _Complex *x, double _Complex *y,
01182 const int k_end, const unsigned int flags)
01183 {
01184
01185 fpt_data *data = &(set->dpt[m]);
01187 int Nk;
01189 int tk;
01191 int k_start_tilde;
01193 int k_end_tilde;
01194
01196 int tau;
01198 int firstl;
01200 int lastl;
01202 int l;
01204 int plength;
01206 int plength_stab;
01207 int t_stab;
01209 fpt_step *step;
01211 fftw_plan plan = 0;
01212 int length = k_end+1;
01213 fftw_r2r_kind kinds[2] = {FFTW_REDFT01,FFTW_REDFT01};
01214
01216 int k;
01217
01218 double _Complex *work_ptr;
01219 const double _Complex *x_ptr;
01220
01221
01222 if (k_end < FPT_BREAK_EVEN)
01223 {
01224
01225 dpt_trafo(set, m, x, y, k_end, flags);
01226 return;
01227 }
01228
01229 nfft_next_power_of_2_exp(k_end,&Nk,&tk);
01230 k_start_tilde = K_START_TILDE(data->k_start,Nk);
01231 k_end_tilde = K_END_TILDE(k_end,Nk);
01232
01233
01234 if (set->flags & FPT_NO_FAST_ALGORITHM)
01235 return;
01236
01237 if (flags & FPT_FUNCTION_VALUES)
01238 plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
01239 (double*)set->work, NULL, 2, 1, kinds, 0U);
01240
01241
01242 memset(set->result,0U,2*Nk*sizeof(double _Complex));
01243
01244
01245
01246
01247 memset(set->work,0U,2*data->k_start*sizeof(double _Complex));
01248
01249 work_ptr = &set->work[2*data->k_start];
01250 x_ptr = x;
01251
01252 for (k = 0; k < k_end_tilde-data->k_start+1; k++)
01253 {
01254 *work_ptr++ = *x_ptr++;
01255 *work_ptr++ = 0;
01256 }
01257
01258
01259 memset(&set->work[2*(k_end_tilde+1)],0U,2*(Nk-1-k_end_tilde)*sizeof(double _Complex));
01260
01261
01262
01263 if (k_end == Nk)
01264 {
01265 set->work[2*(Nk-2)] += data->gammaN[tk-2]*x[Nk-data->k_start];
01266 set->work[2*(Nk-1)] += data->betaN[tk-2]*x[Nk-data->k_start];
01267 set->work[2*(Nk-1)+1] = data->alphaN[tk-2]*x[Nk-data->k_start];
01268 }
01269
01270
01271
01272
01273
01274
01275
01276
01277
01278
01279
01280 plength = 4;
01281 for (tau = 1; tau < tk; tau++)
01282 {
01283
01284 firstl = FIRST_L(k_start_tilde,plength);
01285
01286 lastl = LAST_L(k_end_tilde,plength);
01287
01288
01289 for (l = firstl; l <= lastl; l++)
01290 {
01291
01292 memcpy(set->vec3,&(set->work[(plength/2)*(4*l+2)]),(plength/2)*sizeof(double _Complex));
01293 memcpy(set->vec4,&(set->work[(plength/2)*(4*l+3)]),(plength/2)*sizeof(double _Complex));
01294 memset(&set->vec3[plength/2],0U,(plength/2)*sizeof(double _Complex));
01295 memset(&set->vec4[plength/2],0U,(plength/2)*sizeof(double _Complex));
01296
01297
01298 memcpy(&(set->work[(plength/2)*(4*l+2)]),&(set->work[(plength/2)*(4*l+1)]),(plength/2)*sizeof(double _Complex));
01299 memset(&(set->work[(plength/2)*(4*l+1)]),0U,(plength/2)*sizeof(double _Complex));
01300 memset(&(set->work[(plength/2)*(4*l+3)]),0U,(plength/2)*sizeof(double _Complex));
01301
01302
01303 step = &(data->steps[tau][l]);
01304
01305
01306 if (step->stable)
01307 {
01308
01309 if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
01310 {
01311
01312
01313
01314
01315
01316
01317
01318 fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0],
01319 step->a12[0], step->a21[0], step->a22[0], step->g[0], tau, set);
01320 }
01321 else
01322 {
01323
01324 fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01325 step->a21[0], step->a22[0], step->g[0], tau, set);
01326 }
01327
01328 if (step->g[0] != 0.0)
01329 {
01330 for (k = 0; k < plength; k++)
01331 {
01332 set->work[plength*2*l+k] += set->vec3[k];
01333 }
01334 }
01335 for (k = 0; k < plength; k++)
01336 {
01337 set->work[plength*(2*l+1)+k] += set->vec4[k];
01338 }
01339 }
01340 else
01341 {
01342
01343
01344
01345 plength_stab = step->Ns;
01346 t_stab = step->ts;
01347
01348
01349
01350
01351
01352
01353
01354
01355
01356
01357
01358 memset(&set->vec3[plength/2],0U,(plength_stab-plength/2)*sizeof(double _Complex));
01359 memset(&set->vec4[plength/2],0U,(plength_stab-plength/2)*sizeof(double _Complex));
01360
01361
01362
01363 if (set->flags & FPT_AL_SYMMETRY)
01364 {
01365 if (m <= 1)
01366 {
01367 fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01368 step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01369 }
01370 else if (m%2 == 0)
01371 {
01372
01373
01374 fpt_do_step_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
01375 step->a21[0], step->a22[0],
01376 set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01377
01378
01379 }
01380 else
01381 {
01382
01383
01384 fpt_do_step_symmetric_l(set->vec3, set->vec4,
01385 step->a11[0], step->a12[0],
01386 step->a21[0],
01387 step->a22[0], set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01388
01389
01390 }
01391 }
01392 else
01393 {
01394 fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01395 step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01396 }
01397
01398 if (step->g[0] != 0.0)
01399 {
01400 for (k = 0; k < plength_stab; k++)
01401 {
01402 set->result[k] += set->vec3[k];
01403 }
01404 }
01405
01406 for (k = 0; k < plength_stab; k++)
01407 {
01408 set->result[Nk+k] += set->vec4[k];
01409 }
01410 }
01411 }
01412
01413 plength = plength<<1;
01414
01415
01416
01417
01418
01419
01420
01421
01422
01423 }
01424
01425
01426
01427 for (k = 0; k < 2*Nk; k++)
01428 {
01429 set->result[k] += set->work[k];
01430 }
01431
01432
01433
01434 y[0] = data->gamma_m1*(set->result[0] + data->beta_0*set->result[Nk] +
01435 data->alpha_0*set->result[Nk+1]*0.5);
01436 y[1] = data->gamma_m1*(set->result[1] + data->beta_0*set->result[Nk+1]+
01437 data->alpha_0*(set->result[Nk]+set->result[Nk+2]*0.5));
01438 y[k_end-1] = data->gamma_m1*(set->result[k_end-1] +
01439 data->beta_0*set->result[Nk+k_end-1] +
01440 data->alpha_0*set->result[Nk+k_end-2]*0.5);
01441 y[k_end] = data->gamma_m1*(0.5*data->alpha_0*set->result[Nk+k_end-1]);
01442 for (k = 2; k <= k_end-2; k++)
01443 {
01444 y[k] = data->gamma_m1*(set->result[k] + data->beta_0*set->result[Nk+k] +
01445 data->alpha_0*0.5*(set->result[Nk+k-1]+set->result[Nk+k+1]));
01446 }
01447
01448 if (flags & FPT_FUNCTION_VALUES)
01449 {
01450 y[0] *= 2.0;
01451 fftw_execute_r2r(plan,(double*)y,(double*)y);
01452 fftw_destroy_plan(plan);
01453 for (k = 0; k <= k_end; k++)
01454 {
01455 y[k] *= 0.5;
01456 }
01457 }
01458 }
01459
01460 void dpt_transposed(fpt_set set, const int m, double _Complex *x,
01461 double _Complex *y, const int k_end, const unsigned int flags)
01462 {
01463 int j;
01464 fpt_data *data = &(set->dpt[m]);
01465 int Nk;
01466 int tk;
01467 double norm;
01468
01469 nfft_next_power_of_2_exp(k_end+1,&Nk,&tk);
01470 norm = 2.0/(Nk<<1);
01471
01472 if (set->flags & FPT_NO_DIRECT_ALGORITHM)
01473 {
01474 return;
01475 }
01476
01477 if (flags & FPT_FUNCTION_VALUES)
01478 {
01479 for (j = 0; j <= k_end; j++)
01480 {
01481 set->xc_slow[j] = cos((PI*(j+0.5))/(k_end+1));
01482 }
01483
01484 eval_sum_clenshaw_transposed(k_end, k_end, set->result, set->xc_slow,
01485 y, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
01486 data->gamma_m1);
01487
01488 memcpy(x,&set->result[data->k_start],(k_end-data->k_start+1)*
01489 sizeof(double _Complex));
01490 }
01491 else
01492 {
01493 memcpy(set->result,y,(k_end+1)*sizeof(double _Complex));
01494 memset(&set->result[k_end+1],0U,(Nk-k_end-1)*sizeof(double _Complex));
01495
01496 for (j = 0; j < Nk; j++)
01497 {
01498 set->result[j] *= norm;
01499 }
01500
01501 fftw_execute_r2r(set->plans_dct3[tk-2],(double*)set->result,
01502 (double*)set->result);
01503
01504 eval_sum_clenshaw_transposed(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
01505 set->result, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
01506 data->gamma_m1);
01507
01508 memcpy(x,&set->temp[data->k_start],(k_end-data->k_start+1)*sizeof(double _Complex));
01509 }
01510 }
01511
01512 void fpt_transposed(fpt_set set, const int m, double _Complex *x,
01513 double _Complex *y, const int k_end, const unsigned int flags)
01514 {
01515
01516 fpt_data *data = &(set->dpt[m]);
01518 int Nk;
01520 int tk;
01522 int k_start_tilde;
01524 int k_end_tilde;
01525
01527 int tau;
01529 int firstl;
01531 int lastl;
01533 int l;
01535 int plength;
01537 int plength_stab;
01539 fpt_step *step;
01541 fftw_plan plan;
01542 int length = k_end+1;
01543 fftw_r2r_kind kinds[2] = {FFTW_REDFT10,FFTW_REDFT10};
01545 int k;
01546 int t_stab;
01547
01548
01549 if (k_end < FPT_BREAK_EVEN)
01550 {
01551
01552 dpt_transposed(set, m, x, y, k_end, flags);
01553 return;
01554 }
01555
01556 nfft_next_power_of_2_exp(k_end,&Nk,&tk);
01557 k_start_tilde = K_START_TILDE(data->k_start,Nk);
01558 k_end_tilde = K_END_TILDE(k_end,Nk);
01559
01560
01561 if (set->flags & FPT_NO_FAST_ALGORITHM)
01562 {
01563 return;
01564 }
01565
01566 if (flags & FPT_FUNCTION_VALUES)
01567 {
01568 plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
01569 (double*)set->work, NULL, 2, 1, kinds, 0U);
01570 fftw_execute_r2r(plan,(double*)y,(double*)set->result);
01571 fftw_destroy_plan(plan);
01572 for (k = 0; k <= k_end; k++)
01573 {
01574 set->result[k] *= 0.5;
01575 }
01576 }
01577 else
01578 {
01579 memcpy(set->result,y,(k_end+1)*sizeof(double _Complex));
01580 }
01581
01582
01583 memset(set->work,0U,2*Nk*sizeof(double _Complex));
01584
01585
01586 for (k = 0; k <= k_end; k++)
01587 {
01588 set->work[k] = data->gamma_m1*set->result[k];
01589 }
01590
01591
01592 set->work[Nk] = data->gamma_m1*(data->beta_0*set->result[0] +
01593 data->alpha_0*set->result[1]);
01594 for (k = 1; k < k_end; k++)
01595 {
01596 set->work[Nk+k] = data->gamma_m1*(data->beta_0*set->result[k] +
01597 data->alpha_0*0.5*(set->result[k-1]+set->result[k+1]));
01598 }
01599 if (k_end<Nk)
01600 {
01601 memset(&set->work[k_end],0U,(Nk-k_end)*sizeof(double _Complex));
01602 }
01603
01605 memcpy(set->result,set->work,2*Nk*sizeof(double _Complex));
01606
01607
01608 plength = Nk;
01609 for (tau = tk-1; tau >= 1; tau--)
01610 {
01611
01612 firstl = FIRST_L(k_start_tilde,plength);
01613
01614 lastl = LAST_L(k_end_tilde,plength);
01615
01616
01617 for (l = firstl; l <= lastl; l++)
01618 {
01619
01620 memcpy(set->vec3,&(set->work[(plength/2)*(4*l+0)]),plength*sizeof(double _Complex));
01621 memcpy(set->vec4,&(set->work[(plength/2)*(4*l+2)]),plength*sizeof(double _Complex));
01622
01623 memcpy(&set->work[(plength/2)*(4*l+1)],&(set->work[(plength/2)*(4*l+2)]),
01624 (plength/2)*sizeof(double _Complex));
01625
01626
01627 step = &(data->steps[tau][l]);
01628
01629
01630 if (step->stable)
01631 {
01632 if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
01633 {
01634
01635 fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01636 step->a21[0], step->a22[0], step->g[0], tau, set);
01637 }
01638 else
01639 {
01640
01641 fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
01642 step->a21[0], step->a22[0], step->g[0], tau, set);
01643 }
01644 memcpy(&(set->vec3[plength/2]), set->vec4,(plength/2)*sizeof(double _Complex));
01645
01646 for (k = 0; k < plength; k++)
01647 {
01648 set->work[plength*(4*l+2)/2+k] = set->vec3[k];
01649 }
01650 }
01651 else
01652 {
01653
01654 plength_stab = step->Ns;
01655 t_stab = step->ts;
01656
01657 memcpy(set->vec3,set->result,plength_stab*sizeof(double _Complex));
01658 memcpy(set->vec4,&(set->result[Nk]),plength_stab*sizeof(double _Complex));
01659
01660
01661 if (set->flags & FPT_AL_SYMMETRY)
01662 {
01663 if (m <= 1)
01664 {
01665 fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01666 step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01667 }
01668 else if (m%2 == 0)
01669 {
01670 fpt_do_step_t_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
01671 set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01672 }
01673 else
01674 {
01675 fpt_do_step_t_symmetric_l(set->vec3, set->vec4,
01676 step->a21[0], step->a22[0], set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01677 }
01678 }
01679 else
01680 {
01681 fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
01682 step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01683 }
01684
01685 memcpy(&(set->vec3[plength/2]),set->vec4,(plength/2)*sizeof(double _Complex));
01686
01687 for (k = 0; k < plength; k++)
01688 {
01689 set->work[(plength/2)*(4*l+2)+k] = set->vec3[k];
01690 }
01691 }
01692 }
01693
01694 plength = plength>>1;
01695 }
01696
01697
01698 for (k = 0; k <= k_end_tilde-data->k_start; k++)
01699 {
01700 x[k] = set->work[2*(data->k_start+k)];
01701 }
01702 if (k_end == Nk)
01703 {
01704 x[Nk-data->k_start] =
01705 data->gammaN[tk-2]*set->work[2*(Nk-2)]
01706 + data->betaN[tk-2] *set->work[2*(Nk-1)]
01707 + data->alphaN[tk-2]*set->work[2*(Nk-1)+1];
01708 }
01709 }
01710
01711 void fpt_finalize(fpt_set set)
01712 {
01713 int tau;
01714 int l;
01715 int m;
01716 fpt_data *data;
01717 int k_start_tilde;
01718 int N_tilde;
01719 int firstl, lastl;
01720 int plength;
01721 const int M = set->M;
01722
01723
01724 for (m = 0; m < M; m++)
01725 {
01726
01727 data = &set->dpt[m];
01728 if (data->steps != (fpt_step**)NULL)
01729 {
01730 nfft_free(data->alphaN);
01731 nfft_free(data->betaN);
01732 nfft_free(data->gammaN);
01733 data->alphaN = NULL;
01734 data->betaN = NULL;
01735 data->gammaN = NULL;
01736
01737
01738 k_start_tilde = K_START_TILDE(data->k_start,nfft_next_power_of_2(data->k_start)
01739 );
01740 N_tilde = N_TILDE(set->N);
01741 plength = 4;
01742 for (tau = 1; tau < set->t; tau++)
01743 {
01744
01745 firstl = FIRST_L(k_start_tilde,plength);
01746
01747 lastl = LAST_L(N_tilde,plength);
01748
01749
01750 for (l = firstl; l <= lastl; l++)
01751 {
01752
01753 nfft_free(data->steps[tau][l].a11[0]);
01754 nfft_free(data->steps[tau][l].a12[0]);
01755 nfft_free(data->steps[tau][l].a21[0]);
01756 nfft_free(data->steps[tau][l].a22[0]);
01757 data->steps[tau][l].a11[0] = NULL;
01758 data->steps[tau][l].a12[0] = NULL;
01759 data->steps[tau][l].a21[0] = NULL;
01760 data->steps[tau][l].a22[0] = NULL;
01761
01762 nfft_free(data->steps[tau][l].a11);
01763 nfft_free(data->steps[tau][l].a12);
01764 nfft_free(data->steps[tau][l].a21);
01765 nfft_free(data->steps[tau][l].a22);
01766 nfft_free(data->steps[tau][l].g);
01767 data->steps[tau][l].a11 = NULL;
01768 data->steps[tau][l].a12 = NULL;
01769 data->steps[tau][l].a21 = NULL;
01770 data->steps[tau][l].a22 = NULL;
01771 data->steps[tau][l].g = NULL;
01772 }
01773
01774 nfft_free(data->steps[tau]);
01775 data->steps[tau] = NULL;
01776
01777 plength = plength<<1;
01778 }
01779
01780 nfft_free(data->steps);
01781 data->steps = NULL;
01782 }
01783
01784 if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
01785 {
01786
01787
01788 if (!(set->flags & FPT_PERSISTENT_DATA))
01789 {
01790 nfft_free(data->alpha);
01791 nfft_free(data->beta);
01792 nfft_free(data->gamma);
01793 }
01794 data->alpha = NULL;
01795 data->beta = NULL;
01796 data->gamma = NULL;
01797 }
01798 }
01799
01800
01801 nfft_free(set->dpt);
01802 set->dpt = NULL;
01803
01804 for (tau = 1; tau < set->t+1; tau++)
01805 {
01806 nfft_free(set->xcvecs[tau-1]);
01807 set->xcvecs[tau-1] = NULL;
01808 }
01809 nfft_free(set->xcvecs);
01810 set->xcvecs = NULL;
01811
01812
01813 nfft_free(set->work);
01814 nfft_free(set->result);
01815
01816
01817 if (!(set->flags & FPT_NO_FAST_ALGORITHM))
01818 {
01819
01820 nfft_free(set->vec3);
01821 nfft_free(set->vec4);
01822 nfft_free(set->z);
01823 set->work = NULL;
01824 set->result = NULL;
01825 set->vec3 = NULL;
01826 set->vec4 = NULL;
01827 set->z = NULL;
01828
01829
01830 for(tau = 0; tau < set->t; tau++)
01831 {
01832 fftw_destroy_plan(set->plans_dct3[tau]);
01833 fftw_destroy_plan(set->plans_dct2[tau]);
01834 set->plans_dct3[tau] = NULL;
01835 set->plans_dct2[tau] = NULL;
01836 }
01837
01838 nfft_free(set->plans_dct3);
01839 nfft_free(set->plans_dct2);
01840 set->plans_dct3 = NULL;
01841 set->plans_dct2 = NULL;
01842 }
01843
01844 if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
01845 {
01846
01847 nfft_free(set->xc_slow);
01848 set->xc_slow = NULL;
01849 nfft_free(set->temp);
01850 set->temp = NULL;
01851 }
01852
01853
01854 nfft_free(set);
01855 }