NFFT 3.0 API Reference - NFFT: fpt.c Source File

00001 
00007 /* Include standard C headers. */
00008 #include <math.h>
00009 #include <string.h>
00010 #include <stdlib.h>
00011 #include <stdbool.h>
00012 
00013 /* Include FPT module header. */
00014 #include "nfft3.h"
00015 
00016 /* Include NFFT3 utilities header. */
00017 #include "util.h"
00018 
00019 /* Some macros for index calculation. */
00020 
00022 #define K_START_TILDE(x,y) (NFFT_MAX(NFFT_MIN(x,y-2),0))
00023 
00024 #define K_END_TILDE(x,y) NFFT_MIN(x,y-1)
00025 
00029 #define FIRST_L(x,y) ((int)floor((x)/(double)y))
00030 
00034 #define LAST_L(x,y) ((int)ceil(((x)+1)/(double)y)-1)
00035 
00036 #define N_TILDE(y) (y-1)
00037 
00038 #define IS_SYMMETRIC(x,y,z) (x >= ((y-1.0)/z))
00039 //#define IS_SYMMETRIC(x,y,z) (x >= ((y-1.0)/z))
00040 
00041 #ifdef TEST_STAB
00042   #define NFFT_MAX(a,b) ((a>b)?(a):(b))
00043 #endif
00044 
00048 typedef struct fpt_step_
00049 {
00050   bool stable;                            
00053   int N_stab;                             
00054   int t_stab;                             
00055   double **a11,**a12,**a21,**a22;         
00056   double *gamma;                          
00057 } fpt_step;
00058 
00062 typedef struct fpt_data_
00063 {
00064   fpt_step **steps;                       
00065   int k_start;
00066   double *alphaN;
00067   double *betaN;
00068   double *gammaN;
00069   double alpha_0;
00070   double beta_0;
00071   double gamma_m1;
00072   /* Data for direct transform. */
00073   double *alpha;
00074   double *beta;
00075   double *gamma;
00076 } fpt_data;
00077 
00081 typedef struct fpt_set_s_
00082 {
00083   int flags;                              
00084   int M;                                  
00085   int N;                                  
00087   int t;                                  
00088   fpt_data *dpt;                          
00089   double **xcvecs;                        
00092   double *xc;                             
00093   double complex *temp;                          
00094   double complex *work;                          
00095   double complex *result;                        
00096   double complex *vec3;
00097   double complex *vec4;
00098   double complex *z;
00099   fftw_plan *plans_dct3;                  
00101   fftw_plan *plans_dct2;                  
00103   fftw_r2r_kind *kinds;                   
00105   fftw_r2r_kind *kindsr;                  
00108   int *lengths; 
00110   /* Data for slow transforms. */
00111   double *xc_slow;
00112 } fpt_set_s;
00113 
00114 inline void abuvxpwy(double a, double b, double complex* u, double complex* x, double* v,
00115   double complex* y, double* w, int n)
00116 {
00117   int l;
00118   double complex *u_ptr, *x_ptr, *y_ptr;
00119   double *v_ptr, *w_ptr;
00120 
00121   u_ptr = u;
00122   x_ptr = x;
00123   v_ptr = v;
00124   y_ptr = y;
00125   w_ptr = w;
00126 
00127   for (l = 0; l < n; l++)
00128   {
00129     *u++ = a * (b * (*v++) * (*x++) + (*w++) * (*y++));
00130   }
00131 }
00132 
00133 #define ABUVXPWY_SYMMETRIC(NAME,S1,S2) \
00134 inline void NAME(double a, double b, double complex* u, double complex* x, double* v, \
00135   double complex* y, double* w, int n) \
00136 { \
00137   int l; \
00138   double complex *u_ptr, *x_ptr, *y_ptr; \
00139   double *v_ptr, *w_ptr; \
00140   \
00141   u_ptr = u; \
00142   x_ptr = x; \
00143   v_ptr = v; \
00144   y_ptr = y; \
00145   w_ptr = w; \
00146   \
00147   for (l = 0; l < n/2; l++) \
00148   { \
00149     *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
00150   } \
00151   v_ptr--; \
00152   w_ptr--; \
00153   for (l = 0; l < n/2; l++) \
00154   { \
00155     *u_ptr++ = a * (b * S1 * (*v_ptr--) * (*x_ptr++) + S2 * (*w_ptr--) * (*y_ptr++)); \
00156   } \
00157 }
00158 
00159 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric1,1.0,-1.0)
00160 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric2,-1.0,1.0)
00161 
00162 #define ABUVXPWY_SYMMETRIC_1(NAME,S1) \
00163 inline void NAME(double a, double b, double complex* u, double complex* x, double* v, \
00164   double complex* y, double* w, int n) \
00165 { \
00166   int l; \
00167   double complex *u_ptr, *x_ptr, *y_ptr; \
00168   double *v_ptr, *w_ptr; \
00169   \
00170   u_ptr = u; \
00171   x_ptr = x; \
00172   v_ptr = v; \
00173   y_ptr = y; \
00174   w_ptr = w; \
00175   \
00176   for (l = 0; l < n/2; l++) \
00177   { \
00178     *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
00179   } \
00180   v_ptr--; \
00181   /*w_ptr--;*/ \
00182   for (l = 0; l < n/2; l++) \
00183   { \
00184     *u_ptr++ = a * (b * S1 * (*v_ptr--) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
00185   } \
00186 }
00187 
00188 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_1,1.0)
00189 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_2,-1.0)
00190 
00191 #define ABUVXPWY_SYMMETRIC_2(NAME,S1) \
00192 inline void NAME(double a, double b, double complex* u, double complex* x, double* v, \
00193   double complex* y, double* w, int n) \
00194 { \
00195   int l; \
00196   double complex *u_ptr, *x_ptr, *y_ptr; \
00197   double *v_ptr, *w_ptr; \
00198   \
00199   u_ptr = u; \
00200   x_ptr = x; \
00201   v_ptr = v; \
00202   y_ptr = y; \
00203   w_ptr = w; \
00204   \
00205   for (l = 0; l < n/2; l++) \
00206   { \
00207     *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
00208   } \
00209   /*v_ptr--;*/ \
00210   w_ptr--; \
00211   for (l = 0; l < n/2; l++) \
00212   { \
00213     *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + S1 * (*w_ptr--) * (*y_ptr++)); \
00214   } \
00215 }
00216 
00217 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_1,1.0)
00218 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_2,-1.0)
00219 
00220 inline void auvxpwy(double a, double complex* u, double complex* x, double* v, double complex* y,
00221   double* w, int n)
00222 {
00223   int l;
00224   double complex *u_ptr, *x_ptr, *y_ptr;
00225   double *v_ptr, *w_ptr;
00226 
00227   u_ptr = u;
00228   x_ptr = x;
00229   v_ptr = v;
00230   y_ptr = y;
00231   w_ptr = w;
00232 
00233   for (l = 0; l < n; l++)
00234   {
00235     /*fprintf(stderr,"u = %le, v = %le, x = %le, w = %le, y = %le\n",*u_ptr,*v_ptr,*x_ptr,*w_ptr,*y_ptr);*/
00236     *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
00237   }
00238   /*fprintf(stderr,"\n");*/
00239 }
00240 
00241 #define AUVXPWY_SYMMETRIC(NAME,S1,S2) \
00242 inline void NAME(double a, double complex* u, double complex* x, double* v, double complex* y, \
00243   double* w, int n) \
00244 { \
00245   int l; \
00246   double complex *u_ptr, *x_ptr, *y_ptr; \
00247   double *v_ptr, *w_ptr; \
00248 \
00249   u_ptr = u; \
00250   x_ptr = x; \
00251   v_ptr = v; \
00252   y_ptr = y; \
00253   w_ptr = w; \
00254 \
00255   /*for (l = 0; l < n; l++)*/ \
00256   /*{*/ \
00257     /*fprintf(stderr,"u = %le, v = %le, x = %le, w = %le, y = %le\n",*/ \
00258     /*  u_ptr[l],v_ptr[l],x_ptr[l],w_ptr[l],y_ptr[l]);*/ \
00259   /*}*/ \
00260   \
00261   \
00262   for (l = 0; l < n/2; l++) \
00263   { \
00264     /*fprintf(stderr,"u = %le, v = %le, x = %le, w = %le, y = %le\n",*u,*v,*x,*w,*y);*/ \
00265     *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
00266   } \
00267   v_ptr--; \
00268   w_ptr--; \
00269   for (l = 0; l < n/2; l++) \
00270   { \
00271     /* fprintf(stderr,"u = %le, v = %le, x = %le, w = %le, y = %le\n",*u,*v,*x,*w,*y);*/ \
00272     *u_ptr++ = a * (S1 * (*v_ptr--) * (*x_ptr++) + S2 * (*w_ptr--) * (*y_ptr++)); \
00273   } \
00274   /*fprintf(stderr,"\n");*/ \
00275 }
00276 
00277 AUVXPWY_SYMMETRIC(auvxpwy_symmetric,1.0,-1.0)
00278 
00279 #define FPT_DO_STEP(NAME,M1_FUNCTION,M2_FUNCTION) \
00280 inline void NAME(double complex  *a, double complex *b, double *a11, double *a12, \
00281   double *a21, double *a22, double gamma, int tau, fpt_set set) \
00282 { \ \
00284   int length = 1<<(tau+1); \ \
00286   double norm = 1.0/(length<<1); \
00287   \
00288   /* Compensate for factors introduced by a raw DCT-III. */ \
00289   a[0] *= 2.0; \
00290   b[0] *= 2.0; \
00291   \
00292   /* Compute function values from Chebyshev-coefficients using a DCT-III. */ \
00293   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
00294   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
00295   \
00296   /*for (k = 0; k < length; k++)*/ \
00297   /*{*/ \
00298     /*fprintf(stderr,"fpt_do_step: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",*/ \
00299     /*  a11[k],a12[k],a21[k],a22[k]);*/ \
00300   /*}*/ \
00301   \
00302   /* Check, if gamma is zero. */ \
00303   if (gamma == 0.0) \
00304   { \
00305     /*fprintf(stderr,"gamma == 0!\n");*/ \
00306     /* Perform multiplication only for second row. */ \
00307     M2_FUNCTION(norm,b,b,a22,a,a21,length); \
00308   } \
00309   else \
00310   { \
00311     /*fprintf(stderr,"gamma != 0!\n");*/ \
00312     /* Perform multiplication for both rows. */ \
00313     M2_FUNCTION(norm,set->z,b,a22,a,a21,length); \
00314     M1_FUNCTION(norm*gamma,a,a,a11,b,a12,length); \
00315     memcpy(b,set->z,length*sizeof(double complex)); \
00316     /* Compute Chebyshev-coefficients using a DCT-II. */ \
00317     fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
00318     /* Compensate for factors introduced by a raw DCT-II. */ \
00319     a[0] *= 0.5; \
00320   } \
00321   \
00322   /* Compute Chebyshev-coefficients using a DCT-II. */ \
00323   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
00324   /* Compensate for factors introduced by a raw DCT-II. */ \
00325   b[0] *= 0.5; \
00326 }
00327 
00328 FPT_DO_STEP(fpt_do_step,auvxpwy,auvxpwy)
00329 FPT_DO_STEP(fpt_do_step_symmetric,auvxpwy_symmetric,auvxpwy_symmetric)
00330 FPT_DO_STEP(fpt_do_step_symmetric_u,auvxpwy_symmetric,auvxpwy)
00331 FPT_DO_STEP(fpt_do_step_symmetric_l,auvxpwy,auvxpwy_symmetric)
00332 
00333 #define FPT_DO_STEP_TRANSPOSED(NAME,M1_FUNCTION,M2_FUNCTION) \
00334 inline void NAME(double complex  *a, double complex *b, double *a11, \
00335   double *a12, double *a21, double *a22, double gamma, int tau, fpt_set set) \
00336 { \ \
00338   int length = 1<<(tau+1); \ \
00340   double norm = 1.0/(length<<1); \
00341   \
00342   /* Compute function values from Chebyshev-coefficients using a DCT-III. */ \
00343   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
00344   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
00345   \
00346   /* Perform matrix multiplication. */ \
00347   M1_FUNCTION(norm,gamma,set->z,a,a11,b,a21,length); \
00348   M2_FUNCTION(norm,gamma,b,a,a12,b,a22,length); \
00349   memcpy(a,set->z,length*sizeof(double complex)); \
00350   \
00351   /* Compute Chebyshev-coefficients using a DCT-II. */ \
00352   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
00353   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
00354 }
00355 
00356 FPT_DO_STEP_TRANSPOSED(fpt_do_step_transposed,abuvxpwy,abuvxpwy)
00357 FPT_DO_STEP_TRANSPOSED(fpt_do_step_transposed_symmetric,abuvxpwy_symmetric1,abuvxpwy_symmetric2)
00358 FPT_DO_STEP_TRANSPOSED(fpt_do_step_transposed_symmetric_u,abuvxpwy_symmetric1_1,abuvxpwy_symmetric1_2)
00359 FPT_DO_STEP_TRANSPOSED(fpt_do_step_transposed_symmetric_l,abuvxpwy_symmetric2_2,abuvxpwy_symmetric2_1)
00360 
00361 void eval_clenshaw(const double *x, double *y, int size, int k, const double *alpha,
00362   const double *beta, const double *gamma)
00363 {
00364   /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector
00365    * of knots  x[0], ..., x[size-1] by the Clenshaw algorithm
00366    */
00367   int i,j;
00368   double a,b,x_val_act,a_old;
00369   const double *x_act;
00370   double *y_act;
00371   const double *alpha_act, *beta_act, *gamma_act;
00372 
00373   /* Traverse all nodes. */
00374   x_act = x;
00375   y_act = y;
00376   for (i = 0; i < size; i++)
00377   {
00378     a = 1.0;
00379     b = 0.0;
00380     x_val_act = *x_act;
00381 
00382     if (k == 0)
00383     {
00384       *y_act = 1.0;
00385     }
00386     else
00387     {
00388       alpha_act = &(alpha[k]);
00389       beta_act = &(beta[k]);
00390       gamma_act = &(gamma[k]);
00391       for (j = k; j > 1; j--)
00392       {
00393         a_old = a;
00394         a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00395          b = a_old*(*gamma_act);
00396         alpha_act--;
00397         beta_act--;
00398         gamma_act--;
00399       }
00400       *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00401     }
00402     x_act++;
00403     y_act++;
00404   }
00405 }
00406 
00407 int eval_clenshaw_thresh(const double *x, double *y, int size, int k, 
00408   const double *alpha, const double *beta, const double *gamma, const 
00409   double threshold)
00410 {
00411   /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector
00412    * of knots  x[0], ..., x[size-1] by the Clenshaw algorithm
00413    */
00414   int i,j;
00415   double a,b,x_val_act,a_old;
00416   const double *x_act;
00417   double *y_act;
00418   const double *alpha_act, *beta_act, *gamma_act;
00419 
00420   /* Traverse all nodes. */
00421   x_act = x;
00422   y_act = y;
00423   for (i = 0; i < size; i++)
00424   {
00425     a = 1.0;
00426     b = 0.0;
00427     x_val_act = *x_act;
00428 
00429     if (k == 0)
00430     {
00431      *y_act = 1.0;
00432     }
00433     else
00434     {
00435       alpha_act = &(alpha[k]);
00436       beta_act = &(beta[k]);
00437       gamma_act = &(gamma[k]);
00438       for (j = k; j > 1; j--)
00439       {
00440         a_old = a;
00441         a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00442          b = a_old*(*gamma_act);
00443         alpha_act--;
00444         beta_act--;
00445         gamma_act--;
00446       }
00447       *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00448       if (fabs(*y_act) > threshold)
00449       {
00450         return 1;
00451       }
00452     }
00453     x_act++;
00454     y_act++;
00455   }
00456   return 0;
00457 }
00458 
00477 void eval_sum_clenshaw(int N, int M, double complex* a, double *x, double complex *y,
00478   double complex *temp, double *alpha, double *beta, double *gamma, double lambda)
00479 {
00480   int j,k;
00481   double complex* it1 = temp;
00482   double complex* it2 = y;
00483   double complex aux;
00484 
00485   /* Clenshaw's algorithm */
00486   for (j = 0; j <= M; j++)
00487   {
00488     it2[j] = a[N];
00489   }
00490 
00491   if (N > 0)
00492   {
00493     for (j = 0; j <= M; j++)
00494     {
00495       it1[j] = a[N-1];
00496     }
00497 
00498     //fprintf(stdout,"N = %d\n",N);
00499     for (k = N; k > 1; k--)
00500     {
00501 
00502       for (j = 0; j <= M; j++)
00503       {
00504         aux = a[k-2] + it2[j] * gamma[k-1];
00505         it2[j] = it1[j] + it2[j] * (alpha[k-1] * x[j] + beta[k-1]);
00506         it1[j] = aux;
00507       }
00508     }
00509 
00510 
00511     /* Compute final step. */
00512     for (j = 0; j <= M; j++)
00513     {
00514       it2[j] = it1[j] + it2[j] * (alpha[0] * x[j] + beta[0]);
00515     }
00516   }
00517 
00518   /* Compute final result by multiplying with the constant lambda */
00519   for (j = 0; j <= M; j++)
00520   {
00521     y[j] = lambda * it2[j];
00522   }
00523 }
00524 
00543 void eval_sum_clenshaw_transposed(int N, int M, double complex* a, double *x,
00544   double complex *y, double complex *temp, double *alpha, double *beta, double *gamma,
00545   double lambda)
00546 {
00547   int j,k;
00548   double complex* it1 = temp;
00549   double complex* it2 = y;
00550   double complex aux;
00551 
00552   /* Compute final result by multiplying with the constant lambda */
00553   a[0] = 0.0;
00554   for (j = 0; j <= M; j++)
00555   {
00556     it2[j] = lambda * y[j];
00557     a[0] += it2[j];
00558   }
00559 
00560   if (N > 0)
00561   {
00562     /* Compute final step. */
00563     a[1] = 0.0;
00564     for (j = 0; j <= M; j++)
00565     {
00566       it1[j] = it2[j];
00567       it2[j] = it2[j] * (alpha[0] * x[j] + beta[0]);
00568       a[1] += it2[j];
00569     }
00570 
00571     for (k = 2; k <= N; k++)
00572     {
00573       a[k] = 0.0;
00574       for (j = 0; j <= M; j++)
00575       {
00576         aux = it1[j];
00577         it1[j] = it2[j];
00578         it2[j] = it2[j]*(alpha[k-1] * x[j] + beta[k-1]) + gamma[k-1] * aux;
00579         a[k] += it2[j];
00580       }
00581     }
00582   }
00583 }
00584 
00585 
00586 fpt_set fpt_init(const int M, const int t, const unsigned int flags)
00587 {
00589   int plength;
00591   int tau;
00593   int m;
00594   int k;
00595 
00596   /* Allocate memory for new DPT set. */
00597   fpt_set_s *set = (fpt_set_s*)malloc(sizeof(fpt_set_s));
00598 
00599   /* Save parameters in structure. */
00600   set->flags = flags;
00601 
00602   //fprintf(stderr,"\nfpt_init: flags = %d \t %d\n",set->flags,flags);
00603 
00604   set->M = M;
00605   set->t = t;
00606   set->N = 1<<t;
00607 
00608   /* Allocate memory for L transforms. */
00609   set->dpt = (fpt_data*) malloc((M+1)*sizeof(fpt_data));
00610 
00611   /* Initialize with NULL pointer. */
00612   for (m = 0; m <= set->M; m++)
00613   {
00614     set->dpt[m].steps = (fpt_step**) NULL;
00615   }
00616 
00617   /* Create arrays with Chebyshev nodes. */
00618 
00619   /* Initialize array with Chebyshev coefficients for the polynomial x. This
00620    * would be trivially an array containing a 1 as second entry with all other
00621    * coefficients set to zero. In order to compensate for the multiplicative
00622    * factor 2 introduced by the DCT-III, we set this coefficient to 0.5 here. */
00623 
00624   /* Allocate memory for array of pointers to node arrays. */
00625   set->xcvecs = (double**) malloc((set->t/*-1*/)*sizeof(double*));
00626   /* For each polynomial length starting with 4, compute the Chebyshev nodes
00627    * using a DCT-III. */
00628   plength = 4;
00629   for (tau = 1; tau < /*t*/t+1; tau++)
00630   {
00631     /* Allocate memory for current array. */
00632     set->xcvecs[tau-1] = (double*) malloc(plength*sizeof(double));
00633     for (k = 0; k < plength; k++)
00634     {
00635       set->xcvecs[tau-1][k] = cos(((k+0.5)*PI)/plength);
00636     }
00637     plength = plength << 1;
00638   }
00639 
00641   set->work = (double complex*) malloc((2*set->N)*sizeof(double complex));
00642   set->result = (double complex*) malloc((2*set->N)*sizeof(double complex));
00643 
00644   /* Check if fast transform is activated. */
00645   if (set->flags & FPT_NO_FAST_ALGORITHM)
00646   {
00647   }
00648   else
00649   {
00651     set->vec3 = (double complex*) malloc(set->N*sizeof(double complex));
00652     set->vec4 = (double complex*) malloc(set->N*sizeof(double complex));
00653     set->z = (double complex*) malloc(set->N*sizeof(double complex));
00654 
00656     set->plans_dct3 = (fftw_plan*) fftw_malloc(sizeof(fftw_plan)*(set->t/*-1*/));
00657     set->plans_dct2 = (fftw_plan*) fftw_malloc(sizeof(fftw_plan)*(set->t/*-1*/));
00658     set->kinds      = (fftw_r2r_kind*) malloc(2*sizeof(fftw_r2r_kind));
00659     set->kinds[0]   = FFTW_REDFT01;
00660     set->kinds[1]   = FFTW_REDFT01;
00661     set->kindsr     = (fftw_r2r_kind*) malloc(2*sizeof(fftw_r2r_kind));
00662     set->kindsr[0]  = FFTW_REDFT10;
00663     set->kindsr[1]  = FFTW_REDFT10;
00664     set->lengths    = (int*) malloc((set->t/*-1*/)*sizeof(int));
00665     for (tau = 0, plength = 4; tau < set->t/*-1*/; tau++, plength<<=1)
00666     {
00667       set->lengths[tau] = plength;
00668       set->plans_dct3[tau] =
00669         fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL,
00670                            2, 1, (double*)set->result, NULL, 2, 1, set->kinds,
00671                            0);
00672       set->plans_dct2[tau] =
00673         fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL,
00674                            2, 1, (double*)set->result, NULL, 2, 1,set->kindsr,
00675                            0);
00676     }
00677     free(set->lengths);
00678     free(set->kinds);
00679     free(set->kindsr);
00680     set->lengths = NULL;
00681     set->kinds = NULL;
00682     set->kindsr = NULL;
00683   }
00684 
00685   if (set->flags & FPT_NO_DIRECT_ALGORITHM)
00686   {
00687   }
00688   else
00689   {
00690     set->xc_slow = (double*) malloc((set->N+1)*sizeof(double));
00691     set->temp = (double complex*) calloc((set->N+1),sizeof(double complex));
00692   }
00693 
00694   /* Return the newly created DPT set. */
00695   return set;
00696 }
00697 
00698 void fpt_precompute(fpt_set set, const int m, const double *alpha,
00699                     const double *beta, const double *gamma, int k_start,
00700                     const double threshold)
00701 {
00702 
00703   int tau;          
00704   int l;            
00705   int plength;      
00707   int degree;       
00709   int firstl;       
00710   int lastl;        
00711   int plength_stab; 
00713   int degree_stab;  
00715   double *a11;      
00717   double *a12;      
00719   double *a21;      
00721   double *a22;      
00723   const double *calpha;
00724   const double *cbeta;
00725   const double *cgamma;
00726   int needstab = 0; 
00727   int k_start_tilde;
00728   int N_tilde;
00729   int clength;
00730   int clength_1;
00731   int clength_2;
00732   int t_stab, N_stab;
00733 
00734   fpt_data *data;
00735 
00736   /* Allocate memory for DPT transform data. */
00737   //set->dpt[m] = (fpt_data*) malloc(sizeof(fpt_data));
00738 
00739   /* Get pointer to DPT data. */
00740   data = &(set->dpt[m]);
00741 
00742   /* Check, if already precomputed. */
00743   if (data->steps != NULL)
00744   {
00745     return;
00746   }
00747 
00748   /* Save k_start. */
00749   data->k_start = k_start;
00750 
00751   /* Check if fast transform is activated. */
00752   if (set->flags & FPT_NO_FAST_ALGORITHM)
00753   {
00754   }
00755   else
00756   {
00757     /* Save recursion coefficients. */
00758     data->alphaN = (double*) malloc((set->t-1)*sizeof(double complex));
00759     data->betaN = (double*) malloc((set->t-1)*sizeof(double complex));
00760     data->gammaN = (double*) malloc((set->t-1)*sizeof(double complex));
00761 
00762     for (tau = 2; tau <= set->t; tau++)
00763     {
00764 
00765       data->alphaN[tau-2] = alpha[1<<tau];
00766       data->betaN[tau-2] = beta[1<<tau];
00767       data->gammaN[tau-2] = gamma[1<<tau];
00768     }
00769 
00770     data->alpha_0 = alpha[1];
00771     data->beta_0 = beta[1];
00772     data->gamma_m1 = gamma[0];
00773 
00774     k_start_tilde = K_START_TILDE(data->k_start,nfft_next_power_of_2(data->k_start)
00775       /*set->N*/);
00776     N_tilde = N_TILDE(set->N);
00777 
00778     /* Allocate memory for the cascade with t = log_2(N) many levels. */
00779     data->steps = (fpt_step**) malloc(sizeof(fpt_step*)*set->t);
00780 
00781     /* For tau = 1,...t compute the matrices U_{n,tau,l}. */
00782     plength = 4;
00783     for (tau = 1; tau < set->t; tau++)
00784     {
00785       /* Compute auxilliary values. */
00786       degree = plength>>1;
00787       /* Compute first l. */
00788       firstl = FIRST_L(k_start_tilde,plength);
00789       /* Compute last l. */
00790       lastl = LAST_L(N_tilde,plength);
00791 
00792       /* Allocate memory for current level. This level will contain 2^{t-tau-1}
00793        * many matrices. */
00794       data->steps[tau] = (fpt_step*) fftw_malloc(sizeof(fpt_step)
00795                          * (lastl+1));
00796 
00797       /* For l = 0,...2^{t-tau-1}-1 compute the matrices U_{n,tau,l}. */
00798       for (l = firstl; l <= lastl; l++)
00799       {
00800         if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
00801         {
00802           //fprintf(stderr,"fpt_precompute(%d): symmetric step\n",m);
00803           //fflush(stderr);
00804           clength = plength/2;
00805         }
00806         else
00807         {
00808           clength = plength;
00809         }
00810 
00811         /* Allocate memory for the components of U_{n,tau,l}. */
00812         a11 = (double*) fftw_malloc(sizeof(double)*clength);
00813         a12 = (double*) fftw_malloc(sizeof(double)*clength);
00814         a21 = (double*) fftw_malloc(sizeof(double)*clength);
00815         a22 = (double*) fftw_malloc(sizeof(double)*clength);
00816 
00817         /* Evaluate the associated polynomials at the 2^{tau+1} Chebyshev
00818          * nodes. */
00819 
00820         /* Get the pointers to the three-term recurrence coeffcients. */
00821         calpha = &(alpha[plength*l+1+1]);
00822         cbeta = &(beta[plength*l+1+1]);
00823         cgamma = &(gamma[plength*l+1+1]);
00824 
00825         if (set->flags & FPT_NO_STABILIZATION)
00826         {
00827           /* Evaluate P_{2^{tau}-2}^n(\cdot,2^{tau+1}l+2). */
00828           eval_clenshaw(set->xcvecs[tau-1], a11, clength, degree-2, calpha, cbeta,
00829             cgamma);
00830           eval_clenshaw(set->xcvecs[tau-1], a12, clength, degree-1, calpha, cbeta,
00831             cgamma);
00832           calpha--;
00833           cbeta--;
00834           cgamma--;
00835           eval_clenshaw(set->xcvecs[tau-1], a21, clength, degree-1, calpha, cbeta,
00836             cgamma);
00837           eval_clenshaw(set->xcvecs[tau-1], a22, clength, degree, calpha, cbeta,
00838             cgamma);
00839           needstab = 0;
00840         }
00841         else
00842         {
00843           needstab = eval_clenshaw_thresh(set->xcvecs[tau-1], a11, clength, degree-2,
00844             calpha, cbeta, cgamma, threshold);
00845           if (needstab == 0)
00846           {
00847             /* Evaluate P_{2^{tau}-1}^n(\cdot,2^{tau+1}l+2). */
00848             needstab = eval_clenshaw_thresh(set->xcvecs[tau-1], a12, clength, degree-1,
00849               calpha, cbeta, cgamma, threshold);
00850             if (needstab == 0)
00851             {
00852               calpha--;
00853               cbeta--;
00854               cgamma--;
00855               /* Evaluate P_{2^{tau}-1}^n(\cdot,2^{tau+1}l+1). */
00856               needstab = eval_clenshaw_thresh(set->xcvecs[tau-1], a21, clength,
00857                 degree-1, calpha, cbeta, cgamma, threshold);
00858               if (needstab == 0)
00859               {
00860                 /* Evaluate P_{2^{tau}}^n(\cdot,2^{tau+1}l+1). */
00861                 needstab = eval_clenshaw_thresh(set->xcvecs[tau-1], a22, clength,
00862                   degree, calpha, cbeta, cgamma, threshold);
00863               }
00864             }
00865           }
00866         }
00867 
00868         /* Check if stabilization needed. */
00869         if (needstab == 0)
00870         {
00871           data->steps[tau][l].a11 = (double**) fftw_malloc(sizeof(double*));
00872           data->steps[tau][l].a12 = (double**) fftw_malloc(sizeof(double*));
00873           data->steps[tau][l].a21 = (double**) fftw_malloc(sizeof(double*));
00874           data->steps[tau][l].a22 = (double**) fftw_malloc(sizeof(double*));
00875           data->steps[tau][l].gamma = (double*) fftw_malloc(sizeof(double));
00876           /* No stabilization needed. */
00877           data->steps[tau][l].a11[0] = a11;
00878           data->steps[tau][l].a12[0] = a12;
00879           data->steps[tau][l].a21[0] = a21;
00880           data->steps[tau][l].a22[0] = a22;
00881           data->steps[tau][l].gamma[0] = gamma[plength*l+1+1];
00882           data->steps[tau][l].stable = true;
00883         }
00884         else
00885         {
00886           /* Stabilize. */
00887           degree_stab = degree*(2*l+1);
00888           nfft_next_power_of_2_exp((l+1)*(1<<(tau+1)),&N_stab,&t_stab);
00889           /*fprintf(stderr,"(l+1)*(1<<(tau+2)) = %d, N_stab = %d, t_stab = %d\n",
00890             (l+1)*(1<<(tau+2)),N_stab,t_stab);*/
00891 
00892           /* Old arrays are to small. */
00893           fftw_free(a11);
00894           fftw_free(a12);
00895           fftw_free(a21);
00896           fftw_free(a22);
00897 
00898           data->steps[tau][l].a11 = (double**) fftw_malloc(sizeof(double*));
00899           data->steps[tau][l].a12 = (double**) fftw_malloc(sizeof(double*));
00900           data->steps[tau][l].a21 = (double**) fftw_malloc(sizeof(double*));
00901           data->steps[tau][l].a22 = (double**) fftw_malloc(sizeof(double*));
00902           data->steps[tau][l].gamma = (double*) fftw_malloc(sizeof(double));
00903 
00904           plength_stab = N_stab;
00905 
00906           if (m <= 1)
00907           {
00908             clength_1 = plength_stab/2;
00909             clength_2 = plength_stab/2;
00910           }
00911           else if (m%2 == 0)
00912           {
00913             clength_1 = plength_stab/2;
00914             clength_2 = plength_stab;
00915           }
00916           else
00917           {
00918             clength_1 = plength_stab;
00919             clength_2 = plength_stab/2;
00920           }
00921 
00922 
00923           /* Allocate memory for arrays. */
00924           a11 = (double*) fftw_malloc(sizeof(double)*clength_1);
00925           a12 = (double*) fftw_malloc(sizeof(double)*clength_1);
00926           a21 = (double*) fftw_malloc(sizeof(double)*clength_2);
00927           a22 = (double*) fftw_malloc(sizeof(double)*clength_2);
00928 
00929           /* Get the pointers to the three-term recurrence coeffcients. */
00930           calpha = &(alpha[2]);
00931           cbeta = &(beta[2]);
00932           cgamma = &(gamma[2]);
00933           /* Evaluate P_{2^{tau}(2l+1)-2}^n(\cdot,2). */
00934           eval_clenshaw(set->xcvecs[t_stab-2], a11, clength_1, degree_stab-2,
00935             calpha, cbeta, cgamma);
00936           /* Evaluate P_{2^{tau}(2l+1)-1}^n(\cdot,2). */
00937           eval_clenshaw(set->xcvecs[t_stab-2], a12, clength_1, degree_stab-1,
00938             calpha, cbeta, cgamma);
00939           calpha--;
00940           cbeta--;
00941           cgamma--;
00942           /* Evaluate P_{2^{tau}(2l+1)-1}^n(\cdot,1). */
00943           eval_clenshaw(set->xcvecs[t_stab-2], a21, clength_2, degree_stab-1,
00944             calpha, cbeta, cgamma);
00945           /* Evaluate P_{2^{tau}(2l+1)}^n(\cdot,1). */
00946           eval_clenshaw(set->xcvecs[t_stab-2], a22, clength_2, degree_stab+0,
00947             calpha, cbeta, cgamma);
00948 
00949           data->steps[tau][l].a11[0] = a11;
00950           data->steps[tau][l].a12[0] = a12;
00951           data->steps[tau][l].a21[0] = a21;
00952           data->steps[tau][l].a22[0] = a22;
00953           data->steps[tau][l].gamma[0] =  gamma[1+1];
00954           data->steps[tau][l].stable = false;
00955           data->steps[tau][l].t_stab = t_stab;
00956           data->steps[tau][l].N_stab = N_stab;
00957         }
00958       }
00960       plength = plength << 1;
00961     }
00962   }
00963 
00964   if (set->flags & FPT_NO_DIRECT_ALGORITHM)
00965   {
00966   }
00967   else
00968   {
00969     /* Check, if recurrence coefficients must be copied. */
00970     if (set->flags & FPT_PERSISTENT_DATA)
00971     {
00972       data->alpha = (double*) alpha;
00973       data->beta = (double*) beta;
00974       data->gamma = (double*) gamma;
00975     }
00976     else
00977     {
00978       data->alpha = (double*) malloc((set->N+1)*sizeof(double));
00979       data->beta = (double*) malloc((set->N+1)*sizeof(double));
00980       data->gamma = (double*) malloc((set->N+1)*sizeof(double));
00981       memcpy(data->alpha,alpha,(set->N+1)*sizeof(double));
00982       memcpy(data->beta,beta,(set->N+1)*sizeof(double));
00983       memcpy(data->gamma,gamma,(set->N+1)*sizeof(double));
00984     }
00985   }
00986 }
00987 
00988 void dpt_trafo(fpt_set set, const int m, const double complex *x, double complex *y,
00989   const int k_end, const unsigned int flags)
00990 {
00991   int j;
00992   fpt_data *data = &(set->dpt[m]);
00993   int Nk;
00994   int tk;
00995   double norm;
00996 
00997   nfft_next_power_of_2_exp(k_end+1,&Nk,&tk);
00998   norm = 2.0/(Nk<<1);
00999 
01000   if (set->flags & FPT_NO_DIRECT_ALGORITHM)
01001   {
01002     return;
01003   }
01004 
01005   if (flags & FPT_FUNCTION_VALUES)
01006   {
01007     /* Fill array with Chebyshev nodes. */
01008     for (j = 0; j <= k_end; j++)
01009     {
01010       set->xc_slow[j] = cos((PI*(j+0.5))/(k_end+1));
01011     }
01012 
01013     memset(set->result,0U,data->k_start*sizeof(double complex));
01014     memcpy(&set->result[data->k_start],x,(k_end-data->k_start+1)*sizeof(double complex));
01015 
01016     eval_sum_clenshaw(k_end, k_end, set->result, set->xc_slow,
01017       y, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
01018       data->gamma_m1);
01019   }
01020   else
01021   {
01022     memset(set->temp,0U,data->k_start*sizeof(double complex));
01023     memcpy(&set->temp[data->k_start],x,(k_end-data->k_start+1)*sizeof(double complex));
01024 
01025     eval_sum_clenshaw(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
01026       set->result, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
01027       data->gamma_m1);
01028 
01029     fftw_execute_r2r(set->plans_dct2[tk-2],(double*)set->result,
01030       (double*)set->result);
01031 
01032     set->result[0] *= 0.5;
01033     for (j = 0; j < Nk; j++)
01034     {
01035       set->result[j] *= norm;
01036     }
01037 
01038     memcpy(y,set->result,(k_end+1)*sizeof(double complex));
01039   }
01040 }
01041 
01042 void fpt_trafo(fpt_set set, const int m, const double complex *x, double complex *y,
01043   const int k_end, const unsigned int flags)
01044 {
01045   /* Get transformation data. */
01046   fpt_data *data = &(set->dpt[m]);
01048   int Nk;
01050   int tk;
01052   int k_start_tilde;
01054   int k_end_tilde;
01055 
01057   int tau;
01059   int firstl;
01061   int lastl;
01063   int l;
01065   int plength;
01067   int plength_stab;
01068   int t_stab;
01070   fpt_step *step;
01072   fftw_plan plan;
01073   int length = k_end+1;
01074   fftw_r2r_kind kinds[2] = {FFTW_REDFT01,FFTW_REDFT01};
01075 
01077   int k;
01078 
01079   double complex *work_ptr;
01080   const double complex *x_ptr;
01081 
01082   nfft_next_power_of_2_exp(k_end,&Nk,&tk);
01083   k_start_tilde = K_START_TILDE(data->k_start,Nk);
01084   k_end_tilde = K_END_TILDE(k_end,Nk);
01085 
01086   /* Check if fast transform is activated. */
01087   if (set->flags & FPT_NO_FAST_ALGORITHM)
01088   {
01089     return;
01090   }
01091 
01092   if (flags & FPT_FUNCTION_VALUES)
01093   {
01094     plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
01095       (double*)set->work, NULL, 2, 1, kinds, 0U);
01096   }
01097 
01098   /* Initialize working arrays. */
01099   memset(set->result,0U,2*Nk*sizeof(double complex));
01100 
01101   /* The first step. */
01102 
01103   /* Set the first 2*data->k_start coefficients to zero. */
01104   memset(set->work,0U,2*data->k_start*sizeof(double complex));
01105 
01106   work_ptr = &set->work[2*data->k_start];
01107   x_ptr = x;
01108 
01109   for (k = 0; k < k_end_tilde-data->k_start+1; k++)
01110   {
01111     *work_ptr++ = *x_ptr++;
01112     *work_ptr++ = 0;
01113   }
01114 
01115   /* Set the last 2*(set->N-1-k_end_tilde) coefficients to zero. */
01116   memset(&set->work[2*(k_end_tilde+1)],0U,2*(Nk-1-k_end_tilde)*sizeof(double complex));
01117 
01118   /* If k_end == Nk, use three-term recurrence to map last coefficient x_{Nk} to
01119    * x_{Nk-1} and x_{Nk-2}. */
01120   if (k_end == Nk)
01121   {
01122     set->work[2*(Nk-2)]   += data->gammaN[tk-2]*x[Nk-data->k_start];
01123     set->work[2*(Nk-1)]   += data->betaN[tk-2]*x[Nk-data->k_start];
01124     set->work[2*(Nk-1)+1]  = data->alphaN[tk-2]*x[Nk-data->k_start];
01125   }
01126 
01127   /*--------*/
01128   /*for (k = 0; k < 2*Nk; k++)
01129   {
01130     fprintf(stderr,"work[%2d] = %le + I*%le\tresult[%2d] = %le + I*%le\n",
01131       k,creal(set->work[k]),cimag(set->work[k]),k,creal(set->result[k]),
01132       cimag(set->result[k]));
01133   }*/
01134   /*--------*/
01135 
01136   /* Compute the remaining steps. */
01137   plength = 4;
01138   for (tau = 1; tau < tk; tau++)
01139   {
01140     /* Compute first l. */
01141     firstl = FIRST_L(k_start_tilde,plength);
01142     /* Compute last l. */
01143     lastl = LAST_L(k_end_tilde,plength);
01144 
01145     /* Compute the multiplication steps. */
01146     for (l = firstl; l <= lastl; l++)
01147     {
01148       /* Copy vectors to multiply into working arrays zero-padded to twice the length. */
01149       memcpy(set->vec3,&(set->work[(plength/2)*(4*l+2)]),(plength/2)*sizeof(double complex));
01150       memcpy(set->vec4,&(set->work[(plength/2)*(4*l+3)]),(plength/2)*sizeof(double complex));
01151       memset(&set->vec3[plength/2],0U,(plength/2)*sizeof(double complex));
01152       memset(&set->vec4[plength/2],0U,(plength/2)*sizeof(double complex));
01153 
01154       /* Copy coefficients into first half. */
01155       memcpy(&(set->work[(plength/2)*(4*l+2)]),&(set->work[(plength/2)*(4*l+1)]),(plength/2)*sizeof(double complex));
01156       memset(&(set->work[(plength/2)*(4*l+1)]),0U,(plength/2)*sizeof(double complex));
01157       memset(&(set->work[(plength/2)*(4*l+3)]),0U,(plength/2)*sizeof(double complex));
01158 
01159       /* Get matrix U_{n,tau,l} */
01160       step = &(data->steps[tau][l]);
01161 
01162       /* Check if step is stable. */
01163       if (step->stable)
01164       {
01165         /* Check, if we should do a symmetrizised step. */
01166         if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
01167         {
01168           /*for (k = 0; k < plength; k++)
01169           {
01170             fprintf(stderr,"fpt_trafo: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",
01171               step->a11[0][k],step->a12[0][k],step->a21[0][k],step->a22[0][k]);
01172           }*/
01173           /* Multiply third and fourth polynomial with matrix U. */
01174           //fprintf(stderr,"\nhallo\n");
01175           fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0],
01176             step->a12[0], step->a21[0], step->a22[0], step->gamma[0], tau, set);
01177         }
01178         else
01179         {
01180           /* Multiply third and fourth polynomial with matrix U. */
01181           fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01182             step->a21[0], step->a22[0], step->gamma[0], tau, set);
01183         }
01184 
01185         if (step->gamma[0] != 0.0)
01186         {
01187           for (k = 0; k < plength; k++)
01188           {
01189             set->work[plength*2*l+k] += set->vec3[k];
01190           }
01191         }
01192         for (k = 0; k < plength; k++)
01193         {
01194           set->work[plength*(2*l+1)+k] += set->vec4[k];
01195         }
01196       }
01197       else
01198       {
01199         /* Stabilize. */
01200 
01201         /* The lengh of the polynomials */
01202         plength_stab = step->N_stab;
01203         t_stab = step->t_stab;
01204 
01205         /*---------*/
01206         /*fprintf(stderr,"\nfpt_trafo: stabilizing at tau = %d, l = %d.\n",tau,l);
01207         fprintf(stderr,"\nfpt_trafo: plength_stab = %d.\n",plength_stab);
01208         fprintf(stderr,"\nfpt_trafo: tk = %d.\n",tk);
01209         fprintf(stderr,"\nfpt_trafo: index = %d.\n",tk-tau-1);*/
01210         /*---------*/
01211 
01212         /* Set rest of vectors explicitely to zero */
01213         /*fprintf(stderr,"fpt_trafo: stabilizing: plength = %d, plength_stab = %d\n",
01214           plength, plength_stab);*/
01215         memset(&set->vec3[plength/2],0U,(plength_stab-plength/2)*sizeof(double complex));
01216         memset(&set->vec4[plength/2],0U,(plength_stab-plength/2)*sizeof(double complex));
01217 
01218         /* Multiply third and fourth polynomial with matrix U. */
01219         if (m <= 1)
01220         {
01221           fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01222             step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);
01223         }
01224         else if (m%2 == 0)
01225         {
01226           fpt_do_step_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
01227             step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);
01228         }
01229         else
01230         {
01231           fpt_do_step_symmetric_l(set->vec3, set->vec4, step->a11[0], step->a12[0],
01232             step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);
01233         }
01234 
01235         if (step->gamma[0] != 0.0)
01236         {
01237           for (k = 0; k < plength_stab; k++)
01238           {
01239             set->result[k] += set->vec3[k];
01240           }
01241         }
01242 
01243         for (k = 0; k < plength_stab; k++)
01244         {
01245           set->result[Nk+k] += set->vec4[k];
01246         }
01247       }
01248     }
01249     /* Double length of polynomials. */
01250     plength = plength<<1;
01251 
01252     /*--------*/
01253     /*for (k = 0; k < 2*Nk; k++)
01254     {
01255       fprintf(stderr,"work[%2d] = %le + I*%le\tresult[%2d] = %le + I*%le\n",
01256         k,creal(set->work[k]),cimag(set->work[k]),k,creal(set->result[k]),
01257         cimag(set->result[k]));
01258     }*/
01259     /*--------*/
01260   }
01261 
01262   /* Add the resulting cascade coeffcients to the coeffcients accumulated from
01263    * the stabilization steps. */
01264   for (k = 0; k < 2*Nk; k++)
01265   {
01266     set->result[k] += set->work[k];
01267   }
01268 
01269   /* The last step. Compute the Chebyshev coeffcients c_k^n from the
01270    * polynomials in front of P_0^n and P_1^n. */
01271   y[0] = data->gamma_m1*(set->result[0] + data->beta_0*set->result[Nk] +
01272     data->alpha_0*set->result[Nk+1]*0.5);
01273   y[1] = data->gamma_m1*(set->result[1] + data->beta_0*set->result[Nk+1]+
01274     data->alpha_0*(set->result[Nk]+set->result[Nk+2]*0.5));
01275   y[k_end-1] = data->gamma_m1*(set->result[k_end-1] +
01276     data->beta_0*set->result[Nk+k_end-1] +
01277     data->alpha_0*set->result[Nk+k_end-2]*0.5);
01278   y[k_end] = data->gamma_m1*(0.5*data->alpha_0*set->result[Nk+k_end-1]);
01279   for (k = 2; k <= k_end-2; k++)
01280   {
01281     y[k] = data->gamma_m1*(set->result[k] + data->beta_0*set->result[Nk+k] +
01282       data->alpha_0*0.5*(set->result[Nk+k-1]+set->result[Nk+k+1]));
01283   }
01284 
01285   if (flags & FPT_FUNCTION_VALUES)
01286   {
01287     y[0] *= 2.0;
01288     fftw_execute_r2r(plan,(double*)y,(double*)y);
01289     fftw_destroy_plan(plan);
01290     for (k = 0; k <= k_end; k++)
01291     {
01292       y[k] *= 0.5;
01293     }
01294   }
01295 }
01296 
01297 void dpt_transposed(fpt_set set, const int m, double complex *x, 
01298   double complex *y, const int k_end, const unsigned int flags)
01299 {
01300   int j;
01301   fpt_data *data = &(set->dpt[m]);
01302   int Nk;
01303   int tk;
01304   double norm;
01305 
01306   nfft_next_power_of_2_exp(k_end+1,&Nk,&tk);
01307   norm = 2.0/(Nk<<1);
01308 
01309   if (set->flags & FPT_NO_DIRECT_ALGORITHM)
01310   {
01311     return;
01312   }
01313 
01314   if (flags & FPT_FUNCTION_VALUES)
01315   {
01316     for (j = 0; j <= k_end; j++)
01317     {
01318       set->xc_slow[j] = cos((PI*(j+0.5))/(k_end+1));
01319     }
01320 
01321     eval_sum_clenshaw_transposed(k_end, k_end, set->result, set->xc_slow,
01322       y, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
01323       data->gamma_m1);
01324 
01325     memcpy(x,&set->result[data->k_start],(k_end-data->k_start+1)*
01326       sizeof(double complex));
01327   }
01328   else
01329   {
01330     memcpy(set->result,y,(k_end+1)*sizeof(double complex));
01331     memset(&set->result[k_end+1],0U,(Nk-k_end-1)*sizeof(double complex));
01332 
01333     for (j = 0; j < Nk; j++)
01334     {
01335       set->result[j] *= norm;
01336     }
01337 
01338     fftw_execute_r2r(set->plans_dct3[tk-2],(double*)set->result,
01339       (double*)set->result);
01340 
01341     eval_sum_clenshaw_transposed(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
01342       set->result, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
01343       data->gamma_m1);
01344 
01345     memcpy(x,&set->temp[data->k_start],(k_end-data->k_start+1)*sizeof(double complex));
01346   }
01347 }
01348 
01349 void fpt_transposed(fpt_set set, const int m, double complex *x, 
01350   const double complex *y, const int k_end, const unsigned int flags)
01351 {
01352   /* Get transformation data. */
01353   fpt_data *data = &(set->dpt[m]);
01355   int Nk;
01357   int tk;
01359   int k_start_tilde;
01361   int k_end_tilde;
01362 
01364   int tau;
01366   int firstl;
01368   int lastl;
01370   int l;
01372   int plength;
01374   int plength_stab;
01376   fpt_step *step;
01378   fftw_plan plan;
01379   int length = k_end+1;
01380   fftw_r2r_kind kinds[2] = {FFTW_REDFT10,FFTW_REDFT10};
01382   int k;
01383   int t_stab;
01384 
01385   nfft_next_power_of_2_exp(k_end,&Nk,&tk);
01386   k_start_tilde = K_START_TILDE(data->k_start,Nk);
01387   k_end_tilde = K_END_TILDE(k_end,Nk);
01388 
01389   /* Check if fast transform is activated. */
01390   if (set->flags & FPT_NO_FAST_ALGORITHM)
01391   {
01392     return;
01393   }
01394 
01395   if (flags & FPT_FUNCTION_VALUES)
01396   {
01397     plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
01398       (double*)set->work, NULL, 2, 1, kinds, 0U);
01399     fftw_execute_r2r(plan,(double*)y,(double*)set->result);
01400     fftw_destroy_plan(plan);
01401     for (k = 0; k <= k_end; k++)
01402     {
01403       set->result[k] *= 0.5;
01404     }
01405   }
01406   else
01407   {
01408     memcpy(set->result,y,(k_end+1)*sizeof(double complex));
01409   }
01410 
01411   /* Initialize working arrays. */
01412   memset(set->work,0U,2*Nk*sizeof(double complex));
01413 
01414   /* The last step is now the first step. */
01415   for (k = 0; k <= k_end; k++)
01416   {
01417     set->work[k] = data->gamma_m1*set->result[k];
01418   }
01419   //memset(&set->work[k_end+1],0U,(Nk+1-k_end)*sizeof(double complex));
01420 
01421   set->work[Nk] = data->gamma_m1*(data->beta_0*set->result[0] +
01422     data->alpha_0*set->result[1]);
01423   for (k = 1; k < k_end; k++)
01424   {
01425     set->work[Nk+k] = data->gamma_m1*(data->beta_0*set->result[k] +
01426       data->alpha_0*0.5*(set->result[k-1]+set->result[k+1]));
01427   }
01428   if (k_end<Nk)
01429   {
01430     memset(&set->work[k_end],0U,(Nk-k_end)*sizeof(double complex));
01431   }
01432 
01434   memcpy(set->result,set->work,2*Nk*sizeof(double complex));
01435 
01436   /* Compute the remaining steps. */
01437   plength = Nk;
01438   for (tau = tk-1; tau >= 1; tau--)
01439   {
01440     /* Compute first l. */
01441     firstl = FIRST_L(k_start_tilde,plength);
01442     /* Compute last l. */
01443     lastl = LAST_L(k_end_tilde,plength);
01444 
01445     /* Compute the multiplication steps. */
01446     for (l = firstl; l <= lastl; l++)
01447     {
01448       /* Initialize second half of coefficient arrays with zeros. */
01449       memcpy(set->vec3,&(set->work[(plength/2)*(4*l+0)]),plength*sizeof(double complex));
01450       memcpy(set->vec4,&(set->work[(plength/2)*(4*l+2)]),plength*sizeof(double complex));
01451 
01452       memcpy(&set->work[(plength/2)*(4*l+1)],&(set->work[(plength/2)*(4*l+2)]),
01453         (plength/2)*sizeof(double complex));
01454 
01455       /* Get matrix U_{n,tau,l} */
01456       step = &(data->steps[tau][l]);
01457 
01458       /* Check if step is stable. */
01459       if (step->stable)
01460       {
01461         if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
01462         {
01463           /* Multiply third and fourth polynomial with matrix U. */
01464           fpt_do_step_transposed_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01465             step->a21[0], step->a22[0], step->gamma[0], tau, set);
01466         }
01467         else
01468         {
01469           /* Multiply third and fourth polynomial with matrix U. */
01470           fpt_do_step_transposed(set->vec3, set->vec4, step->a11[0], step->a12[0],
01471             step->a21[0], step->a22[0], step->gamma[0], tau, set);
01472         }
01473         memcpy(&(set->vec3[plength/2]), set->vec4,(plength/2)*sizeof(double complex));
01474 
01475         for (k = 0; k < plength; k++)
01476         {
01477           set->work[plength*(4*l+2)/2+k] = set->vec3[k];
01478         }
01479       }
01480       else
01481       {
01482         /* Stabilize. */
01483         plength_stab = step->N_stab;
01484         t_stab = step->t_stab;
01485 
01486         memcpy(set->vec3,set->result,plength_stab*sizeof(double complex));
01487         memcpy(set->vec4,&(set->result[Nk]),plength_stab*sizeof(double complex));
01488 
01489         /* Multiply third and fourth polynomial with matrix U. */
01490         if (m <= 1)
01491         {
01492           fpt_do_step_transposed_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01493             step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);
01494         }
01495         else if (m%2 == 0)
01496         {
01497           fpt_do_step_transposed_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
01498             step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);
01499         }
01500         else
01501         {
01502           fpt_do_step_transposed_symmetric_l(set->vec3, set->vec4, step->a11[0], step->a12[0],
01503             step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);
01504         }
01505 
01506         memcpy(&(set->vec3[plength/2]),set->vec4,(plength/2)*sizeof(double complex));
01507 
01508         for (k = 0; k < plength; k++)
01509         {
01510           set->work[(plength/2)*(4*l+2)+k] = set->vec3[k];
01511         }
01512        }
01513     }
01514     /* Half the length of polynomial arrays. */
01515     plength = plength>>1;
01516   }
01517 
01518   /* First step */
01519   for (k = 0; k <= k_end_tilde-data->k_start; k++)
01520   {
01521     x[k] = set->work[2*(data->k_start+k)];
01522   }
01523   if (k_end == Nk)
01524   {
01525     x[Nk-data->k_start] =
01526         data->gammaN[tk-2]*set->work[2*(Nk-2)]
01527       + data->betaN[tk-2] *set->work[2*(Nk-1)]
01528       + data->alphaN[tk-2]*set->work[2*(Nk-1)+1];
01529   }
01530 }
01531 
01532 void fpt_finalize(fpt_set set)
01533 {
01534   int tau;
01535   int l;
01536   int m;
01537   fpt_data *data;
01538   int k_start_tilde;
01539   int N_tilde;
01540   int firstl, lastl;
01541   int plength;
01542 
01543   /* TODO Clean up DPT transform data structures. */
01544   for (m = 0; m <= set->M; m++)
01545   {
01546     /* Check if precomputed. */
01547     data = &set->dpt[m];
01548     if (data->steps != (fpt_step**)NULL)
01549     {
01550       free(data->alphaN);
01551       free(data->betaN);
01552       free(data->gammaN);
01553       data->alphaN = NULL;
01554       data->betaN = NULL;
01555       data->gammaN = NULL;
01556 
01557       /* Free precomputed data. */
01558       k_start_tilde = K_START_TILDE(data->k_start,nfft_next_power_of_2(data->k_start)
01559         /*set->N*/);
01560       N_tilde = N_TILDE(set->N);
01561       plength = 4;
01562       for (tau = 1; tau < set->t; tau++)
01563       {
01564         /* Compute first l. */
01565         firstl = FIRST_L(k_start_tilde,plength);
01566         /* Compute last l. */
01567         lastl = LAST_L(N_tilde,plength);
01568 
01569         /* For l = 0,...2^{t-tau-1}-1 compute the matrices U_{n,tau,l}. */
01570         for (l = firstl; l <= lastl; l++)
01571         {
01572           /* Free components. */
01573           free(data->steps[tau][l].a11[0]);
01574           free(data->steps[tau][l].a12[0]);
01575           free(data->steps[tau][l].a21[0]);
01576           free(data->steps[tau][l].a22[0]);
01577           data->steps[tau][l].a11[0] = NULL;
01578           data->steps[tau][l].a12[0] = NULL;
01579           data->steps[tau][l].a21[0] = NULL;
01580           data->steps[tau][l].a22[0] = NULL;
01581           /* Free components. */
01582           free(data->steps[tau][l].a11);
01583           free(data->steps[tau][l].a12);
01584           free(data->steps[tau][l].a21);
01585           free(data->steps[tau][l].a22);
01586           free(data->steps[tau][l].gamma);
01587           data->steps[tau][l].a11 = NULL;
01588           data->steps[tau][l].a12 = NULL;
01589           data->steps[tau][l].a21 = NULL;
01590           data->steps[tau][l].a22 = NULL;
01591           data->steps[tau][l].gamma = NULL;
01592         }
01593         /* Free pointers for current level. */
01594         free(data->steps[tau]);
01595         data->steps[tau] = NULL;
01596         /* Double length of polynomials. */
01597         plength = plength<<1;
01598       }
01599       /* Free steps. */
01600       free(data->steps);
01601       data->steps = NULL;
01602     }
01603 
01604     if (set->flags & FPT_NO_DIRECT_ALGORITHM)
01605     {
01606     }
01607     else
01608     {
01609       /* Check, if recurrence coefficients must be copied. */
01610       //fprintf(stderr,"\nfpt_finalize: %d\n",set->flags & FPT_PERSISTENT_DATA);
01611       if (set->flags & FPT_PERSISTENT_DATA)
01612       {
01613       }
01614       else
01615       {
01616         free(data->alpha);
01617         free(data->beta);
01618         free(data->gamma);
01619       }
01620       data->alpha = NULL;
01621       data->beta = NULL;
01622       data->gamma = NULL;
01623     }
01624   }
01625 
01626   /* Delete array of DPT transform data. */
01627   free(set->dpt);
01628   set->dpt = NULL;
01629 
01630   for (tau = 1; tau < /*set->t*/set->t+1; tau++)
01631   {
01632     free(set->xcvecs[tau-1]);
01633     set->xcvecs[tau-1] = NULL;
01634   }
01635   free(set->xcvecs);
01636   set->xcvecs = NULL;
01637 
01638   /* Free auxilliary arrays. */
01639   free(set->work);
01640   free(set->result);
01641 
01642   /* Check if fast transform is activated. */
01643   if (set->flags & FPT_NO_FAST_ALGORITHM)
01644   {
01645   }
01646   else
01647   {
01648     /* Free auxilliary arrays. */
01649     free(set->vec3);
01650     free(set->vec4);
01651     free(set->z);
01652     set->work = NULL;
01653     set->result = NULL;
01654     set->vec3 = NULL;
01655     set->vec4 = NULL;
01656     set->z = NULL;
01657 
01658     /* Free FFTW plans. */
01659     for(tau = 0; tau < set->t/*-1*/; tau++)
01660     {
01661       fftw_destroy_plan(set->plans_dct3[tau]);
01662       fftw_destroy_plan(set->plans_dct2[tau]);
01663       set->plans_dct3[tau] = NULL;
01664       set->plans_dct2[tau] = NULL;
01665     }
01666 
01667     free(set->plans_dct3);
01668     free(set->plans_dct2);
01669     set->plans_dct3 = NULL;
01670     set->plans_dct2 = NULL;
01671   }
01672 
01673   //fprintf(stderr,"fpt_finalize: flags = %d\n",set->flags);
01674 
01675   if (set->flags & FPT_NO_DIRECT_ALGORITHM)
01676   {
01677   }
01678   else
01679   {
01680     /* Delete arrays of Chebyshev nodes. */
01681     free(set->xc_slow);
01682     set->xc_slow = NULL;
01683     free(set->temp);
01684     set->temp = NULL;
01685   }
01686 
01687   /* Free DPT set structure. */
01688   free(set);
01689 }