NFFT Logo 3.1.0 API Reference

fpt.c

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2002, 2009 Jens Keiner, Stefan Kunis, Daniel Potts
00003  *
00004  * This program is free software; you can redistribute it and/or modify it under
00005  * the terms of the GNU General Public License as published by the Free Software
00006  * Foundation; either version 2 of the License, or (at your option) any later
00007  * version.
00008  *
00009  * This program is distributed in the hope that it will be useful, but WITHOUT
00010  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00011  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
00012  * details.
00013  *
00014  * You should have received a copy of the GNU General Public License along with
00015  * this program; if not, write to the Free Software Foundation, Inc., 51
00016  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017  */
00018 
00019 /* $Id: fpt.c 3110 2009-03-13 16:32:18Z keiner $ */
00020 
00027 #include "config.h"
00028 
00029 #include <stdlib.h>
00030 #include <string.h>
00031 #include <stdbool.h>
00032 #include <math.h>
00033 #include <complex.h>
00034 
00035 #include "nfft3.h"
00036 #include "util.h"
00037 #include "infft.h"
00038 
00039 /* Macros for index calculation. */
00040 
00042 #define K_START_TILDE(x,y) (NFFT_MAX(NFFT_MIN(x,y-2),0))
00043 
00045 #define K_END_TILDE(x,y) NFFT_MIN(x,y-1)
00046 
00048 #define FIRST_L(x,y) (LRINT(floor((x)/(double)y)))
00049 
00051 #define LAST_L(x,y) (LRINT(ceil(((x)+1)/(double)y))-1)
00052 
00053 #define N_TILDE(y) (y-1)
00054 
00055 #define IS_SYMMETRIC(x,y,z) (x >= ((y-1.0)/z))
00056 
00057 #define FPT_BREAK_EVEN 4
00058 
00062 typedef struct fpt_step_
00063 {
00064   bool stable;                            
00067   int Ns;                                 
00068   int ts;                                 
00069   double **a11,**a12,**a21,**a22;         
00070   double *g;                              
00071 } fpt_step;
00072 
00076 typedef struct fpt_data_
00077 {
00078   fpt_step **steps;                       
00079   int k_start;                            
00080   double *alphaN;                         
00081   double *betaN;                          
00082   double *gammaN;                         
00083   double alpha_0;                         
00084   double beta_0;                          
00085   double gamma_m1;                        
00086   /* Data for direct transform. */        
00087   double *alpha;                          
00088   double *beta;                           
00089   double *gamma;                          
00090 } fpt_data;
00091 
00095 typedef struct fpt_set_s_
00096 {
00097   int flags;                              
00098   int M;                                  
00099   int N;                                  
00101   int t;                                  
00102   fpt_data *dpt;                          
00103   double **xcvecs;                        
00106   double *xc;                             
00107   double _Complex *temp;                          
00108   double _Complex *work;                          
00109   double _Complex *result;                        
00110   double _Complex *vec3;
00111   double _Complex *vec4;
00112   double _Complex *z;
00113   fftw_plan *plans_dct3;                  
00115   fftw_plan *plans_dct2;                  
00117   fftw_r2r_kind *kinds;                   
00119   fftw_r2r_kind *kindsr;                  
00122   int *lengths; 
00124   /* Data for slow transforms. */
00125   double *xc_slow;
00126 } fpt_set_s;
00127 
00128 static inline void abuvxpwy(double a, double b, double _Complex* u, double _Complex* x,
00129   double* v, double _Complex* y, double* w, int n)
00130 {
00131   int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00132   double *v_ptr = v, *w_ptr = w;
00133   for (l = 0; l < n; l++)
00134     *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
00135 }
00136 
00137 #define ABUVXPWY_SYMMETRIC(NAME,S1,S2) \
00138 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
00139   double* v, double _Complex* y, double* w, int n) \
00140 { \
00141   const int n2 = n>>1; \
00142   int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
00143   double *v_ptr = v, *w_ptr = w; \
00144   for (l = 0; l < n2; l++) \
00145     *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
00146   v_ptr--; w_ptr--; \
00147   for (l = 0; l < n2; l++) \
00148     *u_ptr++ = a * (b * S1 * (*v_ptr--) * (*x_ptr++) + S2 * (*w_ptr--) * (*y_ptr++)); \
00149 }
00150 
00151 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric1,1.0,-1.0)
00152 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric2,-1.0,1.0)
00153 
00154 #define ABUVXPWY_SYMMETRIC_1(NAME,S1) \
00155 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
00156   double* v, double _Complex* y, int n, double *xx) \
00157 { \
00158   const int n2 = n>>1; \
00159   int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
00160   double *v_ptr = v, *xx_ptr = xx; \
00161   for (l = 0; l < n2; l++, v_ptr++) \
00162     *u_ptr++ = a * (b * (*v_ptr) * (*x_ptr++) + ((*v_ptr)*(1.0+*xx_ptr++)) * (*y_ptr++)); \
00163   v_ptr--; \
00164   for (l = 0; l < n2; l++, v_ptr--) \
00165     *u_ptr++ = a * (b * S1 * (*v_ptr) * (*x_ptr++) + (S1 * (*v_ptr) * (1.0+*xx_ptr++)) * (*y_ptr++)); \
00166 }
00167 
00168 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_1,1.0)
00169 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_2,-1.0)
00170 
00171 #define ABUVXPWY_SYMMETRIC_2(NAME,S1) \
00172 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
00173   double _Complex* y, double* w, int n, double *xx) \
00174 { \
00175   const int n2 = n>>1; \
00176   int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
00177   double *w_ptr = w, *xx_ptr = xx; \
00178   for (l = 0; l < n2; l++, w_ptr++) \
00179     *u_ptr++ = a * (b * (*w_ptr/(1.0+*xx_ptr++)) * (*x_ptr++) + (*w_ptr) * (*y_ptr++)); \
00180   w_ptr--; \
00181   for (l = 0; l < n2; l++, w_ptr--) \
00182     *u_ptr++ = a * (b * (S1 * (*w_ptr)/(1.0+*xx_ptr++) ) * (*x_ptr++) + S1 * (*w_ptr) * (*y_ptr++)); \
00183 }
00184 
00185 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_1,1.0)
00186 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_2,-1.0)
00187 
00188 static inline void auvxpwy(double a, double _Complex* u, double _Complex* x, double* v,
00189   double _Complex* y, double* w, int n)
00190 {
00191   int l;
00192   double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00193   double *v_ptr = v, *w_ptr = w;
00194   for (l = n; l > 0; l--)
00195     *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
00196 }
00197 
00198 static inline void auvxpwy_symmetric(double a, double _Complex* u, double _Complex* x,
00199   double* v, double _Complex* y, double* w, int n)
00200 {
00201   const int n2 = n>>1; \
00202   int l;
00203   double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00204   double *v_ptr = v, *w_ptr = w;
00205   for (l = n2; l > 0; l--)
00206     *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
00207   v_ptr--; w_ptr--;
00208   for (l = n2; l > 0; l--)
00209     *u_ptr++ = a * ((*v_ptr--) * (*x_ptr++) - (*w_ptr--) * (*y_ptr++));
00210 }
00211 
00212 static inline void auvxpwy_symmetric_1(double a, double _Complex* u, double _Complex* x,
00213   double* v, double _Complex* y, double* w, int n, double *xx)
00214 {
00215   const int n2 = n>>1; \
00216   int l;
00217   double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00218   double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;
00219   for (l = n2; l > 0; l--, xx_ptr++)
00220     *u_ptr++ = a * (((*v_ptr++)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)*(1.0+*xx_ptr)) * (*y_ptr++));
00221   v_ptr--; w_ptr--;
00222   for (l = n2; l > 0; l--, xx_ptr++)
00223     *u_ptr++ = a * (-((*v_ptr--)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)*(1.0+*xx_ptr)) * (*y_ptr++));
00224 }
00225 
00226 static inline void auvxpwy_symmetric_2(double a, double _Complex* u, double _Complex* x,
00227   double* v, double _Complex* y, double* w, int n, double *xx)
00228 {
00229   const int n2 = n>>1; \
00230   int l;
00231   double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00232   double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;
00233   for (l = n2; l > 0; l--, xx_ptr++)
00234     *u_ptr++ = a * (((*v_ptr++)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)/(1.0+*xx_ptr)) * (*y_ptr++));
00235   v_ptr--; w_ptr--;
00236   for (l = n2; l > 0; l--, xx_ptr++)
00237     *u_ptr++ = a * (-((*v_ptr--)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)/(1.0+*xx_ptr)) * (*y_ptr++));
00238 }
00239 
00240 #define FPT_DO_STEP(NAME,M1_FUNCTION,M2_FUNCTION) \
00241 static inline void NAME(double _Complex  *a, double _Complex *b, double *a11, double *a12, \
00242   double *a21, double *a22, double g, int tau, fpt_set set) \
00243 { \
00244  \
00245   int length = 1<<(tau+1); \
00246  \
00247   double norm = 1.0/(length<<1); \
00248   \
00249   /* Compensate for factors introduced by a raw DCT-III. */ \
00250   a[0] *= 2.0; \
00251   b[0] *= 2.0; \
00252   \
00253   /* Compute function values from Chebyshev-coefficients using a DCT-III. */ \
00254   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
00255   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
00256   \
00257   /*for (k = 0; k < length; k++)*/ \
00258   /*{*/ \
00259     /*fprintf(stderr,"fpt_do_step: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",*/ \
00260     /*  a11[k],a12[k],a21[k],a22[k]);*/ \
00261   /*}*/ \
00262   \
00263   /* Check, if gamma is zero. */ \
00264   if (g == 0.0) \
00265   { \
00266     /*fprintf(stderr,"gamma == 0!\n");*/ \
00267     /* Perform multiplication only for second row. */ \
00268     M2_FUNCTION(norm,b,b,a22,a,a21,length); \
00269   } \
00270   else \
00271   { \
00272     /*fprintf(stderr,"gamma != 0!\n");*/ \
00273     /* Perform multiplication for both rows. */ \
00274     M2_FUNCTION(norm,set->z,b,a22,a,a21,length); \
00275     M1_FUNCTION(norm*g,a,a,a11,b,a12,length); \
00276     memcpy(b,set->z,length*sizeof(double _Complex)); \
00277     /* Compute Chebyshev-coefficients using a DCT-II. */ \
00278     fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
00279     /* Compensate for factors introduced by a raw DCT-II. */ \
00280     a[0] *= 0.5; \
00281   } \
00282   \
00283   /* Compute Chebyshev-coefficients using a DCT-II. */ \
00284   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
00285   /* Compensate for factors introduced by a raw DCT-II. */ \
00286   b[0] *= 0.5; \
00287 }
00288 
00289 FPT_DO_STEP(fpt_do_step,auvxpwy,auvxpwy)
00290 FPT_DO_STEP(fpt_do_step_symmetric,auvxpwy_symmetric,auvxpwy_symmetric)
00291 /*FPT_DO_STEP(fpt_do_step_symmetric_u,auvxpwy_symmetric,auvxpwy)
00292 FPT_DO_STEP(fpt_do_step_symmetric_l,auvxpwy,auvxpwy_symmetric)*/
00293 
00294 static inline void fpt_do_step_symmetric_u(double _Complex *a, double _Complex *b,
00295   double *a11, double *a12, double *a21, double *a22, double *x,
00296   double gamma, int tau, fpt_set set)
00297 {
00299   int length = 1<<(tau+1);
00301   double norm = 1.0/(length<<1);
00302 
00303   UNUSED(a21); UNUSED(a22);
00304 
00305   /* Compensate for factors introduced by a raw DCT-III. */
00306   a[0] *= 2.0;
00307   b[0] *= 2.0;
00308 
00309   /* Compute function values from Chebyshev-coefficients using a DCT-III. */
00310   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00311   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00312 
00313   /*for (k = 0; k < length; k++)*/
00314   /*{*/
00315     /*fprintf(stderr,"fpt_do_step: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",*/
00316     /*  a11[k],a12[k],a21[k],a22[k]);*/
00317   /*}*/
00318 
00319   /* Check, if gamma is zero. */
00320   if (gamma == 0.0)
00321   {
00322     /*fprintf(stderr,"gamma == 0!\n");*/
00323     /* Perform multiplication only for second row. */
00324     auvxpwy_symmetric_1(norm,b,b,a12,a,a11,length,x);
00325   }
00326   else
00327   {
00328     /*fprintf(stderr,"gamma != 0!\n");*/
00329     /* Perform multiplication for both rows. */
00330     auvxpwy_symmetric_1(norm,set->z,b,a12,a,a11,length,x);
00331     auvxpwy_symmetric(norm*gamma,a,a,a11,b,a12,length);
00332     memcpy(b,set->z,length*sizeof(double _Complex));
00333     /* Compute Chebyshev-coefficients using a DCT-II. */
00334     fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00335     /* Compensate for factors introduced by a raw DCT-II. */
00336     a[0] *= 0.5;
00337   }
00338 
00339   /* Compute Chebyshev-coefficients using a DCT-II. */
00340   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00341   /* Compensate for factors introduced by a raw DCT-II. */
00342   b[0] *= 0.5;
00343 }
00344 
00345 static inline void fpt_do_step_symmetric_l(double _Complex  *a, double _Complex *b,
00346   double *a11, double *a12, double *a21, double *a22, double *x, double gamma, int tau, fpt_set set)
00347 {
00349   int length = 1<<(tau+1);
00351   double norm = 1.0/(length<<1);
00352 
00353   /* Compensate for factors introduced by a raw DCT-III. */
00354   a[0] *= 2.0;
00355   b[0] *= 2.0;
00356 
00357   UNUSED(a11); UNUSED(a12);
00358 
00359   /* Compute function values from Chebyshev-coefficients using a DCT-III. */
00360   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00361   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00362 
00363   /*for (k = 0; k < length; k++)*/
00364   /*{*/
00365     /*fprintf(stderr,"fpt_do_step: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",*/
00366     /*  a11[k],a12[k],a21[k],a22[k]);*/
00367   /*}*/
00368 
00369   /* Check, if gamma is zero. */
00370   if (gamma == 0.0)
00371   {
00372     /*fprintf(stderr,"gamma == 0!\n");*/
00373     /* Perform multiplication only for second row. */
00374     auvxpwy_symmetric(norm,b,b,a22,a,a21,length);
00375   }
00376   else
00377   {
00378     /*fprintf(stderr,"gamma != 0!\n");*/
00379     /* Perform multiplication for both rows. */
00380     auvxpwy_symmetric(norm,set->z,b,a22,a,a21,length);
00381     auvxpwy_symmetric_2(norm*gamma,a,a,a21,b,a22,length,x);
00382     memcpy(b,set->z,length*sizeof(double _Complex));
00383     /* Compute Chebyshev-coefficients using a DCT-II. */
00384     fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00385     /* Compensate for factors introduced by a raw DCT-II. */
00386     a[0] *= 0.5;
00387   }
00388 
00389   /* Compute Chebyshev-coefficients using a DCT-II. */
00390   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00391   /* Compensate for factors introduced by a raw DCT-II. */
00392   b[0] *= 0.5;
00393 }
00394 
00395 #define FPT_DO_STEP_TRANSPOSED(NAME,M1_FUNCTION,M2_FUNCTION) \
00396 static inline void NAME(double _Complex  *a, double _Complex *b, double *a11, \
00397   double *a12, double *a21, double *a22, double g, int tau, fpt_set set) \
00398 { \
00399  \
00400   int length = 1<<(tau+1); \
00401  \
00402   double norm = 1.0/(length<<1); \
00403   \
00404   /* Compute function values from Chebyshev-coefficients using a DCT-III. */ \
00405   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
00406   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
00407   \
00408   /* Perform matrix multiplication. */ \
00409   M1_FUNCTION(norm,g,set->z,a,a11,b,a21,length); \
00410   M2_FUNCTION(norm,g,b,a,a12,b,a22,length); \
00411   memcpy(a,set->z,length*sizeof(double _Complex)); \
00412   \
00413   /* Compute Chebyshev-coefficients using a DCT-II. */ \
00414   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
00415   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
00416 }
00417 
00418 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t,abuvxpwy,abuvxpwy)
00419 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric,abuvxpwy_symmetric1,abuvxpwy_symmetric2)
00420 /*FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric_u,abuvxpwy_symmetric1_1,abuvxpwy_symmetric1_2)*/
00421 /*FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric_l,abuvxpwy_symmetric2_2,abuvxpwy_symmetric2_1)*/
00422 
00423 
00424 static inline void fpt_do_step_t_symmetric_u(double _Complex  *a,
00425   double _Complex *b, double *a11, double *a12, double *x,
00426   double gamma, int tau, fpt_set set)
00427 {
00429   int length = 1<<(tau+1);
00431   double norm = 1.0/(length<<1);
00432 
00433   /* Compute function values from Chebyshev-coefficients using a DCT-III. */
00434   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00435   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00436 
00437   /* Perform matrix multiplication. */
00438   abuvxpwy_symmetric1_1(norm,gamma,set->z,a,a11,b,length,x);
00439   abuvxpwy_symmetric1_2(norm,gamma,b,a,a12,b,length,x);
00440   memcpy(a,set->z,length*sizeof(double _Complex));
00441 
00442   /* Compute Chebyshev-coefficients using a DCT-II. */
00443   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00444   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00445 }
00446 
00447 static inline void fpt_do_step_t_symmetric_l(double _Complex  *a,
00448   double _Complex *b, double *a21, double *a22,
00449   double *x, double gamma, int tau, fpt_set set)
00450 {
00452   int length = 1<<(tau+1);
00454   double norm = 1.0/(length<<1);
00455 
00456   /* Compute function values from Chebyshev-coefficients using a DCT-III. */
00457   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00458   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00459 
00460   /* Perform matrix multiplication. */
00461   abuvxpwy_symmetric2_2(norm,gamma,set->z,a,b,a21,length,x);
00462   abuvxpwy_symmetric2_1(norm,gamma,b,a,b,a22,length,x);
00463   memcpy(a,set->z,length*sizeof(double _Complex));
00464 
00465   /* Compute Chebyshev-coefficients using a DCT-II. */
00466   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00467   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00468 }
00469 
00470 
00471 static void eval_clenshaw(const double *x, double *y, int size, int k, const double *alpha,
00472   const double *beta, const double *gamma)
00473 {
00474   /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector
00475    * of knots  x[0], ..., x[size-1] by the Clenshaw algorithm
00476    */
00477   int i,j;
00478   double a,b,x_val_act,a_old;
00479   const double *x_act;
00480   double *y_act;
00481   const double *alpha_act, *beta_act, *gamma_act;
00482 
00483   /* Traverse all nodes. */
00484   x_act = x;
00485   y_act = y;
00486   for (i = 0; i < size; i++)
00487   {
00488     a = 1.0;
00489     b = 0.0;
00490     x_val_act = *x_act;
00491 
00492     if (k == 0)
00493     {
00494       *y_act = 1.0;
00495     }
00496     else
00497     {
00498       alpha_act = &(alpha[k]);
00499       beta_act = &(beta[k]);
00500       gamma_act = &(gamma[k]);
00501       for (j = k; j > 1; j--)
00502       {
00503         a_old = a;
00504         a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00505          b = a_old*(*gamma_act);
00506         alpha_act--;
00507         beta_act--;
00508         gamma_act--;
00509       }
00510       *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00511     }
00512     x_act++;
00513     y_act++;
00514   }
00515 }
00516 
00517 static void eval_clenshaw2(const double *x, double *z, double *y, int size1, int size, int k, const double *alpha,
00518   const double *beta, const double *gamma)
00519 {
00520   /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector
00521    * of knots  x[0], ..., x[size-1] by the Clenshaw algorithm
00522    */
00523   int i,j;
00524   double a,b,x_val_act,a_old;
00525   const double *x_act;
00526   double *y_act, *z_act;
00527   const double *alpha_act, *beta_act, *gamma_act;
00528 
00529   /* Traverse all nodes. */
00530   x_act = x;
00531   y_act = y;
00532   z_act = z;
00533   for (i = 0; i < size; i++)
00534   {
00535     a = 1.0;
00536     b = 0.0;
00537     x_val_act = *x_act;
00538 
00539     if (k == 0)
00540     {
00541       *y_act = 1.0;
00542       *z_act = 0.0;
00543     }
00544     else
00545     {
00546       alpha_act = &(alpha[k]);
00547       beta_act = &(beta[k]);
00548       gamma_act = &(gamma[k]);
00549       for (j = k; j > 1; j--)
00550       {
00551         a_old = a;
00552         a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00553         b = a_old*(*gamma_act);
00554         alpha_act--;
00555         beta_act--;
00556         gamma_act--;
00557       }
00558       if (i < size1)
00559         *z_act = a;
00560       *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00561     }
00562 
00563     x_act++;
00564     y_act++;
00565     z_act++;
00566   }
00567   /*gamma_act++;
00568   FILE *f = fopen("/Users/keiner/Desktop/nfsft_debug.txt","a");
00569   fprintf(f,"size1: %10d, size: %10d\n",size1,size);
00570   fclose(f);*/
00571 }
00572 
00573 static int eval_clenshaw_thresh2(const double *x, double *z, double *y, int size, int k,
00574   const double *alpha, const double *beta, const double *gamma, const
00575   double threshold)
00576 {
00577   /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector
00578    * of knots  x[0], ..., x[size-1] by the Clenshaw algorithm
00579    */
00580   int i,j;
00581   double a,b,x_val_act,a_old;
00582   const double *x_act;
00583   double *y_act, *z_act;
00584   const double *alpha_act, *beta_act, *gamma_act;
00585 
00586   /* Traverse all nodes. */
00587   x_act = x;
00588   y_act = y;
00589   z_act = z;
00590   for (i = 0; i < size; i++)
00591   {
00592     a = 1.0;
00593     b = 0.0;
00594     x_val_act = *x_act;
00595 
00596     if (k == 0)
00597     {
00598      *y_act = 1.0;
00599      *z_act = 0.0;
00600     }
00601     else
00602     {
00603       alpha_act = &(alpha[k]);
00604       beta_act = &(beta[k]);
00605       gamma_act = &(gamma[k]);
00606       for (j = k; j > 1; j--)
00607       {
00608         a_old = a;
00609         a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00610          b = a_old*(*gamma_act);
00611         alpha_act--;
00612         beta_act--;
00613         gamma_act--;
00614       }
00615       *z_act = a;
00616       *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00617       if (fabs(*y_act) > threshold)
00618       {
00619         return 1;
00620       }
00621     }
00622     x_act++;
00623     y_act++;
00624     z_act++;
00625   }
00626   return 0;
00627 }
00628 
00629 static inline void eval_sum_clenshaw_fast(const int N, const int M,
00630   const double _Complex *a, const double *x, double _Complex *y,
00631   const double *alpha, const double *beta, const double *gamma,
00632   const double lambda)
00633 {
00634   int j,k;
00635   double _Complex tmp1, tmp2, tmp3;
00636   double xc;
00637 
00638   if (N == 0)
00639     for (j = 0; j <= M; j++)
00640       y[j] = a[0];
00641   else
00642   {
00643     for (j = 0; j <= M; j++)
00644     {
00645 #if 0
00646       xc = x[j];
00647       tmp2 = a[N];
00648       tmp1 = a[N-1] + (alpha[N-1] * xc + beta[N-1])*tmp2;
00649       for (k = N-1; k > 0; k--)
00650       {
00651         tmp3 =   a[k-1]
00652                + (alpha[k-1] * xc + beta[k-1]) * tmp1
00653                + gamma[k] * tmp2;
00654         tmp2 = tmp1;
00655         tmp1 = tmp3;
00656       }
00657       y[j] = lambda * tmp1;
00658 #else
00659       xc = x[j];
00660       tmp1 = a[N-1];
00661       tmp2 = a[N];
00662       for (k = N-1; k > 0; k--)
00663       {
00664         tmp3 = a[k-1] + tmp2 * gamma[k];
00665         tmp2 *= (alpha[k] * xc + beta[k]);
00666         tmp2 += tmp1;
00667         tmp1 = tmp3;
00668       }
00669       tmp2 *= (alpha[0] * xc + beta[0]);
00670       y[j] = lambda * (tmp2 + tmp1);
00671 #endif
00672     }
00673   }
00674 }
00675 
00694 static void eval_sum_clenshaw_transposed(int N, int M, double _Complex* a, double *x,
00695   double _Complex *y, double _Complex *temp, double *alpha, double *beta, double *gamma,
00696   double lambda)
00697 {
00698   int j,k;
00699   double _Complex* it1 = temp;
00700   double _Complex* it2 = y;
00701   double _Complex aux;
00702 
00703   /* Compute final result by multiplying with the constant lambda */
00704   a[0] = 0.0;
00705   for (j = 0; j <= M; j++)
00706   {
00707     it2[j] = lambda * y[j];
00708     a[0] += it2[j];
00709   }
00710 
00711   if (N > 0)
00712   {
00713     /* Compute final step. */
00714     a[1] = 0.0;
00715     for (j = 0; j <= M; j++)
00716     {
00717       it1[j] = it2[j];
00718       it2[j] = it2[j] * (alpha[0] * x[j] + beta[0]);
00719       a[1] += it2[j];
00720     }
00721 
00722     for (k = 2; k <= N; k++)
00723     {
00724       a[k] = 0.0;
00725       for (j = 0; j <= M; j++)
00726       {
00727         aux = it1[j];
00728         it1[j] = it2[j];
00729         it2[j] = it2[j]*(alpha[k-1] * x[j] + beta[k-1]) + gamma[k-1] * aux;
00730         a[k] += it2[j];
00731       }
00732     }
00733   }
00734 }
00735 
00736 
00737 fpt_set fpt_init(const int M, const int t, const unsigned int flags)
00738 {
00740   int plength;
00742   int tau;
00744   int m;
00745   int k;
00746 
00747   /* Allocate memory for new DPT set. */
00748   fpt_set_s *set = (fpt_set_s*)nfft_malloc(sizeof(fpt_set_s));
00749 
00750   /* Save parameters in structure. */
00751   set->flags = flags;
00752 
00753   set->M = M;
00754   set->t = t;
00755   set->N = 1<<t;
00756 
00757   /* Allocate memory for M transforms. */
00758   set->dpt = (fpt_data*) nfft_malloc(M*sizeof(fpt_data));
00759 
00760   /* Initialize with NULL pointer. */
00761   for (m = 0; m < set->M; m++)
00762     set->dpt[m].steps = 0;
00763 
00764   /* Create arrays with Chebyshev nodes. */
00765 
00766   /* Initialize array with Chebyshev coefficients for the polynomial x. This
00767    * would be trivially an array containing a 1 as second entry with all other
00768    * coefficients set to zero. In order to compensate for the multiplicative
00769    * factor 2 introduced by the DCT-III, we set this coefficient to 0.5 here. */
00770 
00771   /* Allocate memory for array of pointers to node arrays. */
00772   set->xcvecs = (double**) nfft_malloc((set->t)*sizeof(double*));
00773   /* For each polynomial length starting with 4, compute the Chebyshev nodes
00774    * using a DCT-III. */
00775   plength = 4;
00776   for (tau = 1; tau < t+1; tau++)
00777   {
00778     /* Allocate memory for current array. */
00779     set->xcvecs[tau-1] = (double*) nfft_malloc(plength*sizeof(double));
00780     for (k = 0; k < plength; k++)
00781     {
00782       set->xcvecs[tau-1][k] = cos(((k+0.5)*PI)/plength);
00783     }
00784     plength = plength << 1;
00785   }
00786 
00788   set->work = (double _Complex*) nfft_malloc((2*set->N)*sizeof(double _Complex));
00789   set->result = (double _Complex*) nfft_malloc((2*set->N)*sizeof(double _Complex));
00790 
00791   /* Check if fast transform is activated. */
00792   if (!(set->flags & FPT_NO_FAST_ALGORITHM))
00793   {
00795     set->vec3 = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
00796     set->vec4 = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
00797     set->z = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
00798 
00800     set->plans_dct3 = (fftw_plan*) nfft_malloc(sizeof(fftw_plan)*(set->t/*-1*/));
00801     set->plans_dct2 = (fftw_plan*) nfft_malloc(sizeof(fftw_plan)*(set->t/*-1*/));
00802     set->kinds      = (fftw_r2r_kind*) nfft_malloc(2*sizeof(fftw_r2r_kind));
00803     set->kinds[0]   = FFTW_REDFT01;
00804     set->kinds[1]   = FFTW_REDFT01;
00805     set->kindsr     = (fftw_r2r_kind*) nfft_malloc(2*sizeof(fftw_r2r_kind));
00806     set->kindsr[0]  = FFTW_REDFT10;
00807     set->kindsr[1]  = FFTW_REDFT10;
00808     set->lengths    = (int*) nfft_malloc((set->t/*-1*/)*sizeof(int));
00809     for (tau = 0, plength = 4; tau < set->t/*-1*/; tau++, plength<<=1)
00810     {
00811       set->lengths[tau] = plength;
00812       set->plans_dct3[tau] =
00813         fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL,
00814                            2, 1, (double*)set->result, NULL, 2, 1, set->kinds,
00815                            0);
00816       set->plans_dct2[tau] =
00817         fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL,
00818                            2, 1, (double*)set->result, NULL, 2, 1,set->kindsr,
00819                            0);
00820     }
00821     nfft_free(set->lengths);
00822     nfft_free(set->kinds);
00823     nfft_free(set->kindsr);
00824     set->lengths = NULL;
00825     set->kinds = NULL;
00826     set->kindsr = NULL;
00827   }
00828 
00829   if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
00830   {
00831     set->xc_slow = (double*) nfft_malloc((set->N+1)*sizeof(double));
00832     set->temp = (double _Complex*) nfft_malloc((set->N+1)*sizeof(double _Complex));
00833   }
00834 
00835   /* Return the newly created DPT set. */
00836   return set;
00837 }
00838 
00839 void fpt_precompute(fpt_set set, const int m, double *alpha, double *beta,
00840   double *gam, int k_start, const double threshold)
00841 {
00842 
00843   int tau;          
00844   int l;            
00845   int plength;      
00847   int degree;       
00849   int firstl;       
00850   int lastl;        
00851   int plength_stab; 
00853   int degree_stab;  
00855   double *a11;      
00857   double *a12;      
00859   double *a21;      
00861   double *a22;      
00863   const double *calpha;
00864   const double *cbeta;
00865   const double *cgamma;
00866   int needstab = 0; 
00867   int k_start_tilde;
00868   int N_tilde;
00869   int clength;
00870   int clength_1;
00871   int clength_2;
00872   int t_stab, N_stab;
00873   fpt_data *data;
00874 
00875   /* Get pointer to DPT data. */
00876   data = &(set->dpt[m]);
00877 
00878   /* Check, if already precomputed. */
00879   if (data->steps != NULL)
00880     return;
00881 
00882   /* Save k_start. */
00883   data->k_start = k_start;
00884 
00885   /* Check if fast transform is activated. */
00886   if (!(set->flags & FPT_NO_FAST_ALGORITHM))
00887   {
00888     /* Save recursion coefficients. */
00889     data->alphaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
00890     data->betaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
00891     data->gammaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
00892 
00893     for (tau = 2; tau <= set->t; tau++)
00894     {
00895 
00896       data->alphaN[tau-2] = alpha[1<<tau];
00897       data->betaN[tau-2] = beta[1<<tau];
00898       data->gammaN[tau-2] = gam[1<<tau];
00899     }
00900 
00901     data->alpha_0 = alpha[1];
00902     data->beta_0 = beta[1];
00903     data->gamma_m1 = gam[0];
00904 
00905     k_start_tilde = K_START_TILDE(data->k_start,nfft_next_power_of_2(data->k_start)
00906       /*set->N*/);
00907     N_tilde = N_TILDE(set->N);
00908 
00909     /* Allocate memory for the cascade with t = log_2(N) many levels. */
00910     data->steps = (fpt_step**) nfft_malloc(sizeof(fpt_step*)*set->t);
00911 
00912     /* For tau = 1,...t compute the matrices U_{n,tau,l}. */
00913     plength = 4;
00914     for (tau = 1; tau < set->t; tau++)
00915     {
00916       /* Compute auxilliary values. */
00917       degree = plength>>1;
00918       /* Compute first l. */
00919       firstl = FIRST_L(k_start_tilde,plength);
00920       /* Compute last l. */
00921       lastl = LAST_L(N_tilde,plength);
00922 
00923       /* Allocate memory for current level. This level will contain 2^{t-tau-1}
00924        * many matrices. */
00925       data->steps[tau] = (fpt_step*) nfft_malloc(sizeof(fpt_step)
00926                          * (lastl+1));
00927 
00928       /* For l = 0,...2^{t-tau-1}-1 compute the matrices U_{n,tau,l}. */
00929       for (l = firstl; l <= lastl; l++)
00930       {
00931         if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
00932         {
00933           //fprintf(stderr,"fpt_precompute(%d): symmetric step\n",m);
00934           //fflush(stderr);
00935           clength = plength/2;
00936         }
00937         else
00938         {
00939           clength = plength;
00940         }
00941 
00942         /* Allocate memory for the components of U_{n,tau,l}. */
00943         a11 = (double*) nfft_malloc(sizeof(double)*clength);
00944         a12 = (double*) nfft_malloc(sizeof(double)*clength);
00945         a21 = (double*) nfft_malloc(sizeof(double)*clength);
00946         a22 = (double*) nfft_malloc(sizeof(double)*clength);
00947 
00948         /* Evaluate the associated polynomials at the 2^{tau+1} Chebyshev
00949          * nodes. */
00950 
00951         /* Get the pointers to the three-term recurrence coeffcients. */
00952         calpha = &(alpha[plength*l+1+1]);
00953         cbeta = &(beta[plength*l+1+1]);
00954         cgamma = &(gam[plength*l+1+1]);
00955 
00956         if (set->flags & FPT_NO_STABILIZATION)
00957         {
00958           /* Evaluate P_{2^{tau}-2}^n(\cdot,2^{tau+1}l+2). */
00959           calpha--;
00960           cbeta--;
00961           cgamma--;
00962           eval_clenshaw2(set->xcvecs[tau-1], a11, a21, clength, clength, degree-1, calpha, cbeta,
00963             cgamma);
00964           eval_clenshaw2(set->xcvecs[tau-1], a12, a22, clength, clength, degree, calpha, cbeta,
00965             cgamma);
00966           needstab = 0;
00967         }
00968         else
00969         {
00970           calpha--;
00971           cbeta--;
00972           cgamma--;
00973           /* Evaluate P_{2^{tau}-1}^n(\cdot,2^{tau+1}l+1). */
00974           needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a11, a21, clength,
00975             degree-1, calpha, cbeta, cgamma, threshold);
00976           if (needstab == 0)
00977           {
00978             /* Evaluate P_{2^{tau}}^n(\cdot,2^{tau+1}l+1). */
00979             needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a12, a22, clength,
00980               degree, calpha, cbeta, cgamma, threshold);
00981           }
00982         }
00983 
00984         /* Check if stabilization needed. */
00985         if (needstab == 0)
00986         {
00987           data->steps[tau][l].a11 = (double**) nfft_malloc(sizeof(double*));
00988           data->steps[tau][l].a12 = (double**) nfft_malloc(sizeof(double*));
00989           data->steps[tau][l].a21 = (double**) nfft_malloc(sizeof(double*));
00990           data->steps[tau][l].a22 = (double**) nfft_malloc(sizeof(double*));
00991           data->steps[tau][l].g = (double*) nfft_malloc(sizeof(double));
00992           /* No stabilization needed. */
00993           data->steps[tau][l].a11[0] = a11;
00994           data->steps[tau][l].a12[0] = a12;
00995           data->steps[tau][l].a21[0] = a21;
00996           data->steps[tau][l].a22[0] = a22;
00997           data->steps[tau][l].g[0] = gam[plength*l+1+1];
00998           data->steps[tau][l].stable = true;
00999         }
01000         else
01001         {
01002           /* Stabilize. */
01003           degree_stab = degree*(2*l+1);
01004           nfft_next_power_of_2_exp((l+1)*(1<<(tau+1)),&N_stab,&t_stab);
01005 
01006           /* Old arrays are to small. */
01007           nfft_free(a11);
01008           nfft_free(a12);
01009           nfft_free(a21);
01010           nfft_free(a22);
01011 
01012           data->steps[tau][l].a11 = (double**) nfft_malloc(sizeof(double*));
01013           data->steps[tau][l].a12 = (double**)nfft_malloc(sizeof(double*));
01014           data->steps[tau][l].a21 = (double**) nfft_malloc(sizeof(double*));
01015           data->steps[tau][l].a22 = (double**) nfft_malloc(sizeof(double*));
01016           data->steps[tau][l].g = (double*) nfft_malloc(sizeof(double));
01017 
01018           plength_stab = N_stab;
01019 
01020           if (set->flags & FPT_AL_SYMMETRY)
01021           {
01022             if (m <= 1)
01023             {
01024               /* This should never be executed */
01025               clength_1 = plength_stab;
01026               clength_2 = plength_stab;
01027               /* Allocate memory for arrays. */
01028               a11 = (double*) nfft_malloc(sizeof(double)*clength_1);
01029               a12 = (double*) nfft_malloc(sizeof(double)*clength_1);
01030               a21 = (double*) nfft_malloc(sizeof(double)*clength_2);
01031               a22 = (double*) nfft_malloc(sizeof(double)*clength_2);
01032               /* Get the pointers to the three-term recurrence coeffcients. */
01033               calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
01034               eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1,
01035                 clength_2, degree_stab-1, calpha, cbeta, cgamma);
01036               eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1,
01037                 clength_2, degree_stab+0, calpha, cbeta, cgamma);
01038             }
01039             else
01040             {
01041               clength = plength_stab/2;
01042               if (m%2 == 0)
01043               {
01044                 a11 = (double*) nfft_malloc(sizeof(double)*clength);
01045                 a12 = (double*) nfft_malloc(sizeof(double)*clength);
01046                 a21 = 0;
01047                 a22 = 0;
01048                 calpha = &(alpha[2]); cbeta = &(beta[2]); cgamma = &(gam[2]);
01049                 eval_clenshaw(set->xcvecs[t_stab-2], a11, clength,
01050                   degree_stab-2, calpha, cbeta, cgamma);
01051                 eval_clenshaw(set->xcvecs[t_stab-2], a12, clength,
01052                   degree_stab-1, calpha, cbeta, cgamma);
01053               }
01054               else
01055               {
01056                 a11 = 0;
01057                 a12 = 0;
01058                 a21 = (double*) nfft_malloc(sizeof(double)*clength);
01059                 a22 = (double*) nfft_malloc(sizeof(double)*clength);
01060                 calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
01061                 eval_clenshaw(set->xcvecs[t_stab-2], a21, clength,
01062                   degree_stab-1,calpha, cbeta, cgamma);
01063                 eval_clenshaw(set->xcvecs[t_stab-2], a22, clength,
01064                   degree_stab+0, calpha, cbeta, cgamma);
01065               }
01066             }
01067           }
01068           else
01069           {
01070             clength_1 = plength_stab;
01071             clength_2 = plength_stab;
01072             a11 = (double*) nfft_malloc(sizeof(double)*clength_1);
01073             a12 = (double*) nfft_malloc(sizeof(double)*clength_1);
01074             a21 = (double*) nfft_malloc(sizeof(double)*clength_2);
01075             a22 = (double*) nfft_malloc(sizeof(double)*clength_2);
01076             calpha = &(alpha[2]);
01077             cbeta = &(beta[2]);
01078             cgamma = &(gam[2]);
01079             calpha--;
01080             cbeta--;
01081             cgamma--;
01082             eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1, clength_2, degree_stab-1,
01083               calpha, cbeta, cgamma);
01084             eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1, clength_2, degree_stab+0,
01085               calpha, cbeta, cgamma);
01086 
01087           }
01088           data->steps[tau][l].a11[0] = a11;
01089           data->steps[tau][l].a12[0] = a12;
01090           data->steps[tau][l].a21[0] = a21;
01091           data->steps[tau][l].a22[0] = a22;
01092 
01093           data->steps[tau][l].g[0] =  gam[1+1];
01094           data->steps[tau][l].stable = false;
01095           data->steps[tau][l].ts = t_stab;
01096           data->steps[tau][l].Ns = N_stab;
01097         }
01098       }
01100       plength = plength << 1;
01101     }
01102   }
01103 
01104   if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
01105   {
01106     /* Check, if recurrence coefficients must be copied. */
01107     if (set->flags & FPT_PERSISTENT_DATA)
01108     {
01109       data->alpha = (double*) alpha;
01110       data->beta = (double*) beta;
01111       data->gamma = (double*) gam;
01112     }
01113     else
01114     {
01115       data->alpha = (double*) nfft_malloc((set->N+1)*sizeof(double));
01116       data->beta = (double*) nfft_malloc((set->N+1)*sizeof(double));
01117       data->gamma = (double*) nfft_malloc((set->N+1)*sizeof(double));
01118       memcpy(data->alpha,alpha,(set->N+1)*sizeof(double));
01119       memcpy(data->beta,beta,(set->N+1)*sizeof(double));
01120       memcpy(data->gamma,gam,(set->N+1)*sizeof(double));
01121     }
01122   }
01123 }
01124 
01125 void dpt_trafo(fpt_set set, const int m, const double _Complex *x, double _Complex *y,
01126   const int k_end, const unsigned int flags)
01127 {
01128   int j;
01129   fpt_data *data = &(set->dpt[m]);
01130   int Nk;
01131   int tk;
01132   double norm;
01133 
01134   nfft_next_power_of_2_exp(k_end+1,&Nk,&tk);
01135   norm = 2.0/(Nk<<1);
01136 
01137   if (set->flags & FPT_NO_DIRECT_ALGORITHM)
01138   {
01139     return;
01140   }
01141 
01142   if (flags & FPT_FUNCTION_VALUES)
01143   {
01144     /* Fill array with Chebyshev nodes. */
01145     for (j = 0; j <= k_end; j++)
01146     {
01147       set->xc_slow[j] = cos((PI*(j+0.5))/(k_end+1));
01148     }
01149 
01150     memset(set->result,0U,data->k_start*sizeof(double _Complex));
01151     memcpy(&set->result[data->k_start],x,(k_end-data->k_start+1)*sizeof(double _Complex));
01152 
01153     /*eval_sum_clenshaw(k_end, k_end, set->result, set->xc_slow,
01154       y, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
01155       data->gamma_m1);*/
01156     eval_sum_clenshaw_fast(k_end, k_end, set->result, set->xc_slow,
01157       y, &data->alpha[1], &data->beta[1], &data->gamma[1], data->gamma_m1);
01158   }
01159   else
01160   {
01161     memset(set->temp,0U,data->k_start*sizeof(double _Complex));
01162     memcpy(&set->temp[data->k_start],x,(k_end-data->k_start+1)*sizeof(double _Complex));
01163 
01164     eval_sum_clenshaw_fast(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
01165       set->result, &data->alpha[1], &data->beta[1], &data->gamma[1],
01166       data->gamma_m1);
01167 
01168     fftw_execute_r2r(set->plans_dct2[tk-2],(double*)set->result,
01169       (double*)set->result);
01170 
01171     set->result[0] *= 0.5;
01172     for (j = 0; j < Nk; j++)
01173     {
01174       set->result[j] *= norm;
01175     }
01176 
01177     memcpy(y,set->result,(k_end+1)*sizeof(double _Complex));
01178   }
01179 }
01180 
01181 void fpt_trafo(fpt_set set, const int m, const double _Complex *x, double _Complex *y,
01182   const int k_end, const unsigned int flags)
01183 {
01184   /* Get transformation data. */
01185   fpt_data *data = &(set->dpt[m]);
01187   int Nk;
01189   int tk;
01191   int k_start_tilde;
01193   int k_end_tilde;
01194 
01196   int tau;
01198   int firstl;
01200   int lastl;
01202   int l;
01204   int plength;
01206   int plength_stab;
01207   int t_stab;
01209   fpt_step *step;
01211   fftw_plan plan = 0;
01212   int length = k_end+1;
01213   fftw_r2r_kind kinds[2] = {FFTW_REDFT01,FFTW_REDFT01};
01214 
01216   int k;
01217 
01218   double _Complex *work_ptr;
01219   const double _Complex *x_ptr;
01220 
01221   /* Check, if slow transformation should be used due to small bandwidth. */
01222   if (k_end < FPT_BREAK_EVEN)
01223   {
01224     /* Use NDSFT. */
01225     dpt_trafo(set, m, x, y, k_end, flags);
01226     return;
01227   }
01228 
01229   nfft_next_power_of_2_exp(k_end,&Nk,&tk);
01230   k_start_tilde = K_START_TILDE(data->k_start,Nk);
01231   k_end_tilde = K_END_TILDE(k_end,Nk);
01232 
01233   /* Check if fast transform is activated. */
01234   if (set->flags & FPT_NO_FAST_ALGORITHM)
01235     return;
01236 
01237   if (flags & FPT_FUNCTION_VALUES)
01238     plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
01239       (double*)set->work, NULL, 2, 1, kinds, 0U);
01240 
01241   /* Initialize working arrays. */
01242   memset(set->result,0U,2*Nk*sizeof(double _Complex));
01243 
01244   /* The first step. */
01245 
01246   /* Set the first 2*data->k_start coefficients to zero. */
01247   memset(set->work,0U,2*data->k_start*sizeof(double _Complex));
01248 
01249   work_ptr = &set->work[2*data->k_start];
01250   x_ptr = x;
01251 
01252   for (k = 0; k < k_end_tilde-data->k_start+1; k++)
01253   {
01254     *work_ptr++ = *x_ptr++;
01255     *work_ptr++ = 0;
01256   }
01257 
01258   /* Set the last 2*(set->N-1-k_end_tilde) coefficients to zero. */
01259   memset(&set->work[2*(k_end_tilde+1)],0U,2*(Nk-1-k_end_tilde)*sizeof(double _Complex));
01260 
01261   /* If k_end == Nk, use three-term recurrence to map last coefficient x_{Nk} to
01262    * x_{Nk-1} and x_{Nk-2}. */
01263   if (k_end == Nk)
01264   {
01265     set->work[2*(Nk-2)]   += data->gammaN[tk-2]*x[Nk-data->k_start];
01266     set->work[2*(Nk-1)]   += data->betaN[tk-2]*x[Nk-data->k_start];
01267     set->work[2*(Nk-1)+1]  = data->alphaN[tk-2]*x[Nk-data->k_start];
01268   }
01269 
01270   /*--------*/
01271   /*for (k = 0; k < 2*Nk; k++)
01272   {
01273     fprintf(stderr,"work[%2d] = %le + I*%le\tresult[%2d] = %le + I*%le\n",
01274       k,creal(set->work[k]),cimag(set->work[k]),k,creal(set->result[k]),
01275       cimag(set->result[k]));
01276   }*/
01277   /*--------*/
01278 
01279   /* Compute the remaining steps. */
01280   plength = 4;
01281   for (tau = 1; tau < tk; tau++)
01282   {
01283     /* Compute first l. */
01284     firstl = FIRST_L(k_start_tilde,plength);
01285     /* Compute last l. */
01286     lastl = LAST_L(k_end_tilde,plength);
01287 
01288     /* Compute the multiplication steps. */
01289     for (l = firstl; l <= lastl; l++)
01290     {
01291       /* Copy vectors to multiply into working arrays zero-padded to twice the length. */
01292       memcpy(set->vec3,&(set->work[(plength/2)*(4*l+2)]),(plength/2)*sizeof(double _Complex));
01293       memcpy(set->vec4,&(set->work[(plength/2)*(4*l+3)]),(plength/2)*sizeof(double _Complex));
01294       memset(&set->vec3[plength/2],0U,(plength/2)*sizeof(double _Complex));
01295       memset(&set->vec4[plength/2],0U,(plength/2)*sizeof(double _Complex));
01296 
01297       /* Copy coefficients into first half. */
01298       memcpy(&(set->work[(plength/2)*(4*l+2)]),&(set->work[(plength/2)*(4*l+1)]),(plength/2)*sizeof(double _Complex));
01299       memset(&(set->work[(plength/2)*(4*l+1)]),0U,(plength/2)*sizeof(double _Complex));
01300       memset(&(set->work[(plength/2)*(4*l+3)]),0U,(plength/2)*sizeof(double _Complex));
01301 
01302       /* Get matrix U_{n,tau,l} */
01303       step = &(data->steps[tau][l]);
01304 
01305       /* Check if step is stable. */
01306       if (step->stable)
01307       {
01308         /* Check, if we should do a symmetrizised step. */
01309         if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
01310         {
01311           /*for (k = 0; k < plength; k++)
01312           {
01313             fprintf(stderr,"fpt_trafo: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",
01314               step->a11[0][k],step->a12[0][k],step->a21[0][k],step->a22[0][k]);
01315           }*/
01316           /* Multiply third and fourth polynomial with matrix U. */
01317           //fprintf(stderr,"\nhallo\n");
01318           fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0],
01319             step->a12[0], step->a21[0], step->a22[0], step->g[0], tau, set);
01320         }
01321         else
01322         {
01323           /* Multiply third and fourth polynomial with matrix U. */
01324           fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01325             step->a21[0], step->a22[0], step->g[0], tau, set);
01326         }
01327 
01328         if (step->g[0] != 0.0)
01329         {
01330           for (k = 0; k < plength; k++)
01331           {
01332             set->work[plength*2*l+k] += set->vec3[k];
01333           }
01334         }
01335         for (k = 0; k < plength; k++)
01336         {
01337           set->work[plength*(2*l+1)+k] += set->vec4[k];
01338         }
01339       }
01340       else
01341       {
01342         /* Stabilize. */
01343 
01344         /* The lengh of the polynomials */
01345         plength_stab = step->Ns;
01346         t_stab = step->ts;
01347 
01348         /*---------*/
01349         /*fprintf(stderr,"\nfpt_trafo: stabilizing at tau = %d, l = %d.\n",tau,l);
01350         fprintf(stderr,"\nfpt_trafo: plength_stab = %d.\n",plength_stab);
01351         fprintf(stderr,"\nfpt_trafo: tk = %d.\n",tk);
01352         fprintf(stderr,"\nfpt_trafo: index = %d.\n",tk-tau-1);*/
01353         /*---------*/
01354 
01355         /* Set rest of vectors explicitely to zero */
01356         /*fprintf(stderr,"fpt_trafo: stabilizing: plength = %d, plength_stab = %d\n",
01357           plength, plength_stab);*/
01358         memset(&set->vec3[plength/2],0U,(plength_stab-plength/2)*sizeof(double _Complex));
01359         memset(&set->vec4[plength/2],0U,(plength_stab-plength/2)*sizeof(double _Complex));
01360 
01361         /* Multiply third and fourth polynomial with matrix U. */
01362         /* Check for symmetry. */
01363         if (set->flags & FPT_AL_SYMMETRY)
01364         {
01365           if (m <= 1)
01366           {
01367             fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01368               step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01369           }
01370           else if (m%2 == 0)
01371           {
01372             /*fpt_do_step_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
01373               step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
01374             fpt_do_step_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
01375               step->a21[0], step->a22[0],
01376               set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01377             /*fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01378               step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
01379           }
01380           else
01381           {
01382             /*fpt_do_step_symmetric_l(set->vec3, set->vec4, step->a11[0], step->a12[0],
01383               step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
01384             fpt_do_step_symmetric_l(set->vec3, set->vec4,
01385               step->a11[0], step->a12[0],
01386               step->a21[0],
01387               step->a22[0], set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01388             /*fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01389               step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
01390           }
01391         }
01392         else
01393         {
01394             fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01395               step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01396         }
01397 
01398         if (step->g[0] != 0.0)
01399         {
01400           for (k = 0; k < plength_stab; k++)
01401           {
01402             set->result[k] += set->vec3[k];
01403           }
01404         }
01405 
01406         for (k = 0; k < plength_stab; k++)
01407         {
01408           set->result[Nk+k] += set->vec4[k];
01409         }
01410       }
01411     }
01412     /* Double length of polynomials. */
01413     plength = plength<<1;
01414 
01415     /*--------*/
01416     /*for (k = 0; k < 2*Nk; k++)
01417     {
01418       fprintf(stderr,"work[%2d] = %le + I*%le\tresult[%2d] = %le + I*%le\n",
01419         k,creal(set->work[k]),cimag(set->work[k]),k,creal(set->result[k]),
01420         cimag(set->result[k]));
01421     }*/
01422     /*--------*/
01423   }
01424 
01425   /* Add the resulting cascade coeffcients to the coeffcients accumulated from
01426    * the stabilization steps. */
01427   for (k = 0; k < 2*Nk; k++)
01428   {
01429     set->result[k] += set->work[k];
01430   }
01431 
01432   /* The last step. Compute the Chebyshev coeffcients c_k^n from the
01433    * polynomials in front of P_0^n and P_1^n. */
01434   y[0] = data->gamma_m1*(set->result[0] + data->beta_0*set->result[Nk] +
01435     data->alpha_0*set->result[Nk+1]*0.5);
01436   y[1] = data->gamma_m1*(set->result[1] + data->beta_0*set->result[Nk+1]+
01437     data->alpha_0*(set->result[Nk]+set->result[Nk+2]*0.5));
01438   y[k_end-1] = data->gamma_m1*(set->result[k_end-1] +
01439     data->beta_0*set->result[Nk+k_end-1] +
01440     data->alpha_0*set->result[Nk+k_end-2]*0.5);
01441   y[k_end] = data->gamma_m1*(0.5*data->alpha_0*set->result[Nk+k_end-1]);
01442   for (k = 2; k <= k_end-2; k++)
01443   {
01444     y[k] = data->gamma_m1*(set->result[k] + data->beta_0*set->result[Nk+k] +
01445       data->alpha_0*0.5*(set->result[Nk+k-1]+set->result[Nk+k+1]));
01446   }
01447 
01448   if (flags & FPT_FUNCTION_VALUES)
01449   {
01450     y[0] *= 2.0;
01451     fftw_execute_r2r(plan,(double*)y,(double*)y);
01452     fftw_destroy_plan(plan);
01453     for (k = 0; k <= k_end; k++)
01454     {
01455       y[k] *= 0.5;
01456     }
01457   }
01458 }
01459 
01460 void dpt_transposed(fpt_set set, const int m, double _Complex *x,
01461   double _Complex *y, const int k_end, const unsigned int flags)
01462 {
01463   int j;
01464   fpt_data *data = &(set->dpt[m]);
01465   int Nk;
01466   int tk;
01467   double norm;
01468 
01469   nfft_next_power_of_2_exp(k_end+1,&Nk,&tk);
01470   norm = 2.0/(Nk<<1);
01471 
01472   if (set->flags & FPT_NO_DIRECT_ALGORITHM)
01473   {
01474     return;
01475   }
01476 
01477   if (flags & FPT_FUNCTION_VALUES)
01478   {
01479     for (j = 0; j <= k_end; j++)
01480     {
01481       set->xc_slow[j] = cos((PI*(j+0.5))/(k_end+1));
01482     }
01483 
01484     eval_sum_clenshaw_transposed(k_end, k_end, set->result, set->xc_slow,
01485       y, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
01486       data->gamma_m1);
01487 
01488     memcpy(x,&set->result[data->k_start],(k_end-data->k_start+1)*
01489       sizeof(double _Complex));
01490   }
01491   else
01492   {
01493     memcpy(set->result,y,(k_end+1)*sizeof(double _Complex));
01494     memset(&set->result[k_end+1],0U,(Nk-k_end-1)*sizeof(double _Complex));
01495 
01496     for (j = 0; j < Nk; j++)
01497     {
01498       set->result[j] *= norm;
01499     }
01500 
01501     fftw_execute_r2r(set->plans_dct3[tk-2],(double*)set->result,
01502       (double*)set->result);
01503 
01504     eval_sum_clenshaw_transposed(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
01505       set->result, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
01506       data->gamma_m1);
01507 
01508     memcpy(x,&set->temp[data->k_start],(k_end-data->k_start+1)*sizeof(double _Complex));
01509   }
01510 }
01511 
01512 void fpt_transposed(fpt_set set, const int m, double _Complex *x,
01513   double _Complex *y, const int k_end, const unsigned int flags)
01514 {
01515   /* Get transformation data. */
01516   fpt_data *data = &(set->dpt[m]);
01518   int Nk;
01520   int tk;
01522   int k_start_tilde;
01524   int k_end_tilde;
01525 
01527   int tau;
01529   int firstl;
01531   int lastl;
01533   int l;
01535   int plength;
01537   int plength_stab;
01539   fpt_step *step;
01541   fftw_plan plan;
01542   int length = k_end+1;
01543   fftw_r2r_kind kinds[2] = {FFTW_REDFT10,FFTW_REDFT10};
01545   int k;
01546   int t_stab;
01547 
01548   /* Check, if slow transformation should be used due to small bandwidth. */
01549   if (k_end < FPT_BREAK_EVEN)
01550   {
01551     /* Use NDSFT. */
01552     dpt_transposed(set, m, x, y, k_end, flags);
01553     return;
01554   }
01555 
01556   nfft_next_power_of_2_exp(k_end,&Nk,&tk);
01557   k_start_tilde = K_START_TILDE(data->k_start,Nk);
01558   k_end_tilde = K_END_TILDE(k_end,Nk);
01559 
01560   /* Check if fast transform is activated. */
01561   if (set->flags & FPT_NO_FAST_ALGORITHM)
01562   {
01563     return;
01564   }
01565 
01566   if (flags & FPT_FUNCTION_VALUES)
01567   {
01568     plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
01569       (double*)set->work, NULL, 2, 1, kinds, 0U);
01570     fftw_execute_r2r(plan,(double*)y,(double*)set->result);
01571     fftw_destroy_plan(plan);
01572     for (k = 0; k <= k_end; k++)
01573     {
01574       set->result[k] *= 0.5;
01575     }
01576   }
01577   else
01578   {
01579     memcpy(set->result,y,(k_end+1)*sizeof(double _Complex));
01580   }
01581 
01582   /* Initialize working arrays. */
01583   memset(set->work,0U,2*Nk*sizeof(double _Complex));
01584 
01585   /* The last step is now the first step. */
01586   for (k = 0; k <= k_end; k++)
01587   {
01588     set->work[k] = data->gamma_m1*set->result[k];
01589   }
01590   //memset(&set->work[k_end+1],0U,(Nk+1-k_end)*sizeof(double _Complex));
01591 
01592   set->work[Nk] = data->gamma_m1*(data->beta_0*set->result[0] +
01593     data->alpha_0*set->result[1]);
01594   for (k = 1; k < k_end; k++)
01595   {
01596     set->work[Nk+k] = data->gamma_m1*(data->beta_0*set->result[k] +
01597       data->alpha_0*0.5*(set->result[k-1]+set->result[k+1]));
01598   }
01599   if (k_end<Nk)
01600   {
01601     memset(&set->work[k_end],0U,(Nk-k_end)*sizeof(double _Complex));
01602   }
01603 
01605   memcpy(set->result,set->work,2*Nk*sizeof(double _Complex));
01606 
01607   /* Compute the remaining steps. */
01608   plength = Nk;
01609   for (tau = tk-1; tau >= 1; tau--)
01610   {
01611     /* Compute first l. */
01612     firstl = FIRST_L(k_start_tilde,plength);
01613     /* Compute last l. */
01614     lastl = LAST_L(k_end_tilde,plength);
01615 
01616     /* Compute the multiplication steps. */
01617     for (l = firstl; l <= lastl; l++)
01618     {
01619       /* Initialize second half of coefficient arrays with zeros. */
01620       memcpy(set->vec3,&(set->work[(plength/2)*(4*l+0)]),plength*sizeof(double _Complex));
01621       memcpy(set->vec4,&(set->work[(plength/2)*(4*l+2)]),plength*sizeof(double _Complex));
01622 
01623       memcpy(&set->work[(plength/2)*(4*l+1)],&(set->work[(plength/2)*(4*l+2)]),
01624         (plength/2)*sizeof(double _Complex));
01625 
01626       /* Get matrix U_{n,tau,l} */
01627       step = &(data->steps[tau][l]);
01628 
01629       /* Check if step is stable. */
01630       if (step->stable)
01631       {
01632         if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
01633         {
01634           /* Multiply third and fourth polynomial with matrix U. */
01635           fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01636             step->a21[0], step->a22[0], step->g[0], tau, set);
01637         }
01638         else
01639         {
01640           /* Multiply third and fourth polynomial with matrix U. */
01641           fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
01642             step->a21[0], step->a22[0], step->g[0], tau, set);
01643         }
01644         memcpy(&(set->vec3[plength/2]), set->vec4,(plength/2)*sizeof(double _Complex));
01645 
01646         for (k = 0; k < plength; k++)
01647         {
01648           set->work[plength*(4*l+2)/2+k] = set->vec3[k];
01649         }
01650       }
01651       else
01652       {
01653         /* Stabilize. */
01654         plength_stab = step->Ns;
01655         t_stab = step->ts;
01656 
01657         memcpy(set->vec3,set->result,plength_stab*sizeof(double _Complex));
01658         memcpy(set->vec4,&(set->result[Nk]),plength_stab*sizeof(double _Complex));
01659 
01660         /* Multiply third and fourth polynomial with matrix U. */
01661         if (set->flags & FPT_AL_SYMMETRY)
01662         {
01663           if (m <= 1)
01664           {
01665             fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01666               step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01667           }
01668           else if (m%2 == 0)
01669           {
01670             fpt_do_step_t_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
01671               set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01672           }
01673           else
01674           {
01675             fpt_do_step_t_symmetric_l(set->vec3, set->vec4,
01676               step->a21[0], step->a22[0], set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01677           }
01678         }
01679         else
01680         {
01681             fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
01682               step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01683         }
01684 
01685         memcpy(&(set->vec3[plength/2]),set->vec4,(plength/2)*sizeof(double _Complex));
01686 
01687         for (k = 0; k < plength; k++)
01688         {
01689           set->work[(plength/2)*(4*l+2)+k] = set->vec3[k];
01690         }
01691        }
01692     }
01693     /* Half the length of polynomial arrays. */
01694     plength = plength>>1;
01695   }
01696 
01697   /* First step */
01698   for (k = 0; k <= k_end_tilde-data->k_start; k++)
01699   {
01700     x[k] = set->work[2*(data->k_start+k)];
01701   }
01702   if (k_end == Nk)
01703   {
01704     x[Nk-data->k_start] =
01705         data->gammaN[tk-2]*set->work[2*(Nk-2)]
01706       + data->betaN[tk-2] *set->work[2*(Nk-1)]
01707       + data->alphaN[tk-2]*set->work[2*(Nk-1)+1];
01708   }
01709 }
01710 
01711 void fpt_finalize(fpt_set set)
01712 {
01713   int tau;
01714   int l;
01715   int m;
01716   fpt_data *data;
01717   int k_start_tilde;
01718   int N_tilde;
01719   int firstl, lastl;
01720   int plength;
01721   const int M = set->M;
01722 
01723   /* TODO Clean up DPT transform data structures. */
01724   for (m = 0; m < M; m++)
01725   {
01726     /* Check if precomputed. */
01727     data = &set->dpt[m];
01728     if (data->steps != (fpt_step**)NULL)
01729     {
01730       nfft_free(data->alphaN);
01731       nfft_free(data->betaN);
01732       nfft_free(data->gammaN);
01733       data->alphaN = NULL;
01734       data->betaN = NULL;
01735       data->gammaN = NULL;
01736 
01737       /* Free precomputed data. */
01738       k_start_tilde = K_START_TILDE(data->k_start,nfft_next_power_of_2(data->k_start)
01739         /*set->N*/);
01740       N_tilde = N_TILDE(set->N);
01741       plength = 4;
01742       for (tau = 1; tau < set->t; tau++)
01743       {
01744         /* Compute first l. */
01745         firstl = FIRST_L(k_start_tilde,plength);
01746         /* Compute last l. */
01747         lastl = LAST_L(N_tilde,plength);
01748 
01749         /* For l = 0,...2^{t-tau-1}-1 compute the matrices U_{n,tau,l}. */
01750         for (l = firstl; l <= lastl; l++)
01751         {
01752           /* Free components. */
01753           nfft_free(data->steps[tau][l].a11[0]);
01754           nfft_free(data->steps[tau][l].a12[0]);
01755           nfft_free(data->steps[tau][l].a21[0]);
01756           nfft_free(data->steps[tau][l].a22[0]);
01757           data->steps[tau][l].a11[0] = NULL;
01758           data->steps[tau][l].a12[0] = NULL;
01759           data->steps[tau][l].a21[0] = NULL;
01760           data->steps[tau][l].a22[0] = NULL;
01761           /* Free components. */
01762           nfft_free(data->steps[tau][l].a11);
01763           nfft_free(data->steps[tau][l].a12);
01764           nfft_free(data->steps[tau][l].a21);
01765           nfft_free(data->steps[tau][l].a22);
01766           nfft_free(data->steps[tau][l].g);
01767           data->steps[tau][l].a11 = NULL;
01768           data->steps[tau][l].a12 = NULL;
01769           data->steps[tau][l].a21 = NULL;
01770           data->steps[tau][l].a22 = NULL;
01771           data->steps[tau][l].g = NULL;
01772         }
01773         /* Free pointers for current level. */
01774         nfft_free(data->steps[tau]);
01775         data->steps[tau] = NULL;
01776         /* Double length of polynomials. */
01777         plength = plength<<1;
01778       }
01779       /* Free steps. */
01780       nfft_free(data->steps);
01781       data->steps = NULL;
01782     }
01783 
01784     if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
01785     {
01786       /* Check, if recurrence coefficients must be copied. */
01787       //fprintf(stderr,"\nfpt_finalize: %d\n",set->flags & FPT_PERSISTENT_DATA);
01788       if (!(set->flags & FPT_PERSISTENT_DATA))
01789       {
01790         nfft_free(data->alpha);
01791         nfft_free(data->beta);
01792         nfft_free(data->gamma);
01793       }
01794       data->alpha = NULL;
01795       data->beta = NULL;
01796       data->gamma = NULL;
01797     }
01798   }
01799 
01800   /* Delete array of DPT transform data. */
01801   nfft_free(set->dpt);
01802   set->dpt = NULL;
01803 
01804   for (tau = 1; tau < set->t+1; tau++)
01805   {
01806     nfft_free(set->xcvecs[tau-1]);
01807     set->xcvecs[tau-1] = NULL;
01808   }
01809   nfft_free(set->xcvecs);
01810   set->xcvecs = NULL;
01811 
01812   /* Free auxilliary arrays. */
01813   nfft_free(set->work);
01814   nfft_free(set->result);
01815 
01816   /* Check if fast transform is activated. */
01817   if (!(set->flags & FPT_NO_FAST_ALGORITHM))
01818   {
01819     /* Free auxilliary arrays. */
01820     nfft_free(set->vec3);
01821     nfft_free(set->vec4);
01822     nfft_free(set->z);
01823     set->work = NULL;
01824     set->result = NULL;
01825     set->vec3 = NULL;
01826     set->vec4 = NULL;
01827     set->z = NULL;
01828 
01829     /* Free FFTW plans. */
01830     for(tau = 0; tau < set->t/*-1*/; tau++)
01831     {
01832       fftw_destroy_plan(set->plans_dct3[tau]);
01833       fftw_destroy_plan(set->plans_dct2[tau]);
01834       set->plans_dct3[tau] = NULL;
01835       set->plans_dct2[tau] = NULL;
01836     }
01837 
01838     nfft_free(set->plans_dct3);
01839     nfft_free(set->plans_dct2);
01840     set->plans_dct3 = NULL;
01841     set->plans_dct2 = NULL;
01842   }
01843 
01844   if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
01845   {
01846     /* Delete arrays of Chebyshev nodes. */
01847     nfft_free(set->xc_slow);
01848     set->xc_slow = NULL;
01849     nfft_free(set->temp);
01850     set->temp = NULL;
01851   }
01852 
01853   /* Free DPT set structure. */
01854   nfft_free(set);
01855 }

Generated on 19 Mar 2009 by Doxygen 1.5.3