NFFT  3.4.1
fpt.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2002, 2017 Jens Keiner, Stefan Kunis, Daniel Potts
3  *
4  * This program is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU General Public License as published by the Free Software
6  * Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 
25 #include "config.h"
26 
27 #include <stdlib.h>
28 #include <string.h>
29 #include <stdbool.h>
30 #include <math.h>
31 #ifdef HAVE_COMPLEX_H
32 #include <complex.h>
33 #endif
34 
35 #include "nfft3.h"
36 #include "infft.h"
37 
38 /* Macros for index calculation. */
39 
41 #define K_START_TILDE(x,y) (MAX(MIN(x,y-2),0))
42 
44 #define K_END_TILDE(x,y) MIN(x,y-1)
45 
47 #define FIRST_L(x,y) (LRINT(floor((x)/(double)y)))
48 
50 #define LAST_L(x,y) (LRINT(ceil(((x)+1)/(double)y))-1)
51 
52 #define N_TILDE(y) (y-1)
53 
54 #define IS_SYMMETRIC(x,y,z) (x >= ((y-1.0)/z))
55 
56 #define FPT_BREAK_EVEN 4
57 
61 typedef struct fpt_step_
62 {
63  bool stable;
66  int Ns;
67  int ts;
68  double **a11,**a12,**a21,**a22;
69  double *g;
70 } fpt_step;
71 
75 typedef struct fpt_data_
76 {
78  int k_start;
79  double *alphaN;
80  double *betaN;
81  double *gammaN;
82  double alpha_0;
83  double beta_0;
84  double gamma_m1;
85  /* Data for direct transform. */
86  double *_alpha;
87  double *_beta;
88  double *_gamma;
89 } fpt_data;
90 
94 typedef struct fpt_set_s_
95 {
96  int flags;
97  int M;
98  int N;
100  int t;
102  double **xcvecs;
105  double *xc;
106  double _Complex *temp;
107  double _Complex *work;
108  double _Complex *result;
109  double _Complex *vec3;
110  double _Complex *vec4;
111  double _Complex *z;
112  fftw_plan *plans_dct3;
114  fftw_plan *plans_dct2;
116  fftw_r2r_kind *kinds;
118  fftw_r2r_kind *kindsr;
121  int *lengths;
123  /* Data for slow transforms. */
124  double *xc_slow;
125 } fpt_set_s;
126 
127 static inline void abuvxpwy(double a, double b, double _Complex* u, double _Complex* x,
128  double* v, double _Complex* y, double* w, int n)
129 {
130  int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
131  double *v_ptr = v, *w_ptr = w;
132  for (l = 0; l < n; l++)
133  *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
134 }
135 
136 #define ABUVXPWY_SYMMETRIC(NAME,S1,S2) \
137 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
138  double* v, double _Complex* y, double* w, int n) \
139 { \
140  const int n2 = n>>1; \
141  int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
142  double *v_ptr = v, *w_ptr = w; \
143  for (l = 0; l < n2; l++) \
144  *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
145  v_ptr--; w_ptr--; \
146  for (l = 0; l < n2; l++) \
147  *u_ptr++ = a * (b * S1 * (*v_ptr--) * (*x_ptr++) + S2 * (*w_ptr--) * (*y_ptr++)); \
148 }
149 
150 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric1,1.0,-1.0)
151 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric2,-1.0,1.0)
152 
153 #define ABUVXPWY_SYMMETRIC_1(NAME,S1) \
154 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
155  double* v, double _Complex* y, int n, double *xx) \
156 { \
157  const int n2 = n>>1; \
158  int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
159  double *v_ptr = v, *xx_ptr = xx; \
160  for (l = 0; l < n2; l++, v_ptr++) \
161  *u_ptr++ = a * (b * (*v_ptr) * (*x_ptr++) + ((*v_ptr)*(1.0+*xx_ptr++)) * (*y_ptr++)); \
162  v_ptr--; \
163  for (l = 0; l < n2; l++, v_ptr--) \
164  *u_ptr++ = a * (b * S1 * (*v_ptr) * (*x_ptr++) + (S1 * (*v_ptr) * (1.0+*xx_ptr++)) * (*y_ptr++)); \
165 }
166 
167 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_1,1.0)
168 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_2,-1.0)
169 
170 #define ABUVXPWY_SYMMETRIC_2(NAME,S1) \
171 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
172  double _Complex* y, double* w, int n, double *xx) \
173 { \
174  const int n2 = n>>1; \
175  int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
176  double *w_ptr = w, *xx_ptr = xx; \
177  for (l = 0; l < n2; l++, w_ptr++) \
178  *u_ptr++ = a * (b * (*w_ptr/(1.0+*xx_ptr++)) * (*x_ptr++) + (*w_ptr) * (*y_ptr++)); \
179  w_ptr--; \
180  for (l = 0; l < n2; l++, w_ptr--) \
181  *u_ptr++ = a * (b * (S1 * (*w_ptr)/(1.0+*xx_ptr++) ) * (*x_ptr++) + S1 * (*w_ptr) * (*y_ptr++)); \
182 }
183 
184 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_1,1.0)
185 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_2,-1.0)
186 
187 static inline void auvxpwy(double a, double _Complex* u, double _Complex* x, double* v,
188  double _Complex* y, double* w, int n)
189 {
190  int l;
191  double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
192  double *v_ptr = v, *w_ptr = w;
193  for (l = n; l > 0; l--)
194  *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
195 }
196 
197 static inline void auvxpwy_symmetric(double a, double _Complex* u, double _Complex* x,
198  double* v, double _Complex* y, double* w, int n)
199 {
200  const int n2 = n>>1; \
201  int l;
202  double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
203  double *v_ptr = v, *w_ptr = w;
204  for (l = n2; l > 0; l--)
205  *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
206  v_ptr--; w_ptr--;
207  for (l = n2; l > 0; l--)
208  *u_ptr++ = a * ((*v_ptr--) * (*x_ptr++) - (*w_ptr--) * (*y_ptr++));
209 }
210 
211 static inline void auvxpwy_symmetric_1(double a, double _Complex* u, double _Complex* x,
212  double* v, double _Complex* y, double* w, int n, double *xx)
213 {
214  const int n2 = n>>1; \
215  int l;
216  double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
217  double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;
218  for (l = n2; l > 0; l--, xx_ptr++)
219  *u_ptr++ = a * (((*v_ptr++)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)*(1.0+*xx_ptr)) * (*y_ptr++));
220  v_ptr--; w_ptr--;
221  for (l = n2; l > 0; l--, xx_ptr++)
222  *u_ptr++ = a * (-((*v_ptr--)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)*(1.0+*xx_ptr)) * (*y_ptr++));
223 }
224 
225 static inline void auvxpwy_symmetric_2(double a, double _Complex* u, double _Complex* x,
226  double* v, double _Complex* y, double* w, int n, double *xx)
227 {
228  const int n2 = n>>1; \
229  int l;
230  double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
231  double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;
232  for (l = n2; l > 0; l--, xx_ptr++)
233  *u_ptr++ = a * (((*v_ptr++)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)/(1.0+*xx_ptr)) * (*y_ptr++));
234  v_ptr--; w_ptr--;
235  for (l = n2; l > 0; l--, xx_ptr++)
236  *u_ptr++ = a * (-((*v_ptr--)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)/(1.0+*xx_ptr)) * (*y_ptr++));
237 }
238 
239 #define FPT_DO_STEP(NAME,M1_FUNCTION,M2_FUNCTION) \
240 static inline void NAME(double _Complex *a, double _Complex *b, double *a11, double *a12, \
241  double *a21, double *a22, double g, int tau, fpt_set set) \
242 { \
243  \
244  int length = 1<<(tau+1); \
245  \
246  double norm = 1.0/(length<<1); \
247  \
248  /* Compensate for factors introduced by a raw DCT-III. */ \
249  a[0] *= 2.0; \
250  b[0] *= 2.0; \
251  \
252  /* Compute function values from Chebyshev-coefficients using a DCT-III. */ \
253  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
254  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
255  \
256  /*for (k = 0; k < length; k++)*/ \
257  /*{*/ \
258  /*fprintf(stderr,"fpt_do_step: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",*/ \
259  /* a11[k],a12[k],a21[k],a22[k]);*/ \
260  /*}*/ \
261  \
262  /* Check, if gamma is zero. */ \
263  if (g == 0.0) \
264  { \
265  /*fprintf(stderr,"gamma == 0!\n");*/ \
266  /* Perform multiplication only for second row. */ \
267  M2_FUNCTION(norm,b,b,a22,a,a21,length); \
268  } \
269  else \
270  { \
271  /*fprintf(stderr,"gamma != 0!\n");*/ \
272  /* Perform multiplication for both rows. */ \
273  M2_FUNCTION(norm,set->z,b,a22,a,a21,length); \
274  M1_FUNCTION(norm*g,a,a,a11,b,a12,length); \
275  memcpy(b,set->z,length*sizeof(double _Complex)); \
276  /* Compute Chebyshev-coefficients using a DCT-II. */ \
277  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
278  /* Compensate for factors introduced by a raw DCT-II. */ \
279  a[0] *= 0.5; \
280  } \
281  \
282  /* Compute Chebyshev-coefficients using a DCT-II. */ \
283  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
284  /* Compensate for factors introduced by a raw DCT-II. */ \
285  b[0] *= 0.5; \
286 }
287 
288 FPT_DO_STEP(fpt_do_step,auvxpwy,auvxpwy)
289 FPT_DO_STEP(fpt_do_step_symmetric,auvxpwy_symmetric,auvxpwy_symmetric)
290 /*FPT_DO_STEP(fpt_do_step_symmetric_u,auvxpwy_symmetric,auvxpwy)
291 FPT_DO_STEP(fpt_do_step_symmetric_l,auvxpwy,auvxpwy_symmetric)*/
292 
293 static inline void fpt_do_step_symmetric_u(double _Complex *a, double _Complex *b,
294  double *a11, double *a12, double *a21, double *a22, double *x,
295  double gam, int tau, fpt_set set)
296 {
298  int length = 1<<(tau+1);
300  double norm = 1.0/(length<<1);
301 
302  UNUSED(a21); UNUSED(a22);
303 
304  /* Compensate for factors introduced by a raw DCT-III. */
305  a[0] *= 2.0;
306  b[0] *= 2.0;
307 
308  /* Compute function values from Chebyshev-coefficients using a DCT-III. */
309  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
310  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
311 
312  /*for (k = 0; k < length; k++)*/
313  /*{*/
314  /*fprintf(stderr,"fpt_do_step: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",*/
315  /* a11[k],a12[k],a21[k],a22[k]);*/
316  /*}*/
317 
318  /* Check, if gamma is zero. */
319  if (gam == 0.0)
320  {
321  /*fprintf(stderr,"gamma == 0!\n");*/
322  /* Perform multiplication only for second row. */
323  auvxpwy_symmetric_1(norm,b,b,a12,a,a11,length,x);
324  }
325  else
326  {
327  /*fprintf(stderr,"gamma != 0!\n");*/
328  /* Perform multiplication for both rows. */
329  auvxpwy_symmetric_1(norm,set->z,b,a12,a,a11,length,x);
330  auvxpwy_symmetric(norm*gam,a,a,a11,b,a12,length);
331  memcpy(b,set->z,length*sizeof(double _Complex));
332  /* Compute Chebyshev-coefficients using a DCT-II. */
333  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
334  /* Compensate for factors introduced by a raw DCT-II. */
335  a[0] *= 0.5;
336  }
337 
338  /* Compute Chebyshev-coefficients using a DCT-II. */
339  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
340  /* Compensate for factors introduced by a raw DCT-II. */
341  b[0] *= 0.5;
342 }
343 
344 static inline void fpt_do_step_symmetric_l(double _Complex *a, double _Complex *b,
345  double *a11, double *a12, double *a21, double *a22, double *x, double gam, int tau, fpt_set set)
346 {
348  int length = 1<<(tau+1);
350  double norm = 1.0/(length<<1);
351 
352  /* Compensate for factors introduced by a raw DCT-III. */
353  a[0] *= 2.0;
354  b[0] *= 2.0;
355 
356  UNUSED(a11); UNUSED(a12);
357 
358  /* Compute function values from Chebyshev-coefficients using a DCT-III. */
359  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
360  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
361 
362  /*for (k = 0; k < length; k++)*/
363  /*{*/
364  /*fprintf(stderr,"fpt_do_step: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",*/
365  /* a11[k],a12[k],a21[k],a22[k]);*/
366  /*}*/
367 
368  /* Check, if gamma is zero. */
369  if (gam == 0.0)
370  {
371  /*fprintf(stderr,"gamma == 0!\n");*/
372  /* Perform multiplication only for second row. */
373  auvxpwy_symmetric(norm,b,b,a22,a,a21,length);
374  }
375  else
376  {
377  /*fprintf(stderr,"gamma != 0!\n");*/
378  /* Perform multiplication for both rows. */
379  auvxpwy_symmetric(norm,set->z,b,a22,a,a21,length);
380  auvxpwy_symmetric_2(norm*gam,a,a,a21,b,a22,length,x);
381  memcpy(b,set->z,length*sizeof(double _Complex));
382  /* Compute Chebyshev-coefficients using a DCT-II. */
383  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
384  /* Compensate for factors introduced by a raw DCT-II. */
385  a[0] *= 0.5;
386  }
387 
388  /* Compute Chebyshev-coefficients using a DCT-II. */
389  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
390  /* Compensate for factors introduced by a raw DCT-II. */
391  b[0] *= 0.5;
392 }
393 
394 #define FPT_DO_STEP_TRANSPOSED(NAME,M1_FUNCTION,M2_FUNCTION) \
395 static inline void NAME(double _Complex *a, double _Complex *b, double *a11, \
396  double *a12, double *a21, double *a22, double g, int tau, fpt_set set) \
397 { \
398  \
399  int length = 1<<(tau+1); \
400  \
401  double norm = 1.0/(length<<1); \
402  \
403  /* Compute function values from Chebyshev-coefficients using a DCT-III. */ \
404  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
405  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
406  \
407  /* Perform matrix multiplication. */ \
408  M1_FUNCTION(norm,g,set->z,a,a11,b,a21,length); \
409  M2_FUNCTION(norm,g,b,a,a12,b,a22,length); \
410  memcpy(a,set->z,length*sizeof(double _Complex)); \
411  \
412  /* Compute Chebyshev-coefficients using a DCT-II. */ \
413  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
414  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
415 }
416 
417 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t,abuvxpwy,abuvxpwy)
418 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric,abuvxpwy_symmetric1,abuvxpwy_symmetric2)
419 /*FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric_u,abuvxpwy_symmetric1_1,abuvxpwy_symmetric1_2)*/
420 /*FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric_l,abuvxpwy_symmetric2_2,abuvxpwy_symmetric2_1)*/
421 
422 
423 static inline void fpt_do_step_t_symmetric_u(double _Complex *a,
424  double _Complex *b, double *a11, double *a12, double *x,
425  double gam, int tau, fpt_set set)
426 {
428  int length = 1<<(tau+1);
430  double norm = 1.0/(length<<1);
431 
432  /* Compute function values from Chebyshev-coefficients using a DCT-III. */
433  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
434  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
435 
436  /* Perform matrix multiplication. */
437  abuvxpwy_symmetric1_1(norm,gam,set->z,a,a11,b,length,x);
438  abuvxpwy_symmetric1_2(norm,gam,b,a,a12,b,length,x);
439  memcpy(a,set->z,length*sizeof(double _Complex));
440 
441  /* Compute Chebyshev-coefficients using a DCT-II. */
442  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
443  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
444 }
445 
446 static inline void fpt_do_step_t_symmetric_l(double _Complex *a,
447  double _Complex *b, double *a21, double *a22,
448  double *x, double gam, int tau, fpt_set set)
449 {
451  int length = 1<<(tau+1);
453  double norm = 1.0/(length<<1);
454 
455  /* Compute function values from Chebyshev-coefficients using a DCT-III. */
456  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
457  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
458 
459  /* Perform matrix multiplication. */
460  abuvxpwy_symmetric2_2(norm,gam,set->z,a,b,a21,length,x);
461  abuvxpwy_symmetric2_1(norm,gam,b,a,b,a22,length,x);
462  memcpy(a,set->z,length*sizeof(double _Complex));
463 
464  /* Compute Chebyshev-coefficients using a DCT-II. */
465  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
466  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
467 }
468 
469 
470 static void eval_clenshaw(const double *x, double *y, int size, int k, const double *alpha,
471  const double *beta, const double *gam)
472 {
473  /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector
474  * of knots x[0], ..., x[size-1] by the Clenshaw algorithm
475  */
476  int i,j;
477  double a,b,x_val_act,a_old;
478  const double *x_act;
479  double *y_act;
480  const double *alpha_act, *beta_act, *gamma_act;
481 
482  /* Traverse all nodes. */
483  x_act = x;
484  y_act = y;
485  for (i = 0; i < size; i++)
486  {
487  a = 1.0;
488  b = 0.0;
489  x_val_act = *x_act;
490 
491  if (k == 0)
492  {
493  *y_act = 1.0;
494  }
495  else
496  {
497  alpha_act = &(alpha[k]);
498  beta_act = &(beta[k]);
499  gamma_act = &(gam[k]);
500  for (j = k; j > 1; j--)
501  {
502  a_old = a;
503  a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
504  b = a_old*(*gamma_act);
505  alpha_act--;
506  beta_act--;
507  gamma_act--;
508  }
509  *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
510  }
511  x_act++;
512  y_act++;
513  }
514 }
515 
516 static void eval_clenshaw2(const double *x, double *z, double *y, int size1, int size, int k, const double *alpha,
517  const double *beta, const double *gam)
518 {
519  /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector
520  * of knots x[0], ..., x[size-1] by the Clenshaw algorithm
521  */
522  int i,j;
523  double a,b,x_val_act,a_old;
524  const double *x_act;
525  double *y_act, *z_act;
526  const double *alpha_act, *beta_act, *gamma_act;
527 
528  /* Traverse all nodes. */
529  x_act = x;
530  y_act = y;
531  z_act = z;
532  for (i = 0; i < size; i++)
533  {
534  a = 1.0;
535  b = 0.0;
536  x_val_act = *x_act;
537 
538  if (k == 0)
539  {
540  *y_act = 1.0;
541  *z_act = 0.0;
542  }
543  else
544  {
545  alpha_act = &(alpha[k]);
546  beta_act = &(beta[k]);
547  gamma_act = &(gam[k]);
548  for (j = k; j > 1; j--)
549  {
550  a_old = a;
551  a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
552  b = a_old*(*gamma_act);
553  alpha_act--;
554  beta_act--;
555  gamma_act--;
556  }
557  if (i < size1)
558  *z_act = a;
559  *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
560  }
561 
562  x_act++;
563  y_act++;
564  z_act++;
565  }
566  /*gamma_act++;
567  FILE *f = fopen("/Users/keiner/Desktop/nfsft_debug.txt","a");
568  fprintf(f,"size1: %10d, size: %10d\n",size1,size);
569  fclose(f);*/
570 }
571 
572 static int eval_clenshaw_thresh2(const double *x, double *z, double *y, int size, int k,
573  const double *alpha, const double *beta, const double *gam, const
574  double threshold)
575 {
576  /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector
577  * of knots x[0], ..., x[size-1] by the Clenshaw algorithm
578  */
579  int i,j;
580  double a,b,x_val_act,a_old;
581  const double *x_act;
582  double *y_act, *z_act;
583  const double *alpha_act, *beta_act, *gamma_act;
584  R max = -nfft_float_property(NFFT_R_MAX);
585  const R t = LOG10(FABS(threshold));
586 
587  /* Traverse all nodes. */
588  x_act = x;
589  y_act = y;
590  z_act = z;
591  for (i = 0; i < size; i++)
592  {
593  a = 1.0;
594  b = 0.0;
595  x_val_act = *x_act;
596 
597  if (k == 0)
598  {
599  *y_act = 1.0;
600  *z_act = 0.0;
601  }
602  else
603  {
604  alpha_act = &(alpha[k]);
605  beta_act = &(beta[k]);
606  gamma_act = &(gam[k]);
607  for (j = k; j > 1; j--)
608  {
609  a_old = a;
610  a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
611  b = a_old*(*gamma_act);
612  alpha_act--;
613  beta_act--;
614  gamma_act--;
615  }
616  *z_act = a;
617  *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
618  max = FMAX(max,LOG10(FABS(*y_act)));
619  if (max > t)
620  return 1;
621  }
622  x_act++;
623  y_act++;
624  z_act++;
625  }
626  return 0;
627 }
628 
629 static inline void eval_sum_clenshaw_fast(const int N, const int M,
630  const double _Complex *a, const double *x, double _Complex *y,
631  const double *alpha, const double *beta, const double *gam,
632  const double lambda)
633 {
634  int j,k;
635  double _Complex tmp1, tmp2, tmp3;
636  double xc;
637 
638  /*fprintf(stderr, "Executing eval_sum_clenshaw_fast.\n");
639  fprintf(stderr, "Before transform:\n");
640  for (j = 0; j < N; j++)
641  fprintf(stderr, "a[%4d] = %e.\n", j, a[j]);
642  for (j = 0; j <= M; j++)
643  fprintf(stderr, "x[%4d] = %e, y[%4d] = %e.\n", j, x[j], j, y[j]);*/
644 
645  if (N == 0)
646  for (j = 0; j <= M; j++)
647  y[j] = a[0];
648  else
649  {
650  for (j = 0; j <= M; j++)
651  {
652 #if 0
653  xc = x[j];
654  tmp2 = a[N];
655  tmp1 = a[N-1] + (alpha[N-1] * xc + beta[N-1])*tmp2;
656  for (k = N-1; k > 0; k--)
657  {
658  tmp3 = a[k-1]
659  + (alpha[k-1] * xc + beta[k-1]) * tmp1
660  + gam[k] * tmp2;
661  tmp2 = tmp1;
662  tmp1 = tmp3;
663  }
664  y[j] = lambda * tmp1;
665 #else
666  xc = x[j];
667  tmp1 = a[N-1];
668  tmp2 = a[N];
669  for (k = N-1; k > 0; k--)
670  {
671  tmp3 = a[k-1] + tmp2 * gam[k];
672  tmp2 *= (alpha[k] * xc + beta[k]);
673  tmp2 += tmp1;
674  tmp1 = tmp3;
675  /*if (j == 1515)
676  {
677  fprintf(stderr, "k = %d, tmp1 = %e, tmp2 = %e.\n", k, tmp1, tmp2);
678  }*/
679  }
680  tmp2 *= (alpha[0] * xc + beta[0]);
681  //fprintf(stderr, "alpha[0] = %e, beta[0] = %e.\n", alpha[0], beta[0]);
682  y[j] = lambda * (tmp2 + tmp1);
683  //fprintf(stderr, "lambda = %e.\n", lambda);
684 #endif
685  }
686  }
687  /*fprintf(stderr, "Before transform:\n");
688  for (j = 0; j < N; j++)
689  fprintf(stderr, "a[%4d] = %e.\n", j, a[j]);
690  for (j = 0; j <= M; j++)
691  fprintf(stderr, "x[%4d] = %e, y[%4d] = %e.\n", j, x[j], j, y[j]); */
692 }
693 
712 static void eval_sum_clenshaw_transposed(int N, int M, double _Complex* a, double *x,
713  double _Complex *y, double _Complex *temp, double *alpha, double *beta, double *gam,
714  double lambda)
715 {
716  int j,k;
717  double _Complex* it1 = temp;
718  double _Complex* it2 = y;
719  double _Complex aux;
720 
721  /* Compute final result by multiplying with the constant lambda */
722  a[0] = 0.0;
723  for (j = 0; j <= M; j++)
724  {
725  it2[j] = lambda * y[j];
726  a[0] += it2[j];
727  }
728 
729  if (N > 0)
730  {
731  /* Compute final step. */
732  a[1] = 0.0;
733  for (j = 0; j <= M; j++)
734  {
735  it1[j] = it2[j];
736  it2[j] = it2[j] * (alpha[0] * x[j] + beta[0]);
737  a[1] += it2[j];
738  }
739 
740  for (k = 2; k <= N; k++)
741  {
742  a[k] = 0.0;
743  for (j = 0; j <= M; j++)
744  {
745  aux = it1[j];
746  it1[j] = it2[j];
747  it2[j] = it2[j]*(alpha[k-1] * x[j] + beta[k-1]) + gam[k-1] * aux;
748  a[k] += it2[j];
749  }
750  }
751  }
752 }
753 
754 
755 fpt_set fpt_init(const int M, const int t, const unsigned int flags)
756 {
758  int plength;
760  int tau;
762  int m;
763  int k;
764 #ifdef _OPENMP
765  int nthreads = X(get_num_threads)();
766 #endif
767 
768  /* Allocate memory for new DPT set. */
769  fpt_set_s *set = (fpt_set_s*)nfft_malloc(sizeof(fpt_set_s));
770 
771  /* Save parameters in structure. */
772  set->flags = flags;
773 
774  set->M = M;
775  set->t = t;
776  set->N = 1<<t;
777 
778  /* Allocate memory for M transforms. */
779  set->dpt = (fpt_data*) nfft_malloc(M*sizeof(fpt_data));
780 
781  /* Initialize with NULL pointer. */
782  for (m = 0; m < set->M; m++)
783  set->dpt[m].steps = 0;
784 
785  /* Create arrays with Chebyshev nodes. */
786 
787  /* Initialize array with Chebyshev coefficients for the polynomial x. This
788  * would be trivially an array containing a 1 as second entry with all other
789  * coefficients set to zero. In order to compensate for the multiplicative
790  * factor 2 introduced by the DCT-III, we set this coefficient to 0.5 here. */
791 
792  /* Allocate memory for array of pointers to node arrays. */
793  set->xcvecs = (double**) nfft_malloc((set->t)*sizeof(double*));
794  /* For each polynomial length starting with 4, compute the Chebyshev nodes
795  * using a DCT-III. */
796  plength = 4;
797  for (tau = 1; tau < t+1; tau++)
798  {
799  /* Allocate memory for current array. */
800  set->xcvecs[tau-1] = (double*) nfft_malloc(plength*sizeof(double));
801  for (k = 0; k < plength; k++)
802  {
803  set->xcvecs[tau-1][k] = cos(((k+0.5)*KPI)/plength);
804  }
805  plength = plength << 1;
806  }
807 
809  set->work = (double _Complex*) nfft_malloc((2*set->N)*sizeof(double _Complex));
810  set->result = (double _Complex*) nfft_malloc((2*set->N)*sizeof(double _Complex));
811 
812  set->lengths = (int*) nfft_malloc((set->t/*-1*/)*sizeof(int));
813  set->plans_dct2 = (fftw_plan*) nfft_malloc(sizeof(fftw_plan)*(set->t/*-1*/));
814  set->kindsr = (fftw_r2r_kind*) nfft_malloc(2*sizeof(fftw_r2r_kind));
815  set->kindsr[0] = FFTW_REDFT10;
816  set->kindsr[1] = FFTW_REDFT10;
817  for (tau = 0, plength = 4; tau < set->t/*-1*/; tau++, plength<<=1)
818  {
819  set->lengths[tau] = plength;
820 #ifdef _OPENMP
821 #pragma omp critical (nfft_omp_critical_fftw_plan)
822 {
823  fftw_plan_with_nthreads(nthreads);
824 #endif
825  set->plans_dct2[tau] =
826  fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL,
827  2, 1, (double*)set->result, NULL, 2, 1,set->kindsr,
828  0);
829 #ifdef _OPENMP
830 }
831 #endif
832  }
833 
834  /* Check if fast transform is activated. */
835  if (!(set->flags & FPT_NO_FAST_ALGORITHM))
836  {
838  set->vec3 = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
839  set->vec4 = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
840  set->z = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
841 
843  set->plans_dct3 = (fftw_plan*) nfft_malloc(sizeof(fftw_plan)*(set->t/*-1*/));
844  set->kinds = (fftw_r2r_kind*) nfft_malloc(2*sizeof(fftw_r2r_kind));
845  set->kinds[0] = FFTW_REDFT01;
846  set->kinds[1] = FFTW_REDFT01;
847  for (tau = 0, plength = 4; tau < set->t/*-1*/; tau++, plength<<=1)
848  {
849  set->lengths[tau] = plength;
850 #ifdef _OPENMP
851 #pragma omp critical (nfft_omp_critical_fftw_plan)
852 {
853  fftw_plan_with_nthreads(nthreads);
854 #endif
855  set->plans_dct3[tau] =
856  fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL,
857  2, 1, (double*)set->result, NULL, 2, 1, set->kinds,
858  0);
859 #ifdef _OPENMP
860 }
861 #endif
862  }
863  nfft_free(set->lengths);
864  nfft_free(set->kinds);
865  nfft_free(set->kindsr);
866  set->lengths = NULL;
867  set->kinds = NULL;
868  set->kindsr = NULL;
869  }
870 
871  if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
872  {
873  set->xc_slow = (double*) nfft_malloc((set->N+1)*sizeof(double));
874  set->temp = (double _Complex*) nfft_malloc((set->N+1)*sizeof(double _Complex));
875  }
876 
877  /* Return the newly created DPT set. */
878  return set;
879 }
880 
881 void fpt_precompute(fpt_set set, const int m, double *alpha, double *beta,
882  double *gam, int k_start, const double threshold)
883 {
884 
885  int tau;
886  int l;
887  int plength;
889  int degree;
891  int firstl;
892  int lastl;
893  int plength_stab;
895  int degree_stab;
897  double *a11;
899  double *a12;
901  double *a21;
903  double *a22;
905  const double *calpha;
906  const double *cbeta;
907  const double *cgamma;
908  int needstab = 0;
909  int k_start_tilde;
910  int N_tilde;
911  int clength;
912  int clength_1;
913  int clength_2;
914  int t_stab, N_stab;
915  fpt_data *data;
916 
917  /* Get pointer to DPT data. */
918  data = &(set->dpt[m]);
919 
920  /* Check, if already precomputed. */
921  if (data->steps != NULL)
922  return;
923 
924  /* Save k_start. */
925  data->k_start = k_start;
926 
927  data->gamma_m1 = gam[0];
928 
929  /* Check if fast transform is activated. */
930  if (!(set->flags & FPT_NO_FAST_ALGORITHM))
931  {
932  /* Save recursion coefficients. */
933  data->alphaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
934  data->betaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
935  data->gammaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
936 
937  for (tau = 2; tau <= set->t; tau++)
938  {
939 
940  data->alphaN[tau-2] = alpha[1<<tau];
941  data->betaN[tau-2] = beta[1<<tau];
942  data->gammaN[tau-2] = gam[1<<tau];
943  }
944 
945  data->alpha_0 = alpha[1];
946  data->beta_0 = beta[1];
947 
948  k_start_tilde = K_START_TILDE(data->k_start,X(next_power_of_2)(data->k_start)
949  /*set->N*/);
950  N_tilde = N_TILDE(set->N);
951 
952  /* Allocate memory for the cascade with t = log_2(N) many levels. */
953  data->steps = (fpt_step**) nfft_malloc(sizeof(fpt_step*)*set->t);
954 
955  /* For tau = 1,...t compute the matrices U_{n,tau,l}. */
956  plength = 4;
957  for (tau = 1; tau < set->t; tau++)
958  {
959  /* Compute auxilliary values. */
960  degree = plength>>1;
961  /* Compute first l. */
962  firstl = FIRST_L(k_start_tilde,plength);
963  /* Compute last l. */
964  lastl = LAST_L(N_tilde,plength);
965 
966  /* Allocate memory for current level. This level will contain 2^{t-tau-1}
967  * many matrices. */
968  data->steps[tau] = (fpt_step*) nfft_malloc(sizeof(fpt_step)
969  * (lastl+1));
970 
971  /* For l = 0,...2^{t-tau-1}-1 compute the matrices U_{n,tau,l}. */
972  for (l = firstl; l <= lastl; l++)
973  {
974  if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
975  {
976  //fprintf(stderr,"fpt_precompute(%d): symmetric step\n",m);
977  //fflush(stderr);
978  clength = plength/2;
979  }
980  else
981  {
982  clength = plength;
983  }
984 
985  /* Allocate memory for the components of U_{n,tau,l}. */
986  a11 = (double*) nfft_malloc(sizeof(double)*clength);
987  a12 = (double*) nfft_malloc(sizeof(double)*clength);
988  a21 = (double*) nfft_malloc(sizeof(double)*clength);
989  a22 = (double*) nfft_malloc(sizeof(double)*clength);
990 
991  /* Evaluate the associated polynomials at the 2^{tau+1} Chebyshev
992  * nodes. */
993 
994  /* Get the pointers to the three-term recurrence coeffcients. */
995  calpha = &(alpha[plength*l+1+1]);
996  cbeta = &(beta[plength*l+1+1]);
997  cgamma = &(gam[plength*l+1+1]);
998 
999  if (set->flags & FPT_NO_STABILIZATION)
1000  {
1001  /* Evaluate P_{2^{tau}-2}^n(\cdot,2^{tau+1}l+2). */
1002  calpha--;
1003  cbeta--;
1004  cgamma--;
1005  eval_clenshaw2(set->xcvecs[tau-1], a11, a21, clength, clength, degree-1, calpha, cbeta,
1006  cgamma);
1007  eval_clenshaw2(set->xcvecs[tau-1], a12, a22, clength, clength, degree, calpha, cbeta,
1008  cgamma);
1009  needstab = 0;
1010  }
1011  else
1012  {
1013  calpha--;
1014  cbeta--;
1015  cgamma--;
1016  /* Evaluate P_{2^{tau}-1}^n(\cdot,2^{tau+1}l+1). */
1017  needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a11, a21, clength,
1018  degree-1, calpha, cbeta, cgamma, threshold);
1019  if (needstab == 0)
1020  {
1021  /* Evaluate P_{2^{tau}}^n(\cdot,2^{tau+1}l+1). */
1022  needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a12, a22, clength,
1023  degree, calpha, cbeta, cgamma, threshold);
1024  }
1025  }
1026 
1027 
1028  /* Check if stabilization needed. */
1029  if (needstab == 0)
1030  {
1031  data->steps[tau][l].a11 = (double**) nfft_malloc(sizeof(double*));
1032  data->steps[tau][l].a12 = (double**) nfft_malloc(sizeof(double*));
1033  data->steps[tau][l].a21 = (double**) nfft_malloc(sizeof(double*));
1034  data->steps[tau][l].a22 = (double**) nfft_malloc(sizeof(double*));
1035  data->steps[tau][l].g = (double*) nfft_malloc(sizeof(double));
1036  /* No stabilization needed. */
1037  data->steps[tau][l].a11[0] = a11;
1038  data->steps[tau][l].a12[0] = a12;
1039  data->steps[tau][l].a21[0] = a21;
1040  data->steps[tau][l].a22[0] = a22;
1041  data->steps[tau][l].g[0] = gam[plength*l+1+1];
1042  data->steps[tau][l].stable = true;
1043  }
1044  else
1045  {
1046  /* Stabilize. */
1047  degree_stab = degree*(2*l+1);
1048  X(next_power_of_2_exp_int)((l+1)*(1<<(tau+1)),&N_stab,&t_stab);
1049 
1050  /* Old arrays are to small. */
1051  nfft_free(a11);
1052  nfft_free(a12);
1053  nfft_free(a21);
1054  nfft_free(a22);
1055 
1056  data->steps[tau][l].a11 = (double**) nfft_malloc(sizeof(double*));
1057  data->steps[tau][l].a12 = (double**)nfft_malloc(sizeof(double*));
1058  data->steps[tau][l].a21 = (double**) nfft_malloc(sizeof(double*));
1059  data->steps[tau][l].a22 = (double**) nfft_malloc(sizeof(double*));
1060  data->steps[tau][l].g = (double*) nfft_malloc(sizeof(double));
1061 
1062  plength_stab = N_stab;
1063 
1064  if (set->flags & FPT_AL_SYMMETRY)
1065  {
1066  if (m <= 1)
1067  {
1068  /* This should never be executed */
1069  clength_1 = plength_stab;
1070  clength_2 = plength_stab;
1071  /* Allocate memory for arrays. */
1072  a11 = (double*) nfft_malloc(sizeof(double)*clength_1);
1073  a12 = (double*) nfft_malloc(sizeof(double)*clength_1);
1074  a21 = (double*) nfft_malloc(sizeof(double)*clength_2);
1075  a22 = (double*) nfft_malloc(sizeof(double)*clength_2);
1076  /* Get the pointers to the three-term recurrence coeffcients. */
1077  calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
1078  eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1,
1079  clength_2, degree_stab-1, calpha, cbeta, cgamma);
1080  eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1,
1081  clength_2, degree_stab+0, calpha, cbeta, cgamma);
1082  }
1083  else
1084  {
1085  clength = plength_stab/2;
1086  if (m%2 == 0)
1087  {
1088  a11 = (double*) nfft_malloc(sizeof(double)*clength);
1089  a12 = (double*) nfft_malloc(sizeof(double)*clength);
1090  a21 = 0;
1091  a22 = 0;
1092  calpha = &(alpha[2]); cbeta = &(beta[2]); cgamma = &(gam[2]);
1093  eval_clenshaw(set->xcvecs[t_stab-2], a11, clength,
1094  degree_stab-2, calpha, cbeta, cgamma);
1095  eval_clenshaw(set->xcvecs[t_stab-2], a12, clength,
1096  degree_stab-1, calpha, cbeta, cgamma);
1097  }
1098  else
1099  {
1100  a11 = 0;
1101  a12 = 0;
1102  a21 = (double*) nfft_malloc(sizeof(double)*clength);
1103  a22 = (double*) nfft_malloc(sizeof(double)*clength);
1104  calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
1105  eval_clenshaw(set->xcvecs[t_stab-2], a21, clength,
1106  degree_stab-1,calpha, cbeta, cgamma);
1107  eval_clenshaw(set->xcvecs[t_stab-2], a22, clength,
1108  degree_stab+0, calpha, cbeta, cgamma);
1109  }
1110  }
1111  }
1112  else
1113  {
1114  clength_1 = plength_stab;
1115  clength_2 = plength_stab;
1116  a11 = (double*) nfft_malloc(sizeof(double)*clength_1);
1117  a12 = (double*) nfft_malloc(sizeof(double)*clength_1);
1118  a21 = (double*) nfft_malloc(sizeof(double)*clength_2);
1119  a22 = (double*) nfft_malloc(sizeof(double)*clength_2);
1120  calpha = &(alpha[2]);
1121  cbeta = &(beta[2]);
1122  cgamma = &(gam[2]);
1123  calpha--;
1124  cbeta--;
1125  cgamma--;
1126  eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1, clength_2, degree_stab-1,
1127  calpha, cbeta, cgamma);
1128  eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1, clength_2, degree_stab+0,
1129  calpha, cbeta, cgamma);
1130 
1131  }
1132  data->steps[tau][l].a11[0] = a11;
1133  data->steps[tau][l].a12[0] = a12;
1134  data->steps[tau][l].a21[0] = a21;
1135  data->steps[tau][l].a22[0] = a22;
1136 
1137  data->steps[tau][l].g[0] = gam[1+1];
1138  data->steps[tau][l].stable = false;
1139  data->steps[tau][l].ts = t_stab;
1140  data->steps[tau][l].Ns = N_stab;
1141  }
1142  }
1144  plength = plength << 1;
1145  }
1146  }
1147 
1148  if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
1149  {
1150  /* Check, if recurrence coefficients must be copied. */
1151  if (set->flags & FPT_PERSISTENT_DATA)
1152  {
1153  data->_alpha = (double*) alpha;
1154  data->_beta = (double*) beta;
1155  data->_gamma = (double*) gam;
1156  }
1157  else
1158  {
1159  data->_alpha = (double*) nfft_malloc((set->N+1)*sizeof(double));
1160  data->_beta = (double*) nfft_malloc((set->N+1)*sizeof(double));
1161  data->_gamma = (double*) nfft_malloc((set->N+1)*sizeof(double));
1162  memcpy(data->_alpha,alpha,(set->N+1)*sizeof(double));
1163  memcpy(data->_beta,beta,(set->N+1)*sizeof(double));
1164  memcpy(data->_gamma,gam,(set->N+1)*sizeof(double));
1165  }
1166  }
1167 }
1168 
1169 void fpt_trafo_direct(fpt_set set, const int m, const double _Complex *x, double _Complex *y,
1170  const int k_end, const unsigned int flags)
1171 {
1172  int j;
1173  fpt_data *data = &(set->dpt[m]);
1174  int Nk;
1175  int tk;
1176  double norm;
1177 
1178  //fprintf(stderr, "Executing dpt.\n");
1179 
1180  X(next_power_of_2_exp_int)(k_end+1,&Nk,&tk);
1181  norm = 2.0/(Nk<<1);
1182 
1183  //fprintf(stderr, "Norm = %e.\n", norm);
1184 
1185  if (set->flags & FPT_NO_DIRECT_ALGORITHM)
1186  {
1187  return;
1188  }
1189 
1190  if (flags & FPT_FUNCTION_VALUES)
1191  {
1192  /* Fill array with Chebyshev nodes. */
1193  for (j = 0; j <= k_end; j++)
1194  {
1195  set->xc_slow[j] = cos((KPI*(j+0.5))/(k_end+1));
1196  //fprintf(stderr, "x[%4d] = %e.\n", j, set->xc_slow[j]);
1197  }
1198 
1199  memset(set->result,0U,data->k_start*sizeof(double _Complex));
1200  memcpy(&set->result[data->k_start],x,(k_end-data->k_start+1)*sizeof(double _Complex));
1201 
1202  /*eval_sum_clenshaw(k_end, k_end, set->result, set->xc_slow,
1203  y, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
1204  data->gamma_m1);*/
1205  eval_sum_clenshaw_fast(k_end, k_end, set->result, set->xc_slow,
1206  y, &data->_alpha[1], &data->_beta[1], &data->_gamma[1], data->gamma_m1);
1207  }
1208  else
1209  {
1210  memset(set->temp,0U,data->k_start*sizeof(double _Complex));
1211  memcpy(&set->temp[data->k_start],x,(k_end-data->k_start+1)*sizeof(double _Complex));
1212 
1213  eval_sum_clenshaw_fast(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
1214  set->result, &data->_alpha[1], &data->_beta[1], &data->_gamma[1],
1215  data->gamma_m1);
1216 
1217  fftw_execute_r2r(set->plans_dct2[tk-2],(double*)set->result,
1218  (double*)set->result);
1219 
1220  set->result[0] *= 0.5;
1221  for (j = 0; j < Nk; j++)
1222  {
1223  set->result[j] *= norm;
1224  }
1225 
1226  memcpy(y,set->result,(k_end+1)*sizeof(double _Complex));
1227  }
1228 }
1229 
1230 void fpt_trafo(fpt_set set, const int m, const double _Complex *x, double _Complex *y,
1231  const int k_end, const unsigned int flags)
1232 {
1233  /* Get transformation data. */
1234  fpt_data *data = &(set->dpt[m]);
1236  int Nk;
1238  int tk;
1240  int k_start_tilde;
1242  int k_end_tilde;
1243 
1245  int tau;
1247  int firstl;
1249  int lastl;
1251  int l;
1253  int plength;
1255  int plength_stab;
1256  int t_stab;
1258  fpt_step *step;
1260  fftw_plan plan = 0;
1261  int length = k_end+1;
1262  fftw_r2r_kind kinds[2] = {FFTW_REDFT01,FFTW_REDFT01};
1263 
1265  int k;
1266 
1267  double _Complex *work_ptr;
1268  const double _Complex *x_ptr;
1269 
1270  /* Check, if slow transformation should be used due to small bandwidth. */
1271  if (k_end < FPT_BREAK_EVEN)
1272  {
1273  /* Use NDSFT. */
1274  fpt_trafo_direct(set, m, x, y, k_end, flags);
1275  return;
1276  }
1277 
1278  X(next_power_of_2_exp_int)(k_end,&Nk,&tk);
1279  k_start_tilde = K_START_TILDE(data->k_start,Nk);
1280  k_end_tilde = K_END_TILDE(k_end,Nk);
1281 
1282  /* Check if fast transform is activated. */
1283  if (set->flags & FPT_NO_FAST_ALGORITHM)
1284  return;
1285 
1286  if (flags & FPT_FUNCTION_VALUES)
1287  {
1288 #ifdef _OPENMP
1289  int nthreads = X(get_num_threads)();
1290 #pragma omp critical (nfft_omp_critical_fftw_plan)
1291 {
1292  fftw_plan_with_nthreads(nthreads);
1293 #endif
1294  plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
1295  (double*)set->work, NULL, 2, 1, kinds, 0U);
1296 #ifdef _OPENMP
1297 }
1298 #endif
1299  }
1300 
1301  /* Initialize working arrays. */
1302  memset(set->result,0U,2*Nk*sizeof(double _Complex));
1303 
1304  /* The first step. */
1305 
1306  /* Set the first 2*data->k_start coefficients to zero. */
1307  memset(set->work,0U,2*data->k_start*sizeof(double _Complex));
1308 
1309  work_ptr = &set->work[2*data->k_start];
1310  x_ptr = x;
1311 
1312  for (k = 0; k <= k_end_tilde-data->k_start; k++)
1313  {
1314  *work_ptr++ = *x_ptr++;
1315  *work_ptr++ = K(0.0);
1316  }
1317 
1318  /* Set the last 2*(set->N-1-k_end_tilde) coefficients to zero. */
1319  memset(&set->work[2*(k_end_tilde+1)],0U,2*(Nk-1-k_end_tilde)*sizeof(double _Complex));
1320 
1321  /* If k_end == Nk, use three-term recurrence to map last coefficient x_{Nk} to
1322  * x_{Nk-1} and x_{Nk-2}. */
1323  if (k_end == Nk)
1324  {
1325  set->work[2*(Nk-2)] += data->gammaN[tk-2]*x[Nk-data->k_start];
1326  set->work[2*(Nk-1)] += data->betaN[tk-2]*x[Nk-data->k_start];
1327  set->work[2*(Nk-1)+1] = data->alphaN[tk-2]*x[Nk-data->k_start];
1328  }
1329 
1330  /* Compute the remaining steps. */
1331  plength = 4;
1332  for (tau = 1; tau < tk; tau++)
1333  {
1334  /* Compute first l. */
1335  firstl = FIRST_L(k_start_tilde,plength);
1336  /* Compute last l. */
1337  lastl = LAST_L(k_end_tilde,plength);
1338 
1339  /* Compute the multiplication steps. */
1340  for (l = firstl; l <= lastl; l++)
1341  {
1342  /* Copy vectors to multiply into working arrays zero-padded to twice the length. */
1343  memcpy(set->vec3,&(set->work[(plength/2)*(4*l+2)]),(plength/2)*sizeof(double _Complex));
1344  memcpy(set->vec4,&(set->work[(plength/2)*(4*l+3)]),(plength/2)*sizeof(double _Complex));
1345  memset(&set->vec3[plength/2],0U,(plength/2)*sizeof(double _Complex));
1346  memset(&set->vec4[plength/2],0U,(plength/2)*sizeof(double _Complex));
1347 
1348  /* Copy coefficients into first half. */
1349  memcpy(&(set->work[(plength/2)*(4*l+2)]),&(set->work[(plength/2)*(4*l+1)]),(plength/2)*sizeof(double _Complex));
1350  memset(&(set->work[(plength/2)*(4*l+1)]),0U,(plength/2)*sizeof(double _Complex));
1351  memset(&(set->work[(plength/2)*(4*l+3)]),0U,(plength/2)*sizeof(double _Complex));
1352 
1353  /* Get matrix U_{n,tau,l} */
1354  step = &(data->steps[tau][l]);
1355 
1356  /* Check if step is stable. */
1357  if (step->stable)
1358  {
1359  /* Check, if we should do a symmetrizised step. */
1360  if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
1361  {
1362  /*for (k = 0; k < plength; k++)
1363  {
1364  fprintf(stderr,"fpt_trafo: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",
1365  step->a11[0][k],step->a12[0][k],step->a21[0][k],step->a22[0][k]);
1366  }*/
1367  /* Multiply third and fourth polynomial with matrix U. */
1368  //fprintf(stderr,"\nhallo\n");
1369  fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0],
1370  step->a12[0], step->a21[0], step->a22[0], step->g[0], tau, set);
1371  }
1372  else
1373  {
1374  /* Multiply third and fourth polynomial with matrix U. */
1375  fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
1376  step->a21[0], step->a22[0], step->g[0], tau, set);
1377  }
1378 
1379  if (step->g[0] != 0.0)
1380  {
1381  for (k = 0; k < plength; k++)
1382  {
1383  set->work[plength*2*l+k] += set->vec3[k];
1384  }
1385  }
1386  for (k = 0; k < plength; k++)
1387  {
1388  set->work[plength*(2*l+1)+k] += set->vec4[k];
1389  }
1390  }
1391  else
1392  {
1393  /* Stabilize. */
1394 
1395  /* The lengh of the polynomials */
1396  plength_stab = step->Ns;
1397  t_stab = step->ts;
1398 
1399  /*---------*/
1400  /*fprintf(stderr,"\nfpt_trafo: stabilizing at tau = %d, l = %d.\n",tau,l);
1401  fprintf(stderr,"\nfpt_trafo: plength_stab = %d.\n",plength_stab);
1402  fprintf(stderr,"\nfpt_trafo: tk = %d.\n",tk);
1403  fprintf(stderr,"\nfpt_trafo: index = %d.\n",tk-tau-1);*/
1404  /*---------*/
1405 
1406  /* Set rest of vectors explicitely to zero */
1407  /*fprintf(stderr,"fpt_trafo: stabilizing: plength = %d, plength_stab = %d\n",
1408  plength, plength_stab);*/
1409  memset(&set->vec3[plength/2],0U,(plength_stab-plength/2)*sizeof(double _Complex));
1410  memset(&set->vec4[plength/2],0U,(plength_stab-plength/2)*sizeof(double _Complex));
1411 
1412  /* Multiply third and fourth polynomial with matrix U. */
1413  /* Check for symmetry. */
1414  if (set->flags & FPT_AL_SYMMETRY)
1415  {
1416  if (m <= 1)
1417  {
1418  fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
1419  step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
1420  }
1421  else if (m%2 == 0)
1422  {
1423  /*fpt_do_step_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
1424  step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
1425  fpt_do_step_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
1426  step->a21[0], step->a22[0],
1427  set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
1428  /*fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
1429  step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
1430  }
1431  else
1432  {
1433  /*fpt_do_step_symmetric_l(set->vec3, set->vec4, step->a11[0], step->a12[0],
1434  step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
1435  fpt_do_step_symmetric_l(set->vec3, set->vec4,
1436  step->a11[0], step->a12[0],
1437  step->a21[0],
1438  step->a22[0], set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
1439  /*fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
1440  step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
1441  }
1442  }
1443  else
1444  {
1445  fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
1446  step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
1447  }
1448 
1449  if (step->g[0] != 0.0)
1450  {
1451  for (k = 0; k < plength_stab; k++)
1452  {
1453  set->result[k] += set->vec3[k];
1454  }
1455  }
1456 
1457  for (k = 0; k < plength_stab; k++)
1458  {
1459  set->result[Nk+k] += set->vec4[k];
1460  }
1461  }
1462  }
1463  /* Double length of polynomials. */
1464  plength = plength<<1;
1465 
1466  /*--------*/
1467  /*for (k = 0; k < 2*Nk; k++)
1468  {
1469  fprintf(stderr,"work[%2d] = %le + I*%le\tresult[%2d] = %le + I*%le\n",
1470  k,creal(set->work[k]),cimag(set->work[k]),k,creal(set->result[k]),
1471  cimag(set->result[k]));
1472  }*/
1473  /*--------*/
1474  }
1475 
1476  /* Add the resulting cascade coeffcients to the coeffcients accumulated from
1477  * the stabilization steps. */
1478  for (k = 0; k < 2*Nk; k++)
1479  {
1480  set->result[k] += set->work[k];
1481  }
1482 
1483  /* The last step. Compute the Chebyshev coeffcients c_k^n from the
1484  * polynomials in front of P_0^n and P_1^n. */
1485  y[0] = data->gamma_m1*(set->result[0] + data->beta_0*set->result[Nk] +
1486  data->alpha_0*set->result[Nk+1]*0.5);
1487  y[1] = data->gamma_m1*(set->result[1] + data->beta_0*set->result[Nk+1]+
1488  data->alpha_0*(set->result[Nk]+set->result[Nk+2]*0.5));
1489  y[k_end-1] = data->gamma_m1*(set->result[k_end-1] +
1490  data->beta_0*set->result[Nk+k_end-1] +
1491  data->alpha_0*set->result[Nk+k_end-2]*0.5);
1492  y[k_end] = data->gamma_m1*(0.5*data->alpha_0*set->result[Nk+k_end-1]);
1493  for (k = 2; k <= k_end-2; k++)
1494  {
1495  y[k] = data->gamma_m1*(set->result[k] + data->beta_0*set->result[Nk+k] +
1496  data->alpha_0*0.5*(set->result[Nk+k-1]+set->result[Nk+k+1]));
1497  }
1498 
1499  if (flags & FPT_FUNCTION_VALUES)
1500  {
1501  y[0] *= 2.0;
1502  fftw_execute_r2r(plan,(double*)y,(double*)y);
1503 #ifdef _OPENMP
1504  #pragma omp critical (nfft_omp_critical_fftw_plan)
1505 #endif
1506  fftw_destroy_plan(plan);
1507  for (k = 0; k <= k_end; k++)
1508  {
1509  y[k] *= 0.5;
1510  }
1511  }
1512 }
1513 
1514 void fpt_transposed_direct(fpt_set set, const int m, double _Complex *x,
1515  double _Complex *y, const int k_end, const unsigned int flags)
1516 {
1517  int j;
1518  fpt_data *data = &(set->dpt[m]);
1519  int Nk;
1520  int tk;
1521  double norm;
1522 
1523  X(next_power_of_2_exp_int)(k_end+1,&Nk,&tk);
1524  norm = 2.0/(Nk<<1);
1525 
1526  if (set->flags & FPT_NO_DIRECT_ALGORITHM)
1527  {
1528  return;
1529  }
1530 
1531  if (flags & FPT_FUNCTION_VALUES)
1532  {
1533  for (j = 0; j <= k_end; j++)
1534  {
1535  set->xc_slow[j] = cos((KPI*(j+0.5))/(k_end+1));
1536  }
1537 
1538  eval_sum_clenshaw_transposed(k_end, k_end, set->result, set->xc_slow,
1539  y, set->work, &data->_alpha[1], &data->_beta[1], &data->_gamma[1],
1540  data->gamma_m1);
1541 
1542  memcpy(x,&set->result[data->k_start],(k_end-data->k_start+1)*
1543  sizeof(double _Complex));
1544  }
1545  else
1546  {
1547  memcpy(set->result,y,(k_end+1)*sizeof(double _Complex));
1548  memset(&set->result[k_end+1],0U,(Nk-k_end-1)*sizeof(double _Complex));
1549 
1550  for (j = 0; j < Nk; j++)
1551  {
1552  set->result[j] *= norm;
1553  }
1554 
1555  fftw_execute_r2r(set->plans_dct3[tk-2],(double*)set->result,
1556  (double*)set->result);
1557 
1558  eval_sum_clenshaw_transposed(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
1559  set->result, set->work, &data->_alpha[1], &data->_beta[1], &data->_gamma[1],
1560  data->gamma_m1);
1561 
1562  memcpy(x,&set->temp[data->k_start],(k_end-data->k_start+1)*sizeof(double _Complex));
1563  }
1564 }
1565 
1566 void fpt_transposed(fpt_set set, const int m, double _Complex *x,
1567  double _Complex *y, const int k_end, const unsigned int flags)
1568 {
1569  /* Get transformation data. */
1570  fpt_data *data = &(set->dpt[m]);
1572  int Nk;
1574  int tk;
1576  int k_start_tilde;
1578  int k_end_tilde;
1579 
1581  int tau;
1583  int firstl;
1585  int lastl;
1587  int l;
1589  int plength;
1591  int plength_stab;
1593  fpt_step *step;
1595  fftw_plan plan;
1596  int length = k_end+1;
1597  fftw_r2r_kind kinds[2] = {FFTW_REDFT10,FFTW_REDFT10};
1599  int k;
1600  int t_stab;
1601 
1602  /* Check, if slow transformation should be used due to small bandwidth. */
1603  if (k_end < FPT_BREAK_EVEN)
1604  {
1605  /* Use NDSFT. */
1606  fpt_transposed_direct(set, m, x, y, k_end, flags);
1607  return;
1608  }
1609 
1610  X(next_power_of_2_exp_int)(k_end,&Nk,&tk);
1611  k_start_tilde = K_START_TILDE(data->k_start,Nk);
1612  k_end_tilde = K_END_TILDE(k_end,Nk);
1613 
1614  /* Check if fast transform is activated. */
1615  if (set->flags & FPT_NO_FAST_ALGORITHM)
1616  {
1617  return;
1618  }
1619 
1620  if (flags & FPT_FUNCTION_VALUES)
1621  {
1622 #ifdef _OPENMP
1623  int nthreads = X(get_num_threads)();
1624 #pragma omp critical (nfft_omp_critical_fftw_plan)
1625 {
1626  fftw_plan_with_nthreads(nthreads);
1627 #endif
1628  plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
1629  (double*)set->work, NULL, 2, 1, kinds, 0U);
1630 #ifdef _OPENMP
1631 }
1632 #endif
1633  fftw_execute_r2r(plan,(double*)y,(double*)set->result);
1634 #ifdef _OPENMP
1635  #pragma omp critical (nfft_omp_critical_fftw_plan)
1636 #endif
1637  fftw_destroy_plan(plan);
1638  for (k = 0; k <= k_end; k++)
1639  {
1640  set->result[k] *= 0.5;
1641  }
1642  }
1643  else
1644  {
1645  memcpy(set->result,y,(k_end+1)*sizeof(double _Complex));
1646  }
1647 
1648  /* Initialize working arrays. */
1649  memset(set->work,0U,2*Nk*sizeof(double _Complex));
1650 
1651  /* The last step is now the first step. */
1652  for (k = 0; k <= k_end; k++)
1653  {
1654  set->work[k] = data->gamma_m1*set->result[k];
1655  }
1656  //memset(&set->work[k_end+1],0U,(Nk+1-k_end)*sizeof(double _Complex));
1657 
1658  set->work[Nk] = data->gamma_m1*(data->beta_0*set->result[0] +
1659  data->alpha_0*set->result[1]);
1660  for (k = 1; k < k_end; k++)
1661  {
1662  set->work[Nk+k] = data->gamma_m1*(data->beta_0*set->result[k] +
1663  data->alpha_0*0.5*(set->result[k-1]+set->result[k+1]));
1664  }
1665  if (k_end<Nk)
1666  {
1667  memset(&set->work[k_end],0U,(Nk-k_end)*sizeof(double _Complex));
1668  }
1669 
1671  memcpy(set->result,set->work,2*Nk*sizeof(double _Complex));
1672 
1673  /* Compute the remaining steps. */
1674  plength = Nk;
1675  for (tau = tk-1; tau >= 1; tau--)
1676  {
1677  /* Compute first l. */
1678  firstl = FIRST_L(k_start_tilde,plength);
1679  /* Compute last l. */
1680  lastl = LAST_L(k_end_tilde,plength);
1681 
1682  /* Compute the multiplication steps. */
1683  for (l = firstl; l <= lastl; l++)
1684  {
1685  /* Initialize second half of coefficient arrays with zeros. */
1686  memcpy(set->vec3,&(set->work[(plength/2)*(4*l+0)]),plength*sizeof(double _Complex));
1687  memcpy(set->vec4,&(set->work[(plength/2)*(4*l+2)]),plength*sizeof(double _Complex));
1688 
1689  memcpy(&set->work[(plength/2)*(4*l+1)],&(set->work[(plength/2)*(4*l+2)]),
1690  (plength/2)*sizeof(double _Complex));
1691 
1692  /* Get matrix U_{n,tau,l} */
1693  step = &(data->steps[tau][l]);
1694 
1695  /* Check if step is stable. */
1696  if (step->stable)
1697  {
1698  if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
1699  {
1700  /* Multiply third and fourth polynomial with matrix U. */
1701  fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
1702  step->a21[0], step->a22[0], step->g[0], tau, set);
1703  }
1704  else
1705  {
1706  /* Multiply third and fourth polynomial with matrix U. */
1707  fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
1708  step->a21[0], step->a22[0], step->g[0], tau, set);
1709  }
1710  memcpy(&(set->vec3[plength/2]), set->vec4,(plength/2)*sizeof(double _Complex));
1711 
1712  for (k = 0; k < plength; k++)
1713  {
1714  set->work[plength*(4*l+2)/2+k] = set->vec3[k];
1715  }
1716  }
1717  else
1718  {
1719  /* Stabilize. */
1720  plength_stab = step->Ns;
1721  t_stab = step->ts;
1722 
1723  memcpy(set->vec3,set->result,plength_stab*sizeof(double _Complex));
1724  memcpy(set->vec4,&(set->result[Nk]),plength_stab*sizeof(double _Complex));
1725 
1726  /* Multiply third and fourth polynomial with matrix U. */
1727  if (set->flags & FPT_AL_SYMMETRY)
1728  {
1729  if (m <= 1)
1730  {
1731  fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
1732  step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
1733  }
1734  else if (m%2 == 0)
1735  {
1736  fpt_do_step_t_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
1737  set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
1738  }
1739  else
1740  {
1741  fpt_do_step_t_symmetric_l(set->vec3, set->vec4,
1742  step->a21[0], step->a22[0], set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
1743  }
1744  }
1745  else
1746  {
1747  fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
1748  step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
1749  }
1750 
1751  memcpy(&(set->vec3[plength/2]),set->vec4,(plength/2)*sizeof(double _Complex));
1752 
1753  for (k = 0; k < plength; k++)
1754  {
1755  set->work[(plength/2)*(4*l+2)+k] = set->vec3[k];
1756  }
1757  }
1758  }
1759  /* Half the length of polynomial arrays. */
1760  plength = plength>>1;
1761  }
1762 
1763  /* First step */
1764  for (k = 0; k <= k_end_tilde-data->k_start; k++)
1765  {
1766  x[k] = set->work[2*(data->k_start+k)];
1767  }
1768  if (k_end == Nk)
1769  {
1770  x[Nk-data->k_start] =
1771  data->gammaN[tk-2]*set->work[2*(Nk-2)]
1772  + data->betaN[tk-2] *set->work[2*(Nk-1)]
1773  + data->alphaN[tk-2]*set->work[2*(Nk-1)+1];
1774  }
1775 }
1776 
1777 void fpt_finalize(fpt_set set)
1778 {
1779  int tau;
1780  int l;
1781  int m;
1782  fpt_data *data;
1783  int k_start_tilde;
1784  int N_tilde;
1785  int firstl, lastl;
1786  int plength;
1787  const int M = set->M;
1788 
1789  /* TODO Clean up DPT transform data structures. */
1790  for (m = 0; m < M; m++)
1791  {
1792  /* Check if precomputed. */
1793  data = &set->dpt[m];
1794  if (data->steps != (fpt_step**)NULL)
1795  {
1796  nfft_free(data->alphaN);
1797  nfft_free(data->betaN);
1798  nfft_free(data->gammaN);
1799  data->alphaN = NULL;
1800  data->betaN = NULL;
1801  data->gammaN = NULL;
1802 
1803  /* Free precomputed data. */
1804  k_start_tilde = K_START_TILDE(data->k_start,X(next_power_of_2)(data->k_start)
1805  /*set->N*/);
1806  N_tilde = N_TILDE(set->N);
1807  plength = 4;
1808  for (tau = 1; tau < set->t; tau++)
1809  {
1810  /* Compute first l. */
1811  firstl = FIRST_L(k_start_tilde,plength);
1812  /* Compute last l. */
1813  lastl = LAST_L(N_tilde,plength);
1814 
1815  /* For l = 0,...2^{t-tau-1}-1 compute the matrices U_{n,tau,l}. */
1816  for (l = firstl; l <= lastl; l++)
1817  {
1818  /* Free components. */
1819  nfft_free(data->steps[tau][l].a11[0]);
1820  nfft_free(data->steps[tau][l].a12[0]);
1821  nfft_free(data->steps[tau][l].a21[0]);
1822  nfft_free(data->steps[tau][l].a22[0]);
1823  data->steps[tau][l].a11[0] = NULL;
1824  data->steps[tau][l].a12[0] = NULL;
1825  data->steps[tau][l].a21[0] = NULL;
1826  data->steps[tau][l].a22[0] = NULL;
1827  /* Free components. */
1828  nfft_free(data->steps[tau][l].a11);
1829  nfft_free(data->steps[tau][l].a12);
1830  nfft_free(data->steps[tau][l].a21);
1831  nfft_free(data->steps[tau][l].a22);
1832  nfft_free(data->steps[tau][l].g);
1833  data->steps[tau][l].a11 = NULL;
1834  data->steps[tau][l].a12 = NULL;
1835  data->steps[tau][l].a21 = NULL;
1836  data->steps[tau][l].a22 = NULL;
1837  data->steps[tau][l].g = NULL;
1838  }
1839  /* Free pointers for current level. */
1840  nfft_free(data->steps[tau]);
1841  data->steps[tau] = NULL;
1842  /* Double length of polynomials. */
1843  plength = plength<<1;
1844  }
1845  /* Free steps. */
1846  nfft_free(data->steps);
1847  data->steps = NULL;
1848  }
1849 
1850  if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
1851  {
1852  /* Check, if recurrence coefficients must be copied. */
1853  //fprintf(stderr,"\nfpt_finalize: %d\n",set->flags & FPT_PERSISTENT_DATA);
1854  if (!(set->flags & FPT_PERSISTENT_DATA))
1855  {
1856  nfft_free(data->_alpha);
1857  nfft_free(data->_beta);
1858  nfft_free(data->_gamma);
1859  }
1860  data->_alpha = NULL;
1861  data->_beta = NULL;
1862  data->_gamma = NULL;
1863  }
1864  }
1865 
1866  /* Delete array of DPT transform data. */
1867  nfft_free(set->dpt);
1868  set->dpt = NULL;
1869 
1870  for (tau = 1; tau < set->t+1; tau++)
1871  {
1872  nfft_free(set->xcvecs[tau-1]);
1873  set->xcvecs[tau-1] = NULL;
1874  }
1875  nfft_free(set->xcvecs);
1876  set->xcvecs = NULL;
1877 
1878  /* Free auxilliary arrays. */
1879  nfft_free(set->work);
1880  nfft_free(set->result);
1881 
1882  /* Check if fast transform is activated. */
1883  if (!(set->flags & FPT_NO_FAST_ALGORITHM))
1884  {
1885  /* Free auxilliary arrays. */
1886  nfft_free(set->vec3);
1887  nfft_free(set->vec4);
1888  nfft_free(set->z);
1889  set->work = NULL;
1890  set->result = NULL;
1891  set->vec3 = NULL;
1892  set->vec4 = NULL;
1893  set->z = NULL;
1894 
1895  /* Free FFTW plans. */
1896  for(tau = 0; tau < set->t/*-1*/; tau++)
1897  {
1898 #ifdef _OPENMP
1899 #pragma omp critical (nfft_omp_critical_fftw_plan)
1900 #endif
1901 {
1902  fftw_destroy_plan(set->plans_dct3[tau]);
1903  fftw_destroy_plan(set->plans_dct2[tau]);
1904 }
1905  set->plans_dct3[tau] = NULL;
1906  set->plans_dct2[tau] = NULL;
1907  }
1908 
1909  nfft_free(set->plans_dct3);
1910  nfft_free(set->plans_dct2);
1911  set->plans_dct3 = NULL;
1912  set->plans_dct2 = NULL;
1913  }
1914 
1915  if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
1916  {
1917  /* Delete arrays of Chebyshev nodes. */
1918  nfft_free(set->xc_slow);
1919  set->xc_slow = NULL;
1920  nfft_free(set->temp);
1921  set->temp = NULL;
1922  }
1923 
1924  /* Free DPT set structure. */
1925  nfft_free(set);
1926 }
Holds data for a single multiplication step in the cascade summation.
Definition: fpt.c:61
double * xc
Array for Chebychev-nodes.
Definition: fpt.c:105
#define FPT_PERSISTENT_DATA
If set, TODO complete comment.
Definition: nfft3.h:632
bool stable
Indicates if the values contained represent a fast or a slow stabilized step.
Definition: fpt.c:63
double * alphaN
TODO Add comment here.
Definition: fpt.c:79
void fpt_transposed(fpt_set set, const int m, double _Complex *x, double _Complex *y, const int k_end, const unsigned int flags)
Definition: fpt.c:1566
int M
The number of DPT transforms.
Definition: fpt.c:97
double * _alpha
< TODO Add comment here.
Definition: fpt.c:86
int * lengths
Transform lengths for fftw library.
Definition: fpt.c:121
double alpha_0
TODO Add comment here.
Definition: fpt.c:82
#define FPT_NO_STABILIZATION
If set, no stabilization will be used.
Definition: nfft3.h:629
int t
The exponent of N.
Definition: fpt.c:100
void fpt_precompute(fpt_set set, const int m, double *alpha, double *beta, double *gam, int k_start, const double threshold)
Definition: fpt.c:881
#define FPT_FUNCTION_VALUES
If set, the output are function values at Chebyshev nodes rather than Chebyshev coefficients.
Definition: nfft3.h:635
Holds data for a set of cascade summations.
Definition: fpt.c:94
#define LAST_L(x, y)
Index of last block of four functions at level.
Definition: fpt.c:50
#define K_START_TILDE(x, y)
Minimum degree at top of a cascade.
Definition: fpt.c:41
fftw_r2r_kind * kinds
Transform kinds for fftw library.
Definition: fpt.c:116
void nfft_free(void *p)
fpt_data * dpt
The DPT transform data.
Definition: fpt.c:101
fftw_plan * plans_dct3
Transform plans for the fftw library.
Definition: fpt.c:112
double * gammaN
TODO Add comment here.
Definition: fpt.c:81
#define FPT_NO_FAST_ALGORITHM
If set, TODO complete comment.
Definition: nfft3.h:630
#define X(name)
Include header for C99 complex datatype.
Definition: fastsum.h:57
int N
The transform length.
Definition: fpt.c:98
Holds data for a single cascade summation.
Definition: fpt.c:75
fftw_plan * plans_dct2
Transform plans for the fftw library.
Definition: fpt.c:114
#define UNUSED(x)
Dummy use of unused parameters to silence compiler warnings.
Definition: infft.h:1365
struct fpt_set_s_ fpt_set_s
Holds data for a set of cascade summations.
int Ns
TODO Add comment here.
Definition: fpt.c:66
double * _beta
TODO Add comment here.
Definition: fpt.c:87
void * nfft_malloc(size_t n)
double beta_0
TODO Add comment here.
Definition: fpt.c:83
static void eval_sum_clenshaw_transposed(int N, int M, double _Complex *a, double *x, double _Complex *y, double _Complex *temp, double *alpha, double *beta, double *gam, double lambda)
Clenshaw algorithm.
Definition: fpt.c:712
double * _gamma
TODO Add comment here.
Definition: fpt.c:88
double ** a22
The matrix components.
Definition: fpt.c:68
fpt_set fpt_init(const int M, const int t, const unsigned int flags)
Definition: fpt.c:755
fpt_step ** steps
The cascade summation steps.
Definition: fpt.c:77
double gamma_m1
TODO Add comment here.
Definition: fpt.c:84
Header file for the nfft3 library.
#define FPT_AL_SYMMETRY
If set, TODO complete comment.
Definition: nfft3.h:636
#define FIRST_L(x, y)
Index of first block of four functions at level.
Definition: fpt.c:47
#define FPT_NO_DIRECT_ALGORITHM
If set, TODO complete comment.
Definition: nfft3.h:631
int k_start
TODO Add comment here.
Definition: fpt.c:78
int flags
The flags.
Definition: fpt.c:96
int ts
TODO Add comment here.
Definition: fpt.c:67
double * betaN
TODO Add comment here.
Definition: fpt.c:80
struct fpt_data_ fpt_data
Holds data for a single cascade summation.
struct fpt_step_ fpt_step
Holds data for a single multiplication step in the cascade summation.
#define K_END_TILDE(x, y)
Maximum degree at top of a cascade.
Definition: fpt.c:44
double ** xcvecs
Array of pointers to arrays containing the Chebyshev nodes.
Definition: fpt.c:102
fftw_r2r_kind * kindsr
Transform kinds for fftw library.
Definition: fpt.c:118