/* Minimum degree at the top of a cascade: clamps x into [0, y-2]. */
#define K_START_TILDE(x,y) (MAX(MIN((x),(y)-2),0))

/* Maximum degree at the top of a cascade. */
#define K_END_TILDE(x,y) (MIN((x),(y)-1))

/* Index of the first block of four functions at a cascade level. */
#define FIRST_L(x,y) (LRINT(floor((x)/(double)(y))))

/* Index of the last block of four functions at a cascade level. */
#define LAST_L(x,y) (LRINT(ceil(((x)+1)/(double)(y)))-1)

/* Effective transform length used by the cascade. */
#define N_TILDE(y) ((y)-1)

/* True if the coefficient data split at z is symmetric for length y. */
#define IS_SYMMETRIC(x,y,z) ((x) >= (((y)-1.0)/(z)))

/* Degree below which the direct (slow) algorithm is used instead of the
 * cascade. */
#define FPT_BREAK_EVEN 4
/* Matrix components of a cascade step (one coefficient array per
 * stabilization level).
 * NOTE(review): the enclosing struct definitions are not visible in this
 * chunk; kept byte-identical. */
68 double **a11,**a12,**a21,**
a22;
/* Scratch and output buffers of the transform set (complex workspace). */
106 double _Complex *temp;
107 double _Complex *work;
108 double _Complex *result;
109 double _Complex *vec3;
110 double _Complex *vec4;
/* Computes u[l] = a * (b*v[l]*x[l] + w[l]*y[l]) for l = 0,...,n-1.
 *
 * u, x, y are complex vectors; v, w are real weight vectors; a, b are real
 * scalars. u may alias x or y since all pointers advance in lockstep.
 * (Extraction had dropped the braces; structure restored, logic unchanged.)
 */
static inline void abuvxpwy(double a, double b, double _Complex* u,
  double _Complex* x, double* v, double _Complex* y, double* w, int n)
{
  int l;
  double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
  double *v_ptr = v, *w_ptr = w;

  for (l = 0; l < n; l++)
    *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
}
/* Generates a variant of abuvxpwy() exploiting the weight symmetry
 * v[n-1-l] = S1*v[l], w[n-1-l] = S2*w[l]: only the first n/2 entries of v
 * and w are read; the second half of u is produced by walking v and w
 * backwards with signs S1, S2 applied.
 * NOTE(review): the pointer rewind between the two loops (original line 145)
 * was lost in extraction and has been restored here — TODO confirm against
 * upstream source.
 */
#define ABUVXPWY_SYMMETRIC(NAME,S1,S2) \
static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
  double* v, double _Complex* y, double* w, int n) \
{ \
  const int n2 = n>>1; \
  int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
  double *v_ptr = v, *w_ptr = w; \
  for (l = 0; l < n2; l++) \
    *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
  v_ptr--; w_ptr--; \
  for (l = 0; l < n2; l++) \
    *u_ptr++ = a * (b * S1 * (*v_ptr--) * (*x_ptr++) + S2 * (*w_ptr--) * (*y_ptr++)); \
}

ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric1,1.0,-1.0)
ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric2,-1.0,1.0)
/* Variant of ABUVXPWY_SYMMETRIC where the second weight vector is derived
 * from the first as v[l]*(1+xx[l]); xx holds n node-dependent factors and is
 * consumed across both half-loops.
 * NOTE(review): the `v_ptr--;` rewind (original line 162) was lost in
 * extraction and has been restored — TODO confirm against upstream source.
 */
#define ABUVXPWY_SYMMETRIC_1(NAME,S1) \
static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
  double* v, double _Complex* y, int n, double *xx) \
{ \
  const int n2 = n>>1; \
  int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
  double *v_ptr = v, *xx_ptr = xx; \
  for (l = 0; l < n2; l++, v_ptr++) \
    *u_ptr++ = a * (b * (*v_ptr) * (*x_ptr++) + ((*v_ptr)*(1.0+*xx_ptr++)) * (*y_ptr++)); \
  v_ptr--; \
  for (l = 0; l < n2; l++, v_ptr--) \
    *u_ptr++ = a * (b * S1 * (*v_ptr) * (*x_ptr++) + (S1 * (*v_ptr) * (1.0+*xx_ptr++)) * (*y_ptr++)); \
}

ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_1,1.0)
ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_2,-1.0)
/* Variant of ABUVXPWY_SYMMETRIC where the first weight vector is derived
 * from the second as w[l]/(1+xx[l]); xx holds n node-dependent factors.
 * NOTE(review): the `w_ptr--;` rewind (original line 179) was lost in
 * extraction and has been restored — TODO confirm against upstream source.
 */
#define ABUVXPWY_SYMMETRIC_2(NAME,S1) \
static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
  double _Complex* y, double* w, int n, double *xx) \
{ \
  const int n2 = n>>1; \
  int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
  double *w_ptr = w, *xx_ptr = xx; \
  for (l = 0; l < n2; l++, w_ptr++) \
    *u_ptr++ = a * (b * (*w_ptr/(1.0+*xx_ptr++)) * (*x_ptr++) + (*w_ptr) * (*y_ptr++)); \
  w_ptr--; \
  for (l = 0; l < n2; l++, w_ptr--) \
    *u_ptr++ = a * (b * (S1 * (*w_ptr)/(1.0+*xx_ptr++) ) * (*x_ptr++) + S1 * (*w_ptr) * (*y_ptr++)); \
}

ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_1,1.0)
ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_2,-1.0)
/* Computes u[l] = a * (v[l]*x[l] + w[l]*y[l]) for l = 0,...,n-1.
 * (Extraction had dropped the braces and the loop-counter declaration;
 * structure restored, logic unchanged.)
 */
static inline void auvxpwy(double a, double _Complex* u, double _Complex* x,
  double* v, double _Complex* y, double* w, int n)
{
  int l;
  double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
  double *v_ptr = v, *w_ptr = w;

  for (l = n; l > 0; l--)
    *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
}
/* Symmetric variant of auvxpwy(): only the first n/2 entries of v and w are
 * read; the second half of u is computed by walking them backwards, with the
 * w-term negated.
 * NOTE(review): the rewind between the two loops (original line 206) and the
 * loop-counter declaration were lost in extraction and have been restored —
 * TODO confirm against upstream source.
 */
static inline void auvxpwy_symmetric(double a, double _Complex* u,
  double _Complex* x, double* v, double _Complex* y, double* w, int n)
{
  const int n2 = n>>1;
  int l;
  double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
  double *v_ptr = v, *w_ptr = w;

  for (l = n2; l > 0; l--)
    *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
  v_ptr--; w_ptr--;
  for (l = n2; l > 0; l--)
    *u_ptr++ = a * ((*v_ptr--) * (*x_ptr++) - (*w_ptr--) * (*y_ptr++));
}
/* Like auvxpwy_symmetric(), but both weights are scaled by the node factor
 * (1+xx[l]); xx supplies n factors consumed across both half-loops, and the
 * v-term is negated in the second half.
 * NOTE(review): the rewind between loops (original line 220) and the
 * loop-counter declaration were restored after extraction loss — TODO confirm.
 */
static inline void auvxpwy_symmetric_1(double a, double _Complex* u,
  double _Complex* x, double* v, double _Complex* y, double* w, int n,
  double *xx)
{
  const int n2 = n>>1;
  int l;
  double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
  double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;

  for (l = n2; l > 0; l--, xx_ptr++)
    *u_ptr++ = a * (((*v_ptr++)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)*(1.0+*xx_ptr)) * (*y_ptr++));
  v_ptr--; w_ptr--;
  for (l = n2; l > 0; l--, xx_ptr++)
    *u_ptr++ = a * (-((*v_ptr--)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)*(1.0+*xx_ptr)) * (*y_ptr++));
}
/* Like auvxpwy_symmetric_1(), but the weights are divided by (1+xx[l])
 * instead of multiplied.
 * NOTE(review): the rewind between loops (original line 234) and the
 * loop-counter declaration were restored after extraction loss — TODO confirm.
 */
static inline void auvxpwy_symmetric_2(double a, double _Complex* u,
  double _Complex* x, double* v, double _Complex* y, double* w, int n,
  double *xx)
{
  const int n2 = n>>1;
  int l;
  double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
  double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;

  for (l = n2; l > 0; l--, xx_ptr++)
    *u_ptr++ = a * (((*v_ptr++)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)/(1.0+*xx_ptr)) * (*y_ptr++));
  v_ptr--; w_ptr--;
  for (l = n2; l > 0; l--, xx_ptr++)
    *u_ptr++ = a * (-((*v_ptr--)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)/(1.0+*xx_ptr)) * (*y_ptr++));
}
239 #define FPT_DO_STEP(NAME,M1_FUNCTION,M2_FUNCTION) \
240 static inline void NAME(double _Complex *a, double _Complex *b, double *a11, double *a12, \
241 double *a21, double *a22, double g, int tau, fpt_set set) \
244 int length = 1<<(tau+1); \
246 double norm = 1.0/(length<<1); \
253 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
254 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
267 M2_FUNCTION(norm,b,b,a22,a,a21,length); \
273 M2_FUNCTION(norm,set->z,b,a22,a,a21,length); \
274 M1_FUNCTION(norm*g,a,a,a11,b,a12,length); \
275 memcpy(b,set->z,length*sizeof(double _Complex)); \
277 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
283 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
288 FPT_DO_STEP(fpt_do_step,auvxpwy,auvxpwy)
289 FPT_DO_STEP(fpt_do_step_symmetric,auvxpwy_symmetric,auvxpwy_symmetric)
293 static inline
void fpt_do_step_symmetric_u(
double _Complex *a,
double _Complex *b,
294 double *a11,
double *a12,
double *a21,
double *a22,
double *x,
295 double gam,
int tau,
fpt_set set)
298 int length = 1<<(tau+1);
300 double norm = 1.0/(length<<1);
309 fftw_execute_r2r(set->plans_dct3[tau-1],(
double*)a,(
double*)a);
310 fftw_execute_r2r(set->plans_dct3[tau-1],(
double*)b,(
double*)b);
323 auvxpwy_symmetric_1(norm,b,b,a12,a,a11,length,x);
329 auvxpwy_symmetric_1(norm,set->z,b,a12,a,a11,length,x);
330 auvxpwy_symmetric(norm*gam,a,a,a11,b,a12,length);
331 memcpy(b,set->z,length*
sizeof(
double _Complex));
333 fftw_execute_r2r(set->plans_dct2[tau-1],(
double*)a,(
double*)a);
339 fftw_execute_r2r(set->plans_dct2[tau-1],(
double*)b,(
double*)b);
344 static inline void fpt_do_step_symmetric_l(
double _Complex *a,
double _Complex *b,
345 double *a11,
double *a12,
double *a21,
double *a22,
double *x,
double gam,
int tau,
fpt_set set)
348 int length = 1<<(tau+1);
350 double norm = 1.0/(length<<1);
359 fftw_execute_r2r(set->
plans_dct3[tau-1],(
double*)a,(
double*)a);
360 fftw_execute_r2r(set->
plans_dct3[tau-1],(
double*)b,(
double*)b);
373 auvxpwy_symmetric(norm,b,b,a22,a,a21,length);
379 auvxpwy_symmetric(norm,set->z,b,a22,a,a21,length);
380 auvxpwy_symmetric_2(norm*gam,a,a,a21,b,a22,length,x);
381 memcpy(b,set->z,length*
sizeof(
double _Complex));
383 fftw_execute_r2r(set->
plans_dct2[tau-1],(
double*)a,(
double*)a);
389 fftw_execute_r2r(set->
plans_dct2[tau-1],(
double*)b,(
double*)b);
394 #define FPT_DO_STEP_TRANSPOSED(NAME,M1_FUNCTION,M2_FUNCTION) \
395 static inline void NAME(double _Complex *a, double _Complex *b, double *a11, \
396 double *a12, double *a21, double *a22, double g, int tau, fpt_set set) \
399 int length = 1<<(tau+1); \
401 double norm = 1.0/(length<<1); \
404 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
405 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
408 M1_FUNCTION(norm,g,set->z,a,a11,b,a21,length); \
409 M2_FUNCTION(norm,g,b,a,a12,b,a22,length); \
410 memcpy(a,set->z,length*sizeof(double _Complex)); \
413 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
414 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
417 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t,abuvxpwy,abuvxpwy)
418 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric,abuvxpwy_symmetric1,abuvxpwy_symmetric2)
423 static inline
void fpt_do_step_t_symmetric_u(
double _Complex *a,
424 double _Complex *b,
double *a11,
double *a12,
double *x,
425 double gam,
int tau,
fpt_set set)
428 int length = 1<<(tau+1);
430 double norm = 1.0/(length<<1);
433 fftw_execute_r2r(set->plans_dct3[tau-1],(
double*)a,(
double*)a);
434 fftw_execute_r2r(set->plans_dct3[tau-1],(
double*)b,(
double*)b);
437 abuvxpwy_symmetric1_1(norm,gam,set->z,a,a11,b,length,x);
438 abuvxpwy_symmetric1_2(norm,gam,b,a,a12,b,length,x);
439 memcpy(a,set->z,length*
sizeof(
double _Complex));
442 fftw_execute_r2r(set->plans_dct2[tau-1],(
double*)a,(
double*)a);
443 fftw_execute_r2r(set->plans_dct2[tau-1],(
double*)b,(
double*)b);
446 static inline void fpt_do_step_t_symmetric_l(
double _Complex *a,
447 double _Complex *b,
double *a21,
double *a22,
448 double *x,
double gam,
int tau,
fpt_set set)
451 int length = 1<<(tau+1);
453 double norm = 1.0/(length<<1);
456 fftw_execute_r2r(set->
plans_dct3[tau-1],(
double*)a,(
double*)a);
457 fftw_execute_r2r(set->
plans_dct3[tau-1],(
double*)b,(
double*)b);
460 abuvxpwy_symmetric2_2(norm,gam,set->z,a,b,a21,length,x);
461 abuvxpwy_symmetric2_1(norm,gam,b,a,b,a22,length,x);
462 memcpy(a,set->z,length*
sizeof(
double _Complex));
465 fftw_execute_r2r(set->
plans_dct2[tau-1],(
double*)a,(
double*)a);
466 fftw_execute_r2r(set->
plans_dct2[tau-1],(
double*)b,(
double*)b);
/* Clenshaw recurrence: evaluates the degree-k polynomial defined by the
 * three-term recurrence coefficients alpha, beta, gam at each node
 * x[0..size-1], writing the values to y.
 * NOTE(review): extraction has dropped several interior lines of this
 * function (braces, loop-variable declarations, the small-k special cases);
 * code kept byte-identical, comments only — restore from upstream before
 * compiling. */
470 static void eval_clenshaw(
const double *x,
double *y,
int size,
int k,
const double *alpha,
471 const double *beta,
const double *gam)
477 double a,b,x_val_act,a_old;
480 const double *alpha_act, *beta_act, *gamma_act;
/* One Clenshaw downward recurrence per node. */
485 for (i = 0; i < size; i++)
497 alpha_act = &(alpha[k]);
498 beta_act = &(beta[k]);
499 gamma_act = &(gam[k]);
500 for (j = k; j > 1; j--)
503 a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
504 b = a_old*(*gamma_act);
/* Final combination step of the recurrence. */
509 *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
/* Clenshaw recurrence evaluating two results per node: like eval_clenshaw()
 * but writes into both z and y (presumably the degree-k value and an
 * auxiliary value — TODO confirm; interior lines are elided by extraction).
 * Code kept byte-identical, comments only. */
516 static void eval_clenshaw2(
const double *x,
double *z,
double *y,
int size1,
int size,
int k,
const double *alpha,
517 const double *beta,
const double *gam)
523 double a,b,x_val_act,a_old;
525 double *y_act, *z_act;
526 const double *alpha_act, *beta_act, *gamma_act;
/* One Clenshaw downward recurrence per node. */
532 for (i = 0; i < size; i++)
545 alpha_act = &(alpha[k]);
546 beta_act = &(beta[k]);
547 gamma_act = &(gam[k]);
548 for (j = k; j > 1; j--)
551 a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
552 b = a_old*(*gamma_act);
/* Final combination step of the recurrence. */
559 *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
/* Clenshaw recurrence with stability check: evaluates like eval_clenshaw2()
 * while tracking the maximum log10-magnitude of the results against the
 * log10 of `threshold`; the int return presumably signals that stabilization
 * is needed (callers assign it to `needstab`) — TODO confirm.
 * NOTE(review): the signature is truncated by extraction (the dangling
 * `const` below lacks its `R threshold` parameter) and interior lines are
 * elided; code kept byte-identical, comments only. R/LOG10/FABS/FMAX are
 * project-level floating-point abstractions not visible in this chunk. */
572 static int eval_clenshaw_thresh2(
const double *x,
double *z,
double *y,
int size,
int k,
573 const double *alpha,
const double *beta,
const double *gam,
const
580 double a,b,x_val_act,a_old;
582 double *y_act, *z_act;
583 const double *alpha_act, *beta_act, *gamma_act;
584 R max = -nfft_float_property(NFFT_R_MAX);
585 const R t = LOG10(FABS(threshold));
591 for (i = 0; i < size; i++)
604 alpha_act = &(alpha[k]);
605 beta_act = &(beta[k]);
606 gamma_act = &(gam[k]);
607 for (j = k; j > 1; j--)
610 a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
611 b = a_old*(*gamma_act);
617 *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
/* Track the largest magnitude seen (in log10) for the threshold test. */
618 max = FMAX(max,LOG10(FABS(*y_act)));
/* Clenshaw summation: evaluates the expansion with coefficients a[0..N] in
 * the recurrence basis (alpha, beta, gam) at nodes x[0..M], scaled by a
 * factor (`lambda`, `xc` — declared on lines elided by extraction), writing
 * results to y.
 * NOTE(review): branch structure and several declarations are missing from
 * this extraction; code kept byte-identical, comments only. */
629 static inline void eval_sum_clenshaw_fast(
const int N,
const int M,
630 const double _Complex *a,
const double *x,
double _Complex *y,
631 const double *alpha,
const double *beta,
const double *gam,
635 double _Complex tmp1, tmp2, tmp3;
646 for (j = 0; j <= M; j++)
650 for (j = 0; j <= M; j++)
/* Downward Clenshaw recurrence starting from the highest coefficient. */
655 tmp1 = a[N-1] + (alpha[N-1] * xc + beta[N-1])*tmp2;
656 for (k = N-1; k > 0; k--)
659 + (alpha[k-1] * xc + beta[k-1]) * tmp1
664 y[j] = lambda * tmp1;
669 for (k = N-1; k > 0; k--)
671 tmp3 = a[k-1] + tmp2 * gam[k];
672 tmp2 *= (alpha[k] * xc + beta[k]);
680 tmp2 *= (alpha[0] * xc + beta[0]);
682 y[j] = lambda * (tmp2 + tmp1);
/* Fragment of eval_sum_clenshaw_transposed() — per the doc index at the end
 * of this file its full signature is:
 *   static void eval_sum_clenshaw_transposed(int N, int M, double _Complex *a,
 *     double *x, double _Complex *y, double _Complex *temp, double *alpha,
 *     double *beta, double *gam, double lambda)
 * The opening of the signature and most of the body are elided by
 * extraction; code kept byte-identical, comments only. Transposed Clenshaw:
 * runs the recurrence upward over k, accumulating into it1/it2. */
713 double _Complex *y,
double _Complex *temp,
double *alpha,
double *beta,
double *gam,
717 double _Complex* it1 = temp;
718 double _Complex* it2 = y;
723 for (j = 0; j <= M; j++)
725 it2[j] = lambda * y[j];
733 for (j = 0; j <= M; j++)
736 it2[j] = it2[j] * (alpha[0] * x[j] + beta[0]);
/* Upward recurrence over the expansion degree k. */
740 for (k = 2; k <= N; k++)
743 for (j = 0; j <= M; j++)
747 it2[j] = it2[j]*(alpha[k-1] * x[j] + beta[k-1]) + gam[k-1] * aux;
/* Interior fragment of fpt_init() (signature and most control flow elided by
 * extraction — see `fpt_set fpt_init(const int M, const int t, const
 * unsigned int flags)` in the doc index). Precomputes Chebyshev node vectors
 * xcvecs, allocates the complex work buffers, and builds the FFTW DCT-II
 * (REDFT10) and DCT-III (REDFT01) plans. Code kept byte-identical, comments
 * only. */
765 int nthreads =
X(get_num_threads)();
782 for (m = 0; m < set->
M; m++)
/* Chebyshev nodes cos((k+1/2)*pi/plength) for each cascade level. */
797 for (tau = 1; tau < t+1; tau++)
801 for (k = 0; k < plength; k++)
803 set->
xcvecs[tau-1][k] = cos(((k+0.5)*KPI)/plength);
805 plength = plength << 1;
/* Workspace buffers of length 2*N. */
809 set->work = (
double _Complex*)
nfft_malloc((2*set->
N)*
sizeof(
double _Complex));
810 set->result = (
double _Complex*)
nfft_malloc((2*set->
N)*
sizeof(
double _Complex));
/* DCT-II plans (FFTW_REDFT10), one per cascade level. */
815 set->
kindsr[0] = FFTW_REDFT10;
816 set->
kindsr[1] = FFTW_REDFT10;
817 for (tau = 0, plength = 4; tau < set->
t; tau++, plength<<=1)
821 #pragma omp critical (nfft_omp_critical_fftw_plan)
823 fftw_plan_with_nthreads(nthreads);
826 fftw_plan_many_r2r(1, &set->
lengths[tau], 2, (
double*)set->work, NULL,
827 2, 1, (
double*)set->result, NULL, 2, 1,set->
kindsr,
/* Per-transform scratch vectors of length N. */
838 set->vec3 = (
double _Complex*)
nfft_malloc(set->
N*
sizeof(
double _Complex));
839 set->vec4 = (
double _Complex*)
nfft_malloc(set->
N*
sizeof(
double _Complex));
840 set->z = (
double _Complex*)
nfft_malloc(set->
N*
sizeof(
double _Complex));
/* DCT-III plans (FFTW_REDFT01), one per cascade level. */
845 set->
kinds[0] = FFTW_REDFT01;
846 set->
kinds[1] = FFTW_REDFT01;
847 for (tau = 0, plength = 4; tau < set->
t; tau++, plength<<=1)
851 #pragma omp critical (nfft_omp_critical_fftw_plan)
853 fftw_plan_with_nthreads(nthreads);
856 fftw_plan_many_r2r(1, &set->
lengths[tau], 2, (
double*)set->work, NULL,
857 2, 1, (
double*)set->result, NULL, 2, 1, set->
kinds,
/* Buffers for the direct (slow) algorithm. */
873 set->xc_slow = (
double*)
nfft_malloc((set->
N+1)*
sizeof(double));
874 set->temp = (
double _Complex*)
nfft_malloc((set->
N+1)*
sizeof(
double _Complex));
/* Interior fragment of fpt_precompute() (signature start elided by
 * extraction — see `void fpt_precompute(fpt_set set, const int m, double
 * *alpha, double *beta, double *gam, int k_start, const double threshold)`
 * in the doc index). For every cascade level tau and block l it evaluates
 * the four matrix entry vectors a11,a12,a21,a22 via Clenshaw, tests them
 * against `threshold`, and either stores the fast step or recomputes
 * stabilized versions on a longer node grid. Code kept byte-identical,
 * comments only. */
882 double *gam,
int k_start,
const double threshold)
905 const double *calpha;
907 const double *cgamma;
918 data = &(set->
dpt[m]);
/* Skip if this transform's steps were already precomputed. */
921 if (data->
steps != NULL)
/* Store the recurrence coefficients at the power-of-two degrees. */
937 for (tau = 2; tau <= set->
t; tau++)
940 data->
alphaN[tau-2] = alpha[1<<tau];
941 data->
betaN[tau-2] = beta[1<<tau];
942 data->
gammaN[tau-2] = gam[1<<tau];
950 N_tilde = N_TILDE(set->
N);
/* Walk the cascade levels. */
957 for (tau = 1; tau < set->
t; tau++)
962 firstl =
FIRST_L(k_start_tilde,plength);
964 lastl =
LAST_L(N_tilde,plength);
972 for (l = firstl; l <= lastl; l++)
/* Allocate the four matrix-entry vectors for the unstabilized step. */
986 a11 = (
double*)
nfft_malloc(
sizeof(
double)*clength);
987 a12 = (
double*)
nfft_malloc(
sizeof(
double)*clength);
988 a21 = (
double*)
nfft_malloc(
sizeof(
double)*clength);
989 a22 = (
double*)
nfft_malloc(
sizeof(
double)*clength);
995 calpha = &(alpha[plength*l+1+1]);
996 cbeta = &(beta[plength*l+1+1]);
997 cgamma = &(gam[plength*l+1+1]);
/* Evaluate matrix entries at the level's Chebyshev nodes. */
1005 eval_clenshaw2(set->
xcvecs[tau-1], a11, a21, clength, clength, degree-1, calpha, cbeta,
1007 eval_clenshaw2(set->
xcvecs[tau-1], a12, a22, clength, clength, degree, calpha, cbeta,
/* Threshold test: does this step need stabilization? */
1017 needstab = eval_clenshaw_thresh2(set->
xcvecs[tau-1], a11, a21, clength,
1018 degree-1, calpha, cbeta, cgamma, threshold);
1022 needstab = eval_clenshaw_thresh2(set->
xcvecs[tau-1], a12, a22, clength,
1023 degree, calpha, cbeta, cgamma, threshold);
/* Store the fast (unstabilized) step. */
1037 data->
steps[tau][l].a11[0] = a11;
1038 data->
steps[tau][l].a12[0] = a12;
1039 data->
steps[tau][l].a21[0] = a21;
1041 data->
steps[tau][l].g[0] = gam[plength*l+1+1];
/* Stabilization: recompute on the enlarged grid N_stab. */
1047 degree_stab = degree*(2*l+1);
1048 X(next_power_of_2_exp_int)((l+1)*(1<<(tau+1)),&N_stab,&t_stab);
1062 plength_stab = N_stab;
1069 clength_1 = plength_stab;
1070 clength_2 = plength_stab;
1072 a11 = (
double*)
nfft_malloc(
sizeof(
double)*clength_1);
1073 a12 = (
double*)
nfft_malloc(
sizeof(
double)*clength_1);
1074 a21 = (
double*)
nfft_malloc(
sizeof(
double)*clength_2);
1075 a22 = (
double*)
nfft_malloc(
sizeof(
double)*clength_2);
1077 calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
1078 eval_clenshaw2(set->
xcvecs[t_stab-2], a11, a21, clength_1,
1079 clength_2, degree_stab-1, calpha, cbeta, cgamma);
1080 eval_clenshaw2(set->
xcvecs[t_stab-2], a12, a22, clength_1,
1081 clength_2, degree_stab+0, calpha, cbeta, cgamma);
/* Symmetric stabilized case: only half the node grid is required. */
1085 clength = plength_stab/2;
1088 a11 = (
double*)
nfft_malloc(
sizeof(
double)*clength);
1089 a12 = (
double*)
nfft_malloc(
sizeof(
double)*clength);
1092 calpha = &(alpha[2]); cbeta = &(beta[2]); cgamma = &(gam[2]);
1093 eval_clenshaw(set->
xcvecs[t_stab-2], a11, clength,
1094 degree_stab-2, calpha, cbeta, cgamma);
1095 eval_clenshaw(set->
xcvecs[t_stab-2], a12, clength,
1096 degree_stab-1, calpha, cbeta, cgamma);
1102 a21 = (
double*)
nfft_malloc(
sizeof(
double)*clength);
1103 a22 = (
double*)
nfft_malloc(
sizeof(
double)*clength);
1104 calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
1105 eval_clenshaw(set->
xcvecs[t_stab-2], a21, clength,
1106 degree_stab-1,calpha, cbeta, cgamma);
1107 eval_clenshaw(set->
xcvecs[t_stab-2], a22, clength,
1108 degree_stab+0, calpha, cbeta, cgamma);
/* Non-symmetric stabilized case: full node grid. */
1114 clength_1 = plength_stab;
1115 clength_2 = plength_stab;
1116 a11 = (
double*)
nfft_malloc(
sizeof(
double)*clength_1);
1117 a12 = (
double*)
nfft_malloc(
sizeof(
double)*clength_1);
1118 a21 = (
double*)
nfft_malloc(
sizeof(
double)*clength_2);
1119 a22 = (
double*)
nfft_malloc(
sizeof(
double)*clength_2);
1120 calpha = &(alpha[2]);
1126 eval_clenshaw2(set->
xcvecs[t_stab-2], a11, a21, clength_1, clength_2, degree_stab-1,
1127 calpha, cbeta, cgamma);
1128 eval_clenshaw2(set->
xcvecs[t_stab-2], a12, a22, clength_1, clength_2, degree_stab+0,
1129 calpha, cbeta, cgamma);
/* Store the stabilized step together with its grid parameters. */
1132 data->
steps[tau][l].a11[0] = a11;
1133 data->
steps[tau][l].a12[0] = a12;
1134 data->
steps[tau][l].a21[0] = a21;
1137 data->
steps[tau][l].g[0] = gam[1+1];
1139 data->
steps[tau][l].
ts = t_stab;
1140 data->
steps[tau][l].
Ns = N_stab;
1144 plength = plength << 1;
/* Keep references (or, below, private copies) of the recurrence data. */
1153 data->
_alpha = (
double*) alpha;
1154 data->
_beta = (
double*) beta;
1155 data->
_gamma = (
double*) gam;
1162 memcpy(data->
_alpha,alpha,(set->
N+1)*
sizeof(
double));
1163 memcpy(data->
_beta,beta,(set->
N+1)*
sizeof(
double));
1164 memcpy(data->
_gamma,gam,(set->
N+1)*
sizeof(
double));
/* Direct (slow) polynomial transform for transform index m: evaluates the
 * expansion with Clenshaw summation at Chebyshev nodes and, unless
 * FPT_FUNCTION_VALUES is requested, converts back to Chebyshev coefficients
 * with a DCT-II. Input x holds coefficients for degrees k_start..k_end;
 * output y receives k_end+1 values.
 * NOTE(review): braces and several declarations/branches are elided by
 * extraction; code kept byte-identical, comments only. */
1169 void fpt_trafo_direct(
fpt_set set,
const int m,
const double _Complex *x,
double _Complex *y,
1170 const int k_end,
const unsigned int flags)
1180 X(next_power_of_2_exp_int)(k_end+1,&Nk,&tk);
/* Chebyshev nodes for the requested output length. */
1193 for (j = 0; j <= k_end; j++)
1195 set->xc_slow[j] = cos((KPI*(j+0.5))/(k_end+1));
/* Zero-pad below k_start, then copy the input coefficients. */
1199 memset(set->result,0U,data->
k_start*
sizeof(
double _Complex));
1200 memcpy(&set->result[data->
k_start],x,(k_end-data->
k_start+1)*
sizeof(
double _Complex));
1205 eval_sum_clenshaw_fast(k_end, k_end, set->result, set->xc_slow,
1210 memset(set->temp,0U,data->
k_start*
sizeof(
double _Complex));
1211 memcpy(&set->temp[data->
k_start],x,(k_end-data->
k_start+1)*
sizeof(
double _Complex));
1213 eval_sum_clenshaw_fast(k_end, Nk-1, set->temp, set->
xcvecs[tk-2],
/* Function values -> Chebyshev coefficients (DCT-II) plus scaling. */
1217 fftw_execute_r2r(set->
plans_dct2[tk-2],(
double*)set->result,
1218 (
double*)set->result);
1220 set->result[0] *= 0.5;
1221 for (j = 0; j < Nk; j++)
1223 set->result[j] *= norm;
1226 memcpy(y,set->result,(k_end+1)*
sizeof(
double _Complex));
/* Fast polynomial transform (forward): cascade summation over levels tau,
 * applying the precomputed 2x2 steps (stabilized where flagged), followed by
 * the final recombination into Chebyshev coefficients of y. Falls back to
 * fpt_trafo_direct() below FPT_BREAK_EVEN; with FPT_FUNCTION_VALUES the
 * result is additionally transformed to function values via a DCT-III plan.
 * NOTE(review): braces, declarations, and the branch structure selecting
 * among the symmetric/stabilized step variants are elided by extraction;
 * code kept byte-identical, comments only. */
1230 void fpt_trafo(
fpt_set set,
const int m,
const double _Complex *x,
double _Complex *y,
1231 const int k_end,
const unsigned int flags)
1261 int length = k_end+1;
1262 fftw_r2r_kind kinds[2] = {FFTW_REDFT01,FFTW_REDFT01};
1267 double _Complex *work_ptr;
1268 const double _Complex *x_ptr;
/* Small transforms go through the direct algorithm. */
1271 if (k_end < FPT_BREAK_EVEN)
1274 fpt_trafo_direct(set, m, x, y, k_end, flags);
1278 X(next_power_of_2_exp_int)(k_end,&Nk,&tk);
/* Optional DCT-III plan when function values are requested. */
1286 if (flags & FPT_FUNCTION_VALUES)
1289 int nthreads =
X(get_num_threads)();
1290 #pragma omp critical (nfft_omp_critical_fftw_plan)
1292 fftw_plan_with_nthreads(nthreads);
1294 plan = fftw_plan_many_r2r(1, &length, 2, (
double*)set->work, NULL, 2, 1,
1295 (
double*)set->work, NULL, 2, 1, kinds, 0U);
/* Initialize the work layout: interleaved coefficient pairs. */
1302 memset(set->result,0U,2*Nk*
sizeof(
double _Complex));
1307 memset(set->work,0U,2*data->
k_start*
sizeof(
double _Complex));
1309 work_ptr = &set->work[2*data->
k_start];
1312 for (k = 0; k <= k_end_tilde-data->
k_start; k++)
1314 *work_ptr++ = *x_ptr++;
1315 *work_ptr++ = K(0.0);
1319 memset(&set->work[2*(k_end_tilde+1)],0U,2*(Nk-1-k_end_tilde)*
sizeof(
double _Complex));
/* Fold in the top coefficient via the stored recurrence values. */
1325 set->work[2*(Nk-2)] += data->
gammaN[tk-2]*x[Nk-data->
k_start];
1326 set->work[2*(Nk-1)] += data->
betaN[tk-2]*x[Nk-data->
k_start];
1327 set->work[2*(Nk-1)+1] = data->
alphaN[tk-2]*x[Nk-data->
k_start];
/* Cascade summation over the levels. */
1332 for (tau = 1; tau < tk; tau++)
1335 firstl =
FIRST_L(k_start_tilde,plength);
1337 lastl =
LAST_L(k_end_tilde,plength);
1340 for (l = firstl; l <= lastl; l++)
/* Gather the two coefficient vectors for this block. */
1343 memcpy(set->vec3,&(set->work[(plength/2)*(4*l+2)]),(plength/2)*
sizeof(
double _Complex));
1344 memcpy(set->vec4,&(set->work[(plength/2)*(4*l+3)]),(plength/2)*
sizeof(
double _Complex));
1345 memset(&set->vec3[plength/2],0U,(plength/2)*
sizeof(
double _Complex));
1346 memset(&set->vec4[plength/2],0U,(plength/2)*
sizeof(
double _Complex));
1349 memcpy(&(set->work[(plength/2)*(4*l+2)]),&(set->work[(plength/2)*(4*l+1)]),(plength/2)*
sizeof(
double _Complex));
1350 memset(&(set->work[(plength/2)*(4*l+1)]),0U,(plength/2)*
sizeof(
double _Complex));
1351 memset(&(set->work[(plength/2)*(4*l+3)]),0U,(plength/2)*
sizeof(
double _Complex));
1354 step = &(data->
steps[tau][l]);
/* Fast (unstabilized) step, symmetric or plain variant. */
1369 fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0],
1370 step->a12[0], step->a21[0], step->
a22[0], step->g[0], tau, set);
1375 fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
1376 step->a21[0], step->
a22[0], step->g[0], tau, set);
1379 if (step->g[0] != 0.0)
1381 for (k = 0; k < plength; k++)
1383 set->work[plength*2*l+k] += set->vec3[k];
1386 for (k = 0; k < plength; k++)
1388 set->work[plength*(2*l+1)+k] += set->vec4[k];
/* Stabilized step on the enlarged grid of length Ns. */
1396 plength_stab = step->
Ns;
1409 memset(&set->vec3[plength/2],0U,(plength_stab-plength/2)*
sizeof(
double _Complex));
1410 memset(&set->vec4[plength/2],0U,(plength_stab-plength/2)*
sizeof(
double _Complex));
1418 fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
1419 step->a21[0], step->
a22[0], step->g[0], t_stab-1, set);
1425 fpt_do_step_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
1426 step->a21[0], step->
a22[0],
1427 set->
xcvecs[t_stab-2], step->g[0], t_stab-1, set);
1435 fpt_do_step_symmetric_l(set->vec3, set->vec4,
1436 step->a11[0], step->a12[0],
1438 step->
a22[0], set->
xcvecs[t_stab-2], step->g[0], t_stab-1, set);
1445 fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
1446 step->a21[0], step->
a22[0], step->g[0], t_stab-1, set);
1449 if (step->g[0] != 0.0)
1451 for (k = 0; k < plength_stab; k++)
1453 set->result[k] += set->vec3[k];
1457 for (k = 0; k < plength_stab; k++)
1459 set->result[Nk+k] += set->vec4[k];
1464 plength = plength<<1;
/* Merge stabilized contributions back into the cascade result. */
1478 for (k = 0; k < 2*Nk; k++)
1480 set->result[k] += set->work[k];
/* Final recombination into the Chebyshev coefficients of y. */
1485 y[0] = data->
gamma_m1*(set->result[0] + data->
beta_0*set->result[Nk] +
1486 data->
alpha_0*set->result[Nk+1]*0.5);
1487 y[1] = data->
gamma_m1*(set->result[1] + data->
beta_0*set->result[Nk+1]+
1488 data->
alpha_0*(set->result[Nk]+set->result[Nk+2]*0.5));
1489 y[k_end-1] = data->
gamma_m1*(set->result[k_end-1] +
1490 data->
beta_0*set->result[Nk+k_end-1] +
1491 data->
alpha_0*set->result[Nk+k_end-2]*0.5);
1492 y[k_end] = data->
gamma_m1*(0.5*data->
alpha_0*set->result[Nk+k_end-1]);
1493 for (k = 2; k <= k_end-2; k++)
1495 y[k] = data->
gamma_m1*(set->result[k] + data->
beta_0*set->result[Nk+k] +
1496 data->
alpha_0*0.5*(set->result[Nk+k-1]+set->result[Nk+k+1]));
/* Optionally convert to function values (DCT-III) and clean up the plan. */
1499 if (flags & FPT_FUNCTION_VALUES)
1502 fftw_execute_r2r(plan,(
double*)y,(
double*)y);
1504 #pragma omp critical (nfft_omp_critical_fftw_plan)
1506 fftw_destroy_plan(plan);
1507 for (k = 0; k <= k_end; k++)
/* Direct (slow) transposed polynomial transform: adjoint of
 * fpt_trafo_direct(). Normalizes the input, applies a DCT-III, and runs the
 * transposed Clenshaw summation, writing coefficients for degrees
 * k_start..k_end into x.
 * NOTE(review): braces and several interior lines are elided by extraction;
 * code kept byte-identical, comments only. */
1514 void fpt_transposed_direct(
fpt_set set,
const int m,
double _Complex *x,
1515 double _Complex *y,
const int k_end,
const unsigned int flags)
1523 X(next_power_of_2_exp_int)(k_end+1,&Nk,&tk);
1531 if (flags & FPT_FUNCTION_VALUES)
1533 for (j = 0; j <= k_end; j++)
1535 set->xc_slow[j] = cos((KPI*(j+0.5))/(k_end+1));
1543 sizeof(
double _Complex));
/* Copy and zero-pad the input, then normalize. */
1547 memcpy(set->result,y,(k_end+1)*
sizeof(
double _Complex));
1548 memset(&set->result[k_end+1],0U,(Nk-k_end-1)*
sizeof(
double _Complex));
1550 for (j = 0; j < Nk; j++)
1552 set->result[j] *= norm;
/* Chebyshev coefficients -> function values (DCT-III). */
1555 fftw_execute_r2r(set->
plans_dct3[tk-2],(
double*)set->result,
1556 (
double*)set->result);
/* Extract the coefficients for degrees k_start..k_end. */
1562 memcpy(x,&set->temp[data->
k_start],(k_end-data->
k_start+1)*
sizeof(
double _Complex));
/* Fragment of fpt_transposed() — per the doc index its full signature is:
 *   void fpt_transposed(fpt_set set, const int m, double _Complex *x,
 *     double _Complex *y, const int k_end, const unsigned int flags)
 * Adjoint of fpt_trafo(): runs the cascade in reverse (tau descending) with
 * the transposed step routines, then extracts the coefficients into x.
 * NOTE(review): the signature start, braces, and branch structure are elided
 * by extraction; code kept byte-identical, comments only. */
1567 double _Complex *y,
const int k_end,
const unsigned int flags)
1596 int length = k_end+1;
1597 fftw_r2r_kind kinds[2] = {FFTW_REDFT10,FFTW_REDFT10};
/* Small transforms go through the direct algorithm. */
1603 if (k_end < FPT_BREAK_EVEN)
1606 fpt_transposed_direct(set, m, x, y, k_end, flags);
1610 X(next_power_of_2_exp_int)(k_end,&Nk,&tk);
/* With FPT_FUNCTION_VALUES: transform the input values first (DCT-II). */
1620 if (flags & FPT_FUNCTION_VALUES)
1623 int nthreads =
X(get_num_threads)();
1624 #pragma omp critical (nfft_omp_critical_fftw_plan)
1626 fftw_plan_with_nthreads(nthreads);
1628 plan = fftw_plan_many_r2r(1, &length, 2, (
double*)set->work, NULL, 2, 1,
1629 (
double*)set->work, NULL, 2, 1, kinds, 0U);
1633 fftw_execute_r2r(plan,(
double*)y,(
double*)set->result);
1635 #pragma omp critical (nfft_omp_critical_fftw_plan)
1637 fftw_destroy_plan(plan);
1638 for (k = 0; k <= k_end; k++)
1640 set->result[k] *= 0.5;
1645 memcpy(set->result,y,(k_end+1)*
sizeof(
double _Complex));
/* Transposed final recombination: build the interleaved work layout. */
1649 memset(set->work,0U,2*Nk*
sizeof(
double _Complex));
1652 for (k = 0; k <= k_end; k++)
1654 set->work[k] = data->
gamma_m1*set->result[k];
1659 data->
alpha_0*set->result[1]);
1660 for (k = 1; k < k_end; k++)
1663 data->
alpha_0*0.5*(set->result[k-1]+set->result[k+1]));
1667 memset(&set->work[k_end],0U,(Nk-k_end)*
sizeof(
double _Complex));
1671 memcpy(set->result,set->work,2*Nk*
sizeof(
double _Complex));
/* Reverse cascade over the levels. */
1675 for (tau = tk-1; tau >= 1; tau--)
1678 firstl =
FIRST_L(k_start_tilde,plength);
1680 lastl =
LAST_L(k_end_tilde,plength);
1683 for (l = firstl; l <= lastl; l++)
1686 memcpy(set->vec3,&(set->work[(plength/2)*(4*l+0)]),plength*
sizeof(
double _Complex));
1687 memcpy(set->vec4,&(set->work[(plength/2)*(4*l+2)]),plength*
sizeof(
double _Complex));
1689 memcpy(&set->work[(plength/2)*(4*l+1)],&(set->work[(plength/2)*(4*l+2)]),
1690 (plength/2)*
sizeof(
double _Complex));
1693 step = &(data->
steps[tau][l]);
/* Fast (unstabilized) transposed step, symmetric or plain variant. */
1701 fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
1702 step->a21[0], step->
a22[0], step->g[0], tau, set);
1707 fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
1708 step->a21[0], step->
a22[0], step->g[0], tau, set);
1710 memcpy(&(set->vec3[plength/2]), set->vec4,(plength/2)*
sizeof(
double _Complex));
1712 for (k = 0; k < plength; k++)
1714 set->work[plength*(4*l+2)/2+k] = set->vec3[k];
/* Stabilized transposed step on the enlarged grid of length Ns. */
1720 plength_stab = step->
Ns;
1723 memcpy(set->vec3,set->result,plength_stab*
sizeof(
double _Complex));
1724 memcpy(set->vec4,&(set->result[Nk]),plength_stab*
sizeof(
double _Complex));
1731 fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
1732 step->a21[0], step->
a22[0], step->g[0], t_stab-1, set);
1736 fpt_do_step_t_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
1737 set->
xcvecs[t_stab-2], step->g[0], t_stab-1, set);
1741 fpt_do_step_t_symmetric_l(set->vec3, set->vec4,
1742 step->a21[0], step->
a22[0], set->
xcvecs[t_stab-2], step->g[0], t_stab-1, set);
1747 fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
1748 step->a21[0], step->
a22[0], step->g[0], t_stab-1, set);
1751 memcpy(&(set->vec3[plength/2]),set->vec4,(plength/2)*
sizeof(
double _Complex));
1753 for (k = 0; k < plength; k++)
1755 set->work[(plength/2)*(4*l+2)+k] = set->vec3[k];
1760 plength = plength>>1;
/* Extract the output coefficients for degrees k_start..k_end_tilde. */
1764 for (k = 0; k <= k_end_tilde-data->
k_start; k++)
1766 x[k] = set->work[2*(data->
k_start+k)];
/* Top coefficient from the stored recurrence values. */
1771 data->
gammaN[tk-2]*set->work[2*(Nk-2)]
1772 + data->
betaN[tk-2] *set->work[2*(Nk-1)]
1773 + data->
alphaN[tk-2]*set->work[2*(Nk-1)+1];
/* Releases all resources of an fpt_set: per-transform step matrices, the
 * Chebyshev node vectors, the FFTW plans (inside the OpenMP critical
 * section), and the auxiliary buffers. The nfft_free() calls themselves are
 * elided by extraction — only the NULL-resetting survives here; code kept
 * byte-identical, comments only. */
1777 void fpt_finalize(
fpt_set set)
1787 const int M = set->
M;
/* Free the cascade data of every transform. */
1790 for (m = 0; m < M; m++)
1793 data = &set->
dpt[m];
1806 N_tilde = N_TILDE(set->
N);
1808 for (tau = 1; tau < set->
t; tau++)
1811 firstl =
FIRST_L(k_start_tilde,plength);
1813 lastl =
LAST_L(N_tilde,plength);
1816 for (l = firstl; l <= lastl; l++)
/* Reset matrix-entry pointers after freeing (free calls elided). */
1823 data->
steps[tau][l].a11[0] = NULL;
1824 data->
steps[tau][l].a12[0] = NULL;
1825 data->
steps[tau][l].a21[0] = NULL;
1833 data->
steps[tau][l].a11 = NULL;
1834 data->
steps[tau][l].a12 = NULL;
1835 data->
steps[tau][l].a21 = NULL;
1837 data->
steps[tau][l].g = NULL;
1841 data->
steps[tau] = NULL;
1843 plength = plength<<1;
/* Release the Chebyshev node vectors. */
1870 for (tau = 1; tau < set->
t+1; tau++)
1873 set->
xcvecs[tau-1] = NULL;
/* Destroy the FFTW plans; plan destruction must be serialized. */
1896 for(tau = 0; tau < set->
t; tau++)
1899 #pragma omp critical (nfft_omp_critical_fftw_plan)
1919 set->xc_slow = NULL;
Holds data for a single multiplication step in the cascade summation.
double * xc
Array of Chebyshev nodes.
#define FPT_PERSISTENT_DATA
If set, TODO complete comment.
bool stable
Indicates if the values contained represent a fast or a slow stabilized step.
double * alphaN
TODO Add comment here.
void fpt_transposed(fpt_set set, const int m, double _Complex *x, double _Complex *y, const int k_end, const unsigned int flags)
int M
The number of DPT transforms.
double * _alpha
TODO Add comment here.
int * lengths
Transform lengths for fftw library.
double alpha_0
TODO Add comment here.
#define FPT_NO_STABILIZATION
If set, no stabilization will be used.
void fpt_precompute(fpt_set set, const int m, double *alpha, double *beta, double *gam, int k_start, const double threshold)
#define FPT_FUNCTION_VALUES
If set, the output are function values at Chebyshev nodes rather than Chebyshev coefficients.
Holds data for a set of cascade summations.
#define LAST_L(x, y)
Index of last block of four functions at level.
#define K_START_TILDE(x, y)
Minimum degree at top of a cascade.
fftw_r2r_kind * kinds
Transform kinds for fftw library.
fpt_data * dpt
The DPT transform data.
fftw_plan * plans_dct3
Transform plans for the fftw library.
double * gammaN
TODO Add comment here.
#define FPT_NO_FAST_ALGORITHM
If set, TODO complete comment.
#define X(name)
Include header for C99 complex datatype.
int N
The transform length.
Holds data for a single cascade summation.
fftw_plan * plans_dct2
Transform plans for the fftw library.
#define UNUSED(x)
Dummy use of unused parameters to silence compiler warnings.
struct fpt_set_s_ fpt_set_s
Holds data for a set of cascade summations.
int Ns
TODO Add comment here.
double * _beta
TODO Add comment here.
void * nfft_malloc(size_t n)
double beta_0
TODO Add comment here.
static void eval_sum_clenshaw_transposed(int N, int M, double _Complex *a, double *x, double _Complex *y, double _Complex *temp, double *alpha, double *beta, double *gam, double lambda)
Clenshaw algorithm.
double * _gamma
TODO Add comment here.
double ** a22
The matrix components.
fpt_set fpt_init(const int M, const int t, const unsigned int flags)
fpt_step ** steps
The cascade summation steps.
double gamma_m1
TODO Add comment here.
Header file for the nfft3 library.
#define FPT_AL_SYMMETRY
If set, TODO complete comment.
#define FIRST_L(x, y)
Index of first block of four functions at level.
#define FPT_NO_DIRECT_ALGORITHM
If set, TODO complete comment.
int k_start
TODO Add comment here.
int ts
TODO Add comment here.
double * betaN
TODO Add comment here.
struct fpt_data_ fpt_data
Holds data for a single cascade summation.
struct fpt_step_ fpt_step
Holds data for a single multiplication step in the cascade summation.
#define K_END_TILDE(x, y)
Maximum degree at top of a cascade.
double ** xcvecs
Array of pointers to arrays containing the Chebyshev nodes.
fftw_r2r_kind * kindsr
Transform kinds for fftw library.