NFFT  3.4.1
nfct.c
1 /*
2  * Copyright (c) 2002, 2017 Jens Keiner, Stefan Kunis, Daniel Potts
3  *
4  * This program is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU General Public License as published by the Free Software
6  * Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 
19 /* Nonequispaced fast cosine transform */
20 
21 /* Author: Steffen Klatt 2004-2006, Jens Keiner 2010 */
22 
23 /* configure header */
24 #include "config.h"
25 
26 /* complex datatype (maybe) */
27 #ifdef HAVE_COMPLEX_H
28 #include<complex.h>
29 #endif
30 
31 /* NFFT headers */
32 #include "nfft3.h"
33 #include "infft.h"
34 
35 #ifdef _OPENMP
36 #include <omp.h>
37 #endif
38 
39 #ifdef OMP_ASSERT
40 #include <assert.h>
41 #endif
42 
43 #undef X
44 #define X(name) NFCT(name)
45 
47 static inline INT intprod(const INT *vec, const INT a, const INT d)
48 {
49  INT t, p;
50 
51  p = 1;
52  for (t = 0; t < d; t++)
53  p *= vec[t] - a;
54 
55  return p;
56 }
57 
58 /* handy shortcuts */
59 #define BASE(x) COS(x)
60 #define NN(x) (x - 1)
61 #define OFFSET 0
62 #define FOURIER_TRAFO FFTW_REDFT00
63 #define FFTW_DEFAULT_FLAGS FFTW_ESTIMATE | FFTW_DESTROY_INPUT
64 
65 #define NODE(p,r) (ths->x[(p) * ths->d + (r)])
66 
67 #define MACRO_with_FG_PSI fg_psi[t][lj[t]]
68 #define MACRO_with_PRE_PSI ths->psi[(j * ths->d + t) * (2 * ths->m + 2) + lj[t]]
69 #define MACRO_without_PRE_PSI PHI((2 * NN(ths->n[t])), ((ths->x[(j) * ths->d + t]) \
70  - ((R)(lj[t] + u[t])) / (K(2.0) * ((R)NN(ths->n[t])))), t)
71 #define MACRO_compute_PSI PHI((2 * NN(ths->n[t])), (NODE(j,t) - ((R)(lj[t] + u[t])) / (K(2.0) * ((R)NN(ths->n[t])))), t)
72 
88 void X(trafo_direct)(const X(plan) *ths)
89 {
90  R *f_hat = (R*)ths->f_hat, *f = (R*)ths->f;
91 
92  memset(f, 0, (size_t)(ths->M_total) * sizeof(R));
93 
94  if (ths->d == 1)
95  {
96  /* specialize for univariate case, rationale: faster */
97  INT j;
98 #ifdef _OPENMP
99  #pragma omp parallel for default(shared) private(j)
100 #endif
101  for (j = 0; j < ths->M_total; j++)
102  {
103  INT k_L;
104  for (k_L = 0; k_L < ths->N_total; k_L++)
105  {
106  R omega = K2PI * ((R)(k_L + OFFSET)) * ths->x[j];
107  f[j] += f_hat[k_L] * BASE(omega);
108  }
109  }
110  }
111  else
112  {
113  /* multivariate case */
114  INT j;
115 #ifdef _OPENMP
116  #pragma omp parallel for default(shared) private(j)
117 #endif
118  for (j = 0; j < ths->M_total; j++)
119  {
120  R x[ths->d], omega, Omega[ths->d + 1];
121  INT t, t2, k_L, k[ths->d];
122  Omega[0] = K(1.0);
123  for (t = 0; t < ths->d; t++)
124  {
125  k[t] = OFFSET;
126  x[t] = K2PI * ths->x[j * ths->d + t];
127  Omega[t+1] = BASE(((R)(k[t])) * x[t]) * Omega[t];
128  }
129  omega = Omega[ths->d];
130 
131  for (k_L = 0; k_L < ths->N_total; k_L++)
132  {
133  f[j] += f_hat[k_L] * omega;
134  {
135  for (t = ths->d - 1; (t >= 1) && (k[t] == (ths->N[t] - 1)); t--)
136  k[t] = OFFSET;
137 
138  k[t]++;
139 
140  for (t2 = t; t2 < ths->d; t2++)
141  Omega[t2+1] = BASE(((R)(k[t2])) * x[t2]) * Omega[t2];
142 
143  omega = Omega[ths->d];
144  }
145  }
146  }
147  }
148 }
149 
150 void X(adjoint_direct)(const X(plan) *ths)
151 {
152  R *f_hat = (R*)ths->f_hat, *f = (R*)ths->f;
153 
154  memset(f_hat, 0, (size_t)(ths->N_total) * sizeof(R));
155 
156  if (ths->d == 1)
157  {
158  /* specialize for univariate case, rationale: faster */
159 #ifdef _OPENMP
160  INT k_L;
161  #pragma omp parallel for default(shared) private(k_L)
162  for (k_L = 0; k_L < ths->N_total; k_L++)
163  {
164  INT j;
165  for (j = 0; j < ths->M_total; j++)
166  {
167  R omega = K2PI * ((R)(k_L + OFFSET)) * ths->x[j];
168  f_hat[k_L] += f[j] * BASE(omega);
169  }
170  }
171 #else
172  INT j;
173  for (j = 0; j < ths->M_total; j++)
174  {
175  INT k_L;
176  for (k_L = 0; k_L < ths->N_total; k_L++)
177  {
178  R omega = K2PI * ((R)(k_L + OFFSET)) * ths->x[j];
179  f_hat[k_L] += f[j] * BASE(omega);
180  }
181  }
182 #endif
183  }
184  else
185  {
186  /* multivariate case */
187  INT j, k_L;
188 #ifdef _OPENMP
189  #pragma omp parallel for default(shared) private(j, k_L)
190  for (k_L = 0; k_L < ths->N_total; k_L++)
191  {
192  INT k[ths->d], k_temp, t;
193 
194  k_temp = k_L;
195 
196  for (t = ths->d - 1; t >= 0; t--)
197  {
198  k[t] = k_temp % ths->N[t];
199  k_temp /= ths->N[t];
200  }
201 
202  for (j = 0; j < ths->M_total; j++)
203  {
204  R omega = K(1.0);
205  for (t = 0; t < ths->d; t++)
206  omega *= BASE(K2PI * (k[t] + OFFSET) * ths->x[j * ths->d + t]);
207  f_hat[k_L] += f[j] * omega;
208  }
209  }
210 #else
211  for (j = 0; j < ths->M_total; j++)
212  {
213  R x[ths->d], omega, Omega[ths->d+1];
214  INT t, t2, k[ths->d];
215  Omega[0] = K(1.0);
216  for (t = 0; t < ths->d; t++)
217  {
218  k[t] = OFFSET;
219  x[t] = K2PI * ths->x[j * ths->d + t];
220  Omega[t+1] = BASE(((R)(k[t])) * x[t]) * Omega[t];
221  }
222  omega = Omega[ths->d];
223  for (k_L = 0; k_L < ths->N_total; k_L++)
224  {
225  f_hat[k_L] += f[j] * omega;
226 
227  for (t = ths->d-1; (t >= 1) && (k[t] == ths->N[t] - 1); t--)
228  k[t] = OFFSET;
229 
230  k[t]++;
231 
232  for (t2 = t; t2 < ths->d; t2++)
233  Omega[t2+1] = BASE(((R)(k[t2])) * x[t2]) * Omega[t2];
234 
235  omega = Omega[ths->d];
236  }
237  }
238 #endif
239  }
240 }
241 
261 static inline void uo(const X(plan) *ths, const INT j, INT *up, INT *op,
262  const INT act_dim)
263 {
264  const R xj = ths->x[j * ths->d + act_dim];
265  INT c = LRINT(xj * (2 * NN(ths->n[(act_dim)])));
266 
267  (*up) = c - (ths->m);
268  (*op) = c + 1 + (ths->m);
269 }
270 
271 #define MACRO_D_compute_A \
272 { \
273  g_hat[kg_plain[ths->d]] = f_hat[k_L] * c_phi_inv_k[ths->d]; \
274 }
275 
276 #define MACRO_D_compute_T \
277 { \
278  f_hat[k_L] = g_hat[kg_plain[ths->d]] * c_phi_inv_k[ths->d]; \
279 }
280 
281 #define MACRO_D_init_result_A memset(g_hat, 0, (size_t)(ths->n_total) * sizeof(R));
282 
283 #define MACRO_D_init_result_T memset(f_hat, 0, (size_t)(ths->N_total) * sizeof(R));
284 
285 #define MACRO_with_PRE_PHI_HUT ths->c_phi_inv[t][kg[t]]
286 
287 #define MACRO_compute_PHI_HUT_INV (K(1.0) / (PHI_HUT((2 * NN(ths->n[t])), kg[t] + OFFSET, t)))
288 
289 #define MACRO_init_k_ks \
290 { \
291  for (t = 0; t < ths->d; t++) \
292  { \
293  kg[t] = 0; \
294  } \
295  i = 0; \
296 }
297 
298 #define MACRO_update_c_phi_inv_k(what_kind, which_phi) \
299 { \
300  for (t = i; t < ths->d; t++) \
301  { \
302  MACRO_update_c_phi_inv_k_ ## what_kind(which_phi); \
303  kg_plain[t+1] = kg_plain[t] * ths->n[t] + kg[t]; \
304  } \
305 }
306 
307 #define MACRO_update_c_phi_inv_k_A(which_phi) \
308 { \
309  c_phi_inv_k[t+1] = (kg[t] == 0 ? K(1.0) : K(0.5)) * c_phi_inv_k[t] * MACRO_ ## which_phi; \
310 }
311 
312 #define MACRO_update_c_phi_inv_k_T(which_phi) \
313 { \
314  c_phi_inv_k[t+1] = c_phi_inv_k[t] * MACRO_ ## which_phi; \
315 }
316 
317 #define MACRO_count_k_ks \
318 { \
319  kg[ths->d - 1]++; \
320  i = ths->d - 1; \
321 \
322  while ((kg[i] == ths->N[i]) && (i > 0)) \
323  { \
324  kg[i - 1]++; \
325  kg[i] = 0; \
326  i--; \
327  } \
328 }
329 
330 /* sub routines for the fast transforms matrix vector multiplication with D, D^T */
331 #define MACRO_D(which_one) \
332 static inline void D_ ## which_one (X(plan) *ths) \
333 { \
334  R *g_hat, *f_hat; /* local copy */ \
335  R c_phi_inv_k[ths->d+1]; /* postfix product of PHI_HUT */ \
336  INT t; /* index dimensions */ \
337  INT i; \
338  INT k_L; /* plain index */ \
339  INT kg[ths->d]; /* multi index in g_hat */ \
340  INT kg_plain[ths->d+1]; /* postfix plain index */ \
341 \
342  f_hat = (R*)ths->f_hat; g_hat = (R*)ths->g_hat; \
343  MACRO_D_init_result_ ## which_one; \
344 \
345  c_phi_inv_k[0] = K(1.0); \
346  kg_plain[0] = 0; \
347 \
348  MACRO_init_k_ks; \
349 \
350  if (ths->flags & PRE_PHI_HUT) \
351  { \
352  for (k_L = 0; k_L < ths->N_total; k_L++) \
353  { \
354  MACRO_update_c_phi_inv_k(which_one, with_PRE_PHI_HUT); \
355  MACRO_D_compute_ ## which_one; \
356  MACRO_count_k_ks; \
357  } \
358  } \
359  else \
360  { \
361  for (k_L = 0; k_L < ths->N_total; k_L++) \
362  { \
363  MACRO_update_c_phi_inv_k(which_one,compute_PHI_HUT_INV); \
364  MACRO_D_compute_ ## which_one; \
365  MACRO_count_k_ks; \
366  } \
367  } \
368 }
369 
370 MACRO_D(A)
371 MACRO_D(T)
372 
373 /* sub routines for the fast transforms matrix vector multiplication with B, B^T */
374 #define MACRO_B_init_result_A memset(f, 0, (size_t)(ths->M_total) * sizeof(R));
375 #define MACRO_B_init_result_T memset(g, 0, (size_t)(ths->n_total) * sizeof(R));
376 
377 #define MACRO_B_PRE_FULL_PSI_compute_A \
378 { \
379  (*fj) += ths->psi[ix] * g[ths->psi_index_g[ix]]; \
380 }
381 
382 #define MACRO_B_PRE_FULL_PSI_compute_T \
383 { \
384  R factor = K(1.0); \
385  INT d = ths->psi_index_g[ix]; \
386  for (t = ths->d - 1; t >= 0; t--) \
387  { \
388  INT m = d % ths->n[t]; \
389  if (m != 0 && m != ths->n[t] - 1) \
390  factor *= K(0.5); \
391  d = d / ths->n[t]; \
392  } \
393  g[ths->psi_index_g[ix]] += factor * ths->psi[ix] * (*fj); \
394 }
395 
396 #define MACRO_B_compute_A \
397 { \
398  (*fj) += phi_prod[ths->d] * g[ll_plain[ths->d]]; \
399 }
400 
401 #define MACRO_B_compute_T \
402 { \
403  g[ll_plain[ths->d]] += phi_prod[ths->d] * (*fj); \
404 }
405 
406 #define MACRO_init_uo_l_lj_t \
407 { \
408  for (t2 = 0; t2 < ths->d; t2++) \
409  { \
410  uo(ths, j, &u[t2], &o[t2], t2); \
411  \
412  /* determine index in g-array corresponding to u[(t2)] */ \
413  if (u[(t2)] < 0) \
414  lg_offset[(t2)] = \
415  (u[(t2)] % (2 * NN(ths->n[(t2)]))) + (2 * NN(ths->n[(t2)])); \
416  else \
417  lg_offset[(t2)] = u[(t2)] % (2 * NN(ths->n[(t2)])); \
418  if (lg_offset[(t2)] > NN(ths->n[(t2)])) \
419  lg_offset[(t2)] = -(2 * NN(ths->n[(t2)]) - lg_offset[(t2)]); \
420  \
421  if (lg_offset[t2] <= 0) \
422  { \
423  l[t2] = -lg_offset[t2]; \
424  count_lg[t2] = -1; \
425  } \
426  else \
427  { \
428  l[t2] = +lg_offset[t2]; \
429  count_lg[t2] = +1; \
430  } \
431  \
432  lj[t2] = 0; \
433  } \
434  t2 = 0; \
435 }
436 
437 #define FOO_A K(1.0)
438 
439 #define FOO_T ((l[t] == 0 || l[t] == ths->n[t] - 1) ? K(1.0) : K(0.5))
440 
441 #define MACRO_update_phi_prod_ll_plain(which_one,which_psi) \
442 { \
443  for (t = t2; t < ths->d; t++) \
444  { \
445  phi_prod[t+1] = (FOO_ ## which_one) * phi_prod[t] * (MACRO_ ## which_psi); \
446  ll_plain[t+1] = ll_plain[t] * ths->n[t] + l[t]; \
447  } \
448 }
449 
450 #define MACRO_count_uo_l_lj_t \
451 { \
452  /* turn around if we hit one of the boundaries */ \
453  if ((l[(ths->d-1)] == 0) || (l[(ths->d-1)] == NN(ths->n[(ths->d-1)]))) \
454  count_lg[(ths->d-1)] *= -1; \
455  \
456  /* move array index */ \
457  l[(ths->d-1)] += count_lg[(ths->d-1)]; \
458  \
459  lj[ths->d - 1]++; \
460  t2 = ths->d - 1; \
461  \
462  while ((lj[t2] == (2 * ths->m + 2)) && (t2 > 0)) \
463  { \
464  lj[t2 - 1]++; \
465  lj[t2] = 0; \
466  /* ansonsten lg[i-1] verschieben */ \
467  \
468  /* turn around if we hit one of the boundaries */ \
469  if ((l[(t2 - 1)] == 0) || (l[(t2 - 1)] == NN(ths->n[(t2 - 1)]))) \
470  count_lg[(t2 - 1)] *= -1; \
471  /* move array index */ \
472  l[(t2 - 1)] += count_lg[(t2 - 1)]; \
473  \
474  /* lg[i] = anfangswert */ \
475  if (lg_offset[t2] <= 0) \
476  { \
477  l[t2] = -lg_offset[t2]; \
478  count_lg[t2] = -1; \
479  } \
480  else \
481  { \
482  l[t2] = +lg_offset[t2]; \
483  count_lg[t2] = +1; \
484  } \
485  \
486  t2--; \
487  } \
488 }
489 
490 #define MACRO_B(which_one) \
491 static inline void B_ ## which_one (X(plan) *ths) \
492 { \
493  INT lprod; /* 'regular bandwidth' of matrix B */ \
494  INT u[ths->d], o[ths->d]; /* multi band with respect to x_j */ \
495  INT t, t2; /* index dimensions */ \
496  INT j; /* index nodes */ \
497  INT l_L, ix; /* index one row of B */ \
498  INT l[ths->d]; /* multi index u<=l<=o (real index of g in array) */ \
499  INT lj[ths->d]; /* multi index 0<=lc<2m+2 */ \
500  INT ll_plain[ths->d+1]; /* postfix plain index in g */ \
501  R phi_prod[ths->d+1]; /* postfix product of PHI */ \
502  R *f, *g; /* local copy */ \
503  R *fj; /* local copy */ \
504  R y[ths->d]; \
505  R fg_psi[ths->d][2*ths->m+2]; \
506  R fg_exp_l[ths->d][2*ths->m+2]; \
507  INT l_fg,lj_fg; \
508  R tmpEXP1, tmpEXP2, tmpEXP2sq, tmp1, tmp2, tmp3; \
509  R ip_w; \
510  INT ip_u; \
511  INT ip_s = ths->K/(ths->m+2); \
512  INT lg_offset[ths->d]; /* offset in g according to u */ \
513  INT count_lg[ths->d]; /* count summands (2m+2) */ \
514 \
515  f = (R*)ths->f; g = (R*)ths->g; \
516 \
517  MACRO_B_init_result_ ## which_one \
518 \
519  if (ths->flags & PRE_FULL_PSI) \
520  { \
521  for (ix = 0, j = 0, fj = f; j < ths->M_total; j++, fj++) \
522  { \
523  for (l_L = 0; l_L < ths->psi_index_f[j]; l_L++, ix++) \
524  { \
525  MACRO_B_PRE_FULL_PSI_compute_ ## which_one; \
526  } \
527  } \
528  return; \
529  } \
530 \
531  phi_prod[0] = K(1.0); \
532  ll_plain[0] = 0; \
533 \
534  for (t = 0, lprod = 1; t < ths->d; t++) \
535  lprod *= (2 * ths->m + 2); \
536 \
537  if (ths->flags & PRE_PSI) \
538  { \
539  for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
540  { \
541  MACRO_init_uo_l_lj_t; \
542  \
543  for (l_L = 0; l_L < lprod; l_L++) \
544  { \
545  MACRO_update_phi_prod_ll_plain(which_one, with_PRE_PSI); \
546  \
547  MACRO_B_compute_ ## which_one; \
548  \
549  MACRO_count_uo_l_lj_t; \
550  } /* for(l_L) */ \
551  } /* for(j) */ \
552  return; \
553  } /* if(PRE_PSI) */ \
554  \
555  if (ths->flags & PRE_FG_PSI) \
556  { \
557  for (t = 0; t < ths->d; t++) \
558  { \
559  tmpEXP2 = EXP(K(-1.0) / ths->b[t]); \
560  tmpEXP2sq = tmpEXP2 * tmpEXP2; \
561  tmp2 = K(1.0); \
562  tmp3 = K(1.0); \
563  fg_exp_l[t][0] = K(1.0); \
564  \
565  for (lj_fg = 1; lj_fg <= (2 * ths->m + 2); lj_fg++) \
566  { \
567  tmp3 = tmp2 * tmpEXP2; \
568  tmp2 *= tmpEXP2sq; \
569  fg_exp_l[t][lj_fg] = fg_exp_l[t][lj_fg-1] * tmp3; \
570  } \
571  } \
572  \
573  for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
574  { \
575  MACRO_init_uo_l_lj_t; \
576  \
577  for (t = 0; t < ths->d; t++) \
578  { \
579  fg_psi[t][0] = ths->psi[2 * (j * ths->d + t)]; \
580  tmpEXP1 = ths->psi[2 * (j * ths->d + t) + 1]; \
581  tmp1 = K(1.0); \
582  \
583  for (l_fg = u[t] + 1, lj_fg = 1; l_fg <= o[t]; l_fg++, lj_fg++) \
584  { \
585  tmp1 *= tmpEXP1; \
586  fg_psi[t][lj_fg] = fg_psi[t][0] * tmp1 * fg_exp_l[t][lj_fg]; \
587  } \
588  } \
589  \
590  for (l_L= 0; l_L < lprod; l_L++) \
591  { \
592  MACRO_update_phi_prod_ll_plain(which_one, with_FG_PSI); \
593  \
594  MACRO_B_compute_ ## which_one; \
595  \
596  MACRO_count_uo_l_lj_t; \
597  } \
598  } \
599  return; \
600  } \
601  \
602  if (ths->flags & FG_PSI) \
603  { \
604  for (t = 0; t < ths->d; t++) \
605  { \
606  tmpEXP2 = EXP(K(-1.0) / ths->b[t]); \
607  tmpEXP2sq = tmpEXP2 * tmpEXP2; \
608  tmp2 = K(1.0); \
609  tmp3 = K(1.0); \
610  fg_exp_l[t][0] = K(1.0); \
611  for (lj_fg = 1; lj_fg <= (2 * ths->m + 2); lj_fg++) \
612  { \
613  tmp3 = tmp2 * tmpEXP2; \
614  tmp2 *= tmpEXP2sq; \
615  fg_exp_l[t][lj_fg] = fg_exp_l[t][lj_fg-1] * tmp3; \
616  } \
617  } \
618  \
619  for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
620  { \
621  MACRO_init_uo_l_lj_t; \
622  \
623  for (t = 0; t < ths->d; t++) \
624  { \
625  fg_psi[t][0] = (PHI((2 * NN(ths->n[t])), (ths->x[j*ths->d+t] - ((R)u[t])/(2 * NN(ths->n[t]))),(t)));\
626  \
627  tmpEXP1 = EXP(K(2.0) * ((2 * NN(ths->n[t])) * ths->x[j * ths->d + t] - u[t]) / ths->b[t]); \
628  tmp1 = K(1.0); \
629  for (l_fg = u[t] + 1, lj_fg = 1; l_fg <= o[t]; l_fg++, lj_fg++) \
630  { \
631  tmp1 *= tmpEXP1; \
632  fg_psi[t][lj_fg] = fg_psi[t][0] * tmp1 * fg_exp_l[t][lj_fg]; \
633  } \
634  } \
635  \
636  for (l_L = 0; l_L < lprod; l_L++) \
637  { \
638  MACRO_update_phi_prod_ll_plain(which_one, with_FG_PSI); \
639  \
640  MACRO_B_compute_ ## which_one; \
641  \
642  MACRO_count_uo_l_lj_t; \
643  } \
644  } \
645  return; \
646  } \
647  \
648  if (ths->flags & PRE_LIN_PSI) \
649  { \
650  for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
651  { \
652  MACRO_init_uo_l_lj_t; \
653  \
654  for (t = 0; t < ths->d; t++) \
655  { \
656  y[t] = (((2 * NN(ths->n[t])) * ths->x[j * ths->d + t] - (R)u[t]) \
657  * ((R)ths->K))/(ths->m + 2); \
658  ip_u = LRINT(FLOOR(y[t])); \
659  ip_w = y[t]-ip_u; \
660  for (l_fg = u[t], lj_fg = 0; l_fg <= o[t]; l_fg++, lj_fg++) \
661  { \
662  fg_psi[t][lj_fg] = ths->psi[(ths->K+1)*t + ABS(ip_u-lj_fg*ip_s)] \
663  * (1-ip_w) + ths->psi[(ths->K+1)*t + ABS(ip_u-lj_fg*ip_s+1)] \
664  * (ip_w); \
665  } \
666  } \
667  \
668  for (l_L = 0; l_L < lprod; l_L++) \
669  { \
670  MACRO_update_phi_prod_ll_plain(which_one, with_FG_PSI); \
671  \
672  MACRO_B_compute_ ## which_one; \
673  \
674  MACRO_count_uo_l_lj_t; \
675  } /* for(l_L) */ \
676  } /* for(j) */ \
677  return; \
678  } /* if(PRE_LIN_PSI) */ \
679  \
680  /* no precomputed psi at all */ \
681  for (j = 0, fj = &f[0]; j < ths->M_total; j++, fj += 1) \
682  { \
683  MACRO_init_uo_l_lj_t; \
684  \
685  for (l_L = 0; l_L < lprod; l_L++) \
686  { \
687  MACRO_update_phi_prod_ll_plain(which_one, without_PRE_PSI); \
688  \
689  MACRO_B_compute_ ## which_one; \
690  \
691  MACRO_count_uo_l_lj_t; \
692  } /* for (l_L) */ \
693  } /* for (j) */ \
694 } /* B */
695 
696 MACRO_B(A)
697 MACRO_B(T)
698 
702 void X(trafo)(X(plan) *ths)
703 {
704  switch(ths->d)
705  {
706  default:
707  {
708  /* use ths->my_fftw_r2r_plan */
709  ths->g_hat = ths->g1;
710  ths->g = ths->g2;
711 
712  /* form \f$ \hat g_k = \frac{\hat f_k}{c_k\left(\phi\right)} \text{ for }
713  * k \in I_N \f$ */
714  TIC(0)
715  D_A(ths);
716  TOC(0)
717 
718  /* Compute by d-variate discrete Fourier transform
719  * \f$ g_l = \sum_{k \in I_N} \hat g_k {\rm e}^{-2\pi {\rm i} \frac{kl}{n}}
720  * \text{ for } l \in I_n \f$ */
721  TIC_FFTW(1)
722  FFTW(execute)(ths->my_fftw_r2r_plan);
723  TOC_FFTW(1)
724 
725  /*if (ths->flags & PRE_FULL_PSI)
726  full_psi__A(ths);*/
727 
728  /* Set \f$ f_j = \sum_{l \in I_n,m(x_j)} g_l \psi\left(x_j-\frac{l}{n}\right)
729  * \text{ for } j=0,\dots,M-1 \f$ */
730  TIC(2)
731  B_A(ths);
732  TOC(2)
733 
734  /*if (ths->flags & PRE_FULL_PSI)
735  {
736  Y(free)(ths->psi_index_g);
737  Y(free)(ths->psi_index_f);
738  }*/
739  }
740  }
741 } /* trafo */
742 
743 void X(adjoint)(X(plan) *ths)
744 {
745  switch(ths->d)
746  {
747  default:
748  {
749  /* use ths->my_fftw_plan */
750  ths->g_hat = ths->g2;
751  ths->g = ths->g1;
752 
753  /*if (ths->flags & PRE_FULL_PSI)
754  full_psi__T(ths);*/
755 
756  /* Set \f$ g_l = \sum_{j=0}^{M-1} f_j \psi\left(x_j-\frac{l}{n}\right)
757  * \text{ for } l \in I_n,m(x_j) \f$ */
758  TIC(2)
759  B_T(ths);
760  TOC(2)
761 
762  /* Compute by d-variate discrete cosine transform
763  * \f$ \hat g_k = \sum_{l \in I_n} g_l {\rm e}^{-2\pi {\rm i} \frac{kl}{n}}
764  * \text{ for } k \in I_N\f$ */
765  TIC_FFTW(1)
766  FFTW(execute)(ths->my_fftw_r2r_plan);
767  TOC_FFTW(1)
768 
769  /* Form \f$ \hat f_k = \frac{\hat g_k}{c_k\left(\phi\right)} \text{ for }
770  * k \in I_N \f$ */
771  TIC(0)
772  D_T(ths);
773  TOC(0)
774  }
775  }
776 } /* adjoint */
777 
780 static inline void precompute_phi_hut(X(plan) *ths)
781 {
782  INT ks[ths->d]; /* index over all frequencies */
783  INT t; /* index over all dimensions */
784 
785  ths->c_phi_inv = (R**) Y(malloc)((size_t)(ths->d) * sizeof(R*));
786 
787  for (t = 0; t < ths->d; t++)
788  {
789  ths->c_phi_inv[t] = (R*)Y(malloc)((size_t)(ths->N[t] - OFFSET) * sizeof(R));
790 
791  for (ks[t] = 0; ks[t] < ths->N[t] - OFFSET; ks[t]++)
792  {
793  ths->c_phi_inv[t][ks[t]] = (K(1.0) / (PHI_HUT((2 * NN(ths->n[t])), ks[t] + OFFSET, t)));
794  }
795  }
796 } /* phi_hut */
797 
803 void X(precompute_lin_psi)(X(plan) *ths)
804 {
805  INT t;
806  INT j;
807  R step;
809  for (t = 0; t < ths->d; t++)
810  {
811  step = ((R)(ths->m+2)) / (((R)ths->K) * (2 * NN(ths->n[t])));
812 
813  for (j = 0; j <= ths->K; j++)
814  {
815  ths->psi[(ths->K + 1) * t + j] = PHI((2 * NN(ths->n[t])), (j * step), t);
816  } /* for(j) */
817  } /* for(t) */
818 }
819 
820 void X(precompute_fg_psi)(X(plan) *ths)
821 {
822  INT t; /* index over all dimensions */
823  INT u, o; /* depends on x_j */
824 
825 // sort(ths);
826 
827  for (t = 0; t < ths->d; t++)
828  {
829  INT j;
830 // #pragma omp parallel for default(shared) private(j,u,o)
831  for (j = 0; j < ths->M_total; j++)
832  {
833  uo(ths, j, &u, &o, t);
834 
835  ths->psi[2 * (j*ths->d + t)] = (PHI((2 * NN(ths->n[t])),(ths->x[j * ths->d + t] - ((R)u) / (2 * NN(ths->n[t]))),(t)));
836  ths->psi[2 * (j*ths->d + t) + 1] = EXP(K(2.0) * ( (2 * NN(ths->n[t])) * ths->x[j * ths->d + t] - u) / ths->b[t]);
837  } /* for(j) */
838  }
839  /* for(t) */
840 } /* nfft_precompute_fg_psi */
841 
842 void X(precompute_psi)(X(plan) *ths)
843 {
844  INT t; /* index over all dimensions */
845  INT lj; /* index 0<=lj<u+o+1 */
846  INT u, o; /* depends on x_j */
847 
848  //sort(ths);
849 
850  for (t = 0; t < ths->d; t++)
851  {
852  INT j;
853 
854  for (j = 0; j < ths->M_total; j++)
855  {
856  uo(ths, j, &u, &o, t);
857 
858  for(lj = 0; lj < (2 * ths->m + 2); lj++)
859  ths->psi[(j * ths->d + t) * (2 * ths->m + 2) + lj] =
860  (PHI((2 * NN(ths->n[t])), ((ths->x[(j) * ths->d + (t)]) - ((R)(lj + u)) / (K(2.0) * ((R)NN(ths->n[t])))), t));
861  } /* for (j) */
862  } /* for (t) */
863 } /* precompute_psi */
864 
865 void X(precompute_full_psi)(X(plan) *ths)
866 {
867 //#ifdef _OPENMP
868 // sort(ths);
869 //
870 // nfft_precompute_full_psi_omp(ths);
871 //#else
872  INT t, t2; /* index over all dimensions */
873  INT j; /* index over all nodes */
874  INT l_L; /* plain index 0 <= l_L < lprod */
875  INT l[ths->d]; /* multi index u<=l<=o */
876  INT lj[ths->d]; /* multi index 0<=lj<u+o+1 */
877  INT ll_plain[ths->d+1]; /* postfix plain index */
878  INT lprod; /* 'bandwidth' of matrix B */
879  INT u[ths->d], o[ths->d]; /* depends on x_j */
880  INT count_lg[ths->d];
881  INT lg_offset[ths->d];
882 
883  R phi_prod[ths->d+1];
884 
885  INT ix, ix_old;
886 
887  //sort(ths);
888 
889  phi_prod[0] = K(1.0);
890  ll_plain[0] = 0;
891 
892  for (t = 0, lprod = 1; t < ths->d; t++)
893  lprod *= 2 * ths->m + 2;
894 
895  for (j = 0, ix = 0, ix_old = 0; j < ths->M_total; j++)
896  {
897  MACRO_init_uo_l_lj_t;
898 
899  for (l_L = 0; l_L < lprod; l_L++, ix++)
900  {
901  MACRO_update_phi_prod_ll_plain(A, without_PRE_PSI);
902 
903  ths->psi_index_g[ix] = ll_plain[ths->d];
904  ths->psi[ix] = phi_prod[ths->d];
905 
906  MACRO_count_uo_l_lj_t;
907  } /* for (l_L) */
908 
909  ths->psi_index_f[j] = ix - ix_old;
910  ix_old = ix;
911  } /* for(j) */
912 //#endif
913 }
914 
915 void X(precompute_one_psi)(X(plan) *ths)
916 {
917  if(ths->flags & PRE_PSI)
918  X(precompute_psi)(ths);
919  if(ths->flags & PRE_FULL_PSI)
920  X(precompute_full_psi)(ths);
921  if(ths->flags & PRE_FG_PSI)
922  X(precompute_fg_psi)(ths);
923  if(ths->flags & PRE_LIN_PSI)
924  X(precompute_lin_psi)(ths);
925 }
926 
927 static inline void init_help(X(plan) *ths)
928 {
929  INT t; /* index over all dimensions */
930  INT lprod; /* 'bandwidth' of matrix B */
931 
932  if (ths->flags & NFFT_OMP_BLOCKWISE_ADJOINT)
933  ths->flags |= NFFT_SORT_NODES;
934 
935  ths->N_total = intprod(ths->N, OFFSET, ths->d);
936  ths->n_total = intprod(ths->n, 0, ths->d);
937 
938  ths->sigma = (R*)Y(malloc)((size_t)(ths->d) * sizeof(R));
939 
940  for (t = 0; t < ths->d; t++)
941  ths->sigma[t] = ((R)NN(ths->n[t])) / ths->N[t];
942 
943  /* Assign r2r transform kinds for each dimension */
944  ths->r2r_kind = (FFTW(r2r_kind)*)Y(malloc)((size_t)(ths->d) * sizeof (FFTW(r2r_kind)));
945  for (t = 0; t < ths->d; t++)
946  ths->r2r_kind[t] = FOURIER_TRAFO;
947 
948  WINDOW_HELP_INIT;
949 
950  if (ths->flags & MALLOC_X)
951  ths->x = (R*)Y(malloc)((size_t)(ths->d * ths->M_total) * sizeof(R));
952 
953  if (ths->flags & MALLOC_F_HAT)
954  ths->f_hat = (R*)Y(malloc)((size_t)(ths->N_total) * sizeof(R));
955 
956  if (ths->flags & MALLOC_F)
957  ths->f = (R*)Y(malloc)((size_t)(ths->M_total) * sizeof(R));
958 
959  if (ths->flags & PRE_PHI_HUT)
960  precompute_phi_hut(ths);
961 
962  if(ths->flags & PRE_LIN_PSI)
963  {
964  ths->K = (1U<< 10) * (ths->m+2);
965  ths->psi = (R*) Y(malloc)((size_t)((ths->K + 1) * ths->d) * sizeof(R));
966  }
967 
968  if(ths->flags & PRE_FG_PSI)
969  ths->psi = (R*) Y(malloc)((size_t)(ths->M_total * ths->d * 2) * sizeof(R));
970 
971  if (ths->flags & PRE_PSI)
972  ths->psi = (R*) Y(malloc)((size_t)(ths->M_total * ths->d * (2 * ths->m + 2 )) * sizeof(R));
973 
974  if(ths->flags & PRE_FULL_PSI)
975  {
976  for (t = 0, lprod = 1; t < ths->d; t++)
977  lprod *= 2 * ths->m + 2;
978 
979  ths->psi = (R*) Y(malloc)((size_t)(ths->M_total * lprod) * sizeof(R));
980 
981  ths->psi_index_f = (INT*) Y(malloc)((size_t)(ths->M_total) * sizeof(INT));
982  ths->psi_index_g = (INT*) Y(malloc)((size_t)(ths->M_total * lprod) * sizeof(INT));
983  }
984 
985  if (ths->flags & FFTW_INIT)
986  {
987  ths->g1 = (R*)Y(malloc)((size_t)(ths->n_total) * sizeof(R));
988 
989  if (ths->flags & FFT_OUT_OF_PLACE)
990  ths->g2 = (R*) Y(malloc)((size_t)(ths->n_total) * sizeof(R));
991  else
992  ths->g2 = ths->g1;
993 
994  {
995  int *_n = Y(malloc)((size_t)(ths->d) * sizeof(int));
996 
997  for (t = 0; t < ths->d; t++)
998  _n[t] = (int)(ths->n[t]);
999 
1000  ths->my_fftw_r2r_plan = FFTW(plan_r2r)((int)ths->d, _n, ths->g1, ths->g2, ths->r2r_kind, ths->fftw_flags);
1001  Y(free)(_n);
1002  }
1003  }
1004 
1005 // if(ths->flags & NFFT_SORT_NODES)
1006 // ths->index_x = (INT*) Y(malloc)(sizeof(INT)*2*ths->M_total);
1007 // else
1008 // ths->index_x = NULL;
1009 
1010  ths->mv_trafo = (void (*) (void* ))X(trafo);
1011  ths->mv_adjoint = (void (*) (void* ))X(adjoint);
1012 }
1013 
1014 void X(init)(X(plan) *ths, int d, int *N, int M_total)
1015 {
1016  int t; /* index over all dimensions */
1017 
1018  ths->d = (INT)d;
1019 
1020  ths->N = (INT*) Y(malloc)((size_t)(d) * sizeof(INT));
1021 
1022  for (t = 0; t < d; t++)
1023  ths->N[t] = (INT)N[t];
1024 
1025  ths->M_total = (INT)M_total;
1026 
1027  ths->n = (INT*) Y(malloc)((size_t)(d) * sizeof(INT));
1028 
1029  for (t = 0; t < d; t++)
1030  ths->n[t] = 2 * (Y(next_power_of_2)(ths->N[t]) - 1) + OFFSET;
1031 
1032  ths->m = WINDOW_HELP_ESTIMATE_m;
1033 
1034  if (d > 1)
1035  {
1036 //#ifdef _OPENMP
1037 // ths->flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F |
1038 // FFTW_INIT | FFT_OUT_OF_PLACE | NFFT_SORT_NODES |
1039 // NFFT_OMP_BLOCKWISE_ADJOINT;
1040 //#else
1041  ths->flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F |
1042  FFTW_INIT | FFT_OUT_OF_PLACE | NFFT_SORT_NODES;
1043 //#endif
1044  }
1045  else
1046  ths->flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F |
1048 
1049  ths->fftw_flags = FFTW_ESTIMATE | FFTW_DESTROY_INPUT;
1050 
1051  init_help(ths);
1052 }
1053 
1054 void X(init_guru)(X(plan) *ths, int d, int *N, int M_total, int *n, int m,
1055  unsigned flags, unsigned fftw_flags)
1056 {
1057  INT t; /* index over all dimensions */
1058 
1059  ths->d = (INT)d;
1060  ths->M_total = (INT)M_total;
1061  ths->N = (INT*)Y(malloc)((size_t)(ths->d) * sizeof(INT));
1062 
1063  for (t = 0; t < d; t++)
1064  ths->N[t] = (INT)N[t];
1065 
1066  ths->n = (INT*)Y(malloc)((size_t)(ths->d) * sizeof(INT));
1067 
1068  for (t = 0; t < d; t++)
1069  ths->n[t] = (INT)n[t];
1070 
1071  ths->m = (INT)m;
1072 
1073  ths->flags = flags;
1074  ths->fftw_flags = fftw_flags;
1075 
1076  init_help(ths);
1077 }
1078 
1079 void X(init_1d)(X(plan) *ths, int N1, int M_total)
1080 {
1081  int N[1];
1082 
1083  N[0] = N1;
1084 
1085  X(init)(ths, 1, N, M_total);
1086 }
1087 
1088 void X(init_2d)(X(plan) *ths, int N1, int N2, int M_total)
1089 {
1090  int N[2];
1091 
1092  N[0] = N1;
1093  N[1] = N2;
1094 
1095  X(init)(ths, 2, N, M_total);
1096 }
1097 
1098 void X(init_3d)(X(plan) *ths, int N1, int N2, int N3, int M_total)
1099 {
1100  int N[3];
1101 
1102  N[0] = N1;
1103  N[1] = N2;
1104  N[2] = N3;
1105 
1106  X(init)(ths, 3, N, M_total);
1107 }
1108 
1109 const char* X(check)(X(plan) *ths)
1110 {
1111  INT j;
1112 
1113  if (!ths->f)
1114  return "Member f not initialized.";
1115 
1116  if (!ths->x)
1117  return "Member x not initialized.";
1118 
1119  if (!ths->f_hat)
1120  return "Member f_hat not initialized.";
1121 
1122  for (j = 0; j < ths->M_total * ths->d; j++)
1123  {
1124  if ((ths->x[j] < K(0.0)) || (ths->x[j] >= K(0.5)))
1125  {
1126  return "ths->x out of range [0.0,0.5)";
1127  }
1128  }
1129 
1130  for (j = 0; j < ths->d; j++)
1131  {
1132  if (ths->sigma[j] <= 1)
1133  return "Oversampling factor too small";
1134 
1135  if(ths->N[j] - 1 <= ths->m)
1136  return "Polynomial degree N is smaller than cut-off m";
1137 
1138  if(ths->N[j]%2 == 1)
1139  return "polynomial degree N has to be even";
1140  }
1141  return 0;
1142 }
1143 
1144 void X(finalize)(X(plan) *ths)
1145 {
1146  INT t; /* index over dimensions */
1147 
1148 // if(ths->flags & NFFT_SORT_NODES)
1149 // Y(free)(ths->index_x);
1150 
1151  if (ths->flags & FFTW_INIT)
1152  {
1153 #ifdef _OPENMP
1154  #pragma omp critical (nfft_omp_critical_fftw_plan)
1155 #endif
1156  FFTW(destroy_plan)(ths->my_fftw_r2r_plan);
1157 
1158  if (ths->flags & FFT_OUT_OF_PLACE)
1159  Y(free)(ths->g2);
1160 
1161  Y(free)(ths->g1);
1162  }
1163 
1164  if(ths->flags & PRE_FULL_PSI)
1165  {
1166  Y(free)(ths->psi_index_g);
1167  Y(free)(ths->psi_index_f);
1168  Y(free)(ths->psi);
1169  }
1170 
1171  if (ths->flags & PRE_PSI)
1172  Y(free)(ths->psi);
1173 
1174  if(ths->flags & PRE_FG_PSI)
1175  Y(free)(ths->psi);
1176 
1177  if(ths->flags & PRE_LIN_PSI)
1178  Y(free)(ths->psi);
1179 
1180  if (ths->flags & PRE_PHI_HUT)
1181  {
1182  for (t = 0; t < ths->d; t++)
1183  Y(free)(ths->c_phi_inv[t]);
1184  Y(free)(ths->c_phi_inv);
1185  }
1186 
1187  if (ths->flags & MALLOC_F)
1188  Y(free)(ths->f);
1189 
1190  if(ths->flags & MALLOC_F_HAT)
1191  Y(free)(ths->f_hat);
1192 
1193  if (ths->flags & MALLOC_X)
1194  Y(free)(ths->x);
1195 
1196  WINDOW_HELP_FINALIZE;
1197 
1198  Y(free)(ths->N);
1199  Y(free)(ths->n);
1200  Y(free)(ths->sigma);
1201 
1202  Y(free)(ths->r2r_kind);
1203 } /* finalize */
#define TIC(a)
Timing, method works since the inaccurate timer is updated mostly in the measured function...
Definition: infft.h:1397
#define PRE_FG_PSI
Definition: nfft3.h:196
#define MALLOC_X
Definition: nfft3.h:199
#define MALLOC_F_HAT
Definition: nfft3.h:200
#define FFTW_INIT
Definition: nfft3.h:203
#define MALLOC_F
Definition: nfft3.h:201
#define X(name)
Include header for C99 complex datatype.
Definition: fastsum.h:57
#define FFT_OUT_OF_PLACE
Definition: nfft3.h:202
#define PRE_LIN_PSI
Definition: nfft3.h:195
#define PRE_PSI
Definition: nfft3.h:197
#define PRE_FULL_PSI
Definition: nfft3.h:198
Header file for the nfft3 library.
#define PRE_PHI_HUT
Definition: nfft3.h:193