NFFT  3.4.1
nfst.c
1 /*
2  * Copyright (c) 2002, 2017 Jens Keiner, Stefan Kunis, Daniel Potts
3  *
4  * This program is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU General Public License as published by the Free Software
6  * Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 
19 /* Nonequispaced fast cosine transform */
20 
21 /* Author: Steffen Klatt 2004-2006, Jens Keiner 2010 */
22 
23 /* configure header */
24 #include "config.h"
25 
26 /* complex datatype (maybe) */
27 #ifdef HAVE_COMPLEX_H
28 #include<complex.h>
29 #endif
30 
31 /* NFFT headers */
32 #include "nfft3.h"
33 #include "infft.h"
34 
35 #ifdef _OPENMP
36 #include <omp.h>
37 #endif
38 
39 #ifdef OMP_ASSERT
40 #include <assert.h>
41 #endif
42 
43 #undef X
44 #define X(name) NFST(name)
45 
47 static inline INT intprod(const INT *vec, const INT a, const INT d)
48 {
49  INT t, p;
50 
51  p = 1;
52  for (t = 0; t < d; t++)
53  p *= vec[t] - a;
54 
55  return p;
56 }
57 
58 /* handy shortcuts */
59 #define BASE(x) SIN(x)
60 #define NN(x) (x + 1)
61 #define OFFSET 1
62 #define FOURIER_TRAFO FFTW_RODFT00
63 #define FFTW_DEFAULT_FLAGS FFTW_ESTIMATE | FFTW_DESTROY_INPUT
64 
65 #define NODE(p,r) (ths->x[(p) * ths->d + (r)])
66 
67 #define MACRO_with_FG_PSI fg_psi[t][lj[t]]
68 #define MACRO_with_PRE_PSI ths->psi[(j * ths->d + t) * (2 * ths->m + 2) + lj[t]]
69 #define MACRO_without_PRE_PSI PHI((2 * NN(ths->n[t])), ((ths->x[(j) * ths->d + t]) \
70  - ((R)(lj[t] + u[t])) / (K(2.0) * ((R)NN(ths->n[t])))), t)
71 #define MACRO_compute_PSI PHI((2 * NN(ths->n[t])), (NODE(j,t) - ((R)(lj[t] + u[t])) / (K(2.0) * ((R)NN(ths->n[t])))), t)
72 
88 void X(trafo_direct)(const X(plan) *ths)
89 {
90  R *f_hat = (R*)ths->f_hat, *f = (R*)ths->f;
91 
92  memset(f, 0, (size_t)(ths->M_total) * sizeof(R));
93 
94  if (ths->d == 1)
95  {
96  /* specialize for univariate case, rationale: faster */
97  INT j;
98 #ifdef _OPENMP
99  #pragma omp parallel for default(shared) private(j)
100 #endif
101  for (j = 0; j < ths->M_total; j++)
102  {
103  INT k_L;
104  for (k_L = 0; k_L < ths->N_total; k_L++)
105  {
106  R omega = K2PI * ((R)(k_L + OFFSET)) * ths->x[j];
107  f[j] += f_hat[k_L] * BASE(omega);
108  }
109  }
110  }
111  else
112  {
113  /* multivariate case */
114  INT j;
115 #ifdef _OPENMP
116  #pragma omp parallel for default(shared) private(j)
117 #endif
118  for (j = 0; j < ths->M_total; j++)
119  {
120  R x[ths->d], omega, Omega[ths->d + 1];
121  INT t, t2, k_L, k[ths->d];
122  Omega[0] = K(1.0);
123  for (t = 0; t < ths->d; t++)
124  {
125  k[t] = OFFSET;
126  x[t] = K2PI * ths->x[j * ths->d + t];
127  Omega[t+1] = BASE(((R)(k[t])) * x[t]) * Omega[t];
128  }
129  omega = Omega[ths->d];
130 
131  for (k_L = 0; k_L < ths->N_total; k_L++)
132  {
133  f[j] += f_hat[k_L] * omega;
134  {
135  for (t = ths->d - 1; (t >= 1) && (k[t] == (ths->N[t] - 1)); t--)
136  k[t] = OFFSET;
137 
138  k[t]++;
139 
140  for (t2 = t; t2 < ths->d; t2++)
141  Omega[t2+1] = BASE(((R)(k[t2])) * x[t2]) * Omega[t2];
142 
143  omega = Omega[ths->d];
144  }
145  }
146  }
147  }
148 }
149 
150 void X(adjoint_direct)(const X(plan) *ths)
151 {
152  R *f_hat = (R*)ths->f_hat, *f = (R*)ths->f;
153 
154  memset(f_hat, 0, (size_t)(ths->N_total) * sizeof(R));
155 
156  if (ths->d == 1)
157  {
158  /* specialize for univariate case, rationale: faster */
159 #ifdef _OPENMP
160  INT k_L;
161  #pragma omp parallel for default(shared) private(k_L)
162  for (k_L = 0; k_L < ths->N_total; k_L++)
163  {
164  INT j;
165  for (j = 0; j < ths->M_total; j++)
166  {
167  R omega = K2PI * ((R)(k_L + OFFSET)) * ths->x[j];
168  f_hat[k_L] += f[j] * BASE(omega);
169  }
170  }
171 #else
172  INT j;
173  for (j = 0; j < ths->M_total; j++)
174  {
175  INT k_L;
176  for (k_L = 0; k_L < ths->N_total; k_L++)
177  {
178  R omega = K2PI * ((R)(k_L + OFFSET)) * ths->x[j];
179  f_hat[k_L] += f[j] * BASE(omega);
180  }
181  }
182 #endif
183  }
184  else
185  {
186  /* multivariate case */
187  INT j, k_L;
188 #ifdef _OPENMP
189  #pragma omp parallel for default(shared) private(j, k_L)
190  for (k_L = 0; k_L < ths->N_total; k_L++)
191  {
192  INT k[ths->d], k_temp, t;
193 
194  k_temp = k_L;
195 
196  for (t = ths->d - 1; t >= 0; t--)
197  {
198  k[t] = k_temp % ths->N[t];
199  k_temp /= ths->N[t];
200  }
201 
202  for (j = 0; j < ths->M_total; j++)
203  {
204  R omega = K(1.0);
205  for (t = 0; t < ths->d; t++)
206  omega *= BASE(K2PI * (k[t] + OFFSET) * ths->x[j * ths->d + t]);
207  f_hat[k_L] += f[j] * omega;
208  }
209  }
210 #else
211  for (j = 0; j < ths->M_total; j++)
212  {
213  R x[ths->d], omega, Omega[ths->d+1];
214  INT t, t2, k[ths->d];
215  Omega[0] = K(1.0);
216  for (t = 0; t < ths->d; t++)
217  {
218  k[t] = OFFSET;
219  x[t] = K2PI * ths->x[j * ths->d + t];
220  Omega[t+1] = BASE(((R)(k[t])) * x[t]) * Omega[t];
221  }
222  omega = Omega[ths->d];
223  for (k_L = 0; k_L < ths->N_total; k_L++)
224  {
225  f_hat[k_L] += f[j] * omega;
226 
227  for (t = ths->d-1; (t >= 1) && (k[t] == ths->N[t] - 1); t--)
228  k[t] = OFFSET;
229 
230  k[t]++;
231 
232  for (t2 = t; t2 < ths->d; t2++)
233  Omega[t2+1] = BASE(((R)(k[t2])) * x[t2]) * Omega[t2];
234 
235  omega = Omega[ths->d];
236  }
237  }
238 #endif
239  }
240 }
241 
261 static inline void uo(const X(plan) *ths, const INT j, INT *up, INT *op,
262  const INT act_dim)
263 {
264  const R xj = ths->x[j * ths->d + act_dim];
265  INT c = LRINT(xj * (2 * NN(ths->n[(act_dim)])));
266 
267  (*up) = c - (ths->m);
268  (*op) = c + 1 + (ths->m);
269 }
270 
271 #define MACRO_D_compute_A \
272 { \
273  g_hat[kg_plain[ths->d]] = f_hat[k_L] * c_phi_inv_k[ths->d]; \
274 }
275 
276 #define MACRO_D_compute_T \
277 { \
278  f_hat[k_L] = g_hat[kg_plain[ths->d]] * c_phi_inv_k[ths->d]; \
279 }
280 
281 #define MACRO_D_init_result_A memset(g_hat, 0, (size_t)(ths->n_total) * sizeof(R));
282 
283 #define MACRO_D_init_result_T memset(f_hat, 0, (size_t)(ths->N_total) * sizeof(R));
284 
285 #define MACRO_with_PRE_PHI_HUT ths->c_phi_inv[t][kg[t]]
286 
287 #define MACRO_compute_PHI_HUT_INV (K(1.0) / (PHI_HUT((2 * NN(ths->n[t])), kg[t] + OFFSET, t)))
288 
289 #define MACRO_init_k_ks \
290 { \
291  for (t = 0; t < ths->d; t++) \
292  { \
293  kg[t] = 0; \
294  } \
295  i = 0; \
296 }
297 
298 #define MACRO_update_c_phi_inv_k(what_kind, which_phi) \
299 { \
300  for (t = i; t < ths->d; t++) \
301  { \
302  MACRO_update_c_phi_inv_k_ ## what_kind(which_phi); \
303  kg_plain[t+1] = kg_plain[t] * ths->n[t] + kg[t]; \
304  } \
305 }
306 
307 #define MACRO_update_c_phi_inv_k_A(which_phi) \
308 { \
309  c_phi_inv_k[t+1] = K(0.5) * c_phi_inv_k[t] * MACRO_ ## which_phi; \
310 }
311 
312 #define MACRO_update_c_phi_inv_k_T(which_phi) \
313 { \
314  c_phi_inv_k[t+1] = K(0.5) * c_phi_inv_k[t] * MACRO_ ## which_phi; \
315 }
316 
317 #define MACRO_count_k_ks \
318 { \
319  kg[ths->d - 1]++; \
320  i = ths->d - 1; \
321 \
322  while ((kg[i] == ths->N[i] - 1) && (i > 0)) \
323  { \
324  kg[i - 1]++; \
325  kg[i] = 0; \
326  i--; \
327  } \
328 }
329 
330 /* sub routines for the fast transforms matrix vector multiplication with D, D^T */
331 #define MACRO_D(which_one) \
332 static inline void D_ ## which_one (X(plan) *ths) \
333 { \
334  R *g_hat, *f_hat; /* local copy */ \
335  R c_phi_inv_k[ths->d+1]; /* postfix product of PHI_HUT */ \
336  INT t; /* index dimensions */ \
337  INT i; \
338  INT k_L; /* plain index */ \
339  INT kg[ths->d]; /* multi index in g_hat */ \
340  INT kg_plain[ths->d+1]; /* postfix plain index */ \
341 \
342  f_hat = (R*)ths->f_hat; g_hat = (R*)ths->g_hat; \
343  MACRO_D_init_result_ ## which_one; \
344 \
345  c_phi_inv_k[0] = K(1.0); \
346  kg_plain[0] = 0; \
347 \
348  MACRO_init_k_ks; \
349 \
350  if (ths->flags & PRE_PHI_HUT) \
351  { \
352  for (k_L = 0; k_L < ths->N_total; k_L++) \
353  { \
354  MACRO_update_c_phi_inv_k(which_one, with_PRE_PHI_HUT); \
355  MACRO_D_compute_ ## which_one; \
356  MACRO_count_k_ks; \
357  } \
358  } \
359  else \
360  { \
361  for (k_L = 0; k_L < ths->N_total; k_L++) \
362  { \
363  MACRO_update_c_phi_inv_k(which_one,compute_PHI_HUT_INV); \
364  MACRO_D_compute_ ## which_one; \
365  MACRO_count_k_ks; \
366  } \
367  } \
368 }
369 
370 MACRO_D(A)
371 MACRO_D(T)
372 
373 /* sub routines for the fast transforms matrix vector multiplication with B, B^T */
374 #define MACRO_B_init_result_A memset(f, 0, (size_t)(ths->M_total) * sizeof(R));
375 #define MACRO_B_init_result_T memset(g, 0, (size_t)(ths->n_total) * sizeof(R));
376 
377 #define MACRO_B_PRE_FULL_PSI_compute_A \
378 { \
379  (*fj) += ths->psi[ix] * g[ths->psi_index_g[ix]]; \
380 }
381 
382 #define MACRO_B_PRE_FULL_PSI_compute_T \
383 { \
384  g[ths->psi_index_g[ix]] += ths->psi[ix] * (*fj); \
385 }
386 
387 #define MACRO_B_compute_A \
388 { \
389  (*fj) += phi_prod[ths->d] * g[ll_plain[ths->d]]; \
390 }
391 
392 #define MACRO_B_compute_T \
393 { \
394  g[ll_plain[ths->d]] += phi_prod[ths->d] * (*fj); \
395 }
396 
397 #define MACRO_init_uo_l_lj_t \
398 { \
399  for (t2 = 0; t2 < ths->d; t2++) \
400  { \
401  uo(ths, j, &u[t2], &o[t2], t2); \
402  \
403  /* determine index in g-array corresponding to u[(t2)] */ \
404  if (u[(t2)] < 0) \
405  lg_offset[(t2)] = \
406  (u[(t2)] % (2 * NN(ths->n[(t2)]))) + (2 * NN(ths->n[(t2)])); \
407  else \
408  lg_offset[(t2)] = u[(t2)] % (2 * NN(ths->n[(t2)])); \
409  if (lg_offset[(t2)] > NN(ths->n[(t2)])) \
410  lg_offset[(t2)] = -(2 * NN(ths->n[(t2)]) - lg_offset[(t2)]); \
411  \
412  if (lg_offset[t2] <= 0) \
413  { \
414  l[t2] = -lg_offset[t2]; \
415  count_lg[t2] = -1; \
416  } \
417  else \
418  { \
419  l[t2] = +lg_offset[t2]; \
420  count_lg[t2] = +1; \
421  } \
422  \
423  lj[t2] = 0; \
424  } \
425  t2 = 0; \
426 }
427 
428 #define FOO_A ((R)count_lg[t])
429 
430 #define FOO_T ((R)count_lg[t])
431 
432 #define MACRO_update_phi_prod_ll_plain(which_one,which_psi) \
433 { \
434  for (t = t2; t < ths->d; t++) \
435  { \
436  if ((l[t] != 0) && (l[t] != NN(ths->n[t]))) \
437  { \
438  phi_prod[t+1] = (FOO_ ## which_one) * phi_prod[t] * (MACRO_ ## which_psi); \
439  ll_plain[t+1] = ll_plain[t] * ths->n[t] + l[t] - 1; \
440  } \
441  else \
442  { \
443  phi_prod[t + 1] = K(0.0); \
444  ll_plain[t+1] = ll_plain[t] * ths->n[t]; \
445  } \
446  } \
447 }
448 
449 #define MACRO_count_uo_l_lj_t \
450 { \
451  /* turn around if we hit one of the boundaries */ \
452  if ((l[(ths->d-1)] == 0) || (l[(ths->d-1)] == NN(ths->n[(ths->d-1)]))) \
453  count_lg[(ths->d-1)] *= -1; \
454  \
455  /* move array index */ \
456  l[(ths->d-1)] += count_lg[(ths->d-1)]; \
457  \
458  lj[ths->d - 1]++; \
459  t2 = ths->d - 1; \
460  \
461  while ((lj[t2] == (2 * ths->m + 2)) && (t2 > 0)) \
462  { \
463  lj[t2 - 1]++; \
464  lj[t2] = 0; \
465  /* ansonsten lg[i-1] verschieben */ \
466  \
467  /* turn around if we hit one of the boundaries */ \
468  if ((l[(t2 - 1)] == 0) || (l[(t2 - 1)] == NN(ths->n[(t2 - 1)]))) \
469  count_lg[(t2 - 1)] *= -1; \
470  /* move array index */ \
471  l[(t2 - 1)] += count_lg[(t2 - 1)]; \
472  \
473  /* lg[i] = anfangswert */ \
474  if (lg_offset[t2] <= 0) \
475  { \
476  l[t2] = -lg_offset[t2]; \
477  count_lg[t2] = -1; \
478  } \
479  else \
480  { \
481  l[t2] = +lg_offset[t2]; \
482  count_lg[t2] = +1; \
483  } \
484  \
485  t2--; \
486  } \
487 }
488 
489 #define MACRO_B(which_one) \
490 static inline void B_ ## which_one (X(plan) *ths) \
491 { \
492  INT lprod; /* 'regular bandwidth' of matrix B */ \
493  INT u[ths->d], o[ths->d]; /* multi band with respect to x_j */ \
494  INT t, t2; /* index dimensions */ \
495  INT j; /* index nodes */ \
496  INT l_L, ix; /* index one row of B */ \
497  INT l[ths->d]; /* multi index u<=l<=o (real index of g in array) */ \
498  INT lj[ths->d]; /* multi index 0<=lc<2m+2 */ \
499  INT ll_plain[ths->d+1]; /* postfix plain index in g */ \
500  R phi_prod[ths->d+1]; /* postfix product of PHI */ \
501  R *f, *g; /* local copy */ \
502  R *fj; /* local copy */ \
503  R y[ths->d]; \
504  R fg_psi[ths->d][2*ths->m+2]; \
505  R fg_exp_l[ths->d][2*ths->m+2]; \
506  INT l_fg,lj_fg; \
507  R tmpEXP1, tmpEXP2, tmpEXP2sq, tmp1, tmp2, tmp3; \
508  R ip_w; \
509  INT ip_u; \
510  INT ip_s = ths->K/(ths->m+2); \
511  INT lg_offset[ths->d]; /* offset in g according to u */ \
512  INT count_lg[ths->d]; /* count summands (2m+2) */ \
513 \
514  f = (R*)ths->f; g = (R*)ths->g; \
515 \
516  MACRO_B_init_result_ ## which_one \
517 \
518  if (ths->flags & PRE_FULL_PSI) \
519  { \
520  for (ix = 0, j = 0, fj = f; j < ths->M_total; j++, fj++) \
521  { \
522  for (l_L = 0; l_L < ths->psi_index_f[j]; l_L++, ix++) \
523  { \
524  MACRO_B_PRE_FULL_PSI_compute_ ## which_one; \
525  } \
526  } \
527  return; \
528  } \
529 \
530  phi_prod[0] = K(1.0); \
531  ll_plain[0] = 0; \
532 \
533  for (t = 0, lprod = 1; t < ths->d; t++) \
534  lprod *= (2 * ths->m + 2); \
535 \
536  if (ths->flags & PRE_PSI) \
537  { \
538  for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
539  { \
540  MACRO_init_uo_l_lj_t; \
541  \
542  for (l_L = 0; l_L < lprod; l_L++) \
543  { \
544  MACRO_update_phi_prod_ll_plain(which_one, with_PRE_PSI); \
545  \
546  MACRO_B_compute_ ## which_one; \
547  \
548  MACRO_count_uo_l_lj_t; \
549  } /* for(l_L) */ \
550  } /* for(j) */ \
551  return; \
552  } /* if(PRE_PSI) */ \
553  \
554  if (ths->flags & PRE_FG_PSI) \
555  { \
556  for (t = 0; t < ths->d; t++) \
557  { \
558  tmpEXP2 = EXP(K(-1.0) / ths->b[t]); \
559  tmpEXP2sq = tmpEXP2 * tmpEXP2; \
560  tmp2 = K(1.0); \
561  tmp3 = K(1.0); \
562  fg_exp_l[t][0] = K(1.0); \
563  \
564  for (lj_fg = 1; lj_fg <= (2 * ths->m + 2); lj_fg++) \
565  { \
566  tmp3 = tmp2 * tmpEXP2; \
567  tmp2 *= tmpEXP2sq; \
568  fg_exp_l[t][lj_fg] = fg_exp_l[t][lj_fg-1] * tmp3; \
569  } \
570  } \
571  \
572  for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
573  { \
574  MACRO_init_uo_l_lj_t; \
575  \
576  for (t = 0; t < ths->d; t++) \
577  { \
578  fg_psi[t][0] = ths->psi[2 * (j * ths->d + t)]; \
579  tmpEXP1 = ths->psi[2 * (j * ths->d + t) + 1]; \
580  tmp1 = K(1.0); \
581  \
582  for (l_fg = u[t] + 1, lj_fg = 1; l_fg <= o[t]; l_fg++, lj_fg++) \
583  { \
584  tmp1 *= tmpEXP1; \
585  fg_psi[t][lj_fg] = fg_psi[t][0] * tmp1 * fg_exp_l[t][lj_fg]; \
586  } \
587  } \
588  \
589  for (l_L= 0; l_L < lprod; l_L++) \
590  { \
591  MACRO_update_phi_prod_ll_plain(which_one, with_FG_PSI); \
592  \
593  MACRO_B_compute_ ## which_one; \
594  \
595  MACRO_count_uo_l_lj_t; \
596  } \
597  } \
598  return; \
599  } \
600  \
601  if (ths->flags & FG_PSI) \
602  { \
603  for (t = 0; t < ths->d; t++) \
604  { \
605  tmpEXP2 = EXP(K(-1.0) / ths->b[t]); \
606  tmpEXP2sq = tmpEXP2 * tmpEXP2; \
607  tmp2 = K(1.0); \
608  tmp3 = K(1.0); \
609  fg_exp_l[t][0] = K(1.0); \
610  for (lj_fg = 1; lj_fg <= (2 * ths->m + 2); lj_fg++) \
611  { \
612  tmp3 = tmp2 * tmpEXP2; \
613  tmp2 *= tmpEXP2sq; \
614  fg_exp_l[t][lj_fg] = fg_exp_l[t][lj_fg-1] * tmp3; \
615  } \
616  } \
617  \
618  for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
619  { \
620  MACRO_init_uo_l_lj_t; \
621  \
622  for (t = 0; t < ths->d; t++) \
623  { \
624  fg_psi[t][0] = (PHI((2 * NN(ths->n[t])), (ths->x[j*ths->d+t] - ((R)u[t])/(2 * NN(ths->n[t]))),(t)));\
625  \
626  tmpEXP1 = EXP(K(2.0) * ((2 * NN(ths->n[t])) * ths->x[j * ths->d + t] - u[t]) / ths->b[t]); \
627  tmp1 = K(1.0); \
628  for (l_fg = u[t] + 1, lj_fg = 1; l_fg <= o[t]; l_fg++, lj_fg++) \
629  { \
630  tmp1 *= tmpEXP1; \
631  fg_psi[t][lj_fg] = fg_psi[t][0] * tmp1 * fg_exp_l[t][lj_fg]; \
632  } \
633  } \
634  \
635  for (l_L = 0; l_L < lprod; l_L++) \
636  { \
637  MACRO_update_phi_prod_ll_plain(which_one, with_FG_PSI); \
638  \
639  MACRO_B_compute_ ## which_one; \
640  \
641  MACRO_count_uo_l_lj_t; \
642  } \
643  } \
644  return; \
645  } \
646  \
647  if (ths->flags & PRE_LIN_PSI) \
648  { \
649  for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
650  { \
651  MACRO_init_uo_l_lj_t; \
652  \
653  for (t = 0; t < ths->d; t++) \
654  { \
655  y[t] = (((2 * NN(ths->n[t])) * ths->x[j * ths->d + t] - (R)u[t]) \
656  * ((R)ths->K))/(ths->m + 2); \
657  ip_u = LRINT(FLOOR(y[t])); \
658  ip_w = y[t]-ip_u; \
659  for (l_fg = u[t], lj_fg = 0; l_fg <= o[t]; l_fg++, lj_fg++) \
660  { \
661  fg_psi[t][lj_fg] = ths->psi[(ths->K+1)*t + ABS(ip_u-lj_fg*ip_s)] \
662  * (1-ip_w) + ths->psi[(ths->K+1)*t + ABS(ip_u-lj_fg*ip_s+1)] \
663  * (ip_w); \
664  } \
665  } \
666  \
667  for (l_L = 0; l_L < lprod; l_L++) \
668  { \
669  MACRO_update_phi_prod_ll_plain(which_one, with_FG_PSI); \
670  \
671  MACRO_B_compute_ ## which_one; \
672  \
673  MACRO_count_uo_l_lj_t; \
674  } /* for(l_L) */ \
675  } /* for(j) */ \
676  return; \
677  } /* if(PRE_LIN_PSI) */ \
678  \
679  /* no precomputed psi at all */ \
680  for (j = 0, fj = &f[0]; j < ths->M_total; j++, fj += 1) \
681  { \
682  MACRO_init_uo_l_lj_t; \
683  \
684  for (l_L = 0; l_L < lprod; l_L++) \
685  { \
686  MACRO_update_phi_prod_ll_plain(which_one, without_PRE_PSI); \
687  \
688  MACRO_B_compute_ ## which_one; \
689  \
690  MACRO_count_uo_l_lj_t; \
691  } /* for (l_L) */ \
692  } /* for (j) */ \
693 } /* B */
694 
695 MACRO_B(A)
696 MACRO_B(T)
697 
701 void X(trafo)(X(plan) *ths)
702 {
703  switch(ths->d)
704  {
705  default:
706  {
707  /* use ths->my_fftw_r2r_plan */
708  ths->g_hat = ths->g1;
709  ths->g = ths->g2;
710 
711  /* form \f$ \hat g_k = \frac{\hat f_k}{c_k\left(\phi\right)} \text{ for }
712  * k \in I_N \f$ */
713  TIC(0)
714  D_A(ths);
715  TOC(0)
716 
717  /* Compute by d-variate discrete Fourier transform
718  * \f$ g_l = \sum_{k \in I_N} \hat g_k {\rm e}^{-2\pi {\rm i} \frac{kl}{n}}
719  * \text{ for } l \in I_n \f$ */
720  TIC_FFTW(1)
721  FFTW(execute)(ths->my_fftw_r2r_plan);
722  TOC_FFTW(1)
723 
724  /*if (ths->flags & PRE_FULL_PSI)
725  full_psi__A(ths);*/
726 
727  /* Set \f$ f_j = \sum_{l \in I_n,m(x_j)} g_l \psi\left(x_j-\frac{l}{n}\right)
728  * \text{ for } j=0,\dots,M-1 \f$ */
729  TIC(2)
730  B_A(ths);
731  TOC(2)
732 
733  /*if (ths->flags & PRE_FULL_PSI)
734  {
735  Y(free)(ths->psi_index_g);
736  Y(free)(ths->psi_index_f);
737  }*/
738  }
739  }
740 } /* trafo */
741 
742 void X(adjoint)(X(plan) *ths)
743 {
744  switch(ths->d)
745  {
746  default:
747  {
748  /* use ths->my_fftw_plan */
749  ths->g_hat = ths->g2;
750  ths->g = ths->g1;
751 
752  /*if (ths->flags & PRE_FULL_PSI)
753  full_psi__T(ths);*/
754 
755  /* Set \f$ g_l = \sum_{j=0}^{M-1} f_j \psi\left(x_j-\frac{l}{n}\right)
756  * \text{ for } l \in I_n,m(x_j) \f$ */
757  TIC(2)
758  B_T(ths);
759  TOC(2)
760 
761  /* Compute by d-variate discrete cosine transform
762  * \f$ \hat g_k = \sum_{l \in I_n} g_l {\rm e}^{-2\pi {\rm i} \frac{kl}{n}}
763  * \text{ for } k \in I_N\f$ */
764  TIC_FFTW(1)
765  FFTW(execute)(ths->my_fftw_r2r_plan);
766  TOC_FFTW(1)
767 
768  /* Form \f$ \hat f_k = \frac{\hat g_k}{c_k\left(\phi\right)} \text{ for }
769  * k \in I_N \f$ */
770  TIC(0)
771  D_T(ths);
772  TOC(0)
773  }
774  }
775 } /* adjoint */
776 
779 static inline void precompute_phi_hut(X(plan) *ths)
780 {
781  INT ks[ths->d]; /* index over all frequencies */
782  INT t; /* index over all dimensions */
783 
784  ths->c_phi_inv = (R**) Y(malloc)((size_t)(ths->d) * sizeof(R*));
785 
786  for (t = 0; t < ths->d; t++)
787  {
788  ths->c_phi_inv[t] = (R*)Y(malloc)((size_t)(ths->N[t] - OFFSET) * sizeof(R));
789 
790  for (ks[t] = 0; ks[t] < ths->N[t] - OFFSET; ks[t]++)
791  {
792  ths->c_phi_inv[t][ks[t]] = (K(1.0) / (PHI_HUT((2 * NN(ths->n[t])), ks[t] + OFFSET, t)));
793  }
794  }
795 } /* phi_hut */
796 
802 void X(precompute_lin_psi)(X(plan) *ths)
803 {
804  INT t;
805  INT j;
806  R step;
808  for (t = 0; t < ths->d; t++)
809  {
810  step = ((R)(ths->m+2)) / (((R)ths->K) * (2 * NN(ths->n[t])));
811 
812  for (j = 0; j <= ths->K; j++)
813  {
814  ths->psi[(ths->K + 1) * t + j] = PHI((2 * NN(ths->n[t])), (j * step), t);
815  } /* for(j) */
816  } /* for(t) */
817 }
818 
819 void X(precompute_fg_psi)(X(plan) *ths)
820 {
821  INT t; /* index over all dimensions */
822  INT u, o; /* depends on x_j */
823 
824 // sort(ths);
825 
826  for (t = 0; t < ths->d; t++)
827  {
828  INT j;
829 // #pragma omp parallel for default(shared) private(j,u,o)
830  for (j = 0; j < ths->M_total; j++)
831  {
832  uo(ths, j, &u, &o, t);
833 
834  ths->psi[2 * (j*ths->d + t)] = (PHI((2 * NN(ths->n[t])),(ths->x[j * ths->d + t] - ((R)u) / (2 * NN(ths->n[t]))),(t)));
835  ths->psi[2 * (j*ths->d + t) + 1] = EXP(K(2.0) * ( (2 * NN(ths->n[t])) * ths->x[j * ths->d + t] - u) / ths->b[t]);
836  } /* for(j) */
837  }
838  /* for(t) */
839 } /* nfft_precompute_fg_psi */
840 
841 void X(precompute_psi)(X(plan) *ths)
842 {
843  INT t; /* index over all dimensions */
844  INT lj; /* index 0<=lj<u+o+1 */
845  INT u, o; /* depends on x_j */
846 
847  //sort(ths);
848 
849  for (t = 0; t < ths->d; t++)
850  {
851  INT j;
852 
853  for (j = 0; j < ths->M_total; j++)
854  {
855  uo(ths, j, &u, &o, t);
856 
857  for(lj = 0; lj < (2 * ths->m + 2); lj++)
858  ths->psi[(j * ths->d + t) * (2 * ths->m + 2) + lj] =
859  (PHI((2 * NN(ths->n[t])), ((ths->x[(j) * ths->d + (t)]) - ((R)(lj + u)) / (K(2.0) * ((R)NN(ths->n[t])))), t));
860  } /* for (j) */
861  } /* for (t) */
862 } /* precompute_psi */
863 
864 void X(precompute_full_psi)(X(plan) *ths)
865 {
866 //#ifdef _OPENMP
867 // sort(ths);
868 //
869 // nfft_precompute_full_psi_omp(ths);
870 //#else
871  INT t, t2; /* index over all dimensions */
872  INT j; /* index over all nodes */
873  INT l_L; /* plain index 0 <= l_L < lprod */
874  INT l[ths->d]; /* multi index u<=l<=o */
875  INT lj[ths->d]; /* multi index 0<=lj<u+o+1 */
876  INT ll_plain[ths->d+1]; /* postfix plain index */
877  INT lprod; /* 'bandwidth' of matrix B */
878  INT u[ths->d], o[ths->d]; /* depends on x_j */
879  INT count_lg[ths->d];
880  INT lg_offset[ths->d];
881 
882  R phi_prod[ths->d+1];
883 
884  INT ix, ix_old;
885 
886  //sort(ths);
887 
888  phi_prod[0] = K(1.0);
889  ll_plain[0] = 0;
890 
891  for (t = 0, lprod = 1; t < ths->d; t++)
892  lprod *= 2 * ths->m + 2;
893 
894  for (j = 0, ix = 0, ix_old = 0; j < ths->M_total; j++)
895  {
896  MACRO_init_uo_l_lj_t;
897 
898  for (l_L = 0; l_L < lprod; l_L++, ix++)
899  {
900  MACRO_update_phi_prod_ll_plain(A, without_PRE_PSI);
901 
902  ths->psi_index_g[ix] = ll_plain[ths->d];
903  ths->psi[ix] = phi_prod[ths->d];
904 
905  MACRO_count_uo_l_lj_t;
906  } /* for (l_L) */
907 
908  ths->psi_index_f[j] = ix - ix_old;
909  ix_old = ix;
910  } /* for(j) */
911 //#endif
912 }
913 
914 void X(precompute_one_psi)(X(plan) *ths)
915 {
916  if(ths->flags & PRE_PSI)
917  X(precompute_psi)(ths);
918  if(ths->flags & PRE_FULL_PSI)
919  X(precompute_full_psi)(ths);
920  if(ths->flags & PRE_FG_PSI)
921  X(precompute_fg_psi)(ths);
922  if(ths->flags & PRE_LIN_PSI)
923  X(precompute_lin_psi)(ths);
924 }
925 
926 static inline void init_help(X(plan) *ths)
927 {
928  INT t; /* index over all dimensions */
929  INT lprod; /* 'bandwidth' of matrix B */
930 
931  if (ths->flags & NFFT_OMP_BLOCKWISE_ADJOINT)
932  ths->flags |= NFFT_SORT_NODES;
933 
934  ths->N_total = intprod(ths->N, OFFSET, ths->d);
935  ths->n_total = intprod(ths->n, 0, ths->d);
936 
937  ths->sigma = (R*)Y(malloc)((size_t)(ths->d) * sizeof(R));
938 
939  for (t = 0; t < ths->d; t++)
940  ths->sigma[t] = ((R)NN(ths->n[t])) / ths->N[t];
941 
942  /* Assign r2r transform kinds for each dimension */
943  ths->r2r_kind = (FFTW(r2r_kind)*)Y(malloc)((size_t)(ths->d) * sizeof (FFTW(r2r_kind)));
944  for (t = 0; t < ths->d; t++)
945  ths->r2r_kind[t] = FOURIER_TRAFO;
946 
947  WINDOW_HELP_INIT;
948 
949  if (ths->flags & MALLOC_X)
950  ths->x = (R*)Y(malloc)((size_t)(ths->d * ths->M_total) * sizeof(R));
951 
952  if (ths->flags & MALLOC_F_HAT)
953  ths->f_hat = (R*)Y(malloc)((size_t)(ths->N_total) * sizeof(R));
954 
955  if (ths->flags & MALLOC_F)
956  ths->f = (R*)Y(malloc)((size_t)(ths->M_total) * sizeof(R));
957 
958  if (ths->flags & PRE_PHI_HUT)
959  precompute_phi_hut(ths);
960 
961  if(ths->flags & PRE_LIN_PSI)
962  {
963  ths->K = (1U<< 10) * (ths->m+2);
964  ths->psi = (R*) Y(malloc)((size_t)((ths->K + 1) * ths->d) * sizeof(R));
965  }
966 
967  if(ths->flags & PRE_FG_PSI)
968  ths->psi = (R*) Y(malloc)((size_t)(ths->M_total * ths->d * 2) * sizeof(R));
969 
970  if (ths->flags & PRE_PSI)
971  ths->psi = (R*) Y(malloc)((size_t)(ths->M_total * ths->d * (2 * ths->m + 2 )) * sizeof(R));
972 
973  if(ths->flags & PRE_FULL_PSI)
974  {
975  for (t = 0, lprod = 1; t < ths->d; t++)
976  lprod *= 2 * ths->m + 2;
977 
978  ths->psi = (R*) Y(malloc)((size_t)(ths->M_total * lprod) * sizeof(R));
979 
980  ths->psi_index_f = (INT*) Y(malloc)((size_t)(ths->M_total) * sizeof(INT));
981  ths->psi_index_g = (INT*) Y(malloc)((size_t)(ths->M_total * lprod) * sizeof(INT));
982  }
983 
984  if (ths->flags & FFTW_INIT)
985  {
986  ths->g1 = (R*)Y(malloc)((size_t)(ths->n_total) * sizeof(R));
987 
988  if (ths->flags & FFT_OUT_OF_PLACE)
989  ths->g2 = (R*) Y(malloc)((size_t)(ths->n_total) * sizeof(R));
990  else
991  ths->g2 = ths->g1;
992 
993  {
994  int *_n = Y(malloc)((size_t)(ths->d) * sizeof(int));
995 
996  for (t = 0; t < ths->d; t++)
997  _n[t] = (int)(ths->n[t]);
998 
999  ths->my_fftw_r2r_plan = FFTW(plan_r2r)((int)ths->d, _n, ths->g1, ths->g2, ths->r2r_kind, ths->fftw_flags);
1000  Y(free)(_n);
1001  }
1002  }
1003 
1004 // if(ths->flags & NFFT_SORT_NODES)
1005 // ths->index_x = (INT*) Y(malloc)(sizeof(INT)*2*ths->M_total);
1006 // else
1007 // ths->index_x = NULL;
1008 
1009  ths->mv_trafo = (void (*) (void* ))X(trafo);
1010  ths->mv_adjoint = (void (*) (void* ))X(adjoint);
1011 }
1012 
1013 void X(init)(X(plan) *ths, int d, int *N, int M_total)
1014 {
1015  int t; /* index over all dimensions */
1016 
1017  ths->d = (INT)d;
1018 
1019  ths->N = (INT*) Y(malloc)((size_t)(d) * sizeof(INT));
1020 
1021  for (t = 0; t < d; t++)
1022  ths->N[t] = (INT)N[t];
1023 
1024  ths->M_total = (INT)M_total;
1025 
1026  ths->n = (INT*) Y(malloc)((size_t)(d) * sizeof(INT));
1027 
1028  for (t = 0; t < d; t++)
1029  ths->n[t] = 2 * (Y(next_power_of_2)(ths->N[t]) - 1) + OFFSET;
1030 
1031  ths->m = WINDOW_HELP_ESTIMATE_m;
1032 
1033  if (d > 1)
1034  {
1035 //#ifdef _OPENMP
1036 // ths->flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F |
1037 // FFTW_INIT | FFT_OUT_OF_PLACE | NFFT_SORT_NODES |
1038 // NFFT_OMP_BLOCKWISE_ADJOINT;
1039 //#else
1040  ths->flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F |
1041  FFTW_INIT | FFT_OUT_OF_PLACE | NFFT_SORT_NODES;
1042 //#endif
1043  }
1044  else
1045  ths->flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F |
1047 
1048  ths->fftw_flags = FFTW_ESTIMATE | FFTW_DESTROY_INPUT;
1049 
1050  init_help(ths);
1051 }
1052 
1053 void X(init_guru)(X(plan) *ths, int d, int *N, int M_total, int *n, int m,
1054  unsigned flags, unsigned fftw_flags)
1055 {
1056  INT t; /* index over all dimensions */
1057 
1058  ths->d = (INT)d;
1059  ths->M_total = (INT)M_total;
1060  ths->N = (INT*)Y(malloc)((size_t)(ths->d) * sizeof(INT));
1061 
1062  for (t = 0; t < d; t++)
1063  ths->N[t] = (INT)N[t];
1064 
1065  ths->n = (INT*)Y(malloc)((size_t)(ths->d) * sizeof(INT));
1066 
1067  for (t = 0; t < d; t++)
1068  ths->n[t] = (INT)n[t];
1069 
1070  ths->m = (INT)m;
1071 
1072  ths->flags = flags;
1073  ths->fftw_flags = fftw_flags;
1074 
1075  init_help(ths);
1076 }
1077 
1078 void X(init_1d)(X(plan) *ths, int N1, int M_total)
1079 {
1080  int N[1];
1081 
1082  N[0] = N1;
1083 
1084  X(init)(ths, 1, N, M_total);
1085 }
1086 
1087 void X(init_2d)(X(plan) *ths, int N1, int N2, int M_total)
1088 {
1089  int N[2];
1090 
1091  N[0] = N1;
1092  N[1] = N2;
1093 
1094  X(init)(ths, 2, N, M_total);
1095 }
1096 
1097 void X(init_3d)(X(plan) *ths, int N1, int N2, int N3, int M_total)
1098 {
1099  int N[3];
1100 
1101  N[0] = N1;
1102  N[1] = N2;
1103  N[2] = N3;
1104 
1105  X(init)(ths, 3, N, M_total);
1106 }
1107 
1108 const char* X(check)(X(plan) *ths)
1109 {
1110  INT j;
1111 
1112  if (!ths->f)
1113  return "Member f not initialized.";
1114 
1115  if (!ths->x)
1116  return "Member x not initialized.";
1117 
1118  if (!ths->f_hat)
1119  return "Member f_hat not initialized.";
1120 
1121  for (j = 0; j < ths->M_total * ths->d; j++)
1122  {
1123  if ((ths->x[j] < K(0.0)) || (ths->x[j] >= K(0.5)))
1124  {
1125  return "ths->x out of range [0.0,0.5)";
1126  }
1127  }
1128 
1129  for (j = 0; j < ths->d; j++)
1130  {
1131  if (ths->sigma[j] <= 1)
1132  return "Oversampling factor too small";
1133 
1134  if(ths->N[j] - 1 <= ths->m)
1135  return "Polynomial degree N is smaller than cut-off m";
1136 
1137  if(ths->N[j]%2 == 1)
1138  return "polynomial degree N has to be even";
1139  }
1140  return 0;
1141 }
1142 
1143 void X(finalize)(X(plan) *ths)
1144 {
1145  INT t; /* index over dimensions */
1146 
1147 // if(ths->flags & NFFT_SORT_NODES)
1148 // Y(free)(ths->index_x);
1149 
1150  if (ths->flags & FFTW_INIT)
1151  {
1152 #ifdef _OPENMP
1153  #pragma omp critical (nfft_omp_critical_fftw_plan)
1154 #endif
1155  FFTW(destroy_plan)(ths->my_fftw_r2r_plan);
1156 
1157  if (ths->flags & FFT_OUT_OF_PLACE)
1158  Y(free)(ths->g2);
1159 
1160  Y(free)(ths->g1);
1161  }
1162 
1163  if(ths->flags & PRE_FULL_PSI)
1164  {
1165  Y(free)(ths->psi_index_g);
1166  Y(free)(ths->psi_index_f);
1167  Y(free)(ths->psi);
1168  }
1169 
1170  if (ths->flags & PRE_PSI)
1171  Y(free)(ths->psi);
1172 
1173  if(ths->flags & PRE_FG_PSI)
1174  Y(free)(ths->psi);
1175 
1176  if(ths->flags & PRE_LIN_PSI)
1177  Y(free)(ths->psi);
1178 
1179  if (ths->flags & PRE_PHI_HUT)
1180  {
1181  for (t = 0; t < ths->d; t++)
1182  Y(free)(ths->c_phi_inv[t]);
1183  Y(free)(ths->c_phi_inv);
1184  }
1185 
1186  if (ths->flags & MALLOC_F)
1187  Y(free)(ths->f);
1188 
1189  if(ths->flags & MALLOC_F_HAT)
1190  Y(free)(ths->f_hat);
1191 
1192  if (ths->flags & MALLOC_X)
1193  Y(free)(ths->x);
1194 
1195  WINDOW_HELP_FINALIZE;
1196 
1197  Y(free)(ths->N);
1198  Y(free)(ths->n);
1199  Y(free)(ths->sigma);
1200 
1201  Y(free)(ths->r2r_kind);
1202 } /* finalize */
#define TIC(a)
Timing, method works since the inaccurate timer is updated mostly in the measured function...
Definition: infft.h:1397
#define PRE_FG_PSI
Definition: nfft3.h:196
#define MALLOC_X
Definition: nfft3.h:199
#define MALLOC_F_HAT
Definition: nfft3.h:200
#define FFTW_INIT
Definition: nfft3.h:203
#define MALLOC_F
Definition: nfft3.h:201
#define X(name)
Include header for C99 complex datatype.
Definition: fastsum.h:57
#define FFT_OUT_OF_PLACE
Definition: nfft3.h:202
#define PRE_LIN_PSI
Definition: nfft3.h:195
#define PRE_PSI
Definition: nfft3.h:197
#define PRE_FULL_PSI
Definition: nfft3.h:198
Header file for the nfft3 library.
#define PRE_PHI_HUT
Definition: nfft3.h:193