NFFT  3.4.1
fastgauss.c
1 /*
2  * Copyright (c) 2002, 2017 Jens Keiner, Stefan Kunis, Daniel Potts
3  *
4  * This program is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU General Public License as published by the Free Software
6  * Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <math.h>
28 #include <complex.h>
29 
30 #define NFFT_PRECISION_DOUBLE
31 
32 #include "nfft3mp.h"
33 
42 #define DGT_PRE_CEXP (1U<< 0)
43 
53 #define FGT_NDFT (1U<< 1)
54 
63 #define FGT_APPROX_B (1U<< 2)
64 
66 typedef struct
67 {
68  int N;
69  int M;
71  NFFT_C *alpha;
72  NFFT_C *f;
74  unsigned flags;
77  NFFT_C sigma;
79  NFFT_R *x;
80  NFFT_R *y;
82  NFFT_C *pre_cexp;
84  int n;
85  NFFT_R p;
87  NFFT_C *b;
89  NFFT(plan) *nplan1;
90  NFFT(plan) *nplan2;
92 } fgt_plan;
93 
101 static void dgt_trafo(fgt_plan *ths)
102 {
103  NFFT_INT j, k, l;
104 
105  for (j = 0; j < ths->M; j++)
106  ths->f[j] = NFFT_K(0.0);
107 
108  if (ths->flags & DGT_PRE_CEXP)
109  for (j = 0, l = 0; j < ths->M; j++)
110  for (k = 0; k < ths->N; k++, l++)
111  ths->f[j] += ths->alpha[k] * ths->pre_cexp[l];
112  else
113  for (j = 0; j < ths->M; j++)
114  for (k = 0; k < ths->N; k++)
115  ths->f[j] += ths->alpha[k]
116  * NFFT_M(cexp)(
117  -ths->sigma * (ths->y[j] - ths->x[k])
118  * (ths->y[j] - ths->x[k]));
119 }
120 
128 static void fgt_trafo(fgt_plan *ths)
129 {
130  NFFT_INT l;
131 
132  if (ths->flags & FGT_NDFT)
133  {
134  NFFT(adjoint_direct)(ths->nplan1);
135 
136  for (l = 0; l < ths->n; l++)
137  ths->nplan1->f_hat[l] *= ths->b[l];
138 
139  NFFT(trafo_direct)(ths->nplan2);
140  }
141  else
142  {
143  NFFT(adjoint)(ths->nplan1);
144 
145  for (l = 0; l < ths->n; l++)
146  ths->nplan1->f_hat[l] *= ths->b[l];
147 
148  NFFT(trafo)(ths->nplan2);
149  }
150 }
151 
166 static void fgt_init_guru(fgt_plan *ths, int N, int M, NFFT_C sigma, int n, NFFT_R p, int m,
167  unsigned flags)
168 {
169  int j, n_fftw;
170  FFTW(plan) fplan;
171 
172  ths->M = M;
173  ths->N = N;
174  ths->sigma = sigma;
175  ths->flags = flags;
176 
177  ths->x = (NFFT_R*) NFFT(malloc)((size_t)(ths->N) * sizeof(NFFT_R));
178  ths->y = (NFFT_R*) NFFT(malloc)((size_t)(ths->M) * sizeof(NFFT_R));
179  ths->alpha = (NFFT_C*) NFFT(malloc)((size_t)(ths->N) * sizeof(NFFT_C));
180  ths->f = (NFFT_C*) NFFT(malloc)((size_t)(ths->M) * sizeof(NFFT_C));
181 
182  ths->n = n;
183  ths->p = p;
184 
185  ths->b = (NFFT_C*) NFFT(malloc)((size_t)(ths->n) * sizeof(NFFT_C));
186 
187  ths->nplan1 = (NFFT(plan)*) NFFT(malloc)(sizeof(NFFT(plan)));
188  ths->nplan2 = (NFFT(plan)*) NFFT(malloc)(sizeof(NFFT(plan)));
189 
190  n_fftw = (int)NFFT(next_power_of_2)(2 * ths->n);
191 
192  {
193  NFFT(init_guru)(ths->nplan1, 1, &(ths->n), ths->N, &n_fftw, m, PRE_PHI_HUT |
194  PRE_PSI | MALLOC_X | MALLOC_F_HAT | FFTW_INIT, FFTW_MEASURE);
195  NFFT(init_guru)(ths->nplan2, 1, &(ths->n), ths->M, &n_fftw, m, PRE_PHI_HUT |
196  PRE_PSI | MALLOC_X | FFTW_INIT, FFTW_MEASURE);
197  }
198 
199  ths->nplan1->f = ths->alpha;
200  ths->nplan2->f_hat = ths->nplan1->f_hat;
201  ths->nplan2->f = ths->f;
202 
203  if (ths->flags & FGT_APPROX_B)
204  {
205  fplan = FFTW(plan_dft_1d)(ths->n, ths->b, ths->b, FFTW_FORWARD,
206  FFTW_MEASURE);
207 
208  for (j = 0; j < ths->n; j++)
209  ths->b[j] = NFFT_M(cexp)(
210  -ths->p * ths->p * ths->sigma * ((NFFT_R)(j) - (NFFT_R)(ths->n) / NFFT_K(2.0)) * ((NFFT_R)(j) - (NFFT_R)(ths->n) / NFFT_K(2.0))
211  / ((NFFT_R) (ths->n * ths->n))) / ((NFFT_R)(ths->n));
212 
213  NFFT(fftshift_complex_int)(ths->b, 1, &ths->n);
214  FFTW(execute)(fplan);
215  NFFT(fftshift_complex_int)(ths->b, 1, &ths->n);
216 
217  FFTW(destroy_plan)(fplan);
218  }
219  else
220  {
221  for (j = 0; j < ths->n; j++)
222  ths->b[j] = NFFT_K(1.0) / ths->p * NFFT_M(csqrt)(NFFT_KPI / ths->sigma)
223  * NFFT_M(cexp)(
224  -NFFT_KPI * NFFT_KPI * ((NFFT_R)(j) - (NFFT_R)(ths->n) / NFFT_K(2.0)) * ((NFFT_R)(j) - (NFFT_R)(ths->n) / NFFT_K(2.0))
225  / (ths->p * ths->p * ths->sigma));
226  }
227 }
228 
240 static void fgt_init(fgt_plan *ths, int N, int M, NFFT_C sigma, NFFT_R eps)
241 {
242  NFFT_R p;
243  int n;
244 
245  p = NFFT_K(0.5) + NFFT_M(sqrt)(-NFFT_M(log)(eps) / NFFT_M(creal)(sigma));
246 
247  if (p < NFFT_K(1.0))
248  p = NFFT_K(1.0);
249 
250  n = (int)(2 * (NFFT_M(lrint)(NFFT_M(ceil)(p * NFFT_M(cabs)(sigma) / NFFT_KPI * NFFT_M(sqrt)(-NFFT_M(log)(eps) / NFFT_M(creal)(sigma))))));
251 
252  fgt_init_guru(ths, N, M, sigma, n, p, 7, N * M <= ((NFFT_INT) (1U << 20)) ? DGT_PRE_CEXP : 0);
253 }
254 
262 {
263  int j, k, l;
264 
265  if (ths->flags & DGT_PRE_CEXP)
266  {
267  ths->pre_cexp = (NFFT_C*) NFFT(malloc)((size_t)(ths->M * ths->N) * sizeof(NFFT_C));
268 
269  for (j = 0, l = 0; j < ths->M; j++)
270  for (k = 0; k < ths->N; k++, l++)
271  ths->pre_cexp[l] = NFFT_M(cexp)(
272  -ths->sigma * (ths->y[j] - ths->x[k]) * (ths->y[j] - ths->x[k]));
273  }
274 
275  for (j = 0; j < ths->nplan1->M_total; j++)
276  ths->nplan1->x[j] = ths->x[j] / ths->p;
277  for (j = 0; j < ths->nplan2->M_total; j++)
278  ths->nplan2->x[j] = ths->y[j] / ths->p;
279 
280  if (ths->nplan1->flags & PRE_PSI)
281  NFFT(precompute_psi)(ths->nplan1);
282  if (ths->nplan2->flags & PRE_PSI)
283  NFFT(precompute_psi)(ths->nplan2);
284 }
285 
292 static void fgt_finalize(fgt_plan *ths)
293 {
294  NFFT(finalize)(ths->nplan2);
295  NFFT(finalize)(ths->nplan1);
296 
297  NFFT(free)(ths->nplan2);
298  NFFT(free)(ths->nplan1);
299 
300  NFFT(free)(ths->b);
301 
302  NFFT(free)(ths->f);
303  NFFT(free)(ths->y);
304 
305  NFFT(free)(ths->alpha);
306  NFFT(free)(ths->x);
307 }
308 
315 static void fgt_test_init_rand(fgt_plan *ths)
316 {
317  NFFT_INT j, k;
318 
319  for (k = 0; k < ths->N; k++)
320  ths->x[k] = NFFT(drand48)() / NFFT_K(2.0) - NFFT_K(1.0) / NFFT_K(4.0);
321 
322  for (j = 0; j < ths->M; j++)
323  ths->y[j] = NFFT(drand48)() / NFFT_K(2.0) - NFFT_K(1.0) / NFFT_K(4.0);
324 
325  for (k = 0; k < ths->N; k++)
326  ths->alpha[k] = NFFT(drand48)() - NFFT_K(1.0) / NFFT_K(2.0)
327  + _Complex_I * (NFFT(drand48)() - NFFT_K(1.0) / NFFT_K(2.0));
328 }
329 
338 static NFFT_R fgt_test_measure_time(fgt_plan *ths, unsigned dgt)
339 {
340  int r;
341  NFFT_R t0, t1, time_diff;
342  NFFT_R t_out;
343  NFFT_R tau = NFFT_K(0.01);
344 
345  t_out = NFFT_K(0.0);
346  r = 0;
347  while (t_out < tau)
348  {
349  r++;
350  t0 = NFFT(clock_gettime_seconds)();
351  if (dgt)
352  dgt_trafo(ths);
353  else
354  fgt_trafo(ths);
355  t1 = NFFT(clock_gettime_seconds)();
356  time_diff = t1 - t0;
357  t_out += time_diff;
358  }
359  t_out /= (NFFT_R)(r);
360 
361  return t_out;
362 }
363 
373 static void fgt_test_simple(int N, int M, NFFT_C sigma, NFFT_R eps)
374 {
375  fgt_plan my_plan;
376  NFFT_C *swap_dgt;
377 
378  fgt_init(&my_plan, N, M, sigma, eps);
379  swap_dgt = (NFFT_C*) NFFT(malloc)((size_t)(my_plan.M) * sizeof(NFFT_C));
380 
381  fgt_test_init_rand(&my_plan);
382  fgt_init_node_dependent(&my_plan);
383 
384  NFFT_CSWAP(swap_dgt, my_plan.f);
385  dgt_trafo(&my_plan);
386  NFFT(vpr_complex)(my_plan.f, my_plan.M, "discrete gauss transform");
387  NFFT_CSWAP(swap_dgt, my_plan.f);
388 
389  fgt_trafo(&my_plan);
390  NFFT(vpr_complex)(my_plan.f, my_plan.M, "fast gauss transform");
391 
392  printf("\n relative error: %1.3" NFFT__FES__ "\n",
393  NFFT(error_l_infty_1_complex)(swap_dgt, my_plan.f, my_plan.M,
394  my_plan.alpha, my_plan.N));
395 
396  NFFT(free)(swap_dgt);
397  fgt_finalize(&my_plan);
398 }
399 
409 static void fgt_test_andersson(void)
410 {
411  fgt_plan my_plan;
412  NFFT_C *swap_dgt;
413  int N;
414 
415  NFFT_C sigma = NFFT_K(4.0) * (NFFT_K(138.0) + _Complex_I * NFFT_K(100.0));
416  int n = 128;
417  int N_dgt_pre_exp = (int) (1U << 11);
418  int N_dgt = (int) (1U << 19);
419 
420  printf("n=%d, sigma=%1.3" NFFT__FES__ "+i%1.3" NFFT__FES__ "\n", n, NFFT_M(creal)(sigma), NFFT_M(cimag)(sigma));
421 
422  for (N = ((NFFT_INT) (1U << 6)); N < ((NFFT_INT) (1U << 22)); N = N << 1)
423  {
424  printf("$%d$\t & ", N);
425 
426  fgt_init_guru(&my_plan, N, N, sigma, n, 1, 7, N < N_dgt_pre_exp ? DGT_PRE_CEXP : 0);
427 
428  swap_dgt = (NFFT_C*) NFFT(malloc)((size_t)(my_plan.M) * sizeof(NFFT_C));
429 
430  fgt_test_init_rand(&my_plan);
431 
432  fgt_init_node_dependent(&my_plan);
433 
434  if (N < N_dgt)
435  {
436  NFFT_CSWAP(swap_dgt, my_plan.f);
437  if (N < N_dgt_pre_exp)
438  my_plan.flags ^= DGT_PRE_CEXP;
439 
440  printf("$%1.1" NFFT__FES__ "$\t & ", fgt_test_measure_time(&my_plan, 1));
441  if (N < N_dgt_pre_exp)
442  my_plan.flags ^= DGT_PRE_CEXP;
443  NFFT_CSWAP(swap_dgt, my_plan.f);
444  }
445  else
446  printf("\t\t & ");
447 
448  if (N < N_dgt_pre_exp)
449  printf("$%1.1" NFFT__FES__ "$\t & ", fgt_test_measure_time(&my_plan, 1));
450  else
451  printf("\t\t & ");
452 
453  my_plan.flags ^= FGT_NDFT;
454  printf("$%1.1" NFFT__FES__ "$\t & ", fgt_test_measure_time(&my_plan, 0));
455  my_plan.flags ^= FGT_NDFT;
456 
457  printf("$%1.1" NFFT__FES__ "$\t & ", fgt_test_measure_time(&my_plan, 0));
458 
459  printf("$%1.1" NFFT__FES__ "$\t \\\\ \n",
460  NFFT(error_l_infty_1_complex)(swap_dgt, my_plan.f, my_plan.M, my_plan.alpha, my_plan.N));
461  fflush(stdout);
462 
463  NFFT(free)(swap_dgt);
464  fgt_finalize(&my_plan);
465  FFTW(cleanup)();
466  }
467 }
468 
475 static void fgt_test_error(void)
476 {
477  fgt_plan my_plan;
478  NFFT_C *swap_dgt;
479  int n, mi;
480 
481  NFFT_C sigma = NFFT_K(4.0) * (NFFT_K(138.0) + _Complex_I * NFFT_K(100.0));
482  int N = 1000;
483  int M = 1000;
484  int m[2] = { 7, 3 };
485 
486  printf("N=%d;\tM=%d;\nsigma=%1.3" NFFT__FES__ "+i*%1.3" NFFT__FES__ ";\n", N, M, NFFT_M(creal)(sigma),
487  NFFT_M(cimag)(sigma));
488  printf("error=[\n");
489 
490  swap_dgt = (NFFT_C*) NFFT(malloc)((size_t)(M) * sizeof(NFFT_C));
491 
492  for (n = 8; n <= 128; n += 4)
493  {
494  printf("%d\t", n);
495  for (mi = 0; mi < 2; mi++)
496  {
497  fgt_init_guru(&my_plan, N, M, sigma, n, 1, m[mi], 0);
498  fgt_test_init_rand(&my_plan);
499  fgt_init_node_dependent(&my_plan);
500 
501  NFFT_CSWAP(swap_dgt, my_plan.f);
502  dgt_trafo(&my_plan);
503  NFFT_CSWAP(swap_dgt, my_plan.f);
504 
505  fgt_trafo(&my_plan);
506 
507  printf("%1.3" NFFT__FES__ "\t",
508  NFFT(error_l_infty_1_complex)(swap_dgt, my_plan.f, my_plan.M, my_plan.alpha, my_plan.N));
509  fflush(stdout);
510 
511  fgt_finalize(&my_plan);
512  FFTW(cleanup)();
513  }
514  printf("\n");
515  }
516  printf("];\n");
517 
518  NFFT(free)(swap_dgt);
519 }
520 
527 static void fgt_test_error_p(void)
528 {
529  fgt_plan my_plan;
530  NFFT_C *swap_dgt;
531  int n, pi;
532 
533  NFFT_C sigma = NFFT_K(20.0) + NFFT_K(40.0) * _Complex_I;
534  int N = 1000;
535  int M = 1000;
536  NFFT_R p[3] = {NFFT_K(1.0), NFFT_K(1.5), NFFT_K(2.0)};
537 
538  printf("N=%d;\tM=%d;\nsigma=%1.3" NFFT__FES__ "+i*%1.3" NFFT__FES__ ";\n", N, M, NFFT_M(creal)(sigma),
539  NFFT_M(cimag)(sigma));
540  printf("error=[\n");
541 
542  swap_dgt = (NFFT_C*) NFFT(malloc)((size_t)(M) * sizeof(NFFT_C));
543 
544  for (n = 8; n <= 128; n += 4)
545  {
546  printf("%d\t", n);
547  for (pi = 0; pi < 3; pi++)
548  {
549  fgt_init_guru(&my_plan, N, M, sigma, n, p[pi], 7, 0);
550  fgt_test_init_rand(&my_plan);
551  fgt_init_node_dependent(&my_plan);
552 
553  NFFT_CSWAP(swap_dgt, my_plan.f);
554  dgt_trafo(&my_plan);
555  NFFT_CSWAP(swap_dgt, my_plan.f);
556 
557  fgt_trafo(&my_plan);
558 
559  printf("%1.3" NFFT__FES__ "\t",
560  NFFT(error_l_infty_1_complex)(swap_dgt, my_plan.f, my_plan.M, my_plan.alpha, my_plan.N));
561  fflush(stdout);
562 
563  fgt_finalize(&my_plan);
564  FFTW(cleanup)();
565  }
566  printf("\n");
567  }
568  printf("];\n");
569 }
570 
576 int main(int argc, char **argv)
577 {
578  if (argc != 2)
579  {
580  fprintf(stderr, "fastgauss type\n");
581  fprintf(stderr, " type\n");
582  fprintf(stderr, " 0 - Simple test.\n");
583  fprintf(stderr, " 1 - Compares accuracy and execution time.\n");
584  fprintf(stderr, " Pipe to output_andersson.tex\n");
585  fprintf(stderr, " 2 - Compares accuracy.\n");
586  fprintf(stderr, " Pipe to output_error.m\n");
587  fprintf(stderr, " 3 - Compares accuracy.\n");
588  fprintf(stderr, " Pipe to output_error_p.m\n");
589  return EXIT_FAILURE;
590  }
591 
592  if (atoi(argv[1]) == 0)
593  fgt_test_simple(10, 10, NFFT_K(5.0) + NFFT_K(3.0) * _Complex_I, NFFT_K(0.001));
594 
595  if (atoi(argv[1]) == 1)
597 
598  if (atoi(argv[1]) == 2)
599  fgt_test_error();
600 
601  if (atoi(argv[1]) == 3)
603 
604  return EXIT_SUCCESS;
605 }
606 /* \} */
static void fgt_test_andersson(void)
Compares accuracy and execution time of the fast Gauss transform with increasing expansion degree...
Definition: fastgauss.c:409
static void fgt_init_guru(fgt_plan *ths, int N, int M, NFFT_C sigma, int n, NFFT_R p, int m, unsigned flags)
Initialisation of a transform plan, guru.
Definition: fastgauss.c:166
unsigned flags
flags for precomputation and approximation type
Definition: fastgauss.c:74
#define MALLOC_X
Definition: nfft3.h:199
#define MALLOC_F_HAT
Definition: nfft3.h:200
#define FGT_APPROX_B
If this flag is set, the discrete Fourier coefficients of the uniformly sampled Gaussian are used ins...
Definition: fastgauss.c:63
NFFT_R p
period, at least 1
Definition: fastgauss.c:85
NFFT_C * f
target evaluations
Definition: fastgauss.c:72
#define DGT_PRE_CEXP
If this flag is set, the whole matrix is precomputed and stored for the discrete Gauss transfrom...
Definition: fastgauss.c:42
static void fgt_test_init_rand(fgt_plan *ths)
Random initialisation of a fgt plan.
Definition: fastgauss.c:315
int main(int argc, char **argv)
Different tests of the fast Gauss transform.
Definition: fastgauss.c:576
int N
number of source nodes
Definition: fastgauss.c:68
static void fgt_finalize(fgt_plan *ths)
Destroys the transform plan.
Definition: fastgauss.c:292
Structure for the Gauss transform.
Definition: fastgauss.c:66
int M
number of target nodes
Definition: fastgauss.c:69
NFFT_C sigma
parameter of the Gaussian
Definition: fastgauss.c:77
NFFT_R * y
target nodes in
Definition: fastgauss.c:80
#define FFTW_INIT
Definition: nfft3.h:203
NFFT_C * b
expansion coefficients
Definition: fastgauss.c:87
static void fgt_init_node_dependent(fgt_plan *ths)
Initialisation of a transform plan, depends on source and target nodes.
Definition: fastgauss.c:261
static void fgt_trafo(fgt_plan *ths)
Executes the fast Gauss transform.
Definition: fastgauss.c:128
static void fgt_test_error_p(void)
Compares accuracy of the fast Gauss transform with increasing expansion degree and different periodis...
Definition: fastgauss.c:527
#define PRE_PSI
Definition: nfft3.h:197
static NFFT_R fgt_test_measure_time(fgt_plan *ths, unsigned dgt)
Compares execution times for the fast and discrete Gauss transform.
Definition: fastgauss.c:338
#define FGT_NDFT
If this flag is set, the fast Gauss transform uses the discrete instead of the fast Fourier transform...
Definition: fastgauss.c:53
NFFT_C * alpha
source coefficients
Definition: fastgauss.c:71
NFFT_R * x
source nodes in
Definition: fastgauss.c:79
NFFT_C * pre_cexp
precomputed values for dgt
Definition: fastgauss.c:82
static void fgt_test_simple(int N, int M, NFFT_C sigma, NFFT_R eps)
Simple example that computes fast and discrete Gauss transforms.
Definition: fastgauss.c:373
#define PRE_PHI_HUT
Definition: nfft3.h:193
int n
expansion degree
Definition: fastgauss.c:84
static void fgt_test_error(void)
Compares accuracy of the fast Gauss transform with increasing expansion degree.
Definition: fastgauss.c:475
static void dgt_trafo(fgt_plan *ths)
Executes the discrete Gauss transform.
Definition: fastgauss.c:101
static void fgt_init(fgt_plan *ths, int N, int M, NFFT_C sigma, NFFT_R eps)
Initialisation of a transform plan, simple.
Definition: fastgauss.c:240