guide34/html/fastgauss_8c_source.html

 /*

  * Copyright (c) 2002, 2017 Jens Keiner, Stefan Kunis, Daniel Potts

  *

  * This program is free software; you can redistribute it and/or modify it under

  * the terms of the GNU General Public License as published by the Free Software

  * Foundation; either version 2 of the License, or (at your option) any later

  * version.

  *

  * This program is distributed in the hope that it will be useful, but WITHOUT

  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS

  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more

  * details.

  *

  * You should have received a copy of the GNU General Public License along with

  * this program; if not, write to the Free Software Foundation, Inc., 51

  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

  */


 #include <stdio.h>

 #include <stdlib.h>

 #include <math.h>

 #include <complex.h>


 #define NFFT_PRECISION_DOUBLE


 #include "nfft3mp.h"


 /* Flag: when set, the whole M x N matrix of kernel values is precomputed and
  * stored in fgt_plan::pre_cexp for the discrete Gauss transform. */
 #define DGT_PRE_CEXP     (1U<< 0)


 /* Flag: when set, fgt_trafo() calls the direct NFFT routines
  * (adjoint_direct / trafo_direct) instead of adjoint / trafo. */
 #define FGT_NDFT         (1U<< 1)


 /* Flag: when set, fgt_init_guru() obtains the coefficients b by applying an
  * FFT to the uniformly sampled Gaussian instead of evaluating the
  * closed-form expression. */
 #define FGT_APPROX_B     (1U<< 2)


 /* Structure for the Gauss transform.  The arrays x, y, alpha, f and b are
  * allocated by fgt_init_guru(); pre_cexp is allocated by
  * fgt_init_node_dependent() only when DGT_PRE_CEXP is set in flags. */
 typedef struct

 {

   int N;                /* number of source nodes */

   int M;                /* number of target nodes */

   NFFT_C *alpha;        /* source coefficients, N entries */

   NFFT_C *f;            /* target evaluations, M entries */

   unsigned flags;       /* flags for precomputation and approximation type */

   NFFT_C sigma;         /* parameter of the Gaussian */

   NFFT_R *x;            /* source nodes, N entries */

   NFFT_R *y;            /* target nodes, M entries */

   NFFT_C *pre_cexp;     /* precomputed values for dgt, M*N entries, row j holds target node j */

   int n;                /* expansion degree */

   NFFT_R p;             /* period, at least 1 */

   NFFT_C *b;            /* expansion coefficients, n entries */

   NFFT(plan) *nplan1;   /* NFFT plan over the source nodes; its adjoint is applied to alpha */

   NFFT(plan) *nplan2;   /* NFFT plan over the target nodes; shares f_hat with nplan1, writes f */

 } fgt_plan;


 /**
  * Executes the discrete Gauss transform.
  *
  * For every target node y[j] the sum over all source nodes x[k] of
  * alpha[k] * exp(-sigma * (y[j] - x[k])^2) is written to f[j].  When
  * DGT_PRE_CEXP is set, the exponentials are read from pre_cexp (stored row by
  * row, one row of N entries per target node) instead of being evaluated.
  *
  * \param ths the transform plan; x, y, alpha (and pre_cexp if used) must be set
  */
 static void dgt_trafo(fgt_plan *ths)
 {
   NFFT_INT target, source;

   if (ths->flags & DGT_PRE_CEXP)
   {
     /* Walk through the precomputed matrix one row per target node. */
     const NFFT_C *row = ths->pre_cexp;

     for (target = 0; target < ths->M; target++, row += ths->N)
     {
       NFFT_C sum = NFFT_K(0.0);

       for (source = 0; source < ths->N; source++)
         sum += ths->alpha[source] * row[source];

       ths->f[target] = sum;
     }
   }
   else
   {
     for (target = 0; target < ths->M; target++)
     {
       NFFT_C sum = NFFT_K(0.0);

       for (source = 0; source < ths->N; source++)
       {
         const NFFT_R dist = ths->y[target] - ths->x[source];

         sum += ths->alpha[source] * NFFT_M(cexp)(-ths->sigma * dist * dist);
       }

       ths->f[target] = sum;
     }
   }
 }


 /**
  * Executes the fast Gauss transform.
  *
  * Three steps: an adjoint NFFT on nplan1, a pointwise multiplication of the
  * resulting n coefficients with b, and an NFFT on nplan2 (which shares f_hat
  * with nplan1).  With FGT_NDFT set, the direct variants of both transforms
  * are called instead.
  *
  * \param ths the transform plan, fully initialised
  */
 static void fgt_trafo(fgt_plan *ths)
 {
   const int direct = (ths->flags & FGT_NDFT) != 0;
   NFFT_C *coeff = ths->nplan1->f_hat;
   NFFT_INT idx;

   if (direct)
     NFFT(adjoint_direct)(ths->nplan1);
   else
     NFFT(adjoint)(ths->nplan1);

   /* Scale the coefficients by the expansion coefficients of the Gaussian. */
   for (idx = 0; idx < ths->n; idx++)
     coeff[idx] *= ths->b[idx];

   if (direct)
     NFFT(trafo_direct)(ths->nplan2);
   else
     NFFT(trafo)(ths->nplan2);
 }


 /**
  * Initialisation of a transform plan, guru interface.
  *
  * Allocates the node, coefficient and result arrays, creates the two NFFT
  * plans and computes the expansion coefficients b.  The node arrays x and y
  * are allocated but not filled; the caller sets them and then calls
  * fgt_init_node_dependent().
  *
  * \param ths   the plan to initialise
  * \param N     number of source nodes
  * \param M     number of target nodes
  * \param sigma parameter of the Gaussian
  * \param n     expansion degree (length of b and of the NFFT bandwidth)
  * \param p     period, at least 1
  * \param m     passed through to NFFT(init_guru) as its parameter m
  * \param flags combination of DGT_PRE_CEXP, FGT_NDFT and FGT_APPROX_B
  */
 static void fgt_init_guru(fgt_plan *ths, int N, int M, NFFT_C sigma, int n, NFFT_R p, int m,
     unsigned flags)
 {
   int j, n_fftw;
   FFTW(plan) fplan;

   ths->M = M;
   ths->N = N;
   ths->sigma = sigma;
   ths->flags = flags;

   /* pre_cexp is only allocated in fgt_init_node_dependent(); start from a
    * well-defined value so the plan never carries an indeterminate pointer. */
   ths->pre_cexp = NULL;

   ths->x = (NFFT_R*) NFFT(malloc)((size_t)(ths->N) * sizeof(NFFT_R));
   ths->y = (NFFT_R*) NFFT(malloc)((size_t)(ths->M) * sizeof(NFFT_R));
   ths->alpha = (NFFT_C*) NFFT(malloc)((size_t)(ths->N) * sizeof(NFFT_C));
   ths->f = (NFFT_C*) NFFT(malloc)((size_t)(ths->M) * sizeof(NFFT_C));

   ths->n = n;
   ths->p = p;

   ths->b = (NFFT_C*) NFFT(malloc)((size_t)(ths->n) * sizeof(NFFT_C));

   ths->nplan1 = (NFFT(plan)*) NFFT(malloc)(sizeof(NFFT(plan)));
   ths->nplan2 = (NFFT(plan)*) NFFT(malloc)(sizeof(NFFT(plan)));

   n_fftw = (int)NFFT(next_power_of_2)(2 * ths->n);

   /* nplan1 owns f_hat (MALLOC_F_HAT); nplan2 borrows it below. */
   NFFT(init_guru)(ths->nplan1, 1, &(ths->n), ths->N, &n_fftw, m, PRE_PHI_HUT |
       PRE_PSI | MALLOC_X | MALLOC_F_HAT | FFTW_INIT, FFTW_MEASURE);
   NFFT(init_guru)(ths->nplan2, 1, &(ths->n), ths->M, &n_fftw, m, PRE_PHI_HUT |
       PRE_PSI | MALLOC_X | FFTW_INIT, FFTW_MEASURE);

   /* Wire the plans to the arrays of this structure: nplan1 reads alpha,
    * nplan2 works on the same coefficients and writes f. */
   ths->nplan1->f = ths->alpha;
   ths->nplan2->f_hat = ths->nplan1->f_hat;
   ths->nplan2->f = ths->f;

   if (ths->flags & FGT_APPROX_B)
   {
     /* Create the FFTW plan before b is filled: planning with FFTW_MEASURE
      * may overwrite the array. */
     fplan = FFTW(plan_dft_1d)(ths->n, ths->b, ths->b, FFTW_FORWARD,
         FFTW_MEASURE);

     for (j = 0; j < ths->n; j++)
     {
       /* index shifted so that it runs over -n/2, ..., n/2-1 */
       const NFFT_R c = (NFFT_R)(j) - (NFFT_R)(ths->n) / NFFT_K(2.0);

       ths->b[j] = NFFT_M(cexp)(
           -ths->p * ths->p * ths->sigma * c * c
               / ((NFFT_R) (ths->n * ths->n))) / ((NFFT_R)(ths->n));
     }

     NFFT(fftshift_complex_int)(ths->b, 1, &ths->n);
     FFTW(execute)(fplan);
     NFFT(fftshift_complex_int)(ths->b, 1, &ths->n);

     FFTW(destroy_plan)(fplan);
   }
   else
   {
     for (j = 0; j < ths->n; j++)
     {
       /* index shifted so that it runs over -n/2, ..., n/2-1 */
       const NFFT_R c = (NFFT_R)(j) - (NFFT_R)(ths->n) / NFFT_K(2.0);

       ths->b[j] = NFFT_K(1.0) / ths->p * NFFT_M(csqrt)(NFFT_KPI / ths->sigma)
           * NFFT_M(cexp)(
               -NFFT_KPI * NFFT_KPI * c * c
                   / (ths->p * ths->p * ths->sigma));
     }
   }
 }


 /**
  * Initialisation of a transform plan, simple interface.
  *
  * Derives the period p and the expansion degree n from sigma and the
  * requested accuracy eps, then calls fgt_init_guru() with m = 7.  The full
  * kernel matrix is precomputed (DGT_PRE_CEXP) only if it has at most 2^20
  * entries.
  *
  * \param ths   the plan to initialise
  * \param N     number of source nodes
  * \param M     number of target nodes
  * \param sigma parameter of the Gaussian
  * \param eps   target accuracy
  */
 static void fgt_init(fgt_plan *ths, int N, int M, NFFT_C sigma, NFFT_R eps)
 {
   NFFT_R p;
   int n;
   unsigned flags;

   p = NFFT_K(0.5) + NFFT_M(sqrt)(-NFFT_M(log)(eps) / NFFT_M(creal)(sigma));

   /* the period is at least 1 */
   if (p < NFFT_K(1.0))
     p = NFFT_K(1.0);

   n = (int)(2 * (NFFT_M(lrint)(NFFT_M(ceil)(p * NFFT_M(cabs)(sigma) / NFFT_KPI * NFFT_M(sqrt)(-NFFT_M(log)(eps) / NFFT_M(creal)(sigma))))));

   /* Form the product in a 64-bit type: N * M in int overflows for large
    * problem sizes and could then wrongly pass the size test. */
   flags = ((long long)N * (long long)M <= (1LL << 20)) ? DGT_PRE_CEXP : 0;

   fgt_init_guru(ths, N, M, sigma, n, p, 7, flags);
 }


 /**
  * Initialisation of a transform plan, depends on source and target nodes.
  *
  * Must be called after x and y have been set.  If DGT_PRE_CEXP is set, it
  * allocates pre_cexp and fills it with exp(-sigma * (y[j] - x[k])^2), stored
  * row by row (one row of N entries per target node).  It then copies the
  * nodes, scaled by 1/p, into the two NFFT plans and runs their psi
  * precomputation if PRE_PSI is set.
  *
  * \param ths the transform plan, initialised and with x, y filled
  */
 static void fgt_init_node_dependent(fgt_plan *ths)
 {
   int j, k;

   if (ths->flags & DGT_PRE_CEXP)
   {
     /* Count entries in size_t: M * N evaluated in int can overflow before
      * the conversion to size_t takes place. */
     const size_t entries = (size_t)(ths->M) * (size_t)(ths->N);
     size_t l = 0;

     ths->pre_cexp = (NFFT_C*) NFFT(malloc)(entries * sizeof(NFFT_C));

     for (j = 0; j < ths->M; j++)
       for (k = 0; k < ths->N; k++, l++)
         ths->pre_cexp[l] = NFFT_M(cexp)(
             -ths->sigma * (ths->y[j] - ths->x[k]) * (ths->y[j] - ths->x[k]));
   }

   /* The NFFT plans work on the nodes scaled by the period p. */
   for (j = 0; j < ths->nplan1->M_total; j++)
     ths->nplan1->x[j] = ths->x[j] / ths->p;
   for (j = 0; j < ths->nplan2->M_total; j++)
     ths->nplan2->x[j] = ths->y[j] / ths->p;

   if (ths->nplan1->flags & PRE_PSI)
     NFFT(precompute_psi)(ths->nplan1);
   if (ths->nplan2->flags & PRE_PSI)
     NFFT(precompute_psi)(ths->nplan2);
 }


 /**
  * Destroys the transform plan.
  *
  * Finalises and frees both NFFT plans and releases every array owned by the
  * plan, including the precomputed kernel matrix.
  *
  * \param ths the transform plan.  If DGT_PRE_CEXP is set in its flags,
  *            fgt_init_node_dependent() must have been called beforehand
  *            (as every caller in this file does), since that is where
  *            pre_cexp is allocated.
  */
 static void fgt_finalize(fgt_plan *ths)
 {
   NFFT(finalize)(ths->nplan2);
   NFFT(finalize)(ths->nplan1);

   NFFT(free)(ths->nplan2);
   NFFT(free)(ths->nplan1);

   NFFT(free)(ths->b);

   /* pre_cexp only exists for plans with DGT_PRE_CEXP; without this the
    * M*N matrix leaked on every finalise. */
   if (ths->flags & DGT_PRE_CEXP)
     NFFT(free)(ths->pre_cexp);

   NFFT(free)(ths->f);
   NFFT(free)(ths->y);

   NFFT(free)(ths->alpha);
   NFFT(free)(ths->x);
 }


 /**
  * Random initialisation of a fgt plan.
  *
  * Fills the source nodes x and target nodes y with values
  * drand48()/2 - 1/4, and the source coefficients alpha with complex numbers
  * whose real and imaginary parts are each drand48() - 1/2.
  *
  * \param ths the transform plan with allocated x, y and alpha
  */
 static void fgt_test_init_rand(fgt_plan *ths)
 {
   NFFT_INT j, k;

   for (k = 0; k < ths->N; k++)
     ths->x[k] = NFFT(drand48)() / NFFT_K(2.0) - NFFT_K(1.0) / NFFT_K(4.0);

   for (j = 0; j < ths->M; j++)
     ths->y[j] = NFFT(drand48)() / NFFT_K(2.0) - NFFT_K(1.0) / NFFT_K(4.0);

   for (k = 0; k < ths->N; k++)
   {
     /* Draw the two parts in separate statements: with both calls in one
      * expression their order is unspecified, so the generated data could
      * differ between compilers. */
     const NFFT_R re = NFFT(drand48)() - NFFT_K(1.0) / NFFT_K(2.0);
     const NFFT_R im = NFFT(drand48)() - NFFT_K(1.0) / NFFT_K(2.0);

     ths->alpha[k] = re + _Complex_I * im;
   }
 }


 /**
  * Measures the average execution time of one transform.
  *
  * The selected transform is repeated until the accumulated run time reaches
  * 0.01 seconds; the accumulated time divided by the number of runs is
  * returned.
  *
  * \param ths the transform plan
  * \param dgt non-zero to time dgt_trafo(), zero to time fgt_trafo()
  * \return average time per run, in the unit of NFFT(clock_gettime_seconds)
  */
 static NFFT_R fgt_test_measure_time(fgt_plan *ths, unsigned dgt)
 {
   const NFFT_R min_total = NFFT_K(0.01);
   NFFT_R elapsed = NFFT_K(0.0);
   int runs = 0;

   /* At least one run is always performed. */
   do
   {
     NFFT_R start, stop;

     runs++;
     start = NFFT(clock_gettime_seconds)();
     if (dgt)
       dgt_trafo(ths);
     else
       fgt_trafo(ths);
     stop = NFFT(clock_gettime_seconds)();

     elapsed += stop - start;
   } while (elapsed < min_total);

   return elapsed / (NFFT_R)(runs);
 }


 /**
  * Simple example that computes fast and discrete Gauss transforms.
  *
  * Builds a plan with random nodes and coefficients, prints the result of the
  * discrete transform and of the fast transform, and prints the error between
  * the two as returned by NFFT(error_l_infty_1_complex).
  *
  * \param N     number of source nodes
  * \param M     number of target nodes
  * \param sigma parameter of the Gaussian
  * \param eps   target accuracy
  */
 static void fgt_test_simple(int N, int M, NFFT_C sigma, NFFT_R eps)
 {
   fgt_plan plan;
   NFFT_C *reference;
   NFFT_R err;

   fgt_init(&plan, N, M, sigma, eps);
   reference = (NFFT_C*) NFFT(malloc)((size_t)(plan.M) * sizeof(NFFT_C));

   fgt_test_init_rand(&plan);
   fgt_init_node_dependent(&plan);

   /* Let the discrete transform write into the reference buffer, then swap
    * the buffers back so that plan.f is again the array used by nplan2. */
   NFFT_CSWAP(reference, plan.f);
   dgt_trafo(&plan);
   NFFT(vpr_complex)(plan.f, plan.M, "discrete gauss transform");
   NFFT_CSWAP(reference, plan.f);

   fgt_trafo(&plan);
   NFFT(vpr_complex)(plan.f, plan.M, "fast gauss transform");

   err = NFFT(error_l_infty_1_complex)(reference, plan.f, plan.M,
       plan.alpha, plan.N);
   printf("\n relative error: %1.3" NFFT__FES__ "\n", err);

   NFFT(free)(reference);
   fgt_finalize(&plan);
 }


 /**
  * Compares accuracy and execution time of the fast Gauss transform for an
  * increasing number of nodes N = M = 2^6, ..., 2^21 at fixed expansion
  * degree n = 128.
  *
  * Prints one LaTeX table row per N with the columns: N, time of the discrete
  * transform without precomputation (only for N < 2^19), time of the discrete
  * transform with precomputed matrix (only for N < 2^11), time of the fast
  * transform using the direct NFFT routines, time of the fast transform, and
  * the error of the fast transform against the discrete one.  Skipped entries
  * are left empty.
  */
 static void fgt_test_andersson(void)
 {
   fgt_plan my_plan;
   NFFT_C *swap_dgt;
   int N;

   NFFT_C sigma = NFFT_K(4.0) * (NFFT_K(138.0) + _Complex_I * NFFT_K(100.0));
   int n = 128;
   int N_dgt_pre_exp = (int) (1U << 11);
   int N_dgt = (int) (1U << 19);

   printf("n=%d, sigma=%1.3" NFFT__FES__ "+i%1.3" NFFT__FES__ "\n", n, NFFT_M(creal)(sigma), NFFT_M(cimag)(sigma));

   for (N = ((NFFT_INT) (1U << 6)); N < ((NFFT_INT) (1U << 22)); N = N << 1)
   {
     /* The reference (discrete) result only exists when the discrete
      * transform is actually run, i.e. for N < N_dgt. */
     const int have_reference = N < N_dgt;
     const int use_pre_cexp = N < N_dgt_pre_exp;

     printf("$%d$\t & ", N);

     fgt_init_guru(&my_plan, N, N, sigma, n, 1, 7, use_pre_cexp ? DGT_PRE_CEXP : 0);

     swap_dgt = (NFFT_C*) NFFT(malloc)((size_t)(my_plan.M) * sizeof(NFFT_C));

     fgt_test_init_rand(&my_plan);

     fgt_init_node_dependent(&my_plan);

     if (have_reference)
     {
       /* Time the discrete transform without precomputation; its output
        * goes to swap_dgt and serves as the reference result. */
       NFFT_CSWAP(swap_dgt, my_plan.f);
       if (use_pre_cexp)
         my_plan.flags ^= DGT_PRE_CEXP;

       printf("$%1.1" NFFT__FES__ "$\t & ", fgt_test_measure_time(&my_plan, 1));
       if (use_pre_cexp)
         my_plan.flags ^= DGT_PRE_CEXP;
       NFFT_CSWAP(swap_dgt, my_plan.f);
     }
     else
       printf("\t\t & ");

     /* Discrete transform with precomputed matrix. */
     if (use_pre_cexp)
       printf("$%1.1" NFFT__FES__ "$\t & ", fgt_test_measure_time(&my_plan, 1));
     else
       printf("\t\t & ");

     /* Fast transform with the direct NFFT routines. */
     my_plan.flags ^= FGT_NDFT;
     printf("$%1.1" NFFT__FES__ "$\t & ", fgt_test_measure_time(&my_plan, 0));
     my_plan.flags ^= FGT_NDFT;

     /* Fast transform; leaves its result in my_plan.f. */
     printf("$%1.1" NFFT__FES__ "$\t & ", fgt_test_measure_time(&my_plan, 0));

     /* Without a reference result swap_dgt is uninitialised memory, so
      * the error entry is left empty like the skipped timings. */
     if (have_reference)
       printf("$%1.1" NFFT__FES__ "$\t \\\\ \n",
           NFFT(error_l_infty_1_complex)(swap_dgt, my_plan.f, my_plan.M, my_plan.alpha, my_plan.N));
     else
       printf("\t\t \\\\ \n");
     fflush(stdout);

     NFFT(free)(swap_dgt);
     fgt_finalize(&my_plan);
     FFTW(cleanup)();
   }
 }


 static void fgt_test_error(void)

 {

   fgt_plan my_plan;

   NFFT_C *swap_dgt;

   int n, mi;


   NFFT_C sigma = NFFT_K(4.0) * (NFFT_K(138.0) + _Complex_I * NFFT_K(100.0));

   int N = 1000;

   int M = 1000;

   int m[2] = { 7, 3 };


   printf("N=%d;\tM=%d;\nsigma=%1.3" NFFT__FES__ "+i*%1.3" NFFT__FES__ ";\n", N, M, NFFT_M(creal)(sigma),

       NFFT_M(cimag)(sigma));

   printf("error=[\n");


   swap_dgt = (NFFT_C*) NFFT(malloc)((size_t)(M) * sizeof(NFFT_C));


   for (n = 8; n <= 128; n += 4)

   {

     printf("%d\t", n);

     for (mi = 0; mi < 2; mi++)

     {

       fgt_init_guru(&my_plan, N, M, sigma, n, 1, m[mi], 0);

       fgt_test_init_rand(&my_plan);

       fgt_init_node_dependent(&my_plan);


       NFFT_CSWAP(swap_dgt, my_plan.f);

       dgt_trafo(&my_plan);

       NFFT_CSWAP(swap_dgt, my_plan.f);


       fgt_trafo(&my_plan);


       printf("%1.3" NFFT__FES__ "\t",

         NFFT(error_l_infty_1_complex)(swap_dgt, my_plan.f, my_plan.M, my_plan.alpha, my_plan.N));

       fflush(stdout);


       fgt_finalize(&my_plan);

       FFTW(cleanup)();

     }

     printf("\n");

   }

   printf("];\n");


   NFFT(free)(swap_dgt);

 }


 static void fgt_test_error_p(void)

 {

   fgt_plan my_plan;

   NFFT_C *swap_dgt;

   int n, pi;


   NFFT_C sigma = NFFT_K(20.0) + NFFT_K(40.0) * _Complex_I;

   int N = 1000;

   int M = 1000;

   NFFT_R p[3] = {NFFT_K(1.0), NFFT_K(1.5), NFFT_K(2.0)};


   printf("N=%d;\tM=%d;\nsigma=%1.3" NFFT__FES__ "+i*%1.3" NFFT__FES__ ";\n", N, M, NFFT_M(creal)(sigma),

       NFFT_M(cimag)(sigma));

   printf("error=[\n");


   swap_dgt = (NFFT_C*) NFFT(malloc)((size_t)(M) * sizeof(NFFT_C));


   for (n = 8; n <= 128; n += 4)

   {

     printf("%d\t", n);

     for (pi = 0; pi < 3; pi++)

     {

       fgt_init_guru(&my_plan, N, M, sigma, n, p[pi], 7, 0);

       fgt_test_init_rand(&my_plan);

       fgt_init_node_dependent(&my_plan);


       NFFT_CSWAP(swap_dgt, my_plan.f);

       dgt_trafo(&my_plan);

       NFFT_CSWAP(swap_dgt, my_plan.f);


       fgt_trafo(&my_plan);


       printf("%1.3" NFFT__FES__ "\t",

       NFFT(error_l_infty_1_complex)(swap_dgt, my_plan.f, my_plan.M, my_plan.alpha, my_plan.N));

       fflush(stdout);


       fgt_finalize(&my_plan);

       FFTW(cleanup)();

     }

     printf("\n");

   }

   printf("];\n");

 }


 /** Prints the command line synopsis to stderr. */
 static void fgt_print_usage(void)
 {
   fprintf(stderr, "fastgauss type\n");
   fprintf(stderr, " type\n");
   fprintf(stderr, "  0 - Simple test.\n");
   fprintf(stderr, "  1 - Compares accuracy and execution time.\n");
   fprintf(stderr, "      Pipe to output_andersson.tex\n");
   fprintf(stderr, "  2 - Compares accuracy.\n");
   fprintf(stderr, "      Pipe to output_error.m\n");
   fprintf(stderr, "  3 - Compares accuracy.\n");
   fprintf(stderr, "      Pipe to output_error_p.m\n");
 }

 /**
  * Different tests of the fast Gauss transform.
  *
  * Expects exactly one argument, the test type 0..3.  A missing argument, a
  * non-numeric argument or an unknown type prints the usage text and returns
  * EXIT_FAILURE; previously a non-numeric argument silently ran test 0 and an
  * unknown type silently did nothing.
  *
  * \return EXIT_SUCCESS after running the selected test, EXIT_FAILURE on a
  *         usage error
  */
 int main(int argc, char **argv)
 {
   char *end = NULL;
   long type;

   if (argc != 2)
   {
     fgt_print_usage();
     return EXIT_FAILURE;
   }

   /* Parse once and insist on a complete decimal number; an out-of-range
    * value saturates in strtol and is rejected by the range check. */
   type = strtol(argv[1], &end, 10);
   if (end == argv[1] || *end != '\0' || type < 0 || type > 3)
   {
     fgt_print_usage();
     return EXIT_FAILURE;
   }

   switch (type)
   {
     case 0:
       fgt_test_simple(10, 10, NFFT_K(5.0) + NFFT_K(3.0) * _Complex_I, NFFT_K(0.001));
       break;
     case 1:
       fgt_test_andersson();
       break;
     case 2:
       fgt_test_error();
       break;
     default: /* 3, guaranteed by the range check above */
       fgt_test_error_p();
       break;
   }

   return EXIT_SUCCESS;
 }

 /* \} */

fgt_test_andersson
static void fgt_test_andersson(void)
Compares accuracy and execution time of the fast Gauss transform for an increasing number of nodes N at fixed expansion degree...
Definition: fastgauss.c:409

fgt_init_guru
static void fgt_init_guru(fgt_plan *ths, int N, int M, NFFT_C sigma, int n, NFFT_R p, int m, unsigned flags)
Initialisation of a transform plan, guru.
Definition: fastgauss.c:166

fgt_plan::flags
unsigned flags
flags for precomputation and approximation type
Definition: fastgauss.c:74

MALLOC_X
#define MALLOC_X
Definition: nfft3.h:199

MALLOC_F_HAT
#define MALLOC_F_HAT
Definition: nfft3.h:200

FGT_APPROX_B
#define FGT_APPROX_B
If this flag is set, the discrete Fourier coefficients of the uniformly sampled Gaussian are used ins...
Definition: fastgauss.c:63

fgt_plan::p
NFFT_R p
period, at least 1
Definition: fastgauss.c:85

fgt_plan::f
NFFT_C * f
target evaluations
Definition: fastgauss.c:72

DGT_PRE_CEXP
#define DGT_PRE_CEXP
If this flag is set, the whole matrix is precomputed and stored for the discrete Gauss transform...
Definition: fastgauss.c:42

fgt_test_init_rand
static void fgt_test_init_rand(fgt_plan *ths)
Random initialisation of a fgt plan.
Definition: fastgauss.c:315

main
int main(int argc, char **argv)
Different tests of the fast Gauss transform.
Definition: fastgauss.c:576

fgt_plan::N
int N
number of source nodes
Definition: fastgauss.c:68

fgt_finalize
static void fgt_finalize(fgt_plan *ths)
Destroys the transform plan.
Definition: fastgauss.c:292

fgt_plan
Structure for the Gauss transform.
Definition: fastgauss.c:66

fgt_plan::M
int M
number of target nodes
Definition: fastgauss.c:69

fgt_plan::sigma
NFFT_C sigma
parameter of the Gaussian
Definition: fastgauss.c:77

fgt_plan::y
NFFT_R * y
target nodes in $[-1/4, 1/4]$
Definition: fastgauss.c:80

FFTW_INIT
#define FFTW_INIT
Definition: nfft3.h:203

fgt_plan::b
NFFT_C * b
expansion coefficients
Definition: fastgauss.c:87

fgt_init_node_dependent
static void fgt_init_node_dependent(fgt_plan *ths)
Initialisation of a transform plan, depends on source and target nodes.
Definition: fastgauss.c:261

fgt_trafo
static void fgt_trafo(fgt_plan *ths)
Executes the fast Gauss transform.
Definition: fastgauss.c:128

fgt_test_error_p
static void fgt_test_error_p(void)
Compares accuracy of the fast Gauss transform with increasing expansion degree and different periodis...
Definition: fastgauss.c:527

PRE_PSI
#define PRE_PSI
Definition: nfft3.h:197

fgt_test_measure_time
static NFFT_R fgt_test_measure_time(fgt_plan *ths, unsigned dgt)
Compares execution times for the fast and discrete Gauss transform.
Definition: fastgauss.c:338

FGT_NDFT
#define FGT_NDFT
If this flag is set, the fast Gauss transform uses the discrete instead of the fast Fourier transform...
Definition: fastgauss.c:53

fgt_plan::alpha
NFFT_C * alpha
source coefficients
Definition: fastgauss.c:71

fgt_plan::x
NFFT_R * x
source nodes in $[-1/4, 1/4]$
Definition: fastgauss.c:79

fgt_plan::pre_cexp
NFFT_C * pre_cexp
precomputed values for dgt
Definition: fastgauss.c:82

fgt_test_simple
static void fgt_test_simple(int N, int M, NFFT_C sigma, NFFT_R eps)
Simple example that computes fast and discrete Gauss transforms.
Definition: fastgauss.c:373

PRE_PHI_HUT
#define PRE_PHI_HUT
Definition: nfft3.h:193

fgt_plan::n
int n
expansion degree
Definition: fastgauss.c:84

fgt_test_error
static void fgt_test_error(void)
Compares accuracy of the fast Gauss transform with increasing expansion degree.
Definition: fastgauss.c:475

dgt_trafo
static void dgt_trafo(fgt_plan *ths)
Executes the discrete Gauss transform.
Definition: fastgauss.c:101

fgt_init
static void fgt_init(fgt_plan *ths, int N, int M, NFFT_C sigma, NFFT_R eps)
Initialisation of a transform plan, simple.
Definition: fastgauss.c:240