30 #define NFFT_PRECISION_DOUBLE
42 #define DGT_PRE_CEXP (1U<< 0)
53 #define FGT_NDFT (1U<< 1)
63 #define FGT_APPROX_B (1U<< 2)
105 for (j = 0; j < ths->
M; j++)
106 ths->
f[j] = NFFT_K(0.0);
109 for (j = 0, l = 0; j < ths->
M; j++)
110 for (k = 0; k < ths->
N; k++, l++)
113 for (j = 0; j < ths->
M; j++)
114 for (k = 0; k < ths->
N; k++)
115 ths->
f[j] += ths->
alpha[k]
117 -ths->
sigma * (ths->
y[j] - ths->
x[k])
118 * (ths->
y[j] - ths->
x[k]));
134 NFFT(adjoint_direct)(ths->nplan1);
136 for (l = 0; l < ths->
n; l++)
137 ths->nplan1->f_hat[l] *= ths->
b[l];
139 NFFT(trafo_direct)(ths->nplan2);
143 NFFT(adjoint)(ths->nplan1);
145 for (l = 0; l < ths->
n; l++)
146 ths->nplan1->f_hat[l] *= ths->
b[l];
148 NFFT(trafo)(ths->nplan2);
177 ths->
x = (NFFT_R*) NFFT(malloc)((size_t)(ths->
N) *
sizeof(NFFT_R));
178 ths->
y = (NFFT_R*) NFFT(malloc)((size_t)(ths->
M) *
sizeof(NFFT_R));
179 ths->
alpha = (NFFT_C*) NFFT(malloc)((size_t)(ths->
N) *
sizeof(NFFT_C));
180 ths->
f = (NFFT_C*) NFFT(malloc)((size_t)(ths->
M) *
sizeof(NFFT_C));
185 ths->
b = (NFFT_C*) NFFT(malloc)((size_t)(ths->
n) *
sizeof(NFFT_C));
187 ths->nplan1 = (NFFT(plan)*) NFFT(malloc)(
sizeof(NFFT(plan)));
188 ths->nplan2 = (NFFT(plan)*) NFFT(malloc)(
sizeof(NFFT(plan)));
190 n_fftw = (int)NFFT(next_power_of_2)(2 * ths->
n);
193 NFFT(init_guru)(ths->nplan1, 1, &(ths->
n), ths->
N, &n_fftw, m,
PRE_PHI_HUT |
195 NFFT(init_guru)(ths->nplan2, 1, &(ths->
n), ths->
M, &n_fftw, m,
PRE_PHI_HUT |
199 ths->nplan1->
f = ths->
alpha;
200 ths->nplan2->f_hat = ths->nplan1->f_hat;
201 ths->nplan2->
f = ths->
f;
205 fplan = FFTW(plan_dft_1d)(ths->
n, ths->
b, ths->
b, FFTW_FORWARD,
208 for (j = 0; j < ths->
n; j++)
209 ths->
b[j] = NFFT_M(cexp)(
210 -ths->
p * ths->
p * ths->
sigma * ((NFFT_R)(j) - (NFFT_R)(ths->
n) / NFFT_K(2.0)) * ((NFFT_R)(j) - (NFFT_R)(ths->
n) / NFFT_K(2.0))
211 / ((NFFT_R) (ths->
n * ths->
n))) / ((NFFT_R)(ths->
n));
213 NFFT(fftshift_complex_int)(ths->
b, 1, &ths->
n);
214 FFTW(execute)(fplan);
215 NFFT(fftshift_complex_int)(ths->
b, 1, &ths->
n);
217 FFTW(destroy_plan)(fplan);
221 for (j = 0; j < ths->
n; j++)
222 ths->
b[j] = NFFT_K(1.0) / ths->
p * NFFT_M(csqrt)(NFFT_KPI / ths->
sigma)
224 -NFFT_KPI * NFFT_KPI * ((NFFT_R)(j) - (NFFT_R)(ths->
n) / NFFT_K(2.0)) * ((NFFT_R)(j) - (NFFT_R)(ths->
n) / NFFT_K(2.0))
225 / (ths->
p * ths->
p * ths->
sigma));
245 p = NFFT_K(0.5) + NFFT_M(sqrt)(-NFFT_M(log)(eps) / NFFT_M(creal)(sigma));
250 n = (int)(2 * (NFFT_M(lrint)(NFFT_M(ceil)(p * NFFT_M(cabs)(sigma) / NFFT_KPI * NFFT_M(sqrt)(-NFFT_M(log)(eps) / NFFT_M(creal)(sigma))))));
267 ths->
pre_cexp = (NFFT_C*) NFFT(malloc)((size_t)(ths->
M * ths->
N) *
sizeof(NFFT_C));
269 for (j = 0, l = 0; j < ths->
M; j++)
270 for (k = 0; k < ths->
N; k++, l++)
272 -ths->
sigma * (ths->
y[j] - ths->
x[k]) * (ths->
y[j] - ths->
x[k]));
275 for (j = 0; j < ths->nplan1->M_total; j++)
276 ths->nplan1->
x[j] = ths->
x[j] / ths->
p;
277 for (j = 0; j < ths->nplan2->M_total; j++)
278 ths->nplan2->
x[j] = ths->
y[j] / ths->
p;
281 NFFT(precompute_psi)(ths->nplan1);
283 NFFT(precompute_psi)(ths->nplan2);
294 NFFT(finalize)(ths->nplan2);
295 NFFT(finalize)(ths->nplan1);
297 NFFT(free)(ths->nplan2);
298 NFFT(free)(ths->nplan1);
305 NFFT(free)(ths->
alpha);
319 for (k = 0; k < ths->
N; k++)
320 ths->
x[k] = NFFT(drand48)() / NFFT_K(2.0) - NFFT_K(1.0) / NFFT_K(4.0);
322 for (j = 0; j < ths->
M; j++)
323 ths->
y[j] = NFFT(drand48)() / NFFT_K(2.0) - NFFT_K(1.0) / NFFT_K(4.0);
325 for (k = 0; k < ths->
N; k++)
326 ths->
alpha[k] = NFFT(drand48)() - NFFT_K(1.0) / NFFT_K(2.0)
327 + _Complex_I * (NFFT(drand48)() - NFFT_K(1.0) / NFFT_K(2.0));
341 NFFT_R t0, t1, time_diff;
343 NFFT_R tau = NFFT_K(0.01);
350 t0 = NFFT(clock_gettime_seconds)();
355 t1 = NFFT(clock_gettime_seconds)();
359 t_out /= (NFFT_R)(r);
378 fgt_init(&my_plan, N, M, sigma, eps);
379 swap_dgt = (NFFT_C*) NFFT(malloc)((size_t)(my_plan.
M) *
sizeof(NFFT_C));
384 NFFT_CSWAP(swap_dgt, my_plan.
f);
386 NFFT(vpr_complex)(my_plan.
f, my_plan.
M,
"discrete gauss transform");
387 NFFT_CSWAP(swap_dgt, my_plan.
f);
390 NFFT(vpr_complex)(my_plan.
f, my_plan.
M,
"fast gauss transform");
392 printf(
"\n relative error: %1.3" NFFT__FES__
"\n",
393 NFFT(error_l_infty_1_complex)(swap_dgt, my_plan.
f, my_plan.
M,
394 my_plan.
alpha, my_plan.
N));
396 NFFT(free)(swap_dgt);
415 NFFT_C sigma = NFFT_K(4.0) * (NFFT_K(138.0) + _Complex_I * NFFT_K(100.0));
417 int N_dgt_pre_exp = (int) (1U << 11);
418 int N_dgt = (int) (1U << 19);
420 printf(
"n=%d, sigma=%1.3" NFFT__FES__
"+i%1.3" NFFT__FES__
"\n", n, NFFT_M(creal)(sigma), NFFT_M(cimag)(sigma));
422 for (N = ((NFFT_INT) (1U << 6)); N < ((NFFT_INT) (1U << 22)); N = N << 1)
424 printf(
"$%d$\t & ", N);
428 swap_dgt = (NFFT_C*) NFFT(malloc)((size_t)(my_plan.
M) *
sizeof(NFFT_C));
436 NFFT_CSWAP(swap_dgt, my_plan.
f);
437 if (N < N_dgt_pre_exp)
441 if (N < N_dgt_pre_exp)
443 NFFT_CSWAP(swap_dgt, my_plan.
f);
448 if (N < N_dgt_pre_exp)
459 printf(
"$%1.1" NFFT__FES__
"$\t \\\\ \n",
460 NFFT(error_l_infty_1_complex)(swap_dgt, my_plan.
f, my_plan.
M, my_plan.
alpha, my_plan.
N));
463 NFFT(free)(swap_dgt);
481 NFFT_C sigma = NFFT_K(4.0) * (NFFT_K(138.0) + _Complex_I * NFFT_K(100.0));
486 printf(
"N=%d;\tM=%d;\nsigma=%1.3" NFFT__FES__
"+i*%1.3" NFFT__FES__
";\n", N, M, NFFT_M(creal)(sigma),
487 NFFT_M(cimag)(sigma));
490 swap_dgt = (NFFT_C*) NFFT(malloc)((size_t)(M) *
sizeof(NFFT_C));
492 for (n = 8; n <= 128; n += 4)
495 for (mi = 0; mi < 2; mi++)
501 NFFT_CSWAP(swap_dgt, my_plan.
f);
503 NFFT_CSWAP(swap_dgt, my_plan.
f);
507 printf(
"%1.3" NFFT__FES__
"\t",
508 NFFT(error_l_infty_1_complex)(swap_dgt, my_plan.
f, my_plan.
M, my_plan.
alpha, my_plan.
N));
518 NFFT(free)(swap_dgt);
533 NFFT_C sigma = NFFT_K(20.0) + NFFT_K(40.0) * _Complex_I;
536 NFFT_R p[3] = {NFFT_K(1.0), NFFT_K(1.5), NFFT_K(2.0)};
538 printf(
"N=%d;\tM=%d;\nsigma=%1.3" NFFT__FES__
"+i*%1.3" NFFT__FES__
";\n", N, M, NFFT_M(creal)(sigma),
539 NFFT_M(cimag)(sigma));
542 swap_dgt = (NFFT_C*) NFFT(malloc)((size_t)(M) *
sizeof(NFFT_C));
544 for (n = 8; n <= 128; n += 4)
547 for (pi = 0; pi < 3; pi++)
553 NFFT_CSWAP(swap_dgt, my_plan.
f);
555 NFFT_CSWAP(swap_dgt, my_plan.
f);
559 printf(
"%1.3" NFFT__FES__
"\t",
560 NFFT(error_l_infty_1_complex)(swap_dgt, my_plan.
f, my_plan.
M, my_plan.
alpha, my_plan.
N));
576 int main(
int argc,
char **argv)
580 fprintf(stderr,
"fastgauss type\n");
581 fprintf(stderr,
" type\n");
582 fprintf(stderr,
" 0 - Simple test.\n");
583 fprintf(stderr,
" 1 - Compares accuracy and execution time.\n");
584 fprintf(stderr,
" Pipe to output_andersson.tex\n");
585 fprintf(stderr,
" 2 - Compares accuracy.\n");
586 fprintf(stderr,
" Pipe to output_error.m\n");
587 fprintf(stderr,
" 3 - Compares accuracy.\n");
588 fprintf(stderr,
" Pipe to output_error_p.m\n");
592 if (atoi(argv[1]) == 0)
593 fgt_test_simple(10, 10, NFFT_K(5.0) + NFFT_K(3.0) * _Complex_I, NFFT_K(0.001));
595 if (atoi(argv[1]) == 1)
598 if (atoi(argv[1]) == 2)
601 if (atoi(argv[1]) == 3)
static void fgt_test_andersson(void)
Compares accuracy and execution time of the fast Gauss transform with increasing expansion degree...
static void fgt_init_guru(fgt_plan *ths, int N, int M, NFFT_C sigma, int n, NFFT_R p, int m, unsigned flags)
Initialisation of a transform plan, guru.
unsigned flags
flags for precomputation and approximation type
#define FGT_APPROX_B
If this flag is set, the discrete Fourier coefficients of the uniformly sampled Gaussian are used ins...
NFFT_R p
period, at least 1
NFFT_C * f
target evaluations
#define DGT_PRE_CEXP
If this flag is set, the whole matrix is precomputed and stored for the discrete Gauss transfrom...
static void fgt_test_init_rand(fgt_plan *ths)
Random initialisation of a fgt plan.
int main(int argc, char **argv)
Different tests of the fast Gauss transform.
int N
number of source nodes
static void fgt_finalize(fgt_plan *ths)
Destroys the transform plan.
Structure for the Gauss transform.
int M
number of target nodes
NFFT_C sigma
parameter of the Gaussian
NFFT_R * y
target nodes in
NFFT_C * b
expansion coefficients
static void fgt_init_node_dependent(fgt_plan *ths)
Initialisation of a transform plan, depends on source and target nodes.
static void fgt_trafo(fgt_plan *ths)
Executes the fast Gauss transform.
static void fgt_test_error_p(void)
Compares accuracy of the fast Gauss transform with increasing expansion degree and different periodis...
static NFFT_R fgt_test_measure_time(fgt_plan *ths, unsigned dgt)
Compares execution times for the fast and discrete Gauss transform.
#define FGT_NDFT
If this flag is set, the fast Gauss transform uses the discrete instead of the fast Fourier transform...
NFFT_C * alpha
source coefficients
NFFT_R * x
source nodes in
NFFT_C * pre_cexp
precomputed values for dgt
static void fgt_test_simple(int N, int M, NFFT_C sigma, NFFT_R eps)
Simple example that computes fast and discrete Gauss transforms.
static void fgt_test_error(void)
Compares accuracy of the fast Gauss transform with increasing expansion degree.
static void dgt_trafo(fgt_plan *ths)
Executes the discrete Gauss transform.
static void fgt_init(fgt_plan *ths, int N, int M, NFFT_C sigma, NFFT_R eps)
Initialisation of a transform plan, simple.