NFFT  3.4.1
fastsum.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2002, 2017 Jens Keiner, Stefan Kunis, Daniel Potts
3  *
4  * This program is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU General Public License as published by the Free Software
6  * Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 
25 #include "config.h"
26 
27 #include <stdlib.h>
28 #include <math.h>
29 #ifdef HAVE_COMPLEX_H
30 #include <complex.h>
31 #endif
32 
33 #include "nfft3.h"
34 #include "fastsum.h"
35 #include "infft.h"
36 
37 // Required for test if (ths->k == one_over_x)
38 #include "kernels.h"
39 
46 static int max_i(int a, int b)
47 {
48  return a >= b ? a : b;
49 }
50 
52 static R fak(int n)
53 {
54  if (n <= 1)
55  return K(1.0);
56  else
57  return (R)(n) * fak(n - 1);
58 }
59 
61 static R binom(int n, int m)
62 {
63  return fak(n) / fak(m) / fak(n - m);
64 }
65 
67 static R BasisPoly(int m, int r, R xx)
68 {
69  int k;
70  R sum = K(0.0);
71 
72  for (k = 0; k <= m - r; k++)
73  {
74  sum += binom(m + k, k) * POW((xx + K(1.0)) / K(2.0), (R) k);
75  }
76  return sum * POW((xx + K(1.0)), (R) r) * POW(K(1.0) - xx, (R) (m + 1))
77  / (R)(1 << (m + 1)) / fak(r); /* 1<<(m+1) = 2^(m+1) */
78 }
79 
81 C regkern(kernel k, R xx, int p, const R *param, R a, R b)
82 {
83  int r;
84  C sum = K(0.0);
85 
86  if (xx < -K(0.5))
87  xx = -K(0.5);
88  if (xx > K(0.5))
89  xx = K(0.5);
90  if ((xx >= -K(0.5) + b && xx <= -a) || (xx >= a && xx <= K(0.5) - b))
91  {
92  return k(xx, 0, param);
93  }
94  else if (xx < -K(0.5) + b)
95  {
96  sum = (k(-K(0.5), 0, param) + k(K(0.5), 0, param)) / K(2.0)
97  * BasisPoly(p - 1, 0, K(2.0) * xx / b + (K(1.0) - b) / b);
98  for (r = 0; r < p; r++)
99  {
100  sum += POW(-b / K(2.0), (R) r) * k(-K(0.5) + b, r, param)
101  * BasisPoly(p - 1, r, -K(2.0) * xx / b + (b - K(1.0)) / b);
102  }
103  return sum;
104  }
105  else if ((xx > -a) && (xx < a))
106  {
107  for (r = 0; r < p; r++)
108  {
109  sum +=
110  POW(a, (R) r)
111  * (k(-a, r, param) * BasisPoly(p - 1, r, xx / a)
112  + k(a, r, param) * BasisPoly(p - 1, r, -xx / a)
113  * (r & 1 ? -1 : 1));
114  }
115  return sum;
116  }
117  else if (xx > K(0.5) - b)
118  {
119  sum = (k(-K(0.5), 0, param) + k(K(0.5), 0, param)) / K(2.0)
120  * BasisPoly(p - 1, 0, -K(2.0) * xx / b + (K(1.0) - b) / b);
121  for (r = 0; r < p; r++)
122  {
123  sum += POW(b / K(2.0), (R) r) * k(K(0.5) - b, r, param)
124  * BasisPoly(p - 1, r, K(2.0) * xx / b - (K(1.0) - b) / b);
125  }
126  return sum;
127  }
128  return k(xx, 0, param);
129 }
130 
134 static C regkern1(kernel k, R xx, int p, const R *param, R a, R b)
135 {
136  int r;
137  C sum = K(0.0);
138 
139  if (xx < -K(0.5))
140  xx = -K(0.5);
141  if (xx > K(0.5))
142  xx = K(0.5);
143  if ((xx >= -K(0.5) + b && xx <= -a) || (xx >= a && xx <= K(0.5) - b))
144  {
145  return k(xx, 0, param);
146  }
147  else if ((xx > -a) && (xx < a))
148  {
149  for (r = 0; r < p; r++)
150  {
151  sum +=
152  POW(a, (R) r)
153  * (k(-a, r, param) * BasisPoly(p - 1, r, xx / a)
154  + k(a, r, param) * BasisPoly(p - 1, r, -xx / a)
155  * (r & 1 ? -1 : 1));
156  }
157  return sum;
158  }
159  else if (xx < -K(0.5) + b)
160  {
161  for (r = 0; r < p; r++)
162  {
163  sum += POW(b, (R) r)
164  * (k(K(0.5) - b, r, param) * BasisPoly(p - 1, r, (xx + K(0.5)) / b)
165  + k(-K(0.5) + b, r, param) * BasisPoly(p - 1, r, -(xx + K(0.5)) / b)
166  * (r & 1 ? -1 : 1));
167  }
168  return sum;
169  }
170  else if (xx > K(0.5) - b)
171  {
172  for (r = 0; r < p; r++)
173  {
174  sum += POW(b, (R) r)
175  * (k(K(0.5) - b, r, param) * BasisPoly(p - 1, r, (xx - K(0.5)) / b)
176  + k(-K(0.5) + b, r, param) * BasisPoly(p - 1, r, -(xx - K(0.5)) / b)
177  * (r & 1 ? -1 : 1));
178  }
179  return sum;
180  }
181  return k(xx, 0, param);
182 }
183 
185 //static C regkern2(kernel k, R xx, int p, const R *param, R a, R b)
186 //{
187 // int r;
188 // C sum = K(0.0);
189 //
190 // xx = FABS(xx);
191 //
192 // if (xx > K(0.5))
193 // {
194 // for (r = 0; r < p; r++)
195 // {
196 // sum += POW(b, (R) r) * k(K(0.5) - b, r, param)
197 // * (BasisPoly(p - 1, r, 0) + BasisPoly(p - 1, r, 0));
198 // }
199 // return sum;
200 // }
201 // else if ((a <= xx) && (xx <= K(0.5) - b))
202 // {
203 // return k(xx, 0, param);
204 // }
205 // else if (xx < a)
206 // {
207 // for (r = 0; r < p; r++)
208 // {
209 // sum += POW(-a, (R) r) * k(a, r, param)
210 // * (BasisPoly(p - 1, r, xx / a) + BasisPoly(p - 1, r, -xx / a));
211 // }
212 // return sum;
213 // }
214 // else if ((K(0.5) - b < xx) && (xx <= K(0.5)))
215 // {
216 // for (r = 0; r < p; r++)
217 // {
218 // sum += POW(b, (R) r) * k(K(0.5) - b, r, param)
219 // * (BasisPoly(p - 1, r, (xx - K(0.5)) / b)
220 // + BasisPoly(p - 1, r, -(xx - K(0.5)) / b));
221 // }
222 // return sum;
223 // }
224 // return K(0.0);
225 //}
226 
230 static C regkern3(kernel k, R xx, int p, const R *param, R a, R b)
231 {
232  int r;
233  C sum = K(0.0);
234 
235  xx = FABS(xx);
236 
237  if (xx >= K(0.5))
238  {
239  /*return kern(typ,c,0,K(0.5));*/
240  xx = K(0.5);
241  }
242  /* else */
243  if ((a <= xx) && (xx <= K(0.5) - b))
244  {
245  return k(xx, 0, param);
246  }
247  else if (xx < a)
248  {
249  for (r = 0; r < p; r++)
250  {
251  sum += POW(-a, (R) r) * k(a, r, param)
252  * (BasisPoly(p - 1, r, xx / a) + BasisPoly(p - 1, r, -xx / a));
253  }
254  /*sum=kern(typ,c,0,xx); */
255  return sum;
256  }
257  else if ((K(0.5) - b < xx) && (xx <= K(0.5)))
258  {
259  sum = k(K(0.5), 0, param) * BasisPoly(p - 1, 0, -K(2.0) * xx / b + (K(1.0) - b) / b);
260  /* sum=regkern2(typ,c,p,a,b, K(0.5))*BasisPoly(p-1,0,-K(2.0)*xx/b+(K(1.0)-b)/b); */
261  for (r = 0; r < p; r++)
262  {
263  sum += POW(b / K(2.0), (R) r) * k(K(0.5) - b, r, param)
264  * BasisPoly(p - 1, r, K(2.0) * xx / b - (K(1.0) - b) / b);
265  }
266  return sum;
267  }
268  return K(0.0);
269 }
270 
272 //static C linintkern(const R x, const C *Add, const int Ad, const R a)
273 //{
274 // R c, c1, c3;
275 // int r;
276 // C f1, f2;
277 //
278 // c = x * Ad / a;
279 // r = (int)(LRINT(c));
280 // r = abs(r);
281 // f1 = Add[r];
282 // f2 = Add[r + 1];
283 // c = FABS(c);
284 // c1 = c - r;
285 // c3 = c1 - K(1.0);
286 // return (-f1 * c3 + f2 * c1);
287 //}
288 //
289 //static C quadrintkern(const R x, const C *Add, const int Ad, const R a)
290 //{
291 // R c, c1, c2, c3;
292 // int r;
293 // C f0, f1, f2;
294 //
295 // c = x * Ad / a;
296 // r = (int)(LRINT(c));
297 // r = abs(r);
298 // if (r == 0)
299 // {
300 // f0 = Add[r + 1];
301 // f1 = Add[r];
302 // f2 = Add[r + 1];
303 // }
304 // else
305 // {
306 // f0 = Add[r - 1];
307 // f1 = Add[r];
308 // f2 = Add[r + 1];
309 // }
310 // c = FABS(c);
311 // c1 = c - r;
312 // c2 = c1 + K(1.0);
313 // c3 = c1 - K(1.0);
314 // return (f0 * c1 * c3 / K(2.0) - f1 * c2 * c3 + f2 * c2 * c1 / K(2.0));
315 //}
316 
318 C kubintkern(const R x, const C *Add, const int Ad, const R a)
319 {
320  R c, c1, c2, c3, c4;
321  int r;
322  C f0, f1, f2, f3;
323  c = x * (R)(Ad) / a;
324  r = (int)(LRINT(c));
325  r = abs(r);
326  if (r == 0)
327  {
328  f0 = Add[r + 1];
329  f1 = Add[r];
330  f2 = Add[r + 1];
331  f3 = Add[r + 2];
332  }
333  else
334  {
335  f0 = Add[r - 1];
336  f1 = Add[r];
337  f2 = Add[r + 1];
338  f3 = Add[r + 2];
339  }
340  c = FABS(c);
341  c1 = c - (R)(r);
342  c2 = c1 + K(1.0);
343  c3 = c1 - K(1.0);
344  c4 = c1 - K(2.0);
345  /* return(-f0*(c-r)*(c-r-K(1.0))*(c-r-K(2.0))/K(6.0)+f1*(c-r+K(1.0))*(c-r-K(1.0))*(c-r-K(2.0))/2-
346  f2*(c-r+K(1.0))*(c-r)*(c-r-K(2.0))/2+f3*(c-r+K(1.0))*(c-r)*(c-r-K(1.0))/K(6.0)); */
347  return (-f0 * c1 * c3 * c4 / K(6.0) + f1 * c2 * c3 * c4 / K(2.0)
348  - f2 * c2 * c1 * c4 / K(2.0) + f3 * c2 * c1 * c3 / K(6.0));
349 }
350 
352 static C kubintkern1(const R x, const C *Add, const int Ad, const R a)
353 {
354  R c, c1, c2, c3, c4;
355  int r;
356  C f0, f1, f2, f3;
357  Add += 2;
358  c = (x + a) * (R)(Ad) / K(2.0) / a;
359  r = (int)(LRINT(c));
360  r = abs(r);
361  /*if (r==0) {f0=Add[r];f1=Add[r];f2=Add[r+1];f3=Add[r+2];}
362  else */
363  {
364  f0 = Add[r - 1];
365  f1 = Add[r];
366  f2 = Add[r + 1];
367  f3 = Add[r + 2];
368  }
369  c = FABS(c);
370  c1 = c - (R)(r);
371  c2 = c1 + K(1.0);
372  c3 = c1 - K(1.0);
373  c4 = c1 - K(2.0);
374  /* return(-f0*(c-r)*(c-r-K(1.0))*(c-r-K(2.0))/K(6.0)+f1*(c-r+K(1.0))*(c-r-K(1.0))*(c-r-K(2.0))/2-
375  f2*(c-r+K(1.0))*(c-r)*(c-r-K(2.0))/2+f3*(c-r+K(1.0))*(c-r)*(c-r-K(1.0))/K(6.0)); */
376  return (-f0 * c1 * c3 * c4 / K(6.0) + f1 * c2 * c3 * c4 / K(2.0)
377  - f2 * c2 * c1 * c4 / K(2.0) + f3 * c2 * c1 * c3 / K(6.0));
378 }
379 
381 static void quicksort(int d, int t, R *x, C *alpha, int *permutation_x_alpha, int N)
382 {
383  int lpos = 0;
384  int rpos = N - 1;
385  /*R pivot=x[((N-1)/2)*d+t];*/
386  R pivot = x[(N / 2) * d + t];
387 
388  int k;
389  R temp1;
390  C temp2;
391  int temp_int;
392 
393  while (lpos <= rpos)
394  {
395  while (x[lpos * d + t] < pivot)
396  lpos++;
397  while (x[rpos * d + t] > pivot)
398  rpos--;
399  if (lpos <= rpos)
400  {
401  for (k = 0; k < d; k++)
402  {
403  temp1 = x[lpos * d + k];
404  x[lpos * d + k] = x[rpos * d + k];
405  x[rpos * d + k] = temp1;
406  }
407  temp2 = alpha[lpos];
408  alpha[lpos] = alpha[rpos];
409  alpha[rpos] = temp2;
410 
411  if (permutation_x_alpha)
412  {
413  temp_int = permutation_x_alpha[lpos];
414  permutation_x_alpha[lpos] = permutation_x_alpha[rpos];
415  permutation_x_alpha[rpos] = temp_int;
416  }
417 
418  lpos++;
419  rpos--;
420  }
421  }
422  if (0 < rpos)
423  quicksort(d, t, x, alpha, permutation_x_alpha, rpos + 1);
424  if (lpos < N - 1)
425  quicksort(d, t, x + lpos * d, alpha + lpos, permutation_x_alpha ? permutation_x_alpha + lpos : NULL, N - lpos);
426 }
427 
429 static void BuildBox(fastsum_plan *ths)
430 {
431  int t, l;
432  int *box_index;
433  R val[ths->d];
434 
435  box_index = (int *) NFFT(malloc)((size_t)(ths->box_count) * sizeof(int));
436  for (t = 0; t < ths->box_count; t++)
437  box_index[t] = 0;
438 
439  for (l = 0; l < ths->N_total; l++)
440  {
441  int ind = 0;
442  for (t = 0; t < ths->d; t++)
443  {
444  val[t] = ths->x[ths->d * l + t] + K(0.25) - ths->eps_B / K(2.0);
445  ind *= ths->box_count_per_dim;
446  ind += (int) (val[t] / ths->eps_I);
447  }
448  box_index[ind]++;
449  }
450 
451  ths->box_offset[0] = 0;
452  for (t = 1; t <= ths->box_count; t++)
453  {
454  ths->box_offset[t] = ths->box_offset[t - 1] + box_index[t - 1];
455  box_index[t - 1] = ths->box_offset[t - 1];
456  }
457 
458  for (l = 0; l < ths->N_total; l++)
459  {
460  int ind = 0;
461  for (t = 0; t < ths->d; t++)
462  {
463  val[t] = ths->x[ths->d * l + t] + K(0.25) - ths->eps_B / K(2.0);
464  ind *= ths->box_count_per_dim;
465  ind += (int) (val[t] / ths->eps_I);
466  }
467 
468  ths->box_alpha[box_index[ind]] = ths->alpha[l];
469 
470  for (t = 0; t < ths->d; t++)
471  {
472  ths->box_x[ths->d * box_index[ind] + t] = ths->x[ths->d * l + t];
473  }
474  box_index[ind]++;
475  }
476  NFFT(free)(box_index);
477 }
478 
480 static inline C calc_SearchBox(int d, R *y, R *x, C *alpha, int start,
481  int end_lt, const C *Add, const int Ad, int p, R a, const kernel k,
482  const R *param, const unsigned flags)
483 {
484  C result = K(0.0);
485 
486  int m, l;
487  R r;
488 
489  for (m = start; m < end_lt; m++)
490  {
491  if (d == 1)
492  {
493  r = y[0] - x[m];
494  }
495  else
496  {
497  r = K(0.0);
498  for (l = 0; l < d; l++)
499  r += (y[l] - x[m * d + l]) * (y[l] - x[m * d + l]);
500  r = SQRT(r);
501  }
502  if (FABS(r) < a)
503  {
504  result += alpha[m] * k(r, 0, param); /* alpha*(kern-regkern) */
505  if (d == 1)
506  {
507  if (flags & EXACT_NEARFIELD)
508  result -= alpha[m] * regkern1(k, r, p, param, a, K(1.0) / K(16.0)); /* exact value (in 1D) */
509  else
510  result -= alpha[m] * kubintkern1(r, Add, Ad, a); /* spline approximation */
511  }
512  else
513  {
514  if (flags & EXACT_NEARFIELD)
515  result -= alpha[m] * regkern(k, r, p, param, a, K(1.0) / K(16.0)); /* exact value (in dD) */
516  else
517 #if defined(NF_KUB)
518  result -= alpha[m] * kubintkern(r, Add, Ad, a); /* spline approximation */
519 #elif defined(NF_QUADR)
520  result -= alpha[m]*quadrintkern(r,Add,Ad,a); /* spline approximation */
521 #elif defined(NF_LIN)
522  result -= alpha[m]*linintkern(r,Add,Ad,a); /* spline approximation */
523 #else
524 #error define interpolation method
525 #endif
526  }
527  }
528  }
529  return result;
530 }
531 
533 static C SearchBox(R *y, fastsum_plan *ths)
534 {
535  C val = K(0.0);
536  int t;
537  int y_multiind[ths->d];
538  int multiindex[ths->d];
539  int y_ind;
540 
541  for (t = 0; t < ths->d; t++)
542  {
543  y_multiind[t] = (int)(LRINT((y[t] + K(0.25) - ths->eps_B / K(2.0)) / ths->eps_I));
544  }
545 
546  if (ths->d == 1)
547  {
548  for (y_ind = max_i(0, y_multiind[0] - 1);
549  y_ind < ths->box_count_per_dim && y_ind <= y_multiind[0] + 1; y_ind++)
550  {
551  val += calc_SearchBox(ths->d, y, ths->box_x, ths->box_alpha,
552  ths->box_offset[y_ind], ths->box_offset[y_ind + 1], ths->Add, ths->Ad,
553  ths->p, ths->eps_I, ths->k, ths->kernel_param, ths->flags);
554  }
555  }
556  else if (ths->d == 2)
557  {
558  for (multiindex[0] = max_i(0, y_multiind[0] - 1);
559  multiindex[0] < ths->box_count_per_dim
560  && multiindex[0] <= y_multiind[0] + 1; multiindex[0]++)
561  for (multiindex[1] = max_i(0, y_multiind[1] - 1);
562  multiindex[1] < ths->box_count_per_dim
563  && multiindex[1] <= y_multiind[1] + 1; multiindex[1]++)
564  {
565  y_ind = (ths->box_count_per_dim * multiindex[0]) + multiindex[1];
566  val += calc_SearchBox(ths->d, y, ths->box_x, ths->box_alpha,
567  ths->box_offset[y_ind], ths->box_offset[y_ind + 1], ths->Add,
568  ths->Ad, ths->p, ths->eps_I, ths->k, ths->kernel_param, ths->flags);
569  }
570  }
571  else if (ths->d == 3)
572  {
573  for (multiindex[0] = max_i(0, y_multiind[0] - 1);
574  multiindex[0] < ths->box_count_per_dim
575  && multiindex[0] <= y_multiind[0] + 1; multiindex[0]++)
576  for (multiindex[1] = max_i(0, y_multiind[1] - 1);
577  multiindex[1] < ths->box_count_per_dim
578  && multiindex[1] <= y_multiind[1] + 1; multiindex[1]++)
579  for (multiindex[2] = max_i(0, y_multiind[2] - 1);
580  multiindex[2] < ths->box_count_per_dim
581  && multiindex[2] <= y_multiind[2] + 1; multiindex[2]++)
582  {
583  y_ind = ((ths->box_count_per_dim * multiindex[0]) + multiindex[1])
584  * ths->box_count_per_dim + multiindex[2];
585  val += calc_SearchBox(ths->d, y, ths->box_x, ths->box_alpha,
586  ths->box_offset[y_ind], ths->box_offset[y_ind + 1], ths->Add,
587  ths->Ad, ths->p, ths->eps_I, ths->k, ths->kernel_param,
588  ths->flags);
589  }
590  }
591  else
592  {
593  return 0.0/0.0; //exit(EXIT_FAILURE);
594  }
595  return val;
596 }
597 
599 static void BuildTree(int d, int t, R *x, C *alpha, int *permutation_x_alpha, int N)
600 {
601  if (N > 1)
602  {
603  int m = N / 2;
604 
605  quicksort(d, t, x, alpha, permutation_x_alpha, N);
606 
607  BuildTree(d, (t + 1) % d, x, alpha, permutation_x_alpha, m);
608  BuildTree(d, (t + 1) % d, x + (m + 1) * d, alpha + (m + 1), permutation_x_alpha ? permutation_x_alpha + (m + 1) : NULL, N - m - 1);
609  }
610 }
611 
613 static C SearchTree(const int d, const int t, const R *x, const C *alpha,
614  const R *xmin, const R *xmax, const int N, const kernel k, const R *param,
615  const int Ad, const C *Add, const int p, const unsigned flags)
616 {
617  if (N == 0)
618  {
619  return K(0.0);
620  }
621  else
622  {
623  int m = N / 2;
624  R Min = xmin[t];
625  R Max = xmax[t];
626  R Median = x[m * d + t];
627  R a = FABS(Max - Min) / 2;
628  int l;
629  int E = 0;
630  R r;
631 
632  if (Min > Median)
633  return SearchTree(d, (t + 1) % d, x + (m + 1) * d, alpha + (m + 1), xmin,
634  xmax, N - m - 1, k, param, Ad, Add, p, flags);
635  else if (Max < Median)
636  return SearchTree(d, (t + 1) % d, x, alpha, xmin, xmax, m, k, param, Ad,
637  Add, p, flags);
638  else
639  {
640  C result = K(0.0);
641  E = 0;
642 
643  for (l = 0; l < d; l++)
644  {
645  if (x[m * d + l] > xmin[l] && x[m * d + l] < xmax[l])
646  E++;
647  }
648 
649  if (E == d)
650  {
651  if (d == 1)
652  {
653  r = xmin[0] + a - x[m]; /* remember: xmin+a = y */
654  }
655  else
656  {
657  r = K(0.0);
658  for (l = 0; l < d; l++)
659  r += (xmin[l] + a - x[m * d + l]) * (xmin[l] + a - x[m * d + l]); /* remember: xmin+a = y */
660  r = SQRT(r);
661  }
662  if (FABS(r) < a)
663  {
664  result += alpha[m] * k(r, 0, param); /* alpha*(kern-regkern) */
665  if (d == 1)
666  {
667  if (flags & EXACT_NEARFIELD)
668  result -= alpha[m] * regkern1(k, r, p, param, a, K(1.0) / K(16.0)); /* exact value (in 1D) */
669  else
670  result -= alpha[m] * kubintkern1(r, Add, Ad, a); /* spline approximation */
671  }
672  else
673  {
674  if (flags & EXACT_NEARFIELD)
675  result -= alpha[m] * regkern(k, r, p, param, a, K(1.0) / K(16.0)); /* exact value (in dD) */
676  else
677 #if defined(NF_KUB)
678  result -= alpha[m] * kubintkern(r, Add, Ad, a); /* spline approximation */
679 #elif defined(NF_QUADR)
680  result -= alpha[m]*quadrintkern(r,Add,Ad,a); /* spline approximation */
681 #elif defined(NF_LIN)
682  result -= alpha[m]*linintkern(r,Add,Ad,a); /* spline approximation */
683 #else
684 #error define interpolation method
685 #endif
686  }
687  }
688  }
689  result += SearchTree(d, (t + 1) % d, x + (m + 1) * d, alpha + (m + 1), xmin,
690  xmax, N - m - 1, k, param, Ad, Add, p, flags);
691  result += SearchTree(d, (t + 1) % d, x, alpha, xmin, xmax, m, k, param, Ad, Add,
692  p, flags);
693  return result;
694  }
695  }
696 }
697 
698 static void fastsum_precompute_kernel(fastsum_plan *ths)
699 {
700  int j, k, t;
701  INT N[ths->d];
702  int n_total;
703 #ifdef MEASURE_TIME
704  ticks t0, t1;
705 #endif
706 
707  ths->MEASURE_TIME_t[0] = K(0.0);
708 
709 #ifdef MEASURE_TIME
710  t0 = getticks();
711 #endif
712 
713  if (!(ths->flags & EXACT_NEARFIELD))
714  {
715  if (ths->d == 1)
716 #ifdef _OPENMP
717  #pragma omp parallel for default(shared) private(k)
718 #endif
719  for (k = -ths->Ad / 2 - 2; k <= ths->Ad / 2 + 2; k++)
720  ths->Add[k + ths->Ad / 2 + 2] = regkern1(ths->k,
721  ths->eps_I * (R) k / (R)(ths->Ad) * K(2.0), ths->p, ths->kernel_param,
722  ths->eps_I, ths->eps_B);
723  else
724 #ifdef _OPENMP
725  #pragma omp parallel for default(shared) private(k)
726 #endif
727  for (k = 0; k <= ths->Ad + 2; k++)
728  ths->Add[k] = regkern3(ths->k, ths->eps_I * (R) k / (R)(ths->Ad), ths->p,
729  ths->kernel_param, ths->eps_I, ths->eps_B);
730  }
731 #ifdef MEASURE_TIME
732  t1 = getticks();
733  ths->MEASURE_TIME_t[0] += NFFT(elapsed_seconds)(t1,t0);
734 #endif
735 
736 #ifdef MEASURE_TIME
737  t0 = getticks();
738 #endif
739 
740  n_total = 1;
741  for (t = 0; t < ths->d; t++)
742  n_total *= ths->n;
743 
744 #ifdef _OPENMP
745  #pragma omp parallel for default(shared) private(j,k,t)
746 #endif
747  for (j = 0; j < n_total; j++)
748  {
749  if (ths->d == 1)
750  ths->b[j] = regkern1(ths->k, (R) - (j / (R)(ths->n) - K(0.5)), ths->p,
751  ths->kernel_param, ths->eps_I, ths->eps_B) / (R)(n_total);
752  else
753  {
754  k = j;
755  ths->b[j] = K(0.0);
756  for (t = 0; t < ths->d; t++)
757  {
758  ths->b[j] += ((R) (k % (ths->n)) / (R)(ths->n) - K(0.5))
759  * ((R) (k % (ths->n)) / (R)(ths->n) - K(0.5));
760  k = k / (ths->n);
761  }
762  ths->b[j] = regkern3(ths->k, SQRT(CREAL(ths->b[j])), ths->p, ths->kernel_param,
763  ths->eps_I, ths->eps_B) / (R)(n_total);
764  }
765  }
766 
767  for (t = 0; t < ths->d; t++)
768  N[t] = ths->n;
769 
770  NFFT(fftshift_complex)(ths->b, (int)(ths->d), N);
771  FFTW(execute)(ths->fft_plan);
772  NFFT(fftshift_complex)(ths->b, (int)(ths->d), N);
773 #ifdef MEASURE_TIME
774  t1 = getticks();
775  ths->MEASURE_TIME_t[0] += nfft_elapsed_seconds(t1,t0);
776 #endif
777 }
778 
779 void fastsum_init_guru_kernel(fastsum_plan *ths, int d, kernel k, R *param,
780  unsigned flags, int nn, int p, R eps_I, R eps_B)
781 {
782  int t;
783  int N[d];
784  int n_total;
785 #ifdef _OPENMP
786  int nthreads = NFFT(get_num_threads)();
787 #endif
788 
789  ths->d = d;
790 
791  ths->k = k;
792  ths->kernel_param = param;
793 
794  ths->flags = flags;
795 
796  ths->p = p;
797  ths->eps_I = eps_I; /* =(R)ths->p/(R)nn; */
798  ths->eps_B = eps_B; /* =K(1.0)/K(16.0); */
801  if (!(ths->flags & EXACT_NEARFIELD))
802  {
803  if (ths->d == 1)
804  {
805  ths->Ad = 4 * (ths->p) * (ths->p);
806  ths->Add = (C *) NFFT(malloc)((size_t)(ths->Ad + 5) * (sizeof(C)));
807  }
808  else
809  {
810  if (ths->k == one_over_x)
811  {
812  R delta = K(1e-8);
813  switch (p)
814  {
815  case 2:
816  delta = K(1e-3);
817  break;
818  case 3:
819  delta = K(1e-4);
820  break;
821  case 4:
822  delta = K(1e-5);
823  break;
824  case 5:
825  delta = K(1e-6);
826  break;
827  case 6:
828  delta = K(1e-6);
829  break;
830  case 7:
831  delta = K(1e-7);
832  break;
833  default:
834  delta = K(1e-8);
835  }
836 
837 #if defined(NF_KUB)
838  ths->Ad = max_i(10, (int)(LRINT(CEIL(K(1.4) / POW(delta, K(1.0) / K(4.0))))));
839  ths->Add = (C *) NFFT(malloc)((size_t)(ths->Ad + 3) * (sizeof(C)));
840 #elif defined(NF_QUADR)
841  ths->Ad = (int)(LRINT(CEIL(K(2.2)/POW(delta,K(1.0)/K(3.0)))));
842  ths->Add = (C *)NFFT(malloc)((size_t)(ths->Ad+3)*(sizeof(C)));
843 #elif defined(NF_LIN)
844  ths->Ad = (int)(LRINT(CEIL(K(1.7)/pow(delta,K(1.0)/K(2.0)))));
845  ths->Add = (C *)NFFT(malloc)((size_t)(ths->Ad+3)*(sizeof(C)));
846 #else
847 #error define NF_LIN or NF_QUADR or NF_KUB
848 #endif
849  }
850  else
851  {
852  ths->Ad = 2 * (ths->p) * (ths->p);
853  ths->Add = (C *) NFFT(malloc)((size_t)(ths->Ad + 3) * (sizeof(C)));
854  }
855  }
856  }
857 
858  ths->n = nn;
859  for (t = 0; t < d; t++)
860  {
861  N[t] = nn;
862  }
863 
865  n_total = 1;
866  for (t = 0; t < d; t++)
867  n_total *= nn;
868 
869  ths->b = (C*) NFFT(malloc)((size_t)(n_total) * sizeof(C));
870  ths->f_hat = (C*) NFFT(malloc)((size_t)(n_total) * sizeof(C));
871 #ifdef _OPENMP
872  #pragma omp critical (nfft_omp_critical_fftw_plan)
873  {
874  FFTW(plan_with_nthreads)(nthreads);
875 #endif
876 
877  ths->fft_plan = FFTW(plan_dft)(d, N, ths->b, ths->b, FFTW_FORWARD,
878  FFTW_ESTIMATE);
879 
880 #ifdef _OPENMP
881 }
882 #endif
883 
884  fastsum_precompute_kernel(ths);
885 }
886 
887 void fastsum_init_guru_source_nodes(fastsum_plan *ths, int N_total, int nn_oversampled, int m)
888 {
889  int t;
890  int N[ths->d], n[ths->d];
891  unsigned sort_flags_adjoint = 0U;
892 
893  if (ths->d > 1)
894  {
895 #ifdef _OPENMP
896  sort_flags_adjoint = NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT;
897 #else
898  sort_flags_adjoint = NFFT_SORT_NODES;
899 #endif
900  }
901 
902  ths->N_total = N_total;
903 
904  ths->x = (R *) NFFT(malloc)((size_t)(ths->d * N_total) * (sizeof(R)));
905  ths->alpha = (C *) NFFT(malloc)((size_t)(N_total) * (sizeof(C)));
906 
908  for (t = 0; t < ths->d; t++)
909  {
910  N[t] = ths->n;
911  n[t] = nn_oversampled;
912  }
913 
914  NFFT(init_guru)(&(ths->mv1), ths->d, N, N_total, n, m,
915  sort_flags_adjoint |
916  PRE_PHI_HUT | PRE_PSI | /*MALLOC_X | MALLOC_F_HAT | MALLOC_F |*/ FFTW_INIT
918  FFTW_ESTIMATE | FFTW_DESTROY_INPUT);
919  ths->mv1.x = ths->x;
920  ths->mv1.f = ths->alpha;
921  ths->mv1.f_hat = ths->f_hat;
922 
923  ths->permutation_x_alpha = NULL;
924 
925  if (ths->flags & NEARFIELD_BOXES)
926  {
927  ths->box_count_per_dim = (int)(LRINT(FLOOR((K(0.5) - ths->eps_B) / ths->eps_I))) + 1;
928  ths->box_count = 1;
929  for (t = 0; t < ths->d; t++)
930  ths->box_count *= ths->box_count_per_dim;
931 
932  ths->box_offset = (int *) NFFT(malloc)((size_t)(ths->box_count + 1) * sizeof(int));
933 
934  ths->box_alpha = (C *) NFFT(malloc)((size_t)(ths->N_total) * (sizeof(C)));
935 
936  ths->box_x = (R *) NFFT(malloc)((size_t)(ths->d * ths->N_total) * sizeof(R));
937  }
938  else
939  {
940  if (ths->flags & STORE_PERMUTATION_X_ALPHA)
941  {
942  ths->permutation_x_alpha = (int *) NFFT(malloc)((size_t)(ths->N_total) * (sizeof(int)));
943  for (int i=0; i<ths->N_total; i++)
944  ths->permutation_x_alpha[i] = i;
945  }
946  }
947 }
948 
949 void fastsum_init_guru_target_nodes(fastsum_plan *ths, int M_total, int nn_oversampled, int m)
950 {
951  int t;
952  int N[ths->d], n[ths->d];
953  unsigned sort_flags_trafo = 0U;
954 
955  if (ths->d > 1)
956  sort_flags_trafo = NFFT_SORT_NODES;
957 
958  ths->M_total = M_total;
959 
960  ths->y = (R *) NFFT(malloc)((size_t)(ths->d * M_total) * (sizeof(R)));
961  ths->f = (C *) NFFT(malloc)((size_t)(M_total) * (sizeof(C)));
962 
964  for (t = 0; t < ths->d; t++)
965  {
966  N[t] = ths->n;
967  n[t] = nn_oversampled;
968  }
969 
970  NFFT(init_guru)(&(ths->mv2), ths->d, N, M_total, n, m,
971  sort_flags_trafo |
972  PRE_PHI_HUT | PRE_PSI | /*MALLOC_X | MALLOC_F_HAT | MALLOC_F |*/ FFTW_INIT
974  FFTW_ESTIMATE | FFTW_DESTROY_INPUT);
975  ths->mv2.x = ths->y;
976  ths->mv2.f = ths->f;
977  ths->mv2.f_hat = ths->f_hat;
978 }
979 
981 void fastsum_init_guru(fastsum_plan *ths, int d, int N_total, int M_total,
982  kernel k, R *param, unsigned flags, int nn, int m, int p, R eps_I, R eps_B)
983 {
984  fastsum_init_guru_kernel(ths, d, k, param, flags, nn, p, eps_I, eps_B);
985  fastsum_init_guru_source_nodes(ths, N_total, 2*nn, m);
986  fastsum_init_guru_target_nodes(ths, M_total, 2*nn, m);
987 }
988 
991 {
992  NFFT(free)(ths->x);
993  NFFT(free)(ths->alpha);
994 
995  NFFT(finalize)(&(ths->mv1));
996 
997  if (ths->flags & NEARFIELD_BOXES)
998  {
999  NFFT(free)(ths->box_offset);
1000  NFFT(free)(ths->box_alpha);
1001  NFFT(free)(ths->box_x);
1002  }
1003  else
1004  {
1005  if (ths->flags & STORE_PERMUTATION_X_ALPHA)
1006  NFFT(free)(ths->permutation_x_alpha);
1007  }
1008 }
1009 
1012 {
1013  NFFT(free)(ths->y);
1014  NFFT(free)(ths->f);
1015 
1016  NFFT(finalize)(&(ths->mv2));
1017 }
1018 
1021 {
1022  if (!(ths->flags & EXACT_NEARFIELD))
1023  NFFT(free)(ths->Add);
1024 
1025 #ifdef _OPENMP
1026  #pragma omp critical (nfft_omp_critical_fftw_plan)
1027  {
1028 #endif
1029  FFTW(destroy_plan)(ths->fft_plan);
1030 #ifdef _OPENMP
1031 }
1032 #endif
1033 
1034  NFFT(free)(ths->b);
1035  NFFT(free)(ths->f_hat);
1036 }
1037 
1040 {
1044 }
1045 
1048 {
1049  int j, k;
1050  int t;
1051  R r;
1052 
1053 #ifdef _OPENMP
1054  #pragma omp parallel for default(shared) private(j,k,t,r)
1055 #endif
1056  for (j = 0; j < ths->M_total; j++)
1057  {
1058  ths->f[j] = K(0.0);
1059  for (k = 0; k < ths->N_total; k++)
1060  {
1061  if (ths->d == 1)
1062  r = ths->y[j] - ths->x[k];
1063  else
1064  {
1065  r = K(0.0);
1066  for (t = 0; t < ths->d; t++)
1067  r += (ths->y[j * ths->d + t] - ths->x[k * ths->d + t])
1068  * (ths->y[j * ths->d + t] - ths->x[k * ths->d + t]);
1069  r = SQRT(r);
1070  }
1071  ths->f[j] += ths->alpha[k] * ths->k(r, 0, ths->kernel_param);
1072  }
1073  }
1074 }
1075 
1078 {
1079 #ifdef MEASURE_TIME
1080  ticks t0, t1;
1081 #endif
1082 
1083  ths->MEASURE_TIME_t[1] = K(0.0);
1084  ths->MEASURE_TIME_t[3] = K(0.0);
1085 
1086 #ifdef MEASURE_TIME
1087  t0 = getticks();
1088 #endif
1089 
1090  if (ths->flags & NEARFIELD_BOXES)
1091  {
1092  BuildBox(ths);
1093  }
1094  else
1095  {
1098  BuildTree(ths->d, 0, ths->x, ths->alpha, ths->permutation_x_alpha, ths->N_total);
1099  }
1100 
1101 #ifdef MEASURE_TIME
1102  t1 = getticks();
1103  ths->MEASURE_TIME_t[3] += nfft_elapsed_seconds(t1,t0);
1104 #endif
1105 
1106 #ifdef MEASURE_TIME
1107  t0 = getticks();
1108 #endif
1109 
1110 // for (k = 0; k < ths->mv1.M_total; k++)
1111 // for (t = 0; t < ths->mv1.d; t++)
1112 // ths->mv1.x[ths->mv1.d * k + t] = -ths->x[ths->mv1.d * k + t]; /* note the factor -1 for transposed transform instead of adjoint*/
1113 
1115  if (ths->mv1.flags & PRE_LIN_PSI)
1116  NFFT(precompute_lin_psi)(&(ths->mv1));
1117 
1118  if (ths->mv1.flags & PRE_PSI)
1119  NFFT(precompute_psi)(&(ths->mv1));
1120 
1121  if (ths->mv1.flags & PRE_FULL_PSI)
1122  NFFT(precompute_full_psi)(&(ths->mv1));
1123 #ifdef MEASURE_TIME
1124  t1 = getticks();
1125  ths->MEASURE_TIME_t[1] += nfft_elapsed_seconds(t1,t0);
1126 #endif
1127 
1128 // /** init Fourier coefficients */
1129 // for (k = 0; k < ths->mv1.M_total; k++)
1130 // ths->mv1.f[k] = ths->alpha[k];
1131 }
1132 
1135 {
1136 #ifdef MEASURE_TIME
1137  ticks t0, t1;
1138 #endif
1139 
1140  ths->MEASURE_TIME_t[2] = K(0.0);
1141 
1142 #ifdef MEASURE_TIME
1143  t0 = getticks();
1144 #endif
1145 
1146 // for (j = 0; j < ths->mv2.M_total; j++)
1147 // for (t = 0; t < ths->mv2.d; t++)
1148 // ths->mv2.x[ths->mv2.d * j + t] = -ths->y[ths->mv2.d * j + t]; /* note the factor -1 for conjugated transform instead of standard*/
1149 
1151  if (ths->mv2.flags & PRE_LIN_PSI)
1152  NFFT(precompute_lin_psi)(&(ths->mv2));
1153 
1154  if (ths->mv2.flags & PRE_PSI)
1155  NFFT(precompute_psi)(&(ths->mv2));
1156 
1157  if (ths->mv2.flags & PRE_FULL_PSI)
1158  NFFT(precompute_full_psi)(&(ths->mv2));
1159 #ifdef MEASURE_TIME
1160  t1 = getticks();
1161  ths->MEASURE_TIME_t[2] += NFFT(elapsed_seconds)(t1,t0);
1162 #endif
1163 }
1164 
1167 {
1170 }
1171 
1174 {
1175  int j, k, t;
1176 #ifdef MEASURE_TIME
1177  ticks t0, t1;
1178 #endif
1179 
1180  ths->MEASURE_TIME_t[4] = K(0.0);
1181  ths->MEASURE_TIME_t[5] = K(0.0);
1182  ths->MEASURE_TIME_t[6] = K(0.0);
1183  ths->MEASURE_TIME_t[7] = K(0.0);
1184 
1185 #ifdef MEASURE_TIME
1186  t0 = getticks();
1187 #endif
1188 
1189  NFFT(adjoint)(&(ths->mv1));
1190 #ifdef MEASURE_TIME
1191  t1 = getticks();
1192  ths->MEASURE_TIME_t[4] += NFFT(elapsed_seconds)(t1,t0);
1193 #endif
1194 
1195 #ifdef MEASURE_TIME
1196  t0 = getticks();
1197 #endif
1198 
1199 #ifdef _OPENMP
1200  #pragma omp parallel for default(shared) private(k)
1201 #endif
1202  for (k = 0; k < ths->mv2.N_total; k++)
1203  ths->mv2.f_hat[k] = ths->b[k] * ths->mv1.f_hat[k];
1204 #ifdef MEASURE_TIME
1205  t1 = getticks();
1206  ths->MEASURE_TIME_t[5] += nfft_elapsed_seconds(t1,t0);
1207 #endif
1208 
1209 #ifdef MEASURE_TIME
1210  t0 = getticks();
1211 #endif
1212 
1213  NFFT(trafo)(&(ths->mv2));
1214 #ifdef MEASURE_TIME
1215  t1 = getticks();
1216  ths->MEASURE_TIME_t[6] += nfft_elapsed_seconds(t1,t0);
1217 #endif
1218 
1219 #ifdef MEASURE_TIME
1220  t0 = getticks();
1221 #endif
1222 
1223 #ifdef _OPENMP
1224  #pragma omp parallel for default(shared) private(j,k,t)
1225 #endif
1226  for (j = 0; j < ths->M_total; j++)
1227  {
1228  R ymin[ths->d], ymax[ths->d];
1230  if (ths->flags & NEARFIELD_BOXES)
1231  {
1232  ths->f[j] = ths->mv2.f[j] + SearchBox(ths->y + ths->d * j, ths);
1233  }
1234  else
1235  {
1236  for (t = 0; t < ths->d; t++)
1237  {
1238  ymin[t] = ths->y[ths->d * j + t] - ths->eps_I;
1239  ymax[t] = ths->y[ths->d * j + t] + ths->eps_I;
1240  }
1241  ths->f[j] = ths->mv2.f[j]
1242  + SearchTree(ths->d, 0, ths->x, ths->alpha, ymin, ymax, ths->N_total,
1243  ths->k, ths->kernel_param, ths->Ad, ths->Add, ths->p, ths->flags);
1244  }
1245  /* ths->f[j] = ths->mv2.f[j]; */
1246  /* ths->f[j] = SearchTree(ths->d,0, ths->x, ths->alpha, ymin, ymax, ths->N_total, ths->k, ths->kernel_param, ths->Ad, ths->Add, ths->p, ths->flags); */
1247  }
1248 
1249 #ifdef MEASURE_TIME
1250  t1 = getticks();
1251  ths->MEASURE_TIME_t[7] += NFFT(elapsed_seconds)(t1,t0);
1252 #endif
1253 }
1254 /* \} */
1255 
1256 /* fastsum.c */
int M_total
number of target knots
Definition: fastsum.h:87
static int max_i(int a, int b)
max
Definition: fastsum.c:46
C regkern(kernel k, R xx, int p, const R *param, R a, R b)
regularized kernel with K_I arbitrary and K_B smooth to zero
Definition: fastsum.c:81
static R fak(int n)
factorial
Definition: fastsum.c:52
int * permutation_x_alpha
permutation vector of source nodes if STORE_PERMUTATION_X_ALPHA is set
Definition: fastsum.h:130
R eps_B
outer boundary
Definition: fastsum.h:112
#define EXACT_NEARFIELD
Constant symbols.
Definition: fastsum.h:73
static R BasisPoly(int m, int r, R xx)
basis polynomial for regularized kernel
Definition: fastsum.c:67
R nfft_elapsed_seconds(ticks t1, ticks t0)
Return number of elapsed seconds between two time points.
Header file with predefined kernels for the fast summation algorithm.
R * kernel_param
parameters for kernel function
Definition: fastsum.h:96
void fastsum_init_guru_source_nodes(fastsum_plan *ths, int N_total, int nn_oversampled, int m)
initialize source nodes dependent part of fast summation plan
Definition: fastsum.c:887
C * Add
spline values
Definition: fastsum.h:119
C * f_hat
Fourier coefficients of nfft plans.
Definition: fastsum.h:108
C one_over_x(R x, int der, const R *param)
K(x) = 1/x.
Definition: kernels.c:233
void fastsum_finalize_kernel(fastsum_plan *ths)
finalization of fastsum plan
Definition: fastsum.c:1020
static C SearchBox(R *y, fastsum_plan *ths)
box-based near field correction
Definition: fastsum.c:533
int Ad
near field
Definition: fastsum.h:118
static C calc_SearchBox(int d, R *y, R *x, C *alpha, int start, int end_lt, const C *Add, const int Ad, int p, R a, const kernel k, const R *param, const unsigned flags)
inner computation function for box-based near field correction
Definition: fastsum.c:480
plan for fast summation algorithm
Definition: fastsum.h:80
C * b
expansion coefficients
Definition: fastsum.h:107
static R binom(int n, int m)
binomial coefficient
Definition: fastsum.c:61
#define FFTW_INIT
Definition: nfft3.h:203
void fastsum_precompute_source_nodes(fastsum_plan *ths)
precomputation for fastsum
Definition: fastsum.c:1077
Header file for the fast NFFT-based summation algorithm.
C * alpha
source coefficients
Definition: fastsum.h:89
unsigned flags
flags precomp.
Definition: fastsum.h:98
void fastsum_trafo(fastsum_plan *ths)
fast NFFT-based summation
Definition: fastsum.c:1173
int d
api
Definition: fastsum.h:84
static C regkern1(kernel k, R xx, int p, const R *param, R a, R b)
regularized kernel with K_I arbitrary and K_B periodized (used in 1D)
Definition: fastsum.c:134
void fastsum_precompute(fastsum_plan *ths)
precomputation for fastsum
Definition: fastsum.c:1166
int N_total
number of source knots
Definition: fastsum.h:86
void fastsum_finalize_target_nodes(fastsum_plan *ths)
finalization of fastsum plan
Definition: fastsum.c:1011
#define FFT_OUT_OF_PLACE
Definition: nfft3.h:202
#define PRE_LIN_PSI
Definition: nfft3.h:195
static C kubintkern1(const R x, const C *Add, const int Ad, const R a)
cubic spline interpolation in near field with arbitrary kernels
Definition: fastsum.c:352
#define PRE_PSI
Definition: nfft3.h:197
R MEASURE_TIME_t[8]
Measured time for each step if MEASURE_TIME is set.
Definition: fastsum.h:132
static C regkern3(kernel k, R xx, int p, const R *param, R a, R b)
regularized kernel for even kernels with K_I even and K_B mirrored
Definition: fastsum.c:230
void fastsum_init_guru(fastsum_plan *ths, int d, int N_total, int M_total, kernel k, R *param, unsigned flags, int nn, int m, int p, R eps_I, R eps_B)
initialization of fastsum plan
Definition: fastsum.c:981
C * f
target evaluations
Definition: fastsum.h:90
void fastsum_precompute_target_nodes(fastsum_plan *ths)
precomputation for fastsum
Definition: fastsum.c:1134
static C SearchTree(const int d, const int t, const R *x, const C *alpha, const R *xmin, const R *xmax, const int N, const kernel k, const R *param, const int Ad, const C *Add, const int p, const unsigned flags)
fast search in tree of source knots for near field computation
Definition: fastsum.c:613
void fastsum_finalize_source_nodes(fastsum_plan *ths)
finalization of fastsum plan
Definition: fastsum.c:990
kernel k
kernel function
Definition: fastsum.h:95
R * y
target knots in d-ball with radius 1/4-eps_b/2
Definition: fastsum.h:93
C kubintkern(const R x, const C *Add, const int Ad, const R a)
linear spline interpolation in near field with even kernels
Definition: fastsum.c:318
int n
FS__ - fast summation.
Definition: fastsum.h:106
void fastsum_init_guru_kernel(fastsum_plan *ths, int d, kernel k, R *param, unsigned flags, int nn, int p, R eps_I, R eps_B)
initialize node independent part of fast summation plan
Definition: fastsum.c:779
#define PRE_FULL_PSI
Definition: nfft3.h:198
void fastsum_init_guru_target_nodes(fastsum_plan *ths, int M_total, int nn_oversampled, int m)
initialize target nodes dependent part of fast summation plan
Definition: fastsum.c:949
Header file for the nfft3 library.
int p
degree of smoothness of regularization
Definition: fastsum.h:110
#define PRE_PHI_HUT
Definition: nfft3.h:193
R eps_I
inner boundary
Definition: fastsum.h:111
static void BuildBox(fastsum_plan *ths)
initialize box-based search data structures
Definition: fastsum.c:429
void fastsum_finalize(fastsum_plan *ths)
finalization of fastsum plan
Definition: fastsum.c:1039
void fastsum_exact(fastsum_plan *ths)
direct computation of sums
Definition: fastsum.c:1047
static void quicksort(int d, int t, R *x, C *alpha, int *permutation_x_alpha, int N)
quicksort algorithm for source knots and associated coefficients
Definition: fastsum.c:381
static void BuildTree(int d, int t, R *x, C *alpha, int *permutation_x_alpha, int N)
recursive sort of source knots dimension by dimension to get tree structure
Definition: fastsum.c:599
R * x
source knots in d-ball with radius 1/4-eps_b/2
Definition: fastsum.h:92