NFFT  3.4.1
fastsumS2.c
1 /*
2  * Copyright (c) 2002, 2017 Jens Keiner, Stefan Kunis, Daniel Potts
3  *
4  * This program is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU General Public License as published by the Free Software
6  * Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 
24 #include "config.h"
25 
26 /* standard headers */
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <math.h>
30 #include <float.h>
31 #ifdef HAVE_COMPLEX_H
32 #include <complex.h>
33 #endif
34 
35 /* NFFT3 header */
36 #include "nfft3.h"
37 
38 /* NFFT3 utilities */
39 #include "infft.h"
40 
41 /* Fourier-Legendre coefficients for Abel-Poisson kernel */
42 #define SYMBOL_ABEL_POISSON(k,h) (pow(h,k))
43 
44 /* Fourier-Legendre coefficients for singularity kernel */
45 #define SYMBOL_SINGULARITY(k,h) ((2.0/(2*k+1))*pow(h,k))
46 
47 /* Flags for the different kernel functions */
48 
50 #define KT_ABEL_POISSON (0)
51 
52 #define KT_SINGULARITY (1)
53 
54 #define KT_LOC_SUPP (2)
55 
56 #define KT_GAUSSIAN (3)
57 
59 enum pvalue {NO = 0, YES = 1, BOTH = 2};
60 
61 static inline int scaled_modified_bessel_i_series(const R x, const R alpha,
62  const int nb, const int ize, R *b)
63 {
64  const R enmten = K(4.0)*nfft_float_property(NFFT_R_MIN);
65  R tempa = K(1.0), empal = K(1.0) + alpha, halfx = K(0.0), tempb = K(0.0);
66  int n, ncalc = nb;
67 
68  if (enmten < x)
69  halfx = x/K(2.0);
70 
71  if (alpha != K(0.0))
72  tempa = POW(halfx, alpha)/TGAMMA(empal);
73 
74  if (ize == 2)
75  tempa *= EXP(-x);
76 
77  if (K(1.0) < x + K(1.0))
78  tempb = halfx*halfx;
79 
80  b[0] = tempa + tempa*tempb/empal;
81 
82  if (x != K(0.0) && b[0] == K(0.0))
83  ncalc = 0;
84 
85  if (nb == 1)
86  return ncalc;
87 
88  if (K(0.0) < x)
89  {
90  R tempc = halfx, tover = (enmten + enmten)/x;
91 
92  if (tempb != K(0.0))
93  tover = enmten/tempb;
94 
95  for (n = 1; n < nb; n++)
96  {
97  tempa /= empal;
98  empal += K(1.0);
99  tempa *= tempc;
100 
101  if (tempa <= tover*empal)
102  tempa = K(0.0);
103 
104  b[n] = tempa + tempa*tempb/empal;
105 
106  if (b[n] == K(0.0) && n < ncalc)
107  ncalc = n;
108  }
109  }
110  else
111  for (n = 1; n < nb; n++)
112  b[n] = K(0.0);
113 
114  return ncalc;
115 }
116 
117 static inline void scaled_modified_bessel_i_normalize(const R x,
118  const R alpha, const int nb, const int ize, R *b, const R sum_)
119 {
120  const R enmten = K(4.0)*nfft_float_property(NFFT_R_MIN);
121  R sum = sum_, tempa;
122  int n;
123 
124  /* Normalize, i.e., divide all b[n] by sum */
125  if (alpha != K(0.0))
126  sum = sum * TGAMMA(K(1.0) + alpha) * POW(x/K(2.0), -alpha);
127 
128  if (ize == 1)
129  sum *= EXP(-x);
130 
131  tempa = enmten;
132 
133  if (K(1.0) < sum)
134  tempa *= sum;
135 
136  for (n = 1; n <= nb; n++)
137  {
138  if (b[n-1] < tempa)
139  b[n-1] = K(0.0);
140 
141  b[n-1] /= sum;
142  }
143 }
144 
192 static int smbi(const R x, const R alpha, const int nb, const int ize, R *b)
193 {
194  /* machine dependent parameters */
195  /* NSIG - DECIMAL SIGNIFICANCE DESIRED. SHOULD BE SET TO */
196  /* IFIX(ALOG10(2)*NBIT+1), WHERE NBIT IS THE NUMBER OF */
197  /* BITS IN THE MANTISSA OF A WORKING PRECISION VARIABLE. */
198  /* SETTING NSIG LOWER WILL RESULT IN DECREASED ACCURACY */
199  /* WHILE SETTING NSIG HIGHER WILL INCREASE CPU TIME */
200  /* WITHOUT INCREASING ACCURACY. THE TRUNCATION ERROR */
201  /* IS LIMITED TO A RELATIVE ERROR OF T=.5*10**(-NSIG). */
202  /* ENTEN - 10.0 ** K, WHERE K IS THE LARGEST int SUCH THAT */
203  /* ENTEN IS MACHINE-REPRESENTABLE IN WORKING PRECISION. */
204  /* ENSIG - 10.0 ** NSIG. */
205  /* RTNSIG - 10.0 ** (-K) FOR THE SMALLEST int K SUCH THAT */
206  /* K .GE. NSIG/4. */
207  /* ENMTEN - THE SMALLEST ABS(X) SUCH THAT X/4 DOES NOT UNDERFLOW. */
208  /* XLARGE - UPPER LIMIT ON THE MAGNITUDE OF X WHEN IZE=2. BEAR */
209  /* IN MIND THAT IF ABS(X)=N, THEN AT LEAST N ITERATIONS */
210  /* OF THE BACKWARD RECURSION WILL BE EXECUTED. */
211  /* EXPARG - LARGEST WORKING PRECISION ARGUMENT THAT THE LIBRARY */
212  /* EXP ROUTINE CAN HANDLE AND UPPER LIMIT ON THE */
213  /* MAGNITUDE OF X WHEN IZE=1. */
214  const int nsig = MANT_DIG + 2;
215  const R enten = nfft_float_property(NFFT_R_MAX);
216  const R ensig = POW(K(10.0),(R)nsig);
217  const R rtnsig = POW(K(10.0),-CEIL((R)nsig/K(4.0)));
218  const R xlarge = K(1E4);
219  const R exparg = FLOOR(LOG(POW(K(R_RADIX),K(DBL_MAX_EXP-1))));
220 
221  /* System generated locals */
222  int l, n, nend, magx, nbmx, ncalc, nstart;
223  R p, em, en, sum, pold, test, empal, tempa, tempb, tempc, psave, plast, tover,
224  emp2al, psavel;
225 
226  magx = LRINT(FLOOR(x));
227 
228  /* return if x, nb, or ize out of range */
229  if ( nb <= 0 || x < K(0.0) || alpha < K(0.0) || K(1.0) <= alpha
230  || ((ize != 1 || exparg < x) && (ize != 2 || xlarge < x)))
231  return (MIN(nb,0) - 1);
232 
233  /* 2-term ascending series for small x */
234  if (x < rtnsig)
235  return scaled_modified_bessel_i_series(x,alpha,nb,ize,b);
236 
237  ncalc = nb;
238  /* forward sweep, Olver's p-sequence */
239 
240  nbmx = nb - magx;
241  n = magx + 1;
242 
243  en = (R) (n+n) + (alpha+alpha);
244  plast = K(1.0);
245  p = en/x;
246 
247  /* significance test */
248  test = ensig + ensig;
249 
250  if ((5*nsig) < (magx << 1))
251  test = SQRT(test*p);
252  else
253  test /= POW(K(1.585),(R)magx);
254 
255  if (3 <= nbmx)
256  {
257  /* calculate p-sequence until n = nb-1 */
258  tover = enten/ensig;
259  nstart = magx+2;
260  nend = nb - 1;
261 
262  for (n = nstart; n <= nend; n++)
263  {
264  en += K(2.0);
265  pold = plast;
266  plast = p;
267  p = en*plast/x + pold;
268  if (p > tover)
269  {
270  /* divide p-sequence by tover to avoid overflow. Calculate p-sequence
271  * until 1 <= |p| */
272  tover = enten;
273  p /= tover;
274  plast /= tover;
275  psave = p;
276  psavel = plast;
277  nstart = n + 1;
278 
279  do
280  {
281  n++;
282  en += K(2.0);
283  pold = plast;
284  plast = p;
285  p = en*plast/x + pold;
286  } while (p <= K(1.0));
287 
288  tempb = en/x;
289 
290  /* Backward test. Find ncalc as the largest n such that test is passed. */
291  test = pold*plast*(K(0.5) - K(0.5)/(tempb * tempb))/ensig;
292  p = plast*tover;
293  n--;
294  en -= K(2.0);
295  nend = MIN(nb,n);
296 
297  for (ncalc = nstart; ncalc <= nend; ncalc++)
298  {
299  pold = psavel;
300  psavel = psave;
301  psave = en*psavel/x + pold;
302  if (test < psave * psavel)
303  break;
304  }
305 
306  ncalc--;
307  goto L80;
308  }
309  }
310 
311  n = nend;
312  en = (R) (n+n) + (alpha+alpha);
313 
314  /* special significance test for 2 <= nbmx */
315  test = FMAX(test,SQRT(plast*ensig)*SQRT(p+p));
316  }
317 
318  /* calculate p-sequence until significance test is passed */
319  do
320  {
321  n++;
322  en += K(2.0);
323  pold = plast;
324  plast = p;
325  p = en*plast/x + pold;
326  } while (p < test);
327 
328  /* Initialize backward recursion and normalization sum. */
329 L80:
330  n++;
331  en += K(2.0);
332  tempb = K(0.0);
333  tempa = K(1.0)/p;
334  em = (R)(n-1);
335  empal = em + alpha;
336  emp2al = em - K(1.0) + (alpha+alpha);
337  sum = tempa*empal*emp2al/em;
338  nend = n-nb;
339 
340  if (nend < 0)
341  {
342  /* We have n <= nb. So store b[n] and set higher orders to zero */
343  b[n-1] = tempa;
344  nend = -nend;
345  for (l = 1; l <= nend; ++l)
346  b[n-1 + l] = K(0.0);
347  }
348  else
349  {
350  if (nend != 0)
351  {
352  /* recur backward via difference equation, calculating b[n] until n = nb */
353  for (l = 1; l <= nend; ++l)
354  {
355  n--;
356  en -= K(2.0);
357  tempc = tempb;
358  tempb = tempa;
359  tempa = en*tempb/x + tempc;
360  em -= K(1.0);
361  emp2al -= K(1.0);
362 
363  if (n == 1)
364  break;
365 
366  if (n == 2)
367  emp2al = K(1.0);
368 
369  empal -= K(1.0);
370  sum = (sum + tempa*empal)*emp2al/em;
371  }
372  }
373 
374  /* store b[nb] */
375  b[n-1] = tempa;
376 
377  if (nb <= 1)
378  {
379  sum = sum + sum + tempa;
380  scaled_modified_bessel_i_normalize(x,alpha,nb,ize,b,sum);
381  return ncalc;
382  }
383 
384  /* calculate and store b[nb-1] */
385  n--;
386  en -= 2.0;
387  b[n-1] = en*tempa/x + tempb;
388 
389  if (n == 1)
390  {
391  sum = sum + sum + b[0];
392  scaled_modified_bessel_i_normalize(x,alpha,nb,ize,b,sum);
393  return ncalc;
394  }
395 
396  em -= K(1.0);
397  emp2al -= K(1.0);
398 
399  if (n == 2)
400  emp2al = K(1.0);
401 
402  empal -= K(1.0);
403  sum = (sum + b[n-1]*empal)*emp2al/em;
404  }
405 
406  nend = n - 2;
407 
408  if (nend != 0)
409  {
410  /* Calculate and store b[n] until n = 2. */
411  for (l = 1; l <= nend; ++l)
412  {
413  n--;
414  en -= K(2.0);
415  b[n-1] = en*b[n]/x + b[n+1];
416  em -= K(1.0);
417  emp2al -= K(1.0);
418 
419  if (n == 2)
420  emp2al = K(1.0);
421 
422  empal -= K(1.0);
423  sum = (sum + b[n-1]*empal)*emp2al/em;
424  }
425  }
426 
427  /* calculate b[1] */
428  b[0] = K(2.0)*empal*b[1]/x + b[2];
429  sum = sum + sum + b[0];
430 
431  scaled_modified_bessel_i_normalize(x,alpha,nb,ize,b,sum);
432  return ncalc;
433 }
434 
449 static inline double innerProduct(const double phi1, const double theta1,
450  const double phi2, const double theta2)
451 {
452  double pi2theta1 = K2PI*theta1, pi2theta2 = K2PI*theta2;
453  return (cos(pi2theta1)*cos(pi2theta2)
454  + sin(pi2theta1)*sin(pi2theta2)*cos(K2PI*(phi1-phi2)));
455 }
456 
468 static inline double poissonKernel(const double x, const double h)
469 {
470  return (1.0/(K4PI))*((1.0-h)*(1.0+h))/pow(sqrt(1.0-2.0*h*x+h*h),3.0);
471 }
472 
484 static inline double singularityKernel(const double x, const double h)
485 {
486  return (1.0/(K2PI))/sqrt(1.0-2.0*h*x+h*h);
487 }
488 
502 static inline double locallySupportedKernel(const double x, const double h,
503  const double lambda)
504 {
505  return (x<=h)?(0.0):(pow((x-h),lambda));
506 }
507 
520 static inline double gaussianKernel(const double x, const double sigma)
521 {
522  return exp(2.0*sigma*(x-1.0));
523 }
524 
535 int main (int argc, char **argv)
536 {
537  double **p; /* The array containing the parameter sets *
538  * for the kernel functions */
539  int *m; /* The array containing the cut-off degrees M */
540  int **ld; /* The array containing the numbers of source *
541  * and target nodes, L and D */
542  int ip; /* Index variable for p */
543  int im; /* Index variable for m */
544  int ild; /* Index variable for l */
545  int ipp; /* Index for kernel parameters */
546  int ip_max; /* The maximum index for p */
547  int im_max; /* The maximum index for m */
548  int ild_max; /* The maximum index for l */
549  int ipp_max; /* The maximum index for ip */
550  int tc_max; /* The number of testcases */
551  int m_max; /* The maximum cut-off degree M for the *
552  * current dataset */
553  int l_max; /* The maximum number of source nodes L for *
554  * the current dataset */
555  int d_max; /* The maximum number of target nodes D for *
556  * the current dataset */
557  long ld_max_prec; /* The maximum number of source and target *
558  * nodes for precomputation multiplied */
559  long l_max_prec; /* The maximum number of source nodes for *
560  * precomputation */
561  int tc; /* Index variable for testcases */
562  int kt; /* The kernel function */
563  int cutoff; /* The current NFFT cut-off parameter */
564  double threshold; /* The current NFSFT threshold parameter */
565  double t_d; /* Time for direct algorithm in seconds */
566  double t_dp; /* Time for direct algorithm with *
567  precomputation in seconds */
568  double t_fd; /* Time for fast direct algorithm in seconds */
569  double t_f; /* Time for fast algorithm in seconds */
570  double temp; /* */
571  double err_f; /* Error E_infty for fast algorithm */
572  double err_fd; /* Error E_\infty for fast direct algorithm */
573  ticks t0, t1; /* */
574  int precompute = NO; /* */
575  fftw_complex *ptr; /* */
576  double* steed; /* */
577  fftw_complex *b; /* The weights (b_l)_{l=0}^{L-1} */
578  fftw_complex *f_hat; /* The spherical Fourier coefficients */
579  fftw_complex *a; /* The Fourier-Legendre coefficients */
580  double *xi; /* Target nodes */
581  double *eta; /* Source nodes */
582  fftw_complex *f_m; /* Approximate function values */
583  fftw_complex *f; /* Exact function values */
584  fftw_complex *prec = NULL; /* */
585  nfsft_plan plan; /* NFSFT plan */
586  nfsft_plan plan_adjoint; /* adjoint NFSFT plan */
587  int i; /* */
588  int k; /* */
589  int n; /* */
590  int d; /* */
591  int l; /* */
592  int use_nfsft; /* */
593  int use_nfft; /* */
594  int use_fpt; /* */
595  int rinc; /* */
596  double constant; /* */
597 
598  /* Read the number of testcases. */
599  fscanf(stdin,"testcases=%d\n",&tc_max);
600  fprintf(stdout,"%d\n",tc_max);
601 
602  /* Process each testcase. */
603  for (tc = 0; tc < tc_max; tc++)
604  {
605  /* Check if the fast transform shall be used. */
606  fscanf(stdin,"nfsft=%d\n",&use_nfsft);
607  fprintf(stdout,"%d\n",use_nfsft);
608  if (use_nfsft != NO)
609  {
610  /* Check if the NFFT shall be used. */
611  fscanf(stdin,"nfft=%d\n",&use_nfft);
612  fprintf(stdout,"%d\n",use_nfft);
613  if (use_nfft != NO)
614  {
615  /* Read the cut-off parameter. */
616  fscanf(stdin,"cutoff=%d\n",&cutoff);
617  fprintf(stdout,"%d\n",cutoff);
618  }
619  else
620  {
621  /* TODO remove this */
622  /* Initialize unused variable with dummy value. */
623  cutoff = 1;
624  }
625  /* Check if the fast polynomial transform shall be used. */
626  fscanf(stdin,"fpt=%d\n",&use_fpt);
627  fprintf(stdout,"%d\n",use_fpt);
628  /* Read the NFSFT threshold parameter. */
629  fscanf(stdin,"threshold=%lf\n",&threshold);
630  fprintf(stdout,"%lf\n",threshold);
631  }
632  else
633  {
634  /* TODO remove this */
635  /* Set dummy values. */
636  cutoff = 3;
637  threshold = 1000000000000.0;
638  }
639 
640  /* Initialize bandwidth bound. */
641  m_max = 0;
642  /* Initialize source nodes bound. */
643  l_max = 0;
644  /* Initialize target nodes bound. */
645  d_max = 0;
646  /* Initialize source nodes bound for precomputation. */
647  l_max_prec = 0;
648  /* Initialize source and target nodes bound for precomputation. */
649  ld_max_prec = 0;
650 
651  /* Read the kernel type. This is one of KT_ABEL_POISSON, KT_SINGULARITY,
652  * KT_LOC_SUPP and KT_GAUSSIAN. */
653  fscanf(stdin,"kernel=%d\n",&kt);
654  fprintf(stdout,"%d\n",kt);
655 
656  /* Read the number of parameter sets. */
657  fscanf(stdin,"parameter_sets=%d\n",&ip_max);
658  fprintf(stdout,"%d\n",ip_max);
659 
660  /* Allocate memory for pointers to parameter sets. */
661  p = (double**) nfft_malloc(ip_max*sizeof(double*));
662 
663  /* We now read in the parameter sets. */
664 
665  /* Read number of parameters. */
666  fscanf(stdin,"parameters=%d\n",&ipp_max);
667  fprintf(stdout,"%d\n",ipp_max);
668 
669  for (ip = 0; ip < ip_max; ip++)
670  {
671  /* Allocate memory for the parameters. */
672  p[ip] = (double*) nfft_malloc(ipp_max*sizeof(double));
673 
674  /* Read the parameters. */
675  for (ipp = 0; ipp < ipp_max; ipp++)
676  {
677  /* Read the next parameter. */
678  fscanf(stdin,"%lf\n",&p[ip][ipp]);
679  fprintf(stdout,"%lf\n",p[ip][ipp]);
680  }
681  }
682 
683  /* Read the number of cut-off degrees. */
684  fscanf(stdin,"bandwidths=%d\n",&im_max);
685  fprintf(stdout,"%d\n",im_max);
686  m = (int*) nfft_malloc(im_max*sizeof(int));
687 
688  /* Read the cut-off degrees. */
689  for (im = 0; im < im_max; im++)
690  {
691  /* Read cut-off degree. */
692  fscanf(stdin,"%d\n",&m[im]);
693  fprintf(stdout,"%d\n",m[im]);
694  m_max = MAX(m_max,m[im]);
695  }
696 
697  /* Read number of node specifications. */
698  fscanf(stdin,"node_sets=%d\n",&ild_max);
699  fprintf(stdout,"%d\n",ild_max);
700  ld = (int**) nfft_malloc(ild_max*sizeof(int*));
701 
702  /* Read the run specification. */
703  for (ild = 0; ild < ild_max; ild++)
704  {
705  /* Allocate memory for the run parameters. */
706  ld[ild] = (int*) nfft_malloc(5*sizeof(int));
707 
708  /* Read number of source nodes. */
709  fscanf(stdin,"L=%d ",&ld[ild][0]);
710  fprintf(stdout,"%d\n",ld[ild][0]);
711  l_max = MAX(l_max,ld[ild][0]);
712 
713  /* Read number of target nodes. */
714  fscanf(stdin,"D=%d ",&ld[ild][1]);
715  fprintf(stdout,"%d\n",ld[ild][1]);
716  d_max = MAX(d_max,ld[ild][1]);
717 
718  /* Determine whether direct and fast algorithm shall be compared. */
719  fscanf(stdin,"compare=%d ",&ld[ild][2]);
720  fprintf(stdout,"%d\n",ld[ild][2]);
721 
722  /* Check if precomputation for the direct algorithm is used. */
723  if (ld[ild][2] == YES)
724  {
725  /* Read whether the precomputed version shall also be used. */
726  fscanf(stdin,"precomputed=%d\n",&ld[ild][3]);
727  fprintf(stdout,"%d\n",ld[ild][3]);
728 
729  /* Read the number of repetitions over which measurements are
730  * averaged. */
731  fscanf(stdin,"repetitions=%d\n",&ld[ild][4]);
732  fprintf(stdout,"%d\n",ld[ild][4]);
733 
734  /* Update ld_max_prec and l_max_prec. */
735  if (ld[ild][3] == YES)
736  {
737  /* Update ld_max_prec. */
738  ld_max_prec = MAX(ld_max_prec,ld[ild][0]*ld[ild][1]);
739  /* Update l_max_prec. */
740  l_max_prec = MAX(l_max_prec,ld[ild][0]);
741  /* Turn on the precomputation for the direct algorithm. */
742  precompute = YES;
743  }
744  }
745  else
746  {
747  /* Set default value for the number of repetitions. */
748  ld[ild][4] = 1;
749  }
750  }
751 
752  /* Allocate memory for data structures. */
753  b = (fftw_complex*) nfft_malloc(l_max*sizeof(fftw_complex));
754  eta = (double*) nfft_malloc(2*l_max*sizeof(double));
755  f_hat = (fftw_complex*) nfft_malloc(NFSFT_F_HAT_SIZE(m_max)*sizeof(fftw_complex));
756  a = (fftw_complex*) nfft_malloc((m_max+1)*sizeof(fftw_complex));
757  xi = (double*) nfft_malloc(2*d_max*sizeof(double));
758  f_m = (fftw_complex*) nfft_malloc(d_max*sizeof(fftw_complex));
759  f = (fftw_complex*) nfft_malloc(d_max*sizeof(fftw_complex));
760 
761  /* Allocate memory for precomputed data. */
762  if (precompute == YES)
763  {
764  prec = (fftw_complex*) nfft_malloc(ld_max_prec*sizeof(fftw_complex));
765  }
766 
767  /* Generate random source nodes and weights. */
768  for (l = 0; l < l_max; l++)
769  {
770  b[l] = (((double)rand())/RAND_MAX) - 0.5;
771  eta[2*l] = (((double)rand())/RAND_MAX) - 0.5;
772  eta[2*l+1] = acos(2.0*(((double)rand())/RAND_MAX) - 1.0)/(K2PI);
773  }
774 
775  /* Generate random target nodes. */
776  for (d = 0; d < d_max; d++)
777  {
778  xi[2*d] = (((double)rand())/RAND_MAX) - 0.5;
779  xi[2*d+1] = acos(2.0*(((double)rand())/RAND_MAX) - 1.0)/(K2PI);
780  }
781 
782  /* Do precomputation. */
783  nfsft_precompute(m_max,threshold,
784  ((use_nfsft==NO)?(NFSFT_NO_FAST_ALGORITHM):(0U/*NFSFT_NO_DIRECT_ALGORITHM*/)), 0U);
785 
786  /* Process all parameter sets. */
787  for (ip = 0; ip < ip_max; ip++)
788  {
789  /* Compute kernel coeffcients up to the maximum cut-off degree m_max. */
790  switch (kt)
791  {
792  case KT_ABEL_POISSON:
793  /* Compute Fourier-Legendre coefficients for the Poisson kernel. */
794  for (k = 0; k <= m_max; k++)
795  a[k] = SYMBOL_ABEL_POISSON(k,p[ip][0]);
796  break;
797 
798  case KT_SINGULARITY:
799  /* Compute Fourier-Legendre coefficients for the singularity
800  * kernel. */
801  for (k = 0; k <= m_max; k++)
802  a[k] = SYMBOL_SINGULARITY(k,p[ip][0]);
803  break;
804 
805  case KT_LOC_SUPP:
806  /* Compute Fourier-Legendre coefficients for the locally supported
807  * kernel. */
808  a[0] = 1.0;
809  if (1 <= m_max)
810  a[1] = ((p[ip][1]+1+p[ip][0])/(p[ip][1]+2.0))*a[0];
811  for (k = 2; k <= m_max; k++)
812  a[k] = (1.0/(k+p[ip][1]+1))*((2*k-1)*p[ip][0]*a[k-1] -
813  (k-p[ip][1]-2)*a[k-2]);
814  break;
815 
816  case KT_GAUSSIAN:
817  /* Fourier-Legendre coefficients */
818  steed = (double*) nfft_malloc((m_max+1)*sizeof(double));
819  smbi(2.0*p[ip][0],0.5,m_max+1,2,steed);
820  for (k = 0; k <= m_max; k++)
821  a[k] = K2PI*(sqrt(KPI/p[ip][0]))*steed[k];
822 
823  nfft_free(steed);
824  break;
825  }
826 
827  /* Normalize Fourier-Legendre coefficients. */
828  for (k = 0; k <= m_max; k++)
829  a[k] *= (2*k+1)/(K4PI);
830 
831  /* Process all node sets. */
832  for (ild = 0; ild < ild_max; ild++)
833  {
834  /* Check if the fast algorithm shall be used. */
835  if (ld[ild][2] != NO)
836  {
837  /* Check if the direct algorithm with precomputation should be
838  * tested. */
839  if (ld[ild][3] != NO)
840  {
841  /* Get pointer to start of data. */
842  ptr = prec;
843  /* Calculate increment from one row to the next. */
844  rinc = l_max_prec-ld[ild][0];
845 
846  /* Process al target nodes. */
847  for (d = 0; d < ld[ild][1]; d++)
848  {
849  /* Process all source nodes. */
850  for (l = 0; l < ld[ild][0]; l++)
851  {
852  /* Compute inner product between current source and target
853  * node. */
854  temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]);
855 
856  /* Switch by the kernel type. */
857  switch (kt)
858  {
859  case KT_ABEL_POISSON:
860  /* Evaluate the Poisson kernel for the current value. */
861  *ptr++ = poissonKernel(temp,p[ip][0]);
862  break;
863 
864  case KT_SINGULARITY:
865  /* Evaluate the singularity kernel for the current
866  * value. */
867  *ptr++ = singularityKernel(temp,p[ip][0]);
868  break;
869 
870  case KT_LOC_SUPP:
871  /* Evaluate the localized kernel for the current
872  * value. */
873  *ptr++ = locallySupportedKernel(temp,p[ip][0],p[ip][1]);
874  break;
875 
876  case KT_GAUSSIAN:
877  /* Evaluate the spherical Gaussian kernel for the current
878  * value. */
879  *ptr++ = gaussianKernel(temp,p[ip][0]);
880  break;
881  }
882  }
883  /* Increment pointer for next row. */
884  ptr += rinc;
885  }
886 
887  /* Initialize cumulative time variable. */
888  t_dp = 0.0;
889 
890  /* Initialize time measurement. */
891  t0 = getticks();
892 
893  /* Cycle through all runs. */
894  for (i = 0; i < ld[ild][4]; i++)
895  {
896 
897  /* Reset pointer to start of precomputed data. */
898  ptr = prec;
899  /* Calculate increment from one row to the next. */
900  rinc = l_max_prec-ld[ild][0];
901 
902  /* Check if the localized kernel is used. */
903  if (kt == KT_LOC_SUPP)
904  {
905  /* Perform final summation */
906 
907  /* Calculate the multiplicative constant. */
908  constant = ((p[ip][1]+1)/(K2PI*pow(1-p[ip][0],p[ip][1]+1)));
909 
910  /* Process all target nodes. */
911  for (d = 0; d < ld[ild][1]; d++)
912  {
913  /* Initialize function value. */
914  f[d] = 0.0;
915 
916  /* Process all source nodes. */
917  for (l = 0; l < ld[ild][0]; l++)
918  f[d] += b[l]*(*ptr++);
919 
920  /* Multiply with the constant. */
921  f[d] *= constant;
922 
923  /* Proceed to next row. */
924  ptr += rinc;
925  }
926  }
927  else
928  {
929  /* Process all target nodes. */
930  for (d = 0; d < ld[ild][1]; d++)
931  {
932  /* Initialize function value. */
933  f[d] = 0.0;
934 
935  /* Process all source nodes. */
936  for (l = 0; l < ld[ild][0]; l++)
937  f[d] += b[l]*(*ptr++);
938 
939  /* Proceed to next row. */
940  ptr += rinc;
941  }
942  }
943  }
944 
945  /* Calculate the time needed. */
946  t1 = getticks();
947  t_dp = nfft_elapsed_seconds(t1,t0);
948 
949  /* Calculate average time needed. */
950  t_dp = t_dp/((double)ld[ild][4]);
951  }
952  else
953  {
954  /* Initialize cumulative time variable with dummy value. */
955  t_dp = -1.0;
956  }
957 
958  /* Initialize cumulative time variable. */
959  t_d = 0.0;
960 
961  /* Initialize time measurement. */
962  t0 = getticks();
963 
964  /* Cycle through all runs. */
965  for (i = 0; i < ld[ild][4]; i++)
966  {
967  /* Switch by the kernel type. */
968  switch (kt)
969  {
970  case KT_ABEL_POISSON:
971 
972  /* Process all target nodes. */
973  for (d = 0; d < ld[ild][1]; d++)
974  {
975  /* Initialize function value. */
976  f[d] = 0.0;
977 
978  /* Process all source nodes. */
979  for (l = 0; l < ld[ild][0]; l++)
980  {
981  /* Compute the inner product for the current source and
982  * target nodes. */
983  temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]);
984 
985  /* Evaluate the Poisson kernel for the current value and add
986  * to the result. */
987  f[d] += b[l]*poissonKernel(temp,p[ip][0]);
988  }
989  }
990  break;
991 
992  case KT_SINGULARITY:
993  /* Process all target nodes. */
994  for (d = 0; d < ld[ild][1]; d++)
995  {
996  /* Initialize function value. */
997  f[d] = 0.0;
998 
999  /* Process all source nodes. */
1000  for (l = 0; l < ld[ild][0]; l++)
1001  {
1002  /* Compute the inner product for the current source and
1003  * target nodes. */
1004  temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]);
1005 
1006  /* Evaluate the Poisson kernel for the current value and add
1007  * to the result. */
1008  f[d] += b[l]*singularityKernel(temp,p[ip][0]);
1009  }
1010  }
1011  break;
1012 
1013  case KT_LOC_SUPP:
1014  /* Calculate the multiplicative constant. */
1015  constant = ((p[ip][1]+1)/(K2PI*pow(1-p[ip][0],p[ip][1]+1)));
1016 
1017  /* Process all target nodes. */
1018  for (d = 0; d < ld[ild][1]; d++)
1019  {
1020  /* Initialize function value. */
1021  f[d] = 0.0;
1022 
1023  /* Process all source nodes. */
1024  for (l = 0; l < ld[ild][0]; l++)
1025  {
1026  /* Compute the inner product for the current source and
1027  * target nodes. */
1028  temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]);
1029 
1030  /* Evaluate the Poisson kernel for the current value and add
1031  * to the result. */
1032  f[d] += b[l]*locallySupportedKernel(temp,p[ip][0],p[ip][1]);
1033  }
1034 
1035  /* Multiply result with constant. */
1036  f[d] *= constant;
1037  }
1038  break;
1039 
1040  case KT_GAUSSIAN:
1041  /* Process all target nodes. */
1042  for (d = 0; d < ld[ild][1]; d++)
1043  {
1044  /* Initialize function value. */
1045  f[d] = 0.0;
1046 
1047  /* Process all source nodes. */
1048  for (l = 0; l < ld[ild][0]; l++)
1049  {
1050  /* Compute the inner product for the current source and
1051  * target nodes. */
1052  temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]);
1053  /* Evaluate the Poisson kernel for the current value and add
1054  * to the result. */
1055  f[d] += b[l]*gaussianKernel(temp,p[ip][0]);
1056  }
1057  }
1058  break;
1059  }
1060  }
1061 
1062  /* Calculate and add the time needed. */
1063  t1 = getticks();
1064  t_d = nfft_elapsed_seconds(t1,t0);
1065  /* Calculate average time needed. */
1066  t_d = t_d/((double)ld[ild][4]);
1067  }
1068  else
1069  {
1070  /* Initialize cumulative time variable with dummy value. */
1071  t_d = -1.0;
1072  t_dp = -1.0;
1073  }
1074 
1075  /* Initialize error and cumulative time variables for the fast
1076  * algorithm. */
1077  err_fd = -1.0;
1078  err_f = -1.0;
1079  t_fd = -1.0;
1080  t_f = -1.0;
1081 
1082  /* Process all cut-off bandwidths. */
1083  for (im = 0; im < im_max; im++)
1084  {
1085  /* Init transform plans. */
1086  nfsft_init_guru(&plan_adjoint, m[im],ld[ild][0],
1087  ((use_nfft!=0)?(0U):(NFSFT_USE_NDFT)) |
1088  ((use_fpt!=0)?(0U):(NFSFT_USE_DPT)),
1090  FFT_OUT_OF_PLACE, cutoff);
1091  nfsft_init_guru(&plan,m[im],ld[ild][1],
1092  ((use_nfft!=0)?(0U):(NFSFT_USE_NDFT)) |
1093  ((use_fpt!=0)?(0U):(NFSFT_USE_DPT)),
1096  cutoff);
1097  plan_adjoint.f_hat = f_hat;
1098  plan_adjoint.x = eta;
1099  plan_adjoint.f = b;
1100  plan.f_hat = f_hat;
1101  plan.x = xi;
1102  plan.f = f_m;
1103  nfsft_precompute_x(&plan_adjoint);
1104  nfsft_precompute_x(&plan);
1105 
1106  /* Check if direct algorithm shall also be tested. */
1107  if (use_nfsft == BOTH)
1108  {
1109  /* Initialize cumulative time variable. */
1110  t_fd = 0.0;
1111 
1112  /* Initialize time measurement. */
1113  t0 = getticks();
1114 
1115  /* Cycle through all runs. */
1116  for (i = 0; i < ld[ild][4]; i++)
1117  {
1118 
1119  /* Execute adjoint direct NDSFT transformation. */
1120  nfsft_adjoint_direct(&plan_adjoint);
1121 
1122  /* Multiplication with the Fourier-Legendre coefficients. */
1123  for (k = 0; k <= m[im]; k++)
1124  for (n = -k; n <= k; n++)
1125  f_hat[NFSFT_INDEX(k,n,&plan_adjoint)] *= a[k];
1126 
1127  /* Execute direct NDSFT transformation. */
1128  nfsft_trafo_direct(&plan);
1129 
1130  }
1131 
1132  /* Calculate and add the time needed. */
1133  t1 = getticks();
1134  t_fd = nfft_elapsed_seconds(t1,t0);
1135 
1136  /* Calculate average time needed. */
1137  t_fd = t_fd/((double)ld[ild][4]);
1138 
1139  /* Check if error E_infty should be computed. */
1140  if (ld[ild][2] != NO)
1141  {
1142  /* Compute the error E_infinity. */
1143  err_fd = X(error_l_infty_1_complex)(f, f_m, ld[ild][1], b,
1144  ld[ild][0]);
1145  }
1146  }
1147 
1148  /* Check if the fast NFSFT algorithm shall also be tested. */
1149  if (use_nfsft != NO)
1150  {
1151  /* Initialize cumulative time variable for the NFSFT algorithm. */
1152  t_f = 0.0;
1153  }
1154  else
1155  {
1156  /* Initialize cumulative time variable for the direct NDSFT
1157  * algorithm. */
1158  t_fd = 0.0;
1159  }
1160 
1161  /* Initialize time measurement. */
1162  t0 = getticks();
1163 
1164  /* Cycle through all runs. */
1165  for (i = 0; i < ld[ild][4]; i++)
1166  {
1167  /* Check if the fast NFSFT algorithm shall also be tested. */
1168  if (use_nfsft != NO)
1169  {
1170  /* Execute the adjoint NFSFT transformation. */
1171  nfsft_adjoint(&plan_adjoint);
1172  }
1173  else
1174  {
1175  /* Execute the adjoint direct NDSFT transformation. */
1176  nfsft_adjoint_direct(&plan_adjoint);
1177  }
1178 
1179  /* Multiplication with the Fourier-Legendre coefficients. */
1180  for (k = 0; k <= m[im]; k++)
1181  for (n = -k; n <= k; n++)
1182  f_hat[NFSFT_INDEX(k,n,&plan_adjoint)] *= a[k];
1183 
1184  /* Check if the fast NFSFT algorithm shall also be tested. */
1185  if (use_nfsft != NO)
1186  {
1187  /* Execute the NFSFT transformation. */
1188  nfsft_trafo(&plan);
1189  }
1190  else
1191  {
1192  /* Execute the NDSFT transformation. */
1193  nfsft_trafo_direct(&plan);
1194  }
1195  }
1196 
1197  /* Check if the fast NFSFT algorithm has been used. */
1198  t1 = getticks();
1199 
1200  if (use_nfsft != NO)
1201  t_f = nfft_elapsed_seconds(t1,t0);
1202  else
1203  t_fd = nfft_elapsed_seconds(t1,t0);
1204 
1205  /* Check if the fast NFSFT algorithm has been used. */
1206  if (use_nfsft != NO)
1207  {
1208  /* Calculate average time needed. */
1209  t_f = t_f/((double)ld[ild][4]);
1210  }
1211  else
1212  {
1213  /* Calculate average time needed. */
1214  t_fd = t_fd/((double)ld[ild][4]);
1215  }
1216 
1217  /* Check if error E_infty should be computed. */
1218  if (ld[ild][2] != NO)
1219  {
1220  /* Check if the fast NFSFT algorithm has been used. */
1221  if (use_nfsft != NO)
1222  {
1223  /* Compute the error E_infinity. */
1224  err_f = X(error_l_infty_1_complex)(f, f_m, ld[ild][1], b,
1225  ld[ild][0]);
1226  }
1227  else
1228  {
1229  /* Compute the error E_infinity. */
1230  err_fd = X(error_l_infty_1_complex)(f, f_m, ld[ild][1], b,
1231  ld[ild][0]);
1232  }
1233  }
1234 
1235  /* Print out the error measurements. */
1236  fprintf(stdout,"%e\n%e\n%e\n%e\n%e\n%e\n\n",t_d,t_dp,t_fd,t_f,err_fd,
1237  err_f);
1238 
1239  /* Finalize the NFSFT plans */
1240  nfsft_finalize(&plan_adjoint);
1241  nfsft_finalize(&plan);
1242  } /* for (im = 0; im < im_max; im++) - Process all cut-off
1243  * bandwidths.*/
1244  } /* for (ild = 0; ild < ild_max; ild++) - Process all node sets. */
1245  } /* for (ip = 0; ip < ip_max; ip++) - Process all parameter sets. */
1246 
1247  /* Delete precomputed data. */
1248  nfsft_forget();
1249 
1250  /* Check if memory for precomputed data of the matrix K has been
1251  * allocated. */
1252  if (precompute == YES)
1253  {
1254  /* Free memory for precomputed matrix K. */
1255  nfft_free(prec);
1256  }
1257  /* Free data arrays. */
1258  nfft_free(f);
1259  nfft_free(f_m);
1260  nfft_free(xi);
1261  nfft_free(eta);
1262  nfft_free(a);
1263  nfft_free(f_hat);
1264  nfft_free(b);
1265 
1266  /* Free memory for node sets. */
1267  for (ild = 0; ild < ild_max; ild++)
1268  nfft_free(ld[ild]);
1269  nfft_free(ld);
1270 
1271  /* Free memory for cut-off bandwidths. */
1272  nfft_free(m);
1273 
1274  /* Free memory for parameter sets. */
1275  for (ip = 0; ip < ip_max; ip++)
1276  nfft_free(p[ip]);
1277  nfft_free(p);
1278  } /* for (tc = 0; tc < tc_max; tc++) - Process each testcase. */
1279 
1280  /* Return exit code for successful run. */
1281  return EXIT_SUCCESS;
1282 }
1283 /* \} */
double * x
the nodes for ,
Definition: nfft3.h:574
static double gaussianKernel(const double x, const double sigma)
Evaluates the spherical Gaussian kernel at a node .
Definition: fastsumS2.c:520
void nfsft_trafo(nfsft_plan *plan)
Definition: nfsft.c:921
#define KT_SINGULARITY
Singularity kernel.
Definition: fastsumS2.c:52
static double poissonKernel(const double x, const double h)
Evaluates the Poisson kernel at a node .
Definition: fastsumS2.c:468
void nfsft_adjoint(nfsft_plan *plan)
Definition: nfsft.c:1091
fftw_complex * f_hat
Fourier coefficients.
Definition: nfft3.h:574
R nfft_elapsed_seconds(ticks t1, ticks t0)
Return number of elapsed seconds between two time points.
void nfsft_precompute(int N, double kappa, unsigned int nfsft_flags, unsigned int fpt_flags)
Definition: nfsft.c:357
static int smbi(const R x, const R alpha, const int nb, const int ize, R *b)
Calculates the modified bessel function , possibly scaled by , for real non-negative with ...
Definition: fastsumS2.c:192
static double innerProduct(const double phi1, const double theta1, const double phi2, const double theta2)
Computes the standard inner product between two vectors on the unit sphere given in spherical coord...
Definition: fastsumS2.c:449
data structure for an NFSFT (nonequispaced fast spherical Fourier transform) plan with double precisi...
Definition: nfft3.h:574
int main(int argc, char **argv)
The main program.
Definition: fastsumS2.c:535
#define NFSFT_NO_FAST_ALGORITHM
Definition: nfft3.h:590
void nfft_free(void *p)
#define FFTW_INIT
Definition: nfft3.h:203
static double locallySupportedKernel(const double x, const double h, const double lambda)
Evaluates the locally supported kernel at a node .
Definition: fastsumS2.c:502
static double singularityKernel(const double x, const double h)
Evaluates the singularity kernel at a node .
Definition: fastsumS2.c:484
#define X(name)
Include header for C99 complex datatype.
Definition: fastsum.h:57
#define FFT_OUT_OF_PLACE
Definition: nfft3.h:202
#define PRE_PSI
Definition: nfft3.h:197
#define KT_ABEL_POISSON
Abel-Poisson kernel.
Definition: fastsumS2.c:50
void * nfft_malloc(size_t n)
fftw_complex * f
Samples.
Definition: nfft3.h:574
#define NFSFT_F_HAT_SIZE(N)
Definition: nfft3.h:595
#define NFSFT_USE_NDFT
Definition: nfft3.h:576
pvalue
Enumeration type for yes/no/both-type parameters.
Definition: fastsumS2.c:59
void nfsft_finalize(nfsft_plan *plan)
Definition: nfsft.c:572
#define KT_LOC_SUPP
Locally supported kernel.
Definition: fastsumS2.c:54
Header file for the nfft3 library.
#define PRE_PHI_HUT
Definition: nfft3.h:193
#define KT_GAUSSIAN
Gaussian kernel.
Definition: fastsumS2.c:56
#define NFSFT_INDEX(k, n, plan)
Definition: nfft3.h:594
void nfsft_forget(void)
Definition: nfsft.c:526
#define NFSFT_USE_DPT
Definition: nfft3.h:577