NFFT  3.4.1
nfsft_benchomp_detail.c
1 /*
2  * Copyright (c) 2002, 2017 Jens Keiner, Stefan Kunis, Daniel Potts
3  *
4  * This program is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU General Public License as published by the Free Software
6  * Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 
19 #include <stdio.h>
20 #include <math.h>
21 #include <string.h>
22 #include <stdlib.h>
23 #include <complex.h>
24 
25 #include "nfft3.h"
26 #include "infft.h"
27 #ifdef _OPENMP
28 #include <omp.h>
29 #endif
30 
31 void bench_openmp_readfile(FILE *infile, int *trafo_adjoint, int *N, int *M, double **x, C **f_hat, C **f)
32 {
33  double re,im;
34  int k, n, j, t;
35  nfsft_plan plan;
36 
37  fscanf(infile, "%d %d %d", trafo_adjoint, N, M);
38  *x = (double *)nfft_malloc(2*(*M)*sizeof(double));
39  *f_hat = (C*)nfft_malloc((2*(*N)+2) * (2*(*N)+2) * sizeof(C));
40  *f = (C*)nfft_malloc((*M)*sizeof(C));
41 
42  memset(*f_hat,0U,(2*(*N)+2) * (2*(*N)+2) * sizeof(C));
43  memset(*f,0U,(*M)*sizeof(C));
44 
45 #ifdef _OPENMP
46  fftw_import_wisdom_from_filename("nfsft_benchomp_detail_threads.plan");
47 #else
48  fftw_import_wisdom_from_filename("nfsft_benchomp_detail_single.plan");
49 #endif
50 
51  nfsft_init_guru(&plan, *N, *M, NFSFT_MALLOC_X | NFSFT_MALLOC_F |
54 
55 #ifdef _OPENMP
56  fftw_export_wisdom_to_filename("nfsft_benchomp_detail_threads.plan");
57 #else
58  fftw_export_wisdom_to_filename("nfsft_benchomp_detail_single.plan");
59 #endif
60 
61  for (j=0; j < *M; j++)
62  for (t=0; t < 2; t++)
63  fscanf(infile, "%lg", (*x)+2*j+t);
64 
65  if (trafo_adjoint==0)
66  {
67  for (k = 0; k <= *N; k++)
68  for (n = -k; n <= k; n++)
69  {
70  fscanf(infile, "%lg %lg", &re, &im);
71  (*f_hat)[NFSFT_INDEX(k,n,&plan)] = re + _Complex_I * im;
72  }
73  }
74  else
75  {
76  for (j=0; j < *M; j++)
77  {
78  fscanf(infile, "%lg %lg", &re, &im);
79  (*f)[j] = re + _Complex_I * im;
80  }
81  }
82 
83  nfsft_finalize(&plan);
84 }
85 
86 void bench_openmp(int trafo_adjoint, int N, int M, double *x, C *f_hat, C *f, int m, int nfsft_flags, int psi_flags)
87 {
88  nfsft_plan plan;
89  int k, n;
90 // int N, M, trafo_adjoint;
91  int t, j;
92  ticks t0, t1;
93  double tt_total, tt_pre;
94 
95 // fscanf(infile, "%d %d %d", &trafo_adjoint, &N, &M);
96 
97 /*#ifdef _OPENMP
98  fftw_import_wisdom_from_filename("nfsft_benchomp_detail_threads.plan");
99 #else
100  fftw_import_wisdom_from_filename("nfsft_benchomp_detail_single.plan");
101 #endif*/
102 
103  /* precomputation (for fast polynomial transform) */
104 // nfsft_precompute(N,1000.0,0U,0U);
105 
106  /* Initialize transform plan using the guru interface. All input and output
107  * arrays are allocated by nfsft_init_guru(). Computations are performed with
108  * respect to L^2-normalized spherical harmonics Y_k^n. The array of spherical
109  * Fourier coefficients is preserved during transformations. The NFFT uses a
110  * cut-off parameter m = 6. See the NFFT 3 manual for details.
111  */
112  nfsft_init_guru(&plan, N, M, nfsft_flags | NFSFT_MALLOC_X | NFSFT_MALLOC_F |
114  PRE_PHI_HUT | psi_flags | FFTW_INIT | FFT_OUT_OF_PLACE, m);
115 
116 /*#ifdef _OPENMP
117  fftw_export_wisdom_to_filename("nfsft_benchomp_detail_threads.plan");
118 #else
119  fftw_export_wisdom_to_filename("nfsft_benchomp_detail_single.plan");
120 #endif*/
121 
122  for (j=0; j < plan.M_total; j++)
123  {
124  for (t=0; t < 2; t++)
125  // fscanf(infile, "%lg", plan.x+2*j+t);
126  plan.x[2*j+t] = x[2*j+t];
127  }
128 
129  if (trafo_adjoint==0)
130  {
131  memset(plan.f_hat,0U,plan.N_total*sizeof(double _Complex));
132  for (k = 0; k <= plan.N; k++)
133  for (n = -k; n <= k; n++)
134  {
135 // fscanf(infile, "%lg %lg", &re, &im);
136 // plan.f_hat[NFSFT_INDEX(k,n,&plan)] = re + _Complex_I * im;
137  plan.f_hat[NFSFT_INDEX(k,n,&plan)] = f_hat[NFSFT_INDEX(k,n,&plan)];
138  }
139  }
140  else
141  {
142  for (j=0; j < plan.M_total; j++)
143  {
144 // fscanf(infile, "%lg %lg", &re, &im);
145 // plan.f[j] = re + _Complex_I * im;
146  plan.f[j] = f[j];
147  }
148 
149  memset(plan.f_hat,0U,plan.N_total*sizeof(double _Complex));
150  }
151 
152  t0 = getticks();
153  /* precomputation (for NFFT, node-dependent) */
154  nfsft_precompute_x(&plan);
155  t1 = getticks();
156  tt_pre = nfft_elapsed_seconds(t1,t0);
157 
158  if (trafo_adjoint==0)
159  nfsft_trafo(&plan);
160  else
161  nfsft_adjoint(&plan);
162  t1 = getticks();
163  tt_total = nfft_elapsed_seconds(t1,t0);
164 
165 #ifndef MEASURE_TIME
166  plan.MEASURE_TIME_t[0] = 0.0;
167  plan.MEASURE_TIME_t[2] = 0.0;
168 #endif
169 
170 #ifndef MEASURE_TIME_FFTW
171  plan.MEASURE_TIME_t[1] = 0.0;
172 #endif
173 
174  printf("%.6e %.6e %6e %.6e %.6e %.6e\n", tt_pre, plan.MEASURE_TIME_t[0], plan.MEASURE_TIME_t[1], plan.MEASURE_TIME_t[2], tt_total-tt_pre-plan.MEASURE_TIME_t[0]-plan.MEASURE_TIME_t[1]-plan.MEASURE_TIME_t[2], tt_total);
175 
177  nfsft_finalize(&plan);
178 }
179 
180 int main(int argc, char **argv)
181 {
182  int m, nfsft_flags, psi_flags;
183  int nrepeat;
184  int trafo_adjoint, N, M, r;
185  double *x;
186  C *f_hat, *f;
187 #ifdef _OPENMP
188  int nthreads;
189 
190  if (argc != 6)
191  return 1;
192 
193  nthreads = atoi(argv[5]);
194  fftw_init_threads();
195  omp_set_num_threads(nthreads);
196 #else
197  if (argc != 5)
198  return 1;
199 #endif
200 
201  m = atoi(argv[1]);
202  nfsft_flags = atoi(argv[2]);
203  psi_flags = atoi(argv[3]);
204  nrepeat = atoi(argv[4]);
205 
206  bench_openmp_readfile(stdin, &trafo_adjoint, &N, &M, &x, &f_hat, &f);
207 
208  /* precomputation (for fast polynomial transform) */
209  nfsft_precompute(N,1000.0,0U,0U);
210 
211  for (r = 0; r < nrepeat; r++)
212  bench_openmp(trafo_adjoint, N, M, x, f_hat, f, m, nfsft_flags, psi_flags);
213 
214  return 0;
215 }
#define NFSFT_MALLOC_F_HAT
Definition: nfft3.h:579
double * x
the nodes for ,
Definition: nfft3.h:574
#define NFSFT_NORMALIZED
Definition: nfft3.h:575
#define NFSFT_MALLOC_X
Definition: nfft3.h:578
void nfsft_trafo(nfsft_plan *plan)
Definition: nfsft.c:921
#define NFSFT_PRESERVE_F_HAT
Definition: nfft3.h:581
void nfsft_adjoint(nfsft_plan *plan)
Definition: nfsft.c:1091
fftw_complex * f_hat
Fourier coefficients.
Definition: nfft3.h:574
R nfft_elapsed_seconds(ticks t1, ticks t0)
Return number of elapsed seconds between two time points.
int N
the bandwidth
Definition: nfft3.h:574
void nfsft_precompute(int N, double kappa, unsigned int nfsft_flags, unsigned int fpt_flags)
Definition: nfsft.c:357
data structure for an NFSFT (nonequispaced fast spherical Fourier transform) plan with double precisi...
Definition: nfft3.h:574
#define FFTW_INIT
Definition: nfft3.h:203
NFFT_INT M_total
Total number of samples.
Definition: nfft3.h:574
double MEASURE_TIME_t[3]
Measured time for each step if MEASURE_TIME is set.
Definition: nfft3.h:574
#define FFT_OUT_OF_PLACE
Definition: nfft3.h:202
void * nfft_malloc(size_t n)
NFFT_INT N_total
Total number of Fourier coefficients.
Definition: nfft3.h:574
fftw_complex * f
Samples.
Definition: nfft3.h:574
#define NFSFT_MALLOC_F
Definition: nfft3.h:580
void nfsft_finalize(nfsft_plan *plan)
Definition: nfsft.c:572
Header file for the nfft3 library.
#define PRE_PHI_HUT
Definition: nfft3.h:193
#define NFSFT_INDEX(k, n, plan)
Definition: nfft3.h:594