NFFT  3.4.1
nfsft_benchomp.c
1 /*
2  * Copyright (c) 2002, 2017 Jens Keiner, Stefan Kunis, Daniel Potts
3  *
4  * This program is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU General Public License as published by the Free Software
6  * Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <unistd.h>
22 
23 #include "config.h"
24 
25 #include "nfft3.h"
26 #include "infft.h"
27 
/* Number of repetitions requested from the benchmark helper per run. */
#define NREPEAT 5

/*
 * Command names of the helper programs launched through system().
 * On Windows the bare executable name with an ".exe" suffix is used;
 * everywhere else the helper is addressed relative to the current directory.
 */
#if defined(_WIN32) || defined(_WIN64)
#define NFSFT_BENCHOMP_CMD(name) name ".exe"
#else
#define NFSFT_BENCHOMP_CMD(name) "./" name
#endif

const char *CMD_CREATEDATASET = NFSFT_BENCHOMP_CMD("nfsft_benchomp_createdataset");
const char *CMD_DETAIL_SINGLE = NFSFT_BENCHOMP_CMD("nfsft_benchomp_detail_single");
const char *CMD_DETAIL_THREADS = NFSFT_BENCHOMP_CMD("nfsft_benchomp_detail_threads");

/* Stream receiving the generated TeX plots; opened and closed by main(). */
static FILE* file_out_tex = NULL;
41 
42 int get_nthreads_array(int **arr)
43 {
44  int max_threads = X(get_num_threads)();
45  int alloc_num = 2;
46  int k;
47  int ret_number = 0;
48  int max_threads_pw2 = (max_threads / 2) * 2 == max_threads ? 1 : 0;
49 
50  if (max_threads <= 5)
51  {
52  *arr = (int*) malloc(max_threads*sizeof(int));
53  for (k = 0; k < max_threads; k++)
54  *(*arr + k) = k+1;
55  return max_threads;
56  }
57 
58  for (k = 1; k <= max_threads; k*=2, alloc_num++);
59 
60  *arr = (int*) malloc(alloc_num*sizeof(int));
61 
62  for (k = 1; k <= max_threads; k*=2)
63  {
64  if (k != max_threads && 2*k > max_threads && max_threads_pw2)
65  {
66  *(*arr + ret_number) = max_threads/2;
67  ret_number++;
68  }
69 
70  *(*arr + ret_number) = k;
71  ret_number++;
72 
73  if (k != max_threads && 2*k > max_threads)
74  {
75  *(*arr + ret_number) = max_threads;
76  ret_number++;
77  break;
78  }
79  }
80 
81  return ret_number;
82 }
83 
84 
/*
 * Terminates the program when a value differs from what was expected.
 *
 * val  the value that was obtained.
 * ok   the value that was expected.
 * msg  short description printed in front of the error message.
 *
 * Returns normally only if val equals ok; otherwise an error line is written
 * to stderr and the process ends with exit status 1.
 */
void check_result_value(const int val, const int ok, const char *msg)
{
  if (val == ok)
    return;

  fprintf(stderr, "ERROR %s: %d not %d\n", msg, val, ok);
  exit(1);
}
94 
95 void run_test_create(int trafo_adjoint, int N, int M)
96 {
97  char cmd[1025];
98 
99  snprintf(cmd, 1024, "%s %d %d %d > nfsft_benchomp_test.data", CMD_CREATEDATASET, trafo_adjoint, N, M);
100  fprintf(stderr, "%s\n", cmd);
101  check_result_value(system(cmd), 0, "createdataset");
102 }
103 
/*
 * Creates nfsft_benchomp_test.result, or truncates it to zero length if it
 * already exists. A failure to open the file is silently ignored.
 */
void run_test_init_output(void)
{
  FILE *f = fopen("nfsft_benchomp_test.result", "w");

  if (f == NULL)
    return;

  fclose(f);
}
110 
/* Parameters describing one benchmark configuration. */
typedef struct
{
  int trafo_adjoint; /* 0 selects the plain transform label, anything else the "^\top" (adjoint) label */
  int N;             /* first size argument handed to the data set generator */
  int M;             /* second size argument handed to the data set generator */
  int m;             /* first argument handed to the timing helper programs */
  int nfsft_flags;   /* NFSFT flag bits, e.g. NFSFT_USE_DPT */
  int psi_flags;     /* NFFT flag bits, e.g. NFFT_SORT_NODES */
} s_param;

/* Statistics of one measured quantity over all repetitions. */
typedef struct
{
  double avg; /* arithmetic mean */
  double min; /* smallest sample */
  double max; /* largest sample */
} s_resval;

/*
 * Measurements of one benchmark run with a fixed number of threads.
 * The six columns are those printed by the timing helper; the histogram
 * plot labels column 1 as DPT/FPT, column 2 as c2e, column 3 as NFFT,
 * columns 0 + 4 as "rest" and column 5 as "total".
 */
typedef struct
{
  int nthreads;
  s_resval resval[6];
} s_result;

/* One configuration together with its results for every thread count. */
typedef struct
{
  s_param param;
  s_result *results; /* malloc()ed array holding nresults entries */
  int nresults;
} s_testset;
140 
141 void run_test(s_resval *res, int nrepeat, int m, int nfsft_flags, int psi_flags, int nthreads)
142 {
143  FILE *f;
144  char cmd[1025];
145  int r,t;
146 
147  for (t = 0; t < 6; t++)
148  {
149  res[t].avg = 0.0; res[t].min = 1.0/0.0; res[t].max = 0.0;
150  }
151 
152  if (nthreads < 2)
153  snprintf(cmd, 1024, "%s %d %d %d %d < nfsft_benchomp_test.data > nfsft_benchomp_test.out", CMD_DETAIL_SINGLE, m, nfsft_flags, psi_flags, nrepeat);
154  else
155  snprintf(cmd, 1024, "%s %d %d %d %d %d < nfsft_benchomp_test.data > nfsft_benchomp_test.out", CMD_DETAIL_THREADS, m, nfsft_flags, psi_flags, nrepeat, nthreads);
156  fprintf(stderr, "%s\n", cmd);
157 
158  check_result_value(system(cmd), 0, cmd);
159 
160  f = fopen("nfsft_benchomp_test.out", "r");
161  for (r = 0; r < nrepeat; r++)
162  {
163  int retval;
164  double v[6];
165 // FILE *f;
166 // check_result_value(system(cmd), 0, cmd);
167 // f = fopen("nfsft_benchomp_test.out", "r");
168  retval = fscanf(f, "%lg %lg %lg %lg %lg %lg", v, v+1, v+2, v+3, v+4, v+5);
169  check_result_value(retval, 6, "read nfsft_benchomp_test.out");
170 // fclose(f);
171 // fprintf(stderr, "%.3e %.3e %.3e %.3e %.3e %.3e\n", v[0], v[1], v[2], v[3], v[4], v[5]);
172  for (t = 0; t < 6; t++)
173  {
174  res[t].avg += v[t];
175  if (res[t].min > v[t])
176  res[t].min = v[t];
177  if (res[t].max < v[t])
178  res[t].max = v[t];
179  }
180  }
181  fclose(f);
182 
183  for (t = 0; t < 6; t++)
184  res[t].avg /= nrepeat;
185 
186  fprintf(stderr, "%d %d: ", nthreads, nrepeat);
187  for (t = 0; t < 6; t++)
188  fprintf(stderr, "%.3e %.3e %.3e | ", res[t].avg, res[t].min, res[t].max);
189  fprintf(stderr, "\n");
190 }
191 
192 const char *get_psi_string(int flags)
193 {
194  if (flags & PRE_PSI)
195  return "prepsi";
196  else if (flags & PRE_ONE_PSI)
197  return "unknownPSI";
198 
199  return "nopsi";
200 }
201 const char *get_sort_string(int flags)
202 {
203  if (flags & NFFT_SORT_NODES)
204  return "sorted";
205 
206  return "unsorted";
207 }
208 
209 const char *get_adjoint_omp_string(int flags)
210 {
211  if (flags & NFFT_OMP_BLOCKWISE_ADJOINT)
212  return "blockwise";
213 
214  return "";
215 }
216 
/*
 * Bits of the parameter mask: one bit per benchmark parameter that can
 * appear in a plot title or legend. Bits 0 and 3 are not used.
 */
#define MASK_TA          0x002U /* trafo_adjoint */
#define MASK_N           0x004U /* N */
#define MASK_M           0x010U /* M */
#define MASK_WINM        0x020U /* m */
#define MASK_FLAGS_PSI   0x040U /* PRE_ONE_PSI bits of psi_flags */
#define MASK_FLAGS_SORT  0x080U /* NFFT_SORT_NODES bit of psi_flags */
#define MASK_FLAGS_BW    0x100U /* NFFT_OMP_BLOCKWISE_ADJOINT bit of psi_flags */
#define MASK_FLAGS_FPT   0x200U /* NFSFT_USE_DPT bit of nfsft_flags */
225 
226 unsigned int determine_different_parameters(s_testset *testsets, int ntestsets)
227 {
228  int t;
229  unsigned int mask = 0;
230 
231  if (ntestsets < 2)
232  return 0;
233 
234  for (t = 1; t < ntestsets; t++)
235  {
236  if (testsets[t-1].param.trafo_adjoint != testsets[t].param.trafo_adjoint)
237  mask |= MASK_TA;
238  if (testsets[t-1].param.N != testsets[t].param.N)
239  mask |= MASK_N;
240  if (testsets[t-1].param.M != testsets[t].param.M)
241  mask |= MASK_M;
242  if (testsets[t-1].param.m != testsets[t].param.m)
243  mask |= MASK_WINM;
244  if ((testsets[t-1].param.psi_flags & PRE_ONE_PSI) != (testsets[t].param.psi_flags & PRE_ONE_PSI))
245  mask |= MASK_FLAGS_PSI;
246  if ((testsets[t-1].param.psi_flags & NFFT_SORT_NODES) != (testsets[t].param.psi_flags & NFFT_SORT_NODES))
247  mask |= MASK_FLAGS_SORT;
248  if ((testsets[t-1].param.psi_flags & NFFT_OMP_BLOCKWISE_ADJOINT) != (testsets[t].param.psi_flags & NFFT_OMP_BLOCKWISE_ADJOINT))
249  mask |= MASK_FLAGS_BW;
250  if ((testsets[t-1].param.nfsft_flags & NFSFT_USE_DPT) != (testsets[t].param.nfsft_flags & NFSFT_USE_DPT))
251  mask |= MASK_FLAGS_FPT;
252  }
253 
254  return mask;
255 }
256 
257 void get_plot_title(char *outstr, int maxlen, char *hostname, s_param param, unsigned int diff_mask)
258 {
259  unsigned int mask = ~diff_mask;
260  int offset = 0;
261  int len;
262 
263  len = snprintf(outstr, maxlen, "%s", hostname);
264  if (len < 0 || len+offset >= maxlen-1) return;
265  offset += len;
266 
267  if (mask & MASK_TA)
268  {
269  len = snprintf(outstr+offset, maxlen-offset, " $\\mathrm{NFSFT}%s$", param.trafo_adjoint==0?"":"^\\top");
270  if (len < 0 || len+offset >= maxlen-1) return;
271  offset += len;
272  }
273 
274  if (mask & MASK_N)
275  {
276  len = snprintf(outstr+offset, maxlen-offset, " N=%d", param.N);
277  if (len < 0 || len+offset >= maxlen-1) return;
278  offset += len;
279  }
280 
281  if (mask & MASK_M)
282  {
283  len = snprintf(outstr+offset, maxlen-offset, " M=%d", param.M);
284  if (len < 0 || len+offset >= maxlen-1) return;
285  offset += len;
286  }
287 
288  if (mask & MASK_WINM)
289  {
290  len = snprintf(outstr+offset, maxlen-offset, " m=%d", param.m);
291  if (len < 0 || len+offset >= maxlen-1) return;
292  offset += len;
293  }
294 
295  if (mask & MASK_FLAGS_PSI)
296  {
297  len = snprintf(outstr+offset, maxlen-offset, " %s", get_psi_string(param.psi_flags));
298  if (len < 0 || len+offset >= maxlen-1) return;
299  offset += len;
300  }
301 
302  if (mask & MASK_FLAGS_SORT)
303  {
304  len = snprintf(outstr+offset, maxlen-offset, " %s", get_sort_string(param.psi_flags));
305  if (len < 0 || len+offset >= maxlen-1) return;
306  offset += len;
307  }
308 
309  if ((mask & MASK_FLAGS_BW) && strlen(get_adjoint_omp_string(param.psi_flags)) > 0)
310  {
311  len = snprintf(outstr+offset, maxlen-offset, " %s", get_adjoint_omp_string(param.psi_flags));
312  if (len < 0 || len+offset >= maxlen-1) return;
313  offset += len;
314  }
315 
316  if (mask & MASK_FLAGS_FPT)
317  {
318  len = snprintf(outstr+offset, maxlen-offset, param.nfsft_flags & NFSFT_USE_DPT ? " DPT" : "");
319  if (len < 0 || len+offset >= maxlen-1) return;
320  offset += len;
321  }
322 
323 }
324 
325 void print_output_speedup_total_tref(FILE *out, s_testset *testsets, int ntestsets, int use_tref, double tref)
326 {
327  int i, t;
328  char hostname[1025];
329  char plottitle[1025];
330  unsigned int diff_mask = determine_different_parameters(testsets, ntestsets);
331 
332 #ifdef HAVE_GETHOSTNAME
333  if (gethostname(hostname, 1024) != 0)
334 #endif
335  strncpy(hostname, "unnamed", 1024);
336 
337  get_plot_title(plottitle, 1024, hostname, testsets[0].param, diff_mask);
338 
339  fprintf(out, "\\begin{tikzpicture}\n");
340  fprintf(out, "\\begin{axis}[");
341  fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Speedup, xtick=data, legend style={ legend pos = north west, legend columns=1}, ymajorgrids=true, yminorgrids=true, minor y tick num=4, ");
342  fprintf(out, " title={%s}", plottitle);
343  fprintf(out, " ]\n");
344 
345  for (t = 0; t < ntestsets; t++)
346  {
347  s_testset testset = testsets[t];
348  fprintf(stderr, "%s $\\mathrm{NFSFT}%s$ N=%d M=%d m=%d %s %s %s}", hostname, testset.param.trafo_adjoint==0?"":"^\\top", testset.param.N, testset.param.M, testset.param.m, get_psi_string(testset.param.psi_flags), get_sort_string(testset.param.psi_flags), get_adjoint_omp_string(testset.param.psi_flags));
349  fprintf(stderr, "\n");
350 
351  fprintf(out, "\\addplot coordinates {");
352  for (i = 0; i < testset.nresults; i++)
353  if (use_tref == 1)
354  fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, tref/testset.results[i].resval[5].avg);
355  else
356  fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[0].resval[5].avg/testset.results[i].resval[5].avg);
357  fprintf(out, "};\n");
358 
359  for (i = 0; i < testset.nresults; i++)
360  if (use_tref == 1)
361  fprintf(stderr, "%d:%.3f ", testset.results[i].nthreads, tref/testset.results[i].resval[5].avg);
362  else
363  fprintf(stderr, "%d:%.3f ", testset.results[i].nthreads, testset.results[0].resval[5].avg/testset.results[i].resval[5].avg);
364  fprintf(stderr, "\n\n");
365  }
366 
367  fprintf(out, "\\legend{{");
368  for (t = 0; t < ntestsets; t++)
369  {
370  char title[256];
371  if (t > 0)
372  fprintf(out, "},{");
373  get_plot_title(title, 255, "", testsets[t].param, ~(diff_mask));
374  fprintf(out, "%s", title);
375  }
376  fprintf(out, "}}\n");
377  fprintf(out, "\\end{axis}\n");
378  fprintf(out, "\\end{tikzpicture}\n");
379  fprintf(out, "\n\n");
380 
381  fflush(out);
382 }
383 
384 void print_output_speedup_total(FILE *out, s_testset *testsets, int ntestsets, int use_tref)
385 {
386  double tref = 1.0/0.0;
387  int t, k;
388 
389  if (use_tref == 1)
390  for (t = 0; t < ntestsets; t++)
391  for (k = 0; k < testsets[t].nresults; k++)
392  if (testsets[t].results[k].nthreads == 1 && testsets[t].results[k].resval[5].avg < tref)
393  tref = testsets[t].results[k].resval[5].avg;
394 
395  print_output_speedup_total_tref(out, testsets, ntestsets, use_tref, tref);
396 }
397 
398 void print_output_histo_PENRT(FILE *out, s_testset testset)
399 {
400  int i, size = testset.nresults;
401  char hostname[1025];
402 
403 #ifdef HAVE_GETHOSTNAME
404  if (gethostname(hostname, 1024) != 0)
405 #endif
406  strncpy(hostname, "unnamed", 1024);
407 
408  fprintf(out, "\\begin{tikzpicture}\n");
409  fprintf(out, "\\begin{axis}[");
410  fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, ");
411  fprintf(out, "symbolic x coords={");
412  for (i = 0; i < size; i++)
413  if (i > 0)
414  fprintf(out, ",%d", testset.results[i].nthreads);
415  else
416  fprintf(out, "%d", testset.results[i].nthreads);
417 
418  fprintf(out, "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
419  fprintf(out, " title={%s $\\mathrm{NFSFT}%s$ N=%d M=%d m=%d %s %s %s}", hostname, testset.param.trafo_adjoint==0?"":"^\\top", testset.param.N, testset.param.M, testset.param.m, get_psi_string(testset.param.psi_flags), get_sort_string(testset.param.psi_flags), get_adjoint_omp_string(testset.param.psi_flags));
420  fprintf(out, " ]\n");
421  fprintf(out, "\\addplot coordinates {");
422  for (i = 0; i < size; i++)
423  fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[1].avg);
424  fprintf(out, "};\n");
425 
426  fprintf(out, "\\addplot coordinates {");
427  for (i = 0; i < size; i++)
428  fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[2].avg);
429  fprintf(out, "};\n");
430 
431  fprintf(out, "\\addplot coordinates {");
432  for (i = 0; i < size; i++)
433  fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[3].avg);
434  fprintf(out, "};\n");
435 
436  fprintf(out, "\\addplot coordinates {");
437  for (i = 0; i < size; i++)
438  fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[0].avg + testset.results[i].resval[4].avg);
439  fprintf(out, "};\n");
440 
441  fprintf(out, "\\addplot coordinates {");
442  for (i = 0; i < size; i++)
443  fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[5].avg);
444  fprintf(out, "};\n");
445  fprintf(out, "\\legend{%s,%s,$\\mathrm{NFFT}%s$,rest,total}\n", testset.param.nfsft_flags & NFSFT_USE_DPT ? "DPT" : "FPT", testset.param.trafo_adjoint==0?"c2e":"$\\mathrm{c2e}^\\top$", testset.param.trafo_adjoint==0?"":"^\\top");
446  fprintf(out, "\\end{axis}\n");
447  fprintf(out, "\\end{tikzpicture}\n");
448  fprintf(out, "\n\n");
449 
450  fflush(out);
451 }
452 
453 void run_testset(s_testset *testset, int trafo_adjoint, int N, int M, int m, int nfsft_flags, int psi_flags, int *nthreads_array, int n_threads_array_size)
454 {
455  int i;
456  testset->param.trafo_adjoint = trafo_adjoint;
457  testset->param.N = N;
458  testset->param.M = M;
459  testset->param.m = m;
460  testset->param.nfsft_flags = nfsft_flags;
461  testset->param.psi_flags = psi_flags;
462 
463  testset->results = (s_result*) malloc(n_threads_array_size*sizeof(s_result));
464  testset->nresults = n_threads_array_size;
465 
466  run_test_create(testset->param.trafo_adjoint, testset->param.N, testset->param.M);
467  for (i = 0; i < n_threads_array_size; i++)
468  {
469  testset->results[i].nthreads = nthreads_array[i];
470  run_test(testset->results[i].resval, NREPEAT, testset->param.m, testset->param.nfsft_flags, testset->param.psi_flags, testset->results[i].nthreads = nthreads_array[i]);
471  }
472 
473 }
474 
475 void test1(int *nthreads_array, int n_threads_array_size, int m)
476 {
477  s_testset testsets[4];
478 
479  run_testset(&testsets[0], 0, 1024, 1000000, m, 0, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
480 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
481  print_output_histo_PENRT(file_out_tex, testsets[0]);
482 #endif
483 
484  run_testset(&testsets[1], 1, 1024, 1000000, m, 0, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
485 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
486  print_output_histo_PENRT(file_out_tex, testsets[1]);
487 #endif
488 
489  print_output_speedup_total(file_out_tex, testsets, 2, 0);
490 
491  run_testset(&testsets[2], 0, 1024, 1000000, m, NFSFT_USE_DPT, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
492 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
493  print_output_histo_PENRT(file_out_tex, testsets[2]);
494 #endif
495 
496  run_testset(&testsets[3], 1, 1024, 1000000, m, NFSFT_USE_DPT, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
497 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
498  print_output_histo_PENRT(file_out_tex, testsets[3]);
499 #endif
500 
501  print_output_speedup_total(file_out_tex, testsets+2, 2, 0);
502 }
503 
504 int main(int argc, char** argv)
505 {
506  int *nthreads_array;
507  int n_threads_array_size = get_nthreads_array(&nthreads_array);
508  int k;
509 
510 #if !(defined MEASURE_TIME && defined MEASURE_TIME_FFTW)
511  fprintf(stderr, "WARNING: Detailed time measurements for NFSFT are not activated.\n");
512  fprintf(stderr, "For more detailed plots, please re-run the configure script with options\n");
513  fprintf(stderr, "--enable-measure-time --enable-measure-time-fftw --enable-nfsft --enable-openmp\n");
514  fprintf(stderr, "and run \"make clean all\"\n\n");
515 #endif
516 
517  for (k = 0; k < n_threads_array_size; k++)
518  fprintf(stderr, "%d ", nthreads_array[k]);
519  fprintf(stderr, "\n");
520 
521  file_out_tex = fopen("nfsft_benchomp_results_plots.tex", "w");
522 
523  test1(nthreads_array, n_threads_array_size, 2);
524  test1(nthreads_array, n_threads_array_size, 4);
525  test1(nthreads_array, n_threads_array_size, 6);
526  test1(nthreads_array, n_threads_array_size, 8);
527 
528  fclose(file_out_tex);
529 
530  return 0;
531 }
#define X(name)
Include header for C99 complex datatype.
Definition: fastsum.h:57
#define PRE_PSI
Definition: nfft3.h:197
Header file for the nfft3 library.
#define PRE_ONE_PSI
Definition: nfft3.h:206
#define NFSFT_USE_DPT
Definition: nfft3.h:577