NFFT  3.4.1
fastsum_benchomp.c
1 /*
2  * Copyright (c) 2002, 2017 Jens Keiner, Stefan Kunis, Daniel Potts
3  *
4  * This program is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU General Public License as published by the Free Software
6  * Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <unistd.h>
22 
23 #include "config.h"
24 
25 #include "nfft3.h"
26 #include "infft.h"
27 
28 #define NREPEAT 5
29 
30 #if defined(_WIN32) || defined(_WIN64)
31 const char *CMD_CREATEDATASET = "fastsum_benchomp_createdataset.exe";
32 const char *CMD_DETAIL_SINGLE = "fastsum_benchomp_detail_single.exe";
33 const char *CMD_DETAIL_THREADS = "fastsum_benchomp_detail_threads.exe";
34 #else
35 const char *CMD_CREATEDATASET = "./fastsum_benchomp_createdataset";
36 const char *CMD_DETAIL_SINGLE = "./fastsum_benchomp_detail_single";
37 const char *CMD_DETAIL_THREADS = "./fastsum_benchomp_detail_threads";
38 #endif
39 
40 static FILE* file_out_tex = NULL;
41 
42 int get_nthreads_array(int **arr)
43 {
44  int max_threads = NFFT(get_num_threads)();
45  int alloc_num = 2;
46  int k;
47  int ret_number = 0;
48  int max_threads_pw2 = (max_threads / 2) * 2 == max_threads ? 1 : 0;
49 
50  if (max_threads <= 5)
51  {
52  *arr = (int*) NFFT(malloc)((size_t) (max_threads) * sizeof(int));
53  for (k = 0; k < max_threads; k++)
54  *(*arr + k) = k + 1;
55  return max_threads;
56  }
57 
58  for (k = 1; k <= max_threads; k *= 2, alloc_num++)
59  ;
60 
61  *arr = (int*) NFFT(malloc)((size_t)(alloc_num) * sizeof(int));
62 
63  for (k = 1; k <= max_threads; k *= 2)
64  {
65  if (k != max_threads && 2 * k > max_threads && max_threads_pw2)
66  {
67  *(*arr + ret_number) = max_threads / 2;
68  ret_number++;
69  }
70 
71  *(*arr + ret_number) = k;
72  ret_number++;
73 
74  if (k != max_threads && 2 * k > max_threads)
75  {
76  *(*arr + ret_number) = max_threads;
77  ret_number++;
78  break;
79  }
80  }
81 
82  return ret_number;
83 }
84 
85 void check_result_value(const int val, const int ok, const char *msg)
86 {
87  if (val != ok)
88  {
89  fprintf(stderr, "ERROR %s: %d not %d\n", msg, val, ok);
90 
91  exit(EXIT_FAILURE);
92  }
93 }
94 
95 void run_test_create(int d, int L, int M)
96 {
97  char cmd[1025];
98 
99  snprintf(cmd, 1024,
100  "%s %d %d %d > fastsum_benchomp_test.data",
101  CMD_CREATEDATASET, d, L, M);
102  fprintf(stderr, "%s\n", cmd);
103  check_result_value(system(cmd), 0, "createdataset");
104 }
105 
106 void run_test_init_output()
107 {
108  FILE *f = fopen("fastsum_benchomp_test.result", "w");
109  if (f != NULL)
110  fclose(f);
111 }
112 
113 typedef struct
114 {
115  int d;
116  int L;
117  int M;
118  int n;
119  int m;
120  int p;
121  char *kernel_name;
122  R c;
123  R eps_I;
124  R eps_B;
125 } s_param;
126 
127 typedef struct
128 {
129  R avg;
130  R min;
131  R max;
132 } s_resval;
133 
134 typedef struct
135 {
136  int nthreads;
137  s_resval resval[16];
138 } s_result;
139 
140 typedef struct
141 {
142  s_param param;
143  s_result *results;
144  int nresults;
145 } s_testset;
146 
147 void run_test(s_resval *res, int nrepeat, int n, int m, int p,
148  char *kernel_name, R c, R eps_I, R eps_B, int nthreads)
149 {
150  char cmd[1025];
151  int r, t;
152 
153  for (t = 0; t < 16; t++)
154  {
155  res[t].avg = K(0.0);
156  res[t].min = K(1.0) / K(0.0);
157  res[t].max = K(0.0);
158  }
159 
160  if (nthreads < 2)
161  snprintf(cmd, 1024,
162  "%s %d %d %d %s " __FR__ " " __FR__ " " __FR__ " < fastsum_benchomp_test.data > fastsum_benchomp_test.out",
163  CMD_DETAIL_SINGLE, n, m, p, kernel_name, c, eps_I, eps_B);
164  else
165  snprintf(cmd, 1024,
166  "%s %d %d %d %s " __FR__ " " __FR__ " " __FR__ " %d < fastsum_benchomp_test.data > fastsum_benchomp_test.out",
167  CMD_DETAIL_THREADS, n, m, p, kernel_name, c, eps_I, eps_B, nthreads);
168  fprintf(stderr, "%s\n", cmd);
169  check_result_value(system(cmd), 0, cmd);
170 
171  for (r = 0; r < nrepeat; r++)
172  {
173  int retval;
174  R v[16];
175  FILE *f;
176  check_result_value(system(cmd), 0, cmd);
177  f = fopen("fastsum_benchomp_test.out", "r");
178  retval = fscanf(f,
179  "" __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ "", v,
180  v + 1, v + 2, v + 3, v + 4, v + 5, v + 6, v + 7, v + 8, v + 9, v + 10,
181  v + 11, v + 12, v + 13, v + 14, v + 15);
182  check_result_value(retval, 16, "read fastsum_benchomp_test.out");
183  fclose(f);
184 
185  for (t = 0; t < 16; t++)
186  {
187  res[t].avg += v[t];
188  if (res[t].min > v[t])
189  res[t].min = v[t];
190  if (res[t].max < v[t])
191  res[t].max = v[t];
192  }
193  }
194 
195  for (t = 0; t < 16; t++)
196  res[t].avg /= (R)(nrepeat);
197 
198  fprintf(stderr, "%d %d: ", nthreads, nrepeat);
199  for (t = 0; t < 16; t++)
200  fprintf(stderr, "%.3" __FES__ " %.3" __FES__ " %.3" __FES__ " | ", res[t].avg, res[t].min, res[t].max);
201  fprintf(stderr, "\n");
202 }
203 
204 const char *get_psi_string(int flags)
205 {
206  if (flags & PRE_PSI)
207  return "prepsi";
208  else if (flags & PRE_ONE_PSI)
209  return "unknownPSI";
210 
211  return "nopsi";
212 }
213 const char *get_sort_string(int flags)
214 {
215  if (flags & NFFT_OMP_BLOCKWISE_ADJOINT)
216  return "";
217 
218  if (flags & NFFT_SORT_NODES)
219  return "sorted";
220 
221  return "unsorted";
222 }
223 
224 const char *get_adjoint_omp_string(int flags)
225 {
226  if (flags & NFFT_OMP_BLOCKWISE_ADJOINT)
227  return "blockwise";
228 
229  return "";
230 }
231 
232 #define MASK_FSUM_D (1U<<0)
233 #define MASK_FSUM_L (1U<<1)
234 #define MASK_FSUM_M (1U<<2)
235 #define MASK_FSUM_MULTIBW (1U<<3)
236 #define MASK_FSUM_WINM (1U<<4)
237 #define MASK_FSUM_P (1U<<5)
238 #define MASK_FSUM_KERNEL (1U<<6)
239 #define MASK_FSUM_EPSI (1U<<7)
240 #define MASK_FSUM_EPSB (1U<<8)
241 
242 unsigned int fastsum_determine_different_parameters(s_testset *testsets,
243  int ntestsets)
244 {
245  int t;
246  unsigned int mask = 0;
247 
248  if (ntestsets < 2)
249  return 0;
250 
251  for (t = 1; t < ntestsets; t++)
252  {
253  if (testsets[t - 1].param.d != testsets[t].param.d)
254  mask |= MASK_FSUM_D;
255  if (testsets[t - 1].param.L != testsets[t].param.L)
256  mask |= MASK_FSUM_L;
257  if (testsets[t - 1].param.M != testsets[t].param.M)
258  mask |= MASK_FSUM_M;
259  if (testsets[t - 1].param.n != testsets[t].param.n)
260  mask |= MASK_FSUM_MULTIBW;
261  if (testsets[t - 1].param.m != testsets[t].param.m)
262  mask |= MASK_FSUM_WINM;
263  if (testsets[t - 1].param.p != testsets[t].param.p)
264  mask |= MASK_FSUM_P;
265  if (strcmp(testsets[t - 1].param.kernel_name, testsets[t].param.kernel_name)
266  != 0)
267  mask |= MASK_FSUM_KERNEL;
268  if (testsets[t - 1].param.eps_I != testsets[t].param.eps_I)
269  mask |= MASK_FSUM_EPSI;
270  if (testsets[t - 1].param.eps_B != testsets[t].param.eps_B)
271  mask |= MASK_FSUM_EPSB;
272  }
273 
274  return mask;
275 }
276 
277 void strEscapeUnderscore(char *dst, char *src, int maxlen)
278 {
279  int i = 0;
280  int len;
281  int offset = 0;
282 
283  while (src[i] != '\0' && len + offset < maxlen - 1)
284  {
285  if (src[i] == '_')
286  len = snprintf(dst + offset, maxlen - offset, "\\_{}");
287  else
288  len = snprintf(dst + offset, maxlen - offset, "%c", src[i]);
289  offset += len;
290  i++;
291  }
292 }
293 
294 void fastsum_get_plot_title_minus_indep(char *outstr, int maxlen,
295  char *hostname, s_param param, unsigned int diff_mask)
296 {
297  unsigned int mask = ~diff_mask;
298  int offset = 0;
299  int len;
300 
301  len = snprintf(outstr, maxlen, "%s", hostname);
302  if (len < 0 || len + offset >= maxlen - 1)
303  return;
304  offset += len;
305 
306  if (mask & MASK_FSUM_D)
307  {
308  len = snprintf(outstr + offset, maxlen - offset, " %dd fastsum", param.d);
309  if (len < 0 || len + offset >= maxlen - 1)
310  return;
311  offset += len;
312  }
313 
314  if ((mask & (MASK_FSUM_L | MASK_FSUM_M)) && param.L == param.M)
315  {
316  len = snprintf(outstr + offset, maxlen - offset, " L=M=%d", param.L);
317  if (len < 0 || len + offset >= maxlen - 1)
318  return;
319  offset += len;
320  }
321  else
322  {
323  if (mask & MASK_FSUM_L)
324  {
325  len = snprintf(outstr + offset, maxlen - offset, " L=%d", param.L);
326  if (len < 0 || len + offset >= maxlen - 1)
327  return;
328  offset += len;
329  }
330 
331  if (mask & MASK_FSUM_M)
332  {
333  len = snprintf(outstr + offset, maxlen - offset, " M=%d", param.M);
334  if (len < 0 || len + offset >= maxlen - 1)
335  return;
336  offset += len;
337  }
338  }
339 
340  if (mask & MASK_FSUM_MULTIBW)
341  {
342  len = snprintf(outstr + offset, maxlen - offset, " n=%d", param.n);
343  if (len < 0 || len + offset >= maxlen - 1)
344  return;
345  offset += len;
346  }
347 
348  if (mask & MASK_FSUM_WINM)
349  {
350  len = snprintf(outstr + offset, maxlen - offset, " m=%d", param.m);
351  if (len < 0 || len + offset >= maxlen - 1)
352  return;
353  offset += len;
354  }
355 
356  if (mask & MASK_FSUM_P)
357  {
358  len = snprintf(outstr + offset, maxlen - offset, " p=%d", param.p);
359  if (len < 0 || len + offset >= maxlen - 1)
360  return;
361  offset += len;
362  }
363 
364  if (mask & MASK_FSUM_KERNEL)
365  {
366  char tmp[maxlen];
367  strEscapeUnderscore(tmp, param.kernel_name, maxlen);
368 
369  len = snprintf(outstr + offset, maxlen - offset, " %s", tmp);
370  if (len < 0 || len + offset >= maxlen - 1)
371  return;
372  offset += len;
373  }
374 
375  if ((mask & (MASK_FSUM_EPSI | MASK_FSUM_EPSB)) && param.eps_I == param.eps_B)
376  {
377  len = snprintf(outstr + offset, maxlen - offset,
378  " $\\varepsilon_\\mathrm{I}$=$\\varepsilon_\\mathrm{B}$=%" __FGS__ "",
379  param.eps_I);
380  if (len < 0 || len + offset >= maxlen - 1)
381  return;
382  offset += len;
383  }
384  else
385  {
386  if (mask & MASK_FSUM_EPSI)
387  {
388  len = snprintf(outstr + offset, maxlen - offset,
389  " $\\varepsilon_\\mathrm{I}$=%" __FGS__ "", param.eps_I);
390  if (len < 0 || len + offset >= maxlen - 1)
391  return;
392  offset += len;
393  }
394 
395  if (mask & MASK_FSUM_EPSB)
396  {
397  len = snprintf(outstr + offset, maxlen - offset,
398  " $\\varepsilon_\\mathrm{B}$=%" __FGS__ "", param.eps_B);
399  if (len < 0 || len + offset >= maxlen - 1)
400  return;
401  offset += len;
402  }
403  }
404 }
405 
406 void nfft_adjoint_print_output_histo_DFBRT(FILE *out, s_testset testset)
407 {
408  int i, size = testset.nresults;
409  char hostname[1025];
410 
411 #ifdef HAVE_GETHOSTNAME
412  if (gethostname(hostname, 1024) != 0)
413 #endif
414  strncpy(hostname, "unnamed", 1024);
415 
416  fprintf(out, "\\begin{tikzpicture}\n");
417  fprintf(out, "\\begin{axis}[");
418  fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, ");
419  fprintf(out, "symbolic x coords={");
420  for (i = 0; i < size; i++)
421  if (i > 0)
422  fprintf(out, ",%d", testset.results[i].nthreads);
423  else
424  fprintf(out, "%d", testset.results[i].nthreads);
425 
426  fprintf(out,
427  "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
428  fprintf(out,
429  " title={%s %dd $\\textrm{NFFT}^\\top$ N=%d $\\sigma$=2 M=%d m=%d prepsi sorted}",
430  hostname, testset.param.d, testset.param.n, testset.param.M,
431  testset.param.m);
432  fprintf(out, " ]\n");
433  fprintf(out, "\\addplot coordinates {");
434  for (i = 0; i < size; i++)
435  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
436  testset.results[i].resval[10].avg);
437  fprintf(out, "};\n");
438 
439  fprintf(out, "\\addplot coordinates {");
440  for (i = 0; i < size; i++)
441  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
442  testset.results[i].resval[11].avg);
443  fprintf(out, "};\n");
444 
445  fprintf(out, "\\addplot coordinates {");
446  for (i = 0; i < size; i++)
447  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
448  testset.results[i].resval[12].avg);
449  fprintf(out, "};\n");
450 
451  fprintf(out, "\\addplot coordinates {");
452  for (i = 0; i < size; i++)
453  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
454  testset.results[i].resval[1].avg);
455  fprintf(out, "};\n");
456 
457  fprintf(out, "\\addplot coordinates {");
458  for (i = 0; i < size; i++)
459  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
460  testset.results[i].resval[4].avg + testset.results[i].resval[1].avg);
461  fprintf(out, "};\n");
462  fprintf(out,
463  "\\legend{D,$\\textrm{F}^\\top$,$\\textrm{B}^\\top$,prepsi,total}\n");
464  fprintf(out, "\\end{axis}\n");
465  fprintf(out, "\\end{tikzpicture}\n");
466  fprintf(out, "\n\n");
467 
468  fflush(out);
469 }
470 
471 void nfft_trafo_print_output_histo_DFBRT(FILE *out, s_testset testset)
472 {
473  int i, size = testset.nresults;
474  char hostname[1025];
475 
476 #ifdef HAVE_GETHOSTNAME
477  if (gethostname(hostname, 1024) != 0)
478 #endif
479  strncpy(hostname, "unnamed", 1024);
480 
481  fprintf(out, "\\begin{tikzpicture}\n");
482  fprintf(out, "\\begin{axis}[");
483  fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, ");
484  fprintf(out, "symbolic x coords={");
485  for (i = 0; i < size; i++)
486  if (i > 0)
487  fprintf(out, ",%d", testset.results[i].nthreads);
488  else
489  fprintf(out, "%d", testset.results[i].nthreads);
490 
491  fprintf(out,
492  "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
493  fprintf(out,
494  " title={%s %dd $\\textrm{NFFT}$ N=%d $\\sigma$=2 M=%d m=%d prepsi sorted}",
495  hostname, testset.param.d, testset.param.n, testset.param.M,
496  testset.param.m);
497  fprintf(out, " ]\n");
498  fprintf(out, "\\addplot coordinates {");
499  for (i = 0; i < size; i++)
500  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
501  testset.results[i].resval[13].avg);
502  fprintf(out, "};\n");
503 
504  fprintf(out, "\\addplot coordinates {");
505  for (i = 0; i < size; i++)
506  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
507  testset.results[i].resval[14].avg);
508  fprintf(out, "};\n");
509 
510  fprintf(out, "\\addplot coordinates {");
511  for (i = 0; i < size; i++)
512  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
513  testset.results[i].resval[15].avg);
514  fprintf(out, "};\n");
515 
516  fprintf(out, "\\addplot coordinates {");
517  for (i = 0; i < size; i++)
518  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
519  testset.results[i].resval[2].avg);
520  fprintf(out, "};\n");
521 
522  fprintf(out, "\\addplot coordinates {");
523  for (i = 0; i < size; i++)
524  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
525  testset.results[i].resval[6].avg + testset.results[i].resval[2].avg);
526  fprintf(out, "};\n");
527  fprintf(out, "\\legend{D,F,B,prepsi,total}\n");
528  fprintf(out, "\\end{axis}\n");
529  fprintf(out, "\\end{tikzpicture}\n");
530  fprintf(out, "\n\n");
531 
532  fflush(out);
533 }
534 
535 void fastsum_print_output_histo_PreRfNfT(FILE *out, s_testset testset)
536 {
537  int i, size = testset.nresults;
538  char hostname[1025];
539  char plottitle[1025];
540 
541 #ifdef HAVE_GETHOSTNAME
542  if (gethostname(hostname, 1024) != 0)
543 #endif
544  strncpy(hostname, "unnamed", 1024);
545 
546  fastsum_get_plot_title_minus_indep(plottitle, 1024, hostname, testset.param,
547  0);
548 
549  fprintf(out, "\\begin{tikzpicture}\n");
550  fprintf(out, "\\begin{axis}[");
551  fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, ");
552  fprintf(out, "symbolic x coords={");
553  for (i = 0; i < size; i++)
554  if (i > 0)
555  fprintf(out, ",%d", testset.results[i].nthreads);
556  else
557  fprintf(out, "%d", testset.results[i].nthreads);
558 
559  fprintf(out,
560  "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
561  fprintf(out, " title={%s}", plottitle);
562  fprintf(out, " ]\n");
563  fprintf(out, "\\addplot coordinates {");
564  for (i = 0; i < size; i++)
565  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
566  testset.results[i].resval[1].avg + testset.results[i].resval[2].avg);
567  fprintf(out, "};\n");
568 
569  fprintf(out, "\\addplot coordinates {");
570  for (i = 0; i < size; i++)
571  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
572  testset.results[i].resval[3].avg);
573  fprintf(out, "};\n");
574 
575  fprintf(out, "\\addplot coordinates {");
576  for (i = 0; i < size; i++)
577  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
578  testset.results[i].resval[4].avg + testset.results[i].resval[5].avg
579  + testset.results[i].resval[6].avg);
580  fprintf(out, "};\n");
581 
582  fprintf(out, "\\addplot coordinates {");
583  for (i = 0; i < size; i++)
584  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
585  testset.results[i].resval[7].avg);
586  fprintf(out, "};\n");
587 
588  fprintf(out, "\\addplot coordinates {");
589  for (i = 0; i < size; i++)
590  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
591  testset.results[i].resval[9].avg - testset.results[i].resval[0].avg);
592  fprintf(out, "};\n");
593  fprintf(out,
594  "\\legend{prepsi (step 1b),init nearfield (step 1c),far field (steps 2a-c),nearfield (step 2d),total $-$ step 1a}\n");
595  fprintf(out, "\\end{axis}\n");
596  fprintf(out, "\\end{tikzpicture}\n");
597  fprintf(out, "\n\n");
598 
599  fflush(out);
600 }
601 
602 void fastsum_print_output_speedup_total_minus_indep(FILE *out,
603  s_testset *testsets, int ntestsets)
604 {
605  int i, t;
606  char hostname[1025];
607  char plottitle[1025];
608  unsigned int diff_mask = fastsum_determine_different_parameters(testsets,
609  ntestsets);
610 
611 #ifdef HAVE_GETHOSTNAME
612  if (gethostname(hostname, 1024) != 0)
613 #endif
614  strncpy(hostname, "unnamed", 1024);
615 
616  fastsum_get_plot_title_minus_indep(plottitle, 1024, hostname,
617  testsets[0].param, diff_mask | MASK_FSUM_WINM);
618 
619  fprintf(out, "\\begin{tikzpicture}\n");
620  fprintf(out, "\\begin{axis}[");
621  fprintf(out,
622  "width=0.9\\textwidth, height=0.6\\textwidth, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Speedup, xtick=data, legend style={ legend pos = north west, legend columns=1}, ymajorgrids=true, yminorgrids=true, minor y tick num=4, ");
623  fprintf(out, " title={%s}", plottitle);
624  fprintf(out, " ]\n");
625 
626  for (t = 0; t < ntestsets; t++)
627  {
628  s_testset testset = testsets[t];
629 
630  R tref = K(0.0);
631  for (i = 0; i < testset.nresults; i++)
632  if (testset.results[i].nthreads == 1)
633  tref = testset.results[i].resval[9].avg
634  - testset.results[i].resval[0].avg;
635 
636  fprintf(out, "\\addplot coordinates {");
637  for (i = 0; i < testset.nresults; i++)
638  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
639  tref
640  / (testset.results[i].resval[9].avg
641  - testset.results[i].resval[0].avg));
642  fprintf(out, "};\n");
643 
644  for (i = 0; i < testset.nresults; i++)
645  {
646  fprintf(stderr, "%d:%.3" __FIS__ " ", testset.results[i].nthreads,
647  tref
648  / (testset.results[i].resval[9].avg
649  - testset.results[i].resval[0].avg));
650  }
651  fprintf(stderr, "\n\n");
652  }
653 
654  fprintf(out, "\\legend{{");
655  for (t = 0; t < ntestsets; t++)
656  {
657  char title[256];
658  if (t > 0)
659  fprintf(out, "},{");
660  fastsum_get_plot_title_minus_indep(title, 255, "", testsets[t].param,
661  ~(diff_mask | MASK_FSUM_WINM));
662  fprintf(out, "%s", title);
663  }
664  fprintf(out, "}}\n");
665  fprintf(out, "\\end{axis}\n");
666  fprintf(out, "\\end{tikzpicture}\n");
667  fprintf(out, "\n\n");
668 
669  fflush(out);
670 }
671 
672 void run_testset(s_testset *testset, int d, int L, int M, int n, int m, int p,
673  char *kernel_name, R c, R eps_I, R eps_B,
674  int *nthreads_array, int n_threads_array_size)
675 {
676  int i;
677  testset->param.d = d;
678  testset->param.L = L;
679  testset->param.M = M;
680  testset->param.n = n;
681  testset->param.m = m;
682  testset->param.p = p;
683  testset->param.kernel_name = kernel_name;
684  testset->param.c = c;
685  testset->param.eps_I = eps_I;
686  testset->param.eps_B = eps_B;
687 
688  testset->results = (s_result*) NFFT(malloc)(
689  (size_t)(n_threads_array_size) * sizeof(s_result));
690  testset->nresults = n_threads_array_size;
691 
692  run_test_create(testset->param.d, testset->param.L, testset->param.M);
693  for (i = 0; i < n_threads_array_size; i++)
694  {
695  testset->results[i].nthreads = nthreads_array[i];
696  run_test(testset->results[i].resval, NREPEAT, testset->param.n,
697  testset->param.m, testset->param.p, testset->param.kernel_name,
698  testset->param.c, testset->param.eps_I, testset->param.eps_B,
699  testset->results[i].nthreads);
700  }
701 
702 }
703 
704 void test1(int *nthreads_array, int n_threads_array_size)
705 {
706  s_testset testsets[1];
707 
708 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
709  run_testset(&testsets[0], 3, 100000, 100000, 128, 4, 7, "one_over_x", K(0.0), K(0.03125), K(0.03125), nthreads_array, n_threads_array_size);
710 
711  fastsum_print_output_speedup_total_minus_indep(file_out_tex, testsets, 1);
712 
713  fastsum_print_output_histo_PreRfNfT(file_out_tex, testsets[0]);
714 
715  nfft_adjoint_print_output_histo_DFBRT(file_out_tex, testsets[0]);
716 
717  nfft_trafo_print_output_histo_DFBRT(file_out_tex, testsets[0]);
718 #endif
719 }
720 
721 int main(int argc, char** argv)
722 {
723  int *nthreads_array;
724  int n_threads_array_size = get_nthreads_array(&nthreads_array);
725  int k;
726 
727 #if !(defined MEASURE_TIME && defined MEASURE_TIME_FFTW)
728  fprintf(stderr, "WARNING: Detailed time measurements are not activated.\n");
729  fprintf(stderr, "Please re-run the configure script with options\n");
730  fprintf(stderr,
731  "--enable-measure-time --enable-measure-time-fftw --enable-openmp\n");
732  fprintf(stderr, "and run \"make clean all\"\n\n");
733 #endif
734 
735  for (k = 0; k < n_threads_array_size; k++)
736  fprintf(stderr, "%d ", nthreads_array[k]);
737  fprintf(stderr, "\n");
738 
739  file_out_tex = fopen("fastsum_benchomp_results_plots.tex", "w");
740 
741  test1(nthreads_array, n_threads_array_size);
742 
743  fclose(file_out_tex);
744 
745  return EXIT_SUCCESS;
746 }
747 
#define PRE_PSI
Definition: nfft3.h:197
Header file for the nfft3 library.
#define PRE_ONE_PSI
Definition: nfft3.h:206