30 #if defined(_WIN32) || defined(_WIN64)
31 const char *CMD_CREATEDATASET =
"fastsum_benchomp_createdataset.exe";
32 const char *CMD_DETAIL_SINGLE =
"fastsum_benchomp_detail_single.exe";
33 const char *CMD_DETAIL_THREADS =
"fastsum_benchomp_detail_threads.exe";
35 const char *CMD_CREATEDATASET =
"./fastsum_benchomp_createdataset";
36 const char *CMD_DETAIL_SINGLE =
"./fastsum_benchomp_detail_single";
37 const char *CMD_DETAIL_THREADS =
"./fastsum_benchomp_detail_threads";
40 static FILE* file_out_tex = NULL;
42 int get_nthreads_array(
int **arr)
44 int max_threads = NFFT(get_num_threads)();
48 int max_threads_pw2 = (max_threads / 2) * 2 == max_threads ? 1 : 0;
52 *arr = (
int*) NFFT(malloc)((size_t) (max_threads) *
sizeof(int));
53 for (k = 0; k < max_threads; k++)
58 for (k = 1; k <= max_threads; k *= 2, alloc_num++)
61 *arr = (
int*) NFFT(malloc)((size_t)(alloc_num) *
sizeof(int));
63 for (k = 1; k <= max_threads; k *= 2)
65 if (k != max_threads && 2 * k > max_threads && max_threads_pw2)
67 *(*arr + ret_number) = max_threads / 2;
71 *(*arr + ret_number) = k;
74 if (k != max_threads && 2 * k > max_threads)
76 *(*arr + ret_number) = max_threads;
85 void check_result_value(
const int val,
const int ok,
const char *msg)
89 fprintf(stderr,
"ERROR %s: %d not %d\n", msg, val, ok);
95 void run_test_create(
int d,
int L,
int M)
100 "%s %d %d %d > fastsum_benchomp_test.data",
101 CMD_CREATEDATASET, d, L, M);
102 fprintf(stderr,
"%s\n", cmd);
103 check_result_value(system(cmd), 0,
"createdataset");
106 void run_test_init_output()
108 FILE *f = fopen(
"fastsum_benchomp_test.result",
"w");
147 void run_test(
s_resval *res,
int nrepeat,
int n,
int m,
int p,
148 char *kernel_name, R c, R eps_I, R eps_B,
int nthreads)
153 for (t = 0; t < 16; t++)
156 res[t].min = K(1.0) / K(0.0);
162 "%s %d %d %d %s " __FR__
" " __FR__
" " __FR__
" < fastsum_benchomp_test.data > fastsum_benchomp_test.out",
163 CMD_DETAIL_SINGLE, n, m, p, kernel_name, c, eps_I, eps_B);
166 "%s %d %d %d %s " __FR__
" " __FR__
" " __FR__
" %d < fastsum_benchomp_test.data > fastsum_benchomp_test.out",
167 CMD_DETAIL_THREADS, n, m, p, kernel_name, c, eps_I, eps_B, nthreads);
168 fprintf(stderr,
"%s\n", cmd);
169 check_result_value(system(cmd), 0, cmd);
171 for (r = 0; r < nrepeat; r++)
176 check_result_value(system(cmd), 0, cmd);
177 f = fopen(
"fastsum_benchomp_test.out",
"r");
179 "" __FR__
" " __FR__
" " __FR__
" " __FR__
" " __FR__
" " __FR__
" " __FR__
" " __FR__
" " __FR__
" " __FR__
" " __FR__
" " __FR__
" " __FR__
" " __FR__
" " __FR__
" " __FR__
"", v,
180 v + 1, v + 2, v + 3, v + 4, v + 5, v + 6, v + 7, v + 8, v + 9, v + 10,
181 v + 11, v + 12, v + 13, v + 14, v + 15);
182 check_result_value(retval, 16,
"read fastsum_benchomp_test.out");
185 for (t = 0; t < 16; t++)
188 if (res[t].min > v[t])
190 if (res[t].max < v[t])
195 for (t = 0; t < 16; t++)
196 res[t].avg /= (R)(nrepeat);
198 fprintf(stderr,
"%d %d: ", nthreads, nrepeat);
199 for (t = 0; t < 16; t++)
200 fprintf(stderr,
"%.3" __FES__
" %.3" __FES__
" %.3" __FES__
" | ", res[t].avg, res[t].min, res[t].max);
201 fprintf(stderr,
"\n");
204 const char *get_psi_string(
int flags)
213 const char *get_sort_string(
int flags)
215 if (flags & NFFT_OMP_BLOCKWISE_ADJOINT)
218 if (flags & NFFT_SORT_NODES)
224 const char *get_adjoint_omp_string(
int flags)
226 if (flags & NFFT_OMP_BLOCKWISE_ADJOINT)
232 #define MASK_FSUM_D (1U<<0)
233 #define MASK_FSUM_L (1U<<1)
234 #define MASK_FSUM_M (1U<<2)
235 #define MASK_FSUM_MULTIBW (1U<<3)
236 #define MASK_FSUM_WINM (1U<<4)
237 #define MASK_FSUM_P (1U<<5)
238 #define MASK_FSUM_KERNEL (1U<<6)
239 #define MASK_FSUM_EPSI (1U<<7)
240 #define MASK_FSUM_EPSB (1U<<8)
242 unsigned int fastsum_determine_different_parameters(
s_testset *testsets,
246 unsigned int mask = 0;
251 for (t = 1; t < ntestsets; t++)
253 if (testsets[t - 1].param.d != testsets[t].param.d)
255 if (testsets[t - 1].param.L != testsets[t].param.L)
257 if (testsets[t - 1].param.M != testsets[t].param.M)
259 if (testsets[t - 1].param.n != testsets[t].param.n)
260 mask |= MASK_FSUM_MULTIBW;
261 if (testsets[t - 1].param.m != testsets[t].param.m)
262 mask |= MASK_FSUM_WINM;
263 if (testsets[t - 1].param.p != testsets[t].param.p)
265 if (strcmp(testsets[t - 1].param.kernel_name, testsets[t].param.kernel_name)
267 mask |= MASK_FSUM_KERNEL;
268 if (testsets[t - 1].param.eps_I != testsets[t].param.eps_I)
269 mask |= MASK_FSUM_EPSI;
270 if (testsets[t - 1].param.eps_B != testsets[t].param.eps_B)
271 mask |= MASK_FSUM_EPSB;
277 void strEscapeUnderscore(
char *dst,
char *src,
int maxlen)
283 while (src[i] !=
'\0' && len + offset < maxlen - 1)
286 len = snprintf(dst + offset, maxlen - offset,
"\\_{}");
288 len = snprintf(dst + offset, maxlen - offset,
"%c", src[i]);
294 void fastsum_get_plot_title_minus_indep(
char *outstr,
int maxlen,
295 char *hostname,
s_param param,
unsigned int diff_mask)
297 unsigned int mask = ~diff_mask;
301 len = snprintf(outstr, maxlen,
"%s", hostname);
302 if (len < 0 || len + offset >= maxlen - 1)
306 if (mask & MASK_FSUM_D)
308 len = snprintf(outstr + offset, maxlen - offset,
" %dd fastsum", param.d);
309 if (len < 0 || len + offset >= maxlen - 1)
314 if ((mask & (MASK_FSUM_L | MASK_FSUM_M)) && param.L == param.M)
316 len = snprintf(outstr + offset, maxlen - offset,
" L=M=%d", param.L);
317 if (len < 0 || len + offset >= maxlen - 1)
323 if (mask & MASK_FSUM_L)
325 len = snprintf(outstr + offset, maxlen - offset,
" L=%d", param.L);
326 if (len < 0 || len + offset >= maxlen - 1)
331 if (mask & MASK_FSUM_M)
333 len = snprintf(outstr + offset, maxlen - offset,
" M=%d", param.M);
334 if (len < 0 || len + offset >= maxlen - 1)
340 if (mask & MASK_FSUM_MULTIBW)
342 len = snprintf(outstr + offset, maxlen - offset,
" n=%d", param.n);
343 if (len < 0 || len + offset >= maxlen - 1)
348 if (mask & MASK_FSUM_WINM)
350 len = snprintf(outstr + offset, maxlen - offset,
" m=%d", param.m);
351 if (len < 0 || len + offset >= maxlen - 1)
356 if (mask & MASK_FSUM_P)
358 len = snprintf(outstr + offset, maxlen - offset,
" p=%d", param.p);
359 if (len < 0 || len + offset >= maxlen - 1)
364 if (mask & MASK_FSUM_KERNEL)
367 strEscapeUnderscore(tmp, param.kernel_name, maxlen);
369 len = snprintf(outstr + offset, maxlen - offset,
" %s", tmp);
370 if (len < 0 || len + offset >= maxlen - 1)
375 if ((mask & (MASK_FSUM_EPSI | MASK_FSUM_EPSB)) && param.eps_I == param.eps_B)
377 len = snprintf(outstr + offset, maxlen - offset,
378 " $\\varepsilon_\\mathrm{I}$=$\\varepsilon_\\mathrm{B}$=%" __FGS__
"",
380 if (len < 0 || len + offset >= maxlen - 1)
386 if (mask & MASK_FSUM_EPSI)
388 len = snprintf(outstr + offset, maxlen - offset,
389 " $\\varepsilon_\\mathrm{I}$=%" __FGS__
"", param.eps_I);
390 if (len < 0 || len + offset >= maxlen - 1)
395 if (mask & MASK_FSUM_EPSB)
397 len = snprintf(outstr + offset, maxlen - offset,
398 " $\\varepsilon_\\mathrm{B}$=%" __FGS__
"", param.eps_B);
399 if (len < 0 || len + offset >= maxlen - 1)
406 void nfft_adjoint_print_output_histo_DFBRT(FILE *out,
s_testset testset)
408 int i, size = testset.nresults;
411 #ifdef HAVE_GETHOSTNAME
412 if (gethostname(hostname, 1024) != 0)
414 strncpy(hostname,
"unnamed", 1024);
416 fprintf(out,
"\\begin{tikzpicture}\n");
417 fprintf(out,
"\\begin{axis}[");
418 fprintf(out,
"width=0.9\\textwidth, height=0.6\\textwidth, ");
419 fprintf(out,
"symbolic x coords={");
420 for (i = 0; i < size; i++)
422 fprintf(out,
",%d", testset.results[i].nthreads);
424 fprintf(out,
"%d", testset.results[i].nthreads);
427 "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
429 " title={%s %dd $\\textrm{NFFT}^\\top$ N=%d $\\sigma$=2 M=%d m=%d prepsi sorted}",
430 hostname, testset.param.d, testset.param.n, testset.param.M,
432 fprintf(out,
" ]\n");
433 fprintf(out,
"\\addplot coordinates {");
434 for (i = 0; i < size; i++)
435 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
436 testset.results[i].resval[10].avg);
437 fprintf(out,
"};\n");
439 fprintf(out,
"\\addplot coordinates {");
440 for (i = 0; i < size; i++)
441 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
442 testset.results[i].resval[11].avg);
443 fprintf(out,
"};\n");
445 fprintf(out,
"\\addplot coordinates {");
446 for (i = 0; i < size; i++)
447 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
448 testset.results[i].resval[12].avg);
449 fprintf(out,
"};\n");
451 fprintf(out,
"\\addplot coordinates {");
452 for (i = 0; i < size; i++)
453 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
454 testset.results[i].resval[1].avg);
455 fprintf(out,
"};\n");
457 fprintf(out,
"\\addplot coordinates {");
458 for (i = 0; i < size; i++)
459 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
460 testset.results[i].resval[4].avg + testset.results[i].resval[1].avg);
461 fprintf(out,
"};\n");
463 "\\legend{D,$\\textrm{F}^\\top$,$\\textrm{B}^\\top$,prepsi,total}\n");
464 fprintf(out,
"\\end{axis}\n");
465 fprintf(out,
"\\end{tikzpicture}\n");
466 fprintf(out,
"\n\n");
471 void nfft_trafo_print_output_histo_DFBRT(FILE *out,
s_testset testset)
473 int i, size = testset.nresults;
476 #ifdef HAVE_GETHOSTNAME
477 if (gethostname(hostname, 1024) != 0)
479 strncpy(hostname,
"unnamed", 1024);
481 fprintf(out,
"\\begin{tikzpicture}\n");
482 fprintf(out,
"\\begin{axis}[");
483 fprintf(out,
"width=0.9\\textwidth, height=0.6\\textwidth, ");
484 fprintf(out,
"symbolic x coords={");
485 for (i = 0; i < size; i++)
487 fprintf(out,
",%d", testset.results[i].nthreads);
489 fprintf(out,
"%d", testset.results[i].nthreads);
492 "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
494 " title={%s %dd $\\textrm{NFFT}$ N=%d $\\sigma$=2 M=%d m=%d prepsi sorted}",
495 hostname, testset.param.d, testset.param.n, testset.param.M,
497 fprintf(out,
" ]\n");
498 fprintf(out,
"\\addplot coordinates {");
499 for (i = 0; i < size; i++)
500 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
501 testset.results[i].resval[13].avg);
502 fprintf(out,
"};\n");
504 fprintf(out,
"\\addplot coordinates {");
505 for (i = 0; i < size; i++)
506 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
507 testset.results[i].resval[14].avg);
508 fprintf(out,
"};\n");
510 fprintf(out,
"\\addplot coordinates {");
511 for (i = 0; i < size; i++)
512 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
513 testset.results[i].resval[15].avg);
514 fprintf(out,
"};\n");
516 fprintf(out,
"\\addplot coordinates {");
517 for (i = 0; i < size; i++)
518 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
519 testset.results[i].resval[2].avg);
520 fprintf(out,
"};\n");
522 fprintf(out,
"\\addplot coordinates {");
523 for (i = 0; i < size; i++)
524 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
525 testset.results[i].resval[6].avg + testset.results[i].resval[2].avg);
526 fprintf(out,
"};\n");
527 fprintf(out,
"\\legend{D,F,B,prepsi,total}\n");
528 fprintf(out,
"\\end{axis}\n");
529 fprintf(out,
"\\end{tikzpicture}\n");
530 fprintf(out,
"\n\n");
535 void fastsum_print_output_histo_PreRfNfT(FILE *out,
s_testset testset)
537 int i, size = testset.nresults;
539 char plottitle[1025];
541 #ifdef HAVE_GETHOSTNAME
542 if (gethostname(hostname, 1024) != 0)
544 strncpy(hostname,
"unnamed", 1024);
546 fastsum_get_plot_title_minus_indep(plottitle, 1024, hostname, testset.param,
549 fprintf(out,
"\\begin{tikzpicture}\n");
550 fprintf(out,
"\\begin{axis}[");
551 fprintf(out,
"width=0.9\\textwidth, height=0.6\\textwidth, ");
552 fprintf(out,
"symbolic x coords={");
553 for (i = 0; i < size; i++)
555 fprintf(out,
",%d", testset.results[i].nthreads);
557 fprintf(out,
"%d", testset.results[i].nthreads);
560 "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
561 fprintf(out,
" title={%s}", plottitle);
562 fprintf(out,
" ]\n");
563 fprintf(out,
"\\addplot coordinates {");
564 for (i = 0; i < size; i++)
565 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
566 testset.results[i].resval[1].avg + testset.results[i].resval[2].avg);
567 fprintf(out,
"};\n");
569 fprintf(out,
"\\addplot coordinates {");
570 for (i = 0; i < size; i++)
571 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
572 testset.results[i].resval[3].avg);
573 fprintf(out,
"};\n");
575 fprintf(out,
"\\addplot coordinates {");
576 for (i = 0; i < size; i++)
577 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
578 testset.results[i].resval[4].avg + testset.results[i].resval[5].avg
579 + testset.results[i].resval[6].avg);
580 fprintf(out,
"};\n");
582 fprintf(out,
"\\addplot coordinates {");
583 for (i = 0; i < size; i++)
584 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
585 testset.results[i].resval[7].avg);
586 fprintf(out,
"};\n");
588 fprintf(out,
"\\addplot coordinates {");
589 for (i = 0; i < size; i++)
590 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
591 testset.results[i].resval[9].avg - testset.results[i].resval[0].avg);
592 fprintf(out,
"};\n");
594 "\\legend{prepsi (step 1b),init nearfield (step 1c),far field (steps 2a-c),nearfield (step 2d),total $-$ step 1a}\n");
595 fprintf(out,
"\\end{axis}\n");
596 fprintf(out,
"\\end{tikzpicture}\n");
597 fprintf(out,
"\n\n");
602 void fastsum_print_output_speedup_total_minus_indep(FILE *out,
607 char plottitle[1025];
608 unsigned int diff_mask = fastsum_determine_different_parameters(testsets,
611 #ifdef HAVE_GETHOSTNAME
612 if (gethostname(hostname, 1024) != 0)
614 strncpy(hostname,
"unnamed", 1024);
616 fastsum_get_plot_title_minus_indep(plottitle, 1024, hostname,
617 testsets[0].param, diff_mask | MASK_FSUM_WINM);
619 fprintf(out,
"\\begin{tikzpicture}\n");
620 fprintf(out,
"\\begin{axis}[");
622 "width=0.9\\textwidth, height=0.6\\textwidth, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Speedup, xtick=data, legend style={ legend pos = north west, legend columns=1}, ymajorgrids=true, yminorgrids=true, minor y tick num=4, ");
623 fprintf(out,
" title={%s}", plottitle);
624 fprintf(out,
" ]\n");
626 for (t = 0; t < ntestsets; t++)
631 for (i = 0; i < testset.nresults; i++)
632 if (testset.results[i].nthreads == 1)
633 tref = testset.results[i].resval[9].avg
634 - testset.results[i].resval[0].avg;
636 fprintf(out,
"\\addplot coordinates {");
637 for (i = 0; i < testset.nresults; i++)
638 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
640 / (testset.results[i].resval[9].avg
641 - testset.results[i].resval[0].avg));
642 fprintf(out,
"};\n");
644 for (i = 0; i < testset.nresults; i++)
646 fprintf(stderr,
"%d:%.3" __FIS__
" ", testset.results[i].nthreads,
648 / (testset.results[i].resval[9].avg
649 - testset.results[i].resval[0].avg));
651 fprintf(stderr,
"\n\n");
654 fprintf(out,
"\\legend{{");
655 for (t = 0; t < ntestsets; t++)
660 fastsum_get_plot_title_minus_indep(title, 255,
"", testsets[t].param,
661 ~(diff_mask | MASK_FSUM_WINM));
662 fprintf(out,
"%s", title);
664 fprintf(out,
"}}\n");
665 fprintf(out,
"\\end{axis}\n");
666 fprintf(out,
"\\end{tikzpicture}\n");
667 fprintf(out,
"\n\n");
672 void run_testset(
s_testset *testset,
int d,
int L,
int M,
int n,
int m,
int p,
673 char *kernel_name, R c, R eps_I, R eps_B,
674 int *nthreads_array,
int n_threads_array_size)
677 testset->param.d = d;
678 testset->param.L = L;
679 testset->param.M = M;
680 testset->param.n = n;
681 testset->param.m = m;
682 testset->param.p = p;
683 testset->param.kernel_name = kernel_name;
684 testset->param.c = c;
685 testset->param.eps_I = eps_I;
686 testset->param.eps_B = eps_B;
688 testset->results = (
s_result*) NFFT(malloc)(
689 (size_t)(n_threads_array_size) *
sizeof(
s_result));
690 testset->nresults = n_threads_array_size;
692 run_test_create(testset->param.d, testset->param.L, testset->param.M);
693 for (i = 0; i < n_threads_array_size; i++)
695 testset->results[i].nthreads = nthreads_array[i];
696 run_test(testset->results[i].resval, NREPEAT, testset->param.n,
697 testset->param.m, testset->param.p, testset->param.kernel_name,
698 testset->param.c, testset->param.eps_I, testset->param.eps_B,
699 testset->results[i].nthreads);
704 void test1(
int *nthreads_array,
int n_threads_array_size)
708 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
709 run_testset(&testsets[0], 3, 100000, 100000, 128, 4, 7,
"one_over_x", K(0.0), K(0.03125), K(0.03125), nthreads_array, n_threads_array_size);
711 fastsum_print_output_speedup_total_minus_indep(file_out_tex, testsets, 1);
713 fastsum_print_output_histo_PreRfNfT(file_out_tex, testsets[0]);
715 nfft_adjoint_print_output_histo_DFBRT(file_out_tex, testsets[0]);
717 nfft_trafo_print_output_histo_DFBRT(file_out_tex, testsets[0]);
721 int main(
int argc,
char** argv)
724 int n_threads_array_size = get_nthreads_array(&nthreads_array);
727 #if !(defined MEASURE_TIME && defined MEASURE_TIME_FFTW)
728 fprintf(stderr,
"WARNING: Detailed time measurements are not activated.\n");
729 fprintf(stderr,
"Please re-run the configure script with options\n");
731 "--enable-measure-time --enable-measure-time-fftw --enable-openmp\n");
732 fprintf(stderr,
"and run \"make clean all\"\n\n");
735 for (k = 0; k < n_threads_array_size; k++)
736 fprintf(stderr,
"%d ", nthreads_array[k]);
737 fprintf(stderr,
"\n");
739 file_out_tex = fopen(
"fastsum_benchomp_results_plots.tex",
"w");
741 test1(nthreads_array, n_threads_array_size);
743 fclose(file_out_tex);
Header file for the nfft3 library.