/*
 * Names of the helper executables that this program starts through system().
 * The first group, selected when _WIN32 or _WIN64 is defined, uses the .exe
 * file names; the second group uses names relative to the current directory.
 * NOTE(review): the embedded numbering jumps from 33 to 35 and from 37 to 40,
 * so the preprocessor lines that separate and close the two groups
 * (presumably #else and #endif) are not visible here - confirm against the
 * real file.
 */
30 #if defined(_WIN32) || defined(_WIN64)
31 const char *CMD_CREATEDATASET =
"nfsft_benchomp_createdataset.exe";
32 const char *CMD_DETAIL_SINGLE =
"nfsft_benchomp_detail_single.exe";
33 const char *CMD_DETAIL_THREADS =
"nfsft_benchomp_detail_threads.exe";
35 const char *CMD_CREATEDATASET =
"./nfsft_benchomp_createdataset";
36 const char *CMD_DETAIL_SINGLE =
"./nfsft_benchomp_detail_single";
37 const char *CMD_DETAIL_THREADS =
"./nfsft_benchomp_detail_threads";
/* Stream that receives the generated TeX plots; main() opens and closes it. */
40 static FILE* file_out_tex = NULL;
/*
 * Builds a malloc-ed list of thread counts to benchmark and stores it in *arr.
 * The upper bound comes from X(get_num_threads)(). Two allocation paths are
 * visible: one sized max_threads that is filled by a loop over every k, and
 * one sized alloc_num that is filled with the doubling sequence 1, 2, 4, ...
 * plus max_threads itself (and max_threads/2 under the extra condition below)
 * when max_threads is not reached exactly by doubling.
 * main() uses the return value as the number of entries in *arr.
 * NOTE(review): the condition choosing between the two paths, the initial
 * values of alloc_num and ret_number, and the return statement are on lines
 * that are not visible here.
 */
42 int get_nthreads_array(
int **arr)
44 int max_threads =
X(get_num_threads)();
/* NOTE(review): despite the pw2 name this expression is 1 exactly when
 * max_threads is even, not when it is a power of two - confirm intent. */
48 int max_threads_pw2 = (max_threads / 2) * 2 == max_threads ? 1 : 0;
52 *arr = (
int*) malloc(max_threads*
sizeof(
int));
53 for (k = 0; k < max_threads; k++)
/* Counts one slot per power of two that does not exceed max_threads.
 * NOTE(review): the fill loop below can store up to two extra entries, so
 * alloc_num must start high enough to leave room - verify its initial value. */
58 for (k = 1; k <= max_threads; k*=2, alloc_num++);
60 *arr = (
int*) malloc(alloc_num*
sizeof(
int));
62 for (k = 1; k <= max_threads; k*=2)
64 if (k != max_threads && 2*k > max_threads && max_threads_pw2)
66 *(*arr + ret_number) = max_threads/2;
70 *(*arr + ret_number) = k;
73 if (k != max_threads && 2*k > max_threads)
75 *(*arr + ret_number) = max_threads;
/*
 * Reports on stderr that val differs from the expected value ok, tagged with
 * the caller-supplied msg.
 * NOTE(review): the condition guarding the message and whatever follows it
 * (embedded lines 86-88 and 90-93) are not visible here; presumably the
 * message is printed only when val != ok - confirm.
 */
85 void check_result_value(
const int val,
const int ok,
const char *msg)
89 fprintf(stderr,
"ERROR %s: %d not %d\n", msg, val, ok);
/*
 * Runs the dataset generator: formats a command line that starts
 * CMD_CREATEDATASET with trafo_adjoint, N and M and redirects its standard
 * output to nfsft_benchomp_test.data, echoes the command on stderr, executes
 * it with system() and hands the status to check_result_value expecting 0.
 * NOTE(review): the declaration of cmd is not visible; snprintf is limited to
 * 1024 bytes, so cmd must be at least that large.
 */
95 void run_test_create(
int trafo_adjoint,
int N,
int M)
99 snprintf(cmd, 1024,
"%s %d %d %d > nfsft_benchomp_test.data", CMD_CREATEDATASET, trafo_adjoint, N, M);
100 fprintf(stderr,
"%s\n", cmd);
101 check_result_value(system(cmd), 0,
"createdataset");
/*
 * Opens nfsft_benchomp_test.result in write mode, which truncates or creates
 * the file.
 * NOTE(review): the rest of the body (what is written and whether the stream
 * is checked and closed) is not visible here.
 */
104 void run_test_init_output()
106 FILE *f = fopen(
"nfsft_benchomp_test.result",
"w");
/*
 * Runs one benchmark configuration and aggregates its timings into res.
 *
 * res         - array of at least six entries; avg, min and max of each entry
 *               are reset here and then filled from the benchmark output
 * nrepeat     - number of repetitions passed to the helper and number of
 *               result rows read back
 * m, nfsft_flags, psi_flags - forwarded unchanged on the command line
 * nthreads    - forwarded to CMD_DETAIL_THREADS and echoed in the summary
 *
 * The helper reads nfsft_benchomp_test.data on stdin and writes
 * nfsft_benchomp_test.out, which is then parsed as nrepeat rows of six
 * floating point values each. A summary line is printed on stderr.
 * NOTE(review): the condition that selects between the CMD_DETAIL_SINGLE and
 * CMD_DETAIL_THREADS command lines, the declarations of cmd, v, f, r, t and
 * retval, and the statements that accumulate avg and update min and max are
 * on lines that are not visible here.
 */
141 void run_test(
s_resval *res,
int nrepeat,
int m,
int nfsft_flags,
int psi_flags,
int nthreads)
147 for (t = 0; t < 6; t++)
/* min starts at positive infinity so that the first sample replaces it.
 * NOTE(review): 1.0/0.0 relies on IEEE arithmetic; INFINITY from math.h is
 * the portable spelling. */
149 res[t].avg = 0.0; res[t].min = 1.0/0.0; res[t].max = 0.0;
153 snprintf(cmd, 1024,
"%s %d %d %d %d < nfsft_benchomp_test.data > nfsft_benchomp_test.out", CMD_DETAIL_SINGLE, m, nfsft_flags, psi_flags, nrepeat);
155 snprintf(cmd, 1024,
"%s %d %d %d %d %d < nfsft_benchomp_test.data > nfsft_benchomp_test.out", CMD_DETAIL_THREADS, m, nfsft_flags, psi_flags, nrepeat, nthreads);
156 fprintf(stderr,
"%s\n", cmd);
158 check_result_value(system(cmd), 0, cmd);
/* NOTE(review): the stream is used by the loop on the very next embedded line
 * without a NULL check. */
160 f = fopen(
"nfsft_benchomp_test.out",
"r");
161 for (r = 0; r < nrepeat; r++)
/* Each row must yield exactly six conversions. */
168 retval = fscanf(f,
"%lg %lg %lg %lg %lg %lg", v, v+1, v+2, v+3, v+4, v+5);
169 check_result_value(retval, 6,
"read nfsft_benchomp_test.out");
172 for (t = 0; t < 6; t++)
175 if (res[t].min > v[t])
177 if (res[t].max < v[t])
/* Turn the accumulated sums into means over the repetitions. */
183 for (t = 0; t < 6; t++)
184 res[t].avg /= nrepeat;
186 fprintf(stderr,
"%d %d: ", nthreads, nrepeat);
187 for (t = 0; t < 6; t++)
188 fprintf(stderr,
"%.3e %.3e %.3e | ", res[t].avg, res[t].min, res[t].max);
189 fprintf(stderr,
"\n");
/*
 * Returns a constant string for the given flags value; callers in this file
 * pass param.psi_flags and print the result with %s.
 * NOTE(review): the whole body (embedded lines 193-200) is not visible here,
 * so which flag bits are examined and which strings are returned cannot be
 * stated from this listing.
 */
192 const char *get_psi_string(
int flags)
/*
 * Returns a constant string that depends on whether the NFFT_SORT_NODES bit
 * is set in flags.
 * NOTE(review): the returned strings are on lines not visible here.
 */
201 const char *get_sort_string(
int flags)
203 if (flags & NFFT_SORT_NODES)
/*
 * Returns a constant string that depends on whether the
 * NFFT_OMP_BLOCKWISE_ADJOINT bit is set in flags. get_plot_title checks the
 * result with strlen, so an empty string is a possible return value.
 * NOTE(review): the returned strings are on lines not visible here.
 */
209 const char *get_adjoint_omp_string(
int flags)
211 if (flags & NFFT_OMP_BLOCKWISE_ADJOINT)
/*
 * Bit flags, one per benchmark parameter. determine_different_parameters ORs
 * them into a mask and get_plot_title tests them to decide which parts of a
 * title to emit.
 * NOTE(review): bits 0 and 3 are unused (the sequence goes 1, 2, 4, 5, ...);
 * this is harmless as long as the values are only combined and tested among
 * themselves.
 */
217 #define MASK_TA (1U<<1)
218 #define MASK_N (1U<<2)
219 #define MASK_M (1U<<4)
220 #define MASK_WINM (1U<<5)
221 #define MASK_FLAGS_PSI (1U<<6)
222 #define MASK_FLAGS_SORT (1U<<7)
223 #define MASK_FLAGS_BW (1U<<8)
224 #define MASK_FLAGS_FPT (1U<<9)
/*
 * Compares every testset with its predecessor (t starts at 1) and collects,
 * as MASK_* bits, the parameters that differ somewhere in the sequence:
 * trafo_adjoint, N, M, m and individual bits of psi_flags.
 * With fewer than two testsets the loop does not run and mask stays 0.
 * NOTE(review): the statements executed by the first four comparisons, the
 * conditions guarding MASK_FLAGS_PSI and MASK_FLAGS_FPT, and the return
 * statement are on lines not visible here; presumably mask is returned.
 */
226 unsigned int determine_different_parameters(
s_testset *testsets,
int ntestsets)
229 unsigned int mask = 0;
234 for (t = 1; t < ntestsets; t++)
236 if (testsets[t-1].param.trafo_adjoint != testsets[t].param.trafo_adjoint)
238 if (testsets[t-1].param.N != testsets[t].param.N)
240 if (testsets[t-1].param.M != testsets[t].param.M)
242 if (testsets[t-1].param.m != testsets[t].param.m)
245 mask |= MASK_FLAGS_PSI;
246 if ((testsets[t-1].param.psi_flags & NFFT_SORT_NODES) != (testsets[t].param.psi_flags & NFFT_SORT_NODES))
247 mask |= MASK_FLAGS_SORT;
248 if ((testsets[t-1].param.psi_flags & NFFT_OMP_BLOCKWISE_ADJOINT) != (testsets[t].param.psi_flags & NFFT_OMP_BLOCKWISE_ADJOINT))
249 mask |= MASK_FLAGS_BW;
251 mask |= MASK_FLAGS_FPT;
/*
 * Assembles a TeX title string in outstr (capacity maxlen) from hostname and
 * the fields of param.
 *
 * diff_mask is inverted on entry, so a title part is emitted for a parameter
 * whose MASK_* bit is NOT set in diff_mask. Callers that want the opposite
 * selection pass an already inverted mask.
 *
 * Each part is appended with snprintf at outstr+offset. After every append
 * the function returns early when snprintf failed or when the text would
 * reach the end of the buffer, leaving outstr with what was written so far.
 * NOTE(review): the declarations of len and offset, the offset updates after
 * each append, and the mask tests guarding the transform, N and M parts are
 * on lines not visible here.
 */
257 void get_plot_title(
char *outstr,
int maxlen,
char *hostname,
s_param param,
unsigned int diff_mask)
259 unsigned int mask = ~diff_mask;
263 len = snprintf(outstr, maxlen,
"%s", hostname);
264 if (len < 0 || len+offset >= maxlen-1)
return;
/* Transform name, with a transpose marker for the adjoint case. */
269 len = snprintf(outstr+offset, maxlen-offset,
" $\\mathrm{NFSFT}%s$", param.trafo_adjoint==0?
"":
"^\\top");
270 if (len < 0 || len+offset >= maxlen-1)
return;
276 len = snprintf(outstr+offset, maxlen-offset,
" N=%d", param.N);
277 if (len < 0 || len+offset >= maxlen-1)
return;
283 len = snprintf(outstr+offset, maxlen-offset,
" M=%d", param.M);
284 if (len < 0 || len+offset >= maxlen-1)
return;
288 if (mask & MASK_WINM)
290 len = snprintf(outstr+offset, maxlen-offset,
" m=%d", param.m);
291 if (len < 0 || len+offset >= maxlen-1)
return;
295 if (mask & MASK_FLAGS_PSI)
297 len = snprintf(outstr+offset, maxlen-offset,
" %s", get_psi_string(param.psi_flags));
298 if (len < 0 || len+offset >= maxlen-1)
return;
302 if (mask & MASK_FLAGS_SORT)
304 len = snprintf(outstr+offset, maxlen-offset,
" %s", get_sort_string(param.psi_flags));
305 if (len < 0 || len+offset >= maxlen-1)
return;
/* Skipped when the helper returns an empty string, which avoids appending a
 * lone separator blank. */
309 if ((mask & MASK_FLAGS_BW) && strlen(get_adjoint_omp_string(param.psi_flags)) > 0)
311 len = snprintf(outstr+offset, maxlen-offset,
" %s", get_adjoint_omp_string(param.psi_flags));
312 if (len < 0 || len+offset >= maxlen-1)
return;
316 if (mask & MASK_FLAGS_FPT)
/* The format argument is a conditional between two literals that contain no
 * conversion specifiers, so no further arguments are needed. */
318 len = snprintf(outstr+offset, maxlen-offset, param.nfsft_flags &
NFSFT_USE_DPT ?
" DPT" :
"");
319 if (len < 0 || len+offset >= maxlen-1)
return;
/*
 * Writes one TikZ picture with a speedup-over-threads axis to out.
 *
 * The axis title is built by get_plot_title from the parameters of the first
 * testset that are common to all testsets (diff_mask selects what differs).
 * For every testset an addplot series is written whose points are
 * (nthreads, speedup), where speedup is either tref divided by the average of
 * resval[5] or the first result of the testset divided by that average. The
 * same numbers are echoed on stderr. The legend entries are built with the
 * inverted mask, so they show the parameters that differ between testsets.
 * NOTE(review): the declarations of hostname, title, testset, i and t, the
 * condition choosing between the tref based and the self-relative speedup
 * (presumably use_tref), the separators between legend entries and the
 * closing lines of the HAVE_GETHOSTNAME conditional are not visible here.
 */
325 void print_output_speedup_total_tref(FILE *out,
s_testset *testsets,
int ntestsets,
int use_tref,
double tref)
329 char plottitle[1025];
330 unsigned int diff_mask = determine_different_parameters(testsets, ntestsets);
332 #ifdef HAVE_GETHOSTNAME
333 if (gethostname(hostname, 1024) != 0)
/* Fallback host name used in the title. */
335 strncpy(hostname,
"unnamed", 1024);
337 get_plot_title(plottitle, 1024, hostname, testsets[0].param, diff_mask);
339 fprintf(out,
"\\begin{tikzpicture}\n");
340 fprintf(out,
"\\begin{axis}[");
341 fprintf(out,
"width=0.9\\textwidth, height=0.6\\textwidth, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Speedup, xtick=data, legend style={ legend pos = north west, legend columns=1}, ymajorgrids=true, yminorgrids=true, minor y tick num=4, ");
342 fprintf(out,
" title={%s}", plottitle);
343 fprintf(out,
" ]\n");
345 for (t = 0; t < ntestsets; t++)
/* NOTE(review): this diagnostic format ends with a closing brace that has no
 * opening counterpart; it looks like a leftover from a title format and only
 * affects the stderr text. */
348 fprintf(stderr,
"%s $\\mathrm{NFSFT}%s$ N=%d M=%d m=%d %s %s %s}", hostname, testset.param.trafo_adjoint==0?
"":
"^\\top", testset.param.N, testset.param.M, testset.param.m, get_psi_string(testset.param.psi_flags), get_sort_string(testset.param.psi_flags), get_adjoint_omp_string(testset.param.psi_flags));
349 fprintf(stderr,
"\n");
351 fprintf(out,
"\\addplot coordinates {");
352 for (i = 0; i < testset.nresults; i++)
354 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, tref/testset.results[i].resval[5].avg);
356 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[0].resval[5].avg/testset.results[i].resval[5].avg);
357 fprintf(out,
"};\n");
359 for (i = 0; i < testset.nresults; i++)
361 fprintf(stderr,
"%d:%.3f ", testset.results[i].nthreads, tref/testset.results[i].resval[5].avg);
363 fprintf(stderr,
"%d:%.3f ", testset.results[i].nthreads, testset.results[0].resval[5].avg/testset.results[i].resval[5].avg);
364 fprintf(stderr,
"\n\n");
367 fprintf(out,
"\\legend{{");
368 for (t = 0; t < ntestsets; t++)
/* Passing the inverted mask makes get_plot_title emit the differing
 * parameters; the empty host name keeps the legend entry short. */
373 get_plot_title(title, 255,
"", testsets[t].param, ~(diff_mask));
374 fprintf(out,
"%s", title);
376 fprintf(out,
"}}\n");
377 fprintf(out,
"\\end{axis}\n");
378 fprintf(out,
"\\end{tikzpicture}\n");
379 fprintf(out,
"\n\n");
/*
 * Determines the reference time tref as the smallest average of resval[5]
 * among all results measured with exactly one thread, across all testsets,
 * and forwards everything to print_output_speedup_total_tref.
 * NOTE(review): tref starts at positive infinity (written as 1.0/0.0) and
 * stays infinite when no result has nthreads == 1; the declarations of t and
 * k are not visible here.
 */
384 void print_output_speedup_total(FILE *out,
s_testset *testsets,
int ntestsets,
int use_tref)
386 double tref = 1.0/0.0;
390 for (t = 0; t < ntestsets; t++)
391 for (k = 0; k < testsets[t].nresults; k++)
392 if (testsets[t].results[k].nthreads == 1 && testsets[t].results[k].resval[5].avg < tref)
393 tref = testsets[t].results[k].resval[5].avg;
395 print_output_speedup_total_tref(out, testsets, ntestsets, use_tref, tref);
/*
 * Writes one TikZ picture with a bar chart (ybar) of timings per thread count
 * for a single testset to out.
 *
 * The thread counts form the symbolic x coordinates. Five series are written
 * in this order, each using the avg field: resval[1], resval[2], resval[3],
 * the sum of resval[0] and resval[4], and resval[5]. The legend labels them
 * as DPT or FPT (depending on NFSFT_USE_DPT), c2e, NFFT, rest and total, with
 * transpose markers when trafo_adjoint is nonzero.
 * testset is received by value; nothing visible here modifies it.
 * NOTE(review): the declaration of hostname, the closing lines of the
 * HAVE_GETHOSTNAME conditional and the condition that chooses between the
 * comma-prefixed and the plain coordinate format are not visible here.
 */
398 void print_output_histo_PENRT(FILE *out,
s_testset testset)
400 int i, size = testset.nresults;
403 #ifdef HAVE_GETHOSTNAME
404 if (gethostname(hostname, 1024) != 0)
406 strncpy(hostname,
"unnamed", 1024);
408 fprintf(out,
"\\begin{tikzpicture}\n");
409 fprintf(out,
"\\begin{axis}[");
410 fprintf(out,
"width=0.9\\textwidth, height=0.6\\textwidth, ");
411 fprintf(out,
"symbolic x coords={");
412 for (i = 0; i < size; i++)
414 fprintf(out,
",%d", testset.results[i].nthreads);
416 fprintf(out,
"%d", testset.results[i].nthreads);
418 fprintf(out,
"}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
419 fprintf(out,
" title={%s $\\mathrm{NFSFT}%s$ N=%d M=%d m=%d %s %s %s}", hostname, testset.param.trafo_adjoint==0?
"":
"^\\top", testset.param.N, testset.param.M, testset.param.m, get_psi_string(testset.param.psi_flags), get_sort_string(testset.param.psi_flags), get_adjoint_omp_string(testset.param.psi_flags));
420 fprintf(out,
" ]\n");
/* Series 1: resval[1]. */
421 fprintf(out,
"\\addplot coordinates {");
422 for (i = 0; i < size; i++)
423 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[1].avg);
424 fprintf(out,
"};\n");
/* Series 2: resval[2]. */
426 fprintf(out,
"\\addplot coordinates {");
427 for (i = 0; i < size; i++)
428 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[2].avg);
429 fprintf(out,
"};\n");
/* Series 3: resval[3]. */
431 fprintf(out,
"\\addplot coordinates {");
432 for (i = 0; i < size; i++)
433 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[3].avg);
434 fprintf(out,
"};\n");
/* Series 4: resval[0] and resval[4] combined into one bar. */
436 fprintf(out,
"\\addplot coordinates {");
437 for (i = 0; i < size; i++)
438 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[0].avg + testset.results[i].resval[4].avg);
439 fprintf(out,
"};\n");
/* Series 5: resval[5]. */
441 fprintf(out,
"\\addplot coordinates {");
442 for (i = 0; i < size; i++)
443 fprintf(out,
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[5].avg);
444 fprintf(out,
"};\n");
445 fprintf(out,
"\\legend{%s,%s,$\\mathrm{NFFT}%s$,rest,total}\n", testset.param.nfsft_flags &
NFSFT_USE_DPT ?
"DPT" :
"FPT", testset.param.trafo_adjoint==0?
"c2e":
"$\\mathrm{c2e}^\\top$", testset.param.trafo_adjoint==0?
"":
"^\\top");
446 fprintf(out,
"\\end{axis}\n");
447 fprintf(out,
"\\end{tikzpicture}\n");
448 fprintf(out,
"\n\n");
/*
 * Fills *testset with the given parameters, allocates one result slot per
 * entry of nthreads_array, generates the input data once with run_test_create
 * and then calls run_test for every thread count, repeating each measurement
 * NREPEAT times.
 * The results array is malloc-ed here and stays attached to the testset; no
 * free is visible in this function.
 * NOTE(review): the declaration of i is not visible here.
 */
453 void run_testset(
s_testset *testset,
int trafo_adjoint,
int N,
int M,
int m,
int nfsft_flags,
int psi_flags,
int *nthreads_array,
int n_threads_array_size)
456 testset->param.trafo_adjoint = trafo_adjoint;
457 testset->param.N = N;
458 testset->param.M = M;
459 testset->param.m = m;
460 testset->param.nfsft_flags = nfsft_flags;
461 testset->param.psi_flags = psi_flags;
/* NOTE(review): the malloc result is stored and used without a NULL check on
 * the visible lines. */
463 testset->results = (
s_result*) malloc(n_threads_array_size*
sizeof(
s_result));
464 testset->nresults = n_threads_array_size;
466 run_test_create(testset->param.trafo_adjoint, testset->param.N, testset->param.M);
467 for (i = 0; i < n_threads_array_size; i++)
469 testset->results[i].nthreads = nthreads_array[i];
/* NOTE(review): the last argument repeats the assignment made on the previous
 * line; passing the already stored value would be equivalent. */
470 run_test(testset->results[i].resval, NREPEAT, testset->param.m, testset->param.nfsft_flags, testset->param.psi_flags, testset->results[i].nthreads = nthreads_array[i]);
/*
 * Runs the four benchmark configurations for one value of m and writes the
 * plots to file_out_tex. All four use N = 1024 and M = 1000000:
 *   testsets[0]: trafo_adjoint 0, nfsft_flags 0
 *   testsets[1]: trafo_adjoint 1, nfsft_flags 0, blockwise adjoint enabled
 *   testsets[2]: trafo_adjoint 0, NFSFT_USE_DPT
 *   testsets[3]: trafo_adjoint 1, NFSFT_USE_DPT, blockwise adjoint enabled
 * A histogram per testset is written only when both MEASURE_TIME and
 * MEASURE_TIME_FFTW are defined; a speedup plot is written for each pair
 * (0,1) and (2,3) with use_tref set to 0.
 * NOTE(review): the declaration of testsets and the #endif lines are not
 * visible here; the result arrays allocated by run_testset are not freed on
 * any visible line.
 */
475 void test1(
int *nthreads_array,
int n_threads_array_size,
int m)
479 run_testset(&testsets[0], 0, 1024, 1000000, m, 0, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
480 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
481 print_output_histo_PENRT(file_out_tex, testsets[0]);
484 run_testset(&testsets[1], 1, 1024, 1000000, m, 0, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
485 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
486 print_output_histo_PENRT(file_out_tex, testsets[1]);
489 print_output_speedup_total(file_out_tex, testsets, 2, 0);
491 run_testset(&testsets[2], 0, 1024, 1000000, m,
NFSFT_USE_DPT, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
492 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
493 print_output_histo_PENRT(file_out_tex, testsets[2]);
496 run_testset(&testsets[3], 1, 1024, 1000000, m,
NFSFT_USE_DPT, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
497 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
498 print_output_histo_PENRT(file_out_tex, testsets[3]);
/* testsets+2 makes the DPT pair the two-element array seen by the callee. */
501 print_output_speedup_total(file_out_tex, testsets+2, 2, 0);
/*
 * Program entry point: obtains the list of thread counts, warns on stderr
 * when detailed timing is not compiled in, prints the thread counts, opens
 * nfsft_benchomp_results_plots.tex for writing, runs test1 for m = 2, 4, 6
 * and 8, and closes the output file.
 * argc and argv are not used on any visible line.
 * NOTE(review): the declarations of nthreads_array and k, the #endif of the
 * warning block, any check of the fopen result, any free of nthreads_array
 * and the return statement are on lines not visible here - confirm that the
 * fopen result is checked before test1 writes to it.
 */
504 int main(
int argc,
char** argv)
507 int n_threads_array_size = get_nthreads_array(&nthreads_array);
510 #if !(defined MEASURE_TIME && defined MEASURE_TIME_FFTW)
511 fprintf(stderr,
"WARNING: Detailed time measurements for NFSFT are not activated.\n");
512 fprintf(stderr,
"For more detailed plots, please re-run the configure script with options\n");
513 fprintf(stderr,
"--enable-measure-time --enable-measure-time-fftw --enable-nfsft --enable-openmp\n");
514 fprintf(stderr,
"and run \"make clean all\"\n\n");
517 for (k = 0; k < n_threads_array_size; k++)
518 fprintf(stderr,
"%d ", nthreads_array[k]);
519 fprintf(stderr,
"\n");
521 file_out_tex = fopen(
"nfsft_benchomp_results_plots.tex",
"w");
523 test1(nthreads_array, n_threads_array_size, 2);
524 test1(nthreads_array, n_threads_array_size, 4);
525 test1(nthreads_array, n_threads_array_size, 6);
526 test1(nthreads_array, n_threads_array_size, 8);
528 fclose(file_out_tex);
#define X(name)
Include header for C99 complex datatype.
Header file for the nfft3 library.