NFFT  3.4.1
sort.c
1 /*
2  * Copyright (c) 2002, 2017 Jens Keiner, Stefan Kunis, Daniel Potts
3  *
4  * This program is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU General Public License as published by the Free Software
6  * Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 
19 
20 #ifdef _OPENMP
21 #include <omp.h>
22 #endif
23 
24 #include "infft.h"
25 
26 #define z_swap(_a_, _b_, _t_) do { (_t_) = (_a_); (_a_) = (_b_); (_b_) = (_t_); } while (0)
27 
33 static inline void sort_node_indices_sort_bubble(INT n, INT *keys)
34 {
35  INT i, j, ti;
36 
37  for (i = 0; i < n; ++i)
38  {
39  j = i;
40  while (j > 0 && keys[2 * j + 0] < keys[2 * (j - 1) + 0])
41  {
42  z_swap(keys[2 * j + 0], keys[2 * (j - 1) + 0], ti);
43  z_swap(keys[2 * j + 1], keys[2 * (j - 1) + 1], ti);
44  --j;
45  }
46  }
47 }
48 
54 static inline void sort_node_indices_radix_count(INT n, INT *keys, INT shift, INT mask, INT *counts)
55 {
56  INT i, k;
57 
58  for (i = 0; i < n; ++i)
59  {
60  k = (keys[2 * i + 0] >> shift) & mask;
61  ++counts[k];
62  }
63 }
64 
70 static inline void sort_node_indices_radix_rearrange(INT n, INT *keys_in, INT *keys_out, INT shift, INT mask, INT *displs)
71 {
72  INT i, k;
73 
74  for (i = 0; i < n; ++i)
75  {
76  k = (keys_in[2 * i + 0] >> shift) & mask;
77  keys_out[2 * displs[k] + 0] = keys_in[2 * i + 0];
78  keys_out[2 * displs[k] + 1] = keys_in[2 * i + 1];
79  ++displs[k];
80  }
81 }
82 
83 #define rwidth 9
84 #define radix_n (1 << rwidth)
85 
91 void Y(sort_node_indices_radix_lsdf)(INT n, INT *keys0, INT *keys1, INT rhigh)
92 {
93  const INT radix_mask = radix_n - 1;
94  const INT rhigh_in = rhigh;
95 
96  const INT tmax =
97 #ifdef _OPENMP
98  omp_get_max_threads();
99 #else
100  1;
101 #endif
102 
103  INT *from, *to, *tmp;
104 
105  INT i, k, l, h;
106  INT *lcounts;
107 
108  INT tid = 0, tnum = 1;
109 
110  STACK_MALLOC(INT*, lcounts, (size_t)(tmax * radix_n) * sizeof(INT));
111 
112  from = keys0;
113  to = keys1;
114 
115  while (rhigh >= 0)
116  {
117 #ifdef _OPENMP
118  #pragma omp parallel private(tid, tnum, i, l, h)
119  {
120  tid = omp_get_thread_num();
121  tnum = omp_get_num_threads();
122 #endif
123 
124  for (i = 0; i < radix_n; ++i) lcounts[tid * radix_n + i] = 0;
125 
126  l = (tid * n) / tnum;
127  h = ((tid + 1) * n) / tnum;
128 
129  sort_node_indices_radix_count(h - l, from + (2 * l), rhigh_in - rhigh, radix_mask, &lcounts[tid * radix_n]);
130 #ifdef _OPENMP
131  }
132 #endif
133 
134  k = 0;
135  for (i = 0; i < radix_n; ++i)
136  {
137  for (l = 0; l < tmax; ++l) lcounts[l * radix_n + i] = (k += lcounts[l * radix_n + i]) - lcounts[l * radix_n + i];
138  }
139 
140 #ifdef _OPENMP
141  #pragma omp parallel private(tid, tnum, i, l, h)
142  {
143  tid = omp_get_thread_num();
144  tnum = omp_get_num_threads();
145 #endif
146 
147  l = (tid * n) / tnum;
148  h = ((tid + 1) * n) / tnum;
149 
150  sort_node_indices_radix_rearrange(h - l, from + (2 * l), to, rhigh_in - rhigh, radix_mask, &lcounts[tid * radix_n]);
151 #ifdef _OPENMP
152  }
153 #endif
154 
155 /* print_keys(n, to);*/
156 
157  tmp = from;
158  from = to;
159  to = tmp;
160 
161  rhigh -= rwidth;
162  }
163 
164  if (to == keys0) memcpy(to, from, (size_t)(n) * 2 * sizeof(INT));
165 
166  STACK_FREE(lcounts);
167 }
168 
174 void Y(sort_node_indices_radix_msdf)(INT n, INT *keys0, INT *keys1, INT rhigh)
175 {
176  const INT radix_mask = radix_n - 1;
177 
178  const INT tmax =
179 #ifdef _OPENMP
180  omp_get_max_threads();
181 #else
182  1;
183 #endif
184 
185  INT i, k, l, h;
186  INT *lcounts;
187 
188  INT counts[radix_n], displs[radix_n];
189 
190  INT tid = 0, tnum = 1;
191 
192  STACK_MALLOC(INT*, lcounts, (size_t)(tmax * radix_n) * sizeof(INT));
193 
194  rhigh -= rwidth;
195 
196 #ifdef _OPENMP
197  #pragma omp parallel private(tid, tnum, i, l, h)
198  {
199  tid = omp_get_thread_num();
200  tnum = omp_get_num_threads();
201 #endif
202 
203  for (i = 0; i < radix_n; ++i) lcounts[tid * radix_n + i] = 0;
204 
205  l = (tid * n) / tnum;
206  h = ((tid + 1) * n) / tnum;
207 
208  sort_node_indices_radix_count(h - l, keys0 + (2 * l), rhigh + 1, radix_mask, &lcounts[tid * radix_n]);
209 #ifdef _OPENMP
210  }
211 #endif
212 
213  k = 0;
214  for (i = 0; i < radix_n; ++i)
215  {
216  for (l = 0; l < tmax; ++l) lcounts[l * radix_n + i] = (k += lcounts[l * radix_n + i]) - lcounts[l * radix_n + i];
217 
218  displs[i] = lcounts[0 * radix_n + i];
219  if (i > 0) counts[i - 1] = displs[i] - displs[i - 1];
220  }
221  counts[radix_n - 1] = n - displs[radix_n - 1];
222 
223 #ifdef _OPENMP
224  #pragma omp parallel private(tid, tnum, i, l, h)
225  {
226  tid = omp_get_thread_num();
227  tnum = omp_get_num_threads();
228 #endif
229 
230  l = (tid * n) / tnum;
231  h = ((tid + 1) * n) / tnum;
232 
233  sort_node_indices_radix_rearrange(h - l, keys0 + (2 * l), keys1, rhigh + 1, radix_mask, &lcounts[tid * radix_n]);
234 #ifdef _OPENMP
235  }
236 #endif
237 
238  memcpy(keys0, keys1, (size_t)(n) * 2 * sizeof(INT));
239 
240  if (rhigh >= 0)
241  {
242  for (i = 0; i < radix_n; ++i)
243  {
244  if (counts[i] > 1)
245  {
246  if (counts[i] > 256)
247  Y(sort_node_indices_radix_msdf)(counts[i], keys0 + 2 * displs[i], keys1 + 2 * displs[i], rhigh);
248  else
249  sort_node_indices_sort_bubble(counts[i], keys0 + 2 * displs[i]);
250  }
251  }
252  }
253 
254  STACK_FREE(lcounts);
255 }