Basilisk CFD
Adaptive Cartesian mesh PDE framework
Loading...
Searching...
No Matches
reduction.h
Go to the documentation of this file.
1/** @file reduction.h
2 */
3double cpu_reduction (GLuint * src, size_t offset, size_t nb, const char op)
4{
6 double result;
7 switch (op) {
8 case '+': result = 0.; break;
9 case 'M': result = - HUGE; break;
10 case 'm': result = HUGE; break;
11 default: assert (false);
12 }
13 offset *= sizeof(real), nb *= sizeof(real);
14 int index = offset/GPUContext.max_ssbo_size;
15 offset -= index*GPUContext.max_ssbo_size;
16 while (nb) {
18 size_t size = min (nb, GPUContext.max_ssbo_size - offset);
20 assert (a);
21 switch (op) {
22 case '+':
23 for (size_t i = 0; i < size/sizeof(real); i++, a++)
24 result += *a;
25 break;
26 case 'M':
27 for (size_t i = 0; i < size/sizeof(real); i++, a++)
28 result = fmax (result, *a);
29 break;
30 case 'm':
31 for (size_t i = 0; i < size/sizeof(real); i++, a++)
32 result = fmin (result, *a);
33 break;
34 default: assert (false);
35 }
37 nb -= size, offset = 0, index++;
38 }
40 return result;
41}
42
43double gpu_reduction (size_t offset, const char op, const RegionParameters * region, size_t nb)
44{
45 const int stride = 64, nwgr = 64;
46 bool is_foreach_point = (region->n.x == 1 && region->n.y == 1);
47 if (!is_foreach_point && nb < nwgr*stride)
48 return cpu_reduction (GPUContext.ssbo, offset, nb, op);
49
50 GLuint * br = gpu_grid->reduct;
51 if (!br[0]) {
52 GL_C (glGenBuffers (2, br));
53 for (int i = 0; i < 2; i++) {
55 size_t size = (sq((size_t)N + 1)/stride + 1)*sizeof(real);
56 assert (size < GPUContext.max_ssbo_size); // must fit within a single SSBO
58 }
60 }
61
62 const char * operation;
63 static const char * opsum =
64 " real reduct = 0.;\n"
65 " for (uint j = 0; j < stride; j++, index++) {\n"
66 " val = _data_val(index);\n"
67 " reduct += val;\n"
68 " }\n";
69 static const char * opmin =
70 " real reduct = val;\n"
71 " for (uint j = 0; j < stride; j++, index++) {\n"
72 " val = _data_val(index);\n"
73 " if (val < reduct) reduct = val;\n"
74 " }\n";
75 static const char * opmax =
76 " real reduct = val;\n"
77 " for (uint j = 0; j < stride; j++, index++) {\n"
78 " val = _data_val(index);\n"
79 " if (val > reduct) reduct = val;\n"
80 " }\n";
81 switch (op) {
82 case '+': operation = opsum; break;
83 case 'M': operation = opmax; break;
84 case 'm': operation = opmin; break;
85 default: // unknown reduction operation
86 assert (false);
87 }
88
89 char nwgrs[20], strides[20];
90 snprintf (nwgrs, 19, "%d", nwgr);
91 snprintf (strides, 19, "%d", stride);
92
93 char * fs =
95 "#version 430\n", glsl_preproc,
96 "layout (std430, binding = 0) writeonly buffer _reduct_layout {"
97 " real _reduct[]; };\n"
98 "layout (std430, binding = 1) readonly buffer _data_layout { real f[]; } _data");
99 if (GPUContext.nssbo > 1) {
100 char a[20], s[30];
101 snprintf (a, 19, "%d", GPUContext.nssbo);
102 snprintf (s, 29, "%ld", GPUContext.max_ssbo_size/sizeof(real));
103 fs = str_append (fs, "[", a, "];\n"
104 "#define _data_val(index) _data[(index)/", s, "].f[(index)%", s, "]\n");
105 }
106 else
107 fs = str_append (fs, ";\n"
108 "#define _data_val(index) _data.f[index]\n");
109 fs = str_append (fs,
110 "layout (location = 3) uniform uint offset;\n"
111 "layout (location = 4) uniform uint nb;\n"
112 "layout (location = 5) uniform uint nbr;\n"
113 "layout (local_size_x = ", nwgrs, ") in;\n"
114 "void main() {\n"
115 "if (gl_GlobalInvocationID.x < nb) {\n"
116 " uint stride = ", strides, ";\n"
117 " uint index = stride*gl_GlobalInvocationID.x;\n"
118 " if (index + stride > nbr) stride = nbr - index;\n"
119 " index += offset;\n"
120 " real val = _data_val(index);\n",
121 operation,
122 " _reduct[gl_GlobalInvocationID.x] = reduct;\n"
123 "}}\n");
124 Shader * shader;
129 assert (shader);
130 if (shader->id != GPUContext.current_shader) {
131 GL_C (glUseProgram (shader->id));
132 GPUContext.current_shader = shader->id;
133 }
134 const GLint loffset = 3, lnb = 4, lnbr = 5;
135
136 int start = offset*sizeof(real)/GPUContext.max_ssbo_size;
137 offset -= start*GPUContext.max_ssbo_size/sizeof(real);
138 for (int i = start; i < GPUContext.nssbo; i++)
140
141 if (is_foreach_point) {
142 real result = 0.;
143 int i = (region->p.x - X0)/L0*N;
144 int j = (region->p.y - Y0)/L0*N;
145 if (i >= 0 && i < N && j >= 0 && j < N) {
146 offset += i*N + j;
147 GL_C (glUniform1ui (loffset, offset));
149 GL_C (glUniform1ui (lnbr, 1));
150 GL_C (glUniform1ui (lnb, 1));
152 GL_C (glDispatchCompute (1, 1, 1));
156 }
157 return result;
158 }
159
160 GL_C (glUniform1ui (loffset, offset));
161 int src = 0, dst = 1;
162 while (nb >= nwgr*stride) {
165 if (nb % stride) {
166 nb /= stride;
167 nb++;
168 }
169 else
170 nb /= stride;
171 int ng = nb/nwgr;
172 if (ng*nwgr < nb)
173 ng++;
174 GL_C (glUniform1ui (lnb, nb));
176 GL_C (glDispatchCompute (ng, 1, 1));
177 swap (int, src, dst);
178 if (offset) {
180 offset = 0;
181 }
183 }
184
185 return cpu_reduction (br + src, 0, nb, op);
186}
const vector a
Definition all-mach.h:59
int min
Definition balance.h:192
double real
Definition cartesian.h:6
int x
Definition common.h:76
#define HUGE
Definition common.h:6
double L0
Definition common.h:36
int N
Definition common.h:39
static uint32_t a32_hash(const Adler32Hash *hash)
Definition common.h:608
static number sq(number x)
Definition common.h:11
#define swap(type, a, b)
Definition common.h:19
static void a32_hash_add(Adler32Hash *hash, const void *data, size_t size)
Definition common.h:600
double X0
Definition common.h:34
double Y0
Definition common.h:34
static void a32_hash_init(Adler32Hash *hash)
Definition common.h:594
define sysmalloc malloc define syscalloc calloc define sysrealloc realloc define sysfree free define systrdup strdup define line line line line op define op
Definition config.h:573
#define assert(a)
Definition config.h:107
scalar s
Definition embed-tree.h:56
*cs[i, 0, 0] a *[i -1, 0, 0] j
Definition embed.h:88
vector fs[]
Definition embed.h:22
scalar int i
Definition embed.h:74
#define GL_MAP_READ_BIT
Definition glad.h:1211
#define glGenBuffers
Definition glad.h:3225
#define glMemoryBarrier
Definition glad.h:4524
#define glBindBuffer
Definition glad.h:3219
#define GL_DYNAMIC_READ
Definition glad.h:877
#define glDispatchCompute
Definition glad.h:4552
int GLint
Definition glad.h:100
#define GL_SHADER_STORAGE_BUFFER
Definition glad.h:1837
#define glGetBufferSubData
Definition glad.h:3237
#define glBindBufferBase
Definition glad.h:3588
#define glBufferData
Definition glad.h:3231
unsigned int GLuint
Definition glad.h:101
#define GL_BUFFER_UPDATE_BARRIER_BIT
Definition glad.h:1551
#define GL_SHADER_STORAGE_BARRIER_BIT
Definition glad.h:1851
#define glUseProgram
Definition glad.h:3361
#define glUnmapBuffer
Definition glad.h:3243
#define glUniform1ui
Definition glad.h:3684
#define glMapBufferRange
Definition glad.h:3795
#define GL_C(stmt)
Definition gpu.h:37
static struct @7 GPUContext
#define str_append(dst,...)
Definition grid.h:404
trace Shader * load_shader(const char *fs, uint32_t hash, const ForeachData *loop)
Definition grid.h:1130
static char glsl_preproc[]
Definition grid.h:408
#define gpu_grid
Definition grid.h:389
int dst
size_t size
double gpu_reduction(size_t offset, const char op, const RegionParameters *region, size_t nb)
Definition reduction.h:43
double cpu_reduction(GLuint *src, size_t offset, size_t nb, const char op)
Definition reduction.h:3
Definition grid.h:376
Array * index