8 case '+':
result = 0.;
break;
45 const int stride = 64,
nwgr = 64;
53 for (
int i = 0;
i < 2;
i++) {
55 size_t size = (
sq((
size_t)
N + 1)/stride + 1)*
sizeof(
real);
63 static const char *
opsum =
64 " real reduct = 0.;\n"
65 " for (uint j = 0; j < stride; j++, index++) {\n"
66 " val = _data_val(index);\n"
69 static const char *
opmin =
70 " real reduct = val;\n"
71 " for (uint j = 0; j < stride; j++, index++) {\n"
72 " val = _data_val(index);\n"
73 " if (val < reduct) reduct = val;\n"
75 static const char *
opmax =
76 " real reduct = val;\n"
77 " for (uint j = 0; j < stride; j++, index++) {\n"
78 " val = _data_val(index);\n"
79 " if (val > reduct) reduct = val;\n"
96 "layout (std430, binding = 0) writeonly buffer _reduct_layout {"
97 " real _reduct[]; };\n"
98 "layout (std430, binding = 1) readonly buffer _data_layout { real f[]; } _data");
104 "#define _data_val(index) _data[(index)/",
s,
"].f[(index)%",
s,
"]\n");
108 "#define _data_val(index) _data.f[index]\n");
110 "layout (location = 3) uniform uint offset;\n"
111 "layout (location = 4) uniform uint nb;\n"
112 "layout (location = 5) uniform uint nbr;\n"
113 "layout (local_size_x = ",
nwgrs,
") in;\n"
115 "if (gl_GlobalInvocationID.x < nb) {\n"
116 " uint stride = ",
strides,
";\n"
117 " uint index = stride*gl_GlobalInvocationID.x;\n"
118 " if (index + stride > nbr) stride = nbr - index;\n"
119 " index += offset;\n"
120 " real val = _data_val(index);\n",
122 " _reduct[gl_GlobalInvocationID.x] = reduct;\n"
162 while (
nb >=
nwgr*stride) {
static uint32_t a32_hash(const Adler32Hash *hash)
static number sq(number x)
static void a32_hash_add(Adler32Hash *hash, const void *data, size_t size)
static void a32_hash_init(Adler32Hash *hash)
define sysmalloc malloc define syscalloc calloc define sysrealloc realloc define sysfree free define systrdup strdup define line line line line op define op
*cs[i, 0, 0] a *[i -1, 0, 0] j
#define glDispatchCompute
#define GL_SHADER_STORAGE_BUFFER
#define glGetBufferSubData
#define GL_BUFFER_UPDATE_BARRIER_BIT
#define GL_SHADER_STORAGE_BARRIER_BIT
static struct @7 GPUContext
#define str_append(dst,...)
trace Shader * load_shader(const char *fs, uint32_t hash, const ForeachData *loop)
static char glsl_preproc[]
double gpu_reduction(size_t offset, const char op, const RegionParameters *region, size_t nb)
double cpu_reduction(GLuint *src, size_t offset, size_t nb, const char op)