[gegl] edge:laplace: Fix offsets & stride in OpenCL version



commit 2b4ecb8bda6f1dab9c819e5715f7df9af30b7c40
Author: Daniel Sabo <DanielSabo gmail com>
Date:   Thu Oct 17 00:05:15 2013 -0700

    edge:laplace: Fix offsets & stride in OpenCL version

 opencl/edge-laplace.cl   |  158 +++++++++++++++++++++------------------------
 opencl/edge-laplace.cl.h |  161 ++++++++++++++++++++++------------------------
 2 files changed, 149 insertions(+), 170 deletions(-)
---
diff --git a/opencl/edge-laplace.cl b/opencl/edge-laplace.cl
index 80d3767..f154e12 100644
--- a/opencl/edge-laplace.cl
+++ b/opencl/edge-laplace.cl
@@ -40,54 +40,51 @@ void minmax(float x1, float x2, float x3,
         *max_result = fmax(max2, x5);
 }
 
+float4 get_pix(global float4 *in, int x, int y, int rowstride)
+{
+    int idx = x + y * rowstride;
+    return in[idx];
+}
+
 kernel void pre_edgelaplace (global float4 *in,
                              global float4 *out)
 {
     int gidx = get_global_id(0);
     int gidy = get_global_id(1);
 
-    int src_width  = get_global_size(0) + LAPLACE_RADIUS;
-    int src_height = get_global_size(1);
-
-    int i = gidx + LAPLACE_RADIUS - 1, j = gidy + LAPLACE_RADIUS - 1;
-    int gid1d = i + j * src_width;
-
-    float pix_fl[4] = {
-        in[gid1d - 1 - src_width].x, in[gid1d - 1 - src_width].y,
-        in[gid1d - 1 - src_width].z, in[gid1d - 1 - src_width].w
-    };
-    float pix_fm[4] = {
-        in[gid1d     - src_width].x, in[gid1d     - src_width].y,
-        in[gid1d     - src_width].z, in[gid1d     - src_width].w
-    };
-    float pix_fr[4] = {
-        in[gid1d + 1 - src_width].x, in[gid1d + 1 - src_width].y,
-        in[gid1d + 1 - src_width].z, in[gid1d + 1 - src_width].w
-    };
-    float pix_ml[4] = {
-        in[gid1d - 1            ].x, in[gid1d - 1            ].y,
-        in[gid1d - 1            ].z, in[gid1d - 1            ].w
-    };
-    float pix_mm[4] = {
-        in[gid1d                ].x, in[gid1d                ].y,
-        in[gid1d                ].z, in[gid1d                ].w
-    };
-    float pix_mr[4] = {
-        in[gid1d + 1            ].x, in[gid1d + 1            ].y,
-        in[gid1d + 1            ].z, in[gid1d + 1            ].w
-    };
-    float pix_bl[4] = {
-        in[gid1d - 1 + src_width].x, in[gid1d - 1 + src_width].y,
-        in[gid1d - 1 + src_width].z, in[gid1d - 1 + src_width].w
-    };
-    float pix_bm[4] = {
-        in[gid1d     + src_width].x, in[gid1d     + src_width].y,
-        in[gid1d     + src_width].z, in[gid1d     + src_width].w
-    };
-    float pix_br[4] = {
-        in[gid1d + 1 + src_width].x, in[gid1d + 1 + src_width].y,
-        in[gid1d + 1 + src_width].z, in[gid1d + 1 + src_width].w
-    };
+    int src_width  = get_global_size(0) + 2;
+    int src_height = get_global_size(1) + 2;
+
+    int i = gidx + 1, j = gidy + 1;
+
+    float4 cur_pix;
+
+    cur_pix = get_pix(in, i - 1, j - 1, src_width);
+    float pix_fl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+    cur_pix = get_pix(in, i - 0, j - 1, src_width);
+    float pix_fm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+    cur_pix = get_pix(in, i + 1, j - 1, src_width);
+    float pix_fr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+    cur_pix = get_pix(in, i - 1, j - 0, src_width);
+    float pix_ml[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+    cur_pix = get_pix(in, i - 0, j - 0, src_width);
+    float pix_mm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+    cur_pix = get_pix(in, i + 1, j - 0, src_width);
+    float pix_mr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+    cur_pix = get_pix(in, i - 1, j + 1, src_width);
+    float pix_bl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+    cur_pix = get_pix(in, i - 0, j + 1, src_width);
+    float pix_bm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+    cur_pix = get_pix(in, i + 1, j + 1, src_width);
+    float pix_br[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
 
     int c;
     float minval, maxval;
@@ -107,7 +104,7 @@ kernel void pre_edgelaplace (global float4 *in,
     }
     gradient[3] = pix_mm[3];
 
-    out[gid1d] = (float4)
+    out[gidx + gidy * get_global_size(0)] = (float4)
         (gradient[0], gradient[1], gradient[2], gradient[3]);
 }
 
@@ -117,48 +114,39 @@ kernel void knl_edgelaplace (global float4 *in,
     int gidx = get_global_id(0);
     int gidy = get_global_id(1);
 
-    int src_width  = get_global_size(0) + LAPLACE_RADIUS * 2;
-    int src_height = get_global_size(1);
-
-    int i = gidx + LAPLACE_RADIUS, j = gidy + LAPLACE_RADIUS;
-    int gid1d = i + j * src_width;
-
-    float pix_fl[4] = {
-        in[gid1d - 1 - src_width].x, in[gid1d - 1 - src_width].y,
-        in[gid1d - 1 - src_width].z, in[gid1d - 1 - src_width].w
-    };
-    float pix_fm[4] = {
-        in[gid1d     - src_width].x, in[gid1d     - src_width].y,
-        in[gid1d     - src_width].z, in[gid1d     - src_width].w
-    };
-    float pix_fr[4] = {
-        in[gid1d + 1 - src_width].x, in[gid1d + 1 - src_width].y,
-        in[gid1d + 1 - src_width].z, in[gid1d + 1 - src_width].w
-    };
-    float pix_ml[4] = {
-        in[gid1d - 1            ].x, in[gid1d - 1            ].y,
-        in[gid1d - 1            ].z, in[gid1d - 1            ].w
-    };
-    float pix_mm[4] = {
-        in[gid1d                ].x, in[gid1d                ].y,
-        in[gid1d                ].z, in[gid1d                ].w
-    };
-    float pix_mr[4] = {
-        in[gid1d + 1            ].x, in[gid1d + 1            ].y,
-        in[gid1d + 1            ].z, in[gid1d + 1            ].w
-    };
-    float pix_bl[4] = {
-        in[gid1d - 1 + src_width].x, in[gid1d - 1 + src_width].y,
-        in[gid1d - 1 + src_width].z, in[gid1d - 1 + src_width].w
-    };
-    float pix_bm[4] = {
-        in[gid1d     + src_width].x, in[gid1d     + src_width].y,
-        in[gid1d     + src_width].z, in[gid1d     + src_width].w
-    };
-    float pix_br[4] = {
-        in[gid1d + 1 + src_width].x, in[gid1d + 1 + src_width].y,
-        in[gid1d + 1 + src_width].z, in[gid1d + 1 + src_width].w
-    };
+    int src_width  = get_global_size(0) + 2;
+    int src_height = get_global_size(1) + 2;
+
+    int i = gidx + 1, j = gidy + 1;
+
+    float4 cur_pix;
+
+    cur_pix = get_pix(in, i - 1, j - 1, src_width);
+    float pix_fl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+    cur_pix = get_pix(in, i - 0, j - 1, src_width);
+    float pix_fm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+    cur_pix = get_pix(in, i + 1, j - 1, src_width);
+    float pix_fr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+    cur_pix = get_pix(in, i - 1, j - 0, src_width);
+    float pix_ml[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+    cur_pix = get_pix(in, i - 0, j - 0, src_width);
+    float pix_mm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+    cur_pix = get_pix(in, i + 1, j - 0, src_width);
+    float pix_mr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+    cur_pix = get_pix(in, i - 1, j + 1, src_width);
+    float pix_bl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+    cur_pix = get_pix(in, i - 0, j + 1, src_width);
+    float pix_bm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
+
+    cur_pix = get_pix(in, i + 1, j + 1, src_width);
+    float pix_br[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};
 
     int c;
     float value[4];
diff --git a/opencl/edge-laplace.cl.h b/opencl/edge-laplace.cl.h
index f0f7a0c..5aa630a 100644
--- a/opencl/edge-laplace.cl.h
+++ b/opencl/edge-laplace.cl.h
@@ -41,54 +41,52 @@ static const char* edge_laplace_cl_source =
 "        *max_result = fmax(max2, x5);                                         \n"
 "}                                                                             \n"
 "                                                                              \n"
+"float4 get_pix(global float4 *in, int x, int y, int rowstride)                \n"
+"{                                                                             \n"
+"    int idx = x + y * rowstride;                                              \n"
+"    return in[idx];                                                           \n"
+"}                                                                             \n"
+"                                                                              \n"
 "kernel void pre_edgelaplace (global float4 *in,                               \n"
 "                             global float4 *out)                              \n"
 "{                                                                             \n"
 "    int gidx = get_global_id(0);                                              \n"
 "    int gidy = get_global_id(1);                                              \n"
 "                                                                              \n"
-"    int src_width  = get_global_size(0) + LAPLACE_RADIUS;                     \n"
-"    int src_height = get_global_size(1);                                      \n"
-"                                                                              \n"
-"    int i = gidx + LAPLACE_RADIUS - 1, j = gidy + LAPLACE_RADIUS - 1;         \n"
-"    int gid1d = i + j * src_width;                                            \n"
-"                                                                              \n"
-"    float pix_fl[4] = {                                                       \n"
-"        in[gid1d - 1 - src_width].x, in[gid1d - 1 - src_width].y,             \n"
-"        in[gid1d - 1 - src_width].z, in[gid1d - 1 - src_width].w              \n"
-"    };                                                                        \n"
-"    float pix_fm[4] = {                                                       \n"
-"        in[gid1d     - src_width].x, in[gid1d     - src_width].y,             \n"
-"        in[gid1d     - src_width].z, in[gid1d     - src_width].w              \n"
-"    };                                                                        \n"
-"    float pix_fr[4] = {                                                       \n"
-"        in[gid1d + 1 - src_width].x, in[gid1d + 1 - src_width].y,             \n"
-"        in[gid1d + 1 - src_width].z, in[gid1d + 1 - src_width].w              \n"
-"    };                                                                        \n"
-"    float pix_ml[4] = {                                                       \n"
-"        in[gid1d - 1            ].x, in[gid1d - 1            ].y,             \n"
-"        in[gid1d - 1            ].z, in[gid1d - 1            ].w              \n"
-"    };                                                                        \n"
-"    float pix_mm[4] = {                                                       \n"
-"        in[gid1d                ].x, in[gid1d                ].y,             \n"
-"        in[gid1d                ].z, in[gid1d                ].w              \n"
-"    };                                                                        \n"
-"    float pix_mr[4] = {                                                       \n"
-"        in[gid1d + 1            ].x, in[gid1d + 1            ].y,             \n"
-"        in[gid1d + 1            ].z, in[gid1d + 1            ].w              \n"
-"    };                                                                        \n"
-"    float pix_bl[4] = {                                                       \n"
-"        in[gid1d - 1 + src_width].x, in[gid1d - 1 + src_width].y,             \n"
-"        in[gid1d - 1 + src_width].z, in[gid1d - 1 + src_width].w              \n"
-"    };                                                                        \n"
-"    float pix_bm[4] = {                                                       \n"
-"        in[gid1d     + src_width].x, in[gid1d     + src_width].y,             \n"
-"        in[gid1d     + src_width].z, in[gid1d     + src_width].w              \n"
-"    };                                                                        \n"
-"    float pix_br[4] = {                                                       \n"
-"        in[gid1d + 1 + src_width].x, in[gid1d + 1 + src_width].y,             \n"
-"        in[gid1d + 1 + src_width].z, in[gid1d + 1 + src_width].w              \n"
-"    };                                                                        \n"
+"    int src_width  = get_global_size(0) + 2;                                  \n"
+"    int src_height = get_global_size(1) + 2;                                  \n"
+"                                                                              \n"
+"    int i = gidx + 1, j = gidy + 1;                                           \n"
+"                                                                              \n"
+"    float4 cur_pix;                                                           \n"
+"                                                                              \n"
+"    cur_pix = get_pix(in, i - 1, j - 1, src_width);                           \n"
+"    float pix_fl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};           \n"
+"                                                                              \n"
+"    cur_pix = get_pix(in, i - 0, j - 1, src_width);                           \n"
+"    float pix_fm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};           \n"
+"                                                                              \n"
+"    cur_pix = get_pix(in, i + 1, j - 1, src_width);                           \n"
+"    float pix_fr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};           \n"
+"                                                                              \n"
+"    cur_pix = get_pix(in, i - 1, j - 0, src_width);                           \n"
+"    float pix_ml[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};           \n"
+"                                                                              \n"
+"    cur_pix = get_pix(in, i - 0, j - 0, src_width);                           \n"
+"    float pix_mm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};           \n"
+"                                                                              \n"
+"    cur_pix = get_pix(in, i + 1, j - 0, src_width);                           \n"
+"    float pix_mr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};           \n"
+"                                                                              \n"
+"    cur_pix = get_pix(in, i - 1, j + 1, src_width);                           \n"
+"    float pix_bl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};           \n"
+"                                                                              \n"
+"    cur_pix = get_pix(in, i - 0, j + 1, src_width);                           \n"
+"    float pix_bm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};           \n"
+"                                                                              \n"
+"    cur_pix = get_pix(in, i + 1, j + 1, src_width);                           \n"
+"    float pix_br[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};           \n"
+"                                                                              \n"
 "                                                                              \n"
 "    int c;                                                                    \n"
 "    float minval, maxval;                                                     \n"
@@ -108,8 +106,10 @@ static const char* edge_laplace_cl_source =
 "    }                                                                         \n"
 "    gradient[3] = pix_mm[3];                                                  \n"
 "                                                                              \n"
-"    out[gid1d] = (float4)                                                     \n"
+"                                                                              \n"
+"    out[gidx + gidy * get_global_size(0)] = (float4)                          \n"
 "        (gradient[0], gradient[1], gradient[2], gradient[3]);                 \n"
+"                                                                              \n"
 "}                                                                             \n"
 "                                                                              \n"
 "kernel void knl_edgelaplace (global float4 *in,                               \n"
@@ -118,48 +118,39 @@ static const char* edge_laplace_cl_source =
 "    int gidx = get_global_id(0);                                              \n"
 "    int gidy = get_global_id(1);                                              \n"
 "                                                                              \n"
-"    int src_width  = get_global_size(0) + LAPLACE_RADIUS * 2;                 \n"
-"    int src_height = get_global_size(1);                                      \n"
-"                                                                              \n"
-"    int i = gidx + LAPLACE_RADIUS, j = gidy + LAPLACE_RADIUS;                 \n"
-"    int gid1d = i + j * src_width;                                            \n"
-"                                                                              \n"
-"    float pix_fl[4] = {                                                       \n"
-"        in[gid1d - 1 - src_width].x, in[gid1d - 1 - src_width].y,             \n"
-"        in[gid1d - 1 - src_width].z, in[gid1d - 1 - src_width].w              \n"
-"    };                                                                        \n"
-"    float pix_fm[4] = {                                                       \n"
-"        in[gid1d     - src_width].x, in[gid1d     - src_width].y,             \n"
-"        in[gid1d     - src_width].z, in[gid1d     - src_width].w              \n"
-"    };                                                                        \n"
-"    float pix_fr[4] = {                                                       \n"
-"        in[gid1d + 1 - src_width].x, in[gid1d + 1 - src_width].y,             \n"
-"        in[gid1d + 1 - src_width].z, in[gid1d + 1 - src_width].w              \n"
-"    };                                                                        \n"
-"    float pix_ml[4] = {                                                       \n"
-"        in[gid1d - 1            ].x, in[gid1d - 1            ].y,             \n"
-"        in[gid1d - 1            ].z, in[gid1d - 1            ].w              \n"
-"    };                                                                        \n"
-"    float pix_mm[4] = {                                                       \n"
-"        in[gid1d                ].x, in[gid1d                ].y,             \n"
-"        in[gid1d                ].z, in[gid1d                ].w              \n"
-"    };                                                                        \n"
-"    float pix_mr[4] = {                                                       \n"
-"        in[gid1d + 1            ].x, in[gid1d + 1            ].y,             \n"
-"        in[gid1d + 1            ].z, in[gid1d + 1            ].w              \n"
-"    };                                                                        \n"
-"    float pix_bl[4] = {                                                       \n"
-"        in[gid1d - 1 + src_width].x, in[gid1d - 1 + src_width].y,             \n"
-"        in[gid1d - 1 + src_width].z, in[gid1d - 1 + src_width].w              \n"
-"    };                                                                        \n"
-"    float pix_bm[4] = {                                                       \n"
-"        in[gid1d     + src_width].x, in[gid1d     + src_width].y,             \n"
-"        in[gid1d     + src_width].z, in[gid1d     + src_width].w              \n"
-"    };                                                                        \n"
-"    float pix_br[4] = {                                                       \n"
-"        in[gid1d + 1 + src_width].x, in[gid1d + 1 + src_width].y,             \n"
-"        in[gid1d + 1 + src_width].z, in[gid1d + 1 + src_width].w              \n"
-"    };                                                                        \n"
+"    int src_width  = get_global_size(0) + 2;                                  \n"
+"    int src_height = get_global_size(1) + 2;                                  \n"
+"                                                                              \n"
+"    int i = gidx + 1, j = gidy + 1;                                           \n"
+"                                                                              \n"
+"    float4 cur_pix;                                                           \n"
+"                                                                              \n"
+"    cur_pix = get_pix(in, i - 1, j - 1, src_width);                           \n"
+"    float pix_fl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};           \n"
+"                                                                              \n"
+"    cur_pix = get_pix(in, i - 0, j - 1, src_width);                           \n"
+"    float pix_fm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};           \n"
+"                                                                              \n"
+"    cur_pix = get_pix(in, i + 1, j - 1, src_width);                           \n"
+"    float pix_fr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};           \n"
+"                                                                              \n"
+"    cur_pix = get_pix(in, i - 1, j - 0, src_width);                           \n"
+"    float pix_ml[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};           \n"
+"                                                                              \n"
+"    cur_pix = get_pix(in, i - 0, j - 0, src_width);                           \n"
+"    float pix_mm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};           \n"
+"                                                                              \n"
+"    cur_pix = get_pix(in, i + 1, j - 0, src_width);                           \n"
+"    float pix_mr[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};           \n"
+"                                                                              \n"
+"    cur_pix = get_pix(in, i - 1, j + 1, src_width);                           \n"
+"    float pix_bl[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};           \n"
+"                                                                              \n"
+"    cur_pix = get_pix(in, i - 0, j + 1, src_width);                           \n"
+"    float pix_bm[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};           \n"
+"                                                                              \n"
+"    cur_pix = get_pix(in, i + 1, j + 1, src_width);                           \n"
+"    float pix_br[4] = {cur_pix.x, cur_pix.y, cur_pix.z, cur_pix.w};           \n"
 "                                                                              \n"
 "    int c;                                                                    \n"
 "    float value[4];                                                           \n"


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]