[gegl] opencl: bilateral-filter-fast was using too much local memory

From: Victor Matheus de Araujo Oliveira <vmaolive src gnome org>
To: commits-list gnome org
Cc:
Subject: [gegl] opencl: bilateral-filter-fast was using too much local memory
Date: Thu, 9 May 2013 01:53:31 +0000 (UTC)

commit 10271f3a70ccd6bfe9a55c3ca696b140064bb633
Author: Victor Oliveira <victormatheus gmail com>
Date:   Wed May 8 22:51:05 2013 -0300

    opencl: bilateral-filter-fast was using too much local memory

 opencl/bilateral-filter-fast.cl   |    7 ++++++-
 opencl/bilateral-filter-fast.cl.h |    7 ++++++-
 2 files changed, 12 insertions(+), 2 deletions(-)
---
diff --git a/opencl/bilateral-filter-fast.cl b/opencl/bilateral-filter-fast.cl
index 0205d8f..691149e 100644
--- a/opencl/bilateral-filter-fast.cl
+++ b/opencl/bilateral-filter-fast.cl
@@ -22,7 +22,12 @@
 #define LOCAL_H 8
 
 /* found by trial and error on a NVidia GPU */
-#define DEPTH_CHUNK 12
+
+// optimum value
+// #define DEPTH_CHUNK 12
+
+// a little less than 16k of local memory, works on most GPUs
+#define DEPTH_CHUNK 7
 
 __attribute__((reqd_work_group_size(8, 8, 1)))
 __kernel void bilateral_downsample(__global const float4 *input,
diff --git a/opencl/bilateral-filter-fast.cl.h b/opencl/bilateral-filter-fast.cl.h
index 2d3072a..3fc53dd 100644
--- a/opencl/bilateral-filter-fast.cl.h
+++ b/opencl/bilateral-filter-fast.cl.h
@@ -23,7 +23,12 @@ static const char* bilateral_filter_fast_cl_source =
 "#define LOCAL_H 8                                                             \n"
 "                                                                              \n"
 "/* found by trial and error on a NVidia GPU */                                \n"
-"#define DEPTH_CHUNK 12                                                        \n"
+"                                                                              \n"
+"// optimum value                                                              \n"
+"// #define DEPTH_CHUNK 12                                                     \n"
+"                                                                              \n"
+"// a little less than 16k of local memory, works on most GPUs                 \n"
+"#define DEPTH_CHUNK 7                                                         \n"
 "                                                                              \n"
 "__attribute__((reqd_work_group_size(8, 8, 1)))                                \n"
 "__kernel void bilateral_downsample(__global const float4 *input,              \n"

[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]