[gegl] gegl-parallel: improve optimal thread-count calculation



commit e1d01be80297bae226e0d86b7be0b10f6073c32e
Author: Ell <ell_se yahoo com>
Date:   Tue Nov 13 09:55:19 2018 -0500

    gegl-parallel: improve optimal thread-count calculation
    
    Previously, the number of threads used by
    gegl_parallel_distribute_{range,area}() was proportional to the
    number of elements to be processed, such that each thread processed
    at least a user-provided minimal number of elements.  This,
    however, fails to take into account the fact that each additional
    thread lowers the effective cost of processing each additional
    element, since processing is spread over more threads, and
    therefore more elements are necessary to justify each additional
    thread.
    
    To find the optimal number of threads to use, we assume that the
    cost of processing the elements is proportional to the number of
    elements to be processed by each thread, and that each thread
    additional incurs a fixed cost.  This cost is specified by a user-
    provided parameter, relative to the cost of processing a single
    element, replacing the old minimal per-thread element-count
    paramter (it is expected, however, that this parameter will take
    the same value as the old parameter; in particular, the minimal
    number of per-thread elements for using two threads equals the the
    cost.)
    
    In other words, the cost of processing n elements, using t threads,
    with a fixed realtive per-thread cost c, is assumed to be
    proportional to:
    
      n / t + c * t
    
    The number of threads, t, that minimize this cost, for a given per-
    thread cost, c, is roughly proportional to the square root of the
    number of elements, n.

 gegl/gegl-parallel.c | 91 ++++++++++++++++++++++++++++++++++++++++------------
 gegl/gegl-parallel.h | 39 +++++++++++-----------
 2 files changed, 90 insertions(+), 40 deletions(-)
---
diff --git a/gegl/gegl-parallel.c b/gegl/gegl-parallel.c
index 38f564da3..f98f9c9ab 100644
--- a/gegl/gegl-parallel.c
+++ b/gegl/gegl-parallel.c
@@ -18,6 +18,8 @@
 
 #include "config.h"
 
+#include <math.h>
+
 #include <glib.h>
 
 #include "gegl.h"
@@ -51,13 +53,16 @@ typedef struct
 
 /*  local function prototypes  */
 
-static void                       gegl_parallel_notify_threads           (GeglConfig                   
*config);
+static void                       gegl_parallel_notify_threads                   (GeglConfig                 
  *config);
+
+static void                       gegl_parallel_set_n_threads                    (gint                       
   n_threads,
+                                                                                  gboolean                   
   finish_tasks);
 
-static void                       gegl_parallel_set_n_threads            (gint                          
n_threads,
-                                                                          gboolean                      
finish_tasks);
+static void                       gegl_parallel_distribute_set_n_threads         (gint                       
   n_threads);
+static gpointer                   gegl_parallel_distribute_thread_func           
(GeglParallelDistributeThread *thread);
 
-static void                       gegl_parallel_distribute_set_n_threads (gint                          
n_threads);
-static gpointer                   gegl_parallel_distribute_thread_func   (GeglParallelDistributeThread 
*thread);
+static inline gint                gegl_parallel_distribute_get_optimal_n_threads (gdouble                    
   n_elements,
+                                                                                  gdouble                    
   thread_cost);
 
 
 /*  local variables  */
@@ -184,29 +189,35 @@ gegl_parallel_distribute_range_func (gint                             i,
 
 void
 gegl_parallel_distribute_range (gsize                           size,
-                                gsize                           min_sub_size,
+                                gdouble                         thread_cost,
                                 GeglParallelDistributeRangeFunc func,
                                 gpointer                        user_data)
 {
   GeglParallelDistributeRangeData data;
-  gsize                           n = size;
+  gint                            n_threads;
 
   g_return_if_fail (func != NULL);
 
   if (size == 0)
     return;
 
-  if (min_sub_size > 1)
-    n /= min_sub_size;
+  n_threads = gegl_parallel_distribute_get_optimal_n_threads (
+    size,
+    thread_cost);
+
+  if (n_threads == 1)
+    {
+      func (0, size, user_data);
 
-  n = CLAMP (n, 1, gegl_parallel_distribute_n_threads);
+      return;
+    }
 
   data.size      = size;
   data.func      = func;
   data.user_data = user_data;
 
   gegl_parallel_distribute (
-    n,
+    n_threads,
     (GeglParallelDistributeFunc) gegl_parallel_distribute_range_func,
     &data);
 }
@@ -261,13 +272,13 @@ gegl_parallel_distribute_area_func (gint                            i,
 
 void
 gegl_parallel_distribute_area (const GeglRectangle            *area,
-                               gsize                           min_sub_area,
+                               gdouble                         thread_cost,
                                GeglSplitStrategy               split_strategy,
                                GeglParallelDistributeAreaFunc  func,
                                gpointer                        user_data)
 {
   GeglParallelDistributeAreaData data;
-  gsize                          n;
+  gint                           n_threads;
 
   g_return_if_fail (area != NULL);
   g_return_if_fail (func != NULL);
@@ -275,6 +286,17 @@ gegl_parallel_distribute_area (const GeglRectangle            *area,
   if (area->width <= 0 || area->height <= 0)
     return;
 
+  n_threads = gegl_parallel_distribute_get_optimal_n_threads (
+    (gdouble) area->width * (gdouble) area->height,
+    thread_cost);
+
+  if (n_threads == 1)
+    {
+      func (area, user_data);
+
+      return;
+    }
+
   if (split_strategy == GEGL_SPLIT_STRATEGY_AUTO)
     {
       if (area->width > area->height)
@@ -283,20 +305,13 @@ gegl_parallel_distribute_area (const GeglRectangle            *area,
         split_strategy = GEGL_SPLIT_STRATEGY_HORIZONTAL;
     }
 
-  n = (gsize) area->width * (gsize) area->height;
-
-  if (min_sub_area > 1)
-    n /= min_sub_area;
-
-  n = CLAMP (n, 1, gegl_parallel_distribute_n_threads);
-
   data.area           = area;
   data.split_strategy = split_strategy;
   data.func           = func;
   data.user_data      = user_data;
 
   gegl_parallel_distribute (
-    n,
+    n_threads,
     (GeglParallelDistributeFunc) gegl_parallel_distribute_area_func,
     &data);
 }
@@ -416,3 +431,37 @@ gegl_parallel_distribute_thread_func (GeglParallelDistributeThread *thread)
 
   return NULL;
 }
+
+/* calculates the optimal number of threads, n_threads, to process n_elements
+ * elements, assuming the cost of processing the elements is proportional to
+ * the number of elements to be processed by each thread, and assuming that
+ * each thread additionally incurs a fixed cost of thread_cost, relative to the
+ * cost of processing a single element.
+ *
+ * in other words, the assumption is that the total cost of processing the
+ * elements is proportional to:
+ *
+ *   n_elements / n_threads + thread_cost * n_threads
+ */
+static inline gint
+gegl_parallel_distribute_get_optimal_n_threads (gdouble n_elements,
+                                                gdouble thread_cost)
+{
+  gint n_threads;
+
+  if (n_elements > 0 && thread_cost > 0.0)
+    {
+      gdouble n = n_elements;
+      gdouble c = thread_cost;
+
+      n_threads = floor ((c + sqrt (c * (c + 4.0 * n))) / (2.0 * c));
+      n_threads = CLAMP (n_threads, 1, gegl_parallel_distribute_n_threads);
+    }
+  else
+    {
+      n_threads = n_elements;
+      n_threads = CLAMP (n_threads, 0, gegl_parallel_distribute_n_threads);
+    }
+
+  return n_threads;
+}
diff --git a/gegl/gegl-parallel.h b/gegl/gegl-parallel.h
index 4e8798331..23cf04d6c 100644
--- a/gegl/gegl-parallel.h
+++ b/gegl/gegl-parallel.h
@@ -45,11 +45,11 @@ typedef void (* GeglParallelDistributeFunc)      (gint                 i,
  * @size: the current data size
  * @user_data: user data pointer
  *
- * Specifies the type of function passed to gegl_parallel_distribute_range().
+ * Specifies the type of function passed to
+ * gegl_parallel_distribute_range().
  *
  * The function should process @size elements of the data, starting
- * at @offset.  @size may be greater-than or equal-to the @min_sub_size
- * argument passed to gegl_parallel_distribute_range().
+ * at @offset.
  */
 typedef void (* GeglParallelDistributeRangeFunc) (gsize                offset,
                                                   gsize                size,
@@ -57,14 +57,13 @@ typedef void (* GeglParallelDistributeRangeFunc) (gsize                offset,
 
 /**
  * GeglParallelDistributeAreaFunc:
- * @area: the current sub-region
+ * @area: the current sub-area
  * @user_data: user data pointer
  *
- * Specifies the type of function passed to gegl_parallel_distribute_area().
+ * Specifies the type of function passed to
+ * gegl_parallel_distribute_area().
  *
- * The function should process the sub-region specified by @area, whose
- * area may be greater-than or equal-to the @min_sub_area argument passed
- * to gegl_parallel_distribute_area().
+ * The function should process the sub-area specified by @area.
  *
  */
 typedef void (* GeglParallelDistributeAreaFunc)  (const GeglRectangle *area,
@@ -87,7 +86,8 @@ void   gegl_parallel_distribute       (gint                             max_n,
 /**
  * gegl_parallel_distribute_range:
  * @size: the total size of the data
- * @min_sub_size: the minimal data size to be processed by each thread
+ * @thread_cost: the cost of using each additional thread, relative
+ *               to the cost of processing a single data element
  * @func: (closure user_data) (scope call): the function to call
  * @user_data: user data to pass to the function
  *
@@ -96,24 +96,25 @@ void   gegl_parallel_distribute       (gint                             max_n,
  * sub-ranges on different threads.
  */
 void   gegl_parallel_distribute_range (gsize                            size,
-                                       gsize                            min_sub_size,
+                                       gdouble                          thread_cost,
                                        GeglParallelDistributeRangeFunc  func,
                                        gpointer                         user_data);
 
 /**
  * gegl_parallel_distribute_area:
- * @area: the region to process
- * @min_sub_area: the minimal area to be processed by each thread
- * @split_strategy: the strategy to use for dividing the region
+ * @area: the area to process
+ * @thread_cost: the cost of using each additional thread, relative
+ *               to the cost of processing a single data element
+ * @split_strategy: the strategy to use for dividing the area
  * @func: (closure user_data) (scope call): the function to call
  * @user_data: user data to pass to the function
  *
  * Distributes the processing of a planar data-structure across
  * multiple threads, by calling the given function with different
- * sub-regions on different threads.
+ * sub-areas on different threads.
  */
 void   gegl_parallel_distribute_area  (const GeglRectangle             *area,
-                                       gsize                            min_sub_area,
+                                       gdouble                          thread_cost,
                                        GeglSplitStrategy                split_strategy,
                                        GeglParallelDistributeAreaFunc   func,
                                        gpointer                         user_data);
@@ -145,10 +146,10 @@ gegl_parallel_distribute (gint                   max_n,
 template <class ParallelDistributeRangeFunc>
 inline void
 gegl_parallel_distribute_range (gsize                       size,
-                                gsize                       min_sub_size,
+                                gdouble                     thread_cost,
                                 ParallelDistributeRangeFunc func)
 {
-  gegl_parallel_distribute_range (size, min_sub_size,
+  gegl_parallel_distribute_range (size, thread_cost,
                                   [] (gsize    offset,
                                       gsize    size,
                                       gpointer user_data)
@@ -164,11 +165,11 @@ gegl_parallel_distribute_range (gsize                       size,
 template <class ParallelDistributeAreaFunc>
 inline void
 gegl_parallel_distribute_area (const GeglRectangle        *area,
-                               gsize                       min_sub_area,
+                               gdouble                     thread_cost,
                                GeglSplitStrategy           split_strategy,
                                ParallelDistributeAreaFunc  func)
 {
-  gegl_parallel_distribute_area (area, min_sub_area, split_strategy,
+  gegl_parallel_distribute_area (area, thread_cost, split_strategy,
                                  [] (const GeglRectangle *area,
                                      gpointer             user_data)
                                  {


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]