[cogl] Add a workaround for slow read pixels on Mesa



commit cb146dc515bf484192a8edfab716550a996b05df
Author: Neil Roberts <neil linux intel com>
Date:   Tue Mar 27 17:24:50 2012 +0100

    Add a workaround for slow read pixels on Mesa
    
    Mesa before version 8.0.2 has a slow read pixels path that gets used
    with the Intel driver where it converts all of the pixels into a
    floating point representation and back even if the data is being read
    into exactly the same format. There is however a faster path using the
    blitter when reading into a PBO with BGRA format. It works out faster
    to read into a PBO and then memcpy back out into the application's
    buffer even though it adds an extra memcpy. This patch adds a
    workaround in cogl_framebuffer_read_pixels_into_bitmap when it detects
    this situation. In that case it will create a temporary CoglBitmap
    using cogl_bitmap_new_with_size, read into it and then memcpy the data
    back out.
    
    The main impetus for this patch is that Gnome Shell has implemented
    this workaround directly using GL calls but it seems like the kind of
    thing that would sit better at the Cogl layer.
    
    Reviewed-by: Robert Bragg <robert linux intel com>

 cogl/cogl-framebuffer-private.h |   10 +++
 cogl/cogl-framebuffer.c         |  127 +++++++++++++++++++++++++++++++++++++-
 cogl/cogl-gpu-info-private.h    |    7 ++-
 cogl/cogl-gpu-info.c            |   12 ++++-
 4 files changed, 150 insertions(+), 6 deletions(-)
---
diff --git a/cogl/cogl-framebuffer-private.h b/cogl/cogl-framebuffer-private.h
index 0081a2f..4c759cf 100644
--- a/cogl/cogl-framebuffer-private.h
+++ b/cogl/cogl-framebuffer-private.h
@@ -93,6 +93,16 @@ typedef enum _CoglFramebufferState
 
 #define COGL_FRAMEBUFFER_STATE_ALL ((1<<COGL_FRAMEBUFFER_STATE_INDEX_MAX) - 1)
 
+/* Private flags that can internally be added to CoglReadPixelsFlags */
+typedef enum
+{
+  /* If this is set then the data will not be flipped to compensate
+     for GL's upside-down coordinate system but instead will be left
+     in whatever order GL gives us (which will depend on whether the
+     framebuffer is offscreen or not) */
+  COGL_READ_PIXELS_NO_FLIP = 1L << 30
+} CoglPrivateReadPixelsFlags;
+
 struct _CoglFramebuffer
 {
   CoglObject          _parent;
diff --git a/cogl/cogl-framebuffer.c b/cogl/cogl-framebuffer.c
index b55b016..bc6bad8 100644
--- a/cogl/cogl-framebuffer.c
+++ b/cogl/cogl-framebuffer.c
@@ -1942,6 +1942,97 @@ _cogl_framebuffer_try_fast_read_pixel (CoglFramebuffer *framebuffer,
   return FALSE;
 }
 
+static gboolean
+_cogl_framebuffer_slow_read_pixels_workaround (CoglFramebuffer *framebuffer,
+                                               int x,
+                                               int y,
+                                               CoglReadPixelsFlags source,
+                                               CoglBitmap *bitmap)
+{
+  CoglContext *ctx;
+  CoglPixelFormat format;
+  CoglBitmap *pbo;
+  int width;
+  int height;
+  gboolean res;
+
+  _COGL_RETURN_VAL_IF_FAIL (source & COGL_READ_PIXELS_COLOR_BUFFER, FALSE);
+  _COGL_RETURN_VAL_IF_FAIL (cogl_is_framebuffer (framebuffer), FALSE);
+
+  ctx = cogl_framebuffer_get_context (framebuffer);
+
+  width = cogl_bitmap_get_width (bitmap);
+  height = cogl_bitmap_get_height (bitmap);
+  format = cogl_bitmap_get_format (bitmap);
+
+  pbo = cogl_bitmap_new_with_size (ctx, width, height, format);
+  if (pbo == NULL)
+    return FALSE;
+
+  /* Read into the pbo. We need to disable the flipping because the
+     blit fast path in the driver does not work with
+     GL_PACK_INVERT_MESA is set */
+  res = cogl_framebuffer_read_pixels_into_bitmap (framebuffer,
+                                                  x, y,
+                                                  source |
+                                                  COGL_READ_PIXELS_NO_FLIP,
+                                                  pbo);
+
+  if (res)
+    {
+      guint8 *dst;
+
+      /* Copy the pixels back into application's buffer */
+      dst = _cogl_bitmap_map (bitmap,
+                              COGL_BUFFER_ACCESS_WRITE,
+                              COGL_BUFFER_MAP_HINT_DISCARD);
+
+      if (dst == NULL)
+        res = FALSE;
+      else
+        {
+          const guint8 *src;
+
+          src = _cogl_bitmap_map (pbo,
+                                  COGL_BUFFER_ACCESS_READ,
+                                  0 /* hints */);
+          if (src == NULL)
+            res = FALSE;
+          else
+            {
+              int src_rowstride = cogl_bitmap_get_rowstride (pbo);
+              int dst_rowstride = cogl_bitmap_get_rowstride (bitmap);
+              int to_copy =
+                _cogl_pixel_format_get_bytes_per_pixel (format) * width;
+              int y;
+
+              /* If the framebuffer is onscreen we need to flip the
+                 data while copying */
+              if (!cogl_is_offscreen (framebuffer))
+                {
+                  src += src_rowstride * (height - 1);
+                  src_rowstride = -src_rowstride;
+                }
+
+              for (y = 0; y < height; y++)
+                {
+                  memcpy (dst, src, to_copy);
+                  dst += dst_rowstride;
+                  src += src_rowstride;
+                }
+
+              _cogl_bitmap_unmap (pbo);
+            }
+
+          _cogl_bitmap_unmap (bitmap);
+        }
+    }
+
+  cogl_object_unref (pbo);
+
+  return res;
+}
+
 gboolean
 cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
                                           int x,
@@ -1960,7 +2051,7 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
   int width;
   int height;
 
-  _COGL_RETURN_VAL_IF_FAIL (source == COGL_READ_PIXELS_COLOR_BUFFER, FALSE);
+  _COGL_RETURN_VAL_IF_FAIL (source & COGL_READ_PIXELS_COLOR_BUFFER, FALSE);
   _COGL_RETURN_VAL_IF_FAIL (cogl_is_framebuffer (framebuffer), FALSE);
 
   if (!cogl_framebuffer_allocate (framebuffer, NULL))
@@ -1970,6 +2061,7 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
 
   width = cogl_bitmap_get_width (bitmap);
   height = cogl_bitmap_get_height (bitmap);
+  format = cogl_bitmap_get_format (bitmap);
 
   if (width == 1 && height == 1 && !framebuffer->clear_clip_dirty)
     {
@@ -1986,6 +2078,32 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
         return TRUE;
     }
 
+  /* Workaround for cases where its faster to read into a temporary
+   * PBO. This is only worth doing if:
+   *
+   * â The GPU is an Intel GPU. In that case there is a known
+   *   fast-path when reading into a PBO that will use the blitter
+   *   instead of the Mesa fallback code. The driver bug will only be
+   *   set if this is the case.
+   * â We're not already reading into a PBO.
+   * â The target format is BGRA. The fast-path blit does not get hit
+   *   otherwise.
+   * â The size of the data is not trivially small. This isn't a
+   *   requirement to hit the fast-path blit but intuitively it feels
+   *   like if the amount of data is too small then the cost of
+   *   allocating a PBO will outweigh the cost of temporarily
+   *   converting the data to floats.
+   */
+  if ((ctx->gpu.driver_bugs &
+       COGL_GPU_INFO_DRIVER_BUG_MESA_46631_SLOW_READ_PIXELS) &&
+      (width > 8 || height > 8) &&
+      (format & ~COGL_PREMULT_BIT) == COGL_PIXEL_FORMAT_BGRA_8888 &&
+      cogl_bitmap_get_buffer (bitmap) == NULL)
+    return _cogl_framebuffer_slow_read_pixels_workaround (framebuffer,
+                                                          x, y,
+                                                          source,
+                                                          bitmap);
+
   /* make sure any batched primitives get emitted to the GL driver
    * before issuing our read pixels...
    */
@@ -2006,8 +2124,6 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
   if (!cogl_is_offscreen (framebuffer))
     y = framebuffer_height - y - height;
 
-  format = cogl_bitmap_get_format (bitmap);
-
   required_format = ctx->driver_vtable->pixel_format_to_gl (ctx,
                                                             format,
                                                             &gl_intformat,
@@ -2017,6 +2133,7 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
   /* NB: All offscreen rendering is done upside down so there is no need
    * to flip in this case... */
   if ((ctx->private_feature_flags & COGL_PRIVATE_FEATURE_MESA_PACK_INVERT) &&
+      (source & COGL_READ_PIXELS_NO_FLIP) == 0 &&
       !cogl_is_offscreen (framebuffer))
     {
       GE (ctx, glPixelStorei (GL_PACK_INVERT_MESA, TRUE));
@@ -2152,7 +2269,9 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
 
   /* NB: All offscreen rendering is done upside down so there is no need
    * to flip in this case... */
-  if (!cogl_is_offscreen (framebuffer) && !pack_invert_set)
+  if (!cogl_is_offscreen (framebuffer) &&
+      (source & COGL_READ_PIXELS_NO_FLIP) == 0 &&
+      !pack_invert_set)
     {
       guint8 *temprow;
       int rowstride;
diff --git a/cogl/cogl-gpu-info-private.h b/cogl/cogl-gpu-info-private.h
index 2930d6b..70f150a 100644
--- a/cogl/cogl-gpu-info-private.h
+++ b/cogl/cogl-gpu-info-private.h
@@ -40,7 +40,12 @@ typedef enum
 
 typedef enum
 {
-  COGL_GPU_INFO_DRIVER_STUB
+  /* If this bug is present then it is faster to read pixels into a
+   * PBO and then memcpy out of the PBO into system memory rather than
+   * directly read into system memory.
+   * https://bugs.freedesktop.org/show_bug.cgi?id=46631
+   */
+  COGL_GPU_INFO_DRIVER_BUG_MESA_46631_SLOW_READ_PIXELS = 1 << 0
 } CoglGpuInfoDriverBug;
 
 typedef struct _CoglGpuInfoVersion CoglGpuInfoVersion;
diff --git a/cogl/cogl-gpu-info.c b/cogl/cogl-gpu-info.c
index f62bbc3..e64fdbd 100644
--- a/cogl/cogl-gpu-info.c
+++ b/cogl/cogl-gpu-info.c
@@ -250,5 +250,15 @@ _cogl_gpu_info_init (CoglContext *ctx,
     }
 
   /* Determine the driver bugs */
-  gpu->driver_bugs = 0;
+
+  /* In Mesa < 8.0.2 the glReadPixels implementation is really slow
+     because it converts each pixel to a floating point representation
+     and back even if the data could just be memcpy'd. The Intel
+     driver has a fast blit path when reading into a PBO. Reading into
+     a temporary PBO and then memcpying back out to the application's
+     memory is faster than a regular glReadPixels in this case */
+  if (gpu->vendor == COGL_GPU_INFO_VENDOR_INTEL &&
+      gpu->driver_package == COGL_GPU_INFO_DRIVER_PACKAGE_MESA &&
+      gpu->driver_package_version < COGL_VERSION_ENCODE (8, 0, 2))
+    gpu->driver_bugs |= COGL_GPU_INFO_DRIVER_BUG_MESA_46631_SLOW_READ_PIXELS;
 }



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]