[gnome-remote-desktop] rdp: Allocate and register GL resources for each RDP buffer

From: Jonas Ådahl <jadahl src gnome org>
To: commits-list gnome org
Cc:
Subject: [gnome-remote-desktop] rdp: Allocate and register GL resources for each RDP buffer
Date: Thu, 3 Mar 2022 14:23:08 +0000 (UTC)
commit 46a9167621139f28e0f7838681ce01822c1fc67d
Author: Pascal Nowack <Pascal Nowack gmx de>
Date:   Tue Jan 4 12:54:02 2022 +0100

    rdp: Allocate and register GL resources for each RDP buffer
    
    When uploading or mapping frame data on the GPU later, the buffer for
    the frame on the GPU is usually expected to be already allocated and
    registered with the external API that makes use of the mapped buffer.
    
    To ensure this, allocate and register the GL resources once the size
    of the framebuffers in the PipeWire stream is known.
    When the PipeWire stream is resized, these framebuffers are resized
    too.
    This is relatively easy, as all framebuffers are tracked.
    
    When a new buffer is added to the buffer pool because all buffers of
    the buffer pool are already in use, do not allocate any GL resources.
    
    There are two reasons for this:
    First, the EGL thread is about to take care of the framebuffer anyway,
    so adding additional synchronization for the allocation can impact
    performance.
    Such synchronization would nevertheless be required, as the
    allocation can fail, which is a situation that needs to be handled.
    
    Second, the submission of the allocation task and the synchronization
    would happen on the PipeWire realtime thread instead of the main
    thread. This can lead to a deadlock in which the buffer pool mutex is
    never unlocked again: the EGL thread would wait for the PipeWire
    realtime thread to unlock it, while the PipeWire realtime thread would
    wait for the EGL thread to finish the allocation task.

 src/grd-rdp-buffer-pool.c     | 111 ++++++++++++++++++++++++++++--------
 src/grd-rdp-buffer-pool.h     |  14 +++--
 src/grd-rdp-buffer.c          | 129 ++++++++++++++++++++++++++++++++++++++++--
 src/grd-rdp-buffer.h          |  24 ++++++--
 src/grd-rdp-pipewire-stream.c |  49 +++++++++++++---
 src/grd-rdp-pipewire-stream.h |  11 ++--
 src/grd-rdp-surface.c         |  20 ++++++-
 src/grd-rdp-surface.h         |   6 +-
 src/grd-session-rdp.c         |  15 ++++-
 9 files changed, 322 insertions(+), 57 deletions(-)
---
diff --git a/src/grd-rdp-buffer-pool.c b/src/grd-rdp-buffer-pool.c
index 379e91c6..42597d82 100644
--- a/src/grd-rdp-buffer-pool.c
+++ b/src/grd-rdp-buffer-pool.c
@@ -21,7 +21,9 @@
 
 #include "grd-rdp-buffer-pool.h"
 
+#include "grd-egl-thread.h"
 #include "grd-rdp-buffer.h"
+#include "grd-utils.h"
 
 typedef struct _BufferInfo
 {
@@ -32,6 +34,11 @@ struct _GrdRdpBufferPool
 {
   GObject parent;
 
+  GrdEglThread *egl_thread;
+  GrdHwAccelNvidia *hwaccel_nvidia;
+
+  CUstream cuda_stream;
+
   gboolean has_buffer_size;
   uint32_t buffer_width;
   uint32_t buffer_height;
@@ -47,37 +54,47 @@ struct _GrdRdpBufferPool
 
 G_DEFINE_TYPE (GrdRdpBufferPool, grd_rdp_buffer_pool, G_TYPE_OBJECT)
 
-static void
-add_buffer_to_pool (GrdRdpBufferPool *buffer_pool)
+static gboolean
+add_buffer_to_pool (GrdRdpBufferPool *buffer_pool,
+                    gboolean          preallocate_on_gpu)
 {
   GrdRdpBuffer *buffer;
   BufferInfo *buffer_info;
 
-  buffer = grd_rdp_buffer_new (buffer_pool);
-  buffer_info = g_new0 (BufferInfo, 1);
-
-  if (buffer_pool->has_buffer_size)
+  buffer = grd_rdp_buffer_new (buffer_pool,
+                               buffer_pool->egl_thread,
+                               buffer_pool->hwaccel_nvidia,
+                               buffer_pool->cuda_stream);
+  if (buffer_pool->has_buffer_size &&
+      !grd_rdp_buffer_resize (buffer,
+                              buffer_pool->buffer_width,
+                              buffer_pool->buffer_height,
+                              buffer_pool->buffer_stride,
+                              preallocate_on_gpu))
     {
-      grd_rdp_buffer_resize (buffer,
-                             buffer_pool->buffer_width,
-                             buffer_pool->buffer_height,
-                             buffer_pool->buffer_stride);
+      grd_rdp_buffer_free (buffer);
+      return FALSE;
     }
 
+  buffer_info = g_new0 (BufferInfo, 1);
+
   g_hash_table_insert (buffer_pool->buffer_table, buffer, buffer_info);
+
+  return TRUE;
 }
 
-void
+gboolean
 grd_rdp_buffer_pool_resize_buffers (GrdRdpBufferPool *buffer_pool,
                                     uint32_t          buffer_width,
                                     uint32_t          buffer_height,
                                     uint32_t          buffer_stride)
 {
+  g_autoptr (GMutexLocker) locker = NULL;
   GHashTableIter iter;
   GrdRdpBuffer *buffer;
   BufferInfo *buffer_info;
 
-  g_mutex_lock (&buffer_pool->pool_mutex);
+  locker = g_mutex_locker_new (&buffer_pool->pool_mutex);
   g_assert (buffer_pool->buffers_taken == 0);
 
   buffer_pool->buffer_width = buffer_width;
@@ -90,21 +107,26 @@ grd_rdp_buffer_pool_resize_buffers (GrdRdpBufferPool *buffer_pool,
                                         (gpointer *) &buffer_info))
     {
       g_assert (!buffer_info->buffer_taken);
-      grd_rdp_buffer_resize (buffer, buffer_width, buffer_height, buffer_stride);
+      if (!grd_rdp_buffer_resize (buffer, buffer_width, buffer_height,
+                                 buffer_stride, TRUE))
+        return FALSE;
     }
-  g_mutex_unlock (&buffer_pool->pool_mutex);
+
+  return TRUE;
 }
 
 GrdRdpBuffer *
 grd_rdp_buffer_pool_acquire (GrdRdpBufferPool *buffer_pool)
 {
+  g_autoptr (GMutexLocker) locker = NULL;
   GHashTableIter iter;
   GrdRdpBuffer *buffer;
   BufferInfo *buffer_info;
 
-  g_mutex_lock (&buffer_pool->pool_mutex);
-  if (g_hash_table_size (buffer_pool->buffer_table) <= buffer_pool->buffers_taken)
-    add_buffer_to_pool (buffer_pool);
+  locker = g_mutex_locker_new (&buffer_pool->pool_mutex);
+  if (g_hash_table_size (buffer_pool->buffer_table) <= buffer_pool->buffers_taken &&
+      !add_buffer_to_pool (buffer_pool, FALSE))
+    return NULL;
 
   g_hash_table_iter_init (&iter, buffer_pool->buffer_table);
   while (g_hash_table_iter_next (&iter, (gpointer *) &buffer,
@@ -116,7 +138,6 @@ grd_rdp_buffer_pool_acquire (GrdRdpBufferPool *buffer_pool)
 
   buffer_info->buffer_taken = TRUE;
   ++buffer_pool->buffers_taken;
-  g_mutex_unlock (&buffer_pool->pool_mutex);
 
   return buffer;
 }
@@ -196,21 +217,32 @@ static GSourceFuncs resize_pool_source_funcs =
   .dispatch = resize_pool_source_dispatch,
 };
 
-static void
+static gboolean
 fill_buffer_pool (GrdRdpBufferPool *buffer_pool)
 {
   uint32_t minimum_size = buffer_pool->minimum_pool_size;
 
   while (g_hash_table_size (buffer_pool->buffer_table) < minimum_size)
-    add_buffer_to_pool (buffer_pool);
+    {
+      if (!add_buffer_to_pool (buffer_pool, TRUE))
+        return FALSE;
+    }
+
+  return TRUE;
 }
 
 GrdRdpBufferPool *
-grd_rdp_buffer_pool_new (uint32_t minimum_size)
+grd_rdp_buffer_pool_new (GrdEglThread     *egl_thread,
+                         GrdHwAccelNvidia *hwaccel_nvidia,
+                         CUstream          cuda_stream,
+                         uint32_t          minimum_size)
 {
-  GrdRdpBufferPool *buffer_pool;
+  g_autoptr (GrdRdpBufferPool) buffer_pool = NULL;
 
   buffer_pool = g_object_new (GRD_TYPE_RDP_BUFFER_POOL, NULL);
+  buffer_pool->egl_thread = egl_thread;
+  buffer_pool->hwaccel_nvidia = hwaccel_nvidia;
+  buffer_pool->cuda_stream = cuda_stream;
   buffer_pool->minimum_pool_size = minimum_size;
 
   buffer_pool->resize_pool_source = g_source_new (&resize_pool_source_funcs,
@@ -220,9 +252,32 @@ grd_rdp_buffer_pool_new (uint32_t minimum_size)
   g_source_set_ready_time (buffer_pool->resize_pool_source, -1);
   g_source_attach (buffer_pool->resize_pool_source, NULL);
 
-  fill_buffer_pool (buffer_pool);
+  if (!fill_buffer_pool (buffer_pool))
+    return NULL;
+
+  return g_steal_pointer (&buffer_pool);
+}
+
+static void
+on_sync_complete (gboolean success,
+                  gpointer user_data)
+{
+  GrdSyncPoint *sync_point = user_data;
+
+  grd_sync_point_complete (sync_point, success);
+}
+
+static void
+sync_egl_thread (GrdRdpBufferPool *buffer_pool)
+{
+  GrdSyncPoint sync_point = {};
 
-  return buffer_pool;
+  grd_sync_point_init (&sync_point);
+  grd_egl_thread_sync (buffer_pool->egl_thread, on_sync_complete,
+                       &sync_point, NULL);
+
+  grd_sync_point_wait_for_completion (&sync_point);
+  grd_sync_point_clear (&sync_point);
 }
 
 static void
@@ -250,6 +305,14 @@ grd_rdp_buffer_pool_finalize (GObject *object)
 
   g_clear_pointer (&buffer_pool->buffer_table, g_hash_table_unref);
 
+  /*
+   * All buffers need to be destroyed, before the pool is freed to avoid use
+   * after free by the EGL thread, when the RDP server is shut down and with it
+   * the GrdHwAccelNvidia instance
+   */
+  if (buffer_pool->egl_thread)
+    sync_egl_thread (buffer_pool);
+
   G_OBJECT_CLASS (grd_rdp_buffer_pool_parent_class)->finalize (object);
 }
 
diff --git a/src/grd-rdp-buffer-pool.h b/src/grd-rdp-buffer-pool.h
index 121c86f5..afe55bd9 100644
--- a/src/grd-rdp-buffer-pool.h
+++ b/src/grd-rdp-buffer-pool.h
@@ -20,6 +20,7 @@
 #ifndef GRD_RDP_BUFFER_POOL_H
 #define GRD_RDP_BUFFER_POOL_H
 
+#include <ffnvcodec/dynlink_cuda.h>
 #include <glib-object.h>
 #include <stdint.h>
 
@@ -29,12 +30,15 @@
 G_DECLARE_FINAL_TYPE (GrdRdpBufferPool, grd_rdp_buffer_pool,
                       GRD, RDP_BUFFER_POOL, GObject)
 
-GrdRdpBufferPool *grd_rdp_buffer_pool_new (uint32_t minimum_size);
+GrdRdpBufferPool *grd_rdp_buffer_pool_new (GrdEglThread     *egl_thread,
+                                           GrdHwAccelNvidia *hwaccel_nvidia,
+                                           CUstream          cuda_stream,
+                                           uint32_t          minimum_size);
 
-void grd_rdp_buffer_pool_resize_buffers (GrdRdpBufferPool *buffer_pool,
-                                         uint32_t          buffer_width,
-                                         uint32_t          buffer_height,
-                                         uint32_t          buffer_stride);
+gboolean grd_rdp_buffer_pool_resize_buffers (GrdRdpBufferPool *buffer_pool,
+                                             uint32_t          buffer_width,
+                                             uint32_t          buffer_height,
+                                             uint32_t          buffer_stride);
 
 GrdRdpBuffer *grd_rdp_buffer_pool_acquire (GrdRdpBufferPool *buffer_pool);
 
diff --git a/src/grd-rdp-buffer.c b/src/grd-rdp-buffer.c
index eb33a310..a48a28c7 100644
--- a/src/grd-rdp-buffer.c
+++ b/src/grd-rdp-buffer.c
@@ -21,24 +21,84 @@
 
 #include "grd-rdp-buffer.h"
 
-#include <gio/gio.h>
+#include <ffnvcodec/dynlink_cuda.h>
 
+#include "grd-egl-thread.h"
+#include "grd-hwaccel-nvidia.h"
 #include "grd-rdp-buffer-pool.h"
+#include "grd-utils.h"
+
+typedef struct
+{
+  GrdHwAccelNvidia *hwaccel_nvidia;
+  CUgraphicsResource cuda_resource;
+  CUstream cuda_stream;
+  gboolean is_mapped;
+} ClearBufferData;
+
+typedef struct
+{
+  GrdHwAccelNvidia *hwaccel_nvidia;
+  GrdRdpBuffer *buffer;
+} AllocateBufferData;
 
 GrdRdpBuffer *
-grd_rdp_buffer_new (GrdRdpBufferPool *buffer_pool)
+grd_rdp_buffer_new (GrdRdpBufferPool *buffer_pool,
+                    GrdEglThread     *egl_thread,
+                    GrdHwAccelNvidia *hwaccel_nvidia,
+                    CUstream          cuda_stream)
 {
   GrdRdpBuffer *buffer;
 
   buffer = g_new0 (GrdRdpBuffer, 1);
   buffer->buffer_pool = buffer_pool;
+  buffer->egl_thread = egl_thread;
+  buffer->hwaccel_nvidia = hwaccel_nvidia;
+
+  buffer->cuda_stream = cuda_stream;
 
   return buffer;
 }
 
+static void
+cuda_deallocate_buffer (gpointer user_data)
+{
+  ClearBufferData *data = user_data;
+
+  if (data->is_mapped)
+    {
+      grd_hwaccel_nvidia_unmap_cuda_resource (data->hwaccel_nvidia,
+                                              data->cuda_resource,
+                                              data->cuda_stream);
+    }
+
+  grd_hwaccel_nvidia_unregister_cuda_resource (data->hwaccel_nvidia,
+                                               data->cuda_resource,
+                                               data->cuda_stream);
+}
+
 static void
 clear_buffers (GrdRdpBuffer *buffer)
 {
+  if (buffer->cuda_resource)
+    {
+      ClearBufferData *data;
+
+      data = g_new0 (ClearBufferData, 1);
+      data->hwaccel_nvidia = buffer->hwaccel_nvidia;
+      data->cuda_resource = buffer->cuda_resource;
+      data->cuda_stream = buffer->cuda_stream;
+      data->is_mapped = FALSE;
+      grd_egl_thread_deallocate (buffer->egl_thread,
+                                 buffer->pbo,
+                                 cuda_deallocate_buffer,
+                                 data,
+                                 NULL, data, g_free);
+
+      buffer->cuda_resource = NULL;
+      buffer->pbo = 0;
+    }
+
   g_clear_pointer (&buffer->local_data, g_free);
 }
 
@@ -55,15 +115,76 @@ grd_rdp_buffer_release (GrdRdpBuffer *buffer)
   grd_rdp_buffer_pool_release_buffer (buffer->buffer_pool, buffer);
 }
 
-void
+static gboolean
+cuda_allocate_buffer (gpointer user_data,
+                      uint32_t pbo)
+{
+  AllocateBufferData *data = user_data;
+  GrdRdpBuffer *buffer = data->buffer;
+  gboolean success;
+
+  success = grd_hwaccel_nvidia_register_read_only_gl_buffer (data->hwaccel_nvidia,
+                                                             &buffer->cuda_resource,
+                                                             pbo);
+  if (success)
+    buffer->pbo = pbo;
+
+  return success;
+}
+
+static void
+resources_ready (gboolean success,
+                 gpointer user_data)
+{
+  GrdSyncPoint *sync_point = user_data;
+
+  if (success)
+    g_debug ("[RDP] Allocating GL resources was successful");
+  else
+    g_warning ("[RDP] Failed to allocate GL resources");
+
+  grd_sync_point_complete (sync_point, success);
+}
+
+gboolean
 grd_rdp_buffer_resize (GrdRdpBuffer *buffer,
                        uint32_t      width,
                        uint32_t      height,
-                       uint32_t      stride)
+                       uint32_t      stride,
+                       gboolean      preallocate_on_gpu)
 {
+  gboolean success = TRUE;
+
   clear_buffers (buffer);
 
   buffer->width = width;
   buffer->height = height;
   buffer->local_data = g_malloc0 (stride * height * sizeof (uint8_t));
+
+  if (preallocate_on_gpu &&
+      buffer->hwaccel_nvidia)
+    {
+      AllocateBufferData data = {};
+      GrdSyncPoint sync_point = {};
+
+      g_assert (buffer->egl_thread);
+
+      grd_sync_point_init (&sync_point);
+      data.hwaccel_nvidia = buffer->hwaccel_nvidia;
+      data.buffer = buffer;
+
+      grd_egl_thread_allocate (buffer->egl_thread,
+                               buffer->height,
+                               stride,
+                               cuda_allocate_buffer,
+                               &data,
+                               resources_ready,
+                               &sync_point,
+                               NULL);
+
+      success = grd_sync_point_wait_for_completion (&sync_point);
+      grd_sync_point_clear (&sync_point);
+    }
+
+  return success;
 }
diff --git a/src/grd-rdp-buffer.h b/src/grd-rdp-buffer.h
index aeefa25d..8b376f3f 100644
--- a/src/grd-rdp-buffer.h
+++ b/src/grd-rdp-buffer.h
@@ -20,6 +20,8 @@
 #ifndef GRD_RDP_BUFFER_H
 #define GRD_RDP_BUFFER_H
 
+#include <ffnvcodec/dynlink_cuda.h>
+#include <gio/gio.h>
 #include <stdint.h>
 
 #include "grd-types.h"
@@ -28,21 +30,33 @@ struct _GrdRdpBuffer
 {
   GrdRdpBufferPool *buffer_pool;
 
+  GrdEglThread *egl_thread;
+  GrdHwAccelNvidia *hwaccel_nvidia;
+
   uint32_t width;
   uint32_t height;
 
   uint8_t *local_data;
+
+  uint32_t pbo;
+
+  CUgraphicsResource cuda_resource;
+  CUstream cuda_stream;
 };
 
-GrdRdpBuffer *grd_rdp_buffer_new (GrdRdpBufferPool *buffer_pool);
+GrdRdpBuffer *grd_rdp_buffer_new (GrdRdpBufferPool *buffer_pool,
+                                  GrdEglThread     *egl_thread,
+                                  GrdHwAccelNvidia *hwaccel_nvidia,
+                                  CUstream          cuda_stream);
 
 void grd_rdp_buffer_free (GrdRdpBuffer *buffer);
 
 void grd_rdp_buffer_release (GrdRdpBuffer *buffer);
 
-void grd_rdp_buffer_resize (GrdRdpBuffer *buffer,
-                            uint32_t      width,
-                            uint32_t      height,
-                            uint32_t      stride);
+gboolean grd_rdp_buffer_resize (GrdRdpBuffer *buffer,
+                                uint32_t      width,
+                                uint32_t      height,
+                                uint32_t      stride,
+                                gboolean      preallocate_on_gpu);
 
 #endif /* GRD_RDP_BUFFER_H */
diff --git a/src/grd-rdp-pipewire-stream.c b/src/grd-rdp-pipewire-stream.c
index d623f804..fd53d40a 100644
--- a/src/grd-rdp-pipewire-stream.c
+++ b/src/grd-rdp-pipewire-stream.c
@@ -371,13 +371,14 @@ on_stream_param_changed (void                 *user_data,
   release_all_buffers (stream);
 
   if (!grd_rdp_damage_detector_resize_surface (stream->rdp_surface->detector,
-                                               width, height))
+                                               width, height) ||
+      !grd_rdp_buffer_pool_resize_buffers (stream->buffer_pool,
+                                           width, height, stride))
     {
       grd_session_rdp_notify_error (
         stream->session_rdp, GRD_SESSION_RDP_ERROR_GRAPHICS_SUBSYSTEM_FAILED);
       return;
     }
-  grd_rdp_buffer_pool_resize_buffers (stream->buffer_pool, width, height, stride);
 
   pod_builder = SPA_POD_BUILDER_INIT (params_buffer, sizeof (params_buffer));
 
@@ -529,6 +530,13 @@ process_buffer (GrdRdpPipeWireStream     *stream,
       src_data = SPA_MEMBER (map, buffer->datas[0].mapoffset, uint8_t);
 
       frame->buffer = grd_rdp_buffer_pool_acquire (stream->buffer_pool);
+      if (!frame->buffer)
+        {
+          grd_session_rdp_notify_error (stream->session_rdp,
+                                        GRD_SESSION_RDP_ERROR_GRAPHICS_SUBSYSTEM_FAILED);
+          callback (stream, g_steal_pointer (&frame), FALSE, user_data);
+          return;
+        }
 
       copy_frame_data (frame,
                        src_data,
@@ -556,6 +564,13 @@ process_buffer (GrdRdpPipeWireStream     *stream,
       uint8_t *dst_data;
 
       frame->buffer = grd_rdp_buffer_pool_acquire (stream->buffer_pool);
+      if (!frame->buffer)
+        {
+          grd_session_rdp_notify_error (stream->session_rdp,
+                                        GRD_SESSION_RDP_ERROR_GRAPHICS_SUBSYSTEM_FAILED);
+          callback (stream, g_steal_pointer (&frame), FALSE, user_data);
+          return;
+        }
 
       row_width = dst_stride / bpp;
 
@@ -595,6 +610,13 @@ process_buffer (GrdRdpPipeWireStream     *stream,
       src_data = buffer->datas[0].data;
 
       frame->buffer = grd_rdp_buffer_pool_acquire (stream->buffer_pool);
+      if (!frame->buffer)
+        {
+          grd_session_rdp_notify_error (stream->session_rdp,
+                                        GRD_SESSION_RDP_ERROR_GRAPHICS_SUBSYSTEM_FAILED);
+          callback (stream, g_steal_pointer (&frame), FALSE, user_data);
+          return;
+        }
 
       copy_frame_data (frame,
                        src_data,
@@ -851,12 +873,16 @@ static const struct pw_core_events core_events = {
 };
 
 GrdRdpPipeWireStream *
-grd_rdp_pipewire_stream_new (GrdSessionRdp  *session_rdp,
-                             GMainContext   *render_context,
-                             GrdRdpSurface  *rdp_surface,
-                             uint32_t        src_node_id,
-                             GError        **error)
-{
+grd_rdp_pipewire_stream_new (GrdSessionRdp     *session_rdp,
+                             GrdHwAccelNvidia  *hwaccel_nvidia,
+                             GMainContext      *render_context,
+                             GrdRdpSurface     *rdp_surface,
+                             uint32_t           src_node_id,
+                             GError           **error)
+{
+  GrdSession *session = GRD_SESSION (session_rdp);
+  GrdContext *context = grd_session_get_context (session);
+  GrdEglThread *egl_thread = grd_context_get_egl_thread (context);
   g_autoptr (GrdRdpPipeWireStream) stream = NULL;
   GrdPipeWireSource *pipewire_source;
 
@@ -903,7 +929,12 @@ grd_rdp_pipewire_stream_new (GrdSessionRdp  *session_rdp,
   if (!connect_to_stream (stream, rdp_surface->refresh_rate, error))
     return NULL;
 
-  stream->buffer_pool = grd_rdp_buffer_pool_new (DEFAULT_BUFFER_POOL_SIZE);
+  stream->buffer_pool = grd_rdp_buffer_pool_new (egl_thread,
+                                                 hwaccel_nvidia,
+                                                 rdp_surface->cuda_stream,
+                                                 DEFAULT_BUFFER_POOL_SIZE);
+  if (!stream->buffer_pool)
+    return NULL;
 
   return g_steal_pointer (&stream);
 }
diff --git a/src/grd-rdp-pipewire-stream.h b/src/grd-rdp-pipewire-stream.h
index a9eec267..936e2901 100644
--- a/src/grd-rdp-pipewire-stream.h
+++ b/src/grd-rdp-pipewire-stream.h
@@ -30,10 +30,11 @@ G_DECLARE_FINAL_TYPE (GrdRdpPipeWireStream, grd_rdp_pipewire_stream,
                       GRD, RDP_PIPEWIRE_STREAM,
                       GObject)
 
-GrdRdpPipeWireStream *grd_rdp_pipewire_stream_new (GrdSessionRdp  *session_rdp,
-                                                   GMainContext   *render_context,
-                                                   GrdRdpSurface  *rdp_surface,
-                                                   uint32_t        src_node_id,
-                                                   GError        **error);
+GrdRdpPipeWireStream *grd_rdp_pipewire_stream_new (GrdSessionRdp     *session_rdp,
+                                                   GrdHwAccelNvidia  *hwaccel_nvidia,
+                                                   GMainContext      *render_context,
+                                                   GrdRdpSurface     *rdp_surface,
+                                                   uint32_t           src_node_id,
+                                                   GError           **error);
 
 #endif /* GRD_RDP_PIPEWIRE_STREAM_H */
diff --git a/src/grd-rdp-surface.c b/src/grd-rdp-surface.c
index e555e180..2c2f753d 100644
--- a/src/grd-rdp-surface.c
+++ b/src/grd-rdp-surface.c
@@ -21,21 +21,29 @@
 
 #include "grd-rdp-surface.h"
 
+#include "grd-hwaccel-nvidia.h"
 #include "grd-rdp-buffer.h"
 #include "grd-rdp-damage-detector-memcmp.h"
 
 GrdRdpSurface *
-grd_rdp_surface_new (void)
+grd_rdp_surface_new (GrdHwAccelNvidia *hwaccel_nvidia)
 {
-  GrdRdpSurface *rdp_surface;
+  g_autofree GrdRdpSurface *rdp_surface = NULL;
 
   rdp_surface = g_malloc0 (sizeof (GrdRdpSurface));
+  rdp_surface->hwaccel_nvidia = hwaccel_nvidia;
+
+  if (hwaccel_nvidia &&
+      !grd_hwaccel_nvidia_create_cuda_stream (hwaccel_nvidia,
+                                              &rdp_surface->cuda_stream))
+    return NULL;
+
   rdp_surface->detector = (GrdRdpDamageDetector *)
     grd_rdp_damage_detector_memcmp_new ();
 
   g_mutex_init (&rdp_surface->surface_mutex);
 
-  return rdp_surface;
+  return g_steal_pointer (&rdp_surface);
 }
 
 void
@@ -49,6 +57,12 @@ grd_rdp_surface_free (GrdRdpSurface *rdp_surface)
   g_mutex_clear (&rdp_surface->surface_mutex);
 
   g_clear_object (&rdp_surface->detector);
+  if (rdp_surface->cuda_stream)
+    {
+      grd_hwaccel_nvidia_destroy_cuda_stream (rdp_surface->hwaccel_nvidia,
+                                              rdp_surface->cuda_stream);
+      rdp_surface->cuda_stream = NULL;
+    }
 
   g_free (rdp_surface);
 }
diff --git a/src/grd-rdp-surface.h b/src/grd-rdp-surface.h
index 1feabf23..d6878544 100644
--- a/src/grd-rdp-surface.h
+++ b/src/grd-rdp-surface.h
@@ -20,6 +20,7 @@
 #ifndef GRD_RDP_SURFACE_H
 #define GRD_RDP_SURFACE_H
 
+#include <ffnvcodec/dynlink_cuda.h>
 #include <gio/gio.h>
 #include <stdint.h>
 
@@ -37,6 +38,9 @@ struct _GrdRdpSurface
   GrdRdpBuffer *pending_framebuffer;
   GrdRdpDamageDetector *detector;
 
+  GrdHwAccelNvidia *hwaccel_nvidia;
+  CUstream cuda_stream;
+
   gboolean valid;
 
   GrdRdpGfxSurface *gfx_surface;
@@ -44,7 +48,7 @@ struct _GrdRdpSurface
   gboolean encoding_suspended;
 };
 
-GrdRdpSurface *grd_rdp_surface_new (void);
+GrdRdpSurface *grd_rdp_surface_new (GrdHwAccelNvidia *hwaccel_nvidia);
 
 void grd_rdp_surface_free (GrdRdpSurface *rdp_surface);
 
diff --git a/src/grd-session-rdp.c b/src/grd-session-rdp.c
index 0435257c..cde858f2 100644
--- a/src/grd-session-rdp.c
+++ b/src/grd-session-rdp.c
@@ -1671,7 +1671,13 @@ rdp_peer_post_connect (freerdp_peer *peer)
 
   rdp_settings->PointerCacheSize = MIN (rdp_settings->PointerCacheSize, 100);
 
-  session_rdp->rdp_surface = grd_rdp_surface_new ();
+  session_rdp->rdp_surface = grd_rdp_surface_new (session_rdp->hwaccel_nvidia);
+  if (!session_rdp->rdp_surface)
+    {
+      g_warning ("[RDP] Failed to create RDP surface");
+      return FALSE;
+    }
+
   session_rdp->rdp_surface->refresh_rate = rdp_settings->SupportGraphicsPipeline ? 60
                                                                                  : 30;
 
@@ -1893,6 +1899,9 @@ socket_thread_func (gpointer data)
   uint32_t n_events;
   uint32_t n_freerdp_handles;
 
+  if (session_rdp->hwaccel_nvidia)
+    grd_hwaccel_nvidia_push_cuda_context (session_rdp->hwaccel_nvidia);
+
   WaitForSingleObject (session_rdp->start_event, INFINITE);
 
   peer = session_rdp->peer;
@@ -1965,6 +1974,9 @@ socket_thread_func (gpointer data)
         }
     }
 
+  if (session_rdp->hwaccel_nvidia)
+    grd_hwaccel_nvidia_pop_cuda_context (session_rdp->hwaccel_nvidia);
+
   return NULL;
 }
 
@@ -2176,6 +2188,7 @@ grd_session_rdp_stream_ready (GrdSession *session,
   rdp_surface = session_rdp->rdp_surface;
   pipewire_node_id = grd_stream_get_pipewire_node_id (stream);
   session_rdp->pipewire_stream = grd_rdp_pipewire_stream_new (session_rdp,
+                                                              session_rdp->hwaccel_nvidia,
                                                               graphics_context,
                                                               rdp_surface,
                                                               pipewire_node_id,
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]