[gnome-remote-desktop] rdp: Stop reallocating NV12 buffer every frame



commit 1459055bef60274e46d1ca21c35d9d48a08512d6
Author: Pascal Nowack <Pascal Nowack gmx de>
Date:   Thu Jan 13 12:55:02 2022 +0100

    rdp: Stop reallocating NV12 buffer every frame
    
    Since memory allocation on the GPU is slower than on the CPU, avoid
    reallocating the NV12 buffer for every frame.
    The buffer is not preallocated; it is allocated on first use and
    released when the RDP surface is reset or destroyed.

 src/grd-hwaccel-nvidia.c        | 25 +++++++------------------
 src/grd-hwaccel-nvidia.h        |  1 +
 src/grd-rdp-graphics-pipeline.c |  1 +
 src/grd-rdp-pipewire-stream.c   |  1 +
 src/grd-rdp-surface.c           | 15 +++++++++++++++
 src/grd-rdp-surface.h           |  7 +++++++
 6 files changed, 32 insertions(+), 18 deletions(-)
---
diff --git a/src/grd-hwaccel-nvidia.c b/src/grd-hwaccel-nvidia.c
index cf139ee8..7ddc8ed1 100644
--- a/src/grd-hwaccel-nvidia.c
+++ b/src/grd-hwaccel-nvidia.c
@@ -384,6 +384,7 @@ gboolean
 grd_hwaccel_nvidia_avc420_encode_bgrx_frame (GrdHwAccelNvidia  *hwaccel_nvidia,
                                              uint32_t           encode_session_id,
                                              CUdeviceptr        src_data,
+                                             CUdeviceptr       *main_view_nv12,
                                              uint16_t           src_width,
                                              uint16_t           src_height,
                                              uint16_t           aligned_width,
@@ -398,8 +399,6 @@ grd_hwaccel_nvidia_avc420_encode_bgrx_frame (GrdHwAccelNvidia  *hwaccel_nvidia,
   NV_ENC_PIC_PARAMS pic_params = {0};
   NV_ENC_LOCK_BITSTREAM lock_bitstream = {0};
   uint16_t src_stride;
-  CUdeviceptr nv12_buffer = 0;
-  size_t nv12_pitch = 0;
   unsigned int grid_dim_x, grid_dim_y, grid_dim_z;
   unsigned int block_dim_x, block_dim_y, block_dim_z;
   void *args[8];
@@ -412,13 +411,10 @@ grd_hwaccel_nvidia_avc420_encode_bgrx_frame (GrdHwAccelNvidia  *hwaccel_nvidia,
   g_assert (encode_session->enc_width == aligned_width);
   g_assert (encode_session->enc_height == aligned_height);
 
-  if (hwaccel_nvidia->cuda_funcs->cuMemAllocPitch (
-        &nv12_buffer, &nv12_pitch,
-        aligned_width, aligned_height + aligned_height / 2, 4) != CUDA_SUCCESS)
-    {
-      g_warning ("[HWAccel.CUDA] Failed to allocate NV12 buffer");
-      return FALSE;
-    }
+  if (!(*main_view_nv12) &&
+      !grd_hwaccel_nvidia_alloc_mem (hwaccel_nvidia, main_view_nv12,
+                                     aligned_width * (aligned_height + aligned_height / 2)))
+    return FALSE;
 
   src_stride = src_width * 4;
 
@@ -433,7 +429,7 @@ grd_hwaccel_nvidia_avc420_encode_bgrx_frame (GrdHwAccelNvidia  *hwaccel_nvidia,
                (aligned_height / 2 % block_dim_y ? 1 : 0);
   grid_dim_z = 1;
 
-  args[0] = &nv12_buffer;
+  args[0] = main_view_nv12;
   args[1] = &src_data;
   args[2] = &src_width;
   args[3] = &src_height;
@@ -447,14 +443,12 @@ grd_hwaccel_nvidia_avc420_encode_bgrx_frame (GrdHwAccelNvidia  *hwaccel_nvidia,
         block_dim_x, block_dim_y, block_dim_z, 0, cuda_stream, args, NULL) != CUDA_SUCCESS)
     {
       g_warning ("[HWAccel.CUDA] Failed to launch BGRX_TO_YUV420 kernel");
-      hwaccel_nvidia->cuda_funcs->cuMemFree (nv12_buffer);
       return FALSE;
     }
 
   if (hwaccel_nvidia->cuda_funcs->cuStreamSynchronize (cuda_stream) != CUDA_SUCCESS)
     {
       g_warning ("[HWAccel.CUDA] Failed to synchronize stream");
-      hwaccel_nvidia->cuda_funcs->cuMemFree (nv12_buffer);
       return FALSE;
     }
 
@@ -463,7 +457,7 @@ grd_hwaccel_nvidia_avc420_encode_bgrx_frame (GrdHwAccelNvidia  *hwaccel_nvidia,
   register_res.width = aligned_width;
   register_res.height = aligned_height;
   register_res.pitch = aligned_width;
-  register_res.resourceToRegister = (void *) nv12_buffer;
+  register_res.resourceToRegister = (void *) *main_view_nv12;
   register_res.bufferFormat = NV_ENC_BUFFER_FORMAT_NV12;
   register_res.bufferUsage = NV_ENC_INPUT_IMAGE;
 
@@ -471,7 +465,6 @@ grd_hwaccel_nvidia_avc420_encode_bgrx_frame (GrdHwAccelNvidia  *hwaccel_nvidia,
         encode_session->encoder, &register_res) != NV_ENC_SUCCESS)
     {
       g_warning ("[HWAccel.NVENC] Failed to register resource");
-      hwaccel_nvidia->cuda_funcs->cuMemFree (nv12_buffer);
       return FALSE;
     }
 
@@ -484,7 +477,6 @@ grd_hwaccel_nvidia_avc420_encode_bgrx_frame (GrdHwAccelNvidia  *hwaccel_nvidia,
       g_warning ("[HWAccel.NVENC] Failed to map input resource");
       hwaccel_nvidia->nvenc_api.nvEncUnregisterResource (encode_session->encoder,
                                                          register_res.registeredResource);
-      hwaccel_nvidia->cuda_funcs->cuMemFree (nv12_buffer);
       return FALSE;
     }
 
@@ -505,7 +497,6 @@ grd_hwaccel_nvidia_avc420_encode_bgrx_frame (GrdHwAccelNvidia  *hwaccel_nvidia,
                                                          map_input_res.mappedResource);
       hwaccel_nvidia->nvenc_api.nvEncUnregisterResource (encode_session->encoder,
                                                          register_res.registeredResource);
-      hwaccel_nvidia->cuda_funcs->cuMemFree (nv12_buffer);
       return FALSE;
     }
 
@@ -520,7 +511,6 @@ grd_hwaccel_nvidia_avc420_encode_bgrx_frame (GrdHwAccelNvidia  *hwaccel_nvidia,
                                                          map_input_res.mappedResource);
       hwaccel_nvidia->nvenc_api.nvEncUnregisterResource (encode_session->encoder,
                                                          register_res.registeredResource);
-      hwaccel_nvidia->cuda_funcs->cuMemFree (nv12_buffer);
       return FALSE;
     }
 
@@ -534,7 +524,6 @@ grd_hwaccel_nvidia_avc420_encode_bgrx_frame (GrdHwAccelNvidia  *hwaccel_nvidia,
                                                      map_input_res.mappedResource);
   hwaccel_nvidia->nvenc_api.nvEncUnregisterResource (encode_session->encoder,
                                                      register_res.registeredResource);
-  hwaccel_nvidia->cuda_funcs->cuMemFree (nv12_buffer);
 
   return TRUE;
 }
diff --git a/src/grd-hwaccel-nvidia.h b/src/grd-hwaccel-nvidia.h
index 66f6f935..915a2e33 100644
--- a/src/grd-hwaccel-nvidia.h
+++ b/src/grd-hwaccel-nvidia.h
@@ -88,6 +88,7 @@ void grd_hwaccel_nvidia_free_nvenc_session (GrdHwAccelNvidia *hwaccel_nvidia,
 gboolean grd_hwaccel_nvidia_avc420_encode_bgrx_frame (GrdHwAccelNvidia  *hwaccel_nvidia,
                                                       uint32_t           encode_session_id,
                                                       CUdeviceptr        src_data,
+                                                      CUdeviceptr       *main_view_nv12,
                                                       uint16_t           src_width,
                                                       uint16_t           src_height,
                                                       uint16_t           aligned_width,
diff --git a/src/grd-rdp-graphics-pipeline.c b/src/grd-rdp-graphics-pipeline.c
index 20361044..01963e5e 100644
--- a/src/grd-rdp-graphics-pipeline.c
+++ b/src/grd-rdp-graphics-pipeline.c
@@ -419,6 +419,7 @@ refresh_gfx_surface_avc420 (GrdRdpGraphicsPipeline *graphics_pipeline,
   if (!grd_hwaccel_nvidia_avc420_encode_bgrx_frame (graphics_pipeline->hwaccel_nvidia,
                                                     hwaccel_context->encode_session_id,
                                                     buffer->mapped_cuda_pointer,
+                                                    &rdp_surface->avc.main_view,
                                                     surface_width, surface_height,
                                                     aligned_width, aligned_height,
                                                     &avc420.data, &avc420.length,
diff --git a/src/grd-rdp-pipewire-stream.c b/src/grd-rdp-pipewire-stream.c
index c32fdfea..d40a522a 100644
--- a/src/grd-rdp-pipewire-stream.c
+++ b/src/grd-rdp-pipewire-stream.c
@@ -388,6 +388,7 @@ on_stream_param_changed (void                 *user_data,
   if (egl_thread)
     sync_egl_thread (egl_thread);
   release_all_buffers (stream);
+  grd_rdp_surface_reset (stream->rdp_surface);
 
   if (!grd_rdp_damage_detector_resize_surface (stream->rdp_surface->detector,
                                                width, height) ||
diff --git a/src/grd-rdp-surface.c b/src/grd-rdp-surface.c
index cb1851f6..d2481fff 100644
--- a/src/grd-rdp-surface.c
+++ b/src/grd-rdp-surface.c
@@ -34,6 +34,11 @@ destroy_hwaccel_util_objects (GrdRdpSurface *rdp_surface)
                                               rdp_surface->cuda_stream);
       rdp_surface->cuda_stream = NULL;
     }
+  if (rdp_surface->avc.main_view)
+    {
+      grd_hwaccel_nvidia_clear_mem_ptr (rdp_surface->hwaccel_nvidia,
+                                        &rdp_surface->avc.main_view);
+    }
 }
 
 GrdRdpSurface *
@@ -91,3 +96,13 @@ grd_rdp_surface_free (GrdRdpSurface *rdp_surface)
 
   g_free (rdp_surface);
 }
+
+void
+grd_rdp_surface_reset (GrdRdpSurface *rdp_surface)
+{
+  if (rdp_surface->avc.main_view)
+    {
+      grd_hwaccel_nvidia_clear_mem_ptr (rdp_surface->hwaccel_nvidia,
+                                        &rdp_surface->avc.main_view);
+    }
+}
diff --git a/src/grd-rdp-surface.h b/src/grd-rdp-surface.h
index d6878544..49dd84b5 100644
--- a/src/grd-rdp-surface.h
+++ b/src/grd-rdp-surface.h
@@ -41,6 +41,11 @@ struct _GrdRdpSurface
   GrdHwAccelNvidia *hwaccel_nvidia;
   CUstream cuda_stream;
 
+  struct
+  {
+    CUdeviceptr main_view;
+  } avc;
+
   gboolean valid;
 
   GrdRdpGfxSurface *gfx_surface;
@@ -52,4 +57,6 @@ GrdRdpSurface *grd_rdp_surface_new (GrdHwAccelNvidia *hwaccel_nvidia);
 
 void grd_rdp_surface_free (GrdRdpSurface *rdp_surface);
 
+void grd_rdp_surface_reset (GrdRdpSurface *rdp_surface);
+
 #endif /* GRD_RDP_SURFACE_H */


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]