[sysprof/wip/chergert/path-resolver] do delayed path resolving of files containing symbols



commit 5f352abc86998ec3d8499a5db506462552ae1064
Author: Christian Hergert <chergert redhat com>
Date:   Wed Sep 15 17:52:58 2021 -0700

    do delayed path resolving of files containing symbols
    
    we still need to teach this to locate debug dirs relative to the
    process paths.

 src/libsysprof/sysprof-elf-symbol-resolver.c     | 232 ++++++++++++++++++-----
 src/libsysprof/sysprof-symbol-resolver-private.h |  31 +++
 src/libsysprof/sysprof-symbol-resolver.c         |  53 +++++-
 3 files changed, 265 insertions(+), 51 deletions(-)
---
diff --git a/src/libsysprof/sysprof-elf-symbol-resolver.c b/src/libsysprof/sysprof-elf-symbol-resolver.c
index 27a3b5f..c555e05 100644
--- a/src/libsysprof/sysprof-elf-symbol-resolver.c
+++ b/src/libsysprof/sysprof-elf-symbol-resolver.c
@@ -20,6 +20,7 @@
 
 #include "config.h"
 
+#include <stdio.h>
 #include <string.h>
 
 #include "binfile.h"
@@ -27,14 +28,34 @@
 #include "sysprof-elf-symbol-resolver.h"
 #include "sysprof-flatpak.h"
 #include "sysprof-map-lookaside.h"
+#include "sysprof-path-resolver.h"
 #include "sysprof-podman.h"
+#include "sysprof-symbol-resolver-private.h"
+
+typedef struct /* one overlay frame recorded for a process */
+{
+  char *on_host;    /* overlay source: first string of the overlay frame, interned in self->chunks */
+  char *in_process; /* overlay destination: the string that follows the source inside the frame */
+  int layer;        /* layer value copied from the overlay frame */
+} ProcessOverlay;
+
+typedef struct /* per-pid state kept in self->processes */
+{
+  SysprofMapLookaside *lookaside;      /* created lazily when the first map frame for the pid is loaded */
+  SysprofPathResolver *resolver;       /* built during load, only for pids that have mountinfo data */
+  GByteArray          *mountinfo_data; /* accumulated chunks of the /proc/<pid>/mountinfo file */
+  GArray              *overlays;       /* ProcessOverlay elements, allocated on the first overlay frame */
+  int                  pid;            /* process id; also the hash table key for this entry */
+} ProcessInfo;
 
 struct _SysprofElfSymbolResolver
 {
-  GObject     parent_instance;
+  GObject       parent_instance;
+
+  GHashTable   *processes;
+  GStringChunk *chunks;
 
   GArray     *debug_dirs;
-  GHashTable *lookasides;
   GHashTable *bin_files;
   GHashTable *tag_cache;
 };
@@ -48,6 +69,30 @@ G_DEFINE_TYPE_EXTENDED (SysprofElfSymbolResolver,
                         G_IMPLEMENT_INTERFACE (SYSPROF_TYPE_SYMBOL_RESOLVER,
                                                symbol_resolver_iface_init))
 
+/* process_info_free:
+ * @data: (nullable): a ProcessInfo, passed untyped so this can serve as the
+ *   value destroy callback of the processes hash table
+ *
+ * Releases everything owned by the ProcessInfo and then the struct itself.
+ */
+static void
+process_info_free (gpointer data)
+{
+  ProcessInfo *pi = data;
+
+  if (pi != NULL)
+    {
+      /* The overlay strings are interned in self->chunks, so only the array
+       * holding them needs to be dropped here.
+       */
+      g_clear_pointer (&pi->overlays, g_array_unref);
+      g_clear_pointer (&pi->mountinfo_data, g_byte_array_unref);
+      g_clear_pointer (&pi->resolver, _sysprof_path_resolver_free);
+      g_clear_pointer (&pi->lookaside, sysprof_map_lookaside_free);
+      g_slice_free (ProcessInfo, pi);
+    }
+}
+
 static gboolean
 is_flatpak (void)
 {
@@ -69,9 +104,9 @@ sysprof_elf_symbol_resolver_finalize (GObject *object)
   SysprofElfSymbolResolver *self = (SysprofElfSymbolResolver *)object;
 
   g_clear_pointer (&self->bin_files, g_hash_table_unref);
-  g_clear_pointer (&self->lookasides, g_hash_table_unref);
   g_clear_pointer (&self->tag_cache, g_hash_table_unref);
   g_clear_pointer (&self->debug_dirs, g_array_unref);
+  g_clear_pointer (&self->processes, g_hash_table_unref);
 
   G_OBJECT_CLASS (sysprof_elf_symbol_resolver_parent_class)->finalize (object);
 }
@@ -95,6 +130,8 @@ sysprof_elf_symbol_resolver_init (SysprofElfSymbolResolver *self)
 {
   g_auto(GStrv) podman_dirs = NULL;
 
+  self->processes = g_hash_table_new_full (NULL, NULL, NULL, process_info_free);
+
   self->debug_dirs = g_array_new (TRUE, FALSE, sizeof (gchar *));
   g_array_set_clear_func (self->debug_dirs, free_element_string);
 
@@ -115,11 +152,6 @@ sysprof_elf_symbol_resolver_init (SysprofElfSymbolResolver *self)
         sysprof_elf_symbol_resolver_add_debug_dir (self, debug_dirs[i]);
     }
 
-  self->lookasides = g_hash_table_new_full (NULL,
-                                            NULL,
-                                            NULL,
-                                            (GDestroyNotify)sysprof_map_lookaside_free);
-
   self->bin_files = g_hash_table_new_full (g_str_hash,
                                            g_str_equal,
                                            g_free,
@@ -128,63 +160,179 @@ sysprof_elf_symbol_resolver_init (SysprofElfSymbolResolver *self)
   self->tag_cache = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);
 }
 
+static ProcessInfo *
+sysprof_elf_symbol_resolver_get_process (SysprofElfSymbolResolver *self,
+                                         int                       pid)
+{
+  /* Find-or-create: a miss registers a zeroed ProcessInfo under @pid. */
+  gpointer key = GINT_TO_POINTER (pid);
+  ProcessInfo *info;
+
+  g_assert (SYSPROF_IS_ELF_SYMBOL_RESOLVER (self));
+
+  if ((info = g_hash_table_lookup (self->processes, key)) != NULL)
+    return info;
+  info = g_slice_new0 (ProcessInfo);
+  info->pid = pid;
+  g_hash_table_insert (self->processes, key, info);
+  return info;
+}
+
 static void
 sysprof_elf_symbol_resolver_load (SysprofSymbolResolver *resolver,
                                   SysprofCaptureReader  *reader)
 {
   SysprofElfSymbolResolver *self = (SysprofElfSymbolResolver *)resolver;
+  static const guint8 zero[1] = {0}; /* NUL byte used to terminate collected file data */
   SysprofCaptureFrameType type;
+  g_autoptr(GByteArray) mounts = NULL;
+  g_autofree char *mounts_data = NULL;
+  GHashTableIter iter;
+  gpointer k, v;
 
-  g_assert (SYSPROF_IS_SYMBOL_RESOLVER (resolver));
+  g_assert (SYSPROF_IS_ELF_SYMBOL_RESOLVER (self));
   g_assert (reader != NULL);
 
+  g_hash_table_remove_all (self->processes); /* drop per-process state from any earlier load */
+
+  /* Pass 1: collect every /proc/{pid}/mountinfo (plus /proc/mounts) file
+   * chunk and all overlay frames, so we know how each process's filesystem
+   * namespace was assembled. That information is used below to build
+   * path resolvers that let us locate the mapped files from the host.
+   */
   sysprof_capture_reader_reset (reader);
+  while (sysprof_capture_reader_peek_type (reader, &type))
+    {
+      if (type == SYSPROF_CAPTURE_FRAME_FILE_CHUNK)
+        {
+          const SysprofCaptureFileChunk *ev;
+          ProcessInfo *pi;
+          int pid;
+
+          if (!(ev = sysprof_capture_reader_read_file (reader)))
+            break;
+
+          if (g_str_has_prefix (ev->path, "/proc/") &&
+              g_str_has_suffix (ev->path, "/mountinfo") && /* sscanf() succeeds once the number converts, so check the suffix here */
+              sscanf (ev->path, "/proc/%d/mountinfo", &pid) == 1) /* %d matches the int it stores into */
+            {
+              pi = sysprof_elf_symbol_resolver_get_process (self, pid);
+              if (pi->mountinfo_data == NULL)
+                pi->mountinfo_data = g_byte_array_new ();
+              if (ev->len)
+                g_byte_array_append (pi->mountinfo_data, ev->data, ev->len);
+            }
+          else if (g_str_equal (ev->path, "/proc/mounts"))
+            {
+              if (mounts == NULL)
+                mounts = g_byte_array_new ();
+              if (ev->len)
+                g_byte_array_append (mounts, ev->data, ev->len);
+            }
+        }
+      else if (type == SYSPROF_CAPTURE_FRAME_OVERLAY)
+        {
+          const SysprofCaptureOverlay *ev;
+          ProcessOverlay ov;
+          ProcessInfo *pi;
+
+          if (!(ev = sysprof_capture_reader_read_overlay (reader)))
+            break;
+
+          ov.on_host = g_string_chunk_insert_const (self->chunks, ev->data); /* NOTE(review): no g_string_chunk_new() for self->chunks is visible in this patch - confirm it is created before load */
+          ov.in_process = g_string_chunk_insert_const (self->chunks, &ev->data[ev->src_len+1]); /* second string starts right after the first one's NUL */
+          ov.layer = ev->layer;
 
-  /* Start by finding mount/mountinfo for processes */
+          pi = sysprof_elf_symbol_resolver_get_process (self, ev->frame.pid);
+          if (pi->overlays == NULL)
+            pi->overlays = g_array_new (FALSE, FALSE, sizeof (ProcessOverlay));
+          g_array_append_val (pi->overlays, ov);
+        }
+      else
+        {
+          if (!sysprof_capture_reader_skip (reader))
+            break;
+        }
+    }
+
+  /* Now make sure we have access to /proc/mounts data. If we do not find it
+   * within the capture, assume we're running on the same host.
+   */
+  if (mounts != NULL)
+    {
+      g_byte_array_append (mounts, zero, 1);
+      mounts_data = (char *)g_byte_array_free (g_steal_pointer (&mounts), FALSE);
+    }
+
+  if (mounts_data == NULL)
+    g_file_get_contents ("/proc/mounts", &mounts_data, NULL, NULL); /* result ignored; NOTE(review): confirm the path resolver tolerates NULL mounts */
+
+  /* Pass 2: now that all the mountinfo data is loaded, create a path
+   * resolver for each process that had any, and register that process's
+   * overlays with it. Processes without mountinfo data get no resolver.
+   */
+  g_hash_table_iter_init (&iter, self->processes);
+  while (g_hash_table_iter_next (&iter, &k, &v))
+    {
+      ProcessInfo *pi = v;
+
+      if (pi->mountinfo_data == NULL)
+        continue;
+
+      g_byte_array_append (pi->mountinfo_data, zero, 1);
+
+      pi->resolver = _sysprof_path_resolver_new (mounts_data,
+                                                 (const char *)pi->mountinfo_data->data);
+
+      if (pi->overlays != NULL)
+        {
+          for (guint i = 0; i < pi->overlays->len; i++)
+            {
+              const ProcessOverlay *ov = &g_array_index (pi->overlays, ProcessOverlay, i);
+              _sysprof_path_resolver_add_overlay (pi->resolver, ov->in_process, ov->on_host, ov->layer);
+            }
+        }
+    }
 
+  /* Pass 3: walk the capture again and record every map, translating its
+   * filename to a host path when the process has a resolver.
+   */
+  sysprof_capture_reader_reset (reader);
   while (sysprof_capture_reader_peek_type (reader, &type))
     {
       if (type == SYSPROF_CAPTURE_FRAME_MAP)
         {
           const SysprofCaptureMap *ev = sysprof_capture_reader_read_map (reader);
-          SysprofMapLookaside *lookaside = g_hash_table_lookup (self->lookasides, GINT_TO_POINTER (ev->frame.pid));
           const char *filename = ev->filename;
+          g_autofree char *resolved = NULL;
+          ProcessInfo *pi;
           SysprofMap map;
 
+          pi = sysprof_elf_symbol_resolver_get_process (self, ev->frame.pid); /* NOTE(review): ev is used without the NULL check the first pass applies to its reads */
+
+          if (pi->resolver != NULL)
+            {
+              resolved = _sysprof_path_resolver_resolve (pi->resolver, filename);
+
+              if (resolved)
+                filename = resolved;
+            }
+
           map.start = ev->start;
           map.end = ev->end;
           map.offset = ev->offset;
           map.inode = ev->inode;
           map.filename = filename;
 
-          if (lookaside == NULL)
-            {
-              lookaside = sysprof_map_lookaside_new ();
-              g_hash_table_insert (self->lookasides, GINT_TO_POINTER (ev->frame.pid), lookaside);
-            }
-
-          sysprof_map_lookaside_insert (lookaside, &map);
-        }
-      else if (type == SYSPROF_CAPTURE_FRAME_OVERLAY)
-        {
-          const SysprofCaptureOverlay *ev = sysprof_capture_reader_read_overlay (reader);
-          SysprofMapLookaside *lookaside = g_hash_table_lookup (self->lookasides, GINT_TO_POINTER (ev->frame.pid));
-          const char *src = ev->data;
-          const char *dst = &ev->data[ev->src_len+1];
-
-          if (lookaside == NULL)
-            {
-              lookaside = sysprof_map_lookaside_new ();
-              g_hash_table_insert (self->lookasides, GINT_TO_POINTER (ev->frame.pid), lookaside);
-            }
+          if (pi->lookaside == NULL)
+            pi->lookaside = sysprof_map_lookaside_new ();
 
-          sysprof_map_lookaside_overlay (lookaside, src, dst);
+          sysprof_map_lookaside_insert (pi->lookaside, &map); /* NOTE(review): filename may point at the autofree'd "resolved" - confirm insert copies it */
         }
       else
         {
           if (!sysprof_capture_reader_skip (reader))
             return;
-          continue;
         }
     }
 }
@@ -351,15 +499,13 @@ sysprof_elf_symbol_resolver_resolve_full (SysprofElfSymbolResolver *self,
                                           gchar                   **name,
                                           GQuark                   *tag)
 {
-  SysprofMapLookaside *lookaside;
-  const SysprofMapOverlay *overlays = NULL;
   const bin_symbol_t *bin_sym;
   const gchar *bin_sym_name;
   const SysprofMap *map;
+  ProcessInfo *pi;
   bin_file_t *bin_file;
   gulong ubegin;
   gulong uend;
-  guint n_overlays = 0;
 
   g_assert (SYSPROF_IS_ELF_SYMBOL_RESOLVER (self));
   g_assert (name != NULL);
@@ -371,24 +517,18 @@ sysprof_elf_symbol_resolver_resolve_full (SysprofElfSymbolResolver *self,
   if (context != SYSPROF_ADDRESS_CONTEXT_USER)
     return FALSE;
 
-  lookaside = g_hash_table_lookup (self->lookasides, GINT_TO_POINTER (pid));
-  if G_UNLIKELY (lookaside == NULL)
+  if (!(pi = g_hash_table_lookup (self->processes, GINT_TO_POINTER (pid))))
     return FALSE;
 
-  map = sysprof_map_lookaside_lookup (lookaside, address);
+  map = sysprof_map_lookaside_lookup (pi->lookaside, address);
   if G_UNLIKELY (map == NULL)
     return FALSE;
 
   address -= map->start;
   address += map->offset;
 
-  if (lookaside->overlays)
-    {
-      overlays = &g_array_index (lookaside->overlays, SysprofMapOverlay, 0);
-      n_overlays = lookaside->overlays->len;
-    }
-
-  bin_file = sysprof_elf_symbol_resolver_get_bin_file (self, overlays, n_overlays, map->filename);
+  /* TODO: Get debugdirs for process */
+  bin_file = sysprof_elf_symbol_resolver_get_bin_file (self, NULL, 0, map->filename);
 
   g_assert (bin_file != NULL);
 
diff --git a/src/libsysprof/sysprof-symbol-resolver-private.h b/src/libsysprof/sysprof-symbol-resolver-private.h
new file mode 100644
index 0000000..76cbe45
--- /dev/null
+++ b/src/libsysprof/sysprof-symbol-resolver-private.h
@@ -0,0 +1,31 @@
+/* sysprof-symbol-resolver-private.h
+ *
+ * Copyright 2021 Christian Hergert <chergert redhat com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#pragma once
+
+#include <glib.h>
+#include <sysprof-capture.h>
+
+G_BEGIN_DECLS
+
+char *_sysprof_symbol_resolver_load_file (SysprofCaptureReader *reader,
+                                          const char           *path); /* newly allocated, NUL-terminated contents captured for @path, or NULL on failure */
+
+G_END_DECLS
diff --git a/src/libsysprof/sysprof-symbol-resolver.c b/src/libsysprof/sysprof-symbol-resolver.c
index 83da30a..19e4140 100644
--- a/src/libsysprof/sysprof-symbol-resolver.c
+++ b/src/libsysprof/sysprof-symbol-resolver.c
@@ -20,6 +20,7 @@
 
 #include "config.h"
 
+#include "sysprof-platform.h"
 #include "sysprof-symbol-resolver.h"
 
 G_DEFINE_INTERFACE (SysprofSymbolResolver, sysprof_symbol_resolver, G_TYPE_OBJECT)
@@ -125,11 +126,11 @@ sysprof_symbol_resolver_resolve (SysprofSymbolResolver *self,
  */
 gchar *
 sysprof_symbol_resolver_resolve_with_context (SysprofSymbolResolver *self,
-                                         guint64           time,
-                                         GPid              pid,
-                                         SysprofAddressContext  context,
-                                         SysprofCaptureAddress  address,
-                                         GQuark           *tag)
+                                              guint64                time,
+                                              GPid                   pid,
+                                              SysprofAddressContext  context,
+                                              SysprofCaptureAddress  address,
+                                              GQuark                *tag)
 {
   GQuark dummy;
 
@@ -142,3 +143,66 @@ sysprof_symbol_resolver_resolve_with_context (SysprofSymbolResolver *self,
 
   return SYSPROF_SYMBOL_RESOLVER_GET_IFACE (self)->resolve_with_context (self, time, pid, context, address, tag);
 }
+
+/* _sysprof_symbol_resolver_load_file:
+ * @reader: capture reader to extract the file from; it is reset first
+ * @path: path of the file as recorded in the capture
+ *
+ * Extracts the contents recorded for @path into a temporary memfd and
+ * reads them back into a newly allocated, NUL-terminated buffer.
+ *
+ * Returns: (transfer full) (nullable): the file contents, or %NULL if the
+ *   file could not be extracted or read back.
+ */
+char *
+_sysprof_symbol_resolver_load_file (SysprofCaptureReader *reader,
+                                    const char           *path)
+{
+  g_autofree char *data = NULL;
+  goffset len;
+  goffset pos = 0;
+  int fd;
+
+  g_assert (reader != NULL);
+  g_assert (path != NULL);
+
+  sysprof_capture_reader_reset (reader);
+
+  if (-1 == (fd = sysprof_memfd_create ("")))
+    return NULL;
+
+  if (!sysprof_capture_reader_read_file_fd (reader, path, fd))
+    goto failure;
+
+  /* The offset left behind by the extraction is taken as the length. Bail
+   * out if either seek fails so that a negative length is never used for
+   * allocation or indexing.
+   */
+  if ((len = lseek (fd, 0L, SEEK_CUR)) < 0 ||
+      lseek (fd, 0L, SEEK_SET) < 0)
+    goto failure;
+
+  data = g_malloc (len + 1);
+
+  while (pos < len)
+    {
+      gssize n_read = read (fd, data + pos, len - pos);
+
+      /* Errors and a premature end-of-file both abort; counting EOF as
+       * progress would spin forever because pos would never advance.
+       */
+      if (n_read <= 0)
+        goto failure;
+
+      pos += n_read;
+    }
+
+  data[len] = 0;
+  close (fd);
+
+  return g_steal_pointer (&data);
+
+failure:
+  close (fd);
+  return NULL;
+}


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]