[sysprof/wip/chergert/sysprof-3] libsysprof: add utility to build symbol maps



commit af63a878e82329bd16d0b767a74727d51b0a5840
Author: Christian Hergert <chergert redhat com>
Date:   Tue May 28 19:04:54 2019 -0700

    libsysprof: add utility to build symbol maps
    
    These are useful to allow us to append symbol information to a capture file
    using the existing symbol resolvers.
    
    It can read/write a small format embedded within capture files so that
    we can append them from the target machine rather than decoding from the
    machine we run Sysprof UI on.

 src/libsysprof/meson.build          |   1 +
 src/libsysprof/sysprof-symbol-map.c | 509 ++++++++++++++++++++++++++++++++++++
 src/libsysprof/sysprof-symbol-map.h |  49 ++++
 src/tests/test-addr-map.c           | 105 ++++++++
 4 files changed, 664 insertions(+)
---
diff --git a/src/libsysprof/meson.build b/src/libsysprof/meson.build
index 1d60a12..09aed85 100644
--- a/src/libsysprof/meson.build
+++ b/src/libsysprof/meson.build
@@ -53,6 +53,7 @@ libsysprof_private_sources = [
   'sysprof-kallsyms.c',
   'sysprof-line-reader.c',
   'sysprof-map-lookaside.c',
+  'sysprof-symbol-map.c',
   ipc_service_src,
   stackstash_sources,
   helpers_sources,
diff --git a/src/libsysprof/sysprof-symbol-map.c b/src/libsysprof/sysprof-symbol-map.c
new file mode 100644
index 0000000..9d6b050
--- /dev/null
+++ b/src/libsysprof/sysprof-symbol-map.c
@@ -0,0 +1,509 @@
+/* sysprof-symbol-map.c
+ *
+ * Copyright 2019 Christian Hergert <chergert redhat com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#define G_LOG_DOMAIN "sysprof-symbol-map"
+
+#include "config.h"
+
+#include <unistd.h>
+
+#include "sysprof-map-lookaside.h"
+#include "sysprof-symbol-map.h"
+
+/*
+ * Because we can't rely on the address ranges of symbols from ELF files
+ * or elsewhere, we have to duplicate a lot of entries when building this
+ * so that we can resolve all of the correct addresses.
+ */
+
+SYSPROF_ALIGNED_BEGIN(1)  /* record layout written by serialize and read back by deserialize */
+typedef struct
+{
+  SysprofCaptureAddress addr_begin;  /* first address covered by this record */
+  SysprofCaptureAddress addr_end;    /* last address covered; the search treats it as inclusive */
+  guint32               pid;         /* process the address range belongs to */
+  guint32               offset;      /* byte offset of the NUL-terminated symbol name from the start of the data */
+  guint32               tag_offset;  /* byte offset of the tag string; written as 0 when there is no tag */
+  guint32               padding;     /* always written as 0 */
+} Decoded
+SYSPROF_ALIGNED_END(1);
+
+struct _SysprofSymbolMap
+{
+  /* For creating maps */
+  GStringChunk *chunk;       /* storage for the resolved symbol names */
+  GHashTable   *lookasides;  /* values are released with sysprof_map_lookaside_free(); not filled by the code shown here */
+  GPtrArray    *resolvers;   /* owned SysprofSymbolResolver references, tried in insertion order */
+  GPtrArray    *samples;     /* owned Element pointers, sorted by pid then address at the end of resolve */
+  guint         resolved : 1; /* set by sysprof_symbol_map_resolve(), which refuses to run twice */
+
+  /* For reading maps */
+  GMappedFile   *mapped;     /* mapping created by deserialize */
+  const Decoded *symbols;    /* first record inside the mapping */
+  gsize          n_symbols;  /* number of records before the all-zero terminator record */
+  const gchar   *beginptr;   /* start of the mapped contents */
+  const gchar   *endptr;     /* one past the end of the mapped contents */
+};
+
+typedef struct  /* one resolved (pid, address) pair collected while building a map */
+{
+  SysprofCaptureAddress  addr;  /* sampled instruction address */
+  const gchar           *name;  /* symbol name, stored in the map's string chunk */
+  GQuark                 tag;   /* tag reported by the resolver, or 0 */
+  guint32                pid;   /* pid taken from the sample frame */
+} Element;
+
+/* Destroy notify for the Element slices owned by the samples array. */
+static void
+element_free (Element *element)
+{
+  g_slice_free (Element, element);
+}
+
+/*
+ * Sort callback for the samples array: orders elements by pid first and
+ * by address second. Each argument points at an array slot that holds an
+ * Element pointer.
+ */
+static gint
+element_compare (gconstpointer a,
+                 gconstpointer b)
+{
+  const Element *lhs = *(const Element * const *)a;
+  const Element *rhs = *(const Element * const *)b;
+
+  if (lhs->pid != rhs->pid)
+    return lhs->pid < rhs->pid ? -1 : 1;
+
+  if (lhs->addr != rhs->addr)
+    return lhs->addr < rhs->addr ? -1 : 1;
+
+  return 0;
+}
+
+/*
+ * Hash callback keyed on (pid, addr): folds the first two 32-bit words
+ * of the address together with the pid.
+ */
+static guint
+element_hash (gconstpointer data)
+{
+  const Element *element = data;
+  guint32 halves[2];
+
+  memcpy (halves, &element->addr, sizeof halves);
+
+  return halves[0] ^ halves[1] ^ element->pid;
+}
+
+/* Equality callback matching element_hash(): same pid and same address. */
+static gboolean
+element_equal (gconstpointer a,
+               gconstpointer b)
+{
+  const Element *lhs = a;
+  const Element *rhs = b;
+
+  if (lhs->pid != rhs->pid)
+    return FALSE;
+
+  return lhs->addr == rhs->addr;
+}
+
+/*
+ * Allocates an empty symbol map with its string chunk, sample array,
+ * resolver array and lookaside table ready for use. Release it with
+ * sysprof_symbol_map_free().
+ */
+SysprofSymbolMap *
+sysprof_symbol_map_new (void)
+{
+  SysprofSymbolMap *map = g_slice_new0 (SysprofSymbolMap);
+
+  map->samples = g_ptr_array_new_with_free_func ((GDestroyNotify) element_free);
+  map->chunk = g_string_chunk_new (16 * 4096);
+  map->resolvers = g_ptr_array_new_with_free_func (g_object_unref);
+  map->lookasides = g_hash_table_new_full (NULL,
+                                           NULL,
+                                           NULL,
+                                           (GDestroyNotify) sysprof_map_lookaside_free);
+
+  return map;
+}
+
+/*
+ * Releases everything owned by the map (tables, arrays, string chunk and
+ * any file mapping) and then the map itself.
+ */
+void
+sysprof_symbol_map_free (SysprofSymbolMap *self)
+{
+  if (self->lookasides != NULL)
+    g_hash_table_unref (g_steal_pointer (&self->lookasides));
+
+  if (self->resolvers != NULL)
+    g_ptr_array_unref (g_steal_pointer (&self->resolvers));
+
+  if (self->chunk != NULL)
+    g_string_chunk_free (g_steal_pointer (&self->chunk));
+
+  if (self->samples != NULL)
+    g_ptr_array_unref (g_steal_pointer (&self->samples));
+
+  if (self->mapped != NULL)
+    g_mapped_file_unref (g_steal_pointer (&self->mapped));
+
+  g_slice_free (SysprofSymbolMap, self);
+}
+
+/*
+ * bsearch() comparator: @a is the lookup key, @b a stored record. Records
+ * are ordered by pid; within a pid the key matches the record whose
+ * [addr_begin, addr_end] range contains the key address.
+ */
+static gint
+search_for_symbol_cb (gconstpointer a,
+                      gconstpointer b)
+{
+  const Decoded *needle = a;
+  const Decoded *record = b;
+
+  if (needle->pid != record->pid)
+    return needle->pid < record->pid ? -1 : 1;
+
+  if (needle->addr_begin < record->addr_begin)
+    return -1;
+  else if (needle->addr_begin > record->addr_end)
+    return 1;
+
+  g_assert (needle->addr_begin >= record->addr_begin);
+  g_assert (needle->addr_end <= record->addr_end);
+
+  return 0;
+}
+
+/*
+ * Looks up the symbol recorded for @addr in process @pid inside a map that
+ * was loaded with sysprof_symbol_map_deserialize().
+ *
+ * @time is accepted for interface symmetry with the resolvers but is not
+ * consulted. If @tag is non-NULL it is always written: 0 when the record
+ * carries no tag, otherwise the quark of the stored tag string.
+ *
+ * Returns a pointer into the mapped data (owned by the map), or NULL when
+ * nothing matches or the stored offset is out of range.
+ */
+const gchar *
+sysprof_symbol_map_lookup (SysprofSymbolMap      *self,
+                           gint64                 time,
+                           gint32                 pid,
+                           SysprofCaptureAddress  addr,
+                           GQuark                *tag)
+{
+  const Decoded *ret;
+  const Decoded key = {
+    .addr_begin = addr,
+    .addr_end = addr,
+    .pid = pid,
+    .offset = 0,
+    .tag_offset = 0,
+  };
+  gsize length;
+
+  g_assert (self != NULL);
+
+  if (tag != NULL)
+    *tag = 0;
+
+  /* Nothing was deserialized (or the data held no records). */
+  if (self->symbols == NULL || self->n_symbols == 0)
+    return NULL;
+
+  ret = bsearch (&key,
+                 self->symbols,
+                 self->n_symbols,
+                 sizeof *ret,
+                 search_for_symbol_cb);
+
+  if (ret == NULL)
+    return NULL;
+
+  length = self->endptr - self->beginptr;
+
+  /* Offset 0 is the "no tag" marker written by the serializer; it points
+   * at record data, not at a string, so it must never be interned.
+   */
+  if (tag != NULL && ret->tag_offset > 0 && ret->tag_offset < length)
+    *tag = g_quark_from_string (&self->beginptr[ret->tag_offset]);
+
+  if (ret->offset > 0 && ret->offset < length)
+    return &self->beginptr[ret->offset];
+
+  return NULL;
+}
+
+/*
+ * Appends @resolver to the list consulted by sysprof_symbol_map_resolve().
+ * The map takes its own reference on the resolver.
+ */
+void
+sysprof_symbol_map_add_resolver (SysprofSymbolMap      *self,
+                                 SysprofSymbolResolver *resolver)
+{
+  SysprofSymbolResolver *owned;
+
+  g_assert (self != NULL);
+  g_assert (SYSPROF_IS_SYMBOL_RESOLVER (resolver));
+
+  owned = g_object_ref (resolver);
+  g_ptr_array_add (self->resolvers, owned);
+}
+
+/*
+ * Reads the next sample frame from @reader and records one Element for
+ * every (pid, address) pair that has not been seen yet and that one of the
+ * resolvers can name. Resolvers are tried in the order they were added and
+ * the first one that returns a name wins.
+ *
+ * Returns FALSE when the sample could not be read, so the caller can stop
+ * instead of peeking at the same frame again.
+ */
+static gboolean
+sysprof_symbol_map_do_sample (SysprofSymbolMap     *self,
+                              SysprofCaptureReader *reader,
+                              GHashTable           *seen)
+{
+  SysprofAddressContext last_context = SYSPROF_ADDRESS_CONTEXT_NONE;
+  const SysprofCaptureSample *sample;
+
+  g_assert (self != NULL);
+  g_assert (reader != NULL);
+  g_assert (seen != NULL);
+
+  if (!(sample = sysprof_capture_reader_read_sample (reader)))
+    return FALSE;
+
+  for (guint i = 0; i < sample->n_addrs; i++)
+    {
+      SysprofCaptureAddress addr = sample->addrs[i];
+      SysprofAddressContext context;
+      Element ele = { .addr = addr, .pid = sample->frame.pid };
+
+      if (sysprof_address_is_context_switch (addr, &context))
+        {
+          last_context = context;
+          continue;
+        }
+
+      /* The seen table is keyed on (pid, addr) only, so a hit means the
+       * resolvers have nothing new to contribute for this address.
+       */
+      if (g_hash_table_contains (seen, &ele))
+        continue;
+
+      for (guint j = 0; j < self->resolvers->len; j++)
+        {
+          SysprofSymbolResolver *resolver = g_ptr_array_index (self->resolvers, j);
+          g_autofree gchar *name = NULL;
+          Element *cpy;
+          GQuark tag = 0;
+
+          name = sysprof_symbol_resolver_resolve_with_context (resolver,
+                                                               sample->frame.time,
+                                                               sample->frame.pid,
+                                                               last_context,
+                                                               addr,
+                                                               &tag);
+
+          if (name == NULL)
+            continue;
+
+          ele.name = g_string_chunk_insert_const (self->chunk, name);
+          ele.tag = tag;
+
+          cpy = g_slice_dup (Element, &ele);
+          g_hash_table_add (seen, cpy);
+          g_ptr_array_add (self->samples, cpy);
+
+          /* Later resolvers would only hit the seen table; skip them. */
+          break;
+        }
+    }
+
+  return TRUE;
+}
+
+/*
+ * Builds the in-memory symbol list for every sample frame in @reader
+ * using the resolvers registered on the map, then sorts it by pid and
+ * address. May only be called once per map.
+ */
+void
+sysprof_symbol_map_resolve (SysprofSymbolMap     *self,
+                            SysprofCaptureReader *reader)
+{
+  g_autoptr(GHashTable) seen = NULL;
+  SysprofCaptureFrameType type;
+
+  g_return_if_fail (self != NULL);
+  g_return_if_fail (self->resolved == FALSE);
+  g_return_if_fail (reader != NULL);
+
+  self->resolved = TRUE;
+
+  /* Borrowed pointers only; the Elements are owned by self->samples. */
+  seen = g_hash_table_new (element_hash, element_equal);
+
+  sysprof_capture_reader_reset (reader);
+
+  /* Each resolver consumes the reader while loading, so rewind after it. */
+  for (guint i = 0; i < self->resolvers->len; i++)
+    {
+      sysprof_symbol_resolver_load (g_ptr_array_index (self->resolvers, i), reader);
+      sysprof_capture_reader_reset (reader);
+    }
+
+  while (sysprof_capture_reader_peek_type (reader, &type))
+    {
+      if (type == SYSPROF_CAPTURE_FRAME_SAMPLE)
+        {
+          /* Stop on an unreadable sample rather than peeking at it forever. */
+          if (!sysprof_symbol_map_do_sample (self, reader, seen))
+            break;
+          continue;
+        }
+
+      if (!sysprof_capture_reader_skip (reader))
+        break;
+    }
+
+  g_ptr_array_sort (self->samples, element_compare);
+}
+
+/*
+ * Debugging aid: prints every resolved element as "pid: address: name",
+ * followed by the tag in brackets when one was recorded.
+ */
+void
+sysprof_symbol_map_printf (SysprofSymbolMap *self)
+{
+  g_return_if_fail (self != NULL);
+  g_return_if_fail (self->samples != NULL);
+
+  for (guint i = 0; i < self->samples->len; i++)
+    {
+      const Element *ele = g_ptr_array_index (self->samples, i);
+      /* Narrow through gsize so the integer-to-pointer cast is well
+       * defined even where pointers are smaller than the address type.
+       */
+      gpointer addr = GSIZE_TO_POINTER ((gsize)ele->addr);
+      gint pid = (gint)ele->pid;
+
+      if (ele->tag)
+        g_print ("%-5d: %p: %s [%s]\n",
+                 pid, addr, ele->name, g_quark_to_string (ele->tag));
+      else
+        g_print ("%-5d: %p: %s\n", pid, addr, ele->name);
+    }
+}
+
+/*
+ * Returns the offset of @str inside @ar, appending the string (with its
+ * NUL terminator) the first time it is requested. @seen caches offsets by
+ * string pointer, so callers must pass pointer-unique strings.
+ */
+static guint
+get_string_offset (GByteArray  *ar,
+                   GHashTable  *seen,
+                   const gchar *str)
+{
+  gpointer cached;
+
+  if G_LIKELY (g_hash_table_lookup_extended (seen, str, NULL, &cached))
+    return GPOINTER_TO_UINT (cached);
+
+  cached = GUINT_TO_POINTER (ar->len);
+  g_byte_array_append (ar, (guint8 *)str, strlen (str) + 1);
+  g_hash_table_insert (seen, (gpointer)str, cached);
+
+  return GPOINTER_TO_UINT (cached);
+}
+
+/*
+ * Writes @len bytes from @data to @fd, continuing after short writes.
+ * Returns FALSE as soon as write() reports an error or makes no progress.
+ */
+static gboolean
+sysprof_symbol_map_write_all (gint          fd,
+                              const guint8 *data,
+                              gsize         len)
+{
+  while (len > 0)
+    {
+      gssize n = write (fd, data, len);
+
+      if (n <= 0)
+        return FALSE;
+
+      data += n;
+      len -= (gsize)n;
+    }
+
+  return TRUE;
+}
+
+/*
+ * Serializes the resolved symbols to @fd.
+ *
+ * Layout: an array of Decoded records, then an all-zero Decoded record that
+ * acts as terminator, then the NUL-terminated symbol and tag strings.
+ * String offsets stored in the records are relative to the start of the
+ * output; offset 0 is reserved to mean "no string".
+ *
+ * Consecutive elements with the same pid and name are collapsed into one
+ * record covering their address range. On success the building state
+ * (samples, resolvers, strings, lookasides) is released.
+ *
+ * Returns TRUE if everything was written, FALSE otherwise.
+ */
+gboolean
+sysprof_symbol_map_serialize (SysprofSymbolMap *self,
+                              gint              fd)
+{
+  static const Decoded empty = {0};
+  SysprofCaptureAddress begin = 0;
+  g_autoptr(GByteArray) ar = NULL;
+  g_autoptr(GHashTable) seen = NULL;
+  g_autoptr(GArray) decoded = NULL;
+  gsize offset;
+
+  g_assert (self != NULL);
+  g_assert (fd != -1);
+
+  ar = g_byte_array_new ();
+  /* Keyed by pointer: names come from the string chunk and tags from the
+   * quark table, so equal strings share one address.
+   */
+  seen = g_hash_table_new (NULL, NULL);
+  decoded = g_array_new (FALSE, FALSE, sizeof (Decoded));
+
+  /* Add some empty space to both give us non-zero offsets and also ensure
+   * empty space between data.
+   */
+  g_byte_array_append (ar, (const guint8 *)&empty, sizeof empty);
+
+  /* Lay out all symbol names first so they sit together in the output. */
+  for (guint i = 0; i < self->samples->len; i++)
+    {
+      const Element *ele = g_ptr_array_index (self->samples, i);
+
+      get_string_offset (ar, seen, ele->name);
+    }
+
+  for (guint i = 0; i < self->samples->len; i++)
+    {
+      const Element *ele = g_ptr_array_index (self->samples, i);
+      Decoded dec;
+
+      if (begin == 0)
+        begin = ele->addr;
+
+      /* Extend the current range while the next element names the same
+       * symbol in the same process.
+       */
+      if ((i + 1) < self->samples->len)
+        {
+          const Element *next = g_ptr_array_index (self->samples, i + 1);
+
+          if (ele->pid == next->pid && ele->name == next->name)
+            continue;
+        }
+
+      dec.padding = 0;
+      dec.addr_begin = begin;
+      dec.addr_end = ele->addr;
+      dec.pid = ele->pid;
+      dec.offset = get_string_offset (ar, seen, ele->name);
+
+      if (ele->tag)
+        dec.tag_offset = get_string_offset (ar, seen, g_quark_to_string (ele->tag));
+      else
+        dec.tag_offset = 0;
+
+      g_array_append_val (decoded, dec);
+
+      begin = 0;
+    }
+
+  /* The records precede the string area, so shift every string offset by
+   * the size of the record array.
+   */
+  offset = sizeof empty * decoded->len;
+
+  for (guint i = 0; i < decoded->len; i++)
+    {
+      Decoded *dec = &g_array_index (decoded, Decoded, i);
+
+      if (dec->offset)
+        dec->offset += offset;
+
+      if (dec->tag_offset)
+        dec->tag_offset += offset;
+    }
+
+  if (!sysprof_symbol_map_write_all (fd, (const guint8 *)decoded->data, offset))
+    return FALSE;
+
+  if (!sysprof_symbol_map_write_all (fd, ar->data, ar->len))
+    return FALSE;
+
+  /* Aggressively release state now that we're finished */
+  if (self->samples->len)
+    g_ptr_array_remove_range (self->samples, 0, self->samples->len);
+  if (self->resolvers != NULL)
+    g_ptr_array_remove_range (self->resolvers, 0, self->resolvers->len);
+  g_string_chunk_clear (self->chunk);
+  g_hash_table_remove_all (self->lookasides);
+
+  return TRUE;
+}
+
+/*
+ * Maps the serialized symbol data behind @fd and prepares the map for
+ * sysprof_symbol_map_lookup().
+ *
+ * @byte_order is the byte order the data was written in; when it differs
+ * from the host order every record is swapped in place inside the mapping.
+ * Records are scanned until the all-zero terminator record; if none is
+ * found the map stays empty and lookups return NULL.
+ *
+ * Returns FALSE if the file could not be mapped, TRUE otherwise.
+ */
+gboolean
+sysprof_symbol_map_deserialize (SysprofSymbolMap *self,
+                                gint              byte_order,
+                                gint              fd)
+{
+  gboolean needs_swap = byte_order != G_BYTE_ORDER;
+  gchar *beginptr;
+  gchar *endptr;
+
+  g_return_val_if_fail (self != NULL, FALSE);
+  g_return_val_if_fail (self->mapped == NULL, FALSE);
+
+  /* Writable mapping: records may need to be byte-swapped in place. */
+  if (!(self->mapped = g_mapped_file_new_from_fd (fd, TRUE, NULL)))
+    return FALSE;
+
+  beginptr = g_mapped_file_get_contents (self->mapped);
+  endptr = beginptr + g_mapped_file_get_length (self->mapped);
+
+  /* Compare remaining bytes instead of forming a pointer past the end, and
+   * accept a record that ends exactly at the end of the data.
+   */
+  for (gchar *ptr = beginptr;
+       (gsize)(endptr - ptr) >= sizeof (Decoded);
+       ptr += sizeof (Decoded))
+    {
+      Decoded *sym = (Decoded *)ptr;
+
+      /* All-zero record terminates the table; zero is the same in either
+       * byte order so it can be tested before swapping.
+       */
+      if (sym->addr_begin == 0 &&
+          sym->addr_end == 0 &&
+          sym->pid == 0 &&
+          sym->offset == 0)
+        {
+          self->symbols = (const Decoded *)beginptr;
+          self->n_symbols = sym - self->symbols;
+          break;
+        }
+      else if (needs_swap)
+        {
+          sym->addr_begin = GUINT64_SWAP_LE_BE (sym->addr_begin);
+          sym->addr_end = GUINT64_SWAP_LE_BE (sym->addr_end);
+          sym->pid = GUINT32_SWAP_LE_BE (sym->pid);
+          sym->offset = GUINT32_SWAP_LE_BE (sym->offset);
+          sym->tag_offset = GUINT32_SWAP_LE_BE (sym->tag_offset);
+        }
+    }
+
+  self->beginptr = beginptr;
+  self->endptr = endptr;
+
+  return TRUE;
+}
diff --git a/src/libsysprof/sysprof-symbol-map.h b/src/libsysprof/sysprof-symbol-map.h
new file mode 100644
index 0000000..2e482c2
--- /dev/null
+++ b/src/libsysprof/sysprof-symbol-map.h
@@ -0,0 +1,49 @@
+/* sysprof-symbol-map.h
+ *
+ * Copyright 2019 Christian Hergert <chergert redhat com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#pragma once
+
+#include <sysprof-capture.h>
+
+#include "sysprof-symbol-resolver.h"
+
+G_BEGIN_DECLS
+
+typedef struct _SysprofSymbolMap SysprofSymbolMap;
+
+SysprofSymbolMap *sysprof_symbol_map_new          (void); /* allocate an empty map; release with sysprof_symbol_map_free() */
+void              sysprof_symbol_map_add_resolver (SysprofSymbolMap           *self,
+                                                   SysprofSymbolResolver      *resolver); /* the map takes its own reference on @resolver */
+void              sysprof_symbol_map_resolve      (SysprofSymbolMap           *self,
+                                                   SysprofCaptureReader       *reader); /* resolve every sample in @reader; allowed once per map */
+const gchar      *sysprof_symbol_map_lookup       (SysprofSymbolMap           *self,
+                                                   gint64                      time,
+                                                   gint32                      pid,
+                                                   SysprofCaptureAddress       addr,
+                                                   GQuark                     *tag); /* name from deserialized data or NULL; @tag may be NULL */
+void              sysprof_symbol_map_printf       (SysprofSymbolMap           *self); /* print the resolved elements with g_print() */
+gboolean          sysprof_symbol_map_serialize    (SysprofSymbolMap           *self,
+                                                   gint                        fd); /* write the resolved symbols to @fd; FALSE if a write fails */
+gboolean          sysprof_symbol_map_deserialize  (SysprofSymbolMap           *self,
+                                                   gint                        byte_order,
+                                                   gint                        fd); /* map data from @fd; @byte_order is compared with G_BYTE_ORDER */
+void              sysprof_symbol_map_free         (SysprofSymbolMap           *self); /* release the map and everything it owns */
+
+G_END_DECLS
diff --git a/src/tests/test-addr-map.c b/src/tests/test-addr-map.c
new file mode 100644
index 0000000..c1867e0
--- /dev/null
+++ b/src/tests/test-addr-map.c
@@ -0,0 +1,105 @@
+#include <fcntl.h>
+#include <sysprof.h>
+
+#include "sysprof-platform.h"
+#include "sysprof-symbol-map.h"
+
+static GMainLoop *main_loop;
+
+/*
+ * Thread body for the test: builds a symbol map from the capture passed in
+ * @data, serializes it to a memfd, loads it back and prints every stack
+ * trace using the deserialized map. Quits the main loop when done.
+ */
+static void *
+resolve_in_thread (gpointer data)
+{
+  SysprofCaptureReader *reader = data;
+  g_autoptr(SysprofSymbolResolver) kernel = NULL;
+  g_autoptr(SysprofSymbolResolver) elf = NULL;
+  SysprofCaptureFrameType type;
+  SysprofSymbolMap *map;
+  gboolean r;
+  int fd;
+
+  g_assert (reader != NULL);
+
+  map = sysprof_symbol_map_new ();
+  kernel = sysprof_kernel_symbol_resolver_new ();
+  elf = sysprof_elf_symbol_resolver_new ();
+
+  sysprof_symbol_map_add_resolver (map, kernel);
+  sysprof_symbol_map_add_resolver (map, elf);
+
+  sysprof_symbol_map_resolve (map, reader);
+
+  fd = sysprof_memfd_create ("decode-test");
+  g_assert_cmpint (fd, !=, -1);
+
+  r = sysprof_symbol_map_serialize (map, fd);
+  g_assert_true (r);
+  sysprof_symbol_map_free (map);
+
+  /* Reset some state: rewind both the capture and the serialized data.
+   * lseek() takes the offset before the whence argument.
+   */
+  sysprof_capture_reader_reset (reader);
+  lseek (fd, 0, SEEK_SET);
+
+  /* Now deserialize it */
+  map = sysprof_symbol_map_new ();
+  r = sysprof_symbol_map_deserialize (map, G_BYTE_ORDER, fd);
+  g_assert_true (r);
+
+  /* Now try to print some stack traces */
+  while (sysprof_capture_reader_peek_type (reader, &type))
+    {
+      if (type == SYSPROF_CAPTURE_FRAME_SAMPLE)
+        {
+          const SysprofCaptureSample *sample = NULL;
+
+          if (!(sample = sysprof_capture_reader_read_sample (reader)))
+            break;
+
+          for (guint j = 0; j < sample->n_addrs; j++)
+            {
+              const gchar *name;
+
+              /* The tag is not printed, so no out-parameter is passed. */
+              name = sysprof_symbol_map_lookup (map,
+                                                sample->frame.time,
+                                                sample->frame.pid,
+                                                sample->addrs[j],
+                                                NULL);
+              if (name == NULL)
+                name = "Unknown symbol";
+
+              g_print ("%u: %s\n", j, name);
+            }
+
+          g_print ("======\n");
+        }
+      else if (!sysprof_capture_reader_skip (reader))
+        break;
+    }
+
+  sysprof_symbol_map_free (map);
+
+  close (fd);
+  g_main_loop_quit (main_loop);
+  return NULL;
+}
+
+/*
+ * Usage: test-addr-map CAPTURE_FILE
+ *
+ * Opens the capture, runs the round-trip test on a worker thread and spins
+ * the main loop until the worker asks it to quit.
+ */
+gint
+main (gint argc,
+      gchar *argv[])
+{
+  g_autoptr(SysprofCaptureReader) reader = NULL;
+  g_autoptr(GError) error = NULL;
+  GThread *thread;
+
+  if (argc != 2)
+    {
+      g_printerr ("usage: %s CAPTURE_FILE\n", argv[0]);
+      return 1;
+    }
+
+  if (!(reader = sysprof_capture_reader_new (argv[1], &error)))
+    {
+      g_printerr ("%s\n", error->message);
+      return 1;
+    }
+
+  main_loop = g_main_loop_new (NULL, FALSE);
+  thread = g_thread_new ("reader-thread", resolve_in_thread, reader);
+  g_main_loop_run (main_loop);
+
+  /* Wait for the worker to finish before the reader it borrows is
+   * released; joining also drops the thread reference.
+   */
+  g_thread_join (thread);
+  g_clear_pointer (&main_loop, g_main_loop_unref);
+
+  return 0;
+}


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]