[sysprof] libsysprof: add utility to build symbol maps
- From: Christian Hergert <chergert src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [sysprof] libsysprof: add utility to build symbol maps
- Date: Wed, 29 May 2019 22:49:36 +0000 (UTC)
commit 7ffd3e41cfdcf57e27417291f2670a40566b9f52
Author: Christian Hergert <chergert redhat com>
Date: Tue May 28 19:04:54 2019 -0700
libsysprof: add utility to build symbol maps
These are useful to allow us to append symbol information to a capture file
using the existing symbol resolvers.
It can read/write a small format embedded within capture files so that
we can append them from the target machine rather than decoding from the
machine we run Sysprof UI on.
src/libsysprof/meson.build | 1 +
src/libsysprof/sysprof-symbol-map.c | 509 ++++++++++++++++++++++++++++++++++++
src/libsysprof/sysprof-symbol-map.h | 49 ++++
src/tests/test-addr-map.c | 105 ++++++++
4 files changed, 664 insertions(+)
---
diff --git a/src/libsysprof/meson.build b/src/libsysprof/meson.build
index 1d60a12..09aed85 100644
--- a/src/libsysprof/meson.build
+++ b/src/libsysprof/meson.build
@@ -53,6 +53,7 @@ libsysprof_private_sources = [
'sysprof-kallsyms.c',
'sysprof-line-reader.c',
'sysprof-map-lookaside.c',
+ 'sysprof-symbol-map.c',
ipc_service_src,
stackstash_sources,
helpers_sources,
diff --git a/src/libsysprof/sysprof-symbol-map.c b/src/libsysprof/sysprof-symbol-map.c
new file mode 100644
index 0000000..9d6b050
--- /dev/null
+++ b/src/libsysprof/sysprof-symbol-map.c
@@ -0,0 +1,509 @@
+/* sysprof-symbol-map.c
+ *
+ * Copyright 2019 Christian Hergert <chergert redhat com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#define G_LOG_DOMAIN "sysprof-symbol-map"
+
+#include "config.h"
+
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "sysprof-map-lookaside.h"
+#include "sysprof-symbol-map.h"
+
+/*
+ * Because we can't rely on the address ranges of symbols from ELF files
+ * or elsewhere, we have to duplicate a lot of entries when building this
+ * so that we can resolve all of the correct addresses.
+ */
+
+/*
+ * Decoded:
+ *
+ * Fixed-size record describing one resolved address range for one process.
+ * sysprof_symbol_map_serialize() writes an array of these to the file
+ * descriptor, and sysprof_symbol_map_deserialize() interprets the start of
+ * the mapped file as an array of them.
+ *
+ * NOTE(review): SYSPROF_ALIGNED_BEGIN/END are defined elsewhere; presumably
+ * they request 1-byte alignment so the in-memory layout matches the file.
+ */
+SYSPROF_ALIGNED_BEGIN(1)
+typedef struct
+{
+  /* First and last address covered by this record (both inclusive, see
+   * search_for_symbol_cb()).
+   */
+  SysprofCaptureAddress addr_begin;
+  SysprofCaptureAddress addr_end;
+
+  /* Process the address range belongs to. */
+  guint32               pid;
+
+  /* Byte offset of the NUL-terminated symbol name, measured from the start
+   * of the serialized data.
+   */
+  guint32               offset;
+
+  /* Byte offset of the NUL-terminated tag string, or 0 when the symbol has
+   * no tag.
+   */
+  guint32               tag_offset;
+
+  /* Always written as 0 by the serializer. */
+  guint32               padding;
+} Decoded
+SYSPROF_ALIGNED_END(1);
+
+/*
+ * State for a symbol map. The same structure is used both for building a
+ * map from a capture (the "creating" fields) and for querying a map that
+ * was previously serialized (the "reading" fields).
+ */
+struct _SysprofSymbolMap
+{
+  /* For creating maps */
+  GStringChunk  *chunk;        /* storage for the resolved symbol names */
+  GHashTable    *lookasides;   /* created and cleared here; values are freed
+                                * with sysprof_map_lookaside_free() */
+  GPtrArray     *resolvers;    /* owned SysprofSymbolResolver references */
+  GPtrArray     *samples;      /* owned Element pointers, sorted by resolve */
+  guint          resolved : 1; /* set once sysprof_symbol_map_resolve() ran */
+
+  /* For reading maps */
+  GMappedFile   *mapped;       /* mapping created by deserialize */
+  const Decoded *symbols;      /* Decoded records at the start of the mapping */
+  gsize          n_symbols;    /* number of records in @symbols */
+  const gchar   *beginptr;     /* first byte of the mapped contents */
+  const gchar   *endptr;       /* one past the last mapped byte */
+};
+
+/*
+ * Element:
+ *
+ * One resolved (pid, address) pair collected while walking the samples of
+ * a capture. Identity (hashing, equality and ordering) only considers @pid
+ * and @addr.
+ */
+typedef struct
+{
+  SysprofCaptureAddress  addr; /* instruction address from the sample */
+  const gchar           *name; /* symbol name, stored in the map's GStringChunk */
+  GQuark                 tag;  /* tag reported by the resolver, or 0 */
+  guint32                pid;  /* process the sample was taken from */
+} Element;
+
+/* Releases an Element that was allocated with the GSlice allocator. */
+static void
+element_free (Element *element)
+{
+  g_slice_free1 (sizeof (Element), element);
+}
+
+/*
+ * Sort callback for a GPtrArray of Element pointers.
+ *
+ * Orders by pid first and by address second, both ascending. @a and @b
+ * point at the array slots, so they are dereferenced once to reach the
+ * elements.
+ */
+static gint
+element_compare (gconstpointer a,
+                 gconstpointer b)
+{
+  const Element *lhs = *(const Element * const *)a;
+  const Element *rhs = *(const Element * const *)b;
+
+  if (lhs->pid != rhs->pid)
+    return lhs->pid < rhs->pid ? -1 : 1;
+
+  if (lhs->addr != rhs->addr)
+    return lhs->addr < rhs->addr ? -1 : 1;
+
+  return 0;
+}
+
+/*
+ * Hash callback for Element keys.
+ *
+ * Folds the two 32-bit halves of the 64-bit address together with the pid.
+ * The tag and name do not take part, matching element_equal().
+ */
+static guint
+element_hash (gconstpointer data)
+{
+  const Element *ele = data;
+  guint64 addr = ele->addr;
+  guint32 low = (guint32)(addr & G_MAXUINT32);
+  guint32 high = (guint32)(addr >> 32);
+
+  G_STATIC_ASSERT (sizeof (SysprofCaptureAddress) == sizeof (guint64));
+
+  return low ^ high ^ ele->pid;
+}
+
+/*
+ * Equality callback for Element keys: two elements are the same when both
+ * their pid and their address match.
+ */
+static gboolean
+element_equal (gconstpointer a,
+               gconstpointer b)
+{
+  const Element *lhs = a;
+  const Element *rhs = b;
+
+  if (lhs->pid != rhs->pid)
+    return FALSE;
+
+  return lhs->addr == rhs->addr;
+}
+
+/*
+ * sysprof_symbol_map_new:
+ *
+ * Allocates an empty symbol map with all of the containers needed for
+ * building a map already created. The "reading" fields stay zeroed until
+ * sysprof_symbol_map_deserialize() is used.
+ *
+ * Returns: a new map; release it with sysprof_symbol_map_free().
+ */
+SysprofSymbolMap *
+sysprof_symbol_map_new (void)
+{
+  SysprofSymbolMap *map = g_slice_new0 (SysprofSymbolMap);
+
+  map->samples = g_ptr_array_new_with_free_func ((GDestroyNotify) element_free);
+  map->chunk = g_string_chunk_new (4096*16);
+  map->resolvers = g_ptr_array_new_with_free_func (g_object_unref);
+  map->lookasides = g_hash_table_new_full (NULL,
+                                           NULL,
+                                           NULL,
+                                           (GDestroyNotify) sysprof_map_lookaside_free);
+
+  return map;
+}
+
+/*
+ * sysprof_symbol_map_free:
+ * @self: (nullable): a map created with sysprof_symbol_map_new()
+ *
+ * Releases the map and everything it owns, including the file mapping
+ * created by sysprof_symbol_map_deserialize(). Strings previously returned
+ * by sysprof_symbol_map_lookup() point into that mapping and must not be
+ * used afterwards.
+ *
+ * Passing %NULL is allowed and does nothing.
+ */
+void
+sysprof_symbol_map_free (SysprofSymbolMap *self)
+{
+  if (self == NULL)
+    return;
+
+  g_clear_pointer (&self->lookasides, g_hash_table_unref);
+  g_clear_pointer (&self->resolvers, g_ptr_array_unref);
+  /* Elements in @samples only borrow their names from @chunk, so the order
+   * in which these two are released does not matter.
+   */
+  g_clear_pointer (&self->chunk, g_string_chunk_free);
+  g_clear_pointer (&self->samples, g_ptr_array_unref);
+  g_clear_pointer (&self->mapped, g_mapped_file_unref);
+  g_slice_free (SysprofSymbolMap, self);
+}
+
+/*
+ * bsearch() callback used by sysprof_symbol_map_lookup().
+ *
+ * @a is the search key and @b is a record from the sorted array. Records
+ * are ordered by pid and then by address range; the key matches a record
+ * when its address lies within [addr_begin, addr_end] of that record.
+ */
+static gint
+search_for_symbol_cb (gconstpointer a,
+                      gconstpointer b)
+{
+  const Decoded *needle = a;
+  const Decoded *entry = b;
+
+  if (needle->pid != entry->pid)
+    return needle->pid < entry->pid ? -1 : 1;
+
+  if (needle->addr_begin < entry->addr_begin)
+    return -1;
+
+  if (needle->addr_begin > entry->addr_end)
+    return 1;
+
+  /* Same pid, and the key starts inside the record's range. */
+  g_assert (needle->addr_begin >= entry->addr_begin);
+  g_assert (needle->addr_end <= entry->addr_end);
+
+  return 0;
+}
+
+/*
+ * sysprof_symbol_map_lookup:
+ * @self: a map that was loaded with sysprof_symbol_map_deserialize()
+ * @time: time of the sample (currently not used by the lookup)
+ * @pid: process the address belongs to
+ * @addr: address to resolve
+ * @tag: (out) (optional): location for the symbol's tag, set to 0 when the
+ *   symbol has no tag or nothing was found
+ *
+ * Looks up the symbol name recorded for @addr within @pid.
+ *
+ * Returns: (nullable): the symbol name, owned by the map's file mapping,
+ *   or %NULL if no record covers @addr.
+ */
+const gchar *
+sysprof_symbol_map_lookup (SysprofSymbolMap      *self,
+                           gint64                 time,
+                           gint32                 pid,
+                           SysprofCaptureAddress  addr,
+                           GQuark                *tag)
+{
+  const Decoded *ret;
+  const Decoded key = {
+    .addr_begin = addr,
+    .addr_end = addr,
+    .pid = pid,
+    .offset = 0,
+    .tag_offset = 0,
+  };
+  gsize data_len;
+
+  g_assert (self != NULL);
+
+  if (tag != NULL)
+    *tag = 0;
+
+  /* Nothing was deserialized (or the file had no records): avoid handing
+   * a NULL base pointer to bsearch().
+   */
+  if (self->symbols == NULL || self->n_symbols == 0)
+    return NULL;
+
+  ret = bsearch (&key,
+                 self->symbols,
+                 self->n_symbols,
+                 sizeof *ret,
+                 search_for_symbol_cb);
+
+  if (ret == NULL)
+    return NULL;
+
+  data_len = (gsize)(self->endptr - self->beginptr);
+
+  /* Offset 0 is the serializer's "no string" marker. It points at the
+   * record array rather than at string data, so it must never be
+   * dereferenced as a string.
+   */
+  if (tag != NULL && ret->tag_offset > 0 && ret->tag_offset < data_len)
+    *tag = g_quark_from_string (&self->beginptr[ret->tag_offset]);
+
+  if (ret->offset > 0 && ret->offset < data_len)
+    return &self->beginptr[ret->offset];
+
+  return NULL;
+}
+
+/*
+ * sysprof_symbol_map_add_resolver:
+ * @self: a map being built
+ * @resolver: a resolver to consult while resolving samples
+ *
+ * Appends @resolver to the list of resolvers. The map takes its own
+ * reference, so the caller keeps ownership of the one it passed in.
+ */
+void
+sysprof_symbol_map_add_resolver (SysprofSymbolMap      *self,
+                                 SysprofSymbolResolver *resolver)
+{
+  SysprofSymbolResolver *owned;
+
+  g_assert (self != NULL);
+  g_assert (SYSPROF_IS_SYMBOL_RESOLVER (resolver));
+
+  owned = g_object_ref (resolver);
+  g_ptr_array_add (self->resolvers, owned);
+}
+
+/*
+ * Reads the next sample frame from @reader and records a symbol for every
+ * address in it that has not been recorded yet.
+ *
+ * @seen holds the (pid, address) pairs that already have an Element in
+ * self->samples; it borrows those Elements and does not own them.
+ *
+ * Addresses that mark a context switch are not resolved themselves. They
+ * only update the context that is passed to the resolvers for the
+ * addresses that follow.
+ */
+static void
+sysprof_symbol_map_do_sample (SysprofSymbolMap     *self,
+                              SysprofCaptureReader *reader,
+                              GHashTable           *seen)
+{
+  SysprofAddressContext last_context = SYSPROF_ADDRESS_CONTEXT_NONE;
+  const SysprofCaptureSample *sample;
+
+  g_assert (self != NULL);
+  g_assert (reader != NULL);
+  g_assert (seen != NULL);
+
+  if (!(sample = sysprof_capture_reader_read_sample (reader)))
+    return;
+
+  for (guint i = 0; i < sample->n_addrs; i++)
+    {
+      SysprofCaptureAddress addr = sample->addrs[i];
+      SysprofAddressContext context;
+      Element ele = {
+        .addr = addr,
+        .pid = sample->frame.pid,
+      };
+
+      if (sysprof_address_is_context_switch (addr, &context))
+        {
+          last_context = context;
+          continue;
+        }
+
+      /* Lookups in @seen only depend on pid and address, so we can skip
+       * the (potentially expensive) resolvers entirely for pairs that
+       * were already recorded.
+       */
+      if (g_hash_table_contains (seen, &ele))
+        continue;
+
+      for (guint j = 0; j < self->resolvers->len; j++)
+        {
+          SysprofSymbolResolver *resolver = g_ptr_array_index (self->resolvers, j);
+          g_autofree gchar *name = NULL;
+          Element *cpy;
+          GQuark tag = 0;
+
+          name = sysprof_symbol_resolver_resolve_with_context (resolver,
+                                                               sample->frame.time,
+                                                               sample->frame.pid,
+                                                               last_context,
+                                                               addr,
+                                                               &tag);
+
+          if (name == NULL)
+            continue;
+
+          ele.name = g_string_chunk_insert_const (self->chunk, name);
+          ele.tag = tag;
+
+          cpy = g_slice_dup (Element, &ele);
+          g_hash_table_add (seen, cpy);
+          g_ptr_array_add (self->samples, cpy);
+
+          /* The first resolver that knows the address wins; any later
+           * result would be discarded as a duplicate anyway.
+           */
+          break;
+        }
+    }
+}
+
+/*
+ * sysprof_symbol_map_resolve:
+ * @self: a map with its resolvers already added
+ * @reader: the capture to resolve symbols for
+ *
+ * Loads every resolver against @reader, then walks all frames of the
+ * capture and records a symbol for each distinct (pid, address) found in
+ * sample frames. The collected entries are left sorted by pid and address.
+ *
+ * This may only be called once per map; later calls are rejected.
+ */
+void
+sysprof_symbol_map_resolve (SysprofSymbolMap     *self,
+                            SysprofCaptureReader *reader)
+{
+  g_autoptr(GHashTable) seen = NULL;
+  SysprofCaptureFrameType type;
+  guint n_resolvers;
+
+  g_return_if_fail (self != NULL);
+  g_return_if_fail (self->resolved == FALSE);
+  g_return_if_fail (reader != NULL);
+
+  self->resolved = TRUE;
+
+  seen = g_hash_table_new (element_hash, element_equal);
+
+  sysprof_capture_reader_reset (reader);
+
+  /* Each resolver consumes the capture while loading, so rewind after
+   * every one of them.
+   */
+  n_resolvers = self->resolvers->len;
+  for (guint i = 0; i < n_resolvers; i++)
+    {
+      SysprofSymbolResolver *resolver = g_ptr_array_index (self->resolvers, i);
+
+      sysprof_symbol_resolver_load (resolver, reader);
+      sysprof_capture_reader_reset (reader);
+    }
+
+  while (sysprof_capture_reader_peek_type (reader, &type))
+    {
+      if (type == SYSPROF_CAPTURE_FRAME_SAMPLE)
+        sysprof_symbol_map_do_sample (self, reader, seen);
+      else if (!sysprof_capture_reader_skip (reader))
+        break;
+    }
+
+  g_ptr_array_sort (self->samples, element_compare);
+}
+
+/*
+ * sysprof_symbol_map_printf:
+ * @self: a map that has been resolved
+ *
+ * Debugging helper that prints one line per collected entry with its pid,
+ * address and symbol name, followed by the tag in brackets when the entry
+ * has one.
+ */
+void
+sysprof_symbol_map_printf (SysprofSymbolMap *self)
+{
+  g_return_if_fail (self != NULL);
+  g_return_if_fail (self->samples != NULL);
+
+  for (guint i = 0; i < self->samples->len; i++)
+    {
+      const Element *entry = g_ptr_array_index (self->samples, i);
+
+      if (entry->tag == 0)
+        {
+          g_print ("%-5d: %p: %s\n", entry->pid, (gpointer)entry->addr, entry->name);
+          continue;
+        }
+
+      g_print ("%-5d: %p: %s [%s]\n",
+               entry->pid,
+               (gpointer)entry->addr,
+               entry->name,
+               g_quark_to_string (entry->tag));
+    }
+}
+
+/*
+ * Returns the offset of @str within @ar, appending it (including its NUL
+ * terminator) the first time it is requested.
+ *
+ * @seen maps string pointers to the offset they were stored at. The
+ * serializer in this file creates it with pointer hashing, so callers are
+ * expected to pass interned strings.
+ */
+static guint
+get_string_offset (GByteArray  *ar,
+                   GHashTable  *seen,
+                   const gchar *str)
+{
+  gpointer known = NULL;
+  guint position;
+
+  if G_LIKELY (g_hash_table_lookup_extended (seen, str, NULL, &known))
+    return GPOINTER_TO_UINT (known);
+
+  /* New string: it will start where the array currently ends. */
+  position = ar->len;
+  g_byte_array_append (ar, (guint8 *)str, strlen (str) + 1);
+  g_hash_table_insert (seen, (gpointer)str, GUINT_TO_POINTER (position));
+
+  return position;
+}
+
+/*
+ * Writes all @len bytes of @data to @fd, continuing after short writes and
+ * retrying when the call is interrupted by a signal.
+ */
+static gboolean
+sysprof_symbol_map_write_all (gint          fd,
+                              const guint8 *data,
+                              gsize         len)
+{
+  while (len > 0)
+    {
+      gssize n_written = write (fd, data, len);
+
+      if (n_written < 0)
+        {
+          if (errno == EINTR)
+            continue;
+          return FALSE;
+        }
+
+      /* No progress for a non-empty buffer; give up instead of spinning. */
+      if (n_written == 0)
+        return FALSE;
+
+      data += n_written;
+      len -= (gsize)n_written;
+    }
+
+  return TRUE;
+}
+
+/*
+ * sysprof_symbol_map_serialize:
+ * @self: a map that has been resolved
+ * @fd: a writable file descriptor
+ *
+ * Writes the map to @fd. The output consists of the array of Decoded
+ * records, followed by one all-zero Decoded that terminates the array,
+ * followed by the NUL-terminated name and tag strings. String offsets in
+ * the records are relative to the start of the output.
+ *
+ * Consecutive entries of the same process that share a symbol name are
+ * merged into a single address range.
+ *
+ * On success the state used for building the map is released.
+ *
+ * Returns: %TRUE if everything was written, otherwise %FALSE.
+ */
+gboolean
+sysprof_symbol_map_serialize (SysprofSymbolMap *self,
+                              gint              fd)
+{
+  static const Decoded empty = {0};
+  SysprofCaptureAddress begin = 0;
+  gboolean in_range = FALSE;
+  g_autoptr(GByteArray) ar = NULL;
+  g_autoptr(GHashTable) seen = NULL;
+  g_autoptr(GArray) decoded = NULL;
+  gsize offset;
+
+  g_assert (self != NULL);
+  g_assert (fd != -1);
+
+  ar = g_byte_array_new ();
+  /* Pointer hashing is sufficient: names come from the string chunk and
+   * tags from the quark table, so equal strings share one pointer.
+   */
+  seen = g_hash_table_new (NULL, NULL);
+  decoded = g_array_new (FALSE, FALSE, sizeof (Decoded));
+
+  /* Add some empty space to both give us non-zero offsets and also ensure
+   * empty space between data.
+   */
+  g_byte_array_append (ar, (guint8 *)&empty, sizeof empty);
+
+  /* Store every symbol name up front so that names precede tags. */
+  for (guint i = 0; i < self->samples->len; i++)
+    {
+      Element *ele = g_ptr_array_index (self->samples, i);
+
+      get_string_offset (ar, seen, ele->name);
+    }
+
+  for (guint i = 0; i < self->samples->len; i++)
+    {
+      Element *ele = g_ptr_array_index (self->samples, i);
+      Decoded dec;
+
+      /* Track the start of the range explicitly rather than treating
+       * address 0 as "not started".
+       */
+      if (!in_range)
+        {
+          begin = ele->addr;
+          in_range = TRUE;
+        }
+
+      if ((i + 1) < self->samples->len)
+        {
+          Element *next = g_ptr_array_index (self->samples, i + 1);
+
+          /* Still the same symbol: keep extending the current range. */
+          if (ele->pid == next->pid && ele->name == next->name)
+            continue;
+        }
+
+      dec.padding = 0;
+      dec.addr_begin = begin;
+      dec.addr_end = ele->addr;
+      dec.pid = ele->pid;
+      dec.offset = get_string_offset (ar, seen, ele->name);
+
+      if (ele->tag)
+        dec.tag_offset = get_string_offset (ar, seen, g_quark_to_string (ele->tag));
+      else
+        dec.tag_offset = 0;
+
+      g_array_append_val (decoded, dec);
+
+      in_range = FALSE;
+    }
+
+  /* The string data follows the record array in the output, so shift all
+   * string offsets by the size of that array.
+   */
+  offset = sizeof empty * decoded->len;
+
+  for (guint i = 0; i < decoded->len; i++)
+    {
+      Decoded *dec = &g_array_index (decoded, Decoded, i);
+
+      if (dec->offset)
+        dec->offset += offset;
+
+      if (dec->tag_offset)
+        dec->tag_offset += offset;
+    }
+
+  if (!sysprof_symbol_map_write_all (fd, (const guint8 *)decoded->data, offset))
+    return FALSE;
+
+  if (!sysprof_symbol_map_write_all (fd, ar->data, ar->len))
+    return FALSE;
+
+  /* Aggressively release state now that we're finished. Shrinking to zero
+   * runs the arrays' free functions and is safe for empty arrays.
+   */
+  g_ptr_array_set_size (self->samples, 0);
+  g_ptr_array_set_size (self->resolvers, 0);
+  g_string_chunk_clear (self->chunk);
+  g_hash_table_remove_all (self->lookasides);
+
+  return TRUE;
+}
+
+/*
+ * sysprof_symbol_map_deserialize:
+ * @self: a map that has not been deserialized into yet
+ * @byte_order: byte order the data was written in; records are byte-swapped
+ *   when it differs from %G_BYTE_ORDER
+ * @fd: a file descriptor positioned on data written by
+ *   sysprof_symbol_map_serialize()
+ *
+ * Maps @fd and locates the array of Decoded records at its start. The
+ * array ends at the first all-zero record. If no such record is found the
+ * map stays empty and lookups return %NULL.
+ *
+ * Returns: %TRUE if the file could be mapped, otherwise %FALSE.
+ */
+gboolean
+sysprof_symbol_map_deserialize (SysprofSymbolMap *self,
+                                gint              byte_order,
+                                gint              fd)
+{
+  gboolean needs_swap = byte_order != G_BYTE_ORDER;
+  gchar *beginptr;
+  gchar *endptr;
+  gsize length;
+
+  g_return_val_if_fail (self != NULL, FALSE);
+  g_return_val_if_fail (self->mapped == NULL, FALSE);
+
+  /* Map writable so records can be byte-swapped in place if needed. */
+  if (!(self->mapped = g_mapped_file_new_from_fd (fd, TRUE, NULL)))
+    return FALSE;
+
+  beginptr = g_mapped_file_get_contents (self->mapped);
+  length = g_mapped_file_get_length (self->mapped);
+  endptr = beginptr + length;
+
+  /* Walk whole records only. Comparing the remaining length avoids forming
+   * a pointer past the mapping and also accepts a record that ends exactly
+   * at the end of the file.
+   */
+  for (gsize pos = 0; length - pos >= sizeof (Decoded); pos += sizeof (Decoded))
+    {
+      Decoded *sym = (Decoded *)(beginptr + pos);
+
+      /* An all-zero record terminates the array; zero is the same in
+       * either byte order so it can be tested before swapping.
+       */
+      if (sym->addr_begin == 0 &&
+          sym->addr_end == 0 &&
+          sym->pid == 0 &&
+          sym->offset == 0)
+        {
+          self->symbols = (const Decoded *)beginptr;
+          self->n_symbols = sym - self->symbols;
+          break;
+        }
+      else if (needs_swap)
+        {
+          sym->addr_begin = GUINT64_SWAP_LE_BE (sym->addr_begin);
+          sym->addr_end = GUINT64_SWAP_LE_BE (sym->addr_end);
+          sym->pid = GUINT32_SWAP_LE_BE (sym->pid);
+          sym->offset = GUINT32_SWAP_LE_BE (sym->offset);
+          sym->tag_offset = GUINT32_SWAP_LE_BE (sym->tag_offset);
+        }
+    }
+
+  self->beginptr = beginptr;
+  self->endptr = endptr;
+
+  return TRUE;
+}
diff --git a/src/libsysprof/sysprof-symbol-map.h b/src/libsysprof/sysprof-symbol-map.h
new file mode 100644
index 0000000..2e482c2
--- /dev/null
+++ b/src/libsysprof/sysprof-symbol-map.h
@@ -0,0 +1,49 @@
+/* sysprof-symbol-map.h
+ *
+ * Copyright 2019 Christian Hergert <chergert redhat com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#pragma once
+
+#include <sysprof-capture.h>
+
+#include "sysprof-symbol-resolver.h"
+
+G_BEGIN_DECLS
+
+/* Opaque type; create with sysprof_symbol_map_new() and release with
+ * sysprof_symbol_map_free().
+ */
+typedef struct _SysprofSymbolMap SysprofSymbolMap;
+
+/* Creates an empty map. */
+SysprofSymbolMap *sysprof_symbol_map_new (void);
+/* Adds @resolver to the map; the map takes its own reference. */
+void sysprof_symbol_map_add_resolver (SysprofSymbolMap *self,
+ SysprofSymbolResolver *resolver);
+/* Loads the resolvers against @reader and collects a symbol for each
+ * distinct (pid, address) found in its sample frames. May only be called
+ * once per map.
+ */
+void sysprof_symbol_map_resolve (SysprofSymbolMap *self,
+ SysprofCaptureReader *reader);
+/* Looks up the symbol name for @addr in @pid in a deserialized map.
+ * Returns NULL when nothing matches. @tag, if not NULL, is set to the
+ * symbol's tag or 0.
+ */
+const gchar *sysprof_symbol_map_lookup (SysprofSymbolMap *self,
+ gint64 time,
+ gint32 pid,
+ SysprofCaptureAddress addr,
+ GQuark *tag);
+/* Prints the collected entries with g_print(), for debugging. */
+void sysprof_symbol_map_printf (SysprofSymbolMap *self);
+/* Writes the collected entries to @fd. Returns FALSE if writing fails. */
+gboolean sysprof_symbol_map_serialize (SysprofSymbolMap *self,
+ gint fd);
+/* Maps @fd for lookups, byte-swapping records when @byte_order differs
+ * from G_BYTE_ORDER. Returns FALSE if the file cannot be mapped.
+ */
+gboolean sysprof_symbol_map_deserialize (SysprofSymbolMap *self,
+ gint byte_order,
+ gint fd);
+/* Releases the map and everything it owns. */
+void sysprof_symbol_map_free (SysprofSymbolMap *self);
+
+G_END_DECLS
diff --git a/src/tests/test-addr-map.c b/src/tests/test-addr-map.c
new file mode 100644
index 0000000..c1867e0
--- /dev/null
+++ b/src/tests/test-addr-map.c
@@ -0,0 +1,105 @@
+#include <fcntl.h>
+#include <sysprof.h>
+
+#include "sysprof-platform.h"
+#include "sysprof-symbol-map.h"
+
+/* Created and run by main(); the worker thread quits it once it is done. */
+static GMainLoop *main_loop;
+
+/*
+ * Thread entry point exercising a full round trip of the symbol map.
+ *
+ * @data is the SysprofCaptureReader to use. The function resolves all
+ * symbols of the capture, serializes the map into a memfd, loads it back
+ * from there and then prints the symbol name of every address of every
+ * sample using only the deserialized map. It quits the main loop before
+ * returning.
+ */
+static void *
+resolve_in_thread (gpointer data)
+{
+  SysprofCaptureReader *reader = data;
+  g_autoptr(SysprofSymbolResolver) kernel = NULL;
+  g_autoptr(SysprofSymbolResolver) elf = NULL;
+  SysprofCaptureFrameType type;
+  SysprofSymbolMap *map;
+  gboolean r;
+  off_t pos;
+  int fd;
+
+  g_assert (reader != NULL);
+
+  map = sysprof_symbol_map_new ();
+  kernel = sysprof_kernel_symbol_resolver_new ();
+  elf = sysprof_elf_symbol_resolver_new ();
+
+  sysprof_symbol_map_add_resolver (map, kernel);
+  sysprof_symbol_map_add_resolver (map, elf);
+
+  sysprof_symbol_map_resolve (map, reader);
+
+  fd = sysprof_memfd_create ("decode-test");
+  g_assert_cmpint (fd, !=, -1);
+
+  r = sysprof_symbol_map_serialize (map, fd);
+  g_assert_true (r);
+  sysprof_symbol_map_free (map);
+
+  /* Reset some state: rewind both the capture and the serialized data.
+   * Note that lseek() takes the offset before the whence argument.
+   */
+  sysprof_capture_reader_reset (reader);
+  pos = lseek (fd, 0, SEEK_SET);
+  g_assert_cmpint (pos, ==, 0);
+
+  /* Now deserialize it */
+  map = sysprof_symbol_map_new ();
+  r = sysprof_symbol_map_deserialize (map, G_BYTE_ORDER, fd);
+  g_assert_true (r);
+
+  /* Now try to print some stack traces */
+  while (sysprof_capture_reader_peek_type (reader, &type))
+    {
+      if (type == SYSPROF_CAPTURE_FRAME_SAMPLE)
+        {
+          const SysprofCaptureSample *sample = NULL;
+
+          if (!(sample = sysprof_capture_reader_read_sample (reader)))
+            break;
+
+          for (guint j = 0; j < sample->n_addrs; j++)
+            {
+              const gchar *name;
+              GQuark tag;
+
+              name = sysprof_symbol_map_lookup (map,
+                                                sample->frame.time,
+                                                sample->frame.pid,
+                                                sample->addrs[j],
+                                                &tag);
+
+              if (name == NULL)
+                name = "Unknown symbol";
+
+              g_print ("%u: %s\n", j, name);
+            }
+
+          g_print ("======\n");
+        }
+      else if (!sysprof_capture_reader_skip (reader))
+        break;
+    }
+
+  sysprof_symbol_map_free (map);
+
+  close (fd);
+  g_main_loop_quit (main_loop);
+  return NULL;
+}
+
+/*
+ * Usage: test-addr-map CAPTURE_FILE
+ *
+ * Opens the capture, runs the round-trip test in a worker thread and
+ * iterates the main loop until the worker quits it.
+ *
+ * Returns 0 on success, or 1 if the arguments are wrong or the capture
+ * cannot be opened.
+ */
+gint
+main (gint   argc,
+      gchar *argv[])
+{
+  g_autoptr(SysprofCaptureReader) reader = NULL;
+  g_autoptr(GError) error = NULL;
+  GThread *thread;
+
+  if (argc != 2)
+    {
+      g_printerr ("usage: %s CAPTURE_FILE\n", argv[0]);
+      return 1;
+    }
+
+  if (!(reader = sysprof_capture_reader_new (argv[1], &error)))
+    {
+      g_printerr ("%s\n", error->message);
+      return 1;
+    }
+
+  main_loop = g_main_loop_new (NULL, FALSE);
+  thread = g_thread_new ("reader-thread", resolve_in_thread, reader);
+  g_main_loop_run (main_loop);
+
+  /* The worker quits the loop as its last step. Join it so it has fully
+   * finished before the reader is released, and to drop the thread
+   * reference.
+   */
+  g_thread_join (thread);
+  g_clear_pointer (&main_loop, g_main_loop_unref);
+
+  return 0;
+}
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]