[tracker/wip/carlosg/benchmark: 2/2] utils: Add benchmark utility
- From: Carlos Garnacho <carlosg src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/wip/carlosg/benchmark: 2/2] utils: Add benchmark utility
- Date: Sun, 27 Feb 2022 15:31:47 +0000 (UTC)
commit edb307abca84dee35e9edb5dbdcf31bb78e775d2
Author: Carlos Garnacho <carlosg gnome org>
Date: Sun Feb 27 16:19:18 2022 +0100
utils: Add benchmark utility
This executable is only built in-tree and not installed, mostly
useful so developers can check performance improvements and
regressions.
For reference and future baseline, the output is:
```
$ ./utils/benchmark/tracker-benchmark
Batch size: 5000, Individual test duration: 30 sec
Opening in-memory database…
Test Elements Elems/sec Min Max Avg
Resource batch update (sync) 1016148,459 33871,615 27,302 usec 40,300 usec 29,523 usec
SPARQL batch update (sync) 365763,206 12192,107 76,743 usec 93,013 usec 82,020 usec
Prepared statement query (sync) 1931876,356 64395,879 13,000 usec 5,158 msec 15,529 usec
SPARQL query (sync) 220222,325 7340,744 123,000 usec 1,350 msec 136,226 usec
```
utils/benchmark/meson.build | 4 +
utils/benchmark/tracker-benchmark.c | 430 ++++++++++++++++++++++++++++++++++++
utils/meson.build | 1 +
3 files changed, 435 insertions(+)
---
diff --git a/utils/benchmark/meson.build b/utils/benchmark/meson.build
new file mode 100644
index 000000000..134dc67cc
--- /dev/null
+++ b/utils/benchmark/meson.build
@@ -0,0 +1,4 @@
+# Developer-only benchmark tool: built against tracker_sparql_dep and
+# deliberately not installed (install: false).
+executable('tracker-benchmark',
+ 'tracker-benchmark.c',
+ dependencies: [tracker_sparql_dep],
+ install: false)
diff --git a/utils/benchmark/tracker-benchmark.c b/utils/benchmark/tracker-benchmark.c
new file mode 100644
index 000000000..aede7d303
--- /dev/null
+++ b/utils/benchmark/tracker-benchmark.c
@@ -0,0 +1,430 @@
+/*
+ * Copyright (C) 2022, Red Hat Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * Author: Carlos Garnacho <carlosg gnome org>
+ */
+
+#include <libtracker-sparql/tracker-sparql.h>
+#include <locale.h>
+#include <glib.h>
+#include <stdio.h>
+
+/* Option values, initialised to the defaults used when a flag is absent. */
+static gchar *database_path = NULL;
+static gint batch_size = 5000;
+static gint duration = 30;
+
+/* Command line options accepted by the benchmark. Each entry stores its
+ * parsed value in one of the variables above; the entry with a NULL
+ * long name closes the list.
+ */
+static GOptionEntry entries[] = {
+	{
+		.long_name = "database",
+		.short_name = 'p',
+		.flags = 0,
+		.arg = G_OPTION_ARG_FILENAME,
+		.arg_data = &database_path,
+		.description = "Location of the database",
+		.arg_description = "FILE",
+	},
+	{
+		.long_name = "batch-size",
+		.short_name = 'b',
+		.flags = 0,
+		.arg = G_OPTION_ARG_INT,
+		.arg_data = &batch_size,
+		.description = "Update batch size",
+		.arg_description = "SIZE",
+	},
+	{
+		.long_name = "duration",
+		.short_name = 'd',
+		.flags = 0,
+		.arg = G_OPTION_ARG_INT,
+		.arg_data = &duration,
+		.description = "Duration of individual benchmarks",
+		.arg_description = "DURATION",
+	},
+	{ .long_name = NULL }
+};
+
+/* Produces one piece of benchmark input. The implementations in this
+ * file are create_resource() and create_query().
+ */
+typedef gpointer (*DataCreateFunc) (void);
+
+/* Signature shared by all benchmarks.
+ *
+ * conn:      connection the benchmark operates on
+ * data_func: generator for the input the benchmark consumes
+ * elapsed:   in/out, accumulated measured time in seconds
+ * elems:     in/out, number of elements processed
+ * min, max:  in/out, shortest and longest single measurement in seconds
+ */
+typedef void (*BenchmarkFunc) (TrackerSparqlConnection *conn,
+ DataCreateFunc data_func,
+ double *elapsed,
+ int *elems,
+ double *min,
+ double *max);
+
+/* Units a duration can be reported in, see get_unit() */
+enum {
+ UNIT_SEC,
+ UNIT_MSEC,
+ UNIT_USEC,
+};
+
+/* Selects the unit used to print a duration given in seconds:
+ * microseconds below one millisecond, milliseconds below one second,
+ * seconds for everything else.
+ */
+static inline int
+get_unit (gdouble value)
+{
+	int unit = UNIT_SEC;
+
+	if (value < 0.001) {
+		/* Below msec, report in usecs */
+		unit = UNIT_USEC;
+	} else if (value < 1) {
+		unit = UNIT_MSEC;
+	}
+
+	return unit;
+}
+
+/* Converts a duration in seconds into the unit chosen by get_unit()
+ * for that same value, so it can be printed next to unit_string().
+ */
+static gdouble
+transform_unit (gdouble value)
+{
+	gdouble scaled = value;
+
+	switch (get_unit (value)) {
+	case UNIT_USEC:
+		scaled = value * G_USEC_PER_SEC;
+		break;
+	case UNIT_MSEC:
+		scaled = value * 1000;
+		break;
+	case UNIT_SEC:
+		/* Already expressed in seconds */
+		break;
+	default:
+		g_assert_not_reached ();
+	}
+
+	return scaled;
+}
+
+/* Returns the suffix ("usec", "msec" or "sec") matching the unit that
+ * get_unit() picks for a duration in seconds. The string is static and
+ * must not be freed.
+ */
+static const gchar *
+unit_string (gdouble value)
+{
+	static const gchar * const suffixes[] = {
+		[UNIT_SEC] = "sec",
+		[UNIT_MSEC] = "msec",
+		[UNIT_USEC] = "usec",
+	};
+	int unit = get_unit (value);
+
+	/* get_unit() only ever returns one of the three units above */
+	g_assert (unit >= 0 && unit < (int) G_N_ELEMENTS (suffixes));
+
+	return suffixes[unit];
+}
+
+/* DataCreateFunc for the update benchmarks: creates a TrackerResource
+ * with a NULL identifier whose rdf:type is rdfs:Resource. The caller
+ * owns the returned object.
+ */
+static inline gpointer
+create_resource (void)
+{
+	TrackerResource *res = tracker_resource_new (NULL);
+
+	tracker_resource_set_uri (res, "rdf:type", "rdfs:Resource");
+
+	return res;
+}
+
+/* DataCreateFunc for the query benchmarks: returns a newly allocated
+ * copy of the SPARQL query to run, to be released with g_free().
+ */
+static inline gpointer
+create_query (void)
+{
+	static const gchar query[] = "SELECT ?u { ?u a rdfs:Resource } limit 1";
+
+	return g_strdup (query);
+}
+
+/* Builds a TrackerBatch with batch_size elements obtained from data_func.
+ *
+ * conn:      connection the batch is created on
+ * data_func: called once per element, expected to return a new
+ *            TrackerResource that this function unrefs after use
+ * sparql:    if TRUE every resource is serialized to a SPARQL update
+ *            string and added as text, if FALSE the resource object
+ *            itself is added to the batch
+ *
+ * Returns the batch, owned by the caller.
+ */
+static inline TrackerBatch *
+create_batch (TrackerSparqlConnection *conn,
+              DataCreateFunc           data_func,
+              gboolean                 sparql)
+{
+	TrackerNamespaceManager *namespaces = NULL;
+	TrackerBatch *batch;
+	int i;
+
+	batch = tracker_sparql_connection_create_batch (conn);
+
+	/* Only needed to serialize resources, and the same for every element */
+	if (sparql)
+		namespaces = tracker_sparql_connection_get_namespace_manager (conn);
+
+	for (i = 0; i < batch_size; i++) {
+		TrackerResource *resource = data_func ();
+
+		if (sparql) {
+			/* Named so it does not shadow the "sparql" argument */
+			gchar *update;
+
+			update = tracker_resource_print_sparql_update (resource,
+			                                               namespaces,
+			                                               NULL);
+			tracker_batch_add_sparql (batch, update);
+			g_free (update);
+		} else {
+			tracker_batch_add_resource (batch, NULL, resource);
+		}
+
+		g_object_unref (resource);
+	}
+
+	return batch;
+}
+
+/* Iterates over all rows of the cursor, reading the first column of
+ * each, and closes the cursor afterwards.
+ *
+ * Aborts through g_assert_no_error() if iterating fails, like the rest
+ * of the benchmark does on unexpected errors.
+ *
+ * Returns a value derived from the data read, whose only purpose is
+ * to keep the reads from being optimized out.
+ */
+static int
+consume_cursor (TrackerSparqlCursor *cursor)
+{
+	GError *error = NULL;
+	int magic = 0;
+
+	while (tracker_sparql_cursor_next (cursor, NULL, &error)) {
+		const gchar *str;
+
+		/* Some bit fiddling so the loop is not optimized out */
+		str = tracker_sparql_cursor_get_string (cursor, 0, NULL);
+
+		/* The getter may return NULL, do not dereference blindly */
+		if (str)
+			magic ^= str[0] == 'h';
+	}
+
+	/* FALSE from _next() means either "no more rows" or "failure",
+	 * do not let the latter pass as a fast, empty result.
+	 */
+	g_assert_no_error (error);
+
+	tracker_sparql_cursor_close (cursor);
+
+	return magic;
+}
+
+/* Benchmark: inserts resources through batches of TrackerResource
+ * objects.
+ *
+ * Batches of batch_size resources are created and executed until the
+ * time accumulated in *elapsed reaches the configured duration. Each
+ * measurement covers both creating and executing a batch.
+ *
+ * On return *elems holds the number of resources inserted, and *min
+ * and *max the fastest/slowest batch time divided by batch_size.
+ */
+static void
+benchmark_update_batch (TrackerSparqlConnection *conn,
+                        DataCreateFunc           data_func,
+                        double                  *elapsed,
+                        int                     *elems,
+                        double                  *min,
+                        double                  *max)
+{
+	GTimer *stopwatch = g_timer_new ();
+	GError *error = NULL;
+
+	while (*elapsed < duration) {
+		TrackerBatch *batch;
+		double spent;
+
+		g_timer_reset (stopwatch);
+
+		batch = create_batch (conn, data_func, FALSE);
+		tracker_batch_execute (batch, NULL, &error);
+		g_assert_no_error (error);
+		g_object_unref (batch);
+
+		spent = g_timer_elapsed (stopwatch, NULL);
+
+		if (spent < *min)
+			*min = spent;
+		if (spent > *max)
+			*max = spent;
+
+		*elapsed += spent;
+		(*elems)++;
+	}
+
+	/* We count things by resources, not batches */
+	*min = *min / batch_size;
+	*max = *max / batch_size;
+	*elems = *elems * batch_size;
+
+	g_timer_destroy (stopwatch);
+}
+
+/* Benchmark: inserts resources through batches of SPARQL update
+ * strings.
+ *
+ * Batches of batch_size updates are created and executed until the
+ * time accumulated in *elapsed reaches the configured duration. Each
+ * measurement covers both creating and executing a batch.
+ *
+ * On return *elems holds the number of resources inserted, and *min
+ * and *max the fastest/slowest batch time divided by batch_size.
+ */
+static void
+benchmark_update_sparql (TrackerSparqlConnection *conn,
+                         DataCreateFunc           data_func,
+                         double                  *elapsed,
+                         int                     *elems,
+                         double                  *min,
+                         double                  *max)
+{
+	/* The timer is already running once created */
+	GTimer *stopwatch = g_timer_new ();
+	GError *error = NULL;
+
+	while (*elapsed < duration) {
+		TrackerBatch *batch;
+		double spent;
+
+		batch = create_batch (conn, data_func, TRUE);
+		tracker_batch_execute (batch, NULL, &error);
+		g_assert_no_error (error);
+		g_object_unref (batch);
+
+		spent = g_timer_elapsed (stopwatch, NULL);
+
+		if (spent < *min)
+			*min = spent;
+		if (spent > *max)
+			*max = spent;
+
+		*elapsed += spent;
+		(*elems)++;
+
+		/* Start over for the next batch */
+		g_timer_reset (stopwatch);
+	}
+
+	/* We count things by resources, not batches */
+	*min = *min / batch_size;
+	*max = *max / batch_size;
+	*elems = *elems * batch_size;
+
+	g_timer_destroy (stopwatch);
+}
+
+/* Benchmark: runs a query through a prepared TrackerSparqlStatement.
+ *
+ * The statement is prepared once from the string returned by
+ * data_func, then executed and its cursor fully consumed until the
+ * time accumulated in *elapsed reaches the configured duration.
+ *
+ * Only executing the statement and consuming the cursor is measured.
+ * The timer is reset at the start of every iteration so the one-off
+ * cost of preparing the statement does not end up in the first sample
+ * (and thus in *max and *elapsed).
+ *
+ * On return *elems holds the number of queries run, and *min and *max
+ * the fastest/slowest single query time.
+ */
+static void
+benchmark_query_statement (TrackerSparqlConnection *conn,
+                           DataCreateFunc           data_func,
+                           double                  *elapsed,
+                           int                     *elems,
+                           double                  *min,
+                           double                  *max)
+{
+	TrackerSparqlStatement *stmt;
+	GTimer *timer;
+	GError *error = NULL;
+	gchar *query;
+
+	query = data_func ();
+	stmt = tracker_sparql_connection_query_statement (conn, query,
+	                                                  NULL, &error);
+	g_assert_no_error (error);
+	g_free (query);
+
+	timer = g_timer_new ();
+
+	while (*elapsed < duration) {
+		TrackerSparqlCursor *cursor;
+		double query_elapsed;
+
+		g_timer_reset (timer);
+
+		cursor = tracker_sparql_statement_execute (stmt, NULL, &error);
+		g_assert_no_error (error);
+		consume_cursor (cursor);
+		g_object_unref (cursor);
+
+		query_elapsed = g_timer_elapsed (timer, NULL);
+		*min = MIN (*min, query_elapsed);
+		*max = MAX (*max, query_elapsed);
+		*elapsed += query_elapsed;
+		*elems += 1;
+	}
+
+	g_object_unref (stmt);
+	g_timer_destroy (timer);
+}
+
+/* Benchmark: runs a query given as a SPARQL string.
+ *
+ * The string returned by data_func is handed to
+ * tracker_sparql_connection_query() and the resulting cursor fully
+ * consumed, until the time accumulated in *elapsed reaches the
+ * configured duration.
+ *
+ * On return *elems holds the number of queries run, and *min and *max
+ * the fastest/slowest single query time.
+ */
+static void
+benchmark_query_sparql (TrackerSparqlConnection *conn,
+                        DataCreateFunc           data_func,
+                        double                  *elapsed,
+                        int                     *elems,
+                        double                  *min,
+                        double                  *max)
+{
+	/* The timer is already running once created */
+	GTimer *stopwatch = g_timer_new ();
+	gchar *sparql_text = data_func ();
+	GError *error = NULL;
+
+	while (*elapsed < duration) {
+		TrackerSparqlCursor *cursor;
+		double spent;
+
+		cursor = tracker_sparql_connection_query (conn, sparql_text,
+		                                          NULL, &error);
+		g_assert_no_error (error);
+		consume_cursor (cursor);
+		g_object_unref (cursor);
+
+		spent = g_timer_elapsed (stopwatch, NULL);
+
+		if (spent < *min)
+			*min = spent;
+		if (spent > *max)
+			*max = spent;
+
+		*elapsed += spent;
+		(*elems)++;
+
+		/* Start over for the next query */
+		g_timer_reset (stopwatch);
+	}
+
+	g_timer_destroy (stopwatch);
+	g_free (sparql_text);
+}
+
+/* Benchmarks to run, in order. Each entry pairs the description printed
+ * in the results table with the benchmark function and the generator
+ * for the data it consumes. The table is only read within this file,
+ * hence static and const.
+ */
+static const struct {
+	const gchar *desc;
+	BenchmarkFunc func;
+	DataCreateFunc data_func;
+} benchmarks[] = {
+	{ "Resource batch update (sync)", benchmark_update_batch, create_resource },
+	{ "SPARQL batch update (sync)", benchmark_update_sparql, create_resource },
+	{ "Prepared statement query (sync)", benchmark_query_statement, create_query },
+	{ "SPARQL query (sync)", benchmark_query_sparql, create_query },
+};
+
+/* Runs every entry of the benchmarks table on conn and prints one row
+ * of results per benchmark: element count, elements per second, and
+ * the minimum, maximum and average time per element.
+ */
+static void
+run_benchmarks (TrackerSparqlConnection *conn)
+{
+	guint i;
+	/* Used as the "%*s" field width, which takes an int */
+	int max_len = 0;
+
+	for (i = 0; i < G_N_ELEMENTS (benchmarks); i++)
+		max_len = MAX (max_len, (int) strlen (benchmarks[i].desc));
+
+	g_print ("%*s\t\tElements\tElems/sec\tMin \tMax \tAvg\n",
+	         max_len, "Test");
+
+	for (i = 0; i < G_N_ELEMENTS (benchmarks); i++) {
+		double elapsed = 0, min = G_MAXDOUBLE, max = -G_MAXDOUBLE, adjusted, avg;
+		int elems = 0;
+
+		benchmarks[i].func (conn, benchmarks[i].data_func,
+		                    &elapsed, &elems, &min, &max);
+
+		if (elems <= 0 || elapsed <= 0) {
+			/* Nothing was measured (e.g. a non-positive duration),
+			 * the figures below would all be divisions by zero.
+			 */
+			g_print ("%*s\t\tNo iterations completed\n",
+			         max_len, benchmarks[i].desc);
+			continue;
+		}
+
+		if (elapsed > duration) {
+			/* The last iteration usually overshoots the requested
+			 * duration. Scale the element count down to what fits
+			 * in exactly that duration, so the output does not
+			 * need to explain how long the benchmark really ran.
+			 */
+			adjusted = elems * ((double) duration / elapsed);
+		} else {
+			adjusted = elems;
+		}
+
+		avg = elapsed / elems;
+		g_print ("%*s\t\t%.3f\t%.3f\t%.3f %s\t%.3f %s\t%3.3f %s\n",
+		         max_len, benchmarks[i].desc,
+		         adjusted,
+		         elems / elapsed,
+		         transform_unit (min), unit_string (min),
+		         transform_unit (max), unit_string (max),
+		         transform_unit (avg), unit_string (avg));
+	}
+}
+
+/* Entry point: parses the command line, opens a new database (in
+ * memory, or at the location given with --database, which must not
+ * exist yet) using the Nepomuk ontology, and runs all benchmarks on it.
+ *
+ * Returns EXIT_SUCCESS if the benchmarks ran, EXIT_FAILURE on invalid
+ * options, an already existing database path, or a failure to open
+ * the database.
+ */
+int
+main (int argc, char *argv[])
+{
+	TrackerSparqlConnection *conn = NULL;
+	GOptionContext *context;
+	GError *error = NULL;
+	GFile *db = NULL;
+	GFile *ontology = NULL;
+	int retval = EXIT_FAILURE;
+
+	setlocale (LC_ALL, "");
+
+	context = g_option_context_new (NULL);
+	g_option_context_add_main_entries (context, entries, NULL);
+
+	if (!g_option_context_parse (context, &argc, &argv, &error)) {
+		g_printerr ("%s, %s\n", "Unrecognized options", error->message);
+		g_option_context_free (context);
+		goto out;
+	}
+
+	g_option_context_free (context);
+
+	/* Both values are used as loop bounds and divisors later on */
+	if (batch_size <= 0 || duration <= 0) {
+		g_printerr ("Batch size and duration must be greater than 0\n");
+		goto out;
+	}
+
+	g_print ("Batch size: %d, Individual test duration: %d sec\n",
+	         batch_size, duration);
+
+	if (database_path) {
+		/* Never benchmark on top of (nor modify) pre-existing data */
+		if (g_file_test (database_path, G_FILE_TEST_EXISTS)) {
+			g_printerr ("Database path '%s' already exists\n", database_path);
+			goto out;
+		}
+
+		g_print ("Opening file database at '%s'…\n",
+		         database_path);
+		db = g_file_new_for_commandline_arg (database_path);
+	} else {
+		g_print ("Opening in-memory database…\n");
+	}
+
+	/* The ontology location is returned as a new reference, keep it
+	 * around so it can be released.
+	 */
+	ontology = tracker_sparql_get_ontology_nepomuk ();
+	conn = tracker_sparql_connection_new (TRACKER_SPARQL_CONNECTION_FLAGS_NONE,
+	                                      db, ontology,
+	                                      NULL, &error);
+	if (!conn) {
+		g_printerr ("Could not open database: %s\n",
+		            error ? error->message : "unknown error");
+		goto out;
+	}
+
+	run_benchmarks (conn);
+	retval = EXIT_SUCCESS;
+
+out:
+	g_clear_error (&error);
+	g_clear_object (&conn);
+	g_clear_object (&ontology);
+	g_clear_object (&db);
+	g_free (database_path);
+
+	return retval;
+}
diff --git a/utils/meson.build b/utils/meson.build
index d571a582b..a13458910 100644
--- a/utils/meson.build
+++ b/utils/meson.build
@@ -1,3 +1,4 @@
+subdir('benchmark')
subdir('mtp')
subdir('tracker-resdump')
subdir('trackertestutils')
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]