Re: [patch NetworkManager v2 6/6] nm-device-team: spawn teamd for team device connection



Wed, Jul 24, 2013 at 07:23:51PM CEST, dcbw redhat com wrote:
On Wed, 2013-07-24 at 07:37 +0200, Jiri Pirko wrote:
Wed, Jul 24, 2013 at 01:53:56AM CEST, dcbw redhat com wrote:
On Mon, 2013-07-15 at 09:46 +0200, Jiri Pirko wrote:
Signed-off-by: Jiri Pirko <jiri resnulli us>
---
 src/devices/nm-device-team.c | 283 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 282 insertions(+), 1 deletion(-)

diff --git a/src/devices/nm-device-team.c b/src/devices/nm-device-team.c
index 320b659..ac0467c 100644
--- a/src/devices/nm-device-team.c
+++ b/src/devices/nm-device-team.c
@@ -20,8 +20,13 @@
 
 #include "config.h"
 
+#include <sys/types.h>
+#include <unistd.h>
+#include <signal.h>
+#include <sys/wait.h>
 #include <glib.h>
 #include <glib/gi18n.h>
+#include <gio/gio.h>
 
 #include <netinet/ether.h>
 
@@ -34,6 +39,7 @@
 #include "nm-dbus-glib-types.h"
 #include "nm-dbus-manager.h"
 #include "nm-enum-types.h"
+#include "nm-posix-signals.h"
 
 #include "nm-device-team-glue.h"
 
@@ -45,7 +51,11 @@ G_DEFINE_TYPE (NMDeviceTeam, nm_device_team, NM_TYPE_DEVICE)
 #define NM_TEAM_ERROR (nm_team_error_quark ())
 
 typedef struct {
- int dummy;
+ GPid teamd_pid;
+ guint teamd_process_watch;
+ guint teamd_timeout;
+ guint teamd_dbus_watch;
+ gboolean teamd_on_dbus;
 } NMDeviceTeamPrivate;
 
 enum {
@@ -179,9 +189,267 @@ match_l2_config (NMDevice *self, NMConnection *connection)
 
 /******************************************************************/
 
+typedef struct {
+ int pid;
+} KillInfo;
+
+static gboolean
+ensure_killed (gpointer data)
+{
+ KillInfo *info = data;
+
+ if (kill (info->pid, 0) == 0)
+         kill (info->pid, SIGKILL);
+
+ /* ensure the child is reaped */
+ nm_log_dbg (LOGD_TEAM, "waiting for teamd pid %d to exit", info->pid);
+ waitpid (info->pid, NULL, 0);
+ nm_log_dbg (LOGD_TEAM, "teamd pid %d cleaned up", info->pid);
+
+ g_free (info);
+ return FALSE;
+}
+
+static void
+service_kill (int pid)
+{
+ if (kill (pid, SIGTERM) == 0) {
+         KillInfo *info;
+
+         info = g_malloc0 (sizeof (KillInfo));
+         info->pid = pid;

You don't actually need the 'info' thing here, since the pid will always
be a 32-bit integer (AFAIK?), you can use GINT_TO_POINTER (pid) and skip
all the allocation/free stuff.  So it would really end up as:

static gboolean
ensure_killed (gpointer data)
{
   int pid = GPOINTER_TO_INT (data);

   kill (pid);
   ...
   return FALSE;
}

static void
service_kill (...)
{
   if (kill (pid, SIGTERM)) {
       g_timeout_add_seconds (2, ensure_killed, GINT_TO_POINTER (pid));
   }
   ...
}

You are right that structure is not needed per say. But even with int, I
would have to do malloc/free anyway (because ensure_killed() is called
when original pid variable is not available anymore)

Actually you still don't  need to malloc/free, because the user_data
pointer is stored by glib.  Since the PID here is just a value, and not
a pointer to anything, we don't need to worry about malloc/free at all.

Now, if we wanted to pass multiple values to ensure_killed(), yeah we'd
need a struct and we'd need to malloc/free it, but as long as we're
using a value that can fit into a pointer, malloc/free isn't required.


Okay. Yeah, you are right. I misread the code. Will fix this.


(more below)

I always like to wrap this in struct, seems nicer to me.



+         g_timeout_add_seconds (2, ensure_killed, info);
+ }
+ else {
+         kill (pid, SIGKILL);
+
+         /* ensure the child is reaped */
+         nm_log_dbg (LOGD_TEAM, "waiting for teamd pid %d to exit", pid);
+         waitpid (pid, NULL, 0);
+         nm_log_dbg (LOGD_TEAM, "teamd pid %d cleaned up", pid);
+ }
+}
+
+static void
+teamd_timeout_remove (NMDevice *dev)
+{
+ NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (dev);
+
+ if (priv->teamd_timeout) {
+         g_source_remove (priv->teamd_timeout);
+         priv->teamd_timeout = 0;
+ }
+}
+
+static void
+teamd_cleanup (NMDevice *dev)
+{
+ NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (dev);
+
+ if (priv->teamd_dbus_watch) {
+         g_source_remove (priv->teamd_dbus_watch);
+         priv->teamd_dbus_watch = 0;
+ }
+
+ if (priv->teamd_process_watch) {
+         g_source_remove (priv->teamd_process_watch);
+         priv->teamd_process_watch = 0;
+ }
+
+ if (priv->teamd_pid > 0) {
+         service_kill (priv->teamd_pid);
+         priv->teamd_pid = 0;
+ }
+
+ teamd_timeout_remove (dev);
+
+ priv->teamd_on_dbus = FALSE;
+}
+
+static gboolean
+teamd_timeout_cb (gpointer user_data)
+{
+ NMDevice *dev = NM_DEVICE (user_data);
+ NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (dev);
+
+ if (priv->teamd_timeout) {
+         nm_log_info (LOGD_TEAM, "(%s): teamd timed out.", nm_device_get_iface (dev));
+         teamd_cleanup (dev);
+ }
+
+ return FALSE;
+}
+
+static void
+teamd_dbus_appeared (GDBusConnection *connection,
+                     const gchar *name,
+                     const gchar *name_owner,
+                     gpointer user_data)
+{
+ NMDevice *dev = NM_DEVICE (user_data);
+ NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (dev);
+
+ if (!priv->teamd_dbus_watch)
+         return;
+
+ nm_log_info (LOGD_TEAM, "(%s): teamd appeared on D-Bus", nm_device_get_iface (dev));
+ priv->teamd_on_dbus = FALSE;
+ teamd_timeout_remove (dev);
+ nm_device_activate_schedule_stage2_device_config (dev);
+}
+
+static void
+teamd_dbus_vanished (GDBusConnection *connection,
+                     const gchar *name,
+                     gpointer user_data)
+{
+ NMDevice *dev = NM_DEVICE (user_data);
+ NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (dev);
+ NMDeviceState state;
+
+ if (!priv->teamd_dbus_watch || !priv->teamd_on_dbus)
+         return;
+ nm_log_info (LOGD_TEAM, "(%s): teamd vanished from D-Bus", nm_device_get_iface (dev));
+ teamd_cleanup (dev);
+
+ state = nm_device_get_state (dev);
+ if (nm_device_is_activating (dev) || (state == NM_DEVICE_STATE_ACTIVATED))
+         nm_device_state_changed (dev, NM_DEVICE_STATE_FAILED, NM_DEVICE_STATE_REASON_NONE);
+}
+
+static void
+teamd_process_watch_cb (GPid pid, gint status, gpointer user_data)
+{
+ NMDevice *dev = NM_DEVICE (user_data);
+ NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (dev);
+ NMDeviceState state;
+
+ nm_log_info (LOGD_TEAM, "(%s): teamd died", nm_device_get_iface (dev));
+ priv->teamd_pid = 0;
+ teamd_cleanup (dev);
+
+ state = nm_device_get_state (dev);
+ if (nm_device_is_activating (dev) || (state == NM_DEVICE_STATE_ACTIVATED))
+         nm_device_state_changed (dev, NM_DEVICE_STATE_FAILED, NM_DEVICE_STATE_REASON_NONE);
+}
+
+static void
+teamd_child_setup (gpointer user_data G_GNUC_UNUSED)
+{
+ /* We are in the child process at this point.
+  * Give child it's own program group for signal
+  * separation.
+  */
+ pid_t pid = getpid ();
+ setpgid (pid, pid);
+
+ /*
+  * We blocked signals in main(). We need to restore original signal
+  * mask for avahi-autoipd here so that it can receive signals.
+  */
+ nm_unblock_posix_signals (NULL);
+}
+
+static gboolean
+teamd_start (NMDevice *dev, NMSettingTeam *s_team, NMDeviceTeamPrivate *priv)
+{
+ const char *iface = nm_device_get_ip_iface (dev);
+ char *tmp_str;
+ const char *config;
+ const char **teamd_binary = NULL;
+ static const char *teamd_paths[] = {
+         "/usr/bin/teamd",
+         "/usr/local/bin/teamd",
+         NULL
+ };
+ GPtrArray *argv;
+ GError *error = NULL;
+ gboolean ret;
+
+ teamd_binary = teamd_paths;
+ while (*teamd_binary != NULL) {
+         if (g_file_test (*teamd_binary, G_FILE_TEST_EXISTS))
+                 break;
+         teamd_binary++;
+ }
+
+ if (!*teamd_binary) {
+         nm_log_warn (LOGD_TEAM,
+                      "Activation (%s) to start teamd: not found", iface);
+         return FALSE;
+ }
+
+ argv = g_ptr_array_new ();
+ g_ptr_array_add (argv, (gpointer) *teamd_binary);
+ g_ptr_array_add (argv, (gpointer) "-o");
+ g_ptr_array_add (argv, (gpointer) "-n");
+ g_ptr_array_add (argv, (gpointer) "-U");
+ g_ptr_array_add (argv, (gpointer) "-D");
+ g_ptr_array_add (argv, (gpointer) "-t");
+ g_ptr_array_add (argv, (gpointer) iface);
+
+ config = nm_setting_team_get_config(s_team);
+ if (config) {
+         g_ptr_array_add (argv, (gpointer) "-c");
+         g_ptr_array_add (argv, (gpointer) config);
+ }
+
+ if (nm_logging_level_enabled (LOGL_DEBUG))
+         g_ptr_array_add (argv, (gpointer) "-gg");
+ g_ptr_array_add (argv, NULL);
+
+ tmp_str = g_strjoinv (" ", (gchar **) argv->pdata);
+ nm_log_dbg (LOGD_TEAM, "running: %s", tmp_str);
+ g_free (tmp_str);
+
+ /* Start a timeout for teamd to appear at D-Bus */
+ priv->teamd_timeout = g_timeout_add_seconds (5, teamd_timeout_cb, dev);
+
+ /* Register D-Bus name watcher */
+ tmp_str = g_strdup_printf ("org.libteam.teamd.%s", iface);

Huh, so what if I run "ip link set dev team0 name foobar0"?  Does teamd
drop its bus name and acquire a new name, which wouldn't play at all
well with clients, or does it just continue with the old bus name?
That's the problem with running a daemon-per-interface without some
central hub handling lookups for you...

The name will not change. User should not need to do this. Renaming is
primary for implicitly named devices, which team is not.


Or does the team driver simply prevent device renaming in the kernel?

+ priv->teamd_dbus_watch = g_bus_watch_name (G_BUS_TYPE_SYSTEM,
+                                            tmp_str,
+                                            G_BUS_NAME_WATCHER_FLAGS_NONE,
+                                            teamd_dbus_appeared,
+                                            teamd_dbus_vanished,
+                                            dev,
+                                            NULL);
+ g_free (tmp_str);
+
+ ret = g_spawn_async ("/", (char **) argv->pdata, NULL, G_SPAWN_DO_NOT_REAP_CHILD,
+                     &teamd_child_setup, NULL, &priv->teamd_pid, &error);
+ g_ptr_array_free (argv, TRUE);
+ if (!ret) {
+         nm_log_warn (LOGD_TEAM,
+                      "Activation (%s) failed to start teamd: %s",
+                      iface, error->message);
+         g_clear_error (&error);
+         return FALSE;
+ }
+
+ /* Monitor the child process so we know when it dies */
+ priv->teamd_process_watch = g_child_watch_add (priv->teamd_pid,
+                                                teamd_process_watch_cb,
+                                                dev);
+
+ nm_log_info (LOGD_TEAM,
+              "Activation (%s) started teamd...", iface);
+ return TRUE;
+}
+
+static void
+teamd_stop (NMDevice *dev, NMDeviceTeamPrivate *priv)
+{
+ g_return_if_fail (priv->teamd_pid > 0);
+ nm_log_info (LOGD_TEAM, "Deactivation (%s) stopping teamd...",
+              nm_device_get_ip_iface (dev));
+ teamd_cleanup (dev);
+}
+
 static NMActStageReturn
 act_stage1_prepare (NMDevice *dev, NMDeviceStateReason *reason)
 {
+ NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (dev);
  NMActStageReturn ret = NM_ACT_STAGE_RETURN_SUCCESS;
  NMConnection *connection;
  NMSettingTeam *s_team;
@@ -194,10 +462,22 @@ act_stage1_prepare (NMDevice *dev, NMDeviceStateReason *reason)
          g_assert (connection);
          s_team = nm_connection_get_setting_team (connection);
          g_assert (s_team);
+         if (teamd_start (dev, s_team, priv))
+                 ret = NM_ACT_STAGE_RETURN_POSTPONE;
+         else
+                 ret = NM_ACT_STAGE_RETURN_FAILURE;

So in the future, if teamd is already started for an interface and NM
wants to assume that interface non-destructively, is there a way to set
the options that NM usually runs teamd with, eg "-o -n -U -D -t" and
then also grab the configuration as a JSON blob over D-Bus?  We'll want
to do that at some point this year I think.  Also, same situation if NM
crashes for some reason, we'd want to take the interface over again when
NM gets restarted without doing anything destructive, and that would
involve finding the existing teamd process via D-Bus and reading the
configuration back from it.

Yes, this I want to implement soon


  }
  return ret;
 }
 
+static void
+deactivate (NMDevice *dev)
+{
+ NMDeviceTeamPrivate *priv = NM_DEVICE_TEAM_GET_PRIVATE (dev);
+
+ teamd_stop (dev, priv);
+}
+
 static gboolean
 enslave_slave (NMDevice *device, NMDevice *slave, NMConnection *connection)
 {
@@ -334,6 +614,7 @@ nm_device_team_class_init (NMDeviceTeamClass *klass)
  parent_class->match_l2_config = match_l2_config;
 
  parent_class->act_stage1_prepare = act_stage1_prepare;
+ parent_class->deactivate = deactivate;
  parent_class->enslave_slave = enslave_slave;
  parent_class->release_slave = release_slave;
 

Would be best to also call teamd_cleanup() from the dispose() method of
NMDeviceTeam just to ensure that if the device is freed, that everything
gets cleaned up.

This one should get done too, just to make sure we don't have any
use-after-free if the callbacks don't get removed for some reason.


Okay, will fix this and repost


Thanks for review!


Dan

Other than that, looks good.  I'm still somewhat uncomfortable with the
JSON configuration blob, but I'm not sure there's much we can do about
it at this point.  At least it's structured and can be validated, but it
makes that much more work for clients since they then have to go about
parsing the blob or linking to libteam or something, which means support
for libteam has to be a compile-time decision instead of a runtime
one...

Dan





[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]