[evolution-patches] patch for bug 45504




i think his system is running out of thread resources ... but *shrug* not sure.  this patch forces all dns lookups through a finite number of worker threads rather than having it unbounded.

side effect: cancellation of dns lookups will become less async than it was (i think, i'm not sure the pthread_cancel stuff cancelled it anyway).
real solution: we probably need to use a decent async dns library, if one exists.

i'm not even sure this patch will address the problem, and he doesn't build from source, so i'm not sure how we can test it, and it doesn't seem to happen elsewhere but on his multi-user system.  I dont know how much effort a custom build is - if we can just put it in, let it filter through to the snapshots (are they being built for 7.3?), and retract it later, perhaps ...

Michael

Index: ChangeLog
===================================================================
RCS file: /cvs/gnome/evolution/camel/ChangeLog,v
retrieving revision 1.1895
diff -u -3 -r1.1895 ChangeLog
--- ChangeLog	25 Sep 2003 15:16:49 -0000	1.1895
+++ ChangeLog	8 Oct 2003 03:15:51 -0000
@@ -1,3 +1,15 @@
+2003-10-08  Not Zed  <NotZed Ximian com>
+
+	** See Bug #45504
+
+	* camel-service.c (run_gethostbyx): new thread dispatcher which
+	just consumes tasks on the lookup port.
+	(get_hostbyx): generic lookup handler that handles cancellation.
+	(camel_gethostbyname, camel_gethostbyaddr): Changed to use stuff
+	above, sets up a request message and lets gethostbyx run it.
+	(init_gethostbyx): setup threads for dns lookups,
+	CAMEL_DNS_THREADS allows the number to be altered.
+
 2003-09-25  Jeffrey Stedfast  <fejj ximian com>
 
 	* providers/smtp/camel-smtp-transport.c (smtp_helo): If the
Index: camel-service.c
===================================================================
RCS file: /cvs/gnome/evolution/camel/camel-service.c,v
retrieving revision 1.85
diff -u -3 -r1.85 camel-service.c
--- camel-service.c	11 Aug 2003 20:41:32 -0000	1.85
+++ camel-service.c	8 Oct 2003 03:15:53 -0000
@@ -682,9 +682,16 @@
 #define STRUCT_OFFSET(type, field)        ((gint) ((gchar*) &((type *) 0)->field))
 #endif
 
+enum {
+	LOOKUP_QUIT,
+	LOOKUP_HOSTBYNAME,
+	LOOKUP_HOSTBYADDR,
+};
+
 struct _lookup_msg {
 	EMsg msg;
 	unsigned int cancelled:1;
+	unsigned int op:2;
 	const char *name;
 	int len;
 	int type;
@@ -695,67 +702,112 @@
 	char *hostbufmem;
 };
 
+static pthread_once_t gethostbyx_once = PTHREAD_ONCE_INIT;
+static EMsgPort *gethostbyx_port;
+static int gethostbyx_threaded = FALSE;
+
+static void
+run_gethostbyx_lookup(struct _lookup_msg *info)
+{
+	do {
+		if (info->op == LOOKUP_HOSTBYNAME)
+			info->result = e_gethostbyname_r(info->name, &info->hostbuf, info->hostbufmem, info->hostbuflen, &info->herr);
+		else
+			info->result = e_gethostbyaddr_r(info->name, info->len, info->type, &info->hostbuf, info->hostbufmem, info->hostbuflen, &info->herr);
+		
+		if (info->result == ERANGE) {
+			info->hostbuflen *= 2;
+			info->hostbufmem = g_realloc(info->hostbufmem, info->hostbuflen);
+		}
+	} while (info->result == ERANGE && !info->cancelled);
+}
+
 static void *
-get_hostbyname(void *data)
+run_gethostbyx(void *data)
 {
-	struct _lookup_msg *info = data;
+	int more = TRUE;
 
-	while ((info->result = e_gethostbyname_r(info->name, &info->hostbuf, info->hostbufmem, info->hostbuflen, &info->herr)) == ERANGE) {
-		d(printf("gethostbyname fialed?\n"));
-		pthread_testcancel();
-                info->hostbuflen *= 2;
-                info->hostbufmem = g_realloc(info->hostbufmem, info->hostbuflen);
-	}
+	/* This never quits actually ... */
+	do {
+		struct _lookup_msg *msg;
+
+		e_msgport_wait(gethostbyx_port);
+		msg = (struct _lookup_msg *)e_msgport_get(gethostbyx_port);
+		if (msg == NULL)
+			continue;
+
+		if (!msg->cancelled)
+			run_gethostbyx_lookup(msg);
+
+		if (msg->cancelled) {
+			g_free(msg->hostbufmem);
+			g_free(msg);
+		} else {
+			e_msgport_reply((EMsg *)msg);
+		}
+	} while (more);
 
-	d(printf("gethostbyname ok?\n"));
-	
-	/* If we got cancelled, dont reply, just free it */
-	if (info->cancelled) {
-		g_free(info->hostbufmem);
-		g_free(info);
-	} else {
-		e_msgport_reply((EMsg *)info);
-	}
-	
 	return NULL;
 }
 
-struct hostent *
-camel_gethostbyname (const char *name, CamelException *exout)
+static void
+init_gethostbyx(void)
 {
-	int fdmax, status, fd, cancel_fd;
-	struct _lookup_msg *msg;
-	CamelException ex;
-	
-	g_return_val_if_fail(name != NULL, NULL);
-	
-	if (camel_operation_cancel_check(NULL)) {
-		camel_exception_set (exout, CAMEL_EXCEPTION_USER_CANCEL, _("Cancelled"));
-		return NULL;
+	pthread_t id;
+	int count = 3, i;
+	char *tmp;
+
+	gethostbyx_port = e_msgport_new();
+
+	tmp = getenv("CAMEL_DNS_THREADS");
+	if (tmp && tmp[0]) {
+		count = MAX(atoi(tmp), 1);
+		g_warning("Overriding CAMEL_DNS_THREADS: %d threads for dns lookups", count);
+	}
+
+	/* We could create as many threads as we want here, they're auto-setup as consumers by e_msgport */
+	for (i=0;i<count;i++) {
+		if (pthread_create(&id, NULL, run_gethostbyx, NULL) != 0 && i == 0) {
+			g_warning("Could not create async dns thread, host lookups will block");
+			return;
+		}
 	}
 
-	camel_exception_init(&ex);
-	camel_operation_start_transient(NULL, _("Resolving: %s"), name);
+	gethostbyx_threaded = TRUE;
+}
 
-	msg = g_malloc0(sizeof(*msg));
-	msg->hostbuflen = 1024;
-	msg->hostbufmem = g_malloc(msg->hostbuflen);
-	msg->name = name;
-	msg->result = -1;
+static struct hostent *
+get_hostbyx(struct _lookup_msg *msg, CamelException *exout)
+{
+	CamelException ex;
+
+	/* This routine is used to serialise name lookups, and make them 'cancellable', in practice,
+	   we can't cancel, but we just throw away the result.  The problem is that if one name lookup
+	   is taking an inordinate amount of time, any new lookups will have to wait till that
+	   one times out.  A sort of workaround is to setup more than one lookup thread ... */
+
+	pthread_once(&gethostbyx_once, init_gethostbyx);
 	
-	cancel_fd = camel_operation_cancel_fd(NULL);
-	if (cancel_fd == -1) {
-		get_hostbyname(msg);
-	} else {
+	camel_exception_init(&ex);
+
+	if (msg->op == LOOKUP_HOSTBYNAME)
+		camel_operation_start_transient(NULL, _("Resolving: %s"), msg->name);
+	else
+		camel_operation_start_transient(NULL, _("Resolving address"));
+
+	if (gethostbyx_threaded) {
+		int fdmax, status, fd, cancel_fd;
 		EMsgPort *reply_port;
-		pthread_t id;
 		fd_set rdset;
-		int err;
 
 		reply_port = msg->msg.reply_port = e_msgport_new();
-		fd = e_msgport_fd(msg->msg.reply_port);
-		if ((err = pthread_create(&id, NULL, get_hostbyname, msg)) == 0) {
-			d(printf("waiting for name return/cancellation in main process\n"));
+		cancel_fd = camel_operation_cancel_fd(NULL);
+		if (cancel_fd != -1)
+			fd = e_msgport_fd(msg->msg.reply_port);
+
+		e_msgport_put(gethostbyx_port, (EMsg *)msg);
+
+		if (cancel_fd != -1) {
 			do {
 				FD_ZERO(&rdset);
 				FD_SET(cancel_fd, &rdset);
@@ -770,28 +822,26 @@
 				else
 					camel_exception_setv(&ex, CAMEL_EXCEPTION_USER_CANCEL, _("Cancelled"));
 
-				/* We cancel so if the thread impl is decent it causes immediate exit.
-				   We detach so we dont need to wait for it to exit if it isn't.
-				   We check the reply port incase we had a reply in the mean time, which we free later */
-				d(printf("Cancelling lookup thread and leaving it\n"));
 				msg->cancelled = 1;
-				pthread_detach(id);
-				pthread_cancel(id);
+				/* we could leak the message here, but its not very likely, and only small and harmless */
 				msg = (struct _lookup_msg *)e_msgport_get(reply_port);
 			} else {
 				struct _lookup_msg *reply = (struct _lookup_msg *)e_msgport_get(reply_port);
 
 				g_assert(reply == msg);
-				d(printf("waiting for child to exit\n"));
-				pthread_join(id, NULL);
-				d(printf("child done\n"));
 			}
 		} else {
-			camel_exception_setv(&ex, CAMEL_EXCEPTION_SYSTEM, _("Host lookup failed: cannot create thread: %s"), g_strerror(err));
+			struct _lookup_msg *reply;
+
+			e_msgport_wait(reply_port);
+			reply = (struct _lookup_msg *)e_msgport_get(reply_port);
+			g_assert(reply == msg);
 		}
 		e_msgport_destroy(reply_port);
+	} else {
+		run_gethostbyx_lookup(msg);
 	}
-	
+
 	camel_operation_end(NULL);
 
 	if (!camel_exception_is_set(&ex)) {
@@ -800,10 +850,10 @@
 
 		if (msg->herr == HOST_NOT_FOUND || msg->herr == NO_DATA)
 			camel_exception_setv (&ex, CAMEL_EXCEPTION_SYSTEM,
-					      _("Host lookup failed: %s: host not found"), name);
+					      _("Host lookup failed: host not found"));
 		else
 			camel_exception_setv (&ex, CAMEL_EXCEPTION_SYSTEM,
-					      _("Host lookup failed: %s: unknown reason"), name);
+					      _("Host lookup failed: unknown reason"));
 	}
 
 	if (msg) {
@@ -816,130 +866,50 @@
 	return NULL;
 }
 
-static void *
-get_hostbyaddr (void *data)
+struct hostent *
+camel_gethostbyname (const char *name, CamelException *exout)
 {
-	struct _lookup_msg *info = data;
-	
-	while ((info->result = e_gethostbyaddr_r (info->name, info->len, info->type, &info->hostbuf,
-						  info->hostbufmem, info->hostbuflen, &info->herr)) == ERANGE) {
-		d(printf ("gethostbyaddr fialed?\n"));
-		pthread_testcancel ();
-                info->hostbuflen *= 2;
-                info->hostbufmem = g_realloc (info->hostbufmem, info->hostbuflen);
-	}
+	struct _lookup_msg *msg;
 	
-	d(printf ("gethostbyaddr ok?\n"));
+	g_return_val_if_fail(name != NULL, NULL);
 	
-	if (info->cancelled) {
-		g_free(info->hostbufmem);
-		g_free(info);
-	} else {
-		e_msgport_reply((EMsg *)info);
+	if (camel_operation_cancel_check(NULL)) {
+		camel_exception_set(exout, CAMEL_EXCEPTION_USER_CANCEL, _("Cancelled"));
+		return NULL;
 	}
-	
-	return NULL;
-}
 
+	msg = g_malloc0(sizeof(*msg));
+	msg->hostbuflen = 1024;
+	msg->hostbufmem = g_malloc(msg->hostbuflen);
+	msg->name = name;
+	msg->result = -1;
+	msg->op = LOOKUP_HOSTBYNAME;
+
+	return get_hostbyx(msg, exout);
+}
 
 struct hostent *
 camel_gethostbyaddr (const char *addr, int len, int type, CamelException *exout)
 {
-	int fdmax, status, fd, cancel_fd;
 	struct _lookup_msg *msg;
-	CamelException ex;
 
 	g_return_val_if_fail (addr != NULL, NULL);
 	
-	if (camel_operation_cancel_check (NULL)) {
-		camel_exception_set (exout, CAMEL_EXCEPTION_USER_CANCEL, _("Cancelled"));
+	if (camel_operation_cancel_check(NULL)) {
+		camel_exception_set(exout, CAMEL_EXCEPTION_USER_CANCEL, _("Cancelled"));
 		return NULL;
 	}
 
-	camel_exception_init(&ex);
-	camel_operation_start_transient (NULL, _("Resolving address"));
-	
-	msg = g_malloc0 (sizeof (struct _lookup_msg));
+	msg = g_malloc0(sizeof(struct _lookup_msg));
 	msg->hostbuflen = 1024;
-	msg->hostbufmem = g_malloc (msg->hostbuflen);
+	msg->hostbufmem = g_malloc(msg->hostbuflen);
 	msg->name = addr;
 	msg->len = len;
 	msg->type = type;
 	msg->result = -1;
-	
-	cancel_fd = camel_operation_cancel_fd (NULL);
-	if (cancel_fd == -1) {
-		get_hostbyaddr (msg);
-	} else {
-		EMsgPort *reply_port;
-		pthread_t id;
-		fd_set rdset;
-		int err;
-
-		reply_port = msg->msg.reply_port = e_msgport_new ();
-		fd = e_msgport_fd (msg->msg.reply_port);
-		if ((err = pthread_create (&id, NULL, get_hostbyaddr, msg)) == 0) {
-			d(printf("waiting for name return/cancellation in main process\n"));
-			do {
-				FD_ZERO(&rdset);
-				FD_SET(cancel_fd, &rdset);
-				FD_SET(fd, &rdset);
-				fdmax = MAX(fd, cancel_fd) + 1;
-				status = select (fdmax, &rdset, NULL, 0, NULL);
-			} while (status == -1 && errno == EINTR);
-			
-			if (status == -1 || FD_ISSET(cancel_fd, &rdset)) {
-				if (status == -1)
-					camel_exception_setv(&ex, CAMEL_EXCEPTION_SYSTEM, _("Failure in name lookup: %s"), g_strerror(errno));
-				else
-					camel_exception_setv(&ex, CAMEL_EXCEPTION_USER_CANCEL, _("Cancelled"));
-
-				/* We cancel so if the thread impl is decent it causes immediate exit.
-				   We detach so we dont need to wait for it to exit if it isn't.
-				   We check the reply port incase we had a reply in the mean time, which we free later */
-				d(printf("Cancelling lookup thread and leaving it\n"));
-				msg->cancelled = 1;
-				pthread_detach(id);
-				pthread_cancel(id);
-				msg = (struct _lookup_msg *)e_msgport_get(reply_port);
-			} else {
-				struct _lookup_msg *reply = (struct _lookup_msg *)e_msgport_get(reply_port);
-
-				g_assert(reply == msg);
-				d(printf("waiting for child to exit\n"));
-				pthread_join(id, NULL);
-				d(printf("child done\n"));
-			}
-		} else {
-			camel_exception_setv(&ex, CAMEL_EXCEPTION_SYSTEM, _("Host lookup failed: cannot create thread: %s"), g_strerror(err));
-		}
-
-		
-		e_msgport_destroy (reply_port);
-	}
-	
-	camel_operation_end (NULL);
-	
-	if (!camel_exception_is_set(&ex)) {
-		if (msg->result == 0)
-			return &msg->hostbuf;
+	msg->op = LOOKUP_HOSTBYADDR;
 
-		if (msg->herr == HOST_NOT_FOUND || msg->herr == NO_DATA)
-			camel_exception_setv (&ex, CAMEL_EXCEPTION_SYSTEM,
-					      _("Host lookup failed: host not found"));
-		else
-			camel_exception_setv (&ex, CAMEL_EXCEPTION_SYSTEM,
-					      _("Host lookup failed: unknown reason"));
-	}
-
-	if (msg) {
-		g_free(msg->hostbufmem);
-		g_free(msg);
-	}
-
-	camel_exception_xfer(exout, &ex);
-
-	return NULL;
+	return get_hostbyx(msg, exout);
 }
 
 void camel_free_host(struct hostent *h)


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]