[nanny] Some SQL optimizations



commit 484d86e29151e9b8851dee9f022b21cf8d212813
Author: Roberto Majadas <roberto majadas openshine com>
Date:   Tue Dec 22 23:01:54 2009 +0100

    Some SQL optimizations

 client/common/src/DBusClient.py  |    2 +-
 daemon/nanny.tap                 |    5 ++
 daemon/src/proxy/Controllers.py  |   36 +++++++++--
 daemon/src/proxy/TwistedProxy.py |  126 +++++++++++++++++++++++++++++++-------
 4 files changed, 141 insertions(+), 28 deletions(-)
---
diff --git a/client/common/src/DBusClient.py b/client/common/src/DBusClient.py
index 643faec..2dbbae7 100644
--- a/client/common/src/DBusClient.py
+++ b/client/common/src/DBusClient.py
@@ -128,7 +128,7 @@ class DBusClient(gobject.GObject):
         return self.nanny_wcf.AddCustomFilter (user_id, color, name, description, url)
 
     def add_dansguardian_list (self, uid, name, description, list_url, reply_handler, error_handler):
-        self.nanny_wcf.AddDansGuardianList (uid, name, description, list_url, reply_handler=reply_handler, error_handler=error_handler)
+        self.nanny_wcf.AddDansGuardianList (uid, name, description, list_url, reply_handler=reply_handler, error_handler=error_handler, timeout=2000000)
 
     def check_web_access (self, uid, url):
         return self.nanny_wcf.CheckWebAccess (uid, url)
diff --git a/daemon/nanny.tap b/daemon/nanny.tap
index 4e71d29..f0ce897 100644
--- a/daemon/nanny.tap
+++ b/daemon/nanny.tap
@@ -20,7 +20,12 @@
 import nanny.daemon
 from twisted.application import internet, service
 
+
 application = service.Application('nanny')
 daemon = nanny.daemon.Daemon(application)
 
+from twisted.internet import reactor
+reactor.suggestThreadPoolSize(30)
+
+
 
diff --git a/daemon/src/proxy/Controllers.py b/daemon/src/proxy/Controllers.py
index 52d9293..c212085 100644
--- a/daemon/src/proxy/Controllers.py
+++ b/daemon/src/proxy/Controllers.py
@@ -147,10 +147,33 @@ class DansGuardianImporter(object):
                 else:
                     continue
 
+                regex_list = []
+                i = 0
                 for line in m_fd.readlines() :
                     regex = line.replace("\r","").replace("\n", "").replace(" ","")
                     if len(regex) > 0 :
-                        query = self.dbpool.runInteraction(self.__register_dg_website , is_black, uid, origin_id, category, regex_type, regex)
+                        regex_list.append((regex.decode("iso8859-15".lower()),))
+
+                    if len(regex_list) >= 10000 :
+                        query = self.dbpool.runInteraction(self.__register_dg_website , is_black, uid, origin_id, category, regex_type, regex_list)
+                        block_d = BlockingDeferred(query)
+                        reg_web_id = None
+                        try:
+                            reg_web_id = block_d.blockOn()
+                        except:
+                            print "Something wrong registering web regex : (%s, %s, %s)" % (len(regex_list), category, i)
+                        regex_list = []
+                        i = i + 1
+                    
+                
+                query = self.dbpool.runInteraction(self.__register_dg_website , is_black, uid, origin_id, category, regex_type, regex_list)
+                block_d = BlockingDeferred(query)
+                reg_web_id = None
+                try:
+                    reg_web_id = block_d.blockOn()
+                except:
+                    print "Something wrong registering web regex : (%s, %s, %s)" % (len(regex_list), category, i)
+                
 
             print "Imported '%s' List" % name
             self.finished = True
@@ -171,11 +194,14 @@ class DansGuardianImporter(object):
         origin_id = ret[0][0]
         return origin_id
 
-    def __register_dg_website(self, txn, is_black, uid, origin_id, category, regex_type, regex):
-        body = "%" + regex + "%"
+    def __register_dg_website(self, txn, is_black, uid, origin_id, category, regex_type, regex_list):
+        if len(regex_list) == 0 :
+            return
 
-        sql="INSERT INTO Website ('is_black', 'uid', 'origin_id', 'category', 'type', 'body') VALUES (%s, '%s', %s, '%s', '%s', '%s')" % (int(is_black), uid, int(origin_id), category, regex_type, body)
-        txn.execute(sql)
+        #print "Registering %s websites (%s)" % (category, len(regex_list))
+        sql="INSERT INTO Website ('is_black', 'uid', 'origin_id', 'category', 'type', 'body') VALUES (%s, '%s', %s, '%s', '%s', ?)" % (int(is_black), uid, int(origin_id), category, regex_type)
+        txn.executemany(sql, regex_list)
+        
 
     def gotFailure(self, f):
         self.finished = True
diff --git a/daemon/src/proxy/TwistedProxy.py b/daemon/src/proxy/TwistedProxy.py
index 606b74d..3cb585d 100644
--- a/daemon/src/proxy/TwistedProxy.py
+++ b/daemon/src/proxy/TwistedProxy.py
@@ -27,15 +27,19 @@ from twisted.internet import reactor
 from twisted.web import proxy, resource, server
 from twisted.enterprise import adbapi
 from twisted.application import internet, service
-
+from twisted.python.lockfile import FilesystemLock, isLocked
+from twisted.internet.defer import DeferredFilesystemLock 
 import urlparse
 from urllib import quote as urlquote
 
 import os
-from tempfile import TemporaryFile
+from tempfile import TemporaryFile, gettempdir
+import time
 
 import Image, ImageDraw, ImageFilter
 
+from Controllers import BlockingDeferred
+
 BAD_WEB_TEMPLATE='''
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd";>
 <html>
@@ -106,7 +110,14 @@ class BadBoyResponseFilterImage(BadBoyResponseFilter) :
         im_format = im.format
 
         draw = ImageDraw.Draw(im)
-        draw.rectangle((0, 0) + im.size, fill="#FFFFFF")
+        try:
+            draw.rectangle((0, 0) + im.size, fill="#FFFFFF")
+        except:
+            try:
+                draw.rectangle((0, 0) + im.size, fill="255")
+            except:
+                pass
+            
         draw.line((0, 0) + im.size, fill=128, width=10)
         draw.line((0, im.size[1], im.size[0], 0), fill=128, width=10)
         del draw 
@@ -187,15 +198,50 @@ class ReverseProxyResource(resource.Resource) :
 
     proxyClientFactoryClass = ProxyClientFactory
 
-    def __init__(self, uid, dbpool, reactor=reactor):
+    def __init__(self, uid, dbpool, reactor=reactor, domain_level=0, pre_check=[False, False]):
         resource.Resource.__init__(self)
         self.reactor = reactor
         self.uid = uid
         self.url = ''
         self.dbpool = dbpool
+        self.domain_level = domain_level
+        self.pre_check = pre_check
+        self.domains_blocked_cache = {}
         
     def getChild(self, path, request):
-        return ReverseProxyResource(self.uid, self.dbpool, reactor=reactor)
+        pre_check=[False, False]
+        host, port = self.__get_host_info(request)
+        if self.domain_level ==0 :
+            ts = reactor.seconds()
+            
+            if self.domains_blocked_cache.has_key(host) and ( ts - self.domains_blocked_cache[host][0] ) <= 120  :
+                print self.domains_blocked_cache[host][1]
+                block_d = BlockingDeferred(self.domains_blocked_cache[host][1])
+                try:
+                    pre_check = block_d.blockOn()
+                    print "Host %s , verified [cached] (pre_check=%s)" % (host, pre_check)
+                except:
+                    print "Something wrong validating domain %s" % host
+                    pre_check = [False, False]
+            else:
+                query = self.dbpool.runInteraction(self.__validate_site, host)
+                self.domains_blocked_cache[host]=[reactor.seconds(), query]
+                
+                block_d = BlockingDeferred(query)
+                try:
+                    pre_check = block_d.blockOn()
+                    print "Host %s , verified (pre_check=%s)" % (host, pre_check)
+                except:
+                    print "Something wrong validating domain %s" % host
+                    pre_check = [False, False]
+                
+            return ReverseProxyResource(self.uid, self.dbpool, reactor=reactor,
+                                        domain_level=self.domain_level + 1,
+                                        pre_check=pre_check)
+        else:
+            return ReverseProxyResource(self.uid, self.dbpool, reactor=reactor,
+                                        domain_level=self.domain_level + 1,
+                                        pre_check=self.pre_check)
 
     def render(self, request):
         host, port = self.__get_host_info(request)
@@ -212,32 +258,68 @@ class ReverseProxyResource(resource.Resource) :
             
         self.request = request
 
-	query = self.dbpool.runInteraction(self.__validate_uri, host, port, request, rest)
+        query = self.dbpool.runInteraction(self.__validate_uri, host, port, request, rest, self.pre_check)
         query.addCallback(self.__validate_request_cb)
 
         return server.NOT_DONE_YET
- 
-    def __validate_uri(self, txn, host, port, request, rest):
+
+    def __validate_site(self, txn, host):
         found = False
+        block_domain = False
+        may_block_url = False
+        
+        sql="SELECT * FROM Website WHERE uid = '%s' AND ((type = 'domain' AND '%s' GLOB body) OR (type = 'domain' AND '%s' GLOB '*.' || body) OR (type = 'url' AND body GLOB '%s'))" % (self.uid, host, host, "*" + host + "*")
+        txn.execute(sql)
+    	select = txn.fetchall()
+
+        if len(select) > 0 :
+            for web in select:
+                if web[1] == True and web[5] == "domain" :
+                    block_domain = True
+                    break
+            
+            for web in select:
+                if web[1] == True and web[5] == "url" :
+                    may_block_url = True
+                    break
+                
+            for web in select:
+                if web[1] == False and web[5] == "domain" :
+                    print "Domain WhiteListed : %s"  % host
+                    block_domain = False
+                    break
+            
+        return block_domain, may_block_url
+ 
+    def __validate_uri(self, txn, host, port, request, rest, pre_check):
+        if pre_check[0] == True :
+            print 'Uri Validation stopped because domain is blocked, %s' % (host + request.uri)
+            return False, request, rest, host, port
+
+        if pre_check[1] == False :
+            print 'Uri validation verified in pre-check %s' % (host + request.uri)
+            return True, request, rest, host, port
+    
         uri = host + request.uri
         is_ok = True
-        
-        sql="SELECT * FROM Website WHERE is_black = 0 AND uid = '%s' AND '%s' LIKE body" % (self.uid, uri)
+
+        sql="SELECT * FROM Website WHERE uid = '%s' AND type = 'url' AND '%s' GLOB '*' || body || '*' " % (self.uid, uri)
         txn.execute(sql)
     	select = txn.fetchall()
-        for web in select:
-            is_ok = True
-            found = True
-            break
-
-        if not found:
-            sql="SELECT * FROM Website WHERE is_black = 1 AND uid = '%s' AND '%s' LIKE body" % (self.uid, uri)
-            txn.execute(sql)
-            select = txn.fetchall()
+
+        if len(select) > 0 :
             for web in select:
-                print '    BLOCKING ENTRY WAS FOUND : ' + web[6]
-                is_ok = False
-                break
+                if web[1] == True :
+                    print 'Uri blocked : %s ' % (web[6])
+                    is_ok = False
+                    break
+            
+            for web in select:
+                if web[1] == False :
+                    print 'Uri WhiteListed : %s ' % (web[6])
+                    is_ok = False
+                    break
+            
         return is_ok, request, rest, host, port
 
     def __validate_request_cb(self, data):



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]