[nanny] Some SQL optimizations
From: Roberto Majadas <telemaco src gnome org>
To: svn-commits-list gnome org
Cc:
Subject: [nanny] Some SQL optimizations
Date: Tue, 22 Dec 2009 23:33:38 +0000 (UTC)
commit 484d86e29151e9b8851dee9f022b21cf8d212813
Author: Roberto Majadas <roberto majadas openshine com>
Date: Tue Dec 22 23:01:54 2009 +0100
Some SQL optimizations
client/common/src/DBusClient.py | 2 +-
daemon/nanny.tap | 5 ++
daemon/src/proxy/Controllers.py | 36 +++++++++--
daemon/src/proxy/TwistedProxy.py | 126 +++++++++++++++++++++++++++++++-------
4 files changed, 141 insertions(+), 28 deletions(-)
---
diff --git a/client/common/src/DBusClient.py b/client/common/src/DBusClient.py
index 643faec..2dbbae7 100644
--- a/client/common/src/DBusClient.py
+++ b/client/common/src/DBusClient.py
@@ -128,7 +128,7 @@ class DBusClient(gobject.GObject):
return self.nanny_wcf.AddCustomFilter (user_id, color, name, description, url)
def add_dansguardian_list (self, uid, name, description, list_url, reply_handler, error_handler):
- self.nanny_wcf.AddDansGuardianList (uid, name, description, list_url, reply_handler=reply_handler, error_handler=error_handler)
+ self.nanny_wcf.AddDansGuardianList (uid, name, description, list_url, reply_handler=reply_handler, error_handler=error_handler, timeout=2000000)
def check_web_access (self, uid, url):
return self.nanny_wcf.CheckWebAccess (uid, url)
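
The timeout bump above matters because dbus-python gives asynchronous calls a default reply timeout of roughly 25 seconds, while importing a large DansGuardian list can run for minutes. A minimal sketch of the same async-call pattern, assuming illustrative service and interface names (not Nanny's real ones):

    import dbus
    from dbus.mainloop.glib import DBusGMainLoop

    # Async reply/error handlers need a running main loop.
    DBusGMainLoop(set_as_default=True)

    bus = dbus.SessionBus()
    obj = bus.get_object("org.example.Nanny", "/org/example/Nanny")
    iface = dbus.Interface(obj, "org.example.NannyWCF")

    def on_reply(list_id):
        print("list registered: %s" % list_id)

    def on_error(err):
        print("import failed: %s" % err)

    # A very large timeout keeps the pending reply alive while the daemon
    # chews through the list; the default would expire long before it ends.
    iface.AddDansGuardianList("0", "ads", "Ad servers",
                              "http://example.org/list.tar.gz",
                              reply_handler=on_reply,
                              error_handler=on_error,
                              timeout=2000000)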
diff --git a/daemon/nanny.tap b/daemon/nanny.tap
index 4e71d29..f0ce897 100644
--- a/daemon/nanny.tap
+++ b/daemon/nanny.tap
@@ -20,7 +20,12 @@
import nanny.daemon
from twisted.application import internet, service
+
application = service.Application('nanny')
daemon = nanny.daemon.Daemon(application)
+from twisted.internet import reactor
+reactor.suggestThreadPoolSize(30)
+
+
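
reactor.suggestThreadPoolSize() resizes the reactor's shared thread pool (default maximum of 10 threads), which Twisted uses for deferToThread() and other blocking helpers, giving the daemon more headroom for concurrent blocking work. A rough sketch of how it sits next to an adbapi pool; the database path and pool sizes are assumptions:

    from twisted.internet import reactor
    from twisted.enterprise import adbapi

    # Resize before the pool is first used; the default cap is 10 threads.
    reactor.suggestThreadPoolSize(30)

    # adbapi runs each runInteraction() in a worker thread of its own pool,
    # so a sqlite3 connection must be allowed to cross threads.
    dbpool = adbapi.ConnectionPool("sqlite3", "/tmp/nanny.db",
                                   cp_min=3, cp_max=10,
                                   check_same_thread=False)

    def count_rows(txn):
        txn.execute("CREATE TABLE IF NOT EXISTS Website (body TEXT)")
        txn.execute("SELECT COUNT(*) FROM Website")
        return txn.fetchone()

    def show(row):
        print("Website rows: %s" % row)
        reactor.stop()

    dbpool.runInteraction(count_rows).addCallback(show)
    reactor.run()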
diff --git a/daemon/src/proxy/Controllers.py b/daemon/src/proxy/Controllers.py
index 52d9293..c212085 100644
--- a/daemon/src/proxy/Controllers.py
+++ b/daemon/src/proxy/Controllers.py
@@ -147,10 +147,33 @@ class DansGuardianImporter(object):
else:
continue
+ regex_list = []
+ i = 0
for line in m_fd.readlines() :
regex = line.replace("\r","").replace("\n", "").replace(" ","")
if len(regex) > 0 :
- query = self.dbpool.runInteraction(self.__register_dg_website , is_black, uid, origin_id, category, regex_type, regex)
+ regex_list.append((regex.decode("iso8859-15".lower()),))
+
+ if len(regex_list) >= 10000 :
+ query = self.dbpool.runInteraction(self.__register_dg_website , is_black, uid, origin_id, category, regex_type, regex_list)
+ block_d = BlockingDeferred(query)
+ reg_web_id = None
+ try:
+ reg_web_id = block_d.blockOn()
+ except:
+ print "Something wrong registering web regex : (%s, %s, %s)" % (len(regex_list), category, i)
+ regex_list = []
+ i = i + 1
+
+
+ query = self.dbpool.runInteraction(self.__register_dg_website , is_black, uid, origin_id, category, regex_type, regex_list)
+ block_d = BlockingDeferred(query)
+ reg_web_id = None
+ try:
+ reg_web_id = block_d.blockOn()
+ except:
+ print "Something wrong registering web regex : (%s, %s, %s)" % (len(regex_list), category, i)
+
print "Imported '%s' List" % name
self.finished = True
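
The import loop above batches rows and flushes every 10000, then issues one final flush after the loop so the tail of the file is not lost. The same pattern in isolation, with save_rows() as a hypothetical stand-in for the dbpool.runInteraction() call:

    CHUNK = 10000

    def save_rows(rows):
        # stand-in for dbpool.runInteraction(self.__register_dg_website, ...)
        print("flushing %d rows" % len(rows))

    pending = []
    for line in open("domains.list"):      # placeholder list file
        entry = line.strip()
        if entry:
            pending.append((entry,))
        if len(pending) >= CHUNK:
            save_rows(pending)
            pending = []
    if pending:
        save_rows(pending)                 # final partial chunk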
@@ -171,11 +194,14 @@ class DansGuardianImporter(object):
origin_id = ret[0][0]
return origin_id
- def __register_dg_website(self, txn, is_black, uid, origin_id, category, regex_type, regex):
- body = "%" + regex + "%"
+ def __register_dg_website(self, txn, is_black, uid, origin_id, category, regex_type, regex_list):
+ if len(regex_list) == 0 :
+ return
- sql="INSERT INTO Website ('is_black', 'uid', 'origin_id', 'category', 'type', 'body') VALUES (%s, '%s', %s, '%s', '%s', '%s')" % (int(is_black), uid, int(origin_id), category, regex_type, body)
- txn.execute(sql)
+ #print "Registering %s websites (%s)" % (category, len(regex_list))
+ sql="INSERT INTO Website ('is_black', 'uid', 'origin_id', 'category', 'type', 'body') VALUES (%s, '%s', %s, '%s', '%s', ?)" % (int(is_black), uid, int(origin_id), category, regex_type)
+ txn.executemany(sql, regex_list)
+
def gotFailure(self, f):
self.finished = True
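
The other half of the optimization is __register_dg_website() switching from one interpolated INSERT per regex to a single parameterized statement driven by executemany(), so sqlite3 compiles the statement once and just binds each tuple. A self-contained sketch with the table trimmed to one column and made-up data:

    import sqlite3

    conn = sqlite3.connect(":memory:")
    cur = conn.cursor()
    cur.execute("CREATE TABLE Website (body TEXT)")

    rows = [("badsite.example",), ("worse.example",)]   # placeholder bodies
    cur.executemany("INSERT INTO Website (body) VALUES (?)", rows)
    conn.commit()

    print(cur.execute("SELECT COUNT(*) FROM Website").fetchone())

Since adbapi's runInteraction() already wraps each interaction in one transaction, the whole 10000-row batch commits at once, which is where most of the speedup comes from.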
diff --git a/daemon/src/proxy/TwistedProxy.py b/daemon/src/proxy/TwistedProxy.py
index 606b74d..3cb585d 100644
--- a/daemon/src/proxy/TwistedProxy.py
+++ b/daemon/src/proxy/TwistedProxy.py
@@ -27,15 +27,19 @@ from twisted.internet import reactor
from twisted.web import proxy, resource, server
from twisted.enterprise import adbapi
from twisted.application import internet, service
-
+from twisted.python.lockfile import FilesystemLock, isLocked
+from twisted.internet.defer import DeferredFilesystemLock
import urlparse
from urllib import quote as urlquote
import os
-from tempfile import TemporaryFile
+from tempfile import TemporaryFile, gettempdir
+import time
import Image, ImageDraw, ImageFilter
+from Controllers import BlockingDeferred
+
BAD_WEB_TEMPLATE='''
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
@@ -106,7 +110,14 @@ class BadBoyResponseFilterImage(BadBoyResponseFilter) :
im_format = im.format
draw = ImageDraw.Draw(im)
- draw.rectangle((0, 0) + im.size, fill="#FFFFFF")
+ try:
+ draw.rectangle((0, 0) + im.size, fill="#FFFFFF")
+ except:
+ try:
+ draw.rectangle((0, 0) + im.size, fill="255")
+ except:
+ pass
+
draw.line((0, 0) + im.size, fill=128, width=10)
draw.line((0, im.size[1], im.size[0], 0), fill=128, width=10)
del draw
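
The nested try/except above exists because ImageDraw fill values depend on the image mode: RGB images accept colour strings, while palette ("P") and greyscale ("L") images expect a single band value. A small Pillow sketch of the same fallback; the file name is a placeholder, and current Pillow takes the integer 255 where the patch passes the string "255":

    from PIL import Image, ImageDraw

    im = Image.open("blocked.gif")   # e.g. a palette-mode GIF
    draw = ImageDraw.Draw(im)
    try:
        draw.rectangle((0, 0) + im.size, fill="#FFFFFF")   # RGB-style fill
    except Exception:
        draw.rectangle((0, 0) + im.size, fill=255)         # single-band fill
    del draw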
@@ -187,15 +198,50 @@ class ReverseProxyResource(resource.Resource) :
proxyClientFactoryClass = ProxyClientFactory
- def __init__(self, uid, dbpool, reactor=reactor):
+ def __init__(self, uid, dbpool, reactor=reactor, domain_level=0, pre_check=[False, False]):
resource.Resource.__init__(self)
self.reactor = reactor
self.uid = uid
self.url = ''
self.dbpool = dbpool
+ self.domain_level = domain_level
+ self.pre_check = pre_check
+ self.domains_blocked_cache = {}
def getChild(self, path, request):
- return ReverseProxyResource(self.uid, self.dbpool, reactor=reactor)
+ pre_check=[False, False]
+ host, port = self.__get_host_info(request)
+ if self.domain_level ==0 :
+ ts = reactor.seconds()
+
+ if self.domains_blocked_cache.has_key(host) and ( ts - self.domains_blocked_cache[host][0] ) <= 120 :
+ print self.domains_blocked_cache[host][1]
+ block_d = BlockingDeferred(self.domains_blocked_cache[host][1])
+ try:
+ pre_check = block_d.blockOn()
+ print "Host %s , verified [cached] (pre_check=%s)" % (host, pre_check)
+ except:
+ print "Something wrong validating domain %s" % host
+ pre_check = [False, False]
+ else:
+ query = self.dbpool.runInteraction(self.__validate_site, host)
+ self.domains_blocked_cache[host]=[reactor.seconds(), query]
+
+ block_d = BlockingDeferred(query)
+ try:
+ pre_check = block_d.blockOn()
+ print "Host %s , verified (pre_check=%s)" % (host, pre_check)
+ except:
+ print "Something wrong validating domain %s" % host
+ pre_check = [False, False]
+
+ return ReverseProxyResource(self.uid, self.dbpool, reactor=reactor,
+ domain_level=self.domain_level + 1,
+ pre_check=pre_check)
+ else:
+ return ReverseProxyResource(self.uid, self.dbpool, reactor=reactor,
+ domain_level=self.domain_level + 1,
+ pre_check=self.pre_check)
def render(self, request):
host, port = self.__get_host_info(request)
@@ -212,32 +258,68 @@ class ReverseProxyResource(resource.Resource) :
self.request = request
- query = self.dbpool.runInteraction(self.__validate_uri, host, port, request, rest)
+ query = self.dbpool.runInteraction(self.__validate_uri, host, port, request, rest, self.pre_check)
query.addCallback(self.__validate_request_cb)
return server.NOT_DONE_YET
-
- def __validate_uri(self, txn, host, port, request, rest):
+
+ def __validate_site(self, txn, host):
found = False
+ block_domain = False
+ may_block_url = False
+
+ sql="SELECT * FROM Website WHERE uid = '%s' AND ((type = 'domain' AND '%s' GLOB body) OR (type = 'domain' AND '%s' GLOB '*.' || body) OR (type = 'url' AND body GLOB '%s'))" % (self.uid, host, host, "*" + host + "*")
+ txn.execute(sql)
+ select = txn.fetchall()
+
+ if len(select) > 0 :
+ for web in select:
+ if web[1] == True and web[5] == "domain" :
+ block_domain = True
+ break
+
+ for web in select:
+ if web[1] == True and web[5] == "url" :
+ may_block_url = True
+ break
+
+ for web in select:
+ if web[1] == False and web[5] == "domain" :
+ print "Domain WhiteListed : %s" % host
+ block_domain = False
+ break
+
+ return block_domain, may_block_url
+
+ def __validate_uri(self, txn, host, port, request, rest, pre_check):
+ if pre_check[0] == True :
+ print 'Uri Validation stopped because domain is blocked, %s' % (host + request.uri)
+ return False, request, rest, host, port
+
+ if pre_check[1] == False :
+ print 'Uri validation verified in pre-check %s' % (host + request.uri)
+ return True, request, rest, host, port
+
uri = host + request.uri
is_ok = True
-
- sql="SELECT * FROM Website WHERE is_black = 0 AND uid = '%s' AND '%s' LIKE body" % (self.uid, uri)
+
+ sql="SELECT * FROM Website WHERE uid = '%s' AND type = 'url' AND '%s' GLOB '*' || body || '*' " % (self.uid, uri)
txn.execute(sql)
select = txn.fetchall()
- for web in select:
- is_ok = True
- found = True
- break
-
- if not found:
- sql="SELECT * FROM Website WHERE is_black = 1 AND uid = '%s' AND '%s' LIKE body" % (self.uid, uri)
- txn.execute(sql)
- select = txn.fetchall()
+
+ if len(select) > 0 :
for web in select:
- print ' BLOCKING ENTRY WAS FOUND : ' + web[6]
- is_ok = False
- break
+ if web[1] == True :
+ print 'Uri blocked : %s ' % (web[6])
+ is_ok = False
+ break
+
+ for web in select:
+ if web[1] == False :
+ print 'Uri WhiteListed : %s ' % (web[6])
+ is_ok = False
+ break
+
return is_ok, request, rest, host, port
def __validate_request_cb(self, data):
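
The new pre-check also swaps LIKE for GLOB: a host is matched against stored domains directly and against '*.' || body, so subdomains of a blocked domain are caught, with whitelist (is_black = 0) domain rows overriding the block. A sketch of the matching semantics, using bound parameters instead of the patch's string interpolation; the data is made up:

    import sqlite3

    conn = sqlite3.connect(":memory:")
    cur = conn.cursor()
    cur.execute("CREATE TABLE Website (is_black INT, type TEXT, body TEXT)")
    cur.execute("INSERT INTO Website VALUES (1, 'domain', 'badsite.example')")

    host = "www.badsite.example"
    # GLOB is case-sensitive and uses shell-style wildcards; the second
    # clause matches any subdomain of a stored domain.
    cur.execute("SELECT * FROM Website WHERE type = 'domain' AND "
                "(? GLOB body OR ? GLOB '*.' || body)", (host, host))
    print(cur.fetchall())   # matches via the '*.' || body clause

getChild() then caches the per-host answer for 120 seconds, so repeated hits on the same site skip the query entirely.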