[stickynotes-antispam] Rewrite to use MySQL directly



commit e6898d256b347b284127a3191f70f1ab3bdc2bf8
Author: Bartłomiej Piotrowski <bpiotrowski gnome org>
Date:   Mon Sep 30 19:19:33 2019 +0200

    Rewrite to use MySQL directly

 stickynotes-antispam.py | 105 +++++++++++++-----------------------------------
 1 file changed, 27 insertions(+), 78 deletions(-)
---
diff --git a/stickynotes-antispam.py b/stickynotes-antispam.py
index be7efd0..7af7159 100755
--- a/stickynotes-antispam.py
+++ b/stickynotes-antispam.py
@@ -4,11 +4,11 @@ import os
 import re
 import sys
 
-from bs4 import BeautifulSoup
-import requests
+import MySQLdb
 
 BLACKLIST = [
-    "bitcoin" "loan",
+    "bitcoin",
+    "loan",
     "paypal",
     "western union",
     "westernunion",
@@ -16,98 +16,47 @@ BLACKLIST = [
     "support",
 ]
 
-
-class StickyNotes(object):
-    def __init__(self, url, username, password):
-        self.url = url
-
-        self.session = requests.Session()
-        login_form = self.session.get(f"{url}/user/login")
-        soup = BeautifulSoup(login_form.text, features="html.parser")
-        token = soup.find(attrs={"name": "_token"})["value"]
-        payload = {"username": username, "password": password, "_token": token}
-        self.session.post(f"{url}/user/login", data=payload)
-
-    def list(self, pages=1):
-        ids = []
-        for page in range(1, pages + 1):
-            r = self.session.get(f"{self.url}/all?page={page}")
-            soup = BeautifulSoup(r.text, features="html.parser")
-            on_page = [
-                url["href"].split("/")[3]
-                for url in soup.find_all("a", text="Show paste")
-            ]
-            ids.extend(on_page)
-
-        return ids
-
-    def get(self, id):
-        r = self.session.get(f"{self.url}/{id}")
-        soup = BeautifulSoup(r.text, features="html.parser")
-        ids = soup.find("a", text="Raw")["href"].split("/")[3:-1]
-
-        if len(ids) == 2:
-            raw_url = f"{self.url}/{ids[0]}/{ids[1]}/raw"
-        else:
-            raw_url = f"{self.url}/{ids[0]}/raw"
-
-        r = self.session.get(raw_url)
-        return r.text
-
-    def delete(self, id):
-        r = self.session.get(f"{self.url}/{id}")
-        soup = BeautifulSoup(r.text, features="html.parser")
-        ids = soup.find("a", text="Raw")["href"].split("/")[3:-1]
-
-        if len(ids) == 2:
-            delete_url = f"{self.url}/{ids[0]}/{ids[1]}/delete"
-        else:
-            delete_url = f"{self.url}/{ids[0]}/delete"
-
-        r = self.session.get(delete_url)
-        print(f"deleting {id}")
-
-
 def check_if_spam(data):
     mark_for_deletion = False
 
-    # Delete paste if contains more than 2 lines containing an URL
-    link_count = 0
-    for line in data.splitlines():
-        if re.search("http[s]?://", line, re.IGNORECASE):
-            link_count += 1
-
-    if link_count >= 2:
-        mark_for_deletion = True
-
-    # Delete pastes with matching keywords
     for word in BLACKLIST:
         if re.search(word, data, re.IGNORECASE):
             mark_for_deletion = True
 
+    # Not already flagged by a keyword: flag the paste if two or more lines contain a URL
+    if not mark_for_deletion:
+        link_count = 0
+        for line in data.splitlines():
+            if re.search("http[s]?://", line, re.IGNORECASE):
+                link_count += 1
+
+        if link_count >= 2:
+            mark_for_deletion = True
+
     return mark_for_deletion
 
 
 def main():
     try:
-        url = os.environ["STICKYNOTES_URL"]
-        username = os.environ["STICKYNOTES_USERNAME"]
-        password = os.environ["STICKYNOTES_PASSWORD"]
+        hostname = os.environ["STICKYNOTES_DB_HOST"]
+        username = os.environ["STICKYNOTES_DB_USERNAME"]
+        password = os.environ["STICKYNOTES_DB_PASSWORD"]
+        dbname = os.environ["STICKYNOTES_DB_NAME"]
     except KeyError:
         sys.exit(1)
 
-    client = StickyNotes(url, username, password)
-    pastes = client.list(5)
+    db = MySQLdb.connect(host=hostname, user=username, passwd=password, db=dbname, autocommit=True)  # "pass" is a reserved word; autocommit so the DELETEs below persist
+    with db.cursor() as c:
+        c.execute("""SELECT urlkey, data FROM main""")
+        pastes = c.fetchall()
 
-    for paste_id in pastes:
-        try:
-            paste = client.get(paste_id)
-        except:
-            print(f"=> error processing {paste_id}")
-            continue
-        if check_if_spam(paste):
-            client.delete(paste_id)
+    for paste in pastes:
+        urlkey, data = paste
+        if check_if_spam(data):
+            with db.cursor() as c:
+                c.execute("""DELETE from main WHERE urlkey = %s""", (urlkey,))
 
+    db.close()
 
 if __name__ == "__main__":
     main()


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]