[uchardet] script: Wikipedia API's python wrapper does not return garbage text anymore.



commit d24bd7d5782167c69dc391679a69ba983ca7b6b6
Author: Jehan <jehan girinstud io>
Date:   Sun Feb 21 16:07:10 2016 +0100

    script: Wikipedia API's python wrapper does not return garbage text anymore.
    
    I can't see any new commits to the wrapper since 2014, so I am assuming
    the issue was on Wikipedia's side and that it has since been fixed.

 script/langs/el.py |    2 +-
 script/langs/es.py |    2 +-
 script/langs/fr.py |    2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)
---
diff --git a/script/langs/el.py b/script/langs/el.py
index 7d12262..efd9a3e 100644
--- a/script/langs/el.py
+++ b/script/langs/el.py
@@ -57,7 +57,7 @@ case_mapping = True
 # A function to clean content returned by the `wikipedia` python lib,
 # in case some unwanted data has been overlooked.
 def clean_wikipedia_content(content):
-    cleaned = re.sub(r'(=+) *([^=]+) *Επεξεργασία \1',
+    cleaned = re.sub(r'(=+) *([^=]+) *\1',
                      r'\2',
                      content)
     return cleaned
diff --git a/script/langs/es.py b/script/langs/es.py
index af4ac89..f48acc5 100644
--- a/script/langs/es.py
+++ b/script/langs/es.py
@@ -71,7 +71,7 @@ case_mapping = True
 # A function to clean content returned by the `wikipedia` python lib,
 # in case some unwanted data has been overlooked.
 def clean_wikipedia_content(content):
-    cleaned = re.sub(r'(=+) *([^=]+) *Editar \1',
+    cleaned = re.sub(r'(=+) *([^=]+) *\1',
                      r'\2',
                      content)
     return cleaned
diff --git a/script/langs/fr.py b/script/langs/fr.py
index 4bbc59d..9312b7b 100644
--- a/script/langs/fr.py
+++ b/script/langs/fr.py
@@ -72,7 +72,7 @@ case_mapping = True
 # in case some unwanted data has been overlooked.
 def clean_wikipedia_content(content):
     # We get modify link in the text: "=== Articles connexesModifier ==="
-    cleaned = re.sub(r'(=+) *([^=]+) *Modifier \1',
+    cleaned = re.sub(r'(=+) *([^=]+) *\1',
                      r'\2',
                      content)
     return cleaned


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]