[odrs-web/oscp] Allow moderators to add per-locale banned words



commit 0d351d694cfa52fafe83c9f86aeb84d47634be47
Author: Richard Hughes <richard hughsie com>
Date:   Wed Jul 3 18:29:57 2019 +0100

    Allow moderators to add per-locale banned words
    
    This allows us to auto-report reviews at submission time.

 app_data/cron.py                              | 56 ++++++++++++++++++-
 app_data/migrations/versions/e37c745e3097_.py | 31 +++++++++++
 app_data/odrs/models.py                       | 50 +++++++++++++++++
 app_data/odrs/templates/default.html          |  1 +
 app_data/odrs/templates/show.html             | 10 ++++
 app_data/odrs/templates/taboos.html           | 49 +++++++++++++++++
 app_data/odrs/tests/odrs_test.py              | 32 +++++++++++
 app_data/odrs/util.py                         | 15 +++++-
 app_data/odrs/views_admin.py                  | 77 +++++++++++++++++++++++++--
 app_data/odrs/views_api.py                    |  5 ++
 10 files changed, 320 insertions(+), 6 deletions(-)
---
diff --git a/app_data/cron.py b/app_data/cron.py
index 31fd7fc..2f87b7b 100755
--- a/app_data/cron.py
+++ b/app_data/cron.py
@@ -10,11 +10,12 @@
 import json
 import sys
 import datetime
+import csv
 
 from odrs import db
 
-from odrs.models import Review
-from odrs.util import _get_rating_for_app_id
+from odrs.models import Review, Taboo
+from odrs.util import _get_rating_for_app_id, _get_taboos_for_locale
 
 def _auto_delete(days=31):
 
@@ -48,6 +49,50 @@ def _regenerate_ratings(fn):
     with open(fn, 'w') as outfd:
         outfd.write(json.dumps(item, sort_keys=True, indent=4, separators=(',', ': ')))
 
+def _taboo_check():
+    """ Set reported to 5 on every review below that value which matches a taboo """
+    # each taboo lookup is a database query, so cache the result for each locale
+    taboos = {}
+    for review in db.session.query(Review).\
+                    filter(Review.reported < 5).all():
+        if review.locale not in taboos:
+            taboos[review.locale] = _get_taboos_for_locale(review.locale)
+        matched_taboos = review.matches_taboos(taboos[review.locale])
+        if matched_taboos:
+            for taboo in matched_taboos:
+                print(review.review_id, review.locale, taboo.value)
+            review.reported = 5
+    db.session.commit()
+
+def _taboo_import(fn):
+    """ Add any new taboos found in a CSV file of locale,value,description rows """
+
+    # get all the taboos in one database call
+    taboos = {}
+    for taboo in db.session.query(Taboo).all():
+        taboos[taboo.locale + ':' + taboo.value] = taboo
+
+    # add any new ones
+    with open(fn, newline='') as csvfile:
+        for row in csv.reader(csvfile):
+            # a blank or malformed row would otherwise abort the whole import
+            if len(row) != 3:
+                print('Ignoring', row)
+                continue
+            locale, value, description = [field.strip() for field in row]
+            key = locale + ':' + value
+            if key in taboos:
+                continue
+            # only single lowercase words are accepted as taboo values
+            if ' ' in value or value.lower() != value:
+                print('Ignoring', locale, value)
+                continue
+            taboo = Taboo(locale, value, description)
+            taboos[key] = taboo
+            print('Adding', locale, value)
+            db.session.add(taboo)
+    db.session.commit()
+
 if __name__ == '__main__':
 
     if len(sys.argv) < 2:
@@ -62,6 +107,13 @@ if __name__ == '__main__':
         _regenerate_ratings(sys.argv[2])
     elif sys.argv[1] == 'auto-delete':
         _auto_delete()
+    elif sys.argv[1] == 'taboo-check':
+        _taboo_check()
+    elif sys.argv[1] == 'taboo-import':
+        if len(sys.argv) < 3:
+            print('Usage: %s taboo-import filename' % sys.argv[0])
+            sys.exit(1)
+        _taboo_import(sys.argv[2])
     else:
         print("cron mode %s not known" % sys.argv[1])
         sys.exit(1)
diff --git a/app_data/migrations/versions/e37c745e3097_.py b/app_data/migrations/versions/e37c745e3097_.py
new file mode 100644
index 0000000..a8bce80
--- /dev/null
+++ b/app_data/migrations/versions/e37c745e3097_.py
@@ -0,0 +1,31 @@
+"""
+
+Revision ID: e37c745e3097
+Revises: 64751cf97429
+Create Date: 2019-07-03 19:54:01.718718
+
+"""
+
+# revision identifiers, used by Alembic.
+revision = 'e37c745e3097'
+down_revision = '64751cf97429'
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import mysql
+
+def upgrade():  # create the taboos table and the index on its locale column
+    op.create_table('taboos',
+    sa.Column('taboo_id', sa.Integer(), nullable=False),
+    sa.Column('locale', sa.String(length=8), nullable=False),
+    sa.Column('value', sa.Text(), nullable=False),
+    sa.Column('description', sa.Text(), nullable=True),
+    sa.PrimaryKeyConstraint('taboo_id'),
+    sa.UniqueConstraint('taboo_id'),  # NOTE(review): likely redundant with the primary key
+    mysql_character_set='utf8mb4'
+    )
+    op.create_index(op.f('ix_taboos_locale'), 'taboos', ['locale'], unique=False)
+
+def downgrade():  # undo upgrade(): drop the locale index, then the table
+    op.drop_index(op.f('ix_taboos_locale'), table_name='taboos')
+    op.drop_table('taboos')
diff --git a/app_data/odrs/models.py b/app_data/odrs/models.py
index 4684dda..7bb4d40 100644
--- a/app_data/odrs/models.py
+++ b/app_data/odrs/models.py
@@ -8,6 +8,7 @@
 # SPDX-License-Identifier: GPL-3.0+
 
 import datetime
+import re
 
 from werkzeug.security import generate_password_hash, check_password_hash
 
@@ -43,6 +44,25 @@ class Analytic(db.Model):
     def __repr__(self):
         return 'Analytic object %s' % self.analytic_id
 
+class Taboo(db.Model):
+    """ A banned word for one locale, used to auto-report matching reviews """
+    # sqlalchemy metadata
+    __tablename__ = 'taboos'
+    __table_args__ = {'mysql_character_set': 'utf8mb4'}
+
+    taboo_id = Column(Integer, primary_key=True, nullable=False, unique=True)
+    locale = Column(String(8), nullable=False, index=True)
+    value = Column(Text, nullable=False)
+    description = Column(Text, nullable=True)
+
+    def __init__(self, locale, value, description=None):
+        self.locale = locale
+        self.value = value
+        self.description = description  # optional: the column is nullable
+
+    def __repr__(self):
+        return 'Taboo object %s' % self.taboo_id
+
 class Vote(db.Model):
 
     # sqlalchemy metadata
@@ -93,6 +113,9 @@ class User(db.Model):
     def __repr__(self):
         return 'User object %s' % self.user_id
 
+def _tokenize(val):  # split into lowercased words, keeping apostrophes
+    return list(map(str.lower, re.findall(r"[\w']+", val)))
+
 class Review(db.Model):
 
     # sqlalchemy metadata
@@ -135,6 +158,33 @@ class Review(db.Model):
         self.rating = 0
         self.reported = 0
 
+    def _generate_keywords(self):
+        """ Return the set of lowercased words found in the user-supplied fields """
+        # tokenize anything the user can specify
+        tokens = []
+        if self.summary:
+            tokens.extend(_tokenize(self.summary))
+        if self.description:
+            tokens.extend(_tokenize(self.description))
+        if self.user_display:
+            tokens.extend(_tokenize(self.user_display))
+
+        # dedupe; _tokenize() only returns strings so the None check is defensive
+        tokens = set(tokens)
+        if None in tokens:
+            tokens.remove(None)
+        return tokens
+
+    def matches_taboos(self, taboos):
+        """ Return the taboos whose value is one of the words in this review """
+        # keywords are lowercased, so only lowercase taboo values can match
+        kws = self._generate_keywords()
+        matches = []
+        for taboo in taboos:
+            if taboo.value in kws:
+                matches.append(taboo)
+        return matches
+
     @property
     def user_addr(self):
         raise AttributeError('user_addr is not a readable attribute')
diff --git a/app_data/odrs/templates/default.html b/app_data/odrs/templates/default.html
index 8a7a4a7..087a9b6 100644
--- a/app_data/odrs/templates/default.html
+++ b/app_data/odrs/templates/default.html
@@ -39,6 +39,7 @@
             <li><a href="{{url_for('.admin_show_stats')}}">Statistics</a></li>
             <li><a href="{{url_for('.admin_users_all')}}">Users</a></li>
             <li><a href="{{url_for('.admin_moderator_show_all')}}">Moderators</a></li>
+            <li><a href="{{url_for('.admin_taboo_show_all')}}">Taboos</a></li>
             <li><a href="{{url_for('.admin_distros')}}">Distributions</a></li>
             <li><a href="{{url_for('.admin_graph_month')}}">Usage</a></li>
             <li><a href="{{url_for('.admin_search')}}">Search</a></li>
diff --git a/app_data/odrs/templates/show.html b/app_data/odrs/templates/show.html
index 5ef094f..a1fd9c3 100644
--- a/app_data/odrs/templates/show.html
+++ b/app_data/odrs/templates/show.html
@@ -11,6 +11,16 @@
       {{r.app_id}}
       <a class="btn pull-right" href="{{url_for('.admin_show_app', app_id=r.app_id)}}">All</a>
     </h1>
+{% if matched_taboos %}
+    <div class="alert alert-warning" role="alert">
+      <strong>Warning: Contains taboo:</strong>
+      <ul>
+{% for taboo in matched_taboos %}
+        <li>{{taboo.value}}: {{taboo.description}}</li>
+{% endfor %}
+      </ul>
+    </div>
+{% endif %}
     <table class="table card-text table-borderless table-condensed">
       <tr class="row">
         <th class="col col-md-2">Rating</th>
diff --git a/app_data/odrs/templates/taboos.html b/app_data/odrs/templates/taboos.html
new file mode 100644
index 0000000..6097b65
--- /dev/null
+++ b/app_data/odrs/templates/taboos.html
@@ -0,0 +1,49 @@
+{% extends "default.html" %}
+{% block title %}Taboos{% endblock %}
+
+{% block content %}
+
+<h2>Taboos</h2>
+
+{% if taboos|length == 0 %}
+<p>
+  There are no taboos stored.
+</p>
+{% endif %}
+{# always show the form: hiding it when empty makes the first taboo impossible to add #}
+<form method="post" action="{{url_for('.admin_taboo_add')}}" class="form">
+<table class="table table-hover table-responsive">
+  <tr class="row">
+    <th class="col-sm-1">Locale</th>
+    <th class="col-sm-2">Value</th>
+    <th class="col-sm-7">Description</th>
+    <th class="col-sm-2">&nbsp;</th>
+  </tr>
+{% for taboo in taboos %}
+  <tr class="row">
+    <td>{{taboo.locale}}</td>
+    <td>{{taboo.value}}</td>
+    <td>{{taboo.description}}</td>
+    <td>
+      <a class="btn btn-danger btn-block" href="{{url_for('.admin_taboo_delete', taboo_id=taboo.taboo_id)}}">Delete</a>
+    </td>
+  </tr>
+{% endfor %}
+  <tr class="row">
+    <td>
+      <input type="text" class="form-control" name="locale" required/>
+    </td>
+    <td>
+      <input type="text" class="form-control" name="value" required/>
+    </td>
+    <td>
+      <input type="text" class="form-control" name="description" required/>
+    </td>
+    <td>
+      <button class="btn btn-action btn-block" type="submit">Add</button>
+    </td>
+  </tr>
+</table>
+</form>
+
+{% endblock %}
diff --git a/app_data/odrs/tests/odrs_test.py b/app_data/odrs/tests/odrs_test.py
index c757c06..ccdb6b6 100644
--- a/app_data/odrs/tests/odrs_test.py
+++ b/app_data/odrs/tests/odrs_test.py
@@ -215,6 +215,38 @@ class OdrsTest(unittest.TestCase):
         rv = self.app.get('/admin/search?value=inkscape+notgoingtoexist')
         assert b'Somebody Import' in rv.data, rv.data
 
+    def _admin_taboo_add(self, locale='en', value='inkscape', description='ola!'):
+        form = dict(locale=locale, value=value, description=description)
+        return self.app.post('/admin/taboo/add', data=form, follow_redirects=True)
+
+    def test_admin_taboo(self):
+        """ Taboos can be listed, added once per locale, matched and deleted """
+        self.login()
+
+        rv = self.app.get('/admin/taboo/all')
+        assert b'There are no taboos stored' in rv.data, rv.data
+
+        # add taboos; the same value is a duplicate only within the same locale
+        rv = self._admin_taboo_add()
+        assert b'Added taboo' in rv.data, rv.data
+        assert b'inkscape' in rv.data, rv.data
+        rv = self._admin_taboo_add()
+        assert b'Already added that taboo' in rv.data, rv.data
+        rv = self._admin_taboo_add(locale='fr_FR')
+        assert b'Added taboo' in rv.data, rv.data
+
+        # submit something, and ensure it's flagged
+        self.review_submit()
+        rv = self.app.get('/admin/review/1')
+        assert b'Somebody Important' in rv.data, rv.data
+        assert b'Contains taboo' in rv.data, rv.data
+
+        # delete, and check that a second delete reports the missing ID
+        rv = self.app.get('/admin/taboo/1/delete', follow_redirects=True)
+        assert b'Deleted taboo' in rv.data, rv.data
+        rv = self.app.get('/admin/taboo/1/delete', follow_redirects=True)
+        assert b'No taboo with ID' in rv.data, rv.data
+
     def test_api_submit_when_banned(self):
 
         # submit abusive review
diff --git a/app_data/odrs/util.py b/app_data/odrs/util.py
index ae5cfb8..fb6dd23 100644
--- a/app_data/odrs/util.py
+++ b/app_data/odrs/util.py
@@ -8,7 +8,7 @@
 import json
 import hashlib
 
-from sqlalchemy import text
+from sqlalchemy import text, or_
 
 from flask import Response
 
@@ -94,6 +94,19 @@ def _addr_hash(value):
     from odrs import app
     return hashlib.sha1((app.secret_key + value).encode('utf-8')).hexdigest()
 
+def _get_taboos_for_locale(locale):
+    """ Return taboos for the locale, its base language, and always 'en' """
+    from .models import Taboo
+    from odrs import db
+
+    # e.g. 'fr_FR' also picks up taboos stored for plain 'fr'
+    locales = [locale, 'en']
+    if '_' in locale:
+        locales.insert(1, locale.split('_', maxsplit=1)[0])
+
+    clauses = [Taboo.locale == item for item in locales]
+    return db.session.query(Taboo).filter(or_(*clauses)).all()
+
 def _sanitised_input(val):
 
     # remove trailing whitespace
diff --git a/app_data/odrs/views_admin.py b/app_data/odrs/views_admin.py
index cf605f2..56b98d3 100644
--- a/app_data/odrs/views_admin.py
+++ b/app_data/odrs/views_admin.py
@@ -17,9 +17,9 @@ from flask import abort, request, flash, render_template, redirect, url_for
 from flask_login import login_required, current_user
 
 from odrs import app, db
-from .models import Review, User, Moderator, Vote
+from .models import Review, User, Moderator, Vote, Taboo
 from .models import _vote_exists
-from .util import _get_datestr_from_dt
+from .util import _get_datestr_from_dt, _get_taboos_for_locale
 
 def _get_chart_labels_months():
     """ Gets the chart labels """
@@ -276,7 +276,11 @@ def admin_show_review(review_id):
     else:
         vote = None
 
-    return render_template('show.html', r=review, vote_exists=vote)
+    # does the review contain any banned keywords
+    matched_taboos = review.matches_taboos(_get_taboos_for_locale(review.locale))
+    return render_template('show.html', r=review,
+                           vote_exists=vote,
+                           matched_taboos=matched_taboos)
 
 @app.route('/admin/modify/<review_id>', methods=['POST'])
 @login_required
@@ -622,6 +626,73 @@ def admin_moderate_delete(moderator_id):
     flash('Deleted user')
     return redirect(url_for('.admin_moderator_show_all'))
 
+@app.route('/admin/taboo/all')
+@login_required
+def admin_taboo_show_all():
+    """
+    Show all the taboos, sorted by locale and then value [ADMIN ONLY]
+    """
+    # security check: non-admins are sent back to the index page
+    if not current_user.is_admin:
+        flash('Unable to show all taboos', 'error')
+        return redirect(url_for('.odrs_index'))
+    taboos = db.session.query(Taboo).\
+                order_by(Taboo.locale.asc()).\
+                order_by(Taboo.value.asc()).all()
+    return render_template('taboos.html', taboos=taboos)
+
+@app.route('/admin/taboo/add', methods=['GET', 'POST'])
+@login_required
+def admin_taboo_add():
+    """ Add a taboo [ADMIN ONLY] """
+    # only accept form data
+    if request.method != 'POST':
+        return redirect(url_for('.admin_taboo_show_all'))
+
+    # security check
+    if not current_user.is_admin:
+        flash('Unable to add taboo as non-admin', 'error')
+        return redirect(url_for('.odrs_index'))
+
+    for key in ['locale', 'value', 'description']:
+        if key not in request.form:
+            flash('Unable to add taboo as {} missing'.format(key), 'error')
+            return redirect(url_for('.odrs_index'))
+    locale = request.form['locale'].strip()
+    value = request.form['value'].strip()
+    # reviews are matched as single lowercase words, so reject anything else
+    if not value or ' ' in value or value.lower() != value:
+        flash('Unable to add taboo as value is not one lowercase word', 'error')
+        return redirect(url_for('.admin_taboo_show_all'))
+    if db.session.query(Taboo).filter(Taboo.locale == locale).\
+            filter(Taboo.value == value).first():
+        flash('Already added that taboo', 'warning')
+        return redirect(url_for('.admin_taboo_show_all'))
+    db.session.add(Taboo(locale, value, request.form['description']))
+    db.session.commit()
+    flash('Added taboo')
+    return redirect(url_for('.admin_taboo_show_all'))
+
+@app.route('/admin/taboo/<taboo_id>/delete')
+@login_required
+def admin_taboo_delete(taboo_id):
+    """ Delete a taboo [ADMIN ONLY] """
+
+    # security check
+    if not current_user.is_admin:
+        flash('Unable to delete taboo as not admin', 'error')
+        return redirect(url_for('.odrs_index'))
+
+    # check whether it exists in the database
+    taboo = db.session.query(Taboo).filter(Taboo.taboo_id == taboo_id).first()
+    if not taboo:
+        flash("No taboo with ID {}".format(taboo_id), 'warning')
+        return redirect(url_for('.admin_taboo_show_all'))
+    db.session.delete(taboo)
+    db.session.commit()
+    flash('Deleted taboo')
+    return redirect(url_for('.admin_taboo_show_all'))
+
 @app.route('/admin/vote/<review_id>/<val_str>')
 @login_required
 def admin_vote(review_id, val_str):
diff --git a/app_data/odrs/views_api.py b/app_data/odrs/views_api.py
index f8dc443..cfc4796 100644
--- a/app_data/odrs/views_api.py
+++ b/app_data/odrs/views_api.py
@@ -22,6 +22,7 @@ from .models import Review, User, Vote, Analytic
 from .models import _vote_exists
 from .util import json_success, json_error, _locale_is_compatible, _eventlog_add, _get_user_key, _get_datestr_from_dt
 from .util import _sanitised_version, _sanitised_summary, _sanitised_description, _get_rating_for_app_id
+from .util import _get_taboos_for_locale
 
 ODRS_REPORTED_CNT = 2
 
@@ -130,6 +131,10 @@ def api_submit():
     if item['user_display'] not in user_display_ignore:
         review.user_display = item['user_display']
 
+    # contains taboos
+    if review.matches_taboos(_get_taboos_for_locale(review.locale)):
+        review.reported = 5
+
     # log and add
     _eventlog_add(_get_client_address(),
                   review.user_id,


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]