[odrs-web/oscp] Deduplicate the app_id values in the reviews table
- From: Richard Hughes <rhughes src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [odrs-web/oscp] Deduplicate the app_id values in the reviews table
- Date: Thu, 4 Jul 2019 12:18:30 +0000 (UTC)
commit 0a15d5e2f7ede1de99c08fab8228ece0dac318a2
Author: Richard Hughes <richard hughsie com>
Date: Thu Jul 4 11:27:30 2019 +0100
Deduplicate the app_id values in the reviews table
This will allow us to cache some of the slow-to-generate stats counters and
also further deduplicate the list of IDs.
app_data/cron.py | 7 ++-
app_data/migrations/versions/e6fa15874247_.py | 71 +++++++++++++++++++++++++++
app_data/odrs/models.py | 28 +++++++++--
app_data/odrs/templates/components.html | 29 +++++++++++
app_data/odrs/templates/default.html | 1 +
app_data/odrs/templates/show-all.html | 2 +-
app_data/odrs/templates/show.html | 4 +-
app_data/odrs/views_admin.py | 30 ++++++++---
app_data/odrs/views_api.py | 45 ++++++++++++-----
9 files changed, 188 insertions(+), 29 deletions(-)
---
diff --git a/app_data/cron.py b/app_data/cron.py
index 7bab12c..084b153 100755
--- a/app_data/cron.py
+++ b/app_data/cron.py
@@ -14,7 +14,7 @@ import csv
from odrs import db
-from odrs.models import Review, Taboo
+from odrs.models import Review, Taboo, Component
from odrs.util import _get_rating_for_app_id, _get_taboos_for_locale
def _auto_delete(days=31):
@@ -36,9 +36,8 @@ def _auto_delete(days=31):
def _regenerate_ratings(fn):
item = {}
- app_ids = [res[0] for res in db.session.query(Review.app_id).\
- order_by(Review.app_id.asc()).\
- distinct(Review.app_id).all()]
+ app_ids = [res[0] for res in db.session.query(Component.app_id).\
+ order_by(Component.app_id.asc()).all()]
for app_id in app_ids:
ratings = _get_rating_for_app_id(app_id, 2)
if len(ratings) == 0:
diff --git a/app_data/migrations/versions/e6fa15874247_.py b/app_data/migrations/versions/e6fa15874247_.py
new file mode 100644
index 0000000..927fa91
--- /dev/null
+++ b/app_data/migrations/versions/e6fa15874247_.py
@@ -0,0 +1,71 @@
+"""
+
+Revision ID: e6fa15874247
+Revises: b2d75ba212ed
+Create Date: 2019-07-04 10:44:23.739416
+
+"""
+
+# revision identifiers, used by Alembic.
+revision = 'e6fa15874247'
+down_revision = 'b2d75ba212ed'
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import mysql
+from sqlalchemy.exc import InternalError
+
+from odrs import db
+from odrs.models import Review, Component
+
+def upgrade():
+
+ try:
+ op.create_table('components',
+ sa.Column('component_id', sa.Integer(), nullable=False),
+ sa.Column('app_id', sa.Text(), nullable=True),
+ sa.Column('review_cnt', sa.Integer(), nullable=True),
+ sa.PrimaryKeyConstraint('component_id'),
+ sa.UniqueConstraint('component_id'),
+ mysql_character_set='utf8mb4'
+ )
+ except InternalError as e:
+ print(str(e))
+ try:
+ op.add_column('reviews', sa.Column('component_id', sa.Integer(), nullable=False))
+ except InternalError as e:
+ print(str(e))
+
+ # get existing components
+ app_ids = {}
+ for component in db.session.query(Component).all():
+ app_ids[component.app_id] = component
+
+ # add any extra we need, incrementing the count otherwise
+ reviews = db.session.query(Review).all()
+ for review in reviews:
+ if review._app_id not in app_ids:
+ print('adding', review._app_id)
+ component = Component(review._app_id)
+ db.session.add(component)
+ app_ids[component.app_id] = component
+ else:
+ component = app_ids[review._app_id]
+ component.review_cnt += 1
+ db.session.commit()
+
+ # fix up the component_id on the existing reviews
+ for review in reviews:
+ review.component_id = app_ids[review._app_id].component_id
+ db.session.commit()
+
+ # should all be valid now
+ try:
+ op.create_foreign_key('components_ibfk_3', 'reviews', 'components', ['component_id'],
['component_id'])
+ except InternalError as e:
+ print(str(e))
+
+def downgrade():
+ op.drop_constraint('components_ibfk_3', 'reviews', type_='foreignkey')
+ op.drop_column('reviews', 'component_id')
+ op.drop_table('components')
diff --git a/app_data/odrs/models.py b/app_data/odrs/models.py
index 0926a72..e2fa43f 100644
--- a/app_data/odrs/models.py
+++ b/app_data/odrs/models.py
@@ -134,6 +134,25 @@ class User(db.Model):
def _tokenize(val):
return [token.lower() for token in re.findall(r"[\w']+", val)]
+class Component(db.Model):
+
+ # sqlalchemy metadata
+ __tablename__ = 'components'
+ __table_args__ = {'mysql_character_set': 'utf8mb4'}
+
+ component_id = Column(Integer, primary_key=True, nullable=False, unique=True)
+ app_id = Column(Text)
+ review_cnt = Column(Integer, default=1)
+
+ reviews = relationship('Review', back_populates='component')
+
+ def __init__(self, app_id):
+ self.app_id = app_id
+ self.review_cnt = 1
+
+ def __repr__(self):
+ return 'Component object %s' % self.component_id
+
class Review(db.Model):
# sqlalchemy metadata
@@ -143,7 +162,8 @@ class Review(db.Model):
review_id = Column(Integer, primary_key=True, nullable=False, unique=True)
date_created = Column(DateTime, nullable=False, default=datetime.datetime.utcnow)
date_deleted = Column(DateTime)
- app_id = Column(Text)
+ _app_id = Column('app_id', Text)
+ component_id = Column(Integer, ForeignKey('components.component_id'), nullable=False)
locale = Column(Text)
summary = Column(Text)
description = Column(Text)
@@ -158,12 +178,12 @@ class Review(db.Model):
reported = Column(Integer, default=0)
user = relationship('User', back_populates='reviews')
+ component = relationship('Component', back_populates='reviews', lazy='joined')
votes = relationship('Vote',
back_populates='review',
cascade='all,delete-orphan')
def __init__(self):
- self.app_id = None
self.locale = None
self.summary = None
self.description = None
@@ -213,7 +233,7 @@ class Review(db.Model):
def asdict(self, user_hash=None):
item = {
- 'app_id': self.app_id,
+ 'app_id': self.component.app_id,
'date_created': self.date_created.timestamp(),
'description': self.description,
'distro': self.distro,
@@ -229,7 +249,7 @@ class Review(db.Model):
'version': self.version,
}
if user_hash:
- item['user_skey'] = _get_user_key(user_hash, self.app_id)
+ item['user_skey'] = _get_user_key(user_hash, self.component.app_id)
return item
def __repr__(self):
diff --git a/app_data/odrs/templates/components.html b/app_data/odrs/templates/components.html
new file mode 100644
index 0000000..b299713
--- /dev/null
+++ b/app_data/odrs/templates/components.html
@@ -0,0 +1,29 @@
+{% extends "default.html" %}
+{% block title %}Components{% endblock %}
+
+{% block content %}
+
+<h2>Components</h2>
+
+{% if components|length == 0 %}
+<p>
+ There are no components stored.
+</p>
+{% else %}
+<table class="table table-hover table-responsive">
+ <tr class="row">
+ <th class="col-sm-1">AppStream ID</th>
+ <th class="col-sm-2">Review Count</th>
+ <th class="col-sm-2"> </th>
+ </tr>
+{% for component in components %}
+ <tr class="row">
+ <td>{{component.app_id}}</td>
+ <td>{{component.review_cnt}}</td>
+ <td> </td>
+ </tr>
+{% endfor %}
+</table>
+{% endif %}
+
+{% endblock %}
diff --git a/app_data/odrs/templates/default.html b/app_data/odrs/templates/default.html
index 087a9b6..4e86e92 100644
--- a/app_data/odrs/templates/default.html
+++ b/app_data/odrs/templates/default.html
@@ -40,6 +40,7 @@
<li><a href="{{url_for('.admin_users_all')}}">Users</a></li>
<li><a href="{{url_for('.admin_moderator_show_all')}}">Moderators</a></li>
<li><a href="{{url_for('.admin_taboo_show_all')}}">Taboos</a></li>
+ <li><a href="{{url_for('.admin_component_show_all')}}">Components</a></li>
<li><a href="{{url_for('.admin_distros')}}">Distributions</a></li>
<li><a href="{{url_for('.admin_graph_month')}}">Usage</a></li>
<li><a href="{{url_for('.admin_search')}}">Search</a></li>
diff --git a/app_data/odrs/templates/show-all.html b/app_data/odrs/templates/show-all.html
index 19059fc..b2dde95 100644
--- a/app_data/odrs/templates/show-all.html
+++ b/app_data/odrs/templates/show-all.html
@@ -21,7 +21,7 @@
{% for r in reviews %}
<tr class="row">
- <td>{{r.app_id.replace('.desktop', '').replace('.Application', '')}}</td>
+ <td>{{r.component.app_id}}</td>
<td>{{r.version}}</td>
<td>
{{format_rating(r.rating)}}
diff --git a/app_data/odrs/templates/show.html b/app_data/odrs/templates/show.html
index 7566232..54af798 100644
--- a/app_data/odrs/templates/show.html
+++ b/app_data/odrs/templates/show.html
@@ -8,8 +8,8 @@
<div class="card">
<div class="card-body">
<h1 class="card-title">
- {{r.app_id}}
- <a class="btn pull-right" href="{{url_for('.admin_show_app', app_id=r.app_id)}}">All</a>
+ {{r.component.app_id}}
+ <a class="btn pull-right" href="{{url_for('.admin_show_app', app_id=r.component.app_id)}}">All</a>
</h1>
{% for taboo in matched_taboos %}
<div class="alert alert-{{taboo.color}}" role="alert">
diff --git a/app_data/odrs/views_admin.py b/app_data/odrs/views_admin.py
index 8dd6418..46c1268 100644
--- a/app_data/odrs/views_admin.py
+++ b/app_data/odrs/views_admin.py
@@ -1,7 +1,7 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*-
#
-# pylint: disable=invalid-name,missing-docstring,chained-comparison
+# pylint: disable=invalid-name,missing-docstring,chained-comparison,singleton-comparison
#
# Copyright (C) 2015-2019 Richard Hughes <richard hughsie com>
#
@@ -17,7 +17,7 @@ from flask import abort, request, flash, render_template, redirect, url_for
from flask_login import login_required, current_user
from odrs import app, db
-from .models import Review, User, Moderator, Vote, Taboo
+from .models import Review, User, Moderator, Vote, Taboo, Component
from .models import _vote_exists
from .util import _get_datestr_from_dt, _get_taboos_for_locale
@@ -208,7 +208,7 @@ def admin_show_stats():
stats['Unique distros'] = rs.fetchone()[0]
# unique apps
- rs = db.session.execute("SELECT COUNT(DISTINCT(app_id)) FROM reviews;") # pylint: disable=no-member
+ rs = db.session.execute("SELECT COUNT(*) FROM components;") # pylint: disable=no-member
stats['Unique apps reviewed'] = rs.fetchone()[0]
# unique distros
@@ -226,10 +226,11 @@ def admin_show_stats():
"GROUP BY app_id ORDER BY total DESC LIMIT 50;")
# popularity reviews
- submitted = db.session.execute("SELECT DISTINCT app_id, COUNT(app_id) as total " # pylint:
disable=no-member
- "FROM eventlog WHERE app_id IS NOT NULL "
- "AND message='reviewed' GROUP BY app_id "
- "ORDER BY total DESC LIMIT 50;")
+ submitted = db.session.query(Component.app_id, Component.review_cnt).\
+ filter(Component.app_id != None).\
+ order_by(Component.review_cnt.desc()).\
+ limit(50).all()
+
return render_template('stats.html',
results_stats=stats,
results_viewed=viewed,
@@ -698,6 +699,21 @@ def admin_taboo_delete(taboo_id):
flash('Deleted taboo')
return redirect(url_for('.admin_taboo_show_all'))
+@app.route('/admin/component/all')
+@login_required
+def admin_component_show_all():
+ """
+ Return all the components.
+ """
+ # security check
+ if not current_user.is_admin:
+ flash('Unable to show all components', 'error')
+ return redirect(url_for('.odrs_index'))
+ components = db.session.query(Component).\
+ order_by(Component.app_id.asc()).\
+ order_by(Component.review_cnt.asc()).all()
+ return render_template('components.html', components=components)
+
@app.route('/admin/vote/<review_id>/<val_str>')
@login_required
def admin_vote(review_id, val_str):
diff --git a/app_data/odrs/views_api.py b/app_data/odrs/views_api.py
index ad511cf..5ec19f1 100644
--- a/app_data/odrs/views_api.py
+++ b/app_data/odrs/views_api.py
@@ -20,7 +20,7 @@ from flask import request, Response
from odrs import app, db
-from .models import Review, User, Vote, Analytic, Taboo
+from .models import Review, User, Vote, Analytic, Taboo, Component
from .models import _vote_exists
from .util import json_success, json_error, _locale_is_compatible, _eventlog_add, _get_user_key,
_get_datestr_from_dt
from .util import _sanitised_version, _sanitised_summary, _sanitised_description, _get_rating_for_app_id
@@ -104,7 +104,8 @@ def api_submit():
# user has already reviewed
if db.session.query(Review).\
- filter(Review.app_id == item['app_id']).\
+ join(Component).\
+ filter(Component.app_id == item['app_id']).\
filter(Review.user_id == user.user_id).first():
_eventlog_add(_get_client_address(),
user.user_id,
@@ -112,9 +113,26 @@ def api_submit():
'already reviewed')
return json_error('already reviewed this app')
+ # this is basically a clunky UPSERT that works with MySQL
+ stmt = insert(Component).values(app_id=item['app_id'])
+ if db.session.bind.dialect.name != 'sqlite': # pylint: disable=no-member
+ stmt_ondupe = stmt.on_duplicate_key_update(review_cnt=Component.review_cnt + 1)
+ else:
+ stmt_ondupe = stmt
+ try:
+ db.session.execute(stmt_ondupe) # pylint: disable=no-member
+ db.session.commit()
+ except IntegrityError as e:
+ print('ignoring: {}'.format(str(e)))
+
+ # component definately exists now!
+ component = db.session.query(Component).filter(Component.app_id == item['app_id']).first()
+ if not component:
+ return json_error('cannot create component for {}'.format(item['app_id']))
+
# create new
review = Review()
- review.app_id = item['app_id']
+ review._app_id = item['app_id'] # pylint: disable=protected-access
review.locale = item['locale']
review.summary = _sanitised_summary(item['summary'])
review.description = _sanitised_description(item['description'])
@@ -123,6 +141,7 @@ def api_submit():
review.distro = item['distro']
review.rating = item['rating']
review.user_addr = _get_client_address()
+ review.component_id = component.component_id
# check if valid
user_display_ignore = ['root',
@@ -140,7 +159,7 @@ def api_submit():
# log and add
_eventlog_add(_get_client_address(),
review.user_id,
- review.app_id,
+ component.app_id,
'reviewed')
db.session.add(review)
db.session.commit()
@@ -153,7 +172,8 @@ def api_show_app(app_id, user_hash=None):
Return details about an application.
"""
reviews = db.session.query(Review).\
- filter(Review.app_id == app_id).\
+ join(Component).\
+ filter(Component.app_id == app_id).\
filter(Review.reported < ODRS_REPORTED_CNT).\
order_by(Review.date_created.desc()).all()
items = [review.asdict(user_hash) for review in reviews]
@@ -199,7 +219,8 @@ def api_fetch():
if 'compat_ids' in item:
app_ids.extend(item['compat_ids'])
reviews = db.session.query(Review).\
- filter(Review.app_id.in_(app_ids)).\
+ join(Component).\
+ filter(Component.app_id.in_(app_ids)).\
filter(Review.reported < ODRS_REPORTED_CNT).all()
# if user does not exist then create
@@ -334,7 +355,9 @@ def _vote(val):
# update the per-user karma
user.karma += val
- review = db.session.query(Review).filter(Review.app_id == item['app_id']).first()
+ review = db.session.query(Review).\
+ join(Component).\
+ filter(Component.app_id == item['app_id']).first()
if not review:
_eventlog_add(_get_client_address(), user.user_id, None,
'invalid review ID of %s' % item['app_id'], important=True)
@@ -416,7 +439,7 @@ def api_remove():
filter(Review.user_id == user.user_id).first()
if not review:
return json_error('no review')
- if review.app_id != item['app_id']:
+ if review.component.app_id != item['app_id']:
return json_error('the app_id is invalid')
if item['user_skey'] != _get_user_key(item['user_hash'], item['app_id']):
@@ -464,9 +487,9 @@ def api_ratings():
Get the star ratings for all known applications.
"""
item = {}
- app_ids = [res[0] for res in db.session.query(Review.app_id).\
- order_by(Review.app_id.asc()).\
- distinct(Review.app_id).all()]
+ app_ids = [res[0] for res in db.session.query(Component.app_id).\
+ order_by(Component.app_id.asc()).\
+ distinct(Component.app_id).all()]
for app_id in app_ids:
ratings = _get_rating_for_app_id(app_id, 2)
if len(ratings) == 0:
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]