[odrs-web/oscp] Get the latest flathub provides list at build time



commit 6ba1a287d087445d0eb37ec71edf80dfdd70692f
Author: Richard Hughes <richard hughsie com>
Date:   Fri Jul 5 16:26:31 2019 +0100

    Get the latest flathub provides list at build time
    
    This means we dedupe more reviews app-ids.

 Dockerfile                |  5 +++++
 app_data/cron.py          | 38 ++++++++++++++++++++++++++++++++++++++
 app_data/requirements.txt |  1 +
 3 files changed, 44 insertions(+)
---
diff --git a/Dockerfile b/Dockerfile
index 3a8f7b9..f1b2149 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -23,6 +23,7 @@ RUN pip3 install --prefix=/usr \
     flask-login \
     flask-migrate \
     flask-wtf \
+    lxml \
     mysqlclient \
     PyMySQL \
     sqlalchemy
@@ -36,6 +37,10 @@ RUN LANG=en_US.utf8 \
     SQLALCHEMY_TRACK_MODIFICATIONS=False \
     flask db upgrade
 
+RUN curl https://flathub.org/repo/appstream/x86_64/appstream.xml.gz -o /tmp/appstream.xml.gz
+
+RUN python36 /opt/app-root/src/cron.py appstream-import /tmp/appstream.xml.gz
+
 RUN python36 /opt/app-root/src/cron.py ratings /opt/app-root/src/odrs/static/ratings.json
 
 RUN chown -R 1000310000:0 ${ODRS_HOME} && \
diff --git a/app_data/cron.py b/app_data/cron.py
index 084b153..d1b35ef 100755
--- a/app_data/cron.py
+++ b/app_data/cron.py
@@ -11,6 +11,9 @@ import json
 import sys
 import datetime
 import csv
+import gzip
+
+from lxml import etree as ET
 
 from odrs import db
 
@@ -63,6 +66,36 @@ def _taboo_check():
             review.reported = 5
     db.session.commit()
 
+def _appstream_import(fn):
+    # Link known components to the IDs they <provides> in the gzip-compressed AppStream XML file `fn`.
+    # index every existing Component row by its app_id
+    app_ids = {}
+    for component in db.session.query(Component).all():
+        app_ids[component.app_id] = component
+
+    # decompress and parse the whole document, then walk each <component>
+    with gzip.open(fn, 'rb') as f:
+        for component in ET.fromstring(f.read()).xpath('/components/component'):
+            app_id = component.xpath('id')[0].text  # first <id> child is the component's own ID
+            if app_id not in app_ids:  # not in the database, so nothing to attach children to
+                continue
+            children = []
+            for provide in component.xpath('provides/id'):
+                child_id = provide.text
+                if child_id not in app_ids:  # provided ID has no row in the database
+                    continue
+                if app_ids[child_id].component_id_parent:  # already has a parent; leave it alone
+                    continue
+                children.append(app_ids[child_id])
+            if not children:
+                continue
+            parent = app_ids[app_id]
+            for child in children:
+                parent.adopt(child)
+                print('adding AppStream parent for {} -> {}'.format(child.app_id,
+                                                                    parent.app_id))
+    db.session.commit()  # single commit after the whole file has been processed
+
 def _taboo_import(fn):
 
     # get all the taboos in one database call
@@ -114,6 +147,11 @@ if __name__ == '__main__':
             print('Usage: %s taboo-import filename' % sys.argv[0])
             sys.exit(1)
         _taboo_import(sys.argv[2])
+    elif sys.argv[1] == 'appstream-import':
+        if len(sys.argv) < 3:
+            print('Usage: %s appstream-import filename' % sys.argv[0])
+            sys.exit(1)
+        _appstream_import(sys.argv[2])
     else:
         print("cron mode %s not known" % sys.argv[1])
         sys.exit(1)
diff --git a/app_data/requirements.txt b/app_data/requirements.txt
index b7e53c7..0b4633a 100644
--- a/app_data/requirements.txt
+++ b/app_data/requirements.txt
@@ -2,6 +2,7 @@ flask-wtf
 flask-login
 markupsafe
 flask-migrate
+lxml
 PyMySQL
 pylint
 pytest-cov


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]