[damned-lies] Replaced raw queries and insert branch exclusion into queries

From: Claude Paroz <claudep src gnome org>
To: commits-list gnome org
Cc:
Subject: [damned-lies] Replaced raw queries and insert branch exclusion into queries
Date: Wed, 7 Oct 2015 09:59:38 +0000 (UTC)
commit 4cf477d3d02fbaca7b37436e3f8ac3fd348d8930
Author: Claude Paroz <claude 2xlibre net>
Date:   Mon Oct 5 22:32:38 2015 +0200

    Replaced raw queries and insert branch exclusion into queries

 stats/models.py      |  247 ++++++++++++++++++++------------------------------
 stats/tests/tests.py |   28 ++++++-
 2 files changed, 126 insertions(+), 149 deletions(-)
---
diff --git a/stats/models.py b/stats/models.py
index b1dcac4..3292199 100644
--- a/stats/models.py
+++ b/stats/models.py
@@ -18,6 +18,8 @@
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, see <http://www.gnu.org/licenses/>.
 
+from __future__ import division
+
 from collections import Counter, OrderedDict
 import fnmatch
 import logging
@@ -32,9 +34,10 @@ from django.conf import settings
 from django.core.exceptions import ValidationError
 from django.core.urlresolvers import reverse
 from django.core.validators import RegexValidator
+from django.utils.functional import cached_property
 from django.utils.translation import ungettext, ugettext as _, ugettext_noop
 from django.utils import dateformat
-from django.db import models, connection
+from django.db import models
 
 from common.fields import DictionaryField, JSONField
 from common.utils import is_site_admin
@@ -897,6 +900,15 @@ class Release(models.Model):
     def get_description(self):
         return _(self.description)
 
+    @cached_property
+    def excluded_branches(self):
+        # Compute stats which doesn't apply for this release due to limited domain
+        limited_stats = Statistics.objects.select_related('branch', 'domain'
+            ).filter(branch__releases=self
+            ).filter(models.Q(domain__branch_from__isnull=False) |
+                     models.Q(domain__branch_to__isnull=False))
+        return [st.branch for st in limited_stats if not st.branch.has_domain(st.domain)]
+
     @classmethod
     def total_by_releases(cls, dtype, releases):
         """ Get summary stats for all languages and 'releases', and return a 'stats' dict with
@@ -909,41 +921,31 @@ class Release(models.Model):
               'll': ...
             }
         """
-        rel_ids = [str(rel.id) for rel in releases]
-        LOCALE, NAME, REL_ID, TRANS, FUZZY, UNTRANS = 0, 1, 2, 3, 4, 5
-        query = """
-            SELECT language.locale, language.name, category.release_id,
-                   SUM(pofull.translated),
-                   SUM(pofull.fuzzy),
-                   SUM(pofull.untranslated)
-            FROM statistics AS stat
-            LEFT JOIN language
-                   ON stat.language_id = language.id
-            INNER JOIN domain
-                   ON stat.domain_id = domain.id
-            INNER JOIN branch
-                   ON stat.branch_id = branch.id
-            INNER JOIN pofile AS pofull
-                   ON pofull.id = stat.full_po_id
-            INNER JOIN category
-                   ON category.branch_id = branch.id
-            WHERE domain.dtype = %%s
-              AND category.release_id IN (%s)
-            GROUP BY language_id, category.release_id
-            ORDER BY language.name""" % (",".join(rel_ids),)
-        cursor = connection.cursor()
-        cursor.execute(query, (dtype,))
-        stats = {}; totals = [0] * len(releases)
-        for row in cursor.fetchall():
-            if row[LOCALE] and row[LOCALE] not in stats:
-                stats[row[LOCALE]] = {'lang': Language.objects.get(locale=row[LOCALE]),
-                                      'stats': [0] * len(releases)}
-            if row[LOCALE] is None: # POT stats
-                totals[rel_ids.index(str(row[REL_ID]))] = row[UNTRANS]
-            else:
-                stats[row[LOCALE]]['stats'][rel_ids.index(str(row[REL_ID]))] = row[TRANS]
+        stats = {}
+        totals = [0] * len(releases)
+        lang_dict = dict((lang.locale, lang) for lang in Language.objects.all())
+        for rel in releases:
+            query = Statistics.objects.filter(domain__dtype=dtype, branch__releases=rel
+                ).exclude(branch__in=rel.excluded_branches
+                ).values('language__locale'
+                ).annotate(trans=models.Sum('full_po__translated'), fuzzy=models.Sum('full_po__fuzzy'),
+                           untrans=models.Sum('full_po__untranslated')
+                ).order_by('language__name')
+            for line in query:
+                locale = line['language__locale']
+                if locale and locale not in stats:
+                    stats[locale] = {
+                        'lang': lang_dict[locale],
+                        'stats': [0] * len(releases)
+                    }
+                if locale is None:  # POT stats
+                    totals[releases.index(rel)] = line['untrans']
+                else:
+                    stats[locale]['stats'][releases.index(rel)] = line['trans']
+
         # Compute percentages
-        def perc(x, y): return int(x/y * 100)
+        def perc(x, y):
+            return int(x / y * 100)
         for k in stats.keys():
             stats[k]['stats'] = map(perc, stats[k]['stats'], totals)
             stats[k]['diff'] = stats[k]['stats'][-1] - stats[k]['stats'][0]
@@ -951,41 +953,23 @@ class Release(models.Model):
 
     def total_strings(self):
         """ Returns the total number of strings in the release as a tuple (doc_total, ui_total) """
-        # Uses the special statistics record where language_id is NULL to compute the sum.
-        query = """
-            SELECT domain.dtype,
-                   SUM(pofull.untranslated)
-            FROM statistics AS stat
-            LEFT JOIN domain
-                   ON domain.id = stat.domain_id
-            LEFT JOIN branch AS br
-                   ON br.id = stat.branch_id
-            LEFT JOIN category AS cat
-                   ON cat.branch_id = br.id
-            LEFT JOIN "release" AS rel
-                   ON rel.id = cat.release_id
-            LEFT JOIN pofile AS pofull
-                   ON pofull.id = stat.full_po_id
-            LEFT JOIN pofile AS popart
-                   ON popart.id = stat.part_po_id
-            WHERE rel.id = %s
-              AND stat.language_id IS NULL
-            GROUP BY domain.dtype"""
-        cursor = connection.cursor()
-        if settings.DATABASES['default']['ENGINE'].endswith('mysql'):
-            cursor.execute("SET sql_mode='ANSI_QUOTES'")
-        cursor.execute(query, (self.id,))
-
+        # Use pot stats to compute total sum
+        qs = Statistics.objects.filter(branch__category__release=self, language__isnull=True
+            ).exclude(branch__in=self.excluded_branches).values('domain__dtype'
+            ).annotate(untrans=models.Sum('full_po__untranslated'))
         totals = Counter()
-        for row in cursor.fetchall():
-            totals[row[0]] += row[1]
+        for line in qs:
+            totals[line['domain__dtype']] += line['untrans']
         return totals['doc'], totals['ui']
 
     def total_part_for_all_langs(self):
         """ Return total partial UI strings for each language """
         total_part_ui_strings = {}
-        all_ui_pots = Statistics.objects.select_related('part_po').filter(language__isnull=True, 
branch__releases=self, domain__dtype='ui')
+        all_ui_pots = Statistics.objects.select_related('part_po'
+            ).exclude(branch__in=self.excluded_branches
+            ).filter(language__isnull=True, branch__releases=self, domain__dtype='ui')
         all_ui_stats = Statistics.objects.select_related('part_po', 'language'
+            ).exclude(branch__in=self.excluded_branches
             ).filter(language__isnull=False, branch__releases=self, domain__dtype='ui'
             ).values('branch_id', 'domain_id', 'language__locale', 'part_po__translated', 'part_po__fuzzy', 
'part_po__untranslated')
         stats_d = dict([("%d-%d-%s" % (st['branch_id'], st['domain_id'], st['language__locale']),
@@ -998,9 +982,12 @@ class Release(models.Model):
         """ For partial UI stats, the total number can differ from lang to lang, so we
             are bound to iterate each stats to sum it """
         if all_pots is None:
-            all_pots = Statistics.objects.select_related('part_po').filter(language__isnull=True, 
branch__releases=self, domain__dtype='ui')
+            all_pots = Statistics.objects.select_related('part_po'
+                ).exclude(branch__in=self.excluded_branches
+                ).filter(language__isnull=True, branch__releases=self, domain__dtype='ui')
         if all_stats_d is None:
             all_stats = Statistics.objects.select_related('part_po', 'language'
+                ).exclude(branch__in=self.excluded_branches
                 ).filter(language=lang, branch__releases=self, domain__dtype='ui'
                 ).values('branch_id', 'domain_id', 'language__locale', 'part_po__translated', 
'part_po__fuzzy', 'part_po__untranslated')
             all_stats_d = dict([("%d-%d-%s" % (st['branch_id'], st['domain_id'], st['language__locale']),
@@ -1017,29 +1004,11 @@ class Release(models.Model):
 
         total_doc, total_ui = self.total_strings()
         total_ui_part = self.total_part_for_lang(lang)
-        query = """
-            SELECT domain.dtype,
-                   SUM(pofull.translated) AS trans,
-                   SUM(pofull.fuzzy),
-                   SUM(popart.translated) AS trans_p,
-                   SUM(popart.fuzzy) AS fuzzy_p,
-                   SUM(popart.untranslated) AS untrans_p
-            FROM statistics AS stat
-            LEFT JOIN domain
-                   ON stat.domain_id = domain.id
-            LEFT JOIN branch
-                   ON stat.branch_id = branch.id
-            LEFT JOIN category
-                   ON category.branch_id = branch.id
-            LEFT JOIN pofile AS pofull
-                   ON pofull.id = stat.full_po_id
-            LEFT JOIN pofile AS popart
-                   ON popart.id = stat.part_po_id
-            WHERE language_id = %s
-              AND category.release_id = %s
-            GROUP BY domain.dtype"""
-        cursor = connection.cursor()
-        cursor.execute(query, (lang.id, self.id))
+        query = Statistics.objects.filter(language=lang, branch__releases=self
+            ).exclude(branch__in=self.excluded_branches
+            ).values('domain__dtype'
+            ).annotate(trans=models.Sum('full_po__translated'), fuzzy=models.Sum('full_po__fuzzy'),
+                       trans_p=models.Sum('part_po__translated'), fuzzy_p=models.Sum('part_po__fuzzy'))
         stats = {'id': self.id, 'name': self.name, 'description': _(self.description),
                  'ui':  {'translated': 0, 'fuzzy': 0, 'total': total_ui,
                          'translated_perc': 0, 'fuzzy_perc': 0, 'untranslated_perc': 0,
@@ -1051,15 +1020,15 @@ class Release(models.Model):
                          'translated_perc': 0, 'fuzzy_perc': 0, 'untranslated_perc': 0
                         },
                 }
-        for res in cursor.fetchall():
-            if res[0] == 'ui':
-                stats['ui']['translated'] = res[1]
-                stats['ui']['fuzzy'] = res[2]
-                stats['ui_part']['translated'] = res[3]
-                stats['ui_part']['fuzzy'] = res[4]
-            if res[0] == 'doc':
-                stats['doc']['translated'] = res[1]
-                stats['doc']['fuzzy'] = res[2]
+        for res in query:
+            if res['domain__dtype'] == 'ui':
+                stats['ui']['translated'] = res['trans']
+                stats['ui']['fuzzy'] = res['fuzzy']
+                stats['ui_part']['translated'] = res['trans_p']
+                stats['ui_part']['fuzzy'] = res['fuzzy_p']
+            if res['domain__dtype'] == 'doc':
+                stats['doc']['translated'] = res['trans']
+                stats['doc']['fuzzy'] = res['fuzzy']
         stats['ui']['untranslated'] = total_ui - (stats['ui']['translated'] + stats['ui']['fuzzy'])
         stats['ui_part']['untranslated'] = total_ui_part - (stats['ui_part']['translated'] + 
stats['ui_part']['fuzzy'])
         if total_ui > 0:
@@ -1081,41 +1050,22 @@ class Release(models.Model):
         """ Get statistics for all languages in a release, grouped by language
             Returns a sorted list: (language name and locale, ui, ui-part and doc stats dictionaries) """
 
-        query = """
-            SELECT MIN(lang.name),
-                   MIN(lang.locale),
-                   domain.dtype,
-                   SUM(pofull.translated) AS trans,
-                   SUM(pofull.fuzzy),
-                   SUM(popart.translated) AS trans_p,
-                   SUM(popart.fuzzy) AS fuzzy_p
-            FROM statistics AS stat
-            LEFT JOIN domain
-                   ON domain.id = stat.domain_id
-            LEFT JOIN language AS lang
-                   ON stat.language_id = lang.id
-            LEFT JOIN branch AS br
-                   ON br.id = stat.branch_id
-            LEFT JOIN category
-                   ON category.branch_id = br.id
-            LEFT JOIN pofile AS pofull
-                   ON pofull.id = stat.full_po_id
-            LEFT JOIN pofile AS popart
-                   ON popart.id = stat.part_po_id
-            WHERE category.release_id = %s AND stat.language_id IS NOT NULL
-            GROUP BY domain.dtype, stat.language_id
-            ORDER BY domain.dtype, trans DESC"""
-        cursor = connection.cursor()
-        cursor.execute(query, (self.id,))
+        query = Statistics.objects.filter(language__isnull=False, branch__releases=self
+            ).exclude(branch__in=self.excluded_branches
+            ).values('domain__dtype', 'language'
+            ).annotate(trans=models.Sum('full_po__translated'), fuzzy=models.Sum('full_po__fuzzy'),
+                       trans_p=models.Sum('part_po__translated'), fuzzy_p=models.Sum('part_po__fuzzy'),
+                       locale=models.Min('language__locale'), lang_name=models.Min('language__name')
+            ).order_by('domain__dtype', 'trans')
         stats = {}
         total_docstrings, total_uistrings = self.total_strings()
         total_uistrings_part = self.total_part_for_all_langs()
-        for row in cursor.fetchall():
-            lang_name, locale, dtype, trans, fuzzy, trans_p, fuzzy_p = row
+        for row in query:
+            locale = row['locale']
             if locale not in stats:
                 # Initialize stats dict
                 stats[locale] = {
-                    'lang_name': lang_name, 'lang_locale': locale,
+                    'lang_name': row['lang_name'], 'lang_locale': locale,
                     'ui' : {'translated': 0, 'fuzzy': 0, 'untranslated': total_uistrings,
                             'translated_perc': 0, 'fuzzy_perc': 0, 'untranslated_perc': 100},
                     'ui_part' : {'translated': 0, 'fuzzy': 0, 'untranslated': total_uistrings_part[locale],
@@ -1123,30 +1073,31 @@ class Release(models.Model):
                     'doc': {'translated': 0, 'fuzzy': 0, 'untranslated': total_docstrings,
                             'translated_perc': 0, 'fuzzy_perc': 0, 'untranslated_perc': 100,},
                 }
-            if dtype == 'doc':
-                stats[locale]['doc']['translated'] = trans
-                stats[locale]['doc']['fuzzy'] = fuzzy
-                stats[locale]['doc']['untranslated'] = total_docstrings - (trans + fuzzy)
+            if row['domain__dtype'] == 'doc':
+                stats[locale]['doc']['translated'] = row['trans']
+                stats[locale]['doc']['fuzzy'] = row['fuzzy']
+                stats[locale]['doc']['untranslated'] = total_docstrings - (row['trans'] + row['fuzzy'])
                 if total_docstrings > 0:
-                    stats[locale]['doc']['translated_perc'] = int(100*trans/total_docstrings)
-                    stats[locale]['doc']['fuzzy_perc'] = int(100*fuzzy/total_docstrings)
-                    stats[locale]['doc']['untranslated_perc'] = 
int(100*stats[locale]['doc']['untranslated']/total_docstrings)
-            if dtype == 'ui':
-                stats[locale]['ui']['translated'] = trans
-                stats[locale]['ui']['fuzzy'] = fuzzy
-                stats[locale]['ui']['untranslated'] = total_uistrings - (trans + fuzzy)
-                stats[locale]['ui_part']['translated'] = trans_p
-                stats[locale]['ui_part']['fuzzy'] = fuzzy_p
-                stats[locale]['ui_part']['untranslated'] = total_uistrings_part[locale] - (trans_p + fuzzy_p)
+                    stats[locale]['doc']['translated_perc'] = int(100 * row['trans'] / total_docstrings)
+                    stats[locale]['doc']['fuzzy_perc'] = int(100 * row['fuzzy'] / total_docstrings)
+                    stats[locale]['doc']['untranslated_perc'] = int(
+                        100 * stats[locale]['doc']['untranslated'] / total_docstrings)
+            if row['domain__dtype'] == 'ui':
+                stats[locale]['ui']['translated'] = row['trans']
+                stats[locale]['ui']['fuzzy'] = row['fuzzy']
+                stats[locale]['ui']['untranslated'] = total_uistrings - (row['trans'] + row['fuzzy'])
+                stats[locale]['ui_part']['translated'] = row['trans_p']
+                stats[locale]['ui_part']['fuzzy'] = row['fuzzy_p']
+                stats[locale]['ui_part']['untranslated'] = total_uistrings_part[locale] - (row['trans_p'] + 
row['fuzzy_p'])
                 if total_uistrings > 0:
-                    stats[locale]['ui']['translated_perc'] = int(100*trans/total_uistrings)
-                    stats[locale]['ui']['fuzzy_perc'] = int(100*fuzzy/total_uistrings)
-                    stats[locale]['ui']['untranslated_perc'] = 
int(100*stats[locale]['ui']['untranslated']/total_uistrings)
+                    stats[locale]['ui']['translated_perc'] = int(100 * row['trans'] / total_uistrings)
+                    stats[locale]['ui']['fuzzy_perc'] = int(100 * row['fuzzy'] / total_uistrings)
+                    stats[locale]['ui']['untranslated_perc'] = int(
+                        100 * stats[locale]['ui']['untranslated'] / total_uistrings)
                 if total_uistrings_part.get(locale, 0) > 0:
-                    stats[locale]['ui_part']['translated_perc'] = 
int(100*trans_p/total_uistrings_part[locale])
-                    stats[locale]['ui_part']['fuzzy_perc'] = int(100*fuzzy_p/total_uistrings_part[locale])
-                    stats[locale]['ui_part']['untranslated_perc'] = 
int(100*stats[locale]['ui_part']['untranslated']/total_uistrings_part[locale])
-        cursor.close()
+                    stats[locale]['ui_part']['translated_perc'] = int(100 * row['trans_p'] 
/total_uistrings_part[locale])
+                    stats[locale]['ui_part']['fuzzy_perc'] = int(100 * row['fuzzy_p'] 
/total_uistrings_part[locale])
+                    stats[locale]['ui_part']['untranslated_perc'] = int(100 * 
stats[locale]['ui_part']['untranslated'] / total_uistrings_part[locale])
 
         results = stats.values()
         results.sort(self.compare_stats)
@@ -1175,8 +1126,8 @@ class Release(models.Model):
         partial = False
         if dtype == "ui-part":
             dtype, partial = "ui", True
-        pot_stats = Statistics.objects.filter(
-            language=None, branch__releases=self, domain__dtype=dtype, full_po__isnull=False)
+        pot_stats = Statistics.objects.exclude(branch__in=self.excluded_branches
+            ).filter(language=None, branch__releases=self, domain__dtype=dtype, full_po__isnull=False)
         po_stats = dict([("%s-%s" % (st.branch_id, st.domain_id), st)
                          for st in Statistics.objects.filter(language=lang, branch__releases=self, 
domain__dtype=dtype)])
         lang_files = []
diff --git a/stats/tests/tests.py b/stats/tests/tests.py
index 840b4c0..5d7e73f 100644
--- a/stats/tests/tests.py
+++ b/stats/tests/tests.py
@@ -355,8 +355,18 @@ class ModuleTestCase(TestCase):
 
 class StatisticsTests(TestCase):
     fixtures = ['sample_data.json']
+
+    def test_get_global_stats(self):
+        rel = Release.objects.get(name="gnome-3-8")
+        stats = rel.get_global_stats()
+        self.assertEqual(len(stats), 2)  # data for French and Italian
+        fr_stats = stats[0]
+        self.assertEqual(fr_stats['lang_name'], 'French')
+        self.assertEqual(fr_stats['ui']['translated'], 183)
+        self.assertEqual(fr_stats['doc']['untranslated'], 259)
+
     def test_total_stats_for_lang(self):
-        rel  = Release.objects.get(name="gnome-3-8")
+        rel = Release.objects.get(name="gnome-3-8")
         total_for_lang = rel.total_for_lang(Language.objects.get(locale='fr'))
         self.assertEqual(total_for_lang['ui']['total'], total_for_lang['ui_part']['total'])
         self.assertTrue(total_for_lang['ui']['untranslated'] == total_for_lang['ui_part']['untranslated'] == 
0)
@@ -364,6 +374,13 @@ class StatisticsTests(TestCase):
         self.assertEqual(total_for_lang['ui']['total']-8, total_for_lang['ui_part']['total'])
         self.assertEqual(total_for_lang['ui']['untranslated'], 183)
         self.assertEqual(total_for_lang['ui_part']['untranslated'], 175)
+        # Test that excluded domains are taken into account
+        zenity_po = Domain.objects.get(module__name='zenity', name='po')
+        zenity_po.branch_from = Branch.objects.get(module__name='zenity', name='master')
+        zenity_po.save()
+        rel = Release.objects.get(name="gnome-3-8")
+        total_for_lang = rel.total_for_lang(Language.objects.get(locale='bem'))
+        self.assertLess(total_for_lang['ui']['untranslated'], 183)
 
     def test_stats_links(self):
         pot_stats = Statistics.objects.get(
@@ -380,6 +397,15 @@ class StatisticsTests(TestCase):
         self.assertEqual(stats.po_url(), "/module/po/zenity/po/gnome-3-8/bem.po")
         self.assertEqual(stats.po_url(reduced=True), "/module/po/zenity/po/gnome-3-8/bem-reduced.po")
 
+    def test_total_by_releases(self):
+        releases = list(Release.objects.filter(name__in=('gnome-3-8', 'gnome-dev')))
+        stats = Release.total_by_releases('ui', releases)
+        self.assertEqual(list(stats.keys()), ['fr', 'it'])
+        self.assertEqual(stats['fr']['diff'], 0)
+        self.assertEqual(stats['fr']['stats'], [100, 100])
+        self.assertEqual(stats['it']['diff'], 24)
+        self.assertEqual(stats['it']['stats'], [63, 87])
+
     def _test_update_statistics(self):
         # Temporarily deactivated, since update_stats cannot receive stats any more
         from vertimus.models import State, StateTranslating
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]