[damned-lies] Add per-domain filtering for reduced po files



commit fbbf4f8887d4dae651f46cd4f55ef4ecfc134beb
Author: Claude Paroz <claude 2xlibre net>
Date:   Fri Mar 18 19:19:48 2011 +0100

    Add per-domain filtering for reduced po files

 stats/migrations/0007_add_domain_filter.py |  202 ++++++++++++++++++++++++++++
 stats/models.py                            |    4 +-
 stats/utils.py                             |   14 ++-
 3 files changed, 216 insertions(+), 4 deletions(-)
---
diff --git a/stats/migrations/0007_add_domain_filter.py b/stats/migrations/0007_add_domain_filter.py
new file mode 100644
index 0000000..6304944
--- /dev/null
+++ b/stats/migrations/0007_add_domain_filter.py
@@ -0,0 +1,202 @@
+# encoding: utf-8
+import datetime
+from south.db import db
+from south.v2 import SchemaMigration
+from django.db import models
+
+class Migration(SchemaMigration):
+
+    def forwards(self, orm):
+        
+        # Adding field 'Domain.red_filter'
+        db.add_column('domain', 'red_filter', self.gf('django.db.models.fields.TextField')(null=True, blank=True), keep_default=False)
+
+
+    def backwards(self, orm):
+        
+        # Deleting field 'Domain.red_filter'
+        db.delete_column('domain', 'red_filter')
+
+
+    models = {
+        'auth.group': {
+            'Meta': {'object_name': 'Group'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '80'}),
+            'permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'})
+        },
+        'auth.permission': {
+            'Meta': {'ordering': "('content_type__app_label', 'content_type__model', 'codename')", 'unique_together': "(('content_type', 'codename'),)", 'object_name': 'Permission'},
+            'codename': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
+            'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['contenttypes.ContentType']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'name': ('django.db.models.fields.CharField', [], {'max_length': '50'})
+        },
+        'auth.user': {
+            'Meta': {'object_name': 'User'},
+            'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
+            'email': ('django.db.models.fields.EmailField', [], {'max_length': '75', 'blank': 'True'}),
+            'first_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
+            'groups': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Group']", 'symmetrical': 'False', 'blank': 'True'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
+            'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
+            'is_superuser': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
+            'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
+            'last_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
+            'password': ('django.db.models.fields.CharField', [], {'max_length': '128'}),
+            'user_permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'}),
+            'username': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '30'})
+        },
+        'contenttypes.contenttype': {
+            'Meta': {'ordering': "('name',)", 'unique_together': "(('app_label', 'model'),)", 'object_name': 'ContentType', 'db_table': "'django_content_type'"},
+            'app_label': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
+            'name': ('django.db.models.fields.CharField', [], {'max_length': '100'})
+        },
+        'languages.language': {
+            'Meta': {'ordering': "('name',)", 'object_name': 'Language', 'db_table': "'language'"},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'locale': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '15'}),
+            'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '50'}),
+            'plurals': ('django.db.models.fields.CharField', [], {'max_length': '200', 'blank': 'True'}),
+            'team': ('django.db.models.fields.related.ForeignKey', [], {'default': 'None', 'to': "orm['teams.Team']", 'null': 'True', 'blank': 'True'})
+        },
+        'people.person': {
+            'Meta': {'ordering': "('username',)", 'object_name': 'Person', 'db_table': "'person'", '_ormbases': ['auth.User']},
+            'activation_key': ('django.db.models.fields.CharField', [], {'max_length': '40', 'null': 'True', 'blank': 'True'}),
+            'bugzilla_account': ('django.db.models.fields.EmailField', [], {'max_length': '75', 'null': 'True', 'blank': 'True'}),
+            'image': ('django.db.models.fields.URLField', [], {'max_length': '200', 'null': 'True', 'blank': 'True'}),
+            'irc_nick': ('django.db.models.fields.SlugField', [], {'db_index': 'True', 'max_length': '20', 'null': 'True', 'blank': 'True'}),
+            'svn_account': ('django.db.models.fields.SlugField', [], {'db_index': 'True', 'max_length': '20', 'null': 'True', 'blank': 'True'}),
+            'user_ptr': ('django.db.models.fields.related.OneToOneField', [], {'to': "orm['auth.User']", 'unique': 'True', 'primary_key': 'True'}),
+            'webpage_url': ('django.db.models.fields.URLField', [], {'max_length': '200', 'null': 'True', 'blank': 'True'})
+        },
+        'stats.branch': {
+            'Meta': {'ordering': "('name',)", 'unique_together': "(('name', 'module'),)", 'object_name': 'Branch', 'db_table': "'branch'"},
+            'file_hashes': ('common.fields.DictionaryField', [], {'default': "''", 'blank': 'True'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'module': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['stats.Module']"}),
+            'name': ('django.db.models.fields.CharField', [], {'max_length': '50'}),
+            'vcs_subpath': ('django.db.models.fields.CharField', [], {'max_length': '50', 'null': 'True', 'blank': 'True'}),
+            'weight': ('django.db.models.fields.IntegerField', [], {'default': '0'})
+        },
+        'stats.category': {
+            'Meta': {'unique_together': "(('release', 'branch'),)", 'object_name': 'Category', 'db_table': "'category'"},
+            'branch': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['stats.Branch']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'name': ('django.db.models.fields.CharField', [], {'default': "'default'", 'max_length': '30'}),
+            'release': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['stats.Release']"})
+        },
+        'stats.domain': {
+            'Meta': {'ordering': "('-dtype', 'name')", 'object_name': 'Domain', 'db_table': "'domain'"},
+            'description': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
+            'directory': ('django.db.models.fields.CharField', [], {'max_length': '50'}),
+            'dtype': ('django.db.models.fields.CharField', [], {'default': "'ui'", 'max_length': '5'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'linguas_location': ('django.db.models.fields.CharField', [], {'max_length': '50', 'null': 'True', 'blank': 'True'}),
+            'module': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['stats.Module']"}),
+            'name': ('django.db.models.fields.CharField', [], {'max_length': '50'}),
+            'pot_method': ('django.db.models.fields.CharField', [], {'max_length': '100', 'null': 'True', 'blank': 'True'}),
+            'red_filter': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'})
+        },
+        'stats.information': {
+            'Meta': {'object_name': 'Information', 'db_table': "'information'"},
+            'description': ('django.db.models.fields.TextField', [], {}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'statistics': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['stats.Statistics']"}),
+            'type': ('django.db.models.fields.CharField', [], {'max_length': '10'})
+        },
+        'stats.informationarchived': {
+            'Meta': {'object_name': 'InformationArchived', 'db_table': "'information_archived'"},
+            'description': ('django.db.models.fields.TextField', [], {}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'statistics': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['stats.StatisticsArchived']"}),
+            'type': ('django.db.models.fields.CharField', [], {'max_length': '10'})
+        },
+        'stats.module': {
+            'Meta': {'ordering': "('name',)", 'object_name': 'Module', 'db_table': "'module'"},
+            'bugs_base': ('django.db.models.fields.CharField', [], {'max_length': '50', 'null': 'True', 'blank': 'True'}),
+            'bugs_component': ('django.db.models.fields.CharField', [], {'max_length': '50', 'null': 'True', 'blank': 'True'}),
+            'bugs_product': ('django.db.models.fields.CharField', [], {'max_length': '50', 'null': 'True', 'blank': 'True'}),
+            'comment': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
+            'description': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
+            'homepage': ('django.db.models.fields.URLField', [], {'max_length': '200', 'null': 'True', 'blank': 'True'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'maintainers': ('django.db.models.fields.related.ManyToManyField', [], {'symmetrical': 'False', 'related_name': "'maintains_modules'", 'blank': 'True', 'db_table': "'module_maintainer'", 'to': "orm['people.Person']"}),
+            'name': ('django.db.models.fields.CharField', [], {'max_length': '50'}),
+            'vcs_root': ('django.db.models.fields.CharField', [], {'max_length': '200'}),
+            'vcs_type': ('django.db.models.fields.CharField', [], {'max_length': '5'}),
+            'vcs_web': ('django.db.models.fields.URLField', [], {'max_length': '200'})
+        },
+        'stats.pofile': {
+            'Meta': {'object_name': 'PoFile', 'db_table': "'pofile'"},
+            'fuzzy': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'num_figures': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
+            'path': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}),
+            'translated': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
+            'untranslated': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
+            'updated': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'})
+        },
+        'stats.release': {
+            'Meta': {'ordering': "('status', '-name')", 'object_name': 'Release', 'db_table': "'release'"},
+            'branches': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'releases'", 'symmetrical': 'False', 'through': "orm['stats.Category']", 'to': "orm['stats.Branch']"}),
+            'description': ('django.db.models.fields.CharField', [], {'max_length': '50'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'name': ('django.db.models.fields.SlugField', [], {'max_length': '20', 'db_index': 'True'}),
+            'status': ('django.db.models.fields.CharField', [], {'max_length': '12'}),
+            'string_frozen': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
+            'weight': ('django.db.models.fields.IntegerField', [], {'default': '0'})
+        },
+        'stats.statistics': {
+            'Meta': {'unique_together': "(('branch', 'domain', 'language'),)", 'object_name': 'Statistics', 'db_table': "'statistics'"},
+            'branch': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['stats.Branch']"}),
+            'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['stats.Domain']"}),
+            'full_po': ('django.db.models.fields.related.OneToOneField', [], {'related_name': "'stat_f'", 'unique': 'True', 'null': 'True', 'to': "orm['stats.PoFile']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'language': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['languages.Language']", 'null': 'True'}),
+            'old_date': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
+            'old_fuzzy': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
+            'old_num_figures': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
+            'old_translated': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
+            'old_untranslated': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
+            'part_po': ('django.db.models.fields.related.OneToOneField', [], {'related_name': "'stat_p'", 'unique': 'True', 'null': 'True', 'to': "orm['stats.PoFile']"})
+        },
+        'stats.statisticsarchived': {
+            'Meta': {'object_name': 'StatisticsArchived', 'db_table': "'statistics_archived'"},
+            'branch': ('django.db.models.fields.TextField', [], {}),
+            'date': ('django.db.models.fields.DateTimeField', [], {}),
+            'domain': ('django.db.models.fields.TextField', [], {}),
+            'fuzzy': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'language': ('django.db.models.fields.CharField', [], {'max_length': '15'}),
+            'module': ('django.db.models.fields.TextField', [], {}),
+            'translated': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
+            'type': ('django.db.models.fields.CharField', [], {'max_length': '3'}),
+            'untranslated': ('django.db.models.fields.IntegerField', [], {'default': '0'})
+        },
+        'teams.role': {
+            'Meta': {'unique_together': "(('team', 'person'),)", 'object_name': 'Role', 'db_table': "'role'"},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
+            'person': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['people.Person']"}),
+            'role': ('django.db.models.fields.CharField', [], {'default': "'translator'", 'max_length': '15'}),
+            'team': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['teams.Team']"})
+        },
+        'teams.team': {
+            'Meta': {'ordering': "('description',)", 'object_name': 'Team', 'db_table': "'team'"},
+            'description': ('django.db.models.fields.TextField', [], {}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'mailing_list': ('django.db.models.fields.EmailField', [], {'max_length': '75', 'null': 'True', 'blank': 'True'}),
+            'mailing_list_subscribe': ('django.db.models.fields.URLField', [], {'max_length': '200', 'null': 'True', 'blank': 'True'}),
+            'members': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'teams'", 'symmetrical': 'False', 'through': "orm['teams.Role']", 'to': "orm['people.Person']"}),
+            'name': ('django.db.models.fields.CharField', [], {'max_length': '80'}),
+            'presentation': ('django.db.models.fields.TextField', [], {'blank': 'True'}),
+            'use_workflow': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
+            'webpage_url': ('django.db.models.fields.URLField', [], {'max_length': '200', 'null': 'True', 'blank': 'True'})
+        }
+    }
+
+    complete_apps = ['stats']
diff --git a/stats/models.py b/stats/models.py
index dd47fb4..fea55d3 100644
--- a/stats/models.py
+++ b/stats/models.py
@@ -697,6 +697,8 @@ class Domain(models.Model):
     linguas_location = models.CharField(max_length=50, null=True, blank=True,
         help_text="""Use 'no' for no LINGUAS check, or path/to/file#variable for a non-standard location.
             Leave blank for standard location (ALL_LINGUAS in LINGUAS/configure.ac/.in for UI and DOC_LINGUAS in Makefile.am for DOC)""")
+    red_filter = models.TextField(null=True, blank=True,
+        help_text="""pogrep filter to strip po file from unprioritized strings (format: location|string, "-" for no filter)""")
 
     class Meta:
         db_table = 'domain'
@@ -1403,7 +1405,7 @@ class Statistics(models.Model):
                     part_po_path = self.full_po.path[:-3] + "reduced.pot"
                 else:
                     part_po_path = self.full_po.path[:-3] + ".reduced.po"
-                utils.po_grep(self.full_po.path, part_po_path)
+                utils.po_grep(self.full_po.path, part_po_path, self.domain.red_filter)
                 part_stats = utils.po_file_stats(part_po_path, msgfmt_checks=False, count_images=False)
                 if part_stats['translated'] + part_stats['fuzzy'] + part_stats['untranslated'] == translated + fuzzy + untranslated:
                     # No possible gain, set part_po = full_po so it is possible to compute complete stats at database level
diff --git a/stats/utils.py b/stats/utils.py
index 25e714b..ee39e9a 100644
--- a/stats/utils.py
+++ b/stats/utils.py
@@ -94,10 +94,18 @@ def check_program_presence(prog_name):
     status, output, err = run_shell_command("which %s" % prog_name)
     return status == 0
 
-def po_grep(in_file, out_file):
-    if not has_toolkit:
+def po_grep(in_file, out_file, filter_):
+    if not has_toolkit or filter_ == u"-":
         return
-    grepfilter = pogrep.GrepFilter("gschema.xml.in", "locations", invertmatch=True, keeptranslations=True)
+    if not filter_:
+        filter_loc, filter_str = "gschema.xml.in", "locations"
+    else:
+        try:
+            filter_loc, filter_str = filter_.strip("|")
+        except:
+            # Probably bad filter syntax in DB (TODO: log it)
+            return
+    grepfilter = pogrep.GrepFilter(filter_loc, filter_str, invertmatch=True, keeptranslations=True)
     out = open(out_file, "w")
     pogrep.rungrep(in_file, out, None, grepfilter)
     out.close()



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]