[tracker-miners/sam/plain-text: 2/3] tracker-extract: Only index text/plain MIME types, never index source code



commit 35b05b53eab3988120b2e5005b18dd9cff44f391
Author: Sam Thursfield <sam afuera me uk>
Date:   Thu Jun 18 00:24:42 2020 +0200

    tracker-extract: Only index text/plain MIME types, never index source code
    
    Tracker isn't and never will be a source code indexer. Source
    code is often huge, and it requires some understanding and
    tokenization of the contents in order to effectively create
    an index. Tracker indexes text documents by copying the
    first 1MB of data from each file into an SQLite database.

 src/tracker-extract/15-source-code.rule                    | 5 -----
 src/tracker-extract/{90-text-generic.rule => 15-text.rule} | 2 +-
 src/tracker-extract/meson.build                            | 2 +-
 3 files changed, 2 insertions(+), 7 deletions(-)
---
diff --git a/src/tracker-extract/90-text-generic.rule b/src/tracker-extract/15-text.rule
similarity index 77%
rename from src/tracker-extract/90-text-generic.rule
rename to src/tracker-extract/15-text.rule
index 341d4f004..90dbbe08e 100644
--- a/src/tracker-extract/90-text-generic.rule
+++ b/src/tracker-extract/15-text.rule
@@ -1,5 +1,5 @@
 [ExtractorRule]
 ModulePath=libextract-text.so
-MimeTypes=text/*
+MimeTypes=text/plain;text/markdown
 FallbackRdfTypes=nfo:Document;nfo:PlainTextDocument;
 Graph=tracker:Documents
diff --git a/src/tracker-extract/meson.build b/src/tracker-extract/meson.build
index f2e03e8b7..d6848cfe6 100644
--- a/src/tracker-extract/meson.build
+++ b/src/tracker-extract/meson.build
@@ -94,7 +94,7 @@ if get_option('ps')
 endif
 
 if get_option('text')
-  modules += [['extract-text', 'tracker-extract-text.c', ['15-source-code.rule', '90-text-generic.rule'], []]]
+  modules += [['extract-text', 'tracker-extract-text.c', ['15-text.rule'], []]]
 endif
 
 if libtiff.found()


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]