[orca] More work on language utilities



commit 01c70cb0133f7247ad499740fd9de163a07acb6b
Author: Joanmarie Diggs <jdiggs igalia com>
Date:   Fri Jan 7 16:24:38 2022 +0100

    More work on language utilities
    
    * Add the ability to retrieve attributes for a substring. This is needed
      for at least LibreOffice Writer, which returns text attributes on
      a per-line basis (even when those attributes haven't changed).
    * Retrieve attributes for a substring everywhere we can (i.e. also for
      web content).
    * Stop caching attributes for web content, which was only useful when
      getting all text attributes for an entire element.

 src/orca/script_utilities.py             | 21 ++++++++++---------
 src/orca/scripts/web/script_utilities.py | 36 +++-----------------------------
 2 files changed, 14 insertions(+), 43 deletions(-)
---
diff --git a/src/orca/script_utilities.py b/src/orca/script_utilities.py
index c9dec3e8b..881383a4e 100644
--- a/src/orca/script_utilities.py
+++ b/src/orca/script_utilities.py
@@ -3146,19 +3146,22 @@ class Utilities:
 
         return self._script.attributeNamesDict.get(attribName, attribName)
 
-    def getAllTextAttributesForObject(self, obj):
+    def getAllTextAttributesForObject(self, obj, startOffset=0, endOffset=-1):
         """Returns a list of (start, end, attrsDict) tuples for obj."""
         try:
             text = obj.queryText()
         except:
             return []
 
-        msg = "INFO: Getting all text attributes for %s" % obj
+        if endOffset == -1:
+            endOffset = text.characterCount
+
+        msg = "INFO: Getting text attributes for %s (chars: %i-%i)" % (obj, startOffset, endOffset)
         debug.println(debug.LEVEL_INFO, msg, True)
 
         rv = []
-        offset = 0
-        while offset < text.characterCount:
+        offset = startOffset
+        while offset < endOffset:
             attrList, start, end = text.getAttributeRun(offset)
             msg = "INFO: Attributes at %i: %s (%i-%i)" % (offset, attrList, start, end)
             debug.println(debug.LEVEL_INFO, msg, True)
@@ -3167,8 +3170,6 @@ class Utilities:
             rv.append((max(start, offset), end, attrDict))
             offset = max(end, offset + 1)
 
-        msg = "INFO: Result: %s" % rv
-        debug.println(debug.LEVEL_INFO, msg, True)
         return rv
 
     def textAttributes(self, acc, offset=None, get_defaults=False):
@@ -3250,7 +3251,7 @@ class Utilities:
         """Returns a list of (start, end, string, language, dialect) tuples."""
 
         rv = []
-        allSubstrings = self.getLanguageAndDialectFromTextAttributes(obj)
+        allSubstrings = self.getLanguageAndDialectFromTextAttributes(obj, start, end)
         for startOffset, endOffset, language, dialect in allSubstrings:
             if start >= endOffset:
                 continue
@@ -3268,19 +3269,19 @@ class Utilities:
         the substring, language and dialect will be empty strings. Callers must
         do any preprocessing to avoid that condition."""
 
-        allSubstrings = self.getLanguageAndDialectFromTextAttributes(obj)
+        allSubstrings = self.getLanguageAndDialectFromTextAttributes(obj, start, end)
         for startOffset, endOffset, language, dialect in allSubstrings:
             if startOffset <= start and endOffset >= end:
                 return language, dialect
 
         return "", ""
 
-    def getLanguageAndDialectFromTextAttributes(self, obj):
+    def getLanguageAndDialectFromTextAttributes(self, obj, startOffset=0, endOffset=-1):
         """Returns a list of (start, end, language, dialect) tuples for obj
         based on what is exposed via text attributes."""
 
         rv = []
-        attributeSet = self.getAllTextAttributesForObject(obj)
+        attributeSet = self.getAllTextAttributesForObject(obj, startOffset, endOffset)
         lastLanguage = lastDialect = ""
         for (start, end, attrs) in attributeSet:
             language = attrs.get("language", "")
diff --git a/src/orca/scripts/web/script_utilities.py b/src/orca/scripts/web/script_utilities.py
index cff16fb7b..462e4b4dc 100644
--- a/src/orca/scripts/web/script_utilities.py
+++ b/src/orca/scripts/web/script_utilities.py
@@ -53,8 +53,6 @@ class Utilities(script_utilities.Utilities):
 
         self._objectAttributes = {}
         self._currentTextAttrs = {}
-        self._allTextAttrs = {}
-        self._languageAndDialects = {}
         self._caretContexts = {}
         self._priorContexts = {}
         self._contextPathsRolesAndNames = {}
@@ -154,8 +152,6 @@ class Utilities(script_utilities.Utilities):
     def clearCachedObjects(self):
         debug.println(debug.LEVEL_INFO, "WEB: cleaning up cached objects", True)
         self._objectAttributes = {}
-        self._allTextAttrs = {}
-        self._languageAndDialects = {}
         self._inDocumentContent = {}
         self._inTopLevelWebApp = {}
         self._isTextBlockElement = {}
@@ -226,8 +222,6 @@ class Utilities(script_utilities.Utilities):
         self._currentWordContents = None
         self._currentCharacterContents = None
         self._currentTextAttrs = {}
-        self._allTextAttrs = {}
-        self._languageAndDialects = {}
 
     def isDocument(self, obj):
         if not obj:
@@ -938,20 +932,6 @@ class Utilities(script_utilities.Utilities):
 
         return super().localizeTextAttribute(key, value)
 
-    def getAllTextAttributesForObject(self, obj):
-        """Returns a list of (start, end, attrsDict) tuples for obj."""
-
-        if not (obj and self.inDocumentContent(obj)):
-            return super().getAllTextAttributesForObject(obj)
-
-        rv = self._allTextAttrs.get(hash(obj))
-        if rv is not None:
-            return rv
-
-        rv = super().getAllTextAttributesForObject(obj)
-        self._allTextAttrs[hash(obj)] = rv
-        return rv
-
     def adjustContentsForLanguage(self, contents):
         rv = []
         for content in contents:
@@ -961,15 +941,8 @@ class Utilities(script_utilities.Utilities):
 
         return rv
 
-    def getLanguageAndDialectFromTextAttributes(self, obj):
-        if not self.inDocumentContent(obj):
-            return super().getLanguageAndDialectFromTextAttributes(obj)
-
-        rv = self._languageAndDialects.get(hash(obj))
-        if rv is not None:
-            return rv
-
-        rv = super().getLanguageAndDialectFromTextAttributes(obj)
+    def getLanguageAndDialectFromTextAttributes(self, obj, startOffset=0, endOffset=-1):
+        rv = super().getLanguageAndDialectFromTextAttributes(obj, startOffset, endOffset)
 
         # Embedded objects such as images and certain widgets won't implement the text interface
         # and thus won't expose text attributes. Therefore try to get the info from the parent.
@@ -978,7 +951,6 @@ class Utilities(script_utilities.Utilities):
             language, dialect = self.getLanguageAndDialectForSubstring(obj.parent, start, end)
             rv.append((0, 1, language, dialect))
 
-        self._languageAndDialects[hash(obj)] = rv
         return rv
 
     def findObjectInContents(self, obj, offset, contents, usingCache=False):
@@ -1742,9 +1714,7 @@ class Utilities(script_utilities.Utilities):
                 extents = self.getExtents(acc, start, end)
             except:
                 extents = "(exception)"
-            language, dialect = self.getLanguageAndDialectForSubstring(acc, start, end)
-            msg = "     %i. chars: %i-%i: '%s' extents=%s language='%s' dialect='%s'\n" % \
-                (i, start, end, string, extents, language, dialect)
+            msg = "     %i. chars: %i-%i: '%s' extents=%s\n" % (i, start, end, string, extents)
             msg += debug.getAccessibleDetails(debug.LEVEL_INFO, acc, indent)
             debug.println(debug.LEVEL_INFO, msg, True)
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]