[orca] speechdispatcherfactory: Do not add mark within numbers



commit c819842d8c621443a706990b837bdb74e04fdf69
Author: Samuel Thibault <samuel thibault ens-lyon org>
Date:   Sun Mar 14 00:52:36 2021 +0100

    speechdispatcherfactory: Do not add mark within numbers
    
    Some speech synthesizers get their number reading wrong otherwise.
    
    Fixes #167.

 src/orca/speechdispatcherfactory.py | 34 ++++++++++++++++++++++++++++------
 1 file changed, 28 insertions(+), 6 deletions(-)
---
diff --git a/src/orca/speechdispatcherfactory.py b/src/orca/speechdispatcherfactory.py
index c4a78414a..468c71790 100644
--- a/src/orca/speechdispatcherfactory.py
+++ b/src/orca/speechdispatcherfactory.py
@@ -372,6 +372,7 @@ class SpeechServer(speechserver.SpeechServer):
         # Note2: we assume that text mangling below leave U+E000 untouched
         last_begin = None
         last_end = None
+        is_numeric = None
         marks_offsets = []
         marks_endoffsets = []
         marked_text = ""
@@ -387,12 +388,33 @@ class SpeechServer(speechserver.SpeechServer):
                 # Word begin
                 marked_text += '\ue000'
                 last_begin = i
-
-            if c.isspace() and last_begin != None:
-                # Word end, add a mark
-                marks_offsets.append(last_begin)
-                marks_endoffsets.append(i)
-                last_begin = None
+                is_numeric = c.isnumeric()
+
+            elif c.isspace() and last_begin != None:
+                # Word end
+                if is_numeric:
+                    # We had a wholly numeric word; the next word may be numeric as well.
+                    # Skip to next word
+                    for j in range(i+1, len(text)):
+                        if not text[j].isspace():
+                            break
+                    else:
+                        is_numeric = False
+                    # Check next word
+                    while is_numeric and j < len(text) and not text[j].isspace():
+                        if not text[j].isnumeric():
+                            is_numeric = False
+                        j += 1
+
+                if not is_numeric:
+                    # add a mark
+                    marks_offsets.append(last_begin)
+                    marks_endoffsets.append(i)
+                    last_begin = None
+                    is_numeric = None
+
+            elif is_numeric and not c.isnumeric():
+                is_numeric = False
 
             marked_text += c
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]