Another improved Hebrew dots placement patch

From: dov imagic weizmann ac il
To: gtk-i18n-list gnome org
Subject: Another improved Hebrew dots placement patch
Date: Tue, 25 Sep 2001 23:09:18 IST
Here is my second patch for improved Hebrew vowel (nikud) placement,
implementing some more magic heuristics. I don't think there is 
much more that can be improved based only on the bounding boxes. 
If, on the other hand, it were possible to access the outlines or
pixmaps of the characters, then I could have a lot more fun.

Regards,
Dov

--- pango-0.19.org/modules/hebrew/hebrew-x.c	Wed Jul 18 13:14:53 2001
+++ pango-0.19/modules/hebrew/hebrew-x.c	Fri Sep 21 12:51:12 2001
@@ -56,15 +56,26 @@
 #define	__NS			2
 #define	__DA			3
 
-/* Unicode definitions ... */
+
+/* Unicode definitions needed by the logic below... */
+#define UNI_BET 0x5d1
+#define UNI_DALED 0x5d3
+#define UNI_KAF 0x05DB
 #define UNI_VAV			0x5d5
+#define UNI_YOD 0x5d9
+#define UNI_RESH 0x5e8
 #define UNI_LAMED		0x5DC
 #define UNI_SHIN		0x5E9
 #define UNI_FINAL_PE		0x05E3
 #define UNI_PE			0x05E4
+#define UNI_TAV 0x5EA
 #define UNI_SHIN_DOT		0x5c1
 #define UNI_SIN_DOT		0x5c2
 #define UNI_MAPIQ		0x5bc
+#define UNI_SHEVA 0x5b0
+#define UNI_QAMATS 0x5b8
+#define UNI_HOLAM 0x5b9
+#define UNI_QUBUTS 0x5bb
 
 #define is_char_class(wc, mask)	(char_class_table[ucs2iso8859_8 ((wc))] & (mask))
 #define	is_composible(cur_wc, nxt_wc)	(compose_table[char_type_table[ucs2iso8859_8 (cur_wc)]]\
@@ -144,7 +155,6 @@
 
   /*00*/ __ND, __ND, __ND, __ND, __ND, __ND, __ND, __ND,
          __ND, __ND, __ND, __ND, __ND, __ND, __ND, __ND,
-		
   /*10*/ __ND, __NS, __NS, __NS, __NS, __NS, __NS, __NS,
          __NS, __NS, __NS, __NS, __NS, __NS, __NS, __NS,
   /*20*/ __NS, __NS, __ND, __NS, __NS, __NS, __NS, __NS,
@@ -212,6 +222,7 @@
 static const gint Unicode_shape_table[128] = {
   /* 00 */    0x00, 0x00, 0x00, 0x00,  0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00,  0x00, 0x00, 0x00, 0x00,
+
   /* cantillation marks followed by accents */
   /* 10 */    0x0000, 0x0591, 0x0592, 0x0593, 0x0594, 0x0595, 0x0596, 0x0597,
               0x0598, 0x0599, 0x059A, 0x059B, 0x059C, 0x059D, 0x059E, 0x059F,
@@ -435,6 +446,7 @@
   gint i;
   
   num_glyphs = get_glyphs_list(font_info, cluster, num_chrs, glyphs_list);
+
   for (i=0; i<num_glyphs; i++)
        add_glyph (font_info, glyphs, cluster_start, glyphs_list[i],
 	    		i == 0 ? FALSE : TRUE);
@@ -494,6 +506,27 @@
 		     cluster width. But how can I check if that is the
 		     case??
 		  */
+		  /* This is wild, but it does the job of differentiating
+		     between two M$ fonts... Base the decision on the
+		     aspect ratio of the vav...
+		  */
+		  if (base_ink_height > base_ink_width * 3.5)
+		    {
+		      int j;
+		      double space = 0.7;
+		      double kern = 0.5;
+
+		      for (j=0; j<i; j++)
+			{
+			  glyphs->glyphs[cluster_start_idx+j].geometry.x_offset
+			    += ink_rect.width*(1+space-kern);
+			}
+		      
+		      glyphs->glyphs[cluster_start_idx+i].geometry.width
+			+= ink_rect.width*(1+space-kern);
+		      glyphs->glyphs[cluster_start_idx+i].geometry.x_offset
+			-= ink_rect.width*(kern);
+		    }
 		}
 
 	      /* Dot over SHIN */
@@ -511,13 +544,27 @@
 		    = base_ink_x_offset -ink_rect.x;
 		}
 
-	      /* VOWEL DOT next to LAMED */
-	      else if (gl == UNI_SIN_DOT && base_char == UNI_LAMED)
+	      /* A VOWEL DOT above any character other than
+	         SHIN or VAV should stick out a bit to the left. */
+	      else if ((gl == UNI_SIN_DOT || gl == UNI_HOLAM)
+		       && base_char != UNI_SHIN && base_char != UNI_VAV)
 		{  
 		  glyphs->glyphs[cluster_start_idx+i].geometry.x_offset
 		    = base_ink_x_offset -ink_rect.x - 2*ink_rect.width;
 		}
 
+	      /* VOWELS under resh, vav or yod are right aligned */
+	      else if ((base_char == UNI_VAV || base_char == UNI_RESH
+			|| base_char == UNI_YOD)
+		       && ((gl >= UNI_SHEVA && gl <= UNI_QAMATS) ||
+			   gl == UNI_QUBUTS)) 
+		{
+		  glyphs->glyphs[cluster_start_idx+i].geometry.x_offset
+		    = base_ink_x_offset + base_ink_width
+		    - ink_rect.x - ink_rect.width;
+		}
+
+	      
 	      /* MAPIQ in PE or FINAL PE */
 	      else if (gl == UNI_MAPIQ
 		       && (base_char == UNI_PE || base_char == UNI_FINAL_PE))
@@ -532,12 +579,34 @@
 		}
 
 	      /* VOWEL DOT next to any other character */
-	      else if (gl == UNI_SIN_DOT)
+	      else if ((gl == UNI_SIN_DOT || gl == UNI_HOLAM)
+		       && (base_char != UNI_VAV))
 		{   
 		  glyphs->glyphs[cluster_start_idx+i].geometry.x_offset
 		    = base_ink_x_offset -ink_rect.x;
 		}
 
+	      /* Move nikud of taf a bit ... */
+	      else if (base_char == UNI_TAV)
+		{
+		  glyphs->glyphs[cluster_start_idx+i].geometry.x_offset
+		    = base_ink_x_offset - ink_rect.x
+		    + base_ink_width * 5/8 - ink_rect.width/2;
+		}
+
+	      /* Move center dot of characters with a right stem and no
+		 left stem. */
+	      else if (gl == UNI_MAPIQ &&
+		       (base_char == UNI_BET
+			|| base_char == UNI_DALED
+			|| base_char == UNI_KAF
+			))
+		{
+		  glyphs->glyphs[cluster_start_idx+i].geometry.x_offset
+		    = base_ink_x_offset - ink_rect.x
+		    + base_ink_width * 3/8 - ink_rect.width/2;
+		}
+	      
 	      /* Center by default */
 	      else
 		{  
@@ -563,7 +632,6 @@
   /* What is the maximum size of a Hebrew cluster? It is certainly
      bigger than two characters... */
   while (p < text + length && n_chars < MAX_CLUSTER_CHRS)  
-
     {
       gunichar current = g_utf8_get_char (p);
       
@@ -654,7 +722,7 @@
 
 static PangoCoverage *
 hebrew_engine_get_coverage (PangoFont *font,
-			   PangoLanguage *lang)
+			   const char *lang)
 {
   PangoCoverage *result = pango_coverage_new ();
   
@@ -702,6 +770,7 @@
 
 /* List the engines contained within this module
  */
+
 void 
 MODULE_ENTRY(script_engine_list) (PangoEngineInfo **engines, gint *n_engines)
 {
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]