Re: Industry Thai Cell-Clustering Rules



Hi K.Theppitak,

] From: Theppitak Karoonboonayanan <thep links nectec or th>
] 
] I would like to discuss some points before going on..
] 
] As you once stated about the cursor movements and editing behaviors within
] Thai text :-
] - It is natural to move cursor by one cell instead of by one character.
] - Text insertion is only allowed at the cell boundary.
] - Text deletion with Del key should delete the whole cell under the cursor,
]   not only the baseline character. This can prevent confusion when the
]   upper/lower vowels from the right cell jump to combine with the left cell.
] - However, text deletion with backspace can be done on character basis,
]   because there could be no confusion in that case.

Yes, this is correct with the I-Beam cursor type.

] 
] Therefore, the meaning of "cell" here is quite sensitive for the special
] case of SARA AM, which occupies two cells.

I think it would be better to use the term "cluster", which is more
appropriate for the SaraAm case.

Here is the cluster case for SaraAm.

(1) Cons + SaraAm
(2) Cons + Tone + SaraAm

] 
] Moreover, the consistence between the cell counting and the physical
] appearance is quite important in some matrix-based display, like those in
] web browser's <TEXTAREA> and in terminal emulators.

I am not quite sure what you mean here; please give me more detail.

] 
] So, I think the cell retrieval routine you have provided could be used in
] other areas as well, and it could be shared for consistency.

The WTT cell-clustering routine can be shared with other areas for
consistency.

] But how could
] we make its meaning more firm?
] 

Are you suggesting a name for it? I'm not sure I understand the
question; please explain in more detail.

] I think the twisting of the cell meaning comes from the need to render
] SARA AM in two cells. So, how about letting two consecutive cells combine
] in shaping stage?

Please take a look at the change I made in thai.c (attached) and let me
know whether it is what you meant.

The change is in the #ifdef WTT_CLUSTERING.

] 
] If you agree, I will design the shaping mechanism based on this concept.

I agree. Please send me any comments, and feel free to modify the
changes I attached.

Thanks a lot.

Chookij V.


] 
] Regards,
] -Theppitak.
] 
/* Pango
 * thai.c:
 *
 * Copyright (C) 1999 Red Hat Software
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <iconv.h>
#include <string.h>

#include <glib.h>
#include "pango.h"
#include "pangox.h"
#include <fribidi/fribidi.h>

#ifdef	WTT_CLUSTERING
/* Capacity of the per-cluster character buffer and of the per-cluster
 * glyph buffer used by the WTT code paths.
 */
#define	MAX_CLUSTER_CHRS	256
#define	MAX_GLYPHS		256

/* Define TACTIS character classes.
 * These are the values stored in _TACchclass[] and are used as the row
 * and column indices of _TAC_celltype_inputcheck[][].
 */
#define	CTRL			0
#define	NON			1
#define	CONS			2
#define	LV			3
#define	FV1			4
#define	FV2			5
#define	FV3			6
#define	BV1			7
#define	BV2			8
#define	BD			9
#define	TONE			10
#define	AD1			11
#define	AD2			12
#define	AD3			13
#define	AV1			14
#define	AV2			15
#define	AV3			16

/* Shaping classes: bit flags stored in _ChrTypeTbl[] and tested with
 * IsChrType().  _ND (0) marks a character with no shaping class.
 */
#define	_ND			0
#define	_NC			1
#define	_UC			(1<<1)
#define	_BC			(1<<2)
#define	_SC			(1<<3)
#define	_AV			(1<<4)
#define	_BV			(1<<5)
#define	_TN			(1<<6)
#define	_AD			(1<<7)
#define	_BD			(1<<8)
#define	_AM			(1<<9)

/* Readable aliases for the flags above.  Cons is parenthesized so that
 * it behaves as a single mask inside any expression (e.g. `x & Cons').
 */
#define	NoTailCons		_NC
#define	UpTailCons		_UC
#define	BotTailCons		_BC
#define	SpltTailCons		_SC
#define	Cons			(_NC|_UC|_BC|_SC)
#define	SaraAm			_AM
#define	Tone			_TN

/* TACTIS class of a table index (a TIS-620 byte value, 0..255); the
 * caller is responsible for keeping the index inside the table.
 */
#define ChrType(InpChr)		_TACchclass[(unsigned int)(InpChr)]

/* TRUE if wc is U+0E33 (THAI CHARACTER SARA AM). */
#define	IsSaraAm(wc)		((wc) == 0x0E33 ? TRUE : FALSE)

/* Non-zero if the Unicode character wc carries any shaping class in
 * mask.  The table is indexed by (wc - 0xE00 + 0xA0); characters that
 * would fall outside its 256 entries simply have no class (0) instead
 * of reading out of bounds.  NOTE: wc is evaluated more than once, so
 * do not pass an expression with side effects.
 */
#define	IsChrType(wc, mask)	\
	(((wc) >= 0xE00 - 0xA0 && (wc) < 0xE00 + 0x60) \
	 ? (_ChrTypeTbl[(unsigned int)((wc) - 0xE00 + 0xA0)] & (mask)) \
	 : 0)
#endif


/* Code-point range claimed by this module: U+0E01 through U+0E5B.
 * Both entries of script_engines[] point at this table.
 * NOTE(review): the "*" field is presumably a language wildcard --
 * confirm against the PangoEngineRange definition.
 */
static PangoEngineRange thai_ranges[] = {
  { 0x0e01, 0x0e5b, "*" },  /* Thai */
};

/* Engines exported by this module, as returned by script_engine_list().
 * The id strings here are the ones script_engine_load() compares
 * against: a language engine with no rendering backend, and a shaping
 * engine for the X backend.  Both cover thai_ranges.
 */
static PangoEngineInfo script_engines[] = {
  {
    "ThaiScriptEngineLang",
    PANGO_ENGINE_TYPE_LANG,
    PANGO_RENDER_TYPE_NONE,
    thai_ranges, G_N_ELEMENTS(thai_ranges)
  },
  {
    "ThaiScriptEngineX",
    PANGO_ENGINE_TYPE_SHAPE,
    PANGO_RENDER_TYPE_X,
    thai_ranges, G_N_ELEMENTS(thai_ranges)
  }
};

/*
 * Language script engine
 */

/* script_break callback installed by thai_engine_lang_new().
 * The body is empty: none of the arguments are read and attrs is left
 * untouched, so this engine contributes no break information.
 */
static void 
thai_engine_break (const char     *text,
		    gint            len,
		    PangoAnalysis  *analysis,
		    PangoLogAttr   *attrs)
{
}

/* Allocate and initialise the Thai language engine.
 *
 * Returns a newly allocated PangoEngineLang (from g_new), cast to its
 * PangoEngine base, with script_break set to thai_engine_break.
 */
static PangoEngine *
thai_engine_lang_new (void)
{
  PangoEngineLang *result;
  
  result = g_new (PangoEngineLang, 1);

  result->engine.id = "ThaiScriptEngine";
  result->engine.type = PANGO_ENGINE_TYPE_LANG;
  /* Size of the engine structure itself; sizeof (result) would only be
   * the size of the pointer.
   */
  result->engine.length = sizeof (*result);
  result->script_break = thai_engine_break;

  return (PangoEngine *)result;
}

/*
 * X window system script engine portion
 */

typedef struct _ThaiFontInfo ThaiFontInfo;

/* The type of encoding that we will use.
 * THAI_FONT_NONE is the value get_font_info() leaves in place when no
 * listed charset matched; THAI_FONT_TIS_2 exists only in
 * WTT_CLUSTERING builds and is selected for the "tis620-2" charset.
 */
typedef enum {
  THAI_FONT_NONE,
  THAI_FONT_XTIS,
  THAI_FONT_TIS,
#ifdef	WTT_CLUSTERING
  THAI_FONT_TIS_2,
#endif
  THAI_FONT_ISO10646
} ThaiFontType;

/* Per-font rendering information, built by get_font_info() and attached
 * to the PangoFont as object data.
 */
struct _ThaiFontInfo
{
  PangoFont   *font;      /* font this record was computed for */
  ThaiFontType type;      /* encoding of the chosen subfont */
  PangoXSubfont subfont;  /* chosen subfont; only assigned when a charset matched */
};

/* All combining marks for Thai fall in the range U+0E30-U+0E50,
 * so we confine our data tables to that range, and use
 * default values for characters outside those ranges.
 */

/* Map from code point to group used for rendering with XTIS fonts
 * (0 == base character).
 * Indexed by (wc - 0xe30); thai_engine_shape() only consults it for
 * 0xe30 <= wc < 0xe50, i.e. the 32 entries below, 8 code points per row.
 */
static const char groups[32] = {
  0, 1, 0, 0, 1, 1, 1, 1,
  1, 1, 1, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 2,
  2, 2, 2, 2, 2, 2, 1, 0
};

/* Map from code point to index within group 1
 * (0 == no combining mark from group 1).
 * Indexed by (wc - 0xe30).  add_cluster() multiplies the value by 8
 * when building the precomposed XTIS glyph index.
 */   
static const char group1_map[32] = {
  0, 1, 0, 0, 2, 3, 4, 5,
  6, 7, 8, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0
};

/* Map from code point to index within group 2
 * (0 == no combining mark from group 2).
 * Indexed by (wc - 0xe30).  add_cluster() adds the value directly to
 * the precomposed XTIS glyph index.
 */   
static const char group2_map[32] = {
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 1,
  2, 3, 4, 5, 6, 7, 1, 0
};

#ifdef	WTT_CLUSTERING

/* Shaping class of every table position, as a bit flag (_NC, _UC, _BC,
 * _SC, _AV, _BV, _TN, _AD, _BD, _AM) or _ND for "no class".
 * IsChrType() indexes this table with (wc - 0xE00 + 0xA0), so Thai
 * code points U+0E01.. land in rows A0 and above; rows 00-9F are all
 * _ND.  The row labels give the high nibble of the index.
 */
static int _ChrTypeTbl[256] = {
	/*	   0,   1,   2,   3,   4,   5,   6,   7,
		   8,   9,   A,   B,   C,   D,   E,   F  */

	/*00*/	 _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
		 _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
	/*10*/	 _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
		 _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
	/*20*/	 _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
		 _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
	/*30*/	 _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
		 _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
	/*40*/	 _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
		 _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
	/*50*/	 _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
		 _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
	/*60*/	 _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
		 _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
	/*70*/	 _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
		 _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
	/*80*/	 _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
		 _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
	/*90*/	 _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
		 _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
		
	/*A0*/	 _ND, _NC, _NC, _NC, _NC, _NC, _NC, _NC,
		 _NC, _NC, _NC, _NC, _NC, _SC, _BC, _BC,
	/*B0*/	 _SC, _NC, _NC, _NC, _NC, _NC, _NC, _NC,
		 _NC, _NC, _NC, _UC, _NC, _UC, _NC, _UC,
	/*C0*/	 _NC, _NC, _NC, _NC, _ND, _NC, _ND, _NC,
		 _NC, _NC, _NC, _NC, _NC, _NC, _NC, _ND,
	/*D0*/	 _ND, _AV, _ND, _AM, _AV, _AV, _AV, _AV,
		 _BV, _BV, _BD, _ND, _ND, _ND, _ND, _ND,
	/*E0*/	 _ND, _ND, _ND, _ND, _ND, _ND, _ND, _AD,
		 _TN, _TN, _TN, _TN, _AD, _AD, _AD, _ND,
	/*F0*/	 _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
		 _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND
};

/* TACTIS class (CTRL, NON, CONS, LV, ... AV3) of every table position,
 * read through the ChrType() macro.  IsWttCombinedRule() passes
 * (wc - 0xE00 + 0xA0) as the index and uses the resulting class numbers
 * as row/column indices into _TAC_celltype_inputcheck.  The row labels
 * give the high nibble of the index.
 */
static int _TACchclass[256] = {
	/*	   0,   1,   2,   3,   4,   5,   6,   7,
		   8,   9,   A,   B,   C,   D,   E,   F  */

	/*00*/	CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,
		CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,
	/*10*/	CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,
		CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,
	/*20*/	 NON, NON, NON, NON, NON, NON, NON, NON,
		 NON, NON, NON, NON, NON, NON, NON, NON,
	/*30*/	 NON, NON, NON, NON, NON, NON, NON, NON,
		 NON, NON, NON, NON, NON, NON, NON, NON,
	/*40*/	 NON, NON, NON, NON, NON, NON, NON, NON,
		 NON, NON, NON, NON, NON, NON, NON, NON,
	/*50*/	 NON, NON, NON, NON, NON, NON, NON, NON,
		 NON, NON, NON, NON, NON, NON, NON, NON,
	/*60*/	 NON, NON, NON, NON, NON, NON, NON, NON,
		 NON, NON, NON, NON, NON, NON, NON, NON,
	/*70*/	 NON, NON, NON, NON, NON, NON, NON, NON,
		 NON, NON, NON, NON, NON, NON, NON,CTRL,
	/*80*/	CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,
		CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,
	/*90*/	CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,
		CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,
	/*A0*/	 NON,CONS,CONS,CONS,CONS,CONS,CONS,CONS,
		CONS,CONS,CONS,CONS,CONS,CONS,CONS,CONS,
	/*B0*/	CONS,CONS,CONS,CONS,CONS,CONS,CONS,CONS,
		CONS,CONS,CONS,CONS,CONS,CONS,CONS,CONS,
	/*C0*/	CONS,CONS,CONS,CONS, FV3,CONS, FV3,CONS,
		CONS,CONS,CONS,CONS,CONS,CONS,CONS, NON,
	/*D0*/	 FV1, AV2, FV1, FV1, AV1, AV3, AV2, AV3,
		 BV1, BV2,  BD, NON, NON, NON, NON, NON,
	/*E0*/	  LV,  LV,  LV,  LV,  LV, FV2, NON, AD2,
		TONE,TONE,TONE,TONE, AD1, AD1, AD3, NON,
	/*F0*/	 NON, NON, NON, NON, NON, NON, NON, NON,
		 NON, NON, NON, NON, NON, NON, NON,CTRL
};

/* Table for Thai Cell Manipulation.
 *
 * Indexed as [class of previous character Cn-1][class of next character
 * Cn], where both classes are the TACTIS class numbers 0..16 (CTRL ..
 * AV3) produced by ChrType().  The initializer is flat: every row is 17
 * values written as 8 + 9 per pair of lines.
 *
 * IsWttCombinedRule() treats 'C' as "Cn joins the cell of Cn-1" and
 * every other code ('X', 'A', 'S', 'R') as "does not join".
 * NOTE(review): the individual meanings of X/A/S/R are not used by this
 * file -- presumably the WTT accept/reject codes; confirm against the
 * WTT specification.
 */
static char _TAC_celltype_inputcheck[17][17] = {
	/* Cn */ /*    CTRL, NON,CONS,  LV, FV1, FV2, FV3, BV1,
			BV2,  BD,TONE, AD1, AD2, AD3, AV1, AV2, AV3 */
	/* Cn-1 CTRL */	'X', 'A', 'A', 'A', 'A', 'A', 'A', 'R',
			'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
	     /* NON */	'X', 'A', 'A', 'A', 'S', 'S', 'A', 'R',
			'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
	     /* CONS */	'X', 'A', 'A', 'A', 'A', 'S', 'A', 'C',
			'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C',
	     /*	LV */	'X', 'S', 'A', 'S', 'S', 'S', 'S', 'R',
			'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
	     /* FV1 */	'X', 'S', 'A', 'A', 'S', 'S', 'A', 'R',
			'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
	     /* FV2 */	'X', 'A', 'A', 'A', 'A', 'S', 'A', 'R',
			'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
	     /* FV3 */	'X', 'A', 'A', 'A', 'S', 'A', 'S', 'R',
			'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
	     /* BV1 */	'X', 'A', 'A', 'A', 'S', 'S', 'A', 'R',
			'R', 'R', 'C', 'C', 'R', 'R', 'R', 'R', 'R',
	     /* BV2 */	'X', 'A', 'A', 'A', 'S', 'S', 'A', 'R',
			'R', 'R', 'C', 'R', 'R', 'R', 'R', 'R', 'R',
	     /* BD */	'X', 'A', 'A', 'A', 'S', 'S', 'A', 'R',
			'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
	     /* TONE */	'X', 'A', 'A', 'A', 'A', 'A', 'A', 'R',
			'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
	     /* AD1 */	'X', 'A', 'A', 'A', 'S', 'S', 'A', 'R',
			'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
	     /* AD2 */	'X', 'A', 'A', 'A', 'S', 'S', 'A', 'R',
			'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
	     /* AD3 */	'X', 'A', 'A', 'A', 'S', 'S', 'A', 'R',
			'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
	     /* AV1 */	'X', 'A', 'A', 'A', 'S', 'S', 'A', 'R',
			'R', 'R', 'C', 'C', 'R', 'R', 'R', 'R', 'R',
	     /* AV2 */	'X', 'A', 'A', 'A', 'S', 'S', 'A', 'R',
			'R', 'R', 'C', 'R', 'R', 'R', 'R', 'R', 'R',
	     /* AV3 */	'X', 'A', 'A', 'A', 'S', 'S', 'A', 'R',
			'R', 'R', 'C', 'R', 'C', 'R', 'R', 'R', 'R'
};

/* Replacement glyph codes used by get_glyphs_list() for THAI_FONT_TIS_2
 * fonts.  Each table is sized for one contiguous run of positions in
 * _ChrTypeTbl (i.e. of TIS-620 byte values):
 *   *_TONE_AD[8]   positions 0xE7..0xEE (the _AD / _TN entries)
 *   ShiftLeft_AV[7] positions 0xD1..0xD7 (the _AV entries; 0xD2 and
 *                   0xD3 are not _AV and hold 0x00)
 *   ShiftDown_BV_BD[3] positions 0xD8..0xDA (the _BV / _BD entries)
 *   TailCutCons[4] positions 0xAD..0xB0 (only 0xAD and 0xB0 are _SC;
 *                   the others hold 0x00)
 * NOTE(review): the glyph codes themselves (0x80-0x9F, 0xFC-0xFE) are
 * presumably the extension positions of the "tis620-2" font encoding --
 * confirm against the font.
 */
static const gint ShiftDown_TONE_AD[8] = {
  0xE7, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0xED, 0xEE
};

static const gint ShiftDownLeft_TONE_AD[8] = {
  0xE7, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x99, 0xEE
};

static const gint ShiftLeft_TONE_AD[8] = {
  0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, 0x99, 0xEE
};

static const gint ShiftLeft_AV[7] = {
  0x98, 0x00, 0x00, 0x81, 0x82, 0x83, 0x84
};

static const gint ShiftDown_BV_BD[3] = {
  0xFC, 0xFD, 0xFE
};

static const gint TailCutCons[4] = {
  0x90, 0x00, 0x00, 0x80
};

#endif

/* Look up (or build) the rendering information for a #PangoFont.
 *
 * The record is stored on the font object under the "thai-font-info"
 * key, so it is computed only on the first call for a given font and
 * released with g_free when the font drops its data.
 */
static ThaiFontInfo *
get_font_info (PangoFont *font)
{
  /* Charset names to ask the font for, in order of preference ... */
  static const char *charsets[] = {
    "xtis620.2529-1",
    "xtis-0",
    "tis620.2533-1",
    "tis620.2529-1",
    "iso8859-11",
#ifdef	WTT_CLUSTERING
    "tis620-2",
#endif
    "iso10646-1",
  };

  /* ... and the encoding each of those names stands for. */
  static const int charset_types[] = {
    THAI_FONT_XTIS,
    THAI_FONT_XTIS,
    THAI_FONT_TIS,
    THAI_FONT_TIS,
    THAI_FONT_TIS,
#ifdef	WTT_CLUSTERING
    THAI_FONT_TIS_2,
#endif
    THAI_FONT_ISO10646
  };

  GQuark key = g_quark_from_string ("thai-font-info");
  ThaiFontInfo *info = g_object_get_qdata (G_OBJECT (font), key);
  PangoXSubfont *ids;
  int *charset_index;
  int count, k;

  /* Already computed for this font. */
  if (info)
    return info;

  info = g_new (ThaiFontInfo, 1);
  info->font = font;
  info->type = THAI_FONT_NONE;

  g_object_set_qdata_full (G_OBJECT (font), key, info, (GDestroyNotify)g_free);

  count = pango_x_list_subfonts (font, (char **)charsets, G_N_ELEMENTS (charsets),
				 &ids, &charset_index);

  /* Take the first usable subfont.  An ISO 10646 subfont only counts
   * if it really contains a glyph for U+0E01.
   */
  for (k = 0; k < count; k++)
    {
      ThaiFontType candidate = charset_types[charset_index[k]];

      if (candidate == THAI_FONT_ISO10646 &&
	  !pango_x_has_glyph (font, PANGO_X_MAKE_GLYPH (ids[k], 0xe01)))
	continue;

      info->type = candidate;
      info->subfont = ids[k];
      break;
    }

  g_free (ids);
  g_free (charset_index);

  return info;
}

/* Append one glyph to the glyph string.
 *
 * font_info      supplies the font used to measure the glyph
 * glyphs         glyph string to grow by one entry
 * cluster_start  value recorded in log_clusters[] for the new entry
 * glyph          glyph to append
 * combining      when TRUE the glyph is stacked on the previous entry:
 *                it is not a cluster start, it takes over the larger of
 *                the two widths, and the previous entry's width becomes 0
 */
static void
add_glyph (ThaiFontInfo     *font_info, 
	   PangoGlyphString *glyphs, 
	   int               cluster_start, 
	   PangoGlyph        glyph,
	   gboolean          combining)
{
  PangoRectangle ink, logical;
  int pos = glyphs->num_glyphs;

  pango_glyph_string_set_size (glyphs, pos + 1);

  glyphs->log_clusters[pos] = cluster_start;
  glyphs->glyphs[pos].glyph = glyph;
  if (combining)
    glyphs->glyphs[pos].attr.is_cluster_start = 0;
  else
    glyphs->glyphs[pos].attr.is_cluster_start = 1;

  pango_font_get_glyph_extents (font_info->font,
				glyphs->glyphs[pos].glyph, &ink, &logical);

  glyphs->glyphs[pos].geometry.x_offset = 0;
  glyphs->glyphs[pos].geometry.y_offset = 0;

  if (!combining)
    {
      glyphs->glyphs[pos].geometry.width = logical.width;
      return;
    }

  /* Move the advance onto the mark so the pair advances only once. */
  glyphs->glyphs[pos].geometry.width =
    MAX (logical.width, glyphs->glyphs[pos - 1].geometry.width);
  glyphs->glyphs[pos - 1].geometry.width = 0;
}

#ifdef	WTT_CLUSTERING
/* Table position of a Thai Unicode character: (wc - 0xE00 + 0xA0), the
 * same mapping IsChrType() uses.  The shaping tables (ShiftDown_TONE_AD
 * and friends) are indexed relative to these values, not to the raw
 * Unicode offset.
 */
#define	THAI_TIS_CODE(wc)	((wc) - 0xE00 + 0xA0)
/* Glyph for font code `code' in the subfont chosen for `fi'. */
#define	TIS2_GLYPH(fi, code)	PANGO_X_MAKE_GLYPH ((fi)->subfont, (code))
/* Glyph for the unshaped form of Unicode character `wc'. */
#define	TIS2_RAW(fi, wc)	TIS2_GLYPH (fi, THAI_TIS_CODE (wc))

/* THAI_FONT_TIS_2, one character: a lone mark is preceded by glyph 0x7F
 * (NOTE(review): presumably a blank base to carry the mark -- confirm
 * against the font); anything else maps straight through.
 */
static gint
tis2_shape_single (ThaiFontInfo *fi, gunichar c0, gint *out)
{
  if (IsChrType (c0, _BV|_BD|_AV|_AD|_TN))
    {
      out[0] = TIS2_GLYPH (fi, 0x7F);
      out[1] = TIS2_RAW (fi, c0);
      return 2;
    }

  out[0] = TIS2_RAW (fi, c0);
  return 1;
}

/* THAI_FONT_TIS_2, two characters: pick shifted / tail-cut glyph
 * variants according to the shaping class of the base consonant.
 */
static gint
tis2_shape_pair (ThaiFontInfo *fi, gunichar c0, gunichar c1, gint *out)
{
  guint t0 = THAI_TIS_CODE (c0);
  guint t1 = THAI_TIS_CODE (c1);
  gboolean base_not_up = IsChrType (c0, NoTailCons|BotTailCons|SpltTailCons) != 0;
  gboolean base_up = IsChrType (c0, UpTailCons) != 0;
  gboolean mark_am = IsChrType (c1, SaraAm) != 0;
  gboolean mark_av = IsChrType (c1, _AV) != 0;
  gboolean mark_ad_tn = IsChrType (c1, _AD|_TN) != 0;
  gboolean mark_below = IsChrType (c1, _BV|_BD) != 0;

  if ((base_not_up || base_up) && mark_am)
    {
      /* SARA AM is drawn as two glyphs: 0xED (or its 0x99 variant
       * after an UpTailCons base) followed by 0xD2.
       */
      out[0] = TIS2_RAW (fi, c0);
      out[1] = TIS2_GLYPH (fi, base_not_up ? 0xED : 0x99);
      out[2] = TIS2_GLYPH (fi, 0xD2);
      return 3;
    }

  if (base_not_up && mark_av)
    {
      out[0] = TIS2_RAW (fi, c0);
      out[1] = TIS2_RAW (fi, c1);
      return 2;
    }
  if (base_not_up && mark_ad_tn)
    {
      out[0] = TIS2_RAW (fi, c0);
      out[1] = TIS2_GLYPH (fi, ShiftDown_TONE_AD[t1 - 0xE7]);
      return 2;
    }
  if (base_up && mark_av)
    {
      out[0] = TIS2_RAW (fi, c0);
      out[1] = TIS2_GLYPH (fi, ShiftLeft_AV[t1 - 0xD1]);
      return 2;
    }
  if (base_up && mark_ad_tn)
    {
      out[0] = TIS2_RAW (fi, c0);
      out[1] = TIS2_GLYPH (fi, ShiftDownLeft_TONE_AD[t1 - 0xE7]);
      return 2;
    }
  if (IsChrType (c0, NoTailCons|UpTailCons) && mark_below)
    {
      out[0] = TIS2_RAW (fi, c0);
      out[1] = TIS2_RAW (fi, c1);
      return 2;
    }
  if (IsChrType (c0, BotTailCons) && mark_below)
    {
      out[0] = TIS2_RAW (fi, c0);
      out[1] = TIS2_GLYPH (fi, ShiftDown_BV_BD[t1 - 0xD8]);
      return 2;
    }
  if (IsChrType (c0, SpltTailCons) && mark_below)
    {
      out[0] = TIS2_GLYPH (fi, TailCutCons[t0 - 0xAD]);
      out[1] = TIS2_RAW (fi, c1);
      return 2;
    }

  /* No shaping rule applies: emit 0x7F followed by BOTH characters of
   * the cluster, unshaped (cluster[0] and cluster[1]; there is no
   * third character in a two-character cluster).
   */
  out[0] = TIS2_GLYPH (fi, 0x7F);
  out[1] = TIS2_RAW (fi, c0);
  out[2] = TIS2_RAW (fi, c1);
  return 3;
}

/* THAI_FONT_TIS_2, three characters. */
static gint
tis2_shape_triple (ThaiFontInfo *fi,
		   gunichar c0, gunichar c1, gunichar c2, gint *out)
{
  guint t0 = THAI_TIS_CODE (c0);
  guint t1 = THAI_TIS_CODE (c1);
  guint t2 = THAI_TIS_CODE (c2);
  gboolean base_up = IsChrType (c0, UpTailCons) != 0;
  gboolean tone_then_am = IsChrType (c1, _TN) && IsChrType (c2, SaraAm);
  gboolean av_then_top = IsChrType (c1, _AV) && IsChrType (c2, _AD|_TN);
  gboolean bv_then_top = IsChrType (c1, _BV) && IsChrType (c2, _AD|_TN);

  if (IsChrType (c0, NoTailCons|BotTailCons|SpltTailCons) && tone_then_am)
    {
      out[0] = TIS2_RAW (fi, c0);
      out[1] = TIS2_RAW (fi, c1);
      out[2] = TIS2_GLYPH (fi, 0xED);
      out[3] = TIS2_GLYPH (fi, 0xD2);
      return 4;
    }
  if (base_up && tone_then_am)
    {
      out[0] = TIS2_RAW (fi, c0);
      out[1] = TIS2_GLYPH (fi, ShiftLeft_TONE_AD[t1 - 0xE7]);
      out[2] = TIS2_GLYPH (fi, 0x99);
      out[3] = TIS2_GLYPH (fi, 0xD2);
      return 4;
    }
  if (base_up && av_then_top)
    {
      out[0] = TIS2_RAW (fi, c0);
      out[1] = TIS2_GLYPH (fi, ShiftLeft_AV[t1 - 0xD1]);
      out[2] = TIS2_GLYPH (fi, ShiftLeft_TONE_AD[t2 - 0xE7]);
      return 3;
    }
  if (base_up && bv_then_top)
    {
      out[0] = TIS2_RAW (fi, c0);
      out[1] = TIS2_RAW (fi, c1);
      out[2] = TIS2_GLYPH (fi, ShiftDownLeft_TONE_AD[t2 - 0xE7]);
      return 3;
    }
  if (IsChrType (c0, NoTailCons) && bv_then_top)
    {
      out[0] = TIS2_RAW (fi, c0);
      out[1] = TIS2_RAW (fi, c1);
      out[2] = TIS2_GLYPH (fi, ShiftDown_TONE_AD[t2 - 0xE7]);
      return 3;
    }
  if (IsChrType (c0, SpltTailCons) && bv_then_top)
    {
      out[0] = TIS2_GLYPH (fi, TailCutCons[t0 - 0xAD]);
      out[1] = TIS2_RAW (fi, c1);
      out[2] = TIS2_GLYPH (fi, ShiftDown_TONE_AD[t2 - 0xE7]);
      return 3;
    }
  if (IsChrType (c0, BotTailCons) && bv_then_top)
    {
      out[0] = TIS2_RAW (fi, c0);
      out[1] = TIS2_GLYPH (fi, ShiftDown_BV_BD[t1 - 0xD8]);
      out[2] = TIS2_GLYPH (fi, ShiftDown_TONE_AD[t2 - 0xE7]);
      return 3;
    }

  out[0] = TIS2_RAW (fi, c0);
  out[1] = TIS2_RAW (fi, c1);
  out[2] = TIS2_RAW (fi, c2);
  return 3;
}

/* Translate one cluster into the glyphs that render it.
 *
 * font_info    font/encoding information from get_font_info()
 * cluster      the cluster's Unicode characters
 * num_chrs     number of characters in cluster
 * glyph_lists  address of a pointer to the output buffer; the buffer
 *              must hold at least MAX (num_chrs, 4) entries, because a
 *              THAI_FONT_TIS_2 cluster of up to three characters can
 *              expand to four glyphs
 *
 * Returns the number of glyphs written, or 0 when there is nothing to
 * do.  For THAI_FONT_TIS_2, clusters of one to three characters go
 * through the shaping rules above; any other length is mapped character
 * by character so that no text is silently dropped.
 */
static gint
get_glyphs_list (ThaiFontInfo *font_info,
		 gunichar *cluster,
		 gint num_chrs,
		 gint **glyph_lists)
{
  gint *out;
  int i;

  if ((cluster == NULL) || (num_chrs <= 0) ||
      (glyph_lists == NULL) || (*glyph_lists == NULL))
      return 0;

  out = *glyph_lists;

  switch (font_info->type)
    {
    case THAI_FONT_NONE:
      for (i=0; i < num_chrs; i++)
	  out[i] = pango_x_get_unknown_glyph (font_info->font);
      return num_chrs;

    case THAI_FONT_XTIS:
      for (i=0; i < num_chrs; i++)
	  out[i] =
	      PANGO_X_MAKE_GLYPH (font_info->subfont, 0x100 * (cluster[i] - 0xe00 + 0x20) + 0x30);
      return num_chrs;
      
    case THAI_FONT_TIS:
      for (i=0; i < num_chrs; i++)
	  out[i] = TIS2_RAW (font_info, cluster[i]);
      return num_chrs;
      
    case THAI_FONT_TIS_2: /* Microsoft Extension */
      switch (num_chrs)
	{
	case 1:
	  return tis2_shape_single (font_info, cluster[0], out);
	case 2:
	  return tis2_shape_pair (font_info, cluster[0], cluster[1], out);
	case 3:
	  return tis2_shape_triple (font_info, cluster[0], cluster[1],
				    cluster[2], out);
	default:
	  for (i=0; i < num_chrs; i++)
	      out[i] = TIS2_RAW (font_info, cluster[i]);
	  return num_chrs;
	}
      
    case THAI_FONT_ISO10646:
      for (i=0; i < num_chrs; i++)
	  out[i] = PANGO_X_MAKE_GLYPH (font_info->subfont, cluster[i]);
      return num_chrs;
    }
  return 0;			/* Quiet GCC */
}
#endif


/* Return the glyph code within the font for the given Unicode Thai 
 * code pointer
 */
get_glyph (ThaiFontInfo *font_info, gunichar wc)
{
  switch (font_info->type)
    {
    case THAI_FONT_NONE:
      return pango_x_get_unknown_glyph (font_info->font);
    case THAI_FONT_XTIS:
      return PANGO_X_MAKE_GLYPH (font_info->subfont, 0x100 * (wc - 0xe00 + 0x20) + 0x30);
    case THAI_FONT_TIS:
      return PANGO_X_MAKE_GLYPH (font_info->subfont, wc - 0xe00 + 0xA0);
    case THAI_FONT_ISO10646:
      return PANGO_X_MAKE_GLYPH (font_info->subfont, wc);
    }
  return 0;			/* Quiet GCC */
}

#ifdef	WTT_CLUSTERING
/* Shape one WTT cluster and append its glyphs to the glyph string.
 *
 * font_info      font/encoding information from get_font_info()
 * glyphs         glyph string to append to
 * cluster_start  value recorded in log_clusters[] for every glyph
 * cluster        the cluster's Unicode characters
 * num_chrs       number of characters in cluster
 *
 * The first glyph is added as a cluster start; every following glyph is
 * added as a combining glyph.
 */
static void
add_cluster (ThaiFontInfo *font_info,
	     PangoGlyphString *glyphs,
	     int cluster_start,
	     gunichar *cluster,
	     gint num_chrs)
	     
{
  int i;
  gint glyphs_list[MAX_GLYPHS];
  /* get_glyphs_list() takes the address of a pointer to the buffer.
   * The address of the array itself (&glyphs_list) has a different
   * type and would make the callee read glyph data as if it were a
   * pointer, so hand it a real pointer variable.
   */
  gint *list = glyphs_list;
  gint num_glyphs;

  /* The callee writes at least one glyph per character; never let it
   * run past the local buffer.
   */
  if (num_chrs > MAX_GLYPHS)
    num_chrs = MAX_GLYPHS;
  
  num_glyphs = get_glyphs_list(font_info, cluster, num_chrs, &list);
  for (i=0; i<num_glyphs; i++)
       add_glyph (font_info, glyphs, cluster_start, glyphs_list[i],
	    		i == 0 ? FALSE : TRUE);
}
#else
/* Append the glyphs for one cluster made of a base character and up to
 * two combining marks (either mark may be 0, meaning "absent").
 *
 * XTIS fonts are first asked for a single precomposed glyph; if the
 * font has none, or for any other encoding, the cluster is emitted as
 * the base glyph followed by one combining glyph per mark present.
 */
static void
add_cluster (ThaiFontInfo *font_info,
	     PangoGlyphString *glyphs,
	     int cluster_start,
	     gunichar base, 
	     gunichar group1,
	     gunichar group2)
{
  gunichar marks[2];
  int k;

  if (font_info->type == THAI_FONT_XTIS)
    {
      /* Slot of the bare base character, then the offsets selecting
       * the group 1 and group 2 marks within that character's block.
       */
      int code = 0x100 * (base - 0xe00 + 0x20) + 0x30;
      PangoGlyph precomposed;

      code += group1 ? 8 * group1_map[group1 - 0xe30] : 0;
      code += group2 ? group2_map[group2 - 0xe30] : 0;

      precomposed = PANGO_X_MAKE_GLYPH (font_info->subfont, code);
      if (pango_x_has_glyph (font_info->font, precomposed))
	{
	  add_glyph (font_info, glyphs, cluster_start, precomposed, FALSE);
	  return;
	}
    }

  /* No precomposed form: base first, then whichever marks exist. */
  add_glyph (font_info, glyphs, cluster_start, get_glyph (font_info, base), FALSE);

  marks[0] = group1;
  marks[1] = group2;
  for (k = 0; k < 2; k++)
    {
      if (marks[k])
	add_glyph (font_info, glyphs, cluster_start,
		   get_glyph (font_info, marks[k]), TRUE);
    }
}
#endif


#ifdef	WTT_CLUSTERING
/* Only code points in [WTT_TABLE_FIRST, WTT_TABLE_END) map onto the
 * 256-entry class tables through (wc - 0xE00 + 0xA0).
 */
#define	WTT_TABLE_FIRST		(0xE00 - 0xA0)
#define	WTT_TABLE_END		(0xE00 + 0x60)
#define	WTT_IN_TABLE(wc)	((wc) >= WTT_TABLE_FIRST && (wc) < WTT_TABLE_END)

/* Range-checked IsChrType(): FALSE for characters outside the tables. */
static gboolean
wtt_chr_has_type (gunichar wc, int mask)
{
  return (WTT_IN_TABLE (wc) && IsChrType (wc, mask)) ? TRUE : FALSE;
}

/* TRUE if, according to _TAC_celltype_inputcheck, nxt_wc joins the cell
 * of cur_wc (table entry 'C').  Every other table entry, and any
 * character that does not map into the class tables (ASCII, the NUL
 * terminator, other scripts), yields FALSE.
 */
gboolean
IsWttCombinedRule(gint cur_wc, gint nxt_wc)
{
  if (!WTT_IN_TABLE (cur_wc) || !WTT_IN_TABLE (nxt_wc))
    return FALSE;

  return _TAC_celltype_inputcheck[ChrType(cur_wc - 0xE00 + 0xA0)]
				 [ChrType(nxt_wc - 0xE00 + 0xA0)] == 'C'
	 ? TRUE : FALSE;
}

/* Extract the next cluster from NUL-terminated UTF-8 text.
 *
 * text      start of the cluster
 * cluster   address of a pointer to a buffer with room for at least four
 *           gunichars (three characters plus a 0 terminator).  If the
 *           pointer is NULL the characters are only counted.
 * num_chrs  receives the number of characters in the cluster (may be
 *           NULL)
 *
 * A cluster is the first character followed by up to two characters
 * that IsWttCombinedRule() joins to their predecessor.  Two sequences
 * are added on top of that table: Cons + SaraAm and
 * Cons + Tone + SaraAm.
 *
 * Returns a pointer to the first byte after the cluster.  At the end of
 * the text (or for NULL text) it stores 0 in *num_chrs, sets *cluster
 * to NULL and returns NULL.
 */
static char *
g_utf8_get_next_cluster(const char	*text,
			gunichar	**cluster,
			gint		*num_chrs)
{  
  gunichar scratch[4];
  gunichar *chars;
  gint nChrs;

  if ((text == NULL) || (*text == '\0'))
    {
      if (num_chrs)
	*num_chrs = 0;
      if (cluster)
	*cluster = (gunichar *)NULL;
      return (char *)NULL;
    }

  chars = (cluster && *cluster) ? *cluster : scratch;

  chars[0] = g_utf8_get_char (text);
  text = g_utf8_next_char (text);
  nChrs = 1;

  /* `text' always points at the first character NOT yet in the
   * cluster; it only moves when a character is actually accepted.
   */
  while (nChrs < 3 && *text != '\0')
    {
      gunichar nxt = g_utf8_get_char (text);

      if (!IsWttCombinedRule (chars[nChrs - 1], nxt))
	{
	  /* Not joined by the table; SaraAm still closes a
	   * "Cons" or "Cons + Tone" cluster.
	   */
	  if (IsSaraAm (nxt) &&
	      wtt_chr_has_type (chars[0], Cons) &&
	      (nChrs == 1 || wtt_chr_has_type (chars[1], Tone)))
	    {
	      chars[nChrs++] = nxt;
	      text = g_utf8_next_char (text);
	    }
	  break;
	}

      chars[nChrs++] = nxt;
      text = g_utf8_next_char (text);
    }

  chars[nChrs] = 0;
  if (num_chrs)
    *num_chrs = nChrs;
  return (char *)text;
}
#endif


/* script_shape callback: convert `length' bytes of UTF-8 Thai text into
 * glyphs.
 *
 * The glyph string is emptied first.  With WTT_CLUSTERING the text is
 * cut into WTT clusters; otherwise into base + group 1 mark + group 2
 * mark clusters using the groups[] table.  Every glyph is tagged with
 * the byte offset at which its cluster starts.
 *
 * NOTE: the WTT cluster extractor looks for a NUL terminator, so in
 * that build text must be NUL-terminated at or after text + length.
 */
static void 
thai_engine_shape (PangoFont        *font,
		   const char       *text,
		   gint              length,
		   PangoAnalysis    *analysis,
		   PangoGlyphString *glyphs)
{
  ThaiFontInfo *font_info;
  const char *p;
  const char *end = text + length;

  pango_glyph_string_set_size (glyphs, 0);

  font_info = get_font_info (font);

#ifdef	WTT_CLUSTERING
  p = text;
  while (p < end)
    {
      gunichar cluster[MAX_CLUSTER_CHRS];
      /* The extractor wants the address of a pointer to the buffer,
       * not the address of the array.
       */
      gunichar *chars = cluster;
      gint num_chrs = 0;
      const char *next;

      next = g_utf8_get_next_cluster (p, &chars, &num_chrs);
      if (next == NULL || num_chrs <= 0)
	break;			/* hit a NUL before `end' */

      /* Give back characters that belong to the text after this run. */
      while (next > end && num_chrs > 1)
	{
	  next = g_utf8_prev_char (next);
	  num_chrs--;
	}

      /* Tag the glyphs with the offset where the cluster STARTS. */
      add_cluster (font_info, glyphs, p - text, cluster, num_chrs);
      p = next;
    }
#else
  {
    gunichar base = 0;
    gunichar group1 = 0;
    gunichar group2 = 0;
    int cluster_start = 0;

    p = text;
    while (p < end)
      {
	int group;
	gunichar wc;

	wc = g_utf8_get_char (p);

	if (wc >= 0xe30 && wc < 0xe50)
	  group = groups[wc - 0xe30];
	else
	  group = 0;

	switch (group)
	  {
	  case 0:
	    /* A new base character: flush the pending cluster first. */
	    if (base)
	      {
		add_cluster (font_info, glyphs, cluster_start, base, group1, group2);
		group1 = 0;
		group2 = 0;
	      }
	    cluster_start = p - text;
	    base = wc;
	    break;
	  case 1:
	    group1 = wc;
	    break;
	  case 2:
	    group2 = wc;
	    break;
	  }

	p = g_utf8_next_char (p);
      }

    /* Flush the last cluster. */
    if (base)
      add_cluster (font_info, glyphs, cluster_start, base, group1, group2);
  }
#endif

}

/* get_coverage callback: report which characters this engine can
 * render with the given font.
 *
 * If the font has a usable Thai subfont, U+0E01..U+0E3A and
 * U+0E3F..U+0E5B are marked PANGO_COVERAGE_EXACT; otherwise the
 * coverage object is returned empty.  The lang argument is not used.
 */
static PangoCoverage *
thai_engine_get_coverage (PangoFont  *font,
			   const char *lang)
{
  PangoCoverage *coverage = pango_coverage_new ();
  ThaiFontInfo *info = get_font_info (font);
  gunichar ch;

  if (info->type == THAI_FONT_NONE)
    return coverage;

  ch = 0xe01;
  while (ch <= 0xe5b)
    {
      pango_coverage_set (coverage, ch, PANGO_COVERAGE_EXACT);

      /* Hop over the U+0E3B..U+0E3E gap between the two ranges. */
      ch = (ch == 0xe3a) ? 0xe3f : ch + 1;
    }

  return coverage;
}

static PangoEngine *
thai_engine_x_new ()
{
  PangoEngineShape *result;
  
  result = g_new (PangoEngineShape, 1);

  result->engine.id = "ThaiScriptEngine";
#ifdef	WTT_CLUSTERING
  result->engine.type = PANGO_ENGINE_TYPE_SHAPE;
#else
  result->engine.type = PANGO_ENGINE_TYPE_LANG;
#endif
  result->engine.length = sizeof (result);
  result->script_shape = thai_engine_shape;
  result->get_coverage = thai_engine_get_coverage;

  return (PangoEngine *)result;
}

/* The following three functions provide the public module API for
 * Pango. If we are compiling this file as a module, then we name the
 * entry points script_engine_list, etc. But if we are compiling
 * it for inclusion directly in Pango, then we need them to
 * have distinct names for this module, so we prepend
 * _pango_thai_ (selected by defining MODULE_PREFIX).
 */
#ifdef MODULE_PREFIX
#define MODULE_ENTRY(func) _pango_thai_##func
#else
#define MODULE_ENTRY(func) func
#endif

/* Report the engines provided by this module: *engines is pointed at
 * the static script_engines table and *n_engines receives its length.
 */
void 
MODULE_ENTRY(script_engine_list) (PangoEngineInfo **engines, gint *n_engines)
{
  *n_engines = G_N_ELEMENTS (script_engines);
  *engines = script_engines;
}

/* Create the engine whose id (as listed in script_engines) is given.
 * Returns NULL for an id this module does not provide.
 */
PangoEngine *
MODULE_ENTRY(script_engine_load) (const char *id)
{
  if (strcmp (id, "ThaiScriptEngineLang") == 0)
    return thai_engine_lang_new ();

  if (strcmp (id, "ThaiScriptEngineX") == 0)
    return thai_engine_x_new ();

  return NULL;
}

/* Module unload hook.  The body is empty: the engine passed in is not
 * touched (in particular it is not freed here).
 */
void 
MODULE_ENTRY(script_engine_unload) (PangoEngine *engine)
{
}



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]