Re: Industry Thai Cell-Clustering Rules
- From: Chookij Vanatham <chookij vanatham eng sun com>
- To: gtk-i18n-list gnome org, thep links nectec or th
- Subject: Re: Industry Thai Cell-Clustering Rules
- Date: Tue, 07 Nov 2000 17:44:03 -0800 (PST)
Hi K.Theppitak,
] From: Theppitak Karoonboonayanan <thep links nectec or th>
]
] I would like to discuss some points before going on..
]
] As you once stated about the cursor movements and editing behaviors within
] Thai text :-
] - It is natural to move cursor by one cell instead of by one character.
] - Text insertion is only allowed at the cell boundary.
] - Text deletion with Del key should delete the whole cell under the cursor,
] not only the baseline character. This can prevent confusion when the
] upper/lower vowels from the right cell jump to combine with the left cell.
] - However, text deletion with backspace can be done on character basis,
] because there could be no confusion in that case.
Yes, this is correct with the I-Beam cursor type.
]
] Therefore, the meaning of "cell" here is quite sensitive for the special
] case of SARA AM, which occupies two cells.
I think it would be better to use the term "cluster", which is more
appropriate for the SaraAm case.
Here is the cluster case for SaraAm.
(1) Cons + SaraAm
(2) Cons + Tone + SaraAm
]
] Moreover, the consistence between the cell counting and the physical
] appearance is quite important in some matrix-based display, like those in
] web browser's <TEXTAREA> and in terminal emulators.
I am not quite sure what you are trying to say; please give me more detail.
]
] So, I think the cell retrieval routine you have provided could be used in
] other areas as well, and it could be shared for consistency.
The WTT cell-clustering rule routine can be shared with other areas
for consistency.
] But how could
] we make its meaning more firm?
]
Are you suggesting a name for it? I'm not sure I understand
the question; please explain in more detail.
] I think the twisting of the cell meaning comes from the need to render
] SARA AM in two cells. So, how about letting two consecutive cells combine
] in shaping stage?
Please take a look at the change I made in thai.c as attached and, let me
know if the change I did is what you meant.
The change is in the #ifdef WTT_CLUSTERING.
]
] If you agree, I will design the shaping mechanism based on this concept.
I agree. Please let me know if you have any comments, and feel free to
modify any of the changes I attached.
Thanks a lot.
Chookij V.
]
] Regards,
] -Theppitak.
]
/* Pango
* thai.c:
*
* Copyright (C) 1999 Red Hat Software
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#include <iconv.h>
#include <glib.h>
#include "pango.h"
#include "pangox.h"
#include <fribidi/fribidi.h>
#ifdef WTT_CLUSTERING
/* Capacity of the scratch arrays the shaper uses to hold the characters
 * and the glyphs of a single cluster.
 */
#define MAX_CLUSTER_CHRS 256
#define MAX_GLYPHS 256

/* TACTIS character classes.  These are the values stored in
 * _TACchclass[] and the row/column indices of
 * _TAC_celltype_inputcheck[], so they must stay in the range 0..16.
 */
#define CTRL 0
#define NON 1
#define CONS 2
#define LV 3
#define FV1 4
#define FV2 5
#define FV3 6
#define BV1 7
#define BV2 8
#define BD 9
#define TONE 10
#define AD1 11
#define AD2 12
#define AD3 13
#define AV1 14
#define AV2 15
#define AV3 16

/* Shaping categories, stored as bit flags in _ChrTypeTbl[].
 * _ND (0) means "no category".
 */
#define _ND 0
#define _NC 1
#define _UC (1<<1)
#define _BC (1<<2)
#define _SC (1<<3)
#define _AV (1<<4)
#define _BV (1<<5)
#define _TN (1<<6)
#define _AD (1<<7)
#define _BD (1<<8)
#define _AM (1<<9)

/* Readable aliases for the category flags.  Cons is parenthesised so that
 * it stays a single mask wherever it is expanded.
 */
#define NoTailCons _NC
#define UpTailCons _UC
#define BotTailCons _BC
#define SpltTailCons _SC
#define Cons (_NC|_UC|_BC|_SC)
#define SaraAm _AM
#define Tone _TN

/* TACTIS class stored at table position InpChr.  InpChr is used directly
 * as an index into the 256-entry _TACchclass[] and is NOT range-checked.
 */
#define ChrType(InpChr) _TACchclass[(unsigned int)(InpChr)]

/* TRUE when wc is U+0E33 (THAI CHARACTER SARA AM). */
#define IsSaraAm(wc) ((wc) == 0x0E33 ? TRUE : FALSE)

/* Non-zero when the Unicode character wc carries any of the category
 * flags in mask.  Code points outside U+0E00..U+0E5F would index outside
 * _ChrTypeTbl[], so they are reported as having no category at all.
 * Note that wc is evaluated more than once.
 */
#define IsChrType(wc, mask) \
  (((wc) >= 0x0E00 && (wc) <= 0x0E5F) \
   ? (_ChrTypeTbl[(unsigned int)((wc) - 0xE00 + 0xA0)] & (mask)) \
   : 0)
#endif
/* Unicode ranges claimed by this module: a single entry covering
 * U+0E01 through U+0E5B.
 * NOTE(review): the "*" string is presumably a language wildcard --
 * confirm against the PangoEngineRange definition.
 */
static PangoEngineRange thai_ranges[] = {
{ 0x0e01, 0x0e5b, "*" }, /* Thai */
};
/* Engines advertised by this module: one language engine and one X shape
 * engine, both registered for thai_ranges.  The id strings are the ones
 * the engine loader compares against when asked to instantiate an engine.
 */
static PangoEngineInfo script_engines[] = {
{
"ThaiScriptEngineLang",
PANGO_ENGINE_TYPE_LANG,
PANGO_RENDER_TYPE_NONE,
thai_ranges, G_N_ELEMENTS(thai_ranges)
},
{
"ThaiScriptEngineX",
PANGO_ENGINE_TYPE_SHAPE,
PANGO_RENDER_TYPE_X,
thai_ranges, G_N_ELEMENTS(thai_ranges)
}
};
/*
* Language script engine
*/
/* Break callback installed as script_break on the Thai language engine.
 * The body is empty: none of the arguments are read and attrs is left
 * untouched.
 */
static void
thai_engine_break (const char *text,
gint len,
PangoAnalysis *analysis,
PangoLogAttr *attrs)
{
}
/* Allocate and fill in the Thai language engine.
 *
 * Returns a newly allocated PangoEngineLang (cast to PangoEngine *) whose
 * script_break callback is thai_engine_break().
 */
static PangoEngine *
thai_engine_lang_new (void)
{
  PangoEngineLang *result;

  result = g_new (PangoEngineLang, 1);

  result->engine.id = "ThaiScriptEngine";
  result->engine.type = PANGO_ENGINE_TYPE_LANG;
  /* Record the size of the engine structure itself; sizeof (result)
   * would only give the size of the pointer.
   */
  result->engine.length = sizeof (*result);
  result->script_break = thai_engine_break;

  return (PangoEngine *)result;
}
/*
* X window system script engine portion
*/
/* Per-font rendering information; forward declaration of the structure
 * defined below.
 */
typedef struct _ThaiFontInfo ThaiFontInfo;
/* The type of encoding that we will use
*/
typedef enum {
THAI_FONT_NONE,
THAI_FONT_XTIS,
THAI_FONT_TIS,
#ifdef WTT_CLUSTERING
THAI_FONT_TIS_2,
#endif
THAI_FONT_ISO10646
} ThaiFontType;
/* Rendering information that get_font_info() attaches to a PangoFont. */
struct _ThaiFontInfo
{
PangoFont *font; /* the font this record was built for */
ThaiFontType type; /* encoding of the selected subfont */
PangoXSubfont subfont; /* selected subfont; only assigned when a usable one was found */
};
/* All combining marks for Thai fall in the range U+0E30-U+0E4F,
 * so we confine our data tables to that range (32 entries, indexed by
 * code point minus 0xE30), and use default values for characters
 * outside that range.
 */
/* Map from code point to group used for rendering with XTIS fonts
 * (0 == base character)
 */
static const char groups[32] = {
0, 1, 0, 0, 1, 1, 1, 1,
1, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 2,
2, 2, 2, 2, 2, 2, 1, 0
};
/* Map from code point (minus 0xE30) to index within group 1
 * (0 == no combining mark from group 1).  The non-clustering
 * add_cluster() multiplies this index by 8 when building the code of a
 * precomposed XTIS glyph.
 */
static const char group1_map[32] = {
0, 1, 0, 0, 2, 3, 4, 5,
6, 7, 8, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0
};
/* Map from code point (minus 0xE30) to index within group 2
 * (0 == no combining mark from group 2).  The non-clustering
 * add_cluster() adds this index directly when building the code of a
 * precomposed XTIS glyph.
 */
static const char group2_map[32] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1,
2, 3, 4, 5, 6, 7, 1, 0
};
#ifdef WTT_CLUSTERING
/* Shaping category of each character, expressed with the _NC/_UC/_BC/
 * _SC/_AV/_BV/_TN/_AD/_BD/_AM bit flags (_ND == no category).
 * IsChrType() indexes this table with (code point - 0xE00 + 0xA0), so
 * the Thai characters occupy rows A0 through F0; every other row is _ND.
 */
static int _ChrTypeTbl[256] = {
/* 0, 1, 2, 3, 4, 5, 6, 7,
8, 9, A, B, C, D, E, F */
/*00*/ _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
_ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
/*10*/ _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
_ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
/*20*/ _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
_ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
/*30*/ _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
_ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
/*40*/ _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
_ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
/*50*/ _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
_ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
/*60*/ _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
_ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
/*70*/ _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
_ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
/*80*/ _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
_ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
/*90*/ _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
_ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
/*A0*/ _ND, _NC, _NC, _NC, _NC, _NC, _NC, _NC,
_NC, _NC, _NC, _NC, _NC, _SC, _BC, _BC,
/*B0*/ _SC, _NC, _NC, _NC, _NC, _NC, _NC, _NC,
_NC, _NC, _NC, _UC, _NC, _UC, _NC, _UC,
/*C0*/ _NC, _NC, _NC, _NC, _ND, _NC, _ND, _NC,
_NC, _NC, _NC, _NC, _NC, _NC, _NC, _ND,
/*D0*/ _ND, _AV, _ND, _AM, _AV, _AV, _AV, _AV,
_BV, _BV, _BD, _ND, _ND, _ND, _ND, _ND,
/*E0*/ _ND, _ND, _ND, _ND, _ND, _ND, _ND, _AD,
_TN, _TN, _TN, _TN, _AD, _AD, _AD, _ND,
/*F0*/ _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
_ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND
};
/* TACTIS character class (CTRL .. AV3) of each character, looked up
 * through the ChrType() macro.  IsWttCombinedRule() passes
 * (code point - 0xE00 + 0xA0) as the index, so the Thai characters occupy
 * rows A0 through F0.
 */
static int _TACchclass[256] = {
/* 0, 1, 2, 3, 4, 5, 6, 7,
8, 9, A, B, C, D, E, F */
/*00*/ CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,
CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,
/*10*/ CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,
CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,
/*20*/ NON, NON, NON, NON, NON, NON, NON, NON,
NON, NON, NON, NON, NON, NON, NON, NON,
/*30*/ NON, NON, NON, NON, NON, NON, NON, NON,
NON, NON, NON, NON, NON, NON, NON, NON,
/*40*/ NON, NON, NON, NON, NON, NON, NON, NON,
NON, NON, NON, NON, NON, NON, NON, NON,
/*50*/ NON, NON, NON, NON, NON, NON, NON, NON,
NON, NON, NON, NON, NON, NON, NON, NON,
/*60*/ NON, NON, NON, NON, NON, NON, NON, NON,
NON, NON, NON, NON, NON, NON, NON, NON,
/*70*/ NON, NON, NON, NON, NON, NON, NON, NON,
NON, NON, NON, NON, NON, NON, NON,CTRL,
/*80*/ CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,
CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,
/*90*/ CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,
CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,CTRL,
/*A0*/ NON,CONS,CONS,CONS,CONS,CONS,CONS,CONS,
CONS,CONS,CONS,CONS,CONS,CONS,CONS,CONS,
/*B0*/ CONS,CONS,CONS,CONS,CONS,CONS,CONS,CONS,
CONS,CONS,CONS,CONS,CONS,CONS,CONS,CONS,
/*C0*/ CONS,CONS,CONS,CONS, FV3,CONS, FV3,CONS,
CONS,CONS,CONS,CONS,CONS,CONS,CONS, NON,
/*D0*/ FV1, AV2, FV1, FV1, AV1, AV3, AV2, AV3,
BV1, BV2, BD, NON, NON, NON, NON, NON,
/*E0*/ LV, LV, LV, LV, LV, FV2, NON, AD2,
TONE,TONE,TONE,TONE, AD1, AD1, AD3, NON,
/*F0*/ NON, NON, NON, NON, NON, NON, NON, NON,
NON, NON, NON, NON, NON, NON, NON,CTRL
};
/* Table for Thai Cell Manipulation.
 *
 * Indexed as [class of the previous character Cn-1][class of the current
 * character Cn], where both classes are TACTIS classes 0 (CTRL) through
 * 16 (AV3); each row below therefore holds 17 entries (8 + 9) and there
 * are 17 rows, the last of which carries no label.
 * IsWttCombinedRule() treats 'C' as "Cn combines with Cn-1" and every
 * other code ('A', 'S', 'R', 'X') as "does not combine".
 * NOTE(review): the "00", "10", ... "F0" labels only mark successive
 * rows (classes 0, 1, ... 15); they are not table offsets.
 * NOTE(review): 'A'/'S'/'R'/'X' presumably stand for accept / strict /
 * reject / non-display as in the WTT 2.0 tables -- confirm against the
 * specification.
 */
static char _TAC_celltype_inputcheck[17][17] = {
/* Cn */ /* 0, 1, 2, 3, 4, 5, 6, 7,
8, 9, A, B, C, D, E, F */
/* Cn-1 00 */ 'X', 'A', 'A', 'A', 'A', 'A', 'A', 'R',
'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
/* 10 */ 'X', 'A', 'A', 'A', 'S', 'S', 'A', 'R',
'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
/* 20 */ 'X', 'A', 'A', 'A', 'A', 'S', 'A', 'C',
'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C',
/* 30 */ 'X', 'S', 'A', 'S', 'S', 'S', 'S', 'R',
'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
/* 40 */ 'X', 'S', 'A', 'A', 'S', 'S', 'A', 'R',
'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
/* 50 */ 'X', 'A', 'A', 'A', 'A', 'S', 'A', 'R',
'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
/* 60 */ 'X', 'A', 'A', 'A', 'S', 'A', 'S', 'R',
'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
/* 70 */ 'X', 'A', 'A', 'A', 'S', 'S', 'A', 'R',
'R', 'R', 'C', 'C', 'R', 'R', 'R', 'R', 'R',
/* 80 */ 'X', 'A', 'A', 'A', 'S', 'S', 'A', 'R',
'R', 'R', 'C', 'R', 'R', 'R', 'R', 'R', 'R',
/* 90 */ 'X', 'A', 'A', 'A', 'S', 'S', 'A', 'R',
'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
/* A0 */ 'X', 'A', 'A', 'A', 'A', 'A', 'A', 'R',
'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
/* B0 */ 'X', 'A', 'A', 'A', 'S', 'S', 'A', 'R',
'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
/* C0 */ 'X', 'A', 'A', 'A', 'S', 'S', 'A', 'R',
'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
/* D0 */ 'X', 'A', 'A', 'A', 'S', 'S', 'A', 'R',
'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
/* E0 */ 'X', 'A', 'A', 'A', 'S', 'S', 'A', 'R',
'R', 'R', 'C', 'C', 'R', 'R', 'R', 'R', 'R',
/* F0 */ 'X', 'A', 'A', 'A', 'S', 'S', 'A', 'R',
'R', 'R', 'C', 'R', 'R', 'R', 'R', 'R', 'R',
'X', 'A', 'A', 'A', 'S', 'S', 'A', 'R',
'R', 'R', 'C', 'R', 'C', 'R', 'R', 'R', 'R'
};
/* Replacement glyph codes that get_glyphs_list() substitutes for
 * combining marks and split-tail consonants when shaping for a
 * THAI_FONT_TIS_2 font.
 * NOTE(review): each table appears to hold one entry per consecutive
 * single-byte character code of the class it serves, as classified in
 * _ChrTypeTbl -- confirm the intended index base against
 * get_glyphs_list().
 */

/* Tone marks / above diacritics (_TN, _AD): eight entries, presumably for
 * codes 0xE7..0xEE.
 */
static const gint ShiftDown_TONE_AD[8] = {
0xE7, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0xED, 0xEE
};
/* Same eight characters, alternative glyph set used after an up-tail
 * consonant.
 */
static const gint ShiftDownLeft_TONE_AD[8] = {
0xE7, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x99, 0xEE
};
/* Same eight characters, alternative glyph set used after an up-tail
 * consonant when another mark sits between them.
 */
static const gint ShiftLeft_TONE_AD[8] = {
0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, 0x99, 0xEE
};
/* Above vowels (_AV): seven entries, presumably for codes 0xD1..0xD7;
 * the 0x00 slots belong to codes that are not _AV.
 */
static const gint ShiftLeft_AV[7] = {
0x98, 0x00, 0x00, 0x81, 0x82, 0x83, 0x84
};
/* Below vowels / below diacritic (_BV, _BD): three entries, presumably
 * for codes 0xD8..0xDA.
 */
static const gint ShiftDown_BV_BD[3] = {
0xFC, 0xFD, 0xFE
};
/* Split-tail consonants (_SC): four entries, presumably for codes
 * 0xAD..0xB0; the 0x00 slots belong to codes that are not _SC.
 */
static const gint TailCutCons[4] = {
0x90, 0x00, 0x00, 0x80
};
#endif
/* Fetch the ThaiFontInfo attached to FONT, building and attaching it on
 * first use.
 *
 * The record is stored as object data under the "thai-font-info" quark
 * with g_free() as its destroy notifier, so later calls for the same
 * font return the stored record.  When it has to be built, the subfonts
 * reported by pango_x_list_subfonts() are scanned in order and the first
 * acceptable one decides the encoding type.  An ISO10646 subfont is only
 * acceptable if it actually contains a glyph for U+0E01.  If nothing is
 * acceptable the type stays THAI_FONT_NONE.
 */
static ThaiFontInfo *
get_font_info (PangoFont *font)
{
  /* Charset names handed to pango_x_list_subfonts(). */
  static const char *charsets[] = {
    "xtis620.2529-1",
    "xtis-0",
    "tis620.2533-1",
    "tis620.2529-1",
    "iso8859-11",
#ifdef WTT_CLUSTERING
    "tis620-2",
#endif
    "iso10646-1",
  };
  /* Encoding type for each entry of charsets[], position by position. */
  static const int charset_types[] = {
    THAI_FONT_XTIS,
    THAI_FONT_XTIS,
    THAI_FONT_TIS,
    THAI_FONT_TIS,
    THAI_FONT_TIS,
#ifdef WTT_CLUSTERING
    THAI_FONT_TIS_2,
#endif
    THAI_FONT_ISO10646
  };

  GQuark info_id;
  ThaiFontInfo *font_info;
  PangoXSubfont *subfont_ids;
  int *subfont_charsets;
  int n_subfonts;
  int pos;

  info_id = g_quark_from_string ("thai-font-info");
  font_info = g_object_get_qdata (G_OBJECT (font), info_id);

  /* Already computed for this font: nothing more to do. */
  if (font_info != NULL)
    return font_info;

  font_info = g_new (ThaiFontInfo, 1);
  font_info->font = font;
  font_info->type = THAI_FONT_NONE;

  g_object_set_qdata_full (G_OBJECT (font), info_id, font_info, (GDestroyNotify)g_free);

  n_subfonts = pango_x_list_subfonts (font, (char **)charsets, G_N_ELEMENTS (charsets),
                                      &subfont_ids, &subfont_charsets);

  for (pos = 0; pos < n_subfonts; pos++)
    {
      ThaiFontType candidate = charset_types[subfont_charsets[pos]];

      /* A Unicode subfont is no use unless it really has Thai glyphs. */
      if (candidate == THAI_FONT_ISO10646 &&
          !pango_x_has_glyph (font, PANGO_X_MAKE_GLYPH (subfont_ids[pos], 0xe01)))
        continue;

      font_info->type = candidate;
      font_info->subfont = subfont_ids[pos];
      break;
    }

  g_free (subfont_ids);
  g_free (subfont_charsets);

  return font_info;
}
/* Append GLYPH to GLYPHS as part of the cluster starting at byte offset
 * CLUSTER_START.
 *
 * A non-combining glyph starts a cluster and takes its logical width
 * from the font.  A combining glyph takes over the advance of the glyph
 * before it: it receives the larger of its own logical width and the
 * previous glyph's width, and the previous glyph's width is set to zero.
 * A combining glyph must therefore never be the first glyph in the
 * string.  Offsets are always zero.
 */
static void
add_glyph (ThaiFontInfo *font_info,
           PangoGlyphString *glyphs,
           int cluster_start,
           PangoGlyph glyph,
           gboolean combining)
{
  PangoRectangle ink_rect, logical_rect;
  int slot = glyphs->num_glyphs;

  pango_glyph_string_set_size (glyphs, slot + 1);

  glyphs->log_clusters[slot] = cluster_start;
  glyphs->glyphs[slot].glyph = glyph;
  glyphs->glyphs[slot].attr.is_cluster_start = !combining;
  glyphs->glyphs[slot].geometry.x_offset = 0;
  glyphs->glyphs[slot].geometry.y_offset = 0;

  pango_font_get_glyph_extents (font_info->font, glyph, &ink_rect, &logical_rect);

  if (!combining)
    {
      glyphs->glyphs[slot].geometry.width = logical_rect.width;
      return;
    }

  /* Move the advance from the previous glyph onto this one. */
  if (logical_rect.width > glyphs->glyphs[slot - 1].geometry.width)
    glyphs->glyphs[slot].geometry.width = logical_rect.width;
  else
    glyphs->glyphs[slot].geometry.width = glyphs->glyphs[slot - 1].geometry.width;
  glyphs->glyphs[slot - 1].geometry.width = 0;
}
#ifdef WTT_CLUSTERING
/* Single-byte (TIS-620) code of a Thai Unicode character: U+0E00 maps to
 * 0xA0.
 */
static gint
wtt_tis_code (gunichar wc)
{
  return (gint) wc - 0xE00 + 0xA0;
}

/* Glyph for single-byte code CODE in the subfont selected for FONT_INFO. */
static gint
wtt_glyph (ThaiFontInfo *font_info, gint code)
{
  return PANGO_X_MAKE_GLYPH (font_info->subfont, code);
}

/* Glyph for the unmodified single-byte form of WC. */
static gint
wtt_plain_glyph (ThaiFontInfo *font_info, gunichar wc)
{
  return wtt_glyph (font_info, wtt_tis_code (wc));
}

/* Glyph for the variant of WC listed in TABLE, whose first entry belongs
 * to single-byte code FIRST_CODE.  The caller must already have checked,
 * via IsChrType(), that WC is one of the characters TABLE covers.
 */
static gint
wtt_variant_glyph (ThaiFontInfo *font_info,
                   const gint *table,
                   gint first_code,
                   gunichar wc)
{
  return wtt_glyph (font_info, table[wtt_tis_code (wc) - first_code]);
}

/* Shape a two-character cluster C0 C1 for a TIS-620-2 font.
 * Writes 2 or 3 glyphs to OUT and returns how many.
 */
static gint
wtt_tis2_pair (ThaiFontInfo *font_info, gunichar c0, gunichar c1, gint *out)
{
  if (IsChrType (c0, NoTailCons|BotTailCons|SpltTailCons) &&
      IsChrType (c1, SaraAm))
    {
      /* SARA AM is drawn as code 0xED over the consonant followed by
       * code 0xD2.
       */
      out[0] = wtt_plain_glyph (font_info, c0);
      out[1] = wtt_glyph (font_info, 0xED);
      out[2] = wtt_glyph (font_info, 0xD2);
      return 3;
    }
  if (IsChrType (c0, UpTailCons) && IsChrType (c1, SaraAm))
    {
      /* Same, using code 0x99 in place of 0xED for an up-tail consonant. */
      out[0] = wtt_plain_glyph (font_info, c0);
      out[1] = wtt_glyph (font_info, 0x99);
      out[2] = wtt_glyph (font_info, 0xD2);
      return 3;
    }
  if (IsChrType (c0, NoTailCons|BotTailCons|SpltTailCons) &&
      IsChrType (c1, _AV))
    {
      out[0] = wtt_plain_glyph (font_info, c0);
      out[1] = wtt_plain_glyph (font_info, c1);
      return 2;
    }
  if (IsChrType (c0, NoTailCons|BotTailCons|SpltTailCons) &&
      IsChrType (c1, _AD|_TN))
    {
      out[0] = wtt_plain_glyph (font_info, c0);
      out[1] = wtt_variant_glyph (font_info, ShiftDown_TONE_AD, 0xE7, c1);
      return 2;
    }
  if (IsChrType (c0, UpTailCons) && IsChrType (c1, _AV))
    {
      out[0] = wtt_plain_glyph (font_info, c0);
      out[1] = wtt_variant_glyph (font_info, ShiftLeft_AV, 0xD1, c1);
      return 2;
    }
  if (IsChrType (c0, UpTailCons) && IsChrType (c1, _AD|_TN))
    {
      out[0] = wtt_plain_glyph (font_info, c0);
      out[1] = wtt_variant_glyph (font_info, ShiftDownLeft_TONE_AD, 0xE7, c1);
      return 2;
    }
  if (IsChrType (c0, NoTailCons|UpTailCons) && IsChrType (c1, _BV|_BD))
    {
      out[0] = wtt_plain_glyph (font_info, c0);
      out[1] = wtt_plain_glyph (font_info, c1);
      return 2;
    }
  if (IsChrType (c0, BotTailCons) && IsChrType (c1, _BV|_BD))
    {
      out[0] = wtt_plain_glyph (font_info, c0);
      out[1] = wtt_variant_glyph (font_info, ShiftDown_BV_BD, 0xD8, c1);
      return 2;
    }
  if (IsChrType (c0, SpltTailCons) && IsChrType (c1, _BV|_BD))
    {
      out[0] = wtt_variant_glyph (font_info, TailCutCons, 0xAD, c0);
      out[1] = wtt_plain_glyph (font_info, c1);
      return 2;
    }

  /* Unrecognised pair: emit code 0x7F first, as is done for a lone
   * combining mark, followed by the two characters of the cluster.
   */
  out[0] = wtt_glyph (font_info, 0x7F);
  out[1] = wtt_plain_glyph (font_info, c0);
  out[2] = wtt_plain_glyph (font_info, c1);
  return 3;
}

/* Shape a three-character cluster C0 C1 C2 for a TIS-620-2 font.
 * Writes 3 or 4 glyphs to OUT and returns how many.
 */
static gint
wtt_tis2_triple (ThaiFontInfo *font_info,
                 gunichar c0,
                 gunichar c1,
                 gunichar c2,
                 gint *out)
{
  if (IsChrType (c0, NoTailCons|BotTailCons|SpltTailCons) &&
      IsChrType (c1, _TN) &&
      IsChrType (c2, SaraAm))
    {
      out[0] = wtt_plain_glyph (font_info, c0);
      out[1] = wtt_plain_glyph (font_info, c1);
      out[2] = wtt_glyph (font_info, 0xED);
      out[3] = wtt_glyph (font_info, 0xD2);
      return 4;
    }
  if (IsChrType (c0, UpTailCons) &&
      IsChrType (c1, _TN) &&
      IsChrType (c2, SaraAm))
    {
      out[0] = wtt_plain_glyph (font_info, c0);
      out[1] = wtt_variant_glyph (font_info, ShiftLeft_TONE_AD, 0xE7, c1);
      out[2] = wtt_glyph (font_info, 0x99);
      out[3] = wtt_glyph (font_info, 0xD2);
      return 4;
    }
  if (IsChrType (c0, UpTailCons) &&
      IsChrType (c1, _AV) &&
      IsChrType (c2, _AD|_TN))
    {
      out[0] = wtt_plain_glyph (font_info, c0);
      out[1] = wtt_variant_glyph (font_info, ShiftLeft_AV, 0xD1, c1);
      out[2] = wtt_variant_glyph (font_info, ShiftLeft_TONE_AD, 0xE7, c2);
      return 3;
    }
  if (IsChrType (c0, UpTailCons) &&
      IsChrType (c1, _BV) &&
      IsChrType (c2, _AD|_TN))
    {
      out[0] = wtt_plain_glyph (font_info, c0);
      out[1] = wtt_plain_glyph (font_info, c1);
      out[2] = wtt_variant_glyph (font_info, ShiftDownLeft_TONE_AD, 0xE7, c2);
      return 3;
    }
  if (IsChrType (c0, NoTailCons) &&
      IsChrType (c1, _BV) &&
      IsChrType (c2, _AD|_TN))
    {
      out[0] = wtt_plain_glyph (font_info, c0);
      out[1] = wtt_plain_glyph (font_info, c1);
      out[2] = wtt_variant_glyph (font_info, ShiftDown_TONE_AD, 0xE7, c2);
      return 3;
    }
  if (IsChrType (c0, SpltTailCons) &&
      IsChrType (c1, _BV) &&
      IsChrType (c2, _AD|_TN))
    {
      out[0] = wtt_variant_glyph (font_info, TailCutCons, 0xAD, c0);
      out[1] = wtt_plain_glyph (font_info, c1);
      out[2] = wtt_variant_glyph (font_info, ShiftDown_TONE_AD, 0xE7, c2);
      return 3;
    }
  if (IsChrType (c0, BotTailCons) &&
      IsChrType (c1, _BV) &&
      IsChrType (c2, _AD|_TN))
    {
      out[0] = wtt_plain_glyph (font_info, c0);
      out[1] = wtt_variant_glyph (font_info, ShiftDown_BV_BD, 0xD8, c1);
      out[2] = wtt_variant_glyph (font_info, ShiftDown_TONE_AD, 0xE7, c2);
      return 3;
    }

  /* Unrecognised triple: one unmodified glyph per character. */
  out[0] = wtt_plain_glyph (font_info, c0);
  out[1] = wtt_plain_glyph (font_info, c1);
  out[2] = wtt_plain_glyph (font_info, c2);
  return 3;
}

/* Translate one cluster into the glyphs used to draw it.
 *
 * font_info:   rendering information for the font; its type selects the
 *              mapping.
 * cluster:     the Unicode characters of the cluster.
 * num_chrs:    number of characters in cluster.
 * glyph_lists: address of a pointer to the output array.  The array must
 *              have room for num_chrs + 1 entries, because the
 *              THAI_FONT_TIS_2 mapping can emit one glyph more than there
 *              are characters.
 *
 * Returns the number of glyphs written.  That is 0 when cluster is NULL
 * or num_chrs is 0, and also for a THAI_FONT_TIS_2 cluster of more than
 * three characters.
 *
 * For THAI_FONT_TIS_2 the variant tables are indexed by the character's
 * single-byte code (code point - 0xE00 + 0xA0) minus the code of the
 * table's first entry.  Subtracting that first code straight from
 * (code point - 0xE00) gives a negative offset, which wraps to a huge
 * unsigned index and reads far outside the table.
 */
static gint
get_glyphs_list (ThaiFontInfo *font_info,
                 gunichar *cluster,
                 gint num_chrs,
                 gint **glyph_lists)
{
  gint *out;
  int i;

  if ((cluster == NULL) || (num_chrs == 0))
    return 0;

  out = *glyph_lists;

  switch (font_info->type)
    {
    case THAI_FONT_NONE:
      for (i = 0; i < num_chrs; i++)
        out[i] = pango_x_get_unknown_glyph (font_info->font);
      return num_chrs;

    case THAI_FONT_XTIS:
      for (i = 0; i < num_chrs; i++)
        out[i] = PANGO_X_MAKE_GLYPH (font_info->subfont,
                                     0x100 * (cluster[i] - 0xe00 + 0x20) + 0x30);
      return num_chrs;

    case THAI_FONT_TIS:
      for (i = 0; i < num_chrs; i++)
        out[i] = wtt_plain_glyph (font_info, cluster[i]);
      return num_chrs;

    case THAI_FONT_TIS_2: /* Microsoft Extension */
      switch (num_chrs)
        {
        case 1:
          if (IsChrType (cluster[0], _BV|_BD|_AV|_AD|_TN))
            {
              /* A combining mark on its own is preceded by code 0x7F. */
              out[0] = wtt_glyph (font_info, 0x7F);
              out[1] = wtt_plain_glyph (font_info, cluster[0]);
              return 2;
            }
          out[0] = wtt_plain_glyph (font_info, cluster[0]);
          return 1;

        case 2:
          return wtt_tis2_pair (font_info, cluster[0], cluster[1], out);

        case 3:
          return wtt_tis2_triple (font_info, cluster[0], cluster[1], cluster[2], out);

        default:
          /* Clusters longer than three characters are not shaped. */
          return 0;
        }

    case THAI_FONT_ISO10646:
      for (i = 0; i < num_chrs; i++)
        out[i] = PANGO_X_MAKE_GLYPH (font_info->subfont, cluster[i]);
      return num_chrs;
    }

  return 0; /* Quiet GCC */
}
#endif
/* Return the glyph code within the font for the given Unicode Thai
* code pointer
*/
get_glyph (ThaiFontInfo *font_info, gunichar wc)
{
switch (font_info->type)
{
case THAI_FONT_NONE:
return pango_x_get_unknown_glyph (font_info->font);
case THAI_FONT_XTIS:
return PANGO_X_MAKE_GLYPH (font_info->subfont, 0x100 * (wc - 0xe00 + 0x20) + 0x30);
case THAI_FONT_TIS:
return PANGO_X_MAKE_GLYPH (font_info->subfont, wc - 0xe00 + 0xA0);
case THAI_FONT_ISO10646:
return PANGO_X_MAKE_GLYPH (font_info->subfont, wc);
}
return 0; /* Quiet GCC */
}
#ifdef WTT_CLUSTERING
/* Append the glyphs for one cluster to GLYPHS.
 *
 * font_info:     rendering information for the font.
 * glyphs:        glyph string being built.
 * cluster_start: byte offset recorded in log_clusters for every glyph of
 *                the cluster.
 * cluster:       the Unicode characters of the cluster.
 * num_chrs:      number of characters in cluster.
 *
 * The first glyph starts the cluster; every following glyph is added as
 * a combining glyph.
 */
static void
add_cluster (ThaiFontInfo *font_info,
             PangoGlyphString *glyphs,
             int cluster_start,
             gunichar *cluster,
             gint num_chrs)
{
  gint glyphs_list[MAX_GLYPHS];
  /* get_glyphs_list() wants the address of a pointer to the buffer.  The
   * address of the array itself has a different type, and would make the
   * callee treat the array's contents as a pointer.
   */
  gint *glyph_buf = glyphs_list;
  gint num_glyphs;
  gint i;

  /* The glyph list can be one entry longer than the cluster, so refuse
   * anything that could overrun the local buffer.
   */
  if (num_chrs < 0 || num_chrs >= MAX_GLYPHS)
    return;

  num_glyphs = get_glyphs_list (font_info, cluster, num_chrs, &glyph_buf);

  for (i = 0; i < num_glyphs; i++)
    add_glyph (font_info, glyphs, cluster_start, glyphs_list[i],
               i == 0 ? FALSE : TRUE);
}
#else
/* Append the glyphs for a base character and its optional marks.
 *
 * base is the base character.  group1 and group2 are the combining marks
 * from group 1 and group 2, or 0 when absent.  With an XTIS font, a
 * single precomposed glyph is used if the font has one.  Otherwise the
 * base glyph is emitted, followed by one combining glyph per mark that
 * is present.
 */
static void
add_cluster (ThaiFontInfo *font_info,
             PangoGlyphString *glyphs,
             int cluster_start,
             gunichar base,
             gunichar group1,
             gunichar group2)
{
  gunichar marks[2];
  int k;

  if (font_info->type == THAI_FONT_XTIS)
    {
      /* XTIS code of the cluster: base position plus the mark indices. */
      int xtis_index = 0x100 * (base - 0xe00 + 0x20) + 0x30;
      PangoGlyph precomposed;

      xtis_index += group1 ? 8 * group1_map[group1 - 0xe30] : 0;
      xtis_index += group2 ? group2_map[group2 - 0xe30] : 0;

      precomposed = PANGO_X_MAKE_GLYPH (font_info->subfont, xtis_index);
      if (pango_x_has_glyph (font_info->font, precomposed))
        {
          add_glyph (font_info, glyphs, cluster_start, precomposed, FALSE);
          return;
        }
    }

  /* No precomposed glyph available: build the cluster piece by piece. */
  add_glyph (font_info, glyphs, cluster_start, get_glyph (font_info, base), FALSE);

  marks[0] = group1;
  marks[1] = group2;
  for (k = 0; k < 2; k++)
    {
      if (marks[k])
        add_glyph (font_info, glyphs, cluster_start, get_glyph (font_info, marks[k]), TRUE);
    }
}
#endif
#ifdef WTT_CLUSTERING
/* Decide whether nxt_wc joins the cell of cur_wc under the WTT
 * cell-clustering table.
 *
 * cur_wc, nxt_wc: Unicode code points of two consecutive characters.
 *
 * Returns TRUE only when _TAC_celltype_inputcheck holds 'C' for the pair
 * of character classes.  Every other table code, and any character
 * outside U+0E00..U+0E5F, gives FALSE.  The range check matters because
 * the class table has 256 entries and is indexed with
 * (code point - 0xE00 + 0xA0): anything outside the Thai block, including
 * a terminating 0, would read outside the table.
 */
gboolean
IsWttCombinedRule(gint cur_wc, gint nxt_wc)
{
  if (cur_wc < 0x0E00 || cur_wc > 0x0E5F ||
      nxt_wc < 0x0E00 || nxt_wc > 0x0E5F)
    return FALSE;

  switch (_TAC_celltype_inputcheck[ChrType(cur_wc - 0xE00 + 0xA0)]
                                  [ChrType(nxt_wc - 0xE00 + 0xA0)])
    {
    case 'C':
      return TRUE;
    case 'A':
    case 'S':
    case 'R':
    case 'X':
    default:
      return FALSE;
    }
}
static char *
g_utf8_get_next_cluster(const char *text,
gunichar **cluster,
gint *num_chrs)
{
int nChrs = 1;
gboolean ClusterNotFound = TRUE;
if ((text == NULL) ||
((text) && (*text == '\0')) ) {
if (*num_chrs)
*num_chrs = 0;
if (*cluster)
*cluster = (gunichar *)NULL;
return (char *)NULL;
}
if (*cluster)
(*cluster)[0] = g_utf8_get_char(text);
text = g_utf8_next_char(text);
do {
if (*cluster)
(*cluster)[nChrs] = g_utf8_get_char(text);
if (IsWttCombinedRule((*cluster)[nChrs - 1], (*cluster)[nChrs])) {
nChrs++;
if (nChrs == 3)
ClusterNotFound = FALSE;
} else {
if ((nChrs == 1) &&
IsChrType((*cluster)[nChrs - 1], Cons) &&
IsChrType((*cluster)[nChrs], SaraAm) ) {
nChrs = 2;
text = g_utf8_next_char(text);
} if ((nChrs == 2) &&
IsChrType((*cluster)[nChrs - 2], Cons) &&
IsChrType((*cluster)[nChrs - 1], Tone) ) {
nChrs = 3;
text = g_utf8_next_char(text);
}
ClusterNotFound = FALSE;
}
text = g_utf8_next_char(text);
} while (ClusterNotFound);
if (*num_chrs)
*num_chrs = nChrs;
if (*cluster)
(*cluster)[nChrs] = 0;
return text;
}
#endif
/* Shape callback of the X engine: convert TEXT into glyphs.
 *
 * font:     font to render with.
 * text:     UTF-8 text to shape.
 * length:   length of text in bytes.
 * analysis: not used.
 * glyphs:   glyph string to fill; it is emptied first.
 *
 * With WTT_CLUSTERING the text is split into clusters by
 * g_utf8_get_next_cluster() and each cluster is shaped as a unit.
 * Without it, a cluster is a base character plus at most one mark from
 * each of groups 1 and 2.  In both cases the byte offset recorded for a
 * cluster is the offset of its first character.
 */
static void
thai_engine_shape (PangoFont *font,
                   const char *text,
                   gint length,
                   PangoAnalysis *analysis,
                   PangoGlyphString *glyphs)
{
  ThaiFontInfo *font_info;
  const char *p;
#ifndef WTT_CLUSTERING
  gunichar base = 0;
  gunichar group1 = 0;
  gunichar group2 = 0;
  int cluster_start = 0;
#endif

  pango_glyph_string_set_size (glyphs, 0);
  font_info = get_font_info (font);

#ifdef WTT_CLUSTERING
  p = text;
  while (p < text + length)
    {
      gunichar cluster[MAX_CLUSTER_CHRS];
      /* The cluster finder takes the address of a pointer to the buffer;
       * the address of the array itself has a different type.
       */
      gunichar *cluster_buf = cluster;
      gint num_chrs = 0;
      const char *next;

      next = g_utf8_get_next_cluster (p, &cluster_buf, &num_chrs);

      /* NULL means the end of the text was reached.  Also stop if no
       * progress was made, so the loop always terminates.
       */
      if (next == NULL || next <= p)
        break;

      /* p still points at the first byte of the cluster here. */
      add_cluster (font_info, glyphs, p - text, cluster, num_chrs);
      p = next;
    }
#else
  p = text;
  while (p < text + length)
    {
      int group;
      gunichar wc;

      wc = g_utf8_get_char (p);
      if (wc >= 0xe30 && wc < 0xe50)
        group = groups[wc - 0xe30];
      else
        group = 0;

      switch (group)
        {
        case 0:
          /* A new base character closes the cluster collected so far. */
          if (base)
            {
              add_cluster (font_info, glyphs, cluster_start, base, group1, group2);
              group1 = 0;
              group2 = 0;
            }
          cluster_start = p - text;
          base = wc;
          break;
        case 1:
          group1 = wc;
          break;
        case 2:
          group2 = wc;
          break;
        }

      p = g_utf8_next_char (p);
    }

  /* Flush the last cluster. */
  if (base)
    add_cluster (font_info, glyphs, cluster_start, base, group1, group2);
#endif
}
/* Coverage callback of the X engine.
 *
 * Returns a new PangoCoverage.  If FONT has a usable Thai subfont, the
 * code points U+0E01..U+0E3A and U+0E3F..U+0E5B are marked as exactly
 * covered.  Otherwise the coverage is returned as created.  LANG is not
 * consulted.
 */
static PangoCoverage *
thai_engine_get_coverage (PangoFont *font,
                          const char *lang)
{
  /* Inclusive [first, last] spans of covered code points. */
  static const gunichar covered[2][2] = {
    { 0xe01, 0xe3a },
    { 0xe3f, 0xe5b }
  };
  PangoCoverage *result = pango_coverage_new ();
  ThaiFontInfo *font_info = get_font_info (font);
  unsigned int span;
  gunichar wc;

  if (font_info->type == THAI_FONT_NONE)
    return result;

  for (span = 0; span < 2; span++)
    {
      for (wc = covered[span][0]; wc <= covered[span][1]; wc++)
        pango_coverage_set (result, wc, PANGO_COVERAGE_EXACT);
    }

  return result;
}
static PangoEngine *
thai_engine_x_new ()
{
PangoEngineShape *result;
result = g_new (PangoEngineShape, 1);
result->engine.id = "ThaiScriptEngine";
#ifdef WTT_CLUSTERING
result->engine.type = PANGO_ENGINE_TYPE_SHAPE;
#else
result->engine.type = PANGO_ENGINE_TYPE_LANG;
#endif
result->engine.length = sizeof (result);
result->script_shape = thai_engine_shape;
result->get_coverage = thai_engine_get_coverage;
return (PangoEngine *)result;
}
/* The following three functions provide the public module API for
 * Pango. If we are compiling this as a module, then we name the
 * entry points script_engine_list, etc. But if we are compiling
 * it for inclusion directly in Pango, then we need them to
 * have distinct names for this module, so we prepend
 * _pango_thai_
 */
#ifdef MODULE_PREFIX
#define MODULE_ENTRY(func) _pango_thai_##func
#else
#define MODULE_ENTRY(func) func
#endif
/* Report the engines this module provides: *engines is pointed at the
 * static script_engines table and *n_engines receives its entry count.
 */
void
MODULE_ENTRY(script_engine_list) (PangoEngineInfo **engines, gint *n_engines)
{
  *n_engines = G_N_ELEMENTS (script_engines);
  *engines = script_engines;
}
/* Instantiate the engine whose ID is given.
 *
 * "ThaiScriptEngineLang" yields a new language engine and
 * "ThaiScriptEngineX" a new X shape engine; any other ID yields NULL.
 */
PangoEngine *
MODULE_ENTRY(script_engine_load) (const char *id)
{
  if (strcmp (id, "ThaiScriptEngineLang") == 0)
    return thai_engine_lang_new ();

  if (strcmp (id, "ThaiScriptEngineX") == 0)
    return thai_engine_x_new ();

  return NULL;
}
/* Unload hook of the module API.  The body is empty: ENGINE is neither
 * inspected nor released here.
 */
void
MODULE_ENTRY(script_engine_unload) (PangoEngine *engine)
{
}
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]