Pango Reference Manual | ||||
---|---|---|---|---|
PangoScriptIter; enum PangoScript; #define PANGO_TYPE_SCRIPT PangoScript pango_script_for_unichar (gunichar ch); PangoLanguage* pango_script_get_sample_language (PangoScript script); gboolean pango_language_includes_script (PangoLanguage *language, PangoScript script); PangoScriptIter* pango_script_iter_new (const char *text, int length); void pango_script_iter_get_range (PangoScriptIter *iter, G_CONST_RETURN char **start, G_CONST_RETURN char **end, PangoScript *script); gboolean pango_script_iter_next (PangoScriptIter *iter); void pango_script_iter_free (PangoScriptIter *iter);
The functions in this section are used to identify the writing system, or script, of individual characters and of ranges within a larger text string.
typedef struct _PangoScriptIter PangoScriptIter;
A PangoScriptIter is used to iterate through a string and identify ranges in different scripts.
typedef enum { /* ISO 15924 code */ PANGO_SCRIPT_INVALID_CODE = -1, PANGO_SCRIPT_COMMON = 0, /* Zyyy */ PANGO_SCRIPT_INHERITED, /* Qaai */ PANGO_SCRIPT_ARABIC, /* Arab */ PANGO_SCRIPT_ARMENIAN, /* Armn */ PANGO_SCRIPT_BENGALI, /* Beng */ PANGO_SCRIPT_BOPOMOFO, /* Bopo */ PANGO_SCRIPT_CHEROKEE, /* Cher */ PANGO_SCRIPT_COPTIC, /* Qaac */ PANGO_SCRIPT_CYRILLIC, /* Cyrl (Cyrs) */ PANGO_SCRIPT_DESERET, /* Dsrt */ PANGO_SCRIPT_DEVANAGARI, /* Deva */ PANGO_SCRIPT_ETHIOPIC, /* Ethi */ PANGO_SCRIPT_GEORGIAN, /* Geor (Geon, Geoa) */ PANGO_SCRIPT_GOTHIC, /* Goth */ PANGO_SCRIPT_GREEK, /* Grek */ PANGO_SCRIPT_GUJARATI, /* Gujr */ PANGO_SCRIPT_GURMUKHI, /* Guru */ PANGO_SCRIPT_HAN, /* Hani */ PANGO_SCRIPT_HANGUL, /* Hang */ PANGO_SCRIPT_HEBREW, /* Hebr */ PANGO_SCRIPT_HIRAGANA, /* Hira */ PANGO_SCRIPT_KANNADA, /* Knda */ PANGO_SCRIPT_KATAKANA, /* Kana */ PANGO_SCRIPT_KHMER, /* Khmr */ PANGO_SCRIPT_LAO, /* Laoo */ PANGO_SCRIPT_LATIN, /* Latn (Latf, Latg) */ PANGO_SCRIPT_MALAYALAM, /* Mlym */ PANGO_SCRIPT_MONGOLIAN, /* Mong */ PANGO_SCRIPT_MYANMAR, /* Mymr */ PANGO_SCRIPT_OGHAM, /* Ogam */ PANGO_SCRIPT_OLD_ITALIC, /* Ital */ PANGO_SCRIPT_ORIYA, /* Orya */ PANGO_SCRIPT_RUNIC, /* Runr */ PANGO_SCRIPT_SINHALA, /* Sinh */ PANGO_SCRIPT_SYRIAC, /* Syrc (Syrj, Syrn, Syre) */ PANGO_SCRIPT_TAMIL, /* Taml */ PANGO_SCRIPT_TELUGU, /* Telu */ PANGO_SCRIPT_THAANA, /* Thaa */ PANGO_SCRIPT_THAI, /* Thai */ PANGO_SCRIPT_TIBETAN, /* Tibt */ PANGO_SCRIPT_CANADIAN_ABORIGINAL, /* Cans */ PANGO_SCRIPT_YI, /* Yiii */ PANGO_SCRIPT_TAGALOG, /* Tglg */ PANGO_SCRIPT_HANUNOO, /* Hano */ PANGO_SCRIPT_BUHID, /* Buhd */ PANGO_SCRIPT_TAGBANWA, /* Tagb */ /* Unicode-4.0 additions */ PANGO_SCRIPT_BRAILLE, /* Brai */ PANGO_SCRIPT_CYPRIOT, /* Cprt */ PANGO_SCRIPT_LIMBU, /* Limb */ PANGO_SCRIPT_OSMANYA, /* Osma */ PANGO_SCRIPT_SHAVIAN, /* Shaw */ PANGO_SCRIPT_LINEAR_B, /* Linb */ PANGO_SCRIPT_TAI_LE, /* Tale */ PANGO_SCRIPT_UGARITIC, /* Ugar */ /* Unicode-4.1 additions */ PANGO_SCRIPT_NEW_TAI_LUE, /* Talu */ 
PANGO_SCRIPT_BUGINESE, /* Bugi */ PANGO_SCRIPT_GLAGOLITIC, /* Glag */ PANGO_SCRIPT_TIFINAGH, /* Tfng */ PANGO_SCRIPT_SYLOTI_NAGRI, /* Sylo */ PANGO_SCRIPT_OLD_PERSIAN, /* Xpeo */ PANGO_SCRIPT_KHAROSHTHI, /* Khar */ /* Unicode-5.0 additions */ PANGO_SCRIPT_UNKNOWN, /* Zzzz */ PANGO_SCRIPT_BALINESE, /* Bali */ PANGO_SCRIPT_CUNEIFORM, /* Xsux */ PANGO_SCRIPT_PHOENICIAN, /* Phnx */ PANGO_SCRIPT_PHAGS_PA, /* Phag */ PANGO_SCRIPT_NKO /* Nkoo */ } PangoScript;
The PangoScript enumeration identifies different writing systems. The values correspond to the names as defined in the Unicode standard. Note that new types may be added in the future. Applications should be ready to handle unknown values. See Unicode Standard Annex 24: Script names.
PANGO_SCRIPT_INVALID_CODE |
a value never returned from pango_script_for_unichar()
|
PANGO_SCRIPT_COMMON |
a character used by multiple different scripts |
PANGO_SCRIPT_INHERITED |
a mark glyph that takes its script from the base glyph to which it is attached |
PANGO_SCRIPT_ARABIC |
Arabic |
PANGO_SCRIPT_ARMENIAN |
Armenian |
PANGO_SCRIPT_BENGALI |
Bengali |
PANGO_SCRIPT_BOPOMOFO |
Bopomofo |
PANGO_SCRIPT_CHEROKEE |
Cherokee |
PANGO_SCRIPT_COPTIC |
Coptic |
PANGO_SCRIPT_CYRILLIC |
Cyrillic |
PANGO_SCRIPT_DESERET |
Deseret |
PANGO_SCRIPT_DEVANAGARI |
Devanagari |
PANGO_SCRIPT_ETHIOPIC |
Ethiopic |
PANGO_SCRIPT_GEORGIAN |
Georgian |
PANGO_SCRIPT_GOTHIC |
Gothic |
PANGO_SCRIPT_GREEK |
Greek |
PANGO_SCRIPT_GUJARATI |
Gujarati |
PANGO_SCRIPT_GURMUKHI |
Gurmukhi |
PANGO_SCRIPT_HAN |
Han |
PANGO_SCRIPT_HANGUL |
Hangul |
PANGO_SCRIPT_HEBREW |
Hebrew |
PANGO_SCRIPT_HIRAGANA |
Hiragana |
PANGO_SCRIPT_KANNADA |
Kannada |
PANGO_SCRIPT_KATAKANA |
Katakana |
PANGO_SCRIPT_KHMER |
Khmer |
PANGO_SCRIPT_LAO |
Lao |
PANGO_SCRIPT_LATIN |
Latin |
PANGO_SCRIPT_MALAYALAM |
Malayalam |
PANGO_SCRIPT_MONGOLIAN |
Mongolian |
PANGO_SCRIPT_MYANMAR |
Myanmar |
PANGO_SCRIPT_OGHAM |
Ogham |
PANGO_SCRIPT_OLD_ITALIC |
Old Italic |
PANGO_SCRIPT_ORIYA |
Oriya |
PANGO_SCRIPT_RUNIC |
Runic |
PANGO_SCRIPT_SINHALA |
Sinhala |
PANGO_SCRIPT_SYRIAC |
Syriac |
PANGO_SCRIPT_TAMIL |
Tamil |
PANGO_SCRIPT_TELUGU |
Telugu |
PANGO_SCRIPT_THAANA |
Thaana |
PANGO_SCRIPT_THAI |
Thai |
PANGO_SCRIPT_TIBETAN |
Tibetan |
PANGO_SCRIPT_CANADIAN_ABORIGINAL |
Canadian Aboriginal |
PANGO_SCRIPT_YI |
Yi |
PANGO_SCRIPT_TAGALOG |
Tagalog |
PANGO_SCRIPT_HANUNOO |
Hanunoo |
PANGO_SCRIPT_BUHID |
Buhid |
PANGO_SCRIPT_TAGBANWA |
Tagbanwa |
PANGO_SCRIPT_BRAILLE |
Braille |
PANGO_SCRIPT_CYPRIOT |
Cypriot |
PANGO_SCRIPT_LIMBU |
Limbu |
PANGO_SCRIPT_OSMANYA |
Osmanya |
PANGO_SCRIPT_SHAVIAN |
Shavian |
PANGO_SCRIPT_LINEAR_B |
Linear B |
PANGO_SCRIPT_TAI_LE |
Tai Le |
PANGO_SCRIPT_UGARITIC |
Ugaritic |
PANGO_SCRIPT_NEW_TAI_LUE |
New Tai Lue |
PANGO_SCRIPT_BUGINESE |
Buginese |
PANGO_SCRIPT_GLAGOLITIC |
Glagolitic |
PANGO_SCRIPT_TIFINAGH |
Tifinagh |
PANGO_SCRIPT_SYLOTI_NAGRI |
Syloti Nagri |
PANGO_SCRIPT_OLD_PERSIAN |
Old Persian |
PANGO_SCRIPT_KHAROSHTHI |
Kharoshthi |
PANGO_SCRIPT_UNKNOWN |
an unassigned code point |
PANGO_SCRIPT_BALINESE |
Balinese |
PANGO_SCRIPT_CUNEIFORM |
Cuneiform |
PANGO_SCRIPT_PHOENICIAN |
Phoenician |
PANGO_SCRIPT_PHAGS_PA |
Phags-pa |
PANGO_SCRIPT_NKO |
N'Ko |
#define PANGO_TYPE_SCRIPT (pango_script_get_type())
The GObject type for PangoScript
PangoScript pango_script_for_unichar (gunichar ch);
Looks up the PangoScript for a particular character (as defined by
Unicode Standard Annex 24). No check is made for ch
being a
valid Unicode character; if you pass in an invalid character, the
result is undefined.
ch : |
a Unicode character |
Returns : | the PangoScript for the character. |
Since 1.4
PangoLanguage* pango_script_get_sample_language (PangoScript script);
Given a script, finds a language tag that is reasonably
representative of that script. This will usually be the
most widely spoken or used language written in that script:
for instance, the sample language for PANGO_SCRIPT_CYRILLIC
is ru
(Russian), the sample language
for PANGO_SCRIPT_ARABIC
is ar
.
For some
scripts, no sample language will be returned because there
is no language that is sufficiently representative. The best
example of this is PANGO_SCRIPT_HAN
, where various different
variants of written Chinese, Japanese, and Korean all use
significantly different sets of Han characters and forms
of shared characters. No sample language can be provided
for many historical scripts as well.
script : |
a PangoScript |
Returns : | a PangoLanguage that is representative
of the script, or NULL if no such language exists.
|
Since 1.4
gboolean pango_language_includes_script (PangoLanguage *language, PangoScript script);
Determines if script
is one of the scripts used to
write language
. The returned value is conservative;
if nothing is known about the language tag language
,
TRUE
will be returned, since, as far as Pango knows,
script
might be used to write language
.
This routine is used in Pango's itemization process when determining if a supplied language tag is relevant to a particular section of text. It probably is not useful for applications in most circumstances.
language : |
a PangoLanguage |
script : |
a PangoScript |
Returns : | TRUE if script is one of the scripts used
to write language , or if nothing is known about language .
|
Since 1.4
PangoScriptIter* pango_script_iter_new (const char *text, int length);
Create a new PangoScriptIter, used to break a string of
Unicode text into runs by script. No copy is made of text
, so
the caller needs to make sure it remains valid until
the iterator is freed with pango_script_iter_free()
.
text : |
a UTF-8 string |
length : |
length of text , or -1 if text is nul-terminated.
|
Returns : | the new script iterator, initialized
to point at the first range in the text, which should be
freed with pango_script_iter_free() . If the string is
empty, it will point at an empty range.
|
Since 1.4
void pango_script_iter_get_range (PangoScriptIter *iter, G_CONST_RETURN char **start, G_CONST_RETURN char **end, PangoScript *script);
Gets information about the range to which iter
currently points.
The range is the set of locations p where *start <= p < *end.
(That is, it doesn't include the character stored at *end.)
iter : |
a PangoScriptIter |
start : |
location to store start position of the range, or NULL
|
end : |
location to store end position of the range, or NULL
|
script : |
location to store script for range, or NULL
|
Since 1.4
gboolean pango_script_iter_next (PangoScriptIter *iter);
Advances a PangoScriptIter to the next range. If iter
is already at the end, it is left unchanged and FALSE
is returned.
iter : |
a PangoScriptIter |
Returns : | TRUE if iter was successfully advanced.
|
Since 1.4
void pango_script_iter_free (PangoScriptIter *iter);
Frees a PangoScriptIter created with pango_script_iter_new()
.
iter : |
a PangoScriptIter |
Since 1.4