Class CharScript
- java.lang.Object
-
- org.apache.fop.complexscripts.util.CharScript
-
public final class CharScript extends java.lang.Object
Script related utilities.
This work was originally authored by Glenn Adams (gadams@apache.org).
-
-
Field Summary
Fields Modifier and Type Field Description static int
SCRIPT_ARABIC
arabic script constantstatic int
SCRIPT_BENGALI
bengali script constantstatic int
SCRIPT_BENGALI_2
bengali 2 script constantstatic int
SCRIPT_BOPOMOFO
bopomofo script constantstatic int
SCRIPT_BURMESE
burmese script constantstatic int
SCRIPT_CYRILLIC
cyrillic script constantstatic int
SCRIPT_DEVANAGARI
devanagari script constantstatic int
SCRIPT_DEVANAGARI_2
devanagari 2 script constantstatic int
SCRIPT_ETHIOPIC
ethiopic script constantstatic int
SCRIPT_GEORGIAN
georgian script constantstatic int
SCRIPT_GREEK
greek script constantstatic int
SCRIPT_GUJARATI
gujarati script constantstatic int
SCRIPT_GUJARATI_2
gujarati 2 script constantstatic int
SCRIPT_GURMUKHI
gurmukhi script constantstatic int
SCRIPT_GURMUKHI_2
gurmukhi 2 script constantstatic int
SCRIPT_HAN
han script constantstatic int
SCRIPT_HANGUL
hangul script constantstatic int
SCRIPT_HEBREW
hebrew script constantstatic int
SCRIPT_HIRAGANA
hiragana script constantstatic int
SCRIPT_KANNADA
kannada script constantstatic int
SCRIPT_KANNADA_2
kannada 2 script constantstatic int
SCRIPT_KATAKANA
katakana script constantstatic int
SCRIPT_KHMER
khmer script constantstatic int
SCRIPT_LAO
lao script constantstatic int
SCRIPT_LATIN
latin script constantstatic int
SCRIPT_MALAYALAM
malayalam script constantstatic int
SCRIPT_MALAYALAM_2
malayalam 2 script constantstatic int
SCRIPT_MATH
math script constantstatic int
SCRIPT_MONGOLIAN
mongolian script constantstatic int
SCRIPT_ORIYA
oriya script constantstatic int
SCRIPT_ORIYA_2
oriya 2 script constantstatic int
SCRIPT_SINHALESE
sinhalese script constantstatic int
SCRIPT_SYMBOL
symbol script constantstatic int
SCRIPT_TAMIL
tamil script constantstatic int
SCRIPT_TAMIL_2
tamil 2 script constantstatic int
SCRIPT_TELUGU
telugu script constantstatic int
SCRIPT_TELUGU_2
telugu 2 script constantstatic int
SCRIPT_THAI
thai script constantstatic int
SCRIPT_TIBETAN
tibetan script constantstatic int
SCRIPT_UNCODED
uncoded script constantstatic int
SCRIPT_UNDETERMINED
undetermined script constant
-
Method Summary
All Methods Static Methods Concrete Methods Modifier and Type Method Description static int
dominantScript(java.lang.CharSequence cs)
Determine the dominant script of a character sequence.static boolean
isArabic(int c)
Determine if character c belongs to the arabic script.static boolean
isBengali(int c)
Determine if character c belongs to the bengali script.static boolean
isBopomofo(int c)
Determine if character c belongs to the bopomofo script.static boolean
isBurmese(int c)
Determine if character c belongs to the burmese script.static boolean
isCyrillic(int c)
Determine if character c belongs to the cyrillic script.static boolean
isDevanagari(int c)
Determine if character c belongs to the devanagari script.static boolean
isDigit(int c)
Determine if character c is a digit.static boolean
isEthiopic(int c)
Determine if character c belongs to the ethiopic (amharic) script.static boolean
isGeorgian(int c)
Determine if character c belongs to the georgian script.static boolean
isGreek(int c)
Determine if character c belongs to the greek script.static boolean
isGujarati(int c)
Determine if character c belongs to the gujarati script.static boolean
isGurmukhi(int c)
Determine if character c belongs to the gurmukhi script.static boolean
isHan(int c)
Determine if character c belongs to the han (unified cjk) script.static boolean
isHangul(int c)
Determine if character c belongs to the hangul script.static boolean
isHebrew(int c)
Determine if character c belongs to the hebrew script.static boolean
isHiragana(int c)
Determine if character c belongs to the hiragana script.static boolean
isIndicScript(int script)
Determine if script code denotes an 'Indic' script, where a script is an 'Indic' script if it is intended to be processed by the generic 'Indic' Script Processor.static boolean
isIndicScript(java.lang.String script)
Determine if script tag denotes an 'Indic' script, where a script is an 'Indic' script if it is intended to be processed by the generic 'Indic' Script Processor.static boolean
isKannada(int c)
Determine if character c belongs to the kannada script.static boolean
isKatakana(int c)
Determine if character c belongs to the katakana script.static boolean
isKhmer(int c)
Determine if character c belongs to the khmer script.static boolean
isLao(int c)
Determine if character c belongs to the lao script.static boolean
isLatin(int c)
Determine if character c belongs to the latin script.static boolean
isMalayalam(int c)
Determine if character c belongs to the malayalam script.static boolean
isMongolian(int c)
Determine if character c belongs to the mongolian script.static boolean
isOriya(int c)
Determine if character c belongs to the oriya script.static boolean
isPunctuation(int c)
Determine if character c is punctuation.static boolean
isSinhalese(int c)
Determine if character c belongs to the sinhalese script.static boolean
isTamil(int c)
Determine if character c belongs to the tamil script.static boolean
isTelugu(int c)
Determine if character c belongs to the telugu script.static boolean
isThai(int c)
Determine if character c belongs to the thai script.static boolean
isTibetan(int c)
Determine if character c belongs to the tibetan script.static int
scriptCodeFromTag(java.lang.String tag)
Determine the internal script code associated with a script tag.static int
scriptOf(int c)
Obtain ISO15924 numeric script code of character.static int[]
scriptsOf(java.lang.CharSequence cs)
Obtain the script codes of each character in a character sequence.static java.lang.String
scriptTagFromCode(int code)
Determine the script tag associated with an internal script code.static int
useV2IndicRules(int sc)
Obtain the V2 indic script code corresponding to V1 indic script code SC if and only if V2 indic rules apply; otherwise return SC.
-
-
-
Field Detail
-
SCRIPT_HEBREW
public static final int SCRIPT_HEBREW
hebrew script constant- See Also:
- Constant Field Values
-
SCRIPT_MONGOLIAN
public static final int SCRIPT_MONGOLIAN
mongolian script constant- See Also:
- Constant Field Values
-
SCRIPT_ARABIC
public static final int SCRIPT_ARABIC
arabic script constant- See Also:
- Constant Field Values
-
SCRIPT_GREEK
public static final int SCRIPT_GREEK
greek script constant- See Also:
- Constant Field Values
-
SCRIPT_LATIN
public static final int SCRIPT_LATIN
latin script constant- See Also:
- Constant Field Values
-
SCRIPT_CYRILLIC
public static final int SCRIPT_CYRILLIC
cyrillic script constant- See Also:
- Constant Field Values
-
SCRIPT_GEORGIAN
public static final int SCRIPT_GEORGIAN
georgian script constant- See Also:
- Constant Field Values
-
SCRIPT_BOPOMOFO
public static final int SCRIPT_BOPOMOFO
bopomofo script constant- See Also:
- Constant Field Values
-
SCRIPT_HANGUL
public static final int SCRIPT_HANGUL
hangul script constant- See Also:
- Constant Field Values
-
SCRIPT_GURMUKHI
public static final int SCRIPT_GURMUKHI
gurmukhi script constant- See Also:
- Constant Field Values
-
SCRIPT_GURMUKHI_2
public static final int SCRIPT_GURMUKHI_2
gurmukhi 2 script constant- See Also:
- Constant Field Values
-
SCRIPT_DEVANAGARI
public static final int SCRIPT_DEVANAGARI
devanagari script constant- See Also:
- Constant Field Values
-
SCRIPT_DEVANAGARI_2
public static final int SCRIPT_DEVANAGARI_2
devanagari 2 script constant- See Also:
- Constant Field Values
-
SCRIPT_GUJARATI
public static final int SCRIPT_GUJARATI
gujarati script constant- See Also:
- Constant Field Values
-
SCRIPT_GUJARATI_2
public static final int SCRIPT_GUJARATI_2
gujarati 2 script constant- See Also:
- Constant Field Values
-
SCRIPT_BENGALI
public static final int SCRIPT_BENGALI
bengali script constant- See Also:
- Constant Field Values
-
SCRIPT_BENGALI_2
public static final int SCRIPT_BENGALI_2
bengali 2 script constant- See Also:
- Constant Field Values
-
SCRIPT_ORIYA
public static final int SCRIPT_ORIYA
oriya script constant- See Also:
- Constant Field Values
-
SCRIPT_ORIYA_2
public static final int SCRIPT_ORIYA_2
oriya 2 script constant- See Also:
- Constant Field Values
-
SCRIPT_TIBETAN
public static final int SCRIPT_TIBETAN
tibetan script constant- See Also:
- Constant Field Values
-
SCRIPT_TELUGU
public static final int SCRIPT_TELUGU
telugu script constant- See Also:
- Constant Field Values
-
SCRIPT_TELUGU_2
public static final int SCRIPT_TELUGU_2
telugu 2 script constant- See Also:
- Constant Field Values
-
SCRIPT_KANNADA
public static final int SCRIPT_KANNADA
kannada script constant- See Also:
- Constant Field Values
-
SCRIPT_KANNADA_2
public static final int SCRIPT_KANNADA_2
kannada 2 script constant- See Also:
- Constant Field Values
-
SCRIPT_TAMIL
public static final int SCRIPT_TAMIL
tamil script constant- See Also:
- Constant Field Values
-
SCRIPT_TAMIL_2
public static final int SCRIPT_TAMIL_2
tamil 2 script constant- See Also:
- Constant Field Values
-
SCRIPT_MALAYALAM
public static final int SCRIPT_MALAYALAM
malayalam script constant- See Also:
- Constant Field Values
-
SCRIPT_MALAYALAM_2
public static final int SCRIPT_MALAYALAM_2
malayalam 2 script constant- See Also:
- Constant Field Values
-
SCRIPT_SINHALESE
public static final int SCRIPT_SINHALESE
sinhalese script constant- See Also:
- Constant Field Values
-
SCRIPT_BURMESE
public static final int SCRIPT_BURMESE
burmese script constant- See Also:
- Constant Field Values
-
SCRIPT_THAI
public static final int SCRIPT_THAI
thai script constant- See Also:
- Constant Field Values
-
SCRIPT_KHMER
public static final int SCRIPT_KHMER
khmer script constant- See Also:
- Constant Field Values
-
SCRIPT_LAO
public static final int SCRIPT_LAO
lao script constant- See Also:
- Constant Field Values
-
SCRIPT_HIRAGANA
public static final int SCRIPT_HIRAGANA
hiragana script constant- See Also:
- Constant Field Values
-
SCRIPT_ETHIOPIC
public static final int SCRIPT_ETHIOPIC
ethiopic script constant- See Also:
- Constant Field Values
-
SCRIPT_HAN
public static final int SCRIPT_HAN
han script constant- See Also:
- Constant Field Values
-
SCRIPT_KATAKANA
public static final int SCRIPT_KATAKANA
katakana script constant- See Also:
- Constant Field Values
-
SCRIPT_MATH
public static final int SCRIPT_MATH
math script constant- See Also:
- Constant Field Values
-
SCRIPT_SYMBOL
public static final int SCRIPT_SYMBOL
symbol script constant- See Also:
- Constant Field Values
-
SCRIPT_UNDETERMINED
public static final int SCRIPT_UNDETERMINED
undetermined script constant- See Also:
- Constant Field Values
-
SCRIPT_UNCODED
public static final int SCRIPT_UNCODED
uncoded script constant- See Also:
- Constant Field Values
-
-
Method Detail
-
isPunctuation
public static boolean isPunctuation(int c)
Determine if character c is punctuation.- Parameters:
c
- a character represented as a unicode scalar value- Returns:
- true if character is punctuation
-
isDigit
public static boolean isDigit(int c)
Determine if character c is a digit.- Parameters:
c
- a character represented as a unicode scalar value- Returns:
- true if character is a digit
-
isHebrew
public static boolean isHebrew(int c)
Determine if character c belongs to the hebrew script.- Parameters:
c
- a character represented as a unicode scalar value- Returns:
- true if character belongs to hebrew script
-
isMongolian
public static boolean isMongolian(int c)
Determine if character c belongs to the mongolian script.- Parameters:
c
- a character represented as a unicode scalar value- Returns:
- true if character belongs to mongolian script
-
isArabic
public static boolean isArabic(int c)
Determine if character c belongs to the arabic script.- Parameters:
c
- a character represented as a unicode scalar value- Returns:
- true if character belongs to arabic script
-
isGreek
public static boolean isGreek(int c)
Determine if character c belongs to the greek script.- Parameters:
c
- a character represented as a unicode scalar value- Returns:
- true if character belongs to greek script
-
isLatin
public static boolean isLatin(int c)
Determine if character c belongs to the latin script.- Parameters:
c
- a character represented as a unicode scalar value- Returns:
- true if character belongs to latin script
-
isCyrillic
public static boolean isCyrillic(int c)
Determine if character c belongs to the cyrillic script.- Parameters:
c
- a character represented as a unicode scalar value- Returns:
- true if character belongs to cyrillic script
-
isGeorgian
public static boolean isGeorgian(int c)
Determine if character c belongs to the georgian script.- Parameters:
c
- a character represented as a unicode scalar value- Returns:
- true if character belongs to georgian script
-
isHangul
public static boolean isHangul(int c)
Determine if character c belongs to the hangul script.- Parameters:
c
- a character represented as a unicode scalar value- Returns:
- true if character belongs to hangul script
-
isGurmukhi
public static boolean isGurmukhi(int c)
Determine if character c belongs to the gurmukhi script.- Parameters:
c
- a character represented as a unicode scalar value- Returns:
- true if character belongs to gurmukhi script
-
isDevanagari
public static boolean isDevanagari(int c)
Determine if character c belongs to the devanagari script.- Parameters:
c
- a character represented as a unicode scalar value- Returns:
- true if character belongs to devanagari script
-
isGujarati
public static boolean isGujarati(int c)
Determine if character c belongs to the gujarati script.- Parameters:
c
- a character represented as a unicode scalar value- Returns:
- true if character belongs to gujarati script
-
isBengali
public static boolean isBengali(int c)
Determine if character c belongs to the bengali script.- Parameters:
c
- a character represented as a unicode scalar value- Returns:
- true if character belongs to bengali script
-
isOriya
public static boolean isOriya(int c)
Determine if character c belongs to the oriya script.- Parameters:
c
- a character represented as a unicode scalar value- Returns:
- true if character belongs to oriya script
-
isTibetan
public static boolean isTibetan(int c)
Determine if character c belongs to the tibetan script.- Parameters:
c
- a character represented as a unicode scalar value- Returns:
- true if character belongs to tibetan script
-
isTelugu
public static boolean isTelugu(int c)
Determine if character c belongs to the telugu script.- Parameters:
c
- a character represented as a unicode scalar value- Returns:
- true if character belongs to telugu script
-
isKannada
public static boolean isKannada(int c)
Determine if character c belongs to the kannada script.- Parameters:
c
- a character represented as a unicode scalar value- Returns:
- true if character belongs to kannada script
-
isTamil
public static boolean isTamil(int c)
Determine if character c belongs to the tamil script.- Parameters:
c
- a character represented as a unicode scalar value- Returns:
- true if character belongs to tamil script
-
isMalayalam
public static boolean isMalayalam(int c)
Determine if character c belongs to the malayalam script.- Parameters:
c
- a character represented as a unicode scalar value- Returns:
- true if character belongs to malayalam script
-
isSinhalese
public static boolean isSinhalese(int c)
Determine if character c belongs to the sinhalese script.- Parameters:
c
- a character represented as a unicode scalar value- Returns:
- true if character belongs to sinhalese script
-
isBurmese
public static boolean isBurmese(int c)
Determine if character c belongs to the burmese script.- Parameters:
c
- a character represented as a unicode scalar value- Returns:
- true if character belongs to burmese script
-
isThai
public static boolean isThai(int c)
Determine if character c belongs to the thai script.- Parameters:
c
- a character represented as a unicode scalar value- Returns:
- true if character belongs to thai script
-
isKhmer
public static boolean isKhmer(int c)
Determine if character c belongs to the khmer script.- Parameters:
c
- a character represented as a unicode scalar value- Returns:
- true if character belongs to khmer script
-
isLao
public static boolean isLao(int c)
Determine if character c belongs to the lao script.- Parameters:
c
- a character represented as a unicode scalar value- Returns:
- true if character belongs to lao script
-
isEthiopic
public static boolean isEthiopic(int c)
Determine if character c belongs to the ethiopic (amharic) script.- Parameters:
c
- a character represented as a unicode scalar value- Returns:
- true if character belongs to ethiopic (amharic) script
-
isHan
public static boolean isHan(int c)
Determine if character c belongs to the han (unified cjk) script.- Parameters:
c
- a character represented as a unicode scalar value- Returns:
- true if character belongs to han (unified cjk) script
-
isBopomofo
public static boolean isBopomofo(int c)
Determine if character c belongs to the bopomofo script.- Parameters:
c
- a character represented as a unicode scalar value- Returns:
- true if character belongs to bopomofo script
-
isHiragana
public static boolean isHiragana(int c)
Determine if character c belongs to the hiragana script.- Parameters:
c
- a character represented as a unicode scalar value- Returns:
- true if character belongs to hiragana script
-
isKatakana
public static boolean isKatakana(int c)
Determine if character c belongs to the katakana script.- Parameters:
c
- a character represented as a unicode scalar value- Returns:
- true if character belongs to katakana script
-
scriptOf
public static int scriptOf(int c)
Obtain ISO15924 numeric script code of character. If the script is not, or cannot be, determined, then the script code 998 ('zyyy') is returned.- Parameters:
c
- the character to obtain script- Returns:
- an ISO15924 script code
-
useV2IndicRules
public static int useV2IndicRules(int sc)
Obtain the V2 indic script code corresponding to V1 indic script code SC if and only if V2 indic rules apply; otherwise return SC.- Parameters:
sc
- a V1 indic script code- Returns:
- either SC or the V2 flavor of SC if V2 indic rules apply
-
scriptsOf
public static int[] scriptsOf(java.lang.CharSequence cs)
Obtain the script codes of each character in a character sequence. If the script is not, or cannot be, determined for some character, then the script code 998 ('zyyy') is returned.- Parameters:
cs
- the character sequence- Returns:
- a (possibly empty) array of script codes
-
dominantScript
public static int dominantScript(java.lang.CharSequence cs)
Determine the dominant script of a character sequence.- Parameters:
cs
- the character sequence- Returns:
- the dominant script or SCRIPT_UNDETERMINED
-
isIndicScript
public static boolean isIndicScript(java.lang.String script)
Determine if script tag denotes an 'Indic' script, where a script is an 'Indic' script if it is intended to be processed by the generic 'Indic' Script Processor.- Parameters:
script
- a script tag- Returns:
- true if script tag is a designated 'Indic' script
-
isIndicScript
public static boolean isIndicScript(int script)
Determine if script code denotes an 'Indic' script, where a script is an 'Indic' script if it is intended to be processed by the generic 'Indic' Script Processor.- Parameters:
script
- a script code- Returns:
- true if script code is a designated 'Indic' script
-
scriptTagFromCode
public static java.lang.String scriptTagFromCode(int code)
Determine the script tag associated with an internal script code.- Parameters:
code
- the script code- Returns:
- a script tag
-
scriptCodeFromTag
public static int scriptCodeFromTag(java.lang.String tag)
Determine the internal script code associated with a script tag.- Parameters:
tag
- the script tag- Returns:
- a script code
-
-