Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members

fxunicode.h

Go to the documentation of this file.
00001 /********************************************************************************
00002 *                                                                               *
00003 *                   U N I C O D E   C h a r a c t e r   I n f o                 *
00004 *                                                                               *
00005 *********************************************************************************
00006 * Copyright (C) 2005,2006 by Jeroen van der Zijp.   All Rights Reserved.        *
00007 *********************************************************************************
00008 * This library is free software; you can redistribute it and/or                 *
00009 * modify it under the terms of the GNU Lesser General Public                    *
00010 * License as published by the Free Software Foundation; either                  *
00011 * version 2.1 of the License, or (at your option) any later version.            *
00012 *                                                                               *
00013 * This library is distributed in the hope that it will be useful,               *
00014 * but WITHOUT ANY WARRANTY; without even the implied warranty of                *
00015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU             *
00016 * Lesser General Public License for more details.                               *
00017 *                                                                               *
00018 * You should have received a copy of the GNU Lesser General Public              *
00019 * License along with this library; if not, write to the Free Software           *
00020 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.    *
00021 *********************************************************************************
00022 * $Id: fxunicode.h,v 1.3 2006/01/22 17:58:14 fox Exp $                          *
00023 ********************************************************************************/
00024 #ifndef FXUNICODE_H
00025 #define FXUNICODE_H
00026 
00027 
00028 namespace FX {
00029 
00030 
00031 /// General Category; each enumerator comment gives the two-letter abbreviation followed by the category name (presumably the values Unicode::charCategory() returns -- TODO confirm)
00032 enum {
00033   CatNotAssigned          = 0,     ///< Cn Other, Not Assigned (no characters in the file have this property)
00034   CatControl              = 1,     ///< Cc Other, Control
00035   CatFormat               = 2,     ///< Cf Other, Format
00036   CatSurrogate            = 3,     ///< Cs Other, Surrogate
00037   CatOther                = 4,     ///< Co Other, Private Use (despite the generic name, this is the Private Use category)
00038   CatMarkNonSpacing       = 5,     ///< Mn Mark, Nonspacing
00039   CatMarkSpacingCombining = 6,     ///< Mc Mark, Spacing Combining
00040   CatMarkEnclosing        = 7,     ///< Me Mark, Enclosing
00041   CatSeparatorSpace       = 8,     ///< Zs Separator, Space
00042   CatSeparatorLine        = 9,     ///< Zl Separator, Line
00043   CatSeparatorParagraph   = 10,    ///< Zp Separator, Paragraph
00044   CatLetterUpper          = 11,    ///< Lu Letter, Uppercase
00045   CatLetterLower          = 12,    ///< Ll Letter, Lowercase
00046   CatLetterTitle          = 13,    ///< Lt Letter, Titlecase
00047   CatLetterModifier       = 14,    ///< Lm Letter, Modifier
00048   CatLetterOther          = 15,    ///< Lo Letter, Other
00049   CatNumberLetter         = 16,    ///< Nl Number, Letter
00050   CatNumberDecimal        = 17,    ///< Nd Number, Decimal Digit
00051   CatNumberOther          = 18,    ///< No Number, Other
00052   CatPunctConnector       = 19,    ///< Pc Punctuation, Connector
00053   CatPunctDash            = 20,    ///< Pd Punctuation, Dash
00054   CatPunctOpen            = 21,    ///< Ps Punctuation, Open
00055   CatPunctClose           = 22,    ///< Pe Punctuation, Close
00056   CatPunctInitial         = 23,    ///< Pi Punctuation, Initial quote (may behave like Ps or Pe depending on usage)
00057   CatPunctFinal           = 24,    ///< Pf Punctuation, Final quote (may behave like Ps or Pe depending on usage)
00058   CatPunctOther           = 25,    ///< Po Punctuation, Other
00059   CatSymbolMath           = 26,    ///< Sm Symbol, Math
00060   CatSymbolCurrency       = 27,    ///< Sc Symbol, Currency
00061   CatSymbolModifier       = 28,    ///< Sk Symbol, Modifier
00062   CatSymbolOther          = 29     ///< So Symbol, Other
00063   };
00064 
00065 
00066 /// Bidi (bidirectional text) character types (presumably the values Unicode::charDirection() returns -- TODO confirm)
00067 enum {
00068   DirL   = 0,           ///< Left-to-Right
00069   DirLRE = 1,           ///< Left-to-Right Embedding
00070   DirLRO = 2,           ///< Left-to-Right Override
00071   DirR   = 3,           ///< Right-to-Left
00072   DirAL  = 4,           ///< Right-to-Left Arabic
00073   DirRLE = 5,           ///< Right-to-Left Embedding
00074   DirRLO = 6,           ///< Right-to-Left Override
00075   DirPDF = 7,           ///< Pop Directional Format
00076   DirEN  = 8,           ///< European Number
00077   DirES  = 9,           ///< European Number Separator
00078   DirET  = 10,          ///< European Number Terminator
00079   DirAN  = 11,          ///< Arabic Number
00080   DirCS  = 12,          ///< Common Number Separator
00081   DirNSM = 13,          ///< Non-Spacing Mark
00082   DirBN  = 14,          ///< Boundary Neutral
00083   DirB   = 15,          ///< Paragraph Separator
00084   DirS   = 16,          ///< Segment Separator
00085   DirWS  = 17,          ///< Whitespace
00086   DirON  = 18           ///< Other Neutrals
00087   };
00088 
00089 
00090 /// Arabic joining types (presumably the values Unicode::joiningType() returns -- TODO confirm)
00091 enum {
00092   NonJoining   = 0,
00093   RightJoining = 1,
00094   DualJoining  = 2,
00095   JoinCausing  = 3
00096   };
00097 
00098 
00099 /// Combining class; named constants for the positional classes only (Unicode::charCombining() documents zero as meaning starter, for which no constant is defined here)
00100 enum {
00101   CombBelowLeftAtt  = 200,     ///< Below left attached
00102   CombBelowAtt      = 202,     ///< Below attached
00103   CombBelowRightAtt = 204,     ///< Below right attached
00104   CombLeftAtt       = 208,     ///< Left attached (reordrant around single base character)
00105   CombRightAtt      = 210,     ///< Right attached
00106   CombAboveLeftAtt  = 212,     ///< Above left attached
00107   CombAboveAtt      = 214,     ///< Above attached
00108   CombAboveRightAtt = 216,     ///< Above right attached
00109   CombBelowLeft     = 218,     ///< Below left
00110   CombBelow         = 220,     ///< Below
00111   CombBelowRight    = 222,     ///< Below right
00112   CombLeft          = 224,     ///< Left (reordrant around single base character)
00113   CombRight         = 226,     ///< Right
00114   CombAboveLeft     = 228,     ///< Above left
00115   CombAbove         = 230,     ///< Above
00116   CombAboveRight    = 232,     ///< Above right
00117   CombDoubleBelow   = 233,     ///< Double below
00118   CombDoubleAbove   = 234,     ///< Double above
00119   CombIotaSub       = 240      ///< Below (iota subscript)
00120   };
00121 
00122 
00123 /// Decompose types (presumably the values Unicode::decomposeType() returns -- TODO confirm)
00124 enum {
00125   DecNone      = 0,     ///< Non-decomposable
00126   DecFont      = 1,     ///< A font variant (e.g. a blackletter form)
00127   DecNoBreak   = 2,     ///< A no-break version of a space or hyphen
00128   DecInitial   = 3,     ///< An initial presentation form (Arabic)
00129   DecMedial    = 4,     ///< A medial presentation form (Arabic)
00130   DecFinal     = 5,     ///< A final presentation form (Arabic)
00131   DecIsolated  = 6,     ///< An isolated presentation form (Arabic)
00132   DecCircle    = 7,     ///< An encircled form
00133   DecSuper     = 8,     ///< A superscript form
00134   DecSub       = 9,     ///< A subscript form
00135   DecVertical  = 10,    ///< A vertical layout presentation form
00136   DecWide      = 11,    ///< A wide (or zenkaku) compatibility character
00137   DecNarrow    = 12,    ///< A narrow (or hankaku) compatibility character
00138   DecSmall     = 13,    ///< A small variant form (CNS compatibility)
00139   DecSquare    = 14,    ///< A CJK squared font variant
00140   DecFraction  = 15,    ///< A vulgar fraction form
00141   DecCompat    = 16,    ///< Compatible
00142   DecCanonical = 17     ///< Canonical (equivalent)
00143   };
00144 
00145 
00146 /// Line break types; each enumerator comment gives the two-letter class abbreviation followed by its name (presumably the values Unicode::lineBreakType() returns -- TODO confirm)
00147 enum {
00148   BreakUnknown    = 0,          ///< XX Unknown
00149 
00150   BreakMandarory  = 1,          ///< BK Mandatory Break (NOTE: the identifier is spelled "Mandarory"; it is a public name, so the spelling is kept as-is)
00151   BreakReturn     = 2,          ///< CR Carriage Return
00152   BreakLineFeed   = 3,          ///< LF Line Feed
00153   BreakCombMark   = 4,          ///< CM Attached Characters and Combining Marks
00154   BreakNextLine   = 5,          ///< NL Next Line
00155   BreakSurrogate  = 6,          ///< SG Surrogates
00156   BreakWordJoiner = 7,          ///< WJ Word Joiner
00157   BreakZWSpace    = 8,          ///< ZW Zero Width Space
00158   BreakGlue       = 9,          ///< GL Non-breaking Glue
00159   BreakContingent = 10,         ///< CB Contingent Break Opportunity
00160   BreakSpace      = 11,         ///< SP Space
00161 
00162   BreakBoth       = 12,         ///< B2 Break Opportunity Before and After
00163   BreakAfter      = 13,         ///< BA Break Opportunity After
00164   BreakBefore     = 14,         ///< BB Break Opportunity Before
00165   BreakHyphen     = 15,         ///< HY Hyphen
00166 
00167   BreakOpen       = 16,         ///< OP Opening Punctuation
00168   BreakClose      = 17,         ///< CL Closing Punctuation
00169   BreakQuote      = 18,         ///< QU Ambiguous Quotation
00170   BreakExclaim    = 19,         ///< EX Exclamation/Interrogation
00171   BreakInsep      = 20,         ///< IN Inseparable
00172   BreakNonStart   = 21,         ///< NS Non Starter
00173 
00174   BreakInfix      = 22,         ///< IS Infix Separator (Numeric)
00175   BreakNumeric    = 23,         ///< NU Numeric
00176   BreakPostfix    = 24,         ///< PO Postfix (Numeric)
00177   BreakPrefix     = 25,         ///< PR Prefix (Numeric)
00178   BreakSymbol     = 26,         ///< SY Symbols Allowing Breaks
00179 
00180   BreakOrdinary   = 27,         ///< AL Ordinary Alphabetic and Symbol Characters
00181   BreakIdeograph  = 28,         ///< ID Ideographic
00182   BreakComplex    = 29          ///< SA Complex Context (South East Asian)
00183   };
00184 
00185 
00186 /// Scripts; each enumerator comment gives the four-letter script code, with variant codes in parentheses (presumably the values Unicode::scriptType() returns -- TODO confirm)
00187 enum {
00188   ScriptCommon             = 0,       ///< Zyyy
00189   ScriptInherited          = 1,       ///< Qaai
00190 
00191   ScriptLatin              = 2,       ///< Latn (first of the group: European scripts)
00192   ScriptGreek              = 3,       ///< Grek
00193   ScriptCyrillic           = 4,       ///< Cyrl (Cyrs)
00194   ScriptArmenian           = 5,       ///< Armn
00195   ScriptGeorgian           = 6,       ///< Geor (Geon, Geoa)
00196   ScriptRunic              = 7,       ///< Runr
00197   ScriptOgham              = 8,       ///< Ogam
00198 
00199   ScriptHebrew             = 9,       ///< Hebr (first of the group: Middle eastern)
00200   ScriptArabic             = 10,      ///< Arab
00201   ScriptSyriac             = 11,      ///< Syrc (Syrj, Syrn, Syre)
00202   ScriptThaana             = 12,      ///< Thaa
00203 
00204   ScriptDevanagari         = 13,      ///< Deva (first of the group: Indic)
00205   ScriptBengali            = 14,      ///< Beng
00206   ScriptGurmukhi           = 15,      ///< Guru
00207   ScriptGujarati           = 16,      ///< Gujr
00208   ScriptOriya              = 17,      ///< Orya
00209   ScriptTamil              = 18,      ///< Taml
00210   ScriptTelugu             = 19,      ///< Telu
00211   ScriptKannada            = 20,      ///< Knda
00212   ScriptMalayalam          = 21,      ///< Mlym
00213   ScriptSinhala            = 22,      ///< Sinh
00214   ScriptThai               = 23,      ///< Thai
00215   ScriptLao                = 24,      ///< Laoo
00216   ScriptTibetan            = 25,      ///< Tibt
00217   ScriptMyanmar            = 26,      ///< Mymr
00218   ScriptKhmer              = 27,      ///< Khmr
00219 
00220   ScriptHan                = 28,      ///< Hani (first of the group: Asian)
00221   ScriptHiragana           = 29,      ///< Hira
00222   ScriptKatakana           = 30,      ///< Kana
00223   ScriptHangul             = 31,      ///< Hang
00224   ScriptBopomofo           = 32,      ///< Bopo
00225   ScriptYi                 = 33,      ///< Yiii
00226 
00227   ScriptEthiopic           = 34,      ///< Ethi (first of the group: Misc)
00228   ScriptCherokee           = 35,      ///< Cher
00229   ScriptCanadianAboriginal = 36,      ///< Cans
00230   ScriptMongolian          = 37,      ///< Mong
00231   ScriptGothic             = 38,      ///< Goth
00232 
00233   ScriptTagalog            = 39,      ///< Tglg
00234   ScriptHanunoo            = 40,      ///< Hano
00235   ScriptBuhid              = 41,      ///< Buhd
00236   ScriptTagbanwa           = 42,      ///< Tagb
00237   ScriptLimbu              = 43,      ///< Limb
00238   ScriptTaiLe              = 44,      ///< Tale
00239   ScriptUgaritic           = 45,      ///< Ugar
00240   ScriptOsmanya            = 46,      ///< Osma
00241   ScriptCypriot            = 47,      ///< Cprt
00242   ScriptShavian            = 48,      ///< Shaw
00243   ScriptDeseret            = 49,      ///< Dsrt
00244   ScriptKatakanaHiragana   = 50       ///< Hrkt
00245   };
00246 
00247 
00248 /// Unicode versions of common character functions; every function takes a single wide character (FXwchar), except charCompose which takes two
00249 namespace Unicode {
00250 
00251 /// Get wide character category
00252 extern FXAPI FXuint charCategory(FXwchar ucs);
00253 
00254 /// Get wide character direction
00255 extern FXAPI FXuint charDirection(FXwchar ucs);
00256 
00257 /// Get wide character decompose type
00258 extern FXAPI FXuint decomposeType(FXwchar ucs);
00259 
00260 /// Return number of wide characters in decomposition
00261 extern FXAPI FXuint charNumDecompose(FXwchar ucs);
00262 
00263 /// Return wide character decomposition (presumably charNumDecompose(ucs) characters long -- TODO confirm against implementation)
00264 extern FXAPI const FXwchar* charDecompose(FXwchar ucs);
00265 
00266 /// Return wide character composition from ucsa and ucsb
00267 extern FXAPI FXwchar charCompose(FXwchar ucsa,FXwchar ucsb);
00268 
00269 /// Get wide character joining
00270 extern FXAPI FXuint joiningType(FXwchar ucs);
00271 
00272 /// Get wide character symmetry
00273 extern FXAPI FXuint isSymmetric(FXwchar ucs);
00274 
00275 /// Get wide character combining type; zero means starter
00276 extern FXAPI FXuint charCombining(FXwchar ucs);
00277 
00278 /// Get numeric value of wide character (this includes hex value)
00279 extern FXAPI FXint digitValue(FXwchar ucs);
00280 
00281 /// Get linebreak type of wide character
00282 extern FXAPI FXuint lineBreakType(FXwchar ucs);
00283 
00284 
00285 /// Get mirror image of wide character or character itself
00286 extern FXAPI FXwchar mirrorImage(FXwchar ucs);
00287 
00288 /// Get script type of wide character
00289 extern FXAPI FXuint scriptType(FXwchar ucs);
00290 
00291 
00292 /// Unicode flavor of common functions (this heading covers all of the boolean predicates that follow)
00293 extern FXAPI bool hasCase(FXwchar ucs);
00294 extern FXAPI bool isUpper(FXwchar ucs);
00295 extern FXAPI bool isLower(FXwchar ucs);
00296 extern FXAPI bool isTitle(FXwchar ucs);
00297 extern FXAPI bool isAscii(FXwchar ucs);
00298 extern FXAPI bool isLetter(FXwchar ucs);
00299 extern FXAPI bool isDigit(FXwchar ucs);
00300 extern FXAPI bool isAlphaNumeric(FXwchar ucs);
00301 extern FXAPI bool isControl(FXwchar ucs);
00302 extern FXAPI bool isSpace(FXwchar ucs);
00303 extern FXAPI bool isBlank(FXwchar ucs);
00304 extern FXAPI bool isPunct(FXwchar ucs);
00305 extern FXAPI bool isGraph(FXwchar ucs);
00306 extern FXAPI bool isPrint(FXwchar ucs);
00307 extern FXAPI bool isHexDigit(FXwchar ucs);
00308 extern FXAPI bool isSymbol(FXwchar ucs);
00309 extern FXAPI bool isMark(FXwchar ucs);
00310 extern FXAPI bool isSep(FXwchar ucs);
00311 
00312 /// Case conversion (this heading covers toUpper, toLower and toTitle)
00313 extern FXAPI FXwchar toUpper(FXwchar ucs);
00314 extern FXAPI FXwchar toLower(FXwchar ucs);
00315 extern FXAPI FXwchar toTitle(FXwchar ucs);
00316 
00317 } // namespace Unicode
00318 
00319 
00320 }
00321 
00322 #endif

Copyright © 1997-2005 Jeroen van der Zijp