• Main Page
  • Related Pages
  • Modules
  • Namespaces
  • Classes
  • Files
  • File List
  • File Members

src/pcre/ucp.h

/*************************************************
*          Unicode Property Table handler        *
*************************************************/
00004 
00005 #ifndef _UCP_H
00006 #define _UCP_H
00007 
/* This file contains definitions of the property values that are returned by
the UCD access macros. New values that are added for new releases of Unicode
should always be at the end of each enum, for backwards compatibility. */
00011 
/* These are the general character categories.

The enumerators are numbered implicitly, starting at 0 for ucp_C and ending at
6 for ucp_Z, so their order is significant. A new category must be appended
after the last entry; inserting or reordering would change existing values. */

enum {
  ucp_C,     /* Other */
  ucp_L,     /* Letter */
  ucp_M,     /* Mark */
  ucp_N,     /* Number */
  ucp_P,     /* Punctuation */
  ucp_S,     /* Symbol */
  ucp_Z      /* Separator */
};
00023 
/* These are the particular character types.

Each name is "ucp_" followed by a two-letter code whose first letter is one of
the general category letters (C, L, M, N, P, S, Z). The enumerators are
numbered implicitly, starting at 0 for ucp_Cc and ending at 29 for ucp_Zs, so
their order is significant. A new type must be appended after the last entry;
inserting or reordering would change existing values. */

enum {
  ucp_Cc,    /* Control */
  ucp_Cf,    /* Format */
  ucp_Cn,    /* Unassigned */
  ucp_Co,    /* Private use */
  ucp_Cs,    /* Surrogate */
  ucp_Ll,    /* Lower case letter */
  ucp_Lm,    /* Modifier letter */
  ucp_Lo,    /* Other letter */
  ucp_Lt,    /* Title case letter */
  ucp_Lu,    /* Upper case letter */
  ucp_Mc,    /* Spacing mark */
  ucp_Me,    /* Enclosing mark */
  ucp_Mn,    /* Non-spacing mark */
  ucp_Nd,    /* Decimal number */
  ucp_Nl,    /* Letter number */
  ucp_No,    /* Other number */
  ucp_Pc,    /* Connector punctuation */
  ucp_Pd,    /* Dash punctuation */
  ucp_Pe,    /* Close punctuation */
  ucp_Pf,    /* Final punctuation */
  ucp_Pi,    /* Initial punctuation */
  ucp_Po,    /* Other punctuation */
  ucp_Ps,    /* Open punctuation */
  ucp_Sc,    /* Currency symbol */
  ucp_Sk,    /* Modifier symbol */
  ucp_Sm,    /* Mathematical symbol */
  ucp_So,    /* Other symbol */
  ucp_Zl,    /* Line separator */
  ucp_Zp,    /* Paragraph separator */
  ucp_Zs     /* Space separator */
};
00058 
/* These are the script identifications.

The enumerators are numbered implicitly, starting at 0 for ucp_Arabic and
ending at 76 for ucp_Vai, so their order is significant. The first group
(ucp_Arabic to ucp_Yi) is in alphabetical order; scripts added for later
Unicode releases follow in their own groups and are therefore not merged into
that ordering. A new script must be appended after the last entry; inserting
or reordering would change existing values. */

enum {
  ucp_Arabic,
  ucp_Armenian,
  ucp_Bengali,
  ucp_Bopomofo,
  ucp_Braille,
  ucp_Buginese,
  ucp_Buhid,
  ucp_Canadian_Aboriginal,
  ucp_Cherokee,
  ucp_Common,
  ucp_Coptic,
  ucp_Cypriot,
  ucp_Cyrillic,
  ucp_Deseret,
  ucp_Devanagari,
  ucp_Ethiopic,
  ucp_Georgian,
  ucp_Glagolitic,
  ucp_Gothic,
  ucp_Greek,
  ucp_Gujarati,
  ucp_Gurmukhi,
  ucp_Han,
  ucp_Hangul,
  ucp_Hanunoo,
  ucp_Hebrew,
  ucp_Hiragana,
  ucp_Inherited,
  ucp_Kannada,
  ucp_Katakana,
  ucp_Kharoshthi,
  ucp_Khmer,
  ucp_Lao,
  ucp_Latin,
  ucp_Limbu,
  ucp_Linear_B,
  ucp_Malayalam,
  ucp_Mongolian,
  ucp_Myanmar,
  ucp_New_Tai_Lue,
  ucp_Ogham,
  ucp_Old_Italic,
  ucp_Old_Persian,
  ucp_Oriya,
  ucp_Osmanya,
  ucp_Runic,
  ucp_Shavian,
  ucp_Sinhala,
  ucp_Syloti_Nagri,
  ucp_Syriac,
  ucp_Tagalog,
  ucp_Tagbanwa,
  ucp_Tai_Le,
  ucp_Tamil,
  ucp_Telugu,
  ucp_Thaana,
  ucp_Thai,
  ucp_Tibetan,
  ucp_Tifinagh,
  ucp_Ugaritic,
  ucp_Yi,
  /* New for Unicode 5.0: */
  ucp_Balinese,
  ucp_Cuneiform,
  ucp_Nko,
  ucp_Phags_Pa,
  ucp_Phoenician,
  /* New for Unicode 5.1: */
  ucp_Carian,
  ucp_Cham,
  ucp_Kayah_Li,
  ucp_Lepcha,
  ucp_Lycian,
  ucp_Lydian,
  ucp_Ol_Chiki,
  ucp_Rejang,
  ucp_Saurashtra,
  ucp_Sundanese,
  ucp_Vai
};
00142 
00143 #endif
00144 
/* End of ucp.h */