00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00030 #ifndef _UT_CHARSET_H_
00031 #define _UT_CHARSET_H_
00032
#include <stddef.h>
#include <sys/types.h>
00034
00035
00036
/**
 * Category of a character.
 *
 * The numeric values are spelled out because other code may depend on them:
 * the five UT_CTG_PONCT_INIT_* values are contiguous, and so are the five
 * UT_CTG_PONCT_FINAL_* values that immediately follow them.
 *
 * NOTE(review): "PONCT" is presumably short for punctuation, with INIT/FINAL
 * meaning opening/closing marks -- confirm against the charset tables.
 */
typedef enum UtCateg {
    UT_CTG_UNSET             = 0,

    /* Letters and digits. */
    UT_CTG_UPPERCASE         = 1,
    UT_CTG_LOWERCASE         = 2,
    UT_CTG_OTHER_LETTER      = 3,
    UT_CTG_NUMBER            = 4,

    /* Punctuation with no INIT/FINAL role. */
    UT_CTG_PONCTUATION       = 5,

    /* INIT group: 0..3 plus a catch-all. */
    UT_CTG_PONCT_INIT_0      = 6,
    UT_CTG_PONCT_INIT_1      = 7,
    UT_CTG_PONCT_INIT_2      = 8,
    UT_CTG_PONCT_INIT_3      = 9,
    UT_CTG_PONCT_INIT_OTHER  = 10,

    /* FINAL group: same layout as the INIT group. */
    UT_CTG_PONCT_FINAL_0     = 11,
    UT_CTG_PONCT_FINAL_1     = 12,
    UT_CTG_PONCT_FINAL_2     = 13,
    UT_CTG_PONCT_FINAL_3     = 14,
    UT_CTG_PONCT_FINAL_OTHER = 15,

    /* Everything else. */
    UT_CTG_CURRENCY          = 16,
    UT_CTG_SYMBOL            = 17,
    UT_CTG_CONTROL           = 18,
    UT_CTG_DELIMITER         = 19,
    UT_CTG_MARK              = 20,
    UT_CTG_OTHER             = 21
} UtCateg;
00069
00070
/**
 * Number of categories in the UT_CTG_PONCT_INIT_* group, counting from
 * UT_CTG_PONCT_INIT_0 to UT_CTG_PONCT_INIT_OTHER inclusive.
 *
 * The expansion is fully parenthesized so the macro can be used inside a
 * larger expression (e.g. `2 * UT_CTG_PONCT_IF_N`) without operator
 * precedence splitting it apart.
 */
#define UT_CTG_PONCT_IF_N ((UT_CTG_PONCT_INIT_OTHER) - (UT_CTG_PONCT_INIT_0) + 1)
00072
/**
 * Script identifier stored alongside a character's category.
 *
 * Declared as plain `unsigned char` (the type that the BSD `u_char` typedef
 * names) so the header does not depend on non-standard typedefs that
 * <sys/types.h> only exposes under certain feature-test macros.
 */
typedef unsigned char UtScript;
00077
00081 typedef struct UtCharType {
00082 UtCateg categorie;
00083 UtScript script;
00084 } UtCharType;
00085
00086
/**
 * Names of the built-in charsets, as a NULL-terminated array of strings.
 *
 * The array is defined in exactly one translation unit: the one that defines
 * _UT_CHARSET_C_ before including this header. Every other includer only
 * sees the extern declaration.
 *
 * The entries are listed in the same order as the UtCharsetType values
 * UT_CST_ASCII (0) through UT_CST_UTF_32LE (5); index 6
 * (UT_CST_ASCII_EXTENSION) falls on the NULL terminator.
 * NOTE(review): presumably the table is indexed by UtCharsetType -- confirm
 * with the callers, and keep both lists in sync when adding a charset.
 */
#ifdef _UT_CHARSET_C_
const char *UT_CHARSET_NAME[] = {
    "ASCII",
    "UTF-8",
    "UTF-16BE",
    "UTF-16LE",
    "UTF-32BE",
    "UTF-32LE",
    NULL /* end-of-table sentinel */
};
#else
extern const char *UT_CHARSET_NAME[];
#endif
00102
/**
 * Kind of encoding a charset uses.
 *
 * UT_CST_UNSET is the only negative value; the remaining values count up
 * from zero.
 * NOTE(review): UT_CST_ASCII_EXTENSION presumably covers single-byte
 * charsets that extend ASCII -- confirm.
 */
typedef enum UtCharsetType {
    UT_CST_UNSET           = -1,
    UT_CST_ASCII           =  0,
    UT_CST_UTF_8           =  1,
    UT_CST_UTF_16BE        =  2,
    UT_CST_UTF_16LE        =  3,
    UT_CST_UTF_32BE        =  4,
    UT_CST_UTF_32LE        =  5,
    UT_CST_ASCII_EXTENSION =  6
} UtCharsetType;
00120
00121
00129 typedef struct UtCharset {
00130 char * name;
00131 char * alias;
00132 char * common_name;
00133 char * comment;
00134 UtCharsetType type;
00135 ushort * unicode;
00136 UtCharType * char_type;
00137 u_char * language;
00138 u_char * system;
00139 } UtCharset;
00140
00141 #endif // _UT_CHARSET_H_