00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00030 #ifndef _UT_TEXT_H_
00031 #define _UT_TEXT_H_
00032
00033
00034
00035
/**
 * Option bits stored in the `flags` member of UtText.
 *
 * Each named option occupies a distinct bit, so options can be combined
 * with bitwise OR (UT_F_DEFAULT is built that way).
 *
 * NOTE(review): the per-option remarks are inferred from the identifier
 * names only -- confirm against the code that tests these bits.
 */
typedef enum UtTextFlags {
    UT_F_UNSET               = 0,       /* no option bit set */
    UT_F_FORCE_BINARY        = 1 << 0,  /* presumably: treat the data as binary */
    UT_F_IDENTIFY_EOL        = 1 << 1,  /* presumably: detect the end-of-line type */
    UT_F_TRANSFORM_EOL       = 1 << 2,  /* presumably: rewrite end-of-line sequences */
    UT_F_REMOVE_ILLEGAL_CHAR = 1 << 3,  /* presumably: drop characters deemed illegal */
    UT_F_ADD_FINAL_EOL       = 1 << 4,  /* presumably: ensure a trailing end-of-line */
    UT_F_IDENTIFY_CHARSET    = 1 << 5,  /* presumably: detect the charset */
    UT_F_REFERENCE_EXT_CHAR  = 1 << 6,  /* presumably: record lines with extended chars */

    /* Default option set: bits 3 and 5 (numeric value 40). */
    UT_F_DEFAULT = UT_F_REMOVE_ILLEGAL_CHAR | UT_F_IDENTIFY_CHARSET
} UtTextFlags;
00056
00057
/**
 * Bit identifiers for processing passes; used as the type of both the
 * `pass_flags` and `current_pass` members of UtText.
 *
 * Every pass except UT_PF_UNSET has its own bit, so a set of passes can be
 * expressed as a bitwise OR of these values.
 *
 * NOTE(review): what each pass actually does is not visible in this
 * header -- the names are the only hint; confirm with the implementation.
 */
typedef enum UtPassFlags {
    UT_PF_UNSET        = 0,       /* no pass bit set */
    UT_PF_NONE         = 1 << 0,
    UT_PF_LOAD         = 1 << 1,
    UT_PF_RECOGNIZE    = 1 << 2,
    UT_PF_DISTRIB_PASS = 1 << 3,
    UT_PF_EOL_PASS     = 1 << 4,
    UT_PF_XASCII_PASS  = 1 << 5,
    UT_PF_CONVERT      = 1 << 6,

    /* Same value as UT_PF_CONVERT, the highest bit defined above. */
    UT_PF_MAX          = 1 << 6

} UtPassFlags;
00080
00081
00082
00083
00084
00085
00094 typedef struct UtCharsetEval {
00095 long rating;
00096 ulong checksum;
00097 } UtCharsetEval;
00098
00099
00100
00109 typedef struct UtExtCharLine {
00110 char * line_p;
00111 ulong line_i;
00112 ulong nb_ext_chars;
00113 struct UtExtCharLine * next;
00114 } UtExtCharLine;
00115
00116
00117
/**
 * End-of-line kinds.
 *
 * UT_EOL_UNSET is -1; the remaining enumerators are numbered implicitly
 * from 0, so UT_EOL_CR is 0 and UT_EOL_NONE is 7. Inserting or reordering
 * enumerators changes those numbers.
 *
 * NOTE(review): the byte sequence each kind stands for is inferred from its
 * name only; UT_EOL_BSN and UT_EOL_NUL in particular should be confirmed.
 */
typedef enum UtEolType {
    UT_EOL_UNSET = -1,  /* -1: not determined yet */
    UT_EOL_CR,          /*  0 */
    UT_EOL_LF,          /*  1 */
    UT_EOL_CRLF,        /*  2 */
    UT_EOL_LFCR,        /*  3 */
    UT_EOL_MIX,         /*  4 */
    UT_EOL_BSN,         /*  5 */
    UT_EOL_NUL,         /*  6 */

    UT_EOL_NONE         /*  7: last enumerator */
} UtEolType;
00146
/*
 * Table of constant strings, defined in another translation unit.
 * NOTE(review): presumably indexed by UtEolType to obtain a printable
 * name -- confirm the table's length and indexing at its definition.
 */
extern const char *UT_EOL_NAME[];
00148
/*
 * Signed short integer used for the `charset` member of UtText.
 * NOTE(review): presumably an index into a charset table kept elsewhere;
 * the table and any sentinel values are not visible in this header.
 */
typedef short UtCharsetIndex;
00150
00151
00160 typedef struct UtText {
00161 char * data;
00162 ulong size;
00163
00164 UtEolType eol;
00165 UtEolType eol_alt;
00166 UtCharsetIndex charset;
00167
00168 ulong nb_lines;
00169 ulong nb_lines_alt;
00170 ulong * distribution;
00171 UtExtCharLine * ext_char;
00172 UtCharsetEval * evaluation;
00173
00174 UtTextFlags flags;
00175 UtPassFlags pass_flags;
00176 char skip_char;
00177
00178
00179 float progress_done;
00180 int progress_todo;
00181 UtPassFlags current_pass;
00182
00183 void * user;
00184 } UtText;
00185
00186 #endif //_UT_TEXT_H_