00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00030 #ifndef _UTRAC_H_
00031 #define _UTRAC_H_
00032
/*
 * C++ translation units already have bool/true/false, so they only open the
 * extern "C" region (closed by the matching brace at the bottom of this
 * header). C translation units get a boolean type based on unsigned short.
 *
 * NOTE(review): this typedef will clash with <stdbool.h> (where bool is a
 * macro for _Bool) and its size probably differs from the C++ bool -- confirm
 * before sharing bool-typed prototypes between C and C++ callers.
 */
#ifdef __cplusplus
extern "C" {
#else
typedef unsigned short bool;
#define true 1
#define false 0
#endif
00040
00041
00042 #include <sys/types.h>
00043 #include "ut_error.h"
00044 #include "ut_text.h"
00045 #include "ut_charset.h"
00046
/* Version string of the library. */
#define UT_VERSION "0.3.0"

/*
 * Special character values. Note that UT_EOL_CHAR and UT_EOF_CHAR share the
 * value 0x0.
 * NOTE(review): presumably these are the in-buffer markers used for line
 * ends, skipped bytes and end of text -- confirm against the passes that
 * use them.
 */
#define UT_EOL_CHAR 0x0
#define UT_EOL_ALT_CHAR 0xD
#define UT_SKIP_CHAR 0x1
#define UT_EOF_CHAR 0x0
#define UT_UNICODE_NONCHAR 0xFFFF
//#define BUFFER_OFFSET 4

/* Sentinel for "no value"; parenthesized so it is safe inside any expression. */
#define UT_UNSET (-1)
//#define UT_NO_CHANGE -2U
00058
/*
 * Ratio thresholds.
 * NOTE(review): presumably the maximum tolerated proportion of control
 * characters / invalid UTF-8 sequences during recognition -- confirm.
 */
#define UT_THRESHOLD_CONTROL_CHAR 0.05
#define UT_THRESHOLD_UTF8 0.01

/*
 * Step sizes (1 MiB each). The expansions are parenthesized so that
 * expressions such as `size / UT_LOAD_STEP` or `size % UT_LOAD_STEP`
 * group as intended.
 */
#define UT_LOAD_STEP (1*1024*1024)
#define UT_PROCESS_STEP (1*1024*1024)


/* Number of entries in the UT_LANG_SYS_COEF table. */
#define UT_COEF_MAX 5
00067
/*
 * Shared constant tables.
 *
 * The translation unit that defines _UT_CHARSET_C_ before including this
 * header provides the definitions; every other includer only gets the
 * extern declarations.
 */
#ifndef _UT_CHARSET_C_

extern const float UT_LANG_SYS_COEF [];
extern const char * UT_CHARMAPS_FILENAME;
extern const char * UT_CHARMAPS_FILENAME2;
extern const char * UT_DEFAULT_ENCODING_UNIX;

#else

/*
 * NOTE(review): UT_CHARMAPS_FILENAME is declared extern for other
 * translation units but is not defined in this branch -- presumably it is
 * defined elsewhere (e.g. from a build-time path); confirm.
 */
const float UT_LANG_SYS_COEF [UT_COEF_MAX] = { 1.0, 1.02, 1.04, 1.06, 1.10 };

const char * UT_CHARMAPS_FILENAME2 = "charsets.dat";
const char * UT_DEFAULT_ENCODING_UNIX = "ISO-8859-1";

#endif
00084
/*
 * Sizing constants.
 * NOTE(review): judging by the names, these are the growth step of the
 * UtLangSys arrays, the size of the session error string and the size of the
 * stdin read buffer -- confirm against the implementation.
 */
#define UT_LANG_SYS_ALLOC_STEP  8
#define UT_ERROR_STRING_SIZE    128
#define UT_STDIN_BUFFER_SIZE    65536
00088
00089
00090
/*
 * Table of named entries, used by UtSession for both its `language` and
 * `system` members.
 *
 * The counters use plain `unsigned short` rather than the non-standard
 * `ushort` alias, which <sys/types.h> only exposes when extensions are
 * enabled; the type itself is unchanged.
 *
 * NOTE(review): presumably `n` is the number of entries in use and `n_max`
 * the allocated capacity -- confirm against the allocation code.
 */
typedef struct UtLangSys {
	char ** name;            /* array of name strings */
	char * code;             /* NOTE(review): layout of this buffer is not visible here */
	unsigned short n;
	unsigned short n_max;
} UtLangSys;
00103
00104
00105
00117 typedef struct UtSession {
00118 struct UtCharset * charset;
00119 int nb_charsets;
00120
00121 UtLangSys language;
00122 UtLangSys system;
00123
00124 int language_default;
00125 int system_default;
00126 UtEolType eol_default;
00127 UtEolType eol_alt_default;
00128 UtCharsetIndex charset_default;
00129
00130 ulong nomapping_char;
00131
00132 int (*progress_function)
00133 (UtText*,float);
00134
00135
00136
00137
00138
00139
00140 char * error_string;
00141 } UtSession;
00142
00143 #ifdef _UTRAC_C_
00144 UtSession * ut_session = NULL;
00145 #else
00146 extern UtSession * ut_session;
00147 #endif
00148
/*
 * UT_TRY(expr): evaluate `expr` exactly once and, if the result is not
 * UT_OK, return it from the enclosing function (which must therefore
 * return UtCode or a compatible type).
 *
 * The argument is parenthesized and the temporary has a macro-private name
 * so that an argument mentioning a caller variable called `rcode` is not
 * captured by the macro's own local.
 *
 * The expansion is deliberately kept as a brace block so that existing call
 * sites written without a trailing semicolon keep compiling. As a
 * consequence, do not use it as the unbraced body of an `if` that has an
 * `else`.
 */
#define UT_TRY(func) \
	{\
		UtCode ut_try_rcode_ = (func);\
		if (ut_try_rcode_ != UT_OK) return ut_try_rcode_;\
	}
00154
00155
00156
00157 UtCode ut_init ();
00158 UtCode ut_init_noalloc ();
00159 void ut_finish ();
00160 void ut_finish_nofree ();
00161 UtText * ut_init_text_heap ();
00162 void ut_init_text (UtText * new_text);
00163 void ut_free_text_heap (UtText *text);
00164 void ut_free_text (UtText * text);
00165
00166
00167
00168 UtCode ut_init_progress (UtText *text);
00169 UtCode ut_load (UtText *text, const char * filename);
00170 UtCode ut_recognize (UtText *text);
00171 UtCode ut_convert (UtText *src_text, UtText *dst_text);
00172
00173
00174
00175
00176 UtCode ut_load_charsets ();
00177 UtCode ut_load_charset_file (const char * filename, char ** buffer);
00178 UtCharsetIndex ut_find_charset (char * charset_name);
00179 UtEolType ut_find_eol (char * eol_name);
00180 int ut_find_lang_sys (char * language_name, UtLangSys * lang_sys);
00181
00182 double ut_get_charset_coef (UtCharsetIndex i);
00183 bool ut_str_fuzzy_cmp (const char *str1, const char *str2, char stop_char);
00184
00185 bool ut_update_progress (struct UtText *, ulong, bool);
00186
00187 ulong ut_crc32 (ushort , ulong);
00188
00189 void ut_print_binary (ulong src);
00190 UtCode ut_debug_text (struct UtText *);
00191 UtCode ut_debug_text_rating (struct UtText *);
00192
00193
00194 const char * ut_error_message (UtCode code);
00195
00196
00197 UtCode ut_load_file_pass (UtText *text, const char * filename);
00198 UtCode ut_load_stdin_pass (UtText *text);
00199
00200
00201 UtCode ut_distrib_utf_pass (struct UtText *);
00202 UtCode ut_eol_pass (struct UtText *);
00203
00204
00205 UtCode ut_xascii_pass (struct UtText *);
00206
00207
00208 int ut_size_char (char **src_p, UtCharsetIndex src_charset, UtCharsetIndex dst_charset);
00209 void ut_conv_char (char ** src_p, char ** dst_p, UtCharsetIndex src_charset, UtCharsetIndex dst_charset);
00210 void ut_insert_eol (char ** dst_p, UtEolType dst_eol);
00211
00212 uint ut_count_ext_char (UtText * text);
00213 int ut_size_difference (UtText * src_text, UtText * dst_text);
00214
00215 UtCode ut_conversion_pass (UtText * src_text, UtText * dst_text);
00216
00217
00218 #ifdef __cplusplus
00219 }
00220 #endif
00221
#endif // _UTRAC_H_