Main Page | Class List | File List | Class Members | File Members | Related Pages

utrac.h

Go to the documentation of this file.
00001 /***************************************************************************
00002  *            utrac.h
00003  *
00004  *  Tue Oct  5 11:28:44 2004
00005  *  Copyright  2004  Alliance MCA
00006  *  Written by : Antoine Calando (antoine@alliancemca.net)
00007  ****************************************************************************/
00008 
00009 /*
00010  *  This program is free software; you can redistribute it and/or modify
00011  *  it under the terms of the GNU General Public License as published by
00012  *  the Free Software Foundation; either version 2 of the License, or
00013  *  (at your option) any later version.
00014  *
00015  *  This program is distributed in the hope that it will be useful,
00016  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00017  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018  *  GNU Library General Public License for more details.
00019  *
00020  *  You should have received a copy of the GNU General Public License
00021  *  along with this program; if not, write to the Free Software
00022  *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
00023  */
00024  
00030 #ifndef _UTRAC_H_  /* Include guard. NOTE(review): identifiers starting with an underscore followed by an uppercase letter are reserved in C. */
00031 #define _UTRAC_H_
00032 
00033 #ifndef __cplusplus  /* Plain C: provide a home-made bool type plus true/false macros. */
00034 typedef unsigned short int bool;  /* NOTE(review): unlike _Bool, nonzero values are not normalized to 1, and this typedef clashes with <stdbool.h> if that header is included first. */
00035 #define true 1
00036 #define false 0
00037 #else  /* C++: bool is built in; open a C-linkage block instead (closed at the end of this header). */
00038 extern "C" {
00039 #endif
00040 
00041 //#include "debug.h"
00042 #include <sys/types.h>
00043 #include "ut_error.h"
00044 #include "ut_text.h"
00045 #include "ut_charset.h"
00046 
00047 #define UT_VERSION              "0.3.0"  /* Version string. */
00048 
00049 #define UT_EOL_CHAR     0x0             /* Presumably the internal end-of-line marker byte; note it has the same value as UT_EOF_CHAR -- TODO confirm. */
00050 #define UT_EOL_ALT_CHAR 0xD             /* 0xD is ASCII carriage return; presumably the alternate end-of-line byte -- TODO confirm. */
00051 #define UT_SKIP_CHAR    0x1             /* Presumably marks a byte that must be skipped -- TODO confirm. */
00052 #define UT_EOF_CHAR     0x0             /* Presumably the end-of-text marker byte -- TODO confirm. */
00053 #define UT_UNICODE_NONCHAR 0xFFFF       /* U+FFFF, a Unicode noncharacter. */
00054 //#define BUFFER_OFFSET 4
00055 
00056 #define UT_UNSET -1     /* Presumably a "not set" sentinel -- TODO confirm. NOTE(review): the expansion is not parenthesized. */
00057 //#define UT_NO_CHANGE -2U
00058 
00059 #define UT_THRESHOLD_CONTROL_CHAR 0.05  /* Presumably a ratio threshold on control characters used during recognition -- TODO confirm. */
00060 #define UT_THRESHOLD_UTF8 0.01                  /* Presumably a ratio threshold used by the UTF-8 check -- TODO confirm. */
00061 
00062 #define UT_LOAD_STEP 1*1024*1024        /* Value 1048576. NOTE(review): not parenthesized, so e.g. x / UT_LOAD_STEP expands to x / 1*1024*1024. */
00063 #define UT_PROCESS_STEP 1*1024*1024 /* Value 1048576. NOTE(review): same unparenthesized expansion as UT_LOAD_STEP. */
00064 
00065 
00066 #define UT_COEF_MAX 5   /* Number of elements in the UT_LANG_SYS_COEF table. */
00067 
00068 #ifdef _UT_CHARSET_C_  /* Definitions: emitted only where _UT_CHARSET_C_ is defined before this header is included. */
00069 const float UT_LANG_SYS_COEF [UT_COEF_MAX] = { 1.0, 1.02, 1.04, 1.06, 1.10 };  /* Presumably rating multipliers selected by a language/system coefficient level -- TODO confirm. */
00070 //const char * UT_CHARMAPS_FILENAME2 = "/home/antoine/dev/utrac/charsets.dat";
00071 //UT_CHARMAPS_FILENAME should be set with "gcc -D ..."
00072 const char * UT_CHARMAPS_FILENAME2 = "charsets.dat";  /* NOTE(review): only UT_CHARMAPS_FILENAME2 is defined here; UT_CHARMAPS_FILENAME (declared extern in the #else branch) has no definition in this header. */
00073 const char * UT_DEFAULT_ENCODING_UNIX = "ISO-8859-1";  /* Presumably the fallback charset name on Unix systems -- TODO confirm. */
00074 #else  /* Every other translation unit only sees the declarations. */
00075 
00076 extern const float UT_LANG_SYS_COEF [];  /* Size omitted here; the definition has UT_COEF_MAX elements. */
00078 extern const char * UT_CHARMAPS_FILENAME;  /* NOTE(review): must be defined elsewhere; not defined in this header. */
00080 extern const char * UT_CHARMAPS_FILENAME2;
00082 extern const char * UT_DEFAULT_ENCODING_UNIX;
00083 #endif
00084 
00085 #define UT_LANG_SYS_ALLOC_STEP  8               /* Presumably the growth increment for UtLangSys arrays (see n_max) -- TODO confirm. */
00086 #define UT_ERROR_STRING_SIZE    128             /* Presumably the size in bytes of UtSession.error_string -- TODO confirm. */
00087 #define UT_STDIN_BUFFER_SIZE    65536   /* 64 * 1024; presumably the read buffer size used when loading from stdin -- TODO confirm. */
00088 
00089 
00090 /***************************************************************************/
00097 typedef struct UtLangSys {  /* List type used by UtSession for both its language and its system members. */
00098         char ** name;           /* Array of strings; presumably one name per entry -- TODO confirm. */
00099         char * code;            /* Presumably per-entry codes packed in one buffer; layout not visible here -- TODO confirm. */
00100         ushort n;                       /* Presumably the number of entries in use -- TODO confirm. NOTE(review): ushort is not a standard C type; it must come from an included header. */
00101         ushort n_max;           /* Presumably the allocated capacity -- TODO confirm. */
00102 } UtLangSys ;
00103 
00104 
00105 /***************************************************************************/
00117 typedef struct UtSession {  /* Library-wide state; the global pointer ut_session refers to an instance of this type. */
00118         struct UtCharset * charset;             /* Presumably an array of nb_charsets charset descriptors -- TODO confirm. */
00119         int nb_charsets;                                        /* Presumably the element count of charset -- TODO confirm. */
00120 
00121         UtLangSys language;                             /* Language list. */
00122         UtLangSys system;                               /* System list. */
00123 
00124         int language_default;                   /* Presumably an index into language, or UT_UNSET -- TODO confirm. */
00125         int system_default;                             /* Presumably an index into system, or UT_UNSET -- TODO confirm. */
00126         UtEolType eol_default;                  /* Default end-of-line type (UtEolType is declared in another header). */
00127         UtEolType eol_alt_default;              /* Default alternate end-of-line type. */
00128         UtCharsetIndex charset_default; /* Default charset (UtCharsetIndex is declared in another header). */
00129 
00130         ulong nomapping_char;                   /* Presumably the character substituted when no mapping exists -- TODO confirm. */
00131         
00132         int (*progress_function)
00133         (UtText*,float);                                /* Callback taking a UtText pointer and a float, returning int; meaning of the float and of the return value is not visible here. */
00134 
00135 
00136 
00137 
00138 
00139 
00140         char * error_string;                    /* Presumably a buffer of UT_ERROR_STRING_SIZE bytes holding the last error text -- TODO confirm. */
00141 } UtSession;
00142 
00143 #ifdef _UTRAC_C_  /* The translation unit that defines _UTRAC_C_ before including this header owns the definition. */
00144         UtSession * ut_session = NULL;  /* Starts out NULL. NOTE(review): NULL is not provided by this header itself; presumably it comes from one of the includes above. */
00145 #else
00146         extern UtSession  * ut_session;  /* All other translation units see only this declaration. */
00147 #endif
00148 
00149 #define UT_TRY(func) /* Evaluate func once; if the result is not UT_OK, return it from the enclosing function. */ \
00150 {\
00151         UtCode rcode = func; /* rcode is local to this block and hides any outer rcode, even inside func. */ \
00152         if (rcode != UT_OK) return rcode; /* The enclosing function must therefore return a UtCode-compatible type. */ \
00153 } /* NOTE(review): expands to a bare brace block, not do { } while (0), so "if (c) UT_TRY(f()); else ..." does not compile. */
00154 
00155 
00156 // ********** utrac.c ********** 
00157 UtCode ut_init ();  /* NOTE(review): before C23, empty parentheses declare unspecified parameters, not (void); this applies to every "()" prototype in this header. */
00158 UtCode ut_init_noalloc ();  /* Presumably ut_init without allocating the session -- TODO confirm. */
00159 void ut_finish ();  /* Presumably the counterpart of ut_init -- TODO confirm. */
00160 void ut_finish_nofree ();  /* Presumably the counterpart of ut_init_noalloc -- TODO confirm. */
00161 UtText * ut_init_text_heap ();  /* Returns a UtText pointer; presumably heap-allocated and released with ut_free_text_heap -- TODO confirm. */
00162 void ut_init_text (UtText * new_text);  /* Presumably initializes a caller-provided UtText -- TODO confirm. */
00163 void ut_free_text_heap (UtText *text);
00164 void ut_free_text (UtText * text);
00165 
00166 
00167 
00168 UtCode ut_init_progress (UtText *text);
00169 UtCode ut_load (UtText *text, const char * filename);  /* Presumably loads the named file into text -- TODO confirm. */
00170 UtCode ut_recognize (UtText *text);  /* Presumably detects charset and end-of-line type of text -- TODO confirm. */
00171 UtCode ut_convert (UtText *src_text, UtText *dst_text);  /* Presumably converts src_text into dst_text -- TODO confirm. */
00172 
00173 //UtCode ut_process_text (UtText * text, bool convert);
00174 
00175 // ********** utils.c ********** 
00176 UtCode ut_load_charsets ();
00177 UtCode ut_load_charset_file (const char * filename, char ** buffer);  /* buffer is an out-parameter (char **); ownership of the result is not visible here. */
00178 UtCharsetIndex ut_find_charset (char * charset_name);  /* NOTE(review): the name parameters of the ut_find_* functions are non-const char *; could likely be const char * -- confirm against the definitions. */
00179 UtEolType ut_find_eol (char * eol_name);
00180 int ut_find_lang_sys (char * language_name, UtLangSys * lang_sys);  /* Returns int; presumably an index into lang_sys or a not-found value -- TODO confirm. */
00181 
00182 double ut_get_charset_coef (UtCharsetIndex i);
00183 bool ut_str_fuzzy_cmp (const char *str1, const char *str2, char stop_char);  /* Comparison rules and the role of stop_char are not visible here. */
00184 
00185 bool ut_update_progress (struct UtText *, ulong, bool);  /* NOTE(review): unnamed parameters, and "struct UtText *" here versus "UtText *" in the utrac.c prototypes. */
00186 
00187 ulong ut_crc32 (ushort , ulong);  /* Unnamed parameters; presumably (input value, running CRC) -- TODO confirm. */
00188 
00189 void ut_print_binary (ulong src);
00190 UtCode ut_debug_text (struct UtText *);
00191 UtCode ut_debug_text_rating (struct UtText *);
00192 
00193 // ************** ut_messages.c *************
00194 const char * ut_error_message (UtCode code);  /* Returns a const string for code; presumably must not be freed by the caller -- TODO confirm. */
00195 
00196 // ************** ut_load.c *************
00197 UtCode ut_load_file_pass (UtText *text, const char * filename);  /* Presumably the file-based loading step behind ut_load -- TODO confirm. */
00198 UtCode ut_load_stdin_pass (UtText *text);  /* Presumably the stdin-based loading step (see UT_STDIN_BUFFER_SIZE) -- TODO confirm. */
00199 
00200 // ********** ut_recognition1.c ********** 
00201 UtCode ut_distrib_utf_pass (struct UtText *);  /* Unnamed UtText parameter; the work done by each *_pass function is not visible in this header. */
00202 UtCode ut_eol_pass (struct UtText *);
00203 
00204 // ********** ut_recognition2.c ********** 
00205 UtCode ut_xascii_pass  (struct UtText *);
00206 
00207 // ********** ut_conversion.c ********** 
00208 int ut_size_char (char **src_p, UtCharsetIndex src_charset, UtCharsetIndex dst_charset);  /* src_p is passed by address; presumably the read pointer is advanced -- TODO confirm. */
00209 void ut_conv_char (char ** src_p, char ** dst_p, UtCharsetIndex src_charset, UtCharsetIndex dst_charset);  /* Both pointers are passed by address; presumably both are advanced -- TODO confirm. */
00210 void ut_insert_eol (char ** dst_p, UtEolType dst_eol);
00211 
00212 uint ut_count_ext_char (UtText * text);  /* NOTE(review): uint is not a standard C type; it must come from an included header. */
00213 int ut_size_difference (UtText * src_text, UtText * dst_text);  /* Returns a signed int; sign convention is not visible here. */
00214         
00215 UtCode ut_conversion_pass (UtText * src_text, UtText * dst_text);
00216 
00217 
00218 #ifdef __cplusplus  /* Close the C-linkage block opened near the top of this header. */
00219 } // extern "C"
00220 #endif
00221 
00222 #endif // _UTRAC_H_

Generated on Fri Feb 25 18:30:15 2005 for Utrac by doxygen 1.3.9