mirror of
https://gitee.com/lionsoul/friso.git
synced 2024-12-02 03:07:52 +08:00
增加friso_enchar_type函数, 用于判断给定字符的类型。
This commit is contained in:
parent
4722fc777b
commit
873a1344cb
@ -173,3 +173,35 @@ FRISO_API int friso_uppercase_letter(
|
||||
return gbk_uppercase_letter( task->buffer );
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* get the type of the specified char.
|
||||
* the type will be the constants defined above.
|
||||
* (include the fullwidth english char.)
|
||||
*/
|
||||
FRISO_API friso_enchar_t friso_enchar_type(
|
||||
friso_charset_t charset,
|
||||
friso_task_t task )
|
||||
{
|
||||
//Unicode or ASCII.(Both UTF-8 and GBK are valid)
|
||||
uint_t u = 0;
|
||||
|
||||
if ( charset == FRISO_UTF8 )
|
||||
{
|
||||
u = task->unicode;
|
||||
//if ( u >= 65280 ) u = 65280 - 65248;
|
||||
}
|
||||
else if ( charset == FRISO_GBK )
|
||||
{
|
||||
u = (uchar_t)task->buffer[0];
|
||||
//if ( u == 0xa3 ) ; //full-width.
|
||||
}
|
||||
|
||||
//range check.
|
||||
if ( u > 126 ) return FRISO_EN_UNKNOW;
|
||||
if ( u == 32 ) return FRISO_EN_WHITESPACE;
|
||||
if ( u >= 48 && u <= 57 ) return FRISO_EN_NUMERIC;
|
||||
if ( u >= 65 && u <= 90 ) return FRISO_EN_LETTER;
|
||||
if ( u >= 97 && u <= 122 ) return FRISO_EN_LETTER;
|
||||
|
||||
return FRISO_EN_PUNCTUATION;
|
||||
}
|
||||
|
@ -59,6 +59,27 @@ FRISO_API int friso_decimal_string( friso_charset_t, char * );
|
||||
// included full-width and half-width letters.
|
||||
FRISO_API int friso_uppercase_letter( friso_charset_t, friso_task_t );
|
||||
|
||||
|
||||
/* English (ASCII) character classes. */
typedef enum {
    FRISO_EN_LETTER      = 0,   /* A-Z, a-z */
    FRISO_EN_NUMERIC     = 1,   /* 0-9 */
    FRISO_EN_PUNCTUATION = 2,   /* english punctuations */
    FRISO_EN_WHITESPACE  = 3,   /* whitespace */
    FRISO_EN_UNKNOW      = -1   /* unknown (beyond 32-126) */
} friso_enchar_t;
|
||||
|
||||
/* get the type of the specified char.
|
||||
* the type will be the constants defined above.
|
||||
* (include the fullwidth english char.)
|
||||
*/
|
||||
FRISO_API friso_enchar_t friso_enchar_type( friso_charset_t, friso_task_t );
|
||||
|
||||
/* }}} */
|
||||
|
||||
|
||||
|
@ -16,18 +16,18 @@
|
||||
//create a new lexicon
|
||||
FRISO_API friso_dic_t friso_dic_new()
|
||||
{
|
||||
register uint_t t;
|
||||
friso_dic_t dic = ( friso_dic_t ) FRISO_CALLOC(
|
||||
sizeof( friso_hash_t ), __FRISO_LEXICON_LENGTH__ );
|
||||
if ( dic == NULL ) {
|
||||
___ALLOCATION_ERROR___
|
||||
}
|
||||
register uint_t t;
|
||||
friso_dic_t dic = ( friso_dic_t ) FRISO_CALLOC(
|
||||
sizeof( friso_hash_t ), __FRISO_LEXICON_LENGTH__ );
|
||||
if ( dic == NULL ) {
|
||||
___ALLOCATION_ERROR___
|
||||
}
|
||||
|
||||
for ( t = 0; t < __FRISO_LEXICON_LENGTH__; t++ ) {
|
||||
dic[t] = new_hash_table();
|
||||
}
|
||||
for ( t = 0; t < __FRISO_LEXICON_LENGTH__; t++ ) {
|
||||
dic[t] = new_hash_table();
|
||||
}
|
||||
|
||||
return dic;
|
||||
return dic;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -38,55 +38,55 @@ FRISO_API friso_dic_t friso_dic_new()
|
||||
*/
|
||||
/*
 * Release callback passed to free_hash_table(): frees everything owned by
 * the lexicon entry stored in e->_val -- the word string, every synonym
 * string together with the synonym list, and finally the entry itself.
 *
 * NOTE(review): freeing the entry assumes free_hash_table() does not
 * release e->_val on its own -- confirm against the hash implementation.
 */
__STATIC_API__ void default_fdic_callback( hash_entry_t e )
{
    register uint_t i;
    friso_array_t syn;
    lex_entry_t lex = ( lex_entry_t ) e->_val;

    //free the lex->word
    FRISO_FREE( lex->word );

    //free the lex->syn if it is not NULL
    if ( lex->syn != NULL ) {
        syn = lex->syn;
        for ( i = 0; i < syn->length; i++ ) {
            FRISO_FREE( syn->items[i] );
        }
        free_array_list( syn );
    }

    //free the entry allocated by new_lex_entry, otherwise it leaks.
    FRISO_FREE( lex );
}
|
||||
|
||||
/**
 * Free a dictionary: every per-type hash table is released with
 * free_hash_table() using default_fdic_callback for its entries, then
 * the table array itself is freed (exactly once).
 *
 * @param dic dictionary to free; NULL is ignored
 */
FRISO_API void friso_dic_free( friso_dic_t dic )
{
    register uint_t t;

    if ( dic == NULL ) return;

    for ( t = 0; t < __FRISO_LEXICON_LENGTH__; t++ ) {
        //free the hash table
        free_hash_table( dic[t], default_fdic_callback );
    }

    FRISO_FREE( dic );
}
|
||||
|
||||
|
||||
//create a new lexicon entry
|
||||
FRISO_API lex_entry_t new_lex_entry(
|
||||
fstring word,
|
||||
friso_array_t syn,
|
||||
uint_t fre,
|
||||
uint_t length,
|
||||
uint_t type )
|
||||
fstring word,
|
||||
friso_array_t syn,
|
||||
uint_t fre,
|
||||
uint_t length,
|
||||
uint_t type )
|
||||
{
|
||||
lex_entry_t e = ( lex_entry_t )
|
||||
FRISO_MALLOC( sizeof( lex_entry_cdt ) );
|
||||
if ( e == NULL ) {
|
||||
___ALLOCATION_ERROR___
|
||||
}
|
||||
lex_entry_t e = ( lex_entry_t )
|
||||
FRISO_MALLOC( sizeof( lex_entry_cdt ) );
|
||||
if ( e == NULL ) {
|
||||
___ALLOCATION_ERROR___
|
||||
}
|
||||
|
||||
//initialize.
|
||||
e->word = word;
|
||||
e->syn = syn;
|
||||
e->fre = fre;
|
||||
e->length = length;
|
||||
e->type = type;
|
||||
//initialize.
|
||||
e->word = word;
|
||||
e->syn = syn;
|
||||
e->fre = fre;
|
||||
e->length = length;
|
||||
e->type = type;
|
||||
|
||||
return e;
|
||||
return e;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -98,42 +98,42 @@ FRISO_API lex_entry_t new_lex_entry(
|
||||
*/
|
||||
/*
 * Free the entry structure only; the word string and the synonym list it
 * points to are not released here.
 */
FRISO_API void free_lex_entry( lex_entry_t e )
{
    FRISO_FREE( e );
}
|
||||
|
||||
|
||||
//add a new entry to the dictionary.
|
||||
FRISO_API void friso_dic_add(
|
||||
friso_dic_t dic,
|
||||
friso_lex_t lex,
|
||||
fstring word,
|
||||
friso_array_t syn )
|
||||
friso_dic_t dic,
|
||||
friso_lex_t lex,
|
||||
fstring word,
|
||||
friso_array_t syn )
|
||||
{
|
||||
if ( lex >= 0 || lex < __FRISO_LEXICON_LENGTH__ )
|
||||
{
|
||||
//printf("lex=%d, word=%s, syn=%s\n", lex, word, syn);
|
||||
hash_put_mapping( dic[lex], word,
|
||||
new_lex_entry( word, syn, 0,
|
||||
(uint_t) strlen(word), (uint_t) lex ) );
|
||||
}
|
||||
if ( lex >= 0 || lex < __FRISO_LEXICON_LENGTH__ )
|
||||
{
|
||||
//printf("lex=%d, word=%s, syn=%s\n", lex, word, syn);
|
||||
hash_put_mapping( dic[lex], word,
|
||||
new_lex_entry( word, syn, 0,
|
||||
(uint_t) strlen(word), (uint_t) lex ) );
|
||||
}
|
||||
}
|
||||
|
||||
FRISO_API void friso_dic_add_with_fre(
|
||||
friso_dic_t dic,
|
||||
friso_lex_t lex,
|
||||
fstring word,
|
||||
friso_array_t syn,
|
||||
uint_t frequency )
|
||||
friso_dic_t dic,
|
||||
friso_lex_t lex,
|
||||
fstring word,
|
||||
friso_array_t syn,
|
||||
uint_t frequency )
|
||||
{
|
||||
if ( lex >= 0 && lex < __FRISO_LEXICON_LENGTH__ ) {
|
||||
hash_put_mapping( dic[lex], word,
|
||||
new_lex_entry( word, syn, frequency,
|
||||
( uint_t ) strlen(word), ( uint_t ) lex ) );
|
||||
}
|
||||
if ( lex >= 0 && lex < __FRISO_LEXICON_LENGTH__ ) {
|
||||
hash_put_mapping( dic[lex], word,
|
||||
new_lex_entry( word, syn, frequency,
|
||||
( uint_t ) strlen(word), ( uint_t ) lex ) );
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -144,18 +144,18 @@ FRISO_API void friso_dic_add_with_fre(
|
||||
*/
|
||||
/*
 * Read one line from _stream into __dst, without the trailing '\n', and
 * NUL-terminate it.
 *
 * No size is passed in, so the write is unbounded: __dst must be large
 * enough for the longest line of the stream.
 *
 * Returns __dst, or NULL when end of file is hit before any byte was read.
 */
FRISO_API fstring file_get_line( fstring __dst, FILE * _stream )
{
    register int c;
    fstring cs;

    cs = __dst;
    while ( ( c = fgetc( _stream ) ) != EOF ) {
        if ( c == '\n' ) break;
        *cs++ = (char) c;
    }
    *cs = '\0';

    return ( c == EOF && cs == __dst ) ? NULL : __dst;
}
|
||||
|
||||
/*
|
||||
@ -163,21 +163,21 @@ FRISO_API fstring file_get_line( fstring __dst, FILE * _stream )
|
||||
*/
|
||||
///instead of memcpy
|
||||
__STATIC_API__ fstring string_copy(
|
||||
fstring _src,
|
||||
fstring __dst,
|
||||
uint_t blocks )
|
||||
fstring _src,
|
||||
fstring __dst,
|
||||
uint_t blocks )
|
||||
{
|
||||
|
||||
register fstring __src = _src;
|
||||
register uint_t t;
|
||||
register fstring __src = _src;
|
||||
register uint_t t;
|
||||
|
||||
for ( t = 0; t < blocks; t++ ) {
|
||||
if ( *__src == '\0' ) break;
|
||||
__dst[t] = *__src++;
|
||||
}
|
||||
__dst[t] = '\0';
|
||||
for ( t = 0; t < blocks; t++ ) {
|
||||
if ( *__src == '\0' ) break;
|
||||
__dst[t] = *__src++;
|
||||
}
|
||||
__dst[t] = '\0';
|
||||
|
||||
return __dst;
|
||||
return __dst;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -189,23 +189,23 @@ __STATIC_API__ fstring string_copy(
|
||||
* @param blocks number of bytes to copy
|
||||
*/
|
||||
/*
 * Copy at most `blocks` bytes of _src (stopping early at its NUL
 * terminator) into a newly allocated, NUL-terminated buffer of
 * blocks + 1 bytes.
 *
 * On allocation failure the ___ALLOCATION_ERROR___ macro is invoked.
 * Returns the new buffer; the caller owns it.
 */
__STATIC_API__ fstring string_copy_heap(
        fstring _src, uint_t blocks )
{
    register uint_t t;

    fstring str = ( fstring )
        FRISO_MALLOC( blocks + 1 );
    if ( str == NULL ) {
        ___ALLOCATION_ERROR___;
    }

    for ( t = 0; t < blocks; t++ ) {
        if ( *_src == '\0' ) break;
        str[t] = *_src++;
    }

    str[t] = '\0';
    return str;
}
|
||||
|
||||
/*
|
||||
@ -215,15 +215,15 @@ __STATIC_API__ fstring string_copy_heap(
|
||||
*/
|
||||
/*
 * Find the first occurrence of `delimiter` in __str.
 *
 * Returns a pointer to that character inside __str, or NULL when it does
 * not occur. The terminating NUL is never matched, so a '\0' delimiter
 * yields NULL.
 */
__STATIC_API__ fstring indexOf( fstring __str, char delimiter )
{
    fstring p;

    //single pass; no separate strlen() scan is needed.
    for ( p = __str; *p != '\0'; p++ ) {
        if ( *p == delimiter )
            return p;
    }

    return NULL;
}
|
||||
|
||||
/**
|
||||
@ -235,105 +235,105 @@ __STATIC_API__ fstring indexOf( fstring __str, char delimiter )
|
||||
* @param length the maximum length of the word item
|
||||
*/
|
||||
FRISO_API void friso_dic_load(
|
||||
friso_t friso,
|
||||
friso_config_t config,
|
||||
friso_lex_t lex,
|
||||
fstring lex_file,
|
||||
uint_t length )
|
||||
friso_t friso,
|
||||
friso_config_t config,
|
||||
friso_lex_t lex,
|
||||
fstring lex_file,
|
||||
uint_t length )
|
||||
{
|
||||
|
||||
FILE * _stream;
|
||||
char __char[1024], _buffer[512];
|
||||
fstring _line;
|
||||
string_split_entry sse;
|
||||
FILE * _stream;
|
||||
char __char[1024], _buffer[512];
|
||||
fstring _line;
|
||||
string_split_entry sse;
|
||||
|
||||
fstring _word;
|
||||
char _sbuffer[512];
|
||||
fstring _syn;
|
||||
friso_array_t sywords;
|
||||
uint_t _fre;
|
||||
fstring _word;
|
||||
char _sbuffer[512];
|
||||
fstring _syn;
|
||||
friso_array_t sywords;
|
||||
uint_t _fre;
|
||||
|
||||
if ( ( _stream = fopen( lex_file, "rb" ) ) != NULL )
|
||||
if ( ( _stream = fopen( lex_file, "rb" ) ) != NULL )
|
||||
{
|
||||
while ( ( _line = file_get_line( __char, _stream ) ) != NULL )
|
||||
{
|
||||
while ( ( _line = file_get_line( __char, _stream ) ) != NULL )
|
||||
//clear up the notes
|
||||
//make sure the length of the line is greater than 1.
|
||||
//like the single '#' mark in stopwords dictionary.
|
||||
if ( _line[0] == '#' && strlen(_line) > 1 ) continue;
|
||||
|
||||
//handle the stopwords.
|
||||
if ( lex == __LEX_STOPWORDS__ )
|
||||
{
|
||||
//clean the chinese words that its length is greater than max length.
|
||||
if ( ((int)_line[0]) < 0 && strlen( _line ) > length ) continue;
|
||||
friso_dic_add( friso->dic, __LEX_STOPWORDS__,
|
||||
string_copy_heap( _line, strlen(_line) ), NULL );
|
||||
continue;
|
||||
}
|
||||
|
||||
//split the fstring with '/'.
|
||||
string_split_reset( &sse, "/", _line);
|
||||
if ( string_split_next( &sse, _buffer ) == NULL ) continue;
|
||||
|
||||
//1. get the word.
|
||||
_word = string_copy_heap( _buffer, strlen(_buffer) );
|
||||
|
||||
if ( string_split_next( &sse, _buffer ) == NULL )
|
||||
{
|
||||
//normal lexicon type,
|
||||
//add them to the dictionary directly
|
||||
friso_dic_add( friso->dic, lex, _word, NULL );
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* filter out the words that its length is larger
|
||||
* than the specified limit.
|
||||
* but not for __LEX_ECM_WORDS__ and english __LEX_STOPWORDS__
|
||||
* and __LEX_CEM_WORDS__.
|
||||
*/
|
||||
if ( ! ( lex == __LEX_ECM_WORDS__ || lex == __LEX_CEM_WORDS__ )
|
||||
&& strlen( _word ) > length ) continue;
|
||||
|
||||
//2. get the synonyms words.
|
||||
_syn = NULL;
|
||||
if ( strcmp( _buffer, "null" ) != 0 )
|
||||
_syn = string_copy( _buffer, _sbuffer, strlen(_buffer) );
|
||||
|
||||
//3. get the word frequency if it available.
|
||||
_fre = 0;
|
||||
if ( string_split_next( &sse, _buffer ) != NULL )
|
||||
_fre = atoi( _buffer );
|
||||
|
||||
/**
|
||||
* Here:
|
||||
* split the synonyms words with mark ","
|
||||
* and put them in a array list if the synonyms is not NULL
|
||||
*/
|
||||
sywords = NULL;
|
||||
if ( config->add_syn && _syn != NULL )
|
||||
{
|
||||
string_split_reset( &sse, ",", _sbuffer );
|
||||
sywords = new_array_list_with_opacity(5);
|
||||
while ( string_split_next( &sse, _buffer ) != NULL )
|
||||
{
|
||||
//clear up the notes
|
||||
//make sure the length of the line is greater than 1.
|
||||
//like the single '#' mark in stopwords dictionary.
|
||||
if ( _line[0] == '#' && strlen(_line) > 1 ) continue;
|
||||
if ( strlen(_buffer) > length ) continue;
|
||||
array_list_add( sywords,
|
||||
string_copy_heap(_buffer, strlen(_buffer)) );
|
||||
}
|
||||
sywords = array_list_trim( sywords );
|
||||
}
|
||||
|
||||
//handle the stopwords.
|
||||
if ( lex == __LEX_STOPWORDS__ )
|
||||
{
|
||||
//clean the chinese words that its length is greater than max length.
|
||||
if ( ((int)_line[0]) < 0 && strlen( _line ) > length ) continue;
|
||||
friso_dic_add( friso->dic, __LEX_STOPWORDS__,
|
||||
string_copy_heap( _line, strlen(_line) ), NULL );
|
||||
continue;
|
||||
}
|
||||
|
||||
//split the fstring with '/'.
|
||||
string_split_reset( &sse, "/", _line);
|
||||
if ( string_split_next( &sse, _buffer ) == NULL ) continue;
|
||||
|
||||
//1. get the word.
|
||||
_word = string_copy_heap( _buffer, strlen(_buffer) );
|
||||
|
||||
if ( string_split_next( &sse, _buffer ) == NULL )
|
||||
{
|
||||
//normal lexicon type,
|
||||
//add them to the dictionary directly
|
||||
friso_dic_add( friso->dic, lex, _word, NULL );
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* filter out the words that its length is larger
|
||||
* than the specified limit.
|
||||
* but not for __LEX_ECM_WORDS__ and english __LEX_STOPWORDS__
|
||||
* and __LEX_CEM_WORDS__.
|
||||
*/
|
||||
if ( ! ( lex == __LEX_ECM_WORDS__ || lex == __LEX_CEM_WORDS__ )
|
||||
&& strlen( _word ) > length ) continue;
|
||||
|
||||
//2. get the synonyms words.
|
||||
_syn = NULL;
|
||||
if ( strcmp( _buffer, "null" ) != 0 )
|
||||
_syn = string_copy( _buffer, _sbuffer, strlen(_buffer) );
|
||||
|
||||
//3. get the word frequency if it available.
|
||||
_fre = 0;
|
||||
if ( string_split_next( &sse, _buffer ) != NULL )
|
||||
_fre = atoi( _buffer );
|
||||
|
||||
/**
|
||||
* Here:
|
||||
* split the synonyms words with mark ","
|
||||
* and put them in a array list if the synonyms is not NULL
|
||||
*/
|
||||
sywords = NULL;
|
||||
if ( config->add_syn && _syn != NULL )
|
||||
{
|
||||
string_split_reset( &sse, ",", _sbuffer );
|
||||
sywords = new_array_list_with_opacity(5);
|
||||
while ( string_split_next( &sse, _buffer ) != NULL )
|
||||
{
|
||||
if ( strlen(_buffer) > length ) continue;
|
||||
array_list_add( sywords,
|
||||
string_copy_heap(_buffer, strlen(_buffer)) );
|
||||
}
|
||||
sywords = array_list_trim( sywords );
|
||||
}
|
||||
|
||||
//4. add the word item
|
||||
friso_dic_add_with_fre(
|
||||
friso->dic, lex, _word, sywords, _fre );
|
||||
}
|
||||
|
||||
fclose( _stream );
|
||||
} else {
|
||||
printf("Warning: Fail to open lexicon file %s\n", lex_file);
|
||||
//4. add the word item
|
||||
friso_dic_add_with_fre(
|
||||
friso->dic, lex, _word, sywords, _fre );
|
||||
}
|
||||
|
||||
fclose( _stream );
|
||||
} else {
|
||||
printf("Warning: Fail to open lexicon file %s\n", lex_file);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -347,44 +347,44 @@ FRISO_API void friso_dic_load(
|
||||
*/
|
||||
/*
 * Map the textual name of a lexicon type, as written in the lexicon
 * configuration file, to its friso_lex_t constant.
 *
 * Returns -1 when _key matches none of the known names.
 */
__STATIC_API__ friso_lex_t get_lexicon_type_with_constant( fstring _key )
{
    static const struct {
        const char *name;
        friso_lex_t type;
    } lex_names[] = {
        { "__LEX_CJK_WORDS__",   __LEX_CJK_WORDS__   },
        { "__LEX_CJK_UNITS__",   __LEX_CJK_UNITS__   },
        { "__LEX_ECM_WORDS__",   __LEX_ECM_WORDS__   },
        { "__LEX_CEM_WORDS__",   __LEX_CEM_WORDS__   },
        { "__LEX_CN_LNAME__",    __LEX_CN_LNAME__    },
        { "__LEX_CN_SNAME__",    __LEX_CN_SNAME__    },
        { "__LEX_CN_DNAME1__",   __LEX_CN_DNAME1__   },
        { "__LEX_CN_DNAME2__",   __LEX_CN_DNAME2__   },
        { "__LEX_CN_LNA__",      __LEX_CN_LNA__      },
        { "__LEX_STOPWORDS__",   __LEX_STOPWORDS__   },
        { "__LEX_ENPUN_WORDS__", __LEX_ENPUN_WORDS__ },
        { "__LEX_EN_WORDS__",    __LEX_EN_WORDS__    }
    };
    uint_t i;

    for ( i = 0; i < sizeof( lex_names ) / sizeof( lex_names[0] ); i++ ) {
        if ( strcmp( _key, lex_names[i].name ) == 0 ) {
            return lex_names[i].type;
        }
    }

    return -1;
}
|
||||
|
||||
/*
|
||||
@ -396,136 +396,136 @@ __STATIC_API__ friso_lex_t get_lexicon_type_with_constant( fstring _key )
|
||||
* @param _limitts words length limit
|
||||
*/
|
||||
FRISO_API void friso_dic_load_from_ifile(
|
||||
friso_t friso,
|
||||
friso_config_t config,
|
||||
fstring _path,
|
||||
uint_t _limits )
|
||||
friso_t friso,
|
||||
friso_config_t config,
|
||||
fstring _path,
|
||||
uint_t _limits )
|
||||
{
|
||||
|
||||
//1.parse the configuration file.
|
||||
FILE * __stream;
|
||||
char __chars__[1024], __key__[30], *__line__;
|
||||
uint_t __length__, i, t;
|
||||
friso_lex_t lex_t;
|
||||
string_buffer_t sb;
|
||||
//1.parse the configuration file.
|
||||
FILE * __stream;
|
||||
char __chars__[1024], __key__[30], *__line__;
|
||||
uint_t __length__, i, t;
|
||||
friso_lex_t lex_t;
|
||||
string_buffer_t sb;
|
||||
|
||||
//get the lexicon configruation file path
|
||||
sb = new_string_buffer();
|
||||
string_buffer_append( sb, _path );
|
||||
if ( _path[ strlen(_path) - 1 ] != '/' )
|
||||
string_buffer_append( sb, "/" );
|
||||
string_buffer_append( sb, __FRISO_LEX_IFILE__ );
|
||||
//get the lexicon configruation file path
|
||||
sb = new_string_buffer();
|
||||
string_buffer_append( sb, _path );
|
||||
if ( _path[ strlen(_path) - 1 ] != '/' )
|
||||
string_buffer_append( sb, "/" );
|
||||
string_buffer_append( sb, __FRISO_LEX_IFILE__ );
|
||||
|
||||
if ( ( __stream = fopen( sb->buffer, "rb" ) ) != NULL )
|
||||
if ( ( __stream = fopen( sb->buffer, "rb" ) ) != NULL )
|
||||
{
|
||||
while ( ( __line__ =
|
||||
file_get_line( __chars__, __stream ) ) != NULL )
|
||||
{
|
||||
while ( ( __line__ =
|
||||
file_get_line( __chars__, __stream ) ) != NULL )
|
||||
//comment filter.
|
||||
if ( __line__[0] == '#' ) continue;
|
||||
if ( __line__[0] == '\0' ) continue;
|
||||
|
||||
__length__ = strlen( __line__ );
|
||||
//item start
|
||||
if ( __line__[ __length__ - 1 ] == '[' )
|
||||
{
|
||||
//get the type key
|
||||
for ( i = 0; i < __length__
|
||||
&& ( __line__[i] == ' ' || __line__[i] == '\t' ); i++ );
|
||||
for ( t = 0; i < __length__; i++,t++ ) {
|
||||
if ( __line__[i] == ' '
|
||||
|| __line__[i] == '\t' || __line__[i] == ':' ) break;
|
||||
__key__[t] = __line__[i];
|
||||
}
|
||||
__key__[t] = '\0';
|
||||
|
||||
//get the lexicon type
|
||||
lex_t = get_lexicon_type_with_constant(__key__);
|
||||
if ( lex_t == -1 ) continue;
|
||||
|
||||
//printf("key=%s, type=%d\n", __key__, lex_t );
|
||||
while ( ( __line__ = file_get_line( __chars__, __stream ) ) != NULL )
|
||||
{
|
||||
//comment filter.
|
||||
if ( __line__[0] == '#' ) continue;
|
||||
if ( __line__[0] == '\0' ) continue;
|
||||
//comments filter.
|
||||
if ( __line__[0] == '#' ) continue;
|
||||
if ( __line__[0] == '\0' ) continue;
|
||||
|
||||
__length__ = strlen( __line__ );
|
||||
//item start
|
||||
if ( __line__[ __length__ - 1 ] == '[' )
|
||||
{
|
||||
//get the type key
|
||||
for ( i = 0; i < __length__
|
||||
&& ( __line__[i] == ' ' || __line__[i] == '\t' ); i++ );
|
||||
for ( t = 0; i < __length__; i++,t++ ) {
|
||||
if ( __line__[i] == ' '
|
||||
|| __line__[i] == '\t' || __line__[i] == ':' ) break;
|
||||
__key__[t] = __line__[i];
|
||||
}
|
||||
__key__[t] = '\0';
|
||||
__length__ = strlen( __line__ );
|
||||
if ( __line__[ __length__ - 1 ] == ']' ) break;
|
||||
|
||||
//get the lexicon type
|
||||
lex_t = get_lexicon_type_with_constant(__key__);
|
||||
if ( lex_t == -1 ) continue;
|
||||
for ( i = 0; i < __length__
|
||||
&& ( __line__[i] == ' ' || __line__[i] == '\t' ); i++ );
|
||||
for ( t = 0; i < __length__; i++,t++ ) {
|
||||
if ( __line__[i] == ' '
|
||||
|| __line__[i] == '\t' || __line__[i] == ';' ) break;
|
||||
__key__[t] = __line__[i];
|
||||
}
|
||||
__key__[t] = '\0';
|
||||
|
||||
//printf("key=%s, type=%d\n", __key__, lex_t );
|
||||
while ( ( __line__ = file_get_line( __chars__, __stream ) ) != NULL )
|
||||
{
|
||||
//comments filter.
|
||||
if ( __line__[0] == '#' ) continue;
|
||||
if ( __line__[0] == '\0' ) continue;
|
||||
//load the lexicon item from the lexicon file.
|
||||
string_buffer_clear( sb );
|
||||
string_buffer_append( sb, _path );
|
||||
string_buffer_append( sb, __key__ );
|
||||
//printf("key=%s, type=%d\n", __key__, lex_t);
|
||||
friso_dic_load( friso, config, lex_t, sb->buffer, _limits );
|
||||
}
|
||||
|
||||
__length__ = strlen( __line__ );
|
||||
if ( __line__[ __length__ - 1 ] == ']' ) break;
|
||||
}
|
||||
|
||||
for ( i = 0; i < __length__
|
||||
&& ( __line__[i] == ' ' || __line__[i] == '\t' ); i++ );
|
||||
for ( t = 0; i < __length__; i++,t++ ) {
|
||||
if ( __line__[i] == ' '
|
||||
|| __line__[i] == '\t' || __line__[i] == ';' ) break;
|
||||
__key__[t] = __line__[i];
|
||||
}
|
||||
__key__[t] = '\0';
|
||||
} //end while
|
||||
|
||||
//load the lexicon item from the lexicon file.
|
||||
string_buffer_clear( sb );
|
||||
string_buffer_append( sb, _path );
|
||||
string_buffer_append( sb, __key__ );
|
||||
//printf("key=%s, type=%d\n", __key__, lex_t);
|
||||
friso_dic_load( friso, config, lex_t, sb->buffer, _limits );
|
||||
}
|
||||
fclose( __stream );
|
||||
} else {
|
||||
printf("Warning: Fail to open the lexicon configuration file %s\n", sb->buffer);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
} //end while
|
||||
|
||||
fclose( __stream );
|
||||
} else {
|
||||
printf("Warning: Fail to open the lexicon configuration file %s\n", sb->buffer);
|
||||
}
|
||||
|
||||
free_string_buffer(sb);
|
||||
free_string_buffer(sb);
|
||||
}
|
||||
|
||||
//match the item.
|
||||
FRISO_API int friso_dic_match(
|
||||
friso_dic_t dic,
|
||||
friso_lex_t lex,
|
||||
fstring word )
|
||||
friso_dic_t dic,
|
||||
friso_lex_t lex,
|
||||
fstring word )
|
||||
{
|
||||
if ( lex >= 0 && lex < __FRISO_LEXICON_LENGTH__ ) {
|
||||
return hash_exist_mapping( dic[lex], word );
|
||||
}
|
||||
return 0;
|
||||
if ( lex >= 0 && lex < __FRISO_LEXICON_LENGTH__ ) {
|
||||
return hash_exist_mapping( dic[lex], word );
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
//get the lex_entry_t associated with the word.
|
||||
FRISO_API lex_entry_t friso_dic_get(
|
||||
friso_dic_t dic,
|
||||
friso_lex_t lex,
|
||||
fstring word )
|
||||
friso_dic_t dic,
|
||||
friso_lex_t lex,
|
||||
fstring word )
|
||||
{
|
||||
if ( lex >= 0 && lex < __FRISO_LEXICON_LENGTH__ ) {
|
||||
return ( lex_entry_t ) hash_get_value( dic[lex], word );
|
||||
}
|
||||
return NULL;
|
||||
if ( lex >= 0 && lex < __FRISO_LEXICON_LENGTH__ ) {
|
||||
return ( lex_entry_t ) hash_get_value( dic[lex], word );
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
//get the size of the specified type dictionary.
|
||||
FRISO_API uint_t friso_spec_dic_size(
|
||||
friso_dic_t dic,
|
||||
friso_lex_t lex )
|
||||
friso_dic_t dic,
|
||||
friso_lex_t lex )
|
||||
{
|
||||
if ( lex >= 0 && lex < __FRISO_LEXICON_LENGTH__ ) {
|
||||
return hash_get_size( dic[lex] );
|
||||
}
|
||||
return 0;
|
||||
if ( lex >= 0 && lex < __FRISO_LEXICON_LENGTH__ ) {
|
||||
return hash_get_size( dic[lex] );
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
//get size of the whole dictionary.
|
||||
FRISO_API uint_t friso_all_dic_size(
|
||||
friso_dic_t dic )
|
||||
friso_dic_t dic )
|
||||
{
|
||||
register uint_t size = 0, t;
|
||||
register uint_t size = 0, t;
|
||||
|
||||
for ( t = 0; t < __FRISO_LEXICON_LENGTH__; t++ ) {
|
||||
size += hash_get_size( dic[t] );
|
||||
}
|
||||
for ( t = 0; t < __FRISO_LEXICON_LENGTH__; t++ ) {
|
||||
size += hash_get_size( dic[t] );
|
||||
}
|
||||
|
||||
return size;
|
||||
return size;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user