增加friso_enchar_type函数, 用于判断给定字符的类型。

This commit is contained in:
狮子的魂 2013-12-22 14:58:34 +08:00
parent 4722fc777b
commit 873a1344cb
3 changed files with 390 additions and 337 deletions

View File

@ -173,3 +173,35 @@ FRISO_API int friso_uppercase_letter(
return gbk_uppercase_letter( task->buffer );
return 0;
}
/**
 * Classify the character currently held by the task.
 *
 * For FRISO_UTF8 the code point is taken from task->unicode; for
 * FRISO_GBK the first byte of task->buffer is used. Any other charset
 * leaves the value at 0, which is reported as FRISO_EN_UNKNOW.
 *
 * No full-width folding is performed here: a full-width code point
 * (UTF-8) or a GBK lead byte is greater than 126 and therefore yields
 * FRISO_EN_UNKNOW.
 *
 * @param charset  charset of the task content (FRISO_UTF8 or FRISO_GBK)
 * @param task     task whose current character is classified
 * @return FRISO_EN_WHITESPACE for 32, FRISO_EN_NUMERIC for 48-57,
 *         FRISO_EN_LETTER for 65-90 and 97-122, FRISO_EN_PUNCTUATION
 *         for every other value in 33-126, FRISO_EN_UNKNOW otherwise.
 */
FRISO_API friso_enchar_t friso_enchar_type(
        friso_charset_t charset,
        friso_task_t task )
{
    /* Unicode or ASCII value (both UTF-8 and GBK are ASCII compatible). */
    uint_t u = 0;

    if ( charset == FRISO_UTF8 ) {
        u = task->unicode;
    } else if ( charset == FRISO_GBK ) {
        u = (uchar_t) task->buffer[0];
    }

    /*
     * Range check. Values below 32 (control characters, and the 0 left
     * behind by an unsupported charset) are rejected as well, so they
     * are no longer reported as punctuation.
     */
    if ( u < 32 || u > 126 )    return FRISO_EN_UNKNOW;
    if ( u == 32 )              return FRISO_EN_WHITESPACE;
    if ( u >= 48 && u <= 57 )   return FRISO_EN_NUMERIC;    /* 0-9 */
    if ( u >= 65 && u <= 90 )   return FRISO_EN_LETTER;     /* A-Z */
    if ( u >= 97 && u <= 122 )  return FRISO_EN_LETTER;     /* a-z */

    return FRISO_EN_PUNCTUATION;
}

View File

@ -59,6 +59,27 @@ FRISO_API int friso_decimal_string( friso_charset_t, char * );
// included full-width and half-width letters.
FRISO_API int friso_uppercase_letter( friso_charset_t, friso_task_t );
//en char type.
//#define FRISO_EN_LETTER 0 //a-z && A-Z
//#define FRISO_EN_NUMERIC 1 //0-9
//#define FRISO_EN_PUNCTUATION 2 //english punctuations
//#define FRISO_EN_WHITESPACE 3 //whitespace
//#define FRISO_EN_UNKNOW -1 //beyond 32-122
/* Classification of a single English (ASCII range) character. */
typedef enum {
    FRISO_EN_UNKNOW      = -1,  /* unknown (outside 32-126) */
    FRISO_EN_LETTER      =  0,  /* A-Z, a-z */
    FRISO_EN_NUMERIC     =  1,  /* 0-9 */
    FRISO_EN_PUNCTUATION =  2,  /* english punctuations */
    FRISO_EN_WHITESPACE  =  3   /* whitespace */
} friso_enchar_t;
/*
 * Get the type of the character held by the specified task.
 * The result is one of the friso_enchar_t constants defined above.
 *
 * NOTE(review): the original note says full-width english chars are
 * included - confirm against the implementation.
 */
FRISO_API friso_enchar_t friso_enchar_type( friso_charset_t, friso_task_t );
/* }}} */

View File

@ -16,18 +16,18 @@
/*
 * Create a new lexicon: a zero-initialised array of
 * __FRISO_LEXICON_LENGTH__ slots, each filled with a fresh hash table.
 *
 * @return the new lexicon.
 */
FRISO_API friso_dic_t friso_dic_new()
{
    register uint_t slot;
    friso_dic_t lexicon;

    lexicon = ( friso_dic_t ) FRISO_CALLOC(
            sizeof( friso_hash_t ), __FRISO_LEXICON_LENGTH__ );
    if ( lexicon == NULL ) {
        ___ALLOCATION_ERROR___
    }

    /* one hash table per lexicon type */
    slot = 0;
    while ( slot < __FRISO_LEXICON_LENGTH__ ) {
        lexicon[slot] = new_hash_table();
        slot++;
    }

    return lexicon;
}
/**
@ -38,55 +38,55 @@ FRISO_API friso_dic_t friso_dic_new()
*/
/*
 * Release callback handed to free_hash_table(): frees the word string
 * of the lexicon entry stored as the value of `e` and, when present,
 * every synonym string together with the synonym list itself.
 */
__STATIC_API__ void default_fdic_callback( hash_entry_t e )
{
    register uint_t k;
    friso_array_t synonyms;
    lex_entry_t entry = ( lex_entry_t ) e->_val;

    /* the word string */
    FRISO_FREE( entry->word );

    /* nothing more to do without a synonym list */
    synonyms = entry->syn;
    if ( synonyms == NULL ) return;

    for ( k = 0; k < synonyms->length; k++ ) {
        FRISO_FREE( synonyms->items[k] );
    }
    free_array_list( synonyms );
}
/*
 * Free a lexicon: every hash table is released with
 * default_fdic_callback as entry callback, then the slot array itself.
 */
FRISO_API void friso_dic_free( friso_dic_t dic )
{
    register uint_t slot = 0;

    while ( slot < __FRISO_LEXICON_LENGTH__ ) {
        free_hash_table( dic[slot], default_fdic_callback );
        slot++;
    }

    FRISO_FREE( dic );
}
/*
 * Create a new lexicon entry.
 * The word and syn pointers are stored as given (no copy is made).
 *
 * @param word    the word string
 * @param syn     synonym list, may be NULL
 * @param fre     word frequency
 * @param length  length of the word
 * @param type    lexicon type of the word
 * @return the newly allocated entry.
 */
FRISO_API lex_entry_t new_lex_entry(
        fstring word,
        friso_array_t syn,
        uint_t fre,
        uint_t length,
        uint_t type )
{
    lex_entry_t entry;

    entry = ( lex_entry_t ) FRISO_MALLOC( sizeof( lex_entry_cdt ) );
    if ( entry == NULL ) {
        ___ALLOCATION_ERROR___
    }

    /* fill in the fields */
    entry->type   = type;
    entry->length = length;
    entry->fre    = fre;
    entry->syn    = syn;
    entry->word   = word;

    return entry;
}
/**
@ -98,42 +98,42 @@ FRISO_API lex_entry_t new_lex_entry(
*/
/*
 * Free the entry structure only.
 * The word string and the synonym list it points to are not released
 * here.
 */
FRISO_API void free_lex_entry( lex_entry_t e )
{
    FRISO_FREE( e );
}
/*
 * Add a new entry to the dictionary.
 * The entry is created with frequency 0, strlen(word) as length and
 * `lex` as type, and is stored under `word` in the hash table of `lex`.
 * Nothing is added when `lex` is not a valid lexicon index.
 *
 * @param dic   the dictionary
 * @param lex   lexicon type, must be in [0, __FRISO_LEXICON_LENGTH__)
 * @param word  the word (stored as given, not copied)
 * @param syn   synonym list, may be NULL
 */
FRISO_API void friso_dic_add(
        friso_dic_t dic,
        friso_lex_t lex,
        fstring word,
        friso_array_t syn )
{
    /*
     * Both bounds must hold. The former "||" was always true and let
     * an out-of-range lex index past the end of dic[].
     */
    if ( lex >= 0 && lex < __FRISO_LEXICON_LENGTH__ )
    {
        hash_put_mapping( dic[lex], word,
                new_lex_entry( word, syn, 0,
                    (uint_t) strlen(word), (uint_t) lex ) );
    }
}
/*
 * Add a new entry with an explicit frequency to the dictionary.
 * Nothing is added when `lex` is not a valid lexicon index.
 *
 * @param dic        the dictionary
 * @param lex        lexicon type
 * @param word       the word (stored as given, not copied)
 * @param syn        synonym list, may be NULL
 * @param frequency  word frequency
 */
FRISO_API void friso_dic_add_with_fre(
        friso_dic_t dic,
        friso_lex_t lex,
        fstring word,
        friso_array_t syn,
        uint_t frequency )
{
    lex_entry_t entry;

    /* reject an invalid lexicon index */
    if ( lex < 0 || lex >= __FRISO_LEXICON_LENGTH__ ) return;

    entry = new_lex_entry( word, syn, frequency,
            ( uint_t ) strlen(word), ( uint_t ) lex );
    hash_put_mapping( dic[lex], word, entry );
}
/*
@ -144,18 +144,18 @@ FRISO_API void friso_dic_add_with_fre(
*/
/*
 * Read one line from the stream into __dst.
 * Bytes are copied up to, but not including, the next '\n' or the end
 * of the stream, and the result is NUL terminated. The number of bytes
 * written is not limited, so the caller must pass a buffer that is
 * large enough for the longest line.
 *
 * @param __dst    destination buffer
 * @param _stream  stream to read from
 * @return __dst, or NULL when the end of the stream was reached before
 *         any byte could be read.
 */
FRISO_API fstring file_get_line( fstring __dst, FILE * _stream )
{
    register int ch;
    fstring out = __dst;

    for ( ch = fgetc( _stream );
            ch != EOF && ch != '\n';
            ch = fgetc( _stream ) ) {
        *out++ = ch;
    }
    *out = '\0';

    if ( ch == EOF && out == __dst ) return NULL;
    return __dst;
}
/*
@ -163,21 +163,21 @@ FRISO_API fstring file_get_line( fstring __dst, FILE * _stream )
*/
/*
 * Used instead of memcpy: copy at most `blocks` bytes from _src to
 * __dst, stopping early at the terminator of _src, then NUL terminate
 * __dst. __dst must have room for blocks + 1 bytes.
 *
 * @return __dst
 */
__STATIC_API__ fstring string_copy(
        fstring _src,
        fstring __dst,
        uint_t blocks )
{
    register uint_t n = 0;

    while ( n < blocks && _src[n] != '\0' ) {
        __dst[n] = _src[n];
        n++;
    }
    __dst[n] = '\0';

    return __dst;
}
/**
@ -189,23 +189,23 @@ __STATIC_API__ fstring string_copy(
* @param blocks number of bytes to copy
*/
/*
 * Copy at most `blocks` bytes of _src (stopping early at its
 * terminator) into a newly allocated, NUL terminated buffer of
 * blocks + 1 bytes.
 *
 * @return the new heap string.
 */
__STATIC_API__ fstring string_copy_heap(
        fstring _src, uint_t blocks )
{
    register uint_t n = 0;
    fstring copy;

    copy = ( fstring ) FRISO_MALLOC( blocks + 1 );
    if ( copy == NULL ) {
        ___ALLOCATION_ERROR___;
    }

    while ( n < blocks && _src[n] != '\0' ) {
        copy[n] = _src[n];
        n++;
    }
    copy[n] = '\0';

    return copy;
}
/*
@ -215,15 +215,15 @@ __STATIC_API__ fstring string_copy_heap(
*/
/*
 * Find the first occurrence of `delimiter` in __str.
 * The terminating NUL is never matched.
 *
 * @return pointer to the first match inside __str, or NULL.
 */
__STATIC_API__ fstring indexOf( fstring __str, char delimiter )
{
    fstring cursor;

    for ( cursor = __str; *cursor != '\0'; cursor++ ) {
        if ( *cursor == delimiter ) return cursor;
    }

    return NULL;
}
/**
@ -235,105 +235,105 @@ __STATIC_API__ fstring indexOf( fstring __str, char delimiter )
* @param length the maximum length of the word item
*/
/*
 * Load every item of one lexicon file into friso->dic.
 *
 * Lines starting with '#' and longer than one character are skipped as
 * comments. For __LEX_STOPWORDS__ the whole line is the word. For any
 * other lexicon a line is split on '/': word, synonyms ("null" for
 * none, otherwise separated by ','), and an optional frequency.
 * Synonyms are only collected when config->add_syn is set.
 *
 * A warning is printed when the file cannot be opened.
 *
 * @param friso     friso instance owning the dictionary
 * @param config    configuration (add_syn is read)
 * @param lex       lexicon type the items are added to
 * @param lex_file  path of the lexicon file
 * @param length    maximum length of a word item
 */
FRISO_API void friso_dic_load(
        friso_t friso,
        friso_config_t config,
        friso_lex_t lex,
        fstring lex_file,
        uint_t length )
{
    FILE * _stream;
    char __char[1024], _buffer[512];
    fstring _line;
    string_split_entry sse;

    fstring _word;
    char _sbuffer[512];
    fstring _syn;
    friso_array_t sywords;
    uint_t _fre;

    if ( ( _stream = fopen( lex_file, "rb" ) ) != NULL )
    {
        while ( ( _line = file_get_line( __char, _stream ) ) != NULL )
        {
            //clear up the notes
            //make sure the length of the line is greater than 1.
            //like the single '#' mark in stopwords dictionary.
            if ( _line[0] == '#' && strlen(_line) > 1 ) continue;

            //handle the stopwords.
            if ( lex == __LEX_STOPWORDS__ )
            {
                //drop the chinese words longer than the max length.
                //NOTE: the "< 0" test relies on plain char being signed.
                if ( ((int)_line[0]) < 0 && strlen( _line ) > length ) continue;
                friso_dic_add( friso->dic, __LEX_STOPWORDS__,
                        string_copy_heap( _line, strlen(_line) ), NULL );
                continue;
            }

            //split the fstring with '/'.
            string_split_reset( &sse, "/", _line);
            if ( string_split_next( &sse, _buffer ) == NULL ) continue;

            //1. get the word.
            _word = string_copy_heap( _buffer, strlen(_buffer) );

            if ( string_split_next( &sse, _buffer ) == NULL )
            {
                //normal lexicon type,
                //add them to the dictionary directly
                friso_dic_add( friso->dic, lex, _word, NULL );
                continue;
            }

            /*
             * filter out the words that its length is larger
             * than the specified limit.
             * but not for __LEX_ECM_WORDS__ and english __LEX_STOPWORDS__
             * and __LEX_CEM_WORDS__.
             */
            if ( ! ( lex == __LEX_ECM_WORDS__ || lex == __LEX_CEM_WORDS__ )
                    && strlen( _word ) > length )
            {
                //the heap copy is not stored anywhere, release it here.
                FRISO_FREE( _word );
                continue;
            }

            //2. get the synonyms words.
            _syn = NULL;
            if ( strcmp( _buffer, "null" ) != 0 )
                _syn = string_copy( _buffer, _sbuffer, strlen(_buffer) );

            //3. get the word frequency if it available.
            _fre = 0;
            if ( string_split_next( &sse, _buffer ) != NULL )
                _fre = atoi( _buffer );

            /**
             * Here:
             * split the synonyms words with mark ","
             * and put them in a array list if the synonyms is not NULL
             */
            sywords = NULL;
            if ( config->add_syn && _syn != NULL )
            {
                string_split_reset( &sse, ",", _sbuffer );
                sywords = new_array_list_with_opacity(5);
                while ( string_split_next( &sse, _buffer ) != NULL )
                {
                    if ( strlen(_buffer) > length ) continue;
                    array_list_add( sywords,
                            string_copy_heap(_buffer, strlen(_buffer)) );
                }
                sywords = array_list_trim( sywords );
            }

            //4. add the word item
            friso_dic_add_with_fre(
                    friso->dic, lex, _word, sywords, _fre );
        }

        fclose( _stream );
    } else {
        printf("Warning: Fail to open lexicon file %s\n", lex_file);
    }
}
@ -347,44 +347,44 @@ FRISO_API void friso_dic_load(
*/
/*
 * Map the name of a lexicon constant, as written in the lexicon
 * configuration file, to its friso_lex_t value.
 *
 * @param _key  the constant name, e.g. "__LEX_CJK_WORDS__"
 * @return the matching lexicon type, or -1 for an unknown name.
 */
__STATIC_API__ friso_lex_t get_lexicon_type_with_constant( fstring _key )
{
    if ( strcmp( _key, "__LEX_CJK_WORDS__" ) == 0 )   return __LEX_CJK_WORDS__;
    if ( strcmp( _key, "__LEX_CJK_UNITS__" ) == 0 )   return __LEX_CJK_UNITS__;
    if ( strcmp( _key, "__LEX_ECM_WORDS__" ) == 0 )   return __LEX_ECM_WORDS__;
    if ( strcmp( _key, "__LEX_CEM_WORDS__" ) == 0 )   return __LEX_CEM_WORDS__;
    if ( strcmp( _key, "__LEX_CN_LNAME__" ) == 0 )    return __LEX_CN_LNAME__;
    if ( strcmp( _key, "__LEX_CN_SNAME__" ) == 0 )    return __LEX_CN_SNAME__;
    if ( strcmp( _key, "__LEX_CN_DNAME1__" ) == 0 )   return __LEX_CN_DNAME1__;
    if ( strcmp( _key, "__LEX_CN_DNAME2__" ) == 0 )   return __LEX_CN_DNAME2__;
    if ( strcmp( _key, "__LEX_CN_LNA__" ) == 0 )      return __LEX_CN_LNA__;
    if ( strcmp( _key, "__LEX_STOPWORDS__" ) == 0 )   return __LEX_STOPWORDS__;
    if ( strcmp( _key, "__LEX_ENPUN_WORDS__" ) == 0 ) return __LEX_ENPUN_WORDS__;
    if ( strcmp( _key, "__LEX_EN_WORDS__" ) == 0 )    return __LEX_EN_WORDS__;

    /* no constant with that name */
    return -1;
}
/*
@ -396,136 +396,136 @@ __STATIC_API__ friso_lex_t get_lexicon_type_with_constant( fstring _key )
* @param _limitts words length limit
*/
/*
 * Load all lexicons listed in the lexicon configuration file
 * (__FRISO_LEX_IFILE__) found in the directory _path.
 *
 * The file is read line by line; lines starting with '#' and empty
 * lines are ignored. A line ending with '[' opens an item: its first
 * token (ended by a blank, a tab or ':') names the lexicon type. Each
 * following line, up to the one ending with ']', carries a lexicon file
 * name (ended by a blank, a tab or ';') which is loaded from _path with
 * friso_dic_load(). Items with an unknown type name are skipped.
 *
 * A warning is printed when the configuration file cannot be opened.
 *
 * @param friso    friso instance owning the dictionary
 * @param config   configuration handed on to friso_dic_load()
 * @param _path    directory holding the configuration and lexicon files
 * @param _limits  words length limit
 */
FRISO_API void friso_dic_load_from_ifile(
        friso_t friso,
        friso_config_t config,
        fstring _path,
        uint_t _limits )
{
    //1.parse the configuration file.
    FILE * __stream;
    char __chars__[1024], __key__[30], *__line__;
    uint_t __length__, i, t;
    uint_t __plen__;
    int __sep__;
    friso_lex_t lex_t;
    string_buffer_t sb;

    //a separator is needed unless the path already ends with one.
    //(an empty path is used as it is, without reading _path[-1])
    __plen__ = ( uint_t ) strlen( _path );
    __sep__  = ( __plen__ > 0 && _path[ __plen__ - 1 ] != '/' );

    //get the lexicon configruation file path
    sb = new_string_buffer();
    string_buffer_append( sb, _path );
    if ( __sep__ )
        string_buffer_append( sb, "/" );
    string_buffer_append( sb, __FRISO_LEX_IFILE__ );

    if ( ( __stream = fopen( sb->buffer, "rb" ) ) != NULL )
    {
        while ( ( __line__ =
                    file_get_line( __chars__, __stream ) ) != NULL )
        {
            //comment filter.
            if ( __line__[0] == '#' ) continue;
            if ( __line__[0] == '\0' ) continue;

            __length__ = strlen( __line__ );
            //item start
            if ( __line__[ __length__ - 1 ] == '[' )
            {
                //get the type key
                for ( i = 0; i < __length__
                        && ( __line__[i] == ' ' || __line__[i] == '\t' ); i++ );
                for ( t = 0; i < __length__; i++ ) {
                    if ( __line__[i] == ' '
                            || __line__[i] == '\t' || __line__[i] == ':' ) break;
                    //never write past the end of __key__ (truncate).
                    if ( t < sizeof( __key__ ) - 1 ) __key__[t++] = __line__[i];
                }
                __key__[t] = '\0';

                //get the lexicon type
                lex_t = get_lexicon_type_with_constant(__key__);
                if ( lex_t == -1 ) continue;

                while ( ( __line__ = file_get_line( __chars__, __stream ) ) != NULL )
                {
                    //comments filter.
                    if ( __line__[0] == '#' ) continue;
                    if ( __line__[0] == '\0' ) continue;

                    //item end
                    __length__ = strlen( __line__ );
                    if ( __line__[ __length__ - 1 ] == ']' ) break;

                    //get the lexicon file name
                    for ( i = 0; i < __length__
                            && ( __line__[i] == ' ' || __line__[i] == '\t' ); i++ );
                    for ( t = 0; i < __length__; i++ ) {
                        if ( __line__[i] == ' '
                                || __line__[i] == '\t' || __line__[i] == ';' ) break;
                        //never write past the end of __key__ (truncate).
                        if ( t < sizeof( __key__ ) - 1 ) __key__[t++] = __line__[i];
                    }
                    __key__[t] = '\0';

                    //load the lexicon item from the lexicon file.
                    //the path is joined the same way as the one of the
                    //configuration file above.
                    string_buffer_clear( sb );
                    string_buffer_append( sb, _path );
                    if ( __sep__ )
                        string_buffer_append( sb, "/" );
                    string_buffer_append( sb, __key__ );
                    friso_dic_load( friso, config, lex_t, sb->buffer, _limits );
                }
            }

        } //end while

        fclose( __stream );
    } else {
        printf("Warning: Fail to open the lexicon configuration file %s\n", sb->buffer);
    }

    free_string_buffer(sb);
}
/*
 * Check whether `word` is stored in the lexicon of type `lex`.
 *
 * @return the result of hash_exist_mapping() for a valid lexicon
 *         index, 0 otherwise.
 */
FRISO_API int friso_dic_match(
        friso_dic_t dic,
        friso_lex_t lex,
        fstring word )
{
    if ( lex < 0 || lex >= __FRISO_LEXICON_LENGTH__ ) return 0;

    return hash_exist_mapping( dic[lex], word );
}
/*
 * Get the lex_entry_t associated with `word` in the lexicon of type
 * `lex`.
 *
 * @return the value stored for the word, or NULL when `lex` is not a
 *         valid lexicon index.
 */
FRISO_API lex_entry_t friso_dic_get(
        friso_dic_t dic,
        friso_lex_t lex,
        fstring word )
{
    if ( lex < 0 || lex >= __FRISO_LEXICON_LENGTH__ ) return NULL;

    return ( lex_entry_t ) hash_get_value( dic[lex], word );
}
/*
 * Get the size of the dictionary of the specified type.
 *
 * @return hash_get_size() of that lexicon, or 0 when `lex` is not a
 *         valid lexicon index.
 */
FRISO_API uint_t friso_spec_dic_size(
        friso_dic_t dic,
        friso_lex_t lex )
{
    if ( lex < 0 || lex >= __FRISO_LEXICON_LENGTH__ ) return 0;

    return hash_get_size( dic[lex] );
}
/*
 * Get the size of the whole dictionary: the sum of hash_get_size()
 * over all __FRISO_LEXICON_LENGTH__ lexicons.
 */
FRISO_API uint_t friso_all_dic_size(
        friso_dic_t dic )
{
    register uint_t total = 0;
    register uint_t slot = 0;

    while ( slot < __FRISO_LEXICON_LENGTH__ ) {
        total += hash_get_size( dic[slot] );
        slot++;
    }

    return total;
}