mirror of
https://gitee.com/lionsoul/friso.git
synced 2024-11-29 17:57:38 +08:00
code tab to 4 space
This commit is contained in:
parent
e9bf4a2536
commit
a264922721
50
CHANGES.md
50
CHANGES.md
@ -9,9 +9,9 @@ friso-1.6.2:
|
|||||||
|
|
||||||
3. friso deb | rpm支持:
|
3. friso deb | rpm支持:
|
||||||
Debian & Ubuntu:
|
Debian & Ubuntu:
|
||||||
sudo apt-get install libfriso0 libfriso-dev
|
sudo apt-get install libfriso0 libfriso-dev
|
||||||
CentOS & Fedora:
|
CentOS & Fedora:
|
||||||
sudo yum install libfriso libfriso-devel
|
sudo yum install libfriso libfriso-devel
|
||||||
|
|
||||||
4. 中文词性标注。
|
4. 中文词性标注。
|
||||||
|
|
||||||
@ -26,41 +26,41 @@ friso-1.6.2:
|
|||||||
|
|
||||||
friso-1.6.1:
|
friso-1.6.1:
|
||||||
|
|
||||||
1. friso.ini中friso.lex_dir增加相对friso.ini的路径支持 -done
|
1. friso.ini中friso.lex_dir增加相对friso.ini的路径支持 -done
|
||||||
|
|
||||||
2. 修复两处内存泄漏bug. -done
|
2. 修复两处内存泄漏bug. -done
|
||||||
|
|
||||||
3. 改善中英混合词的识别, 可以识别更多情况, 例如:高3 -done
|
3. 改善中英混合词的识别, 可以识别更多情况, 例如:高3 -done
|
||||||
|
|
||||||
4. 词库优化, 加入了一些新词条. -done
|
4. 词库优化, 加入了一些新词条. -done
|
||||||
|
|
||||||
5. 修复friso_dic_add & array_list_insert的两处代码bug -done
|
5. 修复friso_dic_add & array_list_insert的两处代码bug -done
|
||||||
|
|
||||||
6. 增加检测模式切分, 只返回词库中有的词条 -done
|
6. 增加检测模式切分, 只返回词库中有的词条 -done
|
||||||
|
|
||||||
7. 集成了php扩展绑定,完美支持PHP分词 -done
|
7. 集成了php扩展绑定,完美支持PHP分词 -done
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
friso-1.6.0:
|
friso-1.6.0:
|
||||||
|
|
||||||
1. friso_string.c#utf8_decimal_string初始化bytes = 0,
|
1. friso_string.c#utf8_decimal_string初始化bytes = 0,
|
||||||
去除WinNT的Run-Time Check Failed. -done
|
去除WinNT的Run-Time Check Failed. -done
|
||||||
|
|
||||||
2. 复杂英文和数字组合的二次切分. 例如: QQ2013会被切分成: qq2013, qq, 2013. -done
|
2. 复杂英文和数字组合的二次切分. 例如: QQ2013会被切分成: qq2013, qq, 2013. -done
|
||||||
|
|
||||||
3. GBK编码支持. -done
|
3. GBK编码支持. -done
|
||||||
|
|
||||||
4. 增加了friso.ini中自定义保留标点, 去除了默认对"^,/,-,'"等标点的保留. -done
|
4. 增加了friso.ini中自定义保留标点, 去除了默认对"^,/,-,'"等标点的保留. -done
|
||||||
|
|
||||||
5. 使用掩码操作控制变量来代替了原来的多个控制变量. -done
|
5. 使用掩码操作控制变量来代替了原来的多个控制变量. -done
|
||||||
|
|
||||||
6. 切分结果friso_hits_t中增加了对词条类别和词条长度的返回,纠正了offset的误差。 -done
|
6. 切分结果friso_hits_t中增加了对词条类别和词条长度的返回,纠正了offset的误差。 -done
|
||||||
|
|
||||||
7. 做了一些优化,例如:同义词的追加(普通/sphinx定义)复杂的判断逻辑,
|
7. 做了一些优化,例如:同义词的追加(普通/sphinx定义)复杂的判断逻辑,
|
||||||
改为了使用掩码状态控制,不仅减少了代码量还提高了执行效率。 -done
|
改为了使用掩码状态控制,不仅减少了代码量还提高了执行效率。 -done
|
||||||
|
|
||||||
8. 更多的返回信息,增加了对切分词条的类别,长度,真实长度,词性(待实现)等信息的返回。 -done
|
8. 更多的返回信息,增加了对切分词条的类别,长度,真实长度,词性(待实现)等信息的返回。 -done
|
||||||
|
|
||||||
9. 增加了安装中头文件的自动拷贝(usr/include/friso),可以通过include <friso/xx.h>来引用头文件。
|
9. 增加了安装中头文件的自动拷贝(usr/include/friso),可以通过include <friso/xx.h>来引用头文件。
|
||||||
|
|
||||||
@ -83,18 +83,18 @@ friso-1.4:
|
|||||||
1. 小数+单位无法识别的情况.更改friso_string#utf8_numeric_string()函数.
|
1. 小数+单位无法识别的情况.更改friso_string#utf8_numeric_string()函数.
|
||||||
|
|
||||||
2. 更改中英混合词的识别(目前可以识别中英任何一种组合).
|
2. 更改中英混合词的识别(目前可以识别中英任何一种组合).
|
||||||
英中: 例如: b超,
|
英中: 例如: b超,
|
||||||
英中英: a美1,
|
英中英: a美1,
|
||||||
英中英中: a哆啦a梦,
|
英中英中: a哆啦a梦,
|
||||||
中英: 卡拉ok,
|
中英: 卡拉ok,
|
||||||
中英中: 哆啦a梦,
|
中英中: 哆啦a梦,
|
||||||
中英中英: 中文a美a
|
中英中英: 中文a美a
|
||||||
|
|
||||||
3. 更改了单位组合, 现在可以组合的单位不局限是中文, 例如: ℃,℉
|
3. 更改了单位组合, 现在可以组合的单位不局限是中文, 例如: ℃,℉
|
||||||
|
|
||||||
4. 对于未识别的字符, 给定一个开关选项来决定保留还是过滤.
|
4. 对于未识别的字符, 给定一个开关选项来决定保留还是过滤.
|
||||||
|
|
||||||
5. 英文同义词的追加(增加了lex-en.lex词库)
|
5. 英文同义词的追加(增加了lex-en.lex词库)
|
||||||
|
|
||||||
|
|
||||||
friso-1.3:
|
friso-1.3:
|
||||||
@ -103,7 +103,7 @@ friso-1.3:
|
|||||||
2. 部分简易函数使用了宏定义来代替, 减少函数的调用.
|
2. 部分简易函数使用了宏定义来代替, 减少函数的调用.
|
||||||
|
|
||||||
3. 保留了英文全半角和中文标点符号的切分.(可以通过过滤停止词来过滤不需要的标点)
|
3. 保留了英文全半角和中文标点符号的切分.(可以通过过滤停止词来过滤不需要的标点)
|
||||||
停止词词库中已经加入了全部的保留的标点, 也就是默认全部过滤了.
|
停止词词库中已经加入了全部的保留的标点, 也就是默认全部过滤了.
|
||||||
|
|
||||||
4. 修复friso_string#utf8_en_punctuation()函数一处bug.
|
4. 修复friso_string#utf8_en_punctuation()函数一处bug.
|
||||||
|
|
||||||
|
@ -6,9 +6,9 @@ Friso是使用c语言开发的一款开源的高性能中文分词器,使用
|
|||||||
|
|
||||||
2。三种切分模式:
|
2。三种切分模式:
|
||||||
|
|
||||||
(1). 简易模式:FMM算法,适合速度要求场合。
|
(1). 简易模式:FMM算法,适合速度要求场合。
|
||||||
(2). 复杂模式- MMSEG四种过滤算法,具有较高的歧义去除,分词准确率达到了98.41%。
|
(2). 复杂模式- MMSEG四种过滤算法,具有较高的歧义去除,分词准确率达到了98.41%。
|
||||||
(3). (!New)检测模式:只返回词库中已有的词条,很适合某些应用场合。(1.6.1版本开始)
|
(3). (!New)检测模式:只返回词库中已有的词条,很适合某些应用场合。(1.6.1版本开始)
|
||||||
|
|
||||||
请参考本算法的原作:http://technology.chtsai.org/mmseg/。
|
请参考本算法的原作:http://technology.chtsai.org/mmseg/。
|
||||||
|
|
||||||
|
@ -8,6 +8,6 @@
|
|||||||
// ARG_ENABLE("friso", "enable friso support", "no");
|
// ARG_ENABLE("friso", "enable friso support", "no");
|
||||||
|
|
||||||
if (PHP_FRISO != "no") {
|
if (PHP_FRISO != "no") {
|
||||||
EXTENSION("friso", "friso.c");
|
EXTENSION("friso", "friso.c");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -20,53 +20,53 @@ echo "friso_version(): " , friso_version(), ", friso_charset(): ", friso_charset
|
|||||||
echo "分词函数:<br />";
|
echo "分词函数:<br />";
|
||||||
if ( friso_charset() == 'UTF-8' )
|
if ( friso_charset() == 'UTF-8' )
|
||||||
{
|
{
|
||||||
$_str = "歧义和同义词:研究生命起源,混合词: 做B超检查身体,x射线本质是什么,今天去奇都ktv唱卡拉ok去,哆啦a梦是一个动漫中的主角,单位和全角: 2009年8月6日开始大学之旅,岳阳今天的气温为38.6℃, 也就是101.48℉, 英文数字: bug report chenxin619315@gmail.com or visit http://code.google.com/p/jcseg, we all admire the hacker spirit!特殊数字: ① ⑩ ⑽ ㈩.";
|
$_str = "歧义和同义词:研究生命起源,混合词: 做B超检查身体,x射线本质是什么,今天去奇都ktv唱卡拉ok去,哆啦a梦是一个动漫中的主角,单位和全角: 2009年8月6日开始大学之旅,岳阳今天的气温为38.6℃, 也就是101.48℉, 英文数字: bug report chenxin619315@gmail.com or visit http://code.google.com/p/jcseg, we all admire the hacker spirit!特殊数字: ① ⑩ ⑽ ㈩.";
|
||||||
echo "<p>friso_split(\"" . $_str . "\"):<p />";
|
echo "<p>friso_split(\"" . $_str . "\"):<p />";
|
||||||
|
|
||||||
//API:
|
//API:
|
||||||
//rb_split(string, Array, [long])
|
//rb_split(string, Array, [long])
|
||||||
//1.string: 要被切分的字符串。
|
//1.string: 要被切分的字符串。
|
||||||
//2.Array: 配置选项,使用NULL来选择默认的配置(friso.ini中的配置)。
|
//2.Array: 配置选项,使用NULL来选择默认的配置(friso.ini中的配置)。
|
||||||
//3.long: 可选参数,自定义切分返回选项,查看下面的$_rargs
|
//3.long: 可选参数,自定义切分返回选项,查看下面的$_rargs
|
||||||
|
|
||||||
//1.完整的配置:
|
//1.完整的配置:
|
||||||
//array('max_len'=>5, 'r_name'=>0, 'mix_len'=>2, 'lna_len'=>1, 'add_syn'=>1,
|
//array('max_len'=>5, 'r_name'=>0, 'mix_len'=>2, 'lna_len'=>1, 'add_syn'=>1,
|
||||||
// 'clr_stw'=>1, 'keep_urec'=>0, 'spx_out'=>0, 'en_sseg'=> 1, 'st_minl'=>2, 'kpuncs'=>'.+#', 'mode'=>FRISO_COMPLEX);
|
// 'clr_stw'=>1, 'keep_urec'=>0, 'spx_out'=>0, 'en_sseg'=> 1, 'st_minl'=>2, 'kpuncs'=>'.+#', 'mode'=>FRISO_COMPLEX);
|
||||||
//1.在不了解friso内核的情况下, 请不要随便更改nthreshold
|
//1.在不了解friso内核的情况下, 请不要随便更改nthreshold
|
||||||
//2.使用NULL来使用php.ini中指定的friso.ini文件中的配置
|
//2.使用NULL来使用php.ini中指定的friso.ini文件中的配置
|
||||||
|
|
||||||
//2.返回选项:
|
//2.返回选项:
|
||||||
//词条: FRISO_RET_WORD, 类别:FRISO_RET_TYPE, 长度:FRISO_RET_LENGTH, 真实长度:FRISO_RET_RLEN, 偏移量:FRISO_RET_OFF
|
//词条: FRISO_RET_WORD, 类别:FRISO_RET_TYPE, 长度:FRISO_RET_LENGTH, 真实长度:FRISO_RET_RLEN, 偏移量:FRISO_RET_OFF
|
||||||
//词性:FRISO_RET_POS(待实现)
|
//词性:FRISO_RET_POS(待实现)
|
||||||
$_rargs = FRISO_RET_TYPE | FRISO_RET_LEN | FRISO_RET_RLEN | FRISO_RET_OFF | FRISO_RET_POS;
|
$_rargs = FRISO_RET_TYPE | FRISO_RET_LEN | FRISO_RET_RLEN | FRISO_RET_OFF | FRISO_RET_POS;
|
||||||
//$_rargs = 0;
|
//$_rargs = 0;
|
||||||
|
|
||||||
//3.切分类别:
|
//3.切分类别:
|
||||||
//CJK词条:FRISO_TYP_CJK, 英中混合词(b超):FRISO_TYP_ECM,中英混合词(卡拉ok):FRISO_TYP_CEM,
|
//CJK词条:FRISO_TYP_CJK, 英中混合词(b超):FRISO_TYP_ECM,中英混合词(卡拉ok):FRISO_TYP_CEM,
|
||||||
//英文标点混合词(c++):FRISO_TYP_EPUN,标点:FRISO_TYP_PUN,未知类别:FRISO_TYP_UNK,其他类别(同义词):FRISO_TYP_OTR
|
//英文标点混合词(c++):FRISO_TYP_EPUN,标点:FRISO_TYP_PUN,未知类别:FRISO_TYP_UNK,其他类别(同义词):FRISO_TYP_OTR
|
||||||
$_result = friso_split($_str, array('mode'=>FRISO_COMPLEX), $_rargs);
|
$_result = friso_split($_str, array('mode'=>FRISO_COMPLEX), $_rargs);
|
||||||
unset($_str);
|
unset($_str);
|
||||||
foreach ( $_result as $_val )
|
foreach ( $_result as $_val )
|
||||||
{
|
{
|
||||||
$_str = $_val['word'];
|
$_str = $_val['word'];
|
||||||
if ( $_rargs != 0 ) {
|
if ( $_rargs != 0 ) {
|
||||||
$_str .= '[';
|
$_str .= '[';
|
||||||
if ( ($_rargs & FRISO_RET_TYPE) != 0 )
|
if ( ($_rargs & FRISO_RET_TYPE) != 0 )
|
||||||
$_str .= ', type: '.$_val['type']; //获取词条类别
|
$_str .= ', type: '.$_val['type']; //获取词条类别
|
||||||
if ( ($_rargs & FRISO_RET_LEN) != 0 )
|
if ( ($_rargs & FRISO_RET_LEN) != 0 )
|
||||||
$_str .= ', len: ' . $_val['len']; //词条长度
|
$_str .= ', len: ' . $_val['len']; //词条长度
|
||||||
if ( ($_rargs & FRISO_RET_RLEN) != 0 )
|
if ( ($_rargs & FRISO_RET_RLEN) != 0 )
|
||||||
$_str .= ', rlen: ' . $_val['rlen']; //词条真实长度
|
$_str .= ', rlen: ' . $_val['rlen']; //词条真实长度
|
||||||
if ( ($_rargs & FRISO_RET_OFF) != 0 )
|
if ( ($_rargs & FRISO_RET_OFF) != 0 )
|
||||||
$_str .= ', off: ' . $_val['off']; //词条偏移量
|
$_str .= ', off: ' . $_val['off']; //词条偏移量
|
||||||
if ( ($_rargs & FRISO_RET_POS) != 0 )
|
if ( ($_rargs & FRISO_RET_POS) != 0 )
|
||||||
$_str .= ', pos: ' . $_val['pos']; //词条词性
|
$_str .= ', pos: ' . $_val['pos']; //词条词性
|
||||||
$_str .= ']';
|
$_str .= ']';
|
||||||
}
|
}
|
||||||
|
|
||||||
$_str .= '/ ';
|
$_str .= '/ ';
|
||||||
echo $_str;
|
echo $_str;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else echo "set charset to UTF-8 to test function friso_split.";
|
else echo "set charset to UTF-8 to test function friso_split.";
|
||||||
?>
|
?>
|
||||||
|
@ -4,10 +4,10 @@ ini_set('magic_quotes_gpc', 0);
|
|||||||
|
|
||||||
//check the charset
|
//check the charset
|
||||||
if ( friso_charset() != "GBK" ) {
|
if ( friso_charset() != "GBK" ) {
|
||||||
$_str = "Error: GBK charset required. <br />";
|
$_str = "Error: GBK charset required. <br />";
|
||||||
$_str .= "1. Modified friso.charset = 1 in your friso.ini .<br />";
|
$_str .= "1. Modified friso.charset = 1 in your friso.ini .<br />";
|
||||||
$_str .= "2. Modified friso.lex_dir = GBK lexicon abusolute path to load your GBK lexicon. <br />";
|
$_str .= "2. Modified friso.lex_dir = GBK lexicon abusolute path to load your GBK lexicon. <br />";
|
||||||
exit($_str);
|
exit($_str);
|
||||||
}
|
}
|
||||||
|
|
||||||
$text = '';
|
$text = '';
|
||||||
@ -15,139 +15,139 @@ $_timer = 0;
|
|||||||
$_act = '';
|
$_act = '';
|
||||||
$_cfg = array('mode' => FRISO_COMPLEX);
|
$_cfg = array('mode' => FRISO_COMPLEX);
|
||||||
if ( isset($_POST['_act']) && ($_act = $_POST['_act']) == 'split' ) {
|
if ( isset($_POST['_act']) && ($_act = $_POST['_act']) == 'split' ) {
|
||||||
$text = &$_POST['text'];
|
$text = &$_POST['text'];
|
||||||
$_cfg = &$_POST['config'];
|
$_cfg = &$_POST['config'];
|
||||||
if ( ! isset($_cfg['add_syn']) ) $_cfg['add_syn'] = 0;
|
if ( ! isset($_cfg['add_syn']) ) $_cfg['add_syn'] = 0;
|
||||||
if ( ! isset($_cfg['clr_stw']) ) $_cfg['clr_stw'] = 0;
|
if ( ! isset($_cfg['clr_stw']) ) $_cfg['clr_stw'] = 0;
|
||||||
if ( ! isset($_cfg['keep_urec']) ) $_cfg['keep_urec'] = 0;
|
if ( ! isset($_cfg['keep_urec']) ) $_cfg['keep_urec'] = 0;
|
||||||
if ( ! isset($_cfg['spx_out']) ) $_cfg['spx_out'] = 0;
|
if ( ! isset($_cfg['spx_out']) ) $_cfg['spx_out'] = 0;
|
||||||
if ( ! isset($_cfg['en_sseg']) ) $_cfg['en_sseg'] = 0;
|
if ( ! isset($_cfg['en_sseg']) ) $_cfg['en_sseg'] = 0;
|
||||||
|
|
||||||
$s_time = timer();
|
$s_time = timer();
|
||||||
$_ret = friso_split($text, $_cfg);
|
$_ret = friso_split($text, $_cfg);
|
||||||
$_timer = timer() - $s_time;
|
$_timer = timer() - $s_time;
|
||||||
}
|
}
|
||||||
|
|
||||||
function timer() {
|
function timer() {
|
||||||
list($msec, $sec) = explode(' ', microtime());
|
list($msec, $sec) = explode(' ', microtime());
|
||||||
return ((float)$msec + (float)$sec);
|
return ((float)$msec + (float)$sec);
|
||||||
}
|
}
|
||||||
?>
|
?>
|
||||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
||||||
|
|
||||||
<head>
|
<head>
|
||||||
<title>GBK - robbe分词测试程序 </title>
|
<title>GBK - robbe分词测试程序 </title>
|
||||||
<meta http-equiv="content-type" content="text/html;charset=GBK" />
|
<meta http-equiv="content-type" content="text/html;charset=GBK" />
|
||||||
<style type="text/css">
|
<style type="text/css">
|
||||||
#box {width: 1000px}
|
#box {width: 1000px}
|
||||||
.input-text {border: 1px solid #CCC;width: 1000px;height: 180px;background-color: #FFF;
|
.input-text {border: 1px solid #CCC;width: 1000px;height: 180px;background-color: #FFF;
|
||||||
color: #555;font-size: 14px;}
|
color: #555;font-size: 14px;}
|
||||||
.link-box {overflow: hidden;zoom:1;padding-top:10px;}
|
.link-box {overflow: hidden;zoom:1;padding-top:10px;}
|
||||||
#submit-link {float:right;width:150px;height: 26px;line-height: 26px;
|
#submit-link {float:right;width:150px;height: 26px;line-height: 26px;
|
||||||
background-color: #A50100;color: #FFF;font-weight: bold;text-align: center;
|
background-color: #A50100;color: #FFF;font-weight: bold;text-align: center;
|
||||||
text-decoration: none;font-size: 14px;}
|
text-decoration: none;font-size: 14px;}
|
||||||
#info-link {float:right;width:300px;height: 26px;line-height: 26px;
|
#info-link {float:right;width:300px;height: 26px;line-height: 26px;
|
||||||
background-color: #A50100;color: #FFF;font-weight: bold;text-align: center;
|
background-color: #A50100;color: #FFF;font-weight: bold;text-align: center;
|
||||||
text-decoration: none;font-size: 14px;}
|
text-decoration: none;font-size: 14px;}
|
||||||
.link-item {float: left;font-size: 14px;font-weight: bold;
|
.link-item {float: left;font-size: 14px;font-weight: bold;
|
||||||
height: 26px;line-height: 26px;width: 100px;color: #A50100;}
|
height: 26px;line-height: 26px;width: 100px;color: #A50100;}
|
||||||
.title-item {height:30px;line-height: 30px;font-size: 14px;font-weight: bold;}
|
.title-item {height:30px;line-height: 30px;font-size: 14px;font-weight: bold;}
|
||||||
|
|
||||||
#cfg-box {margin-bottom: 10px;}
|
#cfg-box {margin-bottom: 10px;}
|
||||||
#cfg-box div {overflow: hidden;zoom:1;color:#555;font-size:12px;}
|
#cfg-box div {overflow: hidden;zoom:1;color:#555;font-size:12px;}
|
||||||
#cfg-box div label {float: left;width: 160px;height: 26px;line-height:26px;text-align:right;
|
#cfg-box div label {float: left;width: 160px;height: 26px;line-height:26px;text-align:right;
|
||||||
padding-right:10px;font-size:12px;font-weight:bold;color:#555;}
|
padding-right:10px;font-size:12px;font-weight:bold;color:#555;}
|
||||||
.input {border: 1px solid #DDD;height: 18px;line-height: 18px;padding-left: 5px;width: 120px;
|
.input {border: 1px solid #DDD;height: 18px;line-height: 18px;padding-left: 5px;width: 120px;
|
||||||
color:#555; outline: none;}
|
color:#555; outline: none;}
|
||||||
</style>
|
</style>
|
||||||
</head>
|
</head>
|
||||||
|
|
||||||
<body>
|
<body>
|
||||||
<div id="box">
|
<div id="box">
|
||||||
<form name="robbe" method="post" action="gbk.demo.php">
|
<form name="robbe" method="post" action="gbk.demo.php">
|
||||||
<div class="title-item">分词配置:</div>
|
<div class="title-item">分词配置:</div>
|
||||||
<div id="cfg-box">
|
<div id="cfg-box">
|
||||||
<div>
|
<div>
|
||||||
<label>最大词长: </label>
|
<label>最大词长: </label>
|
||||||
<input type="text" name="config[max_len]" value="<?=isset($_cfg['max_len'])?$_cfg['max_len']:5?>" class="input" />
|
<input type="text" name="config[max_len]" value="<?=isset($_cfg['max_len'])?$_cfg['max_len']:5?>" class="input" />
|
||||||
</div>
|
</div>
|
||||||
<div>
|
<div>
|
||||||
<label>混合词中文词长: </label>
|
<label>混合词中文词长: </label>
|
||||||
<input type="text" name="config[mix_len]" value="<?=isset($_cfg['mix_len'])?$_cfg['mix_len']:2?>" class="input" />
|
<input type="text" name="config[mix_len]" value="<?=isset($_cfg['mix_len'])?$_cfg['mix_len']:2?>" class="input" />
|
||||||
</div>
|
</div>
|
||||||
<div>
|
<div>
|
||||||
<label>英文二次切分: </label>
|
<label>英文二次切分: </label>
|
||||||
<input type="checkbox" name="config[en_sseg]" <?=isset($_cfg['en_sseg'])&&$_cfg['en_sseg']==1?'checked="checked"':''?> value="1" />
|
<input type="checkbox" name="config[en_sseg]" <?=isset($_cfg['en_sseg'])&&$_cfg['en_sseg']==1?'checked="checked"':''?> value="1" />
|
||||||
</div>
|
</div>
|
||||||
<div>
|
<div>
|
||||||
<label>二次切分子Token最小长度: </label>
|
<label>二次切分子Token最小长度: </label>
|
||||||
<input type="text" name="config[st_minl]" value="<?=isset($_cfg['st_minl'])?$_cfg['st_minl']:2?>" class="input" />
|
<input type="text" name="config[st_minl]" value="<?=isset($_cfg['st_minl'])?$_cfg['st_minl']:2?>" class="input" />
|
||||||
</div>
|
</div>
|
||||||
<div>
|
<div>
|
||||||
<label>英文Token中保留的标点: </label>
|
<label>英文Token中保留的标点: </label>
|
||||||
<input type="text" name="config[kpuncs]" value="<?=isset($_cfg['kpuncs'])?$_cfg['kpuncs']:'@%.#&+'?>" class="input" />
|
<input type="text" name="config[kpuncs]" value="<?=isset($_cfg['kpuncs'])?$_cfg['kpuncs']:'@%.#&+'?>" class="input" />
|
||||||
</div>
|
</div>
|
||||||
<div>
|
<div>
|
||||||
<label>同义词追加: </label>
|
<label>同义词追加: </label>
|
||||||
<input type="checkbox" name="config[add_syn]" <?=isset($_cfg['add_syn'])&&$_cfg['add_syn']==1?'checked="checked"':''?> value="1" />
|
<input type="checkbox" name="config[add_syn]" <?=isset($_cfg['add_syn'])&&$_cfg['add_syn']==1?'checked="checked"':''?> value="1" />
|
||||||
</div>
|
</div>
|
||||||
<div>
|
<div>
|
||||||
<label>过滤停止词: </label>
|
<label>过滤停止词: </label>
|
||||||
<input type="checkbox" name="config[clr_stw]" <?=isset($_cfg['clr_stw'])&&$_cfg['clr_stw']==1?'checked="checked"':''?> value="1" />
|
<input type="checkbox" name="config[clr_stw]" <?=isset($_cfg['clr_stw'])&&$_cfg['clr_stw']==1?'checked="checked"':''?> value="1" />
|
||||||
</div>
|
</div>
|
||||||
<div>
|
<div>
|
||||||
<label>保留未识别词: </label>
|
<label>保留未识别词: </label>
|
||||||
<input type="checkbox" name="config[keep_urec]" <?=isset($_cfg['keep_urec'])&&$_cfg['keep_urec']==1?'checked="checked"':''?> value="1" />
|
<input type="checkbox" name="config[keep_urec]" <?=isset($_cfg['keep_urec'])&&$_cfg['keep_urec']==1?'checked="checked"':''?> value="1" />
|
||||||
</div>
|
</div>
|
||||||
<div>
|
<div>
|
||||||
<label>sphinx定制输出: </label>
|
<label>sphinx定制输出: </label>
|
||||||
<input type="checkbox" name="config[spx_out]" <?=isset($_cfg['spx_out'])&&$_cfg['spx_out']==1?'checked="checked"':''?> value="1" />
|
<input type="checkbox" name="config[spx_out]" <?=isset($_cfg['spx_out'])&&$_cfg['spx_out']==1?'checked="checked"':''?> value="1" />
|
||||||
</div>
|
</div>
|
||||||
<div>
|
<div>
|
||||||
<label>分词模式: </label>
|
<label>分词模式: </label>
|
||||||
<input type="radio" name="config[mode]" value="<?=RB_SMODE?>" <?=isset($_cfg['mode'])&&$_cfg['mode']==1?'checked="checked"':''?> />简易模式
|
<input type="radio" name="config[mode]" value="<?=RB_SMODE?>" <?=isset($_cfg['mode'])&&$_cfg['mode']==1?'checked="checked"':''?> />简易模式
|
||||||
<input type="radio" name="config[mode]" value="<?=RB_CMODE?>" <?=isset($_cfg['mode'])&&$_cfg['mode']==2?'checked="checked"':''?> />复杂模式
|
<input type="radio" name="config[mode]" value="<?=RB_CMODE?>" <?=isset($_cfg['mode'])&&$_cfg['mode']==2?'checked="checked"':''?> />复杂模式
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="title-item">分词内容:</div>
|
<div class="title-item">分词内容:</div>
|
||||||
<div class="r-item"><textarea name="text" class="input-text" id="text"><?=$text?></textarea></div>
|
<div class="r-item"><textarea name="text" class="input-text" id="text"><?=$text?></textarea></div>
|
||||||
<input type="hidden" name="_act" value="split"/>
|
<input type="hidden" name="_act" value="split"/>
|
||||||
<a href="javascript:;" onclick="do_submit();return false;" id="submit-link">robbe分词</a>
|
<a href="javascript:;" onclick="do_submit();return false;" id="submit-link">robbe分词</a>
|
||||||
</form>
|
</form>
|
||||||
|
|
||||||
<?php
|
<?php
|
||||||
if ( $_act == 'split' ) {
|
if ( $_act == 'split' ) {
|
||||||
?>
|
?>
|
||||||
<div class="title-item">分词结果:</div>
|
<div class="title-item">分词结果:</div>
|
||||||
<div><textarea class="input-text"><?php foreach ( $_ret as $_val ) echo $_val['word'].' ';?>
|
<div><textarea class="input-text"><?php foreach ( $_ret as $_val ) echo $_val['word'].' ';?>
|
||||||
</textarea></div>
|
</textarea></div>
|
||||||
<div class="link-box"><a id="info-link">
|
<div class="link-box"><a id="info-link">
|
||||||
<?php
|
<?php
|
||||||
$len = strlen($text);
|
$len = strlen($text);
|
||||||
if ( $len >= 1048576 ) {
|
if ( $len >= 1048576 ) {
|
||||||
echo substr(($len/1048576), 0, 6).'MB';
|
echo substr(($len/1048576), 0, 6).'MB';
|
||||||
} else if ( $len >= 1024 ) {
|
} else if ( $len >= 1024 ) {
|
||||||
echo substr( ($len / 1024), 0, 6).'KB';
|
echo substr( ($len / 1024), 0, 6).'KB';
|
||||||
} else {
|
} else {
|
||||||
echo $len.'B';
|
echo $len.'B';
|
||||||
}
|
}
|
||||||
?>
|
?>
|
||||||
<?php printf("%.5f", $_timer)?>sec
|
<?php printf("%.5f", $_timer)?>sec
|
||||||
</a></div>
|
</a></div>
|
||||||
<?php
|
<?php
|
||||||
}
|
}
|
||||||
?>
|
?>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<script type="text/javascript">
|
<script type="text/javascript">
|
||||||
String.prototype.trim = function() {return this.replace(/^\s+|\s+$/g, '');}
|
String.prototype.trim = function() {return this.replace(/^\s+|\s+$/g, '');}
|
||||||
function do_submit() {
|
function do_submit() {
|
||||||
var text = document.getElementById('text');
|
var text = document.getElementById('text');
|
||||||
if ( text.value.trim() == '' ) return;
|
if ( text.value.trim() == '' ) return;
|
||||||
document.robbe.submit();
|
document.robbe.submit();
|
||||||
}
|
}
|
||||||
</script>
|
</script>
|
||||||
</body>
|
</body>
|
||||||
|
@ -4,10 +4,10 @@ ini_set('magic_quotes_gpc', 0);
|
|||||||
|
|
||||||
//charset check.
|
//charset check.
|
||||||
if ( friso_charset() != "UTF-8" ) {
|
if ( friso_charset() != "UTF-8" ) {
|
||||||
$_str = "Error: UTF-8 charset required. <br />";
|
$_str = "Error: UTF-8 charset required. <br />";
|
||||||
$_str .= "1. Modified friso.charset = 0 in your friso.ini .<br />";
|
$_str .= "1. Modified friso.charset = 0 in your friso.ini .<br />";
|
||||||
$_str .= "2. Modified friso.lex_dir = UTF-8 lexicon abusolute path to load your UTF-8 lexicon. <br />";
|
$_str .= "2. Modified friso.lex_dir = UTF-8 lexicon abusolute path to load your UTF-8 lexicon. <br />";
|
||||||
exit($_str);
|
exit($_str);
|
||||||
}
|
}
|
||||||
|
|
||||||
$text = '';
|
$text = '';
|
||||||
@ -15,139 +15,139 @@ $_timer = 0;
|
|||||||
$_act = '';
|
$_act = '';
|
||||||
$_cfg = array('mode' => FRISO_COMPLEX);
|
$_cfg = array('mode' => FRISO_COMPLEX);
|
||||||
if ( isset($_POST['_act']) && ($_act = $_POST['_act']) == 'split' ) {
|
if ( isset($_POST['_act']) && ($_act = $_POST['_act']) == 'split' ) {
|
||||||
$text = &$_POST['text'];
|
$text = &$_POST['text'];
|
||||||
$_cfg = &$_POST['config'];
|
$_cfg = &$_POST['config'];
|
||||||
if ( ! isset($_cfg['add_syn']) ) $_cfg['add_syn'] = 0;
|
if ( ! isset($_cfg['add_syn']) ) $_cfg['add_syn'] = 0;
|
||||||
if ( ! isset($_cfg['clr_stw']) ) $_cfg['clr_stw'] = 0;
|
if ( ! isset($_cfg['clr_stw']) ) $_cfg['clr_stw'] = 0;
|
||||||
if ( ! isset($_cfg['keep_urec']) ) $_cfg['keep_urec'] = 0;
|
if ( ! isset($_cfg['keep_urec']) ) $_cfg['keep_urec'] = 0;
|
||||||
if ( ! isset($_cfg['spx_out']) ) $_cfg['spx_out'] = 0;
|
if ( ! isset($_cfg['spx_out']) ) $_cfg['spx_out'] = 0;
|
||||||
if ( ! isset($_cfg['en_sseg']) ) $_cfg['en_sseg'] = 0;
|
if ( ! isset($_cfg['en_sseg']) ) $_cfg['en_sseg'] = 0;
|
||||||
|
|
||||||
$s_time = timer();
|
$s_time = timer();
|
||||||
$_ret = friso_split($text, $_cfg);
|
$_ret = friso_split($text, $_cfg);
|
||||||
$_timer = timer() - $s_time;
|
$_timer = timer() - $s_time;
|
||||||
}
|
}
|
||||||
|
|
||||||
function timer() {
|
function timer() {
|
||||||
list($msec, $sec) = explode(' ', microtime());
|
list($msec, $sec) = explode(' ', microtime());
|
||||||
return ((float)$msec + (float)$sec);
|
return ((float)$msec + (float)$sec);
|
||||||
}
|
}
|
||||||
?>
|
?>
|
||||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
||||||
|
|
||||||
<head>
|
<head>
|
||||||
<title>UTF8 - robbe分词测试程序</title>
|
<title>UTF8 - robbe分词测试程序</title>
|
||||||
<meta http-equiv="content-type" content="text/html;charset=utf-8" />
|
<meta http-equiv="content-type" content="text/html;charset=utf-8" />
|
||||||
<style type="text/css">
|
<style type="text/css">
|
||||||
#box {width: 1000px}
|
#box {width: 1000px}
|
||||||
.input-text {border: 1px solid #CCC;width: 1000px;height: 180px;background-color: #FFF;
|
.input-text {border: 1px solid #CCC;width: 1000px;height: 180px;background-color: #FFF;
|
||||||
color: #555;font-size: 14px;}
|
color: #555;font-size: 14px;}
|
||||||
.link-box {overflow: hidden;zoom:1;padding-top:10px;}
|
.link-box {overflow: hidden;zoom:1;padding-top:10px;}
|
||||||
#submit-link {float:right;width:150px;height: 26px;line-height: 26px;
|
#submit-link {float:right;width:150px;height: 26px;line-height: 26px;
|
||||||
background-color: #A50100;color: #FFF;font-weight: bold;text-align: center;
|
background-color: #A50100;color: #FFF;font-weight: bold;text-align: center;
|
||||||
text-decoration: none;font-size: 14px;}
|
text-decoration: none;font-size: 14px;}
|
||||||
#info-link {float:right;width:300px;height: 26px;line-height: 26px;
|
#info-link {float:right;width:300px;height: 26px;line-height: 26px;
|
||||||
background-color: #A50100;color: #FFF;font-weight: bold;text-align: center;
|
background-color: #A50100;color: #FFF;font-weight: bold;text-align: center;
|
||||||
text-decoration: none;font-size: 14px;}
|
text-decoration: none;font-size: 14px;}
|
||||||
.link-item {float: left;font-size: 14px;font-weight: bold;
|
.link-item {float: left;font-size: 14px;font-weight: bold;
|
||||||
height: 26px;line-height: 26px;width: 100px;color: #A50100;}
|
height: 26px;line-height: 26px;width: 100px;color: #A50100;}
|
||||||
.title-item {height:30px;line-height: 30px;font-size: 14px;font-weight: bold;}
|
.title-item {height:30px;line-height: 30px;font-size: 14px;font-weight: bold;}
|
||||||
|
|
||||||
#cfg-box {margin-bottom: 10px;}
|
#cfg-box {margin-bottom: 10px;}
|
||||||
#cfg-box div {overflow: hidden;zoom:1;color:#555;font-size:12px;}
|
#cfg-box div {overflow: hidden;zoom:1;color:#555;font-size:12px;}
|
||||||
#cfg-box div label {float: left;width: 160px;height: 26px;line-height:26px;text-align:right;
|
#cfg-box div label {float: left;width: 160px;height: 26px;line-height:26px;text-align:right;
|
||||||
padding-right:10px;font-size:12px;font-weight:bold;color:#555;}
|
padding-right:10px;font-size:12px;font-weight:bold;color:#555;}
|
||||||
.input {border: 1px solid #DDD;height: 18px;line-height: 18px;padding-left: 5px;width: 120px;
|
.input {border: 1px solid #DDD;height: 18px;line-height: 18px;padding-left: 5px;width: 120px;
|
||||||
color:#555; outline: none;}
|
color:#555; outline: none;}
|
||||||
</style>
|
</style>
|
||||||
</head>
|
</head>
|
||||||
|
|
||||||
<body>
|
<body>
|
||||||
<div id="box">
|
<div id="box">
|
||||||
<form name="robbe" method="post" action="utf8.demo.php">
|
<form name="robbe" method="post" action="utf8.demo.php">
|
||||||
<div class="title-item">分词配置:</div>
|
<div class="title-item">分词配置:</div>
|
||||||
<div id="cfg-box">
|
<div id="cfg-box">
|
||||||
<div>
|
<div>
|
||||||
<label>最大词长: </label>
|
<label>最大词长: </label>
|
||||||
<input type="text" name="config[max_len]" value="<?=isset($_cfg['max_len'])?$_cfg['max_len']:5?>" class="input" />
|
<input type="text" name="config[max_len]" value="<?=isset($_cfg['max_len'])?$_cfg['max_len']:5?>" class="input" />
|
||||||
</div>
|
</div>
|
||||||
<div>
|
<div>
|
||||||
<label>混合词中文词长: </label>
|
<label>混合词中文词长: </label>
|
||||||
<input type="text" name="config[mix_len]" value="<?=isset($_cfg['mix_len'])?$_cfg['mix_len']:2?>" class="input" />
|
<input type="text" name="config[mix_len]" value="<?=isset($_cfg['mix_len'])?$_cfg['mix_len']:2?>" class="input" />
|
||||||
</div>
|
</div>
|
||||||
<div>
|
<div>
|
||||||
<label>英文二次切分: </label>
|
<label>英文二次切分: </label>
|
||||||
<input type="checkbox" name="config[en_sseg]" <?=isset($_cfg['en_sseg'])&&$_cfg['en_sseg']==1?'checked="checked"':''?> value="1" />
|
<input type="checkbox" name="config[en_sseg]" <?=isset($_cfg['en_sseg'])&&$_cfg['en_sseg']==1?'checked="checked"':''?> value="1" />
|
||||||
</div>
|
</div>
|
||||||
<div>
|
<div>
|
||||||
<label>二次切分子Token最小长度: </label>
|
<label>二次切分子Token最小长度: </label>
|
||||||
<input type="text" name="config[st_minl]" value="<?=isset($_cfg['st_minl'])?$_cfg['st_minl']:2?>" class="input" />
|
<input type="text" name="config[st_minl]" value="<?=isset($_cfg['st_minl'])?$_cfg['st_minl']:2?>" class="input" />
|
||||||
</div>
|
</div>
|
||||||
<div>
|
<div>
|
||||||
<label>英文Token中保留的标点: </label>
|
<label>英文Token中保留的标点: </label>
|
||||||
<input type="text" name="config[kpuncs]" value="<?=isset($_cfg['kpuncs'])?$_cfg['kpuncs']:'@%.#&+'?>" class="input" />
|
<input type="text" name="config[kpuncs]" value="<?=isset($_cfg['kpuncs'])?$_cfg['kpuncs']:'@%.#&+'?>" class="input" />
|
||||||
</div>
|
</div>
|
||||||
<div>
|
<div>
|
||||||
<label>同义词追加: </label>
|
<label>同义词追加: </label>
|
||||||
<input type="checkbox" name="config[add_syn]" <?=isset($_cfg['add_syn'])&&$_cfg['add_syn']==1?'checked="checked"':''?> value="1" />
|
<input type="checkbox" name="config[add_syn]" <?=isset($_cfg['add_syn'])&&$_cfg['add_syn']==1?'checked="checked"':''?> value="1" />
|
||||||
</div>
|
</div>
|
||||||
<div>
|
<div>
|
||||||
<label>过滤停止词: </label>
|
<label>过滤停止词: </label>
|
||||||
<input type="checkbox" name="config[clr_stw]" <?=isset($_cfg['clr_stw'])&&$_cfg['clr_stw']==1?'checked="checked"':''?> value="1" />
|
<input type="checkbox" name="config[clr_stw]" <?=isset($_cfg['clr_stw'])&&$_cfg['clr_stw']==1?'checked="checked"':''?> value="1" />
|
||||||
</div>
|
</div>
|
||||||
<div>
|
<div>
|
||||||
<label>保留未识别词: </label>
|
<label>保留未识别词: </label>
|
||||||
<input type="checkbox" name="config[keep_urec]" <?=isset($_cfg['keep_urec'])&&$_cfg['keep_urec']==1?'checked="checked"':''?> value="1" />
|
<input type="checkbox" name="config[keep_urec]" <?=isset($_cfg['keep_urec'])&&$_cfg['keep_urec']==1?'checked="checked"':''?> value="1" />
|
||||||
</div>
|
</div>
|
||||||
<div>
|
<div>
|
||||||
<label>sphinx定制输出: </label>
|
<label>sphinx定制输出: </label>
|
||||||
<input type="checkbox" name="config[spx_out]" <?=isset($_cfg['spx_out'])&&$_cfg['spx_out']==1?'checked="checked"':''?> value="1" />
|
<input type="checkbox" name="config[spx_out]" <?=isset($_cfg['spx_out'])&&$_cfg['spx_out']==1?'checked="checked"':''?> value="1" />
|
||||||
</div>
|
</div>
|
||||||
<div>
|
<div>
|
||||||
<label>分词模式: </label>
|
<label>分词模式: </label>
|
||||||
<input type="radio" name="config[mode]" value="<?=RB_SMODE?>" <?=isset($_cfg['mode'])&&$_cfg['mode']==1?'checked="checked"':''?> />简易模式
|
<input type="radio" name="config[mode]" value="<?=RB_SMODE?>" <?=isset($_cfg['mode'])&&$_cfg['mode']==1?'checked="checked"':''?> />简易模式
|
||||||
<input type="radio" name="config[mode]" value="<?=RB_CMODE?>" <?=isset($_cfg['mode'])&&$_cfg['mode']==2?'checked="checked"':''?> />复杂模式
|
<input type="radio" name="config[mode]" value="<?=RB_CMODE?>" <?=isset($_cfg['mode'])&&$_cfg['mode']==2?'checked="checked"':''?> />复杂模式
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="title-item">分词内容:</div>
|
<div class="title-item">分词内容:</div>
|
||||||
<div class="r-item"><textarea name="text" class="input-text" id="text"><?=$text?></textarea></div>
|
<div class="r-item"><textarea name="text" class="input-text" id="text"><?=$text?></textarea></div>
|
||||||
<input type="hidden" name="_act" value="split"/>
|
<input type="hidden" name="_act" value="split"/>
|
||||||
<a href="javascript:;" onclick="do_submit();return false;" id="submit-link">robbe分词</a>
|
<a href="javascript:;" onclick="do_submit();return false;" id="submit-link">robbe分词</a>
|
||||||
</form>
|
</form>
|
||||||
|
|
||||||
<?php
|
<?php
|
||||||
if ( $_act == 'split' ) {
|
if ( $_act == 'split' ) {
|
||||||
?>
|
?>
|
||||||
<div class="title-item">分词结果:</div>
|
<div class="title-item">分词结果:</div>
|
||||||
<div><textarea class="input-text"><?php foreach ( $_ret as $_val ) echo $_val['word'].' ';?>
|
<div><textarea class="input-text"><?php foreach ( $_ret as $_val ) echo $_val['word'].' ';?>
|
||||||
</textarea></div>
|
</textarea></div>
|
||||||
<div class="link-box"><a id="info-link">
|
<div class="link-box"><a id="info-link">
|
||||||
<?php
|
<?php
|
||||||
$len = strlen($text);
|
$len = strlen($text);
|
||||||
if ( $len >= 1048576 ) {
|
if ( $len >= 1048576 ) {
|
||||||
echo substr(($len/1048576), 0, 6).'MB';
|
echo substr(($len/1048576), 0, 6).'MB';
|
||||||
} else if ( $len >= 1024 ) {
|
} else if ( $len >= 1024 ) {
|
||||||
echo substr( ($len / 1024), 0, 6).'KB';
|
echo substr( ($len / 1024), 0, 6).'KB';
|
||||||
} else {
|
} else {
|
||||||
echo $len.'B';
|
echo $len.'B';
|
||||||
}
|
}
|
||||||
?>
|
?>
|
||||||
<?php printf("%.5f", $_timer)?>sec
|
<?php printf("%.5f", $_timer)?>sec
|
||||||
</a></div>
|
</a></div>
|
||||||
<?php
|
<?php
|
||||||
}
|
}
|
||||||
?>
|
?>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<script type="text/javascript">
|
<script type="text/javascript">
|
||||||
String.prototype.trim = function() {return this.replace(/^\s+|\s+$/g, '');}
|
String.prototype.trim = function() {return this.replace(/^\s+|\s+$/g, '');}
|
||||||
function do_submit() {
|
function do_submit() {
|
||||||
var text = document.getElementById('text');
|
var text = document.getElementById('text');
|
||||||
if ( text.value.trim() == '' ) return;
|
if ( text.value.trim() == '' ) return;
|
||||||
document.robbe.submit();
|
document.robbe.submit();
|
||||||
}
|
}
|
||||||
</script>
|
</script>
|
||||||
</body>
|
</body>
|
||||||
|
@ -9,9 +9,9 @@
|
|||||||
#include "php_friso.h"
|
#include "php_friso.h"
|
||||||
|
|
||||||
#ifdef FRISO_WINNT
|
#ifdef FRISO_WINNT
|
||||||
# define friso_default_conf_file "c:/windows/friso.ini"
|
# define friso_default_conf_file "c:/windows/friso.ini"
|
||||||
#else
|
#else
|
||||||
# define friso_default_conf_file "/etc/friso/friso.ini"
|
# define friso_default_conf_file "/etc/friso/friso.ini"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* If you declare any globals in php_friso.h uncomment this:
|
/* If you declare any globals in php_friso.h uncomment this:
|
||||||
@ -27,15 +27,15 @@ static int le_friso = 1;
|
|||||||
* Every user visible function must have an entry in friso_functions[].
|
* Every user visible function must have an entry in friso_functions[].
|
||||||
*/
|
*/
|
||||||
const zend_function_entry friso_functions[] = {
|
const zend_function_entry friso_functions[] = {
|
||||||
PHP_FE(friso_split, NULL)
|
PHP_FE(friso_split, NULL)
|
||||||
PHP_FE(friso_version, NULL)
|
PHP_FE(friso_version, NULL)
|
||||||
PHP_FE(friso_charset, NULL)
|
PHP_FE(friso_charset, NULL)
|
||||||
PHP_FE(friso_dic_exist, NULL)
|
PHP_FE(friso_dic_exist, NULL)
|
||||||
PHP_FE(friso_dic_get, NULL)
|
PHP_FE(friso_dic_get, NULL)
|
||||||
PHP_FE(friso_utf8_bytes, NULL)
|
PHP_FE(friso_utf8_bytes, NULL)
|
||||||
PHP_FE(friso_utf8_ucode, NULL)
|
PHP_FE(friso_utf8_ucode, NULL)
|
||||||
PHP_FE(friso_ucode_utf8, NULL)
|
PHP_FE(friso_ucode_utf8, NULL)
|
||||||
{NULL, NULL, NULL} /* Must be the last line in friso_functions[] */
|
{NULL, NULL, NULL} /* Must be the last line in friso_functions[] */
|
||||||
};
|
};
|
||||||
/* }}} */
|
/* }}} */
|
||||||
|
|
||||||
@ -43,19 +43,19 @@ const zend_function_entry friso_functions[] = {
|
|||||||
*/
|
*/
|
||||||
zend_module_entry friso_module_entry = {
|
zend_module_entry friso_module_entry = {
|
||||||
#if ZEND_MODULE_API_NO >= 20010901
|
#if ZEND_MODULE_API_NO >= 20010901
|
||||||
STANDARD_MODULE_HEADER,
|
STANDARD_MODULE_HEADER,
|
||||||
#endif
|
#endif
|
||||||
"friso",
|
"friso",
|
||||||
friso_functions,
|
friso_functions,
|
||||||
PHP_MINIT(friso),
|
PHP_MINIT(friso),
|
||||||
PHP_MSHUTDOWN(friso),
|
PHP_MSHUTDOWN(friso),
|
||||||
PHP_RINIT(friso), /* Replace with NULL if there's nothing to do at request start */
|
PHP_RINIT(friso), /* Replace with NULL if there's nothing to do at request start */
|
||||||
PHP_RSHUTDOWN(friso), /* Replace with NULL if there's nothing to do at request end */
|
PHP_RSHUTDOWN(friso), /* Replace with NULL if there's nothing to do at request end */
|
||||||
PHP_MINFO(friso),
|
PHP_MINFO(friso),
|
||||||
#if ZEND_MODULE_API_NO >= 20010901
|
#if ZEND_MODULE_API_NO >= 20010901
|
||||||
"0.1", /* Replace with version number for your extension */
|
"0.1", /* Replace with version number for your extension */
|
||||||
#endif
|
#endif
|
||||||
STANDARD_MODULE_PROPERTIES
|
STANDARD_MODULE_PROPERTIES
|
||||||
};
|
};
|
||||||
/* }}} */
|
/* }}} */
|
||||||
|
|
||||||
@ -73,72 +73,72 @@ PHP_INI_END()
|
|||||||
/* {{{ php_robbe_globals_construct */
|
/* {{{ php_robbe_globals_construct */
|
||||||
static void php_friso_globals_construct(zend_friso_globals *friso_globals)
|
static void php_friso_globals_construct(zend_friso_globals *friso_globals)
|
||||||
{
|
{
|
||||||
friso_globals->friso = friso_new();
|
friso_globals->friso = friso_new();
|
||||||
friso_globals->config = friso_new_config();
|
friso_globals->config = friso_new_config();
|
||||||
friso_init_from_ifile(friso_globals->friso,
|
friso_init_from_ifile(friso_globals->friso,
|
||||||
friso_globals->config, INI_STR("friso.ini_file"));
|
friso_globals->config, INI_STR("friso.ini_file"));
|
||||||
}
|
}
|
||||||
/* }}} */
|
/* }}} */
|
||||||
|
|
||||||
/* {{{ php_robbe_globals_destruct*/
|
/* {{{ php_robbe_globals_destruct*/
|
||||||
static void php_friso_globals_destruct(zend_friso_globals *friso_globals)
|
static void php_friso_globals_destruct(zend_friso_globals *friso_globals)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* cause friso_free will free the dictionary
|
* cause friso_free will free the dictionary
|
||||||
* so here we don't have to call the friso_dic_free to free the
|
* so here we don't have to call the friso_dic_free to free the
|
||||||
* the robbe_dic global variable.
|
* the robbe_dic global variable.
|
||||||
*/
|
*/
|
||||||
//friso_dic_free( friso_globals->friso_dic );
|
//friso_dic_free( friso_globals->friso_dic );
|
||||||
//friso_globals->friso_dic = NULL;
|
//friso_globals->friso_dic = NULL;
|
||||||
friso_free_config( friso_globals->config );
|
friso_free_config( friso_globals->config );
|
||||||
friso_free( friso_globals->friso );
|
friso_free( friso_globals->friso );
|
||||||
}
|
}
|
||||||
/* }}} */
|
/* }}} */
|
||||||
|
|
||||||
#define FRISO_RET_WORD (1 << 0)
|
#define FRISO_RET_WORD (1 << 0)
|
||||||
#define FRISO_RET_TYPE (1 << 1)
|
#define FRISO_RET_TYPE (1 << 1)
|
||||||
#define FRISO_RET_OFF (1 << 2)
|
#define FRISO_RET_OFF (1 << 2)
|
||||||
#define FRISO_RET_LEN (1 << 3)
|
#define FRISO_RET_LEN (1 << 3)
|
||||||
#define FRISO_RET_RLEN (1 << 4)
|
#define FRISO_RET_RLEN (1 << 4)
|
||||||
#define FRISO_RET_POS (1 << 5)
|
#define FRISO_RET_POS (1 << 5)
|
||||||
|
|
||||||
/* {{{ PHP_MINIT_FUNCTION
|
/* {{{ PHP_MINIT_FUNCTION
|
||||||
*/
|
*/
|
||||||
PHP_MINIT_FUNCTION(friso)
|
PHP_MINIT_FUNCTION(friso)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* register some contants that robbe may use
|
* register some contants that robbe may use
|
||||||
* at its following work.
|
* at its following work.
|
||||||
* the constant is case sensitive and persitent.
|
* the constant is case sensitive and persitent.
|
||||||
*/
|
*/
|
||||||
REGISTER_LONG_CONSTANT("FRISO_SIMPLE", __FRISO_SIMPLE_MODE__, CONST_CS | CONST_PERSISTENT);
|
REGISTER_LONG_CONSTANT("FRISO_SIMPLE", __FRISO_SIMPLE_MODE__, CONST_CS | CONST_PERSISTENT);
|
||||||
REGISTER_LONG_CONSTANT("FRISO_COMPLEX", __FRISO_COMPLEX_MODE__, CONST_CS | CONST_PERSISTENT);
|
REGISTER_LONG_CONSTANT("FRISO_COMPLEX", __FRISO_COMPLEX_MODE__, CONST_CS | CONST_PERSISTENT);
|
||||||
REGISTER_LONG_CONSTANT("FRISO_DETECT", __FRISO_DETECT_MODE__, CONST_CS | CONST_PERSISTENT);
|
REGISTER_LONG_CONSTANT("FRISO_DETECT", __FRISO_DETECT_MODE__, CONST_CS | CONST_PERSISTENT);
|
||||||
REGISTER_LONG_CONSTANT("FRISO_LEX_CJK", __LEX_CJK_WORDS__, CONST_CS | CONST_PERSISTENT);
|
REGISTER_LONG_CONSTANT("FRISO_LEX_CJK", __LEX_CJK_WORDS__, CONST_CS | CONST_PERSISTENT);
|
||||||
REGISTER_LONG_CONSTANT("FRISO_LEX_STOP", __LEX_STOPWORDS__, CONST_CS | CONST_PERSISTENT);
|
REGISTER_LONG_CONSTANT("FRISO_LEX_STOP", __LEX_STOPWORDS__, CONST_CS | CONST_PERSISTENT);
|
||||||
|
|
||||||
//return parts for rb_split.
|
//return parts for rb_split.
|
||||||
REGISTER_LONG_CONSTANT("FRISO_RET_WORD", FRISO_RET_WORD, CONST_CS | CONST_PERSISTENT);
|
REGISTER_LONG_CONSTANT("FRISO_RET_WORD", FRISO_RET_WORD, CONST_CS | CONST_PERSISTENT);
|
||||||
REGISTER_LONG_CONSTANT("FRISO_RET_TYPE", FRISO_RET_TYPE, CONST_CS | CONST_PERSISTENT);
|
REGISTER_LONG_CONSTANT("FRISO_RET_TYPE", FRISO_RET_TYPE, CONST_CS | CONST_PERSISTENT);
|
||||||
REGISTER_LONG_CONSTANT("FRISO_RET_OFF", FRISO_RET_OFF, CONST_CS | CONST_PERSISTENT);
|
REGISTER_LONG_CONSTANT("FRISO_RET_OFF", FRISO_RET_OFF, CONST_CS | CONST_PERSISTENT);
|
||||||
REGISTER_LONG_CONSTANT("FRISO_RET_LEN", FRISO_RET_LEN, CONST_CS | CONST_PERSISTENT);
|
REGISTER_LONG_CONSTANT("FRISO_RET_LEN", FRISO_RET_LEN, CONST_CS | CONST_PERSISTENT);
|
||||||
REGISTER_LONG_CONSTANT("FRISO_RET_RLEN", FRISO_RET_RLEN, CONST_CS | CONST_PERSISTENT);
|
REGISTER_LONG_CONSTANT("FRISO_RET_RLEN", FRISO_RET_RLEN, CONST_CS | CONST_PERSISTENT);
|
||||||
REGISTER_LONG_CONSTANT("FRISO_RET_POS", FRISO_RET_POS, CONST_CS | CONST_PERSISTENT);
|
REGISTER_LONG_CONSTANT("FRISO_RET_POS", FRISO_RET_POS, CONST_CS | CONST_PERSISTENT);
|
||||||
|
|
||||||
//lex type constants.
|
//lex type constants.
|
||||||
REGISTER_LONG_CONSTANT("FRISO_TYP_CJK", __LEX_CJK_WORDS__, CONST_CS | CONST_PERSISTENT);
|
REGISTER_LONG_CONSTANT("FRISO_TYP_CJK", __LEX_CJK_WORDS__, CONST_CS | CONST_PERSISTENT);
|
||||||
REGISTER_LONG_CONSTANT("FRISO_TYP_ECM", __LEX_ECM_WORDS__, CONST_CS | CONST_PERSISTENT);
|
REGISTER_LONG_CONSTANT("FRISO_TYP_ECM", __LEX_ECM_WORDS__, CONST_CS | CONST_PERSISTENT);
|
||||||
REGISTER_LONG_CONSTANT("FRISO_TYP_CEM", __LEX_CEM_WORDS__, CONST_CS | CONST_PERSISTENT);
|
REGISTER_LONG_CONSTANT("FRISO_TYP_CEM", __LEX_CEM_WORDS__, CONST_CS | CONST_PERSISTENT);
|
||||||
REGISTER_LONG_CONSTANT("FRISO_TYP_EPUN", __LEX_ENPUN_WORDS__, CONST_CS | CONST_PERSISTENT);
|
REGISTER_LONG_CONSTANT("FRISO_TYP_EPUN", __LEX_ENPUN_WORDS__, CONST_CS | CONST_PERSISTENT);
|
||||||
REGISTER_LONG_CONSTANT("FRISO_TYP_PUN", __LEX_OTHER_WORDS__, CONST_CS | CONST_PERSISTENT);
|
REGISTER_LONG_CONSTANT("FRISO_TYP_PUN", __LEX_OTHER_WORDS__, CONST_CS | CONST_PERSISTENT);
|
||||||
REGISTER_LONG_CONSTANT("FRISO_TYP_UNK", __LEX_UNKNOW_WORDS__, CONST_CS | CONST_PERSISTENT);
|
REGISTER_LONG_CONSTANT("FRISO_TYP_UNK", __LEX_UNKNOW_WORDS__, CONST_CS | CONST_PERSISTENT);
|
||||||
REGISTER_LONG_CONSTANT("FRISO_TYP_OTR", __LEX_OTHER_WORDS__, CONST_CS | CONST_PERSISTENT);
|
REGISTER_LONG_CONSTANT("FRISO_TYP_OTR", __LEX_OTHER_WORDS__, CONST_CS | CONST_PERSISTENT);
|
||||||
|
|
||||||
REGISTER_INI_ENTRIES();
|
REGISTER_INI_ENTRIES();
|
||||||
/*initialize the globals variables.*/
|
/*initialize the globals variables.*/
|
||||||
php_friso_globals_construct( &friso_globals );
|
php_friso_globals_construct( &friso_globals );
|
||||||
|
|
||||||
return SUCCESS;
|
return SUCCESS;
|
||||||
}
|
}
|
||||||
/* }}} */
|
/* }}} */
|
||||||
|
|
||||||
@ -146,11 +146,11 @@ PHP_MINIT_FUNCTION(friso)
|
|||||||
*/
|
*/
|
||||||
PHP_MSHUTDOWN_FUNCTION(friso)
|
PHP_MSHUTDOWN_FUNCTION(friso)
|
||||||
{
|
{
|
||||||
UNREGISTER_INI_ENTRIES();
|
UNREGISTER_INI_ENTRIES();
|
||||||
/*destruct the globals variables*/
|
/*destruct the globals variables*/
|
||||||
php_friso_globals_destruct( &friso_globals );
|
php_friso_globals_destruct( &friso_globals );
|
||||||
|
|
||||||
return SUCCESS;
|
return SUCCESS;
|
||||||
}
|
}
|
||||||
/* }}} */
|
/* }}} */
|
||||||
|
|
||||||
@ -159,7 +159,7 @@ PHP_MSHUTDOWN_FUNCTION(friso)
|
|||||||
*/
|
*/
|
||||||
PHP_RINIT_FUNCTION(friso)
|
PHP_RINIT_FUNCTION(friso)
|
||||||
{
|
{
|
||||||
return SUCCESS;
|
return SUCCESS;
|
||||||
}
|
}
|
||||||
/* }}} */
|
/* }}} */
|
||||||
|
|
||||||
@ -168,22 +168,22 @@ PHP_RINIT_FUNCTION(friso)
|
|||||||
*/
|
*/
|
||||||
PHP_RSHUTDOWN_FUNCTION(friso)
|
PHP_RSHUTDOWN_FUNCTION(friso)
|
||||||
{
|
{
|
||||||
return SUCCESS;
|
return SUCCESS;
|
||||||
}
|
}
|
||||||
/* }}} */
|
/* }}} */
|
||||||
|
|
||||||
/* {{{ PHP_MINFO_FUNCTION
|
/* {{{ PHP_MINFO_FUNCTION
|
||||||
*/
|
*/
|
||||||
PHP_MINFO_FUNCTION(friso)
|
PHP_MINFO_FUNCTION(friso)
|
||||||
{
|
{
|
||||||
php_info_print_table_start();
|
php_info_print_table_start();
|
||||||
php_info_print_table_row(2, "Friso Support", "enabled");
|
php_info_print_table_row(2, "Friso Support", "enabled");
|
||||||
php_info_print_table_row(2, "Version", FRISO_VERSION);
|
php_info_print_table_row(2, "Version", FRISO_VERSION);
|
||||||
php_info_print_table_row(2, "Bug Report", "chenxin619315@gmail.com");
|
php_info_print_table_row(2, "Bug Report", "chenxin619315@gmail.com");
|
||||||
php_info_print_table_row(2, "Home page", "http://code.google.com/p/friso");
|
php_info_print_table_row(2, "Home page", "http://code.google.com/p/friso");
|
||||||
php_info_print_table_end();
|
php_info_print_table_end();
|
||||||
|
|
||||||
DISPLAY_INI_ENTRIES();
|
DISPLAY_INI_ENTRIES();
|
||||||
}
|
}
|
||||||
/* }}} */
|
/* }}} */
|
||||||
|
|
||||||
@ -192,130 +192,130 @@ PHP_MINFO_FUNCTION(friso)
|
|||||||
Return a array contains all the split result with a specified mode */
|
Return a array contains all the split result with a specified mode */
|
||||||
PHP_FUNCTION(friso_split)
|
PHP_FUNCTION(friso_split)
|
||||||
{
|
{
|
||||||
char *_str = NULL, *_key;
|
char *_str = NULL, *_key;
|
||||||
int slen, idx, klen, rargs = 0;
|
int slen, idx, klen, rargs = 0;
|
||||||
int arg_count;
|
int arg_count;
|
||||||
|
|
||||||
zval *ret, *cfg, **data;
|
zval *ret, *cfg, **data;
|
||||||
//used for multiple item return.
|
//used for multiple item return.
|
||||||
zval *item;
|
zval *item;
|
||||||
|
|
||||||
HashTable *cfgArr;
|
HashTable *cfgArr;
|
||||||
HashPosition pointer;
|
HashPosition pointer;
|
||||||
|
|
||||||
friso_task_t task;
|
friso_task_t task;
|
||||||
friso_config_t config = NULL, nconfig = NULL;
|
friso_config_t config = NULL, nconfig = NULL;
|
||||||
|
|
||||||
//get the arugments from the php layer.
|
//get the arugments from the php layer.
|
||||||
arg_count = ZEND_NUM_ARGS();
|
arg_count = ZEND_NUM_ARGS();
|
||||||
switch ( arg_count )
|
switch ( arg_count )
|
||||||
{
|
{
|
||||||
case 2:
|
case 2:
|
||||||
if ( zend_parse_parameters(arg_count TSRMLS_CC, "sz",
|
if ( zend_parse_parameters(arg_count TSRMLS_CC, "sz",
|
||||||
&_str, &slen, &cfg) == FAILURE ) return;
|
&_str, &slen, &cfg) == FAILURE ) return;
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
if (zend_parse_parameters( arg_count TSRMLS_CC, "szl",
|
if (zend_parse_parameters( arg_count TSRMLS_CC, "szl",
|
||||||
&_str, &slen, &cfg, &rargs) == FAILURE ) return;
|
&_str, &slen, &cfg, &rargs) == FAILURE ) return;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
WRONG_PARAM_COUNT;
|
WRONG_PARAM_COUNT;
|
||||||
}
|
}
|
||||||
|
|
||||||
//make sure the RB_RET_WORD will be returned.
|
//make sure the RB_RET_WORD will be returned.
|
||||||
//rargs |= FRISO_RET_WORD;
|
//rargs |= FRISO_RET_WORD;
|
||||||
|
|
||||||
//check and initialize the friso.
|
//check and initialize the friso.
|
||||||
if ( Z_TYPE_P(cfg) != IS_NULL )
|
if ( Z_TYPE_P(cfg) != IS_NULL )
|
||||||
{
|
{
|
||||||
nconfig = friso_new_config();
|
nconfig = friso_new_config();
|
||||||
memcpy(nconfig, friso_globals.config, sizeof(friso_config_entry));
|
memcpy(nconfig, friso_globals.config, sizeof(friso_config_entry));
|
||||||
|
|
||||||
//check the new setting.
|
//check the new setting.
|
||||||
cfgArr = Z_ARRVAL_P(cfg);
|
cfgArr = Z_ARRVAL_P(cfg);
|
||||||
//zend_printf("array length: %d", zend_hash_num_elements(cfgArr));
|
//zend_printf("array length: %d", zend_hash_num_elements(cfgArr));
|
||||||
for ( zend_hash_internal_pointer_reset_ex(cfgArr, &pointer);
|
for ( zend_hash_internal_pointer_reset_ex(cfgArr, &pointer);
|
||||||
zend_hash_get_current_data_ex(cfgArr, (void **)&data, &pointer) == SUCCESS;
|
zend_hash_get_current_data_ex(cfgArr, (void **)&data, &pointer) == SUCCESS;
|
||||||
zend_hash_move_forward_ex(cfgArr, &pointer) )
|
zend_hash_move_forward_ex(cfgArr, &pointer) )
|
||||||
{
|
{
|
||||||
zend_hash_get_current_key_ex(cfgArr, &_key, &klen, NULL, 0, &pointer);
|
zend_hash_get_current_key_ex(cfgArr, &_key, &klen, NULL, 0, &pointer);
|
||||||
//zend_printf("key: %s, value: %d<br />", _key, (*data)->value.lval);
|
//zend_printf("key: %s, value: %d<br />", _key, (*data)->value.lval);
|
||||||
|
|
||||||
if ( strcmp(_key, "kpuncs") == 0 )
|
if ( strcmp(_key, "kpuncs") == 0 )
|
||||||
{
|
{
|
||||||
memcpy(nconfig->kpuncs, (*data)->value.str.val, (*data)->value.str.len);
|
memcpy(nconfig->kpuncs, (*data)->value.str.val, (*data)->value.str.len);
|
||||||
nconfig->kpuncs[(*data)->value.str.len] = '\0';
|
nconfig->kpuncs[(*data)->value.str.len] = '\0';
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
//convert the data to long.
|
//convert the data to long.
|
||||||
convert_to_long_ex(data);
|
convert_to_long_ex(data);
|
||||||
if ( strcmp(_key, "max_len") == 0 )
|
if ( strcmp(_key, "max_len") == 0 )
|
||||||
nconfig->max_len = (ushort_t)(*data)->value.lval;
|
nconfig->max_len = (ushort_t)(*data)->value.lval;
|
||||||
else if ( strcmp(_key, "r_name") == 0 )
|
else if ( strcmp(_key, "r_name") == 0 )
|
||||||
nconfig->r_name = (ushort_t)(*data)->value.lval;
|
nconfig->r_name = (ushort_t)(*data)->value.lval;
|
||||||
else if ( strcmp(_key, "mix_len") == 0 )
|
else if ( strcmp(_key, "mix_len") == 0 )
|
||||||
nconfig->mix_len = (ushort_t)(*data)->value.lval;
|
nconfig->mix_len = (ushort_t)(*data)->value.lval;
|
||||||
else if ( strcmp(_key, "lna_len") == 0 )
|
else if ( strcmp(_key, "lna_len") == 0 )
|
||||||
nconfig->lna_len = (ushort_t)(*data)->value.lval;
|
nconfig->lna_len = (ushort_t)(*data)->value.lval;
|
||||||
else if ( strcmp(_key, "add_syn") == 0 )
|
else if ( strcmp(_key, "add_syn") == 0 )
|
||||||
nconfig->add_syn = (ushort_t)(*data)->value.lval;
|
nconfig->add_syn = (ushort_t)(*data)->value.lval;
|
||||||
else if ( strcmp(_key, "clr_stw") == 0 )
|
else if ( strcmp(_key, "clr_stw") == 0 )
|
||||||
nconfig->clr_stw = (ushort_t)(*data)->value.lval;
|
nconfig->clr_stw = (ushort_t)(*data)->value.lval;
|
||||||
else if ( strcmp(_key, "add_syn") == 0 )
|
else if ( strcmp(_key, "add_syn") == 0 )
|
||||||
nconfig->add_syn = (ushort_t)(*data)->value.lval;
|
nconfig->add_syn = (ushort_t)(*data)->value.lval;
|
||||||
else if ( strcmp(_key, "keep_urec") == 0 )
|
else if ( strcmp(_key, "keep_urec") == 0 )
|
||||||
nconfig->keep_urec = (ushort_t)(*data)->value.lval;
|
nconfig->keep_urec = (ushort_t)(*data)->value.lval;
|
||||||
else if ( strcmp(_key, "spx_out") == 0 )
|
else if ( strcmp(_key, "spx_out") == 0 )
|
||||||
nconfig->spx_out = (ushort_t)(*data)->value.lval;
|
nconfig->spx_out = (ushort_t)(*data)->value.lval;
|
||||||
else if ( strcmp(_key, "nthreshold") == 0 )
|
else if ( strcmp(_key, "nthreshold") == 0 )
|
||||||
nconfig->nthreshold = (uint_t) (*data)->value.lval;
|
nconfig->nthreshold = (uint_t) (*data)->value.lval;
|
||||||
else if ( strcmp(_key, "mode") == 0 )
|
else if ( strcmp(_key, "mode") == 0 )
|
||||||
friso_set_mode(nconfig, (friso_mode_t)((*data)->value.lval));
|
friso_set_mode(nconfig, (friso_mode_t)((*data)->value.lval));
|
||||||
else if ( strcmp(_key, "en_sseg") == 0 )
|
else if ( strcmp(_key, "en_sseg") == 0 )
|
||||||
nconfig->en_sseg = (ushort_t) (*data)->value.lval;
|
nconfig->en_sseg = (ushort_t) (*data)->value.lval;
|
||||||
else if ( strcmp(_key, "st_minl") == 0 )
|
else if ( strcmp(_key, "st_minl") == 0 )
|
||||||
nconfig->st_minl = (ushort_t) (*data)->value.lval;
|
nconfig->st_minl = (ushort_t) (*data)->value.lval;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//initialize the array.
|
//initialize the array.
|
||||||
MAKE_STD_ZVAL( ret );
|
MAKE_STD_ZVAL( ret );
|
||||||
array_init( ret );
|
array_init( ret );
|
||||||
config = ( nconfig == NULL ) ? friso_globals.config : nconfig;
|
config = ( nconfig == NULL ) ? friso_globals.config : nconfig;
|
||||||
|
|
||||||
//create a new friso task.
|
//create a new friso task.
|
||||||
task = friso_new_task();
|
task = friso_new_task();
|
||||||
idx = 0;
|
idx = 0;
|
||||||
friso_set_text(task, _str);
|
friso_set_text(task, _str);
|
||||||
while ( config->next_token( friso_globals.friso, config, task ) != NULL )
|
while ( config->next_token( friso_globals.friso, config, task ) != NULL )
|
||||||
{
|
{
|
||||||
MAKE_STD_ZVAL(item);
|
MAKE_STD_ZVAL(item);
|
||||||
array_init(item);
|
array_init(item);
|
||||||
add_assoc_string(item, "word", task->token->word, 1);
|
add_assoc_string(item, "word", task->token->word, 1);
|
||||||
//check the append of type
|
//check the append of type
|
||||||
if ( (rargs & FRISO_RET_TYPE) != 0 )
|
if ( (rargs & FRISO_RET_TYPE) != 0 )
|
||||||
add_assoc_long(item, "type", task->token->type);
|
add_assoc_long(item, "type", task->token->type);
|
||||||
if ( (rargs & FRISO_RET_LEN) != 0 )
|
if ( (rargs & FRISO_RET_LEN) != 0 )
|
||||||
add_assoc_long(item, "len", task->token->length);
|
add_assoc_long(item, "len", task->token->length);
|
||||||
if ( (rargs & FRISO_RET_RLEN) != 0 )
|
if ( (rargs & FRISO_RET_RLEN) != 0 )
|
||||||
add_assoc_long(item, "rlen", task->token->rlen);
|
add_assoc_long(item, "rlen", task->token->rlen);
|
||||||
if ( (rargs & FRISO_RET_OFF) != 0 )
|
if ( (rargs & FRISO_RET_OFF) != 0 )
|
||||||
add_assoc_long(item, "off", task->token->offset);
|
add_assoc_long(item, "off", task->token->offset);
|
||||||
if ( (rargs & FRISO_RET_POS) != 0 )
|
if ( (rargs & FRISO_RET_POS) != 0 )
|
||||||
add_assoc_stringl(item, "pos", &task->token->pos, 1, 1);
|
add_assoc_stringl(item, "pos", &task->token->pos, 1, 1);
|
||||||
|
|
||||||
//append the sub result.
|
//append the sub result.
|
||||||
add_index_zval( ret, idx++, item );
|
add_index_zval( ret, idx++, item );
|
||||||
}
|
}
|
||||||
|
|
||||||
//free the friso task.
|
//free the friso task.
|
||||||
friso_free_task(task);
|
friso_free_task(task);
|
||||||
if ( nconfig != NULL ) friso_free_config(nconfig);
|
if ( nconfig != NULL ) friso_free_config(nconfig);
|
||||||
|
|
||||||
//RETURN_ZVAL( ret, 0, 0);
|
//RETURN_ZVAL( ret, 0, 0);
|
||||||
*( return_value ) = *( ret );
|
*( return_value ) = *( ret );
|
||||||
}
|
}
|
||||||
/* }}} */
|
/* }}} */
|
||||||
|
|
||||||
@ -323,7 +323,7 @@ PHP_FUNCTION(friso_split)
|
|||||||
Return the current version of Friso. */
|
Return the current version of Friso. */
|
||||||
PHP_FUNCTION(friso_version)
|
PHP_FUNCTION(friso_version)
|
||||||
{
|
{
|
||||||
RETURN_STRINGL(FRISO_VERSION, strlen(FRISO_VERSION), 1);
|
RETURN_STRINGL(FRISO_VERSION, strlen(FRISO_VERSION), 1);
|
||||||
}
|
}
|
||||||
/* }}} */
|
/* }}} */
|
||||||
|
|
||||||
@ -331,8 +331,8 @@ PHP_FUNCTION(friso_version)
|
|||||||
Return the current charset of friso. */
|
Return the current charset of friso. */
|
||||||
PHP_FUNCTION(friso_charset)
|
PHP_FUNCTION(friso_charset)
|
||||||
{
|
{
|
||||||
char *charset = friso_globals.friso->charset == FRISO_UTF8 ? "UTF-8" : "GBK";
|
char *charset = friso_globals.friso->charset == FRISO_UTF8 ? "UTF-8" : "GBK";
|
||||||
RETURN_STRINGL(charset, strlen(charset), 1);
|
RETURN_STRINGL(charset, strlen(charset), 1);
|
||||||
}
|
}
|
||||||
/* }}} */
|
/* }}} */
|
||||||
|
|
||||||
@ -340,23 +340,23 @@ PHP_FUNCTION(friso_charset)
|
|||||||
Return a bool to confirm that the given str is a word in a specified dictionary. */
|
Return a bool to confirm that the given str is a word in a specified dictionary. */
|
||||||
PHP_FUNCTION(friso_dic_exist)
|
PHP_FUNCTION(friso_dic_exist)
|
||||||
{
|
{
|
||||||
char *word = NULL;
|
char *word = NULL;
|
||||||
int wlen;
|
int wlen;
|
||||||
long type;
|
long type;
|
||||||
|
|
||||||
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ls", &type, &word, &wlen) == FAILURE) {
|
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ls", &type, &word, &wlen) == FAILURE) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( friso_globals.friso->dic == NULL )
|
if ( friso_globals.friso->dic == NULL )
|
||||||
RETURN_BOOL(0);
|
RETURN_BOOL(0);
|
||||||
|
|
||||||
if ( type < 0 || type >= __FRISO_LEXICON_LENGTH__ )
|
if ( type < 0 || type >= __FRISO_LEXICON_LENGTH__ )
|
||||||
type = __LEX_CJK_WORDS__;
|
type = __LEX_CJK_WORDS__;
|
||||||
|
|
||||||
wlen = friso_dic_match( friso_globals.friso->dic, type, word );
|
wlen = friso_dic_match( friso_globals.friso->dic, type, word );
|
||||||
|
|
||||||
RETURN_BOOL(wlen);
|
RETURN_BOOL(wlen);
|
||||||
}
|
}
|
||||||
/* }}} */
|
/* }}} */
|
||||||
|
|
||||||
@ -364,38 +364,38 @@ PHP_FUNCTION(friso_dic_exist)
|
|||||||
Return a array contains all the information of the given word.*/
|
Return a array contains all the information of the given word.*/
|
||||||
PHP_FUNCTION(friso_dic_get)
|
PHP_FUNCTION(friso_dic_get)
|
||||||
{
|
{
|
||||||
char *word = NULL;
|
char *word = NULL;
|
||||||
int wlen;
|
int wlen;
|
||||||
long type;
|
long type;
|
||||||
zval *entry;
|
zval *entry;
|
||||||
lex_entry_t e;
|
lex_entry_t e;
|
||||||
|
|
||||||
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ls", &type, &word, &wlen) == FAILURE) {
|
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ls", &type, &word, &wlen) == FAILURE) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
//check the dictionary
|
//check the dictionary
|
||||||
if ( friso_globals.friso->dic == NULL )
|
if ( friso_globals.friso->dic == NULL )
|
||||||
RETURN_BOOL(0);
|
RETURN_BOOL(0);
|
||||||
|
|
||||||
MAKE_STD_ZVAL( entry );
|
MAKE_STD_ZVAL( entry );
|
||||||
array_init( entry );
|
array_init( entry );
|
||||||
|
|
||||||
if ( type < 0 || type >= __FRISO_LEXICON_LENGTH__ )
|
if ( type < 0 || type >= __FRISO_LEXICON_LENGTH__ )
|
||||||
{
|
{
|
||||||
type = __LEX_CJK_WORDS__;
|
type = __LEX_CJK_WORDS__;
|
||||||
}
|
}
|
||||||
|
|
||||||
e = friso_dic_get( friso_globals.friso->dic, type, word );
|
e = friso_dic_get( friso_globals.friso->dic, type, word );
|
||||||
if ( e != NULL )
|
if ( e != NULL )
|
||||||
{
|
{
|
||||||
add_assoc_long( entry, "length", e->length);
|
add_assoc_long( entry, "length", e->length);
|
||||||
add_assoc_long( entry, "freq", e->fre );
|
add_assoc_long( entry, "freq", e->fre );
|
||||||
*( return_value ) = * ( entry );
|
*( return_value ) = * ( entry );
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
RETURN_BOOL(0);
|
RETURN_BOOL(0);
|
||||||
}
|
}
|
||||||
/* }}} */
|
/* }}} */
|
||||||
|
|
||||||
@ -403,17 +403,17 @@ PHP_FUNCTION(friso_dic_get)
|
|||||||
Return the bytes that the utf-8 char takes.*/
|
Return the bytes that the utf-8 char takes.*/
|
||||||
PHP_FUNCTION(friso_utf8_bytes)
|
PHP_FUNCTION(friso_utf8_bytes)
|
||||||
{
|
{
|
||||||
char *word = NULL;
|
char *word = NULL;
|
||||||
int wlen, _bytes;
|
int wlen, _bytes;
|
||||||
|
|
||||||
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &word, &wlen) == FAILURE) {
|
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &word, &wlen) == FAILURE) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( word == NULL ) RETURN_LONG(0);
|
if ( word == NULL ) RETURN_LONG(0);
|
||||||
_bytes = get_utf8_bytes( word[0] );
|
_bytes = get_utf8_bytes( word[0] );
|
||||||
|
|
||||||
RETURN_LONG(_bytes);
|
RETURN_LONG(_bytes);
|
||||||
}
|
}
|
||||||
/* }}} */
|
/* }}} */
|
||||||
|
|
||||||
@ -421,16 +421,16 @@ PHP_FUNCTION(friso_utf8_bytes)
|
|||||||
Return the unicode of the given utf-8 char.*/
|
Return the unicode of the given utf-8 char.*/
|
||||||
PHP_FUNCTION(friso_utf8_ucode)
|
PHP_FUNCTION(friso_utf8_ucode)
|
||||||
{
|
{
|
||||||
char *word = NULL;
|
char *word = NULL;
|
||||||
int wlen, _ucode;
|
int wlen, _ucode;
|
||||||
|
|
||||||
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &word, &wlen) == FAILURE) {
|
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &word, &wlen) == FAILURE) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
_ucode = get_utf8_unicode( word );
|
_ucode = get_utf8_unicode( word );
|
||||||
|
|
||||||
RETURN_LONG(_ucode);
|
RETURN_LONG(_ucode);
|
||||||
}
|
}
|
||||||
/* }}} */
|
/* }}} */
|
||||||
|
|
||||||
@ -438,18 +438,18 @@ PHP_FUNCTION(friso_utf8_ucode)
|
|||||||
Return char that the a unicode pointed to.*/
|
Return char that the a unicode pointed to.*/
|
||||||
PHP_FUNCTION(friso_ucode_utf8)
|
PHP_FUNCTION(friso_ucode_utf8)
|
||||||
{
|
{
|
||||||
unsigned long *ucode = NULL;
|
unsigned long *ucode = NULL;
|
||||||
int _bytes;
|
int _bytes;
|
||||||
char word[7];
|
char word[7];
|
||||||
|
|
||||||
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &ucode ) == FAILURE) {
|
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &ucode ) == FAILURE) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
_bytes = unicode_to_utf8( ( size_t ) ucode, word );
|
_bytes = unicode_to_utf8( ( size_t ) ucode, word );
|
||||||
word[_bytes] = '\0';
|
word[_bytes] = '\0';
|
||||||
|
|
||||||
RETURN_STRINGL( word, _bytes, 1 );
|
RETURN_STRINGL( word, _bytes, 1 );
|
||||||
}
|
}
|
||||||
/* }}} */
|
/* }}} */
|
||||||
|
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
$br = (php_sapi_name() == "cli")? "":"<br>";
|
$br = (php_sapi_name() == "cli")? "":"<br>";
|
||||||
|
|
||||||
if(!extension_loaded('friso')) {
|
if(!extension_loaded('friso')) {
|
||||||
dl('friso.' . PHP_SHLIB_SUFFIX);
|
dl('friso.' . PHP_SHLIB_SUFFIX);
|
||||||
}
|
}
|
||||||
$module = 'friso';
|
$module = 'friso';
|
||||||
$functions = get_extension_funcs($module);
|
$functions = get_extension_funcs($module);
|
||||||
@ -13,9 +13,9 @@ foreach($functions as $func) {
|
|||||||
echo "$br\n";
|
echo "$br\n";
|
||||||
$function = 'confirm_' . $module . '_compiled';
|
$function = 'confirm_' . $module . '_compiled';
|
||||||
if (extension_loaded($module)) {
|
if (extension_loaded($module)) {
|
||||||
$str = $function($module);
|
$str = $function($module);
|
||||||
} else {
|
} else {
|
||||||
$str = "Module $module is not compiled into PHP";
|
$str = "Module $module is not compiled into PHP";
|
||||||
}
|
}
|
||||||
echo "$str\n";
|
echo "$str\n";
|
||||||
?>
|
?>
|
||||||
|
@ -6,11 +6,11 @@ extern zend_module_entry friso_module_entry;
|
|||||||
#define phpext_friso_ptr &friso_module_entry
|
#define phpext_friso_ptr &friso_module_entry
|
||||||
|
|
||||||
#ifdef PHP_WIN32
|
#ifdef PHP_WIN32
|
||||||
# define PHP_FRISO_API __declspec(dllexport)
|
# define PHP_FRISO_API __declspec(dllexport)
|
||||||
#elif defined(__GNUC__) && __GNUC__ >= 4
|
#elif defined(__GNUC__) && __GNUC__ >= 4
|
||||||
# define PHP_FRISO_API __attribute__ ((visibility("default")))
|
# define PHP_FRISO_API __attribute__ ((visibility("default")))
|
||||||
#else
|
#else
|
||||||
# define PHP_FRISO_API
|
# define PHP_FRISO_API
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef ZTS
|
#ifdef ZTS
|
||||||
@ -36,12 +36,12 @@ PHP_FUNCTION(friso_utf8_ucode);
|
|||||||
PHP_FUNCTION(friso_ucode_utf8);
|
PHP_FUNCTION(friso_ucode_utf8);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Declare any global variables you may need between the BEGIN
|
Declare any global variables you may need between the BEGIN
|
||||||
and END macros here:
|
and END macros here:
|
||||||
|
|
||||||
ZEND_BEGIN_MODULE_GLOBALS(friso)
|
ZEND_BEGIN_MODULE_GLOBALS(friso)
|
||||||
long global_value;
|
long global_value;
|
||||||
char *global_string;
|
char *global_string;
|
||||||
ZEND_END_MODULE_GLOBALS(friso)
|
ZEND_END_MODULE_GLOBALS(friso)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -66,5 +66,5 @@ typedef struct {
|
|||||||
#define FRISO_G(v) (friso_globals.v)
|
#define FRISO_G(v) (friso_globals.v)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif /* PHP_FRISO_H */
|
#endif /* PHP_FRISO_H */
|
||||||
|
|
||||||
|
@ -6,14 +6,14 @@ Check for friso presence
|
|||||||
<?php
|
<?php
|
||||||
echo "friso extension is available";
|
echo "friso extension is available";
|
||||||
/*
|
/*
|
||||||
you can add regression tests for your extension here
|
you can add regression tests for your extension here
|
||||||
|
|
||||||
the output of your test code has to be equal to the
|
the output of your test code has to be equal to the
|
||||||
text in the --EXPECT-- section below for the tests
|
text in the --EXPECT-- section below for the tests
|
||||||
to pass, differences between the output and the
|
to pass, differences between the output and the
|
||||||
expected text are interpreted as failure
|
expected text are interpreted as failure
|
||||||
|
|
||||||
see php5/README.TESTING for further information on
|
see php5/README.TESTING for further information on
|
||||||
writing regression tests
|
writing regression tests
|
||||||
*/
|
*/
|
||||||
?>
|
?>
|
||||||
|
2872
src/friso.c
2872
src/friso.c
File diff suppressed because it is too large
Load Diff
148
src/friso.h
148
src/friso.h
@ -1,8 +1,8 @@
|
|||||||
/*
|
/*
|
||||||
* main interface file for friso - free soul.
|
* main interface file for friso - free soul.
|
||||||
* you could modify it and re-release it but never for commercial use.
|
* you could modify it and re-release it but never for commercial use.
|
||||||
*
|
*
|
||||||
* @author chenxin <chenxin619315@gmail.com>
|
* @author chenxin <chenxin619315@gmail.com>
|
||||||
*/
|
*/
|
||||||
#ifndef _friso_h
|
#ifndef _friso_h
|
||||||
#define _friso_h
|
#define _friso_h
|
||||||
@ -15,11 +15,11 @@
|
|||||||
#define friso_version() FRISO_VERSION
|
#define friso_version() FRISO_VERSION
|
||||||
|
|
||||||
|
|
||||||
#define DEFAULT_SEGMENT_LENGTH 5
|
#define DEFAULT_SEGMENT_LENGTH 5
|
||||||
#define DEFAULT_MIX_LENGTH 2
|
#define DEFAULT_MIX_LENGTH 2
|
||||||
#define DEFAULT_LNA_LENGTH 1
|
#define DEFAULT_LNA_LENGTH 1
|
||||||
#define DEFAULT_NTHRESHOLD 1000000
|
#define DEFAULT_NTHRESHOLD 1000000
|
||||||
#define DEFAULT_SEGMENT_MODE 2
|
#define DEFAULT_SEGMENT_MODE 2
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Type: friso_lex_t
|
* Type: friso_lex_t
|
||||||
@ -29,8 +29,8 @@
|
|||||||
typedef enum {
|
typedef enum {
|
||||||
__LEX_CJK_WORDS__ = 0,
|
__LEX_CJK_WORDS__ = 0,
|
||||||
__LEX_CJK_UNITS__ = 1,
|
__LEX_CJK_UNITS__ = 1,
|
||||||
__LEX_ECM_WORDS__ = 2, //english and chinese mixed words.
|
__LEX_ECM_WORDS__ = 2, //english and chinese mixed words.
|
||||||
__LEX_CEM_WORDS__ = 3, //chinese and english mixed words.
|
__LEX_CEM_WORDS__ = 3, //chinese and english mixed words.
|
||||||
__LEX_CN_LNAME__ = 4,
|
__LEX_CN_LNAME__ = 4,
|
||||||
__LEX_CN_SNAME__ = 5,
|
__LEX_CN_SNAME__ = 5,
|
||||||
__LEX_CN_DNAME1__ = 6,
|
__LEX_CN_DNAME1__ = 6,
|
||||||
@ -41,8 +41,8 @@ typedef enum {
|
|||||||
__LEX_EN_WORDS__ = 11,
|
__LEX_EN_WORDS__ = 11,
|
||||||
__LEX_OTHER_WORDS__ = 15,
|
__LEX_OTHER_WORDS__ = 15,
|
||||||
__LEX_NCSYN_WORDS__ = 16,
|
__LEX_NCSYN_WORDS__ = 16,
|
||||||
__LEX_PUNC_WORDS__ = 17, //punctuations
|
__LEX_PUNC_WORDS__ = 17, //punctuations
|
||||||
__LEX_UNKNOW_WORDS__ = 18 //unrecognized words.
|
__LEX_UNKNOW_WORDS__ = 18 //unrecognized words.
|
||||||
} friso_lex_t;
|
} friso_lex_t;
|
||||||
|
|
||||||
typedef friso_hash_t * friso_dic_t;
|
typedef friso_hash_t * friso_dic_t;
|
||||||
@ -51,8 +51,8 @@ typedef friso_hash_t * friso_dic_t;
|
|||||||
|
|
||||||
/*
 * Character sets that Friso currently supports.
 */
typedef enum {
    FRISO_UTF8 = 0,     /* UTF-8 */
    FRISO_GBK  = 1      /* GBK */
} friso_charset_t;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -61,15 +61,15 @@ typedef enum {
|
|||||||
* used to identify the segmentation mode that friso uses.
|
* used to identify the segmentation mode that friso uses.
|
||||||
*/
|
*/
|
||||||
/* Segmentation modes; the numeric values start at 1. */
typedef enum {
    __FRISO_SIMPLE_MODE__  = 1,
    __FRISO_COMPLEX_MODE__ = 2,
    __FRISO_DETECT_MODE__  = 3
} friso_mode_t;
|
||||||
|
|
||||||
/* friso entry.*/
|
/* friso entry.*/
|
||||||
typedef struct {
|
typedef struct {
|
||||||
friso_dic_t dic; //friso dictionary
|
friso_dic_t dic; //friso dictionary
|
||||||
friso_charset_t charset; //project charset.
|
friso_charset_t charset; //project charset.
|
||||||
} friso_entry;
|
} friso_entry;
|
||||||
typedef friso_entry * friso_t;
|
typedef friso_entry * friso_t;
|
||||||
|
|
||||||
@ -80,26 +80,26 @@ typedef friso_entry * friso_t;
|
|||||||
* -------------------
|
* -------------------
|
||||||
* This type used to represent the lexicon entry struct.
|
* This type used to represent the lexicon entry struct.
|
||||||
*/
|
*/
|
||||||
/* Control-mask bit of a lexicon entry: append the synonym words. */
#define _LEX_APPENSYN_MASK      (1 << 0)

/*
 * Set / clear / test the append-synonyms bit of `e`.
 * `e` may be any expression that yields a pointer to a struct with a
 * `ctrlMask` member; the argument and the whole expansion are
 * parenthesized so that calls such as lex_appensyn_open(p + 1) or
 * !lex_appensyn_check(p) expand as intended.
 */
#define lex_appensyn_open(e)    ((e)->ctrlMask |= _LEX_APPENSYN_MASK)
#define lex_appensyn_close(e)   ((e)->ctrlMask &= ~_LEX_APPENSYN_MASK)
#define lex_appensyn_check(e)   (((e)->ctrlMask & _LEX_APPENSYN_MASK) != 0)
|
||||||
typedef struct {
|
typedef struct {
|
||||||
/*
|
/*
|
||||||
* the type of the lexicon item.
|
* the type of the lexicon item.
|
||||||
* available value is all the elements in friso_lex_t enum.
|
* available value is all the elements in friso_lex_t enum.
|
||||||
* and if it is __LEX_OTHER_WORDS__, we need to free it after use it.
|
* and if it is __LEX_OTHER_WORDS__, we need to free it after use it.
|
||||||
*/
|
*/
|
||||||
uchar_t length; //the length of the token.(after the convertor of Friso.)
|
uchar_t length; //the length of the token.(after the convertor of Friso.)
|
||||||
uchar_t rlen; //the real length of the token.(before any convert)
|
uchar_t rlen; //the real length of the token.(before any convert)
|
||||||
uchar_t type;
|
uchar_t type;
|
||||||
uchar_t ctrlMask; //function control mask, like append the synoyums words.
|
uchar_t ctrlMask; //function control mask, like append the synoyums words.
|
||||||
uint_t offset; //offset index.
|
uint_t offset; //offset index.
|
||||||
fstring word;
|
fstring word;
|
||||||
//fstring py; //pinyin of the word.(invalid)
|
//fstring py; //pinyin of the word.(invalid)
|
||||||
friso_array_t syn; //synoyums words.
|
friso_array_t syn; //synoyums words.
|
||||||
friso_array_t pos; //part of speech.
|
friso_array_t pos; //part of speech.
|
||||||
uint_t fre; //single word frequency.
|
uint_t fre; //single word frequency.
|
||||||
} lex_entry_cdt;
|
} lex_entry_cdt;
|
||||||
typedef lex_entry_cdt * lex_entry_t;
|
typedef lex_entry_cdt * lex_entry_t;
|
||||||
|
|
||||||
@ -108,11 +108,11 @@ typedef lex_entry_cdt * lex_entry_t;
|
|||||||
#define __HITS_WORD_LENGTH__ 64
|
#define __HITS_WORD_LENGTH__ 64
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
uchar_t type; //type of the word. (item of friso_lex_t)
|
uchar_t type; //type of the word. (item of friso_lex_t)
|
||||||
uchar_t length; //length of the token.
|
uchar_t length; //length of the token.
|
||||||
uchar_t rlen; //the real length of the token.(in orgin strng)
|
uchar_t rlen; //the real length of the token.(in orgin strng)
|
||||||
char pos; //part of speech.
|
char pos; //part of speech.
|
||||||
int offset; //start offset of the word.
|
int offset; //start offset of the word.
|
||||||
char word[__HITS_WORD_LENGTH__];
|
char word[__HITS_WORD_LENGTH__];
|
||||||
//char py[0];
|
//char py[0];
|
||||||
} friso_token_entry;
|
} friso_token_entry;
|
||||||
@ -122,25 +122,25 @@ typedef friso_token_entry * friso_token_t;
|
|||||||
/*
|
/*
|
||||||
* Type: friso_task_entry
|
* Type: friso_task_entry
|
||||||
* This type used to represent the current segmentation content.
|
* This type used to represent the current segmentation content.
|
||||||
* like the text to split, and the current index, token buffer eg....
|
* like the text to split, and the current index, token buffer eg....
|
||||||
*/
|
*/
|
||||||
/*
 * Action control mask bits for a friso task (its `ctrlMask` field).
 */
#define _TASK_CHECK_CF_MASK     (1 << 0)    /* whether to check the chinese fraction. */
#define _TASK_START_SS_MASK     (1 << 1)    /* whether to start the secondary segmentation. */

/*
 * Set / clear / test the secondary-segmentation bit of `task`.
 * `task` may be any expression that yields a pointer to a struct with a
 * `ctrlMask` member; the argument and the whole expansion are
 * parenthesized so the macros are safe inside larger expressions.
 */
#define task_ssseg_open(task)   ((task)->ctrlMask |= _TASK_START_SS_MASK)
#define task_ssseg_close(task)  ((task)->ctrlMask &= ~_TASK_START_SS_MASK)
#define task_ssseg_check(task)  (((task)->ctrlMask & _TASK_START_SS_MASK) != 0)
|
||||||
typedef struct {
|
typedef struct {
|
||||||
fstring text; //text to tokenize
|
fstring text; //text to tokenize
|
||||||
uint_t idx; //start offset index.
|
uint_t idx; //start offset index.
|
||||||
uint_t length; //length of the text.
|
uint_t length; //length of the text.
|
||||||
uint_t bytes; //latest word bytes in C.
|
uint_t bytes; //latest word bytes in C.
|
||||||
uint_t unicode; //latest word unicode number.
|
uint_t unicode; //latest word unicode number.
|
||||||
uint_t ctrlMask; //action control mask.
|
uint_t ctrlMask; //action control mask.
|
||||||
friso_link_t pool; //task pool.
|
friso_link_t pool; //task pool.
|
||||||
string_buffer_t sbuf; //string buffer.
|
string_buffer_t sbuf; //string buffer.
|
||||||
friso_token_t token; //token result token;
|
friso_token_t token; //token result token;
|
||||||
char buffer[7]; //word buffer. (1-6 bytes for an utf-8 word in C).
|
char buffer[7]; //word buffer. (1-6 bytes for an utf-8 word in C).
|
||||||
} friso_task_entry;
|
} friso_task_entry;
|
||||||
typedef friso_task_entry * friso_task_t;
|
typedef friso_task_entry * friso_task_t;
|
||||||
|
|
||||||
@ -151,23 +151,23 @@ typedef friso_task_entry * friso_task_t;
|
|||||||
//typedef friso_token_t ( * friso_next_hit_fn ) ( friso_t, void *, friso_task_t );
|
//typedef friso_token_t ( * friso_next_hit_fn ) ( friso_t, void *, friso_task_t );
|
||||||
//typedef lex_entry_t ( * friso_next_lex_fn ) ( friso_t, void *, friso_task_t );
|
//typedef lex_entry_t ( * friso_next_lex_fn ) ( friso_t, void *, friso_task_t );
|
||||||
struct friso_config_struct {
|
struct friso_config_struct {
|
||||||
ushort_t max_len; //the max match length (4 - 7).
|
ushort_t max_len; //the max match length (4 - 7).
|
||||||
ushort_t r_name; //1 for open chinese name recognition 0 for close it.
|
ushort_t r_name; //1 for open chinese name recognition 0 for close it.
|
||||||
ushort_t mix_len; //the max length for the CJK words in a mix string.
|
ushort_t mix_len; //the max length for the CJK words in a mix string.
|
||||||
ushort_t lna_len; //the max length for the chinese last name adron.
|
ushort_t lna_len; //the max length for the chinese last name adron.
|
||||||
ushort_t add_syn; //append synonyms tokenizer words.
|
ushort_t add_syn; //append synonyms tokenizer words.
|
||||||
ushort_t clr_stw; //clear the stopwords.
|
ushort_t clr_stw; //clear the stopwords.
|
||||||
ushort_t keep_urec; //keep the unrecongnized words.
|
ushort_t keep_urec; //keep the unrecongnized words.
|
||||||
ushort_t spx_out; //use sphinx output customize.
|
ushort_t spx_out; //use sphinx output customize.
|
||||||
ushort_t en_sseg; //start the secondary segmentation.
|
ushort_t en_sseg; //start the secondary segmentation.
|
||||||
ushort_t st_minl; //min length of the secondary segmentation token.
|
ushort_t st_minl; //min length of the secondary segmentation token.
|
||||||
uint_t nthreshold; //the threshold value for a char to make up a chinese name.
|
uint_t nthreshold; //the threshold value for a char to make up a chinese name.
|
||||||
friso_mode_t mode; //Complex mode or simple mode
|
friso_mode_t mode; //Complex mode or simple mode
|
||||||
|
|
||||||
//pointer to the function to get the next token
|
//pointer to the function to get the next token
|
||||||
friso_token_t (*next_token) (friso_t, struct friso_config_struct *, friso_task_t);
|
friso_token_t (*next_token) (friso_t, struct friso_config_struct *, friso_task_t);
|
||||||
//pointer to the function to get the next cjk lex_entry_t
|
//pointer to the function to get the next cjk lex_entry_t
|
||||||
lex_entry_t (*next_cjk ) (friso_t, struct friso_config_struct *, friso_task_t);
|
lex_entry_t (*next_cjk ) (friso_t, struct friso_config_struct *, friso_task_t);
|
||||||
|
|
||||||
char kpuncs[_FRISO_KEEP_PUNC_LEN]; //keep punctuations buffer.
|
char kpuncs[_FRISO_KEEP_PUNC_LEN]; //keep punctuations buffer.
|
||||||
};
|
};
|
||||||
@ -181,7 +181,7 @@ typedef friso_config_entry * friso_config_t;
|
|||||||
* Usage: vars = friso_new( void );
|
* Usage: vars = friso_new( void );
|
||||||
* --------------------------------
|
* --------------------------------
|
||||||
* This function used to create a new empty friso friso_t;
|
* This function used to create a new empty friso friso_t;
|
||||||
* with default value.
|
* with default value.
|
||||||
*/
|
*/
|
||||||
FRISO_API friso_t friso_new( void );
|
FRISO_API friso_t friso_new( void );
|
||||||
|
|
||||||
@ -202,7 +202,7 @@ FRISO_API void friso_free( friso_t );
|
|||||||
* Usage: dic = friso_set_dic( vars, dic );
|
* Usage: dic = friso_set_dic( vars, dic );
|
||||||
* ----------------------------------------
|
* ----------------------------------------
|
||||||
* This function is used to set the dictionary for friso.
|
* This function is used to set the dictionary for friso.
|
||||||
* and friso_dic_t is a pointer to an array of hash tables.
|
* and friso_dic_t is a pointer to an array of hash tables.
|
||||||
*/
|
*/
|
||||||
//FRISO_API void friso_set_dic( friso_t, friso_dic_t );
|
//FRISO_API void friso_set_dic( friso_t, friso_dic_t );
|
||||||
#define friso_set_dic(friso, dic)\
|
#define friso_set_dic(friso, dic)\
|
||||||
@ -272,7 +272,7 @@ FRISO_API lex_entry_t next_complex_cjk( friso_t, friso_config_t, friso_task_t );
|
|||||||
* Usage: word = next_mmseg_token( vars, seg );
|
* Usage: word = next_mmseg_token( vars, seg );
|
||||||
* --------------------------------------
|
* --------------------------------------
|
||||||
* This function is used to get next word that friso segmented
|
* This function is used to get next word that friso segmented
|
||||||
* with a split mode of __FRISO_SIMPLE_MODE__ or __FRISO_COMPLEX_MODE__
|
* with a split mode of __FRISO_SIMPLE_MODE__ or __FRISO_COMPLEX_MODE__
|
||||||
*/
|
*/
|
||||||
FRISO_API friso_token_t next_mmseg_token( friso_t, friso_config_t, friso_task_t );
|
FRISO_API friso_token_t next_mmseg_token( friso_t, friso_config_t, friso_task_t );
|
||||||
|
|
||||||
@ -313,14 +313,14 @@ FRISO_API void free_lex_entry( lex_entry_t );
|
|||||||
* Usage: friso_dic_load( friso, friso_lex_t, path, length );
|
* Usage: friso_dic_load( friso, friso_lex_t, path, length );
|
||||||
* --------------------------------------------------
|
* --------------------------------------------------
|
||||||
* This function is used to load dictionary from a given path.
|
* This function is used to load dictionary from a given path.
|
||||||
* no length limit when length less than 0.
|
* no length limit when length less than 0.
|
||||||
*/
|
*/
|
||||||
FRISO_API void friso_dic_load( friso_t, friso_config_t,
|
FRISO_API void friso_dic_load( friso_t, friso_config_t,
|
||||||
friso_lex_t, fstring, uint_t );
|
friso_lex_t, fstring, uint_t );
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* load the lexicon configuration file.
|
* load the lexicon configuration file.
|
||||||
* and load all the valid lexicon from the conf file.
|
* and load all the valid lexicon from the conf file.
|
||||||
*/
|
*/
|
||||||
FRISO_API void friso_dic_load_from_ifile( friso_t, friso_config_t, fstring, uint_t );
|
FRISO_API void friso_dic_load_from_ifile( friso_t, friso_config_t, fstring, uint_t );
|
||||||
|
|
||||||
|
@ -16,22 +16,22 @@
|
|||||||
|
|
||||||
//yat, just take it as this way, 99 percent you will find no problem
|
//yat, just take it as this way, 99 percent you will find no problem
|
||||||
#if ( defined(_WIN32) || defined(_WINDOWS_) || defined(__WINDOWS_) )
|
#if ( defined(_WIN32) || defined(_WINDOWS_) || defined(__WINDOWS_) )
|
||||||
# define FRISO_WINNT
|
# define FRISO_WINNT
|
||||||
#else
|
#else
|
||||||
# define FRISO_LINUX
|
# define FRISO_LINUX
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef FRISO_WINNT
|
#ifdef FRISO_WINNT
|
||||||
# define FRISO_API extern __declspec(dllexport)
|
# define FRISO_API extern __declspec(dllexport)
|
||||||
# define __STATIC_API__ static
|
# define __STATIC_API__ static
|
||||||
#else
|
#else
|
||||||
/*platform shared library statement :: unix*/
|
/*platform shared library statement :: unix*/
|
||||||
# define FRISO_API extern
|
# define FRISO_API extern
|
||||||
# define __STATIC_API__ static inline
|
# define __STATIC_API__ static inline
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
 * Report a failed memory allocation and terminate the program with
 * exit status 1.
 *
 * The two statements are wrapped in one block so that the macro acts
 * as a single statement: under an unbraced `if` the bare two-statement
 * form would run exit(1) unconditionally. The block form works both
 * with and without a trailing semicolon at the call site.
 */
#define ___ALLOCATION_ERROR___ \
    { \
        printf("Unable to do the memory allocation, program will now exit\n" ); \
        exit(1); \
    }
|
||||||
|
|
||||||
#define print(str) printf("%s", str )
|
#define print(str) printf("%s", str )
|
||||||
@ -39,12 +39,12 @@ exit(1);
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* memory allocation macro definition.
|
* memory allocation macro definition.
|
||||||
* because emalloc, ecalloc, etc. should be used in php,
|
* because emalloc, ecalloc, etc. should be used in php,
|
||||||
* redefine these macros to adapt friso to the php environment.
|
* redefine these macros to adapt friso to the php environment.
|
||||||
*/
|
*/
|
||||||
#define FRISO_CALLOC(_bytes, _blocks) calloc(_bytes, _blocks)
|
#define FRISO_CALLOC(_bytes, _blocks) calloc(_bytes, _blocks)
|
||||||
#define FRISO_MALLOC(_bytes) malloc(_bytes)
|
#define FRISO_MALLOC(_bytes) malloc(_bytes)
|
||||||
#define FRISO_FREE( _ptr ) free( _ptr )
|
#define FRISO_FREE( _ptr ) free( _ptr )
|
||||||
|
|
||||||
typedef unsigned short ushort_t;
|
typedef unsigned short ushort_t;
|
||||||
typedef unsigned char uchar_t;
|
typedef unsigned char uchar_t;
|
||||||
@ -74,7 +74,7 @@ FRISO_API string_buffer_t new_string_buffer_with_string( fstring str );
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* this function will copy the chars that the fstring pointed.
|
* this function will copy the chars that the fstring pointed.
|
||||||
* to the buffer.
|
* to the buffer.
|
||||||
* this may cause the resize action of the buffer.
|
* this may cause the resize action of the buffer.
|
||||||
*/
|
*/
|
||||||
FRISO_API void string_buffer_append( string_buffer_t, fstring );
|
FRISO_API void string_buffer_append( string_buffer_t, fstring );
|
||||||
@ -88,21 +88,21 @@ FRISO_API fstring string_buffer_remove( string_buffer_t, uint_t idx, uint_t );
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* turn the string_buffer to a string.
|
* turn the string_buffer to a string.
|
||||||
* or return the buffer of the string_buffer.
|
* or return the buffer of the string_buffer.
|
||||||
*/
|
*/
|
||||||
FRISO_API string_buffer_t string_buffer_trim( string_buffer_t );
|
FRISO_API string_buffer_t string_buffer_trim( string_buffer_t );
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* free the given fstring buffer.
|
* free the given fstring buffer.
|
||||||
* this function will not free the allocation of
|
* this function will not free the allocation of
|
||||||
* string_buffer_t->buffer; it is returned to you, and
|
* string_buffer_t->buffer; it is returned to you, and
|
||||||
* if necessary you can free it yourself by calling free();
|
* if necessary you can free it yourself by calling free();
|
||||||
*/
|
*/
|
||||||
FRISO_API fstring string_buffer_devote( string_buffer_t );
|
FRISO_API fstring string_buffer_devote( string_buffer_t );
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* clear the given fstring buffer.
|
* clear the given fstring buffer.
|
||||||
* reset its buffer with 0 and reset its length to 0.
|
* reset its buffer with 0 and reset its length to 0.
|
||||||
*/
|
*/
|
||||||
FRISO_API void string_buffer_clear( string_buffer_t );
|
FRISO_API void string_buffer_clear( string_buffer_t );
|
||||||
|
|
||||||
@ -126,8 +126,8 @@ typedef string_split_entry * string_split_t;
|
|||||||
/**
|
/**
|
||||||
* create a new string_split_entry.
|
* create a new string_split_entry.
|
||||||
*
|
*
|
||||||
* @param source
|
* @param source
|
||||||
* @return string_split_t;
|
* @return string_split_t;
|
||||||
*/
|
*/
|
||||||
FRISO_API string_split_t new_string_split( fstring, fstring );
|
FRISO_API string_split_t new_string_split( fstring, fstring );
|
||||||
|
|
||||||
@ -141,12 +141,12 @@ FRISO_API void free_string_split( string_split_t );
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* get the next split fstring, and copy the
|
* get the next split fstring, and copy the
|
||||||
* splited fstring into the __dst buffer .
|
* splited fstring into the __dst buffer .
|
||||||
*
|
*
|
||||||
* @param string_split_t
|
* @param string_split_t
|
||||||
* @param __dst
|
* @param __dst
|
||||||
* @return fstring (NULL if reach the end of the source
|
* @return fstring (NULL if reach the end of the source
|
||||||
* or there is no more segmentation)
|
* or there is no more segmentation)
|
||||||
*/
|
*/
|
||||||
FRISO_API fstring string_split_next( string_split_t, fstring );
|
FRISO_API fstring string_split_next( string_split_t, fstring );
|
||||||
/* }}} */
|
/* }}} */
|
||||||
@ -175,7 +175,7 @@ FRISO_API friso_array_t new_array_list_with_opacity( uint_t );
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* free the given friso array.
|
* free the given friso array.
|
||||||
* and its items, but never where the items's item to pointed to .
|
* and its items, but never where the items's item to pointed to .
|
||||||
*/
|
*/
|
||||||
FRISO_API void free_array_list( friso_array_t );
|
FRISO_API void free_array_list( friso_array_t );
|
||||||
|
|
||||||
@ -190,13 +190,13 @@ FRISO_API void *array_list_get( friso_array_t, uint_t );
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* set the item at a specified position.
|
* set the item at a specified position.
|
||||||
* this will return the old value.
|
* this will return the old value.
|
||||||
*/
|
*/
|
||||||
FRISO_API void *array_list_set( friso_array_t, uint_t, void * );
|
FRISO_API void *array_list_set( friso_array_t, uint_t, void * );
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* remove the given item at a specified position.
|
* remove the given item at a specified position.
|
||||||
* this will return the value of the removed item.
|
* this will return the value of the removed item.
|
||||||
*/
|
*/
|
||||||
FRISO_API void *array_list_remove( friso_array_t, uint_t );
|
FRISO_API void *array_list_remove( friso_array_t, uint_t );
|
||||||
|
|
||||||
@ -205,9 +205,9 @@ FRISO_API friso_array_t array_list_trim( friso_array_t );
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* clear the array list.
|
* clear the array list.
|
||||||
* this function will free all the allocations that the pointer pointed.
|
* this function will free all the allocations that the pointer pointed.
|
||||||
* but will not free the point array allocations,
|
* but will not free the point array allocations,
|
||||||
* and will reset the length of it.
|
* and will reset the length of it.
|
||||||
*/
|
*/
|
||||||
FRISO_API friso_array_t array_list_clear( friso_array_t );
|
FRISO_API friso_array_t array_list_clear( friso_array_t );
|
||||||
|
|
||||||
@ -300,8 +300,8 @@ FRISO_API void link_list_add_first( friso_link_t, void * );
|
|||||||
|
|
||||||
/* {{{ hashtable interface define :: start*/
|
/* {{{ hashtable interface define :: start*/
|
||||||
struct hash_entry {
|
struct hash_entry {
|
||||||
fstring _key; //the node key
|
fstring _key; //the node key
|
||||||
void * _val; //the node value
|
void * _val; //the node value
|
||||||
struct hash_entry * _next;
|
struct hash_entry * _next;
|
||||||
};
|
};
|
||||||
typedef struct hash_entry friso_hash_entry;
|
typedef struct hash_entry friso_hash_entry;
|
||||||
@ -319,8 +319,8 @@ typedef struct {
|
|||||||
typedef friso_hash_cdt * friso_hash_t;
|
typedef friso_hash_cdt * friso_hash_t;
|
||||||
|
|
||||||
//default value for friso_hash_cdt
|
//default value for friso_hash_cdt
|
||||||
#define DEFAULT_LENGTH 31
|
#define DEFAULT_LENGTH 31
|
||||||
#define DEFAULT_FACTOR 0.85f
|
#define DEFAULT_FACTOR 0.85f
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Function: new_hash_table
|
* Function: new_hash_table
|
||||||
@ -359,7 +359,7 @@ FRISO_API int hash_exist_mapping( friso_hash_t, fstring );
|
|||||||
* Usage: value = get_mapping_value( table, key );
|
* Usage: value = get_mapping_value( table, key );
|
||||||
* -----------------------------------------------
|
* -----------------------------------------------
|
||||||
* this function returns the value associated with the given key.
|
* this function returns the value associated with the given key.
|
||||||
* UNDEFINED will be returned if the mapping does not exist.
|
* UNDEFINED will be returned if the mapping does not exist.
|
||||||
*/
|
*/
|
||||||
FRISO_API void * hash_get_value( friso_hash_t, fstring );
|
FRISO_API void * hash_get_value( friso_hash_t, fstring );
|
||||||
|
|
||||||
|
192
src/friso_GBK.c
192
src/friso_GBK.c
@ -1,6 +1,6 @@
|
|||||||
/**
|
/**
|
||||||
* Friso GBK about function implements source file.
|
* Friso GBK about function implements source file.
|
||||||
* @package src/friso_GBK.c .
|
* @package src/friso_GBK.c .
|
||||||
*
|
*
|
||||||
* @author chenxin <chenxin619315@gmail.com>
|
* @author chenxin <chenxin619315@gmail.com>
|
||||||
*/
|
*/
|
||||||
@ -12,12 +12,12 @@
|
|||||||
|
|
||||||
/* read the next GBK word from the specified position.
|
/* read the next GBK word from the specified position.
|
||||||
*
|
*
|
||||||
* @return int the number of bytes of the word just read.
|
* @return int the number of bytes of the word just read.
|
||||||
*/
|
*/
|
||||||
FRISO_API int gbk_next_word(
|
FRISO_API int gbk_next_word(
|
||||||
friso_task_t task,
|
friso_task_t task,
|
||||||
uint_t *idx,
|
uint_t *idx,
|
||||||
fstring __word )
|
fstring __word )
|
||||||
{
|
{
|
||||||
int c;
|
int c;
|
||||||
if ( *idx >= task->length ) return 0;
|
if ( *idx >= task->length ) return 0;
|
||||||
@ -41,26 +41,26 @@ FRISO_API int gbk_next_word(
|
|||||||
//}
|
//}
|
||||||
|
|
||||||
//check if the given buffer is a gbk word (ANSII string).
|
//check if the given buffer is a gbk word (ANSII string).
|
||||||
// included the simplified and traditional words.
|
// included the simplified and traditional words.
|
||||||
FRISO_API int gbk_cn_string( char *str )
|
FRISO_API int gbk_cn_string( char *str )
|
||||||
{
|
{
|
||||||
int c1 = (uchar_t) str[0];
|
int c1 = (uchar_t) str[0];
|
||||||
int c2 = (uchar_t) str[1];
|
int c2 = (uchar_t) str[1];
|
||||||
//GBK/2: gb2312 chinese word.
|
//GBK/2: gb2312 chinese word.
|
||||||
return ( ((c1 >= 0xb0 && c1 <= 0xf7)
|
return ( ((c1 >= 0xb0 && c1 <= 0xf7)
|
||||||
&& (c2 >= 0xa1 && c2 <= 0xfe))
|
&& (c2 >= 0xa1 && c2 <= 0xfe))
|
||||||
//GBK/3: extend chinese words.
|
//GBK/3: extend chinese words.
|
||||||
|| ((c1 >= 0x81 && c1 <= 0xa0)
|
|| ((c1 >= 0x81 && c1 <= 0xa0)
|
||||||
&& ( (c2 >= 0x40 && c2 <= 0x7e)
|
&& ( (c2 >= 0x40 && c2 <= 0x7e)
|
||||||
|| (c2 >= 0x80 && c2 <= 0xfe) ))
|
|| (c2 >= 0x80 && c2 <= 0xfe) ))
|
||||||
//GBK/4: extend chinese words.
|
//GBK/4: extend chinese words.
|
||||||
|| ((c1 >= 0xaa && c1 <= 0xfe)
|
|| ((c1 >= 0xaa && c1 <= 0xfe)
|
||||||
&& ( (c2 >= 0x40 && c2 <= 0xfe)
|
&& ( (c2 >= 0x40 && c2 <= 0xfe)
|
||||||
|| (c2 >= 0x80 && c2 <= 0xa0) )) );
|
|| (c2 >= 0x80 && c2 <= 0xa0) )) );
|
||||||
}
|
}
|
||||||
|
|
||||||
/* check if the given char is a half-width ASCII character,
|
/* check if the given char is a half-width ASCII character,
|
||||||
* including all the arabic numbers, letters and english punctuations. */
|
* including all the arabic numbers, letters and english punctuations. */
|
||||||
FRISO_API int gbk_halfwidth_en_char( char c )
|
FRISO_API int gbk_halfwidth_en_char( char c )
|
||||||
{
|
{
|
||||||
int u = (uchar_t) c;
|
int u = (uchar_t) c;
|
||||||
@ -69,58 +69,58 @@ FRISO_API int gbk_halfwidth_en_char( char c )
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* check if the given char is a full-width latain.
|
* check if the given char is a full-width latain.
|
||||||
* include the full-width arabic numeber, letters.
|
* include the full-width arabic numeber, letters.
|
||||||
* but not the full-width puntuations.
|
* but not the full-width puntuations.
|
||||||
*/
|
*/
|
||||||
FRISO_API int gbk_fullwidth_en_char( char *str )
|
FRISO_API int gbk_fullwidth_en_char( char *str )
|
||||||
{
|
{
|
||||||
int c1 = (uchar_t) str[0];
|
int c1 = (uchar_t) str[0];
|
||||||
int c2 = (uchar_t) str[1];
|
int c2 = (uchar_t) str[1];
|
||||||
return ( (c1 == 0xA3)
|
return ( (c1 == 0xA3)
|
||||||
&& ( (c2 >= 0xB0 && c2 <= 0xB9) //arabic numbers.
|
&& ( (c2 >= 0xB0 && c2 <= 0xB9) //arabic numbers.
|
||||||
|| ( c2 >= 0xC1 && c2 <= 0xDA ) //uppercase letters.
|
|| ( c2 >= 0xC1 && c2 <= 0xDA ) //uppercase letters.
|
||||||
|| ( c2 >= 0xE1 && c2 <= 0xFA) ) ); //lowercase letters.
|
|| ( c2 >= 0xE1 && c2 <= 0xFA) ) ); //lowercase letters.
|
||||||
}
|
}
|
||||||
|
|
||||||
//check if the given char is a upper case english letter.
|
//check if the given char is a upper case english letter.
|
||||||
// included the full-width and half-width letters.
|
// included the full-width and half-width letters.
|
||||||
FRISO_API int gbk_uppercase_letter( char *str )
|
FRISO_API int gbk_uppercase_letter( char *str )
|
||||||
{
|
{
|
||||||
int c1 = (uchar_t) str[0];
|
int c1 = (uchar_t) str[0];
|
||||||
int c2 = (uchar_t) str[1];
|
int c2 = (uchar_t) str[1];
|
||||||
if ( c1 <= 0x80 ) //half-width
|
if ( c1 <= 0x80 ) //half-width
|
||||||
return ( c1 >= 65 && c1 <= 90 );
|
return ( c1 >= 65 && c1 <= 90 );
|
||||||
else //full-width
|
else //full-width
|
||||||
return ( c1 == 0xa3 && ( c2 >= 0xc1 && c2 <= 0xda ) );
|
return ( c1 == 0xa3 && ( c2 >= 0xc1 && c2 <= 0xda ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
//check if the given char is a lower case char.
|
//check if the given char is a lower case char.
|
||||||
// included the full-width and half-width letters.
|
// included the full-width and half-width letters.
|
||||||
FRISO_API int gbk_lowercase_letter( char *str )
|
FRISO_API int gbk_lowercase_letter( char *str )
|
||||||
{
|
{
|
||||||
int c1 = (uchar_t) str[0];
|
int c1 = (uchar_t) str[0];
|
||||||
int c2 = (uchar_t) str[1];
|
int c2 = (uchar_t) str[1];
|
||||||
if ( c1 <= 0x80 ) //half-width
|
if ( c1 <= 0x80 ) //half-width
|
||||||
return ( c1 >= 97 && c1 <= 122 );
|
return ( c1 >= 97 && c1 <= 122 );
|
||||||
else //full-width
|
else //full-width
|
||||||
return ( c1 == 0xa3 && ( c2 >= 0xe1 && c2 <= 0xfa ) );
|
return ( c1 == 0xa3 && ( c2 >= 0xe1 && c2 <= 0xfa ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
//check if the given char is a arabic numeric.
|
//check if the given char is a arabic numeric.
|
||||||
// included the full-width and half-width arabic numeric.
|
// included the full-width and half-width arabic numeric.
|
||||||
FRISO_API int gbk_numeric_letter( char *str )
|
FRISO_API int gbk_numeric_letter( char *str )
|
||||||
{
|
{
|
||||||
int c1 = (uchar_t) str[0];
|
int c1 = (uchar_t) str[0];
|
||||||
int c2 = (uchar_t) str[1];
|
int c2 = (uchar_t) str[1];
|
||||||
if ( c1 <= 0x80 ) //half-width
|
if ( c1 <= 0x80 ) //half-width
|
||||||
return ( c1 >= 48 && c1 <= 57 );
|
return ( c1 >= 48 && c1 <= 57 );
|
||||||
else //full-width
|
else //full-width
|
||||||
return ( ( c1 == 0xa3 ) && ( c2 >= 0xb0 && c2 <= 0xb9 ) );
|
return ( ( c1 == 0xa3 ) && ( c2 >= 0xb0 && c2 <= 0xb9 ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* check if the given fstring is make up with numeric chars.
|
* check if the given fstring is make up with numeric chars.
|
||||||
* both full-width,half-width numeric is ok.
|
* both full-width,half-width numeric is ok.
|
||||||
*/
|
*/
|
||||||
FRISO_API int gbk_numeric_string( char *str )
|
FRISO_API int gbk_numeric_string( char *str )
|
||||||
{
|
{
|
||||||
@ -130,17 +130,17 @@ FRISO_API int gbk_numeric_string( char *str )
|
|||||||
|
|
||||||
//scan the string char by char: one byte for a half-width char,
// two bytes for a full-width one. Any non-numeric char ends the
// scan with a 0 result.
while ( *s != '\0' )
{
    c1 = (uchar_t) (*s++);
    if ( c1 <= 0x80 )           //half-width
    {
        //the byte itself must be an ASCII digit (48-57).
        //(this test used to compare c2 against 57, which let ASCII
        // letters through and rejected a half-width digit that
        // followed a full-width one.)
        if ( c1 < 48 || c1 > 57 ) return 0;
    }
    else                        //full-width
    {
        //full-width digits are 0xa3 followed by 0xb0-0xb9.
        if ( c1 != 0xa3 ) return 0;
        c2 = (uchar_t) (*s++);
        if ( c2 < 0xb0 || c2 > 0xb9 ) return 0;
    }
}
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
@ -157,47 +157,47 @@ FRISO_API int gbk_decimal_string( char *str )
|
|||||||
|
|
||||||
for ( i = 0; i < len; )
|
for ( i = 0; i < len; )
|
||||||
{
|
{
|
||||||
c1 = (uchar_t) str[i++];
|
c1 = (uchar_t) str[i++];
|
||||||
//count the number of the points.
|
//count the number of the points.
|
||||||
if ( c1 == 46 )
|
if ( c1 == 46 )
|
||||||
{
|
{
|
||||||
p++;
|
p++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( c1 <= 0x80 ) //half-width
|
if ( c1 <= 0x80 ) //half-width
|
||||||
{
|
{
|
||||||
if ( c1 < 48 || c1 > 57 ) return 0;
|
if ( c1 < 48 || c1 > 57 ) return 0;
|
||||||
}
|
}
|
||||||
else //full-width
|
else //full-width
|
||||||
{
|
{
|
||||||
if ( c1 != 0xa3 ) return 0;
|
if ( c1 != 0xa3 ) return 0;
|
||||||
c2 = (uchar_t) str[i++];
|
c2 = (uchar_t) str[i++];
|
||||||
if ( c2 < 0xb0 || c2 > 0xb9 ) return 0;
|
if ( c2 < 0xb0 || c2 > 0xb9 ) return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return (p == 1);
|
return (p == 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
//check if the given char is a english(ASCII) letter.
|
//check if the given char is a english(ASCII) letter.
|
||||||
// (full-width and half-width), not the punctuation/arabic of course.
|
// (full-width and half-width), not the punctuation/arabic of course.
|
||||||
FRISO_API int gbk_en_letter( char *str )
|
FRISO_API int gbk_en_letter( char *str )
|
||||||
{
|
{
|
||||||
int c1 = (uchar_t) str[0];
|
int c1 = (uchar_t) str[0];
|
||||||
int c2 = (uchar_t) str[1];
|
int c2 = (uchar_t) str[1];
|
||||||
if ( c1 <= 0x80 ) //half-width
|
if ( c1 <= 0x80 ) //half-width
|
||||||
return ( (c1 >= 65 && c1 <= 90) //lowercase
|
return ( (c1 >= 65 && c1 <= 90) //lowercase
|
||||||
|| (c1 >= 97 && c1 <= 122)); //uppercase
|
|| (c1 >= 97 && c1 <= 122)); //uppercase
|
||||||
else
|
else
|
||||||
return ( (c1 == 0xa3)
|
return ( (c1 == 0xa3)
|
||||||
&& ( ( c2 >= 0xc1 && c2 <= 0xda ) //lowercase
|
&& ( ( c2 >= 0xc1 && c2 <= 0xda ) //lowercase
|
||||||
|| ( c2 >= 0xe1 && c2 <= 0xfa ) ) ); //uppercase
|
|| ( c2 >= 0xe1 && c2 <= 0xfa ) ) ); //uppercase
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
//check the given char is a whitespace or not.
|
//check the given char is a whitespace or not.
|
||||||
// included full-width and half-width whitespace.
|
// included full-width and half-width whitespace.
|
||||||
FRISO_API int gbk_whitespace( char *str )
|
FRISO_API int gbk_whitespace( char *str )
|
||||||
{
|
{
|
||||||
int c1 = (uchar_t) str[0];
|
int c1 = (uchar_t) str[0];
|
||||||
@ -213,8 +213,8 @@ FRISO_API int gbk_letter_number( char *str )
|
|||||||
int c1 = (uchar_t) str[0];
|
int c1 = (uchar_t) str[0];
|
||||||
int c2 = (uchar_t) str[1];
|
int c2 = (uchar_t) str[1];
|
||||||
return ( (c1 == 0xa2)
|
return ( (c1 == 0xa2)
|
||||||
&& ( ( c2 >= 0xa1 && c2 <= 0xb0 ) //lowercase
|
&& ( ( c2 >= 0xa1 && c2 <= 0xb0 ) //lowercase
|
||||||
|| ( c2 >= 0xf0 && c2 <= 0xfe ) ) ); //uppercase
|
|| ( c2 >= 0xf0 && c2 <= 0xfe ) ) ); //uppercase
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -232,9 +232,9 @@ FRISO_API int gbk_en_punctuation( char c )
|
|||||||
{
|
{
|
||||||
int u = (uchar_t) c;
|
int u = (uchar_t) c;
|
||||||
return ( (u > 32 && u < 48)
|
return ( (u > 32 && u < 48)
|
||||||
|| ( u > 57 && u < 65 )
|
|| ( u > 57 && u < 65 )
|
||||||
|| ( u > 90 && u < 97 )
|
|| ( u > 90 && u < 97 )
|
||||||
|| ( u > 122 && u < 127 ) );
|
|| ( u > 122 && u < 127 ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
//check the given char is a chinese punctuation.
|
//check the given char is a chinese punctuation.
|
||||||
@ -244,16 +244,16 @@ FRISO_API int gbk_cn_punctuation( char *str )
|
|||||||
int c2 = (uchar_t) str[1];
|
int c2 = (uchar_t) str[1];
|
||||||
//full-width en punctuation.
|
//full-width en punctuation.
|
||||||
return ( (c1 == 0xa3 && (( c2 >= 0xa1 && c2 <= 0xaf )
|
return ( (c1 == 0xa3 && (( c2 >= 0xa1 && c2 <= 0xaf )
|
||||||
|| ( c2 >= 0xba && c2 <= 0xc0 )
|
|| ( c2 >= 0xba && c2 <= 0xc0 )
|
||||||
|| ( c2 >= 0xdb && c2 <= 0xe0 )
|
|| ( c2 >= 0xdb && c2 <= 0xe0 )
|
||||||
|| ( c2 >= 0xfb && c2 <= 0xfe ) ))
|
|| ( c2 >= 0xfb && c2 <= 0xfe ) ))
|
||||||
//chinese punctuation.
|
//chinese punctuation.
|
||||||
|| (c1 == 0xa1 && ( (c2 >= 0xa1 && c2 <= 0xae)
|
|| (c1 == 0xa1 && ( (c2 >= 0xa1 && c2 <= 0xae)
|
||||||
|| ( c2 >= 0xb0 && c2 <= 0xbf ) ))
|
|| ( c2 >= 0xb0 && c2 <= 0xbf ) ))
|
||||||
//A6 area special punctuations:" "
|
//A6 area special punctuations:" "
|
||||||
|| (c1 == 0xa6 && (c2 >= 0xf9 && c2 <= 0xfe))
|
|| (c1 == 0xa6 && (c2 >= 0xf9 && c2 <= 0xfe))
|
||||||
//A8 area special punctuations: " ˊˋ˙–―‥‵℅ "
|
//A8 area special punctuations: " ˊˋ˙–―‥‵℅ "
|
||||||
|| (c1 == 0xa8 && (c2 >= 0x40 && c2 <= 0x47)) );
|
|| (c1 == 0xa8 && (c2 >= 0x40 && c2 <= 0x47)) );
|
||||||
}
|
}
|
||||||
|
|
||||||
/* {{{
|
/* {{{
|
||||||
@ -269,19 +269,19 @@ FRISO_API int gbk_cn_punctuation( char *str )
|
|||||||
//FRISO_API int gbk_keep_punctuation( char *str )
|
//FRISO_API int gbk_keep_punctuation( char *str )
|
||||||
//{
|
//{
|
||||||
// if ( __keep_punctuations_hash__ == NULL ) {
|
// if ( __keep_punctuations_hash__ == NULL ) {
|
||||||
// __keep_punctuations_hash__ = new_hash_table();
|
// __keep_punctuations_hash__ = new_hash_table();
|
||||||
// hash_put_mapping( __keep_punctuations_hash__, "@", NULL );
|
// hash_put_mapping( __keep_punctuations_hash__, "@", NULL );
|
||||||
// hash_put_mapping( __keep_punctuations_hash__, "$", NULL );
|
// hash_put_mapping( __keep_punctuations_hash__, "$", NULL );
|
||||||
// hash_put_mapping( __keep_punctuations_hash__, "%", NULL );
|
// hash_put_mapping( __keep_punctuations_hash__, "%", NULL );
|
||||||
// hash_put_mapping( __keep_punctuations_hash__, "^", NULL );
|
// hash_put_mapping( __keep_punctuations_hash__, "^", NULL );
|
||||||
// hash_put_mapping( __keep_punctuations_hash__, "&", NULL );
|
// hash_put_mapping( __keep_punctuations_hash__, "&", NULL );
|
||||||
// hash_put_mapping( __keep_punctuations_hash__, "-", NULL );
|
// hash_put_mapping( __keep_punctuations_hash__, "-", NULL );
|
||||||
// hash_put_mapping( __keep_punctuations_hash__, ":", NULL );
|
// hash_put_mapping( __keep_punctuations_hash__, ":", NULL );
|
||||||
// hash_put_mapping( __keep_punctuations_hash__, ".", NULL );
|
// hash_put_mapping( __keep_punctuations_hash__, ".", NULL );
|
||||||
// hash_put_mapping( __keep_punctuations_hash__, "/", NULL );
|
// hash_put_mapping( __keep_punctuations_hash__, "/", NULL );
|
||||||
// hash_put_mapping( __keep_punctuations_hash__, "'", NULL );
|
// hash_put_mapping( __keep_punctuations_hash__, "'", NULL );
|
||||||
// hash_put_mapping( __keep_punctuations_hash__, "#", NULL );
|
// hash_put_mapping( __keep_punctuations_hash__, "#", NULL );
|
||||||
// hash_put_mapping( __keep_punctuations_hash__, "+", NULL );
|
// hash_put_mapping( __keep_punctuations_hash__, "+", NULL );
|
||||||
// }
|
// }
|
||||||
// //check the hash.
|
// //check the hash.
|
||||||
// return hash_exist_mapping( __keep_punctuations_hash__, str );
|
// return hash_exist_mapping( __keep_punctuations_hash__, str );
|
||||||
|
334
src/friso_UTF8.c
334
src/friso_UTF8.c
@ -1,6 +1,6 @@
|
|||||||
/**
|
/**
|
||||||
* Friso utf8 about function implements source file.
|
* Friso utf8 about function implements source file.
|
||||||
* @package src/friso_UTF8.c .
|
* @package src/friso_UTF8.c .
|
||||||
*
|
*
|
||||||
* @author chenxin <chenxin619315@gmail.com>
|
* @author chenxin <chenxin619315@gmail.com>
|
||||||
*/
|
*/
|
||||||
@ -12,12 +12,12 @@
|
|||||||
|
|
||||||
/* read the next utf-8 word from the specified position.
|
/* read the next utf-8 word from the specified position.
|
||||||
*
|
*
|
||||||
* @return int the bytes of the current readed word.
|
* @return int the bytes of the current readed word.
|
||||||
*/
|
*/
|
||||||
FRISO_API int utf8_next_word(
|
FRISO_API int utf8_next_word(
|
||||||
friso_task_t task,
|
friso_task_t task,
|
||||||
uint_t *idx,
|
uint_t *idx,
|
||||||
fstring __word )
|
fstring __word )
|
||||||
{
|
{
|
||||||
if ( *idx >= task->length ) return 0;
|
if ( *idx >= task->length ) return 0;
|
||||||
|
|
||||||
@ -25,7 +25,7 @@ FRISO_API int utf8_next_word(
|
|||||||
task->bytes = get_utf8_bytes( task->text[ *idx ] );
|
task->bytes = get_utf8_bytes( task->text[ *idx ] );
|
||||||
|
|
||||||
//for ( t = 0; t < task->bytes; t++ ) {
|
//for ( t = 0; t < task->bytes; t++ ) {
|
||||||
// __word[t] = task->text[ (*idx)++ ];
|
// __word[t] = task->text[ (*idx)++ ];
|
||||||
//}
|
//}
|
||||||
|
|
||||||
//change the loop to memcpy.
|
//change the loop to memcpy.
|
||||||
@ -52,31 +52,31 @@ FRISO_API void print_char_binary( char value )
|
|||||||
|
|
||||||
for ( t = 0; t < __CHAR_BYTES__; t++ )
|
for ( t = 0; t < __CHAR_BYTES__; t++ )
|
||||||
{
|
{
|
||||||
if ( ( value & 0x80 ) == 0x80 ) {
|
if ( ( value & 0x80 ) == 0x80 ) {
|
||||||
printf("1");
|
printf("1");
|
||||||
} else {
|
} else {
|
||||||
printf("0");
|
printf("0");
|
||||||
}
|
}
|
||||||
value <<= 1;
|
value <<= 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* get the bytes of a utf-8 char.
|
* get the bytes of a utf-8 char.
|
||||||
* between 1 - 6.
|
* between 1 - 6.
|
||||||
*
|
*
|
||||||
* @param __char
|
* @param __char
|
||||||
* @return int
|
* @return int
|
||||||
*/
|
*/
|
||||||
FRISO_API int get_utf8_bytes( char value )
|
FRISO_API int get_utf8_bytes( char value )
|
||||||
{
|
{
|
||||||
register uint_t t = 0;
|
register uint_t t = 0;
|
||||||
|
|
||||||
//one byte ascii char.
|
//one byte ascii char.
|
||||||
if ( ( value & 0x80 ) == 0 ) return 1;
|
if ( ( value & 0x80 ) == 0 ) return 1;
|
||||||
|
|
||||||
for ( ; ( value & 0x80 ) != 0; value <<= 1 )
|
for ( ; ( value & 0x80 ) != 0; value <<= 1 )
|
||||||
t++;
|
t++;
|
||||||
|
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
@ -94,25 +94,25 @@ FRISO_API int get_utf8_unicode( const fstring ch )
|
|||||||
register char b1,b2,b3;
|
register char b1,b2,b3;
|
||||||
|
|
||||||
switch ( bytes ) {
|
switch ( bytes ) {
|
||||||
case 1:
|
case 1:
|
||||||
*bit = *ch;
|
*bit = *ch;
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
b1 = *ch;
|
b1 = *ch;
|
||||||
b2 = *(ch + 1);
|
b2 = *(ch + 1);
|
||||||
|
|
||||||
*bit = (b1 << 6) + (b2 & 0x3F);
|
*bit = (b1 << 6) + (b2 & 0x3F);
|
||||||
*(bit+1) = (b1 >> 2) & 0x07;
|
*(bit+1) = (b1 >> 2) & 0x07;
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
b1 = *ch;
|
b1 = *ch;
|
||||||
b2 = *(ch + 1);
|
b2 = *(ch + 1);
|
||||||
b3 = *(ch + 2);
|
b3 = *(ch + 2);
|
||||||
|
|
||||||
*bit = (b2 << 6) + (b3 & 0x3F);
|
*bit = (b2 << 6) + (b3 & 0x3F);
|
||||||
*(bit+1) = (b1 << 4) + ((b2 >> 2) & 0x0F);
|
*(bit+1) = (b1 << 4) + ((b2 >> 2) & 0x0F);
|
||||||
break;
|
break;
|
||||||
//ignore the ones that are larger than 3 bytes;
|
//ignore the ones that are larger than 3 bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
return code;
|
return code;
|
||||||
@ -122,50 +122,50 @@ FRISO_API int get_utf8_unicode( const fstring ch )
|
|||||||
FRISO_API int unicode_to_utf8( uint_t u, fstring __word )
|
FRISO_API int unicode_to_utf8( uint_t u, fstring __word )
|
||||||
{
|
{
|
||||||
if ( u <= 0x0000007F ) {
|
if ( u <= 0x0000007F ) {
|
||||||
//U-00000000 - U-0000007F
|
//U-00000000 - U-0000007F
|
||||||
//0xxxxxxx
|
//0xxxxxxx
|
||||||
*__word = ( u & 0x7F );
|
*__word = ( u & 0x7F );
|
||||||
return 1;
|
return 1;
|
||||||
} else if ( u >= 0x00000080 && u <= 0x000007FF ) {
|
} else if ( u >= 0x00000080 && u <= 0x000007FF ) {
|
||||||
//U-00000080 - U-000007FF
|
//U-00000080 - U-000007FF
|
||||||
//110xxxxx 10xxxxxx
|
//110xxxxx 10xxxxxx
|
||||||
*( __word + 1 ) = ( u & 0x3F) | 0x80;
|
*( __word + 1 ) = ( u & 0x3F) | 0x80;
|
||||||
*__word = ((u >> 6) & 0x1F) | 0xC0;
|
*__word = ((u >> 6) & 0x1F) | 0xC0;
|
||||||
return 2;
|
return 2;
|
||||||
} else if ( u >= 0x00000800 && u <= 0x0000FFFF ) {
|
} else if ( u >= 0x00000800 && u <= 0x0000FFFF ) {
|
||||||
//U-00000800 - U-0000FFFF
|
//U-00000800 - U-0000FFFF
|
||||||
//1110xxxx 10xxxxxx 10xxxxxx
|
//1110xxxx 10xxxxxx 10xxxxxx
|
||||||
*( __word + 2 ) = ( u & 0x3F) | 0x80;
|
*( __word + 2 ) = ( u & 0x3F) | 0x80;
|
||||||
*( __word + 1 ) = ((u >> 6) & 0x3F) | 0x80;
|
*( __word + 1 ) = ((u >> 6) & 0x3F) | 0x80;
|
||||||
*__word = ((u >> 12) & 0x0F) | 0xE0;
|
*__word = ((u >> 12) & 0x0F) | 0xE0;
|
||||||
return 3;
|
return 3;
|
||||||
} else if ( u >= 0x00010000 && u <= 0x001FFFFF ) {
|
} else if ( u >= 0x00010000 && u <= 0x001FFFFF ) {
|
||||||
//U-00010000 - U-001FFFFF
|
//U-00010000 - U-001FFFFF
|
||||||
//11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
//11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
||||||
*( __word + 3 ) = ( u & 0x3F) | 0x80;
|
*( __word + 3 ) = ( u & 0x3F) | 0x80;
|
||||||
*( __word + 2 ) = ((u >> 6) & 0x3F) | 0x80;
|
*( __word + 2 ) = ((u >> 6) & 0x3F) | 0x80;
|
||||||
*( __word + 1 ) = ((u >> 12) & 0x3F) | 0x80;
|
*( __word + 1 ) = ((u >> 12) & 0x3F) | 0x80;
|
||||||
*__word = ((u >> 18) & 0x07) | 0xF0;
|
*__word = ((u >> 18) & 0x07) | 0xF0;
|
||||||
return 4;
|
return 4;
|
||||||
} else if ( u >= 0x00200000 && u <= 0x03FFFFFF ) {
|
} else if ( u >= 0x00200000 && u <= 0x03FFFFFF ) {
|
||||||
//U-00200000 - U-03FFFFFF
|
//U-00200000 - U-03FFFFFF
|
||||||
//111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
|
//111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
|
||||||
*( __word + 4 ) = ( u & 0x3F) | 0x80;
|
*( __word + 4 ) = ( u & 0x3F) | 0x80;
|
||||||
*( __word + 3 ) = ((u >> 6) & 0x3F) | 0x80;
|
*( __word + 3 ) = ((u >> 6) & 0x3F) | 0x80;
|
||||||
*( __word + 2 ) = ((u >> 12) & 0x3F) | 0x80;
|
*( __word + 2 ) = ((u >> 12) & 0x3F) | 0x80;
|
||||||
*( __word + 1 ) = ((u >> 18) & 0x3F) | 0x80;
|
*( __word + 1 ) = ((u >> 18) & 0x3F) | 0x80;
|
||||||
*__word = ((u >> 24) & 0x03) | 0xF8;
|
*__word = ((u >> 24) & 0x03) | 0xF8;
|
||||||
return 5;
|
return 5;
|
||||||
} else if ( u >= 0x04000000 && u <= 0x7FFFFFFF ) {
|
} else if ( u >= 0x04000000 && u <= 0x7FFFFFFF ) {
|
||||||
//U-04000000 - U-7FFFFFFF
|
//U-04000000 - U-7FFFFFFF
|
||||||
//1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
|
//1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
|
||||||
*( __word + 5 ) = ( u & 0x3F) | 0x80;
|
*( __word + 5 ) = ( u & 0x3F) | 0x80;
|
||||||
*( __word + 4 ) = ((u >> 6) & 0x3F) | 0x80;
|
*( __word + 4 ) = ((u >> 6) & 0x3F) | 0x80;
|
||||||
*( __word + 3 ) = ((u >> 12) & 0x3F) | 0x80;
|
*( __word + 3 ) = ((u >> 12) & 0x3F) | 0x80;
|
||||||
*( __word + 2 ) = ((u >> 18) & 0x3F) | 0x80;
|
*( __word + 2 ) = ((u >> 18) & 0x3F) | 0x80;
|
||||||
*( __word + 1 ) = ((u >> 24) & 0x3F) | 0x80;
|
*( __word + 1 ) = ((u >> 24) & 0x3F) | 0x80;
|
||||||
*__word = ((u >> 30) & 0x01) | 0xFC;
|
*__word = ((u >> 30) & 0x01) | 0xFC;
|
||||||
return 6;
|
return 6;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -173,28 +173,28 @@ FRISO_API int unicode_to_utf8( uint_t u, fstring __word )
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* check the given char is a CJK char or not.
|
* check the given char is a CJK char or not.
|
||||||
* 2E80-2EFF CJK 部首补充
|
* 2E80-2EFF CJK 部首补充
|
||||||
* 2F00-2FDF 康熙字典部首
|
* 2F00-2FDF 康熙字典部首
|
||||||
* 3000-303F CJK 符号和标点 --ignore
|
* 3000-303F CJK 符号和标点 --ignore
|
||||||
* 31C0-31EF CJK 笔画
|
* 31C0-31EF CJK 笔画
|
||||||
* 3200-32FF 封闭式 CJK 文字和月份 --ignore.
|
* 3200-32FF 封闭式 CJK 文字和月份 --ignore.
|
||||||
* 3300-33FF CJK 兼容
|
* 3300-33FF CJK 兼容
|
||||||
* 3400-4DBF CJK 统一表意符号扩展 A
|
* 3400-4DBF CJK 统一表意符号扩展 A
|
||||||
* 4DC0-4DFF 易经六十四卦符号
|
* 4DC0-4DFF 易经六十四卦符号
|
||||||
* 4E00-9FBF CJK 统一表意符号
|
* 4E00-9FBF CJK 统一表意符号
|
||||||
* F900-FAFF CJK 兼容象形文字
|
* F900-FAFF CJK 兼容象形文字
|
||||||
* FE30-FE4F CJK 兼容形式
|
* FE30-FE4F CJK 兼容形式
|
||||||
* FF00-FFEF 全角ASCII、全角标点 --ignore (as basic latin)
|
* FF00-FFEF 全角ASCII、全角标点 --ignore (as basic latin)
|
||||||
*
|
*
|
||||||
* Japanese:
|
* Japanese:
|
||||||
* 3040-309F 日本平假名
|
* 3040-309F 日本平假名
|
||||||
* 30A0-30FF 日本片假名
|
* 30A0-30FF 日本片假名
|
||||||
* 31F0-31FF 日本片假名拼音扩展
|
* 31F0-31FF 日本片假名拼音扩展
|
||||||
*
|
*
|
||||||
* Korean:
|
* Korean:
|
||||||
* AC00-D7AF 韩文拼音
|
* AC00-D7AF 韩文拼音
|
||||||
* 1100-11FF 韩文字母
|
* 1100-11FF 韩文字母
|
||||||
* 3130-318F 韩文兼容字母
|
* 3130-318F 韩文兼容字母
|
||||||
*
|
*
|
||||||
* @param ch :pointer to the char
|
* @param ch :pointer to the char
|
||||||
* @return int : 1 for yes and 0 for not.
|
* @return int : 1 for yes and 0 for not.
|
||||||
@ -211,23 +211,23 @@ FRISO_API int utf8_cjk_string( uint_t u )
|
|||||||
//Chinese.
|
//Chinese.
|
||||||
#ifdef FRISO_CJK_CHK_C
|
#ifdef FRISO_CJK_CHK_C
|
||||||
c = ( ( u >= 0x4E00 && u <= 0x9FBF )
|
c = ( ( u >= 0x4E00 && u <= 0x9FBF )
|
||||||
|| ( u >= 0x2E80 && u <= 0x2EFF ) || ( u >= 0x2F00 && u <= 0x2FDF )
|
|| ( u >= 0x2E80 && u <= 0x2EFF ) || ( u >= 0x2F00 && u <= 0x2FDF )
|
||||||
|| ( u >= 0x31C0 && u <= 0x31EF ) //|| ( u >= 0x3200 && u <= 0x32FF )
|
|| ( u >= 0x31C0 && u <= 0x31EF ) //|| ( u >= 0x3200 && u <= 0x32FF )
|
||||||
|| ( u >= 0x3300 && u <= 0x33FF ) //|| ( u >= 0x3400 && u <= 0x4DBF )
|
|| ( u >= 0x3300 && u <= 0x33FF ) //|| ( u >= 0x3400 && u <= 0x4DBF )
|
||||||
|| ( u >= 0x4DC0 && u <= 0x4DFF ) || ( u >= 0xF900 && u <= 0xFAFF )
|
|| ( u >= 0x4DC0 && u <= 0x4DFF ) || ( u >= 0xF900 && u <= 0xFAFF )
|
||||||
|| ( u >= 0xFE30 && u <= 0xFE4F ) );
|
|| ( u >= 0xFE30 && u <= 0xFE4F ) );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
//Japanese.
|
//Japanese.
|
||||||
#ifdef FRISO_CJK_CHK_J
|
#ifdef FRISO_CJK_CHK_J
|
||||||
j = ( ( u >= 0x3040 && u <= 0x309F )
|
j = ( ( u >= 0x3040 && u <= 0x309F )
|
||||||
|| ( u >= 0x30A0 && u <= 0x30FF ) || ( u >= 0x31F0 && u <= 0x31FF ) );
|
|| ( u >= 0x30A0 && u <= 0x30FF ) || ( u >= 0x31F0 && u <= 0x31FF ) );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
//Korean
|
//Korean
|
||||||
#ifdef FRISO_CJK_CHK_K
|
#ifdef FRISO_CJK_CHK_K
|
||||||
k = ( ( u >= 0xAC00 && u <= 0xD7AF )
|
k = ( ( u >= 0xAC00 && u <= 0xD7AF )
|
||||||
|| ( u >= 0x1100 && u <= 0x11FF ) || ( u >= 0x3130 && u <= 0x318F ) );
|
|| ( u >= 0x1100 && u <= 0x11FF ) || ( u >= 0x3130 && u <= 0x318F ) );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return ( c || j || k );
|
return ( c || j || k );
|
||||||
@ -235,7 +235,7 @@ FRISO_API int utf8_cjk_string( uint_t u )
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* check the given char is a Basic Latin letter or not.
|
* check the given char is a Basic Latin letter or not.
|
||||||
* include all the letters and english punctuations.
|
* include all the letters and english punctuations.
|
||||||
*
|
*
|
||||||
* @param c
|
* @param c
|
||||||
* @return int 1 for yes and 0 for not.
|
* @return int 1 for yes and 0 for not.
|
||||||
@ -247,21 +247,21 @@ FRISO_API int utf8_halfwidth_en_char( uint_t u )
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* check the given char is a full-width latain or not.
|
* check the given char is a full-width latain or not.
|
||||||
* include the full-width arabic numeber, letters.
|
* include the full-width arabic numeber, letters.
|
||||||
* but not the full-width punctuations.
|
* but not the full-width punctuations.
|
||||||
*
|
*
|
||||||
* @param c
|
* @param c
|
||||||
* @return int
|
* @return int
|
||||||
*/
|
*/
|
||||||
FRISO_API int utf8_fullwidth_en_char( uint_t u )
|
FRISO_API int utf8_fullwidth_en_char( uint_t u )
|
||||||
{
|
{
|
||||||
return ( (u >= 65296 && u <= 65305 ) //arabic number
|
return ( (u >= 65296 && u <= 65305 ) //arabic number
|
||||||
|| ( u >= 65313 && u <= 65338 ) //upper case letters
|
|| ( u >= 65313 && u <= 65338 ) //upper case letters
|
||||||
|| ( u >= 65345 && u <= 65370 ) ); //lower case letters
|
|| ( u >= 65345 && u <= 65370 ) ); //lower case letters
|
||||||
}
|
}
|
||||||
|
|
||||||
//check the given char is a upper case letters or not.
|
//check the given char is a upper case letters or not.
|
||||||
// included the full-width and half-width letters.
|
// included the full-width and half-width letters.
|
||||||
FRISO_API int utf8_uppercase_letter( uint_t u )
|
FRISO_API int utf8_uppercase_letter( uint_t u )
|
||||||
{
|
{
|
||||||
if ( u > 65280 ) u -= 65248;
|
if ( u > 65280 ) u -= 65248;
|
||||||
@ -269,7 +269,7 @@ FRISO_API int utf8_uppercase_letter( uint_t u )
|
|||||||
}
|
}
|
||||||
|
|
||||||
//check the given char is a lower case letter or not.
|
//check the given char is a lower case letter or not.
|
||||||
// included the full-width and half-width letters.
|
// included the full-width and half-width letters.
|
||||||
FRISO_API int utf8_lowercase_letter( uint_t u )
|
FRISO_API int utf8_lowercase_letter( uint_t u )
|
||||||
{
|
{
|
||||||
if ( u > 65280 ) u -= 65248;
|
if ( u > 65280 ) u -= 65248;
|
||||||
@ -277,25 +277,25 @@ FRISO_API int utf8_lowercase_letter( uint_t u )
|
|||||||
}
|
}
|
||||||
|
|
||||||
//check the given char is a numeric
|
//check the given char is a numeric
|
||||||
// included the full-width and half-width arabic numeric.
|
// included the full-width and half-width arabic numeric.
|
||||||
FRISO_API int utf8_numeric_letter( uint_t u )
|
FRISO_API int utf8_numeric_letter( uint_t u )
|
||||||
{
|
{
|
||||||
if ( u > 65280 ) u -= 65248; //make full-width half-width.
|
if ( u > 65280 ) u -= 65248; //make full-width half-width.
|
||||||
return ( ( u >= 48 && u <= 57 ) );
|
return ( ( u >= 48 && u <= 57 ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
//check the given char is a english letter.(included the full-width)
|
//check the given char is a english letter.(included the full-width)
|
||||||
// not the punctuation of course.
|
// not the punctuation of course.
|
||||||
FRISO_API int utf8_en_letter( uint_t u )
|
FRISO_API int utf8_en_letter( uint_t u )
|
||||||
{
|
{
|
||||||
if ( u > 65280 ) u -= 65248;
|
if ( u > 65280 ) u -= 65248;
|
||||||
return ( ( u >= 65 && u <= 90 )
|
return ( ( u >= 65 && u <= 90 )
|
||||||
|| ( u >= 97 && u <= 122 ) );
|
|| ( u >= 97 && u <= 122 ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* check if the given fstring is make up with numeric.
|
* check if the given fstring is make up with numeric.
|
||||||
* both full-width,half-width numeric is ok.
|
* both full-width,half-width numeric is ok.
|
||||||
*
|
*
|
||||||
* @param str
|
* @param str
|
||||||
* @return int
|
* @return int
|
||||||
@ -317,22 +317,22 @@ FRISO_API int utf8_numeric_string( const fstring str )
|
|||||||
|
|
||||||
while ( *s != '\0' )
|
while ( *s != '\0' )
|
||||||
{
|
{
|
||||||
//if ( ! utf8_numeric_letter( get_utf8_unicode( s++ ) ) ) {
|
//if ( ! utf8_numeric_letter( get_utf8_unicode( s++ ) ) ) {
|
||||||
// return 0;
|
// return 0;
|
||||||
//}
|
//}
|
||||||
|
|
||||||
//new implemention.
|
//new implemention.
|
||||||
//@date 2013-10-14
|
//@date 2013-10-14
|
||||||
bytes = 1;
|
bytes = 1;
|
||||||
if ( *s < 0 ) //full-width chars.
|
if ( *s < 0 ) //full-width chars.
|
||||||
{
|
{
|
||||||
u = get_utf8_unicode(s);
|
u = get_utf8_unicode(s);
|
||||||
bytes = get_utf8_bytes(*s);
|
bytes = get_utf8_bytes(*s);
|
||||||
if ( u < 65296 || u > 65305 ) return 0;
|
if ( u < 65296 || u > 65305 ) return 0;
|
||||||
}
|
}
|
||||||
else if ( *s < 48 || *s > 57 ) return 0;
|
else if ( *s < 48 || *s > 57 ) return 0;
|
||||||
|
|
||||||
s += bytes;
|
s += bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
@ -347,24 +347,24 @@ FRISO_API int utf8_decimal_string( const fstring str )
|
|||||||
|
|
||||||
for ( i = 1; i < len; bytes = 1 )
|
for ( i = 1; i < len; bytes = 1 )
|
||||||
{
|
{
|
||||||
//count the number of char '.'
|
//count the number of char '.'
|
||||||
if ( str[i] == '.' )
|
if ( str[i] == '.' )
|
||||||
{
|
{
|
||||||
i++;
|
i++;
|
||||||
p++;
|
p++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
//full-width numeric.
|
//full-width numeric.
|
||||||
else if ( str[i] < 0 )
|
else if ( str[i] < 0 )
|
||||||
{
|
{
|
||||||
u = get_utf8_unicode(str+i);
|
u = get_utf8_unicode(str+i);
|
||||||
bytes = get_utf8_bytes(str[i]);
|
bytes = get_utf8_bytes(str[i]);
|
||||||
if ( u < 65296 || u > 65305 ) return 0;
|
if ( u < 65296 || u > 65305 ) return 0;
|
||||||
}
|
}
|
||||||
else if ( str[i] < 48 || str[i] > 57 ) return 0;
|
else if ( str[i] < 48 || str[i] > 57 ) return 0;
|
||||||
|
|
||||||
i += bytes;
|
i += bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
return (p == 1);
|
return (p == 1);
|
||||||
@ -379,7 +379,7 @@ FRISO_API int utf8_decimal_string( const fstring str )
|
|||||||
FRISO_API int utf8_whitespace( uint_t u )
{
    //32 is the half-width space and 12288 the full-width space.
    return ( u == 32 || u == 12288 ) ? 1 : 0;
}
|
||||||
|
|
||||||
@ -392,16 +392,16 @@ FRISO_API int utf8_whitespace( uint_t u )
|
|||||||
*/
|
*/
|
||||||
FRISO_API int utf8_en_punctuation( uint_t u )
{
    //the full-width codes are not mapped to half-width here,
    //only the ascii punctuation ranges are matched.
    if ( u > 32 && u < 48 ) return 1;       //'!' - '/'
    if ( u > 57 && u < 65 ) return 1;       //':' - '@'
    if ( u > 90 && u < 97 ) return 1;       //'[' - '`', added @2013-08-31
    return ( u > 122 && u < 127 );          //'{' - '~'
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* check the given char is a chinese punctuation.
|
* check the given char is a chinese punctuation.
|
||||||
* @date 2013-08-31 added.
|
* @date 2013-08-31 added.
|
||||||
*
|
*
|
||||||
* @param ch
|
* @param ch
|
||||||
* @return int
|
* @return int
|
||||||
@ -409,17 +409,17 @@ FRISO_API int utf8_en_punctuation( uint_t u )
|
|||||||
FRISO_API int utf8_cn_punctuation( uint_t u )
{
    //the full-width ranges are the ascii punctuation ranges
    //moved up by 65248.
    return ( ( u > 65280 && u < 65296 )         //full-width '!' - '/'
        //full-width ':' - '@': the upper bound is 65313 so that the
        //full-width '@' (65312) is matched as well, the same as the
        //half-width '@' (64) in utf8_en_punctuation and 0xA3C0 in
        //the GBK counterpart gbk_cn_punctuation.
        || ( u > 65305 && u < 65313 )
        || ( u > 65338 && u < 65345 )           //full-width '[' - '`'
        || ( u > 65370 && u < 65382 )           //full-width '{' - '~' and 65375 - 65381
        //cjk symbol and punctuation.(added 2013-09-06)
        //from http://www.unicode.org/charts/PDF/U3000.pdf
        || ( u >= 12289 && u <= 12319 ) );
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* check if the given char is a letter number in unicode.
|
* check if the given char is a letter number in unicode.
|
||||||
* like 'ⅠⅡ'.
|
* like 'ⅠⅡ'.
|
||||||
* @param ch
|
* @param ch
|
||||||
* @return int
|
* @return int
|
||||||
*/
|
*/
|
||||||
@ -430,7 +430,7 @@ FRISO_API int utf8_letter_number( uint_t u )
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* check if the given char is a other number in unicode.
|
* check if the given char is a other number in unicode.
|
||||||
* like '①⑩⑽㈩'.
|
* like '①⑩⑽㈩'.
|
||||||
* @param ch
|
* @param ch
|
||||||
* @return int
|
* @return int
|
||||||
*/
|
*/
|
||||||
@ -456,19 +456,19 @@ FRISO_API int utf8_other_number( uint_t u )
|
|||||||
//{
|
//{
|
||||||
// if ( __keep_punctuations_hash__ == NULL )
|
// if ( __keep_punctuations_hash__ == NULL )
|
||||||
// {
|
// {
|
||||||
// __keep_punctuations_hash__ = new_hash_table();
|
// __keep_punctuations_hash__ = new_hash_table();
|
||||||
// hash_put_mapping( __keep_punctuations_hash__, "@", NULL );
|
// hash_put_mapping( __keep_punctuations_hash__, "@", NULL );
|
||||||
// //hash_put_mapping( __keep_punctuations_hash__, "$", NULL );
|
// //hash_put_mapping( __keep_punctuations_hash__, "$", NULL );
|
||||||
// hash_put_mapping( __keep_punctuations_hash__, "%", NULL );
|
// hash_put_mapping( __keep_punctuations_hash__, "%", NULL );
|
||||||
// //hash_put_mapping( __keep_punctuations_hash__, "^", NULL );
|
// //hash_put_mapping( __keep_punctuations_hash__, "^", NULL );
|
||||||
// hash_put_mapping( __keep_punctuations_hash__, "&", NULL );
|
// hash_put_mapping( __keep_punctuations_hash__, "&", NULL );
|
||||||
// //hash_put_mapping( __keep_punctuations_hash__, "-", NULL );
|
// //hash_put_mapping( __keep_punctuations_hash__, "-", NULL );
|
||||||
// //hash_put_mapping( __keep_punctuations_hash__, ":", NULL );
|
// //hash_put_mapping( __keep_punctuations_hash__, ":", NULL );
|
||||||
// hash_put_mapping( __keep_punctuations_hash__, ".", NULL );
|
// hash_put_mapping( __keep_punctuations_hash__, ".", NULL );
|
||||||
// //hash_put_mapping( __keep_punctuations_hash__, "/", NULL );
|
// //hash_put_mapping( __keep_punctuations_hash__, "/", NULL );
|
||||||
// hash_put_mapping( __keep_punctuations_hash__, "'", NULL );
|
// hash_put_mapping( __keep_punctuations_hash__, "'", NULL );
|
||||||
// hash_put_mapping( __keep_punctuations_hash__, "#", NULL );
|
// hash_put_mapping( __keep_punctuations_hash__, "#", NULL );
|
||||||
// hash_put_mapping( __keep_punctuations_hash__, "+", NULL );
|
// hash_put_mapping( __keep_punctuations_hash__, "+", NULL );
|
||||||
// }
|
// }
|
||||||
// //check the hash.
|
// //check the hash.
|
||||||
// return hash_exist_mapping( __keep_punctuations_hash__, str );
|
// return hash_exist_mapping( __keep_punctuations_hash__, str );
|
||||||
@ -484,7 +484,7 @@ FRISO_API int utf8_other_number( uint_t u )
|
|||||||
//FRISO_API int utf8_fullwidth_char( uint_t u )
|
//FRISO_API int utf8_fullwidth_char( uint_t u )
|
||||||
//{
|
//{
|
||||||
// if ( u == 12288 )
|
// if ( u == 12288 )
|
||||||
// return 1; //full-width space
|
// return 1; //full-width space
|
||||||
// //(32 - 126) ascii code
|
// //(32 - 126) ascii code
|
||||||
// return (u > 65280 && u <= 65406);
|
// return (u > 65280 && u <= 65406);
|
||||||
//}
|
//}
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
/*
|
/*
|
||||||
* friso dynamic interface implemented functions file
|
* friso dynamic interface implemented functions file
|
||||||
* that defined in header file "friso_API.h".
|
* that defined in header file "friso_API.h".
|
||||||
* never use it for commercial use.
|
* never use it for commercial use.
|
||||||
*
|
*
|
||||||
* @author chenxini <chenxin619315@gmail.com>
|
* @author chenxini <chenxin619315@gmail.com>
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "friso_API.h"
|
#include "friso_API.h"
|
||||||
@ -14,37 +14,37 @@
|
|||||||
**********************************************/
|
**********************************************/
|
||||||
//allocate __blocks pointer entries and set every one of them to NULL.
//the ___ALLOCATION_ERROR___ macro is invoked when the allocation failed.
__STATIC_API__ void **create_array_entries( uint_t __blocks )
{
    void **entries = ( void ** ) FRISO_CALLOC( sizeof( void * ), __blocks );
    register uint_t i = 0;

    if ( entries == NULL ) {
        ___ALLOCATION_ERROR___
    }

    //initialize
    while ( i < __blocks ) {
        entries[i] = NULL;
        i++;
    }

    return entries;
}
|
||||||
|
|
||||||
//resize the array. (the opacity should not be smaller than array->length)
|
//resize the array. (the opacity should not be smaller than array->length)
|
||||||
__STATIC_API__ friso_array_t resize_array_list(
|
__STATIC_API__ friso_array_t resize_array_list(
|
||||||
friso_array_t array,
|
friso_array_t array,
|
||||||
uint_t opacity )
|
uint_t opacity )
|
||||||
{
|
{
|
||||||
register uint_t t;
|
register uint_t t;
|
||||||
void **block = create_array_entries( opacity );
|
void **block = create_array_entries( opacity );
|
||||||
|
|
||||||
for ( t = 0; t < array->length ; t++ ) {
|
for ( t = 0; t < array->length ; t++ ) {
|
||||||
block[t] = array->items[t];
|
block[t] = array->items[t];
|
||||||
}
|
}
|
||||||
|
|
||||||
FRISO_FREE( array->items );
|
FRISO_FREE( array->items );
|
||||||
array->items = block;
|
array->items = block;
|
||||||
array->allocs = opacity;
|
array->allocs = opacity;
|
||||||
|
|
||||||
return array;
|
return array;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -59,154 +59,154 @@ __STATIC_API__ friso_array_t resize_array_list(
|
|||||||
//create a new array list with a given opacity.
|
//create a new array list with a given opacity.
|
||||||
FRISO_API friso_array_t new_array_list_with_opacity( uint_t opacity )
|
FRISO_API friso_array_t new_array_list_with_opacity( uint_t opacity )
|
||||||
{
|
{
|
||||||
friso_array_t array = ( friso_array_t )
|
friso_array_t array = ( friso_array_t )
|
||||||
FRISO_MALLOC( sizeof( friso_array_entry ) );
|
FRISO_MALLOC( sizeof( friso_array_entry ) );
|
||||||
if ( array == NULL ) {
|
if ( array == NULL ) {
|
||||||
___ALLOCATION_ERROR___
|
___ALLOCATION_ERROR___
|
||||||
}
|
}
|
||||||
|
|
||||||
//initialize
|
//initialize
|
||||||
array->items = create_array_entries( opacity );
|
array->items = create_array_entries( opacity );
|
||||||
array->allocs = opacity;
|
array->allocs = opacity;
|
||||||
array->length = 0;
|
array->length = 0;
|
||||||
|
|
||||||
return array;
|
return array;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* free the given friso array.
|
* free the given friso array.
|
||||||
* and its items, but never where its items item pointed to .
|
* and its items, but never where its items item pointed to .
|
||||||
*/
|
*/
|
||||||
FRISO_API void free_array_list( friso_array_t array )
|
FRISO_API void free_array_list( friso_array_t array )
|
||||||
{
|
{
|
||||||
//free the allocation that all the items pointed to
|
//free the allocation that all the items pointed to
|
||||||
//register int t;
|
//register int t;
|
||||||
//if ( flag == 1 ) {
|
//if ( flag == 1 ) {
|
||||||
// for ( t = 0; t < array->length; t++ ) {
|
// for ( t = 0; t < array->length; t++ ) {
|
||||||
// if ( array->items[t] == NULL ) continue;
|
// if ( array->items[t] == NULL ) continue;
|
||||||
// FRISO_FREE( array->items[t] );
|
// FRISO_FREE( array->items[t] );
|
||||||
// array->items[t] = NULL;
|
// array->items[t] = NULL;
|
||||||
// }
|
// }
|
||||||
//}
|
//}
|
||||||
|
|
||||||
FRISO_FREE( array->items );
|
FRISO_FREE( array->items );
|
||||||
FRISO_FREE( array );
|
FRISO_FREE( array );
|
||||||
}
|
}
|
||||||
|
|
||||||
//add a new item to the array.
|
//add a new item to the array.
|
||||||
FRISO_API void array_list_add( friso_array_t array, void *value )
|
FRISO_API void array_list_add( friso_array_t array, void *value )
|
||||||
{
|
{
|
||||||
//check the condition to resize.
|
//check the condition to resize.
|
||||||
if ( array->length == array->allocs ) {
|
if ( array->length == array->allocs ) {
|
||||||
resize_array_list( array, array->length * 2 + 1 );
|
resize_array_list( array, array->length * 2 + 1 );
|
||||||
}
|
}
|
||||||
array->items[array->length++] = value;
|
array->items[array->length++] = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
//insert a new item at a specified position.
|
//insert a new item at a specified position.
|
||||||
FRISO_API void array_list_insert(
|
FRISO_API void array_list_insert(
|
||||||
friso_array_t array,
|
friso_array_t array,
|
||||||
uint_t idx,
|
uint_t idx,
|
||||||
void *value )
|
void *value )
|
||||||
{
|
{
|
||||||
register uint_t t;
|
register uint_t t;
|
||||||
|
|
||||||
if ( idx <= array->length )
|
if ( idx <= array->length )
|
||||||
{
|
{
|
||||||
//check the condition to resize the array.
|
//check the condition to resize the array.
|
||||||
if ( array->length == array->allocs ) {
|
if ( array->length == array->allocs ) {
|
||||||
resize_array_list( array, array->length * 2 + 1 );
|
resize_array_list( array, array->length * 2 + 1 );
|
||||||
}
|
}
|
||||||
|
|
||||||
//move the elements after idx.
|
//move the elements after idx.
|
||||||
//for ( t = idx; t < array->length; t++ ) {
|
//for ( t = idx; t < array->length; t++ ) {
|
||||||
// array->items[t+1] = array->items[t];
|
// array->items[t+1] = array->items[t];
|
||||||
//}
|
//}
|
||||||
for ( t = array->length - 1; t >= idx; t-- )
|
for ( t = array->length - 1; t >= idx; t-- )
|
||||||
{
|
{
|
||||||
array->items[t+1] = array->items[t];
|
array->items[t+1] = array->items[t];
|
||||||
}
|
}
|
||||||
|
|
||||||
array->items[idx] = value;
|
array->items[idx] = value;
|
||||||
array->length++;
|
array->length++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//get the item at a specified position.
|
//get the item at a specified position.
|
||||||
FRISO_API void *array_list_get( friso_array_t array, uint_t idx )
|
FRISO_API void *array_list_get( friso_array_t array, uint_t idx )
|
||||||
{
|
{
|
||||||
if ( idx < array->length ) {
|
if ( idx < array->length ) {
|
||||||
return array->items[idx];
|
return array->items[idx];
|
||||||
}
|
}
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
//set the value of the item at a specified position.
|
//set the value of the item at a specified position.
|
||||||
//this will return the old value.
|
//this will return the old value.
|
||||||
FRISO_API void * array_list_set(
|
FRISO_API void * array_list_set(
|
||||||
friso_array_t array,
|
friso_array_t array,
|
||||||
uint_t idx,
|
uint_t idx,
|
||||||
void * value )
|
void * value )
|
||||||
{
|
{
|
||||||
void * oval = NULL;
|
void * oval = NULL;
|
||||||
if ( idx < array->length )
|
if ( idx < array->length )
|
||||||
{
|
{
|
||||||
oval = array->items[idx];
|
oval = array->items[idx];
|
||||||
array->items[idx] = value;
|
array->items[idx] = value;
|
||||||
}
|
}
|
||||||
return oval;
|
return oval;
|
||||||
}
|
}
|
||||||
|
|
||||||
//remove the item at a specified position.
|
//remove the item at a specified position.
|
||||||
//this will return the value of the removed item.
|
//this will return the value of the removed item.
|
||||||
FRISO_API void * array_list_remove(
|
FRISO_API void * array_list_remove(
|
||||||
friso_array_t array, uint_t idx )
|
friso_array_t array, uint_t idx )
|
||||||
{
|
{
|
||||||
register uint_t t;
|
register uint_t t;
|
||||||
void *oval = NULL;
|
void *oval = NULL;
|
||||||
|
|
||||||
if ( idx < array->length )
|
if ( idx < array->length )
|
||||||
{
|
{
|
||||||
oval = array->items[idx];
|
oval = array->items[idx];
|
||||||
//move the elements after idx.
|
//move the elements after idx.
|
||||||
for ( t = idx; t < array->length - 1; t++ ) {
|
for ( t = idx; t < array->length - 1; t++ ) {
|
||||||
array->items[t] = array->items[ t + 1 ];
|
array->items[t] = array->items[ t + 1 ];
|
||||||
}
|
}
|
||||||
array->items[array->length - 1] = NULL;
|
array->items[array->length - 1] = NULL;
|
||||||
array->length--;
|
array->length--;
|
||||||
}
|
}
|
||||||
|
|
||||||
return oval;
|
return oval;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*trim the array list*/
|
/*trim the array list*/
|
||||||
FRISO_API friso_array_t array_list_trim( friso_array_t array )
|
FRISO_API friso_array_t array_list_trim( friso_array_t array )
|
||||||
{
|
{
|
||||||
if ( array->length < array->allocs ) {
|
if ( array->length < array->allocs ) {
|
||||||
return resize_array_list( array, array->length );
|
return resize_array_list( array, array->length );
|
||||||
}
|
}
|
||||||
return array;
|
return array;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* clear the array list.
|
* clear the array list.
|
||||||
* this function will free all the allocations that the pointer pointed.
|
* this function will free all the allocations that the pointer pointed.
|
||||||
* but will not free the point array allocations,
|
* but will not free the point array allocations,
|
||||||
* and will reset the length of it.
|
* and will reset the length of it.
|
||||||
*/
|
*/
|
||||||
FRISO_API friso_array_t array_list_clear( friso_array_t array )
|
FRISO_API friso_array_t array_list_clear( friso_array_t array )
|
||||||
{
|
{
|
||||||
register uint_t t;
|
register uint_t t;
|
||||||
//free all the allocations that the array->length's pointer pointed.
|
//free all the allocations that the array->length's pointer pointed.
|
||||||
for ( t = 0; t < array->length; t++ ) {
|
for ( t = 0; t < array->length; t++ ) {
|
||||||
/*if ( array->items[t] == NULL ) continue;
|
/*if ( array->items[t] == NULL ) continue;
|
||||||
FRISO_FREE( array->items[t] ); */
|
FRISO_FREE( array->items[t] ); */
|
||||||
array->items[t] = NULL;
|
array->items[t] = NULL;
|
||||||
}
|
}
|
||||||
//attribute reset.
|
//attribute reset.
|
||||||
array->length = 0;
|
array->length = 0;
|
||||||
|
|
||||||
return array;
|
return array;
|
||||||
}
|
}
|
||||||
|
|
||||||
//get the size of the array list. (A macro definition has replaced this.)
|
//get the size of the array list. (A macro definition has replaced this.)
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/**
|
/**
|
||||||
* friso string type check function interface,
|
* friso string type check function interface,
|
||||||
* like english/CJK, full-width/half-width, punctuation or not.
|
* like english/CJK, full-width/half-width, punctuation or not.
|
||||||
* @see friso_UTF8.c and friso_GBK.c for detail.
|
* @see friso_UTF8.c and friso_GBK.c for detail.
|
||||||
*
|
*
|
||||||
* @author chenxin <chenxin619315@gmail.com>
|
* @author chenxin <chenxin619315@gmail.com>
|
||||||
*/
|
*/
|
||||||
@ -16,25 +16,25 @@
|
|||||||
* @return int (true for cn string or false)
|
* @return int (true for cn string or false)
|
||||||
* */
|
* */
|
||||||
FRISO_API int friso_cn_string(
|
FRISO_API int friso_cn_string(
|
||||||
friso_charset_t charset,
|
friso_charset_t charset,
|
||||||
friso_task_t task )
|
friso_task_t task )
|
||||||
{
|
{
|
||||||
if ( charset == FRISO_UTF8 )
|
if ( charset == FRISO_UTF8 )
|
||||||
return utf8_cjk_string(task->unicode);
|
return utf8_cjk_string(task->unicode);
|
||||||
else if ( charset == FRISO_GBK )
|
else if ( charset == FRISO_GBK )
|
||||||
return gbk_cn_string(task->buffer);
|
return gbk_cn_string(task->buffer);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
//check if the specified word is a whitespace.
|
//check if the specified word is a whitespace.
|
||||||
FRISO_API int friso_whitespace(
|
FRISO_API int friso_whitespace(
|
||||||
friso_charset_t charset,
|
friso_charset_t charset,
|
||||||
friso_task_t task )
|
friso_task_t task )
|
||||||
{
|
{
|
||||||
if ( charset == FRISO_UTF8 )
|
if ( charset == FRISO_UTF8 )
|
||||||
return utf8_whitespace(task->unicode);
|
return utf8_whitespace(task->unicode);
|
||||||
else if ( charset == FRISO_GBK )
|
else if ( charset == FRISO_GBK )
|
||||||
return gbk_whitespace(task->buffer);
|
return gbk_whitespace(task->buffer);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -52,76 +52,76 @@ FRISO_API int friso_numeric_letter(
|
|||||||
|
|
||||||
//check if the specified word is aa english letter.
|
//check if the specified word is aa english letter.
|
||||||
FRISO_API int friso_en_letter(
|
FRISO_API int friso_en_letter(
|
||||||
friso_charset_t charset,
|
friso_charset_t charset,
|
||||||
friso_task_t task )
|
friso_task_t task )
|
||||||
{
|
{
|
||||||
if ( charset == FRISO_UTF8 )
|
if ( charset == FRISO_UTF8 )
|
||||||
return utf8_en_letter( ( uint_t ) task->text[task->idx]);
|
return utf8_en_letter( ( uint_t ) task->text[task->idx]);
|
||||||
else if ( charset == FRISO_GBK )
|
else if ( charset == FRISO_GBK )
|
||||||
return gbk_en_letter( task->text + task->idx );
|
return gbk_en_letter( task->text + task->idx );
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
//check if the specified word is a half-width letter.
|
//check if the specified word is a half-width letter.
|
||||||
// punctuations are inclued.
|
// punctuations are inclued.
|
||||||
FRISO_API int friso_halfwidth_en_char(
|
FRISO_API int friso_halfwidth_en_char(
|
||||||
friso_charset_t charset,
|
friso_charset_t charset,
|
||||||
friso_task_t task )
|
friso_task_t task )
|
||||||
{
|
{
|
||||||
if ( charset == FRISO_UTF8 )
|
if ( charset == FRISO_UTF8 )
|
||||||
return utf8_halfwidth_en_char(task->unicode);
|
return utf8_halfwidth_en_char(task->unicode);
|
||||||
else if ( charset == FRISO_GBK )
|
else if ( charset == FRISO_GBK )
|
||||||
return gbk_halfwidth_en_char(task->buffer[0]);
|
return gbk_halfwidth_en_char(task->buffer[0]);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
//check if the specified word is a full-width letter.
|
//check if the specified word is a full-width letter.
|
||||||
// full-width punctuations are not included.
|
// full-width punctuations are not included.
|
||||||
FRISO_API int friso_fullwidth_en_char(
|
FRISO_API int friso_fullwidth_en_char(
|
||||||
friso_charset_t charset,
|
friso_charset_t charset,
|
||||||
friso_task_t task )
|
friso_task_t task )
|
||||||
{
|
{
|
||||||
if ( charset == FRISO_UTF8 )
|
if ( charset == FRISO_UTF8 )
|
||||||
return utf8_fullwidth_en_char( task->unicode );
|
return utf8_fullwidth_en_char( task->unicode );
|
||||||
else if ( charset == FRISO_GBK )
|
else if ( charset == FRISO_GBK )
|
||||||
return gbk_fullwidth_en_char( task->buffer );
|
return gbk_fullwidth_en_char( task->buffer );
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
//check if the specified word is an english punctuations.
|
//check if the specified word is an english punctuations.
|
||||||
FRISO_API int friso_en_punctuation(
|
FRISO_API int friso_en_punctuation(
|
||||||
friso_charset_t charset,
|
friso_charset_t charset,
|
||||||
friso_task_t task )
|
friso_task_t task )
|
||||||
{
|
{
|
||||||
if ( charset == FRISO_UTF8 )
|
if ( charset == FRISO_UTF8 )
|
||||||
return utf8_en_punctuation( task->unicode );
|
return utf8_en_punctuation( task->unicode );
|
||||||
else if ( charset == FRISO_GBK )
|
else if ( charset == FRISO_GBK )
|
||||||
return gbk_en_punctuation( task->buffer[0] );
|
return gbk_en_punctuation( task->buffer[0] );
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
//check if the specified word ia sn chinese punctuation.
|
//check if the specified word ia sn chinese punctuation.
|
||||||
FRISO_API int friso_cn_punctuation(
|
FRISO_API int friso_cn_punctuation(
|
||||||
friso_charset_t charset,
|
friso_charset_t charset,
|
||||||
friso_task_t task )
|
friso_task_t task )
|
||||||
{
|
{
|
||||||
if ( charset == FRISO_UTF8 )
|
if ( charset == FRISO_UTF8 )
|
||||||
return utf8_cn_punctuation( task->unicode );
|
return utf8_cn_punctuation( task->unicode );
|
||||||
else if ( charset == FRISO_GBK )
|
else if ( charset == FRISO_GBK )
|
||||||
return gbk_cn_punctuation( task->buffer );
|
return gbk_cn_punctuation( task->buffer );
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
FRISO_API int friso_letter_number(
|
FRISO_API int friso_letter_number(
|
||||||
friso_charset_t charset,
|
friso_charset_t charset,
|
||||||
friso_task_t task )
|
friso_task_t task )
|
||||||
{
|
{
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
FRISO_API int friso_other_number(
|
FRISO_API int friso_other_number(
|
||||||
friso_charset_t charset,
|
friso_charset_t charset,
|
||||||
friso_task_t task )
|
friso_task_t task )
|
||||||
{
|
{
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -129,98 +129,98 @@ FRISO_API int friso_other_number(
|
|||||||
//check if the word is a keep punctuation.
|
//check if the word is a keep punctuation.
|
||||||
//@Deprecated
|
//@Deprecated
|
||||||
//FRISO_API int friso_keep_punctuation(
|
//FRISO_API int friso_keep_punctuation(
|
||||||
// friso_charset_t charset,
|
// friso_charset_t charset,
|
||||||
// friso_task_t task )
|
// friso_task_t task )
|
||||||
//{
|
//{
|
||||||
// if ( charset == FRISO_UTF8 )
|
// if ( charset == FRISO_UTF8 )
|
||||||
// return utf8_keep_punctuation( task->buffer );
|
// return utf8_keep_punctuation( task->buffer );
|
||||||
// else if ( charset == FRISO_GBK )
|
// else if ( charset == FRISO_GBK )
|
||||||
// return gbk_keep_punctuation( task->buffer );
|
// return gbk_keep_punctuation( task->buffer );
|
||||||
// return 0;
|
// return 0;
|
||||||
//}
|
//}
|
||||||
|
|
||||||
//check if the specified char is en english punctuation.
|
//check if the specified char is en english punctuation.
|
||||||
// this function is the same as friso_en_punctuation.
|
// this function is the same as friso_en_punctuation.
|
||||||
FRISO_API int is_en_punctuation(
|
FRISO_API int is_en_punctuation(
|
||||||
friso_charset_t charset, char c )
|
friso_charset_t charset, char c )
|
||||||
{
|
{
|
||||||
if ( charset == FRISO_UTF8 )
|
if ( charset == FRISO_UTF8 )
|
||||||
return utf8_en_punctuation( (uint_t) c);
|
return utf8_en_punctuation( (uint_t) c);
|
||||||
else if ( charset == FRISO_GBK )
|
else if ( charset == FRISO_GBK )
|
||||||
return gbk_en_punctuation( c );
|
return gbk_en_punctuation( c );
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
//check the specified string is make up with numeric.
|
//check the specified string is make up with numeric.
|
||||||
FRISO_API int friso_numeric_string(
|
FRISO_API int friso_numeric_string(
|
||||||
friso_charset_t charset,
|
friso_charset_t charset,
|
||||||
char *buffer )
|
char *buffer )
|
||||||
{
|
{
|
||||||
if ( charset == FRISO_UTF8 )
|
if ( charset == FRISO_UTF8 )
|
||||||
return utf8_numeric_string( buffer );
|
return utf8_numeric_string( buffer );
|
||||||
else if ( charset == FRISO_GBK )
|
else if ( charset == FRISO_GBK )
|
||||||
return gbk_numeric_string( buffer );
|
return gbk_numeric_string( buffer );
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
//check the specified string is a decimal string.
|
//check the specified string is a decimal string.
|
||||||
FRISO_API int friso_decimal_string(
|
FRISO_API int friso_decimal_string(
|
||||||
friso_charset_t charset, char *buffer )
|
friso_charset_t charset, char *buffer )
|
||||||
{
|
{
|
||||||
if ( charset == FRISO_UTF8 )
|
if ( charset == FRISO_UTF8 )
|
||||||
return utf8_decimal_string( buffer );
|
return utf8_decimal_string( buffer );
|
||||||
else if ( charset == FRISO_GBK )
|
else if ( charset == FRISO_GBK )
|
||||||
return gbk_decimal_string( buffer );
|
return gbk_decimal_string( buffer );
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
//check if the specified char is english uppercase letter.
|
//check if the specified char is english uppercase letter.
|
||||||
// included full-width and half-width letters.
|
// included full-width and half-width letters.
|
||||||
FRISO_API int friso_uppercase_letter(
|
FRISO_API int friso_uppercase_letter(
|
||||||
friso_charset_t charset,
|
friso_charset_t charset,
|
||||||
friso_task_t task )
|
friso_task_t task )
|
||||||
{
|
{
|
||||||
if ( charset == FRISO_UTF8 )
|
if ( charset == FRISO_UTF8 )
|
||||||
return utf8_uppercase_letter( task->unicode );
|
return utf8_uppercase_letter( task->unicode );
|
||||||
else if ( charset == FRISO_GBK )
|
else if ( charset == FRISO_GBK )
|
||||||
return gbk_uppercase_letter( task->buffer );
|
return gbk_uppercase_letter( task->buffer );
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* get the type of the specified char.
|
/* get the type of the specified char.
|
||||||
* the type will be the constants defined above.
|
* the type will be the constants defined above.
|
||||||
* (include the fullwidth english char.)
|
* (include the fullwidth english char.)
|
||||||
*/
|
*/
|
||||||
FRISO_API friso_enchar_t friso_enchar_type(
|
FRISO_API friso_enchar_t friso_enchar_type(
|
||||||
friso_charset_t charset,
|
friso_charset_t charset,
|
||||||
friso_task_t task )
|
friso_task_t task )
|
||||||
{
|
{
|
||||||
//Unicode or ASCII.(Both UTF-8 and GBK are valid)
|
//Unicode or ASCII.(Both UTF-8 and GBK are valid)
|
||||||
uint_t u = 0;
|
uint_t u = 0;
|
||||||
|
|
||||||
if ( charset == FRISO_UTF8 )
|
if ( charset == FRISO_UTF8 )
|
||||||
{
|
{
|
||||||
u = task->unicode;
|
u = task->unicode;
|
||||||
//if ( u >= 65280 ) u = 65280 - 65248;
|
//if ( u >= 65280 ) u = 65280 - 65248;
|
||||||
}
|
}
|
||||||
else if ( charset == FRISO_GBK )
|
else if ( charset == FRISO_GBK )
|
||||||
{
|
{
|
||||||
u = (uchar_t)task->buffer[0];
|
u = (uchar_t)task->buffer[0];
|
||||||
//if ( u == 0xa3 ) ; //full-width.
|
//if ( u == 0xa3 ) ; //full-width.
|
||||||
}
|
}
|
||||||
|
|
||||||
//range check.
|
//range check.
|
||||||
if ( u > 126 || u < 32 ) return FRISO_EN_UNKNOW;
|
if ( u > 126 || u < 32 ) return FRISO_EN_UNKNOW;
|
||||||
if ( u == 32 ) return FRISO_EN_WHITESPACE;
|
if ( u == 32 ) return FRISO_EN_WHITESPACE;
|
||||||
if ( u >= 48 && u <= 57 ) return FRISO_EN_NUMERIC;
|
if ( u >= 48 && u <= 57 ) return FRISO_EN_NUMERIC;
|
||||||
if ( u >= 65 && u <= 90 ) return FRISO_EN_LETTER;
|
if ( u >= 65 && u <= 90 ) return FRISO_EN_LETTER;
|
||||||
if ( u >= 97 && u <= 122 ) return FRISO_EN_LETTER;
|
if ( u >= 97 && u <= 122 ) return FRISO_EN_LETTER;
|
||||||
|
|
||||||
return FRISO_EN_PUNCTUATION;
|
return FRISO_EN_PUNCTUATION;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* get the type of the specified en char.
|
/* get the type of the specified en char.
|
||||||
* the type will be the constants defined above.
|
* the type will be the constants defined above.
|
||||||
* (the char should be half-width english char only)
|
* (the char should be half-width english char only)
|
||||||
*/
|
*/
|
||||||
FRISO_API friso_enchar_t get_enchar_type( char ch )
|
FRISO_API friso_enchar_t get_enchar_type( char ch )
|
||||||
@ -228,11 +228,11 @@ FRISO_API friso_enchar_t get_enchar_type( char ch )
|
|||||||
uint_t u = (uchar_t) ch;
|
uint_t u = (uchar_t) ch;
|
||||||
|
|
||||||
//range check.
|
//range check.
|
||||||
if ( u > 126 || u < 32 ) return FRISO_EN_UNKNOW;
|
if ( u > 126 || u < 32 ) return FRISO_EN_UNKNOW;
|
||||||
if ( u == 32 ) return FRISO_EN_WHITESPACE;
|
if ( u == 32 ) return FRISO_EN_WHITESPACE;
|
||||||
if ( u >= 48 && u <= 57 ) return FRISO_EN_NUMERIC;
|
if ( u >= 48 && u <= 57 ) return FRISO_EN_NUMERIC;
|
||||||
if ( u >= 65 && u <= 90 ) return FRISO_EN_LETTER;
|
if ( u >= 65 && u <= 90 ) return FRISO_EN_LETTER;
|
||||||
if ( u >= 97 && u <= 122 ) return FRISO_EN_LETTER;
|
if ( u >= 97 && u <= 122 ) return FRISO_EN_LETTER;
|
||||||
|
|
||||||
return FRISO_EN_PUNCTUATION;
|
return FRISO_EN_PUNCTUATION;
|
||||||
}
|
}
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
/**
|
/**
|
||||||
* Friso charset about function interface header file.
|
* Friso charset about function interface header file.
|
||||||
* @package src/friso_charset.h .
|
* @package src/friso_charset.h .
|
||||||
* Available charset for now:
|
* Available charset for now:
|
||||||
* 1. UTF8 - function start with utf8
|
* 1. UTF8 - function start with utf8
|
||||||
* 2. GBK - function start with gbk
|
* 2. GBK - function start with gbk
|
||||||
*
|
*
|
||||||
* @author chenxin <chenxin619315@gmail.com>
|
* @author chenxin <chenxin619315@gmail.com>
|
||||||
*/
|
*/
|
||||||
@ -33,11 +33,11 @@ FRISO_API int friso_numeric_letter(friso_charset_t, friso_task_t);
|
|||||||
FRISO_API int friso_en_letter( friso_charset_t, friso_task_t );
|
FRISO_API int friso_en_letter( friso_charset_t, friso_task_t );
|
||||||
|
|
||||||
//check if the specified word is a half-width letter.
|
//check if the specified word is a half-width letter.
|
||||||
// punctuations are included.
|
// punctuations are included.
|
||||||
FRISO_API int friso_halfwidth_en_char( friso_charset_t, friso_task_t );
|
FRISO_API int friso_halfwidth_en_char( friso_charset_t, friso_task_t );
|
||||||
|
|
||||||
//check if the specified word is a full-width letter.
|
//check if the specified word is a full-width letter.
|
||||||
// full-width punctuations are not included.
|
// full-width punctuations are not included.
|
||||||
FRISO_API int friso_fullwidth_en_char( friso_charset_t, friso_task_t );
|
FRISO_API int friso_fullwidth_en_char( friso_charset_t, friso_task_t );
|
||||||
|
|
||||||
//check if the specified word is an english punctuations.
|
//check if the specified word is an english punctuations.
|
||||||
@ -60,32 +60,32 @@ FRISO_API int friso_numeric_string( friso_charset_t, char * );
|
|||||||
FRISO_API int friso_decimal_string( friso_charset_t, char * );
|
FRISO_API int friso_decimal_string( friso_charset_t, char * );
|
||||||
|
|
||||||
//check if the specified char is english uppercase letter.
|
//check if the specified char is english uppercase letter.
|
||||||
// included full-width and half-width letters.
|
// included full-width and half-width letters.
|
||||||
FRISO_API int friso_uppercase_letter( friso_charset_t, friso_task_t );
|
FRISO_API int friso_uppercase_letter( friso_charset_t, friso_task_t );
|
||||||
|
|
||||||
|
|
||||||
//en char type.
|
//en char type.
|
||||||
//#define FRISO_EN_LETTER 0 //a-z && A-Z
|
//#define FRISO_EN_LETTER 0 //a-z && A-Z
|
||||||
//#define FRISO_EN_NUMERIC 1 //0-9
|
//#define FRISO_EN_NUMERIC 1 //0-9
|
||||||
//#define FRISO_EN_PUNCTUATION 2 //english punctuations
|
//#define FRISO_EN_PUNCTUATION 2 //english punctuations
|
||||||
//#define FRISO_EN_WHITESPACE 3 //whitespace
|
//#define FRISO_EN_WHITESPACE 3 //whitespace
|
||||||
//#define FRISO_EN_UNKNOW -1 //beyond 32-122
|
//#define FRISO_EN_UNKNOW -1 //beyond 32-122
|
||||||
/* type of an english char, as returned by
 * friso_enchar_type and get_enchar_type. */
typedef enum {
    FRISO_EN_UNKNOW      = -1,  //unknown (beyond 32-126)
    FRISO_EN_LETTER      =  0,  //A-Z, a-z
    FRISO_EN_NUMERIC,           //0-9                  (value 1)
    FRISO_EN_PUNCTUATION,       //english punctuations (value 2)
    FRISO_EN_WHITESPACE         //whitespace           (value 3)
} friso_enchar_t;
|
||||||
|
|
||||||
/* get the type of the specified char.
|
/* get the type of the specified char.
|
||||||
* the type will be the constants defined above.
|
* the type will be the constants defined above.
|
||||||
* (include the fullwidth english char.)
|
* (include the fullwidth english char.)
|
||||||
*/
|
*/
|
||||||
FRISO_API friso_enchar_t friso_enchar_type( friso_charset_t, friso_task_t );
|
FRISO_API friso_enchar_t friso_enchar_type( friso_charset_t, friso_task_t );
|
||||||
|
|
||||||
/* get the type of the specified en char.
|
/* get the type of the specified en char.
|
||||||
* the type will be the constants defined above.
|
* the type will be the constants defined above.
|
||||||
* (the char should be half-width english char only)
|
* (the char should be half-width english char only)
|
||||||
*/
|
*/
|
||||||
FRISO_API friso_enchar_t get_enchar_type( char );
|
FRISO_API friso_enchar_t get_enchar_type( char );
|
||||||
@ -99,7 +99,7 @@ FRISO_API friso_enchar_t get_enchar_type( char );
|
|||||||
|
|
||||||
/* read the next utf-8 word from the specified position.
|
/* read the next utf-8 word from the specified position.
|
||||||
*
|
*
|
||||||
* @return int the number of bytes of the word that was read.
|
* @return int the number of bytes of the word that was read.
|
||||||
*/
|
*/
|
||||||
FRISO_API int utf8_next_word( friso_task_t, uint_t *, fstring );
|
FRISO_API int utf8_next_word( friso_task_t, uint_t *, fstring );
|
||||||
|
|
||||||
@ -116,31 +116,31 @@ FRISO_API int unicode_to_utf8( uint_t, fstring );
|
|||||||
FRISO_API int utf8_cjk_string( uint_t ) ;
|
FRISO_API int utf8_cjk_string( uint_t ) ;
|
||||||
|
|
||||||
/*check the given char is a Basic Latin letter or not.
|
/*check the given char is a Basic Latin letter or not.
|
||||||
* includes all the letters and english punctuations.*/
|
* includes all the letters and english punctuations.*/
|
||||||
FRISO_API int utf8_halfwidth_en_char( uint_t );
|
FRISO_API int utf8_halfwidth_en_char( uint_t );
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* check whether the given char is a full-width latin char or not.
|
* check whether the given char is a full-width latin char or not.
|
||||||
* includes the full-width arabic numbers and letters,
|
* includes the full-width arabic numbers and letters,
|
||||||
* but not the full-width punctuations.
|
* but not the full-width punctuations.
|
||||||
*/
|
*/
|
||||||
FRISO_API int utf8_fullwidth_en_char( uint_t );
|
FRISO_API int utf8_fullwidth_en_char( uint_t );
|
||||||
|
|
||||||
//check the given char is a upper case letter or not.
|
//check the given char is a upper case letter or not.
|
||||||
// included all the full-width and half-width letters.
|
// included all the full-width and half-width letters.
|
||||||
FRISO_API int utf8_uppercase_letter( uint_t );
|
FRISO_API int utf8_uppercase_letter( uint_t );
|
||||||
|
|
||||||
//check the given char is a lower case letter or not.
|
//check the given char is a lower case letter or not.
|
||||||
// included all the full-width and half-width letters.
|
// included all the full-width and half-width letters.
|
||||||
FRISO_API int utf8_lowercase_letter( uint_t );
|
FRISO_API int utf8_lowercase_letter( uint_t );
|
||||||
|
|
||||||
//check the given char is a numeric.
|
//check the given char is a numeric.
|
||||||
// included the full-width and half-width arabic numeric.
|
// included the full-width and half-width arabic numeric.
|
||||||
FRISO_API int utf8_numeric_letter( uint_t );
|
FRISO_API int utf8_numeric_letter( uint_t );
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* check if the given fstring is make up with numeric chars.
|
* check if the given fstring is make up with numeric chars.
|
||||||
* both full-width,half-width numeric is ok.
|
* both full-width,half-width numeric is ok.
|
||||||
*/
|
*/
|
||||||
FRISO_API int utf8_numeric_string( char * );
|
FRISO_API int utf8_numeric_string( char * );
|
||||||
|
|
||||||
@ -183,7 +183,7 @@ FRISO_API int is_en_punctuation( friso_charset_t, char );
|
|||||||
|
|
||||||
/* read the next GBK word from the specified position.
|
/* read the next GBK word from the specified position.
|
||||||
*
|
*
|
||||||
* @return int the number of bytes of the word that was read.
|
* @return int the number of bytes of the word that was read.
|
||||||
*/
|
*/
|
||||||
FRISO_API int gbk_next_word( friso_task_t, uint_t *, fstring );
|
FRISO_API int gbk_next_word( friso_task_t, uint_t *, fstring );
|
||||||
|
|
||||||
@ -194,31 +194,31 @@ FRISO_API int get_gbk_bytes( char );
|
|||||||
FRISO_API int gbk_cn_string( char * ) ;
|
FRISO_API int gbk_cn_string( char * ) ;
|
||||||
|
|
||||||
/*check if the given char is a ASCII letter
|
/*check if the given char is a ASCII letter
|
||||||
* includes all the letters and english punctuations.*/
|
* includes all the letters and english punctuations.*/
|
||||||
FRISO_API int gbk_halfwidth_en_char( char );
|
FRISO_API int gbk_halfwidth_en_char( char );
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* check if the given char is a full-width latin char.
|
* check if the given char is a full-width latin char.
|
||||||
* includes the full-width arabic numbers and letters,
|
* includes the full-width arabic numbers and letters,
|
||||||
* but not the full-width punctuations.
|
* but not the full-width punctuations.
|
||||||
*/
|
*/
|
||||||
FRISO_API int gbk_fullwidth_en_char( char * );
|
FRISO_API int gbk_fullwidth_en_char( char * );
|
||||||
|
|
||||||
//check if the given char is a upper case char.
|
//check if the given char is a upper case char.
|
||||||
// included all the full-width and half-width letters.
|
// included all the full-width and half-width letters.
|
||||||
FRISO_API int gbk_uppercase_letter( char * );
|
FRISO_API int gbk_uppercase_letter( char * );
|
||||||
|
|
||||||
//check if the given char is a lower case char.
|
//check if the given char is a lower case char.
|
||||||
// included all the full-width and half-width letters.
|
// included all the full-width and half-width letters.
|
||||||
FRISO_API int gbk_lowercase_letter( char * );
|
FRISO_API int gbk_lowercase_letter( char * );
|
||||||
|
|
||||||
//check if the given char is a numeric.
|
//check if the given char is a numeric.
|
||||||
// included the full-width and half-width arabic numeric.
|
// included the full-width and half-width arabic numeric.
|
||||||
FRISO_API int gbk_numeric_letter( char * );
|
FRISO_API int gbk_numeric_letter( char * );
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* check if the given fstring is make up with numeric chars.
|
* check if the given fstring is make up with numeric chars.
|
||||||
* both full-width,half-width numeric is ok.
|
* both full-width,half-width numeric is ok.
|
||||||
*/
|
*/
|
||||||
FRISO_API int gbk_numeric_string( char * );
|
FRISO_API int gbk_numeric_string( char * );
|
||||||
|
|
||||||
@ -248,7 +248,7 @@ FRISO_API int gbk_en_punctuation( char ) ;
|
|||||||
FRISO_API int gbk_cn_punctuation( char * );
|
FRISO_API int gbk_cn_punctuation( char * );
|
||||||
|
|
||||||
//cause the logic handle is the same as the utf8.
|
//cause the logic handle is the same as the utf8.
|
||||||
// here invoke the utf8 interface directly.
|
// here invoke the utf8 interface directly.
|
||||||
//FRISO_API int gbk_keep_punctuation( char * );
|
//FRISO_API int gbk_keep_punctuation( char * );
|
||||||
//@Deprecated
|
//@Deprecated
|
||||||
//#define gbk_keep_punctuation( str ) utf8_keep_punctuation(str)
|
//#define gbk_keep_punctuation( str ) utf8_keep_punctuation(str)
|
||||||
@ -257,4 +257,4 @@ FRISO_API int gbk_cn_punctuation( char * );
|
|||||||
//FRISO_API int gbk_fullwidth_char( char * ) ;
|
//FRISO_API int gbk_fullwidth_char( char * ) ;
|
||||||
/* }}}*/
|
/* }}}*/
|
||||||
|
|
||||||
#endif /*end _friso_charset_h*/
|
#endif /*end _friso_charset_h*/
|
||||||
|
180
src/friso_hash.c
180
src/friso_hash.c
@ -1,8 +1,8 @@
|
|||||||
/*
|
/*
|
||||||
* friso hash table implements functions
|
* friso hash table implements functions
|
||||||
* defined in header file "friso_API.h".
|
* defined in header file "friso_API.h".
|
||||||
*
|
*
|
||||||
* @author chenxin <chenxin619315@gmail.com>
|
* @author chenxin <chenxin619315@gmail.com>
|
||||||
*/
|
*/
|
||||||
#include "friso_API.h"
|
#include "friso_API.h"
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
@ -10,7 +10,7 @@
|
|||||||
|
|
||||||
//-166411799L
|
//-166411799L
|
||||||
//31 131 1331 13331 133331 ..
|
//31 131 1331 13331 133331 ..
|
||||||
//31 131 1313 13131 131313 .. the best
|
//31 131 1313 13131 131313 .. the best
|
||||||
#define HASH_FACTOR 1313131
|
#define HASH_FACTOR 1313131
|
||||||
|
|
||||||
/* ************************
|
/* ************************
|
||||||
@ -22,7 +22,7 @@ __STATIC_API__ uint_t hash( fstring str, uint_t length )
|
|||||||
uint_t h = 0;
|
uint_t h = 0;
|
||||||
|
|
||||||
while ( *str != '\0' )
|
while ( *str != '\0' )
|
||||||
h = h * HASH_FACTOR + ( *str++ );
|
h = h * HASH_FACTOR + ( *str++ );
|
||||||
|
|
||||||
return (h % length);
|
return (h % length);
|
||||||
}
|
}
|
||||||
/*
 * Test whether n is a prime number.
 *
 * Returns 1 when n is prime and 0 otherwise. Values below 2 are never
 * prime. Trial division runs over every odd divisor j with j * j <= n;
 * the bound has to be inclusive, otherwise the squares of odd primes
 * (9, 25, 49, ...) would be reported as prime.
 */
__STATIC_API__ int is_prime( int n )
{
    int j;

    if ( n < 2 ) {
        return 0;
    }
    if ( n == 2 || n == 3 ) {
        return 1;
    }
    if ( n % 2 == 0 ) {
        return 0;
    }

    /* j <= n / j is j * j <= n without the risk of overflowing int. */
    for ( j = 3; j <= n / j; j += 2 ) {
        if ( n % j == 0 ) {
            return 0;
        }
    }

    return 1;
}
|
||||||
@ -47,7 +47,7 @@ __STATIC_API__ int is_prime( int n )
|
|||||||
__STATIC_API__ int next_prime( int n )
|
__STATIC_API__ int next_prime( int n )
|
||||||
{
|
{
|
||||||
if ( n % 2 == 0 )
|
if ( n % 2 == 0 )
|
||||||
n++;
|
n++;
|
||||||
for ( ; ! is_prime( n ); n = n + 2 ) ;
|
for ( ; ! is_prime( n ); n = n + 2 ) ;
|
||||||
|
|
||||||
return n;
|
return n;
|
||||||
@ -72,14 +72,14 @@ __STATIC_API__ int next_prime( int n )
|
|||||||
* static hashtable function area. *
|
* static hashtable function area. *
|
||||||
***********************************/
|
***********************************/
|
||||||
__STATIC_API__ hash_entry_t new_hash_entry(
|
__STATIC_API__ hash_entry_t new_hash_entry(
|
||||||
fstring key,
|
fstring key,
|
||||||
void * value,
|
void * value,
|
||||||
hash_entry_t next )
|
hash_entry_t next )
|
||||||
{
|
{
|
||||||
hash_entry_t e = ( hash_entry_t )
|
hash_entry_t e = ( hash_entry_t )
|
||||||
FRISO_MALLOC( sizeof( friso_hash_entry ) );
|
FRISO_MALLOC( sizeof( friso_hash_entry ) );
|
||||||
if ( e == NULL ) {
|
if ( e == NULL ) {
|
||||||
___ALLOCATION_ERROR___
|
___ALLOCATION_ERROR___
|
||||||
}
|
}
|
||||||
|
|
||||||
//e->_key = string_copy( key );
|
//e->_key = string_copy( key );
|
||||||
@ -95,13 +95,13 @@ __STATIC_API__ hash_entry_t * create_hash_entries( uint_t blocks )
|
|||||||
{
|
{
|
||||||
register uint_t t;
|
register uint_t t;
|
||||||
hash_entry_t *e = ( hash_entry_t * )
|
hash_entry_t *e = ( hash_entry_t * )
|
||||||
FRISO_CALLOC( sizeof( hash_entry_t ), blocks );
|
FRISO_CALLOC( sizeof( hash_entry_t ), blocks );
|
||||||
if ( e == NULL ) {
|
if ( e == NULL ) {
|
||||||
___ALLOCATION_ERROR___
|
___ALLOCATION_ERROR___
|
||||||
}
|
}
|
||||||
|
|
||||||
for ( t = 0; t < blocks; t++ ) {
|
for ( t = 0; t < blocks; t++ ) {
|
||||||
e[t] = NULL;
|
e[t] = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
return e;
|
return e;
|
||||||
@ -114,22 +114,22 @@ __STATIC_API__ void rebuild_hash( friso_hash_t _hash )
|
|||||||
//find the next prime as the length of the hashtable.
|
//find the next prime as the length of the hashtable.
|
||||||
uint_t t, length = next_prime( _hash->length * 2 + 1 );
|
uint_t t, length = next_prime( _hash->length * 2 + 1 );
|
||||||
hash_entry_t e, next, *_src = _hash->table, \
|
hash_entry_t e, next, *_src = _hash->table, \
|
||||||
*table = create_hash_entries( length );
|
*table = create_hash_entries( length );
|
||||||
uint_t bucket;
|
uint_t bucket;
|
||||||
|
|
||||||
//copy the nodes
|
//copy the nodes
|
||||||
for ( t = 0; t < _hash->length; t++ )
|
for ( t = 0; t < _hash->length; t++ )
|
||||||
{
|
{
|
||||||
e = *( _src + t );
|
e = *( _src + t );
|
||||||
if ( e != NULL ) {
|
if ( e != NULL ) {
|
||||||
do {
|
do {
|
||||||
next = e->_next;
|
next = e->_next;
|
||||||
bucket = hash( e->_key, length );
|
bucket = hash( e->_key, length );
|
||||||
e->_next = table[bucket];
|
e->_next = table[bucket];
|
||||||
table[bucket] = e;
|
table[bucket] = e;
|
||||||
e = next;
|
e = next;
|
||||||
} while ( e != NULL );
|
} while ( e != NULL );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
_hash->table = table;
|
_hash->table = table;
|
||||||
@ -149,35 +149,35 @@ FRISO_API friso_hash_t new_hash_table( void )
|
|||||||
{
|
{
|
||||||
friso_hash_t _hash = ( friso_hash_t ) FRISO_MALLOC( sizeof ( friso_hash_cdt ) );
|
friso_hash_t _hash = ( friso_hash_t ) FRISO_MALLOC( sizeof ( friso_hash_cdt ) );
|
||||||
if ( _hash == NULL ) {
|
if ( _hash == NULL ) {
|
||||||
___ALLOCATION_ERROR___
|
___ALLOCATION_ERROR___
|
||||||
}
|
}
|
||||||
|
|
||||||
//initialize the the hashtable
|
//initialize the the hashtable
|
||||||
_hash->length = DEFAULT_LENGTH;
|
_hash->length = DEFAULT_LENGTH;
|
||||||
_hash->size = 0;
|
_hash->size = 0;
|
||||||
_hash->factor = DEFAULT_FACTOR;
|
_hash->factor = DEFAULT_FACTOR;
|
||||||
_hash->threshold = ( uint_t ) ( _hash->length * _hash->factor );
|
_hash->threshold = ( uint_t ) ( _hash->length * _hash->factor );
|
||||||
_hash->table = create_hash_entries( _hash->length );
|
_hash->table = create_hash_entries( _hash->length );
|
||||||
|
|
||||||
return _hash;
|
return _hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
FRISO_API void free_hash_table(
|
FRISO_API void free_hash_table(
|
||||||
friso_hash_t _hash,
|
friso_hash_t _hash,
|
||||||
fhash_callback_fn_t fentry_func )
|
fhash_callback_fn_t fentry_func )
|
||||||
{
|
{
|
||||||
register uint_t j;
|
register uint_t j;
|
||||||
hash_entry_t e, n;
|
hash_entry_t e, n;
|
||||||
|
|
||||||
for ( j = 0; j < _hash->length; j++ )
|
for ( j = 0; j < _hash->length; j++ )
|
||||||
{
|
{
|
||||||
e = *( _hash->table + j );
|
e = *( _hash->table + j );
|
||||||
for ( ; e != NULL ; ) {
|
for ( ; e != NULL ; ) {
|
||||||
n = e->_next;
|
n = e->_next;
|
||||||
if ( fentry_func != NULL ) fentry_func(e);
|
if ( fentry_func != NULL ) fentry_func(e);
|
||||||
FRISO_FREE( e );
|
FRISO_FREE( e );
|
||||||
e = n;
|
e = n;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//free the pointer array block ( 4 * htable->length continuous bytes ).
|
//free the pointer array block ( 4 * htable->length continuous bytes ).
|
||||||
@ -189,9 +189,9 @@ FRISO_API void free_hash_table(
|
|||||||
//put a new mapping into the hashtable.
|
//put a new mapping into the hashtable.
|
||||||
//the value cannot be NULL.
|
//the value cannot be NULL.
|
||||||
FRISO_API void *hash_put_mapping(
|
FRISO_API void *hash_put_mapping(
|
||||||
friso_hash_t _hash,
|
friso_hash_t _hash,
|
||||||
fstring key,
|
fstring key,
|
||||||
void * value )
|
void * value )
|
||||||
{
|
{
|
||||||
uint_t bucket = ( key == NULL ) ? 0 : hash( key, _hash->length );
|
uint_t bucket = ( key == NULL ) ? 0 : hash( key, _hash->length );
|
||||||
hash_entry_t e = *( _hash->table + bucket );
|
hash_entry_t e = *( _hash->table + bucket );
|
||||||
@ -200,14 +200,14 @@ FRISO_API void *hash_put_mapping(
|
|||||||
//check the given key is already exists or not.
|
//check the given key is already exists or not.
|
||||||
for ( ; e != NULL; e = e->_next )
|
for ( ; e != NULL; e = e->_next )
|
||||||
{
|
{
|
||||||
if ( key == e->_key
|
if ( key == e->_key
|
||||||
|| ( key != NULL && e->_key != NULL
|
|| ( key != NULL && e->_key != NULL
|
||||||
&& strcmp( key, e->_key ) == 0 ) )
|
&& strcmp( key, e->_key ) == 0 ) )
|
||||||
{
|
{
|
||||||
oval = e->_val; //bak the old value
|
oval = e->_val; //bak the old value
|
||||||
e->_val = value;
|
e->_val = value;
|
||||||
return oval;
|
return oval;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//put a new mapping into the hashtable.
|
//put a new mapping into the hashtable.
|
||||||
@ -216,27 +216,27 @@ FRISO_API void *hash_put_mapping(
|
|||||||
|
|
||||||
//check the condition to rebuild the hashtable.
|
//check the condition to rebuild the hashtable.
|
||||||
if ( _hash->size >= _hash->threshold )
|
if ( _hash->size >= _hash->threshold )
|
||||||
rebuild_hash( _hash );
|
rebuild_hash( _hash );
|
||||||
|
|
||||||
return oval;
|
return oval;
|
||||||
}
|
}
|
||||||
|
|
||||||
//check the existence of the mapping associated with the given key.
|
//check the existence of the mapping associated with the given key.
|
||||||
FRISO_API int hash_exist_mapping(
|
FRISO_API int hash_exist_mapping(
|
||||||
friso_hash_t _hash, fstring key )
|
friso_hash_t _hash, fstring key )
|
||||||
{
|
{
|
||||||
uint_t bucket = ( key == NULL ) ? 0 : hash( key, _hash->length );
|
uint_t bucket = ( key == NULL ) ? 0 : hash( key, _hash->length );
|
||||||
hash_entry_t e;
|
hash_entry_t e;
|
||||||
|
|
||||||
for ( e = *( _hash->table + bucket );
|
for ( e = *( _hash->table + bucket );
|
||||||
e != NULL;
|
e != NULL;
|
||||||
e = e->_next ) {
|
e = e->_next ) {
|
||||||
if ( key == e->_key
|
if ( key == e->_key
|
||||||
|| ( key != NULL && e->_key != NULL
|
|| ( key != NULL && e->_key != NULL
|
||||||
&& strcmp( key, e->_key ) == 0 ))
|
&& strcmp( key, e->_key ) == 0 ))
|
||||||
{
|
{
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -249,14 +249,14 @@ FRISO_API void *hash_get_value( friso_hash_t _hash, fstring key )
|
|||||||
hash_entry_t e;
|
hash_entry_t e;
|
||||||
|
|
||||||
for ( e = *( _hash->table + bucket );
|
for ( e = *( _hash->table + bucket );
|
||||||
e != NULL;
|
e != NULL;
|
||||||
e = e->_next ) {
|
e = e->_next ) {
|
||||||
if ( key == e->_key
|
if ( key == e->_key
|
||||||
|| ( key != NULL && e->_key != NULL
|
|| ( key != NULL && e->_key != NULL
|
||||||
&& strcmp( key, e->_key ) == 0 ))
|
&& strcmp( key, e->_key ) == 0 ))
|
||||||
{
|
{
|
||||||
return e->_val;
|
return e->_val;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -264,31 +264,31 @@ FRISO_API void *hash_get_value( friso_hash_t _hash, fstring key )
|
|||||||
|
|
||||||
//remove the mapping associated with the given key.
|
//remove the mapping associated with the given key.
|
||||||
FRISO_API hash_entry_t hash_remove_mapping(
|
FRISO_API hash_entry_t hash_remove_mapping(
|
||||||
friso_hash_t _hash, fstring key )
|
friso_hash_t _hash, fstring key )
|
||||||
{
|
{
|
||||||
uint_t bucket = ( key == NULL ) ? 0 : hash( key, _hash->length );
|
uint_t bucket = ( key == NULL ) ? 0 : hash( key, _hash->length );
|
||||||
hash_entry_t e, prev = NULL;
|
hash_entry_t e, prev = NULL;
|
||||||
hash_entry_t b;
|
hash_entry_t b;
|
||||||
|
|
||||||
for ( e = *( _hash->table + bucket );
|
for ( e = *( _hash->table + bucket );
|
||||||
e != NULL;
|
e != NULL;
|
||||||
prev = e, e = e->_next ) {
|
prev = e, e = e->_next ) {
|
||||||
if ( key == e->_key
|
if ( key == e->_key
|
||||||
|| ( key != NULL && e->_key != NULL
|
|| ( key != NULL && e->_key != NULL
|
||||||
&& strcmp( key, e->_key ) == 0 ) )
|
&& strcmp( key, e->_key ) == 0 ) )
|
||||||
{
|
{
|
||||||
b = e;
|
b = e;
|
||||||
//the node located at *( htable->table + bucket )
|
//the node located at *( htable->table + bucket )
|
||||||
if ( prev == NULL ) {
|
if ( prev == NULL ) {
|
||||||
_hash->table[bucket] = e->_next;
|
_hash->table[bucket] = e->_next;
|
||||||
} else {
|
} else {
|
||||||
prev->_next = e->_next;
|
prev->_next = e->_next;
|
||||||
}
|
}
|
||||||
//printf("%s was removed\n", b->_key);
|
//printf("%s was removed\n", b->_key);
|
||||||
_hash->size--;
|
_hash->size--;
|
||||||
//FRISO_FREE( b );
|
//FRISO_FREE( b );
|
||||||
return b;
|
return b;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -1,102 +1,102 @@
|
|||||||
/*
|
/*
|
||||||
* friso lexicon implemented functions.
|
* friso lexicon implemented functions.
|
||||||
* used to deal with the friso lexicon, like: load,remove,match...
|
* used to deal with the friso lexicon, like: load,remove,match...
|
||||||
*
|
*
|
||||||
* @author chenxin <chenxin619315@gmail.com>
|
* @author chenxin <chenxin619315@gmail.com>
|
||||||
*/
|
*/
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include "friso_API.h"
|
#include "friso_API.h"
|
||||||
#include "friso.h"
|
#include "friso.h"
|
||||||
|
|
||||||
#define __SPLIT_MAX_TOKENS__ 5
|
#define __SPLIT_MAX_TOKENS__ 5
|
||||||
#define __LEX_FILE_DELIME__ '#'
|
#define __LEX_FILE_DELIME__ '#'
|
||||||
#define __FRISO_LEX_IFILE__ "friso.lex.ini"
|
#define __FRISO_LEX_IFILE__ "friso.lex.ini"
|
||||||
|
|
||||||
//create a new lexicon
|
//create a new lexicon
|
||||||
FRISO_API friso_dic_t friso_dic_new()
|
FRISO_API friso_dic_t friso_dic_new()
|
||||||
{
|
{
|
||||||
register uint_t t;
|
register uint_t t;
|
||||||
friso_dic_t dic = ( friso_dic_t ) FRISO_CALLOC(
|
friso_dic_t dic = ( friso_dic_t ) FRISO_CALLOC(
|
||||||
sizeof( friso_hash_t ), __FRISO_LEXICON_LENGTH__ );
|
sizeof( friso_hash_t ), __FRISO_LEXICON_LENGTH__ );
|
||||||
if ( dic == NULL ) {
|
if ( dic == NULL ) {
|
||||||
___ALLOCATION_ERROR___
|
___ALLOCATION_ERROR___
|
||||||
}
|
}
|
||||||
|
|
||||||
for ( t = 0; t < __FRISO_LEXICON_LENGTH__; t++ ) {
|
for ( t = 0; t < __FRISO_LEXICON_LENGTH__; t++ ) {
|
||||||
dic[t] = new_hash_table();
|
dic[t] = new_hash_table();
|
||||||
}
|
}
|
||||||
|
|
||||||
return dic;
|
return dic;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* default callback function to invoke
|
* default callback function to invoke
|
||||||
* when free the friso dictionary .
|
* when free the friso dictionary .
|
||||||
*
|
*
|
||||||
* @date 2013-06-12
|
* @date 2013-06-12
|
||||||
*/
|
*/
|
||||||
__STATIC_API__ void default_fdic_callback( hash_entry_t e )
|
__STATIC_API__ void default_fdic_callback( hash_entry_t e )
|
||||||
{
|
{
|
||||||
register uint_t i;
|
register uint_t i;
|
||||||
friso_array_t syn;
|
friso_array_t syn;
|
||||||
lex_entry_t lex = ( lex_entry_t ) e->_val;
|
lex_entry_t lex = ( lex_entry_t ) e->_val;
|
||||||
//free the lex->word
|
//free the lex->word
|
||||||
FRISO_FREE( lex->word );
|
FRISO_FREE( lex->word );
|
||||||
//free the lex->syn if it is not NULL
|
//free the lex->syn if it is not NULL
|
||||||
if ( lex->syn != NULL )
|
if ( lex->syn != NULL )
|
||||||
{
|
{
|
||||||
syn = lex->syn;
|
syn = lex->syn;
|
||||||
for ( i = 0; i < syn->length; i++ ) {
|
for ( i = 0; i < syn->length; i++ ) {
|
||||||
FRISO_FREE( syn->items[i] );
|
FRISO_FREE( syn->items[i] );
|
||||||
}
|
}
|
||||||
free_array_list( syn );
|
free_array_list( syn );
|
||||||
}
|
}
|
||||||
|
|
||||||
//free the e->_val
|
//free the e->_val
|
||||||
//@date 2014-01-28 posted by mlemay@gmail.com
|
//@date 2014-01-28 posted by mlemay@gmail.com
|
||||||
FRISO_FREE(lex);
|
FRISO_FREE(lex);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
 * Free a dictionary: every one of its __FRISO_LEXICON_LENGTH__ hash
 * tables is released through free_hash_table() with
 * default_fdic_callback as the per-entry callback, then the array of
 * tables itself is freed.
 */
FRISO_API void friso_dic_free( friso_dic_t dic )
{
    uint_t slot = 0;

    while ( slot < __FRISO_LEXICON_LENGTH__ ) {
        free_hash_table( dic[slot], default_fdic_callback );
        slot++;
    }

    FRISO_FREE( dic );
}
|
||||||
|
|
||||||
|
|
||||||
//create a new lexicon entry
|
//create a new lexicon entry
|
||||||
FRISO_API lex_entry_t new_lex_entry(
|
FRISO_API lex_entry_t new_lex_entry(
|
||||||
fstring word,
|
fstring word,
|
||||||
friso_array_t syn,
|
friso_array_t syn,
|
||||||
uint_t fre,
|
uint_t fre,
|
||||||
uint_t length,
|
uint_t length,
|
||||||
uint_t type )
|
uint_t type )
|
||||||
{
|
{
|
||||||
lex_entry_t e = ( lex_entry_t )
|
lex_entry_t e = ( lex_entry_t )
|
||||||
FRISO_MALLOC( sizeof( lex_entry_cdt ) );
|
FRISO_MALLOC( sizeof( lex_entry_cdt ) );
|
||||||
if ( e == NULL ) {
|
if ( e == NULL ) {
|
||||||
___ALLOCATION_ERROR___
|
___ALLOCATION_ERROR___
|
||||||
}
|
}
|
||||||
|
|
||||||
//initialize.
|
//initialize.
|
||||||
e->word = word;
|
e->word = word;
|
||||||
e->syn = syn; //synoyum words array list.
|
e->syn = syn; //synoyum words array list.
|
||||||
e->pos = NULL; //part of speech array list.
|
e->pos = NULL; //part of speech array list.
|
||||||
//e->py = NULL; //set to NULL first.
|
//e->py = NULL; //set to NULL first.
|
||||||
e->fre = fre;
|
e->fre = fre;
|
||||||
e->length = (uchar_t) length; //length
|
e->length = (uchar_t) length; //length
|
||||||
e->rlen = (uchar_t) length; //set to length by default.
|
e->rlen = (uchar_t) length; //set to length by default.
|
||||||
e->type = (uchar_t) type; //type
|
e->type = (uchar_t) type; //type
|
||||||
e->ctrlMask = 0; //control mask.
|
e->ctrlMask = 0; //control mask.
|
||||||
e->offset = -1;
|
e->offset = -1;
|
||||||
|
|
||||||
return e;
|
return e;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -109,64 +109,64 @@ FRISO_API lex_entry_t new_lex_entry(
|
|||||||
*/
|
*/
|
||||||
/*
 * Release the lexicon entry structure itself.
 * Only e is freed here; the word and the synonyms list it points to
 * are left untouched.
 */
FRISO_API void free_lex_entry( lex_entry_t e )
{
    FRISO_FREE( e );
}
|
||||||
|
|
||||||
|
|
||||||
//add a new entry to the dictionary.
|
//add a new entry to the dictionary.
|
||||||
FRISO_API void friso_dic_add(
|
FRISO_API void friso_dic_add(
|
||||||
friso_dic_t dic,
|
friso_dic_t dic,
|
||||||
friso_lex_t lex,
|
friso_lex_t lex,
|
||||||
fstring word,
|
fstring word,
|
||||||
friso_array_t syn )
|
friso_array_t syn )
|
||||||
{
|
{
|
||||||
if ( lex >= 0 && lex < __FRISO_LEXICON_LENGTH__ )
|
if ( lex >= 0 && lex < __FRISO_LEXICON_LENGTH__ )
|
||||||
{
|
{
|
||||||
//printf("lex=%d, word=%s, syn=%s\n", lex, word, syn);
|
//printf("lex=%d, word=%s, syn=%s\n", lex, word, syn);
|
||||||
hash_put_mapping( dic[lex], word,
|
hash_put_mapping( dic[lex], word,
|
||||||
new_lex_entry( word, syn, 0,
|
new_lex_entry( word, syn, 0,
|
||||||
(uint_t) strlen(word), (uint_t) lex ) );
|
(uint_t) strlen(word), (uint_t) lex ) );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
FRISO_API void friso_dic_add_with_fre(
|
FRISO_API void friso_dic_add_with_fre(
|
||||||
friso_dic_t dic,
|
friso_dic_t dic,
|
||||||
friso_lex_t lex,
|
friso_lex_t lex,
|
||||||
fstring word,
|
fstring word,
|
||||||
friso_array_t syn,
|
friso_array_t syn,
|
||||||
uint_t frequency )
|
uint_t frequency )
|
||||||
{
|
{
|
||||||
if ( lex >= 0 && lex < __FRISO_LEXICON_LENGTH__ ) {
|
if ( lex >= 0 && lex < __FRISO_LEXICON_LENGTH__ ) {
|
||||||
hash_put_mapping( dic[lex], word,
|
hash_put_mapping( dic[lex], word,
|
||||||
new_lex_entry( word, syn, frequency,
|
new_lex_entry( word, syn, frequency,
|
||||||
( uint_t ) strlen(word), ( uint_t ) lex ) );
|
( uint_t ) strlen(word), ( uint_t ) lex ) );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
 * Read one line from the given stream into __dst.
 * The newline is consumed but not stored and the result is always
 * NUL-terminated.
 *
 * Returns __dst, or NULL when the end of the stream is reached before
 * any character was read.
 *
 * NOTE: no buffer size is passed in, so __dst has to be large enough
 * for the longest line of the stream.
 *
 * @date 2012-11-24
 */
FRISO_API fstring file_get_line( fstring __dst, FILE * _stream )
{
    fstring out = __dst;
    int ch;

    for ( ch = fgetc( _stream );
            ch != EOF && ch != '\n';
            ch = fgetc( _stream ) ) {
        *out++ = ch;
    }
    *out = '\0';

    //nothing read and no more input: report the end of the stream
    if ( ch == EOF && out == __dst ) {
        return NULL;
    }

    return __dst;
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -174,373 +174,373 @@ FRISO_API fstring file_get_line( fstring __dst, FILE * _stream )
|
|||||||
*/
|
*/
|
||||||
/*
 * Used instead of memcpy: copy at most `blocks` chars from _src to
 * __dst, stopping early at the end of _src, then NUL-terminate __dst.
 * Up to blocks + 1 bytes of __dst are written.
 *
 * Returns __dst.
 */
__STATIC_API__ fstring string_copy(
        fstring _src,
        fstring __dst,
        uint_t blocks )
{
    uint_t copied = 0;

    while ( copied < blocks && _src[copied] != '\0' ) {
        __dst[copied] = _src[copied];
        copied++;
    }
    __dst[copied] = '\0';

    return __dst;
}
|
||||||
|
|
||||||
/**
 * Allocate blocks + 1 bytes on the heap and copy the source fstring
 * into the new allocation. At most `blocks` chars are copied (fewer when
 * _src ends earlier) and the result is NUL-terminated.
 * The caller has to free the returned fstring after use.
 *
 * @param _src    source fstring
 * @param blocks  number of bytes to copy
 * @return the new heap allocated copy
 */
__STATIC_API__ fstring string_copy_heap(
        fstring _src, uint_t blocks )
{
    uint_t n = 0;
    fstring copy = ( fstring ) FRISO_MALLOC( blocks + 1 );

    if ( copy == NULL ) {
        ___ALLOCATION_ERROR___;
    }

    while ( n < blocks && _src[n] != '\0' ) {
        copy[n] = _src[n];
        n++;
    }
    copy[n] = '\0';

    return copy;
}
|
||||||
|
|
||||||
/*
 * Find the position of the first appearance of the given char.
 *
 * Returns the address of that char inside __str, or NULL when the char
 * does not appear. The terminating NUL is never reported as a match, so
 * a delimiter of '\0' always yields NULL.
 */
__STATIC_API__ fstring indexOf( fstring __str, char delimiter )
{
    //strchr() would match the terminating NUL, keep that case a miss
    if ( delimiter == '\0' ) {
        return NULL;
    }

    return strchr( __str, delimiter );
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* load all the valid wors from a specified lexicon file .
|
* load all the valid wors from a specified lexicon file .
|
||||||
*
|
*
|
||||||
* @param dic friso dictionary instance (A hash array)
|
* @param dic friso dictionary instance (A hash array)
|
||||||
* @param lex the lexicon type
|
* @param lex the lexicon type
|
||||||
* @param lex_file the path of the lexicon file
|
* @param lex_file the path of the lexicon file
|
||||||
* @param length the maximum length of the word item
|
* @param length the maximum length of the word item
|
||||||
*/
|
*/
|
||||||
FRISO_API void friso_dic_load(
|
FRISO_API void friso_dic_load(
|
||||||
friso_t friso,
|
friso_t friso,
|
||||||
friso_config_t config,
|
friso_config_t config,
|
||||||
friso_lex_t lex,
|
friso_lex_t lex,
|
||||||
fstring lex_file,
|
fstring lex_file,
|
||||||
uint_t length )
|
uint_t length )
|
||||||
{
|
{
|
||||||
|
|
||||||
FILE * _stream;
|
FILE * _stream;
|
||||||
char __char[1024], _buffer[512];
|
char __char[1024], _buffer[512];
|
||||||
fstring _line;
|
fstring _line;
|
||||||
string_split_entry sse;
|
string_split_entry sse;
|
||||||
|
|
||||||
fstring _word;
|
fstring _word;
|
||||||
char _sbuffer[512];
|
char _sbuffer[512];
|
||||||
fstring _syn;
|
fstring _syn;
|
||||||
friso_array_t sywords;
|
friso_array_t sywords;
|
||||||
uint_t _fre;
|
uint_t _fre;
|
||||||
|
|
||||||
if ( ( _stream = fopen( lex_file, "rb" ) ) != NULL )
|
if ( ( _stream = fopen( lex_file, "rb" ) ) != NULL )
|
||||||
{
|
{
|
||||||
while ( ( _line = file_get_line( __char, _stream ) ) != NULL )
|
while ( ( _line = file_get_line( __char, _stream ) ) != NULL )
|
||||||
{
|
{
|
||||||
//clear up the notes
|
//clear up the notes
|
||||||
//make sure the length of the line is greater than 1.
|
//make sure the length of the line is greater than 1.
|
||||||
//like the single '#' mark in stopwords dictionary.
|
//like the single '#' mark in stopwords dictionary.
|
||||||
if ( _line[0] == '#' && strlen(_line) > 1 ) continue;
|
if ( _line[0] == '#' && strlen(_line) > 1 ) continue;
|
||||||
|
|
||||||
//handle the stopwords.
|
//handle the stopwords.
|
||||||
if ( lex == __LEX_STOPWORDS__ )
|
if ( lex == __LEX_STOPWORDS__ )
|
||||||
{
|
{
|
||||||
//clean the chinese words that its length is greater than max length.
|
//clean the chinese words that its length is greater than max length.
|
||||||
if ( ((int)_line[0]) < 0 && strlen( _line ) > length ) continue;
|
if ( ((int)_line[0]) < 0 && strlen( _line ) > length ) continue;
|
||||||
friso_dic_add( friso->dic, __LEX_STOPWORDS__,
|
friso_dic_add( friso->dic, __LEX_STOPWORDS__,
|
||||||
string_copy_heap( _line, strlen(_line) ), NULL );
|
string_copy_heap( _line, strlen(_line) ), NULL );
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
//split the fstring with '/'.
|
//split the fstring with '/'.
|
||||||
string_split_reset( &sse, "/", _line);
|
string_split_reset( &sse, "/", _line);
|
||||||
if ( string_split_next( &sse, _buffer ) == NULL ) continue;
|
if ( string_split_next( &sse, _buffer ) == NULL ) continue;
|
||||||
|
|
||||||
//1. get the word.
|
//1. get the word.
|
||||||
_word = string_copy_heap( _buffer, strlen(_buffer) );
|
_word = string_copy_heap( _buffer, strlen(_buffer) );
|
||||||
|
|
||||||
if ( string_split_next( &sse, _buffer ) == NULL )
|
if ( string_split_next( &sse, _buffer ) == NULL )
|
||||||
{
|
{
|
||||||
//normal lexicon type,
|
//normal lexicon type,
|
||||||
//add them to the dictionary directly
|
//add them to the dictionary directly
|
||||||
friso_dic_add( friso->dic, lex, _word, NULL );
|
friso_dic_add( friso->dic, lex, _word, NULL );
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* filter out the words that its length is larger
|
* filter out the words that its length is larger
|
||||||
* than the specified limit.
|
* than the specified limit.
|
||||||
* but not for __LEX_ECM_WORDS__ and english __LEX_STOPWORDS__
|
* but not for __LEX_ECM_WORDS__ and english __LEX_STOPWORDS__
|
||||||
* and __LEX_CEM_WORDS__.
|
* and __LEX_CEM_WORDS__.
|
||||||
*/
|
*/
|
||||||
if ( ! ( lex == __LEX_ECM_WORDS__ || lex == __LEX_CEM_WORDS__ )
|
if ( ! ( lex == __LEX_ECM_WORDS__ || lex == __LEX_CEM_WORDS__ )
|
||||||
&& strlen( _word ) > length )
|
&& strlen( _word ) > length )
|
||||||
{
|
{
|
||||||
FRISO_FREE(_word);
|
FRISO_FREE(_word);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
//2. get the synonyms words.
|
//2. get the synonyms words.
|
||||||
_syn = NULL;
|
_syn = NULL;
|
||||||
if ( strcmp( _buffer, "null" ) != 0 )
|
if ( strcmp( _buffer, "null" ) != 0 )
|
||||||
_syn = string_copy( _buffer, _sbuffer, strlen(_buffer) );
|
_syn = string_copy( _buffer, _sbuffer, strlen(_buffer) );
|
||||||
|
|
||||||
//3. get the word frequency if it available.
|
//3. get the word frequency if it available.
|
||||||
_fre = 0;
|
_fre = 0;
|
||||||
if ( string_split_next( &sse, _buffer ) != NULL )
|
if ( string_split_next( &sse, _buffer ) != NULL )
|
||||||
_fre = atoi( _buffer );
|
_fre = atoi( _buffer );
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Here:
|
* Here:
|
||||||
* split the synonyms words with mark ","
|
* split the synonyms words with mark ","
|
||||||
* and put them in a array list if the synonyms is not NULL
|
* and put them in a array list if the synonyms is not NULL
|
||||||
*/
|
*/
|
||||||
sywords = NULL;
|
sywords = NULL;
|
||||||
if ( config->add_syn && _syn != NULL )
|
if ( config->add_syn && _syn != NULL )
|
||||||
{
|
{
|
||||||
string_split_reset( &sse, ",", _sbuffer );
|
string_split_reset( &sse, ",", _sbuffer );
|
||||||
sywords = new_array_list_with_opacity(5);
|
sywords = new_array_list_with_opacity(5);
|
||||||
while ( string_split_next( &sse, _buffer ) != NULL )
|
while ( string_split_next( &sse, _buffer ) != NULL )
|
||||||
{
|
{
|
||||||
if ( strlen(_buffer) > length ) continue;
|
if ( strlen(_buffer) > length ) continue;
|
||||||
array_list_add( sywords,
|
array_list_add( sywords,
|
||||||
string_copy_heap(_buffer, strlen(_buffer)) );
|
string_copy_heap(_buffer, strlen(_buffer)) );
|
||||||
}
|
}
|
||||||
sywords = array_list_trim( sywords );
|
sywords = array_list_trim( sywords );
|
||||||
}
|
}
|
||||||
|
|
||||||
//4. add the word item
|
//4. add the word item
|
||||||
friso_dic_add_with_fre(
|
friso_dic_add_with_fre(
|
||||||
friso->dic, lex, _word, sywords, _fre );
|
friso->dic, lex, _word, sywords, _fre );
|
||||||
}
|
}
|
||||||
|
|
||||||
fclose( _stream );
|
fclose( _stream );
|
||||||
} else {
|
} else {
|
||||||
printf("Warning: Fail to open lexicon file %s\n", lex_file);
|
printf("Warning: Fail to open lexicon file %s\n", lex_file);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* get the lexicon type index with the specified
|
* get the lexicon type index with the specified
|
||||||
* type keywords .
|
* type keywords .
|
||||||
*
|
*
|
||||||
* @see friso.h#friso_lex_t
|
* @see friso.h#friso_lex_t
|
||||||
* @param _key
|
* @param _key
|
||||||
* @return int
|
* @return int
|
||||||
*/
|
*/
|
||||||
__STATIC_API__ friso_lex_t get_lexicon_type_with_constant( fstring _key )
|
__STATIC_API__ friso_lex_t get_lexicon_type_with_constant( fstring _key )
|
||||||
{
|
{
|
||||||
if ( strcmp( _key, "__LEX_CJK_WORDS__" ) == 0 ) {
|
if ( strcmp( _key, "__LEX_CJK_WORDS__" ) == 0 ) {
|
||||||
return __LEX_CJK_WORDS__;
|
return __LEX_CJK_WORDS__;
|
||||||
}
|
}
|
||||||
else if ( strcmp( _key, "__LEX_CJK_UNITS__" ) == 0 ) {
|
else if ( strcmp( _key, "__LEX_CJK_UNITS__" ) == 0 ) {
|
||||||
return __LEX_CJK_UNITS__;
|
return __LEX_CJK_UNITS__;
|
||||||
}
|
}
|
||||||
else if ( strcmp( _key, "__LEX_ECM_WORDS__" ) == 0 ) {
|
else if ( strcmp( _key, "__LEX_ECM_WORDS__" ) == 0 ) {
|
||||||
return __LEX_ECM_WORDS__;
|
return __LEX_ECM_WORDS__;
|
||||||
}
|
}
|
||||||
else if ( strcmp( _key, "__LEX_CEM_WORDS__" ) == 0 ) {
|
else if ( strcmp( _key, "__LEX_CEM_WORDS__" ) == 0 ) {
|
||||||
return __LEX_CEM_WORDS__;
|
return __LEX_CEM_WORDS__;
|
||||||
}
|
}
|
||||||
else if ( strcmp( _key, "__LEX_CN_LNAME__" ) == 0 ) {
|
else if ( strcmp( _key, "__LEX_CN_LNAME__" ) == 0 ) {
|
||||||
return __LEX_CN_LNAME__;
|
return __LEX_CN_LNAME__;
|
||||||
}
|
}
|
||||||
else if ( strcmp( _key, "__LEX_CN_SNAME__" ) == 0 ) {
|
else if ( strcmp( _key, "__LEX_CN_SNAME__" ) == 0 ) {
|
||||||
return __LEX_CN_SNAME__;
|
return __LEX_CN_SNAME__;
|
||||||
}
|
}
|
||||||
else if ( strcmp( _key, "__LEX_CN_DNAME1__" ) == 0 ) {
|
else if ( strcmp( _key, "__LEX_CN_DNAME1__" ) == 0 ) {
|
||||||
return __LEX_CN_DNAME1__;
|
return __LEX_CN_DNAME1__;
|
||||||
}
|
}
|
||||||
else if ( strcmp( _key, "__LEX_CN_DNAME2__" ) == 0 ) {
|
else if ( strcmp( _key, "__LEX_CN_DNAME2__" ) == 0 ) {
|
||||||
return __LEX_CN_DNAME2__;
|
return __LEX_CN_DNAME2__;
|
||||||
}
|
}
|
||||||
else if ( strcmp( _key, "__LEX_CN_LNA__" ) == 0 ) {
|
else if ( strcmp( _key, "__LEX_CN_LNA__" ) == 0 ) {
|
||||||
return __LEX_CN_LNA__;
|
return __LEX_CN_LNA__;
|
||||||
}
|
}
|
||||||
else if ( strcmp( _key, "__LEX_STOPWORDS__" ) == 0 ) {
|
else if ( strcmp( _key, "__LEX_STOPWORDS__" ) == 0 ) {
|
||||||
return __LEX_STOPWORDS__;
|
return __LEX_STOPWORDS__;
|
||||||
}
|
}
|
||||||
else if ( strcmp( _key, "__LEX_ENPUN_WORDS__" ) == 0 ) {
|
else if ( strcmp( _key, "__LEX_ENPUN_WORDS__" ) == 0 ) {
|
||||||
return __LEX_ENPUN_WORDS__;
|
return __LEX_ENPUN_WORDS__;
|
||||||
}
|
}
|
||||||
else if ( strcmp( _key, "__LEX_EN_WORDS__" ) == 0 ) {
|
else if ( strcmp( _key, "__LEX_EN_WORDS__" ) == 0 ) {
|
||||||
return __LEX_EN_WORDS__;
|
return __LEX_EN_WORDS__;
|
||||||
}
|
}
|
||||||
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* load the lexicon configuration file.
|
* load the lexicon configuration file.
|
||||||
* and load all the valid lexicon from the configuration file.
|
* and load all the valid lexicon from the configuration file.
|
||||||
*
|
*
|
||||||
* @param friso friso instance
|
* @param friso friso instance
|
||||||
* @param config friso_config instance
|
* @param config friso_config instance
|
||||||
* @param _path dictionary directory
|
* @param _path dictionary directory
|
||||||
* @param _limitts words length limit
|
* @param _limitts words length limit
|
||||||
*/
|
*/
|
||||||
FRISO_API void friso_dic_load_from_ifile(
|
FRISO_API void friso_dic_load_from_ifile(
|
||||||
friso_t friso,
|
friso_t friso,
|
||||||
friso_config_t config,
|
friso_config_t config,
|
||||||
fstring _path,
|
fstring _path,
|
||||||
uint_t _limits )
|
uint_t _limits )
|
||||||
{
|
{
|
||||||
|
|
||||||
//1.parse the configuration file.
|
//1.parse the configuration file.
|
||||||
FILE *__stream;
|
FILE *__stream;
|
||||||
char __chars__[1024], __key__[30], *__line__;
|
char __chars__[1024], __key__[30], *__line__;
|
||||||
uint_t __length__, i, t;
|
uint_t __length__, i, t;
|
||||||
friso_lex_t lex_t;
|
friso_lex_t lex_t;
|
||||||
string_buffer_t sb;
|
string_buffer_t sb;
|
||||||
|
|
||||||
//get the lexicon configruation file path
|
//get the lexicon configruation file path
|
||||||
sb = new_string_buffer();
|
sb = new_string_buffer();
|
||||||
string_buffer_append( sb, _path );
|
string_buffer_append( sb, _path );
|
||||||
string_buffer_append( sb, __FRISO_LEX_IFILE__ );
|
string_buffer_append( sb, __FRISO_LEX_IFILE__ );
|
||||||
//printf("%s\n", sb->buffer);
|
//printf("%s\n", sb->buffer);
|
||||||
|
|
||||||
if ( ( __stream = fopen( sb->buffer, "rb" ) ) != NULL )
|
if ( ( __stream = fopen( sb->buffer, "rb" ) ) != NULL )
|
||||||
{
|
{
|
||||||
while ( ( __line__ =
|
while ( ( __line__ =
|
||||||
file_get_line( __chars__, __stream ) ) != NULL )
|
file_get_line( __chars__, __stream ) ) != NULL )
|
||||||
{
|
{
|
||||||
//comment filter.
|
//comment filter.
|
||||||
if ( __line__[0] == '#' ) continue;
|
if ( __line__[0] == '#' ) continue;
|
||||||
if ( __line__[0] == '\0' ) continue;
|
if ( __line__[0] == '\0' ) continue;
|
||||||
|
|
||||||
__length__ = strlen( __line__ );
|
__length__ = strlen( __line__ );
|
||||||
//item start
|
//item start
|
||||||
if ( __line__[ __length__ - 1 ] == '[' )
|
if ( __line__[ __length__ - 1 ] == '[' )
|
||||||
{
|
{
|
||||||
//get the type key
|
//get the type key
|
||||||
for ( i = 0; i < __length__
|
for ( i = 0; i < __length__
|
||||||
&& ( __line__[i] == ' ' || __line__[i] == '\t' ); i++ );
|
&& ( __line__[i] == ' ' || __line__[i] == '\t' ); i++ );
|
||||||
for ( t = 0; i < __length__; i++,t++ ) {
|
for ( t = 0; i < __length__; i++,t++ ) {
|
||||||
if ( __line__[i] == ' '
|
if ( __line__[i] == ' '
|
||||||
|| __line__[i] == '\t' || __line__[i] == ':' ) break;
|
|| __line__[i] == '\t' || __line__[i] == ':' ) break;
|
||||||
__key__[t] = __line__[i];
|
__key__[t] = __line__[i];
|
||||||
}
|
}
|
||||||
__key__[t] = '\0';
|
__key__[t] = '\0';
|
||||||
|
|
||||||
//get the lexicon type
|
//get the lexicon type
|
||||||
lex_t = get_lexicon_type_with_constant(__key__);
|
lex_t = get_lexicon_type_with_constant(__key__);
|
||||||
if ( lex_t == -1 ) continue;
|
if ( lex_t == -1 ) continue;
|
||||||
|
|
||||||
//printf("key=%s, type=%d\n", __key__, lex_t );
|
//printf("key=%s, type=%d\n", __key__, lex_t );
|
||||||
while ( ( __line__ = file_get_line( __chars__, __stream ) ) != NULL )
|
while ( ( __line__ = file_get_line( __chars__, __stream ) ) != NULL )
|
||||||
{
|
{
|
||||||
//comments filter.
|
//comments filter.
|
||||||
if ( __line__[0] == '#' ) continue;
|
if ( __line__[0] == '#' ) continue;
|
||||||
if ( __line__[0] == '\0' ) continue;
|
if ( __line__[0] == '\0' ) continue;
|
||||||
|
|
||||||
__length__ = strlen( __line__ );
|
__length__ = strlen( __line__ );
|
||||||
if ( __line__[ __length__ - 1 ] == ']' ) break;
|
if ( __line__[ __length__ - 1 ] == ']' ) break;
|
||||||
|
|
||||||
for ( i = 0; i < __length__
|
for ( i = 0; i < __length__
|
||||||
&& ( __line__[i] == ' ' || __line__[i] == '\t' ); i++ );
|
&& ( __line__[i] == ' ' || __line__[i] == '\t' ); i++ );
|
||||||
for ( t = 0; i < __length__; i++,t++ ) {
|
for ( t = 0; i < __length__; i++,t++ ) {
|
||||||
if ( __line__[i] == ' '
|
if ( __line__[i] == ' '
|
||||||
|| __line__[i] == '\t' || __line__[i] == ';' ) break;
|
|| __line__[i] == '\t' || __line__[i] == ';' ) break;
|
||||||
__key__[t] = __line__[i];
|
__key__[t] = __line__[i];
|
||||||
}
|
}
|
||||||
__key__[t] = '\0';
|
__key__[t] = '\0';
|
||||||
|
|
||||||
//load the lexicon item from the lexicon file.
|
//load the lexicon item from the lexicon file.
|
||||||
string_buffer_clear( sb );
|
string_buffer_clear( sb );
|
||||||
string_buffer_append( sb, _path );
|
string_buffer_append( sb, _path );
|
||||||
string_buffer_append( sb, __key__ );
|
string_buffer_append( sb, __key__ );
|
||||||
//printf("key=%s, type=%d\n", __key__, lex_t);
|
//printf("key=%s, type=%d\n", __key__, lex_t);
|
||||||
friso_dic_load( friso, config, lex_t, sb->buffer, _limits );
|
friso_dic_load( friso, config, lex_t, sb->buffer, _limits );
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} //end while
|
} //end while
|
||||||
|
|
||||||
fclose( __stream );
|
fclose( __stream );
|
||||||
} else {
|
} else {
|
||||||
printf("Warning: Fail to open the lexicon configuration file %s\n", sb->buffer);
|
printf("Warning: Fail to open the lexicon configuration file %s\n", sb->buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
free_string_buffer(sb);
|
free_string_buffer(sb);
|
||||||
}
|
}
|
||||||
|
|
||||||
//match the item.
|
//match the item.
|
||||||
FRISO_API int friso_dic_match(
|
FRISO_API int friso_dic_match(
|
||||||
friso_dic_t dic,
|
friso_dic_t dic,
|
||||||
friso_lex_t lex,
|
friso_lex_t lex,
|
||||||
fstring word )
|
fstring word )
|
||||||
{
|
{
|
||||||
if ( lex >= 0 && lex < __FRISO_LEXICON_LENGTH__ ) {
|
if ( lex >= 0 && lex < __FRISO_LEXICON_LENGTH__ ) {
|
||||||
return hash_exist_mapping( dic[lex], word );
|
return hash_exist_mapping( dic[lex], word );
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
//get the lex_entry_t associated with the word.
|
//get the lex_entry_t associated with the word.
|
||||||
FRISO_API lex_entry_t friso_dic_get(
|
FRISO_API lex_entry_t friso_dic_get(
|
||||||
friso_dic_t dic,
|
friso_dic_t dic,
|
||||||
friso_lex_t lex,
|
friso_lex_t lex,
|
||||||
fstring word )
|
fstring word )
|
||||||
{
|
{
|
||||||
if ( lex >= 0 && lex < __FRISO_LEXICON_LENGTH__ ) {
|
if ( lex >= 0 && lex < __FRISO_LEXICON_LENGTH__ ) {
|
||||||
return ( lex_entry_t ) hash_get_value( dic[lex], word );
|
return ( lex_entry_t ) hash_get_value( dic[lex], word );
|
||||||
}
|
}
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
//get the size of the specified type dictionary.
|
//get the size of the specified type dictionary.
|
||||||
FRISO_API uint_t friso_spec_dic_size(
|
FRISO_API uint_t friso_spec_dic_size(
|
||||||
friso_dic_t dic,
|
friso_dic_t dic,
|
||||||
friso_lex_t lex )
|
friso_lex_t lex )
|
||||||
{
|
{
|
||||||
if ( lex >= 0 && lex < __FRISO_LEXICON_LENGTH__ ) {
|
if ( lex >= 0 && lex < __FRISO_LEXICON_LENGTH__ ) {
|
||||||
return hash_get_size( dic[lex] );
|
return hash_get_size( dic[lex] );
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
//get size of the whole dictionary.
|
//get size of the whole dictionary.
|
||||||
FRISO_API uint_t friso_all_dic_size(
|
FRISO_API uint_t friso_all_dic_size(
|
||||||
friso_dic_t dic )
|
friso_dic_t dic )
|
||||||
{
|
{
|
||||||
register uint_t size = 0, t;
|
register uint_t size = 0, t;
|
||||||
|
|
||||||
for ( t = 0; t < __FRISO_LEXICON_LENGTH__; t++ ) {
|
for ( t = 0; t < __FRISO_LEXICON_LENGTH__; t++ ) {
|
||||||
size += hash_get_size( dic[t] );
|
size += hash_get_size( dic[t] );
|
||||||
}
|
}
|
||||||
|
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
126
src/friso_link.c
126
src/friso_link.c
@ -1,29 +1,29 @@
|
|||||||
/*
|
/*
|
||||||
* link list implemented functions
|
* link list implemented functions
|
||||||
* defined in header file "friso_API.h".
|
* defined in header file "friso_API.h".
|
||||||
* when the link_node is being deleted, here we just free
|
* when the link_node is being deleted, here we just free
|
||||||
* the allocation of the node, not the allcation of it's value.
|
* the allocation of the node, not the allcation of it's value.
|
||||||
*
|
*
|
||||||
* @author chenxin <chenxin619315@gmail.com>
|
* @author chenxin <chenxin619315@gmail.com>
|
||||||
*/
|
*/
|
||||||
#include "friso_API.h"
|
#include "friso_API.h"
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
//create a new link list node.
|
//create a new link list node.
|
||||||
__STATIC_API__ link_node_t new_node_entry(
|
__STATIC_API__ link_node_t new_node_entry(
|
||||||
void * value,
|
void * value,
|
||||||
link_node_t prev,
|
link_node_t prev,
|
||||||
link_node_t next )
|
link_node_t next )
|
||||||
{
|
{
|
||||||
link_node_t node = ( link_node_t )
|
link_node_t node = ( link_node_t )
|
||||||
FRISO_MALLOC( sizeof( link_node_entry ) );
|
FRISO_MALLOC( sizeof( link_node_entry ) );
|
||||||
if ( node == NULL ) {
|
if ( node == NULL ) {
|
||||||
___ALLOCATION_ERROR___
|
___ALLOCATION_ERROR___
|
||||||
}
|
}
|
||||||
|
|
||||||
node->value = value;
|
node->value = value;
|
||||||
node->prev = prev;
|
node->prev = prev;
|
||||||
node->next = next;
|
node->next = next;
|
||||||
|
|
||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
@ -32,14 +32,14 @@ __STATIC_API__ link_node_t new_node_entry(
|
|||||||
FRISO_API friso_link_t new_link_list( void )
|
FRISO_API friso_link_t new_link_list( void )
|
||||||
{
|
{
|
||||||
friso_link_t e = ( friso_link_t )
|
friso_link_t e = ( friso_link_t )
|
||||||
FRISO_MALLOC( sizeof( friso_link_entry ) );
|
FRISO_MALLOC( sizeof( friso_link_entry ) );
|
||||||
if ( e == NULL ) {
|
if ( e == NULL ) {
|
||||||
___ALLOCATION_ERROR___
|
___ALLOCATION_ERROR___
|
||||||
}
|
}
|
||||||
|
|
||||||
//initialize the entry
|
//initialize the entry
|
||||||
e->head = new_node_entry( NULL, NULL, NULL );
|
e->head = new_node_entry( NULL, NULL, NULL );
|
||||||
e->tail = new_node_entry( NULL, e->head, NULL );
|
e->tail = new_node_entry( NULL, e->head, NULL );
|
||||||
e->head->next = e->tail;
|
e->head->next = e->tail;
|
||||||
e->size = 0;
|
e->size = 0;
|
||||||
|
|
||||||
@ -52,9 +52,9 @@ FRISO_API void free_link_list( friso_link_t link )
|
|||||||
link_node_t node, next;
|
link_node_t node, next;
|
||||||
for ( node = link->head; node != NULL; )
|
for ( node = link->head; node != NULL; )
|
||||||
{
|
{
|
||||||
next = node->next;
|
next = node->next;
|
||||||
FRISO_FREE( node );
|
FRISO_FREE( node );
|
||||||
node = next;
|
node = next;
|
||||||
}
|
}
|
||||||
|
|
||||||
FRISO_FREE( link );
|
FRISO_FREE( link );
|
||||||
@ -62,16 +62,16 @@ FRISO_API void free_link_list( friso_link_t link )
|
|||||||
|
|
||||||
//clear all nodes in the link list.
|
//clear all nodes in the link list.
|
||||||
FRISO_API friso_link_t link_list_clear(
|
FRISO_API friso_link_t link_list_clear(
|
||||||
friso_link_t link )
|
friso_link_t link )
|
||||||
{
|
{
|
||||||
link_node_t node, next;
|
link_node_t node, next;
|
||||||
//free all the middle nodes.
|
//free all the middle nodes.
|
||||||
for ( node = link->head->next;
|
for ( node = link->head->next;
|
||||||
node != link->tail; )
|
node != link->tail; )
|
||||||
{
|
{
|
||||||
next = node->next;
|
next = node->next;
|
||||||
FRISO_FREE( node );
|
FRISO_FREE( node );
|
||||||
node = next;
|
node = next;
|
||||||
}
|
}
|
||||||
|
|
||||||
link->head->next = link->tail;
|
link->head->next = link->tail;
|
||||||
@ -97,22 +97,22 @@ FRISO_API friso_link_t link_list_clear(
|
|||||||
* static
|
* static
|
||||||
*/
|
*/
|
||||||
__STATIC_API__ link_node_t get_node(
|
__STATIC_API__ link_node_t get_node(
|
||||||
friso_link_t link, uint_t idx )
|
friso_link_t link, uint_t idx )
|
||||||
{
|
{
|
||||||
link_node_t p = NULL;
|
link_node_t p = NULL;
|
||||||
register uint_t t;
|
register uint_t t;
|
||||||
|
|
||||||
if ( idx >= 0 && idx < link->size )
|
if ( idx >= 0 && idx < link->size )
|
||||||
{
|
{
|
||||||
if ( idx < link->size / 2 ) { //find from the head.
|
if ( idx < link->size / 2 ) { //find from the head.
|
||||||
p = link->head;
|
p = link->head;
|
||||||
for ( t = 0; t <= idx; t++ )
|
for ( t = 0; t <= idx; t++ )
|
||||||
p = p->next;
|
p = p->next;
|
||||||
} else { //find from the tail.
|
} else { //find from the tail.
|
||||||
p = link->tail;
|
p = link->tail;
|
||||||
for ( t = link->size; t > idx; t-- )
|
for ( t = link->size; t > idx; t-- )
|
||||||
p = p->prev;
|
p = p->prev;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return p;
|
return p;
|
||||||
@ -123,9 +123,9 @@ __STATIC_API__ link_node_t get_node(
|
|||||||
* static
|
* static
|
||||||
*/
|
*/
|
||||||
//__STATIC_API__ void insert_before(
|
//__STATIC_API__ void insert_before(
|
||||||
// friso_link_t link,
|
// friso_link_t link,
|
||||||
// link_node_t node,
|
// link_node_t node,
|
||||||
// void * value )
|
// void * value )
|
||||||
//{
|
//{
|
||||||
// link_node_t e = new_node_entry( value, node->prev, node );
|
// link_node_t e = new_node_entry( value, node->prev, node );
|
||||||
// e->prev->next = e;
|
// e->prev->next = e;
|
||||||
@ -136,10 +136,10 @@ __STATIC_API__ link_node_t get_node(
|
|||||||
//}
|
//}
|
||||||
#define insert_before( link, node, value ) \
|
#define insert_before( link, node, value ) \
|
||||||
{ \
|
{ \
|
||||||
link_node_t e = new_node_entry( value, node->prev, node ); \
|
link_node_t e = new_node_entry( value, node->prev, node ); \
|
||||||
e->prev->next = e; \
|
e->prev->next = e; \
|
||||||
e->next->prev = e; \
|
e->next->prev = e; \
|
||||||
link->size++; \
|
link->size++; \
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -150,7 +150,7 @@ __STATIC_API__ link_node_t get_node(
|
|||||||
* @return the value of the removed node.
|
* @return the value of the removed node.
|
||||||
*/
|
*/
|
||||||
__STATIC_API__ void * remove_node(
|
__STATIC_API__ void * remove_node(
|
||||||
friso_link_t link, link_node_t node )
|
friso_link_t link, link_node_t node )
|
||||||
{
|
{
|
||||||
void * _value = node->value;
|
void * _value = node->value;
|
||||||
|
|
||||||
@ -166,18 +166,18 @@ __STATIC_API__ void * remove_node(
|
|||||||
|
|
||||||
//add a new node to the link list.(insert just before the tail)
|
//add a new node to the link list.(insert just before the tail)
|
||||||
FRISO_API void link_list_add(
|
FRISO_API void link_list_add(
|
||||||
friso_link_t link, void * value )
|
friso_link_t link, void * value )
|
||||||
{
|
{
|
||||||
insert_before( link, link->tail, value );
|
insert_before( link, link->tail, value );
|
||||||
}
|
}
|
||||||
|
|
||||||
//add a new node before the given index.
|
//add a new node before the given index.
|
||||||
FRISO_API void link_list_insert_before(
|
FRISO_API void link_list_insert_before(
|
||||||
friso_link_t link, uint_t idx, void * value )
|
friso_link_t link, uint_t idx, void * value )
|
||||||
{
|
{
|
||||||
link_node_t node = get_node( link, idx );
|
link_node_t node = get_node( link, idx );
|
||||||
if ( node != NULL ) {
|
if ( node != NULL ) {
|
||||||
insert_before( link, node, value );
|
insert_before( link, node, value );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -187,11 +187,11 @@ FRISO_API void link_list_insert_before(
|
|||||||
* @return the value of the node.
|
* @return the value of the node.
|
||||||
*/
|
*/
|
||||||
FRISO_API void * link_list_get(
|
FRISO_API void * link_list_get(
|
||||||
friso_link_t link, uint_t idx )
|
friso_link_t link, uint_t idx )
|
||||||
{
|
{
|
||||||
link_node_t node = get_node( link, idx );
|
link_node_t node = get_node( link, idx );
|
||||||
if ( node != NULL ) {
|
if ( node != NULL ) {
|
||||||
return node->value;
|
return node->value;
|
||||||
}
|
}
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
@ -199,20 +199,20 @@ FRISO_API void * link_list_get(
|
|||||||
/*
|
/*
|
||||||
* set the value of the node that located in the specified position.
|
* set the value of the node that located in the specified position.
|
||||||
* we did't free the allocation of the old value, we return it to you.
|
* we did't free the allocation of the old value, we return it to you.
|
||||||
* free it yourself when it is necessary.
|
* free it yourself when it is necessary.
|
||||||
*
|
*
|
||||||
* @return the old value.
|
* @return the old value.
|
||||||
*/
|
*/
|
||||||
FRISO_API void *link_list_set(
|
FRISO_API void *link_list_set(
|
||||||
friso_link_t link,
|
friso_link_t link,
|
||||||
uint_t idx, void * value )
|
uint_t idx, void * value )
|
||||||
{
|
{
|
||||||
link_node_t node = get_node( link, idx );
|
link_node_t node = get_node( link, idx );
|
||||||
void * _value = NULL;
|
void * _value = NULL;
|
||||||
|
|
||||||
if ( node != NULL ) {
|
if ( node != NULL ) {
|
||||||
_value = node->value;
|
_value = node->value;
|
||||||
node->value = value;
|
node->value = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
return _value;
|
return _value;
|
||||||
@ -225,13 +225,13 @@ FRISO_API void *link_list_set(
|
|||||||
* @return the value of the node removed.
|
* @return the value of the node removed.
|
||||||
*/
|
*/
|
||||||
FRISO_API void *link_list_remove(
|
FRISO_API void *link_list_remove(
|
||||||
friso_link_t link, uint_t idx )
|
friso_link_t link, uint_t idx )
|
||||||
{
|
{
|
||||||
link_node_t node = get_node( link, idx );
|
link_node_t node = get_node( link, idx );
|
||||||
|
|
||||||
if ( node != NULL ) {
|
if ( node != NULL ) {
|
||||||
//printf("idx=%d, node->value=%s\n", idx, (string) node->value );
|
//printf("idx=%d, node->value=%s\n", idx, (string) node->value );
|
||||||
return remove_node( link, node );
|
return remove_node( link, node );
|
||||||
}
|
}
|
||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -244,43 +244,43 @@ FRISO_API void *link_list_remove(
|
|||||||
* @return the value of the node removed.
|
* @return the value of the node removed.
|
||||||
*/
|
*/
|
||||||
FRISO_API void *link_list_remove_node(
|
FRISO_API void *link_list_remove_node(
|
||||||
friso_link_t link,
|
friso_link_t link,
|
||||||
link_node_t node )
|
link_node_t node )
|
||||||
{
|
{
|
||||||
return remove_node( link, node );
|
return remove_node( link, node );
|
||||||
}
|
}
|
||||||
|
|
||||||
//remove the first node after the head
|
//remove the first node after the head
|
||||||
FRISO_API void *link_list_remove_first(
|
FRISO_API void *link_list_remove_first(
|
||||||
friso_link_t link )
|
friso_link_t link )
|
||||||
{
|
{
|
||||||
if ( link->size > 0 ) {
|
if ( link->size > 0 ) {
|
||||||
return remove_node( link, link->head->next );
|
return remove_node( link, link->head->next );
|
||||||
}
|
}
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
//remove the last node just before the tail.
|
//remove the last node just before the tail.
|
||||||
FRISO_API void *link_list_remove_last(
|
FRISO_API void *link_list_remove_last(
|
||||||
friso_link_t link )
|
friso_link_t link )
|
||||||
{
|
{
|
||||||
if ( link->size > 0 ) {
|
if ( link->size > 0 ) {
|
||||||
return remove_node( link, link->tail->prev );
|
return remove_node( link, link->tail->prev );
|
||||||
}
|
}
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
//append a node from the tail.
|
//append a node from the tail.
|
||||||
FRISO_API void link_list_add_last(
|
FRISO_API void link_list_add_last(
|
||||||
friso_link_t link,
|
friso_link_t link,
|
||||||
void *value )
|
void *value )
|
||||||
{
|
{
|
||||||
insert_before( link, link->tail, value );
|
insert_before( link, link->tail, value );
|
||||||
}
|
}
|
||||||
|
|
||||||
//append a note just after the head.
|
//append a note just after the head.
|
||||||
FRISO_API void link_list_add_first(
|
FRISO_API void link_list_add_first(
|
||||||
friso_link_t link, void *value )
|
friso_link_t link, void *value )
|
||||||
{
|
{
|
||||||
insert_before( link, link->head->next, value );
|
insert_before( link, link->head->next, value );
|
||||||
}
|
}
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
/*
|
/*
|
||||||
* utf-8 handle function implements.
|
* utf-8 handle function implements.
|
||||||
* you could modify it or re-release it but never for commercial use.
|
* you could modify it or re-release it but never for commercial use.
|
||||||
*
|
*
|
||||||
* @author chenxin <chenxin619315@gmail.com>
|
* @author chenxin <chenxin619315@gmail.com>
|
||||||
*/
|
*/
|
||||||
#include "friso_API.h"
|
#include "friso_API.h"
|
||||||
|
|
||||||
@ -11,14 +11,14 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
/* ******************************************
|
/* ******************************************
|
||||||
* fstring buffer functions implements. *
|
* fstring buffer functions implements. *
|
||||||
********************************************/
|
********************************************/
|
||||||
/**
|
/**
|
||||||
* create a new buffer
|
* create a new buffer
|
||||||
* @Note:
|
* @Note:
|
||||||
* 1. it's real length is 1 byte greater than the specifield value
|
* 1. it's real length is 1 byte greater than the specifield value
|
||||||
* 2. we did not do any optimization for the memory allocation to ...
|
* 2. we did not do any optimization for the memory allocation to ...
|
||||||
* avoid the memory defragmentation.
|
* avoid the memory defragmentation.
|
||||||
*
|
*
|
||||||
* @date: 2014-10-16
|
* @date: 2014-10-16
|
||||||
*/
|
*/
|
||||||
@ -26,7 +26,7 @@ __STATIC_API__ fstring create_buffer( uint_t length )
|
|||||||
{
|
{
|
||||||
fstring buffer = ( fstring ) FRISO_MALLOC( length + 1 );
|
fstring buffer = ( fstring ) FRISO_MALLOC( length + 1 );
|
||||||
if ( buffer == NULL ) {
|
if ( buffer == NULL ) {
|
||||||
___ALLOCATION_ERROR___
|
___ALLOCATION_ERROR___
|
||||||
}
|
}
|
||||||
|
|
||||||
memset( buffer, 0x00, length + 1 );
|
memset( buffer, 0x00, length + 1 );
|
||||||
@ -36,7 +36,7 @@ __STATIC_API__ fstring create_buffer( uint_t length )
|
|||||||
|
|
||||||
//the __allocs should not be smaller than sb->length
|
//the __allocs should not be smaller than sb->length
|
||||||
__STATIC_API__ string_buffer_t resize_buffer(
|
__STATIC_API__ string_buffer_t resize_buffer(
|
||||||
string_buffer_t sb, uint_t __allocs )
|
string_buffer_t sb, uint_t __allocs )
|
||||||
{
|
{
|
||||||
//create a new buffer.
|
//create a new buffer.
|
||||||
//if ( __allocs < sb->length ) __allocs = sb->length + 1;
|
//if ( __allocs < sb->length ) __allocs = sb->length + 1;
|
||||||
@ -44,7 +44,7 @@ __STATIC_API__ string_buffer_t resize_buffer(
|
|||||||
|
|
||||||
//register uint_t t;
|
//register uint_t t;
|
||||||
//for ( t = 0; t < sb->length; t++ ) {
|
//for ( t = 0; t < sb->length; t++ ) {
|
||||||
// str[t] = sb->buffer[t];
|
// str[t] = sb->buffer[t];
|
||||||
//}
|
//}
|
||||||
memcpy( str, sb->buffer, sb->length );
|
memcpy( str, sb->buffer, sb->length );
|
||||||
FRISO_FREE( sb->buffer );
|
FRISO_FREE( sb->buffer );
|
||||||
@ -65,9 +65,9 @@ __STATIC_API__ string_buffer_t resize_buffer(
|
|||||||
FRISO_API string_buffer_t new_string_buffer_with_opacity( uint_t opacity )
|
FRISO_API string_buffer_t new_string_buffer_with_opacity( uint_t opacity )
|
||||||
{
|
{
|
||||||
string_buffer_t sb = ( string_buffer_t )
|
string_buffer_t sb = ( string_buffer_t )
|
||||||
FRISO_MALLOC( sizeof( string_buffer_entry ) );
|
FRISO_MALLOC( sizeof( string_buffer_entry ) );
|
||||||
if ( sb == NULL ) {
|
if ( sb == NULL ) {
|
||||||
___ALLOCATION_ERROR___
|
___ALLOCATION_ERROR___
|
||||||
}
|
}
|
||||||
|
|
||||||
sb->buffer = create_buffer( opacity );
|
sb->buffer = create_buffer( opacity );
|
||||||
@ -82,9 +82,9 @@ FRISO_API string_buffer_t new_string_buffer_with_string( fstring str )
|
|||||||
{
|
{
|
||||||
//buffer allocations.
|
//buffer allocations.
|
||||||
string_buffer_t sb = ( string_buffer_t )
|
string_buffer_t sb = ( string_buffer_t )
|
||||||
FRISO_MALLOC( sizeof( string_buffer_entry ) );
|
FRISO_MALLOC( sizeof( string_buffer_entry ) );
|
||||||
if ( sb == NULL ) {
|
if ( sb == NULL ) {
|
||||||
___ALLOCATION_ERROR___
|
___ALLOCATION_ERROR___
|
||||||
}
|
}
|
||||||
|
|
||||||
//initialize
|
//initialize
|
||||||
@ -95,7 +95,7 @@ FRISO_API string_buffer_t new_string_buffer_with_string( fstring str )
|
|||||||
//register uint_t t;
|
//register uint_t t;
|
||||||
//copy the str to the buffer.
|
//copy the str to the buffer.
|
||||||
//for ( t = 0; t < sb->length; t++ ) {
|
//for ( t = 0; t < sb->length; t++ ) {
|
||||||
// sb->buffer[t] = str[t];
|
// sb->buffer[t] = str[t];
|
||||||
//}
|
//}
|
||||||
memcpy( sb->buffer, str, sb->length );
|
memcpy( sb->buffer, str, sb->length );
|
||||||
|
|
||||||
@ -103,66 +103,66 @@ FRISO_API string_buffer_t new_string_buffer_with_string( fstring str )
|
|||||||
}
|
}
|
||||||
|
|
||||||
FRISO_API void string_buffer_append(
|
FRISO_API void string_buffer_append(
|
||||||
string_buffer_t sb, fstring __str )
|
string_buffer_t sb, fstring __str )
|
||||||
{
|
{
|
||||||
register uint_t __len__ = strlen( __str );
|
register uint_t __len__ = strlen( __str );
|
||||||
|
|
||||||
//check the necessity to resize the buffer.
|
//check the necessity to resize the buffer.
|
||||||
if ( sb->length + __len__ > sb->allocs ) {
|
if ( sb->length + __len__ > sb->allocs ) {
|
||||||
sb = resize_buffer( sb, ( sb->length + __len__ ) * 2 + 1 );
|
sb = resize_buffer( sb, ( sb->length + __len__ ) * 2 + 1 );
|
||||||
}
|
}
|
||||||
|
|
||||||
//register uint_t t;
|
//register uint_t t;
|
||||||
////copy the __str to the buffer.
|
////copy the __str to the buffer.
|
||||||
//for ( t = 0; t < __len__; t++ ) {
|
//for ( t = 0; t < __len__; t++ ) {
|
||||||
// sb->buffer[ sb->length++ ] = __str[t];
|
// sb->buffer[ sb->length++ ] = __str[t];
|
||||||
//}
|
//}
|
||||||
memcpy( sb->buffer + sb->length, __str, __len__ );
|
memcpy( sb->buffer + sb->length, __str, __len__ );
|
||||||
sb->length += __len__;
|
sb->length += __len__;
|
||||||
}
|
}
|
||||||
|
|
||||||
FRISO_API void string_buffer_append_char(
|
FRISO_API void string_buffer_append_char(
|
||||||
string_buffer_t sb, char ch )
|
string_buffer_t sb, char ch )
|
||||||
{
|
{
|
||||||
//check the necessity to resize the buffer.
|
//check the necessity to resize the buffer.
|
||||||
if ( sb->length + 1 > sb->allocs ) {
|
if ( sb->length + 1 > sb->allocs ) {
|
||||||
sb = resize_buffer( sb, sb->length * 2 + 1 );
|
sb = resize_buffer( sb, sb->length * 2 + 1 );
|
||||||
}
|
}
|
||||||
|
|
||||||
sb->buffer[sb->length++] = ch;
|
sb->buffer[sb->length++] = ch;
|
||||||
}
|
}
|
||||||
|
|
||||||
FRISO_API void string_buffer_insert(
|
FRISO_API void string_buffer_insert(
|
||||||
string_buffer_t sb,
|
string_buffer_t sb,
|
||||||
uint_t idx,
|
uint_t idx,
|
||||||
fstring __str )
|
fstring __str )
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* remove the given bytes from the buffer start from idx.
|
* remove the given bytes from the buffer start from idx.
|
||||||
* this will cause the byte move after the idx+length.
|
* this will cause the byte move after the idx+length.
|
||||||
*
|
*
|
||||||
* @return the new string.
|
* @return the new string.
|
||||||
*/
|
*/
|
||||||
FRISO_API fstring string_buffer_remove(
|
FRISO_API fstring string_buffer_remove(
|
||||||
string_buffer_t sb,
|
string_buffer_t sb,
|
||||||
uint_t idx,
|
uint_t idx,
|
||||||
uint_t length )
|
uint_t length )
|
||||||
{
|
{
|
||||||
uint_t t;
|
uint_t t;
|
||||||
//move the bytes after the idx + length
|
//move the bytes after the idx + length
|
||||||
for ( t = idx + length; t < sb->length; t++ ) {
|
for ( t = idx + length; t < sb->length; t++ ) {
|
||||||
sb->buffer[t - length] = sb->buffer[t];
|
sb->buffer[t - length] = sb->buffer[t];
|
||||||
}
|
}
|
||||||
sb->buffer[t] = '\0';
|
sb->buffer[t] = '\0';
|
||||||
//memcpy( sb->buffer + idx,
|
//memcpy( sb->buffer + idx,
|
||||||
// sb->buffer + idx + length,
|
// sb->buffer + idx + length,
|
||||||
// sb->length - idx - length );
|
// sb->length - idx - length );
|
||||||
|
|
||||||
t = sb->length - idx;
|
t = sb->length - idx;
|
||||||
if ( t > 0 ) {
|
if ( t > 0 ) {
|
||||||
sb->length -= ( t > length ) ? length : t;
|
sb->length -= ( t > length ) ? length : t;
|
||||||
}
|
}
|
||||||
sb->buffer[sb->length-1] = '\0';
|
sb->buffer[sb->length-1] = '\0';
|
||||||
|
|
||||||
@ -171,13 +171,13 @@ FRISO_API fstring string_buffer_remove(
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* turn the string_buffer to a string.
|
* turn the string_buffer to a string.
|
||||||
* or return the buffer of the string_buffer.
|
* or return the buffer of the string_buffer.
|
||||||
*/
|
*/
|
||||||
FRISO_API string_buffer_t string_buffer_trim( string_buffer_t sb )
|
FRISO_API string_buffer_t string_buffer_trim( string_buffer_t sb )
|
||||||
{
|
{
|
||||||
//resize the buffer.
|
//resize the buffer.
|
||||||
if ( sb->length < sb->allocs - 1 ) {
|
if ( sb->length < sb->allocs - 1 ) {
|
||||||
sb = resize_buffer( sb, sb->length + 1 );
|
sb = resize_buffer( sb, sb->length + 1 );
|
||||||
}
|
}
|
||||||
return sb;
|
return sb;
|
||||||
}
|
}
|
||||||
@ -185,8 +185,8 @@ FRISO_API string_buffer_t string_buffer_trim( string_buffer_t sb )
|
|||||||
/*
|
/*
|
||||||
* free the given fstring buffer.
|
* free the given fstring buffer.
|
||||||
* and this function will not free the allocations of the
|
* and this function will not free the allocations of the
|
||||||
* string_buffer_t->buffer, we return it to you, if there is
|
* string_buffer_t->buffer, we return it to you, if there is
|
||||||
* a necessary you could free it youself by calling free();
|
* a necessary you could free it youself by calling free();
|
||||||
*/
|
*/
|
||||||
FRISO_API fstring string_buffer_devote( string_buffer_t sb )
|
FRISO_API fstring string_buffer_devote( string_buffer_t sb )
|
||||||
{
|
{
|
||||||
@ -197,7 +197,7 @@ FRISO_API fstring string_buffer_devote( string_buffer_t sb )
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* clear the given fstring buffer.
|
* clear the given fstring buffer.
|
||||||
* reset its buffer with 0 and reset its length to 0.
|
* reset its buffer with 0 and reset its length to 0.
|
||||||
*/
|
*/
|
||||||
FRISO_API void string_buffer_clear( string_buffer_t sb )
|
FRISO_API void string_buffer_clear( string_buffer_t sb )
|
||||||
{
|
{
|
||||||
@ -216,17 +216,17 @@ FRISO_API void free_string_buffer( string_buffer_t sb )
|
|||||||
/**
|
/**
|
||||||
* create a new string_split_entry.
|
* create a new string_split_entry.
|
||||||
*
|
*
|
||||||
* @param source
|
* @param source
|
||||||
* @return string_split_t;
|
* @return string_split_t;
|
||||||
*/
|
*/
|
||||||
FRISO_API string_split_t new_string_split(
|
FRISO_API string_split_t new_string_split(
|
||||||
fstring delimiter,
|
fstring delimiter,
|
||||||
fstring source )
|
fstring source )
|
||||||
{
|
{
|
||||||
string_split_t e = ( string_split_t )
|
string_split_t e = ( string_split_t )
|
||||||
FRISO_MALLOC( sizeof( string_split_entry ) );
|
FRISO_MALLOC( sizeof( string_split_entry ) );
|
||||||
if ( e == NULL ) {
|
if ( e == NULL ) {
|
||||||
___ALLOCATION_ERROR___;
|
___ALLOCATION_ERROR___;
|
||||||
}
|
}
|
||||||
|
|
||||||
e->delimiter = delimiter;
|
e->delimiter = delimiter;
|
||||||
@ -239,19 +239,19 @@ FRISO_API string_split_t new_string_split(
|
|||||||
}
|
}
|
||||||
|
|
||||||
FRISO_API void string_split_reset(
|
FRISO_API void string_split_reset(
|
||||||
string_split_t sst,
|
string_split_t sst,
|
||||||
fstring delimiter,
|
fstring delimiter,
|
||||||
fstring source )
|
fstring source )
|
||||||
{
|
{
|
||||||
sst->delimiter = delimiter;
|
sst->delimiter = delimiter;
|
||||||
sst->delLen = strlen(delimiter);
|
sst->delLen = strlen(delimiter);
|
||||||
sst->source = source;
|
sst->source = source;
|
||||||
sst->srcLen = strlen(source);
|
sst->srcLen = strlen(source);
|
||||||
sst->idx = 0;
|
sst->idx = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
FRISO_API void string_split_set_source(
|
FRISO_API void string_split_set_source(
|
||||||
string_split_t sst, fstring source )
|
string_split_t sst, fstring source )
|
||||||
{
|
{
|
||||||
sst->source = source;
|
sst->source = source;
|
||||||
sst->srcLen = strlen(source);
|
sst->srcLen = strlen(source);
|
||||||
@ -259,7 +259,7 @@ FRISO_API void string_split_set_source(
|
|||||||
}
|
}
|
||||||
|
|
||||||
FRISO_API void string_split_set_delimiter(
|
FRISO_API void string_split_set_delimiter(
|
||||||
string_split_t sst, fstring delimiter )
|
string_split_t sst, fstring delimiter )
|
||||||
{
|
{
|
||||||
sst->delimiter = delimiter;
|
sst->delimiter = delimiter;
|
||||||
sst->delLen = strlen( delimiter );
|
sst->delLen = strlen( delimiter );
|
||||||
@ -273,15 +273,15 @@ FRISO_API void free_string_split( string_split_t sst )
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* get the next split fstring, and copy the
|
* get the next split fstring, and copy the
|
||||||
* splited fstring into the __dst buffer .
|
* splited fstring into the __dst buffer .
|
||||||
*
|
*
|
||||||
* @param string_split_t
|
* @param string_split_t
|
||||||
* @param __dst
|
* @param __dst
|
||||||
* @return fstring (NULL if reach the end of the source
|
* @return fstring (NULL if reach the end of the source
|
||||||
* or there is no more segmentation)
|
* or there is no more segmentation)
|
||||||
*/
|
*/
|
||||||
FRISO_API fstring string_split_next(
|
FRISO_API fstring string_split_next(
|
||||||
string_split_t sst, fstring __dst)
|
string_split_t sst, fstring __dst)
|
||||||
{
|
{
|
||||||
uint_t i, _ok;
|
uint_t i, _ok;
|
||||||
fstring _dst = __dst;
|
fstring _dst = __dst;
|
||||||
@ -291,28 +291,28 @@ FRISO_API fstring string_split_next(
|
|||||||
|
|
||||||
while ( 1 )
|
while ( 1 )
|
||||||
{
|
{
|
||||||
_ok = 1;
|
_ok = 1;
|
||||||
for ( i = 0; i < sst->delLen
|
for ( i = 0; i < sst->delLen
|
||||||
&& (sst->idx + i < sst->srcLen); i++ )
|
&& (sst->idx + i < sst->srcLen); i++ )
|
||||||
{
|
{
|
||||||
if ( sst->source[sst->idx+i] != sst->delimiter[i] )
|
if ( sst->source[sst->idx+i] != sst->delimiter[i] )
|
||||||
{
|
{
|
||||||
_ok = 0;
|
_ok = 0;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//find the delimiter here,
|
//find the delimiter here,
|
||||||
//break the loop and self plus the sst->idx, then return the buffer .
|
//break the loop and self plus the sst->idx, then return the buffer .
|
||||||
if ( _ok == 1 ) {
|
if ( _ok == 1 ) {
|
||||||
sst->idx += sst->delLen;
|
sst->idx += sst->delLen;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
//coy the char to the buffer
|
//coy the char to the buffer
|
||||||
*_dst++ = sst->source[sst->idx++];
|
*_dst++ = sst->source[sst->idx++];
|
||||||
//check if reach the end of the fstring
|
//check if reach the end of the fstring
|
||||||
if ( sst->idx >= sst->srcLen ) break;
|
if ( sst->idx >= sst->srcLen ) break;
|
||||||
}
|
}
|
||||||
|
|
||||||
*_dst = '\0';
|
*_dst = '\0';
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
/*
|
/*
|
||||||
* dynamatic array test program.
|
* dynamatic array test program.
|
||||||
*
|
*
|
||||||
* @author chenxin
|
* @author chenxin
|
||||||
* @email chenxin619315@gmail.com
|
* @email chenxin619315@gmail.com
|
||||||
*/
|
*/
|
||||||
#include "friso_API.h"
|
#include "friso_API.h"
|
||||||
|
|
||||||
@ -10,42 +10,42 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
int main( int argc, char **args ) {
|
int main( int argc, char **args ) {
|
||||||
|
|
||||||
//create a new array list.
|
//create a new array list.
|
||||||
friso_array_t array = new_array_list();
|
friso_array_t array = new_array_list();
|
||||||
fstring keys[] = {
|
fstring keys[] = {
|
||||||
"chenmanwen", "yangqinghua",
|
"chenmanwen", "yangqinghua",
|
||||||
"chenxin", "luojiangyan", "xiaoyanzi", "bibi",
|
"chenxin", "luojiangyan", "xiaoyanzi", "bibi",
|
||||||
"zhangrenfang", "yangjian",
|
"zhangrenfang", "yangjian",
|
||||||
"liuxiao", "pankai",
|
"liuxiao", "pankai",
|
||||||
"chenpei", "liheng", "zhangzhigang", "zhgangyishao", "yangjiangbo",
|
"chenpei", "liheng", "zhangzhigang", "zhgangyishao", "yangjiangbo",
|
||||||
"caizaili", "panpan", "xiaolude", "yintanwen"
|
"caizaili", "panpan", "xiaolude", "yintanwen"
|
||||||
};
|
};
|
||||||
int j, idx = 2, len = sizeof( keys ) / sizeof( fstring );
|
int j, idx = 2, len = sizeof( keys ) / sizeof( fstring );
|
||||||
|
|
||||||
for ( j = 0; j < len; j++ ) {
|
for ( j = 0; j < len; j++ ) {
|
||||||
array_list_add( array, keys[j] );
|
array_list_add( array, keys[j] );
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("length=%d, allocations=%d\n", array->length, array->allocs );
|
printf("length=%d, allocations=%d\n", array->length, array->allocs );
|
||||||
array_list_trim( array );
|
array_list_trim( array );
|
||||||
printf("after tirm length=%d, allocations=%d\n", array->length, array->allocs );
|
printf("after tirm length=%d, allocations=%d\n", array->length, array->allocs );
|
||||||
printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) );
|
printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) );
|
||||||
|
|
||||||
printf("\nAfter set %dth item.\n", idx );
|
printf("\nAfter set %dth item.\n", idx );
|
||||||
array_list_set( array, idx, "chenxin__" );
|
array_list_set( array, idx, "chenxin__" );
|
||||||
printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) );
|
printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) );
|
||||||
|
|
||||||
printf("\nAfter remove %dth item.\n", idx );
|
printf("\nAfter remove %dth item.\n", idx );
|
||||||
array_list_remove( array, idx );
|
array_list_remove( array, idx );
|
||||||
printf("length=%d, allocations=%d\n", array->length, array->allocs );
|
printf("length=%d, allocations=%d\n", array->length, array->allocs );
|
||||||
printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) );
|
printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) );
|
||||||
|
|
||||||
printf("\nInsert a item at %dth\n", idx );
|
printf("\nInsert a item at %dth\n", idx );
|
||||||
array_list_insert( array, idx, "*chenxin*" );
|
array_list_insert( array, idx, "*chenxin*" );
|
||||||
printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) );
|
printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) );
|
||||||
|
|
||||||
free_array_list( array );
|
free_array_list( array );
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
190
src/tst-friso.c
190
src/tst-friso.c
@ -1,8 +1,8 @@
|
|||||||
/*
|
/*
|
||||||
* Friso test program.
|
* Friso test program.
|
||||||
* Of couse you can make it a perfect demo for friso.
|
* Of couse you can make it a perfect demo for friso.
|
||||||
* all threads or proccess share the same friso_t,
|
* all threads or proccess share the same friso_t,
|
||||||
* defferent threads/proccess use defferent friso_task_t.
|
* defferent threads/proccess use defferent friso_task_t.
|
||||||
* and you could share the friso_config_t if you wish...
|
* and you could share the friso_config_t if you wish...
|
||||||
*
|
*
|
||||||
* @author chenxin <chenxin619315@gmail.com>
|
* @author chenxin <chenxin619315@gmail.com>
|
||||||
@ -17,33 +17,33 @@
|
|||||||
|
|
||||||
#define __LENGTH__ 15
|
#define __LENGTH__ 15
|
||||||
#define __INPUT_LENGTH__ 20480
|
#define __INPUT_LENGTH__ 20480
|
||||||
#define ___EXIT_INFO___ \
|
#define ___EXIT_INFO___ \
|
||||||
println("Thanks for trying friso."); \
|
println("Thanks for trying friso."); \
|
||||||
break;
|
break;
|
||||||
|
|
||||||
#define ___ABOUT___ \
|
#define ___ABOUT___ \
|
||||||
println("+-----------------------------------------------------------+"); \
|
println("+-----------------------------------------------------------+"); \
|
||||||
println("| friso - a chinese word segmentation writen by c. |"); \
|
println("| friso - a chinese word segmentation writen by c. |"); \
|
||||||
println("| bug report email - chenxin619315@gmail.com. |"); \
|
println("| bug report email - chenxin619315@gmail.com. |"); \
|
||||||
println("| or: visit http://code.google.com/p/friso. |"); \
|
println("| or: visit http://code.google.com/p/friso. |"); \
|
||||||
println("| java edition for http://code.google.com/p/jcseg |"); \
|
println("| java edition for http://code.google.com/p/jcseg |"); \
|
||||||
println("| type 'quit' to exit the program. |"); \
|
println("| type 'quit' to exit the program. |"); \
|
||||||
println("+-----------------------------------------------------------+");
|
println("+-----------------------------------------------------------+");
|
||||||
|
|
||||||
//read a line from a command line.
|
//read a line from a command line.
|
||||||
static fstring getLine( FILE *fp, fstring __dst )
|
static fstring getLine( FILE *fp, fstring __dst )
|
||||||
{
|
{
|
||||||
register int c;
|
register int c;
|
||||||
register fstring cs;
|
register fstring cs;
|
||||||
|
|
||||||
cs = __dst;
|
cs = __dst;
|
||||||
while ( ( c = getc( fp ) ) != EOF ) {
|
while ( ( c = getc( fp ) ) != EOF ) {
|
||||||
if ( c == '\n' ) break;
|
if ( c == '\n' ) break;
|
||||||
*cs++ = c;
|
*cs++ = c;
|
||||||
}
|
}
|
||||||
*cs = '\0';
|
*cs = '\0';
|
||||||
|
|
||||||
return ( c == EOF && cs == __dst ) ? NULL : __dst;
|
return ( c == EOF && cs == __dst ) ? NULL : __dst;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*static void printcode( fstring str ) {
|
/*static void printcode( fstring str ) {
|
||||||
@ -59,94 +59,94 @@ static fstring getLine( FILE *fp, fstring __dst )
|
|||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
|
|
||||||
clock_t s_time, e_time;
|
clock_t s_time, e_time;
|
||||||
char line[__INPUT_LENGTH__] = {0};
|
char line[__INPUT_LENGTH__] = {0};
|
||||||
int i;
|
int i;
|
||||||
fstring __path__ = NULL, mode = NULL;
|
fstring __path__ = NULL, mode = NULL;
|
||||||
|
|
||||||
friso_t friso;
|
friso_t friso;
|
||||||
friso_config_t config;
|
friso_config_t config;
|
||||||
friso_task_t task;
|
friso_task_t task;
|
||||||
|
|
||||||
//get the lexicon directory
|
//get the lexicon directory
|
||||||
for ( i = 0; i < argc; i++ ) {
|
for ( i = 0; i < argc; i++ ) {
|
||||||
if ( strcasecmp( "-init", argv[i] ) == 0 ) {
|
if ( strcasecmp( "-init", argv[i] ) == 0 ) {
|
||||||
__path__ = argv[i+1];
|
__path__ = argv[i+1];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ( __path__ == NULL ) {
|
if ( __path__ == NULL ) {
|
||||||
println("Usage: friso -init lexicon path");
|
println("Usage: friso -init lexicon path");
|
||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
s_time = clock();
|
s_time = clock();
|
||||||
|
|
||||||
//initialize
|
//initialize
|
||||||
friso = friso_new();
|
friso = friso_new();
|
||||||
config = friso_new_config();
|
config = friso_new_config();
|
||||||
/*friso_dic_t dic = friso_dic_new();
|
/*friso_dic_t dic = friso_dic_new();
|
||||||
friso_dic_load_from_ifile( dic, __path__, __LENGTH__ );
|
friso_dic_load_from_ifile( dic, __path__, __LENGTH__ );
|
||||||
friso_set_dic( friso, dic );
|
friso_set_dic( friso, dic );
|
||||||
friso_set_mode( friso, __FRISO_COMPLEX_MODE__ );*/
|
friso_set_mode( friso, __FRISO_COMPLEX_MODE__ );*/
|
||||||
if ( friso_init_from_ifile(friso, config, __path__) != 1 ) {
|
if ( friso_init_from_ifile(friso, config, __path__) != 1 ) {
|
||||||
printf("fail to initialize friso and config.");
|
printf("fail to initialize friso and config.");
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
switch ( config->mode )
|
switch ( config->mode )
|
||||||
{
|
{
|
||||||
case __FRISO_SIMPLE_MODE__:
|
case __FRISO_SIMPLE_MODE__:
|
||||||
mode = "Simple";
|
mode = "Simple";
|
||||||
break;
|
break;
|
||||||
case __FRISO_COMPLEX_MODE__:
|
case __FRISO_COMPLEX_MODE__:
|
||||||
mode = "Complex";
|
mode = "Complex";
|
||||||
break;
|
break;
|
||||||
case __FRISO_DETECT_MODE__:
|
case __FRISO_DETECT_MODE__:
|
||||||
mode = "Detect";
|
mode = "Detect";
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
//friso_set_mode( config, __FRISO_DETECT_MODE__ );
|
//friso_set_mode( config, __FRISO_DETECT_MODE__ );
|
||||||
//printf("clr_stw=%d\n", friso->clr_stw);
|
//printf("clr_stw=%d\n", friso->clr_stw);
|
||||||
//printf("match c++?%d\n", friso_dic_match( friso->dic, __LEX_ENPUN_WORDS__, "c++" ));
|
//printf("match c++?%d\n", friso_dic_match( friso->dic, __LEX_ENPUN_WORDS__, "c++" ));
|
||||||
//printf("match(研究)?%d\n", friso_dic_match( friso->dic, __LEX_CJK_WORDS__, "研究"));
|
//printf("match(研究)?%d\n", friso_dic_match( friso->dic, __LEX_CJK_WORDS__, "研究"));
|
||||||
|
|
||||||
e_time = clock();
|
e_time = clock();
|
||||||
|
|
||||||
printf("Initialized in %fsec\n", (double) ( e_time - s_time ) / CLOCKS_PER_SEC );
|
printf("Initialized in %fsec\n", (double) ( e_time - s_time ) / CLOCKS_PER_SEC );
|
||||||
printf("Mode: %s\n", mode);
|
printf("Mode: %s\n", mode);
|
||||||
printf("+-Version: %s (%s)\n", friso_version(), friso->charset == FRISO_UTF8 ? "UTF-8" : "GBK" );
|
printf("+-Version: %s (%s)\n", friso_version(), friso->charset == FRISO_UTF8 ? "UTF-8" : "GBK" );
|
||||||
___ABOUT___;
|
___ABOUT___;
|
||||||
|
|
||||||
//set the task.
|
//set the task.
|
||||||
task = friso_new_task();
|
task = friso_new_task();
|
||||||
|
|
||||||
while ( 1 )
|
while ( 1 )
|
||||||
{
|
{
|
||||||
print("friso>> ");
|
print("friso>> ");
|
||||||
getLine( stdin, line );
|
getLine( stdin, line );
|
||||||
//exit the programe
|
//exit the programe
|
||||||
if ( strcasecmp( line, "quit" ) == 0 ) {
|
if ( strcasecmp( line, "quit" ) == 0 ) {
|
||||||
___EXIT_INFO___
|
___EXIT_INFO___
|
||||||
}
|
}
|
||||||
|
|
||||||
//for ( i = 0; i < 1000000; i++ ) {
|
//for ( i = 0; i < 1000000; i++ ) {
|
||||||
//set the task text.
|
//set the task text.
|
||||||
friso_set_text( task, line );
|
friso_set_text( task, line );
|
||||||
println("分词结果:");
|
println("分词结果:");
|
||||||
|
|
||||||
s_time = clock();
|
s_time = clock();
|
||||||
while ( ( config->next_token( friso, config, task ) ) != NULL )
|
while ( ( config->next_token( friso, config, task ) ) != NULL )
|
||||||
{
|
{
|
||||||
//printf("%s[%d, %d, %d] ", task->token->word,
|
//printf("%s[%d, %d, %d] ", task->token->word,
|
||||||
// task->token->offset, task->token->length, task->token->rlen );
|
// task->token->offset, task->token->length, task->token->rlen );
|
||||||
printf("%s ", task->token->word );
|
printf("%s ", task->token->word );
|
||||||
}
|
}
|
||||||
//}
|
//}
|
||||||
e_time = clock();
|
e_time = clock();
|
||||||
printf("\nDone, cost < %fsec\n", ( (double)(e_time - s_time) ) / CLOCKS_PER_SEC );
|
printf("\nDone, cost < %fsec\n", ( (double)(e_time - s_time) ) / CLOCKS_PER_SEC );
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
friso_free_task( task );
|
friso_free_task( task );
|
||||||
|
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
/**
|
/**
|
||||||
* File Explain.
|
* File Explain.
|
||||||
*
|
*
|
||||||
* @author chenxin
|
* @author chenxin
|
||||||
* @see http://www.webssky.com
|
* @see http://www.webssky.com
|
||||||
*/
|
*/
|
||||||
#include "friso_API.h"
|
#include "friso_API.h"
|
||||||
|
|
||||||
@ -10,28 +10,28 @@
|
|||||||
|
|
||||||
void print_hash_info( friso_hash_t _hash ) {
|
void print_hash_info( friso_hash_t _hash ) {
|
||||||
printf("info:length=%d, size=%d, facotr=%f, threshold=%d\n", _hash->length, \
|
printf("info:length=%d, size=%d, facotr=%f, threshold=%d\n", _hash->length, \
|
||||||
_hash->size, _hash->factor, _hash->threshold);
|
_hash->size, _hash->factor, _hash->threshold);
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
friso_hash_t _hash = new_hash_table();
|
friso_hash_t _hash = new_hash_table();
|
||||||
char *names[] = {
|
char *names[] = {
|
||||||
"陈满文", "阳清华",
|
"陈满文", "阳清华",
|
||||||
"陈鑫", "罗江艳",
|
"陈鑫", "罗江艳",
|
||||||
"小燕子", "比比",
|
"小燕子", "比比",
|
||||||
"张仁芳", "阳建",
|
"张仁芳", "阳建",
|
||||||
"陈配", "李恒",
|
"陈配", "李恒",
|
||||||
"张志刚", "张怡少",
|
"张志刚", "张怡少",
|
||||||
"阳江波", "蔡再利",
|
"阳江波", "蔡再利",
|
||||||
"阳绘章", "尹唐文",
|
"阳绘章", "尹唐文",
|
||||||
"谭志鹏", "肖路德",
|
"谭志鹏", "肖路德",
|
||||||
"潘凯", "刘潇",
|
"潘凯", "刘潇",
|
||||||
"马朝辉", "张强",
|
"马朝辉", "张强",
|
||||||
"殷美林", "元明清",
|
"殷美林", "元明清",
|
||||||
"周安", "郭桥安",
|
"周安", "郭桥安",
|
||||||
"刘敏", "黄广华",
|
"刘敏", "黄广华",
|
||||||
"李胜", "黄海清"
|
"李胜", "黄海清"
|
||||||
};
|
};
|
||||||
//char *str[] = {"陈鑫", "张仁芳", "比比"};
|
//char *str[] = {"陈鑫", "张仁芳", "比比"};
|
||||||
char **str = names;
|
char **str = names;
|
||||||
@ -39,7 +39,7 @@ int main(int argc, char **argv)
|
|||||||
|
|
||||||
print_hash_info( _hash );
|
print_hash_info( _hash );
|
||||||
for ( j = 0; j < len; j++) {
|
for ( j = 0; j < len; j++) {
|
||||||
hash_put_mapping( _hash, names[j], names[j] );
|
hash_put_mapping( _hash, names[j], names[j] );
|
||||||
}
|
}
|
||||||
|
|
||||||
print_hash_info( _hash );
|
print_hash_info( _hash );
|
||||||
@ -49,11 +49,11 @@ int main(int argc, char **argv)
|
|||||||
|
|
||||||
//remove mappings
|
//remove mappings
|
||||||
for ( j = 0; j < len; j++ ) {
|
for ( j = 0; j < len; j++ ) {
|
||||||
printf("Exist %s?%2d\n", str[j], hash_exist_mapping( _hash, str[j] ));
|
printf("Exist %s?%2d\n", str[j], hash_exist_mapping( _hash, str[j] ));
|
||||||
printf("Now, remove %s\n", str[j]);
|
printf("Now, remove %s\n", str[j]);
|
||||||
hash_remove_mapping( _hash, str[j] );
|
hash_remove_mapping( _hash, str[j] );
|
||||||
printf("Exist %s?%2d\n", str[j], hash_exist_mapping( _hash, str[j] ));
|
printf("Exist %s?%2d\n", str[j], hash_exist_mapping( _hash, str[j] ));
|
||||||
printf("*********************************\n");
|
printf("*********************************\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("Press any key to continue.");
|
printf("Press any key to continue.");
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
/*
|
/*
|
||||||
* lex functions test program.
|
* lex functions test program.
|
||||||
*
|
*
|
||||||
* @author chenxin
|
* @author chenxin
|
||||||
* @see http://www.webssky.com
|
* @see http://www.webssky.com
|
||||||
*/
|
*/
|
||||||
#include "friso.h"
|
#include "friso.h"
|
||||||
|
|
||||||
@ -11,10 +11,10 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#define __LENGTH__ 15
|
#define __LENGTH__ 15
|
||||||
#define ___PRINT_HELP_INFO___ \
|
#define ___PRINT_HELP_INFO___ \
|
||||||
printf("1. help print the current menu.\n"); \
|
printf("1. help print the current menu.\n"); \
|
||||||
printf("2. #set set the classify of the dictionary.\n"); \
|
printf("2. #set set the classify of the dictionary.\n"); \
|
||||||
printf("3. other search the words in the dictionary.\n"); \
|
printf("3. other search the words in the dictionary.\n"); \
|
||||||
printf("4. quit exit the programe.\n");
|
printf("4. quit exit the programe.\n");
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
@ -62,30 +62,30 @@ int main(int argc, char **argv)
|
|||||||
e_time = clock();
|
e_time = clock();
|
||||||
|
|
||||||
printf("Done, cost: %f sec, size=%d\n", ( double ) ( e_time - s_time ) / CLOCKS_PER_SEC, \
|
printf("Done, cost: %f sec, size=%d\n", ( double ) ( e_time - s_time ) / CLOCKS_PER_SEC, \
|
||||||
friso_all_dic_size( friso->dic ) );
|
friso_all_dic_size( friso->dic ) );
|
||||||
|
|
||||||
while ( 1 ) {
|
while ( 1 ) {
|
||||||
printf("friso-%d>> ", lex);
|
printf("friso-%d>> ", lex);
|
||||||
scanf("%s", _line);
|
scanf("%s", _line);
|
||||||
if ( strcmp( _line, "quit" ) == 0 ) {
|
if ( strcmp( _line, "quit" ) == 0 ) {
|
||||||
break;
|
break;
|
||||||
} else if ( strcmp( _line, "help" ) == 0 ) {
|
} else if ( strcmp( _line, "help" ) == 0 ) {
|
||||||
___PRINT_HELP_INFO___
|
___PRINT_HELP_INFO___
|
||||||
} else if ( strcmp( _line, "#set" ) == 0 ) {
|
} else if ( strcmp( _line, "#set" ) == 0 ) {
|
||||||
printf("lex_t>> ");
|
printf("lex_t>> ");
|
||||||
scanf("%d", &lex);
|
scanf("%d", &lex);
|
||||||
} else {
|
} else {
|
||||||
s_time = clock();
|
s_time = clock();
|
||||||
e = friso_dic_get( friso->dic, lex, _line );
|
e = friso_dic_get( friso->dic, lex, _line );
|
||||||
e_time = clock();
|
e_time = clock();
|
||||||
if ( e != NULL ) {
|
if ( e != NULL ) {
|
||||||
printf("word=%s, syn=%s, fre=%d, cost:%fsec\n",
|
printf("word=%s, syn=%s, fre=%d, cost:%fsec\n",
|
||||||
e->word, e->syn==NULL? "NULL" : (char *)e->syn->items[0], e->fre,
|
e->word, e->syn==NULL? "NULL" : (char *)e->syn->items[0], e->fre,
|
||||||
(double) ( e_time - s_time ) / CLOCKS_PER_SEC );
|
(double) ( e_time - s_time ) / CLOCKS_PER_SEC );
|
||||||
} else {
|
} else {
|
||||||
printf("%s was not found.\n", _line);
|
printf("%s was not found.\n", _line);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//friso_dic_free( friso->dic );
|
//friso_dic_free( friso->dic );
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
/*
|
/*
|
||||||
* link list test programe.
|
* link list test programe.
|
||||||
*
|
*
|
||||||
* @author chenxin
|
* @author chenxin
|
||||||
* @email chenxin619315@gmail.com
|
* @email chenxin619315@gmail.com
|
||||||
*/
|
*/
|
||||||
#include "friso_API.h"
|
#include "friso_API.h"
|
||||||
|
|
||||||
@ -13,12 +13,12 @@ int main( int argc, char **args ) {
|
|||||||
|
|
||||||
friso_link_t link;
|
friso_link_t link;
|
||||||
fstring keys[] = {
|
fstring keys[] = {
|
||||||
"chenmanwen", "yangqinghua",
|
"chenmanwen", "yangqinghua",
|
||||||
"chenxin", "luojiangyan", "xiaoyanzi", "bibi",
|
"chenxin", "luojiangyan", "xiaoyanzi", "bibi",
|
||||||
"zhangrenfang", "yangjian",
|
"zhangrenfang", "yangjian",
|
||||||
"liuxiao", "pankai",
|
"liuxiao", "pankai",
|
||||||
"chenpei", "liheng", "zhangzhigang", "zhgangyishao", "yangjiangbo",
|
"chenpei", "liheng", "zhangzhigang", "zhgangyishao", "yangjiangbo",
|
||||||
"caizaili", "panpan", "xiaolude", "yintanwen"
|
"caizaili", "panpan", "xiaolude", "yintanwen"
|
||||||
};
|
};
|
||||||
int j, len = sizeof( keys ) / sizeof( fstring );
|
int j, len = sizeof( keys ) / sizeof( fstring );
|
||||||
|
|
||||||
@ -28,15 +28,15 @@ int main( int argc, char **args ) {
|
|||||||
printf("size=%d\n", link->size );
|
printf("size=%d\n", link->size );
|
||||||
|
|
||||||
for ( j = 0; j < len; j++ ) {
|
for ( j = 0; j < len; j++ ) {
|
||||||
//link_add( link, keys[j] );
|
//link_add( link, keys[j] );
|
||||||
link_list_add_last( link, keys[j] );
|
link_list_add_last( link, keys[j] );
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("size=%d\n", link->size );
|
printf("size=%d\n", link->size );
|
||||||
|
|
||||||
for ( j = 0; j < len / 2; j++ ) {
|
for ( j = 0; j < len / 2; j++ ) {
|
||||||
//printf("idx=%d, remove %s\n", j, ( fstring ) link_remove( link, 0 ) );
|
//printf("idx=%d, remove %s\n", j, ( fstring ) link_remove( link, 0 ) );
|
||||||
printf("idx=%d, remove %s\n", j, ( fstring ) link_list_remove_first( link ) );
|
printf("idx=%d, remove %s\n", j, ( fstring ) link_list_remove_first( link ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("size=%d\n", link->size );
|
printf("size=%d\n", link->size );
|
||||||
|
@ -11,7 +11,7 @@
|
|||||||
|
|
||||||
int main ( int argc, char **args )
|
int main ( int argc, char **args )
|
||||||
{
|
{
|
||||||
fstring source = ",I am a chinese,,my name is chenxin,and i am the author of friso,bug report email chenxin619315@gmail.com,qq:1187582057";
|
fstring source = ",I am a chinese,,my name is chenxin,and i am the author of friso,bug report email chenxin619315@gmail.com,qq:1187582057";
|
||||||
char buffer[128];
|
char buffer[128];
|
||||||
string_split_t split = new_string_split(",", source );
|
string_split_t split = new_string_split(",", source );
|
||||||
|
|
||||||
@ -20,7 +20,7 @@ int main ( int argc, char **args )
|
|||||||
printf("sst->delLen=%d\n", split->delLen);
|
printf("sst->delLen=%d\n", split->delLen);
|
||||||
|
|
||||||
while ( string_split_next(split, buffer) != NULL) {
|
while ( string_split_next(split, buffer) != NULL) {
|
||||||
printf("buffer:%s\n", buffer);
|
printf("buffer:%s\n", buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
free_string_split(split);
|
free_string_split(split);
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* fstring handle mode test program.
|
* fstring handle mode test program.
|
||||||
*
|
*
|
||||||
* @author chenxin <chenxin619315@gmail.com>
|
* @author chenxin <chenxin619315@gmail.com>
|
||||||
*/
|
*/
|
||||||
#include "friso_API.h"
|
#include "friso_API.h"
|
||||||
|
|
||||||
@ -20,13 +20,13 @@ int main( int argc, char **args ) {
|
|||||||
|
|
||||||
|
|
||||||
for ( t = 0; t < length; t += bytes ) {
|
for ( t = 0; t < length; t += bytes ) {
|
||||||
bytes = get_utf8_bytes( *(str + t) );
|
bytes = get_utf8_bytes( *(str + t) );
|
||||||
if ( bytes == 0 ) continue;
|
if ( bytes == 0 ) continue;
|
||||||
for ( j = 0; j < bytes; j++ )
|
for ( j = 0; j < bytes; j++ )
|
||||||
word[j] = *(str + t + j );
|
word[j] = *(str + t + j );
|
||||||
word[j] = '\0';
|
word[j] = '\0';
|
||||||
string_buffer_append( sb, word );
|
string_buffer_append( sb, word );
|
||||||
printf("word=%s\n", word );
|
printf("word=%s\n", word );
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("length=%d, buffer=%s\n", sb->length, sb->buffer );
|
printf("length=%d, buffer=%s\n", sb->length, sb->buffer );
|
||||||
|
Loading…
Reference in New Issue
Block a user