code tab to 4 space

This commit is contained in:
chenxin 2015-12-07 11:42:33 +08:00
parent e9bf4a2536
commit a264922721
29 changed files with 3422 additions and 3422 deletions

View File

@ -9,9 +9,9 @@ friso-1.6.2:
3. friso deb | rpm支持 3. friso deb | rpm支持
Debian & Ubuntu: Debian & Ubuntu:
sudo apt-get install libfriso0 libfriso-dev sudo apt-get install libfriso0 libfriso-dev
CentOS & Fedora: CentOS & Fedora:
sudo yum install libfriso libfriso-devel sudo yum install libfriso libfriso-devel
4. 中文词性标注。 4. 中文词性标注。
@ -26,41 +26,41 @@ friso-1.6.2:
friso-1.6.1: friso-1.6.1:
1. friso.ini中friso.lex_dir增加相对friso.ini的路径支持 -done 1. friso.ini中friso.lex_dir增加相对friso.ini的路径支持 -done
2. 修复两处内存泄漏bug. -done 2. 修复两处内存泄漏bug. -done
3. 改善中英混合词的识别, 可以识别更多情况, 例如:高3 -done 3. 改善中英混合词的识别, 可以识别更多情况, 例如:高3 -done
4. 词库优化, 加入了一些新词条. -done 4. 词库优化, 加入了一些新词条. -done
5. 修复friso_dic_add & array_list_insert的两处代码bug -done 5. 修复friso_dic_add & array_list_insert的两处代码bug -done
6. 增加检测模式切分, 只返回词库中有的词条 -done 6. 增加检测模式切分, 只返回词库中有的词条 -done
7. 集成了php扩展绑定完美支持PHP分词 -done 7. 集成了php扩展绑定完美支持PHP分词 -done
friso-1.6.0: friso-1.6.0:
1. friso_string.c#utf8_decimal_string初始化bytes = 0, 1. friso_string.c#utf8_decimal_string初始化bytes = 0,
去除WinNT的Run-Time Check Failed. -done 去除WinNT的Run-Time Check Failed. -done
2. 复杂英文和数字组合的二次切分. 例如: QQ2013会被切分成: qq2013, qq, 2013. -done 2. 复杂英文和数字组合的二次切分. 例如: QQ2013会被切分成: qq2013, qq, 2013. -done
3. GBK编码支持. -done 3. GBK编码支持. -done
4. 增加了friso.ini中自定义保留标点, 去除了默认对"^,/,-,'"等标点的保留. -done 4. 增加了friso.ini中自定义保留标点, 去除了默认对"^,/,-,'"等标点的保留. -done
5. 使用掩码操作控制变量来代替了原来的多个控制变量. -done 5. 使用掩码操作控制变量来代替了原来的多个控制变量. -done
6. 切分结果friso_hits_t中增加了对词条类别和词条长度的返回纠正了offset的误差。 -done 6. 切分结果friso_hits_t中增加了对词条类别和词条长度的返回纠正了offset的误差。 -done
7. 做了一些优化,例如:同义词的追加(普通/sphinx定义)复杂的判断逻辑, 7. 做了一些优化,例如:同义词的追加(普通/sphinx定义)复杂的判断逻辑,
改为了使用掩码状态控制,不仅减少了代码量还提高了执行效率。 -done 改为了使用掩码状态控制,不仅减少了代码量还提高了执行效率。 -done
8. 更多的返回信息,增加了对切分词条的类别,长度,真实长度,词性(待实现)等信息的返回。 -done 8. 更多的返回信息,增加了对切分词条的类别,长度,真实长度,词性(待实现)等信息的返回。 -done
9. 增加了安装中头文件的自动拷贝usr/include/friso可以通过include <friso/xx.h>来引用头文件。 9. 增加了安装中头文件的自动拷贝usr/include/friso可以通过include <friso/xx.h>来引用头文件。
@ -83,18 +83,18 @@ friso-1.4:
1. 小数+单位无法识别的情况.更改friso_string#utf8_numeric_string()函数. 1. 小数+单位无法识别的情况.更改friso_string#utf8_numeric_string()函数.
2. 更改中英混合词的识别(目前可以识别中英任何一种组合). 2. 更改中英混合词的识别(目前可以识别中英任何一种组合).
英中: 例如: b超, 英中: 例如: b超,
英中英: a美1, 英中英: a美1,
英中英中: a哆啦a梦, 英中英中: a哆啦a梦,
中英: 卡拉ok, 中英: 卡拉ok,
中英中: 哆啦a梦, 中英中: 哆啦a梦,
中英中英: 中文a美a 中英中英: 中文a美a
3. 更改了单位组合, 现在可以组合的单位不局限是中文, 例如: ℃,℉ 3. 更改了单位组合, 现在可以组合的单位不局限是中文, 例如: ℃,℉
4. 对于未识别的字符, 给定一个开关选项来决定保留还是过滤. 4. 对于未识别的字符, 给定一个开关选项来决定保留还是过滤.
5. 英文同义词的追加(增加了lex-en.lex词库) 5. 英文同义词的追加(增加了lex-en.lex词库)
friso-1.3: friso-1.3:
@ -103,7 +103,7 @@ friso-1.3:
2. 部分简易函数使用了宏定义来代替, 减少函数的调用. 2. 部分简易函数使用了宏定义来代替, 减少函数的调用.
3. 保留了英文全半角和中文标点符号的切分.(可以通过过滤停止词来过滤不需要的标点) 3. 保留了英文全半角和中文标点符号的切分.(可以通过过滤停止词来过滤不需要的标点)
停止词词库中已经加入了全部的保留的标点, 也就是默认全部过滤了. 停止词词库中已经加入了全部的保留的标点, 也就是默认全部过滤了.
4. 修复friso_string#utf8_en_punctuation()函数一处bug. 4. 修复friso_string#utf8_en_punctuation()函数一处bug.

View File

@ -6,9 +6,9 @@ Friso是使用c语言开发的一款开源的高性能中文分词器使用
2。三种切分模式 2。三种切分模式
(1). 简易模式FMM算法适合速度要求场合。 (1). 简易模式FMM算法适合速度要求场合。
(2). 复杂模式 - MMSEG四种过滤算法,具有较高的歧义去除,分词准确率达到了98.41%。 (2). 复杂模式 - MMSEG四种过滤算法,具有较高的歧义去除,分词准确率达到了98.41%。
(3). (!New)检测模式:只返回词库中已有的词条,很适合某些应用场合。(1.6.1版本开始) (3). (!New)检测模式:只返回词库中已有的词条,很适合某些应用场合。(1.6.1版本开始)
请参考本算法的原作http://technology.chtsai.org/mmseg/。 请参考本算法的原作http://technology.chtsai.org/mmseg/。

View File

@ -8,6 +8,6 @@
// ARG_ENABLE("friso", "enable friso support", "no"); // ARG_ENABLE("friso", "enable friso support", "no");
if (PHP_FRISO != "no") { if (PHP_FRISO != "no") {
EXTENSION("friso", "friso.c"); EXTENSION("friso", "friso.c");
} }

View File

@ -20,53 +20,53 @@ echo "friso_version(): " , friso_version(), ", friso_charset(): ", friso_charset
echo "分词函数:<br />"; echo "分词函数:<br />";
if ( friso_charset() == 'UTF-8' ) if ( friso_charset() == 'UTF-8' )
{ {
$_str = "歧义和同义词:研究生命起源,混合词: 做B超检查身体x射线本质是什么今天去奇都ktv唱卡拉ok去哆啦a梦是一个动漫中的主角单位和全角: 2009年日开始大学之旅岳阳今天的气温为38.6℃, 也就是101.48℉, 英文数字: bug report chenxin619315@gmail.com or visit http://code.google.com/p/jcseg, we all admire the hacker spirit!特殊数字: ① ⑩ ⑽ ㈩."; $_str = "歧义和同义词:研究生命起源,混合词: 做B超检查身体x射线本质是什么今天去奇都ktv唱卡拉ok去哆啦a梦是一个动漫中的主角单位和全角: 2009年日开始大学之旅岳阳今天的气温为38.6℃, 也就是101.48℉, 英文数字: bug report chenxin619315@gmail.com or visit http://code.google.com/p/jcseg, we all admire the hacker spirit!特殊数字: ① ⑩ ⑽ ㈩.";
echo "<p>friso_split(\"" . $_str . "\")<p />"; echo "<p>friso_split(\"" . $_str . "\")<p />";
//API: //API:
//rb_split(string, Array, [long]) //rb_split(string, Array, [long])
//1.string: 要被切分的字符串。 //1.string: 要被切分的字符串。
//2.Array: 配置选项使用NULL来选择默认的配置(friso.ini中的配置)。 //2.Array: 配置选项使用NULL来选择默认的配置(friso.ini中的配置)。
//3.long: 可选参数,自定义切分返回选项,查看下面的$_rargs //3.long: 可选参数,自定义切分返回选项,查看下面的$_rargs
//1.完整的配置: //1.完整的配置:
//array('max_len'=>5, 'r_name'=>0, 'mix_len'=>2, 'lna_len'=>1, 'add_syn'=>1, //array('max_len'=>5, 'r_name'=>0, 'mix_len'=>2, 'lna_len'=>1, 'add_syn'=>1,
// 'clr_stw'=>1, 'keep_urec'=>0, 'spx_out'=>0, 'en_sseg'=> 1, 'st_minl'=>2, 'kpuncs'=>'.+#', 'mode'=>FRISO_COMPLEX); // 'clr_stw'=>1, 'keep_urec'=>0, 'spx_out'=>0, 'en_sseg'=> 1, 'st_minl'=>2, 'kpuncs'=>'.+#', 'mode'=>FRISO_COMPLEX);
//1.在不了解friso内核的情况下, 请不要随便更改nthreshold //1.在不了解friso内核的情况下, 请不要随便更改nthreshold
//2.使用NULL来使用php.ini中指定的friso.ini文件中的配置 //2.使用NULL来使用php.ini中指定的friso.ini文件中的配置
//2.返回选项: //2.返回选项:
//词条: FRISO_RET_WORD, 类别FRISO_RET_TYPE, 长度FRISO_RET_LENGTH, 真实长度FRISO_RET_RLEN, 偏移量FRISO_RET_OFF //词条: FRISO_RET_WORD, 类别FRISO_RET_TYPE, 长度FRISO_RET_LENGTH, 真实长度FRISO_RET_RLEN, 偏移量FRISO_RET_OFF
//词性FRISO_RET_POS(待实现) //词性FRISO_RET_POS(待实现)
$_rargs = FRISO_RET_TYPE | FRISO_RET_LEN | FRISO_RET_RLEN | FRISO_RET_OFF | FRISO_RET_POS; $_rargs = FRISO_RET_TYPE | FRISO_RET_LEN | FRISO_RET_RLEN | FRISO_RET_OFF | FRISO_RET_POS;
//$_rargs = 0; //$_rargs = 0;
//3.切分类别: //3.切分类别:
//CJK词条FRISO_TYP_CJK, 英中混合词(b超)FRISO_TYP_ECM中英混合词(卡拉ok)FRISO_TYP_CEM //CJK词条FRISO_TYP_CJK, 英中混合词(b超)FRISO_TYP_ECM中英混合词(卡拉ok)FRISO_TYP_CEM
//英文标点混合词(c++)FRISO_TYP_EPUN标点FRISO_TYP_PUN未知类别FRISO_TYP_UNK其他类别(同义词)FRISO_TYP_OTR //英文标点混合词(c++)FRISO_TYP_EPUN标点FRISO_TYP_PUN未知类别FRISO_TYP_UNK其他类别(同义词)FRISO_TYP_OTR
$_result = friso_split($_str, array('mode'=>FRISO_COMPLEX), $_rargs); $_result = friso_split($_str, array('mode'=>FRISO_COMPLEX), $_rargs);
unset($_str); unset($_str);
foreach ( $_result as $_val ) foreach ( $_result as $_val )
{ {
$_str = $_val['word']; $_str = $_val['word'];
if ( $_rargs != 0 ) { if ( $_rargs != 0 ) {
$_str .= '['; $_str .= '[';
if ( ($_rargs & FRISO_RET_TYPE) != 0 ) if ( ($_rargs & FRISO_RET_TYPE) != 0 )
$_str .= ', type: '.$_val['type']; //获取词条类别 $_str .= ', type: '.$_val['type']; //获取词条类别
if ( ($_rargs & FRISO_RET_LEN) != 0 ) if ( ($_rargs & FRISO_RET_LEN) != 0 )
$_str .= ', len: ' . $_val['len']; //词条长度 $_str .= ', len: ' . $_val['len']; //词条长度
if ( ($_rargs & FRISO_RET_RLEN) != 0 ) if ( ($_rargs & FRISO_RET_RLEN) != 0 )
$_str .= ', rlen: ' . $_val['rlen']; //词条真实长度 $_str .= ', rlen: ' . $_val['rlen']; //词条真实长度
if ( ($_rargs & FRISO_RET_OFF) != 0 ) if ( ($_rargs & FRISO_RET_OFF) != 0 )
$_str .= ', off: ' . $_val['off']; //词条偏移量 $_str .= ', off: ' . $_val['off']; //词条偏移量
if ( ($_rargs & FRISO_RET_POS) != 0 ) if ( ($_rargs & FRISO_RET_POS) != 0 )
$_str .= ', pos: ' . $_val['pos']; //词条词性 $_str .= ', pos: ' . $_val['pos']; //词条词性
$_str .= ']'; $_str .= ']';
} }
$_str .= '/&nbsp;&nbsp;&nbsp;'; $_str .= '/&nbsp;&nbsp;&nbsp;';
echo $_str; echo $_str;
} }
} }
else echo "set charset to UTF-8 to test function friso_split."; else echo "set charset to UTF-8 to test function friso_split.";
?> ?>

View File

@ -4,10 +4,10 @@ ini_set('magic_quotes_gpc', 0);
//check the charset //check the charset
if ( friso_charset() != "GBK" ) { if ( friso_charset() != "GBK" ) {
$_str = "Error: GBK charset required. <br />"; $_str = "Error: GBK charset required. <br />";
$_str .= "1. Modified friso.charset = 1 in your friso.ini .<br />"; $_str .= "1. Modified friso.charset = 1 in your friso.ini .<br />";
$_str .= "2. Modified friso.lex_dir = GBK lexicon abusolute path to load your GBK lexicon. <br />"; $_str .= "2. Modified friso.lex_dir = GBK lexicon abusolute path to load your GBK lexicon. <br />";
exit($_str); exit($_str);
} }
$text = ''; $text = '';
@ -15,139 +15,139 @@ $_timer = 0;
$_act = ''; $_act = '';
$_cfg = array('mode' => FRISO_COMPLEX); $_cfg = array('mode' => FRISO_COMPLEX);
if ( isset($_POST['_act']) && ($_act = $_POST['_act']) == 'split' ) { if ( isset($_POST['_act']) && ($_act = $_POST['_act']) == 'split' ) {
$text = &$_POST['text']; $text = &$_POST['text'];
$_cfg = &$_POST['config']; $_cfg = &$_POST['config'];
if ( ! isset($_cfg['add_syn']) ) $_cfg['add_syn'] = 0; if ( ! isset($_cfg['add_syn']) ) $_cfg['add_syn'] = 0;
if ( ! isset($_cfg['clr_stw']) ) $_cfg['clr_stw'] = 0; if ( ! isset($_cfg['clr_stw']) ) $_cfg['clr_stw'] = 0;
if ( ! isset($_cfg['keep_urec']) ) $_cfg['keep_urec'] = 0; if ( ! isset($_cfg['keep_urec']) ) $_cfg['keep_urec'] = 0;
if ( ! isset($_cfg['spx_out']) ) $_cfg['spx_out'] = 0; if ( ! isset($_cfg['spx_out']) ) $_cfg['spx_out'] = 0;
if ( ! isset($_cfg['en_sseg']) ) $_cfg['en_sseg'] = 0; if ( ! isset($_cfg['en_sseg']) ) $_cfg['en_sseg'] = 0;
$s_time = timer(); $s_time = timer();
$_ret = friso_split($text, $_cfg); $_ret = friso_split($text, $_cfg);
$_timer = timer() - $s_time; $_timer = timer() - $s_time;
} }
function timer() { function timer() {
list($msec, $sec) = explode(' ', microtime()); list($msec, $sec) = explode(' ', microtime());
return ((float)$msec + (float)$sec); return ((float)$msec + (float)$sec);
} }
?> ?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head> <head>
<title>GBK - robbe分词测试程序 </title> <title>GBK - robbe分词测试程序 </title>
<meta http-equiv="content-type" content="text/html;charset=GBK" /> <meta http-equiv="content-type" content="text/html;charset=GBK" />
<style type="text/css"> <style type="text/css">
#box {width: 1000px} #box {width: 1000px}
.input-text {border: 1px solid #CCC;width: 1000px;height: 180px;background-color: #FFF; .input-text {border: 1px solid #CCC;width: 1000px;height: 180px;background-color: #FFF;
color: #555;font-size: 14px;} color: #555;font-size: 14px;}
.link-box {overflow: hidden;zoom:1;padding-top:10px;} .link-box {overflow: hidden;zoom:1;padding-top:10px;}
#submit-link {float:right;width:150px;height: 26px;line-height: 26px; #submit-link {float:right;width:150px;height: 26px;line-height: 26px;
background-color: #A50100;color: #FFF;font-weight: bold;text-align: center; background-color: #A50100;color: #FFF;font-weight: bold;text-align: center;
text-decoration: none;font-size: 14px;} text-decoration: none;font-size: 14px;}
#info-link {float:right;width:300px;height: 26px;line-height: 26px; #info-link {float:right;width:300px;height: 26px;line-height: 26px;
background-color: #A50100;color: #FFF;font-weight: bold;text-align: center; background-color: #A50100;color: #FFF;font-weight: bold;text-align: center;
text-decoration: none;font-size: 14px;} text-decoration: none;font-size: 14px;}
.link-item {float: left;font-size: 14px;font-weight: bold; .link-item {float: left;font-size: 14px;font-weight: bold;
height: 26px;line-height: 26px;width: 100px;color: #A50100;} height: 26px;line-height: 26px;width: 100px;color: #A50100;}
.title-item {height:30px;line-height: 30px;font-size: 14px;font-weight: bold;} .title-item {height:30px;line-height: 30px;font-size: 14px;font-weight: bold;}
#cfg-box {margin-bottom: 10px;} #cfg-box {margin-bottom: 10px;}
#cfg-box div {overflow: hidden;zoom:1;color:#555;font-size:12px;} #cfg-box div {overflow: hidden;zoom:1;color:#555;font-size:12px;}
#cfg-box div label {float: left;width: 160px;height: 26px;line-height:26px;text-align:right; #cfg-box div label {float: left;width: 160px;height: 26px;line-height:26px;text-align:right;
padding-right:10px;font-size:12px;font-weight:bold;color:#555;} padding-right:10px;font-size:12px;font-weight:bold;color:#555;}
.input {border: 1px solid #DDD;height: 18px;line-height: 18px;padding-left: 5px;width: 120px; .input {border: 1px solid #DDD;height: 18px;line-height: 18px;padding-left: 5px;width: 120px;
color:#555; outline: none;} color:#555; outline: none;}
</style> </style>
</head> </head>
<body> <body>
<div id="box"> <div id="box">
<form name="robbe" method="post" action="gbk.demo.php"> <form name="robbe" method="post" action="gbk.demo.php">
<div class="title-item">分词配置:</div> <div class="title-item">分词配置:</div>
<div id="cfg-box"> <div id="cfg-box">
<div> <div>
<label>最大词长: </label> <label>最大词长: </label>
<input type="text" name="config[max_len]" value="<?=isset($_cfg['max_len'])?$_cfg['max_len']:5?>" class="input" /> <input type="text" name="config[max_len]" value="<?=isset($_cfg['max_len'])?$_cfg['max_len']:5?>" class="input" />
</div> </div>
<div> <div>
<label>混合词中文词长: </label> <label>混合词中文词长: </label>
<input type="text" name="config[mix_len]" value="<?=isset($_cfg['mix_len'])?$_cfg['mix_len']:2?>" class="input" /> <input type="text" name="config[mix_len]" value="<?=isset($_cfg['mix_len'])?$_cfg['mix_len']:2?>" class="input" />
</div> </div>
<div> <div>
<label>英文二次切分: </label> <label>英文二次切分: </label>
<input type="checkbox" name="config[en_sseg]" <?=isset($_cfg['en_sseg'])&&$_cfg['en_sseg']==1?'checked="checked"':''?> value="1" /> <input type="checkbox" name="config[en_sseg]" <?=isset($_cfg['en_sseg'])&&$_cfg['en_sseg']==1?'checked="checked"':''?> value="1" />
</div> </div>
<div> <div>
<label>二次切分子Token最小长度: </label> <label>二次切分子Token最小长度: </label>
<input type="text" name="config[st_minl]" value="<?=isset($_cfg['st_minl'])?$_cfg['st_minl']:2?>" class="input" /> <input type="text" name="config[st_minl]" value="<?=isset($_cfg['st_minl'])?$_cfg['st_minl']:2?>" class="input" />
</div> </div>
<div> <div>
<label>英文Token中保留的标点: </label> <label>英文Token中保留的标点: </label>
<input type="text" name="config[kpuncs]" value="<?=isset($_cfg['kpuncs'])?$_cfg['kpuncs']:'@%.#&+'?>" class="input" /> <input type="text" name="config[kpuncs]" value="<?=isset($_cfg['kpuncs'])?$_cfg['kpuncs']:'@%.#&+'?>" class="input" />
</div> </div>
<div> <div>
<label>同义词追加: </label> <label>同义词追加: </label>
<input type="checkbox" name="config[add_syn]" <?=isset($_cfg['add_syn'])&&$_cfg['add_syn']==1?'checked="checked"':''?> value="1" /> <input type="checkbox" name="config[add_syn]" <?=isset($_cfg['add_syn'])&&$_cfg['add_syn']==1?'checked="checked"':''?> value="1" />
</div> </div>
<div> <div>
<label>过滤停止词: </label> <label>过滤停止词: </label>
<input type="checkbox" name="config[clr_stw]" <?=isset($_cfg['clr_stw'])&&$_cfg['clr_stw']==1?'checked="checked"':''?> value="1" /> <input type="checkbox" name="config[clr_stw]" <?=isset($_cfg['clr_stw'])&&$_cfg['clr_stw']==1?'checked="checked"':''?> value="1" />
</div> </div>
<div> <div>
<label>保留未识别词: </label> <label>保留未识别词: </label>
<input type="checkbox" name="config[keep_urec]" <?=isset($_cfg['keep_urec'])&&$_cfg['keep_urec']==1?'checked="checked"':''?> value="1" /> <input type="checkbox" name="config[keep_urec]" <?=isset($_cfg['keep_urec'])&&$_cfg['keep_urec']==1?'checked="checked"':''?> value="1" />
</div> </div>
<div> <div>
<label>sphinx定制输出: </label> <label>sphinx定制输出: </label>
<input type="checkbox" name="config[spx_out]" <?=isset($_cfg['spx_out'])&&$_cfg['spx_out']==1?'checked="checked"':''?> value="1" /> <input type="checkbox" name="config[spx_out]" <?=isset($_cfg['spx_out'])&&$_cfg['spx_out']==1?'checked="checked"':''?> value="1" />
</div> </div>
<div> <div>
<label>分词模式: </label> <label>分词模式: </label>
<input type="radio" name="config[mode]" value="<?=RB_SMODE?>" <?=isset($_cfg['mode'])&&$_cfg['mode']==1?'checked="checked"':''?> />简易模式 <input type="radio" name="config[mode]" value="<?=RB_SMODE?>" <?=isset($_cfg['mode'])&&$_cfg['mode']==1?'checked="checked"':''?> />简易模式
<input type="radio" name="config[mode]" value="<?=RB_CMODE?>" <?=isset($_cfg['mode'])&&$_cfg['mode']==2?'checked="checked"':''?> />复杂模式 <input type="radio" name="config[mode]" value="<?=RB_CMODE?>" <?=isset($_cfg['mode'])&&$_cfg['mode']==2?'checked="checked"':''?> />复杂模式
</div> </div>
</div> </div>
<div class="title-item">分词内容:</div> <div class="title-item">分词内容:</div>
<div class="r-item"><textarea name="text" class="input-text" id="text"><?=$text?></textarea></div> <div class="r-item"><textarea name="text" class="input-text" id="text"><?=$text?></textarea></div>
<input type="hidden" name="_act" value="split"/> <input type="hidden" name="_act" value="split"/>
<a href="javascript:;" onclick="do_submit();return false;" id="submit-link">robbe分词</a> <a href="javascript:;" onclick="do_submit();return false;" id="submit-link">robbe分词</a>
</form> </form>
<?php <?php
if ( $_act == 'split' ) { if ( $_act == 'split' ) {
?> ?>
<div class="title-item">分词结果:</div> <div class="title-item">分词结果:</div>
<div><textarea class="input-text"><?php foreach ( $_ret as $_val ) echo $_val['word'].' ';?> <div><textarea class="input-text"><?php foreach ( $_ret as $_val ) echo $_val['word'].' ';?>
</textarea></div> </textarea></div>
<div class="link-box"><a id="info-link"> <div class="link-box"><a id="info-link">
<?php <?php
$len = strlen($text); $len = strlen($text);
if ( $len >= 1048576 ) { if ( $len >= 1048576 ) {
echo substr(($len/1048576), 0, 6).'MB'; echo substr(($len/1048576), 0, 6).'MB';
} else if ( $len >= 1024 ) { } else if ( $len >= 1024 ) {
echo substr( ($len / 1024), 0, 6).'KB'; echo substr( ($len / 1024), 0, 6).'KB';
} else { } else {
echo $len.'B'; echo $len.'B';
} }
?> ?>
&nbsp;&nbsp;&nbsp;<?php printf("%.5f", $_timer)?>sec &nbsp;&nbsp;&nbsp;<?php printf("%.5f", $_timer)?>sec
</a></div> </a></div>
<?php <?php
} }
?> ?>
</div> </div>
<script type="text/javascript"> <script type="text/javascript">
String.prototype.trim = function() {return this.replace(/^\s+|\s+$/g, '');} String.prototype.trim = function() {return this.replace(/^\s+|\s+$/g, '');}
function do_submit() { function do_submit() {
var text = document.getElementById('text'); var text = document.getElementById('text');
if ( text.value.trim() == '' ) return; if ( text.value.trim() == '' ) return;
document.robbe.submit(); document.robbe.submit();
} }
</script> </script>
</body> </body>

View File

@ -4,10 +4,10 @@ ini_set('magic_quotes_gpc', 0);
//charset check. //charset check.
if ( friso_charset() != "UTF-8" ) { if ( friso_charset() != "UTF-8" ) {
$_str = "Error: UTF-8 charset required. <br />"; $_str = "Error: UTF-8 charset required. <br />";
$_str .= "1. Modified friso.charset = 0 in your friso.ini .<br />"; $_str .= "1. Modified friso.charset = 0 in your friso.ini .<br />";
$_str .= "2. Modified friso.lex_dir = UTF-8 lexicon abusolute path to load your UTF-8 lexicon. <br />"; $_str .= "2. Modified friso.lex_dir = UTF-8 lexicon abusolute path to load your UTF-8 lexicon. <br />";
exit($_str); exit($_str);
} }
$text = ''; $text = '';
@ -15,139 +15,139 @@ $_timer = 0;
$_act = ''; $_act = '';
$_cfg = array('mode' => FRISO_COMPLEX); $_cfg = array('mode' => FRISO_COMPLEX);
if ( isset($_POST['_act']) && ($_act = $_POST['_act']) == 'split' ) { if ( isset($_POST['_act']) && ($_act = $_POST['_act']) == 'split' ) {
$text = &$_POST['text']; $text = &$_POST['text'];
$_cfg = &$_POST['config']; $_cfg = &$_POST['config'];
if ( ! isset($_cfg['add_syn']) ) $_cfg['add_syn'] = 0; if ( ! isset($_cfg['add_syn']) ) $_cfg['add_syn'] = 0;
if ( ! isset($_cfg['clr_stw']) ) $_cfg['clr_stw'] = 0; if ( ! isset($_cfg['clr_stw']) ) $_cfg['clr_stw'] = 0;
if ( ! isset($_cfg['keep_urec']) ) $_cfg['keep_urec'] = 0; if ( ! isset($_cfg['keep_urec']) ) $_cfg['keep_urec'] = 0;
if ( ! isset($_cfg['spx_out']) ) $_cfg['spx_out'] = 0; if ( ! isset($_cfg['spx_out']) ) $_cfg['spx_out'] = 0;
if ( ! isset($_cfg['en_sseg']) ) $_cfg['en_sseg'] = 0; if ( ! isset($_cfg['en_sseg']) ) $_cfg['en_sseg'] = 0;
$s_time = timer(); $s_time = timer();
$_ret = friso_split($text, $_cfg); $_ret = friso_split($text, $_cfg);
$_timer = timer() - $s_time; $_timer = timer() - $s_time;
} }
function timer() { function timer() {
list($msec, $sec) = explode(' ', microtime()); list($msec, $sec) = explode(' ', microtime());
return ((float)$msec + (float)$sec); return ((float)$msec + (float)$sec);
} }
?> ?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head> <head>
<title>UTF8 - robbe分词测试程序</title> <title>UTF8 - robbe分词测试程序</title>
<meta http-equiv="content-type" content="text/html;charset=utf-8" /> <meta http-equiv="content-type" content="text/html;charset=utf-8" />
<style type="text/css"> <style type="text/css">
#box {width: 1000px} #box {width: 1000px}
.input-text {border: 1px solid #CCC;width: 1000px;height: 180px;background-color: #FFF; .input-text {border: 1px solid #CCC;width: 1000px;height: 180px;background-color: #FFF;
color: #555;font-size: 14px;} color: #555;font-size: 14px;}
.link-box {overflow: hidden;zoom:1;padding-top:10px;} .link-box {overflow: hidden;zoom:1;padding-top:10px;}
#submit-link {float:right;width:150px;height: 26px;line-height: 26px; #submit-link {float:right;width:150px;height: 26px;line-height: 26px;
background-color: #A50100;color: #FFF;font-weight: bold;text-align: center; background-color: #A50100;color: #FFF;font-weight: bold;text-align: center;
text-decoration: none;font-size: 14px;} text-decoration: none;font-size: 14px;}
#info-link {float:right;width:300px;height: 26px;line-height: 26px; #info-link {float:right;width:300px;height: 26px;line-height: 26px;
background-color: #A50100;color: #FFF;font-weight: bold;text-align: center; background-color: #A50100;color: #FFF;font-weight: bold;text-align: center;
text-decoration: none;font-size: 14px;} text-decoration: none;font-size: 14px;}
.link-item {float: left;font-size: 14px;font-weight: bold; .link-item {float: left;font-size: 14px;font-weight: bold;
height: 26px;line-height: 26px;width: 100px;color: #A50100;} height: 26px;line-height: 26px;width: 100px;color: #A50100;}
.title-item {height:30px;line-height: 30px;font-size: 14px;font-weight: bold;} .title-item {height:30px;line-height: 30px;font-size: 14px;font-weight: bold;}
#cfg-box {margin-bottom: 10px;} #cfg-box {margin-bottom: 10px;}
#cfg-box div {overflow: hidden;zoom:1;color:#555;font-size:12px;} #cfg-box div {overflow: hidden;zoom:1;color:#555;font-size:12px;}
#cfg-box div label {float: left;width: 160px;height: 26px;line-height:26px;text-align:right; #cfg-box div label {float: left;width: 160px;height: 26px;line-height:26px;text-align:right;
padding-right:10px;font-size:12px;font-weight:bold;color:#555;} padding-right:10px;font-size:12px;font-weight:bold;color:#555;}
.input {border: 1px solid #DDD;height: 18px;line-height: 18px;padding-left: 5px;width: 120px; .input {border: 1px solid #DDD;height: 18px;line-height: 18px;padding-left: 5px;width: 120px;
color:#555; outline: none;} color:#555; outline: none;}
</style> </style>
</head> </head>
<body> <body>
<div id="box"> <div id="box">
<form name="robbe" method="post" action="utf8.demo.php"> <form name="robbe" method="post" action="utf8.demo.php">
<div class="title-item">分词配置:</div> <div class="title-item">分词配置:</div>
<div id="cfg-box"> <div id="cfg-box">
<div> <div>
<label>最大词长: </label> <label>最大词长: </label>
<input type="text" name="config[max_len]" value="<?=isset($_cfg['max_len'])?$_cfg['max_len']:5?>" class="input" /> <input type="text" name="config[max_len]" value="<?=isset($_cfg['max_len'])?$_cfg['max_len']:5?>" class="input" />
</div> </div>
<div> <div>
<label>混合词中文词长: </label> <label>混合词中文词长: </label>
<input type="text" name="config[mix_len]" value="<?=isset($_cfg['mix_len'])?$_cfg['mix_len']:2?>" class="input" /> <input type="text" name="config[mix_len]" value="<?=isset($_cfg['mix_len'])?$_cfg['mix_len']:2?>" class="input" />
</div> </div>
<div> <div>
<label>英文二次切分: </label> <label>英文二次切分: </label>
<input type="checkbox" name="config[en_sseg]" <?=isset($_cfg['en_sseg'])&&$_cfg['en_sseg']==1?'checked="checked"':''?> value="1" /> <input type="checkbox" name="config[en_sseg]" <?=isset($_cfg['en_sseg'])&&$_cfg['en_sseg']==1?'checked="checked"':''?> value="1" />
</div> </div>
<div> <div>
<label>二次切分子Token最小长度: </label> <label>二次切分子Token最小长度: </label>
<input type="text" name="config[st_minl]" value="<?=isset($_cfg['st_minl'])?$_cfg['st_minl']:2?>" class="input" /> <input type="text" name="config[st_minl]" value="<?=isset($_cfg['st_minl'])?$_cfg['st_minl']:2?>" class="input" />
</div> </div>
<div> <div>
<label>英文Token中保留的标点: </label> <label>英文Token中保留的标点: </label>
<input type="text" name="config[kpuncs]" value="<?=isset($_cfg['kpuncs'])?$_cfg['kpuncs']:'@%.#&+'?>" class="input" /> <input type="text" name="config[kpuncs]" value="<?=isset($_cfg['kpuncs'])?$_cfg['kpuncs']:'@%.#&+'?>" class="input" />
</div> </div>
<div> <div>
<label>同义词追加: </label> <label>同义词追加: </label>
<input type="checkbox" name="config[add_syn]" <?=isset($_cfg['add_syn'])&&$_cfg['add_syn']==1?'checked="checked"':''?> value="1" /> <input type="checkbox" name="config[add_syn]" <?=isset($_cfg['add_syn'])&&$_cfg['add_syn']==1?'checked="checked"':''?> value="1" />
</div> </div>
<div> <div>
<label>过滤停止词: </label> <label>过滤停止词: </label>
<input type="checkbox" name="config[clr_stw]" <?=isset($_cfg['clr_stw'])&&$_cfg['clr_stw']==1?'checked="checked"':''?> value="1" /> <input type="checkbox" name="config[clr_stw]" <?=isset($_cfg['clr_stw'])&&$_cfg['clr_stw']==1?'checked="checked"':''?> value="1" />
</div> </div>
<div> <div>
<label>保留未识别词: </label> <label>保留未识别词: </label>
<input type="checkbox" name="config[keep_urec]" <?=isset($_cfg['keep_urec'])&&$_cfg['keep_urec']==1?'checked="checked"':''?> value="1" /> <input type="checkbox" name="config[keep_urec]" <?=isset($_cfg['keep_urec'])&&$_cfg['keep_urec']==1?'checked="checked"':''?> value="1" />
</div> </div>
<div> <div>
<label>sphinx定制输出: </label> <label>sphinx定制输出: </label>
<input type="checkbox" name="config[spx_out]" <?=isset($_cfg['spx_out'])&&$_cfg['spx_out']==1?'checked="checked"':''?> value="1" /> <input type="checkbox" name="config[spx_out]" <?=isset($_cfg['spx_out'])&&$_cfg['spx_out']==1?'checked="checked"':''?> value="1" />
</div> </div>
<div> <div>
<label>分词模式: </label> <label>分词模式: </label>
<input type="radio" name="config[mode]" value="<?=RB_SMODE?>" <?=isset($_cfg['mode'])&&$_cfg['mode']==1?'checked="checked"':''?> />简易模式 <input type="radio" name="config[mode]" value="<?=RB_SMODE?>" <?=isset($_cfg['mode'])&&$_cfg['mode']==1?'checked="checked"':''?> />简易模式
<input type="radio" name="config[mode]" value="<?=RB_CMODE?>" <?=isset($_cfg['mode'])&&$_cfg['mode']==2?'checked="checked"':''?> />复杂模式 <input type="radio" name="config[mode]" value="<?=RB_CMODE?>" <?=isset($_cfg['mode'])&&$_cfg['mode']==2?'checked="checked"':''?> />复杂模式
</div> </div>
</div> </div>
<div class="title-item">分词内容:</div> <div class="title-item">分词内容:</div>
<div class="r-item"><textarea name="text" class="input-text" id="text"><?=$text?></textarea></div> <div class="r-item"><textarea name="text" class="input-text" id="text"><?=$text?></textarea></div>
<input type="hidden" name="_act" value="split"/> <input type="hidden" name="_act" value="split"/>
<a href="javascript:;" onclick="do_submit();return false;" id="submit-link">robbe分词</a> <a href="javascript:;" onclick="do_submit();return false;" id="submit-link">robbe分词</a>
</form> </form>
<?php <?php
if ( $_act == 'split' ) { if ( $_act == 'split' ) {
?> ?>
<div class="title-item">分词结果:</div> <div class="title-item">分词结果:</div>
<div><textarea class="input-text"><?php foreach ( $_ret as $_val ) echo $_val['word'].' ';?> <div><textarea class="input-text"><?php foreach ( $_ret as $_val ) echo $_val['word'].' ';?>
</textarea></div> </textarea></div>
<div class="link-box"><a id="info-link"> <div class="link-box"><a id="info-link">
<?php <?php
$len = strlen($text); $len = strlen($text);
if ( $len >= 1048576 ) { if ( $len >= 1048576 ) {
echo substr(($len/1048576), 0, 6).'MB'; echo substr(($len/1048576), 0, 6).'MB';
} else if ( $len >= 1024 ) { } else if ( $len >= 1024 ) {
echo substr( ($len / 1024), 0, 6).'KB'; echo substr( ($len / 1024), 0, 6).'KB';
} else { } else {
echo $len.'B'; echo $len.'B';
} }
?> ?>
&nbsp;&nbsp;&nbsp;<?php printf("%.5f", $_timer)?>sec &nbsp;&nbsp;&nbsp;<?php printf("%.5f", $_timer)?>sec
</a></div> </a></div>
<?php <?php
} }
?> ?>
</div> </div>
<script type="text/javascript"> <script type="text/javascript">
String.prototype.trim = function() {return this.replace(/^\s+|\s+$/g, '');} String.prototype.trim = function() {return this.replace(/^\s+|\s+$/g, '');}
function do_submit() { function do_submit() {
var text = document.getElementById('text'); var text = document.getElementById('text');
if ( text.value.trim() == '' ) return; if ( text.value.trim() == '' ) return;
document.robbe.submit(); document.robbe.submit();
} }
</script> </script>
</body> </body>

View File

@ -9,9 +9,9 @@
#include "php_friso.h" #include "php_friso.h"
#ifdef FRISO_WINNT #ifdef FRISO_WINNT
# define friso_default_conf_file "c:/windows/friso.ini" # define friso_default_conf_file "c:/windows/friso.ini"
#else #else
# define friso_default_conf_file "/etc/friso/friso.ini" # define friso_default_conf_file "/etc/friso/friso.ini"
#endif #endif
/* If you declare any globals in php_friso.h uncomment this: /* If you declare any globals in php_friso.h uncomment this:
@ -27,15 +27,15 @@ static int le_friso = 1;
* Every user visible function must have an entry in friso_functions[]. * Every user visible function must have an entry in friso_functions[].
*/ */
const zend_function_entry friso_functions[] = { const zend_function_entry friso_functions[] = {
PHP_FE(friso_split, NULL) PHP_FE(friso_split, NULL)
PHP_FE(friso_version, NULL) PHP_FE(friso_version, NULL)
PHP_FE(friso_charset, NULL) PHP_FE(friso_charset, NULL)
PHP_FE(friso_dic_exist, NULL) PHP_FE(friso_dic_exist, NULL)
PHP_FE(friso_dic_get, NULL) PHP_FE(friso_dic_get, NULL)
PHP_FE(friso_utf8_bytes, NULL) PHP_FE(friso_utf8_bytes, NULL)
PHP_FE(friso_utf8_ucode, NULL) PHP_FE(friso_utf8_ucode, NULL)
PHP_FE(friso_ucode_utf8, NULL) PHP_FE(friso_ucode_utf8, NULL)
{NULL, NULL, NULL} /* Must be the last line in friso_functions[] */ {NULL, NULL, NULL} /* Must be the last line in friso_functions[] */
}; };
/* }}} */ /* }}} */
@ -43,19 +43,19 @@ const zend_function_entry friso_functions[] = {
*/ */
zend_module_entry friso_module_entry = { zend_module_entry friso_module_entry = {
#if ZEND_MODULE_API_NO >= 20010901 #if ZEND_MODULE_API_NO >= 20010901
STANDARD_MODULE_HEADER, STANDARD_MODULE_HEADER,
#endif #endif
"friso", "friso",
friso_functions, friso_functions,
PHP_MINIT(friso), PHP_MINIT(friso),
PHP_MSHUTDOWN(friso), PHP_MSHUTDOWN(friso),
PHP_RINIT(friso), /* Replace with NULL if there's nothing to do at request start */ PHP_RINIT(friso), /* Replace with NULL if there's nothing to do at request start */
PHP_RSHUTDOWN(friso), /* Replace with NULL if there's nothing to do at request end */ PHP_RSHUTDOWN(friso), /* Replace with NULL if there's nothing to do at request end */
PHP_MINFO(friso), PHP_MINFO(friso),
#if ZEND_MODULE_API_NO >= 20010901 #if ZEND_MODULE_API_NO >= 20010901
"0.1", /* Replace with version number for your extension */ "0.1", /* Replace with version number for your extension */
#endif #endif
STANDARD_MODULE_PROPERTIES STANDARD_MODULE_PROPERTIES
}; };
/* }}} */ /* }}} */
@ -73,72 +73,72 @@ PHP_INI_END()
/* {{{ php_friso_globals_construct */ /* {{{ php_friso_globals_construct */
static void php_friso_globals_construct(zend_friso_globals *friso_globals) static void php_friso_globals_construct(zend_friso_globals *friso_globals)
{ {
friso_globals->friso = friso_new(); friso_globals->friso = friso_new();
friso_globals->config = friso_new_config(); friso_globals->config = friso_new_config();
friso_init_from_ifile(friso_globals->friso, friso_init_from_ifile(friso_globals->friso,
friso_globals->config, INI_STR("friso.ini_file")); friso_globals->config, INI_STR("friso.ini_file"));
} }
/* }}} */ /* }}} */
/* {{{ php_robbe_globals_destruct*/ /* {{{ php_robbe_globals_destruct*/
static void php_friso_globals_destruct(zend_friso_globals *friso_globals) static void php_friso_globals_destruct(zend_friso_globals *friso_globals)
{ {
/* /*
* cause friso_free will free the dictionary * cause friso_free will free the dictionary
* so here we don't have to call the friso_dic_free to free the * so here we don't have to call the friso_dic_free to free the
* the robbe_dic global variable. * the robbe_dic global variable.
*/ */
//friso_dic_free( friso_globals->friso_dic ); //friso_dic_free( friso_globals->friso_dic );
//friso_globals->friso_dic = NULL; //friso_globals->friso_dic = NULL;
friso_free_config( friso_globals->config ); friso_free_config( friso_globals->config );
friso_free( friso_globals->friso ); friso_free( friso_globals->friso );
} }
/* }}} */ /* }}} */
#define FRISO_RET_WORD (1 << 0) #define FRISO_RET_WORD (1 << 0)
#define FRISO_RET_TYPE (1 << 1) #define FRISO_RET_TYPE (1 << 1)
#define FRISO_RET_OFF (1 << 2) #define FRISO_RET_OFF (1 << 2)
#define FRISO_RET_LEN (1 << 3) #define FRISO_RET_LEN (1 << 3)
#define FRISO_RET_RLEN (1 << 4) #define FRISO_RET_RLEN (1 << 4)
#define FRISO_RET_POS (1 << 5) #define FRISO_RET_POS (1 << 5)
/* {{{ PHP_MINIT_FUNCTION /* {{{ PHP_MINIT_FUNCTION
*/ */
PHP_MINIT_FUNCTION(friso) PHP_MINIT_FUNCTION(friso)
{ {
/* /*
* register some contants that robbe may use * register some contants that robbe may use
* at its following work. * at its following work.
* the constant is case sensitive and persitent. * the constant is case sensitive and persitent.
*/ */
REGISTER_LONG_CONSTANT("FRISO_SIMPLE", __FRISO_SIMPLE_MODE__, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("FRISO_SIMPLE", __FRISO_SIMPLE_MODE__, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FRISO_COMPLEX", __FRISO_COMPLEX_MODE__, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("FRISO_COMPLEX", __FRISO_COMPLEX_MODE__, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FRISO_DETECT", __FRISO_DETECT_MODE__, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("FRISO_DETECT", __FRISO_DETECT_MODE__, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FRISO_LEX_CJK", __LEX_CJK_WORDS__, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("FRISO_LEX_CJK", __LEX_CJK_WORDS__, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FRISO_LEX_STOP", __LEX_STOPWORDS__, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("FRISO_LEX_STOP", __LEX_STOPWORDS__, CONST_CS | CONST_PERSISTENT);
//return parts for rb_split. //return parts for rb_split.
REGISTER_LONG_CONSTANT("FRISO_RET_WORD", FRISO_RET_WORD, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("FRISO_RET_WORD", FRISO_RET_WORD, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FRISO_RET_TYPE", FRISO_RET_TYPE, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("FRISO_RET_TYPE", FRISO_RET_TYPE, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FRISO_RET_OFF", FRISO_RET_OFF, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("FRISO_RET_OFF", FRISO_RET_OFF, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FRISO_RET_LEN", FRISO_RET_LEN, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("FRISO_RET_LEN", FRISO_RET_LEN, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FRISO_RET_RLEN", FRISO_RET_RLEN, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("FRISO_RET_RLEN", FRISO_RET_RLEN, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FRISO_RET_POS", FRISO_RET_POS, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("FRISO_RET_POS", FRISO_RET_POS, CONST_CS | CONST_PERSISTENT);
//lex type constants. //lex type constants.
REGISTER_LONG_CONSTANT("FRISO_TYP_CJK", __LEX_CJK_WORDS__, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("FRISO_TYP_CJK", __LEX_CJK_WORDS__, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FRISO_TYP_ECM", __LEX_ECM_WORDS__, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("FRISO_TYP_ECM", __LEX_ECM_WORDS__, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FRISO_TYP_CEM", __LEX_CEM_WORDS__, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("FRISO_TYP_CEM", __LEX_CEM_WORDS__, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FRISO_TYP_EPUN", __LEX_ENPUN_WORDS__, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("FRISO_TYP_EPUN", __LEX_ENPUN_WORDS__, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FRISO_TYP_PUN", __LEX_OTHER_WORDS__, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("FRISO_TYP_PUN", __LEX_OTHER_WORDS__, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FRISO_TYP_UNK", __LEX_UNKNOW_WORDS__, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("FRISO_TYP_UNK", __LEX_UNKNOW_WORDS__, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FRISO_TYP_OTR", __LEX_OTHER_WORDS__, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("FRISO_TYP_OTR", __LEX_OTHER_WORDS__, CONST_CS | CONST_PERSISTENT);
REGISTER_INI_ENTRIES(); REGISTER_INI_ENTRIES();
/*initialize the globals variables.*/ /*initialize the globals variables.*/
php_friso_globals_construct( &friso_globals ); php_friso_globals_construct( &friso_globals );
return SUCCESS; return SUCCESS;
} }
/* }}} */ /* }}} */
@ -146,11 +146,11 @@ PHP_MINIT_FUNCTION(friso)
*/ */
PHP_MSHUTDOWN_FUNCTION(friso) PHP_MSHUTDOWN_FUNCTION(friso)
{ {
UNREGISTER_INI_ENTRIES(); UNREGISTER_INI_ENTRIES();
/*destruct the globals variables*/ /*destruct the globals variables*/
php_friso_globals_destruct( &friso_globals ); php_friso_globals_destruct( &friso_globals );
return SUCCESS; return SUCCESS;
} }
/* }}} */ /* }}} */
@ -159,7 +159,7 @@ PHP_MSHUTDOWN_FUNCTION(friso)
*/ */
PHP_RINIT_FUNCTION(friso) PHP_RINIT_FUNCTION(friso)
{ {
return SUCCESS; return SUCCESS;
} }
/* }}} */ /* }}} */
@ -168,22 +168,22 @@ PHP_RINIT_FUNCTION(friso)
*/ */
PHP_RSHUTDOWN_FUNCTION(friso) PHP_RSHUTDOWN_FUNCTION(friso)
{ {
return SUCCESS; return SUCCESS;
} }
/* }}} */ /* }}} */
/* {{{ PHP_MINFO_FUNCTION /* {{{ PHP_MINFO_FUNCTION
*/ */
PHP_MINFO_FUNCTION(friso) PHP_MINFO_FUNCTION(friso)
{ {
php_info_print_table_start(); php_info_print_table_start();
php_info_print_table_row(2, "Friso Support", "enabled"); php_info_print_table_row(2, "Friso Support", "enabled");
php_info_print_table_row(2, "Version", FRISO_VERSION); php_info_print_table_row(2, "Version", FRISO_VERSION);
php_info_print_table_row(2, "Bug Report", "chenxin619315@gmail.com"); php_info_print_table_row(2, "Bug Report", "chenxin619315@gmail.com");
php_info_print_table_row(2, "Home page", "http://code.google.com/p/friso"); php_info_print_table_row(2, "Home page", "http://code.google.com/p/friso");
php_info_print_table_end(); php_info_print_table_end();
DISPLAY_INI_ENTRIES(); DISPLAY_INI_ENTRIES();
} }
/* }}} */ /* }}} */
@ -192,130 +192,130 @@ PHP_MINFO_FUNCTION(friso)
Return a array contains all the split result with a specified mode */ Return a array contains all the split result with a specified mode */
PHP_FUNCTION(friso_split) PHP_FUNCTION(friso_split)
{ {
char *_str = NULL, *_key; char *_str = NULL, *_key;
int slen, idx, klen, rargs = 0; int slen, idx, klen, rargs = 0;
int arg_count; int arg_count;
zval *ret, *cfg, **data; zval *ret, *cfg, **data;
//used for multiple item return. //used for multiple item return.
zval *item; zval *item;
HashTable *cfgArr; HashTable *cfgArr;
HashPosition pointer; HashPosition pointer;
friso_task_t task; friso_task_t task;
friso_config_t config = NULL, nconfig = NULL; friso_config_t config = NULL, nconfig = NULL;
//get the arugments from the php layer. //get the arugments from the php layer.
arg_count = ZEND_NUM_ARGS(); arg_count = ZEND_NUM_ARGS();
switch ( arg_count ) switch ( arg_count )
{ {
case 2: case 2:
if ( zend_parse_parameters(arg_count TSRMLS_CC, "sz", if ( zend_parse_parameters(arg_count TSRMLS_CC, "sz",
&_str, &slen, &cfg) == FAILURE ) return; &_str, &slen, &cfg) == FAILURE ) return;
break; break;
case 3: case 3:
if (zend_parse_parameters( arg_count TSRMLS_CC, "szl", if (zend_parse_parameters( arg_count TSRMLS_CC, "szl",
&_str, &slen, &cfg, &rargs) == FAILURE ) return; &_str, &slen, &cfg, &rargs) == FAILURE ) return;
break; break;
default: default:
WRONG_PARAM_COUNT; WRONG_PARAM_COUNT;
} }
//make sure the RB_RET_WORD will be returned. //make sure the RB_RET_WORD will be returned.
//rargs |= FRISO_RET_WORD; //rargs |= FRISO_RET_WORD;
//check and initialize the friso. //check and initialize the friso.
if ( Z_TYPE_P(cfg) != IS_NULL ) if ( Z_TYPE_P(cfg) != IS_NULL )
{ {
nconfig = friso_new_config(); nconfig = friso_new_config();
memcpy(nconfig, friso_globals.config, sizeof(friso_config_entry)); memcpy(nconfig, friso_globals.config, sizeof(friso_config_entry));
//check the new setting. //check the new setting.
cfgArr = Z_ARRVAL_P(cfg); cfgArr = Z_ARRVAL_P(cfg);
//zend_printf("array length: %d", zend_hash_num_elements(cfgArr)); //zend_printf("array length: %d", zend_hash_num_elements(cfgArr));
for ( zend_hash_internal_pointer_reset_ex(cfgArr, &pointer); for ( zend_hash_internal_pointer_reset_ex(cfgArr, &pointer);
zend_hash_get_current_data_ex(cfgArr, (void **)&data, &pointer) == SUCCESS; zend_hash_get_current_data_ex(cfgArr, (void **)&data, &pointer) == SUCCESS;
zend_hash_move_forward_ex(cfgArr, &pointer) ) zend_hash_move_forward_ex(cfgArr, &pointer) )
{ {
zend_hash_get_current_key_ex(cfgArr, &_key, &klen, NULL, 0, &pointer); zend_hash_get_current_key_ex(cfgArr, &_key, &klen, NULL, 0, &pointer);
//zend_printf("key: %s, value: %d<br />", _key, (*data)->value.lval); //zend_printf("key: %s, value: %d<br />", _key, (*data)->value.lval);
if ( strcmp(_key, "kpuncs") == 0 ) if ( strcmp(_key, "kpuncs") == 0 )
{ {
memcpy(nconfig->kpuncs, (*data)->value.str.val, (*data)->value.str.len); memcpy(nconfig->kpuncs, (*data)->value.str.val, (*data)->value.str.len);
nconfig->kpuncs[(*data)->value.str.len] = '\0'; nconfig->kpuncs[(*data)->value.str.len] = '\0';
} }
else else
{ {
//convert the data to long. //convert the data to long.
convert_to_long_ex(data); convert_to_long_ex(data);
if ( strcmp(_key, "max_len") == 0 ) if ( strcmp(_key, "max_len") == 0 )
nconfig->max_len = (ushort_t)(*data)->value.lval; nconfig->max_len = (ushort_t)(*data)->value.lval;
else if ( strcmp(_key, "r_name") == 0 ) else if ( strcmp(_key, "r_name") == 0 )
nconfig->r_name = (ushort_t)(*data)->value.lval; nconfig->r_name = (ushort_t)(*data)->value.lval;
else if ( strcmp(_key, "mix_len") == 0 ) else if ( strcmp(_key, "mix_len") == 0 )
nconfig->mix_len = (ushort_t)(*data)->value.lval; nconfig->mix_len = (ushort_t)(*data)->value.lval;
else if ( strcmp(_key, "lna_len") == 0 ) else if ( strcmp(_key, "lna_len") == 0 )
nconfig->lna_len = (ushort_t)(*data)->value.lval; nconfig->lna_len = (ushort_t)(*data)->value.lval;
else if ( strcmp(_key, "add_syn") == 0 ) else if ( strcmp(_key, "add_syn") == 0 )
nconfig->add_syn = (ushort_t)(*data)->value.lval; nconfig->add_syn = (ushort_t)(*data)->value.lval;
else if ( strcmp(_key, "clr_stw") == 0 ) else if ( strcmp(_key, "clr_stw") == 0 )
nconfig->clr_stw = (ushort_t)(*data)->value.lval; nconfig->clr_stw = (ushort_t)(*data)->value.lval;
else if ( strcmp(_key, "add_syn") == 0 ) else if ( strcmp(_key, "add_syn") == 0 )
nconfig->add_syn = (ushort_t)(*data)->value.lval; nconfig->add_syn = (ushort_t)(*data)->value.lval;
else if ( strcmp(_key, "keep_urec") == 0 ) else if ( strcmp(_key, "keep_urec") == 0 )
nconfig->keep_urec = (ushort_t)(*data)->value.lval; nconfig->keep_urec = (ushort_t)(*data)->value.lval;
else if ( strcmp(_key, "spx_out") == 0 ) else if ( strcmp(_key, "spx_out") == 0 )
nconfig->spx_out = (ushort_t)(*data)->value.lval; nconfig->spx_out = (ushort_t)(*data)->value.lval;
else if ( strcmp(_key, "nthreshold") == 0 ) else if ( strcmp(_key, "nthreshold") == 0 )
nconfig->nthreshold = (uint_t) (*data)->value.lval; nconfig->nthreshold = (uint_t) (*data)->value.lval;
else if ( strcmp(_key, "mode") == 0 ) else if ( strcmp(_key, "mode") == 0 )
friso_set_mode(nconfig, (friso_mode_t)((*data)->value.lval)); friso_set_mode(nconfig, (friso_mode_t)((*data)->value.lval));
else if ( strcmp(_key, "en_sseg") == 0 ) else if ( strcmp(_key, "en_sseg") == 0 )
nconfig->en_sseg = (ushort_t) (*data)->value.lval; nconfig->en_sseg = (ushort_t) (*data)->value.lval;
else if ( strcmp(_key, "st_minl") == 0 ) else if ( strcmp(_key, "st_minl") == 0 )
nconfig->st_minl = (ushort_t) (*data)->value.lval; nconfig->st_minl = (ushort_t) (*data)->value.lval;
} }
} }
} }
//initialize the array. //initialize the array.
MAKE_STD_ZVAL( ret ); MAKE_STD_ZVAL( ret );
array_init( ret ); array_init( ret );
config = ( nconfig == NULL ) ? friso_globals.config : nconfig; config = ( nconfig == NULL ) ? friso_globals.config : nconfig;
//create a new friso task. //create a new friso task.
task = friso_new_task(); task = friso_new_task();
idx = 0; idx = 0;
friso_set_text(task, _str); friso_set_text(task, _str);
while ( config->next_token( friso_globals.friso, config, task ) != NULL ) while ( config->next_token( friso_globals.friso, config, task ) != NULL )
{ {
MAKE_STD_ZVAL(item); MAKE_STD_ZVAL(item);
array_init(item); array_init(item);
add_assoc_string(item, "word", task->token->word, 1); add_assoc_string(item, "word", task->token->word, 1);
//check the append of type //check the append of type
if ( (rargs & FRISO_RET_TYPE) != 0 ) if ( (rargs & FRISO_RET_TYPE) != 0 )
add_assoc_long(item, "type", task->token->type); add_assoc_long(item, "type", task->token->type);
if ( (rargs & FRISO_RET_LEN) != 0 ) if ( (rargs & FRISO_RET_LEN) != 0 )
add_assoc_long(item, "len", task->token->length); add_assoc_long(item, "len", task->token->length);
if ( (rargs & FRISO_RET_RLEN) != 0 ) if ( (rargs & FRISO_RET_RLEN) != 0 )
add_assoc_long(item, "rlen", task->token->rlen); add_assoc_long(item, "rlen", task->token->rlen);
if ( (rargs & FRISO_RET_OFF) != 0 ) if ( (rargs & FRISO_RET_OFF) != 0 )
add_assoc_long(item, "off", task->token->offset); add_assoc_long(item, "off", task->token->offset);
if ( (rargs & FRISO_RET_POS) != 0 ) if ( (rargs & FRISO_RET_POS) != 0 )
add_assoc_stringl(item, "pos", &task->token->pos, 1, 1); add_assoc_stringl(item, "pos", &task->token->pos, 1, 1);
//append the sub result. //append the sub result.
add_index_zval( ret, idx++, item ); add_index_zval( ret, idx++, item );
} }
//free the friso task. //free the friso task.
friso_free_task(task); friso_free_task(task);
if ( nconfig != NULL ) friso_free_config(nconfig); if ( nconfig != NULL ) friso_free_config(nconfig);
//RETURN_ZVAL( ret, 0, 0); //RETURN_ZVAL( ret, 0, 0);
*( return_value ) = *( ret ); *( return_value ) = *( ret );
} }
/* }}} */ /* }}} */
@ -323,7 +323,7 @@ PHP_FUNCTION(friso_split)
Return the current version of Friso. */ Return the current version of Friso. */
PHP_FUNCTION(friso_version) PHP_FUNCTION(friso_version)
{ {
RETURN_STRINGL(FRISO_VERSION, strlen(FRISO_VERSION), 1); RETURN_STRINGL(FRISO_VERSION, strlen(FRISO_VERSION), 1);
} }
/* }}} */ /* }}} */
@ -331,8 +331,8 @@ PHP_FUNCTION(friso_version)
Return the current charset of friso. */ Return the current charset of friso. */
PHP_FUNCTION(friso_charset) PHP_FUNCTION(friso_charset)
{ {
char *charset = friso_globals.friso->charset == FRISO_UTF8 ? "UTF-8" : "GBK"; char *charset = friso_globals.friso->charset == FRISO_UTF8 ? "UTF-8" : "GBK";
RETURN_STRINGL(charset, strlen(charset), 1); RETURN_STRINGL(charset, strlen(charset), 1);
} }
/* }}} */ /* }}} */
@ -340,23 +340,23 @@ PHP_FUNCTION(friso_charset)
Return a bool to confirm that the given str is a word in a specified dictionary. */ Return a bool to confirm that the given str is a word in a specified dictionary. */
PHP_FUNCTION(friso_dic_exist) PHP_FUNCTION(friso_dic_exist)
{ {
char *word = NULL; char *word = NULL;
int wlen; int wlen;
long type; long type;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ls", &type, &word, &wlen) == FAILURE) { if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ls", &type, &word, &wlen) == FAILURE) {
return; return;
} }
if ( friso_globals.friso->dic == NULL ) if ( friso_globals.friso->dic == NULL )
RETURN_BOOL(0); RETURN_BOOL(0);
if ( type < 0 || type >= __FRISO_LEXICON_LENGTH__ ) if ( type < 0 || type >= __FRISO_LEXICON_LENGTH__ )
type = __LEX_CJK_WORDS__; type = __LEX_CJK_WORDS__;
wlen = friso_dic_match( friso_globals.friso->dic, type, word ); wlen = friso_dic_match( friso_globals.friso->dic, type, word );
RETURN_BOOL(wlen); RETURN_BOOL(wlen);
} }
/* }}} */ /* }}} */
@ -364,38 +364,38 @@ PHP_FUNCTION(friso_dic_exist)
Return a array contains all the information of the given word.*/ Return a array contains all the information of the given word.*/
PHP_FUNCTION(friso_dic_get) PHP_FUNCTION(friso_dic_get)
{ {
char *word = NULL; char *word = NULL;
int wlen; int wlen;
long type; long type;
zval *entry; zval *entry;
lex_entry_t e; lex_entry_t e;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ls", &type, &word, &wlen) == FAILURE) { if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ls", &type, &word, &wlen) == FAILURE) {
return; return;
} }
//check the dictionary //check the dictionary
if ( friso_globals.friso->dic == NULL ) if ( friso_globals.friso->dic == NULL )
RETURN_BOOL(0); RETURN_BOOL(0);
MAKE_STD_ZVAL( entry ); MAKE_STD_ZVAL( entry );
array_init( entry ); array_init( entry );
if ( type < 0 || type >= __FRISO_LEXICON_LENGTH__ ) if ( type < 0 || type >= __FRISO_LEXICON_LENGTH__ )
{ {
type = __LEX_CJK_WORDS__; type = __LEX_CJK_WORDS__;
} }
e = friso_dic_get( friso_globals.friso->dic, type, word ); e = friso_dic_get( friso_globals.friso->dic, type, word );
if ( e != NULL ) if ( e != NULL )
{ {
add_assoc_long( entry, "length", e->length); add_assoc_long( entry, "length", e->length);
add_assoc_long( entry, "freq", e->fre ); add_assoc_long( entry, "freq", e->fre );
*( return_value ) = * ( entry ); *( return_value ) = * ( entry );
return; return;
} }
RETURN_BOOL(0); RETURN_BOOL(0);
} }
/* }}} */ /* }}} */
@ -403,17 +403,17 @@ PHP_FUNCTION(friso_dic_get)
Return the bytes that the utf-8 char takes.*/ Return the bytes that the utf-8 char takes.*/
PHP_FUNCTION(friso_utf8_bytes) PHP_FUNCTION(friso_utf8_bytes)
{ {
char *word = NULL; char *word = NULL;
int wlen, _bytes; int wlen, _bytes;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &word, &wlen) == FAILURE) { if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &word, &wlen) == FAILURE) {
return; return;
} }
if ( word == NULL ) RETURN_LONG(0); if ( word == NULL ) RETURN_LONG(0);
_bytes = get_utf8_bytes( word[0] ); _bytes = get_utf8_bytes( word[0] );
RETURN_LONG(_bytes); RETURN_LONG(_bytes);
} }
/* }}} */ /* }}} */
@ -421,16 +421,16 @@ PHP_FUNCTION(friso_utf8_bytes)
Return the unicode of the given utf-8 char.*/ Return the unicode of the given utf-8 char.*/
PHP_FUNCTION(friso_utf8_ucode) PHP_FUNCTION(friso_utf8_ucode)
{ {
char *word = NULL; char *word = NULL;
int wlen, _ucode; int wlen, _ucode;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &word, &wlen) == FAILURE) { if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &word, &wlen) == FAILURE) {
return; return;
} }
_ucode = get_utf8_unicode( word ); _ucode = get_utf8_unicode( word );
RETURN_LONG(_ucode); RETURN_LONG(_ucode);
} }
/* }}} */ /* }}} */
@ -438,18 +438,18 @@ PHP_FUNCTION(friso_utf8_ucode)
Return char that the a unicode pointed to.*/ Return char that the a unicode pointed to.*/
PHP_FUNCTION(friso_ucode_utf8) PHP_FUNCTION(friso_ucode_utf8)
{ {
unsigned long *ucode = NULL; unsigned long *ucode = NULL;
int _bytes; int _bytes;
char word[7]; char word[7];
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &ucode ) == FAILURE) { if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &ucode ) == FAILURE) {
return; return;
} }
_bytes = unicode_to_utf8( ( size_t ) ucode, word ); _bytes = unicode_to_utf8( ( size_t ) ucode, word );
word[_bytes] = '\0'; word[_bytes] = '\0';
RETURN_STRINGL( word, _bytes, 1 ); RETURN_STRINGL( word, _bytes, 1 );
} }
/* }}} */ /* }}} */

View File

@ -2,7 +2,7 @@
$br = (php_sapi_name() == "cli")? "":"<br>"; $br = (php_sapi_name() == "cli")? "":"<br>";
if(!extension_loaded('friso')) { if(!extension_loaded('friso')) {
dl('friso.' . PHP_SHLIB_SUFFIX); dl('friso.' . PHP_SHLIB_SUFFIX);
} }
$module = 'friso'; $module = 'friso';
$functions = get_extension_funcs($module); $functions = get_extension_funcs($module);
@ -13,9 +13,9 @@ foreach($functions as $func) {
echo "$br\n"; echo "$br\n";
$function = 'confirm_' . $module . '_compiled'; $function = 'confirm_' . $module . '_compiled';
if (extension_loaded($module)) { if (extension_loaded($module)) {
$str = $function($module); $str = $function($module);
} else { } else {
$str = "Module $module is not compiled into PHP"; $str = "Module $module is not compiled into PHP";
} }
echo "$str\n"; echo "$str\n";
?> ?>

View File

@ -6,11 +6,11 @@ extern zend_module_entry friso_module_entry;
#define phpext_friso_ptr &friso_module_entry #define phpext_friso_ptr &friso_module_entry
#ifdef PHP_WIN32 #ifdef PHP_WIN32
# define PHP_FRISO_API __declspec(dllexport) # define PHP_FRISO_API __declspec(dllexport)
#elif defined(__GNUC__) && __GNUC__ >= 4 #elif defined(__GNUC__) && __GNUC__ >= 4
# define PHP_FRISO_API __attribute__ ((visibility("default"))) # define PHP_FRISO_API __attribute__ ((visibility("default")))
#else #else
# define PHP_FRISO_API # define PHP_FRISO_API
#endif #endif
#ifdef ZTS #ifdef ZTS
@ -36,12 +36,12 @@ PHP_FUNCTION(friso_utf8_ucode);
PHP_FUNCTION(friso_ucode_utf8); PHP_FUNCTION(friso_ucode_utf8);
/* /*
Declare any global variables you may need between the BEGIN Declare any global variables you may need between the BEGIN
and END macros here: and END macros here:
ZEND_BEGIN_MODULE_GLOBALS(friso) ZEND_BEGIN_MODULE_GLOBALS(friso)
long global_value; long global_value;
char *global_string; char *global_string;
ZEND_END_MODULE_GLOBALS(friso) ZEND_END_MODULE_GLOBALS(friso)
*/ */
@ -66,5 +66,5 @@ typedef struct {
#define FRISO_G(v) (friso_globals.v) #define FRISO_G(v) (friso_globals.v)
#endif #endif
#endif /* PHP_FRISO_H */ #endif /* PHP_FRISO_H */

View File

@ -6,14 +6,14 @@ Check for friso presence
<?php <?php
echo "friso extension is available"; echo "friso extension is available";
/* /*
you can add regression tests for your extension here you can add regression tests for your extension here
the output of your test code has to be equal to the the output of your test code has to be equal to the
text in the --EXPECT-- section below for the tests text in the --EXPECT-- section below for the tests
to pass, differences between the output and the to pass, differences between the output and the
expected text are interpreted as failure expected text are interpreted as failure
see php5/README.TESTING for further information on see php5/README.TESTING for further information on
writing regression tests writing regression tests
*/ */
?> ?>

File diff suppressed because it is too large Load Diff

View File

@ -1,8 +1,8 @@
/* /*
* main interface file for friso - free soul. * main interface file for friso - free soul.
* you could modify it and re-release it but never for commercial use. * you could modify it and re-release it but never for commercial use.
* *
* @author chenxin <chenxin619315@gmail.com> * @author chenxin <chenxin619315@gmail.com>
*/ */
#ifndef _friso_h #ifndef _friso_h
#define _friso_h #define _friso_h
@ -15,11 +15,11 @@
#define friso_version() FRISO_VERSION #define friso_version() FRISO_VERSION
#define DEFAULT_SEGMENT_LENGTH 5 #define DEFAULT_SEGMENT_LENGTH 5
#define DEFAULT_MIX_LENGTH 2 #define DEFAULT_MIX_LENGTH 2
#define DEFAULT_LNA_LENGTH 1 #define DEFAULT_LNA_LENGTH 1
#define DEFAULT_NTHRESHOLD 1000000 #define DEFAULT_NTHRESHOLD 1000000
#define DEFAULT_SEGMENT_MODE 2 #define DEFAULT_SEGMENT_MODE 2
/* /*
* Type: friso_lex_t * Type: friso_lex_t
@ -29,8 +29,8 @@
typedef enum { typedef enum {
__LEX_CJK_WORDS__ = 0, __LEX_CJK_WORDS__ = 0,
__LEX_CJK_UNITS__ = 1, __LEX_CJK_UNITS__ = 1,
__LEX_ECM_WORDS__ = 2, //english and chinese mixed words. __LEX_ECM_WORDS__ = 2, //english and chinese mixed words.
__LEX_CEM_WORDS__ = 3, //chinese and english mixed words. __LEX_CEM_WORDS__ = 3, //chinese and english mixed words.
__LEX_CN_LNAME__ = 4, __LEX_CN_LNAME__ = 4,
__LEX_CN_SNAME__ = 5, __LEX_CN_SNAME__ = 5,
__LEX_CN_DNAME1__ = 6, __LEX_CN_DNAME1__ = 6,
@ -41,8 +41,8 @@ typedef enum {
__LEX_EN_WORDS__ = 11, __LEX_EN_WORDS__ = 11,
__LEX_OTHER_WORDS__ = 15, __LEX_OTHER_WORDS__ = 15,
__LEX_NCSYN_WORDS__ = 16, __LEX_NCSYN_WORDS__ = 16,
__LEX_PUNC_WORDS__ = 17, //punctuations __LEX_PUNC_WORDS__ = 17, //punctuations
__LEX_UNKNOW_WORDS__ = 18 //unrecognized words. __LEX_UNKNOW_WORDS__ = 18 //unrecognized words.
} friso_lex_t; } friso_lex_t;
typedef friso_hash_t * friso_dic_t; typedef friso_hash_t * friso_dic_t;
@ -51,8 +51,8 @@ typedef friso_hash_t * friso_dic_t;
//charset that Friso now support. //charset that Friso now support.
typedef enum { typedef enum {
FRISO_UTF8 = 0, //UTF-8 FRISO_UTF8 = 0, //UTF-8
FRISO_GBK = 1 //GBK FRISO_GBK = 1 //GBK
} friso_charset_t; } friso_charset_t;
/* /*
@ -61,15 +61,15 @@ typedef enum {
* use to identidy the mode that the friso use. * use to identidy the mode that the friso use.
*/ */
typedef enum { typedef enum {
__FRISO_SIMPLE_MODE__ = 1, __FRISO_SIMPLE_MODE__ = 1,
__FRISO_COMPLEX_MODE__ = 2, __FRISO_COMPLEX_MODE__ = 2,
__FRISO_DETECT_MODE__ = 3 __FRISO_DETECT_MODE__ = 3
} friso_mode_t; } friso_mode_t;
/* friso entry.*/ /* friso entry.*/
typedef struct { typedef struct {
friso_dic_t dic; //friso dictionary friso_dic_t dic; //friso dictionary
friso_charset_t charset; //project charset. friso_charset_t charset; //project charset.
} friso_entry; } friso_entry;
typedef friso_entry * friso_t; typedef friso_entry * friso_t;
@ -80,26 +80,26 @@ typedef friso_entry * friso_t;
* ------------------- * -------------------
* This type used to represent the lexicon entry struct. * This type used to represent the lexicon entry struct.
*/ */
#define _LEX_APPENSYN_MASK (1 << 0) //append synoyums words. #define _LEX_APPENSYN_MASK (1 << 0) //append synoyums words.
#define lex_appensyn_open(e) e->ctrlMask |= _LEX_APPENSYN_MASK #define lex_appensyn_open(e) e->ctrlMask |= _LEX_APPENSYN_MASK
#define lex_appensyn_close(e) e->ctrlMask &= ~_LEX_APPENSYN_MASK #define lex_appensyn_close(e) e->ctrlMask &= ~_LEX_APPENSYN_MASK
#define lex_appensyn_check(e) ((e->ctrlMask & _LEX_APPENSYN_MASK) != 0) #define lex_appensyn_check(e) ((e->ctrlMask & _LEX_APPENSYN_MASK) != 0)
typedef struct { typedef struct {
/* /*
* the type of the lexicon item. * the type of the lexicon item.
* available value is all the elements in friso_lex_t enum. * available value is all the elements in friso_lex_t enum.
* and if it is __LEX_OTHER_WORDS__, we need to free it after use it. * and if it is __LEX_OTHER_WORDS__, we need to free it after use it.
*/ */
uchar_t length; //the length of the token.(after the convertor of Friso.) uchar_t length; //the length of the token.(after the convertor of Friso.)
uchar_t rlen; //the real length of the token.(before any convert) uchar_t rlen; //the real length of the token.(before any convert)
uchar_t type; uchar_t type;
uchar_t ctrlMask; //function control mask, like append the synoyums words. uchar_t ctrlMask; //function control mask, like append the synoyums words.
uint_t offset; //offset index. uint_t offset; //offset index.
fstring word; fstring word;
//fstring py; //pinyin of the word.(invalid) //fstring py; //pinyin of the word.(invalid)
friso_array_t syn; //synoyums words. friso_array_t syn; //synoyums words.
friso_array_t pos; //part of speech. friso_array_t pos; //part of speech.
uint_t fre; //single word frequency. uint_t fre; //single word frequency.
} lex_entry_cdt; } lex_entry_cdt;
typedef lex_entry_cdt * lex_entry_t; typedef lex_entry_cdt * lex_entry_t;
@ -108,11 +108,11 @@ typedef lex_entry_cdt * lex_entry_t;
#define __HITS_WORD_LENGTH__ 64 #define __HITS_WORD_LENGTH__ 64
typedef struct { typedef struct {
uchar_t type; //type of the word. (item of friso_lex_t) uchar_t type; //type of the word. (item of friso_lex_t)
uchar_t length; //length of the token. uchar_t length; //length of the token.
uchar_t rlen; //the real length of the token.(in orgin strng) uchar_t rlen; //the real length of the token.(in orgin strng)
char pos; //part of speech. char pos; //part of speech.
int offset; //start offset of the word. int offset; //start offset of the word.
char word[__HITS_WORD_LENGTH__]; char word[__HITS_WORD_LENGTH__];
//char py[0]; //char py[0];
} friso_token_entry; } friso_token_entry;
@ -122,25 +122,25 @@ typedef friso_token_entry * friso_token_t;
/* /*
* Type: friso_task_entry * Type: friso_task_entry
* This type used to represent the current segmentation content. * This type used to represent the current segmentation content.
* like the text to split, and the current index, token buffer eg.... * like the text to split, and the current index, token buffer eg....
*/ */
//action control mask for #FRISO_TASK_T#. //action control mask for #FRISO_TASK_T#.
#define _TASK_CHECK_CF_MASK (1 << 0) //Wether to check the chinese fraction. #define _TASK_CHECK_CF_MASK (1 << 0) //Wether to check the chinese fraction.
#define _TASK_START_SS_MASK (1 << 1) //Wether to start the secondary segmentation. #define _TASK_START_SS_MASK (1 << 1) //Wether to start the secondary segmentation.
#define task_ssseg_open(task) task->ctrlMask |= _TASK_START_SS_MASK #define task_ssseg_open(task) task->ctrlMask |= _TASK_START_SS_MASK
#define task_ssseg_close(task) task->ctrlMask &= ~_TASK_START_SS_MASK #define task_ssseg_close(task) task->ctrlMask &= ~_TASK_START_SS_MASK
#define task_ssseg_check(task) ((task->ctrlMask & _TASK_START_SS_MASK) != 0) #define task_ssseg_check(task) ((task->ctrlMask & _TASK_START_SS_MASK) != 0)
typedef struct { typedef struct {
fstring text; //text to tokenize fstring text; //text to tokenize
uint_t idx; //start offset index. uint_t idx; //start offset index.
uint_t length; //length of the text. uint_t length; //length of the text.
uint_t bytes; //latest word bytes in C. uint_t bytes; //latest word bytes in C.
uint_t unicode; //latest word unicode number. uint_t unicode; //latest word unicode number.
uint_t ctrlMask; //action control mask. uint_t ctrlMask; //action control mask.
friso_link_t pool; //task pool. friso_link_t pool; //task pool.
string_buffer_t sbuf; //string buffer. string_buffer_t sbuf; //string buffer.
friso_token_t token; //token result token; friso_token_t token; //token result token;
char buffer[7]; //word buffer. (1-6 bytes for an utf-8 word in C). char buffer[7]; //word buffer. (1-6 bytes for an utf-8 word in C).
} friso_task_entry; } friso_task_entry;
typedef friso_task_entry * friso_task_t; typedef friso_task_entry * friso_task_t;
@ -151,23 +151,23 @@ typedef friso_task_entry * friso_task_t;
//typedef friso_token_t ( * friso_next_hit_fn ) ( friso_t, void *, friso_task_t ); //typedef friso_token_t ( * friso_next_hit_fn ) ( friso_t, void *, friso_task_t );
//typedef lex_entry_t ( * friso_next_lex_fn ) ( friso_t, void *, friso_task_t ); //typedef lex_entry_t ( * friso_next_lex_fn ) ( friso_t, void *, friso_task_t );
/*
 * Segmentation configuration: numeric switches and limits, the selected
 * mode, and the two function pointers used to fetch the next token and
 * the next CJK lexicon entry.
 */
struct friso_config_struct {
    ushort_t        max_len;        /* the max match length (4 - 7). */
    ushort_t        r_name;         /* 1 to open chinese name recognition, 0 to close it. */
    ushort_t        mix_len;        /* the max length for the CJK words in a mix string. */
    ushort_t        lna_len;        /* the max length for the chinese last name adorn. */
    ushort_t        add_syn;        /* append synonyms tokenizer words. */
    ushort_t        clr_stw;        /* clear the stopwords. */
    ushort_t        keep_urec;      /* keep the unrecognized words. */
    ushort_t        spx_out;        /* use sphinx output customize. */
    ushort_t        en_sseg;        /* start the secondary segmentation. */
    ushort_t        st_minl;        /* min length of the secondary segmentation token. */
    uint_t          nthreshold;     /* the threshold value for a char to make up a chinese name. */
    friso_mode_t    mode;           /* complex mode or simple mode. */

    /* pointer to the function to get the next token. */
    friso_token_t (*next_token) (friso_t, struct friso_config_struct *, friso_task_t);

    /* pointer to the function to get the next cjk lex_entry_t. */
    lex_entry_t (*next_cjk) (friso_t, struct friso_config_struct *, friso_task_t);

    char            kpuncs[_FRISO_KEEP_PUNC_LEN];   /* keep punctuations buffer. */
};
@ -181,7 +181,7 @@ typedef friso_config_entry * friso_config_t;
* Usage: vars = friso_new( void ); * Usage: vars = friso_new( void );
* -------------------------------- * --------------------------------
* This function used to create a new empty friso friso_t; * This function used to create a new empty friso friso_t;
* with default value. * with default value.
*/ */
FRISO_API friso_t friso_new( void ); FRISO_API friso_t friso_new( void );
@ -202,7 +202,7 @@ FRISO_API void friso_free( friso_t );
* Usage: dic = friso_set_dic( vars, dic ); * Usage: dic = friso_set_dic( vars, dic );
* ---------------------------------------- * ----------------------------------------
* This function is used to set the dictionary for friso. * This function is used to set the dictionary for friso.
* and firso_dic_t is the pointer of a hash table array. * and firso_dic_t is the pointer of a hash table array.
*/ */
//FRISO_API void friso_set_dic( friso_t, friso_dic_t ); //FRISO_API void friso_set_dic( friso_t, friso_dic_t );
#define friso_set_dic(friso, dic)\ #define friso_set_dic(friso, dic)\
@ -272,7 +272,7 @@ FRISO_API lex_entry_t next_complex_cjk( friso_t, friso_config_t, friso_task_t );
* Usage: word = next_mmseg_token( vars, seg ); * Usage: word = next_mmseg_token( vars, seg );
* -------------------------------------- * --------------------------------------
* This function is used to get next word that friso segmented * This function is used to get next word that friso segmented
* with a split mode of __FRISO_SIMPLE_MODE__ or __FRISO_COMPLEX_MODE__ * with a split mode of __FRISO_SIMPLE_MODE__ or __FRISO_COMPLEX_MODE__
*/ */
FRISO_API friso_token_t next_mmseg_token( friso_t, friso_config_t, friso_task_t ); FRISO_API friso_token_t next_mmseg_token( friso_t, friso_config_t, friso_task_t );
@ -313,14 +313,14 @@ FRISO_API void free_lex_entry( lex_entry_t );
* Usage: friso_dic_load( friso, friso_lex_t, path, length ); * Usage: friso_dic_load( friso, friso_lex_t, path, length );
* -------------------------------------------------- * --------------------------------------------------
* This function is used to load dictionary from a given path. * This function is used to load dictionary from a given path.
* no length limit when length less than 0. * no length limit when length less than 0.
*/ */
FRISO_API void friso_dic_load( friso_t, friso_config_t, FRISO_API void friso_dic_load( friso_t, friso_config_t,
friso_lex_t, fstring, uint_t ); friso_lex_t, fstring, uint_t );
/*
 * Load the lexicon configuration file,
 * then load every valid lexicon named in that configuration file.
 */
FRISO_API void friso_dic_load_from_ifile(friso_t, friso_config_t, fstring, uint_t);

View File

@ -16,22 +16,22 @@
/* yat, just take it as this way, 99 percent you will find no problem. */
#if defined(_WIN32) || defined(_WINDOWS_) || defined(__WINDOWS_)
#   define FRISO_WINNT
#else
#   define FRISO_LINUX
#endif

/*
 * FRISO_API      : storage/export prefix for the public functions.
 * __STATIC_API__ : prefix for file-local helpers.
 */
#if defined(FRISO_WINNT)
#   define FRISO_API        extern __declspec(dllexport)
#   define __STATIC_API__   static
#else
/* platform shared library statement :: unix */
#   define FRISO_API        extern
#   define __STATIC_API__   static inline
#endif

/*
 * Report an allocation failure and terminate the process.
 * The expansion is two complete statements (it carries its own trailing
 * semicolon), so it has to be used inside a braced block.
 */
#define ___ALLOCATION_ERROR___ \
    printf("Unable to do the memory allocation, program will now exit\n" ); \
    exit(1);

/* Write a string as-is (through "%s", never as a format string). */
#define print(str) printf("%s", str )
@ -39,12 +39,12 @@ exit(1);
/*
 * Memory allocation hooks.
 * Environments such as PHP want emalloc, ecalloc, etc. instead of the
 * libc allocator, so every allocation goes through these macros and can
 * be adapted to that environment in one place.
 */
#define FRISO_CALLOC(_first, _second)   calloc((_first), (_second))
#define FRISO_MALLOC(_size)             malloc((_size))
#define FRISO_FREE(_block)              free((_block))

/* Short names for the unsigned integer types used across the library. */
typedef unsigned short ushort_t;
typedef unsigned char uchar_t;
@ -74,7 +74,7 @@ FRISO_API string_buffer_t new_string_buffer_with_string( fstring str );
/*
 * Copy the chars that the given fstring points to into the buffer.
 * This may trigger a resize of the buffer.
 */
FRISO_API void string_buffer_append(string_buffer_t, fstring);
@ -88,21 +88,21 @@ FRISO_API fstring string_buffer_remove( string_buffer_t, uint_t idx, uint_t );
/*
 * Turn the string_buffer into a string,
 * or return the buffer of the string_buffer.
 * NOTE(review): the declared return type is string_buffer_t - confirm
 * against the implementation which of the two the caller gets back.
 */
FRISO_API string_buffer_t string_buffer_trim(string_buffer_t);

/*
 * Free the given string buffer.
 * The allocation behind string_buffer_t->buffer is NOT freed: it is
 * handed back to the caller, who can release it with free() when it is
 * no longer needed.
 */
FRISO_API fstring string_buffer_devote(string_buffer_t);

/*
 * Clear the given string buffer:
 * reset its buffer with 0 and reset its length to 0.
 */
FRISO_API void string_buffer_clear(string_buffer_t);
@ -126,8 +126,8 @@ typedef string_split_entry * string_split_t;
/**
 * Create a new string_split_entry.
 * NOTE(review): only one of the two fstring arguments is documented
 * here (source) - confirm the role of the other against the definition.
 *
 * @param   source
 * @return  string_split_t
 */
FRISO_API string_split_t new_string_split(fstring, fstring);
@ -141,12 +141,12 @@ FRISO_API void free_string_split( string_split_t );
/**
 * Get the next split fstring and copy it into the __dst buffer.
 *
 * @param   string_split_t
 * @param   __dst
 * @return  fstring (NULL when the end of the source is reached
 *          or there is no more segmentation)
 */
FRISO_API fstring string_split_next(string_split_t, fstring);
/* }}} */ /* }}} */
@ -175,7 +175,7 @@ FRISO_API friso_array_t new_array_list_with_opacity( uint_t );
/*
 * Free the given friso array and its items,
 * but never what the items themselves point to.
 */
FRISO_API void free_array_list(friso_array_t);
@ -190,13 +190,13 @@ FRISO_API void *array_list_get( friso_array_t, uint_t );
/*
 * Set the item at the specified position.
 * Returns the old value.
 */
FRISO_API void *array_list_set(friso_array_t, uint_t, void *);

/*
 * Remove the item at the specified position.
 * Returns the value of the removed item.
 */
FRISO_API void *array_list_remove(friso_array_t, uint_t);
@ -205,9 +205,9 @@ FRISO_API friso_array_t array_list_trim( friso_array_t );
/*
 * Clear the array list.
 * Frees all the allocations that the stored pointers point to, keeps
 * the pointer array allocation itself, and resets the length of the
 * list.
 */
FRISO_API friso_array_t array_list_clear(friso_array_t);
@ -300,8 +300,8 @@ FRISO_API void link_list_add_first( friso_link_t, void * );
/* {{{ hashtable interface define :: start*/ /* {{{ hashtable interface define :: start*/
/* One key/value node of the hash table; nodes link to each other through _next. */
struct hash_entry {
    fstring             _key;   /* the node key. */
    void *              _val;   /* the node value. */
    struct hash_entry * _next;  /* the following node (presumably in the same bucket - confirm). */
};
typedef struct hash_entry friso_hash_entry;
@ -319,8 +319,8 @@ typedef struct {
typedef friso_hash_cdt * friso_hash_t; typedef friso_hash_cdt * friso_hash_t;
/* default values for friso_hash_cdt. */
#define DEFAULT_LENGTH  31
#define DEFAULT_FACTOR  0.85f
/* /*
* Function: new_hash_table * Function: new_hash_table
@ -359,7 +359,7 @@ FRISO_API int hash_exist_mapping( friso_hash_t, fstring );
* Usage: value = get_mapping_value( table, key ); * Usage: value = get_mapping_value( table, key );
* ----------------------------------------------- * -----------------------------------------------
* this function return the value associated with the given key. * this function return the value associated with the given key.
* UNDEFINED will be return if the mapping is not exists. * UNDEFINED will be return if the mapping is not exists.
*/ */
FRISO_API void * hash_get_value( friso_hash_t, fstring ); FRISO_API void * hash_get_value( friso_hash_t, fstring );

View File

@ -1,6 +1,6 @@
/** /**
* Friso GBK about function implements source file. * Friso GBK about function implements source file.
* @package src/friso_GBK.c . * @package src/friso_GBK.c .
* *
* @author chenxin <chenxin619315@gmail.com> * @author chenxin <chenxin619315@gmail.com>
*/ */
@ -12,12 +12,12 @@
/* read the next GBK word from the specified position. /* read the next GBK word from the specified position.
* *
* @return int the bytes of the current readed word. * @return int the bytes of the current readed word.
*/ */
FRISO_API int gbk_next_word( FRISO_API int gbk_next_word(
friso_task_t task, friso_task_t task,
uint_t *idx, uint_t *idx,
fstring __word ) fstring __word )
{ {
int c; int c;
if ( *idx >= task->length ) return 0; if ( *idx >= task->length ) return 0;
@ -41,26 +41,26 @@ FRISO_API int gbk_next_word(
//} //}
//check if the given buffer is a gbk word (ANSII string). //check if the given buffer is a gbk word (ANSII string).
// included the simplified and traditional words. // included the simplified and traditional words.
FRISO_API int gbk_cn_string( char *str ) FRISO_API int gbk_cn_string( char *str )
{ {
int c1 = (uchar_t) str[0]; int c1 = (uchar_t) str[0];
int c2 = (uchar_t) str[1]; int c2 = (uchar_t) str[1];
//GBK/2: gb2312 chinese word. //GBK/2: gb2312 chinese word.
return ( ((c1 >= 0xb0 && c1 <= 0xf7) return ( ((c1 >= 0xb0 && c1 <= 0xf7)
&& (c2 >= 0xa1 && c2 <= 0xfe)) && (c2 >= 0xa1 && c2 <= 0xfe))
//GBK/3: extend chinese words. //GBK/3: extend chinese words.
|| ((c1 >= 0x81 && c1 <= 0xa0) || ((c1 >= 0x81 && c1 <= 0xa0)
&& ( (c2 >= 0x40 && c2 <= 0x7e) && ( (c2 >= 0x40 && c2 <= 0x7e)
|| (c2 >= 0x80 && c2 <= 0xfe) )) || (c2 >= 0x80 && c2 <= 0xfe) ))
//GBK/4: extend chinese words. //GBK/4: extend chinese words.
|| ((c1 >= 0xaa && c1 <= 0xfe) || ((c1 >= 0xaa && c1 <= 0xfe)
&& ( (c2 >= 0x40 && c2 <= 0xfe) && ( (c2 >= 0x40 && c2 <= 0xfe)
|| (c2 >= 0x80 && c2 <= 0xa0) )) ); || (c2 >= 0x80 && c2 <= 0xa0) )) );
} }
/*check if the given char is a ASCII letter /*check if the given char is a ASCII letter
* include all the arabic number, letters and english puntuations.*/ * include all the arabic number, letters and english puntuations.*/
FRISO_API int gbk_halfwidth_en_char( char c ) FRISO_API int gbk_halfwidth_en_char( char c )
{ {
int u = (uchar_t) c; int u = (uchar_t) c;
@ -69,58 +69,58 @@ FRISO_API int gbk_halfwidth_en_char( char c )
/* /*
* check if the given char is a full-width latain. * check if the given char is a full-width latain.
* include the full-width arabic numeber, letters. * include the full-width arabic numeber, letters.
* but not the full-width puntuations. * but not the full-width puntuations.
*/ */
FRISO_API int gbk_fullwidth_en_char( char *str ) FRISO_API int gbk_fullwidth_en_char( char *str )
{ {
int c1 = (uchar_t) str[0]; int c1 = (uchar_t) str[0];
int c2 = (uchar_t) str[1]; int c2 = (uchar_t) str[1];
return ( (c1 == 0xA3) return ( (c1 == 0xA3)
&& ( (c2 >= 0xB0 && c2 <= 0xB9) //arabic numbers. && ( (c2 >= 0xB0 && c2 <= 0xB9) //arabic numbers.
|| ( c2 >= 0xC1 && c2 <= 0xDA ) //uppercase letters. || ( c2 >= 0xC1 && c2 <= 0xDA ) //uppercase letters.
|| ( c2 >= 0xE1 && c2 <= 0xFA) ) ); //lowercase letters. || ( c2 >= 0xE1 && c2 <= 0xFA) ) ); //lowercase letters.
} }
//check if the given char is a upper case english letter. //check if the given char is a upper case english letter.
// included the full-width and half-width letters. // included the full-width and half-width letters.
FRISO_API int gbk_uppercase_letter( char *str ) FRISO_API int gbk_uppercase_letter( char *str )
{ {
int c1 = (uchar_t) str[0]; int c1 = (uchar_t) str[0];
int c2 = (uchar_t) str[1]; int c2 = (uchar_t) str[1];
if ( c1 <= 0x80 ) //half-width if ( c1 <= 0x80 ) //half-width
return ( c1 >= 65 && c1 <= 90 ); return ( c1 >= 65 && c1 <= 90 );
else //full-width else //full-width
return ( c1 == 0xa3 && ( c2 >= 0xc1 && c2 <= 0xda ) ); return ( c1 == 0xa3 && ( c2 >= 0xc1 && c2 <= 0xda ) );
} }
//check if the given char is a lower case char. //check if the given char is a lower case char.
// included the full-width and half-width letters. // included the full-width and half-width letters.
FRISO_API int gbk_lowercase_letter( char *str ) FRISO_API int gbk_lowercase_letter( char *str )
{ {
int c1 = (uchar_t) str[0]; int c1 = (uchar_t) str[0];
int c2 = (uchar_t) str[1]; int c2 = (uchar_t) str[1];
if ( c1 <= 0x80 ) //half-width if ( c1 <= 0x80 ) //half-width
return ( c1 >= 97 && c1 <= 122 ); return ( c1 >= 97 && c1 <= 122 );
else //full-width else //full-width
return ( c1 == 0xa3 && ( c2 >= 0xe1 && c2 <= 0xfa ) ); return ( c1 == 0xa3 && ( c2 >= 0xe1 && c2 <= 0xfa ) );
} }
//check if the given char is a arabic numeric. //check if the given char is a arabic numeric.
// included the full-width and half-width arabic numeric. // included the full-width and half-width arabic numeric.
FRISO_API int gbk_numeric_letter( char *str ) FRISO_API int gbk_numeric_letter( char *str )
{ {
int c1 = (uchar_t) str[0]; int c1 = (uchar_t) str[0];
int c2 = (uchar_t) str[1]; int c2 = (uchar_t) str[1];
if ( c1 <= 0x80 ) //half-width if ( c1 <= 0x80 ) //half-width
return ( c1 >= 48 && c1 <= 57 ); return ( c1 >= 48 && c1 <= 57 );
else //full-width else //full-width
return ( ( c1 == 0xa3 ) && ( c2 >= 0xb0 && c2 <= 0xb9 ) ); return ( ( c1 == 0xa3 ) && ( c2 >= 0xb0 && c2 <= 0xb9 ) );
} }
/* /*
* check if the given fstring is make up with numeric chars. * check if the given fstring is make up with numeric chars.
* both full-width,half-width numeric is ok. * both full-width,half-width numeric is ok.
*/ */
FRISO_API int gbk_numeric_string( char *str ) FRISO_API int gbk_numeric_string( char *str )
{ {
@ -130,17 +130,17 @@ FRISO_API int gbk_numeric_string( char *str )
while ( *s != '\0' )
{
    c1 = (uchar_t) (*s++);
    if ( c1 <= 0x80 )       //half-width
    {
        //both bounds are checked on c1: the upper bound used to look at
        //c2, which holds nothing about the current half-width char, so
        //a string like "1a" passed as numeric.
        if ( c1 < 48 || c1 > 57 ) return 0;
    }
    else                    //full-width
    {
        if ( c1 != 0xa3 ) return 0;
        c2 = (uchar_t) (*s++);
        if ( c2 < 0xb0 || c2 > 0xb9 ) return 0;
    }
}

return 1;
@ -157,47 +157,47 @@ FRISO_API int gbk_decimal_string( char *str )
for ( i = 0; i < len; ) for ( i = 0; i < len; )
{ {
c1 = (uchar_t) str[i++]; c1 = (uchar_t) str[i++];
//count the number of the points. //count the number of the points.
if ( c1 == 46 ) if ( c1 == 46 )
{ {
p++; p++;
continue; continue;
} }
if ( c1 <= 0x80 ) //half-width if ( c1 <= 0x80 ) //half-width
{ {
if ( c1 < 48 || c1 > 57 ) return 0; if ( c1 < 48 || c1 > 57 ) return 0;
} }
else //full-width else //full-width
{ {
if ( c1 != 0xa3 ) return 0; if ( c1 != 0xa3 ) return 0;
c2 = (uchar_t) str[i++]; c2 = (uchar_t) str[i++];
if ( c2 < 0xb0 || c2 > 0xb9 ) return 0; if ( c2 < 0xb0 || c2 > 0xb9 ) return 0;
} }
} }
return (p == 1); return (p == 1);
} }
//check if the given char is a english(ASCII) letter. //check if the given char is a english(ASCII) letter.
// (full-width and half-width), not the punctuation/arabic of course. // (full-width and half-width), not the punctuation/arabic of course.
FRISO_API int gbk_en_letter( char *str ) FRISO_API int gbk_en_letter( char *str )
{ {
int c1 = (uchar_t) str[0]; int c1 = (uchar_t) str[0];
int c2 = (uchar_t) str[1]; int c2 = (uchar_t) str[1];
if ( c1 <= 0x80 ) //half-width if ( c1 <= 0x80 ) //half-width
return ( (c1 >= 65 && c1 <= 90) //lowercase return ( (c1 >= 65 && c1 <= 90) //lowercase
|| (c1 >= 97 && c1 <= 122)); //uppercase || (c1 >= 97 && c1 <= 122)); //uppercase
else else
return ( (c1 == 0xa3) return ( (c1 == 0xa3)
&& ( ( c2 >= 0xc1 && c2 <= 0xda ) //lowercase && ( ( c2 >= 0xc1 && c2 <= 0xda ) //lowercase
|| ( c2 >= 0xe1 && c2 <= 0xfa ) ) ); //uppercase || ( c2 >= 0xe1 && c2 <= 0xfa ) ) ); //uppercase
return 0; return 0;
} }
//check the given char is a whitespace or not. //check the given char is a whitespace or not.
// included full-width and half-width whitespace. // included full-width and half-width whitespace.
FRISO_API int gbk_whitespace( char *str ) FRISO_API int gbk_whitespace( char *str )
{ {
int c1 = (uchar_t) str[0]; int c1 = (uchar_t) str[0];
@ -213,8 +213,8 @@ FRISO_API int gbk_letter_number( char *str )
int c1 = (uchar_t) str[0]; int c1 = (uchar_t) str[0];
int c2 = (uchar_t) str[1]; int c2 = (uchar_t) str[1];
return ( (c1 == 0xa2) return ( (c1 == 0xa2)
&& ( ( c2 >= 0xa1 && c2 <= 0xb0 ) //lowercase && ( ( c2 >= 0xa1 && c2 <= 0xb0 ) //lowercase
|| ( c2 >= 0xf0 && c2 <= 0xfe ) ) ); //uppercase || ( c2 >= 0xf0 && c2 <= 0xfe ) ) ); //uppercase
} }
/* /*
@ -232,9 +232,9 @@ FRISO_API int gbk_en_punctuation( char c )
{ {
int u = (uchar_t) c; int u = (uchar_t) c;
return ( (u > 32 && u < 48) return ( (u > 32 && u < 48)
|| ( u > 57 && u < 65 ) || ( u > 57 && u < 65 )
|| ( u > 90 && u < 97 ) || ( u > 90 && u < 97 )
|| ( u > 122 && u < 127 ) ); || ( u > 122 && u < 127 ) );
} }
//check the given char is a chinese punctuation. //check the given char is a chinese punctuation.
@ -244,16 +244,16 @@ FRISO_API int gbk_cn_punctuation( char *str )
int c2 = (uchar_t) str[1]; int c2 = (uchar_t) str[1];
//full-width en punctuation. //full-width en punctuation.
return ( (c1 == 0xa3 && (( c2 >= 0xa1 && c2 <= 0xaf ) return ( (c1 == 0xa3 && (( c2 >= 0xa1 && c2 <= 0xaf )
|| ( c2 >= 0xba && c2 <= 0xc0 ) || ( c2 >= 0xba && c2 <= 0xc0 )
|| ( c2 >= 0xdb && c2 <= 0xe0 ) || ( c2 >= 0xdb && c2 <= 0xe0 )
|| ( c2 >= 0xfb && c2 <= 0xfe ) )) || ( c2 >= 0xfb && c2 <= 0xfe ) ))
//chinese punctuation. //chinese punctuation.
|| (c1 == 0xa1 && ( (c2 >= 0xa1 && c2 <= 0xae) || (c1 == 0xa1 && ( (c2 >= 0xa1 && c2 <= 0xae)
|| ( c2 >= 0xb0 && c2 <= 0xbf ) )) || ( c2 >= 0xb0 && c2 <= 0xbf ) ))
//A6 area special punctuations:" " //A6 area special punctuations:" "
|| (c1 == 0xa6 && (c2 >= 0xf9 && c2 <= 0xfe)) || (c1 == 0xa6 && (c2 >= 0xf9 && c2 <= 0xfe))
//A8 area special punctuations: " ˊˋ˙–―‥‵℅ " //A8 area special punctuations: " ˊˋ˙–―‥‵℅ "
|| (c1 == 0xa8 && (c2 >= 0x40 && c2 <= 0x47)) ); || (c1 == 0xa8 && (c2 >= 0x40 && c2 <= 0x47)) );
} }
/* {{{ /* {{{
@ -269,19 +269,19 @@ FRISO_API int gbk_cn_punctuation( char *str )
//FRISO_API int gbk_keep_punctuation( char *str ) //FRISO_API int gbk_keep_punctuation( char *str )
//{ //{
// if ( __keep_punctuations_hash__ == NULL ) { // if ( __keep_punctuations_hash__ == NULL ) {
// __keep_punctuations_hash__ = new_hash_table(); // __keep_punctuations_hash__ = new_hash_table();
// hash_put_mapping( __keep_punctuations_hash__, "@", NULL ); // hash_put_mapping( __keep_punctuations_hash__, "@", NULL );
// hash_put_mapping( __keep_punctuations_hash__, "$", NULL ); // hash_put_mapping( __keep_punctuations_hash__, "$", NULL );
// hash_put_mapping( __keep_punctuations_hash__, "%", NULL ); // hash_put_mapping( __keep_punctuations_hash__, "%", NULL );
// hash_put_mapping( __keep_punctuations_hash__, "^", NULL ); // hash_put_mapping( __keep_punctuations_hash__, "^", NULL );
// hash_put_mapping( __keep_punctuations_hash__, "&", NULL ); // hash_put_mapping( __keep_punctuations_hash__, "&", NULL );
// hash_put_mapping( __keep_punctuations_hash__, "-", NULL ); // hash_put_mapping( __keep_punctuations_hash__, "-", NULL );
// hash_put_mapping( __keep_punctuations_hash__, ":", NULL ); // hash_put_mapping( __keep_punctuations_hash__, ":", NULL );
// hash_put_mapping( __keep_punctuations_hash__, ".", NULL ); // hash_put_mapping( __keep_punctuations_hash__, ".", NULL );
// hash_put_mapping( __keep_punctuations_hash__, "/", NULL ); // hash_put_mapping( __keep_punctuations_hash__, "/", NULL );
// hash_put_mapping( __keep_punctuations_hash__, "'", NULL ); // hash_put_mapping( __keep_punctuations_hash__, "'", NULL );
// hash_put_mapping( __keep_punctuations_hash__, "#", NULL ); // hash_put_mapping( __keep_punctuations_hash__, "#", NULL );
// hash_put_mapping( __keep_punctuations_hash__, "+", NULL ); // hash_put_mapping( __keep_punctuations_hash__, "+", NULL );
// } // }
// //check the hash. // //check the hash.
// return hash_exist_mapping( __keep_punctuations_hash__, str ); // return hash_exist_mapping( __keep_punctuations_hash__, str );

View File

@ -1,6 +1,6 @@
/** /**
* Friso utf8 about function implements source file. * Friso utf8 about function implements source file.
* @package src/friso_UTF8.c . * @package src/friso_UTF8.c .
* *
* @author chenxin <chenxin619315@gmail.com> * @author chenxin <chenxin619315@gmail.com>
*/ */
@ -12,12 +12,12 @@
/* read the next utf-8 word from the specified position. /* read the next utf-8 word from the specified position.
* *
* @return int the bytes of the current readed word. * @return int the bytes of the current readed word.
*/ */
FRISO_API int utf8_next_word( FRISO_API int utf8_next_word(
friso_task_t task, friso_task_t task,
uint_t *idx, uint_t *idx,
fstring __word ) fstring __word )
{ {
if ( *idx >= task->length ) return 0; if ( *idx >= task->length ) return 0;
@ -25,7 +25,7 @@ FRISO_API int utf8_next_word(
task->bytes = get_utf8_bytes( task->text[ *idx ] ); task->bytes = get_utf8_bytes( task->text[ *idx ] );
//for ( t = 0; t < task->bytes; t++ ) { //for ( t = 0; t < task->bytes; t++ ) {
// __word[t] = task->text[ (*idx)++ ]; // __word[t] = task->text[ (*idx)++ ];
//} //}
//change the loop to memcpy. //change the loop to memcpy.
@ -52,31 +52,31 @@ FRISO_API void print_char_binary( char value )
for ( t = 0; t < __CHAR_BYTES__; t++ ) for ( t = 0; t < __CHAR_BYTES__; t++ )
{ {
if ( ( value & 0x80 ) == 0x80 ) { if ( ( value & 0x80 ) == 0x80 ) {
printf("1"); printf("1");
} else { } else {
printf("0"); printf("0");
} }
value <<= 1; value <<= 1;
} }
} }
/* /*
* get the bytes of a utf-8 char. * get the bytes of a utf-8 char.
* between 1 - 6. * between 1 - 6.
* *
* @param __char * @param __char
* @return int * @return int
*/ */
FRISO_API int get_utf8_bytes( char value ) FRISO_API int get_utf8_bytes( char value )
{ {
register uint_t t = 0; register uint_t t = 0;
//one byte ascii char. //one byte ascii char.
if ( ( value & 0x80 ) == 0 ) return 1; if ( ( value & 0x80 ) == 0 ) return 1;
for ( ; ( value & 0x80 ) != 0; value <<= 1 ) for ( ; ( value & 0x80 ) != 0; value <<= 1 )
t++; t++;
return t; return t;
} }
@ -94,25 +94,25 @@ FRISO_API int get_utf8_unicode( const fstring ch )
register char b1,b2,b3; register char b1,b2,b3;
switch ( bytes ) { switch ( bytes ) {
case 1: case 1:
*bit = *ch; *bit = *ch;
break; break;
case 2: case 2:
b1 = *ch; b1 = *ch;
b2 = *(ch + 1); b2 = *(ch + 1);
*bit = (b1 << 6) + (b2 & 0x3F); *bit = (b1 << 6) + (b2 & 0x3F);
*(bit+1) = (b1 >> 2) & 0x07; *(bit+1) = (b1 >> 2) & 0x07;
break; break;
case 3: case 3:
b1 = *ch; b1 = *ch;
b2 = *(ch + 1); b2 = *(ch + 1);
b3 = *(ch + 2); b3 = *(ch + 2);
*bit = (b2 << 6) + (b3 & 0x3F); *bit = (b2 << 6) + (b3 & 0x3F);
*(bit+1) = (b1 << 4) + ((b2 >> 2) & 0x0F); *(bit+1) = (b1 << 4) + ((b2 >> 2) & 0x0F);
break; break;
//ignore the ones that are larger than 3 bytes; //ignore the ones that are larger than 3 bytes;
} }
return code; return code;
@ -122,50 +122,50 @@ FRISO_API int get_utf8_unicode( const fstring ch )
FRISO_API int unicode_to_utf8( uint_t u, fstring __word ) FRISO_API int unicode_to_utf8( uint_t u, fstring __word )
{ {
if ( u <= 0x0000007F ) { if ( u <= 0x0000007F ) {
//U-00000000 - U-0000007F //U-00000000 - U-0000007F
//0xxxxxxx //0xxxxxxx
*__word = ( u & 0x7F ); *__word = ( u & 0x7F );
return 1; return 1;
} else if ( u >= 0x00000080 && u <= 0x000007FF ) { } else if ( u >= 0x00000080 && u <= 0x000007FF ) {
//U-00000080 - U-000007FF //U-00000080 - U-000007FF
//110xxxxx 10xxxxxx //110xxxxx 10xxxxxx
*( __word + 1 ) = ( u & 0x3F) | 0x80; *( __word + 1 ) = ( u & 0x3F) | 0x80;
*__word = ((u >> 6) & 0x1F) | 0xC0; *__word = ((u >> 6) & 0x1F) | 0xC0;
return 2; return 2;
} else if ( u >= 0x00000800 && u <= 0x0000FFFF ) { } else if ( u >= 0x00000800 && u <= 0x0000FFFF ) {
//U-00000800 - U-0000FFFF //U-00000800 - U-0000FFFF
//1110xxxx 10xxxxxx 10xxxxxx //1110xxxx 10xxxxxx 10xxxxxx
*( __word + 2 ) = ( u & 0x3F) | 0x80; *( __word + 2 ) = ( u & 0x3F) | 0x80;
*( __word + 1 ) = ((u >> 6) & 0x3F) | 0x80; *( __word + 1 ) = ((u >> 6) & 0x3F) | 0x80;
*__word = ((u >> 12) & 0x0F) | 0xE0; *__word = ((u >> 12) & 0x0F) | 0xE0;
return 3; return 3;
} else if ( u >= 0x00010000 && u <= 0x001FFFFF ) { } else if ( u >= 0x00010000 && u <= 0x001FFFFF ) {
//U-00010000 - U-001FFFFF //U-00010000 - U-001FFFFF
//11110xxx 10xxxxxx 10xxxxxx 10xxxxxx //11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
*( __word + 3 ) = ( u & 0x3F) | 0x80; *( __word + 3 ) = ( u & 0x3F) | 0x80;
*( __word + 2 ) = ((u >> 6) & 0x3F) | 0x80; *( __word + 2 ) = ((u >> 6) & 0x3F) | 0x80;
*( __word + 1 ) = ((u >> 12) & 0x3F) | 0x80; *( __word + 1 ) = ((u >> 12) & 0x3F) | 0x80;
*__word = ((u >> 18) & 0x07) | 0xF0; *__word = ((u >> 18) & 0x07) | 0xF0;
return 4; return 4;
} else if ( u >= 0x00200000 && u <= 0x03FFFFFF ) { } else if ( u >= 0x00200000 && u <= 0x03FFFFFF ) {
//U-00200000 - U-03FFFFFF //U-00200000 - U-03FFFFFF
//111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx //111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
*( __word + 4 ) = ( u & 0x3F) | 0x80; *( __word + 4 ) = ( u & 0x3F) | 0x80;
*( __word + 3 ) = ((u >> 6) & 0x3F) | 0x80; *( __word + 3 ) = ((u >> 6) & 0x3F) | 0x80;
*( __word + 2 ) = ((u >> 12) & 0x3F) | 0x80; *( __word + 2 ) = ((u >> 12) & 0x3F) | 0x80;
*( __word + 1 ) = ((u >> 18) & 0x3F) | 0x80; *( __word + 1 ) = ((u >> 18) & 0x3F) | 0x80;
*__word = ((u >> 24) & 0x03) | 0xF8; *__word = ((u >> 24) & 0x03) | 0xF8;
return 5; return 5;
} else if ( u >= 0x04000000 && u <= 0x7FFFFFFF ) { } else if ( u >= 0x04000000 && u <= 0x7FFFFFFF ) {
//U-04000000 - U-7FFFFFFF //U-04000000 - U-7FFFFFFF
//1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx //1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
*( __word + 5 ) = ( u & 0x3F) | 0x80; *( __word + 5 ) = ( u & 0x3F) | 0x80;
*( __word + 4 ) = ((u >> 6) & 0x3F) | 0x80; *( __word + 4 ) = ((u >> 6) & 0x3F) | 0x80;
*( __word + 3 ) = ((u >> 12) & 0x3F) | 0x80; *( __word + 3 ) = ((u >> 12) & 0x3F) | 0x80;
*( __word + 2 ) = ((u >> 18) & 0x3F) | 0x80; *( __word + 2 ) = ((u >> 18) & 0x3F) | 0x80;
*( __word + 1 ) = ((u >> 24) & 0x3F) | 0x80; *( __word + 1 ) = ((u >> 24) & 0x3F) | 0x80;
*__word = ((u >> 30) & 0x01) | 0xFC; *__word = ((u >> 30) & 0x01) | 0xFC;
return 6; return 6;
} }
return 0; return 0;
@ -173,28 +173,28 @@ FRISO_API int unicode_to_utf8( uint_t u, fstring __word )
/* /*
* check the given char is a CJK char or not. * check the given char is a CJK char or not.
* 2E80-2EFF CJK * 2E80-2EFF CJK
* 2F00-2FDF * 2F00-2FDF
* 3000-303F CJK --ignore * 3000-303F CJK --ignore
* 31C0-31EF CJK * 31C0-31EF CJK
* 3200-32FF CJK --ignore. * 3200-32FF CJK --ignore.
* 3300-33FF CJK * 3300-33FF CJK
* 3400-4DBF CJK A * 3400-4DBF CJK A
* 4DC0-4DFF * 4DC0-4DFF
* 4E00-9FBF CJK * 4E00-9FBF CJK
* F900-FAFF CJK * F900-FAFF CJK
* FE30-FE4F CJK * FE30-FE4F CJK
* FF00-FFEF ASCII --ignore (as basic latin) * FF00-FFEF ASCII --ignore (as basic latin)
* *
* Japanese: * Japanese:
* 3040-309F * 3040-309F
* 30A0-30FF * 30A0-30FF
* 31F0-31FF * 31F0-31FF
* *
* Korean: * Korean:
* AC00-D7AF * AC00-D7AF
* 1100-11FF * 1100-11FF
* 3130-318F * 3130-318F
* *
* @param ch :pointer to the char * @param ch :pointer to the char
* @return int : 1 for yes and 0 for not. * @return int : 1 for yes and 0 for not.
@ -211,23 +211,23 @@ FRISO_API int utf8_cjk_string( uint_t u )
//Chinese. //Chinese.
#ifdef FRISO_CJK_CHK_C #ifdef FRISO_CJK_CHK_C
c = ( ( u >= 0x4E00 && u <= 0x9FBF ) c = ( ( u >= 0x4E00 && u <= 0x9FBF )
|| ( u >= 0x2E80 && u <= 0x2EFF ) || ( u >= 0x2F00 && u <= 0x2FDF ) || ( u >= 0x2E80 && u <= 0x2EFF ) || ( u >= 0x2F00 && u <= 0x2FDF )
|| ( u >= 0x31C0 && u <= 0x31EF ) //|| ( u >= 0x3200 && u <= 0x32FF ) || ( u >= 0x31C0 && u <= 0x31EF ) //|| ( u >= 0x3200 && u <= 0x32FF )
|| ( u >= 0x3300 && u <= 0x33FF ) //|| ( u >= 0x3400 && u <= 0x4DBF ) || ( u >= 0x3300 && u <= 0x33FF ) //|| ( u >= 0x3400 && u <= 0x4DBF )
|| ( u >= 0x4DC0 && u <= 0x4DFF ) || ( u >= 0xF900 && u <= 0xFAFF ) || ( u >= 0x4DC0 && u <= 0x4DFF ) || ( u >= 0xF900 && u <= 0xFAFF )
|| ( u >= 0xFE30 && u <= 0xFE4F ) ); || ( u >= 0xFE30 && u <= 0xFE4F ) );
#endif #endif
//Japanese. //Japanese.
#ifdef FRISO_CJK_CHK_J #ifdef FRISO_CJK_CHK_J
j = ( ( u >= 0x3040 && u <= 0x309F ) j = ( ( u >= 0x3040 && u <= 0x309F )
|| ( u >= 0x30A0 && u <= 0x30FF ) || ( u >= 0x31F0 && u <= 0x31FF ) ); || ( u >= 0x30A0 && u <= 0x30FF ) || ( u >= 0x31F0 && u <= 0x31FF ) );
#endif #endif
//Korean //Korean
#ifdef FRISO_CJK_CHK_K #ifdef FRISO_CJK_CHK_K
k = ( ( u >= 0xAC00 && u <= 0xD7AF ) k = ( ( u >= 0xAC00 && u <= 0xD7AF )
|| ( u >= 0x1100 && u <= 0x11FF ) || ( u >= 0x3130 && u <= 0x318F ) ); || ( u >= 0x1100 && u <= 0x11FF ) || ( u >= 0x3130 && u <= 0x318F ) );
#endif #endif
return ( c || j || k ); return ( c || j || k );
@ -235,7 +235,7 @@ FRISO_API int utf8_cjk_string( uint_t u )
/* /*
* check the given char is a Basic Latin letter or not. * check the given char is a Basic Latin letter or not.
* include all the letters and english punctuations. * include all the letters and english punctuations.
* *
* @param c * @param c
* @return int 1 for yes and 0 for not. * @return int 1 for yes and 0 for not.
@ -247,21 +247,21 @@ FRISO_API int utf8_halfwidth_en_char( uint_t u )
/* /*
* check the given char is a full-width latain or not. * check the given char is a full-width latain or not.
* include the full-width arabic numeber, letters. * include the full-width arabic numeber, letters.
* but not the full-width punctuations. * but not the full-width punctuations.
* *
* @param c * @param c
* @return int * @return int
*/ */
FRISO_API int utf8_fullwidth_en_char( uint_t u ) FRISO_API int utf8_fullwidth_en_char( uint_t u )
{ {
return ( (u >= 65296 && u <= 65305 ) //arabic number return ( (u >= 65296 && u <= 65305 ) //arabic number
|| ( u >= 65313 && u <= 65338 ) //upper case letters || ( u >= 65313 && u <= 65338 ) //upper case letters
|| ( u >= 65345 && u <= 65370 ) ); //lower case letters || ( u >= 65345 && u <= 65370 ) ); //lower case letters
} }
//check the given char is a upper case letters or not. //check the given char is a upper case letters or not.
// included the full-width and half-width letters. // included the full-width and half-width letters.
FRISO_API int utf8_uppercase_letter( uint_t u ) FRISO_API int utf8_uppercase_letter( uint_t u )
{ {
if ( u > 65280 ) u -= 65248; if ( u > 65280 ) u -= 65248;
@ -269,7 +269,7 @@ FRISO_API int utf8_uppercase_letter( uint_t u )
} }
//check the given char is a lower case letter or not.
// included the full-width and half-width letters.
FRISO_API int utf8_lowercase_letter( uint_t u ) FRISO_API int utf8_lowercase_letter( uint_t u )
{ {
if ( u > 65280 ) u -= 65248; if ( u > 65280 ) u -= 65248;
@ -277,25 +277,25 @@ FRISO_API int utf8_lowercase_letter( uint_t u )
} }
/*
 * check whether the given value is an arabic digit.
 * both the half-width (48-57) and the full-width digits are accepted:
 * any value above 65280 is shifted down by 65248 before the range test.
 *
 * @param   u   the character value to test
 * @return  int 1 for yes and 0 for not.
 */
FRISO_API int utf8_numeric_letter( uint_t u )
{
    uint_t half = ( u > 65280 ) ? ( u - 65248 ) : u;
    return ( half >= 0x30 && half <= 0x39 );
}
/*
 * check whether the given value is an english letter (A-Z or a-z).
 * full-width letters are accepted as well: any value above 65280
 * is shifted down by 65248 before the range test.
 * punctuations are never accepted.
 *
 * @param   u   the character value to test
 * @return  int 1 for yes and 0 for not.
 */
FRISO_API int utf8_en_letter( uint_t u )
{
    uint_t half = ( u > 65280 ) ? ( u - 65248 ) : u;

    if ( half >= 0x41 && half <= 0x5A ) return 1;   //A-Z
    if ( half >= 0x61 && half <= 0x7A ) return 1;   //a-z
    return 0;
}
/* /*
* check if the given fstring is make up with numeric. * check if the given fstring is make up with numeric.
* both full-width,half-width numeric is ok. * both full-width,half-width numeric is ok.
* *
* @param str * @param str
* @return int * @return int
@ -317,22 +317,22 @@ FRISO_API int utf8_numeric_string( const fstring str )
while ( *s != '\0' ) while ( *s != '\0' )
{ {
//if ( ! utf8_numeric_letter( get_utf8_unicode( s++ ) ) ) { //if ( ! utf8_numeric_letter( get_utf8_unicode( s++ ) ) ) {
// return 0; // return 0;
//} //}
//new implemention. //new implemention.
//@date 2013-10-14 //@date 2013-10-14
bytes = 1; bytes = 1;
if ( *s < 0 ) //full-width chars. if ( *s < 0 ) //full-width chars.
{ {
u = get_utf8_unicode(s); u = get_utf8_unicode(s);
bytes = get_utf8_bytes(*s); bytes = get_utf8_bytes(*s);
if ( u < 65296 || u > 65305 ) return 0; if ( u < 65296 || u > 65305 ) return 0;
} }
else if ( *s < 48 || *s > 57 ) return 0; else if ( *s < 48 || *s > 57 ) return 0;
s += bytes; s += bytes;
} }
return 1; return 1;
@ -347,24 +347,24 @@ FRISO_API int utf8_decimal_string( const fstring str )
for ( i = 1; i < len; bytes = 1 ) for ( i = 1; i < len; bytes = 1 )
{ {
//count the number of char '.' //count the number of char '.'
if ( str[i] == '.' ) if ( str[i] == '.' )
{ {
i++; i++;
p++; p++;
continue; continue;
} }
//full-width numeric. //full-width numeric.
else if ( str[i] < 0 ) else if ( str[i] < 0 )
{ {
u = get_utf8_unicode(str+i); u = get_utf8_unicode(str+i);
bytes = get_utf8_bytes(str[i]); bytes = get_utf8_bytes(str[i]);
if ( u < 65296 || u > 65305 ) return 0; if ( u < 65296 || u > 65305 ) return 0;
} }
else if ( str[i] < 48 || str[i] > 57 ) return 0; else if ( str[i] < 48 || str[i] > 57 ) return 0;
i += bytes; i += bytes;
} }
return (p == 1); return (p == 1);
@ -379,7 +379,7 @@ FRISO_API int utf8_decimal_string( const fstring str )
FRISO_API int utf8_whitespace( uint_t u )
{
    //32 is the ascii space, 12288 (0x3000) is the full-width space.
    return ( u == 0x20 || u == 0x3000 ) ? 1 : 0;
}
@ -392,16 +392,16 @@ FRISO_API int utf8_whitespace( uint_t u )
*/ */
FRISO_API int utf8_en_punctuation( uint_t u )
{
    //only values strictly between the space (32) and DEL (127) qualify.
    //full-width forms are not folded to half-width here.
    if ( u <= 32 || u >= 127 ) return 0;

    //whatever is left after removing digits and letters is punctuation.
    if ( u >= 48 && u <= 57 )  return 0;    //0-9
    if ( u >= 65 && u <= 90 )  return 0;    //A-Z
    if ( u >= 97 && u <= 122 ) return 0;    //a-z
    return 1;
}
/* /*
* check the given char is a chinese punctuation. * check the given char is a chinese punctuation.
* @date 2013-08-31 added. * @date 2013-08-31 added.
* *
* @param ch * @param ch
* @return int * @return int
@ -409,17 +409,17 @@ FRISO_API int utf8_en_punctuation( uint_t u )
FRISO_API int utf8_cn_punctuation( uint_t u )
{
    //full-width punctuation ranges (bounds are inclusive).
    if ( u >= 0xFF01 && u <= 0xFF0F ) return 1;     //65281 - 65295
    if ( u >= 0xFF1A && u <= 0xFF1F ) return 1;     //65306 - 65311
    if ( u >= 0xFF3B && u <= 0xFF40 ) return 1;     //65339 - 65344
    if ( u >= 0xFF5B && u <= 0xFF65 ) return 1;     //65371 - 65381

    //cjk symbol and punctuation.
    //from http://www.unicode.org/charts/PDF/U3000.pdf
    if ( u >= 0x3001 && u <= 0x301F ) return 1;     //12289 - 12319

    return 0;
}
/* /*
* check if the given char is a letter number in unicode. * check if the given char is a letter number in unicode.
* like ''. * like ''.
* @param ch * @param ch
* @return int * @return int
*/ */
@ -430,7 +430,7 @@ FRISO_API int utf8_letter_number( uint_t u )
/* /*
* check if the given char is a other number in unicode. * check if the given char is a other number in unicode.
* like ''. * like ''.
* @param ch * @param ch
* @return int * @return int
*/ */
@ -456,19 +456,19 @@ FRISO_API int utf8_other_number( uint_t u )
//{ //{
// if ( __keep_punctuations_hash__ == NULL ) // if ( __keep_punctuations_hash__ == NULL )
// { // {
// __keep_punctuations_hash__ = new_hash_table(); // __keep_punctuations_hash__ = new_hash_table();
// hash_put_mapping( __keep_punctuations_hash__, "@", NULL ); // hash_put_mapping( __keep_punctuations_hash__, "@", NULL );
// //hash_put_mapping( __keep_punctuations_hash__, "$", NULL ); // //hash_put_mapping( __keep_punctuations_hash__, "$", NULL );
// hash_put_mapping( __keep_punctuations_hash__, "%", NULL ); // hash_put_mapping( __keep_punctuations_hash__, "%", NULL );
// //hash_put_mapping( __keep_punctuations_hash__, "^", NULL ); // //hash_put_mapping( __keep_punctuations_hash__, "^", NULL );
// hash_put_mapping( __keep_punctuations_hash__, "&", NULL ); // hash_put_mapping( __keep_punctuations_hash__, "&", NULL );
// //hash_put_mapping( __keep_punctuations_hash__, "-", NULL ); // //hash_put_mapping( __keep_punctuations_hash__, "-", NULL );
// //hash_put_mapping( __keep_punctuations_hash__, ":", NULL ); // //hash_put_mapping( __keep_punctuations_hash__, ":", NULL );
// hash_put_mapping( __keep_punctuations_hash__, ".", NULL ); // hash_put_mapping( __keep_punctuations_hash__, ".", NULL );
// //hash_put_mapping( __keep_punctuations_hash__, "/", NULL ); // //hash_put_mapping( __keep_punctuations_hash__, "/", NULL );
// hash_put_mapping( __keep_punctuations_hash__, "'", NULL ); // hash_put_mapping( __keep_punctuations_hash__, "'", NULL );
// hash_put_mapping( __keep_punctuations_hash__, "#", NULL ); // hash_put_mapping( __keep_punctuations_hash__, "#", NULL );
// hash_put_mapping( __keep_punctuations_hash__, "+", NULL ); // hash_put_mapping( __keep_punctuations_hash__, "+", NULL );
// } // }
// //check the hash. // //check the hash.
// return hash_exist_mapping( __keep_punctuations_hash__, str ); // return hash_exist_mapping( __keep_punctuations_hash__, str );
@ -484,7 +484,7 @@ FRISO_API int utf8_other_number( uint_t u )
//FRISO_API int utf8_fullwidth_char( uint_t u ) //FRISO_API int utf8_fullwidth_char( uint_t u )
//{ //{
// if ( u == 12288 ) // if ( u == 12288 )
// return 1; //full-width space // return 1; //full-width space
// //(32 - 126) ascii code // //(32 - 126) ascii code
// return (u > 65280 && u <= 65406); // return (u > 65280 && u <= 65406);
//} //}

View File

@ -1,9 +1,9 @@
/* /*
* friso dynamaic interface implemented functions file * friso dynamaic interface implemented functions file
* that defined in header file "friso_API.h". * that defined in header file "friso_API.h".
* never use it for commercial use. * never use it for commercial use.
* *
* @author chenxini <chenxin619315@gmail.com> * @author chenxini <chenxin619315@gmail.com>
*/ */
#include "friso_API.h" #include "friso_API.h"
@ -14,37 +14,37 @@
**********************************************/ **********************************************/
/*
 * allocate a block of __blocks pointer slots and set every slot to NULL.
 * ___ALLOCATION_ERROR___ is invoked when the allocation returns NULL.
 *
 * @param   __blocks    number of pointer slots to allocate
 * @return  void **     the new block
 */
__STATIC_API__ void **create_array_entries( uint_t __blocks )
{
    register uint_t filled = 0;
    void **entries = ( void ** ) FRISO_CALLOC( sizeof( void * ), __blocks );

    if ( entries == NULL ) {
        ___ALLOCATION_ERROR___
    }

    //explicitly store NULL in every slot.
    while ( filled < __blocks ) {
        entries[filled++] = NULL;
    }

    return entries;
}
/*
 * replace the item block of the array with a new block of
 * 'opacity' slots, keeping the first array->length items.
 * the old block is freed.
 *
 * NOTE: nothing here checks that opacity >= array->length,
 *  the caller has to guarantee it.
 *
 * @param   array   the array to resize
 * @param   opacity the new number of slots
 * @return  friso_array_t   the same array that was passed in
 */
__STATIC_API__ friso_array_t resize_array_list( 
        friso_array_t array, 
        uint_t opacity ) 
{
    void **grown = create_array_entries( opacity );
    register uint_t i = array->length;

    //carry the stored items over, from the last one down to the first.
    while ( i-- > 0 ) {
        grown[i] = array->items[i];
    }

    FRISO_FREE( array->items );
    array->allocs = opacity;
    array->items  = grown;

    return array;
}
@ -59,154 +59,154 @@ __STATIC_API__ friso_array_t resize_array_list(
/*
 * create a new, empty array list with room for 'opacity' items.
 * ___ALLOCATION_ERROR___ is invoked when the list header
 * cannot be allocated.
 *
 * @param   opacity initial number of item slots
 * @return  friso_array_t   the new list (length 0)
 */
FRISO_API friso_array_t new_array_list_with_opacity( uint_t opacity ) 
{
    friso_array_t list = ( friso_array_t ) 
        FRISO_MALLOC( sizeof( friso_array_entry ) );

    if ( list == NULL ) {
        ___ALLOCATION_ERROR___
    }

    list->length = 0;
    list->allocs = opacity;
    list->items  = create_array_entries( opacity );

    return list;
}
/* /*
* free the given friso array. * free the given friso array.
* and its items, but never where its items item pointed to . * and its items, but never where its items item pointed to .
*/ */
FRISO_API void free_array_list( friso_array_t array ) FRISO_API void free_array_list( friso_array_t array )
{ {
//free the allocation that all the items pointed to //free the allocation that all the items pointed to
//register int t; //register int t;
//if ( flag == 1 ) { //if ( flag == 1 ) {
// for ( t = 0; t < array->length; t++ ) { // for ( t = 0; t < array->length; t++ ) {
// if ( array->items[t] == NULL ) continue; // if ( array->items[t] == NULL ) continue;
// FRISO_FREE( array->items[t] ); // FRISO_FREE( array->items[t] );
// array->items[t] = NULL; // array->items[t] = NULL;
// } // }
//} //}
FRISO_FREE( array->items ); FRISO_FREE( array->items );
FRISO_FREE( array ); FRISO_FREE( array );
} }
/*
 * append a new item to the end of the array list.
 * a full list is grown to (length * 2 + 1) slots first.
 *
 * @param   array   the list to append to
 * @param   value   the item to store
 */
FRISO_API void array_list_add( friso_array_t array, void *value ) 
{
    if ( array->length == array->allocs ) {
        resize_array_list( array, array->length * 2 + 1 );
    }

    array->items[array->length] = value;
    array->length++;
}
//insert a new item at a specified position. //insert a new item at a specified position.
FRISO_API void array_list_insert( FRISO_API void array_list_insert(
friso_array_t array, friso_array_t array,
uint_t idx, uint_t idx,
void *value ) void *value )
{ {
register uint_t t; register uint_t t;
if ( idx <= array->length ) if ( idx <= array->length )
{ {
//check the condition to resize the array. //check the condition to resize the array.
if ( array->length == array->allocs ) { if ( array->length == array->allocs ) {
resize_array_list( array, array->length * 2 + 1 ); resize_array_list( array, array->length * 2 + 1 );
} }
//move the elements after idx. //move the elements after idx.
//for ( t = idx; t < array->length; t++ ) { //for ( t = idx; t < array->length; t++ ) {
// array->items[t+1] = array->items[t]; // array->items[t+1] = array->items[t];
//} //}
for ( t = array->length - 1; t >= idx; t-- ) for ( t = array->length - 1; t >= idx; t-- )
{ {
array->items[t+1] = array->items[t]; array->items[t+1] = array->items[t];
} }
array->items[idx] = value; array->items[idx] = value;
array->length++; array->length++;
} }
} }
/*
 * get the item at the specified position.
 *
 * @param   array   the list to read from
 * @param   idx     the position of the item
 * @return  void *  the stored item, or NULL when idx is out of range
 */
FRISO_API void *array_list_get( friso_array_t array, uint_t idx ) 
{
    return ( idx < array->length ) ? array->items[idx] : NULL;
}
//set the value of the item at a specified position. //set the value of the item at a specified position.
//this will return the old value. //this will return the old value.
FRISO_API void * array_list_set( FRISO_API void * array_list_set(
friso_array_t array, friso_array_t array,
uint_t idx, uint_t idx,
void * value ) void * value )
{ {
void * oval = NULL; void * oval = NULL;
if ( idx < array->length ) if ( idx < array->length )
{ {
oval = array->items[idx]; oval = array->items[idx];
array->items[idx] = value; array->items[idx] = value;
} }
return oval; return oval;
} }
//remove the item at a specified position. //remove the item at a specified position.
//this will return the value of the removed item. //this will return the value of the removed item.
FRISO_API void * array_list_remove( FRISO_API void * array_list_remove(
friso_array_t array, uint_t idx ) friso_array_t array, uint_t idx )
{ {
register uint_t t; register uint_t t;
void *oval = NULL; void *oval = NULL;
if ( idx < array->length ) if ( idx < array->length )
{ {
oval = array->items[idx]; oval = array->items[idx];
//move the elements after idx. //move the elements after idx.
for ( t = idx; t < array->length - 1; t++ ) { for ( t = idx; t < array->length - 1; t++ ) {
array->items[t] = array->items[ t + 1 ]; array->items[t] = array->items[ t + 1 ];
} }
array->items[array->length - 1] = NULL; array->items[array->length - 1] = NULL;
array->length--; array->length--;
} }
return oval; return oval;
} }
/*
 * trim the array list: shrink the item block to exactly
 * array->length slots when more than that are allocated.
 *
 * @param   array   the list to trim
 * @return  friso_array_t   the same list
 */
FRISO_API friso_array_t array_list_trim( friso_array_t array ) 
{
    //nothing to give back.
    if ( array->length >= array->allocs ) {
        return array;
    }

    return resize_array_list( array, array->length );
}
/* /*
* clear the array list. * clear the array list.
* this function will free all the allocations that the pointer pointed. * this function will free all the allocations that the pointer pointed.
* but will not free the point array allocations, * but will not free the point array allocations,
* and will reset the length of it. * and will reset the length of it.
*/ */
FRISO_API friso_array_t array_list_clear( friso_array_t array ) FRISO_API friso_array_t array_list_clear( friso_array_t array )
{ {
register uint_t t; register uint_t t;
//free all the allocations that the array->length's pointer pointed. //free all the allocations that the array->length's pointer pointed.
for ( t = 0; t < array->length; t++ ) { for ( t = 0; t < array->length; t++ ) {
/*if ( array->items[t] == NULL ) continue; /*if ( array->items[t] == NULL ) continue;
FRISO_FREE( array->items[t] ); */ FRISO_FREE( array->items[t] ); */
array->items[t] = NULL; array->items[t] = NULL;
} }
//attribute reset. //attribute reset.
array->length = 0; array->length = 0;
return array; return array;
} }
//get the size of the array list. (A macro definition has replaced this.)

View File

@ -1,7 +1,7 @@
/**
 * friso string type check function interface,
 * like english/CJK, full-width/half-width, punctuation or not.
 * @see friso_UTF8.c and friso_GBK.c for detail.
 *
 * @author chenxin <chenxin619315@gmail.com>
 */
@ -16,25 +16,25 @@
* @return int (true for cn string or false) * @return int (true for cn string or false)
* */ * */
FRISO_API int friso_cn_string( FRISO_API int friso_cn_string(
friso_charset_t charset, friso_charset_t charset,
friso_task_t task ) friso_task_t task )
{ {
if ( charset == FRISO_UTF8 ) if ( charset == FRISO_UTF8 )
return utf8_cjk_string(task->unicode); return utf8_cjk_string(task->unicode);
else if ( charset == FRISO_GBK ) else if ( charset == FRISO_GBK )
return gbk_cn_string(task->buffer); return gbk_cn_string(task->buffer);
return 0; return 0;
} }
//check if the specified word is a whitespace. //check if the specified word is a whitespace.
FRISO_API int friso_whitespace( FRISO_API int friso_whitespace(
friso_charset_t charset, friso_charset_t charset,
friso_task_t task ) friso_task_t task )
{ {
if ( charset == FRISO_UTF8 ) if ( charset == FRISO_UTF8 )
return utf8_whitespace(task->unicode); return utf8_whitespace(task->unicode);
else if ( charset == FRISO_GBK ) else if ( charset == FRISO_GBK )
return gbk_whitespace(task->buffer); return gbk_whitespace(task->buffer);
return 0; return 0;
} }
@ -52,76 +52,76 @@ FRISO_API int friso_numeric_letter(
//check if the specified word is aa english letter. //check if the specified word is aa english letter.
FRISO_API int friso_en_letter( FRISO_API int friso_en_letter(
friso_charset_t charset, friso_charset_t charset,
friso_task_t task ) friso_task_t task )
{ {
if ( charset == FRISO_UTF8 ) if ( charset == FRISO_UTF8 )
return utf8_en_letter( ( uint_t ) task->text[task->idx]); return utf8_en_letter( ( uint_t ) task->text[task->idx]);
else if ( charset == FRISO_GBK ) else if ( charset == FRISO_GBK )
return gbk_en_letter( task->text + task->idx ); return gbk_en_letter( task->text + task->idx );
return 0; return 0;
} }
//check if the specified word is a half-width letter. //check if the specified word is a half-width letter.
// punctuations are inclued. // punctuations are inclued.
FRISO_API int friso_halfwidth_en_char( FRISO_API int friso_halfwidth_en_char(
friso_charset_t charset, friso_charset_t charset,
friso_task_t task ) friso_task_t task )
{ {
if ( charset == FRISO_UTF8 ) if ( charset == FRISO_UTF8 )
return utf8_halfwidth_en_char(task->unicode); return utf8_halfwidth_en_char(task->unicode);
else if ( charset == FRISO_GBK ) else if ( charset == FRISO_GBK )
return gbk_halfwidth_en_char(task->buffer[0]); return gbk_halfwidth_en_char(task->buffer[0]);
return 0; return 0;
} }
//check if the specified word is a full-width letter. //check if the specified word is a full-width letter.
// full-width punctuations are not included. // full-width punctuations are not included.
FRISO_API int friso_fullwidth_en_char( FRISO_API int friso_fullwidth_en_char(
friso_charset_t charset, friso_charset_t charset,
friso_task_t task ) friso_task_t task )
{ {
if ( charset == FRISO_UTF8 ) if ( charset == FRISO_UTF8 )
return utf8_fullwidth_en_char( task->unicode ); return utf8_fullwidth_en_char( task->unicode );
else if ( charset == FRISO_GBK ) else if ( charset == FRISO_GBK )
return gbk_fullwidth_en_char( task->buffer ); return gbk_fullwidth_en_char( task->buffer );
return 0; return 0;
} }
//check if the specified word is an english punctuations. //check if the specified word is an english punctuations.
FRISO_API int friso_en_punctuation( FRISO_API int friso_en_punctuation(
friso_charset_t charset, friso_charset_t charset,
friso_task_t task ) friso_task_t task )
{ {
if ( charset == FRISO_UTF8 ) if ( charset == FRISO_UTF8 )
return utf8_en_punctuation( task->unicode ); return utf8_en_punctuation( task->unicode );
else if ( charset == FRISO_GBK ) else if ( charset == FRISO_GBK )
return gbk_en_punctuation( task->buffer[0] ); return gbk_en_punctuation( task->buffer[0] );
return 0; return 0;
} }
//check if the specified word ia sn chinese punctuation. //check if the specified word ia sn chinese punctuation.
FRISO_API int friso_cn_punctuation( FRISO_API int friso_cn_punctuation(
friso_charset_t charset, friso_charset_t charset,
friso_task_t task ) friso_task_t task )
{ {
if ( charset == FRISO_UTF8 ) if ( charset == FRISO_UTF8 )
return utf8_cn_punctuation( task->unicode ); return utf8_cn_punctuation( task->unicode );
else if ( charset == FRISO_GBK ) else if ( charset == FRISO_GBK )
return gbk_cn_punctuation( task->buffer ); return gbk_cn_punctuation( task->buffer );
return 0; return 0;
} }
FRISO_API int friso_letter_number( FRISO_API int friso_letter_number(
friso_charset_t charset, friso_charset_t charset,
friso_task_t task ) friso_task_t task )
{ {
return 0; return 0;
} }
FRISO_API int friso_other_number( FRISO_API int friso_other_number(
friso_charset_t charset, friso_charset_t charset,
friso_task_t task ) friso_task_t task )
{ {
return 0; return 0;
} }
@ -129,98 +129,98 @@ FRISO_API int friso_other_number(
//check if the word is a keep punctuation. //check if the word is a keep punctuation.
//@Deprecated //@Deprecated
//FRISO_API int friso_keep_punctuation( //FRISO_API int friso_keep_punctuation(
// friso_charset_t charset, // friso_charset_t charset,
// friso_task_t task ) // friso_task_t task )
//{ //{
// if ( charset == FRISO_UTF8 ) // if ( charset == FRISO_UTF8 )
// return utf8_keep_punctuation( task->buffer ); // return utf8_keep_punctuation( task->buffer );
// else if ( charset == FRISO_GBK ) // else if ( charset == FRISO_GBK )
// return gbk_keep_punctuation( task->buffer ); // return gbk_keep_punctuation( task->buffer );
// return 0; // return 0;
//} //}
//check if the specified char is en english punctuation. //check if the specified char is en english punctuation.
// this function is the same as friso_en_punctuation. // this function is the same as friso_en_punctuation.
FRISO_API int is_en_punctuation( FRISO_API int is_en_punctuation(
friso_charset_t charset, char c ) friso_charset_t charset, char c )
{ {
if ( charset == FRISO_UTF8 ) if ( charset == FRISO_UTF8 )
return utf8_en_punctuation( (uint_t) c); return utf8_en_punctuation( (uint_t) c);
else if ( charset == FRISO_GBK ) else if ( charset == FRISO_GBK )
return gbk_en_punctuation( c ); return gbk_en_punctuation( c );
return 0; return 0;
} }
//check the specified string is make up with numeric. //check the specified string is make up with numeric.
FRISO_API int friso_numeric_string( FRISO_API int friso_numeric_string(
friso_charset_t charset, friso_charset_t charset,
char *buffer ) char *buffer )
{ {
if ( charset == FRISO_UTF8 ) if ( charset == FRISO_UTF8 )
return utf8_numeric_string( buffer ); return utf8_numeric_string( buffer );
else if ( charset == FRISO_GBK ) else if ( charset == FRISO_GBK )
return gbk_numeric_string( buffer ); return gbk_numeric_string( buffer );
return 0; return 0;
} }
//check the specified string is a decimal string. //check the specified string is a decimal string.
FRISO_API int friso_decimal_string( FRISO_API int friso_decimal_string(
friso_charset_t charset, char *buffer ) friso_charset_t charset, char *buffer )
{ {
if ( charset == FRISO_UTF8 ) if ( charset == FRISO_UTF8 )
return utf8_decimal_string( buffer ); return utf8_decimal_string( buffer );
else if ( charset == FRISO_GBK ) else if ( charset == FRISO_GBK )
return gbk_decimal_string( buffer ); return gbk_decimal_string( buffer );
return 0; return 0;
} }
//check if the specified char is english uppercase letter. //check if the specified char is english uppercase letter.
// included full-width and half-width letters. // included full-width and half-width letters.
FRISO_API int friso_uppercase_letter( FRISO_API int friso_uppercase_letter(
friso_charset_t charset, friso_charset_t charset,
friso_task_t task ) friso_task_t task )
{ {
if ( charset == FRISO_UTF8 ) if ( charset == FRISO_UTF8 )
return utf8_uppercase_letter( task->unicode ); return utf8_uppercase_letter( task->unicode );
else if ( charset == FRISO_GBK ) else if ( charset == FRISO_GBK )
return gbk_uppercase_letter( task->buffer ); return gbk_uppercase_letter( task->buffer );
return 0; return 0;
} }
/* get the type of the specified char. /* get the type of the specified char.
* the type will be the constants defined above. * the type will be the constants defined above.
* (include the fullwidth english char.) * (include the fullwidth english char.)
*/ */
FRISO_API friso_enchar_t friso_enchar_type( FRISO_API friso_enchar_t friso_enchar_type(
friso_charset_t charset, friso_charset_t charset,
friso_task_t task ) friso_task_t task )
{ {
//Unicode or ASCII.(Both UTF-8 and GBK are valid) //Unicode or ASCII.(Both UTF-8 and GBK are valid)
uint_t u = 0; uint_t u = 0;
if ( charset == FRISO_UTF8 ) if ( charset == FRISO_UTF8 )
{ {
u = task->unicode; u = task->unicode;
//if ( u >= 65280 ) u = 65280 - 65248; //if ( u >= 65280 ) u = 65280 - 65248;
} }
else if ( charset == FRISO_GBK ) else if ( charset == FRISO_GBK )
{ {
u = (uchar_t)task->buffer[0]; u = (uchar_t)task->buffer[0];
//if ( u == 0xa3 ) ; //full-width. //if ( u == 0xa3 ) ; //full-width.
} }
//range check. //range check.
if ( u > 126 || u < 32 ) return FRISO_EN_UNKNOW; if ( u > 126 || u < 32 ) return FRISO_EN_UNKNOW;
if ( u == 32 ) return FRISO_EN_WHITESPACE; if ( u == 32 ) return FRISO_EN_WHITESPACE;
if ( u >= 48 && u <= 57 ) return FRISO_EN_NUMERIC; if ( u >= 48 && u <= 57 ) return FRISO_EN_NUMERIC;
if ( u >= 65 && u <= 90 ) return FRISO_EN_LETTER; if ( u >= 65 && u <= 90 ) return FRISO_EN_LETTER;
if ( u >= 97 && u <= 122 ) return FRISO_EN_LETTER; if ( u >= 97 && u <= 122 ) return FRISO_EN_LETTER;
return FRISO_EN_PUNCTUATION; return FRISO_EN_PUNCTUATION;
} }
/* get the type of the specified en char. /* get the type of the specified en char.
* the type will be the constants defined above. * the type will be the constants defined above.
* (the char should be half-width english char only) * (the char should be half-width english char only)
*/ */
FRISO_API friso_enchar_t get_enchar_type( char ch ) FRISO_API friso_enchar_t get_enchar_type( char ch )
@ -228,11 +228,11 @@ FRISO_API friso_enchar_t get_enchar_type( char ch )
uint_t u = (uchar_t) ch; uint_t u = (uchar_t) ch;
//range check. //range check.
if ( u > 126 || u < 32 ) return FRISO_EN_UNKNOW; if ( u > 126 || u < 32 ) return FRISO_EN_UNKNOW;
if ( u == 32 ) return FRISO_EN_WHITESPACE; if ( u == 32 ) return FRISO_EN_WHITESPACE;
if ( u >= 48 && u <= 57 ) return FRISO_EN_NUMERIC; if ( u >= 48 && u <= 57 ) return FRISO_EN_NUMERIC;
if ( u >= 65 && u <= 90 ) return FRISO_EN_LETTER; if ( u >= 65 && u <= 90 ) return FRISO_EN_LETTER;
if ( u >= 97 && u <= 122 ) return FRISO_EN_LETTER; if ( u >= 97 && u <= 122 ) return FRISO_EN_LETTER;
return FRISO_EN_PUNCTUATION; return FRISO_EN_PUNCTUATION;
} }

View File

@ -1,9 +1,9 @@
/** /**
* Friso charset about function interface header file. * Friso charset about function interface header file.
* @package src/friso_charset.h . * @package src/friso_charset.h .
* Available charset for now: * Available charset for now:
* 1. UTF8 - function start with utf8 * 1. UTF8 - function start with utf8
* 2. GBK - function start with gbk * 2. GBK - function start with gbk
* *
* @author chenxin <chenxin619315@gmail.com> * @author chenxin <chenxin619315@gmail.com>
*/ */
@ -33,11 +33,11 @@ FRISO_API int friso_numeric_letter(friso_charset_t, friso_task_t);
FRISO_API int friso_en_letter( friso_charset_t, friso_task_t ); FRISO_API int friso_en_letter( friso_charset_t, friso_task_t );
//check if the specified word is a half-width letter.
// punctuations are included.
FRISO_API int friso_halfwidth_en_char( friso_charset_t, friso_task_t ); FRISO_API int friso_halfwidth_en_char( friso_charset_t, friso_task_t );
//check if the specified word is a full-width letter. //check if the specified word is a full-width letter.
// full-width punctuations are not included. // full-width punctuations are not included.
FRISO_API int friso_fullwidth_en_char( friso_charset_t, friso_task_t ); FRISO_API int friso_fullwidth_en_char( friso_charset_t, friso_task_t );
//check if the specified word is an english punctuations. //check if the specified word is an english punctuations.
@ -60,32 +60,32 @@ FRISO_API int friso_numeric_string( friso_charset_t, char * );
FRISO_API int friso_decimal_string( friso_charset_t, char * ); FRISO_API int friso_decimal_string( friso_charset_t, char * );
//check if the specified char is english uppercase letter. //check if the specified char is english uppercase letter.
// included full-width and half-width letters. // included full-width and half-width letters.
FRISO_API int friso_uppercase_letter( friso_charset_t, friso_task_t ); FRISO_API int friso_uppercase_letter( friso_charset_t, friso_task_t );
//en char type. //en char type.
//#define FRISO_EN_LETTER 0 //a-z && A-Z //#define FRISO_EN_LETTER 0 //a-z && A-Z
//#define FRISO_EN_NUMERIC 1 //0-9 //#define FRISO_EN_NUMERIC 1 //0-9
//#define FRISO_EN_PUNCTUATION 2 //english punctuations //#define FRISO_EN_PUNCTUATION 2 //english punctuations
//#define FRISO_EN_WHITESPACE 3 //whitespace //#define FRISO_EN_WHITESPACE 3 //whitespace
//#define FRISO_EN_UNKNOW -1 //beyond 32-122 //#define FRISO_EN_UNKNOW -1 //beyond 32-122
/*
 * type of a single english char.
 * every enumerator carries an explicit value, so the order
 * of the list below has no effect on the numeric values.
 */
typedef enum {
    FRISO_EN_UNKNOW      = -1,  /* unknown (beyond 32-126) */
    FRISO_EN_LETTER      = 0,   /* A-Z, a-z */
    FRISO_EN_NUMERIC     = 1,   /* 0-9 */
    FRISO_EN_PUNCTUATION = 2,   /* english punctuations */
    FRISO_EN_WHITESPACE  = 3    /* whitespace */
} friso_enchar_t;
/* get the type of the specified char. /* get the type of the specified char.
* the type will be the constants defined above. * the type will be the constants defined above.
* (include the fullwidth english char.) * (include the fullwidth english char.)
*/ */
FRISO_API friso_enchar_t friso_enchar_type( friso_charset_t, friso_task_t ); FRISO_API friso_enchar_t friso_enchar_type( friso_charset_t, friso_task_t );
/* get the type of the specified en char. /* get the type of the specified en char.
* the type will be the constants defined above. * the type will be the constants defined above.
* (the char should be half-width english char only) * (the char should be half-width english char only)
*/ */
FRISO_API friso_enchar_t get_enchar_type( char ); FRISO_API friso_enchar_t get_enchar_type( char );
@ -99,7 +99,7 @@ FRISO_API friso_enchar_t get_enchar_type( char );
/* read the next utf-8 word from the specified position.
 *
 * @return int the number of bytes of the word that was read.
 */
FRISO_API int utf8_next_word( friso_task_t, uint_t *, fstring ); FRISO_API int utf8_next_word( friso_task_t, uint_t *, fstring );
@ -116,31 +116,31 @@ FRISO_API int unicode_to_utf8( uint_t, fstring );
FRISO_API int utf8_cjk_string( uint_t ) ; FRISO_API int utf8_cjk_string( uint_t ) ;
/* check whether the given char is a Basic Latin letter or not.
 * includes all the letters and english punctuations. */
FRISO_API int utf8_halfwidth_en_char( uint_t ); FRISO_API int utf8_halfwidth_en_char( uint_t );
/*
 * check whether the given char is a full-width latin char or not.
 * includes the full-width arabic numbers and letters,
 * but not the full-width punctuations.
 */
FRISO_API int utf8_fullwidth_en_char( uint_t ); FRISO_API int utf8_fullwidth_en_char( uint_t );
//check the given char is a upper case letter or not. //check the given char is a upper case letter or not.
// included all the full-width and half-width letters. // included all the full-width and half-width letters.
FRISO_API int utf8_uppercase_letter( uint_t ); FRISO_API int utf8_uppercase_letter( uint_t );
//check the given char is a lower case letter or not. //check the given char is a lower case letter or not.
// included all the full-width and half-width letters. // included all the full-width and half-width letters.
FRISO_API int utf8_lowercase_letter( uint_t ); FRISO_API int utf8_lowercase_letter( uint_t );
//check the given char is a numeric. //check the given char is a numeric.
// included the full-width and half-width arabic numeric. // included the full-width and half-width arabic numeric.
FRISO_API int utf8_numeric_letter( uint_t ); FRISO_API int utf8_numeric_letter( uint_t );
/*
 * check if the given fstring is made up of numeric chars.
 * both full-width and half-width numerics are ok.
 */
FRISO_API int utf8_numeric_string( char * ); FRISO_API int utf8_numeric_string( char * );
@ -183,7 +183,7 @@ FRISO_API int is_en_punctuation( friso_charset_t, char );
/* read the next GBK word from the specified position.
 *
 * @return int the number of bytes of the word that was read.
 */
FRISO_API int gbk_next_word( friso_task_t, uint_t *, fstring ); FRISO_API int gbk_next_word( friso_task_t, uint_t *, fstring );
@ -194,31 +194,31 @@ FRISO_API int get_gbk_bytes( char );
FRISO_API int gbk_cn_string( char * ) ; FRISO_API int gbk_cn_string( char * ) ;
/* check if the given char is an ASCII letter.
 * includes all the letters and english punctuations. */
FRISO_API int gbk_halfwidth_en_char( char ); FRISO_API int gbk_halfwidth_en_char( char );
/*
 * check if the given char is a full-width latin char.
 * includes the full-width arabic numbers and letters,
 * but not the full-width punctuations.
 */
FRISO_API int gbk_fullwidth_en_char( char * ); FRISO_API int gbk_fullwidth_en_char( char * );
//check if the given char is a upper case char. //check if the given char is a upper case char.
// included all the full-width and half-width letters. // included all the full-width and half-width letters.
FRISO_API int gbk_uppercase_letter( char * ); FRISO_API int gbk_uppercase_letter( char * );
//check if the given char is a lower case char. //check if the given char is a lower case char.
// included all the full-width and half-width letters. // included all the full-width and half-width letters.
FRISO_API int gbk_lowercase_letter( char * ); FRISO_API int gbk_lowercase_letter( char * );
//check if the given char is a numeric. //check if the given char is a numeric.
// included the full-width and half-width arabic numeric. // included the full-width and half-width arabic numeric.
FRISO_API int gbk_numeric_letter( char * ); FRISO_API int gbk_numeric_letter( char * );
/*
 * check if the given fstring is made up of numeric chars.
 * both full-width and half-width numerics are ok.
 */
FRISO_API int gbk_numeric_string( char * ); FRISO_API int gbk_numeric_string( char * );
@ -248,7 +248,7 @@ FRISO_API int gbk_en_punctuation( char ) ;
FRISO_API int gbk_cn_punctuation( char * ); FRISO_API int gbk_cn_punctuation( char * );
//cause the logic handle is the same as the utf8. //cause the logic handle is the same as the utf8.
// here invoke the utf8 interface directly. // here invoke the utf8 interface directly.
//FRISO_API int gbk_keep_punctuation( char * ); //FRISO_API int gbk_keep_punctuation( char * );
//@Deprecated //@Deprecated
//#define gbk_keep_punctuation( str ) utf8_keep_punctuation(str) //#define gbk_keep_punctuation( str ) utf8_keep_punctuation(str)
@ -257,4 +257,4 @@ FRISO_API int gbk_cn_punctuation( char * );
//FRISO_API int gbk_fullwidth_char( char * ) ; //FRISO_API int gbk_fullwidth_char( char * ) ;
/* }}}*/ /* }}}*/
#endif /*end _friso_charset_h*/ #endif /*end _friso_charset_h*/

View File

@ -1,8 +1,8 @@
/* /*
* friso hash table implements functions * friso hash table implements functions
* defined in header file "friso_API.h". * defined in header file "friso_API.h".
* *
* @author chenxin <chenxin619315@gmail.com> * @author chenxin <chenxin619315@gmail.com>
*/ */
#include "friso_API.h" #include "friso_API.h"
#include <stdlib.h> #include <stdlib.h>
@ -10,7 +10,7 @@
//-166411799L //-166411799L
//31 131 1331 13331 133331 .. //31 131 1331 13331 133331 ..
//31 131 1313 13131 131313 .. the best //31 131 1313 13131 131313 .. the best
#define HASH_FACTOR 1313131 #define HASH_FACTOR 1313131
/* ************************ /* ************************
@ -22,7 +22,7 @@ __STATIC_API__ uint_t hash( fstring str, uint_t length )
uint_t h = 0; uint_t h = 0;
while ( *str != '\0' ) while ( *str != '\0' )
h = h * HASH_FACTOR + ( *str++ ); h = h * HASH_FACTOR + ( *str++ );
return (h % length); return (h % length);
} }
/*
 * check whether the given number is a prime.
 *
 * @param  n the number to check
 * @return int 1 if n is a prime and 0 otherwise
 */
__STATIC_API__ int is_prime( int n )
{
    int j;

    //0, 1 and the negative numbers are not primes.
    if ( n < 2 ) 
        return 0;
    if ( n == 2 || n == 3 ) 
        return 1;
    if ( n % 2 == 0 ) 
        return 0;

    /*
     * trial division by the odd numbers up to sqrt(n).
     * "j <= n / j" keeps the square root itself in the range
     * (with "j * j < n" the numbers 9, 25, 49 ... passed as primes)
     * and never overflows, whatever the value of n is.
     * n is odd here, so the even divisors could be skipped.
     */
    for ( j = 3; j <= n / j; j += 2 ) {
        if ( n % j == 0 ) 
            return 0;
    }

    return 1;
}
@ -47,7 +47,7 @@ __STATIC_API__ int is_prime( int n )
__STATIC_API__ int next_prime( int n ) __STATIC_API__ int next_prime( int n )
{ {
if ( n % 2 == 0 ) if ( n % 2 == 0 )
n++; n++;
for ( ; ! is_prime( n ); n = n + 2 ) ; for ( ; ! is_prime( n ); n = n + 2 ) ;
return n; return n;
@ -72,14 +72,14 @@ __STATIC_API__ int next_prime( int n )
* static hashtable function area. * * static hashtable function area. *
***********************************/ ***********************************/
__STATIC_API__ hash_entry_t new_hash_entry( __STATIC_API__ hash_entry_t new_hash_entry(
fstring key, fstring key,
void * value, void * value,
hash_entry_t next ) hash_entry_t next )
{ {
hash_entry_t e = ( hash_entry_t ) hash_entry_t e = ( hash_entry_t )
FRISO_MALLOC( sizeof( friso_hash_entry ) ); FRISO_MALLOC( sizeof( friso_hash_entry ) );
if ( e == NULL ) { if ( e == NULL ) {
___ALLOCATION_ERROR___ ___ALLOCATION_ERROR___
} }
//e->_key = string_copy( key ); //e->_key = string_copy( key );
@ -95,13 +95,13 @@ __STATIC_API__ hash_entry_t * create_hash_entries( uint_t blocks )
{ {
register uint_t t; register uint_t t;
hash_entry_t *e = ( hash_entry_t * ) hash_entry_t *e = ( hash_entry_t * )
FRISO_CALLOC( sizeof( hash_entry_t ), blocks ); FRISO_CALLOC( sizeof( hash_entry_t ), blocks );
if ( e == NULL ) { if ( e == NULL ) {
___ALLOCATION_ERROR___ ___ALLOCATION_ERROR___
} }
for ( t = 0; t < blocks; t++ ) { for ( t = 0; t < blocks; t++ ) {
e[t] = NULL; e[t] = NULL;
} }
return e; return e;
@ -114,22 +114,22 @@ __STATIC_API__ void rebuild_hash( friso_hash_t _hash )
//find the next prime as the length of the hashtable. //find the next prime as the length of the hashtable.
uint_t t, length = next_prime( _hash->length * 2 + 1 ); uint_t t, length = next_prime( _hash->length * 2 + 1 );
hash_entry_t e, next, *_src = _hash->table, \ hash_entry_t e, next, *_src = _hash->table, \
*table = create_hash_entries( length ); *table = create_hash_entries( length );
uint_t bucket; uint_t bucket;
//copy the nodes //copy the nodes
for ( t = 0; t < _hash->length; t++ ) for ( t = 0; t < _hash->length; t++ )
{ {
e = *( _src + t ); e = *( _src + t );
if ( e != NULL ) { if ( e != NULL ) {
do { do {
next = e->_next; next = e->_next;
bucket = hash( e->_key, length ); bucket = hash( e->_key, length );
e->_next = table[bucket]; e->_next = table[bucket];
table[bucket] = e; table[bucket] = e;
e = next; e = next;
} while ( e != NULL ); } while ( e != NULL );
} }
} }
_hash->table = table; _hash->table = table;
@ -149,35 +149,35 @@ FRISO_API friso_hash_t new_hash_table( void )
{ {
friso_hash_t _hash = ( friso_hash_t ) FRISO_MALLOC( sizeof ( friso_hash_cdt ) ); friso_hash_t _hash = ( friso_hash_t ) FRISO_MALLOC( sizeof ( friso_hash_cdt ) );
if ( _hash == NULL ) { if ( _hash == NULL ) {
___ALLOCATION_ERROR___ ___ALLOCATION_ERROR___
} }
//initialize the the hashtable //initialize the the hashtable
_hash->length = DEFAULT_LENGTH; _hash->length = DEFAULT_LENGTH;
_hash->size = 0; _hash->size = 0;
_hash->factor = DEFAULT_FACTOR; _hash->factor = DEFAULT_FACTOR;
_hash->threshold = ( uint_t ) ( _hash->length * _hash->factor ); _hash->threshold = ( uint_t ) ( _hash->length * _hash->factor );
_hash->table = create_hash_entries( _hash->length ); _hash->table = create_hash_entries( _hash->length );
return _hash; return _hash;
} }
FRISO_API void free_hash_table( FRISO_API void free_hash_table(
friso_hash_t _hash, friso_hash_t _hash,
fhash_callback_fn_t fentry_func ) fhash_callback_fn_t fentry_func )
{ {
register uint_t j; register uint_t j;
hash_entry_t e, n; hash_entry_t e, n;
for ( j = 0; j < _hash->length; j++ ) for ( j = 0; j < _hash->length; j++ )
{ {
e = *( _hash->table + j ); e = *( _hash->table + j );
for ( ; e != NULL ; ) { for ( ; e != NULL ; ) {
n = e->_next; n = e->_next;
if ( fentry_func != NULL ) fentry_func(e); if ( fentry_func != NULL ) fentry_func(e);
FRISO_FREE( e ); FRISO_FREE( e );
e = n; e = n;
} }
} }
//free the pointer array block ( 4 * htable->length continuous bytes ). //free the pointer array block ( 4 * htable->length continuous bytes ).
@ -189,9 +189,9 @@ FRISO_API void free_hash_table(
//put a new mapping insite. //put a new mapping insite.
//the value cannot be NULL. //the value cannot be NULL.
FRISO_API void *hash_put_mapping( FRISO_API void *hash_put_mapping(
friso_hash_t _hash, friso_hash_t _hash,
fstring key, fstring key,
void * value ) void * value )
{ {
uint_t bucket = ( key == NULL ) ? 0 : hash( key, _hash->length ); uint_t bucket = ( key == NULL ) ? 0 : hash( key, _hash->length );
hash_entry_t e = *( _hash->table + bucket ); hash_entry_t e = *( _hash->table + bucket );
@ -200,14 +200,14 @@ FRISO_API void *hash_put_mapping(
//check the given key is already exists or not. //check the given key is already exists or not.
for ( ; e != NULL; e = e->_next ) for ( ; e != NULL; e = e->_next )
{ {
if ( key == e->_key if ( key == e->_key
|| ( key != NULL && e->_key != NULL || ( key != NULL && e->_key != NULL
&& strcmp( key, e->_key ) == 0 ) ) && strcmp( key, e->_key ) == 0 ) )
{ {
oval = e->_val; //bak the old value oval = e->_val; //bak the old value
e->_val = value; e->_val = value;
return oval; return oval;
} }
} }
//put a new mapping into the hashtable. //put a new mapping into the hashtable.
@ -216,27 +216,27 @@ FRISO_API void *hash_put_mapping(
//check the condition to rebuild the hashtable. //check the condition to rebuild the hashtable.
if ( _hash->size >= _hash->threshold ) if ( _hash->size >= _hash->threshold )
rebuild_hash( _hash ); rebuild_hash( _hash );
return oval; return oval;
} }
//check the existence of the mapping associated with the given key. //check the existence of the mapping associated with the given key.
FRISO_API int hash_exist_mapping( FRISO_API int hash_exist_mapping(
friso_hash_t _hash, fstring key ) friso_hash_t _hash, fstring key )
{ {
uint_t bucket = ( key == NULL ) ? 0 : hash( key, _hash->length ); uint_t bucket = ( key == NULL ) ? 0 : hash( key, _hash->length );
hash_entry_t e; hash_entry_t e;
for ( e = *( _hash->table + bucket ); for ( e = *( _hash->table + bucket );
e != NULL; e != NULL;
e = e->_next ) { e = e->_next ) {
if ( key == e->_key if ( key == e->_key
|| ( key != NULL && e->_key != NULL || ( key != NULL && e->_key != NULL
&& strcmp( key, e->_key ) == 0 )) && strcmp( key, e->_key ) == 0 ))
{ {
return 1; return 1;
} }
} }
return 0; return 0;
@ -249,14 +249,14 @@ FRISO_API void *hash_get_value( friso_hash_t _hash, fstring key )
hash_entry_t e; hash_entry_t e;
for ( e = *( _hash->table + bucket ); for ( e = *( _hash->table + bucket );
e != NULL; e != NULL;
e = e->_next ) { e = e->_next ) {
if ( key == e->_key if ( key == e->_key
|| ( key != NULL && e->_key != NULL || ( key != NULL && e->_key != NULL
&& strcmp( key, e->_key ) == 0 )) && strcmp( key, e->_key ) == 0 ))
{ {
return e->_val; return e->_val;
} }
} }
return NULL; return NULL;
@ -264,31 +264,31 @@ FRISO_API void *hash_get_value( friso_hash_t _hash, fstring key )
//remove the mapping associated with the given key. //remove the mapping associated with the given key.
FRISO_API hash_entry_t hash_remove_mapping( FRISO_API hash_entry_t hash_remove_mapping(
friso_hash_t _hash, fstring key ) friso_hash_t _hash, fstring key )
{ {
uint_t bucket = ( key == NULL ) ? 0 : hash( key, _hash->length ); uint_t bucket = ( key == NULL ) ? 0 : hash( key, _hash->length );
hash_entry_t e, prev = NULL; hash_entry_t e, prev = NULL;
hash_entry_t b; hash_entry_t b;
for ( e = *( _hash->table + bucket ); for ( e = *( _hash->table + bucket );
e != NULL; e != NULL;
prev = e, e = e->_next ) { prev = e, e = e->_next ) {
if ( key == e->_key if ( key == e->_key
|| ( key != NULL && e->_key != NULL || ( key != NULL && e->_key != NULL
&& strcmp( key, e->_key ) == 0 ) ) && strcmp( key, e->_key ) == 0 ) )
{ {
b = e; b = e;
//the node located at *( htable->table + bucket ) //the node located at *( htable->table + bucket )
if ( prev == NULL ) { if ( prev == NULL ) {
_hash->table[bucket] = e->_next; _hash->table[bucket] = e->_next;
} else { } else {
prev->_next = e->_next; prev->_next = e->_next;
} }
//printf("%s was removed\n", b->_key); //printf("%s was removed\n", b->_key);
_hash->size--; _hash->size--;
//FRISO_FREE( b ); //FRISO_FREE( b );
return b; return b;
} }
} }
return NULL; return NULL;

View File

@ -1,102 +1,102 @@
/* /*
* friso lexicon implemented functions. * friso lexicon implemented functions.
* used to deal with the friso lexicon, like: load,remove,match... * used to deal with the friso lexicon, like: load,remove,match...
* *
* @author chenxin <chenxin619315@gmail.com> * @author chenxin <chenxin619315@gmail.com>
*/ */
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include "friso_API.h" #include "friso_API.h"
#include "friso.h" #include "friso.h"
#define __SPLIT_MAX_TOKENS__ 5 #define __SPLIT_MAX_TOKENS__ 5
#define __LEX_FILE_DELIME__ '#' #define __LEX_FILE_DELIME__ '#'
#define __FRISO_LEX_IFILE__ "friso.lex.ini" #define __FRISO_LEX_IFILE__ "friso.lex.ini"
/*
 * create a new lexicon (dictionary):
 * an array of __FRISO_LEXICON_LENGTH__ hash tables, 
 * indexed by the lexicon type.
 */
FRISO_API friso_dic_t friso_dic_new() 
{
    register uint_t idx = 0;
    friso_dic_t lexicon = ( friso_dic_t ) FRISO_CALLOC( 
            sizeof( friso_hash_t ), __FRISO_LEXICON_LENGTH__ );

    if ( lexicon == NULL ) {
        ___ALLOCATION_ERROR___
    }

    //every slot gets its own empty hash table.
    while ( idx < __FRISO_LEXICON_LENGTH__ ) {
        lexicon[idx] = new_hash_table();
        idx++;
    }

    return lexicon;
}
/** /**
* default callback function to invoke * default callback function to invoke
* when free the friso dictionary . * when free the friso dictionary .
* *
* @date 2013-06-12 * @date 2013-06-12
*/ */
__STATIC_API__ void default_fdic_callback( hash_entry_t e ) __STATIC_API__ void default_fdic_callback( hash_entry_t e )
{ {
register uint_t i; register uint_t i;
friso_array_t syn; friso_array_t syn;
lex_entry_t lex = ( lex_entry_t ) e->_val; lex_entry_t lex = ( lex_entry_t ) e->_val;
//free the lex->word //free the lex->word
FRISO_FREE( lex->word ); FRISO_FREE( lex->word );
//free the lex->syn if it is not NULL //free the lex->syn if it is not NULL
if ( lex->syn != NULL ) if ( lex->syn != NULL )
{ {
syn = lex->syn; syn = lex->syn;
for ( i = 0; i < syn->length; i++ ) { for ( i = 0; i < syn->length; i++ ) {
FRISO_FREE( syn->items[i] ); FRISO_FREE( syn->items[i] );
} }
free_array_list( syn ); free_array_list( syn );
} }
//free the e->_val //free the e->_val
//@date 2014-01-28 posted by mlemay@gmail.com //@date 2014-01-28 posted by mlemay@gmail.com
FRISO_FREE(lex); FRISO_FREE(lex);
} }
/*
 * free the given dictionary: 
 * all its hash tables (default_fdic_callback is invoked 
 * for each of their entries) and then the dictionary itself.
 */
FRISO_API void friso_dic_free( friso_dic_t dic ) 
{
    register uint_t idx = 0;

    while ( idx < __FRISO_LEXICON_LENGTH__ ) {
        //free the hash table
        free_hash_table( dic[idx], default_fdic_callback );
        idx++;
    }

    //free the array of the hash tables.
    FRISO_FREE( dic );
}
/*
 * create a new lexicon entry.
 * the word and the syn pointers are stored as they are (no copy is made).
 *
 * @param word   the word of the entry
 * @param syn    synonyms words array list (could be NULL)
 * @param fre    frequency of the word
 * @param length stored (cast to uchar_t) as both the length and the rlen
 * @param type   stored (cast to uchar_t) as the type
 * @return lex_entry_t the new entry
 */
FRISO_API lex_entry_t new_lex_entry( 
        fstring word, 
        friso_array_t syn, 
        uint_t fre, 
        uint_t length, 
        uint_t type )
{
    lex_entry_t entry = ( lex_entry_t ) FRISO_MALLOC( sizeof( lex_entry_cdt ) );

    if ( entry == NULL ) {
        ___ALLOCATION_ERROR___
    }

    //values from the arguments.
    entry->word   = word;
    entry->syn    = syn;                 //synonyms words array list.
    entry->fre    = fre;
    entry->type   = (uchar_t) type;
    entry->length = (uchar_t) length;
    entry->rlen   = (uchar_t) length;    //same as the length by default.

    //defaults.
    entry->pos      = NULL;              //part of speech array list.
    entry->ctrlMask = 0;                 //control mask.
    entry->offset   = -1;

    return entry;
}
/** /**
@ -109,64 +109,64 @@ FRISO_API lex_entry_t new_lex_entry(
*/ */
FRISO_API void free_lex_entry( lex_entry_t e )
{
    /*
     * only the entry itself is released here,
     * its word and its syn array list are not freed by this function.
     */
    FRISO_FREE( e );
}
//add a new entry to the dictionary. //add a new entry to the dictionary.
FRISO_API void friso_dic_add( FRISO_API void friso_dic_add(
friso_dic_t dic, friso_dic_t dic,
friso_lex_t lex, friso_lex_t lex,
fstring word, fstring word,
friso_array_t syn ) friso_array_t syn )
{ {
if ( lex >= 0 && lex < __FRISO_LEXICON_LENGTH__ ) if ( lex >= 0 && lex < __FRISO_LEXICON_LENGTH__ )
{ {
//printf("lex=%d, word=%s, syn=%s\n", lex, word, syn); //printf("lex=%d, word=%s, syn=%s\n", lex, word, syn);
hash_put_mapping( dic[lex], word, hash_put_mapping( dic[lex], word,
new_lex_entry( word, syn, 0, new_lex_entry( word, syn, 0,
(uint_t) strlen(word), (uint_t) lex ) ); (uint_t) strlen(word), (uint_t) lex ) );
} }
} }
FRISO_API void friso_dic_add_with_fre( FRISO_API void friso_dic_add_with_fre(
friso_dic_t dic, friso_dic_t dic,
friso_lex_t lex, friso_lex_t lex,
fstring word, fstring word,
friso_array_t syn, friso_array_t syn,
uint_t frequency ) uint_t frequency )
{ {
if ( lex >= 0 && lex < __FRISO_LEXICON_LENGTH__ ) { if ( lex >= 0 && lex < __FRISO_LEXICON_LENGTH__ ) {
hash_put_mapping( dic[lex], word, hash_put_mapping( dic[lex], word,
new_lex_entry( word, syn, frequency, new_lex_entry( word, syn, frequency,
( uint_t ) strlen(word), ( uint_t ) lex ) ); ( uint_t ) strlen(word), ( uint_t ) lex ) );
} }
} }
/* /*
* read a line from a specified stream. * read a line from a specified stream.
* the newline will be cleared. * the newline will be cleared.
* *
* @date 2012-11-24 * @date 2012-11-24
*/ */
FRISO_API fstring file_get_line( fstring __dst, FILE * _stream ) FRISO_API fstring file_get_line( fstring __dst, FILE * _stream )
{ {
register int c; register int c;
fstring cs; fstring cs;
cs = __dst; cs = __dst;
while ( ( c = fgetc( _stream ) ) != EOF ) while ( ( c = fgetc( _stream ) ) != EOF )
{ {
if ( c == '\n' ) break; if ( c == '\n' ) break;
*cs++ = c; *cs++ = c;
} }
*cs = '\0'; *cs = '\0';
return ( c == EOF && cs == __dst ) ? NULL : __dst; return ( c == EOF && cs == __dst ) ? NULL : __dst;
} }
/* /*
@ -174,373 +174,373 @@ FRISO_API fstring file_get_line( fstring __dst, FILE * _stream )
*/ */
///instead of memcpy ///instead of memcpy
/*
 * copy at most blocks bytes from _src to __dst, 
 * the copy stops earlier at the terminator of _src.
 * __dst is always terminated, so it needs room for blocks + 1 bytes.
 *
 * @return fstring __dst
 */
__STATIC_API__ fstring string_copy( 
        fstring _src, 
        fstring __dst, 
        uint_t blocks ) 
{
    register uint_t n = 0;

    while ( n < blocks && _src[n] != '\0' ) {
        __dst[n] = _src[n];
        n++;
    }
    __dst[n] = '\0';

    return __dst;
}
/** /**
* make a heap allocation, and copy the * make a heap allocation, and copy the
* source fstring to the new allocation, and * source fstring to the new allocation, and
* you should free it after use it . * you should free it after use it .
* *
* @param _src source fstring * @param _src source fstring
* @param blocks number of bytes to copy * @param blocks number of bytes to copy
*/ */
__STATIC_API__ fstring string_copy_heap( __STATIC_API__ fstring string_copy_heap(
fstring _src, uint_t blocks ) fstring _src, uint_t blocks )
{ {
register uint_t t; register uint_t t;
fstring str = ( fstring ) fstring str = ( fstring )
FRISO_MALLOC( blocks + 1 ); FRISO_MALLOC( blocks + 1 );
if ( str == NULL ) { if ( str == NULL ) {
___ALLOCATION_ERROR___; ___ALLOCATION_ERROR___;
} }
for ( t = 0; t < blocks; t++ ) { for ( t = 0; t < blocks; t++ ) {
if ( *_src == '\0' ) break; if ( *_src == '\0' ) break;
str[t] = *_src++; str[t] = *_src++;
} }
str[t] = '\0'; str[t] = '\0';
return str; return str;
} }
/* /*
* find the postion of the first appear of the given char. * find the postion of the first appear of the given char.
* address of the char in the fstring will be return . * address of the char in the fstring will be return .
* if not found NULL will be return . * if not found NULL will be return .
*/ */
__STATIC_API__ fstring indexOf( fstring __str, char delimiter ) __STATIC_API__ fstring indexOf( fstring __str, char delimiter )
{ {
uint_t i, __length__; uint_t i, __length__;
__length__ = strlen( __str ); __length__ = strlen( __str );
for ( i = 0; i < __length__; i++ ) { for ( i = 0; i < __length__; i++ ) {
if ( __str[i] == delimiter ) if ( __str[i] == delimiter )
return __str + i; return __str + i;
} }
return NULL; return NULL;
} }
/** /**
* load all the valid wors from a specified lexicon file . * load all the valid wors from a specified lexicon file .
* *
* @param dic friso dictionary instance (A hash array) * @param dic friso dictionary instance (A hash array)
* @param lex the lexicon type * @param lex the lexicon type
* @param lex_file the path of the lexicon file * @param lex_file the path of the lexicon file
* @param length the maximum length of the word item * @param length the maximum length of the word item
*/ */
FRISO_API void friso_dic_load( FRISO_API void friso_dic_load(
friso_t friso, friso_t friso,
friso_config_t config, friso_config_t config,
friso_lex_t lex, friso_lex_t lex,
fstring lex_file, fstring lex_file,
uint_t length ) uint_t length )
{ {
FILE * _stream; FILE * _stream;
char __char[1024], _buffer[512]; char __char[1024], _buffer[512];
fstring _line; fstring _line;
string_split_entry sse; string_split_entry sse;
fstring _word; fstring _word;
char _sbuffer[512]; char _sbuffer[512];
fstring _syn; fstring _syn;
friso_array_t sywords; friso_array_t sywords;
uint_t _fre; uint_t _fre;
if ( ( _stream = fopen( lex_file, "rb" ) ) != NULL ) if ( ( _stream = fopen( lex_file, "rb" ) ) != NULL )
{ {
while ( ( _line = file_get_line( __char, _stream ) ) != NULL ) while ( ( _line = file_get_line( __char, _stream ) ) != NULL )
{ {
//clear up the notes //clear up the notes
//make sure the length of the line is greater than 1. //make sure the length of the line is greater than 1.
//like the single '#' mark in stopwords dictionary. //like the single '#' mark in stopwords dictionary.
if ( _line[0] == '#' && strlen(_line) > 1 ) continue; if ( _line[0] == '#' && strlen(_line) > 1 ) continue;
//handle the stopwords. //handle the stopwords.
if ( lex == __LEX_STOPWORDS__ ) if ( lex == __LEX_STOPWORDS__ )
{ {
//clean the chinese words that its length is greater than max length. //clean the chinese words that its length is greater than max length.
if ( ((int)_line[0]) < 0 && strlen( _line ) > length ) continue; if ( ((int)_line[0]) < 0 && strlen( _line ) > length ) continue;
friso_dic_add( friso->dic, __LEX_STOPWORDS__, friso_dic_add( friso->dic, __LEX_STOPWORDS__,
string_copy_heap( _line, strlen(_line) ), NULL ); string_copy_heap( _line, strlen(_line) ), NULL );
continue; continue;
} }
//split the fstring with '/'. //split the fstring with '/'.
string_split_reset( &sse, "/", _line); string_split_reset( &sse, "/", _line);
if ( string_split_next( &sse, _buffer ) == NULL ) continue; if ( string_split_next( &sse, _buffer ) == NULL ) continue;
//1. get the word. //1. get the word.
_word = string_copy_heap( _buffer, strlen(_buffer) ); _word = string_copy_heap( _buffer, strlen(_buffer) );
if ( string_split_next( &sse, _buffer ) == NULL ) if ( string_split_next( &sse, _buffer ) == NULL )
{ {
//normal lexicon type, //normal lexicon type,
//add them to the dictionary directly //add them to the dictionary directly
friso_dic_add( friso->dic, lex, _word, NULL ); friso_dic_add( friso->dic, lex, _word, NULL );
continue; continue;
} }
/* /*
* filter out the words that its length is larger * filter out the words that its length is larger
* than the specified limit. * than the specified limit.
* but not for __LEX_ECM_WORDS__ and english __LEX_STOPWORDS__ * but not for __LEX_ECM_WORDS__ and english __LEX_STOPWORDS__
* and __LEX_CEM_WORDS__. * and __LEX_CEM_WORDS__.
*/ */
if ( ! ( lex == __LEX_ECM_WORDS__ || lex == __LEX_CEM_WORDS__ ) if ( ! ( lex == __LEX_ECM_WORDS__ || lex == __LEX_CEM_WORDS__ )
&& strlen( _word ) > length ) && strlen( _word ) > length )
{ {
FRISO_FREE(_word); FRISO_FREE(_word);
continue; continue;
} }
//2. get the synonyms words. //2. get the synonyms words.
_syn = NULL; _syn = NULL;
if ( strcmp( _buffer, "null" ) != 0 ) if ( strcmp( _buffer, "null" ) != 0 )
_syn = string_copy( _buffer, _sbuffer, strlen(_buffer) ); _syn = string_copy( _buffer, _sbuffer, strlen(_buffer) );
//3. get the word frequency if it available. //3. get the word frequency if it available.
_fre = 0; _fre = 0;
if ( string_split_next( &sse, _buffer ) != NULL ) if ( string_split_next( &sse, _buffer ) != NULL )
_fre = atoi( _buffer ); _fre = atoi( _buffer );
/** /**
* Here: * Here:
* split the synonyms words with mark "," * split the synonyms words with mark ","
* and put them in a array list if the synonyms is not NULL * and put them in a array list if the synonyms is not NULL
*/ */
sywords = NULL; sywords = NULL;
if ( config->add_syn && _syn != NULL ) if ( config->add_syn && _syn != NULL )
{ {
string_split_reset( &sse, ",", _sbuffer ); string_split_reset( &sse, ",", _sbuffer );
sywords = new_array_list_with_opacity(5); sywords = new_array_list_with_opacity(5);
while ( string_split_next( &sse, _buffer ) != NULL ) while ( string_split_next( &sse, _buffer ) != NULL )
{ {
if ( strlen(_buffer) > length ) continue; if ( strlen(_buffer) > length ) continue;
array_list_add( sywords, array_list_add( sywords,
string_copy_heap(_buffer, strlen(_buffer)) ); string_copy_heap(_buffer, strlen(_buffer)) );
} }
sywords = array_list_trim( sywords ); sywords = array_list_trim( sywords );
} }
//4. add the word item //4. add the word item
friso_dic_add_with_fre( friso_dic_add_with_fre(
friso->dic, lex, _word, sywords, _fre ); friso->dic, lex, _word, sywords, _fre );
} }
fclose( _stream ); fclose( _stream );
} else { } else {
printf("Warning: Fail to open lexicon file %s\n", lex_file); printf("Warning: Fail to open lexicon file %s\n", lex_file);
} }
} }
/** /**
* get the lexicon type index with the specified * get the lexicon type index with the specified
* type keywords . * type keywords .
* *
* @see friso.h#friso_lex_t * @see friso.h#friso_lex_t
* @param _key * @param _key
* @return int * @return int
*/ */
__STATIC_API__ friso_lex_t get_lexicon_type_with_constant( fstring _key ) __STATIC_API__ friso_lex_t get_lexicon_type_with_constant( fstring _key )
{ {
if ( strcmp( _key, "__LEX_CJK_WORDS__" ) == 0 ) { if ( strcmp( _key, "__LEX_CJK_WORDS__" ) == 0 ) {
return __LEX_CJK_WORDS__; return __LEX_CJK_WORDS__;
} }
else if ( strcmp( _key, "__LEX_CJK_UNITS__" ) == 0 ) { else if ( strcmp( _key, "__LEX_CJK_UNITS__" ) == 0 ) {
return __LEX_CJK_UNITS__; return __LEX_CJK_UNITS__;
} }
else if ( strcmp( _key, "__LEX_ECM_WORDS__" ) == 0 ) { else if ( strcmp( _key, "__LEX_ECM_WORDS__" ) == 0 ) {
return __LEX_ECM_WORDS__; return __LEX_ECM_WORDS__;
} }
else if ( strcmp( _key, "__LEX_CEM_WORDS__" ) == 0 ) { else if ( strcmp( _key, "__LEX_CEM_WORDS__" ) == 0 ) {
return __LEX_CEM_WORDS__; return __LEX_CEM_WORDS__;
} }
else if ( strcmp( _key, "__LEX_CN_LNAME__" ) == 0 ) { else if ( strcmp( _key, "__LEX_CN_LNAME__" ) == 0 ) {
return __LEX_CN_LNAME__; return __LEX_CN_LNAME__;
} }
else if ( strcmp( _key, "__LEX_CN_SNAME__" ) == 0 ) { else if ( strcmp( _key, "__LEX_CN_SNAME__" ) == 0 ) {
return __LEX_CN_SNAME__; return __LEX_CN_SNAME__;
} }
else if ( strcmp( _key, "__LEX_CN_DNAME1__" ) == 0 ) { else if ( strcmp( _key, "__LEX_CN_DNAME1__" ) == 0 ) {
return __LEX_CN_DNAME1__; return __LEX_CN_DNAME1__;
} }
else if ( strcmp( _key, "__LEX_CN_DNAME2__" ) == 0 ) { else if ( strcmp( _key, "__LEX_CN_DNAME2__" ) == 0 ) {
return __LEX_CN_DNAME2__; return __LEX_CN_DNAME2__;
} }
else if ( strcmp( _key, "__LEX_CN_LNA__" ) == 0 ) { else if ( strcmp( _key, "__LEX_CN_LNA__" ) == 0 ) {
return __LEX_CN_LNA__; return __LEX_CN_LNA__;
} }
else if ( strcmp( _key, "__LEX_STOPWORDS__" ) == 0 ) { else if ( strcmp( _key, "__LEX_STOPWORDS__" ) == 0 ) {
return __LEX_STOPWORDS__; return __LEX_STOPWORDS__;
} }
else if ( strcmp( _key, "__LEX_ENPUN_WORDS__" ) == 0 ) { else if ( strcmp( _key, "__LEX_ENPUN_WORDS__" ) == 0 ) {
return __LEX_ENPUN_WORDS__; return __LEX_ENPUN_WORDS__;
} }
else if ( strcmp( _key, "__LEX_EN_WORDS__" ) == 0 ) { else if ( strcmp( _key, "__LEX_EN_WORDS__" ) == 0 ) {
return __LEX_EN_WORDS__; return __LEX_EN_WORDS__;
} }
return -1; return -1;
} }
/* /*
* load the lexicon configuration file. * load the lexicon configuration file.
* and load all the valid lexicon from the configuration file. * and load all the valid lexicon from the configuration file.
* *
* @param friso friso instance * @param friso friso instance
* @param config friso_config instance * @param config friso_config instance
* @param _path dictionary directory * @param _path dictionary directory
* @param _limitts words length limit * @param _limitts words length limit
*/ */
FRISO_API void friso_dic_load_from_ifile( FRISO_API void friso_dic_load_from_ifile(
friso_t friso, friso_t friso,
friso_config_t config, friso_config_t config,
fstring _path, fstring _path,
uint_t _limits ) uint_t _limits )
{ {
//1.parse the configuration file. //1.parse the configuration file.
FILE *__stream; FILE *__stream;
char __chars__[1024], __key__[30], *__line__; char __chars__[1024], __key__[30], *__line__;
uint_t __length__, i, t; uint_t __length__, i, t;
friso_lex_t lex_t; friso_lex_t lex_t;
string_buffer_t sb; string_buffer_t sb;
//get the lexicon configruation file path //get the lexicon configruation file path
sb = new_string_buffer(); sb = new_string_buffer();
string_buffer_append( sb, _path ); string_buffer_append( sb, _path );
string_buffer_append( sb, __FRISO_LEX_IFILE__ ); string_buffer_append( sb, __FRISO_LEX_IFILE__ );
//printf("%s\n", sb->buffer); //printf("%s\n", sb->buffer);
if ( ( __stream = fopen( sb->buffer, "rb" ) ) != NULL ) if ( ( __stream = fopen( sb->buffer, "rb" ) ) != NULL )
{ {
while ( ( __line__ = while ( ( __line__ =
file_get_line( __chars__, __stream ) ) != NULL ) file_get_line( __chars__, __stream ) ) != NULL )
{ {
//comment filter. //comment filter.
if ( __line__[0] == '#' ) continue; if ( __line__[0] == '#' ) continue;
if ( __line__[0] == '\0' ) continue; if ( __line__[0] == '\0' ) continue;
__length__ = strlen( __line__ ); __length__ = strlen( __line__ );
//item start //item start
if ( __line__[ __length__ - 1 ] == '[' ) if ( __line__[ __length__ - 1 ] == '[' )
{ {
//get the type key //get the type key
for ( i = 0; i < __length__ for ( i = 0; i < __length__
&& ( __line__[i] == ' ' || __line__[i] == '\t' ); i++ ); && ( __line__[i] == ' ' || __line__[i] == '\t' ); i++ );
for ( t = 0; i < __length__; i++,t++ ) { for ( t = 0; i < __length__; i++,t++ ) {
if ( __line__[i] == ' ' if ( __line__[i] == ' '
|| __line__[i] == '\t' || __line__[i] == ':' ) break; || __line__[i] == '\t' || __line__[i] == ':' ) break;
__key__[t] = __line__[i]; __key__[t] = __line__[i];
} }
__key__[t] = '\0'; __key__[t] = '\0';
//get the lexicon type //get the lexicon type
lex_t = get_lexicon_type_with_constant(__key__); lex_t = get_lexicon_type_with_constant(__key__);
if ( lex_t == -1 ) continue; if ( lex_t == -1 ) continue;
//printf("key=%s, type=%d\n", __key__, lex_t ); //printf("key=%s, type=%d\n", __key__, lex_t );
while ( ( __line__ = file_get_line( __chars__, __stream ) ) != NULL ) while ( ( __line__ = file_get_line( __chars__, __stream ) ) != NULL )
{ {
//comments filter. //comments filter.
if ( __line__[0] == '#' ) continue; if ( __line__[0] == '#' ) continue;
if ( __line__[0] == '\0' ) continue; if ( __line__[0] == '\0' ) continue;
__length__ = strlen( __line__ ); __length__ = strlen( __line__ );
if ( __line__[ __length__ - 1 ] == ']' ) break; if ( __line__[ __length__ - 1 ] == ']' ) break;
for ( i = 0; i < __length__ for ( i = 0; i < __length__
&& ( __line__[i] == ' ' || __line__[i] == '\t' ); i++ ); && ( __line__[i] == ' ' || __line__[i] == '\t' ); i++ );
for ( t = 0; i < __length__; i++,t++ ) { for ( t = 0; i < __length__; i++,t++ ) {
if ( __line__[i] == ' ' if ( __line__[i] == ' '
|| __line__[i] == '\t' || __line__[i] == ';' ) break; || __line__[i] == '\t' || __line__[i] == ';' ) break;
__key__[t] = __line__[i]; __key__[t] = __line__[i];
} }
__key__[t] = '\0'; __key__[t] = '\0';
//load the lexicon item from the lexicon file. //load the lexicon item from the lexicon file.
string_buffer_clear( sb ); string_buffer_clear( sb );
string_buffer_append( sb, _path ); string_buffer_append( sb, _path );
string_buffer_append( sb, __key__ ); string_buffer_append( sb, __key__ );
//printf("key=%s, type=%d\n", __key__, lex_t); //printf("key=%s, type=%d\n", __key__, lex_t);
friso_dic_load( friso, config, lex_t, sb->buffer, _limits ); friso_dic_load( friso, config, lex_t, sb->buffer, _limits );
} }
} }
} //end while } //end while
fclose( __stream ); fclose( __stream );
} else { } else {
printf("Warning: Fail to open the lexicon configuration file %s\n", sb->buffer); printf("Warning: Fail to open the lexicon configuration file %s\n", sb->buffer);
} }
free_string_buffer(sb); free_string_buffer(sb);
} }
//match the item. //match the item.
FRISO_API int friso_dic_match( FRISO_API int friso_dic_match(
friso_dic_t dic, friso_dic_t dic,
friso_lex_t lex, friso_lex_t lex,
fstring word ) fstring word )
{ {
if ( lex >= 0 && lex < __FRISO_LEXICON_LENGTH__ ) { if ( lex >= 0 && lex < __FRISO_LEXICON_LENGTH__ ) {
return hash_exist_mapping( dic[lex], word ); return hash_exist_mapping( dic[lex], word );
} }
return 0; return 0;
} }
/*
 * get the lex_entry_t stored for a word in the given lexicon.
 *
 * @param   dic     the dictionary, indexed by lexicon type
 * @param   lex     the lexicon type
 * @param   word    the word to look for
 * @return  the value returned by hash_get_value cast to lex_entry_t,
 *          or NULL when lex is out of range
 */
FRISO_API lex_entry_t friso_dic_get( friso_dic_t dic, friso_lex_t lex, fstring word )
{
    if ( lex < 0 || lex >= __FRISO_LEXICON_LENGTH__ ) {
        return NULL;
    }

    return ( lex_entry_t ) hash_get_value( dic[lex], word );
}
/*
 * get the size of the dictionary of one lexicon type.
 *
 * @param   dic     the dictionary, indexed by lexicon type
 * @param   lex     the lexicon type
 * @return  the result of hash_get_size, or 0 when lex is out of range
 */
FRISO_API uint_t friso_spec_dic_size( friso_dic_t dic, friso_lex_t lex )
{
    if ( lex < 0 || lex >= __FRISO_LEXICON_LENGTH__ ) {
        return 0;
    }

    return hash_get_size( dic[lex] );
}
/*
 * get the size of the whole dictionary:
 * the sum of hash_get_size over all the __FRISO_LEXICON_LENGTH__ lexicons.
 *
 * @param   dic     the dictionary, indexed by lexicon type
 * @return  the summed size
 */
FRISO_API uint_t friso_all_dic_size( friso_dic_t dic )
{
    uint_t total = 0;
    uint_t lex;

    for ( lex = 0; lex < __FRISO_LEXICON_LENGTH__; lex++ ) {
        total += hash_get_size( dic[lex] );
    }

    return total;
}

View File

@ -1,29 +1,29 @@
/* /*
* link list implemented functions * link list implemented functions
* defined in header file "friso_API.h". * defined in header file "friso_API.h".
 * when a link_node is deleted we only free the allocation
 *    of the node itself, not the allocation of its value.
* *
* @author chenxin <chenxin619315@gmail.com> * @author chenxin <chenxin619315@gmail.com>
*/ */
#include "friso_API.h" #include "friso_API.h"
#include <stdlib.h> #include <stdlib.h>
/*
 * allocate a new link list node and fill in its fields.
 *
 * @param   value   the value stored in the node (the pointer is kept as is)
 * @param   prev    the node before the new one
 * @param   next    the node after the new one
 * @return  the new node
 */
__STATIC_API__ link_node_t new_node_entry(
        void *value, link_node_t prev, link_node_t next )
{
    link_node_t entry = ( link_node_t ) FRISO_MALLOC( sizeof( link_node_entry ) );

    if ( entry == NULL ) {
        ___ALLOCATION_ERROR___
    }

    entry->value = value;
    entry->prev  = prev;
    entry->next  = next;

    return entry;
}
@ -32,14 +32,14 @@ __STATIC_API__ link_node_t new_node_entry(
FRISO_API friso_link_t new_link_list( void ) FRISO_API friso_link_t new_link_list( void )
{ {
friso_link_t e = ( friso_link_t ) friso_link_t e = ( friso_link_t )
FRISO_MALLOC( sizeof( friso_link_entry ) ); FRISO_MALLOC( sizeof( friso_link_entry ) );
if ( e == NULL ) { if ( e == NULL ) {
___ALLOCATION_ERROR___ ___ALLOCATION_ERROR___
} }
//initialize the entry //initialize the entry
e->head = new_node_entry( NULL, NULL, NULL ); e->head = new_node_entry( NULL, NULL, NULL );
e->tail = new_node_entry( NULL, e->head, NULL ); e->tail = new_node_entry( NULL, e->head, NULL );
e->head->next = e->tail; e->head->next = e->tail;
e->size = 0; e->size = 0;
@ -52,9 +52,9 @@ FRISO_API void free_link_list( friso_link_t link )
link_node_t node, next; link_node_t node, next;
for ( node = link->head; node != NULL; ) for ( node = link->head; node != NULL; )
{ {
next = node->next; next = node->next;
FRISO_FREE( node ); FRISO_FREE( node );
node = next; node = next;
} }
FRISO_FREE( link ); FRISO_FREE( link );
@ -62,16 +62,16 @@ FRISO_API void free_link_list( friso_link_t link )
//clear all nodes in the link list. //clear all nodes in the link list.
FRISO_API friso_link_t link_list_clear( FRISO_API friso_link_t link_list_clear(
friso_link_t link ) friso_link_t link )
{ {
link_node_t node, next; link_node_t node, next;
//free all the middle nodes. //free all the middle nodes.
for ( node = link->head->next; for ( node = link->head->next;
node != link->tail; ) node != link->tail; )
{ {
next = node->next; next = node->next;
FRISO_FREE( node ); FRISO_FREE( node );
node = next; node = next;
} }
link->head->next = link->tail; link->head->next = link->tail;
@ -97,22 +97,22 @@ FRISO_API friso_link_t link_list_clear(
* static * static
*/ */
__STATIC_API__ link_node_t get_node( __STATIC_API__ link_node_t get_node(
friso_link_t link, uint_t idx ) friso_link_t link, uint_t idx )
{ {
link_node_t p = NULL; link_node_t p = NULL;
register uint_t t; register uint_t t;
if ( idx >= 0 && idx < link->size ) if ( idx >= 0 && idx < link->size )
{ {
if ( idx < link->size / 2 ) { //find from the head. if ( idx < link->size / 2 ) { //find from the head.
p = link->head; p = link->head;
for ( t = 0; t <= idx; t++ ) for ( t = 0; t <= idx; t++ )
p = p->next; p = p->next;
} else { //find from the tail. } else { //find from the tail.
p = link->tail; p = link->tail;
for ( t = link->size; t > idx; t-- ) for ( t = link->size; t > idx; t-- )
p = p->prev; p = p->prev;
} }
} }
return p; return p;
@ -123,9 +123,9 @@ __STATIC_API__ link_node_t get_node(
* static * static
*/ */
//__STATIC_API__ void insert_before( //__STATIC_API__ void insert_before(
// friso_link_t link, // friso_link_t link,
// link_node_t node, // link_node_t node,
// void * value ) // void * value )
//{ //{
// link_node_t e = new_node_entry( value, node->prev, node ); // link_node_t e = new_node_entry( value, node->prev, node );
// e->prev->next = e; // e->prev->next = e;
@ -136,10 +136,10 @@ __STATIC_API__ link_node_t get_node(
//} //}
/*
 * insert a new node holding value just before node
 * and increase the size of the link list.
 *
 * the body is wrapped in do { } while ( 0 ) so that the macro expands to
 * exactly one statement (safe in an unbraced if / else), the arguments are
 * parenthesized and node is evaluated only once.
 */
#define insert_before( link, node, value ) \
do { \
    link_node_t ib_node_ = ( node ); \
    link_node_t ib_entry_ = new_node_entry( ( value ), ib_node_->prev, ib_node_ ); \
    ib_entry_->prev->next = ib_entry_; \
    ib_entry_->next->prev = ib_entry_; \
    ( link )->size++; \
} while ( 0 )
/* /*
@ -150,7 +150,7 @@ __STATIC_API__ link_node_t get_node(
* @return the value of the removed node. * @return the value of the removed node.
*/ */
__STATIC_API__ void * remove_node( __STATIC_API__ void * remove_node(
friso_link_t link, link_node_t node ) friso_link_t link, link_node_t node )
{ {
void * _value = node->value; void * _value = node->value;
@ -166,18 +166,18 @@ __STATIC_API__ void * remove_node(
//add a new node to the link list.(insert just before the tail) //add a new node to the link list.(insert just before the tail)
FRISO_API void link_list_add( FRISO_API void link_list_add(
friso_link_t link, void * value ) friso_link_t link, void * value )
{ {
insert_before( link, link->tail, value ); insert_before( link, link->tail, value );
} }
//add a new node before the given index. //add a new node before the given index.
FRISO_API void link_list_insert_before( FRISO_API void link_list_insert_before(
friso_link_t link, uint_t idx, void * value ) friso_link_t link, uint_t idx, void * value )
{ {
link_node_t node = get_node( link, idx ); link_node_t node = get_node( link, idx );
if ( node != NULL ) { if ( node != NULL ) {
insert_before( link, node, value ); insert_before( link, node, value );
} }
} }
@ -187,11 +187,11 @@ FRISO_API void link_list_insert_before(
* @return the value of the node. * @return the value of the node.
*/ */
FRISO_API void * link_list_get( FRISO_API void * link_list_get(
friso_link_t link, uint_t idx ) friso_link_t link, uint_t idx )
{ {
link_node_t node = get_node( link, idx ); link_node_t node = get_node( link, idx );
if ( node != NULL ) { if ( node != NULL ) {
return node->value; return node->value;
} }
return NULL; return NULL;
} }
@ -199,20 +199,20 @@ FRISO_API void * link_list_get(
/* /*
 * set the value of the node located at the specified position.
 * we do not free the allocation of the old value, it is returned to you;
 *    free it yourself when necessary.
* *
* @return the old value. * @return the old value.
*/ */
FRISO_API void *link_list_set( FRISO_API void *link_list_set(
friso_link_t link, friso_link_t link,
uint_t idx, void * value ) uint_t idx, void * value )
{ {
link_node_t node = get_node( link, idx ); link_node_t node = get_node( link, idx );
void * _value = NULL; void * _value = NULL;
if ( node != NULL ) { if ( node != NULL ) {
_value = node->value; _value = node->value;
node->value = value; node->value = value;
} }
return _value; return _value;
@ -225,13 +225,13 @@ FRISO_API void *link_list_set(
* @return the value of the node removed. * @return the value of the node removed.
*/ */
FRISO_API void *link_list_remove( FRISO_API void *link_list_remove(
friso_link_t link, uint_t idx ) friso_link_t link, uint_t idx )
{ {
link_node_t node = get_node( link, idx ); link_node_t node = get_node( link, idx );
if ( node != NULL ) { if ( node != NULL ) {
//printf("idx=%d, node->value=%s\n", idx, (string) node->value ); //printf("idx=%d, node->value=%s\n", idx, (string) node->value );
return remove_node( link, node ); return remove_node( link, node );
} }
return NULL; return NULL;
@ -244,43 +244,43 @@ FRISO_API void *link_list_remove(
* @return the value of the node removed. * @return the value of the node removed.
*/ */
FRISO_API void *link_list_remove_node( FRISO_API void *link_list_remove_node(
friso_link_t link, friso_link_t link,
link_node_t node ) link_node_t node )
{ {
return remove_node( link, node ); return remove_node( link, node );
} }
//remove the first node after the head //remove the first node after the head
FRISO_API void *link_list_remove_first( FRISO_API void *link_list_remove_first(
friso_link_t link ) friso_link_t link )
{ {
if ( link->size > 0 ) { if ( link->size > 0 ) {
return remove_node( link, link->head->next ); return remove_node( link, link->head->next );
} }
return NULL; return NULL;
} }
//remove the last node just before the tail. //remove the last node just before the tail.
FRISO_API void *link_list_remove_last( FRISO_API void *link_list_remove_last(
friso_link_t link ) friso_link_t link )
{ {
if ( link->size > 0 ) { if ( link->size > 0 ) {
return remove_node( link, link->tail->prev ); return remove_node( link, link->tail->prev );
} }
return NULL; return NULL;
} }
//append a node from the tail. //append a node from the tail.
FRISO_API void link_list_add_last( FRISO_API void link_list_add_last(
friso_link_t link, friso_link_t link,
void *value ) void *value )
{ {
insert_before( link, link->tail, value ); insert_before( link, link->tail, value );
} }
//append a note just after the head. //append a note just after the head.
FRISO_API void link_list_add_first( FRISO_API void link_list_add_first(
friso_link_t link, void *value ) friso_link_t link, void *value )
{ {
insert_before( link, link->head->next, value ); insert_before( link, link->head->next, value );
} }

View File

@ -1,8 +1,8 @@
/* /*
* utf-8 handle function implements. * utf-8 handle function implements.
* you could modify it or re-release it but never for commercial use. * you could modify it or re-release it but never for commercial use.
* *
* @author chenxin <chenxin619315@gmail.com> * @author chenxin <chenxin619315@gmail.com>
*/ */
#include "friso_API.h" #include "friso_API.h"
@ -11,14 +11,14 @@
#include <string.h> #include <string.h>
/* ****************************************** /* ******************************************
* fstring buffer functions implements. * * fstring buffer functions implements. *
********************************************/ ********************************************/
/** /**
* create a new buffer * create a new buffer
* @Note: * @Note:
 *    1. its real length is 1 byte greater than the specified value
 *    2. we did not do any optimization for the memory allocation to
 *        avoid memory fragmentation.
* *
* @date: 2014-10-16 * @date: 2014-10-16
*/ */
@ -26,7 +26,7 @@ __STATIC_API__ fstring create_buffer( uint_t length )
{ {
fstring buffer = ( fstring ) FRISO_MALLOC( length + 1 ); fstring buffer = ( fstring ) FRISO_MALLOC( length + 1 );
if ( buffer == NULL ) { if ( buffer == NULL ) {
___ALLOCATION_ERROR___ ___ALLOCATION_ERROR___
} }
memset( buffer, 0x00, length + 1 ); memset( buffer, 0x00, length + 1 );
@ -36,7 +36,7 @@ __STATIC_API__ fstring create_buffer( uint_t length )
//the __allocs should not be smaller than sb->length //the __allocs should not be smaller than sb->length
__STATIC_API__ string_buffer_t resize_buffer( __STATIC_API__ string_buffer_t resize_buffer(
string_buffer_t sb, uint_t __allocs ) string_buffer_t sb, uint_t __allocs )
{ {
//create a new buffer. //create a new buffer.
//if ( __allocs < sb->length ) __allocs = sb->length + 1; //if ( __allocs < sb->length ) __allocs = sb->length + 1;
@ -44,7 +44,7 @@ __STATIC_API__ string_buffer_t resize_buffer(
//register uint_t t; //register uint_t t;
//for ( t = 0; t < sb->length; t++ ) { //for ( t = 0; t < sb->length; t++ ) {
// str[t] = sb->buffer[t]; // str[t] = sb->buffer[t];
//} //}
memcpy( str, sb->buffer, sb->length ); memcpy( str, sb->buffer, sb->length );
FRISO_FREE( sb->buffer ); FRISO_FREE( sb->buffer );
@ -65,9 +65,9 @@ __STATIC_API__ string_buffer_t resize_buffer(
FRISO_API string_buffer_t new_string_buffer_with_opacity( uint_t opacity ) FRISO_API string_buffer_t new_string_buffer_with_opacity( uint_t opacity )
{ {
string_buffer_t sb = ( string_buffer_t ) string_buffer_t sb = ( string_buffer_t )
FRISO_MALLOC( sizeof( string_buffer_entry ) ); FRISO_MALLOC( sizeof( string_buffer_entry ) );
if ( sb == NULL ) { if ( sb == NULL ) {
___ALLOCATION_ERROR___ ___ALLOCATION_ERROR___
} }
sb->buffer = create_buffer( opacity ); sb->buffer = create_buffer( opacity );
@ -82,9 +82,9 @@ FRISO_API string_buffer_t new_string_buffer_with_string( fstring str )
{ {
//buffer allocations. //buffer allocations.
string_buffer_t sb = ( string_buffer_t ) string_buffer_t sb = ( string_buffer_t )
FRISO_MALLOC( sizeof( string_buffer_entry ) ); FRISO_MALLOC( sizeof( string_buffer_entry ) );
if ( sb == NULL ) { if ( sb == NULL ) {
___ALLOCATION_ERROR___ ___ALLOCATION_ERROR___
} }
//initialize //initialize
@ -95,7 +95,7 @@ FRISO_API string_buffer_t new_string_buffer_with_string( fstring str )
//register uint_t t; //register uint_t t;
//copy the str to the buffer. //copy the str to the buffer.
//for ( t = 0; t < sb->length; t++ ) { //for ( t = 0; t < sb->length; t++ ) {
// sb->buffer[t] = str[t]; // sb->buffer[t] = str[t];
//} //}
memcpy( sb->buffer, str, sb->length ); memcpy( sb->buffer, str, sb->length );
@ -103,66 +103,66 @@ FRISO_API string_buffer_t new_string_buffer_with_string( fstring str )
} }
FRISO_API void string_buffer_append( FRISO_API void string_buffer_append(
string_buffer_t sb, fstring __str ) string_buffer_t sb, fstring __str )
{ {
register uint_t __len__ = strlen( __str ); register uint_t __len__ = strlen( __str );
//check the necessity to resize the buffer. //check the necessity to resize the buffer.
if ( sb->length + __len__ > sb->allocs ) { if ( sb->length + __len__ > sb->allocs ) {
sb = resize_buffer( sb, ( sb->length + __len__ ) * 2 + 1 ); sb = resize_buffer( sb, ( sb->length + __len__ ) * 2 + 1 );
} }
//register uint_t t; //register uint_t t;
////copy the __str to the buffer. ////copy the __str to the buffer.
//for ( t = 0; t < __len__; t++ ) { //for ( t = 0; t < __len__; t++ ) {
// sb->buffer[ sb->length++ ] = __str[t]; // sb->buffer[ sb->length++ ] = __str[t];
//} //}
memcpy( sb->buffer + sb->length, __str, __len__ ); memcpy( sb->buffer + sb->length, __str, __len__ );
sb->length += __len__; sb->length += __len__;
} }
FRISO_API void string_buffer_append_char( FRISO_API void string_buffer_append_char(
string_buffer_t sb, char ch ) string_buffer_t sb, char ch )
{ {
//check the necessity to resize the buffer. //check the necessity to resize the buffer.
if ( sb->length + 1 > sb->allocs ) { if ( sb->length + 1 > sb->allocs ) {
sb = resize_buffer( sb, sb->length * 2 + 1 ); sb = resize_buffer( sb, sb->length * 2 + 1 );
} }
sb->buffer[sb->length++] = ch; sb->buffer[sb->length++] = ch;
} }
FRISO_API void string_buffer_insert( FRISO_API void string_buffer_insert(
string_buffer_t sb, string_buffer_t sb,
uint_t idx, uint_t idx,
fstring __str ) fstring __str )
{ {
} }
/* /*
* remove the given bytes from the buffer start from idx. * remove the given bytes from the buffer start from idx.
* this will cause the byte move after the idx+length. * this will cause the byte move after the idx+length.
* *
* @return the new string. * @return the new string.
*/ */
FRISO_API fstring string_buffer_remove( FRISO_API fstring string_buffer_remove(
string_buffer_t sb, string_buffer_t sb,
uint_t idx, uint_t idx,
uint_t length ) uint_t length )
{ {
uint_t t; uint_t t;
//move the bytes after the idx + length //move the bytes after the idx + length
for ( t = idx + length; t < sb->length; t++ ) { for ( t = idx + length; t < sb->length; t++ ) {
sb->buffer[t - length] = sb->buffer[t]; sb->buffer[t - length] = sb->buffer[t];
} }
sb->buffer[t] = '\0'; sb->buffer[t] = '\0';
//memcpy( sb->buffer + idx, //memcpy( sb->buffer + idx,
// sb->buffer + idx + length, // sb->buffer + idx + length,
// sb->length - idx - length ); // sb->length - idx - length );
t = sb->length - idx; t = sb->length - idx;
if ( t > 0 ) { if ( t > 0 ) {
sb->length -= ( t > length ) ? length : t; sb->length -= ( t > length ) ? length : t;
} }
sb->buffer[sb->length-1] = '\0'; sb->buffer[sb->length-1] = '\0';
@ -171,13 +171,13 @@ FRISO_API fstring string_buffer_remove(
/* /*
* turn the string_buffer to a string. * turn the string_buffer to a string.
* or return the buffer of the string_buffer. * or return the buffer of the string_buffer.
*/ */
FRISO_API string_buffer_t string_buffer_trim( string_buffer_t sb ) FRISO_API string_buffer_t string_buffer_trim( string_buffer_t sb )
{ {
//resize the buffer. //resize the buffer.
if ( sb->length < sb->allocs - 1 ) { if ( sb->length < sb->allocs - 1 ) {
sb = resize_buffer( sb, sb->length + 1 ); sb = resize_buffer( sb, sb->length + 1 );
} }
return sb; return sb;
} }
@ -185,8 +185,8 @@ FRISO_API string_buffer_t string_buffer_trim( string_buffer_t sb )
/* /*
* free the given fstring buffer. * free the given fstring buffer.
 * this function will not free the allocation of the
 *    string_buffer_t->buffer, it is returned to you; if
 *    necessary you could free it yourself by calling free();
*/ */
FRISO_API fstring string_buffer_devote( string_buffer_t sb ) FRISO_API fstring string_buffer_devote( string_buffer_t sb )
{ {
@ -197,7 +197,7 @@ FRISO_API fstring string_buffer_devote( string_buffer_t sb )
/* /*
* clear the given fstring buffer. * clear the given fstring buffer.
* reset its buffer with 0 and reset its length to 0. * reset its buffer with 0 and reset its length to 0.
*/ */
FRISO_API void string_buffer_clear( string_buffer_t sb ) FRISO_API void string_buffer_clear( string_buffer_t sb )
{ {
@ -216,17 +216,17 @@ FRISO_API void free_string_buffer( string_buffer_t sb )
/** /**
* create a new string_split_entry. * create a new string_split_entry.
* *
* @param source * @param source
* @return string_split_t; * @return string_split_t;
*/ */
FRISO_API string_split_t new_string_split( FRISO_API string_split_t new_string_split(
fstring delimiter, fstring delimiter,
fstring source ) fstring source )
{ {
string_split_t e = ( string_split_t ) string_split_t e = ( string_split_t )
FRISO_MALLOC( sizeof( string_split_entry ) ); FRISO_MALLOC( sizeof( string_split_entry ) );
if ( e == NULL ) { if ( e == NULL ) {
___ALLOCATION_ERROR___; ___ALLOCATION_ERROR___;
} }
e->delimiter = delimiter; e->delimiter = delimiter;
@ -239,19 +239,19 @@ FRISO_API string_split_t new_string_split(
} }
FRISO_API void string_split_reset( FRISO_API void string_split_reset(
string_split_t sst, string_split_t sst,
fstring delimiter, fstring delimiter,
fstring source ) fstring source )
{ {
sst->delimiter = delimiter; sst->delimiter = delimiter;
sst->delLen = strlen(delimiter); sst->delLen = strlen(delimiter);
sst->source = source; sst->source = source;
sst->srcLen = strlen(source); sst->srcLen = strlen(source);
sst->idx = 0; sst->idx = 0;
} }
FRISO_API void string_split_set_source( FRISO_API void string_split_set_source(
string_split_t sst, fstring source ) string_split_t sst, fstring source )
{ {
sst->source = source; sst->source = source;
sst->srcLen = strlen(source); sst->srcLen = strlen(source);
@ -259,7 +259,7 @@ FRISO_API void string_split_set_source(
} }
FRISO_API void string_split_set_delimiter( FRISO_API void string_split_set_delimiter(
string_split_t sst, fstring delimiter ) string_split_t sst, fstring delimiter )
{ {
sst->delimiter = delimiter; sst->delimiter = delimiter;
sst->delLen = strlen( delimiter ); sst->delLen = strlen( delimiter );
@ -273,15 +273,15 @@ FRISO_API void free_string_split( string_split_t sst )
/** /**
* get the next split fstring, and copy the * get the next split fstring, and copy the
* splited fstring into the __dst buffer . * splited fstring into the __dst buffer .
* *
* @param string_split_t * @param string_split_t
* @param __dst * @param __dst
* @return fstring (NULL if reach the end of the source * @return fstring (NULL if reach the end of the source
* or there is no more segmentation) * or there is no more segmentation)
*/ */
FRISO_API fstring string_split_next( FRISO_API fstring string_split_next(
string_split_t sst, fstring __dst) string_split_t sst, fstring __dst)
{ {
uint_t i, _ok; uint_t i, _ok;
fstring _dst = __dst; fstring _dst = __dst;
@ -291,28 +291,28 @@ FRISO_API fstring string_split_next(
while ( 1 ) while ( 1 )
{ {
_ok = 1; _ok = 1;
for ( i = 0; i < sst->delLen for ( i = 0; i < sst->delLen
&& (sst->idx + i < sst->srcLen); i++ ) && (sst->idx + i < sst->srcLen); i++ )
{ {
if ( sst->source[sst->idx+i] != sst->delimiter[i] ) if ( sst->source[sst->idx+i] != sst->delimiter[i] )
{ {
_ok = 0; _ok = 0;
break; break;
} }
} }
//find the delimiter here, //find the delimiter here,
//break the loop and self plus the sst->idx, then return the buffer . //break the loop and self plus the sst->idx, then return the buffer .
if ( _ok == 1 ) { if ( _ok == 1 ) {
sst->idx += sst->delLen; sst->idx += sst->delLen;
break; break;
} }
//coy the char to the buffer //coy the char to the buffer
*_dst++ = sst->source[sst->idx++]; *_dst++ = sst->source[sst->idx++];
//check if reach the end of the fstring //check if reach the end of the fstring
if ( sst->idx >= sst->srcLen ) break; if ( sst->idx >= sst->srcLen ) break;
} }
*_dst = '\0'; *_dst = '\0';

View File

@ -1,8 +1,8 @@
/* /*
 * dynamic array test program.
* *
* @author chenxin * @author chenxin
* @email chenxin619315@gmail.com * @email chenxin619315@gmail.com
*/ */
#include "friso_API.h" #include "friso_API.h"
@ -10,42 +10,42 @@
#include <stdlib.h> #include <stdlib.h>
int main( int argc, char **args ) { int main( int argc, char **args ) {
//create a new array list. //create a new array list.
friso_array_t array = new_array_list(); friso_array_t array = new_array_list();
fstring keys[] = { fstring keys[] = {
"chenmanwen", "yangqinghua", "chenmanwen", "yangqinghua",
"chenxin", "luojiangyan", "xiaoyanzi", "bibi", "chenxin", "luojiangyan", "xiaoyanzi", "bibi",
"zhangrenfang", "yangjian", "zhangrenfang", "yangjian",
"liuxiao", "pankai", "liuxiao", "pankai",
"chenpei", "liheng", "zhangzhigang", "zhgangyishao", "yangjiangbo", "chenpei", "liheng", "zhangzhigang", "zhgangyishao", "yangjiangbo",
"caizaili", "panpan", "xiaolude", "yintanwen" "caizaili", "panpan", "xiaolude", "yintanwen"
}; };
int j, idx = 2, len = sizeof( keys ) / sizeof( fstring ); int j, idx = 2, len = sizeof( keys ) / sizeof( fstring );
for ( j = 0; j < len; j++ ) { for ( j = 0; j < len; j++ ) {
array_list_add( array, keys[j] ); array_list_add( array, keys[j] );
} }
printf("length=%d, allocations=%d\n", array->length, array->allocs ); printf("length=%d, allocations=%d\n", array->length, array->allocs );
array_list_trim( array ); array_list_trim( array );
printf("after tirm length=%d, allocations=%d\n", array->length, array->allocs ); printf("after tirm length=%d, allocations=%d\n", array->length, array->allocs );
printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) ); printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) );
printf("\nAfter set %dth item.\n", idx ); printf("\nAfter set %dth item.\n", idx );
array_list_set( array, idx, "chenxin__" ); array_list_set( array, idx, "chenxin__" );
printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) ); printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) );
printf("\nAfter remove %dth item.\n", idx ); printf("\nAfter remove %dth item.\n", idx );
array_list_remove( array, idx ); array_list_remove( array, idx );
printf("length=%d, allocations=%d\n", array->length, array->allocs ); printf("length=%d, allocations=%d\n", array->length, array->allocs );
printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) ); printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) );
printf("\nInsert a item at %dth\n", idx ); printf("\nInsert a item at %dth\n", idx );
array_list_insert( array, idx, "*chenxin*" ); array_list_insert( array, idx, "*chenxin*" );
printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) ); printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) );
free_array_list( array ); free_array_list( array );
return 0; return 0;
} }

View File

@ -1,8 +1,8 @@
/*
 * Friso test program.
 * Of course you can make it a perfect demo for friso.
 * all threads or processes share the same friso_t,
 * different threads/processes use different friso_task_t.
 * and you could share the friso_config_t if you wish...
 *
 * @author chenxin <chenxin619315@gmail.com>
@ -17,33 +17,33 @@
#define __LENGTH__ 15 #define __LENGTH__ 15
#define __INPUT_LENGTH__ 20480 #define __INPUT_LENGTH__ 20480
#define ___EXIT_INFO___ \ #define ___EXIT_INFO___ \
println("Thanks for trying friso."); \ println("Thanks for trying friso."); \
break; break;
#define ___ABOUT___ \ #define ___ABOUT___ \
println("+-----------------------------------------------------------+"); \ println("+-----------------------------------------------------------+"); \
println("| friso - a chinese word segmentation writen by c. |"); \ println("| friso - a chinese word segmentation writen by c. |"); \
println("| bug report email - chenxin619315@gmail.com. |"); \ println("| bug report email - chenxin619315@gmail.com. |"); \
println("| or: visit http://code.google.com/p/friso. |"); \ println("| or: visit http://code.google.com/p/friso. |"); \
println("| java edition for http://code.google.com/p/jcseg |"); \ println("| java edition for http://code.google.com/p/jcseg |"); \
println("| type 'quit' to exit the program. |"); \ println("| type 'quit' to exit the program. |"); \
println("+-----------------------------------------------------------+"); println("+-----------------------------------------------------------+");
//read a line from a command line. //read a line from a command line.
/*
 * Read one line from fp into __dst, without the trailing '\n'.
 *
 * At most __INPUT_LENGTH__ - 1 bytes are stored, followed by a
 * terminating '\0'. Any further bytes of an over-long line are read and
 * discarded, so the next call starts at the beginning of the next line.
 * The original copied without any limit and could write past the end of
 * the caller's buffer.
 *
 * @param  fp     stream to read from
 * @param  __dst  destination buffer; must hold at least __INPUT_LENGTH__
 *                bytes (the size of the buffer main() passes in)
 * @return __dst, or NULL when EOF is hit before any byte was stored
 */
static fstring getLine( FILE *fp, fstring __dst )
{
    int c;
    fstring cs    = __dst;
    fstring limit = __dst + ( __INPUT_LENGTH__ - 1 );  /* keep room for '\0' */

    while ( ( c = getc( fp ) ) != EOF ) {
        if ( c == '\n' ) break;
        /* store only while there is room; keep consuming the rest of the line */
        if ( cs < limit ) *cs++ = c;
    }
    *cs = '\0';

    return ( c == EOF && cs == __dst ) ? NULL : __dst;
}
/*static void printcode( fstring str ) { /*static void printcode( fstring str ) {
@ -59,94 +59,94 @@ static fstring getLine( FILE *fp, fstring __dst )
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
clock_t s_time, e_time; clock_t s_time, e_time;
char line[__INPUT_LENGTH__] = {0}; char line[__INPUT_LENGTH__] = {0};
int i; int i;
fstring __path__ = NULL, mode = NULL; fstring __path__ = NULL, mode = NULL;
friso_t friso; friso_t friso;
friso_config_t config; friso_config_t config;
friso_task_t task; friso_task_t task;
//get the lexicon directory //get the lexicon directory
for ( i = 0; i < argc; i++ ) { for ( i = 0; i < argc; i++ ) {
if ( strcasecmp( "-init", argv[i] ) == 0 ) { if ( strcasecmp( "-init", argv[i] ) == 0 ) {
__path__ = argv[i+1]; __path__ = argv[i+1];
} }
} }
if ( __path__ == NULL ) { if ( __path__ == NULL ) {
println("Usage: friso -init lexicon path"); println("Usage: friso -init lexicon path");
exit(0); exit(0);
} }
s_time = clock(); s_time = clock();
//initialize //initialize
friso = friso_new(); friso = friso_new();
config = friso_new_config(); config = friso_new_config();
/*friso_dic_t dic = friso_dic_new(); /*friso_dic_t dic = friso_dic_new();
friso_dic_load_from_ifile( dic, __path__, __LENGTH__ ); friso_dic_load_from_ifile( dic, __path__, __LENGTH__ );
friso_set_dic( friso, dic ); friso_set_dic( friso, dic );
friso_set_mode( friso, __FRISO_COMPLEX_MODE__ );*/ friso_set_mode( friso, __FRISO_COMPLEX_MODE__ );*/
if ( friso_init_from_ifile(friso, config, __path__) != 1 ) { if ( friso_init_from_ifile(friso, config, __path__) != 1 ) {
printf("fail to initialize friso and config."); printf("fail to initialize friso and config.");
goto err; goto err;
} }
switch ( config->mode ) switch ( config->mode )
{ {
case __FRISO_SIMPLE_MODE__: case __FRISO_SIMPLE_MODE__:
mode = "Simple"; mode = "Simple";
break; break;
case __FRISO_COMPLEX_MODE__: case __FRISO_COMPLEX_MODE__:
mode = "Complex"; mode = "Complex";
break; break;
case __FRISO_DETECT_MODE__: case __FRISO_DETECT_MODE__:
mode = "Detect"; mode = "Detect";
break; break;
} }
//friso_set_mode( config, __FRISO_DETECT_MODE__ ); //friso_set_mode( config, __FRISO_DETECT_MODE__ );
//printf("clr_stw=%d\n", friso->clr_stw); //printf("clr_stw=%d\n", friso->clr_stw);
//printf("match c++?%d\n", friso_dic_match( friso->dic, __LEX_ENPUN_WORDS__, "c++" )); //printf("match c++?%d\n", friso_dic_match( friso->dic, __LEX_ENPUN_WORDS__, "c++" ));
//printf("match(研究)?%d\n", friso_dic_match( friso->dic, __LEX_CJK_WORDS__, "研究")); //printf("match(研究)?%d\n", friso_dic_match( friso->dic, __LEX_CJK_WORDS__, "研究"));
e_time = clock(); e_time = clock();
printf("Initialized in %fsec\n", (double) ( e_time - s_time ) / CLOCKS_PER_SEC ); printf("Initialized in %fsec\n", (double) ( e_time - s_time ) / CLOCKS_PER_SEC );
printf("Mode: %s\n", mode); printf("Mode: %s\n", mode);
printf("+-Version: %s (%s)\n", friso_version(), friso->charset == FRISO_UTF8 ? "UTF-8" : "GBK" ); printf("+-Version: %s (%s)\n", friso_version(), friso->charset == FRISO_UTF8 ? "UTF-8" : "GBK" );
___ABOUT___; ___ABOUT___;
//set the task. //set the task.
task = friso_new_task(); task = friso_new_task();
while ( 1 ) while ( 1 )
{ {
print("friso>> "); print("friso>> ");
getLine( stdin, line ); getLine( stdin, line );
//exit the programe //exit the programe
if ( strcasecmp( line, "quit" ) == 0 ) { if ( strcasecmp( line, "quit" ) == 0 ) {
___EXIT_INFO___ ___EXIT_INFO___
} }
//for ( i = 0; i < 1000000; i++ ) { //for ( i = 0; i < 1000000; i++ ) {
//set the task text. //set the task text.
friso_set_text( task, line ); friso_set_text( task, line );
println("分词结果:"); println("分词结果:");
s_time = clock(); s_time = clock();
while ( ( config->next_token( friso, config, task ) ) != NULL ) while ( ( config->next_token( friso, config, task ) ) != NULL )
{ {
//printf("%s[%d, %d, %d] ", task->token->word, //printf("%s[%d, %d, %d] ", task->token->word,
// task->token->offset, task->token->length, task->token->rlen ); // task->token->offset, task->token->length, task->token->rlen );
printf("%s ", task->token->word ); printf("%s ", task->token->word );
} }
//} //}
e_time = clock(); e_time = clock();
printf("\nDone, cost < %fsec\n", ( (double)(e_time - s_time) ) / CLOCKS_PER_SEC ); printf("\nDone, cost < %fsec\n", ( (double)(e_time - s_time) ) / CLOCKS_PER_SEC );
} }
friso_free_task( task ); friso_free_task( task );

View File

@ -1,8 +1,8 @@
/** /**
* File Explain. * File Explain.
* *
* @author chenxin * @author chenxin
* @see http://www.webssky.com * @see http://www.webssky.com
*/ */
#include "friso_API.h" #include "friso_API.h"
@ -10,28 +10,28 @@
/*
 * Print the length, size, factor and threshold fields of the given
 * hash table on a single line.
 *
 * @param _hash the hash table to report on
 */
void print_hash_info( friso_hash_t _hash ) {
    printf(
        "info:length=%d, size=%d, facotr=%f, threshold=%d\n",
        _hash->length,
        _hash->size,
        _hash->factor,
        _hash->threshold
    );
}
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
friso_hash_t _hash = new_hash_table(); friso_hash_t _hash = new_hash_table();
char *names[] = { char *names[] = {
"陈满文", "阳清华", "陈满文", "阳清华",
"陈鑫", "罗江艳", "陈鑫", "罗江艳",
"小燕子", "比比", "小燕子", "比比",
"张仁芳", "阳建", "张仁芳", "阳建",
"陈配", "李恒", "陈配", "李恒",
"张志刚", "张怡少", "张志刚", "张怡少",
"阳江波", "蔡再利", "阳江波", "蔡再利",
"阳绘章", "尹唐文", "阳绘章", "尹唐文",
"谭志鹏", "肖路德", "谭志鹏", "肖路德",
"潘凯", "刘潇", "潘凯", "刘潇",
"马朝辉", "张强", "马朝辉", "张强",
"殷美林", "元明清", "殷美林", "元明清",
"周安", "郭桥安", "周安", "郭桥安",
"刘敏", "黄广华", "刘敏", "黄广华",
"李胜", "黄海清" "李胜", "黄海清"
}; };
//char *str[] = {"陈鑫", "张仁芳", "比比"}; //char *str[] = {"陈鑫", "张仁芳", "比比"};
char **str = names; char **str = names;
@ -39,7 +39,7 @@ int main(int argc, char **argv)
print_hash_info( _hash ); print_hash_info( _hash );
for ( j = 0; j < len; j++) { for ( j = 0; j < len; j++) {
hash_put_mapping( _hash, names[j], names[j] ); hash_put_mapping( _hash, names[j], names[j] );
} }
print_hash_info( _hash ); print_hash_info( _hash );
@ -49,11 +49,11 @@ int main(int argc, char **argv)
//remove mappings //remove mappings
for ( j = 0; j < len; j++ ) { for ( j = 0; j < len; j++ ) {
printf("Exist %s?%2d\n", str[j], hash_exist_mapping( _hash, str[j] )); printf("Exist %s?%2d\n", str[j], hash_exist_mapping( _hash, str[j] ));
printf("Now, remove %s\n", str[j]); printf("Now, remove %s\n", str[j]);
hash_remove_mapping( _hash, str[j] ); hash_remove_mapping( _hash, str[j] );
printf("Exist %s?%2d\n", str[j], hash_exist_mapping( _hash, str[j] )); printf("Exist %s?%2d\n", str[j], hash_exist_mapping( _hash, str[j] ));
printf("*********************************\n"); printf("*********************************\n");
} }
printf("Press any key to continue."); printf("Press any key to continue.");

View File

@ -1,8 +1,8 @@
/* /*
* lex functions test program. * lex functions test program.
* *
* @author chenxin * @author chenxin
* @see http://www.webssky.com * @see http://www.webssky.com
*/ */
#include "friso.h" #include "friso.h"
@ -11,10 +11,10 @@
#include <string.h> #include <string.h>
#define __LENGTH__ 15 #define __LENGTH__ 15
#define ___PRINT_HELP_INFO___ \ #define ___PRINT_HELP_INFO___ \
printf("1. help print the current menu.\n"); \ printf("1. help print the current menu.\n"); \
printf("2. #set set the classify of the dictionary.\n"); \ printf("2. #set set the classify of the dictionary.\n"); \
printf("3. other search the words in the dictionary.\n"); \ printf("3. other search the words in the dictionary.\n"); \
printf("4. quit exit the programe.\n"); printf("4. quit exit the programe.\n");
int main(int argc, char **argv) int main(int argc, char **argv)
@ -62,30 +62,30 @@ int main(int argc, char **argv)
e_time = clock(); e_time = clock();
printf("Done, cost: %f sec, size=%d\n", ( double ) ( e_time - s_time ) / CLOCKS_PER_SEC, \ printf("Done, cost: %f sec, size=%d\n", ( double ) ( e_time - s_time ) / CLOCKS_PER_SEC, \
friso_all_dic_size( friso->dic ) ); friso_all_dic_size( friso->dic ) );
while ( 1 ) { while ( 1 ) {
printf("friso-%d>> ", lex); printf("friso-%d>> ", lex);
scanf("%s", _line); scanf("%s", _line);
if ( strcmp( _line, "quit" ) == 0 ) { if ( strcmp( _line, "quit" ) == 0 ) {
break; break;
} else if ( strcmp( _line, "help" ) == 0 ) { } else if ( strcmp( _line, "help" ) == 0 ) {
___PRINT_HELP_INFO___ ___PRINT_HELP_INFO___
} else if ( strcmp( _line, "#set" ) == 0 ) { } else if ( strcmp( _line, "#set" ) == 0 ) {
printf("lex_t>> "); printf("lex_t>> ");
scanf("%d", &lex); scanf("%d", &lex);
} else { } else {
s_time = clock(); s_time = clock();
e = friso_dic_get( friso->dic, lex, _line ); e = friso_dic_get( friso->dic, lex, _line );
e_time = clock(); e_time = clock();
if ( e != NULL ) { if ( e != NULL ) {
printf("word=%s, syn=%s, fre=%d, cost:%fsec\n", printf("word=%s, syn=%s, fre=%d, cost:%fsec\n",
e->word, e->syn==NULL? "NULL" : (char *)e->syn->items[0], e->fre, e->word, e->syn==NULL? "NULL" : (char *)e->syn->items[0], e->fre,
(double) ( e_time - s_time ) / CLOCKS_PER_SEC ); (double) ( e_time - s_time ) / CLOCKS_PER_SEC );
} else { } else {
printf("%s was not found.\n", _line); printf("%s was not found.\n", _line);
} }
} }
} }
//friso_dic_free( friso->dic ); //friso_dic_free( friso->dic );

View File

@ -1,8 +1,8 @@
/*
 * link list test program.
 *
 * @author chenxin
 * @email chenxin619315@gmail.com
 */
#include "friso_API.h" #include "friso_API.h"
@ -13,12 +13,12 @@ int main( int argc, char **args ) {
friso_link_t link; friso_link_t link;
fstring keys[] = { fstring keys[] = {
"chenmanwen", "yangqinghua", "chenmanwen", "yangqinghua",
"chenxin", "luojiangyan", "xiaoyanzi", "bibi", "chenxin", "luojiangyan", "xiaoyanzi", "bibi",
"zhangrenfang", "yangjian", "zhangrenfang", "yangjian",
"liuxiao", "pankai", "liuxiao", "pankai",
"chenpei", "liheng", "zhangzhigang", "zhgangyishao", "yangjiangbo", "chenpei", "liheng", "zhangzhigang", "zhgangyishao", "yangjiangbo",
"caizaili", "panpan", "xiaolude", "yintanwen" "caizaili", "panpan", "xiaolude", "yintanwen"
}; };
int j, len = sizeof( keys ) / sizeof( fstring ); int j, len = sizeof( keys ) / sizeof( fstring );
@ -28,15 +28,15 @@ int main( int argc, char **args ) {
printf("size=%d\n", link->size ); printf("size=%d\n", link->size );
for ( j = 0; j < len; j++ ) { for ( j = 0; j < len; j++ ) {
//link_add( link, keys[j] ); //link_add( link, keys[j] );
link_list_add_last( link, keys[j] ); link_list_add_last( link, keys[j] );
} }
printf("size=%d\n", link->size ); printf("size=%d\n", link->size );
for ( j = 0; j < len / 2; j++ ) { for ( j = 0; j < len / 2; j++ ) {
//printf("idx=%d, remove %s\n", j, ( fstring ) link_remove( link, 0 ) ); //printf("idx=%d, remove %s\n", j, ( fstring ) link_remove( link, 0 ) );
printf("idx=%d, remove %s\n", j, ( fstring ) link_list_remove_first( link ) ); printf("idx=%d, remove %s\n", j, ( fstring ) link_list_remove_first( link ) );
} }
printf("size=%d\n", link->size ); printf("size=%d\n", link->size );

View File

@ -11,7 +11,7 @@
int main ( int argc, char **args ) int main ( int argc, char **args )
{ {
fstring source = ",I am a chinese,,my name is chenxin,and i am the author of friso,bug report email chenxin619315@gmail.com,qq:1187582057"; fstring source = ",I am a chinese,,my name is chenxin,and i am the author of friso,bug report email chenxin619315@gmail.com,qq:1187582057";
char buffer[128]; char buffer[128];
string_split_t split = new_string_split(",", source ); string_split_t split = new_string_split(",", source );
@ -20,7 +20,7 @@ int main ( int argc, char **args )
printf("sst->delLen=%d\n", split->delLen); printf("sst->delLen=%d\n", split->delLen);
while ( string_split_next(split, buffer) != NULL) { while ( string_split_next(split, buffer) != NULL) {
printf("buffer:%s\n", buffer); printf("buffer:%s\n", buffer);
} }
free_string_split(split); free_string_split(split);

View File

@ -1,7 +1,7 @@
/* /*
* fstring handle mode test program. * fstring handle mode test program.
* *
* @author chenxin <chenxin619315@gmail.com> * @author chenxin <chenxin619315@gmail.com>
*/ */
#include "friso_API.h" #include "friso_API.h"
@ -20,13 +20,13 @@ int main( int argc, char **args ) {
for ( t = 0; t < length; t += bytes ) { for ( t = 0; t < length; t += bytes ) {
bytes = get_utf8_bytes( *(str + t) ); bytes = get_utf8_bytes( *(str + t) );
if ( bytes == 0 ) continue; if ( bytes == 0 ) continue;
for ( j = 0; j < bytes; j++ ) for ( j = 0; j < bytes; j++ )
word[j] = *(str + t + j ); word[j] = *(str + t + j );
word[j] = '\0'; word[j] = '\0';
string_buffer_append( sb, word ); string_buffer_append( sb, word );
printf("word=%s\n", word ); printf("word=%s\n", word );
} }
printf("length=%d, buffer=%s\n", sb->length, sb->buffer ); printf("length=%d, buffer=%s\n", sb->length, sb->buffer );