add php5 and php7 extension

This commit is contained in:
dongyado 2017-08-17 09:06:16 +08:00
parent 99f357479d
commit d9ed132c0d
56 changed files with 1037 additions and 0 deletions

470
vendors/binding/php5/friso.c vendored Normal file
View File

@ -0,0 +1,470 @@
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "php.h"
#include "php_ini.h"
#include "ext/standard/info.h"
#include "php_friso.h"
#ifdef FRISO_WINNT
# define friso_default_conf_file "c:/windows/friso.ini"
#else
# define friso_default_conf_file "/etc/friso/friso.ini"
#endif
/*
 * Module-wide state: the friso instance and its default configuration.
 * It is defined here as a plain global rather than through
 * ZEND_DECLARE_MODULE_GLOBALS(friso); PHP_MINIT fills it in and
 * PHP_MSHUTDOWN releases it.
 */
zend_friso_globals friso_globals;
/* Resource-type id slot. NOTE(review): nothing in the code shown reads it. */
static int le_friso = 1;
/* {{{ friso_functions[]
*
* Table of the functions this extension exposes to PHP code. Every
* user-visible function needs an entry here; the second PHP_FE argument
* (argument info) is NULL for all of them.
*/
const zend_function_entry friso_functions[] = {
PHP_FE(friso_split, NULL)
PHP_FE(friso_version, NULL)
PHP_FE(friso_charset, NULL)
PHP_FE(friso_dic_exist, NULL)
PHP_FE(friso_dic_get, NULL)
PHP_FE(friso_utf8_bytes, NULL)
PHP_FE(friso_utf8_ucode, NULL)
PHP_FE(friso_ucode_utf8, NULL)
{NULL, NULL, NULL} /* Terminator: must stay the last entry in friso_functions[] */
};
/* }}} */
/* {{{ friso_module_entry
*
* Module descriptor handed to the Zend engine: extension name, function
* table, lifecycle callbacks and version string.
*/
zend_module_entry friso_module_entry = {
#if ZEND_MODULE_API_NO >= 20010901
STANDARD_MODULE_HEADER,
#endif
"friso",
friso_functions,
PHP_MINIT(friso),
PHP_MSHUTDOWN(friso),
PHP_RINIT(friso), /* request-start hook; currently a no-op */
PHP_RSHUTDOWN(friso), /* request-end hook; currently a no-op */
PHP_MINFO(friso),
#if ZEND_MODULE_API_NO >= 20010901
"0.1", /* version of this binding (phpinfo() reports FRISO_VERSION separately) */
#endif
STANDARD_MODULE_PROPERTIES
};
/* }}} */
#ifdef COMPILE_DL_FRISO
ZEND_GET_MODULE(friso)
#endif
/* {{{ PHP_INI
*
* Declares the single INI directive "friso.ini_file": the path of the friso
* configuration file. It defaults to friso_default_conf_file and is
* registered as PHP_INI_SYSTEM with no change handler.
*/
PHP_INI_BEGIN()
PHP_INI_ENTRY("friso.ini_file", friso_default_conf_file, PHP_INI_SYSTEM, NULL)
PHP_INI_END()
/* }}} */
/* {{{ php_friso_globals_construct
 *
 * Creates the shared friso instance and its default configuration, stores
 * both in *friso_globals, and loads them from the file named by the
 * "friso.ini_file" INI directive.
 *
 * NOTE(review): the result of friso_init_from_ifile() is not inspected here.
 */
static void php_friso_globals_construct(zend_friso_globals *friso_globals)
{
    friso_t instance = friso_new();
    friso_config_t defaults = friso_new_config();

    friso_globals->friso = instance;
    friso_globals->config = defaults;

    friso_init_from_ifile(instance, defaults, INI_STR("friso.ini_file"));
}
/* }}} */
/* {{{ php_friso_globals_destruct
 *
 * Releases what php_friso_globals_construct() created: the default
 * configuration first, then the friso instance.
 *
 * Per the original author's note, friso_free() also releases the dictionary
 * owned by the instance, so no separate dictionary free is issued here.
 */
static void php_friso_globals_destruct(zend_friso_globals *friso_globals)
{
    friso_config_t defaults = friso_globals->config;
    friso_t instance = friso_globals->friso;

    friso_free_config(defaults);
    friso_free(instance);
}
/* }}} */
#define FRISO_RET_WORD (1 << 0)
#define FRISO_RET_TYPE (1 << 1)
#define FRISO_RET_OFF (1 << 2)
#define FRISO_RET_LEN (1 << 3)
#define FRISO_RET_RLEN (1 << 4)
#define FRISO_RET_POS (1 << 5)
/* {{{ PHP_MINIT_FUNCTION
*/
PHP_MINIT_FUNCTION(friso)
{
/*
* register some contants that robbe may use
* at its following work.
* the constant is case sensitive and persitent.
*/
REGISTER_LONG_CONSTANT("FRISO_SIMPLE", __FRISO_SIMPLE_MODE__, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FRISO_COMPLEX", __FRISO_COMPLEX_MODE__, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FRISO_DETECT", __FRISO_DETECT_MODE__, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FRISO_LEX_CJK", __LEX_CJK_WORDS__, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FRISO_LEX_STOP", __LEX_STOPWORDS__, CONST_CS | CONST_PERSISTENT);
//return parts for rb_split.
REGISTER_LONG_CONSTANT("FRISO_RET_WORD", FRISO_RET_WORD, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FRISO_RET_TYPE", FRISO_RET_TYPE, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FRISO_RET_OFF", FRISO_RET_OFF, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FRISO_RET_LEN", FRISO_RET_LEN, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FRISO_RET_RLEN", FRISO_RET_RLEN, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FRISO_RET_POS", FRISO_RET_POS, CONST_CS | CONST_PERSISTENT);
//lex type constants.
REGISTER_LONG_CONSTANT("FRISO_TYP_CJK", __LEX_CJK_WORDS__, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FRISO_TYP_ECM", __LEX_ECM_WORDS__, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FRISO_TYP_CEM", __LEX_CEM_WORDS__, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FRISO_TYP_EPUN", __LEX_ENPUN_WORDS__, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FRISO_TYP_PUN", __LEX_OTHER_WORDS__, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FRISO_TYP_UNK", __LEX_UNKNOW_WORDS__, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("FRISO_TYP_OTR", __LEX_OTHER_WORDS__, CONST_CS | CONST_PERSISTENT);
REGISTER_INI_ENTRIES();
/*initialize the globals variables.*/
php_friso_globals_construct( &friso_globals );
return SUCCESS;
}
/* }}} */
/* {{{ PHP_MSHUTDOWN_FUNCTION
*
* Module shutdown: unregisters the INI directives and releases the global
* friso instance and configuration created in PHP_MINIT. Always returns
* SUCCESS.
*/
PHP_MSHUTDOWN_FUNCTION(friso)
{
UNREGISTER_INI_ENTRIES();
/* release the global friso instance and configuration */
php_friso_globals_destruct( &friso_globals );
return SUCCESS;
}
/* }}} */
/* {{{ PHP_RINIT_FUNCTION
*
* Request start-up hook. Does no per-request work and always returns SUCCESS.
*/
PHP_RINIT_FUNCTION(friso)
{
return SUCCESS;
}
/* }}} */
/* {{{ PHP_RSHUTDOWN_FUNCTION
*
* Request shutdown hook. Does no per-request work and always returns SUCCESS.
*/
PHP_RSHUTDOWN_FUNCTION(friso)
{
return SUCCESS;
}
/* }}} */
/* {{{ PHP_MINFO_FUNCTION
*
* Emits the extension's section of phpinfo(): a two-column table with the
* support flag, the friso library version and contact details, followed by
* the registered INI directives.
*/
PHP_MINFO_FUNCTION(friso)
{
php_info_print_table_start();
php_info_print_table_row(2, "Friso Support", "enabled");
php_info_print_table_row(2, "Version", FRISO_VERSION);
php_info_print_table_row(2, "Bug Report", "chenxin619315@gmail.com");
php_info_print_table_row(2, "Home page", "http://code.google.com/p/friso");
php_info_print_table_end();
DISPLAY_INI_ENTRIES();
}
/* }}} */
/* {{{ proto array friso_split(string str, long mode)
Return a array contains all the split result with a specified mode */
PHP_FUNCTION(friso_split)
{
char *_str = NULL, *_key;
int slen, idx, klen, rargs = 0;
int arg_count;
zval *ret, *cfg, **data;
//used for multiple item return.
zval *item;
HashTable *cfgArr;
HashPosition pointer;
friso_task_t task;
friso_config_t config = NULL, nconfig = NULL;
//get the arugments from the php layer.
arg_count = ZEND_NUM_ARGS();
switch ( arg_count )
{
case 2:
if ( zend_parse_parameters(arg_count TSRMLS_CC, "sz",
&_str, &slen, &cfg) == FAILURE ) return;
break;
case 3:
if (zend_parse_parameters( arg_count TSRMLS_CC, "szl",
&_str, &slen, &cfg, &rargs) == FAILURE ) return;
break;
default:
WRONG_PARAM_COUNT;
}
//make sure the RB_RET_WORD will be returned.
//rargs |= FRISO_RET_WORD;
//check and initialize the friso.
if ( Z_TYPE_P(cfg) != IS_NULL )
{
nconfig = friso_new_config();
memcpy(nconfig, friso_globals.config, sizeof(friso_config_entry));
//check the new setting.
cfgArr = Z_ARRVAL_P(cfg);
//zend_printf("array length: %d", zend_hash_num_elements(cfgArr));
for ( zend_hash_internal_pointer_reset_ex(cfgArr, &pointer);
zend_hash_get_current_data_ex(cfgArr, (void **)&data, &pointer) == SUCCESS;
zend_hash_move_forward_ex(cfgArr, &pointer) )
{
zend_hash_get_current_key_ex(cfgArr, &_key, &klen, NULL, 0, &pointer);
//zend_printf("key: %s, value: %d<br />", _key, (*data)->value.lval);
if ( strcmp(_key, "kpuncs") == 0 )
{
memcpy(nconfig->kpuncs, (*data)->value.str.val, (*data)->value.str.len);
nconfig->kpuncs[(*data)->value.str.len] = '\0';
}
else
{
//convert the data to long.
convert_to_long_ex(data);
if ( strcmp(_key, "max_len") == 0 )
nconfig->max_len = (ushort_t)(*data)->value.lval;
else if ( strcmp(_key, "r_name") == 0 )
nconfig->r_name = (ushort_t)(*data)->value.lval;
else if ( strcmp(_key, "mix_len") == 0 )
nconfig->mix_len = (ushort_t)(*data)->value.lval;
else if ( strcmp(_key, "lna_len") == 0 )
nconfig->lna_len = (ushort_t)(*data)->value.lval;
else if ( strcmp(_key, "add_syn") == 0 )
nconfig->add_syn = (ushort_t)(*data)->value.lval;
else if ( strcmp(_key, "clr_stw") == 0 )
nconfig->clr_stw = (ushort_t)(*data)->value.lval;
else if ( strcmp(_key, "add_syn") == 0 )
nconfig->add_syn = (ushort_t)(*data)->value.lval;
else if ( strcmp(_key, "keep_urec") == 0 )
nconfig->keep_urec = (ushort_t)(*data)->value.lval;
else if ( strcmp(_key, "spx_out") == 0 )
nconfig->spx_out = (ushort_t)(*data)->value.lval;
else if ( strcmp(_key, "nthreshold") == 0 )
nconfig->nthreshold = (uint_t) (*data)->value.lval;
else if ( strcmp(_key, "mode") == 0 )
friso_set_mode(nconfig, (friso_mode_t)((*data)->value.lval));
else if ( strcmp(_key, "en_sseg") == 0 )
nconfig->en_sseg = (ushort_t) (*data)->value.lval;
else if ( strcmp(_key, "st_minl") == 0 )
nconfig->st_minl = (ushort_t) (*data)->value.lval;
}
}
}
//initialize the array.
MAKE_STD_ZVAL( ret );
array_init( ret );
config = ( nconfig == NULL ) ? friso_globals.config : nconfig;
//create a new friso task.
task = friso_new_task();
idx = 0;
friso_set_text(task, _str);
while ( config->next_token( friso_globals.friso, config, task ) != NULL )
{
MAKE_STD_ZVAL(item);
array_init(item);
add_assoc_string(item, "word", task->token->word, 1);
//check the append of type
if ( (rargs & FRISO_RET_TYPE) != 0 )
add_assoc_long(item, "type", task->token->type);
if ( (rargs & FRISO_RET_LEN) != 0 )
add_assoc_long(item, "len", task->token->length);
if ( (rargs & FRISO_RET_RLEN) != 0 )
add_assoc_long(item, "rlen", task->token->rlen);
if ( (rargs & FRISO_RET_OFF) != 0 )
add_assoc_long(item, "off", task->token->offset);
if ( (rargs & FRISO_RET_POS) != 0 )
add_assoc_stringl(item, "pos", &task->token->pos, 1, 1);
//append the sub result.
add_index_zval( ret, idx++, item );
}
//free the friso task.
friso_free_task(task);
if ( nconfig != NULL ) friso_free_config(nconfig);
//RETURN_ZVAL( ret, 0, 0);
*( return_value ) = *( ret );
}
/* }}} */
/* {{{ proto string friso_version()
   Returns FRISO_VERSION, the version string of the friso library, as a
   PHP-owned copy. */
PHP_FUNCTION(friso_version)
{
    /* RETURN_STRING measures the string itself and duplicates it */
    RETURN_STRING(FRISO_VERSION, 1);
}
/* }}} */
/* {{{ proto string friso_charset()
   Returns "UTF-8" when the global friso instance is configured with
   FRISO_UTF8, and "GBK" for any other charset value. */
PHP_FUNCTION(friso_charset)
{
    if ( friso_globals.friso->charset == FRISO_UTF8 ) {
        RETURN_STRING("UTF-8", 1);
    }

    RETURN_STRING("GBK", 1);
}
/* }}} */
/* {{{ proto bool friso_dic_exist(long type, string str)
Return a bool to confirm that the given str is a word in a specified dictionary. */
PHP_FUNCTION(friso_dic_exist)
{
char *word = NULL;
int wlen;
long type;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ls", &type, &word, &wlen) == FAILURE) {
return;
}
if ( friso_globals.friso->dic == NULL )
RETURN_BOOL(0);
if ( type < 0 || type >= __FRISO_LEXICON_LENGTH__ )
type = __LEX_CJK_WORDS__;
wlen = friso_dic_match( friso_globals.friso->dic, type, word );
RETURN_BOOL(wlen);
}
/* }}} */
/* {{{ proto array friso_dic_get(long type, string str)
Return a array contains all the information of the given word.*/
PHP_FUNCTION(friso_dic_get)
{
char *word = NULL;
int wlen;
long type;
zval *entry;
lex_entry_t e;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ls", &type, &word, &wlen) == FAILURE) {
return;
}
//check the dictionary
if ( friso_globals.friso->dic == NULL )
RETURN_BOOL(0);
MAKE_STD_ZVAL( entry );
array_init( entry );
if ( type < 0 || type >= __FRISO_LEXICON_LENGTH__ )
{
type = __LEX_CJK_WORDS__;
}
e = friso_dic_get( friso_globals.friso->dic, type, word );
if ( e != NULL )
{
add_assoc_long( entry, "length", e->length);
add_assoc_long( entry, "freq", e->fre );
*( return_value ) = * ( entry );
return;
}
RETURN_BOOL(0);
}
/* }}} */
/* {{{ proto long friso_utf8_bytes(string str)
   Returns get_utf8_bytes() applied to the first byte of str, i.e. the byte
   count of the UTF-8 character that byte starts. Returns 0 for a NULL
   string pointer. */
PHP_FUNCTION(friso_utf8_bytes)
{
    char *word = NULL;
    int wlen;

    if ( zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s",
            &word, &wlen) == FAILURE ) {
        return;
    }

    if ( word != NULL ) {
        RETURN_LONG(get_utf8_bytes( word[0] ));
    }

    RETURN_LONG(0);
}
/* }}} */
/* {{{ proto long friso_utf8_ucode(string str)
   Returns the Unicode code point of the first UTF-8 character of str.
   Returns 0 when str is shorter than the sequence its first byte announces
   (which includes the empty string). */
PHP_FUNCTION(friso_utf8_ucode)
{
    char *word = NULL;
    int wlen, _bytes;

    if ( zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s",
            &word, &wlen) == FAILURE ) {
        return;
    }

    /*
     * Refuse truncated sequences before decoding.
     * NOTE(review): presumably get_utf8_unicode() reads as many bytes as the
     * lead byte announces; without this check a truncated input could be
     * read past its end - confirm against friso's UTF-8 helpers.
     */
    _bytes = get_utf8_bytes( word[0] );
    if ( wlen < _bytes ) {
        RETURN_LONG(0);
    }

    RETURN_LONG(get_utf8_unicode( word ));
}
/* }}} */
/* {{{ proto string friso_ucode_utf8(long ucode)
   Returns the UTF-8 encoding of the code point ucode as a string.
   Returns an empty string if the encoder reports a length that does not fit
   the local buffer. */
PHP_FUNCTION(friso_ucode_utf8)
{
    /*
     * "l" stores a long. The previous code passed the address of a pointer
     * variable and then cast the pointer back to an integer.
     */
    long ucode = 0;
    int _bytes;
    char word[7];

    if ( zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &ucode) == FAILURE ) {
        return;
    }

    _bytes = unicode_to_utf8( ( size_t ) ucode, word );

    /* keep the terminator write inside word[] whatever the encoder reports */
    if ( _bytes < 0 || _bytes >= (int) sizeof(word) ) {
        RETURN_EMPTY_STRING();
    }

    word[_bytes] = '\0';
    RETURN_STRINGL( word, _bytes, 1 );
}
/* }}} */
/* The previous line is meant for vim and emacs, so it can correctly fold and
unfold functions in source code. See the corresponding marks just before
function definition, where the functions purpose is also documented. Please
follow this convention for the convenience of others editing your code.
*/
/*
* Local variables:
* tab-width: 4
* c-basic-offset: 4
* End:
* vim600: noet sw=4 ts=4 fdm=marker
* vim<600: noet sw=4 ts=4
*/

70
vendors/binding/php5/php_friso.h vendored Normal file
View File

@ -0,0 +1,70 @@
#ifndef PHP_FRISO_H
#define PHP_FRISO_H
/* Module descriptor defined in friso.c, and the pointer PHP's build system uses to reach it. */
extern zend_module_entry friso_module_entry;
#define phpext_friso_ptr &friso_module_entry
/*
 * Export decoration: dllexport on Windows, default symbol visibility on
 * GCC 4 or newer, and nothing elsewhere.
 */
#ifdef PHP_WIN32
# define PHP_FRISO_API __declspec(dllexport)
#elif defined(__GNUC__) && __GNUC__ >= 4
# define PHP_FRISO_API __attribute__ ((visibility("default")))
#else
# define PHP_FRISO_API
#endif
#ifdef ZTS
#include "TSRM.h"
#endif
#include <friso/friso_API.h>
#include <friso/friso.h>
/* Module and request lifecycle callbacks plus the phpinfo() hook. */
PHP_MINIT_FUNCTION(friso);
PHP_MSHUTDOWN_FUNCTION(friso);
PHP_RINIT_FUNCTION(friso);
PHP_RSHUTDOWN_FUNCTION(friso);
PHP_MINFO_FUNCTION(friso);
/* Functions exposed to PHP code; each is listed in friso_functions[]. */
PHP_FUNCTION(friso_split);
PHP_FUNCTION(friso_version);
PHP_FUNCTION(friso_charset);
PHP_FUNCTION(friso_dic_exist);
PHP_FUNCTION(friso_dic_get);
PHP_FUNCTION(friso_utf8_bytes);
PHP_FUNCTION(friso_utf8_ucode);
PHP_FUNCTION(friso_ucode_utf8);
/*
Declare any global variables you may need between the BEGIN
and END macros here:
ZEND_BEGIN_MODULE_GLOBALS(friso)
long global_value;
char *global_string;
ZEND_END_MODULE_GLOBALS(friso)
*/
/*
 * State shared by the whole extension. It is declared as a plain struct
 * rather than with the ZEND_BEGIN_MODULE_GLOBALS / ZEND_END_MODULE_GLOBALS
 * macros.
 */
typedef struct {
friso_t friso; /* the friso instance used by every call */
friso_config_t config; /* default configuration loaded from friso.ini_file */
} zend_friso_globals;
/* In every utility function you add that needs to use variables
in php_friso_globals, call TSRMLS_FETCH(); after declaring other
variables used by that function, or better yet, pass in TSRMLS_CC
after the last function argument and declare your utility function
with TSRMLS_DC after the last declared argument. Always refer to
the globals in your function as FRISO_G(variable). You are
encouraged to rename these macros something shorter, see
examples in any other php module directory.
*/
/*
 * Accessor for a member of the module globals.
 * NOTE(review): the ZTS branch refers to friso_globals_id, which is not
 * declared anywhere in the code shown; friso.c accesses friso_globals
 * directly instead of going through this macro.
 */
#ifdef ZTS
#define FRISO_G(v) TSRMG(friso_globals_id, zend_friso_globals *, v)
#else
#define FRISO_G(v) (friso_globals.v)
#endif
#endif /* PHP_FRISO_H */

1
vendors/binding/php7/CREDITS vendored Normal file
View File

@ -0,0 +1 @@
friso

63
vendors/binding/php7/config.m4 vendored Normal file
View File

@ -0,0 +1,63 @@
dnl $Id$
dnl config.m4 for extension friso
dnl Comments in this file start with the string 'dnl'.
dnl Remove where necessary. This file will not work
dnl without editing.
dnl If your extension references something external, use with:
dnl PHP_ARG_WITH(friso, for friso support,
dnl Make sure that the comment is aligned:
dnl [ --with-friso Include friso support])
dnl Otherwise use enable:
dnl Declares the --enable-friso configure switch. The third argument is the
dnl text shown by ./configure --help, so it must contain only the option line.
PHP_ARG_ENABLE(friso, whether to enable friso support,
[  --enable-friso           Enable friso support])
if test "$PHP_FRISO" != "no"; then
dnl Only the extension source is registered here. The ext_skel template for
dnl locating an external library (search path, PHP_ADD_INCLUDE,
dnl PHP_CHECK_LIBRARY, PHP_ADD_LIBRARY_WITH_PATH, PHP_SUBST) is not in use.
dnl NOTE(review): nothing in this file adds libfriso to the link line -
dnl confirm how the friso library is linked into the extension.
PHP_NEW_EXTENSION(friso, friso.c, $ext_shared)
fi

13
vendors/binding/php7/config.w32 vendored Normal file
View File

@ -0,0 +1,13 @@
// $Id$
// vim:ft=javascript
// Windows build configuration for the friso extension.
// ARG_ENABLE declares the --enable-friso switch and is what defines
// PHP_FRISO; without it the test below refers to an undefined variable.
ARG_ENABLE("friso", "enable friso support", "no");
if (PHP_FRISO != "no") {
    EXTENSION("friso", "friso.c");
}

72
vendors/binding/php7/demo/friso.fun.php vendored Normal file
View File

@ -0,0 +1,72 @@
<?php
// Demo page that calls every function exported by the friso extension.
header("Content-Type:text/html;charset=utf-8;");
echo "constant access:<br />";
echo "complex mode: ".FRISO_COMPLEX.", simple mode: ".FRISO_SIMPLE."<br />";
echo "friso_ucode_utf8(20013)=".friso_ucode_utf8(20013)."<br />";
// The sample character named in the labels (U+4E2D, decimal 20013) is now
// actually passed to the functions; the arguments used to be empty strings.
echo "friso_utf8_ucode(中)=".friso_utf8_ucode("中")."<br />";
echo "friso_utf8_bytes(中)=".friso_utf8_bytes("中")."<p />";
echo "词库函数:<br />";
echo "friso_dic_exist(研究) ? ".friso_dic_exist(FRISO_LEX_CJK, "研究")."<br />";
$_entry = friso_dic_get(FRISO_LEX_CJK, "你");
echo "friso_dic_get(你)<br />";
// friso_dic_get() returns false when the word is unknown or no dictionary is loaded.
if ( is_array($_entry) ) {
    echo "|——length: ".$_entry["length"].", freq: ".$_entry["freq"]."<p />";
} else {
    echo "|——not found<p />";
}
// version information.
echo "friso_version(): " , friso_version(), ", friso_charset(): ", friso_charset(), "<p />";
echo "分词函数:<br />";
if ( friso_charset() == 'UTF-8' )
{
    $_str = "歧义和同义词:研究生命起源,混合词: 做B超检查身体x射线本质是什么今天去奇都ktv唱卡拉ok去哆啦a梦是一个动漫中的主角单位和全角: 2009年日开始大学之旅岳阳今天的气温为38.6℃, 也就是101.48℉, 英文数字: bug report chenxin619315@gmail.com or visit http://code.google.com/p/jcseg, we all admire the hacker spirit!特殊数字: ① ⑩ ⑽ ㈩.";
    echo "<p>friso_split(\"" . $_str . "\")<p />";
    // API:
    // friso_split(string, array|null, [long])
    // 1. string: the text to segment.
    // 2. array: configuration overrides; pass NULL to use the defaults
    //    (the settings from friso.ini).
    // 3. long: optional, selects which fields are returned; see $_rargs below.
    //
    // A complete configuration array:
    // array('max_len'=>5, 'r_name'=>0, 'mix_len'=>2, 'lna_len'=>1, 'add_syn'=>1,
    //  'clr_stw'=>1, 'keep_urec'=>0, 'spx_out'=>0, 'en_sseg'=> 1, 'st_minl'=>2, 'kpuncs'=>'.+#', 'mode'=>FRISO_COMPLEX);
    // - Do not change nthreshold unless you understand the friso core.
    // - Pass NULL to use the friso.ini file configured in php.ini.
    //
    // Return-field flags:
    // word FRISO_RET_WORD, type FRISO_RET_TYPE, length FRISO_RET_LEN,
    // real length FRISO_RET_RLEN, offset FRISO_RET_OFF,
    // part of speech FRISO_RET_POS (not implemented yet).
    $_rargs = FRISO_RET_TYPE | FRISO_RET_LEN | FRISO_RET_RLEN | FRISO_RET_OFF | FRISO_RET_POS;
    //$_rargs = 0;
    // Token types:
    // CJK word FRISO_TYP_CJK, English-Chinese mixed word (b超) FRISO_TYP_ECM,
    // Chinese-English mixed word (卡拉ok) FRISO_TYP_CEM,
    // English-punctuation mixed word (c++) FRISO_TYP_EPUN, punctuation FRISO_TYP_PUN,
    // unknown FRISO_TYP_UNK, other (synonyms) FRISO_TYP_OTR.
    $_result = friso_split($_str, array('mode'=>FRISO_COMPLEX), $_rargs);
    unset($_str);
    foreach ( $_result as $_val )
    {
        $_str = $_val['word'];
        if ( $_rargs != 0 ) {
            // Collect the requested fields and join them, so the bracket no
            // longer opens with a stray ", ".
            $_parts = array();
            if ( ($_rargs & FRISO_RET_TYPE) != 0 )
                $_parts[] = 'type: ' . $_val['type']; // token type
            if ( ($_rargs & FRISO_RET_LEN) != 0 )
                $_parts[] = 'len: ' . $_val['len']; // token length
            if ( ($_rargs & FRISO_RET_RLEN) != 0 )
                $_parts[] = 'rlen: ' . $_val['rlen']; // real token length
            if ( ($_rargs & FRISO_RET_OFF) != 0 )
                $_parts[] = 'off: ' . $_val['off']; // token offset
            if ( ($_rargs & FRISO_RET_POS) != 0 )
                $_parts[] = 'pos: ' . $_val['pos']; // part of speech
            $_str .= '[' . implode(', ', $_parts) . ']';
        }
        $_str .= '/&nbsp;&nbsp;&nbsp;';
        echo $_str;
    }
}
else echo "set charset to UTF-8 to test function friso_split.";
?>

153
vendors/binding/php7/demo/gbk.demo.php vendored Normal file
View File

@ -0,0 +1,153 @@
<?php
// Request handling for the GBK demo page: checks the charset, reads the
// posted text and options, runs friso_split() and times the call.
//
// The charset parameter of a Content-Type header is written "charset=...".
header('Content-Type: text/html; charset=GBK');
// The former ini_set('magic_quotes_gpc', 0) call was dropped: that directive
// cannot be changed at runtime and no longer exists in PHP 7.
//check the charset
if ( friso_charset() != "GBK" ) {
    $_str = "Error: GBK charset required. <br />";
    $_str .= "1. Modified friso.charset = 1 in your friso.ini .<br />";
    $_str .= "2. Modified friso.lex_dir = GBK lexicon abusolute path to load your GBK lexicon. <br />";
    exit($_str);
}
$text = '';
$_timer = 0;
$_act = '';
$_cfg = array('mode' => FRISO_COMPLEX);
if ( isset($_POST['_act']) && ($_act = $_POST['_act']) == 'split' ) {
    // Accept only the shapes the page expects: a string and an array.
    $text = (isset($_POST['text']) && is_string($_POST['text'])) ? $_POST['text'] : '';
    $_cfg = (isset($_POST['config']) && is_array($_POST['config'])) ? $_POST['config'] : array();
    // Unchecked checkboxes are not submitted; treat them as switched off.
    foreach ( array('add_syn', 'clr_stw', 'keep_urec', 'spx_out', 'en_sseg') as $_flag ) {
        if ( ! isset($_cfg[$_flag]) ) $_cfg[$_flag] = 0;
    }
    $s_time = timer();
    $_ret = friso_split($text, $_cfg);
    $_timer = timer() - $s_time;
}
// Current Unix time in seconds, with microseconds, as a float.
function timer() {
    return microtime(true);
}
?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>GBK - robbe分词测试程序 </title>
<meta http-equiv="content-type" content="text/html;charset=GBK" />
<style type="text/css">
#box {width: 1000px}
.input-text {border: 1px solid #CCC;width: 1000px;height: 180px;background-color: #FFF;
color: #555;font-size: 14px;}
.link-box {overflow: hidden;zoom:1;padding-top:10px;}
#submit-link {float:right;width:150px;height: 26px;line-height: 26px;
background-color: #A50100;color: #FFF;font-weight: bold;text-align: center;
text-decoration: none;font-size: 14px;}
#info-link {float:right;width:300px;height: 26px;line-height: 26px;
background-color: #A50100;color: #FFF;font-weight: bold;text-align: center;
text-decoration: none;font-size: 14px;}
.link-item {float: left;font-size: 14px;font-weight: bold;
height: 26px;line-height: 26px;width: 100px;color: #A50100;}
.title-item {height:30px;line-height: 30px;font-size: 14px;font-weight: bold;}
#cfg-box {margin-bottom: 10px;}
#cfg-box div {overflow: hidden;zoom:1;color:#555;font-size:12px;}
#cfg-box div label {float: left;width: 160px;height: 26px;line-height:26px;text-align:right;
padding-right:10px;font-size:12px;font-weight:bold;color:#555;}
.input {border: 1px solid #DDD;height: 18px;line-height: 18px;padding-left: 5px;width: 120px;
color:#555; outline: none;}
</style>
</head>
<body>
<div id="box">
<form name="robbe" method="post" action="gbk.demo.php">
<div class="title-item">分词配置:</div>
<div id="cfg-box">
<?php /* Posted values are echoed back into the form: numeric options are cast to int and kpuncs is HTML-escaped. Bytes are escaped one at a time (ISO-8859-1); this is safe for GBK because no GBK trail byte equals & < > " or '. */ ?>
<div>
<label>最大词长: </label>
<input type="text" name="config[max_len]" value="<?=isset($_cfg['max_len'])?(int)$_cfg['max_len']:5?>" class="input" />
</div>
<div>
<label>混合词中文词长: </label>
<input type="text" name="config[mix_len]" value="<?=isset($_cfg['mix_len'])?(int)$_cfg['mix_len']:2?>" class="input" />
</div>
<div>
<label>英文二次切分: </label>
<input type="checkbox" name="config[en_sseg]" <?=isset($_cfg['en_sseg'])&&$_cfg['en_sseg']==1?'checked="checked"':''?> value="1" />
</div>
<div>
<label>二次切分子Token最小长度: </label>
<input type="text" name="config[st_minl]" value="<?=isset($_cfg['st_minl'])?(int)$_cfg['st_minl']:2?>" class="input" />
</div>
<div>
<label>英文Token中保留的标点: </label>
<input type="text" name="config[kpuncs]" value="<?=htmlspecialchars(isset($_cfg['kpuncs'])&&is_string($_cfg['kpuncs'])?$_cfg['kpuncs']:'@%.#&+', ENT_QUOTES, 'ISO-8859-1')?>" class="input" />
</div>
<div>
<label>同义词追加: </label>
<input type="checkbox" name="config[add_syn]" <?=isset($_cfg['add_syn'])&&$_cfg['add_syn']==1?'checked="checked"':''?> value="1" />
</div>
<div>
<label>过滤停止词: </label>
<input type="checkbox" name="config[clr_stw]" <?=isset($_cfg['clr_stw'])&&$_cfg['clr_stw']==1?'checked="checked"':''?> value="1" />
</div>
<div>
<label>保留未识别词: </label>
<input type="checkbox" name="config[keep_urec]" <?=isset($_cfg['keep_urec'])&&$_cfg['keep_urec']==1?'checked="checked"':''?> value="1" />
</div>
<div>
<label>sphinx定制输出: </label>
<input type="checkbox" name="config[spx_out]" <?=isset($_cfg['spx_out'])&&$_cfg['spx_out']==1?'checked="checked"':''?> value="1" />
</div>
<div>
<label>分词模式: </label>
<?php /* The extension registers FRISO_SIMPLE and FRISO_COMPLEX; RB_SMODE and RB_CMODE are not defined by it. */ ?>
<input type="radio" name="config[mode]" value="<?=FRISO_SIMPLE?>" <?=isset($_cfg['mode'])&&$_cfg['mode']==FRISO_SIMPLE?'checked="checked"':''?> />简易模式
<input type="radio" name="config[mode]" value="<?=FRISO_COMPLEX?>" <?=isset($_cfg['mode'])&&$_cfg['mode']==FRISO_COMPLEX?'checked="checked"':''?> />复杂模式
</div>
</div>
<div class="title-item">分词内容:</div>
<div class="r-item"><textarea name="text" class="input-text" id="text"><?=htmlspecialchars($text, ENT_QUOTES, 'ISO-8859-1') /* escape posted text; byte-wise escaping is safe for GBK */ ?></textarea></div>
<input type="hidden" name="_act" value="split"/>
<a href="javascript:;" onclick="do_submit();return false;" id="submit-link">robbe分词</a>
</form>
<?php
// Result panel: the tokens separated by spaces, then the input size and the
// time friso_split() took. Token text comes from user input, so it is
// HTML-escaped (byte-wise, which is safe for GBK).
if ( $_act == 'split' ) {
?>
<div class="title-item">分词结果:</div>
<div><textarea class="input-text"><?php if ( is_array($_ret) ) foreach ( $_ret as $_val ) echo htmlspecialchars($_val['word'], ENT_QUOTES, 'ISO-8859-1').' ';?>
</textarea></div>
<div class="link-box"><a id="info-link">
<?php
$len = strlen($text);
if ( $len >= 1048576 ) {
echo substr(($len/1048576), 0, 6).'MB';
} else if ( $len >= 1024 ) {
echo substr( ($len / 1024), 0, 6).'KB';
} else {
echo $len.'B';
}
?>
&nbsp;&nbsp;&nbsp;<?php printf("%.5f", $_timer)?>sec
</a></div>
<?php
}
?>
</div>
<script type="text/javascript">
// Strip leading and trailing whitespace from a string.
String.prototype.trim = function() {
    return this.replace(/^\s+|\s+$/g, '');
};
// Submit the form named "robbe" unless the text field is blank.
function do_submit() {
    var field = document.getElementById('text');
    if ( field.value.trim() != '' ) {
        document.robbe.submit();
    }
}
</script>
</body>

153
vendors/binding/php7/demo/utf8.demo.php vendored Normal file
View File

@ -0,0 +1,153 @@
<?php
// Request handling for the UTF-8 demo page: checks the charset, reads the
// posted text and options, runs friso_split() and times the call.
//
// The charset parameter of a Content-Type header is written "charset=...".
header('Content-Type: text/html; charset=utf-8');
// The former ini_set('magic_quotes_gpc', 0) call was dropped: that directive
// cannot be changed at runtime and no longer exists in PHP 7.
//charset check.
if ( friso_charset() != "UTF-8" ) {
    $_str = "Error: UTF-8 charset required. <br />";
    $_str .= "1. Modified friso.charset = 0 in your friso.ini .<br />";
    $_str .= "2. Modified friso.lex_dir = UTF-8 lexicon abusolute path to load your UTF-8 lexicon. <br />";
    exit($_str);
}
$text = '';
$_timer = 0;
$_act = '';
$_cfg = array('mode' => FRISO_COMPLEX);
if ( isset($_POST['_act']) && ($_act = $_POST['_act']) == 'split' ) {
    // Accept only the shapes the page expects: a string and an array.
    $text = (isset($_POST['text']) && is_string($_POST['text'])) ? $_POST['text'] : '';
    $_cfg = (isset($_POST['config']) && is_array($_POST['config'])) ? $_POST['config'] : array();
    // Unchecked checkboxes are not submitted; treat them as switched off.
    foreach ( array('add_syn', 'clr_stw', 'keep_urec', 'spx_out', 'en_sseg') as $_flag ) {
        if ( ! isset($_cfg[$_flag]) ) $_cfg[$_flag] = 0;
    }
    $s_time = timer();
    $_ret = friso_split($text, $_cfg);
    $_timer = timer() - $s_time;
}
// Current Unix time in seconds, with microseconds, as a float.
function timer() {
    return microtime(true);
}
?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>UTF8 - robbe分词测试程序</title>
<meta http-equiv="content-type" content="text/html;charset=utf-8" />
<style type="text/css">
#box {width: 1000px}
.input-text {border: 1px solid #CCC;width: 1000px;height: 180px;background-color: #FFF;
color: #555;font-size: 14px;}
.link-box {overflow: hidden;zoom:1;padding-top:10px;}
#submit-link {float:right;width:150px;height: 26px;line-height: 26px;
background-color: #A50100;color: #FFF;font-weight: bold;text-align: center;
text-decoration: none;font-size: 14px;}
#info-link {float:right;width:300px;height: 26px;line-height: 26px;
background-color: #A50100;color: #FFF;font-weight: bold;text-align: center;
text-decoration: none;font-size: 14px;}
.link-item {float: left;font-size: 14px;font-weight: bold;
height: 26px;line-height: 26px;width: 100px;color: #A50100;}
.title-item {height:30px;line-height: 30px;font-size: 14px;font-weight: bold;}
#cfg-box {margin-bottom: 10px;}
#cfg-box div {overflow: hidden;zoom:1;color:#555;font-size:12px;}
#cfg-box div label {float: left;width: 160px;height: 26px;line-height:26px;text-align:right;
padding-right:10px;font-size:12px;font-weight:bold;color:#555;}
.input {border: 1px solid #DDD;height: 18px;line-height: 18px;padding-left: 5px;width: 120px;
color:#555; outline: none;}
</style>
</head>
<body>
<div id="box">
<form name="robbe" method="post" action="utf8.demo.php">
<div class="title-item">分词配置:</div>
<div id="cfg-box">
<?php /* Posted values are echoed back into the form: numeric options are cast to int and kpuncs is HTML-escaped. */ ?>
<div>
<label>最大词长: </label>
<input type="text" name="config[max_len]" value="<?=isset($_cfg['max_len'])?(int)$_cfg['max_len']:5?>" class="input" />
</div>
<div>
<label>混合词中文词长: </label>
<input type="text" name="config[mix_len]" value="<?=isset($_cfg['mix_len'])?(int)$_cfg['mix_len']:2?>" class="input" />
</div>
<div>
<label>英文二次切分: </label>
<input type="checkbox" name="config[en_sseg]" <?=isset($_cfg['en_sseg'])&&$_cfg['en_sseg']==1?'checked="checked"':''?> value="1" />
</div>
<div>
<label>二次切分子Token最小长度: </label>
<input type="text" name="config[st_minl]" value="<?=isset($_cfg['st_minl'])?(int)$_cfg['st_minl']:2?>" class="input" />
</div>
<div>
<label>英文Token中保留的标点: </label>
<input type="text" name="config[kpuncs]" value="<?=htmlspecialchars(isset($_cfg['kpuncs'])&&is_string($_cfg['kpuncs'])?$_cfg['kpuncs']:'@%.#&+', ENT_QUOTES, 'UTF-8')?>" class="input" />
</div>
<div>
<label>同义词追加: </label>
<input type="checkbox" name="config[add_syn]" <?=isset($_cfg['add_syn'])&&$_cfg['add_syn']==1?'checked="checked"':''?> value="1" />
</div>
<div>
<label>过滤停止词: </label>
<input type="checkbox" name="config[clr_stw]" <?=isset($_cfg['clr_stw'])&&$_cfg['clr_stw']==1?'checked="checked"':''?> value="1" />
</div>
<div>
<label>保留未识别词: </label>
<input type="checkbox" name="config[keep_urec]" <?=isset($_cfg['keep_urec'])&&$_cfg['keep_urec']==1?'checked="checked"':''?> value="1" />
</div>
<div>
<label>sphinx定制输出: </label>
<input type="checkbox" name="config[spx_out]" <?=isset($_cfg['spx_out'])&&$_cfg['spx_out']==1?'checked="checked"':''?> value="1" />
</div>
<div>
<label>分词模式: </label>
<?php /* The extension registers FRISO_SIMPLE and FRISO_COMPLEX; RB_SMODE and RB_CMODE are not defined by it. */ ?>
<input type="radio" name="config[mode]" value="<?=FRISO_SIMPLE?>" <?=isset($_cfg['mode'])&&$_cfg['mode']==FRISO_SIMPLE?'checked="checked"':''?> />简易模式
<input type="radio" name="config[mode]" value="<?=FRISO_COMPLEX?>" <?=isset($_cfg['mode'])&&$_cfg['mode']==FRISO_COMPLEX?'checked="checked"':''?> />复杂模式
</div>
</div>
<div class="title-item">分词内容:</div>
<div class="r-item"><textarea name="text" class="input-text" id="text"><?=htmlspecialchars($text, ENT_QUOTES, 'UTF-8') /* escape posted text before echoing it back */ ?></textarea></div>
<input type="hidden" name="_act" value="split"/>
<a href="javascript:;" onclick="do_submit();return false;" id="submit-link">robbe分词</a>
</form>
<?php
// Result panel: the tokens separated by spaces, then the input size and the
// time friso_split() took. Token text comes from user input, so it is
// HTML-escaped before output.
if ( $_act == 'split' ) {
?>
<div class="title-item">分词结果:</div>
<div><textarea class="input-text"><?php if ( is_array($_ret) ) foreach ( $_ret as $_val ) echo htmlspecialchars($_val['word'], ENT_QUOTES, 'UTF-8').' ';?>
</textarea></div>
<div class="link-box"><a id="info-link">
<?php
$len = strlen($text);
if ( $len >= 1048576 ) {
echo substr(($len/1048576), 0, 6).'MB';
} else if ( $len >= 1024 ) {
echo substr( ($len / 1024), 0, 6).'KB';
} else {
echo $len.'B';
}
?>
&nbsp;&nbsp;&nbsp;<?php printf("%.5f", $_timer)?>sec
</a></div>
<?php
}
?>
</div>
<script type="text/javascript">
// Strip leading and trailing whitespace from a string.
String.prototype.trim = function() {
    return this.replace(/^\s+|\s+$/g, '');
};
// Submit the form named "robbe" unless the text field is blank.
function do_submit() {
    var field = document.getElementById('text');
    if ( field.value.trim() != '' ) {
        document.robbe.submit();
    }
}
</script>
</body>

21
vendors/binding/php7/friso.php vendored Normal file
View File

@ -0,0 +1,21 @@
<?php
// Smoke test: loads the friso extension if necessary, lists the functions it
// exports and reports whether the module is available.
$br = (php_sapi_name() == "cli")? "":"<br>";
$module = 'friso';
// dl() is not available in every SAPI, so only attempt it where it exists.
if(!extension_loaded($module) && function_exists('dl')) {
    dl('friso.' . PHP_SHLIB_SUFFIX);
}
// get_extension_funcs() returns false for a module that is not loaded.
$functions = extension_loaded($module) ? get_extension_funcs($module) : false;
if (!is_array($functions)) {
    $functions = array();
}
echo "Functions available in the test extension:$br\n";
foreach($functions as $func) {
    echo $func."$br\n";
}
echo "$br\n";
// The ext_skel self-test function is called only if the extension actually
// exports it; calling an undefined function is a fatal error.
$function = 'confirm_' . $module . '_compiled';
if (!extension_loaded($module)) {
    $str = "Module $module is not compiled into PHP";
} else if (function_exists($function)) {
    $str = $function($module);
} else {
    $str = "Module $module is loaded";
}
echo "$str\n";
?>

0
vendors/binding/php7/mkinstalldirs vendored Normal file
View File

21
vendors/binding/php7/tests/001.phpt vendored Normal file
View File

@ -0,0 +1,21 @@
--TEST--
Check for friso presence
--SKIPIF--
<?php if (!extension_loaded("friso")) print "skip"; ?>
--FILE--
<?php
echo "friso extension is available";
/*
you can add regression tests for your extension here
the output of your test code has to be equal to the
text in the --EXPECT-- section below for the tests
to pass, differences between the output and the
expected text are interpreted as failure
see php5/README.TESTING for further information on
writing regression tests
*/
?>
--EXPECT--
friso extension is available