2024-09-01 17:13:03 +08:00
|
|
|
package ctokenizer
|
|
|
|
|
|
|
|
/*
|
|
|
|
#cgo pkg-config: milvus_core
|
|
|
|
#include <stdlib.h> // free
|
|
|
|
#include "segcore/tokenizer_c.h"
|
|
|
|
#include "segcore/token_stream_c.h"
|
|
|
|
*/
|
|
|
|
import "C"
|
|
|
|
|
|
|
|
import (
|
|
|
|
"unsafe"
|
|
|
|
|
|
|
|
"github.com/milvus-io/milvus/internal/util/tokenizerapi"
|
|
|
|
)
|
|
|
|
|
|
|
|
var _ tokenizerapi.Tokenizer = (*CTokenizer)(nil)
|
|
|
|
|
|
|
|
type CTokenizer struct {
|
|
|
|
ptr C.CTokenizer
|
|
|
|
}
|
|
|
|
|
|
|
|
func NewCTokenizer(ptr C.CTokenizer) *CTokenizer {
|
|
|
|
return &CTokenizer{
|
|
|
|
ptr: ptr,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (impl *CTokenizer) NewTokenStream(text string) tokenizerapi.TokenStream {
|
|
|
|
cText := C.CString(text)
|
|
|
|
defer C.free(unsafe.Pointer(cText))
|
|
|
|
ptr := C.create_token_stream(impl.ptr, cText, (C.uint32_t)(len(text)))
|
|
|
|
return NewCTokenStream(ptr)
|
|
|
|
}
|
|
|
|
|
2024-11-06 17:48:24 +08:00
|
|
|
func (impl *CTokenizer) Clone() (tokenizerapi.Tokenizer, error) {
|
|
|
|
var newptr C.CTokenizer
|
|
|
|
status := C.clone_tokenizer(&impl.ptr, &newptr)
|
|
|
|
if err := HandleCStatus(&status, "failed to clone tokenizer"); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return NewCTokenizer(newptr), nil
|
|
|
|
}
|
|
|
|
|
2024-09-01 17:13:03 +08:00
|
|
|
func (impl *CTokenizer) Destroy() {
|
|
|
|
C.free_tokenizer(impl.ptr)
|
|
|
|
}
|