enhance: optimize self defined rust error (#37975)

Prepare for issue: https://github.com/milvus-io/milvus/issues/37930

Signed-off-by: sunby <sunbingyi1992@gmail.com>
This commit is contained in:
Bingyi Sun 2024-11-28 20:30:36 +08:00 committed by GitHub
parent 84698c072a
commit e6af806a0d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 262 additions and 203 deletions

View File

@ -1,40 +1,35 @@
use core::fmt;
use serde_json as json;
#[derive(Debug)]
pub struct TantivyError{
reason: String,
/// Error type produced by the tantivy binding code.
pub enum TantivyBindingError {
// A failure reported by `serde_json`; the original error value is kept as-is.
JsonError(serde_json::Error),
// Any other failure, carried as a plain message string.
InternalError(String),
}
impl TantivyError{
fn new(reason:String) -> Self{
TantivyError{reason:reason}
}
pub fn reason(&self) -> String{
return self.reason.clone()
/// Conversion from a `serde_json` error, so JSON failures can be turned into
/// a `TantivyBindingError` with `.into()` or the `?` operator.
impl From<serde_json::Error> for TantivyBindingError {
    fn from(value: serde_json::Error) -> Self {
        // Keep the original error value instead of flattening it to a string.
        Self::JsonError(value)
    }
}
impl From<&str> for TantivyError{
fn from(value: &str) -> Self {
Self::new(value.to_string())
impl fmt::Display for TantivyBindingError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
TantivyBindingError::JsonError(e) => write!(f, "JsonError: {}", e),
TantivyBindingError::InternalError(e) => write!(f, "InternalError: {}", e),
}
}
}
impl From<String> for TantivyError{
fn from(value: String) -> Self {
Self::new(value)
impl std::error::Error for TantivyBindingError {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
TantivyBindingError::JsonError(e) => Some(e),
TantivyBindingError::InternalError(_) => None,
}
}
}
impl From<json::Error> for TantivyError{
fn from(value: json::Error) -> Self {
Self::new(value.to_string())
}
}
impl ToString for TantivyError{
fn to_string(&self) -> String {
return self.reason()
}
}
/// Shorthand for `std::result::Result` with `TantivyBindingError` as the error type.
pub type Result<T> = std::result::Result<T, TantivyBindingError>;

View File

@ -1,113 +1,123 @@
use log::warn;
use std::collections::HashMap;
use tantivy::tokenizer::*;
use tantivy::tokenizer::StopWordFilter;
use serde_json as json;
use std::collections::HashMap;
use tantivy::tokenizer::StopWordFilter;
use tantivy::tokenizer::*;
use crate::error::Result;
use crate::error::TantivyBindingError;
use crate::jieba_tokenizer::JiebaTokenizer;
use crate::stop_words;
use crate::tokenizer_filter::*;
use crate::jieba_tokenizer::JiebaTokenizer;
use crate::error::TantivyError;
use crate::util::*;
// default build-in analyzer
pub(crate) fn standard_analyzer(stop_words: Vec<String>) -> TextAnalyzer {
let builder = standard_builder()
.filter(LowerCaser);
let builder = standard_builder().filter(LowerCaser);
if stop_words.len() > 0{
if stop_words.len() > 0 {
return builder.filter(StopWordFilter::remove(stop_words)).build();
}
builder.build()
}
fn chinese_analyzer(stop_words: Vec<String>) -> TextAnalyzer{
fn chinese_analyzer(stop_words: Vec<String>) -> TextAnalyzer {
let builder = jieba_builder().filter(CnAlphaNumOnlyFilter);
if stop_words.len() > 0{
if stop_words.len() > 0 {
return builder.filter(StopWordFilter::remove(stop_words)).build();
}
builder.build()
}
fn english_analyzer(stop_words: Vec<String>) -> TextAnalyzer{
fn english_analyzer(stop_words: Vec<String>) -> TextAnalyzer {
let builder = standard_builder()
.filter(LowerCaser)
.filter(Stemmer::new(Language::English))
.filter(StopWordFilter::remove(stop_words::ENGLISH.iter().map(|&word| word.to_owned())));
.filter(StopWordFilter::remove(
stop_words::ENGLISH.iter().map(|&word| word.to_owned()),
));
if stop_words.len() > 0{
if stop_words.len() > 0 {
return builder.filter(StopWordFilter::remove(stop_words)).build();
}
builder.build()
}
fn standard_builder() -> TextAnalyzerBuilder{
/// Starts an analyzer builder on top of tantivy's `SimpleTokenizer`.
///
/// NOTE(review): `.dynamic()` presumably boxes the tokenizer so that filters
/// can be attached conditionally at runtime — confirm against the tantivy docs.
fn standard_builder() -> TextAnalyzerBuilder {
TextAnalyzer::builder(SimpleTokenizer::default()).dynamic()
}
fn whitespace_builder()-> TextAnalyzerBuilder{
/// Starts an analyzer builder on top of tantivy's `WhitespaceTokenizer`.
///
/// NOTE(review): `.dynamic()` presumably boxes the tokenizer so that filters
/// can be attached conditionally at runtime — confirm against the tantivy docs.
fn whitespace_builder() -> TextAnalyzerBuilder {
TextAnalyzer::builder(WhitespaceTokenizer::default()).dynamic()
}
fn jieba_builder() -> TextAnalyzerBuilder{
/// Starts an analyzer builder on top of the crate's `JiebaTokenizer`.
///
/// NOTE(review): `.dynamic()` presumably boxes the tokenizer so that filters
/// can be attached conditionally at runtime — confirm against the tantivy docs.
fn jieba_builder() -> TextAnalyzerBuilder {
TextAnalyzer::builder(JiebaTokenizer::new()).dynamic()
}
fn get_builder_by_name(name:&String) -> Result<TextAnalyzerBuilder, TantivyError>{
fn get_builder_by_name(name: &String) -> Result<TextAnalyzerBuilder> {
match name.as_str() {
"standard" => Ok(standard_builder()),
"whitespace" => Ok(whitespace_builder()),
"jieba" => Ok(jieba_builder()),
other => {
warn!("unsupported tokenizer: {}", other);
Err(format!("unsupported tokenizer: {}", other).into())
Err(TantivyBindingError::InternalError(format!(
"unsupported tokenizer: {}",
other
)))
}
}
}
struct AnalyzerBuilder<'a>{
struct AnalyzerBuilder<'a> {
// builder: TextAnalyzerBuilder
filters:HashMap<String, SystemFilter>,
params:&'a json::Map<String, json::Value>
filters: HashMap<String, SystemFilter>,
params: &'a json::Map<String, json::Value>,
}
impl AnalyzerBuilder<'_>{
fn new(params: &json::Map<String, json::Value>) -> AnalyzerBuilder{
AnalyzerBuilder{
impl AnalyzerBuilder<'_> {
fn new(params: &json::Map<String, json::Value>) -> AnalyzerBuilder {
AnalyzerBuilder {
filters: HashMap::new(),
params:params,
params: params,
}
}
fn get_tokenizer_name(&self) -> Result<String, TantivyError>{
let tokenizer=self.params.get("tokenizer");
if tokenizer.is_none(){
fn get_tokenizer_name(&self) -> Result<String> {
let tokenizer = self.params.get("tokenizer");
if tokenizer.is_none() {
return Ok("standard".to_string());
}
if !tokenizer.unwrap().is_string(){
return Err(format!("tokenizer name should be string").into());
if !tokenizer.unwrap().is_string() {
return Err(TantivyBindingError::InternalError(format!(
"tokenizer name should be string"
)));
}
Ok(tokenizer.unwrap().as_str().unwrap().to_string())
}
fn add_custom_filter(&mut self, name: &String, params: &json::Map<String, json::Value>) -> Result<(),TantivyError>{
match SystemFilter::try_from(params){
fn add_custom_filter(
&mut self,
name: &String,
params: &json::Map<String, json::Value>,
) -> Result<()> {
match SystemFilter::try_from(params) {
Ok(filter) => {
self.filters.insert(name.to_string(), filter);
Ok(())
},
Err(e) => {Err(e)},
}
Err(e) => Err(e),
}
}
fn add_custom_filters(&mut self, params:&json::Map<String, json::Value>) -> Result<(),TantivyError>{
for (name, value) in params{
if !value.is_object(){
fn add_custom_filters(&mut self, params: &json::Map<String, json::Value>) -> Result<()> {
for (name, value) in params {
if !value.is_object() {
continue;
}
self.add_custom_filter(name, value.as_object().unwrap())?;
@ -115,18 +125,24 @@ impl AnalyzerBuilder<'_>{
Ok(())
}
fn build_filter(&mut self,mut builder: TextAnalyzerBuilder, params: &json::Value) -> Result<TextAnalyzerBuilder, TantivyError>{
if !params.is_array(){
return Err("filter params should be array".into());
fn build_filter(
&mut self,
mut builder: TextAnalyzerBuilder,
params: &json::Value,
) -> Result<TextAnalyzerBuilder> {
if !params.is_array() {
return Err(TantivyBindingError::InternalError(
"filter params should be array".to_string(),
));
}
let filters = params.as_array().unwrap();
for filter in filters{
if filter.is_string(){
for filter in filters {
if filter.is_string() {
let filter_name = filter.as_str().unwrap();
let costum = self.filters.remove(filter_name);
if !costum.is_none(){
if !costum.is_none() {
builder = costum.unwrap().transform(builder);
continue;
}
@ -135,75 +151,82 @@ impl AnalyzerBuilder<'_>{
let system = SystemFilter::from(filter_name);
match system {
SystemFilter::Invalid => {
return Err(format!("build analyzer failed, filter not found :{}", filter_name).into())
return Err(TantivyBindingError::InternalError(format!(
"build analyzer failed, filter not found :{}",
filter_name
)))
}
other => {
builder = other.transform(builder);
},
}
}
}else if filter.is_object(){
let filter=SystemFilter::try_from(filter.as_object().unwrap())?;
} else if filter.is_object() {
let filter = SystemFilter::try_from(filter.as_object().unwrap())?;
builder = filter.transform(builder);
}
};
Ok(builder)
}
fn build_option(&mut self, mut builder: TextAnalyzerBuilder) -> Result<TextAnalyzerBuilder, TantivyError>{
for (key, value) in self.params{
match key.as_str(){
"tokenizer" => {},
"filter" => {
// build with filter if filter param exist
builder=self.build_filter(builder, value)?;
},
other => return Err(format!("unknown analyzer option key: {}", other).into()),
}
}
Ok(builder)
}
fn get_stop_words_option(&self) -> Result<Vec<String>, TantivyError>{
/// Applies every entry of the analyzer params to `builder`.
///
/// The `"tokenizer"` key is ignored here, `"filter"` is passed to
/// `build_filter`, and any other key is rejected with an `InternalError`.
fn build_option(&mut self, mut builder: TextAnalyzerBuilder) -> Result<TextAnalyzerBuilder> {
    // Copy the shared reference out so `self` can be borrowed mutably in the loop.
    let options = self.params;
    for (key, value) in options {
        let option = key.as_str();
        if option == "tokenizer" {
            continue;
        }
        if option != "filter" {
            return Err(TantivyBindingError::InternalError(format!(
                "unknown analyzer option key: {}",
                option
            )));
        }
        // build with filter if filter param exist
        builder = self.build_filter(builder, value)?;
    }
    Ok(builder)
}
fn get_stop_words_option(&self) -> Result<Vec<String>> {
let value = self.params.get("stop_words");
match value{
Some(value)=>{
match value {
Some(value) => {
let str_list = get_string_list(value, "filter stop_words")?;
Ok(get_stop_words_list(str_list))
}
None => Ok(vec![])
None => Ok(vec![]),
}
}
fn build_template(self, type_: &str)-> Result<TextAnalyzer, TantivyError>{
match type_{
"standard" => {
Ok(standard_analyzer(self.get_stop_words_option()?))
},
"chinese" => {
Ok(chinese_analyzer(self.get_stop_words_option()?))
},
"english" => {
Ok(english_analyzer(self.get_stop_words_option()?))
}
other_ => Err(format!("unknown build-in analyzer type: {}", other_).into())
fn build_template(self, type_: &str) -> Result<TextAnalyzer> {
match type_ {
"standard" => Ok(standard_analyzer(self.get_stop_words_option()?)),
"chinese" => Ok(chinese_analyzer(self.get_stop_words_option()?)),
"english" => Ok(english_analyzer(self.get_stop_words_option()?)),
other_ => Err(TantivyBindingError::InternalError(format!(
"unknown build-in analyzer type: {}",
other_
))),
}
}
fn build(mut self) -> Result<TextAnalyzer, TantivyError>{
fn build(mut self) -> Result<TextAnalyzer> {
// build base build-in analyzer
match self.params.get("type"){
Some(type_) =>{
if !type_.is_string(){
return Err(format!("analyzer type shoud be string").into())
match self.params.get("type") {
Some(type_) => {
if !type_.is_string() {
return Err(TantivyBindingError::InternalError(format!(
"analyzer type shoud be string"
)));
}
return self.build_template(type_.as_str().unwrap());
},
}
None => {}
};
//build custom analyzer
let tokenizer_name = self.get_tokenizer_name()?;
let mut builder=get_builder_by_name(&tokenizer_name)?;
let mut builder = get_builder_by_name(&tokenizer_name)?;
// build with option
builder = self.build_option(builder)?;
@ -211,42 +234,46 @@ impl AnalyzerBuilder<'_>{
}
}
pub(crate) fn create_tokenizer_with_filter(params: &String) -> Result<TextAnalyzer, TantivyError> {
match json::from_str::<json::Value>(&params){
Ok(value) =>{
if value.is_null(){
pub(crate) fn create_tokenizer_with_filter(params: &String) -> Result<TextAnalyzer> {
match json::from_str::<json::Value>(&params) {
Ok(value) => {
if value.is_null() {
return Ok(standard_analyzer(vec![]));
}
if !value.is_object(){
return Err("tokenizer params should be a json map".into());
if !value.is_object() {
return Err(TantivyBindingError::InternalError(
"tokenizer params should be a json map".to_string(),
));
}
let json_params = value.as_object().unwrap();
// create builder
let analyzer_params=json_params.get("analyzer");
if analyzer_params.is_none(){
let analyzer_params = json_params.get("analyzer");
if analyzer_params.is_none() {
return Ok(standard_analyzer(vec![]));
}
if !analyzer_params.unwrap().is_object(){
return Err("analyzer params should be a json map".into());
if !analyzer_params.unwrap().is_object() {
return Err(TantivyBindingError::InternalError(
"analyzer params should be a json map".to_string(),
));
}
let mut builder = AnalyzerBuilder::new(analyzer_params.unwrap().as_object().unwrap());
// build custom filter
let filter_params=json_params.get("filter");
if !filter_params.is_none() && filter_params.unwrap().is_object(){
let filter_params = json_params.get("filter");
if !filter_params.is_none() && filter_params.unwrap().is_object() {
builder.add_custom_filters(filter_params.unwrap().as_object().unwrap())?;
}
// build analyzer
builder.build()
},
}
Err(err) => Err(err.into()),
}
}
pub(crate) fn create_tokenizer(params: &String) -> Result<TextAnalyzer, TantivyError> {
if params.len()==0{
pub(crate) fn create_tokenizer(params: &String) -> Result<TextAnalyzer> {
if params.len() == 0 {
return Ok(standard_analyzer(vec![]));
}
create_tokenizer_with_filter(&format!("{{\"analyzer\":{}}}", params))
@ -265,7 +292,7 @@ mod tests {
}"#;
let tokenizer = create_tokenizer(&params.to_string());
assert!(tokenizer.is_ok(), "error: {}", tokenizer.err().unwrap().reason());
assert!(tokenizer.is_ok(), "error: {}", tokenizer.err().unwrap());
}
#[test]
@ -275,17 +302,16 @@ mod tests {
}"#;
let tokenizer = create_tokenizer(&params.to_string());
assert!(tokenizer.is_ok(), "error: {}", tokenizer.err().unwrap().reason());
assert!(tokenizer.is_ok(), "error: {}", tokenizer.err().unwrap());
let mut bining = tokenizer.unwrap();
let mut stream = bining.token_stream("系统安全;,'';lxyz密码");
let mut results = Vec::<String>::new();
while stream.advance(){
while stream.advance() {
let token = stream.token();
results.push(token.text.clone());
}
print!("test tokens :{:?}\n", results)
}
}

View File

@ -1,11 +1,12 @@
use tantivy::tokenizer::*;
use serde_json as json;
use regex;
use serde_json as json;
use tantivy::tokenizer::*;
use crate::error::TantivyError;
use crate::error::Result;
use crate::error::TantivyBindingError;
use crate::util::*;
pub(crate) enum SystemFilter{
pub(crate) enum SystemFilter {
Invalid,
LowerCase(LowerCaser),
AsciiFolding(AsciiFoldingFilter),
@ -15,16 +16,16 @@ pub(crate) enum SystemFilter{
Length(RemoveLongFilter),
Stop(StopWordFilter),
Decompounder(SplitCompoundWords),
Stemmer(Stemmer)
Stemmer(Stemmer),
}
impl SystemFilter{
pub(crate) fn transform(self, builder: TextAnalyzerBuilder) -> TextAnalyzerBuilder{
match self{
impl SystemFilter {
pub(crate) fn transform(self, builder: TextAnalyzerBuilder) -> TextAnalyzerBuilder {
match self {
Self::LowerCase(filter) => builder.filter(filter).dynamic(),
Self::AsciiFolding(filter) => builder.filter(filter).dynamic(),
Self::AlphaNumOnly(filter) => builder.filter(filter).dynamic(),
Self::CnCharOnly(filter) => builder.filter(filter).dynamic(),
Self::CnCharOnly(filter) => builder.filter(filter).dynamic(),
Self::CnAlphaNumOnly(filter) => builder.filter(filter).dynamic(),
Self::Length(filter) => builder.filter(filter).dynamic(),
Self::Stop(filter) => builder.filter(filter).dynamic(),
@ -41,65 +42,85 @@ impl SystemFilter{
// "max": 10, // length
// }
// TODO support min length
fn get_length_filter(params: &json::Map<String, json::Value>) -> Result<SystemFilter, TantivyError>{
fn get_length_filter(params: &json::Map<String, json::Value>) -> Result<SystemFilter> {
let limit_str = params.get("max");
if limit_str.is_none() || !limit_str.unwrap().is_u64(){
return Err("lenth max param was none or not uint".into())
if limit_str.is_none() || !limit_str.unwrap().is_u64() {
return Err(TantivyBindingError::InternalError(
"lenth max param was none or not uint".to_string(),
));
}
let limit = limit_str.unwrap().as_u64().unwrap() as usize;
Ok(SystemFilter::Length(RemoveLongFilter::limit(limit+1)))
Ok(SystemFilter::Length(RemoveLongFilter::limit(limit + 1)))
}
fn get_stop_words_filter(params: &json::Map<String, json::Value>)-> Result<SystemFilter, TantivyError>{
fn get_stop_words_filter(params: &json::Map<String, json::Value>) -> Result<SystemFilter> {
let value = params.get("stop_words");
if value.is_none(){
return Err("stop filter stop_words can't be empty".into());
if value.is_none() {
return Err(TantivyBindingError::InternalError(
"stop filter stop_words can't be empty".to_string(),
));
}
let str_list = get_string_list(value.unwrap(), "stop_words filter")?;
Ok(SystemFilter::Stop(StopWordFilter::remove(get_stop_words_list(str_list))))
Ok(SystemFilter::Stop(StopWordFilter::remove(
get_stop_words_list(str_list),
)))
}
fn get_decompounder_filter(params: &json::Map<String, json::Value>)-> Result<SystemFilter, TantivyError>{
fn get_decompounder_filter(params: &json::Map<String, json::Value>) -> Result<SystemFilter> {
let value = params.get("word_list");
if value.is_none() || !value.unwrap().is_array(){
return Err("decompounder word list should be array".into())
if value.is_none() || !value.unwrap().is_array() {
return Err(TantivyBindingError::InternalError(
"decompounder word list should be array".to_string(),
));
}
let stop_words = value.unwrap().as_array().unwrap();
let mut str_list = Vec::<String>::new();
for element in stop_words{
match element.as_str(){
for element in stop_words {
match element.as_str() {
Some(word) => str_list.push(word.to_string()),
None => return Err("decompounder word list item should be string".into())
None => {
return Err(TantivyBindingError::InternalError(
"decompounder word list item should be string".to_string(),
))
}
}
};
}
match SplitCompoundWords::from_dictionary(str_list){
match SplitCompoundWords::from_dictionary(str_list) {
Ok(f) => Ok(SystemFilter::Decompounder(f)),
Err(e) => Err(format!("create decompounder failed: {}", e.to_string()).into())
Err(e) => Err(TantivyBindingError::InternalError(format!(
"create decompounder failed: {}",
e.to_string()
))),
}
}
fn get_stemmer_filter(params: &json::Map<String, json::Value>)-> Result<SystemFilter, TantivyError>{
fn get_stemmer_filter(params: &json::Map<String, json::Value>) -> Result<SystemFilter> {
let value = params.get("language");
if value.is_none() || !value.unwrap().is_string(){
return Err("stemmer language field should be string".into())
if value.is_none() || !value.unwrap().is_string() {
return Err(TantivyBindingError::InternalError(
"stemmer language field should be string".to_string(),
));
}
match value.unwrap().as_str().unwrap().into_language(){
match value.unwrap().as_str().unwrap().into_language() {
Ok(language) => Ok(SystemFilter::Stemmer(Stemmer::new(language))),
Err(e) => Err(format!("create stemmer failed : {}", e.to_string()).into()),
Err(e) => Err(TantivyBindingError::InternalError(format!(
"create stemmer failed : {}",
e.to_string()
))),
}
}
trait LanguageParser {
type Error;
fn into_language(self) -> Result<Language, Self::Error>;
fn into_language(self) -> Result<Language>;
}
impl LanguageParser for &str {
type Error = TantivyError;
fn into_language(self) -> Result<Language, Self::Error> {
type Error = TantivyBindingError;
fn into_language(self) -> Result<Language> {
match self.to_lowercase().as_str() {
"arabig" => Ok(Language::Arabic),
"danish" => Ok(Language::Danish),
@ -119,14 +140,17 @@ impl LanguageParser for &str {
"swedish" => Ok(Language::Swedish),
"tamil" => Ok(Language::Tamil),
"turkish" => Ok(Language::Turkish),
other => Err(format!("unsupport language: {}", other).into()),
other => Err(TantivyBindingError::InternalError(format!(
"unsupport language: {}",
other
))),
}
}
}
impl From<&str> for SystemFilter{
impl From<&str> for SystemFilter {
fn from(value: &str) -> Self {
match value{
match value {
"lowercase" => Self::LowerCase(LowerCaser),
"asciifolding" => Self::AsciiFolding(AsciiFoldingFilter),
"alphanumonly" => Self::AlphaNumOnly(AlphaNumOnlyFilter),
@ -138,24 +162,31 @@ impl From<&str> for SystemFilter{
}
impl TryFrom<&json::Map<String, json::Value>> for SystemFilter {
type Error = TantivyError;
type Error = TantivyBindingError;
fn try_from(params: &json::Map<String, json::Value>) -> Result<Self, Self::Error> {
match params.get(&"type".to_string()){
Some(value) =>{
if !value.is_string(){
return Err("filter type should be string".into());
fn try_from(params: &json::Map<String, json::Value>) -> Result<Self> {
match params.get(&"type".to_string()) {
Some(value) => {
if !value.is_string() {
return Err(TantivyBindingError::InternalError(
"filter type should be string".to_string(),
));
};
match value.as_str().unwrap(){
match value.as_str().unwrap() {
"length" => get_length_filter(params),
"stop" => get_stop_words_filter(params),
"decompounder" => get_decompounder_filter(params),
"stemmer" => get_stemmer_filter(params),
other=> Err(format!("unsupport filter type: {}", other).into()),
other => Err(TantivyBindingError::InternalError(format!(
"unsupport filter type: {}",
other
))),
}
}
None => Err("no type field in filter params".into()),
None => Err(TantivyBindingError::InternalError(
"no type field in filter params".to_string(),
)),
}
}
}
@ -167,7 +198,7 @@ pub struct CnCharOnlyFilterStream<T> {
tail: T,
}
impl TokenFilter for CnCharOnlyFilter{
impl TokenFilter for CnCharOnlyFilter {
type Tokenizer<T: Tokenizer> = CnCharOnlyFilterWrapper<T>;
fn transform<T: Tokenizer>(self, tokenizer: T) -> CnCharOnlyFilterWrapper<T> {
@ -216,7 +247,7 @@ pub struct CnAlphaNumOnlyFilterStream<T> {
tail: T,
}
impl TokenFilter for CnAlphaNumOnlyFilter{
impl TokenFilter for CnAlphaNumOnlyFilter {
type Tokenizer<T: Tokenizer> = CnAlphaNumOnlyFilterWrapper<T>;
fn transform<T: Tokenizer>(self, tokenizer: T) -> CnAlphaNumOnlyFilterWrapper<T> {

View File

@ -1,10 +1,11 @@
use serde_json as json;
use std::ffi::c_void;
use std::ops::Bound;
use serde_json as json;
use tantivy::{directory::MmapDirectory, Index};
use crate::error::Result;
use crate::error::TantivyBindingError;
use crate::stop_words;
use crate::error::TantivyError;
pub fn index_exist(path: &str) -> bool {
let dir = MmapDirectory::open(path).unwrap();
@ -32,29 +33,35 @@ pub fn free_binding<T>(ptr: *mut c_void) {
}
}
pub(crate) fn get_string_list(value: &json::Value, label: &str) -> Result<Vec<String>, TantivyError>{
if !value.is_array(){
return Err(format!("{} should be array", label).into())
pub(crate) fn get_string_list(value: &json::Value, label: &str) -> Result<Vec<String>> {
if !value.is_array() {
return Err(TantivyBindingError::InternalError(
format!("{} should be array", label).to_string(),
));
}
let stop_words = value.as_array().unwrap();
let mut str_list = Vec::<String>::new();
for element in stop_words{
match element.as_str(){
for element in stop_words {
match element.as_str() {
Some(word) => str_list.push(word.to_string()),
None => return Err(format!("{} list item should be string", label).into())
None => {
return Err(TantivyBindingError::InternalError(
format!("{} list item should be string", label).to_string(),
))
}
}
};
}
Ok(str_list)
}
pub(crate) fn get_stop_words_list(str_list:Vec<String>) -> Vec<String>{
pub(crate) fn get_stop_words_list(str_list: Vec<String>) -> Vec<String> {
let mut stop_words = Vec::new();
for str in str_list{
if str.len()>0 && str.chars().nth(0).unwrap() == '_'{
match str.as_str(){
"_english_" =>{
for word in stop_words::ENGLISH{
for str in str_list {
if str.len() > 0 && str.chars().nth(0).unwrap() == '_' {
match str.as_str() {
"_english_" => {
for word in stop_words::ENGLISH {
stop_words.push(word.to_string());
}
continue;