diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/error.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/error.rs
index d3ddb125cc..ead116d1ac 100644
--- a/internal/core/thirdparty/tantivy/tantivy-binding/src/error.rs
+++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/error.rs
@@ -1,40 +1,35 @@
+use core::fmt;
+
 use serde_json as json;
 
 #[derive(Debug)]
-pub struct TantivyError{
-    reason: String,
+pub enum TantivyBindingError {
+    JsonError(serde_json::Error),
+    InternalError(String),
 }
 
-impl TantivyError{
-    fn new(reason:String) -> Self{
-        TantivyError{reason:reason}
-    }
-
-    pub fn reason(&self) -> String{
-        return self.reason.clone()
+impl From<serde_json::Error> for TantivyBindingError {
+    fn from(value: serde_json::Error) -> Self {
+        TantivyBindingError::JsonError(value)
     }
 }
 
-impl From<&str> for TantivyError{
-    fn from(value: &str) -> Self {
-        Self::new(value.to_string())
+impl fmt::Display for TantivyBindingError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            TantivyBindingError::JsonError(e) => write!(f, "JsonError: {}", e),
+            TantivyBindingError::InternalError(e) => write!(f, "InternalError: {}", e),
+        }
     }
 }
 
-impl From<String> for TantivyError{
-    fn from(value: String) -> Self {
-        Self::new(value)
+impl std::error::Error for TantivyBindingError {
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        match self {
+            TantivyBindingError::JsonError(e) => Some(e),
+            TantivyBindingError::InternalError(_) => None,
+        }
     }
 }
 
-impl From<json::Error> for TantivyError{
-    fn from(value: json::Error) -> Self {
-        Self::new(value.to_string())
-    }
-}
-
-impl ToString for TantivyError{
-    fn to_string(&self) -> String {
-        return self.reason()
-    }
-}
\ No newline at end of file
+pub type Result<T> = std::result::Result<T, TantivyBindingError>;
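Note (editorial, not part of the patch): the new From<serde_json::Error> impl plus the Result<T> alias is what lets call sites forward JSON failures with the ? operator instead of hand-building error strings. A minimal sketch of a hypothetical crate-internal caller; parse_params is an illustrative name, not a function in this patch:

    use crate::error::{Result, TantivyBindingError};

    // A serde_json::Error raised by from_str converts into
    // TantivyBindingError::JsonError automatically through `?`.
    fn parse_params(params: &str) -> Result<serde_json::Map<String, serde_json::Value>> {
        let value: serde_json::Value = serde_json::from_str(params)?;
        value.as_object().cloned().ok_or_else(|| {
            TantivyBindingError::InternalError("params should be a json map".to_string())
        })
    }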
diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/tokenizer.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/tokenizer.rs
index 0e915b56d9..1630381511 100644
--- a/internal/core/thirdparty/tantivy/tantivy-binding/src/tokenizer.rs
+++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/tokenizer.rs
@@ -1,113 +1,123 @@
 use log::warn;
-use std::collections::HashMap;
-use tantivy::tokenizer::*;
-use tantivy::tokenizer::StopWordFilter;
 use serde_json as json;
+use std::collections::HashMap;
+use tantivy::tokenizer::StopWordFilter;
+use tantivy::tokenizer::*;
 
+use crate::error::Result;
+use crate::error::TantivyBindingError;
+use crate::jieba_tokenizer::JiebaTokenizer;
 use crate::stop_words;
 use crate::tokenizer_filter::*;
-use crate::jieba_tokenizer::JiebaTokenizer;
-use crate::error::TantivyError;
 use crate::util::*;
-
 
 // default build-in analyzer
 pub(crate) fn standard_analyzer(stop_words: Vec<String>) -> TextAnalyzer {
-    let builder = standard_builder()
-        .filter(LowerCaser);
+    let builder = standard_builder().filter(LowerCaser);
 
-    if stop_words.len() > 0{
+    if stop_words.len() > 0 {
         return builder.filter(StopWordFilter::remove(stop_words)).build();
     }
 
     builder.build()
 }
 
-fn chinese_analyzer(stop_words: Vec<String>) -> TextAnalyzer{
+fn chinese_analyzer(stop_words: Vec<String>) -> TextAnalyzer {
     let builder = jieba_builder().filter(CnAlphaNumOnlyFilter);
-    if stop_words.len() > 0{
+    if stop_words.len() > 0 {
         return builder.filter(StopWordFilter::remove(stop_words)).build();
     }
 
     builder.build()
 }
 
-fn english_analyzer(stop_words: Vec<String>) -> TextAnalyzer{
+fn english_analyzer(stop_words: Vec<String>) -> TextAnalyzer {
     let builder = standard_builder()
         .filter(LowerCaser)
         .filter(Stemmer::new(Language::English))
-        .filter(StopWordFilter::remove(stop_words::ENGLISH.iter().map(|&word| word.to_owned())));
+        .filter(StopWordFilter::remove(
+            stop_words::ENGLISH.iter().map(|&word| word.to_owned()),
+        ));
 
-    if stop_words.len() > 0{
+    if stop_words.len() > 0 {
         return builder.filter(StopWordFilter::remove(stop_words)).build();
     }
 
     builder.build()
 }
 
-fn standard_builder() -> TextAnalyzerBuilder{
+fn standard_builder() -> TextAnalyzerBuilder {
     TextAnalyzer::builder(SimpleTokenizer::default()).dynamic()
 }
 
-fn whitespace_builder()-> TextAnalyzerBuilder{
+fn whitespace_builder() -> TextAnalyzerBuilder {
     TextAnalyzer::builder(WhitespaceTokenizer::default()).dynamic()
 }
 
-fn jieba_builder() -> TextAnalyzerBuilder{
+fn jieba_builder() -> TextAnalyzerBuilder {
     TextAnalyzer::builder(JiebaTokenizer::new()).dynamic()
 }
 
-fn get_builder_by_name(name:&String) -> Result<TextAnalyzerBuilder, TantivyError>{
+fn get_builder_by_name(name: &String) -> Result<TextAnalyzerBuilder> {
     match name.as_str() {
         "standard" => Ok(standard_builder()),
         "whitespace" => Ok(whitespace_builder()),
         "jieba" => Ok(jieba_builder()),
         other => {
             warn!("unsupported tokenizer: {}", other);
-            Err(format!("unsupported tokenizer: {}", other).into())
+            Err(TantivyBindingError::InternalError(format!(
+                "unsupported tokenizer: {}",
+                other
+            )))
         }
     }
 }
 
-struct AnalyzerBuilder<'a>{
+struct AnalyzerBuilder<'a> {
     // builder: TextAnalyzerBuilder
-    filters:HashMap<String, SystemFilter>,
-    params:&'a json::Map<String, json::Value>
+    filters: HashMap<String, SystemFilter>,
+    params: &'a json::Map<String, json::Value>,
 }
 
-impl AnalyzerBuilder<'_>{
-    fn new(params: &json::Map<String, json::Value>) -> AnalyzerBuilder{
-        AnalyzerBuilder{
+impl AnalyzerBuilder<'_> {
+    fn new(params: &json::Map<String, json::Value>) -> AnalyzerBuilder {
+        AnalyzerBuilder {
             filters: HashMap::new(),
-            params:params,
+            params: params,
         }
     }
 
-    fn get_tokenizer_name(&self) -> Result<String, TantivyError>{
-        let tokenizer=self.params.get("tokenizer");
-        if tokenizer.is_none(){
+    fn get_tokenizer_name(&self) -> Result<String> {
+        let tokenizer = self.params.get("tokenizer");
+        if tokenizer.is_none() {
            return Ok("standard".to_string());
        }
-        if !tokenizer.unwrap().is_string(){
-            return Err(format!("tokenizer name should be string").into());
+        if !tokenizer.unwrap().is_string() {
+            return Err(TantivyBindingError::InternalError(format!(
+                "tokenizer name should be string"
+            )));
        }
 
         Ok(tokenizer.unwrap().as_str().unwrap().to_string())
     }
 
-    fn add_custom_filter(&mut self, name: &String, params: &json::Map<String, json::Value>) -> Result<(),TantivyError>{
-        match SystemFilter::try_from(params){
+    fn add_custom_filter(
+        &mut self,
+        name: &String,
+        params: &json::Map<String, json::Value>,
+    ) -> Result<()> {
+        match SystemFilter::try_from(params) {
             Ok(filter) => {
                 self.filters.insert(name.to_string(), filter);
                 Ok(())
-            },
-            Err(e) => {Err(e)},
+            }
+            Err(e) => Err(e),
         }
     }
 
-    fn add_custom_filters(&mut self, params:&json::Map<String, json::Value>) -> Result<(),TantivyError>{
-        for (name, value) in params{
-            if !value.is_object(){
+    fn add_custom_filters(&mut self, params: &json::Map<String, json::Value>) -> Result<()> {
+        for (name, value) in params {
+            if !value.is_object() {
                 continue;
             }
             self.add_custom_filter(name, value.as_object().unwrap())?;
@@ -115,138 +125,155 @@ impl AnalyzerBuilder<'_>{
         Ok(())
     }
 
-    fn build_filter(&mut self,mut builder: TextAnalyzerBuilder, params: &json::Value) -> Result<TextAnalyzerBuilder,TantivyError>{
-        if !params.is_array(){
-            return Err("filter params should be array".into());
+    fn build_filter(
+        &mut self,
+        mut builder: TextAnalyzerBuilder,
+        params: &json::Value,
+    ) -> Result<TextAnalyzerBuilder> {
+        if !params.is_array() {
+            return Err(TantivyBindingError::InternalError(
+                "filter params should be array".to_string(),
+            ));
         }
-
+
         let filters = params.as_array().unwrap();
-        for filter in filters{
-            if filter.is_string(){
+        for filter in filters {
+            if filter.is_string() {
                 let filter_name = filter.as_str().unwrap();
                 let costum = self.filters.remove(filter_name);
-                if !costum.is_none(){
+                if !costum.is_none() {
                     builder = costum.unwrap().transform(builder);
                     continue;
                 }
-
+
                 // check if filter was system filter
                 let system = SystemFilter::from(filter_name);
                 match system {
                     SystemFilter::Invalid => {
-                        return Err(format!("build analyzer failed, filter not found :{}", filter_name).into())
+                        return Err(TantivyBindingError::InternalError(format!(
+                            "build analyzer failed, filter not found :{}",
+                            filter_name
+                        )))
                     }
                     other => {
                         builder = other.transform(builder);
-                    },
+                    }
                 }
-            }else if filter.is_object(){
-                let filter=SystemFilter::try_from(filter.as_object().unwrap())?;
+            } else if filter.is_object() {
+                let filter = SystemFilter::try_from(filter.as_object().unwrap())?;
                 builder = filter.transform(builder);
             }
-        };
-        Ok(builder)
-    }
-
-    fn build_option(&mut self, mut builder: TextAnalyzerBuilder) -> Result<TextAnalyzerBuilder,TantivyError>{
-        for (key, value) in self.params{
-            match key.as_str(){
-                "tokenizer" => {},
-                "filter" => {
-                    // build with filter if filter param exist
-                    builder=self.build_filter(builder, value)?;
-                },
-                other => return Err(format!("unknown analyzer option key: {}", other).into()),
-            }
         }
         Ok(builder)
     }
 
-    fn get_stop_words_option(&self) -> Result<Vec<String>, TantivyError>{
+    fn build_option(&mut self, mut builder: TextAnalyzerBuilder) -> Result<TextAnalyzerBuilder> {
+        for (key, value) in self.params {
+            match key.as_str() {
+                "tokenizer" => {}
+                "filter" => {
+                    // build with filter if filter param exist
+                    builder = self.build_filter(builder, value)?;
+                }
+                other => {
+                    return Err(TantivyBindingError::InternalError(format!(
+                        "unknown analyzer option key: {}",
+                        other
+                    )))
+                }
+            }
+        }
+        Ok(builder)
+    }
+
+    fn get_stop_words_option(&self) -> Result<Vec<String>> {
         let value = self.params.get("stop_words");
-        match value{
-            Some(value)=>{
+        match value {
+            Some(value) => {
                 let str_list = get_string_list(value, "filter stop_words")?;
                 Ok(get_stop_words_list(str_list))
             }
-            None => Ok(vec![])
-        }
+            None => Ok(vec![]),
+        }
     }
 
-    fn build_template(self, type_: &str)-> Result<TextAnalyzer, TantivyError>{
-        match type_{
-            "standard" => {
-                Ok(standard_analyzer(self.get_stop_words_option()?))
-            },
-            "chinese" => {
-                Ok(chinese_analyzer(self.get_stop_words_option()?))
-            },
-            "english" => {
-                Ok(english_analyzer(self.get_stop_words_option()?))
-            }
-            other_ => Err(format!("unknown build-in analyzer type: {}", other_).into())
+    fn build_template(self, type_: &str) -> Result<TextAnalyzer> {
+        match type_ {
+            "standard" => Ok(standard_analyzer(self.get_stop_words_option()?)),
+            "chinese" => Ok(chinese_analyzer(self.get_stop_words_option()?)),
+            "english" => Ok(english_analyzer(self.get_stop_words_option()?)),
+            other_ => Err(TantivyBindingError::InternalError(format!(
+                "unknown build-in analyzer type: {}",
+                other_
+            ))),
         }
-    }
+    }
 
-    fn build(mut self) -> Result<TextAnalyzer, TantivyError>{
+    fn build(mut self) -> Result<TextAnalyzer> {
         // build base build-in analyzer
-        match self.params.get("type"){
-            Some(type_) =>{
-                if !type_.is_string(){
-                    return Err(format!("analyzer type shoud be string").into())
+        match self.params.get("type") {
+            Some(type_) => {
+                if !type_.is_string() {
+                    return Err(TantivyBindingError::InternalError(format!(
+                        "analyzer type shoud be string"
+                    )));
                 }
                 return self.build_template(type_.as_str().unwrap());
-            },
+            }
             None => {}
         };
 
         //build custom analyzer
         let tokenizer_name = self.get_tokenizer_name()?;
-        let mut builder=get_builder_by_name(&tokenizer_name)?;
-
+        let mut builder = get_builder_by_name(&tokenizer_name)?;
+
         // build with option
         builder = self.build_option(builder)?;
         Ok(builder.build())
     }
 }
 
-pub(crate) fn create_tokenizer_with_filter(params: &String) -> Result<TextAnalyzer, TantivyError> {
-    match json::from_str::<json::Value>(&params){
-        Ok(value) =>{
-            if value.is_null(){
+pub(crate) fn create_tokenizer_with_filter(params: &String) -> Result<TextAnalyzer> {
+    match json::from_str::<json::Value>(&params) {
+        Ok(value) => {
+            if value.is_null() {
                 return Ok(standard_analyzer(vec![]));
             }
-            if !value.is_object(){
-                return Err("tokenizer params should be a json map".into());
+            if !value.is_object() {
+                return Err(TantivyBindingError::InternalError(
+                    "tokenizer params should be a json map".to_string(),
+                ));
             }
             let json_params = value.as_object().unwrap();
 
             // create builder
-            let analyzer_params=json_params.get("analyzer");
-            if analyzer_params.is_none(){
+            let analyzer_params = json_params.get("analyzer");
+            if analyzer_params.is_none() {
                 return Ok(standard_analyzer(vec![]));
             }
-            if !analyzer_params.unwrap().is_object(){
-                return Err("analyzer params should be a json map".into());
+            if !analyzer_params.unwrap().is_object() {
+                return Err(TantivyBindingError::InternalError(
+                    "analyzer params should be a json map".to_string(),
+                ));
             }
             let mut builder = AnalyzerBuilder::new(analyzer_params.unwrap().as_object().unwrap());
-
+
             // build custom filter
-            let filter_params=json_params.get("filter");
-            if !filter_params.is_none() && filter_params.unwrap().is_object(){
+            let filter_params = json_params.get("filter");
+            if !filter_params.is_none() && filter_params.unwrap().is_object() {
                 builder.add_custom_filters(filter_params.unwrap().as_object().unwrap())?;
             }
 
             // build analyzer
             builder.build()
-        },
+        }
         Err(err) => Err(err.into()),
     }
 }
 
-pub(crate) fn create_tokenizer(params: &String) -> Result<TextAnalyzer, TantivyError> {
-    if params.len()==0{
+pub(crate) fn create_tokenizer(params: &String) -> Result<TextAnalyzer> {
+    if params.len() == 0 {
         return Ok(standard_analyzer(vec![]));
     }
     create_tokenizer_with_filter(&format!("{{\"analyzer\":{}}}", params))
@@ -265,7 +292,7 @@ mod tests {
     }"#;
 
         let tokenizer = create_tokenizer(&params.to_string());
-        assert!(tokenizer.is_ok(), "error: {}", tokenizer.err().unwrap().reason());
+        assert!(tokenizer.is_ok(), "error: {}", tokenizer.err().unwrap());
     }
 
     #[test]
@@ -275,17 +302,16 @@
     }"#;
 
         let tokenizer = create_tokenizer(&params.to_string());
-        assert!(tokenizer.is_ok(), "error: {}", tokenizer.err().unwrap().reason());
+        assert!(tokenizer.is_ok(), "error: {}", tokenizer.err().unwrap());
         let mut bining = tokenizer.unwrap();
         let mut stream = bining.token_stream("系统安全;,'';lxyz密码");
-
+
         let mut results = Vec::<String>::new();
-        while stream.advance(){
+        while stream.advance() {
             let token = stream.token();
             results.push(token.text.clone());
         }
 
         print!("test tokens :{:?}\n", results)
     }
-
-}
\ No newline at end of file
+}
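Note (editorial, not part of the patch): create_tokenizer now returns the crate-level Result<TextAnalyzer>, so callers can match on the two error variants rather than a bare string. A sketch under that assumption; the demo function and its params literal are illustrative only:

    use crate::error::TantivyBindingError;
    use crate::tokenizer::create_tokenizer;

    fn demo() {
        let params = r#"{"tokenizer": "jieba"}"#.to_string();
        match create_tokenizer(&params) {
            Ok(mut analyzer) => {
                // Tokenize a short Chinese string with the jieba-based analyzer.
                let mut stream = analyzer.token_stream("系统安全");
                while stream.advance() {
                    println!("{}", stream.token().text);
                }
            }
            // InternalError covers bad analyzer params; JsonError covers malformed JSON.
            Err(TantivyBindingError::JsonError(e)) => println!("bad json: {}", e),
            Err(TantivyBindingError::InternalError(e)) => println!("bad config: {}", e),
        }
    }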
diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/tokenizer_filter.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/tokenizer_filter.rs
index cff1d21310..6b67b662a8 100644
--- a/internal/core/thirdparty/tantivy/tantivy-binding/src/tokenizer_filter.rs
+++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/tokenizer_filter.rs
@@ -1,11 +1,12 @@
-use tantivy::tokenizer::*;
-use serde_json as json;
 use regex;
+use serde_json as json;
+use tantivy::tokenizer::*;
 
-use crate::error::TantivyError;
+use crate::error::Result;
+use crate::error::TantivyBindingError;
 use crate::util::*;
 
-pub(crate) enum SystemFilter{
+pub(crate) enum SystemFilter {
     Invalid,
     LowerCase(LowerCaser),
     AsciiFolding(AsciiFoldingFilter),
@@ -15,16 +16,16 @@
     Length(RemoveLongFilter),
     Stop(StopWordFilter),
     Decompounder(SplitCompoundWords),
-    Stemmer(Stemmer)
+    Stemmer(Stemmer),
 }
 
-impl SystemFilter{
-    pub(crate) fn transform(self, builder: TextAnalyzerBuilder) -> TextAnalyzerBuilder{
-        match self{
+impl SystemFilter {
+    pub(crate) fn transform(self, builder: TextAnalyzerBuilder) -> TextAnalyzerBuilder {
+        match self {
             Self::LowerCase(filter) => builder.filter(filter).dynamic(),
             Self::AsciiFolding(filter) => builder.filter(filter).dynamic(),
             Self::AlphaNumOnly(filter) => builder.filter(filter).dynamic(),
-            Self::CnCharOnly(filter) => builder.filter(filter).dynamic(),            
+            Self::CnCharOnly(filter) => builder.filter(filter).dynamic(),
             Self::CnAlphaNumOnly(filter) => builder.filter(filter).dynamic(),
             Self::Length(filter) => builder.filter(filter).dynamic(),
             Self::Stop(filter) => builder.filter(filter).dynamic(),
@@ -41,65 +42,85 @@
 //     "max": 10, // length
 // }
 // TODO support min length
-fn get_length_filter(params: &json::Map<String, json::Value>) -> Result<SystemFilter, TantivyError>{
+fn get_length_filter(params: &json::Map<String, json::Value>) -> Result<SystemFilter> {
     let limit_str = params.get("max");
-    if limit_str.is_none() || !limit_str.unwrap().is_u64(){
-        return Err("lenth max param was none or not uint".into())
+    if limit_str.is_none() || !limit_str.unwrap().is_u64() {
+        return Err(TantivyBindingError::InternalError(
+            "lenth max param was none or not uint".to_string(),
+        ));
     }
     let limit = limit_str.unwrap().as_u64().unwrap() as usize;
-    Ok(SystemFilter::Length(RemoveLongFilter::limit(limit+1)))
+    Ok(SystemFilter::Length(RemoveLongFilter::limit(limit + 1)))
 }
 
-fn get_stop_words_filter(params: &json::Map<String, json::Value>)-> Result<SystemFilter, TantivyError>{
+fn get_stop_words_filter(params: &json::Map<String, json::Value>) -> Result<SystemFilter> {
     let value = params.get("stop_words");
-    if value.is_none(){
-        return Err("stop filter stop_words can't be empty".into());
+    if value.is_none() {
+        return Err(TantivyBindingError::InternalError(
+            "stop filter stop_words can't be empty".to_string(),
+        ));
     }
     let str_list = get_string_list(value.unwrap(), "stop_words filter")?;
-    Ok(SystemFilter::Stop(StopWordFilter::remove(get_stop_words_list(str_list))))
+    Ok(SystemFilter::Stop(StopWordFilter::remove(
+        get_stop_words_list(str_list),
+    )))
 }
 
-fn get_decompounder_filter(params: &json::Map<String, json::Value>)-> Result<SystemFilter, TantivyError>{
+fn get_decompounder_filter(params: &json::Map<String, json::Value>) -> Result<SystemFilter> {
     let value = params.get("word_list");
-    if value.is_none() || !value.unwrap().is_array(){
-        return Err("decompounder word list should be array".into())
+    if value.is_none() || !value.unwrap().is_array() {
+        return Err(TantivyBindingError::InternalError(
+            "decompounder word list should be array".to_string(),
+        ));
     }
 
     let stop_words = value.unwrap().as_array().unwrap();
     let mut str_list = Vec::<String>::new();
-    for element in stop_words{
-        match element.as_str(){
+    for element in stop_words {
+        match element.as_str() {
             Some(word) => str_list.push(word.to_string()),
-            None => return Err("decompounder word list item should be string".into())
+            None => {
+                return Err(TantivyBindingError::InternalError(
+                    "decompounder word list item should be string".to_string(),
+                ))
+            }
         }
-    };
+    }
 
-    match SplitCompoundWords::from_dictionary(str_list){
+    match SplitCompoundWords::from_dictionary(str_list) {
         Ok(f) => Ok(SystemFilter::Decompounder(f)),
-        Err(e) => Err(format!("create decompounder failed: {}", e.to_string()).into())
+        Err(e) => Err(TantivyBindingError::InternalError(format!(
+            "create decompounder failed: {}",
+            e.to_string()
+        ))),
     }
 }
 
-fn get_stemmer_filter(params: &json::Map<String, json::Value>)-> Result<SystemFilter, TantivyError>{
+fn get_stemmer_filter(params: &json::Map<String, json::Value>) -> Result<SystemFilter> {
     let value = params.get("language");
params.get("language"); - if value.is_none() || !value.unwrap().is_string(){ - return Err("stemmer language field should be string".into()) + if value.is_none() || !value.unwrap().is_string() { + return Err(TantivyBindingError::InternalError( + "stemmer language field should be string".to_string(), + )); } - match value.unwrap().as_str().unwrap().into_language(){ + match value.unwrap().as_str().unwrap().into_language() { Ok(language) => Ok(SystemFilter::Stemmer(Stemmer::new(language))), - Err(e) => Err(format!("create stemmer failed : {}", e.to_string()).into()), + Err(e) => Err(TantivyBindingError::InternalError(format!( + "create stemmer failed : {}", + e.to_string() + ))), } } trait LanguageParser { type Error; - fn into_language(self) -> Result; + fn into_language(self) -> Result; } -impl LanguageParser for &str { - type Error = TantivyError; - fn into_language(self) -> Result { +impl LanguageParser for &str { + type Error = TantivyBindingError; + fn into_language(self) -> Result { match self.to_lowercase().as_str() { "arabig" => Ok(Language::Arabic), "danish" => Ok(Language::Danish), @@ -119,14 +140,17 @@ impl LanguageParser for &str { "swedish" => Ok(Language::Swedish), "tamil" => Ok(Language::Tamil), "turkish" => Ok(Language::Turkish), - other => Err(format!("unsupport language: {}", other).into()), + other => Err(TantivyBindingError::InternalError(format!( + "unsupport language: {}", + other + ))), } } } -impl From<&str> for SystemFilter{ +impl From<&str> for SystemFilter { fn from(value: &str) -> Self { - match value{ + match value { "lowercase" => Self::LowerCase(LowerCaser), "asciifolding" => Self::AsciiFolding(AsciiFoldingFilter), "alphanumonly" => Self::AlphaNumOnly(AlphaNumOnlyFilter), @@ -138,24 +162,31 @@ impl From<&str> for SystemFilter{ } impl TryFrom<&json::Map> for SystemFilter { - type Error = TantivyError; + type Error = TantivyBindingError; - fn try_from(params: &json::Map) -> Result { - match params.get(&"type".to_string()){ - Some(value) =>{ - if !value.is_string(){ - return Err("filter type should be string".into()); + fn try_from(params: &json::Map) -> Result { + match params.get(&"type".to_string()) { + Some(value) => { + if !value.is_string() { + return Err(TantivyBindingError::InternalError( + "filter type should be string".to_string(), + )); }; - match value.as_str().unwrap(){ + match value.as_str().unwrap() { "length" => get_length_filter(params), "stop" => get_stop_words_filter(params), "decompounder" => get_decompounder_filter(params), "stemmer" => get_stemmer_filter(params), - other=> Err(format!("unsupport filter type: {}", other).into()), + other => Err(TantivyBindingError::InternalError(format!( + "unsupport filter type: {}", + other + ))), } } - None => Err("no type field in filter params".into()), + None => Err(TantivyBindingError::InternalError( + "no type field in filter params".to_string(), + )), } } } @@ -167,7 +198,7 @@ pub struct CnCharOnlyFilterStream { tail: T, } -impl TokenFilter for CnCharOnlyFilter{ +impl TokenFilter for CnCharOnlyFilter { type Tokenizer = CnCharOnlyFilterWrapper; fn transform(self, tokenizer: T) -> CnCharOnlyFilterWrapper { @@ -216,7 +247,7 @@ pub struct CnAlphaNumOnlyFilterStream { tail: T, } -impl TokenFilter for CnAlphaNumOnlyFilter{ +impl TokenFilter for CnAlphaNumOnlyFilter { type Tokenizer = CnAlphaNumOnlyFilterWrapper; fn transform(self, tokenizer: T) -> CnAlphaNumOnlyFilterWrapper { @@ -255,4 +286,4 @@ impl TokenStream for CnAlphaNumOnlyFilterStream { fn token_mut(&mut self) -> &mut Token { self.tail.token_mut() } 
diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/util.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/util.rs
index 8e33b43214..8fa56898f4 100644
--- a/internal/core/thirdparty/tantivy/tantivy-binding/src/util.rs
+++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/util.rs
@@ -1,10 +1,11 @@
+use serde_json as json;
 use std::ffi::c_void;
 use std::ops::Bound;
-use serde_json as json;
 use tantivy::{directory::MmapDirectory, Index};
 
+use crate::error::Result;
+use crate::error::TantivyBindingError;
 use crate::stop_words;
-use crate::error::TantivyError;
 
 pub fn index_exist(path: &str) -> bool {
     let dir = MmapDirectory::open(path).unwrap();
@@ -32,29 +33,35 @@ pub fn free_binding(ptr: *mut c_void) {
     }
 }
 
-pub(crate) fn get_string_list(value: &json::Value, label: &str) -> Result<Vec<String>, TantivyError>{
-    if !value.is_array(){
-        return Err(format!("{} should be array", label).into())
+pub(crate) fn get_string_list(value: &json::Value, label: &str) -> Result<Vec<String>> {
+    if !value.is_array() {
+        return Err(TantivyBindingError::InternalError(
+            format!("{} should be array", label).to_string(),
+        ));
     }
 
     let stop_words = value.as_array().unwrap();
     let mut str_list = Vec::<String>::new();
-    for element in stop_words{
-        match element.as_str(){
+    for element in stop_words {
+        match element.as_str() {
             Some(word) => str_list.push(word.to_string()),
-            None => return Err(format!("{} list item should be string", label).into())
+            None => {
+                return Err(TantivyBindingError::InternalError(
+                    format!("{} list item should be string", label).to_string(),
+                ))
+            }
         }
-    };
+    }
     Ok(str_list)
 }
 
-pub(crate) fn get_stop_words_list(str_list:Vec<String>) -> Vec<String>{
+pub(crate) fn get_stop_words_list(str_list: Vec<String>) -> Vec<String> {
     let mut stop_words = Vec::new();
-    for str in str_list{
-        if str.len()>0 && str.chars().nth(0).unwrap() == '_'{
-            match str.as_str(){
-                "_english_" =>{
-                    for word in stop_words::ENGLISH{
+    for str in str_list {
+        if str.len() > 0 && str.chars().nth(0).unwrap() == '_' {
+            match str.as_str() {
+                "_english_" => {
+                    for word in stop_words::ENGLISH {
                         stop_words.push(word.to_string());
                     }
                     continue;