enhance: Remove raw tokenizer registration. (#37886)

Tantivy already registers the raw tokenizer by default under the name "raw", so the explicit "raw_tokenizer" registration is unnecessary.

Signed-off-by: sunby <sunbingyi1992@gmail.com>
This commit is contained in:
Bingyi Sun 2024-11-22 12:02:32 +08:00 committed by GitHub
parent b34bfb98a0
commit 06d73cf2e2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -32,7 +32,6 @@ impl IndexWriterWrapper {
let field: Field;
let mut schema_builder = Schema::builder();
let mut use_raw_tokenizer = false;
match data_type {
TantivyDataType::I64 => {
field = schema_builder.add_i64_field(&field_name, INDEXED);
@ -45,11 +44,10 @@ impl IndexWriterWrapper {
}
TantivyDataType::Keyword => {
let text_field_indexing = TextFieldIndexing::default()
.set_tokenizer("raw_tokenizer")
.set_tokenizer("raw")
.set_index_option(IndexRecordOption::Basic);
let text_options = TextOptions::default().set_indexing_options(text_field_indexing);
field = schema_builder.add_text_field(&field_name, text_options);
use_raw_tokenizer = true;
}
TantivyDataType::Text => {
panic!("text should be indexed with analyzer");
@ -58,11 +56,6 @@ impl IndexWriterWrapper {
let id_field = schema_builder.add_i64_field("doc_id", FAST);
let schema = schema_builder.build();
let index = Index::create_in_dir(path.clone(), schema).unwrap();
if use_raw_tokenizer {
index
.tokenizers()
.register("raw_tokenizer", tokenizer::RawTokenizer::default());
}
let index_writer = index
.writer_with_num_threads(num_threads, overall_memory_budget_in_bytes)
.unwrap();