fix: Escape prefix before search in inverted index (#37925)

issue: https://github.com/milvus-io/milvus/issues/37912

Signed-off-by: sunby <sunbingyi1992@gmail.com>
This commit is contained in:
Bingyi Sun 2024-11-22 14:10:33 +08:00 committed by GitHub
parent 06d73cf2e2
commit 700a448a54
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 249 additions and 358 deletions

File diff suppressed because it is too large Load Diff

View File

@ -205,7 +205,8 @@ impl IndexReaderWrapper {
}
/// Runs a prefix search for `prefix`, treating the prefix as literal text.
///
/// The prefix is first passed through `regex::escape` so that regex
/// metacharacters it contains (for example `^` or `$`) match themselves
/// instead of being interpreted as regex syntax. `(.|\n)*` is then appended
/// so that any remainder, including newline characters, is accepted.
///
/// The resulting pattern is handed to `regex_query` and its result is
/// returned unchanged.
pub fn prefix_query_keyword(&self, prefix: &str) -> Vec<u32> {
    // Escape before building the pattern: an unescaped prefix such as "^"
    // would otherwise act as an anchor rather than a literal character.
    let escaped = regex::escape(prefix);
    let pattern = format!("{}(.|\n)*", escaped);
    self.regex_query(&pattern)
}
@ -214,3 +215,39 @@ impl IndexReaderWrapper {
self.search(&q)
}
}
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use tantivy::{
        doc,
        schema::{self, Schema, STORED, STRING, TEXT},
        Index, IndexWriter,
    };

    use super::IndexReaderWrapper;

    /// Prefix queries made of a single regex metacharacter must be matched
    /// literally: with one document starting with `^` and one starting with
    /// `$`, each of those prefixes has to produce exactly one hit.
    #[test]
    pub fn test_escape_regex() {
        // Single "title" field declared with the STRING | STORED options.
        let mut builder = Schema::builder();
        let title = builder.add_text_field("title", STRING | STORED);
        let schema = builder.build();

        // In-memory index holding the two documents under test.
        let index = Index::create_in_ram(schema.clone());
        let mut writer = index.writer(50_000_000).unwrap();
        writer.add_document(doc!(title => "^abc")).unwrap();
        writer.add_document(doc!(title => "$abc")).unwrap();
        writer.commit().unwrap();

        let wrapper = IndexReaderWrapper::from_index(Arc::new(index));

        // Each metacharacter prefix should match only its own document.
        for prefix in ["^", "$"] {
            let hits = wrapper.prefix_query_keyword(prefix);
            assert_eq!(hits.len(), 1);
        }
    }
}