milvus/internal/parser/planparserv2/plan_parser_v2_test.go
Ted Xu b9c037f558
feat: adding cache to expression parse (#36185)
See #36122

This PR improves the proxy node performance by adding cache to
expression parse.

---------

Signed-off-by: Ted Xu <ted.xu@zilliz.com>
2024-09-13 10:03:09 +08:00

1341 lines
34 KiB
Go

package planparserv2
import (
"fmt"
"sync"
"testing"
"github.com/antlr/antlr4/runtime/Go/antlr"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/proto/planpb"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
func newTestSchema() *schemapb.CollectionSchema {
fields := []*schemapb.FieldSchema{
{FieldID: 0, Name: "FieldID", IsPrimaryKey: false, Description: "field no.1", DataType: schemapb.DataType_Int64},
}
for name, value := range schemapb.DataType_value {
dataType := schemapb.DataType(value)
newField := &schemapb.FieldSchema{
FieldID: int64(100 + value), Name: name + "Field", IsPrimaryKey: false, Description: "", DataType: dataType,
}
if dataType == schemapb.DataType_Array {
newField.ElementType = schemapb.DataType_Int64
}
fields = append(fields, newField)
}
fields = append(fields, &schemapb.FieldSchema{
FieldID: 130, Name: common.MetaFieldName, IsPrimaryKey: false, Description: "dynamic field", DataType: schemapb.DataType_JSON,
IsDynamic: true,
})
fields = append(fields, &schemapb.FieldSchema{
FieldID: 131, Name: "StringArrayField", IsPrimaryKey: false, Description: "string array field",
DataType: schemapb.DataType_Array,
ElementType: schemapb.DataType_VarChar,
})
return &schemapb.CollectionSchema{
Name: "test",
Description: "schema for test used",
AutoID: true,
Fields: fields,
EnableDynamicField: true,
}
}
func newTestSchemaHelper(t *testing.T) *typeutil.SchemaHelper {
schema := newTestSchema()
schemaHelper, err := typeutil.CreateSchemaHelper(schema)
require.NoError(t, err)
return schemaHelper
}
func assertValidExpr(t *testing.T, helper *typeutil.SchemaHelper, exprStr string) {
_, err := ParseExpr(helper, exprStr)
assert.NoError(t, err, exprStr)
// expr, err := ParseExpr(helper, exprStr)
// assert.NoError(t, err, exprStr)
// fmt.Printf("expr: %s\n", exprStr)
// ShowExpr(expr)
}
func assertInvalidExpr(t *testing.T, helper *typeutil.SchemaHelper, exprStr string) {
_, err := ParseExpr(helper, exprStr)
assert.Error(t, err, exprStr)
}
func TestExpr_Term(t *testing.T) {
schema := newTestSchema()
helper, err := typeutil.CreateSchemaHelper(schema)
assert.NoError(t, err)
exprStrs := []string{
`BoolField in [true, false]`,
`Int8Field in [1, 2]`,
`Int16Field in [3, 4]`,
`Int32Field in [5, 6]`,
`Int64Field in [7, 8]`,
`FloatField in [9.0, 10.0]`,
`DoubleField in [11.0, 12.0]`,
`StringField in ["str13", "str14"]`,
`VarCharField in ["str15", "str16"]`,
`FloatField in [1373, 115]`,
`Int64Field in [17]`,
`Int64Field in []`,
`Int64Field not in []`,
`JSONField["A"] in [1, 10]`,
`JSONField["A"] in []`,
`$meta["A"] in [1, 10]`,
`$meta["A"] in []`,
`A in [1, 10]`,
`A in []`,
`A in ["abc", "def"]`,
`A in ["1", "2", "abc", "def"]`,
`A in ["1", 2, "abc", 2.2]`,
}
for _, exprStr := range exprStrs {
assertValidExpr(t, helper, exprStr)
}
}
func TestExpr_Compare(t *testing.T) {
schema := newTestSchema()
helper, err := typeutil.CreateSchemaHelper(schema)
assert.NoError(t, err)
exprStrs := []string{
`Int8Field < Int16Field`,
`Int16Field <= Int32Field`,
`Int32Field > Int64Field`,
`Int64Field >= FloatField`,
`FloatField == DoubleField`,
`StringField != VarCharField`,
`JSONField["A"] > Int16Field`,
`$meta["A"] > Int16Field`,
}
for _, exprStr := range exprStrs {
assertValidExpr(t, helper, exprStr)
}
}
func TestExpr_UnaryRange(t *testing.T) {
schema := newTestSchema()
helper, err := typeutil.CreateSchemaHelper(schema)
assert.NoError(t, err)
exprStrs := []string{
`Int8Field < 0`,
`Int16Field <= 1`,
`Int32Field > 2`,
`Int64Field >= 3`,
`FloatField == 4.0`,
`FloatField == 2`,
`DoubleField != 5.0`,
`StringField > "str6"`,
`VarCharField <= "str7"`,
`JSONField["A"] > 10`,
`$meta["A"] > 10`,
}
for _, exprStr := range exprStrs {
assertValidExpr(t, helper, exprStr)
}
}
func TestExpr_Like(t *testing.T) {
schema := newTestSchema()
helper, err := typeutil.CreateSchemaHelper(schema)
assert.NoError(t, err)
exprStrs := []string{
`VarCharField like "prefix%"`,
`VarCharField like "equal"`,
`JSONField["A"] like "name*"`,
`$meta["A"] like "name*"`,
}
for _, exprStr := range exprStrs {
assertValidExpr(t, helper, exprStr)
}
// TODO: enable these after regex-match is supported.
//unsupported := []string{
// `VarCharField like "not_%_supported"`,
// `JSONField["A"] like "not_%_supported"`,
// `$meta["A"] like "not_%_supported"`,
//}
//for _, exprStr := range unsupported {
// assertInvalidExpr(t, helper, exprStr)
//}
}
func TestExpr_TextMatch(t *testing.T) {
schema := newTestSchema()
helper, err := typeutil.CreateSchemaHelper(schema)
assert.NoError(t, err)
exprStrs := []string{
`TextMatch(VarCharField, "query")`,
}
for _, exprStr := range exprStrs {
assertValidExpr(t, helper, exprStr)
}
unsupported := []string{
`TextMatch(not_exist, "query")`,
`TextMatch(BoolField, "query")`,
}
for _, exprStr := range unsupported {
assertInvalidExpr(t, helper, exprStr)
}
}
func TestExpr_BinaryRange(t *testing.T) {
schema := newTestSchema()
helper, err := typeutil.CreateSchemaHelper(schema)
assert.NoError(t, err)
exprStrs := []string{
`1 < Int8Field < 2`,
`3 <= Int16Field < 4`,
`5 <= Int32Field <= 6`,
`7 < Int64Field <= 8`,
`9.0 < FloatField < 10.0`,
`11.0 < DoubleField < 12.0`,
`"str13" < StringField < "str14"`,
`"str15" < VarCharField < "str16"`,
`17 < DoubleField < 18`,
`10 < A < 25`,
`2 > Int8Field > 1`,
`4 >= Int16Field >= 3`,
`6 >= Int32Field >= 5`,
`8 >= Int64Field > 7`,
`10.0 > FloatField > 9.0`,
`12.0 > DoubleField > 11.0`,
`"str14" > StringField > "str13"`,
`"str16" > VarCharField > "str15"`,
`18 > DoubleField > 17`,
`100 > B > 14`,
`1 < JSONField < 3`,
}
for _, exprStr := range exprStrs {
assertValidExpr(t, helper, exprStr)
}
invalidExprs := []string{
`1 < ArrayField < 3`,
`1 < A+B < 3`,
}
for _, exprStr := range invalidExprs {
assertInvalidExpr(t, helper, exprStr)
}
}
func TestExpr_BinaryArith(t *testing.T) {
schema := newTestSchema()
helper, err := typeutil.CreateSchemaHelper(schema)
assert.NoError(t, err)
exprStrs := []string{
`Int64Field % 10 == 9`,
`Int64Field % 10 != 9`,
`Int64Field + 1.1 == 2.1`,
`A % 10 != 2`,
`Int8Field + 1 < 2`,
`Int16Field - 3 <= 4`,
`Int32Field * 5 > 6`,
`Int64Field / 7 >= 8`,
`FloatField + 11 < 12`,
`DoubleField - 13 <= 14`,
`A * 15 > 16`,
`JSONField['A'] / 17 >= 18`,
`ArrayField[0] % 19 >= 20`,
`JSONField + 15 == 16`,
`15 + JSONField == 16`,
}
for _, exprStr := range exprStrs {
assertValidExpr(t, helper, exprStr)
}
// TODO: enable these after execution backend is ready.
unsupported := []string{
`ArrayField + 15 == 16`,
`15 + ArrayField == 16`,
}
for _, exprStr := range unsupported {
assertInvalidExpr(t, helper, exprStr)
}
}
func TestExpr_Value(t *testing.T) {
schema := newTestSchema()
helper, err := typeutil.CreateSchemaHelper(schema)
assert.NoError(t, err)
exprStrs := []string{
`1`,
`2.0`,
`true`,
`false`,
`"str"`,
}
for _, exprStr := range exprStrs {
expr := handleExpr(helper, exprStr)
assert.NotNil(t, getExpr(expr).expr, exprStr)
// fmt.Printf("expr: %s\n", exprStr)
// ShowExpr(getExpr(expr).expr)
}
}
func TestExpr_Identifier(t *testing.T) {
schema := newTestSchema()
helper, err := typeutil.CreateSchemaHelper(schema)
assert.NoError(t, err)
exprStrs := []string{
`BoolField`,
`Int8Field`,
`Int16Field`,
`Int32Field`,
`Int64Field`,
`FloatField`,
`DoubleField`,
`StringField`,
`VarCharField`,
`JSONField["A"]`,
`$meta["A"]`,
}
for _, exprStr := range exprStrs {
expr := handleExpr(helper, exprStr)
assert.NotNil(t, getExpr(expr).expr, exprStr)
// fmt.Printf("expr: %s\n", exprStr)
// ShowExpr(getExpr(expr).expr)
}
}
func TestExpr_Constant(t *testing.T) {
schema := newTestSchema()
helper, err := typeutil.CreateSchemaHelper(schema)
assert.NoError(t, err)
exprStrs := []string{
// ------------------- arithmetic operations ----------------
`1.0 + 2.0`,
`1.0 + 2`,
`1 + 2.0`,
`1 + 2`,
`1.0 - 2.0`,
`1.0 - 2`,
`1 - 2.0`,
`1 - 2`,
`1.0 * 2.0`,
`1.0 * 2`,
`1 * 2.0`,
`1 * 2`,
`1.0 / 2.0`,
`1.0 / 2`,
`1 / 2.0`,
`1 / 2`,
`1 % 2`,
// ------------------- logical operations ----------------
`true and false`,
`true or false`,
`!true`,
`!false`,
// ------------------- relational operations ----------------
`"1" < "2"`,
`1.0 < 2.0`,
`1.0 < 2`,
`1 < 2.0`,
`1 < 2`,
`"1" <= "2"`,
`1.0 <= 2.0`,
`1.0 <= 2`,
`1 <= 2.0`,
`1 <= 2`,
`"1" > "2"`,
`1.0 > 2.0`,
`1.0 > 2`,
`1 > 2.0`,
`1 > 2`,
`"1" >= "2"`,
`1.0 >= 2.0`,
`1.0 >= 2`,
`1 >= 2.0`,
`1 >= 2`,
`"1" == "2"`,
`1.0 == 2.0`,
`1.0 == 2`,
`1 == 2.0`,
`1 == 2`,
`true == false`,
`"1" != "2"`,
`1.0 != 2.0`,
`1.0 != 2`,
`1 != 2.0`,
`1 != 2`,
`true != false`,
}
for _, exprStr := range exprStrs {
expr := handleExpr(helper, exprStr)
assert.NotNil(t, getExpr(expr).expr, exprStr)
// fmt.Printf("expr: %s\n", exprStr)
// ShowExpr(getExpr(expr).expr)
}
}
func TestExpr_Combinations(t *testing.T) {
schema := newTestSchema()
helper, err := typeutil.CreateSchemaHelper(schema)
assert.NoError(t, err)
exprStrs := []string{
`not (Int8Field + 1 == 2)`,
`(Int16Field - 3 == 4) and (Int32Field * 5 != 6)`,
`(Int64Field / 7 != 8) or (Int64Field % 10 == 9)`,
`Int64Field > 0 && VarCharField > "0"`,
`Int64Field < 0 && VarCharField < "0"`,
`A > 50 or B < 40`,
}
for _, exprStr := range exprStrs {
assertValidExpr(t, helper, exprStr)
}
}
func TestCreateRetrievePlan(t *testing.T) {
schema := newTestSchemaHelper(t)
_, err := CreateRetrievePlan(schema, "Int64Field > 0")
assert.NoError(t, err)
}
func TestCreateSearchPlan(t *testing.T) {
schema := newTestSchemaHelper(t)
_, err := CreateSearchPlan(schema, `$meta["A"] != 10`, "FloatVectorField", &planpb.QueryInfo{
Topk: 0,
MetricType: "",
SearchParams: "",
RoundDecimal: 0,
})
assert.NoError(t, err)
}
func TestCreateFloat16SearchPlan(t *testing.T) {
schema := newTestSchemaHelper(t)
_, err := CreateSearchPlan(schema, `$meta["A"] != 10`, "Float16VectorField", &planpb.QueryInfo{
Topk: 0,
MetricType: "",
SearchParams: "",
RoundDecimal: 0,
})
assert.NoError(t, err)
}
func TestCreateBFloat16earchPlan(t *testing.T) {
schema := newTestSchemaHelper(t)
_, err := CreateSearchPlan(schema, `$meta["A"] != 10`, "BFloat16VectorField", &planpb.QueryInfo{
Topk: 0,
MetricType: "",
SearchParams: "",
RoundDecimal: 0,
})
assert.NoError(t, err)
}
func TestCreateSparseFloatVectorSearchPlan(t *testing.T) {
schema := newTestSchemaHelper(t)
_, err := CreateSearchPlan(schema, `$meta["A"] != 10`, "SparseFloatVectorField", &planpb.QueryInfo{
Topk: 0,
MetricType: "",
SearchParams: "",
RoundDecimal: 0,
})
assert.NoError(t, err)
}
func TestExpr_Invalid(t *testing.T) {
schema := newTestSchema()
helper, err := typeutil.CreateSchemaHelper(schema)
assert.NoError(t, err)
exprStrs := []string{
`invalid expression`,
`"constant"`,
// -------------- identifier not in schema --------------
`not_in_schema`,
// ------------------------ Add/Sub ---------------------
`not_in_schema + 1`,
`1 - not_in_schema`,
`true + false`,
`"str" + "text"`,
`true + "str"`,
`true - false`,
`"str" - "text"`,
`true - "str"`,
`StringField + VarCharField`,
`StringField - 2`,
`2 + StringField`,
// ------------------------ Mul/Div/Mod ---------------------
`not_in_schema * 1`,
`1 / not_in_schema`,
`1 % not_in_schema`,
`true * false`,
`true / false`,
`true % false`,
`"str" * "text"`,
`"str" / "text"`,
`"str" % "text"`,
`2 / 0`,
`2 % 0`,
`StringField % VarCharField`,
`StringField * 2`,
`2 / StringField`,
//`JSONField / 2 == 1`,
`2 % JSONField == 1`,
`2 % Int64Field == 1`,
`ArrayField / 2 == 1`,
`2 / ArrayField == 1`,
// ----------------------- ==/!= -------------------------
//`not_in_schema != 1`, // maybe in json
//`1 == not_in_schema`, // maybe in json
`true == "str"`,
`"str" != false`,
`VarCharField != FloatField`,
`FloatField == VarCharField`,
// ---------------------- relational --------------------
//`not_in_schema < 1`, // maybe in json
//`1 <= not_in_schema`, // maybe in json
`true <= "str"`,
`"str" >= false`,
`VarCharField < FloatField`,
`FloatField > VarCharField`,
//`JSONField > 1`,
//`1 < JSONField`,
`ArrayField > 2`,
`2 < ArrayField`,
// https://github.com/milvus-io/milvus/issues/34139
"\"Int64Field\" > 500 && \"Int64Field\" < 1000",
"\"Int64Field\" == 500 || \"Int64Field\" != 1000",
`"str" < 100`,
`"str" <= 100`,
`"str" > 100`,
`"str" >= 100`,
`"str" == 100`,
`"str" != 100`,
// ------------------------ like ------------------------
`(VarCharField % 2) like "prefix%"`,
`FloatField like "prefix%"`,
//`value like "prefix%"`, // maybe in json
// ------------------------ term ------------------------
//`not_in_schema in [1, 2, 3]`, // maybe in json
`1 in [1, 2, 3]`,
`(Int8Field + 8) in [1, 2, 3]`,
`Int8Field in [(true + 1)]`,
`Int8Field in [Int16Field]`,
`BoolField in [4.0]`,
`VarCharField in [4.0]`,
`Int32Field in [4.0]`,
`FloatField in [5, 6.0, true]`,
`1 in A`,
// ----------------------- range -------------------------
//`1 < not_in_schema < 2`, // maybe in json
`1 < 3 < 2`,
`1 < (Int8Field + Int16Field) < 2`,
`(invalid_lower) < Int32Field < 2`,
`1 < Int32Field < (invalid_upper)`,
`(Int8Field) < Int32Field < 2`,
`1 < Int32Field < (Int16Field)`,
`1 < StringField < 2`,
`1 < BoolField < 2`,
`1.0 < Int32Field < 2.0`,
`true < FloatField < false`,
// `2 <= Int32Field <= 1`,
`2 = Int32Field = 1`,
`true = BoolField = false`,
// ----------------------- unary ------------------------
`-true`,
`!"str"`,
`!(not_in_schema)`,
`-Int32Field`,
`!(Int32Field)`,
// ----------------------- or/and ------------------------
`not_in_schema or true`,
`false or not_in_schema`,
`"str" or false`,
`BoolField or false`,
`Int32Field or Int64Field`,
`not_in_schema and true`,
`false and not_in_schema`,
`"str" and false`,
`BoolField and false`,
`Int32Field and Int64Field`,
// -------------------- unsupported ----------------------
`1 ^ 2`,
`1 & 2`,
`1 ** 2`,
`1 << 2`,
`1 | 2`,
// -------------------- cannot be independent ----------------------
`BoolField`,
`true`,
`false`,
`Int64Field > 100 and BoolField`,
`Int64Field < 100 or false`, // maybe this can be optimized.
`!BoolField`,
// -------------------- array ----------------------
//`A == [1, 2, 3]`,
`Int64Field == [1, 2, 3]`,
`Int64Field > [1, 2, 3]`,
`Int64Field + [1, 2, 3] == 10`,
`Int64Field % [1, 2, 3] == 10`,
`[1, 2, 3] < Int64Field < [4, 5, 6]`,
`Int64Field["A"] == 123`,
`[1,2,3] == [4,5,6]`,
`[1,2,3] == 1`,
}
for _, exprStr := range exprStrs {
_, err := ParseExpr(helper, exprStr)
assert.Error(t, err, exprStr)
}
}
func TestCreateRetrievePlan_Invalid(t *testing.T) {
t.Run("invalid expr", func(t *testing.T) {
schema := newTestSchemaHelper(t)
_, err := CreateRetrievePlan(schema, "invalid expression")
assert.Error(t, err)
})
}
func TestCreateSearchPlan_Invalid(t *testing.T) {
t.Run("invalid expr", func(t *testing.T) {
schema := newTestSchemaHelper(t)
_, err := CreateSearchPlan(schema, "invalid expression", "", nil)
assert.Error(t, err)
})
t.Run("invalid vector field", func(t *testing.T) {
schema := newTestSchemaHelper(t)
_, err := CreateSearchPlan(schema, "Int64Field > 0", "not_exist", nil)
assert.Error(t, err)
})
t.Run("not vector type", func(t *testing.T) {
schema := newTestSchemaHelper(t)
_, err := CreateSearchPlan(schema, "Int64Field > 0", "VarCharField", nil)
assert.Error(t, err)
})
}
var listenerCnt int
type errorListenerTest struct {
antlr.DefaultErrorListener
}
func (l *errorListenerTest) SyntaxError(recognizer antlr.Recognizer, offendingSymbol interface{}, line, column int, msg string, e antlr.RecognitionException) {
listenerCnt += 1
}
func (l *errorListenerTest) ReportAmbiguity(recognizer antlr.Parser, dfa *antlr.DFA, startIndex, stopIndex int, exact bool, ambigAlts *antlr.BitSet, configs antlr.ATNConfigSet) {
listenerCnt += 1
}
func (l *errorListenerTest) Error() error {
return nil
}
func Test_FixErrorListenerNotRemoved(t *testing.T) {
schema := newTestSchema()
schemaHelper, err := typeutil.CreateSchemaHelper(schema)
assert.NoError(t, err)
normal := "1 < Int32Field < (Int16Field)"
for i := 0; i < 10; i++ {
err := handleExprWithErrorListener(schemaHelper, normal, &errorListenerTest{})
err1, ok := err.(error)
assert.True(t, ok)
assert.Error(t, err1)
}
assert.True(t, listenerCnt <= 10)
}
func Test_handleExpr(t *testing.T) {
schema := newTestSchema()
schemaHelper, err := typeutil.CreateSchemaHelper(schema)
assert.NoError(t, err)
ret1 := handleExpr(schemaHelper, "1 < Int32Field < (Int16Field)")
err1, ok := ret1.(error)
assert.True(t, ok)
assert.Error(t, err1)
}
func Test_handleExpr_empty(t *testing.T) {
schema := newTestSchema()
schemaHelper, err := typeutil.CreateSchemaHelper(schema)
assert.NoError(t, err)
ret1 := handleExpr(schemaHelper, "")
assert.True(t, isAlwaysTrueExpr(getExpr(ret1).expr))
assert.Equal(t, schemapb.DataType_Bool, getExpr(ret1).dataType)
}
// test if handleExpr is thread-safe.
func Test_handleExpr_17126_26662(t *testing.T) {
schema := newTestSchema()
schemaHelper, err := typeutil.CreateSchemaHelper(schema)
assert.NoError(t, err)
normal := `VarCharField == "abcd\"defg"`
n := 400
var wg sync.WaitGroup
for i := 0; i < n; i++ {
wg.Add(1)
go func() {
defer wg.Done()
ret := handleExpr(schemaHelper, normal)
_, ok := ret.(error)
assert.False(t, ok)
}()
}
wg.Wait()
}
func Test_JSONExpr(t *testing.T) {
schema := newTestSchemaHelper(t)
expr := ""
var err error
// search
exprs := []string{
`$meta["A"] > 90`,
`JSONField["A"] > 90`,
`A < 10`,
`JSONField["A"] <= 5`,
`$meta["A"] <= 5`,
`$meta["A"] >= 95`,
`$meta["A"] == 5`,
`$meta["A"] != 95`,
`$meta["A"] > 90 && $meta["B"] < 5`,
`$meta["A"] > 95 || $meta["B"] < 5`,
`A > 95 || $meta["B"] < 5`,
`not ($meta["A"] == 95)`,
`$meta["A"] in [90, 91, 95, 97]`,
`$meta["A"] not in [90, 91, 95, 97]`,
`$meta["C"]["0"] in [90, 91, 95, 97]`,
`$meta["C"]["0"] not in [90, 91, 95, 97]`,
`C["0"] not in [90, 91, 95, 97]`,
`C[0] in [90, 91, 95, 97]`,
`C["0"] > 90`,
`C["0"] < 90`,
`C["0"] == 90`,
`10 < C["0"] < 90`,
`100 > C["0"] > 90`,
`0 <= $meta["A"] < 5`,
`0 <= A < 5`,
`$meta["A"] + 5 == 10`,
`$meta["A"] > 10 + 5`,
`100 - 5 < $meta["A"]`,
`100 == $meta["A"] + 6`,
`exists $meta["A"]`,
`exists $meta["A"]["B"]["C"] `,
`A["B"][0] > 100`,
`$meta[0] > 100`,
`A["\"\"B\"\""] > 10`,
`A["[\"B\"]"] == "abc\"bbb\"cc"`,
`A['B'] == "abc\"bbb\"cc"`,
`A['B'] == 'abc"cba'`,
`A['B'] == 'abc\"cba'`,
`A == [1,2,3]`,
`A + 1.2 == 3.3`,
`A + 1 == 2`,
`JSONField > 0`,
`JSONField == 0`,
`JSONField < 100`,
`0 < JSONField < 100`,
`20 > JSONField > 0`,
`JSONField + 5 > 0`,
`JSONField > 2 + 5`,
`JSONField * 2 > 5`,
`JSONField / 2 > 5`,
`JSONField % 10 > 5`,
}
for _, expr = range exprs {
_, err = CreateSearchPlan(schema, expr, "FloatVectorField", &planpb.QueryInfo{
Topk: 0,
MetricType: "",
SearchParams: "",
RoundDecimal: 0,
})
assert.NoError(t, err)
}
}
func Test_InvalidExprOnJSONField(t *testing.T) {
schema := newTestSchemaHelper(t)
expr := ""
var err error
// search
exprs := []string{
`exists $meta`,
`exists JSONField`,
`exists ArrayField`,
`exists $meta["A"] > 10 `,
`exists Int64Field`,
`A[[""B""]] > 10`,
`A["[""B""]"] > 10`,
`A[[""B""]] > 10`,
`A[B] > 10`,
`A + B == 3.3`,
}
for _, expr = range exprs {
_, err = CreateSearchPlan(schema, expr, "FloatVectorField", &planpb.QueryInfo{
Topk: 0,
MetricType: "",
SearchParams: "",
RoundDecimal: 0,
})
assert.Error(t, err, expr)
}
}
func Test_InvalidExprWithoutJSONField(t *testing.T) {
fields := []*schemapb.FieldSchema{
{FieldID: 100, Name: "id", IsPrimaryKey: true, Description: "id", DataType: schemapb.DataType_Int64},
{FieldID: 101, Name: "vector", IsPrimaryKey: false, Description: "vector", DataType: schemapb.DataType_FloatVector},
}
schema := &schemapb.CollectionSchema{
Name: "test",
Description: "schema for test used",
AutoID: true,
Fields: fields,
}
schemaHelper, err := typeutil.CreateSchemaHelper(schema)
require.NoError(t, err)
expr := ""
exprs := []string{
`A == 0`,
`JSON["A"] > 0`,
`A < 100`,
`0 < JSON["A"] < 100`,
`0 < A < 100`,
`100 > JSON["A"] > 0`,
`100 > A > 0`,
}
for _, expr = range exprs {
_, err = CreateSearchPlan(schemaHelper, expr, "FloatVectorField", &planpb.QueryInfo{
Topk: 0,
MetricType: "",
SearchParams: "",
RoundDecimal: 0,
})
assert.Error(t, err)
}
}
func Test_InvalidExprWithMultipleJSONField(t *testing.T) {
fields := []*schemapb.FieldSchema{
{FieldID: 100, Name: "id", IsPrimaryKey: true, Description: "id", DataType: schemapb.DataType_Int64},
{FieldID: 101, Name: "vector", IsPrimaryKey: false, Description: "vector", DataType: schemapb.DataType_FloatVector},
{FieldID: 102, Name: "json1", IsPrimaryKey: false, Description: "json field 1", DataType: schemapb.DataType_JSON},
{FieldID: 103, Name: "json2", IsPrimaryKey: false, Description: "json field 2", DataType: schemapb.DataType_JSON},
}
schema := &schemapb.CollectionSchema{
Name: "test",
Description: "schema for test used",
AutoID: true,
Fields: fields,
}
schemaHelper, err := typeutil.CreateSchemaHelper(schema)
require.NoError(t, err)
expr := ""
exprs := []string{
`A == 0`,
`A in [1, 2, 3]`,
`A not in [1, 2, 3]`,
`"1" in A`,
`"1" not in A`,
}
for _, expr = range exprs {
_, err = CreateSearchPlan(schemaHelper, expr, "FloatVectorField", &planpb.QueryInfo{
Topk: 0,
MetricType: "",
SearchParams: "",
RoundDecimal: 0,
})
assert.Error(t, err)
}
}
func Test_exprWithSingleQuotes(t *testing.T) {
schema := newTestSchemaHelper(t)
expr := ""
var err error
exprs := []string{
`'abc' < StringField < "def"`,
`'ab"c' < StringField < "d'ef"`,
`'ab\"c' < StringField < "d\'ef"`,
`'ab\'c' < StringField < "d\"ef"`,
}
for _, expr = range exprs {
_, err = CreateSearchPlan(schema, expr, "FloatVectorField", &planpb.QueryInfo{
Topk: 0,
MetricType: "",
SearchParams: "",
RoundDecimal: 0,
})
assert.NoError(t, err)
}
invalidExprs := []string{
`'abc'd' < StringField < "def"`,
`'abc' < StringField < "def"g"`,
}
for _, expr = range invalidExprs {
_, err = CreateSearchPlan(schema, expr, "FloatVectorField", &planpb.QueryInfo{
Topk: 0,
MetricType: "",
SearchParams: "",
RoundDecimal: 0,
})
assert.Error(t, err)
}
}
func Test_JSONContains(t *testing.T) {
schema := newTestSchemaHelper(t)
expr := ""
var err error
exprs := []string{
`json_contains(A, 10)`,
`not json_contains(A, 10)`,
`json_contains(A, 10.5)`,
`not json_contains(A, 10.5)`,
`json_contains(A, "10")`,
`not json_contains(A, "10")`,
`json_contains($meta["A"], 10)`,
`not json_contains($meta["A"], 10)`,
`json_contains(JSONField["x"], 5)`,
`not json_contains(JSONField["x"], 5)`,
`JSON_CONTAINS(JSONField["x"], 5)`,
`json_contains(A, [1,2,3])`,
`array_contains(A, [1,2,3])`,
`array_contains(ArrayField, [1,2,3])`,
`array_contains(ArrayField, 1)`,
`json_contains(JSONField, 5)`,
`json_contains($meta, 1)`,
}
for _, expr = range exprs {
_, err = CreateSearchPlan(schema, expr, "FloatVectorField", &planpb.QueryInfo{
Topk: 0,
MetricType: "",
SearchParams: "",
RoundDecimal: 0,
})
assert.NoError(t, err)
}
}
func Test_InvalidJSONContains(t *testing.T) {
schema := newTestSchemaHelper(t)
expr := ""
var err error
exprs := []string{
`json_contains(10, A)`,
`json_contains(1, [1,2,3])`,
`json_contains([1,2,3], 1)`,
`json_contains([1,2,3], [1,2,3])`,
`json_contains([1,2,3], [1,2])`,
`json_contains(A, B)`,
`not json_contains(A, B)`,
`json_contains(A, B > 5)`,
`json_contains(StringField, "a")`,
`json_contains(A, StringField > 5)`,
`json_contains(A)`,
`json_contains(A, 5, C)`,
`json_Contains(JSONField, 5)`,
`JSON_contains(JSONField, 5)`,
}
for _, expr = range exprs {
_, err = CreateSearchPlan(schema, expr, "FloatVectorField", &planpb.QueryInfo{
Topk: 0,
MetricType: "",
SearchParams: "",
RoundDecimal: 0,
})
assert.Error(t, err)
}
}
func Test_isEmptyExpression(t *testing.T) {
type args struct {
s string
}
tests := []struct {
name string
args args
want bool
}{
{
args: args{s: ""},
want: true,
},
{
args: args{s: " "},
want: true,
},
{
args: args{s: "not empty"},
want: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
assert.Equalf(t, tt.want, isEmptyExpression(tt.args.s), "isEmptyExpression(%v)", tt.args.s)
})
}
}
func Test_EscapeString(t *testing.T) {
schema := newTestSchemaHelper(t)
expr := ""
var err error
exprs := []string{
`A == "\"" || B == '\"'`,
`A == "\n" || B == '\n'`,
`A == "\367" || B == '\367'`,
`A == "\3678" || B == '\3678'`,
`A == "ab'c\'d" || B == 'abc"de\"'`,
`A == "'" || B == '"'`,
`A == "\'" || B == '\"' || C == '\''`,
`A == "\\'" || B == '\\"' || C == '\''`,
`A == "\\\'" || B == '\\\"' || C == '\\\''`,
`A == "\\\\'" || B == '\\\\"' || C == '\\\''`,
`A == "\\\\\'" || B == '\\\\\"' || C == '\\\\\''`,
`A == "\\\\\\'" || B == '\\\\\\"' || C == '\\\\\''`,
`str2 like 'abc\"def-%'`,
`str2 like 'abc"def-%'`,
`str4 like "abc\367-%"`,
`str4 like "中国"`,
`tag == '"blue"'`,
}
for _, expr = range exprs {
_, err = CreateSearchPlan(schema, expr, "FloatVectorField", &planpb.QueryInfo{
Topk: 0,
MetricType: "",
SearchParams: "",
RoundDecimal: 0,
})
assert.NoError(t, err)
}
invalidExprs := []string{
`A == "ab
c" || B == 'ab
c'`,
`A == "\423" || B == '\378'`,
`A == "\中国"`,
}
for _, expr = range invalidExprs {
_, err = CreateSearchPlan(schema, expr, "FloatVectorField", &planpb.QueryInfo{
Topk: 0,
MetricType: "",
SearchParams: "",
RoundDecimal: 0,
})
assert.Error(t, err)
}
}
func Test_JSONContainsAll(t *testing.T) {
schema := newTestSchemaHelper(t)
expr := ""
var err error
var plan *planpb.PlanNode
exprs := []string{
`json_contains_all(A, [1,2,3])`,
`json_contains_all(A, [1,"2",3.0])`,
`JSON_CONTAINS_ALL(A, [1,"2",3.0])`,
`array_contains_all(ArrayField, [1,2,3])`,
`array_contains_all(ArrayField, [1])`,
`json_contains_all(ArrayField, [1,2,3])`,
}
for _, expr = range exprs {
plan, err = CreateSearchPlan(schema, expr, "FloatVectorField", &planpb.QueryInfo{
Topk: 0,
MetricType: "",
SearchParams: "",
RoundDecimal: 0,
})
assert.NoError(t, err)
assert.NotNil(t, plan.GetVectorAnns().GetPredicates().GetJsonContainsExpr())
assert.Equal(t, planpb.JSONContainsExpr_ContainsAll, plan.GetVectorAnns().GetPredicates().GetJsonContainsExpr().GetOp())
}
invalidExprs := []string{
`JSON_CONTAINS_ALL(A, 1)`,
`JSON_CONTAINS_ALL(A, [abc])`,
`JSON_CONTAINS_ALL(A, [2>a])`,
`JSON_CONTAINS_ALL(A, [2>>a])`,
`JSON_CONTAINS_ALL(A[""], [1,2,3])`,
`JSON_CONTAINS_ALL(Int64Field, [1,2,3])`,
`JSON_CONTAINS_ALL(A, B)`,
}
for _, expr = range invalidExprs {
_, err = CreateSearchPlan(schema, expr, "FloatVectorField", &planpb.QueryInfo{
Topk: 0,
MetricType: "",
SearchParams: "",
RoundDecimal: 0,
})
assert.Error(t, err)
}
}
func Test_JSONContainsAny(t *testing.T) {
schema := newTestSchemaHelper(t)
expr := ""
var err error
var plan *planpb.PlanNode
exprs := []string{
`json_contains_any(A, [1,2,3])`,
`json_contains_any(A, [1,"2",3.0])`,
`JSON_CONTAINS_ANY(A, [1,"2",3.0])`,
`JSON_CONTAINS_ANY(ArrayField, [1,2,3])`,
`JSON_CONTAINS_ANY(ArrayField, [3,4,5])`,
`JSON_CONTAINS_ANY(ArrayField, [1,2,3])`,
}
for _, expr = range exprs {
plan, err = CreateSearchPlan(schema, expr, "FloatVectorField", &planpb.QueryInfo{
Topk: 0,
MetricType: "",
SearchParams: "",
RoundDecimal: 0,
})
assert.NoError(t, err)
assert.NotNil(t, plan.GetVectorAnns().GetPredicates().GetJsonContainsExpr())
assert.Equal(t, planpb.JSONContainsExpr_ContainsAny, plan.GetVectorAnns().GetPredicates().GetJsonContainsExpr().GetOp())
}
invalidExprs := []string{
`JSON_CONTAINS_ANY(A, 1)`,
`JSON_CONTAINS_ANY(A, [abc])`,
`JSON_CONTAINS_ANY(A, [2>a])`,
`JSON_CONTAINS_ANY(A, [2>>a])`,
`JSON_CONTAINS_ANY(A[""], [1,2,3])`,
`JSON_CONTAINS_ANY(Int64Field, [1,2,3])`,
`JSON_CONTAINS_ANY(A, B)`,
}
for _, expr = range invalidExprs {
_, err = CreateSearchPlan(schema, expr, "FloatVectorField", &planpb.QueryInfo{
Topk: 0,
MetricType: "",
SearchParams: "",
RoundDecimal: 0,
})
assert.Error(t, err)
}
}
func Test_ArrayExpr(t *testing.T) {
schema := newTestSchemaHelper(t)
expr := ""
var err error
exprs := []string{
`ArrayField == [1,2,3,4]`,
`ArrayField[0] == 1`,
`ArrayField[0] > 1`,
`1 < ArrayField[0] < 3`,
`StringArrayField[0] == "abc"`,
`StringArrayField[0] < "abc"`,
`"abc" < StringArrayField[0] < "efg"`,
`array_contains(ArrayField, 1)`,
`not ARRAY_CONTAINS(ArrayField, 1)`,
`array_contains_all(ArrayField, [1,2,3,4])`,
`not ARRAY_CONTAINS_ALL(ArrayField, [1,2,3,4])`,
`array_contains_any(ArrayField, [1,2,3,4])`,
`not ARRAY_CONTAINS_ANY(ArrayField, [1,2,3,4])`,
`array_contains(StringArrayField, "abc")`,
`not ARRAY_CONTAINS(StringArrayField, "abc")`,
`array_contains_all(StringArrayField, ["a", "b", "c", "d"])`,
`not ARRAY_CONTAINS_ALL(StringArrayField, ["a", "b", "c", "d"])`,
`array_contains_any(StringArrayField, ["a", "b", "c", "d"])`,
`not ARRAY_CONTAINS_ANY(StringArrayField, ["a", "b", "c", "d"])`,
`StringArrayField[0] like "abd%"`,
`+ArrayField[0] == 1`,
`ArrayField[0] % 3 == 1`,
`ArrayField[0] + 3 == 1`,
`ArrayField[0] in [1,2,3]`,
`ArrayField[0] in []`,
`0 < ArrayField[0] < 100`,
`100 > ArrayField[0] > 0`,
`ArrayField[0] > 1`,
`ArrayField[0] == 1`,
}
for _, expr = range exprs {
_, err = CreateSearchPlan(schema, expr, "FloatVectorField", &planpb.QueryInfo{
Topk: 0,
MetricType: "",
SearchParams: "",
RoundDecimal: 0,
})
assert.NoError(t, err, expr)
}
invalidExprs := []string{
`ArrayField == ["abc", "def"]`,
`"abc" < ArrayField[0] < "def"`,
`ArrayField[0] == "def"`,
`ArrayField["0"] == 1`,
`array_contains(ArrayField, "a")`,
`array_contains(StringArrayField, 1)`,
`array_contains_all(StringArrayField, ["abc", 123])`,
`array_contains_any(StringArrayField, ["abc", 123])`,
`StringArrayField like "abd%"`,
`+ArrayField == 1`,
`ArrayField % 3 == 1`,
`ArrayField + 3 == 1`,
`ArrayField in [1,2,3]`,
`ArrayField[0] in [1, "abc",3.3]`,
`ArrayField in []`,
`0 < ArrayField < 100`,
`100 > ArrayField > 0`,
`ArrayField > 1`,
`ArrayField == 1`,
`ArrayField[] == 1`,
`A[] == 1`,
`ArrayField[0] + ArrayField[1] == 1`,
`ArrayField == []`,
}
for _, expr = range invalidExprs {
_, err = CreateSearchPlan(schema, expr, "FloatVectorField", &planpb.QueryInfo{
Topk: 0,
MetricType: "",
SearchParams: "",
RoundDecimal: 0,
})
assert.Error(t, err, expr)
}
}
func Test_ArrayLength(t *testing.T) {
schema := newTestSchemaHelper(t)
expr := ""
var err error
exprs := []string{
`array_length(ArrayField) == 10`,
`array_length(A) != 10`,
`array_length(StringArrayField) == 1`,
`array_length(B) != 1`,
`not (array_length(C[0]) == 1)`,
`not (array_length(C["D"]) != 1)`,
`array_length(StringArrayField) < 1`,
`array_length(StringArrayField) <= 1`,
`array_length(StringArrayField) > 5`,
`array_length(StringArrayField) >= 5`,
}
for _, expr = range exprs {
_, err = CreateSearchPlan(schema, expr, "FloatVectorField", &planpb.QueryInfo{
Topk: 0,
MetricType: "",
SearchParams: "",
RoundDecimal: 0,
})
assert.NoError(t, err, expr)
}
invalidExprs := []string{
`array_length(a > b) == 0`,
`array_length(a, b) == 1`,
`array_length(A)`,
`array_length("A") / 10 == 2`,
`array_length(Int64Field) == 2`,
`array_length(a-b) == 2`,
`0 < array_length(a-b) < 2`,
`0 < array_length(StringArrayField) < 1`,
`100 > array_length(ArrayField) > 10`,
`array_length(A) % 10 == 2`,
`array_length(A) / 10 == 2`,
`array_length(A) + 1 == 2`,
`array_length(JSONField) + 1 == 2`,
`array_length(A) == 2.2`,
}
for _, expr = range invalidExprs {
_, err = CreateSearchPlan(schema, expr, "FloatVectorField", &planpb.QueryInfo{
Topk: 0,
MetricType: "",
SearchParams: "",
RoundDecimal: 0,
})
assert.Error(t, err, expr)
}
}
func TestConcurrency(t *testing.T) {
schemaHelper := newTestSchemaHelper(t)
wg := sync.WaitGroup{}
wg.Add(10)
for i := 0; i < 10; i++ {
go func() {
defer wg.Done()
for j := 0; j < 10; j++ {
r := handleExpr(schemaHelper, fmt.Sprintf("array_length(ArrayField) == %d", j))
err := getError(r)
assert.NoError(t, err)
}
}()
}
wg.Wait()
}
func BenchmarkPlanCache(b *testing.B) {
schema := newTestSchema()
schemaHelper, err := typeutil.CreateSchemaHelper(schema)
require.NoError(b, err)
b.ResetTimer()
for i := 0; i < b.N; i++ {
r := handleExpr(schemaHelper, "array_length(ArrayField) == 10")
err := getError(r)
assert.NoError(b, err)
}
}
func BenchmarkNoPlanCache(b *testing.B) {
schema := newTestSchema()
schemaHelper, err := typeutil.CreateSchemaHelper(schema)
require.NoError(b, err)
b.ResetTimer()
for i := 0; i < b.N; i++ {
r := handleExpr(schemaHelper, fmt.Sprintf("array_length(ArrayField) == %d", i))
err := getError(r)
assert.NoError(b, err)
}
}