feat: adding cache to expression parse (#36185)

See #36122

This PR improves proxy node performance by adding a cache for
expression parsing results.

---------

Signed-off-by: Ted Xu <ted.xu@zilliz.com>
This commit is contained in:
Ted Xu 2024-09-13 10:03:09 +08:00 committed by GitHub
parent 1833f2d516
commit b9c037f558
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 76 additions and 5 deletions

2
go.mod
View File

@ -61,6 +61,7 @@ require (
github.com/cenkalti/backoff/v4 v4.2.1
github.com/cockroachdb/redact v1.1.3
github.com/greatroar/blobloom v0.0.0-00010101000000-000000000000
github.com/hashicorp/golang-lru/v2 v2.0.7
github.com/jolestar/go-commons-pool/v2 v2.1.2
github.com/milvus-io/milvus/pkg v0.0.2-0.20240801085213-a642a26ed4c6
github.com/pkg/errors v0.9.1
@ -89,7 +90,6 @@ require (
github.com/apache/pulsar-client-go v0.6.1-0.20210728062540-29414db801a7 // indirect
github.com/apache/thrift v0.18.1 // indirect
github.com/ardielle/ardielle-go v1.5.2 // indirect
github.com/benbjohnson/clock v1.1.0 // indirect
github.com/benesch/cgosymbolizer v0.0.0-20190515212042-bec6fe6e597b // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/bytedance/sonic/loader v0.2.0 // indirect

5
go.sum
View File

@ -109,7 +109,6 @@ github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmV
github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
github.com/aws/aws-sdk-go v1.32.6/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0=
github.com/aymerick/raymond v2.0.3-0.20180322193309-b565731e1464+incompatible/go.mod h1:osfaiScAUVup+UC9Nfq76eWqDhXlp+4UYaA8uhTBO6g=
github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8=
github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA=
github.com/benesch/cgosymbolizer v0.0.0-20190515212042-bec6fe6e597b h1:5JgaFtHFRnOPReItxvhMDXbvuBkjSWE+9glJyF466yw=
github.com/benesch/cgosymbolizer v0.0.0-20190515212042-bec6fe6e597b/go.mod h1:eMD2XUcPsHYbakFEocKrWZp47G0MRJYoC60qFblGjpA=
@ -462,6 +461,8 @@ github.com/hashicorp/go-version v1.2.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09
github.com/hashicorp/go.net v0.0.1/go.mod h1:hjKkEWcCURg++eb33jQU7oqQcI9XDCnUzHA0oac0k90=
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64=
@ -997,8 +998,6 @@ go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q=
go.uber.org/zap v1.17.0/go.mod h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo=
go.uber.org/zap v1.19.0/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI=
go.uber.org/zap v1.20.0/go.mod h1:wjWOCqI0f2ZZrJF/UufIOkiC8ii6tm1iqIsLo76RfJw=
go.uber.org/zap v1.24.0 h1:FiJd5l1UOLj0wCgbSE0rwwXHzEdAZS6hiiSnxJN/D60=
go.uber.org/zap v1.24.0/go.mod h1:2kMP+WWQ8aoFoedH3T2sq6iJ2yDWpHbP0f6MQbS9Gkg=
go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8=
go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k=

View File

@ -2,8 +2,10 @@ package planparserv2
import (
"fmt"
"time"
"github.com/antlr/antlr4/runtime/Go/antlr"
"github.com/hashicorp/golang-lru/v2/expirable"
"github.com/samber/lo"
"go.uber.org/zap"
@ -14,8 +16,25 @@ import (
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
// exprParseKey is the key under which an expression parse result is cached.
// Besides the expression string, only collectionName is part of the key. This
// assumes that a given collection name always maps to the same schema, and
// therefore to the same parse result. If a schema can ever change while the
// collection name stays the same, the cache key must be extended accordingly.
type exprParseKey struct {
// collectionName is the name of the collection whose schema the expression is parsed against.
collectionName string
// expr is the raw expression string.
expr string
}
// exprCache is the package-level cache of expression parse results, keyed by
// exprParseKey. Values are stored as `any`.
// NOTE(review): the arguments are presumably (capacity=256 entries, no eviction
// callback, TTL=10 minutes) — confirm against the expirable.NewLRU documentation.
var exprCache = expirable.NewLRU[exprParseKey, any](256, nil, time.Minute*10)
// handleExpr parses exprStr against schema and returns the parse result,
// memoizing it in exprCache.
//
// The cache key is built from schema.GetCollectionName() and exprStr, so two
// calls with the same collection name and expression string share one cached
// result. On a miss the expression is parsed with a fresh errorListenerImpl
// and whatever handleExprWithErrorListener returns is stored and returned.
//
// NOTE(review): the cached value is handed out to every caller as-is; this
// presumes callers treat the result as read-only — confirm against call sites.
func handleExpr(schema *typeutil.SchemaHelper, exprStr string) interface{} {
	parseKey := exprParseKey{collectionName: schema.GetCollectionName(), expr: exprStr}

	// Fast path: reuse a previously computed result for this key.
	if cached, ok := exprCache.Get(parseKey); ok {
		return cached
	}

	val := handleExprWithErrorListener(schema, exprStr, &errorListenerImpl{})
	// Note that the errors will be cached, too.
	exprCache.Add(parseKey, val)
	return val
}
func handleExprWithErrorListener(schema *typeutil.SchemaHelper, exprStr string, errorListener errorListener) interface{} {

View File

@ -1,6 +1,7 @@
package planparserv2
import (
"fmt"
"sync"
"testing"
@ -1289,3 +1290,51 @@ func Test_ArrayLength(t *testing.T) {
assert.Error(t, err, expr)
}
}
// TestConcurrency calls handleExpr from several goroutines at once, each
// parsing the same small set of expressions, and asserts that every result
// carries no error.
func TestConcurrency(t *testing.T) {
	const (
		workers        = 10
		exprsPerWorker = 10
	)
	helper := newTestSchemaHelper(t)

	var pending sync.WaitGroup
	for w := 0; w < workers; w++ {
		pending.Add(1)
		go func() {
			defer pending.Done()
			for n := 0; n < exprsPerWorker; n++ {
				expr := fmt.Sprintf("array_length(ArrayField) == %d", n)
				assert.NoError(t, getError(handleExpr(helper, expr)))
			}
		}()
	}
	pending.Wait()
}
func BenchmarkPlanCache(b *testing.B) {
schema := newTestSchema()
schemaHelper, err := typeutil.CreateSchemaHelper(schema)
require.NoError(b, err)
b.ResetTimer()
for i := 0; i < b.N; i++ {
r := handleExpr(schemaHelper, "array_length(ArrayField) == 10")
err := getError(r)
assert.NoError(b, err)
}
}
func BenchmarkNoPlanCache(b *testing.B) {
schema := newTestSchema()
schemaHelper, err := typeutil.CreateSchemaHelper(schema)
require.NoError(b, err)
b.ResetTimer()
for i := 0; i < b.N; i++ {
r := handleExpr(schemaHelper, fmt.Sprintf("array_length(ArrayField) == %d", i))
err := getError(r)
assert.NoError(b, err)
}
}

View File

@ -440,6 +440,10 @@ func (helper *SchemaHelper) GetFunctionByOutputField(field *schemapb.FieldSchema
return nil, fmt.Errorf("function not exist")
}
// GetCollectionName returns the Name field of the schema wrapped by this helper.
func (helper *SchemaHelper) GetCollectionName() string {
return helper.schema.Name
}
// IsBinaryVectorType reports whether dataType is schemapb.DataType_BinaryVector.
func IsBinaryVectorType(dataType schemapb.DataType) bool {
return dataType == schemapb.DataType_BinaryVector
}