2022-04-01 18:59:29 +08:00
|
|
|
package proxy
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"fmt"
|
2023-04-23 09:00:32 +08:00
|
|
|
"math"
|
2022-04-01 18:59:29 +08:00
|
|
|
"strconv"
|
|
|
|
|
2023-02-26 11:31:49 +08:00
|
|
|
"github.com/cockroachdb/errors"
|
2022-04-01 18:59:29 +08:00
|
|
|
"github.com/golang/protobuf/proto"
|
2023-04-23 09:00:32 +08:00
|
|
|
"github.com/samber/lo"
|
2023-01-12 16:09:39 +08:00
|
|
|
"go.opentelemetry.io/otel"
|
2022-04-01 18:59:29 +08:00
|
|
|
"go.uber.org/zap"
|
|
|
|
|
2023-06-09 01:28:37 +08:00
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
2023-04-06 19:14:32 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/parser/planparserv2"
|
2022-04-01 18:59:29 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/proto/internalpb"
|
|
|
|
"github.com/milvus-io/milvus/internal/proto/planpb"
|
|
|
|
"github.com/milvus-io/milvus/internal/proto/querypb"
|
2023-04-06 19:14:32 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/types"
|
2024-04-09 14:21:18 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/util/exprutil"
|
2023-04-06 19:14:32 +08:00
|
|
|
"github.com/milvus-io/milvus/pkg/log"
|
|
|
|
"github.com/milvus-io/milvus/pkg/metrics"
|
|
|
|
"github.com/milvus-io/milvus/pkg/util/commonpbutil"
|
|
|
|
"github.com/milvus-io/milvus/pkg/util/funcutil"
|
2023-04-23 09:00:32 +08:00
|
|
|
"github.com/milvus-io/milvus/pkg/util/merr"
|
2023-04-06 19:14:32 +08:00
|
|
|
"github.com/milvus-io/milvus/pkg/util/paramtable"
|
|
|
|
"github.com/milvus-io/milvus/pkg/util/timerecord"
|
2024-04-09 14:21:18 +08:00
|
|
|
"github.com/milvus-io/milvus/pkg/util/tsoutil"
|
2023-04-06 19:14:32 +08:00
|
|
|
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
2022-04-01 18:59:29 +08:00
|
|
|
)
|
|
|
|
|
2022-10-08 15:38:58 +08:00
|
|
|
// Constants used by the proxy search task.
const (
	// SearchTaskName is the name of the search task.
	SearchTaskName = "SearchTask"
	// SearchLevelKey is the "level" key.
	// NOTE(review): presumably looked up in the request's search params — confirm against callers.
	SearchLevelKey = "level"

	// requeryThreshold is the estimated threshold for the size of the search results
	// (0.5 * 1024 * 1024).
	// If the number of estimated search results exceeds this threshold,
	// a second query request will be initiated to retrieve output fields data.
	// In this case, the first search will not return any output field from QueryNodes.
	requeryThreshold = 0.5 * 1024 * 1024

	// radiusKey and rangeFilterKey are the "radius" and "range_filter" keys.
	// NOTE(review): presumably range-search parameters — confirm against callers.
	radiusKey      = "radius"
	rangeFilterKey = "range_filter"
)
|
|
|
|
|
2022-04-01 18:59:29 +08:00
|
|
|
type searchTask struct {
|
|
|
|
Condition
|
2022-04-20 16:15:41 +08:00
|
|
|
ctx context.Context
|
2024-04-09 14:21:18 +08:00
|
|
|
*internalpb.SearchRequest
|
2022-04-20 16:15:41 +08:00
|
|
|
|
2023-04-23 09:00:32 +08:00
|
|
|
result *milvuspb.SearchResults
|
|
|
|
request *milvuspb.SearchRequest
|
|
|
|
|
2024-03-21 11:19:07 +08:00
|
|
|
tr *timerecord.TimeRecorder
|
|
|
|
collectionName string
|
|
|
|
schema *schemaInfo
|
|
|
|
requery bool
|
|
|
|
partitionKeyMode bool
|
|
|
|
enableMaterializedView bool
|
2024-04-19 10:31:20 +08:00
|
|
|
mustUsePartitionKey bool
|
2022-04-01 18:59:29 +08:00
|
|
|
|
2023-05-23 10:19:26 +08:00
|
|
|
userOutputFields []string
|
2022-04-20 16:15:41 +08:00
|
|
|
|
2023-06-13 10:20:37 +08:00
|
|
|
resultBuf *typeutil.ConcurrentSet[*internalpb.SearchResults]
|
2023-04-23 09:00:32 +08:00
|
|
|
|
2024-04-09 14:21:18 +08:00
|
|
|
partitionIDsSet *typeutil.ConcurrentSet[UniqueID]
|
|
|
|
|
2024-01-09 11:38:48 +08:00
|
|
|
qc types.QueryCoordClient
|
|
|
|
node types.ProxyComponent
|
|
|
|
lb LBPolicy
|
|
|
|
queryChannelsTs map[string]Timestamp
|
2024-04-09 14:21:18 +08:00
|
|
|
queryInfos []*planpb.QueryInfo
|
2024-04-10 15:07:17 +08:00
|
|
|
relatedDataSize int64
|
2022-06-21 13:30:12 +08:00
|
|
|
|
2024-04-09 14:21:18 +08:00
|
|
|
reScorers []reScorer
|
|
|
|
rankParams *rankParams
|
2022-06-21 13:30:12 +08:00
|
|
|
}
|
|
|
|
|
2024-02-21 09:52:59 +08:00
|
|
|
func (t *searchTask) CanSkipAllocTimestamp() bool {
|
|
|
|
var consistencyLevel commonpb.ConsistencyLevel
|
|
|
|
useDefaultConsistency := t.request.GetUseDefaultConsistency()
|
|
|
|
if !useDefaultConsistency {
|
|
|
|
consistencyLevel = t.request.GetConsistencyLevel()
|
|
|
|
} else {
|
|
|
|
collID, err := globalMetaCache.GetCollectionID(context.Background(), t.request.GetDbName(), t.request.GetCollectionName())
|
|
|
|
if err != nil { // err is not nil if collection not exists
|
|
|
|
log.Warn("search task get collectionID failed, can't skip alloc timestamp",
|
|
|
|
zap.String("collectionName", t.request.GetCollectionName()), zap.Error(err))
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
collectionInfo, err2 := globalMetaCache.GetCollectionInfo(context.Background(), t.request.GetDbName(), t.request.GetCollectionName(), collID)
|
|
|
|
if err2 != nil {
|
|
|
|
log.Warn("search task get collection info failed, can't skip alloc timestamp",
|
|
|
|
zap.String("collectionName", t.request.GetCollectionName()), zap.Error(err))
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
consistencyLevel = collectionInfo.consistencyLevel
|
|
|
|
}
|
|
|
|
|
|
|
|
return consistencyLevel != commonpb.ConsistencyLevel_Strong
|
|
|
|
}
|
|
|
|
|
2022-06-21 13:30:12 +08:00
|
|
|
func (t *searchTask) PreExecute(ctx context.Context) error {
|
2023-01-12 16:09:39 +08:00
|
|
|
ctx, sp := otel.Tracer(typeutil.ProxyRole).Start(ctx, "Proxy-Search-PreExecute")
|
|
|
|
defer sp.End()
|
2024-04-09 14:21:18 +08:00
|
|
|
t.SearchRequest.IsAdvanced = len(t.request.GetSubReqs()) > 0
|
2022-06-21 13:30:12 +08:00
|
|
|
t.Base.MsgType = commonpb.MsgType_Search
|
2022-11-04 14:25:38 +08:00
|
|
|
t.Base.SourceID = paramtable.GetNodeID()
|
2022-06-21 13:30:12 +08:00
|
|
|
|
|
|
|
collectionName := t.request.CollectionName
|
|
|
|
t.collectionName = collectionName
|
2023-06-25 17:20:43 +08:00
|
|
|
collID, err := globalMetaCache.GetCollectionID(ctx, t.request.GetDbName(), collectionName)
|
2022-06-21 13:30:12 +08:00
|
|
|
if err != nil { // err is not nil if collection not exists
|
2024-07-01 14:56:12 +08:00
|
|
|
return merr.WrapErrAsInputErrorWhen(err, merr.ErrCollectionNotFound, merr.ErrDatabaseNotFound)
|
2022-06-21 13:30:12 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
t.SearchRequest.DbID = 0 // todo
|
|
|
|
t.SearchRequest.CollectionID = collID
|
2024-02-01 16:03:03 +08:00
|
|
|
log := log.Ctx(ctx).With(zap.Int64("collID", collID), zap.String("collName", collectionName))
|
2023-08-11 10:21:29 +08:00
|
|
|
t.schema, err = globalMetaCache.GetCollectionSchema(ctx, t.request.GetDbName(), collectionName)
|
|
|
|
if err != nil {
|
|
|
|
log.Warn("get collection schema failed", zap.Error(err))
|
|
|
|
return err
|
|
|
|
}
|
2022-06-21 13:30:12 +08:00
|
|
|
|
2024-02-01 16:03:03 +08:00
|
|
|
t.partitionKeyMode, err = isPartitionKeyMode(ctx, t.request.GetDbName(), collectionName)
|
2022-06-21 13:30:12 +08:00
|
|
|
if err != nil {
|
2023-07-14 16:08:31 +08:00
|
|
|
log.Warn("is partition key mode failed", zap.Error(err))
|
2022-06-21 13:30:12 +08:00
|
|
|
return err
|
|
|
|
}
|
2024-02-01 16:03:03 +08:00
|
|
|
if t.partitionKeyMode && len(t.request.GetPartitionNames()) != 0 {
|
2023-06-06 10:24:34 +08:00
|
|
|
return errors.New("not support manually specifying the partition names if partition key mode is used")
|
|
|
|
}
|
2024-04-19 10:31:20 +08:00
|
|
|
if t.mustUsePartitionKey && !t.partitionKeyMode {
|
2024-07-01 14:56:12 +08:00
|
|
|
return merr.WrapErrAsInputError(merr.WrapErrParameterInvalidMsg("must use partition key in the search request " +
|
|
|
|
"because the mustUsePartitionKey config is true"))
|
2024-04-19 10:31:20 +08:00
|
|
|
}
|
2022-06-21 13:30:12 +08:00
|
|
|
|
2024-02-02 16:47:13 +08:00
|
|
|
if !t.partitionKeyMode && len(t.request.GetPartitionNames()) > 0 {
|
|
|
|
// translate partition name to partition ids. Use regex-pattern to match partition name.
|
2024-04-09 14:21:18 +08:00
|
|
|
t.SearchRequest.PartitionIDs, err = getPartitionIDs(ctx, t.request.GetDbName(), collectionName, t.request.GetPartitionNames())
|
2024-02-02 16:47:13 +08:00
|
|
|
if err != nil {
|
|
|
|
log.Warn("failed to get partition ids", zap.Error(err))
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-05-23 10:19:26 +08:00
|
|
|
t.request.OutputFields, t.userOutputFields, err = translateOutputFields(t.request.OutputFields, t.schema, false)
|
2022-06-21 13:30:12 +08:00
|
|
|
if err != nil {
|
2023-07-14 16:08:31 +08:00
|
|
|
log.Warn("translate output fields failed", zap.Error(err))
|
2022-06-21 13:30:12 +08:00
|
|
|
return err
|
|
|
|
}
|
2023-07-14 16:08:31 +08:00
|
|
|
log.Debug("translate output fields",
|
2022-06-21 13:30:12 +08:00
|
|
|
zap.Strings("output fields", t.request.GetOutputFields()))
|
|
|
|
|
2024-04-09 14:21:18 +08:00
|
|
|
if t.SearchRequest.GetIsAdvanced() {
|
|
|
|
if len(t.request.GetSubReqs()) > defaultMaxSearchRequest {
|
|
|
|
return errors.New(fmt.Sprintf("maximum of ann search requests is %d", defaultMaxSearchRequest))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if t.SearchRequest.GetIsAdvanced() {
|
|
|
|
t.rankParams, err = parseRankParams(t.request.GetSearchParams())
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Manually update nq if not set.
|
|
|
|
nq, err := t.checkNq(ctx)
|
|
|
|
if err != nil {
|
|
|
|
log.Info("failed to check nq", zap.Error(err))
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
t.SearchRequest.Nq = nq
|
|
|
|
|
|
|
|
var ignoreGrowing bool
|
|
|
|
// parse common search params
|
|
|
|
for i, kv := range t.request.GetSearchParams() {
|
|
|
|
if kv.GetKey() == IgnoreGrowingKey {
|
|
|
|
ignoreGrowing, err = strconv.ParseBool(kv.GetValue())
|
|
|
|
if err != nil {
|
|
|
|
return errors.New("parse search growing failed")
|
|
|
|
}
|
|
|
|
t.request.SearchParams = append(t.request.GetSearchParams()[:i], t.request.GetSearchParams()[i+1:]...)
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
t.SearchRequest.IgnoreGrowing = ignoreGrowing
|
|
|
|
|
|
|
|
outputFieldIDs, err := getOutputFieldIDs(t.schema, t.request.GetOutputFields())
|
|
|
|
if err != nil {
|
|
|
|
log.Info("fail to get output field ids", zap.Error(err))
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
t.SearchRequest.OutputFieldsId = outputFieldIDs
|
|
|
|
|
|
|
|
// Currently, we get vectors by requery. Once we support getting vectors from search,
|
|
|
|
// searches with small result size could no longer need requery.
|
|
|
|
vectorOutputFields := lo.Filter(t.schema.GetFields(), func(field *schemapb.FieldSchema, _ int) bool {
|
|
|
|
return lo.Contains(t.request.GetOutputFields(), field.GetName()) && typeutil.IsVectorType(field.GetDataType())
|
|
|
|
})
|
|
|
|
|
|
|
|
if t.SearchRequest.GetIsAdvanced() {
|
|
|
|
t.requery = len(t.request.OutputFields) > 0
|
|
|
|
err = t.initAdvancedSearchRequest(ctx)
|
|
|
|
} else {
|
|
|
|
t.requery = len(vectorOutputFields) > 0
|
|
|
|
err = t.initSearchRequest(ctx)
|
|
|
|
}
|
2023-04-23 09:00:32 +08:00
|
|
|
if err != nil {
|
2024-02-01 16:03:03 +08:00
|
|
|
log.Debug("init search request failed", zap.Error(err))
|
2023-06-06 10:24:34 +08:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2023-08-01 17:33:06 +08:00
|
|
|
collectionInfo, err2 := globalMetaCache.GetCollectionInfo(ctx, t.request.GetDbName(), collectionName, t.CollectionID)
|
2023-05-30 21:01:29 +08:00
|
|
|
if err2 != nil {
|
2023-07-14 16:08:31 +08:00
|
|
|
log.Warn("Proxy::searchTask::PreExecute failed to GetCollectionInfo from cache",
|
2023-08-01 17:33:06 +08:00
|
|
|
zap.String("collectionName", collectionName), zap.Int64("collectionID", t.CollectionID), zap.Error(err2))
|
2023-05-30 21:01:29 +08:00
|
|
|
return err2
|
|
|
|
}
|
2022-05-24 12:05:59 +08:00
|
|
|
guaranteeTs := t.request.GetGuaranteeTimestamp()
|
2023-05-30 21:01:29 +08:00
|
|
|
var consistencyLevel commonpb.ConsistencyLevel
|
|
|
|
useDefaultConsistency := t.request.GetUseDefaultConsistency()
|
|
|
|
if useDefaultConsistency {
|
|
|
|
consistencyLevel = collectionInfo.consistencyLevel
|
|
|
|
guaranteeTs = parseGuaranteeTsFromConsistency(guaranteeTs, t.BeginTs(), consistencyLevel)
|
|
|
|
} else {
|
|
|
|
consistencyLevel = t.request.GetConsistencyLevel()
|
2023-06-07 10:38:36 +08:00
|
|
|
// Compatibility logic, parse guarantee timestamp
|
2023-05-30 21:01:29 +08:00
|
|
|
if consistencyLevel == 0 && guaranteeTs > 0 {
|
|
|
|
guaranteeTs = parseGuaranteeTs(guaranteeTs, t.BeginTs())
|
|
|
|
} else {
|
|
|
|
// parse from guarantee timestamp and user input consistency level
|
|
|
|
guaranteeTs = parseGuaranteeTsFromConsistency(guaranteeTs, t.BeginTs(), consistencyLevel)
|
|
|
|
}
|
|
|
|
}
|
2022-05-24 12:05:59 +08:00
|
|
|
t.SearchRequest.GuaranteeTimestamp = guaranteeTs
|
2024-04-09 14:21:18 +08:00
|
|
|
t.SearchRequest.ConsistencyLevel = consistencyLevel
|
|
|
|
|
|
|
|
if deadline, ok := t.TraceCtx().Deadline(); ok {
|
|
|
|
t.SearchRequest.TimeoutTimestamp = tsoutil.ComposeTSByTime(deadline, 0)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Set username of this search request for feature like task scheduling.
|
|
|
|
if username, _ := GetCurUserFromContext(ctx); username != "" {
|
|
|
|
t.SearchRequest.Username = username
|
|
|
|
}
|
|
|
|
|
|
|
|
t.resultBuf = typeutil.NewConcurrentSet[*internalpb.SearchResults]()
|
2022-05-24 12:05:59 +08:00
|
|
|
|
2023-07-14 16:08:31 +08:00
|
|
|
log.Debug("search PreExecute done.",
|
2023-08-10 18:53:17 +08:00
|
|
|
zap.Uint64("guarantee_ts", guaranteeTs),
|
2023-05-30 21:01:29 +08:00
|
|
|
zap.Bool("use_default_consistency", useDefaultConsistency),
|
|
|
|
zap.Any("consistency level", consistencyLevel),
|
2022-06-21 13:30:12 +08:00
|
|
|
zap.Uint64("timeout_ts", t.SearchRequest.GetTimeoutTimestamp()))
|
2022-04-20 16:15:41 +08:00
|
|
|
return nil
|
|
|
|
}
|
2022-04-01 18:59:29 +08:00
|
|
|
|
2024-04-09 14:21:18 +08:00
|
|
|
func (t *searchTask) checkNq(ctx context.Context) (int64, error) {
|
|
|
|
var nq int64
|
|
|
|
if t.SearchRequest.GetIsAdvanced() {
|
|
|
|
// In the context of Advanced Search, it is essential to verify that the number of vectors
|
|
|
|
// for each individual search, denoted as nq, remains consistent.
|
|
|
|
nq = t.request.GetNq()
|
|
|
|
for _, req := range t.request.GetSubReqs() {
|
|
|
|
subNq, err := getNqFromSubSearch(req)
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
req.Nq = subNq
|
|
|
|
if nq == 0 {
|
|
|
|
nq = subNq
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if subNq != nq {
|
|
|
|
err = merr.WrapErrParameterInvalid(nq, subNq, "sub search request nq should be the same")
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
t.request.Nq = nq
|
|
|
|
} else {
|
|
|
|
var err error
|
|
|
|
nq, err = getNq(t.request)
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
t.request.Nq = nq
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check if nq is valid:
|
|
|
|
// https://milvus.io/docs/limitations.md
|
|
|
|
if err := validateNQLimit(nq); err != nil {
|
|
|
|
return 0, fmt.Errorf("%s [%d] is invalid, %w", NQKey, nq, err)
|
|
|
|
}
|
|
|
|
return nq, nil
|
|
|
|
}
|
|
|
|
|
2024-04-11 15:21:19 +08:00
|
|
|
func setQueryInfoIfMvEnable(queryInfo *planpb.QueryInfo, t *searchTask) error {
|
|
|
|
if t.enableMaterializedView {
|
|
|
|
partitionKeyFieldSchema, err := typeutil.GetPartitionKeyFieldSchema(t.schema.CollectionSchema)
|
|
|
|
if err != nil {
|
|
|
|
log.Warn("failed to get partition key field schema", zap.Error(err))
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if typeutil.IsFieldDataTypeSupportMaterializedView(partitionKeyFieldSchema) {
|
|
|
|
queryInfo.MaterializedViewInvolved = true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2024-04-09 14:21:18 +08:00
|
|
|
func (t *searchTask) initAdvancedSearchRequest(ctx context.Context) error {
|
|
|
|
ctx, sp := otel.Tracer(typeutil.ProxyRole).Start(ctx, "init advanced search request")
|
|
|
|
defer sp.End()
|
|
|
|
|
|
|
|
t.partitionIDsSet = typeutil.NewConcurrentSet[UniqueID]()
|
|
|
|
|
|
|
|
log := log.Ctx(ctx).With(zap.Int64("collID", t.GetCollectionID()), zap.String("collName", t.collectionName))
|
|
|
|
// fetch search_growing from search param
|
|
|
|
t.SearchRequest.SubReqs = make([]*internalpb.SubSearchRequest, len(t.request.GetSubReqs()))
|
|
|
|
t.queryInfos = make([]*planpb.QueryInfo, len(t.request.GetSubReqs()))
|
|
|
|
for index, subReq := range t.request.GetSubReqs() {
|
|
|
|
plan, queryInfo, offset, err := t.tryGeneratePlan(subReq.GetSearchParams(), subReq.GetDsl(), true)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if queryInfo.GetGroupByFieldId() != -1 {
|
|
|
|
return errors.New("not support search_group_by operation in the hybrid search")
|
|
|
|
}
|
|
|
|
internalSubReq := &internalpb.SubSearchRequest{
|
|
|
|
Dsl: subReq.GetDsl(),
|
|
|
|
PlaceholderGroup: subReq.GetPlaceholderGroup(),
|
|
|
|
DslType: subReq.GetDslType(),
|
|
|
|
SerializedExprPlan: nil,
|
|
|
|
Nq: subReq.GetNq(),
|
|
|
|
PartitionIDs: nil,
|
|
|
|
Topk: queryInfo.GetTopk(),
|
|
|
|
Offset: offset,
|
2024-04-12 16:19:18 +08:00
|
|
|
MetricType: queryInfo.GetMetricType(),
|
2024-04-09 14:21:18 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// set PartitionIDs for sub search
|
|
|
|
if t.partitionKeyMode {
|
|
|
|
partitionIDs, err2 := t.tryParsePartitionIDsFromPlan(plan)
|
|
|
|
if err2 != nil {
|
|
|
|
return err2
|
|
|
|
}
|
|
|
|
if len(partitionIDs) > 0 {
|
|
|
|
internalSubReq.PartitionIDs = partitionIDs
|
|
|
|
t.partitionIDsSet.Upsert(partitionIDs...)
|
2024-04-11 15:21:19 +08:00
|
|
|
setQueryInfoIfMvEnable(queryInfo, t)
|
2024-04-09 14:21:18 +08:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
internalSubReq.PartitionIDs = t.SearchRequest.GetPartitionIDs()
|
|
|
|
}
|
|
|
|
|
|
|
|
if t.requery {
|
|
|
|
plan.OutputFieldIds = nil
|
|
|
|
} else {
|
|
|
|
plan.OutputFieldIds = t.SearchRequest.OutputFieldsId
|
|
|
|
}
|
|
|
|
|
|
|
|
internalSubReq.SerializedExprPlan, err = proto.Marshal(plan)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
t.SearchRequest.SubReqs[index] = internalSubReq
|
|
|
|
t.queryInfos[index] = queryInfo
|
|
|
|
log.Debug("proxy init search request",
|
|
|
|
zap.Int64s("plan.OutputFieldIds", plan.GetOutputFieldIds()),
|
|
|
|
zap.Stringer("plan", plan)) // may be very large if large term passed.
|
|
|
|
}
|
|
|
|
// used for requery
|
|
|
|
if t.partitionKeyMode {
|
|
|
|
t.SearchRequest.PartitionIDs = t.partitionIDsSet.Collect()
|
|
|
|
}
|
|
|
|
var err error
|
|
|
|
t.reScorers, err = NewReScorers(len(t.request.GetSubReqs()), t.request.GetSearchParams())
|
|
|
|
if err != nil {
|
|
|
|
log.Info("generate reScorer failed", zap.Any("params", t.request.GetSearchParams()), zap.Error(err))
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (t *searchTask) initSearchRequest(ctx context.Context) error {
|
|
|
|
ctx, sp := otel.Tracer(typeutil.ProxyRole).Start(ctx, "init search request")
|
|
|
|
defer sp.End()
|
|
|
|
|
|
|
|
log := log.Ctx(ctx).With(zap.Int64("collID", t.GetCollectionID()), zap.String("collName", t.collectionName))
|
|
|
|
// fetch search_growing from search param
|
|
|
|
|
|
|
|
plan, queryInfo, offset, err := t.tryGeneratePlan(t.request.GetSearchParams(), t.request.GetDsl(), false)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
t.SearchRequest.Offset = offset
|
|
|
|
|
|
|
|
if t.partitionKeyMode {
|
|
|
|
partitionIDs, err2 := t.tryParsePartitionIDsFromPlan(plan)
|
|
|
|
if err2 != nil {
|
|
|
|
return err2
|
|
|
|
}
|
|
|
|
if len(partitionIDs) > 0 {
|
|
|
|
t.SearchRequest.PartitionIDs = partitionIDs
|
2024-04-11 15:21:19 +08:00
|
|
|
setQueryInfoIfMvEnable(queryInfo, t)
|
2024-04-09 14:21:18 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if t.requery {
|
|
|
|
plan.OutputFieldIds = nil
|
|
|
|
} else {
|
|
|
|
plan.OutputFieldIds = t.SearchRequest.OutputFieldsId
|
|
|
|
}
|
|
|
|
|
|
|
|
t.SearchRequest.SerializedExprPlan, err = proto.Marshal(plan)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
t.SearchRequest.PlaceholderGroup = t.request.PlaceholderGroup
|
|
|
|
t.SearchRequest.Topk = queryInfo.GetTopk()
|
|
|
|
t.SearchRequest.MetricType = queryInfo.GetMetricType()
|
|
|
|
t.queryInfos = append(t.queryInfos, queryInfo)
|
|
|
|
t.SearchRequest.DslType = commonpb.DslType_BoolExprV1
|
|
|
|
log.Debug("proxy init search request",
|
|
|
|
zap.Int64s("plan.OutputFieldIds", plan.GetOutputFieldIds()),
|
|
|
|
zap.Stringer("plan", plan)) // may be very large if large term passed.
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (t *searchTask) tryGeneratePlan(params []*commonpb.KeyValuePair, dsl string, ignoreOffset bool) (*planpb.PlanNode, *planpb.QueryInfo, int64, error) {
|
|
|
|
annsFieldName, err := funcutil.GetAttrByKeyFromRepeatedKV(AnnsFieldKey, params)
|
|
|
|
if err != nil || len(annsFieldName) == 0 {
|
|
|
|
vecFields := typeutil.GetVectorFieldSchemas(t.schema.CollectionSchema)
|
|
|
|
if len(vecFields) == 0 {
|
|
|
|
return nil, nil, 0, errors.New(AnnsFieldKey + " not found in schema")
|
|
|
|
}
|
|
|
|
|
|
|
|
if enableMultipleVectorFields && len(vecFields) > 1 {
|
|
|
|
return nil, nil, 0, errors.New("multiple anns_fields exist, please specify a anns_field in search_params")
|
|
|
|
}
|
|
|
|
annsFieldName = vecFields[0].Name
|
|
|
|
}
|
|
|
|
queryInfo, offset, parseErr := parseSearchInfo(params, t.schema.CollectionSchema, ignoreOffset)
|
|
|
|
if parseErr != nil {
|
|
|
|
return nil, nil, 0, parseErr
|
|
|
|
}
|
|
|
|
annField := typeutil.GetFieldByName(t.schema.CollectionSchema, annsFieldName)
|
|
|
|
if queryInfo.GetGroupByFieldId() != -1 && annField.GetDataType() == schemapb.DataType_BinaryVector {
|
|
|
|
return nil, nil, 0, errors.New("not support search_group_by operation based on binary vector column")
|
|
|
|
}
|
|
|
|
plan, planErr := planparserv2.CreateSearchPlan(t.schema.schemaHelper, dsl, annsFieldName, queryInfo)
|
|
|
|
if planErr != nil {
|
|
|
|
log.Warn("failed to create query plan", zap.Error(planErr),
|
|
|
|
zap.String("dsl", dsl), // may be very large if large term passed.
|
|
|
|
zap.String("anns field", annsFieldName), zap.Any("query info", queryInfo))
|
|
|
|
return nil, nil, 0, merr.WrapErrParameterInvalidMsg("failed to create query plan: %v", planErr)
|
|
|
|
}
|
|
|
|
log.Debug("create query plan",
|
|
|
|
zap.String("dsl", t.request.Dsl), // may be very large if large term passed.
|
|
|
|
zap.String("anns field", annsFieldName), zap.Any("query info", queryInfo))
|
|
|
|
return plan, queryInfo, offset, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (t *searchTask) tryParsePartitionIDsFromPlan(plan *planpb.PlanNode) ([]int64, error) {
|
|
|
|
expr, err := exprutil.ParseExprFromPlan(plan)
|
|
|
|
if err != nil {
|
|
|
|
log.Warn("failed to parse expr", zap.Error(err))
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
partitionKeys := exprutil.ParseKeys(expr, exprutil.PartitionKey)
|
|
|
|
hashedPartitionNames, err := assignPartitionKeys(t.ctx, t.request.GetDbName(), t.collectionName, partitionKeys)
|
|
|
|
if err != nil {
|
|
|
|
log.Warn("failed to assign partition keys", zap.Error(err))
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(hashedPartitionNames) > 0 {
|
|
|
|
// translate partition name to partition ids. Use regex-pattern to match partition name.
|
|
|
|
PartitionIDs, err2 := getPartitionIDs(t.ctx, t.request.GetDbName(), t.collectionName, hashedPartitionNames)
|
|
|
|
if err2 != nil {
|
|
|
|
log.Warn("failed to get partition ids", zap.Error(err2))
|
|
|
|
return nil, err2
|
|
|
|
}
|
|
|
|
return PartitionIDs, nil
|
|
|
|
}
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
func (t *searchTask) Execute(ctx context.Context) error {
|
2023-01-12 16:09:39 +08:00
|
|
|
ctx, sp := otel.Tracer(typeutil.ProxyRole).Start(ctx, "Proxy-Search-Execute")
|
|
|
|
defer sp.End()
|
2023-11-30 10:42:27 +08:00
|
|
|
log := log.Ctx(ctx).With(zap.Int64("nq", t.SearchRequest.GetNq()))
|
2022-04-20 16:15:41 +08:00
|
|
|
|
|
|
|
tr := timerecord.NewTimeRecorder(fmt.Sprintf("proxy execute search %d", t.ID()))
|
2022-08-23 10:44:52 +08:00
|
|
|
defer tr.CtxElapse(ctx, "done")
|
2022-04-20 16:15:41 +08:00
|
|
|
|
2023-06-13 10:20:37 +08:00
|
|
|
err := t.lb.Execute(ctx, CollectionWorkLoad{
|
2023-08-01 17:33:06 +08:00
|
|
|
db: t.request.GetDbName(),
|
|
|
|
collectionID: t.SearchRequest.CollectionID,
|
|
|
|
collectionName: t.collectionName,
|
|
|
|
nq: t.Nq,
|
|
|
|
exec: t.searchShard,
|
2023-06-13 10:20:37 +08:00
|
|
|
})
|
2022-04-20 16:15:41 +08:00
|
|
|
if err != nil {
|
2023-07-14 16:08:31 +08:00
|
|
|
log.Warn("search execute failed", zap.Error(err))
|
2023-09-04 09:57:09 +08:00
|
|
|
return errors.Wrap(err, "failed to search")
|
2022-04-20 16:15:41 +08:00
|
|
|
}
|
2022-04-01 18:59:29 +08:00
|
|
|
|
2023-04-03 10:54:23 +08:00
|
|
|
log.Debug("Search Execute done.",
|
|
|
|
zap.Int64("collection", t.GetCollectionID()),
|
|
|
|
zap.Int64s("partitionIDs", t.GetPartitionIDs()))
|
2022-04-01 18:59:29 +08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2024-04-09 14:21:18 +08:00
|
|
|
func (t *searchTask) reduceResults(ctx context.Context, toReduceResults []*internalpb.SearchResults, nq, topK int64, offset int64, queryInfo *planpb.QueryInfo) (*milvuspb.SearchResults, error) {
|
|
|
|
metricType := ""
|
|
|
|
if len(toReduceResults) >= 1 {
|
|
|
|
metricType = toReduceResults[0].GetMetricType()
|
|
|
|
}
|
|
|
|
|
2024-05-07 13:03:29 +08:00
|
|
|
ctx, sp := otel.Tracer(typeutil.ProxyRole).Start(ctx, "reduceResults")
|
|
|
|
defer sp.End()
|
|
|
|
|
2024-04-09 14:21:18 +08:00
|
|
|
// Decode all search results
|
|
|
|
validSearchResults, err := decodeSearchResults(ctx, toReduceResults)
|
|
|
|
if err != nil {
|
|
|
|
log.Warn("failed to decode search results", zap.Error(err))
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(validSearchResults) <= 0 {
|
|
|
|
return fillInEmptyResult(nq), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Reduce all search results
|
|
|
|
log.Debug("proxy search post execute reduce",
|
|
|
|
zap.Int64("collection", t.GetCollectionID()),
|
|
|
|
zap.Int64s("partitionIDs", t.GetPartitionIDs()),
|
|
|
|
zap.Int("number of valid search results", len(validSearchResults)))
|
|
|
|
primaryFieldSchema, err := t.schema.GetPkField()
|
|
|
|
if err != nil {
|
|
|
|
log.Warn("failed to get primary field schema", zap.Error(err))
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
var result *milvuspb.SearchResults
|
|
|
|
result, err = reduceSearchResult(ctx, NewReduceSearchResultInfo(validSearchResults, nq, topK,
|
|
|
|
metricType, primaryFieldSchema.DataType, offset, queryInfo))
|
|
|
|
if err != nil {
|
|
|
|
log.Warn("failed to reduce search results", zap.Error(err))
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return result, nil
|
|
|
|
}
|
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
func (t *searchTask) PostExecute(ctx context.Context) error {
|
2023-01-12 16:09:39 +08:00
|
|
|
ctx, sp := otel.Tracer(typeutil.ProxyRole).Start(ctx, "Proxy-Search-PostExecute")
|
|
|
|
defer sp.End()
|
2022-09-14 20:36:32 +08:00
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
tr := timerecord.NewTimeRecorder("searchTask PostExecute")
|
|
|
|
defer func() {
|
2022-08-23 10:44:52 +08:00
|
|
|
tr.CtxElapse(ctx, "done")
|
2022-04-20 16:15:41 +08:00
|
|
|
}()
|
2023-11-30 10:42:27 +08:00
|
|
|
log := log.Ctx(ctx).With(zap.Int64("nq", t.SearchRequest.GetNq()))
|
2022-04-01 18:59:29 +08:00
|
|
|
|
2023-06-13 10:20:37 +08:00
|
|
|
toReduceResults, err := t.collectSearchResults(ctx)
|
|
|
|
if err != nil {
|
2023-07-14 16:08:31 +08:00
|
|
|
log.Warn("failed to collect search results", zap.Error(err))
|
2022-09-14 20:36:32 +08:00
|
|
|
return err
|
2022-07-06 15:06:21 +08:00
|
|
|
}
|
2022-09-14 20:36:32 +08:00
|
|
|
|
2024-01-09 11:38:48 +08:00
|
|
|
t.queryChannelsTs = make(map[string]uint64)
|
2024-04-10 15:07:17 +08:00
|
|
|
t.relatedDataSize = 0
|
2024-01-09 11:38:48 +08:00
|
|
|
for _, r := range toReduceResults {
|
2024-04-10 15:07:17 +08:00
|
|
|
t.relatedDataSize += r.GetCostAggregation().GetTotalRelatedDataSize()
|
2024-01-09 11:38:48 +08:00
|
|
|
for ch, ts := range r.GetChannelsMvcc() {
|
|
|
|
t.queryChannelsTs[ch] = ts
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-01-04 17:28:46 +08:00
|
|
|
primaryFieldSchema, err := t.schema.GetPkField()
|
2022-04-20 16:15:41 +08:00
|
|
|
if err != nil {
|
2023-07-14 16:08:31 +08:00
|
|
|
log.Warn("failed to get primary field schema", zap.Error(err))
|
2022-04-01 18:59:29 +08:00
|
|
|
return err
|
|
|
|
}
|
2022-09-14 20:36:32 +08:00
|
|
|
|
2024-04-09 14:21:18 +08:00
|
|
|
if t.SearchRequest.GetIsAdvanced() {
|
|
|
|
multipleInternalResults := make([][]*internalpb.SearchResults, len(t.SearchRequest.GetSubReqs()))
|
|
|
|
for _, searchResult := range toReduceResults {
|
|
|
|
// if get a non-advanced result, skip all
|
|
|
|
if !searchResult.GetIsAdvanced() {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
for _, subResult := range searchResult.GetSubResults() {
|
|
|
|
// swallow copy
|
|
|
|
internalResults := &internalpb.SearchResults{
|
|
|
|
MetricType: subResult.GetMetricType(),
|
|
|
|
NumQueries: subResult.GetNumQueries(),
|
|
|
|
TopK: subResult.GetTopK(),
|
|
|
|
SlicedBlob: subResult.GetSlicedBlob(),
|
|
|
|
SlicedNumCount: subResult.GetSlicedNumCount(),
|
|
|
|
SlicedOffset: subResult.GetSlicedOffset(),
|
|
|
|
IsAdvanced: false,
|
|
|
|
}
|
|
|
|
reqIndex := subResult.GetReqIndex()
|
|
|
|
multipleInternalResults[reqIndex] = append(multipleInternalResults[reqIndex], internalResults)
|
|
|
|
}
|
|
|
|
}
|
2022-04-29 13:35:49 +08:00
|
|
|
|
2024-04-09 14:21:18 +08:00
|
|
|
multipleMilvusResults := make([]*milvuspb.SearchResults, len(t.SearchRequest.GetSubReqs()))
|
|
|
|
for index, internalResults := range multipleInternalResults {
|
|
|
|
subReq := t.SearchRequest.GetSubReqs()[index]
|
|
|
|
|
|
|
|
metricType := ""
|
|
|
|
if len(internalResults) >= 1 {
|
|
|
|
metricType = internalResults[0].GetMetricType()
|
|
|
|
}
|
|
|
|
result, err := t.reduceResults(t.ctx, internalResults, subReq.GetNq(), subReq.GetTopk(), subReq.GetOffset(), t.queryInfos[index])
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
t.reScorers[index].setMetricType(metricType)
|
|
|
|
t.reScorers[index].reScore(result)
|
|
|
|
multipleMilvusResults[index] = result
|
|
|
|
}
|
|
|
|
t.result, err = rankSearchResultData(ctx, t.SearchRequest.GetNq(),
|
|
|
|
t.rankParams,
|
|
|
|
primaryFieldSchema.GetDataType(),
|
|
|
|
multipleMilvusResults)
|
|
|
|
if err != nil {
|
|
|
|
log.Warn("rank search result failed", zap.Error(err))
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
t.result, err = t.reduceResults(t.ctx, toReduceResults, t.SearchRequest.Nq, t.SearchRequest.GetTopk(), t.SearchRequest.GetOffset(), t.queryInfos[0])
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
2022-09-14 20:36:32 +08:00
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
t.result.CollectionName = t.collectionName
|
2022-09-14 20:36:32 +08:00
|
|
|
t.fillInFieldInfo()
|
2022-04-01 18:59:29 +08:00
|
|
|
|
2023-04-23 09:00:32 +08:00
|
|
|
if t.requery {
|
|
|
|
err = t.Requery()
|
|
|
|
if err != nil {
|
2023-07-14 16:08:31 +08:00
|
|
|
log.Warn("failed to requery", zap.Error(err))
|
2023-04-23 09:00:32 +08:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
2023-05-25 11:39:26 +08:00
|
|
|
t.result.Results.OutputFields = t.userOutputFields
|
2024-04-09 14:21:18 +08:00
|
|
|
t.result.CollectionName = t.request.GetCollectionName()
|
|
|
|
|
|
|
|
metrics.ProxyReduceResultLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), metrics.SearchLabel).Observe(float64(tr.RecordSpan().Milliseconds()))
|
2023-04-23 09:00:32 +08:00
|
|
|
|
2023-07-14 16:08:31 +08:00
|
|
|
log.Debug("Search post execute done",
|
2023-04-03 10:54:23 +08:00
|
|
|
zap.Int64("collection", t.GetCollectionID()),
|
|
|
|
zap.Int64s("partitionIDs", t.GetPartitionIDs()))
|
2022-04-20 16:15:41 +08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2024-01-09 11:38:48 +08:00
|
|
|
func (t *searchTask) searchShard(ctx context.Context, nodeID int64, qn types.QueryNodeClient, channel string) error {
|
2022-12-04 20:05:17 +08:00
|
|
|
searchReq := typeutil.Clone(t.SearchRequest)
|
|
|
|
searchReq.GetBase().TargetID = nodeID
|
2022-07-06 15:06:21 +08:00
|
|
|
req := &querypb.SearchRequest{
|
2023-03-24 15:21:59 +08:00
|
|
|
Req: searchReq,
|
2024-01-09 11:38:48 +08:00
|
|
|
DmlChannels: []string{channel},
|
2023-03-24 15:21:59 +08:00
|
|
|
Scope: querypb.DataScope_All,
|
2024-01-09 11:38:48 +08:00
|
|
|
TotalChannelNum: int32(1),
|
2022-04-01 18:59:29 +08:00
|
|
|
}
|
2023-01-10 21:09:38 +08:00
|
|
|
|
2023-04-03 10:54:23 +08:00
|
|
|
log := log.Ctx(ctx).With(zap.Int64("collection", t.GetCollectionID()),
|
|
|
|
zap.Int64s("partitionIDs", t.GetPartitionIDs()),
|
|
|
|
zap.Int64("nodeID", nodeID),
|
2024-01-09 11:38:48 +08:00
|
|
|
zap.String("channel", channel))
|
2023-04-03 10:54:23 +08:00
|
|
|
|
2023-01-10 21:09:38 +08:00
|
|
|
var result *internalpb.SearchResults
|
|
|
|
var err error
|
|
|
|
|
2023-04-11 11:28:31 +08:00
|
|
|
result, err = qn.Search(ctx, req)
|
2022-04-01 18:59:29 +08:00
|
|
|
if err != nil {
|
2023-04-03 10:54:23 +08:00
|
|
|
log.Warn("QueryNode search return error", zap.Error(err))
|
2024-03-19 18:03:07 +08:00
|
|
|
globalMetaCache.DeprecateShardCache(t.request.GetDbName(), t.collectionName)
|
2022-04-20 16:15:41 +08:00
|
|
|
return err
|
|
|
|
}
|
2022-07-06 15:06:21 +08:00
|
|
|
if result.GetStatus().GetErrorCode() == commonpb.ErrorCode_NotShardLeader {
|
2023-04-03 10:54:23 +08:00
|
|
|
log.Warn("QueryNode is not shardLeader")
|
2024-03-19 18:03:07 +08:00
|
|
|
globalMetaCache.DeprecateShardCache(t.request.GetDbName(), t.collectionName)
|
2022-07-06 15:06:21 +08:00
|
|
|
return errInvalidShardLeaders
|
|
|
|
}
|
|
|
|
if result.GetStatus().GetErrorCode() != commonpb.ErrorCode_Success {
|
2023-04-03 10:54:23 +08:00
|
|
|
log.Warn("QueryNode search result error",
|
2022-07-06 15:06:21 +08:00
|
|
|
zap.String("reason", result.GetStatus().GetReason()))
|
2023-11-30 18:34:32 +08:00
|
|
|
return errors.Wrapf(merr.Error(result.GetStatus()), "fail to search on QueryNode %d", nodeID)
|
2022-07-06 15:06:21 +08:00
|
|
|
}
|
2024-04-09 14:21:18 +08:00
|
|
|
if t.resultBuf != nil {
|
|
|
|
t.resultBuf.Insert(result)
|
|
|
|
}
|
2023-06-16 18:38:39 +08:00
|
|
|
t.lb.UpdateCostMetrics(nodeID, result.CostAggregation)
|
2022-04-20 16:15:41 +08:00
|
|
|
|
|
|
|
return nil
|
2022-04-01 18:59:29 +08:00
|
|
|
}
|
|
|
|
|
2023-04-23 09:00:32 +08:00
|
|
|
func (t *searchTask) estimateResultSize(nq int64, topK int64) (int64, error) {
|
|
|
|
vectorOutputFields := lo.Filter(t.schema.GetFields(), func(field *schemapb.FieldSchema, _ int) bool {
|
|
|
|
return lo.Contains(t.request.GetOutputFields(), field.GetName()) && typeutil.IsVectorType(field.GetDataType())
|
|
|
|
})
|
|
|
|
// Currently, we get vectors by requery. Once we support getting vectors from search,
|
|
|
|
// searches with small result size could no longer need requery.
|
|
|
|
if len(vectorOutputFields) > 0 {
|
|
|
|
return math.MaxInt64, nil
|
|
|
|
}
|
|
|
|
// If no vector field as output, no need to requery.
|
|
|
|
return 0, nil
|
|
|
|
|
|
|
|
//outputFields := lo.Filter(t.schema.GetFields(), func(field *schemapb.FieldSchema, _ int) bool {
|
|
|
|
// return lo.Contains(t.request.GetOutputFields(), field.GetName())
|
|
|
|
//})
|
|
|
|
//sizePerRecord, err := typeutil.EstimateSizePerRecord(&schemapb.CollectionSchema{Fields: outputFields})
|
|
|
|
//if err != nil {
|
|
|
|
// return 0, err
|
|
|
|
//}
|
|
|
|
//return int64(sizePerRecord) * nq * topK, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (t *searchTask) Requery() error {
|
|
|
|
queryReq := &milvuspb.QueryRequest{
|
|
|
|
Base: &commonpb.MsgBase{
|
2024-02-21 09:52:59 +08:00
|
|
|
MsgType: commonpb.MsgType_Retrieve,
|
|
|
|
Timestamp: t.BeginTs(),
|
2023-04-23 09:00:32 +08:00
|
|
|
},
|
2024-04-09 14:21:18 +08:00
|
|
|
DbName: t.request.GetDbName(),
|
|
|
|
CollectionName: t.request.GetCollectionName(),
|
|
|
|
ConsistencyLevel: t.SearchRequest.GetConsistencyLevel(),
|
|
|
|
NotReturnAllMeta: t.request.GetNotReturnAllMeta(),
|
|
|
|
Expr: "",
|
|
|
|
OutputFields: t.request.GetOutputFields(),
|
|
|
|
PartitionNames: t.request.GetPartitionNames(),
|
|
|
|
UseDefaultConsistency: false,
|
|
|
|
GuaranteeTimestamp: t.SearchRequest.GuaranteeTimestamp,
|
|
|
|
}
|
2024-01-25 11:05:07 +08:00
|
|
|
return doRequery(t.ctx, t.GetCollectionID(), t.node, t.schema.CollectionSchema, queryReq, t.result, t.queryChannelsTs, t.GetPartitionIDs())
|
2024-01-08 15:34:48 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func (t *searchTask) fillInFieldInfo() {
|
|
|
|
if len(t.request.OutputFields) != 0 && len(t.result.Results.FieldsData) != 0 {
|
|
|
|
for i, name := range t.request.OutputFields {
|
|
|
|
for _, field := range t.schema.Fields {
|
|
|
|
if t.result.Results.FieldsData[i] != nil && field.Name == name {
|
|
|
|
t.result.Results.FieldsData[i].FieldName = field.Name
|
|
|
|
t.result.Results.FieldsData[i].FieldId = field.FieldID
|
|
|
|
t.result.Results.FieldsData[i].Type = field.DataType
|
|
|
|
t.result.Results.FieldsData[i].IsDynamic = field.IsDynamic
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (t *searchTask) collectSearchResults(ctx context.Context) ([]*internalpb.SearchResults, error) {
|
|
|
|
select {
|
|
|
|
case <-t.TraceCtx().Done():
|
|
|
|
log.Ctx(ctx).Warn("search task wait to finish timeout!")
|
|
|
|
return nil, fmt.Errorf("search task wait to finish timeout, msgID=%d", t.ID())
|
|
|
|
default:
|
|
|
|
toReduceResults := make([]*internalpb.SearchResults, 0)
|
|
|
|
log.Ctx(ctx).Debug("all searches are finished or canceled")
|
|
|
|
t.resultBuf.Range(func(res *internalpb.SearchResults) bool {
|
|
|
|
toReduceResults = append(toReduceResults, res)
|
|
|
|
log.Ctx(ctx).Debug("proxy receives one search result",
|
|
|
|
zap.Int64("sourceID", res.GetBase().GetSourceID()))
|
|
|
|
return true
|
|
|
|
})
|
|
|
|
return toReduceResults, nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func doRequery(ctx context.Context,
|
|
|
|
collectionID int64,
|
|
|
|
node types.ProxyComponent,
|
|
|
|
schema *schemapb.CollectionSchema,
|
|
|
|
request *milvuspb.QueryRequest,
|
|
|
|
result *milvuspb.SearchResults,
|
2024-01-09 11:38:48 +08:00
|
|
|
queryChannelsTs map[string]Timestamp,
|
2024-01-25 11:05:07 +08:00
|
|
|
partitionIDs []int64,
|
2024-01-08 15:34:48 +08:00
|
|
|
) error {
|
|
|
|
outputFields := request.GetOutputFields()
|
|
|
|
pkField, err := typeutil.GetPrimaryFieldSchema(schema)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
ids := result.GetResults().GetIds()
|
|
|
|
plan := planparserv2.CreateRequeryPlan(pkField, ids)
|
2024-01-09 11:38:48 +08:00
|
|
|
channelsMvcc := make(map[string]Timestamp)
|
|
|
|
for k, v := range queryChannelsTs {
|
|
|
|
channelsMvcc[k] = v
|
|
|
|
}
|
2023-10-26 19:30:10 +08:00
|
|
|
qt := &queryTask{
|
2024-01-08 15:34:48 +08:00
|
|
|
ctx: ctx,
|
|
|
|
Condition: NewTaskCondition(ctx),
|
2023-10-26 19:30:10 +08:00
|
|
|
RetrieveRequest: &internalpb.RetrieveRequest{
|
|
|
|
Base: commonpbutil.NewMsgBase(
|
|
|
|
commonpbutil.WithMsgType(commonpb.MsgType_Retrieve),
|
|
|
|
commonpbutil.WithSourceID(paramtable.GetNodeID()),
|
|
|
|
),
|
2024-01-25 11:05:07 +08:00
|
|
|
ReqID: paramtable.GetNodeID(),
|
|
|
|
PartitionIDs: partitionIDs, // use search partitionIDs
|
2023-10-26 19:30:10 +08:00
|
|
|
},
|
2024-01-09 11:38:48 +08:00
|
|
|
request: request,
|
|
|
|
plan: plan,
|
|
|
|
qc: node.(*Proxy).queryCoord,
|
|
|
|
lb: node.(*Proxy).lbPolicy,
|
|
|
|
channelsMvcc: channelsMvcc,
|
|
|
|
fastSkip: true,
|
2024-01-25 11:05:07 +08:00
|
|
|
reQuery: true,
|
2023-10-26 19:30:10 +08:00
|
|
|
}
|
2024-01-08 15:34:48 +08:00
|
|
|
queryResult, err := node.(*Proxy).query(ctx, qt)
|
2023-04-23 09:00:32 +08:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if queryResult.GetStatus().GetErrorCode() != commonpb.ErrorCode_Success {
|
|
|
|
return merr.Error(queryResult.GetStatus())
|
|
|
|
}
|
|
|
|
// Reorganize Results. The order of query result ids will be altered and differ from queried ids.
|
|
|
|
// We should reorganize query results to keep the order of original queried ids. For example:
|
|
|
|
// ===========================================
|
|
|
|
// 3 2 5 4 1 (query ids)
|
|
|
|
// ||
|
|
|
|
// || (query)
|
|
|
|
// \/
|
|
|
|
// 4 3 5 1 2 (result ids)
|
|
|
|
// v4 v3 v5 v1 v2 (result vectors)
|
|
|
|
// ||
|
|
|
|
// || (reorganize)
|
|
|
|
// \/
|
|
|
|
// 3 2 5 4 1 (result ids)
|
|
|
|
// v3 v2 v5 v4 v1 (result vectors)
|
|
|
|
// ===========================================
|
2024-05-07 13:03:29 +08:00
|
|
|
_, sp := otel.Tracer(typeutil.ProxyRole).Start(ctx, "reorganizeRequeryResults")
|
|
|
|
defer sp.End()
|
2023-04-23 09:00:32 +08:00
|
|
|
pkFieldData, err := typeutil.GetPrimaryFieldData(queryResult.GetFieldsData(), pkField)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
offsets := make(map[any]int)
|
2023-06-29 11:04:22 +08:00
|
|
|
for i := 0; i < typeutil.GetPKSize(pkFieldData); i++ {
|
2023-04-23 09:00:32 +08:00
|
|
|
pk := typeutil.GetData(pkFieldData, i)
|
|
|
|
offsets[pk] = i
|
|
|
|
}
|
|
|
|
|
2024-01-08 15:34:48 +08:00
|
|
|
result.Results.FieldsData = make([]*schemapb.FieldData, len(queryResult.GetFieldsData()))
|
2023-04-23 09:00:32 +08:00
|
|
|
for i := 0; i < typeutil.GetSizeOfIDs(ids); i++ {
|
|
|
|
id := typeutil.GetPK(ids, int64(i))
|
|
|
|
if _, ok := offsets[id]; !ok {
|
2024-03-18 14:15:04 +08:00
|
|
|
return merr.WrapErrInconsistentRequery(fmt.Sprintf("incomplete query result, missing id %s, len(searchIDs) = %d, len(queryIDs) = %d, collection=%d",
|
|
|
|
id, typeutil.GetSizeOfIDs(ids), len(offsets), collectionID))
|
2023-04-23 09:00:32 +08:00
|
|
|
}
|
2024-01-08 15:34:48 +08:00
|
|
|
typeutil.AppendFieldData(result.Results.FieldsData, queryResult.GetFieldsData(), int64(offsets[id]))
|
2023-04-23 09:00:32 +08:00
|
|
|
}
|
|
|
|
|
2023-05-09 17:26:41 +08:00
|
|
|
// filter id field out if it is not specified as output
|
2024-01-08 15:34:48 +08:00
|
|
|
result.Results.FieldsData = lo.Filter(result.Results.FieldsData, func(fieldData *schemapb.FieldData, i int) bool {
|
|
|
|
return lo.Contains(outputFields, fieldData.GetFieldName())
|
2023-05-09 17:26:41 +08:00
|
|
|
})
|
|
|
|
|
2023-04-23 09:00:32 +08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2022-08-23 10:44:52 +08:00
|
|
|
func decodeSearchResults(ctx context.Context, searchResults []*internalpb.SearchResults) ([]*schemapb.SearchResultData, error) {
|
2024-05-07 13:03:29 +08:00
|
|
|
ctx, sp := otel.Tracer(typeutil.ProxyRole).Start(ctx, "decodeSearchResults")
|
|
|
|
defer sp.End()
|
2022-04-01 18:59:29 +08:00
|
|
|
tr := timerecord.NewTimeRecorder("decodeSearchResults")
|
|
|
|
results := make([]*schemapb.SearchResultData, 0)
|
|
|
|
for _, partialSearchResult := range searchResults {
|
|
|
|
if partialSearchResult.SlicedBlob == nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
var partialResultData schemapb.SearchResultData
|
|
|
|
err := proto.Unmarshal(partialSearchResult.SlicedBlob, &partialResultData)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
results = append(results, &partialResultData)
|
|
|
|
}
|
2022-08-23 10:44:52 +08:00
|
|
|
tr.CtxElapse(ctx, "decodeSearchResults done")
|
2022-04-01 18:59:29 +08:00
|
|
|
return results, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func checkSearchResultData(data *schemapb.SearchResultData, nq int64, topk int64) error {
|
|
|
|
if data.NumQueries != nq {
|
|
|
|
return fmt.Errorf("search result's nq(%d) mis-match with %d", data.NumQueries, nq)
|
|
|
|
}
|
|
|
|
if data.TopK != topk {
|
|
|
|
return fmt.Errorf("search result's topk(%d) mis-match with %d", data.TopK, topk)
|
|
|
|
}
|
2022-04-20 16:15:41 +08:00
|
|
|
|
2022-04-29 13:35:49 +08:00
|
|
|
pkHitNum := typeutil.GetSizeOfIDs(data.GetIds())
|
|
|
|
if len(data.Scores) != pkHitNum {
|
2022-04-20 16:15:41 +08:00
|
|
|
return fmt.Errorf("search result's score length invalid, score length=%d, expectedLength=%d",
|
2022-04-29 13:35:49 +08:00
|
|
|
len(data.Scores), pkHitNum)
|
2022-04-01 18:59:29 +08:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2022-09-14 20:36:32 +08:00
|
|
|
func selectHighestScoreIndex(subSearchResultData []*schemapb.SearchResultData, subSearchNqOffset [][]int64, cursors []int64, qi int64) (int, int64) {
|
|
|
|
var (
|
|
|
|
subSearchIdx = -1
|
|
|
|
resultDataIdx int64 = -1
|
|
|
|
)
|
|
|
|
maxScore := minFloat32
|
|
|
|
for i := range cursors {
|
|
|
|
if cursors[i] >= subSearchResultData[i].Topks[qi] {
|
2022-04-01 18:59:29 +08:00
|
|
|
continue
|
|
|
|
}
|
2022-09-14 20:36:32 +08:00
|
|
|
sIdx := subSearchNqOffset[i][qi] + cursors[i]
|
|
|
|
sScore := subSearchResultData[i].Scores[sIdx]
|
2022-09-26 18:02:52 +08:00
|
|
|
|
|
|
|
// Choose the larger score idx or the smaller pk idx with the same score
|
2023-02-01 14:59:51 +08:00
|
|
|
if subSearchIdx == -1 || sScore > maxScore {
|
2022-09-14 20:36:32 +08:00
|
|
|
subSearchIdx = i
|
|
|
|
resultDataIdx = sIdx
|
|
|
|
maxScore = sScore
|
2022-09-26 18:02:52 +08:00
|
|
|
} else if sScore == maxScore {
|
2022-12-13 17:03:21 +08:00
|
|
|
if subSearchIdx == -1 {
|
|
|
|
// A bad case happens where Knowhere returns distance/score == +/-maxFloat32
|
|
|
|
// by mistake.
|
|
|
|
log.Error("a bad score is returned, something is wrong here!", zap.Float32("score", sScore))
|
|
|
|
} else if typeutil.ComparePK(
|
|
|
|
typeutil.GetPK(subSearchResultData[i].GetIds(), sIdx),
|
|
|
|
typeutil.GetPK(subSearchResultData[subSearchIdx].GetIds(), resultDataIdx)) {
|
2022-09-26 18:02:52 +08:00
|
|
|
subSearchIdx = i
|
|
|
|
resultDataIdx = sIdx
|
|
|
|
maxScore = sScore
|
|
|
|
}
|
2022-04-01 18:59:29 +08:00
|
|
|
}
|
|
|
|
}
|
2022-09-14 20:36:32 +08:00
|
|
|
return subSearchIdx, resultDataIdx
|
2022-04-01 18:59:29 +08:00
|
|
|
}
|
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
func (t *searchTask) TraceCtx() context.Context {
|
|
|
|
return t.ctx
|
2022-04-01 18:59:29 +08:00
|
|
|
}
|
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
func (t *searchTask) ID() UniqueID {
|
|
|
|
return t.Base.MsgID
|
2022-04-01 18:59:29 +08:00
|
|
|
}
|
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
func (t *searchTask) SetID(uid UniqueID) {
|
|
|
|
t.Base.MsgID = uid
|
2022-04-01 18:59:29 +08:00
|
|
|
}
|
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
func (t *searchTask) Name() string {
|
2022-04-01 18:59:29 +08:00
|
|
|
return SearchTaskName
|
|
|
|
}
|
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
func (t *searchTask) Type() commonpb.MsgType {
|
|
|
|
return t.Base.MsgType
|
2022-04-01 18:59:29 +08:00
|
|
|
}
|
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
func (t *searchTask) BeginTs() Timestamp {
|
|
|
|
return t.Base.Timestamp
|
2022-04-01 18:59:29 +08:00
|
|
|
}
|
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
func (t *searchTask) EndTs() Timestamp {
|
|
|
|
return t.Base.Timestamp
|
2022-04-01 18:59:29 +08:00
|
|
|
}
|
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
func (t *searchTask) SetTs(ts Timestamp) {
|
|
|
|
t.Base.Timestamp = ts
|
2022-04-01 18:59:29 +08:00
|
|
|
}
|
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
func (t *searchTask) OnEnqueue() error {
|
2022-10-19 10:01:26 +08:00
|
|
|
t.Base = commonpbutil.NewMsgBase()
|
2022-04-20 16:15:41 +08:00
|
|
|
t.Base.MsgType = commonpb.MsgType_Search
|
2022-11-04 14:25:38 +08:00
|
|
|
t.Base.SourceID = paramtable.GetNodeID()
|
2022-04-01 18:59:29 +08:00
|
|
|
return nil
|
|
|
|
}
|