2022-04-01 18:59:29 +08:00
|
|
|
package proxy
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
2022-05-27 14:12:01 +08:00
|
|
|
"strconv"
|
2022-04-01 18:59:29 +08:00
|
|
|
"strings"
|
|
|
|
|
2022-08-16 18:16:48 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/common"
|
2022-05-19 17:15:57 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/parser/planparserv2"
|
|
|
|
|
2022-04-01 18:59:29 +08:00
|
|
|
"github.com/golang/protobuf/proto"
|
|
|
|
"go.uber.org/zap"
|
|
|
|
|
|
|
|
"github.com/milvus-io/milvus/internal/log"
|
2022-05-27 14:12:01 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/metrics"
|
2022-04-01 18:59:29 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/types"
|
2022-06-07 12:20:06 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/util/funcutil"
|
|
|
|
"github.com/milvus-io/milvus/internal/util/grpcclient"
|
2022-04-20 16:15:41 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/util/timerecord"
|
|
|
|
"github.com/milvus-io/milvus/internal/util/tsoutil"
|
|
|
|
"github.com/milvus-io/milvus/internal/util/typeutil"
|
2022-04-01 18:59:29 +08:00
|
|
|
|
|
|
|
"github.com/milvus-io/milvus/internal/proto/commonpb"
|
|
|
|
"github.com/milvus-io/milvus/internal/proto/internalpb"
|
|
|
|
"github.com/milvus-io/milvus/internal/proto/milvuspb"
|
|
|
|
"github.com/milvus-io/milvus/internal/proto/querypb"
|
|
|
|
"github.com/milvus-io/milvus/internal/proto/schemapb"
|
2022-04-20 16:15:41 +08:00
|
|
|
)
|
2022-04-01 18:59:29 +08:00
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
const (
	// WithCache is passed as the withCache argument of globalMetaCache.GetShards
	// to reuse the cached shard-leader list.
	WithCache = true
	// WithoutCache forces globalMetaCache.GetShards to refresh the shard-leader list.
	WithoutCache = false
)
|
|
|
|
|
|
|
|
// queryTask is the proxy-side task for a Query (retrieve) request. It embeds
// Condition and the internal RetrieveRequest, and implements the task
// interface (PreExecute/Execute/PostExecute).
type queryTask struct {
	Condition
	*internalpb.RetrieveRequest

	ctx    context.Context
	result *milvuspb.QueryResults // final merged result returned to the client
	request *milvuspb.QueryRequest // original user request
	qc      types.QueryCoord       // passed to checkIfLoaded in PreExecute
	ids     *schemapb.IDs          // when non-nil, PreExecute builds the expression from these primary keys via IDs2Expr
	collectionName string
	queryParams    *queryParams // limit/offset parsed from request.GetQueryParams()

	resultBuf       chan *internalpb.RetrieveResults // per-shard results pushed by queryShard
	toReduceResults []*internalpb.RetrieveResults    // drained from resultBuf and merged in PostExecute

	queryShardPolicy pickShardPolicy // defaults to mergeRoundRobinPolicy in PreExecute
	shardMgr         *shardClientMgr
}
|
|
|
|
|
2022-09-01 18:54:58 +08:00
|
|
|
// queryParams holds the pagination parameters parsed by parseQueryParams.
// PreExecute sets RetrieveRequest.Limit to limit+offset.
type queryParams struct {
	limit  int64 // max number of rows requested by the user; 0 when no limit was provided
	offset int64 // number of leading rows to skip
}
|
|
|
|
|
2022-06-23 10:46:13 +08:00
|
|
|
// translateOutputFields translates output fields name to output fields id.
|
|
|
|
func translateToOutputFieldIDs(outputFields []string, schema *schemapb.CollectionSchema) ([]UniqueID, error) {
|
2022-08-16 18:16:48 +08:00
|
|
|
outputFieldIDs := make([]UniqueID, 0, len(outputFields)+1)
|
2022-06-23 10:46:13 +08:00
|
|
|
if len(outputFields) == 0 {
|
|
|
|
for _, field := range schema.Fields {
|
2022-08-16 18:16:48 +08:00
|
|
|
if field.FieldID >= common.StartOfUserFieldID && field.DataType != schemapb.DataType_FloatVector && field.DataType != schemapb.DataType_BinaryVector {
|
2022-06-23 10:46:13 +08:00
|
|
|
outputFieldIDs = append(outputFieldIDs, field.FieldID)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
2022-08-16 18:16:48 +08:00
|
|
|
var pkFieldID UniqueID
|
|
|
|
for _, field := range schema.Fields {
|
|
|
|
if field.IsPrimaryKey {
|
|
|
|
pkFieldID = field.FieldID
|
|
|
|
}
|
|
|
|
}
|
2022-06-23 10:46:13 +08:00
|
|
|
for _, reqField := range outputFields {
|
2022-08-16 18:16:48 +08:00
|
|
|
var fieldFound bool
|
2022-06-23 10:46:13 +08:00
|
|
|
for _, field := range schema.Fields {
|
|
|
|
if reqField == field.Name {
|
|
|
|
outputFieldIDs = append(outputFieldIDs, field.FieldID)
|
2022-08-16 18:16:48 +08:00
|
|
|
fieldFound = true
|
|
|
|
break
|
2022-06-23 10:46:13 +08:00
|
|
|
}
|
|
|
|
}
|
2022-08-16 18:16:48 +08:00
|
|
|
if !fieldFound {
|
2022-06-23 10:46:13 +08:00
|
|
|
return nil, fmt.Errorf("field %s not exist", reqField)
|
|
|
|
}
|
|
|
|
}
|
2022-08-16 18:16:48 +08:00
|
|
|
|
|
|
|
// pk field needs to be in output field list
|
|
|
|
var pkFound bool
|
|
|
|
for _, outputField := range outputFieldIDs {
|
|
|
|
if outputField == pkFieldID {
|
|
|
|
pkFound = true
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if !pkFound {
|
|
|
|
outputFieldIDs = append(outputFieldIDs, pkFieldID)
|
|
|
|
}
|
|
|
|
|
2022-06-23 10:46:13 +08:00
|
|
|
}
|
|
|
|
return outputFieldIDs, nil
|
|
|
|
}
|
|
|
|
|
2022-09-01 18:54:58 +08:00
|
|
|
// parseQueryParams get limit and offset from queryParamsPair, both are optional.
|
|
|
|
func parseQueryParams(queryParamsPair []*commonpb.KeyValuePair) (*queryParams, error) {
|
|
|
|
var (
|
|
|
|
limit int64
|
|
|
|
offset int64
|
|
|
|
err error
|
|
|
|
)
|
|
|
|
|
|
|
|
// if limit is provided
|
|
|
|
limitStr, err := funcutil.GetAttrByKeyFromRepeatedKV(LimitKey, queryParamsPair)
|
|
|
|
if err != nil {
|
|
|
|
return &queryParams{}, nil
|
|
|
|
}
|
|
|
|
limit, err = strconv.ParseInt(limitStr, 0, 64)
|
|
|
|
if err != nil || limit <= 0 {
|
|
|
|
return nil, fmt.Errorf("%s [%s] is invalid", LimitKey, limitStr)
|
|
|
|
}
|
|
|
|
|
|
|
|
// if offset is provided
|
|
|
|
if offsetStr, err := funcutil.GetAttrByKeyFromRepeatedKV(OffsetKey, queryParamsPair); err == nil {
|
|
|
|
offset, err = strconv.ParseInt(offsetStr, 0, 64)
|
|
|
|
if err != nil || offset < 0 {
|
|
|
|
return nil, fmt.Errorf("%s [%s] is invalid", OffsetKey, offsetStr)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if err = validateTopK(limit + offset); err != nil {
|
|
|
|
return nil, fmt.Errorf("invalid limit[%d] + offset[%d], %w", limit, offset, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
return &queryParams{
|
|
|
|
limit: limit,
|
|
|
|
offset: offset,
|
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
func (t *queryTask) PreExecute(ctx context.Context) error {
|
|
|
|
if t.queryShardPolicy == nil {
|
2022-07-06 15:06:21 +08:00
|
|
|
t.queryShardPolicy = mergeRoundRobinPolicy
|
2022-04-20 16:15:41 +08:00
|
|
|
}
|
2022-04-01 18:59:29 +08:00
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
t.Base.MsgType = commonpb.MsgType_Retrieve
|
2022-04-24 22:03:44 +08:00
|
|
|
t.Base.SourceID = Params.ProxyCfg.GetNodeID()
|
2022-04-20 16:15:41 +08:00
|
|
|
|
|
|
|
collectionName := t.request.CollectionName
|
|
|
|
t.collectionName = collectionName
|
|
|
|
if err := validateCollectionName(collectionName); err != nil {
|
2022-08-23 10:44:52 +08:00
|
|
|
log.Ctx(ctx).Warn("Invalid collection name.", zap.String("collectionName", collectionName),
|
2022-06-23 10:46:13 +08:00
|
|
|
zap.Int64("msgID", t.ID()), zap.String("requestType", "query"))
|
2022-04-01 18:59:29 +08:00
|
|
|
return err
|
|
|
|
}
|
2022-04-20 16:15:41 +08:00
|
|
|
|
2022-08-23 10:44:52 +08:00
|
|
|
log.Ctx(ctx).Debug("Validate collection name.", zap.Any("collectionName", collectionName),
|
2022-06-23 10:46:13 +08:00
|
|
|
zap.Int64("msgID", t.ID()), zap.Any("requestType", "query"))
|
2022-04-01 18:59:29 +08:00
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
collID, err := globalMetaCache.GetCollectionID(ctx, collectionName)
|
2022-04-01 18:59:29 +08:00
|
|
|
if err != nil {
|
2022-08-23 10:44:52 +08:00
|
|
|
log.Ctx(ctx).Warn("Failed to get collection id.", zap.Any("collectionName", collectionName),
|
2022-06-23 10:46:13 +08:00
|
|
|
zap.Int64("msgID", t.ID()), zap.Any("requestType", "query"))
|
2022-04-01 18:59:29 +08:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
t.CollectionID = collID
|
2022-08-23 10:44:52 +08:00
|
|
|
log.Ctx(ctx).Debug("Get collection ID by name",
|
2022-04-20 16:15:41 +08:00
|
|
|
zap.Int64("collectionID", t.CollectionID), zap.String("collection name", collectionName),
|
2022-06-23 10:46:13 +08:00
|
|
|
zap.Int64("msgID", t.ID()), zap.Any("requestType", "query"))
|
2022-04-20 16:15:41 +08:00
|
|
|
|
|
|
|
for _, tag := range t.request.PartitionNames {
|
2022-04-01 18:59:29 +08:00
|
|
|
if err := validatePartitionTag(tag, false); err != nil {
|
2022-08-23 10:44:52 +08:00
|
|
|
log.Ctx(ctx).Warn("invalid partition name", zap.String("partition name", tag),
|
2022-06-23 10:46:13 +08:00
|
|
|
zap.Int64("msgID", t.ID()), zap.Any("requestType", "query"))
|
2022-04-01 18:59:29 +08:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
2022-08-23 10:44:52 +08:00
|
|
|
log.Ctx(ctx).Debug("Validate partition names.",
|
2022-06-23 10:46:13 +08:00
|
|
|
zap.Int64("msgID", t.ID()), zap.Any("requestType", "query"))
|
2022-04-01 18:59:29 +08:00
|
|
|
|
2022-06-23 10:46:13 +08:00
|
|
|
t.RetrieveRequest.PartitionIDs, err = getPartitionIDs(ctx, collectionName, t.request.GetPartitionNames())
|
2022-04-01 18:59:29 +08:00
|
|
|
if err != nil {
|
2022-08-23 10:44:52 +08:00
|
|
|
log.Ctx(ctx).Warn("failed to get partitions in collection.", zap.String("collection name", collectionName),
|
2022-06-23 10:46:13 +08:00
|
|
|
zap.Error(err),
|
|
|
|
zap.Int64("msgID", t.ID()), zap.Any("requestType", "query"))
|
2022-04-01 18:59:29 +08:00
|
|
|
return err
|
|
|
|
}
|
2022-08-23 10:44:52 +08:00
|
|
|
log.Ctx(ctx).Debug("Get partitions in collection.", zap.Any("collectionName", collectionName),
|
2022-06-23 10:46:13 +08:00
|
|
|
zap.Int64("msgID", t.ID()), zap.Any("requestType", "query"))
|
2022-04-20 16:15:41 +08:00
|
|
|
|
2022-09-01 18:54:58 +08:00
|
|
|
queryParams, err := parseQueryParams(t.request.GetQueryParams())
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
t.queryParams = queryParams
|
|
|
|
t.RetrieveRequest.Limit = queryParams.limit + queryParams.offset
|
|
|
|
|
2022-06-23 10:46:13 +08:00
|
|
|
loaded, err := checkIfLoaded(ctx, t.qc, collectionName, t.RetrieveRequest.GetPartitionIDs())
|
2022-05-19 10:13:56 +08:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("checkIfLoaded failed when query, collection:%v, partitions:%v, err = %s", collectionName, t.request.GetPartitionNames(), err)
|
|
|
|
}
|
|
|
|
if !loaded {
|
|
|
|
return fmt.Errorf("collection:%v or partition:%v not loaded into memory when query", collectionName, t.request.GetPartitionNames())
|
2022-04-01 18:59:29 +08:00
|
|
|
}
|
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
schema, _ := globalMetaCache.GetCollectionSchema(ctx, collectionName)
|
2022-04-01 18:59:29 +08:00
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
if t.ids != nil {
|
2022-04-01 18:59:29 +08:00
|
|
|
pkField := ""
|
|
|
|
for _, field := range schema.Fields {
|
|
|
|
if field.IsPrimaryKey {
|
|
|
|
pkField = field.Name
|
|
|
|
}
|
|
|
|
}
|
2022-04-29 13:35:49 +08:00
|
|
|
t.request.Expr = IDs2Expr(pkField, t.ids)
|
2022-04-01 18:59:29 +08:00
|
|
|
}
|
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
if t.request.Expr == "" {
|
|
|
|
return fmt.Errorf("query expression is empty")
|
2022-04-01 18:59:29 +08:00
|
|
|
}
|
|
|
|
|
2022-05-19 17:15:57 +08:00
|
|
|
plan, err := planparserv2.CreateRetrievePlan(schema, t.request.Expr)
|
2022-04-01 18:59:29 +08:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2022-04-20 16:15:41 +08:00
|
|
|
t.request.OutputFields, err = translateOutputFields(t.request.OutputFields, schema, true)
|
2022-04-01 18:59:29 +08:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2022-08-23 10:44:52 +08:00
|
|
|
log.Ctx(ctx).Debug("translate output fields", zap.Any("OutputFields", t.request.OutputFields),
|
2022-06-23 10:46:13 +08:00
|
|
|
zap.Int64("msgID", t.ID()), zap.Any("requestType", "query"))
|
2022-04-20 16:15:41 +08:00
|
|
|
|
2022-06-23 10:46:13 +08:00
|
|
|
outputFieldIDs, err := translateToOutputFieldIDs(t.request.GetOutputFields(), schema)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
2022-04-01 18:59:29 +08:00
|
|
|
}
|
2022-06-23 10:46:13 +08:00
|
|
|
t.RetrieveRequest.OutputFieldsId = outputFieldIDs
|
|
|
|
plan.OutputFieldIds = outputFieldIDs
|
2022-08-23 10:44:52 +08:00
|
|
|
log.Ctx(ctx).Debug("translate output fields to field ids", zap.Any("OutputFieldsID", t.OutputFieldsId),
|
2022-06-23 10:46:13 +08:00
|
|
|
zap.Int64("msgID", t.ID()), zap.Any("requestType", "query"))
|
2022-04-01 18:59:29 +08:00
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
t.RetrieveRequest.SerializedExprPlan, err = proto.Marshal(plan)
|
2022-04-01 18:59:29 +08:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2022-04-20 16:15:41 +08:00
|
|
|
|
|
|
|
if t.request.TravelTimestamp == 0 {
|
|
|
|
t.TravelTimestamp = t.BeginTs()
|
2022-04-01 18:59:29 +08:00
|
|
|
} else {
|
2022-04-20 16:15:41 +08:00
|
|
|
t.TravelTimestamp = t.request.TravelTimestamp
|
2022-04-01 18:59:29 +08:00
|
|
|
}
|
|
|
|
|
2022-05-24 12:05:59 +08:00
|
|
|
err = validateTravelTimestamp(t.TravelTimestamp, t.BeginTs())
|
|
|
|
if err != nil {
|
|
|
|
return err
|
2022-04-01 18:59:29 +08:00
|
|
|
}
|
|
|
|
|
2022-05-24 12:05:59 +08:00
|
|
|
guaranteeTs := t.request.GetGuaranteeTimestamp()
|
|
|
|
t.GuaranteeTimestamp = parseGuaranteeTs(guaranteeTs, t.BeginTs())
|
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
deadline, ok := t.TraceCtx().Deadline()
|
|
|
|
if ok {
|
|
|
|
t.TimeoutTimestamp = tsoutil.ComposeTSByTime(deadline, 0)
|
2022-04-01 18:59:29 +08:00
|
|
|
}
|
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
t.DbID = 0 // TODO
|
2022-08-23 10:44:52 +08:00
|
|
|
log.Ctx(ctx).Debug("Query PreExecute done.",
|
2022-06-23 10:46:13 +08:00
|
|
|
zap.Int64("msgID", t.ID()), zap.Any("requestType", "query"),
|
|
|
|
zap.Uint64("guarantee_ts", guaranteeTs), zap.Uint64("travel_ts", t.GetTravelTimestamp()),
|
|
|
|
zap.Uint64("timeout_ts", t.GetTimeoutTimestamp()))
|
2022-04-01 18:59:29 +08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
func (t *queryTask) Execute(ctx context.Context) error {
|
|
|
|
tr := timerecord.NewTimeRecorder(fmt.Sprintf("proxy execute query %d", t.ID()))
|
2022-08-23 10:44:52 +08:00
|
|
|
defer tr.CtxElapse(ctx, "done")
|
2022-04-01 18:59:29 +08:00
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
executeQuery := func(withCache bool) error {
|
2022-06-02 12:16:03 +08:00
|
|
|
shards, err := globalMetaCache.GetShards(ctx, withCache, t.collectionName)
|
2022-04-01 18:59:29 +08:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2022-04-20 16:15:41 +08:00
|
|
|
t.resultBuf = make(chan *internalpb.RetrieveResults, len(shards))
|
|
|
|
t.toReduceResults = make([]*internalpb.RetrieveResults, 0, len(shards))
|
|
|
|
|
2022-07-06 15:06:21 +08:00
|
|
|
if err := t.queryShardPolicy(ctx, t.shardMgr, t.queryShard, shards); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
return nil
|
2022-04-01 18:59:29 +08:00
|
|
|
}
|
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
err := executeQuery(WithCache)
|
2022-06-07 12:20:06 +08:00
|
|
|
if errors.Is(err, errInvalidShardLeaders) || funcutil.IsGrpcErr(err) || errors.Is(err, grpcclient.ErrConnect) {
|
2022-08-23 10:44:52 +08:00
|
|
|
log.Ctx(ctx).Warn("invalid shard leaders cache, updating shardleader caches and retry search",
|
2022-06-23 10:46:13 +08:00
|
|
|
zap.Int64("msgID", t.ID()), zap.Error(err))
|
2022-04-20 16:15:41 +08:00
|
|
|
return executeQuery(WithoutCache)
|
|
|
|
}
|
2022-04-01 18:59:29 +08:00
|
|
|
if err != nil {
|
2022-04-26 11:27:53 +08:00
|
|
|
return fmt.Errorf("fail to search on all shard leaders, err=%s", err.Error())
|
2022-04-01 18:59:29 +08:00
|
|
|
}
|
|
|
|
|
2022-08-23 10:44:52 +08:00
|
|
|
log.Ctx(ctx).Debug("Query Execute done.",
|
2022-06-23 10:46:13 +08:00
|
|
|
zap.Int64("msgID", t.ID()), zap.Any("requestType", "query"))
|
2022-04-20 16:15:41 +08:00
|
|
|
return nil
|
2022-04-01 18:59:29 +08:00
|
|
|
}
|
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
// PostExecute drains the per-shard results collected during Execute, merges
// them (deduplicating by primary key) into t.result, and back-fills field
// name/id/type on each returned column from the collection schema.
func (t *queryTask) PostExecute(ctx context.Context) error {
	tr := timerecord.NewTimeRecorder("queryTask PostExecute")
	defer func() {
		tr.CtxElapse(ctx, "done")
	}()

	var err error

	// If the trace context is already done, give up without reducing;
	// otherwise drain every buffered shard result.
	select {
	case <-t.TraceCtx().Done():
		log.Ctx(ctx).Warn("proxy", zap.Int64("Query: wait to finish failed, timeout!, msgID:", t.ID()))
		return nil
	default:
		log.Ctx(ctx).Debug("all queries are finished or canceled", zap.Int64("msgID", t.ID()))
		// NOTE(review): closing resultBuf assumes every queryShard sender has
		// already returned by the time PostExecute runs — confirm the task
		// scheduler guarantees this ordering.
		close(t.resultBuf)
		for res := range t.resultBuf {
			t.toReduceResults = append(t.toReduceResults, res)
			log.Ctx(ctx).Debug("proxy receives one query result", zap.Int64("sourceID", res.GetBase().GetSourceID()), zap.Any("msgID", t.ID()))
		}
	}

	// Decode latency is recorded as a constant zero on this path.
	metrics.ProxyDecodeResultLatency.WithLabelValues(strconv.FormatInt(Params.ProxyCfg.GetNodeID(), 10), metrics.QueryLabel).Observe(0.0)
	tr.CtxRecord(ctx, "reduceResultStart")
	t.result, err = mergeRetrieveResults(ctx, t.toReduceResults)
	if err != nil {
		return err
	}
	metrics.ProxyReduceResultLatency.WithLabelValues(strconv.FormatInt(Params.ProxyCfg.GetNodeID(), 10), metrics.QueryLabel).Observe(float64(tr.RecordSpan().Milliseconds()))
	t.result.CollectionName = t.collectionName

	if len(t.result.FieldsData) > 0 {
		t.result.Status = &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_Success,
		}
	} else {
		// Empty merged result: report EmptyCollection and return early
		// without attaching field metadata.
		log.Ctx(ctx).Warn("Query result is nil", zap.Int64("msgID", t.ID()), zap.Any("requestType", "query"))
		t.result.Status = &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_EmptyCollection,
			Reason:    "empty collection", // TODO
		}
		return nil
	}

	// Back-fill name/id/type for each returned column, matching the output
	// field IDs resolved in PreExecute.
	schema, err := globalMetaCache.GetCollectionSchema(ctx, t.request.CollectionName)
	if err != nil {
		return err
	}
	for i := 0; i < len(t.result.FieldsData); i++ {
		for _, field := range schema.Fields {
			if field.FieldID == t.OutputFieldsId[i] {
				t.result.FieldsData[i].FieldName = field.Name
				t.result.FieldsData[i].FieldId = field.FieldID
				t.result.FieldsData[i].Type = field.DataType
			}
		}
	}
	log.Ctx(ctx).Debug("Query PostExecute done", zap.Int64("msgID", t.ID()), zap.String("requestType", "query"))
	return nil
}
|
|
|
|
|
2022-07-06 15:06:21 +08:00
|
|
|
func (t *queryTask) queryShard(ctx context.Context, nodeID int64, qn types.QueryNode, channelIDs []string) error {
|
|
|
|
req := &querypb.QueryRequest{
|
|
|
|
Req: t.RetrieveRequest,
|
|
|
|
DmlChannels: channelIDs,
|
|
|
|
Scope: querypb.DataScope_All,
|
2022-04-01 18:59:29 +08:00
|
|
|
}
|
|
|
|
|
2022-07-06 15:06:21 +08:00
|
|
|
result, err := qn.Query(ctx, req)
|
2022-04-01 18:59:29 +08:00
|
|
|
if err != nil {
|
2022-08-23 10:44:52 +08:00
|
|
|
log.Ctx(ctx).Warn("QueryNode query return error", zap.Int64("msgID", t.ID()),
|
2022-07-06 15:06:21 +08:00
|
|
|
zap.Int64("nodeID", nodeID), zap.Strings("channels", channelIDs), zap.Error(err))
|
2022-04-20 16:15:41 +08:00
|
|
|
return err
|
2022-04-01 18:59:29 +08:00
|
|
|
}
|
2022-07-06 15:06:21 +08:00
|
|
|
if result.GetStatus().GetErrorCode() == commonpb.ErrorCode_NotShardLeader {
|
2022-08-23 10:44:52 +08:00
|
|
|
log.Ctx(ctx).Warn("QueryNode is not shardLeader", zap.Int64("nodeID", nodeID), zap.Strings("channels", channelIDs))
|
2022-07-06 15:06:21 +08:00
|
|
|
return errInvalidShardLeaders
|
|
|
|
}
|
|
|
|
if result.GetStatus().GetErrorCode() != commonpb.ErrorCode_Success {
|
2022-08-23 10:44:52 +08:00
|
|
|
log.Ctx(ctx).Warn("QueryNode query result error", zap.Int64("msgID", t.ID()), zap.Int64("nodeID", nodeID),
|
2022-07-06 15:06:21 +08:00
|
|
|
zap.String("reason", result.GetStatus().GetReason()))
|
|
|
|
return fmt.Errorf("fail to Query, QueryNode ID = %d, reason=%s", nodeID, result.GetStatus().GetReason())
|
|
|
|
}
|
2022-04-20 16:15:41 +08:00
|
|
|
|
2022-08-23 10:44:52 +08:00
|
|
|
log.Ctx(ctx).Debug("get query result", zap.Int64("msgID", t.ID()), zap.Int64("nodeID", nodeID), zap.Strings("channelIDs", channelIDs))
|
2022-07-06 15:06:21 +08:00
|
|
|
t.resultBuf <- result
|
2022-04-20 16:15:41 +08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2022-04-01 18:59:29 +08:00
|
|
|
// IDs2Expr converts ids slices to bool expresion with specified field name
|
2022-04-29 13:35:49 +08:00
|
|
|
func IDs2Expr(fieldName string, ids *schemapb.IDs) string {
|
|
|
|
var idsStr string
|
|
|
|
switch ids.GetIdField().(type) {
|
|
|
|
case *schemapb.IDs_IntId:
|
|
|
|
idsStr = strings.Trim(strings.Join(strings.Fields(fmt.Sprint(ids.GetIntId().GetData())), ", "), "[]")
|
|
|
|
case *schemapb.IDs_StrId:
|
|
|
|
idsStr = strings.Trim(strings.Join(ids.GetStrId().GetData(), ", "), "[]")
|
|
|
|
}
|
|
|
|
|
2022-04-01 18:59:29 +08:00
|
|
|
return fieldName + " in [ " + idsStr + " ]"
|
|
|
|
}
|
|
|
|
|
2022-08-23 10:44:52 +08:00
|
|
|
func mergeRetrieveResults(ctx context.Context, retrieveResults []*internalpb.RetrieveResults) (*milvuspb.QueryResults, error) {
|
2022-04-01 18:59:29 +08:00
|
|
|
var ret *milvuspb.QueryResults
|
|
|
|
var skipDupCnt int64
|
2022-04-29 13:35:49 +08:00
|
|
|
var idSet = make(map[interface{}]struct{})
|
2022-04-01 18:59:29 +08:00
|
|
|
|
|
|
|
// merge results and remove duplicates
|
|
|
|
for _, rr := range retrieveResults {
|
2022-04-29 13:35:49 +08:00
|
|
|
numPks := typeutil.GetSizeOfIDs(rr.GetIds())
|
2022-04-01 18:59:29 +08:00
|
|
|
// skip empty result, it will break merge result
|
2022-04-29 13:35:49 +08:00
|
|
|
if rr == nil || rr.Ids == nil || rr.GetIds() == nil || numPks == 0 {
|
2022-04-01 18:59:29 +08:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if ret == nil {
|
|
|
|
ret = &milvuspb.QueryResults{
|
|
|
|
FieldsData: make([]*schemapb.FieldData, len(rr.FieldsData)),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(ret.FieldsData) != len(rr.FieldsData) {
|
|
|
|
return nil, fmt.Errorf("mismatch FieldData in proxy RetrieveResults, expect %d get %d", len(ret.FieldsData), len(rr.FieldsData))
|
|
|
|
}
|
|
|
|
|
2022-04-29 13:35:49 +08:00
|
|
|
for i := 0; i < numPks; i++ {
|
|
|
|
id := typeutil.GetPK(rr.GetIds(), int64(i))
|
2022-04-01 18:59:29 +08:00
|
|
|
if _, ok := idSet[id]; !ok {
|
|
|
|
typeutil.AppendFieldData(ret.FieldsData, rr.FieldsData, int64(i))
|
|
|
|
idSet[id] = struct{}{}
|
|
|
|
} else {
|
|
|
|
// primary keys duplicate
|
|
|
|
skipDupCnt++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2022-08-23 10:44:52 +08:00
|
|
|
log.Ctx(ctx).Debug("skip duplicated query result", zap.Int64("count", skipDupCnt))
|
2022-04-01 18:59:29 +08:00
|
|
|
|
|
|
|
if ret == nil {
|
|
|
|
ret = &milvuspb.QueryResults{
|
|
|
|
FieldsData: []*schemapb.FieldData{},
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret, nil
|
|
|
|
}
|
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
// TraceCtx returns the context this task carries for tracing.
func (t *queryTask) TraceCtx() context.Context {
	return t.ctx
}
|
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
// ID returns the message ID of this task.
func (t *queryTask) ID() UniqueID {
	return t.Base.MsgID
}
|
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
// SetID sets the message ID of this task.
func (t *queryTask) SetID(uid UniqueID) {
	t.Base.MsgID = uid
}
|
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
// Name returns the task's name (RetrieveTaskName).
func (t *queryTask) Name() string {
	return RetrieveTaskName
}
|
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
// Type returns the message type of this task (MsgType_Retrieve after OnEnqueue/PreExecute).
func (t *queryTask) Type() commonpb.MsgType {
	return t.Base.MsgType
}
|
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
// BeginTs returns the task's timestamp (begin and end share Base.Timestamp).
func (t *queryTask) BeginTs() Timestamp {
	return t.Base.Timestamp
}
|
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
// EndTs returns the task's timestamp (same value as BeginTs).
func (t *queryTask) EndTs() Timestamp {
	return t.Base.Timestamp
}
|
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
// SetTs sets the task's timestamp.
func (t *queryTask) SetTs(ts Timestamp) {
	t.Base.Timestamp = ts
}
|
|
|
|
|
2022-04-20 16:15:41 +08:00
|
|
|
// OnEnqueue marks the task as a Retrieve message when it enters the task queue.
func (t *queryTask) OnEnqueue() error {
	t.Base.MsgType = commonpb.MsgType_Retrieve
	return nil
}
|