2022-07-11 14:38:24 +08:00
|
|
|
package proxy
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"fmt"
|
|
|
|
"strconv"
|
|
|
|
|
2023-04-06 19:14:32 +08:00
|
|
|
"go.opentelemetry.io/otel"
|
|
|
|
"go.uber.org/zap"
|
|
|
|
|
2023-06-09 01:28:37 +08:00
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
2022-07-11 14:38:24 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/allocator"
|
2023-04-06 19:14:32 +08:00
|
|
|
"github.com/milvus-io/milvus/pkg/log"
|
|
|
|
"github.com/milvus-io/milvus/pkg/metrics"
|
|
|
|
"github.com/milvus-io/milvus/pkg/mq/msgstream"
|
2023-09-04 09:57:09 +08:00
|
|
|
"github.com/milvus-io/milvus/pkg/util/merr"
|
2023-04-06 19:14:32 +08:00
|
|
|
"github.com/milvus-io/milvus/pkg/util/paramtable"
|
|
|
|
"github.com/milvus-io/milvus/pkg/util/timerecord"
|
|
|
|
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
2022-07-11 14:38:24 +08:00
|
|
|
)
|
|
|
|
|
|
|
|
type insertTask struct {
|
2024-02-21 09:52:59 +08:00
|
|
|
baseTask
|
2022-07-11 14:38:24 +08:00
|
|
|
// req *milvuspb.InsertRequest
|
|
|
|
Condition
|
2022-12-08 18:37:19 +08:00
|
|
|
insertMsg *BaseInsertTask
|
|
|
|
ctx context.Context
|
2022-07-11 14:38:24 +08:00
|
|
|
|
|
|
|
result *milvuspb.MutationResult
|
|
|
|
idAllocator *allocator.IDAllocator
|
|
|
|
segIDAssigner *segIDAssigner
|
|
|
|
chMgr channelsMgr
|
|
|
|
chTicker channelsTimeTicker
|
|
|
|
vChannels []vChan
|
|
|
|
pChannels []pChan
|
|
|
|
schema *schemapb.CollectionSchema
|
2023-06-06 10:24:34 +08:00
|
|
|
partitionKeys *schemapb.FieldData
|
2022-07-11 14:38:24 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// TraceCtx returns insertTask context
|
|
|
|
func (it *insertTask) TraceCtx() context.Context {
|
|
|
|
return it.ctx
|
|
|
|
}
|
|
|
|
|
|
|
|
func (it *insertTask) ID() UniqueID {
|
2022-12-08 18:37:19 +08:00
|
|
|
return it.insertMsg.Base.MsgID
|
2022-07-11 14:38:24 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func (it *insertTask) SetID(uid UniqueID) {
|
2022-12-08 18:37:19 +08:00
|
|
|
it.insertMsg.Base.MsgID = uid
|
2022-07-11 14:38:24 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func (it *insertTask) Name() string {
|
|
|
|
return InsertTaskName
|
|
|
|
}
|
|
|
|
|
|
|
|
func (it *insertTask) Type() commonpb.MsgType {
|
2022-12-08 18:37:19 +08:00
|
|
|
return it.insertMsg.Base.MsgType
|
2022-07-11 14:38:24 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func (it *insertTask) BeginTs() Timestamp {
|
2022-12-08 18:37:19 +08:00
|
|
|
return it.insertMsg.BeginTimestamp
|
2022-07-11 14:38:24 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func (it *insertTask) SetTs(ts Timestamp) {
|
2022-12-08 18:37:19 +08:00
|
|
|
it.insertMsg.BeginTimestamp = ts
|
|
|
|
it.insertMsg.EndTimestamp = ts
|
2022-07-11 14:38:24 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func (it *insertTask) EndTs() Timestamp {
|
2022-12-08 18:37:19 +08:00
|
|
|
return it.insertMsg.EndTimestamp
|
2022-07-11 14:38:24 +08:00
|
|
|
}
|
|
|
|
|
2023-06-16 16:36:40 +08:00
|
|
|
func (it *insertTask) setChannels() error {
|
2023-06-25 17:20:43 +08:00
|
|
|
collID, err := globalMetaCache.GetCollectionID(it.ctx, it.insertMsg.GetDbName(), it.insertMsg.CollectionName)
|
2022-07-11 14:38:24 +08:00
|
|
|
if err != nil {
|
2023-06-16 16:36:40 +08:00
|
|
|
return err
|
2022-07-11 14:38:24 +08:00
|
|
|
}
|
2023-04-11 11:00:31 +08:00
|
|
|
channels, err := it.chMgr.getChannels(collID)
|
|
|
|
if err != nil {
|
2023-06-16 16:36:40 +08:00
|
|
|
return err
|
2023-04-11 11:00:31 +08:00
|
|
|
}
|
|
|
|
it.pChannels = channels
|
2023-06-16 16:36:40 +08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (it *insertTask) getChannels() []pChan {
|
|
|
|
return it.pChannels
|
2022-07-11 14:38:24 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func (it *insertTask) OnEnqueue() error {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (it *insertTask) PreExecute(ctx context.Context) error {
|
2023-08-16 11:41:33 +08:00
|
|
|
ctx, sp := otel.Tracer(typeutil.ProxyRole).Start(ctx, "Proxy-Insert-PreExecute")
|
2023-01-12 16:09:39 +08:00
|
|
|
defer sp.End()
|
2022-07-11 14:38:24 +08:00
|
|
|
|
|
|
|
it.result = &milvuspb.MutationResult{
|
2023-10-11 21:01:35 +08:00
|
|
|
Status: merr.Success(),
|
2022-07-11 14:38:24 +08:00
|
|
|
IDs: &schemapb.IDs{
|
|
|
|
IdField: nil,
|
|
|
|
},
|
|
|
|
Timestamp: it.EndTs(),
|
|
|
|
}
|
|
|
|
|
2022-12-08 18:37:19 +08:00
|
|
|
collectionName := it.insertMsg.CollectionName
|
2022-07-11 14:38:24 +08:00
|
|
|
if err := validateCollectionName(collectionName); err != nil {
|
2023-06-19 13:28:41 +08:00
|
|
|
log.Warn("valid collection name failed", zap.String("collectionName", collectionName), zap.Error(err))
|
2022-07-11 14:38:24 +08:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2024-04-19 10:31:20 +08:00
|
|
|
maxInsertSize := Params.QuotaConfig.MaxInsertSize.GetAsInt()
|
|
|
|
if maxInsertSize != -1 && it.insertMsg.Size() > maxInsertSize {
|
|
|
|
log.Warn("insert request size exceeds maxInsertSize",
|
|
|
|
zap.Int("request size", it.insertMsg.Size()), zap.Int("maxInsertSize", maxInsertSize))
|
|
|
|
return merr.WrapErrParameterTooLarge("insert request size exceeds maxInsertSize")
|
|
|
|
}
|
|
|
|
|
2023-06-25 17:20:43 +08:00
|
|
|
schema, err := globalMetaCache.GetCollectionSchema(ctx, it.insertMsg.GetDbName(), collectionName)
|
2022-07-11 14:38:24 +08:00
|
|
|
if err != nil {
|
2023-06-19 13:28:41 +08:00
|
|
|
log.Warn("get collection schema from global meta cache failed", zap.String("collectionName", collectionName), zap.Error(err))
|
2022-07-11 14:38:24 +08:00
|
|
|
return err
|
|
|
|
}
|
2024-01-04 17:28:46 +08:00
|
|
|
it.schema = schema.CollectionSchema
|
2022-07-11 14:38:24 +08:00
|
|
|
|
2022-12-08 18:37:19 +08:00
|
|
|
rowNums := uint32(it.insertMsg.NRows())
|
2022-07-11 14:38:24 +08:00
|
|
|
// set insertTask.rowIDs
|
|
|
|
var rowIDBegin UniqueID
|
|
|
|
var rowIDEnd UniqueID
|
|
|
|
tr := timerecord.NewTimeRecorder("applyPK")
|
|
|
|
rowIDBegin, rowIDEnd, _ = it.idAllocator.Alloc(rowNums)
|
2022-11-29 15:23:14 +08:00
|
|
|
metrics.ProxyApplyPrimaryKeyLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(float64(tr.ElapseSpan().Milliseconds()))
|
2022-07-11 14:38:24 +08:00
|
|
|
|
2022-12-08 18:37:19 +08:00
|
|
|
it.insertMsg.RowIDs = make([]UniqueID, rowNums)
|
2022-07-11 14:38:24 +08:00
|
|
|
for i := rowIDBegin; i < rowIDEnd; i++ {
|
|
|
|
offset := i - rowIDBegin
|
2022-12-08 18:37:19 +08:00
|
|
|
it.insertMsg.RowIDs[offset] = i
|
2022-07-11 14:38:24 +08:00
|
|
|
}
|
|
|
|
// set insertTask.timeStamps
|
2022-12-08 18:37:19 +08:00
|
|
|
rowNum := it.insertMsg.NRows()
|
|
|
|
it.insertMsg.Timestamps = make([]uint64, rowNum)
|
|
|
|
for index := range it.insertMsg.Timestamps {
|
|
|
|
it.insertMsg.Timestamps[index] = it.insertMsg.BeginTimestamp
|
2022-07-11 14:38:24 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// set result.SuccIndex
|
|
|
|
sliceIndex := make([]uint32, rowNums)
|
|
|
|
for i := uint32(0); i < rowNums; i++ {
|
|
|
|
sliceIndex[i] = i
|
|
|
|
}
|
|
|
|
it.result.SuccIndex = sliceIndex
|
|
|
|
|
2023-05-18 09:33:24 +08:00
|
|
|
if it.schema.EnableDynamicField {
|
2023-08-30 10:52:26 +08:00
|
|
|
err = checkDynamicFieldData(it.schema, it.insertMsg)
|
2023-05-18 09:33:24 +08:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-07-11 14:38:24 +08:00
|
|
|
// check primaryFieldData whether autoID is true or not
|
|
|
|
// set rowIDs as primary data if autoID == true
|
2022-12-08 18:37:19 +08:00
|
|
|
// TODO(dragondriver): in fact, NumRows is not trustable, we should check all input fields
|
2023-01-17 17:53:42 +08:00
|
|
|
it.result.IDs, err = checkPrimaryFieldData(it.schema, it.result, it.insertMsg, true)
|
2022-11-14 15:29:06 +08:00
|
|
|
log := log.Ctx(ctx).With(zap.String("collectionName", collectionName))
|
2022-07-11 14:38:24 +08:00
|
|
|
if err != nil {
|
2023-06-19 13:28:41 +08:00
|
|
|
log.Warn("check primary field data and hash primary key failed",
|
2022-11-14 15:29:06 +08:00
|
|
|
zap.Error(err))
|
2022-07-11 14:38:24 +08:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// set field ID to insert field data
|
2024-01-04 17:28:46 +08:00
|
|
|
err = fillFieldIDBySchema(it.insertMsg.GetFieldsData(), schema.CollectionSchema)
|
2022-07-11 14:38:24 +08:00
|
|
|
if err != nil {
|
2023-06-06 10:24:34 +08:00
|
|
|
log.Info("set fieldID to fieldData failed",
|
2022-11-14 15:29:06 +08:00
|
|
|
zap.Error(err))
|
2022-07-11 14:38:24 +08:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2023-06-25 17:20:43 +08:00
|
|
|
partitionKeyMode, err := isPartitionKeyMode(ctx, it.insertMsg.GetDbName(), collectionName)
|
2023-06-06 10:24:34 +08:00
|
|
|
if err != nil {
|
2023-07-14 15:56:31 +08:00
|
|
|
log.Warn("check partition key mode failed", zap.String("collectionName", collectionName), zap.Error(err))
|
2023-06-06 10:24:34 +08:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
if partitionKeyMode {
|
|
|
|
fieldSchema, _ := typeutil.GetPartitionKeyFieldSchema(it.schema)
|
|
|
|
it.partitionKeys, err = getPartitionKeyFieldData(fieldSchema, it.insertMsg)
|
|
|
|
if err != nil {
|
2023-07-14 15:56:31 +08:00
|
|
|
log.Warn("get partition keys from insert request failed", zap.String("collectionName", collectionName), zap.Error(err))
|
2023-06-06 10:24:34 +08:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// set default partition name if not use partition key
|
|
|
|
// insert to _default partition
|
|
|
|
partitionTag := it.insertMsg.GetPartitionName()
|
|
|
|
if len(partitionTag) <= 0 {
|
|
|
|
partitionTag = Params.CommonCfg.DefaultPartitionName.GetValue()
|
|
|
|
it.insertMsg.PartitionName = partitionTag
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := validatePartitionTag(partitionTag, true); err != nil {
|
2023-06-19 13:28:41 +08:00
|
|
|
log.Warn("valid partition name failed", zap.String("partition name", partitionTag), zap.Error(err))
|
2023-06-06 10:24:34 +08:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-09-19 14:23:23 +08:00
|
|
|
if err := newValidateUtil(withNANCheck(), withOverflowCheck(), withMaxLenCheck(), withMaxCapCheck()).
|
2024-01-04 17:28:46 +08:00
|
|
|
Validate(it.insertMsg.GetFieldsData(), schema.CollectionSchema, it.insertMsg.NRows()); err != nil {
|
2023-03-14 14:03:58 +08:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2022-11-14 15:29:06 +08:00
|
|
|
log.Debug("Proxy Insert PreExecute done")
|
2022-07-11 14:38:24 +08:00
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (it *insertTask) Execute(ctx context.Context) error {
|
2023-08-16 11:41:33 +08:00
|
|
|
ctx, sp := otel.Tracer(typeutil.ProxyRole).Start(ctx, "Proxy-Insert-Execute")
|
2023-01-12 16:09:39 +08:00
|
|
|
defer sp.End()
|
2022-07-11 14:38:24 +08:00
|
|
|
|
|
|
|
tr := timerecord.NewTimeRecorder(fmt.Sprintf("proxy execute insert %d", it.ID()))
|
|
|
|
|
2022-12-08 18:37:19 +08:00
|
|
|
collectionName := it.insertMsg.CollectionName
|
2023-07-14 16:08:31 +08:00
|
|
|
collID, err := globalMetaCache.GetCollectionID(it.ctx, it.insertMsg.GetDbName(), collectionName)
|
|
|
|
log := log.Ctx(ctx)
|
2022-07-11 14:38:24 +08:00
|
|
|
if err != nil {
|
2023-07-14 16:08:31 +08:00
|
|
|
log.Warn("fail to get collection id", zap.Error(err))
|
2022-07-11 14:38:24 +08:00
|
|
|
return err
|
|
|
|
}
|
2022-12-08 18:37:19 +08:00
|
|
|
it.insertMsg.CollectionID = collID
|
2023-06-06 10:24:34 +08:00
|
|
|
|
2023-03-30 18:56:22 +08:00
|
|
|
getCacheDur := tr.RecordSpan()
|
2022-07-11 14:38:24 +08:00
|
|
|
stream, err := it.chMgr.getOrCreateDmlStream(collID)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2023-03-30 18:56:22 +08:00
|
|
|
getMsgStreamDur := tr.RecordSpan()
|
2022-07-11 14:38:24 +08:00
|
|
|
|
|
|
|
channelNames, err := it.chMgr.getVChannels(collID)
|
|
|
|
if err != nil {
|
2023-07-14 16:08:31 +08:00
|
|
|
log.Warn("get vChannels failed", zap.Int64("collectionID", collID), zap.Error(err))
|
2023-09-26 17:15:27 +08:00
|
|
|
it.result.Status = merr.Status(err)
|
2022-07-11 14:38:24 +08:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2023-07-14 16:08:31 +08:00
|
|
|
log.Debug("send insert request to virtual channels",
|
2022-12-08 18:37:19 +08:00
|
|
|
zap.String("partition", it.insertMsg.GetPartitionName()),
|
2023-06-19 13:28:41 +08:00
|
|
|
zap.Int64("collectionID", collID),
|
2022-07-11 14:38:24 +08:00
|
|
|
zap.Strings("virtual_channels", channelNames),
|
2023-03-30 18:56:22 +08:00
|
|
|
zap.Int64("task_id", it.ID()),
|
|
|
|
zap.Duration("get cache duration", getCacheDur),
|
|
|
|
zap.Duration("get msgStream duration", getMsgStreamDur))
|
2022-07-11 14:38:24 +08:00
|
|
|
|
|
|
|
// assign segmentID for insert data and repack data by segmentID
|
2022-12-08 18:37:19 +08:00
|
|
|
var msgPack *msgstream.MsgPack
|
2023-06-06 10:24:34 +08:00
|
|
|
if it.partitionKeys == nil {
|
|
|
|
msgPack, err = repackInsertData(it.TraceCtx(), channelNames, it.insertMsg, it.result, it.idAllocator, it.segIDAssigner)
|
|
|
|
} else {
|
|
|
|
msgPack, err = repackInsertDataWithPartitionKey(it.TraceCtx(), channelNames, it.partitionKeys, it.insertMsg, it.result, it.idAllocator, it.segIDAssigner)
|
|
|
|
}
|
2022-07-11 14:38:24 +08:00
|
|
|
if err != nil {
|
2023-07-14 16:08:31 +08:00
|
|
|
log.Warn("assign segmentID and repack insert data failed", zap.Error(err))
|
2023-09-26 17:15:27 +08:00
|
|
|
it.result.Status = merr.Status(err)
|
2022-07-11 14:38:24 +08:00
|
|
|
return err
|
|
|
|
}
|
2023-03-30 18:56:22 +08:00
|
|
|
assignSegmentIDDur := tr.RecordSpan()
|
|
|
|
|
2022-11-14 15:29:06 +08:00
|
|
|
log.Debug("assign segmentID for insert data success",
|
2023-03-30 18:56:22 +08:00
|
|
|
zap.Duration("assign segmentID duration", assignSegmentIDDur))
|
2022-07-11 14:38:24 +08:00
|
|
|
err = stream.Produce(msgPack)
|
|
|
|
if err != nil {
|
2023-07-14 16:08:31 +08:00
|
|
|
log.Warn("fail to produce insert msg", zap.Error(err))
|
2023-09-26 17:15:27 +08:00
|
|
|
it.result.Status = merr.Status(err)
|
2022-07-11 14:38:24 +08:00
|
|
|
return err
|
|
|
|
}
|
2023-03-30 18:56:22 +08:00
|
|
|
sendMsgDur := tr.RecordSpan()
|
2022-11-04 14:25:38 +08:00
|
|
|
metrics.ProxySendMutationReqLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), metrics.InsertLabel).Observe(float64(sendMsgDur.Milliseconds()))
|
2023-03-30 18:56:22 +08:00
|
|
|
totalExecDur := tr.ElapseSpan()
|
2022-11-14 15:29:06 +08:00
|
|
|
log.Debug("Proxy Insert Execute done",
|
2023-03-30 18:56:22 +08:00
|
|
|
zap.String("collectionName", collectionName),
|
|
|
|
zap.Duration("send message duration", sendMsgDur),
|
|
|
|
zap.Duration("execute duration", totalExecDur))
|
2022-07-11 14:38:24 +08:00
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (it *insertTask) PostExecute(ctx context.Context) error {
|
|
|
|
return nil
|
|
|
|
}
|