mirror of
https://gitee.com/milvus-io/milvus.git
synced 2024-12-03 12:29:36 +08:00
82ad488555
Signed-off-by: GuoRentong <rentong.guo@zilliz.com>
202 lines
6.3 KiB
Go
202 lines
6.3 KiB
Go
package proxy
|
|
|
|
import (
|
|
"log"
|
|
"sort"
|
|
|
|
"github.com/zilliztech/milvus-distributed/internal/util/typeutil"
|
|
|
|
"github.com/zilliztech/milvus-distributed/internal/allocator"
|
|
"github.com/zilliztech/milvus-distributed/internal/errors"
|
|
"github.com/zilliztech/milvus-distributed/internal/msgstream"
|
|
"github.com/zilliztech/milvus-distributed/internal/proto/commonpb"
|
|
"github.com/zilliztech/milvus-distributed/internal/proto/internalpb"
|
|
)
|
|
|
|
func insertRepackFunc(tsMsgs []msgstream.TsMsg,
|
|
hashKeys [][]int32,
|
|
segIDAssigner *allocator.SegIDAssigner,
|
|
together bool) (map[int32]*msgstream.MsgPack, error) {
|
|
|
|
result := make(map[int32]*msgstream.MsgPack)
|
|
|
|
channelCountMap := make(map[UniqueID]map[int32]uint32) // reqID --> channelID to count
|
|
channelMaxTSMap := make(map[UniqueID]map[int32]Timestamp) // reqID --> channelID to max Timestamp
|
|
reqSchemaMap := make(map[UniqueID][]string)
|
|
|
|
for i, request := range tsMsgs {
|
|
if request.Type() != internalpb.MsgType_kInsert {
|
|
return nil, errors.New(string("msg's must be Insert"))
|
|
}
|
|
insertRequest, ok := request.(*msgstream.InsertMsg)
|
|
if !ok {
|
|
return nil, errors.New(string("msg's must be Insert"))
|
|
}
|
|
|
|
keys := hashKeys[i]
|
|
timestampLen := len(insertRequest.Timestamps)
|
|
rowIDLen := len(insertRequest.RowIDs)
|
|
rowDataLen := len(insertRequest.RowData)
|
|
keysLen := len(keys)
|
|
|
|
if keysLen != timestampLen || keysLen != rowIDLen || keysLen != rowDataLen {
|
|
return nil, errors.New(string("the length of hashValue, timestamps, rowIDs, RowData are not equal"))
|
|
}
|
|
|
|
reqID := insertRequest.ReqID
|
|
if _, ok := channelCountMap[reqID]; !ok {
|
|
channelCountMap[reqID] = make(map[int32]uint32)
|
|
}
|
|
|
|
if _, ok := channelMaxTSMap[reqID]; !ok {
|
|
channelMaxTSMap[reqID] = make(map[int32]Timestamp)
|
|
}
|
|
|
|
if _, ok := reqSchemaMap[reqID]; !ok {
|
|
reqSchemaMap[reqID] = []string{insertRequest.CollectionName, insertRequest.PartitionTag}
|
|
}
|
|
|
|
for idx, channelID := range keys {
|
|
channelCountMap[reqID][channelID]++
|
|
if _, ok := channelMaxTSMap[reqID][channelID]; !ok {
|
|
channelMaxTSMap[reqID][channelID] = typeutil.ZeroTimestamp
|
|
}
|
|
ts := insertRequest.Timestamps[idx]
|
|
if channelMaxTSMap[reqID][channelID] < ts {
|
|
channelMaxTSMap[reqID][channelID] = ts
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
reqSegCountMap := make(map[UniqueID]map[int32]map[UniqueID]uint32)
|
|
|
|
for reqID, countInfo := range channelCountMap {
|
|
if _, ok := reqSegCountMap[reqID]; !ok {
|
|
reqSegCountMap[reqID] = make(map[int32]map[UniqueID]uint32)
|
|
}
|
|
schema := reqSchemaMap[reqID]
|
|
collName, partitionTag := schema[0], schema[1]
|
|
for channelID, count := range countInfo {
|
|
ts, ok := channelMaxTSMap[reqID][channelID]
|
|
if !ok {
|
|
ts = typeutil.ZeroTimestamp
|
|
log.Println("Warning: did not get max Timstamp!")
|
|
}
|
|
mapInfo, err := segIDAssigner.GetSegmentID(collName, partitionTag, channelID, count, ts)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
reqSegCountMap[reqID][channelID] = make(map[UniqueID]uint32)
|
|
reqSegCountMap[reqID][channelID] = mapInfo
|
|
}
|
|
}
|
|
|
|
reqSegAccumulateCountMap := make(map[UniqueID]map[int32][]uint32)
|
|
reqSegIDMap := make(map[UniqueID]map[int32][]UniqueID)
|
|
reqSegAllocateCounter := make(map[UniqueID]map[int32]uint32)
|
|
|
|
for reqID, channelInfo := range reqSegCountMap {
|
|
if _, ok := reqSegAccumulateCountMap[reqID]; !ok {
|
|
reqSegAccumulateCountMap[reqID] = make(map[int32][]uint32)
|
|
}
|
|
if _, ok := reqSegIDMap[reqID]; !ok {
|
|
reqSegIDMap[reqID] = make(map[int32][]UniqueID)
|
|
}
|
|
if _, ok := reqSegAllocateCounter[reqID]; !ok {
|
|
reqSegAllocateCounter[reqID] = make(map[int32]uint32)
|
|
}
|
|
for channelID, segInfo := range channelInfo {
|
|
reqSegAllocateCounter[reqID][channelID] = 0
|
|
keys := make([]UniqueID, len(segInfo))
|
|
i := 0
|
|
for key := range segInfo {
|
|
keys[i] = key
|
|
i++
|
|
}
|
|
sort.Slice(keys, func(i, j int) bool { return keys[i] < keys[j] })
|
|
accumulate := uint32(0)
|
|
for _, key := range keys {
|
|
accumulate += segInfo[key]
|
|
if _, ok := reqSegAccumulateCountMap[reqID][channelID]; !ok {
|
|
reqSegAccumulateCountMap[reqID][channelID] = make([]uint32, 0)
|
|
}
|
|
reqSegAccumulateCountMap[reqID][channelID] = append(
|
|
reqSegAccumulateCountMap[reqID][channelID],
|
|
accumulate,
|
|
)
|
|
if _, ok := reqSegIDMap[reqID][channelID]; !ok {
|
|
reqSegIDMap[reqID][channelID] = make([]UniqueID, 0)
|
|
}
|
|
reqSegIDMap[reqID][channelID] = append(
|
|
reqSegIDMap[reqID][channelID],
|
|
key,
|
|
)
|
|
}
|
|
}
|
|
}
|
|
|
|
var getSegmentID = func(reqID UniqueID, channelID int32) UniqueID {
|
|
reqSegAllocateCounter[reqID][channelID]++
|
|
cur := reqSegAllocateCounter[reqID][channelID]
|
|
accumulateSlice := reqSegAccumulateCountMap[reqID][channelID]
|
|
segIDSlice := reqSegIDMap[reqID][channelID]
|
|
for index, count := range accumulateSlice {
|
|
if cur <= count {
|
|
return segIDSlice[index]
|
|
}
|
|
}
|
|
log.Panic("Can't Found SegmentID")
|
|
return 0
|
|
}
|
|
|
|
for i, request := range tsMsgs {
|
|
insertRequest := request.(*msgstream.InsertMsg)
|
|
keys := hashKeys[i]
|
|
reqID := insertRequest.ReqID
|
|
collectionName := insertRequest.CollectionName
|
|
partitionTag := insertRequest.PartitionTag
|
|
proxyID := insertRequest.ProxyID
|
|
for index, key := range keys {
|
|
ts := insertRequest.Timestamps[index]
|
|
rowID := insertRequest.RowIDs[index]
|
|
row := insertRequest.RowData[index]
|
|
_, ok := result[key]
|
|
if !ok {
|
|
msgPack := msgstream.MsgPack{}
|
|
result[key] = &msgPack
|
|
}
|
|
segmentID := getSegmentID(reqID, key)
|
|
sliceRequest := internalpb.InsertRequest{
|
|
MsgType: internalpb.MsgType_kInsert,
|
|
ReqID: reqID,
|
|
CollectionName: collectionName,
|
|
PartitionTag: partitionTag,
|
|
SegmentID: segmentID,
|
|
ChannelID: int64(key),
|
|
ProxyID: proxyID,
|
|
Timestamps: []uint64{ts},
|
|
RowIDs: []int64{rowID},
|
|
RowData: []*commonpb.Blob{row},
|
|
}
|
|
insertMsg := &msgstream.InsertMsg{
|
|
InsertRequest: sliceRequest,
|
|
}
|
|
if together { // all rows with same hash value are accumulated to only one message
|
|
if len(result[key].Msgs) <= 0 {
|
|
result[key].Msgs = append(result[key].Msgs, insertMsg)
|
|
} else {
|
|
accMsgs, _ := result[key].Msgs[0].(*msgstream.InsertMsg)
|
|
accMsgs.Timestamps = append(accMsgs.Timestamps, ts)
|
|
accMsgs.RowIDs = append(accMsgs.RowIDs, rowID)
|
|
accMsgs.RowData = append(accMsgs.RowData, row)
|
|
}
|
|
} else { // every row is a message
|
|
result[key].Msgs = append(result[key].Msgs, insertMsg)
|
|
}
|
|
}
|
|
}
|
|
|
|
return result, nil
|
|
}
|