milvus/internal/proxy/repack_func.go
XuanYang-cn c0797333ce Add ddl binlog metaTable in writenode
Signed-off-by: XuanYang-cn <xuan.yang@zilliz.com>
2020-12-24 20:55:40 +08:00

202 lines
6.3 KiB
Go

package proxy
import (
"log"
"sort"
"github.com/zilliztech/milvus-distributed/internal/util/typeutil"
"github.com/zilliztech/milvus-distributed/internal/allocator"
"github.com/zilliztech/milvus-distributed/internal/errors"
"github.com/zilliztech/milvus-distributed/internal/msgstream"
"github.com/zilliztech/milvus-distributed/internal/proto/commonpb"
"github.com/zilliztech/milvus-distributed/internal/proto/internalpb"
)
func insertRepackFunc(tsMsgs []msgstream.TsMsg,
hashKeys [][]int32,
segIDAssigner *allocator.SegIDAssigner,
together bool) (map[int32]*msgstream.MsgPack, error) {
result := make(map[int32]*msgstream.MsgPack)
channelCountMap := make(map[UniqueID]map[int32]uint32) // reqID --> channelID to count
channelMaxTSMap := make(map[UniqueID]map[int32]Timestamp) // reqID --> channelID to max Timestamp
reqSchemaMap := make(map[UniqueID][]string)
for i, request := range tsMsgs {
if request.Type() != internalpb.MsgType_kInsert {
return nil, errors.New(string("msg's must be Insert"))
}
insertRequest, ok := request.(*msgstream.InsertMsg)
if !ok {
return nil, errors.New(string("msg's must be Insert"))
}
keys := hashKeys[i]
timestampLen := len(insertRequest.Timestamps)
rowIDLen := len(insertRequest.RowIDs)
rowDataLen := len(insertRequest.RowData)
keysLen := len(keys)
if keysLen != timestampLen || keysLen != rowIDLen || keysLen != rowDataLen {
return nil, errors.New(string("the length of hashValue, timestamps, rowIDs, RowData are not equal"))
}
reqID := insertRequest.ReqID
if _, ok := channelCountMap[reqID]; !ok {
channelCountMap[reqID] = make(map[int32]uint32)
}
if _, ok := channelMaxTSMap[reqID]; !ok {
channelMaxTSMap[reqID] = make(map[int32]Timestamp)
}
if _, ok := reqSchemaMap[reqID]; !ok {
reqSchemaMap[reqID] = []string{insertRequest.CollectionName, insertRequest.PartitionTag}
}
for idx, channelID := range keys {
channelCountMap[reqID][channelID]++
if _, ok := channelMaxTSMap[reqID][channelID]; !ok {
channelMaxTSMap[reqID][channelID] = typeutil.ZeroTimestamp
}
ts := insertRequest.Timestamps[idx]
if channelMaxTSMap[reqID][channelID] < ts {
channelMaxTSMap[reqID][channelID] = ts
}
}
}
reqSegCountMap := make(map[UniqueID]map[int32]map[UniqueID]uint32)
for reqID, countInfo := range channelCountMap {
if _, ok := reqSegCountMap[reqID]; !ok {
reqSegCountMap[reqID] = make(map[int32]map[UniqueID]uint32)
}
schema := reqSchemaMap[reqID]
collName, partitionTag := schema[0], schema[1]
for channelID, count := range countInfo {
ts, ok := channelMaxTSMap[reqID][channelID]
if !ok {
ts = typeutil.ZeroTimestamp
log.Println("Warning: did not get max Timstamp!")
}
mapInfo, err := segIDAssigner.GetSegmentID(collName, partitionTag, channelID, count, ts)
if err != nil {
return nil, err
}
reqSegCountMap[reqID][channelID] = make(map[UniqueID]uint32)
reqSegCountMap[reqID][channelID] = mapInfo
}
}
reqSegAccumulateCountMap := make(map[UniqueID]map[int32][]uint32)
reqSegIDMap := make(map[UniqueID]map[int32][]UniqueID)
reqSegAllocateCounter := make(map[UniqueID]map[int32]uint32)
for reqID, channelInfo := range reqSegCountMap {
if _, ok := reqSegAccumulateCountMap[reqID]; !ok {
reqSegAccumulateCountMap[reqID] = make(map[int32][]uint32)
}
if _, ok := reqSegIDMap[reqID]; !ok {
reqSegIDMap[reqID] = make(map[int32][]UniqueID)
}
if _, ok := reqSegAllocateCounter[reqID]; !ok {
reqSegAllocateCounter[reqID] = make(map[int32]uint32)
}
for channelID, segInfo := range channelInfo {
reqSegAllocateCounter[reqID][channelID] = 0
keys := make([]UniqueID, len(segInfo))
i := 0
for key := range segInfo {
keys[i] = key
i++
}
sort.Slice(keys, func(i, j int) bool { return keys[i] < keys[j] })
accumulate := uint32(0)
for _, key := range keys {
accumulate += segInfo[key]
if _, ok := reqSegAccumulateCountMap[reqID][channelID]; !ok {
reqSegAccumulateCountMap[reqID][channelID] = make([]uint32, 0)
}
reqSegAccumulateCountMap[reqID][channelID] = append(
reqSegAccumulateCountMap[reqID][channelID],
accumulate,
)
if _, ok := reqSegIDMap[reqID][channelID]; !ok {
reqSegIDMap[reqID][channelID] = make([]UniqueID, 0)
}
reqSegIDMap[reqID][channelID] = append(
reqSegIDMap[reqID][channelID],
key,
)
}
}
}
var getSegmentID = func(reqID UniqueID, channelID int32) UniqueID {
reqSegAllocateCounter[reqID][channelID]++
cur := reqSegAllocateCounter[reqID][channelID]
accumulateSlice := reqSegAccumulateCountMap[reqID][channelID]
segIDSlice := reqSegIDMap[reqID][channelID]
for index, count := range accumulateSlice {
if cur <= count {
return segIDSlice[index]
}
}
log.Panic("Can't Found SegmentID")
return 0
}
for i, request := range tsMsgs {
insertRequest := request.(*msgstream.InsertMsg)
keys := hashKeys[i]
reqID := insertRequest.ReqID
collectionName := insertRequest.CollectionName
partitionTag := insertRequest.PartitionTag
proxyID := insertRequest.ProxyID
for index, key := range keys {
ts := insertRequest.Timestamps[index]
rowID := insertRequest.RowIDs[index]
row := insertRequest.RowData[index]
_, ok := result[key]
if !ok {
msgPack := msgstream.MsgPack{}
result[key] = &msgPack
}
segmentID := getSegmentID(reqID, key)
sliceRequest := internalpb.InsertRequest{
MsgType: internalpb.MsgType_kInsert,
ReqID: reqID,
CollectionName: collectionName,
PartitionTag: partitionTag,
SegmentID: segmentID,
ChannelID: int64(key),
ProxyID: proxyID,
Timestamps: []uint64{ts},
RowIDs: []int64{rowID},
RowData: []*commonpb.Blob{row},
}
insertMsg := &msgstream.InsertMsg{
InsertRequest: sliceRequest,
}
if together { // all rows with same hash value are accumulated to only one message
if len(result[key].Msgs) <= 0 {
result[key].Msgs = append(result[key].Msgs, insertMsg)
} else {
accMsgs, _ := result[key].Msgs[0].(*msgstream.InsertMsg)
accMsgs.Timestamps = append(accMsgs.Timestamps, ts)
accMsgs.RowIDs = append(accMsgs.RowIDs, rowID)
accMsgs.RowData = append(accMsgs.RowData, row)
}
} else { // every row is a message
result[key].Msgs = append(result[key].Msgs, insertMsg)
}
}
}
return result, nil
}