9b371067d2
- Add Compaction l0 views
- Add Compaction scheduler
- Add Compaction triggerv2
- Add Compaction view manager

See also: #27606

Signed-off-by: yangxuan <xuan.yang@zilliz.com>
177 lines
4.6 KiB
Go
package datacoord

import (
	"fmt"

	"github.com/samber/lo"
	"go.uber.org/zap"

	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
	"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
	"github.com/milvus-io/milvus/internal/proto/datapb"
	"github.com/milvus-io/milvus/pkg/log"
)

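// CompactionView groups the segment views that are evaluated together as one
// compaction candidate.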
type CompactionView interface {
	GetGroupLabel() *CompactionGroupLabel
	GetSegmentsView() []*SegmentView
	String() string
	Trigger() CompactionView
}

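// FullViews indexes segment views by collection ID.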
type FullViews struct {
	collections map[int64][]*SegmentView // collectionID
}

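// SegmentViewSelector reports whether a segment view should be selected.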
type SegmentViewSelector func(view *SegmentView) bool

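// GetSegmentViewBy returns the segment views of a collection that match the
// given selector, or nil if the collection is unknown.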
func (v *FullViews) GetSegmentViewBy(collectionID UniqueID, selectors SegmentViewSelector) []*SegmentView {
	views, ok := v.collections[collectionID]
	if !ok {
		return nil
	}

	var ret []*SegmentView

	for _, view := range views {
		if selectors(view) {
			ret = append(ret, view)
		}
	}

	return ret
}

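// CompactionGroupLabel identifies the collection, partition, and channel that
// a group of segments belongs to.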
type CompactionGroupLabel struct {
	CollectionID UniqueID
	PartitionID  UniqueID
	Channel      string
}

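// IsMinGroup returns true when the label pins down all three dimensions:
// collection, partition, and channel.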
func (label *CompactionGroupLabel) IsMinGroup() bool {
	return len(label.Channel) != 0 && label.PartitionID != 0 && label.CollectionID != 0
}

func (label *CompactionGroupLabel) Equal(other *CompactionGroupLabel) bool {
	return other != nil &&
		other.CollectionID == label.CollectionID &&
		other.PartitionID == label.PartitionID &&
		other.Channel == label.Channel
}

func (label *CompactionGroupLabel) String() string {
	return fmt.Sprintf("coll=%d, part=%d, channel=%s", label.CollectionID, label.PartitionID, label.Channel)
}

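// SegmentView is a compaction-oriented snapshot of a segment: its group label,
// state, level, positions, sizes, and binlog file counts.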
type SegmentView struct {
	ID UniqueID

	label *CompactionGroupLabel

	State commonpb.SegmentState
	Level datapb.SegmentLevel

	// positions
	startPos *msgpb.MsgPosition
	dmlPos   *msgpb.MsgPosition

	// size
	Size       float64
	ExpireSize float64
	DeltaSize  float64

	// file numbers
	BinlogCount   int
	StatslogCount int
	DeltalogCount int
}

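// GetSegmentViews builds a SegmentView for every given SegmentInfo.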
func GetSegmentViews(segments ...*SegmentInfo) []*SegmentView {
	return lo.Map(segments, func(segment *SegmentInfo, _ int) *SegmentView {
		return &SegmentView{
			ID: segment.ID,
			label: &CompactionGroupLabel{
				CollectionID: segment.CollectionID,
				PartitionID:  segment.PartitionID,
				Channel:      segment.GetInsertChannel(),
			},

			State: segment.GetState(),
			Level: segment.GetLevel(),

			// positions
			startPos: segment.GetStartPosition(),
			dmlPos:   segment.GetDmlPosition(),

			DeltaSize:     GetBinlogSizeAsBytes(segment.GetDeltalogs()),
			DeltalogCount: GetBinlogCount(segment.GetDeltalogs()),

			Size:          GetBinlogSizeAsBytes(segment.GetBinlogs()),
			BinlogCount:   GetBinlogCount(segment.GetBinlogs()),
			StatslogCount: GetBinlogCount(segment.GetStatslogs()),

			// TODO: set the following
			// ExpireSize float64
		}
	})
}

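// Equal reports whether two segment views carry the same size and file-count
// statistics; ID, label, state, and positions are not compared.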
func (v *SegmentView) Equal(other *SegmentView) bool {
	return v.Size == other.Size &&
		v.ExpireSize == other.ExpireSize &&
		v.DeltaSize == other.DeltaSize &&
		v.BinlogCount == other.BinlogCount &&
		v.StatslogCount == other.StatslogCount &&
		v.DeltalogCount == other.DeltalogCount
}

func (v *SegmentView) String() string {
	return fmt.Sprintf("ID=%d, label=<%s>, state=%s, level=%s, binlogSize=%.2f, binlogCount=%d, deltaSize=%.2f, deltaCount=%d, expireSize=%.2f",
		v.ID, v.label, v.State.String(), v.Level.String(), v.Size, v.BinlogCount, v.DeltaSize, v.DeltalogCount, v.ExpireSize)
}

func (v *SegmentView) LevelZeroString() string {
	return fmt.Sprintf("<ID=%d, level=%s, deltaSize=%.2f, deltaCount=%d>",
		v.ID, v.Level.String(), v.DeltaSize, v.DeltalogCount)
}

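// GetBinlogCount returns the total number of binlog files across all field binlogs.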
func GetBinlogCount(fieldBinlogs []*datapb.FieldBinlog) int {
	var num int
	for _, binlog := range fieldBinlogs {
		num += len(binlog.GetBinlogs())
	}
	return num
}

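// GetExpiredSizeAsBytes sums the sizes of binlogs whose TimestampTo is earlier
// than expireTime, logging each binlog it marks as expired.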
func GetExpiredSizeAsBytes(expireTime Timestamp, fieldBinlogs []*datapb.FieldBinlog) float64 {
	var expireSize float64
	for _, binlogs := range fieldBinlogs {
		for _, l := range binlogs.GetBinlogs() {
			// TODO: we should probably estimate expired log entries from the total rows
			// in the binlog and the relationship between timeTo, timeFrom, and the expire time
			if l.TimestampTo < expireTime {
				log.Info("mark binlog as expired",
					zap.Int64("binlogID", l.GetLogID()),
					zap.Uint64("binlogTimestampTo", l.TimestampTo),
					zap.Uint64("compactExpireTime", expireTime))
				expireSize += float64(l.GetLogSize())
			}
		}
	}
	return expireSize
}

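// GetBinlogSizeAsBytes sums the log sizes of all binlogs in the given field binlogs.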
func GetBinlogSizeAsBytes(deltaBinlogs []*datapb.FieldBinlog) float64 {
	var deltaSize float64
	for _, deltaLogs := range deltaBinlogs {
		for _, l := range deltaLogs.GetBinlogs() {
			deltaSize += float64(l.GetLogSize())
		}
	}
	return deltaSize
}

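// buildGroupKey composes a "<partitionID>-<channel>" key for grouping views.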
func buildGroupKey(partitionID UniqueID, channel string) string {
	return fmt.Sprintf("%d-%s", partitionID, channel)
}