// milvus/internal/datacoord/compaction_trigger_test.go
// From commit 5f5fdb0789: "Compaction bypass stale segments (#20202)", by Congqi Xia <congqi.xia@zilliz.com>.

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package datacoord

import (
"fmt"
"sort"
"testing"
"time"

"github.com/milvus-io/milvus-proto/go-api/commonpb"
"github.com/milvus-io/milvus-proto/go-api/schemapb"
"github.com/stretchr/testify/assert"

"github.com/milvus-io/milvus/internal/common"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/util/tsoutil"
)
type spyCompactionHandler struct {
spyChan chan *datapb.CompactionPlan
}
// execCompactionPlan starts to execute the plan and returns immediately
func (h *spyCompactionHandler) execCompactionPlan(signal *compactionSignal, plan *datapb.CompactionPlan) error {
fmt.Println("plan ", plan.SegmentBinlogs)
h.spyChan <- plan
return nil
}
// completeCompaction records the result of a compaction
func (h *spyCompactionHandler) completeCompaction(result *datapb.CompactionResult) error {
return nil
}
// getCompaction returns the compaction task. If the planID does not exist, it returns nil.
func (h *spyCompactionHandler) getCompaction(planID int64) *compactionTask {
panic("not implemented") // TODO: Implement
}
// updateCompaction updates the state of ongoing compaction tasks by the given timestamp
func (h *spyCompactionHandler) updateCompaction(ts Timestamp) error {
panic("not implemented") // TODO: Implement
}
// isFull returns true if the task pool is full
func (h *spyCompactionHandler) isFull() bool {
return false
}
// getCompactionTasksBySignalID returns compaction tasks by signal ID
func (h *spyCompactionHandler) getCompactionTasksBySignalID(signalID int64) []*compactionTask {
panic("not implemented") // TODO: Implement
}
func (h *spyCompactionHandler) start() {}
func (h *spyCompactionHandler) stop() {}
var _ compactionPlanContext = (*spyCompactionHandler)(nil)
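// The subtests below repeatedly wait on spyChan with a timeout to decide
// whether a plan was produced. A small helper capturing that pattern could
// look like the following sketch (not used by the original tests):
func waitForPlan(spy *spyCompactionHandler, timeout time.Duration) (*datapb.CompactionPlan, bool) {
select {
case plan := <-spy.spyChan:
// a plan was submitted to the handler
return plan, true
case <-time.After(timeout):
// no plan arrived within the timeout
return nil, false
}
}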
func Test_compactionTrigger_force(t *testing.T) {
type fields struct {
meta *meta
allocator allocator
signals chan *compactionSignal
compactionHandler compactionPlanContext
globalTrigger *time.Ticker
segRefer *SegmentReferenceManager
}
Params.Init()
Params.CommonCfg.RetentionDuration = 200
pts, _ := tsoutil.ParseTS(0)
ttRetention := pts.Add(-time.Duration(Params.CommonCfg.RetentionDuration) * time.Second)
timeTravel := tsoutil.ComposeTS(ttRetention.UnixNano()/int64(time.Millisecond), 0)
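// timeTravel is RetentionDuration seconds before the zero timestamp parsed
// above, re-encoded as a hybrid TSO value (ComposeTS packs physical
// milliseconds plus a logical counter). The plan expected below must carry
// exactly this value in its Timetravel field.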
vecFieldID := int64(201)
tests := []struct {
name string
fields fields
collectionID UniqueID
wantErr bool
wantPlans []*datapb.CompactionPlan
}{
{
"test force compaction",
fields{
&meta{
segments: &SegmentsInfo{
map[int64]*SegmentInfo{
1: {
SegmentInfo: &datapb.SegmentInfo{
ID: 1,
CollectionID: 2,
PartitionID: 1,
LastExpireTime: 100,
NumOfRows: 100,
MaxRowNum: 300,
InsertChannel: "ch1",
State: commonpb.SegmentState_Flushed,
Binlogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "log1"},
},
},
},
Deltalogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "deltalog1"},
},
},
},
},
},
2: {
SegmentInfo: &datapb.SegmentInfo{
ID: 2,
CollectionID: 2,
PartitionID: 1,
LastExpireTime: 100,
NumOfRows: 100,
MaxRowNum: 300,
InsertChannel: "ch1",
State: commonpb.SegmentState_Flushed,
Binlogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "log2"},
},
},
},
Deltalogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "deltalog2"},
},
},
},
},
},
3: {
SegmentInfo: &datapb.SegmentInfo{
ID: 3,
CollectionID: 1111,
PartitionID: 1,
LastExpireTime: 100,
NumOfRows: 100,
MaxRowNum: 300,
InsertChannel: "ch1",
State: commonpb.SegmentState_Flushed,
},
},
},
},
collections: map[int64]*collectionInfo{
2: {
ID: 2,
Schema: &schemapb.CollectionSchema{
Fields: []*schemapb.FieldSchema{
{
FieldID: vecFieldID,
DataType: schemapb.DataType_FloatVector,
},
},
},
Properties: map[string]string{
common.CollectionTTLConfigKey: "0",
},
},
1111: {
ID: 1111,
Schema: &schemapb.CollectionSchema{
Fields: []*schemapb.FieldSchema{
{
FieldID: vecFieldID,
DataType: schemapb.DataType_FloatVector,
TypeParams: []*commonpb.KeyValuePair{
{
Key: "dim",
Value: "128",
},
},
},
},
},
Properties: map[string]string{
common.CollectionTTLConfigKey: "error",
},
},
1000: {
ID: 1000,
Schema: &schemapb.CollectionSchema{
Fields: []*schemapb.FieldSchema{
{
FieldID: vecFieldID,
DataType: schemapb.DataType_FloatVector,
TypeParams: []*commonpb.KeyValuePair{
{
Key: "dim",
Value: "128",
},
},
},
},
},
},
// error (has no vector field)
2000: {
ID: 2000,
Schema: &schemapb.CollectionSchema{
Fields: []*schemapb.FieldSchema{
{
FieldID: vecFieldID,
DataType: schemapb.DataType_Int16,
},
},
},
},
// error (has no dim)
3000: {
ID: 3000,
Schema: &schemapb.CollectionSchema{
Fields: []*schemapb.FieldSchema{
{
FieldID: vecFieldID,
DataType: schemapb.DataType_FloatVector,
TypeParams: []*commonpb.KeyValuePair{
{},
},
},
},
},
},
// error (dim parse fail)
4000: {
ID: 4000,
Schema: &schemapb.CollectionSchema{
Fields: []*schemapb.FieldSchema{
{
FieldID: vecFieldID,
DataType: schemapb.DataType_FloatVector,
TypeParams: []*commonpb.KeyValuePair{
{
Key: "dim",
Value: "128error",
},
},
},
},
},
},
10000: {
ID: 10000,
Schema: &schemapb.CollectionSchema{
Fields: []*schemapb.FieldSchema{
{
FieldID: vecFieldID,
DataType: schemapb.DataType_FloatVector,
},
},
},
},
},
},
&MockAllocator0{},
nil,
&spyCompactionHandler{spyChan: make(chan *datapb.CompactionPlan, 1)},
nil,
&SegmentReferenceManager{segmentsLock: map[UniqueID]map[UniqueID]*datapb.SegmentReferenceLock{}},
},
2,
false,
[]*datapb.CompactionPlan{
{
PlanID: 0,
SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{
{
SegmentID: 1,
FieldBinlogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "log1"},
},
},
},
Field2StatslogPaths: nil,
Deltalogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "deltalog1"},
},
},
},
},
{
SegmentID: 2,
FieldBinlogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "log2"},
},
},
},
Field2StatslogPaths: nil,
Deltalogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "deltalog2"},
},
},
},
},
},
StartTime: 0,
TimeoutInSeconds: Params.DataCoordCfg.CompactionTimeoutInSeconds,
Type: datapb.CompactionType_MixCompaction,
Timetravel: timeTravel,
Channel: "ch1",
},
},
},
}
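// Only one plan is expected: segments 1 and 2 belong to collection 2 on
// channel "ch1" and get merged, while segment 3 belongs to collection 1111
// and is excluded. PlanID 0 presumably reflects MockAllocator0 allocating
// IDs starting from zero.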
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
indexCoord := newMockIndexCoord()
tr := &compactionTrigger{
meta: tt.fields.meta,
handler: newMockHandlerWithMeta(tt.fields.meta),
allocator: tt.fields.allocator,
signals: tt.fields.signals,
compactionHandler: tt.fields.compactionHandler,
globalTrigger: tt.fields.globalTrigger,
segRefer: tt.fields.segRefer,
indexCoord: indexCoord,
}
_, err := tr.forceTriggerCompaction(tt.collectionID)
assert.Equal(t, tt.wantErr, err != nil)
spy := (tt.fields.compactionHandler).(*spyCompactionHandler)
plan := <-spy.spyChan
sortPlanCompactionBinlogs(plan)
assert.EqualValues(t, tt.wantPlans[0], plan)
})
t.Run(tt.name+" with DiskANN index", func(t *testing.T) {
indexCoord := newMockIndexCoord()
segmentIDs := make([]int64, 0)
for _, segment := range tt.fields.meta.segments.GetSegments() {
segmentIDs = append(segmentIDs, segment.GetID())
// Collection 1000 means it has DiskANN index
segment.CollectionID = 1000
}
tr := &compactionTrigger{
meta: tt.fields.meta,
handler: newMockHandlerWithMeta(tt.fields.meta),
allocator: tt.fields.allocator,
signals: tt.fields.signals,
compactionHandler: tt.fields.compactionHandler,
globalTrigger: tt.fields.globalTrigger,
segRefer: tt.fields.segRefer,
indexCoord: indexCoord,
estimateDiskSegmentPolicy: calBySchemaPolicyWithDiskIndex,
}
tt.collectionID = 1000
_, err := tr.forceTriggerCompaction(tt.collectionID)
assert.Equal(t, tt.wantErr, err != nil)
// expect max row num = 2048*1024*1024/(128*4) = 4194304
assert.EqualValues(t, 4194304, tt.fields.meta.segments.GetSegments()[0].MaxRowNum)
spy := (tt.fields.compactionHandler).(*spyCompactionHandler)
<-spy.spyChan
})
t.Run(tt.name+" with no filtered segments", func(t *testing.T) {
indexCoord := newMockIndexCoord()
segmentIDs := make([]int64, 0)
for _, segment := range tt.fields.meta.segments.GetSegments() {
segmentIDs = append(segmentIDs, segment.GetID())
// Collection 2000 means it has no vector field
segment.CollectionID = 2000
}
tr := &compactionTrigger{
meta: tt.fields.meta,
handler: newMockHandlerWithMeta(tt.fields.meta),
allocator: tt.fields.allocator,
signals: tt.fields.signals,
compactionHandler: tt.fields.compactionHandler,
globalTrigger: tt.fields.globalTrigger,
segRefer: tt.fields.segRefer,
indexCoord: indexCoord,
estimateDiskSegmentPolicy: calBySchemaPolicyWithDiskIndex,
}
tt.collectionID = 2000
_, err := tr.forceTriggerCompaction(tt.collectionID)
assert.Equal(t, tt.wantErr, err != nil)
spy := (tt.fields.compactionHandler).(*spyCompactionHandler)
hasPlan := true
select {
case <-spy.spyChan:
hasPlan = true
case <-time.After(2 * time.Second):
hasPlan = false
}
assert.Equal(t, false, hasPlan)
})
t.Run(tt.name+" with meta error", func(t *testing.T) {
indexCoord := newMockIndexCoord()
segmentIDs := make([]int64, 0)
for _, segment := range tt.fields.meta.segments.GetSegments() {
segmentIDs = append(segmentIDs, segment.GetID())
// Collection 3000 means field has no dim info
segment.CollectionID = 3000
}
tr := &compactionTrigger{
meta: tt.fields.meta,
handler: newMockHandlerWithMeta(tt.fields.meta),
allocator: tt.fields.allocator,
signals: tt.fields.signals,
compactionHandler: tt.fields.compactionHandler,
globalTrigger: tt.fields.globalTrigger,
segRefer: tt.fields.segRefer,
indexCoord: indexCoord,
estimateDiskSegmentPolicy: calBySchemaPolicyWithDiskIndex,
}
tt.collectionID = 3000
_, err := tr.forceTriggerCompaction(tt.collectionID)
assert.Equal(t, tt.wantErr, err != nil)
spy := (tt.fields.compactionHandler).(*spyCompactionHandler)
hasPlan := true
select {
case <-spy.spyChan:
hasPlan = true
case <-time.After(2 * time.Second):
hasPlan = false
}
assert.Equal(t, false, hasPlan)
})
t.Run(tt.name+" with meta error", func(t *testing.T) {
indexCoord := newMockIndexCoord()
segmentIDs := make([]int64, 0)
for _, segment := range tt.fields.meta.segments.GetSegments() {
segmentIDs = append(segmentIDs, segment.GetID())
// Collection 4000 means the vector field has an unparseable dim
segment.CollectionID = 4000
}
tr := &compactionTrigger{
meta: tt.fields.meta,
handler: newMockHandlerWithMeta(tt.fields.meta),
allocator: tt.fields.allocator,
signals: tt.fields.signals,
compactionHandler: tt.fields.compactionHandler,
globalTrigger: tt.fields.globalTrigger,
segRefer: tt.fields.segRefer,
indexCoord: indexCoord,
estimateDiskSegmentPolicy: calBySchemaPolicyWithDiskIndex,
}
tt.collectionID = 4000
_, err := tr.forceTriggerCompaction(tt.collectionID)
assert.Equal(t, tt.wantErr, err != nil)
spy := (tt.fields.compactionHandler).(*spyCompactionHandler)
hasPlan := true
select {
case <-spy.spyChan:
hasPlan = true
case <-time.After(2 * time.Second):
hasPlan = false
}
assert.Equal(t, false, hasPlan)
})
t.Run(tt.name+" with index info error", func(t *testing.T) {
indexCoord := newMockIndexCoord()
segmentIDs := make([]int64, 0)
for _, segment := range tt.fields.meta.segments.GetSegments() {
segmentIDs = append(segmentIDs, segment.GetID())
// Collection 10000 means it has DescribeIndexInfo error
segment.CollectionID = 10000
}
tr := &compactionTrigger{
meta: tt.fields.meta,
handler: newMockHandlerWithMeta(tt.fields.meta),
allocator: tt.fields.allocator,
signals: tt.fields.signals,
compactionHandler: tt.fields.compactionHandler,
globalTrigger: tt.fields.globalTrigger,
segRefer: tt.fields.segRefer,
indexCoord: indexCoord,
}
tt.collectionID = 10000
_, err := tr.forceTriggerCompaction(tt.collectionID)
assert.Equal(t, tt.wantErr, err != nil)
spy := (tt.fields.compactionHandler).(*spyCompactionHandler)
hasPlan := true
select {
case <-spy.spyChan:
hasPlan = true
case <-time.After(2 * time.Second):
hasPlan = false
}
assert.Equal(t, false, hasPlan)
})
t.Run(tt.name+" with allocate ts error", func(t *testing.T) {
indexCoord := newMockIndexCoord()
tr := &compactionTrigger{
meta: tt.fields.meta,
handler: newMockHandlerWithMeta(tt.fields.meta),
allocator: &FailsAllocator{allocIDSucceed: true},
signals: tt.fields.signals,
compactionHandler: tt.fields.compactionHandler,
globalTrigger: tt.fields.globalTrigger,
segRefer: tt.fields.segRefer,
indexCoord: indexCoord,
estimateDiskSegmentPolicy: calBySchemaPolicyWithDiskIndex,
}
{
// test alloc ts fail for handle global signal
signal := &compactionSignal{
id: 0,
isForce: true,
isGlobal: true,
collectionID: tt.collectionID,
}
tr.handleGlobalSignal(signal)
spy := (tt.fields.compactionHandler).(*spyCompactionHandler)
hasPlan := true
select {
case <-spy.spyChan:
hasPlan = true
case <-time.After(2 * time.Second):
hasPlan = false
}
assert.Equal(t, false, hasPlan)
}
{
// test alloc ts fail for handle signal
signal := &compactionSignal{
id: 0,
isForce: true,
collectionID: tt.collectionID,
segmentID: 3,
}
tr.handleSignal(signal)
spy := (tt.fields.compactionHandler).(*spyCompactionHandler)
hasPlan := true
select {
case <-spy.spyChan:
hasPlan = true
case <-time.After(2 * time.Second):
hasPlan = false
}
assert.Equal(t, false, hasPlan)
}
})
t.Run(tt.name+" with getCompact error", func(t *testing.T) {
indexCoord := newMockIndexCoord()
for _, segment := range tt.fields.meta.segments.GetSegments() {
segment.CollectionID = 1111
}
tr := &compactionTrigger{
meta: tt.fields.meta,
handler: newMockHandlerWithMeta(tt.fields.meta),
allocator: tt.fields.allocator,
signals: tt.fields.signals,
compactionHandler: tt.fields.compactionHandler,
globalTrigger: tt.fields.globalTrigger,
segRefer: tt.fields.segRefer,
indexCoord: indexCoord,
estimateDiskSegmentPolicy: calBySchemaPolicyWithDiskIndex,
}
{
// test getCompactTime fail for handle global signal
signal := &compactionSignal{
id: 0,
isForce: true,
isGlobal: true,
collectionID: 1111,
}
tr.handleGlobalSignal(signal)
spy := (tt.fields.compactionHandler).(*spyCompactionHandler)
hasPlan := true
select {
case <-spy.spyChan:
hasPlan = true
case <-time.After(2 * time.Second):
hasPlan = false
}
assert.Equal(t, false, hasPlan)
}
{
// test getCompactTime fail for handle signal
signal := &compactionSignal{
id: 0,
isForce: true,
collectionID: 1111,
segmentID: 3,
}
tr.handleSignal(signal)
spy := (tt.fields.compactionHandler).(*spyCompactionHandler)
hasPlan := true
select {
case <-spy.spyChan:
hasPlan = true
case <-time.After(2 * time.Second):
hasPlan = false
}
assert.Equal(t, false, hasPlan)
}
})
}
}
// test force compaction with too many segments
func Test_compactionTrigger_force_maxSegmentLimit(t *testing.T) {
type fields struct {
meta *meta
allocator allocator
signals chan *compactionSignal
compactionHandler compactionPlanContext
globalTrigger *time.Ticker
}
type args struct {
collectionID int64
compactTime *compactTime
}
Params.Init()
vecFieldID := int64(201)
segmentInfos := &SegmentsInfo{
segments: make(map[UniqueID]*SegmentInfo),
}
for i := UniqueID(0); i < 50; i++ {
info := &SegmentInfo{
SegmentInfo: &datapb.SegmentInfo{
ID: i,
CollectionID: 2,
PartitionID: 1,
LastExpireTime: 100,
NumOfRows: 100,
MaxRowNum: 300000,
InsertChannel: "ch1",
State: commonpb.SegmentState_Flushed,
Binlogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "log1"},
},
},
},
Deltalogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "deltalog1"},
},
},
},
},
}
segmentInfos.segments[i] = info
}
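// 50 flushed segments on one channel exceed what a single plan may hold. The
// assertions below expect a 30/20 split, presumably because the trigger caps
// the number of segments merged per plan at 30 by default.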
tests := []struct {
name string
fields fields
args args
wantErr bool
wantPlans []*datapb.CompactionPlan
}{
{
"test many segments",
fields{
&meta{
segments: segmentInfos,
collections: map[int64]*collectionInfo{
2: {
ID: 2,
Schema: &schemapb.CollectionSchema{
Fields: []*schemapb.FieldSchema{
{
FieldID: vecFieldID,
DataType: schemapb.DataType_FloatVector,
},
},
},
},
},
},
newMockAllocator(),
nil,
&spyCompactionHandler{spyChan: make(chan *datapb.CompactionPlan, 2)},
nil,
},
args{
2,
&compactTime{travelTime: 200, expireTime: 0},
},
false,
[]*datapb.CompactionPlan{
{
PlanID: 2,
SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{
{
SegmentID: 1,
FieldBinlogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "log1"},
},
},
},
Field2StatslogPaths: nil,
Deltalogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "deltalog1"},
},
},
},
},
{
SegmentID: 2,
FieldBinlogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "log2"},
},
},
},
Field2StatslogPaths: nil,
Deltalogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "deltalog2"},
},
},
},
},
},
StartTime: 3,
TimeoutInSeconds: Params.DataCoordCfg.CompactionTimeoutInSeconds,
Type: datapb.CompactionType_MixCompaction,
Timetravel: 200,
Channel: "ch1",
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
indexCoord := newMockIndexCoord()
tr := &compactionTrigger{
meta: tt.fields.meta,
handler: newMockHandlerWithMeta(tt.fields.meta),
allocator: tt.fields.allocator,
signals: tt.fields.signals,
compactionHandler: tt.fields.compactionHandler,
globalTrigger: tt.fields.globalTrigger,
segRefer: &SegmentReferenceManager{segmentsLock: map[UniqueID]map[UniqueID]*datapb.SegmentReferenceLock{}},
indexCoord: indexCoord,
}
_, err := tr.forceTriggerCompaction(tt.args.collectionID)
assert.Equal(t, tt.wantErr, err != nil)
spy := (tt.fields.compactionHandler).(*spyCompactionHandler)
// should be split into two plans
plan := <-spy.spyChan
assert.Equal(t, len(plan.SegmentBinlogs), 30)
plan = <-spy.spyChan
assert.Equal(t, len(plan.SegmentBinlogs), 20)
})
}
}
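// sortPlanCompactionBinlogs sorts a plan's segment binlogs by segment ID.
// Go randomizes map iteration order, so the trigger may emit segments in any
// order; sorting keeps the EqualValues comparison above deterministic.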
func sortPlanCompactionBinlogs(plan *datapb.CompactionPlan) {
sort.Slice(plan.SegmentBinlogs, func(i, j int) bool {
return plan.SegmentBinlogs[i].SegmentID < plan.SegmentBinlogs[j].SegmentID
})
}
// Test no compaction selection
func Test_compactionTrigger_noplan(t *testing.T) {
type fields struct {
meta *meta
allocator allocator
signals chan *compactionSignal
compactionHandler compactionPlanContext
globalTrigger *time.Ticker
}
type args struct {
collectionID int64
compactTime *compactTime
}
Params.Init()
vecFieldID := int64(201)
tests := []struct {
name string
fields fields
args args
wantErr bool
wantPlans []*datapb.CompactionPlan
}{
{
"test no plan",
fields{
&meta{
// 5 segments; segment 5 is stale (last flushed at the Unix epoch)
segments: &SegmentsInfo{
map[int64]*SegmentInfo{
1: {
SegmentInfo: &datapb.SegmentInfo{
ID: 1,
CollectionID: 2,
PartitionID: 1,
LastExpireTime: 100,
NumOfRows: 1,
MaxRowNum: 300,
InsertChannel: "ch1",
State: commonpb.SegmentState_Flushed,
Binlogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "log1", LogSize: 100},
},
},
},
},
lastFlushTime: time.Now(),
},
2: {
SegmentInfo: &datapb.SegmentInfo{
ID: 2,
CollectionID: 2,
PartitionID: 1,
LastExpireTime: 100,
NumOfRows: 200,
MaxRowNum: 300,
InsertChannel: "ch1",
State: commonpb.SegmentState_Flushed,
Binlogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "log2", LogSize: int64(Params.DataCoordCfg.SegmentMaxSize)*1024*1024 - 1},
},
},
},
Deltalogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "deltalog2"},
},
},
},
},
lastFlushTime: time.Now(),
},
3: {
SegmentInfo: &datapb.SegmentInfo{
ID: 3,
CollectionID: 2,
PartitionID: 1,
LastExpireTime: 100,
NumOfRows: 2,
MaxRowNum: 300,
InsertChannel: "ch1",
State: commonpb.SegmentState_Flushed,
Binlogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "log1", LogSize: 100},
},
},
},
},
lastFlushTime: time.Now(),
},
4: {
SegmentInfo: &datapb.SegmentInfo{
ID: 4,
CollectionID: 2,
PartitionID: 1,
LastExpireTime: 100,
NumOfRows: 3,
MaxRowNum: 300,
InsertChannel: "ch1",
State: commonpb.SegmentState_Flushed,
Binlogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "log1", LogSize: 100},
},
},
},
},
lastFlushTime: time.Now(),
},
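// Segment 5 below was last flushed at the Unix epoch, i.e. it is stale. Per
// the change this test accompanies (#20202), the trigger is expected to
// bypass such segments, so it must not contribute to any plan.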
5: {
SegmentInfo: &datapb.SegmentInfo{
ID: 5,
CollectionID: 2,
PartitionID: 1,
LastExpireTime: 100,
NumOfRows: 3,
MaxRowNum: 300,
InsertChannel: "ch1",
State: commonpb.SegmentState_Flushed,
Binlogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "log1", LogSize: 100},
},
},
},
},
lastFlushTime: time.Unix(0, 0),
},
},
},
collections: map[int64]*collectionInfo{
2: {
ID: 2,
Schema: &schemapb.CollectionSchema{
Fields: []*schemapb.FieldSchema{
{
FieldID: vecFieldID,
DataType: schemapb.DataType_FloatVector,
},
},
},
},
},
},
newMockAllocator(),
make(chan *compactionSignal, 1),
&spyCompactionHandler{spyChan: make(chan *datapb.CompactionPlan, 1)},
nil,
},
args{
2,
&compactTime{travelTime: 200, expireTime: 0},
},
false,
nil,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
indexCoord := newMockIndexCoord()
tr := &compactionTrigger{
meta: tt.fields.meta,
handler: newMockHandlerWithMeta(tt.fields.meta),
allocator: tt.fields.allocator,
signals: tt.fields.signals,
compactionHandler: tt.fields.compactionHandler,
globalTrigger: tt.fields.globalTrigger,
segRefer: &SegmentReferenceManager{segmentsLock: map[UniqueID]map[UniqueID]*datapb.SegmentReferenceLock{}},
indexCoord: indexCoord,
}
tr.start()
defer tr.stop()
err := tr.triggerCompaction()
assert.Equal(t, tt.wantErr, err != nil)
spy := (tt.fields.compactionHandler).(*spyCompactionHandler)
select {
case val := <-spy.spyChan:
assert.Fail(t, "we expect no compaction generated", val)
return
case <-time.After(3 * time.Second):
return
}
})
}
}
// Test compaction with small files
func Test_compactionTrigger_smallfiles(t *testing.T) {
type fields struct {
meta *meta
allocator allocator
signals chan *compactionSignal
compactionHandler compactionPlanContext
globalTrigger *time.Ticker
}
type args struct {
collectionID int64
compactTime *compactTime
}
Params.Init()
vecFieldID := int64(201)
tests := []struct {
name string
fields fields
args args
wantErr bool
wantPlans []*datapb.CompactionPlan
}{
{
"test small segment",
fields{
&meta{
// 4 small segments
segments: &SegmentsInfo{
map[int64]*SegmentInfo{
1: {
SegmentInfo: &datapb.SegmentInfo{
ID: 1,
CollectionID: 2,
PartitionID: 1,
LastExpireTime: 100,
NumOfRows: 50,
MaxRowNum: 300,
InsertChannel: "ch1",
State: commonpb.SegmentState_Flushed,
Binlogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "log1", LogSize: 100},
},
},
},
Deltalogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "deltalog1"},
},
},
},
},
lastFlushTime: time.Now(),
},
2: {
SegmentInfo: &datapb.SegmentInfo{
ID: 2,
CollectionID: 2,
PartitionID: 1,
LastExpireTime: 100,
NumOfRows: 50,
MaxRowNum: 300,
InsertChannel: "ch1",
State: commonpb.SegmentState_Flushed,
Binlogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "log2", LogSize: 800},
},
},
},
Deltalogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "deltalog2"},
},
},
},
},
lastFlushTime: time.Now(),
},
3: {
SegmentInfo: &datapb.SegmentInfo{
ID: 3,
CollectionID: 2,
PartitionID: 1,
LastExpireTime: 100,
NumOfRows: 50,
MaxRowNum: 300,
InsertChannel: "ch1",
State: commonpb.SegmentState_Flushed,
Binlogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "log1", LogSize: 100},
},
},
},
Deltalogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "deltalog1"},
},
},
},
},
lastFlushTime: time.Now(),
},
4: {
SegmentInfo: &datapb.SegmentInfo{
ID: 4,
CollectionID: 2,
PartitionID: 1,
LastExpireTime: 100,
NumOfRows: 50,
MaxRowNum: 300,
InsertChannel: "ch1",
State: commonpb.SegmentState_Flushed,
Binlogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "log1", LogSize: 100},
},
},
},
Deltalogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "deltalog1"},
},
},
},
},
lastFlushTime: time.Now(),
},
},
},
collections: map[int64]*collectionInfo{
2: {
ID: 2,
Schema: &schemapb.CollectionSchema{
Fields: []*schemapb.FieldSchema{
{
FieldID: vecFieldID,
DataType: schemapb.DataType_FloatVector,
},
},
},
},
},
},
newMockAllocator(),
make(chan *compactionSignal, 1),
&spyCompactionHandler{spyChan: make(chan *datapb.CompactionPlan, 1)},
nil,
},
args{
2,
&compactTime{travelTime: 200, expireTime: 0},
},
false,
nil,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
indexCoord := newMockIndexCoord()
tr := &compactionTrigger{
meta: tt.fields.meta,
handler: newMockHandlerWithMeta(tt.fields.meta),
allocator: tt.fields.allocator,
signals: tt.fields.signals,
compactionHandler: tt.fields.compactionHandler,
globalTrigger: tt.fields.globalTrigger,
segRefer: &SegmentReferenceManager{segmentsLock: map[UniqueID]map[UniqueID]*datapb.SegmentReferenceLock{}},
indexCoord: indexCoord,
}
tr.start()
defer tr.stop()
err := tr.triggerCompaction()
assert.Equal(t, tt.wantErr, err != nil)
spy := (tt.fields.compactionHandler).(*spyCompactionHandler)
select {
case val := <-spy.spyChan:
// 4 segments in the final pick list
assert.Equal(t, len(val.SegmentBinlogs), 4)
return
case <-time.After(3 * time.Second):
assert.Fail(t, "failed to get plan")
return
}
})
}
}
// Test segment compaction target size
func Test_compactionTrigger_noplan_random_size(t *testing.T) {
type fields struct {
meta *meta
allocator allocator
signals chan *compactionSignal
compactionHandler compactionPlanContext
globalTrigger *time.Ticker
}
type args struct {
collectionID int64
compactTime *compactTime
}
Params.Init()
segmentInfos := &SegmentsInfo{
segments: make(map[UniqueID]*SegmentInfo),
}
size := []int64{
510, 500, 480, 300, 250, 200, 128, 128, 128, 127,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
}
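// Sizes are in MB (each LogSize below is size[i] * 1024 * 1024). Assuming
// the default 512 MB max segment size, segments are packed into a plan until
// adding another would exceed that budget, yielding the three plans asserted
// at the end of this test.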
vecFieldID := int64(201)
for i := UniqueID(0); i < 50; i++ {
info := &SegmentInfo{
SegmentInfo: &datapb.SegmentInfo{
ID: i,
CollectionID: 2,
PartitionID: 1,
LastExpireTime: 100,
NumOfRows: size[i],
MaxRowNum: 512,
InsertChannel: "ch1",
State: commonpb.SegmentState_Flushed,
Binlogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "log1", LogSize: size[i] * 1024 * 1024},
},
},
},
},
lastFlushTime: time.Now(),
}
segmentInfos.segments[i] = info
}
tests := []struct {
name string
fields fields
args args
wantErr bool
wantPlans []*datapb.CompactionPlan
}{
{
"test rand size segment",
fields{
&meta{
segments: segmentInfos,
collections: map[int64]*collectionInfo{
2: {
ID: 2,
Schema: &schemapb.CollectionSchema{
Fields: []*schemapb.FieldSchema{
{
FieldID: vecFieldID,
DataType: schemapb.DataType_FloatVector,
},
},
},
},
},
},
newMockAllocator(),
make(chan *compactionSignal, 1),
&spyCompactionHandler{spyChan: make(chan *datapb.CompactionPlan, 10)},
nil,
},
args{
2,
&compactTime{travelTime: 200, expireTime: 0},
},
false,
nil,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
indexCoord := newMockIndexCoord()
tr := &compactionTrigger{
meta: tt.fields.meta,
handler: newMockHandlerWithMeta(tt.fields.meta),
allocator: tt.fields.allocator,
signals: tt.fields.signals,
compactionHandler: tt.fields.compactionHandler,
globalTrigger: tt.fields.globalTrigger,
segRefer: &SegmentReferenceManager{segmentsLock: map[UniqueID]map[UniqueID]*datapb.SegmentReferenceLock{}},
indexCoord: indexCoord,
}
tr.start()
defer tr.stop()
err := tr.triggerCompaction()
assert.Equal(t, tt.wantErr, err != nil)
spy := (tt.fields.compactionHandler).(*spyCompactionHandler)
// should be split into three plans
var plans []*datapb.CompactionPlan
WAIT:
for {
select {
case val := <-spy.spyChan:
plans = append(plans, val)
case <-time.After(1 * time.Second):
break WAIT
}
}
for _, plan := range plans {
// planSize avoids shadowing the size slice defined above
planSize := int64(0)
for _, log := range plan.SegmentBinlogs {
planSize += log.FieldBinlogs[0].GetBinlogs()[0].LogSize
}
fmt.Println("plan segments:", len(plan.SegmentBinlogs), "total size:", planSize)
}
assert.Equal(t, len(plans), 3)
// plan 1: 250 + 20 * 10 + 3 * 20
// plan 2: 200 + 7 * 20 + 4 * 40
// plan 3: 128 + 6 * 40 + 127
assert.Equal(t, len(plans[0].SegmentBinlogs), 24)
assert.Equal(t, len(plans[1].SegmentBinlogs), 12)
assert.Equal(t, len(plans[2].SegmentBinlogs), 8)
})
}
}
// Test shouldDoSingleCompaction
func Test_compactionTrigger_shouldDoSingleCompaction(t *testing.T) {
Params.Init()
indexCoord := newMockIndexCoord()
trigger := newCompactionTrigger(&meta{}, &compactionPlanHandler{}, newMockAllocator(),
&SegmentReferenceManager{segmentsLock: map[UniqueID]map[UniqueID]*datapb.SegmentReferenceLock{}}, indexCoord, newMockHandler())
// Test too many files.
var binlogs []*datapb.FieldBinlog
for i := UniqueID(0); i < 5000; i++ {
binlogs = append(binlogs, &datapb.FieldBinlog{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "log1", LogSize: 100},
},
})
}
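// 5000 field binlogs for one segment are assumed to exceed the trigger's
// maximum binlog-count threshold, which is what should make
// ShouldDoSingleCompaction return true below.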
info := &SegmentInfo{
SegmentInfo: &datapb.SegmentInfo{
ID: 1,
CollectionID: 2,
PartitionID: 1,
LastExpireTime: 100,
NumOfRows: 100,
MaxRowNum: 300,
InsertChannel: "ch1",
State: commonpb.SegmentState_Flushed,
Binlogs: binlogs,
},
}
couldDo := trigger.ShouldDoSingleCompaction(info, &compactTime{travelTime: 200, expireTime: 0})
assert.True(t, couldDo)
// Test expire-triggered compaction
var binlogs2 []*datapb.FieldBinlog
for i := UniqueID(0); i < 100; i++ {
binlogs2 = append(binlogs2, &datapb.FieldBinlog{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "log1", LogSize: 100000, TimestampFrom: 300, TimestampTo: 500},
},
})
}
for i := UniqueID(0); i < 100; i++ {
binlogs2 = append(binlogs2, &datapb.FieldBinlog{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "log1", LogSize: 1000000, TimestampFrom: 300, TimestampTo: 1000},
},
})
}
info2 := &SegmentInfo{
SegmentInfo: &datapb.SegmentInfo{
ID: 1,
CollectionID: 2,
PartitionID: 1,
LastExpireTime: 600,
NumOfRows: 10000,
MaxRowNum: 300,
InsertChannel: "ch1",
State: commonpb.SegmentState_Flushed,
Binlogs: binlogs2,
},
}
// expire time is before every binlog's TimestampTo, so nothing has expired
couldDo = trigger.ShouldDoSingleCompaction(info2, &compactTime{travelTime: 200, expireTime: 300})
assert.False(t, couldDo)
// didn't reach single compaction size 10 * 1024 * 1024
couldDo = trigger.ShouldDoSingleCompaction(info2, &compactTime{travelTime: 200, expireTime: 600})
assert.False(t, couldDo)
// expire time is past every binlog's TimestampTo, and the expired size is large enough
couldDo = trigger.ShouldDoSingleCompaction(info2, &compactTime{travelTime: 200, expireTime: 1200})
assert.True(t, couldDo)
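// Expired-size arithmetic for the three checks above: with expireTime 600
// only the 100 binlogs ending at ts 500 count as expired, 100 * 100000 B =
// 10,000,000 B, just under the 10 * 1024 * 1024 = 10,485,760 B threshold;
// with expireTime 1200 the 100 * 1,000,000 B binlogs expire too, giving
// 110,000,000 B, well over it.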
// Test delete-triggered compaction
var binlogs3 []*datapb.FieldBinlog
for i := UniqueID(0); i < 100; i++ {
binlogs3 = append(binlogs3, &datapb.FieldBinlog{
Binlogs: []*datapb.Binlog{
{EntriesNum: 5, LogPath: "log1", LogSize: 100000, TimestampFrom: 300, TimestampTo: 500},
},
})
}
info3 := &SegmentInfo{
SegmentInfo: &datapb.SegmentInfo{
ID: 1,
CollectionID: 2,
PartitionID: 1,
LastExpireTime: 700,
NumOfRows: 100,
MaxRowNum: 300,
InsertChannel: "ch1",
State: commonpb.SegmentState_Flushed,
Binlogs: binlogs3,
Deltalogs: []*datapb.FieldBinlog{
{
Binlogs: []*datapb.Binlog{
{EntriesNum: 200, LogPath: "deltalog1"},
},
},
},
},
}
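// The deltalog above records 200 deleted entries against NumOfRows = 100, a
// deleted-rows ratio of 2.0, assumed to clear the single-compaction delete
// ratio threshold once the travel time passes LastExpireTime.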
// travel time is still before LastExpireTime, so deletes are not yet eligible
couldDo = trigger.ShouldDoSingleCompaction(info3, &compactTime{travelTime: 600, expireTime: 0})
assert.False(t, couldDo)
// deltalog is large enough, should do compaction
couldDo = trigger.ShouldDoSingleCompaction(info3, &compactTime{travelTime: 800, expireTime: 0})
assert.True(t, couldDo)
}
func Test_newCompactionTrigger(t *testing.T) {
type args struct {
meta *meta
compactionHandler compactionPlanContext
allocator allocator
}
tests := []struct {
name string
args args
}{
{
"test new trigger",
args{
&meta{},
&compactionPlanHandler{},
newMockAllocator(),
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
indexCoord := newMockIndexCoord()
got := newCompactionTrigger(tt.args.meta, tt.args.compactionHandler, tt.args.allocator,
&SegmentReferenceManager{segmentsLock: map[UniqueID]map[UniqueID]*datapb.SegmentReferenceLock{}}, indexCoord, newMockHandler())
assert.Equal(t, tt.args.meta, got.meta)
assert.Equal(t, tt.args.compactionHandler, got.compactionHandler)
assert.Equal(t, tt.args.allocator, got.allocator)
})
}
}
func Test_handleSignal(t *testing.T) {
indexCoord := newMockIndexCoord()
got := newCompactionTrigger(&meta{segments: NewSegmentsInfo()}, &compactionPlanHandler{}, newMockAllocator(),
&SegmentReferenceManager{segmentsLock: map[UniqueID]map[UniqueID]*datapb.SegmentReferenceLock{}}, indexCoord, newMockHandler())
signal := &compactionSignal{
segmentID: 1,
}
assert.NotPanics(t, func() {
got.handleSignal(signal)
})
}
func Test_allocTs(t *testing.T) {
got := newCompactionTrigger(&meta{segments: NewSegmentsInfo()}, &compactionPlanHandler{}, newMockAllocator(),
&SegmentReferenceManager{segmentsLock: map[UniqueID]map[UniqueID]*datapb.SegmentReferenceLock{}}, nil, newMockHandler())
ts, err := got.allocTs()
assert.NoError(t, err)
assert.True(t, ts > 0)
got = newCompactionTrigger(&meta{segments: NewSegmentsInfo()}, &compactionPlanHandler{}, &FailsAllocator{},
&SegmentReferenceManager{segmentsLock: map[UniqueID]map[UniqueID]*datapb.SegmentReferenceLock{}}, nil, newMockHandler())
ts, err = got.allocTs()
assert.Error(t, err)
assert.Equal(t, uint64(0), ts)
}
func Test_getCompactTime(t *testing.T) {
collections := map[UniqueID]*collectionInfo{
1: {
ID: 1,
Schema: newTestSchema(),
Partitions: []UniqueID{1},
Properties: map[string]string{
common.CollectionTTLConfigKey: "10",
},
},
2: {
ID: 2,
Schema: newTestSchema(),
Partitions: []UniqueID{1},
Properties: map[string]string{
common.CollectionTTLConfigKey: "error",
},
},
}
m := &meta{segments: NewSegmentsInfo(), collections: collections}
got := newCompactionTrigger(m, &compactionPlanHandler{}, newMockAllocator(),
&SegmentReferenceManager{segmentsLock: map[UniqueID]map[UniqueID]*datapb.SegmentReferenceLock{}}, nil, &ServerHandler{
&Server{
meta: m,
},
})
now := tsoutil.GetCurrentTime()
ct, err := got.getCompactTime(now, 1)
assert.NoError(t, err)
assert.NotNil(t, ct)
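// A sketch beyond the original assertions: collection 2 carries the
// unparseable TTL property "error", so getCompactTime is assumed to fail for
// it, the same invalid-TTL path the getCompact-error subtests exercise.
_, err = got.getCompactTime(now, 2)
assert.Error(t, err)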
}