mirror of
https://gitee.com/milvus-io/milvus.git
synced 2024-12-01 03:18:29 +08:00
0095869d5b
Signed-off-by: Congqi Xia <congqi.xia@zilliz.com> Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
873 lines
27 KiB
Go
873 lines
27 KiB
Go
// Licensed to the LF AI & Data foundation under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package datanode
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"math"
|
|
|
|
// "math"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/commonpb"
|
|
"github.com/milvus-io/milvus-proto/go-api/milvuspb"
|
|
"github.com/milvus-io/milvus-proto/go-api/schemapb"
|
|
memkv "github.com/milvus-io/milvus/internal/kv/mem"
|
|
"github.com/milvus-io/milvus/internal/mocks"
|
|
"github.com/milvus-io/milvus/internal/proto/datapb"
|
|
"github.com/milvus-io/milvus/internal/proto/etcdpb"
|
|
"github.com/milvus-io/milvus/internal/proto/internalpb"
|
|
"github.com/milvus-io/milvus/internal/storage"
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/mock"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
var compactTestDir = "/tmp/milvus_test/compact"
|
|
|
|
func TestCompactionTaskInnerMethods(t *testing.T) {
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
cm := storage.NewLocalChunkManager(storage.RootPath(compactTestDir))
|
|
defer cm.RemoveWithPrefix(ctx, "")
|
|
t.Run("Test getSegmentMeta", func(t *testing.T) {
|
|
rc := &RootCoordFactory{
|
|
pkType: schemapb.DataType_Int64,
|
|
}
|
|
channel := newChannel("a", 1, nil, rc, cm)
|
|
var err error
|
|
|
|
task := &compactionTask{
|
|
Channel: channel,
|
|
}
|
|
|
|
_, _, _, err = task.getSegmentMeta(100)
|
|
assert.Error(t, err)
|
|
|
|
err = channel.addSegment(addSegmentReq{
|
|
segType: datapb.SegmentType_New,
|
|
segID: 100,
|
|
collID: 1,
|
|
partitionID: 10,
|
|
startPos: new(internalpb.MsgPosition),
|
|
endPos: nil,
|
|
})
|
|
require.NoError(t, err)
|
|
|
|
collID, partID, meta, err := task.getSegmentMeta(100)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, UniqueID(1), collID)
|
|
assert.Equal(t, UniqueID(10), partID)
|
|
assert.NotNil(t, meta)
|
|
|
|
rc.setCollectionID(-2)
|
|
task.Channel.(*ChannelMeta).collSchema = nil
|
|
_, _, _, err = task.getSegmentMeta(100)
|
|
assert.Error(t, err)
|
|
})
|
|
|
|
t.Run("Test.interface2FieldData", func(t *testing.T) {
|
|
tests := []struct {
|
|
isvalid bool
|
|
|
|
tp schemapb.DataType
|
|
content []interface{}
|
|
|
|
description string
|
|
}{
|
|
{true, schemapb.DataType_Bool, []interface{}{true, false}, "valid bool"},
|
|
{true, schemapb.DataType_Int8, []interface{}{int8(1), int8(2)}, "valid int8"},
|
|
{true, schemapb.DataType_Int16, []interface{}{int16(1), int16(2)}, "valid int16"},
|
|
{true, schemapb.DataType_Int32, []interface{}{int32(1), int32(2)}, "valid int32"},
|
|
{true, schemapb.DataType_Int64, []interface{}{int64(1), int64(2)}, "valid int64"},
|
|
{true, schemapb.DataType_Float, []interface{}{float32(1), float32(2)}, "valid float32"},
|
|
{true, schemapb.DataType_Double, []interface{}{float64(1), float64(2)}, "valid float64"},
|
|
{true, schemapb.DataType_VarChar, []interface{}{"test1", "test2"}, "valid varChar"},
|
|
{true, schemapb.DataType_FloatVector, []interface{}{[]float32{1.0, 2.0}}, "valid floatvector"},
|
|
{true, schemapb.DataType_BinaryVector, []interface{}{[]byte{255}}, "valid binaryvector"},
|
|
{false, schemapb.DataType_Bool, []interface{}{1, 2}, "invalid bool"},
|
|
{false, schemapb.DataType_Int8, []interface{}{nil, nil}, "invalid int8"},
|
|
{false, schemapb.DataType_Int16, []interface{}{nil, nil}, "invalid int16"},
|
|
{false, schemapb.DataType_Int32, []interface{}{nil, nil}, "invalid int32"},
|
|
{false, schemapb.DataType_Int64, []interface{}{nil, nil}, "invalid int64"},
|
|
{false, schemapb.DataType_Float, []interface{}{nil, nil}, "invalid float32"},
|
|
{false, schemapb.DataType_Double, []interface{}{nil, nil}, "invalid float64"},
|
|
{false, schemapb.DataType_VarChar, []interface{}{nil, nil}, "invalid varChar"},
|
|
{false, schemapb.DataType_FloatVector, []interface{}{nil, nil}, "invalid floatvector"},
|
|
{false, schemapb.DataType_BinaryVector, []interface{}{nil, nil}, "invalid binaryvector"},
|
|
{false, schemapb.DataType_None, nil, "invalid data type"},
|
|
}
|
|
|
|
for _, test := range tests {
|
|
t.Run(test.description, func(t *testing.T) {
|
|
if test.isvalid {
|
|
fd, err := interface2FieldData(test.tp, test.content, 2)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, 2, fd.RowNum())
|
|
} else {
|
|
fd, err := interface2FieldData(test.tp, test.content, 2)
|
|
assert.Error(t, err)
|
|
assert.Nil(t, fd)
|
|
}
|
|
})
|
|
}
|
|
|
|
})
|
|
|
|
t.Run("Test mergeDeltalogs", func(t *testing.T) {
|
|
t.Run("One segment with timetravel", func(t *testing.T) {
|
|
invalidBlobs := map[UniqueID][]*Blob{
|
|
1: {},
|
|
}
|
|
|
|
blobs, err := getInt64DeltaBlobs(
|
|
100,
|
|
[]UniqueID{
|
|
1,
|
|
2,
|
|
3,
|
|
4,
|
|
5,
|
|
1,
|
|
},
|
|
[]Timestamp{
|
|
20000,
|
|
20001,
|
|
20002,
|
|
30000,
|
|
50000,
|
|
50000,
|
|
})
|
|
require.NoError(t, err)
|
|
|
|
validBlobs := map[UniqueID][]*Blob{
|
|
100: blobs,
|
|
}
|
|
|
|
tests := []struct {
|
|
isvalid bool
|
|
|
|
dBlobs map[UniqueID][]*Blob
|
|
timetravel Timestamp
|
|
|
|
description string
|
|
}{
|
|
{false, invalidBlobs, 0, "invalid dBlobs"},
|
|
{true, validBlobs, 21000, "valid blobs"},
|
|
}
|
|
|
|
for _, test := range tests {
|
|
task := &compactionTask{}
|
|
t.Run(test.description, func(t *testing.T) {
|
|
if test.isvalid {
|
|
pk2ts, db, err := task.mergeDeltalogs(test.dBlobs, test.timetravel)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, 3, len(pk2ts))
|
|
assert.Equal(t, int64(3), db.GetEntriesNum())
|
|
assert.Equal(t, int64(3), db.delData.RowCount)
|
|
matchedPks := []primaryKey{newInt64PrimaryKey(1), newInt64PrimaryKey(4), newInt64PrimaryKey(5)}
|
|
assert.ElementsMatch(t, matchedPks, db.delData.Pks)
|
|
assert.ElementsMatch(t, []Timestamp{30000, 50000, 50000}, db.delData.Tss)
|
|
|
|
} else {
|
|
|
|
pk2ts, db, err := task.mergeDeltalogs(test.dBlobs, test.timetravel)
|
|
assert.Error(t, err)
|
|
assert.Nil(t, pk2ts)
|
|
assert.Nil(t, db)
|
|
}
|
|
})
|
|
|
|
}
|
|
})
|
|
|
|
t.Run("Multiple segments with timetravel", func(t *testing.T) {
|
|
tests := []struct {
|
|
segIDA UniqueID
|
|
dataApk []UniqueID
|
|
dataAts []Timestamp
|
|
|
|
segIDB UniqueID
|
|
dataBpk []UniqueID
|
|
dataBts []Timestamp
|
|
|
|
segIDC UniqueID
|
|
dataCpk []UniqueID
|
|
dataCts []Timestamp
|
|
|
|
timetravel Timestamp
|
|
expectedpk2ts int
|
|
expecteddb int
|
|
description string
|
|
}{
|
|
{
|
|
0, nil, nil,
|
|
100, []UniqueID{1, 2, 3}, []Timestamp{20000, 30000, 20005},
|
|
200, []UniqueID{4, 5, 6}, []Timestamp{50000, 50001, 50002},
|
|
40000, 3, 3, "2 segments with timetravel 40000",
|
|
},
|
|
{
|
|
300, []UniqueID{10, 20}, []Timestamp{20001, 40001},
|
|
100, []UniqueID{1, 2, 3}, []Timestamp{20000, 30000, 20005},
|
|
200, []UniqueID{4, 5, 6}, []Timestamp{50000, 50001, 50002},
|
|
40000, 4, 4, "3 segments with timetravel 40000",
|
|
},
|
|
}
|
|
|
|
for _, test := range tests {
|
|
t.Run(test.description, func(t *testing.T) {
|
|
dBlobs := make(map[UniqueID][]*Blob)
|
|
if test.segIDA != UniqueID(0) {
|
|
d, err := getInt64DeltaBlobs(test.segIDA, test.dataApk, test.dataAts)
|
|
require.NoError(t, err)
|
|
dBlobs[test.segIDA] = d
|
|
}
|
|
if test.segIDB != UniqueID(0) {
|
|
d, err := getInt64DeltaBlobs(test.segIDB, test.dataBpk, test.dataBts)
|
|
require.NoError(t, err)
|
|
dBlobs[test.segIDB] = d
|
|
}
|
|
if test.segIDC != UniqueID(0) {
|
|
d, err := getInt64DeltaBlobs(test.segIDC, test.dataCpk, test.dataCts)
|
|
require.NoError(t, err)
|
|
dBlobs[test.segIDC] = d
|
|
}
|
|
|
|
task := &compactionTask{}
|
|
pk2ts, db, err := task.mergeDeltalogs(dBlobs, test.timetravel)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, test.expectedpk2ts, len(pk2ts))
|
|
assert.Equal(t, test.expecteddb, int(db.GetEntriesNum()))
|
|
})
|
|
}
|
|
})
|
|
|
|
})
|
|
|
|
t.Run("Test merge", func(t *testing.T) {
|
|
collectionID := int64(1)
|
|
meta := NewMetaFactory().GetCollectionMeta(collectionID, "test", schemapb.DataType_Int64)
|
|
|
|
rc := &mocks.RootCoord{}
|
|
rc.EXPECT().DescribeCollection(mock.Anything, mock.Anything).
|
|
Return(&milvuspb.DescribeCollectionResponse{
|
|
Schema: meta.GetSchema(),
|
|
}, nil)
|
|
channel := newChannel("a", collectionID, meta.GetSchema(), rc, nil)
|
|
t.Run("Merge without expiration", func(t *testing.T) {
|
|
alloc := NewAllocatorFactory(1)
|
|
mockbIO := &binlogIO{cm, alloc}
|
|
Params.CommonCfg.EntityExpirationTTL = 0
|
|
iData := genInsertDataWithExpiredTS()
|
|
|
|
var allPaths [][]string
|
|
inpath, err := mockbIO.uploadInsertLog(context.Background(), 1, 0, iData, meta)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, 12, len(inpath))
|
|
binlogNum := len(inpath[0].GetBinlogs())
|
|
assert.Equal(t, 1, binlogNum)
|
|
|
|
for idx := 0; idx < binlogNum; idx++ {
|
|
var ps []string
|
|
for _, path := range inpath {
|
|
ps = append(ps, path.GetBinlogs()[idx].GetLogPath())
|
|
}
|
|
allPaths = append(allPaths, ps)
|
|
}
|
|
|
|
dm := map[interface{}]Timestamp{
|
|
1: 10000,
|
|
}
|
|
|
|
ct := &compactionTask{Channel: channel, downloader: mockbIO, uploader: mockbIO}
|
|
inPaths, statsPaths, _, numOfRow, err := ct.merge(context.Background(), allPaths, 2, 0, meta, dm)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, int64(2), numOfRow)
|
|
assert.Equal(t, 1, len(inPaths[0].GetBinlogs()))
|
|
assert.Equal(t, 1, len(statsPaths))
|
|
})
|
|
t.Run("Merge without expiration2", func(t *testing.T) {
|
|
alloc := NewAllocatorFactory(1)
|
|
mockbIO := &binlogIO{cm, alloc}
|
|
Params.CommonCfg.EntityExpirationTTL = 0
|
|
flushInsertBufferSize := Params.DataNodeCfg.FlushInsertBufferSize
|
|
defer func() {
|
|
Params.DataNodeCfg.FlushInsertBufferSize = flushInsertBufferSize
|
|
}()
|
|
Params.DataNodeCfg.FlushInsertBufferSize = 128
|
|
iData := genInsertDataWithExpiredTS()
|
|
meta := NewMetaFactory().GetCollectionMeta(1, "test", schemapb.DataType_Int64)
|
|
|
|
var allPaths [][]string
|
|
inpath, err := mockbIO.uploadInsertLog(context.Background(), 1, 0, iData, meta)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, 12, len(inpath))
|
|
binlogNum := len(inpath[0].GetBinlogs())
|
|
assert.Equal(t, 1, binlogNum)
|
|
|
|
for idx := 0; idx < binlogNum; idx++ {
|
|
var ps []string
|
|
for _, path := range inpath {
|
|
ps = append(ps, path.GetBinlogs()[idx].GetLogPath())
|
|
}
|
|
allPaths = append(allPaths, ps)
|
|
}
|
|
|
|
dm := map[interface{}]Timestamp{}
|
|
|
|
ct := &compactionTask{Channel: channel, downloader: mockbIO, uploader: mockbIO}
|
|
inPaths, statsPaths, _, numOfRow, err := ct.merge(context.Background(), allPaths, 2, 0, meta, dm)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, int64(2), numOfRow)
|
|
assert.Equal(t, 2, len(inPaths[0].GetBinlogs()))
|
|
assert.Equal(t, 1, len(statsPaths))
|
|
assert.Equal(t, 1, len(statsPaths[0].GetBinlogs()))
|
|
})
|
|
|
|
t.Run("Merge with expiration", func(t *testing.T) {
|
|
alloc := NewAllocatorFactory(1)
|
|
mockbIO := &binlogIO{cm, alloc}
|
|
|
|
iData := genInsertDataWithExpiredTS()
|
|
meta := NewMetaFactory().GetCollectionMeta(1, "test", schemapb.DataType_Int64)
|
|
|
|
var allPaths [][]string
|
|
inpath, err := mockbIO.uploadInsertLog(context.Background(), 1, 0, iData, meta)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, 12, len(inpath))
|
|
binlogNum := len(inpath[0].GetBinlogs())
|
|
assert.Equal(t, 1, binlogNum)
|
|
|
|
for idx := 0; idx < binlogNum; idx++ {
|
|
var ps []string
|
|
for _, path := range inpath {
|
|
ps = append(ps, path.GetBinlogs()[idx].GetLogPath())
|
|
}
|
|
allPaths = append(allPaths, ps)
|
|
}
|
|
|
|
dm := map[interface{}]Timestamp{
|
|
1: 10000,
|
|
}
|
|
|
|
// 10 days in seconds
|
|
ct := &compactionTask{
|
|
Channel: channel,
|
|
downloader: mockbIO,
|
|
uploader: mockbIO,
|
|
plan: &datapb.CompactionPlan{
|
|
CollectionTtl: 864000,
|
|
},
|
|
}
|
|
inPaths, statsPaths, _, numOfRow, err := ct.merge(context.Background(), allPaths, 2, 0, meta, dm)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, int64(0), numOfRow)
|
|
assert.Equal(t, 0, len(inPaths))
|
|
assert.Equal(t, 0, len(statsPaths))
|
|
})
|
|
|
|
t.Run("Merge with meta error", func(t *testing.T) {
|
|
alloc := NewAllocatorFactory(1)
|
|
mockbIO := &binlogIO{cm, alloc}
|
|
Params.CommonCfg.EntityExpirationTTL = 0
|
|
iData := genInsertDataWithExpiredTS()
|
|
meta := NewMetaFactory().GetCollectionMeta(1, "test", schemapb.DataType_Int64)
|
|
|
|
var allPaths [][]string
|
|
inpath, err := mockbIO.uploadInsertLog(context.Background(), 1, 0, iData, meta)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, 12, len(inpath))
|
|
binlogNum := len(inpath[0].GetBinlogs())
|
|
assert.Equal(t, 1, binlogNum)
|
|
|
|
for idx := 0; idx < binlogNum; idx++ {
|
|
var ps []string
|
|
for _, path := range inpath {
|
|
ps = append(ps, path.GetBinlogs()[idx].GetLogPath())
|
|
}
|
|
allPaths = append(allPaths, ps)
|
|
}
|
|
|
|
dm := map[interface{}]Timestamp{
|
|
1: 10000,
|
|
}
|
|
|
|
ct := &compactionTask{Channel: channel, downloader: mockbIO, uploader: mockbIO}
|
|
_, _, _, _, err = ct.merge(context.Background(), allPaths, 2, 0, &etcdpb.CollectionMeta{
|
|
Schema: &schemapb.CollectionSchema{Fields: []*schemapb.FieldSchema{
|
|
{DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{
|
|
{Key: "dim", Value: "64"},
|
|
}},
|
|
}},
|
|
}, dm)
|
|
assert.Error(t, err)
|
|
})
|
|
|
|
t.Run("Merge with meta type param error", func(t *testing.T) {
|
|
alloc := NewAllocatorFactory(1)
|
|
mockbIO := &binlogIO{cm, alloc}
|
|
Params.CommonCfg.EntityExpirationTTL = 0
|
|
iData := genInsertDataWithExpiredTS()
|
|
meta := NewMetaFactory().GetCollectionMeta(1, "test", schemapb.DataType_Int64)
|
|
|
|
var allPaths [][]string
|
|
inpath, err := mockbIO.uploadInsertLog(context.Background(), 1, 0, iData, meta)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, 12, len(inpath))
|
|
binlogNum := len(inpath[0].GetBinlogs())
|
|
assert.Equal(t, 1, binlogNum)
|
|
|
|
for idx := 0; idx < binlogNum; idx++ {
|
|
var ps []string
|
|
for _, path := range inpath {
|
|
ps = append(ps, path.GetBinlogs()[idx].GetLogPath())
|
|
}
|
|
allPaths = append(allPaths, ps)
|
|
}
|
|
|
|
dm := map[interface{}]Timestamp{
|
|
1: 10000,
|
|
}
|
|
|
|
ct := &compactionTask{Channel: channel, downloader: mockbIO, uploader: mockbIO}
|
|
|
|
_, _, _, _, err = ct.merge(context.Background(), allPaths, 2, 0, &etcdpb.CollectionMeta{
|
|
Schema: &schemapb.CollectionSchema{Fields: []*schemapb.FieldSchema{
|
|
{DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{
|
|
{Key: "dim", Value: "dim"},
|
|
}},
|
|
}}}, dm)
|
|
assert.Error(t, err)
|
|
})
|
|
})
|
|
|
|
t.Run("Test isExpiredEntity", func(t *testing.T) {
|
|
t.Run("When CompactionEntityExpiration is set math.MaxInt64", func(t *testing.T) {
|
|
ct := &compactionTask{
|
|
plan: &datapb.CompactionPlan{
|
|
CollectionTtl: math.MaxInt64,
|
|
},
|
|
}
|
|
|
|
res := ct.isExpiredEntity(0, genTimestamp())
|
|
assert.Equal(t, false, res)
|
|
|
|
res = ct.isExpiredEntity(math.MaxInt64, genTimestamp())
|
|
assert.Equal(t, false, res)
|
|
|
|
res = ct.isExpiredEntity(0, math.MaxInt64)
|
|
assert.Equal(t, true, res)
|
|
|
|
res = ct.isExpiredEntity(math.MaxInt64, math.MaxInt64)
|
|
assert.Equal(t, false, res)
|
|
|
|
res = ct.isExpiredEntity(math.MaxInt64, 0)
|
|
assert.Equal(t, false, res)
|
|
})
|
|
t.Run("When CompactionEntityExpiration is set MAX_ENTITY_EXPIRATION = 0", func(t *testing.T) {
|
|
// 0 means expiration is not enabled
|
|
ct := &compactionTask{
|
|
plan: &datapb.CompactionPlan{
|
|
CollectionTtl: 0,
|
|
},
|
|
}
|
|
res := ct.isExpiredEntity(0, genTimestamp())
|
|
assert.Equal(t, false, res)
|
|
|
|
res = ct.isExpiredEntity(math.MaxInt64, genTimestamp())
|
|
assert.Equal(t, false, res)
|
|
|
|
res = ct.isExpiredEntity(0, math.MaxInt64)
|
|
assert.Equal(t, false, res)
|
|
|
|
res = ct.isExpiredEntity(math.MaxInt64, math.MaxInt64)
|
|
assert.Equal(t, false, res)
|
|
|
|
res = ct.isExpiredEntity(math.MaxInt64, 0)
|
|
assert.Equal(t, false, res)
|
|
})
|
|
t.Run("When CompactionEntityExpiration is set 10 days", func(t *testing.T) {
|
|
// 10 days in seconds
|
|
ct := &compactionTask{
|
|
plan: &datapb.CompactionPlan{
|
|
CollectionTtl: 864000,
|
|
},
|
|
}
|
|
res := ct.isExpiredEntity(0, genTimestamp())
|
|
assert.Equal(t, true, res)
|
|
|
|
res = ct.isExpiredEntity(math.MaxInt64, genTimestamp())
|
|
assert.Equal(t, false, res)
|
|
|
|
res = ct.isExpiredEntity(0, math.MaxInt64)
|
|
assert.Equal(t, true, res)
|
|
|
|
res = ct.isExpiredEntity(math.MaxInt64, math.MaxInt64)
|
|
assert.Equal(t, false, res)
|
|
|
|
res = ct.isExpiredEntity(math.MaxInt64, 0)
|
|
assert.Equal(t, false, res)
|
|
})
|
|
})
|
|
}
|
|
|
|
func getInt64DeltaBlobs(segID UniqueID, pks []UniqueID, tss []Timestamp) ([]*Blob, error) {
|
|
primaryKeys := make([]primaryKey, len(pks))
|
|
for index, v := range pks {
|
|
primaryKeys[index] = newInt64PrimaryKey(v)
|
|
}
|
|
deltaData := &DeleteData{
|
|
Pks: primaryKeys,
|
|
Tss: tss,
|
|
RowCount: int64(len(pks)),
|
|
}
|
|
|
|
dCodec := storage.NewDeleteCodec()
|
|
blob, err := dCodec.Serialize(1, 10, segID, deltaData)
|
|
return []*Blob{blob}, err
|
|
}
|
|
|
|
func getInsertBlobs(segID UniqueID, iData *InsertData, meta *etcdpb.CollectionMeta) ([]*Blob, error) {
|
|
iCodec := storage.NewInsertCodec(meta)
|
|
|
|
iblobs, _, err := iCodec.Serialize(10, segID, iData)
|
|
return iblobs, err
|
|
}
|
|
|
|
func TestCompactorInterfaceMethods(t *testing.T) {
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
cm := storage.NewLocalChunkManager(storage.RootPath(compactTestDir))
|
|
defer cm.RemoveWithPrefix(ctx, "")
|
|
notEmptySegmentBinlogs := []*datapb.CompactionSegmentBinlogs{{
|
|
SegmentID: 100,
|
|
FieldBinlogs: nil,
|
|
Field2StatslogPaths: nil,
|
|
Deltalogs: nil,
|
|
}}
|
|
Params.CommonCfg.EntityExpirationTTL = 0 // Turn off auto expiration
|
|
|
|
t.Run("Test compact invalid", func(t *testing.T) {
|
|
invalidAlloc := NewAllocatorFactory(-1)
|
|
ctx, cancel := context.WithCancel(context.TODO())
|
|
emptyTask := &compactionTask{
|
|
ctx: ctx,
|
|
cancel: cancel,
|
|
}
|
|
|
|
plan := &datapb.CompactionPlan{
|
|
PlanID: 999,
|
|
SegmentBinlogs: notEmptySegmentBinlogs,
|
|
StartTime: 0,
|
|
TimeoutInSeconds: 10,
|
|
Type: datapb.CompactionType_UndefinedCompaction,
|
|
Channel: "",
|
|
}
|
|
|
|
emptyTask.plan = plan
|
|
_, err := emptyTask.compact()
|
|
assert.Error(t, err)
|
|
|
|
plan.Type = datapb.CompactionType_MergeCompaction
|
|
emptyTask.allocatorInterface = invalidAlloc
|
|
plan.SegmentBinlogs = notEmptySegmentBinlogs
|
|
_, err = emptyTask.compact()
|
|
assert.Error(t, err)
|
|
|
|
emptyTask.stop()
|
|
})
|
|
|
|
t.Run("Test typeII compact valid", func(t *testing.T) {
|
|
alloc := NewAllocatorFactory(1)
|
|
type testCase struct {
|
|
pkType schemapb.DataType
|
|
iData1 storage.FieldData
|
|
iData2 storage.FieldData
|
|
pks1 [2]primaryKey
|
|
pks2 [2]primaryKey
|
|
colID UniqueID
|
|
parID UniqueID
|
|
segID1 UniqueID
|
|
segID2 UniqueID
|
|
}
|
|
cases := []testCase{
|
|
{
|
|
pkType: schemapb.DataType_Int64,
|
|
iData1: &storage.Int64FieldData{Data: []UniqueID{1}},
|
|
iData2: &storage.Int64FieldData{Data: []UniqueID{9}},
|
|
pks1: [2]primaryKey{newInt64PrimaryKey(1), newInt64PrimaryKey(2)},
|
|
pks2: [2]primaryKey{newInt64PrimaryKey(9), newInt64PrimaryKey(10)},
|
|
colID: 1,
|
|
parID: 10,
|
|
segID1: 100,
|
|
segID2: 101,
|
|
},
|
|
{
|
|
pkType: schemapb.DataType_VarChar,
|
|
iData1: &storage.StringFieldData{Data: []string{"aaaa"}},
|
|
iData2: &storage.StringFieldData{Data: []string{"milvus"}},
|
|
pks1: [2]primaryKey{newVarCharPrimaryKey("aaaa"), newVarCharPrimaryKey("bbbb")},
|
|
pks2: [2]primaryKey{newVarCharPrimaryKey("milvus"), newVarCharPrimaryKey("mmmm")},
|
|
colID: 2,
|
|
parID: 11,
|
|
segID1: 102,
|
|
segID2: 103,
|
|
},
|
|
}
|
|
|
|
for _, c := range cases {
|
|
rc := &RootCoordFactory{
|
|
pkType: c.pkType,
|
|
}
|
|
mockfm := &mockFlushManager{}
|
|
mockKv := memkv.NewMemoryKV()
|
|
mockbIO := &binlogIO{cm, alloc}
|
|
channel := newChannel("a", c.colID, nil, rc, cm)
|
|
|
|
channel.addFlushedSegmentWithPKs(c.segID1, c.colID, c.parID, 2, c.iData1)
|
|
channel.addFlushedSegmentWithPKs(c.segID2, c.colID, c.parID, 2, c.iData2)
|
|
require.True(t, channel.hasSegment(c.segID1, true))
|
|
require.True(t, channel.hasSegment(c.segID2, true))
|
|
|
|
meta := NewMetaFactory().GetCollectionMeta(c.colID, "test_compact_coll_name", c.pkType)
|
|
iData1 := genInsertDataWithPKs(c.pks1, c.pkType)
|
|
dData1 := &DeleteData{
|
|
Pks: []primaryKey{c.pks1[0]},
|
|
Tss: []Timestamp{20000},
|
|
RowCount: 1,
|
|
}
|
|
iData2 := genInsertDataWithPKs(c.pks2, c.pkType)
|
|
dData2 := &DeleteData{
|
|
Pks: []primaryKey{c.pks2[0]},
|
|
Tss: []Timestamp{30000},
|
|
RowCount: 1,
|
|
}
|
|
|
|
cpaths1, err := mockbIO.upload(context.TODO(), c.segID1, c.parID, []*InsertData{iData1}, []byte{}, dData1, meta)
|
|
require.NoError(t, err)
|
|
require.Equal(t, 12, len(cpaths1.inPaths))
|
|
|
|
cpaths2, err := mockbIO.upload(context.TODO(), c.segID2, c.parID, []*InsertData{iData2}, []byte{}, dData2, meta)
|
|
require.NoError(t, err)
|
|
require.Equal(t, 12, len(cpaths2.inPaths))
|
|
|
|
plan := &datapb.CompactionPlan{
|
|
PlanID: 10080,
|
|
SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{
|
|
{
|
|
SegmentID: c.segID1,
|
|
FieldBinlogs: cpaths1.inPaths,
|
|
Field2StatslogPaths: cpaths1.statsPaths,
|
|
Deltalogs: cpaths1.deltaInfo,
|
|
},
|
|
{
|
|
SegmentID: c.segID2,
|
|
FieldBinlogs: cpaths2.inPaths,
|
|
Field2StatslogPaths: cpaths2.statsPaths,
|
|
Deltalogs: cpaths2.deltaInfo,
|
|
},
|
|
},
|
|
StartTime: 0,
|
|
TimeoutInSeconds: 10,
|
|
Type: datapb.CompactionType_MergeCompaction,
|
|
Timetravel: 40000,
|
|
Channel: "channelname",
|
|
}
|
|
|
|
alloc.random = false // generated ID = 19530
|
|
task := newCompactionTask(context.TODO(), mockbIO, mockbIO, channel, mockfm, alloc, plan)
|
|
result, err := task.compact()
|
|
assert.NoError(t, err)
|
|
assert.NotNil(t, result)
|
|
|
|
assert.Equal(t, plan.GetPlanID(), result.GetPlanID())
|
|
assert.Equal(t, UniqueID(19530), result.GetSegmentID())
|
|
assert.Equal(t, int64(2), result.GetNumOfRows())
|
|
assert.NotEmpty(t, result.InsertLogs)
|
|
assert.NotEmpty(t, result.Field2StatslogPaths)
|
|
|
|
// New test, remove all the binlogs in memkv
|
|
// Deltas in timetravel range
|
|
err = mockKv.RemoveWithPrefix("/")
|
|
require.NoError(t, err)
|
|
plan.PlanID++
|
|
|
|
plan.Timetravel = Timestamp(25000)
|
|
channel.addFlushedSegmentWithPKs(c.segID1, c.colID, c.parID, 2, c.iData1)
|
|
channel.addFlushedSegmentWithPKs(c.segID2, c.colID, c.parID, 2, c.iData2)
|
|
channel.removeSegments(19530)
|
|
require.True(t, channel.hasSegment(c.segID1, true))
|
|
require.True(t, channel.hasSegment(c.segID2, true))
|
|
require.False(t, channel.hasSegment(19530, true))
|
|
|
|
result, err = task.compact()
|
|
assert.NoError(t, err)
|
|
assert.NotNil(t, result)
|
|
|
|
assert.Equal(t, plan.GetPlanID(), result.GetPlanID())
|
|
assert.Equal(t, UniqueID(19530), result.GetSegmentID())
|
|
assert.Equal(t, int64(3), result.GetNumOfRows())
|
|
assert.NotEmpty(t, result.InsertLogs)
|
|
assert.NotEmpty(t, result.Field2StatslogPaths)
|
|
|
|
// New test, remove all the binlogs in memkv
|
|
// Deltas in timetravel range
|
|
err = mockKv.RemoveWithPrefix("/")
|
|
require.NoError(t, err)
|
|
plan.PlanID++
|
|
|
|
plan.Timetravel = Timestamp(10000)
|
|
channel.addFlushedSegmentWithPKs(c.segID1, c.colID, c.parID, 2, c.iData1)
|
|
channel.addFlushedSegmentWithPKs(c.segID2, c.colID, c.parID, 2, c.iData2)
|
|
channel.removeSegments(19530)
|
|
require.True(t, channel.hasSegment(c.segID1, true))
|
|
require.True(t, channel.hasSegment(c.segID2, true))
|
|
require.False(t, channel.hasSegment(19530, true))
|
|
|
|
result, err = task.compact()
|
|
assert.NoError(t, err)
|
|
assert.NotNil(t, result)
|
|
|
|
assert.Equal(t, plan.GetPlanID(), result.GetPlanID())
|
|
assert.Equal(t, UniqueID(19530), result.GetSegmentID())
|
|
assert.Equal(t, int64(4), result.GetNumOfRows())
|
|
assert.NotEmpty(t, result.InsertLogs)
|
|
assert.NotEmpty(t, result.Field2StatslogPaths)
|
|
}
|
|
})
|
|
|
|
t.Run("Test typeII compact 2 segments with the same pk", func(t *testing.T) {
|
|
// Test merge compactions, two segments with the same pk, one deletion pk=1
|
|
// The merged segment 19530 should only contain 2 rows and both pk=2
|
|
// Both pk = 1 rows of the two segments are compacted.
|
|
var collID, partID, segID1, segID2 UniqueID = 1, 10, 200, 201
|
|
|
|
alloc := NewAllocatorFactory(1)
|
|
rc := &RootCoordFactory{
|
|
pkType: schemapb.DataType_Int64,
|
|
}
|
|
mockfm := &mockFlushManager{}
|
|
mockbIO := &binlogIO{cm, alloc}
|
|
channel := newChannel("channelname", collID, nil, rc, cm)
|
|
|
|
channel.addFlushedSegmentWithPKs(segID1, collID, partID, 2, &storage.Int64FieldData{Data: []UniqueID{1}})
|
|
channel.addFlushedSegmentWithPKs(segID2, collID, partID, 2, &storage.Int64FieldData{Data: []UniqueID{1}})
|
|
require.True(t, channel.hasSegment(segID1, true))
|
|
require.True(t, channel.hasSegment(segID2, true))
|
|
|
|
meta := NewMetaFactory().GetCollectionMeta(collID, "test_compact_coll_name", schemapb.DataType_Int64)
|
|
// the same pk for segmentI and segmentII
|
|
pks := [2]primaryKey{newInt64PrimaryKey(1), newInt64PrimaryKey(2)}
|
|
iData1 := genInsertDataWithPKs(pks, schemapb.DataType_Int64)
|
|
iData2 := genInsertDataWithPKs(pks, schemapb.DataType_Int64)
|
|
|
|
pk1 := newInt64PrimaryKey(1)
|
|
dData1 := &DeleteData{
|
|
Pks: []primaryKey{pk1},
|
|
Tss: []Timestamp{20000},
|
|
RowCount: 1,
|
|
}
|
|
// empty dData2
|
|
dData2 := &DeleteData{
|
|
Pks: []primaryKey{},
|
|
Tss: []Timestamp{},
|
|
RowCount: 0,
|
|
}
|
|
|
|
cpaths1, err := mockbIO.upload(context.TODO(), segID1, partID, []*InsertData{iData1}, []byte{}, dData1, meta)
|
|
require.NoError(t, err)
|
|
require.Equal(t, 12, len(cpaths1.inPaths))
|
|
|
|
cpaths2, err := mockbIO.upload(context.TODO(), segID2, partID, []*InsertData{iData2}, []byte{}, dData2, meta)
|
|
require.NoError(t, err)
|
|
require.Equal(t, 12, len(cpaths2.inPaths))
|
|
|
|
plan := &datapb.CompactionPlan{
|
|
PlanID: 20080,
|
|
SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{
|
|
{
|
|
SegmentID: segID1,
|
|
FieldBinlogs: cpaths1.inPaths,
|
|
Field2StatslogPaths: cpaths1.statsPaths,
|
|
Deltalogs: cpaths1.deltaInfo,
|
|
},
|
|
{
|
|
SegmentID: segID2,
|
|
FieldBinlogs: cpaths2.inPaths,
|
|
Field2StatslogPaths: cpaths2.statsPaths,
|
|
Deltalogs: cpaths2.deltaInfo,
|
|
},
|
|
},
|
|
StartTime: 0,
|
|
TimeoutInSeconds: 10,
|
|
Type: datapb.CompactionType_MergeCompaction,
|
|
Timetravel: 40000,
|
|
Channel: "channelname",
|
|
}
|
|
|
|
alloc.random = false // generated ID = 19530
|
|
task := newCompactionTask(context.TODO(), mockbIO, mockbIO, channel, mockfm, alloc, plan)
|
|
result, err := task.compact()
|
|
assert.NoError(t, err)
|
|
assert.NotNil(t, result)
|
|
|
|
assert.Equal(t, plan.GetPlanID(), result.GetPlanID())
|
|
assert.Equal(t, UniqueID(19530), result.GetSegmentID())
|
|
assert.Equal(t, int64(2), result.GetNumOfRows())
|
|
assert.NotEmpty(t, result.InsertLogs)
|
|
assert.NotEmpty(t, result.Field2StatslogPaths)
|
|
})
|
|
}
|
|
|
|
// mockFlushManager is a test double for flushManager whose behavior is driven
// by its two fields.
type mockFlushManager struct {
	// sleepSeconds is how long injectFlush waits before signalling that the
	// injection has been applied.
	sleepSeconds int32
	// returnError makes flushBufferData and flushDelData return a mock error.
	returnError bool
}
|
|
|
|
// Compile-time check that *mockFlushManager implements flushManager.
var _ flushManager = (*mockFlushManager)(nil)
|
|
|
|
func (mfm *mockFlushManager) flushBufferData(data *BufferData, segStats []byte, segmentID UniqueID, flushed bool, dropped bool, pos *internalpb.MsgPosition) error {
|
|
if mfm.returnError {
|
|
return fmt.Errorf("mock error")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (mfm *mockFlushManager) flushDelData(data *DelDataBuf, segmentID UniqueID, pos *internalpb.MsgPosition) error {
|
|
if mfm.returnError {
|
|
return fmt.Errorf("mock error")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (mfm *mockFlushManager) injectFlush(injection *taskInjection, segments ...UniqueID) {
|
|
go func() {
|
|
time.Sleep(time.Second * time.Duration(mfm.sleepSeconds))
|
|
//injection.injected <- struct{}{}
|
|
close(injection.injected)
|
|
<-injection.injectOver
|
|
}()
|
|
}
|
|
|
|
// notifyAllFlushed is a no-op in the mock.
func (mfm *mockFlushManager) notifyAllFlushed() {}
|
|
|
|
// startDropping is a no-op in the mock.
func (mfm *mockFlushManager) startDropping() {}
|
|
|
|
// close is a no-op in the mock.
func (mfm *mockFlushManager) close() {}
|