// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package datanode

import (
	"container/heap"
	"context"
	"fmt"
	"math"
	"strconv"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
	"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
	"github.com/milvus-io/milvus/internal/datanode/allocator"
	"github.com/milvus-io/milvus/internal/proto/datapb"
	"github.com/milvus-io/milvus/internal/storage"
	"github.com/milvus-io/milvus/pkg/common"
	"github.com/milvus-io/milvus/pkg/util/paramtable"
)
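
// genTestCollectionSchema builds a minimal single-field collection schema whose
// vector field uses the given dimension and vector data type.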
func genTestCollectionSchema(dim int64, vectorType schemapb.DataType) *schemapb.CollectionSchema {
	floatVecFieldSchema := &schemapb.FieldSchema{
		FieldID:  100,
		Name:     "vec",
		DataType: vectorType,
		TypeParams: []*commonpb.KeyValuePair{
			{
				Key:   common.DimKey,
				Value: fmt.Sprintf("%d", dim),
			},
		},
	}
	schema := &schemapb.CollectionSchema{
		Name: "collection-0",
		Fields: []*schemapb.FieldSchema{
			floatVecFieldSchema,
		},
	}
	return schema
}
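
// TestBufferData verifies that newBufferData derives the expected row limit from
// the configured flush insert-buffer size and the schema's vector dimension, and
// that it rejects an illegal dimension of zero.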
func TestBufferData(t *testing.T) {
	paramtable.Get().Save(Params.DataNodeCfg.FlushInsertBufferSize.Key, strconv.FormatInt(16*(1<<20), 10)) // 16 MB

	tests := []struct {
		isValid bool

		indim         int64
		expectedLimit int64
		vectorType    schemapb.DataType

		description string
	}{
		{true, 1, 4194304, schemapb.DataType_FloatVector, "Smallest of the DIM"},
		{true, 128, 32768, schemapb.DataType_FloatVector, "Normal DIM"},
		{true, 32768, 128, schemapb.DataType_FloatVector, "Largest DIM"},
		{true, 4096, 32768, schemapb.DataType_BinaryVector, "Normal binary"},
		{false, 0, 0, schemapb.DataType_FloatVector, "Illegal DIM"},
	}

	for _, test := range tests {
		t.Run(test.description, func(t *testing.T) {
			idata, err := newBufferData(genTestCollectionSchema(test.indim, test.vectorType))

			if test.isValid {
				assert.NoError(t, err)
				assert.NotNil(t, idata)

				assert.Equal(t, test.expectedLimit, idata.limit)
				assert.Zero(t, idata.size)

				capacity := idata.effectiveCap()
				assert.Equal(t, test.expectedLimit, capacity)
			} else {
				assert.Error(t, err)
				assert.Nil(t, idata)
			}
		})
	}
}
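
// TestBufferData_updateTimeRange checks that updateTimeRange expands the
// buffer's [tsFrom, tsTo] window to cover every TimeRange it is fed.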
func TestBufferData_updateTimeRange(t *testing.T) {
	paramtable.Get().Save(Params.DataNodeCfg.FlushInsertBufferSize.Key, strconv.FormatInt(16*(1<<20), 10)) // 16 MB

	type testCase struct {
		tag string

		trs        []TimeRange
		expectFrom Timestamp
		expectTo   Timestamp
	}

	cases := []testCase{
		{
			tag:        "no input range",
			expectTo:   0,
			expectFrom: math.MaxUint64,
		},
		{
			tag: "single range",
			trs: []TimeRange{
				{timestampMin: 100, timestampMax: 200},
			},
			expectFrom: 100,
			expectTo:   200,
		},
		{
			tag: "multiple range",
			trs: []TimeRange{
				{timestampMin: 150, timestampMax: 250},
				{timestampMin: 100, timestampMax: 200},
				{timestampMin: 50, timestampMax: 180},
			},
			expectFrom: 50,
			expectTo:   250,
		},
	}

	for _, tc := range cases {
		t.Run(tc.tag, func(t *testing.T) {
			bd, err := newBufferData(genTestCollectionSchema(16, schemapb.DataType_FloatVector))
			require.NoError(t, err)
			for _, tr := range tc.trs {
				bd.updateTimeRange(tr)
			}

			assert.Equal(t, tc.expectFrom, bd.tsFrom)
			assert.Equal(t, tc.expectTo, bd.tsTo)
		})
	}
}
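
// TestPriorityQueueString covers the String representations of a delete-buffer
// heap Item and of the PriorityQueue that holds it.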
func TestPriorityQueueString(t *testing.T) {
	item := &Item{
		segmentID:  0,
		memorySize: 1,
	}

	assert.Equal(t, "<segmentID=0, memorySize=1>", item.String())

	pq := &PriorityQueue{}
	heap.Push(pq, item)
	assert.Equal(t, "[<segmentID=0, memorySize=1>]", pq.String())
}
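
// Test_CompactSegBuff simulates a compaction that merges segments 1111 and 2222
// into segment 3333 and asserts that UpdateCompactedSegments moves their delete
// buffers onto the compacted-to segment, then exercises rolling and evicting the
// merged buffer.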
func Test_CompactSegBuff(t *testing.T) {
	channelSegments := make(map[UniqueID]*Segment)
	delBufferManager := &DeltaBufferManager{
		channel: &ChannelMeta{
			segments: channelSegments,
		},
		delBufHeap: &PriorityQueue{},
	}

	// 1. set compactTo and compactFrom
	targetSeg := &Segment{segmentID: 3333}
	targetSeg.setType(datapb.SegmentType_Flushed)

	seg1 := &Segment{
		segmentID:   1111,
		compactedTo: targetSeg.segmentID,
	}
	seg1.setType(datapb.SegmentType_Compacted)

	seg2 := &Segment{
		segmentID:   2222,
		compactedTo: targetSeg.segmentID,
	}
	seg2.setType(datapb.SegmentType_Compacted)

	channelSegments[seg1.segmentID] = seg1
	channelSegments[seg2.segmentID] = seg2
	channelSegments[targetSeg.segmentID] = targetSeg

	// 2. set up deleteDataBuf for seg1 and seg2
	delDataBuf1 := newDelDataBuf(seg1.segmentID)
	delDataBuf1.EntriesNum++
	delDataBuf1.updateStartAndEndPosition(nil, &msgpb.MsgPosition{Timestamp: 50})
	delBufferManager.updateMeta(seg1.segmentID, delDataBuf1)
	heap.Push(delBufferManager.delBufHeap, delDataBuf1.item)

	delDataBuf2 := newDelDataBuf(seg2.segmentID)
	delDataBuf2.EntriesNum++
	delDataBuf2.updateStartAndEndPosition(nil, &msgpb.MsgPosition{Timestamp: 50})
	delBufferManager.updateMeta(seg2.segmentID, delDataBuf2)
	heap.Push(delBufferManager.delBufHeap, delDataBuf2.item)

	// 3. test compact
	delBufferManager.UpdateCompactedSegments()

	// 4. expect results in two aspects:
	// 4.1 compactedFrom segments are removed from delBufferManager
	// 4.2 compactedTo seg is set properly with correct entriesNum
	_, seg1Exist := delBufferManager.Load(seg1.segmentID)
	_, seg2Exist := delBufferManager.Load(seg2.segmentID)
	assert.False(t, seg1Exist)
	assert.False(t, seg2Exist)
	assert.Equal(t, int64(2), delBufferManager.GetEntriesNum(targetSeg.segmentID))

	// test item of compactedToSegID is correct
	targetSegBuf, ok := delBufferManager.Load(targetSeg.segmentID)
	assert.True(t, ok)
	assert.NotNil(t, targetSegBuf.item)
	assert.Equal(t, targetSeg.segmentID, targetSegBuf.item.segmentID)

	// 5. test roll and evict (https://github.com/milvus-io/milvus/issues/20501)
	delBufferManager.channel.rollDeleteBuffer(targetSeg.segmentID)
	_, segCompactedToExist := delBufferManager.Load(targetSeg.segmentID)
	assert.False(t, segCompactedToExist)
	delBufferManager.channel.evictHistoryDeleteBuffer(targetSeg.segmentID, &msgpb.MsgPosition{
		Timestamp: 100,
	})
	cp := delBufferManager.channel.getChannelCheckpoint(&msgpb.MsgPosition{
		Timestamp: 200,
	})
	assert.Equal(t, Timestamp(200), cp.Timestamp) // all buffers evicted, so the ttPos is used as the checkpoint
}
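
// TestUpdateCompactedSegments drives delBufferManager.UpdateCompactedSegments
// over a table of compacted-from/compacted-to segment layouts, rebuilding the
// channel's segment map for each case and looking up the segments expected to
// remain afterwards.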
func TestUpdateCompactedSegments(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()
	cm := storage.NewLocalChunkManager(storage.RootPath(deleteNodeTestDir))
	defer cm.RemoveWithPrefix(ctx, cm.RootPath())

	fm := NewRendezvousFlushManager(allocator.NewMockAllocator(t), cm, nil, func(*segmentFlushPack) {}, emptyFlushAndDropFunc)

	chanName := "datanode-test-FlowGraphDeletenode-showDelBuf"
	testPath := "/test/datanode/root/meta"
	assert.NoError(t, clearEtcd(testPath))
	Params.Save("etcd.rootPath", "/test/datanode/root")

	channel := ChannelMeta{
		segments: make(map[UniqueID]*Segment),
	}

	c := &nodeConfig{
		channel:      &channel,
		vChannelName: chanName,
	}
	delBufManager := &DeltaBufferManager{
		channel:    &channel,
		delBufHeap: &PriorityQueue{},
	}
	delNode, err := newDeleteNode(ctx, fm, delBufManager, make(chan string, 1), c)
	require.NoError(t, err)

	tests := []struct {
		description    string
		compactToExist bool

		compactedToIDs   []UniqueID
		compactedFromIDs []UniqueID

		expectedSegsRemain []UniqueID
	}{
		{
			"zero segments", false,
			[]UniqueID{},
			[]UniqueID{},
			[]UniqueID{},
		},
		{
			"segment no compaction", false,
			[]UniqueID{},
			[]UniqueID{},
			[]UniqueID{100, 101},
		},
		{
			"segment compacted", true,
			[]UniqueID{200},
			[]UniqueID{103},
			[]UniqueID{100, 101},
		},
		{
			"segment compacted 100>201", true,
			[]UniqueID{201},
			[]UniqueID{100},
			[]UniqueID{101, 201},
		},
		{
			"segment compacted 100+101>201", true,
			[]UniqueID{201, 201},
			[]UniqueID{100, 101},
			[]UniqueID{201},
		},
		{
			"segment compacted 100>201, 101>202", true,
			[]UniqueID{201, 202},
			[]UniqueID{100, 101},
			[]UniqueID{201, 202},
		},
		// compactToExist == false
		{
			"segment compacted 100>201", false,
			[]UniqueID{201},
			[]UniqueID{100},
			[]UniqueID{101},
		},
		{
			"segment compacted 100+101>201", false,
			[]UniqueID{201, 201},
			[]UniqueID{100, 101},
			[]UniqueID{},
		},
		{
			"segment compacted 100>201, 101>202", false,
			[]UniqueID{201, 202},
			[]UniqueID{100, 101},
			[]UniqueID{},
		},
	}

	for _, test := range tests {
		t.Run(test.description, func(t *testing.T) {
			if test.compactToExist {
				for _, segID := range test.compactedToIDs {
					seg := Segment{
						segmentID: segID,
						numRows:   10,
					}
					seg.setType(datapb.SegmentType_Flushed)
					channel.segments[segID] = &seg
				}
			} else { // clear all segments in channel
				channel.segments = make(map[UniqueID]*Segment)
			}

			for i, segID := range test.compactedFromIDs {
				seg := Segment{
					segmentID:   segID,
					compactedTo: test.compactedToIDs[i],
				}
				seg.setType(datapb.SegmentType_Compacted)
				channel.segments[segID] = &seg
			}

			delNode.delBufferManager.UpdateCompactedSegments()

			for _, remain := range test.expectedSegsRemain {
				delNode.channel.hasSegment(remain, true)
			}
		})
	}
}