feat: Add import scheduler and manager (#29367)

This PR introduces three new components for importv2:
1. ImportMeta: manages all the import tasks;
2. ImportScheduler: processes tasks and updates their states;
3. ImportChecker: checks whether all tasks have completed and triggers the
relevant follow-up operations.

issue: https://github.com/milvus-io/milvus/issues/28521

---------

Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
This commit is contained in:
yihao.dai 2024-03-01 18:31:02 +08:00 committed by GitHub
parent 85de56e894
commit a434d33e75
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
68 changed files with 8915 additions and 1924 deletions

View File

@ -434,7 +434,6 @@ dataCoord:
import:
filesPerPreImportTask: 2 # The maximum number of files allowed per pre-import task.
taskRetention: 10800 # The retention period in seconds for tasks in the Completed or Failed state.
inactiveTimeout: 1800 # The timeout duration in seconds for a task in the "InProgress" state if it remains inactive (with no progress updates).
enableGarbageCollection: true
gc:

View File

@ -18,6 +18,7 @@ package datacoord
import (
"context"
"time"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus/internal/proto/rootcoordpb"
@ -30,6 +31,7 @@ import (
// allocator is the allocation facade used inside datacoord for timestamps and
// unique IDs.
type allocator interface {
// allocTimestamp allocates a timestamp.
allocTimestamp(context.Context) (Timestamp, error)
// allocID allocates a single unique ID.
allocID(context.Context) (UniqueID, error)
// allocN allocates a batch of n IDs. The rootCoordAllocator implementation
// returns the first ID and that ID plus the granted count.
allocN(n int64) (UniqueID, UniqueID, error)
}
// make sure rootCoordAllocator implements allocator interface
@ -79,3 +81,25 @@ func (alloc *rootCoordAllocator) allocID(ctx context.Context) (UniqueID, error)
return resp.ID, nil
}
// allocN requests a batch of n IDs from RootCoord through the AllocID grpc.
// It returns the first granted ID and that ID plus the granted count.
// A non-positive n is treated as a request for one ID, and the call is bounded
// by a 5-second timeout.
func (alloc *rootCoordAllocator) allocN(n int64) (UniqueID, UniqueID, error) {
	if n <= 0 {
		n = 1
	}
	req := &rootcoordpb.AllocIDRequest{
		Base: commonpbutil.NewMsgBase(
			commonpbutil.WithMsgType(commonpb.MsgType_RequestID),
			commonpbutil.WithSourceID(paramtable.GetNodeID()),
		),
		Count: uint32(n),
	}

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	resp, err := alloc.AllocID(ctx, req)
	if verifyErr := VerifyResponse(resp, err); verifyErr != nil {
		return 0, 0, verifyErr
	}

	begin := resp.GetID()
	return begin, begin + int64(resp.GetCount()), nil
}

View File

@ -32,6 +32,7 @@ import (
"github.com/milvus-io/milvus/pkg/util/paramtable"
)
//go:generate mockery --name=Broker --structname=MockBroker --output=./ --filename=mock_coordinator_broker.go --with-expecter --inpackage
type Broker interface {
DescribeCollectionInternal(ctx context.Context, collectionID int64) (*milvuspb.DescribeCollectionResponse, error)
ShowPartitionsInternal(ctx context.Context, collectionID int64) ([]int64, error)

View File

@ -0,0 +1,309 @@
// Code generated by mockery v2.30.1. DO NOT EDIT.
package broker
import (
context "context"
milvuspb "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
mock "github.com/stretchr/testify/mock"
)
// MockBroker is an autogenerated mock type for the Broker type
type MockBroker struct {
mock.Mock
}
type MockBroker_Expecter struct {
mock *mock.Mock
}
func (_m *MockBroker) EXPECT() *MockBroker_Expecter {
return &MockBroker_Expecter{mock: &_m.Mock}
}
// DescribeCollectionInternal provides a mock function with given fields: ctx, collectionID
func (_m *MockBroker) DescribeCollectionInternal(ctx context.Context, collectionID int64) (*milvuspb.DescribeCollectionResponse, error) {
ret := _m.Called(ctx, collectionID)
var r0 *milvuspb.DescribeCollectionResponse
var r1 error
if rf, ok := ret.Get(0).(func(context.Context, int64) (*milvuspb.DescribeCollectionResponse, error)); ok {
return rf(ctx, collectionID)
}
if rf, ok := ret.Get(0).(func(context.Context, int64) *milvuspb.DescribeCollectionResponse); ok {
r0 = rf(ctx, collectionID)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*milvuspb.DescribeCollectionResponse)
}
}
if rf, ok := ret.Get(1).(func(context.Context, int64) error); ok {
r1 = rf(ctx, collectionID)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// MockBroker_DescribeCollectionInternal_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DescribeCollectionInternal'
type MockBroker_DescribeCollectionInternal_Call struct {
*mock.Call
}
// DescribeCollectionInternal is a helper method to define mock.On call
// - ctx context.Context
// - collectionID int64
func (_e *MockBroker_Expecter) DescribeCollectionInternal(ctx interface{}, collectionID interface{}) *MockBroker_DescribeCollectionInternal_Call {
return &MockBroker_DescribeCollectionInternal_Call{Call: _e.mock.On("DescribeCollectionInternal", ctx, collectionID)}
}
func (_c *MockBroker_DescribeCollectionInternal_Call) Run(run func(ctx context.Context, collectionID int64)) *MockBroker_DescribeCollectionInternal_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(context.Context), args[1].(int64))
})
return _c
}
func (_c *MockBroker_DescribeCollectionInternal_Call) Return(_a0 *milvuspb.DescribeCollectionResponse, _a1 error) *MockBroker_DescribeCollectionInternal_Call {
_c.Call.Return(_a0, _a1)
return _c
}
func (_c *MockBroker_DescribeCollectionInternal_Call) RunAndReturn(run func(context.Context, int64) (*milvuspb.DescribeCollectionResponse, error)) *MockBroker_DescribeCollectionInternal_Call {
_c.Call.Return(run)
return _c
}
// HasCollection provides a mock function with given fields: ctx, collectionID
func (_m *MockBroker) HasCollection(ctx context.Context, collectionID int64) (bool, error) {
ret := _m.Called(ctx, collectionID)
var r0 bool
var r1 error
if rf, ok := ret.Get(0).(func(context.Context, int64) (bool, error)); ok {
return rf(ctx, collectionID)
}
if rf, ok := ret.Get(0).(func(context.Context, int64) bool); ok {
r0 = rf(ctx, collectionID)
} else {
r0 = ret.Get(0).(bool)
}
if rf, ok := ret.Get(1).(func(context.Context, int64) error); ok {
r1 = rf(ctx, collectionID)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// MockBroker_HasCollection_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'HasCollection'
type MockBroker_HasCollection_Call struct {
*mock.Call
}
// HasCollection is a helper method to define mock.On call
// - ctx context.Context
// - collectionID int64
func (_e *MockBroker_Expecter) HasCollection(ctx interface{}, collectionID interface{}) *MockBroker_HasCollection_Call {
return &MockBroker_HasCollection_Call{Call: _e.mock.On("HasCollection", ctx, collectionID)}
}
func (_c *MockBroker_HasCollection_Call) Run(run func(ctx context.Context, collectionID int64)) *MockBroker_HasCollection_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(context.Context), args[1].(int64))
})
return _c
}
func (_c *MockBroker_HasCollection_Call) Return(_a0 bool, _a1 error) *MockBroker_HasCollection_Call {
_c.Call.Return(_a0, _a1)
return _c
}
func (_c *MockBroker_HasCollection_Call) RunAndReturn(run func(context.Context, int64) (bool, error)) *MockBroker_HasCollection_Call {
_c.Call.Return(run)
return _c
}
// ListDatabases provides a mock function with given fields: ctx
func (_m *MockBroker) ListDatabases(ctx context.Context) (*milvuspb.ListDatabasesResponse, error) {
ret := _m.Called(ctx)
var r0 *milvuspb.ListDatabasesResponse
var r1 error
if rf, ok := ret.Get(0).(func(context.Context) (*milvuspb.ListDatabasesResponse, error)); ok {
return rf(ctx)
}
if rf, ok := ret.Get(0).(func(context.Context) *milvuspb.ListDatabasesResponse); ok {
r0 = rf(ctx)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*milvuspb.ListDatabasesResponse)
}
}
if rf, ok := ret.Get(1).(func(context.Context) error); ok {
r1 = rf(ctx)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// MockBroker_ListDatabases_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'ListDatabases'
type MockBroker_ListDatabases_Call struct {
*mock.Call
}
// ListDatabases is a helper method to define mock.On call
// - ctx context.Context
func (_e *MockBroker_Expecter) ListDatabases(ctx interface{}) *MockBroker_ListDatabases_Call {
return &MockBroker_ListDatabases_Call{Call: _e.mock.On("ListDatabases", ctx)}
}
func (_c *MockBroker_ListDatabases_Call) Run(run func(ctx context.Context)) *MockBroker_ListDatabases_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(context.Context))
})
return _c
}
func (_c *MockBroker_ListDatabases_Call) Return(_a0 *milvuspb.ListDatabasesResponse, _a1 error) *MockBroker_ListDatabases_Call {
_c.Call.Return(_a0, _a1)
return _c
}
func (_c *MockBroker_ListDatabases_Call) RunAndReturn(run func(context.Context) (*milvuspb.ListDatabasesResponse, error)) *MockBroker_ListDatabases_Call {
_c.Call.Return(run)
return _c
}
// ShowCollections provides a mock function with given fields: ctx, dbName
func (_m *MockBroker) ShowCollections(ctx context.Context, dbName string) (*milvuspb.ShowCollectionsResponse, error) {
ret := _m.Called(ctx, dbName)
var r0 *milvuspb.ShowCollectionsResponse
var r1 error
if rf, ok := ret.Get(0).(func(context.Context, string) (*milvuspb.ShowCollectionsResponse, error)); ok {
return rf(ctx, dbName)
}
if rf, ok := ret.Get(0).(func(context.Context, string) *milvuspb.ShowCollectionsResponse); ok {
r0 = rf(ctx, dbName)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*milvuspb.ShowCollectionsResponse)
}
}
if rf, ok := ret.Get(1).(func(context.Context, string) error); ok {
r1 = rf(ctx, dbName)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// MockBroker_ShowCollections_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'ShowCollections'
type MockBroker_ShowCollections_Call struct {
*mock.Call
}
// ShowCollections is a helper method to define mock.On call
// - ctx context.Context
// - dbName string
func (_e *MockBroker_Expecter) ShowCollections(ctx interface{}, dbName interface{}) *MockBroker_ShowCollections_Call {
return &MockBroker_ShowCollections_Call{Call: _e.mock.On("ShowCollections", ctx, dbName)}
}
func (_c *MockBroker_ShowCollections_Call) Run(run func(ctx context.Context, dbName string)) *MockBroker_ShowCollections_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(context.Context), args[1].(string))
})
return _c
}
func (_c *MockBroker_ShowCollections_Call) Return(_a0 *milvuspb.ShowCollectionsResponse, _a1 error) *MockBroker_ShowCollections_Call {
_c.Call.Return(_a0, _a1)
return _c
}
func (_c *MockBroker_ShowCollections_Call) RunAndReturn(run func(context.Context, string) (*milvuspb.ShowCollectionsResponse, error)) *MockBroker_ShowCollections_Call {
_c.Call.Return(run)
return _c
}
// ShowPartitionsInternal provides a mock function with given fields: ctx, collectionID
func (_m *MockBroker) ShowPartitionsInternal(ctx context.Context, collectionID int64) ([]int64, error) {
ret := _m.Called(ctx, collectionID)
var r0 []int64
var r1 error
if rf, ok := ret.Get(0).(func(context.Context, int64) ([]int64, error)); ok {
return rf(ctx, collectionID)
}
if rf, ok := ret.Get(0).(func(context.Context, int64) []int64); ok {
r0 = rf(ctx, collectionID)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).([]int64)
}
}
if rf, ok := ret.Get(1).(func(context.Context, int64) error); ok {
r1 = rf(ctx, collectionID)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// MockBroker_ShowPartitionsInternal_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'ShowPartitionsInternal'
type MockBroker_ShowPartitionsInternal_Call struct {
*mock.Call
}
// ShowPartitionsInternal is a helper method to define mock.On call
// - ctx context.Context
// - collectionID int64
func (_e *MockBroker_Expecter) ShowPartitionsInternal(ctx interface{}, collectionID interface{}) *MockBroker_ShowPartitionsInternal_Call {
return &MockBroker_ShowPartitionsInternal_Call{Call: _e.mock.On("ShowPartitionsInternal", ctx, collectionID)}
}
func (_c *MockBroker_ShowPartitionsInternal_Call) Run(run func(ctx context.Context, collectionID int64)) *MockBroker_ShowPartitionsInternal_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(context.Context), args[1].(int64))
})
return _c
}
func (_c *MockBroker_ShowPartitionsInternal_Call) Return(_a0 []int64, _a1 error) *MockBroker_ShowPartitionsInternal_Call {
_c.Call.Return(_a0, _a1)
return _c
}
func (_c *MockBroker_ShowPartitionsInternal_Call) RunAndReturn(run func(context.Context, int64) ([]int64, error)) *MockBroker_ShowPartitionsInternal_Call {
_c.Call.Return(run)
return _c
}
// NewMockBroker creates a new instance of MockBroker. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations.
// The first argument is typically a *testing.T value.
func NewMockBroker(t interface {
mock.TestingT
Cleanup(func())
}) *MockBroker {
mock := &MockBroker{}
mock.Mock.Test(t)
t.Cleanup(func() { mock.AssertExpectations(t) })
return mock
}

View File

@ -41,6 +41,11 @@ type Cluster interface {
FlushChannels(ctx context.Context, nodeID int64, flushTs Timestamp, channels []string) error
Import(ctx context.Context, nodeID int64, it *datapb.ImportTaskRequest)
AddImportSegment(ctx context.Context, req *datapb.AddImportSegmentRequest) (*datapb.AddImportSegmentResponse, error)
PreImport(nodeID int64, in *datapb.PreImportRequest) error
ImportV2(nodeID int64, in *datapb.ImportRequest) error
QueryPreImport(nodeID int64, in *datapb.QueryPreImportRequest) (*datapb.QueryPreImportResponse, error)
QueryImport(nodeID int64, in *datapb.QueryImportRequest) (*datapb.QueryImportResponse, error)
DropImport(nodeID int64, in *datapb.DropImportRequest) error
GetSessions() []*Session
Close()
}
@ -161,6 +166,26 @@ func (c *ClusterImpl) AddImportSegment(ctx context.Context, req *datapb.AddImpor
return c.sessionManager.AddImportSegment(ctx, nodeID, req)
}
// PreImport forwards the pre-import request for the given node to the session
// manager and returns its error unchanged.
func (c *ClusterImpl) PreImport(nodeID int64, in *datapb.PreImportRequest) error {
return c.sessionManager.PreImport(nodeID, in)
}
// ImportV2 forwards the import request for the given node to the session
// manager and returns its error unchanged.
func (c *ClusterImpl) ImportV2(nodeID int64, in *datapb.ImportRequest) error {
return c.sessionManager.ImportV2(nodeID, in)
}
// QueryPreImport forwards the pre-import query for the given node to the
// session manager and returns its response and error unchanged.
func (c *ClusterImpl) QueryPreImport(nodeID int64, in *datapb.QueryPreImportRequest) (*datapb.QueryPreImportResponse, error) {
return c.sessionManager.QueryPreImport(nodeID, in)
}
// QueryImport forwards the import query for the given node to the session
// manager and returns its response and error unchanged.
func (c *ClusterImpl) QueryImport(nodeID int64, in *datapb.QueryImportRequest) (*datapb.QueryImportResponse, error) {
return c.sessionManager.QueryImport(nodeID, in)
}
// DropImport forwards the drop-import request for the given node to the
// session manager and returns its error unchanged.
func (c *ClusterImpl) DropImport(nodeID int64, in *datapb.DropImportRequest) error {
return c.sessionManager.DropImport(nodeID, in)
}
// GetSessions returns all sessions
func (c *ClusterImpl) GetSessions() []*Session {
return c.sessionManager.GetSessions()

View File

@ -31,8 +31,8 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/metastore/mocks"
"github.com/milvus-io/milvus/internal/metastore/model"
"github.com/milvus-io/milvus/internal/mocks"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/util/indexparamcheck"
@ -98,7 +98,6 @@ func Test_compactionTrigger_force(t *testing.T) {
}
catalog := mocks.NewDataCoordCatalog(t)
catalog.EXPECT().AlterSegment(mock.Anything, mock.Anything, mock.Anything).Return(nil).Maybe()
catalog.EXPECT().AlterSegments(mock.Anything, mock.Anything).Return(nil).Maybe()
vecFieldID := int64(201)
@ -2516,7 +2515,6 @@ func Test_compactionTrigger_updateSegmentMaxSize(t *testing.T) {
}
catalog := mocks.NewDataCoordCatalog(t)
catalog.EXPECT().AlterSegment(mock.Anything, mock.Anything, mock.Anything).Return(nil).Maybe()
catalog.EXPECT().AlterSegments(mock.Anything, mock.Anything).Return(nil).Maybe()
tests := []struct {

View File

@ -0,0 +1,392 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package datacoord
import (
"context"
"fmt"
"sync"
"time"
"github.com/samber/lo"
"go.uber.org/zap"
"github.com/milvus-io/milvus/internal/datacoord/broker"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/metrics"
"github.com/milvus-io/milvus/pkg/util/tsoutil"
)
// ImportChecker is the lifecycle interface of the import checker.
type ImportChecker interface {
// Start runs the checker. The importChecker implementation loops on the
// calling goroutine until Close is called.
Start()
// Close stops the checker.
Close()
}
// importChecker periodically inspects import jobs and their tasks and
// advances, fails, times out or garbage-collects them.
type importChecker struct {
// meta provides segment and channel-checkpoint metadata.
meta *meta
// broker is used to verify that a job's collection still exists.
broker broker.Broker
// cluster is passed to AddImportSegment when finishing imported segments.
cluster Cluster
// alloc is handed to the task constructors.
alloc allocator
// sm flushes import segments and is handed to NewImportTasks.
sm Manager
// imeta stores import jobs and tasks.
imeta ImportMeta
// buildIndexCh receives the IDs of import segments that have been finished.
buildIndexCh chan UniqueID

// closeOnce guards closing closeChan so Close may be called more than once.
closeOnce sync.Once
// closeChan is closed to stop the Start loop.
closeChan chan struct{}
}
// NewImportChecker builds an import checker from the given dependencies.
// The returned checker does nothing until Start is called.
func NewImportChecker(meta *meta,
	broker broker.Broker,
	cluster Cluster,
	alloc allocator,
	sm Manager,
	imeta ImportMeta,
	buildIndexCh chan UniqueID,
) ImportChecker {
	checker := new(importChecker)
	checker.meta = meta
	checker.broker = broker
	checker.cluster = cluster
	checker.alloc = alloc
	checker.sm = sm
	checker.imeta = imeta
	checker.buildIndexCh = buildIndexCh
	checker.closeChan = make(chan struct{})
	return checker
}
// Start runs the checker loop on the calling goroutine until Close is called.
//
// Two tickers drive the work:
//   - the high-frequency ticker dispatches every job to the handler for its
//     current state (Pending, PreImporting, Importing, Failed);
//   - the low-frequency ticker applies the timeout and GC checks to every job,
//     verifies per collection that the collection still exists, and logs
//     task statistics.
func (c *importChecker) Start() {
	log.Info("start import checker")

	fastTicker := time.NewTicker(Params.DataCoordCfg.ImportCheckIntervalHigh.GetAsDuration(time.Second)) // 2s
	slowTicker := time.NewTicker(Params.DataCoordCfg.ImportCheckIntervalLow.GetAsDuration(time.Second))  // 2min
	defer fastTicker.Stop()
	defer slowTicker.Stop()

	for {
		select {
		case <-c.closeChan:
			log.Info("import checker exited")
			return

		case <-fastTicker.C:
			for _, job := range c.imeta.GetJobBy() {
				state := job.GetState()
				if state == internalpb.ImportJobState_Pending {
					c.checkPendingJob(job)
				} else if state == internalpb.ImportJobState_PreImporting {
					c.checkPreImportingJob(job)
				} else if state == internalpb.ImportJobState_Importing {
					c.checkImportingJob(job)
				} else if state == internalpb.ImportJobState_Failed {
					c.tryFailingTasks(job)
				}
			}

		case <-slowTicker.C:
			allJobs := c.imeta.GetJobBy()
			for _, job := range allJobs {
				c.tryTimeoutJob(job)
				c.checkGC(job)
			}
			grouped := lo.GroupBy(allJobs, func(job ImportJob) int64 {
				return job.GetCollectionID()
			})
			for collectionID, jobsOfCollection := range grouped {
				c.checkCollection(collectionID, jobsOfCollection)
			}
			c.LogStats()
		}
	}
}
// Close signals the Start loop to exit by closing closeChan. The close is
// wrapped in a sync.Once, so calling Close repeatedly is safe.
func (c *importChecker) Close() {
	shutdown := func() {
		close(c.closeChan)
	}
	c.closeOnce.Do(shutdown)
}
// LogStats counts the pre-import tasks and the import tasks by state, then
// logs the counts and publishes them to the ImportTasks metric.
func (c *importChecker) LogStats() {
	report := func(tasks []ImportTask, taskType TaskType) {
		// Tally tasks per state; states with no task read back as zero.
		counts := make(map[datapb.ImportTaskStateV2]int)
		for _, task := range tasks {
			counts[task.GetState()]++
		}

		log.Info("import task stats", zap.String("type", taskType.String()),
			zap.Int("pending", counts[datapb.ImportTaskStateV2_Pending]),
			zap.Int("inProgress", counts[datapb.ImportTaskStateV2_InProgress]),
			zap.Int("completed", counts[datapb.ImportTaskStateV2_Completed]),
			zap.Int("failed", counts[datapb.ImportTaskStateV2_Failed]))

		reported := []datapb.ImportTaskStateV2{
			datapb.ImportTaskStateV2_Pending,
			datapb.ImportTaskStateV2_InProgress,
			datapb.ImportTaskStateV2_Completed,
			datapb.ImportTaskStateV2_Failed,
		}
		for _, state := range reported {
			metrics.ImportTasks.WithLabelValues(taskType.String(), state.String()).Set(float64(counts[state]))
		}
	}

	for _, taskType := range []TaskType{PreImportTaskType, ImportTaskType} {
		report(c.imeta.GetTaskBy(WithType(taskType)), taskType)
	}
}
// getLackFilesForPreImports returns the files of the job that are not yet
// covered by any pre-import task of that job. Files are matched by ID and the
// result order is unspecified.
func (c *importChecker) getLackFilesForPreImports(job ImportJob) []*internalpb.ImportFile {
	files := job.GetFiles()
	missing := make(map[int64]*internalpb.ImportFile, len(files))
	for _, f := range files {
		missing[f.GetId()] = f
	}

	// Drop every file that an existing pre-import task already references.
	for _, task := range c.imeta.GetTaskBy(WithType(PreImportTaskType), WithJob(job.GetJobID())) {
		for _, stat := range task.GetFileStats() {
			delete(missing, stat.GetImportFile().GetId())
		}
	}

	result := make([]*internalpb.ImportFile, 0, len(missing))
	for _, f := range missing {
		result = append(result, f)
	}
	return result
}
// getLackFilesForImports returns the file stats produced by the job's
// pre-import tasks that are not yet covered by any import task of that job.
// It returns nil while any pre-import task of the job is not Completed, so
// that import tasks are only generated once pre-import has fully finished.
func (c *importChecker) getLackFilesForImports(job ImportJob) []*datapb.ImportFileStats {
	preimportTasks := c.imeta.GetTaskBy(WithType(PreImportTaskType), WithJob(job.GetJobID()))

	// Bail out unless every pre-import task has completed.
	for _, task := range preimportTasks {
		if task.GetState() != datapb.ImportTaskStateV2_Completed {
			return nil
		}
	}

	missing := map[int64]*datapb.ImportFileStats{}
	for _, task := range preimportTasks {
		for _, stat := range task.GetFileStats() {
			missing[stat.GetImportFile().GetId()] = stat
		}
	}

	// Remove the files that an existing import task already handles.
	importTasks := c.imeta.GetTaskBy(WithType(ImportTaskType), WithJob(job.GetJobID()))
	for _, task := range importTasks {
		for _, stat := range task.GetFileStats() {
			delete(missing, stat.GetImportFile().GetId())
		}
	}
	return lo.Values(missing)
}
// checkPendingJob creates pre-import tasks for the files of the job that have
// none yet, grouped by the FilesPerPreImportTask setting, and then moves the
// job to PreImporting. It returns early, leaving the job state untouched, when
// no file is missing or when creating or storing a task fails.
func (c *importChecker) checkPendingJob(job ImportJob) {
	missing := c.getLackFilesForPreImports(job)
	if len(missing) == 0 {
		return
	}

	batchSize := Params.DataCoordCfg.FilesPerPreImportTask.GetAsInt()
	created, err := NewPreImportTasks(lo.Chunk(missing, batchSize), job, c.alloc)
	if err != nil {
		log.Warn("new preimport tasks failed", zap.Error(err))
		return
	}

	for _, task := range created {
		if addErr := c.imeta.AddTask(task); addErr != nil {
			log.Warn("add preimport task failed", WrapTaskLog(task, zap.Error(addErr))...)
			return
		}
		log.Info("add new preimport task", WrapTaskLog(task)...)
	}

	jobID := job.GetJobID()
	if updateErr := c.imeta.UpdateJob(jobID, UpdateJobState(internalpb.ImportJobState_PreImporting)); updateErr != nil {
		log.Warn("failed to update job state to PreImporting", zap.Int64("jobID", job.GetJobID()), zap.Error(updateErr))
	}
}
// checkPreImportingJob creates import tasks for the pre-imported files of the
// job that have none yet and then moves the job to Importing. It returns
// early, leaving the job state untouched, when there is nothing to create
// (which includes pre-import not being finished) or when creating or storing
// a task fails.
func (c *importChecker) checkPreImportingJob(job ImportJob) {
	missing := c.getLackFilesForImports(job)
	if len(missing) == 0 {
		return
	}

	created, err := NewImportTasks(RegroupImportFiles(job, missing), job, c.sm, c.alloc)
	if err != nil {
		log.Warn("new import tasks failed", zap.Error(err))
		return
	}

	for _, task := range created {
		if addErr := c.imeta.AddTask(task); addErr != nil {
			log.Warn("add new import task failed", WrapTaskLog(task, zap.Error(addErr))...)
			return
		}
		log.Info("add new import task", WrapTaskLog(task)...)
	}

	jobID := job.GetJobID()
	if updateErr := c.imeta.UpdateJob(jobID, UpdateJobState(internalpb.ImportJobState_Importing)); updateErr != nil {
		log.Warn("failed to update job state to Importing", zap.Int64("jobID", job.GetJobID()), zap.Error(updateErr))
	}
}
// checkImportingJob finishes a job once all of its import tasks are Completed.
//
// It collects the task segments that are still marked as importing, flushes
// them through the segment manager, and then for each such segment:
//  1. registers it via AddImportSegment,
//  2. notifies buildIndexCh so that index building can start sooner,
//  3. sets the segment's start and DML positions to the current checkpoint of
//     its channel and clears its importing flag.
//
// When every segment has been processed the job is moved to Completed. Any
// failure makes the function return early and leaves the job in Importing, so
// the work is retried on the next check; segments that were already finished
// are skipped then because their importing flag is cleared.
func (c *importChecker) checkImportingJob(job ImportJob) {
	tasks := c.imeta.GetTaskBy(WithType(ImportTaskType), WithJob(job.GetJobID()))
	for _, t := range tasks {
		if t.GetState() != datapb.ImportTaskStateV2_Completed {
			return
		}
	}

	unfinished := make([]int64, 0)
	for _, task := range tasks {
		segmentIDs := task.(*importTask).GetSegmentIDs()
		for _, segmentID := range segmentIDs {
			segment := c.meta.GetSegment(segmentID)
			if segment == nil {
				log.Warn("cannot find segment, may be compacted", WrapTaskLog(task, zap.Int64("segmentID", segmentID))...)
				continue
			}
			if segment.GetIsImporting() {
				unfinished = append(unfinished, segmentID)
			}
		}
	}

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	err := c.sm.FlushImportSegments(ctx, job.GetCollectionID(), unfinished)
	if err != nil {
		log.Warn("flush imported segments failed", zap.Int64("jobID", job.GetJobID()),
			zap.Int64("collectionID", job.GetCollectionID()), zap.Int64s("segments", unfinished), zap.Error(err))
		return
	}

	channels, err := c.meta.GetSegmentsChannels(unfinished)
	if err != nil {
		log.Warn("get segments channels failed", zap.Int64("jobID", job.GetJobID()), zap.Error(err))
		return
	}
	for _, segmentID := range unfinished {
		err = AddImportSegment(c.cluster, c.meta, segmentID)
		if err != nil {
			log.Warn("add import segment failed", zap.Int64("jobID", job.GetJobID()),
				zap.Int64("collectionID", job.GetCollectionID()), zap.Error(err))
			return
		}
		// The notification only accelerates index building, so it must never
		// block the checker loop: if the channel is full (or nil) the hint is
		// dropped instead of stalling every other import job.
		select {
		case c.buildIndexCh <- segmentID: // accelerate index building
		default:
			log.Info("build index channel is not ready, skip notification",
				zap.Int64("jobID", job.GetJobID()), zap.Int64("segmentID", segmentID))
		}
		channelCP := c.meta.GetChannelCheckpoint(channels[segmentID])
		if channelCP == nil {
			log.Warn("nil channel checkpoint", zap.Int64("jobID", job.GetJobID()))
			return
		}
		op1 := UpdateStartPosition([]*datapb.SegmentStartPosition{{StartPosition: channelCP, SegmentID: segmentID}})
		op2 := UpdateDmlPosition(segmentID, channelCP)
		op3 := UpdateIsImporting(segmentID, false)
		err = c.meta.UpdateSegmentsInfo(op1, op2, op3)
		if err != nil {
			log.Warn("update import segment failed", zap.Int64("jobID", job.GetJobID()), zap.Error(err))
			return
		}
	}

	err = c.imeta.UpdateJob(job.GetJobID(), UpdateJobState(internalpb.ImportJobState_Completed))
	if err != nil {
		log.Warn("failed to update job state to Completed", zap.Int64("jobID", job.GetJobID()), zap.Error(err))
	}
}
// tryFailingTasks propagates a job failure to its tasks: every task of the job
// that is still Pending, InProgress or Completed is marked Failed with the
// job's failure reason. A task that cannot be updated is logged and skipped.
func (c *importChecker) tryFailingTasks(job ImportJob) {
	affected := c.imeta.GetTaskBy(
		WithJob(job.GetJobID()),
		WithStates(
			datapb.ImportTaskStateV2_Pending,
			datapb.ImportTaskStateV2_InProgress,
			datapb.ImportTaskStateV2_Completed,
		),
	)
	if len(affected) == 0 {
		return
	}

	log.Warn("Import job has failed, all tasks with the same jobID"+
		" will be marked as failed", zap.Int64("jobID", job.GetJobID()))

	for _, task := range affected {
		if err := c.imeta.UpdateTask(
			task.GetTaskID(),
			UpdateState(datapb.ImportTaskStateV2_Failed),
			UpdateReason(job.GetReason()),
		); err != nil {
			log.Warn("failed to update import task state to failed", WrapTaskLog(task, zap.Error(err))...)
		}
	}
}
// tryTimeoutJob marks the job as Failed with reason "import timeout" once the
// current time is past the job's timeout timestamp.
//
// Jobs that are already Completed or Failed are left alone. The caller runs
// this check for every job on each slow tick, so without the guard a
// successfully completed job would be flipped to Failed as soon as its timeout
// elapsed, and an already failed job would have its original failure reason
// overwritten and be logged again on every tick.
func (c *importChecker) tryTimeoutJob(job ImportJob) {
	switch job.GetState() {
	case internalpb.ImportJobState_Completed, internalpb.ImportJobState_Failed:
		return
	}

	timeoutTime := tsoutil.PhysicalTime(job.GetTimeoutTs())
	if !time.Now().After(timeoutTime) {
		return
	}

	log.Warn("Import timeout, expired the specified time limit",
		zap.Int64("jobID", job.GetJobID()), zap.Time("timeoutTime", timeoutTime))
	err := c.imeta.UpdateJob(job.GetJobID(), UpdateJobState(internalpb.ImportJobState_Failed),
		UpdateJobReason("import timeout"))
	if err != nil {
		log.Warn("failed to update job state to Failed", zap.Int64("jobID", job.GetJobID()), zap.Error(err))
	}
}
// checkCollection asks the broker, with a 10-second timeout, whether the
// collection still exists. If it does not, every given job is marked Failed
// with a "collection <id> dropped" reason. Nothing is changed when the lookup
// itself fails or when the job list is empty.
func (c *importChecker) checkCollection(collectionID int64, jobs []ImportJob) {
	if len(jobs) == 0 {
		return
	}

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	exists, err := c.broker.HasCollection(ctx, collectionID)
	switch {
	case err != nil:
		log.Warn("verify existence of collection failed", zap.Int64("collection", collectionID), zap.Error(err))
		return
	case exists:
		return
	}

	reason := fmt.Sprintf("collection %d dropped", collectionID)
	for _, job := range jobs {
		updateErr := c.imeta.UpdateJob(job.GetJobID(),
			UpdateJobState(internalpb.ImportJobState_Failed), UpdateJobReason(reason))
		if updateErr != nil {
			log.Warn("failed to update job state to Failed", zap.Int64("jobID", job.GetJobID()), zap.Error(updateErr))
		}
	}
}
// checkGC removes a Completed or Failed job, together with its tasks, once the
// time since the job's cleanup timestamp has reached the ImportTaskRetention
// setting.
//
// A task is kept (and therefore the job as well) when:
//   - the job is Failed and the task is an import task that still lists
//     segment IDs,
//   - the task is still assigned to a node, or
//   - removing the task fails.
//
// The job itself is removed only when none of its tasks had to be kept.
func (c *importChecker) checkGC(job ImportJob) {
	switch job.GetState() {
	case internalpb.ImportJobState_Completed, internalpb.ImportJobState_Failed:
		// terminal state, eligible for GC
	default:
		return
	}

	retention := Params.DataCoordCfg.ImportTaskRetention.GetAsDuration(time.Second)
	cleanupTime := tsoutil.PhysicalTime(job.GetCleanupTs())
	if time.Since(cleanupTime) < retention {
		return
	}
	log.Info("job has reached the GC retention", zap.Int64("jobID", job.GetJobID()),
		zap.Time("cleanupTime", cleanupTime), zap.Duration("GCRetention", retention))

	jobFailed := job.GetState() == internalpb.ImportJobState_Failed
	jobRemovable := true
	for _, task := range c.imeta.GetTaskBy(WithJob(job.GetJobID())) {
		holdsSegments := jobFailed &&
			task.GetType() == ImportTaskType &&
			len(task.(*importTask).GetSegmentIDs()) != 0
		if holdsSegments || task.GetNodeID() != NullNodeID {
			jobRemovable = false
			continue
		}
		if err := c.imeta.RemoveTask(task.GetTaskID()); err != nil {
			log.Warn("remove task failed during GC", WrapTaskLog(task, zap.Error(err))...)
			jobRemovable = false
			continue
		}
		log.Info("reached GC retention, task removed", WrapTaskLog(task)...)
	}

	if !jobRemovable {
		return
	}
	if err := c.imeta.RemoveJob(job.GetJobID()); err != nil {
		log.Warn("remove import job failed", zap.Int64("jobID", job.GetJobID()), zap.Error(err))
	}
}

View File

@ -0,0 +1,455 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package datacoord
import (
"context"
"math/rand"
"testing"
"time"
"github.com/cockroachdb/errors"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/suite"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
broker2 "github.com/milvus-io/milvus/internal/datacoord/broker"
"github.com/milvus-io/milvus/internal/metastore/mocks"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/pkg/util/tsoutil"
)
// ImportCheckerSuite is the testify suite for importChecker.
type ImportCheckerSuite struct {
suite.Suite
// jobID is the ID of the job registered in SetupTest.
jobID int64
// imeta is the import meta shared with the checker under test.
imeta ImportMeta
// checker is the importChecker under test.
checker *importChecker
}
// SetupTest builds a fresh importChecker on top of mocked dependencies and
// registers one Pending job with three files, whose ID is kept in s.jobID.
func (s *ImportCheckerSuite) SetupTest() {
// The catalog mock returns empty listings, so both metas start out empty.
catalog := mocks.NewDataCoordCatalog(s.T())
catalog.EXPECT().ListImportJobs().Return(nil, nil)
catalog.EXPECT().ListPreImportTasks().Return(nil, nil)
catalog.EXPECT().ListImportTasks().Return(nil, nil)
catalog.EXPECT().ListSegments(mock.Anything).Return(nil, nil)
catalog.EXPECT().ListChannelCheckpoint(mock.Anything).Return(nil, nil)
catalog.EXPECT().ListIndexes(mock.Anything).Return(nil, nil)
catalog.EXPECT().ListSegmentIndexes(mock.Anything).Return(nil, nil)
cluster := NewMockCluster(s.T())
alloc := NewNMockAllocator(s.T())
imeta, err := NewImportMeta(catalog)
s.NoError(err)
s.imeta = imeta
meta, err := newMeta(context.TODO(), catalog, nil)
s.NoError(err)
broker := broker2.NewMockBroker(s.T())
sm := NewMockManager(s.T())
// Buffered so that sends from checkImportingJob do not block the test.
buildIndexCh := make(chan UniqueID, 1024)
checker := NewImportChecker(meta, broker, cluster, alloc, sm, imeta, buildIndexCh).(*importChecker)
s.checker = checker
// Seed job: Pending, collection 1, partition 2, channel "ch0", three files.
job := &importJob{
ImportJob: &datapb.ImportJob{
JobID: 0,
CollectionID: 1,
PartitionIDs: []int64{2},
Vchannels: []string{"ch0"},
State: internalpb.ImportJobState_Pending,
TimeoutTs: 1000,
CleanupTs: tsoutil.GetCurrentTime(),
Files: []*internalpb.ImportFile{
{
Id: 1,
Paths: []string{"a.json"},
},
{
Id: 2,
Paths: []string{"b.json"},
},
{
Id: 3,
Paths: []string{"c.json"},
},
},
},
}
catalog.EXPECT().SaveImportJob(mock.Anything).Return(nil)
err = s.imeta.AddJob(job)
s.NoError(err)
s.jobID = job.GetJobID()
}
// TestLogStats adds one Failed pre-import task and one Pending import task to
// the meta and then calls LogStats. It only asserts that adding the tasks
// succeeds; LogStats itself is exercised without checking its output.
func (s *ImportCheckerSuite) TestLogStats() {
catalog := s.imeta.(*importMeta).catalog.(*mocks.DataCoordCatalog)
catalog.EXPECT().SavePreImportTask(mock.Anything).Return(nil)
catalog.EXPECT().SaveImportTask(mock.Anything).Return(nil)
pit1 := &preImportTask{
PreImportTask: &datapb.PreImportTask{
JobID: s.jobID,
TaskID: 1,
State: datapb.ImportTaskStateV2_Failed,
},
}
err := s.imeta.AddTask(pit1)
s.NoError(err)
it1 := &importTask{
ImportTaskV2: &datapb.ImportTaskV2{
JobID: s.jobID,
TaskID: 2,
SegmentIDs: []int64{10, 11, 12},
State: datapb.ImportTaskStateV2_Pending,
},
}
err = s.imeta.AddTask(it1)
s.NoError(err)
s.checker.LogStats()
}
// TestCheckJob drives the suite's job through checkPendingJob,
// checkPreImportingJob and checkImportingJob in turn, asserting the job state
// and the number of tasks after each step. The job is expected to end in the
// Completed state with none of its segments flagged as importing.
func (s *ImportCheckerSuite) TestCheckJob() {
job := s.imeta.GetJob(s.jobID)
// test checkPendingJob
alloc := s.checker.alloc.(*NMockAllocator)
// every allocN call returns (id, id+n) for a freshly drawn random id
alloc.EXPECT().allocN(mock.Anything).RunAndReturn(func(n int64) (int64, int64, error) {
id := rand.Int63()
return id, id + n, nil
})
catalog := s.imeta.(*importMeta).catalog.(*mocks.DataCoordCatalog)
catalog.EXPECT().SavePreImportTask(mock.Anything).Return(nil)
s.checker.checkPendingJob(job)
preimportTasks := s.imeta.GetTaskBy(WithJob(job.GetJobID()), WithType(PreImportTaskType))
// NOTE(review): 2 tasks presumably follows from the job's 3 files and a
// files-per-pre-import-task setting of 2 — confirm against the config.
s.Equal(2, len(preimportTasks))
s.Equal(internalpb.ImportJobState_PreImporting, s.imeta.GetJob(job.GetJobID()).GetState())
// a second call must not create additional tasks
s.checker.checkPendingJob(job) // no lack
preimportTasks = s.imeta.GetTaskBy(WithJob(job.GetJobID()), WithType(PreImportTaskType))
s.Equal(2, len(preimportTasks))
s.Equal(internalpb.ImportJobState_PreImporting, s.imeta.GetJob(job.GetJobID()).GetState())
// test checkPreImportingJob
catalog.EXPECT().SaveImportTask(mock.Anything).Return(nil)
// mark every pre-import task Completed before running the pre-importing check
for _, t := range preimportTasks {
err := s.imeta.UpdateTask(t.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_Completed))
s.NoError(err)
}
s.checker.checkPreImportingJob(job)
importTasks := s.imeta.GetTaskBy(WithJob(job.GetJobID()), WithType(ImportTaskType))
s.Equal(1, len(importTasks))
s.Equal(internalpb.ImportJobState_Importing, s.imeta.GetJob(job.GetJobID()).GetState())
// a second call must not create additional tasks
s.checker.checkPreImportingJob(job) // no lack
importTasks = s.imeta.GetTaskBy(WithJob(job.GetJobID()), WithType(ImportTaskType))
s.Equal(1, len(importTasks))
s.Equal(internalpb.ImportJobState_Importing, s.imeta.GetJob(job.GetJobID()).GetState())
// test checkImportingJob
s.checker.checkImportingJob(job) // not completed
s.Equal(internalpb.ImportJobState_Importing, s.imeta.GetJob(job.GetJobID()).GetState())
// while the job is still Importing, every segment of every import task must
// still be flagged as importing
for _, t := range importTasks {
task := s.imeta.GetTask(t.GetTaskID())
for _, id := range task.(*importTask).GetSegmentIDs() {
segment := s.checker.meta.GetSegment(id)
s.Equal(true, segment.GetIsImporting())
}
}
sm := s.checker.sm.(*MockManager)
sm.EXPECT().FlushImportSegments(mock.Anything, mock.Anything, mock.Anything).Return(nil)
cluster := s.checker.cluster.(*MockCluster)
cluster.EXPECT().AddImportSegment(mock.Anything, mock.Anything).Return(nil, nil)
catalog.EXPECT().AddSegment(mock.Anything, mock.Anything).Return(nil)
catalog.EXPECT().AlterSegments(mock.Anything, mock.Anything).Return(nil)
catalog.EXPECT().SaveChannelCheckpoint(mock.Anything, mock.Anything, mock.Anything).Return(nil)
// give each import task one freshly added Flushed segment, mark the task
// Completed and record a checkpoint for the segment's channel
for _, t := range importTasks {
segment := &SegmentInfo{
SegmentInfo: &datapb.SegmentInfo{
ID: rand.Int63(),
State: commonpb.SegmentState_Flushed,
IsImporting: true,
InsertChannel: "ch0",
},
}
err := s.checker.meta.AddSegment(context.Background(), segment)
s.NoError(err)
err = s.imeta.UpdateTask(t.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_Completed),
UpdateSegmentIDs([]int64{segment.GetID()}))
s.NoError(err)
err = s.checker.meta.UpdateChannelCheckpoint(segment.GetInsertChannel(), &msgpb.MsgPosition{MsgID: []byte{0}})
s.NoError(err)
}
s.checker.checkImportingJob(job)
// once the job completes, the importing flag must have been cleared
for _, t := range importTasks {
task := s.imeta.GetTask(t.GetTaskID())
for _, id := range task.(*importTask).GetSegmentIDs() {
segment := s.checker.meta.GetSegment(id)
s.Equal(false, segment.GetIsImporting())
}
}
s.Equal(internalpb.ImportJobState_Completed, s.imeta.GetJob(job.GetJobID()).GetState())
}
// TestCheckJob_Failed exercises the failure paths of checkPendingJob and
// checkPreImportingJob. For each phase it injects a catalog save error and an
// allocN error, asserts that no task is created and the job state does not
// advance, and finally verifies that the phase succeeds once the mocks stop
// failing.
func (s *ImportCheckerSuite) TestCheckJob_Failed() {
	mockErr := errors.New("mock err")
	job := s.imeta.GetJob(s.jobID)

	// test checkPendingJob
	// case: saving the pre-import task fails
	alloc := s.checker.alloc.(*NMockAllocator)
	alloc.EXPECT().allocN(mock.Anything).Return(0, 0, nil)
	catalog := s.imeta.(*importMeta).catalog.(*mocks.DataCoordCatalog)
	catalog.EXPECT().SavePreImportTask(mock.Anything).Return(mockErr)
	s.checker.checkPendingJob(job)
	preimportTasks := s.imeta.GetTaskBy(WithJob(job.GetJobID()), WithType(PreImportTaskType))
	s.Equal(0, len(preimportTasks))
	s.Equal(internalpb.ImportJobState_Pending, s.imeta.GetJob(job.GetJobID()).GetState())

	// case: ID allocation fails
	alloc.ExpectedCalls = nil
	alloc.EXPECT().allocN(mock.Anything).Return(0, 0, mockErr)
	s.checker.checkPendingJob(job)
	preimportTasks = s.imeta.GetTaskBy(WithJob(job.GetJobID()), WithType(PreImportTaskType))
	s.Equal(0, len(preimportTasks))
	s.Equal(internalpb.ImportJobState_Pending, s.imeta.GetJob(job.GetJobID()).GetState())

	// case: everything succeeds, the job moves on to PreImporting
	alloc.ExpectedCalls = nil
	alloc.EXPECT().allocN(mock.Anything).Return(0, 0, nil)
	catalog.ExpectedCalls = nil
	catalog.EXPECT().SaveImportJob(mock.Anything).Return(nil)
	catalog.EXPECT().SavePreImportTask(mock.Anything).Return(nil)
	s.checker.checkPendingJob(job)
	preimportTasks = s.imeta.GetTaskBy(WithJob(job.GetJobID()), WithType(PreImportTaskType))
	s.Equal(2, len(preimportTasks))
	s.Equal(internalpb.ImportJobState_PreImporting, s.imeta.GetJob(job.GetJobID()).GetState())

	// test checkPreImportingJob
	for _, t := range preimportTasks {
		err := s.imeta.UpdateTask(t.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_Completed))
		s.NoError(err)
	}

	// case: saving the import task fails
	catalog.ExpectedCalls = nil
	catalog.EXPECT().SaveImportTask(mock.Anything).Return(mockErr)
	s.checker.checkPreImportingJob(job)
	importTasks := s.imeta.GetTaskBy(WithJob(job.GetJobID()), WithType(ImportTaskType))
	s.Equal(0, len(importTasks))
	s.Equal(internalpb.ImportJobState_PreImporting, s.imeta.GetJob(job.GetJobID()).GetState())

	// case: ID allocation fails. The checker has to be invoked here, otherwise
	// the assertions below only re-check the state left by the previous case
	// and the allocN failure path is never exercised.
	alloc.ExpectedCalls = nil
	alloc.EXPECT().allocN(mock.Anything).Return(0, 0, mockErr)
	s.checker.checkPreImportingJob(job)
	importTasks = s.imeta.GetTaskBy(WithJob(job.GetJobID()), WithType(ImportTaskType))
	s.Equal(0, len(importTasks))
	s.Equal(internalpb.ImportJobState_PreImporting, s.imeta.GetJob(job.GetJobID()).GetState())

	// case: everything succeeds, the job moves on to Importing
	catalog.ExpectedCalls = nil
	catalog.EXPECT().SaveImportJob(mock.Anything).Return(nil)
	catalog.EXPECT().SaveImportTask(mock.Anything).Return(nil)
	alloc.ExpectedCalls = nil
	alloc.EXPECT().allocN(mock.Anything).Return(0, 0, nil)
	s.checker.checkPreImportingJob(job)
	importTasks = s.imeta.GetTaskBy(WithJob(job.GetJobID()), WithType(ImportTaskType))
	s.Equal(1, len(importTasks))
	s.Equal(internalpb.ImportJobState_Importing, s.imeta.GetJob(job.GetJobID()).GetState())
}
// TestCheckTimeout adds an InProgress pre-import task to the suite's job and
// expects tryTimeoutJob to mark the job Failed with the reason
// "import timeout".
// NOTE(review): this presumably relies on the job fixture's TimeoutTs (1000)
// already lying in the past — confirm against tryTimeoutJob.
func (s *ImportCheckerSuite) TestCheckTimeout() {
catalog := s.imeta.(*importMeta).catalog.(*mocks.DataCoordCatalog)
catalog.EXPECT().SavePreImportTask(mock.Anything).Return(nil)
var task ImportTask = &preImportTask{
PreImportTask: &datapb.PreImportTask{
JobID: s.jobID,
TaskID: 1,
State: datapb.ImportTaskStateV2_InProgress,
},
}
err := s.imeta.AddTask(task)
s.NoError(err)
s.checker.tryTimeoutJob(s.imeta.GetJob(s.jobID))
job := s.imeta.GetJob(s.jobID)
s.Equal(internalpb.ImportJobState_Failed, job.GetState())
s.Equal("import timeout", job.GetReason())
}
// TestCheckFailure adds one Pending and one Completed pre-import task and
// calls tryFailingTasks twice: first with a catalog whose SavePreImportTask
// fails (no task may end up Failed), then with a working catalog (both tasks
// must end up Failed).
func (s *ImportCheckerSuite) TestCheckFailure() {
catalog := s.imeta.(*importMeta).catalog.(*mocks.DataCoordCatalog)
catalog.EXPECT().SavePreImportTask(mock.Anything).Return(nil)
pit1 := &preImportTask{
PreImportTask: &datapb.PreImportTask{
JobID: s.jobID,
TaskID: 1,
State: datapb.ImportTaskStateV2_Pending,
},
}
err := s.imeta.AddTask(pit1)
s.NoError(err)
pit2 := &preImportTask{
PreImportTask: &datapb.PreImportTask{
JobID: s.jobID,
TaskID: 2,
State: datapb.ImportTaskStateV2_Completed,
},
}
err = s.imeta.AddTask(pit2)
s.NoError(err)
// persisting the state change fails: no task may be reported as Failed
catalog.ExpectedCalls = nil
catalog.EXPECT().SavePreImportTask(mock.Anything).Return(errors.New("mock error"))
s.checker.tryFailingTasks(s.imeta.GetJob(s.jobID))
tasks := s.imeta.GetTaskBy(WithJob(s.jobID), WithStates(datapb.ImportTaskStateV2_Failed))
s.Equal(0, len(tasks))
// persisting succeeds: both tasks are expected to be Failed
catalog.ExpectedCalls = nil
catalog.EXPECT().SavePreImportTask(mock.Anything).Return(nil)
s.checker.tryFailingTasks(s.imeta.GetJob(s.jobID))
tasks = s.imeta.GetTaskBy(WithJob(s.jobID), WithStates(datapb.ImportTaskStateV2_Failed))
s.Equal(2, len(tasks))
}
// TestCheckGC calls checkGC repeatedly on a job with a single Failed import
// task, changing one thing between calls (job state, cleanup timestamp, task
// segment IDs, task node ID, catalog drop results). After each call it asserts
// how many tasks and jobs remain in the import meta; only the final call is
// expected to remove both the task and the job.
func (s *ImportCheckerSuite) TestCheckGC() {
mockErr := errors.New("mock err")
catalog := s.imeta.(*importMeta).catalog.(*mocks.DataCoordCatalog)
catalog.EXPECT().SaveImportTask(mock.Anything).Return(nil)
var task ImportTask = &importTask{
ImportTaskV2: &datapb.ImportTaskV2{
JobID: s.jobID,
TaskID: 1,
State: datapb.ImportTaskStateV2_Failed,
SegmentIDs: []int64{2},
},
}
err := s.imeta.AddTask(task)
s.NoError(err)
// not failed or completed
s.checker.checkGC(s.imeta.GetJob(s.jobID))
s.Equal(1, len(s.imeta.GetTaskBy(WithJob(s.jobID))))
s.Equal(1, len(s.imeta.GetJobBy()))
catalog.EXPECT().SaveImportJob(mock.Anything).Return(nil)
err = s.imeta.UpdateJob(s.jobID, UpdateJobState(internalpb.ImportJobState_Failed))
s.NoError(err)
// not reach cleanup ts
s.checker.checkGC(s.imeta.GetJob(s.jobID))
s.Equal(1, len(s.imeta.GetTaskBy(WithJob(s.jobID))))
s.Equal(1, len(s.imeta.GetJobBy()))
// move the job's cleanup timestamp back by twice the retention period and
// store the modified job again
GCRetention := Params.DataCoordCfg.ImportTaskRetention.GetAsDuration(time.Second)
job := s.imeta.GetJob(s.jobID)
job.(*importJob).CleanupTs = tsoutil.AddPhysicalDurationOnTs(job.GetCleanupTs(), GCRetention*-2)
err = s.imeta.AddJob(job)
s.NoError(err)
// segment not dropped
s.checker.checkGC(s.imeta.GetJob(s.jobID))
s.Equal(1, len(s.imeta.GetTaskBy(WithJob(s.jobID))))
s.Equal(1, len(s.imeta.GetJobBy()))
err = s.imeta.UpdateTask(task.GetTaskID(), UpdateSegmentIDs([]int64{}))
s.NoError(err)
// task is not dropped
s.checker.checkGC(s.imeta.GetJob(s.jobID))
s.Equal(1, len(s.imeta.GetTaskBy(WithJob(s.jobID))))
s.Equal(1, len(s.imeta.GetJobBy()))
err = s.imeta.UpdateTask(task.GetTaskID(), UpdateNodeID(NullNodeID))
s.NoError(err)
// remove task failed
catalog.EXPECT().DropImportTask(mock.Anything).Return(mockErr)
s.checker.checkGC(s.imeta.GetJob(s.jobID))
s.Equal(1, len(s.imeta.GetTaskBy(WithJob(s.jobID))))
s.Equal(1, len(s.imeta.GetJobBy()))
// remove job failed
catalog.ExpectedCalls = nil
catalog.EXPECT().DropImportTask(mock.Anything).Return(nil)
catalog.EXPECT().DropImportJob(mock.Anything).Return(mockErr)
s.checker.checkGC(s.imeta.GetJob(s.jobID))
s.Equal(0, len(s.imeta.GetTaskBy(WithJob(s.jobID))))
s.Equal(1, len(s.imeta.GetJobBy()))
// normal case
catalog.ExpectedCalls = nil
catalog.EXPECT().DropImportJob(mock.Anything).Return(nil)
s.checker.checkGC(s.imeta.GetJob(s.jobID))
s.Equal(0, len(s.imeta.GetTaskBy(WithJob(s.jobID))))
s.Equal(0, len(s.imeta.GetJobBy()))
}
// TestCheckCollection calls checkCollection for collection 1 under several
// broker/catalog outcomes. The job must stay Pending when no jobs are passed,
// when the collection exists, when HasCollection errors, and when saving the
// job fails; it must become Failed only when the broker reports the collection
// as missing and the job can be saved.
func (s *ImportCheckerSuite) TestCheckCollection() {
mockErr := errors.New("mock err")
catalog := s.imeta.(*importMeta).catalog.(*mocks.DataCoordCatalog)
catalog.EXPECT().SavePreImportTask(mock.Anything).Return(nil)
var task ImportTask = &preImportTask{
PreImportTask: &datapb.PreImportTask{
JobID: s.jobID,
TaskID: 1,
State: datapb.ImportTaskStateV2_Pending,
},
}
err := s.imeta.AddTask(task)
s.NoError(err)
// no jobs
s.checker.checkCollection(1, []ImportJob{})
s.Equal(internalpb.ImportJobState_Pending, s.imeta.GetJob(s.jobID).GetState())
// collection exist
broker := s.checker.broker.(*broker2.MockBroker)
broker.EXPECT().HasCollection(mock.Anything, mock.Anything).Return(true, nil)
s.checker.checkCollection(1, []ImportJob{s.imeta.GetJob(s.jobID)})
s.Equal(internalpb.ImportJobState_Pending, s.imeta.GetJob(s.jobID).GetState())
// HasCollection failed
// a fresh mock broker is installed for each remaining case so that earlier
// expectations do not apply
s.checker.broker = broker2.NewMockBroker(s.T())
broker = s.checker.broker.(*broker2.MockBroker)
broker.EXPECT().HasCollection(mock.Anything, mock.Anything).Return(true, mockErr)
s.checker.checkCollection(1, []ImportJob{s.imeta.GetJob(s.jobID)})
s.Equal(internalpb.ImportJobState_Pending, s.imeta.GetJob(s.jobID).GetState())
// SaveImportJob failed
s.checker.broker = broker2.NewMockBroker(s.T())
broker = s.checker.broker.(*broker2.MockBroker)
broker.EXPECT().HasCollection(mock.Anything, mock.Anything).Return(false, nil)
catalog.ExpectedCalls = nil
catalog.EXPECT().SaveImportJob(mock.Anything).Return(mockErr)
s.checker.checkCollection(1, []ImportJob{s.imeta.GetJob(s.jobID)})
s.Equal(internalpb.ImportJobState_Pending, s.imeta.GetJob(s.jobID).GetState())
// collection dropped
s.checker.broker = broker2.NewMockBroker(s.T())
broker = s.checker.broker.(*broker2.MockBroker)
broker.EXPECT().HasCollection(mock.Anything, mock.Anything).Return(false, nil)
catalog.ExpectedCalls = nil
catalog.EXPECT().SaveImportJob(mock.Anything).Return(nil)
s.checker.checkCollection(1, []ImportJob{s.imeta.GetJob(s.jobID)})
s.Equal(internalpb.ImportJobState_Failed, s.imeta.GetJob(s.jobID).GetState())
}
func TestImportChecker(t *testing.T) {
suite.Run(t, new(ImportCheckerSuite))
}

View File

@ -0,0 +1,81 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package datacoord
import (
"time"
"github.com/golang/protobuf/proto"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/pkg/util/tsoutil"
)
// ImportJobFilter is a predicate over import jobs; it returns true for jobs
// that should be kept.
type ImportJobFilter func(job ImportJob) bool
// WithCollectionID builds a filter that accepts only jobs whose collection ID
// equals collectionID.
func WithCollectionID(collectionID int64) ImportJobFilter {
	sameCollection := func(job ImportJob) bool {
		return collectionID == job.GetCollectionID()
	}
	return sameCollection
}
// UpdateJobAction mutates the given import job in place.
type UpdateJobAction func(job ImportJob)
// UpdateJobState returns an action that sets the job's state. When the new
// state is Completed or Failed, the action also sets CleanupTs to the current
// time plus the configured import task retention.
func UpdateJobState(state internalpb.ImportJobState) UpdateJobAction {
	return func(job ImportJob) {
		pb := job.(*importJob).ImportJob
		pb.State = state
		switch state {
		case internalpb.ImportJobState_Completed, internalpb.ImportJobState_Failed:
			retention := Params.DataCoordCfg.ImportTaskRetention.GetAsDuration(time.Second)
			expireAt := time.Now().Add(retention)
			pb.CleanupTs = tsoutil.ComposeTSByTime(expireAt, 0)
		}
	}
}
// UpdateJobReason returns an action that overwrites the job's Reason field
// with reason.
func UpdateJobReason(reason string) UpdateJobAction {
	setReason := func(job ImportJob) {
		pb := job.(*importJob).ImportJob
		pb.Reason = reason
	}
	return setReason
}
// ImportJob exposes the getters of an import job together with Clone.
// importJob implements it by embedding *datapb.ImportJob.
type ImportJob interface {
GetJobID() int64
GetCollectionID() int64
GetPartitionIDs() []int64
GetVchannels() []string
GetSchema() *schemapb.CollectionSchema
GetTimeoutTs() uint64
GetCleanupTs() uint64
GetState() internalpb.ImportJobState
GetReason() string
GetFiles() []*internalpb.ImportFile
GetOptions() []*commonpb.KeyValuePair
// Clone returns a copy of the job; importJob copies the underlying message
// with proto.Clone.
Clone() ImportJob
}
// importJob wraps the datapb.ImportJob message; embedding the message provides
// its getters.
type importJob struct {
*datapb.ImportJob
}
// Clone returns a new importJob wrapping a proto.Clone copy of the underlying
// message.
func (j *importJob) Clone() ImportJob {
	copied := proto.Clone(j.ImportJob).(*datapb.ImportJob)
	return &importJob{ImportJob: copied}
}

View File

@ -0,0 +1,238 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package datacoord
import (
"sync"
"github.com/milvus-io/milvus/internal/metastore"
)
// ImportMeta manages import jobs and their tasks. The behavior notes below
// describe the importMeta implementation in this file.
type ImportMeta interface {
// AddJob saves the job to the catalog and stores it in memory, replacing
// any job with the same ID.
AddJob(job ImportJob) error
// UpdateJob applies the actions to a clone of the job and saves it; it
// returns nil without doing anything when the job ID is unknown.
UpdateJob(jobID int64, actions ...UpdateJobAction) error
// GetJob returns the job with the given ID, or nil if there is none.
GetJob(jobID int64) ImportJob
// GetJobBy returns the jobs accepted by every filter (all jobs when no
// filter is given).
GetJobBy(filters ...ImportJobFilter) []ImportJob
// RemoveJob drops the job from the catalog and from memory; unknown IDs
// are ignored.
RemoveJob(jobID int64) error
// AddTask saves the task to the catalog and stores it in memory.
AddTask(task ImportTask) error
// UpdateTask applies the actions to a clone of the task and saves it; it
// returns nil without doing anything when the task ID is unknown.
UpdateTask(taskID int64, actions ...UpdateAction) error
// GetTask returns the task with the given ID, or nil if there is none.
GetTask(taskID int64) ImportTask
// GetTaskBy returns the tasks accepted by every filter (all tasks when no
// filter is given).
GetTaskBy(filters ...ImportTaskFilter) []ImportTask
// RemoveTask drops the task from the catalog and from memory; unknown IDs
// are ignored.
RemoveTask(taskID int64) error
}
// importMeta is the ImportMeta implementation: in-memory maps of jobs and
// tasks keyed by ID, with every change written to the catalog first.
type importMeta struct {
mu sync.RWMutex // guards jobs and tasks
jobs map[int64]ImportJob
tasks map[int64]ImportTask
// catalog is the store that jobs and tasks are saved to, listed from and
// dropped from
catalog metastore.DataCoordCatalog
}
// NewImportMeta builds an ImportMeta from what the catalog currently holds.
// It lists pre-import tasks, import tasks and jobs (in that order), returning
// the first listing error encountered, and indexes the results by ID.
func NewImportMeta(catalog metastore.DataCoordCatalog) (ImportMeta, error) {
	preImportPBs, err := catalog.ListPreImportTasks()
	if err != nil {
		return nil, err
	}
	importPBs, err := catalog.ListImportTasks()
	if err != nil {
		return nil, err
	}
	jobPBs, err := catalog.ListImportJobs()
	if err != nil {
		return nil, err
	}

	im := &importMeta{
		jobs:    make(map[int64]ImportJob),
		tasks:   make(map[int64]ImportTask),
		catalog: catalog,
	}
	for _, pb := range preImportPBs {
		im.tasks[pb.GetTaskID()] = &preImportTask{PreImportTask: pb}
	}
	// import tasks are inserted after pre-import tasks, so on an ID clash the
	// import task wins
	for _, pb := range importPBs {
		im.tasks[pb.GetTaskID()] = &importTask{ImportTaskV2: pb}
	}
	for _, pb := range jobPBs {
		im.jobs[pb.GetJobID()] = &importJob{ImportJob: pb}
	}
	return im, nil
}
// AddJob saves the job to the catalog and, only if that succeeds, stores it in
// memory under its job ID (replacing any previous entry).
func (m *importMeta) AddJob(job ImportJob) error {
	m.mu.Lock()
	defer m.mu.Unlock()

	pb := job.(*importJob).ImportJob
	if err := m.catalog.SaveImportJob(pb); err != nil {
		return err
	}
	m.jobs[job.GetJobID()] = job
	return nil
}
// UpdateJob applies actions to a clone of the stored job, saves the clone to
// the catalog and then swaps it into memory. An unknown jobID is a no-op that
// returns nil; a catalog error leaves the stored job untouched.
func (m *importMeta) UpdateJob(jobID int64, actions ...UpdateJobAction) error {
	m.mu.Lock()
	defer m.mu.Unlock()

	current, found := m.jobs[jobID]
	if !found {
		return nil
	}
	next := current.Clone()
	for _, apply := range actions {
		apply(next)
	}
	if err := m.catalog.SaveImportJob(next.(*importJob).ImportJob); err != nil {
		return err
	}
	m.jobs[next.GetJobID()] = next
	return nil
}
// GetJob returns the job stored under jobID, or nil when there is none.
func (m *importMeta) GetJob(jobID int64) ImportJob {
	m.mu.RLock()
	job := m.jobs[jobID]
	m.mu.RUnlock()
	return job
}
// GetJobBy returns every stored job that all filters accept. With no filters
// it returns all jobs. The result is never nil and its order follows map
// iteration, so it is unspecified.
func (m *importMeta) GetJobBy(filters ...ImportJobFilter) []ImportJob {
	m.mu.RLock()
	defer m.mu.RUnlock()

	matched := make([]ImportJob, 0)
	for _, job := range m.jobs {
		accepted := true
		for _, filter := range filters {
			// stop at the first rejecting filter
			if !filter(job) {
				accepted = false
				break
			}
		}
		if accepted {
			matched = append(matched, job)
		}
	}
	return matched
}
// RemoveJob drops the job from the catalog and then from memory. An unknown
// jobID is a no-op that returns nil; a catalog error keeps the job in memory.
func (m *importMeta) RemoveJob(jobID int64) error {
	m.mu.Lock()
	defer m.mu.Unlock()

	if _, found := m.jobs[jobID]; !found {
		return nil
	}
	if err := m.catalog.DropImportJob(jobID); err != nil {
		return err
	}
	delete(m.jobs, jobID)
	return nil
}
// AddTask saves the task to the catalog using the save call that matches its
// type and, on success, stores it in memory under its task ID. A task of any
// other type is ignored and nil is returned.
func (m *importMeta) AddTask(task ImportTask) error {
	m.mu.Lock()
	defer m.mu.Unlock()

	var saveErr error
	switch task.GetType() {
	case PreImportTaskType:
		saveErr = m.catalog.SavePreImportTask(task.(*preImportTask).PreImportTask)
	case ImportTaskType:
		saveErr = m.catalog.SaveImportTask(task.(*importTask).ImportTaskV2)
	default:
		// unknown task types are neither saved nor stored
		return nil
	}
	if saveErr != nil {
		return saveErr
	}
	m.tasks[task.GetTaskID()] = task
	return nil
}
// UpdateTask applies actions to a clone of the stored task, saves the clone
// to the catalog according to its type and then swaps it into memory. An
// unknown taskID is a no-op that returns nil; a catalog error leaves the
// stored task untouched.
func (m *importMeta) UpdateTask(taskID int64, actions ...UpdateAction) error {
	m.mu.Lock()
	defer m.mu.Unlock()

	current, found := m.tasks[taskID]
	if !found {
		return nil
	}
	next := current.Clone()
	for _, apply := range actions {
		apply(next)
	}

	var saveErr error
	switch next.GetType() {
	case PreImportTaskType:
		saveErr = m.catalog.SavePreImportTask(next.(*preImportTask).PreImportTask)
	case ImportTaskType:
		saveErr = m.catalog.SaveImportTask(next.(*importTask).ImportTaskV2)
	default:
		// unknown task types are neither saved nor stored
		return nil
	}
	if saveErr != nil {
		return saveErr
	}
	m.tasks[next.GetTaskID()] = next
	return nil
}
// GetTask returns the task stored under taskID, or nil when there is none.
func (m *importMeta) GetTask(taskID int64) ImportTask {
	m.mu.RLock()
	task := m.tasks[taskID]
	m.mu.RUnlock()
	return task
}
// GetTaskBy returns every stored task that all filters accept. With no
// filters it returns all tasks. The result is never nil and its order follows
// map iteration, so it is unspecified.
func (m *importMeta) GetTaskBy(filters ...ImportTaskFilter) []ImportTask {
	m.mu.RLock()
	defer m.mu.RUnlock()

	matched := make([]ImportTask, 0)
	for _, task := range m.tasks {
		accepted := true
		for _, filter := range filters {
			// stop at the first rejecting filter
			if !filter(task) {
				accepted = false
				break
			}
		}
		if accepted {
			matched = append(matched, task)
		}
	}
	return matched
}
// RemoveTask drops the task from the catalog using the drop call that matches
// its type and then deletes it from memory. An unknown taskID is a no-op that
// returns nil; a catalog error keeps the task in memory. A task of any other
// type is removed from memory without a catalog call.
func (m *importMeta) RemoveTask(taskID int64) error {
	m.mu.Lock()
	defer m.mu.Unlock()

	task, found := m.tasks[taskID]
	if !found {
		return nil
	}

	var dropErr error
	switch task.GetType() {
	case PreImportTaskType:
		dropErr = m.catalog.DropPreImportTask(taskID)
	case ImportTaskType:
		dropErr = m.catalog.DropImportTask(taskID)
	}
	if dropErr != nil {
		return dropErr
	}
	delete(m.tasks, taskID)
	return nil
}

View File

@ -0,0 +1,207 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package datacoord
import (
"testing"
"github.com/cockroachdb/errors"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/milvus-io/milvus/internal/metastore/mocks"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/internalpb"
)
// TestImportMeta_Restore checks that NewImportMeta loads the jobs and tasks
// listed by the catalog, and that it returns an error when any one of the
// three catalog list calls fails.
func TestImportMeta_Restore(t *testing.T) {
catalog := mocks.NewDataCoordCatalog(t)
catalog.EXPECT().ListImportJobs().Return([]*datapb.ImportJob{{JobID: 0}}, nil)
catalog.EXPECT().ListPreImportTasks().Return([]*datapb.PreImportTask{{TaskID: 1}}, nil)
catalog.EXPECT().ListImportTasks().Return([]*datapb.ImportTaskV2{{TaskID: 2}}, nil)
im, err := NewImportMeta(catalog)
assert.NoError(t, err)
jobs := im.GetJobBy()
assert.Equal(t, 1, len(jobs))
assert.Equal(t, int64(0), jobs[0].GetJobID())
tasks := im.GetTaskBy()
assert.Equal(t, 2, len(tasks))
tasks = im.GetTaskBy(WithType(PreImportTaskType))
assert.Equal(t, 1, len(tasks))
assert.Equal(t, int64(1), tasks[0].GetTaskID())
tasks = im.GetTaskBy(WithType(ImportTaskType))
assert.Equal(t, 1, len(tasks))
assert.Equal(t, int64(2), tasks[0].GetTaskID())
// new meta failed
// each case below mocks only the list calls NewImportMeta makes before it
// hits the failing one
mockErr := errors.New("mock error")
catalog = mocks.NewDataCoordCatalog(t)
catalog.EXPECT().ListPreImportTasks().Return([]*datapb.PreImportTask{{TaskID: 1}}, mockErr)
_, err = NewImportMeta(catalog)
assert.Error(t, err)
catalog = mocks.NewDataCoordCatalog(t)
catalog.EXPECT().ListImportTasks().Return([]*datapb.ImportTaskV2{{TaskID: 2}}, mockErr)
catalog.EXPECT().ListPreImportTasks().Return([]*datapb.PreImportTask{{TaskID: 1}}, nil)
_, err = NewImportMeta(catalog)
assert.Error(t, err)
catalog = mocks.NewDataCoordCatalog(t)
catalog.EXPECT().ListImportJobs().Return([]*datapb.ImportJob{{JobID: 0}}, mockErr)
catalog.EXPECT().ListPreImportTasks().Return([]*datapb.PreImportTask{{TaskID: 1}}, nil)
catalog.EXPECT().ListImportTasks().Return([]*datapb.ImportTaskV2{{TaskID: 2}}, nil)
_, err = NewImportMeta(catalog)
assert.Error(t, err)
}
// TestImportMeta_ImportJob covers the job half of ImportMeta: adding (twice,
// to show the entry is replaced rather than duplicated), updating state,
// reading back and removing a job, followed by the same operations against a
// catalog that always fails.
func TestImportMeta_ImportJob(t *testing.T) {
catalog := mocks.NewDataCoordCatalog(t)
catalog.EXPECT().ListImportJobs().Return(nil, nil)
catalog.EXPECT().ListPreImportTasks().Return(nil, nil)
catalog.EXPECT().ListImportTasks().Return(nil, nil)
catalog.EXPECT().SaveImportJob(mock.Anything).Return(nil)
catalog.EXPECT().DropImportJob(mock.Anything).Return(nil)
im, err := NewImportMeta(catalog)
assert.NoError(t, err)
var job ImportJob = &importJob{
ImportJob: &datapb.ImportJob{
JobID: 0,
CollectionID: 1,
PartitionIDs: []int64{2},
Vchannels: []string{"ch0"},
State: internalpb.ImportJobState_Pending,
},
}
err = im.AddJob(job)
assert.NoError(t, err)
jobs := im.GetJobBy()
assert.Equal(t, 1, len(jobs))
// adding the same job again must not create a second entry
err = im.AddJob(job)
assert.NoError(t, err)
jobs = im.GetJobBy()
assert.Equal(t, 1, len(jobs))
assert.Nil(t, job.GetSchema())
err = im.UpdateJob(job.GetJobID(), UpdateJobState(internalpb.ImportJobState_Completed))
assert.NoError(t, err)
job2 := im.GetJob(job.GetJobID())
assert.Equal(t, internalpb.ImportJobState_Completed, job2.GetState())
assert.Equal(t, job.GetJobID(), job2.GetJobID())
assert.Equal(t, job.GetCollectionID(), job2.GetCollectionID())
assert.Equal(t, job.GetPartitionIDs(), job2.GetPartitionIDs())
assert.Equal(t, job.GetVchannels(), job2.GetVchannels())
err = im.RemoveJob(job.GetJobID())
assert.NoError(t, err)
jobs = im.GetJobBy()
assert.Equal(t, 0, len(jobs))
// test failed
mockErr := errors.New("mock err")
catalog = mocks.NewDataCoordCatalog(t)
catalog.EXPECT().SaveImportJob(mock.Anything).Return(mockErr)
catalog.EXPECT().DropImportJob(mock.Anything).Return(mockErr)
im.(*importMeta).catalog = catalog
err = im.AddJob(job)
assert.Error(t, err)
// AddJob failed, so put the job into the map directly; otherwise UpdateJob
// and RemoveJob would return nil for an unknown ID without touching the catalog
im.(*importMeta).jobs[job.GetJobID()] = job
err = im.UpdateJob(job.GetJobID())
assert.Error(t, err)
err = im.RemoveJob(job.GetJobID())
assert.Error(t, err)
}
// TestImportMeta_ImportTask covers the task half of ImportMeta for import
// tasks: adding, filtered lookup, updating, and removing (including removal of
// an unknown ID), followed by the same operations against a catalog that
// always fails.
func TestImportMeta_ImportTask(t *testing.T) {
catalog := mocks.NewDataCoordCatalog(t)
catalog.EXPECT().ListImportJobs().Return(nil, nil)
catalog.EXPECT().ListPreImportTasks().Return(nil, nil)
catalog.EXPECT().ListImportTasks().Return(nil, nil)
catalog.EXPECT().SaveImportTask(mock.Anything).Return(nil)
catalog.EXPECT().DropImportTask(mock.Anything).Return(nil)
im, err := NewImportMeta(catalog)
assert.NoError(t, err)
task1 := &importTask{
ImportTaskV2: &datapb.ImportTaskV2{
JobID: 1,
TaskID: 2,
CollectionID: 3,
SegmentIDs: []int64{5, 6},
NodeID: 7,
State: datapb.ImportTaskStateV2_Pending,
},
}
err = im.AddTask(task1)
assert.NoError(t, err)
// adding the same task a second time must also succeed
err = im.AddTask(task1)
assert.NoError(t, err)
res := im.GetTask(task1.GetTaskID())
assert.Equal(t, task1, res)
// task2 is a clone of task1 with a different ID and state, same job
task2 := task1.Clone()
task2.(*importTask).TaskID = 8
task2.(*importTask).State = datapb.ImportTaskStateV2_Completed
err = im.AddTask(task2)
assert.NoError(t, err)
tasks := im.GetTaskBy(WithJob(task1.GetJobID()))
assert.Equal(t, 2, len(tasks))
tasks = im.GetTaskBy(WithType(ImportTaskType), WithStates(datapb.ImportTaskStateV2_Completed))
assert.Equal(t, 1, len(tasks))
assert.Equal(t, task2.GetTaskID(), tasks[0].GetTaskID())
err = im.UpdateTask(task1.GetTaskID(), UpdateNodeID(9),
UpdateState(datapb.ImportTaskStateV2_Failed),
UpdateFileStats([]*datapb.ImportFileStats{1: {
FileSize: 100,
}}))
assert.NoError(t, err)
task := im.GetTask(task1.GetTaskID())
assert.Equal(t, int64(9), task.GetNodeID())
assert.Equal(t, datapb.ImportTaskStateV2_Failed, task.GetState())
err = im.RemoveTask(task1.GetTaskID())
assert.NoError(t, err)
tasks = im.GetTaskBy()
assert.Equal(t, 1, len(tasks))
// removing an ID that was never added succeeds and changes nothing
err = im.RemoveTask(10)
assert.NoError(t, err)
tasks = im.GetTaskBy()
assert.Equal(t, 1, len(tasks))
// test failed
mockErr := errors.New("mock err")
catalog = mocks.NewDataCoordCatalog(t)
catalog.EXPECT().SaveImportTask(mock.Anything).Return(mockErr)
catalog.EXPECT().DropImportTask(mock.Anything).Return(mockErr)
im.(*importMeta).catalog = catalog
err = im.AddTask(task1)
assert.Error(t, err)
// AddTask failed, so put the task into the map directly; otherwise
// UpdateTask and RemoveTask would return nil for an unknown ID
im.(*importMeta).tasks[task1.GetTaskID()] = task1
err = im.UpdateTask(task1.GetTaskID(), UpdateNodeID(9))
assert.Error(t, err)
err = im.RemoveTask(task1.GetTaskID())
assert.Error(t, err)
}

View File

@ -0,0 +1,335 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package datacoord
import (
"sort"
"sync"
"time"
"github.com/cockroachdb/errors"
"github.com/samber/lo"
"go.uber.org/zap"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/merr"
)
const (
// NullNodeID is the "no node" sentinel: process passes it to the pending
// handlers when no node has a free slot, and those handlers return without
// dispatching when they receive it.
NullNodeID = -1
)
// ImportScheduler periodically processes import tasks. In the importScheduler
// implementation, Start blocks and runs the scheduling loop until Close is
// called.
type ImportScheduler interface {
Start()
Close()
}
// importScheduler is the ImportScheduler implementation.
type importScheduler struct {
// meta and alloc are handed to AssembleImportRequest when dispatching
// pending import tasks
meta *meta
// cluster is used to list sessions and to send pre-import/import requests
// and queries to nodes
cluster Cluster
alloc allocator
// imeta holds the import jobs and tasks the scheduler reads and updates
imeta ImportMeta
// closeOnce ensures closeChan is closed at most once
closeOnce sync.Once
// closeChan is closed by Close to make Start return
closeChan chan struct{}
}
// NewImportScheduler wires the given dependencies into a new scheduler and
// creates its close channel. The scheduler does nothing until Start is called.
func NewImportScheduler(meta *meta,
	cluster Cluster,
	alloc allocator,
	imeta ImportMeta,
) ImportScheduler {
	scheduler := new(importScheduler)
	scheduler.meta = meta
	scheduler.cluster = cluster
	scheduler.alloc = alloc
	scheduler.imeta = imeta
	scheduler.closeChan = make(chan struct{})
	return scheduler
}
// Start runs the scheduling loop: it calls process once per configured
// schedule interval and returns when Close has been called. It blocks the
// calling goroutine.
func (s *importScheduler) Start() {
	log.Info("start import scheduler")
	interval := Params.DataCoordCfg.ImportScheduleInterval.GetAsDuration(time.Second)
	tick := time.NewTicker(interval)
	defer tick.Stop()
	for {
		select {
		case <-tick.C:
			s.process()
		case <-s.closeChan:
			log.Info("import scheduler exited")
			return
		}
	}
}
// Close signals the loop in Start to exit by closing closeChan. It is safe to
// call more than once; only the first call closes the channel.
func (s *importScheduler) Close() {
	shutdown := func() {
		close(s.closeChan)
	}
	s.closeOnce.Do(shutdown)
}
// process performs one scheduling round. It takes all jobs in ascending job
// ID order, queries the free slots of every node once, and then handles each
// task of each job according to its state and type. A Pending task consumes
// one slot from some node that still has one (or receives NullNodeID when no
// slot is left).
func (s *importScheduler) process() {
	jobs := s.imeta.GetJobBy()
	sort.Slice(jobs, func(a, b int) bool {
		return jobs[a].GetJobID() < jobs[b].GetJobID()
	})

	freeSlots := s.peekSlots()
	// takeSlot reserves one slot on any node that still has capacity; map
	// iteration order decides which node is picked.
	takeSlot := func() int64 {
		for id, free := range freeSlots {
			if free > 0 {
				freeSlots[id] = free - 1
				return id
			}
		}
		return NullNodeID
	}

	for _, job := range jobs {
		for _, task := range s.imeta.GetTaskBy(WithJob(job.GetJobID())) {
			switch task.GetState() {
			case datapb.ImportTaskStateV2_Pending:
				// the slot is taken before the type is inspected
				target := takeSlot()
				if kind := task.GetType(); kind == PreImportTaskType {
					s.processPendingPreImport(task, target)
				} else if kind == ImportTaskType {
					s.processPendingImport(task, target)
				}
			case datapb.ImportTaskStateV2_InProgress:
				if kind := task.GetType(); kind == PreImportTaskType {
					s.processInProgressPreImport(task)
				} else if kind == ImportTaskType {
					s.processInProgressImport(task)
				}
			case datapb.ImportTaskStateV2_Completed:
				s.processCompleted(task)
			case datapb.ImportTaskStateV2_Failed:
				s.processFailed(task)
			}
		}
	}
}
// checkErr decides what to do with an error returned while querying a task.
//
// Retryable, canceled/timeout and node-not-found errors reset the task to
// Pending so it can be scheduled again. Any other error fails the whole job,
// using the error text as the job's reason.
//
// The result of the meta update is kept in its own variable so that the
// success log lines report the error that triggered the handling, not the
// (nil) result of the update.
func (s *importScheduler) checkErr(task ImportTask, err error) {
	if merr.IsRetryableErr(err) || merr.IsCanceledOrTimeout(err) || errors.Is(err, merr.ErrNodeNotFound) {
		updateErr := s.imeta.UpdateTask(task.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_Pending))
		if updateErr != nil {
			log.Warn("failed to update import task state to pending", WrapTaskLog(task, zap.Error(updateErr))...)
			return
		}
		log.Info("reset task state to pending due to error occurs", WrapTaskLog(task, zap.Error(err))...)
		return
	}

	updateErr := s.imeta.UpdateJob(task.GetJobID(), UpdateJobState(internalpb.ImportJobState_Failed), UpdateJobReason(err.Error()))
	if updateErr != nil {
		log.Warn("failed to update job state to Failed", zap.Int64("jobID", task.GetJobID()), zap.Error(updateErr))
		return
	}
	log.Info("import task failed", WrapTaskLog(task, zap.Error(err))...)
}
// peekSlots asks every node that has a session for its slot count, all in
// parallel, and returns the answers keyed by node ID. Nodes whose query fails
// are logged and left out of the result.
func (s *importScheduler) peekSlots() map[int64]int64 {
	sessions := s.cluster.GetSessions()
	nodeIDs := make([]int64, 0, len(sessions))
	for _, session := range sessions {
		nodeIDs = append(nodeIDs, session.info.NodeID)
	}

	var (
		mu sync.Mutex // guards slotsByNode
		wg sync.WaitGroup
	)
	slotsByNode := make(map[int64]int64)
	for _, id := range nodeIDs {
		wg.Add(1)
		go func(nodeID int64) {
			defer wg.Done()
			resp, err := s.cluster.QueryImport(nodeID, &datapb.QueryImportRequest{QuerySlot: true})
			if err != nil {
				log.Warn("query import failed", zap.Error(err))
				return
			}
			mu.Lock()
			slotsByNode[nodeID] = resp.GetSlots()
			mu.Unlock()
		}(id)
	}
	wg.Wait()
	log.Info("peek slots done", zap.Any("nodeSlots", slotsByNode))
	return slotsByNode
}
// processPendingPreImport dispatches a pending pre-import task to nodeID and,
// if the node accepts it, records the task as InProgress on that node. It
// returns without doing anything when nodeID is NullNodeID. Failures are
// logged and the task is left as it was.
func (s *importScheduler) processPendingPreImport(task ImportTask, nodeID int64) {
	if nodeID == NullNodeID {
		return
	}
	log.Info("processing pending preimport task...", WrapTaskLog(task)...)
	request := AssemblePreImportRequest(task, s.imeta.GetJob(task.GetJobID()))
	if err := s.cluster.PreImport(nodeID, request); err != nil {
		log.Warn("preimport failed", WrapTaskLog(task, zap.Error(err))...)
		return
	}
	markInProgress := []UpdateAction{
		UpdateState(datapb.ImportTaskStateV2_InProgress),
		UpdateNodeID(nodeID),
	}
	if err := s.imeta.UpdateTask(task.GetTaskID(), markInProgress...); err != nil {
		log.Warn("update import task failed", WrapTaskLog(task, zap.Error(err))...)
		return
	}
	log.Info("process pending preimport task done", WrapTaskLog(task)...)
}
// processPendingImport assembles the import request for a pending import
// task, sends it to nodeID and, if the node accepts it, records the task as
// InProgress on that node. It returns without doing anything when nodeID is
// NullNodeID. Failures are logged and the task is left as it was.
func (s *importScheduler) processPendingImport(task ImportTask, nodeID int64) {
	if nodeID == NullNodeID {
		return
	}
	log.Info("processing pending import task...", WrapTaskLog(task)...)
	request, err := AssembleImportRequest(task, s.imeta.GetJob(task.GetJobID()), s.meta, s.alloc)
	if err != nil {
		log.Warn("assemble import request failed", WrapTaskLog(task, zap.Error(err))...)
		return
	}
	if err = s.cluster.ImportV2(nodeID, request); err != nil {
		log.Warn("import failed", WrapTaskLog(task, zap.Error(err))...)
		return
	}
	markInProgress := []UpdateAction{
		UpdateState(datapb.ImportTaskStateV2_InProgress),
		UpdateNodeID(nodeID),
	}
	if err = s.imeta.UpdateTask(task.GetTaskID(), markInProgress...); err != nil {
		log.Warn("update import task failed", WrapTaskLog(task, zap.Error(err))...)
		return
	}
	log.Info("processing pending import task done", WrapTaskLog(task)...)
}
// processInProgressPreImport polls the datanode that owns an in-progress
// pre-import task and mirrors the result into the import meta:
//   - a query error is logged and handed to checkErr;
//   - a Failed state marks the whole job Failed with the reported reason;
//   - otherwise the reported file stats are stored on the task, and the task
//     is marked Completed when the datanode says so.
func (s *importScheduler) processInProgressPreImport(task ImportTask) {
	resp, err := s.cluster.QueryPreImport(task.GetNodeID(), &datapb.QueryPreImportRequest{
		JobID:  task.GetJobID(),
		TaskID: task.GetTaskID(),
	})
	if err != nil {
		log.Warn("query preimport failed", WrapTaskLog(task, zap.Error(err))...)
		s.checkErr(task, err)
		return
	}

	if resp.GetState() == datapb.ImportTaskStateV2_Failed {
		updateErr := s.imeta.UpdateJob(task.GetJobID(),
			UpdateJobState(internalpb.ImportJobState_Failed),
			UpdateJobReason(resp.GetReason()))
		if updateErr != nil {
			log.Warn("failed to update job state to Failed", zap.Int64("jobID", task.GetJobID()), zap.Error(updateErr))
		}
		log.Warn("preimport failed", WrapTaskLog(task, zap.String("reason", resp.GetReason()))...)
		return
	}

	// File stats are refreshed on every poll; the state only changes on completion.
	actions := make([]UpdateAction, 0, 2)
	actions = append(actions, UpdateFileStats(resp.GetFileStats()))
	if resp.GetState() == datapb.ImportTaskStateV2_Completed {
		actions = append(actions, UpdateState(datapb.ImportTaskStateV2_Completed))
	}
	if err = s.imeta.UpdateTask(task.GetTaskID(), actions...); err != nil {
		log.Warn("update preimport task failed", WrapTaskLog(task, zap.Error(err))...)
		return
	}
	log.Info("query preimport", WrapTaskLog(task, zap.String("state", resp.GetState().String()),
		zap.Any("fileStats", resp.GetFileStats()))...)
}
// processInProgressImport polls the datanode that owns an in-progress import
// task and mirrors the result into the segment meta and the import meta:
//   - a query error is logged and handed to checkErr;
//   - a Failed state marks the whole job Failed with the reported reason;
//   - otherwise the imported row count of every reported segment is raised
//     when it grew, and on Completed the reported binlogs/statslogs replace
//     the segment's logs before the task itself is marked Completed.
//
// Any meta update failure is logged and ends the call early.
func (s *importScheduler) processInProgressImport(task ImportTask) {
	req := &datapb.QueryImportRequest{
		JobID:  task.GetJobID(),
		TaskID: task.GetTaskID(),
	}
	resp, err := s.cluster.QueryImport(task.GetNodeID(), req)
	if err != nil {
		log.Warn("query import failed", WrapTaskLog(task, zap.Error(err))...)
		s.checkErr(task, err)
		return
	}
	if resp.GetState() == datapb.ImportTaskStateV2_Failed {
		err = s.imeta.UpdateJob(task.GetJobID(), UpdateJobState(internalpb.ImportJobState_Failed),
			UpdateJobReason(resp.GetReason()))
		if err != nil {
			log.Warn("failed to update job state to Failed", zap.Int64("jobID", task.GetJobID()), zap.Error(err))
		}
		log.Warn("import failed", WrapTaskLog(task, zap.String("reason", resp.GetReason()))...)
		return
	}
	for _, info := range resp.GetImportSegmentsInfo() {
		segment := s.meta.GetSegment(info.GetSegmentID())
		if segment == nil {
			// The segment is no longer in meta; calling a getter on the nil
			// *SegmentInfo would panic, so skip the row update for it.
			log.Warn("import segment not found in meta, skip updating imported rows",
				WrapTaskLog(task, zap.Int64("segmentID", info.GetSegmentID()))...)
			continue
		}
		if info.GetImportedRows() <= segment.GetNumOfRows() {
			continue // rows not changed, no need to update
		}
		op := UpdateImportedRows(info.GetSegmentID(), info.GetImportedRows())
		err = s.meta.UpdateSegmentsInfo(op)
		if err != nil {
			log.Warn("update import segment rows failed", WrapTaskLog(task, zap.Error(err))...)
			return
		}
	}
	if resp.GetState() == datapb.ImportTaskStateV2_Completed {
		for _, info := range resp.GetImportSegmentsInfo() {
			op := ReplaceBinlogsOperator(info.GetSegmentID(), info.GetBinlogs(), info.GetStatslogs(), nil)
			err = s.meta.UpdateSegmentsInfo(op)
			if err != nil {
				log.Warn("update import segment binlogs failed", WrapTaskLog(task, zap.Error(err))...)
				return
			}
		}
		err = s.imeta.UpdateTask(task.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_Completed))
		if err != nil {
			log.Warn("update import task failed", WrapTaskLog(task, zap.Error(err))...)
			return
		}
	}
	log.Info("query import", WrapTaskLog(task, zap.String("state", resp.GetState().String()),
		zap.String("reason", resp.GetReason()))...)
}
// processCompleted releases a completed task via DropImportTask and logs a
// warning when that fails.
func (s *importScheduler) processCompleted(task ImportTask) {
	if err := DropImportTask(task, s.cluster, s.imeta); err != nil {
		log.Warn("drop import failed", WrapTaskLog(task, zap.Error(err))...)
	}
}
// processFailed cleans up after a failed task. For import tasks it first drops
// every segment recorded on the task and then clears the task's segment list;
// if dropping a segment fails the call stops there, before DropImportTask is
// reached. Afterwards the task is released via DropImportTask. All failures
// are logged only.
func (s *importScheduler) processFailed(task ImportTask) {
	if task.GetType() == ImportTaskType {
		for _, segmentID := range task.(*importTask).GetSegmentIDs() {
			if err := s.meta.DropSegment(segmentID); err != nil {
				log.Warn("drop import segment failed",
					WrapTaskLog(task, zap.Int64("segment", segmentID), zap.Error(err))...)
				return
			}
		}
		if err := s.imeta.UpdateTask(task.GetTaskID(), UpdateSegmentIDs(nil)); err != nil {
			log.Warn("update import task segments failed", WrapTaskLog(task, zap.Error(err))...)
		}
	}
	if err := DropImportTask(task, s.cluster, s.imeta); err != nil {
		log.Warn("drop import failed", WrapTaskLog(task, zap.Error(err))...)
	}
}

View File

@ -0,0 +1,328 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package datacoord
import (
"context"
"math"
"testing"
"github.com/cockroachdb/errors"
"github.com/samber/lo"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/suite"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/metastore/mocks"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/pkg/util/merr"
)
// ImportSchedulerSuite is a testify suite that exercises importScheduler
// against a mocked catalog, allocator and cluster.
type ImportSchedulerSuite struct {
suite.Suite
// collectionID is the collection that jobs and tasks in the suite are created under.
collectionID int64
// catalog is the mocked metastore handed to both meta and imeta in SetupTest.
catalog *mocks.DataCoordCatalog
// alloc is the mocked ID/timestamp allocator handed to the scheduler.
alloc *NMockAllocator
// cluster is the mocked datanode cluster handed to the scheduler.
cluster *MockCluster
// meta is the segment/collection meta built on top of catalog.
meta *meta
// imeta is the import job/task meta built on top of catalog.
imeta ImportMeta
// scheduler is the unit under test.
scheduler *importScheduler
}
// SetupTest rebuilds the mocks, both metas and the scheduler. The catalog's
// List* calls all return empty results so that newMeta and NewImportMeta start
// from an empty state, and a single collection with the test schema is
// registered in meta.
func (s *ImportSchedulerSuite) SetupTest() {
var err error
s.collectionID = 1
s.catalog = mocks.NewDataCoordCatalog(s.T())
// Nothing is persisted yet: every listing returns nil.
s.catalog.EXPECT().ListImportJobs().Return(nil, nil)
s.catalog.EXPECT().ListPreImportTasks().Return(nil, nil)
s.catalog.EXPECT().ListImportTasks().Return(nil, nil)
s.catalog.EXPECT().ListSegments(mock.Anything).Return(nil, nil)
s.catalog.EXPECT().ListChannelCheckpoint(mock.Anything).Return(nil, nil)
s.catalog.EXPECT().ListIndexes(mock.Anything).Return(nil, nil)
s.catalog.EXPECT().ListSegmentIndexes(mock.Anything).Return(nil, nil)
s.cluster = NewMockCluster(s.T())
s.alloc = NewNMockAllocator(s.T())
s.meta, err = newMeta(context.TODO(), s.catalog, nil)
s.NoError(err)
s.meta.AddCollection(&collectionInfo{
ID: s.collectionID,
Schema: newTestSchema(),
})
s.imeta, err = NewImportMeta(s.catalog)
s.NoError(err)
// NewImportScheduler's result is asserted down to the concrete type so the
// tests can call unexported methods such as process and checkErr.
s.scheduler = NewImportScheduler(s.meta, s.cluster, s.alloc, s.imeta).(*importScheduler)
}
// TestCheckErr verifies how checkErr reacts to errors from the cluster.
// With a working catalog it expects merr.ErrNodeNotFound to move the task back
// to Pending and any other error to mark the job Failed. With a catalog whose
// saves fail it expects both the task state and the job state to stay as they
// were.
func (s *ImportSchedulerSuite) TestCheckErr() {
s.catalog.EXPECT().SaveImportJob(mock.Anything).Return(nil)
s.catalog.EXPECT().SavePreImportTask(mock.Anything).Return(nil)
var job ImportJob = &importJob{
ImportJob: &datapb.ImportJob{
JobID: 0,
CollectionID: s.collectionID,
TimeoutTs: math.MaxUint64,
Schema: &schemapb.CollectionSchema{},
State: internalpb.ImportJobState_Pending,
},
}
err := s.imeta.AddJob(job)
s.NoError(err)
var task ImportTask = &preImportTask{
PreImportTask: &datapb.PreImportTask{
JobID: 0,
TaskID: 1,
CollectionID: s.collectionID,
State: datapb.ImportTaskStateV2_InProgress,
},
}
err = s.imeta.AddTask(task)
s.NoError(err)
// checkErr and update state
s.scheduler.checkErr(task, merr.ErrNodeNotFound)
task = s.imeta.GetTask(task.GetTaskID())
s.Equal(datapb.ImportTaskStateV2_Pending, task.GetState())
s.scheduler.checkErr(task, errors.New("mock err"))
job = s.imeta.GetJob(job.GetJobID())
s.Equal(internalpb.ImportJobState_Failed, job.GetState())
// update state failed
// Reset job and task to known states while the catalog still accepts saves.
err = s.imeta.UpdateJob(job.GetJobID(), UpdateJobState(internalpb.ImportJobState_Pending))
s.NoError(err)
err = s.imeta.UpdateTask(task.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_None))
s.NoError(err)
// Drop the previous expectations so that the failing ones below take effect.
s.catalog.ExpectedCalls = nil
s.catalog.EXPECT().SaveImportJob(mock.Anything).Return(errors.New("mock err"))
s.catalog.EXPECT().SavePreImportTask(mock.Anything).Return(errors.New("mock err"))
s.scheduler.checkErr(task, merr.ErrNodeNotFound)
s.Equal(datapb.ImportTaskStateV2_None, s.imeta.GetTask(task.GetTaskID()).GetState())
s.scheduler.checkErr(task, errors.New("mock err"))
s.Equal(internalpb.ImportJobState_Pending, s.imeta.GetJob(job.GetJobID()).GetState())
}
// TestProcessPreImport drives a pre-import task through three scheduler
// rounds: Pending -> InProgress on the only available node, InProgress ->
// Completed once the node reports completion, and finally the drop round that
// resets the task's node to NullNodeID.
func (s *ImportSchedulerSuite) TestProcessPreImport() {
s.catalog.EXPECT().SaveImportJob(mock.Anything).Return(nil)
s.catalog.EXPECT().SavePreImportTask(mock.Anything).Return(nil)
var task ImportTask = &preImportTask{
PreImportTask: &datapb.PreImportTask{
JobID: 0,
TaskID: 1,
CollectionID: s.collectionID,
State: datapb.ImportTaskStateV2_Pending,
},
}
err := s.imeta.AddTask(task)
s.NoError(err)
var job ImportJob = &importJob{
ImportJob: &datapb.ImportJob{
JobID: 0,
CollectionID: s.collectionID,
TimeoutTs: math.MaxUint64,
Schema: &schemapb.CollectionSchema{},
},
}
err = s.imeta.AddJob(job)
s.NoError(err)
// pending -> inProgress
const nodeID = 10
// The single session below reports one free slot via QueryImport.
s.cluster.EXPECT().QueryImport(mock.Anything, mock.Anything).Return(&datapb.QueryImportResponse{
Slots: 1,
}, nil)
s.cluster.EXPECT().PreImport(mock.Anything, mock.Anything).Return(nil)
s.cluster.EXPECT().GetSessions().Return([]*Session{
{
info: &NodeInfo{
NodeID: nodeID,
},
},
})
s.scheduler.process()
task = s.imeta.GetTask(task.GetTaskID())
s.Equal(datapb.ImportTaskStateV2_InProgress, task.GetState())
s.Equal(int64(nodeID), task.GetNodeID())
// inProgress -> completed
s.cluster.EXPECT().QueryPreImport(mock.Anything, mock.Anything).Return(&datapb.QueryPreImportResponse{
State: datapb.ImportTaskStateV2_Completed,
}, nil)
s.scheduler.process()
task = s.imeta.GetTask(task.GetTaskID())
s.Equal(datapb.ImportTaskStateV2_Completed, task.GetState())
// drop import task
s.cluster.EXPECT().DropImport(mock.Anything, mock.Anything).Return(nil)
s.scheduler.process()
task = s.imeta.GetTask(task.GetTaskID())
s.Equal(int64(NullNodeID), task.GetNodeID())
}
// TestProcessImport drives an import task through three scheduler rounds:
// Pending -> InProgress on the only available node, InProgress -> Completed
// once the node reports completion, and finally the drop round that resets the
// task's node to NullNodeID.
func (s *ImportSchedulerSuite) TestProcessImport() {
s.catalog.EXPECT().SaveImportJob(mock.Anything).Return(nil)
s.catalog.EXPECT().SaveImportTask(mock.Anything).Return(nil)
var task ImportTask = &importTask{
ImportTaskV2: &datapb.ImportTaskV2{
JobID: 0,
TaskID: 1,
CollectionID: s.collectionID,
State: datapb.ImportTaskStateV2_Pending,
FileStats: []*datapb.ImportFileStats{
{
HashedStats: map[string]*datapb.PartitionImportStats{
"channel1": {
PartitionRows: map[int64]int64{
int64(2): 100,
},
PartitionDataSize: map[int64]int64{
int64(2): 100,
},
},
},
},
},
},
}
err := s.imeta.AddTask(task)
s.NoError(err)
var job ImportJob = &importJob{
ImportJob: &datapb.ImportJob{
JobID: 0,
CollectionID: s.collectionID,
PartitionIDs: []int64{2},
Vchannels: []string{"channel1"},
Schema: &schemapb.CollectionSchema{},
TimeoutTs: math.MaxUint64,
},
}
err = s.imeta.AddJob(job)
s.NoError(err)
// pending -> inProgress
const nodeID = 10
// AssembleImportRequest needs an auto-ID range and a timestamp from the allocator.
s.alloc.EXPECT().allocN(mock.Anything).Return(100, 200, nil)
s.alloc.EXPECT().allocTimestamp(mock.Anything).Return(300, nil)
// The single session below reports one free slot via QueryImport.
s.cluster.EXPECT().QueryImport(mock.Anything, mock.Anything).Return(&datapb.QueryImportResponse{
Slots: 1,
}, nil)
s.cluster.EXPECT().ImportV2(mock.Anything, mock.Anything).Return(nil)
s.cluster.EXPECT().GetSessions().Return([]*Session{
{
info: &NodeInfo{
NodeID: nodeID,
},
},
})
s.scheduler.process()
task = s.imeta.GetTask(task.GetTaskID())
s.Equal(datapb.ImportTaskStateV2_InProgress, task.GetState())
s.Equal(int64(nodeID), task.GetNodeID())
// inProgress -> completed
// QueryImport is used both for slot queries and for task status, so the
// earlier expectation is removed before registering the Completed response.
s.cluster.ExpectedCalls = lo.Filter(s.cluster.ExpectedCalls, func(call *mock.Call, _ int) bool {
return call.Method != "QueryImport"
})
s.cluster.EXPECT().QueryImport(mock.Anything, mock.Anything).Return(&datapb.QueryImportResponse{
State: datapb.ImportTaskStateV2_Completed,
}, nil)
s.scheduler.process()
task = s.imeta.GetTask(task.GetTaskID())
s.Equal(datapb.ImportTaskStateV2_Completed, task.GetState())
// drop import task
s.cluster.EXPECT().DropImport(mock.Anything, mock.Anything).Return(nil)
s.scheduler.process()
task = s.imeta.GetTask(task.GetTaskID())
s.Equal(int64(NullNodeID), task.GetNodeID())
}
// TestProcessFailed verifies the cleanup of a Failed import task: after one
// scheduler round the task's segments are gone from meta, its segment list is
// empty, its node is reset to NullNodeID and its state is still Failed.
func (s *ImportSchedulerSuite) TestProcessFailed() {
s.catalog.EXPECT().SaveImportJob(mock.Anything).Return(nil)
s.catalog.EXPECT().SaveImportTask(mock.Anything).Return(nil)
var task ImportTask = &importTask{
ImportTaskV2: &datapb.ImportTaskV2{
JobID: 0,
TaskID: 1,
CollectionID: s.collectionID,
NodeID: 6,
SegmentIDs: []int64{2, 3},
State: datapb.ImportTaskStateV2_Failed,
},
}
err := s.imeta.AddTask(task)
s.NoError(err)
var job ImportJob = &importJob{
ImportJob: &datapb.ImportJob{
JobID: 0,
CollectionID: s.collectionID,
PartitionIDs: []int64{2},
Vchannels: []string{"channel1"},
Schema: &schemapb.CollectionSchema{},
TimeoutTs: math.MaxUint64,
},
}
err = s.imeta.AddJob(job)
s.NoError(err)
s.catalog.EXPECT().AddSegment(mock.Anything, mock.Anything).Return(nil)
s.cluster.EXPECT().QueryImport(mock.Anything, mock.Anything).Return(&datapb.QueryImportResponse{
Slots: 1,
}, nil)
s.cluster.EXPECT().GetSessions().Return([]*Session{
{
info: &NodeInfo{
NodeID: 6,
},
},
})
// Register the task's segments in meta as importing segments.
for _, id := range task.(*importTask).GetSegmentIDs() {
segment := &SegmentInfo{
SegmentInfo: &datapb.SegmentInfo{ID: id, IsImporting: true},
}
err = s.meta.AddSegment(context.Background(), segment)
s.NoError(err)
}
// Sanity check: the segments are visible before the scheduler runs.
for _, id := range task.(*importTask).GetSegmentIDs() {
segment := s.meta.GetSegment(id)
s.NotNil(segment)
}
s.cluster.EXPECT().DropImport(mock.Anything, mock.Anything).Return(nil)
s.catalog.EXPECT().DropSegment(mock.Anything, mock.Anything).Return(nil)
s.scheduler.process()
// task still holds the pre-process copy, so its segment IDs are the original ones.
for _, id := range task.(*importTask).GetSegmentIDs() {
segment := s.meta.GetSegment(id)
s.Nil(segment)
}
task = s.imeta.GetTask(task.GetTaskID())
s.Equal(datapb.ImportTaskStateV2_Failed, task.GetState())
s.Equal(0, len(task.(*importTask).GetSegmentIDs()))
s.Equal(int64(NullNodeID), task.GetNodeID())
}
func TestImportScheduler(t *testing.T) {
suite.Run(t, new(ImportSchedulerSuite))
}

View File

@ -0,0 +1,155 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package datacoord
import (
"github.com/golang/protobuf/proto"
"github.com/milvus-io/milvus/internal/proto/datapb"
)
// TaskType distinguishes the kinds of import tasks.
type TaskType int

const (
	// PreImportTaskType identifies a pre-import task (value 0).
	PreImportTaskType TaskType = iota
	// ImportTaskType identifies an import task (value 1).
	ImportTaskType
)

// ImportTaskTypeName maps each TaskType to its display name.
var ImportTaskTypeName = map[TaskType]string{
	PreImportTaskType: "PreImportTask",
	ImportTaskType:    "ImportTask",
}

// String returns the display name registered for t in ImportTaskTypeName, or
// the empty string when t has no entry.
func (t TaskType) String() string {
	if name, ok := ImportTaskTypeName[t]; ok {
		return name
	}
	return ""
}
// ImportTaskFilter is a predicate over an ImportTask; it reports whether the
// task matches the filter.
type ImportTaskFilter func(task ImportTask) bool
// WithType returns a filter that matches tasks whose type equals taskType.
func WithType(taskType TaskType) ImportTaskFilter {
	return func(candidate ImportTask) bool {
		matched := candidate.GetType() == taskType
		return matched
	}
}
// WithJob returns a filter that matches tasks belonging to the job jobID.
func WithJob(jobID int64) ImportTaskFilter {
	return func(candidate ImportTask) bool {
		matched := candidate.GetJobID() == jobID
		return matched
	}
}
// WithStates returns a filter that matches tasks whose state is one of states.
// With no states given, the filter matches nothing.
func WithStates(states ...datapb.ImportTaskStateV2) ImportTaskFilter {
	return func(candidate ImportTask) bool {
		for i := 0; i < len(states); i++ {
			if states[i] == candidate.GetState() {
				return true
			}
		}
		return false
	}
}
// UpdateAction mutates the given ImportTask in place.
type UpdateAction func(task ImportTask)
// UpdateState returns an action that sets the task's state. Tasks whose
// GetType is neither PreImportTaskType nor ImportTaskType are left untouched.
func UpdateState(state datapb.ImportTaskStateV2) UpdateAction {
	return func(t ImportTask) {
		if kind := t.GetType(); kind == PreImportTaskType {
			t.(*preImportTask).PreImportTask.State = state
		} else if kind == ImportTaskType {
			t.(*importTask).ImportTaskV2.State = state
		}
	}
}
// UpdateReason returns an action that sets the task's reason. Tasks whose
// GetType is neither PreImportTaskType nor ImportTaskType are left untouched.
func UpdateReason(reason string) UpdateAction {
	return func(t ImportTask) {
		if kind := t.GetType(); kind == PreImportTaskType {
			t.(*preImportTask).PreImportTask.Reason = reason
		} else if kind == ImportTaskType {
			t.(*importTask).ImportTaskV2.Reason = reason
		}
	}
}
// UpdateNodeID returns an action that sets the node the task is assigned to.
// Tasks whose GetType is neither PreImportTaskType nor ImportTaskType are left
// untouched.
func UpdateNodeID(nodeID int64) UpdateAction {
	return func(t ImportTask) {
		if kind := t.GetType(); kind == PreImportTaskType {
			t.(*preImportTask).PreImportTask.NodeID = nodeID
		} else if kind == ImportTaskType {
			t.(*importTask).ImportTaskV2.NodeID = nodeID
		}
	}
}
// UpdateFileStats returns an action that replaces the file stats of a
// pre-import task. It is a no-op for any other task implementation.
func UpdateFileStats(fileStats []*datapb.ImportFileStats) UpdateAction {
	return func(t ImportTask) {
		pre, isPreImport := t.(*preImportTask)
		if !isPreImport {
			return
		}
		pre.PreImportTask.FileStats = fileStats
	}
}
// UpdateSegmentIDs returns an action that replaces the segment IDs of an
// import task. It is a no-op for any other task implementation.
func UpdateSegmentIDs(segmentIDs []UniqueID) UpdateAction {
	return func(t ImportTask) {
		imp, isImport := t.(*importTask)
		if !isImport {
			return
		}
		imp.ImportTaskV2.SegmentIDs = segmentIDs
	}
}
// ImportTask is the common view over pre-import and import tasks. The getters
// expose the fields shared by both kinds; GetType tells them apart.
type ImportTask interface {
GetJobID() int64
GetTaskID() int64
GetCollectionID() int64
GetNodeID() int64
GetType() TaskType
GetState() datapb.ImportTaskStateV2
GetReason() string
GetFileStats() []*datapb.ImportFileStats
// Clone returns a copy of the task; the implementations in this file copy
// the underlying protobuf message with proto.Clone.
Clone() ImportTask
}
// preImportTask is the ImportTask implementation backed by a
// datapb.PreImportTask message.
type preImportTask struct {
	*datapb.PreImportTask
}

// GetType reports PreImportTaskType.
func (p *preImportTask) GetType() TaskType {
	return PreImportTaskType
}

// Clone returns a new preImportTask wrapping a proto.Clone copy of the
// underlying message.
func (p *preImportTask) Clone() ImportTask {
	cloned := proto.Clone(p.PreImportTask).(*datapb.PreImportTask)
	return &preImportTask{PreImportTask: cloned}
}
// importTask is the ImportTask implementation backed by a
// datapb.ImportTaskV2 message.
type importTask struct {
	*datapb.ImportTaskV2
}

// GetType reports ImportTaskType.
func (t *importTask) GetType() TaskType {
	return ImportTaskType
}

// Clone returns a new importTask wrapping a proto.Clone copy of the
// underlying message.
func (t *importTask) Clone() ImportTask {
	cloned := proto.Clone(t.ImportTaskV2).(*datapb.ImportTaskV2)
	return &importTask{ImportTaskV2: cloned}
}

View File

@ -0,0 +1,398 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package datacoord
import (
"context"
"path"
"sort"
"time"
"github.com/cockroachdb/errors"
"github.com/samber/lo"
"go.uber.org/zap"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/commonpbutil"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/paramtable"
)
// WrapTaskLog returns the standard log fields identifying task (taskID, jobID,
// collectionID, type) followed by the caller-supplied fields.
func WrapTaskLog(task ImportTask, fields ...zap.Field) []zap.Field {
	const identityFields = 4
	wrapped := make([]zap.Field, 0, identityFields+len(fields))
	wrapped = append(wrapped,
		zap.Int64("taskID", task.GetTaskID()),
		zap.Int64("jobID", task.GetJobID()),
		zap.Int64("collectionID", task.GetCollectionID()),
		zap.String("type", task.GetType().String()),
	)
	return append(wrapped, fields...)
}
// NewPreImportTasks creates one Pending pre-import task per file group.
// Task IDs are taken consecutively from a single allocN call sized to the
// number of groups; each file is wrapped in an ImportFileStats carrying only
// the file itself. The allocator's error is returned unchanged.
func NewPreImportTasks(fileGroups [][]*internalpb.ImportFile,
	job ImportJob,
	alloc allocator,
) ([]ImportTask, error) {
	firstID, _, err := alloc.allocN(int64(len(fileGroups)))
	if err != nil {
		return nil, err
	}

	tasks := make([]ImportTask, 0, len(fileGroups))
	for idx := range fileGroups {
		group := fileGroups[idx]
		stats := make([]*datapb.ImportFileStats, 0, len(group))
		for _, file := range group {
			stats = append(stats, &datapb.ImportFileStats{ImportFile: file})
		}
		tasks = append(tasks, &preImportTask{
			PreImportTask: &datapb.PreImportTask{
				JobID:        job.GetJobID(),
				TaskID:       firstID + int64(idx),
				CollectionID: job.GetCollectionID(),
				State:        datapb.ImportTaskStateV2_Pending,
				FileStats:    stats,
			},
		})
	}
	return tasks, nil
}
// NewImportTasks creates one Pending import task per file-stats group.
// Task IDs are taken consecutively from a single allocN call sized to the
// number of groups, every task starts unassigned (NullNodeID), and its
// segments are allocated up front through AssignSegments. The first allocator
// or segment-assignment error aborts the whole call.
func NewImportTasks(fileGroups [][]*datapb.ImportFileStats,
	job ImportJob,
	manager Manager,
	alloc allocator,
) ([]ImportTask, error) {
	firstID, _, err := alloc.allocN(int64(len(fileGroups)))
	if err != nil {
		return nil, err
	}

	tasks := make([]ImportTask, 0, len(fileGroups))
	for idx, stats := range fileGroups {
		pb := &datapb.ImportTaskV2{
			JobID:        job.GetJobID(),
			TaskID:       firstID + int64(idx),
			CollectionID: job.GetCollectionID(),
			NodeID:       NullNodeID,
			State:        datapb.ImportTaskStateV2_Pending,
			FileStats:    stats,
		}
		task := &importTask{ImportTaskV2: pb}
		segmentIDs, assignErr := AssignSegments(task, manager)
		if assignErr != nil {
			return nil, assignErr
		}
		pb.SegmentIDs = segmentIDs
		tasks = append(tasks, task)
	}
	return tasks, nil
}
// AssignSegments allocates the import segments a task needs and returns their
// IDs. The task's file stats are first summed into a data size per
// (vchannel, partition); for each pair, segments are allocated through
// manager.AllocImportSegment until the pair's size is covered, counting
// SegmentMaxSize (configured in MB) per segment. Each pair shares one
// 10-second context across its allocations. The first allocation error aborts
// the call.
func AssignSegments(task ImportTask, manager Manager) ([]int64, error) {
	// vchannel -> (partitionID -> accumulated data size)
	sizeByChannel := make(map[string]map[int64]int64)
	for _, stats := range task.GetFileStats() {
		for vchannel, partStats := range stats.GetHashedStats() {
			perPartition, seen := sizeByChannel[vchannel]
			if !seen {
				perPartition = make(map[int64]int64)
				sizeByChannel[vchannel] = perPartition
			}
			for partitionID, size := range partStats.GetPartitionDataSize() {
				perPartition[partitionID] += size
			}
		}
	}

	segmentMaxSize := paramtable.Get().DataCoordCfg.SegmentMaxSize.GetAsInt64() * 1024 * 1024

	segmentIDs := make([]int64, 0)
	// allocFor allocates enough segments to hold `remaining` bytes for one
	// (vchannel, partition) pair.
	allocFor := func(vchannel string, partitionID int64, remaining int64) error {
		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
		defer cancel()
		for ; remaining > 0; remaining -= segmentMaxSize {
			info, err := manager.AllocImportSegment(ctx, task.GetTaskID(), task.GetCollectionID(), partitionID, vchannel)
			if err != nil {
				return err
			}
			segmentIDs = append(segmentIDs, info.GetID())
		}
		return nil
	}

	for vchannel, perPartition := range sizeByChannel {
		for partitionID, size := range perPartition {
			if err := allocFor(vchannel, partitionID, size); err != nil {
				return nil, err
			}
		}
	}
	return segmentIDs, nil
}
// AssemblePreImportRequest builds the PreImportRequest for a pre-import task:
// identifiers come from the task, partition/vchannel/schema/options from the
// job, and the import files from the task's file stats. task must be a
// *preImportTask; any other implementation makes the type assertion panic.
func AssemblePreImportRequest(task ImportTask, job ImportJob) *datapb.PreImportRequest {
	fileStats := task.(*preImportTask).GetFileStats()
	importFiles := make([]*internalpb.ImportFile, len(fileStats))
	for i, stat := range fileStats {
		importFiles[i] = stat.GetImportFile()
	}

	request := &datapb.PreImportRequest{
		JobID:        task.GetJobID(),
		TaskID:       task.GetTaskID(),
		CollectionID: task.GetCollectionID(),
		PartitionIDs: job.GetPartitionIDs(),
		Vchannels:    job.GetVchannels(),
		Schema:       job.GetSchema(),
		ImportFiles:  importFiles,
		Options:      job.GetOptions(),
	}
	return request
}
// AssembleImportRequest builds the ImportRequest for an import task.
//
// It resolves every segment recorded on the task from meta (returning a
// segment-not-found error if one is missing), allocates a timestamp with a
// 5-second timeout, and allocates an ID range sized to the total row count of
// the task's file stats, which is passed on as the request's AutoIDRange.
// task must be a *importTask; any other implementation makes the type
// assertion panic.
func AssembleImportRequest(task ImportTask, job ImportJob, meta *meta, alloc allocator) (*datapb.ImportRequest, error) {
	requestSegments := make([]*datapb.ImportRequestSegment, 0)
	for _, id := range task.(*importTask).GetSegmentIDs() {
		seg := meta.GetSegment(id)
		if seg == nil {
			return nil, merr.WrapErrSegmentNotFound(id, "assemble import request failed")
		}
		requestSegments = append(requestSegments, &datapb.ImportRequestSegment{
			SegmentID:   seg.GetID(),
			PartitionID: seg.GetPartitionID(),
			Vchannel:    seg.GetInsertChannel(),
		})
	}

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	ts, err := alloc.allocTimestamp(ctx)
	if err != nil {
		return nil, err
	}

	var totalRows int64
	for _, stat := range task.GetFileStats() {
		totalRows += stat.GetTotalRows()
	}
	idBegin, idEnd, err := alloc.allocN(totalRows)
	if err != nil {
		return nil, err
	}

	fileStats := task.GetFileStats()
	importFiles := make([]*internalpb.ImportFile, len(fileStats))
	for i, stat := range fileStats {
		importFiles[i] = stat.GetImportFile()
	}

	request := &datapb.ImportRequest{
		JobID:           task.GetJobID(),
		TaskID:          task.GetTaskID(),
		CollectionID:    task.GetCollectionID(),
		PartitionIDs:    job.GetPartitionIDs(),
		Vchannels:       job.GetVchannels(),
		Schema:          job.GetSchema(),
		Files:           importFiles,
		Options:         job.GetOptions(),
		Ts:              ts,
		AutoIDRange:     &datapb.AutoIDRange{Begin: idBegin, End: idEnd},
		RequestSegments: requestSegments,
	}
	return request, nil
}
// RegroupImportFiles packs files into groups, one group per future import
// task. The size limit of a group is
// SegmentMaxSize * #partitions * #vchannels, capped at
// MaxSizeInMBPerImportTask (both configured in MB). files is sorted in place
// by ascending memory size and then packed greedily: a file larger than the
// limit gets a group of its own, otherwise it joins the current group until
// adding it would exceed the limit. It returns nil for an empty input.
func RegroupImportFiles(job ImportJob, files []*datapb.ImportFileStats) [][]*datapb.ImportFileStats {
	if len(files) == 0 {
		return nil
	}

	const bytesPerMB = 1024 * 1024
	segmentMaxSize := paramtable.Get().DataCoordCfg.SegmentMaxSize.GetAsInt() * bytesPerMB
	threshold := paramtable.Get().DataCoordCfg.MaxSizeInMBPerImportTask.GetAsInt() * bytesPerMB
	groupLimit := segmentMaxSize * len(job.GetPartitionIDs()) * len(job.GetVchannels())
	if threshold < groupLimit {
		groupLimit = threshold
	}

	sort.Slice(files, func(a, b int) bool {
		return files[a].GetTotalMemorySize() < files[b].GetTotalMemorySize()
	})

	var (
		groups      = make([][]*datapb.ImportFileStats, 0)
		pending     = make([]*datapb.ImportFileStats, 0)
		pendingSize = 0
	)
	for _, file := range files {
		size := int(file.GetTotalMemorySize())
		switch {
		case size > groupLimit:
			// Oversized file: isolate it without disturbing the pending group.
			groups = append(groups, []*datapb.ImportFileStats{file})
		case pendingSize+size <= groupLimit:
			pending = append(pending, file)
			pendingSize += size
		default:
			// Pending group is full: flush it and start a new one with this file.
			groups = append(groups, pending)
			pending = []*datapb.ImportFileStats{file}
			pendingSize = size
		}
	}
	if len(pending) > 0 {
		groups = append(groups, pending)
	}
	return groups
}
// AddImportSegment looks up segmentID in meta and sends an
// AddImportSegmentRequest describing it (channel, collection, partition, row
// count and stats logs) to the cluster, with a 15-second timeout.
//
// It returns a segment-not-found error when the segment is not present in
// meta, and otherwise whatever error cluster.AddImportSegment reports.
func AddImportSegment(cluster Cluster, meta *meta, segmentID int64) error {
	segment := meta.GetSegment(segmentID)
	if segment == nil {
		// Calling getters on a nil *SegmentInfo would panic; report it instead.
		return merr.WrapErrSegmentNotFound(segmentID, "add import segment failed")
	}
	req := &datapb.AddImportSegmentRequest{
		Base: commonpbutil.NewMsgBase(
			commonpbutil.WithSourceID(paramtable.GetNodeID()),
		),
		SegmentId:    segment.GetID(),
		ChannelName:  segment.GetInsertChannel(),
		CollectionId: segment.GetCollectionID(),
		PartitionId:  segment.GetPartitionID(),
		RowNum:       segment.GetNumOfRows(),
		StatsLog:     segment.GetStatslogs(),
	}
	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
	defer cancel()
	_, err := cluster.AddImportSegment(ctx, req)
	return err
}
// getPendingProgress returns the fraction of the job's files that have already
// been handed to pre-import tasks.
//
// When the job has no files the ratio would be 0/0 (NaN), which the caller
// then converts to an integer; in that case 1 is returned because there is
// nothing left to schedule.
func getPendingProgress(jobID int64, imeta ImportMeta) float32 {
	tasks := imeta.GetTaskBy(WithJob(jobID), WithType(PreImportTaskType))
	preImportingFiles := lo.SumBy(tasks, func(task ImportTask) int {
		return len(task.GetFileStats())
	})
	totalFiles := len(imeta.GetJob(jobID).GetFiles())
	if totalFiles == 0 {
		return 1
	}
	return float32(preImportingFiles) / float32(totalFiles)
}
// getPreImportingProgress returns the fraction of the job's pre-import tasks
// that are in the Completed state.
//
// When the job has no pre-import tasks the ratio would be 0/0 (NaN), which the
// caller then converts to an integer; in that case 1 is returned because there
// is nothing left to wait for.
func getPreImportingProgress(jobID int64, imeta ImportMeta) float32 {
	tasks := imeta.GetTaskBy(WithJob(jobID), WithType(PreImportTaskType))
	if len(tasks) == 0 {
		return 1
	}
	completedTasks := lo.Filter(tasks, func(task ImportTask, _ int) bool {
		return task.GetState() == datapb.ImportTaskStateV2_Completed
	})
	return float32(len(completedTasks)) / float32(len(tasks))
}
// getImportingProgress returns the progress of the job's importing phase as a
// weighted sum of two ratios:
//   - 80%: rows currently held by the tasks' segments over the total rows
//     announced by the tasks' file stats;
//   - 20%: segments whose IsImporting flag has been cleared over all of the
//     tasks' segments that still exist in meta.
//
// A ratio whose denominator is zero (no rows, or no surviving segments) is
// treated as 1 instead of evaluating 0/0, which would yield NaN and make the
// caller's integer conversion meaningless.
func getImportingProgress(jobID int64, imeta ImportMeta, meta *meta) float32 {
	var (
		importedRows int64
		totalRows    int64
	)
	tasks := imeta.GetTaskBy(WithJob(jobID), WithType(ImportTaskType))
	segmentIDs := make([]int64, 0)
	for _, task := range tasks {
		totalRows += lo.SumBy(task.GetFileStats(), func(file *datapb.ImportFileStats) int64 {
			return file.GetTotalRows()
		})
		segmentIDs = append(segmentIDs, task.(*importTask).GetSegmentIDs()...)
	}
	importedRows = meta.GetSegmentsTotalCurrentRows(segmentIDs)
	var importingProgress float32 = 1
	if totalRows != 0 {
		importingProgress = float32(importedRows) / float32(totalRows)
	}

	var (
		unsetIsImportingSegment int64
		totalSegment            int64
	)
	for _, task := range tasks {
		segmentIDs := task.(*importTask).GetSegmentIDs()
		for _, segmentID := range segmentIDs {
			segment := meta.GetSegment(segmentID)
			if segment == nil {
				log.Warn("cannot find segment, may be compacted", WrapTaskLog(task, zap.Int64("segmentID", segmentID))...)
				continue
			}
			totalSegment++
			if !segment.GetIsImporting() {
				unsetIsImportingSegment++
			}
		}
	}
	var completedProgress float32 = 1
	if totalSegment != 0 {
		completedProgress = float32(unsetIsImportingSegment) / float32(totalSegment)
	}
	return importingProgress*0.8 + completedProgress*0.2
}
// GetImportProgress returns the overall progress (0-100), the state to report
// and a reason string for the job jobID.
//
// The percentage is split across phases: Pending covers 0-10, PreImporting
// 10-50 and Importing 50-100. A PreImporting job is reported with the
// Importing state. A Failed job reports 0 together with the job's reason; any
// other state reports 0, ImportJobState_None and "unknown import job state".
func GetImportProgress(jobID int64, imeta ImportMeta, meta *meta) (int64, internalpb.ImportJobState, string) {
	job := imeta.GetJob(jobID)
	switch job.GetState() {
	case internalpb.ImportJobState_Pending:
		return int64(getPendingProgress(jobID, imeta) * 10), internalpb.ImportJobState_Pending, ""
	case internalpb.ImportJobState_PreImporting:
		return 10 + int64(getPreImportingProgress(jobID, imeta)*40), internalpb.ImportJobState_Importing, ""
	case internalpb.ImportJobState_Importing:
		return 10 + 40 + int64(getImportingProgress(jobID, imeta, meta)*50), internalpb.ImportJobState_Importing, ""
	case internalpb.ImportJobState_Completed:
		return 100, internalpb.ImportJobState_Completed, ""
	case internalpb.ImportJobState_Failed:
		return 0, internalpb.ImportJobState_Failed, job.GetReason()
	default:
		return 0, internalpb.ImportJobState_None, "unknown import job state"
	}
}
// DropImportTask asks the datanode that holds task to drop it and then resets
// the task's node to NullNodeID in the import meta. It is a no-op for tasks
// that are not assigned to any node. A node-not-found error from the cluster
// is tolerated and treated like a successful drop; any other cluster error is
// returned without touching the meta.
func DropImportTask(task ImportTask, cluster Cluster, tm ImportMeta) error {
	nodeID := task.GetNodeID()
	if nodeID == NullNodeID {
		return nil
	}

	err := cluster.DropImport(nodeID, &datapb.DropImportRequest{
		JobID:  task.GetJobID(),
		TaskID: task.GetTaskID(),
	})
	if err == nil || errors.Is(err, merr.ErrNodeNotFound) {
		log.Info("drop import in datanode done", WrapTaskLog(task)...)
		return tm.UpdateTask(task.GetTaskID(), UpdateNodeID(NullNodeID))
	}
	return err
}
// ListBinlogsAndGroupBySegment expands a binlog import file into one
// ImportFile per segment.
//
// importFile.Paths[0] is listed (non-recursively) and every entry found
// becomes an ImportFile with that single path. When a second path is given it
// is listed the same way, and each entry whose base name equals the base name
// of a segment's first path is appended to that segment's ImportFile.
// Both listings use the caller's ctx, so cancellation and deadlines apply to
// the whole operation.
//
// It returns an import-failed error when importFile has no paths, and the
// chunk manager's error when a listing fails.
func ListBinlogsAndGroupBySegment(ctx context.Context, cm storage.ChunkManager, importFile *internalpb.ImportFile) ([]*internalpb.ImportFile, error) {
	if len(importFile.GetPaths()) < 1 {
		return nil, merr.WrapErrImportFailed("no insert binlogs to import")
	}

	segmentInsertPaths, _, err := cm.ListWithPrefix(ctx, importFile.GetPaths()[0], false)
	if err != nil {
		return nil, err
	}
	segmentImportFiles := lo.Map(segmentInsertPaths, func(segmentPath string, _ int) *internalpb.ImportFile {
		return &internalpb.ImportFile{Paths: []string{segmentPath}}
	})

	if len(importFile.GetPaths()) < 2 {
		return segmentImportFiles, nil
	}
	segmentDeltaPaths, _, err := cm.ListWithPrefix(ctx, importFile.GetPaths()[1], false)
	if err != nil {
		return nil, err
	}
	if len(segmentDeltaPaths) == 0 {
		return segmentImportFiles, nil
	}
	// Index the delta prefixes by their last path element so they can be
	// matched against the insert prefixes below.
	deltaSegmentIDs := lo.KeyBy(segmentDeltaPaths, func(deltaPrefix string) string {
		return path.Base(deltaPrefix)
	})

	for i := range segmentImportFiles {
		segmentID := path.Base(segmentImportFiles[i].GetPaths()[0])
		if deltaPrefix, ok := deltaSegmentIDs[segmentID]; ok {
			segmentImportFiles[i].Paths = append(segmentImportFiles[i].Paths, deltaPrefix)
		}
	}
	return segmentImportFiles, nil
}

View File

@ -0,0 +1,523 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package datacoord
import (
"context"
"fmt"
"math/rand"
"path"
"testing"
"github.com/samber/lo"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus/internal/metastore/mocks"
mocks2 "github.com/milvus-io/milvus/internal/mocks"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/pkg/util/paramtable"
)
// TestImportUtil_NewPreImportTasks checks that NewPreImportTasks produces one
// task per file group when the allocator succeeds.
func TestImportUtil_NewPreImportTasks(t *testing.T) {
	jsonGroup := []*internalpb.ImportFile{
		{Id: 0, Paths: []string{"a.json"}},
		{Id: 1, Paths: []string{"b.json"}},
	}
	numpyGroup := []*internalpb.ImportFile{
		{Id: 2, Paths: []string{"c.npy", "d.npy"}},
		{Id: 3, Paths: []string{"e.npy", "f.npy"}},
	}
	fileGroups := [][]*internalpb.ImportFile{jsonGroup, numpyGroup}

	job := &importJob{
		ImportJob: &datapb.ImportJob{JobID: 1, CollectionID: 2},
	}

	// The mocked allocator hands out a random range of the requested width.
	alloc := NewNMockAllocator(t)
	alloc.EXPECT().allocN(mock.Anything).RunAndReturn(func(n int64) (int64, int64, error) {
		begin := rand.Int63()
		return begin, begin + n, nil
	})

	tasks, err := NewPreImportTasks(fileGroups, job, alloc)
	assert.NoError(t, err)
	assert.Equal(t, 2, len(tasks))
}
// TestImportUtil_NewImportTasks builds two groups of file stats and checks
// that NewImportTasks returns two tasks, each carrying three segment IDs.
func TestImportUtil_NewImportTasks(t *testing.T) {
	// Configured SegmentMaxSize scaled by 1024*1024 (presumably MB -> bytes; confirm against the param definition).
	segmentSize := paramtable.Get().DataCoordCfg.SegmentMaxSize.GetAsInt64() * 1024 * 1024

	// newStats builds the stats of one file whose data all lands in channel "c0", partition 100.
	newStats := func(id int64, size int64, paths ...string) *datapb.ImportFileStats {
		return &datapb.ImportFileStats{
			ImportFile: &internalpb.ImportFile{Id: id, Paths: paths},
			HashedStats: map[string]*datapb.PartitionImportStats{
				"c0": {PartitionDataSize: map[int64]int64{100: size}},
			},
		}
	}
	groups := [][]*datapb.ImportFileStats{
		{
			newStats(0, segmentSize, "a.json"),
			newStats(1, segmentSize*2, "b.json"),
		},
		{
			newStats(2, segmentSize, "c.npy", "d.npy"),
			newStats(3, segmentSize*2, "e.npy", "f.npy"),
		},
	}

	job := &importJob{
		ImportJob: &datapb.ImportJob{JobID: 1, CollectionID: 2},
	}

	// Every allocN call yields a random start value together with start+n.
	idAlloc := NewNMockAllocator(t)
	idAlloc.EXPECT().allocN(mock.Anything).RunAndReturn(func(n int64) (int64, int64, error) {
		start := rand.Int63()
		return start, start + n, nil
	})

	// Each AllocImportSegment call fabricates an importing segment with a random ID
	// that echoes back the requested collection, partition and channel.
	segManager := NewMockManager(t)
	segManager.EXPECT().
		AllocImportSegment(mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).
		RunAndReturn(func(_ context.Context, _ int64, collectionID int64, partitionID int64, vchannel string) (*SegmentInfo, error) {
			info := &datapb.SegmentInfo{
				ID:            rand.Int63(),
				CollectionID:  collectionID,
				PartitionID:   partitionID,
				InsertChannel: vchannel,
				IsImporting:   true,
			}
			return &SegmentInfo{SegmentInfo: info}, nil
		})

	created, err := NewImportTasks(groups, job, segManager, idAlloc)
	assert.NoError(t, err)
	assert.Equal(t, 2, len(created))
	for i := range created {
		ids := created[i].(*importTask).GetSegmentIDs()
		assert.Equal(t, 3, len(ids))
	}
}
// TestImportUtil_AssembleRequest checks that the pre-import and import
// requests assembled from a task and its job carry the task's job/task/
// collection IDs and the job's partition IDs and vchannels.
func TestImportUtil_AssembleRequest(t *testing.T) {
	var job ImportJob = &importJob{
		ImportJob: &datapb.ImportJob{JobID: 0, CollectionID: 1, PartitionIDs: []int64{2}, Vchannels: []string{"v0"}},
	}

	// --- pre-import request ---
	var preTask ImportTask = &preImportTask{
		PreImportTask: &datapb.PreImportTask{
			JobID:        0,
			TaskID:       3,
			CollectionID: 1,
			State:        datapb.ImportTaskStateV2_Pending,
		},
	}
	preReq := AssemblePreImportRequest(preTask, job)
	assert.Equal(t, preTask.GetJobID(), preReq.GetJobID())
	assert.Equal(t, preTask.GetTaskID(), preReq.GetTaskID())
	assert.Equal(t, preTask.GetCollectionID(), preReq.GetCollectionID())
	assert.Equal(t, job.GetPartitionIDs(), preReq.GetPartitionIDs())
	assert.Equal(t, job.GetVchannels(), preReq.GetVchannels())

	// --- import request ---
	var impTask ImportTask = &importTask{
		ImportTaskV2: &datapb.ImportTaskV2{
			JobID:        0,
			TaskID:       4,
			CollectionID: 1,
			SegmentIDs:   []int64{5, 6},
		},
	}

	// Catalog mock: empty listings, AddSegment always succeeds.
	mockCatalog := mocks.NewDataCoordCatalog(t)
	mockCatalog.EXPECT().ListSegments(mock.Anything).Return(nil, nil)
	mockCatalog.EXPECT().ListChannelCheckpoint(mock.Anything).Return(nil, nil)
	mockCatalog.EXPECT().ListIndexes(mock.Anything).Return(nil, nil)
	mockCatalog.EXPECT().ListSegmentIndexes(mock.Anything).Return(nil, nil)
	mockCatalog.EXPECT().AddSegment(mock.Anything, mock.Anything).Return(nil)

	idAlloc := NewNMockAllocator(t)
	idAlloc.EXPECT().allocN(mock.Anything).RunAndReturn(func(n int64) (int64, int64, error) {
		start := rand.Int63()
		return start, start + n, nil
	})
	idAlloc.EXPECT().allocTimestamp(mock.Anything).Return(800, nil)

	segMeta, err := newMeta(context.TODO(), mockCatalog, nil)
	assert.NoError(t, err)

	// The same SegmentInfo object is registered twice, first as ID 5 and then
	// re-labelled as ID 6, matching the task's SegmentIDs.
	seg := &SegmentInfo{
		SegmentInfo: &datapb.SegmentInfo{IsImporting: true},
	}
	for _, id := range []int64{5, 6} {
		seg.ID = id
		err = segMeta.AddSegment(context.Background(), seg)
		assert.NoError(t, err)
	}

	impReq, err := AssembleImportRequest(impTask, job, segMeta, idAlloc)
	assert.NoError(t, err)
	assert.Equal(t, impTask.GetJobID(), impReq.GetJobID())
	assert.Equal(t, impTask.GetTaskID(), impReq.GetTaskID())
	assert.Equal(t, impTask.GetCollectionID(), impReq.GetCollectionID())
	assert.Equal(t, job.GetPartitionIDs(), impReq.GetPartitionIDs())
	assert.Equal(t, job.GetVchannels(), impReq.GetVchannels())
}
// TestImportUtil_RegroupImportFiles generates 4096 files of random size and
// checks the groups returned by RegroupImportFiles: no group exceeds the
// per-task size threshold, every group but the last is close to full, and no
// file is lost.
func TestImportUtil_RegroupImportFiles(t *testing.T) {
	const fileNum = 4096
	segmentSize := paramtable.Get().DataCoordCfg.SegmentMaxSize.GetAsInt64() * 1024 * 1024
	taskLimit := paramtable.Get().DataCoordCfg.MaxSizeInMBPerImportTask.GetAsInt64() * 1024 * 1024

	// Each file gets a random size between 1% and 99% of segmentSize.
	stats := make([]*datapb.ImportFileStats, fileNum)
	for id := range stats {
		stats[id] = &datapb.ImportFileStats{
			ImportFile: &internalpb.ImportFile{
				Id:    int64(id),
				Paths: []string{fmt.Sprintf("%d.json", id)},
			},
			TotalMemorySize: segmentSize * (rand.Int63n(99) + 1) / 100,
		}
	}

	job := &importJob{
		ImportJob: &datapb.ImportJob{
			JobID:        1,
			CollectionID: 2,
			PartitionIDs: []int64{3, 4, 5, 6, 7},
			Vchannels:    []string{"v0", "v1", "v2", "v3"},
		},
	}

	groups := RegroupImportFiles(job, stats)

	lastIdx := len(groups) - 1
	seen := 0
	for idx, group := range groups {
		var groupBytes int64
		for _, f := range group {
			groupBytes += f.GetTotalMemorySize()
		}
		assert.True(t, groupBytes <= taskLimit)
		// Only the trailing group may be under-filled.
		if idx != lastIdx {
			assert.True(t, len(group) >= int(taskLimit/segmentSize))
			assert.True(t, groupBytes >= taskLimit-segmentSize)
		}
		seen += len(group)
	}
	assert.Equal(t, fileNum, seen)
}
// TestImportUtil_AddImportSegment registers one importing segment in meta and
// expects AddImportSegment to succeed for it against a mocked cluster.
func TestImportUtil_AddImportSegment(t *testing.T) {
	// Cluster mock accepts any AddImportSegment call.
	mockCluster := NewMockCluster(t)
	mockCluster.EXPECT().AddImportSegment(mock.Anything, mock.Anything).Return(nil, nil)

	// Catalog mock: empty listings, AddSegment always succeeds.
	mockCatalog := mocks.NewDataCoordCatalog(t)
	mockCatalog.EXPECT().ListSegments(mock.Anything).Return(nil, nil)
	mockCatalog.EXPECT().ListChannelCheckpoint(mock.Anything).Return(nil, nil)
	mockCatalog.EXPECT().ListIndexes(mock.Anything).Return(nil, nil)
	mockCatalog.EXPECT().ListSegmentIndexes(mock.Anything).Return(nil, nil)
	mockCatalog.EXPECT().AddSegment(mock.Anything, mock.Anything).Return(nil)

	segMeta, err := newMeta(context.TODO(), mockCatalog, nil)
	assert.NoError(t, err)

	importing := &SegmentInfo{
		SegmentInfo: &datapb.SegmentInfo{ID: 1, IsImporting: true},
	}
	assert.NoError(t, segMeta.AddSegment(context.Background(), importing))

	assert.NoError(t, AddImportSegment(mockCluster, segMeta, importing.GetID()))
}
// TestImportUtil_DropImportTask adds one import task to the import meta and
// expects DropImportTask to succeed for it against a mocked cluster.
func TestImportUtil_DropImportTask(t *testing.T) {
	// Cluster mock accepts any DropImport call.
	mockCluster := NewMockCluster(t)
	mockCluster.EXPECT().DropImport(mock.Anything, mock.Anything).Return(nil)

	// Catalog mock: empty listings, SaveImportTask always succeeds.
	mockCatalog := mocks.NewDataCoordCatalog(t)
	mockCatalog.EXPECT().ListImportJobs().Return(nil, nil)
	mockCatalog.EXPECT().ListPreImportTasks().Return(nil, nil)
	mockCatalog.EXPECT().ListImportTasks().Return(nil, nil)
	mockCatalog.EXPECT().SaveImportTask(mock.Anything).Return(nil)

	importMeta, err := NewImportMeta(mockCatalog)
	assert.NoError(t, err)

	task := &importTask{
		ImportTaskV2: &datapb.ImportTaskV2{JobID: 0, TaskID: 1},
	}
	assert.NoError(t, importMeta.AddTask(task))

	assert.NoError(t, DropImportTask(task, mockCluster, importMeta))
}
// TestImportUtil_ListBinlogsAndGroupBySegment lists insert and delta binlogs
// for two segments through a mocked chunk manager and expects one ImportFile
// per segment, each holding two paths whose base name is a known segment ID.
func TestImportUtil_ListBinlogsAndGroupBySegment(t *testing.T) {
	const (
		insertPrefix = "mock-insert-binlog-prefix"
		deltaPrefix  = "mock-delta-binlog-prefix"
	)

	// One insert-log path and one delta-log path for each of the two segments
	// (435978159261483008 and 435978159261483009).
	insertPaths := []string{
		"backup/bak1/data/insert_log/435978159196147009/435978159196147010/435978159261483008",
		"backup/bak1/data/insert_log/435978159196147009/435978159196147010/435978159261483009",
	}
	deltaPaths := []string{
		"backup/bak1/data/delta_log/435978159196147009/435978159196147010/435978159261483008",
		"backup/bak1/data/delta_log/435978159196147009/435978159196147010/435978159261483009",
	}

	ctx := context.Background()

	chunkMgr := mocks2.NewChunkManager(t)
	chunkMgr.EXPECT().ListWithPrefix(mock.Anything, insertPrefix, mock.Anything).Return(insertPaths, nil, nil)
	chunkMgr.EXPECT().ListWithPrefix(mock.Anything, deltaPrefix, mock.Anything).Return(deltaPaths, nil, nil)

	input := &internalpb.ImportFile{
		Id:    1,
		Paths: []string{insertPrefix, deltaPrefix},
	}

	grouped, err := ListBinlogsAndGroupBySegment(ctx, chunkMgr, input)
	assert.NoError(t, err)
	assert.Equal(t, 2, len(grouped))

	for _, g := range grouped {
		paths := g.GetPaths()
		assert.Equal(t, 2, len(paths))
		for _, p := range paths {
			base := path.Base(p)
			known := base == "435978159261483008" || base == "435978159261483009"
			assert.True(t, known)
		}
	}
}
// TestImportUtil_GetImportProgress walks a single import job through the
// Failed, Pending, PreImporting, Importing and Completed states and checks the
// (progress, state, reason) triple returned by GetImportProgress at each step.
//
// Fixture: one job with three files, two completed pre-import tasks covering
// those files, and two pending import tasks (600 total rows) backed by six
// importing segments with 50 current rows each.
func TestImportUtil_GetImportProgress(t *testing.T) {
ctx := context.Background()
mockErr := "mock err"
// Catalog mock: every listing is empty and every save/alter succeeds.
catalog := mocks.NewDataCoordCatalog(t)
catalog.EXPECT().ListImportJobs().Return(nil, nil)
catalog.EXPECT().ListPreImportTasks().Return(nil, nil)
catalog.EXPECT().ListImportTasks().Return(nil, nil)
catalog.EXPECT().ListSegments(mock.Anything).Return(nil, nil)
catalog.EXPECT().ListChannelCheckpoint(mock.Anything).Return(nil, nil)
catalog.EXPECT().ListIndexes(mock.Anything).Return(nil, nil)
catalog.EXPECT().ListSegmentIndexes(mock.Anything).Return(nil, nil)
catalog.EXPECT().SaveImportJob(mock.Anything).Return(nil)
catalog.EXPECT().SavePreImportTask(mock.Anything).Return(nil)
catalog.EXPECT().SaveImportTask(mock.Anything).Return(nil)
catalog.EXPECT().AddSegment(mock.Anything, mock.Anything).Return(nil)
catalog.EXPECT().AlterSegments(mock.Anything, mock.Anything).Return(nil)
// Both the import meta and the segment meta share the same mocked catalog.
imeta, err := NewImportMeta(catalog)
assert.NoError(t, err)
meta, err := newMeta(context.TODO(), catalog, nil)
assert.NoError(t, err)
// The three files that make up the job.
file1 := &internalpb.ImportFile{
Id: 1,
Paths: []string{"a.json"},
}
file2 := &internalpb.ImportFile{
Id: 2,
Paths: []string{"b.json"},
}
file3 := &internalpb.ImportFile{
Id: 3,
Paths: []string{"c.json"},
}
job := &importJob{
ImportJob: &datapb.ImportJob{
JobID: 0,
Files: []*internalpb.ImportFile{file1, file2, file3},
},
}
err = imeta.AddJob(job)
assert.NoError(t, err)
// Pre-import task 1: completed, covers file1 and file2.
pit1 := &preImportTask{
PreImportTask: &datapb.PreImportTask{
JobID: job.GetJobID(),
TaskID: 1,
State: datapb.ImportTaskStateV2_Completed,
Reason: mockErr,
FileStats: []*datapb.ImportFileStats{
{
ImportFile: file1,
},
{
ImportFile: file2,
},
},
},
}
err = imeta.AddTask(pit1)
assert.NoError(t, err)
// Pre-import task 2: completed, covers file3.
pit2 := &preImportTask{
PreImportTask: &datapb.PreImportTask{
JobID: job.GetJobID(),
TaskID: 2,
State: datapb.ImportTaskStateV2_Completed,
FileStats: []*datapb.ImportFileStats{
{
ImportFile: file3,
},
},
},
}
err = imeta.AddTask(pit2)
assert.NoError(t, err)
// Import task 3: pending, 100+200 total rows, segments 10, 11, 12.
it1 := &importTask{
ImportTaskV2: &datapb.ImportTaskV2{
JobID: job.GetJobID(),
TaskID: 3,
SegmentIDs: []int64{10, 11, 12},
State: datapb.ImportTaskStateV2_Pending,
FileStats: []*datapb.ImportFileStats{
{
ImportFile: file1,
TotalRows: 100,
},
{
ImportFile: file2,
TotalRows: 200,
},
},
},
}
err = imeta.AddTask(it1)
assert.NoError(t, err)
// Segments 10-12 start as importing, flushed, with 50 current rows each.
err = meta.AddSegment(ctx, &SegmentInfo{
SegmentInfo: &datapb.SegmentInfo{ID: 10, IsImporting: true, State: commonpb.SegmentState_Flushed}, currRows: 50,
})
assert.NoError(t, err)
err = meta.AddSegment(ctx, &SegmentInfo{
SegmentInfo: &datapb.SegmentInfo{ID: 11, IsImporting: true, State: commonpb.SegmentState_Flushed}, currRows: 50,
})
assert.NoError(t, err)
err = meta.AddSegment(ctx, &SegmentInfo{
SegmentInfo: &datapb.SegmentInfo{ID: 12, IsImporting: true, State: commonpb.SegmentState_Flushed}, currRows: 50,
})
assert.NoError(t, err)
// Import task 4: pending, 300 total rows, segments 20, 21, 22.
it2 := &importTask{
ImportTaskV2: &datapb.ImportTaskV2{
JobID: job.GetJobID(),
TaskID: 4,
SegmentIDs: []int64{20, 21, 22},
State: datapb.ImportTaskStateV2_Pending,
FileStats: []*datapb.ImportFileStats{
{
ImportFile: file3,
TotalRows: 300,
},
},
},
}
err = imeta.AddTask(it2)
assert.NoError(t, err)
// Segments 20-22 start as importing, flushed, with 50 current rows each.
err = meta.AddSegment(ctx, &SegmentInfo{
SegmentInfo: &datapb.SegmentInfo{ID: 20, IsImporting: true, State: commonpb.SegmentState_Flushed}, currRows: 50,
})
assert.NoError(t, err)
err = meta.AddSegment(ctx, &SegmentInfo{
SegmentInfo: &datapb.SegmentInfo{ID: 21, IsImporting: true, State: commonpb.SegmentState_Flushed}, currRows: 50,
})
assert.NoError(t, err)
err = meta.AddSegment(ctx, &SegmentInfo{
SegmentInfo: &datapb.SegmentInfo{ID: 22, IsImporting: true, State: commonpb.SegmentState_Flushed}, currRows: 50,
})
assert.NoError(t, err)
// Failed job: progress is 0 and the job's failure reason is returned.
err = imeta.UpdateJob(job.GetJobID(), UpdateJobState(internalpb.ImportJobState_Failed), UpdateJobReason(mockErr))
assert.NoError(t, err)
progress, state, reason := GetImportProgress(job.GetJobID(), imeta, meta)
assert.Equal(t, int64(0), progress)
assert.Equal(t, internalpb.ImportJobState_Failed, state)
assert.Equal(t, mockErr, reason)
// Pending job: progress is 10 and the reason is empty.
err = imeta.UpdateJob(job.GetJobID(), UpdateJobState(internalpb.ImportJobState_Pending))
assert.NoError(t, err)
progress, state, reason = GetImportProgress(job.GetJobID(), imeta, meta)
assert.Equal(t, int64(10), progress)
assert.Equal(t, internalpb.ImportJobState_Pending, state)
assert.Equal(t, "", reason)
// PreImporting job with both pre-import tasks completed: progress is 10+40.
// Note the state reported to the caller is Importing, not PreImporting.
err = imeta.UpdateJob(job.GetJobID(), UpdateJobState(internalpb.ImportJobState_PreImporting))
assert.NoError(t, err)
progress, state, reason = GetImportProgress(job.GetJobID(), imeta, meta)
assert.Equal(t, int64(10+40), progress)
assert.Equal(t, internalpb.ImportJobState_Importing, state)
assert.Equal(t, "", reason)
// Importing job, imported rows / total rows = 300/600 = 0.5: progress is 10+40+40*0.5.
err = imeta.UpdateJob(job.GetJobID(), UpdateJobState(internalpb.ImportJobState_Importing))
assert.NoError(t, err)
progress, state, reason = GetImportProgress(job.GetJobID(), imeta, meta)
assert.Equal(t, int64(10+40+40*0.5), progress)
assert.Equal(t, internalpb.ImportJobState_Importing, state)
assert.Equal(t, "", reason)
// Importing job, all rows imported (every segment set to 100 rows, 600/600),
// but only segments 10 and 20 (2 of 6) have left the importing state.
op1 := UpdateIsImporting(10, false)
op2 := UpdateImportedRows(10, 100)
err = meta.UpdateSegmentsInfo(op1, op2)
assert.NoError(t, err)
op1 = UpdateIsImporting(20, false)
op2 = UpdateImportedRows(20, 100)
err = meta.UpdateSegmentsInfo(op1, op2)
assert.NoError(t, err)
err = meta.UpdateSegmentsInfo(UpdateImportedRows(11, 100))
assert.NoError(t, err)
err = meta.UpdateSegmentsInfo(UpdateImportedRows(12, 100))
assert.NoError(t, err)
err = meta.UpdateSegmentsInfo(UpdateImportedRows(21, 100))
assert.NoError(t, err)
err = meta.UpdateSegmentsInfo(UpdateImportedRows(22, 100))
assert.NoError(t, err)
progress, state, reason = GetImportProgress(job.GetJobID(), imeta, meta)
// The expected value is an integer constant expression: 10*2/6 truncates to 3, giving 93.
assert.Equal(t, int64(float32(10+40+40+10*2/6)), progress)
assert.Equal(t, internalpb.ImportJobState_Importing, state)
assert.Equal(t, "", reason)
// Importing job with no segment left in the importing state: progress reaches
// 10+40+40+10 while the job state itself is still Importing.
err = meta.UpdateSegmentsInfo(UpdateIsImporting(11, false))
assert.NoError(t, err)
err = meta.UpdateSegmentsInfo(UpdateIsImporting(12, false))
assert.NoError(t, err)
err = meta.UpdateSegmentsInfo(UpdateIsImporting(21, false))
assert.NoError(t, err)
err = meta.UpdateSegmentsInfo(UpdateIsImporting(22, false))
assert.NoError(t, err)
progress, state, reason = GetImportProgress(job.GetJobID(), imeta, meta)
assert.Equal(t, int64(10+40+40+10), progress)
assert.Equal(t, internalpb.ImportJobState_Importing, state)
assert.Equal(t, "", reason)
// Completed job: progress is 100 and the reason is empty.
err = imeta.UpdateJob(job.GetJobID(), UpdateJobState(internalpb.ImportJobState_Completed))
assert.NoError(t, err)
progress, state, reason = GetImportProgress(job.GetJobID(), imeta, meta)
assert.Equal(t, int64(100), progress)
assert.Equal(t, internalpb.ImportJobState_Completed, state)
assert.Equal(t, "", reason)
}

View File

@ -365,6 +365,35 @@ func (m *meta) GetAllSegmentsUnsafe() []*SegmentInfo {
return m.segments.GetSegments()
}
// GetSegmentsTotalCurrentRows returns the sum of currRows over the segments
// identified by segmentIDs. IDs that do not resolve to a segment are logged
// with a warning and contribute nothing to the sum. The meta read lock is held
// for the duration of the call.
func (m *meta) GetSegmentsTotalCurrentRows(segmentIDs []UniqueID) int64 {
	m.RLock()
	defer m.RUnlock()

	var total int64
	for _, id := range segmentIDs {
		if seg := m.segments.GetSegment(id); seg != nil {
			total += seg.currRows
			continue
		}
		log.Warn("cannot find segment", zap.Int64("segmentID", id))
	}
	return total
}
// GetSegmentsChannels maps each of the given segment IDs to the insert channel
// of its segment. It returns a nil map and an error as soon as one ID does not
// resolve to a segment. The meta read lock is held for the duration of the call.
func (m *meta) GetSegmentsChannels(segmentIDs []UniqueID) (map[int64]string, error) {
	m.RLock()
	defer m.RUnlock()

	channels := make(map[int64]string)
	for _, id := range segmentIDs {
		seg := m.segments.GetSegment(id)
		if seg == nil {
			msg := fmt.Sprintf("cannot find segment %d", id)
			return nil, errors.New(msg)
		}
		channels[id] = seg.GetInsertChannel()
	}
	return channels, nil
}
// SetState sets the state of the segment with the provided ID.
func (m *meta) SetState(segmentID UniqueID, targetState commonpb.SegmentState) error {
log.Debug("meta update: setting segment state",
@ -598,6 +627,25 @@ func UpdateBinlogsOperator(segmentID int64, binlogs, statslogs, deltalogs []*dat
}
}
// ReplaceBinlogsOperator returns an UpdateOperator that overwrites the
// Binlogs, Statslogs and Deltalogs of the segment with the given ID and
// records a BinlogsIncrement for it in the update pack. The operator logs a
// warning and returns false when the segment is not found.
func ReplaceBinlogsOperator(segmentID int64, binlogs, statslogs, deltalogs []*datapb.FieldBinlog) UpdateOperator {
	return func(pack *updateSegmentPack) bool {
		seg := pack.Get(segmentID)
		if seg == nil {
			log.Warn("meta update: replace binlog failed - segment not found",
				zap.Int64("segmentID", segmentID))
			return false
		}

		seg.Binlogs, seg.Statslogs, seg.Deltalogs = binlogs, statslogs, deltalogs
		pack.increments[segmentID] = metastore.BinlogsIncrement{Segment: seg.SegmentInfo}
		return true
	}
}
// update startPosition
func UpdateStartPosition(startPositions []*datapb.SegmentStartPosition) UpdateOperator {
return func(modPack *updateSegmentPack) bool {
@ -616,6 +664,26 @@ func UpdateStartPosition(startPositions []*datapb.SegmentStartPosition) UpdateOp
}
}
// UpdateDmlPosition returns an UpdateOperator that sets the DmlPosition of the
// segment with the given ID. The operator logs a warning and returns false
// when the position carries no message ID or when the segment is not found;
// the message-ID check runs before the segment lookup.
func UpdateDmlPosition(segmentID int64, dmlPosition *msgpb.MsgPosition) UpdateOperator {
	return func(pack *updateSegmentPack) bool {
		if msgID := dmlPosition.GetMsgID(); len(msgID) == 0 {
			log.Warn("meta update: update dml position failed - nil position msg id",
				zap.Int64("segmentID", segmentID))
			return false
		}

		seg := pack.Get(segmentID)
		if seg == nil {
			log.Warn("meta update: update dml position failed - segment not found",
				zap.Int64("segmentID", segmentID))
			return false
		}

		seg.DmlPosition = dmlPosition
		return true
	}
}
// update segment checkpoint and num rows
// if was importing segment
// only update rows.
@ -660,6 +728,34 @@ func UpdateCheckPointOperator(segmentID int64, importing bool, checkpoints []*da
}
}
// UpdateImportedRows returns an UpdateOperator that sets currRows, NumOfRows
// and MaxRowNum of the segment with the given ID to rows. The operator logs a
// warning and returns false when the segment is not found.
func UpdateImportedRows(segmentID int64, rows int64) UpdateOperator {
	return func(pack *updateSegmentPack) bool {
		seg := pack.Get(segmentID)
		if seg == nil {
			log.Warn("meta update: update NumOfRows failed - segment not found",
				zap.Int64("segmentID", segmentID))
			return false
		}

		// All three row counters are kept in lock-step.
		seg.currRows, seg.NumOfRows, seg.MaxRowNum = rows, rows, rows
		return true
	}
}
// UpdateIsImporting returns an UpdateOperator that sets the IsImporting flag
// of the segment with the given ID. The operator logs a warning and returns
// false when the segment is not found.
func UpdateIsImporting(segmentID int64, isImporting bool) UpdateOperator {
	return func(pack *updateSegmentPack) bool {
		seg := pack.Get(segmentID)
		if seg != nil {
			seg.IsImporting = isImporting
			return true
		}

		log.Warn("meta update: update isImporting failed - segment not found",
			zap.Int64("segmentID", segmentID))
		return false
	}
}
// UpdateSegmentsInfo updates segment infos.
// It executes all operators and updates all changed segments.
func (m *meta) UpdateSegmentsInfo(operators ...UpdateOperator) error {

View File

@ -31,8 +31,9 @@ import (
"github.com/milvus-io/milvus/internal/kv"
mockkv "github.com/milvus-io/milvus/internal/kv/mocks"
"github.com/milvus-io/milvus/internal/metastore/kv/datacoord"
"github.com/milvus-io/milvus/internal/metastore/mocks"
"github.com/milvus-io/milvus/internal/metastore/model"
"github.com/milvus-io/milvus/internal/mocks"
mocks2 "github.com/milvus-io/milvus/internal/mocks"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/metrics"
@ -227,7 +228,7 @@ func (suite *MetaBasicSuite) TestCompleteCompactionMutation() {
},
}
mockChMgr := mocks.NewChunkManager(suite.T())
mockChMgr := mocks2.NewChunkManager(suite.T())
mockChMgr.EXPECT().RootPath().Return("mockroot").Times(4)
mockChMgr.EXPECT().Read(mock.Anything, mock.Anything).Return(nil, nil).Twice()
mockChMgr.EXPECT().Write(mock.Anything, mock.Anything, mock.Anything).Return(nil).Twice()
@ -792,6 +793,31 @@ func TestUpdateSegmentsInfo(t *testing.T) {
UpdateCheckPointOperator(1, false, []*datapb.CheckPoint{{SegmentID: 1, NumOfRows: 10}}),
)
assert.NoError(t, err)
err = meta.UpdateSegmentsInfo(
ReplaceBinlogsOperator(1, nil, nil, nil),
)
assert.NoError(t, err)
err = meta.UpdateSegmentsInfo(
UpdateDmlPosition(1, nil),
)
assert.NoError(t, err)
err = meta.UpdateSegmentsInfo(
UpdateDmlPosition(1, &msgpb.MsgPosition{MsgID: []byte{1}}),
)
assert.NoError(t, err)
err = meta.UpdateSegmentsInfo(
UpdateImportedRows(1, 0),
)
assert.NoError(t, err)
err = meta.UpdateSegmentsInfo(
UpdateIsImporting(1, true),
)
assert.NoError(t, err)
})
t.Run("update checkpoints and start position of non existed segment", func(t *testing.T) {

View File

@ -73,6 +73,65 @@ func (_c *NMockAllocator_allocID_Call) RunAndReturn(run func(context.Context) (i
return _c
}
// allocN is the mocked allocN. It records the call with n and produces its
// three results from the configured return values: either one function that
// yields all three at once, or per-result functions / plain values.
func (_m *NMockAllocator) allocN(n int64) (int64, int64, error) {
	results := _m.Called(n)

	// A single function producing every result takes precedence.
	if produceAll, ok := results.Get(0).(func(int64) (int64, int64, error)); ok {
		return produceAll(n)
	}

	var (
		first  int64
		second int64
		err    error
	)

	if fn, ok := results.Get(0).(func(int64) int64); ok {
		first = fn(n)
	} else {
		first = results.Get(0).(int64)
	}

	if fn, ok := results.Get(1).(func(int64) int64); ok {
		second = fn(n)
	} else {
		second = results.Get(1).(int64)
	}

	if fn, ok := results.Get(2).(func(int64) error); ok {
		err = fn(n)
	} else {
		err = results.Error(2)
	}

	return first, second, err
}
// NMockAllocator_allocN_Call wraps *mock.Call with Run/Return/RunAndReturn
// variants typed for the 'allocN' method.
type NMockAllocator_allocN_Call struct {
	*mock.Call
}

// allocN registers an expectation for 'allocN' via mock.On.
//   - n int64
func (_e *NMockAllocator_Expecter) allocN(n interface{}) *NMockAllocator_allocN_Call {
	registered := _e.mock.On("allocN", n)
	return &NMockAllocator_allocN_Call{Call: registered}
}

// Run installs a typed callback that receives the argument of the mocked call.
func (_c *NMockAllocator_allocN_Call) Run(run func(n int64)) *NMockAllocator_allocN_Call {
	_c.Call.Run(func(args mock.Arguments) {
		n := args[0].(int64)
		run(n)
	})
	return _c
}

// Return sets fixed return values for the expectation.
func (_c *NMockAllocator_allocN_Call) Return(_a0 int64, _a1 int64, _a2 error) *NMockAllocator_allocN_Call {
	_c.Call.Return(_a0, _a1, _a2)
	return _c
}

// RunAndReturn sets a function as the return value; the mocked method invokes
// it to compute its results.
func (_c *NMockAllocator_allocN_Call) RunAndReturn(run func(int64) (int64, int64, error)) *NMockAllocator_allocN_Call {
	_c.Call.Return(run)
	return _c
}
// allocTimestamp provides a mock function with given fields: _a0
func (_m *NMockAllocator) allocTimestamp(_a0 context.Context) (uint64, error) {
ret := _m.Called(_a0)

View File

@ -109,6 +109,49 @@ func (_c *MockCluster_Close_Call) RunAndReturn(run func()) *MockCluster_Close_Ca
return _c
}
// DropImport is the mocked DropImport. It records the call with nodeID and in,
// then returns the configured error, invoking the configured function when the
// return value is one.
func (_m *MockCluster) DropImport(nodeID int64, in *datapb.DropImportRequest) error {
	results := _m.Called(nodeID, in)

	if fn, ok := results.Get(0).(func(int64, *datapb.DropImportRequest) error); ok {
		return fn(nodeID, in)
	}
	return results.Error(0)
}
// MockCluster_DropImport_Call wraps *mock.Call with Run/Return/RunAndReturn
// variants typed for the 'DropImport' method.
type MockCluster_DropImport_Call struct {
	*mock.Call
}

// DropImport registers an expectation for 'DropImport' via mock.On.
//   - nodeID int64
//   - in *datapb.DropImportRequest
func (_e *MockCluster_Expecter) DropImport(nodeID interface{}, in interface{}) *MockCluster_DropImport_Call {
	registered := _e.mock.On("DropImport", nodeID, in)
	return &MockCluster_DropImport_Call{Call: registered}
}

// Run installs a typed callback that receives the arguments of the mocked call.
func (_c *MockCluster_DropImport_Call) Run(run func(nodeID int64, in *datapb.DropImportRequest)) *MockCluster_DropImport_Call {
	_c.Call.Run(func(args mock.Arguments) {
		nodeID := args[0].(int64)
		req := args[1].(*datapb.DropImportRequest)
		run(nodeID, req)
	})
	return _c
}

// Return sets a fixed return value for the expectation.
func (_c *MockCluster_DropImport_Call) Return(_a0 error) *MockCluster_DropImport_Call {
	_c.Call.Return(_a0)
	return _c
}

// RunAndReturn sets a function as the return value; the mocked method invokes
// it to compute its result.
func (_c *MockCluster_DropImport_Call) RunAndReturn(run func(int64, *datapb.DropImportRequest) error) *MockCluster_DropImport_Call {
	_c.Call.Return(run)
	return _c
}
// Flush provides a mock function with given fields: ctx, nodeID, channel, segments
func (_m *MockCluster) Flush(ctx context.Context, nodeID int64, channel string, segments []*datapb.SegmentInfo) error {
ret := _m.Called(ctx, nodeID, channel, segments)
@ -277,6 +320,202 @@ func (_c *MockCluster_Import_Call) RunAndReturn(run func(context.Context, int64,
return _c
}
// ImportV2 is the mocked ImportV2. It records the call with nodeID and in,
// then returns the configured error, invoking the configured function when the
// return value is one.
func (_m *MockCluster) ImportV2(nodeID int64, in *datapb.ImportRequest) error {
	results := _m.Called(nodeID, in)

	if fn, ok := results.Get(0).(func(int64, *datapb.ImportRequest) error); ok {
		return fn(nodeID, in)
	}
	return results.Error(0)
}
// MockCluster_ImportV2_Call wraps *mock.Call with Run/Return/RunAndReturn
// variants typed for the 'ImportV2' method.
type MockCluster_ImportV2_Call struct {
	*mock.Call
}

// ImportV2 registers an expectation for 'ImportV2' via mock.On.
//   - nodeID int64
//   - in *datapb.ImportRequest
func (_e *MockCluster_Expecter) ImportV2(nodeID interface{}, in interface{}) *MockCluster_ImportV2_Call {
	registered := _e.mock.On("ImportV2", nodeID, in)
	return &MockCluster_ImportV2_Call{Call: registered}
}

// Run installs a typed callback that receives the arguments of the mocked call.
func (_c *MockCluster_ImportV2_Call) Run(run func(nodeID int64, in *datapb.ImportRequest)) *MockCluster_ImportV2_Call {
	_c.Call.Run(func(args mock.Arguments) {
		nodeID := args[0].(int64)
		req := args[1].(*datapb.ImportRequest)
		run(nodeID, req)
	})
	return _c
}

// Return sets a fixed return value for the expectation.
func (_c *MockCluster_ImportV2_Call) Return(_a0 error) *MockCluster_ImportV2_Call {
	_c.Call.Return(_a0)
	return _c
}

// RunAndReturn sets a function as the return value; the mocked method invokes
// it to compute its result.
func (_c *MockCluster_ImportV2_Call) RunAndReturn(run func(int64, *datapb.ImportRequest) error) *MockCluster_ImportV2_Call {
	_c.Call.Return(run)
	return _c
}
// PreImport is the mocked PreImport. It records the call with nodeID and in,
// then returns the configured error, invoking the configured function when the
// return value is one.
func (_m *MockCluster) PreImport(nodeID int64, in *datapb.PreImportRequest) error {
	results := _m.Called(nodeID, in)

	if fn, ok := results.Get(0).(func(int64, *datapb.PreImportRequest) error); ok {
		return fn(nodeID, in)
	}
	return results.Error(0)
}
// MockCluster_PreImport_Call wraps *mock.Call with Run/Return/RunAndReturn
// variants typed for the 'PreImport' method.
type MockCluster_PreImport_Call struct {
	*mock.Call
}

// PreImport registers an expectation for 'PreImport' via mock.On.
//   - nodeID int64
//   - in *datapb.PreImportRequest
func (_e *MockCluster_Expecter) PreImport(nodeID interface{}, in interface{}) *MockCluster_PreImport_Call {
	registered := _e.mock.On("PreImport", nodeID, in)
	return &MockCluster_PreImport_Call{Call: registered}
}

// Run installs a typed callback that receives the arguments of the mocked call.
func (_c *MockCluster_PreImport_Call) Run(run func(nodeID int64, in *datapb.PreImportRequest)) *MockCluster_PreImport_Call {
	_c.Call.Run(func(args mock.Arguments) {
		nodeID := args[0].(int64)
		req := args[1].(*datapb.PreImportRequest)
		run(nodeID, req)
	})
	return _c
}

// Return sets a fixed return value for the expectation.
func (_c *MockCluster_PreImport_Call) Return(_a0 error) *MockCluster_PreImport_Call {
	_c.Call.Return(_a0)
	return _c
}

// RunAndReturn sets a function as the return value; the mocked method invokes
// it to compute its result.
func (_c *MockCluster_PreImport_Call) RunAndReturn(run func(int64, *datapb.PreImportRequest) error) *MockCluster_PreImport_Call {
	_c.Call.Return(run)
	return _c
}
// QueryImport is the mocked QueryImport. It records the call with nodeID and
// in, then produces its results from the configured return values: either one
// function that yields both results, or a per-result function / plain value.
func (_m *MockCluster) QueryImport(nodeID int64, in *datapb.QueryImportRequest) (*datapb.QueryImportResponse, error) {
	results := _m.Called(nodeID, in)

	var resp *datapb.QueryImportResponse
	switch first := results.Get(0).(type) {
	case func(int64, *datapb.QueryImportRequest) (*datapb.QueryImportResponse, error):
		// A single function producing both results takes precedence.
		return first(nodeID, in)
	case func(int64, *datapb.QueryImportRequest) *datapb.QueryImportResponse:
		resp = first(nodeID, in)
	case nil:
		// Nothing configured for the response; it stays nil.
	default:
		resp = first.(*datapb.QueryImportResponse)
	}

	if errFn, ok := results.Get(1).(func(int64, *datapb.QueryImportRequest) error); ok {
		return resp, errFn(nodeID, in)
	}
	return resp, results.Error(1)
}
// MockCluster_QueryImport_Call wraps *mock.Call with Run/Return/RunAndReturn
// variants typed for the 'QueryImport' method.
type MockCluster_QueryImport_Call struct {
	*mock.Call
}

// QueryImport registers an expectation for 'QueryImport' via mock.On.
//   - nodeID int64
//   - in *datapb.QueryImportRequest
func (_e *MockCluster_Expecter) QueryImport(nodeID interface{}, in interface{}) *MockCluster_QueryImport_Call {
	registered := _e.mock.On("QueryImport", nodeID, in)
	return &MockCluster_QueryImport_Call{Call: registered}
}

// Run installs a typed callback that receives the arguments of the mocked call.
func (_c *MockCluster_QueryImport_Call) Run(run func(nodeID int64, in *datapb.QueryImportRequest)) *MockCluster_QueryImport_Call {
	_c.Call.Run(func(args mock.Arguments) {
		nodeID := args[0].(int64)
		req := args[1].(*datapb.QueryImportRequest)
		run(nodeID, req)
	})
	return _c
}

// Return sets fixed return values for the expectation.
func (_c *MockCluster_QueryImport_Call) Return(_a0 *datapb.QueryImportResponse, _a1 error) *MockCluster_QueryImport_Call {
	_c.Call.Return(_a0, _a1)
	return _c
}

// RunAndReturn sets a function as the return value; the mocked method invokes
// it to compute its results.
func (_c *MockCluster_QueryImport_Call) RunAndReturn(run func(int64, *datapb.QueryImportRequest) (*datapb.QueryImportResponse, error)) *MockCluster_QueryImport_Call {
	_c.Call.Return(run)
	return _c
}
// QueryPreImport is the mocked QueryPreImport. It records the call with nodeID
// and in, then produces its results from the configured return values: either
// one function that yields both results, or a per-result function / plain value.
func (_m *MockCluster) QueryPreImport(nodeID int64, in *datapb.QueryPreImportRequest) (*datapb.QueryPreImportResponse, error) {
	results := _m.Called(nodeID, in)

	var resp *datapb.QueryPreImportResponse
	switch first := results.Get(0).(type) {
	case func(int64, *datapb.QueryPreImportRequest) (*datapb.QueryPreImportResponse, error):
		// A single function producing both results takes precedence.
		return first(nodeID, in)
	case func(int64, *datapb.QueryPreImportRequest) *datapb.QueryPreImportResponse:
		resp = first(nodeID, in)
	case nil:
		// Nothing configured for the response; it stays nil.
	default:
		resp = first.(*datapb.QueryPreImportResponse)
	}

	if errFn, ok := results.Get(1).(func(int64, *datapb.QueryPreImportRequest) error); ok {
		return resp, errFn(nodeID, in)
	}
	return resp, results.Error(1)
}
// MockCluster_QueryPreImport_Call wraps *mock.Call with Run/Return/RunAndReturn
// variants typed for the 'QueryPreImport' method.
type MockCluster_QueryPreImport_Call struct {
	*mock.Call
}

// QueryPreImport registers an expectation for 'QueryPreImport' via mock.On.
//   - nodeID int64
//   - in *datapb.QueryPreImportRequest
func (_e *MockCluster_Expecter) QueryPreImport(nodeID interface{}, in interface{}) *MockCluster_QueryPreImport_Call {
	registered := _e.mock.On("QueryPreImport", nodeID, in)
	return &MockCluster_QueryPreImport_Call{Call: registered}
}

// Run installs a typed callback that receives the arguments of the mocked call.
func (_c *MockCluster_QueryPreImport_Call) Run(run func(nodeID int64, in *datapb.QueryPreImportRequest)) *MockCluster_QueryPreImport_Call {
	_c.Call.Run(func(args mock.Arguments) {
		nodeID := args[0].(int64)
		req := args[1].(*datapb.QueryPreImportRequest)
		run(nodeID, req)
	})
	return _c
}

// Return sets fixed return values for the expectation.
func (_c *MockCluster_QueryPreImport_Call) Return(_a0 *datapb.QueryPreImportResponse, _a1 error) *MockCluster_QueryPreImport_Call {
	_c.Call.Return(_a0, _a1)
	return _c
}

// RunAndReturn sets a function as the return value; the mocked method invokes
// it to compute its results.
func (_c *MockCluster_QueryPreImport_Call) RunAndReturn(run func(int64, *datapb.QueryPreImportRequest) (*datapb.QueryPreImportResponse, error)) *MockCluster_QueryPreImport_Call {
	_c.Call.Return(run)
	return _c
}
// Register provides a mock function with given fields: node
func (_m *MockCluster) Register(node *NodeInfo) error {
ret := _m.Called(node)

View File

@ -0,0 +1,478 @@
// Code generated by mockery v2.30.1. DO NOT EDIT.
package datacoord
import (
context "context"
mock "github.com/stretchr/testify/mock"
)
// MockManager is an autogenerated mock type for the Manager type.
// It embeds mock.Mock, which records calls and holds the configured expectations.
type MockManager struct {
mock.Mock
}
// MockManager_Expecter exposes typed helpers for registering expectations on
// the mock.Mock it points to.
type MockManager_Expecter struct {
mock *mock.Mock
}
// EXPECT returns an expecter bound to this mock's embedded mock.Mock.
func (_m *MockManager) EXPECT() *MockManager_Expecter {
return &MockManager_Expecter{mock: &_m.Mock}
}
// AllocImportSegment is the mocked AllocImportSegment. It records the call
// with all five arguments, then produces its results from the configured
// return values: either one function that yields both results, or a
// per-result function / plain value.
func (_m *MockManager) AllocImportSegment(ctx context.Context, taskID int64, collectionID int64, partitionID int64, channelName string) (*SegmentInfo, error) {
	results := _m.Called(ctx, taskID, collectionID, partitionID, channelName)

	var seg *SegmentInfo
	switch first := results.Get(0).(type) {
	case func(context.Context, int64, int64, int64, string) (*SegmentInfo, error):
		// A single function producing both results takes precedence.
		return first(ctx, taskID, collectionID, partitionID, channelName)
	case func(context.Context, int64, int64, int64, string) *SegmentInfo:
		seg = first(ctx, taskID, collectionID, partitionID, channelName)
	case nil:
		// Nothing configured for the segment; it stays nil.
	default:
		seg = first.(*SegmentInfo)
	}

	if errFn, ok := results.Get(1).(func(context.Context, int64, int64, int64, string) error); ok {
		return seg, errFn(ctx, taskID, collectionID, partitionID, channelName)
	}
	return seg, results.Error(1)
}
// MockManager_AllocImportSegment_Call wraps *mock.Call with
// Run/Return/RunAndReturn variants typed for the 'AllocImportSegment' method.
type MockManager_AllocImportSegment_Call struct {
	*mock.Call
}

// AllocImportSegment registers an expectation for 'AllocImportSegment' via mock.On.
//   - ctx context.Context
//   - taskID int64
//   - collectionID int64
//   - partitionID int64
//   - channelName string
func (_e *MockManager_Expecter) AllocImportSegment(ctx interface{}, taskID interface{}, collectionID interface{}, partitionID interface{}, channelName interface{}) *MockManager_AllocImportSegment_Call {
	registered := _e.mock.On("AllocImportSegment", ctx, taskID, collectionID, partitionID, channelName)
	return &MockManager_AllocImportSegment_Call{Call: registered}
}

// Run installs a typed callback that receives the arguments of the mocked call.
func (_c *MockManager_AllocImportSegment_Call) Run(run func(ctx context.Context, taskID int64, collectionID int64, partitionID int64, channelName string)) *MockManager_AllocImportSegment_Call {
	_c.Call.Run(func(args mock.Arguments) {
		ctx := args[0].(context.Context)
		taskID := args[1].(int64)
		collectionID := args[2].(int64)
		partitionID := args[3].(int64)
		channelName := args[4].(string)
		run(ctx, taskID, collectionID, partitionID, channelName)
	})
	return _c
}

// Return sets fixed return values for the expectation.
func (_c *MockManager_AllocImportSegment_Call) Return(_a0 *SegmentInfo, _a1 error) *MockManager_AllocImportSegment_Call {
	_c.Call.Return(_a0, _a1)
	return _c
}

// RunAndReturn sets a function as the return value; the mocked method invokes
// it to compute its results.
func (_c *MockManager_AllocImportSegment_Call) RunAndReturn(run func(context.Context, int64, int64, int64, string) (*SegmentInfo, error)) *MockManager_AllocImportSegment_Call {
	_c.Call.Return(run)
	return _c
}
// AllocSegment is the mocked Manager.AllocSegment. It records the call and
// resolves the results from the matching expectation: a stub function
// returning both values takes precedence; otherwise each result comes from a
// per-result stub function or from the stored value.
func (_m *MockManager) AllocSegment(ctx context.Context, collectionID int64, partitionID int64, channelName string, requestRows int64) ([]*Allocation, error) {
	ret := _m.Called(ctx, collectionID, partitionID, channelName, requestRows)

	first := ret.Get(0)
	if both, ok := first.(func(context.Context, int64, int64, string, int64) ([]*Allocation, error)); ok {
		return both(ctx, collectionID, partitionID, channelName, requestRows)
	}

	var allocations []*Allocation
	if produce, ok := first.(func(context.Context, int64, int64, string, int64) []*Allocation); ok {
		allocations = produce(ctx, collectionID, partitionID, channelName, requestRows)
	} else if first != nil {
		allocations = first.([]*Allocation)
	}

	if fail, ok := ret.Get(1).(func(context.Context, int64, int64, string, int64) error); ok {
		return allocations, fail(ctx, collectionID, partitionID, channelName, requestRows)
	}
	return allocations, ret.Error(1)
}

// MockManager_AllocSegment_Call wraps *mock.Call with typed
// Run/Return/RunAndReturn helpers for 'AllocSegment'.
type MockManager_AllocSegment_Call struct {
	*mock.Call
}

// AllocSegment registers an expectation via mock.On. Arguments:
//   - ctx context.Context
//   - collectionID int64
//   - partitionID int64
//   - channelName string
//   - requestRows int64
func (_e *MockManager_Expecter) AllocSegment(ctx interface{}, collectionID interface{}, partitionID interface{}, channelName interface{}, requestRows interface{}) *MockManager_AllocSegment_Call {
	call := _e.mock.On("AllocSegment", ctx, collectionID, partitionID, channelName, requestRows)
	return &MockManager_AllocSegment_Call{Call: call}
}

// Run installs a typed callback that receives the arguments of the mocked call.
func (_c *MockManager_AllocSegment_Call) Run(run func(ctx context.Context, collectionID int64, partitionID int64, channelName string, requestRows int64)) *MockManager_AllocSegment_Call {
	typed := func(args mock.Arguments) {
		run(args[0].(context.Context), args[1].(int64), args[2].(int64), args[3].(string), args[4].(int64))
	}
	_c.Call.Run(typed)
	return _c
}

// Return sets the fixed results of the expectation.
func (_c *MockManager_AllocSegment_Call) Return(_a0 []*Allocation, _a1 error) *MockManager_AllocSegment_Call {
	_c.Call.Return(_a0, _a1)
	return _c
}

// RunAndReturn stores run as the return value; the mocked method detects the
// function and calls it to compute both results.
func (_c *MockManager_AllocSegment_Call) RunAndReturn(run func(context.Context, int64, int64, string, int64) ([]*Allocation, error)) *MockManager_AllocSegment_Call {
	_c.Call.Return(run)
	return _c
}
// DropSegment is the mocked Manager.DropSegment. The method has no results, so
// it only records the call (which also triggers any callback installed with
// Run).
func (_m *MockManager) DropSegment(ctx context.Context, segmentID int64) {
	_m.Called(ctx, segmentID)
}

// MockManager_DropSegment_Call wraps *mock.Call with typed
// Run/Return/RunAndReturn helpers for 'DropSegment'.
type MockManager_DropSegment_Call struct {
	*mock.Call
}

// DropSegment registers an expectation via mock.On. Arguments:
//   - ctx context.Context
//   - segmentID int64
func (_e *MockManager_Expecter) DropSegment(ctx interface{}, segmentID interface{}) *MockManager_DropSegment_Call {
	return &MockManager_DropSegment_Call{Call: _e.mock.On("DropSegment", ctx, segmentID)}
}

// Run installs a typed callback that receives the arguments of the mocked call.
func (_c *MockManager_DropSegment_Call) Run(run func(ctx context.Context, segmentID int64)) *MockManager_DropSegment_Call {
	_c.Call.Run(func(args mock.Arguments) {
		run(args[0].(context.Context), args[1].(int64))
	})
	return _c
}

// Return marks the expectation as returning nothing.
func (_c *MockManager_DropSegment_Call) Return() *MockManager_DropSegment_Call {
	_c.Call.Return()
	return _c
}

// RunAndReturn installs run as the callback executed on every matching call.
//
// DropSegment has no results, so the mocked method never inspects the stored
// return values. Handing run to Call.Return therefore left the callback
// registered but never invoked; delegating to the typed Run makes it execute.
func (_c *MockManager_DropSegment_Call) RunAndReturn(run func(context.Context, int64)) *MockManager_DropSegment_Call {
	_c.Run(run)
	return _c
}
// DropSegmentsOfChannel is the mocked Manager.DropSegmentsOfChannel. The method
// has no results, so it only records the call (which also triggers any
// callback installed with Run).
func (_m *MockManager) DropSegmentsOfChannel(ctx context.Context, channel string) {
	_m.Called(ctx, channel)
}

// MockManager_DropSegmentsOfChannel_Call wraps *mock.Call with typed
// Run/Return/RunAndReturn helpers for 'DropSegmentsOfChannel'.
type MockManager_DropSegmentsOfChannel_Call struct {
	*mock.Call
}

// DropSegmentsOfChannel registers an expectation via mock.On. Arguments:
//   - ctx context.Context
//   - channel string
func (_e *MockManager_Expecter) DropSegmentsOfChannel(ctx interface{}, channel interface{}) *MockManager_DropSegmentsOfChannel_Call {
	return &MockManager_DropSegmentsOfChannel_Call{Call: _e.mock.On("DropSegmentsOfChannel", ctx, channel)}
}

// Run installs a typed callback that receives the arguments of the mocked call.
func (_c *MockManager_DropSegmentsOfChannel_Call) Run(run func(ctx context.Context, channel string)) *MockManager_DropSegmentsOfChannel_Call {
	_c.Call.Run(func(args mock.Arguments) {
		run(args[0].(context.Context), args[1].(string))
	})
	return _c
}

// Return marks the expectation as returning nothing.
func (_c *MockManager_DropSegmentsOfChannel_Call) Return() *MockManager_DropSegmentsOfChannel_Call {
	_c.Call.Return()
	return _c
}

// RunAndReturn installs run as the callback executed on every matching call.
//
// DropSegmentsOfChannel has no results, so the mocked method never inspects
// the stored return values. Handing run to Call.Return therefore left the
// callback registered but never invoked; delegating to the typed Run makes it
// execute.
func (_c *MockManager_DropSegmentsOfChannel_Call) RunAndReturn(run func(context.Context, string)) *MockManager_DropSegmentsOfChannel_Call {
	_c.Run(run)
	return _c
}
// ExpireAllocations is the mocked Manager.ExpireAllocations. It records the
// call and returns either the result of a stub function stored on the
// expectation or the stored error value.
func (_m *MockManager) ExpireAllocations(channel string, ts uint64) error {
	ret := _m.Called(channel, ts)

	if stub, ok := ret.Get(0).(func(string, uint64) error); ok {
		return stub(channel, ts)
	}
	return ret.Error(0)
}

// MockManager_ExpireAllocations_Call wraps *mock.Call with typed
// Run/Return/RunAndReturn helpers for 'ExpireAllocations'.
type MockManager_ExpireAllocations_Call struct {
	*mock.Call
}

// ExpireAllocations registers an expectation via mock.On. Arguments:
//   - channel string
//   - ts uint64
func (_e *MockManager_Expecter) ExpireAllocations(channel interface{}, ts interface{}) *MockManager_ExpireAllocations_Call {
	call := _e.mock.On("ExpireAllocations", channel, ts)
	return &MockManager_ExpireAllocations_Call{Call: call}
}

// Run installs a typed callback that receives the arguments of the mocked call.
func (_c *MockManager_ExpireAllocations_Call) Run(run func(channel string, ts uint64)) *MockManager_ExpireAllocations_Call {
	typed := func(args mock.Arguments) {
		run(args[0].(string), args[1].(uint64))
	}
	_c.Call.Run(typed)
	return _c
}

// Return sets the fixed error result of the expectation.
func (_c *MockManager_ExpireAllocations_Call) Return(_a0 error) *MockManager_ExpireAllocations_Call {
	_c.Call.Return(_a0)
	return _c
}

// RunAndReturn stores run as the return value; the mocked method detects the
// function and calls it to compute the result.
func (_c *MockManager_ExpireAllocations_Call) RunAndReturn(run func(string, uint64) error) *MockManager_ExpireAllocations_Call {
	_c.Call.Return(run)
	return _c
}
// FlushImportSegments is the mocked Manager.FlushImportSegments. It records
// the call and returns either the result of a stub function stored on the
// expectation or the stored error value.
func (_m *MockManager) FlushImportSegments(ctx context.Context, collectionID int64, segmentIDs []int64) error {
	ret := _m.Called(ctx, collectionID, segmentIDs)

	if stub, ok := ret.Get(0).(func(context.Context, int64, []int64) error); ok {
		return stub(ctx, collectionID, segmentIDs)
	}
	return ret.Error(0)
}

// MockManager_FlushImportSegments_Call wraps *mock.Call with typed
// Run/Return/RunAndReturn helpers for 'FlushImportSegments'.
type MockManager_FlushImportSegments_Call struct {
	*mock.Call
}

// FlushImportSegments registers an expectation via mock.On. Arguments:
//   - ctx context.Context
//   - collectionID int64
//   - segmentIDs []int64
func (_e *MockManager_Expecter) FlushImportSegments(ctx interface{}, collectionID interface{}, segmentIDs interface{}) *MockManager_FlushImportSegments_Call {
	call := _e.mock.On("FlushImportSegments", ctx, collectionID, segmentIDs)
	return &MockManager_FlushImportSegments_Call{Call: call}
}

// Run installs a typed callback that receives the arguments of the mocked call.
func (_c *MockManager_FlushImportSegments_Call) Run(run func(ctx context.Context, collectionID int64, segmentIDs []int64)) *MockManager_FlushImportSegments_Call {
	typed := func(args mock.Arguments) {
		run(args[0].(context.Context), args[1].(int64), args[2].([]int64))
	}
	_c.Call.Run(typed)
	return _c
}

// Return sets the fixed error result of the expectation.
func (_c *MockManager_FlushImportSegments_Call) Return(_a0 error) *MockManager_FlushImportSegments_Call {
	_c.Call.Return(_a0)
	return _c
}

// RunAndReturn stores run as the return value; the mocked method detects the
// function and calls it to compute the result.
func (_c *MockManager_FlushImportSegments_Call) RunAndReturn(run func(context.Context, int64, []int64) error) *MockManager_FlushImportSegments_Call {
	_c.Call.Return(run)
	return _c
}
// GetFlushableSegments is the mocked Manager.GetFlushableSegments. It records
// the call and resolves the results from the matching expectation: a stub
// function returning both values takes precedence; otherwise each result comes
// from a per-result stub function or from the stored value.
func (_m *MockManager) GetFlushableSegments(ctx context.Context, channel string, ts uint64) ([]int64, error) {
	ret := _m.Called(ctx, channel, ts)

	first := ret.Get(0)
	if both, ok := first.(func(context.Context, string, uint64) ([]int64, error)); ok {
		return both(ctx, channel, ts)
	}

	var segmentIDs []int64
	if produce, ok := first.(func(context.Context, string, uint64) []int64); ok {
		segmentIDs = produce(ctx, channel, ts)
	} else if first != nil {
		segmentIDs = first.([]int64)
	}

	if fail, ok := ret.Get(1).(func(context.Context, string, uint64) error); ok {
		return segmentIDs, fail(ctx, channel, ts)
	}
	return segmentIDs, ret.Error(1)
}

// MockManager_GetFlushableSegments_Call wraps *mock.Call with typed
// Run/Return/RunAndReturn helpers for 'GetFlushableSegments'.
type MockManager_GetFlushableSegments_Call struct {
	*mock.Call
}

// GetFlushableSegments registers an expectation via mock.On. Arguments:
//   - ctx context.Context
//   - channel string
//   - ts uint64
func (_e *MockManager_Expecter) GetFlushableSegments(ctx interface{}, channel interface{}, ts interface{}) *MockManager_GetFlushableSegments_Call {
	call := _e.mock.On("GetFlushableSegments", ctx, channel, ts)
	return &MockManager_GetFlushableSegments_Call{Call: call}
}

// Run installs a typed callback that receives the arguments of the mocked call.
func (_c *MockManager_GetFlushableSegments_Call) Run(run func(ctx context.Context, channel string, ts uint64)) *MockManager_GetFlushableSegments_Call {
	typed := func(args mock.Arguments) {
		run(args[0].(context.Context), args[1].(string), args[2].(uint64))
	}
	_c.Call.Run(typed)
	return _c
}

// Return sets the fixed results of the expectation.
func (_c *MockManager_GetFlushableSegments_Call) Return(_a0 []int64, _a1 error) *MockManager_GetFlushableSegments_Call {
	_c.Call.Return(_a0, _a1)
	return _c
}

// RunAndReturn stores run as the return value; the mocked method detects the
// function and calls it to compute both results.
func (_c *MockManager_GetFlushableSegments_Call) RunAndReturn(run func(context.Context, string, uint64) ([]int64, error)) *MockManager_GetFlushableSegments_Call {
	_c.Call.Return(run)
	return _c
}
// SealAllSegments is the mocked Manager.SealAllSegments. It records the call
// and resolves the results from the matching expectation: a stub function
// returning both values takes precedence; otherwise each result comes from a
// per-result stub function or from the stored value.
func (_m *MockManager) SealAllSegments(ctx context.Context, collectionID int64, segIDs []int64) ([]int64, error) {
	ret := _m.Called(ctx, collectionID, segIDs)

	first := ret.Get(0)
	if both, ok := first.(func(context.Context, int64, []int64) ([]int64, error)); ok {
		return both(ctx, collectionID, segIDs)
	}

	var sealed []int64
	if produce, ok := first.(func(context.Context, int64, []int64) []int64); ok {
		sealed = produce(ctx, collectionID, segIDs)
	} else if first != nil {
		sealed = first.([]int64)
	}

	if fail, ok := ret.Get(1).(func(context.Context, int64, []int64) error); ok {
		return sealed, fail(ctx, collectionID, segIDs)
	}
	return sealed, ret.Error(1)
}

// MockManager_SealAllSegments_Call wraps *mock.Call with typed
// Run/Return/RunAndReturn helpers for 'SealAllSegments'.
type MockManager_SealAllSegments_Call struct {
	*mock.Call
}

// SealAllSegments registers an expectation via mock.On. Arguments:
//   - ctx context.Context
//   - collectionID int64
//   - segIDs []int64
func (_e *MockManager_Expecter) SealAllSegments(ctx interface{}, collectionID interface{}, segIDs interface{}) *MockManager_SealAllSegments_Call {
	call := _e.mock.On("SealAllSegments", ctx, collectionID, segIDs)
	return &MockManager_SealAllSegments_Call{Call: call}
}

// Run installs a typed callback that receives the arguments of the mocked call.
func (_c *MockManager_SealAllSegments_Call) Run(run func(ctx context.Context, collectionID int64, segIDs []int64)) *MockManager_SealAllSegments_Call {
	typed := func(args mock.Arguments) {
		run(args[0].(context.Context), args[1].(int64), args[2].([]int64))
	}
	_c.Call.Run(typed)
	return _c
}

// Return sets the fixed results of the expectation.
func (_c *MockManager_SealAllSegments_Call) Return(_a0 []int64, _a1 error) *MockManager_SealAllSegments_Call {
	_c.Call.Return(_a0, _a1)
	return _c
}

// RunAndReturn stores run as the return value; the mocked method detects the
// function and calls it to compute both results.
func (_c *MockManager_SealAllSegments_Call) RunAndReturn(run func(context.Context, int64, []int64) ([]int64, error)) *MockManager_SealAllSegments_Call {
	_c.Call.Return(run)
	return _c
}
// allocSegmentForImport is the mocked Manager.allocSegmentForImport. It
// records the call and resolves the results from the matching expectation: a
// stub function returning both values takes precedence; otherwise each result
// comes from a per-result stub function or from the stored value.
func (_m *MockManager) allocSegmentForImport(ctx context.Context, collectionID int64, partitionID int64, channelName string, requestRows int64, taskID int64) (*Allocation, error) {
	ret := _m.Called(ctx, collectionID, partitionID, channelName, requestRows, taskID)

	first := ret.Get(0)
	if both, ok := first.(func(context.Context, int64, int64, string, int64, int64) (*Allocation, error)); ok {
		return both(ctx, collectionID, partitionID, channelName, requestRows, taskID)
	}

	var allocation *Allocation
	if produce, ok := first.(func(context.Context, int64, int64, string, int64, int64) *Allocation); ok {
		allocation = produce(ctx, collectionID, partitionID, channelName, requestRows, taskID)
	} else if first != nil {
		allocation = first.(*Allocation)
	}

	if fail, ok := ret.Get(1).(func(context.Context, int64, int64, string, int64, int64) error); ok {
		return allocation, fail(ctx, collectionID, partitionID, channelName, requestRows, taskID)
	}
	return allocation, ret.Error(1)
}

// MockManager_allocSegmentForImport_Call wraps *mock.Call with typed
// Run/Return/RunAndReturn helpers for 'allocSegmentForImport'.
type MockManager_allocSegmentForImport_Call struct {
	*mock.Call
}

// allocSegmentForImport registers an expectation via mock.On. Arguments:
//   - ctx context.Context
//   - collectionID int64
//   - partitionID int64
//   - channelName string
//   - requestRows int64
//   - taskID int64
func (_e *MockManager_Expecter) allocSegmentForImport(ctx interface{}, collectionID interface{}, partitionID interface{}, channelName interface{}, requestRows interface{}, taskID interface{}) *MockManager_allocSegmentForImport_Call {
	call := _e.mock.On("allocSegmentForImport", ctx, collectionID, partitionID, channelName, requestRows, taskID)
	return &MockManager_allocSegmentForImport_Call{Call: call}
}

// Run installs a typed callback that receives the arguments of the mocked call.
func (_c *MockManager_allocSegmentForImport_Call) Run(run func(ctx context.Context, collectionID int64, partitionID int64, channelName string, requestRows int64, taskID int64)) *MockManager_allocSegmentForImport_Call {
	typed := func(args mock.Arguments) {
		run(args[0].(context.Context), args[1].(int64), args[2].(int64), args[3].(string), args[4].(int64), args[5].(int64))
	}
	_c.Call.Run(typed)
	return _c
}

// Return sets the fixed results of the expectation.
func (_c *MockManager_allocSegmentForImport_Call) Return(_a0 *Allocation, _a1 error) *MockManager_allocSegmentForImport_Call {
	_c.Call.Return(_a0, _a1)
	return _c
}

// RunAndReturn stores run as the return value; the mocked method detects the
// function and calls it to compute both results.
func (_c *MockManager_allocSegmentForImport_Call) RunAndReturn(run func(context.Context, int64, int64, string, int64, int64) (*Allocation, error)) *MockManager_allocSegmentForImport_Call {
	_c.Call.Return(run)
	return _c
}
// NewMockManager builds a MockManager bound to t and registers a cleanup hook
// that asserts all configured expectations were met.
// The first argument is typically a *testing.T value.
func NewMockManager(t interface {
	mock.TestingT
	Cleanup(func())
}) *MockManager {
	// Named m (not mock) so the mock package is not shadowed inside the body.
	m := &MockManager{}
	m.Mock.Test(t)

	assertExpectations := func() { m.AssertExpectations(t) }
	t.Cleanup(assertExpectations)

	return m
}

View File

@ -320,6 +320,49 @@ func (_c *MockSessionManager_DeleteSession_Call) RunAndReturn(run func(*NodeInfo
return _c
}
// DropImport is the mocked DropImport method. It records the call and returns
// either the result of a stub function stored on the expectation or the
// stored error value.
func (_m *MockSessionManager) DropImport(nodeID int64, in *datapb.DropImportRequest) error {
	ret := _m.Called(nodeID, in)

	if stub, ok := ret.Get(0).(func(int64, *datapb.DropImportRequest) error); ok {
		return stub(nodeID, in)
	}
	return ret.Error(0)
}

// MockSessionManager_DropImport_Call wraps *mock.Call with typed
// Run/Return/RunAndReturn helpers for 'DropImport'.
type MockSessionManager_DropImport_Call struct {
	*mock.Call
}

// DropImport registers an expectation via mock.On. Arguments:
//   - nodeID int64
//   - in *datapb.DropImportRequest
func (_e *MockSessionManager_Expecter) DropImport(nodeID interface{}, in interface{}) *MockSessionManager_DropImport_Call {
	call := _e.mock.On("DropImport", nodeID, in)
	return &MockSessionManager_DropImport_Call{Call: call}
}

// Run installs a typed callback that receives the arguments of the mocked call.
func (_c *MockSessionManager_DropImport_Call) Run(run func(nodeID int64, in *datapb.DropImportRequest)) *MockSessionManager_DropImport_Call {
	typed := func(args mock.Arguments) {
		run(args[0].(int64), args[1].(*datapb.DropImportRequest))
	}
	_c.Call.Run(typed)
	return _c
}

// Return sets the fixed error result of the expectation.
func (_c *MockSessionManager_DropImport_Call) Return(_a0 error) *MockSessionManager_DropImport_Call {
	_c.Call.Return(_a0)
	return _c
}

// RunAndReturn stores run as the return value; the mocked method detects the
// function and calls it to compute the result.
func (_c *MockSessionManager_DropImport_Call) RunAndReturn(run func(int64, *datapb.DropImportRequest) error) *MockSessionManager_DropImport_Call {
	_c.Call.Return(run)
	return _c
}
// Flush provides a mock function with given fields: ctx, nodeID, req
func (_m *MockSessionManager) Flush(ctx context.Context, nodeID int64, req *datapb.FlushSegmentsRequest) {
_m.Called(ctx, nodeID, req)
@ -563,6 +606,49 @@ func (_c *MockSessionManager_Import_Call) RunAndReturn(run func(context.Context,
return _c
}
// ImportV2 is the mocked ImportV2 method. It records the call and returns
// either the result of a stub function stored on the expectation or the
// stored error value.
func (_m *MockSessionManager) ImportV2(nodeID int64, in *datapb.ImportRequest) error {
	ret := _m.Called(nodeID, in)

	if stub, ok := ret.Get(0).(func(int64, *datapb.ImportRequest) error); ok {
		return stub(nodeID, in)
	}
	return ret.Error(0)
}

// MockSessionManager_ImportV2_Call wraps *mock.Call with typed
// Run/Return/RunAndReturn helpers for 'ImportV2'.
type MockSessionManager_ImportV2_Call struct {
	*mock.Call
}

// ImportV2 registers an expectation via mock.On. Arguments:
//   - nodeID int64
//   - in *datapb.ImportRequest
func (_e *MockSessionManager_Expecter) ImportV2(nodeID interface{}, in interface{}) *MockSessionManager_ImportV2_Call {
	call := _e.mock.On("ImportV2", nodeID, in)
	return &MockSessionManager_ImportV2_Call{Call: call}
}

// Run installs a typed callback that receives the arguments of the mocked call.
func (_c *MockSessionManager_ImportV2_Call) Run(run func(nodeID int64, in *datapb.ImportRequest)) *MockSessionManager_ImportV2_Call {
	typed := func(args mock.Arguments) {
		run(args[0].(int64), args[1].(*datapb.ImportRequest))
	}
	_c.Call.Run(typed)
	return _c
}

// Return sets the fixed error result of the expectation.
func (_c *MockSessionManager_ImportV2_Call) Return(_a0 error) *MockSessionManager_ImportV2_Call {
	_c.Call.Return(_a0)
	return _c
}

// RunAndReturn stores run as the return value; the mocked method detects the
// function and calls it to compute the result.
func (_c *MockSessionManager_ImportV2_Call) RunAndReturn(run func(int64, *datapb.ImportRequest) error) *MockSessionManager_ImportV2_Call {
	_c.Call.Return(run)
	return _c
}
// NotifyChannelOperation provides a mock function with given fields: ctx, nodeID, req
func (_m *MockSessionManager) NotifyChannelOperation(ctx context.Context, nodeID int64, req *datapb.ChannelOperationsRequest) error {
ret := _m.Called(ctx, nodeID, req)
@ -607,6 +693,159 @@ func (_c *MockSessionManager_NotifyChannelOperation_Call) RunAndReturn(run func(
return _c
}
// PreImport is the mocked PreImport method. It records the call and returns
// either the result of a stub function stored on the expectation or the
// stored error value.
func (_m *MockSessionManager) PreImport(nodeID int64, in *datapb.PreImportRequest) error {
	ret := _m.Called(nodeID, in)

	if stub, ok := ret.Get(0).(func(int64, *datapb.PreImportRequest) error); ok {
		return stub(nodeID, in)
	}
	return ret.Error(0)
}

// MockSessionManager_PreImport_Call wraps *mock.Call with typed
// Run/Return/RunAndReturn helpers for 'PreImport'.
type MockSessionManager_PreImport_Call struct {
	*mock.Call
}

// PreImport registers an expectation via mock.On. Arguments:
//   - nodeID int64
//   - in *datapb.PreImportRequest
func (_e *MockSessionManager_Expecter) PreImport(nodeID interface{}, in interface{}) *MockSessionManager_PreImport_Call {
	call := _e.mock.On("PreImport", nodeID, in)
	return &MockSessionManager_PreImport_Call{Call: call}
}

// Run installs a typed callback that receives the arguments of the mocked call.
func (_c *MockSessionManager_PreImport_Call) Run(run func(nodeID int64, in *datapb.PreImportRequest)) *MockSessionManager_PreImport_Call {
	typed := func(args mock.Arguments) {
		run(args[0].(int64), args[1].(*datapb.PreImportRequest))
	}
	_c.Call.Run(typed)
	return _c
}

// Return sets the fixed error result of the expectation.
func (_c *MockSessionManager_PreImport_Call) Return(_a0 error) *MockSessionManager_PreImport_Call {
	_c.Call.Return(_a0)
	return _c
}

// RunAndReturn stores run as the return value; the mocked method detects the
// function and calls it to compute the result.
func (_c *MockSessionManager_PreImport_Call) RunAndReturn(run func(int64, *datapb.PreImportRequest) error) *MockSessionManager_PreImport_Call {
	_c.Call.Return(run)
	return _c
}
// QueryImport is the mocked QueryImport method. It records the call and
// resolves the results from the matching expectation: a stub function
// returning both values takes precedence; otherwise each result comes from a
// per-result stub function or from the stored value.
func (_m *MockSessionManager) QueryImport(nodeID int64, in *datapb.QueryImportRequest) (*datapb.QueryImportResponse, error) {
	ret := _m.Called(nodeID, in)

	first := ret.Get(0)
	if both, ok := first.(func(int64, *datapb.QueryImportRequest) (*datapb.QueryImportResponse, error)); ok {
		return both(nodeID, in)
	}

	var resp *datapb.QueryImportResponse
	if produce, ok := first.(func(int64, *datapb.QueryImportRequest) *datapb.QueryImportResponse); ok {
		resp = produce(nodeID, in)
	} else if first != nil {
		resp = first.(*datapb.QueryImportResponse)
	}

	if fail, ok := ret.Get(1).(func(int64, *datapb.QueryImportRequest) error); ok {
		return resp, fail(nodeID, in)
	}
	return resp, ret.Error(1)
}

// MockSessionManager_QueryImport_Call wraps *mock.Call with typed
// Run/Return/RunAndReturn helpers for 'QueryImport'.
type MockSessionManager_QueryImport_Call struct {
	*mock.Call
}

// QueryImport registers an expectation via mock.On. Arguments:
//   - nodeID int64
//   - in *datapb.QueryImportRequest
func (_e *MockSessionManager_Expecter) QueryImport(nodeID interface{}, in interface{}) *MockSessionManager_QueryImport_Call {
	call := _e.mock.On("QueryImport", nodeID, in)
	return &MockSessionManager_QueryImport_Call{Call: call}
}

// Run installs a typed callback that receives the arguments of the mocked call.
func (_c *MockSessionManager_QueryImport_Call) Run(run func(nodeID int64, in *datapb.QueryImportRequest)) *MockSessionManager_QueryImport_Call {
	typed := func(args mock.Arguments) {
		run(args[0].(int64), args[1].(*datapb.QueryImportRequest))
	}
	_c.Call.Run(typed)
	return _c
}

// Return sets the fixed results of the expectation.
func (_c *MockSessionManager_QueryImport_Call) Return(_a0 *datapb.QueryImportResponse, _a1 error) *MockSessionManager_QueryImport_Call {
	_c.Call.Return(_a0, _a1)
	return _c
}

// RunAndReturn stores run as the return value; the mocked method detects the
// function and calls it to compute both results.
func (_c *MockSessionManager_QueryImport_Call) RunAndReturn(run func(int64, *datapb.QueryImportRequest) (*datapb.QueryImportResponse, error)) *MockSessionManager_QueryImport_Call {
	_c.Call.Return(run)
	return _c
}
// QueryPreImport is the mocked QueryPreImport method. It records the call and
// resolves the results from the matching expectation: a stub function
// returning both values takes precedence; otherwise each result comes from a
// per-result stub function or from the stored value.
func (_m *MockSessionManager) QueryPreImport(nodeID int64, in *datapb.QueryPreImportRequest) (*datapb.QueryPreImportResponse, error) {
	ret := _m.Called(nodeID, in)

	first := ret.Get(0)
	if both, ok := first.(func(int64, *datapb.QueryPreImportRequest) (*datapb.QueryPreImportResponse, error)); ok {
		return both(nodeID, in)
	}

	var resp *datapb.QueryPreImportResponse
	if produce, ok := first.(func(int64, *datapb.QueryPreImportRequest) *datapb.QueryPreImportResponse); ok {
		resp = produce(nodeID, in)
	} else if first != nil {
		resp = first.(*datapb.QueryPreImportResponse)
	}

	if fail, ok := ret.Get(1).(func(int64, *datapb.QueryPreImportRequest) error); ok {
		return resp, fail(nodeID, in)
	}
	return resp, ret.Error(1)
}

// MockSessionManager_QueryPreImport_Call wraps *mock.Call with typed
// Run/Return/RunAndReturn helpers for 'QueryPreImport'.
type MockSessionManager_QueryPreImport_Call struct {
	*mock.Call
}

// QueryPreImport registers an expectation via mock.On. Arguments:
//   - nodeID int64
//   - in *datapb.QueryPreImportRequest
func (_e *MockSessionManager_Expecter) QueryPreImport(nodeID interface{}, in interface{}) *MockSessionManager_QueryPreImport_Call {
	call := _e.mock.On("QueryPreImport", nodeID, in)
	return &MockSessionManager_QueryPreImport_Call{Call: call}
}

// Run installs a typed callback that receives the arguments of the mocked call.
func (_c *MockSessionManager_QueryPreImport_Call) Run(run func(nodeID int64, in *datapb.QueryPreImportRequest)) *MockSessionManager_QueryPreImport_Call {
	typed := func(args mock.Arguments) {
		run(args[0].(int64), args[1].(*datapb.QueryPreImportRequest))
	}
	_c.Call.Run(typed)
	return _c
}

// Return sets the fixed results of the expectation.
func (_c *MockSessionManager_QueryPreImport_Call) Return(_a0 *datapb.QueryPreImportResponse, _a1 error) *MockSessionManager_QueryPreImport_Call {
	_c.Call.Return(_a0, _a1)
	return _c
}

// RunAndReturn stores run as the return value; the mocked method detects the
// function and calls it to compute both results.
func (_c *MockSessionManager_QueryPreImport_Call) RunAndReturn(run func(int64, *datapb.QueryPreImportRequest) (*datapb.QueryPreImportResponse, error)) *MockSessionManager_QueryPreImport_Call {
	_c.Call.Return(run)
	return _c
}
// SyncSegments provides a mock function with given fields: nodeID, req
func (_m *MockSessionManager) SyncSegments(nodeID int64, req *datapb.SyncSegmentsRequest) error {
ret := _m.Called(nodeID, req)

View File

@ -104,6 +104,11 @@ func (m *MockAllocator) allocID(ctx context.Context) (UniqueID, error) {
return val, nil
}
// allocN reserves n consecutive IDs from the mock's counter and returns them
// as the half-open range [begin, end), so end-begin == n.
//
// The counter is advanced by exactly n and the returned range is the block
// that was just consumed. Previously the range started at the post-increment
// counter value and extended n past it, i.e. into IDs the counter had not yet
// moved beyond, so a later allocN (or a single-ID allocation on the same
// counter) could return IDs inside a range already handed out.
//
// NOTE(review): assumes the counter holds the last ID handed out (allocID
// returning the post-increment value) — confirm against allocID.
func (m *MockAllocator) allocN(n int64) (UniqueID, UniqueID, error) {
	last := atomic.AddInt64(&m.cnt, n)
	return last - n + 1, last + 1, nil
}
// MockAllocator0 is a stateless allocator test double; its allocN always
// reports a range starting at 0.
type MockAllocator0 struct{}
func (m *MockAllocator0) allocTimestamp(ctx context.Context) (Timestamp, error) {
@ -114,6 +119,10 @@ func (m *MockAllocator0) allocID(ctx context.Context) (UniqueID, error) {
return 0, nil
}
// allocN never fails and always reports the range that starts at 0 and ends
// at n; no state is kept between calls.
func (m *MockAllocator0) allocN(n int64) (UniqueID, UniqueID, error) {
	const begin = 0
	end := begin + n
	return begin, end, nil
}
// Compile-time check that *FailsAllocator implements the allocator interface.
var _ allocator = (*FailsAllocator)(nil)
// FailsAllocator allocator that fails
@ -136,6 +145,13 @@ func (a *FailsAllocator) allocID(_ context.Context) (UniqueID, error) {
return 0, errors.New("always fail")
}
// allocN ignores the requested count. It returns a zero range with a nil error
// when allocIDSucceed is set, and the "always fail" error otherwise.
func (a *FailsAllocator) allocN(_ int64) (UniqueID, UniqueID, error) {
	if !a.allocIDSucceed {
		return 0, 0, errors.New("always fail")
	}
	return 0, 0, nil
}
// newMockAllocator returns a pointer to a zero-valued MockAllocator.
func newMockAllocator() *MockAllocator {
	return new(MockAllocator)
}

View File

@ -19,6 +19,7 @@ package datacoord
import (
"context"
"fmt"
"math"
"sync"
"time"
@ -28,6 +29,7 @@ import (
"go.uber.org/zap"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/retry"
@ -66,6 +68,8 @@ func putAllocation(a *Allocation) {
}
// Manager manages segment related operations.
//
//go:generate mockery --name=Manager --structname=MockManager --output=./ --filename=mock_segment_manager.go --with-expecter --inpackage
type Manager interface {
// CreateSegment create new segment when segment not exist
@ -74,6 +78,7 @@ type Manager interface {
// allocSegmentForImport allocates one segment allocation for bulk insert.
// TODO: Remove this method and AllocSegment() above instead.
allocSegmentForImport(ctx context.Context, collectionID, partitionID UniqueID, channelName string, requestRows int64, taskID int64) (*Allocation, error)
AllocImportSegment(ctx context.Context, taskID int64, collectionID UniqueID, partitionID UniqueID, channelName string) (*SegmentInfo, error)
// DropSegment drops the segment from manager.
DropSegment(ctx context.Context, segmentID UniqueID)
// FlushImportSegments set importing segment state to Flushed.
@ -381,6 +386,56 @@ func (s *SegmentManager) genExpireTs(ctx context.Context, isImported bool) (Time
return expireTs, nil
}
// AllocImportSegment creates a new segment for an import task and registers it
// with the manager.
//
// It allocates a segment ID and a timestamp, builds an L1 segment in the
// Flushed state with IsImporting set (start and DML positions both point at
// channelName with the allocated timestamp), adds it to meta via AddSegment
// and finally records the ID in s.segments. taskID is used only for logging.
//
// On success the SegmentInfo that was added to meta is returned. If ID
// allocation, timestamp allocation or AddSegment fails, the error is logged
// and returned and s.segments is left untouched.
func (s *SegmentManager) AllocImportSegment(ctx context.Context, taskID int64, collectionID UniqueID,
	partitionID UniqueID, channelName string,
) (*SegmentInfo, error) {
	log := log.Ctx(ctx)
	ctx, sp := otel.Tracer(typeutil.DataCoordRole).Start(ctx, "open-Segment")
	defer sp.End()

	id, err := s.allocator.allocID(ctx)
	if err != nil {
		log.Error("failed to open new segment while allocID", zap.Error(err))
		return nil, err
	}
	ts, err := s.allocator.allocTimestamp(ctx)
	if err != nil {
		// Logged like the allocID failure above so both allocation errors are
		// visible in the same place.
		log.Error("failed to open new segment while allocTimestamp", zap.Error(err))
		return nil, err
	}

	position := &msgpb.MsgPosition{
		ChannelName: channelName,
		MsgID:       nil,
		Timestamp:   ts,
	}
	segmentInfo := &datapb.SegmentInfo{
		ID:             id,
		CollectionID:   collectionID,
		PartitionID:    partitionID,
		InsertChannel:  channelName,
		NumOfRows:      0,
		State:          commonpb.SegmentState_Flushed,
		MaxRowNum:      0,
		Level:          datapb.SegmentLevel_L1,
		LastExpireTime: math.MaxUint64,
		StartPosition:  position,
		DmlPosition:    position,
	}
	segmentInfo.IsImporting = true
	segment := NewSegmentInfo(segmentInfo)
	if err := s.meta.AddSegment(ctx, segment); err != nil {
		log.Error("failed to add import segment", zap.Error(err))
		return nil, err
	}

	// s.segments is a plain slice on the manager, so an unsynchronized append
	// races with any concurrent reader or writer. The lock is taken only
	// around the append, not around the allocator/meta calls above.
	// NOTE(review): assumes SegmentManager.mu is the mutex guarding
	// s.segments — confirm against the struct definition.
	s.mu.Lock()
	s.segments = append(s.segments, id)
	s.mu.Unlock()

	log.Info("add import segment done",
		zap.Int64("taskID", taskID),
		zap.Int64("CollectionID", segmentInfo.CollectionID),
		zap.Int64("SegmentID", segmentInfo.ID),
		zap.String("Channel", segmentInfo.InsertChannel))
	return segment, nil
}
func (s *SegmentManager) openNewSegment(ctx context.Context, collectionID UniqueID, partitionID UniqueID,
channelName string, segmentState commonpb.SegmentState, level datapb.SegmentLevel,
) (*SegmentInfo, error) {

View File

@ -32,6 +32,7 @@ import (
etcdkv "github.com/milvus-io/milvus/internal/kv/etcd"
mockkv "github.com/milvus-io/milvus/internal/kv/mocks"
"github.com/milvus-io/milvus/internal/metastore/kv/datacoord"
"github.com/milvus-io/milvus/internal/metastore/mocks"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/pkg/util/etcd"
"github.com/milvus-io/milvus/pkg/util/metautil"
@ -149,6 +150,10 @@ func TestLastExpireReset(t *testing.T) {
paramtable.Init()
Params.Save(Params.DataCoordCfg.AllocLatestExpireAttempt.Key, "1")
Params.Save(Params.DataCoordCfg.SegmentMaxSize.Key, "1")
defer func() {
Params.Save(Params.DataCoordCfg.AllocLatestExpireAttempt.Key, "200")
Params.Save(Params.DataCoordCfg.SegmentMaxSize.Key, "1024")
}()
mockAllocator := newRootCoordAllocator(newMockRootCoordClient())
etcdCli, _ := etcd.GetEtcdClient(
Params.EtcdCfg.UseEmbedEtcd.GetAsBool(),
@ -279,6 +284,64 @@ func TestAllocSegmentForImport(t *testing.T) {
})
}
// TestSegmentManager_AllocImportSegment covers the success path of
// SegmentManager.AllocImportSegment and each of its three failure points
// (ID allocation, timestamp allocation, adding the segment to meta).
func TestSegmentManager_AllocImportSegment(t *testing.T) {
	ctx := context.Background()
	mockErr := errors.New("mock error")

	// Success: the returned segment must be retrievable from meta and be
	// flagged as importing.
	t.Run("normal case", func(t *testing.T) {
		alloc := NewNMockAllocator(t)
		alloc.EXPECT().allocID(mock.Anything).Return(0, nil)
		alloc.EXPECT().allocTimestamp(mock.Anything).Return(0, nil)
		meta, err := newMemoryMeta()
		assert.NoError(t, err)
		sm, err := newSegmentManager(meta, alloc)
		assert.NoError(t, err)

		segment, err := sm.AllocImportSegment(ctx, 0, 1, 1, "ch1")
		assert.NoError(t, err)
		segment2 := meta.GetSegment(segment.GetID())
		assert.NotNil(t, segment2)
		assert.Equal(t, true, segment2.GetIsImporting())
	})

	// allocID fails: no allocTimestamp expectation is registered for this case.
	t.Run("alloc id failed", func(t *testing.T) {
		alloc := NewNMockAllocator(t)
		alloc.EXPECT().allocID(mock.Anything).Return(0, mockErr)
		meta, err := newMemoryMeta()
		assert.NoError(t, err)
		sm, err := newSegmentManager(meta, alloc)
		assert.NoError(t, err)
		_, err = sm.AllocImportSegment(ctx, 0, 1, 1, "ch1")
		assert.Error(t, err)
	})

	// allocID succeeds but allocTimestamp fails.
	t.Run("alloc ts failed", func(t *testing.T) {
		alloc := NewNMockAllocator(t)
		alloc.EXPECT().allocID(mock.Anything).Return(0, nil)
		alloc.EXPECT().allocTimestamp(mock.Anything).Return(0, mockErr)
		meta, err := newMemoryMeta()
		assert.NoError(t, err)
		sm, err := newSegmentManager(meta, alloc)
		assert.NoError(t, err)
		_, err = sm.AllocImportSegment(ctx, 0, 1, 1, "ch1")
		assert.Error(t, err)
	})

	// Both allocations succeed but the catalog rejects the new segment. The
	// catalog is swapped for a mock only after the manager has been built.
	t.Run("add segment failed", func(t *testing.T) {
		alloc := NewNMockAllocator(t)
		alloc.EXPECT().allocID(mock.Anything).Return(0, nil)
		alloc.EXPECT().allocTimestamp(mock.Anything).Return(0, nil)
		meta, err := newMemoryMeta()
		assert.NoError(t, err)
		sm, _ := newSegmentManager(meta, alloc)
		catalog := mocks.NewDataCoordCatalog(t)
		catalog.EXPECT().AddSegment(mock.Anything, mock.Anything).Return(mockErr)
		meta.catalog = catalog
		_, err = sm.AllocImportSegment(ctx, 0, 1, 1, "ch1")
		assert.Error(t, err)
	})
}
func TestLoadSegmentsFromMeta(t *testing.T) {
ctx := context.Background()
paramtable.Init()

View File

@ -122,6 +122,9 @@ type Server struct {
garbageCollector *garbageCollector
gcOpt GcOption
handler Handler
importMeta ImportMeta
importScheduler ImportScheduler
importChecker ImportChecker
compactionTrigger trigger
compactionHandler compactionPlanContext
@ -372,6 +375,13 @@ func (s *Server) initDataCoord() error {
s.initGarbageCollection(storageCli)
s.initIndexBuilder(storageCli)
s.importMeta, err = NewImportMeta(s.meta.catalog)
if err != nil {
return err
}
s.importScheduler = NewImportScheduler(s.meta, s.cluster, s.allocator, s.importMeta)
s.importChecker = NewImportChecker(s.meta, s.broker, s.cluster, s.allocator, s.segmentManager, s.importMeta, s.buildIndexCh)
s.serverLoopCtx, s.serverLoopCancel = context.WithCancel(s.ctx)
log.Info("init datacoord done", zap.Int64("nodeID", paramtable.GetNodeID()), zap.String("Address", s.address))
@ -654,6 +664,8 @@ func (s *Server) startServerLoop() {
s.startWatchService(s.serverLoopCtx)
s.startFlushLoop(s.serverLoopCtx)
s.startIndexService(s.serverLoopCtx)
go s.importScheduler.Start()
go s.importChecker.Start()
s.garbageCollector.start()
}
@ -1097,6 +1109,11 @@ func (s *Server) Stop() error {
s.garbageCollector.close()
logutil.Logger(s.ctx).Info("datacoord garbage collector stopped")
s.stopServerLoop()
s.importScheduler.Close()
s.importChecker.Close()
if Params.DataCoordCfg.EnableCompaction.GetAsBool() {
s.stopCompactionTrigger()
s.stopCompactionHandler()

View File

@ -1004,6 +1004,10 @@ func (s *spySegmentManager) allocSegmentForImport(ctx context.Context, collectio
panic("not implemented") // TODO: Implement
}
// AllocImportSegment is not implemented by the spy; calling it panics.
func (s *spySegmentManager) AllocImportSegment(ctx context.Context, taskID int64, collectionID UniqueID, partitionID UniqueID, channelName string) (*SegmentInfo, error) {
panic("not implemented")
}
// DropSegment drops the segment from manager.
func (s *spySegmentManager) DropSegment(ctx context.Context, segmentID UniqueID) {
}

View File

@ -19,6 +19,7 @@ package datacoord
import (
"context"
"fmt"
"math"
"math/rand"
"strconv"
"time"
@ -35,11 +36,13 @@ import (
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/internal/util/importutilv2"
"github.com/milvus-io/milvus/internal/util/segmentutil"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/metrics"
"github.com/milvus-io/milvus/pkg/util/commonpbutil"
"github.com/milvus-io/milvus/pkg/util/funcutil"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/metricsinfo"
"github.com/milvus-io/milvus/pkg/util/paramtable"
@ -1743,3 +1746,135 @@ func (s *Server) GcControl(ctx context.Context, request *datapb.GcControlRequest
return status, nil
}
// ImportV2 validates an import request and registers a new import job in the
// Pending state.
//
// Steps, in order:
//  1. Reject the request if the server is not healthy.
//  2. If the "timeout" option can be read, parse it as a Go duration and turn
//     it into an absolute timeout timestamp relative to a freshly allocated
//     timestamp; otherwise the job keeps math.MaxUint64 as its timeout.
//  3. For backup imports, replace the requested files with the entries
//     returned by ListBinlogsAndGroupBySegment for each of them.
//  4. Allocate len(files)+1 IDs: the start of the range becomes the job ID
//     and file i is assigned idStart+i+1.
//  5. Persist the job through importMeta.
//
// Every failure is reported through resp.Status (wrapped as an import-failed
// error); the returned Go error is always nil.
func (s *Server) ImportV2(ctx context.Context, in *internalpb.ImportRequestInternal) (*internalpb.ImportResponse, error) {
	if err := merr.CheckHealthy(s.GetStateCode()); err != nil {
		return &internalpb.ImportResponse{
			Status: merr.Status(err),
		}, nil
	}

	resp := &internalpb.ImportResponse{
		Status: merr.Success(),
	}

	log := log.With(zap.Int64("collection", in.GetCollectionID()),
		zap.Int64s("partitions", in.GetPartitionIDs()),
		zap.Strings("channels", in.GetChannelNames()),
		zap.Any("files", in.GetFiles()))
	log.Info("receive import request")

	// No "timeout" option means the job never times out.
	var timeoutTs uint64 = math.MaxUint64
	timeoutStr, err := funcutil.GetAttrByKeyFromRepeatedKV("timeout", in.GetOptions())
	if err == nil {
		dur, err := time.ParseDuration(timeoutStr)
		if err != nil {
			// fmt.Sprintf with %v: fmt.Sprint would not interpret the verb and
			// %w is only meaningful to fmt.Errorf.
			resp.Status = merr.Status(merr.WrapErrImportFailed(fmt.Sprintf("parse import timeout failed, err=%v", err)))
			return resp, nil
		}
		ts, err := s.allocator.allocTimestamp(ctx)
		if err != nil {
			resp.Status = merr.Status(merr.WrapErrImportFailed(fmt.Sprintf("alloc ts failed, err=%v", err)))
			return resp, nil
		}
		timeoutTs = tsoutil.AddPhysicalDurationOnTs(ts, dur)
	}

	files := in.GetFiles()
	isBackup := importutilv2.IsBackup(in.GetOptions())
	if isBackup {
		// Backup imports are expanded: each requested file is replaced by the
		// per-segment entries discovered under it.
		files = make([]*internalpb.ImportFile, 0)
		for _, importFile := range in.GetFiles() {
			segmentPrefixes, err := ListBinlogsAndGroupBySegment(ctx, s.meta.chunkManager, importFile)
			if err != nil {
				resp.Status = merr.Status(merr.WrapErrImportFailed(fmt.Sprintf("list binlogs and group by segment failed, err=%v", err)))
				return resp, nil
			}
			files = append(files, segmentPrefixes...)
		}
	}

	// One ID for the job itself plus one per file.
	idStart, _, err := s.allocator.allocN(int64(len(files)) + 1)
	if err != nil {
		resp.Status = merr.Status(merr.WrapErrImportFailed(fmt.Sprintf("alloc id failed, err=%v", err)))
		return resp, nil
	}
	files = lo.Map(files, func(importFile *internalpb.ImportFile, i int) *internalpb.ImportFile {
		// idStart is reserved for the job, so file IDs start at idStart+1.
		importFile.Id = idStart + int64(i) + 1
		return importFile
	})

	job := &importJob{
		ImportJob: &datapb.ImportJob{
			JobID:        idStart,
			CollectionID: in.GetCollectionID(),
			PartitionIDs: in.GetPartitionIDs(),
			Vchannels:    in.GetChannelNames(),
			Schema:       in.GetSchema(),
			TimeoutTs:    timeoutTs,
			CleanupTs:    math.MaxUint64,
			State:        internalpb.ImportJobState_Pending,
			Files:        files,
			Options:      in.GetOptions(),
		},
	}
	err = s.importMeta.AddJob(job)
	if err != nil {
		resp.Status = merr.Status(merr.WrapErrImportFailed(fmt.Sprintf("add import job failed, err=%v", err)))
		return resp, nil
	}

	resp.JobID = fmt.Sprint(job.GetJobID())
	log.Info("add import job done", zap.Int64("jobID", job.GetJobID()))
	return resp, nil
}
// GetImportProgress reports the progress, state and failure reason of the
// import job whose ID is given as a base-10 string in the request.
//
// An unhealthy server or an unparsable job ID is reported through
// resp.Status; the returned Go error is always nil.
func (s *Server) GetImportProgress(ctx context.Context, in *internalpb.GetImportProgressRequest) (*internalpb.GetImportProgressResponse, error) {
	log := log.With(zap.String("jobID", in.GetJobID()))
	if err := merr.CheckHealthy(s.GetStateCode()); err != nil {
		return &internalpb.GetImportProgressResponse{
			Status: merr.Status(err),
		}, nil
	}

	resp := &internalpb.GetImportProgressResponse{
		Status: merr.Success(),
	}
	jobID, err := strconv.ParseInt(in.GetJobID(), 10, 64)
	if err != nil {
		// fmt.Sprintf with %v: fmt.Sprint would not interpret the verb and
		// %w is only meaningful to fmt.Errorf.
		resp.Status = merr.Status(merr.WrapErrImportFailed(fmt.Sprintf("parse job id failed, err=%v", err)))
		return resp, nil
	}

	progress, state, reason := GetImportProgress(jobID, s.importMeta, s.meta)
	resp.State = state
	resp.Reason = reason
	resp.Progress = progress
	log.Info("GetImportProgress done", zap.Any("resp", resp))
	return resp, nil
}
// ListImports returns, for every import job of the requested collection, its
// job ID (as a decimal string), state, failure reason and progress. The four
// result slices are index-aligned. An unhealthy server is reported through the
// response status; the returned Go error is always nil.
func (s *Server) ListImports(ctx context.Context, req *internalpb.ListImportsRequestInternal) (*internalpb.ListImportsResponse, error) {
	if healthErr := merr.CheckHealthy(s.GetStateCode()); healthErr != nil {
		return &internalpb.ListImportsResponse{Status: merr.Status(healthErr)}, nil
	}

	result := &internalpb.ListImportsResponse{
		Status:     merr.Success(),
		JobIDs:     []string{},
		States:     []internalpb.ImportJobState{},
		Reasons:    []string{},
		Progresses: []int64{},
	}

	for _, j := range s.importMeta.GetJobBy(WithCollectionID(req.GetCollectionID())) {
		id := j.GetJobID()
		progress, state, reason := GetImportProgress(id, s.importMeta, s.meta)

		result.JobIDs = append(result.JobIDs, strconv.FormatInt(id, 10))
		result.States = append(result.States, state)
		result.Reasons = append(result.Reasons, reason)
		result.Progresses = append(result.Progresses, progress)
	}
	return result, nil
}

View File

@ -18,10 +18,13 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/metastore/mocks"
"github.com/milvus-io/milvus/internal/metastore/model"
"github.com/milvus-io/milvus/internal/mocks"
mocks2 "github.com/milvus-io/milvus/internal/mocks"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/internal/types"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/merr"
@ -1339,6 +1342,193 @@ func TestGetRecoveryInfoV2(t *testing.T) {
})
}
// TestImportV2 covers the three import-related Server RPCs: ImportV2,
// GetImportProgress and ListImports. Each subtest starts from a bare Server,
// first checks the unhealthy-server path, then swaps in mocks step by step to
// reach the later failure points and finally the success path.
func TestImportV2(t *testing.T) {
ctx := context.Background()
mockErr := errors.New("mock err")
t.Run("ImportV2", func(t *testing.T) {
// server not healthy
s := &Server{}
s.stateCode.Store(commonpb.StateCode_Initializing)
resp, err := s.ImportV2(ctx, nil)
assert.NoError(t, err)
assert.NotEqual(t, int32(0), resp.GetStatus().GetCode())
s.stateCode.Store(commonpb.StateCode_Healthy)
// parse timeout failed
resp, err = s.ImportV2(ctx, &internalpb.ImportRequestInternal{
Options: []*commonpb.KeyValuePair{
{
Key: "timeout",
Value: "@$#$%#%$",
},
},
})
assert.NoError(t, err)
assert.True(t, errors.Is(merr.Error(resp.GetStatus()), merr.ErrImportFailed))
// list binlog failed
cm := mocks2.NewChunkManager(t)
cm.EXPECT().ListWithPrefix(mock.Anything, mock.Anything, mock.Anything).Return(nil, nil, mockErr)
s.meta = &meta{chunkManager: cm}
resp, err = s.ImportV2(ctx, &internalpb.ImportRequestInternal{
Files: []*internalpb.ImportFile{
{
Id: 1,
Paths: []string{"mock_insert_prefix"},
},
},
Options: []*commonpb.KeyValuePair{
{
Key: "backup",
Value: "true",
},
},
})
assert.NoError(t, err)
assert.True(t, errors.Is(merr.Error(resp.GetStatus()), merr.ErrImportFailed))
// alloc failed
alloc := NewNMockAllocator(t)
alloc.EXPECT().allocN(mock.Anything).Return(0, 0, mockErr)
s.allocator = alloc
resp, err = s.ImportV2(ctx, &internalpb.ImportRequestInternal{})
assert.NoError(t, err)
assert.True(t, errors.Is(merr.Error(resp.GetStatus()), merr.ErrImportFailed))
// From here on ID allocation succeeds so the later steps can be reached.
alloc = NewNMockAllocator(t)
alloc.EXPECT().allocN(mock.Anything).Return(0, 0, nil)
s.allocator = alloc
// add job failed
catalog := mocks.NewDataCoordCatalog(t)
catalog.EXPECT().ListImportJobs().Return(nil, nil)
catalog.EXPECT().ListPreImportTasks().Return(nil, nil)
catalog.EXPECT().ListImportTasks().Return(nil, nil)
catalog.EXPECT().SaveImportJob(mock.Anything).Return(mockErr)
s.importMeta, err = NewImportMeta(catalog)
assert.NoError(t, err)
resp, err = s.ImportV2(ctx, &internalpb.ImportRequestInternal{
Files: []*internalpb.ImportFile{
{
Id: 1,
Paths: []string{"a.json"},
},
},
})
assert.NoError(t, err)
assert.True(t, errors.Is(merr.Error(resp.GetStatus()), merr.ErrImportFailed))
jobs := s.importMeta.GetJobBy()
assert.Equal(t, 0, len(jobs))
// Drop the failing SaveImportJob expectation before registering a
// succeeding one, so the normal case below sees the success return.
catalog.ExpectedCalls = lo.Filter(catalog.ExpectedCalls, func(call *mock.Call, _ int) bool {
return call.Method != "SaveImportJob"
})
catalog.EXPECT().SaveImportJob(mock.Anything).Return(nil)
// normal case
resp, err = s.ImportV2(ctx, &internalpb.ImportRequestInternal{
Files: []*internalpb.ImportFile{
{
Id: 1,
Paths: []string{"a.json"},
},
},
})
assert.NoError(t, err)
assert.Equal(t, int32(0), resp.GetStatus().GetCode())
jobs = s.importMeta.GetJobBy()
assert.Equal(t, 1, len(jobs))
})
t.Run("GetImportProgress", func(t *testing.T) {
// server not healthy
s := &Server{}
s.stateCode.Store(commonpb.StateCode_Initializing)
resp, err := s.GetImportProgress(ctx, nil)
assert.NoError(t, err)
assert.NotEqual(t, int32(0), resp.GetStatus().GetCode())
s.stateCode.Store(commonpb.StateCode_Healthy)
// illegal jobID
resp, err = s.GetImportProgress(ctx, &internalpb.GetImportProgressRequest{
JobID: "@%$%$#%",
})
assert.NoError(t, err)
assert.True(t, errors.Is(merr.Error(resp.GetStatus()), merr.ErrImportFailed))
// normal case
catalog := mocks.NewDataCoordCatalog(t)
catalog.EXPECT().ListImportJobs().Return(nil, nil)
catalog.EXPECT().ListPreImportTasks().Return(nil, nil)
catalog.EXPECT().ListImportTasks().Return(nil, nil)
catalog.EXPECT().SaveImportJob(mock.Anything).Return(nil)
s.importMeta, err = NewImportMeta(catalog)
assert.NoError(t, err)
// A job stored in the Failed state must be reported as Failed with zero progress.
var job ImportJob = &importJob{
ImportJob: &datapb.ImportJob{
JobID: 0,
Schema: &schemapb.CollectionSchema{},
State: internalpb.ImportJobState_Failed,
},
}
err = s.importMeta.AddJob(job)
assert.NoError(t, err)
resp, err = s.GetImportProgress(ctx, &internalpb.GetImportProgressRequest{
JobID: "0",
})
assert.NoError(t, err)
assert.Equal(t, int32(0), resp.GetStatus().GetCode())
assert.Equal(t, int64(0), resp.GetProgress())
assert.Equal(t, internalpb.ImportJobState_Failed, resp.GetState())
})
t.Run("ListImports", func(t *testing.T) {
// server not healthy
s := &Server{}
s.stateCode.Store(commonpb.StateCode_Initializing)
resp, err := s.ListImports(ctx, nil)
assert.NoError(t, err)
assert.NotEqual(t, int32(0), resp.GetStatus().GetCode())
s.stateCode.Store(commonpb.StateCode_Healthy)
// normal case
catalog := mocks.NewDataCoordCatalog(t)
catalog.EXPECT().ListImportJobs().Return(nil, nil)
catalog.EXPECT().ListPreImportTasks().Return(nil, nil)
catalog.EXPECT().ListImportTasks().Return(nil, nil)
catalog.EXPECT().SaveImportJob(mock.Anything).Return(nil)
// NOTE(review): presumably AddTask persists the pre-import task through
// SavePreImportTask, which is why this expectation is needed — confirm.
catalog.EXPECT().SavePreImportTask(mock.Anything).Return(nil)
s.importMeta, err = NewImportMeta(catalog)
assert.NoError(t, err)
var job ImportJob = &importJob{
ImportJob: &datapb.ImportJob{
JobID: 0,
CollectionID: 1,
Schema: &schemapb.CollectionSchema{},
},
}
err = s.importMeta.AddJob(job)
assert.NoError(t, err)
var task ImportTask = &preImportTask{
PreImportTask: &datapb.PreImportTask{
JobID: 0,
TaskID: 1,
State: datapb.ImportTaskStateV2_Failed,
},
}
err = s.importMeta.AddTask(task)
assert.NoError(t, err)
resp, err = s.ListImports(ctx, &internalpb.ListImportsRequestInternal{
CollectionID: 1,
})
assert.NoError(t, err)
assert.Equal(t, int32(0), resp.GetStatus().GetCode())
// One job in the collection means one entry in each index-aligned slice.
assert.Equal(t, 1, len(resp.GetJobIDs()))
assert.Equal(t, 1, len(resp.GetStates()))
assert.Equal(t, 1, len(resp.GetReasons()))
assert.Equal(t, 1, len(resp.GetProgresses()))
})
}
type GcControlServiceSuite struct {
suite.Suite

View File

@ -45,6 +45,8 @@ const (
flushTimeout = 15 * time.Second
// TODO: evaluate and update import timeout.
importTimeout = 3 * time.Hour
importTaskTimeout = 10 * time.Second
)
type SessionManager interface {
@ -62,6 +64,11 @@ type SessionManager interface {
NotifyChannelOperation(ctx context.Context, nodeID int64, req *datapb.ChannelOperationsRequest) error
CheckChannelOperationProgress(ctx context.Context, nodeID int64, info *datapb.ChannelWatchInfo) (*datapb.ChannelOperationProgressResponse, error)
AddImportSegment(ctx context.Context, nodeID int64, req *datapb.AddImportSegmentRequest) (*datapb.AddImportSegmentResponse, error)
PreImport(nodeID int64, in *datapb.PreImportRequest) error
ImportV2(nodeID int64, in *datapb.ImportRequest) error
QueryPreImport(nodeID int64, in *datapb.QueryPreImportRequest) (*datapb.QueryPreImportResponse, error)
QueryImport(nodeID int64, in *datapb.QueryImportRequest) (*datapb.QueryImportResponse, error)
DropImport(nodeID int64, in *datapb.DropImportRequest) error
CheckHealth(ctx context.Context) error
Close()
}
@ -157,7 +164,7 @@ func (c *SessionManagerImpl) getClient(ctx context.Context, nodeID int64) (types
c.sessions.RUnlock()
if !ok {
return nil, fmt.Errorf("can not find session of node %d", nodeID)
return nil, merr.WrapErrNodeNotFound(nodeID, "can not find session")
}
return session.GetOrCreateClient(ctx)
@ -394,6 +401,100 @@ func (c *SessionManagerImpl) AddImportSegment(ctx context.Context, nodeID int64,
return resp, err
}
// PreImport sends the pre-import request to the node identified by nodeID.
// The call runs on a background context bounded by importTaskTimeout. It
// returns the client lookup error, or whatever VerifyResponse reports for the
// RPC status and error.
func (c *SessionManagerImpl) PreImport(nodeID int64, in *datapb.PreImportRequest) error {
	logger := log.With(
		zap.Int64("nodeID", nodeID),
		zap.Int64("jobID", in.GetJobID()),
		zap.Int64("taskID", in.GetTaskID()),
		zap.Int64("collectionID", in.GetCollectionID()),
		zap.Int64s("partitionIDs", in.GetPartitionIDs()),
	)

	rpcCtx, release := context.WithTimeout(context.Background(), importTaskTimeout)
	defer release()

	client, err := c.getClient(rpcCtx, nodeID)
	if err != nil {
		logger.Info("failed to get client", zap.Error(err))
		return err
	}

	status, rpcErr := client.PreImport(rpcCtx, in)
	return VerifyResponse(status, rpcErr)
}
// ImportV2 sends the import request to the node identified by nodeID. The
// call runs on a background context bounded by importTaskTimeout. It returns
// the client lookup error, or whatever VerifyResponse reports for the RPC
// status and error.
func (c *SessionManagerImpl) ImportV2(nodeID int64, in *datapb.ImportRequest) error {
	logger := log.With(
		zap.Int64("nodeID", nodeID),
		zap.Int64("jobID", in.GetJobID()),
		zap.Int64("taskID", in.GetTaskID()),
		zap.Int64("collectionID", in.GetCollectionID()),
	)

	rpcCtx, release := context.WithTimeout(context.Background(), importTaskTimeout)
	defer release()

	client, err := c.getClient(rpcCtx, nodeID)
	if err != nil {
		logger.Info("failed to get client", zap.Error(err))
		return err
	}

	status, rpcErr := client.ImportV2(rpcCtx, in)
	return VerifyResponse(status, rpcErr)
}
// QueryPreImport asks the node identified by nodeID for the state of a
// pre-import task. The call runs on a background context bounded by
// importTaskTimeout. The response is returned only when VerifyResponse
// accepts its status and the RPC error; otherwise the response is dropped and
// the error is returned.
func (c *SessionManagerImpl) QueryPreImport(nodeID int64, in *datapb.QueryPreImportRequest) (*datapb.QueryPreImportResponse, error) {
	logger := log.With(
		zap.Int64("nodeID", nodeID),
		zap.Int64("jobID", in.GetJobID()),
		zap.Int64("taskID", in.GetTaskID()),
	)

	rpcCtx, release := context.WithTimeout(context.Background(), importTaskTimeout)
	defer release()

	client, err := c.getClient(rpcCtx, nodeID)
	if err != nil {
		logger.Info("failed to get client", zap.Error(err))
		return nil, err
	}

	result, rpcErr := client.QueryPreImport(rpcCtx, in)
	if verifyErr := VerifyResponse(result.GetStatus(), rpcErr); verifyErr != nil {
		return nil, verifyErr
	}
	return result, nil
}
// QueryImport asks the node identified by nodeID for the state of an import
// task. The call runs on a background context bounded by importTaskTimeout.
// The response is returned only when VerifyResponse accepts its status and
// the RPC error; otherwise the response is dropped and the error is returned.
func (c *SessionManagerImpl) QueryImport(nodeID int64, in *datapb.QueryImportRequest) (*datapb.QueryImportResponse, error) {
	logger := log.With(
		zap.Int64("nodeID", nodeID),
		zap.Int64("jobID", in.GetJobID()),
		zap.Int64("taskID", in.GetTaskID()),
	)

	rpcCtx, release := context.WithTimeout(context.Background(), importTaskTimeout)
	defer release()

	client, err := c.getClient(rpcCtx, nodeID)
	if err != nil {
		logger.Info("failed to get client", zap.Error(err))
		return nil, err
	}

	result, rpcErr := client.QueryImport(rpcCtx, in)
	if verifyErr := VerifyResponse(result.GetStatus(), rpcErr); verifyErr != nil {
		return nil, verifyErr
	}
	return result, nil
}
// DropImport sends the drop-import request to the node identified by nodeID.
// The call runs on a background context bounded by importTaskTimeout. It
// returns the client lookup error, or whatever VerifyResponse reports for the
// RPC status and error.
func (c *SessionManagerImpl) DropImport(nodeID int64, in *datapb.DropImportRequest) error {
	logger := log.With(
		zap.Int64("nodeID", nodeID),
		zap.Int64("jobID", in.GetJobID()),
		zap.Int64("taskID", in.GetTaskID()),
	)

	rpcCtx, release := context.WithTimeout(context.Background(), importTaskTimeout)
	defer release()

	client, err := c.getClient(rpcCtx, nodeID)
	if err != nil {
		logger.Info("failed to get client", zap.Error(err))
		return err
	}

	status, rpcErr := client.DropImport(rpcCtx, in)
	return VerifyResponse(status, rpcErr)
}
func (c *SessionManagerImpl) CheckHealth(ctx context.Context) error {
group, ctx := errgroup.WithContext(ctx)

View File

@ -118,3 +118,61 @@ func (s *SessionManagerSuite) TestCheckCHannelOperationProgress() {
s.EqualValues(100, resp.Progress)
})
}
// TestImportV2 checks the import-related SessionManager calls. Every subtest
// first calls the method with node ID 0 and expects an error, then re-runs
// SetupTest and calls it with node ID 1000 against a mocked response.
// NOTE(review): presumably SetupTest registers a session for node 1000 and
// none for node 0 — confirm against the suite setup.
func (s *SessionManagerSuite) TestImportV2() {
	mockErr := errors.New("mock error")

	s.Run("PreImport", func() {
		s.Error(s.m.PreImport(0, &datapb.PreImportRequest{}))

		s.SetupTest()
		s.dn.EXPECT().PreImport(mock.Anything, mock.Anything).Return(merr.Success(), nil)
		s.NoError(s.m.PreImport(1000, &datapb.PreImportRequest{}))
	})

	s.Run("ImportV2", func() {
		s.Error(s.m.ImportV2(0, &datapb.ImportRequest{}))

		s.SetupTest()
		s.dn.EXPECT().ImportV2(mock.Anything, mock.Anything).Return(merr.Success(), nil)
		s.NoError(s.m.ImportV2(1000, &datapb.ImportRequest{}))
	})

	s.Run("QueryPreImport", func() {
		_, lookupErr := s.m.QueryPreImport(0, &datapb.QueryPreImportRequest{})
		s.Error(lookupErr)

		// A response carrying a failed status must surface as an error.
		s.SetupTest()
		failed := &datapb.QueryPreImportResponse{Status: merr.Status(mockErr)}
		s.dn.EXPECT().QueryPreImport(mock.Anything, mock.Anything).Return(failed, nil)
		_, statusErr := s.m.QueryPreImport(1000, &datapb.QueryPreImportRequest{})
		s.Error(statusErr)
	})

	s.Run("QueryImport", func() {
		_, lookupErr := s.m.QueryImport(0, &datapb.QueryImportRequest{})
		s.Error(lookupErr)

		// A response carrying a failed status must surface as an error.
		s.SetupTest()
		failed := &datapb.QueryImportResponse{Status: merr.Status(mockErr)}
		s.dn.EXPECT().QueryImport(mock.Anything, mock.Anything).Return(failed, nil)
		_, statusErr := s.m.QueryImport(1000, &datapb.QueryImportRequest{})
		s.Error(statusErr)
	})

	s.Run("DropImport", func() {
		s.Error(s.m.DropImport(0, &datapb.DropImportRequest{}))

		s.SetupTest()
		s.dn.EXPECT().DropImport(mock.Anything, mock.Anything).Return(merr.Success(), nil)
		s.NoError(s.m.DropImport(1000, &datapb.DropImportRequest{}))
	})
}

View File

@ -134,6 +134,10 @@ func (f *fixedTSOAllocator) allocID(_ context.Context) (UniqueID, error) {
panic("not implemented") // TODO: Implement
}
// allocN is not implemented by fixedTSOAllocator; calling it panics.
// NOTE(review): the allocator interface declares allocN(n int64) without a
// context parameter — confirm whether this signature is meant to match it.
func (f *fixedTSOAllocator) allocN(_ context.Context, _ int64) (UniqueID, UniqueID, error) {
panic("not implemented") // TODO: Implement
}
func (suite *UtilSuite) TestGetZeroTime() {
n := 10
for i := 0; i < n; i++ {

View File

@ -83,7 +83,7 @@ func (e *executor) Start() {
log.Info("import executor exited")
return
case <-exeTicker.C:
tasks := e.manager.GetBy(WithStates(internalpb.ImportState_Pending))
tasks := e.manager.GetBy(WithStates(datapb.ImportTaskStateV2_Pending))
wg := &sync.WaitGroup{}
for _, task := range tasks {
wg.Add(1)
@ -105,7 +105,7 @@ func (e *executor) Start() {
}
// Slots returns the configured MaxConcurrentImportTaskNum minus the number of
// tasks the manager currently reports as Pending or InProgress.
func (e *executor) Slots() int64 {
tasks := e.manager.GetBy(WithStates(internalpb.ImportState_Pending, internalpb.ImportState_InProgress))
tasks := e.manager.GetBy(WithStates(datapb.ImportTaskStateV2_Pending, datapb.ImportTaskStateV2_InProgress))
return paramtable.Get().DataNodeCfg.MaxConcurrentImportTaskNum.GetAsInt64() - int64(len(tasks))
}
@ -128,7 +128,7 @@ func WrapLogFields(task Task, fields ...zap.Field) []zap.Field {
// handleErr logs msg at Warn level with the task's log fields and the error,
// then marks the task as Failed with the error text as its reason.
func (e *executor) handleErr(task Task, err error, msg string) {
log.Warn(msg, WrapLogFields(task, zap.Error(err))...)
e.manager.Update(task.GetTaskID(), UpdateState(internalpb.ImportState_Failed), UpdateReason(err.Error()))
e.manager.Update(task.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_Failed), UpdateReason(err.Error()))
}
func (e *executor) PreImport(task Task) {
@ -136,7 +136,7 @@ func (e *executor) PreImport(task Task) {
log.Info("start to preimport", WrapLogFields(task,
zap.Int("bufferSize", bufferSize),
zap.Any("schema", task.GetSchema()))...)
e.manager.Update(task.GetTaskID(), UpdateState(internalpb.ImportState_InProgress))
e.manager.Update(task.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_InProgress))
files := lo.Map(task.(*PreImportTask).GetFileStats(),
func(fileStat *datapb.ImportFileStats, _ int) *internalpb.ImportFile {
return fileStat.GetImportFile()
@ -175,14 +175,15 @@ func (e *executor) PreImport(task Task) {
return
}
e.manager.Update(task.GetTaskID(), UpdateState(internalpb.ImportState_Completed))
e.manager.Update(task.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_Completed))
log.Info("executor preimport done",
WrapLogFields(task, zap.Any("fileStats", task.(*PreImportTask).GetFileStats()))...)
}
func (e *executor) readFileStat(reader importutilv2.Reader, task Task, fileIdx int) error {
totalRows := 0
hashedRows := make(map[string]*datapb.PartitionRows)
totalSize := 0
hashedStats := make(map[string]*datapb.PartitionImportStats)
for {
data, err := reader.Read()
if err != nil {
@ -199,15 +200,18 @@ func (e *executor) readFileStat(reader importutilv2.Reader, task Task, fileIdx i
if err != nil {
return err
}
MergeHashedRowsCount(rowsCount, hashedRows)
MergeHashedStats(rowsCount, hashedStats)
rows := data.GetRowNum()
size := data.GetMemorySize()
totalRows += rows
log.Info("reading file stat...", WrapLogFields(task, zap.Int("readRows", rows))...)
totalSize += size
log.Info("reading file stat...", WrapLogFields(task, zap.Int("readRows", rows), zap.Int("readSize", size))...)
}
stat := &datapb.ImportFileStats{
TotalRows: int64(totalRows),
HashedRows: hashedRows,
TotalMemorySize: int64(totalSize),
HashedStats: hashedStats,
}
e.manager.Update(task.GetTaskID(), UpdateFileStat(fileIdx, stat))
return nil
@ -218,7 +222,7 @@ func (e *executor) Import(task Task) {
log.Info("start to import", WrapLogFields(task,
zap.Int("bufferSize", bufferSize),
zap.Any("schema", task.GetSchema()))...)
e.manager.Update(task.GetTaskID(), UpdateState(internalpb.ImportState_InProgress))
e.manager.Update(task.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_InProgress))
req := task.(*ImportTask).req
@ -254,7 +258,7 @@ func (e *executor) Import(task Task) {
return
}
e.manager.Update(task.GetTaskID(), UpdateState(internalpb.ImportState_Completed))
e.manager.Update(task.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_Completed))
log.Info("import done", WrapLogFields(task)...)
}
@ -304,15 +308,18 @@ func (e *executor) Sync(task *ImportTask, hashedData HashedData) ([]*conc.Future
log.Info("start to sync import data", WrapLogFields(task)...)
futures := make([]*conc.Future[error], 0)
syncTasks := make([]syncmgr.Task, 0)
segmentImportedSizes := make(map[int64]int)
for channelIdx, datas := range hashedData {
channel := task.vchannels[channelIdx]
channel := task.GetVchannels()[channelIdx]
for partitionIdx, data := range datas {
partitionID := task.partitions[partitionIdx]
segmentID := PickSegment(task, channel, partitionID, data.GetRowNum())
partitionID := task.GetPartitionIDs()[partitionIdx]
size := data.GetMemorySize()
segmentID := PickSegment(task, segmentImportedSizes, channel, partitionID, size)
syncTask, err := NewSyncTask(task.GetCtx(), task, segmentID, partitionID, channel, data)
if err != nil {
return nil, nil, err
}
segmentImportedSizes[segmentID] += size
future := e.syncMgr.SyncData(task.GetCtx(), syncTask)
futures = append(futures, future)
syncTasks = append(syncTasks, syncTask)

View File

@ -278,7 +278,7 @@ func (s *ExecutorSuite) TestExecutor_Start_Preimport() {
go s.executor.Start()
defer s.executor.Close()
s.Eventually(func() bool {
return s.manager.Get(preimportTask.GetTaskID()).GetState() == internalpb.ImportState_Completed
return s.manager.Get(preimportTask.GetTaskID()).GetState() == datapb.ImportTaskStateV2_Completed
}, 10*time.Second, 100*time.Millisecond)
}
@ -331,7 +331,7 @@ func (s *ExecutorSuite) TestExecutor_Start_Preimport_Failed() {
go s.executor.Start()
defer s.executor.Close()
s.Eventually(func() bool {
return s.manager.Get(preimportTask.GetTaskID()).GetState() == internalpb.ImportState_Failed
return s.manager.Get(preimportTask.GetTaskID()).GetState() == datapb.ImportTaskStateV2_Failed
}, 10*time.Second, 100*time.Millisecond)
}
@ -365,6 +365,8 @@ func (s *ExecutorSuite) TestExecutor_Start_Import() {
JobID: 10,
TaskID: 11,
CollectionID: 12,
PartitionIDs: []int64{13},
Vchannels: []string{"v0"},
Schema: s.schema,
Files: []*internalpb.ImportFile{
{
@ -378,8 +380,8 @@ func (s *ExecutorSuite) TestExecutor_Start_Import() {
},
RequestSegments: []*datapb.ImportRequestSegment{
{
SegmentID: 13,
PartitionID: 14,
SegmentID: 14,
PartitionID: 13,
Vchannel: "v0",
},
},
@ -390,7 +392,7 @@ func (s *ExecutorSuite) TestExecutor_Start_Import() {
go s.executor.Start()
defer s.executor.Close()
s.Eventually(func() bool {
return s.manager.Get(importTask.GetTaskID()).GetState() == internalpb.ImportState_Completed
return s.manager.Get(importTask.GetTaskID()).GetState() == datapb.ImportTaskStateV2_Completed
}, 10*time.Second, 100*time.Millisecond)
}
@ -424,6 +426,8 @@ func (s *ExecutorSuite) TestExecutor_Start_Import_Failed() {
JobID: 10,
TaskID: 11,
CollectionID: 12,
PartitionIDs: []int64{13},
Vchannels: []string{"v0"},
Schema: s.schema,
Files: []*internalpb.ImportFile{
{
@ -437,8 +441,8 @@ func (s *ExecutorSuite) TestExecutor_Start_Import_Failed() {
},
RequestSegments: []*datapb.ImportRequestSegment{
{
SegmentID: 13,
PartitionID: 14,
SegmentID: 14,
PartitionID: 13,
Vchannel: "v0",
},
},
@ -449,7 +453,7 @@ func (s *ExecutorSuite) TestExecutor_Start_Import_Failed() {
go s.executor.Start()
defer s.executor.Close()
s.Eventually(func() bool {
return s.manager.Get(importTask.GetTaskID()).GetState() == internalpb.ImportState_Failed
return s.manager.Get(importTask.GetTaskID()).GetState() == datapb.ImportTaskStateV2_Failed
}, 10*time.Second, 100*time.Millisecond)
}
@ -509,6 +513,8 @@ func (s *ExecutorSuite) TestExecutor_ImportFile() {
JobID: 10,
TaskID: 11,
CollectionID: 12,
PartitionIDs: []int64{13},
Vchannels: []string{"v0"},
Schema: s.schema,
Files: []*internalpb.ImportFile{
{
@ -522,8 +528,8 @@ func (s *ExecutorSuite) TestExecutor_ImportFile() {
},
RequestSegments: []*datapb.ImportRequestSegment{
{
SegmentID: 13,
PartitionID: 14,
SegmentID: 14,
PartitionID: 13,
Vchannel: "v0",
},
},

View File

@ -74,7 +74,7 @@ func HashData(task Task, rows *storage.InsertData) (HashedData, error) {
return res, nil
}
func GetRowsStats(task Task, rows *storage.InsertData) (map[string]*datapb.PartitionRows, error) {
func GetRowsStats(task Task, rows *storage.InsertData) (map[string]*datapb.PartitionImportStats, error) {
var (
schema = task.GetSchema()
channelNum = len(task.GetVchannels())
@ -88,8 +88,10 @@ func GetRowsStats(task Task, rows *storage.InsertData) (map[string]*datapb.Parti
partKeyField, _ := typeutil.GetPartitionKeyFieldSchema(schema)
hashRowsCount := make([][]int, channelNum)
hashDataSize := make([][]int, channelNum)
for i := 0; i < channelNum; i++ {
hashRowsCount[i] = make([]int, partitionNum)
hashDataSize[i] = make([]int, partitionNum)
}
rowNum := GetInsertDataRowCount(rows, schema)
@ -104,6 +106,7 @@ func GetRowsStats(task Task, rows *storage.InsertData) (map[string]*datapb.Parti
for i := 0; i < rowNum; i++ {
p1, p2 := fn1(id, num), fn2(rows.GetRow(i))
hashRowsCount[p1][p2]++
hashDataSize[p1][p2] += rows.GetRowSize(i)
id++
}
} else {
@ -113,20 +116,23 @@ func GetRowsStats(task Task, rows *storage.InsertData) (map[string]*datapb.Parti
row := rows.GetRow(i)
p1, p2 := f1(row), f2(row)
hashRowsCount[p1][p2]++
hashDataSize[p1][p2] += rows.GetRowSize(i)
}
}
res := make(map[string]*datapb.PartitionRows)
res := make(map[string]*datapb.PartitionImportStats)
for _, channel := range task.GetVchannels() {
res[channel] = &datapb.PartitionRows{
res[channel] = &datapb.PartitionImportStats{
PartitionRows: make(map[int64]int64),
PartitionDataSize: make(map[int64]int64),
}
}
for i, partitionRows := range hashRowsCount {
for i := range hashRowsCount {
channel := task.GetVchannels()[i]
for j, n := range partitionRows {
for j := range hashRowsCount[i] {
partition := task.GetPartitionIDs()[j]
res[channel].PartitionRows[partition] = int64(n)
res[channel].PartitionRows[partition] = int64(hashRowsCount[i][j])
res[channel].PartitionDataSize[partition] = int64(hashDataSize[i][j])
}
}
return res, nil
@ -187,15 +193,17 @@ func hashByID() func(id int64, shardNum int64) int64 {
}
}
func MergeHashedRowsCount(src, dst map[string]*datapb.PartitionRows) {
for channel, partitionRows := range src {
for partitionID, rowCount := range partitionRows.GetPartitionRows() {
func MergeHashedStats(src, dst map[string]*datapb.PartitionImportStats) {
for channel, partitionStats := range src {
for partitionID := range partitionStats.GetPartitionRows() {
if dst[channel] == nil {
dst[channel] = &datapb.PartitionRows{
dst[channel] = &datapb.PartitionImportStats{
PartitionRows: make(map[int64]int64),
PartitionDataSize: make(map[int64]int64),
}
}
dst[channel].PartitionRows[partitionID] += rowCount
dst[channel].PartitionRows[partitionID] += partitionStats.GetPartitionRows()[partitionID]
dst[channel].PartitionDataSize[partitionID] += partitionStats.GetPartitionDataSize()[partitionID]
}
}
}

View File

@ -48,7 +48,7 @@ func (t TaskType) String() string {
type TaskFilter func(task Task) bool
func WithStates(states ...internalpb.ImportState) TaskFilter {
func WithStates(states ...datapb.ImportTaskStateV2) TaskFilter {
return func(task Task) bool {
for _, state := range states {
if task.GetState() == state {
@ -67,7 +67,7 @@ func WithType(taskType TaskType) TaskFilter {
type UpdateAction func(task Task)
func UpdateState(state internalpb.ImportState) UpdateAction {
func UpdateState(state datapb.ImportTaskStateV2) UpdateAction {
return func(t Task) {
switch t.GetType() {
case PreImportTaskType:
@ -93,7 +93,7 @@ func UpdateFileStat(idx int, fileStat *datapb.ImportFileStats) UpdateAction {
return func(task Task) {
if it, ok := task.(*PreImportTask); ok {
it.PreImportTask.FileStats[idx].TotalRows = fileStat.GetTotalRows()
it.PreImportTask.FileStats[idx].HashedRows = fileStat.GetHashedRows()
it.PreImportTask.FileStats[idx].HashedStats = fileStat.GetHashedStats()
}
}
}
@ -133,7 +133,7 @@ type Task interface {
GetPartitionIDs() []int64
GetVchannels() []string
GetType() TaskType
GetState() internalpb.ImportState
GetState() datapb.ImportTaskStateV2
GetReason() string
GetSchema() *schemapb.CollectionSchema
GetCtx() context.Context
@ -146,7 +146,10 @@ type PreImportTask struct {
*datapb.PreImportTask
ctx context.Context
cancel context.CancelFunc
partitionIDs []int64
vchannels []string
schema *schemapb.CollectionSchema
options []*commonpb.KeyValuePair
}
func NewPreImportTask(req *datapb.PreImportRequest) Task {
@ -161,18 +164,26 @@ func NewPreImportTask(req *datapb.PreImportRequest) Task {
JobID: req.GetJobID(),
TaskID: req.GetTaskID(),
CollectionID: req.GetCollectionID(),
PartitionIDs: req.GetPartitionIDs(),
Vchannels: req.GetVchannels(),
State: internalpb.ImportState_Pending,
State: datapb.ImportTaskStateV2_Pending,
FileStats: fileStats,
Options: req.GetOptions(),
},
ctx: ctx,
cancel: cancel,
partitionIDs: req.GetPartitionIDs(),
vchannels: req.GetVchannels(),
schema: req.GetSchema(),
options: req.GetOptions(),
}
}
// GetPartitionIDs returns the partition IDs stored in the task's partitionIDs
// field. The slice is returned as-is, not copied.
func (p *PreImportTask) GetPartitionIDs() []int64 {
return p.partitionIDs
}
// GetVchannels returns the virtual channel names stored in the task's
// vchannels field. The slice is returned as-is, not copied.
func (p *PreImportTask) GetVchannels() []string {
return p.vchannels
}
// GetType always reports PreImportTaskType for a PreImportTask.
func (p *PreImportTask) GetType() TaskType {
return PreImportTaskType
}
@ -181,6 +192,10 @@ func (p *PreImportTask) GetSchema() *schemapb.CollectionSchema {
return p.schema
}
// GetOptions returns the key/value options stored in the task's options field.
func (p *PreImportTask) GetOptions() []*commonpb.KeyValuePair {
return p.options
}
// GetCtx returns the context stored on the task.
func (p *PreImportTask) GetCtx() context.Context {
return p.ctx
}
@ -195,7 +210,10 @@ func (p *PreImportTask) Clone() Task {
PreImportTask: proto.Clone(p.PreImportTask).(*datapb.PreImportTask),
ctx: ctx,
cancel: cancel,
partitionIDs: p.GetPartitionIDs(),
vchannels: p.GetVchannels(),
schema: p.GetSchema(),
options: p.GetOptions(),
}
}
@ -203,11 +221,8 @@ type ImportTask struct {
*datapb.ImportTaskV2
ctx context.Context
cancel context.CancelFunc
schema *schemapb.CollectionSchema
segmentsInfo map[int64]*datapb.ImportSegmentInfo
req *datapb.ImportRequest
vchannels []string
partitions []int64
metaCaches map[string]metacache.MetaCache
}
@ -218,29 +233,21 @@ func NewImportTask(req *datapb.ImportRequest) Task {
JobID: req.GetJobID(),
TaskID: req.GetTaskID(),
CollectionID: req.GetCollectionID(),
State: internalpb.ImportState_Pending,
Options: req.GetOptions(),
State: datapb.ImportTaskStateV2_Pending,
},
ctx: ctx,
cancel: cancel,
schema: req.GetSchema(),
segmentsInfo: make(map[int64]*datapb.ImportSegmentInfo),
req: req,
}
task.Init(req)
task.initMetaCaches(req)
return task
}
func (t *ImportTask) Init(req *datapb.ImportRequest) {
func (t *ImportTask) initMetaCaches(req *datapb.ImportRequest) {
metaCaches := make(map[string]metacache.MetaCache)
channels := make(map[string]struct{})
partitions := make(map[int64]struct{})
for _, info := range req.GetRequestSegments() {
channels[info.GetVchannel()] = struct{}{}
partitions[info.GetPartitionID()] = struct{}{}
}
schema := typeutil.AppendSystemFields(req.GetSchema())
for _, channel := range lo.Keys(channels) {
for _, channel := range req.GetVchannels() {
info := &datapb.ChannelWatchInfo{
Vchan: &datapb.VchannelInfo{
CollectionID: req.GetCollectionID(),
@ -253,8 +260,6 @@ func (t *ImportTask) Init(req *datapb.ImportRequest) {
})
metaCaches[channel] = metaCache
}
t.vchannels = lo.Keys(channels)
t.partitions = lo.Keys(partitions)
t.metaCaches = metaCaches
}
@ -263,15 +268,19 @@ func (t *ImportTask) GetType() TaskType {
}
func (t *ImportTask) GetPartitionIDs() []int64 {
return t.partitions
return t.req.GetPartitionIDs()
}
func (t *ImportTask) GetVchannels() []string {
return t.vchannels
return t.req.GetVchannels()
}
func (t *ImportTask) GetSchema() *schemapb.CollectionSchema {
return t.schema
return t.req.GetSchema()
}
// GetOptions returns the options carried by the ImportRequest held in t.req.
func (t *ImportTask) GetOptions() []*commonpb.KeyValuePair {
return t.req.GetOptions()
}
func (t *ImportTask) GetCtx() context.Context {
@ -292,11 +301,8 @@ func (t *ImportTask) Clone() Task {
ImportTaskV2: proto.Clone(t.ImportTaskV2).(*datapb.ImportTaskV2),
ctx: ctx,
cancel: cancel,
schema: t.GetSchema(),
segmentsInfo: t.segmentsInfo,
req: t.req,
vchannels: t.GetVchannels(),
partitions: t.GetPartitionIDs(),
metaCaches: t.metaCaches,
}
}

View File

@ -23,7 +23,6 @@ import (
"github.com/stretchr/testify/assert"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/internalpb"
)
func TestImportManager(t *testing.T) {
@ -36,7 +35,7 @@ func TestImportManager(t *testing.T) {
CollectionID: 3,
SegmentIDs: []int64{5, 6},
NodeID: 7,
State: internalpb.ImportState_Pending,
State: datapb.ImportTaskStateV2_Pending,
},
ctx: ctx,
cancel: cancel,
@ -53,7 +52,7 @@ func TestImportManager(t *testing.T) {
CollectionID: 3,
SegmentIDs: []int64{5, 6},
NodeID: 7,
State: internalpb.ImportState_Completed,
State: datapb.ImportTaskStateV2_Completed,
},
ctx: ctx,
cancel: cancel,
@ -62,13 +61,13 @@ func TestImportManager(t *testing.T) {
tasks := manager.GetBy()
assert.Equal(t, 2, len(tasks))
tasks = manager.GetBy(WithStates(internalpb.ImportState_Completed))
tasks = manager.GetBy(WithStates(datapb.ImportTaskStateV2_Completed))
assert.Equal(t, 1, len(tasks))
assert.Equal(t, task2.GetTaskID(), tasks[0].GetTaskID())
manager.Update(task1.GetTaskID(), UpdateState(internalpb.ImportState_Failed))
manager.Update(task1.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_Failed))
task := manager.Get(task1.GetTaskID())
assert.Equal(t, internalpb.ImportState_Failed, task.GetState())
assert.Equal(t, datapb.ImportTaskStateV2_Failed, task.GetState())
manager.Remove(task1.GetTaskID())
tasks = manager.GetBy()

View File

@ -29,12 +29,12 @@ import (
"github.com/milvus-io/milvus/internal/datanode/metacache"
"github.com/milvus-io/milvus/internal/datanode/syncmgr"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/internal/querycoordv2/params"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/paramtable"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
@ -99,30 +99,27 @@ func NewImportSegmentInfo(syncTask syncmgr.Task, task *ImportTask) (*datapb.Impo
}, nil
}
func PickSegment(task *ImportTask, vchannel string, partitionID int64, rows int) int64 {
func PickSegment(task *ImportTask, segmentImportedSizes map[int64]int, vchannel string, partitionID int64, sizeToImport int) int64 {
candidates := lo.Filter(task.req.GetRequestSegments(), func(info *datapb.ImportRequestSegment, _ int) bool {
return info.GetVchannel() == vchannel && info.GetPartitionID() == partitionID
})
importedSegments := lo.KeyBy(task.GetSegmentsInfo(), func(segment *datapb.ImportSegmentInfo) int64 {
return segment.GetSegmentID()
})
segmentMaxSize := paramtable.Get().DataCoordCfg.SegmentMaxSize.GetAsInt() * 1024 * 1024
for _, candidate := range candidates {
var importedRows int64 = 0
if segment, ok := importedSegments[candidate.GetSegmentID()]; ok {
importedRows = segment.GetImportedRows()
}
if importedRows+int64(rows) <= candidate.GetMaxRows() {
sizeImported := segmentImportedSizes[candidate.GetSegmentID()]
if sizeImported+sizeToImport <= segmentMaxSize {
return candidate.GetSegmentID()
}
}
segmentID := lo.MinBy(task.GetSegmentsInfo(), func(s1, s2 *datapb.ImportSegmentInfo) bool {
return s1.GetImportedRows() < s2.GetImportedRows()
return segmentImportedSizes[s1.GetSegmentID()] < segmentImportedSizes[s2.GetSegmentID()]
}).GetSegmentID()
log.Warn("failed to pick an appropriate segment, opt for the smallest one instead",
WrapLogFields(task, zap.Int64("segmentID", segmentID), zap.Int64("maxRows", candidates[0].GetMaxRows()),
zap.Int("rows", rows), zap.Int64("importedRows", importedSegments[segmentID].GetImportedRows()))...)
WrapLogFields(task, zap.Int64("segmentID", segmentID),
zap.Int("sizeToImport", sizeToImport),
zap.Int("sizeImported", segmentImportedSizes[segmentID]),
zap.Int("segmentMaxSize", segmentMaxSize))...)
return segmentID
}
@ -206,14 +203,14 @@ func GetInsertDataRowCount(data *storage.InsertData, schema *schemapb.Collection
func LogStats(manager TaskManager) {
logFunc := func(tasks []Task, taskType TaskType) {
byState := lo.GroupBy(tasks, func(t Task) internalpb.ImportState {
byState := lo.GroupBy(tasks, func(t Task) datapb.ImportTaskStateV2 {
return t.GetState()
})
log.Info("import task stats", zap.String("type", taskType.String()),
zap.Int("pending", len(byState[internalpb.ImportState_Pending])),
zap.Int("inProgress", len(byState[internalpb.ImportState_InProgress])),
zap.Int("completed", len(byState[internalpb.ImportState_Completed])),
zap.Int("failed", len(byState[internalpb.ImportState_Failed])))
zap.Int("pending", len(byState[datapb.ImportTaskStateV2_Pending])),
zap.Int("inProgress", len(byState[datapb.ImportTaskStateV2_InProgress])),
zap.Int("completed", len(byState[datapb.ImportTaskStateV2_Completed])),
zap.Int("failed", len(byState[datapb.ImportTaskStateV2_Failed])))
}
tasks := manager.GetBy(WithType(PreImportTaskType))
logFunc(tasks, PreImportTaskType)

View File

@ -61,8 +61,8 @@ func Test_AppendSystemFieldsData(t *testing.T) {
Begin: 0,
End: count,
},
Schema: schema,
},
schema: schema,
}
pkField.DataType = schemapb.DataType_Int64

View File

@ -1007,7 +1007,7 @@ func (node *DataNode) QueryPreImport(ctx context.Context, req *datapb.QueryPreIm
if task == nil || task.GetType() != importv2.PreImportTaskType {
status = merr.Status(importv2.WrapNoTaskError(req.GetTaskID(), importv2.PreImportTaskType))
}
log.RatedInfo(10, "datanode query preimport done", zap.String("state", task.GetState().String()),
log.RatedInfo(10, "datanode query preimport", zap.String("state", task.GetState().String()),
zap.String("reason", task.GetReason()))
return &datapb.QueryPreImportResponse{
Status: status,
@ -1041,7 +1041,7 @@ func (node *DataNode) QueryImport(ctx context.Context, req *datapb.QueryImportRe
if task == nil || task.GetType() != importv2.ImportTaskType {
status = merr.Status(importv2.WrapNoTaskError(req.GetTaskID(), importv2.ImportTaskType))
}
log.RatedInfo(10, "datanode query import done", zap.String("state", task.GetState().String()),
log.RatedInfo(10, "datanode query import", zap.String("state", task.GetState().String()),
zap.String("reason", task.GetReason()))
return &datapb.QueryImportResponse{
Status: status,

View File

@ -631,3 +631,21 @@ func (c *Client) GcControl(ctx context.Context, req *datapb.GcControlRequest, op
return client.GcControl(ctx, req)
})
}
// ImportV2 issues the ImportV2 RPC against DataCoord through wrapGrpcCall.
// opts is accepted for interface compatibility but is not forwarded to the
// underlying call.
func (c *Client) ImportV2(ctx context.Context, in *internalpb.ImportRequestInternal, opts ...grpc.CallOption) (*internalpb.ImportResponse, error) {
	call := func(dc datapb.DataCoordClient) (*internalpb.ImportResponse, error) {
		return dc.ImportV2(ctx, in)
	}
	return wrapGrpcCall(ctx, c, call)
}
// GetImportProgress issues the GetImportProgress RPC against DataCoord through
// wrapGrpcCall. opts is accepted for interface compatibility but is not
// forwarded to the underlying call.
func (c *Client) GetImportProgress(ctx context.Context, in *internalpb.GetImportProgressRequest, opts ...grpc.CallOption) (*internalpb.GetImportProgressResponse, error) {
	call := func(dc datapb.DataCoordClient) (*internalpb.GetImportProgressResponse, error) {
		return dc.GetImportProgress(ctx, in)
	}
	return wrapGrpcCall(ctx, c, call)
}
// ListImports issues the ListImports RPC against DataCoord through
// wrapGrpcCall. opts is accepted for interface compatibility but is not
// forwarded to the underlying call.
func (c *Client) ListImports(ctx context.Context, in *internalpb.ListImportsRequestInternal, opts ...grpc.CallOption) (*internalpb.ListImportsResponse, error) {
	call := func(dc datapb.DataCoordClient) (*internalpb.ListImportsResponse, error) {
		return dc.ListImports(ctx, in)
	}
	return wrapGrpcCall(ctx, c, call)
}

View File

@ -489,3 +489,15 @@ func (s *Server) ReportDataNodeTtMsgs(ctx context.Context, req *datapb.ReportDat
// GcControl delegates the request to the wrapped dataCoord and returns its
// result unchanged.
func (s *Server) GcControl(ctx context.Context, req *datapb.GcControlRequest) (*commonpb.Status, error) {
return s.dataCoord.GcControl(ctx, req)
}
// ImportV2 delegates the request to the wrapped dataCoord and returns its
// result unchanged.
func (s *Server) ImportV2(ctx context.Context, in *internalpb.ImportRequestInternal) (*internalpb.ImportResponse, error) {
return s.dataCoord.ImportV2(ctx, in)
}
// GetImportProgress delegates the request to the wrapped dataCoord and returns
// its result unchanged.
func (s *Server) GetImportProgress(ctx context.Context, in *internalpb.GetImportProgressRequest) (*internalpb.GetImportProgressResponse, error) {
return s.dataCoord.GetImportProgress(ctx, in)
}
// ListImports delegates the request to the wrapped dataCoord and returns its
// result unchanged.
func (s *Server) ListImports(ctx context.Context, in *internalpb.ListImportsRequestInternal) (*internalpb.ListImportsResponse, error) {
return s.dataCoord.ListImports(ctx, in)
}

View File

@ -198,3 +198,21 @@ func (c *Client) GetDdChannel(ctx context.Context, req *internalpb.GetDdChannelR
return client.GetDdChannel(ctx, req)
})
}
// ImportV2 issues the ImportV2 RPC against the proxy through wrapGrpcCall.
// opts is accepted for interface compatibility but is not forwarded to the
// underlying call.
func (c *Client) ImportV2(ctx context.Context, req *internalpb.ImportRequest, opts ...grpc.CallOption) (*internalpb.ImportResponse, error) {
	call := func(pc proxypb.ProxyClient) (*internalpb.ImportResponse, error) {
		return pc.ImportV2(ctx, req)
	}
	return wrapGrpcCall(ctx, c, call)
}
// GetImportProgress issues the GetImportProgress RPC against the proxy through
// wrapGrpcCall. opts is accepted for interface compatibility but is not
// forwarded to the underlying call.
func (c *Client) GetImportProgress(ctx context.Context, req *internalpb.GetImportProgressRequest, opts ...grpc.CallOption) (*internalpb.GetImportProgressResponse, error) {
	call := func(pc proxypb.ProxyClient) (*internalpb.GetImportProgressResponse, error) {
		return pc.GetImportProgress(ctx, req)
	}
	return wrapGrpcCall(ctx, c, call)
}
// ListImports issues the ListImports RPC against the proxy through
// wrapGrpcCall. opts is accepted for interface compatibility but is not
// forwarded to the underlying call.
func (c *Client) ListImports(ctx context.Context, req *internalpb.ListImportsRequest, opts ...grpc.CallOption) (*internalpb.ListImportsResponse, error) {
	call := func(pc proxypb.ProxyClient) (*internalpb.ListImportsResponse, error) {
		return pc.ListImports(ctx, req)
	}
	return wrapGrpcCall(ctx, c, call)
}

View File

@ -433,3 +433,32 @@ func Test_GetDdChannel(t *testing.T) {
_, err = client.GetDdChannel(ctx, &internalpb.GetDdChannelRequest{})
assert.ErrorIs(t, err, context.DeadlineExceeded)
}
// Test_ImportV2 checks that the proxy client's ImportV2, GetImportProgress and
// ListImports methods reach the underlying ProxyClient and surface no error
// when the mocked RPC succeeds.
func Test_ImportV2(t *testing.T) {
	paramtable.Init()

	ctx := context.Background()
	client, err := NewClient(ctx, "test", 1)
	assert.NoError(t, err)
	defer client.Close()

	// Route every ReCall straight to the mocked proxy so the wrappers under
	// test execute their closures against it.
	mockProxy := mocks.NewMockProxyClient(t)
	mockGrpcClient := mocks.NewMockGrpcClient[proxypb.ProxyClient](t)
	mockGrpcClient.EXPECT().Close().Return(nil)
	mockGrpcClient.EXPECT().ReCall(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, f func(proxypb.ProxyClient) (interface{}, error)) (interface{}, error) {
		return f(mockProxy)
	})
	client.(*Client).grpcClient = mockGrpcClient

	// assert.NoError (rather than assert.Nil) reports the error text on failure
	// and matches the NewClient check above.
	mockProxy.EXPECT().ImportV2(mock.Anything, mock.Anything).Return(&internalpb.ImportResponse{Status: merr.Success()}, nil)
	_, err = client.ImportV2(ctx, &internalpb.ImportRequest{})
	assert.NoError(t, err)

	mockProxy.EXPECT().GetImportProgress(mock.Anything, mock.Anything).Return(&internalpb.GetImportProgressResponse{Status: merr.Success()}, nil)
	_, err = client.GetImportProgress(ctx, &internalpb.GetImportProgressRequest{})
	assert.NoError(t, err)

	mockProxy.EXPECT().ListImports(mock.Anything, mock.Anything).Return(&internalpb.ListImportsResponse{Status: merr.Success()}, nil)
	_, err = client.ListImports(ctx, &internalpb.ListImportsRequest{})
	assert.NoError(t, err)
}

View File

@ -1207,3 +1207,15 @@ func (s *Server) AllocTimestamp(ctx context.Context, req *milvuspb.AllocTimestam
// ReplicateMessage delegates the request to the wrapped proxy and returns its
// result unchanged.
func (s *Server) ReplicateMessage(ctx context.Context, req *milvuspb.ReplicateMessageRequest) (*milvuspb.ReplicateMessageResponse, error) {
return s.proxy.ReplicateMessage(ctx, req)
}
// ImportV2 delegates the request to the wrapped proxy and returns its result
// unchanged.
func (s *Server) ImportV2(ctx context.Context, req *internalpb.ImportRequest) (*internalpb.ImportResponse, error) {
return s.proxy.ImportV2(ctx, req)
}
// GetImportProgress delegates the request to the wrapped proxy and returns its
// result unchanged.
func (s *Server) GetImportProgress(ctx context.Context, req *internalpb.GetImportProgressRequest) (*internalpb.GetImportProgressResponse, error) {
return s.proxy.GetImportProgress(ctx, req)
}
// ListImports delegates the request to the wrapped proxy and returns its
// result unchanged.
func (s *Server) ListImports(ctx context.Context, req *internalpb.ListImportsRequest) (*internalpb.ListImportsResponse, error) {
return s.proxy.ListImports(ctx, req)
}

View File

@ -135,6 +135,16 @@ type DataCoordCatalog interface {
AlterSegmentIndexes(ctx context.Context, newSegIdxes []*model.SegmentIndex) error
DropSegmentIndex(ctx context.Context, collID, partID, segID, buildID typeutil.UniqueID) error
SaveImportJob(job *datapb.ImportJob) error
ListImportJobs() ([]*datapb.ImportJob, error)
DropImportJob(jobID int64) error
SavePreImportTask(task *datapb.PreImportTask) error
ListPreImportTasks() ([]*datapb.PreImportTask, error)
DropPreImportTask(taskID int64) error
SaveImportTask(task *datapb.ImportTaskV2) error
ListImportTasks() ([]*datapb.ImportTaskV2, error)
DropImportTask(taskID int64) error
GcConfirm(ctx context.Context, collectionID, partitionID typeutil.UniqueID) bool
}

View File

@ -24,6 +24,9 @@ const (
SegmentStatslogPathPrefix = MetaPrefix + "/statslog"
ChannelRemovePrefix = MetaPrefix + "/channel-removal"
ChannelCheckpointPrefix = MetaPrefix + "/channel-cp"
ImportJobPrefix = MetaPrefix + "/import-job"
ImportTaskPrefix = MetaPrefix + "/import-task"
PreImportTaskPrefix = MetaPrefix + "/preimport-task"
NonRemoveFlagTomestone = "non-removed"
RemoveFlagTomestone = "removed"

View File

@ -690,6 +690,102 @@ func (kc *Catalog) DropSegmentIndex(ctx context.Context, collID, partID, segID,
return nil
}
// SaveImportJob serializes job with proto.Marshal and stores the bytes in
// MetaKv under the key built from the job's ID. A marshal error is returned
// directly and nothing is written.
func (kc *Catalog) SaveImportJob(job *datapb.ImportJob) error {
	data, err := proto.Marshal(job)
	if err != nil {
		return err
	}
	return kc.MetaKv.Save(buildImportJobKey(job.GetJobID()), string(data))
}
// ListImportJobs loads every value stored under ImportJobPrefix and decodes
// each one into a datapb.ImportJob.
//
// It returns a non-nil (possibly empty) slice on success. If the load or any
// single unmarshal fails, it returns nil and that error; no partial result is
// exposed.
func (kc *Catalog) ListImportJobs() ([]*datapb.ImportJob, error) {
	_, values, err := kc.MetaKv.LoadWithPrefix(ImportJobPrefix)
	if err != nil {
		return nil, err
	}
	// The final length is known up front, so size the slice once instead of
	// growing it during append.
	jobs := make([]*datapb.ImportJob, 0, len(values))
	for _, value := range values {
		job := &datapb.ImportJob{}
		if err := proto.Unmarshal([]byte(value), job); err != nil {
			return nil, err
		}
		jobs = append(jobs, job)
	}
	return jobs, nil
}
// DropImportJob removes the MetaKv entry stored under the key built from jobID
// and returns the store's error, if any.
func (kc *Catalog) DropImportJob(jobID int64) error {
	return kc.MetaKv.Remove(buildImportJobKey(jobID))
}
// SavePreImportTask serializes task with proto.Marshal and stores the bytes in
// MetaKv under the key built from the task's ID. A marshal error is returned
// directly and nothing is written.
func (kc *Catalog) SavePreImportTask(task *datapb.PreImportTask) error {
	data, err := proto.Marshal(task)
	if err != nil {
		return err
	}
	return kc.MetaKv.Save(buildPreImportTaskKey(task.GetTaskID()), string(data))
}
// ListPreImportTasks loads every value stored under PreImportTaskPrefix and
// decodes each one into a datapb.PreImportTask.
//
// It returns a non-nil (possibly empty) slice on success. If the load or any
// single unmarshal fails, it returns nil and that error; no partial result is
// exposed.
func (kc *Catalog) ListPreImportTasks() ([]*datapb.PreImportTask, error) {
	_, values, err := kc.MetaKv.LoadWithPrefix(PreImportTaskPrefix)
	if err != nil {
		return nil, err
	}
	// The final length is known up front, so size the slice once instead of
	// growing it during append.
	tasks := make([]*datapb.PreImportTask, 0, len(values))
	for _, value := range values {
		task := &datapb.PreImportTask{}
		if err := proto.Unmarshal([]byte(value), task); err != nil {
			return nil, err
		}
		tasks = append(tasks, task)
	}
	return tasks, nil
}
// DropPreImportTask removes the MetaKv entry stored under the key built from
// taskID and returns the store's error, if any.
func (kc *Catalog) DropPreImportTask(taskID int64) error {
	return kc.MetaKv.Remove(buildPreImportTaskKey(taskID))
}
// SaveImportTask serializes task with proto.Marshal and stores the bytes in
// MetaKv under the key built from the task's ID. A marshal error is returned
// directly and nothing is written.
func (kc *Catalog) SaveImportTask(task *datapb.ImportTaskV2) error {
	data, err := proto.Marshal(task)
	if err != nil {
		return err
	}
	return kc.MetaKv.Save(buildImportTaskKey(task.GetTaskID()), string(data))
}
// ListImportTasks loads every value stored under ImportTaskPrefix and decodes
// each one into a datapb.ImportTaskV2.
//
// It returns a non-nil (possibly empty) slice on success. If the load or any
// single unmarshal fails, it returns nil and that error; no partial result is
// exposed.
func (kc *Catalog) ListImportTasks() ([]*datapb.ImportTaskV2, error) {
	_, values, err := kc.MetaKv.LoadWithPrefix(ImportTaskPrefix)
	if err != nil {
		return nil, err
	}
	// The final length is known up front, so size the slice once instead of
	// growing it during append.
	tasks := make([]*datapb.ImportTaskV2, 0, len(values))
	for _, value := range values {
		task := &datapb.ImportTaskV2{}
		if err := proto.Unmarshal([]byte(value), task); err != nil {
			return nil, err
		}
		tasks = append(tasks, task)
	}
	return tasks, nil
}
// DropImportTask removes the MetaKv entry stored under the key built from
// taskID and returns the store's error, if any.
func (kc *Catalog) DropImportTask(taskID int64) error {
	return kc.MetaKv.Remove(buildImportTaskKey(taskID))
}
const allPartitionID = -1
// GcConfirm returns true if related collection/partition is not found.

View File

@ -1238,3 +1238,182 @@ func TestCatalog_GcConfirm(t *testing.T) {
Return(nil, nil, nil)
assert.True(t, kc.GcConfirm(context.TODO(), 100, 10000))
}
// TestCatalog_Import exercises the Save/List/Drop catalog methods for import
// jobs, pre-import tasks and import tasks against a mocked MetaKv. The three
// entity kinds share identical scenarios, so each scenario lives in one helper
// and every subtest only supplies the method under test.
func TestCatalog_Import(t *testing.T) {
	kc := &Catalog{}
	mockErr := errors.New("mock error")

	job := &datapb.ImportJob{
		JobID: 0,
	}
	pit := &datapb.PreImportTask{
		JobID:  0,
		TaskID: 1,
	}
	it := &datapb.ImportTaskV2{
		JobID:  0,
		TaskID: 2,
	}

	// checkSave runs the save scenarios: a valid message with a working store
	// succeeds, a nil message is expected to fail even though the Save mock
	// returns nil, and a store failure is propagated.
	checkSave := func(t *testing.T, save, saveNil func() error) {
		txn := mocks.NewMetaKv(t)
		txn.EXPECT().Save(mock.Anything, mock.Anything).Return(nil)
		kc.MetaKv = txn
		assert.NoError(t, save())
		assert.Error(t, saveNil())

		txn = mocks.NewMetaKv(t)
		txn.EXPECT().Save(mock.Anything, mock.Anything).Return(mockErr)
		kc.MetaKv = txn
		assert.Error(t, save())
	}

	// checkList runs the list scenarios: one well-formed stored value yields
	// one item, an undecodable value yields an error, and a load failure is
	// propagated. list reports the number of items returned.
	checkList := func(t *testing.T, msg proto.Message, list func() (int, error)) {
		value, err := proto.Marshal(msg)
		assert.NoError(t, err)

		txn := mocks.NewMetaKv(t)
		txn.EXPECT().LoadWithPrefix(mock.Anything).Return(nil, []string{string(value)}, nil)
		kc.MetaKv = txn
		count, err := list()
		assert.NoError(t, err)
		assert.Equal(t, 1, count)

		txn = mocks.NewMetaKv(t)
		txn.EXPECT().LoadWithPrefix(mock.Anything).Return(nil, []string{"@#%#^#"}, nil)
		kc.MetaKv = txn
		_, err = list()
		assert.Error(t, err)

		txn = mocks.NewMetaKv(t)
		txn.EXPECT().LoadWithPrefix(mock.Anything).Return(nil, nil, mockErr)
		kc.MetaKv = txn
		_, err = list()
		assert.Error(t, err)
	}

	// checkDrop runs the drop scenarios: a working store succeeds and a store
	// failure is propagated.
	checkDrop := func(t *testing.T, drop func() error) {
		txn := mocks.NewMetaKv(t)
		txn.EXPECT().Remove(mock.Anything).Return(nil)
		kc.MetaKv = txn
		assert.NoError(t, drop())

		txn = mocks.NewMetaKv(t)
		txn.EXPECT().Remove(mock.Anything).Return(mockErr)
		kc.MetaKv = txn
		assert.Error(t, drop())
	}

	t.Run("SaveImportJob", func(t *testing.T) {
		checkSave(t,
			func() error { return kc.SaveImportJob(job) },
			func() error { return kc.SaveImportJob(nil) })
	})

	t.Run("ListImportJobs", func(t *testing.T) {
		checkList(t, job, func() (int, error) {
			jobs, err := kc.ListImportJobs()
			return len(jobs), err
		})
	})

	t.Run("DropImportJob", func(t *testing.T) {
		checkDrop(t, func() error { return kc.DropImportJob(job.GetJobID()) })
	})

	t.Run("SavePreImportTask", func(t *testing.T) {
		checkSave(t,
			func() error { return kc.SavePreImportTask(pit) },
			func() error { return kc.SavePreImportTask(nil) })
	})

	t.Run("ListPreImportTasks", func(t *testing.T) {
		checkList(t, pit, func() (int, error) {
			tasks, err := kc.ListPreImportTasks()
			return len(tasks), err
		})
	})

	t.Run("DropPreImportTask", func(t *testing.T) {
		checkDrop(t, func() error { return kc.DropPreImportTask(pit.GetTaskID()) })
	})

	t.Run("SaveImportTask", func(t *testing.T) {
		checkSave(t,
			func() error { return kc.SaveImportTask(it) },
			func() error { return kc.SaveImportTask(nil) })
	})

	t.Run("ListImportTasks", func(t *testing.T) {
		checkList(t, it, func() (int, error) {
			tasks, err := kc.ListImportTasks()
			return len(tasks), err
		})
	})

	t.Run("DropImportTask", func(t *testing.T) {
		checkDrop(t, func() error { return kc.DropImportTask(it.GetTaskID()) })
	})
}

View File

@ -282,3 +282,15 @@ func buildCollectionPrefix(collectionID typeutil.UniqueID) string {
// buildPartitionPrefix returns "<SegmentPrefix>/<collectionID>/<partitionID>".
func buildPartitionPrefix(collectionID, partitionID typeutil.UniqueID) string {
return fmt.Sprintf("%s/%d/%d", SegmentPrefix, collectionID, partitionID)
}
// buildImportJobKey returns "<ImportJobPrefix>/<jobID>".
func buildImportJobKey(jobID int64) string {
return fmt.Sprintf("%s/%d", ImportJobPrefix, jobID)
}
// buildImportTaskKey returns "<ImportTaskPrefix>/<taskID>".
func buildImportTaskKey(taskID int64) string {
return fmt.Sprintf("%s/%d", ImportTaskPrefix, taskID)
}
// buildPreImportTaskKey returns "<PreImportTaskPrefix>/<taskID>".
func buildPreImportTaskKey(taskID int64) string {
return fmt.Sprintf("%s/%d", PreImportTaskPrefix, taskID)
}

View File

@ -430,6 +430,90 @@ func (_c *DataCoordCatalog_DropChannelCheckpoint_Call) RunAndReturn(run func(con
return _c
}
// DropImportJob provides a mock function with given fields: jobID
func (_m *DataCoordCatalog) DropImportJob(jobID int64) error {
ret := _m.Called(jobID)
var r0 error
// A return value registered as func(int64) error (see RunAndReturn) is invoked
// with the call's argument; anything else is read back as a plain error.
if rf, ok := ret.Get(0).(func(int64) error); ok {
r0 = rf(jobID)
} else {
r0 = ret.Error(0)
}
return r0
}
// DataCoordCatalog_DropImportJob_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DropImportJob'
type DataCoordCatalog_DropImportJob_Call struct {
*mock.Call
}
// DropImportJob is a helper method to define mock.On call
// - jobID int64
func (_e *DataCoordCatalog_Expecter) DropImportJob(jobID interface{}) *DataCoordCatalog_DropImportJob_Call {
return &DataCoordCatalog_DropImportJob_Call{Call: _e.mock.On("DropImportJob", jobID)}
}
// Run registers a callback that receives the call's first argument asserted to int64.
func (_c *DataCoordCatalog_DropImportJob_Call) Run(run func(jobID int64)) *DataCoordCatalog_DropImportJob_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(int64))
})
return _c
}
// Return sets the fixed error the mocked DropImportJob returns.
func (_c *DataCoordCatalog_DropImportJob_Call) Return(_a0 error) *DataCoordCatalog_DropImportJob_Call {
_c.Call.Return(_a0)
return _c
}
// RunAndReturn stores run as the return value; DropImportJob detects the
// function type and calls it to compute the result.
func (_c *DataCoordCatalog_DropImportJob_Call) RunAndReturn(run func(int64) error) *DataCoordCatalog_DropImportJob_Call {
_c.Call.Return(run)
return _c
}
// DropImportTask provides a mock function with given fields: taskID
func (_m *DataCoordCatalog) DropImportTask(taskID int64) error {
ret := _m.Called(taskID)
var r0 error
// A return value registered as func(int64) error (see RunAndReturn) is invoked
// with the call's argument; anything else is read back as a plain error.
if rf, ok := ret.Get(0).(func(int64) error); ok {
r0 = rf(taskID)
} else {
r0 = ret.Error(0)
}
return r0
}
// DataCoordCatalog_DropImportTask_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DropImportTask'
type DataCoordCatalog_DropImportTask_Call struct {
*mock.Call
}
// DropImportTask is a helper method to define mock.On call
// - taskID int64
func (_e *DataCoordCatalog_Expecter) DropImportTask(taskID interface{}) *DataCoordCatalog_DropImportTask_Call {
return &DataCoordCatalog_DropImportTask_Call{Call: _e.mock.On("DropImportTask", taskID)}
}
// Run registers a callback that receives the call's first argument asserted to int64.
func (_c *DataCoordCatalog_DropImportTask_Call) Run(run func(taskID int64)) *DataCoordCatalog_DropImportTask_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(int64))
})
return _c
}
// Return sets the fixed error the mocked DropImportTask returns.
func (_c *DataCoordCatalog_DropImportTask_Call) Return(_a0 error) *DataCoordCatalog_DropImportTask_Call {
_c.Call.Return(_a0)
return _c
}
// RunAndReturn stores run as the return value; DropImportTask detects the
// function type and calls it to compute the result.
func (_c *DataCoordCatalog_DropImportTask_Call) RunAndReturn(run func(int64) error) *DataCoordCatalog_DropImportTask_Call {
_c.Call.Return(run)
return _c
}
// DropIndex provides a mock function with given fields: ctx, collID, dropIdxID
func (_m *DataCoordCatalog) DropIndex(ctx context.Context, collID int64, dropIdxID int64) error {
ret := _m.Called(ctx, collID, dropIdxID)
@ -474,6 +558,48 @@ func (_c *DataCoordCatalog_DropIndex_Call) RunAndReturn(run func(context.Context
return _c
}
// DropPreImportTask provides a mock function with given fields: taskID
func (_m *DataCoordCatalog) DropPreImportTask(taskID int64) error {
ret := _m.Called(taskID)
var r0 error
// A return value registered as func(int64) error (see RunAndReturn) is invoked
// with the call's argument; anything else is read back as a plain error.
if rf, ok := ret.Get(0).(func(int64) error); ok {
r0 = rf(taskID)
} else {
r0 = ret.Error(0)
}
return r0
}
// DataCoordCatalog_DropPreImportTask_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DropPreImportTask'
type DataCoordCatalog_DropPreImportTask_Call struct {
*mock.Call
}
// DropPreImportTask is a helper method to define mock.On call
// - taskID int64
func (_e *DataCoordCatalog_Expecter) DropPreImportTask(taskID interface{}) *DataCoordCatalog_DropPreImportTask_Call {
return &DataCoordCatalog_DropPreImportTask_Call{Call: _e.mock.On("DropPreImportTask", taskID)}
}
// Run registers a callback that receives the call's first argument asserted to int64.
func (_c *DataCoordCatalog_DropPreImportTask_Call) Run(run func(taskID int64)) *DataCoordCatalog_DropPreImportTask_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(int64))
})
return _c
}
// Return sets the fixed error the mocked DropPreImportTask returns.
func (_c *DataCoordCatalog_DropPreImportTask_Call) Return(_a0 error) *DataCoordCatalog_DropPreImportTask_Call {
_c.Call.Return(_a0)
return _c
}
// RunAndReturn stores run as the return value; DropPreImportTask detects the
// function type and calls it to compute the result.
func (_c *DataCoordCatalog_DropPreImportTask_Call) RunAndReturn(run func(int64) error) *DataCoordCatalog_DropPreImportTask_Call {
_c.Call.Return(run)
return _c
}
// DropSegment provides a mock function with given fields: ctx, segment
func (_m *DataCoordCatalog) DropSegment(ctx context.Context, segment *datapb.SegmentInfo) error {
ret := _m.Called(ctx, segment)
@ -661,6 +787,112 @@ func (_c *DataCoordCatalog_ListChannelCheckpoint_Call) RunAndReturn(run func(con
return _c
}
// ListImportJobs provides a mock function with given fields:
func (_m *DataCoordCatalog) ListImportJobs() ([]*datapb.ImportJob, error) {
ret := _m.Called()
var r0 []*datapb.ImportJob
var r1 error
// A single function producing both results (see RunAndReturn) takes
// precedence over per-position return values.
if rf, ok := ret.Get(0).(func() ([]*datapb.ImportJob, error)); ok {
return rf()
}
// Otherwise each position is either a function to call or a plain value.
if rf, ok := ret.Get(0).(func() []*datapb.ImportJob); ok {
r0 = rf()
} else {
// A nil first return value leaves r0 as a nil slice instead of failing the
// type assertion.
if ret.Get(0) != nil {
r0 = ret.Get(0).([]*datapb.ImportJob)
}
}
if rf, ok := ret.Get(1).(func() error); ok {
r1 = rf()
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// DataCoordCatalog_ListImportJobs_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'ListImportJobs'
type DataCoordCatalog_ListImportJobs_Call struct {
*mock.Call
}
// ListImportJobs is a helper method to define mock.On call
func (_e *DataCoordCatalog_Expecter) ListImportJobs() *DataCoordCatalog_ListImportJobs_Call {
return &DataCoordCatalog_ListImportJobs_Call{Call: _e.mock.On("ListImportJobs")}
}
// Run registers a no-argument callback invoked when the mocked method is called.
func (_c *DataCoordCatalog_ListImportJobs_Call) Run(run func()) *DataCoordCatalog_ListImportJobs_Call {
_c.Call.Run(func(args mock.Arguments) {
run()
})
return _c
}
// Return sets the fixed slice and error the mocked ListImportJobs returns.
func (_c *DataCoordCatalog_ListImportJobs_Call) Return(_a0 []*datapb.ImportJob, _a1 error) *DataCoordCatalog_ListImportJobs_Call {
_c.Call.Return(_a0, _a1)
return _c
}
// RunAndReturn stores run as the return value; ListImportJobs detects the
// function type and calls it to compute both results.
func (_c *DataCoordCatalog_ListImportJobs_Call) RunAndReturn(run func() ([]*datapb.ImportJob, error)) *DataCoordCatalog_ListImportJobs_Call {
_c.Call.Return(run)
return _c
}
// ListImportTasks provides a mock function with given fields:
func (_m *DataCoordCatalog) ListImportTasks() ([]*datapb.ImportTaskV2, error) {
ret := _m.Called()
var r0 []*datapb.ImportTaskV2
var r1 error
// A single function producing both results (see RunAndReturn) takes
// precedence over per-position return values.
if rf, ok := ret.Get(0).(func() ([]*datapb.ImportTaskV2, error)); ok {
return rf()
}
// Otherwise each position is either a function to call or a plain value.
if rf, ok := ret.Get(0).(func() []*datapb.ImportTaskV2); ok {
r0 = rf()
} else {
// A nil first return value leaves r0 as a nil slice instead of failing the
// type assertion.
if ret.Get(0) != nil {
r0 = ret.Get(0).([]*datapb.ImportTaskV2)
}
}
if rf, ok := ret.Get(1).(func() error); ok {
r1 = rf()
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// DataCoordCatalog_ListImportTasks_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'ListImportTasks'
type DataCoordCatalog_ListImportTasks_Call struct {
*mock.Call
}
// ListImportTasks is a helper method to define mock.On call
func (_e *DataCoordCatalog_Expecter) ListImportTasks() *DataCoordCatalog_ListImportTasks_Call {
return &DataCoordCatalog_ListImportTasks_Call{Call: _e.mock.On("ListImportTasks")}
}
// Run registers a no-argument callback invoked when the mocked method is called.
func (_c *DataCoordCatalog_ListImportTasks_Call) Run(run func()) *DataCoordCatalog_ListImportTasks_Call {
_c.Call.Run(func(args mock.Arguments) {
run()
})
return _c
}
// Return sets the fixed slice and error the mocked ListImportTasks returns.
func (_c *DataCoordCatalog_ListImportTasks_Call) Return(_a0 []*datapb.ImportTaskV2, _a1 error) *DataCoordCatalog_ListImportTasks_Call {
_c.Call.Return(_a0, _a1)
return _c
}
// RunAndReturn stores run as the return value; ListImportTasks detects the
// function type and calls it to compute both results.
func (_c *DataCoordCatalog_ListImportTasks_Call) RunAndReturn(run func() ([]*datapb.ImportTaskV2, error)) *DataCoordCatalog_ListImportTasks_Call {
_c.Call.Return(run)
return _c
}
// ListIndexes provides a mock function with given fields: ctx
func (_m *DataCoordCatalog) ListIndexes(ctx context.Context) ([]*model.Index, error) {
ret := _m.Called(ctx)
@ -715,6 +947,59 @@ func (_c *DataCoordCatalog_ListIndexes_Call) RunAndReturn(run func(context.Conte
return _c
}
// ListPreImportTasks is the mock implementation of ListPreImportTasks. It
// records the call and resolves its results from the configured return
// values: slot 0 may hold a func() ([]*datapb.PreImportTask, error) that
// supplies both results, a func() []*datapb.PreImportTask, or a plain slice
// (nil allowed); slot 1 may hold a func() error or an error value.
func (_m *DataCoordCatalog) ListPreImportTasks() ([]*datapb.PreImportTask, error) {
	ret := _m.Called()

	first := ret.Get(0)
	// A single function producing both results takes precedence.
	if both, ok := first.(func() ([]*datapb.PreImportTask, error)); ok {
		return both()
	}

	var tasks []*datapb.PreImportTask
	if produce, ok := first.(func() []*datapb.PreImportTask); ok {
		tasks = produce()
	} else if first != nil {
		tasks = first.([]*datapb.PreImportTask)
	}

	if fail, ok := ret.Get(1).(func() error); ok {
		return tasks, fail()
	}
	return tasks, ret.Error(1)
}
// DataCoordCatalog_ListPreImportTasks_Call wraps *mock.Call for the
// 'ListPreImportTasks' expectation so that its Run, Return, and RunAndReturn
// methods are typed to that method's signature rather than the generic
// mock.Call versions they shadow.
type DataCoordCatalog_ListPreImportTasks_Call struct {
*mock.Call
}
// ListPreImportTasks is a helper method to define mock.On call.
// It registers an expectation for "ListPreImportTasks" (which takes no
// arguments) on the underlying mock and returns the typed call wrapper.
func (_e *DataCoordCatalog_Expecter) ListPreImportTasks() *DataCoordCatalog_ListPreImportTasks_Call {
return &DataCoordCatalog_ListPreImportTasks_Call{Call: _e.mock.On("ListPreImportTasks")}
}
// Run installs a callback on the underlying mock.Call that invokes run.
// ListPreImportTasks takes no arguments, so the recorded mock.Arguments are
// ignored. The receiver is returned for chaining.
func (_c *DataCoordCatalog_ListPreImportTasks_Call) Run(run func()) *DataCoordCatalog_ListPreImportTasks_Call {
	handler := func(mock.Arguments) {
		run()
	}
	_c.Call.Run(handler)
	return _c
}
// Return sets the values the mocked ListPreImportTasks hands back: _a0 as the
// task slice (return slot 0) and _a1 as the error (return slot 1).
func (_c *DataCoordCatalog_ListPreImportTasks_Call) Return(_a0 []*datapb.PreImportTask, _a1 error) *DataCoordCatalog_ListPreImportTasks_Call {
_c.Call.Return(_a0, _a1)
return _c
}
// RunAndReturn installs run as the sole return value. The mock method checks
// return slot 0 for a function of exactly this signature and, when found,
// calls it and returns its results directly.
func (_c *DataCoordCatalog_ListPreImportTasks_Call) RunAndReturn(run func() ([]*datapb.PreImportTask, error)) *DataCoordCatalog_ListPreImportTasks_Call {
_c.Call.Return(run)
return _c
}
// ListSegmentIndexes provides a mock function with given fields: ctx
func (_m *DataCoordCatalog) ListSegmentIndexes(ctx context.Context) ([]*model.SegmentIndex, error) {
ret := _m.Called(ctx)
@ -996,6 +1281,132 @@ func (_c *DataCoordCatalog_SaveDroppedSegmentsInBatch_Call) RunAndReturn(run fun
return _c
}
// SaveImportJob is the mock implementation of SaveImportJob. It records the
// call with job and returns the configured error: if return slot 0 holds a
// func(*datapb.ImportJob) error it is invoked with job, otherwise the slot is
// read as an error value.
func (_m *DataCoordCatalog) SaveImportJob(job *datapb.ImportJob) error {
	ret := _m.Called(job)

	if hook, ok := ret.Get(0).(func(*datapb.ImportJob) error); ok {
		return hook(job)
	}
	return ret.Error(0)
}
// DataCoordCatalog_SaveImportJob_Call wraps *mock.Call for the
// 'SaveImportJob' expectation so that its Run, Return, and RunAndReturn
// methods are typed to that method's signature rather than the generic
// mock.Call versions they shadow.
type DataCoordCatalog_SaveImportJob_Call struct {
*mock.Call
}
// SaveImportJob is a helper method to define mock.On call.
// It registers an expectation for "SaveImportJob" on the underlying mock and
// returns the typed call wrapper. The parameter is declared interface{} and
// forwarded to mock.On unchanged; the mocked method's concrete type is:
//   - job *datapb.ImportJob
func (_e *DataCoordCatalog_Expecter) SaveImportJob(job interface{}) *DataCoordCatalog_SaveImportJob_Call {
return &DataCoordCatalog_SaveImportJob_Call{Call: _e.mock.On("SaveImportJob", job)}
}
// Run installs a callback on the underlying mock.Call that forwards the
// recorded argument to run as a *datapb.ImportJob. The type assertion panics
// if the recorded argument has a different dynamic type.
func (_c *DataCoordCatalog_SaveImportJob_Call) Run(run func(job *datapb.ImportJob)) *DataCoordCatalog_SaveImportJob_Call {
	handler := func(args mock.Arguments) {
		job := args[0].(*datapb.ImportJob)
		run(job)
	}
	_c.Call.Run(handler)
	return _c
}
// Return sets the error (return slot 0) that the mocked SaveImportJob hands
// back.
func (_c *DataCoordCatalog_SaveImportJob_Call) Return(_a0 error) *DataCoordCatalog_SaveImportJob_Call {
_c.Call.Return(_a0)
return _c
}
// RunAndReturn installs run as the return value. The mock method checks
// return slot 0 for a function of this signature and, when found, calls it
// with the actual job to obtain the error.
func (_c *DataCoordCatalog_SaveImportJob_Call) RunAndReturn(run func(*datapb.ImportJob) error) *DataCoordCatalog_SaveImportJob_Call {
_c.Call.Return(run)
return _c
}
// SaveImportTask is the mock implementation of SaveImportTask. It records the
// call with task and returns the configured error: if return slot 0 holds a
// func(*datapb.ImportTaskV2) error it is invoked with task, otherwise the
// slot is read as an error value.
func (_m *DataCoordCatalog) SaveImportTask(task *datapb.ImportTaskV2) error {
	ret := _m.Called(task)

	if hook, ok := ret.Get(0).(func(*datapb.ImportTaskV2) error); ok {
		return hook(task)
	}
	return ret.Error(0)
}
// DataCoordCatalog_SaveImportTask_Call wraps *mock.Call for the
// 'SaveImportTask' expectation so that its Run, Return, and RunAndReturn
// methods are typed to that method's signature rather than the generic
// mock.Call versions they shadow.
type DataCoordCatalog_SaveImportTask_Call struct {
*mock.Call
}
// SaveImportTask is a helper method to define mock.On call.
// It registers an expectation for "SaveImportTask" on the underlying mock and
// returns the typed call wrapper. The parameter is declared interface{} and
// forwarded to mock.On unchanged; the mocked method's concrete type is:
//   - task *datapb.ImportTaskV2
func (_e *DataCoordCatalog_Expecter) SaveImportTask(task interface{}) *DataCoordCatalog_SaveImportTask_Call {
return &DataCoordCatalog_SaveImportTask_Call{Call: _e.mock.On("SaveImportTask", task)}
}
// Run installs a callback on the underlying mock.Call that forwards the
// recorded argument to run as a *datapb.ImportTaskV2. The type assertion
// panics if the recorded argument has a different dynamic type.
func (_c *DataCoordCatalog_SaveImportTask_Call) Run(run func(task *datapb.ImportTaskV2)) *DataCoordCatalog_SaveImportTask_Call {
	handler := func(args mock.Arguments) {
		task := args[0].(*datapb.ImportTaskV2)
		run(task)
	}
	_c.Call.Run(handler)
	return _c
}
// Return sets the error (return slot 0) that the mocked SaveImportTask hands
// back.
func (_c *DataCoordCatalog_SaveImportTask_Call) Return(_a0 error) *DataCoordCatalog_SaveImportTask_Call {
_c.Call.Return(_a0)
return _c
}
// RunAndReturn installs run as the return value. The mock method checks
// return slot 0 for a function of this signature and, when found, calls it
// with the actual task to obtain the error.
func (_c *DataCoordCatalog_SaveImportTask_Call) RunAndReturn(run func(*datapb.ImportTaskV2) error) *DataCoordCatalog_SaveImportTask_Call {
_c.Call.Return(run)
return _c
}
// SavePreImportTask is the mock implementation of SavePreImportTask. It
// records the call with task and returns the configured error: if return
// slot 0 holds a func(*datapb.PreImportTask) error it is invoked with task,
// otherwise the slot is read as an error value.
func (_m *DataCoordCatalog) SavePreImportTask(task *datapb.PreImportTask) error {
	ret := _m.Called(task)

	if hook, ok := ret.Get(0).(func(*datapb.PreImportTask) error); ok {
		return hook(task)
	}
	return ret.Error(0)
}
// DataCoordCatalog_SavePreImportTask_Call wraps *mock.Call for the
// 'SavePreImportTask' expectation so that its Run, Return, and RunAndReturn
// methods are typed to that method's signature rather than the generic
// mock.Call versions they shadow.
type DataCoordCatalog_SavePreImportTask_Call struct {
*mock.Call
}
// SavePreImportTask is a helper method to define mock.On call.
// It registers an expectation for "SavePreImportTask" on the underlying mock
// and returns the typed call wrapper. The parameter is declared interface{}
// and forwarded to mock.On unchanged; the mocked method's concrete type is:
//   - task *datapb.PreImportTask
func (_e *DataCoordCatalog_Expecter) SavePreImportTask(task interface{}) *DataCoordCatalog_SavePreImportTask_Call {
return &DataCoordCatalog_SavePreImportTask_Call{Call: _e.mock.On("SavePreImportTask", task)}
}
// Run installs a callback on the underlying mock.Call that forwards the
// recorded argument to run as a *datapb.PreImportTask. The type assertion
// panics if the recorded argument has a different dynamic type.
func (_c *DataCoordCatalog_SavePreImportTask_Call) Run(run func(task *datapb.PreImportTask)) *DataCoordCatalog_SavePreImportTask_Call {
	handler := func(args mock.Arguments) {
		task := args[0].(*datapb.PreImportTask)
		run(task)
	}
	_c.Call.Run(handler)
	return _c
}
// Return sets the error (return slot 0) that the mocked SavePreImportTask
// hands back.
func (_c *DataCoordCatalog_SavePreImportTask_Call) Return(_a0 error) *DataCoordCatalog_SavePreImportTask_Call {
_c.Call.Return(_a0)
return _c
}
// RunAndReturn installs run as the return value. The mock method checks
// return slot 0 for a function of this signature and, when found, calls it
// with the actual task to obtain the error.
func (_c *DataCoordCatalog_SavePreImportTask_Call) RunAndReturn(run func(*datapb.PreImportTask) error) *DataCoordCatalog_SavePreImportTask_Call {
_c.Call.Return(run)
return _c
}
// ShouldDropChannel provides a mock function with given fields: ctx, channel
func (_m *DataCoordCatalog) ShouldDropChannel(ctx context.Context, channel string) bool {
ret := _m.Called(ctx, channel)

View File

@ -1026,6 +1026,61 @@ func (_c *MockDataCoord_GetFlushedSegments_Call) RunAndReturn(run func(context.C
return _c
}
// GetImportProgress is the mock implementation of GetImportProgress. It
// records the call with _a0 and _a1 and resolves its results from the
// configured return values: slot 0 may hold a function with the full method
// signature (supplying both results), a function returning only the response,
// or a *internalpb.GetImportProgressResponse (nil allowed); slot 1 may hold a
// function returning the error or an error value.
func (_m *MockDataCoord) GetImportProgress(_a0 context.Context, _a1 *internalpb.GetImportProgressRequest) (*internalpb.GetImportProgressResponse, error) {
	ret := _m.Called(_a0, _a1)

	first := ret.Get(0)
	// A single function producing both results takes precedence.
	if both, ok := first.(func(context.Context, *internalpb.GetImportProgressRequest) (*internalpb.GetImportProgressResponse, error)); ok {
		return both(_a0, _a1)
	}

	var resp *internalpb.GetImportProgressResponse
	if produce, ok := first.(func(context.Context, *internalpb.GetImportProgressRequest) *internalpb.GetImportProgressResponse); ok {
		resp = produce(_a0, _a1)
	} else if first != nil {
		resp = first.(*internalpb.GetImportProgressResponse)
	}

	if fail, ok := ret.Get(1).(func(context.Context, *internalpb.GetImportProgressRequest) error); ok {
		return resp, fail(_a0, _a1)
	}
	return resp, ret.Error(1)
}
// MockDataCoord_GetImportProgress_Call wraps *mock.Call for the
// 'GetImportProgress' expectation so that its Run, Return, and RunAndReturn
// methods are typed to that method's signature rather than the generic
// mock.Call versions they shadow.
type MockDataCoord_GetImportProgress_Call struct {
*mock.Call
}
// GetImportProgress is a helper method to define mock.On call.
// It registers an expectation for "GetImportProgress" on the underlying mock
// and returns the typed call wrapper. Parameters are declared interface{} and
// forwarded to mock.On unchanged; the mocked method's concrete types are:
//   - _a0 context.Context
//   - _a1 *internalpb.GetImportProgressRequest
func (_e *MockDataCoord_Expecter) GetImportProgress(_a0 interface{}, _a1 interface{}) *MockDataCoord_GetImportProgress_Call {
return &MockDataCoord_GetImportProgress_Call{Call: _e.mock.On("GetImportProgress", _a0, _a1)}
}
// Run installs a callback on the underlying mock.Call that forwards the
// recorded arguments to run with their concrete types. Each type assertion
// panics if the recorded argument has a different dynamic type.
func (_c *MockDataCoord_GetImportProgress_Call) Run(run func(_a0 context.Context, _a1 *internalpb.GetImportProgressRequest)) *MockDataCoord_GetImportProgress_Call {
	handler := func(args mock.Arguments) {
		ctx := args[0].(context.Context)
		req := args[1].(*internalpb.GetImportProgressRequest)
		run(ctx, req)
	}
	_c.Call.Run(handler)
	return _c
}
// Return sets the values the mocked GetImportProgress hands back: _a0 as the
// response (return slot 0) and _a1 as the error (return slot 1).
func (_c *MockDataCoord_GetImportProgress_Call) Return(_a0 *internalpb.GetImportProgressResponse, _a1 error) *MockDataCoord_GetImportProgress_Call {
_c.Call.Return(_a0, _a1)
return _c
}
// RunAndReturn installs run as the sole return value. The mock method checks
// return slot 0 for a function of exactly this signature and, when found,
// calls it with the actual arguments and returns its results directly.
func (_c *MockDataCoord_GetImportProgress_Call) RunAndReturn(run func(context.Context, *internalpb.GetImportProgressRequest) (*internalpb.GetImportProgressResponse, error)) *MockDataCoord_GetImportProgress_Call {
_c.Call.Return(run)
return _c
}
// GetIndexBuildProgress provides a mock function with given fields: _a0, _a1
func (_m *MockDataCoord) GetIndexBuildProgress(_a0 context.Context, _a1 *indexpb.GetIndexBuildProgressRequest) (*indexpb.GetIndexBuildProgressResponse, error) {
ret := _m.Called(_a0, _a1)
@ -1961,6 +2016,61 @@ func (_c *MockDataCoord_Import_Call) RunAndReturn(run func(context.Context, *dat
return _c
}
// ImportV2 is the mock implementation of ImportV2. It records the call with
// _a0 and _a1 and resolves its results from the configured return values:
// slot 0 may hold a function with the full method signature (supplying both
// results), a function returning only the response, or a
// *internalpb.ImportResponse (nil allowed); slot 1 may hold a function
// returning the error or an error value.
func (_m *MockDataCoord) ImportV2(_a0 context.Context, _a1 *internalpb.ImportRequestInternal) (*internalpb.ImportResponse, error) {
	ret := _m.Called(_a0, _a1)

	first := ret.Get(0)
	// A single function producing both results takes precedence.
	if both, ok := first.(func(context.Context, *internalpb.ImportRequestInternal) (*internalpb.ImportResponse, error)); ok {
		return both(_a0, _a1)
	}

	var resp *internalpb.ImportResponse
	if produce, ok := first.(func(context.Context, *internalpb.ImportRequestInternal) *internalpb.ImportResponse); ok {
		resp = produce(_a0, _a1)
	} else if first != nil {
		resp = first.(*internalpb.ImportResponse)
	}

	if fail, ok := ret.Get(1).(func(context.Context, *internalpb.ImportRequestInternal) error); ok {
		return resp, fail(_a0, _a1)
	}
	return resp, ret.Error(1)
}
// MockDataCoord_ImportV2_Call wraps *mock.Call for the 'ImportV2' expectation
// so that its Run, Return, and RunAndReturn methods are typed to that
// method's signature rather than the generic mock.Call versions they shadow.
type MockDataCoord_ImportV2_Call struct {
*mock.Call
}
// ImportV2 is a helper method to define mock.On call.
// It registers an expectation for "ImportV2" on the underlying mock and
// returns the typed call wrapper. Parameters are declared interface{} and
// forwarded to mock.On unchanged; the mocked method's concrete types are:
//   - _a0 context.Context
//   - _a1 *internalpb.ImportRequestInternal
func (_e *MockDataCoord_Expecter) ImportV2(_a0 interface{}, _a1 interface{}) *MockDataCoord_ImportV2_Call {
return &MockDataCoord_ImportV2_Call{Call: _e.mock.On("ImportV2", _a0, _a1)}
}
// Run installs a callback on the underlying mock.Call that forwards the
// recorded arguments to run with their concrete types. Each type assertion
// panics if the recorded argument has a different dynamic type.
func (_c *MockDataCoord_ImportV2_Call) Run(run func(_a0 context.Context, _a1 *internalpb.ImportRequestInternal)) *MockDataCoord_ImportV2_Call {
	handler := func(args mock.Arguments) {
		ctx := args[0].(context.Context)
		req := args[1].(*internalpb.ImportRequestInternal)
		run(ctx, req)
	}
	_c.Call.Run(handler)
	return _c
}
// Return sets the values the mocked ImportV2 hands back: _a0 as the response
// (return slot 0) and _a1 as the error (return slot 1).
func (_c *MockDataCoord_ImportV2_Call) Return(_a0 *internalpb.ImportResponse, _a1 error) *MockDataCoord_ImportV2_Call {
_c.Call.Return(_a0, _a1)
return _c
}
// RunAndReturn installs run as the sole return value. The mock method checks
// return slot 0 for a function of exactly this signature and, when found,
// calls it with the actual arguments and returns its results directly.
func (_c *MockDataCoord_ImportV2_Call) RunAndReturn(run func(context.Context, *internalpb.ImportRequestInternal) (*internalpb.ImportResponse, error)) *MockDataCoord_ImportV2_Call {
_c.Call.Return(run)
return _c
}
// Init provides a mock function with given fields:
func (_m *MockDataCoord) Init() error {
ret := _m.Called()
@ -2002,6 +2112,61 @@ func (_c *MockDataCoord_Init_Call) RunAndReturn(run func() error) *MockDataCoord
return _c
}
// ListImports is the mock implementation of ListImports. It records the call
// with _a0 and _a1 and resolves its results from the configured return
// values: slot 0 may hold a function with the full method signature
// (supplying both results), a function returning only the response, or a
// *internalpb.ListImportsResponse (nil allowed); slot 1 may hold a function
// returning the error or an error value.
func (_m *MockDataCoord) ListImports(_a0 context.Context, _a1 *internalpb.ListImportsRequestInternal) (*internalpb.ListImportsResponse, error) {
	ret := _m.Called(_a0, _a1)

	first := ret.Get(0)
	// A single function producing both results takes precedence.
	if both, ok := first.(func(context.Context, *internalpb.ListImportsRequestInternal) (*internalpb.ListImportsResponse, error)); ok {
		return both(_a0, _a1)
	}

	var resp *internalpb.ListImportsResponse
	if produce, ok := first.(func(context.Context, *internalpb.ListImportsRequestInternal) *internalpb.ListImportsResponse); ok {
		resp = produce(_a0, _a1)
	} else if first != nil {
		resp = first.(*internalpb.ListImportsResponse)
	}

	if fail, ok := ret.Get(1).(func(context.Context, *internalpb.ListImportsRequestInternal) error); ok {
		return resp, fail(_a0, _a1)
	}
	return resp, ret.Error(1)
}
// MockDataCoord_ListImports_Call wraps *mock.Call for the 'ListImports'
// expectation so that its Run, Return, and RunAndReturn methods are typed to
// that method's signature rather than the generic mock.Call versions they
// shadow.
type MockDataCoord_ListImports_Call struct {
*mock.Call
}
// ListImports is a helper method to define mock.On call.
// It registers an expectation for "ListImports" on the underlying mock and
// returns the typed call wrapper. Parameters are declared interface{} and
// forwarded to mock.On unchanged; the mocked method's concrete types are:
//   - _a0 context.Context
//   - _a1 *internalpb.ListImportsRequestInternal
func (_e *MockDataCoord_Expecter) ListImports(_a0 interface{}, _a1 interface{}) *MockDataCoord_ListImports_Call {
return &MockDataCoord_ListImports_Call{Call: _e.mock.On("ListImports", _a0, _a1)}
}
// Run installs a callback on the underlying mock.Call that forwards the
// recorded arguments to run with their concrete types. Each type assertion
// panics if the recorded argument has a different dynamic type.
func (_c *MockDataCoord_ListImports_Call) Run(run func(_a0 context.Context, _a1 *internalpb.ListImportsRequestInternal)) *MockDataCoord_ListImports_Call {
	handler := func(args mock.Arguments) {
		ctx := args[0].(context.Context)
		req := args[1].(*internalpb.ListImportsRequestInternal)
		run(ctx, req)
	}
	_c.Call.Run(handler)
	return _c
}
// Return sets the values the mocked ListImports hands back: _a0 as the
// response (return slot 0) and _a1 as the error (return slot 1).
func (_c *MockDataCoord_ListImports_Call) Return(_a0 *internalpb.ListImportsResponse, _a1 error) *MockDataCoord_ListImports_Call {
_c.Call.Return(_a0, _a1)
return _c
}
// RunAndReturn installs run as the sole return value. The mock method checks
// return slot 0 for a function of exactly this signature and, when found,
// calls it with the actual arguments and returns its results directly.
func (_c *MockDataCoord_ListImports_Call) RunAndReturn(run func(context.Context, *internalpb.ListImportsRequestInternal) (*internalpb.ListImportsResponse, error)) *MockDataCoord_ListImports_Call {
_c.Call.Return(run)
return _c
}
// ManualCompaction provides a mock function with given fields: _a0, _a1
func (_m *MockDataCoord) ManualCompaction(_a0 context.Context, _a1 *milvuspb.ManualCompactionRequest) (*milvuspb.ManualCompactionResponse, error) {
ret := _m.Called(_a0, _a1)

File diff suppressed because it is too large Load Diff

View File

@ -1334,6 +1334,76 @@ func (_c *MockDataCoordClient_GetFlushedSegments_Call) RunAndReturn(run func(con
return _c
}
// GetImportProgress is the mock implementation of GetImportProgress. It
// flattens ctx, in, and every call option into one argument list for Called,
// then resolves its results from the configured return values: slot 0 may
// hold a function with the full method signature (supplying both results), a
// function returning only the response, or a
// *internalpb.GetImportProgressResponse (nil allowed); slot 1 may hold a
// function returning the error or an error value.
func (_m *MockDataCoordClient) GetImportProgress(ctx context.Context, in *internalpb.GetImportProgressRequest, opts ...grpc.CallOption) (*internalpb.GetImportProgressResponse, error) {
	callArgs := make([]interface{}, 0, len(opts)+2)
	callArgs = append(callArgs, ctx, in)
	for _, opt := range opts {
		callArgs = append(callArgs, opt)
	}
	ret := _m.Called(callArgs...)

	first := ret.Get(0)
	// A single function producing both results takes precedence.
	if both, ok := first.(func(context.Context, *internalpb.GetImportProgressRequest, ...grpc.CallOption) (*internalpb.GetImportProgressResponse, error)); ok {
		return both(ctx, in, opts...)
	}

	var resp *internalpb.GetImportProgressResponse
	if produce, ok := first.(func(context.Context, *internalpb.GetImportProgressRequest, ...grpc.CallOption) *internalpb.GetImportProgressResponse); ok {
		resp = produce(ctx, in, opts...)
	} else if first != nil {
		resp = first.(*internalpb.GetImportProgressResponse)
	}

	if fail, ok := ret.Get(1).(func(context.Context, *internalpb.GetImportProgressRequest, ...grpc.CallOption) error); ok {
		return resp, fail(ctx, in, opts...)
	}
	return resp, ret.Error(1)
}
// MockDataCoordClient_GetImportProgress_Call wraps *mock.Call for the
// 'GetImportProgress' expectation so that its Run, Return, and RunAndReturn
// methods are typed to that method's signature rather than the generic
// mock.Call versions they shadow.
type MockDataCoordClient_GetImportProgress_Call struct {
*mock.Call
}
// GetImportProgress is a helper method to define mock.On call.
// It registers an expectation for "GetImportProgress" on the underlying mock
// and returns the typed call wrapper. ctx, in, and any opts are flattened
// into a single argument list for mock.On. Parameters are declared
// interface{}; the mocked method's concrete types are:
//   - ctx context.Context
//   - in *internalpb.GetImportProgressRequest
//   - opts ...grpc.CallOption
func (_e *MockDataCoordClient_Expecter) GetImportProgress(ctx interface{}, in interface{}, opts ...interface{}) *MockDataCoordClient_GetImportProgress_Call {
return &MockDataCoordClient_GetImportProgress_Call{Call: _e.mock.On("GetImportProgress",
append([]interface{}{ctx, in}, opts...)...)}
}
// Run installs a callback on the underlying mock.Call that forwards the
// recorded arguments to run with their concrete types. Every argument after
// the first two is treated as a grpc.CallOption; nil entries are passed
// through as nil options.
func (_c *MockDataCoordClient_GetImportProgress_Call) Run(run func(ctx context.Context, in *internalpb.GetImportProgressRequest, opts ...grpc.CallOption)) *MockDataCoordClient_GetImportProgress_Call {
	_c.Call.Run(func(args mock.Arguments) {
		callOpts := make([]grpc.CallOption, len(args)-2)
		for pos, raw := range args[2:] {
			if raw == nil {
				continue // keep the zero-value option for nil entries
			}
			callOpts[pos] = raw.(grpc.CallOption)
		}
		ctx := args[0].(context.Context)
		req := args[1].(*internalpb.GetImportProgressRequest)
		run(ctx, req, callOpts...)
	})
	return _c
}
// Return sets the values the mocked GetImportProgress hands back: _a0 as the
// response (return slot 0) and _a1 as the error (return slot 1).
func (_c *MockDataCoordClient_GetImportProgress_Call) Return(_a0 *internalpb.GetImportProgressResponse, _a1 error) *MockDataCoordClient_GetImportProgress_Call {
_c.Call.Return(_a0, _a1)
return _c
}
// RunAndReturn installs run as the sole return value. The mock method checks
// return slot 0 for a function of exactly this signature and, when found,
// calls it with the actual arguments and returns its results directly.
func (_c *MockDataCoordClient_GetImportProgress_Call) RunAndReturn(run func(context.Context, *internalpb.GetImportProgressRequest, ...grpc.CallOption) (*internalpb.GetImportProgressResponse, error)) *MockDataCoordClient_GetImportProgress_Call {
_c.Call.Return(run)
return _c
}
// GetIndexBuildProgress provides a mock function with given fields: ctx, in, opts
func (_m *MockDataCoordClient) GetIndexBuildProgress(ctx context.Context, in *indexpb.GetIndexBuildProgressRequest, opts ...grpc.CallOption) (*indexpb.GetIndexBuildProgressResponse, error) {
_va := make([]interface{}, len(opts))
@ -2524,6 +2594,146 @@ func (_c *MockDataCoordClient_Import_Call) RunAndReturn(run func(context.Context
return _c
}
// ImportV2 is the mock implementation of ImportV2. It flattens ctx, in, and
// every call option into one argument list for Called, then resolves its
// results from the configured return values: slot 0 may hold a function with
// the full method signature (supplying both results), a function returning
// only the response, or a *internalpb.ImportResponse (nil allowed); slot 1
// may hold a function returning the error or an error value.
func (_m *MockDataCoordClient) ImportV2(ctx context.Context, in *internalpb.ImportRequestInternal, opts ...grpc.CallOption) (*internalpb.ImportResponse, error) {
	callArgs := make([]interface{}, 0, len(opts)+2)
	callArgs = append(callArgs, ctx, in)
	for _, opt := range opts {
		callArgs = append(callArgs, opt)
	}
	ret := _m.Called(callArgs...)

	first := ret.Get(0)
	// A single function producing both results takes precedence.
	if both, ok := first.(func(context.Context, *internalpb.ImportRequestInternal, ...grpc.CallOption) (*internalpb.ImportResponse, error)); ok {
		return both(ctx, in, opts...)
	}

	var resp *internalpb.ImportResponse
	if produce, ok := first.(func(context.Context, *internalpb.ImportRequestInternal, ...grpc.CallOption) *internalpb.ImportResponse); ok {
		resp = produce(ctx, in, opts...)
	} else if first != nil {
		resp = first.(*internalpb.ImportResponse)
	}

	if fail, ok := ret.Get(1).(func(context.Context, *internalpb.ImportRequestInternal, ...grpc.CallOption) error); ok {
		return resp, fail(ctx, in, opts...)
	}
	return resp, ret.Error(1)
}
// MockDataCoordClient_ImportV2_Call wraps *mock.Call for the 'ImportV2'
// expectation so that its Run, Return, and RunAndReturn methods are typed to
// that method's signature rather than the generic mock.Call versions they
// shadow.
type MockDataCoordClient_ImportV2_Call struct {
*mock.Call
}
// ImportV2 is a helper method to define mock.On call.
// It registers an expectation for "ImportV2" on the underlying mock and
// returns the typed call wrapper. ctx, in, and any opts are flattened into a
// single argument list for mock.On. Parameters are declared interface{}; the
// mocked method's concrete types are:
//   - ctx context.Context
//   - in *internalpb.ImportRequestInternal
//   - opts ...grpc.CallOption
func (_e *MockDataCoordClient_Expecter) ImportV2(ctx interface{}, in interface{}, opts ...interface{}) *MockDataCoordClient_ImportV2_Call {
return &MockDataCoordClient_ImportV2_Call{Call: _e.mock.On("ImportV2",
append([]interface{}{ctx, in}, opts...)...)}
}
// Run installs a callback on the underlying mock.Call that forwards the
// recorded arguments to run with their concrete types. Every argument after
// the first two is treated as a grpc.CallOption; nil entries are passed
// through as nil options.
func (_c *MockDataCoordClient_ImportV2_Call) Run(run func(ctx context.Context, in *internalpb.ImportRequestInternal, opts ...grpc.CallOption)) *MockDataCoordClient_ImportV2_Call {
	_c.Call.Run(func(args mock.Arguments) {
		callOpts := make([]grpc.CallOption, len(args)-2)
		for pos, raw := range args[2:] {
			if raw == nil {
				continue // keep the zero-value option for nil entries
			}
			callOpts[pos] = raw.(grpc.CallOption)
		}
		ctx := args[0].(context.Context)
		req := args[1].(*internalpb.ImportRequestInternal)
		run(ctx, req, callOpts...)
	})
	return _c
}
// Return sets the values the mocked ImportV2 hands back: _a0 as the response
// (return slot 0) and _a1 as the error (return slot 1).
func (_c *MockDataCoordClient_ImportV2_Call) Return(_a0 *internalpb.ImportResponse, _a1 error) *MockDataCoordClient_ImportV2_Call {
_c.Call.Return(_a0, _a1)
return _c
}
// RunAndReturn installs run as the sole return value. The mock method checks
// return slot 0 for a function of exactly this signature and, when found,
// calls it with the actual arguments and returns its results directly.
func (_c *MockDataCoordClient_ImportV2_Call) RunAndReturn(run func(context.Context, *internalpb.ImportRequestInternal, ...grpc.CallOption) (*internalpb.ImportResponse, error)) *MockDataCoordClient_ImportV2_Call {
_c.Call.Return(run)
return _c
}
// ListImports is the mock implementation of ListImports. It flattens ctx, in,
// and every call option into one argument list for Called, then resolves its
// results from the configured return values: slot 0 may hold a function with
// the full method signature (supplying both results), a function returning
// only the response, or a *internalpb.ListImportsResponse (nil allowed);
// slot 1 may hold a function returning the error or an error value.
func (_m *MockDataCoordClient) ListImports(ctx context.Context, in *internalpb.ListImportsRequestInternal, opts ...grpc.CallOption) (*internalpb.ListImportsResponse, error) {
	callArgs := make([]interface{}, 0, len(opts)+2)
	callArgs = append(callArgs, ctx, in)
	for _, opt := range opts {
		callArgs = append(callArgs, opt)
	}
	ret := _m.Called(callArgs...)

	first := ret.Get(0)
	// A single function producing both results takes precedence.
	if both, ok := first.(func(context.Context, *internalpb.ListImportsRequestInternal, ...grpc.CallOption) (*internalpb.ListImportsResponse, error)); ok {
		return both(ctx, in, opts...)
	}

	var resp *internalpb.ListImportsResponse
	if produce, ok := first.(func(context.Context, *internalpb.ListImportsRequestInternal, ...grpc.CallOption) *internalpb.ListImportsResponse); ok {
		resp = produce(ctx, in, opts...)
	} else if first != nil {
		resp = first.(*internalpb.ListImportsResponse)
	}

	if fail, ok := ret.Get(1).(func(context.Context, *internalpb.ListImportsRequestInternal, ...grpc.CallOption) error); ok {
		return resp, fail(ctx, in, opts...)
	}
	return resp, ret.Error(1)
}
// MockDataCoordClient_ListImports_Call wraps *mock.Call for the 'ListImports'
// expectation so that its Run, Return, and RunAndReturn methods are typed to
// that method's signature rather than the generic mock.Call versions they
// shadow.
type MockDataCoordClient_ListImports_Call struct {
*mock.Call
}
// ListImports is a helper method to define mock.On call.
// It registers an expectation for "ListImports" on the underlying mock and
// returns the typed call wrapper. ctx, in, and any opts are flattened into a
// single argument list for mock.On. Parameters are declared interface{}; the
// mocked method's concrete types are:
//   - ctx context.Context
//   - in *internalpb.ListImportsRequestInternal
//   - opts ...grpc.CallOption
func (_e *MockDataCoordClient_Expecter) ListImports(ctx interface{}, in interface{}, opts ...interface{}) *MockDataCoordClient_ListImports_Call {
return &MockDataCoordClient_ListImports_Call{Call: _e.mock.On("ListImports",
append([]interface{}{ctx, in}, opts...)...)}
}
// Run installs a callback on the underlying mock.Call that forwards the
// recorded arguments to run with their concrete types. Every argument after
// the first two is treated as a grpc.CallOption; nil entries are passed
// through as nil options.
func (_c *MockDataCoordClient_ListImports_Call) Run(run func(ctx context.Context, in *internalpb.ListImportsRequestInternal, opts ...grpc.CallOption)) *MockDataCoordClient_ListImports_Call {
	_c.Call.Run(func(args mock.Arguments) {
		callOpts := make([]grpc.CallOption, len(args)-2)
		for pos, raw := range args[2:] {
			if raw == nil {
				continue // keep the zero-value option for nil entries
			}
			callOpts[pos] = raw.(grpc.CallOption)
		}
		ctx := args[0].(context.Context)
		req := args[1].(*internalpb.ListImportsRequestInternal)
		run(ctx, req, callOpts...)
	})
	return _c
}
// Return sets the values the mocked ListImports hands back: _a0 as the
// response (return slot 0) and _a1 as the error (return slot 1).
func (_c *MockDataCoordClient_ListImports_Call) Return(_a0 *internalpb.ListImportsResponse, _a1 error) *MockDataCoordClient_ListImports_Call {
_c.Call.Return(_a0, _a1)
return _c
}
// RunAndReturn installs run as the sole return value. The mock method checks
// return slot 0 for a function of exactly this signature and, when found,
// calls it with the actual arguments and returns its results directly.
func (_c *MockDataCoordClient_ListImports_Call) RunAndReturn(run func(context.Context, *internalpb.ListImportsRequestInternal, ...grpc.CallOption) (*internalpb.ListImportsResponse, error)) *MockDataCoordClient_ListImports_Call {
_c.Call.Return(run)
return _c
}
// ManualCompaction provides a mock function with given fields: ctx, in, opts
func (_m *MockDataCoordClient) ManualCompaction(ctx context.Context, in *milvuspb.ManualCompactionRequest, opts ...grpc.CallOption) (*milvuspb.ManualCompactionResponse, error) {
_va := make([]interface{}, len(opts))

View File

@ -2220,6 +2220,61 @@ func (_c *MockProxy_GetFlushState_Call) RunAndReturn(run func(context.Context, *
return _c
}
// GetImportProgress is the mock implementation of GetImportProgress. It
// records the call with _a0 and _a1 and resolves its results from the
// configured return values: slot 0 may hold a function with the full method
// signature (supplying both results), a function returning only the response,
// or a *internalpb.GetImportProgressResponse (nil allowed); slot 1 may hold a
// function returning the error or an error value.
func (_m *MockProxy) GetImportProgress(_a0 context.Context, _a1 *internalpb.GetImportProgressRequest) (*internalpb.GetImportProgressResponse, error) {
	ret := _m.Called(_a0, _a1)

	first := ret.Get(0)
	// A single function producing both results takes precedence.
	if both, ok := first.(func(context.Context, *internalpb.GetImportProgressRequest) (*internalpb.GetImportProgressResponse, error)); ok {
		return both(_a0, _a1)
	}

	var resp *internalpb.GetImportProgressResponse
	if produce, ok := first.(func(context.Context, *internalpb.GetImportProgressRequest) *internalpb.GetImportProgressResponse); ok {
		resp = produce(_a0, _a1)
	} else if first != nil {
		resp = first.(*internalpb.GetImportProgressResponse)
	}

	if fail, ok := ret.Get(1).(func(context.Context, *internalpb.GetImportProgressRequest) error); ok {
		return resp, fail(_a0, _a1)
	}
	return resp, ret.Error(1)
}
// MockProxy_GetImportProgress_Call wraps *mock.Call for the
// 'GetImportProgress' expectation so that its Run, Return, and RunAndReturn
// methods are typed to that method's signature rather than the generic
// mock.Call versions they shadow.
type MockProxy_GetImportProgress_Call struct {
*mock.Call
}
// GetImportProgress is a helper method to define mock.On call.
// It registers an expectation for "GetImportProgress" on the underlying mock
// and returns the typed call wrapper. Parameters are declared interface{} and
// forwarded to mock.On unchanged; the mocked method's concrete types are:
//   - _a0 context.Context
//   - _a1 *internalpb.GetImportProgressRequest
func (_e *MockProxy_Expecter) GetImportProgress(_a0 interface{}, _a1 interface{}) *MockProxy_GetImportProgress_Call {
return &MockProxy_GetImportProgress_Call{Call: _e.mock.On("GetImportProgress", _a0, _a1)}
}
// Run installs a callback on the underlying mock.Call that forwards the
// recorded arguments to run with their concrete types. Each type assertion
// panics if the recorded argument has a different dynamic type.
func (_c *MockProxy_GetImportProgress_Call) Run(run func(_a0 context.Context, _a1 *internalpb.GetImportProgressRequest)) *MockProxy_GetImportProgress_Call {
	handler := func(args mock.Arguments) {
		ctx := args[0].(context.Context)
		req := args[1].(*internalpb.GetImportProgressRequest)
		run(ctx, req)
	}
	_c.Call.Run(handler)
	return _c
}
// Return sets the values the mocked GetImportProgress hands back: _a0 as the
// response (return slot 0) and _a1 as the error (return slot 1).
func (_c *MockProxy_GetImportProgress_Call) Return(_a0 *internalpb.GetImportProgressResponse, _a1 error) *MockProxy_GetImportProgress_Call {
_c.Call.Return(_a0, _a1)
return _c
}
// RunAndReturn installs run as the sole return value. The mock method checks
// return slot 0 for a function of exactly this signature and, when found,
// calls it with the actual arguments and returns its results directly.
func (_c *MockProxy_GetImportProgress_Call) RunAndReturn(run func(context.Context, *internalpb.GetImportProgressRequest) (*internalpb.GetImportProgressResponse, error)) *MockProxy_GetImportProgress_Call {
_c.Call.Return(run)
return _c
}
// GetImportState provides a mock function with given fields: _a0, _a1
func (_m *MockProxy) GetImportState(_a0 context.Context, _a1 *milvuspb.GetImportStateRequest) (*milvuspb.GetImportStateResponse, error) {
ret := _m.Called(_a0, _a1)
@ -3263,6 +3318,61 @@ func (_c *MockProxy_Import_Call) RunAndReturn(run func(context.Context, *milvusp
return _c
}
// ImportV2 is the mock implementation of ImportV2. It records the call with
// _a0 and _a1 and resolves its results from the configured return values:
// slot 0 may hold a function with the full method signature (supplying both
// results), a function returning only the response, or a
// *internalpb.ImportResponse (nil allowed); slot 1 may hold a function
// returning the error or an error value.
func (_m *MockProxy) ImportV2(_a0 context.Context, _a1 *internalpb.ImportRequest) (*internalpb.ImportResponse, error) {
	ret := _m.Called(_a0, _a1)

	first := ret.Get(0)
	// A single function producing both results takes precedence.
	if both, ok := first.(func(context.Context, *internalpb.ImportRequest) (*internalpb.ImportResponse, error)); ok {
		return both(_a0, _a1)
	}

	var resp *internalpb.ImportResponse
	if produce, ok := first.(func(context.Context, *internalpb.ImportRequest) *internalpb.ImportResponse); ok {
		resp = produce(_a0, _a1)
	} else if first != nil {
		resp = first.(*internalpb.ImportResponse)
	}

	if fail, ok := ret.Get(1).(func(context.Context, *internalpb.ImportRequest) error); ok {
		return resp, fail(_a0, _a1)
	}
	return resp, ret.Error(1)
}
// MockProxy_ImportV2_Call wraps *mock.Call for the 'ImportV2' expectation so
// that its Run, Return, and RunAndReturn methods are typed to that method's
// signature rather than the generic mock.Call versions they shadow.
type MockProxy_ImportV2_Call struct {
*mock.Call
}
// ImportV2 is a helper method to define mock.On call.
// It registers an expectation for "ImportV2" on the underlying mock and
// returns the typed call wrapper. Parameters are declared interface{} and
// forwarded to mock.On unchanged; the mocked method's concrete types are:
//   - _a0 context.Context
//   - _a1 *internalpb.ImportRequest
func (_e *MockProxy_Expecter) ImportV2(_a0 interface{}, _a1 interface{}) *MockProxy_ImportV2_Call {
return &MockProxy_ImportV2_Call{Call: _e.mock.On("ImportV2", _a0, _a1)}
}
// Run installs a callback on the underlying mock.Call that forwards the
// recorded arguments to run with their concrete types. Each type assertion
// panics if the recorded argument has a different dynamic type.
func (_c *MockProxy_ImportV2_Call) Run(run func(_a0 context.Context, _a1 *internalpb.ImportRequest)) *MockProxy_ImportV2_Call {
	handler := func(args mock.Arguments) {
		ctx := args[0].(context.Context)
		req := args[1].(*internalpb.ImportRequest)
		run(ctx, req)
	}
	_c.Call.Run(handler)
	return _c
}
// Return sets the values the mocked ImportV2 hands back: _a0 as the response
// (return slot 0) and _a1 as the error (return slot 1).
func (_c *MockProxy_ImportV2_Call) Return(_a0 *internalpb.ImportResponse, _a1 error) *MockProxy_ImportV2_Call {
_c.Call.Return(_a0, _a1)
return _c
}
// RunAndReturn installs run as the sole return value. The mock method checks
// return slot 0 for a function of exactly this signature and, when found,
// calls it with the actual arguments and returns its results directly.
func (_c *MockProxy_ImportV2_Call) RunAndReturn(run func(context.Context, *internalpb.ImportRequest) (*internalpb.ImportResponse, error)) *MockProxy_ImportV2_Call {
_c.Call.Return(run)
return _c
}
// Init provides a mock function with given fields:
func (_m *MockProxy) Init() error {
ret := _m.Called()
@ -3744,6 +3854,61 @@ func (_c *MockProxy_ListImportTasks_Call) RunAndReturn(run func(context.Context,
return _c
}
// ListImports is the mock implementation of ListImports. It records the call
// with _a0 and _a1 and resolves its results from the configured return
// values: slot 0 may hold a function with the full method signature
// (supplying both results), a function returning only the response, or a
// *internalpb.ListImportsResponse (nil allowed); slot 1 may hold a function
// returning the error or an error value.
func (_m *MockProxy) ListImports(_a0 context.Context, _a1 *internalpb.ListImportsRequest) (*internalpb.ListImportsResponse, error) {
	ret := _m.Called(_a0, _a1)

	first := ret.Get(0)
	// A single function producing both results takes precedence.
	if both, ok := first.(func(context.Context, *internalpb.ListImportsRequest) (*internalpb.ListImportsResponse, error)); ok {
		return both(_a0, _a1)
	}

	var resp *internalpb.ListImportsResponse
	if produce, ok := first.(func(context.Context, *internalpb.ListImportsRequest) *internalpb.ListImportsResponse); ok {
		resp = produce(_a0, _a1)
	} else if first != nil {
		resp = first.(*internalpb.ListImportsResponse)
	}

	if fail, ok := ret.Get(1).(func(context.Context, *internalpb.ListImportsRequest) error); ok {
		return resp, fail(_a0, _a1)
	}
	return resp, ret.Error(1)
}
// MockProxy_ListImports_Call wraps *mock.Call for the 'ListImports'
// expectation so that its Run, Return, and RunAndReturn methods are typed to
// that method's signature rather than the generic mock.Call versions they
// shadow.
type MockProxy_ListImports_Call struct {
*mock.Call
}
// ListImports is a helper method to define mock.On call.
// It registers an expectation for "ListImports" on the underlying mock and
// returns the typed call wrapper. Parameters are declared interface{} and
// forwarded to mock.On unchanged; the mocked method's concrete types are:
//   - _a0 context.Context
//   - _a1 *internalpb.ListImportsRequest
func (_e *MockProxy_Expecter) ListImports(_a0 interface{}, _a1 interface{}) *MockProxy_ListImports_Call {
return &MockProxy_ListImports_Call{Call: _e.mock.On("ListImports", _a0, _a1)}
}
// Run installs a callback on the underlying mock.Call that forwards the
// recorded arguments to run with their concrete types. Each type assertion
// panics if the recorded argument has a different dynamic type.
func (_c *MockProxy_ListImports_Call) Run(run func(_a0 context.Context, _a1 *internalpb.ListImportsRequest)) *MockProxy_ListImports_Call {
	handler := func(args mock.Arguments) {
		ctx := args[0].(context.Context)
		req := args[1].(*internalpb.ListImportsRequest)
		run(ctx, req)
	}
	_c.Call.Run(handler)
	return _c
}
// Return sets the values the mocked ListImports hands back: _a0 as the
// response (return slot 0) and _a1 as the error (return slot 1).
func (_c *MockProxy_ListImports_Call) Return(_a0 *internalpb.ListImportsResponse, _a1 error) *MockProxy_ListImports_Call {
_c.Call.Return(_a0, _a1)
return _c
}
// RunAndReturn installs run as the sole return value. The mock method checks
// return slot 0 for a function of exactly this signature and, when found,
// calls it with the actual arguments and returns its results directly.
func (_c *MockProxy_ListImports_Call) RunAndReturn(run func(context.Context, *internalpb.ListImportsRequest) (*internalpb.ListImportsResponse, error)) *MockProxy_ListImports_Call {
_c.Call.Return(run)
return _c
}
// ListIndexedSegment provides a mock function with given fields: _a0, _a1
func (_m *MockProxy) ListIndexedSegment(_a0 context.Context, _a1 *federpb.ListIndexedSegmentRequest) (*federpb.ListIndexedSegmentResponse, error) {
ret := _m.Called(_a0, _a1)

View File

@ -212,6 +212,76 @@ func (_c *MockProxyClient_GetDdChannel_Call) RunAndReturn(run func(context.Conte
return _c
}
// GetImportProgress is the mock implementation of GetImportProgress. It
// flattens ctx, in, and every call option into one argument list for Called,
// then resolves its results from the configured return values: slot 0 may
// hold a function with the full method signature (supplying both results), a
// function returning only the response, or a
// *internalpb.GetImportProgressResponse (nil allowed); slot 1 may hold a
// function returning the error or an error value.
func (_m *MockProxyClient) GetImportProgress(ctx context.Context, in *internalpb.GetImportProgressRequest, opts ...grpc.CallOption) (*internalpb.GetImportProgressResponse, error) {
	callArgs := make([]interface{}, 0, len(opts)+2)
	callArgs = append(callArgs, ctx, in)
	for _, opt := range opts {
		callArgs = append(callArgs, opt)
	}
	ret := _m.Called(callArgs...)

	first := ret.Get(0)
	// A single function producing both results takes precedence.
	if both, ok := first.(func(context.Context, *internalpb.GetImportProgressRequest, ...grpc.CallOption) (*internalpb.GetImportProgressResponse, error)); ok {
		return both(ctx, in, opts...)
	}

	var resp *internalpb.GetImportProgressResponse
	if produce, ok := first.(func(context.Context, *internalpb.GetImportProgressRequest, ...grpc.CallOption) *internalpb.GetImportProgressResponse); ok {
		resp = produce(ctx, in, opts...)
	} else if first != nil {
		resp = first.(*internalpb.GetImportProgressResponse)
	}

	if fail, ok := ret.Get(1).(func(context.Context, *internalpb.GetImportProgressRequest, ...grpc.CallOption) error); ok {
		return resp, fail(ctx, in, opts...)
	}
	return resp, ret.Error(1)
}
// MockProxyClient_GetImportProgress_Call wraps *mock.Call for the
// 'GetImportProgress' expectation so that its Run, Return, and RunAndReturn
// methods are typed to that method's signature rather than the generic
// mock.Call versions they shadow.
type MockProxyClient_GetImportProgress_Call struct {
*mock.Call
}
// GetImportProgress is a helper method to define mock.On call.
// It registers an expectation for "GetImportProgress" on the underlying mock
// and returns the typed call wrapper. ctx, in, and any opts are flattened
// into a single argument list for mock.On. Parameters are declared
// interface{}; the mocked method's concrete types are:
//   - ctx context.Context
//   - in *internalpb.GetImportProgressRequest
//   - opts ...grpc.CallOption
func (_e *MockProxyClient_Expecter) GetImportProgress(ctx interface{}, in interface{}, opts ...interface{}) *MockProxyClient_GetImportProgress_Call {
return &MockProxyClient_GetImportProgress_Call{Call: _e.mock.On("GetImportProgress",
append([]interface{}{ctx, in}, opts...)...)}
}
// Run installs a callback on the underlying mock.Call that forwards the
// recorded arguments to run with their concrete types. Every argument after
// the first two is treated as a grpc.CallOption; nil entries are passed
// through as nil options.
func (_c *MockProxyClient_GetImportProgress_Call) Run(run func(ctx context.Context, in *internalpb.GetImportProgressRequest, opts ...grpc.CallOption)) *MockProxyClient_GetImportProgress_Call {
	_c.Call.Run(func(args mock.Arguments) {
		callOpts := make([]grpc.CallOption, len(args)-2)
		for pos, raw := range args[2:] {
			if raw == nil {
				continue // keep the zero-value option for nil entries
			}
			callOpts[pos] = raw.(grpc.CallOption)
		}
		ctx := args[0].(context.Context)
		req := args[1].(*internalpb.GetImportProgressRequest)
		run(ctx, req, callOpts...)
	})
	return _c
}
// Return sets the values the mocked GetImportProgress hands back: _a0 as the
// response (return slot 0) and _a1 as the error (return slot 1).
func (_c *MockProxyClient_GetImportProgress_Call) Return(_a0 *internalpb.GetImportProgressResponse, _a1 error) *MockProxyClient_GetImportProgress_Call {
_c.Call.Return(_a0, _a1)
return _c
}
// RunAndReturn installs run as the sole return value. The mock method checks
// return slot 0 for a function of exactly this signature and, when found,
// calls it with the actual arguments and returns its results directly.
func (_c *MockProxyClient_GetImportProgress_Call) RunAndReturn(run func(context.Context, *internalpb.GetImportProgressRequest, ...grpc.CallOption) (*internalpb.GetImportProgressResponse, error)) *MockProxyClient_GetImportProgress_Call {
_c.Call.Return(run)
return _c
}
// GetProxyMetrics provides a mock function with given fields: ctx, in, opts
func (_m *MockProxyClient) GetProxyMetrics(ctx context.Context, in *milvuspb.GetMetricsRequest, opts ...grpc.CallOption) (*milvuspb.GetMetricsResponse, error) {
_va := make([]interface{}, len(opts))
@ -352,6 +422,76 @@ func (_c *MockProxyClient_GetStatisticsChannel_Call) RunAndReturn(run func(conte
return _c
}
// ImportV2 is the mock implementation of ImportV2. It flattens ctx, in, and
// every call option into one argument list for Called, then resolves its
// results from the configured return values: slot 0 may hold a function with
// the full method signature (supplying both results), a function returning
// only the response, or a *internalpb.ImportResponse (nil allowed); slot 1
// may hold a function returning the error or an error value.
func (_m *MockProxyClient) ImportV2(ctx context.Context, in *internalpb.ImportRequest, opts ...grpc.CallOption) (*internalpb.ImportResponse, error) {
	callArgs := make([]interface{}, 0, len(opts)+2)
	callArgs = append(callArgs, ctx, in)
	for _, opt := range opts {
		callArgs = append(callArgs, opt)
	}
	ret := _m.Called(callArgs...)

	first := ret.Get(0)
	// A single function producing both results takes precedence.
	if both, ok := first.(func(context.Context, *internalpb.ImportRequest, ...grpc.CallOption) (*internalpb.ImportResponse, error)); ok {
		return both(ctx, in, opts...)
	}

	var resp *internalpb.ImportResponse
	if produce, ok := first.(func(context.Context, *internalpb.ImportRequest, ...grpc.CallOption) *internalpb.ImportResponse); ok {
		resp = produce(ctx, in, opts...)
	} else if first != nil {
		resp = first.(*internalpb.ImportResponse)
	}

	if fail, ok := ret.Get(1).(func(context.Context, *internalpb.ImportRequest, ...grpc.CallOption) error); ok {
		return resp, fail(ctx, in, opts...)
	}
	return resp, ret.Error(1)
}
// MockProxyClient_ImportV2_Call wraps *mock.Call and shadows its Run/Return
// methods with versions typed for the mocked method 'ImportV2'.
type MockProxyClient_ImportV2_Call struct {
	*mock.Call
}

// ImportV2 registers an expectation for 'ImportV2' through mock.On.
//   - ctx context.Context
//   - in *internalpb.ImportRequest
//   - opts ...grpc.CallOption
func (_e *MockProxyClient_Expecter) ImportV2(ctx interface{}, in interface{}, opts ...interface{}) *MockProxyClient_ImportV2_Call {
	expected := append([]interface{}{ctx, in}, opts...)
	return &MockProxyClient_ImportV2_Call{Call: _e.mock.On("ImportV2", expected...)}
}

// Run installs a typed callback that is invoked with the recorded call arguments.
// Arguments after the first two are converted to grpc.CallOption; nil entries stay nil.
func (_c *MockProxyClient_ImportV2_Call) Run(run func(ctx context.Context, in *internalpb.ImportRequest, opts ...grpc.CallOption)) *MockProxyClient_ImportV2_Call {
	_c.Call.Run(func(args mock.Arguments) {
		callOpts := make([]grpc.CallOption, len(args)-2)
		for idx, raw := range args[2:] {
			if raw == nil {
				continue
			}
			callOpts[idx] = raw.(grpc.CallOption)
		}
		callCtx := args[0].(context.Context)
		req := args[1].(*internalpb.ImportRequest)
		run(callCtx, req, callOpts...)
	})
	return _c
}

// Return sets the fixed response and error handed back by the mocked call.
func (_c *MockProxyClient_ImportV2_Call) Return(_a0 *internalpb.ImportResponse, _a1 error) *MockProxyClient_ImportV2_Call {
	_c.Call.Return(_a0, _a1)
	return _c
}

// RunAndReturn stores run as the call's return value; the mocked method detects a
// function of this signature and calls it to produce both results.
func (_c *MockProxyClient_ImportV2_Call) RunAndReturn(run func(context.Context, *internalpb.ImportRequest, ...grpc.CallOption) (*internalpb.ImportResponse, error)) *MockProxyClient_ImportV2_Call {
	_c.Call.Return(run)
	return _c
}
// InvalidateCollectionMetaCache provides a mock function with given fields: ctx, in, opts
func (_m *MockProxyClient) InvalidateCollectionMetaCache(ctx context.Context, in *proxypb.InvalidateCollMetaCacheRequest, opts ...grpc.CallOption) (*commonpb.Status, error) {
_va := make([]interface{}, len(opts))
@ -562,6 +702,76 @@ func (_c *MockProxyClient_ListClientInfos_Call) RunAndReturn(run func(context.Co
return _c
}
// ListImports provides a mock function with given fields: ctx, in, opts
func (_m *MockProxyClient) ListImports(ctx context.Context, in *internalpb.ListImportsRequest, opts ...grpc.CallOption) (*internalpb.ListImportsResponse, error) {
	// Flatten the fixed and variadic arguments into the list recorded by the mock.
	callArgs := make([]interface{}, 0, len(opts)+2)
	callArgs = append(callArgs, ctx, in)
	for _, opt := range opts {
		callArgs = append(callArgs, opt)
	}
	ret := _m.Called(callArgs...)

	// A single function that yields both results takes precedence over per-result values.
	if both, ok := ret.Get(0).(func(context.Context, *internalpb.ListImportsRequest, ...grpc.CallOption) (*internalpb.ListImportsResponse, error)); ok {
		return both(ctx, in, opts...)
	}

	var resp *internalpb.ListImportsResponse
	if respFn, ok := ret.Get(0).(func(context.Context, *internalpb.ListImportsRequest, ...grpc.CallOption) *internalpb.ListImportsResponse); ok {
		resp = respFn(ctx, in, opts...)
	} else if raw := ret.Get(0); raw != nil {
		resp = raw.(*internalpb.ListImportsResponse)
	}

	var err error
	if errFn, ok := ret.Get(1).(func(context.Context, *internalpb.ListImportsRequest, ...grpc.CallOption) error); ok {
		err = errFn(ctx, in, opts...)
	} else {
		err = ret.Error(1)
	}

	return resp, err
}
// MockProxyClient_ListImports_Call wraps *mock.Call and shadows its Run/Return
// methods with versions typed for the mocked method 'ListImports'.
type MockProxyClient_ListImports_Call struct {
	*mock.Call
}

// ListImports registers an expectation for 'ListImports' through mock.On.
//   - ctx context.Context
//   - in *internalpb.ListImportsRequest
//   - opts ...grpc.CallOption
func (_e *MockProxyClient_Expecter) ListImports(ctx interface{}, in interface{}, opts ...interface{}) *MockProxyClient_ListImports_Call {
	expected := append([]interface{}{ctx, in}, opts...)
	return &MockProxyClient_ListImports_Call{Call: _e.mock.On("ListImports", expected...)}
}

// Run installs a typed callback that is invoked with the recorded call arguments.
// Arguments after the first two are converted to grpc.CallOption; nil entries stay nil.
func (_c *MockProxyClient_ListImports_Call) Run(run func(ctx context.Context, in *internalpb.ListImportsRequest, opts ...grpc.CallOption)) *MockProxyClient_ListImports_Call {
	_c.Call.Run(func(args mock.Arguments) {
		callOpts := make([]grpc.CallOption, len(args)-2)
		for idx, raw := range args[2:] {
			if raw == nil {
				continue
			}
			callOpts[idx] = raw.(grpc.CallOption)
		}
		callCtx := args[0].(context.Context)
		req := args[1].(*internalpb.ListImportsRequest)
		run(callCtx, req, callOpts...)
	})
	return _c
}

// Return sets the fixed response and error handed back by the mocked call.
func (_c *MockProxyClient_ListImports_Call) Return(_a0 *internalpb.ListImportsResponse, _a1 error) *MockProxyClient_ListImports_Call {
	_c.Call.Return(_a0, _a1)
	return _c
}

// RunAndReturn stores run as the call's return value; the mocked method detects a
// function of this signature and calls it to produce both results.
func (_c *MockProxyClient_ListImports_Call) RunAndReturn(run func(context.Context, *internalpb.ListImportsRequest, ...grpc.CallOption) (*internalpb.ListImportsResponse, error)) *MockProxyClient_ListImports_Call {
	_c.Call.Return(run)
	return _c
}
// RefreshPolicyInfoCache provides a mock function with given fields: ctx, in, opts
func (_m *MockProxyClient) RefreshPolicyInfoCache(ctx context.Context, in *proxypb.RefreshPolicyInfoCacheRequest, opts ...grpc.CallOption) (*commonpb.Status, error) {
_va := make([]interface{}, len(opts))

View File

@ -95,6 +95,11 @@ service DataCoord {
rpc ReportDataNodeTtMsgs(ReportDataNodeTtMsgsRequest) returns (common.Status) {}
rpc GcControl(GcControlRequest) returns(common.Status){}
// importV2
rpc ImportV2(internal.ImportRequestInternal) returns(internal.ImportResponse){}
rpc GetImportProgress(internal.GetImportProgressRequest) returns(internal.GetImportProgressResponse){}
rpc ListImports(internal.ListImportsRequestInternal) returns(internal.ListImportsResponse){}
}
service DataNode {
@ -774,7 +779,6 @@ message ImportRequestSegment {
int64 segmentID = 1;
int64 partitionID = 2;
string vchannel = 3;
int64 max_rows = 4;
}
message ImportRequest {
@ -782,12 +786,14 @@ message ImportRequest {
int64 jobID = 2;
int64 taskID = 3;
int64 collectionID = 4;
schema.CollectionSchema schema = 5;
repeated internal.ImportFile files = 6;
repeated common.KeyValuePair options = 7;
uint64 ts = 8;
autoIDRange autoID_range = 9;
repeated ImportRequestSegment request_segments = 10;
repeated int64 partitionIDs = 5;
repeated string vchannels = 6;
schema.CollectionSchema schema = 7;
repeated internal.ImportFile files = 8;
repeated common.KeyValuePair options = 9;
uint64 ts = 10;
autoIDRange autoID_range = 11;
repeated ImportRequestSegment request_segments = 12;
}
message QueryPreImportRequest {
@ -796,21 +802,23 @@ message QueryPreImportRequest {
int64 taskID = 3;
}
message PartitionRows {
map<int64, int64> partition_rows = 1;
message PartitionImportStats {
map<int64, int64> partition_rows = 1; // partitionID -> numRows
map<int64, int64> partition_data_size = 2; // partitionID -> dataSize
}
message ImportFileStats {
internal.ImportFile import_file = 1;
int64 file_size = 2;
int64 total_rows = 3;
map<string, PartitionRows> hashed_rows = 4;
int64 total_memory_size = 4;
map<string, PartitionImportStats> hashed_stats = 5; // channel -> PartitionImportStats
}
message QueryPreImportResponse {
common.Status status = 1;
int64 taskID = 2;
internal.ImportState state = 3;
ImportTaskStateV2 state = 3;
string reason = 4;
int64 slots = 5;
repeated ImportFileStats file_stats = 6;
@ -833,7 +841,7 @@ message ImportSegmentInfo {
message QueryImportResponse {
common.Status status = 1;
int64 taskID = 2;
internal.ImportState state = 3;
ImportTaskStateV2 state = 3;
string reason = 4;
int64 slots = 5;
repeated ImportSegmentInfo import_segments_info = 6;
@ -845,18 +853,37 @@ message DropImportRequest {
int64 taskID = 3;
}
// ImportJob is the record of one import job: its identifiers, the target
// partitions and vchannels, the collection schema, its state and the
// files/options attached to it.
message ImportJob {
int64 jobID = 1;
int64 dbID = 2;
int64 collectionID = 3;
repeated int64 partitionIDs = 4;
repeated string vchannels = 5;
schema.CollectionSchema schema = 6;
// NOTE(review): presumably the timestamps after which the job is treated as
// timed out / eligible for cleanup — confirm against the import checker.
uint64 timeout_ts = 7;
uint64 cleanup_ts = 8;
// Job-level state (internal.ImportJobState); reason is a free-form string
// stored alongside it.
internal.ImportJobState state = 9;
string reason = 10;
repeated internal.ImportFile files = 11;
repeated common.KeyValuePair options = 12;
}
// ImportTaskStateV2 enumerates the states of an individual import task. It is
// the type of the `state` field of PreImportTask, ImportTaskV2,
// QueryPreImportResponse and QueryImportResponse in this file.
enum ImportTaskStateV2 {
None = 0;
Pending = 1;
InProgress = 2;
Failed = 3;
Completed = 4;
}
message PreImportTask {
int64 jobID = 1;
int64 taskID = 2;
int64 collectionID = 3;
repeated int64 partitionIDs = 4;
repeated string vchannels = 5;
int64 nodeID = 6;
internal.ImportState state = 7;
ImportTaskStateV2 state = 7;
string reason = 8;
uint64 timeout_ts = 9;
repeated ImportFileStats file_stats = 10;
repeated common.KeyValuePair options = 11;
}
message ImportTaskV2 {
@ -865,11 +892,9 @@ message ImportTaskV2 {
int64 collectionID = 3;
repeated int64 segmentIDs = 4;
int64 nodeID = 5;
internal.ImportState state = 6;
ImportTaskStateV2 state = 6;
string reason = 7;
uint64 timeout_ts = 8;
repeated ImportFileStats file_stats = 9;
repeated common.KeyValuePair options = 10;
}
enum GcCommand {

View File

@ -262,26 +262,29 @@ message Rate {
double r = 2;
}
enum ImportState {
enum ImportJobState {
None = 0;
Pending = 1;
InProgress = 2;
Failed = 3;
Completed = 4;
PreImporting = 2;
Importing = 3;
Failed = 4;
Completed = 5;
}
message ImportFile {
int64 id = 1;
// A singular row-based file or multiple column-based files.
repeated string paths = 1;
repeated string paths = 2;
}
message ImportRequestInternal {
int64 collectionID = 1;
repeated int64 partitionIDs = 2;
repeated string channel_names = 3;
schema.CollectionSchema schema = 4;
repeated internal.ImportFile files = 5;
repeated common.KeyValuePair options = 6;
int64 dbID = 1;
int64 collectionID = 2;
repeated int64 partitionIDs = 3;
repeated string channel_names = 4;
schema.CollectionSchema schema = 5;
repeated ImportFile files = 6;
repeated common.KeyValuePair options = 7;
}
message ImportRequest {
@ -304,11 +307,16 @@ message GetImportProgressRequest {
message GetImportProgressResponse {
common.Status status = 1;
ImportState state = 2;
ImportJobState state = 2;
string reason = 3;
int64 progress = 4;
}
// ListImportsRequestInternal is the ID-based counterpart of ListImportsRequest
// (which carries db_name / collection_name); it is the request type of the
// DataCoord ListImports RPC.
message ListImportsRequestInternal {
int64 dbID = 1;
int64 collectionID = 2;
}
message ListImportsRequest {
string db_name = 1;
string collection_name = 2;
@ -317,7 +325,7 @@ message ListImportsRequest {
message ListImportsResponse {
common.Status status = 1;
repeated string jobIDs = 2;
repeated ImportState states = 3;
repeated ImportJobState states = 3;
repeated string reasons = 4;
repeated int64 progresses = 5;
}

View File

@ -22,6 +22,11 @@ service Proxy {
rpc SetRates(SetRatesRequest) returns (common.Status) {}
rpc ListClientInfos(ListClientInfosRequest) returns (ListClientInfosResponse) {}
// importV2
rpc ImportV2(internal.ImportRequest) returns(internal.ImportResponse){}
rpc GetImportProgress(internal.GetImportProgressRequest) returns(internal.GetImportProgressResponse){}
rpc ListImports(internal.ListImportsRequest) returns(internal.ListImportsResponse){}
}
message InvalidateCollMetaCacheRequest {

View File

@ -43,6 +43,7 @@ import (
"github.com/milvus-io/milvus/internal/proto/querypb"
"github.com/milvus-io/milvus/internal/proxy/connection"
"github.com/milvus-io/milvus/internal/util/importutil"
"github.com/milvus-io/milvus/internal/util/importutilv2"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/metrics"
@ -5550,3 +5551,110 @@ func (node *Proxy) GetVersion(ctx context.Context, request *milvuspb.GetVersionR
Status: merr.Success(),
}, nil
}
// ImportV2 validates an import request on the proxy and forwards it to DataCoord.
//
// It resolves the collection ID, schema and vchannels, determines the target
// partition IDs, drops files without paths (req.Files is replaced in place),
// checks that every remaining file has a recognised type, and then calls
// DataCoord's ImportV2. Validation failures are reported through the response
// Status with a nil Go error.
//
// NOTE(review): ImportRequestInternal.DbID is not populated here and is left at
// its zero value — confirm whether DataCoord relies on it.
func (node *Proxy) ImportV2(ctx context.Context, req *internalpb.ImportRequest) (*internalpb.ImportResponse, error) {
	if err := merr.CheckHealthy(node.GetStateCode()); err != nil {
		return &internalpb.ImportResponse{Status: merr.Status(err)}, nil
	}

	log := log.Ctx(ctx).With(zap.String("role", typeutil.ProxyRole))
	log.Info(rpcReceived("ImportV2"))

	resp := &internalpb.ImportResponse{Status: merr.Success()}
	// reject stores err in the response status; failures are never returned as Go errors.
	reject := func(err error) (*internalpb.ImportResponse, error) {
		resp.Status = merr.Status(err)
		return resp, nil
	}

	dbName, collectionName := req.GetDbName(), req.GetCollectionName()

	collectionID, err := globalMetaCache.GetCollectionID(ctx, dbName, collectionName)
	if err != nil {
		return reject(err)
	}
	schema, err := globalMetaCache.GetCollectionSchema(ctx, dbName, collectionName)
	if err != nil {
		return reject(err)
	}
	channels, err := node.chMgr.getVChannels(collectionID)
	if err != nil {
		return reject(err)
	}

	partitionName := req.GetPartitionName()
	hasPartitionKey := typeutil.HasPartitionKey(schema.CollectionSchema)

	var partitionIDs []int64
	switch {
	case hasPartitionKey && partitionName != "":
		// An explicit partition cannot be combined with a partition-key collection.
		return reject(merr.WrapErrImportFailed("not allow to set partition name for collection with partition key"))
	case hasPartitionKey:
		// Partition-key collection: target every partition of the collection.
		partitions, err := globalMetaCache.GetPartitions(ctx, dbName, collectionName)
		if err != nil {
			return reject(err)
		}
		partitionIDs = lo.Values(partitions)
	default:
		// Single partition: the named one, or the configured default when unnamed.
		if partitionName == "" {
			partitionName = Params.CommonCfg.DefaultPartitionName.GetValue()
		}
		partitionID, err := globalMetaCache.GetPartitionID(ctx, dbName, collectionName, partitionName)
		if err != nil {
			return reject(err)
		}
		partitionIDs = []UniqueID{partitionID}
	}

	// Keep only files that actually list at least one path.
	req.Files = lo.Filter(req.GetFiles(), func(file *internalpb.ImportFile, _ int) bool {
		return len(file.GetPaths()) > 0
	})
	if len(req.Files) == 0 {
		return reject(merr.WrapErrParameterInvalidMsg("import request is empty"))
	}
	for _, file := range req.GetFiles() {
		if _, err := importutilv2.GetFileType(file); err != nil {
			return reject(err)
		}
	}

	return node.dataCoord.ImportV2(ctx, &internalpb.ImportRequestInternal{
		CollectionID: collectionID,
		PartitionIDs: partitionIDs,
		ChannelNames: channels,
		Schema:       schema.CollectionSchema,
		Files:        req.GetFiles(),
		Options:      req.GetOptions(),
	})
}
// GetImportProgress forwards the request unchanged to DataCoord when the proxy
// is healthy; otherwise it returns a response whose Status carries the health
// error, with a nil Go error.
func (node *Proxy) GetImportProgress(ctx context.Context, req *internalpb.GetImportProgressRequest) (*internalpb.GetImportProgressResponse, error) {
	healthErr := merr.CheckHealthy(node.GetStateCode())
	if healthErr == nil {
		return node.dataCoord.GetImportProgress(ctx, req)
	}
	unhealthy := &internalpb.GetImportProgressResponse{Status: merr.Status(healthErr)}
	return unhealthy, nil
}
// ListImports resolves the collection named in req to its ID and asks DataCoord
// to list that collection's imports. A failed health check or collection lookup
// is reported through the response Status with a nil Go error.
//
// NOTE(review): only CollectionID is set on ListImportsRequestInternal; DbID is
// left at its zero value — confirm that is intended.
func (node *Proxy) ListImports(ctx context.Context, req *internalpb.ListImportsRequest) (*internalpb.ListImportsResponse, error) {
	if healthErr := merr.CheckHealthy(node.GetStateCode()); healthErr != nil {
		return &internalpb.ListImportsResponse{Status: merr.Status(healthErr)}, nil
	}

	collectionID, lookupErr := globalMetaCache.GetCollectionID(ctx, req.GetDbName(), req.GetCollectionName())
	if lookupErr != nil {
		return &internalpb.ListImportsResponse{Status: merr.Status(lookupErr)}, nil
	}

	internalReq := &internalpb.ListImportsRequestInternal{CollectionID: collectionID}
	return node.dataCoord.ListImports(ctx, internalReq)
}

View File

@ -37,6 +37,7 @@ import (
"github.com/milvus-io/milvus/internal/allocator"
"github.com/milvus-io/milvus/internal/mocks"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/internal/proto/proxypb"
"github.com/milvus-io/milvus/internal/proto/querypb"
"github.com/milvus-io/milvus/internal/proto/rootcoordpb"
@ -1457,3 +1458,165 @@ func TestProxy_ReplicateMessage(t *testing.T) {
}
})
}
// TestProxy_ImportV2 exercises the proxy's ImportV2, GetImportProgress and
// ListImports handlers against mocked meta cache, channel manager and DataCoord
// client. The sub-cases inside each t.Run are sequential and share the same
// Proxy value; several of them deliberately reuse the mocks installed by the
// preceding sub-case. The original globalMetaCache is restored on exit.
func TestProxy_ImportV2(t *testing.T) {
ctx := context.Background()
mockErr := errors.New("mock error")
cache := globalMetaCache
defer func() { globalMetaCache = cache }()
t.Run("ImportV2", func(t *testing.T) {
// server is not healthy
node := &Proxy{}
node.UpdateStateCode(commonpb.StateCode_Abnormal)
rsp, err := node.ImportV2(ctx, nil)
assert.NoError(t, err)
assert.NotEqual(t, int32(0), rsp.GetStatus().GetCode())
node.UpdateStateCode(commonpb.StateCode_Healthy)
// no such collection
mc := NewMockCache(t)
mc.EXPECT().GetCollectionID(mock.Anything, mock.Anything, mock.Anything).Return(0, mockErr)
globalMetaCache = mc
rsp, err = node.ImportV2(ctx, &internalpb.ImportRequest{CollectionName: "aaa"})
assert.NoError(t, err)
assert.NotEqual(t, int32(0), rsp.GetStatus().GetCode())
// get schema failed
mc = NewMockCache(t)
mc.EXPECT().GetCollectionID(mock.Anything, mock.Anything, mock.Anything).Return(0, nil)
mc.EXPECT().GetCollectionSchema(mock.Anything, mock.Anything, mock.Anything).Return(nil, mockErr)
globalMetaCache = mc
rsp, err = node.ImportV2(ctx, &internalpb.ImportRequest{CollectionName: "aaa"})
assert.NoError(t, err)
assert.NotEqual(t, int32(0), rsp.GetStatus().GetCode())
// get channel failed
mc = NewMockCache(t)
mc.EXPECT().GetCollectionID(mock.Anything, mock.Anything, mock.Anything).Return(0, nil)
mc.EXPECT().GetCollectionSchema(mock.Anything, mock.Anything, mock.Anything).Return(&schemaInfo{
CollectionSchema: &schemapb.CollectionSchema{Fields: []*schemapb.FieldSchema{
{IsPartitionKey: true},
}},
}, nil)
globalMetaCache = mc
chMgr := NewMockChannelsMgr(t)
chMgr.EXPECT().getVChannels(mock.Anything).Return(nil, mockErr)
node.chMgr = chMgr
rsp, err = node.ImportV2(ctx, &internalpb.ImportRequest{CollectionName: "aaa"})
assert.NoError(t, err)
assert.NotEqual(t, int32(0), rsp.GetStatus().GetCode())
// set partition name and with partition key
// (the meta cache mock from the previous case, whose schema has a partition key, is still installed)
chMgr = NewMockChannelsMgr(t)
chMgr.EXPECT().getVChannels(mock.Anything).Return([]string{"ch0"}, nil)
node.chMgr = chMgr
rsp, err = node.ImportV2(ctx, &internalpb.ImportRequest{CollectionName: "aaa", PartitionName: "bbb"})
assert.NoError(t, err)
assert.NotEqual(t, int32(0), rsp.GetStatus().GetCode())
// get partitions failed
mc = NewMockCache(t)
mc.EXPECT().GetCollectionID(mock.Anything, mock.Anything, mock.Anything).Return(0, nil)
mc.EXPECT().GetCollectionSchema(mock.Anything, mock.Anything, mock.Anything).Return(&schemaInfo{
CollectionSchema: &schemapb.CollectionSchema{Fields: []*schemapb.FieldSchema{
{IsPartitionKey: true},
}},
}, nil)
mc.EXPECT().GetPartitions(mock.Anything, mock.Anything, mock.Anything).Return(nil, mockErr)
globalMetaCache = mc
rsp, err = node.ImportV2(ctx, &internalpb.ImportRequest{CollectionName: "aaa"})
assert.NoError(t, err)
assert.NotEqual(t, int32(0), rsp.GetStatus().GetCode())
// get partitionID failed
mc = NewMockCache(t)
mc.EXPECT().GetCollectionID(mock.Anything, mock.Anything, mock.Anything).Return(0, nil)
mc.EXPECT().GetCollectionSchema(mock.Anything, mock.Anything, mock.Anything).Return(&schemaInfo{
CollectionSchema: &schemapb.CollectionSchema{},
}, nil)
mc.EXPECT().GetPartitionID(mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(0, mockErr)
globalMetaCache = mc
rsp, err = node.ImportV2(ctx, &internalpb.ImportRequest{CollectionName: "aaa", PartitionName: "bbb"})
assert.NoError(t, err)
assert.NotEqual(t, int32(0), rsp.GetStatus().GetCode())
// no file
mc = NewMockCache(t)
mc.EXPECT().GetCollectionID(mock.Anything, mock.Anything, mock.Anything).Return(0, nil)
mc.EXPECT().GetCollectionSchema(mock.Anything, mock.Anything, mock.Anything).Return(&schemaInfo{
CollectionSchema: &schemapb.CollectionSchema{},
}, nil)
mc.EXPECT().GetPartitionID(mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(0, nil)
globalMetaCache = mc
rsp, err = node.ImportV2(ctx, &internalpb.ImportRequest{CollectionName: "aaa", PartitionName: "bbb"})
assert.NoError(t, err)
assert.NotEqual(t, int32(0), rsp.GetStatus().GetCode())
// illegal file type
// (reuses the meta cache and channel manager mocks from the "no file" case)
rsp, err = node.ImportV2(ctx, &internalpb.ImportRequest{
CollectionName: "aaa",
PartitionName: "bbb",
Files: []*internalpb.ImportFile{{
Id: 1,
Paths: []string{"a.cpp"},
}},
})
assert.NoError(t, err)
assert.NotEqual(t, int32(0), rsp.GetStatus().GetCode())
// normal case
// The DataCoord mock returns (nil, nil), so rsp is nil here; the final
// assertion presumably relies on the generated getters being nil-safe — confirm.
dataCoord := mocks.NewMockDataCoordClient(t)
dataCoord.EXPECT().ImportV2(mock.Anything, mock.Anything).Return(nil, nil)
node.dataCoord = dataCoord
rsp, err = node.ImportV2(ctx, &internalpb.ImportRequest{
CollectionName: "aaa",
Files: []*internalpb.ImportFile{{
Id: 1,
Paths: []string{"a.json"},
}},
})
assert.NoError(t, err)
assert.Equal(t, int32(0), rsp.GetStatus().GetCode())
})
t.Run("GetImportProgress", func(t *testing.T) {
// server is not healthy
node := &Proxy{}
node.UpdateStateCode(commonpb.StateCode_Abnormal)
rsp, err := node.GetImportProgress(ctx, nil)
assert.NoError(t, err)
assert.NotEqual(t, int32(0), rsp.GetStatus().GetCode())
node.UpdateStateCode(commonpb.StateCode_Healthy)
// normal case
dataCoord := mocks.NewMockDataCoordClient(t)
dataCoord.EXPECT().GetImportProgress(mock.Anything, mock.Anything).Return(nil, nil)
node.dataCoord = dataCoord
rsp, err = node.GetImportProgress(ctx, &internalpb.GetImportProgressRequest{})
assert.NoError(t, err)
assert.Equal(t, int32(0), rsp.GetStatus().GetCode())
})
t.Run("ListImports", func(t *testing.T) {
// server is not healthy
node := &Proxy{}
node.UpdateStateCode(commonpb.StateCode_Abnormal)
rsp, err := node.ListImports(ctx, nil)
assert.NoError(t, err)
assert.NotEqual(t, int32(0), rsp.GetStatus().GetCode())
node.UpdateStateCode(commonpb.StateCode_Healthy)
// normal case
mc := NewMockCache(t)
mc.EXPECT().GetCollectionID(mock.Anything, mock.Anything, mock.Anything).Return(0, nil)
globalMetaCache = mc
dataCoord := mocks.NewMockDataCoordClient(t)
dataCoord.EXPECT().ListImports(mock.Anything, mock.Anything).Return(nil, nil)
node.dataCoord = dataCoord
rsp, err = node.ListImports(ctx, &internalpb.ListImportsRequest{})
assert.NoError(t, err)
assert.Equal(t, int32(0), rsp.GetStatus().GetCode())
})
}

View File

@ -107,7 +107,7 @@ func (i *InsertData) Append(row map[FieldID]interface{}) error {
}
if err := field.AppendRow(v); err != nil {
return err
return merr.WrapErrParameterInvalidMsg(fmt.Sprintf("append data for field %d failed, err=%s", fID, err.Error()))
}
}
@ -122,11 +122,20 @@ func (i *InsertData) GetRow(idx int) map[FieldID]interface{} {
return res
}
// GetRowSize returns the sum of GetRowSize(idx) over every field column held in
// i.Data, i.e. the size of row idx across all fields.
func (i *InsertData) GetRowSize(idx int) int {
	var total int
	for _, column := range i.Data {
		total += column.GetRowSize(idx)
	}
	return total
}
// FieldData defines field data interface
type FieldData interface {
GetMemorySize() int
RowNum() int
GetRow(i int) any
GetRowSize(i int) int
GetRows() any
AppendRow(row interface{}) error
AppendRows(rows interface{}) error
@ -689,3 +698,38 @@ func (data *JSONFieldData) GetMemorySize() int {
}
return size
}
// GetRowSize implementations for fixed-width scalar columns: every row occupies
// the byte width of the element type, independent of the row index.
func (data *BoolFieldData) GetRowSize(i int) int   { return 1 }
func (data *Int8FieldData) GetRowSize(i int) int   { return 1 }
func (data *Int16FieldData) GetRowSize(i int) int  { return 2 }
func (data *Int32FieldData) GetRowSize(i int) int  { return 4 }
func (data *Int64FieldData) GetRowSize(i int) int  { return 8 }
func (data *FloatFieldData) GetRowSize(i int) int  { return 4 }
func (data *DoubleFieldData) GetRowSize(i int) int { return 8 }

// GetRowSize implementations for vector columns: one row holds Dim elements, so
// the row size in bytes is Dim times the element width (1 bit for binary
// vectors, 4 bytes for float32, 2 bytes for float16/bfloat16).
func (data *BinaryVectorFieldData) GetRowSize(i int) int   { return data.Dim / 8 }
func (data *FloatVectorFieldData) GetRowSize(i int) int    { return data.Dim * 4 }
func (data *Float16VectorFieldData) GetRowSize(i int) int  { return data.Dim * 2 }
func (data *BFloat16VectorFieldData) GetRowSize(i int) int { return data.Dim * 2 }

// GetRowSize implementations for variable-length columns: the byte length of
// row i plus a fixed 16 bytes per value.
func (data *StringFieldData) GetRowSize(i int) int { return len(data.Data[i]) + 16 }
func (data *JSONFieldData) GetRowSize(i int) int   { return len(data.Data[i]) + 16 }
// GetRowSize returns the size of the i-th array value, derived from the
// array's element type. Int8 and Int16 elements are carried in the int
// payload, so its binary size is divided by 4 and 2 respectively. String and
// VarChar rows reuse StringFieldData.GetMemorySize. Element types not listed
// below yield 0 (and row i is not accessed for them).
func (data *ArrayFieldData) GetRowSize(i int) int {
	rowSize := 0
	switch data.ElementType {
	case schemapb.DataType_Bool:
		rowSize = binary.Size(data.Data[i].GetBoolData().GetData())
	case schemapb.DataType_Int8:
		rowSize = binary.Size(data.Data[i].GetIntData().GetData()) / 4
	case schemapb.DataType_Int16:
		rowSize = binary.Size(data.Data[i].GetIntData().GetData()) / 2
	case schemapb.DataType_Int32:
		rowSize = binary.Size(data.Data[i].GetIntData().GetData())
	case schemapb.DataType_Int64:
		rowSize = binary.Size(data.Data[i].GetLongData().GetData())
	case schemapb.DataType_Float:
		rowSize = binary.Size(data.Data[i].GetFloatData().GetData())
	case schemapb.DataType_Double:
		rowSize = binary.Size(data.Data[i].GetDoubleData().GetData())
	case schemapb.DataType_String, schemapb.DataType_VarChar:
		elements := &StringFieldData{Data: data.Data[i].GetStringData().GetData()}
		rowSize = elements.GetMemorySize()
	}
	return rowSize
}

View File

@ -151,6 +151,25 @@ func (s *InsertDataSuite) TestMemorySize() {
s.Equal(s.iDataTwoRows.Data[BFloat16VectorField].GetMemorySize(), 20)
}
// TestGetRowSize checks the value GetRowSize(0) reports for each field column
// of the one-row fixture (s.iDataOneRow).
func (s *InsertDataSuite) TestGetRowSize() {
s.Equal(s.iDataOneRow.Data[RowIDField].GetRowSize(0), 8)
s.Equal(s.iDataOneRow.Data[TimestampField].GetRowSize(0), 8)
s.Equal(s.iDataOneRow.Data[BoolField].GetRowSize(0), 1)
s.Equal(s.iDataOneRow.Data[Int8Field].GetRowSize(0), 1)
s.Equal(s.iDataOneRow.Data[Int16Field].GetRowSize(0), 2)
s.Equal(s.iDataOneRow.Data[Int32Field].GetRowSize(0), 4)
s.Equal(s.iDataOneRow.Data[Int64Field].GetRowSize(0), 8)
s.Equal(s.iDataOneRow.Data[FloatField].GetRowSize(0), 4)
s.Equal(s.iDataOneRow.Data[DoubleField].GetRowSize(0), 8)
s.Equal(s.iDataOneRow.Data[StringField].GetRowSize(0), 19)
s.Equal(s.iDataOneRow.Data[JSONField].GetRowSize(0), len([]byte(`{"batch":1}`))+16)
s.Equal(s.iDataOneRow.Data[ArrayField].GetRowSize(0), 3*4)
// NOTE(review): the vector expectations below depend on the fixture's Dim and
// on how GetRowSize converts Dim to bytes — confirm they reflect the element
// width (4 bytes per float32, 2 bytes per float16/bfloat16 element).
s.Equal(s.iDataOneRow.Data[BinaryVectorField].GetRowSize(0), 1)
s.Equal(s.iDataOneRow.Data[FloatVectorField].GetRowSize(0), 4)
s.Equal(s.iDataOneRow.Data[Float16VectorField].GetRowSize(0), 2)
s.Equal(s.iDataOneRow.Data[BFloat16VectorField].GetRowSize(0), 2)
}
func (s *InsertDataSuite) TestGetDataType() {
for _, field := range s.schema.GetFields() {
fieldData, ok := s.iDataOneRow.Data[field.GetFieldID()]
@ -330,4 +349,5 @@ func (s *ArrayFieldDataSuite) TestArrayFieldData() {
s.Equal(1, insertData.GetRowNum())
s.Equal(114, insertData.GetMemorySize())
s.False(insertData.IsEmpty())
s.Equal(114, insertData.GetRowSize(0))
}

View File

@ -268,6 +268,14 @@ var (
Name: "index_node_num",
Help: "number of IndexNodes managed by IndexCoord",
}, []string{})
// ImportTasks is a gauge vector for import tasks in the DataCoord subsystem,
// labelled by "task_type" and "import_state".
ImportTasks = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: milvusNamespace,
Subsystem: typeutil.DataCoordRole,
Name: "import_tasks",
Help: "the import tasks grouping by type and state",
}, []string{"task_type", "import_state"})
)
// RegisterDataCoord registers DataCoord metrics
@ -290,6 +298,7 @@ func RegisterDataCoord(registry *prometheus.Registry) {
registry.MustRegister(IndexRequestCounter)
registry.MustRegister(IndexTaskNum)
registry.MustRegister(IndexNodeNum)
registry.MustRegister(ImportTasks)
}
func CleanupDataCoordSegmentMetrics(collectionID int64, segmentID int64) {

View File

@ -2519,6 +2519,14 @@ type dataCoordConfig struct {
AutoBalance ParamItem `refreshable:"true"`
CheckAutoBalanceConfigInterval ParamItem `refreshable:"false"`
// import
FilesPerPreImportTask ParamItem `refreshable:"true"`
ImportTaskRetention ParamItem `refreshable:"true"`
MaxSizeInMBPerImportTask ParamItem `refreshable:"true"`
ImportScheduleInterval ParamItem `refreshable:"true"`
ImportCheckIntervalHigh ParamItem `refreshable:"true"`
ImportCheckIntervalLow ParamItem `refreshable:"true"`
GracefulStopTimeout ParamItem `refreshable:"true"`
}
@ -2975,6 +2983,67 @@ During compaction, the size of segment # of rows is able to exceed segment max #
}
p.AutoUpgradeSegmentIndex.Init(base.mgr)
p.FilesPerPreImportTask = ParamItem{
Key: "dataCoord.import.filesPerPreImportTask",
Version: "2.4.0",
Doc: "The maximum number of files allowed per pre-import task.",
DefaultValue: "2",
PanicIfEmpty: false,
Export: true,
}
p.FilesPerPreImportTask.Init(base.mgr)
p.ImportTaskRetention = ParamItem{
Key: "dataCoord.import.taskRetention",
Version: "2.4.0",
Doc: "The retention period in seconds for tasks in the Completed or Failed state.",
DefaultValue: "10800",
PanicIfEmpty: false,
Export: true,
}
p.ImportTaskRetention.Init(base.mgr)
p.MaxSizeInMBPerImportTask = ParamItem{
Key: "dataCoord.import.maxSizeInMBPerImportTask",
Version: "2.4.0",
Doc: "To prevent generating of small segments, we will re-group imported files. " +
"This parameter represents the sum of file sizes in each group (each ImportTask).",
DefaultValue: "6144",
PanicIfEmpty: false,
Export: true,
}
p.MaxSizeInMBPerImportTask.Init(base.mgr)
p.ImportScheduleInterval = ParamItem{
Key: "dataCoord.import.scheduleInterval",
Version: "2.4.0",
Doc: "The interval for scheduling import, measured in seconds.",
DefaultValue: "2",
PanicIfEmpty: false,
Export: true,
}
p.ImportScheduleInterval.Init(base.mgr)
p.ImportCheckIntervalHigh = ParamItem{
Key: "dataCoord.import.checkIntervalHigh",
Version: "2.4.0",
Doc: "The interval for checking import, measured in seconds, is set to a high frequency for the import checker.",
DefaultValue: "2",
PanicIfEmpty: false,
Export: true,
}
p.ImportCheckIntervalHigh.Init(base.mgr)
p.ImportCheckIntervalLow = ParamItem{
Key: "dataCoord.import.checkIntervalLow",
Version: "2.4.0",
Doc: "The interval for checking import, measured in seconds, is set to a low frequency for the import checker.",
DefaultValue: "120",
PanicIfEmpty: false,
Export: true,
}
p.ImportCheckIntervalLow.Init(base.mgr)
p.GracefulStopTimeout = ParamItem{
Key: "dataCoord.gracefulStopTimeout",
Version: "2.3.7",

View File

@ -377,6 +377,12 @@ func TestComponentParam(t *testing.T) {
assert.Equal(t, true, Params.AutoBalance.GetAsBool())
assert.Equal(t, 10, Params.CheckAutoBalanceConfigInterval.GetAsInt())
assert.Equal(t, false, Params.AutoUpgradeSegmentIndex.GetAsBool())
assert.Equal(t, 2, Params.FilesPerPreImportTask.GetAsInt())
assert.Equal(t, 10800*time.Second, Params.ImportTaskRetention.GetAsDuration(time.Second))
assert.Equal(t, 6144, Params.MaxSizeInMBPerImportTask.GetAsInt())
assert.Equal(t, 2*time.Second, Params.ImportScheduleInterval.GetAsDuration(time.Second))
assert.Equal(t, 2*time.Second, Params.ImportCheckIntervalHigh.GetAsDuration(time.Second))
assert.Equal(t, 120*time.Second, Params.ImportCheckIntervalLow.GetAsDuration(time.Second))
params.Save("datacoord.gracefulStopTimeout", "100")
assert.Equal(t, 100*time.Second, Params.GracefulStopTimeout.GetAsDuration(time.Second))

View File

@ -0,0 +1,259 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package importv2
import (
"context"
"fmt"
"math/rand"
"os"
"testing"
"time"
"github.com/golang/protobuf/proto"
"github.com/stretchr/testify/suite"
"go.uber.org/zap"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/internal/util/importutilv2"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/funcutil"
"github.com/milvus-io/milvus/pkg/util/metric"
"github.com/milvus-io/milvus/tests/integration"
)
// BulkInsertSuite is an integration test suite for ImportV2 that embeds
// integration.MiniClusterSuite. Its fields parameterise a single run().
type BulkInsertSuite struct {
integration.MiniClusterSuite
// pkType is the data type of the primary-key field ("id") in the test schema.
pkType schemapb.DataType
// autoID is passed to the schema constructor and set as AutoID on the primary-key field.
autoID bool
// fileType selects which kind of import file run() generates (Numpy, JSON or Parquet).
fileType importutilv2.FileType
}
// SetupTest runs the embedded MiniClusterSuite's SetupTest and then resets the
// suite parameters to their defaults: Int64 primary key, autoID disabled and
// Parquet import files.
func (s *BulkInsertSuite) SetupTest() {
	s.MiniClusterSuite.SetupTest()

	s.pkType = schemapb.DataType_Int64
	s.autoID = false
	s.fileType = importutilv2.Parquet
}
// run executes the shared import scenario using the suite's current fileType,
// pkType and autoID settings. It creates a collection, generates import files
// holding rowCount rows, submits them through ImportV2 and waits for the job
// to finish, then builds an HNSW index, loads the collection and issues a
// search against the "embeddings" field.
func (s *BulkInsertSuite) run() {
	const rowCount = 100

	cluster := s.Cluster
	ctx, cancel := context.WithTimeout(cluster.GetContext(), 60*time.Second)
	defer cancel()

	collectionName := "TestBulkInsert" + funcutil.GenRandomStr()

	// Schema: primary key "id", a varchar "image_path" and a float vector "embeddings".
	pkField := &schemapb.FieldSchema{
		FieldID:      100,
		Name:         "id",
		DataType:     s.pkType,
		TypeParams:   []*commonpb.KeyValuePair{{Key: common.MaxLengthKey, Value: "128"}},
		IsPrimaryKey: true,
		AutoID:       s.autoID,
	}
	pathField := &schemapb.FieldSchema{
		FieldID:    101,
		Name:       "image_path",
		DataType:   schemapb.DataType_VarChar,
		TypeParams: []*commonpb.KeyValuePair{{Key: common.MaxLengthKey, Value: "65535"}},
	}
	vectorField := &schemapb.FieldSchema{
		FieldID:    102,
		Name:       "embeddings",
		DataType:   schemapb.DataType_FloatVector,
		TypeParams: []*commonpb.KeyValuePair{{Key: common.DimKey, Value: "128"}},
	}
	schema := integration.ConstructSchema(collectionName, dim, s.autoID, pkField, pathField, vectorField)

	marshaledSchema, err := proto.Marshal(schema)
	s.NoError(err)

	createCollectionStatus, err := cluster.Proxy.CreateCollection(ctx, &milvuspb.CreateCollectionRequest{
		CollectionName: collectionName,
		Schema:         marshaledSchema,
		ShardsNum:      common.DefaultShardsNum,
	})
	s.NoError(err)
	s.Equal(commonpb.ErrorCode_Success, createCollectionStatus.GetErrorCode())

	// Generate the import files for the configured file type.
	var files []*internalpb.ImportFile
	err = os.MkdirAll(cluster.ChunkManager.RootPath(), os.ModePerm)
	s.NoError(err)

	switch s.fileType {
	case importutilv2.Numpy:
		importFile, err := GenerateNumpyFiles(cluster.ChunkManager, schema, rowCount)
		s.NoError(err)
		files = []*internalpb.ImportFile{importFile}
	case importutilv2.JSON:
		rowBasedFile := cluster.ChunkManager.RootPath() + "/" + "test.json"
		GenerateJSONFile(s.T(), rowBasedFile, schema, rowCount)
		defer os.Remove(rowBasedFile)
		files = []*internalpb.ImportFile{{Paths: []string{rowBasedFile}}}
	case importutilv2.Parquet:
		filePath := fmt.Sprintf("/tmp/test_%d.parquet", rand.Int())
		err = GenerateParquetFile(filePath, schema, rowCount)
		s.NoError(err)
		defer os.Remove(filePath)
		files = []*internalpb.ImportFile{{Paths: []string{filePath}}}
	}

	// Submit the import job and wait until it reaches a terminal state.
	importResp, err := cluster.Proxy.ImportV2(ctx, &internalpb.ImportRequest{
		CollectionName: collectionName,
		Files:          files,
	})
	s.NoError(err)
	s.Equal(int32(0), importResp.GetStatus().GetCode())
	log.Info("Import result", zap.Any("importResp", importResp))

	jobID := importResp.GetJobID()
	err = WaitForImportDone(ctx, cluster, jobID)
	s.NoError(err)

	segments, err := cluster.MetaWatcher.ShowSegments()
	s.NoError(err)
	s.NotEmpty(segments)

	// Create an index on the vector field and wait for it to be built.
	createIndexStatus, err := cluster.Proxy.CreateIndex(ctx, &milvuspb.CreateIndexRequest{
		CollectionName: collectionName,
		FieldName:      "embeddings",
		IndexName:      "_default",
		ExtraParams:    integration.ConstructIndexParam(dim, integration.IndexHNSW, metric.L2),
	})
	s.NoError(err)
	s.Equal(commonpb.ErrorCode_Success, createIndexStatus.GetErrorCode())
	s.WaitForIndexBuilt(ctx, collectionName, "embeddings")

	// Load the collection.
	loadStatus, err := cluster.Proxy.LoadCollection(ctx, &milvuspb.LoadCollectionRequest{
		CollectionName: collectionName,
	})
	s.NoError(err)
	s.Equal(commonpb.ErrorCode_Success, loadStatus.GetErrorCode())
	s.WaitForLoad(ctx, collectionName)

	// Search the imported data.
	var (
		expr         = ""
		nq           = 10
		topk         = 10
		roundDecimal = -1
	)
	params := integration.GetSearchParams(integration.IndexHNSW, metric.L2)
	searchReq := integration.ConstructSearchRequest("", collectionName, expr,
		"embeddings", schemapb.DataType_FloatVector, nil, metric.L2, params, nq, dim, topk, roundDecimal)

	searchResult, err := cluster.Proxy.Search(ctx, searchReq)
	s.NoError(err)
	s.Equal(commonpb.ErrorCode_Success, searchResult.GetStatus().GetErrorCode())
}
// TestNumpy runs the shared import scenario with Numpy import files.
func (s *BulkInsertSuite) TestNumpy() {
	s.fileType = importutilv2.Numpy
	s.run()
}
// TestJSON runs the shared import scenario with a JSON import file.
func (s *BulkInsertSuite) TestJSON() {
	s.fileType = importutilv2.JSON
	s.run()
}
// TestParquet runs the shared import scenario with a Parquet import file.
func (s *BulkInsertSuite) TestParquet() {
	s.fileType = importutilv2.Parquet
	s.run()
}
// TestAutoID runs the shared import scenario with an auto-generated primary
// key, once with an Int64 key and once with a VarChar key.
func (s *BulkInsertSuite) TestAutoID() {
	pkTypes := []schemapb.DataType{
		schemapb.DataType_Int64,
		schemapb.DataType_VarChar,
	}
	for _, pkType := range pkTypes {
		s.pkType = pkType
		s.autoID = true
		s.run()
	}
}
// TestPK runs the shared import scenario once with an Int64 primary key and
// once with a VarChar primary key, leaving the other suite knobs untouched.
func (s *BulkInsertSuite) TestPK() {
	pkTypes := []schemapb.DataType{
		schemapb.DataType_Int64,
		schemapb.DataType_VarChar,
	}
	for _, pkType := range pkTypes {
		s.pkType = pkType
		s.run()
	}
}
// TestZeroRowCount imports a Parquet file generated with zero rows into a
// collection with an auto-ID Int64 primary key, waits for the job to finish
// and asserts that no segments are reported afterwards.
func (s *BulkInsertSuite) TestZeroRowCount() {
	const rowCount = 0

	cluster := s.Cluster
	ctx, cancel := context.WithTimeout(cluster.GetContext(), 60*time.Second)
	defer cancel()

	collectionName := "TestBulkInsert_" + funcutil.GenRandomStr()

	pkField := &schemapb.FieldSchema{
		FieldID:      100,
		Name:         "id",
		DataType:     schemapb.DataType_Int64,
		IsPrimaryKey: true,
		AutoID:       true,
	}
	pathField := &schemapb.FieldSchema{
		FieldID:    101,
		Name:       "image_path",
		DataType:   schemapb.DataType_VarChar,
		TypeParams: []*commonpb.KeyValuePair{{Key: common.MaxLengthKey, Value: "65535"}},
	}
	vectorField := &schemapb.FieldSchema{
		FieldID:    102,
		Name:       "embeddings",
		DataType:   schemapb.DataType_FloatVector,
		TypeParams: []*commonpb.KeyValuePair{{Key: common.DimKey, Value: "128"}},
	}
	schema := integration.ConstructSchema(collectionName, dim, true, pkField, pathField, vectorField)

	marshaledSchema, err := proto.Marshal(schema)
	s.NoError(err)

	createCollectionStatus, err := cluster.Proxy.CreateCollection(ctx, &milvuspb.CreateCollectionRequest{
		CollectionName: collectionName,
		Schema:         marshaledSchema,
		ShardsNum:      common.DefaultShardsNum,
	})
	s.NoError(err)
	s.Equal(commonpb.ErrorCode_Success, createCollectionStatus.GetErrorCode())

	// Generate an empty Parquet file and remove it when the test returns.
	filePath := fmt.Sprintf("/tmp/test_%d.parquet", rand.Int())
	err = GenerateParquetFile(filePath, schema, rowCount)
	s.NoError(err)
	defer os.Remove(filePath)

	files := []*internalpb.ImportFile{{Paths: []string{filePath}}}

	importResp, err := cluster.Proxy.ImportV2(ctx, &internalpb.ImportRequest{
		CollectionName: collectionName,
		Files:          files,
	})
	s.NoError(err)
	log.Info("Import result", zap.Any("importResp", importResp))

	jobID := importResp.GetJobID()
	err = WaitForImportDone(ctx, cluster, jobID)
	s.NoError(err)

	segments, err := cluster.MetaWatcher.ShowSegments()
	s.NoError(err)
	s.Empty(segments)
}
// TestBulkInsert is the go test entry point that runs BulkInsertSuite.
func TestBulkInsert(t *testing.T) {
	suite.Run(t, &BulkInsertSuite{})
}

View File

@ -0,0 +1,682 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package importv2
import (
"context"
rand2 "crypto/rand"
"encoding/json"
"fmt"
"math/rand"
"os"
"strconv"
"testing"
"time"
"github.com/apache/arrow/go/v12/arrow"
"github.com/apache/arrow/go/v12/arrow/array"
"github.com/apache/arrow/go/v12/arrow/memory"
"github.com/apache/arrow/go/v12/parquet"
"github.com/apache/arrow/go/v12/parquet/pqarrow"
"github.com/samber/lo"
"github.com/sbinet/npyio"
"github.com/stretchr/testify/assert"
"go.uber.org/zap"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/typeutil"
"github.com/milvus-io/milvus/tests/integration"
)
// dim is the vector dimension used by the import tests in this package when
// constructing schemas, index/search parameters and generated vector data.
const dim = 128
// createInsertData builds a storage.InsertData for schema and fills every
// field that is not marked AutoID with rowCount generated values. Vector
// fields read their dimension from the field's type params via
// typeutil.GetDim. An unsupported field data type causes a panic.
func createInsertData(t *testing.T, schema *schemapb.CollectionSchema, rowCount int) *storage.InsertData {
	insertData, err := storage.NewInsertData(schema)
	assert.NoError(t, err)

	for _, field := range schema.GetFields() {
		if field.GetAutoID() {
			continue
		}
		fieldID := field.GetFieldID()

		switch dataType := field.GetDataType(); dataType {
		case schemapb.DataType_Bool:
			values := make([]bool, 0)
			for row := 0; row < rowCount; row++ {
				values = append(values, row%3 != 0)
			}
			insertData.Data[fieldID] = &storage.BoolFieldData{Data: values}

		case schemapb.DataType_Float:
			values := make([]float32, 0)
			for row := 0; row < rowCount; row++ {
				values = append(values, float32(row/2))
			}
			insertData.Data[fieldID] = &storage.FloatFieldData{Data: values}

		case schemapb.DataType_Double:
			values := make([]float64, 0)
			for row := 0; row < rowCount; row++ {
				values = append(values, float64(row/5))
			}
			insertData.Data[fieldID] = &storage.DoubleFieldData{Data: values}

		case schemapb.DataType_Int8:
			values := make([]int8, 0)
			for row := 0; row < rowCount; row++ {
				values = append(values, int8(row%256))
			}
			insertData.Data[fieldID] = &storage.Int8FieldData{Data: values}

		case schemapb.DataType_Int16:
			values := make([]int16, 0)
			for row := 0; row < rowCount; row++ {
				values = append(values, int16(row%65536))
			}
			insertData.Data[fieldID] = &storage.Int16FieldData{Data: values}

		case schemapb.DataType_Int32:
			values := make([]int32, 0)
			for row := 0; row < rowCount; row++ {
				values = append(values, int32(row%1000))
			}
			insertData.Data[fieldID] = &storage.Int32FieldData{Data: values}

		case schemapb.DataType_Int64:
			values := make([]int64, 0)
			for row := 0; row < rowCount; row++ {
				values = append(values, int64(row))
			}
			insertData.Data[fieldID] = &storage.Int64FieldData{Data: values}

		case schemapb.DataType_BinaryVector:
			vecDim, err := typeutil.GetDim(field)
			assert.NoError(t, err)
			// One bit per dimension, so dim/8 bytes per row.
			byteCount := rowCount * int(vecDim) / 8
			values := make([]byte, 0)
			for i := 0; i < byteCount; i++ {
				values = append(values, byte(i%256))
			}
			insertData.Data[fieldID] = &storage.BinaryVectorFieldData{Data: values, Dim: int(vecDim)}

		case schemapb.DataType_FloatVector:
			vecDim, err := typeutil.GetDim(field)
			assert.NoError(t, err)
			elemCount := rowCount * int(vecDim)
			values := make([]float32, 0)
			for i := 0; i < elemCount; i++ {
				values = append(values, rand.Float32())
			}
			insertData.Data[fieldID] = &storage.FloatVectorFieldData{Data: values, Dim: int(vecDim)}

		case schemapb.DataType_Float16Vector:
			vecDim, err := typeutil.GetDim(field)
			assert.NoError(t, err)
			// Two bytes per dimension, filled with random bytes.
			values := make([]byte, int64(rowCount)*vecDim*2)
			_, err = rand2.Read(values)
			assert.NoError(t, err)
			insertData.Data[fieldID] = &storage.Float16VectorFieldData{Data: values, Dim: int(vecDim)}

		case schemapb.DataType_String, schemapb.DataType_VarChar:
			values := make([]string, 0)
			for row := 0; row < rowCount; row++ {
				values = append(values, strconv.Itoa(row))
			}
			insertData.Data[fieldID] = &storage.StringFieldData{Data: values}

		case schemapb.DataType_JSON:
			values := make([][]byte, 0)
			for row := 0; row < rowCount; row++ {
				values = append(values, []byte(fmt.Sprintf("{\"y\": %d}", row)))
			}
			insertData.Data[fieldID] = &storage.JSONFieldData{Data: values}

		case schemapb.DataType_Array:
			// Every row is an int array holding three consecutive values.
			values := make([]*schemapb.ScalarField, 0)
			for row := 0; row < rowCount; row++ {
				ints := &schemapb.IntArray{
					Data: []int32{int32(row), int32(row + 1), int32(row + 2)},
				}
				values = append(values, &schemapb.ScalarField{
					Data: &schemapb.ScalarField_IntData{IntData: ints},
				})
			}
			insertData.Data[fieldID] = &storage.ArrayFieldData{Data: values}

		default:
			panic(fmt.Sprintf("unexpected data type: %s", dataType.String()))
		}
	}
	return insertData
}
// milvusDataTypeToArrowType maps a Milvus data type to the Arrow type used
// when writing test Parquet files. Vector types map to a list of their element
// type, except that a BinaryVector maps to arrow.BinaryType when isBinary is
// true. Unsupported data types cause a panic.
func milvusDataTypeToArrowType(dataType schemapb.DataType, isBinary bool) arrow.DataType {
	// listOf builds the list type shared by all vector mappings: a nullable
	// element field named "item" of the given element type.
	listOf := func(elem arrow.DataType) arrow.DataType {
		return arrow.ListOfField(arrow.Field{
			Name:     "item",
			Type:     elem,
			Nullable: true,
			Metadata: arrow.Metadata{},
		})
	}

	switch dataType {
	case schemapb.DataType_Bool:
		return &arrow.BooleanType{}
	case schemapb.DataType_Int8:
		return &arrow.Int8Type{}
	case schemapb.DataType_Int16:
		return &arrow.Int16Type{}
	case schemapb.DataType_Int32:
		return &arrow.Int32Type{}
	case schemapb.DataType_Int64:
		return &arrow.Int64Type{}
	case schemapb.DataType_Float:
		return &arrow.Float32Type{}
	case schemapb.DataType_Double:
		return &arrow.Float64Type{}
	case schemapb.DataType_VarChar, schemapb.DataType_String, schemapb.DataType_JSON:
		// JSON values are stored as strings.
		return &arrow.StringType{}
	case schemapb.DataType_Array:
		return &arrow.ListType{}
	case schemapb.DataType_FloatVector:
		return listOf(&arrow.Float32Type{})
	case schemapb.DataType_BinaryVector:
		if isBinary {
			return &arrow.BinaryType{}
		}
		return listOf(&arrow.Uint8Type{})
	case schemapb.DataType_Float16Vector:
		return listOf(&arrow.Float16Type{})
	default:
		panic("unsupported data type")
	}
}
// convertMilvusSchemaToArrowSchema builds an Arrow schema with one nullable
// field per Milvus field, skipping an auto-ID primary key. Array fields become
// a list of their element type; every other field is mapped through
// milvusDataTypeToArrowType, with the binary representation requested only for
// a field named "FieldBinaryVector2".
func convertMilvusSchemaToArrowSchema(schema *schemapb.CollectionSchema) *arrow.Schema {
	arrowFields := make([]arrow.Field, 0)
	for _, f := range schema.GetFields() {
		if f.GetIsPrimaryKey() && f.GetAutoID() {
			continue
		}

		var arrowType arrow.DataType
		if f.GetDataType() == schemapb.DataType_Array {
			arrowType = arrow.ListOfField(arrow.Field{
				Name:     "item",
				Type:     milvusDataTypeToArrowType(f.GetElementType(), false),
				Nullable: true,
				Metadata: arrow.Metadata{},
			})
		} else {
			arrowType = milvusDataTypeToArrowType(f.GetDataType(), f.Name == "FieldBinaryVector2")
		}

		arrowFields = append(arrowFields, arrow.Field{
			Name:     f.GetName(),
			Type:     arrowType,
			Nullable: true,
			Metadata: arrow.Metadata{},
		})
	}
	return arrow.NewSchema(arrowFields, nil)
}
// randomString returns a string of the given length whose characters are drawn
// from the ASCII letters a-z and A-Z using math/rand.
func randomString(length int) string {
	const alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

	// The alphabet is pure ASCII, so one byte per character is sufficient.
	out := make([]byte, length)
	for i := 0; i < length; i++ {
		out[i] = alphabet[rand.Intn(len(alphabet))]
	}
	return string(out)
}
// buildArrayData generates an Arrow array holding rows generated values for a
// column of the given Milvus data type.
//
// dim is the vector dimension used for vector columns, elementType is only
// consulted for DataType_Array columns, and isBinary selects the binary (rather
// than list-of-uint8) representation for BinaryVector columns. nil is returned
// for a data type, or an array element type, that has no case below.
func buildArrayData(dataType, elementType schemapb.DataType, dim, rows int, isBinary bool) arrow.Array {
	mem := memory.NewGoAllocator()

	switch dataType {
	case schemapb.DataType_Bool:
		b := array.NewBooleanBuilder(mem)
		for row := 0; row < rows; row++ {
			b.Append(row%2 == 0)
		}
		return b.NewBooleanArray()

	case schemapb.DataType_Int8:
		b := array.NewInt8Builder(mem)
		for row := 0; row < rows; row++ {
			b.Append(int8(row))
		}
		return b.NewInt8Array()

	case schemapb.DataType_Int16:
		b := array.NewInt16Builder(mem)
		for row := 0; row < rows; row++ {
			b.Append(int16(row))
		}
		return b.NewInt16Array()

	case schemapb.DataType_Int32:
		b := array.NewInt32Builder(mem)
		for row := 0; row < rows; row++ {
			b.Append(int32(row))
		}
		return b.NewInt32Array()

	case schemapb.DataType_Int64:
		b := array.NewInt64Builder(mem)
		for row := 0; row < rows; row++ {
			b.Append(int64(row))
		}
		return b.NewInt64Array()

	case schemapb.DataType_Float:
		b := array.NewFloat32Builder(mem)
		for row := 0; row < rows; row++ {
			b.Append(float32(row) * 0.1)
		}
		return b.NewFloat32Array()

	case schemapb.DataType_Double:
		b := array.NewFloat64Builder(mem)
		for row := 0; row < rows; row++ {
			b.Append(float64(row) * 0.02)
		}
		return b.NewFloat64Array()

	case schemapb.DataType_VarChar, schemapb.DataType_String:
		b := array.NewStringBuilder(mem)
		for row := 0; row < rows; row++ {
			b.Append(randomString(10))
		}
		return b.NewStringArray()

	case schemapb.DataType_FloatVector:
		lb := array.NewListBuilder(mem, &arrow.Float32Type{})
		offsets := make([]int32, 0, rows)
		valid := make([]bool, 0, rows)
		// Append all dim*rows values first, then mark the row boundaries.
		values := lb.ValueBuilder().(*array.Float32Builder)
		for i := 0; i < dim*rows; i++ {
			values.Append(float32(i))
		}
		for row := 0; row < rows; row++ {
			offsets = append(offsets, int32(row*dim))
			valid = append(valid, true)
		}
		lb.AppendValues(offsets, valid)
		return lb.NewListArray()

	case schemapb.DataType_BinaryVector:
		if isBinary {
			bb := array.NewBinaryBuilder(mem, &arrow.BinaryType{})
			for row := 0; row < rows; row++ {
				// dim/8 bytes per row, each taken from a random letter.
				element := make([]byte, dim/8)
				for j := range element {
					element[j] = randomString(1)[0]
				}
				bb.Append(element)
			}
			return bb.NewBinaryArray()
		}
		lb := array.NewListBuilder(mem, &arrow.Uint8Type{})
		offsets := make([]int32, 0, rows)
		valid := make([]bool, 0)
		values := lb.ValueBuilder().(*array.Uint8Builder)
		for i := 0; i < dim*rows/8; i++ {
			values.Append(uint8(i))
		}
		for row := 0; row < rows; row++ {
			offsets = append(offsets, int32(dim*row/8))
			valid = append(valid, true)
		}
		lb.AppendValues(offsets, valid)
		return lb.NewListArray()

	case schemapb.DataType_JSON:
		b := array.NewStringBuilder(mem)
		for row := 0; row < rows; row++ {
			b.Append(fmt.Sprintf("{\"a\": \"%s\", \"b\": %d}", randomString(3), row))
		}
		return b.NewStringArray()

	case schemapb.DataType_Array:
		// Row offsets advance by row%10 per row; total ends up as the number of
		// element values appended to the value builder below.
		offsets := make([]int32, 0, rows)
		valid := make([]bool, 0, rows)
		total := 0
		for row := 0; row < rows; row++ {
			total += row % 10
			offsets = append(offsets, int32(total))
			valid = append(valid, true)
		}
		// finish records the row offsets on the list builder and materializes it.
		finish := func(lb *array.ListBuilder) arrow.Array {
			lb.AppendValues(offsets, valid)
			return lb.NewListArray()
		}

		switch elementType {
		case schemapb.DataType_Bool:
			lb := array.NewListBuilder(mem, &arrow.BooleanType{})
			values := lb.ValueBuilder().(*array.BooleanBuilder)
			for i := 0; i < total; i++ {
				values.Append(i%2 == 0)
			}
			return finish(lb)

		case schemapb.DataType_Int8:
			lb := array.NewListBuilder(mem, &arrow.Int8Type{})
			values := lb.ValueBuilder().(*array.Int8Builder)
			for i := 0; i < total; i++ {
				values.Append(int8(i))
			}
			return finish(lb)

		case schemapb.DataType_Int16:
			lb := array.NewListBuilder(mem, &arrow.Int16Type{})
			values := lb.ValueBuilder().(*array.Int16Builder)
			for i := 0; i < total; i++ {
				values.Append(int16(i))
			}
			return finish(lb)

		case schemapb.DataType_Int32:
			lb := array.NewListBuilder(mem, &arrow.Int32Type{})
			values := lb.ValueBuilder().(*array.Int32Builder)
			for i := 0; i < total; i++ {
				values.Append(int32(i))
			}
			return finish(lb)

		case schemapb.DataType_Int64:
			lb := array.NewListBuilder(mem, &arrow.Int64Type{})
			values := lb.ValueBuilder().(*array.Int64Builder)
			for i := 0; i < total; i++ {
				values.Append(int64(i))
			}
			return finish(lb)

		case schemapb.DataType_Float:
			lb := array.NewListBuilder(mem, &arrow.Float32Type{})
			values := lb.ValueBuilder().(*array.Float32Builder)
			for i := 0; i < total; i++ {
				values.Append(float32(i) * 0.1)
			}
			return finish(lb)

		case schemapb.DataType_Double:
			lb := array.NewListBuilder(mem, &arrow.Float64Type{})
			values := lb.ValueBuilder().(*array.Float64Builder)
			for i := 0; i < total; i++ {
				values.Append(float64(i) * 0.02)
			}
			return finish(lb)

		case schemapb.DataType_VarChar, schemapb.DataType_String:
			lb := array.NewListBuilder(mem, &arrow.StringType{})
			values := lb.ValueBuilder().(*array.StringBuilder)
			for i := 0; i < total; i++ {
				values.Append(randomString(5) + "-" + fmt.Sprintf("%d", i))
			}
			return finish(lb)
		}
	}
	return nil
}
// GenerateParquetFile writes a Parquet file at filePath containing numRows
// generated rows for every field of schema except an auto-ID primary key.
// Vector columns are generated with the package-level dim.
//
// The file is created if missing and truncated if it already exists, so that
// no bytes from an earlier, longer file remain after the new content. An error
// is returned if the file cannot be opened, the Parquet writer cannot be
// created, a field has a data type for which no column data can be generated,
// or writing/closing the Parquet writer fails.
func GenerateParquetFile(filePath string, schema *schemapb.CollectionSchema, numRows int) (err error) {
	w, err := os.OpenFile(filePath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0o666)
	if err != nil {
		return err
	}
	// Make sure the descriptor is released on every path, including a failed
	// NewFileWriter. The error is deliberately ignored: when the Parquet writer
	// was closed first (deferred below, runs earlier) it may already have
	// closed the file, in which case this second Close only reports that.
	defer func() { _ = w.Close() }()

	pqSchema := convertMilvusSchemaToArrowSchema(schema)
	fw, err := pqarrow.NewFileWriter(pqSchema, w, parquet.NewWriterProperties(parquet.WithMaxRowGroupLength(int64(numRows))), pqarrow.DefaultWriterProps())
	if err != nil {
		return err
	}
	// Surface the writer's Close error unless an earlier error is already
	// being returned.
	defer func() {
		if closeErr := fw.Close(); err == nil {
			err = closeErr
		}
	}()

	columns := make([]arrow.Array, 0, len(schema.Fields))
	for _, field := range schema.Fields {
		if field.GetIsPrimaryKey() && field.GetAutoID() {
			continue
		}
		columnData := buildArrayData(field.DataType, field.ElementType, dim, numRows, field.Name == "FieldBinaryVector2")
		if columnData == nil {
			// buildArrayData returns nil for types it has no generator for;
			// report that instead of passing a nil column to array.NewRecord.
			return fmt.Errorf("no generated column data for field %q of type %s", field.Name, field.DataType.String())
		}
		columns = append(columns, columnData)
	}

	recordBatch := array.NewRecord(pqSchema, columns, int64(numRows))
	return fw.Write(recordBatch)
}
// GenerateNumpyFiles writes one "<field name>.npy" file under cm.RootPath() for
// every field of schema except an auto-ID primary key, each generated by
// GenerateNumpyFile with rowCount rows. It returns an ImportFile listing the
// written paths, or the first generation error.
func GenerateNumpyFiles(cm storage.ChunkManager, schema *schemapb.CollectionSchema, rowCount int) (*internalpb.ImportFile, error) {
	npyPaths := make([]string, 0)
	for _, f := range schema.GetFields() {
		if f.GetAutoID() && f.GetIsPrimaryKey() {
			continue
		}
		npyPath := fmt.Sprintf("%s/%s.npy", cm.RootPath(), f.GetName())
		if err := GenerateNumpyFile(npyPath, rowCount, f.GetDataType()); err != nil {
			return nil, err
		}
		npyPaths = append(npyPaths, npyPath)
	}

	importFile := &internalpb.ImportFile{Paths: npyPaths}
	return importFile, nil
}
// GenerateNumpyFile generates rowCount values for the given data type and
// writes them to filePath with npyio.Write. Vector types use the package-level
// dim. It returns any error from generating random bytes, creating the file or
// writing it, and panics for a data type that has no case below.
func GenerateNumpyFile(filePath string, rowCount int, dType schemapb.DataType) error {
	// payload is the generated data; it is written once after the switch.
	var payload interface{}

	switch dType {
	case schemapb.DataType_Bool:
		values := make([]bool, 0)
		for row := 0; row < rowCount; row++ {
			values = append(values, row%3 != 0)
		}
		payload = values

	case schemapb.DataType_Float:
		values := make([]float32, 0)
		for row := 0; row < rowCount; row++ {
			values = append(values, float32(row/2))
		}
		payload = values

	case schemapb.DataType_Double:
		values := make([]float64, 0)
		for row := 0; row < rowCount; row++ {
			values = append(values, float64(row/5))
		}
		payload = values

	case schemapb.DataType_Int8:
		values := make([]int8, 0)
		for row := 0; row < rowCount; row++ {
			values = append(values, int8(row%256))
		}
		payload = values

	case schemapb.DataType_Int16:
		values := make([]int16, 0)
		for row := 0; row < rowCount; row++ {
			values = append(values, int16(row%65536))
		}
		payload = values

	case schemapb.DataType_Int32:
		values := make([]int32, 0)
		for row := 0; row < rowCount; row++ {
			values = append(values, int32(row%1000))
		}
		payload = values

	case schemapb.DataType_Int64:
		values := make([]int64, 0)
		for row := 0; row < rowCount; row++ {
			values = append(values, int64(row))
		}
		payload = values

	case schemapb.DataType_BinaryVector:
		// One bit per dimension, written as a flat byte slice.
		values := make([]byte, 0)
		byteCount := rowCount * dim / 8
		for i := 0; i < byteCount; i++ {
			values = append(values, byte(i%256))
		}
		payload = values

	case schemapb.DataType_FloatVector:
		vectors := make([][dim]float32, 0, rowCount)
		for row := 0; row < rowCount; row++ {
			var vec [dim]float32
			for j := range vec {
				vec[j] = 1.1
			}
			vectors = append(vectors, vec)
		}
		payload = vectors

	case schemapb.DataType_Float16Vector:
		// Two bytes per dimension, filled with random bytes.
		raw := make([]byte, int64(rowCount)*dim*2)
		if _, err := rand2.Read(raw); err != nil {
			return err
		}
		payload = raw

	case schemapb.DataType_String, schemapb.DataType_VarChar:
		values := make([]string, 0)
		for row := 0; row < rowCount; row++ {
			values = append(values, strconv.Itoa(row))
		}
		payload = values

	case schemapb.DataType_JSON:
		values := make([][]byte, 0)
		for row := 0; row < rowCount; row++ {
			values = append(values, []byte(fmt.Sprintf("{\"y\": %d}", row)))
		}
		payload = values

	case schemapb.DataType_Array:
		values := make([]*schemapb.ScalarField, 0)
		for row := 0; row < rowCount; row++ {
			ints := &schemapb.IntArray{
				Data: []int32{int32(row), int32(row + 1), int32(row + 2)},
			}
			values = append(values, &schemapb.ScalarField{
				Data: &schemapb.ScalarField_IntData{IntData: ints},
			})
		}
		payload = values

	default:
		panic(fmt.Sprintf("unimplemented data type: %s", dType.String()))
	}

	f, err := os.Create(filePath)
	if err != nil {
		return err
	}
	defer f.Close()
	return npyio.Write(f, payload)
}
// GenerateJSONFile generates count rows for schema via createInsertData and
// writes them to filePath as a JSON array of objects keyed by field name.
// Fields marked AutoID are omitted. Array fields are emitted from their int
// data, JSON fields as strings, and BinaryVector/Float16Vector fields as lists
// of integer byte values. Marshal and write failures are reported through t.
func GenerateJSONFile(t *testing.T, filePath string, schema *schemapb.CollectionSchema, count int) {
	insertData := createInsertData(t, schema, count)
	fieldIDToField := lo.KeyBy(schema.GetFields(), func(field *schemapb.FieldSchema) int64 {
		return field.GetFieldID()
	})

	rows := make([]map[string]any, 0, count)
	for rowIdx := 0; rowIdx < count; rowIdx++ {
		// Collect the row by field ID first, then re-key it by field name.
		byID := make(map[int64]interface{})
		for fieldID, column := range insertData.Data {
			field := fieldIDToField[fieldID]
			if field.GetAutoID() {
				continue
			}
			switch field.GetDataType() {
			case schemapb.DataType_Array:
				byID[fieldID] = column.GetRow(rowIdx).(*schemapb.ScalarField).GetIntData().GetData()
			case schemapb.DataType_JSON:
				byID[fieldID] = string(column.GetRow(rowIdx).([]byte))
			case schemapb.DataType_BinaryVector, schemapb.DataType_Float16Vector:
				raw := column.GetRow(rowIdx).([]byte)
				ints := make([]int, 0, len(raw))
				for _, b := range raw {
					ints = append(ints, int(b))
				}
				byID[fieldID] = ints
			default:
				byID[fieldID] = column.GetRow(rowIdx)
			}
		}
		byName := lo.MapKeys(byID, func(_ any, fieldID int64) string {
			return fieldIDToField[fieldID].GetName()
		})
		rows = append(rows, byName)
	}

	jsonBytes, err := json.Marshal(rows)
	assert.NoError(t, err)

	err = os.WriteFile(filePath, jsonBytes, 0644) // nolint
	assert.NoError(t, err)
}
// WaitForImportDone polls the import progress of jobID through the cluster's
// proxy, about once per second, until the job reaches a terminal state.
//
// It returns nil when the job is Completed, an import-failed error carrying
// the reported reason when the job is Failed, the RPC or status error if a
// progress query fails, and an error wrapping ctx.Err() if ctx is cancelled or
// times out while waiting between polls.
func WaitForImportDone(ctx context.Context, c *integration.MiniClusterV2, jobID string) error {
	for {
		resp, err := c.Proxy.GetImportProgress(ctx, &internalpb.GetImportProgressRequest{
			JobID: jobID,
		})
		if err != nil {
			return err
		}
		if err = merr.Error(resp.GetStatus()); err != nil {
			return err
		}

		switch resp.GetState() {
		case internalpb.ImportJobState_Completed:
			return nil
		case internalpb.ImportJobState_Failed:
			return merr.WrapErrImportFailed(resp.GetReason())
		}

		log.Info("import progress", zap.String("jobID", jobID),
			zap.Int64("progress", resp.GetProgress()),
			zap.String("state", resp.GetState().String()))

		// Wait before the next poll, but stop promptly when the caller's
		// context ends instead of sleeping through it.
		select {
		case <-ctx.Done():
			return fmt.Errorf("waiting for import job %s: %w", jobID, ctx.Err())
		case <-time.After(1 * time.Second):
		}
	}
}