mirror of
https://gitee.com/milvus-io/milvus.git
synced 2024-12-02 03:48:37 +08:00
enhance: Add max length check for csv import (#37077)
1. Add max length check for csv import. 2. Tidy import options. 3. Tidy common import util functions. issue: https://github.com/milvus-io/milvus/issues/34150 --------- Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
This commit is contained in:
parent
088d5d7d76
commit
b45cf2d49f
@ -1681,20 +1681,13 @@ func (s *Server) ImportV2(ctx context.Context, in *internalpb.ImportRequestInter
|
|||||||
zap.Int64("collection", in.GetCollectionID()),
|
zap.Int64("collection", in.GetCollectionID()),
|
||||||
zap.Int64s("partitions", in.GetPartitionIDs()),
|
zap.Int64s("partitions", in.GetPartitionIDs()),
|
||||||
zap.Strings("channels", in.GetChannelNames()))
|
zap.Strings("channels", in.GetChannelNames()))
|
||||||
log.Info("receive import request", zap.Any("files", in.GetFiles()))
|
log.Info("receive import request", zap.Any("files", in.GetFiles()), zap.Any("options", in.GetOptions()))
|
||||||
|
|
||||||
var timeoutTs uint64 = math.MaxUint64
|
timeoutTs, err := importutilv2.GetTimeoutTs(in.GetOptions())
|
||||||
timeoutStr, err := funcutil.GetAttrByKeyFromRepeatedKV("timeout", in.GetOptions())
|
|
||||||
if err == nil {
|
|
||||||
// Specifies the timeout duration for import, such as "300s", "1.5h" or "1h45m".
|
|
||||||
dur, err := time.ParseDuration(timeoutStr)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
resp.Status = merr.Status(merr.WrapErrImportFailed(fmt.Sprint("parse import timeout failed, err=%w", err)))
|
resp.Status = merr.Status(merr.WrapErrImportFailed(err.Error()))
|
||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
curTs := tsoutil.GetCurrentTime()
|
|
||||||
timeoutTs = tsoutil.AddPhysicalDurationOnTs(curTs, dur)
|
|
||||||
}
|
|
||||||
|
|
||||||
files := in.GetFiles()
|
files := in.GetFiles()
|
||||||
isBackup := importutilv2.IsBackup(in.GetOptions())
|
isBackup := importutilv2.IsBackup(in.GetOptions())
|
||||||
|
@ -6247,6 +6247,7 @@ func (node *Proxy) ImportV2(ctx context.Context, req *internalpb.ImportRequest)
|
|||||||
zap.String("partition name", req.GetPartitionName()),
|
zap.String("partition name", req.GetPartitionName()),
|
||||||
zap.Any("files", req.GetFiles()),
|
zap.Any("files", req.GetFiles()),
|
||||||
zap.String("role", typeutil.ProxyRole),
|
zap.String("role", typeutil.ProxyRole),
|
||||||
|
zap.Any("options", req.GetOptions()),
|
||||||
)
|
)
|
||||||
|
|
||||||
resp := &internalpb.ImportResponse{
|
resp := &internalpb.ImportResponse{
|
||||||
|
@ -78,3 +78,14 @@ func CheckArrayCapacity(arrLength int, maxCapacity int64) error {
|
|||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func EstimateReadCountPerBatch(bufferSize int, schema *schemapb.CollectionSchema) (int64, error) {
|
||||||
|
sizePerRecord, err := typeutil.EstimateMaxSizePerRecord(schema)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
if 1000*sizePerRecord <= bufferSize {
|
||||||
|
return 1000, nil
|
||||||
|
}
|
||||||
|
return int64(bufferSize) / int64(sizePerRecord), nil
|
||||||
|
}
|
||||||
|
68
internal/util/importutilv2/common/util_test.go
Normal file
68
internal/util/importutilv2/common/util_test.go
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
// Licensed to the LF AI & Data foundation under one
|
||||||
|
// or more contributor license agreements. See the NOTICE file
|
||||||
|
// distributed with this work for additional information
|
||||||
|
// regarding copyright ownership. The ASF licenses this file
|
||||||
|
// to you under the Apache License, Version 2.0 (the
|
||||||
|
// "License"); you may not use this file except in compliance
|
||||||
|
// with the License. You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package common
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
||||||
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||||
|
"github.com/milvus-io/milvus/pkg/common"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestUtil_EstimateReadCountPerBatch(t *testing.T) {
|
||||||
|
schema := &schemapb.CollectionSchema{
|
||||||
|
Fields: []*schemapb.FieldSchema{
|
||||||
|
{
|
||||||
|
FieldID: 100,
|
||||||
|
Name: "pk",
|
||||||
|
IsPrimaryKey: true,
|
||||||
|
DataType: schemapb.DataType_Int64,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
FieldID: 101,
|
||||||
|
Name: "vec",
|
||||||
|
DataType: schemapb.DataType_FloatVector,
|
||||||
|
TypeParams: []*commonpb.KeyValuePair{
|
||||||
|
{
|
||||||
|
Key: common.DimKey,
|
||||||
|
Value: "128",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
count, err := EstimateReadCountPerBatch(16*1024*1024, schema)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, int64(1000), count)
|
||||||
|
|
||||||
|
schema.Fields = append(schema.Fields, &schemapb.FieldSchema{
|
||||||
|
FieldID: 102,
|
||||||
|
Name: "vec2",
|
||||||
|
DataType: schemapb.DataType_FloatVector,
|
||||||
|
TypeParams: []*commonpb.KeyValuePair{
|
||||||
|
{
|
||||||
|
Key: common.DimKey,
|
||||||
|
Value: "invalidDim",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
_, err = EstimateReadCountPerBatch(16*1024*1024, schema)
|
||||||
|
assert.Error(t, err)
|
||||||
|
}
|
@ -1,3 +1,19 @@
|
|||||||
|
// Licensed to the LF AI & Data foundation under one
|
||||||
|
// or more contributor license agreements. See the NOTICE file
|
||||||
|
// distributed with this work for additional information
|
||||||
|
// regarding copyright ownership. The ASF licenses this file
|
||||||
|
// to you under the Apache License, Version 2.0 (the
|
||||||
|
// "License"); you may not use this file except in compliance
|
||||||
|
// with the License. You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
package csv
|
package csv
|
||||||
|
|
||||||
import (
|
import (
|
||||||
@ -11,6 +27,7 @@ import (
|
|||||||
|
|
||||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||||
"github.com/milvus-io/milvus/internal/storage"
|
"github.com/milvus-io/milvus/internal/storage"
|
||||||
|
"github.com/milvus-io/milvus/internal/util/importutilv2/common"
|
||||||
"github.com/milvus-io/milvus/pkg/log"
|
"github.com/milvus-io/milvus/pkg/log"
|
||||||
"github.com/milvus-io/milvus/pkg/util/merr"
|
"github.com/milvus-io/milvus/pkg/util/merr"
|
||||||
)
|
)
|
||||||
@ -36,13 +53,10 @@ func NewReader(ctx context.Context, cm storage.ChunkManager, schema *schemapb.Co
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("read csv file failed, path=%s, err=%s", path, err.Error()))
|
return nil, merr.WrapErrImportFailed(fmt.Sprintf("read csv file failed, path=%s, err=%s", path, err.Error()))
|
||||||
}
|
}
|
||||||
// count, err := estimateReadCountPerBatch(bufferSize, schema)
|
count, err := common.EstimateReadCountPerBatch(bufferSize, schema)
|
||||||
// if err != nil {
|
if err != nil {
|
||||||
// return nil, err
|
return nil, err
|
||||||
// }
|
}
|
||||||
|
|
||||||
// set the interval for determining if the buffer is exceeded
|
|
||||||
var count int64 = 1000
|
|
||||||
|
|
||||||
csvReader := csv.NewReader(cmReader)
|
csvReader := csv.NewReader(cmReader)
|
||||||
csvReader.Comma = sep
|
csvReader.Comma = sep
|
||||||
@ -119,14 +133,3 @@ func (r *reader) Size() (int64, error) {
|
|||||||
r.fileSize.Store(size)
|
r.fileSize.Store(size)
|
||||||
return size, nil
|
return size, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// func estimateReadCountPerBatch(bufferSize int, schema *schemapb.CollectionSchema) (int64, error) {
|
|
||||||
// sizePerRecord, err := typeutil.EstimateMaxSizePerRecord(schema)
|
|
||||||
// if err != nil {
|
|
||||||
// return 0, err
|
|
||||||
// }
|
|
||||||
// if 1000*sizePerRecord <= bufferSize {
|
|
||||||
// return 1000, nil
|
|
||||||
// }
|
|
||||||
// return int64(bufferSize) / int64(sizePerRecord), nil
|
|
||||||
// }
|
|
||||||
|
@ -1,3 +1,19 @@
|
|||||||
|
// Licensed to the LF AI & Data foundation under one
|
||||||
|
// or more contributor license agreements. See the NOTICE file
|
||||||
|
// distributed with this work for additional information
|
||||||
|
// regarding copyright ownership. The ASF licenses this file
|
||||||
|
// to you under the Apache License, Version 2.0 (the
|
||||||
|
// "License"); you may not use this file except in compliance
|
||||||
|
// with the License. You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
package csv
|
package csv
|
||||||
|
|
||||||
import (
|
import (
|
||||||
@ -72,6 +88,10 @@ func (suite *ReaderSuite) run(dataType schemapb.DataType, elemType schemapb.Data
|
|||||||
Key: common.MaxLengthKey,
|
Key: common.MaxLengthKey,
|
||||||
Value: "128",
|
Value: "128",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
Key: common.MaxCapacityKey,
|
||||||
|
Value: "256",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
Nullable: nullable,
|
Nullable: nullable,
|
||||||
},
|
},
|
||||||
|
@ -1,3 +1,19 @@
|
|||||||
|
// Licensed to the LF AI & Data foundation under one
|
||||||
|
// or more contributor license agreements. See the NOTICE file
|
||||||
|
// distributed with this work for additional information
|
||||||
|
// regarding copyright ownership. The ASF licenses this file
|
||||||
|
// to you under the Apache License, Version 2.0 (the
|
||||||
|
// "License"); you may not use this file except in compliance
|
||||||
|
// with the License. You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
package csv
|
package csv
|
||||||
|
|
||||||
import (
|
import (
|
||||||
@ -9,7 +25,9 @@ import (
|
|||||||
"github.com/samber/lo"
|
"github.com/samber/lo"
|
||||||
|
|
||||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||||
|
"github.com/milvus-io/milvus/internal/util/importutilv2/common"
|
||||||
"github.com/milvus-io/milvus/pkg/util/merr"
|
"github.com/milvus-io/milvus/pkg/util/merr"
|
||||||
|
"github.com/milvus-io/milvus/pkg/util/parameterutil"
|
||||||
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -224,6 +242,13 @@ func (r *rowParser) parseEntity(field *schemapb.FieldSchema, obj string) (any, e
|
|||||||
if nullable && obj == r.nullkey {
|
if nullable && obj == r.nullkey {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
maxLength, err := parameterutil.GetMaxLength(field)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if err = common.CheckVarcharLength(obj, maxLength); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
return obj, nil
|
return obj, nil
|
||||||
case schemapb.DataType_BinaryVector:
|
case schemapb.DataType_BinaryVector:
|
||||||
if nullable && obj == r.nullkey {
|
if nullable && obj == r.nullkey {
|
||||||
@ -323,6 +348,13 @@ func (r *rowParser) parseEntity(field *schemapb.FieldSchema, obj string) (any, e
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, r.wrapTypeError(obj, field)
|
return nil, r.wrapTypeError(obj, field)
|
||||||
}
|
}
|
||||||
|
maxCapacity, err := parameterutil.GetMaxCapacity(field)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if err = common.CheckArrayCapacity(len(vec), maxCapacity); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
// elements in array not support null value
|
// elements in array not support null value
|
||||||
scalarFieldData, err := r.arrayToFieldData(vec, field.GetElementType())
|
scalarFieldData, err := r.arrayToFieldData(vec, field.GetElementType())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -1,3 +1,19 @@
|
|||||||
|
// Licensed to the LF AI & Data foundation under one
|
||||||
|
// or more contributor license agreements. See the NOTICE file
|
||||||
|
// distributed with this work for additional information
|
||||||
|
// regarding copyright ownership. The ASF licenses this file
|
||||||
|
// to you under the Apache License, Version 2.0 (the
|
||||||
|
// "License"); you may not use this file except in compliance
|
||||||
|
// with the License. You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
package csv
|
package csv
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@ -27,8 +27,8 @@ import (
|
|||||||
|
|
||||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||||
"github.com/milvus-io/milvus/internal/storage"
|
"github.com/milvus-io/milvus/internal/storage"
|
||||||
|
"github.com/milvus-io/milvus/internal/util/importutilv2/common"
|
||||||
"github.com/milvus-io/milvus/pkg/util/merr"
|
"github.com/milvus-io/milvus/pkg/util/merr"
|
||||||
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@ -58,7 +58,7 @@ func NewReader(ctx context.Context, cm storage.ChunkManager, schema *schemapb.Co
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("read json file failed, path=%s, err=%s", path, err.Error()))
|
return nil, merr.WrapErrImportFailed(fmt.Sprintf("read json file failed, path=%s, err=%s", path, err.Error()))
|
||||||
}
|
}
|
||||||
count, err := estimateReadCountPerBatch(bufferSize, schema)
|
count, err := common.EstimateReadCountPerBatch(bufferSize, schema)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -181,14 +181,3 @@ func (j *reader) Size() (int64, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (j *reader) Close() {}
|
func (j *reader) Close() {}
|
||||||
|
|
||||||
func estimateReadCountPerBatch(bufferSize int, schema *schemapb.CollectionSchema) (int64, error) {
|
|
||||||
sizePerRecord, err := typeutil.EstimateMaxSizePerRecord(schema)
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
if 1000*sizePerRecord <= bufferSize {
|
|
||||||
return 1000, nil
|
|
||||||
}
|
|
||||||
return int64(bufferSize) / int64(sizePerRecord), nil
|
|
||||||
}
|
|
||||||
|
@ -399,7 +399,9 @@ func (r *rowParser) parseEntity(fieldID int64, obj any) (any, error) {
|
|||||||
}
|
}
|
||||||
case schemapb.DataType_Array:
|
case schemapb.DataType_Array:
|
||||||
arr, ok := obj.([]interface{})
|
arr, ok := obj.([]interface{})
|
||||||
|
if !ok {
|
||||||
|
return nil, r.wrapTypeError(obj, fieldID)
|
||||||
|
}
|
||||||
maxCapacity, err := parameterutil.GetMaxCapacity(r.id2Field[fieldID])
|
maxCapacity, err := parameterutil.GetMaxCapacity(r.id2Field[fieldID])
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@ -407,9 +409,6 @@ func (r *rowParser) parseEntity(fieldID int64, obj any) (any, error) {
|
|||||||
if err = common.CheckArrayCapacity(len(arr), maxCapacity); err != nil {
|
if err = common.CheckArrayCapacity(len(arr), maxCapacity); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
if !ok {
|
|
||||||
return nil, r.wrapTypeError(obj, fieldID)
|
|
||||||
}
|
|
||||||
scalarFieldData, err := r.arrayToFieldData(arr, r.id2Field[fieldID].GetElementType())
|
scalarFieldData, err := r.arrayToFieldData(arr, r.id2Field[fieldID].GetElementType())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@ -53,7 +53,7 @@ func NewReader(ctx context.Context, cm storage.ChunkManager, schema *schemapb.Co
|
|||||||
fields := lo.KeyBy(schema.GetFields(), func(field *schemapb.FieldSchema) int64 {
|
fields := lo.KeyBy(schema.GetFields(), func(field *schemapb.FieldSchema) int64 {
|
||||||
return field.GetFieldID()
|
return field.GetFieldID()
|
||||||
})
|
})
|
||||||
count, err := calcRowCount(bufferSize, schema)
|
count, err := common.EstimateReadCountPerBatch(bufferSize, schema)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -30,7 +30,6 @@ import (
|
|||||||
|
|
||||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||||
"github.com/milvus-io/milvus/pkg/util/merr"
|
"github.com/milvus-io/milvus/pkg/util/merr"
|
||||||
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@ -241,12 +240,3 @@ func validateHeader(npyReader *npy.Reader, field *schemapb.FieldSchema, dim int)
|
|||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func calcRowCount(bufferSize int, schema *schemapb.CollectionSchema) (int64, error) {
|
|
||||||
sizePerRecord, err := typeutil.EstimateMaxSizePerRecord(schema)
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
rowCount := int64(bufferSize) / int64(sizePerRecord)
|
|
||||||
return rowCount, nil
|
|
||||||
}
|
|
||||||
|
@ -21,6 +21,7 @@ import (
|
|||||||
"math"
|
"math"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/samber/lo"
|
"github.com/samber/lo"
|
||||||
|
|
||||||
@ -31,17 +32,51 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
// Timeout specifies the timeout duration for import, such as "300s", "1.5h" or "1h45m".
|
||||||
|
Timeout = "timeout"
|
||||||
|
|
||||||
|
// SkipDQC indicates whether to bypass the disk quota check, default to false.
|
||||||
|
SkipDQC = "skip_disk_quota_check"
|
||||||
|
|
||||||
|
// CSVSep specifies the delimiter used for importing CSV files.
|
||||||
|
CSVSep = "sep"
|
||||||
|
|
||||||
|
// CSVNullKey specifies the null key used when importing CSV files.
|
||||||
|
CSVNullKey = "nullkey"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Options for backup-restore mode.
|
||||||
|
const (
|
||||||
|
// BackupFlag indicates whether the import is in backup-restore mode, default to false.
|
||||||
|
BackupFlag = "backup"
|
||||||
|
|
||||||
|
// L0Import indicates whether to import l0 segments only.
|
||||||
|
L0Import = "l0_import"
|
||||||
|
|
||||||
|
// StartTs StartTs2 EndTs EndTs2 are used to filter data during backup-restore import.
|
||||||
StartTs = "start_ts"
|
StartTs = "start_ts"
|
||||||
StartTs2 = "startTs"
|
StartTs2 = "startTs"
|
||||||
EndTs = "end_ts"
|
EndTs = "end_ts"
|
||||||
EndTs2 = "endTs"
|
EndTs2 = "endTs"
|
||||||
BackupFlag = "backup"
|
|
||||||
L0Import = "l0_import"
|
|
||||||
SkipDQC = "skip_disk_quota_check"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type Options []*commonpb.KeyValuePair
|
type Options []*commonpb.KeyValuePair
|
||||||
|
|
||||||
|
func GetTimeoutTs(options Options) (uint64, error) {
|
||||||
|
var timeoutTs uint64 = math.MaxUint64
|
||||||
|
timeoutStr, err := funcutil.GetAttrByKeyFromRepeatedKV(Timeout, options)
|
||||||
|
if err == nil {
|
||||||
|
var dur time.Duration
|
||||||
|
dur, err = time.ParseDuration(timeoutStr)
|
||||||
|
if err != nil {
|
||||||
|
return 0, fmt.Errorf("parse timeout failed, err=%w", err)
|
||||||
|
}
|
||||||
|
curTs := tsoutil.GetCurrentTime()
|
||||||
|
timeoutTs = tsoutil.AddPhysicalDurationOnTs(curTs, dur)
|
||||||
|
}
|
||||||
|
return timeoutTs, nil
|
||||||
|
}
|
||||||
|
|
||||||
func ParseTimeRange(options Options) (uint64, uint64, error) {
|
func ParseTimeRange(options Options) (uint64, uint64, error) {
|
||||||
importOptions := funcutil.KeyValuePair2Map(options)
|
importOptions := funcutil.KeyValuePair2Map(options)
|
||||||
getTimestamp := func(defaultValue uint64, targetKeys ...string) (uint64, error) {
|
getTimestamp := func(defaultValue uint64, targetKeys ...string) (uint64, error) {
|
||||||
@ -103,7 +138,7 @@ func SkipDiskQuotaCheck(options Options) bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func GetCSVSep(options Options) (rune, error) {
|
func GetCSVSep(options Options) (rune, error) {
|
||||||
sep, err := funcutil.GetAttrByKeyFromRepeatedKV("sep", options)
|
sep, err := funcutil.GetAttrByKeyFromRepeatedKV(CSVSep, options)
|
||||||
unsupportedSep := []rune{0, '\n', '\r', '"', 0xFFFD}
|
unsupportedSep := []rune{0, '\n', '\r', '"', 0xFFFD}
|
||||||
defaultSep := ','
|
defaultSep := ','
|
||||||
if err != nil || len(sep) == 0 {
|
if err != nil || len(sep) == 0 {
|
||||||
@ -115,7 +150,7 @@ func GetCSVSep(options Options) (rune, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func GetCSVNullKey(options Options) (string, error) {
|
func GetCSVNullKey(options Options) (string, error) {
|
||||||
nullKey, err := funcutil.GetAttrByKeyFromRepeatedKV("nullkey", options)
|
nullKey, err := funcutil.GetAttrByKeyFromRepeatedKV(CSVNullKey, options)
|
||||||
defaultNullKey := ""
|
defaultNullKey := ""
|
||||||
if err != nil || len(nullKey) == 0 {
|
if err != nil || len(nullKey) == 0 {
|
||||||
return defaultNullKey, nil
|
return defaultNullKey, nil
|
||||||
|
53
internal/util/importutilv2/option_test.go
Normal file
53
internal/util/importutilv2/option_test.go
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
// Licensed to the LF AI & Data foundation under one
|
||||||
|
// or more contributor license agreements. See the NOTICE file
|
||||||
|
// distributed with this work for additional information
|
||||||
|
// regarding copyright ownership. The ASF licenses this file
|
||||||
|
// to you under the Apache License, Version 2.0 (the
|
||||||
|
// "License"); you may not use this file except in compliance
|
||||||
|
// with the License. You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package importutilv2
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
||||||
|
"github.com/milvus-io/milvus/pkg/util/tsoutil"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestOption_GetTimeout(t *testing.T) {
|
||||||
|
const delta = 3 * time.Second
|
||||||
|
|
||||||
|
options := []*commonpb.KeyValuePair{{Key: Timeout, Value: "300s"}}
|
||||||
|
ts, err := GetTimeoutTs(options)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
pt := tsoutil.PhysicalTime(ts)
|
||||||
|
assert.WithinDuration(t, time.Now().Add(300*time.Second), pt, delta)
|
||||||
|
|
||||||
|
options = []*commonpb.KeyValuePair{{Key: Timeout, Value: "1.5h"}}
|
||||||
|
ts, err = GetTimeoutTs(options)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
pt = tsoutil.PhysicalTime(ts)
|
||||||
|
assert.WithinDuration(t, time.Now().Add(90*time.Minute), pt, delta)
|
||||||
|
|
||||||
|
options = []*commonpb.KeyValuePair{{Key: Timeout, Value: "1h45m"}}
|
||||||
|
ts, err = GetTimeoutTs(options)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
pt = tsoutil.PhysicalTime(ts)
|
||||||
|
assert.WithinDuration(t, time.Now().Add(105*time.Minute), pt, delta)
|
||||||
|
|
||||||
|
options = []*commonpb.KeyValuePair{{Key: Timeout, Value: "invalidTime"}}
|
||||||
|
_, err = GetTimeoutTs(options)
|
||||||
|
assert.Error(t, err)
|
||||||
|
}
|
@ -74,7 +74,7 @@ func NewReader(ctx context.Context, cm storage.ChunkManager, schema *schemapb.Co
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
count, err := estimateReadCountPerBatch(bufferSize, schema)
|
count, err := common.EstimateReadCountPerBatch(bufferSize, schema)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -264,17 +264,6 @@ func isSchemaEqual(schema *schemapb.CollectionSchema, arrSchema *arrow.Schema) e
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func estimateReadCountPerBatch(bufferSize int, schema *schemapb.CollectionSchema) (int64, error) {
|
|
||||||
sizePerRecord, err := typeutil.EstimateMaxSizePerRecord(schema)
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
if 1000*sizePerRecord <= bufferSize {
|
|
||||||
return 1000, nil
|
|
||||||
}
|
|
||||||
return int64(bufferSize) / int64(sizePerRecord), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// todo(smellthemoon): use byte to store valid_data
|
// todo(smellthemoon): use byte to store valid_data
|
||||||
func bytesToBoolArray(length int, bytes []byte) []bool {
|
func bytesToBoolArray(length int, bytes []byte) []bool {
|
||||||
bools := make([]bool, 0, length)
|
bools := make([]bool, 0, length)
|
||||||
|
Loading…
Reference in New Issue
Block a user