enhance: Support Array DataType for bulk_insert (#28341)
issue: #28272. Support array DataType for bulk_insert with JSON and binlog files. Signed-off-by: Cai Zhang <cai.zhang@zilliz.com>
This commit is contained in:
parent 8fe2fb3eb9
commit c29b60e18e
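For orientation before the diff: the new Array support carries each row of an array field as a `schemapb.ScalarField` wrapping a typed payload (for example an `IntArray`), the same shape the new test data later in this diff constructs. A minimal, hypothetical sketch (not part of this commit; the package wrapper and printed value are illustrative only):

```go
package main

import (
	"fmt"

	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
)

func main() {
	// One row of an Array(Int32) field, modeled the same way the new
	// dispatchArrayToShards() and its test construct it: a ScalarField
	// wrapping an IntArray payload.
	row := &schemapb.ScalarField{
		Data: &schemapb.ScalarField_IntData{
			IntData: &schemapb.IntArray{
				Data: []int32{1, 2, 3, 4, 5},
			},
		},
	}
	fmt.Println(len(row.GetIntData().GetData())) // 5 elements in this array cell
}
```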
@@ -23,24 +23,24 @@ import (
"strconv"
"strings"

"github.com/cockroachdb/errors"
"go.uber.org/zap"

"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)

// A struct to hold insert log paths and delta log paths of a segment
// SegmentFilesHolder A struct to hold insert log paths and delta log paths of a segment
type SegmentFilesHolder struct {
segmentID int64 // id of the segment
fieldFiles map[storage.FieldID][]string // mapping of field id and data file path
deltaFiles []string // a list of delta log file path, typically has only one item
}

// Adapter class to process insertlog/deltalog of a backuped segment
// BinlogAdapter Adapter class to process insertlog/deltalog of a backuped segment
// This class do the following works:
// 1. read insert log of each field, then constructs SegmentData in memory.
// 2. read delta log to remove deleted entities(TimeStampField is used to apply or skip the operation).
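A SegmentFilesHolder, as defined above, is just the per-segment listing of binlog paths the adapter consumes: insert-log paths per field plus the segment's delta-log paths. A minimal standalone sketch of that shape, assuming storage.FieldID is an int64 alias and using made-up paths and field IDs:

```go
package main

import "fmt"

// Local copy of the SegmentFilesHolder shape shown above, only to
// illustrate what the adapter reads; the real type lives in this package.
type segmentFilesHolder struct {
	segmentID  int64
	fieldFiles map[int64][]string // field id -> insert binlog paths
	deltaFiles []string           // delta binlog paths, typically one
}

func main() {
	holder := segmentFilesHolder{
		segmentID: 1,
		fieldFiles: map[int64][]string{
			0:   {"insert_log/1/0/0_log"},     // RowIDField (hypothetical path)
			1:   {"insert_log/1/1/1_log"},     // TimeStampField (hypothetical path)
			113: {"insert_log/1/113/113_log"}, // e.g. the new Array field in the tests
		},
		deltaFiles: []string{"delta_log/1/delta_log"},
	}
	fmt.Println(holder.segmentID, len(holder.fieldFiles), len(holder.deltaFiles))
}
```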
@@ -78,17 +78,17 @@ func NewBinlogAdapter(ctx context.Context,
) (*BinlogAdapter, error) {
if collectionInfo == nil {
log.Warn("Binlog adapter: collection schema is nil")
return nil, errors.New("collection schema is nil")
return nil, merr.WrapErrImportFailed("collection schema is nil")
}

if chunkManager == nil {
log.Warn("Binlog adapter: chunk manager pointer is nil")
return nil, errors.New("chunk manager pointer is nil")
return nil, merr.WrapErrImportFailed("chunk manager pointer is nil")
}

if flushFunc == nil {
log.Warn("Binlog adapter: flush function is nil")
return nil, errors.New("flush function is nil")
return nil, merr.WrapErrImportFailed("flush function is nil")
}

adapter := &BinlogAdapter{
@@ -113,7 +113,7 @@ func NewBinlogAdapter(ctx context.Context,
func (p *BinlogAdapter) Read(segmentHolder *SegmentFilesHolder) error {
if segmentHolder == nil {
log.Warn("Binlog adapter: segment files holder is nil")
return errors.New("segment files holder is nil")
return merr.WrapErrImportFailed("segment files holder is nil")
}

log.Info("Binlog adapter: read segment", zap.Int64("segmentID", segmentHolder.segmentID))
@@ -149,7 +149,7 @@ func (p *BinlogAdapter) Read(segmentHolder *SegmentFilesHolder) error {
shardData := initShardData(p.collectionInfo.Schema, p.collectionInfo.PartitionIDs)
if shardData == nil {
log.Warn("Binlog adapter: fail to initialize in-memory segment data", zap.Int("shardID", i))
return fmt.Errorf("fail to initialize in-memory segment data for shard id %d", i)
return merr.WrapErrImportFailed(fmt.Sprintf("fail to initialize in-memory segment data for shard id %d", i))
}
shardsData = append(shardsData, shardData)
}
@@ -157,7 +157,7 @@ func (p *BinlogAdapter) Read(segmentHolder *SegmentFilesHolder) error {
// read binlog files batch by batch
primaryKey := p.collectionInfo.PrimaryKey
for i := 0; i < batchCount; i++ {
// batchFiles excludes the primary key field and the timestamp field
// batchFiles excludes the primary key field and the timestamp field.
// timestamp field is used to compare the tsEndPoint to skip some rows, no need to pass old timestamp to new segment.
// once a new segment generated, the timestamp field will be re-generated, too.
batchFiles := make(map[storage.FieldID]string)
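As the comments above describe, each batch reads one binlog file per field while skipping the primary key and timestamp fields. A hypothetical, self-contained sketch of that selection step (the helper name and field IDs are assumptions, not code from this commit; it relies on verify() having already guaranteed equal file counts per field):

```go
package main

import "fmt"

// buildBatchFiles picks the batch-th binlog file of every field, excluding
// the primary key and timestamp fields, mirroring how the loop above fills
// its batchFiles map.
func buildBatchFiles(fieldFiles map[int64][]string, batch int, pkField, tsField int64) map[int64]string {
	batchFiles := make(map[int64]string)
	for fieldID, files := range fieldFiles {
		if fieldID == pkField || fieldID == tsField {
			continue // read separately / re-generated for the new segment
		}
		batchFiles[fieldID] = files[batch]
	}
	return batchFiles
}

func main() {
	fieldFiles := map[int64][]string{
		1:   {"ts_log_0"},  // timestamp field (hypothetical ID)
		100: {"pk_log_0"},  // primary key field (hypothetical ID)
		113: {"arr_log_0"}, // array field
	}
	fmt.Println(buildBatchFiles(fieldFiles, 0, 100, 1)) // map[113:arr_log_0]
}
```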
@@ -201,7 +201,7 @@ func (p *BinlogAdapter) Read(segmentHolder *SegmentFilesHolder) error {
}
} else {
log.Warn("Binlog adapter: unsupported primary key type", zap.Int("type", int(primaryKey.GetDataType())))
return fmt.Errorf("unsupported primary key type %d, primary key should be int64 or varchar", primaryKey.GetDataType())
return merr.WrapErrImportFailed(fmt.Sprintf("unsupported primary key type %d, primary key should be int64 or varchar", primaryKey.GetDataType()))
}

// if shardList is empty, that means all the primary keys have been deleted(or skipped), no need to read other files
@@ -214,7 +214,7 @@ func (p *BinlogAdapter) Read(segmentHolder *SegmentFilesHolder) error {
// outside context might be canceled(service stop, or future enhancement for canceling import task)
if isCanceled(p.ctx) {
log.Warn("Binlog adapter: import task was canceled")
return errors.New("import task was canceled")
return merr.WrapErrImportFailed("import task was canceled")
}

err = p.readInsertlog(fieldID, file, shardsData, shardList)
@@ -235,25 +235,25 @@ func (p *BinlogAdapter) Read(segmentHolder *SegmentFilesHolder) error {
}

// verify method verify the schema and binlog files
// 1. each field must has binlog file
// 1. each field must have binlog file
// 2. binlog file count of each field must be equal
// 3. the collectionSchema doesn't contain TimeStampField and RowIDField since the import_wrapper excludes them,
// but the segmentHolder.fieldFiles need to contains the two fields.
// but the segmentHolder.fieldFiles need to contain the two fields.
func (p *BinlogAdapter) verify(segmentHolder *SegmentFilesHolder) error {
if segmentHolder == nil {
log.Warn("Binlog adapter: segment files holder is nil")
return errors.New("segment files holder is nil")
return merr.WrapErrImportFailed("segment files holder is nil")
}

firstFieldFileCount := 0
// each field must has binlog file
// each field must have binlog file
for i := 0; i < len(p.collectionInfo.Schema.Fields); i++ {
schema := p.collectionInfo.Schema.Fields[i]

files, ok := segmentHolder.fieldFiles[schema.FieldID]
if !ok {
log.Warn("Binlog adapter: a field has no binlog file", zap.Int64("fieldID", schema.FieldID))
return fmt.Errorf("the field %d has no binlog file", schema.FieldID)
return merr.WrapErrImportFailed(fmt.Sprintf("the field %d has no binlog file", schema.FieldID))
}

if i == 0 {
@@ -261,26 +261,26 @@ func (p *BinlogAdapter) verify(segmentHolder *SegmentFilesHolder) error {
}
}

// the segmentHolder.fieldFiles need to contains RowIDField
// the segmentHolder.fieldFiles need to contain RowIDField
_, ok := segmentHolder.fieldFiles[common.RowIDField]
if !ok {
log.Warn("Binlog adapter: the binlog files of RowIDField is missed")
return errors.New("the binlog files of RowIDField is missed")
return merr.WrapErrImportFailed("the binlog files of RowIDField is missed")
}

// the segmentHolder.fieldFiles need to contains TimeStampField
// the segmentHolder.fieldFiles need to contain TimeStampField
_, ok = segmentHolder.fieldFiles[common.TimeStampField]
if !ok {
log.Warn("Binlog adapter: the binlog files of TimeStampField is missed")
return errors.New("the binlog files of TimeStampField is missed")
return merr.WrapErrImportFailed("the binlog files of TimeStampField is missed")
}

// binlog file count of each field must be equal
for _, files := range segmentHolder.fieldFiles {
if firstFieldFileCount != len(files) {
log.Warn("Binlog adapter: file count of each field must be equal", zap.Int("firstFieldFileCount", firstFieldFileCount))
return fmt.Errorf("binlog file count of each field must be equal, first field files count: %d, other field files count: %d",
firstFieldFileCount, len(files))
return merr.WrapErrImportFailed(fmt.Sprintf("binlog file count of each field must be equal, first field files count: %d, other field files count: %d",
firstFieldFileCount, len(files)))
}
}
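The verify() rules above (every schema field has binlog files, all fields have the same file count, and segmentHolder.fieldFiles also contains RowIDField and TimeStampField) can be illustrated with a small standalone check. A sketch under stated assumptions: the constants standing in for common.RowIDField and common.TimeStampField are placeholders, not values taken from this diff.

```go
package main

import (
	"errors"
	"fmt"
)

// Hypothetical stand-ins for common.RowIDField and common.TimeStampField.
const (
	rowIDField     int64 = 0
	timeStampField int64 = 1
)

// checkSegmentFiles mirrors the rules verify() enforces on fieldFiles.
func checkSegmentFiles(fieldFiles map[int64][]string) error {
	if len(fieldFiles) == 0 {
		return errors.New("no binlog files")
	}
	if _, ok := fieldFiles[rowIDField]; !ok {
		return errors.New("the binlog files of RowIDField is missed")
	}
	if _, ok := fieldFiles[timeStampField]; !ok {
		return errors.New("the binlog files of TimeStampField is missed")
	}
	count := -1
	for fieldID, files := range fieldFiles {
		if len(files) == 0 {
			return fmt.Errorf("the field %d has no binlog file", fieldID)
		}
		if count == -1 {
			count = len(files)
		} else if count != len(files) {
			return fmt.Errorf("binlog file count of each field must be equal, got %d and %d", count, len(files))
		}
	}
	return nil
}

func main() {
	files := map[int64][]string{
		rowIDField:     {"0_insertlog"},
		timeStampField: {"1_insertlog"},
		113:            {"113_insertlog"}, // e.g. the new Array field in the tests
	}
	fmt.Println(checkSegmentFiles(files)) // <nil>
}
```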
@ -318,7 +318,7 @@ func (p *BinlogAdapter) readDeltalogs(segmentHolder *SegmentFilesHolder) (map[in
|
||||
return nil, deletedIDDict, nil
|
||||
}
|
||||
log.Warn("Binlog adapter: unsupported primary key type", zap.Int("type", int(primaryKey.GetDataType())))
|
||||
return nil, nil, fmt.Errorf("unsupported primary key type %d, primary key should be int64 or varchar", primaryKey.GetDataType())
|
||||
return nil, nil, merr.WrapErrImportFailed(fmt.Sprintf("unsupported primary key type %d, primary key should be int64 or varchar", primaryKey.GetDataType()))
|
||||
}
|
||||
|
||||
// decodeDeleteLogs decodes string array(read from delta log) to storage.DeleteLog array
|
||||
@ -363,8 +363,8 @@ func (p *BinlogAdapter) decodeDeleteLogs(segmentHolder *SegmentFilesHolder) ([]*
|
||||
log.Warn("Binlog adapter: delta log data type is not equal to collection's primary key data type",
|
||||
zap.Int64("deltaDataType", deleteLogs[i].PkType),
|
||||
zap.Int64("pkDataType", int64(primaryKey.GetDataType())))
|
||||
return nil, fmt.Errorf("delta log data type %d is not equal to collection's primary key data type %d",
|
||||
deleteLogs[i].PkType, primaryKey.GetDataType())
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("delta log data type %d is not equal to collection's primary key data type %d",
|
||||
deleteLogs[i].PkType, primaryKey.GetDataType()))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -382,13 +382,13 @@ func (p *BinlogAdapter) decodeDeleteLog(deltaStr string) (*storage.DeleteLog, er
splits := strings.Split(deltaStr, ",")
if len(splits) != 2 {
log.Warn("Binlog adapter: the format of deletion string is incorrect", zap.String("deltaStr", deltaStr))
return nil, fmt.Errorf("the format of deletion string is incorrect, '%s' can not be split", deltaStr)
return nil, merr.WrapErrImportFailed(fmt.Sprintf("the format of deletion string is incorrect, '%s' can not be split", deltaStr))
}
pk, err := strconv.ParseInt(splits[0], 10, 64)
if err != nil {
log.Warn("Binlog adapter: failed to parse primary key of deletion string from old version",
zap.String("deltaStr", deltaStr), zap.Error(err))
return nil, fmt.Errorf("failed to parse primary key of deletion string '%s' from old version, error: %w", deltaStr, err)
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to parse primary key of deletion string '%s' from old version, error: %v", deltaStr, err))
}
deleteLog.Pk = &storage.Int64PrimaryKey{
Value: pk,
@@ -398,7 +398,7 @@ func (p *BinlogAdapter) decodeDeleteLog(deltaStr string) (*storage.DeleteLog, er
if err != nil {
log.Warn("Binlog adapter: failed to parse timestamp of deletion string from old version",
zap.String("deltaStr", deltaStr), zap.Error(err))
return nil, fmt.Errorf("failed to parse timestamp of deletion string '%s' from old version, error: %w", deltaStr, err)
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to parse timestamp of deletion string '%s' from old version, error: %v", deltaStr, err))
}
}
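decodeDeleteLog's old-version branch above treats a deletion record as a comma-separated "pk,timestamp" string. A hypothetical, stripped-down parser for that format (the function name and the int64 timestamp type are assumptions for illustration; the real code builds a storage.DeleteLog):

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// parseOldDeleteRecord models the old-version branch of decodeDeleteLog:
// the record is "pk,timestamp" with both halves parsed as integers.
func parseOldDeleteRecord(deltaStr string) (pk int64, ts int64, err error) {
	splits := strings.Split(deltaStr, ",")
	if len(splits) != 2 {
		return 0, 0, fmt.Errorf("the format of deletion string is incorrect, '%s' can not be split", deltaStr)
	}
	pk, err = strconv.ParseInt(splits[0], 10, 64)
	if err != nil {
		return 0, 0, fmt.Errorf("failed to parse primary key of '%s': %v", deltaStr, err)
	}
	ts, err = strconv.ParseInt(splits[1], 10, 64)
	if err != nil {
		return 0, 0, fmt.Errorf("failed to parse timestamp of '%s': %v", deltaStr, err)
	}
	return pk, ts, nil
}

func main() {
	pk, ts, err := parseOldDeleteRecord("1001,434262061576192")
	fmt.Println(pk, ts, err) // 1001 434262061576192 <nil>
}
```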
@ -411,13 +411,13 @@ func (p *BinlogAdapter) readDeltalog(logPath string) ([]string, error) {
|
||||
binlogFile, err := NewBinlogFile(p.chunkManager)
|
||||
if err != nil {
|
||||
log.Warn("Binlog adapter: failed to initialize binlog file", zap.String("logPath", logPath), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to initialize binlog file '%s', error: %w", logPath, err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to initialize binlog file '%s', error: %v", logPath, err))
|
||||
}
|
||||
|
||||
err = binlogFile.Open(logPath)
|
||||
if err != nil {
|
||||
log.Warn("Binlog adapter: failed to open delta log", zap.String("logPath", logPath), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to open delta log '%s', error: %w", logPath, err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to open delta log '%s', error: %v", logPath, err))
|
||||
}
|
||||
defer binlogFile.Close()
|
||||
|
||||
@ -425,7 +425,7 @@ func (p *BinlogAdapter) readDeltalog(logPath string) ([]string, error) {
|
||||
data, err := binlogFile.ReadVarchar()
|
||||
if err != nil {
|
||||
log.Warn("Binlog adapter: failed to read delta log", zap.String("logPath", logPath), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to read delta log '%s', error: %w", logPath, err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read delta log '%s', error: %v", logPath, err))
|
||||
}
|
||||
log.Info("Binlog adapter: successfully read deltalog", zap.Int("deleteCount", len(data)))
|
||||
|
||||
@ -438,13 +438,13 @@ func (p *BinlogAdapter) readTimestamp(logPath string) ([]int64, error) {
|
||||
binlogFile, err := NewBinlogFile(p.chunkManager)
|
||||
if err != nil {
|
||||
log.Warn("Binlog adapter: failed to initialize binlog file", zap.String("logPath", logPath), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to initialize binlog file '%s', error: %w", logPath, err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to initialize binlog file '%s', error: %v", logPath, err))
|
||||
}
|
||||
|
||||
err = binlogFile.Open(logPath)
|
||||
if err != nil {
|
||||
log.Warn("Binlog adapter: failed to open timestamp log file", zap.String("logPath", logPath))
|
||||
return nil, fmt.Errorf("failed to open timestamp log file '%s', error: %w", logPath, err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to open timestamp log file '%s', error: %v", logPath, err))
|
||||
}
|
||||
defer binlogFile.Close()
|
||||
|
||||
@ -452,7 +452,7 @@ func (p *BinlogAdapter) readTimestamp(logPath string) ([]int64, error) {
|
||||
int64List, err := binlogFile.ReadInt64()
|
||||
if err != nil {
|
||||
log.Warn("Binlog adapter: failed to read timestamp data from log file", zap.String("logPath", logPath))
|
||||
return nil, fmt.Errorf("failed to read timestamp data from log file '%s', error: %w", logPath, err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read timestamp data from log file '%s', error: %v", logPath, err))
|
||||
}
|
||||
|
||||
log.Info("Binlog adapter: read timestamp from log file", zap.Int("tsCount", len(int64List)))
|
||||
@ -466,13 +466,13 @@ func (p *BinlogAdapter) readPrimaryKeys(logPath string) ([]int64, []string, erro
|
||||
binlogFile, err := NewBinlogFile(p.chunkManager)
|
||||
if err != nil {
|
||||
log.Warn("Binlog adapter: failed to initialize binlog file", zap.String("logPath", logPath), zap.Error(err))
|
||||
return nil, nil, fmt.Errorf("failed to initialize binlog file '%s', error: %w", logPath, err)
|
||||
return nil, nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to initialize binlog file '%s', error: %v", logPath, err))
|
||||
}
|
||||
|
||||
err = binlogFile.Open(logPath)
|
||||
if err != nil {
|
||||
log.Warn("Binlog adapter: failed to open primary key binlog", zap.String("logPath", logPath))
|
||||
return nil, nil, fmt.Errorf("failed to open primary key binlog '%s', error: %w", logPath, err)
|
||||
return nil, nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to open primary key binlog '%s', error: %v", logPath, err))
|
||||
}
|
||||
defer binlogFile.Close()
|
||||
|
||||
@ -482,7 +482,7 @@ func (p *BinlogAdapter) readPrimaryKeys(logPath string) ([]int64, []string, erro
|
||||
idList, err := binlogFile.ReadInt64()
|
||||
if err != nil {
|
||||
log.Warn("Binlog adapter: failed to read int64 primary key from binlog", zap.String("logPath", logPath), zap.Error(err))
|
||||
return nil, nil, fmt.Errorf("failed to read int64 primary key from binlog '%s', error: %w", logPath, err)
|
||||
return nil, nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read int64 primary key from binlog '%s', error: %v", logPath, err))
|
||||
}
|
||||
log.Info("Binlog adapter: succeed to read int64 primary key binlog", zap.Int("len", len(idList)))
|
||||
return idList, nil, nil
|
||||
@ -490,13 +490,13 @@ func (p *BinlogAdapter) readPrimaryKeys(logPath string) ([]int64, []string, erro
|
||||
idList, err := binlogFile.ReadVarchar()
|
||||
if err != nil {
|
||||
log.Warn("Binlog adapter: failed to read varchar primary key from binlog", zap.String("logPath", logPath), zap.Error(err))
|
||||
return nil, nil, fmt.Errorf("failed to read varchar primary key from binlog '%s', error: %w", logPath, err)
|
||||
return nil, nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read varchar primary key from binlog '%s', error: %v", logPath, err))
|
||||
}
|
||||
log.Info("Binlog adapter: succeed to read varchar primary key binlog", zap.Int("len", len(idList)))
|
||||
return nil, idList, nil
|
||||
}
|
||||
log.Warn("Binlog adapter: unsupported primary key type", zap.Int("type", int(primaryKey.GetDataType())))
|
||||
return nil, nil, fmt.Errorf("unsupported primary key type %d, primary key should be int64 or varchar", primaryKey.GetDataType())
|
||||
return nil, nil, merr.WrapErrImportFailed(fmt.Sprintf("unsupported primary key type %d, primary key should be int64 or varchar", primaryKey.GetDataType()))
|
||||
}
|
||||
|
||||
// getShardingListByPrimaryInt64 method generates a shard id list by primary key(int64) list and deleted list.
|
||||
@ -511,7 +511,7 @@ func (p *BinlogAdapter) getShardingListByPrimaryInt64(primaryKeys []int64,
|
||||
if len(timestampList) != len(primaryKeys) {
|
||||
log.Warn("Binlog adapter: primary key length is not equal to timestamp list length",
|
||||
zap.Int("primaryKeysLen", len(primaryKeys)), zap.Int("timestampLen", len(timestampList)))
|
||||
return nil, fmt.Errorf("primary key length %d is not equal to timestamp list length %d", len(primaryKeys), len(timestampList))
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("primary key length %d is not equal to timestamp list length %d", len(primaryKeys), len(timestampList)))
|
||||
}
|
||||
|
||||
log.Info("Binlog adapter: building shard list", zap.Int("pkLen", len(primaryKeys)), zap.Int("tsLen", len(timestampList)))
|
||||
@ -565,7 +565,7 @@ func (p *BinlogAdapter) getShardingListByPrimaryVarchar(primaryKeys []string,
|
||||
if len(timestampList) != len(primaryKeys) {
|
||||
log.Warn("Binlog adapter: primary key length is not equal to timestamp list length",
|
||||
zap.Int("primaryKeysLen", len(primaryKeys)), zap.Int("timestampLen", len(timestampList)))
|
||||
return nil, fmt.Errorf("primary key length %d is not equal to timestamp list length %d", len(primaryKeys), len(timestampList))
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("primary key length %d is not equal to timestamp list length %d", len(primaryKeys), len(timestampList)))
|
||||
}
|
||||
|
||||
log.Info("Binlog adapter: building shard list", zap.Int("pkLen", len(primaryKeys)), zap.Int("tsLen", len(timestampList)))
|
||||
@ -616,7 +616,7 @@ func (p *BinlogAdapter) verifyField(fieldID storage.FieldID, memoryData []ShardD
|
||||
_, ok := fields[fieldID]
|
||||
if !ok {
|
||||
log.Warn("Binlog adapter: the field ID doesn't exist in collection schema", zap.Int64("fieldID", fieldID))
|
||||
return fmt.Errorf("the field ID %d doesn't exist in collection schema", fieldID)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("the field ID %d doesn't exist in collection schema", fieldID))
|
||||
}
|
||||
}
|
||||
return nil
|
||||
@ -637,20 +637,20 @@ func (p *BinlogAdapter) readInsertlog(fieldID storage.FieldID, logPath string,
|
||||
err := p.verifyField(fieldID, memoryData)
|
||||
if err != nil {
|
||||
log.Warn("Binlog adapter: could not read binlog file", zap.String("logPath", logPath), zap.Error(err))
|
||||
return fmt.Errorf("could not read binlog file %s, error: %w", logPath, err)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("could not read binlog file %s, error: %v", logPath, err))
|
||||
}
|
||||
|
||||
// open the insert log file
|
||||
binlogFile, err := NewBinlogFile(p.chunkManager)
|
||||
if err != nil {
|
||||
log.Warn("Binlog adapter: failed to initialize binlog file", zap.String("logPath", logPath), zap.Error(err))
|
||||
return fmt.Errorf("failed to initialize binlog file %s, error: %w", logPath, err)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to initialize binlog file %s, error: %v", logPath, err))
|
||||
}
|
||||
|
||||
err = binlogFile.Open(logPath)
|
||||
if err != nil {
|
||||
log.Warn("Binlog adapter: failed to open insert log", zap.String("logPath", logPath), zap.Error(err))
|
||||
return fmt.Errorf("failed to open insert log %s, error: %w", logPath, err)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to open insert log %s, error: %v", logPath, err))
|
||||
}
|
||||
defer binlogFile.Close()
|
||||
|
||||
@@ -766,8 +766,19 @@ func (p *BinlogAdapter) readInsertlog(fieldID storage.FieldID, logPath string,
if err != nil {
return err
}
case schemapb.DataType_Array:
data, err := binlogFile.ReadArray()
if err != nil {
return err
}

err = p.dispatchArrayToShards(data, memoryData, shardList, fieldID)
if err != nil {
return err
}

default:
return fmt.Errorf("unsupported data type %d", binlogFile.DataType())
return merr.WrapErrImportFailed(fmt.Sprintf("unsupported data type %d", binlogFile.DataType()))
}
log.Info("Binlog adapter: read data into shard list", zap.Int("dataType", int(binlogFile.DataType())), zap.Int("shardLen", len(shardList)))
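The new DataType_Array case reads a column of schemapb.ScalarField values and hands it to dispatchArrayToShards() together with the per-row shard list, where a negative shard ID marks a row that was deleted or excluded by timestamp. A simplified, hypothetical model of that dispatch contract (not the real method, which writes into ShardData/storage.ArrayFieldData):

```go
package main

import (
	"fmt"

	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
)

// dispatchByShard models what dispatchArrayToShards() does: data and
// shardList are parallel slices, negative shard IDs are skipped, and every
// other row is appended to its shard's bucket.
func dispatchByShard(data []*schemapb.ScalarField, shardList []int32, shardNum int) ([][]*schemapb.ScalarField, error) {
	if len(data) != len(shardList) {
		return nil, fmt.Errorf("array row count %d is not equal to shard list row count %d", len(data), len(shardList))
	}
	buckets := make([][]*schemapb.ScalarField, shardNum)
	for i, val := range data {
		shardID := shardList[i]
		if shardID < 0 {
			continue // deleted or excluded by timestamp
		}
		if int(shardID) >= shardNum {
			return nil, fmt.Errorf("shard ID %d is larger than shards number %d", shardID, shardNum)
		}
		buckets[shardID] = append(buckets[shardID], val)
	}
	return buckets, nil
}

func main() {
	rows := []*schemapb.ScalarField{
		{Data: &schemapb.ScalarField_IntData{IntData: &schemapb.IntArray{Data: []int32{1, 2}}}},
		{Data: &schemapb.ScalarField_IntData{IntData: &schemapb.IntArray{Data: []int32{3}}}},
		{Data: &schemapb.ScalarField_IntData{IntData: &schemapb.IntArray{Data: []int32{4, 5, 6}}}},
	}
	buckets, err := dispatchByShard(rows, []int32{0, -1, 1}, 2)
	fmt.Println(len(buckets[0]), len(buckets[1]), err) // 1 1 <nil>
}
```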
@ -780,10 +791,10 @@ func (p *BinlogAdapter) dispatchBoolToShards(data []bool, memoryData []ShardData
|
||||
// verify row count
|
||||
if len(data) != len(shardList) {
|
||||
log.Warn("Binlog adapter: bool field row count is not equal to shard list row count %d", zap.Int("dataLen", len(data)), zap.Int("shardLen", len(shardList)))
|
||||
return fmt.Errorf("bool field row count %d is not equal to shard list row count %d", len(data), len(shardList))
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("bool field row count %d is not equal to shard list row count %d", len(data), len(shardList)))
|
||||
}
|
||||
|
||||
// dispatch entities acoording to shard list
|
||||
// dispatch entities according to shard list
|
||||
for i, val := range data {
|
||||
shardID := shardList[i]
|
||||
if shardID < 0 {
|
||||
@ -791,7 +802,7 @@ func (p *BinlogAdapter) dispatchBoolToShards(data []bool, memoryData []ShardData
|
||||
}
|
||||
if shardID >= int32(len(memoryData)) {
|
||||
log.Warn("Binlog adapter: bool field's shard ID is illegal", zap.Int32("shardID", shardID), zap.Int("shardsCount", len(memoryData)))
|
||||
return fmt.Errorf("bool field's shard ID %d is larger than shards number %d", shardID, len(memoryData))
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("bool field's shard ID %d is larger than shards number %d", shardID, len(memoryData)))
|
||||
}
|
||||
|
||||
partitions := memoryData[shardID] // initBlockData() can ensure the existence, no need to check bound here
|
||||
@ -801,7 +812,7 @@ func (p *BinlogAdapter) dispatchBoolToShards(data []bool, memoryData []ShardData
|
||||
if !ok {
|
||||
log.Warn("Binlog adapter: binlog is bool type, unequal to field",
|
||||
zap.Int64("fieldID", fieldID), zap.Int32("shardID", shardID))
|
||||
return fmt.Errorf("binlog is bool type, unequal to field %d", fieldID)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("binlog is bool type, unequal to field %d", fieldID))
|
||||
}
|
||||
fieldData.Data = append(fieldData.Data, val)
|
||||
}
|
||||
@ -815,10 +826,10 @@ func (p *BinlogAdapter) dispatchInt8ToShards(data []int8, memoryData []ShardData
|
||||
// verify row count
|
||||
if len(data) != len(shardList) {
|
||||
log.Warn("Binlog adapter: int8 field row count is not equal to shard list row count", zap.Int("dataLen", len(data)), zap.Int("shardLen", len(shardList)))
|
||||
return fmt.Errorf("int8 field row count %d is not equal to shard list row count %d", len(data), len(shardList))
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("int8 field row count %d is not equal to shard list row count %d", len(data), len(shardList)))
|
||||
}
|
||||
|
||||
// dispatch entity acoording to shard list
|
||||
// dispatch entity according to shard list
|
||||
for i, val := range data {
|
||||
shardID := shardList[i]
|
||||
if shardID < 0 {
|
||||
@ -826,7 +837,7 @@ func (p *BinlogAdapter) dispatchInt8ToShards(data []int8, memoryData []ShardData
|
||||
}
|
||||
if shardID >= int32(len(memoryData)) {
|
||||
log.Warn("Binlog adapter: int8 field's shard ID is illegal", zap.Int32("shardID", shardID), zap.Int("shardsCount", len(memoryData)))
|
||||
return fmt.Errorf("int8 field's shard ID %d is larger than shards number %d", shardID, len(memoryData))
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("int8 field's shard ID %d is larger than shards number %d", shardID, len(memoryData)))
|
||||
}
|
||||
|
||||
partitions := memoryData[shardID] // initBlockData() can ensure the existence, no need to check bound here
|
||||
@ -836,7 +847,7 @@ func (p *BinlogAdapter) dispatchInt8ToShards(data []int8, memoryData []ShardData
|
||||
if !ok {
|
||||
log.Warn("Binlog adapter: binlog is int8 type, unequal to field",
|
||||
zap.Int64("fieldID", fieldID), zap.Int32("shardID", shardID))
|
||||
return fmt.Errorf("binlog is int8 type, unequal to field %d", fieldID)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("binlog is int8 type, unequal to field %d", fieldID))
|
||||
}
|
||||
fieldData.Data = append(fieldData.Data, val)
|
||||
}
|
||||
@ -850,10 +861,10 @@ func (p *BinlogAdapter) dispatchInt16ToShards(data []int16, memoryData []ShardDa
|
||||
// verify row count
|
||||
if len(data) != len(shardList) {
|
||||
log.Warn("Binlog adapter: int16 field row count is not equal to shard list row count", zap.Int("dataLen", len(data)), zap.Int("shardLen", len(shardList)))
|
||||
return fmt.Errorf("int16 field row count %d is not equal to shard list row count %d", len(data), len(shardList))
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("int16 field row count %d is not equal to shard list row count %d", len(data), len(shardList)))
|
||||
}
|
||||
|
||||
// dispatch entities acoording to shard list
|
||||
// dispatch entities according to shard list
|
||||
for i, val := range data {
|
||||
shardID := shardList[i]
|
||||
if shardID < 0 {
|
||||
@ -861,7 +872,7 @@ func (p *BinlogAdapter) dispatchInt16ToShards(data []int16, memoryData []ShardDa
|
||||
}
|
||||
if shardID >= int32(len(memoryData)) {
|
||||
log.Warn("Binlog adapter: int16 field's shard ID is illegal", zap.Int32("shardID", shardID), zap.Int("shardsCount", len(memoryData)))
|
||||
return fmt.Errorf("int16 field's shard ID %d is larger than shards number %d", shardID, len(memoryData))
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("int16 field's shard ID %d is larger than shards number %d", shardID, len(memoryData)))
|
||||
}
|
||||
|
||||
partitions := memoryData[shardID] // initBlockData() can ensure the existence, no need to check bound here
|
||||
@ -871,7 +882,7 @@ func (p *BinlogAdapter) dispatchInt16ToShards(data []int16, memoryData []ShardDa
|
||||
if !ok {
|
||||
log.Warn("Binlog adapter: binlog is int16 type, unequal to field",
|
||||
zap.Int64("fieldID", fieldID), zap.Int32("shardID", shardID))
|
||||
return fmt.Errorf("binlog is int16 type, unequal to field %d", fieldID)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("binlog is int16 type, unequal to field %d", fieldID))
|
||||
}
|
||||
fieldData.Data = append(fieldData.Data, val)
|
||||
}
|
||||
@ -885,10 +896,10 @@ func (p *BinlogAdapter) dispatchInt32ToShards(data []int32, memoryData []ShardDa
|
||||
// verify row count
|
||||
if len(data) != len(shardList) {
|
||||
log.Warn("Binlog adapter: int32 field row count is not equal to shard list row count", zap.Int("dataLen", len(data)), zap.Int("shardLen", len(shardList)))
|
||||
return fmt.Errorf("int32 field row count %d is not equal to shard list row count %d", len(data), len(shardList))
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("int32 field row count %d is not equal to shard list row count %d", len(data), len(shardList)))
|
||||
}
|
||||
|
||||
// dispatch entities acoording to shard list
|
||||
// dispatch entities according to shard list
|
||||
for i, val := range data {
|
||||
shardID := shardList[i]
|
||||
if shardID < 0 {
|
||||
@ -896,7 +907,7 @@ func (p *BinlogAdapter) dispatchInt32ToShards(data []int32, memoryData []ShardDa
|
||||
}
|
||||
if shardID >= int32(len(memoryData)) {
|
||||
log.Warn("Binlog adapter: int32 field's shard ID is illegal", zap.Int32("shardID", shardID), zap.Int("shardsCount", len(memoryData)))
|
||||
return fmt.Errorf("int32 field's shard ID %d is larger than shards number %d", shardID, len(memoryData))
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("int32 field's shard ID %d is larger than shards number %d", shardID, len(memoryData)))
|
||||
}
|
||||
|
||||
partitions := memoryData[shardID] // initBlockData() can ensure the existence, no need to check bound here
|
||||
@ -906,7 +917,7 @@ func (p *BinlogAdapter) dispatchInt32ToShards(data []int32, memoryData []ShardDa
|
||||
if !ok {
|
||||
log.Warn("Binlog adapter: binlog is int32 type, unequal to field",
|
||||
zap.Int64("fieldID", fieldID), zap.Int32("shardID", shardID))
|
||||
return fmt.Errorf("binlog is int32 type, unequal to field %d", fieldID)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("binlog is int32 type, unequal to field %d", fieldID))
|
||||
}
|
||||
fieldData.Data = append(fieldData.Data, val)
|
||||
}
|
||||
@ -920,10 +931,10 @@ func (p *BinlogAdapter) dispatchInt64ToShards(data []int64, memoryData []ShardDa
|
||||
// verify row count
|
||||
if len(data) != len(shardList) {
|
||||
log.Warn("Binlog adapter: int64 field row count is not equal to shard list row count", zap.Int("dataLen", len(data)), zap.Int("shardLen", len(shardList)))
|
||||
return fmt.Errorf("int64 field row count %d is not equal to shard list row count %d", len(data), len(shardList))
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("int64 field row count %d is not equal to shard list row count %d", len(data), len(shardList)))
|
||||
}
|
||||
|
||||
// dispatch entities acoording to shard list
|
||||
// dispatch entities according to shard list
|
||||
for i, val := range data {
|
||||
shardID := shardList[i]
|
||||
if shardID < 0 {
|
||||
@ -931,7 +942,7 @@ func (p *BinlogAdapter) dispatchInt64ToShards(data []int64, memoryData []ShardDa
|
||||
}
|
||||
if shardID >= int32(len(memoryData)) {
|
||||
log.Warn("Binlog adapter: int64 field's shard ID is illegal", zap.Int32("shardID", shardID), zap.Int("shardsCount", len(memoryData)))
|
||||
return fmt.Errorf("int64 field's shard ID %d is larger than shards number %d", shardID, len(memoryData))
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("int64 field's shard ID %d is larger than shards number %d", shardID, len(memoryData)))
|
||||
}
|
||||
|
||||
partitions := memoryData[shardID] // initBlockData() can ensure the existence, no need to check bound here
|
||||
@ -941,7 +952,7 @@ func (p *BinlogAdapter) dispatchInt64ToShards(data []int64, memoryData []ShardDa
|
||||
if !ok {
|
||||
log.Warn("Binlog adapter: binlog is int64 type, unequal to field",
|
||||
zap.Int64("fieldID", fieldID), zap.Int32("shardID", shardID))
|
||||
return fmt.Errorf("binlog is int64 type, unequal to field %d", fieldID)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("binlog is int64 type, unequal to field %d", fieldID))
|
||||
}
|
||||
fieldData.Data = append(fieldData.Data, val)
|
||||
}
|
||||
@ -955,10 +966,10 @@ func (p *BinlogAdapter) dispatchFloatToShards(data []float32, memoryData []Shard
|
||||
// verify row count
|
||||
if len(data) != len(shardList) {
|
||||
log.Warn("Binlog adapter: float field row count is not equal to shard list row count", zap.Int("dataLen", len(data)), zap.Int("shardLen", len(shardList)))
|
||||
return fmt.Errorf("float field row count %d is not equal to shard list row count %d", len(data), len(shardList))
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("float field row count %d is not equal to shard list row count %d", len(data), len(shardList)))
|
||||
}
|
||||
|
||||
// dispatch entities acoording to shard list
|
||||
// dispatch entities according to shard list
|
||||
for i, val := range data {
|
||||
shardID := shardList[i]
|
||||
if shardID < 0 {
|
||||
@ -966,7 +977,7 @@ func (p *BinlogAdapter) dispatchFloatToShards(data []float32, memoryData []Shard
|
||||
}
|
||||
if shardID >= int32(len(memoryData)) {
|
||||
log.Warn("Binlog adapter: float field's shard ID is illegal", zap.Int32("shardID", shardID), zap.Int("shardsCount", len(memoryData)))
|
||||
return fmt.Errorf("float field's shard ID %d is larger than shards number %d", shardID, len(memoryData))
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("float field's shard ID %d is larger than shards number %d", shardID, len(memoryData)))
|
||||
}
|
||||
|
||||
partitions := memoryData[shardID] // initBlockData() can ensure the existence, no need to check bound here
|
||||
@ -976,7 +987,7 @@ func (p *BinlogAdapter) dispatchFloatToShards(data []float32, memoryData []Shard
|
||||
if !ok {
|
||||
log.Warn("Binlog adapter: binlog is float type, unequal to field",
|
||||
zap.Int64("fieldID", fieldID), zap.Int32("shardID", shardID))
|
||||
return fmt.Errorf("binlog is float type, unequal to field %d", fieldID)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("binlog is float type, unequal to field %d", fieldID))
|
||||
}
|
||||
fieldData.Data = append(fieldData.Data, val)
|
||||
}
|
||||
@ -990,10 +1001,10 @@ func (p *BinlogAdapter) dispatchDoubleToShards(data []float64, memoryData []Shar
|
||||
// verify row count
|
||||
if len(data) != len(shardList) {
|
||||
log.Warn("Binlog adapter: double field row count is not equal to shard list row count", zap.Int("dataLen", len(data)), zap.Int("shardLen", len(shardList)))
|
||||
return fmt.Errorf("double field row count %d is not equal to shard list row count %d", len(data), len(shardList))
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("double field row count %d is not equal to shard list row count %d", len(data), len(shardList)))
|
||||
}
|
||||
|
||||
// dispatch entities acoording to shard list
|
||||
// dispatch entities according to shard list
|
||||
for i, val := range data {
|
||||
shardID := shardList[i]
|
||||
if shardID < 0 {
|
||||
@ -1001,7 +1012,7 @@ func (p *BinlogAdapter) dispatchDoubleToShards(data []float64, memoryData []Shar
|
||||
}
|
||||
if shardID >= int32(len(memoryData)) {
|
||||
log.Warn("Binlog adapter: double field's shard ID is illegal", zap.Int32("shardID", shardID), zap.Int("shardsCount", len(memoryData)))
|
||||
return fmt.Errorf("double field's shard ID %d is larger than shards number %d", shardID, len(memoryData))
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("double field's shard ID %d is larger than shards number %d", shardID, len(memoryData)))
|
||||
}
|
||||
|
||||
partitions := memoryData[shardID] // initBlockData() can ensure the existence, no need to check bound here
|
||||
@ -1011,7 +1022,7 @@ func (p *BinlogAdapter) dispatchDoubleToShards(data []float64, memoryData []Shar
|
||||
if !ok {
|
||||
log.Warn("Binlog adapter: binlog is double type, unequal to field",
|
||||
zap.Int64("fieldID", fieldID), zap.Int32("shardID", shardID))
|
||||
return fmt.Errorf("binlog is double type, unequal to field %d", fieldID)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("binlog is double type, unequal to field %d", fieldID))
|
||||
}
|
||||
fieldData.Data = append(fieldData.Data, val)
|
||||
}
|
||||
@ -1025,10 +1036,10 @@ func (p *BinlogAdapter) dispatchVarcharToShards(data []string, memoryData []Shar
|
||||
// verify row count
|
||||
if len(data) != len(shardList) {
|
||||
log.Warn("Binlog adapter: varchar field row count is not equal to shard list row count", zap.Int("dataLen", len(data)), zap.Int("shardLen", len(shardList)))
|
||||
return fmt.Errorf("varchar field row count %d is not equal to shard list row count %d", len(data), len(shardList))
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("varchar field row count %d is not equal to shard list row count %d", len(data), len(shardList)))
|
||||
}
|
||||
|
||||
// dispatch entities acoording to shard list
|
||||
// dispatch entities according to shard list
|
||||
for i, val := range data {
|
||||
shardID := shardList[i]
|
||||
if shardID < 0 {
|
||||
@ -1036,7 +1047,7 @@ func (p *BinlogAdapter) dispatchVarcharToShards(data []string, memoryData []Shar
|
||||
}
|
||||
if shardID >= int32(len(memoryData)) {
|
||||
log.Warn("Binlog adapter: varchar field's shard ID is illegal", zap.Int32("shardID", shardID), zap.Int("shardsCount", len(memoryData)))
|
||||
return fmt.Errorf("varchar field's shard ID %d is larger than shards number %d", shardID, len(memoryData))
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("varchar field's shard ID %d is larger than shards number %d", shardID, len(memoryData)))
|
||||
}
|
||||
|
||||
partitions := memoryData[shardID] // initBlockData() can ensure the existence, no need to check bound here
|
||||
@ -1046,7 +1057,7 @@ func (p *BinlogAdapter) dispatchVarcharToShards(data []string, memoryData []Shar
|
||||
if !ok {
|
||||
log.Warn("Binlog adapter: binlog is varchar type, unequal to field",
|
||||
zap.Int64("fieldID", fieldID), zap.Int32("shardID", shardID))
|
||||
return fmt.Errorf("binlog is varchar type, unequal to field %d", fieldID)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("binlog is varchar type, unequal to field %d", fieldID))
|
||||
}
|
||||
fieldData.Data = append(fieldData.Data, val)
|
||||
}
|
||||
@ -1060,10 +1071,10 @@ func (p *BinlogAdapter) dispatchBytesToShards(data [][]byte, memoryData []ShardD
|
||||
// verify row count
|
||||
if len(data) != len(shardList) {
|
||||
log.Warn("Binlog adapter: JSON field row count is not equal to shard list row count", zap.Int("dataLen", len(data)), zap.Int("shardLen", len(shardList)))
|
||||
return fmt.Errorf("varchar JSON row count %d is not equal to shard list row count %d", len(data), len(shardList))
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("varchar JSON row count %d is not equal to shard list row count %d", len(data), len(shardList)))
|
||||
}
|
||||
|
||||
// dispatch entities acoording to shard list
|
||||
// dispatch entities according to shard list
|
||||
for i, val := range data {
|
||||
shardID := shardList[i]
|
||||
if shardID < 0 {
|
||||
@ -1071,7 +1082,7 @@ func (p *BinlogAdapter) dispatchBytesToShards(data [][]byte, memoryData []ShardD
|
||||
}
|
||||
if shardID >= int32(len(memoryData)) {
|
||||
log.Warn("Binlog adapter: JSON field's shard ID is illegal", zap.Int32("shardID", shardID), zap.Int("shardsCount", len(memoryData)))
|
||||
return fmt.Errorf("JSON field's shard ID %d is larger than shards number %d", shardID, len(memoryData))
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("JSON field's shard ID %d is larger than shards number %d", shardID, len(memoryData)))
|
||||
}
|
||||
|
||||
partitions := memoryData[shardID] // initBlockData() can ensure the existence, no need to check bound here
|
||||
@ -1081,7 +1092,7 @@ func (p *BinlogAdapter) dispatchBytesToShards(data [][]byte, memoryData []ShardD
|
||||
if !ok {
|
||||
log.Warn("Binlog adapter: binlog is JSON type, unequal to field",
|
||||
zap.Int64("fieldID", fieldID), zap.Int32("shardID", shardID))
|
||||
return fmt.Errorf("binlog is JSON type, unequal to field %d", fieldID)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("binlog is JSON type, unequal to field %d", fieldID))
|
||||
}
|
||||
fieldData.Data = append(fieldData.Data, val)
|
||||
}
|
||||
@ -1098,10 +1109,10 @@ func (p *BinlogAdapter) dispatchBinaryVecToShards(data []byte, dim int, memoryDa
|
||||
if count != len(shardList) {
|
||||
log.Warn("Binlog adapter: binary vector field row count is not equal to shard list row count",
|
||||
zap.Int("dataLen", count), zap.Int("shardLen", len(shardList)))
|
||||
return fmt.Errorf("binary vector field row count %d is not equal to shard list row count %d", len(data), len(shardList))
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("binary vector field row count %d is not equal to shard list row count %d", len(data), len(shardList)))
|
||||
}
|
||||
|
||||
// dispatch entities acoording to shard list
|
||||
// dispatch entities according to shard list
|
||||
for i := 0; i < count; i++ {
|
||||
shardID := shardList[i]
|
||||
if shardID < 0 {
|
||||
@ -1109,7 +1120,7 @@ func (p *BinlogAdapter) dispatchBinaryVecToShards(data []byte, dim int, memoryDa
|
||||
}
|
||||
if shardID >= int32(len(memoryData)) {
|
||||
log.Warn("Binlog adapter: binary vector field's shard ID is illegal", zap.Int32("shardID", shardID), zap.Int("shardsCount", len(memoryData)))
|
||||
return fmt.Errorf("binary vector field's shard ID %d is larger than shards number %d", shardID, len(memoryData))
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("binary vector field's shard ID %d is larger than shards number %d", shardID, len(memoryData)))
|
||||
}
|
||||
|
||||
partitions := memoryData[shardID] // initBlockData() can ensure the existence, no need to check bound here
|
||||
@ -1119,13 +1130,13 @@ func (p *BinlogAdapter) dispatchBinaryVecToShards(data []byte, dim int, memoryDa
|
||||
if !ok {
|
||||
log.Warn("Binlog adapter: binlog is binary vector type, unequal to field",
|
||||
zap.Int64("fieldID", fieldID), zap.Int32("shardID", shardID))
|
||||
return fmt.Errorf("binlog is binary vector type, unequal to field %d", fieldID)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("binlog is binary vector type, unequal to field %d", fieldID))
|
||||
}
|
||||
|
||||
if fieldData.Dim != dim {
|
||||
log.Warn("Binlog adapter: binary vector dimension mismatch",
|
||||
zap.Int("sourceDim", dim), zap.Int("schemaDim", fieldData.Dim))
|
||||
return fmt.Errorf("binary vector dimension %d is not equal to schema dimension %d", dim, fieldData.Dim)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("binary vector dimension %d is not equal to schema dimension %d", dim, fieldData.Dim))
|
||||
}
|
||||
for j := 0; j < bytesPerVector; j++ {
|
||||
val := data[bytesPerVector*i+j]
|
||||
@ -1145,10 +1156,10 @@ func (p *BinlogAdapter) dispatchFloatVecToShards(data []float32, dim int, memory
|
||||
if count != len(shardList) {
|
||||
log.Warn("Binlog adapter: float vector field row count is not equal to shard list row count",
|
||||
zap.Int("dataLen", count), zap.Int("shardLen", len(shardList)))
|
||||
return fmt.Errorf("float vector field row count %d is not equal to shard list row count %d", len(data), len(shardList))
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("float vector field row count %d is not equal to shard list row count %d", len(data), len(shardList)))
|
||||
}
|
||||
|
||||
// dispatch entities acoording to shard list
|
||||
// dispatch entities according to shard list
|
||||
for i := 0; i < count; i++ {
|
||||
shardID := shardList[i]
|
||||
if shardID < 0 {
|
||||
@ -1156,7 +1167,7 @@ func (p *BinlogAdapter) dispatchFloatVecToShards(data []float32, dim int, memory
|
||||
}
|
||||
if shardID >= int32(len(memoryData)) {
|
||||
log.Warn("Binlog adapter: float vector field's shard ID is illegal", zap.Int32("shardID", shardID), zap.Int("shardsCount", len(memoryData)))
|
||||
return fmt.Errorf("float vector field's shard ID %d is larger than shards number %d", shardID, len(memoryData))
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("float vector field's shard ID %d is larger than shards number %d", shardID, len(memoryData)))
|
||||
}
|
||||
|
||||
partitions := memoryData[shardID] // initBlockData() can ensure the existence, no need to check bound here
|
||||
@ -1166,13 +1177,13 @@ func (p *BinlogAdapter) dispatchFloatVecToShards(data []float32, dim int, memory
|
||||
if !ok {
|
||||
log.Warn("Binlog adapter: binlog is float vector type, unequal to field",
|
||||
zap.Int64("fieldID", fieldID), zap.Int32("shardID", shardID))
|
||||
return fmt.Errorf("binlog is float vector type, unequal to field %d", fieldID)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("binlog is float vector type, unequal to field %d", fieldID))
|
||||
}
|
||||
|
||||
if fieldData.Dim != dim {
|
||||
log.Warn("Binlog adapter: float vector dimension mismatch",
|
||||
zap.Int("sourceDim", dim), zap.Int("schemaDim", fieldData.Dim))
|
||||
return fmt.Errorf("binary vector dimension %d is not equal to schema dimension %d", dim, fieldData.Dim)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("binary vector dimension %d is not equal to schema dimension %d", dim, fieldData.Dim))
|
||||
}
|
||||
for j := 0; j < dim; j++ {
|
||||
val := data[dim*i+j]
|
||||
@@ -1182,3 +1193,39 @@ func (p *BinlogAdapter) dispatchFloatVecToShards(data []float32, dim int, memory

return nil
}

func (p *BinlogAdapter) dispatchArrayToShards(data []*schemapb.ScalarField, memoryData []ShardData,
shardList []int32, fieldID storage.FieldID,
) error {
// verify row count
if len(data) != len(shardList) {
log.Warn("Binlog adapter: Array field row count is not equal to shard list row count", zap.Int("dataLen", len(data)), zap.Int("shardLen", len(shardList)))
return merr.WrapErrImportFailed(fmt.Sprintf("array row count %d is not equal to shard list row count %d", len(data), len(shardList)))
}

// dispatch entities according to shard list
for i, val := range data {
shardID := shardList[i]
if shardID < 0 {
continue // this entity has been deleted or excluded by timestamp
}
if shardID >= int32(len(memoryData)) {
log.Warn("Binlog adapter: Array field's shard ID is illegal", zap.Int32("shardID", shardID), zap.Int("shardsCount", len(memoryData)))
return merr.WrapErrImportFailed(fmt.Sprintf("array field's shard ID %d is larger than shards number %d", shardID, len(memoryData)))
}

partitions := memoryData[shardID] // initBlockData() can ensure the existence, no need to check bound here
fields := partitions[p.collectionInfo.PartitionIDs[0]] // NewBinlogAdapter() can ensure only one partition
field := fields[fieldID] // initBlockData() can ensure the existence, no need to check existence here
fieldData, ok := field.(*storage.ArrayFieldData) // avoid data type mismatch between binlog file and schema
if !ok {
log.Warn("Binlog adapter: binlog is array type, unequal to field",
zap.Int64("fieldID", fieldID), zap.Int32("shardID", shardID))
return merr.WrapErrImportFailed(fmt.Sprintf("binlog is array type, unequal to field %d", fieldID))
}
fieldData.Data = append(fieldData.Data, val)
// TODO @cai: set element type
}

return nil
}
@@ -134,7 +134,7 @@ func Test_BinlogAdapterVerify(t *testing.T) {

// row id field missed
holder.fieldFiles = make(map[int64][]string)
for i := int64(102); i <= 112; i++ {
for i := int64(102); i <= 113; i++ {
holder.fieldFiles[i] = make([]string, 0)
}
err = adapter.verify(holder)
@@ -156,7 +156,7 @@ func Test_BinlogAdapterVerify(t *testing.T) {
assert.Error(t, err)

// succeed
for i := int64(102); i <= 112; i++ {
for i := int64(102); i <= 113; i++ {
holder.fieldFiles[i] = []string{
"a",
}
@@ -667,6 +667,7 @@ func Test_BinlogAdapterReadInt64PK(t *testing.T) {
int64(110): {"110_insertlog"},
int64(111): {"111_insertlog"},
int64(112): {"112_insertlog"},
int64(113): {"113_insertlog"},
}
holder.deltaFiles = []string{"deltalog"}
err = adapter.Read(holder)
@@ -689,6 +690,7 @@ func Test_BinlogAdapterReadInt64PK(t *testing.T) {
"110_insertlog": createBinlogBuf(t, schemapb.DataType_BinaryVector, fieldsData[110].([][]byte)),
"111_insertlog": createBinlogBuf(t, schemapb.DataType_FloatVector, fieldsData[111].([][]float32)),
"112_insertlog": createBinlogBuf(t, schemapb.DataType_JSON, fieldsData[112].([][]byte)),
"113_insertlog": createBinlogBuf(t, schemapb.DataType_Array, fieldsData[113].([]*schemapb.ScalarField)),
"deltalog": createDeltalogBuf(t, deletedItems, false),
}
@@ -1013,6 +1015,79 @@ func Test_BinlogAdapterDispatch(t *testing.T) {
assert.Equal(t, 0, shardsData[2][partitionID][fieldID].RowNum())
})

t.Run("dispatch Array data", func(t *testing.T) {
fieldID := int64(113)
// row count mismatch
data := []*schemapb.ScalarField{
{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{
Data: []int32{1, 2, 3, 4, 5},
},
},
},
{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{
Data: []int32{7, 8, 9},
},
},
},
{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{
Data: []int32{10, 11},
},
},
},
{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{
Data: []int32{},
},
},
},
}
err = adapter.dispatchArrayToShards(data, shardsData, shardList, fieldID)
assert.Error(t, err)
for _, shardData := range shardsData {
assert.Equal(t, 0, shardData[partitionID][fieldID].RowNum())
}

// illegal shard ID
err = adapter.dispatchArrayToShards(data, shardsData, []int32{9, 1, 0, 2}, fieldID)
assert.Error(t, err)

// succeed
err = adapter.dispatchArrayToShards([]*schemapb.ScalarField{
{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{
Data: []int32{},
},
},
},
{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{
Data: []int32{},
},
},
},
{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{
Data: []int32{},
},
},
},
}, shardsData, shardList, fieldID)
assert.NoError(t, err)
assert.Equal(t, 1, shardsData[0][partitionID][fieldID].RowNum())
assert.Equal(t, 1, shardsData[1][partitionID][fieldID].RowNum())
assert.Equal(t, 0, shardsData[2][partitionID][fieldID].RowNum())
})
t.Run("dispatch binary vector data", func(t *testing.T) {
|
||||
fieldID := int64(110)
|
||||
// row count mismatch
|
||||
@ -1186,6 +1261,10 @@ func Test_BinlogAdapterReadInsertlog(t *testing.T) {
|
||||
failedFunc(111, "floatvector", schemapb.DataType_FloatVector, 110, schemapb.DataType_BinaryVector)
|
||||
})
|
||||
|
||||
t.Run("failed to dispatch Array data", func(t *testing.T) {
|
||||
failedFunc(113, "array", schemapb.DataType_Array, 111, schemapb.DataType_FloatVector)
|
||||
})
|
||||
|
||||
// succeed
|
||||
chunkManager.readBuf["int32"] = createBinlogBuf(t, schemapb.DataType_Int32, fieldsData[105].([]int32))
|
||||
err = adapter.readInsertlog(105, "int32", shardsData, []int32{0, 1, 1})
|
||||
|
@@ -20,12 +20,12 @@ import (
"context"
"fmt"

"github.com/cockroachdb/errors"
"go.uber.org/zap"

"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/merr"
)

// BinlogFile class is a wrapper of storage.BinlogReader, to read binlog file, block by block.
@@ -40,7 +40,7 @@ type BinlogFile struct {
func NewBinlogFile(chunkManager storage.ChunkManager) (*BinlogFile, error) {
if chunkManager == nil {
log.Warn("Binlog file: chunk manager pointer is nil")
return nil, errors.New("chunk manager pointer is nil")
return nil, merr.WrapErrImportFailed("chunk manager pointer is nil")
}

binlogFile := &BinlogFile{
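BinlogFile is described above as a wrapper of storage.BinlogReader that reads a binlog file block by block; the ReadBool/ReadInt8/ReadInt16/... methods changed below all share the same loop: fetch the next event until nil, check that it is an insert event of the expected data type, then append that block's payload. A toy model of that control flow only (the reader type and its methods here are stand-ins, not the Milvus storage API):

```go
package main

import (
	"errors"
	"fmt"
)

// toyEvent and toyReader are illustrative stand-ins for the event readers
// that storage.BinlogReader.NextEventReader() returns in the real code.
type toyEvent struct {
	isInsert bool
	payload  []int64
}

type toyReader struct {
	events []toyEvent
	next   int
}

func (r *toyReader) NextEventReader() (*toyEvent, error) {
	if r.next >= len(r.events) {
		return nil, nil // end of the file
	}
	ev := &r.events[r.next]
	r.next++
	return ev, nil
}

// readInt64Blocks mirrors the block-by-block loop the ReadXxx() methods use.
func readInt64Blocks(r *toyReader) ([]int64, error) {
	result := make([]int64, 0)
	for {
		event, err := r.NextEventReader()
		if err != nil {
			return nil, fmt.Errorf("failed to iterate events reader, error: %v", err)
		}
		if event == nil {
			break // end of the file
		}
		if !event.isInsert {
			return nil, errors.New("binlog file is not insert log")
		}
		result = append(result, event.payload...)
	}
	return result, nil
}

func main() {
	r := &toyReader{events: []toyEvent{
		{isInsert: true, payload: []int64{1, 2}},
		{isInsert: true, payload: []int64{3}},
	}}
	fmt.Println(readInt64Blocks(r)) // [1 2 3] <nil>
}
```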
@ -54,20 +54,20 @@ func (p *BinlogFile) Open(filePath string) error {
|
||||
p.Close()
|
||||
if len(filePath) == 0 {
|
||||
log.Warn("Binlog file: binlog path is empty")
|
||||
return errors.New("binlog path is empty")
|
||||
return merr.WrapErrImportFailed("binlog path is empty")
|
||||
}
|
||||
|
||||
// TODO add context
|
||||
bytes, err := p.chunkManager.Read(context.TODO(), filePath)
|
||||
if err != nil {
|
||||
log.Warn("Binlog file: failed to open binlog", zap.String("filePath", filePath), zap.Error(err))
|
||||
return fmt.Errorf("failed to open binlog %s", filePath)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to open binlog %s", filePath))
|
||||
}
|
||||
|
||||
p.reader, err = storage.NewBinlogReader(bytes)
|
||||
if err != nil {
|
||||
log.Warn("Binlog file: failed to initialize binlog reader", zap.String("filePath", filePath), zap.Error(err))
|
||||
return fmt.Errorf("failed to initialize binlog reader for binlog %s, error: %w", filePath, err)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to initialize binlog reader for binlog %s, error: %v", filePath, err))
|
||||
}
|
||||
|
||||
log.Info("Binlog file: open binlog successfully", zap.String("filePath", filePath))
|
||||
@ -95,7 +95,7 @@ func (p *BinlogFile) DataType() schemapb.DataType {
|
||||
func (p *BinlogFile) ReadBool() ([]bool, error) {
|
||||
if p.reader == nil {
|
||||
log.Warn("Binlog file: binlog reader not yet initialized")
|
||||
return nil, errors.New("binlog reader not yet initialized")
|
||||
return nil, merr.WrapErrImportFailed("binlog reader not yet initialized")
|
||||
}
|
||||
|
||||
result := make([]bool, 0)
|
||||
@ -103,7 +103,7 @@ func (p *BinlogFile) ReadBool() ([]bool, error) {
|
||||
event, err := p.reader.NextEventReader()
|
||||
if err != nil {
|
||||
log.Warn("Binlog file: failed to iterate events reader", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to iterate events reader, error: %w", err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to iterate events reader, error: %v", err))
|
||||
}
|
||||
|
||||
// end of the file
|
||||
@ -113,18 +113,18 @@ func (p *BinlogFile) ReadBool() ([]bool, error) {
|
||||
|
||||
if event.TypeCode != storage.InsertEventType {
|
||||
log.Warn("Binlog file: binlog file is not insert log")
|
||||
return nil, errors.New("binlog file is not insert log")
|
||||
return nil, merr.WrapErrImportFailed("binlog file is not insert log")
|
||||
}
|
||||
|
||||
if p.DataType() != schemapb.DataType_Bool {
|
||||
log.Warn("Binlog file: binlog data type is not bool")
|
||||
return nil, errors.New("binlog data type is not bool")
|
||||
return nil, merr.WrapErrImportFailed("binlog data type is not bool")
|
||||
}
|
||||
|
||||
data, err := event.PayloadReaderInterface.GetBoolFromPayload()
|
||||
if err != nil {
|
||||
log.Warn("Binlog file: failed to read bool data", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to read bool data, error: %w", err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read bool data, error: %v", err))
|
||||
}
|
||||
|
||||
result = append(result, data...)
|
||||
@ -138,7 +138,7 @@ func (p *BinlogFile) ReadBool() ([]bool, error) {
|
||||
func (p *BinlogFile) ReadInt8() ([]int8, error) {
|
||||
if p.reader == nil {
|
||||
log.Warn("Binlog file: binlog reader not yet initialized")
|
||||
return nil, errors.New("binlog reader not yet initialized")
|
||||
return nil, merr.WrapErrImportFailed("binlog reader not yet initialized")
|
||||
}
|
||||
|
||||
result := make([]int8, 0)
|
||||
@ -146,7 +146,7 @@ func (p *BinlogFile) ReadInt8() ([]int8, error) {
|
||||
event, err := p.reader.NextEventReader()
|
||||
if err != nil {
|
||||
log.Warn("Binlog file: failed to iterate events reader", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to iterate events reader, error: %w", err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to iterate events reader, error: %v", err))
|
||||
}
|
||||
|
||||
// end of the file
|
||||
@ -156,18 +156,18 @@ func (p *BinlogFile) ReadInt8() ([]int8, error) {
|
||||
|
||||
if event.TypeCode != storage.InsertEventType {
|
||||
log.Warn("Binlog file: binlog file is not insert log")
|
||||
return nil, errors.New("binlog file is not insert log")
|
||||
return nil, merr.WrapErrImportFailed("binlog file is not insert log")
|
||||
}
|
||||
|
||||
if p.DataType() != schemapb.DataType_Int8 {
|
||||
log.Warn("Binlog file: binlog data type is not int8")
|
||||
return nil, errors.New("binlog data type is not int8")
|
||||
return nil, merr.WrapErrImportFailed("binlog data type is not int8")
|
||||
}
|
||||
|
||||
data, err := event.PayloadReaderInterface.GetInt8FromPayload()
|
||||
if err != nil {
|
||||
log.Warn("Binlog file: failed to read int8 data", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to read int8 data, error: %w", err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read int8 data, error: %v", err))
|
||||
}
|
||||
|
||||
result = append(result, data...)
|
||||
@ -181,7 +181,7 @@ func (p *BinlogFile) ReadInt8() ([]int8, error) {
|
||||
func (p *BinlogFile) ReadInt16() ([]int16, error) {
|
||||
if p.reader == nil {
|
||||
log.Warn("Binlog file: binlog reader not yet initialized")
|
||||
return nil, errors.New("binlog reader not yet initialized")
|
||||
return nil, merr.WrapErrImportFailed("binlog reader not yet initialized")
|
||||
}
|
||||
|
||||
result := make([]int16, 0)
|
||||
@ -189,7 +189,7 @@ func (p *BinlogFile) ReadInt16() ([]int16, error) {
|
||||
event, err := p.reader.NextEventReader()
|
||||
if err != nil {
|
||||
log.Warn("Binlog file: failed to iterate events reader", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to iterate events reader, error: %w", err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to iterate events reader, error: %v", err))
|
||||
}
|
||||
|
||||
// end of the file
|
||||
@ -199,18 +199,18 @@ func (p *BinlogFile) ReadInt16() ([]int16, error) {
|
||||
|
||||
if event.TypeCode != storage.InsertEventType {
|
||||
log.Warn("Binlog file: binlog file is not insert log")
|
||||
return nil, errors.New("binlog file is not insert log")
|
||||
return nil, merr.WrapErrImportFailed("binlog file is not insert log")
|
||||
}
|
||||
|
||||
if p.DataType() != schemapb.DataType_Int16 {
|
||||
log.Warn("Binlog file: binlog data type is not int16")
|
||||
return nil, errors.New("binlog data type is not int16")
|
||||
return nil, merr.WrapErrImportFailed("binlog data type is not int16")
|
||||
}
|
||||
|
||||
data, err := event.PayloadReaderInterface.GetInt16FromPayload()
|
||||
if err != nil {
|
||||
log.Warn("Binlog file: failed to read int16 data", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to read int16 data, error: %w", err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read int16 data, error: %v", err))
|
||||
}
|
||||
|
||||
result = append(result, data...)
|
||||
@ -224,7 +224,7 @@ func (p *BinlogFile) ReadInt16() ([]int16, error) {
|
||||
func (p *BinlogFile) ReadInt32() ([]int32, error) {
|
||||
if p.reader == nil {
|
||||
log.Warn("Binlog file: binlog reader not yet initialized")
|
||||
return nil, errors.New("binlog reader not yet initialized")
|
||||
return nil, merr.WrapErrImportFailed("binlog reader not yet initialized")
|
||||
}
|
||||
|
||||
result := make([]int32, 0)
|
||||
@ -232,7 +232,7 @@ func (p *BinlogFile) ReadInt32() ([]int32, error) {
|
||||
event, err := p.reader.NextEventReader()
|
||||
if err != nil {
|
||||
log.Warn("Binlog file: failed to iterate events reader", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to iterate events reader, error: %w", err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to iterate events reader, error: %v", err))
|
||||
}
|
||||
|
||||
// end of the file
|
||||
@ -242,18 +242,18 @@ func (p *BinlogFile) ReadInt32() ([]int32, error) {
|
||||
|
||||
if event.TypeCode != storage.InsertEventType {
|
||||
log.Warn("Binlog file: binlog file is not insert log")
|
||||
return nil, errors.New("binlog file is not insert log")
|
||||
return nil, merr.WrapErrImportFailed("binlog file is not insert log")
|
||||
}
|
||||
|
||||
if p.DataType() != schemapb.DataType_Int32 {
|
||||
log.Warn("Binlog file: binlog data type is not int32")
|
||||
return nil, errors.New("binlog data type is not int32")
|
||||
return nil, merr.WrapErrImportFailed("binlog data type is not int32")
|
||||
}
|
||||
|
||||
data, err := event.PayloadReaderInterface.GetInt32FromPayload()
|
||||
if err != nil {
|
||||
log.Warn("Binlog file: failed to read int32 data", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to read int32 data, error: %w", err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read int32 data, error: %v", err))
|
||||
}
|
||||
|
||||
result = append(result, data...)
|
||||
@ -267,7 +267,7 @@ func (p *BinlogFile) ReadInt32() ([]int32, error) {
|
||||
func (p *BinlogFile) ReadInt64() ([]int64, error) {
|
||||
if p.reader == nil {
|
||||
log.Warn("Binlog file: binlog reader not yet initialized")
|
||||
return nil, errors.New("binlog reader not yet initialized")
|
||||
return nil, merr.WrapErrImportFailed("binlog reader not yet initialized")
|
||||
}
|
||||
|
||||
result := make([]int64, 0)
|
||||
@ -275,7 +275,7 @@ func (p *BinlogFile) ReadInt64() ([]int64, error) {
|
||||
event, err := p.reader.NextEventReader()
|
||||
if err != nil {
|
||||
log.Warn("Binlog file: failed to iterate events reader", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to iterate events reader, error: %w", err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to iterate events reader, error: %v", err))
|
||||
}
|
||||
|
||||
// end of the file
|
||||
@ -285,18 +285,18 @@ func (p *BinlogFile) ReadInt64() ([]int64, error) {
|
||||
|
||||
if event.TypeCode != storage.InsertEventType {
|
||||
log.Warn("Binlog file: binlog file is not insert log")
|
||||
return nil, errors.New("binlog file is not insert log")
|
||||
return nil, merr.WrapErrImportFailed("binlog file is not insert log")
|
||||
}
|
||||
|
||||
if p.DataType() != schemapb.DataType_Int64 {
|
||||
log.Warn("Binlog file: binlog data type is not int64")
|
||||
return nil, errors.New("binlog data type is not int64")
|
||||
return nil, merr.WrapErrImportFailed("binlog data type is not int64")
|
||||
}
|
||||
|
||||
data, err := event.PayloadReaderInterface.GetInt64FromPayload()
|
||||
if err != nil {
|
||||
log.Warn("Binlog file: failed to read int64 data", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to read int64 data, error: %w", err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read int64 data, error: %v", err))
|
||||
}
|
||||
|
||||
result = append(result, data...)
|
||||
@ -310,7 +310,7 @@ func (p *BinlogFile) ReadInt64() ([]int64, error) {
|
||||
func (p *BinlogFile) ReadFloat() ([]float32, error) {
|
||||
if p.reader == nil {
|
||||
log.Warn("Binlog file: binlog reader not yet initialized")
|
||||
return nil, errors.New("binlog reader not yet initialized")
|
||||
return nil, merr.WrapErrImportFailed("binlog reader not yet initialized")
|
||||
}
|
||||
|
||||
result := make([]float32, 0)
|
||||
@ -318,7 +318,7 @@ func (p *BinlogFile) ReadFloat() ([]float32, error) {
|
||||
event, err := p.reader.NextEventReader()
|
||||
if err != nil {
|
||||
log.Warn("Binlog file: failed to iterate events reader", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to iterate events reader, error: %w", err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to iterate events reader, error: %v", err))
|
||||
}
|
||||
|
||||
// end of the file
|
||||
@ -328,18 +328,18 @@ func (p *BinlogFile) ReadFloat() ([]float32, error) {
|
||||
|
||||
if event.TypeCode != storage.InsertEventType {
|
||||
log.Warn("Binlog file: binlog file is not insert log")
|
||||
return nil, errors.New("binlog file is not insert log")
|
||||
return nil, merr.WrapErrImportFailed("binlog file is not insert log")
|
||||
}
|
||||
|
||||
if p.DataType() != schemapb.DataType_Float {
|
||||
log.Warn("Binlog file: binlog data type is not float")
|
||||
return nil, errors.New("binlog data type is not float")
|
||||
return nil, merr.WrapErrImportFailed("binlog data type is not float")
|
||||
}
|
||||
|
||||
data, err := event.PayloadReaderInterface.GetFloatFromPayload()
|
||||
if err != nil {
|
||||
log.Warn("Binlog file: failed to read float data", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to read float data, error: %w", err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read float data, error: %v", err))
|
||||
}
|
||||
|
||||
result = append(result, data...)
|
||||
@ -353,7 +353,7 @@ func (p *BinlogFile) ReadFloat() ([]float32, error) {
|
||||
func (p *BinlogFile) ReadDouble() ([]float64, error) {
|
||||
if p.reader == nil {
|
||||
log.Warn("Binlog file: binlog reader not yet initialized")
|
||||
return nil, errors.New("binlog reader not yet initialized")
|
||||
return nil, merr.WrapErrImportFailed("binlog reader not yet initialized")
|
||||
}
|
||||
|
||||
result := make([]float64, 0)
|
||||
@ -361,7 +361,7 @@ func (p *BinlogFile) ReadDouble() ([]float64, error) {
|
||||
event, err := p.reader.NextEventReader()
|
||||
if err != nil {
|
||||
log.Warn("Binlog file: failed to iterate events reader", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to iterate events reader, error: %w", err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to iterate events reader, error: %v", err))
|
||||
}
|
||||
|
||||
// end of the file
|
||||
@ -371,18 +371,18 @@ func (p *BinlogFile) ReadDouble() ([]float64, error) {
|
||||
|
||||
if event.TypeCode != storage.InsertEventType {
|
||||
log.Warn("Binlog file: binlog file is not insert log")
|
||||
return nil, errors.New("binlog file is not insert log")
|
||||
return nil, merr.WrapErrImportFailed("binlog file is not insert log")
|
||||
}
|
||||
|
||||
if p.DataType() != schemapb.DataType_Double {
|
||||
log.Warn("Binlog file: binlog data type is not double")
|
||||
return nil, errors.New("binlog data type is not double")
|
||||
return nil, merr.WrapErrImportFailed("binlog data type is not double")
|
||||
}
|
||||
|
||||
data, err := event.PayloadReaderInterface.GetDoubleFromPayload()
|
||||
if err != nil {
|
||||
log.Warn("Binlog file: failed to read double data", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to read double data, error: %w", err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read double data, error: %v", err))
|
||||
}
|
||||
|
||||
result = append(result, data...)
|
||||
@ -396,7 +396,7 @@ func (p *BinlogFile) ReadDouble() ([]float64, error) {
|
||||
func (p *BinlogFile) ReadVarchar() ([]string, error) {
|
||||
if p.reader == nil {
|
||||
log.Warn("Binlog file: binlog reader not yet initialized")
|
||||
return nil, errors.New("binlog reader not yet initialized")
|
||||
return nil, merr.WrapErrImportFailed("binlog reader not yet initialized")
|
||||
}
|
||||
|
||||
result := make([]string, 0)
|
||||
@ -404,7 +404,7 @@ func (p *BinlogFile) ReadVarchar() ([]string, error) {
|
||||
event, err := p.reader.NextEventReader()
|
||||
if err != nil {
|
||||
log.Warn("Binlog file: failed to iterate events reader", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to iterate events reader, error: %w", err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to iterate events reader, error: %v", err))
|
||||
}
|
||||
|
||||
// end of the file
|
||||
@ -415,18 +415,18 @@ func (p *BinlogFile) ReadVarchar() ([]string, error) {
|
||||
// special case: delete event data type is varchar
|
||||
if event.TypeCode != storage.InsertEventType && event.TypeCode != storage.DeleteEventType {
|
||||
log.Warn("Binlog file: binlog file is not insert log")
|
||||
return nil, errors.New("binlog file is not insert log")
|
||||
return nil, merr.WrapErrImportFailed("binlog file is not insert log")
|
||||
}
|
||||
|
||||
if (p.DataType() != schemapb.DataType_VarChar) && (p.DataType() != schemapb.DataType_String) {
|
||||
log.Warn("Binlog file: binlog data type is not varchar")
|
||||
return nil, errors.New("binlog data type is not varchar")
|
||||
return nil, merr.WrapErrImportFailed("binlog data type is not varchar")
|
||||
}
|
||||
|
||||
data, err := event.PayloadReaderInterface.GetStringFromPayload()
|
||||
if err != nil {
|
||||
log.Warn("Binlog file: failed to read varchar data", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to read varchar data, error: %w", err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read varchar data, error: %v", err))
|
||||
}
|
||||
|
||||
result = append(result, data...)
|
||||
@ -440,7 +440,7 @@ func (p *BinlogFile) ReadVarchar() ([]string, error) {
func (p *BinlogFile) ReadJSON() ([][]byte, error) {
if p.reader == nil {
log.Warn("Binlog file: binlog reader not yet initialized")
return nil, errors.New("binlog reader not yet initialized")
return nil, merr.WrapErrImportFailed("binlog reader not yet initialized")
}

result := make([][]byte, 0)
@ -448,7 +448,7 @@ func (p *BinlogFile) ReadJSON() ([][]byte, error) {
event, err := p.reader.NextEventReader()
if err != nil {
log.Warn("Binlog file: failed to iterate events reader", zap.Error(err))
return nil, fmt.Errorf("failed to iterate events reader, error: %w", err)
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to iterate events reader, error: %v", err))
}

// end of the file
@ -458,18 +458,61 @@ func (p *BinlogFile) ReadJSON() ([][]byte, error) {

if event.TypeCode != storage.InsertEventType {
log.Warn("Binlog file: binlog file is not insert log")
return nil, errors.New("binlog file is not insert log")
return nil, merr.WrapErrImportFailed("binlog file is not insert log")
}

if p.DataType() != schemapb.DataType_JSON {
log.Warn("Binlog file: binlog data type is not JSON")
return nil, errors.New("binlog data type is not JSON")
return nil, merr.WrapErrImportFailed("binlog data type is not JSON")
}

data, err := event.PayloadReaderInterface.GetJSONFromPayload()
if err != nil {
log.Warn("Binlog file: failed to read JSON data", zap.Error(err))
return nil, fmt.Errorf("failed to read JSON data, error: %w", err)
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read JSON data, error: %v", err))
}

result = append(result, data...)
}

return result, nil
}

// ReadArray method reads all the blocks of a binlog by a data type.
// A binlog is designed to support multiple blocks, but so far each binlog always contains only one block.
func (p *BinlogFile) ReadArray() ([]*schemapb.ScalarField, error) {
if p.reader == nil {
log.Warn("Binlog file: binlog reader not yet initialized")
return nil, merr.WrapErrImportFailed("binlog reader not yet initialized")
}

result := make([]*schemapb.ScalarField, 0)
for {
event, err := p.reader.NextEventReader()
if err != nil {
log.Warn("Binlog file: failed to iterate events reader", zap.Error(err))
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to iterate events reader, error: %v", err))
}

// end of the file
if event == nil {
break
}

if event.TypeCode != storage.InsertEventType {
log.Warn("Binlog file: binlog file is not insert log")
return nil, merr.WrapErrImportFailed("binlog file is not insert log")
}

if p.DataType() != schemapb.DataType_Array {
log.Warn("Binlog file: binlog data type is not Array")
return nil, merr.WrapErrImportFailed("binlog data type is not Array")
}

data, err := event.PayloadReaderInterface.GetArrayFromPayload()
if err != nil {
log.Warn("Binlog file: failed to read Array data", zap.Error(err))
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read Array data, error: %v", err))
}

result = append(result, data...)
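A minimal usage sketch of the new reader, assuming the importutil package context, a storage.ChunkManager and an insert-log path are at hand; the helper name and the path variable here are illustrative, not part of this file:

func readArrayField(chunkManager storage.ChunkManager, logPath string) ([]*schemapb.ScalarField, error) {
	// open one insert log of an Array-type field
	binlogFile, err := NewBinlogFile(chunkManager)
	if err != nil {
		return nil, err
	}
	if err = binlogFile.Open(logPath); err != nil {
		return nil, err
	}
	defer binlogFile.Close()
	// one *schemapb.ScalarField per row of the original segment
	return binlogFile.ReadArray()
}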
@ -484,7 +527,7 @@ func (p *BinlogFile) ReadJSON() ([][]byte, error) {
|
||||
func (p *BinlogFile) ReadBinaryVector() ([]byte, int, error) {
|
||||
if p.reader == nil {
|
||||
log.Warn("Binlog file: binlog reader not yet initialized")
|
||||
return nil, 0, errors.New("binlog reader not yet initialized")
|
||||
return nil, 0, merr.WrapErrImportFailed("binlog reader not yet initialized")
|
||||
}
|
||||
|
||||
dim := 0
|
||||
@ -493,7 +536,7 @@ func (p *BinlogFile) ReadBinaryVector() ([]byte, int, error) {
|
||||
event, err := p.reader.NextEventReader()
|
||||
if err != nil {
|
||||
log.Warn("Binlog file: failed to iterate events reader", zap.Error(err))
|
||||
return nil, 0, fmt.Errorf("failed to iterate events reader, error: %w", err)
|
||||
return nil, 0, merr.WrapErrImportFailed(fmt.Sprintf("failed to iterate events reader, error: %v", err))
|
||||
}
|
||||
|
||||
// end of the file
|
||||
@ -503,18 +546,18 @@ func (p *BinlogFile) ReadBinaryVector() ([]byte, int, error) {
|
||||
|
||||
if event.TypeCode != storage.InsertEventType {
|
||||
log.Warn("Binlog file: binlog file is not insert log")
|
||||
return nil, 0, errors.New("binlog file is not insert log")
|
||||
return nil, 0, merr.WrapErrImportFailed("binlog file is not insert log")
|
||||
}
|
||||
|
||||
if p.DataType() != schemapb.DataType_BinaryVector {
|
||||
log.Warn("Binlog file: binlog data type is not binary vector")
|
||||
return nil, 0, errors.New("binlog data type is not binary vector")
|
||||
return nil, 0, merr.WrapErrImportFailed("binlog data type is not binary vector")
|
||||
}
|
||||
|
||||
data, dimenson, err := event.PayloadReaderInterface.GetBinaryVectorFromPayload()
|
||||
if err != nil {
|
||||
log.Warn("Binlog file: failed to read binary vector data", zap.Error(err))
|
||||
return nil, 0, fmt.Errorf("failed to read binary vector data, error: %w", err)
|
||||
return nil, 0, merr.WrapErrImportFailed(fmt.Sprintf("failed to read binary vector data, error: %v", err))
|
||||
}
|
||||
|
||||
dim = dimenson
|
||||
@ -527,7 +570,7 @@ func (p *BinlogFile) ReadBinaryVector() ([]byte, int, error) {
|
||||
func (p *BinlogFile) ReadFloat16Vector() ([]byte, int, error) {
|
||||
if p.reader == nil {
|
||||
log.Warn("Binlog file: binlog reader not yet initialized")
|
||||
return nil, 0, errors.New("binlog reader not yet initialized")
|
||||
return nil, 0, merr.WrapErrImportFailed("binlog reader not yet initialized")
|
||||
}
|
||||
|
||||
dim := 0
|
||||
@ -536,7 +579,7 @@ func (p *BinlogFile) ReadFloat16Vector() ([]byte, int, error) {
|
||||
event, err := p.reader.NextEventReader()
|
||||
if err != nil {
|
||||
log.Warn("Binlog file: failed to iterate events reader", zap.Error(err))
|
||||
return nil, 0, fmt.Errorf("failed to iterate events reader, error: %w", err)
|
||||
return nil, 0, merr.WrapErrImportFailed(fmt.Sprintf("failed to iterate events reader, error: %v", err))
|
||||
}
|
||||
|
||||
// end of the file
|
||||
@ -546,18 +589,18 @@ func (p *BinlogFile) ReadFloat16Vector() ([]byte, int, error) {
|
||||
|
||||
if event.TypeCode != storage.InsertEventType {
|
||||
log.Warn("Binlog file: binlog file is not insert log")
|
||||
return nil, 0, errors.New("binlog file is not insert log")
|
||||
return nil, 0, merr.WrapErrImportFailed("binlog file is not insert log")
|
||||
}
|
||||
|
||||
if p.DataType() != schemapb.DataType_Float16Vector {
|
||||
log.Warn("Binlog file: binlog data type is not float16 vector")
|
||||
return nil, 0, errors.New("binlog data type is not float16 vector")
|
||||
return nil, 0, merr.WrapErrImportFailed("binlog data type is not float16 vector")
|
||||
}
|
||||
|
||||
data, dimenson, err := event.PayloadReaderInterface.GetFloat16VectorFromPayload()
|
||||
if err != nil {
|
||||
log.Warn("Binlog file: failed to read float16 vector data", zap.Error(err))
|
||||
return nil, 0, fmt.Errorf("failed to read float16 vector data, error: %w", err)
|
||||
return nil, 0, merr.WrapErrImportFailed(fmt.Sprintf("failed to read float16 vector data, error: %v", err))
|
||||
}
|
||||
|
||||
dim = dimenson
|
||||
@ -573,7 +616,7 @@ func (p *BinlogFile) ReadFloat16Vector() ([]byte, int, error) {
|
||||
func (p *BinlogFile) ReadFloatVector() ([]float32, int, error) {
|
||||
if p.reader == nil {
|
||||
log.Warn("Binlog file: binlog reader not yet initialized")
|
||||
return nil, 0, errors.New("binlog reader not yet initialized")
|
||||
return nil, 0, merr.WrapErrImportFailed("binlog reader not yet initialized")
|
||||
}
|
||||
|
||||
dim := 0
|
||||
@ -582,7 +625,7 @@ func (p *BinlogFile) ReadFloatVector() ([]float32, int, error) {
|
||||
event, err := p.reader.NextEventReader()
|
||||
if err != nil {
|
||||
log.Warn("Binlog file: failed to iterate events reader", zap.Error(err))
|
||||
return nil, 0, fmt.Errorf("failed to iterate events reader, error: %w", err)
|
||||
return nil, 0, merr.WrapErrImportFailed(fmt.Sprintf("failed to iterate events reader, error: %v", err))
|
||||
}
|
||||
|
||||
// end of the file
|
||||
@ -592,18 +635,18 @@ func (p *BinlogFile) ReadFloatVector() ([]float32, int, error) {
|
||||
|
||||
if event.TypeCode != storage.InsertEventType {
|
||||
log.Warn("Binlog file: binlog file is not insert log")
|
||||
return nil, 0, errors.New("binlog file is not insert log")
|
||||
return nil, 0, merr.WrapErrImportFailed("binlog file is not insert log")
|
||||
}
|
||||
|
||||
if p.DataType() != schemapb.DataType_FloatVector {
|
||||
log.Warn("Binlog file: binlog data type is not float vector")
|
||||
return nil, 0, errors.New("binlog data type is not float vector")
|
||||
return nil, 0, merr.WrapErrImportFailed("binlog data type is not float vector")
|
||||
}
|
||||
|
||||
data, dimension, err := event.PayloadReaderInterface.GetFloatVectorFromPayload()
|
||||
if err != nil {
|
||||
log.Warn("Binlog file: failed to read float vector data", zap.Error(err))
|
||||
return nil, 0, fmt.Errorf("failed to read float vector data, error: %w", err)
|
||||
return nil, 0, merr.WrapErrImportFailed(fmt.Sprintf("failed to read float vector data, error: %v", err))
|
||||
}
|
||||
|
||||
dim = dimension
|
||||
|
@ -129,6 +129,17 @@ func createBinlogBuf(t *testing.T, dataType schemapb.DataType, data interface{})
// without the two lines, the case will crash at here.
// the "original_size" is come from storage.originalSizeKey
w.AddExtra("original_size", fmt.Sprintf("%v", sizeTotal))
case schemapb.DataType_Array:
rows := data.([]*schemapb.ScalarField)
sizeTotal := 0
for i := 0; i < len(rows); i++ {
err = evt.AddOneArrayToPayload(rows[i])
assert.NoError(t, err)
sizeTotal += binary.Size(rows[i])
}
// without the two lines, the case will crash at here.
// the "original_size" is come from storage.originalSizeKey
w.AddExtra("original_size", fmt.Sprintf("%v", sizeTotal))
case schemapb.DataType_BinaryVector:
vectors := data.([][]byte)
for i := 0; i < len(vectors); i++ {
@ -276,6 +287,10 @@ func Test_BinlogFileOpen(t *testing.T) {
assert.Nil(t, dataFloat16Vector)
assert.Equal(t, 0, dim)
assert.Error(t, err)

dataArray, err := binlogFile.ReadArray()
assert.Nil(t, dataArray)
assert.Error(t, err)
}

func Test_BinlogFileBool(t *testing.T) {
@ -780,6 +795,99 @@ func Test_BinlogFileJSON(t *testing.T) {
binlogFile.Close()
}

func Test_BinlogFileArray(t *testing.T) {
source := []*schemapb.ScalarField{
{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{
Data: []int32{1, 2, 3},
},
},
},
{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{
Data: []int32{4, 5},
},
},
},
{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{
Data: []int32{6, 7, 8, 9},
},
},
},
}
chunkManager := &MockChunkManager{
readBuf: map[string][]byte{
"dummy": createBinlogBuf(t, schemapb.DataType_Array, source),
},
}

binlogFile, err := NewBinlogFile(chunkManager)
assert.NoError(t, err)
assert.NotNil(t, binlogFile)

// correct reading
err = binlogFile.Open("dummy")
assert.NoError(t, err)
assert.Equal(t, schemapb.DataType_Array, binlogFile.DataType())

data, err := binlogFile.ReadArray()
assert.NoError(t, err)
assert.NotNil(t, data)
assert.Equal(t, len(source), len(data))
for i := 0; i < len(source); i++ {
assert.ElementsMatch(t, source[i].GetIntData().GetData(), data[i].GetIntData().GetData())
}

binlogFile.Close()

// wrong data type reading
binlogFile, err = NewBinlogFile(chunkManager)
assert.NoError(t, err)
err = binlogFile.Open("dummy")
assert.NoError(t, err)

d, dim, err := binlogFile.ReadBinaryVector()
assert.Zero(t, len(d))
assert.Zero(t, dim)
assert.Error(t, err)

binlogFile.Close()

// wrong log type
chunkManager.readBuf["dummy"] = createDeltalogBuf(t, []int64{1}, false)
err = binlogFile.Open("dummy")
assert.NoError(t, err)

data, err = binlogFile.ReadArray()
assert.Zero(t, len(data))
assert.Error(t, err)

// failed to iterate events reader
binlogFile.reader.Close()
data, err = binlogFile.ReadArray()
assert.Zero(t, len(data))
assert.Error(t, err)

binlogFile.Close()

chunkManager.readBuf["dummy"] = createBinlogBuf(t, schemapb.DataType_Bool, []bool{true, false})
binlogFile, err = NewBinlogFile(chunkManager)
assert.NoError(t, err)
assert.NotNil(t, binlogFile)

// correct reading
err = binlogFile.Open("dummy")
assert.NoError(t, err)
data, err = binlogFile.ReadArray()
assert.Error(t, err)
assert.Nil(t, data)
binlogFile.Close()
}

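For comparison with the binlog path, a row-based JSON import file carries an Array field as a plain JSON list inside each row; this is a sketch assuming the "rows" layout consumed by the JSON importer, and the field names are illustrative:

{"rows": [
  {"id": 1, "vector": [0.1, 0.2], "array_field": [1, 2, 3]},
  {"id": 2, "vector": [0.3, 0.4], "array_field": [4, 5]}
]}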
func Test_BinlogFileBinaryVector(t *testing.T) {
vectors := make([][]byte, 0)
vectors = append(vectors, []byte{1, 3, 5, 7})

@ -24,11 +24,11 @@ import (
"strconv"
"strings"

"github.com/cockroachdb/errors"
"go.uber.org/zap"

"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/merr"
)

type BinlogParser struct {
@ -64,23 +64,23 @@ func NewBinlogParser(ctx context.Context,
) (*BinlogParser, error) {
if collectionInfo == nil {
log.Warn("Binlog parser: collection schema is nil")
return nil, errors.New("collection schema is nil")
return nil, merr.WrapErrImportFailed("collection schema is nil")
}

if chunkManager == nil {
log.Warn("Binlog parser: chunk manager pointer is nil")
return nil, errors.New("chunk manager pointer is nil")
return nil, merr.WrapErrImportFailed("chunk manager pointer is nil")
}

if flushFunc == nil {
log.Warn("Binlog parser: flush function is nil")
return nil, errors.New("flush function is nil")
return nil, merr.WrapErrImportFailed("flush function is nil")
}

if tsStartPoint > tsEndPoint {
log.Warn("Binlog parser: the tsStartPoint should be less than tsEndPoint",
zap.Uint64("tsStartPoint", tsStartPoint), zap.Uint64("tsEndPoint", tsEndPoint))
return nil, fmt.Errorf("Binlog parser: the tsStartPoint %d should be less than tsEndPoint %d", tsStartPoint, tsEndPoint)
return nil, merr.WrapErrImportFailed(fmt.Sprintf("Binlog parser: the tsStartPoint %d should be less than tsEndPoint %d", tsStartPoint, tsEndPoint))
}

v := &BinlogParser{
@ -121,7 +121,7 @@ func (p *BinlogParser) constructSegmentHolders(insertlogRoot string, deltalogRoo
insertlogs, _, err := p.chunkManager.ListWithPrefix(context.TODO(), insertlogRoot, true)
if err != nil {
log.Warn("Binlog parser: list insert logs error", zap.Error(err))
return nil, fmt.Errorf("failed to list insert logs with root path %s, error: %w", insertlogRoot, err)
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to list insert logs with root path %s, error: %v", insertlogRoot, err))
}

// collect insert log paths
@ -139,7 +139,7 @@ func (p *BinlogParser) constructSegmentHolders(insertlogRoot string, deltalogRoo
fieldID, err := strconv.ParseInt(fieldStrID, 10, 64)
if err != nil {
log.Warn("Binlog parser: failed to parse field id", zap.String("fieldPath", fieldPath), zap.Error(err))
return nil, fmt.Errorf("failed to parse field id from insert log path %s, error: %w", insertlog, err)
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to parse field id from insert log path %s, error: %v", insertlog, err))
}

segmentPath := path.Dir(fieldPath)
@ -147,7 +147,7 @@ func (p *BinlogParser) constructSegmentHolders(insertlogRoot string, deltalogRoo
segmentID, err := strconv.ParseInt(segmentStrID, 10, 64)
if err != nil {
log.Warn("Binlog parser: failed to parse segment id", zap.String("segmentPath", segmentPath), zap.Error(err))
return nil, fmt.Errorf("failed to parse segment id from insert log path %s, error: %w", insertlog, err)
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to parse segment id from insert log path %s, error: %v", insertlog, err))
}

holder, ok := holders[segmentID]
@ -186,7 +186,7 @@ func (p *BinlogParser) constructSegmentHolders(insertlogRoot string, deltalogRoo
deltalogs, _, err := p.chunkManager.ListWithPrefix(context.TODO(), deltalogRoot, true)
if err != nil {
log.Warn("Binlog parser: failed to list delta logs", zap.Error(err))
return nil, fmt.Errorf("failed to list delta logs, error: %w", err)
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to list delta logs, error: %v", err))
}

log.Info("Binlog parser: list delta logs", zap.Int("logsCount", len(deltalogs)))
@ -197,7 +197,7 @@ func (p *BinlogParser) constructSegmentHolders(insertlogRoot string, deltalogRoo
segmentID, err := strconv.ParseInt(segmentStrID, 10, 64)
if err != nil {
log.Warn("Binlog parser: failed to parse segment id", zap.String("segmentPath", segmentPath), zap.Error(err))
return nil, fmt.Errorf("failed to parse segment id from delta log path %s, error: %w", deltalog, err)
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to parse segment id from delta log path %s, error: %v", deltalog, err))
}

// if the segment id doesn't exist, no need to process this deltalog
@ -221,14 +221,14 @@ func (p *BinlogParser) constructSegmentHolders(insertlogRoot string, deltalogRoo
func (p *BinlogParser) parseSegmentFiles(segmentHolder *SegmentFilesHolder) error {
if segmentHolder == nil {
log.Warn("Binlog parser: segment files holder is nil")
return errors.New("segment files holder is nil")
return merr.WrapErrImportFailed("segment files holder is nil")
}

adapter, err := NewBinlogAdapter(p.ctx, p.collectionInfo, p.blockSize,
MaxTotalSizeInMemory, p.chunkManager, p.callFlushFunc, p.tsStartPoint, p.tsEndPoint)
if err != nil {
log.Warn("Binlog parser: failed to create binlog adapter", zap.Error(err))
return fmt.Errorf("failed to create binlog adapter, error: %w", err)
return merr.WrapErrImportFailed(fmt.Sprintf("failed to create binlog adapter, error: %v", err))
}

return adapter.Read(segmentHolder)
@ -240,7 +240,7 @@ func (p *BinlogParser) parseSegmentFiles(segmentHolder *SegmentFilesHolder) erro
func (p *BinlogParser) Parse(filePaths []string) error {
if len(filePaths) != 1 && len(filePaths) != 2 {
log.Warn("Binlog parser: illegal paths for binlog import, partition binlog path and delta path are required")
return errors.New("illegal paths for binlog import, partition binlog path and delta path are required")
return merr.WrapErrImportFailed("illegal paths for binlog import, partition binlog path and delta path are required")
}

insertlogPath := filePaths[0]
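As a usage sketch, a Parse call for binlog import takes the partition's insert-log root and, optionally, the delta-log root; the paths and the parser variable below are illustrative:

// parser is assumed to come from NewBinlogParser(...)
err := parser.Parse([]string{
	"backup/insert_log/451/449", // insert log root of one partition (illustrative)
	"backup/delta_log/451/449",  // optional delta log root (illustrative)
})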

@ -337,6 +337,7 @@ func Test_BinlogParserParse(t *testing.T) {
"123/110/a",
"123/111/a",
"123/112/a",
"123/113/a",
}
chunkManager.readBuf = map[string][]byte{
"123/0/a": createBinlogBuf(t, schemapb.DataType_Int64, fieldsData[106].([]int64)),
@ -352,6 +353,7 @@ func Test_BinlogParserParse(t *testing.T) {
"123/110/a": createBinlogBuf(t, schemapb.DataType_BinaryVector, fieldsData[110].([][]byte)),
"123/111/a": createBinlogBuf(t, schemapb.DataType_FloatVector, fieldsData[111].([][]float32)),
"123/112/a": createBinlogBuf(t, schemapb.DataType_JSON, fieldsData[112].([][]byte)),
"123/113/a": createBinlogBuf(t, schemapb.DataType_Array, fieldsData[113].([]*schemapb.ScalarField)),
}

callTime := 0

@ -19,10 +19,9 @@ package importutil
import (
"fmt"

"github.com/cockroachdb/errors"

"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/util/merr"
)

type CollectionInfo struct {
@ -43,11 +42,11 @@ func NewCollectionInfo(collectionSchema *schemapb.CollectionSchema,
partitionIDs []int64,
) (*CollectionInfo, error) {
if shardNum <= 0 {
return nil, fmt.Errorf("illegal shard number %d", shardNum)
return nil, merr.WrapErrImportFailed(fmt.Sprintf("illegal shard number %d", shardNum))
}

if len(partitionIDs) == 0 {
return nil, errors.New("partition list is empty")
return nil, merr.WrapErrImportFailed("partition list is empty")
}

info := &CollectionInfo{
@ -65,7 +64,7 @@ func NewCollectionInfo(collectionSchema *schemapb.CollectionSchema,

func (c *CollectionInfo) resetSchema(collectionSchema *schemapb.CollectionSchema) error {
if collectionSchema == nil {
return errors.New("collection schema is null")
return merr.WrapErrImportFailed("collection schema is null")
}

fields := make([]*schemapb.FieldSchema, 0)
@ -92,11 +91,11 @@ func (c *CollectionInfo) resetSchema(collectionSchema *schemapb.CollectionSchema
}

if primaryKey == nil {
return errors.New("collection schema has no primary key")
return merr.WrapErrImportFailed("collection schema has no primary key")
}

if partitionKey == nil && len(c.PartitionIDs) != 1 {
return errors.New("only allow one partition when there is no partition key")
return merr.WrapErrImportFailed("only allow one partition when there is no partition key")
}

c.Schema = &schemapb.CollectionSchema{
|
@ -23,7 +23,6 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/cockroachdb/errors"
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||
@ -31,6 +30,7 @@ import (
|
||||
"github.com/milvus-io/milvus/internal/storage"
|
||||
"github.com/milvus-io/milvus/pkg/common"
|
||||
"github.com/milvus-io/milvus/pkg/log"
|
||||
"github.com/milvus-io/milvus/pkg/util/merr"
|
||||
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
||||
)
|
||||
|
||||
@ -60,14 +60,14 @@ func NewCSVRowConsumer(ctx context.Context,
|
||||
) (*CSVRowConsumer, error) {
|
||||
if collectionInfo == nil {
|
||||
log.Warn("CSV row consumer: collection schema is nil")
|
||||
return nil, errors.New("collection schema is nil")
|
||||
return nil, merr.WrapErrImportFailed("collection schema is nil")
|
||||
}
|
||||
|
||||
v := &CSVRowConsumer{
|
||||
ctx: ctx,
|
||||
collectionInfo: collectionInfo,
|
||||
rowIDAllocator: idAlloc,
|
||||
validators: make(map[storage.FieldID]*CSVValidator, 0),
|
||||
validators: make(map[storage.FieldID]*CSVValidator),
|
||||
rowCounter: 0,
|
||||
shardsData: make([]ShardData, 0, collectionInfo.ShardNum),
|
||||
blockSize: blockSize,
|
||||
@ -77,14 +77,14 @@ func NewCSVRowConsumer(ctx context.Context,
|
||||
|
||||
if err := v.initValidators(collectionInfo.Schema); err != nil {
|
||||
log.Warn("CSV row consumer: fail to initialize csv row-based consumer", zap.Error(err))
|
||||
return nil, fmt.Errorf("fail to initialize csv row-based consumer, error: %w", err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("fail to initialize csv row-based consumer, error: %v", err))
|
||||
}
|
||||
|
||||
for i := 0; i < int(collectionInfo.ShardNum); i++ {
|
||||
shardData := initShardData(collectionInfo.Schema, collectionInfo.PartitionIDs)
|
||||
if shardData == nil {
|
||||
log.Warn("CSV row consumer: fail to initialize in-memory segment data", zap.Int("shardID", i))
|
||||
return nil, fmt.Errorf("fail to initialize in-memory segment data for shard id %d", i)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("fail to initialize in-memory segment data for shard id %d", i))
|
||||
}
|
||||
v.shardsData = append(v.shardsData, shardData)
|
||||
}
|
||||
@ -92,7 +92,7 @@ func NewCSVRowConsumer(ctx context.Context,
|
||||
// primary key is autoid, id generator is required
|
||||
if v.collectionInfo.PrimaryKey.GetAutoID() && idAlloc == nil {
|
||||
log.Warn("CSV row consumer: ID allocator is nil")
|
||||
return nil, errors.New("ID allocator is nil")
|
||||
return nil, merr.WrapErrImportFailed("ID allocator is nil")
|
||||
}
|
||||
|
||||
return v, nil
|
||||
@ -106,7 +106,7 @@ type CSVValidator struct {
|
||||
|
||||
func (v *CSVRowConsumer) initValidators(collectionSchema *schemapb.CollectionSchema) error {
|
||||
if collectionSchema == nil {
|
||||
return errors.New("collection schema is nil")
|
||||
return merr.WrapErrImportFailed("collection schema is nil")
|
||||
}
|
||||
|
||||
validators := v.validators
|
||||
@ -124,7 +124,7 @@ func (v *CSVRowConsumer) initValidators(collectionSchema *schemapb.CollectionSch
|
||||
validators[schema.GetFieldID()].convertFunc = func(str string, field storage.FieldData) error {
|
||||
var value bool
|
||||
if err := json.Unmarshal([]byte(str), &value); err != nil {
|
||||
return fmt.Errorf("illegal value '%v' for bool type field '%s'", str, schema.GetName())
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("illegal value '%v' for bool type field '%s'", str, schema.GetName()))
|
||||
}
|
||||
field.(*storage.BoolFieldData).Data = append(field.(*storage.BoolFieldData).Data, value)
|
||||
return nil
|
||||
@ -151,7 +151,7 @@ func (v *CSVRowConsumer) initValidators(collectionSchema *schemapb.CollectionSch
|
||||
validators[schema.GetFieldID()].convertFunc = func(str string, field storage.FieldData) error {
|
||||
value, err := strconv.ParseInt(str, 0, 8)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse value '%v' for int8 field '%s', error: %w", str, schema.GetName(), err)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to parse value '%v' for int8 field '%s', error: %v", str, schema.GetName(), err))
|
||||
}
|
||||
field.(*storage.Int8FieldData).Data = append(field.(*storage.Int8FieldData).Data, int8(value))
|
||||
return nil
|
||||
@ -160,7 +160,7 @@ func (v *CSVRowConsumer) initValidators(collectionSchema *schemapb.CollectionSch
|
||||
validators[schema.GetFieldID()].convertFunc = func(str string, field storage.FieldData) error {
|
||||
value, err := strconv.ParseInt(str, 0, 16)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse value '%v' for int16 field '%s', error: %w", str, schema.GetName(), err)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to parse value '%v' for int16 field '%s', error: %v", str, schema.GetName(), err))
|
||||
}
|
||||
field.(*storage.Int16FieldData).Data = append(field.(*storage.Int16FieldData).Data, int16(value))
|
||||
return nil
|
||||
@ -169,7 +169,7 @@ func (v *CSVRowConsumer) initValidators(collectionSchema *schemapb.CollectionSch
|
||||
validators[schema.GetFieldID()].convertFunc = func(str string, field storage.FieldData) error {
|
||||
value, err := strconv.ParseInt(str, 0, 32)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse value '%v' for int32 field '%s', error: %w", str, schema.GetName(), err)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to parse value '%v' for int32 field '%s', error: %v", str, schema.GetName(), err))
|
||||
}
|
||||
field.(*storage.Int32FieldData).Data = append(field.(*storage.Int32FieldData).Data, int32(value))
|
||||
return nil
|
||||
@ -178,7 +178,7 @@ func (v *CSVRowConsumer) initValidators(collectionSchema *schemapb.CollectionSch
|
||||
validators[schema.GetFieldID()].convertFunc = func(str string, field storage.FieldData) error {
|
||||
value, err := strconv.ParseInt(str, 0, 64)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse value '%v' for int64 field '%s', error: %w", str, schema.GetName(), err)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to parse value '%v' for int64 field '%s', error: %v", str, schema.GetName(), err))
|
||||
}
|
||||
field.(*storage.Int64FieldData).Data = append(field.(*storage.Int64FieldData).Data, value)
|
||||
return nil
|
||||
@ -194,23 +194,23 @@ func (v *CSVRowConsumer) initValidators(collectionSchema *schemapb.CollectionSch
|
||||
desc := json.NewDecoder(strings.NewReader(str))
|
||||
desc.UseNumber()
|
||||
if err := desc.Decode(&arr); err != nil {
|
||||
return fmt.Errorf("'%v' is not an array for binary vector field '%s'", str, schema.GetName())
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("'%v' is not an array for binary vector field '%s'", str, schema.GetName()))
|
||||
}
|
||||
|
||||
// we use uint8 to represent binary vector in csv file, each uint8 value represents 8 dimensions.
|
||||
if len(arr)*8 != dim {
|
||||
return fmt.Errorf("bit size %d doesn't equal to vector dimension %d of field '%s'", len(arr)*8, dim, schema.GetName())
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("bit size %d doesn't equal to vector dimension %d of field '%s'", len(arr)*8, dim, schema.GetName()))
|
||||
}
|
||||
|
||||
for i := 0; i < len(arr); i++ {
|
||||
if num, ok := arr[i].(json.Number); ok {
|
||||
value, err := strconv.ParseUint(string(num), 0, 8)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse value '%v' for binary vector field '%s', error: %w", num, schema.GetName(), err)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to parse value '%v' for binary vector field '%s', error: %v", num, schema.GetName(), err))
|
||||
}
|
||||
field.(*storage.BinaryVectorFieldData).Data = append(field.(*storage.BinaryVectorFieldData).Data, byte(value))
|
||||
} else {
|
||||
return fmt.Errorf("illegal value '%v' for binary vector field '%s'", str, schema.GetName())
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("illegal value '%v' for binary vector field '%s'", str, schema.GetName()))
|
||||
}
|
||||
}
|
||||
|
||||
@ -227,11 +227,11 @@ func (v *CSVRowConsumer) initValidators(collectionSchema *schemapb.CollectionSch
|
||||
desc := json.NewDecoder(strings.NewReader(str))
|
||||
desc.UseNumber()
|
||||
if err := desc.Decode(&arr); err != nil {
|
||||
return fmt.Errorf("'%v' is not an array for float vector field '%s'", str, schema.GetName())
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("'%v' is not an array for float vector field '%s'", str, schema.GetName()))
|
||||
}
|
||||
|
||||
if len(arr) != dim {
|
||||
return fmt.Errorf("array size %d doesn't equal to vector dimension %d of field '%s'", len(arr), dim, schema.GetName())
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("array size %d doesn't equal to vector dimension %d of field '%s'", len(arr), dim, schema.GetName()))
|
||||
}
|
||||
|
||||
for i := 0; i < len(arr); i++ {
|
||||
@ -242,7 +242,7 @@ func (v *CSVRowConsumer) initValidators(collectionSchema *schemapb.CollectionSch
|
||||
}
|
||||
field.(*storage.FloatVectorFieldData).Data = append(field.(*storage.FloatVectorFieldData).Data, float32(value))
|
||||
} else {
|
||||
return fmt.Errorf("illegal value '%v' for float vector field '%s'", str, schema.GetName())
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("illegal value '%v' for float vector field '%s'", str, schema.GetName()))
|
||||
}
|
||||
}
|
||||
|
||||
@ -259,13 +259,25 @@ func (v *CSVRowConsumer) initValidators(collectionSchema *schemapb.CollectionSch
validators[schema.GetFieldID()].convertFunc = func(str string, field storage.FieldData) error {
var dummy interface{}
if err := json.Unmarshal([]byte(str), &dummy); err != nil {
return fmt.Errorf("failed to parse value '%v' for JSON field '%s', error: %w", str, schema.GetName(), err)
return merr.WrapErrImportFailed(fmt.Sprintf("failed to parse value '%v' for JSON field '%s', error: %v", str, schema.GetName(), err))
}
field.(*storage.JSONFieldData).Data = append(field.(*storage.JSONFieldData).Data, []byte(str))
return nil
}
case schemapb.DataType_Array:
validators[schema.GetFieldID()].convertFunc = func(str string, field storage.FieldData) error {
var arr []interface{}
desc := json.NewDecoder(strings.NewReader(str))
desc.UseNumber()
if err := desc.Decode(&arr); err != nil {
return merr.WrapErrImportFailed(fmt.Sprintf("'%v' is not an array for array field '%s'", str, schema.GetName()))
}

return getArrayElementData(schema, arr, field)
}

default:
return fmt.Errorf("unsupport data type: %s", getTypeName(collectionSchema.Fields[i].DataType))
return merr.WrapErrImportFailed(fmt.Sprintf("unsupport data type: %s", getTypeName(collectionSchema.Fields[i].DataType)))
}
}
return nil
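An Array cell in the csv file is therefore a JSON list encoded as a single string value; a sketch with illustrative column names, for an Int64 element type:

id,age_list
1,"[1,2,3,4]"
2,"[5,6]"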
@ -282,7 +294,7 @@ func (v *CSVRowConsumer) RowCount() int64 {
|
||||
func (v *CSVRowConsumer) Handle(rows []map[storage.FieldID]string) error {
|
||||
if v == nil || v.validators == nil || len(v.validators) == 0 {
|
||||
log.Warn("CSV row consumer is not initialized")
|
||||
return errors.New("CSV row consumer is not initialized")
|
||||
return merr.WrapErrImportFailed("CSV row consumer is not initialized")
|
||||
}
|
||||
// if rows is nil, that means read to end of file, force flush all data
|
||||
if rows == nil {
|
||||
@ -297,7 +309,7 @@ func (v *CSVRowConsumer) Handle(rows []map[storage.FieldID]string) error {
|
||||
err := tryFlushBlocks(v.ctx, v.shardsData, v.collectionInfo.Schema, v.callFlushFunc, v.blockSize, MaxTotalSizeInMemory, false)
|
||||
if err != nil {
|
||||
log.Warn("CSV row consumer: try flush data but failed", zap.Error(err))
|
||||
return fmt.Errorf("try flush data but failed, error: %w", err)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("try flush data but failed, error: %v", err))
|
||||
}
|
||||
|
||||
// prepare autoid, no matter int64 or varchar pk, we always generate autoid since the hidden field RowIDField requires them
|
||||
@ -308,24 +320,24 @@ func (v *CSVRowConsumer) Handle(rows []map[storage.FieldID]string) error {
|
||||
if v.collectionInfo.PrimaryKey.AutoID {
|
||||
if v.rowIDAllocator == nil {
|
||||
log.Warn("CSV row consumer: primary keys is auto-generated but IDAllocator is nil")
|
||||
return fmt.Errorf("primary keys is auto-generated but IDAllocator is nil")
|
||||
return merr.WrapErrImportFailed("primary keys is auto-generated but IDAllocator is nil")
|
||||
}
|
||||
var err error
|
||||
rowIDBegin, rowIDEnd, err = v.rowIDAllocator.Alloc(uint32(len(rows)))
|
||||
if err != nil {
|
||||
log.Warn("CSV row consumer: failed to generate primary keys", zap.Int("count", len(rows)), zap.Error(err))
|
||||
return fmt.Errorf("failed to generate %d primary keys, error: %w", len(rows), err)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to generate %d primary keys, error: %v", len(rows), err))
|
||||
}
|
||||
if rowIDEnd-rowIDBegin != int64(len(rows)) {
|
||||
log.Warn("CSV row consumer: try to generate primary keys but allocated ids are not enough",
|
||||
zap.Int("count", len(rows)), zap.Int64("generated", rowIDEnd-rowIDBegin))
|
||||
return fmt.Errorf("try to generate %d primary keys but only %d keys were allocated", len(rows), rowIDEnd-rowIDBegin)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("try to generate %d primary keys but only %d keys were allocated", len(rows), rowIDEnd-rowIDBegin))
|
||||
}
|
||||
log.Info("CSV row consumer: auto-generate primary keys", zap.Int64("begin", rowIDBegin), zap.Int64("end", rowIDEnd))
|
||||
if primaryValidator.isString {
|
||||
// if pk is varchar, no need to record auto-generated row ids
|
||||
log.Warn("CSV row consumer: string type primary key connot be auto-generated")
|
||||
return errors.New("string type primary key connot be auto-generated")
|
||||
return merr.WrapErrImportFailed("string type primary key connot be auto-generated")
|
||||
}
|
||||
v.autoIDRange = append(v.autoIDRange, rowIDBegin, rowIDEnd)
|
||||
}
|
||||
@ -361,8 +373,8 @@ func (v *CSVRowConsumer) Handle(rows []map[storage.FieldID]string) error {
|
||||
if err != nil {
|
||||
log.Warn("CSV row consumer: failed to parse primary key at the row",
|
||||
zap.String("value", pkStr), zap.Int64("rowNumber", rowNumber), zap.Error(err))
|
||||
return fmt.Errorf("failed to parse primary key '%s' at the row %d, error: %w",
|
||||
pkStr, rowNumber, err)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to parse primary key '%s' at the row %d, error: %v",
|
||||
pkStr, rowNumber, err))
|
||||
}
|
||||
}
|
||||
|
||||
@ -370,7 +382,7 @@ func (v *CSVRowConsumer) Handle(rows []map[storage.FieldID]string) error {
|
||||
if err != nil {
|
||||
log.Warn("CSV row consumer: failed to hash primary key at the row",
|
||||
zap.Int64("key", pk), zap.Int64("rowNumber", rowNumber), zap.Error(err))
|
||||
return fmt.Errorf("failed to hash primary key %d at the row %d, error: %w", pk, rowNumber, err)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to hash primary key %d at the row %d, error: %v", pk, rowNumber, err))
|
||||
}
|
||||
|
||||
// hash to shard based on pk, hash to partition if partition key exist
|
||||
@ -395,8 +407,8 @@ func (v *CSVRowConsumer) Handle(rows []map[storage.FieldID]string) error {
|
||||
if err := validator.convertFunc(value, v.shardsData[shardID][partitionID][fieldID]); err != nil {
|
||||
log.Warn("CSV row consumer: failed to convert value for field at the row",
|
||||
zap.String("fieldName", validator.fieldName), zap.Int64("rowNumber", rowNumber), zap.Error(err))
|
||||
return fmt.Errorf("failed to convert value for field '%s' at the row %d, error: %w",
|
||||
validator.fieldName, rowNumber, err)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to convert value for field '%s' at the row %d, error: %v",
|
||||
validator.fieldName, rowNumber, err))
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -405,12 +417,12 @@ func (v *CSVRowConsumer) Handle(rows []map[storage.FieldID]string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// hashToPartition hash partition key to get an partition ID, return the first partition ID if no partition key exist
|
||||
// hashToPartition hash partition key to get a partition ID, return the first partition ID if no partition key exist
|
||||
// CollectionInfo ensures only one partition ID in the PartitionIDs if no partition key exist
|
||||
func (v *CSVRowConsumer) hashToPartition(row map[storage.FieldID]string, rowNumber int64) (int64, error) {
|
||||
if v.collectionInfo.PartitionKey == nil {
|
||||
if len(v.collectionInfo.PartitionIDs) != 1 {
|
||||
return 0, fmt.Errorf("collection '%s' partition list is empty", v.collectionInfo.Schema.Name)
|
||||
return 0, merr.WrapErrImportFailed(fmt.Sprintf("collection '%s' partition list is empty", v.collectionInfo.Schema.Name))
|
||||
}
|
||||
// no partition key, directly return the target partition id
|
||||
return v.collectionInfo.PartitionIDs[0], nil
|
||||
@ -429,15 +441,15 @@ func (v *CSVRowConsumer) hashToPartition(row map[storage.FieldID]string, rowNumb
|
||||
if err != nil {
|
||||
log.Warn("CSV row consumer: failed to parse partition key at the row",
|
||||
zap.String("value", value), zap.Int64("rowNumber", rowNumber), zap.Error(err))
|
||||
return 0, fmt.Errorf("failed to parse partition key '%s' at the row %d, error: %w",
|
||||
value, rowNumber, err)
|
||||
return 0, merr.WrapErrImportFailed(fmt.Sprintf("failed to parse partition key '%s' at the row %d, error: %v",
|
||||
value, rowNumber, err))
|
||||
}
|
||||
|
||||
hashValue, err = typeutil.Hash32Int64(pk)
|
||||
if err != nil {
|
||||
log.Warn("CSV row consumer: failed to hash partition key at the row",
|
||||
zap.Int64("key", pk), zap.Int64("rowNumber", rowNumber), zap.Error(err))
|
||||
return 0, fmt.Errorf("failed to hash partition key %d at the row %d, error: %w", pk, rowNumber, err)
|
||||
return 0, merr.WrapErrImportFailed(fmt.Sprintf("failed to hash partition key %d at the row %d, error: %v", pk, rowNumber, err))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -189,6 +189,10 @@ func Test_CSVRowConsumerInitValidators(t *testing.T) {
checkConvertFunc("FieldFloatVector", validVal, invalidVal)
invalidVal = `[1]`
checkConvertFunc("FieldFloatVector", validVal, invalidVal)

validVal = "[1,2,3,4]"
invalidVal = "[bool, false]"
checkConvertFunc("FieldArray", validVal, invalidVal)
})

t.Run("init error cases", func(t *testing.T) {
|
||||
@ -278,6 +282,168 @@ func Test_CSVRowConsumerInitValidators(t *testing.T) {
|
||||
assert.Error(t, err)
|
||||
assert.Equal(t, 2, fieldData.RowNum())
|
||||
})
|
||||
|
||||
t.Run("array field", func(t *testing.T) {
|
||||
schema = &schemapb.CollectionSchema{
|
||||
Name: "schema",
|
||||
Description: "schema",
|
||||
AutoID: true,
|
||||
Fields: []*schemapb.FieldSchema{
|
||||
{
|
||||
FieldID: 113,
|
||||
Name: "FieldArray",
|
||||
IsPrimaryKey: false,
|
||||
DataType: schemapb.DataType_Array,
|
||||
TypeParams: []*commonpb.KeyValuePair{
|
||||
{Key: "max_capacity", Value: "100"},
|
||||
},
|
||||
ElementType: schemapb.DataType_Bool,
|
||||
},
|
||||
},
|
||||
}
|
||||
consumer.validators = make(map[int64]*CSVValidator)
|
||||
err = consumer.initValidators(schema)
|
||||
assert.NoError(t, err)
|
||||
|
||||
v, ok := consumer.validators[113]
|
||||
assert.True(t, ok)
|
||||
|
||||
fields := initBlockData(schema)
|
||||
assert.NotNil(t, fields)
|
||||
fieldData := fields[113]
|
||||
|
||||
err = v.convertFunc("[true, false]", fieldData)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 1, fieldData.RowNum())
|
||||
|
||||
schema = &schemapb.CollectionSchema{
|
||||
Name: "schema",
|
||||
Description: "schema",
|
||||
AutoID: true,
|
||||
Fields: []*schemapb.FieldSchema{
|
||||
{
|
||||
FieldID: 113,
|
||||
Name: "FieldArray",
|
||||
IsPrimaryKey: false,
|
||||
DataType: schemapb.DataType_Array,
|
||||
TypeParams: []*commonpb.KeyValuePair{
|
||||
{Key: "max_capacity", Value: "100"},
|
||||
},
|
||||
ElementType: schemapb.DataType_Int64,
|
||||
},
|
||||
},
|
||||
}
|
||||
consumer.validators = make(map[int64]*CSVValidator)
|
||||
err = consumer.initValidators(schema)
|
||||
assert.NoError(t, err)
|
||||
|
||||
v, ok = consumer.validators[113]
|
||||
assert.True(t, ok)
|
||||
|
||||
fields = initBlockData(schema)
|
||||
assert.NotNil(t, fields)
|
||||
fieldData = fields[113]
|
||||
|
||||
err = v.convertFunc("[1,2,3,4]", fieldData)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 1, fieldData.RowNum())
|
||||
|
||||
schema = &schemapb.CollectionSchema{
|
||||
Name: "schema",
|
||||
Description: "schema",
|
||||
AutoID: true,
|
||||
Fields: []*schemapb.FieldSchema{
|
||||
{
|
||||
FieldID: 113,
|
||||
Name: "FieldArray",
|
||||
IsPrimaryKey: false,
|
||||
DataType: schemapb.DataType_Array,
|
||||
TypeParams: []*commonpb.KeyValuePair{
|
||||
{Key: "max_capacity", Value: "100"},
|
||||
},
|
||||
ElementType: schemapb.DataType_Float,
|
||||
},
|
||||
},
|
||||
}
|
||||
consumer.validators = make(map[int64]*CSVValidator)
|
||||
err = consumer.initValidators(schema)
|
||||
assert.NoError(t, err)
|
||||
|
||||
v, ok = consumer.validators[113]
|
||||
assert.True(t, ok)
|
||||
|
||||
fields = initBlockData(schema)
|
||||
assert.NotNil(t, fields)
|
||||
fieldData = fields[113]
|
||||
|
||||
err = v.convertFunc("[1.1,2.2,3.3,4.4]", fieldData)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 1, fieldData.RowNum())
|
||||
|
||||
schema = &schemapb.CollectionSchema{
|
||||
Name: "schema",
|
||||
Description: "schema",
|
||||
AutoID: true,
|
||||
Fields: []*schemapb.FieldSchema{
|
||||
{
|
||||
FieldID: 113,
|
||||
Name: "FieldArray",
|
||||
IsPrimaryKey: false,
|
||||
DataType: schemapb.DataType_Array,
|
||||
TypeParams: []*commonpb.KeyValuePair{
|
||||
{Key: "max_capacity", Value: "100"},
|
||||
},
|
||||
ElementType: schemapb.DataType_Double,
|
||||
},
|
||||
},
|
||||
}
|
||||
consumer.validators = make(map[int64]*CSVValidator)
|
||||
err = consumer.initValidators(schema)
|
||||
assert.NoError(t, err)
|
||||
|
||||
v, ok = consumer.validators[113]
|
||||
assert.True(t, ok)
|
||||
|
||||
fields = initBlockData(schema)
|
||||
assert.NotNil(t, fields)
|
||||
fieldData = fields[113]
|
||||
|
||||
err = v.convertFunc("[1.2,2.3,3.4,4.5]", fieldData)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 1, fieldData.RowNum())
|
||||
|
||||
schema = &schemapb.CollectionSchema{
|
||||
Name: "schema",
|
||||
Description: "schema",
|
||||
AutoID: true,
|
||||
Fields: []*schemapb.FieldSchema{
|
||||
{
|
||||
FieldID: 113,
|
||||
Name: "FieldArray",
|
||||
IsPrimaryKey: false,
|
||||
DataType: schemapb.DataType_Array,
|
||||
TypeParams: []*commonpb.KeyValuePair{
|
||||
{Key: "max_capacity", Value: "100"},
|
||||
},
|
||||
ElementType: schemapb.DataType_VarChar,
|
||||
},
|
||||
},
|
||||
}
|
||||
consumer.validators = make(map[int64]*CSVValidator)
|
||||
err = consumer.initValidators(schema)
|
||||
assert.NoError(t, err)
|
||||
|
||||
v, ok = consumer.validators[113]
|
||||
assert.True(t, ok)
|
||||
|
||||
fields = initBlockData(schema)
|
||||
assert.NotNil(t, fields)
|
||||
fieldData = fields[113]
|
||||
|
||||
err = v.convertFunc(`["abc", "vv"]`, fieldData)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 1, fieldData.RowNum())
|
||||
})
|
||||
}
|
||||
|
||||
func Test_CSVRowConsumerHandleIntPK(t *testing.T) {
|
||||
|
@ -24,11 +24,11 @@ import (
"strconv"
"strings"

"github.com/cockroachdb/errors"
"go.uber.org/zap"

"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)

@ -43,7 +43,7 @@ type CSVParser struct {
func NewCSVParser(ctx context.Context, collectionInfo *CollectionInfo, updateProgressFunc func(percent int64)) (*CSVParser, error) {
if collectionInfo == nil {
log.Warn("CSV parser: collection schema is nil")
return nil, errors.New("collection schema is nil")
return nil, merr.WrapErrImportFailed("collection schema is nil")
}

parser := &CSVParser{
@ -108,7 +108,7 @@ func (p *CSVParser) combineDynamicRow(dynamicValues map[string]string, row map[s
|
||||
desc.UseNumber()
|
||||
if err := desc.Decode(&mp); err != nil {
|
||||
log.Warn("CSV parser: illegal value for dynamic field, not a JSON object")
|
||||
return errors.New("illegal value for dynamic field, not a JSON object")
|
||||
return merr.WrapErrImportFailed("illegal value for dynamic field, not a JSON object")
|
||||
}
|
||||
}
|
||||
// case 4
|
||||
@ -145,7 +145,7 @@ func (p *CSVParser) combineDynamicRow(dynamicValues map[string]string, row map[s
|
||||
bs, err := json.Marshal(mp)
|
||||
if err != nil {
|
||||
log.Warn("CSV parser: illegal value for dynamic field, not a JSON object")
|
||||
return errors.New("illegal value for dynamic field, not a JSON object")
|
||||
return merr.WrapErrImportFailed("illegal value for dynamic field, not a JSON object")
|
||||
}
|
||||
row[dynamicFieldID] = string(bs)
|
||||
} else if !ok && len(dynamicValues) == 0 {
|
||||
@ -168,7 +168,7 @@ func (p *CSVParser) verifyRow(raw []string) (map[storage.FieldID]string, error)
|
||||
if fieldID == p.collectionInfo.PrimaryKey.GetFieldID() && p.collectionInfo.PrimaryKey.GetAutoID() {
|
||||
// primary key is auto-id, no need to provide
|
||||
log.Warn("CSV parser: the primary key is auto-generated, no need to provide", zap.String("fieldName", fieldName))
|
||||
return nil, fmt.Errorf("the primary key '%s' is auto-generated, no need to provide", fieldName)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("the primary key '%s' is auto-generated, no need to provide", fieldName))
|
||||
}
|
||||
|
||||
if ok {
|
||||
@ -179,7 +179,7 @@ func (p *CSVParser) verifyRow(raw []string) (map[storage.FieldID]string, error)
|
||||
} else {
|
||||
// no dynamic field. if user provided redundant field, return error
|
||||
log.Warn("CSV parser: the field is not defined in collection schema", zap.String("fieldName", fieldName))
|
||||
return nil, fmt.Errorf("the field '%s' is not defined in collection schema", fieldName)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("the field '%s' is not defined in collection schema", fieldName))
|
||||
}
|
||||
}
|
||||
// some fields not provided?
|
||||
@ -198,7 +198,7 @@ func (p *CSVParser) verifyRow(raw []string) (map[storage.FieldID]string, error)
|
||||
if !ok {
|
||||
// not auto-id primary key, no dynamic field, must provide value
|
||||
log.Warn("CSV parser: a field value is missed", zap.String("fieldName", k))
|
||||
return nil, fmt.Errorf("value of field '%s' is missed", k)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("value of field '%s' is missed", k))
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -215,7 +215,7 @@ func (p *CSVParser) verifyRow(raw []string) (map[storage.FieldID]string, error)
func (p *CSVParser) ParseRows(reader *IOReader, handle CSVRowHandler) error {
if reader == nil || handle == nil {
log.Warn("CSV Parser: CSV parse handle is nil")
return errors.New("CSV parse handle is nil")
return merr.WrapErrImportFailed("CSV parse handle is nil")
}
// discard bom in the file
RuneScanner := reader.r.(io.RuneScanner)
@ -228,7 +228,9 @@ func (p *CSVParser) ParseRows(reader *IOReader, handle CSVRowHandler) error {
return err
}
if bom != '\ufeff' {
RuneScanner.UnreadRune()
if err = RuneScanner.UnreadRune(); err != nil {
return err
}
}
r := NewReader(reader.r)

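The second hunk above tightens the BOM handling so the UnreadRune error is no longer silently dropped. A rough standalone sketch of what that path does, using only the Go standard library rather than the parser's actual types:

package main

import (
	"bufio"
	"fmt"
	"strings"
)

// skipBOM reads the first rune and, if it is not the UTF-8 BOM, pushes it
// back onto the reader - now checking the UnreadRune error as the fix does.
func skipBOM(r *bufio.Reader) error {
	bom, _, err := r.ReadRune()
	if err != nil {
		return err
	}
	if bom != '\ufeff' {
		if err = r.UnreadRune(); err != nil {
			return err
		}
	}
	return nil
}

func main() {
	r := bufio.NewReader(strings.NewReader("\ufeffFieldInt64,FieldArray\n1,\"[1,2,3]\"\n"))
	if err := skipBOM(r); err != nil {
		panic(err)
	}
	header, _ := r.ReadString('\n')
	fmt.Print(header) // header line without the BOM
}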
@ -252,7 +254,7 @@ func (p *CSVParser) ParseRows(reader *IOReader, handle CSVRowHandler) error {
|
||||
break
|
||||
} else if err != nil {
|
||||
log.Warn("CSV Parser: failed to parse the field value", zap.Error(err))
|
||||
return fmt.Errorf("failed to read the field value, error: %w", err)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to read the field value, error: %v", err))
|
||||
}
|
||||
p.fieldsName = fieldsName
|
||||
// read buffer
|
||||
@ -265,7 +267,7 @@ func (p *CSVParser) ParseRows(reader *IOReader, handle CSVRowHandler) error {
|
||||
break
|
||||
} else if err != nil {
|
||||
log.Warn("CSV parser: failed to parse row value", zap.Error(err))
|
||||
return fmt.Errorf("failed to parse row value, error: %w", err)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to parse row value, error: %v", err))
|
||||
}
|
||||
|
||||
row, err := p.verifyRow(values)
|
||||
@ -280,7 +282,7 @@ func (p *CSVParser) ParseRows(reader *IOReader, handle CSVRowHandler) error {
|
||||
isEmpty = false
|
||||
if err = handle.Handle(buf); err != nil {
|
||||
log.Warn("CSV parser: failed to convert row value to entity", zap.Error(err))
|
||||
return fmt.Errorf("failed to convert row value to entity, error: %w", err)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to convert row value to entity, error: %v", err))
|
||||
}
|
||||
// clean the buffer
|
||||
buf = make([]map[storage.FieldID]string, 0, p.bufRowCount)
|
||||
@ -290,14 +292,14 @@ func (p *CSVParser) ParseRows(reader *IOReader, handle CSVRowHandler) error {
|
||||
isEmpty = false
|
||||
if err = handle.Handle(buf); err != nil {
|
||||
log.Warn("CSV parser: failed to convert row value to entity", zap.Error(err))
|
||||
return fmt.Errorf("failed to convert row value to entity, error: %w", err)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to convert row value to entity, error: %v", err))
|
||||
}
|
||||
}
|
||||
|
||||
// outside context might be canceled(service stop, or future enhancement for canceling import task)
|
||||
if isCanceled(p.ctx) {
|
||||
log.Warn("CSV parser: import task was canceled")
|
||||
return errors.New("import task was canceled")
|
||||
return merr.WrapErrImportFailed("import task was canceled")
|
||||
}
|
||||
// nolint
|
||||
// this break means we require the first row must be fieldsName
|
||||
|
@ -86,8 +86,8 @@ func Test_CSVParserParseRows_IntPK(t *testing.T) {
|
||||
}
|
||||
|
||||
reader := strings.NewReader(
|
||||
`FieldBool,FieldInt8,FieldInt16,FieldInt32,FieldInt64,FieldFloat,FieldDouble,FieldString,FieldJSON,FieldBinaryVector,FieldFloatVector
|
||||
true,10,101,1001,10001,3.14,1.56,No.0,"{""x"": 0}","[200,0]","[0.1,0.2,0.3,0.4]"`)
|
||||
`FieldBool,FieldInt8,FieldInt16,FieldInt32,FieldInt64,FieldFloat,FieldDouble,FieldString,FieldJSON,FieldBinaryVector,FieldFloatVector,FieldArray
|
||||
true,10,101,1001,10001,3.14,1.56,No.0,"{""x"": 0}","[200,0]","[0.1,0.2,0.3,0.4]","[1,2,3,4]"`)
|
||||
|
||||
t.Run("parse success", func(t *testing.T) {
|
||||
err = parser.ParseRows(&IOReader{r: reader, fileSize: int64(100)}, consumer)
|
||||
@ -117,28 +117,28 @@ func Test_CSVParserParseRows_IntPK(t *testing.T) {
|
||||
|
||||
// csv parse error, fields len error
|
||||
reader := strings.NewReader(
|
||||
`FieldBool,FieldInt8,FieldInt16,FieldInt32,FieldInt64,FieldFloat,FieldDouble,FieldString,FieldJSON,FieldBinaryVector,FieldFloatVector
|
||||
0,100,1000,99999999999999999,3,1,No.0,"{""x"": 0}","[200,0]","[0.1,0.2,0.3,0.4]"`)
|
||||
`FieldBool,FieldInt8,FieldInt16,FieldInt32,FieldInt64,FieldFloat,FieldDouble,FieldString,FieldJSON,FieldBinaryVector,FieldFloatVector,FieldArray
|
||||
0,100,1000,99999999999999999,3,1,No.0,"{""x"": 0}","[200,0]","[0.1,0.2,0.3,0.4]","[1,2,3,4]"`)
|
||||
err = parser.ParseRows(&IOReader{r: reader, fileSize: int64(100)}, consumer)
|
||||
assert.Error(t, err)
|
||||
|
||||
// redundant field
|
||||
reader = strings.NewReader(
|
||||
`dummy,FieldBool,FieldInt8,FieldInt16,FieldInt32,FieldInt64,FieldFloat,FieldDouble,FieldString,FieldJSON,FieldBinaryVector,FieldFloatVector
|
||||
1,true,0,100,1000,99999999999999999,3,1,No.0,"{""x"": 0}","[200,0]","[0.1,0.2,0.3,0.4]"`)
|
||||
`dummy,FieldBool,FieldInt8,FieldInt16,FieldInt32,FieldInt64,FieldFloat,FieldDouble,FieldString,FieldJSON,FieldBinaryVector,FieldFloatVector,FieldArray
|
||||
1,true,0,100,1000,99999999999999999,3,1,No.0,"{""x"": 0}","[200,0]","[0.1,0.2,0.3,0.4]","[1,2,3,4]"`)
|
||||
err = parser.ParseRows(&IOReader{r: reader, fileSize: int64(100)}, consumer)
|
||||
assert.Error(t, err)
|
||||
|
||||
// field missed
|
||||
reader = strings.NewReader(
|
||||
`FieldInt8,FieldInt16,FieldInt32,FieldInt64,FieldFloat,FieldDouble,FieldString,FieldJSON,FieldBinaryVector,FieldFloatVector
|
||||
0,100,1000,99999999999999999,3,1,No.0,"{""x"": 0}","[200,0]","[0.1,0.2,0.3,0.4]"`)
|
||||
`FieldInt8,FieldInt16,FieldInt32,FieldInt64,FieldFloat,FieldDouble,FieldString,FieldJSON,FieldBinaryVector,FieldFloatVector,FieldArray
|
||||
0,100,1000,99999999999999999,3,1,No.0,"{""x"": 0}","[200,0]","[0.1,0.2,0.3,0.4]","[1,2,3,4]"`)
|
||||
err = parser.ParseRows(&IOReader{r: reader, fileSize: int64(100)}, consumer)
|
||||
assert.Error(t, err)
|
||||
|
||||
// handle() error
|
||||
content := `FieldBool,FieldInt8,FieldInt16,FieldInt32,FieldInt64,FieldFloat,FieldDouble,FieldString,FieldJSON,FieldBinaryVector,FieldFloatVector
|
||||
true,0,100,1000,99999999999999999,3,1,No.0,"{""x"": 0}","[200,0]","[0.1,0.2,0.3,0.4]"`
|
||||
content := `FieldBool,FieldInt8,FieldInt16,FieldInt32,FieldInt64,FieldFloat,FieldDouble,FieldString,FieldJSON,FieldBinaryVector,FieldFloatVector,FieldArray
|
||||
true,0,100,1000,99999999999999999,3,1,No.0,"{""x"": 0}","[200,0]","[0.1,0.2,0.3,0.4]","[1,2,3,4]"`
|
||||
consumer.handleErr = errors.New("error")
|
||||
reader = strings.NewReader(content)
|
||||
err = parser.ParseRows(&IOReader{r: reader, fileSize: int64(100)}, consumer)
|
||||
|
@ -63,6 +63,8 @@ import (
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/cockroachdb/errors"
|
||||
|
||||
"github.com/milvus-io/milvus/pkg/util/merr"
|
||||
)
|
||||
|
||||
// A ParseError is returned for parsing errors.
|
||||
@ -75,7 +77,7 @@ type ParseError struct {
|
||||
}
|
||||
|
||||
func (e *ParseError) Error() string {
|
||||
if e.Err == ErrFieldCount {
|
||||
if errors.Is(e.Err, ErrFieldCount) {
|
||||
return fmt.Sprintf("record on line %d: %v", e.Line, e.Err)
|
||||
}
|
||||
if e.StartLine != e.Line {
|
||||
@ -88,15 +90,15 @@ func (e *ParseError) Unwrap() error { return e.Err }
|
||||
|
||||
// These are the errors that can be returned in ParseError.Err.
|
||||
var (
|
||||
ErrBareQuote = errors.New("bare \" in non-quoted-field")
|
||||
ErrQuote = errors.New("extraneous or missing \" in quoted-field")
|
||||
ErrFieldCount = errors.New("wrong number of fields")
|
||||
ErrBareQuote = merr.WrapErrImportFailed("bare \" in non-quoted-field")
|
||||
ErrQuote = merr.WrapErrImportFailed("extraneous or missing \" in quoted-field")
|
||||
ErrFieldCount = merr.WrapErrImportFailed("wrong number of fields")
|
||||
|
||||
// Deprecated: ErrTrailingComma is no longer used.
|
||||
ErrTrailingComma = errors.New("extra delimiter at end of line")
|
||||
ErrTrailingComma = merr.WrapErrImportFailed("extra delimiter at end of line")
|
||||
)
|
||||
|
||||
var errInvalidDelim = errors.New("csv: invalid field or comment delimiter")
|
||||
var errInvalidDelim = merr.WrapErrImportFailed("csv: invalid field or comment delimiter")
|
||||
|
||||
func validDelim(r rune) bool {
|
||||
return r != 0 && r != '"' && r != '\r' && r != '\n' && utf8.ValidRune(r) && r != utf8.RuneError
|
||||
@ -257,9 +259,9 @@ func (r *Reader) ReadAll() (records [][]string, err error) {
|
||||
// The result is only valid until the next call to readLine.
|
||||
func (r *Reader) readLine() ([]byte, error) {
|
||||
line, err := r.r.ReadSlice('\n')
|
||||
if err == bufio.ErrBufferFull {
|
||||
if errors.Is(err, bufio.ErrBufferFull) {
|
||||
r.rawBuffer = append(r.rawBuffer[:0], line...)
|
||||
for err == bufio.ErrBufferFull {
|
||||
for errors.Is(err, bufio.ErrBufferFull) {
|
||||
line, err = r.r.ReadSlice('\n')
|
||||
r.rawBuffer = append(r.rawBuffer, line...)
|
||||
}
|
||||
|
@ -21,10 +21,9 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/cockroachdb/errors"
|
||||
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
||||
"github.com/milvus-io/milvus/pkg/util/funcutil"
|
||||
"github.com/milvus-io/milvus/pkg/util/merr"
|
||||
"github.com/milvus-io/milvus/pkg/util/tsoutil"
|
||||
)
|
||||
|
||||
@ -81,7 +80,7 @@ func ValidateOptions(options []*commonpb.KeyValuePair) error {
|
||||
}
|
||||
}
|
||||
if startTs > endTs {
|
||||
return errors.New("start_ts shouldn't be larger than end_ts")
|
||||
return merr.WrapErrImportFailed("start_ts shouldn't be larger than end_ts")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
@ -25,7 +25,6 @@ import (
"strconv"
"strings"

"github.com/cockroachdb/errors"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"

@ -34,6 +33,7 @@ import (
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)

@ -112,6 +112,11 @@ func initBlockData(collectionSchema *schemapb.CollectionSchema) BlockData {
blockData[schema.GetFieldID()] = &storage.JSONFieldData{
Data: make([][]byte, 0),
}
case schemapb.DataType_Array:
blockData[schema.GetFieldID()] = &storage.ArrayFieldData{
Data: make([]*schemapb.ScalarField, 0),
ElementType: schema.GetElementType(),
}
default:
log.Warn("Import util: unsupported data type", zap.String("DataType", getTypeName(schema.DataType)))
return nil
@ -137,12 +142,12 @@ func initShardData(collectionSchema *schemapb.CollectionSchema, partitionIDs []i
func parseFloat(s string, bitsize int, fieldName string) (float64, error) {
value, err := strconv.ParseFloat(s, bitsize)
if err != nil {
return 0, fmt.Errorf("failed to parse value '%s' for field '%s', error: %w", s, fieldName, err)
return 0, merr.WrapErrImportFailed(fmt.Sprintf("failed to parse value '%s' for field '%s', error: %v", s, fieldName, err))
}

err = typeutil.VerifyFloat(value)
if err != nil {
return 0, fmt.Errorf("illegal value '%s' for field '%s', error: %w", s, fieldName, err)
return 0, merr.WrapErrImportFailed(fmt.Sprintf("illegal value '%s' for field '%s', error: %v", s, fieldName, err))
}

return value, nil
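With the new Array branch in initBlockData, every Array field is backed by a storage.ArrayFieldData whose rows are schemapb.ScalarField values. The hypothetical helper below is shown only to illustrate that layout; the real per-element conversion is done by getArrayElementData, added later in this file:

package importutil // assumed package name, for illustration only

import (
	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
	"github.com/milvus-io/milvus/internal/storage"
)

// appendInt32ArrayRow is a hypothetical helper: one imported row of an Array
// field with ElementType Int32 becomes a single ScalarField appended to the
// field's Data slice, so RowNum() grows by one per row, not per element.
func appendInt32ArrayRow(field *storage.ArrayFieldData, values []int32) {
	field.Data = append(field.Data, &schemapb.ScalarField{
		Data: &schemapb.ScalarField_IntData{
			IntData: &schemapb.IntArray{Data: values},
		},
	})
}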
@ -162,7 +167,7 @@ type Validator struct {
|
||||
// initValidators constructs valiator methods and data conversion methods
|
||||
func initValidators(collectionSchema *schemapb.CollectionSchema, validators map[storage.FieldID]*Validator) error {
|
||||
if collectionSchema == nil {
|
||||
return errors.New("collection schema is nil")
|
||||
return merr.WrapErrImportFailed("collection schema is nil")
|
||||
}
|
||||
|
||||
for i := 0; i < len(collectionSchema.Fields); i++ {
|
||||
@ -181,7 +186,7 @@ func initValidators(collectionSchema *schemapb.CollectionSchema, validators map[
|
||||
if value, ok := obj.(bool); ok {
|
||||
field.(*storage.BoolFieldData).Data = append(field.(*storage.BoolFieldData).Data, value)
|
||||
} else {
|
||||
return fmt.Errorf("illegal value '%v' for bool type field '%s'", obj, schema.GetName())
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("illegal value '%v' for bool type field '%s'", obj, schema.GetName()))
|
||||
}
|
||||
|
||||
return nil
|
||||
@ -195,7 +200,7 @@ func initValidators(collectionSchema *schemapb.CollectionSchema, validators map[
|
||||
}
|
||||
field.(*storage.FloatFieldData).Data = append(field.(*storage.FloatFieldData).Data, float32(value))
|
||||
} else {
|
||||
return fmt.Errorf("illegal value '%v' for float type field '%s'", obj, schema.GetName())
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("illegal value '%v' for float type field '%s'", obj, schema.GetName()))
|
||||
}
|
||||
|
||||
return nil
|
||||
@ -209,7 +214,7 @@ func initValidators(collectionSchema *schemapb.CollectionSchema, validators map[
|
||||
}
|
||||
field.(*storage.DoubleFieldData).Data = append(field.(*storage.DoubleFieldData).Data, value)
|
||||
} else {
|
||||
return fmt.Errorf("illegal value '%v' for double type field '%s'", obj, schema.GetName())
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("illegal value '%v' for double type field '%s'", obj, schema.GetName()))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@ -218,11 +223,11 @@ func initValidators(collectionSchema *schemapb.CollectionSchema, validators map[
|
||||
if num, ok := obj.(json.Number); ok {
|
||||
value, err := strconv.ParseInt(string(num), 0, 8)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse value '%v' for int8 field '%s', error: %w", num, schema.GetName(), err)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to parse value '%v' for int8 field '%s', error: %v", num, schema.GetName(), err))
|
||||
}
|
||||
field.(*storage.Int8FieldData).Data = append(field.(*storage.Int8FieldData).Data, int8(value))
|
||||
} else {
|
||||
return fmt.Errorf("illegal value '%v' for int8 type field '%s'", obj, schema.GetName())
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("illegal value '%v' for int8 type field '%s'", obj, schema.GetName()))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@ -231,11 +236,11 @@ func initValidators(collectionSchema *schemapb.CollectionSchema, validators map[
|
||||
if num, ok := obj.(json.Number); ok {
|
||||
value, err := strconv.ParseInt(string(num), 0, 16)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse value '%v' for int16 field '%s', error: %w", num, schema.GetName(), err)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to parse value '%v' for int16 field '%s', error: %v", num, schema.GetName(), err))
|
||||
}
|
||||
field.(*storage.Int16FieldData).Data = append(field.(*storage.Int16FieldData).Data, int16(value))
|
||||
} else {
|
||||
return fmt.Errorf("illegal value '%v' for int16 type field '%s'", obj, schema.GetName())
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("illegal value '%v' for int16 type field '%s'", obj, schema.GetName()))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@ -244,11 +249,11 @@ func initValidators(collectionSchema *schemapb.CollectionSchema, validators map[
|
||||
if num, ok := obj.(json.Number); ok {
|
||||
value, err := strconv.ParseInt(string(num), 0, 32)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse value '%v' for int32 field '%s', error: %w", num, schema.GetName(), err)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to parse value '%v' for int32 field '%s', error: %v", num, schema.GetName(), err))
|
||||
}
|
||||
field.(*storage.Int32FieldData).Data = append(field.(*storage.Int32FieldData).Data, int32(value))
|
||||
} else {
|
||||
return fmt.Errorf("illegal value '%v' for int32 type field '%s'", obj, schema.GetName())
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("illegal value '%v' for int32 type field '%s'", obj, schema.GetName()))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@ -257,11 +262,11 @@ func initValidators(collectionSchema *schemapb.CollectionSchema, validators map[
|
||||
if num, ok := obj.(json.Number); ok {
|
||||
value, err := strconv.ParseInt(string(num), 0, 64)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse value '%v' for int64 field '%s', error: %w", num, schema.GetName(), err)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to parse value '%v' for int64 field '%s', error: %v", num, schema.GetName(), err))
|
||||
}
|
||||
field.(*storage.Int64FieldData).Data = append(field.(*storage.Int64FieldData).Data, value)
|
||||
} else {
|
||||
return fmt.Errorf("illegal value '%v' for int64 type field '%s'", obj, schema.GetName())
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("illegal value '%v' for int64 type field '%s'", obj, schema.GetName()))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@ -275,22 +280,22 @@ func initValidators(collectionSchema *schemapb.CollectionSchema, validators map[
|
||||
validators[schema.GetFieldID()].convertFunc = func(obj interface{}, field storage.FieldData) error {
|
||||
arr, ok := obj.([]interface{})
|
||||
if !ok {
|
||||
return fmt.Errorf("'%v' is not an array for binary vector field '%s'", obj, schema.GetName())
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("'%v' is not an array for binary vector field '%s'", obj, schema.GetName()))
|
||||
}
|
||||
// we use uint8 to represent binary vector in json file, each uint8 value represents 8 dimensions.
|
||||
if len(arr)*8 != dim {
|
||||
return fmt.Errorf("bit size %d doesn't equal to vector dimension %d of field '%s'", len(arr)*8, dim, schema.GetName())
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("bit size %d doesn't equal to vector dimension %d of field '%s'", len(arr)*8, dim, schema.GetName()))
|
||||
}
|
||||
|
||||
for i := 0; i < len(arr); i++ {
|
||||
if num, ok := arr[i].(json.Number); ok {
|
||||
value, err := strconv.ParseUint(string(num), 0, 8)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse value '%v' for binary vector field '%s', error: %w", num, schema.GetName(), err)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to parse value '%v' for binary vector field '%s', error: %v", num, schema.GetName(), err))
|
||||
}
|
||||
field.(*storage.BinaryVectorFieldData).Data = append(field.(*storage.BinaryVectorFieldData).Data, byte(value))
|
||||
} else {
|
||||
return fmt.Errorf("illegal value '%v' for binary vector field '%s'", obj, schema.GetName())
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("illegal value '%v' for binary vector field '%s'", obj, schema.GetName()))
|
||||
}
|
||||
}
|
||||
|
||||
@ -306,10 +311,10 @@ func initValidators(collectionSchema *schemapb.CollectionSchema, validators map[
|
||||
validators[schema.GetFieldID()].convertFunc = func(obj interface{}, field storage.FieldData) error {
|
||||
arr, ok := obj.([]interface{})
|
||||
if !ok {
|
||||
return fmt.Errorf("'%v' is not an array for float vector field '%s'", obj, schema.GetName())
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("'%v' is not an array for float vector field '%s'", obj, schema.GetName()))
|
||||
}
|
||||
if len(arr) != dim {
|
||||
return fmt.Errorf("array size %d doesn't equal to vector dimension %d of field '%s'", len(arr), dim, schema.GetName())
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("array size %d doesn't equal to vector dimension %d of field '%s'", len(arr), dim, schema.GetName()))
|
||||
}
|
||||
|
||||
for i := 0; i < len(arr); i++ {
|
||||
@ -320,7 +325,7 @@ func initValidators(collectionSchema *schemapb.CollectionSchema, validators map[
|
||||
}
|
||||
field.(*storage.FloatVectorFieldData).Data = append(field.(*storage.FloatVectorFieldData).Data, float32(value))
|
||||
} else {
|
||||
return fmt.Errorf("illegal value '%v' for float vector field '%s'", obj, schema.GetName())
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("illegal value '%v' for float vector field '%s'", obj, schema.GetName()))
|
||||
}
|
||||
}
|
||||
|
||||
@ -333,7 +338,7 @@ func initValidators(collectionSchema *schemapb.CollectionSchema, validators map[
|
||||
if value, ok := obj.(string); ok {
|
||||
field.(*storage.StringFieldData).Data = append(field.(*storage.StringFieldData).Data, value)
|
||||
} else {
|
||||
return fmt.Errorf("illegal value '%v' for varchar type field '%s'", obj, schema.GetName())
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("illegal value '%v' for varchar type field '%s'", obj, schema.GetName()))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@ -345,28 +350,197 @@ func initValidators(collectionSchema *schemapb.CollectionSchema, validators map[
|
||||
var dummy interface{}
|
||||
err := json.Unmarshal([]byte(value), &dummy)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse value '%v' for JSON field '%s', error: %w", value, schema.GetName(), err)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to parse value '%v' for JSON field '%s', error: %v", value, schema.GetName(), err))
|
||||
}
|
||||
field.(*storage.JSONFieldData).Data = append(field.(*storage.JSONFieldData).Data, []byte(value))
|
||||
} else if mp, ok := obj.(map[string]interface{}); ok {
|
||||
bs, err := json.Marshal(mp)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse value for JSON field '%s', error: %w", schema.GetName(), err)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to parse value for JSON field '%s', error: %v", schema.GetName(), err))
|
||||
}
|
||||
field.(*storage.JSONFieldData).Data = append(field.(*storage.JSONFieldData).Data, bs)
|
||||
} else {
|
||||
return fmt.Errorf("illegal value '%v' for JSON type field '%s'", obj, schema.GetName())
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("illegal value '%v' for JSON type field '%s'", obj, schema.GetName()))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
case schemapb.DataType_Array:
validators[schema.GetFieldID()].convertFunc = func(obj interface{}, field storage.FieldData) error {
arr, ok := obj.([]interface{})
if !ok {
return merr.WrapErrImportFailed(fmt.Sprintf("'%v' is not an array for array field '%s'", obj, schema.GetName()))
}
return getArrayElementData(schema, arr, field)
}
default:
return fmt.Errorf("unsupport data type: %s", getTypeName(collectionSchema.Fields[i].DataType))
return merr.WrapErrImportFailed(fmt.Sprintf("unsupport data type: %s", getTypeName(collectionSchema.Fields[i].DataType)))
}
}

return nil
}

func getArrayElementData(schema *schemapb.FieldSchema, arr []interface{}, field storage.FieldData) error {
switch schema.GetElementType() {
case schemapb.DataType_Bool:
boolData := make([]bool, 0)
for i := 0; i < len(arr); i++ {
if value, ok := arr[i].(bool); ok {
boolData = append(boolData, value)
} else {
return merr.WrapErrImportFailed(fmt.Sprintf("illegal value '%v' for bool array field '%s'", arr, schema.GetName()))
}
}
field.(*storage.ArrayFieldData).Data = append(field.(*storage.ArrayFieldData).Data, &schemapb.ScalarField{
Data: &schemapb.ScalarField_BoolData{
BoolData: &schemapb.BoolArray{
Data: boolData,
},
},
})
case schemapb.DataType_Int8:
int8Data := make([]int32, 0)
for i := 0; i < len(arr); i++ {
if num, ok := arr[i].(json.Number); ok {
value, err := strconv.ParseInt(string(num), 0, 8)
if err != nil {
return err
}
int8Data = append(int8Data, int32(value))
} else {
return merr.WrapErrImportFailed(fmt.Sprintf("illegal value '%v' for int array field '%s'", arr, schema.GetName()))
}
}
field.(*storage.ArrayFieldData).Data = append(field.(*storage.ArrayFieldData).Data, &schemapb.ScalarField{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{
Data: int8Data,
},
},
})

case schemapb.DataType_Int16:
int16Data := make([]int32, 0)
for i := 0; i < len(arr); i++ {
if num, ok := arr[i].(json.Number); ok {
value, err := strconv.ParseInt(string(num), 0, 16)
if err != nil {
return err
}
int16Data = append(int16Data, int32(value))
} else {
return merr.WrapErrImportFailed(fmt.Sprintf("illegal value '%v' for int array field '%s'", arr, schema.GetName()))
}
}
field.(*storage.ArrayFieldData).Data = append(field.(*storage.ArrayFieldData).Data, &schemapb.ScalarField{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{
Data: int16Data,
},
},
})
case schemapb.DataType_Int32:
intData := make([]int32, 0)
for i := 0; i < len(arr); i++ {
if num, ok := arr[i].(json.Number); ok {
value, err := strconv.ParseInt(string(num), 0, 32)
if err != nil {
return err
}
intData = append(intData, int32(value))
} else {
return merr.WrapErrImportFailed(fmt.Sprintf("illegal value '%v' for int array field '%s'", arr, schema.GetName()))
}
}
field.(*storage.ArrayFieldData).Data = append(field.(*storage.ArrayFieldData).Data, &schemapb.ScalarField{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{
Data: intData,
},
},
})
case schemapb.DataType_Int64:
longData := make([]int64, 0)
for i := 0; i < len(arr); i++ {
if num, ok := arr[i].(json.Number); ok {
value, err := strconv.ParseInt(string(num), 0, 64)
if err != nil {
return err
}
longData = append(longData, value)
} else {
return merr.WrapErrImportFailed(fmt.Sprintf("illegal value '%v' for long array field '%s'", arr, schema.GetName()))
}
}
field.(*storage.ArrayFieldData).Data = append(field.(*storage.ArrayFieldData).Data, &schemapb.ScalarField{
Data: &schemapb.ScalarField_LongData{
LongData: &schemapb.LongArray{
Data: longData,
},
},
})
case schemapb.DataType_Float:
floatData := make([]float32, 0)
for i := 0; i < len(arr); i++ {
if num, ok := arr[i].(json.Number); ok {
value, err := parseFloat(string(num), 32, schema.GetName())
if err != nil {
return err
}
floatData = append(floatData, float32(value))
} else {
return merr.WrapErrImportFailed(fmt.Sprintf("illegal value '%v' for float array field '%s'", arr, schema.GetName()))
}
}
field.(*storage.ArrayFieldData).Data = append(field.(*storage.ArrayFieldData).Data, &schemapb.ScalarField{
Data: &schemapb.ScalarField_FloatData{
FloatData: &schemapb.FloatArray{
Data: floatData,
},
},
})
case schemapb.DataType_Double:
doubleData := make([]float64, 0)
for i := 0; i < len(arr); i++ {
if num, ok := arr[i].(json.Number); ok {
value, err := parseFloat(string(num), 64, schema.GetName())
if err != nil {
return err
}
doubleData = append(doubleData, value)
} else {
return merr.WrapErrImportFailed(fmt.Sprintf("illegal value '%v' for double array field '%s'", arr, schema.GetName()))
}
}
field.(*storage.ArrayFieldData).Data = append(field.(*storage.ArrayFieldData).Data, &schemapb.ScalarField{
Data: &schemapb.ScalarField_DoubleData{
DoubleData: &schemapb.DoubleArray{
Data: doubleData,
},
},
})
case schemapb.DataType_String, schemapb.DataType_VarChar:
stringFieldData := &schemapb.ScalarField{
Data: &schemapb.ScalarField_StringData{
StringData: &schemapb.StringArray{
Data: make([]string, 0),
},
},
}
for i := 0; i < len(arr); i++ {
if str, ok := arr[i].(string); ok {
stringFieldData.GetStringData().Data = append(stringFieldData.GetStringData().Data, str)
} else {
return merr.WrapErrImportFailed(fmt.Sprintf("illegal value '%v' for string array field '%s'", arr, schema.GetName()))
}
}
field.(*storage.ArrayFieldData).Data = append(field.(*storage.ArrayFieldData).Data, stringFieldData)
default:
return merr.WrapErrImportFailed(fmt.Sprintf("unsupport element type: %v", getTypeName(schema.GetElementType())))
}
return nil
}

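To make the new code path concrete, here is a small self-contained sketch of the row-based JSON shape an Array field arrives in, and why the element branches above type-assert json.Number: the import JSON decoding uses UseNumber() (the dynamic-field handling earlier in this commit does the same), so each element must be parsed against the field's ElementType. Field names and values below are illustrative only, not taken from the commit:

package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

// A minimal sketch of the row-based JSON layout the importer consumes once
// Array fields are supported.
const rows = `{
    "rows": [
        {"FieldInt64": 1, "FieldArray": [1, 2, 3]},
        {"FieldInt64": 2, "FieldArray": [4, 5]}
    ]
}`

func main() {
	dec := json.NewDecoder(strings.NewReader(rows))
	// Decoding with UseNumber() means array elements reach the convert
	// functions as json.Number values, which are then parsed per ElementType.
	dec.UseNumber()
	var doc map[string][]map[string]interface{}
	if err := dec.Decode(&doc); err != nil {
		panic(err)
	}
	for _, row := range doc["rows"] {
		arr := row["FieldArray"].([]interface{})
		fmt.Printf("pk=%v elements=%v (element Go type: %T)\n", row["FieldInt64"], arr, arr[0])
	}
}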
func printFieldsDataInfo(fieldsData BlockData, msg string, files []string) {
|
||||
stats := make([]zapcore.Field, 0)
|
||||
for k, v := range fieldsData {
|
||||
@ -395,13 +569,13 @@ func getFieldDimension(schema *schemapb.FieldSchema) (int, error) {
|
||||
if key == common.DimKey {
|
||||
dim, err := strconv.Atoi(value)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("illegal vector dimension '%s' for field '%s', error: %w", value, schema.GetName(), err)
|
||||
return 0, merr.WrapErrImportFailed(fmt.Sprintf("illegal vector dimension '%s' for field '%s', error: %v", value, schema.GetName(), err))
|
||||
}
|
||||
return dim, nil
|
||||
}
|
||||
}
|
||||
|
||||
return 0, fmt.Errorf("vector dimension is not defined for field '%s'", schema.GetName())
|
||||
return 0, merr.WrapErrImportFailed(fmt.Sprintf("vector dimension is not defined for field '%s'", schema.GetName()))
|
||||
}
|
||||
|
||||
// triggerGC triggers golang gc to return all free memory back to the underlying system at once,
|
||||
@ -426,7 +600,7 @@ func fillDynamicData(blockData BlockData, collectionSchema *schemapb.CollectionS
|
||||
}
|
||||
|
||||
if dynamicFieldID < 0 {
|
||||
return fmt.Errorf("the collection schema is dynamic but dynamic field is not found")
|
||||
return merr.WrapErrImportFailed("the collection schema is dynamic but dynamic field is not found")
|
||||
}
|
||||
|
||||
rowCount := 0
|
||||
@ -483,7 +657,7 @@ func tryFlushBlocks(ctx context.Context,
|
||||
// outside context might be canceled(service stop, or future enhancement for canceling import task)
|
||||
if isCanceled(ctx) {
|
||||
log.Warn("Import util: import task was canceled")
|
||||
return errors.New("import task was canceled")
|
||||
return merr.WrapErrImportFailed("import task was canceled")
|
||||
}
|
||||
|
||||
shardData := shardsData[i]
|
||||
@ -491,7 +665,7 @@ func tryFlushBlocks(ctx context.Context,
|
||||
err := fillDynamicData(blockData, collectionSchema)
|
||||
if err != nil {
|
||||
log.Warn("Import util: failed to fill dynamic field", zap.Error(err))
|
||||
return fmt.Errorf("failed to fill dynamic field, error: %w", err)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to fill dynamic field, error: %v", err))
|
||||
}
|
||||
|
||||
// Note: even rowCount is 0, the size is still non-zero
|
||||
@ -509,7 +683,7 @@ func tryFlushBlocks(ctx context.Context,
|
||||
if err != nil {
|
||||
log.Warn("Import util: failed to force flush block data", zap.Int("shardID", i),
|
||||
zap.Int64("partitionID", partitionID), zap.Error(err))
|
||||
return fmt.Errorf("failed to force flush block data for shard id %d to partition %d, error: %w", i, partitionID, err)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to force flush block data for shard id %d to partition %d, error: %v", i, partitionID, err))
|
||||
}
|
||||
log.Info("Import util: force flush", zap.Int("rowCount", rowCount), zap.Int("size", size),
|
||||
zap.Int("shardID", i), zap.Int64("partitionID", partitionID))
|
||||
@ -517,7 +691,7 @@ func tryFlushBlocks(ctx context.Context,
|
||||
shardData[partitionID] = initBlockData(collectionSchema)
|
||||
if shardData[partitionID] == nil {
|
||||
log.Warn("Import util: failed to initialize FieldData list", zap.Int("shardID", i), zap.Int64("partitionID", partitionID))
|
||||
return fmt.Errorf("failed to initialize FieldData list for shard id %d to partition %d", i, partitionID)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to initialize FieldData list for shard id %d to partition %d", i, partitionID))
|
||||
}
|
||||
continue
|
||||
}
|
||||
@ -530,7 +704,7 @@ func tryFlushBlocks(ctx context.Context,
|
||||
if err != nil {
|
||||
log.Warn("Import util: failed to flush block data", zap.Int("shardID", i),
|
||||
zap.Int64("partitionID", partitionID), zap.Error(err))
|
||||
return fmt.Errorf("failed to flush block data for shard id %d to partition %d, error: %w", i, partitionID, err)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to flush block data for shard id %d to partition %d, error: %v", i, partitionID, err))
|
||||
}
|
||||
log.Info("Import util: block size exceed limit and flush", zap.Int("rowCount", rowCount), zap.Int("size", size),
|
||||
zap.Int("shardID", i), zap.Int64("partitionID", partitionID), zap.Int64("blockSize", blockSize))
|
||||
@ -538,7 +712,7 @@ func tryFlushBlocks(ctx context.Context,
|
||||
shardData[partitionID] = initBlockData(collectionSchema)
|
||||
if shardData[partitionID] == nil {
|
||||
log.Warn("Import util: failed to initialize FieldData list", zap.Int("shardID", i), zap.Int64("partitionID", partitionID))
|
||||
return fmt.Errorf("failed to initialize FieldData list for shard id %d to partition %d", i, partitionID)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to initialize FieldData list for shard id %d to partition %d", i, partitionID))
|
||||
}
|
||||
continue
|
||||
}
|
||||
@ -559,14 +733,14 @@ func tryFlushBlocks(ctx context.Context,
|
||||
// outside context might be canceled(service stop, or future enhancement for canceling import task)
|
||||
if isCanceled(ctx) {
|
||||
log.Warn("Import util: import task was canceled")
|
||||
return errors.New("import task was canceled")
|
||||
return merr.WrapErrImportFailed("import task was canceled")
|
||||
}
|
||||
|
||||
blockData := shardsData[biggestItem][biggestPartition]
|
||||
err := fillDynamicData(blockData, collectionSchema)
|
||||
if err != nil {
|
||||
log.Warn("Import util: failed to fill dynamic field", zap.Error(err))
|
||||
return fmt.Errorf("failed to fill dynamic field, error: %w", err)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to fill dynamic field, error: %v", err))
|
||||
}
|
||||
|
||||
// Note: even rowCount is 0, the size is still non-zero
|
||||
@ -583,8 +757,8 @@ func tryFlushBlocks(ctx context.Context,
|
||||
if err != nil {
|
||||
log.Warn("Import util: failed to flush biggest block data", zap.Int("shardID", biggestItem),
|
||||
zap.Int64("partitionID", biggestPartition))
|
||||
return fmt.Errorf("failed to flush biggest block data for shard id %d to partition %d, error: %w",
|
||||
biggestItem, biggestPartition, err)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to flush biggest block data for shard id %d to partition %d, error: %v",
|
||||
biggestItem, biggestPartition, err))
|
||||
}
|
||||
log.Info("Import util: total size exceed limit and flush", zap.Int("rowCount", rowCount),
|
||||
zap.Int("size", size), zap.Int("totalSize", totalSize), zap.Int("shardID", biggestItem))
|
||||
@ -593,7 +767,7 @@ func tryFlushBlocks(ctx context.Context,
|
||||
if shardsData[biggestItem][biggestPartition] == nil {
|
||||
log.Warn("Import util: failed to initialize FieldData list", zap.Int("shardID", biggestItem),
|
||||
zap.Int64("partitionID", biggestPartition))
|
||||
return fmt.Errorf("failed to initialize FieldData list for shard id %d to partition %d", biggestItem, biggestPartition)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("failed to initialize FieldData list for shard id %d to partition %d", biggestItem, biggestPartition))
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -642,7 +816,7 @@ func pkToShard(pk interface{}, shardNum uint32) (uint32, error) {
|
||||
intPK, ok := pk.(int64)
|
||||
if !ok {
|
||||
log.Warn("Numpy parser: primary key field must be int64 or varchar")
|
||||
return 0, fmt.Errorf("primary key field must be int64 or varchar")
|
||||
return 0, merr.WrapErrImportFailed("primary key field must be int64 or varchar")
|
||||
}
|
||||
hash, _ := typeutil.Hash32Int64(intPK)
|
||||
shard = hash % shardNum
|
||||
@ -653,7 +827,7 @@ func pkToShard(pk interface{}, shardNum uint32) (uint32, error) {
|
||||
|
||||
func UpdateKVInfo(infos *[]*commonpb.KeyValuePair, k string, v string) error {
|
||||
if infos == nil {
|
||||
return errors.New("Import util: kv array pointer is nil")
|
||||
return merr.WrapErrImportFailed("Import util: kv array pointer is nil")
|
||||
}
|
||||
|
||||
found := false
|
||||
|
@ -127,6 +127,14 @@ func sampleSchema() *schemapb.CollectionSchema {
Description: "json",
DataType: schemapb.DataType_JSON,
},
{
FieldID: 113,
Name: "FieldArray",
IsPrimaryKey: false,
Description: "array",
DataType: schemapb.DataType_Array,
ElementType: schemapb.DataType_Int32,
},
},
}
return schema
@ -145,6 +153,7 @@ type sampleRow struct {
FieldJSON string
FieldBinaryVector []int
FieldFloatVector []float32
FieldArray []int32
}
type sampleContent struct {
Rows []sampleRow
@ -324,7 +333,18 @@ func createFieldsData(collectionSchema *schemapb.CollectionSchema, rowCount int)
jsonData = append(jsonData, []byte(fmt.Sprintf("{\"y\": %d}", i)))
}
fieldsData[schema.GetFieldID()] = jsonData

case schemapb.DataType_Array:
arrayData := make([]*schemapb.ScalarField, 0)
for i := 0; i < rowCount; i++ {
arrayData = append(arrayData, &schemapb.ScalarField{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{
Data: []int32{int32(i), int32(i + 1), int32(i + 2)},
},
},
})
}
fieldsData[schema.GetFieldID()] = arrayData
default:
return nil
}
@ -372,6 +392,8 @@ func createBlockData(collectionSchema *schemapb.CollectionSchema, fieldsData map
blockData[fieldID].(*storage.StringFieldData).Data = append(blockData[fieldID].(*storage.StringFieldData).Data, fieldsData[fieldID].([]string)...)
case schemapb.DataType_JSON:
blockData[fieldID].(*storage.JSONFieldData).Data = append(blockData[fieldID].(*storage.JSONFieldData).Data, fieldsData[fieldID].([][]byte)...)
case schemapb.DataType_Array:
blockData[fieldID].(*storage.ArrayFieldData).Data = append(blockData[fieldID].(*storage.ArrayFieldData).Data, fieldsData[fieldID].([]*schemapb.ScalarField)...)
default:
return nil
}
@ -586,6 +608,7 @@ func Test_InitValidators(t *testing.T) {
|
||||
checkConvertFunc("FieldFloatVector", validVal, invalidVal)
|
||||
invalidVal = []interface{}{jsonNumber("1"), jsonNumber("2"), jsonNumber("3"), true}
|
||||
checkConvertFunc("FieldFloatVector", validVal, invalidVal)
|
||||
checkConvertFunc("FieldArray", validVal, invalidVal)
|
||||
})
|
||||
|
||||
t.Run("init error cases", func(t *testing.T) {
|
||||
@ -673,6 +696,230 @@ func Test_InitValidators(t *testing.T) {
|
||||
assert.Error(t, err)
|
||||
assert.Equal(t, 2, fieldData.RowNum())
|
||||
})
|
||||
|
||||
t.Run("array field", func(t *testing.T) {
|
||||
schema = &schemapb.CollectionSchema{
|
||||
Name: "schema",
|
||||
Description: "schema",
|
||||
AutoID: true,
|
||||
Fields: []*schemapb.FieldSchema{
|
||||
{
|
||||
FieldID: 113,
|
||||
Name: "FieldArray",
|
||||
IsPrimaryKey: false,
|
||||
DataType: schemapb.DataType_Array,
|
||||
TypeParams: []*commonpb.KeyValuePair{
|
||||
{Key: "max_capacity", Value: "100"},
|
||||
},
|
||||
ElementType: schemapb.DataType_Bool,
|
||||
},
|
||||
},
|
||||
}
|
||||
validators = make(map[storage.FieldID]*Validator)
|
||||
err = initValidators(schema, validators)
|
||||
assert.NoError(t, err)
|
||||
|
||||
v, ok := validators[113]
|
||||
assert.True(t, ok)
|
||||
|
||||
fields := initBlockData(schema)
|
||||
assert.NotNil(t, fields)
|
||||
fieldData := fields[113]
|
||||
|
||||
err = v.convertFunc([]interface{}{true, false}, fieldData)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 1, fieldData.RowNum())
|
||||
|
||||
err = v.convertFunc([]interface{}{1, 2}, fieldData)
|
||||
assert.Error(t, err)
|
||||
|
||||
schema = &schemapb.CollectionSchema{
|
||||
Name: "schema",
|
||||
Description: "schema",
|
||||
AutoID: true,
|
||||
Fields: []*schemapb.FieldSchema{
|
||||
{
|
||||
FieldID: 113,
|
||||
Name: "FieldArray",
|
||||
IsPrimaryKey: false,
|
||||
DataType: schemapb.DataType_Array,
|
||||
TypeParams: []*commonpb.KeyValuePair{
|
||||
{Key: "max_capacity", Value: "100"},
|
||||
},
|
||||
ElementType: schemapb.DataType_Int32,
|
||||
},
|
||||
},
|
||||
}
|
||||
validators = make(map[storage.FieldID]*Validator)
|
||||
err = initValidators(schema, validators)
|
||||
assert.NoError(t, err)
|
||||
|
||||
v, ok = validators[113]
|
||||
assert.True(t, ok)
|
||||
|
||||
fields = initBlockData(schema)
|
||||
assert.NotNil(t, fields)
|
||||
fieldData = fields[113]
|
||||
|
||||
err = v.convertFunc([]interface{}{jsonNumber("1"), jsonNumber("2"), jsonNumber("3"), jsonNumber("4")}, fieldData)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 1, fieldData.RowNum())
|
||||
|
||||
err = v.convertFunc([]interface{}{true, false}, fieldData)
|
||||
assert.Error(t, err)
|
||||
|
||||
err = v.convertFunc([]interface{}{jsonNumber("1.1"), jsonNumber("2.2")}, fieldData)
|
||||
assert.Error(t, err)
|
||||
|
||||
schema = &schemapb.CollectionSchema{
|
||||
Name: "schema",
|
||||
Description: "schema",
|
||||
AutoID: true,
|
||||
Fields: []*schemapb.FieldSchema{
|
||||
{
|
||||
FieldID: 113,
|
||||
Name: "FieldArray",
|
||||
IsPrimaryKey: false,
|
||||
DataType: schemapb.DataType_Array,
|
||||
TypeParams: []*commonpb.KeyValuePair{
|
||||
{Key: "max_capacity", Value: "100"},
|
||||
},
|
||||
ElementType: schemapb.DataType_Int64,
|
||||
},
|
||||
},
|
||||
}
|
||||
validators = make(map[storage.FieldID]*Validator)
|
||||
err = initValidators(schema, validators)
|
||||
assert.NoError(t, err)
|
||||
|
||||
v, ok = validators[113]
|
||||
assert.True(t, ok)
|
||||
|
||||
fields = initBlockData(schema)
|
||||
assert.NotNil(t, fields)
|
||||
fieldData = fields[113]
|
||||
|
||||
err = v.convertFunc([]interface{}{jsonNumber("1"), jsonNumber("2"), jsonNumber("3"), jsonNumber("4")}, fieldData)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 1, fieldData.RowNum())
|
||||
|
||||
err = v.convertFunc([]interface{}{true, false}, fieldData)
|
||||
assert.Error(t, err)
|
||||
|
||||
err = v.convertFunc([]interface{}{jsonNumber("1.1"), jsonNumber("2.2")}, fieldData)
|
||||
assert.Error(t, err)
|
||||
|
||||
schema = &schemapb.CollectionSchema{
|
||||
Name: "schema",
|
||||
Description: "schema",
|
||||
AutoID: true,
|
||||
Fields: []*schemapb.FieldSchema{
|
||||
{
|
||||
FieldID: 113,
|
||||
Name: "FieldArray",
|
||||
IsPrimaryKey: false,
|
||||
DataType: schemapb.DataType_Array,
|
||||
TypeParams: []*commonpb.KeyValuePair{
|
||||
{Key: "max_capacity", Value: "100"},
|
||||
},
|
||||
ElementType: schemapb.DataType_Float,
|
||||
},
|
||||
},
|
||||
}
|
||||
validators = make(map[storage.FieldID]*Validator)
|
||||
err = initValidators(schema, validators)
|
||||
assert.NoError(t, err)
|
||||
|
||||
v, ok = validators[113]
|
||||
assert.True(t, ok)
|
||||
|
||||
fields = initBlockData(schema)
|
||||
assert.NotNil(t, fields)
|
||||
fieldData = fields[113]
|
||||
|
||||
err = v.convertFunc([]interface{}{jsonNumber("1.1"), jsonNumber("2.2"), jsonNumber("3.3"), jsonNumber("4.4")}, fieldData)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 1, fieldData.RowNum())
|
||||
|
||||
err = v.convertFunc([]interface{}{true, false}, fieldData)
|
||||
assert.Error(t, err)
|
||||
|
||||
err = v.convertFunc([]interface{}{jsonNumber("1.1.1"), jsonNumber("2.2.2")}, fieldData)
|
||||
assert.Error(t, err)
|
||||
|
||||
schema = &schemapb.CollectionSchema{
|
||||
Name: "schema",
|
||||
Description: "schema",
|
||||
AutoID: true,
|
||||
Fields: []*schemapb.FieldSchema{
|
||||
{
|
||||
FieldID: 113,
|
||||
Name: "FieldArray",
|
||||
IsPrimaryKey: false,
|
||||
DataType: schemapb.DataType_Array,
|
||||
TypeParams: []*commonpb.KeyValuePair{
|
||||
{Key: "max_capacity", Value: "100"},
|
||||
},
|
||||
ElementType: schemapb.DataType_Double,
|
||||
},
|
||||
},
|
||||
}
|
||||
validators = make(map[storage.FieldID]*Validator)
|
||||
err = initValidators(schema, validators)
|
||||
assert.NoError(t, err)
|
||||
|
||||
v, ok = validators[113]
|
||||
assert.True(t, ok)
|
||||
|
||||
fields = initBlockData(schema)
|
||||
assert.NotNil(t, fields)
|
||||
fieldData = fields[113]
|
||||
|
||||
err = v.convertFunc([]interface{}{jsonNumber("1.2"), jsonNumber("2.3"), jsonNumber("3.4"), jsonNumber("4.5")}, fieldData)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 1, fieldData.RowNum())
|
||||
|
||||
err = v.convertFunc([]interface{}{true, false}, fieldData)
|
||||
assert.Error(t, err)
|
||||
|
||||
err = v.convertFunc([]interface{}{jsonNumber("1.1.1"), jsonNumber("2.2.2")}, fieldData)
|
||||
assert.Error(t, err)
|
||||
|
||||
schema = &schemapb.CollectionSchema{
|
||||
Name: "schema",
|
||||
Description: "schema",
|
||||
AutoID: true,
|
||||
Fields: []*schemapb.FieldSchema{
|
||||
{
|
||||
FieldID: 113,
|
||||
Name: "FieldArray",
|
||||
IsPrimaryKey: false,
|
||||
DataType: schemapb.DataType_Array,
|
||||
TypeParams: []*commonpb.KeyValuePair{
|
||||
{Key: "max_capacity", Value: "100"},
|
||||
},
|
||||
ElementType: schemapb.DataType_VarChar,
|
||||
},
|
||||
},
|
||||
}
|
||||
validators = make(map[storage.FieldID]*Validator)
|
||||
err = initValidators(schema, validators)
|
||||
assert.NoError(t, err)
|
||||
|
||||
v, ok = validators[113]
|
||||
assert.True(t, ok)
|
||||
|
||||
fields = initBlockData(schema)
|
||||
assert.NotNil(t, fields)
|
||||
fieldData = fields[113]
|
||||
|
||||
err = v.convertFunc([]interface{}{"abc", "def"}, fieldData)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 1, fieldData.RowNum())
|
||||
|
||||
err = v.convertFunc([]interface{}{true, false}, fieldData)
|
||||
assert.Error(t, err)
|
||||
})
|
||||
}
|
||||
|
||||
func Test_GetFileNameAndExt(t *testing.T) {
|
||||
@ -829,7 +1076,7 @@ func Test_TryFlushBlocks(t *testing.T) {
return nil
}

blockSize := int64(1024)
blockSize := int64(2048)
maxTotalSize := int64(4096)
shardNum := int32(3)
schema := sampleSchema()

@ -31,6 +31,7 @@ import (
"github.com/milvus-io/milvus/internal/querycoordv2/params"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/retry"
"github.com/milvus-io/milvus/pkg/util/timerecord"
)
@ -149,17 +150,17 @@ func NewImportWrapper(ctx context.Context, collectionInfo *CollectionInfo, segme
|
||||
func (p *ImportWrapper) SetCallbackFunctions(assignSegmentFunc AssignSegmentFunc, createBinlogsFunc CreateBinlogsFunc, saveSegmentFunc SaveSegmentFunc) error {
|
||||
if assignSegmentFunc == nil {
|
||||
log.Warn("import wrapper: callback function AssignSegmentFunc is nil")
|
||||
return fmt.Errorf("callback function AssignSegmentFunc is nil")
|
||||
return merr.WrapErrImportFailed("callback function AssignSegmentFunc is nil")
|
||||
}
|
||||
|
||||
if createBinlogsFunc == nil {
|
||||
log.Warn("import wrapper: callback function CreateBinlogsFunc is nil")
|
||||
return fmt.Errorf("callback function CreateBinlogsFunc is nil")
|
||||
return merr.WrapErrImportFailed("callback function CreateBinlogsFunc is nil")
|
||||
}
|
||||
|
||||
if saveSegmentFunc == nil {
|
||||
log.Warn("import wrapper: callback function SaveSegmentFunc is nil")
|
||||
return fmt.Errorf("callback function SaveSegmentFunc is nil")
|
||||
return merr.WrapErrImportFailed("callback function SaveSegmentFunc is nil")
|
||||
}
|
||||
|
||||
p.assignSegmentFunc = assignSegmentFunc
|
||||
@ -190,7 +191,7 @@ func (p *ImportWrapper) fileValidation(filePaths []string) (bool, error) {
|
||||
// only allow json file, numpy file and csv file
|
||||
if fileType != JSONFileExt && fileType != NumpyFileExt && fileType != CSVFileExt {
|
||||
log.Warn("import wrapper: unsupported file type", zap.String("filePath", filePath))
|
||||
return false, fmt.Errorf("unsupported file type: '%s'", filePath)
|
||||
return false, merr.WrapErrImportFailed(fmt.Sprintf("unsupported file type: '%s'", filePath))
|
||||
}
|
||||
|
||||
// we use the first file to determine row-based or column-based
|
||||
@ -203,12 +204,12 @@ func (p *ImportWrapper) fileValidation(filePaths []string) (bool, error) {
|
||||
if rowBased {
|
||||
if fileType != JSONFileExt && fileType != CSVFileExt {
|
||||
log.Warn("import wrapper: unsupported file type for row-based mode", zap.String("filePath", filePath))
|
||||
return rowBased, fmt.Errorf("unsupported file type for row-based mode: '%s'", filePath)
|
||||
return rowBased, merr.WrapErrImportFailed(fmt.Sprintf("unsupported file type for row-based mode: '%s'", filePath))
|
||||
}
|
||||
} else {
|
||||
if fileType != NumpyFileExt {
|
||||
log.Warn("import wrapper: unsupported file type for column-based mode", zap.String("filePath", filePath))
|
||||
return rowBased, fmt.Errorf("unsupported file type for column-based mode: '%s'", filePath)
|
||||
return rowBased, merr.WrapErrImportFailed(fmt.Sprintf("unsupported file type for column-based mode: '%s'", filePath))
|
||||
}
|
||||
}
|
||||
|
||||
@ -216,7 +217,7 @@ func (p *ImportWrapper) fileValidation(filePaths []string) (bool, error) {
_, ok := fileNames[name]
if ok {
log.Warn("import wrapper: duplicate file name", zap.String("filePath", filePath))
return rowBased, fmt.Errorf("duplicate file: '%s'", filePath)
return rowBased, merr.WrapErrImportFailed(fmt.Sprintf("duplicate file: '%s'", filePath))
}
fileNames[name] = struct{}{}

@ -224,20 +225,20 @@ func (p *ImportWrapper) fileValidation(filePaths []string) (bool, error) {
size, err := p.chunkManager.Size(p.ctx, filePath)
if err != nil {
log.Warn("import wrapper: failed to get file size", zap.String("filePath", filePath), zap.Error(err))
return rowBased, fmt.Errorf("failed to get file size of '%s', error:%w", filePath, err)
return rowBased, merr.WrapErrImportFailed(fmt.Sprintf("failed to get file size of '%s', error:%v", filePath, err))
}

// empty file
if size == 0 {
log.Warn("import wrapper: file size is zero", zap.String("filePath", filePath))
return rowBased, fmt.Errorf("the file '%s' size is zero", filePath)
return rowBased, merr.WrapErrImportFailed(fmt.Sprintf("the file '%s' size is zero", filePath))
}

if size > params.Params.CommonCfg.ImportMaxFileSize.GetAsInt64() {
log.Warn("import wrapper: file size exceeds the maximum size", zap.String("filePath", filePath),
zap.Int64("fileSize", size), zap.String("MaxFileSize", params.Params.CommonCfg.ImportMaxFileSize.GetValue()))
return rowBased, fmt.Errorf("the file '%s' size exceeds the maximum size: %s bytes",
filePath, params.Params.CommonCfg.ImportMaxFileSize.GetValue())
return rowBased, merr.WrapErrImportFailed(fmt.Sprintf("the file '%s' size exceeds the maximum size: %s bytes",
filePath, params.Params.CommonCfg.ImportMaxFileSize.GetValue()))
}
totalSize += size
}
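fileValidation, as patched above, gates every input file on extension, duplicate name, and size before any parsing starts. A standalone sketch of that flow, with hypothetical helper names and a hard-coded limit standing in for the configurable ImportMaxFileSize:

package main

import (
	"fmt"
	"path"
	"strings"
)

const maxImportFileSize = 16 << 30 // placeholder for the configurable limit (16 GB here)

// checkImportFiles is a simplified stand-in for fileValidation: it rejects
// unsupported extensions, duplicate base names, empty files, and oversized files.
func checkImportFiles(files map[string]int64) error {
	seen := make(map[string]struct{})
	for filePath, size := range files {
		ext := strings.ToLower(path.Ext(filePath))
		if ext != ".json" && ext != ".npy" && ext != ".csv" {
			return fmt.Errorf("unsupported file type: '%s'", filePath)
		}
		name := strings.TrimSuffix(path.Base(filePath), ext)
		if _, ok := seen[name]; ok {
			return fmt.Errorf("duplicate file: '%s'", filePath)
		}
		seen[name] = struct{}{}
		if size == 0 {
			return fmt.Errorf("the file '%s' size is zero", filePath)
		}
		if size > maxImportFileSize {
			return fmt.Errorf("the file '%s' size exceeds the maximum size", filePath)
		}
	}
	return nil
}

func main() {
	files := map[string]int64{"/tmp/FieldInt64.npy": 1024, "/tmp/FieldFloatVector.npy": 2048}
	fmt.Println(checkImportFiles(files)) // <nil>
}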
@ -567,7 +568,7 @@ func (p *ImportWrapper) flushFunc(fields BlockData, shardID int, partitionID int
if err != nil {
logFields = append(logFields, zap.Error(err))
log.Warn("import wrapper: failed to assign a new segment", logFields...)
return fmt.Errorf("failed to assign a new segment for shard id %d, error: %w", shardID, err)
return merr.WrapErrImportFailed(fmt.Sprintf("failed to assign a new segment for shard id %d, error: %v", shardID, err))
}

segment = &WorkingSegment{
@ -589,8 +590,8 @@ func (p *ImportWrapper) flushFunc(fields BlockData, shardID int, partitionID int
logFields = append(logFields, zap.Error(err), zap.Int64("segmentID", segment.segmentID),
zap.String("targetChannel", segment.targetChName))
log.Warn("import wrapper: failed to save binlogs", logFields...)
return fmt.Errorf("failed to save binlogs, shard id %d, segment id %d, channel '%s', error: %w",
shardID, segment.segmentID, segment.targetChName, err)
return merr.WrapErrImportFailed(fmt.Sprintf("failed to save binlogs, shard id %d, segment id %d, channel '%s', error: %v",
shardID, segment.segmentID, segment.targetChName, err))
}

segment.fieldsInsert = append(segment.fieldsInsert, fieldsInsert...)
@ -630,8 +631,8 @@ func (p *ImportWrapper) closeWorkingSegment(segment *WorkingSegment) error {
if err != nil {
logFields = append(logFields, zap.Error(err))
log.Warn("import wrapper: failed to seal segment", logFields...)
return fmt.Errorf("failed to seal segment, shard id %d, segment id %d, channel '%s', error: %w",
segment.shardID, segment.segmentID, segment.targetChName, err)
return merr.WrapErrImportFailed(fmt.Sprintf("failed to seal segment, shard id %d, segment id %d, channel '%s', error: %v",
segment.shardID, segment.segmentID, segment.targetChName, err))
}

return nil
@ -250,11 +250,11 @@ func Test_ImportWrapperRowBased(t *testing.T) {

content := []byte(`{
"rows":[
{"FieldBool": true, "FieldInt8": 10, "FieldInt16": 101, "FieldInt32": 1001, "FieldInt64": 10001, "FieldFloat": 3.14, "FieldDouble": 1.56, "FieldString": "hello world", "FieldJSON": {"x": 2}, "FieldBinaryVector": [254, 0], "FieldFloatVector": [1.1, 1.2, 1.3, 1.4], "FieldJSON": {"a": 7, "b": true}},
{"FieldBool": false, "FieldInt8": 11, "FieldInt16": 102, "FieldInt32": 1002, "FieldInt64": 10002, "FieldFloat": 3.15, "FieldDouble": 2.56, "FieldString": "hello world", "FieldJSON": "{\"k\": 2.5}", "FieldBinaryVector": [253, 0], "FieldFloatVector": [2.1, 2.2, 2.3, 2.4], "FieldJSON": {"a": 8, "b": 2}},
{"FieldBool": true, "FieldInt8": 12, "FieldInt16": 103, "FieldInt32": 1003, "FieldInt64": 10003, "FieldFloat": 3.16, "FieldDouble": 3.56, "FieldString": "hello world", "FieldJSON": {"y": "hello"}, "FieldBinaryVector": [252, 0], "FieldFloatVector": [3.1, 3.2, 3.3, 3.4], "FieldJSON": {"a": 9, "b": false}},
{"FieldBool": false, "FieldInt8": 13, "FieldInt16": 104, "FieldInt32": 1004, "FieldInt64": 10004, "FieldFloat": 3.17, "FieldDouble": 4.56, "FieldString": "hello world", "FieldJSON": "{}", "FieldBinaryVector": [251, 0], "FieldFloatVector": [4.1, 4.2, 4.3, 4.4], "FieldJSON": {"a": 10, "b": 2.15}},
{"FieldBool": true, "FieldInt8": 14, "FieldInt16": 105, "FieldInt32": 1005, "FieldInt64": 10005, "FieldFloat": 3.18, "FieldDouble": 5.56, "FieldString": "hello world", "FieldJSON": "{\"x\": true}", "FieldBinaryVector": [250, 0], "FieldFloatVector": [5.1, 5.2, 5.3, 5.4], "FieldJSON": {"a": 11, "b": "s"}}
{"FieldBool": true, "FieldInt8": 10, "FieldInt16": 101, "FieldInt32": 1001, "FieldInt64": 10001, "FieldFloat": 3.14, "FieldDouble": 1.56, "FieldString": "hello world", "FieldJSON": {"x": 2}, "FieldBinaryVector": [254, 0], "FieldFloatVector": [1.1, 1.2, 1.3, 1.4], "FieldJSON": {"a": 7, "b": true}, "FieldArray": [1, 2, 3, 4]},
{"FieldBool": false, "FieldInt8": 11, "FieldInt16": 102, "FieldInt32": 1002, "FieldInt64": 10002, "FieldFloat": 3.15, "FieldDouble": 2.56, "FieldString": "hello world", "FieldJSON": "{\"k\": 2.5}", "FieldBinaryVector": [253, 0], "FieldFloatVector": [2.1, 2.2, 2.3, 2.4], "FieldJSON": {"a": 8, "b": 2}, "FieldArray": [5, 6, 7, 8]},
{"FieldBool": true, "FieldInt8": 12, "FieldInt16": 103, "FieldInt32": 1003, "FieldInt64": 10003, "FieldFloat": 3.16, "FieldDouble": 3.56, "FieldString": "hello world", "FieldJSON": {"y": "hello"}, "FieldBinaryVector": [252, 0], "FieldFloatVector": [3.1, 3.2, 3.3, 3.4], "FieldJSON": {"a": 9, "b": false}, "FieldArray": [11, 22, 33, 44]},
{"FieldBool": false, "FieldInt8": 13, "FieldInt16": 104, "FieldInt32": 1004, "FieldInt64": 10004, "FieldFloat": 3.17, "FieldDouble": 4.56, "FieldString": "hello world", "FieldJSON": "{}", "FieldBinaryVector": [251, 0], "FieldFloatVector": [4.1, 4.2, 4.3, 4.4], "FieldJSON": {"a": 10, "b": 2.15}, "FieldArray": [10, 12, 13, 14]},
{"FieldBool": true, "FieldInt8": 14, "FieldInt16": 105, "FieldInt32": 1005, "FieldInt64": 10005, "FieldFloat": 3.18, "FieldDouble": 5.56, "FieldString": "hello world", "FieldJSON": "{\"x\": true}", "FieldBinaryVector": [250, 0], "FieldFloatVector": [5.1, 5.2, 5.3, 5.4], "FieldJSON": {"a": 11, "b": "s"}, "FieldArray": [21, 22, 23, 24]}
]
}`)

@ -342,10 +342,10 @@ func Test_ImportWrapperRowBased_CSV(t *testing.T) {

idAllocator := newIDAllocator(ctx, t, nil)
content := []byte(
`FieldBool,FieldInt8,FieldInt16,FieldInt32,FieldInt64,FieldFloat,FieldDouble,FieldString,FieldJSON,FieldBinaryVector,FieldFloatVector
true,10,101,1001,10001,3.14,1.56,No.0,"{""x"": 0}","[200,0]","[0.1,0.2,0.3,0.4]"
false,11,102,1002,10002,3.15,1.57,No.1,"{""x"": 1}","[201,0]","[0.1,0.2,0.3,0.4]"
true,12,103,1003,10003,3.16,1.58,No.2,"{""x"": 2}","[202,0]","[0.1,0.2,0.3,0.4]"`)
`FieldBool,FieldInt8,FieldInt16,FieldInt32,FieldInt64,FieldFloat,FieldDouble,FieldString,FieldJSON,FieldBinaryVector,FieldFloatVector,FieldArray
true,10,101,1001,10001,3.14,1.56,No.0,"{""x"": 0}","[200,0]","[0.1,0.2,0.3,0.4]","[1,2,3,4]"
false,11,102,1002,10002,3.15,1.57,No.1,"{""x"": 1}","[201,0]","[0.1,0.2,0.3,0.4]","[5,6,7,8]"
true,12,103,1003,10003,3.16,1.58,No.2,"{""x"": 2}","[202,0]","[0.1,0.2,0.3,0.4]","[9,10,11,12]"`)

filePath := TempFilesPath + "rows_1.csv"
err = cm.Write(ctx, filePath, content)
@ -443,7 +443,8 @@ func Test_ImportWrapperColumnBased_numpy(t *testing.T) {
reportFunc := func(res *rootcoordpb.ImportResult) error {
return nil
}
collectionInfo, err := NewCollectionInfo(sampleSchema(), 2, []int64{1})
schema := createNumpySchema()
collectionInfo, err := NewCollectionInfo(schema, 2, []int64{1})
assert.NoError(t, err)

files := createSampleNumpyFiles(t, cm)
@ -741,11 +742,11 @@ func Test_ImportWrapperReportFailRowBased(t *testing.T) {

content := []byte(`{
"rows":[
{"FieldBool": true, "FieldInt8": 10, "FieldInt16": 101, "FieldInt32": 1001, "FieldInt64": 10001, "FieldFloat": 3.14, "FieldDouble": 1.56, "FieldString": "hello world", "FieldJSON": "{\"x\": \"aaa\"}", "FieldBinaryVector": [254, 0], "FieldFloatVector": [1.1, 1.2, 1.3, 1.4], "FieldJSON": {"a": 9, "b": false}},
{"FieldBool": false, "FieldInt8": 11, "FieldInt16": 102, "FieldInt32": 1002, "FieldInt64": 10002, "FieldFloat": 3.15, "FieldDouble": 2.56, "FieldString": "hello world", "FieldJSON": "{}", "FieldBinaryVector": [253, 0], "FieldFloatVector": [2.1, 2.2, 2.3, 2.4], "FieldJSON": {"a": 9, "b": false}},
{"FieldBool": true, "FieldInt8": 12, "FieldInt16": 103, "FieldInt32": 1003, "FieldInt64": 10003, "FieldFloat": 3.16, "FieldDouble": 3.56, "FieldString": "hello world", "FieldJSON": "{\"x\": 2, \"y\": 5}", "FieldBinaryVector": [252, 0], "FieldFloatVector": [3.1, 3.2, 3.3, 3.4], "FieldJSON": {"a": 9, "b": false}},
{"FieldBool": false, "FieldInt8": 13, "FieldInt16": 104, "FieldInt32": 1004, "FieldInt64": 10004, "FieldFloat": 3.17, "FieldDouble": 4.56, "FieldString": "hello world", "FieldJSON": "{\"x\": true}", "FieldBinaryVector": [251, 0], "FieldFloatVector": [4.1, 4.2, 4.3, 4.4], "FieldJSON": {"a": 9, "b": false}},
{"FieldBool": true, "FieldInt8": 14, "FieldInt16": 105, "FieldInt32": 1005, "FieldInt64": 10005, "FieldFloat": 3.18, "FieldDouble": 5.56, "FieldString": "hello world", "FieldJSON": "{}", "FieldBinaryVector": [250, 0], "FieldFloatVector": [5.1, 5.2, 5.3, 5.4], "FieldJSON": {"a": 9, "b": false}}
{"FieldBool": true, "FieldInt8": 10, "FieldInt16": 101, "FieldInt32": 1001, "FieldInt64": 10001, "FieldFloat": 3.14, "FieldDouble": 1.56, "FieldString": "hello world", "FieldJSON": "{\"x\": \"aaa\"}", "FieldBinaryVector": [254, 0], "FieldFloatVector": [1.1, 1.2, 1.3, 1.4], "FieldJSON": {"a": 9, "b": false}, "FieldArray": [1, 2, 3, 4]},
{"FieldBool": false, "FieldInt8": 11, "FieldInt16": 102, "FieldInt32": 1002, "FieldInt64": 10002, "FieldFloat": 3.15, "FieldDouble": 2.56, "FieldString": "hello world", "FieldJSON": "{}", "FieldBinaryVector": [253, 0], "FieldFloatVector": [2.1, 2.2, 2.3, 2.4], "FieldJSON": {"a": 9, "b": false}, "FieldArray": [1, 2, 3, 4]},
{"FieldBool": true, "FieldInt8": 12, "FieldInt16": 103, "FieldInt32": 1003, "FieldInt64": 10003, "FieldFloat": 3.16, "FieldDouble": 3.56, "FieldString": "hello world", "FieldJSON": "{\"x\": 2, \"y\": 5}", "FieldBinaryVector": [252, 0], "FieldFloatVector": [3.1, 3.2, 3.3, 3.4], "FieldJSON": {"a": 9, "b": false}, "FieldArray": [1, 2, 3, 4]},
{"FieldBool": false, "FieldInt8": 13, "FieldInt16": 104, "FieldInt32": 1004, "FieldInt64": 10004, "FieldFloat": 3.17, "FieldDouble": 4.56, "FieldString": "hello world", "FieldJSON": "{\"x\": true}", "FieldBinaryVector": [251, 0], "FieldFloatVector": [4.1, 4.2, 4.3, 4.4], "FieldJSON": {"a": 9, "b": false}, "FieldArray": [1, 2, 3, 4]},
{"FieldBool": true, "FieldInt8": 14, "FieldInt16": 105, "FieldInt32": 1005, "FieldInt64": 10005, "FieldFloat": 3.18, "FieldDouble": 5.56, "FieldString": "hello world", "FieldJSON": "{}", "FieldBinaryVector": [250, 0], "FieldFloatVector": [5.1, 5.2, 5.3, 5.4], "FieldJSON": {"a": 9, "b": false}, "FieldArray": [1, 2, 3, 4]}
]
}`)

@ -817,7 +818,7 @@ func Test_ImportWrapperReportFailColumnBased_numpy(t *testing.T) {
reportFunc := func(res *rootcoordpb.ImportResult) error {
return nil
}
collectionInfo, err := NewCollectionInfo(sampleSchema(), 2, []int64{1})
collectionInfo, err := NewCollectionInfo(createNumpySchema(), 2, []int64{1})
assert.NoError(t, err)
wrapper := NewImportWrapper(ctx, collectionInfo, 1, ReadBufferSize, idAllocator, cm, importResult, reportFunc)
wrapper.SetCallbackFunctions(assignSegmentFunc, flushFunc, saveSegmentFunc)
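The test payloads above add a FieldArray column to every row, which is the user-visible effect of this change: a row-based import file may now carry an array-typed field. A minimal, hedged example of such a file as a Go raw string (field names follow the test schema; the collection schema must declare FieldArray as an Array field with a matching element type, which is assumed here):

package main

import "fmt"

func main() {
	// Minimal row-based import payload with an Array field.
	// "FieldArray" is assumed to be declared as DataType_Array with Int32 elements.
	payload := `{
  "rows": [
    {"FieldInt64": 1, "FieldFloatVector": [0.1, 0.2], "FieldArray": [1, 2, 3, 4]},
    {"FieldInt64": 2, "FieldFloatVector": [0.3, 0.4], "FieldArray": [5, 6, 7, 8]}
  ]
}`
	fmt.Println(payload)
}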
@ -22,13 +22,13 @@ import (
"fmt"
"strconv"

"github.com/cockroachdb/errors"
"go.uber.org/zap"

"github.com/milvus-io/milvus/internal/allocator"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)

@ -43,14 +43,14 @@ func getKeyValue(obj interface{}, fieldName string, isString bool) (string, erro
if value, ok := obj.(string); ok {
return value, nil
}
return "", fmt.Errorf("illegal value '%v' for varchar type key field '%s'", obj, fieldName)
return "", merr.WrapErrImportFailed(fmt.Sprintf("illegal value '%v' for varchar type key field '%s'", obj, fieldName))
}

// int64 type primary field, the value must be json.Number
if num, ok := obj.(json.Number); ok {
return string(num), nil
}
return "", fmt.Errorf("illegal value '%v' for int64 type key field '%s'", obj, fieldName)
return "", merr.WrapErrImportFailed(fmt.Sprintf("illegal value '%v' for int64 type key field '%s'", obj, fieldName))
}

// JSONRowConsumer is row-based json format consumer class
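getKeyValue normalizes a primary-key value coming out of encoding/json: VarChar keys must already be strings, while Int64 keys arrive as json.Number, typically because the decoder is configured with UseNumber(). A small standalone sketch of the same dispatch (a hypothetical helper, not the production function):

package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

// keyToString converts a decoded JSON value into the textual form of a
// primary key, mirroring the varchar/int64 split in getKeyValue.
func keyToString(obj interface{}, fieldName string, isString bool) (string, error) {
	if isString {
		if v, ok := obj.(string); ok {
			return v, nil
		}
		return "", fmt.Errorf("illegal value '%v' for varchar type key field '%s'", obj, fieldName)
	}
	if num, ok := obj.(json.Number); ok {
		return string(num), nil
	}
	return "", fmt.Errorf("illegal value '%v' for int64 type key field '%s'", obj, fieldName)
}

func main() {
	dec := json.NewDecoder(strings.NewReader(`{"ID": 42}`))
	dec.UseNumber() // keep integers as json.Number instead of float64
	var row map[string]interface{}
	if err := dec.Decode(&row); err != nil {
		panic(err)
	}
	fmt.Println(keyToString(row["ID"], "ID", false)) // 42 <nil>
}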
@ -75,7 +75,7 @@ func NewJSONRowConsumer(ctx context.Context,
) (*JSONRowConsumer, error) {
if collectionInfo == nil {
log.Warn("JSON row consumer: collection schema is nil")
return nil, errors.New("collection schema is nil")
return nil, merr.WrapErrImportFailed("collection schema is nil")
}

v := &JSONRowConsumer{
@ -92,7 +92,7 @@ func NewJSONRowConsumer(ctx context.Context,
err := initValidators(collectionInfo.Schema, v.validators)
if err != nil {
log.Warn("JSON row consumer: fail to initialize json row-based consumer", zap.Error(err))
return nil, fmt.Errorf("fail to initialize json row-based consumer, error: %w", err)
return nil, merr.WrapErrImportFailed(fmt.Sprintf("fail to initialize json row-based consumer, error: %v", err))
}

v.shardsData = make([]ShardData, 0, collectionInfo.ShardNum)
@ -100,7 +100,7 @@ func NewJSONRowConsumer(ctx context.Context,
shardData := initShardData(collectionInfo.Schema, collectionInfo.PartitionIDs)
if shardData == nil {
log.Warn("JSON row consumer: fail to initialize in-memory segment data", zap.Int("shardID", i))
return nil, fmt.Errorf("fail to initialize in-memory segment data for shard id %d", i)
return nil, merr.WrapErrImportFailed(fmt.Sprintf("fail to initialize in-memory segment data for shard id %d", i))
}
v.shardsData = append(v.shardsData, shardData)
}
@ -108,7 +108,7 @@ func NewJSONRowConsumer(ctx context.Context,
// primary key is autoid, id generator is required
if v.collectionInfo.PrimaryKey.GetAutoID() && idAlloc == nil {
log.Warn("JSON row consumer: ID allocator is nil")
return nil, errors.New("ID allocator is nil")
return nil, merr.WrapErrImportFailed("ID allocator is nil")
}

return v, nil
@ -125,7 +125,7 @@ func (v *JSONRowConsumer) RowCount() int64 {
func (v *JSONRowConsumer) Handle(rows []map[storage.FieldID]interface{}) error {
if v == nil || v.validators == nil || len(v.validators) == 0 {
log.Warn("JSON row consumer is not initialized")
return errors.New("JSON row consumer is not initialized")
return merr.WrapErrImportFailed("JSON row consumer is not initialized")
}

// if rows is nil, that means read to end of file, force flush all data
@ -141,7 +141,7 @@ func (v *JSONRowConsumer) Handle(rows []map[storage.FieldID]interface{}) error {
err := tryFlushBlocks(v.ctx, v.shardsData, v.collectionInfo.Schema, v.callFlushFunc, v.blockSize, MaxTotalSizeInMemory, false)
if err != nil {
log.Warn("JSON row consumer: try flush data but failed", zap.Error(err))
return fmt.Errorf("try flush data but failed, error: %w", err)
return merr.WrapErrImportFailed(fmt.Sprintf("try flush data but failed, error: %v", err))
}

// prepare autoid, no matter int64 or varchar pk, we always generate autoid since the hidden field RowIDField requires them
@ -152,18 +152,18 @@ func (v *JSONRowConsumer) Handle(rows []map[storage.FieldID]interface{}) error {
if primaryValidator.autoID {
if v.rowIDAllocator == nil {
log.Warn("JSON row consumer: primary keys is auto-generated but IDAllocator is nil")
return fmt.Errorf("primary keys is auto-generated but IDAllocator is nil")
return merr.WrapErrImportFailed("primary keys is auto-generated but IDAllocator is nil")
}
var err error
rowIDBegin, rowIDEnd, err = v.rowIDAllocator.Alloc(uint32(len(rows)))
if err != nil {
log.Warn("JSON row consumer: failed to generate primary keys", zap.Int("count", len(rows)), zap.Error(err))
return fmt.Errorf("failed to generate %d primary keys, error: %w", len(rows), err)
return merr.WrapErrImportFailed(fmt.Sprintf("failed to generate %d primary keys, error: %v", len(rows), err))
}
if rowIDEnd-rowIDBegin != int64(len(rows)) {
log.Warn("JSON row consumer: try to generate primary keys but allocated ids are not enough",
zap.Int("count", len(rows)), zap.Int64("generated", rowIDEnd-rowIDBegin))
return fmt.Errorf("try to generate %d primary keys but only %d keys were allocated", len(rows), rowIDEnd-rowIDBegin)
return merr.WrapErrImportFailed(fmt.Sprintf("try to generate %d primary keys but only %d keys were allocated", len(rows), rowIDEnd-rowIDBegin))
}
log.Info("JSON row consumer: auto-generate primary keys", zap.Int64("begin", rowIDBegin), zap.Int64("end", rowIDEnd))
if !primaryValidator.isString {
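When the primary key is auto-generated, Handle reserves a contiguous [rowIDBegin, rowIDEnd) range from the ID allocator and then assigns one ID per row. A minimal sketch of that assignment step, with a fake allocator standing in for the real one:

package main

import "fmt"

// fakeAlloc stands in for the row ID allocator: it hands out a contiguous
// range of `count` IDs and returns [begin, end).
func fakeAlloc(next *int64, count uint32) (int64, int64) {
	begin := *next
	*next += int64(count)
	return begin, *next
}

func main() {
	rows := []map[string]interface{}{{"v": 1.0}, {"v": 2.0}, {"v": 3.0}}
	next := int64(10000)

	begin, end := fakeAlloc(&next, uint32(len(rows)))
	if end-begin != int64(len(rows)) {
		panic("allocated ids are not enough")
	}
	// Assign one auto-generated primary key per row, like the int64 branch
	// that follows the allocation above.
	for i := range rows {
		rows[i]["ID"] = begin + int64(i)
	}
	fmt.Println(rows)
}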
@ -183,7 +183,7 @@ func (v *JSONRowConsumer) Handle(rows []map[storage.FieldID]interface{}) error {
if primaryValidator.isString {
if primaryValidator.autoID {
log.Warn("JSON row consumer: string type primary key cannot be auto-generated")
return errors.New("string type primary key cannot be auto-generated")
return merr.WrapErrImportFailed("string type primary key cannot be auto-generated")
}

value := row[primaryKeyID]
@ -191,7 +191,7 @@ func (v *JSONRowConsumer) Handle(rows []map[storage.FieldID]interface{}) error {
if err != nil {
log.Warn("JSON row consumer: failed to parse primary key at the row",
zap.Int64("rowNumber", rowNumber), zap.Error(err))
return fmt.Errorf("failed to parse primary key at the row %d, error: %w", rowNumber, err)
return merr.WrapErrImportFailed(fmt.Sprintf("failed to parse primary key at the row %d, error: %v", rowNumber, err))
}

// hash to shard based on pk, hash to partition if partition key exist
@ -215,7 +215,7 @@ func (v *JSONRowConsumer) Handle(rows []map[storage.FieldID]interface{}) error {
if err != nil {
log.Warn("JSON row consumer: failed to parse primary key at the row",
zap.Int64("rowNumber", rowNumber), zap.Error(err))
return fmt.Errorf("failed to parse primary key at the row %d, error: %w", rowNumber, err)
return merr.WrapErrImportFailed(fmt.Sprintf("failed to parse primary key at the row %d, error: %v", rowNumber, err))
}

// parse the pk from a string
@ -223,8 +223,8 @@ func (v *JSONRowConsumer) Handle(rows []map[storage.FieldID]interface{}) error {
if err != nil {
log.Warn("JSON row consumer: failed to parse primary key at the row",
zap.String("value", strValue), zap.Int64("rowNumber", rowNumber), zap.Error(err))
return fmt.Errorf("failed to parse primary key '%s' at the row %d, error: %w",
strValue, rowNumber, err)
return merr.WrapErrImportFailed(fmt.Sprintf("failed to parse primary key '%s' at the row %d, error: %v",
strValue, rowNumber, err))
}
}

@ -232,7 +232,7 @@ func (v *JSONRowConsumer) Handle(rows []map[storage.FieldID]interface{}) error {
if err != nil {
log.Warn("JSON row consumer: failed to hash primary key at the row",
zap.Int64("key", pk), zap.Int64("rowNumber", rowNumber), zap.Error(err))
return fmt.Errorf("failed to hash primary key %d at the row %d, error: %w", pk, rowNumber, err)
return merr.WrapErrImportFailed(fmt.Sprintf("failed to hash primary key %d at the row %d, error: %v", pk, rowNumber, err))
}

// hash to shard based on pk, hash to partition if partition key exist
@ -259,8 +259,8 @@ func (v *JSONRowConsumer) Handle(rows []map[storage.FieldID]interface{}) error {
if err := validator.convertFunc(value, v.shardsData[shard][partitionID][fieldID]); err != nil {
log.Warn("JSON row consumer: failed to convert value for field at the row",
zap.String("fieldName", validator.fieldName), zap.Int64("rowNumber", rowNumber), zap.Error(err))
return fmt.Errorf("failed to convert value for field '%s' at the row %d, error: %w",
validator.fieldName, rowNumber, err)
return merr.WrapErrImportFailed(fmt.Sprintf("failed to convert value for field '%s' at the row %d, error: %v",
validator.fieldName, rowNumber, err))
}
}
}
@ -275,7 +275,7 @@ func (v *JSONRowConsumer) Handle(rows []map[storage.FieldID]interface{}) error {
func (v *JSONRowConsumer) hashToPartition(row map[storage.FieldID]interface{}, rowNumber int64) (int64, error) {
if v.collectionInfo.PartitionKey == nil {
if len(v.collectionInfo.PartitionIDs) != 1 {
return 0, fmt.Errorf("collection '%s' partition list is empty", v.collectionInfo.Schema.Name)
return 0, merr.WrapErrImportFailed(fmt.Sprintf("collection '%s' partition list is empty", v.collectionInfo.Schema.Name))
}
// no partition key, directly return the target partition id
return v.collectionInfo.PartitionIDs[0], nil
@ -288,7 +288,7 @@ func (v *JSONRowConsumer) hashToPartition(row map[storage.FieldID]interface{}, r
if err != nil {
log.Warn("JSON row consumer: failed to parse partition key at the row",
zap.Int64("rowNumber", rowNumber), zap.Error(err))
return 0, fmt.Errorf("failed to parse partition key at the row %d, error: %w", rowNumber, err)
return 0, merr.WrapErrImportFailed(fmt.Sprintf("failed to parse partition key at the row %d, error: %v", rowNumber, err))
}

var hashValue uint32
@ -300,15 +300,15 @@ func (v *JSONRowConsumer) hashToPartition(row map[storage.FieldID]interface{}, r
if err != nil {
log.Warn("JSON row consumer: failed to parse partition key at the row",
zap.String("value", strValue), zap.Int64("rowNumber", rowNumber), zap.Error(err))
return 0, fmt.Errorf("failed to parse partition key '%s' at the row %d, error: %w",
strValue, rowNumber, err)
return 0, merr.WrapErrImportFailed(fmt.Sprintf("failed to parse partition key '%s' at the row %d, error: %v",
strValue, rowNumber, err))
}

hashValue, err = typeutil.Hash32Int64(pk)
if err != nil {
log.Warn("JSON row consumer: failed to hash partition key at the row",
zap.Int64("key", pk), zap.Int64("rowNumber", rowNumber), zap.Error(err))
return 0, fmt.Errorf("failed to hash partition key %d at the row %d, error: %w", pk, rowNumber, err)
return 0, merr.WrapErrImportFailed(fmt.Sprintf("failed to hash partition key %d at the row %d, error: %v", pk, rowNumber, err))
}
}
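Both Handle and hashToPartition reduce a key to a bucket index: the primary or partition key is hashed with typeutil.Hash32Int64 and then taken modulo the number of shards or partitions. A simplified sketch of that routing, using the standard library's FNV hash in place of the Milvus helper (the exact hash function is an assumption here; only the modulo routing matters):

package main

import (
	"encoding/binary"
	"fmt"
	"hash/fnv"
)

// hash32Int64 mimics a 32-bit hash of an int64 key; Milvus uses its own
// typeutil.Hash32Int64, FNV-1a is just a stand-in for the sketch.
func hash32Int64(v int64) uint32 {
	b := make([]byte, 8)
	binary.LittleEndian.PutUint64(b, uint64(v))
	h := fnv.New32a()
	h.Write(b)
	return h.Sum32()
}

func main() {
	const shardNum = 2
	partitionIDs := []int64{100, 101, 102}

	pk := int64(10001)
	shard := hash32Int64(pk) % shardNum
	// With a partition key, the same idea picks the target partition;
	// without one, the single configured partition is used directly.
	partition := partitionIDs[hash32Int64(pk)%uint32(len(partitionIDs))]

	fmt.Println("shard:", shard, "partition:", partition)
}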
@ -23,13 +23,13 @@ import (
"io"
"strings"

"github.com/cockroachdb/errors"
"go.uber.org/zap"
"golang.org/x/exp/maps"

"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)

@ -115,7 +115,7 @@ func (p *JSONParser) combineDynamicRow(dynamicValues map[string]interface{}, row
err := desc.Decode(&mp)
if err != nil {
// invalid input
return errors.New("illegal value for dynamic field, not a JSON format string")
return merr.WrapErrImportFailed("illegal value for dynamic field, not a JSON format string")
}

maps.Copy(dynamicValues, mp)
@ -124,7 +124,7 @@ func (p *JSONParser) combineDynamicRow(dynamicValues map[string]interface{}, row
maps.Copy(dynamicValues, mp)
} else {
// invalid input
return errors.New("illegal value for dynamic field, not a JSON object")
return merr.WrapErrImportFailed("illegal value for dynamic field, not a JSON object")
}
row[dynamicFieldID] = dynamicValues
}
@ -146,7 +146,7 @@ func (p *JSONParser) verifyRow(raw interface{}) (map[storage.FieldID]interface{}
stringMap, ok := raw.(map[string]interface{})
if !ok {
log.Warn("JSON parser: invalid JSON format, each row should be a key-value map")
return nil, errors.New("invalid JSON format, each row should be a key-value map")
return nil, merr.WrapErrImportFailed("invalid JSON format, each row should be a key-value map")
}

dynamicValues := make(map[string]interface{})
@ -157,7 +157,7 @@ func (p *JSONParser) verifyRow(raw interface{}) (map[storage.FieldID]interface{}
if (fieldID == p.collectionInfo.PrimaryKey.GetFieldID()) && p.collectionInfo.PrimaryKey.GetAutoID() {
// primary key is auto-id, no need to provide
log.Warn("JSON parser: the primary key is auto-generated, no need to provide", zap.String("fieldName", k))
return nil, fmt.Errorf("the primary key '%s' is auto-generated, no need to provide", k)
return nil, merr.WrapErrImportFailed(fmt.Sprintf("the primary key '%s' is auto-generated, no need to provide", k))
}

if ok {
@ -168,7 +168,7 @@ func (p *JSONParser) verifyRow(raw interface{}) (map[storage.FieldID]interface{}
} else {
// no dynamic field. if user provided redundant field, return error
log.Warn("JSON parser: the field is not defined in collection schema", zap.String("fieldName", k))
return nil, fmt.Errorf("the field '%s' is not defined in collection schema", k)
return nil, merr.WrapErrImportFailed(fmt.Sprintf("the field '%s' is not defined in collection schema", k))
}
}

@ -189,7 +189,7 @@ func (p *JSONParser) verifyRow(raw interface{}) (map[storage.FieldID]interface{}
if !ok {
// not auto-id primary key, no dynamic field, must provide value
log.Warn("JSON parser: a field value is missed", zap.String("fieldName", k))
return nil, fmt.Errorf("value of field '%s' is missed", k)
return nil, merr.WrapErrImportFailed(fmt.Sprintf("value of field '%s' is missed", k))
}
}
}
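verifyRow sorts every JSON key into one of three buckets: a declared schema field, the auto-generated primary key (which must not be provided), or, when the collection has a dynamic field, a leftover key that combineDynamicRow folds into the dynamic column. A rough illustration of that bucketing, with a hard-coded schema standing in for the real collection info:

package main

import "fmt"

// splitRow separates declared fields from leftovers that would go into the
// dynamic field; it is a toy version of verifyRow/combineDynamicRow.
func splitRow(row map[string]interface{}, schemaFields map[string]bool) (map[string]interface{}, map[string]interface{}) {
	known := make(map[string]interface{})
	dynamic := make(map[string]interface{})
	for k, v := range row {
		if schemaFields[k] {
			known[k] = v
		} else {
			dynamic[k] = v // would be rejected if the schema had no dynamic field
		}
	}
	return known, dynamic
}

func main() {
	schema := map[string]bool{"FieldInt64": true, "FieldFloatVector": true}
	row := map[string]interface{}{
		"FieldInt64":       int64(1),
		"FieldFloatVector": []float64{0.1, 0.2},
		"extra":            "goes to the dynamic field",
	}
	known, dynamic := splitRow(row, schema)
	fmt.Println(known, dynamic)
}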
@ -207,7 +207,7 @@ func (p *JSONParser) verifyRow(raw interface{}) (map[storage.FieldID]interface{}
func (p *JSONParser) ParseRows(reader *IOReader, handler JSONRowHandler) error {
if handler == nil || reader == nil {
log.Warn("JSON parse handler is nil")
return errors.New("JSON parse handler is nil")
return merr.WrapErrImportFailed("JSON parse handler is nil")
}

dec := json.NewDecoder(reader.r)
@ -232,42 +232,42 @@ func (p *JSONParser) ParseRows(reader *IOReader, handler JSONRowHandler) error {
t, err := dec.Token()
if err != nil {
log.Warn("JSON parser: failed to decode the JSON file", zap.Error(err))
return fmt.Errorf("failed to decode the JSON file, error: %w", err)
return merr.WrapErrImportFailed(fmt.Sprintf("failed to decode the JSON file, error: %v", err))
}
if t != json.Delim('{') && t != json.Delim('[') {
log.Warn("JSON parser: invalid JSON format, the content should be started with '{' or '['")
return errors.New("invalid JSON format, the content should be started with '{' or '['")
return merr.WrapErrImportFailed("invalid JSON format, the content should be started with '{' or '['")
}

// read the first level
isEmpty := true
isOldFormat := (t == json.Delim('{'))
isOldFormat := t == json.Delim('{')
for dec.More() {
if isOldFormat {
// read the key
t, err := dec.Token()
if err != nil {
log.Warn("JSON parser: failed to decode the JSON file", zap.Error(err))
return fmt.Errorf("failed to decode the JSON file, error: %w", err)
return merr.WrapErrImportFailed(fmt.Sprintf("failed to decode the JSON file, error: %v", err))
}
key := t.(string)
keyLower := strings.ToLower(key)
// the root key should be RowRootNode
if keyLower != RowRootNode {
log.Warn("JSON parser: invalid JSON format, the root key is not found", zap.String("RowRootNode", RowRootNode), zap.String("key", key))
return fmt.Errorf("invalid JSON format, the root key should be '%s', but get '%s'", RowRootNode, key)
return merr.WrapErrImportFailed(fmt.Sprintf("invalid JSON format, the root key should be '%s', but get '%s'", RowRootNode, key))
}

// started by '['
t, err = dec.Token()
if err != nil {
log.Warn("JSON parser: failed to decode the JSON file", zap.Error(err))
return fmt.Errorf("failed to decode the JSON file, error: %w", err)
return merr.WrapErrImportFailed(fmt.Sprintf("failed to decode the JSON file, error: %v", err))
}

if t != json.Delim('[') {
log.Warn("JSON parser: invalid JSON format, rows list should begin with '['")
return errors.New("invalid JSON format, rows list should begin with '['")
return merr.WrapErrImportFailed("invalid JSON format, rows list should begin with '['")
}
}

@ -277,7 +277,7 @@ func (p *JSONParser) ParseRows(reader *IOReader, handler JSONRowHandler) error {
var value interface{}
if err := dec.Decode(&value); err != nil {
log.Warn("JSON parser: failed to parse row value", zap.Error(err))
return fmt.Errorf("failed to parse row value, error: %w", err)
return merr.WrapErrImportFailed(fmt.Sprintf("failed to parse row value, error: %v", err))
}

row, err := p.verifyRow(value)
@ -292,7 +292,7 @@ func (p *JSONParser) ParseRows(reader *IOReader, handler JSONRowHandler) error {
isEmpty = false
if err = handler.Handle(buf); err != nil {
log.Warn("JSON parser: failed to convert row value to entity", zap.Error(err))
return fmt.Errorf("failed to convert row value to entity, error: %w", err)
return merr.WrapErrImportFailed(fmt.Sprintf("failed to convert row value to entity, error: %v", err))
}

// clear the buffer
@ -305,7 +305,7 @@ func (p *JSONParser) ParseRows(reader *IOReader, handler JSONRowHandler) error {
isEmpty = false
if err = handler.Handle(buf); err != nil {
log.Warn("JSON parser: failed to convert row value to entity", zap.Error(err))
return fmt.Errorf("failed to convert row value to entity, error: %w", err)
return merr.WrapErrImportFailed(fmt.Sprintf("failed to convert row value to entity, error: %v", err))
}
}

@ -313,18 +313,18 @@ func (p *JSONParser) ParseRows(reader *IOReader, handler JSONRowHandler) error {
t, err = dec.Token()
if err != nil {
log.Warn("JSON parser: failed to decode the JSON file", zap.Error(err))
return fmt.Errorf("failed to decode the JSON file, error: %w", err)
return merr.WrapErrImportFailed(fmt.Sprintf("failed to decode the JSON file, error: %v", err))
}

if t != json.Delim(']') {
log.Warn("JSON parser: invalid JSON format, rows list should end with a ']'")
return errors.New("invalid JSON format, rows list should end with a ']'")
return merr.WrapErrImportFailed("invalid JSON format, rows list should end with a ']'")
}

// outside context might be canceled(service stop, or future enhancement for canceling import task)
if isCanceled(p.ctx) {
log.Warn("JSON parser: import task was canceled")
return errors.New("import task was canceled")
return merr.WrapErrImportFailed("import task was canceled")
}

// nolint
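ParseRows peeks at the first JSON token to decide between the legacy layout, a top-level object whose "rows" key holds the list, and the newer layout where the file is the list itself. A small sketch of that detection with encoding/json (the streaming and per-row handling of the real parser is omitted):

package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

// detectLayout reports whether the input uses the old {"rows":[...]} wrapper
// or is a bare JSON array of rows.
func detectLayout(input string) (string, error) {
	dec := json.NewDecoder(strings.NewReader(input))
	t, err := dec.Token()
	if err != nil {
		return "", err
	}
	switch t {
	case json.Delim('{'):
		return "old format: object with a rows list", nil
	case json.Delim('['):
		return "new format: bare list of rows", nil
	default:
		return "", fmt.Errorf("content should start with '{' or '['")
	}
}

func main() {
	oldFormat := `{"rows": [{"FieldInt64": 1}]}`
	newFormat := `[{"FieldInt64": 1}]`
	fmt.Println(detectLayout(oldFormat))
	fmt.Println(detectLayout(newFormat))
}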
@ -112,6 +112,7 @@ func Test_JSONParserParseRows_IntPK(t *testing.T) {
FieldJSON: fmt.Sprintf("{\"x\": %d}", i),
FieldBinaryVector: []int{(200 + i) % math.MaxUint8, 0},
FieldFloatVector: []float32{float32(i) + 0.1, float32(i) + 0.2, float32(i) + 0.3, float32(i) + 0.4},
FieldArray: []int32{1, 2, 3},
}
content.Rows = append(content.Rows, row)
}
@ -187,6 +188,17 @@ func Test_JSONParserParseRows_IntPK(t *testing.T) {
assert.NoError(t, err)
assert.InDelta(t, contenctRow.FieldFloatVector[k], float32(fval), 10e-6)
}

v11, ok := parsedRow[113].([]interface{})
assert.True(t, ok)
assert.Equal(t, len(contenctRow.FieldArray), len(v11))
for k := 0; k < len(v11); k++ {
val, ok := v11[k].(json.Number)
assert.True(t, ok)
ival, err := strconv.ParseInt(string(val), 0, 32)
assert.NoError(t, err)
assert.Equal(t, contenctRow.FieldArray[k], int32(ival))
}
}
}
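The new test block reads the array field back as a []interface{} of json.Number and converts each element to int32 before comparing. A compact, hedged sketch of the same conversion outside the test (Int32 is assumed as the array element type, matching the test data):

package main

import (
	"encoding/json"
	"fmt"
	"strconv"
	"strings"
)

// toInt32Slice converts a decoded JSON array into []int32, the element type
// assumed for the FieldArray column in these tests.
func toInt32Slice(raw interface{}) ([]int32, error) {
	arr, ok := raw.([]interface{})
	if !ok {
		return nil, fmt.Errorf("value is not a JSON array")
	}
	out := make([]int32, 0, len(arr))
	for _, v := range arr {
		num, ok := v.(json.Number)
		if !ok {
			return nil, fmt.Errorf("element %v is not a number", v)
		}
		ival, err := strconv.ParseInt(string(num), 0, 32)
		if err != nil {
			return nil, err
		}
		out = append(out, int32(ival))
	}
	return out, nil
}

func main() {
	dec := json.NewDecoder(strings.NewReader(`{"FieldArray": [1, 2, 3, 4]}`))
	dec.UseNumber()
	var row map[string]interface{}
	if err := dec.Decode(&row); err != nil {
		panic(err)
	}
	fmt.Println(toInt32Slice(row["FieldArray"])) // [1 2 3 4] <nil>
}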
@ -27,7 +27,6 @@ import (
"strconv"
"unicode/utf8"

"github.com/cockroachdb/errors"
"github.com/sbinet/npyio"
"github.com/sbinet/npyio/npy"
"go.uber.org/zap"
@ -35,6 +34,7 @@ import (

"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/merr"
)

var (
@ -139,7 +139,7 @@ func convertNumpyType(typeStr string) (schemapb.DataType, error) {
return schemapb.DataType_VarChar, nil
}
log.Warn("Numpy adapter: the numpy file data type is not supported", zap.String("dtype", typeStr))
return schemapb.DataType_None, fmt.Errorf("the numpy file dtype '%s' is not supported", typeStr)
return schemapb.DataType_None, merr.WrapErrImportFailed(fmt.Sprintf("the numpy file dtype '%s' is not supported", typeStr))
}
}

@ -182,7 +182,7 @@ func stringLen(dtype string) (int, bool, error) {
}

log.Warn("Numpy adapter: the numpy file dtype is not varchar data type", zap.String("dtype", dtype))
return 0, false, fmt.Errorf("dtype '%s' of numpy file is not varchar data type", dtype)
return 0, false, merr.WrapErrImportFailed(fmt.Sprintf("dtype '%s' of numpy file is not varchar data type", dtype))
}

func isStringType(typeStr string) bool {
@ -255,13 +255,13 @@ func (n *NumpyAdapter) checkCount(count int) int {
func (n *NumpyAdapter) ReadBool(count int) ([]bool, error) {
if count <= 0 {
log.Warn("Numpy adapter: cannot read bool data with a zero or negative count")
return nil, errors.New("cannot read bool data with a zero or negative count")
return nil, merr.WrapErrImportFailed("cannot read bool data with a zero or negative count")
}

// incorrect type
if n.dataType != schemapb.DataType_Bool {
log.Warn("Numpy adapter: numpy data is not bool type")
return nil, errors.New("numpy data is not bool type")
return nil, merr.WrapErrImportFailed("numpy data is not bool type")
}

// avoid read overflow
@ -277,7 +277,7 @@ func (n *NumpyAdapter) ReadBool(count int) ([]bool, error) {
|
||||
err := binary.Read(n.reader, n.order, &data)
|
||||
if err != nil {
|
||||
log.Warn("Numpy adapter: failed to read bool data", zap.Int("count", count), zap.Error(err))
|
||||
return nil, fmt.Errorf(" failed to read bool data with count %d, error: %w", readSize, err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf(" failed to read bool data with count %d, error: %v", readSize, err))
|
||||
}
|
||||
|
||||
// update read position after successfully read
|
||||
@ -289,7 +289,7 @@ func (n *NumpyAdapter) ReadBool(count int) ([]bool, error) {
|
||||
func (n *NumpyAdapter) ReadUint8(count int) ([]uint8, error) {
|
||||
if count <= 0 {
|
||||
log.Warn("Numpy adapter: cannot read uint8 data with a zero or nagative count")
|
||||
return nil, errors.New("cannot read uint8 data with a zero or nagative count")
|
||||
return nil, merr.WrapErrImportFailed("cannot read uint8 data with a zero or nagative count")
|
||||
}
|
||||
|
||||
// incorrect type
|
||||
@ -298,7 +298,7 @@ func (n *NumpyAdapter) ReadUint8(count int) ([]uint8, error) {
|
||||
case "u1", "<u1", "|u1", "uint8":
|
||||
default:
|
||||
log.Warn("Numpy adapter: numpy data is not uint8 type")
|
||||
return nil, errors.New("numpy data is not uint8 type")
|
||||
return nil, merr.WrapErrImportFailed("numpy data is not uint8 type")
|
||||
}
|
||||
|
||||
// avoid read overflow
|
||||
@ -314,7 +314,7 @@ func (n *NumpyAdapter) ReadUint8(count int) ([]uint8, error) {
|
||||
err := binary.Read(n.reader, n.order, &data)
|
||||
if err != nil {
|
||||
log.Warn("Numpy adapter: failed to read uint8 data", zap.Int("count", count), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to read uint8 data with count %d, error: %w", readSize, err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read uint8 data with count %d, error: %v", readSize, err))
|
||||
}
|
||||
|
||||
// update read position after successfully read
|
||||
@ -326,13 +326,13 @@ func (n *NumpyAdapter) ReadUint8(count int) ([]uint8, error) {
|
||||
func (n *NumpyAdapter) ReadInt8(count int) ([]int8, error) {
|
||||
if count <= 0 {
|
||||
log.Warn("Numpy adapter: cannot read int8 data with a zero or nagative count")
|
||||
return nil, errors.New("cannot read int8 data with a zero or nagative count")
|
||||
return nil, merr.WrapErrImportFailed("cannot read int8 data with a zero or nagative count")
|
||||
}
|
||||
|
||||
// incorrect type
|
||||
if n.dataType != schemapb.DataType_Int8 {
|
||||
log.Warn("Numpy adapter: numpy data is not int8 type")
|
||||
return nil, errors.New("numpy data is not int8 type")
|
||||
return nil, merr.WrapErrImportFailed("numpy data is not int8 type")
|
||||
}
|
||||
|
||||
// avoid read overflow
|
||||
@ -348,7 +348,7 @@ func (n *NumpyAdapter) ReadInt8(count int) ([]int8, error) {
|
||||
err := binary.Read(n.reader, n.order, &data)
|
||||
if err != nil {
|
||||
log.Warn("Numpy adapter: failed to read int8 data", zap.Int("count", count), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to read int8 data with count %d, error: %w", readSize, err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read int8 data with count %d, error: %v", readSize, err))
|
||||
}
|
||||
|
||||
// update read position after successfully read
|
||||
@ -360,13 +360,13 @@ func (n *NumpyAdapter) ReadInt8(count int) ([]int8, error) {
|
||||
func (n *NumpyAdapter) ReadInt16(count int) ([]int16, error) {
|
||||
if count <= 0 {
|
||||
log.Warn("Numpy adapter: cannot read int16 data with a zero or nagative count")
|
||||
return nil, errors.New("cannot read int16 data with a zero or nagative count")
|
||||
return nil, merr.WrapErrImportFailed("cannot read int16 data with a zero or nagative count")
|
||||
}
|
||||
|
||||
// incorrect type
|
||||
if n.dataType != schemapb.DataType_Int16 {
|
||||
log.Warn("Numpy adapter: numpy data is not int16 type")
|
||||
return nil, errors.New("numpy data is not int16 type")
|
||||
return nil, merr.WrapErrImportFailed("numpy data is not int16 type")
|
||||
}
|
||||
|
||||
// avoid read overflow
|
||||
@ -382,7 +382,7 @@ func (n *NumpyAdapter) ReadInt16(count int) ([]int16, error) {
|
||||
err := binary.Read(n.reader, n.order, &data)
|
||||
if err != nil {
|
||||
log.Warn("Numpy adapter: failed to read int16 data", zap.Int("count", count), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to read int16 data with count %d, error: %w", readSize, err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read int16 data with count %d, error: %v", readSize, err))
|
||||
}
|
||||
|
||||
// update read position after successfully read
|
||||
@ -394,13 +394,13 @@ func (n *NumpyAdapter) ReadInt16(count int) ([]int16, error) {
|
||||
func (n *NumpyAdapter) ReadInt32(count int) ([]int32, error) {
|
||||
if count <= 0 {
|
||||
log.Warn("Numpy adapter: cannot read int32 data with a zero or nagative count")
|
||||
return nil, errors.New("cannot read int32 data with a zero or nagative count")
|
||||
return nil, merr.WrapErrImportFailed("cannot read int32 data with a zero or nagative count")
|
||||
}
|
||||
|
||||
// incorrect type
|
||||
if n.dataType != schemapb.DataType_Int32 {
|
||||
log.Warn("Numpy adapter: numpy data is not int32 type")
|
||||
return nil, errors.New("numpy data is not int32 type")
|
||||
return nil, merr.WrapErrImportFailed("numpy data is not int32 type")
|
||||
}
|
||||
|
||||
// avoid read overflow
|
||||
@ -416,7 +416,7 @@ func (n *NumpyAdapter) ReadInt32(count int) ([]int32, error) {
|
||||
err := binary.Read(n.reader, n.order, &data)
|
||||
if err != nil {
|
||||
log.Warn("Numpy adapter: failed to read int32 data", zap.Int("count", count), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to read int32 data with count %d, error: %w", readSize, err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read int32 data with count %d, error: %v", readSize, err))
|
||||
}
|
||||
|
||||
// update read position after successfully read
|
||||
@ -428,13 +428,13 @@ func (n *NumpyAdapter) ReadInt32(count int) ([]int32, error) {
|
||||
func (n *NumpyAdapter) ReadInt64(count int) ([]int64, error) {
|
||||
if count <= 0 {
|
||||
log.Warn("Numpy adapter: cannot read int64 data with a zero or nagative count")
|
||||
return nil, errors.New("cannot read int64 data with a zero or nagative count")
|
||||
return nil, merr.WrapErrImportFailed("cannot read int64 data with a zero or nagative count")
|
||||
}
|
||||
|
||||
// incorrect type
|
||||
if n.dataType != schemapb.DataType_Int64 {
|
||||
log.Warn("Numpy adapter: numpy data is not int64 type")
|
||||
return nil, errors.New("numpy data is not int64 type")
|
||||
return nil, merr.WrapErrImportFailed("numpy data is not int64 type")
|
||||
}
|
||||
|
||||
// avoid read overflow
|
||||
@ -450,7 +450,7 @@ func (n *NumpyAdapter) ReadInt64(count int) ([]int64, error) {
|
||||
err := binary.Read(n.reader, n.order, &data)
|
||||
if err != nil {
|
||||
log.Warn("Numpy adapter: failed to read int64 data", zap.Int("count", count), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to read int64 data with count %d, error: %w", readSize, err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read int64 data with count %d, error: %v", readSize, err))
|
||||
}
|
||||
|
||||
// update read position after successfully read
|
||||
@ -462,13 +462,13 @@ func (n *NumpyAdapter) ReadInt64(count int) ([]int64, error) {
|
||||
func (n *NumpyAdapter) ReadFloat32(count int) ([]float32, error) {
|
||||
if count <= 0 {
|
||||
log.Warn("Numpy adapter: cannot read float32 data with a zero or nagative count")
|
||||
return nil, errors.New("cannot read float32 data with a zero or nagative count")
|
||||
return nil, merr.WrapErrImportFailed("cannot read float32 data with a zero or nagative count")
|
||||
}
|
||||
|
||||
// incorrect type
|
||||
if n.dataType != schemapb.DataType_Float {
|
||||
log.Warn("Numpy adapter: numpy data is not float32 type")
|
||||
return nil, errors.New("numpy data is not float32 type")
|
||||
return nil, merr.WrapErrImportFailed("numpy data is not float32 type")
|
||||
}
|
||||
|
||||
// avoid read overflow
|
||||
@ -484,7 +484,7 @@ func (n *NumpyAdapter) ReadFloat32(count int) ([]float32, error) {
|
||||
err := binary.Read(n.reader, n.order, &data)
|
||||
if err != nil {
|
||||
log.Warn("Numpy adapter: failed to read float32 data", zap.Int("count", count), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to read float32 data with count %d, error: %w", readSize, err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read float32 data with count %d, error: %v", readSize, err))
|
||||
}
|
||||
|
||||
// update read position after successfully read
|
||||
@ -496,13 +496,13 @@ func (n *NumpyAdapter) ReadFloat32(count int) ([]float32, error) {
|
||||
func (n *NumpyAdapter) ReadFloat64(count int) ([]float64, error) {
|
||||
if count <= 0 {
|
||||
log.Warn("Numpy adapter: cannot read float64 data with a zero or nagative count")
|
||||
return nil, errors.New("cannot read float64 data with a zero or nagative count")
|
||||
return nil, merr.WrapErrImportFailed("cannot read float64 data with a zero or nagative count")
|
||||
}
|
||||
|
||||
// incorrect type
|
||||
if n.dataType != schemapb.DataType_Double {
|
||||
log.Warn("Numpy adapter: numpy data is not float64 type")
|
||||
return nil, errors.New("numpy data is not float64 type")
|
||||
return nil, merr.WrapErrImportFailed("numpy data is not float64 type")
|
||||
}
|
||||
|
||||
// avoid read overflow
|
||||
@ -518,7 +518,7 @@ func (n *NumpyAdapter) ReadFloat64(count int) ([]float64, error) {
|
||||
err := binary.Read(n.reader, n.order, &data)
|
||||
if err != nil {
|
||||
log.Warn("Numpy adapter: failed to read float64 data", zap.Int("count", count), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to read float64 data with count %d, error: %w", readSize, err)
|
||||
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read float64 data with count %d, error: %v", readSize, err))
|
||||
}
|
||||
|
||||
// update read position after successfully read
|
||||
@ -530,20 +530,20 @@ func (n *NumpyAdapter) ReadFloat64(count int) ([]float64, error) {
func (n *NumpyAdapter) ReadString(count int) ([]string, error) {
if count <= 0 {
log.Warn("Numpy adapter: cannot read varchar data with a zero or negative count")
return nil, errors.New("cannot read varchar data with a zero or negative count")
return nil, merr.WrapErrImportFailed("cannot read varchar data with a zero or negative count")
}

// incorrect type
if n.dataType != schemapb.DataType_VarChar {
log.Warn("Numpy adapter: numpy data is not varchar type")
return nil, errors.New("numpy data is not varchar type")
return nil, merr.WrapErrImportFailed("numpy data is not varchar type")
}

// varchar length, this is the max length, some item is shorter than this length, but they also occupy bytes of max length
maxLen, utf, err := stringLen(n.npyReader.Header.Descr.Type)
if err != nil || maxLen <= 0 {
log.Warn("Numpy adapter: failed to get max length of varchar from numpy file header", zap.Int("maxLen", maxLen), zap.Error(err))
return nil, fmt.Errorf("failed to get max length %d of varchar from numpy file header, error: %w", maxLen, err)
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to get max length %d of varchar from numpy file header, error: %v", maxLen, err))
}
// log.Info("Numpy adapter: get varchar max length from numpy file header", zap.Int("maxLen", maxLen), zap.Bool("utf", utf))

@ -557,7 +557,7 @@ func (n *NumpyAdapter) ReadString(count int) ([]string, error) {

if n.reader == nil {
log.Warn("Numpy adapter: reader is nil")
return nil, errors.New("numpy reader is nil")
return nil, merr.WrapErrImportFailed("numpy reader is nil")
}

// read string one by one is not efficient, here we read strings batch by batch, each batch size is no more than 16MB
@ -592,7 +592,7 @@ func (n *NumpyAdapter) ReadString(count int) ([]string, error) {
if err != nil {
log.Warn("Numpy adapter: failed to read utf32 bytes from numpy file",
zap.Int("readDone", readDone), zap.Error(err))
return nil, fmt.Errorf("failed to read utf32 bytes from numpy file, error: %w", err)
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read utf32 bytes from numpy file, error: %v", err))
}

// read string one by one from the buffer
@ -601,7 +601,7 @@ func (n *NumpyAdapter) ReadString(count int) ([]string, error) {
if err != nil {
log.Warn("Numpy adapter: failed to decode utf32 bytes",
zap.Int("position", readDone+j), zap.Error(err))
return nil, fmt.Errorf("failed to decode utf32 bytes, error: %w", err)
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to decode utf32 bytes, error: %v", err))
}

data = append(data, str)
@ -613,7 +613,7 @@ func (n *NumpyAdapter) ReadString(count int) ([]string, error) {
if err != nil {
log.Warn("Numpy adapter: failed to read ascii bytes from numpy file",
zap.Int("readDone", readDone), zap.Error(err))
return nil, fmt.Errorf("failed to read ascii bytes from numpy file, error: %w", err)
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read ascii bytes from numpy file, error: %v", err))
}

// read string one by one from the buffer
@ -645,7 +645,7 @@ func (n *NumpyAdapter) ReadString(count int) ([]string, error) {
func decodeUtf32(src []byte, order binary.ByteOrder) (string, error) {
if len(src)%4 != 0 {
log.Warn("Numpy adapter: invalid utf32 bytes length, the byte array length should be multiple of 4", zap.Int("byteLen", len(src)))
return "", fmt.Errorf("invalid utf32 bytes length %d, the byte array length should be multiple of 4", len(src))
return "", merr.WrapErrImportFailed(fmt.Sprintf("invalid utf32 bytes length %d, the byte array length should be multiple of 4", len(src)))
}

var str string
@ -675,7 +675,7 @@ func decodeUtf32(src []byte, order binary.ByteOrder) (string, error) {
res, err := decoder.Bytes(src[lowbytesPosition : lowbytesPosition+2])
if err != nil {
log.Warn("Numpy adapter: failed to decode utf32 binary bytes", zap.Error(err))
return "", fmt.Errorf("failed to decode utf32 binary bytes, error: %w", err)
return "", merr.WrapErrImportFailed(fmt.Sprintf("failed to decode utf32 binary bytes, error: %v", err))
}
str += string(res)
}
@ -693,7 +693,7 @@ func decodeUtf32(src []byte, order binary.ByteOrder) (string, error) {
utf8.EncodeRune(utf8Code, r)
if r == utf8.RuneError {
log.Warn("Numpy adapter: failed to convert 4 bytes unicode to utf8 rune", zap.Uint32("code", x))
return "", fmt.Errorf("failed to convert 4 bytes unicode %d to utf8 rune", x)
return "", merr.WrapErrImportFailed(fmt.Sprintf("failed to convert 4 bytes unicode %d to utf8 rune", x))
}
str += string(utf8Code)
}
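decodeUtf32 turns the fixed-width UTF-32 code units that numpy uses for unicode strings into Go's UTF-8 strings, honoring the byte order recorded in the npy header. A small standalone sketch of the same idea for little-endian input, using only the standard library (the production code also handles big-endian data and surrogate-free 2-byte fast paths):

package main

import (
	"encoding/binary"
	"fmt"
	"unicode/utf8"
)

// decodeUTF32LE converts little-endian UTF-32 bytes to a UTF-8 string.
// Trailing zero code points (numpy pads strings to a fixed width) are dropped.
func decodeUTF32LE(src []byte) (string, error) {
	if len(src)%4 != 0 {
		return "", fmt.Errorf("invalid utf32 bytes length %d, should be a multiple of 4", len(src))
	}
	buf := make([]byte, 0, len(src))
	for i := 0; i < len(src); i += 4 {
		code := binary.LittleEndian.Uint32(src[i : i+4])
		if code == 0 {
			continue // padding
		}
		r := rune(code)
		if !utf8.ValidRune(r) {
			return "", fmt.Errorf("invalid unicode code point %d", code)
		}
		buf = utf8.AppendRune(buf, r)
	}
	return string(buf), nil
}

func main() {
	// "ok" encoded as UTF-32LE with two padding code points.
	src := []byte{'o', 0, 0, 0, 'k', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
	fmt.Println(decodeUTF32LE(src))
}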
@ -21,7 +21,6 @@ import (
"encoding/json"
"fmt"

"github.com/cockroachdb/errors"
"go.uber.org/zap"

"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
@ -29,6 +28,7 @@ import (
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/timerecord"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
@ -76,22 +76,22 @@ func NewNumpyParser(ctx context.Context,
) (*NumpyParser, error) {
if collectionInfo == nil {
log.Warn("Numpy parser: collection schema is nil")
return nil, errors.New("collection schema is nil")
return nil, merr.WrapErrImportFailed("collection schema is nil")
}

if idAlloc == nil {
log.Warn("Numpy parser: id allocator is nil")
return nil, errors.New("id allocator is nil")
return nil, merr.WrapErrImportFailed("id allocator is nil")
}

if chunkManager == nil {
log.Warn("Numpy parser: chunk manager pointer is nil")
return nil, errors.New("chunk manager pointer is nil")
return nil, merr.WrapErrImportFailed("chunk manager pointer is nil")
}

if flushFunc == nil {
log.Warn("Numpy parser: flush function is nil")
return nil, errors.New("flush function is nil")
return nil, merr.WrapErrImportFailed("flush function is nil")
}

parser := &NumpyParser{
@ -164,7 +164,7 @@ func (p *NumpyParser) validateFileNames(filePaths []string) error {
_, ok := requiredFieldNames[name]
if !ok {
log.Warn("Numpy parser: the file has no corresponding field in collection", zap.String("fieldName", name))
return fmt.Errorf("the file '%s' has no corresponding field in collection", filePath)
return merr.WrapErrImportFailed(fmt.Sprintf("the file '%s' has no corresponding field in collection", filePath))
}
}

@ -177,7 +177,7 @@ func (p *NumpyParser) validateFileNames(filePaths []string) error {
_, ok := fileNames[name]
if !ok {
log.Warn("Numpy parser: there is no file corresponding to field", zap.String("fieldName", name))
return fmt.Errorf("there is no file corresponding to field '%s'", name)
return merr.WrapErrImportFailed(fmt.Sprintf("there is no file corresponding to field '%s'", name))
}
}

@ -203,24 +203,24 @@ func (p *NumpyParser) createReaders(filePaths []string) ([]*NumpyColumnReader, e

if schema == nil {
log.Warn("Numpy parser: the field is not found in collection schema", zap.String("fileName", fileName))
return nil, fmt.Errorf("the field name '%s' is not found in collection schema", fileName)
return nil, merr.WrapErrImportFailed(fmt.Sprintf("the field name '%s' is not found in collection schema", fileName))
}

file, err := p.chunkManager.Reader(p.ctx, filePath)
if err != nil {
log.Warn("Numpy parser: failed to read the file", zap.String("filePath", filePath), zap.Error(err))
return nil, fmt.Errorf("failed to read the file '%s', error: %s", filePath, err.Error())
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read the file '%s', error: %s", filePath, err.Error()))
}

adapter, err := NewNumpyAdapter(file)
if err != nil {
log.Warn("Numpy parser: failed to read the file header", zap.String("filePath", filePath), zap.Error(err))
return nil, fmt.Errorf("failed to read the file header '%s', error: %s", filePath, err.Error())
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read the file header '%s', error: %s", filePath, err.Error()))
}

if file == nil || adapter == nil {
log.Warn("Numpy parser: failed to open file", zap.String("filePath", filePath))
return nil, fmt.Errorf("failed to open file '%s'", filePath)
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to open file '%s'", filePath))
}

dim, _ := getFieldDimension(schema)
@ -251,8 +251,8 @@ func (p *NumpyParser) createReaders(filePaths []string) ([]*NumpyColumnReader, e
log.Warn("Numpy parser: the row count of files are not equal",
zap.String("firstFile", firstReader.fieldName), zap.Int("firstRowCount", firstReader.rowCount),
zap.String("compareFile", compareReader.fieldName), zap.Int("compareRowCount", compareReader.rowCount))
return nil, fmt.Errorf("the row count(%d) of file '%s.npy' is not equal to row count(%d) of file '%s.npy'",
firstReader.rowCount, firstReader.fieldName, compareReader.rowCount, compareReader.fieldName)
return nil, merr.WrapErrImportFailed(fmt.Sprintf("the row count(%d) of file '%s.npy' is not equal to row count(%d) of file '%s.npy'",
firstReader.rowCount, firstReader.fieldName, compareReader.rowCount, compareReader.fieldName))
}
}
}
@ -264,7 +264,7 @@ func (p *NumpyParser) createReaders(filePaths []string) ([]*NumpyColumnReader, e
func (p *NumpyParser) validateHeader(columnReader *NumpyColumnReader) error {
if columnReader == nil || columnReader.reader == nil {
log.Warn("Numpy parser: numpy reader is nil")
return errors.New("numpy adapter is nil")
return merr.WrapErrImportFailed("numpy adapter is nil")
}

elementType := columnReader.reader.GetType()
@ -273,7 +273,7 @@ func (p *NumpyParser) validateHeader(columnReader *NumpyColumnReader) error {
if len(shape) == 0 {
log.Warn("Numpy parser: the content stored in numpy file is not valid numpy array",
zap.String("fieldName", columnReader.fieldName))
return fmt.Errorf("the content stored in numpy file is not valid numpy array for field '%s'", columnReader.fieldName)
return merr.WrapErrImportFailed(fmt.Sprintf("the content stored in numpy file is not valid numpy array for field '%s'", columnReader.fieldName))
}
columnReader.rowCount = shape[0]

@ -286,45 +286,45 @@ func (p *NumpyParser) validateHeader(columnReader *NumpyColumnReader) error {
|
||||
if elementType != schemapb.DataType_Float && elementType != schemapb.DataType_Double {
|
||||
log.Warn("Numpy parser: illegal data type of numpy file for float vector field", zap.Any("dataType", elementType),
|
||||
zap.String("fieldName", columnReader.fieldName))
|
||||
return fmt.Errorf("illegal data type %s of numpy file for float vector field '%s'", getTypeName(elementType),
|
||||
columnReader.fieldName)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("illegal data type %s of numpy file for float vector field '%s'", getTypeName(elementType),
|
||||
columnReader.fieldName))
|
||||
}
|
||||
|
||||
// vector field, the shape should be 2
|
||||
if len(shape) != 2 {
|
||||
log.Warn("Numpy parser: illegal shape of numpy file for float vector field, shape should be 2", zap.Int("shape", len(shape)),
|
||||
zap.String("fieldName", columnReader.fieldName))
|
||||
return fmt.Errorf("illegal shape %d of numpy file for float vector field '%s', shape should be 2", shape,
|
||||
columnReader.fieldName)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("illegal shape %d of numpy file for float vector field '%s', shape should be 2", shape,
|
||||
columnReader.fieldName))
|
||||
}
|
||||
|
||||
if shape[1] != columnReader.dimension {
|
||||
log.Warn("Numpy parser: illegal dimension of numpy file for float vector field", zap.String("fieldName", columnReader.fieldName),
|
||||
zap.Int("numpyDimension", shape[1]), zap.Int("fieldDimension", columnReader.dimension))
|
||||
return fmt.Errorf("illegal dimension %d of numpy file for float vector field '%s', dimension should be %d",
|
||||
shape[1], columnReader.fieldName, columnReader.dimension)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("illegal dimension %d of numpy file for float vector field '%s', dimension should be %d",
|
||||
shape[1], columnReader.fieldName, columnReader.dimension))
|
||||
}
|
||||
} else if schemapb.DataType_BinaryVector == columnReader.dataType {
|
||||
if elementType != schemapb.DataType_BinaryVector {
|
||||
log.Warn("Numpy parser: illegal data type of numpy file for binary vector field", zap.Any("dataType", elementType),
|
||||
zap.String("fieldName", columnReader.fieldName))
|
||||
return fmt.Errorf("illegal data type %s of numpy file for binary vector field '%s'", getTypeName(elementType),
|
||||
columnReader.fieldName)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("illegal data type %s of numpy file for binary vector field '%s'", getTypeName(elementType),
|
||||
columnReader.fieldName))
|
||||
}
|
||||
|
||||
// vector field, the shape should be 2
|
||||
if len(shape) != 2 {
|
||||
log.Warn("Numpy parser: illegal shape of numpy file for binary vector field, shape should be 2", zap.Int("shape", len(shape)),
|
||||
zap.String("fieldName", columnReader.fieldName))
|
||||
return fmt.Errorf("illegal shape %d of numpy file for binary vector field '%s', shape should be 2", shape,
|
||||
columnReader.fieldName)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("illegal shape %d of numpy file for binary vector field '%s', shape should be 2", shape,
|
||||
columnReader.fieldName))
|
||||
}
|
||||
|
||||
if shape[1] != columnReader.dimension/8 {
|
||||
log.Warn("Numpy parser: illegal dimension of numpy file for float vector field", zap.String("fieldName", columnReader.fieldName),
|
||||
zap.Int("numpyDimension", shape[1]*8), zap.Int("fieldDimension", columnReader.dimension))
|
||||
return fmt.Errorf("illegal dimension %d of numpy file for binary vector field '%s', dimension should be %d",
|
||||
shape[1]*8, columnReader.fieldName, columnReader.dimension)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("illegal dimension %d of numpy file for binary vector field '%s', dimension should be %d",
|
||||
shape[1]*8, columnReader.fieldName, columnReader.dimension))
|
||||
}
|
||||
} else {
|
||||
// JSON field and VARCHAR field are using string type numpy
|
||||
@ -333,8 +333,8 @@ func (p *NumpyParser) validateHeader(columnReader *NumpyColumnReader) error {
|
||||
if elementType != columnReader.dataType {
|
||||
log.Warn("Numpy parser: illegal data type of numpy file for scalar field", zap.Any("numpyDataType", elementType),
|
||||
zap.String("fieldName", columnReader.fieldName), zap.Any("fieldDataType", columnReader.dataType))
|
||||
return fmt.Errorf("illegal data type %s of numpy file for scalar field '%s' with type %s",
|
||||
getTypeName(elementType), columnReader.fieldName, getTypeName(columnReader.dataType))
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("illegal data type %s of numpy file for scalar field '%s' with type %s",
|
||||
getTypeName(elementType), columnReader.fieldName, getTypeName(columnReader.dataType)))
|
||||
}
|
||||
}
|
||||
|
||||
@ -342,7 +342,7 @@ func (p *NumpyParser) validateHeader(columnReader *NumpyColumnReader) error {
|
||||
if len(shape) != 1 {
|
||||
log.Warn("Numpy parser: illegal shape of numpy file for scalar field, shape should be 1", zap.Int("shape", len(shape)),
|
||||
zap.String("fieldName", columnReader.fieldName))
|
||||
return fmt.Errorf("illegal shape %d of numpy file for scalar field '%s', shape should be 1", shape, columnReader.fieldName)
|
||||
return merr.WrapErrImportFailed(fmt.Sprintf("illegal shape %d of numpy file for scalar field '%s', shape should be 1", shape, columnReader.fieldName))
|
||||
}
|
||||
}
|
||||
|
||||
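The branches above reduce to a simple shape rule per field type. The sketch below condenses that rule; it is illustrative only and not part of the patch:

// Sketch: the shape a .npy file must have for each Milvus field kind,
// as enforced by validateHeader above. dim is the field's "dim" type param.
//   FloatVector  -> 2-D float32/float64 array, shape [rowCount, dim]
//   BinaryVector -> 2-D uint8 array,           shape [rowCount, dim/8]
//   scalar/JSON  -> 1-D array,                 shape [rowCount]
func expectedShape(dataType schemapb.DataType, rowCount, dim int) []int {
	switch dataType {
	case schemapb.DataType_FloatVector:
		return []int{rowCount, dim}
	case schemapb.DataType_BinaryVector:
		return []int{rowCount, dim / 8}
	default:
		return []int{rowCount}
	}
}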
@ -354,12 +354,12 @@ func (p *NumpyParser) calcRowCountPerBlock() (int64, error) {
sizePerRecord, err := typeutil.EstimateSizePerRecord(p.collectionInfo.Schema)
if err != nil {
log.Warn("Numpy parser: failed to estimate size of each row", zap.Error(err))
return 0, fmt.Errorf("failed to estimate size of each row: %s", err.Error())
return 0, merr.WrapErrImportFailed(fmt.Sprintf("failed to estimate size of each row: %s", err.Error()))
}

if sizePerRecord <= 0 {
log.Warn("Numpy parser: failed to estimate size of each row, the collection schema might be empty")
return 0, fmt.Errorf("failed to estimate size of each row: the collection schema might be empty")
return 0, merr.WrapErrImportFailed("failed to estimate size of each row: the collection schema might be empty")
}

// the sizePerRecord is estimate value, if the schema contains varchar field, the value is not accurate
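Because sizePerRecord is only an estimate, the per-block row count derived from it is approximate as well. A minimal sketch of the calculation, with blockSize standing in for the configured import block size (an assumption for illustration, not the exact parameter the parser reads):

// Sketch: derive how many rows fit into one import block.
// blockSize is an assumed configuration value; varchar fields make sizePerRecord an estimate.
func rowsPerBlock(blockSize int64, sizePerRecord int) (int64, error) {
	if sizePerRecord <= 0 {
		return 0, merr.WrapErrImportFailed("failed to estimate size of each row: the collection schema might be empty")
	}
	rows := blockSize / int64(sizePerRecord)
	if rows <= 0 {
		rows = 1 // always make progress even if a single row exceeds the block size
	}
	return rows, nil
}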
@ -398,7 +398,7 @@ func (p *NumpyParser) consume(columnReaders []*NumpyColumnReader) error {
shardData := initShardData(p.collectionInfo.Schema, p.collectionInfo.PartitionIDs)
if shardData == nil {
log.Warn("Numper parser: failed to initialize FieldData list")
return fmt.Errorf("failed to initialize FieldData list")
return merr.WrapErrImportFailed("failed to initialize FieldData list")
}
shards = append(shards, shardData)
}
@ -420,7 +420,7 @@ func (p *NumpyParser) consume(columnReaders []*NumpyColumnReader) error {
} else if readRowCount != fieldData.RowNum() {
log.Warn("Numpy parser: data block's row count mismatch", zap.Int("firstBlockRowCount", readRowCount),
zap.Int("thisBlockRowCount", fieldData.RowNum()), zap.Int64("rowCountPerBlock", rowCountPerBlock))
return fmt.Errorf("data block's row count mismatch: %d vs %d", readRowCount, fieldData.RowNum())
return merr.WrapErrImportFailed(fmt.Sprintf("data block's row count mismatch: %d vs %d", readRowCount, fieldData.RowNum()))
}

segmentData[reader.fieldID] = fieldData
@ -458,7 +458,7 @@ func (p *NumpyParser) readData(columnReader *NumpyColumnReader, rowCount int) (s
data, err := columnReader.reader.ReadBool(rowCount)
if err != nil {
log.Warn("Numpy parser: failed to read bool array", zap.Error(err))
return nil, fmt.Errorf("failed to read bool array: %s", err.Error())
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read bool array: %s", err.Error()))
}

return &storage.BoolFieldData{
@ -468,7 +468,7 @@ func (p *NumpyParser) readData(columnReader *NumpyColumnReader, rowCount int) (s
data, err := columnReader.reader.ReadInt8(rowCount)
if err != nil {
log.Warn("Numpy parser: failed to read int8 array", zap.Error(err))
return nil, fmt.Errorf("failed to read int8 array: %s", err.Error())
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read int8 array: %s", err.Error()))
}

return &storage.Int8FieldData{
@ -478,7 +478,7 @@ func (p *NumpyParser) readData(columnReader *NumpyColumnReader, rowCount int) (s
data, err := columnReader.reader.ReadInt16(rowCount)
if err != nil {
log.Warn("Numpy parser: failed to int16 array", zap.Error(err))
return nil, fmt.Errorf("failed to read int16 array: %s", err.Error())
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read int16 array: %s", err.Error()))
}

return &storage.Int16FieldData{
@ -488,7 +488,7 @@ func (p *NumpyParser) readData(columnReader *NumpyColumnReader, rowCount int) (s
data, err := columnReader.reader.ReadInt32(rowCount)
if err != nil {
log.Warn("Numpy parser: failed to read int32 array", zap.Error(err))
return nil, fmt.Errorf("failed to read int32 array: %s", err.Error())
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read int32 array: %s", err.Error()))
}

return &storage.Int32FieldData{
@ -498,7 +498,7 @@ func (p *NumpyParser) readData(columnReader *NumpyColumnReader, rowCount int) (s
data, err := columnReader.reader.ReadInt64(rowCount)
if err != nil {
log.Warn("Numpy parser: failed to read int64 array", zap.Error(err))
return nil, fmt.Errorf("failed to read int64 array: %s", err.Error())
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read int64 array: %s", err.Error()))
}

return &storage.Int64FieldData{
@ -508,13 +508,13 @@ func (p *NumpyParser) readData(columnReader *NumpyColumnReader, rowCount int) (s
data, err := columnReader.reader.ReadFloat32(rowCount)
if err != nil {
log.Warn("Numpy parser: failed to read float array", zap.Error(err))
return nil, fmt.Errorf("failed to read float array: %s", err.Error())
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read float array: %s", err.Error()))
}

err = typeutil.VerifyFloats32(data)
if err != nil {
log.Warn("Numpy parser: illegal value in float array", zap.Error(err))
return nil, fmt.Errorf("illegal value in float array: %s", err.Error())
return nil, merr.WrapErrImportFailed(fmt.Sprintf("illegal value in float array: %s", err.Error()))
}

return &storage.FloatFieldData{
@ -524,13 +524,13 @@ func (p *NumpyParser) readData(columnReader *NumpyColumnReader, rowCount int) (s
data, err := columnReader.reader.ReadFloat64(rowCount)
if err != nil {
log.Warn("Numpy parser: failed to read double array", zap.Error(err))
return nil, fmt.Errorf("failed to read double array: %s", err.Error())
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read double array: %s", err.Error()))
}

err = typeutil.VerifyFloats64(data)
if err != nil {
log.Warn("Numpy parser: illegal value in double array", zap.Error(err))
return nil, fmt.Errorf("illegal value in double array: %s", err.Error())
return nil, merr.WrapErrImportFailed(fmt.Sprintf("illegal value in double array: %s", err.Error()))
}

return &storage.DoubleFieldData{
@ -540,7 +540,7 @@ func (p *NumpyParser) readData(columnReader *NumpyColumnReader, rowCount int) (s
data, err := columnReader.reader.ReadString(rowCount)
if err != nil {
log.Warn("Numpy parser: failed to read varchar array", zap.Error(err))
return nil, fmt.Errorf("failed to read varchar array: %s", err.Error())
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read varchar array: %s", err.Error()))
}

return &storage.StringFieldData{
@ -551,7 +551,7 @@ func (p *NumpyParser) readData(columnReader *NumpyColumnReader, rowCount int) (s
data, err := columnReader.reader.ReadString(rowCount)
if err != nil {
log.Warn("Numpy parser: failed to read json string array", zap.Error(err))
return nil, fmt.Errorf("failed to read json string array: %s", err.Error())
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read json string array: %s", err.Error()))
}

byteArr := make([][]byte, 0)
@ -561,8 +561,8 @@ func (p *NumpyParser) readData(columnReader *NumpyColumnReader, rowCount int) (s
if err != nil {
log.Warn("Numpy parser: illegal string value for JSON field",
zap.String("value", str), zap.String("FieldName", columnReader.fieldName), zap.Error(err))
return nil, fmt.Errorf("failed to parse value '%v' for JSON field '%s', error: %w",
str, columnReader.fieldName, err)
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to parse value '%v' for JSON field '%s', error: %v",
str, columnReader.fieldName, err))
}
byteArr = append(byteArr, []byte(str))
}
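In the JSON branch, every string read from the numpy file must be valid JSON before its bytes are kept. A stand-alone sketch of that check, using the standard library json.Valid rather than whatever validator the parser actually calls:

// Sketch: validate JSON strings read from a numpy file before keeping their bytes.
// json.Valid is used here for illustration; the real parser may validate differently.
func toJSONBytes(fieldName string, values []string) ([][]byte, error) {
	byteArr := make([][]byte, 0, len(values))
	for _, str := range values {
		if !json.Valid([]byte(str)) {
			return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to parse value '%v' for JSON field '%s'", str, fieldName))
		}
		byteArr = append(byteArr, []byte(str))
	}
	return byteArr, nil
}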
@ -574,7 +574,7 @@ func (p *NumpyParser) readData(columnReader *NumpyColumnReader, rowCount int) (s
data, err := columnReader.reader.ReadUint8(rowCount * (columnReader.dimension / 8))
if err != nil {
log.Warn("Numpy parser: failed to read binary vector array", zap.Error(err))
return nil, fmt.Errorf("failed to read binary vector array: %s", err.Error())
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read binary vector array: %s", err.Error()))
}

return &storage.BinaryVectorFieldData{
@ -593,27 +593,27 @@ func (p *NumpyParser) readData(columnReader *NumpyColumnReader, rowCount int) (s
data, err = columnReader.reader.ReadFloat32(rowCount * columnReader.dimension)
if err != nil {
log.Warn("Numpy parser: failed to read float vector array", zap.Error(err))
return nil, fmt.Errorf("failed to read float vector array: %s", err.Error())
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read float vector array: %s", err.Error()))
}

err = typeutil.VerifyFloats32(data)
if err != nil {
log.Warn("Numpy parser: illegal value in float vector array", zap.Error(err))
return nil, fmt.Errorf("illegal value in float vector array: %s", err.Error())
return nil, merr.WrapErrImportFailed(fmt.Sprintf("illegal value in float vector array: %s", err.Error()))
}
} else if elementType == schemapb.DataType_Double {
data = make([]float32, 0, columnReader.rowCount)
data64, err := columnReader.reader.ReadFloat64(rowCount * columnReader.dimension)
if err != nil {
log.Warn("Numpy parser: failed to read float vector array", zap.Error(err))
return nil, fmt.Errorf("failed to read float vector array: %s", err.Error())
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to read float vector array: %s", err.Error()))
}

for _, f64 := range data64 {
err = typeutil.VerifyFloat(f64)
if err != nil {
log.Warn("Numpy parser: illegal value in float vector array", zap.Error(err))
return nil, fmt.Errorf("illegal value in float vector array: %s", err.Error())
return nil, merr.WrapErrImportFailed(fmt.Sprintf("illegal value in float vector array: %s", err.Error()))
}

data = append(data, float32(f64))
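The FloatVector branch accepts both float32 and float64 numpy files and narrows float64 values to float32 after validating them. A self-contained sketch of that narrowing step (note the possible precision loss):

// Sketch: narrow a float64 numpy column to Milvus' float32 vector storage,
// rejecting NaN/Inf the same way the parser does via typeutil.VerifyFloat.
func narrowToFloat32(data64 []float64) ([]float32, error) {
	data := make([]float32, 0, len(data64))
	for _, f64 := range data64 {
		if err := typeutil.VerifyFloat(f64); err != nil {
			return nil, merr.WrapErrImportFailed(fmt.Sprintf("illegal value in float vector array: %s", err.Error()))
		}
		data = append(data, float32(f64)) // precision beyond float32 is silently dropped
	}
	return data, nil
}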
@ -627,8 +627,8 @@ func (p *NumpyParser) readData(columnReader *NumpyColumnReader, rowCount int) (s
default:
log.Warn("Numpy parser: unsupported data type of field", zap.Any("dataType", columnReader.dataType),
zap.String("fieldName", columnReader.fieldName))
return nil, fmt.Errorf("unsupported data type %s of field '%s'", getTypeName(columnReader.dataType),
columnReader.fieldName)
return nil, merr.WrapErrImportFailed(fmt.Sprintf("unsupported data type %s of field '%s'", getTypeName(columnReader.dataType),
columnReader.fieldName))
}
}

@ -713,7 +713,7 @@ func (p *NumpyParser) prepareAppendFunctions() (map[string]func(src storage.Fiel
appendFuncErr := p.appendFunc(schema)
if appendFuncErr == nil {
log.Warn("Numpy parser: unsupported field data type")
return nil, fmt.Errorf("unsupported field data type: %d", schema.GetDataType())
return nil, merr.WrapErrImportFailed(fmt.Sprintf("unsupported field data type: %d", schema.GetDataType()))
}
appendFunctions[schema.GetName()] = appendFuncErr
}
@ -734,7 +734,7 @@ func (p *NumpyParser) checkRowCount(fieldsData BlockData) (int, error) {
continue
}
log.Warn("Numpy parser: field not provided", zap.String("fieldName", schema.GetName()))
return 0, fmt.Errorf("field '%s' not provided", schema.GetName())
return 0, merr.WrapErrImportFailed(fmt.Sprintf("field '%s' not provided", schema.GetName()))
}
rowCounter[schema.GetName()] = v.RowNum()
if v.RowNum() > rowCount {
@ -747,7 +747,7 @@ func (p *NumpyParser) checkRowCount(fieldsData BlockData) (int, error) {
if count != rowCount {
log.Warn("Numpy parser: field row count is not equal to other fields row count", zap.String("fieldName", name),
zap.Int("rowCount", count), zap.Int("otherRowCount", rowCount))
return 0, fmt.Errorf("field '%s' row count %d is not equal to other fields row count: %d", name, count, rowCount)
return 0, merr.WrapErrImportFailed(fmt.Sprintf("field '%s' row count %d is not equal to other fields row count: %d", name, count, rowCount))
}
}

@ -758,13 +758,13 @@ func (p *NumpyParser) checkRowCount(fieldsData BlockData) (int, error) {
func (p *NumpyParser) splitFieldsData(fieldsData BlockData, shards []ShardData) error {
if len(fieldsData) == 0 {
log.Warn("Numpy parser: fields data to split is empty")
return fmt.Errorf("fields data to split is empty")
return merr.WrapErrImportFailed("fields data to split is empty")
}

if len(shards) != int(p.collectionInfo.ShardNum) {
log.Warn("Numpy parser: block count is not equal to collection shard number", zap.Int("shardsLen", len(shards)),
zap.Int32("shardNum", p.collectionInfo.ShardNum))
return fmt.Errorf("block count %d is not equal to collection shard number %d", len(shards), p.collectionInfo.ShardNum)
return merr.WrapErrImportFailed(fmt.Sprintf("block count %d is not equal to collection shard number %d", len(shards), p.collectionInfo.ShardNum))
}

rowCount, err := p.checkRowCount(fieldsData)
@ -776,7 +776,7 @@ func (p *NumpyParser) splitFieldsData(fieldsData BlockData, shards []ShardData)
rowIDBegin, rowIDEnd, err := p.rowIDAllocator.Alloc(uint32(rowCount))
if err != nil {
log.Warn("Numpy parser: failed to alloc row ID", zap.Int("rowCount", rowCount), zap.Error(err))
return fmt.Errorf("failed to alloc %d rows ID, error: %w", rowCount, err)
return merr.WrapErrImportFailed(fmt.Sprintf("failed to alloc %d rows ID, error: %v", rowCount, err))
}

rowIDField, ok := fieldsData[common.RowIDField]
@ -797,7 +797,7 @@ func (p *NumpyParser) splitFieldsData(fieldsData BlockData, shards []ShardData)
log.Info("Numpy parser: generating auto-id", zap.Int("rowCount", rowCount), zap.Int64("rowIDBegin", rowIDBegin))
if primaryKey.GetDataType() != schemapb.DataType_Int64 {
log.Warn("Numpy parser: primary key field is auto-generated but the field type is not int64")
return fmt.Errorf("primary key field is auto-generated but the field type is not int64")
return merr.WrapErrImportFailed("primary key field is auto-generated but the field type is not int64")
}

primaryDataArr := &storage.Int64FieldData{
@ -815,7 +815,7 @@ func (p *NumpyParser) splitFieldsData(fieldsData BlockData, shards []ShardData)
primaryData, ok := fieldsData[primaryKey.GetFieldID()]
if !ok || primaryData.RowNum() <= 0 {
log.Warn("Numpy parser: primary key field is not provided", zap.String("keyName", primaryKey.GetName()))
return fmt.Errorf("primary key '%s' field data is not provided", primaryKey.GetName())
return merr.WrapErrImportFailed(fmt.Sprintf("primary key '%s' field data is not provided", primaryKey.GetName()))
}

// prepare append functions
@ -855,8 +855,8 @@ func (p *NumpyParser) splitFieldsData(fieldsData BlockData, shards []ShardData)
log.Warn("Numpy parser: cannot append data since source or target field data is nil",
zap.String("FieldName", schema.GetName()),
zap.Bool("sourceNil", srcData == nil), zap.Bool("targetNil", targetData == nil))
return fmt.Errorf("cannot append data for field '%s', possibly no any fields corresponding to this numpy file, or a required numpy file is not provided",
schema.GetName())
return merr.WrapErrImportFailed(fmt.Sprintf("cannot append data for field '%s', possibly no any fields corresponding to this numpy file, or a required numpy file is not provided",
schema.GetName()))
}
appendFunc := appendFunctions[schema.GetName()]
err := appendFunc(srcData, i, targetData)
@ -869,13 +869,13 @@ func (p *NumpyParser) splitFieldsData(fieldsData BlockData, shards []ShardData)
return nil
}

// hashToPartition hash partition key to get an partition ID, return the first partition ID if no partition key exist
// hashToPartition hash partition key to get a partition ID, return the first partition ID if no partition key exist
// CollectionInfo ensures only one partition ID in the PartitionIDs if no partition key exist
func (p *NumpyParser) hashToPartition(fieldsData BlockData, rowNumber int) (int64, error) {
if p.collectionInfo.PartitionKey == nil {
// no partition key, directly return the target partition id
if len(p.collectionInfo.PartitionIDs) != 1 {
return 0, fmt.Errorf("collection '%s' partition list is empty", p.collectionInfo.Schema.Name)
return 0, merr.WrapErrImportFailed(fmt.Sprintf("collection '%s' partition list is empty", p.collectionInfo.Schema.Name))
}
return p.collectionInfo.PartitionIDs[0], nil
}
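When a partition key is present, hashToPartition must map the row's key value onto one of the collection's partitions. The snippet below sketches the idea with a plain FNV hash over the key's string form; the hash choice is an assumption for illustration, not the one Milvus actually uses:

// Sketch: pick a partition for one row by hashing its partition-key value.
// Assumes import "hash/fnv"; the real implementation uses Milvus' own hashing.
func pickPartition(partitionIDs []int64, keyValue string) int64 {
	h := fnv.New32a()
	h.Write([]byte(keyValue))
	idx := h.Sum32() % uint32(len(partitionIDs))
	return partitionIDs[idx]
}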
@ -44,9 +44,21 @@ func createLocalChunkManager(t *testing.T) storage.ChunkManager {
return cm
}

func createNumpySchema() *schemapb.CollectionSchema {
schema := sampleSchema()
fields := make([]*schemapb.FieldSchema, 0)
for _, field := range schema.GetFields() {
if field.GetDataType() != schemapb.DataType_Array {
fields = append(fields, field)
}
}
schema.Fields = fields
return schema
}
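createNumpySchema exists because the numpy import path does not cover the new Array data type, so Array fields are filtered out of the sample schema before the parser is exercised. A test fragment that states this invariant explicitly (illustrative only, not part of the patch):

// Sketch: assert the numpy test schema really contains no Array fields.
func assertNoArrayFields(t *testing.T, schema *schemapb.CollectionSchema) {
	for _, field := range schema.GetFields() {
		assert.NotEqual(t, schemapb.DataType_Array, field.GetDataType(),
			"numpy bulk_insert does not support Array fields, field: "+field.GetName())
	}
}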
func createNumpyParser(t *testing.T) *NumpyParser {
ctx := context.Background()
schema := sampleSchema()
schema := createNumpySchema()
idAllocator := newIDAllocator(ctx, t, nil)

cm := createLocalChunkManager(t)
@ -660,7 +672,7 @@ func Test_NumpyParserPrepareAppendFunctions(t *testing.T) {
// succeed
appendFuncs, err := parser.prepareAppendFunctions()
assert.NoError(t, err)
assert.Equal(t, len(sampleSchema().Fields), len(appendFuncs))
assert.Equal(t, len(createNumpySchema().Fields), len(appendFuncs))

// schema has unsupported data type
schema := &schemapb.CollectionSchema{
@ -868,7 +880,7 @@ func Test_NumpyParserSplitFieldsData(t *testing.T) {
})

t.Run("primary key auto-generated", func(t *testing.T) {
parser.collectionInfo.resetSchema(sampleSchema())
parser.collectionInfo.resetSchema(createNumpySchema())
schema := findSchema(parser.collectionInfo.Schema, schemapb.DataType_Int64)
schema.AutoID = true

@ -982,7 +994,7 @@ func Test_NumpyParserConsume(t *testing.T) {
files := createSampleNumpyFiles(t, cm)
readers, err := parser.createReaders(files)
assert.NoError(t, err)
assert.Equal(t, len(sampleSchema().Fields), len(readers))
assert.Equal(t, len(createNumpySchema().Fields), len(readers))

// succeed
err = parser.consume(readers)
@ -1043,7 +1055,7 @@ func Test_NumpyParserParse(t *testing.T) {
assert.Error(t, err)
})

parser.collectionInfo.resetSchema(sampleSchema())
parser.collectionInfo.resetSchema(createNumpySchema())

t.Run("succeed", func(t *testing.T) {
cm := createLocalChunkManager(t)
|
||||
// Do NOT export this,
|
||||
// never allow programmer using this, keep only for converting unknown error to milvusError
|
||||
errUnexpected = newMilvusError("unexpected error", (1<<16)-1, false)
|
||||
|
||||
// import
|
||||
ErrImportFailed = newMilvusError("deserializing import data failed", 2100, false)
|
||||
)
|
||||
|
||||
type milvusError struct {
|
||||
|
@ -873,3 +873,11 @@ func bound(name string, value, lower, upper any) boundField {
|
||||
func (f boundField) String() string {
|
||||
return fmt.Sprintf("%v out of range %v <= %s <= %v", f.value, f.lower, f.name, f.upper)
|
||||
}
|
||||
|
||||
func WrapErrImportFailed(msg ...string) error {
|
||||
err := error(ErrImportFailed)
|
||||
if len(msg) > 0 {
|
||||
err = errors.Wrap(err, strings.Join(msg, "->"))
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
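The new wrapper keeps ErrImportFailed recognizable through errors.Is while joining extra context messages with "->". A caller-side sketch (openImportFile is hypothetical, not from the patch):

// Sketch: how import code is expected to use the new wrapper.
// Assumes imports "os", "fmt", "errors" and the merr package shown above.
func openImportFile(path string) error {
	if _, err := os.Open(path); err != nil {
		return merr.WrapErrImportFailed(fmt.Sprintf("failed to read the file '%s', error: %s", path, err.Error()))
	}
	return nil
}

// Elsewhere, callers can still branch on the error class:
//   if errors.Is(err, merr.ErrImportFailed) { /* report a user-facing import failure */ }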