milvus/internal/storage/data_sorter.go
yukun 4e6e502df1
Add data sorter in storage (#5200)
GetEntityByID needs the flushed segment to be sorted by RowID field, 
then do binary search to get the target id and entities.

See also: #5177 

Signed-off-by: fishpenguin kun.yu@zilliz.com
2021-05-14 02:59:49 +00:00

86 lines
2.8 KiB
Go

// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.
package storage
import (
"github.com/milvus-io/milvus/internal/proto/schemapb"
)
// DataSorter bundles an insert codec with a batch of insert data and provides
// Len, Less and Swap over it, ordering rows by the column stored under
// field ID 0.
type DataSorter struct {
// InsertCodec supplies the schema (InsertCodec.Schema.Schema.Fields) that
// tells the sorter which field columns exist and what data type each has.
InsertCodec *InsertCodec
// InsertData holds the column data; InsertData.Data is indexed by field ID.
InsertData *InsertData
}
// getIDField looks up the column belonging to the schema field whose FieldID
// is 0. It returns nil when the schema declares no such field.
func (ds *DataSorter) getIDField() FieldData {
	fields := ds.InsertCodec.Schema.Schema.Fields
	for idx := range fields {
		if id := fields[idx].FieldID; id == 0 {
			return ds.InsertData.Data[id]
		}
	}
	return nil
}
// Len reports the number of entries in the ID column (the field with
// FieldID 0). It panics if that column is not an *Int64FieldData.
func (ds *DataSorter) Len() int {
	ids := ds.getIDField().(*Int64FieldData)
	return len(ids.Data)
}
// Swap exchanges rows i and j in every field column listed in the schema, so
// that all columns stay aligned with the ID column as rows are reordered.
//
// Scalar columns are swapped element-wise. Vector columns are treated as flat
// buffers with a fixed width per row: a FloatVector row spans Dim consecutive
// elements and a BinaryVector row spans Dim/8 consecutive bytes, so row r
// starts at offset r*width.
//
// Swap panics if a field has an unsupported data type, if a column's concrete
// type does not match the type declared in the schema, or if i or j is out of
// range for any column.
func (ds *DataSorter) Swap(i, j int) {
	for _, field := range ds.InsertCodec.Schema.Schema.Fields {
		singleData := ds.InsertData.Data[field.FieldID]
		switch field.DataType {
		case schemapb.DataType_Bool:
			data := singleData.(*BoolFieldData).Data
			data[i], data[j] = data[j], data[i]
		case schemapb.DataType_Int8:
			data := singleData.(*Int8FieldData).Data
			data[i], data[j] = data[j], data[i]
		case schemapb.DataType_Int16:
			data := singleData.(*Int16FieldData).Data
			data[i], data[j] = data[j], data[i]
		case schemapb.DataType_Int32:
			data := singleData.(*Int32FieldData).Data
			data[i], data[j] = data[j], data[i]
		case schemapb.DataType_Int64:
			data := singleData.(*Int64FieldData).Data
			data[i], data[j] = data[j], data[i]
		case schemapb.DataType_Float:
			data := singleData.(*FloatFieldData).Data
			data[i], data[j] = data[j], data[i]
		case schemapb.DataType_Double:
			data := singleData.(*DoubleFieldData).Data
			data[i], data[j] = data[j], data[i]
		case schemapb.DataType_String:
			data := singleData.(*StringFieldData).Data
			data[i], data[j] = data[j], data[i]
		case schemapb.DataType_BinaryVector:
			vec := singleData.(*BinaryVectorFieldData)
			// Each row is Dim bits, i.e. Dim/8 bytes in the flat buffer.
			width := vec.Dim / 8
			rowI := vec.Data[i*width : (i+1)*width]
			rowJ := vec.Data[j*width : (j+1)*width]
			for k := range rowI {
				rowI[k], rowJ[k] = rowJ[k], rowI[k]
			}
		case schemapb.DataType_FloatVector:
			vec := singleData.(*FloatVectorFieldData)
			// Each row is Dim consecutive floats in the flat buffer.
			width := vec.Dim
			rowI := vec.Data[i*width : (i+1)*width]
			rowJ := vec.Data[j*width : (j+1)*width]
			for k := range rowI {
				rowI[k], rowJ[k] = rowJ[k], rowI[k]
			}
		default:
			// Use the enum's name; string(intValue) would yield a rune, not
			// a readable type identifier.
			panic("undefined data type " + field.DataType.String())
		}
	}
}
// Less reports whether the ID at row i is strictly smaller than the ID at
// row j. It panics if the ID column is not an *Int64FieldData.
func (ds *DataSorter) Less(i, j int) bool {
	ids := ds.getIDField().(*Int64FieldData).Data
	return ids[i] < ids[j]
}