// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package datanode

import (
	"encoding/json"
	"errors"
	"fmt"
	"sync/atomic"

	"github.com/bits-and-blooms/bloom/v3"
	"github.com/milvus-io/milvus-proto/go-api/schemapb"
	"go.uber.org/zap"

	"github.com/milvus-io/milvus/internal/common"
	"github.com/milvus-io/milvus/internal/log"
	"github.com/milvus-io/milvus/internal/proto/datapb"
	"github.com/milvus-io/milvus/internal/proto/internalpb"
	"github.com/milvus-io/milvus/internal/storage"
)

// Segment contains the latest segment info from the channel.
type Segment struct {
	collectionID UniqueID
	partitionID  UniqueID
	segmentID    UniqueID
	sType        atomic.Value // datapb.SegmentType

	numRows     int64
	memorySize  int64
	compactedTo UniqueID

	pkStat pkStatistics

	startPos *internalpb.MsgPosition // TODO readonly
	endPos   *internalpb.MsgPosition
}

// pkStatistics contains pk field statistic information.
type pkStatistics struct {
	statsChanged bool               // whether the statistics changed since the last serialization
	pkFilter     *bloom.BloomFilter // bloom filter of pk inside a segment
	minPK        primaryKey         // minimal pk value, shortcut for checking whether a pk is inside this segment
	maxPK        primaryKey         // maximal pk value, same as above
}

// update sets the pk min/max values if the input value is beyond the former range.
func (st *pkStatistics) update(pk primaryKey) error {
	if st == nil {
		return errors.New("nil pk statistics")
	}
	if st.minPK == nil || st.minPK.GT(pk) {
		st.minPK = pk
	}

	if st.maxPK == nil || st.maxPK.LT(pk) {
		st.maxPK = pk
	}

	return nil
}
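
// updatePKRange feeds a batch of pk field data into the statistics:
// every pk value updates the min/max range and is added to the bloom filter.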
func (st *pkStatistics) updatePKRange(ids storage.FieldData) error {
	switch pks := ids.(type) {
	case *storage.Int64FieldData:
		buf := make([]byte, 8)
		for _, pk := range pks.Data {
			id := storage.NewInt64PrimaryKey(pk)
			if err := st.update(id); err != nil {
				return err
			}
			common.Endian.PutUint64(buf, uint64(pk))
			st.pkFilter.Add(buf)
		}
	case *storage.StringFieldData:
		for _, pk := range pks.Data {
			id := storage.NewVarCharPrimaryKey(pk)
			if err := st.update(id); err != nil {
				return err
			}
			st.pkFilter.AddString(pk)
		}
	default:
		return fmt.Errorf("invalid data type for primary key: %T", ids)
	}

	// mark statistics as updated
	st.statsChanged = true

	return nil
}
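
// A minimal usage sketch of updatePKRange; the bloom filter sizing here is
// illustrative, assuming the filter is normally initialized by whichever
// component constructs the statistics:
//
//	st := &pkStatistics{pkFilter: bloom.NewWithEstimates(100000, 0.005)}
//	err := st.updatePKRange(&storage.Int64FieldData{Data: []int64{1, 2, 3}})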

// getStatslog returns the marshaled statslog content if there has been any
// change since the last call. The statslog is marshaled as JSON; a successful
// call resets the changed flag.
func (st *pkStatistics) getStatslog(segmentID, pkID UniqueID, pkType schemapb.DataType) ([]byte, error) {
	if !st.statsChanged {
		return nil, fmt.Errorf("%w segment %d", errSegmentStatsNotChanged, segmentID)
	}

	pks := storage.PrimaryKeyStats{
		FieldID: pkID,
		PkType:  int64(pkType),
		MaxPk:   st.maxPK,
		MinPk:   st.minPK,
		BF:      st.pkFilter,
	}

	bs, err := json.Marshal(pks)
	if err == nil {
		st.statsChanged = false
	}
	return bs, err
}
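
// addSegmentReq groups the parameters for adding a segment: its type,
// identifiers, row count, start/end positions, stats binlogs, recover
// timestamp and importing flag.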
type addSegmentReq struct {
	segType                    datapb.SegmentType
	segID, collID, partitionID UniqueID
	numOfRows                  int64
	startPos, endPos           *internalpb.MsgPosition
	statsBinLogs               []*datapb.FieldBinlog
	recoverTs                  Timestamp
	importing                  bool
}
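
// updatePk updates the pk min/max range of this segment with a single pk value.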
func (s *Segment) updatePk(pk primaryKey) error {
	return s.pkStat.update(pk)
}
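
// isValid reports whether the segment is still valid, i.e. not compacted.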
func (s *Segment) isValid() bool {
	return s.getType() != datapb.SegmentType_Compacted
}
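
// notFlushed reports whether the segment is valid and not yet flushed.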
func (s *Segment) notFlushed() bool {
	return s.isValid() && s.getType() != datapb.SegmentType_Flushed
}
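
// getType atomically loads the segment type.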
func (s *Segment) getType() datapb.SegmentType {
	return s.sType.Load().(datapb.SegmentType)
}
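
// setType atomically stores the segment type.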
func (s *Segment) setType(t datapb.SegmentType) {
	s.sType.Store(t)
}
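
// updatePKRange feeds a batch of pk field data into the segment statistics
// and logs the resulting pk range.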
func (s *Segment) updatePKRange(ids storage.FieldData) error {
	log := log.With(zap.Int64("collectionID", s.collectionID),
		zap.Int64("partitionID", s.partitionID),
		zap.Int64("segmentID", s.segmentID),
	)

	err := s.pkStat.updatePKRange(ids)
	if err != nil {
		log.Warn("failed to updatePKRange", zap.Error(err))
		return err
	}

	log.Info("update pk range",
		zap.Int64("num_rows", s.numRows), zap.Any("minPK", s.pkStat.minPK), zap.Any("maxPK", s.pkStat.maxPK))

	return nil
}
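
// getSegmentStatslog serializes the pk statistics of this segment into a
// JSON statslog blob.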
func (s *Segment) getSegmentStatslog(pkID UniqueID, pkType schemapb.DataType) ([]byte, error) {
	return s.pkStat.getStatslog(s.segmentID, pkID, pkType)
}