milvus/internal/storage/stats.go
XuanYang-cn 9b335e74a2
[skip e2e]Update license for storage stats (#14349)
Signed-off-by: yangxuan <xuan.yang@zilliz.com>
2021-12-28 09:44:06 +08:00

116 lines
2.9 KiB
Go

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package storage
import (
"encoding/json"
"github.com/bits-and-blooms/bloom/v3"
"github.com/milvus-io/milvus/internal/common"
)
const (
	// maxBloomFalsePositive is the target false-positive rate passed to
	// bloom.NewWithEstimates when a primary-key bloom filter is built.
	maxBloomFalsePositive float64 = 0.005
	// bloomFilterSize is the estimated element count passed to
	// bloom.NewWithEstimates when a primary-key bloom filter is built.
	// TODO(silverxia): consider reading these values from config.
	bloomFilterSize uint = 100000
)
// Int64Stats holds the statistics recorded for a single int64 column.
// It is the value that StatsWriter.StatsInt64 encodes as JSON and that
// StatsReader.GetInt64Stats decodes. Field order determines the order of
// keys in the encoded JSON.
type Int64Stats struct {
// FieldID identifies the field (column) these statistics belong to.
FieldID int64 `json:"fieldID"`
// Max is the maximum value recorded for the column by StatsInt64.
Max int64 `json:"max"`
// Min is the minimum value recorded for the column by StatsInt64.
Min int64 `json:"min"`
// BF is a bloom filter over the column values. StatsInt64 only builds it
// for primary-key columns, so it is nil otherwise.
BF *bloom.BloomFilter `json:"bf"`
}
// StatsWriter encodes column statistics and keeps the encoded bytes in an
// in-memory buffer that can be retrieved with GetBuffer.
type StatsWriter struct {
// buffer holds the JSON produced by the most recent StatsInt64 call that
// encoded successfully.
buffer []byte
}
// GetBuffer returns the writer's internal buffer. The slice is returned as is,
// without copying, so the caller shares its backing array with the writer.
// Within this file the buffer is only assigned by StatsInt64.
func (sw *StatsWriter) GetBuffer() []byte {
return sw.buffer
}
// StatsInt64 builds the Int64Stats for the column identified by fieldID from
// msgs and stores the JSON-encoded result in the writer's buffer.
//
// Max and Min are found by scanning msgs, so the input does not have to be
// sorted. When isPrimaryKey is true a bloom filter is built over every value
// (each one encoded into 8 bytes via common.Endian); otherwise BF stays nil.
//
// If msgs is empty the call is a no-op: nil is returned and the buffer keeps
// its previous contents. A non-nil error is returned only when JSON encoding
// fails, in which case the buffer is also left unchanged.
func (sw *StatsWriter) StatsInt64(fieldID int64, isPrimaryKey bool, msgs []int64) error {
	if len(msgs) < 1 {
		// NOTE(review): the original comment suggested returning an error for
		// empty input; nil is kept so existing callers are unaffected.
		return nil
	}

	// Do not assume msgs is sorted: taking the first/last element as Min/Max
	// is only correct for ascending input.
	minVal, maxVal := msgs[0], msgs[0]
	for _, v := range msgs[1:] {
		if v < minVal {
			minVal = v
		} else if v > maxVal {
			maxVal = v
		}
	}

	stats := &Int64Stats{
		FieldID: fieldID,
		Max:     maxVal,
		Min:     minVal,
	}
	if isPrimaryKey {
		stats.BF = bloom.NewWithEstimates(bloomFilterSize, maxBloomFalsePositive)
		// One scratch buffer is reused for the 8-byte encoding of every value.
		key := make([]byte, 8)
		for _, v := range msgs {
			common.Endian.PutUint64(key, uint64(v))
			stats.BF.Add(key)
		}
	}

	encoded, err := json.Marshal(stats)
	if err != nil {
		return err
	}
	sw.buffer = encoded
	return nil
}
// StatsReader decodes column statistics from a byte buffer supplied through
// SetBuffer.
type StatsReader struct {
// buffer is the raw payload that GetInt64Stats decodes as JSON.
buffer []byte
}
// SetBuffer sets the payload that later read calls decode. The slice is stored
// as is, without copying, so the reader shares its backing array with the
// caller.
func (sr *StatsReader) SetBuffer(buffer []byte) {
sr.buffer = buffer
}
// GetInt64Stats decodes the reader's buffer as JSON into an Int64Stats.
//
// On success the returned pointer is never nil: the destination struct is
// passed to json.Unmarshal directly rather than as a pointer to the pointer,
// so a JSON `null` payload yields a zero-valued Int64Stats instead of a nil
// result paired with a nil error. Any decoding error is returned unchanged,
// together with a nil result.
func (sr *StatsReader) GetInt64Stats() (*Int64Stats, error) {
	stats := &Int64Stats{}
	if err := json.Unmarshal(sr.buffer, stats); err != nil {
		return nil, err
	}
	return stats, nil
}
// DeserializeStats decodes each blob's Value into an Int64Stats and returns
// the results in input order. Blobs whose Value is nil are skipped. Decoding
// stops at the first failure, which is returned together with a nil slice.
func DeserializeStats(blobs []*Blob) ([]*Int64Stats, error) {
	decoded := make([]*Int64Stats, 0, len(blobs))
	for i := range blobs {
		payload := blobs[i].Value
		if payload == nil {
			continue
		}

		reader := &StatsReader{}
		reader.SetBuffer(payload)

		entry, err := reader.GetInt64Stats()
		if err != nil {
			return nil, err
		}
		decoded = append(decoded, entry)
	}
	return decoded, nil
}