2024-03-04 19:31:09 +08:00
|
|
|
// Licensed to the LF AI & Data foundation under one
|
|
|
|
// or more contributor license agreements. See the NOTICE file
|
|
|
|
// distributed with this work for additional information
|
|
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
|
|
// to you under the Apache License, Version 2.0 (the
|
|
|
|
// "License"); you may not use this file except in compliance
|
|
|
|
// with the License. You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package storage
|
|
|
|
|
|
|
|
import (
|
|
|
|
"io"
|
|
|
|
"reflect"
|
|
|
|
"testing"
|
|
|
|
|
enhance: add delta log stream new format reader and writer (#34116)
issue: #34123
Benchmark case: The benchmark runs the Go benchmark function
`BenchmarkDeltalogFormat`, which is included in the changed files. It tests
the performance of serializing and deserializing two different data
formats on a dataset of 10 million delete logs.
Metrics: The benchmarks measure the average time taken per operation
(ns/op), memory allocated per operation (MB/op), and the number of
memory allocations per operation (allocs/op).
| Test Name | Avg Time (ns/op) | Time Comparison | Memory Allocation
(MB/op) | Memory Comparison | Allocation Count (allocs/op) | Allocation
Comparison |
|---------------------------------|------------------|-----------------|---------------------------|-------------------|------------------------------|------------------------|
| one_string_format_reader | 2,781,990,000 | Baseline | 2,422 | Baseline
| 20,336,539 | Baseline |
| pk_ts_separate_format_reader | 480,682,639 | -82.72% | 1,765 | -27.14%
| 20,396,958 | +0.30% |
| one_string_format_writer | 5,483,436,041 | Baseline | 13,900 |
Baseline | 70,057,473 | Baseline |
| pk_and_ts_separate_format_writer| 798,591,584 | -85.43% | 2,178 |
-84.34% | 30,270,488 | -56.78% |
Both read and write operations show significant improvements in both
speed and memory allocation.
Signed-off-by: shaoting-huang <shaoting.huang@zilliz.com>
2024-07-06 09:08:09 +08:00
|
|
|
"github.com/apache/arrow/go/v12/arrow"
|
2024-03-04 19:31:09 +08:00
|
|
|
"github.com/apache/arrow/go/v12/arrow/array"
|
|
|
|
"github.com/apache/arrow/go/v12/arrow/memory"
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
|
|
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
|
|
|
"github.com/milvus-io/milvus/pkg/common"
|
|
|
|
)
|
|
|
|
|
2024-04-11 10:33:20 +08:00
|
|
|
func TestSerDe(t *testing.T) {
|
2024-03-04 19:31:09 +08:00
|
|
|
type args struct {
|
2024-04-11 10:33:20 +08:00
|
|
|
dt schemapb.DataType
|
|
|
|
v any
|
2024-03-04 19:31:09 +08:00
|
|
|
}
|
|
|
|
tests := []struct {
|
|
|
|
name string
|
|
|
|
args args
|
|
|
|
want interface{}
|
|
|
|
want1 bool
|
|
|
|
}{
|
2024-04-11 10:33:20 +08:00
|
|
|
{"test bool", args{dt: schemapb.DataType_Bool, v: true}, true, true},
|
2024-05-27 16:27:42 +08:00
|
|
|
{"test bool null", args{dt: schemapb.DataType_Bool, v: nil}, nil, true},
|
|
|
|
{"test bool negative", args{dt: schemapb.DataType_Bool, v: -1}, nil, false},
|
2024-04-11 10:33:20 +08:00
|
|
|
{"test int8", args{dt: schemapb.DataType_Int8, v: int8(1)}, int8(1), true},
|
2024-05-27 16:27:42 +08:00
|
|
|
{"test int8 null", args{dt: schemapb.DataType_Int8, v: nil}, nil, true},
|
|
|
|
{"test int8 negative", args{dt: schemapb.DataType_Int8, v: true}, nil, false},
|
2024-04-11 10:33:20 +08:00
|
|
|
{"test int16", args{dt: schemapb.DataType_Int16, v: int16(1)}, int16(1), true},
|
2024-05-27 16:27:42 +08:00
|
|
|
{"test int16 null", args{dt: schemapb.DataType_Int16, v: nil}, nil, true},
|
|
|
|
{"test int16 negative", args{dt: schemapb.DataType_Int16, v: true}, nil, false},
|
2024-04-11 10:33:20 +08:00
|
|
|
{"test int32", args{dt: schemapb.DataType_Int32, v: int32(1)}, int32(1), true},
|
2024-05-27 16:27:42 +08:00
|
|
|
{"test int32 null", args{dt: schemapb.DataType_Int32, v: nil}, nil, true},
|
|
|
|
{"test int32 negative", args{dt: schemapb.DataType_Int32, v: true}, nil, false},
|
2024-04-11 10:33:20 +08:00
|
|
|
{"test int64", args{dt: schemapb.DataType_Int64, v: int64(1)}, int64(1), true},
|
2024-05-27 16:27:42 +08:00
|
|
|
{"test int64 null", args{dt: schemapb.DataType_Int64, v: nil}, nil, true},
|
|
|
|
{"test int64 negative", args{dt: schemapb.DataType_Int64, v: true}, nil, false},
|
2024-04-11 10:33:20 +08:00
|
|
|
{"test float32", args{dt: schemapb.DataType_Float, v: float32(1)}, float32(1), true},
|
2024-05-27 16:27:42 +08:00
|
|
|
{"test float32 null", args{dt: schemapb.DataType_Float, v: nil}, nil, true},
|
|
|
|
{"test float32 negative", args{dt: schemapb.DataType_Float, v: -1}, nil, false},
|
2024-04-11 10:33:20 +08:00
|
|
|
{"test float64", args{dt: schemapb.DataType_Double, v: float64(1)}, float64(1), true},
|
2024-05-27 16:27:42 +08:00
|
|
|
{"test float64 null", args{dt: schemapb.DataType_Double, v: nil}, nil, true},
|
|
|
|
{"test float64 negative", args{dt: schemapb.DataType_Double, v: -1}, nil, false},
|
2024-04-11 10:33:20 +08:00
|
|
|
{"test string", args{dt: schemapb.DataType_String, v: "test"}, "test", true},
|
2024-05-27 16:27:42 +08:00
|
|
|
{"test string null", args{dt: schemapb.DataType_String, v: nil}, nil, true},
|
|
|
|
{"test string negative", args{dt: schemapb.DataType_String, v: -1}, nil, false},
|
2024-04-11 10:33:20 +08:00
|
|
|
{"test varchar", args{dt: schemapb.DataType_VarChar, v: "test"}, "test", true},
|
2024-05-27 16:27:42 +08:00
|
|
|
{"test varchar null", args{dt: schemapb.DataType_VarChar, v: nil}, nil, true},
|
|
|
|
{"test varchar negative", args{dt: schemapb.DataType_VarChar, v: -1}, nil, false},
|
2024-04-11 10:33:20 +08:00
|
|
|
{"test array negative", args{dt: schemapb.DataType_Array, v: "{}"}, nil, false},
|
2024-05-27 16:27:42 +08:00
|
|
|
{"test array null", args{dt: schemapb.DataType_Array, v: nil}, nil, true},
|
2024-04-11 10:33:20 +08:00
|
|
|
{"test json", args{dt: schemapb.DataType_JSON, v: []byte("{}")}, []byte("{}"), true},
|
2024-05-27 16:27:42 +08:00
|
|
|
{"test json null", args{dt: schemapb.DataType_JSON, v: nil}, nil, true},
|
|
|
|
{"test json negative", args{dt: schemapb.DataType_JSON, v: -1}, nil, false},
|
2024-04-11 10:33:20 +08:00
|
|
|
{"test float vector", args{dt: schemapb.DataType_FloatVector, v: []float32{1.0}}, []float32{1.0}, true},
|
2024-05-27 16:27:42 +08:00
|
|
|
{"test float vector null", args{dt: schemapb.DataType_FloatVector, v: nil}, nil, true},
|
|
|
|
{"test float vector negative", args{dt: schemapb.DataType_FloatVector, v: []int{1}}, nil, false},
|
2024-04-11 10:33:20 +08:00
|
|
|
{"test bool vector", args{dt: schemapb.DataType_BinaryVector, v: []byte{0xff}}, []byte{0xff}, true},
|
|
|
|
{"test float16 vector", args{dt: schemapb.DataType_Float16Vector, v: []byte{0xff, 0xff}}, []byte{0xff, 0xff}, true},
|
|
|
|
{"test bfloat16 vector", args{dt: schemapb.DataType_BFloat16Vector, v: []byte{0xff, 0xff}}, []byte{0xff, 0xff}, true},
|
2024-05-27 16:27:42 +08:00
|
|
|
{"test bfloat16 vector null", args{dt: schemapb.DataType_BFloat16Vector, v: nil}, nil, true},
|
|
|
|
{"test bfloat16 vector negative", args{dt: schemapb.DataType_BFloat16Vector, v: -1}, nil, false},
|
2024-03-04 19:31:09 +08:00
|
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
|
|
t.Run(tt.name, func(t *testing.T) {
|
2024-04-11 10:33:20 +08:00
|
|
|
dt := tt.args.dt
|
|
|
|
v := tt.args.v
|
|
|
|
builder := array.NewBuilder(memory.DefaultAllocator, serdeMap[dt].arrowType(1))
|
|
|
|
serdeMap[dt].serialize(builder, v)
|
|
|
|
// assert.True(t, ok)
|
|
|
|
a := builder.NewArray()
|
|
|
|
got, got1 := serdeMap[dt].deserialize(a, 0)
|
2024-03-04 19:31:09 +08:00
|
|
|
if !reflect.DeepEqual(got, tt.want) {
|
2024-04-11 10:33:20 +08:00
|
|
|
t.Errorf("deserialize() got = %v, want %v", got, tt.want)
|
2024-03-04 19:31:09 +08:00
|
|
|
}
|
|
|
|
if got1 != tt.want1 {
|
2024-04-11 10:33:20 +08:00
|
|
|
t.Errorf("deserialize() got1 = %v, want %v", got1, tt.want1)
|
2024-03-04 19:31:09 +08:00
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
2024-03-08 18:25:02 +08:00
|
|
|
|
enhance: add delta log stream new format reader and writer (#34116)
issue: #34123
Benchmark case: The benchmark runs the Go benchmark function
`BenchmarkDeltalogFormat`, which is included in the changed files. It tests
the performance of serializing and deserializing two different data
formats on a dataset of 10 million delete logs.
Metrics: The benchmarks measure the average time taken per operation
(ns/op), memory allocated per operation (MB/op), and the number of
memory allocations per operation (allocs/op).
| Test Name | Avg Time (ns/op) | Time Comparison | Memory Allocation
(MB/op) | Memory Comparison | Allocation Count (allocs/op) | Allocation
Comparison |
|---------------------------------|------------------|-----------------|---------------------------|-------------------|------------------------------|------------------------|
| one_string_format_reader | 2,781,990,000 | Baseline | 2,422 | Baseline
| 20,336,539 | Baseline |
| pk_ts_separate_format_reader | 480,682,639 | -82.72% | 1,765 | -27.14%
| 20,396,958 | +0.30% |
| one_string_format_writer | 5,483,436,041 | Baseline | 13,900 |
Baseline | 70,057,473 | Baseline |
| pk_and_ts_separate_format_writer| 798,591,584 | -85.43% | 2,178 |
-84.34% | 30,270,488 | -56.78% |
Both read and write operations show significant improvements in both
speed and memory allocation.
Signed-off-by: shaoting-huang <shaoting.huang@zilliz.com>
2024-07-06 09:08:09 +08:00
|
|
|
func TestArrowSchema(t *testing.T) {
|
|
|
|
fields := []arrow.Field{{Name: "1", Type: arrow.BinaryTypes.String, Nullable: true}}
|
|
|
|
builder := array.NewBuilder(memory.DefaultAllocator, arrow.BinaryTypes.String)
|
|
|
|
builder.AppendValueFromString("1")
|
|
|
|
record := array.NewRecord(arrow.NewSchema(fields, nil), []arrow.Array{builder.NewArray()}, 1)
|
|
|
|
t.Run("test composite record", func(t *testing.T) {
|
|
|
|
cr := &compositeRecord{
|
|
|
|
recs: make(map[FieldID]arrow.Record, 1),
|
|
|
|
schema: make(map[FieldID]schemapb.DataType, 1),
|
|
|
|
}
|
|
|
|
cr.recs[0] = record
|
|
|
|
cr.schema[0] = schemapb.DataType_String
|
|
|
|
expected := arrow.NewSchema(fields, nil)
|
|
|
|
assert.Equal(t, expected, cr.ArrowSchema())
|
|
|
|
})
|
|
|
|
|
|
|
|
t.Run("test simple arrow record", func(t *testing.T) {
|
|
|
|
cr := &simpleArrowRecord{
|
|
|
|
r: record,
|
|
|
|
schema: make(map[FieldID]schemapb.DataType, 1),
|
|
|
|
field2Col: make(map[FieldID]int, 1),
|
|
|
|
}
|
|
|
|
cr.schema[0] = schemapb.DataType_String
|
|
|
|
expected := arrow.NewSchema(fields, nil)
|
|
|
|
assert.Equal(t, expected, cr.ArrowSchema())
|
|
|
|
|
|
|
|
sr := newSelectiveRecord(cr, 0)
|
|
|
|
assert.Equal(t, expected, sr.ArrowSchema())
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2024-03-08 18:25:02 +08:00
|
|
|
func BenchmarkDeserializeReader(b *testing.B) {
|
|
|
|
len := 1000000
|
|
|
|
blobs, err := generateTestData(len)
|
|
|
|
assert.NoError(b, err)
|
|
|
|
b.ResetTimer()
|
|
|
|
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
reader, err := NewBinlogDeserializeReader(blobs, common.RowIDField)
|
|
|
|
assert.NoError(b, err)
|
|
|
|
defer reader.Close()
|
|
|
|
for i := 0; i < len; i++ {
|
|
|
|
err = reader.Next()
|
|
|
|
_ = reader.Value()
|
|
|
|
assert.NoError(b, err)
|
|
|
|
}
|
|
|
|
err = reader.Next()
|
|
|
|
assert.Equal(b, io.EOF, err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func BenchmarkBinlogIterator(b *testing.B) {
|
|
|
|
len := 1000000
|
|
|
|
blobs, err := generateTestData(len)
|
|
|
|
assert.NoError(b, err)
|
|
|
|
b.ResetTimer()
|
|
|
|
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
itr, err := NewInsertBinlogIterator(blobs, common.RowIDField, schemapb.DataType_Int64)
|
|
|
|
assert.NoError(b, err)
|
|
|
|
defer itr.Dispose()
|
|
|
|
for i := 0; i < len; i++ {
|
|
|
|
assert.True(b, itr.HasNext())
|
|
|
|
_, err = itr.Next()
|
|
|
|
assert.NoError(b, err)
|
|
|
|
}
|
|
|
|
assert.False(b, itr.HasNext())
|
|
|
|
}
|
|
|
|
}
|