Add config logic and tikv_benchmark.go
Signed-off-by: zhenshan.cao <zhenshan.cao@zilliz.com>
commit 3bb0da1f57 (parent 7f798c5fce)
cmd/storage/tikv_benchmark.go (new file, 124 lines)
@@ -0,0 +1,124 @@
// s3-benchmark.go
// Copyright (c) 2017 Wasabi Technology, Inc.

package main

import (
	"context"
	"crypto/md5"
	"flag"
	"fmt"
	"log"
	"math/rand"
	"net/http"
	"os"
	"sync/atomic"
	"time"

	"code.cloudfoundry.org/bytefmt"

	"github.com/czs007/suvlim/storage/pkg"
	. "github.com/czs007/suvlim/storage/pkg/types"
)

// Global variables
var duration_secs, threads int
var object_size uint64
var object_data []byte
var running_threads, upload_count, upload_slowdown_count int32
var endtime, upload_finish time.Time

func logit(msg string) {
	fmt.Println(msg)
	logfile, _ := os.OpenFile("benchmark.log", os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0666)
	if logfile != nil {
		logfile.WriteString(time.Now().Format(http.TimeFormat) + ": " + msg + "\n")
		logfile.Close()
	}
}

func _putFile(ctx context.Context, store Store) {
	// count this PUT so the summary statistics are meaningful
	atomic.AddInt32(&upload_count, 1)
	key := "collection_abc"
	err := store.PutRow(ctx, []byte(key), object_data, "abc", uint64(time.Now().Unix()))
	if err != nil {
		atomic.AddInt32(&upload_slowdown_count, 1)
	}
}

func runPutFile(thread_num int) {
	var store Store
	var err error
	ctx := context.Background()
	store, err = storage.NewStore(ctx, TIKVDriver)
	if err != nil {
		panic(err.Error())
	}

	for time.Now().Before(endtime) {
		_putFile(ctx, store)
	}

	// Remember last done time
	upload_finish = time.Now()
	// One less thread
	atomic.AddInt32(&running_threads, -1)
}

func main() {
	// Parse command line
	myflag := flag.NewFlagSet("myflag", flag.ExitOnError)
	myflag.IntVar(&duration_secs, "d", 1, "Duration of each test in seconds")
	myflag.IntVar(&threads, "t", 1, "Number of threads to run")

	var sizeArg string
	myflag.StringVar(&sizeArg, "z", "1M", "Size of objects in bytes with postfix K, M, and G")
	if err := myflag.Parse(os.Args[1:]); err != nil {
		os.Exit(1)
	}

	// Check the arguments
	var err error
	if object_size, err = bytefmt.ToBytes(sizeArg); err != nil {
		log.Fatalf("Invalid -z argument for object size: %v", err)
	}

	logit(fmt.Sprintf("Parameters: duration=%d, threads=%d, size=%s",
		duration_secs, threads, sizeArg))

	// Initialize data for the bucket
	object_data = make([]byte, object_size)
	rand.Read(object_data)
	hasher := md5.New()
	hasher.Write(object_data)

	// reset counters
	upload_count = 0
	upload_slowdown_count = 0

	running_threads = int32(threads)

	// Run the upload case
	starttime := time.Now()
	endtime = starttime.Add(time.Second * time.Duration(duration_secs))

	for n := 1; n <= threads; n++ {
		go runPutFile(n)
	}

	// Wait for it to finish
	for atomic.LoadInt32(&running_threads) > 0 {
		time.Sleep(time.Millisecond)
	}
	upload_time := upload_finish.Sub(starttime).Seconds()

	bps := float64(uint64(upload_count)*object_size) / upload_time
	logit(fmt.Sprintf("PUT time %.1f secs, objects = %d, speed = %sB/sec, %.1f operations/sec. Slowdowns = %d",
		upload_time, upload_count, bytefmt.ByteSize(uint64(bps)), float64(upload_count)/upload_time, upload_slowdown_count))

	fmt.Println(" upload_count :", upload_count)
}
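The benchmark is driven entirely by the three flags parsed in main, so a run would look something like: go run cmd/storage/tikv_benchmark.go -d 10 -t 4 -z 1M (illustrative values; the TiKV endpoint is presumably picked up from the config logic this commit adds). One design note: main waits for the workers by polling the running_threads atomic in a sleep loop. A minimal standalone sketch of the same coordination with sync.WaitGroup, the more idiomatic Go pattern (not part of this commit):

package main

import (
	"fmt"
	"sync"
	"time"
)

// runWorkers starts `threads` goroutines that call work() in a loop until
// endtime, then blocks until all of them have returned. This replaces the
// poll-and-sleep loop over an atomic counter with a WaitGroup.
func runWorkers(threads int, endtime time.Time, work func()) {
	var wg sync.WaitGroup
	for n := 0; n < threads; n++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for time.Now().Before(endtime) {
				work()
			}
		}()
	}
	wg.Wait() // no busy-waiting: returns once every worker is done
}

func main() {
	endtime := time.Now().Add(2 * time.Second)
	runWorkers(4, endtime, func() { time.Sleep(10 * time.Millisecond) })
	fmt.Println("all workers finished")
}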
@@ -152,6 +152,64 @@ TEST(CApiTest, SearchTest) {
     }
 }
 
+
+TEST(CApiTest, SearchSimpleTest) {
+    auto collection_name = "collection0";
+    auto schema_tmp_conf = "null_schema";
+    auto collection = NewCollection(collection_name, schema_tmp_conf);
+    auto partition_name = "partition0";
+    auto partition = NewPartition(collection, partition_name);
+    auto segment = NewSegment(partition, 0);
+
+    std::vector<char> raw_data;
+    std::vector<uint64_t> timestamps;
+    std::vector<int64_t> uids;
+
+    int N = 3;
+    int DIM = 16;
+
+    std::vector<float> vec(DIM);
+    for (int i = 0; i < DIM; i++) {
+        vec[i] = i;
+    }
+
+    for (int i = 0; i < N; i++) {
+        uids.push_back(i);
+        timestamps.emplace_back(i);
+        // append vec: copy the element data via vec.data(), not the address
+        // of the std::vector object itself
+        raw_data.insert(raw_data.end(), (const char *) vec.data(),
+                        (const char *) vec.data() + sizeof(float) * vec.size());
+        int age = i;
+        raw_data.insert(raw_data.end(), (const char *) &age, ((const char *) &age) + sizeof(age));
+    }
+
+    auto line_sizeof = (sizeof(int) + sizeof(float) * DIM);
+
+    auto offset = PreInsert(segment, N);
+
+    auto ins_res = Insert(segment, offset, N, uids.data(), timestamps.data(), raw_data.data(), (int) line_sizeof, N);
+    assert(ins_res == 0);
+
+    Close(segment);
+    BuildIndex(segment);
+
+    long result_ids[10];
+    float result_distances[10];
+
+    auto query_json = std::string(R"({"field_name":"fakevec","num_queries":1,"topK":10})");
+    std::vector<float> query_raw_data(DIM);
+    for (int i = 0; i < DIM; i++) {
+        query_raw_data[i] = i;
+    }
+
+    auto sea_res = Search(segment, query_json.data(), 1, query_raw_data.data(), DIM, result_ids, result_distances);
+    assert(sea_res == 0);
+
+    DeleteCollection(collection);
+    DeletePartition(partition);
+    DeleteSegment(segment);
+}
+
+
 TEST(CApiTest, IsOpenedTest) {
     auto collection_name = "collection0";
     auto schema_tmp_conf = "null_schema";
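Both the new C++ test above and the new Go test below serialize rows against the same implicit schema (a 16-dimensional float vector field plus an int32 age field), 68 bytes per row. A minimal standalone sketch of that encoding in Go (layout inferred from the two tests; encodeRow is an illustrative helper, not part of the commit):

package main

import (
	"encoding/binary"
	"fmt"
	"math"
)

const dim = 16

// encodeRow serializes one row the way the tests do: dim little-endian
// float32 values followed by one little-endian int32 age field.
func encodeRow(vec [dim]float32, age int32) []byte {
	row := make([]byte, 0, 4*dim+4)
	buf := make([]byte, 4)
	for _, ele := range vec {
		binary.LittleEndian.PutUint32(buf, math.Float32bits(ele))
		row = append(row, buf...)
	}
	binary.LittleEndian.PutUint32(buf, uint32(age))
	return append(row, buf...)
}

func main() {
	var vec = [dim]float32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
	row := encodeRow(vec, 1)
	fmt.Println(len(row)) // 68, matching line_sizeof = sizeof(int) + sizeof(float) * DIM
}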
reader/index_test.go (new file, 74 lines)
@@ -0,0 +1,74 @@
package reader

import (
	"encoding/binary"
	"fmt"
	"math"
	"testing"

	msgPb "github.com/czs007/suvlim/pkg/master/grpc/message"
	"github.com/stretchr/testify/assert"
)

func TestIndex_BuildIndex(t *testing.T) {
	// 1. Construct node, collection, partition and segment
	node := NewQueryNode(0, 0)
	var collection = node.NewCollection("collection0", "fake schema")
	var partition = collection.NewPartition("partition0")
	var segment = partition.NewSegment(0)

	// 2. Create ids and timestamps
	ids := make([]int64, 0)
	timestamps := make([]uint64, 0)

	// 3. Create records, use schema below:
	// schema_tmp->AddField("fakeVec", DataType::VECTOR_FLOAT, 16);
	// schema_tmp->AddField("age", DataType::INT32);
	const DIM = 16
	const N = 10000
	var vec = [DIM]float32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
	var rawData []byte
	for _, ele := range vec {
		buf := make([]byte, 4)
		binary.LittleEndian.PutUint32(buf, math.Float32bits(ele))
		rawData = append(rawData, buf...)
	}
	bs := make([]byte, 4)
	binary.LittleEndian.PutUint32(bs, 1)
	rawData = append(rawData, bs...)
	var records [][]byte
	for i := 0; i < N; i++ {
		ids = append(ids, int64(i))
		timestamps = append(timestamps, uint64(i))
		records = append(records, rawData)
	}

	// 4. Do PreInsert
	var offset = segment.SegmentPreInsert(N)
	assert.GreaterOrEqual(t, offset, int64(0))

	// 5. Do Insert
	var err = segment.SegmentInsert(offset, &ids, &timestamps, &records)
	assert.NoError(t, err)

	// 6. Close segment, and build index
	err = segment.Close()
	assert.NoError(t, err)

	// 7. Do search
	var queryJson = "{\"field_name\":\"fakevec\",\"num_queries\":1,\"topK\":10}"
	var queryRawData = make([]float32, 0)
	for i := 0; i < 16; i++ {
		queryRawData = append(queryRawData, float32(i))
	}
	var vectorRecord = msgPb.VectorRowRecord{
		FloatData: queryRawData,
	}
	var searchRes, searchErr = segment.SegmentSearch(queryJson, timestamps[N/2], &vectorRecord)
	assert.NoError(t, searchErr)
	fmt.Println(searchRes)

	// 8. Destruct node, collection, and segment
	partition.DeleteSegment(segment)
	collection.DeletePartition(partition)
	node.DeleteCollection(collection)
}
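In both tests the search request is a hand-written JSON string. A sketch of producing the same payload from a struct (the searchQuery type and its field tags are illustrative, mirroring the keys the tests use; not part of the commit):

package main

import (
	"encoding/json"
	"fmt"
)

// searchQuery mirrors the ad-hoc JSON the tests pass to SegmentSearch.
type searchQuery struct {
	FieldName  string `json:"field_name"`
	NumQueries int    `json:"num_queries"`
	TopK       int    `json:"topK"`
}

func main() {
	q, err := json.Marshal(searchQuery{FieldName: "fakevec", NumQueries: 1, TopK: 10})
	if err != nil {
		panic(err)
	}
	fmt.Println(string(q)) // {"field_name":"fakevec","num_queries":1,"topK":10}
}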
@@ -383,6 +383,9 @@ func (node *QueryNode) DoInsertAndDelete() msgPb.Status {
 
 	// Do delete
 	for segmentID, deleteIDs := range node.deleteData.deleteIDs {
+		if segmentID < 0 {
+			continue
+		}
 		wg.Add(1)
 		var deleteTimestamps = node.deleteData.deleteTimestamps[segmentID]
 		fmt.Println("Doing delete......")
@@ -100,14 +100,14 @@ func TestSegment_SegmentSearch(t *testing.T) {
 	var segment = partition.NewSegment(0)
 
 	// 2. Create ids and timestamps
-	ids := []int64{1, 2, 3}
-	timestamps := []uint64{0, 0, 0}
+	ids := make([]int64, 0)
+	timestamps := make([]uint64, 0)
 
 	// 3. Create records, use schema below:
 	// schema_tmp->AddField("fakeVec", DataType::VECTOR_FLOAT, 16);
 	// schema_tmp->AddField("age", DataType::INT32);
 	const DIM = 16
-	const N = 3
+	const N = 100
 	var vec = [DIM]float32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
 	var rawData []byte
 	for _, ele := range vec {
@@ -120,6 +120,8 @@ func TestSegment_SegmentSearch(t *testing.T) {
 	rawData = append(rawData, bs...)
 	var records [][]byte
 	for i := 0; i < N; i++ {
+		ids = append(ids, int64(i))
+		timestamps = append(timestamps, uint64(i + 1))
 		records = append(records, rawData)
 	}
 
@@ -140,7 +142,7 @@ func TestSegment_SegmentSearch(t *testing.T) {
 	var vectorRecord = msgPb.VectorRowRecord {
 		FloatData: queryRawData,
 	}
-	var searchRes, searchErr = segment.SegmentSearch(queryJson, timestamps[0], &vectorRecord)
+	var searchRes, searchErr = segment.SegmentSearch(queryJson, timestamps[N/2], &vectorRecord)
 	assert.NoError(t, searchErr)
 	fmt.Println(searchRes)
 
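Taken together, these three hunks rework TestSegment_SegmentSearch: instead of three rows that all share timestamp 0, the test now inserts N = 100 rows with strictly increasing timestamps (uint64(i + 1)) and issues the search at timestamps[N/2] rather than timestamps[0], presumably so the search exercises timestamp-based visibility at a point in the middle of the inserted data rather than querying everything at time zero. The new index_test.go above uses the same timestamps[N/2] convention.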
@@ -13,10 +13,16 @@ func (node *QueryNode) GetKey2Segments() (*[]int64, *[]uint64, *[]int64) {
 
 	var key2SegMsg = node.messageClient.Key2SegMsg
 	for _, msg := range key2SegMsg {
-		for _, segmentID := range msg.SegmentId {
+		if msg.SegmentId == nil {
+			segmentIDs = append(segmentIDs, -1)
 			entityIDs = append(entityIDs, msg.Uid)
 			timestamps = append(timestamps, msg.Timestamp)
-			segmentIDs = append(segmentIDs, segmentID)
+		} else {
+			for _, segmentID := range msg.SegmentId {
+				segmentIDs = append(segmentIDs, segmentID)
+				entityIDs = append(entityIDs, msg.Uid)
+				timestamps = append(timestamps, msg.Timestamp)
+			}
 		}
 	}
 
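Read together with the DoInsertAndDelete hunk above, this is one change: GetKey2Segments now records a sentinel segment ID of -1 whenever a Key2Seg message arrives without a segment assignment (msg.SegmentId == nil), and the delete loop skips those entries via the new "if segmentID < 0 { continue }" guard rather than attempting a delete against a segment that does not exist.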