Fix bulkinsert crash caused by an invalid numpy array file (#24480) (#24528)

Signed-off-by: yhmo <yihua.mo@zilliz.com>
This commit is contained in:
groot 2023-05-31 14:43:39 +08:00 committed by GitHub
parent 3cb75997a3
commit 0d0e97ab7f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 26 additions and 0 deletions

View File

@ -271,6 +271,12 @@ func (p *NumpyParser) validateHeader(columnReader *NumpyColumnReader) error {
elementType := columnReader.reader.GetType()
shape := columnReader.reader.GetShape()
// if the user saved only a single element in a numpy file, the shape list will be empty
if len(shape) == 0 {
log.Error("Numpy parser: the content stored in numpy file is not valid numpy array",
zap.String("fieldName", columnReader.fieldName))
return fmt.Errorf("the content stored in numpy file is not valid numpy array for field '%s'", columnReader.fieldName)
}
columnReader.rowCount = shape[0]
// 1. field data type should be consistent with the numpy data type

View File

@ -142,6 +142,26 @@ func Test_NumpyParserValidateHeader(t *testing.T) {
err = parser.validateHeader(nil)
assert.Error(t, err)
// Subtest: validateHeader must return an error for a numpy file whose
// content is the single value "aaa" rather than an array.
// NOTE(review): presumably such a file yields an empty shape list, which is
// the case the new guard in validateHeader rejects — confirm against the adapter.
t.Run("not a valid numpy array", func(t *testing.T) {
// Write the single string value "aaa" to a temporary .npy file.
filePath := TempFilesPath + "invalid.npy"
err = CreateNumpyFile(filePath, "aaa")
assert.Nil(t, err)
// Reopen the file for reading; the handle is closed when the subtest returns.
file, err := os.Open(filePath)
assert.Nil(t, err)
defer file.Close()
// Creating the adapter is expected to succeed; the failure is expected
// later, from validateHeader.
adapter, err := NewNumpyAdapter(file)
assert.Nil(t, err)
// Only fieldName and reader are set on the column reader for this check.
columnReader := &NumpyColumnReader{
fieldName: "invalid",
reader: adapter,
}
// Validation must fail with an error.
err = parser.validateHeader(columnReader)
assert.Error(t, err)
})
validateHeader := func(data interface{}, fieldSchema *schemapb.FieldSchema) error {
filePath := TempFilesPath + fieldSchema.GetName() + ".npy"