2021-10-03 17:40:30 +08:00
|
|
|
|
## 8 Binlog
|
2020-12-03 10:16:22 +08:00
|
|
|
|
|
|
|
|
|
InsertBinlog, DeleteBinlog, DDLBinlog
|
|
|
|
|
|
2021-07-07 19:10:07 +08:00
|
|
|
|
Binlog is stored in a columnar storage format: every column in the schema is stored in an individual file.
|
|
|
|
|
Timestamp, schema, row id and primary key allocated by system are four special columns.
|
|
|
|
|
Schema column records the DDL of the collection.
|
2020-12-03 10:16:22 +08:00
|
|
|
|
|
|
|
|
|
## Event format
|
|
|
|
|
|
2021-04-12 12:45:38 +08:00
|
|
|
|
A binlog file consists of a 4-byte magic number followed by a series of events. The first event must be a descriptor event.
|
2020-12-03 10:16:22 +08:00
|
|
|
|
|
2021-10-03 17:40:30 +08:00
|
|
|
|
### 8.1 Event format
|
2020-12-03 10:16:22 +08:00
|
|
|
|
|
|
|
|
|
```
|
2021-07-07 19:10:07 +08:00
|
|
|
|
+=====================================+=====================================================================+
|
|
|
|
|
| event | Timestamp 0 : 8 | create timestamp |
|
|
|
|
|
| header +----------------------------+---------------------------------------------------------------------+
|
|
|
|
|
| | TypeCode 8 : 1 | event type code |
|
|
|
|
|
| +----------------------------+---------------------------------------------------------------------+
|
2021-09-24 18:17:56 +08:00
|
|
|
|
| | EventLength 9 : 4 | length of event, including header and data |
|
2021-07-07 19:10:07 +08:00
|
|
|
|
| +----------------------------+---------------------------------------------------------------------+
|
2021-09-24 18:17:56 +08:00
|
|
|
|
| | NextPosition 13 : 4 | offset of next event from the start of file |
|
2021-07-07 19:10:07 +08:00
|
|
|
|
+=====================================+=====================================================================+
|
2021-09-24 18:17:56 +08:00
|
|
|
|
| event | fixed part 17 : x | |
|
2021-07-07 19:10:07 +08:00
|
|
|
|
| data +----------------------------+---------------------------------------------------------------------+
|
|
|
|
|
| | variable part | |
|
|
|
|
|
+=====================================+=====================================================================+
|
2020-12-03 10:16:22 +08:00
|
|
|
|
```
|
|
|
|
|
|
2021-10-03 17:40:30 +08:00
|
|
|
|
### 8.2 Descriptor Event format
|
2020-12-03 10:16:22 +08:00
|
|
|
|
|
|
|
|
|
```
|
2021-07-07 19:10:07 +08:00
|
|
|
|
+=====================================+=====================================================================+
|
|
|
|
|
| event | Timestamp 0 : 8 | create timestamp |
|
|
|
|
|
| header +----------------------------+---------------------------------------------------------------------+
|
|
|
|
|
| | TypeCode 8 : 1 | event type code |
|
|
|
|
|
| +----------------------------+---------------------------------------------------------------------+
|
2021-09-24 18:17:56 +08:00
|
|
|
|
| | EventLength 9 : 4 | length of event, including header and data |
|
2021-07-07 19:10:07 +08:00
|
|
|
|
| +----------------------------+---------------------------------------------------------------------+
|
2021-09-24 18:17:56 +08:00
|
|
|
|
| | NextPosition 13 : 4 | offset of next event from the start of file |
|
2021-07-07 19:10:07 +08:00
|
|
|
|
+=====================================+=====================================================================+
|
2021-09-24 18:17:56 +08:00
|
|
|
|
| event | CollectionID 17 : 8 | collection id |
|
2021-07-07 19:10:07 +08:00
|
|
|
|
| data +----------------------------+---------------------------------------------------------------------+
|
2021-09-24 18:17:56 +08:00
|
|
|
|
| | PartitionID 25 : 8 | partition id (schema column does not need) |
|
2021-07-07 19:10:07 +08:00
|
|
|
|
| +----------------------------+---------------------------------------------------------------------+
|
2021-09-24 18:17:56 +08:00
|
|
|
|
| | SegmentID 33 : 8 | segment id (schema column does not need) |
|
2021-07-07 19:10:07 +08:00
|
|
|
|
| +----------------------------+---------------------------------------------------------------------+
|
2021-09-24 18:17:56 +08:00
|
|
|
|
| | FieldID 41 : 8 | field id (schema column does not need) |
|
2021-07-07 19:10:07 +08:00
|
|
|
|
| +----------------------------+---------------------------------------------------------------------+
|
2021-09-24 18:17:56 +08:00
|
|
|
|
| | StartTimestamp 49 : 8 | minimum timestamp allocated by master of all events in this file |
|
2021-07-07 19:10:07 +08:00
|
|
|
|
| +----------------------------+---------------------------------------------------------------------+
|
2021-09-24 18:17:56 +08:00
|
|
|
|
| | EndTimestamp 57 : 8 | maximum timestamp allocated by master of all events in this file |
|
2021-07-07 19:10:07 +08:00
|
|
|
|
| +----------------------------+---------------------------------------------------------------------+
|
2021-09-24 18:17:56 +08:00
|
|
|
|
| | PayloadDataType 65 : 4 | data type of payload |
|
2021-07-07 19:10:07 +08:00
|
|
|
|
| +----------------------------+---------------------------------------------------------------------+
|
2021-09-24 18:17:56 +08:00
|
|
|
|
| | ExtraLength 69 : 4 | length of extra information |
|
2021-07-07 19:10:07 +08:00
|
|
|
|
| +----------------------------+---------------------------------------------------------------------+
|
2021-09-24 18:17:56 +08:00
|
|
|
|
| | ExtraBytes 73 : n | extra information in json format |
|
2021-07-07 19:10:07 +08:00
|
|
|
|
| +----------------------------+---------------------------------------------------------------------+
|
2021-09-24 18:17:56 +08:00
|
|
|
|
| | PostHeaderLengths n : n | header lengths for all event types |
|
2021-07-07 19:10:07 +08:00
|
|
|
|
+=====================================+=====================================================================+
|
2020-12-03 10:16:22 +08:00
|
|
|
|
```
|
|
|
|
|
|
2021-10-11 17:56:29 +08:00
|
|
|
|
`ExtraBytes` is in json format.
|
|
|
|
|
|
|
|
|
|
`ExtraBytes` stores the extra information of the binlog file.
|
|
|
|
|
|
|
|
|
|
In a binlog file, we store many common fields in the fixed part, such as `CollectionID`, `PartitionID`, etc.
|
|
|
|
|
|
|
|
|
|
However, different kinds of binlog files also carry additional information that is specific to each kind.
|
|
|
|
|
|
|
|
|
|
So, `ExtraBytes` is designed to store this file-specific information.
|
|
|
|
|
|
|
|
|
|
For example, for an index binlog file, we will store `indexID`, `indexBuildID` and other index-related
|
|
|
|
|
information to `ExtraBytes`.
|
|
|
|
|
|
|
|
|
|
In addition, `ExtraBytes` was also designed to extend binlog. Then we can add new features to binlog file without
|
|
|
|
|
breaking the compatibility.
|
|
|
|
|
|
|
|
|
|
For example, we can store the memory size of the original content (before encoding) to `ExtraBytes`.
|
|
|
|
|
The key in `ExtraBytes` is `original_size`. For now, `original_size` is required, not optional.
|
|
|
|
|
|
2021-10-03 17:40:30 +08:00
|
|
|
|
### 8.3 Type code
|
2020-12-03 10:16:22 +08:00
|
|
|
|
|
|
|
|
|
```
|
|
|
|
|
DESCRIPTOR_EVENT
|
|
|
|
|
INSERT_EVENT
|
|
|
|
|
DELETE_EVENT
|
|
|
|
|
CREATE_COLLECTION_EVENT
|
|
|
|
|
DROP_COLLECTION_EVENT
|
|
|
|
|
CREATE_PARTITION_EVENT
|
|
|
|
|
DROP_PARTITION_EVENT
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
DESCRIPTOR_EVENT must appear in all column files and always be the first event.
|
|
|
|
|
|
2021-07-07 19:10:07 +08:00
|
|
|
|
INSERT_EVENT can appear in the binlog file of any column except the DDL binlog file.
|
2020-12-03 10:16:22 +08:00
|
|
|
|
|
2021-07-07 19:10:07 +08:00
|
|
|
|
DELETE_EVENT can only be used in the primary key's binlog file (currently, deletion is supported only by primary key).
|
2020-12-03 10:16:22 +08:00
|
|
|
|
|
2021-07-07 19:10:07 +08:00
|
|
|
|
CREATE_COLLECTION_EVENT, DROP_COLLECTION_EVENT, CREATE_PARTITION_EVENT and DROP_PARTITION_EVENT appear only in the DDL binlog file.
|
2020-12-03 10:16:22 +08:00
|
|
|
|
|
2021-10-03 17:40:30 +08:00
|
|
|
|
### 8.4 Event data part
|
2020-12-03 10:16:22 +08:00
|
|
|
|
|
|
|
|
|
```
|
|
|
|
|
event data part
|
|
|
|
|
|
|
|
|
|
INSERT_EVENT:
|
2021-07-07 19:10:07 +08:00
|
|
|
|
+================================================+==========================================================+
|
|
|
|
|
| event | fixed | StartTimestamp x : 8 | min timestamp in this event |
|
|
|
|
|
| data | part +------------------------------+----------------------------------------------------------+
|
|
|
|
|
| | | EndTimestamp x+8 : 8 | max timestamp in this event |
|
|
|
|
|
| +--------+------------------------------+----------------------------------------------------------+
|
|
|
|
|
| |variable| parquet payload | payload in parquet format |
|
|
|
|
|
| |part | | |
|
|
|
|
|
+================================================+==========================================================+
|
|
|
|
|
|
|
|
|
|
other events are similar to INSERT_EVENT
|
2020-12-03 10:16:22 +08:00
|
|
|
|
```
|
|
|
|
|
|
2021-10-03 17:40:30 +08:00
|
|
|
|
### 8.5 Example
|
2020-12-03 10:16:22 +08:00
|
|
|
|
|
|
|
|
|
Schema
|
|
|
|
|
|
2021-10-06 13:57:11 +08:00
|
|
|
|
string | int | float(optional) | vector(512)
|
2020-12-03 10:16:22 +08:00
|
|
|
|
|
2021-04-12 12:45:38 +08:00
|
|
|
|
Request:
|
2020-12-03 10:16:22 +08:00
|
|
|
|
|
2021-10-06 13:57:11 +08:00
|
|
|
|
InsertRequest rows(10000)
|
2020-12-03 10:16:22 +08:00
|
|
|
|
|
2021-10-06 13:57:11 +08:00
|
|
|
|
DeleteRequest pk=1
|
2020-12-03 10:16:22 +08:00
|
|
|
|
|
2021-10-06 13:57:11 +08:00
|
|
|
|
DropPartition partitionTag="abc"
|
2020-12-03 10:16:22 +08:00
|
|
|
|
|
2021-04-12 12:45:38 +08:00
|
|
|
|
insert binlogs:
|
2020-12-03 10:16:22 +08:00
|
|
|
|
|
2021-10-06 13:57:11 +08:00
|
|
|
|
rowid, pk, ts, string, int, float, vector 7 files
|
2020-12-03 10:16:22 +08:00
|
|
|
|
|
2021-10-06 13:57:11 +08:00
|
|
|
|
all events are INSERT_EVENT
|
|
|
|
|
float column file contains some NULL value
|
2020-12-03 10:16:22 +08:00
|
|
|
|
|
|
|
|
|
delete binlogs:
|
|
|
|
|
|
2021-10-06 13:57:11 +08:00
|
|
|
|
pk, ts 2 files
|
2020-12-03 10:16:22 +08:00
|
|
|
|
|
2021-10-06 13:57:11 +08:00
|
|
|
|
pk's events are DELETE_EVENT, ts's events are INSERT_EVENT
|
2020-12-03 10:16:22 +08:00
|
|
|
|
|
|
|
|
|
DDL binlogs:
|
|
|
|
|
|
2021-10-06 13:57:11 +08:00
|
|
|
|
ddl, ts
|
2020-12-03 10:16:22 +08:00
|
|
|
|
|
2021-10-06 13:57:11 +08:00
|
|
|
|
ddl's event is DROP_PARTITION_EVENT, ts's event is INSERT_EVENT
|
2020-12-03 10:16:22 +08:00
|
|
|
|
|
|
|
|
|
C++ interface
|
|
|
|
|
|
|
|
|
|
```c++
|
|
|
|
|
typedef void* CPayloadWriter;
|
|
|
|
|
typedef struct CBuffer {
|
|
|
|
|
char* data;
|
|
|
|
|
int length;
|
|
|
|
|
} CBuffer;
|
|
|
|
|
|
|
|
|
|
typedef struct CStatus {
|
|
|
|
|
int error_code;
|
|
|
|
|
const char* error_msg;
|
|
|
|
|
} CStatus;
|
|
|
|
|
|
2021-04-12 12:45:38 +08:00
|
|
|
|
|
2020-12-03 10:16:22 +08:00
|
|
|
|
// C++ interface
|
|
|
|
|
// writer
|
|
|
|
|
CPayloadWriter NewPayloadWriter(int columnType);
|
|
|
|
|
CStatus AddBooleanToPayload(CPayloadWriter payloadWriter, bool *values, int length);
|
|
|
|
|
CStatus AddInt8ToPayload(CPayloadWriter payloadWriter, int8_t *values, int length);
|
|
|
|
|
CStatus AddInt16ToPayload(CPayloadWriter payloadWriter, int16_t *values, int length);
|
|
|
|
|
CStatus AddInt32ToPayload(CPayloadWriter payloadWriter, int32_t *values, int length);
|
|
|
|
|
CStatus AddInt64ToPayload(CPayloadWriter payloadWriter, int64_t *values, int length);
|
|
|
|
|
CStatus AddFloatToPayload(CPayloadWriter payloadWriter, float *values, int length);
|
|
|
|
|
CStatus AddDoubleToPayload(CPayloadWriter payloadWriter, double *values, int length);
|
|
|
|
|
CStatus AddOneStringToPayload(CPayloadWriter payloadWriter, char *cstr, int str_size);
|
|
|
|
|
CStatus AddBinaryVectorToPayload(CPayloadWriter payloadWriter, uint8_t *values, int dimension, int length);
|
|
|
|
|
CStatus AddFloatVectorToPayload(CPayloadWriter payloadWriter, float *values, int dimension, int length);
|
|
|
|
|
|
|
|
|
|
CStatus FinishPayloadWriter(CPayloadWriter payloadWriter);
|
|
|
|
|
CBuffer GetPayloadBufferFromWriter(CPayloadWriter payloadWriter);
|
|
|
|
|
int GetPayloadLengthFromWriter(CPayloadWriter payloadWriter);
|
|
|
|
|
CStatus ReleasePayloadWriter(CPayloadWriter handler);
|
|
|
|
|
|
|
|
|
|
// reader
|
|
|
|
|
CPayloadReader NewPayloadReader(int columnType, uint8_t *buffer, int64_t buf_size);
|
|
|
|
|
CStatus GetBoolFromPayload(CPayloadReader payloadReader, bool **values, int *length);
|
|
|
|
|
CStatus GetInt8FromPayload(CPayloadReader payloadReader, int8_t **values, int *length);
|
|
|
|
|
CStatus GetInt16FromPayload(CPayloadReader payloadReader, int16_t **values, int *length);
|
|
|
|
|
CStatus GetInt32FromPayload(CPayloadReader payloadReader, int32_t **values, int *length);
|
|
|
|
|
CStatus GetInt64FromPayload(CPayloadReader payloadReader, int64_t **values, int *length);
|
|
|
|
|
CStatus GetFloatFromPayload(CPayloadReader payloadReader, float **values, int *length);
|
|
|
|
|
CStatus GetDoubleFromPayload(CPayloadReader payloadReader, double **values, int *length);
|
|
|
|
|
CStatus GetOneStringFromPayload(CPayloadReader payloadReader, int idx, char **cstr, int *str_size);
|
|
|
|
|
CStatus GetBinaryVectorFromPayload(CPayloadReader payloadReader, uint8_t **values, int *dimension, int *length);
|
|
|
|
|
CStatus GetFloatVectorFromPayload(CPayloadReader payloadReader, float **values, int *dimension, int *length);
|
|
|
|
|
|
|
|
|
|
int GetPayloadLengthFromReader(CPayloadReader payloadReader);
|
|
|
|
|
CStatus ReleasePayloadReader(CPayloadReader payloadReader);
|
|
|
|
|
```
|