mirror of
https://gitee.com/he3db/he3pg.git
synced 2024-12-02 12:17:34 +08:00
Merge branch 'dev_performance' of https://gitee.com/zoujia_cm/he3pg into dev_performance
This commit is contained in:
commit
369dcb62f5
107
doc/he3db/deploy-for-pg-as-primary-he3db-as-replica.md
Normal file
107
doc/he3db/deploy-for-pg-as-primary-he3db-as-replica.md
Normal file
@ -0,0 +1,107 @@
|
||||
# 部署
|
||||
## 1 启动原生PG作为主
|
||||
### 1.1 PG14.2源码编译安装
|
||||
```shell
|
||||
./configure --enable-depend --enable-cassert --enable-debug CFLAGS="-ggdb -O0" --prefix=/home/postgres/psql14_pg
|
||||
make && make install
|
||||
```
|
||||
其中,configure选项参考[CONFIGURE-OPTIONS](https://www.postgresql.org/docs/current/install-procedure.html#CONFIGURE-OPTIONS)
|
||||
|
||||
### 1.2 初始化数据
|
||||
```shell
|
||||
cd /home/postgres/psql14_pg
|
||||
./bin/initdb -D /home/postgres/pgdata_14
|
||||
```
|
||||
|
||||
### 1.3 修改配置文件
|
||||
```shell
|
||||
vim /home/postgres/pgdata_14/postgresql.conf
|
||||
|
||||
port=15432
|
||||
wal_level = replica
|
||||
wal_recycle=off
|
||||
```
|
||||
|
||||
修改访问控制文件
|
||||
```shell
|
||||
vim /home/postgres/pgdata_14/pg_hba.conf
|
||||
|
||||
host replication repl 0.0.0.0/0 trust
|
||||
```
|
||||
|
||||
### 1.4 启动服务
|
||||
```shell
|
||||
./bin/pg_ctl -D /home/postgres/pgdata_14 start -l logfile
|
||||
```
|
||||
### 1.5 创建流复制用户
|
||||
```shell
|
||||
./bin/psql -h127.0.0.1 -p15432
|
||||
|
||||
postgres=# CREATE ROLE repl login replication encrypted password 'repl';
|
||||
```
|
||||
## 2 启动He3DB作为备
|
||||
### 2.1 编译安装PG He3DB
|
||||
```shell
|
||||
# 编译需要依赖静态库 he3pg/src/backend/storage/file/librust_log.a
|
||||
./configure --enable-depend --enable-cassert --enable-debug CFLAGS="-ggdb -O0" --prefix=/home/postgres/psqlhe3_mirror
|
||||
make && make install
|
||||
```
|
||||
### 2.2 从主备份数据
|
||||
```shell
|
||||
cd /home/postgres/psqlhe3_mirror
|
||||
./bin/pg_basebackup -h 127.0.0.1 -p 15432 -U repl -R -Fp -Xs -Pv -D /home/postgres/pgdata_mirror
|
||||
```
|
||||
### 2.3 修改postgresql.conf配置
|
||||
```shell
|
||||
vim /home/postgres/pgdata_mirror/postgresql.conf
|
||||
|
||||
# 在配置文件末尾添加以下配置
|
||||
primary_conninfo = 'application_name=pushstandby user=repl host=127.0.0.1 port=15432 sslmode=disable sslcompression=0 gssencmode=disable target_session_attrs=any'
|
||||
hot_standby=on
|
||||
port = 5434
|
||||
push_standby=on
|
||||
wal_recycle=off
|
||||
fsync=off
|
||||
wal_keep_size=10000
|
||||
full_page_writes=off
|
||||
he3mirror=true
|
||||
```
|
||||
### 2.4 启动服务
|
||||
```shell
|
||||
./bin/pg_ctl -D /home/postgres/pgdata_mirror start -l logfile
|
||||
```
|
||||
## 3 验证
|
||||
### 3.1 连接主库插入新数据
|
||||
```shell
|
||||
./bin/psql -h127.0.0.1 -p15432
|
||||
postgres=# create table "t1" (id int);
|
||||
CREATE TABLE
|
||||
postgres=# insert into t1 values(1);
|
||||
INSERT 0 1
|
||||
```
|
||||
### 3.2 备机验证数据
|
||||
```shell
|
||||
./bin/psql -h127.0.0.1 -p5434
|
||||
postgres=# select * from t1;
|
||||
id
|
||||
----
|
||||
1
|
||||
|
||||
(1 row)
|
||||
```
|
||||
### 3.3 连接主库再次插入新数据
|
||||
```shell
|
||||
./bin/psql -h127.0.0.1 -p15432
|
||||
postgres=# insert into t1 values(2);
|
||||
INSERT 0 1
|
||||
```
|
||||
### 3.4 备机验证数据
|
||||
```shell
|
||||
./bin/psql -h127.0.0.1 -p5434
|
||||
postgres=# select * from t1;
|
||||
id
|
||||
----
|
||||
1
|
||||
2
|
||||
(2 rows)
|
||||
```
|
@ -66,12 +66,15 @@ func runArchive(cmd *cobra.Command, args []string) {
|
||||
|
||||
// archive wal kv
|
||||
fmt.Printf("archive wal kv!\n")
|
||||
//0600000000000000010000000000000000
|
||||
retStartString := fmt.Sprintf("06%s%s", archive_start_time_line, archive_start_lsn)
|
||||
retEndString := "06ffffffffffffffffffffffffffffffff"
|
||||
for id := 0; id < 8; id++ {
|
||||
//06000000000000000100000000000000070000000000000000
|
||||
//因为加了个id字段,目前不能跨时间线备份
|
||||
retStartString := fmt.Sprintf("06%s000000000000000%d%s", archive_start_time_line, id, archive_start_lsn)
|
||||
//retEndString := fmt.Sprintf("06ffffffffffffffff000000000000000%dffffffffffffffff", id)
|
||||
retEndString := fmt.Sprintf("06%s000000000000000%dffffffffffffffff", archive_start_time_line, id)
|
||||
|
||||
retStart := make([]byte, 17)
|
||||
retEnd := make([]byte, 17)
|
||||
retStart := make([]byte, 25)
|
||||
retEnd := make([]byte, 25)
|
||||
index := 0
|
||||
for i := 0; i < len(retStartString); i += 2 {
|
||||
value, _ := strconv.ParseUint(retStartString[i:i+2], 16, 8)
|
||||
@ -104,6 +107,7 @@ func runArchive(cmd *cobra.Command, args []string) {
|
||||
}
|
||||
wlCount--
|
||||
}
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
client.Close()
|
||||
|
@ -122,7 +122,7 @@ $(top_builddir)/src/port/libpgport_srv.a: | submake-libpgport
|
||||
LIBS += $(libpq)
|
||||
librust_log = -L$(top_builddir)/src/backend/storage/file/ -lrust_log -lstdc++ -lm -ldl -lpthread -lfuse3 -Wl,-gc-section
|
||||
LIBS += $(librust_log)
|
||||
libglib = -L/usr/lib/x86_64-linux-gnu/ -lglib-2.0 -I/usr/include/glib-2.0/ -I/usr/lib/x86_64-linux-gnu/glib-2.0/include/ -lpthread
|
||||
libglib = -L/usr/lib/x86_64-linux-gnu/ -lglib-2.0 -I/usr/include/glib-2.0/ -I/usr/lib/x86_64-linux-gnu/glib-2.0/include/ -lpthread -llmdb
|
||||
LIBS += $(libglib)
|
||||
postgres.o: $(OBJS)
|
||||
$(CC) $(LDREL) $(call expand_subsys,$^) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@
|
||||
|
@ -37,7 +37,8 @@ OBJS = \
|
||||
xlogutils.o \
|
||||
pagehashqueue.o \
|
||||
ringbuffer.o \
|
||||
pthreadpool.o
|
||||
pthreadpool.o \
|
||||
pg_mirror.o
|
||||
|
||||
include $(top_srcdir)/src/backend/common.mk
|
||||
|
||||
|
@ -382,12 +382,12 @@ CleanLogIndexMain(int argc, char *argv[])
|
||||
* Loop forever
|
||||
*/
|
||||
SetProcessingMode(NormalProcessing);
|
||||
uint64_t pushStandbyPoint = 0;
|
||||
uint64_t pushStandbyPrePoint = 0;
|
||||
uint64_t preLastBlkStartLsn = 0;
|
||||
uint64_t preLastBlkEndLsn = 0;
|
||||
uint64_t preLastStartLsn = 0;
|
||||
uint64_t preLastEndLsn = 0;
|
||||
XLogRecPtr pushStandbyPoint = 0;
|
||||
XLogRecPtr pushStandbyPrePoint = 0;
|
||||
XLogRecPtr preLastBlkStartLsn = 0;
|
||||
XLogRecPtr preLastBlkEndLsn = 0;
|
||||
XLogRecPtr preLastStartLsn = 0;
|
||||
XLogRecPtr preLastEndLsn = 0;
|
||||
for (;;)
|
||||
{
|
||||
/* Clear any already-pending wakeups */
|
||||
@ -416,7 +416,7 @@ CleanLogIndexMain(int argc, char *argv[])
|
||||
int pageNum = 0;
|
||||
while(next!=NULL) {
|
||||
addFileKey(&next->tag.tag);
|
||||
next = tagList->next;
|
||||
next = next->next;
|
||||
pageNum++;
|
||||
}
|
||||
FreeTagNode(tagList);
|
||||
@ -689,6 +689,9 @@ BufferTag* QueuePushPage(void) {
|
||||
if (ready!= 0 && gpushpos < gpos) {
|
||||
return &(PageHashQueueShmem->gtag[gpushpos]->tag);
|
||||
} else {
|
||||
if (gpushpos < gpos) {
|
||||
elog(ERROR,"QueuePushPage gpushpos %d < gpos %d",gpushpos,gpos);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
@ -698,7 +701,7 @@ void ProcFlushBufferToDisk(BufferTag*tag) {
|
||||
RBM_NORMAL);
|
||||
if (!BufferIsValid(buffer))
|
||||
{
|
||||
elog(ERROR,"ProcFlushBufferToDisk is invalid rel %d,flk %d,blk %d",tag->rnode.relNode,tag->forkNum,tag->blockNum);
|
||||
elog(FATAL,"ProcFlushBufferToDisk is invalid rel %d,flk %d,blk %d",tag->rnode.relNode,tag->forkNum,tag->blockNum);
|
||||
pg_atomic_fetch_add_u32(&PageHashQueueShmem->taskNum,1);
|
||||
return;
|
||||
}
|
||||
|
742
src/backend/access/transam/pg_mirror.c
Normal file
742
src/backend/access/transam/pg_mirror.c
Normal file
@ -0,0 +1,742 @@
|
||||
#include "access/pg_mirror.h"
|
||||
#include "postgres.h"
|
||||
#include "access/xlogrecord.h"
|
||||
#include "access/heapam_xlog.h"
|
||||
#include "access/nbtxlog.h"
|
||||
#include "access/gistxlog.h"
|
||||
#include "access/spgxlog.h"
|
||||
#include "access/brin_xlog.h"
|
||||
#include "assert.h"
|
||||
#include "common/controldata_utils.h"
|
||||
#include "miscadmin.h"
|
||||
#define INSERT_FREESPACE_MIRROR(endptr) \
|
||||
(((endptr) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))
|
||||
|
||||
static ControlFileData *ControlFile = NULL;
|
||||
//default 16MB
|
||||
static int WalSegSz = 16777216;
|
||||
|
||||
//muti block to one record
|
||||
typedef struct XLogHe3ToPg {
|
||||
uint64 CurrBytePos;
|
||||
uint64 PrevBytePos;
|
||||
}XLogHe3ToPg;
|
||||
static XLogHe3ToPg g_walHe3ToPg;
|
||||
|
||||
/*
 * ReConvertMainData
 *		Rewrite the main-data section of one WAL record into its "old" layout.
 *
 * sRecord   - source record header; only xl_rmid and xl_info are consulted.
 * sMainData - source main data, *sLen bytes long.
 * dMainData - destination buffer (caller must make it large enough).
 * dLen      - out: number of bytes stored in dMainData.
 *
 * For the record types handled below, the listed fields are copied one by one
 * into the matching "old" struct, and any trailing offsets array is appended
 * unchanged.  Every other record is copied through byte for byte.
 */
static void ReConvertMainData(XLogRecord* sRecord, char*sMainData, uint32_t*sLen, char* dMainData, uint32_t* dLen) {
	uint8		opcode = (sRecord->xl_info & ~XLR_INFO_MASK);

	switch (sRecord->xl_rmid)
	{
		case RM_HEAP2_ID:
			if ((opcode & XLOG_HEAP_OPMASK) == XLOG_HEAP2_VISIBLE)
			{
				xl_heap_visible *cur = (xl_heap_visible *) sMainData;
				xl_old_heap_visible old;

				old.cutoff_xid = cur->cutoff_xid;
				old.flags = cur->flags;
				*dLen = sizeof(xl_old_heap_visible);
				memcpy(dMainData, &old, *dLen);
				return;
			}
			break;

		case RM_HEAP_ID:
			if (((opcode & XLOG_HEAP_OPMASK) == XLOG_HEAP_UPDATE) ||
				((opcode & XLOG_HEAP_OPMASK) == XLOG_HEAP_HOT_UPDATE))
			{
				xl_heap_update *cur = (xl_heap_update *) sMainData;
				xl_old_heap_update old;

				old.old_xmax = cur->old_xmax;
				old.old_offnum = cur->old_offnum;
				old.old_infobits_set = cur->old_infobits_set;
				old.flags = cur->flags;
				old.new_xmax = cur->new_xmax;
				old.new_offnum = cur->new_offnum;
				*dLen = sizeof(xl_old_heap_update);
				memcpy(dMainData, &old, *dLen);
				return;
			}
			break;

		case RM_BTREE_ID:
			if (opcode == XLOG_BTREE_SPLIT_L || opcode == XLOG_BTREE_SPLIT_R)
			{
				xl_btree_split *cur = (xl_btree_split *) sMainData;
				xl_old_btree_split old;

				old.level = cur->level;
				old.firstrightoff = cur->firstrightoff;
				old.newitemoff = cur->newitemoff;
				old.postingoff = cur->postingoff;
				*dLen = sizeof(xl_old_btree_split);
				memcpy(dMainData, &old, *dLen);
				return;
			}
			break;

		case RM_GIST_ID:
			if (opcode == XLOG_GIST_PAGE_SPLIT)
			{
				gistxlogPageSplit *cur = (gistxlogPageSplit *) sMainData;
				gistoldxlogPageSplit old;

				old.origrlink = cur->origrlink;
				old.orignsn = cur->orignsn;
				old.origleaf = cur->origleaf;
				old.npage = cur->npage;
				old.markfollowright = cur->markfollowright;
				*dLen = sizeof(gistoldxlogPageSplit);
				memcpy(dMainData, &old, *dLen);
				return;
			}
			break;

		case RM_SPGIST_ID:
			switch (opcode)
			{
				case XLOG_SPGIST_ADD_LEAF:
					{
						spgxlogAddLeaf *cur = (spgxlogAddLeaf *) sMainData;
						spgoldxlogAddLeaf old;

						old.newPage = cur->newPage;
						old.storesNulls = cur->storesNulls;
						old.offnumLeaf = cur->offnumLeaf;
						old.offnumHeadLeaf = cur->offnumHeadLeaf;
						old.offnumParent = cur->offnumParent;
						old.nodeI = cur->nodeI;
						*dLen = sizeof(spgoldxlogAddLeaf);
						memcpy(dMainData, &old, *dLen);
						return;
					}

				case XLOG_SPGIST_MOVE_LEAFS:
					{
						spgxlogMoveLeafs *cur = (spgxlogMoveLeafs *) sMainData;
						spgoldxlogMoveLeafs old;
						/* bytes of the offsets array that follow the fixed part */
						size_t		tail = *sLen - SizeOfSpgxlogMoveLeafs;

						old.nMoves = cur->nMoves;
						old.newPage = cur->newPage;
						old.replaceDead = cur->replaceDead;
						old.storesNulls = cur->storesNulls;
						old.offnumParent = cur->offnumParent;
						old.nodeI = cur->nodeI;
						old.stateSrc = cur->stateSrc;
						*dLen = SizeOfOldSpgxlogMoveLeafs;
						memcpy(dMainData, &old, *dLen);
						memcpy(dMainData + *dLen, cur->offsets, tail);
						*dLen += tail;
						return;
					}

				case XLOG_SPGIST_ADD_NODE:
					{
						spgxlogAddNode *cur = (spgxlogAddNode *) sMainData;
						spgoldxlogAddNode old;

						old.offnum = cur->offnum;
						old.offnumNew = cur->offnumNew;
						old.newPage = cur->newPage;
						old.parentBlk = cur->parentBlk;
						old.offnumParent = cur->offnumParent;
						old.nodeI = cur->nodeI;
						old.stateSrc = cur->stateSrc;
						*dLen = sizeof(spgoldxlogAddNode);
						memcpy(dMainData, &old, *dLen);
						return;
					}

				case XLOG_SPGIST_PICKSPLIT:
					{
						spgxlogPickSplit *cur = (spgxlogPickSplit *) sMainData;
						spgoldxlogPickSplit old;
						/* bytes of the offsets array that follow the fixed part */
						size_t		tail = *sLen - SizeOfSpgxlogPickSplit;

						old.isRootSplit = cur->isRootSplit;
						old.nDelete = cur->nDelete;
						old.nInsert = cur->nInsert;
						old.initSrc = cur->initSrc;
						old.initDest = cur->initDest;
						old.offnumInner = cur->offnumInner;
						old.initInner = cur->initInner;
						old.storesNulls = cur->storesNulls;
						old.innerIsParent = cur->innerIsParent;
						old.offnumParent = cur->offnumParent;
						old.nodeI = cur->nodeI;
						old.stateSrc = cur->stateSrc;
						*dLen = SizeOfOldSpgxlogPickSplit;
						memcpy(dMainData, &old, *dLen);
						memcpy(dMainData + *dLen, cur->offsets, tail);
						*dLen += tail;
						return;
					}

				default:
					break;
			}
			break;

		case RM_BRIN_ID:
			switch (opcode)
			{
				case XLOG_BRIN_INSERT:
					{
						xl_brin_insert *cur = (xl_brin_insert *) sMainData;
						xl_old_brin_insert old;

						old.heapBlk = cur->heapBlk;
						/* extra information needed to update the revmap */
						old.pagesPerRange = cur->pagesPerRange;
						old.offnum = cur->offnum;
						*dLen = sizeof(xl_old_brin_insert);
						memcpy(dMainData, &old, *dLen);
						return;
					}

				case XLOG_BRIN_UPDATE:
					{
						xl_brin_update *cur = (xl_brin_update *) sMainData;
						xl_brin_insert *curInsert = &cur->insert;
						xl_old_brin_insert oldInsert;
						xl_old_brin_update oldUpdate;

						oldInsert.heapBlk = curInsert->heapBlk;
						/* extra information needed to update the revmap */
						oldInsert.pagesPerRange = curInsert->pagesPerRange;
						oldInsert.offnum = curInsert->offnum;
						/* offset number of old tuple on old page */
						oldUpdate.oldOffnum = cur->oldOffnum;
						oldUpdate.insert = oldInsert;
						*dLen = sizeof(xl_old_brin_update);
						memcpy(dMainData, &oldUpdate, *dLen);
						return;
					}

				default:
					break;
			}
			break;

		default:
			break;
	}

	/* No layout change for this record: pass the main data through as-is. */
	*dLen = *sLen;
	memcpy(dMainData, sMainData, *dLen);
}
|
||||
|
||||
/*
 * XlogHe3ToPg
 *		Merge n source WAL records into a single old-format record.
 *
 * newRecord - array of n source records; xl_xid, xl_info and xl_rmid of the
 *			   result are taken from newRecord[0].
 * oldRecord - destination buffer; the old-format header is followed by the
 *			   rebuilt record body.  xl_prev and xl_crc are NOT set here.
 *
 * Block reference headers of every source record are copied (with the block
 * id renumbered to a running counter).  The origin, top-level xid and
 * main-data headers are emitted only for the last source record, whose main
 * data is converted with ReConvertMainData().  Block images/data collected
 * from all source records are appended next, followed by the converted main
 * data.
 *
 * Returns the total length of the rebuilt record, which is also stored in
 * oldRecord->xl_tot_len.
 *
 * NOTE(review): the converted main data is staged in an 8192-byte stack
 * buffer and nothing here bounds main_data_len; confirm that callers never
 * pass records with larger main data.
 */
static int XlogHe3ToPg(XLogRecord*newRecord[],int n, OldXLogRecord*oldRecord) {
	char		d_main_data[8192];
	int			dPos = 0;
	char	   *dst = (char *) oldRecord;
	uint32_t	d_main_data_len = 0;
	uint32		main_data_len = 0;
	uint8_t		blkNum = 0;
	bool		hasblk = false;
	char	   *img_ptr[XLR_MAX_BLOCK_ID + 1] = {0};
	char	   *data_ptr[XLR_MAX_BLOCK_ID + 1] = {0};
	uint16_t	bimg_len[XLR_MAX_BLOCK_ID + 1] = {0};
	uint16_t	data_len[XLR_MAX_BLOCK_ID + 1] = {0};
	int			idx = 0;

	oldRecord->xl_xid = newRecord[0]->xl_xid;
	oldRecord->xl_info = newRecord[0]->xl_info;
	oldRecord->xl_rmid = newRecord[0]->xl_rmid;
	dPos += sizeof(OldXLogRecord);

	for (int i = 0; i < n; i++)
	{
		int			sPos = 0;
		char	   *src = (char *) newRecord[i];
		uint32		remaining = newRecord[i]->xl_tot_len - sizeof(XLogRecord);
		uint32		datatotal = 0;

		sPos += sizeof(XLogRecord);

		/* Walk the headers; stop once only payload bytes remain. */
		while (remaining > datatotal)
		{
			uint8_t		block_id = *(src + sPos);

			if (block_id == XLR_BLOCK_ID_DATA_SHORT)
			{
				uint8		d_len;

				sPos += sizeof(block_id);
				remaining -= sizeof(block_id);
				if (i == n - 1)
				{
					memcpy(dst + dPos, &block_id, sizeof(block_id));
					dPos += sizeof(block_id);
				}
				main_data_len = *((uint8_t *) (src + sPos));
				/* main_data_len type XLR_BLOCK_ID_DATA_SHORT */
				if (i == n - 1)
				{
					/* main data sits after the length byte and block payload */
					ReConvertMainData(newRecord[i],
									  src + sPos + sizeof(d_len) + bimg_len[blkNum] + data_len[blkNum],
									  &main_data_len, d_main_data, &d_main_data_len);
					d_len = d_main_data_len;
					memcpy(dst + dPos, &d_len, sizeof(d_len));
					dPos += sizeof(d_len);
				}
				sPos += sizeof(d_len);
				remaining -= sizeof(d_len);
				datatotal += main_data_len;
				break;
			}
			else if (block_id == XLR_BLOCK_ID_DATA_LONG)
			{
				sPos += sizeof(block_id);
				remaining -= sizeof(block_id);
				if (i == n - 1)
				{
					memcpy((dst + dPos), &block_id, sizeof(block_id));
					dPos += sizeof(block_id);
				}
				memcpy(&main_data_len, src + sPos, sizeof(uint32));
				if (i == n - 1)
				{
					ReConvertMainData(newRecord[i],
									  src + sPos + sizeof(main_data_len) + bimg_len[blkNum] + data_len[blkNum],
									  &main_data_len, d_main_data, &d_main_data_len);
					if (d_main_data_len > 255)
					{
						memcpy(dst + dPos, &d_main_data_len, sizeof(d_main_data_len));
						dPos += sizeof(d_main_data_len);
					}
					else
					{
						/* converted data now fits the short form: patch the id */
						uint8_t		d_len = d_main_data_len;

						*(dst + dPos - 1) = XLR_BLOCK_ID_DATA_SHORT;
						memcpy(dst + dPos, &d_len, sizeof(d_len));
						dPos += sizeof(d_len);
					}
				}
				sPos += sizeof(main_data_len);
				remaining -= sizeof(main_data_len);
				datatotal += main_data_len;
				break;
			}
			else if (block_id == XLR_BLOCK_ID_ORIGIN)
			{
				sPos += sizeof(block_id);
				remaining -= sizeof(block_id);
				if (i == n - 1)
				{
					memcpy(dst + dPos, &block_id, sizeof(block_id));
					dPos += sizeof(block_id);
					memcpy(dst + dPos, src + sPos, sizeof(RepOriginId));
					dPos += sizeof(RepOriginId);
				}
				sPos += sizeof(RepOriginId);
				remaining -= sizeof(RepOriginId);
			}
			else if (block_id == XLR_BLOCK_ID_TOPLEVEL_XID)
			{
				sPos += sizeof(block_id);
				remaining -= sizeof(block_id);
				if (i == n - 1)
				{
					memcpy(dst + dPos, &block_id, sizeof(block_id));
					dPos += sizeof(block_id);
					memcpy(dst + dPos, src + sPos, sizeof(TransactionId));
					dPos += sizeof(TransactionId);
				}
				sPos += sizeof(TransactionId);
				remaining -= sizeof(TransactionId);
			}
			else if (block_id <= XLR_MAX_BLOCK_ID)
			{
				uint8_t		fork_flags;

				memcpy(dst + dPos, src + sPos, SizeOfXLogRecordBlockHeader);
				fork_flags = *(src + sPos + sizeof(block_id));
				/* renumber the block reference within the merged record */
				*(dst + dPos) = blkNum;
				hasblk = true;
				/* header fields may be unaligned in the buffer: use memcpy */
				memcpy(&data_len[blkNum],
					   src + sPos + sizeof(block_id) + sizeof(fork_flags),
					   sizeof(uint16_t));
				datatotal += data_len[blkNum];
				sPos += SizeOfXLogRecordBlockHeader;
				dPos += SizeOfXLogRecordBlockHeader;
				remaining -= SizeOfXLogRecordBlockHeader;
				if ((fork_flags & BKPBLOCK_HAS_IMAGE) != 0)
				{
					uint8_t		bimg_info;

					memcpy(&bimg_len[blkNum], src + sPos, sizeof(uint16_t));
					datatotal += bimg_len[blkNum];

					/*
					 * bimg_info is a single byte located after the length and
					 * hole_offset fields.  (sizeof(bimg_len) would be the
					 * size of the whole array, not of one length field.)
					 */
					bimg_info = *((uint8_t *) (src + sPos +
											   offsetof(XLogRecordBlockImageHeader, bimg_info)));
					memcpy(dst + dPos, src + sPos, SizeOfXLogRecordBlockImageHeader);
					sPos += SizeOfXLogRecordBlockImageHeader;
					dPos += SizeOfXLogRecordBlockImageHeader;
					remaining -= SizeOfXLogRecordBlockImageHeader;
					if ((bimg_info & BKPIMAGE_IS_COMPRESSED) != 0)
					{
						if ((bimg_info & BKPIMAGE_HAS_HOLE) != 0)
						{
							memcpy(dst + dPos, src + sPos, SizeOfXLogRecordBlockCompressHeader);
							sPos += SizeOfXLogRecordBlockCompressHeader;
							dPos += SizeOfXLogRecordBlockCompressHeader;
							remaining -= SizeOfXLogRecordBlockCompressHeader;
						}
					}
				}
				if (!(fork_flags & BKPBLOCK_SAME_REL))
				{
					memcpy(dst + dPos, src + sPos, sizeof(RelFileNode));
					sPos += sizeof(RelFileNode);
					dPos += sizeof(RelFileNode);
					remaining -= sizeof(RelFileNode);
				}
				memcpy(dst + dPos, src + sPos, sizeof(BlockNumber));
				sPos += sizeof(BlockNumber);
				dPos += sizeof(BlockNumber);
				remaining -= sizeof(BlockNumber);
			}
			else
			{
				printf("invalid block_id %u",block_id);
				/* Nothing was consumed; leave the loop instead of spinning. */
				break;
			}
		}
		assert(remaining == datatotal);

		/* Remember where this record's block image and block data live. */
		if (bimg_len[blkNum] != 0)
		{
			img_ptr[blkNum] = src + sPos;
			sPos += bimg_len[blkNum];
		}
		if (data_len[blkNum] != 0)
		{
			data_ptr[blkNum] = src + sPos;
			sPos += data_len[blkNum];
		}
		if (hasblk == true)
		{
			blkNum++;
		}

		sPos += main_data_len;
		assert(sPos == newRecord[i]->xl_tot_len);
	}

	/* Append the payloads: image then data for each block, in block order. */
	while (idx < blkNum)
	{
		if (bimg_len[idx] != 0)
		{
			memcpy(dst + dPos, img_ptr[idx], bimg_len[idx]);
			dPos += bimg_len[idx];
		}
		if (data_len[idx] != 0)
		{
			memcpy(dst + dPos, data_ptr[idx], data_len[idx]);
			dPos += data_len[idx];
		}
		idx++;
	}

	/* The converted main data always comes last. */
	memcpy(dst + dPos, d_main_data, d_main_data_len);
	dPos += d_main_data_len;
	oldRecord->xl_tot_len = dPos;
	return dPos;
}
|
||||
|
||||
static int OldUsableBytesInSegment =
|
||||
(DEFAULT_XLOG_SEG_SIZE / XLOG_BLCKSZ * (XLOG_BLCKSZ - SizeOfXLogShortPHD)) -
|
||||
(SizeOfXLogLongPHD - SizeOfXLogShortPHD);
|
||||
|
||||
|
||||
static XLogRecPtr
|
||||
OldXLogBytePosToRecPtr(uint64 bytepos)
|
||||
{
|
||||
uint64 fullsegs;
|
||||
uint64 fullpages;
|
||||
uint64 bytesleft;
|
||||
uint32 seg_offset;
|
||||
XLogRecPtr result;
|
||||
|
||||
fullsegs = bytepos / OldUsableBytesInSegment;
|
||||
bytesleft = bytepos % OldUsableBytesInSegment;
|
||||
|
||||
if (bytesleft < XLOG_BLCKSZ - SizeOfXLogLongPHD)
|
||||
{
|
||||
/* fits on first page of segment */
|
||||
seg_offset = bytesleft + SizeOfXLogLongPHD;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* account for the first page on segment with long header */
|
||||
seg_offset = XLOG_BLCKSZ;
|
||||
bytesleft -= XLOG_BLCKSZ - SizeOfXLogLongPHD;
|
||||
|
||||
fullpages = bytesleft / (XLOG_BLCKSZ - SizeOfXLogShortPHD);
|
||||
bytesleft = bytesleft % (XLOG_BLCKSZ - SizeOfXLogShortPHD);
|
||||
|
||||
seg_offset += fullpages * XLOG_BLCKSZ + bytesleft + SizeOfXLogShortPHD;
|
||||
}
|
||||
|
||||
XLogSegNoOffsetToRecPtr(fullsegs, seg_offset, DEFAULT_XLOG_SEG_SIZE, result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static XLogRecPtr
|
||||
OldXLogBytePosToEndRecPtr(uint64 bytepos)
|
||||
{
|
||||
uint64 fullsegs;
|
||||
uint64 fullpages;
|
||||
uint64 bytesleft;
|
||||
uint32 seg_offset;
|
||||
XLogRecPtr result;
|
||||
|
||||
fullsegs = bytepos / OldUsableBytesInSegment;
|
||||
bytesleft = bytepos % OldUsableBytesInSegment;
|
||||
|
||||
if (bytesleft < XLOG_BLCKSZ - SizeOfXLogLongPHD)
|
||||
{
|
||||
/* fits on first page of segment */
|
||||
if (bytesleft == 0)
|
||||
seg_offset = 0;
|
||||
else
|
||||
seg_offset = bytesleft + SizeOfXLogLongPHD;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* account for the first page on segment with long header */
|
||||
seg_offset = XLOG_BLCKSZ;
|
||||
bytesleft -= XLOG_BLCKSZ - SizeOfXLogLongPHD;
|
||||
|
||||
fullpages = bytesleft / (XLOG_BLCKSZ - SizeOfXLogShortPHD);
|
||||
bytesleft = bytesleft % (XLOG_BLCKSZ - SizeOfXLogShortPHD);
|
||||
|
||||
if (bytesleft == 0)
|
||||
seg_offset += fullpages * XLOG_BLCKSZ + bytesleft;
|
||||
else
|
||||
seg_offset += fullpages * XLOG_BLCKSZ + bytesleft + SizeOfXLogShortPHD;
|
||||
}
|
||||
|
||||
XLogSegNoOffsetToRecPtr(fullsegs, seg_offset, DEFAULT_XLOG_SEG_SIZE, result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static uint64
|
||||
OldXLogRecPtrToBytePos(XLogRecPtr ptr)
|
||||
{
|
||||
uint64 fullsegs;
|
||||
uint32 fullpages;
|
||||
uint32 offset;
|
||||
uint64 result;
|
||||
|
||||
XLByteToSeg(ptr, fullsegs, DEFAULT_XLOG_SEG_SIZE);
|
||||
|
||||
fullpages = (XLogSegmentOffset(ptr, DEFAULT_XLOG_SEG_SIZE)) / XLOG_BLCKSZ;
|
||||
offset = ptr % XLOG_BLCKSZ;
|
||||
|
||||
if (fullpages == 0)
|
||||
{
|
||||
result = fullsegs * OldUsableBytesInSegment;
|
||||
if (offset > 0)
|
||||
{
|
||||
Assert(offset >= SizeOfXLogLongPHD);
|
||||
result += offset - SizeOfXLogLongPHD;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
result = fullsegs * OldUsableBytesInSegment +
|
||||
(XLOG_BLCKSZ - SizeOfXLogLongPHD) + /* account for first page */
|
||||
(fullpages - 1) * (XLOG_BLCKSZ - SizeOfXLogShortPHD); /* full pages */
|
||||
if (offset > 0)
|
||||
{
|
||||
Assert(offset >= SizeOfXLogShortPHD);
|
||||
result += offset - SizeOfXLogShortPHD;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static bool
|
||||
ReserveXLogWalSwitch(XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
|
||||
{
|
||||
XLogHe3ToPg *Insert = &g_walHe3ToPg;
|
||||
uint64 startbytepos;
|
||||
uint64 endbytepos;
|
||||
uint64 prevbytepos;
|
||||
uint32 size = MAXALIGN(SizeOfOldXLogRecord);
|
||||
XLogRecPtr ptr;
|
||||
uint32 segleft;
|
||||
|
||||
startbytepos = Insert->CurrBytePos;
|
||||
|
||||
ptr = OldXLogBytePosToEndRecPtr(startbytepos);
|
||||
if (XLogSegmentOffset(ptr, DEFAULT_XLOG_SEG_SIZE) == 0)
|
||||
{
|
||||
*EndPos = *StartPos = ptr;
|
||||
return false;
|
||||
}
|
||||
|
||||
endbytepos = startbytepos + size;
|
||||
prevbytepos = Insert->PrevBytePos;
|
||||
|
||||
*StartPos = OldXLogBytePosToRecPtr(startbytepos);
|
||||
*EndPos = OldXLogBytePosToEndRecPtr(endbytepos);
|
||||
|
||||
segleft = DEFAULT_XLOG_SEG_SIZE - XLogSegmentOffset(*EndPos, DEFAULT_XLOG_SEG_SIZE);
|
||||
if (segleft != DEFAULT_XLOG_SEG_SIZE)
|
||||
{
|
||||
/* consume the rest of the segment */
|
||||
*EndPos += segleft;
|
||||
endbytepos = OldXLogRecPtrToBytePos(*EndPos);
|
||||
}
|
||||
Insert->CurrBytePos = endbytepos;
|
||||
Insert->PrevBytePos = startbytepos;
|
||||
|
||||
*PrevPtr = OldXLogBytePosToRecPtr(prevbytepos);
|
||||
|
||||
Assert(XLogSegmentOffset(*EndPos, DEFAULT_XLOG_SEG_SIZE) == 0);
|
||||
Assert(OldXLogRecPtrToBytePos(*EndPos) == endbytepos);
|
||||
Assert(OldXLogRecPtrToBytePos(*StartPos) == startbytepos);
|
||||
Assert(OldXLogRecPtrToBytePos(*PrevPtr) == prevbytepos);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
ReserveXLogWalInsertLocation(int size, XLogRecPtr *StartPos, XLogRecPtr *EndPos,
|
||||
XLogRecPtr *PrevPtr)
|
||||
{
|
||||
XLogHe3ToPg *Insert = &g_walHe3ToPg;
|
||||
uint64 startbytepos;
|
||||
uint64 endbytepos;
|
||||
uint64 prevbytepos;
|
||||
|
||||
size = MAXALIGN(size);
|
||||
|
||||
/* All (non xlog-switch) records should contain data. */
|
||||
Assert(size > SizeOfOldXLogRecord);
|
||||
|
||||
/*
|
||||
* The duration the spinlock needs to be held is minimized by minimizing
|
||||
* the calculations that have to be done while holding the lock. The
|
||||
* current tip of reserved WAL is kept in CurrBytePos, as a byte position
|
||||
* that only counts "usable" bytes in WAL, that is, it excludes all WAL
|
||||
* page headers. The mapping between "usable" byte positions and physical
|
||||
* positions (XLogRecPtrs) can be done outside the locked region, and
|
||||
* because the usable byte position doesn't include any headers, reserving
|
||||
* X bytes from WAL is almost as simple as "CurrBytePos += X".
|
||||
*/
|
||||
|
||||
startbytepos = Insert->CurrBytePos;
|
||||
endbytepos = startbytepos + size;
|
||||
prevbytepos = Insert->PrevBytePos;
|
||||
Insert->CurrBytePos = endbytepos;
|
||||
Insert->PrevBytePos = startbytepos;
|
||||
|
||||
*StartPos = OldXLogBytePosToRecPtr(startbytepos);
|
||||
*EndPos = OldXLogBytePosToEndRecPtr(endbytepos);
|
||||
*PrevPtr = OldXLogBytePosToRecPtr(prevbytepos);
|
||||
|
||||
/*
|
||||
* Check that the conversions between "usable byte positions" and
|
||||
* XLogRecPtrs work consistently in both directions.
|
||||
*/
|
||||
Assert(OldXLogRecPtrToBytePos(*StartPos) == startbytepos);
|
||||
Assert(OldXLogRecPtrToBytePos(*EndPos) == endbytepos);
|
||||
Assert(OldXLogRecPtrToBytePos(*PrevPtr) == prevbytepos);
|
||||
}
|
||||
|
||||
/*
 * CopyXLogRecordToPgWAL
 *		Lay out one old-format record as WAL page contents in dBuf.
 *
 * write_len - number of record bytes expected to be written (asserted).
 * rechdr    - record to copy (rechdr->xl_tot_len bytes); may be NULL.
 * StartPos  - WAL position at which the record starts.
 * EndPos    - reserved end position; a mismatch is reported on stdout.
 * dBuf      - output buffer.
 * dLen      - out: number of bytes produced in dBuf.
 *
 * When StartPos is the first byte after a page header, that header is
 * generated at the start of dBuf.  Whenever the record crosses a page
 * boundary a continuation page header (long at a segment start, short
 * otherwise) is inserted.  The output is padded to the next MAXALIGN
 * boundary.
 */
static void CopyXLogRecordToPgWAL(int write_len,OldXLogRecord* rechdr,XLogRecPtr StartPos, XLogRecPtr EndPos,
	char*dBuf,int* dLen) {
	char	   *currpos = dBuf;
	int			freespace;
	int			written;
	int			extra_space;
	XLogRecPtr	CurrPos = StartPos;
	XLogPageHeader pagehdr;
	XLogPageHeader page;
	XLogLongPageHeader longpage;

	if (CurrPos % XLOG_BLCKSZ == SizeOfXLogShortPHD &&
		XLogSegmentOffset(CurrPos, DEFAULT_XLOG_SEG_SIZE) > XLOG_BLCKSZ)
	{
		/* record is the first thing on a short-header page */
		page = (XLogPageHeader) currpos;
		page->xlp_magic = XLOG_PAGE_MAGIC;
		page->xlp_info = 0;
		page->xlp_tli = ControlFile->checkPointCopy.ThisTimeLineID;
		page->xlp_pageaddr = CurrPos - (CurrPos % XLOG_BLCKSZ);
		/* no record continues onto this page */
		page->xlp_rem_len = 0;
		currpos += SizeOfXLogShortPHD;
	}
	else if (CurrPos % XLOG_BLCKSZ == SizeOfXLogLongPHD &&
			 XLogSegmentOffset(CurrPos, DEFAULT_XLOG_SEG_SIZE) < XLOG_BLCKSZ)
	{
		/* record is the first thing on the first page of a segment */
		page = (XLogPageHeader) currpos;
		page->xlp_magic = XLOG_PAGE_MAGIC;
		page->xlp_info = XLP_LONG_HEADER;
		page->xlp_tli = ControlFile->checkPointCopy.ThisTimeLineID;
		page->xlp_pageaddr = CurrPos - (CurrPos % XLOG_BLCKSZ);
		/* no record continues onto this page */
		page->xlp_rem_len = 0;
		longpage = (XLogLongPageHeader) page;
		longpage->xlp_sysid = ControlFile->system_identifier;
		longpage->xlp_seg_size = WalSegSz;
		longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
		currpos += SizeOfXLogLongPHD;
	}

	freespace = INSERT_FREESPACE_MIRROR(CurrPos);
	Assert(freespace >= sizeof(uint32));

	/* Copy record data */
	written = 0;
	if (rechdr != NULL)
	{
		/* explicit cast: OldXLogRecord * does not convert to char * implicitly */
		char	   *rdata_data = (char *) rechdr;
		int			rdata_len = rechdr->xl_tot_len;

		while (rdata_len > freespace)
		{
			/* fill the rest of the current page ... */
			Assert(CurrPos % XLOG_BLCKSZ >= SizeOfXLogShortPHD || freespace == 0);
			memcpy(currpos, rdata_data, freespace);
			rdata_data += freespace;
			rdata_len -= freespace;
			written += freespace;
			CurrPos += freespace;
			currpos += freespace;

			/* ... then start the next page with a continuation header */
			pagehdr = (XLogPageHeader) currpos;
			pagehdr->xlp_info = 0;
			pagehdr->xlp_tli = ControlFile->checkPointCopy.ThisTimeLineID;
			pagehdr->xlp_magic = XLOG_PAGE_MAGIC;
			pagehdr->xlp_pageaddr = CurrPos - (CurrPos % XLOG_BLCKSZ);
			pagehdr->xlp_rem_len = write_len - written;
			pagehdr->xlp_info |= XLP_FIRST_IS_CONTRECORD;
			if (XLogSegmentOffset(CurrPos, DEFAULT_XLOG_SEG_SIZE) == 0)
			{
				CurrPos += SizeOfXLogLongPHD;
				currpos += SizeOfXLogLongPHD;
				pagehdr->xlp_info |= XLP_LONG_HEADER;
				longpage = (XLogLongPageHeader) pagehdr;
				longpage->xlp_sysid = ControlFile->system_identifier;
				longpage->xlp_seg_size = WalSegSz;
				longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
			}
			else
			{
				CurrPos += SizeOfXLogShortPHD;
				currpos += SizeOfXLogShortPHD;
			}
			freespace = INSERT_FREESPACE_MIRROR(CurrPos);
		}
		Assert(CurrPos % XLOG_BLCKSZ >= SizeOfXLogShortPHD || rdata_len == 0);
		memcpy(currpos, rdata_data, rdata_len);
		currpos += rdata_len;
		CurrPos += rdata_len;
		freespace -= rdata_len;
		written += rdata_len;
	}
	Assert(written == write_len);

	extra_space = MAXALIGN64(CurrPos) - CurrPos;
	CurrPos = MAXALIGN64(CurrPos);
	if (CurrPos != EndPos)
		printf("ERROR space reserved for WAL record does not match what was written");

	/* the alignment padding is part of the output, so give it defined contents */
	memset(currpos, 0, extra_space);
	currpos += extra_space;
	*dLen = (int)(currpos - dBuf);
}
|
||||
|
||||
void readControlFile(char*pathstr) {
|
||||
bool crc_ok;
|
||||
|
||||
ControlFile = get_controlfile(pathstr,&crc_ok);
|
||||
if (!crc_ok)
|
||||
printf(_("WARNING: Calculated CRC checksum does not match value stored in file.\n"
|
||||
"Either the file is corrupt, or it has a different layout than this program\n"
|
||||
"is expecting. The results below are untrustworthy.\n\n"));
|
||||
|
||||
/* set wal segment size */
|
||||
WalSegSz = ControlFile->xlog_seg_size;
|
||||
}
|
||||
|
||||
/*
 * setControlFile
 *
 * Install a caller-supplied control-file image as the module-wide
 * ControlFile.  Only the pointer is stored (no copy is made), and WalSegSz
 * is not updated here.
 */
void
setControlFile(ControlFileData *cfile)
{
	ControlFile = cfile;
}
|
||||
|
||||
|
||||
/*
 * ArrayXlogHe3ToPg
 *
 * Convert a buffer of He3-format WAL records into PostgreSQL-format WAL.
 *
 * The source buffer is consumed one group at a time.  A group is a run of
 * records whose mtr flag is false, terminated by one record whose mtr flag
 * is set.  Each complete group is converted by XlogHe3ToPg() into a single
 * old-format record, given a WAL position, re-checksummed and copied into
 * dBuf by CopyXLogRecordToPgWAL().
 *
 * sBuf/sLen:  source records and the number of valid bytes in sBuf.
 * dBuf/dLen:  destination buffer; *dLen is reset to 0 and then accumulates
 *             the number of bytes written.
 * startLsn:   set to the start position of the first converted record, but
 *             only if *startLsn is 0 on entry.
 * endLsn:     set to the end position of the last converted record.
 *
 * Returns the number of source bytes consumed, i.e. the offset just past
 * the last group that was fully converted.  A trailing group that is
 * truncated (or whose header is malformed) is left unconsumed.
 */
int ArrayXlogHe3ToPg(char*sBuf,int sLen, char*dBuf,int* dLen,uint64 *startLsn,uint64 *endLsn) {
	/* 32kB scratch area that receives the converted records */
	static char tBuf[32768];
	int tLen = 0;
	int MtrLen = 0;
	int iLen = 0;
	int oLen = 0;

	*dLen = 0;
	for (;;) {
		XLogRecord *newRecord[XLR_MAX_BLOCK_ID + 1];
		int n = 0;
		int groupEnd = iLen;
		bool groupComplete = false;

		/*
		 * Collect one group.  Every header is checked against the bytes
		 * actually present before it is dereferenced, so a record cut off
		 * at the end of the buffer is never read past sLen, and the record
		 * pointer array cannot overflow.
		 */
		while (n <= XLR_MAX_BLOCK_ID && groupEnd < sLen) {
			uint64 remaining = (uint64) (sLen - groupEnd);
			XLogRecord *one;
			uint64 recLen;

			/* NOTE(review): assumes no record is shorter than sizeof(XLogRecord) - confirm */
			if (remaining < sizeof(XLogRecord))
				break;
			one = (XLogRecord *) (sBuf + groupEnd);
			recLen = one->xl_tot_len;
			/* too short to be a record (also guards against 0 => no progress), or truncated */
			if (recLen < sizeof(XLogRecord) || recLen > remaining)
				break;
			newRecord[n++] = one;
			groupEnd += (int) recLen;
			if (one->mtr) {
				groupComplete = true;
				break;
			}
		}
		if (!groupComplete) {
			/* nothing more that can be converted from this buffer */
			break;
		}
		iLen = groupEnd;

		/*
		 * NOTE(review): tLen only ever grows within one call and the size of
		 * the converted record is not known before XlogHe3ToPg() writes it,
		 * so nothing here prevents running past the end of tBuf - confirm
		 * that callers keep a batch below 32kB of converted output.
		 */
		XlogHe3ToPg(newRecord,n,tBuf+tLen);
		uint64 StartPos,EndPos;
		XLogRecPtr reduceV = 0;
		if (g_walHe3ToPg.PrevBytePos == 0) {
			/* first record since start: seed the insert position and reserve a switch */
			uint64 xl_prev = newRecord[0]->xl_end - newRecord[0]->xl_tot_len;
			g_walHe3ToPg.PrevBytePos = g_walHe3ToPg.CurrBytePos = xl_prev;
			(void) ReserveXLogWalSwitch(&StartPos,&EndPos,&xl_prev);
			g_walHe3ToPg.PrevBytePos = g_walHe3ToPg.CurrBytePos;
			reduceV = 1;
		}
		OldXLogRecord* rechdr = (OldXLogRecord*)(tBuf + tLen);
		ReserveXLogWalInsertLocation(rechdr->xl_tot_len,&StartPos,&EndPos,&rechdr->xl_prev);
		//for pg check: a checkpoint record's redo pointer is rewritten to its own start position
		if (rechdr->xl_rmid == RM_XLOG_ID &&
			(rechdr->xl_info == XLOG_CHECKPOINT_SHUTDOWN || rechdr->xl_info == XLOG_CHECKPOINT_ONLINE)) {
			CheckPoint*cp = (CheckPoint*)(((char*)rechdr)+SizeOfOldXLogRecord + SizeOfXLogRecordDataHeaderShort);
			cp->redo = StartPos;
			rechdr->xl_prev = rechdr->xl_prev-reduceV;
		}

		/* header fields changed above, so recompute the CRC: payload first, then header up to xl_crc */
		pg_crc32c rdata_crc;
		INIT_CRC32C(rdata_crc);
		COMP_CRC32C(rdata_crc, ((char*)rechdr) + SizeOfOldXLogRecord, rechdr->xl_tot_len - SizeOfOldXLogRecord);
		COMP_CRC32C(rdata_crc, rechdr, offsetof(OldXLogRecord, xl_crc));
		FIN_CRC32C(rdata_crc);
		rechdr->xl_crc = rdata_crc;

		CopyXLogRecordToPgWAL(rechdr->xl_tot_len,rechdr,StartPos,EndPos,dBuf+*dLen,&oLen);
		if (*startLsn == 0) {
			*startLsn = StartPos;
		}
		*endLsn = EndPos;
		*dLen += oLen;
		tLen += rechdr->xl_tot_len;
		MtrLen = iLen;
	}
	return MtrLen;
}
|
||||
|
||||
|
||||
|
@ -18,9 +18,9 @@ static void getWalFunc(gpointer data, gpointer user_data) {
|
||||
//elem->status = STARTSTATUS;
|
||||
int r;
|
||||
clock_t start = clock();
|
||||
r = batchRead((uint8_t *) elem->data, ThisTimeLineID, elem->startLsn, walStoreToLocal);
|
||||
r = batchRead((uint8_t *) elem->data, ThisTimeLineID, elem->startLsn, elem->endLsn, walStoreToLocal);
|
||||
clock_t end = clock();
|
||||
printf("====LSN %X/%X==pid %d==len %d===time %u\n",LSN_FORMAT_ARGS(elem->startLsn),pthread_self(),r,end-start);
|
||||
//printf("====LSN %X/%X==pid %d==len %d===time %u\n",LSN_FORMAT_ARGS(elem->startLsn),pthread_self(),r,end-start);
|
||||
elem->dataLen = r;
|
||||
if (r > sizeof(XLogRecord)) {
|
||||
XLogRecord* record = ((XLogRecord*)elem->data);
|
||||
@ -42,7 +42,11 @@ int initPthreadPool(void) {
|
||||
return -1;
|
||||
}
|
||||
//default 8 thread read
|
||||
if(he3mirror){
|
||||
gpool = g_thread_pool_new(getWalFunc,NULL,1,FALSE,NULL);
|
||||
}else{
|
||||
gpool = g_thread_pool_new(getWalFunc,NULL,8,FALSE,NULL);
|
||||
}
|
||||
elog(LOG,"thread pool max threads is %d,num thread is %d",
|
||||
g_thread_pool_get_max_threads(gpool),g_thread_pool_get_num_threads(gpool));
|
||||
return 0;
|
||||
|
@ -29,29 +29,29 @@ static PGconn *connToPushStandby = NULL;
|
||||
pid_t startupPid = 0;
|
||||
|
||||
|
||||
static bool ConnectPushStandbyDB() {
|
||||
char *err;
|
||||
const char *keys[] = {"dbname","user","password","host","port",NULL};
|
||||
const char *vals[] = {"postgres","repl","123456","127.0.0.1","15431",NULL};
|
||||
connToPushStandby = PQconnectdbParams(keys, vals, false);
|
||||
if (PQstatus(connToPushStandby) == CONNECTION_BAD)
|
||||
{
|
||||
err = pchomp(PQerrorMessage(connToPushStandby));
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CONNECTION_FAILURE),
|
||||
errmsg("push standby could not connect to the push standby server: %s", err)));
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
// static bool ConnectPushStandbyDB() {
|
||||
// char *err;
|
||||
// const char *keys[] = {"dbname","user","password","host","port",NULL};
|
||||
// const char *vals[] = {"postgres","repl","123456","100.73.36.123","15431",NULL};
|
||||
// connToPushStandby = PQconnectdbParams(keys, vals, false);
|
||||
// if (PQstatus(connToPushStandby) == CONNECTION_BAD)
|
||||
// {
|
||||
// err = pchomp(PQerrorMessage(connToPushStandby));
|
||||
// ereport(ERROR,
|
||||
// (errcode(ERRCODE_CONNECTION_FAILURE),
|
||||
// errmsg("push standby could not connect to the push standby server: %s", err)));
|
||||
// return false;
|
||||
// }
|
||||
// return true;
|
||||
|
||||
}
|
||||
// }
|
||||
|
||||
|
||||
static bool ConnectPrimaryDB() {
|
||||
char *err;
|
||||
char conninfo[maxconnlen];
|
||||
const char *keys[] = {"dbname","user","password","host","port",NULL};
|
||||
const char *vals[] = {"postgres","repl","123456","127.0.0.1","15432",NULL};
|
||||
// const char *keys[] = {"dbname","user","password","host","port",NULL};
|
||||
// const char *vals[] = {"postgres","repl","123456","100.73.36.123","15432",NULL};
|
||||
strlcpy(conninfo, (char *) PrimaryConnInfo, maxconnlen);
|
||||
/* Establish the connection to the primary for query Min Lsn*/
|
||||
/*
|
||||
@ -59,7 +59,8 @@ static bool ConnectPrimaryDB() {
|
||||
* URI), and pass some extra options.
|
||||
*/
|
||||
/* Note we do not want libpq to re-expand the dbname parameter */
|
||||
pushconn = PQconnectdbParams(keys, vals, true);
|
||||
pushconn = PQconnectdb(conninfo);
|
||||
// pushconn = PQconnectdbParams(keys, vals, true);
|
||||
if (PQstatus(pushconn) == CONNECTION_BAD)
|
||||
{
|
||||
err = pchomp(PQerrorMessage(pushconn));
|
||||
@ -71,57 +72,58 @@ static bool ConnectPrimaryDB() {
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool ConnectPrimaryDB4ReplyLSN() {
|
||||
char *err;
|
||||
char conninfo[maxconnlen];
|
||||
const char *keys[] = {"dbname","user","password","host","port",NULL};
|
||||
const char *vals[] = {"postgres","postgres","","127.0.0.1","15432",NULL};
|
||||
strlcpy(conninfo, (char *) PrimaryConnInfo, maxconnlen);
|
||||
/* Establish the connection to the primary for query Min Lsn*/
|
||||
/*
|
||||
* We use the expand_dbname parameter to process the connection string (or
|
||||
* URI), and pass some extra options.
|
||||
*/
|
||||
/* Note we do not want libpq to re-expand the dbname parameter */
|
||||
pushconn = PQconnectdbParams(keys, vals, true);
|
||||
if (PQstatus(pushconn) == CONNECTION_BAD)
|
||||
{
|
||||
err = pchomp(PQerrorMessage(pushconn));
|
||||
ereport(WARNING,
|
||||
(errcode(ERRCODE_CONNECTION_FAILURE),
|
||||
errmsg("push standby could not connect to the primary server: %s", err)));
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
// static bool ConnectPrimaryDB4ReplyLSN() {
|
||||
// char *err;
|
||||
// char conninfo[maxconnlen];
|
||||
// const char *keys[] = {"dbname","user","password","host","port",NULL};
|
||||
// const char *vals[] = {"postgres","postgres","","100.73.36.123","15432",NULL};
|
||||
// strlcpy(conninfo, (char *) PrimaryConnInfo, maxconnlen);
|
||||
// /* Establish the connection to the primary for query Min Lsn*/
|
||||
// /*
|
||||
// * We use the expand_dbname parameter to process the connection string (or
|
||||
// * URI), and pass some extra options.
|
||||
// */
|
||||
// /* Note we do not want libpq to re-expand the dbname parameter */
|
||||
// pushconn = PQconnectdbParams(keys, vals, true);
|
||||
// if (PQstatus(pushconn) == CONNECTION_BAD)
|
||||
// {
|
||||
// err = pchomp(PQerrorMessage(pushconn));
|
||||
// ereport(WARNING,
|
||||
// (errcode(ERRCODE_CONNECTION_FAILURE),
|
||||
// errmsg("push standby could not connect to the primary server: %s", err)));
|
||||
// return false;
|
||||
// }
|
||||
// return true;
|
||||
// }
|
||||
|
||||
XLogRecPtr QueryPushLsn()
|
||||
{
|
||||
StringInfoData cmd;
|
||||
XLogRecPtr replylsn = InvalidXLogRecPtr;
|
||||
char *replyptr;
|
||||
initStringInfo(&cmd);
|
||||
appendStringInfoString(&cmd,"select pg_last_wal_replay_lsn()");
|
||||
replylsn = InvalidXLogRecPtr;
|
||||
if (connToPushStandby == NULL) {
|
||||
if (ConnectPushStandbyDB() == false) {
|
||||
return InvalidXLogRecPtr;
|
||||
}
|
||||
}
|
||||
PGresult *pgres = NULL;
|
||||
pgres = PQexec(connToPushStandby, cmd.data);
|
||||
if (PQresultStatus(pgres) == PGRES_TUPLES_OK && PQntuples(pgres) == 1) {
|
||||
replyptr = PQgetvalue(pgres, 0, 0);
|
||||
bool flag;
|
||||
replylsn = pg_lsn_in_internal(replyptr,&flag);
|
||||
|
||||
}
|
||||
PQfinish(connToPushStandby);
|
||||
connToPushStandby = NULL;
|
||||
PQclear(pgres);
|
||||
return replylsn;
|
||||
// XLogRecPtr QueryPushLsn()
|
||||
// {
|
||||
// StringInfoData cmd;
|
||||
// XLogRecPtr replylsn = InvalidXLogRecPtr;
|
||||
// char *replyptr;
|
||||
// initStringInfo(&cmd);
|
||||
// appendStringInfoString(&cmd,"select pg_last_wal_replay_lsn()");
|
||||
// replylsn = InvalidXLogRecPtr;
|
||||
// if (connToPushStandby == NULL) {
|
||||
// if (ConnectPushStandbyDB() == false) {
|
||||
// return InvalidXLogRecPtr;
|
||||
// }
|
||||
// }
|
||||
// PGresult *pgres = NULL;
|
||||
// pgres = PQexec(connToPushStandby, cmd.data);
|
||||
// if (PQresultStatus(pgres) == PGRES_TUPLES_OK && PQntuples(pgres) == 1) {
|
||||
// replyptr = PQgetvalue(pgres, 0, 0);
|
||||
// bool flag;
|
||||
// replylsn = pg_lsn_in_internal(replyptr,&flag);
|
||||
|
||||
}
|
||||
// }
|
||||
// PQfinish(connToPushStandby);
|
||||
// connToPushStandby = NULL;
|
||||
// PQclear(pgres);
|
||||
// return replylsn;
|
||||
|
||||
// }
|
||||
|
||||
XLogRecPtr QueryPushChkpointLsn(void)
|
||||
{
|
||||
@ -202,50 +204,50 @@ XLogRecPtr QueryMinLsn(XLogRecPtr lsn)
|
||||
return replylsn;
|
||||
}
|
||||
|
||||
XLogRecPtr QueryReplyLsn(XLogRecPtr lsn)
|
||||
{
|
||||
StringInfoData cmd;
|
||||
XLogRecPtr replylsn;
|
||||
PGresult *pgres = NULL;
|
||||
char *appname;
|
||||
char *state;
|
||||
char *syncstate;
|
||||
char *replyptr;
|
||||
replylsn = InvalidXLogRecPtr;
|
||||
if (pushconn == NULL) {
|
||||
if (ConnectPrimaryDB4ReplyLSN() == false) {
|
||||
return InvalidXLogRecPtr;
|
||||
}
|
||||
}
|
||||
// XLogRecPtr QueryReplyLsn(XLogRecPtr lsn)
|
||||
// {
|
||||
// StringInfoData cmd;
|
||||
// XLogRecPtr replylsn;
|
||||
// PGresult *pgres = NULL;
|
||||
// char *appname;
|
||||
// char *state;
|
||||
// char *syncstate;
|
||||
// char *replyptr;
|
||||
// replylsn = InvalidXLogRecPtr;
|
||||
// if (pushconn == NULL) {
|
||||
// if (ConnectPrimaryDB4ReplyLSN() == false) {
|
||||
// return InvalidXLogRecPtr;
|
||||
// }
|
||||
// }
|
||||
|
||||
initStringInfo(&cmd);
|
||||
appendStringInfoString(&cmd, "SELECT t.application_name, t.replay_lsn, t.state, t.sync_state FROM pg_catalog.pg_stat_replication t WHERE t.application_name <> \'");
|
||||
appendStringInfoString(&cmd, "pushstandby");
|
||||
appendStringInfoString(&cmd, "\' order by t.replay_lsn limit 1");
|
||||
// initStringInfo(&cmd);
|
||||
// appendStringInfoString(&cmd, "SELECT t.application_name, t.replay_lsn, t.state, t.sync_state FROM pg_catalog.pg_stat_replication t WHERE t.application_name <> \'");
|
||||
// appendStringInfoString(&cmd, "pushstandby");
|
||||
// appendStringInfoString(&cmd, "\' order by t.replay_lsn limit 1");
|
||||
|
||||
pgres = PQexec(pushconn, cmd.data);
|
||||
if (PQresultStatus(pgres) == PGRES_TUPLES_OK && PQntuples(pgres) == 1) {
|
||||
appname = PQgetvalue(pgres, 0, 0);
|
||||
replyptr = PQgetvalue(pgres, 0, 1);
|
||||
bool flag;
|
||||
replylsn = pg_lsn_in_internal(replyptr,&flag);
|
||||
//replylsn = atol(replyptr);
|
||||
state = PQgetvalue(pgres, 0, 2);
|
||||
syncstate = PQgetvalue(pgres, 0, 3);
|
||||
}
|
||||
else if (PQresultStatus(pgres) == PGRES_BAD_RESPONSE ||
|
||||
PQresultStatus(pgres) == PGRES_NONFATAL_ERROR ||
|
||||
PQresultStatus(pgres) == PGRES_FATAL_ERROR)
|
||||
{
|
||||
PQfinish(pushconn);
|
||||
pushconn = NULL;
|
||||
PQclear(pgres);
|
||||
return InvalidXLogRecPtr;
|
||||
}
|
||||
//elog(LOG,"appnamelsn: %x: replylsn %x",lsn,replylsn);
|
||||
if (lsn !=InvalidXLogRecPtr && lsn < replylsn||replylsn == InvalidXLogRecPtr) {
|
||||
replylsn = lsn;
|
||||
}
|
||||
PQclear(pgres);
|
||||
return replylsn;
|
||||
}
|
||||
// pgres = PQexec(pushconn, cmd.data);
|
||||
// if (PQresultStatus(pgres) == PGRES_TUPLES_OK && PQntuples(pgres) == 1) {
|
||||
// appname = PQgetvalue(pgres, 0, 0);
|
||||
// replyptr = PQgetvalue(pgres, 0, 1);
|
||||
// bool flag;
|
||||
// replylsn = pg_lsn_in_internal(replyptr,&flag);
|
||||
// //replylsn = atol(replyptr);
|
||||
// state = PQgetvalue(pgres, 0, 2);
|
||||
// syncstate = PQgetvalue(pgres, 0, 3);
|
||||
// }
|
||||
// else if (PQresultStatus(pgres) == PGRES_BAD_RESPONSE ||
|
||||
// PQresultStatus(pgres) == PGRES_NONFATAL_ERROR ||
|
||||
// PQresultStatus(pgres) == PGRES_FATAL_ERROR)
|
||||
// {
|
||||
// PQfinish(pushconn);
|
||||
// pushconn = NULL;
|
||||
// PQclear(pgres);
|
||||
// return InvalidXLogRecPtr;
|
||||
// }
|
||||
// //elog(LOG,"appnamelsn: %x: replylsn %x",lsn,replylsn);
|
||||
// if (lsn !=InvalidXLogRecPtr && lsn < replylsn||replylsn == InvalidXLogRecPtr) {
|
||||
// replylsn = lsn;
|
||||
// }
|
||||
// PQclear(pgres);
|
||||
// return replylsn;
|
||||
// }
|
||||
|
@ -24,6 +24,7 @@ wal_batch_t *ring_buffer_queue(ring_buffer_t *buffer, wal_batch_t data) {
|
||||
return NULL;
|
||||
}
|
||||
buffer->buffer[buffer->head_index].startLsn = data.startLsn;
|
||||
buffer->buffer[buffer->head_index].endLsn = data.endLsn;
|
||||
pg_atomic_exchange_u32(&buffer->buffer[buffer->head_index].status,(uint32_t)UNKOWNSTATUS);
|
||||
curWal = &buffer->buffer[buffer->head_index];
|
||||
buffer->head_index = ((buffer->head_index + 1) & RING_BUFFER_MASK(buffer));
|
||||
@ -53,7 +54,7 @@ uint8_t ring_buffer_dequeue_arr(ring_buffer_t *buffer, uint32 size) {
|
||||
SpinLockRelease(&buffer->mutex);
|
||||
return 0;
|
||||
}
|
||||
ring_buffer_size_t pos = 0;
|
||||
ring_buffer_size_t pos = buffer->tail_index;
|
||||
for(uint32 i = 0;i<size;i++) {
|
||||
pg_atomic_exchange_u32(&buffer->buffer[pos].status,(uint32_t)UNKOWNSTATUS);
|
||||
pos = ((pos+1) & RING_BUFFER_MASK(buffer));
|
||||
@ -122,10 +123,19 @@ void InitRingBufferSpace(void) {
|
||||
}
|
||||
|
||||
int walRecordQuery(char**buffer,int* curpos,int* maxspace,uint64 lsn) {
|
||||
if (gRingBufferManger->maxIdx == 0) {
|
||||
ring_buffer_size_t maxIdx = gRingBufferManger->maxIdx;
|
||||
if (maxIdx == 0) {
|
||||
return -1;
|
||||
}
|
||||
int low = 0,high = gRingBufferManger->maxIdx, mid = 0;
|
||||
ring_buffer_size_t tailIdx = gRingBufferManger->tail_index;
|
||||
int low = tailIdx,high = ((tailIdx+maxIdx) & RING_BUFFER_MASK(gRingBufferManger)), mid = 0;
|
||||
if (low > high) {
|
||||
if (gRingBufferManger->buffer[gRingBufferManger->buffer_mask].startLsn + gRingBufferManger->buffer[gRingBufferManger->buffer_mask].dataLen > lsn) {
|
||||
high = gRingBufferManger->buffer_mask+1;
|
||||
} else {
|
||||
low = 0;
|
||||
}
|
||||
}
|
||||
if (gRingBufferManger->buffer[high-1].startLsn == 0) {
|
||||
high -= 2;
|
||||
} else {
|
||||
@ -169,7 +179,7 @@ int walRecordQuery(char**buffer,int* curpos,int* maxspace,uint64 lsn) {
|
||||
free(*buffer);
|
||||
*buffer = ptr;
|
||||
}
|
||||
memcpy(*buffer,record,xllen);
|
||||
memcpy(*buffer+*curpos,record,xllen);
|
||||
*curpos += xllen;
|
||||
}
|
||||
return xllen;
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -33,6 +33,13 @@
|
||||
#include "storage/proc.h"
|
||||
#include "storage/spin.h"
|
||||
#include "utils/memutils.h"
|
||||
#include "access/heapam_xlog.h"
|
||||
#include "access/nbtxlog.h"
|
||||
#include "access/nbtree.h"
|
||||
#include "access/gistxlog.h"
|
||||
#include "access/gist_private.h"
|
||||
#include "access/spgxlog.h"
|
||||
#include "access/brin_xlog.h"
|
||||
|
||||
/* Buffer size required to store a compressed version of backup block image */
|
||||
#define PGLZ_MAX_BLCKSZ PGLZ_MAX_OUTPUT(BLCKSZ)
|
||||
@ -69,10 +76,12 @@ static int max_registered_block_id = 0; /* highest block_id + 1 currently
|
||||
|
||||
int group_total_len;
|
||||
int grouo_rec_count;
|
||||
int grouo_rec_cur_count;
|
||||
|
||||
XLogRecord *grouphead[XLR_MAX_BLOCK_ID + 1];
|
||||
int grouplens[XLR_MAX_BLOCK_ID + 1];
|
||||
XLogRecData groupRecData[XLR_MAX_BLOCK_ID + 1];
|
||||
XLogRecPtr groupEndLsn[XLR_MAX_BLOCK_ID + 1];
|
||||
|
||||
/*
|
||||
* A chain of XLogRecDatas to hold the "main data" of a WAL record, registered
|
||||
@ -552,6 +561,459 @@ XLogInsert(RmgrId rmid, uint8 info)
|
||||
return EndPos;
|
||||
}
|
||||
|
||||
static XLogRecData g_bkp_rdatas[XLR_MAX_BLOCK_ID + 1][2];
|
||||
static XLogRecData g_main_data;
|
||||
|
||||
/*
 * extendMainData
 *
 * Make sure state->main_data has room for the current main data plus a
 * fixed 64 bytes of headroom, which callers use when they rewrite the main
 * data into a larger record layout.
 *
 * If the buffer has to grow, the bytes currently stored in it are carried
 * over to the new allocation, so the main data stays readable after the
 * call.  Pointers previously obtained into the old buffer are invalid once
 * this function has reallocated.
 */
static void extendMainData(XLogReaderState *state) {
	int extendSize = 64;

	if (state->main_data_len + extendSize > state->main_data_bufsz)
	{
		char	   *oldData = state->main_data;
		uint32		oldSize = state->main_data_bufsz;
		uint32		keep = state->main_data_len;

		/*
		 * main_data_bufsz must be MAXALIGN'ed: record structs may omit
		 * trailing padding on disk while compiled code still touches those
		 * padding bytes.  The request is also forced to at least BLCKSZ / 2
		 * so that this path is not taken repeatedly.
		 */
		state->main_data_bufsz = MAXALIGN(Max(state->main_data_len + extendSize,
											  BLCKSZ / 2));
		state->main_data = palloc(state->main_data_bufsz);

		if (oldData)
		{
			/* never copy more than the old buffer could actually hold */
			if (keep > oldSize)
				keep = oldSize;
			if (keep > 0)
				memcpy(state->main_data, oldData, keep);
			pfree(oldData);
		}
	}
}
|
||||
|
||||
static void convertMainData(XLogReaderState *state, OldXLogRecord *record) {
|
||||
RmgrId rmid = record->xl_rmid;
|
||||
uint8 info = (record->xl_info & ~XLR_INFO_MASK);
|
||||
switch(rmid) {
|
||||
case RM_HEAP2_ID:
|
||||
{
|
||||
if ((info & XLOG_HEAP_OPMASK) == XLOG_HEAP2_VISIBLE) {
|
||||
xl_old_heap_visible *xlrec = (xl_old_heap_visible *) XLogRecGetData(state);
|
||||
xl_heap_visible xlrecNew;
|
||||
xlrecNew.rnode = state->blocks[1].rnode;
|
||||
xlrecNew.blkno = state->blocks[1].blkno;
|
||||
xlrecNew.cutoff_xid = xlrec->cutoff_xid;
|
||||
xlrecNew.flags = xlrec->flags;
|
||||
extendMainData(state);
|
||||
state->main_data_len = sizeof(xl_heap_visible);
|
||||
memcpy(state->main_data,&xlrecNew,state->main_data_len);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case RM_HEAP_ID:
|
||||
{
|
||||
if (((info & XLOG_HEAP_OPMASK) == XLOG_HEAP_UPDATE) ||
|
||||
((info & XLOG_HEAP_OPMASK) == XLOG_HEAP_HOT_UPDATE)) {
|
||||
xl_old_heap_update *xlrec = (xl_old_heap_update *) XLogRecGetData(state);
|
||||
xl_heap_update xlrecNew;
|
||||
xlrecNew.old_xmax = xlrec->old_xmax;
|
||||
xlrecNew.old_offnum = xlrec->old_offnum;
|
||||
xlrecNew.old_infobits_set = xlrec->old_infobits_set;
|
||||
xlrecNew.flags = xlrec->flags;
|
||||
xlrecNew.new_xmax = xlrec->new_xmax;
|
||||
xlrecNew.new_offnum = xlrec->new_offnum;
|
||||
xlrecNew.newblk = state->blocks[0].blkno;
|
||||
if(state->max_block_id == 0){
|
||||
xlrecNew.oldblk = state->blocks[0].blkno;
|
||||
} else{
|
||||
xlrecNew.oldblk = state->blocks[1].blkno;
|
||||
}
|
||||
xlrecNew.rnode = state->blocks[0].rnode;
|
||||
extendMainData(state);
|
||||
state->main_data_len = sizeof(xl_heap_update);
|
||||
memcpy(state->main_data,&xlrecNew,state->main_data_len);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case RM_BTREE_ID:
|
||||
{
|
||||
if (info == XLOG_BTREE_SPLIT_L || info == XLOG_BTREE_SPLIT_R) {
|
||||
xl_old_btree_split *xlrec = (xl_old_btree_split *) XLogRecGetData(state);
|
||||
xl_btree_split xlrecNew;
|
||||
xlrecNew.level = xlrec->level;
|
||||
xlrecNew.firstrightoff = xlrec->firstrightoff;
|
||||
xlrecNew.newitemoff = xlrec->newitemoff;
|
||||
xlrecNew.postingoff = xlrec->postingoff;
|
||||
xlrecNew.origpagenumber = state->blocks[0].blkno;
|
||||
xlrecNew.rightpagenumber = state->blocks[1].blkno;
|
||||
if (!XLogRecGetBlockTag(state, 2, NULL, NULL, &xlrecNew.spagenumber))
|
||||
xlrecNew.spagenumber = P_NONE;
|
||||
extendMainData(state);
|
||||
state->main_data_len = sizeof(xl_btree_split);
|
||||
memcpy(state->main_data,&xlrecNew,state->main_data_len);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case RM_GIST_ID:
|
||||
{
|
||||
if (info == XLOG_GIST_PAGE_SPLIT) {
|
||||
gistoldxlogPageSplit *xlrec = (gistoldxlogPageSplit *) XLogRecGetData(state);
|
||||
gistxlogPageSplit xlrecNew;
|
||||
xlrecNew.markfollowright = xlrec->markfollowright;
|
||||
xlrecNew.npage = xlrec->npage;
|
||||
xlrecNew.origleaf = xlrec->origleaf;
|
||||
xlrecNew.orignsn = xlrec->orignsn;
|
||||
xlrecNew.origrlink = xlrec->origrlink;
|
||||
xlrecNew.isroot = false;
|
||||
if (xlrec->npage > 0) {
|
||||
if (state->blocks[1].blkno == GIST_ROOT_BLKNO) {
|
||||
xlrecNew.isroot = true;
|
||||
}
|
||||
}
|
||||
extendMainData(state);
|
||||
state->main_data_len = sizeof(gistxlogPageSplit);
|
||||
memcpy(state->main_data,&xlrecNew,state->main_data_len);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case RM_SPGIST_ID:
|
||||
{
|
||||
if (info == XLOG_SPGIST_ADD_LEAF) {
|
||||
spgoldxlogAddLeaf *xlrec = (spgoldxlogAddLeaf *) XLogRecGetData(state);
|
||||
spgxlogAddLeaf xlrecNew;
|
||||
xlrecNew.newPage = xlrec->newPage; /* init dest page? */
|
||||
xlrecNew.storesNulls = xlrec->storesNulls; /* page is in the nulls tree? */
|
||||
xlrecNew.offnumLeaf = xlrec->offnumLeaf; /* offset where leaf tuple gets placed */
|
||||
xlrecNew.offnumHeadLeaf = xlrec->offnumHeadLeaf; /* offset of head tuple in chain, if any */
|
||||
|
||||
xlrecNew.offnumParent = xlrec->offnumParent; /* where the parent downlink is, if any */
|
||||
xlrecNew.nodeI = xlrec->nodeI;
|
||||
xlrecNew.blknoLeaf = state->blocks[0].blkno;
|
||||
extendMainData(state);
|
||||
state->main_data_len = sizeof(spgxlogAddLeaf);
|
||||
memcpy(state->main_data,&xlrecNew,state->main_data_len);
|
||||
|
||||
} else if (info == XLOG_SPGIST_MOVE_LEAFS) {
|
||||
spgoldxlogMoveLeafs *xlrec = (spgoldxlogMoveLeafs *) XLogRecGetData(state);
|
||||
spgxlogMoveLeafs xlrecNew;
|
||||
xlrecNew.nMoves = xlrec->nMoves; /* number of tuples moved from source page */
|
||||
xlrecNew.newPage = xlrec->newPage; /* init dest page? */
|
||||
xlrecNew.replaceDead = xlrec->replaceDead; /* are we replacing a DEAD source tuple? */
|
||||
xlrecNew.storesNulls = xlrec->storesNulls; /* pages are in the nulls tree? */
|
||||
|
||||
/* where the parent downlink is */
|
||||
xlrecNew.offnumParent = xlrec->offnumParent;
|
||||
xlrecNew.nodeI = xlrec->nodeI;
|
||||
|
||||
xlrecNew.stateSrc = xlrec->stateSrc;
|
||||
|
||||
/* for he3pg */
|
||||
xlrecNew.blknoDst = state->blocks[1].blkno;
|
||||
|
||||
/*----------
|
||||
* data follows:
|
||||
* array of deleted tuple numbers, length nMoves
|
||||
* array of inserted tuple numbers, length nMoves + 1 or 1
|
||||
* list of leaf tuples, length nMoves + 1 or 1 (unaligned!)
|
||||
*
|
||||
* Note: if replaceDead is true then there is only one inserted tuple
|
||||
* number and only one leaf tuple in the data, because we are not copying
|
||||
* the dead tuple from the source
|
||||
*----------
|
||||
*/
|
||||
char* tmp = palloc(state->main_data_len-SizeOfOldSpgxlogMoveLeafs);
|
||||
memcpy(tmp,state->main_data+SizeOfOldSpgxlogMoveLeafs,state->main_data_len-SizeOfOldSpgxlogMoveLeafs);
|
||||
extendMainData(state);
|
||||
memcpy(state->main_data,&xlrecNew,SizeOfSpgxlogMoveLeafs);
|
||||
memcpy(state->main_data + SizeOfSpgxlogMoveLeafs, tmp, state->main_data_len-SizeOfOldSpgxlogMoveLeafs);
|
||||
state->main_data_len += SizeOfSpgxlogMoveLeafs-SizeOfOldSpgxlogMoveLeafs;
|
||||
pfree(tmp);
|
||||
} else if (info == XLOG_SPGIST_ADD_NODE) {
|
||||
spgoldxlogAddNode *xlrec = (spgoldxlogAddNode *) XLogRecGetData(state);
|
||||
spgxlogAddNode xlrecNew;
|
||||
xlrecNew.offnum = xlrec->offnum;
|
||||
/*
|
||||
* Offset of the new tuple, on the new page (on backup block 1). Invalid,
|
||||
* if we overwrote the old tuple in the original page).
|
||||
*/
|
||||
xlrecNew.offnumNew = xlrec->offnumNew;
|
||||
xlrecNew.newPage = xlrec->newPage; /* init new page? */
|
||||
|
||||
/*----
|
||||
* Where is the parent downlink? parentBlk indicates which page it's on,
|
||||
* and offnumParent is the offset within the page. The possible values for
|
||||
* parentBlk are:
|
||||
*
|
||||
* 0: parent == original page
|
||||
* 1: parent == new page
|
||||
* 2: parent == different page (blk ref 2)
|
||||
* -1: parent not updated
|
||||
*----
|
||||
*/
|
||||
xlrecNew.parentBlk = xlrec->parentBlk;
|
||||
xlrecNew.offnumParent = xlrec->offnumParent; /* offset within the parent page */
|
||||
|
||||
xlrecNew.nodeI = xlrec->nodeI;
|
||||
xlrecNew.blkno1 = state->blocks[0].blkno;
|
||||
xlrecNew.stateSrc = xlrec->stateSrc;
|
||||
extendMainData(state);
|
||||
state->main_data_len = sizeof(spgxlogAddNode);
|
||||
memcpy(state->main_data,&xlrecNew,state->main_data_len);
|
||||
|
||||
} else if (info == XLOG_SPGIST_PICKSPLIT) {
|
||||
spgoldxlogPickSplit *xlrec = (spgoldxlogPickSplit *) XLogRecGetData(state);
|
||||
spgxlogPickSplit xlrecNew;
|
||||
xlrecNew.isRootSplit = xlrec->isRootSplit;
|
||||
|
||||
xlrecNew.nDelete = xlrec->nDelete; /* n to delete from Src */
|
||||
xlrecNew.nInsert = xlrec->nInsert; /* n to insert on Src and/or Dest */
|
||||
xlrecNew.initSrc = xlrec->initSrc; /* re-init the Src page? */
|
||||
xlrecNew.initDest = xlrec->initDest; /* re-init the Dest page? */
|
||||
|
||||
/* for he3pg */
|
||||
xlrecNew.blknoInner = state->blocks[2].blkno;
|
||||
/* where to put new inner tuple */
|
||||
xlrecNew.offnumInner = xlrec->offnumInner;
|
||||
xlrecNew.initInner = xlrec->initInner; /* re-init the Inner page? */
|
||||
|
||||
xlrecNew.storesNulls = xlrec->storesNulls; /* pages are in the nulls tree? */
|
||||
|
||||
/* where the parent downlink is, if any */
|
||||
xlrecNew.innerIsParent = xlrec->innerIsParent; /* is parent the same as inner page? */
|
||||
xlrecNew.offnumParent = xlrec->offnumParent;
|
||||
xlrecNew.nodeI = xlrec->nodeI;
|
||||
|
||||
xlrecNew.stateSrc = xlrec->stateSrc;
|
||||
|
||||
/*----------
|
||||
* data follows:
|
||||
* array of deleted tuple numbers, length nDelete
|
||||
* array of inserted tuple numbers, length nInsert
|
||||
* array of page selector bytes for inserted tuples, length nInsert
|
||||
* new inner tuple (unaligned!)
|
||||
* list of leaf tuples, length nInsert (unaligned!)
|
||||
*----------
|
||||
*/
|
||||
char* tmp = palloc(state->main_data_len-SizeOfOldSpgxlogPickSplit);
|
||||
memcpy(tmp,state->main_data+SizeOfOldSpgxlogPickSplit,state->main_data_len-SizeOfOldSpgxlogPickSplit);
|
||||
extendMainData(state);
|
||||
memcpy(state->main_data,&xlrecNew,SizeOfSpgxlogPickSplit);
|
||||
memcpy(state->main_data + SizeOfSpgxlogPickSplit, tmp, state->main_data_len-SizeOfOldSpgxlogPickSplit);
|
||||
state->main_data_len += SizeOfSpgxlogPickSplit-SizeOfOldSpgxlogPickSplit;
|
||||
pfree(tmp);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case RM_BRIN_ID:
|
||||
{
|
||||
if (info == XLOG_BRIN_INSERT) {
|
||||
xl_old_brin_insert *xlrec = (xl_old_brin_insert *) XLogRecGetData(state);
|
||||
xl_brin_insert xlrecNew;
|
||||
xlrecNew.heapBlk = xlrec->heapBlk;
|
||||
/* extra information needed to update the revmap */
|
||||
xlrecNew.pagesPerRange = xlrec->pagesPerRange;
|
||||
xlrecNew.block0 = state->blocks[0].blkno;
|
||||
/* offset number in the main page to insert the tuple to. */
|
||||
xlrecNew.offnum = xlrec->offnum;
|
||||
extendMainData(state);
|
||||
state->main_data_len = sizeof(xl_brin_insert);
|
||||
memcpy(state->main_data,&xlrecNew,state->main_data_len);
|
||||
} else if ( info == XLOG_BRIN_UPDATE) {
|
||||
xl_old_brin_update *xlrec = (xl_old_brin_update *) XLogRecGetData(state);
|
||||
xl_brin_update xlrecUpdate;
|
||||
xl_old_brin_insert *xlrecInsert = &xlrec->insert;
|
||||
xl_brin_insert xlrecNew;
|
||||
xlrecNew.heapBlk = xlrecInsert->heapBlk;
|
||||
/* extra information needed to update the revmap */
|
||||
xlrecNew.pagesPerRange = xlrecInsert->pagesPerRange;
|
||||
xlrecNew.block0 = state->blocks[0].blkno;
|
||||
/* offset number in the main page to insert the tuple to. */
|
||||
xlrecNew.offnum = xlrecInsert->offnum;
|
||||
xlrecUpdate.oldOffnum = xlrec->oldOffnum;
|
||||
xlrecUpdate.insert = xlrecNew;
|
||||
extendMainData(state);
|
||||
state->main_data_len = sizeof(xl_brin_update);
|
||||
memcpy(state->main_data,&xlrecUpdate,state->main_data_len);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * DecodeXLogRecordAssemble
 *
 * Re-assemble an already decoded WAL record into a group of records, one per
 * block reference (or exactly one record when the input has no block
 * references).  Record i of the group uses groupRecData[i] as the head of its
 * rdata chain, grouphead[i] as its XLogRecord header and grouplens[i] as its
 * total length; group_total_len accumulates the length of the whole group and
 * grouo_rec_count ends up holding the number of records built.
 *
 * state        - decoded record (blocks, origin, toplevel xid, main data).
 * record       - original record header; supplies xl_rmid, xl_info, xl_xid
 *                and is handed to convertMainData().
 * RedoRecPtr   - not referenced in this function body.
 * doPageWrites - not referenced in this function body.
 * fpw_lsn      - output; always set to InvalidXLogRecPtr.
 * num_fpi      - in/out; incremented once per block that carries an image.
 *
 * Returns &groupRecData[0].  xl_prev is left invalid and xl_crc covers only
 * the data after the fixed header; both are expected to be completed later.
 */
XLogRecData *DecodeXLogRecordAssemble(XLogReaderState *state, OldXLogRecord *record,
|
||||
XLogRecPtr RedoRecPtr, bool doPageWrites,
|
||||
XLogRecPtr *fpw_lsn, int *num_fpi)
|
||||
{
|
||||
|
||||
/*
|
||||
* Make an rdata chain containing all the data portions of all block
|
||||
* references. This includes the data for full-page images. Also append
|
||||
* the headers for the block references in the scratch buffer.
|
||||
*/
|
||||
RmgrId rmid = record->xl_rmid;
|
||||
uint8 info = record->xl_info;
|
||||
*fpw_lsn = InvalidXLogRecPtr;
|
||||
int block_id;
|
||||
XLogRecord *rechdr = NULL;
|
||||
/* Reset the shared group bookkeeping before building a new group. */
group_total_len = 0;
|
||||
grouo_rec_count = 0;
|
||||
grouo_rec_cur_count = 0;
|
||||
/* One output record per block id; a record without blocks still yields one. */
int maxidx = (state->max_block_id < 0 ? 1:state->max_block_id+1);
|
||||
/* Set once convertMainData() has run so it is not repeated for later blocks. */
bool isDone = false;
|
||||
for (block_id = 0; block_id < maxidx; block_id++)
|
||||
{
|
||||
XLogRecData* rdt;
|
||||
uint32 total_len;
|
||||
total_len = 0;
|
||||
pg_crc32c rdata_crc;
|
||||
XLogRecData *rdt_datas_last;
|
||||
char *scratch;
|
||||
// char linkkey[36];
|
||||
/* groupRecData[n] heads this record's chain; its payload is the header scratch. */
groupRecData[grouo_rec_count].next = NULL;
|
||||
rdt_datas_last = &groupRecData[grouo_rec_count];
|
||||
|
||||
/* Each record of the group gets its own SINGLE_SCRATCH_SIZE slice of hdr_scratch. */
scratch = hdr_scratch + grouo_rec_count * SINGLE_SCRATCH_SIZE;
|
||||
groupRecData[grouo_rec_count].data = scratch;
|
||||
/*group_total_len+=HEADER_SCRATCH_SIZE;*/
|
||||
grouphead[grouo_rec_count]=(XLogRecord *)scratch;
|
||||
/* The record begins with the fixed-size header */
|
||||
rechdr = (XLogRecord *)scratch;
|
||||
scratch += SizeOfXLogRecord;
|
||||
/* Block-reference headers; skipped entirely when the record has no blocks. */
if (state->max_block_id >= 0) {
|
||||
/* NOTE(review): no in-use check on blocks[block_id] is visible here -- confirm every id up to max_block_id is populated. */
DecodedBkpBlock *blkbuf = &state->blocks[block_id];
|
||||
XLogRecData* bkp_rdatas = g_bkp_rdatas[block_id];
|
||||
|
||||
XLogRecordBlockHeader bkpb;
|
||||
XLogRecordBlockImageHeader bimg;
|
||||
XLogRecordBlockCompressHeader cbimg = {0};
|
||||
/* Every emitted record carries a single block, always written with id 0. */
bkpb.id = 0;
|
||||
bkpb.fork_flags = blkbuf->flags;
|
||||
bkpb.data_length = blkbuf->data_len;
|
||||
//total_len += bkpb.data_length;
|
||||
/* Ok, copy the header to the scratch buffer */
|
||||
memcpy(scratch, &bkpb, SizeOfXLogRecordBlockHeader);
|
||||
scratch += SizeOfXLogRecordBlockHeader;
|
||||
/* Page image: header goes to scratch, image bytes are chained via bkp_rdatas[0]. */
if (blkbuf->has_image) {
|
||||
bimg.bimg_info = blkbuf->bimg_info;
|
||||
bimg.hole_offset = blkbuf->hole_offset;
|
||||
bimg.length = blkbuf->bimg_len;
|
||||
memcpy(scratch, &bimg, SizeOfXLogRecordBlockImageHeader);
|
||||
scratch += SizeOfXLogRecordBlockImageHeader;
|
||||
rdt_datas_last->next = &bkp_rdatas[0];
|
||||
rdt_datas_last = rdt_datas_last->next;
|
||||
bkp_rdatas[0].data = blkbuf->bkp_image;
|
||||
bkp_rdatas[0].len = blkbuf->bimg_len;
|
||||
/* The compress header is written only for compressed images that also have a hole. */
if (bimg.bimg_info & BKPIMAGE_IS_COMPRESSED) {
|
||||
cbimg.hole_length = blkbuf->hole_length;
|
||||
if (bimg.bimg_info & BKPIMAGE_HAS_HOLE) {
|
||||
memcpy(scratch, &cbimg,
|
||||
SizeOfXLogRecordBlockCompressHeader);
|
||||
scratch += SizeOfXLogRecordBlockCompressHeader;
|
||||
}
|
||||
}
|
||||
total_len += bimg.length;
|
||||
*num_fpi += 1;
|
||||
}
|
||||
/* Per-block payload is chained via bkp_rdatas[1]. */
if (blkbuf->has_data) {
|
||||
rdt_datas_last->next = &bkp_rdatas[1];
|
||||
rdt_datas_last = rdt_datas_last->next;
|
||||
bkp_rdatas[1].data = blkbuf->data;
|
||||
bkp_rdatas[1].len = blkbuf->data_len;
|
||||
total_len += blkbuf->data_len;
|
||||
}
|
||||
/* The relfilenode and block number are written unconditionally for every block. */
/* NOTE(review): fork_flags is copied verbatim above -- confirm the reader never expects the rnode to be omitted. */
memcpy(scratch, &blkbuf->rnode, sizeof(RelFileNode));
|
||||
scratch += sizeof(RelFileNode);
|
||||
|
||||
memcpy(scratch, &blkbuf->blkno, sizeof(BlockNumber));
|
||||
scratch += sizeof(BlockNumber);
|
||||
}
|
||||
|
||||
/* Replication origin, if set: one marker byte followed by the origin id. */
if (state->record_origin != InvalidRepOriginId) {
|
||||
*(scratch++) = (char)XLR_BLOCK_ID_ORIGIN;
|
||||
memcpy(scratch, &state->record_origin, sizeof(RepOriginId));
|
||||
scratch += sizeof(RepOriginId);
|
||||
}
|
||||
|
||||
/* Top-level xid, if set: one marker byte followed by the xid. */
if (state->toplevel_xid != InvalidTransactionId) {
|
||||
*(scratch++) = (char)XLR_BLOCK_ID_TOPLEVEL_XID;
|
||||
memcpy(scratch, &state->toplevel_xid, sizeof(TransactionId));
|
||||
scratch += sizeof(TransactionId);
|
||||
}
|
||||
|
||||
/* Main data is shared by the whole group: every record's chain ends in g_main_data. */
if (state->main_data_len > 0) {
|
||||
rdt_datas_last->next = &g_main_data;
|
||||
rdt_datas_last = &g_main_data;
|
||||
/* Run convertMainData() and capture the main-data pointer and length only once per group. */
if (isDone == false) {
|
||||
convertMainData(state,record);
|
||||
g_main_data.data = state->main_data;
|
||||
g_main_data.len = state->main_data_len;
|
||||
isDone = true;
|
||||
}
|
||||
/* Lengths above 255 use the long form (uint32); otherwise a single length byte. */
if (state->main_data_len > 255) {
|
||||
*(scratch++) = (char)XLR_BLOCK_ID_DATA_LONG;
|
||||
memcpy(scratch, &state->main_data_len, sizeof(uint32));
|
||||
scratch += sizeof(uint32);
|
||||
} else {
|
||||
*(scratch++) = (char)XLR_BLOCK_ID_DATA_SHORT;
|
||||
*(scratch++) = (uint8)state->main_data_len;
|
||||
}
|
||||
total_len += state->main_data_len;
|
||||
}
|
||||
|
||||
/* Terminate the chain; when g_main_data is the tail this also resets its next pointer. */
rdt_datas_last->next = NULL;
|
||||
/* Header length is the number of bytes written to scratch; total_len already holds the chained payload sizes. */
groupRecData[grouo_rec_count].len = scratch - groupRecData[grouo_rec_count].data;
|
||||
total_len += groupRecData[grouo_rec_count].len;
|
||||
grouplens[grouo_rec_count] = total_len;
|
||||
|
||||
/*
|
||||
|
||||
In CopyXLogRecordToWAL we need freespace >= sizeof(uint32), so the xlog record size would have to be MAXALIGNed; the alignment below is currently disabled.
|
||||
|
||||
|
||||
*/
|
||||
/*total_len=MAXALIGN(total_len);*/
|
||||
|
||||
/*
|
||||
* Calculate CRC of the data
|
||||
*
|
||||
* Note that the record header isn't added into the CRC initially since we
|
||||
* don't know the prev-link yet. Thus, the CRC will represent the CRC of
|
||||
* the whole record in the order: rdata, then backup blocks, then record
|
||||
* header.
|
||||
*/
|
||||
|
||||
INIT_CRC32C(rdata_crc);
|
||||
|
||||
/* First the header bytes that follow the fixed XLogRecord, then every chained payload. */
COMP_CRC32C(rdata_crc, groupRecData[grouo_rec_count].data + SizeOfXLogRecord, groupRecData[grouo_rec_count].len - SizeOfXLogRecord);
|
||||
|
||||
rdt = groupRecData[grouo_rec_count].next;
|
||||
|
||||
for (; rdt != NULL; rdt = rdt->next)
|
||||
COMP_CRC32C(rdata_crc, rdt->data, rdt->len);
|
||||
|
||||
/*
|
||||
* Fill in the fields in the record header. Prev-link is filled in later,
|
||||
* once we know where in the WAL the record will be inserted. The CRC does
|
||||
* not include the record header yet.
|
||||
*/
|
||||
rechdr->xl_xid = record->xl_xid;
|
||||
rechdr->xl_tot_len = total_len;
|
||||
rechdr->xl_info = info;
|
||||
rechdr->xl_rmid = rmid;
|
||||
rechdr->xl_prev = InvalidXLogRecPtr;
|
||||
rechdr->xl_crc = rdata_crc;
|
||||
/* blocknum records the original block id; mtr stays false except on the last record (set after the loop). */
rechdr->blocknum = block_id;
|
||||
rechdr->mtr = false;
|
||||
|
||||
group_total_len += total_len;
|
||||
grouo_rec_count++;
|
||||
}
|
||||
/* rechdr still points at the last record built; flag it as the end of the group. */
rechdr->mtr = true;
|
||||
|
||||
return &groupRecData[0];
|
||||
}
|
||||
/*
|
||||
* Assemble a WAL record from the registered data and buffers into an
|
||||
* XLogRecData chain, ready for insertion with XLogInsertRecord().
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -632,13 +632,15 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
|
||||
else
|
||||
{
|
||||
/* hm, page doesn't exist in file */
|
||||
/*if (mode == RBM_NORMAL)
|
||||
if(!he3mirror){
|
||||
if (mode == RBM_NORMAL && EnableHotStandby != false)
|
||||
{
|
||||
log_invalid_page(rnode, forknum, blkno, false);
|
||||
return InvalidBuffer;
|
||||
}*/
|
||||
}
|
||||
if (mode == RBM_NORMAL_NO_LOG)
|
||||
return InvalidBuffer;
|
||||
}
|
||||
/* OK to extend the file */
|
||||
/* we do this in recovery only - no rel-extension lock needed */
|
||||
Assert(InRecovery);
|
||||
@ -666,7 +668,7 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
|
||||
}
|
||||
}
|
||||
|
||||
if (mode == RBM_NORMAL)
|
||||
if (!he3mirror && mode == RBM_NORMAL)
|
||||
{
|
||||
/* check that page has been initialized */
|
||||
Page page = (Page) BufferGetPage(buffer);
|
||||
@ -989,38 +991,38 @@ XLogTruncateRelation(RelFileNode rnode, ForkNumber forkNum,
|
||||
// }
|
||||
|
||||
/* XLogReaderRoutine->segment_open callback for local pg_wal files */
|
||||
// void
|
||||
// wal_segment_open(XLogReaderState *state, XLogSegNo nextSegNo,
|
||||
// TimeLineID *tli_p)
|
||||
// {
|
||||
// TimeLineID tli = *tli_p;
|
||||
// char path[MAXPGPATH];
|
||||
void
|
||||
wal_segment_open(XLogReaderState *state, XLogSegNo nextSegNo,
|
||||
TimeLineID *tli_p)
|
||||
{
|
||||
TimeLineID tli = *tli_p;
|
||||
char path[MAXPGPATH];
|
||||
|
||||
// XLogFilePath(path, tli, nextSegNo, state->segcxt.ws_segsize);
|
||||
// state->seg.ws_file = BasicOpenFile(path, O_RDONLY | PG_BINARY);
|
||||
// if (state->seg.ws_file >= 0)
|
||||
// return;
|
||||
XLogFilePath(path, tli, nextSegNo, state->segcxt.ws_segsize);
|
||||
state->seg.ws_file = BasicOpenFile(path, O_RDONLY | PG_BINARY);
|
||||
if (state->seg.ws_file >= 0)
|
||||
return;
|
||||
|
||||
// if (errno == ENOENT)
|
||||
// ereport(ERROR,
|
||||
// (errcode_for_file_access(),
|
||||
// errmsg("requested WAL segment %s has already been removed",
|
||||
// path)));
|
||||
// else
|
||||
// ereport(ERROR,
|
||||
// (errcode_for_file_access(),
|
||||
// errmsg("could not open file \"%s\": %m",
|
||||
// path)));
|
||||
// }
|
||||
if (errno == ENOENT)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("requested WAL segment %s has already been removed",
|
||||
path)));
|
||||
else
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not open file \"%s\": %m",
|
||||
path)));
|
||||
}
|
||||
|
||||
/* stock XLogReaderRoutine->segment_close callback */
|
||||
// void
|
||||
// wal_segment_close(XLogReaderState *state)
|
||||
// {
|
||||
// close(state->seg.ws_file);
|
||||
// /* need to check errno? */
|
||||
// state->seg.ws_file = -1;
|
||||
// }
|
||||
void
|
||||
wal_segment_close(XLogReaderState *state)
|
||||
{
|
||||
close(state->seg.ws_file);
|
||||
/* need to check errno? */
|
||||
state->seg.ws_file = -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* XLogReaderRoutine->page_read callback for reading local xlog files
|
||||
@ -1158,7 +1160,7 @@ XLogTruncateRelation(RelFileNode rnode, ForkNumber forkNum,
|
||||
|
||||
int
|
||||
read_local_xlog_batch(XLogReaderState *state,
|
||||
int reqLen, XLogRecPtr targetRecPtr, char *cur_page)
|
||||
XLogRecPtr targetRecPtr, int reqLen, char *cur_page)
|
||||
{
|
||||
XLogRecPtr read_upto,
|
||||
loc;
|
||||
|
@ -37,6 +37,7 @@
|
||||
#include "postmaster/bgwriter.h"
|
||||
#include "postmaster/startup.h"
|
||||
#include "postmaster/walwriter.h"
|
||||
#include "postmaster/secondbuffer.h"
|
||||
#include "replication/walreceiver.h"
|
||||
#include "storage/bufmgr.h"
|
||||
#include "storage/bufpage.h"
|
||||
|
@ -44,8 +44,7 @@ OBJS = \
|
||||
pg_subscription.o \
|
||||
pg_type.o \
|
||||
storage.o \
|
||||
toasting.o \
|
||||
pg_hot_data.o
|
||||
toasting.o
|
||||
|
||||
include $(top_srcdir)/src/backend/common.mk
|
||||
|
||||
@ -70,7 +69,7 @@ CATALOG_HEADERS := \
|
||||
pg_default_acl.h pg_init_privs.h pg_seclabel.h pg_shseclabel.h \
|
||||
pg_collation.h pg_partitioned_table.h pg_range.h pg_transform.h \
|
||||
pg_sequence.h pg_publication.h pg_publication_rel.h pg_subscription.h \
|
||||
pg_subscription_rel.h pg_stat_share_storage.h pg_hot_data.h
|
||||
pg_subscription_rel.h pg_stat_share_storage.h
|
||||
|
||||
GENERATED_HEADERS := $(CATALOG_HEADERS:%.h=%_d.h) schemapg.h system_fk_info.h
|
||||
|
||||
|
@ -40,7 +40,6 @@
|
||||
#include "catalog/pg_stat_share_storage.h"
|
||||
#include "catalog/pg_tablespace.h"
|
||||
#include "catalog/pg_type.h"
|
||||
#include "catalog/pg_hot_data.h"
|
||||
#include "miscadmin.h"
|
||||
#include "storage/fd.h"
|
||||
#include "utils/fmgroids.h"
|
||||
@ -248,7 +247,6 @@ IsSharedRelation(Oid relationId)
|
||||
if (relationId == AuthIdRelationId ||
|
||||
relationId == AuthMemRelationId ||
|
||||
relationId == DatabaseRelationId ||
|
||||
relationId == HotDataRelationId ||
|
||||
relationId == SharedDescriptionRelationId ||
|
||||
relationId == SharedDependRelationId ||
|
||||
relationId == SharedSecLabelRelationId ||
|
||||
@ -265,7 +263,6 @@ IsSharedRelation(Oid relationId)
|
||||
relationId == AuthMemMemRoleIndexId ||
|
||||
relationId == DatabaseNameIndexId ||
|
||||
relationId == DatabaseOidIndexId ||
|
||||
relationId == HotDataDatnameRelnameIndexId ||
|
||||
relationId == SharedDescriptionObjIndexId ||
|
||||
relationId == SharedDependDependerIndexId ||
|
||||
relationId == SharedDependReferenceIndexId ||
|
||||
|
@ -1,276 +0,0 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* pg_hot_data.c
|
||||
* for hot data precache
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "postgres.h"
|
||||
#include "catalog/pg_hot_data.h"
|
||||
#include "libpq-fe.h"
|
||||
#include "lib/stringinfo.h"
|
||||
#include "utils/timestamp.h"
|
||||
#include "access/xlog.h"
|
||||
#include "postmaster/postmaster.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
void PrecacheHotData()
|
||||
{
|
||||
char instanceName[NAMEDATALEN]; //default:master
|
||||
char primaryHost[16]; //default:127.0.0.1
|
||||
char primaryUser[NAMEDATALEN]; //default:postgres
|
||||
char primaryPw[NAMEDATALEN]; //default:123456
|
||||
char primaryPort[8]; //default:PostPortNumber
|
||||
char localPort[8]; //default:master
|
||||
StringInfoData cmd, primaryConnStr, localConnStr;
|
||||
|
||||
initStringInfo(&cmd);
|
||||
initStringInfo(&primaryConnStr);
|
||||
initStringInfo(&localConnStr);
|
||||
|
||||
memset(instanceName, 0, NAMEDATALEN);
|
||||
memset(primaryHost, 0, 16);
|
||||
memset(primaryUser, 0, NAMEDATALEN);
|
||||
memset(primaryPw, 0, NAMEDATALEN);
|
||||
memset(primaryPort, 0, 8);
|
||||
memset(localPort, 0, 8);
|
||||
|
||||
//parse
|
||||
if (strlen(PrimaryConnInfo) > 0)
|
||||
{
|
||||
char *temStr;
|
||||
char *temChr;
|
||||
int temStrLen;
|
||||
|
||||
//instanceName
|
||||
temStr = strstr(PrimaryConnInfo, "application_name=");
|
||||
temStrLen = strlen("application_name=");
|
||||
|
||||
if (temStr != NULL)
|
||||
{
|
||||
temChr = strchr(temStr, ' ');
|
||||
if (temChr != NULL)
|
||||
{
|
||||
memcpy(instanceName, temStr + temStrLen, temChr - temStr - temStrLen);
|
||||
}
|
||||
else
|
||||
{
|
||||
strcpy(instanceName, temStr + temStrLen);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
strcpy(instanceName, "master");
|
||||
}
|
||||
|
||||
//primaryHost
|
||||
temStr = strstr(PrimaryConnInfo, "host=");
|
||||
temStrLen = strlen("host=");
|
||||
|
||||
if (temStr != NULL)
|
||||
{
|
||||
temChr = strchr(temStr, ' ');
|
||||
if (temChr != NULL)
|
||||
{
|
||||
memcpy(primaryHost, temStr + temStrLen, temChr - temStr - temStrLen);
|
||||
}
|
||||
else
|
||||
{
|
||||
strcpy(primaryHost, temStr + temStrLen);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
strcpy(primaryHost, "127.0.0.1");
|
||||
}
|
||||
|
||||
//primaryUser
|
||||
temStr = strstr(PrimaryConnInfo, "user=");
|
||||
temStrLen = strlen("user=");
|
||||
|
||||
if (temStr != NULL)
|
||||
{
|
||||
temChr = strchr(temStr, ' ');
|
||||
if (temChr != NULL)
|
||||
{
|
||||
memcpy(primaryUser, temStr + temStrLen, temChr - temStr - temStrLen);
|
||||
}
|
||||
else
|
||||
{
|
||||
strcpy(primaryUser, temStr + temStrLen);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
strcpy(primaryUser, "postgres");
|
||||
}
|
||||
|
||||
//primaryPw
|
||||
temStr = strstr(PrimaryConnInfo, "password=");
|
||||
temStrLen = strlen("password=");
|
||||
|
||||
if (temStr != NULL)
|
||||
{
|
||||
temChr = strchr(temStr, ' ');
|
||||
if (temChr != NULL)
|
||||
{
|
||||
memcpy(primaryPw, temStr + temStrLen, temChr - temStr - temStrLen);
|
||||
}
|
||||
else
|
||||
{
|
||||
strcpy(primaryPw, temStr + temStrLen);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
strcpy(primaryPw, "123456");
|
||||
}
|
||||
|
||||
//primaryPort
|
||||
temStr = strstr(PrimaryConnInfo, "port=");
|
||||
temStrLen = strlen("port=");
|
||||
|
||||
if (temStr != NULL)
|
||||
{
|
||||
temChr = strchr(temStr, ' ');
|
||||
if (temChr != NULL)
|
||||
{
|
||||
memcpy(primaryPort, temStr + temStrLen, temChr - temStr - temStrLen);
|
||||
}
|
||||
else
|
||||
{
|
||||
strcpy(primaryPort, temStr + temStrLen);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
sprintf(primaryPort, "%d", PostPortNumber);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
strcpy(instanceName, "master");
|
||||
strcpy(primaryHost, "127.0.0.1");
|
||||
strcpy(primaryUser, "postgres");
|
||||
strcpy(primaryPw, "123456");
|
||||
sprintf(primaryPort, "%d", PostPortNumber);
|
||||
}
|
||||
|
||||
//assemble primaryConnStr
|
||||
appendStringInfoString(&primaryConnStr, "host=");
|
||||
appendStringInfoString(&primaryConnStr, primaryHost);
|
||||
appendStringInfoString(&primaryConnStr, " user=");
|
||||
appendStringInfoString(&primaryConnStr, primaryUser);
|
||||
appendStringInfoString(&primaryConnStr, " password=");
|
||||
appendStringInfoString(&primaryConnStr, primaryPw);
|
||||
appendStringInfoString(&primaryConnStr, " port=");
|
||||
appendStringInfoString(&primaryConnStr, primaryPort);
|
||||
appendStringInfoString(&primaryConnStr, " dbname=postgres");
|
||||
|
||||
//conn local
|
||||
sprintf(localPort, "%d", PostPortNumber);
|
||||
appendStringInfoString(&localConnStr, "host=127.0.0.1 port=");
|
||||
appendStringInfoString(&localConnStr, localPort);
|
||||
appendStringInfoString(&localConnStr, " user=postgres dbname=postgres");
|
||||
PGconn *localConn = PQconnectdb(localConnStr.data);
|
||||
if (PQstatus(localConn) != CONNECTION_OK)
|
||||
{
|
||||
PQfinish(localConn);
|
||||
//log
|
||||
return;
|
||||
}
|
||||
|
||||
appendStringInfoString(&cmd, "SELECT datname, relname, crules FROM pg_hot_data WHERE crulessettime>cachetime AND clientname='");
|
||||
appendStringInfoString(&cmd, instanceName);
|
||||
appendStringInfoString(&cmd, "'");
|
||||
|
||||
//Query the corresponding precache policy
|
||||
PGresult *ruleRes = PQexec(localConn, cmd.data);
|
||||
if (PQresultStatus(ruleRes) != PGRES_TUPLES_OK)
|
||||
{
|
||||
PQclear(ruleRes);
|
||||
PQfinish(localConn);
|
||||
//log
|
||||
return;
|
||||
}
|
||||
int rows = PQntuples(ruleRes);
|
||||
for(int i=0; i<rows; i++)
|
||||
{
|
||||
char *datname;
|
||||
char *relname;
|
||||
char *crules;
|
||||
datname = PQgetvalue(ruleRes, i, 0);
|
||||
relname = PQgetvalue(ruleRes, i, 1);
|
||||
crules = PQgetvalue(ruleRes, i, 2);
|
||||
|
||||
//precache hot data(table level)
|
||||
if (strcmp(crules, "t") == 0)
|
||||
{
|
||||
//precache
|
||||
resetStringInfo(&localConnStr);
|
||||
appendStringInfoString(&localConnStr, "host=127.0.0.1 port=");
|
||||
appendStringInfoString(&localConnStr, localPort);
|
||||
appendStringInfoString(&localConnStr, " user=postgres dbname=");
|
||||
appendStringInfoString(&localConnStr, datname);
|
||||
PGconn *precacheConn = PQconnectdb(localConnStr.data);
|
||||
if (PQstatus(precacheConn) != CONNECTION_OK)
|
||||
{
|
||||
PQfinish(precacheConn);
|
||||
//log
|
||||
continue;
|
||||
}
|
||||
resetStringInfo(&cmd);
|
||||
appendStringInfoString(&cmd, "precache select * from ");
|
||||
appendStringInfoString(&cmd, relname);
|
||||
|
||||
PGresult *precacheRes = PQexec(precacheConn, cmd.data);
|
||||
if (PQresultStatus(precacheRes) != PGRES_TUPLES_OK)
|
||||
{
|
||||
PQclear(precacheRes);
|
||||
PQfinish(precacheConn);
|
||||
//log
|
||||
continue;
|
||||
}
|
||||
|
||||
PQclear(precacheRes);
|
||||
PQfinish(precacheConn);
|
||||
|
||||
//update primary pg_hot_data
|
||||
const char* currentTime = NULL;
|
||||
currentTime = timestamptz_to_str(GetCurrentTimestamp());
|
||||
resetStringInfo(&cmd);
|
||||
appendStringInfoString(&cmd, "UPDATE pg_hot_data SET cachetime='");
|
||||
appendStringInfoString(&cmd, currentTime);
|
||||
appendStringInfoString(&cmd, "' WHERE datname='");
|
||||
appendStringInfoString(&cmd, datname);
|
||||
appendStringInfoString(&cmd, "' AND relname='");
|
||||
appendStringInfoString(&cmd, relname);
|
||||
appendStringInfoString(&cmd, "' AND crules='");
|
||||
appendStringInfoString(&cmd, crules);
|
||||
appendStringInfoString(&cmd, "' AND clientname='");
|
||||
appendStringInfoString(&cmd, instanceName);
|
||||
appendStringInfoString(&cmd, "'");
|
||||
|
||||
PGconn *primaryConn = PQconnectdb(primaryConnStr.data);
|
||||
if (PQstatus(primaryConn) != CONNECTION_OK)
|
||||
{
|
||||
PQfinish(primaryConn);
|
||||
//log
|
||||
continue;
|
||||
}
|
||||
PGresult *updateRes=PQexec(primaryConn, cmd.data);
|
||||
if (PQresultStatus(updateRes) != PGRES_TUPLES_OK)
|
||||
{
|
||||
PQclear(updateRes);
|
||||
PQfinish(primaryConn);
|
||||
//log
|
||||
continue;
|
||||
}
|
||||
PQclear(updateRes);
|
||||
PQfinish(primaryConn);
|
||||
}
|
||||
}
|
||||
PQclear(ruleRes);
|
||||
PQfinish(localConn);
|
||||
}
|
@ -33,6 +33,7 @@
|
||||
#include "utils/hsearch.h"
|
||||
#include "utils/memutils.h"
|
||||
#include "utils/rel.h"
|
||||
#include "utils/guc.h"
|
||||
|
||||
/* GUC variables */
|
||||
int wal_skip_threshold = 2048; /* in kilobytes */
|
||||
@ -924,7 +925,7 @@ smgr_redo(XLogReaderState *record)
|
||||
reln = smgropen(xlrec->rnode, InvalidBackendId);
|
||||
|
||||
/* He3DB: propeller instance and He3DB slave instance not create rel file*/
|
||||
if (!EnableHotStandby)
|
||||
if (!EnableHotStandby || he3mirror)
|
||||
{
|
||||
smgrcreate(reln, xlrec->forkNum, true);
|
||||
}
|
||||
@ -948,7 +949,7 @@ smgr_redo(XLogReaderState *record)
|
||||
* log as best we can until the drop is seen.
|
||||
*/
|
||||
/* He3DB: propeller instance and He3DB slave instance not create rel file*/
|
||||
if (!EnableHotStandby)
|
||||
if (!EnableHotStandby || he3mirror)
|
||||
{
|
||||
smgrcreate(reln, MAIN_FORKNUM, true);
|
||||
}
|
||||
@ -1007,7 +1008,7 @@ smgr_redo(XLogReaderState *record)
|
||||
}
|
||||
|
||||
/* Do the real work to truncate relation forks */
|
||||
if (nforks > 0)
|
||||
if (nforks > 0 && !EnableHotStandby)
|
||||
smgrtruncate(reln, forks, nforks, blocks);
|
||||
|
||||
/*
|
||||
|
@ -1557,7 +1557,7 @@ ExecutePlan(EState *estate,
|
||||
if (TupIsNull(slot))
|
||||
break;
|
||||
|
||||
if (!isPreCache)
|
||||
if (!isPreCacheTable && !isPreCacheIndex)
|
||||
{
|
||||
/*
|
||||
* If we have a junk filter, then project a new tuple with the junk
|
||||
|
@ -51,6 +51,7 @@
|
||||
#include "utils/rel.h"
|
||||
#include "utils/snapmgr.h"
|
||||
#include "utils/spccache.h"
|
||||
#include "storage/bufmgr.h"
|
||||
|
||||
static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
|
||||
static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate);
|
||||
@ -81,6 +82,28 @@ BitmapHeapNext(BitmapHeapScanState *node)
|
||||
ParallelBitmapHeapState *pstate = node->pstate;
|
||||
dsa_area *dsa = node->ss.ps.state->es_query_dsa;
|
||||
|
||||
/* set preCacheNodeOid */
|
||||
if (isPreCacheIndex && preCacheNodeOid == 0)
|
||||
{
|
||||
preCacheNodeOid = ((BitmapIndexScanState *)((PlanState *)(node))->lefttree)->biss_ScanDesc->indexRelation->rd_node.relNode;
|
||||
if (isPreCacheAction)
|
||||
{
|
||||
preCacheNodesPtr[(*preCacheNodesCountPtr)++] = preCacheNodeOid;
|
||||
}
|
||||
else
|
||||
{
|
||||
for(int i = 0; i < *preCacheNodesCountPtr; i++)
|
||||
{
|
||||
if (preCacheNodesPtr[i] == preCacheNodeOid)
|
||||
{
|
||||
preCacheNodesPtr[i] = preCacheNodesPtr[*preCacheNodesCountPtr - 1];
|
||||
(*preCacheNodesCountPtr)--;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* extract necessary information from index scan node
|
||||
*/
|
||||
|
@ -66,6 +66,28 @@ IndexOnlyNext(IndexOnlyScanState *node)
|
||||
TupleTableSlot *slot;
|
||||
ItemPointer tid;
|
||||
|
||||
/* set preCacheNodeOid */
|
||||
if (isPreCacheIndex && preCacheNodeOid == 0)
|
||||
{
|
||||
preCacheNodeOid = node->ioss_RelationDesc->rd_node.relNode;
|
||||
if (isPreCacheAction)
|
||||
{
|
||||
preCacheNodesPtr[(*preCacheNodesCountPtr)++] = preCacheNodeOid;
|
||||
}
|
||||
else
|
||||
{
|
||||
for(int i = 0; i < *preCacheNodesCountPtr; i++)
|
||||
{
|
||||
if (preCacheNodesPtr[i] == preCacheNodeOid)
|
||||
{
|
||||
preCacheNodesPtr[i] = preCacheNodesPtr[*preCacheNodesCountPtr - 1];
|
||||
(*preCacheNodesCountPtr)--;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* extract necessary information from index scan node
|
||||
*/
|
||||
|
@ -43,6 +43,7 @@
|
||||
#include "utils/lsyscache.h"
|
||||
#include "utils/memutils.h"
|
||||
#include "utils/rel.h"
|
||||
#include "storage/bufmgr.h"
|
||||
|
||||
/*
|
||||
* When an ordering operator is used, tuples fetched from the index that
|
||||
@ -86,6 +87,28 @@ IndexNext(IndexScanState *node)
|
||||
IndexScanDesc scandesc;
|
||||
TupleTableSlot *slot;
|
||||
|
||||
/* set preCacheNodeOid */
|
||||
if (isPreCacheIndex && preCacheNodeOid == 0)
|
||||
{
|
||||
preCacheNodeOid = node->iss_RelationDesc->rd_node.relNode;
|
||||
if (isPreCacheAction)
|
||||
{
|
||||
preCacheNodesPtr[(*preCacheNodesCountPtr)++] = preCacheNodeOid;
|
||||
}
|
||||
else
|
||||
{
|
||||
for(int i = 0; i < *preCacheNodesCountPtr; i++)
|
||||
{
|
||||
if (preCacheNodesPtr[i] == preCacheNodeOid)
|
||||
{
|
||||
preCacheNodesPtr[i] = preCacheNodesPtr[*preCacheNodesCountPtr - 1];
|
||||
(*preCacheNodesCountPtr)--;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* extract necessary information from index scan node
|
||||
*/
|
||||
|
@ -32,6 +32,7 @@
|
||||
#include "executor/execdebug.h"
|
||||
#include "executor/nodeSeqscan.h"
|
||||
#include "utils/rel.h"
|
||||
#include "storage/bufmgr.h"
|
||||
|
||||
static TupleTableSlot *SeqNext(SeqScanState *node);
|
||||
|
||||
@ -54,6 +55,28 @@ SeqNext(SeqScanState *node)
|
||||
ScanDirection direction;
|
||||
TupleTableSlot *slot;
|
||||
|
||||
/* set preCacheTableNode */
|
||||
if (isPreCacheTable && preCacheNodeOid == 0)
|
||||
{
|
||||
preCacheNodeOid = node->ss.ss_currentRelation->rd_node.relNode;
|
||||
if (isPreCacheAction)
|
||||
{
|
||||
preCacheNodesPtr[(*preCacheNodesCountPtr)++] = preCacheNodeOid;
|
||||
}
|
||||
else
|
||||
{
|
||||
for(int i = 0; i < *preCacheNodesCountPtr; i++)
|
||||
{
|
||||
if (preCacheNodesPtr[i] == preCacheNodeOid)
|
||||
{
|
||||
preCacheNodesPtr[i] = preCacheNodesPtr[*preCacheNodesCountPtr - 1];
|
||||
(*preCacheNodesCountPtr)--;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* get information from the estate and scan state
|
||||
*/
|
||||
|
@ -21,6 +21,7 @@ OBJS = \
|
||||
interrupt.o \
|
||||
pgarch.o \
|
||||
pgstat.o \
|
||||
secondbuffer.o \
|
||||
postmaster.o \
|
||||
startup.o \
|
||||
syslogger.o \
|
||||
|
@ -114,6 +114,7 @@
|
||||
#include "postmaster/interrupt.h"
|
||||
#include "postmaster/pgarch.h"
|
||||
#include "postmaster/postmaster.h"
|
||||
#include "postmaster/secondbuffer.h"
|
||||
#include "postmaster/syslogger.h"
|
||||
#include "replication/logicallauncher.h"
|
||||
#include "replication/walsender.h"
|
||||
@ -257,8 +258,9 @@ static pid_t StartupPID = 0,
|
||||
AutoVacPID = 0,
|
||||
PgArchPID = 0,
|
||||
PgStatPID = 0,
|
||||
SysLoggerPID = 0,
|
||||
SecondBufferPID = 0;
|
||||
SecondBufferPID = 0,
|
||||
CleanLogIndexPID = 0,
|
||||
SysLoggerPID = 0;
|
||||
|
||||
/* Startup process's status */
|
||||
typedef enum
|
||||
@ -566,7 +568,9 @@ static void ShmemBackendArrayRemove(Backend *bn);
|
||||
#define StartCheckpointer() StartChildProcess(CheckpointerProcess)
|
||||
#define StartWalWriter() StartChildProcess(WalWriterProcess)
|
||||
#define StartWalReceiver() StartChildProcess(WalReceiverProcess)
|
||||
#define StartSecondBuffer() StartChildProcess(SecondBufferProcess)
|
||||
#define StartSecondBuffer() StartChildProcess(SecondBufferProcess)
|
||||
#define StartCleanLogIndex() StartChildProcess(CleanLogIndexProcess)
|
||||
|
||||
/* Macros to check exit status of a child process */
|
||||
#define EXIT_STATUS_0(st) ((st) == 0)
|
||||
#define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1)
|
||||
@ -1779,6 +1783,10 @@ ServerLoop(void)
|
||||
CheckpointerPID = StartCheckpointer();
|
||||
if (BgWriterPID == 0)
|
||||
BgWriterPID = StartBackgroundWriter();
|
||||
if (CleanLogIndexPID == 0)
|
||||
CleanLogIndexPID = StartCleanLogIndex();
|
||||
if (SecondBufferPID == 0)
|
||||
SecondBufferPID = StartSecondBuffer();
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1789,8 +1797,8 @@ ServerLoop(void)
|
||||
if (WalWriterPID == 0 && pmState == PM_RUN)
|
||||
WalWriterPID = StartWalWriter();
|
||||
|
||||
if(SecondBufferPID == 0 && pmState == PM_RUN)
|
||||
SecondBufferPID = StartSecondBuffer();
|
||||
// if(SecondBufferPID == 0 && pmState == PM_RUN)
|
||||
// SecondBufferPID = StartSecondBuffer();
|
||||
|
||||
/*
|
||||
* If we have lost the autovacuum launcher, try to start a new one. We
|
||||
@ -2744,7 +2752,10 @@ SIGHUP_handler(SIGNAL_ARGS)
|
||||
if (PgStatPID != 0)
|
||||
signal_child(PgStatPID, SIGHUP);
|
||||
if (SecondBufferPID != 0)
|
||||
signal_child(SecondBufferPID, SIGHUP); //重新加载配置后重启进程?
|
||||
signal_child(SecondBufferPID, SIGHUP);
|
||||
if (CleanLogIndexPID != 0 )
|
||||
signal_child(CleanLogIndexPID, SIGHUP);
|
||||
|
||||
|
||||
/* Reload authentication config files too */
|
||||
if (!load_hba())
|
||||
@ -3066,6 +3077,8 @@ reaper(SIGNAL_ARGS)
|
||||
WalWriterPID = StartWalWriter();
|
||||
if (SecondBufferPID == 0)
|
||||
SecondBufferPID = StartSecondBuffer(); //作用?
|
||||
if (CleanLogIndexPID == 0)
|
||||
CleanLogIndexPID = StartCleanLogIndex();
|
||||
|
||||
/*
|
||||
* Likewise, start other special children as needed. In a restart
|
||||
@ -3179,13 +3192,13 @@ reaper(SIGNAL_ARGS)
|
||||
continue;
|
||||
}
|
||||
|
||||
if (pid == SecondBufferPID)
|
||||
{
|
||||
SecondBufferPID = 0;
|
||||
if (!EXIT_STATUS_0(exitstatus))
|
||||
HandleChildCrash(pid, exitstatus,
|
||||
_("second buffer process"));
|
||||
}
|
||||
// if (pid == SecondBufferPID)
|
||||
// {
|
||||
// SecondBufferPID = 0;
|
||||
// if (!EXIT_STATUS_0(exitstatus))
|
||||
// HandleChildCrash(pid, exitstatus,
|
||||
// _("second buffer process"));
|
||||
// }
|
||||
|
||||
/*
|
||||
* Was it the wal receiver? If exit status is zero (normal) or one
|
||||
@ -3663,18 +3676,6 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
|
||||
signal_child(WalWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
|
||||
}
|
||||
|
||||
/* Take care of the walwriter too*/
|
||||
if (pid == SecondBufferPID)
|
||||
SecondBufferPID = 0;
|
||||
else if (SecondBufferPID != 0 && take_action)
|
||||
{
|
||||
ereport(DEBUG2,
|
||||
(errmsg_internal("sending %s to process %d",
|
||||
(SendStop ? "SIGSTOP" : "SIGQUIT"),
|
||||
(int) SecondBufferPID)));
|
||||
signal_child(SecondBufferPID, (SendStop ? SIGSTOP : SIGQUIT));
|
||||
}
|
||||
|
||||
/* Take care of the walreceiver too */
|
||||
if (pid == WalReceiverPID)
|
||||
WalReceiverPID = 0;
|
||||
@ -3726,7 +3727,29 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
|
||||
signal_child(PgStatPID, SIGQUIT);
|
||||
allow_immediate_pgstat_restart();
|
||||
}
|
||||
/* Take care of the clean logindex too */
|
||||
if (pid == CleanLogIndexPID)
|
||||
CleanLogIndexPID = 0;
|
||||
else if (CleanLogIndexPID != 0 && take_action)
|
||||
{
|
||||
ereport(DEBUG2,
|
||||
(errmsg_internal("sending %s to process %d",
|
||||
(SendStop ? "SIGSTOP" : "SIGQUIT"),
|
||||
(int) CleanLogIndexPID)));
|
||||
signal_child(CleanLogIndexPID, (SendStop ? SIGSTOP : SIGQUIT));
|
||||
}
|
||||
|
||||
/* Take care of the second buffer process too */
|
||||
if (pid == SecondBufferPID)
|
||||
SecondBufferPID = 0;
|
||||
else if (SecondBufferPID != 0 && take_action)
|
||||
{
|
||||
ereport(DEBUG2,
|
||||
(errmsg_internal("sending %s to process %d",
|
||||
(SendStop ? "SIGSTOP" : "SIGQUIT"),
|
||||
(int) SecondBufferPID)));
|
||||
signal_child(SecondBufferPID, (SendStop ? SIGSTOP : SIGQUIT));
|
||||
}
|
||||
/* We do NOT restart the syslogger */
|
||||
|
||||
if (Shutdown != ImmediateShutdown)
|
||||
@ -3869,14 +3892,16 @@ PostmasterStateMachine(void)
|
||||
/* and the walwriter too */
|
||||
if (WalWriterPID != 0)
|
||||
signal_child(WalWriterPID, SIGTERM);
|
||||
/*and the secondbuffer too*/
|
||||
if (SecondBufferPID != 0)
|
||||
signal_child(SecondBufferPID,SIGTERM);
|
||||
/* If we're in recovery, also stop startup and walreceiver procs */
|
||||
if (StartupPID != 0)
|
||||
signal_child(StartupPID, SIGTERM);
|
||||
if (WalReceiverPID != 0)
|
||||
signal_child(WalReceiverPID, SIGTERM);
|
||||
if (CleanLogIndexPID != 0)
|
||||
signal_child(CleanLogIndexPID, SIGTERM);
|
||||
/*and the secondbuffer too*/
|
||||
if (SecondBufferPID != 0)
|
||||
signal_child(SecondBufferPID,SIGTERM);
|
||||
/* checkpointer, archiver, stats, and syslogger may continue for now */
|
||||
|
||||
|
||||
@ -3905,7 +3930,6 @@ PostmasterStateMachine(void)
|
||||
StartupPID == 0 &&
|
||||
WalReceiverPID == 0 &&
|
||||
BgWriterPID == 0 &&
|
||||
SecondBufferPID == 0 &&
|
||||
(CheckpointerPID == 0 ||
|
||||
(!FatalError && Shutdown < ImmediateShutdown)) &&
|
||||
WalWriterPID == 0 &&
|
||||
@ -4002,7 +4026,6 @@ PostmasterStateMachine(void)
|
||||
Assert(BgWriterPID == 0);
|
||||
Assert(CheckpointerPID == 0);
|
||||
Assert(WalWriterPID == 0);
|
||||
Assert(SecondBufferPID == 0);
|
||||
Assert(AutoVacPID == 0);
|
||||
/* syslogger is not considered here */
|
||||
pmState = PM_NO_CHILDREN;
|
||||
@ -4209,8 +4232,6 @@ TerminateChildren(int signal)
|
||||
signal_child(CheckpointerPID, signal);
|
||||
if (WalWriterPID != 0)
|
||||
signal_child(WalWriterPID, signal);
|
||||
if (SecondBufferPID != 0)
|
||||
signal_child(SecondBufferPID,signal);
|
||||
if (WalReceiverPID != 0)
|
||||
signal_child(WalReceiverPID, signal);
|
||||
if (AutoVacPID != 0)
|
||||
@ -4219,6 +4240,10 @@ TerminateChildren(int signal)
|
||||
signal_child(PgArchPID, signal);
|
||||
if (PgStatPID != 0)
|
||||
signal_child(PgStatPID, signal);
|
||||
if (CleanLogIndexPID !=0)
|
||||
signal_child(CleanLogIndexPID, signal);
|
||||
if (SecondBufferPID != 0)
|
||||
signal_child(SecondBufferPID, signal);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -4572,7 +4597,7 @@ BackendRun(Port *port)
|
||||
if (port->privateConn == true) {
|
||||
privateConn = true;
|
||||
}
|
||||
|
||||
client_application_name = port->application_name;
|
||||
/*
|
||||
* Make sure we aren't in PostmasterContext anymore. (We can't delete it
|
||||
* just yet, though, because InitPostgres will need the HBA data.)
|
||||
@ -5347,6 +5372,17 @@ sigusr1_handler(SIGNAL_ARGS)
|
||||
StartALLPageFlushWorker();
|
||||
}
|
||||
|
||||
if (CheckPostmasterSignal(PMSIGNAL_CLEAN_LOGINDEX_WORKER)) {
|
||||
if ( CleanLogIndexPID == 0) {
|
||||
CleanLogIndexPID = StartCleanLogIndex();
|
||||
}
|
||||
}
|
||||
|
||||
// if (CheckPostmasterSignal(PMSIGNAL_SECONDBUFFER_WORKER)) {
|
||||
// if (SecondBufferPID == 0) {
|
||||
// SecondBufferPID = StartSecondBuffer();
|
||||
// }
|
||||
// }
|
||||
if (CheckPostmasterSignal(PMSIGNAL_START_WALRECEIVER))
|
||||
{
|
||||
/* Startup Process wants us to start the walreceiver process. */
|
||||
@ -5512,6 +5548,14 @@ StartChildProcess(AuxProcType type)
|
||||
av[ac++] = NULL; /* filled in by postmaster_forkexec */
|
||||
#endif
|
||||
|
||||
if (pageEnv == NULL)
|
||||
{
|
||||
InitPageDBEnv();
|
||||
}
|
||||
if (walEnv == NULL)
|
||||
{
|
||||
InitWalDBEnv();
|
||||
}
|
||||
snprintf(typebuf, sizeof(typebuf), "-x%d", type);
|
||||
av[ac++] = typebuf;
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -244,9 +244,11 @@ StartupProcessMain(void)
|
||||
//start flushWork
|
||||
#ifndef PG_NOREPLAY
|
||||
if (IsBootstrapProcessingMode() != true && InitdbSingle!=true) {
|
||||
if (push_standby == true) {
|
||||
//if (push_standby == true) {
|
||||
SignalStartFlushWork();
|
||||
}
|
||||
//}
|
||||
pg_usleep(1000);
|
||||
SignalStartCleanLogIndexWork();
|
||||
ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
|
||||
}
|
||||
#endif
|
||||
|
@ -95,7 +95,8 @@ bool hot_standby_feedback;
|
||||
static WalReceiverConn *wrconn = NULL;
|
||||
WalReceiverFunctionsType *WalReceiverFunctions = NULL;
|
||||
|
||||
#define NAPTIME_PER_CYCLE 100 /* max sleep time between cycles (100ms) */
|
||||
//#define NAPTIME_PER_CYCLE 100 /* max sleep time between cycles (100ms) */
|
||||
#define NAPTIME_PER_CYCLE 10 /* max sleep time between cycles (10ms) */
|
||||
|
||||
/*
|
||||
* These variables are used similarly to openLogFile/SegNo,
|
||||
@ -824,11 +825,11 @@ XLogWalRcvProcessMsg(unsigned char type, char *buf, Size len)
|
||||
case 'w': /* WAL records */
|
||||
{
|
||||
/* copy message to StringInfo */
|
||||
#ifdef PG_NOREPLAY
|
||||
if (he3mirror) {
|
||||
hdrlen = sizeof(int64) + sizeof(int64) + sizeof(int64);
|
||||
#else
|
||||
} else {
|
||||
hdrlen = sizeof(int64) + sizeof(int64) + sizeof(int64) + sizeof(int64);
|
||||
#endif
|
||||
}
|
||||
if (len < hdrlen)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_PROTOCOL_VIOLATION),
|
||||
@ -838,21 +839,21 @@ XLogWalRcvProcessMsg(unsigned char type, char *buf, Size len)
|
||||
/* read the fields */
|
||||
dataStart = pq_getmsgint64(&incoming_message);
|
||||
walEnd = pq_getmsgint64(&incoming_message);
|
||||
#ifdef PG_NOREPLAY
|
||||
if (he3mirror){
|
||||
len -= hdrlen;
|
||||
#else
|
||||
} else{
|
||||
len = pq_getmsgint64(&incoming_message);
|
||||
#endif
|
||||
}
|
||||
sendTime = pq_getmsgint64(&incoming_message);
|
||||
ProcessWalSndrMessage(walEnd, sendTime);
|
||||
buf += hdrlen;
|
||||
#ifdef PG_NOREPLAY
|
||||
if (he3mirror) {
|
||||
XLogWalRcvWrite(buf, len, dataStart);
|
||||
#else
|
||||
} else {
|
||||
LogstreamResult.Write = dataStart+len;
|
||||
/* Update shared-memory status */
|
||||
pg_atomic_write_u64(&WalRcv->writtenUpto, LogstreamResult.Write);
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 'k': /* Keepalive */
|
||||
@ -1124,6 +1125,11 @@ XLogWalRcvSendReply(bool force, bool requestReply)
|
||||
writePtr = LogstreamResult.Write;
|
||||
flushPtr = LogstreamResult.Flush;
|
||||
applyPtr = GetXLogReplayRecPtr(NULL);
|
||||
#ifndef PG_NOREPLAY
|
||||
if (!he3mirror && push_standby == true) {
|
||||
applyPtr = GetXLogPushToDisk();
|
||||
}
|
||||
#endif
|
||||
|
||||
resetStringInfo(&reply_message);
|
||||
pq_sendbyte(&reply_message, 'r');
|
||||
|
@ -48,6 +48,8 @@
|
||||
|
||||
#include <signal.h>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "access/printtup.h"
|
||||
#include "access/timeline.h"
|
||||
@ -93,6 +95,13 @@
|
||||
#include "utils/ps_status.h"
|
||||
#include "utils/timeout.h"
|
||||
#include "utils/timestamp.h"
|
||||
#include "access/heapam_xlog.h"
|
||||
#include "catalog/pg_control.h"
|
||||
#include "access/nbtxlog.h"
|
||||
#include "access/gistxlog.h"
|
||||
#include "access/spgxlog.h"
|
||||
#include "access/brin_xlog.h"
|
||||
#include "access/xlog.h"
|
||||
|
||||
/*
|
||||
* Maximum data payload in a WAL data message. Must be >= XLOG_BLCKSZ.
|
||||
@ -105,6 +114,10 @@
|
||||
*/
|
||||
#define MAX_SEND_SIZE (XLOG_BLCKSZ * 16)
|
||||
|
||||
#define ONCE_READ_TIKV_WAL (XLOG_BLCKSZ * 2)
|
||||
// A batch read from TiKV returns 16kB, but the last record may be longer than 8kB, so DEFAULT_SEND_WAL_CAPCITY = 2 * ONCE_READ_TIKV_WAL
|
||||
#define DEFAULT_SEND_WAL_CAPCITY (ONCE_READ_TIKV_WAL*2)
|
||||
|
||||
/* Array of WalSnds in shared memory */
|
||||
WalSndCtlData *WalSndCtl = NULL;
|
||||
|
||||
@ -255,7 +268,7 @@ static bool TransactionIdInRecentPast(TransactionId xid, uint32 epoch);
|
||||
|
||||
static void WalSndSegmentOpen(XLogReaderState *state, XLogSegNo nextSegNo,
|
||||
TimeLineID *tli_p);
|
||||
|
||||
static void XLogSendTiKVPhysical(void);
|
||||
|
||||
/* Initialize walsender process before entering the main command loop */
|
||||
void
|
||||
@ -572,6 +585,10 @@ StartReplication(StartReplicationCmd *cmd)
|
||||
{
|
||||
StringInfoData buf;
|
||||
XLogRecPtr FlushPtr;
|
||||
bool pgmirrorFlag = false;
|
||||
if (client_application_name!=NULL && strncmp(client_application_name,"pgmirror",strlen("pgmirror")) == 0) {
|
||||
pgmirrorFlag = true;
|
||||
}
|
||||
|
||||
if (ThisTimeLineID == 0)
|
||||
ereport(ERROR,
|
||||
@ -717,7 +734,7 @@ StartReplication(StartReplicationCmd *cmd)
|
||||
* Don't allow a request to stream from a future point in WAL that
|
||||
* hasn't been flushed to disk in this server yet.
|
||||
*/
|
||||
if (FlushPtr < cmd->startpoint)
|
||||
if (pgmirrorFlag == false && FlushPtr < cmd->startpoint)
|
||||
{
|
||||
ereport(ERROR,
|
||||
(errmsg("requested starting point %X/%X is ahead of the WAL flush position of this server %X/%X",
|
||||
@ -737,8 +754,21 @@ StartReplication(StartReplicationCmd *cmd)
|
||||
|
||||
/* Main loop of walsender */
|
||||
replication_active = true;
|
||||
|
||||
if (pgmirrorFlag == false) {
|
||||
WalSndLoop(XLogSendPhysical);
|
||||
} else {
|
||||
readControlFile(DataDir);
|
||||
SpinLockAcquire(&MyWalSnd->mutex);
|
||||
if (walsenderLsn != 0) {
|
||||
MyWalSnd->sentPtr = walsenderLsn;
|
||||
sentPtr = walsenderLsn;
|
||||
elog(LOG,"wal sender LSN %X/%X",LSN_FORMAT_ARGS(walsenderLsn));
|
||||
} else {
|
||||
elog(ERROR,"WAL sender LSN 0/0");
|
||||
}
|
||||
SpinLockRelease(&MyWalSnd->mutex);
|
||||
WalSndLoop(XLogSendTiKVPhysical);
|
||||
}
|
||||
|
||||
replication_active = false;
|
||||
if (got_STOPPING)
|
||||
@ -1300,7 +1330,7 @@ WalSndWriteData(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,
|
||||
break;
|
||||
|
||||
sleeptime = WalSndComputeSleeptime(GetCurrentTimestamp());
|
||||
|
||||
// sleeptime = 10; //10ms
|
||||
/* Sleep until something happens or we time out */
|
||||
WalSndWait(WL_SOCKET_WRITEABLE | WL_SOCKET_READABLE, sleeptime,
|
||||
WAIT_EVENT_WAL_SENDER_WRITE_DATA);
|
||||
@ -2374,7 +2404,7 @@ WalSndLoop(WalSndSendDataCallback send_data)
|
||||
* of reaching wal_sender_timeout before sending a keepalive.
|
||||
*/
|
||||
sleeptime = WalSndComputeSleeptime(GetCurrentTimestamp());
|
||||
|
||||
// sleeptime = 10; //10ms
|
||||
if (pq_is_send_pending())
|
||||
wakeEvents |= WL_SOCKET_WRITEABLE;
|
||||
|
||||
@ -2462,74 +2492,671 @@ WalSndKill(int code, Datum arg)
|
||||
SpinLockRelease(&walsnd->mutex);
|
||||
}
|
||||
|
||||
/* XLogReaderRoutine->segment_open callback */
|
||||
// static void
|
||||
// WalSndSegmentOpen(XLogReaderState *state, XLogSegNo nextSegNo,
|
||||
// TimeLineID *tli_p)
|
||||
// {
|
||||
// char path[MAXPGPATH];
|
||||
/*
 * reConvertMainData
 *		Rewrite the main-data struct of a WAL record into its "old" layout.
 *
 * sRecord   - header of the source record; only xl_rmid and xl_info are read.
 * sMainData - start of the source record's main data.
 * sLen      - length of the source main data; only read, and only for the
 *             variable-length SP-GiST records.
 * dMainData - output buffer for the converted main data.
 * dLen      - set to the number of bytes written to dMainData.  It is left
 *             untouched when the record type needs no conversion, so callers
 *             must initialise it (the visible caller uses 0 as "not
 *             converted").
 *
 * The capacity of dMainData is not known here; the caller must size it for
 * the largest converted record.
 */
static void reConvertMainData(XLogRecord* sRecord, char*sMainData, uint32_t*sLen, char* dMainData, uint32_t* dLen) {
	RmgrId		rmid = sRecord->xl_rmid;
	uint8		info = (sRecord->xl_info & ~XLR_INFO_MASK);

	/*
	 * Every xlrecOld below is zeroed before it is filled in: the struct is
	 * memcpy'd out as raw bytes, so any padding would otherwise carry
	 * indeterminate stack contents into the outgoing WAL.
	 */
	switch (rmid)
	{
		case RM_HEAP2_ID:
			if ((info & XLOG_HEAP_OPMASK) == XLOG_HEAP2_VISIBLE)
			{
				xl_heap_visible *xlrec = (xl_heap_visible *) sMainData;
				xl_old_heap_visible xlrecOld;

				memset(&xlrecOld, 0, sizeof(xlrecOld));
				xlrecOld.cutoff_xid = xlrec->cutoff_xid;
				xlrecOld.flags = xlrec->flags;
				*dLen = sizeof(xl_old_heap_visible);
				memcpy(dMainData, &xlrecOld, *dLen);
			}
			break;

		case RM_HEAP_ID:
			if ((info & XLOG_HEAP_OPMASK) == XLOG_HEAP_UPDATE ||
				(info & XLOG_HEAP_OPMASK) == XLOG_HEAP_HOT_UPDATE)
			{
				xl_heap_update *xlrec = (xl_heap_update *) sMainData;
				xl_old_heap_update xlrecOld;

				memset(&xlrecOld, 0, sizeof(xlrecOld));
				xlrecOld.old_xmax = xlrec->old_xmax;
				xlrecOld.old_offnum = xlrec->old_offnum;
				xlrecOld.old_infobits_set = xlrec->old_infobits_set;
				xlrecOld.flags = xlrec->flags;
				xlrecOld.new_xmax = xlrec->new_xmax;
				xlrecOld.new_offnum = xlrec->new_offnum;
				*dLen = sizeof(xl_old_heap_update);
				memcpy(dMainData, &xlrecOld, *dLen);
			}
			break;

		case RM_BTREE_ID:
			if (info == XLOG_BTREE_SPLIT_L || info == XLOG_BTREE_SPLIT_R)
			{
				xl_btree_split *xlrec = (xl_btree_split *) sMainData;
				xl_old_btree_split xlrecOld;

				memset(&xlrecOld, 0, sizeof(xlrecOld));
				xlrecOld.level = xlrec->level;
				xlrecOld.firstrightoff = xlrec->firstrightoff;
				xlrecOld.newitemoff = xlrec->newitemoff;
				xlrecOld.postingoff = xlrec->postingoff;
				*dLen = sizeof(xl_old_btree_split);
				memcpy(dMainData, &xlrecOld, *dLen);
			}
			break;

		case RM_GIST_ID:
			if (info == XLOG_GIST_PAGE_SPLIT)
			{
				gistxlogPageSplit *xlrec = (gistxlogPageSplit *) sMainData;
				gistoldxlogPageSplit xlrecOld;

				memset(&xlrecOld, 0, sizeof(xlrecOld));
				xlrecOld.origrlink = xlrec->origrlink;
				xlrecOld.orignsn = xlrec->orignsn;
				xlrecOld.origleaf = xlrec->origleaf;
				xlrecOld.npage = xlrec->npage;
				xlrecOld.markfollowright = xlrec->markfollowright;
				*dLen = sizeof(gistoldxlogPageSplit);
				memcpy(dMainData, &xlrecOld, *dLen);
			}
			break;

		case RM_SPGIST_ID:
			if (info == XLOG_SPGIST_ADD_LEAF)
			{
				spgxlogAddLeaf *xlrec = (spgxlogAddLeaf *) sMainData;
				spgoldxlogAddLeaf xlrecOld;

				memset(&xlrecOld, 0, sizeof(xlrecOld));
				xlrecOld.newPage = xlrec->newPage;
				xlrecOld.storesNulls = xlrec->storesNulls;
				xlrecOld.offnumLeaf = xlrec->offnumLeaf;
				xlrecOld.offnumHeadLeaf = xlrec->offnumHeadLeaf;
				xlrecOld.offnumParent = xlrec->offnumParent;
				xlrecOld.nodeI = xlrec->nodeI;
				*dLen = sizeof(spgoldxlogAddLeaf);
				memcpy(dMainData, &xlrecOld, *dLen);
			}
			else if (info == XLOG_SPGIST_MOVE_LEAFS)
			{
				spgxlogMoveLeafs *xlrec = (spgxlogMoveLeafs *) sMainData;
				spgoldxlogMoveLeafs xlrecOld;
				uint32_t	tailLen;

				/* Guard the unsigned subtraction below against underflow. */
				if (*sLen < SizeOfSpgxlogMoveLeafs)
					elog(ERROR, "spgist move-leafs main data too short: %u", *sLen);
				tailLen = *sLen - SizeOfSpgxlogMoveLeafs;

				memset(&xlrecOld, 0, sizeof(xlrecOld));
				xlrecOld.nMoves = xlrec->nMoves;
				xlrecOld.newPage = xlrec->newPage;
				xlrecOld.replaceDead = xlrec->replaceDead;
				xlrecOld.storesNulls = xlrec->storesNulls;
				xlrecOld.offnumParent = xlrec->offnumParent;
				xlrecOld.nodeI = xlrec->nodeI;
				xlrecOld.stateSrc = xlrec->stateSrc;
				*dLen = SizeOfOldSpgxlogMoveLeafs;
				memcpy(dMainData, &xlrecOld, *dLen);
				/* The variable-length tail is carried over unchanged. */
				memcpy(dMainData + *dLen, xlrec->offsets, tailLen);
				*dLen += tailLen;
			}
			else if (info == XLOG_SPGIST_ADD_NODE)
			{
				spgxlogAddNode *xlrec = (spgxlogAddNode *) sMainData;
				spgoldxlogAddNode xlrecOld;

				memset(&xlrecOld, 0, sizeof(xlrecOld));
				xlrecOld.offnum = xlrec->offnum;
				xlrecOld.offnumNew = xlrec->offnumNew;
				xlrecOld.newPage = xlrec->newPage;
				xlrecOld.parentBlk = xlrec->parentBlk;
				xlrecOld.offnumParent = xlrec->offnumParent;
				xlrecOld.nodeI = xlrec->nodeI;
				xlrecOld.stateSrc = xlrec->stateSrc;
				*dLen = sizeof(spgoldxlogAddNode);
				memcpy(dMainData, &xlrecOld, *dLen);
			}
			else if (info == XLOG_SPGIST_PICKSPLIT)
			{
				spgxlogPickSplit *xlrec = (spgxlogPickSplit *) sMainData;
				spgoldxlogPickSplit xlrecOld;
				uint32_t	tailLen;

				/* Guard the unsigned subtraction below against underflow. */
				if (*sLen < SizeOfSpgxlogPickSplit)
					elog(ERROR, "spgist pick-split main data too short: %u", *sLen);
				tailLen = *sLen - SizeOfSpgxlogPickSplit;

				memset(&xlrecOld, 0, sizeof(xlrecOld));
				xlrecOld.isRootSplit = xlrec->isRootSplit;
				xlrecOld.nDelete = xlrec->nDelete;
				xlrecOld.nInsert = xlrec->nInsert;
				xlrecOld.initSrc = xlrec->initSrc;
				xlrecOld.initDest = xlrec->initDest;
				xlrecOld.offnumInner = xlrec->offnumInner;
				xlrecOld.initInner = xlrec->initInner;
				xlrecOld.storesNulls = xlrec->storesNulls;
				xlrecOld.innerIsParent = xlrec->innerIsParent;
				xlrecOld.offnumParent = xlrec->offnumParent;
				xlrecOld.nodeI = xlrec->nodeI;
				xlrecOld.stateSrc = xlrec->stateSrc;
				*dLen = SizeOfOldSpgxlogPickSplit;
				memcpy(dMainData, &xlrecOld, *dLen);
				/* The variable-length tail is carried over unchanged. */
				memcpy(dMainData + *dLen, xlrec->offsets, tailLen);
				*dLen += tailLen;
			}
			break;

		case RM_BRIN_ID:
			if (info == XLOG_BRIN_INSERT)
			{
				xl_brin_insert *xlrec = (xl_brin_insert *) sMainData;
				xl_old_brin_insert xlrecOld;

				memset(&xlrecOld, 0, sizeof(xlrecOld));
				xlrecOld.heapBlk = xlrec->heapBlk;
				/* extra information needed to update the revmap */
				xlrecOld.pagesPerRange = xlrec->pagesPerRange;
				xlrecOld.offnum = xlrec->offnum;
				*dLen = sizeof(xl_old_brin_insert);
				memcpy(dMainData, &xlrecOld, *dLen);
			}
			else if (info == XLOG_BRIN_UPDATE)
			{
				xl_brin_update *xlrec = (xl_brin_update *) sMainData;
				xl_brin_insert *xlrecInsert = &xlrec->insert;
				xl_old_brin_update xlrecUpdate;

				memset(&xlrecUpdate, 0, sizeof(xlrecUpdate));
				/* offset number of old tuple on old page */
				xlrecUpdate.oldOffnum = xlrec->oldOffnum;
				xlrecUpdate.insert.heapBlk = xlrecInsert->heapBlk;
				/* extra information needed to update the revmap */
				xlrecUpdate.insert.pagesPerRange = xlrecInsert->pagesPerRange;
				xlrecUpdate.insert.offnum = xlrecInsert->offnum;
				*dLen = sizeof(xl_old_brin_update);
				memcpy(dMainData, &xlrecUpdate, *dLen);
			}
			break;

		default:
			/* No layout difference for this resource manager. */
			break;
	}
}
|
||||
|
||||
// /*-------
|
||||
// * When reading from a historic timeline, and there is a timeline switch
|
||||
// * within this segment, read from the WAL segment belonging to the new
|
||||
// * timeline.
|
||||
// *
|
||||
// * For example, imagine that this server is currently on timeline 5, and
|
||||
// * we're streaming timeline 4. The switch from timeline 4 to 5 happened at
|
||||
// * 0/13002088. In pg_wal, we have these files:
|
||||
// *
|
||||
// * ...
|
||||
// * 000000040000000000000012
|
||||
// * 000000040000000000000013
|
||||
// * 000000050000000000000013
|
||||
// * 000000050000000000000014
|
||||
// * ...
|
||||
// *
|
||||
// * In this situation, when requested to send the WAL from segment 0x13, on
|
||||
// * timeline 4, we read the WAL from file 000000050000000000000013. Archive
|
||||
// * recovery prefers files from newer timelines, so if the segment was
|
||||
// * restored from the archive on this server, the file belonging to the old
|
||||
// * timeline, 000000040000000000000013, might not exist. Their contents are
|
||||
// * equal up to the switchpoint, because at a timeline switch, the used
|
||||
// * portion of the old segment is copied to the new file. -------
|
||||
// */
|
||||
// *tli_p = sendTimeLine;
|
||||
// if (sendTimeLineIsHistoric)
|
||||
// {
|
||||
// XLogSegNo endSegNo;
|
||||
// 1. recompute CRC; 2. merge records up to the MTR end point; 3. convert some structs; 4. reset checkpoint redo; 5. manage using file segments
|
||||
static int MergeWalForPgMirror(char*source,char*destion,int limit,int*he3_pos) {
|
||||
int pos1 = 0,pos2 = 0,prev_pos2 = 0;
|
||||
bool isMtr = false;
|
||||
*he3_pos = 0;
|
||||
while(pos1<limit) {
|
||||
uint8_t blkNum = 0;
|
||||
char*img_ptr[XLR_MAX_BLOCK_ID + 1] = {0};
|
||||
char*data_ptr[XLR_MAX_BLOCK_ID + 1] = {0};
|
||||
uint16_t bimg_len[XLR_MAX_BLOCK_ID + 1] = {0};
|
||||
uint16_t data_len[XLR_MAX_BLOCK_ID + 1] = {0};
|
||||
uint16 RepOriginId = 0;
|
||||
uint32 TransactionId = 0;
|
||||
uint32_t d_main_data_len = 0;
|
||||
char d_main_data[8192];
|
||||
OldXLogRecord*old = NULL;
|
||||
prev_pos2 = pos2;
|
||||
while(isMtr == false) {
|
||||
if (pos1 >= limit) {
|
||||
return pos2;
|
||||
}
|
||||
XLogRecord*one = (XLogRecord*)(source + pos1);
|
||||
old = (OldXLogRecord*)(destion + pos2);
|
||||
old->xl_xid = one->xl_xid;
|
||||
old->xl_info = one->xl_info;
|
||||
old->xl_rmid = one->xl_rmid;
|
||||
pos1 += sizeof(XLogRecord);
|
||||
pos2 += sizeof(OldXLogRecord);
|
||||
uint32 remaining = one->xl_tot_len - sizeof(XLogRecord);
|
||||
uint32 datatotal = 0;
|
||||
isMtr = one->mtr;
|
||||
while(remaining > datatotal) {
|
||||
uint8_t block_id = *(source + pos1);
|
||||
if (block_id == XLR_BLOCK_ID_DATA_SHORT) {
|
||||
/* XLogRecordDataHeaderShort */
|
||||
pos1 += sizeof(block_id);
|
||||
if (isMtr == true) {
|
||||
memcpy((destion + pos2),&block_id,sizeof(block_id));
|
||||
pos2 += sizeof(block_id);
|
||||
}
|
||||
uint32_t main_data_len = 0;
|
||||
main_data_len = *((uint8_t*)(source + pos1));
|
||||
if (isMtr == true) {
|
||||
reConvertMainData(one,source + pos1,&main_data_len,d_main_data,&d_main_data_len);
|
||||
if (d_main_data_len == 0) {
|
||||
memcpy(destion + pos2,source + pos1,sizeof(uint8_t));
|
||||
}
|
||||
pos2 += sizeof(uint8_t);
|
||||
}
|
||||
pos1 += sizeof(uint8_t);
|
||||
remaining -= sizeof(uint8_t);
|
||||
datatotal += main_data_len;
|
||||
break;
|
||||
} else if (block_id == XLR_BLOCK_ID_DATA_LONG) {
|
||||
/* XLogRecordDataHeaderLong */
|
||||
pos1 += sizeof(block_id);
|
||||
if (isMtr == true) {
|
||||
memcpy((destion + pos2),&block_id,sizeof(block_id));
|
||||
pos2 += sizeof(block_id);
|
||||
}
|
||||
uint32 main_data_len = 0,d_main_data_len = 0;
|
||||
memcpy(&main_data_len,source + pos1,sizeof(uint32));
|
||||
if (isMtr == true) {
|
||||
reConvertMainData(one,source + pos1,&main_data_len,d_main_data,&d_main_data_len);
|
||||
if (d_main_data_len == 0) {
|
||||
memcpy(destion + pos2,&main_data_len,sizeof(main_data_len));
|
||||
pos2 += sizeof(main_data_len);
|
||||
} else {
|
||||
if (d_main_data_len > 255) {
|
||||
memcpy(destion + pos2,&d_main_data_len,sizeof(d_main_data_len));
|
||||
pos2 += sizeof(d_main_data_len);
|
||||
} else {
|
||||
*(destion + pos2 - 1) = XLR_BLOCK_ID_DATA_SHORT;
|
||||
uint8_t tlen = d_main_data_len;
|
||||
memcpy(destion + pos2,&tlen,sizeof(tlen));
|
||||
pos2 += sizeof(uint8_t);
|
||||
}
|
||||
}
|
||||
}
|
||||
pos1 += sizeof(main_data_len);
|
||||
remaining -= sizeof(main_data_len);
|
||||
datatotal += main_data_len;
|
||||
break; /* by convention, the main data fragment is
|
||||
* always last */
|
||||
} else if (block_id == XLR_BLOCK_ID_ORIGIN) {
|
||||
pos1 += sizeof(block_id);
|
||||
if (isMtr == true) {
|
||||
memcpy((destion + pos2),&block_id,sizeof(block_id));
|
||||
pos2 += sizeof(block_id);
|
||||
}
|
||||
memcpy(&RepOriginId, source + pos1,sizeof(RepOriginId));
|
||||
if (isMtr == true) {
|
||||
memcpy(destion + pos2,&RepOriginId,sizeof(RepOriginId));
|
||||
pos2 += sizeof(RepOriginId);
|
||||
}
|
||||
pos1 += sizeof(RepOriginId);
|
||||
remaining -= sizeof(RepOriginId);
|
||||
} else if (block_id == XLR_BLOCK_ID_TOPLEVEL_XID) {
|
||||
pos1 += sizeof(block_id);
|
||||
if (isMtr == true) {
|
||||
memcpy((destion + pos2),&block_id,sizeof(block_id));
|
||||
pos2 += sizeof(block_id);
|
||||
}
|
||||
memcpy(&TransactionId,source + pos1,sizeof(TransactionId));
|
||||
if (isMtr == true) {
|
||||
memcpy(destion + pos2,&TransactionId,sizeof(TransactionId));
|
||||
pos2 += sizeof(TransactionId);
|
||||
}
|
||||
pos1 += sizeof(TransactionId);
|
||||
remaining -= sizeof(TransactionId);
|
||||
} else if (block_id <= XLR_MAX_BLOCK_ID) {
|
||||
/* Ok, copy the header to the scratch buffer */
|
||||
memcpy(destion + pos2, source + pos1, SizeOfXLogRecordBlockHeader);
|
||||
uint8_t fork_flags = *(source + pos1 + sizeof(block_id));
|
||||
*(destion + pos2) = blkNum;
|
||||
data_len[blkNum] = *((uint16_t*)(source + pos1 + sizeof(block_id) + sizeof(fork_flags)));
|
||||
datatotal += data_len;
|
||||
pos1 += SizeOfXLogRecordBlockHeader;
|
||||
pos2 += SizeOfXLogRecordBlockHeader;
|
||||
remaining -= SizeOfXLogRecordBlockHeader;
|
||||
if ((fork_flags & BKPBLOCK_HAS_IMAGE) != 0) {
|
||||
bimg_len[blkNum] = *((uint16_t*)(source + pos1));
|
||||
datatotal += bimg_len;
|
||||
uint16_t hole_offset = *((uint16_t*)(source + pos1 + sizeof(bimg_len)));
|
||||
uint8_t bimg_info = *((uint16_t*)(source + pos1 + sizeof(bimg_len) + sizeof(hole_offset)));
|
||||
memcpy(destion + pos2, source + pos1, SizeOfXLogRecordBlockImageHeader);
|
||||
pos1 += SizeOfXLogRecordBlockImageHeader;
|
||||
pos2 += SizeOfXLogRecordBlockImageHeader;
|
||||
remaining -= SizeOfXLogRecordBlockImageHeader;
|
||||
if ((bimg_info & BKPIMAGE_IS_COMPRESSED) != 0) {
|
||||
if ((bimg_info & BKPIMAGE_HAS_HOLE) != 0) {
|
||||
memcpy(destion + pos2, source + pos1, SizeOfXLogRecordBlockCompressHeader);
|
||||
pos1 += SizeOfXLogRecordBlockCompressHeader;
|
||||
pos2 += SizeOfXLogRecordBlockCompressHeader;
|
||||
remaining -= SizeOfXLogRecordBlockCompressHeader;
|
||||
}
|
||||
}
|
||||
if (!(fork_flags & BKPBLOCK_SAME_REL)) {
|
||||
memcpy(destion + pos2, source + pos1, sizeof(RelFileNode));
|
||||
pos1 += sizeof(RelFileNode);
|
||||
pos2 += sizeof(RelFileNode);
|
||||
remaining -= sizeof(RelFileNode);
|
||||
}
|
||||
memcpy(destion + pos2, source + pos1, sizeof(BlockNumber));
|
||||
pos1 += sizeof(BlockNumber);
|
||||
pos2 += sizeof(BlockNumber);
|
||||
remaining -= sizeof(BlockNumber);
|
||||
}
|
||||
} else {
|
||||
elog(FATAL,"invalid block_id %u",block_id);
|
||||
}
|
||||
}
|
||||
assert(remaining == datatotal);
|
||||
if (bimg_len[blkNum] != 0 ) {
|
||||
img_ptr[blkNum] = source + pos1;
|
||||
pos1 += bimg_len[blkNum];
|
||||
}
|
||||
if (data_len[blkNum] != 0) {
|
||||
data_ptr[blkNum] = source + pos1;
|
||||
pos1 += data_len[blkNum];
|
||||
}
|
||||
blkNum++;
|
||||
}
|
||||
*he3_pos = pos1;
|
||||
int idx = 0;
|
||||
while(idx < blkNum) {
|
||||
if (bimg_len[idx] != 0) {
|
||||
memcpy(destion + pos2, img_ptr[idx], bimg_len[idx]);
|
||||
pos2 += bimg_len[idx];
|
||||
}
|
||||
if (data_len[blkNum] != 0){
|
||||
memcpy(destion + pos2, data_ptr[idx], data_len[idx]);
|
||||
pos2 += data_len[idx];
|
||||
}
|
||||
}
|
||||
memcpy(destion + pos2, d_main_data, d_main_data_len);
|
||||
pos2 += d_main_data_len;
|
||||
old->xl_tot_len = pos2-prev_pos2;
|
||||
isMtr = false;
|
||||
}
|
||||
return pos2;
|
||||
}
|
||||
|
||||
// XLByteToSeg(sendTimeLineValidUpto, endSegNo, state->segcxt.ws_segsize);
|
||||
// if (nextSegNo == endSegNo)
|
||||
// *tli_p = sendTimeLineNextTLI;
|
||||
// }
|
||||
static int findFirstCheckPoint(char* source,int limit) {
|
||||
XLogRecord* head = (XLogRecord*)source;
|
||||
bool find = false;
|
||||
int datalen = 0;
|
||||
while(!(head->xl_rmid == RM_XLOG_ID &&
|
||||
((head->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN || (head->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_ONLINE)) &&
|
||||
datalen < limit) {
|
||||
datalen += head->xl_tot_len;
|
||||
}
|
||||
if (datalen == limit) {
|
||||
return -1;
|
||||
}
|
||||
return datalen;
|
||||
}
|
||||
|
||||
// XLogFilePath(path, *tli_p, nextSegNo, state->segcxt.ws_segsize);
|
||||
// state->seg.ws_file = BasicOpenFile(path, O_RDONLY | PG_BINARY);
|
||||
// if (state->seg.ws_file >= 0)
|
||||
// return;
|
||||
/*
|
||||
* Send out the WAL in its normal physical/stored form.
|
||||
*
|
||||
* Read up to MAX_SEND_SIZE bytes of WAL that's been flushed to disk,
|
||||
* but not yet sent to the client, and buffer it in the libpq output
|
||||
* buffer.
|
||||
*
|
||||
* If there is no unsent WAL remaining, WalSndCaughtUp is set to true,
|
||||
* otherwise WalSndCaughtUp is set to false.
|
||||
*/
|
||||
static uint64_t EndLsn = 0;
|
||||
static void
|
||||
XLogSendTiKVPhysical(void)
|
||||
{
|
||||
XLogRecPtr SendRqstPtr;
|
||||
XLogRecPtr startptr;
|
||||
XLogRecPtr endptr;
|
||||
Size nbytes;
|
||||
XLogSegNo segno;
|
||||
WALReadError errinfo;
|
||||
|
||||
// /*
|
||||
// * If the file is not found, assume it's because the standby asked for a
|
||||
// * too old WAL segment that has already been removed or recycled.
|
||||
// */
|
||||
// if (errno == ENOENT)
|
||||
// {
|
||||
// char xlogfname[MAXFNAMELEN];
|
||||
// int save_errno = errno;
|
||||
/* If requested switch the WAL sender to the stopping state. */
|
||||
if (got_STOPPING)
|
||||
WalSndSetState(WALSNDSTATE_STOPPING);
|
||||
|
||||
// XLogFileName(xlogfname, *tli_p, nextSegNo, wal_segment_size);
|
||||
// errno = save_errno;
|
||||
// ereport(ERROR,
|
||||
// (errcode_for_file_access(),
|
||||
// errmsg("requested WAL segment %s has already been removed",
|
||||
// xlogfname)));
|
||||
// }
|
||||
// else
|
||||
// ereport(ERROR,
|
||||
// (errcode_for_file_access(),
|
||||
// errmsg("could not open file \"%s\": %m",
|
||||
// path)));
|
||||
// }
|
||||
if (streamingDoneSending)
|
||||
{
|
||||
WalSndCaughtUp = true;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Figure out how far we can safely send the WAL. */
|
||||
if (sendTimeLineIsHistoric)
|
||||
{
|
||||
/*
|
||||
* Streaming an old timeline that's in this server's history, but is
|
||||
* not the one we're currently inserting or replaying. It can be
|
||||
* streamed up to the point where we switched off that timeline.
|
||||
*/
|
||||
SendRqstPtr = sendTimeLineValidUpto;
|
||||
}
|
||||
else if (am_cascading_walsender)
|
||||
{
|
||||
/*
|
||||
* Streaming the latest timeline on a standby.
|
||||
*
|
||||
* Attempt to send all WAL that has already been replayed, so that we
|
||||
* know it's valid. If we're receiving WAL through streaming
|
||||
* replication, it's also OK to send any WAL that has been received
|
||||
* but not replayed.
|
||||
*
|
||||
* The timeline we're recovering from can change, or we can be
|
||||
* promoted. In either case, the current timeline becomes historic. We
|
||||
* need to detect that so that we don't try to stream past the point
|
||||
* where we switched to another timeline. We check for promotion or
|
||||
* timeline switch after calculating FlushPtr, to avoid a race
|
||||
* condition: if the timeline becomes historic just after we checked
|
||||
* that it was still current, it's still be OK to stream it up to the
|
||||
* FlushPtr that was calculated before it became historic.
|
||||
*/
|
||||
bool becameHistoric = false;
|
||||
|
||||
SendRqstPtr = GetStandbyFlushRecPtr();
|
||||
|
||||
if (!RecoveryInProgress())
|
||||
{
|
||||
/*
|
||||
* We have been promoted. RecoveryInProgress() updated
|
||||
* ThisTimeLineID to the new current timeline.
|
||||
*/
|
||||
am_cascading_walsender = false;
|
||||
becameHistoric = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* Still a cascading standby. But is the timeline we're sending
|
||||
* still the one recovery is recovering from? ThisTimeLineID was
|
||||
* updated by the GetStandbyFlushRecPtr() call above.
|
||||
*/
|
||||
if (sendTimeLine != ThisTimeLineID)
|
||||
becameHistoric = true;
|
||||
}
|
||||
|
||||
if (becameHistoric)
|
||||
{
|
||||
/*
|
||||
* The timeline we were sending has become historic. Read the
|
||||
* timeline history file of the new timeline to see where exactly
|
||||
* we forked off from the timeline we were sending.
|
||||
*/
|
||||
List *history;
|
||||
|
||||
history = readTimeLineHistory(ThisTimeLineID);
|
||||
sendTimeLineValidUpto = tliSwitchPoint(sendTimeLine, history, &sendTimeLineNextTLI);
|
||||
|
||||
Assert(sendTimeLine < sendTimeLineNextTLI);
|
||||
list_free_deep(history);
|
||||
|
||||
sendTimeLineIsHistoric = true;
|
||||
|
||||
SendRqstPtr = sendTimeLineValidUpto;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* Streaming the current timeline on a primary.
|
||||
*
|
||||
* Attempt to send all data that's already been written out and
|
||||
* fsync'd to disk. We cannot go further than what's been written out
|
||||
* given the current implementation of WALRead(). And in any case
|
||||
* it's unsafe to send WAL that is not securely down to disk on the
|
||||
* primary: if the primary subsequently crashes and restarts, standbys
|
||||
* must not have applied any WAL that got lost on the primary.
|
||||
*/
|
||||
SendRqstPtr = GetFlushRecPtr();
|
||||
}
|
||||
|
||||
/*
|
||||
* Record the current system time as an approximation of the time at which
|
||||
* this WAL location was written for the purposes of lag tracking.
|
||||
*
|
||||
* In theory we could make XLogFlush() record a time in shmem whenever WAL
|
||||
* is flushed and we could get that time as well as the LSN when we call
|
||||
* GetFlushRecPtr() above (and likewise for the cascading standby
|
||||
* equivalent), but rather than putting any new code into the hot WAL path
|
||||
* it seems good enough to capture the time here. We should reach this
|
||||
* after XLogFlush() runs WalSndWakeupProcessRequests(), and although that
|
||||
* may take some time, we read the WAL flush pointer and take the time
|
||||
* very close to together here so that we'll get a later position if it is
|
||||
* still moving.
|
||||
*
|
||||
* Because LagTrackerWrite ignores samples when the LSN hasn't advanced,
|
||||
* this gives us a cheap approximation for the WAL flush time for this
|
||||
* LSN.
|
||||
*
|
||||
* Note that the LSN is not necessarily the LSN for the data contained in
|
||||
* the present message; it's the end of the WAL, which might be further
|
||||
* ahead. All the lag tracking machinery cares about is finding out when
|
||||
* that arbitrary LSN is eventually reported as written, flushed and
|
||||
* applied, so that it can measure the elapsed time.
|
||||
*/
|
||||
LagTrackerWrite(SendRqstPtr, GetCurrentTimestamp());
|
||||
|
||||
/*
|
||||
* If this is a historic timeline and we've reached the point where we
|
||||
* forked to the next timeline, stop streaming.
|
||||
*
|
||||
* Note: We might already have sent WAL > sendTimeLineValidUpto. The
|
||||
* startup process will normally replay all WAL that has been received
|
||||
* from the primary, before promoting, but if the WAL streaming is
|
||||
* terminated at a WAL page boundary, the valid portion of the timeline
|
||||
* might end in the middle of a WAL record. We might've already sent the
|
||||
* first half of that partial WAL record to the cascading standby, so that
|
||||
* sentPtr > sendTimeLineValidUpto. That's OK; the cascading standby can't
|
||||
* replay the partial WAL record either, so it can still follow our
|
||||
* timeline switch.
|
||||
*/
|
||||
if (sendTimeLineIsHistoric && sendTimeLineValidUpto <= sentPtr)
|
||||
{
|
||||
/* close the current file. */
|
||||
// if (xlogreader->seg.ws_file >= 0)
|
||||
// wal_segment_close(xlogreader);
|
||||
|
||||
/* Send CopyDone */
|
||||
pq_putmessage_noblock('c', NULL, 0);
|
||||
streamingDoneSending = true;
|
||||
|
||||
WalSndCaughtUp = true;
|
||||
|
||||
elog(DEBUG1, "walsender reached end of timeline at %X/%X (sent up to %X/%X)",
|
||||
LSN_FORMAT_ARGS(sendTimeLineValidUpto),
|
||||
LSN_FORMAT_ARGS(sentPtr));
|
||||
return;
|
||||
}
|
||||
|
||||
/* Do we have any work to do? */
|
||||
Assert(sentPtr <= SendRqstPtr);
|
||||
if (SendRqstPtr <= sentPtr)
|
||||
{
|
||||
WalSndCaughtUp = true;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Figure out how much to send in one message. If there's no more than
|
||||
* MAX_SEND_SIZE bytes to send, send everything. Otherwise send
|
||||
* MAX_SEND_SIZE bytes, but round back to logfile or page boundary.
|
||||
*
|
||||
* The rounding is not only for performance reasons. Walreceiver relies on
|
||||
* the fact that we never split a WAL record across two messages. Since a
|
||||
* long WAL record is split at page boundary into continuation records,
|
||||
* page boundary is always a safe cut-off point. We also assume that
|
||||
* SendRqstPtr never points to the middle of a WAL record.
|
||||
*/
|
||||
startptr = sentPtr;
|
||||
endptr = startptr;
|
||||
endptr += ONCE_READ_TIKV_WAL;
|
||||
|
||||
/* if we went beyond SendRqstPtr, back off */
|
||||
if (SendRqstPtr <= endptr)
|
||||
{
|
||||
endptr = SendRqstPtr;
|
||||
if (sendTimeLineIsHistoric)
|
||||
WalSndCaughtUp = false;
|
||||
else
|
||||
WalSndCaughtUp = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* round down to page boundary. */
|
||||
endptr -= (endptr % XLOG_BLCKSZ);
|
||||
WalSndCaughtUp = false;
|
||||
}
|
||||
|
||||
nbytes = endptr - startptr;
|
||||
Assert(nbytes <= ONCE_READ_TIKV_WAL);
|
||||
|
||||
/*
|
||||
* OK to read and send the slice.
|
||||
*/
|
||||
resetStringInfo(&output_message);
|
||||
pq_sendbyte(&output_message, 'w');
|
||||
|
||||
pq_sendint64(&output_message, 0); /* dataStart */
|
||||
pq_sendint64(&output_message, 0); /* walEnd */
|
||||
pq_sendint64(&output_message, 0); /* sendtime, filled in last */
|
||||
|
||||
/*
|
||||
* Read the log directly into the output buffer to avoid extra memcpy
|
||||
* calls.
|
||||
*/
|
||||
enlargeStringInfo(&output_message, DEFAULT_SEND_WAL_CAPCITY);
|
||||
static char* he3_wal_cache = NULL;
|
||||
uint64_t StartLsn = 0;
|
||||
static uint64_t PrevLsn = 0;
|
||||
if (he3_wal_cache == NULL) {
|
||||
he3_wal_cache = malloc(DEFAULT_SEND_WAL_CAPCITY);
|
||||
}
|
||||
retry:
|
||||
xlogreader->currTLI = ThisTimeLineID;
|
||||
int ret = -1;
|
||||
ret = He3DBWALRead(xlogreader,
|
||||
startptr,
|
||||
nbytes,
|
||||
he3_wal_cache);
|
||||
|
||||
if (ret < 0) {
|
||||
WALReadRaiseError(&errinfo);
|
||||
return;
|
||||
} else {
|
||||
nbytes = ret;
|
||||
}
|
||||
|
||||
int dLen = 0;
|
||||
int mtrLen = ArrayXlogHe3ToPg(he3_wal_cache,nbytes,&output_message.data[output_message.len],&dLen,&StartLsn,&EndLsn);
|
||||
output_message.len += dLen;
|
||||
output_message.data[output_message.len] = '\0';
|
||||
/*
|
||||
* Fill the send timestamp last, so that it is taken as late as possible.
|
||||
*/
|
||||
if (StartLsn % XLOG_BLCKSZ == SizeOfXLogShortPHD &&
|
||||
XLogSegmentOffset(StartLsn, DEFAULT_XLOG_SEG_SIZE) > XLOG_BLCKSZ) {
|
||||
StartLsn -= SizeOfXLogShortPHD;
|
||||
}
|
||||
else if (StartLsn % XLOG_BLCKSZ == SizeOfXLogLongPHD &&
|
||||
XLogSegmentOffset(StartLsn, DEFAULT_XLOG_SEG_SIZE) < XLOG_BLCKSZ) {
|
||||
StartLsn -= SizeOfXLogLongPHD;
|
||||
}
|
||||
endptr = startptr + mtrLen ;
|
||||
resetStringInfo(&tmpbuf);
|
||||
pq_sendint64(&tmpbuf, StartLsn);
|
||||
/* walStart */
|
||||
memcpy(&output_message.data[1],
|
||||
tmpbuf.data, sizeof(int64));
|
||||
EndLsn = StartLsn+dLen;
|
||||
resetStringInfo(&tmpbuf);
|
||||
pq_sendint64(&tmpbuf, EndLsn);
|
||||
/* walEnd */
|
||||
memcpy(&output_message.data[1 + sizeof(int64)],
|
||||
tmpbuf.data, sizeof(int64));
|
||||
resetStringInfo(&tmpbuf);
|
||||
/* sendtime, filled in last */
|
||||
pq_sendint64(&tmpbuf, GetCurrentTimestamp());
|
||||
memcpy(&output_message.data[1 + sizeof(int64) + sizeof(int64)],
|
||||
tmpbuf.data, sizeof(int64));
|
||||
pq_putmessage_noblock('d', output_message.data, output_message.len);
|
||||
sentPtr = endptr;
|
||||
|
||||
/* Update shared memory status */
|
||||
{
|
||||
WalSnd *walsnd = MyWalSnd;
|
||||
|
||||
SpinLockAcquire(&walsnd->mutex);
|
||||
walsnd->sentPtr = sentPtr;
|
||||
SpinLockRelease(&walsnd->mutex);
|
||||
}
|
||||
|
||||
/* Report progress of XLOG streaming in PS display */
|
||||
if (update_process_title)
|
||||
{
|
||||
char activitymsg[50];
|
||||
|
||||
snprintf(activitymsg, sizeof(activitymsg), "streaming %X/%X",
|
||||
LSN_FORMAT_ARGS(EndLsn));
|
||||
set_ps_display(activitymsg);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Send out the WAL in its normal physical/stored form.
|
||||
@ -3489,7 +4116,11 @@ WalSndKeepalive(bool requestReply)
|
||||
/* construct the message... */
|
||||
resetStringInfo(&output_message);
|
||||
pq_sendbyte(&output_message, 'k');
|
||||
if (EndLsn != 0) {
|
||||
pq_sendint64(&output_message, sentPtr);
|
||||
} else {
|
||||
pq_sendint64(&output_message, EndLsn);
|
||||
}
|
||||
pq_sendint64(&output_message, GetCurrentTimestamp());
|
||||
pq_sendbyte(&output_message, requestReply ? 1 : 0);
|
||||
|
||||
@ -3533,6 +4164,9 @@ WalSndKeepaliveIfNecessary(void)
|
||||
/* Try to flush pending output to the client */
|
||||
if (pq_flush_if_writable() != 0)
|
||||
WalSndShutdown();
|
||||
// } else {
|
||||
// WalSndKeepalive(true);
|
||||
// pg_usleep(10000);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -141,6 +141,17 @@ InitBufferPool(void)
|
||||
/* Init other shared buffer-management stuff */
|
||||
StrategyInitialize(!foundDescs);
|
||||
|
||||
/* Init preCacheNodes arrays */
|
||||
preCacheNodesPtr = (Oid *)
|
||||
ShmemInitStruct("preCacheNodesPtr",
|
||||
NPreCacheNodes * sizeof(Oid), &foundBufCkpt);
|
||||
memset(preCacheNodesPtr, 0, NPreCacheNodes * sizeof(Oid));
|
||||
|
||||
preCacheNodesCountPtr = (uint16 *)
|
||||
ShmemInitStruct("preCacheNodesCountPtr",
|
||||
sizeof(uint16), &foundBufCkpt);
|
||||
memset(preCacheNodesCountPtr, 0, sizeof(uint16));
|
||||
|
||||
/* Initialize per-backend file flush context */
|
||||
WritebackContextInit(&BackendWritebackContext,
|
||||
&backend_flush_after);
|
||||
@ -177,5 +188,8 @@ BufferShmemSize(void)
|
||||
/* size of checkpoint sort array in bufmgr.c */
|
||||
size = add_size(size, mul_size(NBuffers, sizeof(CkptSortItem)));
|
||||
|
||||
/* size of preCacheNodes */
|
||||
size = add_size(size, mul_size(NPreCacheNodes, sizeof(Oid)) + sizeof(uint16));
|
||||
|
||||
return size;
|
||||
}
|
||||
|
@ -793,6 +793,18 @@ ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum,
|
||||
* miss.
|
||||
*/
|
||||
pgstat_count_buffer_read(reln);
|
||||
/* precache or unprecache index */
|
||||
if (isPreCacheIndex && !isPreCacheIndexDone && preCacheNodeOid == reln->rd_node.relNode)
|
||||
{
|
||||
BlockNumber precacheblocks;
|
||||
precacheblocks = smgrnblocks(reln->rd_smgr, forkNum);
|
||||
for(BlockNumber i=0; i < precacheblocks; i++)
|
||||
{
|
||||
ReleaseBuffer(ReadBuffer_common(reln->rd_smgr, reln->rd_rel->relpersistence, forkNum, i, mode, strategy, &hit));
|
||||
}
|
||||
isPreCacheIndexDone = true;
|
||||
}
|
||||
|
||||
buf = ReadBuffer_common(reln->rd_smgr, reln->rd_rel->relpersistence,
|
||||
forkNum, blockNum, mode, strategy, &hit);
|
||||
if (hit)
|
||||
@ -919,6 +931,16 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
|
||||
else if (mode == RBM_NORMAL || mode == RBM_NORMAL_NO_LOG ||
|
||||
mode == RBM_ZERO_ON_ERROR)
|
||||
pgBufferUsage.shared_blks_read++;
|
||||
// for precache: buf not be eliminated by clock algorithm
|
||||
if (needPreCacheEscape && preCacheNodeOid == bufHdr->tag.rnode.relNode)
|
||||
{
|
||||
bufHdr->isPreCacheEscape=true;
|
||||
}
|
||||
// for unprecache: buf be eliminated by clock algorithm
|
||||
if (needUnpreCacheEscape && preCacheNodeOid == bufHdr->tag.rnode.relNode)
|
||||
{
|
||||
bufHdr->isPreCacheEscape=false;
|
||||
}
|
||||
}
|
||||
|
||||
/* At this point we do NOT hold any locks. */
|
||||
@ -1032,6 +1054,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
|
||||
int lsnLen = 0;
|
||||
bool outdata = true;
|
||||
Bufrd tWalRecord;
|
||||
tWalRecord.count = 0;
|
||||
tWalRecord.buf = NULL;
|
||||
LsnNode* head = NULL;
|
||||
char* pageXlogPtr = NULL;
|
||||
@ -1047,6 +1070,21 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
|
||||
MemSet((char *) bufBlock, 0, BLCKSZ);
|
||||
/* don't set checksum for all-zero page */
|
||||
smgrextend(smgr, forkNum, blockNum, (char *) bufBlock, false);
|
||||
|
||||
/* for new page precache */
|
||||
if (*preCacheNodesCountPtr > 0)
|
||||
{
|
||||
uint16 preCacheNodei = 0;
|
||||
while (preCacheNodei < *preCacheNodesCountPtr)
|
||||
{
|
||||
if (preCacheNodesPtr[preCacheNodei] == bufHdr->tag.rnode.relNode)
|
||||
{
|
||||
bufHdr->isPreCacheEscape=true;
|
||||
break;
|
||||
}
|
||||
preCacheNodei++;
|
||||
}
|
||||
}
|
||||
//parallel replay PageFlushWorkerMain=>ProcFlushBufferToDisk=>XLogReadBufferExtended=>default status RM_NORMAL,
|
||||
//where init page,status is RBM_ZERO_AND_LOCK will lead to page invaild,so need smgrextend page then to smgrread
|
||||
//push standby can use ReadWalsByPage to replay base RBM_ZERO page,but slave must be ensure flush page min LSN point
|
||||
@ -1056,7 +1094,6 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
|
||||
!isLocalBuf) && IsBootstrapProcessingMode() != true && InitdbSingle != true)
|
||||
{
|
||||
if (EnableHotStandby == true || InRecovery) {
|
||||
if (EnableHotStandby == true && push_standby == false) {
|
||||
BufferTag pageTag;
|
||||
pageTag.rnode = smgr->smgr_rnode.node;
|
||||
pageTag.forkNum = forkNum;
|
||||
@ -1064,18 +1101,20 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
|
||||
replayLsn = GetXLogReplayRecPtr(&tli);
|
||||
XLogRecPtr pageLsn = BufferGetLSN(bufHdr);
|
||||
head = GetLogIndexByPage(&pageTag,pageLsn,replayLsn);
|
||||
GetXLogReplayRecPtr(&tli);
|
||||
if ((EnableHotStandby == true && push_standby == false) || he3mirror) {
|
||||
if (head->next!=NULL) {
|
||||
tWalRecord = ReadWalsByPage(pageTag.rnode.dbNode,pageTag.rnode.relNode,forkNum,blockNum,tli,head);
|
||||
}
|
||||
} else {
|
||||
LsnNode* next = head->next;
|
||||
if (next!=NULL) {
|
||||
walRecord.cap = 8192;
|
||||
walRecord.buf = malloc(walRecord.cap);
|
||||
LsnNode* next = head->next;
|
||||
}
|
||||
while(next!=NULL) {
|
||||
int count = walRecordQuery(&walRecord.buf,&walRecord.count,&walRecord.cap,next->lsn);
|
||||
if (count == -1) {
|
||||
elog(FATAL,"======walRecordQuery query wal Faild %X/%X======",LSN_FORMAT_ARGS(next->lsn));
|
||||
elog(FATAL,"======walRecordQuery query wal Faild %X/%X===1===",LSN_FORMAT_ARGS(next->lsn));
|
||||
}
|
||||
next = next->next;
|
||||
}
|
||||
@ -1118,18 +1157,20 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
|
||||
pageTag.blockNum = blockNum;
|
||||
XLogRecPtr pageLsn = BufferGetLSN(bufHdr);
|
||||
head = GetLogIndexByPage(&pageTag,pageLsn,replayLsn);
|
||||
if (EnableHotStandby == true && push_standby == false) {
|
||||
if ((EnableHotStandby == true && push_standby == false) || he3mirror) {
|
||||
if (head->next != NULL) {
|
||||
tWalRecord = ReadWalsByPage(pageTag.rnode.dbNode,pageTag.rnode.relNode,forkNum,blockNum,tli,head);
|
||||
}
|
||||
} else {
|
||||
LsnNode* next = head->next;
|
||||
if (next != NULL) {
|
||||
walRecord.cap = 8192;
|
||||
walRecord.buf = malloc(walRecord.cap);
|
||||
LsnNode* next = head->next;
|
||||
}
|
||||
while(next!=NULL) {
|
||||
int count = walRecordQuery(&walRecord.buf,&walRecord.count,&walRecord.cap,next->lsn);
|
||||
if (count == -1) {
|
||||
elog(FATAL,"======walRecordQuery query wal Faild %X/%X======",LSN_FORMAT_ARGS(next->lsn));
|
||||
elog(FATAL,"======walRecordQuery query wal Faild %X/%X===2===",LSN_FORMAT_ARGS(next->lsn));
|
||||
}
|
||||
next = next->next;
|
||||
}
|
||||
@ -1140,6 +1181,30 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
|
||||
elog(FATAL,"smgrextend=>he3dbsmgrread rel %d flk %d blk %d nbytes %d",smgr->smgr_rnode.node.relNode,forkNum, blockNum,nbytes);
|
||||
} else {
|
||||
memcpy(bufBlock,pageXlogPtr,BLCKSZ);
|
||||
if (push_standby == true || EnableHotStandby == false) {
|
||||
BufferTag pageTag;
|
||||
pageTag.rnode = smgr->smgr_rnode.node;
|
||||
pageTag.forkNum = forkNum;
|
||||
pageTag.blockNum = blockNum;
|
||||
XLogRecPtr pageLsn = BufferGetLSN(bufHdr);
|
||||
head = GetLogIndexByPage(&pageTag,pageLsn,replayLsn);
|
||||
if (head->next!=NULL && he3mirror) {
|
||||
tWalRecord = ReadWalsByPage(pageTag.rnode.dbNode,pageTag.rnode.relNode,forkNum,blockNum,tli,head);
|
||||
}else{
|
||||
LsnNode* next = head->next;
|
||||
if (next != NULL) {
|
||||
walRecord.cap = 8192;
|
||||
walRecord.buf = malloc(walRecord.cap);
|
||||
}
|
||||
while(next!=NULL) {
|
||||
int count = walRecordQuery(&walRecord.buf,&walRecord.count,&walRecord.cap,next->lsn);
|
||||
if (count == -1) {
|
||||
elog(FATAL,"======walRecordQuery query wal Faild %X/%X===3===",LSN_FORMAT_ARGS(next->lsn));
|
||||
}
|
||||
next = next->next;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1212,14 +1277,18 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
|
||||
* apply logs to this old page when read from disk.
|
||||
*
|
||||
*/
|
||||
if (pageXlogPtr != NULL || tWalRecord.buf != NULL || walRecord.buf != NULL)
|
||||
if (pageXlogPtr != NULL || tWalRecord.count != 0 || walRecord.count != 0)
|
||||
{
|
||||
XLogRecPtr pageLsn = BufferGetLSN(bufHdr);
|
||||
char *xlogStart = NULL;
|
||||
if (pageXlogPtr != NULL) {
|
||||
xlogStart = pageXlogPtr + BLCKSZ;
|
||||
nbytes = nbytes - BLCKSZ;
|
||||
} else if (tWalRecord.buf != NULL) {
|
||||
if (walRecord.count != 0) {
|
||||
xlogStart = walRecord.buf;
|
||||
nbytes = walRecord.count;
|
||||
}
|
||||
} else if (tWalRecord.count != 0) {
|
||||
xlogStart = tWalRecord.buf;
|
||||
nbytes = tWalRecord.count;
|
||||
} else {
|
||||
@ -1230,13 +1299,15 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
|
||||
if (pageXlogPtr != NULL) {
|
||||
free(pageXlogPtr);
|
||||
pageXlogPtr = NULL;
|
||||
if (walRecord.count != 0) {
|
||||
free(walRecord.buf);
|
||||
FreeLsnNode(head);
|
||||
}
|
||||
} else if (tWalRecord.count != 0) {
|
||||
free_dataRead(tWalRecord.buf,tWalRecord.count,tWalRecord.cap);
|
||||
FreeLsnNode(head);
|
||||
} else {
|
||||
if (walRecord.buf != NULL) {
|
||||
free(walRecord.buf);
|
||||
}
|
||||
FreeLsnNode(head);
|
||||
}
|
||||
}
|
||||
@ -2134,40 +2205,6 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy)
|
||||
|
||||
ref->refcount++;
|
||||
Assert(ref->refcount > 0);
|
||||
// for precache: buf not be eliminated by clock algorithm
|
||||
if (needPreCacheEscape)
|
||||
{
|
||||
uint32 buf_state;
|
||||
uint32 old_buf_state;
|
||||
|
||||
old_buf_state = pg_atomic_read_u32(&buf->state);
|
||||
for (;;)
|
||||
{
|
||||
if (old_buf_state & BM_LOCKED)
|
||||
old_buf_state = WaitBufHdrUnlocked(buf);
|
||||
|
||||
buf_state = old_buf_state;
|
||||
|
||||
/* increase refcount */
|
||||
buf_state += BUF_REFCOUNT_ONE;
|
||||
|
||||
if (pg_atomic_compare_exchange_u32(&buf->state, &old_buf_state,
|
||||
buf_state))
|
||||
{
|
||||
result = (buf_state & BM_VALID) != 0;
|
||||
|
||||
/*
|
||||
* Assume that we acquired a buffer pin for the purposes of
|
||||
* Valgrind buffer client checks (even in !result case) to
|
||||
* keep things simple. Buffers that are unsafe to access are
|
||||
* not generally guaranteed to be marked undefined or
|
||||
* non-accessible in any case.
|
||||
*/
|
||||
VALGRIND_MAKE_MEM_DEFINED(BufHdrGetBlock(buf), BLCKSZ);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
|
||||
return result;
|
||||
@ -2221,11 +2258,6 @@ PinBuffer_Locked(BufferDesc *buf)
|
||||
buf_state = pg_atomic_read_u32(&buf->state);
|
||||
Assert(buf_state & BM_LOCKED);
|
||||
buf_state += BUF_REFCOUNT_ONE;
|
||||
// for precache: buf not be eliminated by clock algorithm
|
||||
if (needPreCacheEscape)
|
||||
{
|
||||
buf_state += BUF_REFCOUNT_ONE;
|
||||
}
|
||||
UnlockBufHdr(buf, buf_state);
|
||||
|
||||
b = BufferDescriptorGetBuffer(buf);
|
||||
|
@ -324,7 +324,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state)
|
||||
*/
|
||||
local_buf_state = LockBufHdr(buf);
|
||||
|
||||
if (BUF_STATE_GET_REFCOUNT(local_buf_state) == 0)
|
||||
if (buf->isPreCacheEscape == false && BUF_STATE_GET_REFCOUNT(local_buf_state) == 0)
|
||||
{
|
||||
if (BUF_STATE_GET_USAGECOUNT(local_buf_state) != 0)
|
||||
{
|
||||
|
@ -97,12 +97,13 @@
|
||||
#include "pgstat.h"
|
||||
#include "port/pg_iovec.h"
|
||||
#include "portability/mem.h"
|
||||
#include "postmaster/secondbuffer.h"
|
||||
#include "storage/fd.h"
|
||||
#include "storage/ipc.h"
|
||||
#include "storage/spin.h"
|
||||
#include "utils/guc.h"
|
||||
#include "utils/resowner_private.h"
|
||||
#include "utils/hfs.h"
|
||||
//#include "utils/hfs.h"
|
||||
/* He3DB: He3FS */
|
||||
//#include "storage/iport.h"
|
||||
|
||||
@ -2422,9 +2423,15 @@ FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
|
||||
|
||||
int
|
||||
MasterFileRead(char *buffer,uint32_t dbid, uint32_t relid, uint32_t forkno, uint32_t blockno){
|
||||
OriginDPageKey odpk;
|
||||
|
||||
PageKey pageKey;
|
||||
Bufrd bufrd;
|
||||
bufrd.count = 0;
|
||||
Bufrd *bufrd = NULL;
|
||||
bufrd = (Bufrd *)malloc(sizeof(Bufrd));
|
||||
bufrd->count = 0;
|
||||
bufrd->cap = 0;
|
||||
bufrd->buf = buffer;
|
||||
int count = 0;
|
||||
|
||||
pageKey.relfileNode.dbNode = dbid;
|
||||
pageKey.relfileNode.relNode = relid;
|
||||
@ -2433,14 +2440,17 @@ MasterFileRead(char *buffer,uint32_t dbid, uint32_t relid, uint32_t forkno, uint
|
||||
pageKey.pageLsn = 0;
|
||||
pageKey.replyLsn = GetXLogWriteRecPtr();
|
||||
|
||||
bufrd = MoveOnePageToMemory(pageKey);
|
||||
if (bufrd.count > 0)
|
||||
odpk.pk = pageKey;
|
||||
odpk.opration = (int)EVICT;
|
||||
GetPageFromCurrentNode(pageKey,bufrd);
|
||||
count = bufrd->count;
|
||||
if (count > 0)
|
||||
{
|
||||
memcpy(buffer,bufrd.buf,bufrd.count);
|
||||
free_dataRead(bufrd.buf,bufrd.count, bufrd.cap);
|
||||
AddOneItemToDPArray(odpk);
|
||||
bufrd->buf = NULL;
|
||||
}
|
||||
|
||||
return bufrd.count;
|
||||
free(bufrd);
|
||||
return count;
|
||||
}
|
||||
|
||||
int
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include "postmaster/bgworker_internals.h"
|
||||
#include "postmaster/bgwriter.h"
|
||||
#include "postmaster/postmaster.h"
|
||||
#include "postmaster/secondbuffer.h"
|
||||
#include "replication/logicallauncher.h"
|
||||
#include "replication/origin.h"
|
||||
#include "replication/slot.h"
|
||||
@ -168,7 +169,8 @@ CreateSharedMemoryAndSemaphores(void)
|
||||
|
||||
/* secondbufferhash code. */
|
||||
//TODO the size should be calculated base on data buffer size.
|
||||
size = add_size(size, 1<<30);
|
||||
size = add_size(size, SecondBufferShmemSize());
|
||||
size = add_size(size, SecondBufferLWLockShmemSize());
|
||||
size = add_size(size, He3dbLogIndexShmemSize());
|
||||
|
||||
/* cache file size */
|
||||
@ -222,6 +224,8 @@ CreateSharedMemoryAndSemaphores(void)
|
||||
*/
|
||||
CreateLWLocks();
|
||||
|
||||
CreateSecondBufferLWLocks();
|
||||
|
||||
/*
|
||||
* Set up shmem.c index hashtable
|
||||
*/
|
||||
@ -254,6 +258,8 @@ CreateSharedMemoryAndSemaphores(void)
|
||||
* set up second buffer hash
|
||||
*/
|
||||
InitSecondBufferHash();
|
||||
InitSecondBufferMeta();
|
||||
InitDPageKeyArray();
|
||||
|
||||
/*
|
||||
* set up fs meta
|
||||
|
@ -140,6 +140,7 @@ static void SetActiveTblWithFirstPage(LogIndexMemTBL *mem_tbl, const BufferTag *
|
||||
pg_atomic_write_u32(&(mem_tbl->meta.state), LOG_INDEX_MEM_TBL_STATE_ACTIVE);
|
||||
|
||||
// index start with 1, 0 means INVALID. hash[] all values will be 0 after init, so set to 1 when first use.
|
||||
mem_tbl->meta.id = log_index_mem_list->active_table_index;
|
||||
mem_tbl->meta.lsn_free_head = 1;
|
||||
mem_tbl->meta.page_free_head = 1;
|
||||
// calculate hashcode by buffer tag
|
||||
@ -235,6 +236,17 @@ static void InsertLsnNodeByHead(LsnNode *head, XLogRecPtr lsn)
|
||||
head->next = new_node;
|
||||
}
|
||||
|
||||
// eg: before: head-->node1-->NULL, after: head-->node1-->newNode-->NULL
|
||||
static LsnNode *InsertLsnNodeByTail(LsnNode *head, XLogRecPtr lsn)
|
||||
{
|
||||
LsnNode *new_node;
|
||||
new_node = (LsnNode *)malloc(sizeof(LsnNode));
|
||||
head->next = new_node;
|
||||
new_node->lsn = lsn;
|
||||
new_node->next = NULL;
|
||||
return new_node;
|
||||
}
|
||||
|
||||
// print nodelist
|
||||
static void PrintLsnNode(LsnNode *head)
|
||||
{
|
||||
@ -365,6 +377,7 @@ void InsertLogIndexByPage(const BufferTag *page, XLogRecPtr lsn)
|
||||
if(mem_tbl->hash[hash_key] == 0)
|
||||
{
|
||||
// set hash value to next free head
|
||||
if (!(mem_tbl->meta.page_free_head > LOG_INDEX_MEM_TBL_PAGE_NUM || mem_tbl->meta.lsn_free_head > LOG_INDEX_MEM_TBL_SEG_NUM))
|
||||
mem_tbl->hash[hash_key] = mem_tbl->meta.page_free_head;
|
||||
SetNextPageItem(mem_tbl, page, lsn);
|
||||
}
|
||||
@ -437,11 +450,13 @@ void InsertLogIndexByPage(const BufferTag *page, XLogRecPtr lsn)
|
||||
LsnNode *GetLogIndexByPage(const BufferTag *page, XLogRecPtr start_lsn, XLogRecPtr end_lsn)
|
||||
{
|
||||
LsnNode *head_node;
|
||||
LsnNode *tail;
|
||||
uint64 tbl_index;
|
||||
|
||||
// Prevent metadata changes during discovery.
|
||||
// TODO change to Lightweight Lock
|
||||
head_node = InitLsnNode();
|
||||
tail = head_node;
|
||||
LWLockAcquire(LogIndexMemListLock,LW_SHARED);
|
||||
tbl_index = log_index_mem_list->table_start_index;
|
||||
while(tbl_index != log_index_mem_list->active_table_index)
|
||||
@ -470,9 +485,8 @@ LsnNode *GetLogIndexByPage(const BufferTag *page, XLogRecPtr start_lsn, XLogRecP
|
||||
{
|
||||
if(lsn < end_lsn)
|
||||
{
|
||||
InsertLsnNodeByHead(head_node, lsn);
|
||||
tail = InsertLsnNodeByTail(tail, lsn);
|
||||
}else{
|
||||
ReverseLsnNode(head_node);
|
||||
LWLockRelease(LogIndexMemListLock);
|
||||
return head_node;
|
||||
}
|
||||
@ -501,9 +515,8 @@ LsnNode *GetLogIndexByPage(const BufferTag *page, XLogRecPtr start_lsn, XLogRecP
|
||||
{
|
||||
if(lsn < end_lsn)
|
||||
{
|
||||
InsertLsnNodeByHead(head_node, lsn);
|
||||
tail = InsertLsnNodeByTail(tail, lsn);
|
||||
}else{
|
||||
ReverseLsnNode(head_node);
|
||||
LWLockRelease(LogIndexMemListLock);
|
||||
return head_node;
|
||||
}
|
||||
@ -514,11 +527,9 @@ LsnNode *GetLogIndexByPage(const BufferTag *page, XLogRecPtr start_lsn, XLogRecP
|
||||
}
|
||||
seg_index = item_seg->next_seg;
|
||||
}
|
||||
ReverseLsnNode(head_node);
|
||||
LWLockRelease(LogIndexMemListLock);
|
||||
return head_node;
|
||||
}
|
||||
ReverseLsnNode(head_node);
|
||||
LWLockRelease(LogIndexMemListLock);
|
||||
return head_node;
|
||||
}
|
||||
@ -584,7 +595,8 @@ TagNode *GetBufTagByLsnRange(XLogRecPtr start_lsn, XLogRecPtr end_lsn)
|
||||
head_node = InitTagNode();
|
||||
LWLockAcquire(LogIndexMemListLock,LW_SHARED);
|
||||
tbl_index = log_index_mem_list->table_start_index;
|
||||
while(tbl_index != log_index_mem_list->active_table_index)
|
||||
uint64 active_index_next = (log_index_mem_list->active_table_index+1)%(log_index_mem_list->table_cap);
|
||||
while(tbl_index != active_index_next)
|
||||
{
|
||||
LogIndexMemTBL *mem_tbl = &(log_index_mem_list->mem_table[tbl_index]);
|
||||
tbl_index = (tbl_index + 1)%(log_index_mem_list->table_cap);
|
||||
|
@ -286,10 +286,7 @@ static HTAB *LockMethodLockHash;
|
||||
static HTAB *LockMethodProcLockHash;
|
||||
static HTAB *LockMethodLocalHash;
|
||||
|
||||
/*
|
||||
secondbufferhash code
|
||||
*/
|
||||
static HTAB *SecondBufferHash;
|
||||
|
||||
|
||||
/*
|
||||
fs meta code
|
||||
@ -484,47 +481,6 @@ void InitLocks(void)
|
||||
HASH_ELEM | HASH_BLOBS);
|
||||
}
|
||||
|
||||
/*
|
||||
init SecondBufferHash
|
||||
*/
|
||||
void
|
||||
InitSecondBufferHash(void)
|
||||
{
|
||||
HASHCTL info;
|
||||
long init_table_size,
|
||||
max_table_size;
|
||||
bool found;
|
||||
|
||||
/*
|
||||
* Compute init/max size to request for lock hashtables. Note these
|
||||
* calculations must agree with SecondBufferhashShmemSize!
|
||||
*/
|
||||
max_table_size = 200;
|
||||
init_table_size = max_table_size / 2;
|
||||
|
||||
info.keysize = sizeof(PageKey);
|
||||
info.entrysize = sizeof(PageValue);
|
||||
|
||||
info.num_partitions = NUM_LOCK_PARTITIONS;
|
||||
|
||||
SecondBufferHash = ShmemInitHash("SecondBuffer hash",
|
||||
init_table_size,
|
||||
max_table_size,
|
||||
&info,
|
||||
HASH_ELEM | HASH_BLOBS | HASH_PARTITION);
|
||||
|
||||
/*
|
||||
* Allocate wal global structures.
|
||||
*/
|
||||
secondBbufferglobalOffset =
|
||||
ShmemInitStruct("secondbuffer global set",
|
||||
sizeof(globalOffset), &found);
|
||||
if (!found){
|
||||
SpinLockInit(&secondBufferGlobalOffset->mutex);
|
||||
//initglobaloffset();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void setglobaloffset(uint64 offset,uint64 ino){
|
||||
SpinLockAcquire(&secondBufferGlobalOffset->mutex);
|
||||
@ -561,21 +517,6 @@ void InitFSMetaHash(void)
|
||||
HASH_ELEM | HASH_BLOBS | HASH_PARTITION);
|
||||
}
|
||||
|
||||
/*
|
||||
* notification_match: match function to use with notification_hash
|
||||
*/
|
||||
static int
|
||||
secondbuffer_match(const void *key1, const void *key2, Size keysize)
|
||||
{
|
||||
const pageKey *k1 = (const pageKey *)key1;
|
||||
const pageKey *k2 = (const pageKey *)key2;
|
||||
|
||||
Assert(keysize == sizeof(pageKey));
|
||||
if (k1->dbid == k2->dbid &&
|
||||
k1->blkno == k2->blkno && k1->forkno == k2->forkno && k1->relid == k2->relid)
|
||||
return 0; /* equal */
|
||||
return 1; /* not equal */
|
||||
}
|
||||
|
||||
/*
|
||||
* Fetch the lock method table associated with a given lock
|
||||
@ -4801,71 +4742,6 @@ int LockWaiterCount(const LOCKTAG *locktag)
|
||||
return waiters;
|
||||
}
|
||||
|
||||
//**************************
|
||||
|
||||
//* ops for wal hash code
|
||||
//*************************
|
||||
|
||||
/*
|
||||
|
||||
WAL LOG HASH
|
||||
*/
|
||||
uint32
|
||||
SecondBufferHashCode(const pageKey *pk)
|
||||
{
|
||||
return get_hash_value(SecondBufferHash, (const void *)pk);
|
||||
}
|
||||
|
||||
/*
|
||||
* Find or create LOCK and PROCLOCK objects as needed for a new lock
|
||||
* request.
|
||||
*
|
||||
* Returns the PROCLOCK object, or NULL if we failed to create the objects
|
||||
* for lack of shared memory.
|
||||
*
|
||||
* The appropriate partition lock must be held at entry, and will be
|
||||
* held at exit.
|
||||
*/
|
||||
PageValue *
|
||||
SetupSecondBufferInTable(const pageKey *pk)
|
||||
{
|
||||
|
||||
PageValue *pv;
|
||||
bool found;
|
||||
|
||||
wl = (PageValue *)
|
||||
hash_search(SecondBufferHash, pk, HASH_ENTER, &found);
|
||||
|
||||
if (!found)
|
||||
{
|
||||
printf("not found \n");
|
||||
}
|
||||
|
||||
return pv;
|
||||
}
|
||||
|
||||
void CleanUpSecondBuffer(const PageKey *pk, uint32 hashcode)
|
||||
{
|
||||
hash_search_with_hash_value(SecondBufferHash,
|
||||
(void *)pk,
|
||||
hashcode,
|
||||
HASH_REMOVE,
|
||||
NULL);
|
||||
}
|
||||
|
||||
PageValue *
|
||||
FindSecondBufferInTable(const PageKey *pk)
|
||||
{
|
||||
|
||||
PageValue *pv;
|
||||
bool found;
|
||||
pv = (PageValue *)
|
||||
hash_search(SecondBufferHash, pk, HASH_FIND, &found);
|
||||
|
||||
return pv;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
for fs meta
|
||||
|
@ -33,6 +33,7 @@
|
||||
#include "pg_trace.h"
|
||||
#include "pgstat.h"
|
||||
#include "postmaster/bgwriter.h"
|
||||
#include "postmaster/secondbuffer.h"
|
||||
#include "storage/bufmgr.h"
|
||||
#include "storage/buf_internals.h"
|
||||
#include "storage/fd.h"
|
||||
@ -44,7 +45,7 @@
|
||||
#include "utils/hsearch.h"
|
||||
#include "utils/memutils.h"
|
||||
#include "utils/guc.h"
|
||||
#include "utils/hfs.h"
|
||||
// #include "utils/hfs.h"
|
||||
#include "storage/he3db_logindex.h"
|
||||
|
||||
/*
|
||||
@ -93,17 +94,14 @@ typedef struct _MdfdVec
|
||||
|
||||
static MemoryContext MdCxt; /* context for all MdfdVec objects */
|
||||
|
||||
|
||||
/* Populate a file tag describing an md.c segment file. */
|
||||
#define INIT_MD_FILETAG(a,xx_rnode,xx_forknum,xx_segno) \
|
||||
( \
|
||||
#define INIT_MD_FILETAG(a, xx_rnode, xx_forknum, xx_segno) \
|
||||
( \
|
||||
memset(&(a), 0, sizeof(FileTag)), \
|
||||
(a).handler = SYNC_HANDLER_MD, \
|
||||
(a).rnode = (xx_rnode), \
|
||||
(a).forknum = (xx_forknum), \
|
||||
(a).segno = (xx_segno) \
|
||||
)
|
||||
|
||||
(a).segno = (xx_segno))
|
||||
|
||||
/*** behavior for mdopen & _mdfd_getseg ***/
|
||||
/* ereport if segment not present */
|
||||
@ -123,7 +121,6 @@ static MemoryContext MdCxt; /* context for all MdfdVec objects */
|
||||
*/
|
||||
#define EXTENSION_DONT_CHECK_SIZE (1 << 4)
|
||||
|
||||
|
||||
/* local routines */
|
||||
static void mdunlinkfork(RelFileNodeBackend rnode, ForkNumber forkNum,
|
||||
bool isRedo);
|
||||
@ -146,12 +143,10 @@ static MdfdVec *_mdfd_getseg(SMgrRelation reln, ForkNumber forkno,
|
||||
static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum,
|
||||
MdfdVec *seg);
|
||||
|
||||
|
||||
/*
|
||||
* mdinit() -- Initialize private state for magnetic disk storage manager.
|
||||
*/
|
||||
void
|
||||
mdinit(void)
|
||||
void mdinit(void)
|
||||
{
|
||||
MdCxt = AllocSetContextCreate(TopMemoryContext,
|
||||
"MdSmgr",
|
||||
@ -163,8 +158,7 @@ mdinit(void)
|
||||
*
|
||||
* Note: this will return true for lingering files, with pending deletions
|
||||
*/
|
||||
bool
|
||||
mdexists(SMgrRelation reln, ForkNumber forkNum)
|
||||
bool mdexists(SMgrRelation reln, ForkNumber forkNum)
|
||||
{
|
||||
/*
|
||||
* Close it first, to ensure that we notice if the fork has been unlinked
|
||||
@ -180,8 +174,7 @@ mdexists(SMgrRelation reln, ForkNumber forkNum)
|
||||
*
|
||||
* If isRedo is true, it's okay for the relation to exist already.
|
||||
*/
|
||||
void
|
||||
mdcreate(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
|
||||
void mdcreate(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
|
||||
{
|
||||
MdfdVec *mdfd;
|
||||
char *path;
|
||||
@ -280,8 +273,7 @@ mdcreate(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
|
||||
* Note: any failure should be reported as WARNING not ERROR, because
|
||||
* we are usually not in a transaction anymore when this is called.
|
||||
*/
|
||||
void
|
||||
mdunlink(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo)
|
||||
void mdunlink(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo)
|
||||
{
|
||||
/* Now do the per-fork work */
|
||||
if (forkNum == InvalidForkNumber)
|
||||
@ -306,7 +298,7 @@ do_truncate(const char *path)
|
||||
* He3DB: He3FS replace OS FS
|
||||
* only propeller instance can release disk space
|
||||
*/
|
||||
//ret = pg_truncate(path, 0);
|
||||
// ret = pg_truncate(path, 0);
|
||||
if (push_standby)
|
||||
{
|
||||
ret = pg_truncate(path, 0);
|
||||
@ -348,7 +340,7 @@ mdunlinkfork(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo)
|
||||
ret = do_truncate(path);
|
||||
|
||||
/* Forget any pending sync requests for the first segment */
|
||||
register_forget_request(rnode, forkNum, 0 /* first seg */ );
|
||||
register_forget_request(rnode, forkNum, 0 /* first seg */);
|
||||
}
|
||||
else
|
||||
ret = 0;
|
||||
@ -356,7 +348,8 @@ mdunlinkfork(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo)
|
||||
/* Next unlink the file, unless it was already found to be missing */
|
||||
if (ret == 0 || errno != ENOENT)
|
||||
{
|
||||
if (push_standby == true || RelFileNodeBackendIsTemp(rnode)) {
|
||||
if (push_standby == true || RelFileNodeBackendIsTemp(rnode))
|
||||
{
|
||||
ret = unlink(path);
|
||||
}
|
||||
if (ret < 0 && errno != ENOENT)
|
||||
@ -371,7 +364,7 @@ mdunlinkfork(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo)
|
||||
ret = do_truncate(path);
|
||||
|
||||
/* Register request to unlink first segment later */
|
||||
register_unlink_segment(rnode, forkNum, 0 /* first seg */ );
|
||||
register_unlink_segment(rnode, forkNum, 0 /* first seg */);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -379,7 +372,7 @@ mdunlinkfork(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo)
|
||||
*/
|
||||
if (ret >= 0)
|
||||
{
|
||||
char *segpath = (char *) palloc(strlen(path) + 12);
|
||||
char *segpath = (char *)palloc(strlen(path) + 12);
|
||||
BlockNumber segno;
|
||||
|
||||
/*
|
||||
@ -405,7 +398,8 @@ mdunlinkfork(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo)
|
||||
*/
|
||||
register_forget_request(rnode, forkNum, segno);
|
||||
}
|
||||
if (push_standby == true || RelFileNodeBackendIsTemp(rnode)) {
|
||||
if (push_standby == true || RelFileNodeBackendIsTemp(rnode))
|
||||
{
|
||||
if (unlink(segpath) < 0)
|
||||
{
|
||||
/* ENOENT is expected after the last segment... */
|
||||
@ -415,7 +409,9 @@ mdunlinkfork(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo)
|
||||
errmsg("could not remove file \"%s\": %m", segpath)));
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -434,8 +430,7 @@ mdunlinkfork(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo)
|
||||
* EOF). Note that we assume writing a block beyond current EOF
|
||||
* causes intervening file space to become filled with zeroes.
|
||||
*/
|
||||
void
|
||||
mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
void mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
char *buffer, bool skipFsync)
|
||||
{
|
||||
off_t seekpos;
|
||||
@ -462,9 +457,9 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
|
||||
v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE);
|
||||
|
||||
seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
|
||||
seekpos = (off_t)BLCKSZ * (blocknum % ((BlockNumber)RELSEG_SIZE));
|
||||
|
||||
Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
|
||||
Assert(seekpos < (off_t)BLCKSZ * RELSEG_SIZE);
|
||||
|
||||
if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ)
|
||||
{
|
||||
@ -486,7 +481,7 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
if (!skipFsync && !SmgrIsTemp(reln))
|
||||
register_dirty_segment(reln, forknum, v);
|
||||
|
||||
Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
|
||||
Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber)RELSEG_SIZE));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -535,7 +530,7 @@ mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
|
||||
mdfd->mdfd_vfd = fd;
|
||||
mdfd->mdfd_segno = 0;
|
||||
|
||||
Assert(_mdnblocks(reln, forknum, mdfd) <= ((BlockNumber) RELSEG_SIZE));
|
||||
Assert(_mdnblocks(reln, forknum, mdfd) <= ((BlockNumber)RELSEG_SIZE));
|
||||
|
||||
return mdfd;
|
||||
}
|
||||
@ -543,8 +538,7 @@ mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
|
||||
/*
|
||||
* mdopen() -- Initialize newly-opened relation.
|
||||
*/
|
||||
void
|
||||
mdopen(SMgrRelation reln)
|
||||
void mdopen(SMgrRelation reln)
|
||||
{
|
||||
/* mark it not open */
|
||||
for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
|
||||
@ -554,8 +548,7 @@ mdopen(SMgrRelation reln)
|
||||
/*
|
||||
* mdclose() -- Close the specified relation, if it isn't closed already.
|
||||
*/
|
||||
void
|
||||
mdclose(SMgrRelation reln, ForkNumber forknum)
|
||||
void mdclose(SMgrRelation reln, ForkNumber forknum)
|
||||
{
|
||||
int nopensegs = reln->md_num_open_segs[forknum];
|
||||
|
||||
@ -577,8 +570,7 @@ mdclose(SMgrRelation reln, ForkNumber forknum)
|
||||
/*
|
||||
* mdprefetch() -- Initiate asynchronous read of the specified block of a relation
|
||||
*/
|
||||
bool
|
||||
mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
|
||||
bool mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
|
||||
{
|
||||
#ifdef USE_PREFETCH
|
||||
off_t seekpos;
|
||||
@ -589,11 +581,11 @@ mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
|
||||
if (v == NULL)
|
||||
return false;
|
||||
|
||||
seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
|
||||
seekpos = (off_t)BLCKSZ * (blocknum % ((BlockNumber)RELSEG_SIZE));
|
||||
|
||||
Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
|
||||
Assert(seekpos < (off_t)BLCKSZ * RELSEG_SIZE);
|
||||
|
||||
(void) FilePrefetch(v->mdfd_vfd, seekpos, BLCKSZ, WAIT_EVENT_DATA_FILE_PREFETCH);
|
||||
(void)FilePrefetch(v->mdfd_vfd, seekpos, BLCKSZ, WAIT_EVENT_DATA_FILE_PREFETCH);
|
||||
#endif /* USE_PREFETCH */
|
||||
|
||||
return true;
|
||||
@ -605,8 +597,7 @@ mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
|
||||
* This accepts a range of blocks because flushing several pages at once is
|
||||
* considerably more efficient than doing so individually.
|
||||
*/
|
||||
void
|
||||
mdwriteback(SMgrRelation reln, ForkNumber forknum,
|
||||
void mdwriteback(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum, BlockNumber nblocks)
|
||||
{
|
||||
/*
|
||||
@ -621,7 +612,7 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum,
|
||||
int segnum_start,
|
||||
segnum_end;
|
||||
|
||||
v = _mdfd_getseg(reln, forknum, blocknum, true /* not used */ ,
|
||||
v = _mdfd_getseg(reln, forknum, blocknum, true /* not used */,
|
||||
EXTENSION_RETURN_NULL);
|
||||
|
||||
/*
|
||||
@ -637,14 +628,14 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum,
|
||||
/* compute number of desired writes within the current segment */
|
||||
segnum_end = (blocknum + nblocks - 1) / RELSEG_SIZE;
|
||||
if (segnum_start != segnum_end)
|
||||
nflush = RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE));
|
||||
nflush = RELSEG_SIZE - (blocknum % ((BlockNumber)RELSEG_SIZE));
|
||||
|
||||
Assert(nflush >= 1);
|
||||
Assert(nflush <= nblocks);
|
||||
|
||||
seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
|
||||
seekpos = (off_t)BLCKSZ * (blocknum % ((BlockNumber)RELSEG_SIZE));
|
||||
|
||||
FileWriteback(v->mdfd_vfd, seekpos, (off_t) BLCKSZ * nflush, WAIT_EVENT_DATA_FILE_FLUSH);
|
||||
FileWriteback(v->mdfd_vfd, seekpos, (off_t)BLCKSZ * nflush, WAIT_EVENT_DATA_FILE_FLUSH);
|
||||
|
||||
nblocks -= nflush;
|
||||
blocknum += nflush;
|
||||
@ -654,8 +645,7 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum,
|
||||
/*
|
||||
* mdread() -- Read the specified block from a relation.
|
||||
*/
|
||||
void
|
||||
mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
void mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
char *buffer)
|
||||
{
|
||||
off_t seekpos;
|
||||
@ -672,18 +662,29 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
v = _mdfd_getseg(reln, forknum, blocknum, false,
|
||||
EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
|
||||
|
||||
seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
|
||||
segno = (uint32_t) blocknum /((BlockNumber) RELSEG_SIZE);
|
||||
seekpos = (off_t)BLCKSZ * (blocknum % ((BlockNumber)RELSEG_SIZE));
|
||||
segno = (uint32_t)blocknum / ((BlockNumber)RELSEG_SIZE);
|
||||
|
||||
Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
|
||||
Assert(seekpos < (off_t)BLCKSZ * RELSEG_SIZE);
|
||||
|
||||
//TODO read page from disk
|
||||
// TODO read page from disk
|
||||
if (!(InitdbSingle || IsBootstrapProcessingMode() == true))
|
||||
nbytes = MasterFileRead(buffer,reln->smgr_rnode.node.dbNode,reln->smgr_rnode.node.relNode,forknum,blocknum);
|
||||
nbytes = MasterFileRead(buffer, reln->smgr_rnode.node.dbNode, reln->smgr_rnode.node.relNode, forknum, blocknum);
|
||||
|
||||
if (nbytes == 0)
|
||||
{
|
||||
nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_READ);
|
||||
if (!(InitdbSingle || IsBootstrapProcessingMode() == true))
|
||||
{
|
||||
PageKey pageKey;
|
||||
pageKey.relfileNode.dbNode = reln->smgr_rnode.node.dbNode;
|
||||
pageKey.relfileNode.relNode = reln->smgr_rnode.node.relNode;
|
||||
pageKey.forkNo = forknum;
|
||||
pageKey.blkNo = blocknum;
|
||||
pageKey.pageLsn = PageGetLSN(buffer);
|
||||
pageKey.replyLsn = 0;
|
||||
ReceivePageFromDataBuffer(&pageKey, (uint8_t *)buffer);
|
||||
}
|
||||
}
|
||||
|
||||
TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
|
||||
@ -735,8 +736,7 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
* 1)return read bytes
|
||||
* 2)add parameter to control pageXlog read or only page read
|
||||
*/
|
||||
int
|
||||
he3db_mdread_pagexlog(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
int he3db_mdread_pagexlog(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
char **buffer, XLogRecPtr lsn)
|
||||
{
|
||||
off_t seekpos;
|
||||
@ -755,62 +755,54 @@ he3db_mdread_pagexlog(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknu
|
||||
// reln->smgr_rnode.node.relNode,
|
||||
// reln->smgr_rnode.backend);
|
||||
|
||||
OriginDPageKey odpk;
|
||||
PageKey pageKey;
|
||||
Bufrd bufrd;
|
||||
bufrd.count = 0;
|
||||
|
||||
Bufrd *bufrd = NULL;
|
||||
int count = 0;
|
||||
|
||||
|
||||
if (!push_standby)
|
||||
{
|
||||
bufrd = (Bufrd *)malloc(sizeof(Bufrd));
|
||||
bufrd->count = 0;
|
||||
bufrd->cap = 0;
|
||||
bufrd->buf = NULL;
|
||||
|
||||
pageKey.relfileNode.dbNode = pageTag.rnode.dbNode;
|
||||
pageKey.relfileNode.relNode = pageTag.rnode.relNode;
|
||||
pageKey.forkNo = pageTag.forkNum;
|
||||
pageKey.forkNo = (uint32)pageTag.forkNum;
|
||||
pageKey.blkNo = pageTag.blockNum;
|
||||
pageKey.pageLsn = 0;
|
||||
pageKey.replyLsn = lsn;
|
||||
|
||||
bufrd = MoveOnePageToMemory(pageKey);
|
||||
if (bufrd.count > 0)
|
||||
{
|
||||
nbytes = bufrd.count;
|
||||
*buffer = (uint8_t *)malloc(bufrd.count);
|
||||
memcpy(*buffer, bufrd.buf,bufrd.count);
|
||||
free_dataRead(bufrd.buf, bufrd.count, bufrd.cap);
|
||||
if (push_standby)
|
||||
{
|
||||
Assert(bufrd.count == BLCKSZ);
|
||||
pageKey.pageLsn = PageGetLSN(*buffer);
|
||||
LsnNode *head = GetLogIndexByPage(&pageTag, pageKey.pageLsn, pageKey.replyLsn);
|
||||
if (head->next != NULL)
|
||||
{
|
||||
TimeLineID tli;
|
||||
GetXLogReplayRecPtr(&tli);
|
||||
Bufrd result;
|
||||
result = ReadWalsByPage(pageKey.relfileNode.dbNode, pageKey.relfileNode.relNode,
|
||||
pageKey.forkNo, pageKey.blkNo, tli, head);
|
||||
Assert(result.count != 0);
|
||||
nbytes += result.count;
|
||||
*buffer = (uint8_t *)realloc(*buffer, BLCKSZ + result.count);
|
||||
strcat(*buffer,result.buf);
|
||||
free_dataRead(result.buf, result.count, result.cap);
|
||||
odpk.pk = pageKey;
|
||||
odpk.opration = (int)EVICT;
|
||||
|
||||
GetPageFromCurrentNode(pageKey, bufrd);
|
||||
count = bufrd->count;
|
||||
}
|
||||
//TODO free result
|
||||
FreeLsnNode(head);
|
||||
}
|
||||
// *buffer = bufrd.buf;
|
||||
return nbytes;
|
||||
|
||||
if (count > 0)
|
||||
{
|
||||
*buffer = bufrd->buf;
|
||||
free(bufrd);
|
||||
AddOneItemToDPArray(odpk);
|
||||
return count;
|
||||
}
|
||||
else
|
||||
{
|
||||
//TODO 如果本地盘不存在,则调用标准接口读取page,再调用tikv的接口获取范围的wal
|
||||
uint8_t *buf = (uint8_t *)malloc(BLCKSZ);
|
||||
*buffer = (uint8_t *)malloc(BLCKSZ);
|
||||
// TODO 如果本地盘不存在,则调用标准接口读取page,再调用tikv的接口获取范围的wal
|
||||
v = _mdfd_getseg(reln, forknum, blocknum, false,
|
||||
EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
|
||||
|
||||
seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
|
||||
seekpos = (off_t)BLCKSZ * (blocknum % ((BlockNumber)RELSEG_SIZE));
|
||||
|
||||
Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
|
||||
segno = (uint32_t)blocknum /((BlockNumber) RELSEG_SIZE);
|
||||
Assert(seekpos < (off_t)BLCKSZ * RELSEG_SIZE);
|
||||
segno = (uint32_t)blocknum / ((BlockNumber)RELSEG_SIZE);
|
||||
|
||||
|
||||
nbytes = FileRead(v->mdfd_vfd, buf, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_READ);
|
||||
nbytes = FileRead(v->mdfd_vfd, *buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_READ);
|
||||
if (nbytes < BLCKSZ)
|
||||
{
|
||||
if (nbytes < 0)
|
||||
@ -818,6 +810,9 @@ he3db_mdread_pagexlog(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknu
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not read block %u in file \"%s\": %m",
|
||||
blocknum, FilePathName(v->mdfd_vfd))));
|
||||
if (he3mirror && nbytes == 0)
|
||||
MemSet(*buffer, 0, BLCKSZ);
|
||||
|
||||
ereport(PANIC,
|
||||
(errcode(ERRCODE_DATA_CORRUPTED),
|
||||
errmsg("could not read block %u in file \"%s\": read only %d of %d bytes",
|
||||
@ -825,8 +820,13 @@ he3db_mdread_pagexlog(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknu
|
||||
nbytes, BLCKSZ)));
|
||||
}
|
||||
|
||||
if (push_standby)
|
||||
{
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
pageKey.pageLsn = PageGetLSN(*buffer);
|
||||
|
||||
pageKey.pageLsn = PageGetLSN(buf);;
|
||||
pageKey.replyLsn = lsn;
|
||||
|
||||
LsnNode *head = GetLogIndexByPage(&pageTag, pageKey.pageLsn, pageKey.replyLsn);
|
||||
@ -835,19 +835,31 @@ he3db_mdread_pagexlog(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknu
|
||||
TimeLineID tli;
|
||||
GetXLogReplayRecPtr(&tli);
|
||||
Bufrd result;
|
||||
result = GetWalsFromDisk(pageKey);
|
||||
if (result.count == 0) {
|
||||
result = ReadWalsByPage(pageKey.relfileNode.dbNode,pageKey.relfileNode.relNode,
|
||||
pageKey.forkNo,pageKey.blkNo, tli, head);
|
||||
WalLdPageKey wlpk;
|
||||
wlpk.sk.dbid = pageKey.relfileNode.dbNode;
|
||||
wlpk.sk.relid = pageKey.relfileNode.relNode;
|
||||
wlpk.pageLsn = pageKey.pageLsn;
|
||||
wlpk.partition = 0;
|
||||
result.count = 0;
|
||||
result = GetWalFromLocalBuffer(&wlpk);
|
||||
|
||||
if (result.count == 0)
|
||||
{
|
||||
free(result.buf);
|
||||
result = ReadWalsByPage(pageKey.relfileNode.dbNode, pageKey.relfileNode.relNode,
|
||||
pageKey.forkNo, pageKey.blkNo, tli, head);
|
||||
}
|
||||
Assert(result.count != 0);
|
||||
nbytes += result.count;
|
||||
buf = (uint8_t *)realloc(buf, BLCKSZ + result.count);
|
||||
strcat(buf,result.buf);
|
||||
//TODO free result
|
||||
*buffer = (uint8_t *)realloc(*buffer, BLCKSZ + result.count);
|
||||
memcpy((*buffer) + BLCKSZ, result.buf, result.count);
|
||||
// TODO free result
|
||||
free_dataRead(result.buf, result.count, result.cap);
|
||||
}
|
||||
*buffer = buf;
|
||||
else
|
||||
{
|
||||
ReceivePageFromDataBuffer(&pageKey, *buffer);
|
||||
}
|
||||
FreeLsnNode(head);
|
||||
return nbytes;
|
||||
}
|
||||
@ -913,17 +925,16 @@ he3db_mdread_pagexlog(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknu
|
||||
* 1)return read bytes
|
||||
* 2)add parameter to control pageXlog read or only page read
|
||||
*/
|
||||
int
|
||||
he3db_mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
int he3db_mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
char **buffer, bool pagexlog, XLogRecPtr lsn)
|
||||
{
|
||||
// if (likely(pagexlog))
|
||||
// {
|
||||
// if (likely(pagexlog))
|
||||
// {
|
||||
return he3db_mdread_pagexlog(reln, forknum, blocknum, buffer, lsn);
|
||||
// }
|
||||
// }
|
||||
|
||||
// mdread(reln, forknum, blocknum, *buffer, lsn);
|
||||
// return 0;
|
||||
// mdread(reln, forknum, blocknum, *buffer, lsn);
|
||||
// return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -933,8 +944,7 @@ he3db_mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
* relation (ie, those before the current EOF). To extend a relation,
|
||||
* use mdextend().
|
||||
*/
|
||||
void
|
||||
mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
void mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
char *buffer, bool skipFsync)
|
||||
{
|
||||
off_t seekpos;
|
||||
@ -955,9 +965,9 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
v = _mdfd_getseg(reln, forknum, blocknum, skipFsync,
|
||||
EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
|
||||
|
||||
seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
|
||||
seekpos = (off_t)BLCKSZ * (blocknum % ((BlockNumber)RELSEG_SIZE));
|
||||
|
||||
Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
|
||||
Assert(seekpos < (off_t)BLCKSZ * RELSEG_SIZE);
|
||||
|
||||
nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_WRITE);
|
||||
|
||||
@ -1029,10 +1039,10 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum)
|
||||
for (;;)
|
||||
{
|
||||
nblocks = _mdnblocks(reln, forknum, v);
|
||||
if (nblocks > ((BlockNumber) RELSEG_SIZE))
|
||||
if (nblocks > ((BlockNumber)RELSEG_SIZE))
|
||||
elog(FATAL, "segment too big");
|
||||
if (nblocks < ((BlockNumber) RELSEG_SIZE))
|
||||
return (segno * ((BlockNumber) RELSEG_SIZE)) + nblocks;
|
||||
if (nblocks < ((BlockNumber)RELSEG_SIZE))
|
||||
return (segno * ((BlockNumber)RELSEG_SIZE)) + nblocks;
|
||||
|
||||
/*
|
||||
* If segment is exactly RELSEG_SIZE, advance to next one.
|
||||
@ -1048,15 +1058,14 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum)
|
||||
*/
|
||||
v = _mdfd_openseg(reln, forknum, segno, 0);
|
||||
if (v == NULL)
|
||||
return segno * ((BlockNumber) RELSEG_SIZE);
|
||||
return segno * ((BlockNumber)RELSEG_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* mdtruncate() -- Truncate relation to specified number of blocks.
|
||||
*/
|
||||
void
|
||||
mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
|
||||
void mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
|
||||
{
|
||||
BlockNumber curnblk;
|
||||
BlockNumber priorblocks;
|
||||
@ -1114,7 +1123,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
|
||||
FileClose(v->mdfd_vfd);
|
||||
_fdvec_resize(reln, forknum, curopensegs - 1);
|
||||
}
|
||||
else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks)
|
||||
else if (priorblocks + ((BlockNumber)RELSEG_SIZE) > nblocks)
|
||||
{
|
||||
/*
|
||||
* This is the last segment we want to keep. Truncate the file to
|
||||
@ -1125,7 +1134,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
|
||||
*/
|
||||
BlockNumber lastsegblocks = nblocks - priorblocks;
|
||||
|
||||
if (FileTruncate(v->mdfd_vfd, (off_t) lastsegblocks * BLCKSZ, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
|
||||
if (FileTruncate(v->mdfd_vfd, (off_t)lastsegblocks * BLCKSZ, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not truncate file \"%s\" to %u blocks: %m",
|
||||
@ -1161,8 +1170,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
|
||||
* crash before the next checkpoint syncs the newly-inactive segment, that
|
||||
* segment may survive recovery, reintroducing unwanted data into the table.
|
||||
*/
|
||||
void
|
||||
mdimmedsync(SMgrRelation reln, ForkNumber forknum)
|
||||
void mdimmedsync(SMgrRelation reln, ForkNumber forknum)
|
||||
{
|
||||
int segno;
|
||||
int min_inactive_seg;
|
||||
@ -1224,7 +1232,7 @@ register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
|
||||
/* Temp relations should never be fsync'd */
|
||||
Assert(!SmgrIsTemp(reln));
|
||||
|
||||
if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false /* retryOnError */ ))
|
||||
if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false /* retryOnError */))
|
||||
{
|
||||
ereport(DEBUG1,
|
||||
(errmsg_internal("could not forward fsync request because request queue is full")));
|
||||
@ -1251,7 +1259,7 @@ register_unlink_segment(RelFileNodeBackend rnode, ForkNumber forknum,
|
||||
/* Should never be used with temp relations */
|
||||
Assert(!RelFileNodeBackendIsTemp(rnode));
|
||||
|
||||
RegisterSyncRequest(&tag, SYNC_UNLINK_REQUEST, true /* retryOnError */ );
|
||||
RegisterSyncRequest(&tag, SYNC_UNLINK_REQUEST, true /* retryOnError */);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1265,14 +1273,13 @@ register_forget_request(RelFileNodeBackend rnode, ForkNumber forknum,
|
||||
|
||||
INIT_MD_FILETAG(tag, rnode.node, forknum, segno);
|
||||
|
||||
RegisterSyncRequest(&tag, SYNC_FORGET_REQUEST, true /* retryOnError */ );
|
||||
RegisterSyncRequest(&tag, SYNC_FORGET_REQUEST, true /* retryOnError */);
|
||||
}
|
||||
|
||||
/*
|
||||
* ForgetDatabaseSyncRequests -- forget any fsyncs and unlinks for a DB
|
||||
*/
|
||||
void
|
||||
ForgetDatabaseSyncRequests(Oid dbid)
|
||||
void ForgetDatabaseSyncRequests(Oid dbid)
|
||||
{
|
||||
FileTag tag;
|
||||
RelFileNode rnode;
|
||||
@ -1283,14 +1290,13 @@ ForgetDatabaseSyncRequests(Oid dbid)
|
||||
|
||||
INIT_MD_FILETAG(tag, rnode, InvalidForkNumber, InvalidBlockNumber);
|
||||
|
||||
RegisterSyncRequest(&tag, SYNC_FILTER_REQUEST, true /* retryOnError */ );
|
||||
RegisterSyncRequest(&tag, SYNC_FILTER_REQUEST, true /* retryOnError */);
|
||||
}
|
||||
|
||||
/*
|
||||
* DropRelationFiles -- drop files of all given relations
|
||||
*/
|
||||
void
|
||||
DropRelationFiles(RelFileNode *delrels, int ndelrels, bool isRedo)
|
||||
void DropRelationFiles(RelFileNode *delrels, int ndelrels, bool isRedo)
|
||||
{
|
||||
SMgrRelation *srels;
|
||||
int i;
|
||||
@ -1317,7 +1323,6 @@ DropRelationFiles(RelFileNode *delrels, int ndelrels, bool isRedo)
|
||||
pfree(srels);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* _fdvec_resize() -- Resize the fork's open segments array
|
||||
*/
|
||||
@ -1413,7 +1418,7 @@ _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno,
|
||||
v->mdfd_vfd = fd;
|
||||
v->mdfd_segno = segno;
|
||||
|
||||
Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
|
||||
Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber)RELSEG_SIZE));
|
||||
|
||||
/* all done */
|
||||
return v;
|
||||
@ -1439,7 +1444,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
|
||||
Assert(behavior &
|
||||
(EXTENSION_FAIL | EXTENSION_CREATE | EXTENSION_RETURN_NULL));
|
||||
|
||||
targetseg = blkno / ((BlockNumber) RELSEG_SIZE);
|
||||
targetseg = blkno / ((BlockNumber)RELSEG_SIZE);
|
||||
|
||||
/* if an existing and opened segment, we're done */
|
||||
if (targetseg < reln->md_num_open_segs[forknum])
|
||||
@ -1472,7 +1477,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
|
||||
|
||||
Assert(nextsegno == v->mdfd_segno + 1);
|
||||
|
||||
if (nblocks > ((BlockNumber) RELSEG_SIZE))
|
||||
if (nblocks > ((BlockNumber)RELSEG_SIZE))
|
||||
elog(FATAL, "segment too big");
|
||||
|
||||
if ((behavior & EXTENSION_CREATE) ||
|
||||
@ -1492,19 +1497,19 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
|
||||
* matters if in recovery, or if the caller is extending the
|
||||
* relation discontiguously, but that can happen in hash indexes.)
|
||||
*/
|
||||
if (nblocks < ((BlockNumber) RELSEG_SIZE))
|
||||
if (nblocks < ((BlockNumber)RELSEG_SIZE))
|
||||
{
|
||||
char *zerobuf = palloc0(BLCKSZ);
|
||||
|
||||
mdextend(reln, forknum,
|
||||
nextsegno * ((BlockNumber) RELSEG_SIZE) - 1,
|
||||
nextsegno * ((BlockNumber)RELSEG_SIZE) - 1,
|
||||
zerobuf, skipFsync);
|
||||
pfree(zerobuf);
|
||||
}
|
||||
flags = O_CREAT;
|
||||
}
|
||||
else if (!(behavior & EXTENSION_DONT_CHECK_SIZE) &&
|
||||
nblocks < ((BlockNumber) RELSEG_SIZE))
|
||||
nblocks < ((BlockNumber)RELSEG_SIZE))
|
||||
{
|
||||
/*
|
||||
* When not extending (or explicitly including truncated
|
||||
@ -1564,7 +1569,7 @@ _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
|
||||
errmsg("could not seek to end of file \"%s\": %m",
|
||||
FilePathName(seg->mdfd_vfd))));
|
||||
/* note that this calculation will ignore any partial block at EOF */
|
||||
return (BlockNumber) (len / BLCKSZ);
|
||||
return (BlockNumber)(len / BLCKSZ);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1573,8 +1578,7 @@ _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
|
||||
*
|
||||
* Return 0 on success, -1 on failure, with errno set.
|
||||
*/
|
||||
int
|
||||
mdsyncfiletag(const FileTag *ftag, char *path)
|
||||
int mdsyncfiletag(const FileTag *ftag, char *path)
|
||||
{
|
||||
SMgrRelation reln = smgropen(ftag->rnode, InvalidBackendId);
|
||||
File file;
|
||||
@ -1622,8 +1626,7 @@ mdsyncfiletag(const FileTag *ftag, char *path)
|
||||
*
|
||||
* Return 0 on success, -1 on failure, with errno set.
|
||||
*/
|
||||
int
|
||||
mdunlinkfiletag(const FileTag *ftag, char *path)
|
||||
int mdunlinkfiletag(const FileTag *ftag, char *path)
|
||||
{
|
||||
char *p;
|
||||
|
||||
@ -1641,8 +1644,7 @@ mdunlinkfiletag(const FileTag *ftag, char *path)
|
||||
* a SYNC_FILTER_REQUEST request. This will be called for all pending
|
||||
* requests to find out whether to forget them.
|
||||
*/
|
||||
bool
|
||||
mdfiletagmatches(const FileTag *ftag, const FileTag *candidate)
|
||||
bool mdfiletagmatches(const FileTag *ftag, const FileTag *candidate)
|
||||
{
|
||||
/*
|
||||
* For now we only use filter requests as a way to drop all scheduled
|
||||
|
@ -24,7 +24,9 @@
|
||||
#include "storage/md.h"
|
||||
#include "storage/smgr.h"
|
||||
#include "storage/filecache.h"
|
||||
#include "utils/hfs.h"
|
||||
#include "postmaster/secondbuffer.h"
|
||||
//#include "utils/hfs.h"
|
||||
#include "utils/backend_status.h"
|
||||
#include "utils/hsearch.h"
|
||||
#include "utils/inval.h"
|
||||
#include "utils/guc.h"
|
||||
@ -55,7 +57,7 @@ typedef struct f_smgr
|
||||
BlockNumber blocknum, char *buffer, bool skipFsync);
|
||||
bool (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum);
|
||||
int (*smgr_read) (SMgrRelation reln, ForkNumber forknum,
|
||||
void (*smgr_read) (SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum, char *buffer);
|
||||
void (*smgr_write) (SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum, char *buffer, bool skipFsync);
|
||||
@ -390,10 +392,12 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
|
||||
int i = 0;
|
||||
RelFileNodeBackend *rnodes;
|
||||
ForkNumber forknum;
|
||||
OriginDPageKey odpk;
|
||||
PageKey pk;
|
||||
|
||||
|
||||
if (nrels == 0)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Get rid of any remaining buffers for the relations. bufmgr will just
|
||||
* drop them without bothering to write the contents.
|
||||
@ -449,7 +453,14 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
|
||||
smgrsw[which].smgr_unlink(rnodes[i], forknum, isRedo);
|
||||
|
||||
//remove unused pages and related wals in localdisk cache.
|
||||
RemoveBufferFromLocal(rnodes[i].node.dbNode, rnodes[i].node.relNode, MAX_FORKNUM, 0);
|
||||
// RemoveBufferFromLocal(rnodes[i].node.dbNode, rnodes[i].node.relNode, MAX_FORKNUM, 0);
|
||||
pk.relfileNode.dbNode = rnodes[i].node.dbNode;
|
||||
pk.relfileNode.relNode = rnodes[i].node.relNode;
|
||||
pk.forkNo = MAX_FORKNUM;
|
||||
odpk.pk = pk;
|
||||
odpk.opration = DROP;
|
||||
AddOneItemToDPArray(odpk);
|
||||
|
||||
}
|
||||
|
||||
pfree(rnodes);
|
||||
@ -473,11 +484,11 @@ smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
// return;
|
||||
// }
|
||||
|
||||
//if ((push_standby != true && EnableHotStandby != true) || IsBootstrapProcessingMode() || InitdbSingle) {
|
||||
if ((push_standby != true && EnableHotStandby != true) || IsBootstrapProcessingMode() || InitdbSingle || he3mirror) {
|
||||
smgrsw[reln->smgr_which].smgr_extend(reln, forknum, blocknum,
|
||||
buffer, skipFsync);
|
||||
// elog(LOG,"smgrextend reln %d,flk %d,blk %d",reln->smgr_rnode.node.relNode,forknum,blocknum);
|
||||
//}
|
||||
}
|
||||
|
||||
/*
|
||||
* Normally we expect this to increase nblocks by one, but if the cached
|
||||
@ -562,15 +573,15 @@ smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
PageKey pageKey;
|
||||
pageKey.relfileNode.dbNode = reln->smgr_rnode.node.dbNode;
|
||||
pageKey.relfileNode.relNode = reln->smgr_rnode.node.relNode;
|
||||
pageKey.relfileNode.spcNode = reln->smgr_rnode.node.spcNode;
|
||||
|
||||
pageKey.blkNo = blocknum;
|
||||
pageKey.forkNo = forknum;
|
||||
pageKey.pageLsn = lsn;
|
||||
|
||||
EvictOnePageOutOfMemory(pageKey, buffer);
|
||||
if (push_standby) {
|
||||
if (push_standby || he3mirror) {
|
||||
smgrsw[reln->smgr_which].smgr_write(reln, forknum, blocknum, buffer, skipFsync);
|
||||
} else {
|
||||
ReceivePageFromDataBuffer(&pageKey, (uint8_t *) buffer);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -665,6 +676,8 @@ void
|
||||
smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nblocks)
|
||||
{
|
||||
int i;
|
||||
PageKey pk;
|
||||
OriginDPageKey odpk;
|
||||
|
||||
/*
|
||||
* Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will
|
||||
@ -693,7 +706,17 @@ smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nb
|
||||
smgrsw[reln->smgr_which].smgr_truncate(reln, forknum[i], nblocks[i]);
|
||||
|
||||
//remove unused pages and related wals in localdisk cache.
|
||||
RemoveBufferFromLocal(reln->smgr_rnode.node.dbNode, reln->smgr_rnode.node.relNode, forknum[i], nblocks[i]);
|
||||
// RemoveBufferFromLocal(reln->smgr_rnode.node.dbNode, reln->smgr_rnode.node.relNode, forknum[i], nblocks[i]);
|
||||
if (IsBootstrapProcessingMode() != true && InitdbSingle != true)
|
||||
{
|
||||
pk.relfileNode.dbNode = reln->smgr_rnode.node.dbNode;
|
||||
pk.relfileNode.relNode = reln->smgr_rnode.node.relNode;
|
||||
pk.forkNo = forknum[i];
|
||||
pk.blkNo = nblocks[i];
|
||||
odpk.pk = pk;
|
||||
odpk.opration = (int)TRUNCATE;
|
||||
AddOneItemToDPArray(odpk);
|
||||
}
|
||||
/*
|
||||
* We might as well update the local smgr_cached_nblocks values. The
|
||||
* smgr cache inval message that this function sent will cause other
|
||||
@ -719,7 +742,8 @@ void
|
||||
smgrtruncatelsn(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nblocks,XLogRecPtr lsn)
|
||||
{
|
||||
int i;
|
||||
|
||||
PageKey pk;
|
||||
OriginDPageKey odpk;
|
||||
/*
|
||||
* Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will
|
||||
* just drop them without bothering to write the contents.
|
||||
@ -746,20 +770,26 @@ smgrtruncatelsn(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber
|
||||
reln->smgr_cached_nblocks[forknum[i]] = InvalidBlockNumber;
|
||||
if(!SmgrIsTemp(reln)) {
|
||||
if (false == flag) {
|
||||
XLogRecPtr pushLsn;
|
||||
XLogRecPtr minApplyLsn;
|
||||
do {
|
||||
sleep(1);
|
||||
pushLsn = QueryPushLsn();
|
||||
printf("====pushlsn=%lx==lsn==%lx==\n",pushLsn,lsn);
|
||||
} while(pushLsn!=InvalidXLogRecPtr && pushLsn<lsn);
|
||||
minApplyLsn = He3DBQueryMinLsnFromAllStanby();
|
||||
printf("====pushlsn=%lx==lsn==%lx==\n",minApplyLsn,lsn);
|
||||
} while(minApplyLsn!=InvalidXLogRecPtr && minApplyLsn<lsn);
|
||||
flag = true;
|
||||
}
|
||||
}
|
||||
smgrsw[reln->smgr_which].smgr_truncate(reln, forknum[i], nblocks[i]);
|
||||
|
||||
//remove unused pages and related wals in localdisk cache.
|
||||
RemoveBufferFromLocal(reln->smgr_rnode.node.dbNode, reln->smgr_rnode.node.relNode, forknum[i], nblocks[i]);
|
||||
|
||||
// RemoveBufferFromLocal(reln->smgr_rnode.node.dbNode, reln->smgr_rnode.node.relNode, forknum[i], nblocks[i]);
|
||||
pk.relfileNode.dbNode = reln->smgr_rnode.node.dbNode;
|
||||
pk.relfileNode.relNode = reln->smgr_rnode.node.relNode;
|
||||
pk.forkNo = forknum[i];
|
||||
pk.blkNo = nblocks[i];
|
||||
odpk.pk = pk;
|
||||
odpk.opration = TRUNCATE;
|
||||
AddOneItemToDPArray(odpk);
|
||||
/*
|
||||
* We might as well update the local smgr_cached_nblocks values. The
|
||||
* smgr cache inval message that this function sent will cause other
|
||||
|
@ -86,8 +86,15 @@
|
||||
* global variables
|
||||
* ----------------
|
||||
*/
|
||||
bool isPreCache = false;
|
||||
bool isPreCacheTable = false;
|
||||
bool isPreCacheIndex = false;
|
||||
bool isPreCacheIndexDone = false;
|
||||
bool needPreCacheEscape = false;
|
||||
bool needUnpreCacheEscape = false;
|
||||
bool isPreCacheAction = true;
|
||||
Oid preCacheNodeOid = 0;
|
||||
uint16 *preCacheNodesCountPtr = NULL;
|
||||
Oid *preCacheNodesPtr = NULL;
|
||||
const char *debug_query_string; /* client-supplied query string */
|
||||
|
||||
/* Note: whereToSendOutput is initialized for the bootstrap/standalone case */
|
||||
@ -1213,9 +1220,23 @@ exec_simple_query(const char *query_string)
|
||||
*/
|
||||
MemoryContextSwitchTo(oldcontext);
|
||||
|
||||
if (isPreCache)
|
||||
if (isPreCacheTable || isPreCacheIndex)
|
||||
{
|
||||
if (isPreCacheAction)
|
||||
{
|
||||
needPreCacheEscape = true;
|
||||
needUnpreCacheEscape = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
needPreCacheEscape = false;
|
||||
needUnpreCacheEscape = true;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
needPreCacheEscape = false;
|
||||
needUnpreCacheEscape = false;
|
||||
}
|
||||
/*
|
||||
* Run the portal to completion, and then drop it (and the receiver).
|
||||
@ -1228,9 +1249,10 @@ exec_simple_query(const char *query_string)
|
||||
receiver,
|
||||
&qc);
|
||||
|
||||
if (isPreCache)
|
||||
if (isPreCacheTable || isPreCacheIndex)
|
||||
{
|
||||
needPreCacheEscape = false;
|
||||
needUnpreCacheEscape = false;
|
||||
}
|
||||
|
||||
receiver->rDestroy(receiver);
|
||||
@ -1329,6 +1351,55 @@ exec_simple_query(const char *query_string)
|
||||
debug_query_string = NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
he3_exec_simple_query(const char *query_string)
|
||||
{
|
||||
if (strstr(query_string, "precache table ") != NULL && query_string - strstr(query_string, "precache table ") == 0)
|
||||
{
|
||||
isPreCacheTable = true;
|
||||
preCacheNodeOid = 0;
|
||||
isPreCacheAction = true;
|
||||
exec_simple_query(query_string + strlen("precache table "));
|
||||
preCacheNodeOid = 0;
|
||||
isPreCacheTable = false;
|
||||
}
|
||||
else if (strstr(query_string, "precache index ") != NULL && query_string - strstr(query_string, "precache index ") == 0)
|
||||
{
|
||||
isPreCacheIndex = true;
|
||||
isPreCacheIndexDone = false;
|
||||
preCacheNodeOid = 0;
|
||||
isPreCacheAction = true;
|
||||
exec_simple_query(query_string + strlen("precache index "));
|
||||
preCacheNodeOid = 0;
|
||||
isPreCacheIndexDone = false;
|
||||
isPreCacheIndex = false;
|
||||
}
|
||||
else if (strstr(query_string, "unprecache table ") != NULL && query_string - strstr(query_string, "unprecache table ") == 0)
|
||||
{
|
||||
isPreCacheTable = true;
|
||||
preCacheNodeOid = 0;
|
||||
isPreCacheAction = false;
|
||||
exec_simple_query(query_string + strlen("unprecache table "));
|
||||
preCacheNodeOid = 0;
|
||||
isPreCacheTable = false;
|
||||
}
|
||||
else if (strstr(query_string, "unprecache index ") != NULL && query_string - strstr(query_string, "unprecache index ") == 0)
|
||||
{
|
||||
isPreCacheIndex = true;
|
||||
isPreCacheIndexDone = false;
|
||||
preCacheNodeOid = 0;
|
||||
isPreCacheAction = false;
|
||||
exec_simple_query(query_string + strlen("unprecache index "));
|
||||
preCacheNodeOid = 0;
|
||||
isPreCacheIndexDone = false;
|
||||
isPreCacheIndex = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
exec_simple_query(query_string);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* exec_parse_message
|
||||
*
|
||||
@ -4504,16 +4575,7 @@ PostgresMain(int argc, char *argv[], bool PrivateConn,
|
||||
}
|
||||
else
|
||||
{
|
||||
if (strstr(query_string, "precache ") != NULL && query_string - strstr(query_string, "precache ") == 0)
|
||||
{
|
||||
isPreCache = true;
|
||||
exec_simple_query(query_string + strlen("precache "));
|
||||
isPreCache = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
exec_simple_query(query_string);
|
||||
}
|
||||
he3_exec_simple_query(query_string);
|
||||
}
|
||||
|
||||
send_ready_for_query = true;
|
||||
|
@ -17,6 +17,8 @@
|
||||
#include "pg_trace.h"
|
||||
#include "pgstat.h"
|
||||
#include "port/atomics.h" /* for memory barriers */
|
||||
#include "replication/walsender.h"
|
||||
#include "replication/walsender_private.h"
|
||||
#include "storage/ipc.h"
|
||||
#include "storage/proc.h" /* for MyProc */
|
||||
#include "storage/sinvaladt.h"
|
||||
@ -1148,3 +1150,42 @@ pgstat_clip_activity(const char *raw_activity)
|
||||
|
||||
return activity;
|
||||
}
|
||||
|
||||
XLogRecPtr He3DBQueryMinLsnFromAllStanby()
|
||||
{
|
||||
int i;
|
||||
XLogRecPtr minApplyLsn;
|
||||
int procpid = -1;
|
||||
for (i = 0; i < NumBackendStatSlots; i++)
|
||||
{
|
||||
if (strcmp(BackendStatusArray[i].st_appname, "pgmirror") == 0)
|
||||
{
|
||||
procpid = BackendStatusArray[i].st_procpid;
|
||||
break;
|
||||
}
|
||||
}
|
||||
Assert(WalSndCtl != NULL);
|
||||
|
||||
for (i = 0; i < max_wal_senders; i++)
|
||||
{
|
||||
int pid;
|
||||
XLogRecPtr apply;
|
||||
WalSnd *walsnd = &WalSndCtl->walsnds[i];
|
||||
SpinLockAcquire(&walsnd->mutex);
|
||||
if (walsnd->pid == 0)
|
||||
{
|
||||
SpinLockRelease(&walsnd->mutex);
|
||||
continue;
|
||||
}
|
||||
pid = walsnd->pid;
|
||||
apply = walsnd->apply;
|
||||
SpinLockRelease(&walsnd->mutex);
|
||||
if (pid != procpid)
|
||||
{
|
||||
if (apply < minApplyLsn)
|
||||
minApplyLsn = apply;
|
||||
}
|
||||
}
|
||||
return minApplyLsn;
|
||||
|
||||
}
|
@ -251,6 +251,12 @@ pgstat_get_wait_activity(WaitEventActivity w)
|
||||
case WAIT_EVENT_PAGEFLUSH_MAIN:
|
||||
event_name = "PageFlushMain";
|
||||
break;
|
||||
case WAIT_EVENT_CLEAN_LOGINDEX_MAIN:
|
||||
event_name = "CleanLogindexMain";
|
||||
break;
|
||||
case WAIT_EVENT_SECONDBUFFER_MAIN:
|
||||
event_name = "SecondBufferMain";
|
||||
break;
|
||||
/* no default case, so that compiler will warn */
|
||||
}
|
||||
|
||||
|
12
src/backend/utils/cache/syscache.c
vendored
12
src/backend/utils/cache/syscache.c
vendored
@ -74,7 +74,6 @@
|
||||
#include "catalog/pg_ts_template.h"
|
||||
#include "catalog/pg_type.h"
|
||||
#include "catalog/pg_user_mapping.h"
|
||||
#include "catalog/pg_hot_data.h"
|
||||
#include "lib/qunique.h"
|
||||
#include "utils/catcache.h"
|
||||
#include "utils/rel.h"
|
||||
@ -476,17 +475,6 @@ static const struct cachedesc cacheinfo[] = {
|
||||
},
|
||||
4
|
||||
},
|
||||
{HotDataRelationId, /* HOTDATADATNAMERELNAME */
|
||||
HotDataDatnameRelnameIndexId,
|
||||
2,
|
||||
{
|
||||
Anum_pg_hot_data_datname,
|
||||
Anum_pg_hot_data_relname,
|
||||
0,
|
||||
0
|
||||
},
|
||||
4
|
||||
},
|
||||
{IndexRelationId, /* INDEXRELID */
|
||||
IndexRelidIndexId,
|
||||
1,
|
||||
|
@ -74,6 +74,7 @@
|
||||
#include "postmaster/postmaster.h"
|
||||
#include "postmaster/syslogger.h"
|
||||
#include "postmaster/walwriter.h"
|
||||
#include "postmaster/secondbuffer.h"
|
||||
#include "replication/logicallauncher.h"
|
||||
#include "replication/reorderbuffer.h"
|
||||
#include "replication/slot.h"
|
||||
@ -233,6 +234,7 @@ static bool check_recovery_target_name(char **newval, void **extra, GucSource so
|
||||
static void assign_recovery_target_name(const char *newval, void *extra);
|
||||
static bool check_recovery_target_lsn(char **newval, void **extra, GucSource source);
|
||||
static void assign_recovery_target_lsn(const char *newval, void *extra);
|
||||
static void assign_walsender_target_lsn(const char *newval, void *extra);
|
||||
static bool check_primary_slot_name(char **newval, void **extra, GucSource source);
|
||||
static bool check_default_with_oids(bool *newval, void **extra, GucSource source);
|
||||
|
||||
@ -606,6 +608,10 @@ char *pgstat_temp_directory;
|
||||
char *application_name;
|
||||
bool push_standby = false;
|
||||
bool he3_point_in_time_recovery;
|
||||
bool he3mirror = false;
|
||||
bool pgmirror = false;
|
||||
char *client_application_name = NULL;
|
||||
|
||||
|
||||
int tcp_keepalives_idle;
|
||||
int tcp_keepalives_interval;
|
||||
@ -650,6 +656,7 @@ static char *timezone_string;
|
||||
static char *log_timezone_string;
|
||||
static char *timezone_abbreviations_string;
|
||||
static char *data_directory;
|
||||
//static char *lmdb_directory;
|
||||
static char *session_authorization_string;
|
||||
static int max_function_args;
|
||||
static int max_index_keys;
|
||||
@ -666,6 +673,7 @@ static char *recovery_target_string;
|
||||
static char *recovery_target_xid_string;
|
||||
static char *recovery_target_name_string;
|
||||
static char *recovery_target_lsn_string;
|
||||
static char *walSendLsnStr;
|
||||
|
||||
|
||||
/* should be static, but commands/variable.c needs to get at this */
|
||||
@ -754,6 +762,8 @@ const char *const config_group_names[] =
|
||||
gettext_noop("Write-Ahead Log / Archive Recovery"),
|
||||
/* WAL_RECOVERY_TARGET */
|
||||
gettext_noop("Write-Ahead Log / Recovery Target"),
|
||||
/* WAL_SEND_LSN */
|
||||
gettext_noop("Write-Ahead Log / Wal Send Lsn"),
|
||||
/* REPLICATION_SENDING */
|
||||
gettext_noop("Replication / Sending Servers"),
|
||||
/* REPLICATION_PRIMARY */
|
||||
@ -2135,6 +2145,14 @@ static struct config_bool ConfigureNamesBool[] =
|
||||
false,
|
||||
NULL, NULL, NULL
|
||||
},
|
||||
{
|
||||
{"he3mirror", PGC_SIGHUP, WAL_ARCHIVE_RECOVERY,
|
||||
gettext_noop("Sets he3db as replica if he3mirror is configured true."),
|
||||
},
|
||||
&he3mirror,
|
||||
false,
|
||||
NULL, NULL, NULL
|
||||
},
|
||||
|
||||
/* End-of-list marker */
|
||||
{
|
||||
@ -2357,6 +2375,17 @@ static struct config_int ConfigureNamesInt[] =
|
||||
NULL, NULL, NULL
|
||||
},
|
||||
|
||||
{
|
||||
{"second_buffers", PGC_POSTMASTER, RESOURCES_MEM,
|
||||
gettext_noop("Sets the number of second buffers used by the server."),
|
||||
NULL,
|
||||
GUC_UNIT_BLOCKS
|
||||
},
|
||||
&SNBuffers,
|
||||
1024, 16, INT_MAX / 2,
|
||||
NULL, NULL, NULL
|
||||
},
|
||||
|
||||
{
|
||||
{"temp_buffers", PGC_USERSET, RESOURCES_MEM,
|
||||
gettext_noop("Sets the maximum number of temporary buffers used by each session."),
|
||||
@ -3950,6 +3979,15 @@ static struct config_string ConfigureNamesString[] =
|
||||
"",
|
||||
check_recovery_target_lsn, assign_recovery_target_lsn, NULL
|
||||
},
|
||||
{
|
||||
{"wal_send_lsn", PGC_SIGHUP, WAL_SEND_LSN,
|
||||
gettext_noop("Sets the LSN of the wal send log location up to which mirror start"),
|
||||
NULL
|
||||
},
|
||||
&walSendLsnStr,
|
||||
"",
|
||||
check_recovery_target_lsn, assign_walsender_target_lsn, NULL
|
||||
},
|
||||
|
||||
{
|
||||
{"promote_trigger_file", PGC_SIGHUP, REPLICATION_STANDBY,
|
||||
@ -3972,6 +4010,17 @@ static struct config_string ConfigureNamesString[] =
|
||||
NULL, NULL, NULL
|
||||
},
|
||||
|
||||
{
|
||||
{"he3_meta_conninfo", PGC_SIGHUP, CONN_AUTH_AUTH,
|
||||
gettext_noop("Sets the connection string to be used to connect to the meta server."),
|
||||
NULL,
|
||||
GUC_SUPERUSER_ONLY
|
||||
},
|
||||
&he3_meta_conninfo,
|
||||
"",
|
||||
NULL, NULL, NULL
|
||||
},
|
||||
|
||||
{
|
||||
{"primary_slot_name", PGC_SIGHUP, REPLICATION_STANDBY,
|
||||
gettext_noop("Sets the name of the replication slot to use on the sending server."),
|
||||
@ -4377,6 +4426,30 @@ static struct config_string ConfigureNamesString[] =
|
||||
NULL, NULL, NULL
|
||||
},
|
||||
|
||||
{
|
||||
//TODO lmdb
|
||||
{"lmdb_page_directory", PGC_POSTMASTER, FILE_LOCATIONS,
|
||||
gettext_noop("Sets the lmdb page directory."),
|
||||
NULL,
|
||||
GUC_SUPERUSER_ONLY | GUC_DISALLOW_IN_AUTO_FILE
|
||||
},
|
||||
&lmdb_page_directory,
|
||||
"/tmp/pagedb",
|
||||
NULL, NULL, NULL
|
||||
},
|
||||
|
||||
{
|
||||
//TODO lmdb
|
||||
{"lmdb_wal_directory", PGC_POSTMASTER, FILE_LOCATIONS,
|
||||
gettext_noop("Sets the lmdb wal directory."),
|
||||
NULL,
|
||||
GUC_SUPERUSER_ONLY | GUC_DISALLOW_IN_AUTO_FILE
|
||||
},
|
||||
&lmdb_wal_directory,
|
||||
"/tmp/waldb",
|
||||
NULL, NULL, NULL
|
||||
},
|
||||
|
||||
{
|
||||
{"config_file", PGC_POSTMASTER, FILE_LOCATIONS,
|
||||
gettext_noop("Sets the server's main configuration file."),
|
||||
@ -12516,6 +12589,16 @@ check_recovery_target_lsn(char **newval, void **extra, GucSource source)
|
||||
return true;
|
||||
}
|
||||
|
||||
static void assign_walsender_target_lsn(const char *newval, void *extra)
|
||||
{
|
||||
if (newval && strcmp(newval, "") != 0)
|
||||
{
|
||||
walsenderLsn = *((XLogRecPtr *) extra);
|
||||
} else {
|
||||
walsenderLsn = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
assign_recovery_target_lsn(const char *newval, void *extra)
|
||||
{
|
||||
|
@ -23,12 +23,12 @@ SUBDIRS = \
|
||||
pg_controldata \
|
||||
pg_ctl \
|
||||
pg_dump \
|
||||
pg_waldump \
|
||||
pg_resetwal \
|
||||
pg_test_fsync \
|
||||
pg_test_timing \
|
||||
pg_upgrade \
|
||||
pg_verifybackup \
|
||||
pg_waldump \
|
||||
pgbench \
|
||||
psql \
|
||||
scripts
|
||||
|
54
src/bin/pg_produce_wal/Makefile
Normal file
54
src/bin/pg_produce_wal/Makefile
Normal file
@ -0,0 +1,54 @@
|
||||
# src/bin/pg_produce_wal/Makefile
|
||||
|
||||
PGFILEDESC = "pg_produce_wal - decode and display WAL"
|
||||
PGAPPICON=win32
|
||||
|
||||
subdir = src/bin/pg_produce_wal
|
||||
top_builddir = ../../..
|
||||
include $(top_builddir)/src/Makefile.global
|
||||
|
||||
OBJS = \
|
||||
$(RMGRDESCOBJS) \
|
||||
$(WIN32RES) \
|
||||
xlogreader.o \
|
||||
pg_mirror.o
|
||||
|
||||
override CPPFLAGS := -DFRONTEND -DPG_NOREPLAY -I$(libpq_srcdir) $(CPPFLAGS)
|
||||
|
||||
librust_log = -DFRONTEND -L$(top_builddir)/src/backend/storage/file -lrust_log -lstdc++ -lm -ldl -lpthread -lfuse3 -Wl,-gc-section
|
||||
LIBS += $(librust_log)
|
||||
|
||||
all: pg_produce_wal
|
||||
|
||||
pg_produce_wal: pg_produce_wal.o $(OBJS) | submake-libpgport submake-libpq
|
||||
$(CC) $(CFLAGS) pg_produce_wal.o $(OBJS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
|
||||
|
||||
xlogreader.c: % : $(top_srcdir)/src/backend/access/transam/%
|
||||
rm -f $@ && $(LN_S) $< .
|
||||
|
||||
pg_mirror.c: % : $(top_srcdir)/src/backend/access/transam/%
|
||||
rm -f $@ && $(LN_S) $< .
|
||||
|
||||
#xlog.c: % : $(top_srcdir)/src/backend/access/transam/%
|
||||
# rm -f $@ && $(LN_S) $< .
|
||||
|
||||
#$(RMGRDESCSOURCES): % : $(top_srcdir)/src/backend/access/rmgrdesc/%
|
||||
# rm -f $@ && $(LN_S) $< .
|
||||
|
||||
install: all installdirs
|
||||
$(INSTALL_PROGRAM) pg_produce_wal$(X) '$(DESTDIR)$(bindir)/pg_produce_wal$(X)'
|
||||
installdirs:
|
||||
$(MKDIR_P) '$(DESTDIR)$(bindir)'
|
||||
|
||||
uninstall:
|
||||
rm -f '$(DESTDIR)$(bindir)/pg_produce_wal$(X)'
|
||||
|
||||
clean distclean maintainer-clean:
|
||||
rm -f pg_produce_wal$(X) $(OBJS) xlogreader.c pg_mirror.c
|
||||
rm -rf tmp_check
|
||||
|
||||
check:
|
||||
$(prove_check)
|
||||
|
||||
installcheck:
|
||||
$(prove_installcheck)
|
458
src/bin/pg_produce_wal/pg_produce_wal.c
Normal file
458
src/bin/pg_produce_wal/pg_produce_wal.c
Normal file
@ -0,0 +1,458 @@
|
||||
#define FRONTEND 1
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include <time.h>
|
||||
|
||||
#include "access/transam.h"
|
||||
#include "access/xlog.h"
|
||||
#include "access/pg_mirror.h"
|
||||
#include "access/xlog_internal.h"
|
||||
#include "catalog/pg_control.h"
|
||||
#include "common/controldata_utils.h"
|
||||
#include "common/logging.h"
|
||||
#include "getopt_long.h"
|
||||
#include "pg_getopt.h"
|
||||
#include "access/heapam_xlog.h"
|
||||
#include "catalog/pg_control.h"
|
||||
#include "access/nbtxlog.h"
|
||||
#include "access/gistxlog.h"
|
||||
#include "access/spgxlog.h"
|
||||
#include "access/brin_xlog.h"
|
||||
#include "common/file_perm.h"
|
||||
|
||||
typedef struct XLogDumpPrivate
|
||||
{
|
||||
TimeLineID timeline;
|
||||
XLogRecPtr startptr;
|
||||
XLogRecPtr endptr;
|
||||
bool endptr_reached;
|
||||
} XLogDumpPrivate;
|
||||
|
||||
|
||||
static void
|
||||
usage(const char *progname)
|
||||
{
|
||||
printf(_("%s displays control information of a PostgreSQL database cluster.\n\n"), progname);
|
||||
printf(_("Usage:\n"));
|
||||
printf(_(" %s [OPTION] [DATADIR]\n"), progname);
|
||||
printf(_("\nOptions:\n"));
|
||||
printf(_(" [-D, --pgdata=]DATADIR data directory\n"));
|
||||
printf(_(" -V, --version output version information, then exit\n"));
|
||||
printf(_(" -?, --help show this help, then exit\n"));
|
||||
printf(_("\nIf no data directory (DATADIR) is specified, "
|
||||
"the environment variable PGDATA\nis used.\n\n"));
|
||||
printf(_("Report bugs to <%s>.\n"), PACKAGE_BUGREPORT);
|
||||
printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
|
||||
}
|
||||
|
||||
|
||||
static const char *
|
||||
dbState(DBState state)
|
||||
{
|
||||
switch (state)
|
||||
{
|
||||
case DB_STARTUP:
|
||||
return _("starting up");
|
||||
case DB_SHUTDOWNED:
|
||||
return _("shut down");
|
||||
case DB_SHUTDOWNED_IN_RECOVERY:
|
||||
return _("shut down in recovery");
|
||||
case DB_SHUTDOWNING:
|
||||
return _("shutting down");
|
||||
case DB_IN_CRASH_RECOVERY:
|
||||
return _("in crash recovery");
|
||||
case DB_IN_ARCHIVE_RECOVERY:
|
||||
return _("in archive recovery");
|
||||
case DB_IN_PRODUCTION:
|
||||
return _("in production");
|
||||
}
|
||||
return _("unrecognized status code");
|
||||
}
|
||||
|
||||
static const char *
|
||||
wal_level_str(WalLevel wal_level)
|
||||
{
|
||||
switch (wal_level)
|
||||
{
|
||||
case WAL_LEVEL_MINIMAL:
|
||||
return "minimal";
|
||||
case WAL_LEVEL_REPLICA:
|
||||
return "replica";
|
||||
case WAL_LEVEL_LOGICAL:
|
||||
return "logical";
|
||||
}
|
||||
return _("unrecognized wal_level");
|
||||
}
|
||||
|
||||
/* pg_waldump's XLogReaderRoutine->batch_read callback */
|
||||
static int
|
||||
WALDumpBatchRead(XLogReaderState *state, XLogRecPtr targetPtr,
|
||||
int reqLen, char *readBuff)
|
||||
{
|
||||
XLogDumpPrivate *private = state->private_data;
|
||||
int count;
|
||||
|
||||
if (private->endptr != InvalidXLogRecPtr)
|
||||
{
|
||||
if (targetPtr >= private->endptr)
|
||||
{
|
||||
private->endptr_reached = true;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
count = He3DBWALRead(state, targetPtr, SizeOfXLogRecord, readBuff);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
#define UsableBytesInPage_tmp (XLOG_BLCKSZ - SizeOfXLogShortPHD)
|
||||
#define DEFAULT_XLOG_SEG_SIZE (16*1024*1024)
|
||||
|
||||
static uint64 UsableBytesInSegment_tmp =
|
||||
(DEFAULT_XLOG_SEG_SIZE / XLOG_BLCKSZ * UsableBytesInPage_tmp) -
|
||||
(SizeOfXLogLongPHD - SizeOfXLogShortPHD);
|
||||
|
||||
static XLogRecPtr
|
||||
XLogBytePosToRecPtr_tmp(uint64 bytepos)
|
||||
{
|
||||
/*
|
||||
* original logic, we abandon it.
|
||||
*/
|
||||
if(0) {
|
||||
uint64 fullsegs;
|
||||
uint64 fullpages;
|
||||
uint64 bytesleft;
|
||||
uint32 seg_offset;
|
||||
XLogRecPtr result;
|
||||
|
||||
fullsegs = bytepos / UsableBytesInSegment_tmp;
|
||||
bytesleft = bytepos % UsableBytesInSegment_tmp;
|
||||
|
||||
if (bytesleft < XLOG_BLCKSZ - SizeOfXLogLongPHD)
|
||||
{
|
||||
/* fits on first page of segment */
|
||||
seg_offset = bytesleft + SizeOfXLogLongPHD;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* account for the first page on segment with long header */
|
||||
seg_offset = XLOG_BLCKSZ;
|
||||
bytesleft -= XLOG_BLCKSZ - SizeOfXLogLongPHD;
|
||||
|
||||
fullpages = bytesleft / UsableBytesInPage_tmp;
|
||||
bytesleft = bytesleft % UsableBytesInPage_tmp;
|
||||
|
||||
seg_offset += fullpages * XLOG_BLCKSZ + bytesleft + SizeOfXLogShortPHD;
|
||||
}
|
||||
|
||||
XLogSegNoOffsetToRecPtr(fullsegs, seg_offset, DEFAULT_XLOG_SEG_SIZE, result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
return bytepos;
|
||||
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Like XLogBytePosToRecPtr, but if the position is at a page boundary,
|
||||
* returns a pointer to the beginning of the page (ie. before page header),
|
||||
* not to where the first xlog record on that page would go to. This is used
|
||||
* when converting a pointer to the end of a record.
|
||||
*/
|
||||
static XLogRecPtr
|
||||
XLogBytePosToEndRecPtr_tmp(uint64 bytepos)
|
||||
{
|
||||
/*
|
||||
* original logic, we abandon it.
|
||||
*/
|
||||
if(0){
|
||||
uint64 fullsegs;
|
||||
uint64 fullpages;
|
||||
uint64 bytesleft;
|
||||
uint32 seg_offset;
|
||||
XLogRecPtr result;
|
||||
|
||||
fullsegs = bytepos / UsableBytesInSegment_tmp;
|
||||
bytesleft = bytepos % UsableBytesInSegment_tmp;
|
||||
|
||||
if (bytesleft < XLOG_BLCKSZ - SizeOfXLogLongPHD)
|
||||
{
|
||||
/* fits on first page of segment */
|
||||
if (bytesleft == 0)
|
||||
seg_offset = 0;
|
||||
else
|
||||
seg_offset = bytesleft + SizeOfXLogLongPHD;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* account for the first page on segment with long header */
|
||||
seg_offset = XLOG_BLCKSZ;
|
||||
bytesleft -= XLOG_BLCKSZ - SizeOfXLogLongPHD;
|
||||
|
||||
fullpages = bytesleft / UsableBytesInPage_tmp;
|
||||
bytesleft = bytesleft % UsableBytesInPage_tmp;
|
||||
|
||||
if (bytesleft == 0)
|
||||
seg_offset += fullpages * XLOG_BLCKSZ + bytesleft;
|
||||
else
|
||||
seg_offset += fullpages * XLOG_BLCKSZ + bytesleft + SizeOfXLogShortPHD;
|
||||
}
|
||||
|
||||
XLogSegNoOffsetToRecPtr(fullsegs, seg_offset, wal_segment_size, result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
return bytepos;
|
||||
}
|
||||
|
||||
/*
 * Thin wrapper around open(2).
 *
 * Returns the file descriptor on success and -1 on failure.  When the
 * failure is caused by descriptor exhaustion (EMFILE/ENFILE) a diagnostic
 * is printed.  errno is preserved across that printf() so callers can still
 * inspect the reason for the failure.
 */
static int
BasicOpenFilePerm_tmp(const char *fileName, int fileFlags, mode_t fileMode)
{
	int			fd = open(fileName, fileFlags, fileMode);

	if (fd >= 0)
		return fd;				/* success! */

	if (errno == EMFILE || errno == ENFILE)
	{
		int			save_errno = errno;

		printf("out of file descriptors %d\n", save_errno);
		errno = save_errno;		/* printf may have clobbered it */
	}

	return -1;					/* failure */
}
|
||||
|
||||
static int64_t
|
||||
XLogFileInit_tmp(char* prefix,XLogSegNo logsegno, bool *use_existent, bool use_lock)
|
||||
{
|
||||
char path[MAXPGPATH];
|
||||
char tmppath[MAXPGPATH];
|
||||
int64_t fd;
|
||||
int save_errno;
|
||||
char buff[XLOG_BLCKSZ]={0};
|
||||
int n = snprintf(path,sizeof(path),"%s/",prefix);
|
||||
XLogFilePath(&path[n], 1, logsegno, DEFAULT_XLOG_SEG_SIZE);
|
||||
|
||||
/*
|
||||
* Try to use existent file (checkpoint maker may have created it already)
|
||||
*/
|
||||
if (*use_existent)
|
||||
{
|
||||
fd = BasicOpenFilePerm_tmp(path, O_RDWR | PG_BINARY | SYNC_METHOD_FSYNC,PG_FILE_MODE_OWNER);
|
||||
if (fd < 0)
|
||||
{
|
||||
if (errno != ENOENT)
|
||||
printf("open file failed %s\n",path);
|
||||
}
|
||||
else
|
||||
return fd;
|
||||
} else {
|
||||
fd = BasicOpenFilePerm_tmp(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,PG_FILE_MODE_OWNER);
|
||||
off_t offset = 0;
|
||||
while(offset<DEFAULT_XLOG_SEG_SIZE) {
|
||||
pg_pwrite(fd,buff,XLOG_BLCKSZ,offset);
|
||||
offset +=XLOG_BLCKSZ;
|
||||
}
|
||||
return fd;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
static struct option long_options[] = {
|
||||
{"pgdata", required_argument, NULL, 'D'},
|
||||
{"walfilename", required_argument, NULL, 'F'},
|
||||
{NULL, 0, NULL, 0}
|
||||
};
|
||||
|
||||
ControlFileData *ControlFile;
|
||||
bool crc_ok;
|
||||
char *DataDir = NULL;
|
||||
time_t time_tmp;
|
||||
char pgctime_str[128];
|
||||
char ckpttime_str[128];
|
||||
char mock_auth_nonce_str[MOCK_AUTH_NONCE_LEN * 2 + 1];
|
||||
const char *strftime_fmt = "%c";
|
||||
const char *progname;
|
||||
char xlogfilename[MAXFNAMELEN];
|
||||
int c;
|
||||
int i;
|
||||
int WalSegSz;
|
||||
XLogDumpPrivate private;
|
||||
|
||||
pg_logging_init(argv[0]);
|
||||
set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_produce_wal"));
|
||||
progname = get_progname(argv[0]);
|
||||
|
||||
if (argc > 1)
|
||||
{
|
||||
if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
|
||||
{
|
||||
usage(progname);
|
||||
exit(0);
|
||||
}
|
||||
if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
|
||||
{
|
||||
puts("pg_controldata (PostgreSQL) " PG_VERSION);
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
while ((c = getopt_long(argc, argv, "D:", long_options, NULL)) != -1)
|
||||
{
|
||||
switch (c)
|
||||
{
|
||||
case 'D':
|
||||
DataDir = optarg;
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
if (DataDir == NULL)
|
||||
{
|
||||
if (optind < argc)
|
||||
DataDir = argv[optind++];
|
||||
else
|
||||
DataDir = getenv("PGDATA");
|
||||
}
|
||||
|
||||
/* Complain if any arguments remain */
|
||||
if (optind < argc)
|
||||
{
|
||||
pg_log_error("too many command-line arguments (first is \"%s\")",
|
||||
argv[optind]);
|
||||
fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
|
||||
progname);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (DataDir == NULL)
|
||||
{
|
||||
pg_log_error("no data directory specified");
|
||||
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* get a copy of the control file */
|
||||
ControlFile = get_controlfile(DataDir, &crc_ok);
|
||||
if (!crc_ok)
|
||||
printf(_("WARNING: Calculated CRC checksum does not match value stored in file.\n"
|
||||
"Either the file is corrupt, or it has a different layout than this program\n"
|
||||
"is expecting. The results below are untrustworthy.\n\n"));
|
||||
setControlFile(ControlFile);
|
||||
|
||||
/* set wal segment size */
|
||||
WalSegSz = ControlFile->xlog_seg_size;
|
||||
|
||||
if (!IsValidWalSegSize(WalSegSz))
|
||||
{
|
||||
printf(_("WARNING: invalid WAL segment size\n"));
|
||||
printf(ngettext("The WAL segment size stored in the file, %d byte, is not a power of two\n"
|
||||
"between 1 MB and 1 GB. The file is corrupt and the results below are\n"
|
||||
"untrustworthy.\n\n",
|
||||
"The WAL segment size stored in the file, %d bytes, is not a power of two\n"
|
||||
"between 1 MB and 1 GB. The file is corrupt and the results below are\n"
|
||||
"untrustworthy.\n\n",
|
||||
WalSegSz),
|
||||
WalSegSz);
|
||||
}
|
||||
|
||||
/*
|
||||
* This slightly-chintzy coding will work as long as the control file
|
||||
* timestamps are within the range of time_t; that should be the case in
|
||||
* all foreseeable circumstances, so we don't bother importing the
|
||||
* backend's timezone library into pg_controldata.
|
||||
*
|
||||
* Use variable for format to suppress overly-anal-retentive gcc warning
|
||||
* about %c
|
||||
*/
|
||||
time_tmp = (time_t) ControlFile->time;
|
||||
strftime(pgctime_str, sizeof(pgctime_str), strftime_fmt,
|
||||
localtime(&time_tmp));
|
||||
time_tmp = (time_t) ControlFile->checkPointCopy.time;
|
||||
strftime(ckpttime_str, sizeof(ckpttime_str), strftime_fmt,
|
||||
localtime(&time_tmp));
|
||||
|
||||
memset(&private, 0, sizeof(XLogDumpPrivate));
|
||||
private.timeline = 1;
|
||||
private.startptr = ControlFile->checkPoint;
|
||||
private.endptr = InvalidXLogRecPtr;
|
||||
private.endptr_reached = false;
|
||||
/* we have everything we need, start reading */
|
||||
XLogReaderState *xlogreader_state;
|
||||
xlogreader_state =
|
||||
XLogReaderAllocate(WalSegSz, NULL,
|
||||
XL_ROUTINE(.batch_read = WALDumpBatchRead),
|
||||
&private);
|
||||
if (!xlogreader_state)
|
||||
printf("out of memory");
|
||||
|
||||
xlogreader_state->currTLI = ControlFile->checkPointCopy.ThisTimeLineID;
|
||||
/* first find a valid recptr to start from */
|
||||
XLogRecPtr first_record;
|
||||
int ret = -1;
|
||||
ret = He3DBWALRead(xlogreader_state,
|
||||
private.startptr,
|
||||
SizeOfXLogRecord,
|
||||
xlogreader_state->readBuf);
|
||||
if (ret < SizeOfXLogRecord) {
|
||||
printf("He3DBReadWalInternal Failed\n");
|
||||
return -1;
|
||||
}
|
||||
XLogRecord* record = (XLogRecord*)xlogreader_state->readBuf;
|
||||
char DStr[1024]={0};
|
||||
int dLen = 0;
|
||||
uint64 startLsn = 0,endLsn = 0;
|
||||
int mtrLen = ArrayXlogHe3ToPg(record,record->xl_tot_len,DStr,&dLen,&startLsn,&endLsn);
|
||||
ControlFile->checkPoint = startLsn;
|
||||
ControlFile->checkPointCopy.redo = startLsn;
|
||||
update_controlfile(DataDir,ControlFile,true);
|
||||
XLogSegNo segno;
|
||||
XLByteToSeg(ControlFile->checkPointCopy.redo, segno, WalSegSz);
|
||||
int64_t recvFile = -1;
|
||||
XLogSegNo recvSegNo = 0;
|
||||
TimeLineID recvFileTLI = 1;
|
||||
//ThisTimeLineID = 1;
|
||||
/* Close the current segment if it's completed */
|
||||
if (recvFile < 0)
|
||||
{
|
||||
bool use_existent = false;
|
||||
|
||||
/* Create/use new log file */
|
||||
XLByteToSeg(ControlFile->checkPoint, recvSegNo, DEFAULT_XLOG_SEG_SIZE);
|
||||
recvFile = XLogFileInit_tmp(DataDir,recvSegNo, &use_existent, true);
|
||||
recvFileTLI = 1;
|
||||
}
|
||||
int startoff = 0;
|
||||
int byteswritten;
|
||||
/* Calculate the start offset of the received logs */
|
||||
//startoff = XLogSegmentOffset(ControlFile->checkPoint, DEFAULT_XLOG_SEG_SIZE);
|
||||
//int segbytes;
|
||||
//if (startoff + endLsn - ControlFile->checkPoint > DEFAULT_XLOG_SEG_SIZE)
|
||||
// segbytes = DEFAULT_XLOG_SEG_SIZE - startoff;
|
||||
//else
|
||||
// segbytes = endLsn - ControlFile->checkPoint;
|
||||
|
||||
/* OK to write the logs */
|
||||
//errno = 0;
|
||||
|
||||
byteswritten = pg_pwrite(recvFile, DStr, dLen, (off_t) startoff);
|
||||
fsync(recvFile);
|
||||
close(recvFile);
|
||||
return 0;
|
||||
}
|
||||
|
@ -54,6 +54,18 @@ typedef struct xl_brin_createidx
|
||||
} xl_brin_createidx;
|
||||
#define SizeOfBrinCreateIdx (offsetof(xl_brin_createidx, version) + sizeof(uint16))
|
||||
|
||||
typedef struct xl_old_brin_insert
|
||||
{
|
||||
BlockNumber heapBlk;
|
||||
|
||||
/* extra information needed to update the revmap */
|
||||
BlockNumber pagesPerRange;
|
||||
|
||||
OffsetNumber offnum;
|
||||
} xl_old_brin_insert;
|
||||
|
||||
#define SizeOfOldBrinInsert (offsetof(xl_old_brin_insert, offnum) + sizeof(OffsetNumber))
|
||||
|
||||
/*
|
||||
* This is what we need to know about a BRIN tuple insert
|
||||
*
|
||||
@ -95,6 +107,16 @@ typedef struct xl_brin_update
|
||||
|
||||
#define SizeOfBrinUpdate (offsetof(xl_brin_update, insert) + SizeOfBrinInsert)
|
||||
|
||||
typedef struct xl_old_brin_update
|
||||
{
|
||||
/* offset number of old tuple on old page */
|
||||
OffsetNumber oldOffnum;
|
||||
|
||||
xl_old_brin_insert insert;
|
||||
} xl_old_brin_update;
|
||||
|
||||
#define SizeOfOldBrinUpdate (offsetof(xl_old_brin_update, insert) + SizeOfOldBrinInsert)
|
||||
|
||||
/*
|
||||
* This is what we need to know about a BRIN tuple samepage update
|
||||
*
|
||||
|
@ -59,6 +59,20 @@ typedef struct gistxlogDelete
|
||||
|
||||
#define SizeOfGistxlogDelete (offsetof(gistxlogDelete, ntodelete) + sizeof(uint16))
|
||||
|
||||
typedef struct gistoldxlogPageSplit
|
||||
{
|
||||
BlockNumber origrlink; /* rightlink of the page before split */
|
||||
GistNSN orignsn; /* NSN of the page before split */
|
||||
bool origleaf; /* was splitted page a leaf page? */
|
||||
|
||||
uint16 npage; /* # of pages in the split */
|
||||
bool markfollowright; /* set F_FOLLOW_RIGHT flags */
|
||||
|
||||
/*
|
||||
* follow: 1. gistxlogPage and array of IndexTupleData per page
|
||||
*/
|
||||
} gistoldxlogPageSplit;
|
||||
|
||||
/*
|
||||
* Backup Blk 0: If this operation completes a page split, by inserting a
|
||||
* downlink for the split page, the left half of the split
|
||||
|
@ -192,6 +192,22 @@ typedef struct xl_multi_insert_tuple
|
||||
|
||||
#define SizeOfMultiInsertTuple (offsetof(xl_multi_insert_tuple, t_hoff) + sizeof(uint8))
|
||||
|
||||
typedef struct xl_old_heap_update
|
||||
{
|
||||
TransactionId old_xmax; /* xmax of the old tuple */
|
||||
OffsetNumber old_offnum; /* old tuple's offset */
|
||||
uint8 old_infobits_set; /* infomask bits to set on old tuple */
|
||||
uint8 flags;
|
||||
TransactionId new_xmax; /* xmax of the new tuple */
|
||||
OffsetNumber new_offnum; /* new tuple's offset */
|
||||
/*
|
||||
* If XLH_UPDATE_CONTAINS_OLD_TUPLE or XLH_UPDATE_CONTAINS_OLD_KEY flags
|
||||
* are set, xl_heap_header and tuple data for the old tuple follow.
|
||||
*/
|
||||
} xl_old_heap_update;
|
||||
|
||||
#define SizeOfOldHeapUpdate (offsetof(xl_old_heap_update, new_offnum) + sizeof(OffsetNumber))
|
||||
|
||||
/*
|
||||
* This is what we need to know about update|hot_update
|
||||
*
|
||||
@ -346,6 +362,14 @@ typedef struct xl_heap_freeze_page
|
||||
|
||||
#define SizeOfHeapFreezePage (offsetof(xl_heap_freeze_page, ntuples) + sizeof(uint16))
|
||||
|
||||
typedef struct xl_old_heap_visible
|
||||
{
|
||||
TransactionId cutoff_xid;
|
||||
uint8 flags;
|
||||
} xl_old_heap_visible;
|
||||
|
||||
#define SizeOfOldHeapVisible (offsetof(xl_old_heap_visible, flags) + sizeof(uint8))
|
||||
|
||||
/*
|
||||
* This is what we need to know about setting a visibility map bit
|
||||
*
|
||||
|
@ -86,6 +86,16 @@ typedef struct xl_btree_insert
|
||||
|
||||
#define SizeOfBtreeInsert (offsetof(xl_btree_insert, offnum) + sizeof(OffsetNumber))
|
||||
|
||||
/*
 * xl_old_btree_split -- fixed-size data describing a btree page split (see
 * the per-field comments).  SizeOfOldBtreeSplit (defined after this struct)
 * ends at postingoff.
 *
 * NOTE(review): presumably the stock-PostgreSQL ("old") layout of
 * xl_btree_split -- confirm against the code that uses it.
 */
typedef struct xl_old_btree_split
|
||||
{
|
||||
uint32 level; /* tree level of page being split */
|
||||
OffsetNumber firstrightoff; /* first origpage item on rightpage */
|
||||
OffsetNumber newitemoff; /* new item's offset */
|
||||
uint16 postingoff; /* offset inside orig posting tuple */
|
||||
} xl_old_btree_split;
|
||||
|
||||
#define SizeOfOldBtreeSplit (offsetof(xl_old_btree_split, postingoff) + sizeof(uint16))
|
||||
|
||||
/*
|
||||
* On insert with split, we save all the items going into the right sibling
|
||||
* so that we can restore it completely from the log record. This way takes
|
||||
|
@ -6,7 +6,7 @@
|
||||
#include "storage/buf_internals.h"
|
||||
//max Page Num
|
||||
#define G_QUEUE_LEN 2048
|
||||
#define PARALLEL_NUM 1
|
||||
#define PARALLEL_NUM 8
|
||||
typedef struct lsn_list_t {
|
||||
XLogRecPtr lsn;
|
||||
XLogRecPtr endlsn;
|
||||
|
8
src/include/access/pg_mirror.h
Normal file
8
src/include/access/pg_mirror.h
Normal file
@ -0,0 +1,8 @@
|
||||
#ifndef PG_MIRROR_H
|
||||
#define PG_MIRROR_H
|
||||
#include "c.h"
|
||||
#include "catalog/pg_control.h"
|
||||
/*
 * NOTE(review): the implementations are not visible from this header, so the
 * descriptions below are inferred from names and signatures only -- confirm
 * before relying on them.
 *
 * ArrayXlogHe3ToPg: presumably converts sLen bytes of He3DB-format WAL in
 * sBuf into stock PostgreSQL format in dBuf, reporting the output length via
 * *dLen and the covered LSN range via *startLsn / *endLsn.  The meaning of
 * the int return value is not visible here.
 */
extern int ArrayXlogHe3ToPg(char*sBuf,int sLen, char*dBuf,int* dLen,uint64 *startLsn,uint64 *endLsn);
|
||||
/* Presumably reads the control file found under pathstr -- TODO confirm. */
extern void readControlFile(char*pathstr);
|
||||
/* Presumably installs *cfile as the current control-file data -- TODO confirm. */
extern void setControlFile(ControlFileData *cfile);
|
||||
#endif
|
@ -25,10 +25,10 @@ extern XLogRecPtr LastPushPoint;
|
||||
|
||||
|
||||
extern XLogRecPtr QueryMinLsn(XLogRecPtr lsn);
|
||||
extern XLogRecPtr QueryPushLsn();
|
||||
// extern XLogRecPtr QueryPushLsn();
|
||||
/*
 * Returns an XLogRecPtr and takes no arguments.  Declared with "(void)" so
 * the compiler has a real prototype and rejects calls that pass arguments;
 * an empty "()" leaves the parameter list unspecified in pre-C23 C.
 */
extern XLogRecPtr QueryPushChkpointLsn(void);
|
||||
|
||||
extern XLogRecPtr QueryReplyLsn(XLogRecPtr lsn);
|
||||
// extern XLogRecPtr QueryReplyLsn(XLogRecPtr lsn);
|
||||
|
||||
typedef struct DirtyPage {
|
||||
XLogRecPtr startlsn;
|
||||
|
@ -58,6 +58,7 @@ typedef enum BufferStatus{
|
||||
|
||||
typedef struct wal_batch_t {
|
||||
XLogRecPtr startLsn;
|
||||
XLogRecPtr endLsn;
|
||||
int dataLen;
|
||||
pg_atomic_uint32 status;
|
||||
char* data;
|
||||
|
@ -39,6 +39,19 @@ typedef struct spgxlogState
|
||||
bool isBuild;
|
||||
} spgxlogState;
|
||||
|
||||
/*
 * spgoldxlogAddLeaf -- fixed-size data of an SP-GiST "add leaf" record; the
 * new leaf tuple follows it unaligned (see the trailing note).
 *
 * NOTE(review): presumably the stock-PostgreSQL ("old") layout of
 * spgxlogAddLeaf -- confirm against the code that uses it.
 */
typedef struct spgoldxlogAddLeaf
|
||||
{
|
||||
bool newPage; /* init dest page? */
|
||||
bool storesNulls; /* page is in the nulls tree? */
|
||||
OffsetNumber offnumLeaf; /* offset where leaf tuple gets placed */
|
||||
OffsetNumber offnumHeadLeaf; /* offset of head tuple in chain, if any */
|
||||
|
||||
OffsetNumber offnumParent; /* where the parent downlink is, if any */
|
||||
uint16 nodeI;
|
||||
|
||||
/* new leaf tuple follows (unaligned!) */
|
||||
} spgoldxlogAddLeaf;
|
||||
|
||||
/*
|
||||
* Backup Blk 0: destination page for leaf tuple
|
||||
* Backup Blk 1: parent page (if any)
|
||||
@ -59,6 +72,35 @@ typedef struct spgxlogAddLeaf
|
||||
/* new leaf tuple follows (unaligned!) */
|
||||
} spgxlogAddLeaf;
|
||||
|
||||
/*
 * spgoldxlogMoveLeafs -- fixed-size data of an SP-GiST "move leafs" record,
 * followed by the variable-length arrays described above the offsets[]
 * member.  SizeOfOldSpgxlogMoveLeafs (defined after this struct) is the
 * offset of offsets[].
 *
 * NOTE(review): presumably the stock-PostgreSQL ("old") layout of
 * spgxlogMoveLeafs -- confirm against the code that uses it.
 */
typedef struct spgoldxlogMoveLeafs
|
||||
{
|
||||
uint16 nMoves; /* number of tuples moved from source page */
|
||||
bool newPage; /* init dest page? */
|
||||
bool replaceDead; /* are we replacing a DEAD source tuple? */
|
||||
bool storesNulls; /* pages are in the nulls tree? */
|
||||
|
||||
/* where the parent downlink is */
|
||||
OffsetNumber offnumParent;
|
||||
uint16 nodeI;
|
||||
|
||||
spgxlogState stateSrc;
|
||||
|
||||
/*----------
|
||||
* data follows:
|
||||
* array of deleted tuple numbers, length nMoves
|
||||
* array of inserted tuple numbers, length nMoves + 1 or 1
|
||||
* list of leaf tuples, length nMoves + 1 or 1 (unaligned!)
|
||||
*
|
||||
* Note: if replaceDead is true then there is only one inserted tuple
|
||||
* number and only one leaf tuple in the data, because we are not copying
|
||||
* the dead tuple from the source
|
||||
*----------
|
||||
*/
|
||||
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER];
|
||||
} spgoldxlogMoveLeafs;
|
||||
|
||||
#define SizeOfOldSpgxlogMoveLeafs offsetof(spgoldxlogMoveLeafs, offsets)
|
||||
|
||||
/*
|
||||
* Backup Blk 0: source leaf page
|
||||
* Backup Blk 1: destination leaf page
|
||||
@ -96,6 +138,44 @@ typedef struct spgxlogMoveLeafs
|
||||
|
||||
#define SizeOfSpgxlogMoveLeafs offsetof(spgxlogMoveLeafs, offsets)
|
||||
|
||||
/*
 * spgoldxlogAddNode -- fixed-size data of an SP-GiST "add node" record; the
 * updated inner tuple follows it unaligned (see the trailing note).
 *
 * NOTE(review): presumably the stock-PostgreSQL ("old") layout of
 * spgxlogAddNode -- confirm against the code that uses it.
 */
typedef struct spgoldxlogAddNode
|
||||
{
|
||||
/*
|
||||
* Offset of the original inner tuple, in the original page (on backup
|
||||
* block 0).
|
||||
*/
|
||||
OffsetNumber offnum;
|
||||
|
||||
/*
|
||||
* Offset of the new tuple, on the new page (on backup block 1). Invalid,
|
||||
* if we overwrote the old tuple in the original page.
|
||||
*/
|
||||
OffsetNumber offnumNew;
|
||||
bool newPage; /* init new page? */
|
||||
|
||||
/*----
|
||||
* Where is the parent downlink? parentBlk indicates which page it's on,
|
||||
* and offnumParent is the offset within the page. The possible values for
|
||||
* parentBlk are:
|
||||
*
|
||||
* 0: parent == original page
|
||||
* 1: parent == new page
|
||||
* 2: parent == different page (blk ref 2)
|
||||
* -1: parent not updated
|
||||
*----
|
||||
*/
|
||||
int8 parentBlk;
|
||||
OffsetNumber offnumParent; /* offset within the parent page */
|
||||
|
||||
uint16 nodeI;
|
||||
|
||||
spgxlogState stateSrc;
|
||||
|
||||
/*
|
||||
* updated inner tuple follows (unaligned!)
|
||||
*/
|
||||
} spgoldxlogAddNode;
|
||||
|
||||
/*
|
||||
* Backup Blk 0: original page
|
||||
* Backup Blk 1: where new tuple goes, if not same place
|
||||
@ -162,6 +242,42 @@ typedef struct spgxlogSplitTuple
|
||||
*/
|
||||
} spgxlogSplitTuple;
|
||||
|
||||
/*
 * spgoldxlogPickSplit -- fixed-size data of an SP-GiST "pick split" record,
 * followed by the variable-length arrays described above the offsets[]
 * member.  SizeOfOldSpgxlogPickSplit (defined after this struct) is the
 * offset of offsets[].
 *
 * NOTE(review): presumably the stock-PostgreSQL ("old") layout of
 * spgxlogPickSplit -- confirm against the code that uses it.
 */
typedef struct spgoldxlogPickSplit
|
||||
{
|
||||
bool isRootSplit;
|
||||
|
||||
uint16 nDelete; /* n to delete from Src */
|
||||
uint16 nInsert; /* n to insert on Src and/or Dest */
|
||||
bool initSrc; /* re-init the Src page? */
|
||||
bool initDest; /* re-init the Dest page? */
|
||||
|
||||
/* where to put new inner tuple */
|
||||
OffsetNumber offnumInner;
|
||||
bool initInner; /* re-init the Inner page? */
|
||||
|
||||
bool storesNulls; /* pages are in the nulls tree? */
|
||||
|
||||
/* where the parent downlink is, if any */
|
||||
bool innerIsParent; /* is parent the same as inner page? */
|
||||
OffsetNumber offnumParent;
|
||||
uint16 nodeI;
|
||||
|
||||
spgxlogState stateSrc;
|
||||
|
||||
/*----------
|
||||
* data follows:
|
||||
* array of deleted tuple numbers, length nDelete
|
||||
* array of inserted tuple numbers, length nInsert
|
||||
* array of page selector bytes for inserted tuples, length nInsert
|
||||
* new inner tuple (unaligned!)
|
||||
* list of leaf tuples, length nInsert (unaligned!)
|
||||
*----------
|
||||
*/
|
||||
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER];
|
||||
} spgoldxlogPickSplit;
|
||||
|
||||
#define SizeOfOldSpgxlogPickSplit offsetof(spgoldxlogPickSplit, offsets)
|
||||
|
||||
/*
|
||||
* Buffer references in the rdata array are:
|
||||
* Backup Blk 0: Src page (only if not root)
|
||||
|
@ -108,6 +108,7 @@ extern PGDLLIMPORT XLogRecPtr XactLastCommitEnd;
|
||||
extern bool reachedConsistency;
|
||||
extern int group_total_len;
|
||||
extern int grouo_rec_count;
|
||||
extern int grouo_rec_cur_count;
|
||||
extern XLogRecord *grouphead[XLR_MAX_BLOCK_ID + 1];
|
||||
extern int grouplens[XLR_MAX_BLOCK_ID + 1];
|
||||
extern XLogRecData groupRecData[XLR_MAX_BLOCK_ID + 1];
|
||||
@ -142,11 +143,14 @@ extern char *PrimarySlotName;
|
||||
extern bool wal_receiver_create_temp_slot;
|
||||
extern bool track_wal_io_timing;
|
||||
|
||||
extern char *he3_meta_conninfo;
|
||||
|
||||
/* indirectly set via GUC system */
|
||||
extern TransactionId recoveryTargetXid;
|
||||
extern char *recovery_target_time_string;
|
||||
extern const char *recoveryTargetName;
|
||||
extern XLogRecPtr recoveryTargetLSN;
|
||||
extern XLogRecPtr walsenderLsn;
|
||||
extern RecoveryTargetType recoveryTarget;
|
||||
extern char *PromoteTriggerFile;
|
||||
extern RecoveryTargetTimeLineGoal recoveryTargetTimeLineGoal;
|
||||
@ -386,6 +390,9 @@ extern void XLogRequestWalReceiverReply(void);
|
||||
extern void assign_max_wal_size(int newval, void *extra);
|
||||
extern void assign_checkpoint_completion_target(double newval, void *extra);
|
||||
extern void pushTikv(int onePageListLen,int pageNum,bool flag);
|
||||
extern XLogRecData *DecodeXLogRecordAssemble(XLogReaderState *state, OldXLogRecord *record,
|
||||
XLogRecPtr RedoRecPtr, bool doPageWrites,
|
||||
XLogRecPtr *fpw_lsn, int *num_fpi);
|
||||
|
||||
/*
|
||||
* Routines to start, stop, and get status of a base backup.
|
||||
|
@ -219,16 +219,16 @@ struct XLogReaderState
|
||||
/* last read XLOG position for data currently in readBuf */
|
||||
uint32 bufoff;
|
||||
/* last read XLOG position for data currently in readBuf */
|
||||
// WALSegmentContext segcxt;
|
||||
// WALOpenSegment seg;
|
||||
// uint32 segoff;
|
||||
WALSegmentContext segcxt;
|
||||
WALOpenSegment seg;
|
||||
uint32 segoff;
|
||||
|
||||
/*
|
||||
* beginning of prior page read, and its TLI. Doesn't necessarily
|
||||
* correspond to what's in readBuf; used for timeline sanity checks.
|
||||
*/
|
||||
// XLogRecPtr latestPagePtr;
|
||||
// TimeLineID latestPageTLI;
|
||||
XLogRecPtr latestPagePtr;
|
||||
TimeLineID latestPageTLI;
|
||||
|
||||
/* beginning of the WAL record being read. */
|
||||
XLogRecPtr currRecPtr;
|
||||
@ -275,6 +275,7 @@ struct XLogReaderState
|
||||
Buffer buffer;
|
||||
bool isreplay;
|
||||
bool streamStart;
|
||||
bool insertTikv;
|
||||
};
|
||||
|
||||
/* Get a new XLogReader */
|
||||
@ -303,6 +304,8 @@ extern struct XLogRecord *He3DBXLogReadRecord(XLogReaderState *state,
|
||||
extern struct XLogRecord *He3DBXLogListReadRecord(XLogReaderState *state,
|
||||
char **errormsg, char *pageXlogBuf);
|
||||
|
||||
extern struct XLogRecord *StartupXLogReadRecord(XLogReaderState *state, char **errormsg);
|
||||
|
||||
/* Validate a page */
|
||||
extern bool XLogReaderValidatePageHeader(XLogReaderState *state,
|
||||
XLogRecPtr recptr, char *phdr);
|
||||
|
@ -17,6 +17,21 @@
|
||||
#include "storage/block.h"
|
||||
#include "storage/relfilenode.h"
|
||||
|
||||
/*
 * OldXLogRecord -- fixed-size header at the start of a WAL record in the
 * "old" layout; block headers and the data header follow it without padding.
 * SizeOfOldXLogRecord (defined after this struct) ends at xl_crc.
 *
 * NOTE(review): presumably this is the stock-PostgreSQL XLogRecord layout,
 * kept so native-format records can be decoded -- confirm against the code
 * that uses it.
 */
typedef struct OldXLogRecord
|
||||
{
|
||||
uint32 xl_tot_len; /* total len of entire record */
|
||||
TransactionId xl_xid; /* xact id */
|
||||
XLogRecPtr xl_prev; /* ptr to previous record in log */
|
||||
uint8 xl_info; /* flag bits, see below */
|
||||
RmgrId xl_rmid; /* resource manager for this record */
|
||||
/* 2 bytes of padding here, initialize to zero */
|
||||
pg_crc32c xl_crc; /* CRC for this record */
|
||||
|
||||
/* XLogRecordBlockHeaders and XLogRecordDataHeader follow, no padding */
|
||||
|
||||
} OldXLogRecord;
|
||||
#define SizeOfOldXLogRecord (offsetof(OldXLogRecord, xl_crc) + sizeof(pg_crc32c))
|
||||
|
||||
/*
|
||||
* The overall layout of an XLOG record is:
|
||||
* Fixed-size header (XLogRecord struct)
|
||||
|
@ -13,6 +13,7 @@
|
||||
|
||||
#include "access/xlogreader.h"
|
||||
#include "storage/bufmgr.h"
|
||||
#include "access/xlogutils.h"
|
||||
|
||||
|
||||
extern bool XLogHaveInvalidPages(void);
|
||||
@ -60,8 +61,9 @@ extern int read_local_xlog_page(XLogReaderState *state,
|
||||
XLogRecPtr targetPagePtr, int reqLen,
|
||||
XLogRecPtr targetRecPtr, char *cur_page);
|
||||
extern int read_local_xlog_batch(XLogReaderState *state,
|
||||
XLogRecPtr startRecPtr,
|
||||
int reqLen,
|
||||
XLogRecPtr startRecPtr, char *cur_page);
|
||||
char *cur_page);
|
||||
extern void wal_segment_open(XLogReaderState *state,
|
||||
XLogSegNo nextSegNo,
|
||||
TimeLineID *tli_p);
|
||||
|
@ -1,66 +0,0 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* pg_hot_data.h
|
||||
* definition of the "hot_data" system catalog (pg_hot_data)
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 2022, He3DB Global Development Group
|
||||
*
|
||||
* src/include/catalog/pg_hot_data.h
|
||||
*
|
||||
* NOTES
|
||||
* The Catalog.pm module reads this file and derives schema
|
||||
* information.
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef PG_HOT_DATA_H
|
||||
#define PG_HOT_DATA_H
|
||||
|
||||
#include "catalog/genbki.h"
|
||||
#include "catalog/pg_hot_data_d.h"
|
||||
|
||||
/* ----------------
|
||||
* pg_hot_data definition. cpp turns this into
|
||||
* typedef struct FormData_pg_hot_data
|
||||
* ----------------
|
||||
*/
|
||||
CATALOG(pg_hot_data,4790,HotDataRelationId) BKI_SHARED_RELATION BKI_ROWTYPE_OID(4793,HotDataRelation_Rowtype_Id) BKI_SCHEMA_MACRO
|
||||
{
|
||||
/* database name */
|
||||
NameData datname;
|
||||
|
||||
/* relation name */
|
||||
NameData relname;
|
||||
|
||||
/* caching rules */
|
||||
char crules;
|
||||
|
||||
/* client name */
|
||||
NameData clientname;
|
||||
|
||||
/* client addr */
|
||||
NameData clientaddr;
|
||||
|
||||
#ifdef CATALOG_VARLEN /* variable-length fields start here */
|
||||
/* cache rules schedule time */
|
||||
timestamptz crulessettime;
|
||||
|
||||
/* hot data cache time */
|
||||
timestamptz cachetime;
|
||||
#endif
|
||||
}FormData_pg_hot_data;
|
||||
|
||||
/* ----------------
|
||||
* Form_pg_hot_data corresponds to a pointer to a tuple with
|
||||
* the format of pg_hot_data relation.
|
||||
* ----------------
|
||||
*/
|
||||
typedef FormData_pg_hot_data *Form_pg_hot_data;
|
||||
|
||||
DECLARE_UNIQUE_INDEX(pg_hot_data_datname_relname_index, 4791, on pg_hot_data using btree(datname name_ops, relname name_ops));
|
||||
#define HotDataDatnameRelnameIndexId 4791
|
||||
|
||||
extern void PrecacheHotData();
|
||||
|
||||
#endif
|
130
src/include/postmaster/secondbuffer.h
Normal file
130
src/include/postmaster/secondbuffer.h
Normal file
@ -0,0 +1,130 @@
|
||||
#include "postgres.h"
|
||||
#include "utils/hfs.h"
|
||||
|
||||
#include <lmdb.h>
|
||||
|
||||
|
||||
#define MAXREADERS 512
|
||||
/*
 * 2^40 bytes (1 TiB).  The whole expansion is parenthesized so the macro
 * keeps its value inside larger expressions: "<<" binds more loosely than
 * "*", "+" and "-", so the unparenthesized form would turn MAPSIE * 2 into
 * 1 << (40 * 2).
 *
 * NOTE(review): the name looks like a typo of MAPSIZE (presumably the LMDB
 * map size); it is left unchanged because code elsewhere refers to it.
 */
#define MAPSIE ((uint64) 1 << 40)
|
||||
#define DEFAULTPAGEPATH "/tmp/pagedb"
|
||||
#define DEFAULTWALPATH "/tmp/waldb"
|
||||
#define PAGE 1
|
||||
#define WAL 2
|
||||
#define BLKSZ 8192
|
||||
|
||||
#define DROP 1
|
||||
#define TRUNCATE 2
|
||||
#define EVICT 3
|
||||
#define SDLEN 1024
|
||||
#define SDNUM 128
|
||||
|
||||
/*
 * Maps a hash code to a partition number by taking it modulo
 * NUM_LOCK_PARTITIONS.
 */
#define SecondBufferTableHashPartition(hashcode) \
|
||||
((hashcode) % NUM_LOCK_PARTITIONS)
|
||||
/*
 * Yields the address of the .lock member of the SecondBufferMainLWLockArray
 * entry selected by SecondBufferTableHashPartition(hashcode).
 */
#define SecondBufferMappingPartitionLock(hashcode) \
|
||||
(&SecondBufferMainLWLockArray[SecondBufferTableHashPartition(hashcode)].lock)
|
||||
|
||||
extern char *lmdb_page_directory;
|
||||
extern char *lmdb_wal_directory;
|
||||
extern Size SNBuffers;
|
||||
|
||||
/*
 * SdPageKey -- key used by the second-buffer hash code.  It identifies one
 * page by four 32-bit values: database, relation, fork number and block
 * number.
 */
|
||||
typedef struct SdPageKey
|
||||
{
|
||||
uint32 dbid; /* database id */
|
||||
uint32 relid; /* relation id */
|
||||
uint32 forkno; /* fork number */
|
||||
uint32 blkno; /* block number */
|
||||
} SdPageKey;
|
||||
|
||||
/*
 * SdPageKeyEntity -- node of a singly linked list of page keys: one
 * SdPageKey plus a pointer to the next node.
 */
typedef struct SdPageKeyEntity
|
||||
{
|
||||
SdPageKey spk; /* the page key held by this node */
|
||||
struct SdPageKeyEntity *next; /* next node in the list */
|
||||
} SdPageKeyEntity;
|
||||
|
||||
/*
 * SdPageKeyList -- a list of SdPageKeyEntity nodes, tracked by pointers to
 * its first and last node.
 */
typedef struct SdPageKeyList
|
||||
{
|
||||
SdPageKeyEntity *head; /* first node */
|
||||
SdPageKeyEntity *tail; /* last node */
|
||||
} SdPageKeyList;
|
||||
|
||||
/*
 * LdPageKey -- wrapper around a single SdPageKey.
 * NOTE(review): presumably the key type for the local-disk page store --
 * confirm against the code that uses it.
 */
typedef struct LdPageKey
|
||||
{
|
||||
SdPageKey sk; /* the wrapped page key */
|
||||
} LdPageKey;
|
||||
|
||||
/*
 * WalLdPageKey -- a page key extended with a 64-bit LSN and a one-byte
 * partition value.
 * NOTE(review): presumably the key under which WAL for a page is stored
 * locally -- confirm against the code that uses it.
 */
typedef struct WalLdPageKey
|
||||
{
|
||||
SdPageKey sk; /* the page this entry belongs to */
|
||||
uint64 pageLsn; /* LSN associated with the entry */
|
||||
uint8 partition; /* partition value; meaning not visible here -- TODO confirm */
|
||||
} WalLdPageKey;
|
||||
|
||||
/*
 * OriginDPageKey -- a PageKey paired with an integer operation code.
 */
typedef struct OriginDPageKey
|
||||
{
|
||||
PageKey pk; /* the page the operation applies to */
|
||||
/*
|
||||
* Operation code.  The field name is a misspelling of "operation" (sic);
|
||||
* it is kept as-is because renaming it would break existing users.
|
||||
* NOTE(review): presumably one of DROP / TRUNCATE / EVICT defined earlier
|
||||
* in this header -- confirm.
|
||||
*/
int opration;
|
||||
} OriginDPageKey;
|
||||
|
||||
/*
 * SdPageValue -- a page key together with a BLKSZ-byte buffer holding the
 * page content.
 */
typedef struct SdPageValue
|
||||
{
|
||||
SdPageKey pk; /* key of the stored page */
|
||||
uint8 pagecontent[BLKSZ]; /* page content, BLKSZ bytes */
|
||||
} SdPageValue;
|
||||
|
||||
|
||||
|
||||
/*
 * DPageKey -- a PageKey with a "deleted" flag and a one-byte operation code.
 */
typedef struct DPageKey
|
||||
{
|
||||
PageKey pk; /* the page this entry refers to */
|
||||
bool pagedeleted; /* deletion flag; exact meaning not visible here -- TODO confirm */
|
||||
/* NOTE(review): presumably one of DROP / TRUNCATE / EVICT -- confirm */
uint8_t operation;
|
||||
} DPageKey;
|
||||
|
||||
|
||||
|
||||
/*
 * kvStruct -- one key/value item: an LdPageKey, a pointer to a byte buffer,
 * a length and an LSN.
 */
typedef struct kvStruct {
|
||||
LdPageKey lpk; /* key of the item */
|
||||
uint8_t *buf; /* pointer to the item's bytes */
|
||||
int32 length; /* presumably the number of bytes at buf -- TODO confirm */
|
||||
uint64_t lsn; /* LSN associated with the item */
|
||||
} kvStruct;
|
||||
|
||||
//extern SingleKeyArray *MultiKeyArrays;
|
||||
|
||||
extern MDB_env *pageEnv;
|
||||
extern MDB_env *walEnv;
|
||||
|
||||
extern MDB_dbi pageDbi;
|
||||
extern MDB_dbi walDbi;
|
||||
|
||||
extern MDB_txn *pageTxn;
|
||||
extern MDB_txn *walTxn;
|
||||
extern MDB_cursor *cursor;
|
||||
|
||||
// MDB_stat mst;
|
||||
// MDB_cursor_op op;
|
||||
|
||||
extern void InitSecondBufferMeta(void);
|
||||
extern void InitSecondBufferHash(void);
|
||||
|
||||
extern void InitDPageKeyArray(void);
|
||||
|
||||
extern void InitPageDBEnv(void);
|
||||
extern void InitWalDBEnv(void);
|
||||
|
||||
extern void storeWalInLocalBuffer(kvStruct *ks,int32 length);
|
||||
extern void ReceivePageFromDataBuffer(PageKey *pk, uint8_t *buffer); /* call this to store a page when it is evicted from the data buffer */
|
||||
extern void GetPageFromCurrentNode(PageKey pk,Bufrd *bufrd); /* asynchronously deletes the old page version and its WAL; call this when moving a page from ld/sdb to db */
|
||||
extern Bufrd GetWalFromLd(PageKey *pk);
|
||||
extern Bufrd GetWalFromLocalBuffer(WalLdPageKey *pk);
|
||||
extern void AddOneItemToDPArray(OriginDPageKey pk);
|
||||
extern void SecondBufferMain(void);
|
||||
extern void ClosePageDBEnv(void);
|
||||
extern void CloseWalEnv(void);
|
||||
|
||||
extern void CreateSecondBufferLWLocks(void);
|
||||
extern Size SecondBufferLWLockShmemSize(void);
|
||||
extern Size SecondBufferShmemSize(void);
|
@ -189,6 +189,8 @@ typedef struct BufferDesc
|
||||
BufferTag tag; /* ID of page contained in buffer */
|
||||
int buf_id; /* buffer's index number (from 0) */
|
||||
|
||||
bool isPreCacheEscape; /* escape from clock algorithm */
|
||||
|
||||
/* state of the tag, containing flags, refcount and usagecount */
|
||||
pg_atomic_uint32 state;
|
||||
|
||||
|
@ -79,8 +79,15 @@ extern int bgwriter_flush_after;
|
||||
extern bool bulk_io_is_in_progress;
|
||||
extern int bulk_io_in_progress_count;
|
||||
|
||||
extern bool isPreCache;
|
||||
extern bool isPreCacheTable;
|
||||
extern bool isPreCacheIndex;
|
||||
extern bool isPreCacheIndexDone;
|
||||
extern bool needPreCacheEscape;
|
||||
extern bool needUnpreCacheEscape;
|
||||
extern bool isPreCacheAction;
|
||||
extern Oid preCacheNodeOid;
|
||||
extern uint16 *preCacheNodesCountPtr;
|
||||
extern Oid *preCacheNodesPtr;
|
||||
|
||||
/* in buf_init.c */
|
||||
extern PGDLLIMPORT char *BufferBlocks;
|
||||
@ -305,4 +312,7 @@ TestForOldSnapshot(Snapshot snapshot, Relation relation, Page page)
|
||||
*/
|
||||
#define PAGEXLOG_BLCKSZ 49152
|
||||
|
||||
/* Max preCacheNodes */
|
||||
#define NPreCacheNodes 128
|
||||
|
||||
#endif /* BUFMGR_H */
|
||||
|
@ -214,23 +214,23 @@ typedef struct WalList
|
||||
slock_t append_lck;
|
||||
} WalList;
|
||||
|
||||
/*
|
||||
for secondbufferhash code
|
||||
*/
|
||||
typedef struct PageKey
|
||||
{
|
||||
uint32 dbid;
|
||||
uint32 relid;
|
||||
uint32 forkno;
|
||||
uint32 blkno;
|
||||
} PageKey;
|
||||
// /*
|
||||
// for secondbufferhash code
|
||||
// */
|
||||
// typedef struct SdPageKey
|
||||
// {
|
||||
// uint32 dbid;
|
||||
// uint32 relid;
|
||||
// uint32 forkno;
|
||||
// uint32 blkno;
|
||||
// } SdPageKey;
|
||||
|
||||
typedef struct PageValue
|
||||
{
|
||||
PageKey pk;
|
||||
uint8_t page[BLKSZ];
|
||||
uint8_t pageLsn[LSNSZ];
|
||||
} PageVlue;
|
||||
// typedef struct SdPageValue
|
||||
// {
|
||||
// SdPageKey pk;
|
||||
// uint8_t page[BLKSZ];
|
||||
// uint8_t pageLsn[LSNSZ];
|
||||
// } SdPageValue;
|
||||
|
||||
|
||||
//**************for fs meta************
|
||||
@ -678,9 +678,6 @@ extern void RememberSimpleDeadLock(PGPROC *proc1,
|
||||
extern void InitDeadLockChecking(void);
|
||||
|
||||
extern int LockWaiterCount(const LOCKTAG *locktag);
|
||||
extern void InitSecondBufferHash(void);
|
||||
extern PageValue *SetupSecondBufferInTable(const PageKey *pageKey);
|
||||
extern PageValue *FindSecondBufferInTable(const PageKey *pageKey);
|
||||
|
||||
#ifdef LOCK_DEBUG
|
||||
extern void DumpLocks(PGPROC *proc);
|
||||
|
@ -42,6 +42,7 @@ typedef enum
|
||||
PMSIGNAL_ADVANCE_STATE_MACHINE, /* advance postmaster's state machine */
|
||||
PMSIGNAL_PARALLEL_FLUSH_WORKER,
|
||||
PMSIGNAL_CLEAN_LOGINDEX_WORKER,
|
||||
PMSIGNAL_SECONDBUFFER_WORKER,
|
||||
NUM_PMSIGNALS /* Must be last value of enum! */
|
||||
} PMSignalReason;
|
||||
|
||||
|
@ -383,7 +383,7 @@ extern PGPROC *PreparedXactProcs;
|
||||
* operation. Startup process and WAL receiver also consume 2 slots, but WAL
|
||||
* writer is launched only after startup has exited, so we only need 5 slots.
|
||||
*/
|
||||
#define NUM_AUXILIARY_PROCS 5
|
||||
#define NUM_AUXILIARY_PROCS 10
|
||||
|
||||
/* configurable options */
|
||||
extern PGDLLIMPORT int DeadlockTimeout;
|
||||
|
@ -10,6 +10,7 @@
|
||||
#ifndef BACKEND_STATUS_H
|
||||
#define BACKEND_STATUS_H
|
||||
|
||||
#include "access/xlogdefs.h"
|
||||
#include "datatype/timestamp.h"
|
||||
#include "libpq/pqcomm.h"
|
||||
#include "miscadmin.h" /* for BackendType */
|
||||
@ -317,5 +318,7 @@ extern PgBackendStatus *pgstat_fetch_stat_beentry(int beid);
|
||||
extern LocalPgBackendStatus *pgstat_fetch_stat_local_beentry(int beid);
|
||||
extern char *pgstat_clip_activity(const char *raw_activity);
|
||||
|
||||
/*
 * Returns an XLogRecPtr and takes no arguments.  Declared with "(void)" so
 * the compiler has a real prototype and rejects calls that pass arguments;
 * an empty "()" leaves the parameter list unspecified in pre-C23 C.
 * NOTE(review): judging by the name, this presumably reports the minimum
 * LSN across all standbys -- confirm against the implementation.
 */
extern XLogRecPtr He3DBQueryMinLsnFromAllStanby(void);
|
||||
|
||||
|
||||
#endif /* BACKEND_STATUS_H */
|
||||
|
@ -270,10 +270,14 @@ extern PGDLLIMPORT char *ConfigFileName;
|
||||
extern char *HbaFileName;
|
||||
extern char *IdentFileName;
|
||||
extern char *external_pid_file;
|
||||
extern char *client_application_name;
|
||||
|
||||
extern PGDLLIMPORT char *application_name;
|
||||
extern PGDLLIMPORT bool push_standby;
|
||||
extern PGDLLIMPORT bool he3_point_in_time_recovery;
|
||||
extern PGDLLIMPORT bool he3mirror;
|
||||
extern PGDLLIMPORT bool pgmirror;
|
||||
|
||||
|
||||
extern int tcp_keepalives_idle;
|
||||
extern int tcp_keepalives_interval;
|
||||
|
@ -69,6 +69,7 @@ enum config_group
|
||||
WAL_ARCHIVING,
|
||||
WAL_ARCHIVE_RECOVERY,
|
||||
WAL_RECOVERY_TARGET,
|
||||
WAL_SEND_LSN,
|
||||
REPLICATION_SENDING,
|
||||
REPLICATION_PRIMARY,
|
||||
REPLICATION_STANDBY,
|
||||
|
@ -3,14 +3,17 @@
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include "utils/pg_lsn.h"
|
||||
#include "storage/relfilenode.h"
|
||||
|
||||
typedef struct{
|
||||
typedef struct
|
||||
{
|
||||
uint8_t *buf;
|
||||
size_t count;
|
||||
size_t cap;
|
||||
} Bufrd;
|
||||
|
||||
typedef struct{
|
||||
typedef struct
|
||||
{
|
||||
int64_t fd;
|
||||
int32_t error;
|
||||
|
||||
@ -63,7 +66,8 @@ extern Bufrd dataRead(int64_t fd,
|
||||
extern void free_dataRead(uint8_t *buf, size_t count, size_t cap);
|
||||
|
||||
extern Bufrd readfs(int64_t fd, int64_t offset, uint32_t size);
|
||||
extern int batchRead(uint8_t *buf, uint32_t timeline, uint64_t startPtr, bool needStore);
|
||||
extern int batchRead(uint8_t *buf, uint32_t timeline, uint64_t startPtr,uint64_t endPtr, bool needStore);
|
||||
extern int batchReadForTools(uint8_t *buf, uint32_t timeline, uint64_t startPtr,uint64_t endPtr, bool needStore);
|
||||
extern uint8_t kvwrite(XLogItem *xlogItem);
|
||||
extern uint8_t flushwals(XLogItem *xlogItem, uint32_t timeline);
|
||||
extern uint8_t kvflush(XLogRecPtr lsn);
|
||||
@ -76,7 +80,14 @@ extern Bufrd ReadWalsByPage(uint32_t dbid,
|
||||
extern void InsertConsistToKV(uint64_t lsn);
|
||||
extern uint64_t GetConsistLsn(uint64_t lsn);
|
||||
extern void DelConsistLsns(uint64_t lsn);
|
||||
extern void DelRangeWals(uint32_t timeline, uint64_t startPtr,uint64_t endPtr);
|
||||
//extern void ReceivePageFromDataBuffer(PageKey *pk, uint8_t *buffer); //when evict one page out databuffer, we should call this to store the page.
|
||||
extern uint8_t EvictOnePageOutOfMemory(PageKey pageKey, char *value);
|
||||
|
||||
//GetPageFromCurrentNode(PageKey *pk);
|
||||
extern Bufrd MoveOnePageToMemory(PageKey pageKey);
|
||||
extern Bufrd GetWalsFromDisk(PageKey pageKey);
|
||||
|
||||
//extern Bufrd GetWalsFromDisk(PageKey pageKey);
|
||||
|
||||
|
||||
extern void RemoveBufferFromLocal(uint32_t dbid, uint32_t relid, uint32_t forkno, uint32_t blkno);
|
||||
|
@ -63,7 +63,6 @@ enum SysCacheIdentifier
|
||||
FOREIGNSERVERNAME,
|
||||
FOREIGNSERVEROID,
|
||||
FOREIGNTABLEREL,
|
||||
HOTDATADATNAMERELNAME,
|
||||
INDEXRELID,
|
||||
LANGNAME,
|
||||
LANGOID,
|
||||
|
Loading…
Reference in New Issue
Block a user