Skip to content

Commit e8738de

Browse files
committed
[PBCKP-146] truncate cfm files (but calc CRC for whole)
- store cfm files truncated to their non-zero head, with coarse granularity (64-4096 bytes)
- but calculate the CRC for the whole file
- remember the whole file size and use it for CRC comparison during validation
1 parent ed47e49 commit e8738de

File tree

10 files changed

+406
-113
lines changed

10 files changed

+406
-113
lines changed

src/archive.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1375,11 +1375,11 @@ get_wal_file(const char *filename, const char *from_fullpath,
13751375
#ifdef HAVE_LIBZ
13761376
/* If requested file is regular WAL segment, then try to open it with '.gz' suffix... */
13771377
if (IsXLogFileName(filename))
1378-
rc = fio_send_file_gz(from_fullpath_gz, to_fullpath, out, &errmsg);
1378+
rc = fio_send_file_gz(from_fullpath_gz, out, &errmsg);
13791379
if (rc == FILE_MISSING)
13801380
#endif
13811381
/* ... failing that, use uncompressed */
1382-
rc = fio_send_file(from_fullpath, to_fullpath, out, NULL, &errmsg);
1382+
rc = fio_send_file(from_fullpath, out, false, NULL, &errmsg);
13831383

13841384
/* When not in prefetch mode, try to use partial file */
13851385
if (rc == FILE_MISSING && !prefetch_mode && IsXLogFileName(filename))
@@ -1389,13 +1389,13 @@ get_wal_file(const char *filename, const char *from_fullpath,
13891389
#ifdef HAVE_LIBZ
13901390
/* '.gz.partial' goes first ... */
13911391
snprintf(from_partial, sizeof(from_partial), "%s.gz.partial", from_fullpath);
1392-
rc = fio_send_file_gz(from_partial, to_fullpath, out, &errmsg);
1392+
rc = fio_send_file_gz(from_partial, out, &errmsg);
13931393
if (rc == FILE_MISSING)
13941394
#endif
13951395
{
13961396
/* ... failing that, use '.partial' */
13971397
snprintf(from_partial, sizeof(from_partial), "%s.partial", from_fullpath);
1398-
rc = fio_send_file(from_partial, to_fullpath, out, NULL, &errmsg);
1398+
rc = fio_send_file(from_partial, out, false, NULL, &errmsg);
13991399
}
14001400

14011401
if (rc == SEND_OK)

src/catalog.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1069,6 +1069,7 @@ get_backup_filelist(pgBackup *backup, bool strict)
10691069
char linked[MAXPGPATH];
10701070
char compress_alg_string[MAXPGPATH];
10711071
int64 write_size,
1072+
full_size,
10721073
mode, /* bit length of mode_t depends on platforms */
10731074
is_datafile,
10741075
is_cfs,
@@ -1087,6 +1088,8 @@ get_backup_filelist(pgBackup *backup, bool strict)
10871088

10881089
get_control_value_str(buf, "path", path, sizeof(path),true);
10891090
get_control_value_int64(buf, "size", &write_size, true);
1091+
if (!get_control_value_int64(buf, "full_size", &full_size, false))
1092+
full_size = write_size;
10901093
get_control_value_int64(buf, "mode", &mode, true);
10911094
get_control_value_int64(buf, "is_datafile", &is_datafile, true);
10921095
get_control_value_int64(buf, "is_cfs", &is_cfs, false);
@@ -1097,6 +1100,7 @@ get_backup_filelist(pgBackup *backup, bool strict)
10971100

10981101
file = pgFileInit(path);
10991102
file->write_size = (int64) write_size;
1103+
file->uncompressed_size = full_size;
11001104
file->mode = (mode_t) mode;
11011105
file->is_datafile = is_datafile ? true : false;
11021106
file->is_cfs = is_cfs ? true : false;
@@ -2561,6 +2565,11 @@ write_backup_filelist(pgBackup *backup, parray *files, const char *root,
25612565
file->external_dir_num,
25622566
file->dbOid);
25632567

2568+
if (file->uncompressed_size != 0 &&
2569+
file->uncompressed_size != file->write_size)
2570+
len += sprintf(line+len, ",\"full_size\":\"" INT64_FORMAT "\"",
2571+
file->uncompressed_size);
2572+
25642573
if (file->is_datafile)
25652574
len += sprintf(line+len, ",\"segno\":\"%d\"", file->segno);
25662575

src/data.c

Lines changed: 28 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -799,6 +799,7 @@ backup_non_data_file(pgFile *file, pgFile *prev_file,
799799
* and its mtime is less than parent backup start time ... */
800800
if ((pg_strcasecmp(file->name, RELMAPPER_FILENAME) != 0) &&
801801
(prev_file && file->exists_in_prev &&
802+
file->size == prev_file->size &&
802803
file->mtime <= parent_backup_time))
803804
{
804805
/*
@@ -1387,10 +1388,12 @@ backup_non_data_file_internal(const char *from_fullpath,
13871388
const char *to_fullpath, pgFile *file,
13881389
bool missing_ok)
13891390
{
1390-
FILE *in = NULL;
13911391
FILE *out = NULL;
1392-
ssize_t read_len = 0;
1393-
char *buf = NULL;
1392+
char *errmsg = NULL;
1393+
int rc;
1394+
bool cut_zero_tail;
1395+
1396+
cut_zero_tail = file->forkName == cfm;
13941397

13951398
INIT_FILE_CRC32(true, file->crc);
13961399

@@ -1412,107 +1415,43 @@ backup_non_data_file_internal(const char *from_fullpath,
14121415

14131416
/* backup remote file */
14141417
if (fio_is_remote(FIO_DB_HOST))
1415-
{
1416-
char *errmsg = NULL;
1417-
int rc = fio_send_file(from_fullpath, to_fullpath, out, file, &errmsg);
1418+
rc = fio_send_file(from_fullpath, out, cut_zero_tail, file, &errmsg);
1419+
else
1420+
rc = fio_send_file_local(from_fullpath, out, cut_zero_tail, file, &errmsg);
14181421

1419-
/* handle errors */
1420-
if (rc == FILE_MISSING)
1421-
{
1422-
/* maybe deleted, it's not error in case of backup */
1423-
if (missing_ok)
1424-
{
1425-
elog(LOG, "File \"%s\" is not found", from_fullpath);
1426-
file->write_size = FILE_NOT_FOUND;
1427-
goto cleanup;
1428-
}
1429-
else
1430-
elog(ERROR, "File \"%s\" is not found", from_fullpath);
1431-
}
1432-
else if (rc == WRITE_FAILED)
1433-
elog(ERROR, "Cannot write to \"%s\": %s", to_fullpath, strerror(errno));
1434-
else if (rc != SEND_OK)
1422+
/* handle errors */
1423+
if (rc == FILE_MISSING)
1424+
{
1425+
/* maybe deleted, it's not error in case of backup */
1426+
if (missing_ok)
14351427
{
1436-
if (errmsg)
1437-
elog(ERROR, "%s", errmsg);
1438-
else
1439-
elog(ERROR, "Cannot access remote file \"%s\"", from_fullpath);
1428+
elog(LOG, "File \"%s\" is not found", from_fullpath);
1429+
file->write_size = FILE_NOT_FOUND;
1430+
goto cleanup;
14401431
}
1441-
1442-
pg_free(errmsg);
1432+
else
1433+
elog(ERROR, "File \"%s\" is not found", from_fullpath);
14431434
}
1444-
/* backup local file */
1445-
else
1435+
else if (rc == WRITE_FAILED)
1436+
elog(ERROR, "Cannot write to \"%s\": %s", to_fullpath, strerror(errno));
1437+
else if (rc != SEND_OK)
14461438
{
1447-
/* open source file for read */
1448-
in = fopen(from_fullpath, PG_BINARY_R);
1449-
if (in == NULL)
1450-
{
1451-
/* maybe deleted, it's not error in case of backup */
1452-
if (errno == ENOENT)
1453-
{
1454-
if (missing_ok)
1455-
{
1456-
elog(LOG, "File \"%s\" is not found", from_fullpath);
1457-
file->write_size = FILE_NOT_FOUND;
1458-
goto cleanup;
1459-
}
1460-
else
1461-
elog(ERROR, "File \"%s\" is not found", from_fullpath);
1462-
}
1463-
1464-
elog(ERROR, "Cannot open file \"%s\": %s", from_fullpath,
1465-
strerror(errno));
1466-
}
1467-
1468-
/* disable stdio buffering for local input/output files to avoid triple buffering */
1469-
setvbuf(in, NULL, _IONBF, BUFSIZ);
1470-
setvbuf(out, NULL, _IONBF, BUFSIZ);
1471-
1472-
/* allocate 64kB buffer */
1473-
buf = pgut_malloc(CHUNK_SIZE);
1474-
1475-
/* copy content and calc CRC */
1476-
for (;;)
1477-
{
1478-
read_len = fread(buf, 1, CHUNK_SIZE, in);
1479-
1480-
if (ferror(in))
1481-
elog(ERROR, "Cannot read from file \"%s\": %s",
1482-
from_fullpath, strerror(errno));
1483-
1484-
if (read_len > 0)
1485-
{
1486-
if (fwrite(buf, 1, read_len, out) != read_len)
1487-
elog(ERROR, "Cannot write to file \"%s\": %s", to_fullpath,
1488-
strerror(errno));
1489-
1490-
/* update CRC */
1491-
COMP_FILE_CRC32(true, file->crc, buf, read_len);
1492-
file->read_size += read_len;
1493-
}
1494-
1495-
if (feof(in))
1496-
break;
1497-
}
1439+
if (errmsg)
1440+
elog(ERROR, "%s", errmsg);
1441+
else
1442+
elog(ERROR, "Cannot access remote file \"%s\"", from_fullpath);
14981443
}
14991444

1500-
file->write_size = (int64) file->read_size;
1445+
pg_free(errmsg); /* ????? */
15011446

1502-
if (file->write_size > 0)
1503-
file->uncompressed_size = file->write_size;
1447+
file->uncompressed_size = file->read_size;
15041448

15051449
cleanup:
15061450
/* finish CRC calculation and store into pgFile */
15071451
FIN_FILE_CRC32(true, file->crc);
15081452

1509-
if (in && fclose(in))
1510-
elog(ERROR, "Cannot close the file \"%s\": %s", from_fullpath, strerror(errno));
1511-
15121453
if (out && fclose(out))
15131454
elog(ERROR, "Cannot close the file \"%s\": %s", to_fullpath, strerror(errno));
1514-
1515-
pg_free(buf);
15161455
}
15171456

15181457
/*

src/dir.c

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,67 @@ pgFileGetCRC(const char *file_path, bool use_crc32c, bool missing_ok)
324324
return crc;
325325
}
326326

327+
static const char zerobuf[4096] = {0};
328+
329+
/*
330+
* Read the local file to compute CRC for it extended to real_size.
*
* The CRC is first accumulated over the bytes actually read from file_path.
* If fewer than real_size bytes were read, the CRC is then continued over
* zero bytes until real_size bytes in total have been covered, i.e. the
* result is the CRC of the file as if it were padded with zeros up to
* real_size. If the file already holds real_size bytes or more, no padding
* is added and the CRC covers everything that was read.
*
* file_path  - path of the local file to read.
* use_crc32c - forwarded unchanged to the INIT/COMP/FIN_FILE_CRC32 macros
*              (presumably selects the CRC variant; confirm against the
*              macro definitions).
* real_size  - total number of bytes the CRC should cover.
*
* Returns the finalized CRC value. Open and read failures, as well as a
* pending interrupt, are reported through elog(ERROR).
331+
*/
332+
pg_crc32
333+
pgFileGetCRCForTruncated(const char *file_path, bool use_crc32c, int64_t real_size)
334+
{
335+
FILE *fp;
336+
pg_crc32 crc = 0;
337+
char *buf;
338+
size_t len = 0;
/* number of bytes fed into the CRC so far (file data plus zero padding) */
339+
int64_t read_size = 0;
340+
341+
INIT_FILE_CRC32(use_crc32c, crc);
342+
343+
/* open file in binary read mode */
344+
fp = fopen(file_path, PG_BINARY_R);
345+
if (fp == NULL)
346+
{
/*
 * NOTE(review): fp is used unconditionally below, so elog(ERROR) is
 * presumably expected not to return here; confirm.
 */
347+
elog(ERROR, "Cannot open file \"%s\": %s",
348+
file_path, strerror(errno));
349+
}
350+
351+
/* disable stdio buffering */
352+
setvbuf(fp, NULL, _IONBF, BUFSIZ);
/* the file is read in chunks of STDIO_BUFSIZE bytes into this buffer */
353+
buf = pgut_malloc(STDIO_BUFSIZE);
354+
355+
/* calc CRC of file */
356+
for (;;)
357+
{
/* checked once per chunk so that a long read can be aborted */
358+
if (interrupted)
359+
elog(ERROR, "interrupted during CRC calculation");
360+
361+
len = fread(buf, 1, STDIO_BUFSIZE, fp);
362+
363+
if (ferror(fp))
364+
elog(ERROR, "Cannot read \"%s\": %s", file_path, strerror(errno));
365+
366+
/* update CRC */
367+
COMP_FILE_CRC32(use_crc32c, crc, buf, len);
368+
369+
read_size += len;
370+
/* the last (possibly short) chunk has already been added to the CRC above */
371+
if (feof(fp))
372+
break;
373+
}
374+
/*
 * Account for the missing tail: keep feeding zero bytes from zerobuf,
 * at most sizeof(zerobuf) per step, until real_size bytes are covered.
 * The loop does not run when read_size is already >= real_size.
 */
375+
while (read_size < real_size)
376+
{
377+
len = Min(real_size - read_size, sizeof(zerobuf));
378+
COMP_FILE_CRC32(use_crc32c, crc, zerobuf, len);
379+
read_size += len;
380+
}
381+
382+
FIN_FILE_CRC32(use_crc32c, crc);
/* the return value of fclose() is not checked here */
383+
fclose(fp);
384+
pg_free(buf);
385+
386+
return crc;
387+
}
327388
/*
328389
* Read the local file to compute its CRC.
329390
* We cannot make decision about file decompression because
@@ -1812,7 +1873,7 @@ write_database_map(pgBackup *backup, parray *database_map, parray *backup_files_
18121873
FIO_BACKUP_HOST);
18131874
file->crc = pgFileGetCRC(database_map_path, true, false);
18141875
file->write_size = file->size;
1815-
file->uncompressed_size = file->read_size;
1876+
file->uncompressed_size = file->size;
18161877

18171878
parray_append(backup_files_list, file);
18181879
}

src/merge.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1078,7 +1078,7 @@ merge_files(void *arg)
10781078
tmp_file->hdr_crc = file->hdr_crc;
10791079
}
10801080
else
1081-
tmp_file->uncompressed_size = tmp_file->write_size;
1081+
tmp_file->uncompressed_size = tmp_file->uncompressed_size;
10821082

10831083
/* Copy header metadata from old map into a new one */
10841084
tmp_file->n_headers = file->n_headers;

src/pg_probackup.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -348,8 +348,8 @@ typedef enum ShowFormat
348348
#define PROGRAM_VERSION "2.5.8"
349349

350350
/* update when remote agent API or behaviour changes */
351-
#define AGENT_PROTOCOL_VERSION 20501
352-
#define AGENT_PROTOCOL_VERSION_STR "2.5.1"
351+
#define AGENT_PROTOCOL_VERSION 20509
352+
#define AGENT_PROTOCOL_VERSION_STR "2.5.9"
353353

354354
/* update only when changing storage format */
355355
#define STORAGE_FORMAT_VERSION "2.4.4"
@@ -1077,6 +1077,7 @@ extern void fio_pgFileDelete(pgFile *file, const char *full_path);
10771077
extern void pgFileFree(void *file);
10781078

10791079
extern pg_crc32 pgFileGetCRC(const char *file_path, bool use_crc32c, bool missing_ok);
1080+
extern pg_crc32 pgFileGetCRCForTruncated(const char *file_path, bool use_crc32c, int64_t real_size);
10801081
extern pg_crc32 pgFileGetCRCgz(const char *file_path, bool use_crc32c, bool missing_ok);
10811082

10821083
extern int pgFileMapComparePath(const void *f1, const void *f2);
@@ -1240,9 +1241,11 @@ extern int fio_copy_pages(const char *to_fullpath, const char *from_fullpath, pg
12401241
XLogRecPtr horizonLsn, int calg, int clevel, uint32 checksum_version,
12411242
bool use_pagemap, BlockNumber *err_blknum, char **errormsg);
12421243
/* return codes for fio_send_pages */
1243-
extern int fio_send_file_gz(const char *from_fullpath, const char *to_fullpath, FILE* out, char **errormsg);
1244-
extern int fio_send_file(const char *from_fullpath, const char *to_fullpath, FILE* out,
1244+
extern int fio_send_file_gz(const char *from_fullpath, FILE* out, char **errormsg);
1245+
extern int fio_send_file(const char *from_fullpath, FILE* out, bool cut_zero_tail,
12451246
pgFile *file, char **errormsg);
1247+
extern int fio_send_file_local(const char *from_fullpath, FILE* out, bool cut_zero_tail,
1248+
pgFile *file, char **errormsg);
12461249

12471250
extern void fio_list_dir(parray *files, const char *root, bool exclude, bool follow_symlink,
12481251
bool add_root, bool backup_logs, bool skip_hidden, int external_dir_num);

0 commit comments

Comments
 (0)