Skip to content

Commit dcc3f62

Browse files
committed
avcodec/flashsv: Avoid deflating data
Currently, priming the zlib decompressor involves compressing data directly after having decompressed it and then decompressing it again in order to set the "dictionary" and to initialize the adler32 checksum. This is wasteful and can be simplified by synthesizing the compressed data via non-compressed (stored) blocks. This reduces the heap usage of the decoding part of fate-vsynth1-flashsv2 from "total heap usage: 9,135 allocs, 9,135 frees, 376,503,427 bytes allocated" to "total heap usage: 2,373 allocs, 2,373 frees, 14,144,083 bytes allocated". Signed-off-by: Andreas Rheinhardt <[email protected]>
1 parent 88cccd1 commit dcc3f62

File tree

1 file changed

+38
-51
lines changed

1 file changed

+38
-51
lines changed

libavcodec/flashsv.c

Lines changed: 38 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -63,11 +63,10 @@ typedef struct FlashSVContext {
6363
AVBufferRef *keyframedata_buf;
6464
uint8_t *keyframe;
6565
BlockInfo *blocks;
66-
uint8_t *deflate_block;
67-
int deflate_block_size;
6866
int color_depth;
6967
int zlibprime_curr, zlibprime_prev;
7068
int diff_start, diff_height;
69+
uint8_t tmp[UINT16_MAX];
7170
} FlashSVContext;
7271

7372
static int decode_hybrid(const uint8_t *sptr, const uint8_t *sptr_end, uint8_t *dptr, int dx, int dy,
@@ -141,41 +140,59 @@ static av_cold int flashsv_decode_init(AVCodecContext *avctx)
141140

142141
static int flashsv2_prime(FlashSVContext *s, const uint8_t *src, int size)
143142
{
144-
z_stream zs;
145143
int zret; // Zlib return code
144+
static const uint8_t zlib_header[] = { 0x78, 0x01 };
145+
uint8_t *data = s->tmpblock;
146+
unsigned remaining;
146147

147148
if (!src)
148149
return AVERROR_INVALIDDATA;
149150

150-
zs.zalloc = NULL;
151-
zs.zfree = NULL;
152-
zs.opaque = NULL;
153-
154151
s->zstream.next_in = src;
155152
s->zstream.avail_in = size;
156-
s->zstream.next_out = s->tmpblock;
153+
s->zstream.next_out = data;
157154
s->zstream.avail_out = s->block_size * 3;
158155
inflate(&s->zstream, Z_SYNC_FLUSH);
159-
160-
if (deflateInit(&zs, 0) != Z_OK)
161-
return -1;
162-
zs.next_in = s->tmpblock;
163-
zs.avail_in = s->block_size * 3 - s->zstream.avail_out;
164-
zs.next_out = s->deflate_block;
165-
zs.avail_out = s->deflate_block_size;
166-
deflate(&zs, Z_SYNC_FLUSH);
167-
deflateEnd(&zs);
156+
remaining = s->block_size * 3 - s->zstream.avail_out;
168157

169158
if ((zret = inflateReset(&s->zstream)) != Z_OK) {
170159
av_log(s->avctx, AV_LOG_ERROR, "Inflate reset error: %d\n", zret);
171160
return AVERROR_UNKNOWN;
172161
}
173162

174-
s->zstream.next_in = s->deflate_block;
175-
s->zstream.avail_in = s->deflate_block_size - zs.avail_out;
176-
s->zstream.next_out = s->tmpblock;
177-
s->zstream.avail_out = s->block_size * 3;
163+
/* Create input for zlib that is equivalent to encoding the output
164+
* from above and decoding it again (the net result of this is that
165+
* the dictionary of past decoded data is correctly primed and
166+
* the adler32 checksum is correctly initialized).
167+
* This is accomplished by synthetizing blocks of uncompressed data
168+
* out of the output from above. See section 3.2.4 of RFC 1951. */
169+
s->zstream.next_in = zlib_header;
170+
s->zstream.avail_in = sizeof(zlib_header);
178171
inflate(&s->zstream, Z_SYNC_FLUSH);
172+
while (remaining > 0) {
173+
unsigned block_size = FFMIN(UINT16_MAX, remaining);
174+
uint8_t header[5];
175+
/* Bit 0: Non-last-block, bits 1-2: BTYPE for uncompressed block */
176+
header[0] = 0;
177+
/* Block size */
178+
AV_WL16(header + 1, block_size);
179+
/* Block size (one's complement) */
180+
AV_WL16(header + 3, block_size ^ 0xFFFF);
181+
s->zstream.next_in = header;
182+
s->zstream.avail_in = sizeof(header);
183+
s->zstream.next_out = s->tmp;
184+
s->zstream.avail_out = sizeof(s->tmp);
185+
zret = inflate(&s->zstream, Z_SYNC_FLUSH);
186+
if (zret != Z_OK)
187+
return AVERROR_UNKNOWN;
188+
s->zstream.next_in = data;
189+
s->zstream.avail_in = block_size;
190+
zret = inflate(&s->zstream, Z_SYNC_FLUSH);
191+
if (zret != Z_OK)
192+
return AVERROR_UNKNOWN;
193+
data += block_size;
194+
remaining -= block_size;
195+
}
179196

180197
return 0;
181198
}
@@ -248,22 +265,6 @@ static int flashsv_decode_block(AVCodecContext *avctx, const AVPacket *avpkt,
248265
return 0;
249266
}
250267

251-
static int calc_deflate_block_size(int tmpblock_size)
252-
{
253-
z_stream zstream;
254-
int size;
255-
256-
zstream.zalloc = Z_NULL;
257-
zstream.zfree = Z_NULL;
258-
zstream.opaque = Z_NULL;
259-
if (deflateInit(&zstream, 0) != Z_OK)
260-
return -1;
261-
size = deflateBound(&zstream, tmpblock_size);
262-
deflateEnd(&zstream);
263-
264-
return size;
265-
}
266-
267268
static int flashsv_decode_frame(AVCodecContext *avctx, void *data,
268269
int *got_frame, AVPacket *avpkt)
269270
{
@@ -322,19 +323,6 @@ static int flashsv_decode_frame(AVCodecContext *avctx, void *data,
322323
"Cannot allocate decompression buffer.\n");
323324
return err;
324325
}
325-
if (s->ver == 2) {
326-
s->deflate_block_size = calc_deflate_block_size(tmpblock_size);
327-
if (s->deflate_block_size <= 0) {
328-
av_log(avctx, AV_LOG_ERROR,
329-
"Cannot determine deflate buffer size.\n");
330-
return -1;
331-
}
332-
if ((err = av_reallocp(&s->deflate_block, s->deflate_block_size)) < 0) {
333-
s->block_size = 0;
334-
av_log(avctx, AV_LOG_ERROR, "Cannot allocate deflate buffer.\n");
335-
return err;
336-
}
337-
}
338326
}
339327
s->block_size = s->block_width * s->block_height;
340328

@@ -570,7 +558,6 @@ static av_cold int flashsv2_decode_end(AVCodecContext *avctx)
570558
av_buffer_unref(&s->keyframedata_buf);
571559
av_freep(&s->blocks);
572560
av_freep(&s->keyframe);
573-
av_freep(&s->deflate_block);
574561
flashsv_decode_end(avctx);
575562

576563
return 0;

0 commit comments

Comments
 (0)