Skip to content

Commit 5c44459

Browse files
committed
Fix a bug that can crash deflate on some input when using Z_FIXED.
This bug was reported by Danilo Ramos of Eideticom, Inc. It has lain in wait 13 years before being found! The bug was introduced in zlib 1.2.2.2, with the addition of the Z_FIXED option. That option forces the use of fixed Huffman codes. For rare inputs with a large number of distant matches, the pending buffer into which the compressed data is written can overwrite the distance symbol table which it overlays. That results in corrupted output due to invalid distances, and can result in out-of-bound accesses, crashing the application. The fix here combines the distance buffer and literal/length buffers into a single symbol buffer. Now three bytes of pending buffer space are opened up for each literal or length/distance pair consumed, instead of the previous two bytes. This assures that the pending buffer cannot overwrite the symbol table, since the maximum fixed code compressed length/distance is 31 bits, and since there are four bytes of pending space for every three bytes of symbol space.
1 parent e99813d commit 5c44459

File tree

3 files changed

+79
-70
lines changed

3 files changed

+79
-70
lines changed

deflate.c

Lines changed: 54 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -255,11 +255,6 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
255255
int wrap = 1;
256256
static const char my_version[] = ZLIB_VERSION;
257257

258-
ushf *overlay;
259-
/* We overlay pending_buf and d_buf+l_buf. This works since the average
260-
* output size for (length,distance) codes is <= 24 bits.
261-
*/
262-
263258
if (version == Z_NULL || version[0] != my_version[0] ||
264259
stream_size != sizeof(z_stream)) {
265260
return Z_VERSION_ERROR;
@@ -329,9 +324,47 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
329324

330325
s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */
331326

332-
overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2);
333-
s->pending_buf = (uchf *) overlay;
334-
s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L);
327+
/* We overlay pending_buf and sym_buf. This works since the average size
328+
* for length/distance pairs over any compressed block is assured to be 31
329+
* bits or less.
330+
*
331+
* Analysis: The longest fixed codes are a length code of 8 bits plus 5
332+
* extra bits, for lengths 131 to 257. The longest fixed distance codes are
333+
* 5 bits plus 13 extra bits, for distances 16385 to 32768. The longest
334+
* possible fixed-codes length/distance pair is then 31 bits total.
335+
*
336+
* sym_buf starts one-fourth of the way into pending_buf. So there are
337+
* three bytes in sym_buf for every four bytes in pending_buf. Each symbol
338+
* in sym_buf is three bytes -- two for the distance and one for the
339+
* literal/length. As each symbol is consumed, the pointer to the next
340+
* sym_buf value to read moves forward three bytes. From that symbol, up to
341+
* 31 bits are written to pending_buf. The closest the written pending_buf
342+
* bits gets to the next sym_buf symbol to read is just before the last
343+
* code is written. At that time, 31*(n-2) bits have been written, just
344+
* after 24*(n-2) bits have been consumed from sym_buf. sym_buf starts at
345+
* 8*n bits into pending_buf. (Note that the symbol buffer fills when n-1
346+
* symbols are written.) The closest the writing gets to what is unread is
347+
* then n+14 bits. Here n is lit_bufsize, which is 16384 by default, and
348+
* can range from 128 to 32768.
349+
*
350+
* Therefore, at a minimum, there are 142 bits of space between what is
351+
* written and what is read in the overlain buffers, so the symbols cannot
352+
* be overwritten by the compressed data. That space is actually 139 bits,
353+
* due to the three-bit fixed-code block header.
354+
*
355+
* That covers the case where either Z_FIXED is specified, forcing fixed
356+
* codes, or when the use of fixed codes is chosen, because that choice
357+
* results in a smaller compressed block than dynamic codes. That latter
358+
* condition then assures that the above analysis also covers all dynamic
359+
* blocks. A dynamic-code block will only be chosen to be emitted if it has
360+
* fewer bits than a fixed-code block would for the same set of symbols.
361+
* Therefore its average symbol length is assured to be less than 31. So
362+
* the compressed data for a dynamic block also cannot overwrite the
363+
* symbols from which it is being constructed.
364+
*/
365+
366+
s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, 4);
367+
s->pending_buf_size = (ulg)s->lit_bufsize * 4;
335368

336369
if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL ||
337370
s->pending_buf == Z_NULL) {
@@ -340,8 +373,12 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
340373
deflateEnd (strm);
341374
return Z_MEM_ERROR;
342375
}
343-
s->d_buf = overlay + s->lit_bufsize/sizeof(ush);
344-
s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize;
376+
s->sym_buf = s->pending_buf + s->lit_bufsize;
377+
s->sym_end = (s->lit_bufsize - 1) * 3;
378+
/* We avoid equality with lit_bufsize*3 because of wraparound at 64K
379+
* on 16 bit machines and because stored blocks are restricted to
380+
* 64K-1 bytes.
381+
*/
345382

346383
s->level = level;
347384
s->strategy = strategy;
@@ -552,7 +589,7 @@ int ZEXPORT deflatePrime (strm, bits, value)
552589

553590
if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
554591
s = strm->state;
555-
if ((Bytef *)(s->d_buf) < s->pending_out + ((Buf_size + 7) >> 3))
592+
if (s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3))
556593
return Z_BUF_ERROR;
557594
do {
558595
put = Buf_size - s->bi_valid;
@@ -1113,7 +1150,6 @@ int ZEXPORT deflateCopy (dest, source)
11131150
#else
11141151
deflate_state *ds;
11151152
deflate_state *ss;
1116-
ushf *overlay;
11171153

11181154

11191155
if (deflateStateCheck(source) || dest == Z_NULL) {
@@ -1133,8 +1169,7 @@ int ZEXPORT deflateCopy (dest, source)
11331169
ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte));
11341170
ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos));
11351171
ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos));
1136-
overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2);
1137-
ds->pending_buf = (uchf *) overlay;
1172+
ds->pending_buf = (uchf *) ZALLOC(dest, ds->lit_bufsize, 4);
11381173

11391174
if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL ||
11401175
ds->pending_buf == Z_NULL) {
@@ -1148,8 +1183,7 @@ int ZEXPORT deflateCopy (dest, source)
11481183
zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size);
11491184

11501185
ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
1151-
ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush);
1152-
ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize;
1186+
ds->sym_buf = ds->pending_buf + ds->lit_bufsize;
11531187

11541188
ds->l_desc.dyn_tree = ds->dyn_ltree;
11551189
ds->d_desc.dyn_tree = ds->dyn_dtree;
@@ -1925,7 +1959,7 @@ local block_state deflate_fast(s, flush)
19251959
FLUSH_BLOCK(s, 1);
19261960
return finish_done;
19271961
}
1928-
if (s->last_lit)
1962+
if (s->sym_next)
19291963
FLUSH_BLOCK(s, 0);
19301964
return block_done;
19311965
}
@@ -2056,7 +2090,7 @@ local block_state deflate_slow(s, flush)
20562090
FLUSH_BLOCK(s, 1);
20572091
return finish_done;
20582092
}
2059-
if (s->last_lit)
2093+
if (s->sym_next)
20602094
FLUSH_BLOCK(s, 0);
20612095
return block_done;
20622096
}
@@ -2131,7 +2165,7 @@ local block_state deflate_rle(s, flush)
21312165
FLUSH_BLOCK(s, 1);
21322166
return finish_done;
21332167
}
2134-
if (s->last_lit)
2168+
if (s->sym_next)
21352169
FLUSH_BLOCK(s, 0);
21362170
return block_done;
21372171
}
@@ -2170,7 +2204,7 @@ local block_state deflate_huff(s, flush)
21702204
FLUSH_BLOCK(s, 1);
21712205
return finish_done;
21722206
}
2173-
if (s->last_lit)
2207+
if (s->sym_next)
21742208
FLUSH_BLOCK(s, 0);
21752209
return block_done;
21762210
}

deflate.h

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ typedef struct internal_state {
217217
/* Depth of each subtree used as tie breaker for trees of equal frequency
218218
*/
219219

220-
uchf *l_buf; /* buffer for literals or lengths */
220+
uchf *sym_buf; /* buffer for distances and literals/lengths */
221221

222222
uInt lit_bufsize;
223223
/* Size of match buffer for literals/lengths. There are 4 reasons for
@@ -239,13 +239,8 @@ typedef struct internal_state {
239239
* - I can't count above 4
240240
*/
241241

242-
uInt last_lit; /* running index in l_buf */
243-
244-
ushf *d_buf;
245-
/* Buffer for distances. To simplify the code, d_buf and l_buf have
246-
* the same number of elements. To use different lengths, an extra flag
247-
* array would be necessary.
248-
*/
242+
uInt sym_next; /* running index in sym_buf */
243+
uInt sym_end; /* symbol table full when sym_next reaches this */
249244

250245
ulg opt_len; /* bit length of current block with optimal trees */
251246
ulg static_len; /* bit length of current block with static trees */
@@ -325,20 +320,22 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf,
325320

326321
# define _tr_tally_lit(s, c, flush) \
327322
{ uch cc = (c); \
328-
s->d_buf[s->last_lit] = 0; \
329-
s->l_buf[s->last_lit++] = cc; \
323+
s->sym_buf[s->sym_next++] = 0; \
324+
s->sym_buf[s->sym_next++] = 0; \
325+
s->sym_buf[s->sym_next++] = cc; \
330326
s->dyn_ltree[cc].Freq++; \
331-
flush = (s->last_lit == s->lit_bufsize-1); \
327+
flush = (s->sym_next == s->sym_end); \
332328
}
333329
# define _tr_tally_dist(s, distance, length, flush) \
334330
{ uch len = (uch)(length); \
335331
ush dist = (ush)(distance); \
336-
s->d_buf[s->last_lit] = dist; \
337-
s->l_buf[s->last_lit++] = len; \
332+
s->sym_buf[s->sym_next++] = dist; \
333+
s->sym_buf[s->sym_next++] = dist >> 8; \
334+
s->sym_buf[s->sym_next++] = len; \
338335
dist--; \
339336
s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
340337
s->dyn_dtree[d_code(dist)].Freq++; \
341-
flush = (s->last_lit == s->lit_bufsize-1); \
338+
flush = (s->sym_next == s->sym_end); \
342339
}
343340
#else
344341
# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c)

trees.c

Lines changed: 14 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -416,7 +416,7 @@ local void init_block(s)
416416

417417
s->dyn_ltree[END_BLOCK].Freq = 1;
418418
s->opt_len = s->static_len = 0L;
419-
s->last_lit = s->matches = 0;
419+
s->sym_next = s->matches = 0;
420420
}
421421

422422
#define SMALLEST 1
@@ -948,7 +948,7 @@ void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last)
948948

949949
Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ",
950950
opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len,
951-
s->last_lit));
951+
s->sym_next / 3));
952952

953953
if (static_lenb <= opt_lenb) opt_lenb = static_lenb;
954954

@@ -1017,8 +1017,9 @@ int ZLIB_INTERNAL _tr_tally (s, dist, lc)
10171017
unsigned dist; /* distance of matched string */
10181018
unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */
10191019
{
1020-
s->d_buf[s->last_lit] = (ush)dist;
1021-
s->l_buf[s->last_lit++] = (uch)lc;
1020+
s->sym_buf[s->sym_next++] = dist;
1021+
s->sym_buf[s->sym_next++] = dist >> 8;
1022+
s->sym_buf[s->sym_next++] = lc;
10221023
if (dist == 0) {
10231024
/* lc is the unmatched char */
10241025
s->dyn_ltree[lc].Freq++;
@@ -1033,30 +1034,7 @@ int ZLIB_INTERNAL _tr_tally (s, dist, lc)
10331034
s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++;
10341035
s->dyn_dtree[d_code(dist)].Freq++;
10351036
}
1036-
1037-
#ifdef TRUNCATE_BLOCK
1038-
/* Try to guess if it is profitable to stop the current block here */
1039-
if ((s->last_lit & 0x1fff) == 0 && s->level > 2) {
1040-
/* Compute an upper bound for the compressed length */
1041-
ulg out_length = (ulg)s->last_lit*8L;
1042-
ulg in_length = (ulg)((long)s->strstart - s->block_start);
1043-
int dcode;
1044-
for (dcode = 0; dcode < D_CODES; dcode++) {
1045-
out_length += (ulg)s->dyn_dtree[dcode].Freq *
1046-
(5L+extra_dbits[dcode]);
1047-
}
1048-
out_length >>= 3;
1049-
Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ",
1050-
s->last_lit, in_length, out_length,
1051-
100L - out_length*100L/in_length));
1052-
if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1;
1053-
}
1054-
#endif
1055-
return (s->last_lit == s->lit_bufsize-1);
1056-
/* We avoid equality with lit_bufsize because of wraparound at 64K
1057-
* on 16 bit machines and because stored blocks are restricted to
1058-
* 64K-1 bytes.
1059-
*/
1037+
return (s->sym_next == s->sym_end);
10601038
}
10611039

10621040
/* ===========================================================================
@@ -1069,13 +1047,14 @@ local void compress_block(s, ltree, dtree)
10691047
{
10701048
unsigned dist; /* distance of matched string */
10711049
int lc; /* match length or unmatched char (if dist == 0) */
1072-
unsigned lx = 0; /* running index in l_buf */
1050+
unsigned sx = 0; /* running index in sym_buf */
10731051
unsigned code; /* the code to send */
10741052
int extra; /* number of extra bits to send */
10751053

1076-
if (s->last_lit != 0) do {
1077-
dist = s->d_buf[lx];
1078-
lc = s->l_buf[lx++];
1054+
if (s->sym_next != 0) do {
1055+
dist = s->sym_buf[sx++] & 0xff;
1056+
dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8;
1057+
lc = s->sym_buf[sx++];
10791058
if (dist == 0) {
10801059
send_code(s, lc, ltree); /* send a literal byte */
10811060
Tracecv(isgraph(lc), (stderr," '%c' ", lc));
@@ -1100,11 +1079,10 @@ local void compress_block(s, ltree, dtree)
11001079
}
11011080
} /* literal or match pair ? */
11021081

1103-
/* Check that the overlay between pending_buf and d_buf+l_buf is ok: */
1104-
Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx,
1105-
"pendingBuf overflow");
1082+
/* Check that the overlay between pending_buf and sym_buf is ok: */
1083+
Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow");
11061084

1107-
} while (lx < s->last_lit);
1085+
} while (sx < s->sym_next);
11081086

11091087
send_code(s, END_BLOCK, ltree);
11101088
}

0 commit comments

Comments
 (0)