1 | ///////////////////////////////////////////////////////////////////////////////
|
---|
2 | //
|
---|
3 | /// \file index_decoder.c
|
---|
4 | /// \brief Decodes the Index field
|
---|
5 | //
|
---|
6 | // Author: Lasse Collin
|
---|
7 | //
|
---|
8 | // This file has been put into the public domain.
|
---|
9 | // You can do whatever you want with this file.
|
---|
10 | //
|
---|
11 | ///////////////////////////////////////////////////////////////////////////////
|
---|
12 |
|
---|
13 | #include "index_decoder.h"
|
---|
14 | #include "check.h"
|
---|
15 |
|
---|
16 |
|
---|
17 | typedef struct {
|
---|
18 | enum {
|
---|
19 | SEQ_INDICATOR,
|
---|
20 | SEQ_COUNT,
|
---|
21 | SEQ_MEMUSAGE,
|
---|
22 | SEQ_UNPADDED,
|
---|
23 | SEQ_UNCOMPRESSED,
|
---|
24 | SEQ_PADDING_INIT,
|
---|
25 | SEQ_PADDING,
|
---|
26 | SEQ_CRC32,
|
---|
27 | } sequence;
|
---|
28 |
|
---|
29 | /// Memory usage limit
|
---|
30 | uint64_t memlimit;
|
---|
31 |
|
---|
32 | /// Target Index
|
---|
33 | lzma_index *index;
|
---|
34 |
|
---|
35 | /// Pointer give by the application, which is set after
|
---|
36 | /// successful decoding.
|
---|
37 | lzma_index **index_ptr;
|
---|
38 |
|
---|
39 | /// Number of Records left to decode.
|
---|
40 | lzma_vli count;
|
---|
41 |
|
---|
42 | /// The most recent Unpadded Size field
|
---|
43 | lzma_vli unpadded_size;
|
---|
44 |
|
---|
45 | /// The most recent Uncompressed Size field
|
---|
46 | lzma_vli uncompressed_size;
|
---|
47 |
|
---|
48 | /// Position in integers
|
---|
49 | size_t pos;
|
---|
50 |
|
---|
51 | /// CRC32 of the List of Records field
|
---|
52 | uint32_t crc32;
|
---|
53 | } lzma_index_coder;
|
---|
54 |
|
---|
55 |
|
---|
56 | static lzma_ret
|
---|
57 | index_decode(void *coder_ptr, const lzma_allocator *allocator,
|
---|
58 | const uint8_t *restrict in, size_t *restrict in_pos,
|
---|
59 | size_t in_size,
|
---|
60 | uint8_t *restrict out lzma_attribute((__unused__)),
|
---|
61 | size_t *restrict out_pos lzma_attribute((__unused__)),
|
---|
62 | size_t out_size lzma_attribute((__unused__)),
|
---|
63 | lzma_action action lzma_attribute((__unused__)))
|
---|
64 | {
|
---|
65 | lzma_index_coder *coder = coder_ptr;
|
---|
66 |
|
---|
67 | // Similar optimization as in index_encoder.c
|
---|
68 | const size_t in_start = *in_pos;
|
---|
69 | lzma_ret ret = LZMA_OK;
|
---|
70 |
|
---|
71 | while (*in_pos < in_size)
|
---|
72 | switch (coder->sequence) {
|
---|
73 | case SEQ_INDICATOR:
|
---|
74 | // Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or
|
---|
75 | // LZMA_FORMAT_ERROR, because a typical usage case for Index
|
---|
76 | // decoder is when parsing the Stream backwards. If seeking
|
---|
77 | // backward from the Stream Footer gives us something that
|
---|
78 | // doesn't begin with Index Indicator, the file is considered
|
---|
79 | // corrupt, not "programming error" or "unrecognized file
|
---|
80 | // format". One could argue that the application should
|
---|
81 | // verify the Index Indicator before trying to decode the
|
---|
82 | // Index, but well, I suppose it is simpler this way.
|
---|
83 | if (in[(*in_pos)++] != INDEX_INDICATOR)
|
---|
84 | return LZMA_DATA_ERROR;
|
---|
85 |
|
---|
86 | coder->sequence = SEQ_COUNT;
|
---|
87 | break;
|
---|
88 |
|
---|
89 | case SEQ_COUNT:
|
---|
90 | ret = lzma_vli_decode(&coder->count, &coder->pos,
|
---|
91 | in, in_pos, in_size);
|
---|
92 | if (ret != LZMA_STREAM_END)
|
---|
93 | goto out;
|
---|
94 |
|
---|
95 | coder->pos = 0;
|
---|
96 | coder->sequence = SEQ_MEMUSAGE;
|
---|
97 |
|
---|
98 | // Fall through
|
---|
99 |
|
---|
100 | case SEQ_MEMUSAGE:
|
---|
101 | if (lzma_index_memusage(1, coder->count) > coder->memlimit) {
|
---|
102 | ret = LZMA_MEMLIMIT_ERROR;
|
---|
103 | goto out;
|
---|
104 | }
|
---|
105 |
|
---|
106 | // Tell the Index handling code how many Records this
|
---|
107 | // Index has to allow it to allocate memory more efficiently.
|
---|
108 | lzma_index_prealloc(coder->index, coder->count);
|
---|
109 |
|
---|
110 | ret = LZMA_OK;
|
---|
111 | coder->sequence = coder->count == 0
|
---|
112 | ? SEQ_PADDING_INIT : SEQ_UNPADDED;
|
---|
113 | break;
|
---|
114 |
|
---|
115 | case SEQ_UNPADDED:
|
---|
116 | case SEQ_UNCOMPRESSED: {
|
---|
117 | lzma_vli *size = coder->sequence == SEQ_UNPADDED
|
---|
118 | ? &coder->unpadded_size
|
---|
119 | : &coder->uncompressed_size;
|
---|
120 |
|
---|
121 | ret = lzma_vli_decode(size, &coder->pos,
|
---|
122 | in, in_pos, in_size);
|
---|
123 | if (ret != LZMA_STREAM_END)
|
---|
124 | goto out;
|
---|
125 |
|
---|
126 | ret = LZMA_OK;
|
---|
127 | coder->pos = 0;
|
---|
128 |
|
---|
129 | if (coder->sequence == SEQ_UNPADDED) {
|
---|
130 | // Validate that encoded Unpadded Size isn't too small
|
---|
131 | // or too big.
|
---|
132 | if (coder->unpadded_size < UNPADDED_SIZE_MIN
|
---|
133 | || coder->unpadded_size
|
---|
134 | > UNPADDED_SIZE_MAX)
|
---|
135 | return LZMA_DATA_ERROR;
|
---|
136 |
|
---|
137 | coder->sequence = SEQ_UNCOMPRESSED;
|
---|
138 | } else {
|
---|
139 | // Add the decoded Record to the Index.
|
---|
140 | return_if_error(lzma_index_append(
|
---|
141 | coder->index, allocator,
|
---|
142 | coder->unpadded_size,
|
---|
143 | coder->uncompressed_size));
|
---|
144 |
|
---|
145 | // Check if this was the last Record.
|
---|
146 | coder->sequence = --coder->count == 0
|
---|
147 | ? SEQ_PADDING_INIT
|
---|
148 | : SEQ_UNPADDED;
|
---|
149 | }
|
---|
150 |
|
---|
151 | break;
|
---|
152 | }
|
---|
153 |
|
---|
154 | case SEQ_PADDING_INIT:
|
---|
155 | coder->pos = lzma_index_padding_size(coder->index);
|
---|
156 | coder->sequence = SEQ_PADDING;
|
---|
157 |
|
---|
158 | // Fall through
|
---|
159 |
|
---|
160 | case SEQ_PADDING:
|
---|
161 | if (coder->pos > 0) {
|
---|
162 | --coder->pos;
|
---|
163 | if (in[(*in_pos)++] != 0x00)
|
---|
164 | return LZMA_DATA_ERROR;
|
---|
165 |
|
---|
166 | break;
|
---|
167 | }
|
---|
168 |
|
---|
169 | // Finish the CRC32 calculation.
|
---|
170 | coder->crc32 = lzma_crc32(in + in_start,
|
---|
171 | *in_pos - in_start, coder->crc32);
|
---|
172 |
|
---|
173 | coder->sequence = SEQ_CRC32;
|
---|
174 |
|
---|
175 | // Fall through
|
---|
176 |
|
---|
177 | case SEQ_CRC32:
|
---|
178 | do {
|
---|
179 | if (*in_pos == in_size)
|
---|
180 | return LZMA_OK;
|
---|
181 |
|
---|
182 | if (((coder->crc32 >> (coder->pos * 8)) & 0xFF)
|
---|
183 | != in[(*in_pos)++]) {
|
---|
184 | #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
---|
185 | return LZMA_DATA_ERROR;
|
---|
186 | #endif
|
---|
187 | }
|
---|
188 |
|
---|
189 | } while (++coder->pos < 4);
|
---|
190 |
|
---|
191 | // Decoding was successful, now we can let the application
|
---|
192 | // see the decoded Index.
|
---|
193 | *coder->index_ptr = coder->index;
|
---|
194 |
|
---|
195 | // Make index NULL so we don't free it unintentionally.
|
---|
196 | coder->index = NULL;
|
---|
197 |
|
---|
198 | return LZMA_STREAM_END;
|
---|
199 |
|
---|
200 | default:
|
---|
201 | assert(0);
|
---|
202 | return LZMA_PROG_ERROR;
|
---|
203 | }
|
---|
204 |
|
---|
205 | out:
|
---|
206 | // Update the CRC32,
|
---|
207 | coder->crc32 = lzma_crc32(in + in_start,
|
---|
208 | *in_pos - in_start, coder->crc32);
|
---|
209 |
|
---|
210 | return ret;
|
---|
211 | }
|
---|
212 |
|
---|
213 |
|
---|
214 | static void
|
---|
215 | index_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
|
---|
216 | {
|
---|
217 | lzma_index_coder *coder = coder_ptr;
|
---|
218 | lzma_index_end(coder->index, allocator);
|
---|
219 | lzma_free(coder, allocator);
|
---|
220 | return;
|
---|
221 | }
|
---|
222 |
|
---|
223 |
|
---|
224 | static lzma_ret
|
---|
225 | index_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
|
---|
226 | uint64_t *old_memlimit, uint64_t new_memlimit)
|
---|
227 | {
|
---|
228 | lzma_index_coder *coder = coder_ptr;
|
---|
229 |
|
---|
230 | *memusage = lzma_index_memusage(1, coder->count);
|
---|
231 | *old_memlimit = coder->memlimit;
|
---|
232 |
|
---|
233 | if (new_memlimit != 0) {
|
---|
234 | if (new_memlimit < *memusage)
|
---|
235 | return LZMA_MEMLIMIT_ERROR;
|
---|
236 |
|
---|
237 | coder->memlimit = new_memlimit;
|
---|
238 | }
|
---|
239 |
|
---|
240 | return LZMA_OK;
|
---|
241 | }
|
---|
242 |
|
---|
243 |
|
---|
244 | static lzma_ret
|
---|
245 | index_decoder_reset(lzma_index_coder *coder, const lzma_allocator *allocator,
|
---|
246 | lzma_index **i, uint64_t memlimit)
|
---|
247 | {
|
---|
248 | // Remember the pointer given by the application. We will set it
|
---|
249 | // to point to the decoded Index only if decoding is successful.
|
---|
250 | // Before that, keep it NULL so that applications can always safely
|
---|
251 | // pass it to lzma_index_end() no matter did decoding succeed or not.
|
---|
252 | coder->index_ptr = i;
|
---|
253 | *i = NULL;
|
---|
254 |
|
---|
255 | // We always allocate a new lzma_index.
|
---|
256 | coder->index = lzma_index_init(allocator);
|
---|
257 | if (coder->index == NULL)
|
---|
258 | return LZMA_MEM_ERROR;
|
---|
259 |
|
---|
260 | // Initialize the rest.
|
---|
261 | coder->sequence = SEQ_INDICATOR;
|
---|
262 | coder->memlimit = my_max(1, memlimit);
|
---|
263 | coder->count = 0; // Needs to be initialized due to _memconfig().
|
---|
264 | coder->pos = 0;
|
---|
265 | coder->crc32 = 0;
|
---|
266 |
|
---|
267 | return LZMA_OK;
|
---|
268 | }
|
---|
269 |
|
---|
270 |
|
---|
271 | extern lzma_ret
|
---|
272 | lzma_index_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
|
---|
273 | lzma_index **i, uint64_t memlimit)
|
---|
274 | {
|
---|
275 | lzma_next_coder_init(&lzma_index_decoder_init, next, allocator);
|
---|
276 |
|
---|
277 | if (i == NULL)
|
---|
278 | return LZMA_PROG_ERROR;
|
---|
279 |
|
---|
280 | lzma_index_coder *coder = next->coder;
|
---|
281 | if (coder == NULL) {
|
---|
282 | coder = lzma_alloc(sizeof(lzma_index_coder), allocator);
|
---|
283 | if (coder == NULL)
|
---|
284 | return LZMA_MEM_ERROR;
|
---|
285 |
|
---|
286 | next->coder = coder;
|
---|
287 | next->code = &index_decode;
|
---|
288 | next->end = &index_decoder_end;
|
---|
289 | next->memconfig = &index_decoder_memconfig;
|
---|
290 | coder->index = NULL;
|
---|
291 | } else {
|
---|
292 | lzma_index_end(coder->index, allocator);
|
---|
293 | }
|
---|
294 |
|
---|
295 | return index_decoder_reset(coder, allocator, i, memlimit);
|
---|
296 | }
|
---|
297 |
|
---|
298 |
|
---|
299 | extern LZMA_API(lzma_ret)
|
---|
300 | lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit)
|
---|
301 | {
|
---|
302 | lzma_next_strm_init(lzma_index_decoder_init, strm, i, memlimit);
|
---|
303 |
|
---|
304 | strm->internal->supported_actions[LZMA_RUN] = true;
|
---|
305 | strm->internal->supported_actions[LZMA_FINISH] = true;
|
---|
306 |
|
---|
307 | return LZMA_OK;
|
---|
308 | }
|
---|
309 |
|
---|
310 |
|
---|
311 | extern LZMA_API(lzma_ret)
|
---|
312 | lzma_index_buffer_decode(lzma_index **i, uint64_t *memlimit,
|
---|
313 | const lzma_allocator *allocator,
|
---|
314 | const uint8_t *in, size_t *in_pos, size_t in_size)
|
---|
315 | {
|
---|
316 | // Sanity checks
|
---|
317 | if (i == NULL || memlimit == NULL
|
---|
318 | || in == NULL || in_pos == NULL || *in_pos > in_size)
|
---|
319 | return LZMA_PROG_ERROR;
|
---|
320 |
|
---|
321 | // Initialize the decoder.
|
---|
322 | lzma_index_coder coder;
|
---|
323 | return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit));
|
---|
324 |
|
---|
325 | // Store the input start position so that we can restore it in case
|
---|
326 | // of an error.
|
---|
327 | const size_t in_start = *in_pos;
|
---|
328 |
|
---|
329 | // Do the actual decoding.
|
---|
330 | lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size,
|
---|
331 | NULL, NULL, 0, LZMA_RUN);
|
---|
332 |
|
---|
333 | if (ret == LZMA_STREAM_END) {
|
---|
334 | ret = LZMA_OK;
|
---|
335 | } else {
|
---|
336 | // Something went wrong, free the Index structure and restore
|
---|
337 | // the input position.
|
---|
338 | lzma_index_end(coder.index, allocator);
|
---|
339 | *in_pos = in_start;
|
---|
340 |
|
---|
341 | if (ret == LZMA_OK) {
|
---|
342 | // The input is truncated or otherwise corrupt.
|
---|
343 | // Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR
|
---|
344 | // like lzma_vli_decode() does in single-call mode.
|
---|
345 | ret = LZMA_DATA_ERROR;
|
---|
346 |
|
---|
347 | } else if (ret == LZMA_MEMLIMIT_ERROR) {
|
---|
348 | // Tell the caller how much memory would have
|
---|
349 | // been needed.
|
---|
350 | *memlimit = lzma_index_memusage(1, coder.count);
|
---|
351 | }
|
---|
352 | }
|
---|
353 |
|
---|
354 | return ret;
|
---|
355 | }
|
---|