///////////////////////////////////////////////////////////////////////////////
//
/// \file       file_info.c
/// \brief      Decode .xz file information into a lzma_index structure
//
//  Author:     Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////
12 |
|
---|
13 | #include "index_decoder.h"
|
---|
14 |
|
---|
15 |
|
---|
16 | typedef struct {
|
---|
17 | enum {
|
---|
18 | SEQ_MAGIC_BYTES,
|
---|
19 | SEQ_PADDING_SEEK,
|
---|
20 | SEQ_PADDING_DECODE,
|
---|
21 | SEQ_FOOTER,
|
---|
22 | SEQ_INDEX_INIT,
|
---|
23 | SEQ_INDEX_DECODE,
|
---|
24 | SEQ_HEADER_DECODE,
|
---|
25 | SEQ_HEADER_COMPARE,
|
---|
26 | } sequence;
|
---|
27 |
|
---|
28 | /// Absolute position of in[*in_pos] in the file. All code that
|
---|
29 | /// modifies *in_pos also updates this. seek_to_pos() needs this
|
---|
30 | /// to determine if we need to request the application to seek for
|
---|
31 | /// us or if we can do the seeking internally by adjusting *in_pos.
|
---|
32 | uint64_t file_cur_pos;
|
---|
33 |
|
---|
34 | /// This refers to absolute positions of interesting parts of the
|
---|
35 | /// input file. Sometimes it points to the *beginning* of a specific
|
---|
36 | /// field and sometimes to the *end* of a field. The current target
|
---|
37 | /// position at each moment is explained in the comments.
|
---|
38 | uint64_t file_target_pos;
|
---|
39 |
|
---|
40 | /// Size of the .xz file (from the application).
|
---|
41 | uint64_t file_size;
|
---|
42 |
|
---|
43 | /// Index decoder
|
---|
44 | lzma_next_coder index_decoder;
|
---|
45 |
|
---|
46 | /// Number of bytes remaining in the Index field that is currently
|
---|
47 | /// being decoded.
|
---|
48 | lzma_vli index_remaining;
|
---|
49 |
|
---|
50 | /// The Index decoder will store the decoded Index in this pointer.
|
---|
51 | lzma_index *this_index;
|
---|
52 |
|
---|
53 | /// Amount of Stream Padding in the current Stream.
|
---|
54 | lzma_vli stream_padding;
|
---|
55 |
|
---|
56 | /// The final combined index is collected here.
|
---|
57 | lzma_index *combined_index;
|
---|
58 |
|
---|
59 | /// Pointer from the application where to store the index information
|
---|
60 | /// after successful decoding.
|
---|
61 | lzma_index **dest_index;
|
---|
62 |
|
---|
63 | /// Pointer to lzma_stream.seek_pos to be used when returning
|
---|
64 | /// LZMA_SEEK_NEEDED. This is set by seek_to_pos() when needed.
|
---|
65 | uint64_t *external_seek_pos;
|
---|
66 |
|
---|
67 | /// Memory usage limit
|
---|
68 | uint64_t memlimit;
|
---|
69 |
|
---|
70 | /// Stream Flags from the very beginning of the file.
|
---|
71 | lzma_stream_flags first_header_flags;
|
---|
72 |
|
---|
73 | /// Stream Flags from Stream Header of the current Stream.
|
---|
74 | lzma_stream_flags header_flags;
|
---|
75 |
|
---|
76 | /// Stream Flags from Stream Footer of the current Stream.
|
---|
77 | lzma_stream_flags footer_flags;
|
---|
78 |
|
---|
79 | size_t temp_pos;
|
---|
80 | size_t temp_size;
|
---|
81 | uint8_t temp[8192];
|
---|
82 |
|
---|
83 | } lzma_file_info_coder;
|
---|
84 |
|
---|
85 |
|
---|
86 | /// Copies data from in[*in_pos] into coder->temp until
|
---|
87 | /// coder->temp_pos == coder->temp_size. This also keeps coder->file_cur_pos
|
---|
88 | /// in sync with *in_pos. Returns true if more input is needed.
|
---|
89 | static bool
|
---|
90 | fill_temp(lzma_file_info_coder *coder, const uint8_t *restrict in,
|
---|
91 | size_t *restrict in_pos, size_t in_size)
|
---|
92 | {
|
---|
93 | coder->file_cur_pos += lzma_bufcpy(in, in_pos, in_size,
|
---|
94 | coder->temp, &coder->temp_pos, coder->temp_size);
|
---|
95 | return coder->temp_pos < coder->temp_size;
|
---|
96 | }
|
---|
97 |
|
---|
98 |
|
---|
99 | /// Seeks to the absolute file position specified by target_pos.
|
---|
100 | /// This tries to do the seeking by only modifying *in_pos, if possible.
|
---|
101 | /// The main benefit of this is that if one passes the whole file at once
|
---|
102 | /// to lzma_code(), the decoder will never need to return LZMA_SEEK_NEEDED
|
---|
103 | /// as all the seeking can be done by adjusting *in_pos in this function.
|
---|
104 | ///
|
---|
105 | /// Returns true if an external seek is needed and the caller must return
|
---|
106 | /// LZMA_SEEK_NEEDED.
|
---|
107 | static bool
|
---|
108 | seek_to_pos(lzma_file_info_coder *coder, uint64_t target_pos,
|
---|
109 | size_t in_start, size_t *in_pos, size_t in_size)
|
---|
110 | {
|
---|
111 | // The input buffer doesn't extend beyond the end of the file.
|
---|
112 | // This has been checked by file_info_decode() already.
|
---|
113 | assert(coder->file_size - coder->file_cur_pos >= in_size - *in_pos);
|
---|
114 |
|
---|
115 | const uint64_t pos_min = coder->file_cur_pos - (*in_pos - in_start);
|
---|
116 | const uint64_t pos_max = coder->file_cur_pos + (in_size - *in_pos);
|
---|
117 |
|
---|
118 | bool external_seek_needed;
|
---|
119 |
|
---|
120 | if (target_pos >= pos_min && target_pos <= pos_max) {
|
---|
121 | // The requested position is available in the current input
|
---|
122 | // buffer or right after it. That is, in a corner case we
|
---|
123 | // end up setting *in_pos == in_size and thus will immediately
|
---|
124 | // need new input bytes from the application.
|
---|
125 | *in_pos += (size_t)(target_pos - coder->file_cur_pos);
|
---|
126 | external_seek_needed = false;
|
---|
127 | } else {
|
---|
128 | // Ask the application to seek the input file.
|
---|
129 | *coder->external_seek_pos = target_pos;
|
---|
130 | external_seek_needed = true;
|
---|
131 |
|
---|
132 | // Mark the whole input buffer as used. This way
|
---|
133 | // lzma_stream.total_in will have a better estimate
|
---|
134 | // of the amount of data read. It still won't be perfect
|
---|
135 | // as the value will depend on the input buffer size that
|
---|
136 | // the application uses, but it should be good enough for
|
---|
137 | // those few who want an estimate.
|
---|
138 | *in_pos = in_size;
|
---|
139 | }
|
---|
140 |
|
---|
141 | // After seeking (internal or external) the current position
|
---|
142 | // will match the requested target position.
|
---|
143 | coder->file_cur_pos = target_pos;
|
---|
144 |
|
---|
145 | return external_seek_needed;
|
---|
146 | }
|
---|
147 |
|
---|
148 |
|
---|
149 | /// The caller sets coder->file_target_pos so that it points to the *end*
|
---|
150 | /// of the desired file position. This function then determines how far
|
---|
151 | /// backwards from that position we can seek. After seeking fill_temp()
|
---|
152 | /// can be used to read data into coder->temp. When fill_temp() has finished,
|
---|
153 | /// coder->temp[coder->temp_size] will match coder->file_target_pos.
|
---|
154 | ///
|
---|
155 | /// This also validates that coder->target_file_pos is sane in sense that
|
---|
156 | /// we aren't trying to seek too far backwards (too close or beyond the
|
---|
157 | /// beginning of the file).
|
---|
158 | static lzma_ret
|
---|
159 | reverse_seek(lzma_file_info_coder *coder,
|
---|
160 | size_t in_start, size_t *in_pos, size_t in_size)
|
---|
161 | {
|
---|
162 | // Check that there is enough data before the target position
|
---|
163 | // to contain at least Stream Header and Stream Footer. If there
|
---|
164 | // isn't, the file cannot be valid.
|
---|
165 | if (coder->file_target_pos < 2 * LZMA_STREAM_HEADER_SIZE)
|
---|
166 | return LZMA_DATA_ERROR;
|
---|
167 |
|
---|
168 | coder->temp_pos = 0;
|
---|
169 |
|
---|
170 | // The Stream Header at the very beginning of the file gets handled
|
---|
171 | // specially in SEQ_MAGIC_BYTES and thus we will never need to seek
|
---|
172 | // there. By not seeking to the first LZMA_STREAM_HEADER_SIZE bytes
|
---|
173 | // we avoid a useless external seek after SEQ_MAGIC_BYTES if the
|
---|
174 | // application uses an extremely small input buffer and the input
|
---|
175 | // file is very small.
|
---|
176 | if (coder->file_target_pos - LZMA_STREAM_HEADER_SIZE
|
---|
177 | < sizeof(coder->temp))
|
---|
178 | coder->temp_size = (size_t)(coder->file_target_pos
|
---|
179 | - LZMA_STREAM_HEADER_SIZE);
|
---|
180 | else
|
---|
181 | coder->temp_size = sizeof(coder->temp);
|
---|
182 |
|
---|
183 | // The above if-statements guarantee this. This is important because
|
---|
184 | // the Stream Header/Footer decoders assume that there's at least
|
---|
185 | // LZMA_STREAM_HEADER_SIZE bytes in coder->temp.
|
---|
186 | assert(coder->temp_size >= LZMA_STREAM_HEADER_SIZE);
|
---|
187 |
|
---|
188 | if (seek_to_pos(coder, coder->file_target_pos - coder->temp_size,
|
---|
189 | in_start, in_pos, in_size))
|
---|
190 | return LZMA_SEEK_NEEDED;
|
---|
191 |
|
---|
192 | return LZMA_OK;
|
---|
193 | }
|
---|
194 |
|
---|
195 |
|
---|
/// Gets the number of zero-bytes at the end of the buffer.
///
/// The buffer is scanned backwards starting from buf[buf_size - 1] and
/// the scan stops at the first non-zero byte or at the beginning of
/// the buffer.
///
/// \param      buf       Buffer to examine; not read if buf_size is zero
/// \param      buf_size  Number of bytes in buf
///
/// \return     Count of consecutive 0x00 bytes at the end of buf. This is
///             zero if buf_size is zero and buf_size if every byte is 0x00.
static size_t
get_padding_size(const uint8_t *buf, size_t buf_size)
{
	size_t padding = 0;
	while (buf_size > 0 && buf[--buf_size] == 0x00)
		++padding;

	return padding;
}
206 |
|
---|
207 |
|
---|
208 | /// With the Stream Header at the very beginning of the file, LZMA_FORMAT_ERROR
|
---|
209 | /// is used to tell the application that Magic Bytes didn't match. In other
|
---|
210 | /// Stream Header/Footer fields (in the middle/end of the file) it could be
|
---|
211 | /// a bit confusing to return LZMA_FORMAT_ERROR as we already know that there
|
---|
212 | /// is a valid Stream Header at the beginning of the file. For those cases
|
---|
213 | /// this function is used to convert LZMA_FORMAT_ERROR to LZMA_DATA_ERROR.
|
---|
214 | static lzma_ret
|
---|
215 | hide_format_error(lzma_ret ret)
|
---|
216 | {
|
---|
217 | if (ret == LZMA_FORMAT_ERROR)
|
---|
218 | ret = LZMA_DATA_ERROR;
|
---|
219 |
|
---|
220 | return ret;
|
---|
221 | }
|
---|
222 |
|
---|
223 |
|
---|
224 | /// Calls the Index decoder and updates coder->index_remaining.
|
---|
225 | /// This is a separate function because the input can be either directly
|
---|
226 | /// from the application or from coder->temp.
|
---|
227 | static lzma_ret
|
---|
228 | decode_index(lzma_file_info_coder *coder, const lzma_allocator *allocator,
|
---|
229 | const uint8_t *restrict in, size_t *restrict in_pos,
|
---|
230 | size_t in_size, bool update_file_cur_pos)
|
---|
231 | {
|
---|
232 | const size_t in_start = *in_pos;
|
---|
233 |
|
---|
234 | const lzma_ret ret = coder->index_decoder.code(
|
---|
235 | coder->index_decoder.coder,
|
---|
236 | allocator, in, in_pos, in_size,
|
---|
237 | NULL, NULL, 0, LZMA_RUN);
|
---|
238 |
|
---|
239 | coder->index_remaining -= *in_pos - in_start;
|
---|
240 |
|
---|
241 | if (update_file_cur_pos)
|
---|
242 | coder->file_cur_pos += *in_pos - in_start;
|
---|
243 |
|
---|
244 | return ret;
|
---|
245 | }
|
---|
246 |
|
---|
247 |
|
---|
248 | static lzma_ret
|
---|
249 | file_info_decode(void *coder_ptr, const lzma_allocator *allocator,
|
---|
250 | const uint8_t *restrict in, size_t *restrict in_pos,
|
---|
251 | size_t in_size,
|
---|
252 | uint8_t *restrict out lzma_attribute((__unused__)),
|
---|
253 | size_t *restrict out_pos lzma_attribute((__unused__)),
|
---|
254 | size_t out_size lzma_attribute((__unused__)),
|
---|
255 | lzma_action action lzma_attribute((__unused__)))
|
---|
256 | {
|
---|
257 | lzma_file_info_coder *coder = coder_ptr;
|
---|
258 | const size_t in_start = *in_pos;
|
---|
259 |
|
---|
260 | // If the caller provides input past the end of the file, trim
|
---|
261 | // the extra bytes from the buffer so that we won't read too far.
|
---|
262 | assert(coder->file_size >= coder->file_cur_pos);
|
---|
263 | if (coder->file_size - coder->file_cur_pos < in_size - in_start)
|
---|
264 | in_size = in_start
|
---|
265 | + (size_t)(coder->file_size - coder->file_cur_pos);
|
---|
266 |
|
---|
267 | while (true)
|
---|
268 | switch (coder->sequence) {
|
---|
269 | case SEQ_MAGIC_BYTES:
|
---|
270 | // Decode the Stream Header at the beginning of the file
|
---|
271 | // first to check if the Magic Bytes match. The flags
|
---|
272 | // are stored in coder->first_header_flags so that we
|
---|
273 | // don't need to seek to it again.
|
---|
274 | //
|
---|
275 | // Check that the file is big enough to contain at least
|
---|
276 | // Stream Header.
|
---|
277 | if (coder->file_size < LZMA_STREAM_HEADER_SIZE)
|
---|
278 | return LZMA_FORMAT_ERROR;
|
---|
279 |
|
---|
280 | // Read the Stream Header field into coder->temp.
|
---|
281 | if (fill_temp(coder, in, in_pos, in_size))
|
---|
282 | return LZMA_OK;
|
---|
283 |
|
---|
284 | // This is the only Stream Header/Footer decoding where we
|
---|
285 | // want to return LZMA_FORMAT_ERROR if the Magic Bytes don't
|
---|
286 | // match. Elsewhere it will be converted to LZMA_DATA_ERROR.
|
---|
287 | return_if_error(lzma_stream_header_decode(
|
---|
288 | &coder->first_header_flags, coder->temp));
|
---|
289 |
|
---|
290 | // Now that we know that the Magic Bytes match, check the
|
---|
291 | // file size. It's better to do this here after checking the
|
---|
292 | // Magic Bytes since this way we can give LZMA_FORMAT_ERROR
|
---|
293 | // instead of LZMA_DATA_ERROR when the Magic Bytes don't
|
---|
294 | // match in a file that is too big or isn't a multiple of
|
---|
295 | // four bytes.
|
---|
296 | if (coder->file_size > LZMA_VLI_MAX || (coder->file_size & 3))
|
---|
297 | return LZMA_DATA_ERROR;
|
---|
298 |
|
---|
299 | // Start looking for Stream Padding and Stream Footer
|
---|
300 | // at the end of the file.
|
---|
301 | coder->file_target_pos = coder->file_size;
|
---|
302 |
|
---|
303 | // Fall through
|
---|
304 |
|
---|
305 | case SEQ_PADDING_SEEK:
|
---|
306 | coder->sequence = SEQ_PADDING_DECODE;
|
---|
307 | return_if_error(reverse_seek(
|
---|
308 | coder, in_start, in_pos, in_size));
|
---|
309 |
|
---|
310 | // Fall through
|
---|
311 |
|
---|
312 | case SEQ_PADDING_DECODE: {
|
---|
313 | // Copy to coder->temp first. This keeps the code simpler if
|
---|
314 | // the application only provides input a few bytes at a time.
|
---|
315 | if (fill_temp(coder, in, in_pos, in_size))
|
---|
316 | return LZMA_OK;
|
---|
317 |
|
---|
318 | // Scan the buffer backwards to get the size of the
|
---|
319 | // Stream Padding field (if any).
|
---|
320 | const size_t new_padding = get_padding_size(
|
---|
321 | coder->temp, coder->temp_size);
|
---|
322 | coder->stream_padding += new_padding;
|
---|
323 |
|
---|
324 | // Set the target position to the beginning of Stream Padding
|
---|
325 | // that has been observed so far. If all Stream Padding has
|
---|
326 | // been seen, then the target position will be at the end
|
---|
327 | // of the Stream Footer field.
|
---|
328 | coder->file_target_pos -= new_padding;
|
---|
329 |
|
---|
330 | if (new_padding == coder->temp_size) {
|
---|
331 | // The whole buffer was padding. Seek backwards in
|
---|
332 | // the file to get more input.
|
---|
333 | coder->sequence = SEQ_PADDING_SEEK;
|
---|
334 | break;
|
---|
335 | }
|
---|
336 |
|
---|
337 | // Size of Stream Padding must be a multiple of 4 bytes.
|
---|
338 | if (coder->stream_padding & 3)
|
---|
339 | return LZMA_DATA_ERROR;
|
---|
340 |
|
---|
341 | coder->sequence = SEQ_FOOTER;
|
---|
342 |
|
---|
343 | // Calculate the amount of non-padding data in coder->temp.
|
---|
344 | coder->temp_size -= new_padding;
|
---|
345 | coder->temp_pos = coder->temp_size;
|
---|
346 |
|
---|
347 | // We can avoid an external seek if the whole Stream Footer
|
---|
348 | // is already in coder->temp. In that case SEQ_FOOTER won't
|
---|
349 | // read more input and will find the Stream Footer from
|
---|
350 | // coder->temp[coder->temp_size - LZMA_STREAM_HEADER_SIZE].
|
---|
351 | //
|
---|
352 | // Otherwise we will need to seek. The seeking is done so
|
---|
353 | // that Stream Footer wil be at the end of coder->temp.
|
---|
354 | // This way it's likely that we also get a complete Index
|
---|
355 | // field into coder->temp without needing a separate seek
|
---|
356 | // for that (unless the Index field is big).
|
---|
357 | if (coder->temp_size < LZMA_STREAM_HEADER_SIZE)
|
---|
358 | return_if_error(reverse_seek(
|
---|
359 | coder, in_start, in_pos, in_size));
|
---|
360 | }
|
---|
361 |
|
---|
362 | // Fall through
|
---|
363 |
|
---|
364 | case SEQ_FOOTER:
|
---|
365 | // Copy the Stream Footer field into coder->temp.
|
---|
366 | // If Stream Footer was already available in coder->temp
|
---|
367 | // in SEQ_PADDING_DECODE, then this does nothing.
|
---|
368 | if (fill_temp(coder, in, in_pos, in_size))
|
---|
369 | return LZMA_OK;
|
---|
370 |
|
---|
371 | // Make coder->file_target_pos and coder->temp_size point
|
---|
372 | // to the beginning of Stream Footer and thus to the end
|
---|
373 | // of the Index field. coder->temp_pos will be updated
|
---|
374 | // a bit later.
|
---|
375 | coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE;
|
---|
376 | coder->temp_size -= LZMA_STREAM_HEADER_SIZE;
|
---|
377 |
|
---|
378 | // Decode Stream Footer.
|
---|
379 | return_if_error(hide_format_error(lzma_stream_footer_decode(
|
---|
380 | &coder->footer_flags,
|
---|
381 | coder->temp + coder->temp_size)));
|
---|
382 |
|
---|
383 | // Check that we won't seek past the beginning of the file.
|
---|
384 | //
|
---|
385 | // LZMA_STREAM_HEADER_SIZE is added because there must be
|
---|
386 | // space for Stream Header too even though we won't seek
|
---|
387 | // there before decoding the Index field.
|
---|
388 | //
|
---|
389 | // There's no risk of integer overflow here because
|
---|
390 | // Backward Size cannot be greater than 2^34.
|
---|
391 | if (coder->file_target_pos < coder->footer_flags.backward_size
|
---|
392 | + LZMA_STREAM_HEADER_SIZE)
|
---|
393 | return LZMA_DATA_ERROR;
|
---|
394 |
|
---|
395 | // Set the target position to the beginning of the Index field.
|
---|
396 | coder->file_target_pos -= coder->footer_flags.backward_size;
|
---|
397 | coder->sequence = SEQ_INDEX_INIT;
|
---|
398 |
|
---|
399 | // We can avoid an external seek if the whole Index field is
|
---|
400 | // already available in coder->temp.
|
---|
401 | if (coder->temp_size >= coder->footer_flags.backward_size) {
|
---|
402 | // Set coder->temp_pos to point to the beginning
|
---|
403 | // of the Index.
|
---|
404 | coder->temp_pos = coder->temp_size
|
---|
405 | - coder->footer_flags.backward_size;
|
---|
406 | } else {
|
---|
407 | // These are set to zero to indicate that there's no
|
---|
408 | // useful data (Index or anything else) in coder->temp.
|
---|
409 | coder->temp_pos = 0;
|
---|
410 | coder->temp_size = 0;
|
---|
411 |
|
---|
412 | // Seek to the beginning of the Index field.
|
---|
413 | if (seek_to_pos(coder, coder->file_target_pos,
|
---|
414 | in_start, in_pos, in_size))
|
---|
415 | return LZMA_SEEK_NEEDED;
|
---|
416 | }
|
---|
417 |
|
---|
418 | // Fall through
|
---|
419 |
|
---|
420 | case SEQ_INDEX_INIT: {
|
---|
421 | // Calculate the amount of memory already used by the earlier
|
---|
422 | // Indexes so that we know how big memory limit to pass to
|
---|
423 | // the Index decoder.
|
---|
424 | //
|
---|
425 | // NOTE: When there are multiple Streams, the separate
|
---|
426 | // lzma_index structures can use more RAM (as measured by
|
---|
427 | // lzma_index_memused()) than the final combined lzma_index.
|
---|
428 | // Thus memlimit may need to be slightly higher than the final
|
---|
429 | // calculated memory usage will be. This is perhaps a bit
|
---|
430 | // confusing to the application, but I think it shouldn't
|
---|
431 | // cause problems in practice.
|
---|
432 | uint64_t memused = 0;
|
---|
433 | if (coder->combined_index != NULL) {
|
---|
434 | memused = lzma_index_memused(coder->combined_index);
|
---|
435 | assert(memused <= coder->memlimit);
|
---|
436 | if (memused > coder->memlimit) // Extra sanity check
|
---|
437 | return LZMA_PROG_ERROR;
|
---|
438 | }
|
---|
439 |
|
---|
440 | // Initialize the Index decoder.
|
---|
441 | return_if_error(lzma_index_decoder_init(
|
---|
442 | &coder->index_decoder, allocator,
|
---|
443 | &coder->this_index,
|
---|
444 | coder->memlimit - memused));
|
---|
445 |
|
---|
446 | coder->index_remaining = coder->footer_flags.backward_size;
|
---|
447 | coder->sequence = SEQ_INDEX_DECODE;
|
---|
448 | }
|
---|
449 |
|
---|
450 | // Fall through
|
---|
451 |
|
---|
452 | case SEQ_INDEX_DECODE: {
|
---|
453 | // Decode (a part of) the Index. If the whole Index is already
|
---|
454 | // in coder->temp, read it from there. Otherwise read from
|
---|
455 | // in[*in_pos] onwards. Note that index_decode() updates
|
---|
456 | // coder->index_remaining and optionally coder->file_cur_pos.
|
---|
457 | lzma_ret ret;
|
---|
458 | if (coder->temp_size != 0) {
|
---|
459 | assert(coder->temp_size - coder->temp_pos
|
---|
460 | == coder->index_remaining);
|
---|
461 | ret = decode_index(coder, allocator, coder->temp,
|
---|
462 | &coder->temp_pos, coder->temp_size,
|
---|
463 | false);
|
---|
464 | } else {
|
---|
465 | // Don't give the decoder more input than the known
|
---|
466 | // remaining size of the Index field.
|
---|
467 | size_t in_stop = in_size;
|
---|
468 | if (in_size - *in_pos > coder->index_remaining)
|
---|
469 | in_stop = *in_pos
|
---|
470 | + (size_t)(coder->index_remaining);
|
---|
471 |
|
---|
472 | ret = decode_index(coder, allocator,
|
---|
473 | in, in_pos, in_stop, true);
|
---|
474 | }
|
---|
475 |
|
---|
476 | switch (ret) {
|
---|
477 | case LZMA_OK:
|
---|
478 | // If the Index docoder asks for more input when we
|
---|
479 | // have already given it as much input as Backward Size
|
---|
480 | // indicated, the file is invalid.
|
---|
481 | if (coder->index_remaining == 0)
|
---|
482 | return LZMA_DATA_ERROR;
|
---|
483 |
|
---|
484 | // We cannot get here if we were reading Index from
|
---|
485 | // coder->temp because when reading from coder->temp
|
---|
486 | // we give the Index decoder exactly
|
---|
487 | // coder->index_remaining bytes of input.
|
---|
488 | assert(coder->temp_size == 0);
|
---|
489 |
|
---|
490 | return LZMA_OK;
|
---|
491 |
|
---|
492 | case LZMA_STREAM_END:
|
---|
493 | // If the decoding seems to be successful, check also
|
---|
494 | // that the Index decoder consumed as much input as
|
---|
495 | // indicated by the Backward Size field.
|
---|
496 | if (coder->index_remaining != 0)
|
---|
497 | return LZMA_DATA_ERROR;
|
---|
498 |
|
---|
499 | break;
|
---|
500 |
|
---|
501 | default:
|
---|
502 | return ret;
|
---|
503 | }
|
---|
504 |
|
---|
505 | // Calculate how much the Index tells us to seek backwards
|
---|
506 | // (relative to the beginning of the Index): Total size of
|
---|
507 | // all Blocks plus the size of the Stream Header field.
|
---|
508 | // No integer overflow here because lzma_index_total_size()
|
---|
509 | // cannot return a value greater than LZMA_VLI_MAX.
|
---|
510 | const uint64_t seek_amount
|
---|
511 | = lzma_index_total_size(coder->this_index)
|
---|
512 | + LZMA_STREAM_HEADER_SIZE;
|
---|
513 |
|
---|
514 | // Check that Index is sane in sense that seek_amount won't
|
---|
515 | // make us seek past the beginning of the file when locating
|
---|
516 | // the Stream Header.
|
---|
517 | //
|
---|
518 | // coder->file_target_pos still points to the beginning of
|
---|
519 | // the Index field.
|
---|
520 | if (coder->file_target_pos < seek_amount)
|
---|
521 | return LZMA_DATA_ERROR;
|
---|
522 |
|
---|
523 | // Set the target to the beginning of Stream Header.
|
---|
524 | coder->file_target_pos -= seek_amount;
|
---|
525 |
|
---|
526 | if (coder->file_target_pos == 0) {
|
---|
527 | // We would seek to the beginning of the file, but
|
---|
528 | // since we already decoded that Stream Header in
|
---|
529 | // SEQ_MAGIC_BYTES, we can use the cached value from
|
---|
530 | // coder->first_header_flags to avoid the seek.
|
---|
531 | coder->header_flags = coder->first_header_flags;
|
---|
532 | coder->sequence = SEQ_HEADER_COMPARE;
|
---|
533 | break;
|
---|
534 | }
|
---|
535 |
|
---|
536 | coder->sequence = SEQ_HEADER_DECODE;
|
---|
537 |
|
---|
538 | // Make coder->file_target_pos point to the end of
|
---|
539 | // the Stream Header field.
|
---|
540 | coder->file_target_pos += LZMA_STREAM_HEADER_SIZE;
|
---|
541 |
|
---|
542 | // If coder->temp_size is non-zero, it points to the end
|
---|
543 | // of the Index field. Then the beginning of the Index
|
---|
544 | // field is at coder->temp[coder->temp_size
|
---|
545 | // - coder->footer_flags.backward_size].
|
---|
546 | assert(coder->temp_size == 0 || coder->temp_size
|
---|
547 | >= coder->footer_flags.backward_size);
|
---|
548 |
|
---|
549 | // If coder->temp contained the whole Index, see if it has
|
---|
550 | // enough data to contain also the Stream Header. If so,
|
---|
551 | // we avoid an external seek.
|
---|
552 | //
|
---|
553 | // NOTE: This can happen only with small .xz files and only
|
---|
554 | // for the non-first Stream as the Stream Flags of the first
|
---|
555 | // Stream are cached and already handled a few lines above.
|
---|
556 | // So this isn't as useful as the other seek-avoidance cases.
|
---|
557 | if (coder->temp_size != 0 && coder->temp_size
|
---|
558 | - coder->footer_flags.backward_size
|
---|
559 | >= seek_amount) {
|
---|
560 | // Make temp_pos and temp_size point to the *end* of
|
---|
561 | // Stream Header so that SEQ_HEADER_DECODE will find
|
---|
562 | // the start of Stream Header from coder->temp[
|
---|
563 | // coder->temp_size - LZMA_STREAM_HEADER_SIZE].
|
---|
564 | coder->temp_pos = coder->temp_size
|
---|
565 | - coder->footer_flags.backward_size
|
---|
566 | - seek_amount
|
---|
567 | + LZMA_STREAM_HEADER_SIZE;
|
---|
568 | coder->temp_size = coder->temp_pos;
|
---|
569 | } else {
|
---|
570 | // Seek so that Stream Header will be at the end of
|
---|
571 | // coder->temp. With typical multi-Stream files we
|
---|
572 | // will usually also get the Stream Footer and Index
|
---|
573 | // of the *previous* Stream in coder->temp and thus
|
---|
574 | // won't need a separate seek for them.
|
---|
575 | return_if_error(reverse_seek(coder,
|
---|
576 | in_start, in_pos, in_size));
|
---|
577 | }
|
---|
578 | }
|
---|
579 |
|
---|
580 | // Fall through
|
---|
581 |
|
---|
582 | case SEQ_HEADER_DECODE:
|
---|
583 | // Copy the Stream Header field into coder->temp.
|
---|
584 | // If Stream Header was already available in coder->temp
|
---|
585 | // in SEQ_INDEX_DECODE, then this does nothing.
|
---|
586 | if (fill_temp(coder, in, in_pos, in_size))
|
---|
587 | return LZMA_OK;
|
---|
588 |
|
---|
589 | // Make all these point to the beginning of Stream Header.
|
---|
590 | coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE;
|
---|
591 | coder->temp_size -= LZMA_STREAM_HEADER_SIZE;
|
---|
592 | coder->temp_pos = coder->temp_size;
|
---|
593 |
|
---|
594 | // Decode the Stream Header.
|
---|
595 | return_if_error(hide_format_error(lzma_stream_header_decode(
|
---|
596 | &coder->header_flags,
|
---|
597 | coder->temp + coder->temp_size)));
|
---|
598 |
|
---|
599 | coder->sequence = SEQ_HEADER_COMPARE;
|
---|
600 |
|
---|
601 | // Fall through
|
---|
602 |
|
---|
603 | case SEQ_HEADER_COMPARE:
|
---|
604 | // Compare Stream Header against Stream Footer. They must
|
---|
605 | // match.
|
---|
606 | return_if_error(lzma_stream_flags_compare(
|
---|
607 | &coder->header_flags, &coder->footer_flags));
|
---|
608 |
|
---|
609 | // Store the decoded Stream Flags into the Index. Use the
|
---|
610 | // Footer Flags because it contains Backward Size, although
|
---|
611 | // it shouldn't matter in practice.
|
---|
612 | if (lzma_index_stream_flags(coder->this_index,
|
---|
613 | &coder->footer_flags) != LZMA_OK)
|
---|
614 | return LZMA_PROG_ERROR;
|
---|
615 |
|
---|
616 | // Store also the size of the Stream Padding field. It is
|
---|
617 | // needed to calculate the offsets of the Streams correctly.
|
---|
618 | if (lzma_index_stream_padding(coder->this_index,
|
---|
619 | coder->stream_padding) != LZMA_OK)
|
---|
620 | return LZMA_PROG_ERROR;
|
---|
621 |
|
---|
622 | // Reset it so that it's ready for the next Stream.
|
---|
623 | coder->stream_padding = 0;
|
---|
624 |
|
---|
625 | // Append the earlier decoded Indexes after this_index.
|
---|
626 | if (coder->combined_index != NULL)
|
---|
627 | return_if_error(lzma_index_cat(coder->this_index,
|
---|
628 | coder->combined_index, allocator));
|
---|
629 |
|
---|
630 | coder->combined_index = coder->this_index;
|
---|
631 | coder->this_index = NULL;
|
---|
632 |
|
---|
633 | // If the whole file was decoded, tell the caller that we
|
---|
634 | // are finished.
|
---|
635 | if (coder->file_target_pos == 0) {
|
---|
636 | // The combined index must indicate the same file
|
---|
637 | // size as was told to us at initialization.
|
---|
638 | assert(lzma_index_file_size(coder->combined_index)
|
---|
639 | == coder->file_size);
|
---|
640 |
|
---|
641 | // Make the combined index available to
|
---|
642 | // the application.
|
---|
643 | *coder->dest_index = coder->combined_index;
|
---|
644 | coder->combined_index = NULL;
|
---|
645 |
|
---|
646 | // Mark the input buffer as used since we may have
|
---|
647 | // done internal seeking and thus don't know how
|
---|
648 | // many input bytes were actually used. This way
|
---|
649 | // lzma_stream.total_in gets a slightly better
|
---|
650 | // estimate of the amount of input used.
|
---|
651 | *in_pos = in_size;
|
---|
652 | return LZMA_STREAM_END;
|
---|
653 | }
|
---|
654 |
|
---|
655 | // We didn't hit the beginning of the file yet, so continue
|
---|
656 | // reading backwards in the file. If we have unprocessed
|
---|
657 | // data in coder->temp, use it before requesting more data
|
---|
658 | // from the application.
|
---|
659 | //
|
---|
660 | // coder->file_target_pos, coder->temp_size, and
|
---|
661 | // coder->temp_pos all point to the beginning of Stream Header
|
---|
662 | // and thus the end of the previous Stream in the file.
|
---|
663 | coder->sequence = coder->temp_size > 0
|
---|
664 | ? SEQ_PADDING_DECODE : SEQ_PADDING_SEEK;
|
---|
665 | break;
|
---|
666 |
|
---|
667 | default:
|
---|
668 | assert(0);
|
---|
669 | return LZMA_PROG_ERROR;
|
---|
670 | }
|
---|
671 | }
|
---|
672 |
|
---|
673 |
|
---|
674 | static lzma_ret
|
---|
675 | file_info_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
|
---|
676 | uint64_t *old_memlimit, uint64_t new_memlimit)
|
---|
677 | {
|
---|
678 | lzma_file_info_coder *coder = coder_ptr;
|
---|
679 |
|
---|
680 | // The memory usage calculation comes from three things:
|
---|
681 | //
|
---|
682 | // (1) The Indexes that have already been decoded and processed into
|
---|
683 | // coder->combined_index.
|
---|
684 | //
|
---|
685 | // (2) The latest Index in coder->this_index that has been decoded but
|
---|
686 | // not yet put into coder->combined_index.
|
---|
687 | //
|
---|
688 | // (3) The latest Index that we have started decoding but haven't
|
---|
689 | // finished and thus isn't available in coder->this_index yet.
|
---|
690 | // Memory usage and limit information needs to be communicated
|
---|
691 | // from/to coder->index_decoder.
|
---|
692 | //
|
---|
693 | // Care has to be taken to not do both (2) and (3) when calculating
|
---|
694 | // the memory usage.
|
---|
695 | uint64_t combined_index_memusage = 0;
|
---|
696 | uint64_t this_index_memusage = 0;
|
---|
697 |
|
---|
698 | // (1) If we have already successfully decoded one or more Indexes,
|
---|
699 | // get their memory usage.
|
---|
700 | if (coder->combined_index != NULL)
|
---|
701 | combined_index_memusage = lzma_index_memused(
|
---|
702 | coder->combined_index);
|
---|
703 |
|
---|
704 | // Choose between (2), (3), or neither.
|
---|
705 | if (coder->this_index != NULL) {
|
---|
706 | // (2) The latest Index is available. Use its memory usage.
|
---|
707 | this_index_memusage = lzma_index_memused(coder->this_index);
|
---|
708 |
|
---|
709 | } else if (coder->sequence == SEQ_INDEX_DECODE) {
|
---|
710 | // (3) The Index decoder is activate and hasn't yet stored
|
---|
711 | // the new index in coder->this_index. Get the memory usage
|
---|
712 | // information from the Index decoder.
|
---|
713 | //
|
---|
714 | // NOTE: If the Index decoder doesn't yet know how much memory
|
---|
715 | // it will eventually need, it will return a tiny value here.
|
---|
716 | uint64_t dummy;
|
---|
717 | if (coder->index_decoder.memconfig(coder->index_decoder.coder,
|
---|
718 | &this_index_memusage, &dummy, 0)
|
---|
719 | != LZMA_OK) {
|
---|
720 | assert(0);
|
---|
721 | return LZMA_PROG_ERROR;
|
---|
722 | }
|
---|
723 | }
|
---|
724 |
|
---|
725 | // Now we know the total memory usage/requirement. If we had neither
|
---|
726 | // old Indexes nor a new Index, this will be zero which isn't
|
---|
727 | // acceptable as lzma_memusage() has to return non-zero on success
|
---|
728 | // and even with an empty .xz file we will end up with a lzma_index
|
---|
729 | // that takes some memory.
|
---|
730 | *memusage = combined_index_memusage + this_index_memusage;
|
---|
731 | if (*memusage == 0)
|
---|
732 | *memusage = lzma_index_memusage(1, 0);
|
---|
733 |
|
---|
734 | *old_memlimit = coder->memlimit;
|
---|
735 |
|
---|
736 | // If requested, set a new memory usage limit.
|
---|
737 | if (new_memlimit != 0) {
|
---|
738 | if (new_memlimit < *memusage)
|
---|
739 | return LZMA_MEMLIMIT_ERROR;
|
---|
740 |
|
---|
741 | // In the condition (3) we need to tell the Index decoder
|
---|
742 | // its new memory usage limit.
|
---|
743 | if (coder->this_index == NULL
|
---|
744 | && coder->sequence == SEQ_INDEX_DECODE) {
|
---|
745 | const uint64_t idec_new_memlimit = new_memlimit
|
---|
746 | - combined_index_memusage;
|
---|
747 |
|
---|
748 | assert(this_index_memusage > 0);
|
---|
749 | assert(idec_new_memlimit > 0);
|
---|
750 |
|
---|
751 | uint64_t dummy1;
|
---|
752 | uint64_t dummy2;
|
---|
753 |
|
---|
754 | if (coder->index_decoder.memconfig(
|
---|
755 | coder->index_decoder.coder,
|
---|
756 | &dummy1, &dummy2, idec_new_memlimit)
|
---|
757 | != LZMA_OK) {
|
---|
758 | assert(0);
|
---|
759 | return LZMA_PROG_ERROR;
|
---|
760 | }
|
---|
761 | }
|
---|
762 |
|
---|
763 | coder->memlimit = new_memlimit;
|
---|
764 | }
|
---|
765 |
|
---|
766 | return LZMA_OK;
|
---|
767 | }
|
---|
768 |
|
---|
769 |
|
---|
770 | static void
|
---|
771 | file_info_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
|
---|
772 | {
|
---|
773 | lzma_file_info_coder *coder = coder_ptr;
|
---|
774 |
|
---|
775 | lzma_next_end(&coder->index_decoder, allocator);
|
---|
776 | lzma_index_end(coder->this_index, allocator);
|
---|
777 | lzma_index_end(coder->combined_index, allocator);
|
---|
778 |
|
---|
779 | lzma_free(coder, allocator);
|
---|
780 | return;
|
---|
781 | }
|
---|
782 |
|
---|
783 |
|
---|
784 | static lzma_ret
|
---|
785 | lzma_file_info_decoder_init(lzma_next_coder *next,
|
---|
786 | const lzma_allocator *allocator, uint64_t *seek_pos,
|
---|
787 | lzma_index **dest_index,
|
---|
788 | uint64_t memlimit, uint64_t file_size)
|
---|
789 | {
|
---|
790 | lzma_next_coder_init(&lzma_file_info_decoder_init, next, allocator);
|
---|
791 |
|
---|
792 | if (dest_index == NULL)
|
---|
793 | return LZMA_PROG_ERROR;
|
---|
794 |
|
---|
795 | lzma_file_info_coder *coder = next->coder;
|
---|
796 | if (coder == NULL) {
|
---|
797 | coder = lzma_alloc(sizeof(lzma_file_info_coder), allocator);
|
---|
798 | if (coder == NULL)
|
---|
799 | return LZMA_MEM_ERROR;
|
---|
800 |
|
---|
801 | next->coder = coder;
|
---|
802 | next->code = &file_info_decode;
|
---|
803 | next->end = &file_info_decoder_end;
|
---|
804 | next->memconfig = &file_info_decoder_memconfig;
|
---|
805 |
|
---|
806 | coder->index_decoder = LZMA_NEXT_CODER_INIT;
|
---|
807 | coder->this_index = NULL;
|
---|
808 | coder->combined_index = NULL;
|
---|
809 | }
|
---|
810 |
|
---|
811 | coder->sequence = SEQ_MAGIC_BYTES;
|
---|
812 | coder->file_cur_pos = 0;
|
---|
813 | coder->file_target_pos = 0;
|
---|
814 | coder->file_size = file_size;
|
---|
815 |
|
---|
816 | lzma_index_end(coder->this_index, allocator);
|
---|
817 | coder->this_index = NULL;
|
---|
818 |
|
---|
819 | lzma_index_end(coder->combined_index, allocator);
|
---|
820 | coder->combined_index = NULL;
|
---|
821 |
|
---|
822 | coder->stream_padding = 0;
|
---|
823 |
|
---|
824 | coder->dest_index = dest_index;
|
---|
825 | coder->external_seek_pos = seek_pos;
|
---|
826 |
|
---|
827 | // If memlimit is 0, make it 1 to ensure that lzma_memlimit_get()
|
---|
828 | // won't return 0 (which would indicate an error).
|
---|
829 | coder->memlimit = my_max(1, memlimit);
|
---|
830 |
|
---|
831 | // Prepare these for reading the first Stream Header into coder->temp.
|
---|
832 | coder->temp_pos = 0;
|
---|
833 | coder->temp_size = LZMA_STREAM_HEADER_SIZE;
|
---|
834 |
|
---|
835 | return LZMA_OK;
|
---|
836 | }
|
---|
837 |
|
---|
838 |
|
---|
839 | extern LZMA_API(lzma_ret)
|
---|
840 | lzma_file_info_decoder(lzma_stream *strm, lzma_index **dest_index,
|
---|
841 | uint64_t memlimit, uint64_t file_size)
|
---|
842 | {
|
---|
843 | lzma_next_strm_init(lzma_file_info_decoder_init, strm, &strm->seek_pos,
|
---|
844 | dest_index, memlimit, file_size);
|
---|
845 |
|
---|
846 | // We allow LZMA_FINISH in addition to LZMA_RUN for convenience.
|
---|
847 | // lzma_code() is able to handle the LZMA_FINISH + LZMA_SEEK_NEEDED
|
---|
848 | // combination in a sane way. Applications still need to be careful
|
---|
849 | // if they use LZMA_FINISH so that they remember to reset it back
|
---|
850 | // to LZMA_RUN after seeking if needed.
|
---|
851 | strm->internal->supported_actions[LZMA_RUN] = true;
|
---|
852 | strm->internal->supported_actions[LZMA_FINISH] = true;
|
---|
853 |
|
---|
854 | return LZMA_OK;
|
---|
855 | }
|
---|