Line data Source code
1 : /*
2 : Zipios -- a small C++ library that provides easy access to .zip files.
3 :
4 : Copyright (C) 2000-2007 Thomas Sondergaard
5 : Copyright (C) 2015-2019 Made to Order Software Corporation
6 :
7 : This library is free software; you can redistribute it and/or
8 : modify it under the terms of the GNU Lesser General Public
9 : License as published by the Free Software Foundation; either
10 : version 2.1 of the License, or (at your option) any later version.
11 :
12 : This library is distributed in the hope that it will be useful,
13 : but WITHOUT ANY WARRANTY; without even the implied warranty of
14 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 : Lesser General Public License for more details.
16 :
17 : You should have received a copy of the GNU Lesser General Public
18 : License along with this library; if not, write to the Free Software
19 : Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 : */
21 :
22 : /** \file
23 : * \brief The implementation of zipios::ZipFile.
24 : *
25 : * This file contains the high level functions used to read or write
26 : * a Zip archive file.
27 : */
28 :
29 : #include "zipios/zipfile.hpp"
30 :
31 : #include "zipios/zipiosexceptions.hpp"
32 :
33 : #include "backbuffer.hpp"
34 : #include "zipendofcentraldirectory.hpp"
35 : #include "zipcentraldirectoryentry.hpp"
36 : #include "zipinputstream.hpp"
37 : #include "zipoutputstream.hpp"
38 :
39 : #include <fstream>
40 :
41 :
42 : /** \brief The zipios namespace includes the Zipios library definitions.
43 : *
44 : * This namespace is used to clearly separate all the Zipios definitions.
45 : * Note that a very few definitions are found outside of the namespace.
46 : * Some of those are hidden in the source of the library, a very few
47 : * appear in the zipios-config.hpp file as they are used to support
48 : * Zipios on any platform.
49 : *
50 : * Note that to ensure secure development, we do not make use of the
51 : * C++ "using ..." keyword. That way we can guarantee what's what.
52 : */
53 : namespace zipios
54 : {
55 :
56 :
57 : /** \mainpage Zipios
58 : *
59 : * \image html zipios.jpg
60 : *
61 : * \section intro Introduction
62 : *
63 : * Zipios is a java.util.zip-like C++ library for reading and
64 : * writing Zip files (ZipFile). Access to individual entries is
65 : * provided through a Zipios class (FileEntry) for the meta data
66 : * of the file and standard C++ iostreams for the contents of the file.
67 : *
68 : * A simple virtual file system that mounts regular directories and
69 : * zip files is also provided (FileCollection).
70 : *
71 : * The library is fully re-entrant. It is not otherwise thread safe.
72 : *
73 : * The source code is released under the <a
74 : * href="http://www.gnu.org/copyleft/lesser.html">GNU Lesser General Public
75 : * License</a>.
76 : *
77 : * \section status Status
78 : *
79 : * This was the status of version 1.x. At this point, 2.x has a brand new
80 : * version out and we are waiting for good news about the current status.
81 : * That being said, version 2.x comes with a test suite which produces a
82 : * 100% coverage of the library (except gzip which is not yet publicly
83 : * available.)
84 : *
85 : * \warning
86 : * There is a bug in the catch.hpp header file that generates a never
87 : * ending loop (see https://github.com/philsquared/Catch/issues/271 for
88 : * more information) when running the test suite under FreeBSD and an error
89 : * occurs (although you should not have an error, if it happens, then
90 : * the loop never ends.) I have noticed that problem with the following
91 : * scenario, and it does not seem to be fixed yet (Apr 4, 2015):
92 : *
93 : * \li "use Zipios to create zip archives with 1 or 3 files each"
94 : *
95 : * Spanned archives are not supported, and support is not planned.
96 : *
97 : * The library v1.x has been tested and appears to be working with:
98 : *
99 : * \li <a href="http://www.freebsd.org/ports/archivers.html#zipios++-0.1.5">FreeBSD stable and current / gcc 2.95.3</a>
100 : * \li Red Hat Linux release 7.0 / gcc 2.96
101 : * \li Red Hat Linux release 6.2 (Zoot) / egcs-2.91.66
102 : * \li Linux Mandrake release 7.0 (Air) / gcc 2.95.2
103 : * \li SGI IRIX64 6.5 / gcc 2.95.2
104 : * \li SGI IRIX64 6.5 / MIPSpro Compilers: Version 7.30
105 : *
106 : * The library v2.x has been compiled and appears to be working with:
107 : *
108 : * \li Ubuntu (starting with 14.04) -- full test suite working
109 : * \li Debian (starting with Stretch)
110 : * \li Fedora (starting with F25)
111 : * \li FreeBSD (starting with 10.01)
112 : * \li SunOS (starting with Open SunOS 11.2)
113 : * \li Cygwin (starting with 6.1)
114 : *
115 : * If you make Zipios work on other platforms, let us know by posting
116 : * an issue on GitHub:
117 : *
118 : * https://github.com/Zipios/Zipios/issues
119 : *
120 : *
121 : * \section documentation Documentation
122 : *
123 : * This web page is the front page to the library documentation which
124 : * is generated from the source files using <a
125 : * href="http://www.stack.nl/~dimitri/doxygen/index.html">Doxygen</a>.
126 : * Use the links at the top of the page to browse the API
127 : * documentation. Your Doxygen installation may also be capable
128 : * of generating other formats (Latex, PDF, etc.) if you would
129 : * prefer such (we only offer the HTML documentation.)
130 : *
131 : * \subsection zipfiles Zip file access
132 : *
133 : * The two most important classes are DirectoryCollection and ZipFile.
134 : *
135 : * A ZipFile is also a FileCollection, only the collection is loaded
136 : * from a Zip archive instead of a directory. A ZipFile is composed of
137 : * ZipCentralDirectoryEntry objects. As far as you are concerned though,
138 : * you can only use it as FileEntry objects.
139 : *
140 : * Note that the ZipFile constructor immediately scans the Central
141 : * Directory of the Zip archive so the entries are immediately accessible.
142 : *
143 : * The DirectoryCollection can be created one file at a time, so it is
144 : * possible to create a collection without having to include all the
145 : * files from a directory. However, the files still have to exist on
146 : * disk. The DirectoryCollection is composed of DirectoryEntry objects.
147 : *
148 : * To access the entries in a collection, use the entries() function
149 : * which returns a vector of FileEntry objects. If you know the exact
150 : * filename of an entry, you may also use the getEntry() with that name.
151 : * This is particularly useful if you want to use Zipios as a way to
152 : * handle the resources of your executable (see the openEmbeddedZipFile()
153 : * function and the appendzip.cpp tool). Finally, you want to use
154 : * the getInputStream() function to read the data of a file defined in
155 : * a collection.
156 : *
157 : * \code
158 : * // Resources global pointer
159 : * zipios::ZipFile::pointer_t g_resources;
160 : *
161 : * // Initialization of resources
162 : * g_resources = zipios::ZipFile::openEmbeddedZipFile("executable_filename");
163 : *
164 : * // Anywhere else in your application
165 : *
166 : * // 1. get the entry (to access meta data)
167 : * zipios::FileEntry::pointer_t entry(g_resources->getEntry("my/resource/file.xml"));
168 : *
169 : * // 2. get the istream (to access the actual file data)
170 : * zipios::FileCollection::stream_pointer_t in_stream(g_resources->getInputStream("my/resource/file.xml"));
171 : * \endcode
172 : *
173 : * zipios_example.cpp demonstrates the central elements of Zipios when used
174 : * in read mode.
175 : *
176 : * \subsection filecollection FileCollection
177 : *
178 : * A ZipFile is actually just a special kind of FileCollection that
179 : * obtains its entries from a Zip archive. Zipios also implements
180 : * a DirectoryCollection that obtains its entries from an on disk
181 : * directory and a CollectionCollection that obtains its entries from
182 : * other collections.
183 : *
184 : * Using a single CollectionCollection, any number of other FileCollection's
185 : * can be placed under its control and accessed through the same single
186 : * interface that is used to access a ZipFile or a DirectoryCollection.
187 : *
188 : * \warning
189 : * The CollectionCollection singleton in version 1.x was removed to make
190 : * the entire library 100% re-entrant without the need to link against
191 : * a thread library.
192 : *
193 : * \section download Download
194 : *
195 : * Go to the Zipios project page on SourceForge for tar balls, source code
196 : * (CVS for v1.x and GIT for v2.x), and ChangeLog.
197 : * <a href="https://sourceforge.net/projects/zipios/" >
198 : * https://sourceforge.net/projects/zipios/</a>
199 : *
200 : * <a href="https://sourceforge.net/projects/zipios/files/latest/download"
201 : * rel="nofollow"><img alt="Download Zipios"
202 : * src="https://img.shields.io/sourceforge/dt/zipios.svg"></a>
203 : *
204 : * \section development Development
205 : *
206 : * The Zipios project makes use of a few development tools and the
207 : * tests require the zip utility, used to verify that an external tool
208 : * can generate a zip file that Zipios can read.
209 : *
210 : * Under a Debian or Ubuntu system, you can run apt-get install with
211 : * the following list of packages:
212 : *
213 : * \code
214 : * # For source management (or download the .tar.gz file)
215 : * apt-get install git
216 : *
217 : * # For development
218 : * apt-get install g++ cmake zlib1g-dev
219 : *
220 : * # For documentation (or download the -doc.tar.gz file)
221 : * apt-get install doxygen graphviz
222 : *
223 : * # For testing (optional, albeit recommended)
224 : * apt-get install catch zip
225 : * \endcode
226 : *
227 : * Under Fedora, you can use dnf (or yum for early versions):
228 : *
229 : * \code
230 : * # For source management (or download the .tar.gz file)
231 : * dnf install git
232 : *
233 : * # For development
234 : * dnf install gcc-c++ cmake zlib-devel
235 : *
236 : * # For documentation (or download the -doc.tar.gz file)
237 : * dnf install doxygen graphviz
238 : *
239 : * # For testing (optional, albeit recommended)
240 : * dnf install catch-devel zip
241 : * \endcode
242 : *
243 : *
244 : * Other systems use tools with pretty much the same naming convention
245 : * so you should be able to make the correlation.
246 : *
247 : * \section links Links
248 : *
249 : * <a href="http://www.zlib.net/">zlib</a>.
250 : * The compression library that Zipios uses to perform the actual
251 : * compression and decompression.
252 : *
253 : * <a href="http://www.oracle.com/technetwork/java/index.html">
254 : * Java</a>. Zipios version 1.0 is heavily inspired by the
255 : * java.util.zip package. Version 2.0 is following the same
256 : * philosophy without (1) attempting to follow the interface one
257 : * to one and (2) without updating to the newer version, if there
258 : * were changes...
259 : *
260 : * You will find a text file in the doc directory named zip-format.txt
261 : * with a complete description of the zip file format as of October 1, 2014.
262 : *
263 : * \section bugs Bugs
264 : *
265 : * Submit bug reports and patches on:
266 : *
267 : * http://sourceforge.net/projects/zipios/
268 : *
269 : * \htmlonly
270 : * Project hosted by <a href="http://sourceforge.net">
271 : * <img style="vertical-align: middle;" src="http://sourceforge.net/sflogo.php?group_id=5418&type=1">
272 : * </a>
273 : * \endhtmlonly
274 : */
275 :
276 :
277 : /** \class ZipFile
278 : * \brief The ZipFile class represents a collection of files.
279 : *
280 : * ZipFile is a FileCollection, where the files are stored
281 : * in a .zip file.
282 : */
283 :
284 :
285 :
286 : /** \brief Open a zip archive that was previously appened to another file.
287 : *
288 : * Opens a Zip archive embedded in another file, by writing the zip
289 : * archive to the end of the file followed by the start offset of
290 : * the zip file on 4 bytes. The offset must be written in zip-file
291 : * byte-order (little endian).
292 : *
293 : * The program appendzip, which is part of the Zipios distribution can
294 : * be used to append a Zip archive to a file, e.g. a binary program.
295 : *
296 : * The function may throw various exception if the named file does not
297 : * seem to include a valid zip archive attached.
298 : *
299 : * \note
300 : * Only one file can be appended and opened in this way. Although
301 : * the appendzip tool can be used to append any number of files,
302 : * only the last one is accessible.
303 : *
304 : * \return A ZipFile that one can use to read compressed data.
305 : */
306 0 : ZipFile::pointer_t ZipFile::openEmbeddedZipFile(std::string const& name)
307 : {
308 : // open zipfile, read 4 last bytes close file
309 : // create ZipFile object.
310 : uint32_t start_offset;
311 : {
312 0 : std::ifstream ifs(name, std::ios::in | std::ios::binary);
313 0 : ifs.seekg(-4, std::ios::end);
314 0 : zipRead(ifs, start_offset);
315 : // todo: add support for 64 bit (files of more than 4Gb)
316 : }
317 0 : return ZipFile::pointer_t(new ZipFile(name, start_offset, 4));
318 : }
319 :
320 :
321 : /** \brief Initialize a ZipFile object.
322 : *
323 : * This is the default constructor of the ZipFile object.
324 : *
325 : * Note that an empty ZipFile is marked as invalid. More or less, such
326 : * an object is useless although it is useful to have this constructor
327 : * if you want to work with maps or vectors of ZipFile objects.
328 : */
329 1 : ZipFile::ZipFile()
330 : //: m_vs(...) -- auto-init
331 : {
332 1 : }
333 :
334 :
335 : /** \brief Initialize a ZipFile object from an input file.
336 : *
337 : * This constructor opens the named zip file. If the zip "file" is
338 : * embedded in a file that contains other data, e.g. a binary
339 : * program, the offset of the zip file start and end must be
340 : * specified.
341 : *
342 : * If the file cannot be opened or the Zip directory cannot
343 : * be read, then the constructor throws an exception.
344 : *
345 : * \param[in] filename The filename of the zip file to open.
346 : * \param[in] s_off Offset relative to the start of the file, that
347 : * indicates the beginning of the zip data in the file.
348 : * \param[in] e_off Offset relative to the end of the file, that
349 : * indicates the end of the zip data in the file.
350 : * The offset is a positive number, even though the
351 : * offset is towards the beginning of the file.
352 : */
353 357 : ZipFile::ZipFile(std::string const& filename, offset_t s_off, offset_t e_off)
354 : : FileCollection(filename)
355 441 : , m_vs(s_off, e_off)
356 : {
357 714 : std::ifstream zipfile(m_filename, std::ios::in | std::ios::binary);
358 357 : if(!zipfile)
359 : {
360 1 : throw IOException("Error opening Zip archive file for reading in binary mode.");
361 : }
362 :
363 : // Find and read the End of Central Directory.
364 712 : ZipEndOfCentralDirectory eocd;
365 : {
366 712 : BackBuffer bb(zipfile, m_vs);
367 356 : ssize_t read_p(-1);
368 8428 : for(;;)
369 : {
370 8784 : if(read_p < 0)
371 : {
372 379 : if(!bb.readChunk(read_p))
373 : {
374 23 : throw FileCollectionException("Unable to find zip structure: End-of-central-directory");
375 : }
376 : }
377 : // Note: this is pretty fast since it reads from 'bb' which
378 : // caches the buffer the readChunk() function just read.
379 : //
380 8761 : if(eocd.read(bb, read_p))
381 : {
382 : // found it!
383 313 : break;
384 : }
385 8428 : --read_p;
386 : }
387 : }
388 :
389 : // Position read pointer to start of first entry in central dir.
390 313 : m_vs.vseekg(zipfile, eocd.getOffset(), std::ios::beg);
391 :
392 : // TBD -- is that ", 0" still necessary? (With VC2012 and better)
393 : // Give the second argument in the next line to keep Visual C++ quiet
394 : //m_entries.resize(eocd.totalCount(), 0);
395 313 : m_entries.resize(eocd.getCount());
396 :
397 313 : size_t const max_entry(eocd.getCount());
398 108918 : for(size_t entry_num(0); entry_num < max_entry; ++entry_num)
399 : {
400 108615 : m_entries[entry_num] = FileEntry::pointer_t(new ZipCentralDirectoryEntry);
401 108615 : m_entries[entry_num].get()->read(zipfile);
402 : }
403 :
404 : // Consistency check #1:
405 : // The virtual seeker position is exactly the start offset of the
406 : // Central Directory plus the Central Directory size
407 : //
408 303 : offset_t const pos(m_vs.vtellg(zipfile));
409 303 : if(static_cast<offset_t>(eocd.getOffset() + eocd.getCentralDirectorySize()) != pos)
410 : {
411 10 : throw FileCollectionException("Zip file consistency problem. Zip file data fields are inconsistent with zip file layout.");
412 : }
413 :
414 : // Consistency check #2:
415 : // Are local headers consistent with CD headers?
416 : //
417 108868 : for(auto it = m_entries.begin(); it != m_entries.end(); ++it)
418 : {
419 : /** \TODO
420 : * Make sure the entry offset is properly defined by
421 : * ZipCentralDirectoryEntry.
422 : *
423 : * Also the isEqual() is a quite advanced (slow) test here!
424 : */
425 108595 : m_vs.vseekg(zipfile, (*it)->getEntryOffset(), std::ios::beg);
426 217190 : ZipLocalEntry zlh;
427 108595 : zlh.read(zipfile);
428 108585 : if(!zipfile || !zlh.isEqual(**it))
429 : {
430 10 : throw FileCollectionException("Zip file consistency problem. Zip file data fields are inconsistent with zip file layout.");
431 : }
432 : }
433 :
434 : // we are all good!
435 273 : m_valid = true;
436 273 : }
437 :
438 :
439 : /** \brief Create a clone of this ZipFile.
440 : *
441 : * This function creates a heap allocated clone of the ZipFile object.
442 : *
443 : * \return A shared pointer to a copy of this ZipFile object.
444 : */
445 1 : FileCollection::pointer_t ZipFile::clone() const
446 : {
447 1 : return FileCollection::pointer_t(new ZipFile(*this));
448 : }
449 :
450 :
451 : /** \brief Clean up the ZipFile object.
452 : *
453 : * The destructor ensures that any ZipFile data gets flushed
454 : * out before returning.
455 : */
456 551 : ZipFile::~ZipFile()
457 : {
458 275 : close();
459 276 : }
460 :
461 :
462 : /** \brief Retrieve a pointer to a file in the Zip archive.
463 : *
464 : * This function returns a shared pointer to an istream defined from the
465 : * named entry, which gives you access to the corresponding file defined
466 : * in the Zip archive.
467 : *
468 : * The function returns nullptr if there is no entry with the
469 : * specified name in this ZipFile.
470 : *
471 : * Note that the function returns a smart pointer to an istream. The
472 : * ZipFile class does not hold that pointer meaning that
473 : * if you call getInputStream() multiple times with the same
474 : * \p entry_name parameter, you get different istream instance each
475 : * time.
476 : *
477 : * By default the \p entry_name parameter is expected to match the full
478 : * path and filename (MatchPath::MATCH). If you are looking for a file
479 : * and want to ignore the path, set the matchpath parameter
480 : * to MatchPath::IGNORE.
481 : *
482 : * \note
483 : * If the file is compressed inside the Zip archive, this input stream
484 : * returns the uncompressed data transparently to you (outside of the
485 : * time it takes to decompress the data, of course.)
486 : *
487 : * \param[in] entry_name The name of the file to search in the collection.
488 : * \param[in] matchpath Whether the full path or just the filename is matched.
489 : *
490 : * \return A shared pointer to an open istream for the specified entry.
491 : *
492 : * \sa CollectionCollection
493 : * \sa DirectoryCollection
494 : * \sa FileCollection
495 : */
496 97251 : ZipFile::stream_pointer_t ZipFile::getInputStream(std::string const& entry_name, MatchPath matchpath)
497 : {
498 97251 : mustBeValid();
499 :
500 194502 : FileEntry::pointer_t entry(getEntry(entry_name, matchpath));
501 97251 : if(entry)
502 : {
503 193526 : stream_pointer_t zis(new ZipInputStream(m_filename, entry->getEntryOffset() + m_vs.startOffset()));
504 96743 : return zis;
505 : }
506 :
507 : // no entry with that name (and match) available
508 488 : return nullptr;
509 : }
510 :
511 :
512 : /** \brief Create a Zip archive from the specified FileCollection.
513 : *
514 : * This function is expected to be used with a DirectoryCollection
515 : * that you created to save the collection in an archive.
516 : *
517 : * \param[in,out] os The output stream where the Zip archive is saed.
518 : * \param[in] collection The collection to save in this output stream.
519 : * \param[in] zip_comment The global comment of the Zip archive.
520 : */
521 243 : void ZipFile::saveCollectionToArchive(std::ostream & os, FileCollection & collection, std::string const & zip_comment)
522 : {
523 : try
524 : {
525 486 : ZipOutputStream output_stream(os);
526 :
527 243 : output_stream.setComment(zip_comment);
528 :
529 486 : FileEntry::vector_t entries(collection.entries());
530 171709 : for(auto it(entries.begin()); it != entries.end(); ++it)
531 : {
532 171468 : output_stream.putNextEntry(*it);
533 : // get an InputStream if available (i.e. directories do not have an input stream)
534 171466 : if(!(*it)->isDirectory())
535 : {
536 322376 : FileCollection::stream_pointer_t is(collection.getInputStream((*it)->getName()));
537 161188 : if(is)
538 : {
539 161188 : output_stream << is->rdbuf();
540 : }
541 : }
542 : }
543 :
544 : // clean up mantually so we can get any exception
545 : // (so we avoid having exceptions gobbled by the destructor)
546 242 : output_stream.closeEntry();
547 242 : output_stream.finish();
548 239 : output_stream.close();
549 : }
550 8 : catch(...)
551 : {
552 4 : os.setstate(std::ios::failbit);
553 4 : throw;
554 : }
555 239 : }
556 :
557 :
558 3 : } // zipios namespace
559 :
560 : // Local Variables:
561 : // mode: cpp
562 : // indent-tabs-mode: nil
563 : // c-basic-offset: 4
564 : // tab-width: 4
565 : // End:
566 :
567 : // vim: ts=4 sw=4 et
|