Line data Source code
1 : /*
2 : Zipios++ - a small C++ library that provides easy access to .zip files.
3 :
4 : Copyright (C) 2000-2007 Thomas Sondergaard
5 : Copyright (C) 2015 Made to Order Software Corporation
6 :
7 : This library is free software; you can redistribute it and/or
8 : modify it under the terms of the GNU Lesser General Public
9 : License as published by the Free Software Foundation; either
10 : version 2 of the License, or (at your option) any later version.
11 :
12 : This library is distributed in the hope that it will be useful,
13 : but WITHOUT ANY WARRANTY; without even the implied warranty of
14 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 : Lesser General Public License for more details.
16 :
17 : You should have received a copy of the GNU Lesser General Public
18 : License along with this library; if not, write to the Free Software
19 : Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 : */
21 :
22 : /** \file
23 : * \brief The implementation of zipios::ZipFile.
24 : *
25 : * This file contains the high level functions used to read or write
26 : * a Zip archive file.
27 : */
28 :
29 : #include "zipios++/zipfile.hpp"
30 :
31 : #include "zipios++/zipiosexceptions.hpp"
32 :
33 : #include "backbuffer.hpp"
34 : #include "zipendofcentraldirectory.hpp"
35 : #include "zipcentraldirectoryentry.hpp"
36 : #include "zipinputstream.hpp"
37 : #include "zipoutputstream.hpp"
38 :
39 : #include <fstream>
40 :
41 :
42 : /** \brief The zipios namespace includes the Zipios++ library definitions.
43 : *
44 : * This namespace is used to clearly separate all the Zipios++ definitions.
45 : * Note that a very few definitions are found outside of the namespace.
46 : * Some of those are hidden in the source of the library, a very few
47 : * appear in the zipios-config.hpp file as they are used to support
48 : * zipios++ on any platform.
49 : *
50 : * Note that to ensure secure development, we do not make use of the
51 : * C++ "using ..." keyword. That way we can guarantee what's what.
52 : */
53 : namespace zipios
54 : {
55 :
56 :
57 : /** \mainpage Zipios++
58 : *
59 : * \image html zipios++.jpg
60 : *
61 : * \section intro Introduction
62 : *
63 : * Zipios++ is a java.util.zip-like C++ library for reading and
64 : * writing Zip files (ZipFile). Access to individual entries is
65 : * provided through a Zipios++ class (FileEntry) for the meta data
66 : * of the file and standard C++ iostreams for the contents of the file.
67 : *
68 : * A simple virtual file system that mounts regular directories and
69 : * zip files is also provided (FileCollection).
70 : *
71 : * The library is fully re-entrant. It is not otherwise thread safe.
72 : *
73 : * The source code is released under the <a
74 : * href="http://www.gnu.org/copyleft/lesser.html">GNU Lesser General Public
75 : * License</a>.
76 : *
77 : * \section status Status
78 : *
79 : * This was the status of version 1.x. At this point, 2.x has a brand new
80 : * version out and we are waiting for good news about the current status.
81 : * That being said, version 2.x comes with a test suite which produces a
82 : * 100% coverage of the library (except gzip which is not yet publicly
83 : * available.)
84 : *
85 : * \warning
86 : * There is a bug in the catch.hpp header file that generates a never
87 : * ending loop (see https://github.com/philsquared/Catch/issues/271 for
88 : * more information) when running the test suite under FreeBSD and an error
89 : * occurs (although you should not have an error, if it happens, then
90 : * the loop never ends.) I have noticed that problem with the following
91 : * scenario, and it does not seem to be fixed yet (Apr 4, 2015):
92 : *
93 : * \li "use Zipios++ to create zip archives with 1 or 3 files each"
94 : *
95 : * Spanned archives are not supported, and support is not planned.
96 : *
97 : * The library v1.x has been tested and appears to be working with:
98 : *
99 : * \li <a href="http://www.freebsd.org/ports/archivers.html#zipios++-0.1.5">FreeBSD stable and current / gcc 2.95.3</a>
100 : * \li Red Hat Linux release 7.0 / gcc 2.96
101 : * \li Red Hat Linux release 6.2 (Zoot) / egcs-2.91.66
102 : * \li Linux Mandrake release 7.0 (Air) / gcc 2.95.2
103 : * \li SGI IRIX64 6.5 / gcc 2.95.2
104 : * \li SGI IRIX64 6.5 / MIPSpro Compilers: Version 7.30
105 : *
106 : * The library v2.x has been compiled and appears to be working with:
107 : *
108 : * \li Ubuntu (starting with 14.04) -- full test suite working
109 : * \li FreeBSD (starting with 10.01)
110 : * \li SunOS (starting with Open SunOS 11.2)
111 : * \li Cygwin (starting with 6.1)
112 : *
113 : * If you make zipios++ work on other platforms, let us know by posting
114 : * a message on Sourceforge.net
115 : *
116 : * http://sourceforge.net/projects/zipios/
117 : *
118 : * \section documentation Documentation
119 : *
120 : * This web page is the front page to the library documentation which
121 : * is generated from the source files using <a
122 : * href="http://www.stack.nl/~dimitri/doxygen/index.html">Doxygen</a>.
123 : * Use the links at the top of the page to browse the API
124 : * documentation. Your Doxygen installation may also be capable
125 : * of generating other formats (Latex, PDF, etc.) if you would
126 : * prefer such (we only offer the HTML documentation.)
127 : *
128 : * \subsection zipfiles Zip file access
129 : *
130 : * The two most important classes are DirectoryCollection and ZipFile.
131 : *
132 : * A ZipFile is also a FileCollection, only the collection is loaded
133 : * from a Zip archive instead of a directory. A ZipFile is composed of
134 : * ZipCentralDirectoryEntry objects. As far as you are concerned though,
135 : * you can only use it as FileEntry objects.
136 : *
137 : * Note that the ZipFile constructor immediately scans the Central
138 : * Directory of the Zip archive so the entries are immediately accessible.
139 : *
140 : * The DirectoryCollection can be created one file at a time, so it is
141 : * possible to create a collection without having to include all the
142 : * files from a directory. However, the files still have to exist on
143 : * disk. The DirectoryCollection is composed of DirectoryEntry objects.
144 : *
145 : * To access the entries in a collection, use the entries() function
146 : * which returns a vector of FileEntry objects. If you know the exact
147 : * filename of an entry, you may also use the getEntry() with that name.
148 : * This is particularly useful if you want to use Zipios++ as a way to
149 : * handle the resources of your executable (see the openEmbeddedZipFile()
150 : * function and the appendzip.cpp tool). Finally, you want to use
151 : * the getInputStream() function to read the data of a file defined in
152 : * a collection.
153 : *
154 : * \code
155 : * // Resources global pointer
156 : * zipios::ZipFile::pointer_t g_resources;
157 : *
158 : * // Initialization of resources
159 : * g_resources = zipios::ZipFile::openEmbeddedZipFile("executable_filename");
160 : *
161 : * // Anywhere else in your application
162 : *
163 : * // 1. get the entry (to access meta data)
164 : * zipios::FileEntry::pointer_t entry(g_resources->getEntry("my/resource/file.xml"));
165 : *
166 : * // 2. get the istream (to access the actual file data)
167 : * zipios::FileCollection::stream_pointer_t in_stream(g_resources->getInputStream("my/resource/file.xml"));
168 : * \endcode
169 : *
170 : * zipios_example.cpp demonstrates the central elements of Zipios++ when used
171 : * in read mode.
172 : *
173 : * \subsection filecollection FileCollection
174 : *
175 : * A ZipFile is actually just a special kind of FileCollection that
176 : * obtains its entries from a Zip archive. Zipios++ also implements
177 : * a DirectoryCollection that obtains its entries from an on disk
178 : * directory and a CollectionCollection that obtains its entries from
179 : * other collections.
180 : *
181 : * Using a single CollectionCollection, any number of other FileCollection's
182 : * can be placed under its control and accessed through the same single
183 : * interface that is used to access a ZipFile or a DirectoryCollection.
184 : *
185 : * \warning
186 : * The CollectionCollection singleton in version 1.x was removed to make
187 : * the entire library 100% re-entrant without the need to link against
188 : * a thread library.
189 : *
190 : * \section download Download
191 : *
192 : * Go to Zipios++ project page on SourceForge for tar balls, source code
193 : * (CVS for v1.x and GIT for v2.x), and ChangeLog.
194 : * <a href="https://sourceforge.net/projects/zipios/" >
195 : * https://sourceforge.net/projects/zipios/</a>
196 : *
197 : * <a href="https://sourceforge.net/projects/zipios/files/latest/download"
198 : * rel="nofollow"><img alt="Download Zipios++"
199 : * src="https://img.shields.io/sourceforge/dt/zipios.svg"></a>
200 : *
201 : * \section development Development
202 : *
203 : * The Zipios++ project makes use of a few development tools and the
204 : * tests require the zip utility, used to verify that an external tool
205 : * can generate a zip file that Zipios++ can read.
206 : *
207 : * Under a Debian or Ubuntu system, you can run apt-get install with
208 : * the following list of packages:
209 : *
210 : * \code
211 : * # For source management (or download the .tar.gz file)
212 : * apt-get install git
213 : *
214 : * # For development
215 : * apt-get install g++ cmake zlib1g-dev
216 : *
217 : * # For documentation (or download the -doc.tar.gz file)
218 : * apt-get install doxygen graphviz
219 : *
220 : * # For testing (optional, albeit recommended)
221 : * apt-get install catch zip
222 : * \endcode
223 : *
224 : * Other systems use tools with pretty much the same naming convention
225 : * so you should be able to make the correlation.
226 : *
227 : * \section links Links
228 : *
229 : * <a href="http://www.zlib.net/">zlib</a>.
230 : * The compression library that Zipios++ uses to perform the actual
231 : * compression and decompression.
232 : *
233 : * <a href="http://www.oracle.com/technetwork/java/index.html">
234 : * Java</a>. Zipios++ version 1.0 is heavily inspired by the
235 : * java.util.zip package. Version 2.0 is following the same
236 : * philosophy without (1) attempting to follow the interface one
237 : * to one and (2) without updating to the newer version, if there
238 : * were changes...
239 : *
240 : * You will find a text file in the doc directory named zip-format.txt
241 : * with a complete description of the zip file format as of October 1, 2014.
242 : *
243 : * \section bugs Bugs
244 : *
245 : * Submit bug reports and patches on:
246 : *
247 : * http://sourceforge.net/projects/zipios/
248 : *
249 : * \htmlonly
250 : * Project hosted by <a href="http://sourceforge.net">
251 : * <img style="vertical-align: middle;" src="http://sourceforge.net/sflogo.php?group_id=5418&type=1">
252 : * </a>
253 : * \endhtmlonly
254 : */
255 :
256 :
257 : /** \class ZipFile
258 : * \brief The ZipFile class represents a collection of files.
259 : *
260 : * ZipFile is a FileCollection, where the files are stored
261 : * in a .zip file.
262 : */
263 :
264 :
265 :
266 : /** \brief Open a zip archive that was previously appened to another file.
267 : *
268 : * Opens a Zip archive embedded in another file, by writing the zip
269 : * archive to the end of the file followed by the start offset of
270 : * the zip file on 4 bytes. The offset must be written in zip-file
271 : * byte-order (little endian).
272 : *
273 : * The program appendzip, which is part of the Zipios++ distribution can
274 : * be used to append a Zip archive to a file, e.g. a binary program.
275 : *
276 : * The function may throw various exception if the named file does not
277 : * seem to include a valid zip archive attached.
278 : *
279 : * \note
280 : * Only one file can be appended and opened in this way. Although
281 : * the appendzip tool can be used to append any number of files,
282 : * only the last one is accessible.
283 : *
284 : * \return A ZipFile that one can use to read compressed data.
285 : */
286 0 : ZipFile::pointer_t ZipFile::openEmbeddedZipFile(std::string const& name)
287 : {
288 : // open zipfile, read 4 last bytes close file
289 : // create ZipFile object.
290 : uint32_t start_offset;
291 : {
292 0 : std::ifstream ifs(name, std::ios::in | std::ios::binary);
293 0 : ifs.seekg(-4, std::ios::end);
294 0 : zipRead(ifs, start_offset);
295 : // TODO: add support for 64 bit (files of more than 4Gb)
296 : }
297 0 : return ZipFile::pointer_t(new ZipFile(name, start_offset, 4));
298 : }
299 :
300 :
301 : /** \brief Initialize a ZipFile object.
302 : *
303 : * This is the default constructor of the ZipFile object.
304 : *
305 : * Note that an empty ZipFile is marked as invalid. More or less, such
306 : * an object is useless although it is useful to have this constructor
307 : * if you want to work with maps or vectors of ZipFile objects.
308 : */
309 1 : ZipFile::ZipFile()
310 : //: m_vs(...) -- auto-init
311 : {
312 1 : }
313 :
314 :
315 : /** \brief Initialize a ZipFile object from an input file.
316 : *
317 : * This constructor opens the named zip file. If the zip "file" is
318 : * embedded in a file that contains other data, e.g. a binary
319 : * program, the offset of the zip file start and end must be
320 : * specified.
321 : *
322 : * If the file cannot be opened or the Zip directory cannot
323 : * be read, then the constructor throws an exception.
324 : *
325 : * \param[in] filename The filename of the zip file to open.
326 : * \param[in] s_off Offset relative to the start of the file, that
327 : * indicates the beginning of the zip data in the file.
328 : * \param[in] e_off Offset relative to the end of the file, that
329 : * indicates the end of the zip data in the file.
330 : * The offset is a positive number, even though the
331 : * offset is towards the beginning of the file.
332 : */
333 369 : ZipFile::ZipFile(std::string const& filename, offset_t s_off, offset_t e_off)
334 : : FileCollection(filename)
335 453 : , m_vs(s_off, e_off)
336 : {
337 369 : std::ifstream zipfile(m_filename, std::ios::in | std::ios::binary);
338 369 : if(!zipfile)
339 : {
340 1 : throw IOException("Error opening Zip archive file for reading in binary mode.");
341 : }
342 :
343 : // Find and read the End of Central Directory.
344 736 : ZipEndOfCentralDirectory eocd;
345 : {
346 368 : BackBuffer bb(zipfile, m_vs);
347 368 : ssize_t read_p(-1);
348 : for(;;)
349 : {
350 9211 : if(read_p < 0)
351 : {
352 391 : if(!bb.readChunk(read_p))
353 : {
354 23 : throw FileCollectionException("Unable to find zip structure: End-of-central-directory");
355 : }
356 : }
357 : // Note: this is pretty fast since it reads from 'bb' which
358 : // caches the buffer the readChunk() function just read.
359 : //
360 9188 : if(eocd.read(bb, read_p))
361 : {
362 : // found it!
363 325 : break;
364 : }
365 8843 : --read_p;
366 368 : }
367 : }
368 :
369 : // Position read pointer to start of first entry in central dir.
370 325 : m_vs.vseekg(zipfile, eocd.getOffset(), std::ios::beg);
371 :
372 : // TBD -- is that ", 0" still necessary? (With VC2012 and better)
373 : // Give the second argument in the next line to keep Visual C++ quiet
374 : //m_entries.resize(eocd.totalCount(), 0);
375 325 : m_entries.resize(eocd.getCount());
376 :
377 325 : size_t const max_entry(eocd.getCount());
378 69727 : for(size_t entry_num(0); entry_num < max_entry; ++entry_num)
379 : {
380 69412 : m_entries[entry_num] = FileEntry::pointer_t(new ZipCentralDirectoryEntry);
381 69412 : m_entries[entry_num].get()->read(zipfile);
382 : }
383 :
384 : // Consistency check #1:
385 : // The virtual seeker position is exactly the start offset of the
386 : // Central Directory plus the Central Directory size
387 : //
388 315 : offset_t const pos(m_vs.vtellg(zipfile));
389 315 : if(static_cast<offset_t>(eocd.getOffset() + eocd.getCentralDirectorySize()) != pos)
390 : {
391 10 : throw FileCollectionException("Zip file consistency problem. Zip file data fields are inconsistent with zip file layout.");
392 : }
393 :
394 : // Consistency check #2:
395 : // Are local headers consistent with CD headers?
396 : //
397 69677 : for(auto it = m_entries.begin(); it != m_entries.end(); ++it)
398 : {
399 : /** \TODO
400 : * Make sure the entry offset is properly defined by ZipCentralDirectoryEntry.
401 : * Also the isEqual() is a quite advance test here!
402 : */
403 69392 : m_vs.vseekg(zipfile, (*it)->getEntryOffset(), std::ios::beg);
404 69392 : ZipLocalEntry zlh;
405 69392 : zlh.read(zipfile);
406 69382 : if(!zipfile || !zlh.isEqual(**it))
407 : {
408 10 : throw FileCollectionException("Zip file consistency problem. Zip file data fields are inconsistent with zip file layout.");
409 : }
410 69392 : }
411 :
412 : // we are all good!
413 654 : m_valid = true;
414 285 : }
415 :
416 :
417 : /** \brief Create a clone of this ZipFile.
418 : *
419 : * This function creates a heap allocated clone of the ZipFile object.
420 : *
421 : * \return A shared pointer to a copy of this ZipFile object.
422 : */
423 1 : FileCollection::pointer_t ZipFile::clone() const
424 : {
425 1 : return FileCollection::pointer_t(new ZipFile(*this));
426 : }
427 :
428 :
429 : /** \brief Clean up the ZipFile object.
430 : *
431 : * The destructor ensures that any ZipFile data gets flushed
432 : * out before returning.
433 : */
434 575 : ZipFile::~ZipFile()
435 : {
436 287 : close();
437 288 : }
438 :
439 :
440 : /** \brief Retrieve a pointer to a file in the Zip archive.
441 : *
442 : * This function returns a shared pointer to an istream defined from the
443 : * named entry, which gives you access to the corresponding file defined
444 : * in the Zip archive.
445 : *
446 : * The function returns nullptr if there is no entry with the
447 : * specified name in this ZipFile.
448 : *
449 : * Note that the function returns a smart pointer to an istream. The
450 : * ZipFile class does not hold that pointer meaning that
451 : * if you call getInputStream() multiple times with the same
452 : * \p entry_name parameter, you get different istream instance each
453 : * time.
454 : *
455 : * By default the \p entry_name parameter is expected to match the full
456 : * path and filename (MatchPath::MATCH). If you are looking for a file
457 : * and want to ignore the path, set the matchpath parameter
458 : * to MatchPath::IGNORE.
459 : *
460 : * \note
461 : * If the file is compressed inside the Zip archive, this input stream
462 : * returns the uncompressed data transparently to you (outside of the
463 : * time it takes to decompress the data, of course.)
464 : *
465 : * \param[in] entry_name The name of the file to search in the collection.
466 : * \param[in] matchpath Whether the full path or just the filename is matched.
467 : *
468 : * \return A shared pointer to an open istream for the specified entry.
469 : *
470 : * \sa CollectionCollection
471 : * \sa DirectoryCollection
472 : * \sa FileCollection
473 : */
474 61607 : ZipFile::stream_pointer_t ZipFile::getInputStream(std::string const& entry_name, MatchPath matchpath)
475 : {
476 61607 : mustBeValid();
477 :
478 61607 : FileEntry::pointer_t entry(getEntry(entry_name, matchpath));
479 61607 : if(entry)
480 : {
481 61117 : stream_pointer_t zis(new ZipInputStream(m_filename, entry->getEntryOffset() + m_vs.startOffset()));
482 61077 : return zis;
483 : }
484 :
485 : // no entry with that name (and match) available
486 530 : return nullptr;
487 : }
488 :
489 :
490 : /** \brief Create a Zip archive from the specified FileCollection.
491 : *
492 : * This function is expected to be used with a DirectoryCollection
493 : * that you created to save the collection in an archive.
494 : *
495 : * \param[in,out] os The output stream where the Zip archive is saed.
496 : * \param[in] collection The collection to save in this output stream.
497 : * \param[in] zip_comment The global comment of the Zip archive.
498 : */
499 256 : void ZipFile::saveCollectionToArchive(std::ostream & os, FileCollection & collection, std::string const & zip_comment)
500 : {
501 : try
502 : {
503 256 : ZipOutputStream output_stream(os);
504 :
505 256 : output_stream.setComment(zip_comment);
506 :
507 512 : FileEntry::vector_t entries(collection.entries());
508 133321 : for(auto it(entries.begin()); it != entries.end(); ++it)
509 : {
510 133067 : output_stream.putNextEntry(*it);
511 : // get an InputStream if available (i.e. directories do not have an input stream)
512 133065 : if(!(*it)->isDirectory())
513 : {
514 127032 : FileCollection::stream_pointer_t is(collection.getInputStream((*it)->getName()));
515 127032 : if(is)
516 : {
517 127032 : output_stream << is->rdbuf();
518 127032 : }
519 : }
520 : }
521 :
522 : // clean up mantually so we can get any exception
523 : // (so we avoid having exceptions gobbled by the destructor)
524 255 : output_stream.closeEntry();
525 255 : output_stream.finish();
526 508 : output_stream.close();
527 : }
528 4 : catch(...)
529 : {
530 4 : os.setstate(std::ios::failbit);
531 4 : throw;
532 : }
533 252 : }
534 :
535 :
536 3 : } // zipios namespace
537 :
538 : // Local Variables:
539 : // mode: cpp
540 : // indent-tabs-mode: nil
541 : // c-basic-offset: 4
542 : // tab-width: 4
543 : // End:
544 :
545 : // vim: ts=4 sw=4 et
|