|           Line data    Source code 
       1             : /*
       2             :   Zipios++ - a small C++ library that provides easy access to .zip files.
       3             : 
       4             :   Copyright (C) 2000-2007  Thomas Sondergaard
       5             :   Copyright (C) 2015  Made to Order Software Corporation
       6             : 
       7             :   This library is free software; you can redistribute it and/or
       8             :   modify it under the terms of the GNU Lesser General Public
       9             :   License as published by the Free Software Foundation; either
      10             :   version 2 of the License, or (at your option) any later version.
      11             : 
      12             :   This library is distributed in the hope that it will be useful,
      13             :   but WITHOUT ANY WARRANTY; without even the implied warranty of
      14             :   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      15             :   Lesser General Public License for more details.
      16             : 
      17             :   You should have received a copy of the GNU Lesser General Public
      18             :   License along with this library; if not, write to the Free Software
      19             :   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
      20             : */
      21             : 
      22             : /** \file
      23             :  * \brief The implementation of zipios::ZipFile.
      24             :  *
      25             :  * This file contains the high level functions used to read or write
      26             :  * a Zip archive file.
      27             :  */
      28             : 
      29             : #include "zipios++/zipfile.hpp"
      30             : 
      31             : #include "zipios++/zipiosexceptions.hpp"
      32             : 
      33             : #include "backbuffer.hpp"
      34             : #include "zipendofcentraldirectory.hpp"
      35             : #include "zipcentraldirectoryentry.hpp"
      36             : #include "zipinputstream.hpp"
      37             : #include "zipoutputstream.hpp"
      38             : 
      39             : #include <fstream>
      40             : 
      41             : 
      42             : /** \brief The zipios namespace includes the Zipios++ library definitions.
      43             :  *
      44             :  * This namespace is used to clearly separate all the Zipios++ definitions.
      45             :  * Note that a very few definitions are found outside of the namespace.
      46             :  * Some of those are hidden in the source of the library, a very few
      47             :  * appear in the zipios-config.hpp file as they are used to support
      48             :  * zipios++ on any platform.
      49             :  *
      50             :  * Note that to ensure secure development, we do not make use of the
      51             :  * C++ "using ..." keyword. That way we can guarantee what's what.
      52             :  */
      53             : namespace zipios
      54             : {
      55             : 
      56             : 
      57             : /** \mainpage Zipios++
      58             :  *
      59             :  * \image html zipios++.jpg
      60             :  *
      61             :  * \section intro Introduction
      62             :  *
      63             :  * Zipios++ is a java.util.zip-like C++ library for reading and
      64             :  * writing Zip files (ZipFile). Access to individual entries is
      65             :  * provided through a Zipios++ class (FileEntry) for the meta data
      66             :  * of the file and standard C++ iostreams for the contents of the file.
      67             :  *
      68             :  * A simple virtual file system that mounts regular directories and
      69             :  * zip files is also provided (FileCollection).
      70             :  *
      71             :  * The library is fully re-entrant. It is not otherwise thread safe.
      72             :  *
      73             :  * The source code is released under the <a
      74             :  * href="http://www.gnu.org/copyleft/lesser.html">GNU Lesser General Public
      75             :  * License</a>.
      76             :  *
      77             :  * \section status Status
      78             :  *
      79             :  * This was the status of version 1.x. At this point, 2.x has a brand new
      80             :  * version out and we are waiting for good news about the current status.
      81             :  * That being said, version 2.x comes with a test suite which produces a
      82             :  * 100% coverage of the library (except gzip which is not yet publicly
      83             :  * available.)
      84             :  *
      85             :  * \warning
      86             :  * There is a bug in the catch.hpp header file that generates a never
      87             :  * ending loop (see https://github.com/philsquared/Catch/issues/271 for
      88             :  * more information) when running the test suite under FreeBSD and an error
      89             :  * occurs (although you should not have an error, if it happens, then
      90             :  * the loop never ends.) I have noticed that problem with the following
      91             :  * scenario, and it does not seem to be fixed yet (Apr 4, 2015):
      92             :  *
      93             :  * \li "use Zipios++ to create zip archives with 1 or 3 files each"
      94             :  *
      95             :  * Spanned archives are not supported, and support is not planned.
      96             :  *
      97             :  * The library v1.x has been tested and appears to be working with:
      98             :  *
      99             :  * \li <a href="http://www.freebsd.org/ports/archivers.html#zipios++-0.1.5">FreeBSD stable and current / gcc 2.95.3</a>
     100             :  * \li Red Hat Linux release 7.0  / gcc 2.96
     101             :  * \li Red Hat Linux release 6.2 (Zoot) / egcs-2.91.66
     102             :  * \li Linux Mandrake release 7.0 (Air) / gcc 2.95.2
     103             :  * \li SGI IRIX64 6.5 / gcc 2.95.2
     104             :  * \li SGI IRIX64 6.5 / MIPSpro Compilers: Version 7.30
     105             :  *
     106             :  * The library v2.x has been compiled and appears to be working with:
     107             :  *
     108             :  * \li Ubuntu (starting with 14.04) -- full test suite working
     109             :  * \li FreeBSD (starting with 10.01)
     110             :  * \li SunOS (starting with Open SunOS 11.2)
     111             :  * \li Cygwin (starting with 6.1)
     112             :  *
     113             :  * If you make zipios++ work on other platforms, let us know by posting
     114             :  * a message on Sourceforge.net
     115             :  *
     116             :  *   http://sourceforge.net/projects/zipios/
     117             :  *
     118             :  * \section documentation Documentation
     119             :  *
     120             :  * This web page is the front page to the library documentation which
     121             :  * is generated from the source files using <a
     122             :  * href="http://www.stack.nl/~dimitri/doxygen/index.html">Doxygen</a>.
     123             :  * Use the links at the top of the page to browse the API
     124             :  * documentation. Your Doxygen installation may also be capable
     125             :  * of generating other formats (Latex, PDF, etc.) if you would
     126             :  * prefer such (we only offer the HTML documentation.)
     127             :  *
     128             :  * \subsection zipfiles Zip file access
     129             :  *
     130             :  * The two most important classes are DirectoryCollection and ZipFile.
     131             :  *
     132             :  * A ZipFile is also a FileCollection, only the collection is loaded
     133             :  * from a Zip archive instead of a directory. A ZipFile is composed of
     134             :  * ZipCentralDirectoryEntry objects. As far as you are concerned though,
     135             :  * you can only use it as FileEntry objects.
     136             :  *
     137             :  * Note that the ZipFile constructor immediately scans the Central
     138             :  * Directory of the Zip archive so the entries are immediately accessible.
     139             :  *
     140             :  * The DirectoryCollection can be created one file at a time, so it is
     141             :  * possible to create a collection without having to include all the
     142             :  * files from a directory. However, the files still have to exist on
     143             :  * disk. The DirectoryCollection is composed of DirectoryEntry objects.
     144             :  *
     145             :  * To access the entries in a collection, use the entries() function
     146             :  * which returns a vector of FileEntry objects. If you know the exact
     147             :  * filename of an entry, you may also use the getEntry() with that name.
     148             :  * This is particularly useful if you want to use Zipios++ as a way to
     149             :  * handle the resources of your executable (see the openEmbeddedZipFile()
     150             :  * function and the appendzip.cpp tool). Finally, you want to use
     151             :  * the getInputStream() function to read the data of a file defined in
     152             :  * a collection.
     153             :  *
     154             :  * \code
     155             :  *      // Resources global pointer
     156             :  *      zipios::ZipFile::pointer_t g_resources;
     157             :  *
     158             :  *      // Initialization of resources
     159             :  *      g_resources = zipios::ZipFile::openEmbeddedZipFile("executable_filename");
     160             :  *
     161             :  *      // Anywhere else in your application
     162             :  *
     163             :  *      // 1. get the entry (to access meta data)
     164             :  *      zipios::FileEntry::pointer_t entry(g_resources->getEntry("my/resource/file.xml"));
     165             :  *
     166             :  *      // 2. get the istream (to access the actual file data)
     167             :  *      zipios::FileCollection::stream_pointer_t in_stream(g_resources->getInputStream("my/resource/file.xml"));
     168             :  * \endcode
     169             :  *
     170             :  * zipios_example.cpp demonstrates the central elements of Zipios++ when used
     171             :  * in read mode.
     172             :  *
     173             :  * \subsection filecollection FileCollection
     174             :  *
     175             :  * A ZipFile is actually just a special kind of FileCollection that
     176             :  * obtains its entries from a Zip archive. Zipios++ also implements
     177             :  * a DirectoryCollection that obtains its entries from an on disk
     178             :  * directory and a CollectionCollection that obtains its entries from
     179             :  * other collections.
     180             :  *
     181             :  * Using a single CollectionCollection, any number of other FileCollection's
     182             :  * can be placed under its control and accessed through the same single
     183             :  * interface that is used to access a ZipFile or a DirectoryCollection.
     184             :  *
     185             :  * \warning
     186             :  * The CollectionCollection singleton in version 1.x was removed to make
     187             :  * the entire library 100% re-entrant without the need to link against
     188             :  * a thread library.
     189             :  *
     190             :  * \section download Download
     191             :  *
     192             :  * Go to Zipios++ project page on SourceForge for tar balls, source code
     193             :  * (CVS for v1.x and GIT for v2.x), and ChangeLog.
     194             :  * <a href="https://sourceforge.net/projects/zipios/" >
     195             :  * https://sourceforge.net/projects/zipios/</a>
     196             :  *
     197             :  * <a href="https://sourceforge.net/projects/zipios/files/latest/download"
     198             :  * rel="nofollow"><img alt="Download Zipios++"
     199             :  * src="https://img.shields.io/sourceforge/dt/zipios.svg"></a>
     200             :  *
     201             :  * \section development Development
     202             :  *
     203             :  * The Zipios++ project makes use of a few development tools and the
     204             :  * tests require the zip utility, used to verify that an external tool
     205             :  * can generate a zip file that Zipios++ can read.
     206             :  *
     207             :  * Under a Debian or Ubuntu system, you can run apt-get install with
     208             :  * the following list of packages:
     209             :  *
     210             :  * \code
     211             :  *   # For source management (or download the .tar.gz file)
     212             :  *   apt-get install git
     213             :  *
     214             :  *   # For development
     215             :  *   apt-get install g++ cmake zlib1g-dev
     216             :  *
     217             :  *   # For documentation (or download the -doc.tar.gz file)
     218             :  *   apt-get install doxygen graphviz
     219             :  *
     220             :  *   # For testing (optional, albeit recommended)
     221             :  *   apt-get install catch zip
     222             :  * \endcode
     223             :  *
     224             :  * Other systems use tools with pretty much the same naming convention
     225             :  * so you should be able to make the correlation.
     226             :  *
     227             :  * \section links Links
     228             :  *
     229             :  * <a href="http://www.zlib.net/">zlib</a>.
     230             :  * The compression library that Zipios++ uses to perform the actual
     231             :  * compression and decompression.
     232             :  *
     233             :  * <a href="http://www.oracle.com/technetwork/java/index.html">
     234             :  * Java</a>. Zipios++ version 1.0 is heavily inspired by the
     235             :  * java.util.zip package. Version 2.0 is following the same
     236             :  * philosophy without (1) attempting to follow the interface one
     237             :  * to one and (2) without updating to the newer version, if there
     238             :  * were changes...
     239             :  *
     240             :  * You will find a text file in the doc directory named zip-format.txt
     241             :  * with a complete description of the zip file format as of October 1, 2014.
     242             :  *
     243             :  * \section bugs Bugs
     244             :  *
     245             :  * Submit bug reports and patches on:
     246             :  *
     247             :  *   http://sourceforge.net/projects/zipios/
     248             :  *
     249             :  * \htmlonly
     250             :  * Project hosted by <a href="http://sourceforge.net">
     251             :  * <img style="vertical-align: middle;" src="http://sourceforge.net/sflogo.php?group_id=5418&type=1">
     252             :  * </a>
     253             :  * \endhtmlonly
     254             :  */
     255             : 
     256             : 
     257             : /** \class ZipFile
     258             :  * \brief The ZipFile class represents a collection of files.
     259             :  *
     260             :  * ZipFile is a FileCollection, where the files are stored
     261             :  * in a .zip file.
     262             :  */
     263             : 
     264             : 
     265             : 
     266             : /** \brief Open a zip archive that was previously appened to another file.
     267             :  *
     268             :  * Opens a Zip archive embedded in another file, by writing the zip
     269             :  * archive to the end of the file followed by the start offset of
     270             :  * the zip file on 4 bytes. The offset must be written in zip-file
     271             :  * byte-order (little endian).
     272             :  *
     273             :  * The program appendzip, which is part of the Zipios++ distribution can
     274             :  * be used to append a Zip archive to a file, e.g. a binary program.
     275             :  *
     276             :  * The function may throw various exception if the named file does not
     277             :  * seem to include a valid zip archive attached.
     278             :  *
     279             :  * \note
     280             :  * Only one file can be appended and opened in this way. Although
     281             :  * the appendzip tool can be used to append any number of files,
     282             :  * only the last one is accessible.
     283             :  *
     284             :  * \return A ZipFile that one can use to read compressed data.
     285             :  */
     286           0 : ZipFile::pointer_t ZipFile::openEmbeddedZipFile(std::string const& name)
     287             : {
     288             :     // open zipfile, read 4 last bytes close file
     289             :     // create ZipFile object.
     290             :     uint32_t start_offset;
     291             :     {
     292           0 :         std::ifstream ifs(name, std::ios::in | std::ios::binary);
     293           0 :         ifs.seekg(-4, std::ios::end);
     294           0 :         zipRead(ifs, start_offset);
     295             :         // TODO: add support for 64 bit (files of more than 4Gb)
     296             :     }
     297           0 :     return ZipFile::pointer_t(new ZipFile(name, start_offset, 4));
     298             : }
     299             : 
     300             : 
     301             : /** \brief Initialize a ZipFile object.
     302             :  *
     303             :  * This is the default constructor of the ZipFile object.
     304             :  *
     305             :  * Note that an empty ZipFile is marked as invalid. More or less, such
     306             :  * an object is useless although it is useful to have this constructor
     307             :  * if you want to work with maps or vectors of ZipFile objects.
     308             :  */
     309           1 : ZipFile::ZipFile()
     310             :     //: m_vs(...) -- auto-init
     311             : {
     312           1 : }
     313             : 
     314             : 
     315             : /** \brief Initialize a ZipFile object from an input file.
     316             :  *
     317             :  * This constructor opens the named zip file. If the zip "file" is
     318             :  * embedded in a file that contains other data, e.g. a binary
     319             :  * program, the offset of the zip file start and end must be
     320             :  * specified.
     321             :  *
     322             :  * If the file cannot be opened or the Zip directory cannot
     323             :  * be read, then the constructor throws an exception.
     324             :  *
     325             :  * \param[in] filename  The filename of the zip file to open.
     326             :  * \param[in] s_off  Offset relative to the start of the file, that
     327             :  *                   indicates the beginning of the zip data in the file.
     328             :  * \param[in] e_off  Offset relative to the end of the file, that
     329             :  *                   indicates the end of the zip data in the file.
     330             :  *                   The offset is a positive number, even though the
     331             :  *                   offset is towards the beginning of the file.
     332             :  */
     333         369 : ZipFile::ZipFile(std::string const& filename, offset_t s_off, offset_t e_off)
     334             :     : FileCollection(filename)
     335         453 :     , m_vs(s_off, e_off)
     336             : {
     337         369 :     std::ifstream zipfile(m_filename, std::ios::in | std::ios::binary);
     338         369 :     if(!zipfile)
     339             :     {
     340           1 :         throw IOException("Error opening Zip archive file for reading in binary mode.");
     341             :     }
     342             : 
     343             :     // Find and read the End of Central Directory.
     344         736 :     ZipEndOfCentralDirectory eocd;
     345             :     {
     346         368 :         BackBuffer bb(zipfile, m_vs);
     347         368 :         ssize_t read_p(-1);
     348             :         for(;;)
     349             :         {
     350        9211 :             if(read_p < 0)
     351             :             {
     352         391 :                 if(!bb.readChunk(read_p))
     353             :                 {
     354          23 :                     throw FileCollectionException("Unable to find zip structure: End-of-central-directory");
     355             :                 }
     356             :             }
     357             :             // Note: this is pretty fast since it reads from 'bb' which
     358             :             //       caches the buffer the readChunk() function just read.
     359             :             //
     360        9188 :             if(eocd.read(bb, read_p))
     361             :             {
     362             :                 // found it!
     363         325 :                 break;
     364             :             }
     365        8843 :             --read_p;
     366         368 :         }
     367             :     }
     368             : 
     369             :     // Position read pointer to start of first entry in central dir.
     370         325 :     m_vs.vseekg(zipfile, eocd.getOffset(), std::ios::beg);
     371             : 
     372             :     // TBD -- is that ", 0" still necessary? (With VC2012 and better)
     373             :     // Give the second argument in the next line to keep Visual C++ quiet
     374             :     //m_entries.resize(eocd.totalCount(), 0);
     375         325 :     m_entries.resize(eocd.getCount());
     376             : 
     377         325 :     size_t const max_entry(eocd.getCount());
     378       69727 :     for(size_t entry_num(0); entry_num < max_entry; ++entry_num)
     379             :     {
     380       69412 :         m_entries[entry_num] = FileEntry::pointer_t(new ZipCentralDirectoryEntry);
     381       69412 :         m_entries[entry_num].get()->read(zipfile);
     382             :     }
     383             : 
     384             :     // Consistency check #1:
     385             :     // The virtual seeker position is exactly the start offset of the
     386             :     // Central Directory plus the Central Directory size
     387             :     //
     388         315 :     offset_t const pos(m_vs.vtellg(zipfile));
     389         315 :     if(static_cast<offset_t>(eocd.getOffset() + eocd.getCentralDirectorySize()) != pos)
     390             :     {
     391          10 :         throw FileCollectionException("Zip file consistency problem. Zip file data fields are inconsistent with zip file layout.");
     392             :     }
     393             : 
     394             :     // Consistency check #2:
     395             :     // Are local headers consistent with CD headers?
     396             :     //
     397       69677 :     for(auto it = m_entries.begin(); it != m_entries.end(); ++it)
     398             :     {
     399             :         /** \TODO
     400             :          * Make sure the entry offset is properly defined by ZipCentralDirectoryEntry.
     401             :          * Also the isEqual() is a quite advance test here!
     402             :          */
     403       69392 :         m_vs.vseekg(zipfile, (*it)->getEntryOffset(), std::ios::beg);
     404       69392 :         ZipLocalEntry zlh;
     405       69392 :         zlh.read(zipfile);
     406       69382 :         if(!zipfile || !zlh.isEqual(**it))
     407             :         {
     408          10 :             throw FileCollectionException("Zip file consistency problem. Zip file data fields are inconsistent with zip file layout.");
     409             :         }
     410       69392 :     }
     411             : 
     412             :     // we are all good!
     413         654 :     m_valid = true;
     414         285 : }
     415             : 
     416             : 
     417             : /** \brief Create a clone of this ZipFile.
     418             :  *
     419             :  * This function creates a heap allocated clone of the ZipFile object.
     420             :  *
     421             :  * \return A shared pointer to a copy of this ZipFile object.
     422             :  */
     423           1 : FileCollection::pointer_t ZipFile::clone() const
     424             : {
     425           1 :     return FileCollection::pointer_t(new ZipFile(*this));
     426             : }
     427             : 
     428             : 
     429             : /** \brief Clean up the ZipFile object.
     430             :  *
     431             :  * The destructor ensures that any ZipFile data gets flushed
     432             :  * out before returning.
     433             :  */
     434         575 : ZipFile::~ZipFile()
     435             : {
     436         287 :     close();
     437         288 : }
     438             : 
     439             : 
     440             : /** \brief Retrieve a pointer to a file in the Zip archive.
     441             :  *
     442             :  * This function returns a shared pointer to an istream defined from the
     443             :  * named entry, which gives you access to the corresponding file defined
     444             :  * in the Zip archive.
     445             :  *
     446             :  * The function returns nullptr if there is no entry with the
     447             :  * specified name in this ZipFile.
     448             :  *
     449             :  * Note that the function returns a smart pointer to an istream. The
     450             :  * ZipFile class does not hold that pointer meaning that
     451             :  * if you call getInputStream() multiple times with the same
     452             :  * \p entry_name parameter, you get different istream instance each
     453             :  * time.
     454             :  *
     455             :  * By default the \p entry_name parameter is expected to match the full
     456             :  * path and filename (MatchPath::MATCH). If you are looking for a file
     457             :  * and want to ignore the path, set the matchpath parameter
     458             :  * to MatchPath::IGNORE.
     459             :  *
     460             :  * \note
     461             :  * If the file is compressed inside the Zip archive, this input stream
     462             :  * returns the uncompressed data transparently to you (outside of the
     463             :  * time it takes to decompress the data, of course.)
     464             :  *
     465             :  * \param[in] entry_name  The name of the file to search in the collection.
     466             :  * \param[in] matchpath  Whether the full path or just the filename is matched.
     467             :  *
     468             :  * \return A shared pointer to an open istream for the specified entry.
     469             :  *
     470             :  * \sa CollectionCollection
     471             :  * \sa DirectoryCollection
     472             :  * \sa FileCollection
     473             :  */
     474       61607 : ZipFile::stream_pointer_t ZipFile::getInputStream(std::string const& entry_name, MatchPath matchpath)
     475             : {
     476       61607 :     mustBeValid();
     477             : 
     478       61607 :     FileEntry::pointer_t entry(getEntry(entry_name, matchpath));
     479       61607 :     if(entry)
     480             :     {
     481       61117 :         stream_pointer_t zis(new ZipInputStream(m_filename, entry->getEntryOffset() + m_vs.startOffset()));
     482       61077 :         return zis;
     483             :     }
     484             : 
     485             :     // no entry with that name (and match) available
     486         530 :     return nullptr;
     487             : }
     488             : 
     489             : 
     490             : /** \brief Create a Zip archive from the specified FileCollection.
     491             :  *
     492             :  * This function is expected to be used with a DirectoryCollection
     493             :  * that you created to save the collection in an archive.
     494             :  *
     495             :  * \param[in,out] os  The output stream where the Zip archive is saed.
     496             :  * \param[in] collection  The collection to save in this output stream.
     497             :  * \param[in] zip_comment  The global comment of the Zip archive.
     498             :  */
     499         256 : void ZipFile::saveCollectionToArchive(std::ostream & os, FileCollection & collection, std::string const & zip_comment)
     500             : {
     501             :     try
     502             :     {
     503         256 :         ZipOutputStream output_stream(os);
     504             : 
     505         256 :         output_stream.setComment(zip_comment);
     506             : 
     507         512 :         FileEntry::vector_t entries(collection.entries());
     508      133321 :         for(auto it(entries.begin()); it != entries.end(); ++it)
     509             :         {
     510      133067 :             output_stream.putNextEntry(*it);
     511             :             // get an InputStream if available (i.e. directories do not have an input stream)
     512      133065 :             if(!(*it)->isDirectory())
     513             :             {
     514      127032 :                 FileCollection::stream_pointer_t is(collection.getInputStream((*it)->getName()));
     515      127032 :                 if(is)
     516             :                 {
     517      127032 :                     output_stream << is->rdbuf();
     518      127032 :                 }
     519             :             }
     520             :         }
     521             : 
     522             :         // clean up mantually so we can get any exception
     523             :         // (so we avoid having exceptions gobbled by the destructor)
     524         255 :         output_stream.closeEntry();
     525         255 :         output_stream.finish();
     526         508 :         output_stream.close();
     527             :     }
     528           4 :     catch(...)
     529             :     {
     530           4 :         os.setstate(std::ios::failbit);
     531           4 :         throw;
     532             :     }
     533         252 : }
     534             : 
     535             : 
     536           3 : } // zipios namespace
     537             : 
     538             : // Local Variables:
     539             : // mode: cpp
     540             : // indent-tabs-mode: nil
     541             : // c-basic-offset: 4
     542             : // tab-width: 4
     543             : // End:
     544             : 
     545             : // vim: ts=4 sw=4 et
 |