partdata.h

Go to the documentation of this file.
00001 /**
00002  *  Copyright (C) 2004-2005 Alo Sarv <madcat_@users.sourceforge.net>
00003  *
00004  *  This program is free software; you can redistribute it and/or modify
00005  *  it under the terms of the GNU General Public License as published by
00006  *  the Free Software Foundation; either version 2 of the License, or
00007  *  (at your option) any later version.
00008  *
00009  *  This program is distributed in the hope that it will be useful,
00010  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *  GNU General Public License for more details.
00013  *
00014  *  You should have received a copy of the GNU General Public License
00015  *  along with this program; if not, write to the Free Software
00016  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00017  */
00018 
00019 /** @file partdata.h Interface for PartData class */
00020 
00021 #ifndef __PARTDATA_H__
00022 #define __PARTDATA_H__
00023 
00024 #include <hn/osdep.h>
00025 #include <hn/hnfwd.h>
00026 #include <hn/rangelist.h>
00027 #include <hn/event.h>
00028 #include <boost/filesystem/path.hpp>
00029 #include <boost/shared_ptr.hpp>
00030 #include <boost/enable_shared_from_this.hpp>
00031 #include <map>
00032 #include <list>
00033 
00034 /**
00035  * Events emitted from PartData objects. Enumerators are numbered consecutively, starting at 1.
00036  */
00037 enum PDEvent {
00038         PD_ADDED = 1,   //!< Posted whenever PartData is constructed
00039         PD_DATA_ADDED,  //!< Posted whenever data is added to PartData
00040         PD_DATA_FLUSHED,//!< Posted whenever a PartData flushes its buffers
00041         PD_DESTROY,     //!< Posted whenever a PartData is about to be destroyed
00042         PD_VERIFYING,   //!< When file is complete, but in verification phase
00043         PD_MOVING,      //!< When file is verified, and moving is in progress
00044         PD_COMPLETE     //!< File has been completed and placed in Incoming dir
00045 };
00046 
00047 namespace Detail { // forward declarations of implementation types used below
00048         struct ChunkMap;  // held by PartData through a scoped_ptr (m_chunks)
00049         struct AvailIter; // held by UsedRange and LockedRange through a scoped_ptr (m_chunk)
00050         class Chunk;      // declared a friend of PartData
00051 }
00052 
00053 class DLLEXPORT PartData { // Partially downloaded file: tracks its ranges, sources and hashsets
00054 public:
00055         DECLARE_EVENT_TABLE(PartData*, int); // events are posted as int; presumably PDEvent values
00056 public:
00057         /**
00058          * \brief Construct a NEW temporary file
00059          *
00060          * Using this constructor, a new temporary file is constructed, at
00061          * specified location with specified size.
00062          *
00063          * @param size      Size of the resulting file.
00064          * @param loc       Location on disk where to store the temp file
00065          * @param dest      Destination where to write the complete file
00066          *
00067          * \note The disk space indicated by @p size is not allocated on
00068          *       actual disk right away. Instead, the size is allocated
00069          *       dynamically as the file grows. This can be changed from global
00070          *       application preferences though.
00071          */
00072         PartData(
00073                 uint64_t size,
00074                 const boost::filesystem::path &loc,
00075                 const boost::filesystem::path &dest
00076         );
00077 
00078         /**
00079          * \brief Load a previously constructed temporary file.
00080          *
00081          * This constructor can be used to resume a previously started download,
00082          * by reading the necessary data from @p loc.
00083          *
00084          * @param loc    Path to PartData reference file, which contains the
00085          *               data required to resume the download.
00086          */
00087         PartData(const boost::filesystem::path &loc);
00088 
00089         /**
00090          * \brief Add an availability chunk mask
00091          *
00092          * Allows modules to register chunk availability maps, so PartData
00093          * can decide the lowest-available chunk to be returned from get*
00094          * methods
00095          *
00096          * @param chunkSize     Size of one chunk
00097          * @param chunks        Boolean vector, where each true indicates
00098          *                      the source having the part, and false the source
00099          *                      not having the part.
00100          */
00101         void addSourceMask(uint32_t chunkSize, const std::vector<bool> &chunks);
00102 
00103         /**
00104          * \brief Optimized version of addSourceMask(), adds a full source.
00105          *
00106          * Similar to addSourceMask(), this adds a source which has the entire
00107          * file.
00108          *
00109          * @param chunkSize    Size of one chunk
00110          */
00111         void addFullSource(uint32_t chunkSize);
00112 
00113         /**
00114          * \brief Remove an availability chunk mask
00115          * \see addSourceMask
00116          */
00117         void delSourceMask(uint32_t chunkSize, const std::vector<bool> &chunks);
00118 
00119         /**
00120          * \brief Remove a full source mask
00121          * \see addFullSource
00122          */
00123         void delFullSource(uint32_t chunkSize);
00124 
00125         /**
00126          * \brief Locates a range that PartData considers most important.
00127          *
00128          * The current implementation considers partially completed ranges top
00129          * priority, followed by rarest chunks, and then lowest used chunks.
00130          *
00131          * @param size      Optional, size of the range to be acquired.
00132          * @return          Pointer to a range marked as 'used'.
00133          *
00134          * \note The size of the given range may be smaller than was requested.
00135          * \note Current implementation ignores the size parameter.
00136          * \throws PartData::RangeError if no usable range could be found.
00137          */
00138         Detail::UsedRangePtr getRange(uint32_t size = 0);
00139 
00140         /**
00141          * \brief Locates a range which is also contained in passed chunkmap.
00142          *
00143          * This method restricts getRange() call to only chunks which are
00144          * indicated by a true value in the passed chunkmap (e.g. partial
00145          * sources).
00146          *
00147          * @param size      Size of a chunk in the chunkmap
00148          * @param chunks    The chunks the source has
00149          * @return          Pointer to a range marked as 'used'.
00150          *
00151          * \throws PartData::RangeError if no usable range could be found.
00152          */
00153         Detail::UsedRangePtr getRange(
00154                 uint32_t size, const std::vector<bool> &chunks
00155         );
00156 
00157         /**
00158          * \brief Simply writes data starting at specified offset.
00159          *
00160          * Checks will be performed to ensure the validity of the location
00161          * and that it's not already complete or locked.
00162          */
00163         void write(uint64_t beginOffset, const std::string &data);
00164 
00165         /**
00166          * \name Check for completeness.
00167          * Methods to check whether the entire file, or a part of it, is
00168          * complete.
00169          */
00170         //!@{
00171         bool isComplete() const;
00172         bool isComplete(const Range64 &subRange) const;
00173         bool isComplete(uint64_t begin, uint64_t end) const;
00174         //!@}
00175         /**
00176          * \name Generic accessors
00177          */
00178         //!@{
00179         uint32_t  getChunkCount(uint32_t chunkSize) const;
00180         MetaData* getMetaData() const { return m_md; }
00181         void      setMetaData(MetaData *md);
00182         void      setDestination(const boost::filesystem::path &p) { m_dest=p; }
00183         uint64_t  getSize() const { return m_size; }
00184         boost::filesystem::path getLocation() const { return m_loc; }
00185         boost::filesystem::path getDestination() const { return m_dest; }
00186         uint64_t  getCompleted() const; //!< Returns number of bytes completed
00187         //! Returns number of known sources (partial and full sources combined)
00188         uint32_t  getSourceCnt() const { return m_sourceCnt + m_fullSourceCnt; }
00189         //! Returns number of full sources
00190         uint32_t  getFullSourceCnt() const { return m_fullSourceCnt; }
00191         std::vector<bool> getPartStatus(uint32_t chunkSize) const { // returns a copy of the cached map
00192                 CHECK_THROW(m_partStatus.find(chunkSize) != m_partStatus.end()); // presumably throws if no map is cached for chunkSize
00193                 return (*m_partStatus.find(chunkSize)).second;
00194         }
00195         //!@}
00196 
00197         /**
00198          * Saves the current state of this file to m_loc.dat file.
00199          */
00200         void save();
00201 
00202         /**
00203          * \brief Adds a hashset with chunkhashes against which downloaded data
00204          * is tested.
00205          *
00206          * @param hs      Hashset
00207          *
00208          * \pre hs->getChunkCount() > 0
00209          * \pre hs->getPartSize() > 0
00210          */
00211         void addHashSet(const HashSetBase *hs);
00212 
00213         /**
00214          * \brief Cancels this download, discarding ALL downloaded data.
00215          */
00216         void cancelDownload();
00217 
00218         //! Exception class, documented as thrown when a lock cannot be acquired
00219         struct DLLEXPORT LockError : public std::runtime_error {
00220                 LockError(const std::string&);
00221         };
00222         //! Exception class, documented as thrown when no usable range is found
00223         struct DLLEXPORT RangeError : public std::runtime_error {
00224                 RangeError(const std::string&);
00225         };
00226 
00227         //! Output operator to streams
00228         friend std::ostream& operator<<(std::ostream &o, const PartData &p);
00229 private:
00230         friend class SharedFile;
00231         friend class FilesList;
00232         friend class Detail::UsedRange;
00233         friend class Detail::LockedRange;
00234         friend class Detail::Chunk;
00235 
00236         friend int test_main(int, char[]); // NOTE(review): char[] adjusts to char*, not char*[] - confirm against the test driver
00237 
00238         //! Copying part files is not allowed (declared private)
00239         PartData(const PartData&);
00240         PartData& operator=(const PartData&);
00241 
00242         //! Only allowed by SharedFile
00243         ~PartData();
00244 
00245         /**
00246          * \short Acquire lock on a subrange of UsedRange.
00247          *
00248          * @param r      UsedRange to acquire lock within
00249          * @param size   Upper limit on the size of the LockedRange requested.
00250          * @return       Locked range
00251          *
00252          * \throws PartData::LockError if no lock could be acquired.
00253          */
00254         Detail::LockedRangePtr getLock(Detail::UsedRangePtr r, uint32_t size);
00255 
00256         /**
00257          * \name Implementation functions
00258          */
00259         //!@{
00260         void checkAddChunkMap(uint32_t chunkSize);
00261         template<typename Predicate>
00262         Detail::UsedRangePtr doGetRange(uint64_t size, Predicate &pred);
00263         void doWrite(uint64_t begin, const std::string &data);
00264         void flushBuffer();
00265         void rehashCompleted();
00266         void onHashEvent(HashWorkPtr p, HashEvent evt);
00267         boost::logic::tribool verifyHashSet(const HashSetBase *hs);
00268         void doComplete();
00269         void destroy(); // emits PD_DESTROY Event
00270         void deleteFiles(); // delete physical files referring to this temp file
00271         //!@}
00272 
00273         /**
00274          * \name Main rangelists.
00275          *
00276          * These lists are exclusive, no Range may exist simultaneously in
00277          * multiple of these lists. Also, none of these lists may contain
00278          * overlapping ranges.
00279          */
00280         //! @{
00281         RangeList64 m_complete;  //!< Complete ranges
00282         RangeList64 m_locked;    //!< Locked ranges
00283         RangeList64 m_corrupt;   //!< Corrupt ranges
00284         //! @}
00285 
00286         /**
00287          * \name Implementation data members
00288          */
00289         //!@{
00290         boost::scoped_ptr<Detail::ChunkMap> m_chunks;
00291         uint64_t m_size;                //!< Value returned by getSize()
00292         boost::filesystem::path m_loc;  //!< Value returned by getLocation()
00293         boost::filesystem::path m_dest; //!< Value returned by getDestination()
00294         std::map<uint64_t, std::string> m_buffer; // presumably pending data keyed by begin offset - TODO confirm
00295         typedef std::map<uint64_t, std::string>::iterator BIter; //!< Iterator type of m_buffer
00296         uint32_t m_toFlush;
00297         MetaData *m_md;
00298         uint16_t m_pendingHashes;
00299         //! Pointer to full rehash job (if any) in progress, used for canceling
00300         //! full rehash in case a chunkhash fails while this is in progress.
00301         HashWorkPtr m_fullJob;
00302         uint32_t m_sourceCnt;
00303         uint32_t m_fullSourceCnt;
00304         //! Caches complete chunks boolmaps, for faster usage by modules
00305         std::map<uint32_t, std::vector<bool> > m_partStatus;
00306         //!@}
00307 public:
00308         //! For testing purposes only
00309         void printCompleted();
00310 };
00311 
00312 namespace Detail {
00313         /**
00314          * \brief Range marked as "in use".
00315          *
00316          * UsedRange concept is similar to the lock objects of many thread
00317          * libraries - you retrieve one via get*() methods in PartData, and when
00318          * it is destroyed, it takes care that all used/locked ranges do get
00319          * freed properly. This object may only be used when wrapped in
00320          * boost::shared_ptr.
00321          *
00322          * \note There may be multiple UsedRange's referring to same Chunk.
00323          *       This is indicated by m_useCnt member of Chunk class.
00324          */
00325         class DLLEXPORT UsedRange :
00326                 public Range64,
00327                 public boost::enable_shared_from_this<UsedRange>
00328         {
00329         public:
00330                 /**
00331                  * \brief Acquire a lock within this UsedRange
00332                  *
00333                  * @param size     Size of the requested lock
00334                  * @return         Locked range object
00335                  *
00336                  * \note The returned locked range may be smaller than requested
00337                  * \throws PartData::LockError if locking fails for any reason
00338                  */
00339                 boost::shared_ptr<LockedRange> getLock(uint32_t size);
00340 
00341                 /**
00342                  * Check this UsedRange for completeness
00343                  */
00344                 bool isComplete() const;
00345 
00346                 //! Destructor public for boost::checked_deleter
00347                 ~UsedRange();
00348         private:
00349                 friend class ::PartData;
00350 
00351                 /**
00352                  * \brief Constructor
00353                  *
00354                  * Allowed only by PartData. UsedRange keeps a pointer back to
00355                  * its parent object, and also sets up event handlers as
00356                  * necessary to ensure the pointer remains valid.
00357                  *
00358                  * \note The template may be left undefined here since it's
00359                  *       only called from inside partdata.cpp
00360                  */
00361                 template<typename IterType>
00362                 UsedRange(PartData *parent, IterType it);
00363 
00364                 /**
00365                  * Constructs new UsedRange with specified begin and end
00366                  * offsets, w/o associating it with any specific chunk.
00367                  */
00368                 UsedRange(PartData *parent, uint64_t begin, uint64_t end);
00369 
00370                 //! Copying is not allowed (declared private)
00371                 UsedRange(const UsedRange&);
00372                 UsedRange& operator=(const UsedRange&);
00373 
00374                 //! Parent PartData
00375                 PartData *m_parent;
00376 
00377                 //! Chunk this UsedRange refers to
00378                 boost::scoped_ptr<Detail::AvailIter> m_chunk;
00379         };
00380 
00381         /**
00382          * \brief LockedRange object is an exclusively locked Range in PartData.
00383          *
00384          * The lock is acquired upon call to UsedRange::getLock() method, which
00385          * constructs the lock object and returns to client code. The indicated
00386          * range in PartData is then exclusively locked, with this object being
00387          * the only one allowed to access the locked region of the file. Upon
00388          * the destruction of this object, the lock is automatically freed.
00389          */
00390         class DLLEXPORT LockedRange : public Range64 {
00391         public:
00392                 //! Destructor public for boost::checked_deleter
00393                 ~LockedRange();
00394 
00395                 /**
00396                  * \brief Write data to within this locked region.
00397                  *
00398                  * @param beginOffset     Begin offset where to write data
00399                  * @param data            Data to be written
00400                  *
00401                  * \throws LockError If attempting to write outside this lock
00402                  */
00403                 void write(uint64_t beginOffset, const std::string &data);
00404 
00405                 //! Check if this range is complete (delegates to the parent PartData)
00406                 bool isComplete() const { return m_parent->isComplete(*this); }
00407         private:
00408                 friend class ::PartData;
00409 
00410                 /**
00411                  * \brief Construct new Lock
00412                  *
00413                  * @param parent      PartData object this lock belongs to
00414                  * @param r           Range to be locked
00415                  */
00416                 LockedRange(PartData *parent, Range64 r);
00417 
00418                 /**
00419                  * \brief Construct new lock and associate with chunk
00420                  *
00421                  * @param parent      PartData object this lock belongs to
00422                  * @param r           Range to be locked
00423                  * @param it          Iterator to chunk the lock belongs to
00424                  */
00425                 LockedRange(PartData *parent, Range64 r, Detail::AvailIter &it);
00426 
00427                 //! Copying locks is forbidden
00428                 LockedRange(const LockedRange&);
00429                 //! Copying locks is forbidden
00430                 LockedRange& operator=(const LockedRange&);
00431 
00432                 //! Parent file
00433                 PartData *m_parent;
00434 
00435                 //! The chunk containing this LockedRange. May be invalid.
00436                 boost::scoped_ptr<Detail::AvailIter> m_chunk;
00437         };
00438 }
00439 
00440 #endif