hasher.h

Go to the documentation of this file.
00001 /**
00002  *  Copyright (C) 2004-2005 Alo Sarv <madcat_@users.sourceforge.net>
00003  *
00004  *  This program is free software; you can redistribute it and/or modify
00005  *  it under the terms of the GNU General Public License as published by
00006  *  the Free Software Foundation; either version 2 of the License, or
00007  *  (at your option) any later version.
00008  *
00009  *  This program is distributed in the hope that it will be useful,
00010  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *  GNU General Public License for more details.
00013  *
00014  *  You should have received a copy of the GNU General Public License
00015  *  along with this program; if not, write to the Free Software
00016  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00017  */
00018 
00019 /** \file hasher.h Interface for Files Checksumming Subsystem */
00020 
00021 #ifndef __HASHER_H__
00022 #define __HASHER_H__
00023 
00024 #include <hn/osdep.h>
00025 #include <hn/hnfwd.h>
00026 #include <hn/event.h>
00027 #include <hn/workthread.h>
00028 #include <hn/hash.h>
00029 #include <boost/enable_shared_from_this.hpp>
00030 #include <boost/filesystem/path.hpp>
00031 
00032 /**
00033  * Represents a job entry to be performed by WorkThread. Always wrap this class
00034  * into boost::shared_ptr<>, because it is a 'loose' object - it is not
00035  * contained anywhere, however needs to stay alive through various different
00036  * containers/classes. As such, there is no specific 'destroyer' assigned
00037  * to this class - it will need to be cleaned up once nobody really needs
00038  * it anymore. The path through which this object goes is generally this:
00039  *
00040  * - Client code creates an object, and submits it to WorkThread.
00041  * - WorkThread inserts it into its local queue of pending jobs.
00042  * - When time comes, hashing thread takes it out from the queue and performs
00043  *   the work.
00044  * - When the work is completed, an event is emitted from the work.
00045  * - If client code handles it, it can retrieve the results, after which the
00046  *   object gets auto-destructed, since it's not contained anywhere, and the
00047  *   last copy of the object was sent to the client.
00048  * - If the client does not handle the event, the object gets autodestructed,
00049  *   since the last place it was stored was in event table queue, and after
00050  *   calling all handlers for the object, the object is removed from the queue.
00051  */
00052 class DLLEXPORT HashWork :
00053         public ThreadWork,
00054         public boost::enable_shared_from_this<HashWork>
00055 {
00056 public:
00057         DECLARE_EVENT_TABLE(boost::shared_ptr<HashWork>, HashEvent);
00058 
00059         /**
00060          * Constructor for full hash work.
00061          *
00062          * @param filename      Full path to file to be hashed.
00063          *
00064          * Submitting this job to Hasher will result in HASH_COMPLETE or
00065          * HASH_FATAL_ERROR events to be submitted when the job is completed,
00066          * as well as the resulting data being submitted to MetaDb.
00067          */
00068         HashWork(const boost::filesystem::path &filename);
00069 
00070         /**
00071          * Construct a range hash work.
00072          *
00073          * @param filename      Full path to file to be hashed.
00074          * @param begin         Begin location to begin hashing.
00075          * @param end           End location until what to hash.
00076          * @param ref           Reference hash to check against.
00077          *
00078          * Submitting this job to Hasher will result in HASH_VERIFIED or
00079          * HASH_FAILED event being submitted when the job is completed.
00080          */
00081         HashWork(
00082                 const boost::filesystem::path &filename,
00083                 uint64_t begin, uint64_t end, const HashBase *ref
00084         );
00085 
00086         //! Destructor
00087         ~HashWork();
00088 
00089         //! @name Accessors
00090         //! \note We can't have these as const since scoped_lock is non-const
00091         //@{
00092         //! Whether this job is a full job.
00093         bool isFull() {
00094                 boost::mutex::scoped_lock l(m_lock);
00095                 return m_full;
00096         }
00097         //! In case of partial job, retrieves job range begin
00098         uint64_t begin() {
00099                 boost::mutex::scoped_lock l(m_lock);
00100                 return m_begin;
00101         }
00102         //! In case of partial job, retrieves job range end
00103         uint64_t end() {
00104                 boost::mutex::scoped_lock l(m_lock);
00105                 return m_end;
00106         }
00107         //! Retrieves file name to be hashed.
00108         boost::filesystem::path getFileName() {
00109                 boost::mutex::scoped_lock l(m_lock);
00110                 return m_filename;
00111         }
00112         //! Retrieves type of hash to be generated in case of range verification
00113         CGComm::HashTypeId getType() {
00114                 boost::mutex::scoped_lock l(m_lock);
00115                 CHECK_THROW(m_ref);
00116                 return m_ref->getTypeId();
00117         }
00118         //! In case of range hash work, retrieves reference/control hash
00119         const HashBase* getRef() {
00120                 boost::mutex::scoped_lock l(m_lock);
00121                 return m_ref;
00122         }
00123         //! Retrieves metaData pointer (filled after full hash job)
00124         MetaData* getMetaData() {
00125                 boost::mutex::scoped_lock l(m_lock);
00126                 return m_md;
00127         }
00128         //! Check if this job is valid, e.g. still needed to be performed. This
00129         //! is needed to make sure that while the job was waiting in the queue,
00130         //! it hasn't become invalid.
00131         bool isValid() {
00132                 boost::mutex::scoped_lock l(m_lock);
00133                 return m_valid;
00134         }
00135         //! This method should be called by the original job poster to abort
00136         //! this job, and remove from pending jobs queue. If the work is in
00137         //! progress already, it will also be aborted, and no results posted.
00138         void invalidate() {
00139                 boost::mutex::scoped_lock l(m_lock);
00140                 m_valid = false;
00141         }
00142         //@}
00143 
00144         //! For implementation use only - set metadata
00145         void setMetaData(MetaData *md) {
00146                 boost::mutex::scoped_lock l(m_lock);
00147                 m_md = md;
00148         }
00149 
00150         static uint64_t getHashed() { return s_dataCnt; }
00151         static double   getTime()   { return s_timeCnt; }
00152 
00153         //! Process this job
00154         virtual bool process();
00155 private:
00156         //! File to be hashed. Must include full path to the file.
00157         const boost::filesystem::path m_filename;
00158 
00159         //! After completing full hash job, contains full metadata about the
00160         //! file.
00161         MetaData *m_md;
00162 
00163         //! In case of range hash, this specifies range begin bytes
00164         uint64_t m_begin;
00165 
00166         //! In case of range hash, this specifies range end bytes
00167         uint64_t m_end;
00168 
00169         //! In case of range hash, this specifies reference/control hash
00170         const HashBase *m_ref;
00171 
00172         /**
00173          * \short Indicates valditiy of this job
00174          *
00175          * This variable is set to true as default, and can be set to false
00176          * using invalidate() member function. The purpose of this is to
00177          * provide a mechanism to abort hashing jobs which are no longer
00178          * wanted by the original poster.
00179          */
00180         bool m_valid;
00181 
00182         //! Whether this job is a "full" hash job
00183         bool m_full;
00184 
00185         /**
00186          * Protects all members of this object. This lock must be aquired
00187          * before touching anything in this object.
00188          */
00189         boost::mutex m_lock;
00190 
00191         void initState();
00192         void doProcess();
00193         void finish();
00194 
00195         int m_file;
00196         boost::scoped_array<char> m_buf;
00197         std::vector<boost::shared_ptr<HashSetMaker> > m_makers;
00198         bool m_inProgress;
00199         static uint64_t s_dataCnt;
00200         static double   s_timeCnt;
00201         static boost::mutex s_statsLock;
00202 };
00203 
00204 #endif