hasher.cpp

Go to the documentation of this file.
00001 /**
00002  *  Copyright (C) 2004-2005 Alo Sarv <madcat_@users.sourceforge.net>
00003  *
00004  *  This program is free software; you can redistribute it and/or modify
00005  *  it under the terms of the GNU General Public License as published by
00006  *  the Free Software Foundation; either version 2 of the License, or
00007  *  (at your option) any later version.
00008  *
00009  *  This program is distributed in the hope that it will be useful,
00010  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *  GNU General Public License for more details.
00013  *
00014  *  You should have received a copy of the GNU General Public License
00015  *  along with this program; if not, write to the Free Software
00016  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00017  */
00018 
00019 /** \file hasher.cpp Implementation of Files Identification Subsystem */
00020 
00021 // Precompiled header
00022 #include <hn/hnprec.h>
00023 
00024 #include <hn/hasher.h>
00025 #include <hn/hashsetmaker.h>
00026 #include <hn/md4transform.h>
00027 #include <hn/md5transform.h>
00028 #include <hn/sha1transform.h>
00029 #include <hn/metadata.h>
00030 #include <boost/filesystem/path.hpp>
00031 #include <boost/filesystem/operations.hpp>
00032 #include <boost/lexical_cast.hpp>
00033 #include <fstream>
00034 #include <fcntl.h>
00035 
00036 using boost::filesystem::path;
00037 using boost::filesystem::no_check;
00038 using namespace CGComm;
00039 
00040 uint64_t HashWork::s_dataCnt = 0;
00041 double   HashWork::s_timeCnt = 0.0;
00042 boost::mutex HashWork::s_statsLock;
00043 static uint32_t s_bufSize = 32*1024;
00044 IMPLEMENT_EVENT_TABLE(HashWork, boost::shared_ptr<HashWork>, HashEvent);
00045 
00046 // Full hash job Constructor
00047 HashWork::HashWork(const boost::filesystem::path &filename)
00048 : m_filename(filename), m_md(), m_begin(), m_end(), m_ref(), m_valid(true),
00049 m_full(true), m_file(), m_inProgress() {}
00050 
00051 // Range hash verification job
00052 HashWork::HashWork(
00053         const boost::filesystem::path &filename, uint64_t begin, uint64_t end,
00054         const HashBase* ref
00055 ) : m_filename(filename), m_md(), m_begin(begin), m_end(end), m_ref(ref),
00056 m_valid(true), m_full(false), m_file(), m_inProgress() {
00057         CHECK_THROW(ref);
00058 }
00059 
00060 HashWork::~HashWork() {
00061         if (m_file) {
00062                 close(m_file);
00063         }
00064 }
00065 
00066 bool HashWork::process() try {
00067         if (!m_inProgress) {
00068                 initState();
00069         }
00070         CHECK_THROW(!isComplete());
00071 
00072         Utils::StopWatch s1;
00073         doProcess();
00074         s_timeCnt += s1.elapsed() / 1000.0;
00075 
00076         return isComplete();
00077 } catch (std::exception &e) {
00078         logError(e.what());
00079         getEventTable().postEvent(shared_from_this(), HASH_FATAL_ERROR);
00080         return true;
00081 }
00082 MSVC_ONLY(;)
00083 
00084 void HashWork::initState() {
00085         std::string fname(m_filename.native_file_string());
00086         m_file = open(fname.c_str(), O_RDONLY|O_LARGEFILE|O_BINARY);
00087 
00088         if (m_file == -1 || !boost::filesystem::exists(m_filename)) {
00089                 throw std::runtime_error(
00090                         (boost::format("Unable to open file `%s' for hashing.")
00091                         % fname).str()
00092                 );
00093         }
00094         logMsg(boost::format("Hashing file `%s'") % fname);
00095         boost::shared_ptr<HashSetMaker> t;
00096 
00097         if (isFull()) {
00098                 t.reset(new ED2KHashMaker);
00099                 m_makers.push_back(t);
00100                 t.reset(new SHA1HashMaker);
00101                 m_makers.push_back(t);
00102                 t.reset(new MD4HashMaker);
00103                 m_makers.push_back(t);
00104                 t.reset(new MD5HashMaker);
00105                 m_makers.push_back(t);
00106                 m_begin = 0;
00107                 m_end = Utils::getFileSize(fname);
00108         } else {
00109                 switch (getType()) {
00110                         case OP_HT_MD4:
00111                                 t.reset(new MD4HashMaker);
00112                                 break;
00113                         case OP_HT_MD5:
00114                                 t.reset(new MD5HashMaker);
00115                                 break;
00116                         case OP_HT_ED2K:
00117                                 t.reset(new ED2KHashMaker);
00118                                 break;
00119                         case OP_HT_SHA1:
00120                                 t.reset(new SHA1HashMaker);
00121                                 break;
00122                         default:
00123                                 boost::format fmt(
00124                                         "Requested unknown hash of type %s"
00125                                 );
00126                                 logError(fmt % m_ref->getType());
00127                                 break;
00128                 }
00129                 m_makers.push_back(t);
00130                 uint64_t ret = lseek64(m_file, m_begin, SEEK_SET);
00131                 CHECK_THROW(ret == m_begin);
00132         }
00133 
00134         m_buf.reset(new char[s_bufSize]);
00135         m_inProgress = true;
00136 }
00137 
00138 void HashWork::doProcess() {
00139         uint64_t ret = 0;
00140         uint64_t curPos = lseek64(m_file, 0L, SEEK_CUR);
00141 
00142         if (s_bufSize + curPos > m_end) {
00143                 ret = read(m_file, m_buf.get(), m_end - curPos + 1);
00144         } else {
00145                 ret = read(m_file, m_buf.get(), s_bufSize);
00146         }
00147 
00148         for (uint32_t i = 0; i < m_makers.size(); ++i) {
00149                 m_makers[i]->sumUp(m_buf.get(), ret);
00150         }
00151 
00152         boost::mutex::scoped_lock l(s_statsLock);
00153         s_dataCnt += ret;
00154         curPos = lseek64(m_file, 0L, SEEK_CUR);
00155 
00156         if (!ret || m_end + 1 == curPos) {
00157                 finish();
00158         }
00159 }
00160 
00161 void HashWork::finish() {
00162         if (isFull()) {
00163                 CHECK_THROW(m_makers.size());
00164                 std::string fname(m_filename.native_file_string());
00165                 uint64_t fileSize = Utils::getFileSize(fname);
00166                 uint32_t modDate = Utils::getModDate(fname);
00167                 m_md = new MetaData(fileSize);
00168                 m_md->setModDate(Utils::getModDate(fname));
00169                 m_md->addFileName(m_filename.leaf());
00170 
00171                 logDebug("Full hash complete:");
00172                 logDebug("FileName: " + fname);
00173                 logDebug(boost::format("FileSize: %d") % fileSize);
00174                 logDebug(boost::format("ModDate:  %d") % modDate);
00175 
00176                 for (uint32_t i = 0; i < m_makers.size(); ++i) {
00177                         HashSetBase *hs = m_makers[i]->getHashSet();
00178                         logDebug(
00179                                 boost::format("%s: %s")
00180                                 % hs->getFileHashType()
00181                                 % hs->getFileHash().decode()
00182                         );
00183                         m_md->addHashSet(hs);
00184                 }
00185 
00186                 getEventTable().postEvent(shared_from_this(), HASH_COMPLETE);
00187         } else {
00188                 CHECK_THROW(m_makers.size() == 1);
00189                 const HashBase &h = m_makers[0]->getHashSet()->getFileHash();
00190                 HashEvent evt(h == *m_ref ? HASH_VERIFIED : HASH_FAILED);
00191                 getEventTable().postEvent(shared_from_this(), evt);
00192         }
00193         setComplete();
00194         close(m_file);
00195         m_file = 0;
00196 }