metadb.h

Go to the documentation of this file.
00001 /**
00002  *  Copyright (C) 2004-2005 Alo Sarv <madcat_@users.sourceforge.net>
00003  *
00004  *  This program is free software; you can redistribute it and/or modify
00005  *  it under the terms of the GNU General Public License as published by
00006  *  the Free Software Foundation; either version 2 of the License, or
00007  *  (at your option) any later version.
00008  *
00009  *  This program is distributed in the hope that it will be useful,
00010  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *  GNU General Public License for more details.
00013  *
00014  *  You should have received a copy of the GNU General Public License
00015  *  along with this program; if not, write to the Free Software
00016  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00017  */
00018 
00019 /** @file metadb.h Interface for MetaDb class */
00020 
00021 #ifndef __METADB_H__
00022 #define __METADB_H__
00023 
00024 #include <hn/osdep.h>
00025 #include <hn/hash.h>
00026 
00027 //! For trace logging
00028 #define METADB 1030
00029 #define METADB_STR "MetaDb"
00030 
00031 class SharedFile;   // forward declaration: only used through pointers in this header
00032 class MetaData;     // forward declaration: only used through pointers in this header
00033 
00034 /**
00035  * MetaDb class is the container for all Meta Data handled by this application.
00036  * It publicly provides a number of lookup functions as well as addition
00037  * functions. Internally, it stores various cross-referenced lists to
00038  * provide fast lookups. The contents of this class are stored in metadb.xml,
00039  * and loaded on each run.
00040  *
00041  * The purpose of this class is to provide an application-central database
00042  * through which it is possible to perform cross-referencing between FilesList
00043  * and this list.
00044  */
00045 class DLLEXPORT MetaDb {
00046 public:
00047         /**
00048          * This class is a Singleton. The only instance of this class may
00049          * be retrieved through this function. Note that the very first call
00050          * to this function also initializes this class.
00051          *
00052          * @return The only instance of this class
00053          */
00054         static MetaDb& instance();
00055 
00056         /**
00057          * Load the MetaDb contents from input stream, adding all found entries
00058          * in the stream to the database, merging duplicate entries.
00059          *
00060          * @param is       Input stream to read from.
00061          */
00062         void load(std::istream &is);
00063 
00064         /**
00065          * Write the contents of the database into output stream.
00066          *
00067          * @param os       Output stream to write to.
00068          */
00069         void save(std::ostream &os) const;
00070 
00071         /**
00072          * Add a metadata object to the database.
00073          *
00074          * @param md       MetaData pointer to be added to the database.
00075          */
00076         void push(MetaData *md);
00077 
00078         /**
00079          * Add a metadata object to the database, associating it with a
00080          * SharedFile.
00081          *
00082          * @param md       MetaData pointer to be added to the database.
00083          * @param sf       SharedFile to associate with this data.
00084          */
00085         void push(MetaData *md, SharedFile *sf);
00086 
00087         /**
00088          * Find Metadata by searching with hash
00089          *
00090          * @param h        Reference to HashBase object to be searched for.
00091          * @return         Pointer to the MetaData object, or 0 if not found.
00092          */
00093         MetaData* find(const HashBase &h) const;
00094 
00095         /**
00096          * Find MetaData by searching with file name. Note that this function
00097          * may return any number of MetaData objects in case the filename is
00098          * ambiguous.
00099          *
00100          * @param filename Name of the file to search for
00101          * @return         Vector containing all found entries. May be empty.
00102          */
00103         std::vector<MetaData*> find(const std::string &filename) const;
00104 
00105         /**
00106          * Find MetaData by searching with SharedFile
00107          *
00108          * @param sf       SharedFile to search with
00109          * @return         Pointer to found MetaData object, or 0 if not found.
00110          */
00111         MetaData* find(SharedFile *sf) const;
00112 
00113         /**
00114          * Locate SharedFile by searching with hash
00115          *
00116          * @param h        Hash to search for
00117          * @return         The file being searched for, or 0 if not found.
00118          */
00119         SharedFile* findSharedFile(const HashBase &h) const;
00120 
00121         /**
00122          * Locate SharedFile by searching with file name. Note that this
00123          * function may return any number of SharedFiles if the file name is
00124          * ambiguous.
00125          *
00126          * @param filename   Name of file to search for
00127          * @return           Vector containing all found entries. May be empty.
00128          */
00129         std::vector<SharedFile*> findSharedFile(
00130                 const std::string &filename
00131         ) const;
00132 
00133 private:
00134         /**
00135          * @name Singleton
00136          */
00137         //@{
00138         MetaDb();
00139         MetaDb(MetaDb &);  // NOTE(review): non-const reference; presumably declared only to forbid copying - confirm
00140         MetaDb& operator=(const MetaDb&);
00141         ~MetaDb();
00142         //@}
00143 
00144         /**
00145          * HashWrapper class acts as a container for HashBase pointers in order
00146          * to allow us to have a map of those keyed by the Hash (compares pointees).
00147          */
00148         class HashWrapper {
00149         public:
00150                 HashWrapper(const HashBase *obj) : m_object(obj) {}  // stores the pointer as given
00151                 const HashBase *const m_object;           //!< Contained object
00152 
00153                 //! Ordering operator to work with std containers (compares pointees)
00154                 friend bool operator<(
00155                         const HashWrapper &x, const HashWrapper &y
00156                 ) {
00157                         return (*x.m_object) < (*y.m_object);
00158                 }
00159                 friend std::ostream& operator<<(  // streams the pointed-to hash
00160                         std::ostream &o, const HashWrapper &hw
00161                 ) {
00162                         return o << (*hw.m_object);
00163                 }
00164                 bool operator==(const HashWrapper &h) const {  // compares pointees, not pointers
00165                         return *m_object == *h.m_object;
00166                 }
00167                 bool operator!=(const HashWrapper &h) const {  // compares pointees, not pointers
00168                         return *m_object != *h.m_object;
00169                 }
00170         private:
00171                 HashWrapper();  // private: not default-constructible by outside code
00172         };
00173 
00174         /**
00175          * Primary Set
00176          * -----------
00177          * Pure metadata objects. We cannot rely on any data existing in the
00178          * contained MetaData pointer, so this cannot be turned into a keyed map.
00179          * The maps declared below (m_sfToMd, m_filenames, m_hashes) map to
00180          * MetaData pointers for faster access - iterate directly on this set only
00181          * as last resort. This set is also used to save in metadata.xml file
00182          * (NOTE(review): or metadb.xml? confirm); also ready for publishing to outside world.
00183          */
00184         std::set<MetaData*> m_list;
00185         //! List Iterator
00186         typedef std::set<MetaData*>::iterator LIter;
00187         //! Constant List Iterator
00188         typedef std::set<MetaData*>::const_iterator CLIter;
00189 
00190         /**
00191          * SharedFile To MetaData Map
00192          * ----------------------
00193          * SharedFile* keyed map to locate MetaData objects by searching with
00194          * SharedFile pointer. This map is populated by FilesList (and
00195          * contained) classes, and generally contains only part of the entire
00196          * m_list contents - only entries which are also in FilesList are
00197          * listed here.
00198          */
00199         std::map<SharedFile*, MetaData*> m_sfToMd;
00200         //! SharedFile to MetaData Iterator
00201         typedef std::map<SharedFile*, MetaData*>::iterator SFMDIter;
00202         //! Constant SharedFile to MetaData Iterator
00203         typedef std::map<SharedFile*, MetaData*>::const_iterator CSFMDIter;
00204 
00205         /**
00206          * File Name to MetaData Map
00207          * -------------------------
00208          * This map allows finding MetaData knowing file name. The file name
00209          * key here is only that - file name. No paths may be included in the
00210          * name. Multiple objects may have the same file name - for this
00211          * reason, this is a multi-map.
00212          */
00213         std::multimap<std::string, MetaData*> m_filenames;
00214         //! File Name Iterator
00215         typedef std::multimap<std::string, MetaData*>::iterator FNIter;
00216         //! Constant File Name Iterator
00217         typedef std::multimap<std::string, MetaData*>::const_iterator CFNIter;
00218 
00219         /**
00220          * File Name to SharedFile Map
00221          * ------------------------
00222          * This map allows searching with file name and locating the
00223          * corresponding SharedFiles. Note that this is a multimap - same file
00224          * name may point to several SharedFiles.
00225          */
00226         std::multimap<std::string, SharedFile*> m_nameToSF;
00227         //! Name To SharedFile Iterator
00228         typedef std::multimap<std::string, SharedFile*>::iterator NTSFIter;
00229         //! Constant Name To SharedFile Iterator
00230         typedef std::multimap<
00231                 std::string, SharedFile*>
00232         ::const_iterator CNTSFIter;
00233 
00234         /**
00235          * Hash To SharedFile Map
00236          * ----------------------
00237          * A dual map which allows us to locate SharedFiles given a hash. The
00238          * outer map is keyed on HashTypeId, so there are generally only 5-6
00239          * entries there. For each outer-map entry, there's an inner map,
00240          * containing hashes of the given type. Lookups in the inner map return
00241          * the SharedFile of the searched entry.
00242          */
00243         std::map<
00244                 CGComm::HashTypeId, std::map<HashWrapper, SharedFile*>
00245         > m_hashToSF;
00246         //! Hash To SharedFile Iterator
00247         typedef std::map<
00248                 CGComm::HashTypeId, std::map<HashWrapper, SharedFile*>
00249         >::iterator HTSFIter;
00250         //! Constant Hash To SharedFile Iterator
00251         typedef std::map<
00252                 CGComm::HashTypeId, std::map<HashWrapper, SharedFile*>
00253         >::const_iterator CHTSFIter;
00254         //! HashWrapper To SharedFile Iterator
00255         typedef std::map<HashWrapper, SharedFile*>::iterator HWTSFIter;
00256         //! Constant HashWrapper To SharedFile Iterator
00257         typedef std::map<HashWrapper, SharedFile*>::const_iterator CHWTSFIter;
00258 
00259         /**
00260          * Hash To Meta Data Map
00261          * ---------------------
00262          * Last, but not least, a nested map which allows us to look up
00263          * MetaData objects given we know one hash. The outer map is keyed
00264          * on hash types, so there are generally only 5-6 entries in the
00265          * outer map. For each outer-map entry, there is an inner map, which
00266          * is keyed on the actual Hash, and returns MetaData pointer. Note
00267          * that we use HashWrapper wrapper class around HashBase pointer,
00268          * doing virtual function calls on lookups. While it is known that
00269          * virtual function calls come with some performance tradeoff, I
00270          * really don't see other way of doing it. Also keep in mind that
00271          * when HashSet object (contained in MetaData object) is destroyed,
00272          * it would invalidate the HashWrapper pointer - make sure to not
00273          * have any HashWrappers containing the HashBase objects around
00274          * anymore when deleting MetaData.
00275          */
00276         std::map<
00277                 CGComm::HashTypeId, std::map<HashWrapper, MetaData*>
00278         > m_hashes;
00279         //! Hash To Meta Data Iterator
00280         typedef std::map<
00281                 CGComm::HashTypeId, std::map<HashWrapper, MetaData*>
00282         >::iterator HTMDIter;
00283         //! Constant Hash To Meta Data Iterator
00284         typedef std::map<
00285                 CGComm::HashTypeId, std::map<HashWrapper, MetaData*>
00286         >::const_iterator CHTMDIter;
00287         //! Hash Wrapper To Meta Data Iterator
00288         typedef std::map<HashWrapper, MetaData*>::iterator HWTMDIter;
00289         //! Constant Hash Wrapper To Meta Data Iterator
00290         typedef std::map<HashWrapper, MetaData*>::const_iterator CHWTMDIter;
00291 
00292         //! Output operator for streams
00293         friend std::ostream& operator<<(std::ostream &o, const MetaDb &md);
00294 
00295         /**
00296          * Event handler for MetaData events, called from event table.
00297          *
00298          * @param md       Event source triggering the event.
00299          * @param evt      Event data specifying the kind of event.
00300          */
00301         void onMetaDataEvent(MetaData *md, int evt);
00302 
00303         /**
00304          * Event handler for SharedFile events, called from event table.
00305          *
00306          * @param sf       SharedFile object triggering the event.
00307          * @param evt      Event data specifying the kind of event.
00308          */
00309         void onSharedFileEvent(SharedFile *sf, int evt);
00310 
00311         /**
00312          * Attempt to add entry to m_filenames multimap.
00313          *
00314          * @param source   Source object the file name belongs to.
00315          * @param name     Filename to be added.
00316          */
00317         void tryAddFileName(MetaData *source, const std::string &name);
00318 
00319         /**
00320          * Attempt to add entry to name-to-SharedFile map (presumably m_nameToSF)
00321          *
00322          * @param sf       SharedFile the name belongs to.
00323          * @param name     Filename to be added.
00324          */
00325         void tryAddFileName(SharedFile *sf, const std::string &name);
00326 
00327         /**
00328          * Attempt to add entry to m_hashes map
00329          *
00330          * @param source   Source object where this HashSet belongs to.
00331          * @param hash     Hash to be added.
00332          */
00333         void tryAddHashSet(MetaData *source, const HashSetBase *hash);
00334 
00335         /**
00336          * Attempt to add entry to hash-to-SharedFile map (presumably m_hashToSF)
00337          *
00338          * @param sf       SharedFile associated with the hash being added.
00339          * @param hash     Hash to be added.
00340          */
00341         void tryAddHashSet(SharedFile *sf, const HashSetBase *hash);
00342 
00343         /**
00344          * Clears all lists. This is used for debugging purposes in metadata
00345          * regress-test.
00346          */
00347         void clear();
00348         friend void test_metadb();  // presumably the regress-test entry point; friendship grants it private access
00349 };
00350 
00351 #endif