parser.h

Go to the documentation of this file.
00001 /**
00002  *  Copyright (C) 2004-2005 Alo Sarv <madcat_@users.sourceforge.net>
00003  *
00004  *  This program is free software; you can redistribute it and/or modify
00005  *  it under the terms of the GNU General Public License as published by
00006  *  the Free Software Foundation; either version 2 of the License, or
00007  *  (at your option) any later version.
00008  *
00009  *  This program is distributed in the hope that it will be useful,
00010  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *  GNU General Public License for more details.
00013  *
00014  *  You should have received a copy of the GNU General Public License
00015  *  along with this program; if not, write to the Free Software
00016  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00017  */
00018 
00019 #ifndef __PARSER_H__
00020 #define __PARSER_H__
00021 
00022 /**
00023  * @file parser.h
00024  * Interface for ED2KParser class which performs ED2K network stream parsing.
00025  *
00026  * This is the main header for ED2KParser system, and should be included by
00027  * user code. The support headers, "packets.h" and "opcodes.h" should generally
00028  * not be included by user code.
00029  */
00030 
00031 #include <hn/log.h>               // For debug/trace logging
00032 #include <hn/utils.h>             // for Utils::getVal
00033 #include "opcodes.h"              // Protocol opcodes
00034 #include "packets.h"              // Packet objects header
00035 #include "zutils.h"               // for decompress
00036 
00037 /**
00038  * ED2KParser template class provides a generic interface for parsing ED2K
00039  * network stream. The input data is sent to the parser object through parse()
00040  * member functions, which performs the data parsing. When a packet has been
00041  * detected in stream, ED2KParser first locates the correct packet-factory
00042  * to handle the packet, and passes the packet data to the factory. The
00043  * specific packet factories in turn pass the data to the actual packet object
00044  * which then performs the final packet parsing and packet object construction.
00045  * Once the packet has been constructed, the packet-factory in question calls
00046  * back to the parser client (specified through Parent template argument and
00047  * parent argument in ED2KParser constructor), calling onPacket() member
00048  * function and passing the newly created packet object (by reference) to the
00049  * function. This allows client code to implement overloaded versions of
00050  * onPacket to perform event-driven packet handling.
00051  *
00052  * @param Parent         Parent class which will receive packet events
00053  */
00054 template<typename Parent>
00055 class ED2KParser  {
00056 public:
00057         /**
00058          * The one and only constructor, this initializes the packet parser
00059          * to parse a stream.
00060          *
00061          * @param parent   Pointer to object to which notifications should be
00062          *                 sent. Must not be null.
00063          */
00064         ED2KParser(Parent *parent) : m_parent(parent), m_need() {
00065                 CHECK_THROW(parent);
00066         }
00067 
00068         /**
00069          * @name Accessors and modifiers for internal data
00070          */
00071         //@{
00072         void    setParent(Parent *p) { CHECK_THROW(p); m_parent = p; }
00073         Parent* getParent()    const { return m_parent;              }
00074         bool    hasBuffered()  const { return m_buffer.size();       }
00075         void    clearBuffer()        { m_buffer.clear();             }
00076         //@}
00077 
00078         /**
00079          * Continue stream parsing, passing additional data. The data is
00080          * buffered internally, so @param data may be freed after passing
00081          * to this method. Note that this function triggers a chain-reaction
00082          * of events when a new packet is detected, which leads back to
00083          * client code, into the relevant packet handler function. When this
00084          * function returns, all found packets in stream have been parsed, and
00085          * all remaining data has been buffered for next parsing sequence.
00086          *
00087          * @param data     Data buffer to be parsed.
00088          */
00089         void parse(const std::string &data) {
00090                 m_buffer += data;
00091                 if (m_buffer.size() < 5 || m_buffer.size() < m_need) {
00092                         return; // not enough data yet
00093                 }
00094                 uint32_t lastPacket = 0;
00095                 std::istringstream i(m_buffer);
00096                 while (i.good()) {
00097                         lastPacket = i.tellg();
00098                         try {
00099                                 if (!readPacket(i, m_packet)) {
00100                                         break;
00101                                 }
00102                         } catch (Utils::ReadError&) {
00103                                 break;
00104                         }
00105                         // Locate the right factory for this packet.
00106                         Iter iter = factories()[m_packet.m_proto].find(
00107                                 m_packet.m_opcode
00108                         );
00109                         if (iter == factories()[m_packet.m_proto].end()) {
00110                                 logDebug(
00111                                         boost::format(
00112                                                 COL_GREEN "Received unknown "
00113                                                 "packet: %s" COL_NONE
00114                                         ) % m_packet
00115                                 );
00116                                 continue;
00117                         }
00118                         uint32_t curPos = i.tellg();
00119                         if (curPos == m_buffer.size()) {
00120                                 m_buffer.clear();
00121                         }
00122 
00123                         // found the handler
00124                         std::istringstream packet(m_packet.m_data);
00125                         (*iter).second->create(m_parent, packet);
00126                 }
00127                 m_buffer = i.str().substr(lastPacket);
00128         }
00129 
00130         /**
00131          * PacketFactory is an abstract base class for specific packet
00132          * factories, which handle specific packet construction and user
00133          * callbacks after packet construction. This class is implemented as
00134          * public here for specific derived factories to be able to access it.
00135          * It should not be used by user code.
00136          *
00137          * Specific factory must pass it's supported opcode (based on which
00138          * ED2KParser chooses that factory to pass data to) to the base class's
00139          * constructor. Specific factory must also override pure virtual
00140          * create() method to perform the packet construction.
00141          */
00142         class PacketFactory {
00143         public:
00144                 /**
00145                  * Creates a packet and calls back to packet handler.
00146                  *
00147                  * @param parent        Pointer to packet handler object
00148                  * @param i             Stream containing packet data
00149                  * @param proto         Protocol used for this packet
00150                  */
00151                 virtual void create(Parent *parent, std::istringstream &i) = 0;
00152         protected:
00153                 /**
00154                  * Base class constructor registers the factory with ED2KParser
00155                  * factories list, making it available to receive packets for
00156                  * construction.
00157                  *
00158                  * @param proto        Protocol, into which the packet belongs
00159                  * @param opcode       Opcode, upon which to call this factory
00160                  */
00161                 PacketFactory(uint8_t proto, uint8_t opcode) {
00162                         FIter endIter = ED2KParser::factories().end();
00163                         if (ED2KParser::factories().find(proto) == endIter) {
00164                                 ED2KParser::factories().insert(
00165                                         std::make_pair(proto, FactoryMap())
00166                                 );
00167                         }
00168                         ED2KParser::factories()[proto].insert(
00169                                 std::make_pair(opcode, this)
00170                         );
00171                 }
00172 
00173                 //! Dummy destructor
00174                 virtual ~PacketFactory() {}
00175         };
00176 private:
00177         typedef std::map<uint8_t, PacketFactory*> FactoryMap;
00178         typedef typename FactoryMap::iterator Iter;
00179         typedef typename std::map<uint8_t, FactoryMap>::iterator FIter;
00180 
00181         /**
00182          * While it would be syntactically possible to implement the static
00183          * data as member of the ED2KParser class, it seems to cause SIGSEGV
00184          * upon module loading during static data initialization. While similar
00185          * approach may work within the main application, it's a no-go within
00186          * a module, and thus we wrap it inside a member function which returns
00187          * the object by reference.
00188          *
00189          * @return  Static map of supported packet factories. The outer map
00190          *          is really small, and contains only two entries - PR_ED2K
00191          *          and PR_EMULE, since those two protocol's are used in ed2k
00192          *          network. The inner map lists all packets in the given
00193          *          protocol. When a new packet is found in stream, the inner
00194          *          map corresponding to the protocol is searched for the packet
00195          *          opcode, and the relevant factory's create() method called,
00196          *          passing the packet data.
00197          */
00198         static std::map<uint8_t, FactoryMap>& factories() {
00199                 static std::map<uint8_t, FactoryMap> s_factories;
00200                 return s_factories;
00201         }
00202 
00203         /**
00204          * InternalPacket structure is a temporary storage for a single packet
00205          * data.
00206          */
00207         struct InternalPacket {
00208                 uint8_t     m_proto;       //!< protocol
00209                 uint32_t    m_len;         //!< data + opcode length
00210                 uint8_t     m_opcode;      //!< opcode
00211                 std::string m_data;        //!< data
00212 
00213                 //! Output operator into streams
00214                 friend std::ostream& operator<<(
00215                         std::ostream &o, const InternalPacket &i
00216                 ) {
00217                         o << "protocol=" << Utils::hexDump(i.m_proto)  << " ";
00218                         o << "length="   << Utils::hexDump(i.m_len)    << " ";
00219                         o << "opcode="   << Utils::hexDump(i.m_opcode) << " ";
00220                         if (i.m_data.size() < 1024) {
00221                                 o << Utils::hexDump(i.m_data);
00222                         } else {
00223                                 o << "\nData omitted (length >= 1024)";
00224                         }
00225                         return o;
00226                 }
00227         };
00228 
00229         /**
00230          * readPacket() method attempts to read a single packet from the
00231          * designated stream.
00232          *
00233          * @param i       Stream to read the packet from
00234          * @param p       Packet object to store the packet data in
00235          * @return        True if packet was found and read; false otherwise
00236          *
00237          * \throws std::out_of_range if only part of the packet could be read.
00238          *         This generally indicates you should try again later when more
00239          *         data is available.
00240          * \throws std::runtime_error on fatal errors. If this is thrown, the
00241          *         stream should be marked unusable, and exception propagated
00242          *         up to client code.
00243          */
00244         bool readPacket(std::istringstream &i, InternalPacket &p) {
00245                 p.m_proto  = Utils::getVal<uint8_t >(i);
00246                 p.m_len    = Utils::getVal<uint32_t>(i);
00247                 if (i.str().size() < p.m_len + 5) {
00248                         m_need = p.m_len + 5 - i.str().size();
00249                         return false;
00250                 }
00251                 p.m_opcode = Utils::getVal<uint8_t >(i);
00252                 p.m_data   = Utils::getVal<std::string>(i, p.m_len - 1);
00253                 CHECK_THROW(p.m_data.size() == p.m_len - 1);
00254                 m_need = 0;
00255 
00256                 if (p.m_proto == PR_ZLIB) {
00257                         p.m_data = Zlib::decompress(p.m_data);
00258                         CHECK_THROW_MSG(p.m_data.size(), "unpacking failed");
00259                         p.m_proto = PR_ED2K;
00260                 } else if (p.m_proto != PR_EMULE && p.m_proto != PR_ED2K) {
00261                         throw std::runtime_error("invalid protocol");
00262                 }
00263 
00264                 // used for statistics gathering
00265                 if (p.m_opcode==OP_SENDINGCHUNK || p.m_opcode==OP_PACKEDCHUNK) {
00266                         ED2KPacket::addOverheadDn(6 + 24);
00267                 } else {
00268                         ED2KPacket::addOverheadDn(6 + p.m_data.size());
00269                 }
00270 
00271                 return true;
00272         }
00273 
00274         std::string m_buffer;     //!< Internal data buffer
00275         Parent *m_parent;           //!< Pointer to packets handler class
00276         InternalPacket m_packet;    //!< Packet currently being parsed
00277 
00278         /**
00279          * When parsing long packets (e.g. data packets - 10k long), this
00280          * indicates how much more data is needed before completing the packet.
00281          * This is used for optimizing, on order to reduce the amount of
00282          * "failed" packet parse attempts.
00283          */
00284         uint32_t m_need;
00285 };
00286 
00287 
00288 /**
00289  * Declares a new packet handler within this parser.
00290  *
00291  * @param Target               Class which will handle this packet type.
00292  * @param Packet               Packet type to be handled.
00293  *
00294  * \note The class must also declare a member function with the following
00295  * prototype in order to receive the notifications:
00296  * void onPacket(const Packet &);
00297  * The name of the function is hardcoded. Failure to implement this function
00298  * will result in compile-time errors. If the function is private, additional
00299  * macro must be used in the class interface in order to make the function
00300  * accessible to the parser. See below.
00301  */
/* Expands to a static Factory_<Packet><Target> object named
 * s_packetFactory<Packet>; the caller supplies the trailing semicolon. */
#define DECLARE_PACKET_HANDLER(Target, Packet) \
        static Factory_##Packet<Target> s_packetFactory##Packet
00304 
00305 /**
00306  * Special version, allowing Parent and Target to differ.
00307  *
00308  * @param Parent         Parent parser
00309  * @param Target         Target which will receive the events
00310  */
/* Expands to a static Factory_<Packet><Parent, Target> object named
 * s_packetFactory<Packet>; the caller supplies the trailing semicolon. */
#define DECLARE_PACKET_HANDLER2(Parent, Target, Packet) \
        static Factory_##Packet<Parent, Target> s_packetFactory##Packet
00313 
00314 /**
00315  * Use this macro in your class's interface to allow parser factories to
00316  * access the packet handler functions, if they are declared private.
00317  */
/* Befriends the Factory_<Packet> specialization for Class; the caller
 * supplies the trailing semicolon. */
#define FRIEND_PARSER(Class, Packet) friend class Factory_##Packet< Class >
00320 
00321 /**
00322  * This macro is used by the implementation to declare a new packet parser
00323  * factory. This should not be used by client code.
00324  *
00325  * @param PacketType     Type of packet this parser supports. Must be
00326  *                       fully-qualified type name.
00327  * @param Opcode         The opcode of the packet this factory is capable of
00328  *                       creating. This factory will be called when this opcode
00329  *                       is encountered in stream. Overlapping opcodes go
00330  *                       against the logic and thus are not allowed.
00331  */
/* Generates class template Factory_<PacketType>. Its constructor registers
 * the factory for (Proto, Opcode) through the PacketFactory base, and
 * create() builds an ED2KPacket::<PacketType> from the stream and passes it
 * to the handler's onPacket(). The caller supplies the trailing semicolon. */
#define DECLARE_PACKET_FACTORY(Proto, PacketType, Opcode)                      \
template<class Parent, class Target = Parent>                                  \
class Factory_##PacketType : public ED2KParser<Parent>::PacketFactory {        \
public:                                                                        \
        Factory_##PacketType()                                                 \
                : ED2KParser<Parent>::PacketFactory(Proto, Opcode)             \
        {                                                                      \
        }                                                                      \
                                                                               \
        virtual void create(Parent *handler, std::istringstream &stream) {     \
                handler->onPacket(ED2KPacket::PacketType(stream));             \
        }                                                                      \
}
00342 
00343 #include "factories.h"            // Packet factories
00344 
00345 #endif