/*  $Id: Trie.h,v 1.13 2010/08/12 03:22:24 sarrazip Exp $
    Trie.h - Tree structure for string storage

    verbiste - French conjugation system
    Copyright (C) 2003-2005 Pierre Sarrazin <http://sarrazip.com/>

    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License
    as published by the Free Software Foundation; either version 2
    of the License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
    02111-1307, USA.
*/

#ifndef _H_Trie
#define _H_Trie

#include <string>
#include <vector>


namespace verbiste {


/**
    Tree structure for string storage.
    @param        T        type of the user data attached to the stored strings;
                            pointers to objects of type T will be stored in the
                        trie, but no T object will be created, copied,
                        assigned or destroyed by the trie.
*/
template <class T>
class Trie
{
public:

    /** Constructs an empty trie.
        @param        userDataFromNew                determines if the destructor
                                        must assume that all "user data"
                                        pointers come from new and must
                                        thus be destroyed with delete
    */
    Trie(bool userDataFromNew);


    /** Destroys the trie and its contents.
    */
    virtual ~Trie();


    /** Adds the given key and associated it with the given user data pointer.
        @returns        the user data previously associated with the key,
                        or NULL is no user data was associated
    */
    T *add(const std::string &key, T *userData);


    T *get(const std::string &key) const;


    T *getWithDefault(const std::string &key, T *deFault = NULL);


    /** Obtains the address of the user data associated with 'key'
        and adds an entry if necessary.
        @returns        a non-null pointer to the user data pointer
                        associated with 'key';
                        if a new entry was created, the T * is null.
    */
    T **getUserDataPointer(const std::string &key);


    /** Callback invoked by the Trie<>::get() method.
        This callback will be called for each prefix of the searched
        string for which the trie has some user data.
        This method does nothing if it is not overridden in a derived class.
        @param  key         the searched string
        @param  index       length of the prefix
        @param  userData    user data that is associated with the prefix
    */
    virtual void onFoundPrefixWithUserData(const std::string &/*key*/,
                                        std::string::size_type /*index*/,
                                        const T * /*userData*/) const
                                                        throw()
    {
    }

    /** Computes and returns the number of memory bytes consumed by
        this object, excluding the size of the user data instances.
        @returns                        number of bytes
    */
    size_t computeMemoryConsumption() const;

private:

    class Row;

    class Descriptor
    {
    public:
        /** Constructs a descriptor that can point to an inferior row.
            @param        inferior        pointer to the inferior row
                                            (may be NULL);
                                        this pointer must have been obtained
                                        through operator new
        */
        Descriptor(Row *inferior = NULL);

        Descriptor(const Descriptor &d) : inferiorRow(d.inferiorRow), userData(d.userData) {}
        Descriptor &operator = (const Descriptor &d)
        {
            if (&d != this) { inferiorRow = d.inferiorRow; userData = d.userData; }
            return *this;
        }

        /** Destroys this object and calls operator delete on the inferior row.
            Does not call operator delete on userData.
        */
        ~Descriptor();

        /** Calls recursiveDelete() on *inferiorRow if inferiorRow is not NULL.
            Then, calls operator delete that row and sets inferiorRow to NULL.
            @param        deleteUserData        if true, operator delete is called
                                            on userData (which may be NULL)
        */
        void recursiveDelete(bool deleteUserData);

        /** Computes and returns the number of memory bytes consumed by
            this object, excluding the size of the user data.
            @returns                        number of bytes
        */
        size_t computeMemoryConsumption() const;

        Row *inferiorRow;
        T *userData;
    };

    struct CharDesc
    {
        unsigned char c;
        Descriptor desc;

        CharDesc(char cc) : c(static_cast<unsigned char>(cc)), desc() {}

        /** Computes and returns the number of memory bytes consumed by
            this object, excluding the size of the Descriptor's user data.
            @returns                        number of bytes
        */
        size_t computeMemoryConsumption() const;
    };

    class Row
    {
    public:
        Row()
          : elements()
        {
        }

        /** Calls recursiveDelete() on each Descriptor in this row.
            Then empties this row.
            @param        deleteUserData        if true, operator delete is called
                                            on the userData field of the
                                        Descriptor objects
        */
        void recursiveDelete(bool deleteUserData);


        /** Finds an element of this row whose character field is equal to 'c'.
            Returns end() if no such element exists.
        */
        Descriptor *find(unsigned char c);

        /** Finds or creates an element of this row whose char. field is 'c'.
            If no such element exists, one is created using the
            default constructor of the Descriptor class.
        */
        Descriptor &operator [] (unsigned char c);

        /** Computes and returns the number of memory bytes consumed by
            this object, excluding the size of the Descriptors' user data.
            @returns                        number of bytes
        */
        size_t computeMemoryConsumption() const;

    private:
        std::vector<CharDesc> elements;  // average size should be about 1.4
    };


    Descriptor *getDesc(Row *row,
                        const std::string &key,
                        std::string::size_type index,
                        bool create,
                        bool callFoundPrefixCallback);


    T *lambda;  // user data associated with the empty string key
    Row *firstRow;  // must be created by operator new
    bool userDataFromNew;


    // Forbidden operations:
    Trie(const Trie &);
    Trie &operator = (const Trie &);

};


}  // namespace verbiste


#include "Trie.cpp"


#endif  /* _H_Trie */
