/* ****************************************************************************
  This file is part of KBabel

  Copyright (C) 2001 by Matthias Kiefer
                            <matthias.kiefer@gmx.de>

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

**************************************************************************** */
#include "compendiumdata.h"

#include <catalog.h>
#include <tagextractor.h>

#include <kapp.h>
#include <kdebug.h>
#include <klocale.h>

// Size argument handed to each of the three lookup dictionaries on
// construction.
// NOTE(review): presumably the initial hash-table size of the dictionaries;
// confirm against the member types declared in compendiumdata.h.
static const int initialDictSize = 9887;

/**
 * Creates a compendium data object that is neither active, nor in an error
 * state, nor initialized.
 *
 * A Catalog is created with this object as its parent, and all three
 * dictionaries are switched to auto-delete.
 *
 * @param parent forwarded unchanged to the QObject base class
 */
CompendiumData::CompendiumData(QObject *parent)
    : QObject(parent)
    , _active(false)
    , _error(false)
    , _initialized(false)
    , _catalog(0)
    , _exactDict(initialDictSize)
    , _allDict(initialDictSize)
    , _wordDict(initialDictSize)
{
    // load() inserts heap-allocated values into these dictionaries;
    // auto-delete hands their ownership to the dictionaries.
    _wordDict.setAutoDelete(true);
    _allDict.setAutoDelete(true);
    _exactDict.setAutoDelete(true);

    // The catalog gets this object as its parent.
    _catalog = new Catalog(this, "CompendiumData::catalog");
}


/**
 * Loads the catalog found at @p url and rebuilds the three lookup
 * dictionaries from its entries.
 *
 * The dictionaries map:
 *  - _exactDict: the unmodified msgid              -> index of the entry
 *  - _allDict:   the simplified, lower-cased msgid -> list of entry indices
 *  - _wordDict:  each word (longer than one character) of the simplified,
 *                lower-cased msgid                 -> list of entry indices
 *
 * Words whose index list holds more than a tenth of the number of keys in
 * _allDict are dropped from _wordDict again.
 *
 * While running, progressStarts(), progress() and progressEnds() are emitted
 * and kapp->processEvents() is called during indexing.
 *
 * @param url location of the file, passed to Catalog::openURL()
 * @return false if a load is already in progress or the file could not be
 *         opened (then _error and _errorMsg are set); true otherwise.
 *         In both the failure-to-open and the success case _initialized
 *         is set to true.
 */
bool CompendiumData::load(KURL url)
{
    // A load is already running: refuse a second one.
    if (_active)
    {
        return false;
    }

    _active = true;
    _error = false;

    // Start from empty indices.
    _exactDict.clear();
    _allDict.clear();
    _wordDict.clear();

    emit progressStarts(i18n("Loading po compendium"));

    // Forward the catalog's progress signal only for the duration of the
    // openURL() call.
    connect(_catalog, SIGNAL(signalProgress(int)), this, SIGNAL(progress(int)));

    // Second argument of openURL(); its value is never read here.
    bool unusedFlag;
    const Catalog::IOStatus status = _catalog->openURL(url, unusedFlag);

    disconnect(_catalog, SIGNAL(signalProgress(int)),
               this, SIGNAL(progress(int)));

    const bool opened = (status == Catalog::OK
                         || status == Catalog::RECOVERED_PARSE_ERROR);
    if (!opened)
    {
        kdDebug() << "error while opening file " << url.prettyURL() << endl;

        _error = true;
        _errorMsg = i18n("Error while trying to read file %1")
            .arg(url.prettyURL());

        emit progressEnds();

        _active = false;
        _initialized = true;

        return false;
    }

    emit progressStarts(i18n("Building indices"));

    const int entryCount = _catalog->numberOfEntries();
    for (int entry = 0; entry < entryCount; ++entry)
    {
        // Report progress as an integer percentage of processed entries and
        // let the application process pending events.
        const int scaled = 100 * (entry + 1);
        if (scaled % entryCount < 100)
        {
            emit progress(scaled / entryCount);
            kapp->processEvents();
        }

        // Exact index: the unmodified msgid maps to the entry number.
        QString key = _catalog->msgid(entry);
        _exactDict.insert(key, new int(entry));

        // The remaining indices work on the simplified, lower-cased text;
        // texts of at most one character are not indexed there.
        key = simplify(key).lower();
        if (key.length() <= 1)
        {
            continue;
        }

        QValueList<int> *textEntries = _allDict[key];
        if (!textEntries)
        {
            textEntries = new QValueList<int>;
            _allDict.insert(key, textEntries);
        }
        textEntries->append(entry);

        // Word index: every word longer than one character refers back to
        // this entry.
        QStringList words = wordList(key);
        QStringList::Iterator word = words.begin();
        while (word != words.end())
        {
            if ((*word).length() > 1)
            {
                QValueList<int> *wordEntries = _wordDict[*word];
                if (!wordEntries)
                {
                    wordEntries = new QValueList<int>;
                    _wordDict.insert(*word, wordEntries);
                }
                wordEntries->append(entry);
            }
            ++word;
        }
    }

    // Remove words that are too frequent.
    const uint limit = _allDict.count() / 10;
    QDictIterator< QValueList<int> > cursor(_wordDict);
    while (cursor.current())
    {
        if (cursor.current()->count() <= limit)
        {
            ++cursor;
            continue;
        }

        // The iterator is deliberately not advanced here.
        // NOTE(review): relies on the dictionary moving iterators off a
        // removed item; confirm against the QDict documentation.
        _wordDict.remove(cursor.currentKey());
    }

    _initialized = true;

    emit progressEnds();

    _active = false;

    return true;
}

/**
 * Looks up @p text in the exact-match dictionary.
 *
 * @param text key to look up; load() stores unmodified msgids under it
 * @return the value stored for @p text (load() stores the entry index
 *         there), as returned by the dictionary lookup
 */
const int* CompendiumData::exactDict(const QString text) const
{
    return _exactDict.find(text);
}

/**
 * Looks up @p text in the dictionary of whole texts.
 *
 * @param text key to look up; load() stores simplified, lower-cased msgids
 *             as keys
 * @return the list of entry indices stored for @p text, as returned by the
 *         dictionary lookup
 */
const QValueList<int>* CompendiumData::allDict(const QString text) const
{
    return _allDict.find(text);
}

/**
 * Looks up @p text in the word dictionary.
 *
 * @param text key to look up; load() stores single words of the simplified,
 *             lower-cased msgids as keys
 * @return the list of entry indices stored for @p text, as returned by the
 *         dictionary lookup
 */
const QValueList<int>* CompendiumData::wordDict(const QString text) const
{
    return _wordDict.find(text);
}


/**
 * Adds @p obj to the list of registered objects unless that very pointer is
 * already in the list.
 *
 * @param obj object to register
 */
void CompendiumData::registerObject(QObject *obj)
{
    // Already registered: keep the list free of duplicate pointers.
    if (_registered.containsRef(obj))
    {
        return;
    }

    _registered.append(obj);
}

/**
 * Removes @p obj from the list of registered objects.
 *
 * @param obj object to unregister
 * @return true if no registered objects remain after the removal
 */
bool CompendiumData::unregisterObject(QObject *obj)
{
    _registered.removeRef(obj);

    const bool nothingLeft = (_registered.count() == 0);
    return nothingLeft;
}

/**
 * Reports whether at least one object is currently registered.
 *
 * @return true if the list of registered objects is non-empty,
 *         false if it is empty
 */
bool CompendiumData::hasObjects() const
{
    // Must be the logical opposite of the "list is empty" test that
    // unregisterObject() returns; comparing with '==' here would report
    // true exactly when nothing is registered.
    return _registered.count() != 0;
}

/**
 * Reduces @p string to the form used as a key by the indices.
 *
 * The text is run through a TagExtractor and its plainString() is taken;
 * simplifyWhiteSpace() and stripWhiteSpace() are then applied to it.
 *
 * @param string text to simplify
 * @return the simplified text
 */
QString CompendiumData::simplify(const QString string)
{
    TagExtractor extractor(string);

    return extractor.plainString().simplifyWhiteSpace().stripWhiteSpace();
}

/**
 * Splits @p string into its words.
 *
 * The text is first passed through simplify() and then split at single
 * space characters.
 *
 * @param string text to split
 * @return the list of words
 */
QStringList CompendiumData::wordList(const QString string)
{
    return QStringList::split(' ', CompendiumData::simplify(string));
}

