/***************************************************************************
 *                       Stream.cpp  -  description
 *                               -------------------
 *  begin                : Tue March 1 10:40:21 BST 2003
 *  copyright            : (C) 2002 by Dmitri Skachkov
 *  email                : d_skachkov@yahoo.com
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/


#include "Stream.h"

// Builds a stream with no document attached.
//
// Sets the default options (ISO 8859-1 encoding, CR stripping on,
// re-paragraphing off), allocates the character-set decoder and fills the
// entity-name lookup table used by entityToChar().
Stream::Stream()
{
    // Nothing is attached until openFile() succeeds.
    textstream = 0;
    pdbToIndex = 0;
    tempLinks = 0;
    links = 0;
    decoder = new toUnicode();
    recode = false;
    encoding = "ISO 8859-1";
    fileOpened = false;
    stripCR = true;
    reparagraph = false;
    spacePadded = 3;
    removeSpacePads = false;

    // These members used to stay indeterminate until openFile() ran, although
    // checkNewLine(), checkNewParagraph() and readWordForward() read them.
    // Give them the same values openFile() would produce for an empty file.
    newLine = false;
    newParagraph = false;
    linksFound = false;
    docSize = 0;
    numberOfPages = 1;
    resetTags();

    // Entity name -> decimal character code, both kept as C strings.
    // The empty name maps to 32 (space).
    static const struct
    {
        const char *name;
        const char *code;
    } entityTable[] = {
        { "lt", "60" },
        { "gt", "62" },
        { "amp", "38" },
        { "quot", "34" },
        { "nbsp", "160" },
        { "iexcl", "161" },
        { "curren", "164" },
        { "cent", "162" },
        { "pound", "163" },
        { "yen", "165" },
        { "brvbar", "166" },
        { "sect", "167" },
        { "uml", "168" },
        { "copy", "169" },
        { "ordf", "170" },
        { "laquo", "171" },
        { "not", "172" },
        { "shy", "173" },
        { "reg", "174" },
        { "macr", "175" },
        { "deg", "176" },
        { "plusmn", "177" },
        { "sup2", "178" },
        { "sup3", "179" },
        { "acute", "180" },
        { "micro", "181" },
        { "para", "182" },
        { "middot", "183" },
        { "cedil", "184" },
        { "sup1", "185" },
        { "ordm", "186" },
        { "raquo", "187" },
        { "frac14", "188" },
        { "frac12", "189" },
        { "frac34", "190" },
        { "iquest", "191" },
        { "times", "215" },
        { "divide", "247" },
        { "euro", "8364" },
        { "tilde", "732" },
        { "lsquo", "8216" },
        { "rsquo", "8217" },
        { "sbquo", "8218" },
        { "ldquo", "8220" },
        { "rdquo", "8221" },
        { "bdquo", "8222" },
        { "lsaquo", "8249" },
        { "rsaquo", "8250" },
        { "ndash", "8211" },
        { "mdash", "8212" },
        { "", "32" }
    };
    const unsigned int entityCount = sizeof(entityTable) / sizeof(entityTable[0]);
    for (unsigned int i = 0; i < entityCount; i++)
    {
        entities.insert(entityTable[i].name, entityTable[i].code);
    }
}

// Releases the heap objects this class allocates itself: the text stream
// (created in openFile()/setEncoding()) and the decoder (created in the
// constructor). Both were previously leaked.
//
// NOTE(review): assumes no subclass destructor already frees textstream or
// decoder — confirm before merging. The links array is deliberately left
// alone here because its allocation site is not visible in this file.
Stream::~Stream()
{
    // textstream only references the member 'file', which is still alive
    // while this body runs.
    delete textstream;
    textstream = 0;

    delete decoder;
    decoder = 0;
}

// Translates an entity name (e.g. "amp") into its character.
//
// The name is looked up in the entities table; the stored decimal text is
// parsed with toUShort() and wrapped in a QChar. A name that is not in the
// table goes through the same conversion with whatever find() yields.
QChar Stream::entityToChar(QString e)
{
    const QString decimalCode(entities.find(e));
    const ushort codePoint = decimalCode.toUShort();
    return QChar(codePoint);
}

// Puts the current tag state back to its neutral value: black text, empty
// href/base, heading level 0 and every flag cleared. Also raises tagChanged.
void Stream::resetTags()
{
    tag.color = Qt::black;
    tag.href = "";
    tag.base = "";
    tag.h = 0;

    // Block-level flags.
    tag.p = false;
    tag.br = false;
    tag.hr = false;
    tag.ul = false;
    tag.ol = false;
    tag.dl = false;
    tag.pre = false;
    tag.bq = false;
    tag.address = false;

    // Phrase flags.
    tag.em = false;
    tag.cite = false;
    tag.var = false;
    tag.strong = false;
    tag.code = false;
    tag.samp = false;
    tag.kbd = false;
    tag.dfn = false;
    tag.del = false;

    // Font-style flags.
    // NOTE(review): tag.b used to be listed twice in this reset chain —
    // possibly a typo for a different field; confirm against the tag struct.
    tag.small = false;
    tag.i = false;
    tag.b = false;
    tag.tt = false;
    tag.big = false;
    tag.strike = false;
    tag.u = false;

    // Document-structure and miscellaneous flags.
    tag.a = false;
    tag.link = false;
    tag.html = false;
    tag.head = false;
    tag.title = false;
    tag.body = false;
    tag.script = false;
    tag.abbr = false;
    tag.acronym = false;
    tag.div = false;
    tag.img = false;

    tagChanged = true;
}

// Selects the character encoding used for reading.
//
// e: encoding name; an empty string selects "ISO 8859-1".
//
// The name is always remembered in 'encoding'. If a text stream exists it is
// rebuilt on top of 'file' and configured in this order:
//   1. "UTF-8" is handled by QTextStream itself;
//   2. otherwise the name is handed to the decoder, and when the decoder
//      reports Index > -1 the 'recode' flag makes readNextChar() route
//      characters through it;
//   3. otherwise "ISO 8859-1" selects QTextStream's Latin1 mode;
//   4. otherwise a QTextCodec is looked up by name.
void Stream::setEncoding(QString e)
{
    if (e.length() == 0) e = "ISO 8859-1";
    recode = false;
    encoding = e;
    if (!textstream) return;

    // The stream was created with 'new'; calling only its destructor (as the
    // code used to) leaked the allocation on every encoding change.
    delete textstream;
    textstream = new QTextStream(&file);

    if (encoding == "UTF-8")
    {
        textstream->setEncoding(QTextStream::UnicodeUTF8);
        return;
    }

    decoder->setCodec(e);
    // NOTE(review): Index > -1 presumably means the decoder has a table for
    // this encoding — confirm against toUnicode.
    if (decoder->Index > -1)
    {
        recode = true;
        return;
    }

    if (encoding == "ISO 8859-1")
    {
        textstream->setEncoding(QTextStream::Latin1);
        return;
    }

    // codecForName() returns 0 for an unknown name; never hand that to
    // setCodec(). In that case the freshly built stream keeps its default.
    QTextCodec *codec = QTextCodec::codecForName(encoding);
    if (codec) textstream->setCodec(codec);
}

// Reads the next character from the text stream into the member 'c' and
// returns it after normalisation.
//
// Returns a null QChar when no stream is attached, when the stream is at its
// end, or when isTag() swallowed the character (in which case tagChanged is
// set). Otherwise returns the character, with:
//   - '\r' skipped when stripCR is set, or turned into '\n' when it is not;
//   - the decoder applied when 'recode' is set;
//   - U+00A0 replaced by a plain space;
//   - anything below 32 that is left afterwards replaced by a plain space.
// tagChanged and newLine are cleared on entry.
QChar Stream::readNextChar()
{
    tagChanged = false;
    newLine = false;

    // Without an open document there is nothing to read; setEncoding()
    // already treats a null textstream as a valid state.
    if (!textstream) return QChar();

    do
    {
        if (textstream->atEnd()) return QChar();
        *textstream >> c;
    } while (stripCR && (c == '\r'));
    if (c == '\r') c = '\n';

    if (recode) c = decoder->getQChar(c.unicode());
    if (c.unicode() == 0xA0) c = ' ';

    c = isTag(c);
    if (c.isNull())
    {
        tagChanged = true;
        return QChar();
    }

    if (c.unicode() < 32) return ' ';
    return c;
}

// Tag-detection hook applied by readNextChar() to every character it reads.
// This implementation returns its argument unchanged. readNextChar() treats
// a null QChar coming back from here as "tag state changed".
// NOTE(review): presumably overridden by markup-aware subclasses — confirm
// in Stream.h.
QChar Stream::isTag(QChar t)
{
    return t;
}

// Returns its argument unchanged. Not called anywhere in this file.
// NOTE(review): presumably a hook for subclasses that parse markup — confirm
// in Stream.h.
QChar Stream::getTag(QChar t)
{
    return t;
}

// Reads the next word from the current position and leaves the position
// just after it.
//
// stripLeadingSpaces: when true, blanks in front of the word are skipped —
//                     unless tag.pre is set and reparagraph is not, in which
//                     case they are kept as part of the word.
//
// Returns a null QString when already at the end; otherwise the member
// 'word' (which may be empty, e.g. when the first thing read was swallowed
// by isTag()).
//
// A word ends at the first blank that follows a non-blank character, at the
// end of input, or at a null character from readNextChar(). In the last
// case the tag state is restored from the snapshot taken on entry and the
// position is rewound to just before the null, so the next call sees it.
QString Stream::readWordForward(bool stripLeadingSpaces)
{
    QChar ch;
    int pos;
    tempTag = tag;
    bool leadingSpace = true;

    if (atEnd()) return QString();

    word = "";
    while (1)
    {
        pos = getPosition();
        if (atEnd()) break;
        ch = readNextChar();
        if (ch.isNull())
        {
            // The null was consumed: continue after it next time.
            pos = getPosition();
            break;
        }
        if (ch.isSpace() && stripLeadingSpaces && !(tag.pre && !reparagraph))
        {
            continue;
        }

        // Fix: the first accepted character already ends the leading-blank
        // run when it is not a blank. Previously the flag was only cleared
        // by later characters, so a one-character word swallowed the
        // following blank and the next word ("I am" came back as one word).
        if (!ch.isSpace()) leadingSpace = false;

        while (1)
        {
            word.append(ch);
            pos = getPosition();
            if (atEnd()) break;
            ch = readNextChar();
            if (ch.isSpace())
            {
                if (!leadingSpace) break;
            } else {
                leadingSpace = false;
            }
            if (ch.isNull())
            {
                // Undo the side effects of reading the null; 'pos' still
                // points in front of it.
                tagChanged = false;
                tag = tempTag;
                newLine = false;
                newParagraph = false;
                break;
            }
        }
        break;
    }
    setPosition(pos);
    return word;
}

// Returns the current offset of the underlying file, as reported by
// file.at().
int Stream::getPosition()
{
    return file.at();
}

// Moves the underlying file to offset pos via file.at(pos). Any result of
// that call is discarded.
void Stream::setPosition(int pos)
{
    file.at(pos);
}

// Returns the size of the underlying file, as reported by file.size().
int Stream::getSize()
{
    return file.size();
}

// Moves the underlying file back to offset 0.
void Stream::rewind()
{
    file.at(0);
}

// Moves the underlying file to the offset equal to its size, i.e. just past
// the last byte.
void Stream::gotoEnd()
{
    file.at(file.size());
}

bool Stream::openFile(const QString & filepath)
{
    QFile f;
    if ( !QFile::exists( filepath ) ) return false;
    f.setName( filepath ); 
    if (!f.open( IO_ReadOnly ))
    {
	return false;
    }
    fileOpened = true;
    f.close();
    if (file.isOpen()) file.close();
    file.setName( filepath ); 
    file.open( IO_ReadOnly );
    docSize = file.size();
    numberOfPages = int(docSize/2000) + 1;
    newParagraph = false;
    newLine = false;
    if (textstream) textstream->~QTextStream();
    textstream = new QTextStream(&file);
    resetTags();
    setEncoding(encoding);
    if (links) delete [] links;
    links = 0;
    linksFound = false;
    return true;
}

// Detaches the current document: destroys the text stream and closes the
// underlying file.
void Stream::closeFile()
{
    // The stream is created with 'new', so it must be deleted; invoking only
    // the destructor (as the code used to) leaked its memory.
    delete textstream;
    textstream = 0;
    file.close();
}

// Reports whether the underlying file is at its end, as given by
// file.atEnd().
bool Stream::atEnd()
{
    return file.atEnd();
}

// Tells whether a paragraph break is pending and consumes it.
//
// Always answers true at the end of the document (the flag is left alone in
// that case). Otherwise returns the newParagraph flag and clears it.
bool Stream::checkNewParagraph()
{
    if (atEnd())
    {
        return true;
    }

    const bool pending = newParagraph;
    newParagraph = false;
    return pending;
}

// Tells whether a line break is pending and consumes it.
//
// Always answers true at the end of the document (the flag is left alone in
// that case). Otherwise returns the newLine flag and clears it.
bool Stream::checkNewLine()
{
    if (atEnd())
    {
        return true;
    }

    const bool pending = newLine;
    newLine = false;
    return pending;
}

// Returns the directory part of the current file's path, obtained from
// QFileInfo::dirPath() with its argument set to true.
QString Stream::getFileDir()
{
    return QFileInfo(file).dirPath(true);
}

// Returns the 1-based page number of the current position. A page spans
// 2000 position units, the same divisor openFile() uses for the page count.
int Stream::getPageNumber()
{
    const int pageSpan = 2000;
    const int pageIndex = getPosition() / pageSpan;
    return pageIndex + 1;
}

// Always returns 0 in this class; the argument is not used.
// NOTE(review): presumably overridden by subclasses that resolve in-document
// links — confirm in Stream.h.
int Stream::getInLinkPosition(QString s)
{
    (void)s;    // parameter intentionally unused
    return 0;
}

// Does nothing in this class.
// NOTE(review): presumably overridden by subclasses to fill the links
// array — confirm in Stream.h.
void Stream::findLinks()
{
}

// Scans forward from the current position for the next spot that looks like
// the start of a content entry and returns a description of it.
//
// A candidate is found when a word read reports a tag change together with a
// pending paragraph break, and a line break is seen while the following
// tag changes are skipped. From that spot up to six words are collected.
//
// Returns "<words>\n<page>\n<position>\n1\n" as soon as, after at least two
// reads, the collected text is non-blank and a line break is pending.
// Returns "" when the end of the document is reached without a match.
// NOTE(review): the meaning of the constant trailing "1" field is not
// visible in this file.
QString Stream::findNextContent()
{
    bool n;
    int p;
    int page;
    //QChar ch;
    // NOTE(review): this local shadows the member 'tag' and is never used.
    QString tag;
    QString content,s;
    content = "";
    p = getPosition();
    // Disabled alternative that walked the pdbToIndex table instead of
    // scanning the text.
    /*if (pdbToIndex)
    {
	p = getPosition();
	for (unsigned int i =0; i< rimSize;i++)
	{
	    if (pdbToIndex[i].index<p) continue;
	    if (pdbToIndex[i].offset !=0) continue;
	    setPosition(pdbToIndex[i].index);
	    p = getPosition();
	    //printf("->%d\n",p);
	    page = getPageNumber();
	    for (int i=0;i<6;i++)
	    {
		s = readWordForward(true);
		content += s + " ";
		//if ((i>0) && (content.stripWhiteSpace() != "") && checkNewParagraph()) break;
		if ((i>0) && (content.stripWhiteSpace() != "") && checkNewLine()) break;
	    }
	    return content + "\n" + QString::number(page)+"\n"+QString::number(p)+"\n1\n";
	}
	return "";
    }*/
    while (!atEnd())
    {
	// n records whether a line break was seen while skipping tag changes.
	n = false;
	readWordForward(true);
	// Only a tag change combined with a pending paragraph break starts a
	// candidate. checkNewParagraph() also answers true at end of file.
	if (!tagChanged) continue;
	if (!checkNewParagraph()) continue;
	// Skip over consecutive tag changes, remembering the position in
	// front of the last read and whether a line break was pending.
	while (tagChanged)
	{
	    if (atEnd()) break;
	    p = getPosition();
	    if (checkNewLine()) n = true;
	    s = readWordForward(true);
	}
	if (!n) continue;
	// If the last read produced text, step back in front of it so that it
	// becomes the first collected word.
	if (s != "") setPosition(p);
	p = getPosition();
	page = getPageNumber();
	// Collect up to six words, each followed by a space.
	for (int i=0;i<6;i++)
	{
	    s = readWordForward(true);
	    content += s + " ";
	    //if ((i>0) && (content.stripWhiteSpace() != "") && checkNewParagraph())
	    if ((i>0) && (content.stripWhiteSpace() != "") && checkNewLine())
	    {
		//printf("->%s\n",(content + "\n" + QString::number(page)+"\n"+QString::number(p)+"\n1\n").latin1());
		return content + "\n" + QString::number(page)+"\n"+QString::number(p)+"\n1\n";
	    }
	}
	// No line break within six words: discard the candidate, return to
	// where collection started and keep scanning.
	content = "";
	setPosition(p);
    }
    return "";
}
