/*
F5er - RSS feed maker for arbitrary web pages.

See README.md for details.

Copyright (C) 2011-2017 Vitaly Minko <vitaly.minko@gmail.com>

This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.  See the GNU General Public License
(http://www.gnu.org/licenses/) for more details.
*/
package main

import (
	"bufio"
	"bytes"
	"crypto/md5"
	"crypto/tls"
	"encoding/hex"
	"flag"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"net/http"
	"net/url"
	"os"
	"reflect"
	"regexp"
	"strings"
	"time"

	"github.com/gorilla/feeds"
	"github.com/jaytaylor/html2text"
	"golang.org/x/net/html"
	"golang.org/x/net/html/charset"
	"gopkg.in/xmlpath.v2"
)

// Do not forget to update documentation after changing global variables and constants.
//
// Command-line flags. Each variable is a pointer to the flag's value and is
// populated by flag.Parse(), which main calls first.
var (
	// ArgConfig is the path of the configuration file handed to parseConfig.
	ArgConfig  = flag.String("config", "/etc/f5er.conf", "Configuration file to use")
	// ArgLogfile is the path of the file that main opens (creating/appending)
	// and installs as the output of the standard logger.
	ArgLogfile = flag.String("logfile", "f5er.log", "File to write log in")
	// ArgHost and ArgPort form the "host:port" address the HTTP server binds to.
	ArgHost    = flag.String("address", "localhost", "Local host bind address")
	ArgPort    = flag.Int("port", 8080, "Local host bind port")
	// ArgHelp makes main print the usage message and return without serving.
	ArgHelp    = flag.Bool("help", false, "Print this help message and exit")
)

// Program-wide settings fixed at compile time.
const (
	// DefaultUserAgent is the User-Agent header downloadHtml sends when the
	// channel does not specify its own.
	DefaultUserAgent = "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0"
	// ProgramVersion and ProgramName are combined into the RSS "generator"
	// value and reported in the start-up log message and the help banner.
	ProgramVersion   = "1.0"
	ProgramName      = "F5er"
	// Debug controls whether messages logged at DEBUG level are written.
	Debug            = true
	// UseInsecureTls is passed as tls.Config.InsecureSkipVerify by
	// downloadHtml: when true, server certificates are NOT verified.
	// NOTE(review): this disables protection against man-in-the-middle
	// attacks for every download; confirm that this default is intended.
	UseInsecureTls   = true
)

// Channel describes one feed declared in the configuration file. The fields
// are filled by parseConfig from the options of a "[channel]" section.
type Channel struct {
	// Title is the RSS feed title (option "title", mandatory).
	Title          string
	// Link is both the URL of the page that is downloaded and the link
	// published in the RSS feed and its item (option "link", mandatory).
	Link           string
	// Description is the RSS feed description (option "description", mandatory).
	Description    string
	// SelectionXPath is the XPath expression selecting the part of the page
	// that becomes the feed item (option "selection_xpath", mandatory).
	SelectionXPath string
	// UserAgent overrides DefaultUserAgent when non-empty (option "user_agent").
	UserAgent      string
	// PostForm holds the form fields parsed from option "post_form". When it
	// is non-nil the page is requested with POST instead of GET.
	PostForm       url.Values
}
// Channels maps a channel ID (the section name in the configuration file) to
// its definition. It is assigned once in main and read by httpRequestHandler.
var Channels map[string]*Channel

// Log levels accepted by l, numbered from 0 (DEBUG) to 4 (FATAL).
const (
	// DEBUG messages are written only when the Debug constant is true.
	DEBUG = iota
	INFO
	WARNING
	ERROR
	// FATAL messages are also printed to standard output and terminate the
	// program (l uses log.Fatalf for them).
	FATAL
)

// l writes a printf-style message to the standard logger, prefixed with the
// name of its level.
//
// DEBUG messages are dropped unless Debug is true. A FATAL message is echoed
// to standard output as well and then terminates the program through
// log.Fatalf. Any level other than DEBUG, INFO, WARNING, ERROR or FATAL is a
// programming error and causes a panic.
func l(level int, format string, args ...interface{}) {
	var prefix string
	switch level {
	case DEBUG:
		if !Debug {
			return
		}
		prefix = "DEBUG: "
	case INFO:
		prefix = "INFO: "
	case WARNING:
		prefix = "WARNING: "
	case ERROR:
		prefix = "ERROR: "
	case FATAL:
		fmt.Printf("FATAL ERROR: "+format+"\n", args...)
		// log.Fatalf exits the process, so nothing below runs for FATAL.
		log.Fatalf("FATAL: "+format, args...)
		return
	default:
		panic("Unknown log level.")
	}
	log.Printf(prefix+format, args...)
}

// testCompleteness verifies that every mandatory option of the channel has
// been given a value.
//
// The fields Title, Link, Description and SelectionXPath are inspected in
// that order; the first one still holding its zero value produces an error
// that names the corresponding configuration option. It returns nil when all
// of them are set.
func (channel *Channel) testCompleteness() error {
	mandatory := []struct {
		field  string // struct field name looked up by reflection
		option string // option name as written in the configuration file
	}{
		{"Title", "title"},
		{"Link", "link"},
		{"Description", "description"},
		{"SelectionXPath", "selection_xpath"},
	}

	fieldsOf := reflect.Indirect(reflect.ValueOf(channel))
	for _, m := range mandatory {
		current := fieldsOf.FieldByName(m.field).Interface()
		// A field is unset when it equals the zero value of its own type.
		unset := reflect.Zero(reflect.TypeOf(current)).Interface()
		if current == unset {
			return fmt.Errorf("mandatory option '%s' is not specified", m.option)
		}
	}
	return nil
}

// parsePostForm parses the value of the "post_form" configuration option, a
// list of "name=value" fields separated by '&', into url.Values.
//
// Each field is split at its FIRST '=' so that values may themselves contain
// '=' (for example base64 padding); everything after it, possibly nothing, is
// the value. Names and values are taken literally (no percent-decoding is
// performed); repeated names accumulate. A field without any '=' makes the
// whole option invalid: nil and an error naming that field are returned.
func parsePostForm(postForm string) (url.Values, error) {
	values := url.Values{}
	for _, pair := range strings.Split(postForm, "&") {
		eq := strings.Index(pair, "=")
		if eq < 0 {
			return nil, fmt.Errorf("malformed field: '%s'", pair)
		}
		values.Add(pair[:eq], pair[eq+1:])
	}
	return values, nil
}

// parseConfig reads the channel definitions from the configuration file
// cfgFileName and returns them keyed by channel ID.
//
// The file is processed line by line:
//   - empty lines and lines whose first character is '#' are skipped;
//   - "[id]" starts a new channel named id;
//   - "name = value" sets an option of the current channel. Known options are
//     title, description, link, selection_xpath, user_agent and post_form.
//
// Problems in the file are logged and handled leniently: unparsable lines,
// unknown or repeated options and options outside of a channel are ignored; a
// redeclared channel or a channel with a malformed post_form is skipped
// together with its remaining options. A channel that lacks a mandatory
// option (see testCompleteness) is removed from the result; this check is
// performed when the next channel starts and, for the last channel, at the
// end of the file.
//
// Failure to open or to read the file is reported with l(FATAL, ...), which
// terminates the program.
func parseConfig(cfgFileName string) (result map[string]*Channel) {
	result = make(map[string]*Channel)

	cfgFile, err := os.Open(cfgFileName)
	if err != nil {
		l(FATAL, "Can't open config file '%s': %v.", cfgFileName, err)
	}
	defer cfgFile.Close()

	// dropIfIncomplete removes channel id from the result when it exists but
	// is missing a mandatory option. Unknown ids (including "") are ignored.
	dropIfIncomplete := func(id string) {
		channel, ok := result[id]
		if !ok {
			return
		}
		if err := channel.testCompleteness(); err != nil {
			l(ERROR, "Incomplete channel '%s': %v.", id, err)
			delete(result, id)
		}
	}

	var channelID string // ID of the channel being filled, "" if none
	var skipChannel bool // true while the options of a rejected channel are being ignored
	scanner := bufio.NewScanner(cfgFile)
	channelRe := regexp.MustCompile("^\\[(\\w+)\\]$")
	optionRe := regexp.MustCompile("^(\\w+)\\s?=\\s?(.*)$")
	for lineNumber := 1; scanner.Scan(); lineNumber++ {
		line := scanner.Text()
		if line == "" {
			l(DEBUG, "Skipping empty line #%d.", lineNumber)
			continue
		}
		if line[0] == '#' {
			l(DEBUG, "Skipping comment at line #%d.", lineNumber)
			continue
		}

		if m := channelRe.FindStringSubmatch(line); m != nil {
			name := m[1]
			l(DEBUG, "New channel found: '%s'.", name)
			// The previous channel is finished now, so it can be validated.
			dropIfIncomplete(channelID)
			if _, ok := result[name]; ok {
				l(ERROR, "Redeclaration of the channel '%s' at line #%d.", name, lineNumber)
				channelID = ""
				skipChannel = true
				continue
			}
			skipChannel = false
			channelID = name
			result[channelID] = new(Channel)
			continue
		}

		m := optionRe.FindStringSubmatch(line)
		if m == nil {
			l(ERROR, "Can't parse line #%d", lineNumber)
			continue
		}
		name, value := m[1], m[2]
		l(DEBUG, "New option found at line #%d: '%s' = '%s'.", lineNumber, name, value)
		if skipChannel {
			l(DEBUG, "Skipping option '%s' at line #%d.", name, lineNumber)
			continue
		}
		if channelID == "" {
			l(ERROR, "Option '%s' before channel declaration at line #%d.", name, lineNumber)
			continue
		}
		channel, ok := result[channelID]
		if !ok {
			l(ERROR, "Internal error parsing line #%d: no channel %s.", lineNumber, channelID)
			continue
		}

		if name == "post_form" {
			channel.PostForm, err = parsePostForm(value)
			if err != nil {
				l(ERROR, "Malformed post_form option at line #%d: %v.", lineNumber, err)
				l(WARNING, "Channel '%s' will be skipped.", channelID)
				delete(result, channelID)
				channelID = ""
				skipChannel = true
			}
			continue
		}

		var fieldPtr *string
		switch name {
		case "title":
			fieldPtr = &channel.Title
		case "description":
			fieldPtr = &channel.Description
		case "link":
			fieldPtr = &channel.Link
		case "selection_xpath":
			fieldPtr = &channel.SelectionXPath
		case "user_agent":
			fieldPtr = &channel.UserAgent
		default:
			l(ERROR, "Unknown option '%s' at line #%d.", name, lineNumber)
			continue
		}
		// The first value wins; later assignments of the same option are rejected.
		if *fieldPtr != "" {
			l(ERROR, "Redeclaration of option '%s' at line #%d", name, lineNumber)
			continue
		}
		*fieldPtr = value
	}

	if err = scanner.Err(); err != nil {
		l(FATAL, "Can't read config file '%s': %v.", cfgFileName, err)
	}

	// No further "[channel]" line follows the last channel, so it has to be
	// validated explicitly; otherwise an incomplete final channel would be served.
	dropIfIncomplete(channelID)

	return result
}

// downloadHtml fetches the page at url and returns its body converted to
// UTF-8.
//
// The request is a GET unless postForm is non-nil, in which case the encoded
// form is sent with POST as application/x-www-form-urlencoded. userAgent is
// sent as the User-Agent header; an empty value selects DefaultUserAgent.
// TLS certificate verification follows the UseInsecureTls constant. The
// source encoding is determined by charset.NewReader from the response's
// Content-Type header and the content itself.
//
// The HTTP status code is not inspected: the body of any response is
// returned. An error is returned when the request cannot be built or sent, or
// when the body cannot be decoded or read.
func downloadHtml(url string, userAgent string, postForm url.Values) (string, error) {
	l(DEBUG, "Downloading URL '%s'.", url)
	tr := &http.Transport{
		TLSClientConfig: &tls.Config{InsecureSkipVerify: UseInsecureTls},
	}
	// The transport lives only for this call; without this its keep-alive
	// connections would stay open after the function returns.
	defer tr.CloseIdleConnections()
	// Bound the whole exchange so that an unresponsive server cannot block
	// the calling request handler forever.
	client := &http.Client{Transport: tr, Timeout: 60 * time.Second}

	var reqBodyReader io.Reader
	method := "GET"
	if postForm != nil {
		method = "POST"
		reqBodyReader = strings.NewReader(postForm.Encode())
	}
	req, err := http.NewRequest(method, url, reqBodyReader)
	if err != nil {
		return "", fmt.Errorf("failed to compose HTTP request for '%s': %v", url, err)
	}
	if postForm != nil {
		// Servers need the content type to recognise the body as form data.
		req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	}

	if userAgent == "" {
		userAgent = DefaultUserAgent
	}
	req.Header.Set("User-Agent", userAgent)
	resp, err := client.Do(req)
	if err != nil {
		return "", fmt.Errorf("failed to download URL '%s': %v", url, err)
	}
	defer resp.Body.Close()

	contentType := resp.Header.Get("Content-Type")
	utf8Reader, err := charset.NewReader(resp.Body, contentType)
	if err != nil {
		return "", fmt.Errorf("can't convert from '%s' to UTF-8: %v", contentType, err)
	}

	body, err := ioutil.ReadAll(utf8Reader)
	if err != nil {
		return "", fmt.Errorf("error reading from UTF-8 reader: %v", err)
	}

	return string(body), nil
}

func extractSelection(rawHtml string, xpath string) (string, error) {
	// Need to clean up HTML first.
	reader := strings.NewReader(rawHtml)
	root, err := html.Parse(reader)
	if err != nil {
		return "", fmt.Errorf("error parsing HTML by net/html")
	}
	var b bytes.Buffer
	html.Render(&b, root)
	fixedHtml := b.String()
	fmt.Println(fixedHtml)

	reader = strings.NewReader(fixedHtml)
	xmlroot, xmlerr := xmlpath.ParseHTML(reader)
	if xmlerr != nil {
		return "", fmt.Errorf("error parsing HTML by xmlpath")
	}

	path := xmlpath.MustCompile(xpath)
	value, ok := path.String(xmlroot)
	if ok != true {
		return "", fmt.Errorf("HTML selection was not found")
	}
	return value, nil
}

// getMD5Hash returns the MD5 digest of text as a 32-character lowercase
// hexadecimal string.
func getMD5Hash(text string) string {
	hasher := md5.New()
	hasher.Write([]byte(text)) // hash.Hash.Write never returns an error
	return hex.EncodeToString(hasher.Sum(nil))
}

// composeRssFeed builds an RSS document with a single item for channel and
// returns it as XML.
//
// The feed takes its title, link and description from the channel. The item
// carries the HTML-escaped selection as its description, the MD5 hash of the
// selection as its GUID (so the GUID changes exactly when the selection
// changes) and a title made of "Update found: " plus a short plain-text
// summary of the selection. Both publication dates are the current time.
//
// An error is returned only when the feed cannot be serialised to XML; a
// failure to produce the summary is logged and replaced by a placeholder.
func composeRssFeed(channel *Channel, selection string) (string, error) {
	// Longest summary, in characters, appended to the item title.
	const maxSummaryRunes = 50

	pubDate := time.Now().Format(time.RFC1123Z)
	feed := &feeds.RssFeed{
		Title:       channel.Title,
		Link:        channel.Link,
		Description: channel.Description,
		Generator:   ProgramName + " version " + ProgramVersion,
		PubDate:     pubDate,
	}

	summary, err := html2text.FromString(selection)
	if err != nil {
		l(WARNING, "Error converting HTML to plaintext for channel '%s': %v", channel.Title, err)
		summary = "Could not generate summary :-("
	}
	summary = strings.TrimSpace(summary)
	summary = strings.Replace(summary, "\n", "", -1)
	// Truncate by runes, not bytes: cutting a multi-byte character in half
	// would put invalid UTF-8 into the feed.
	if runes := []rune(summary); len(runes) > maxSummaryRunes {
		summary = string(runes[:maxSummaryRunes])
	}

	feed.Items = []*feeds.RssItem{
		{
			Title:       "Update found: " + summary,
			Link:        channel.Link,
			Description: html.EscapeString(selection),
			Guid:        getMD5Hash(selection),
			PubDate:     pubDate,
		},
	}

	rss, err := feeds.ToXML(feed)
	if err != nil {
		return "", fmt.Errorf("can't convert RSS feed to XML: %v", err)
	}
	return rss, nil
}

// validChannelPath matches request paths of the form "/<channel id>" and
// captures the ID. It is compiled once instead of on every request.
var validChannelPath = regexp.MustCompile("^/([a-zA-Z0-9_]+)$")

// httpRequestHandler serves the RSS feed of the channel named in the request
// path ("/<channel id>").
//
// Only GET is accepted (405 otherwise). A path that is not a single channel
// ID yields 400 and an ID missing from Channels yields 404. For a known
// channel the page is downloaded, the configured selection is extracted and
// the resulting RSS document is written to w; a failure in any of these steps
// is logged and answered with 500.
func httpRequestHandler(w http.ResponseWriter, r *http.Request) {
	l(DEBUG, "Got request: %s %s.", r.Method, r.URL.Path)

	if r.Method != "GET" {
		l(WARNING, "Forbidden method %s.", r.Method)
		http.Error(w, "Invalid request method.", http.StatusMethodNotAllowed)
		return
	}

	m := validChannelPath.FindStringSubmatch(r.URL.Path)
	if m == nil {
		l(WARNING, "Malformed URI '%s'.", r.URL.Path)
		http.Error(w, "Invalid request URI", http.StatusBadRequest)
		return
	}
	id := m[1]
	channel, ok := Channels[id]
	if !ok {
		l(WARNING, "Unknown channel '%s'.", id)
		http.NotFound(w, r)
		return
	}

	l(DEBUG, "Handling channel '%s'.", channel.Title)
	page, err := downloadHtml(channel.Link, channel.UserAgent, channel.PostForm)
	if err != nil {
		l(WARNING, "Can't download feed page '%s': %v.", channel.Link, err)
		http.Error(w, "Could not download feed page.", http.StatusInternalServerError)
		return
	}

	selection, err := extractSelection(page, channel.SelectionXPath)
	if err != nil {
		// Keep the underlying reason in the log: the failure is not always
		// caused by an outdated XPath.
		l(WARNING, "Looks like the XPath '%s' is obsolete for '%s': %v.",
			channel.SelectionXPath, channel.Link, err)
		http.Error(w, "Could not parse feed page.", http.StatusInternalServerError)
		return
	}
	rss, err := composeRssFeed(channel, selection)
	if err != nil {
		l(ERROR, "Failed to compose RSS feed for channel '%s': %v", id, err)
		http.Error(w, "Could not compose RSS feed.", http.StatusInternalServerError)
		return
	}
	l(DEBUG, "RSS feed for channel '%s' successfully generated.", id)
	fmt.Fprintln(w, rss)
}

// main parses the command-line flags, redirects the standard logger to the
// log file, loads the channels from the configuration file and serves them
// over HTTP on the configured address until the server fails.
//
// With -help it only prints the usage message. Failure to open the log file
// or to run the HTTP server is fatal.
func main() {
	flag.Parse()

	if *ArgHelp {
		fmt.Printf("%s - RSS feed maker for arbitrary web pages.\n", ProgramName)
		flag.Usage()
		return
	}

	logfile, err := os.OpenFile(*ArgLogfile, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
	if err != nil {
		l(FATAL, "Error opening log file: %v", err)
	}
	defer logfile.Close()
	log.SetOutput(logfile)

	l(DEBUG, "%s version %s started.", ProgramName, ProgramVersion)
	Channels = parseConfig(*ArgConfig)

	http.HandleFunc("/", httpRequestHandler)
	// ListenAndServe blocks for as long as the server runs, so the start-up
	// message has to be logged before the call.
	l(DEBUG, "Starting HTTP server on %s:%d.", *ArgHost, *ArgPort)
	err = http.ListenAndServe(fmt.Sprintf("%s:%d", *ArgHost, *ArgPort), nil)
	// ListenAndServe only returns on failure, always with a non-nil error.
	l(FATAL, "Can not start HTTP daemon: %v", err)
}
