// Package torlogs contains code to read tor logs.
package torlogs

import (
	"bytes"
	"errors"
	"fmt"
	"os"
	"regexp"
	"strconv"

	"github.com/ooni/probe-cli/v3/internal/model"
)

// Sentinel errors returned by the functions in this package.
var (
	// ErrEmptyLogFilePath indicates that the log file path is empty.
	//
	// ReadBootstrapLogs returns it when called with "" as the path.
	ErrEmptyLogFilePath = errors.New("torlogs: empty log file path")

	// ErrCannotReadLogFile indicates we cannot read the log file.
	//
	// ReadBootstrapLogs wraps it with %w and appends the text of the
	// underlying read error to the message.
	ErrCannotReadLogFile = errors.New("torlogs: cannot read the log file")

	// ErrNoBootstrapLogs indicates we could not find any bootstrap log in the log file.
	//
	// ReadBootstrapLogs returns it when no line matches torBootstrapRegexp.
	ErrNoBootstrapLogs = errors.New("torlogs: no bootstrap logs")

	// ErrCannotFindSubmatches indicates we cannot find submatches.
	//
	// ParseBootstrapLogLine returns it when the given line does not
	// match torBootstrapRegexp.
	ErrCannotFindSubmatches = errors.New("torlogs: cannot find submatches")
)

// torBootstrapRegexp helps to extract progress info from logs.
//
// The expression is anchored at both ends and does not enable multi-line
// mode, so it must match a whole line that carries no line terminator.
// It defines three capture groups:
//
//  1. the progress percentage (decimal digits only);
//  2. the tag (ASCII letters and underscores);
//  3. the summary (ASCII letters, digits, and spaces).
//
// An example of a matching line is:
//
//	Feb 04 15:04:29.000 [notice] Bootstrapped 5% (conn): Connecting to a relay
//
// See https://regex101.com/r/Do07qd/1.
var torBootstrapRegexp = regexp.MustCompile(
	`^[A-Za-z0-9.: ]+ \[notice\] Bootstrapped ([0-9]+)% \(([A-Za-z_]+)\): ([A-Za-z0-9 ]+)$`)

// ReadBootstrapLogs reads tor logs from the given file and
// returns a list of bootstrap-related logs.
//
// The logFilePath argument is the path of the tor log file. The whole
// file is read into memory and split into lines; the lines matching
// torBootstrapRegexp are returned in file order and without their line
// terminator. Both LF and CRLF line endings are accepted.
//
// On failure the returned slice is nil and the error is one of:
//
//   - ErrEmptyLogFilePath, when logFilePath is empty;
//   - an error wrapping ErrCannotReadLogFile, when the file cannot be read;
//   - ErrNoBootstrapLogs, when no line of the file matches.
func ReadBootstrapLogs(logFilePath string) ([]string, error) {
	// Implementation note:
	//
	// Tor is known to be good software that does not break its output
	// unnecessarily and that does not include PII into its logs unless
	// explicitly asked to. This fact gives me confidence that we can
	// safely include this subset of the logs into the results.
	//
	// On this note, I think it's safe to include timestamps from the
	// logs into the output, since we have a timestamp for the whole
	// experiment already, so we don't leak much more by also including
	// the Tor proper timestamps into the results.
	if logFilePath == "" {
		return nil, ErrEmptyLogFilePath
	}
	data, err := os.ReadFile(logFilePath)
	if err != nil {
		// Only the sentinel is wrapped; the underlying error contributes
		// its text to the message.
		return nil, fmt.Errorf("%w: %s", ErrCannotReadLogFile, err.Error())
	}
	var out []string
	for _, bline := range bytes.Split(data, []byte("\n")) {
		// The regexp is anchored with `$`, so the carriage return left
		// behind by a CRLF line ending would otherwise prevent every
		// line of such a file from matching.
		bline = bytes.TrimSuffix(bline, []byte("\r"))
		if torBootstrapRegexp.Match(bline) {
			out = append(out, string(bline))
		}
	}
	if len(out) == 0 {
		return nil, ErrNoBootstrapLogs
	}
	return out, nil
}

// ReadBootstrapLogsOrWarn calls ReadBootstrapLogs on logFilePath and
// returns the lines it found. When ReadBootstrapLogs fails, the error
// message is emitted as a warning through logger and the function
// returns nil instead of propagating the error.
func ReadBootstrapLogsOrWarn(logger model.Logger, logFilePath string) []string {
	lines, readErr := ReadBootstrapLogs(logFilePath)
	if readErr == nil {
		return lines
	}
	// Best effort: the caller only wants the logs when they are available.
	logger.Warnf("%s", readErr.Error())
	return nil
}

// BootstrapInfo contains info extracted from a bootstrap log line.
//
// ParseBootstrapLogLine fills each field from the corresponding capture
// group of torBootstrapRegexp.
type BootstrapInfo struct {
	// Progress is the progress (between 0 and 100)
	//
	// It is the integer value of the digits preceding the "%" sign. The
	// regexp accepts any run of digits, so the 0-100 range is what tor is
	// expected to emit rather than something this package enforces.
	Progress int64

	// Tag is the machine readable description of the bootstrap state.
	//
	// It is the text between parentheses in the log line and contains
	// only ASCII letters and underscores.
	Tag string

	// Summary is the human readable summary.
	//
	// It is the text following the tag, up to the end of the line.
	Summary string
}

// ParseBootstrapLogLine takes in input a bootstrap log line and returns
// in output the components of such a log line.
//
// The logLine argument must be a single line without line terminator,
// such as the ones returned by ReadBootstrapLogs.
//
// On success it returns a BootstrapInfo holding the progress, tag, and
// summary captured by torBootstrapRegexp. It returns a nil pointer and
// ErrCannotFindSubmatches when the line does not match the regexp, and
// a nil pointer and an error wrapping the strconv failure when the
// progress value does not fit into an int64.
func ParseBootstrapLogLine(logLine string) (*BootstrapInfo, error) {
	// A successful match yields the full match plus three capture groups.
	values := torBootstrapRegexp.FindStringSubmatch(logLine)
	if len(values) != 4 {
		return nil, ErrCannotFindSubmatches
	}
	// The regexp guarantees that values[1] only contains decimal digits, so
	// the only way parsing can fail is a number too large for an int64. We
	// report that instead of silently returning a clamped progress value.
	progress, err := strconv.ParseInt(values[1], 10, 64)
	if err != nil {
		return nil, fmt.Errorf("torlogs: cannot parse progress: %w", err)
	}
	bi := &BootstrapInfo{
		Progress: progress,
		Tag:      values[2],
		Summary:  values[3],
	}
	return bi, nil
}