package engine

import (
	"bufio"
	"context"
	"errors"
	"fmt"
	"io/fs"

	"github.com/apex/log"
	"github.com/ooni/probe-cli/v3/internal/engine/internal/fsx"
	"github.com/ooni/probe-cli/v3/internal/engine/model"
)

// These errors are returned by the InputLoader.
var (
	ErrNoURLsReturned    = errors.New("no URLs returned")
	ErrDetectedEmptyFile = errors.New("file did not contain any input")
	ErrInputRequired     = errors.New("no input provided")
	ErrNoInputExpected   = errors.New("we did not expect any input")
)

// InputLoaderSession is the session according to an InputLoader. We
// introduce this abstraction because it helps us with testing.
type InputLoaderSession interface {
	CheckIn(ctx context.Context,
		config *model.CheckInConfig) (*model.CheckInInfo, error)
}

// InputLoaderLogger is the logger according to an InputLoader.
type InputLoaderLogger interface {
	// Warnf formats and emits a warning message.
	Warnf(format string, v ...interface{})
}

// InputLoader loads input according to the specified policy
// either from command line and input files or from OONI services. The
// behaviour depends on the input policy as described below.
//
// You MUST NOT change any public field of this structure when
// in use, because that MAY lead to data races.
//
// InputNone
//
// We fail if there is any StaticInput or any SourceFiles. If
// there's no input, we return a single, empty entry that causes
// experiments that don't require input to run once.
//
// InputOptional
//
// We gather input from StaticInput and SourceFiles. If there is
// input, we return it. Otherwise we return a single, empty entry
// that causes experiments that don't require input to run once.
//
// InputOrQueryBackend
//
// We gather input from StaticInput and SourceFiles. If there is
// input, we return it. Otherwise, we use OONI's probe services
// to gather input using the best API for the task.
//
// InputStrictlyRequired
//
// We gather input from StaticInput and SourceFiles. If there is
// input, we return it. Otherwise, we return an error.
type InputLoader struct {
	// CheckInConfig contains options for the CheckIn API. If
	// not set, then we'll create a default config. If set but
	// there are fields inside it that are not set, then we
	// will set them to a default value.
	CheckInConfig *model.CheckInConfig

	// InputPolicy specifies the input policy for the
	// current experiment. We will not load any input if
	// the policy says we should not. You MUST fill in
	// this field.
	InputPolicy InputPolicy

	// Logger is the optional logger that the InputLoader
	// should be using. If not set, we will use the default
	// logger of github.com/apex/log.
	Logger InputLoaderLogger

	// Session is the current measurement session. You
	// MUST fill in this field.
	Session InputLoaderSession

	// StaticInputs contains optional input to be added
	// to the resulting input list if possible.
	StaticInputs []string

	// SourceFiles contains optional files to read input
	// from. Each file should contain a single input string
	// per line. We will fail if any file is unreadable
	// as well as if any file is empty.
	SourceFiles []string
}

// Load attempts to load input using the specified input loader. We will
// return a list of URLs because this is the only input we support.
func (il *InputLoader) Load(ctx context.Context) ([]model.URLInfo, error) {
	switch il.InputPolicy {
	case InputOptional:
		return il.loadOptional()
	case InputOrQueryBackend:
		return il.loadOrQueryBackend(ctx)
	case InputStrictlyRequired:
		return il.loadStrictlyRequired(ctx)
	default:
		return il.loadNone()
	}
}

// loadNone implements the InputNone policy.
func (il *InputLoader) loadNone() ([]model.URLInfo, error) {
	if len(il.StaticInputs) > 0 || len(il.SourceFiles) > 0 {
		return nil, ErrNoInputExpected
	}
	// Note that we need to return a single empty entry.
	return []model.URLInfo{{}}, nil
}

// loadOptional implements the InputOptional policy.
func (il *InputLoader) loadOptional() ([]model.URLInfo, error) {
	inputs, err := il.loadLocal()
	if err == nil && len(inputs) <= 0 {
		// Note that we need to return a single empty entry.
		inputs = []model.URLInfo{{}}
	}
	return inputs, err
}

// loadStrictlyRequired implements the InputStrictlyRequired policy.
func (il *InputLoader) loadStrictlyRequired(ctx context.Context) ([]model.URLInfo, error) {
	inputs, err := il.loadLocal()
	if err != nil || len(inputs) > 0 {
		return inputs, err
	}
	return nil, ErrInputRequired
}

// loadOrQueryBackend implements the InputOrQueryBackend policy.
func (il *InputLoader) loadOrQueryBackend(ctx context.Context) ([]model.URLInfo, error) {
	inputs, err := il.loadLocal()
	if err != nil || len(inputs) > 0 {
		return inputs, err
	}
	return il.loadRemote(ctx)
}

// loadLocal loads inputs from StaticInputs and SourceFiles.
func (il *InputLoader) loadLocal() ([]model.URLInfo, error) {
	inputs := []model.URLInfo{}
	for _, input := range il.StaticInputs {
		inputs = append(inputs, model.URLInfo{URL: input})
	}
	for _, filepath := range il.SourceFiles {
		extra, err := il.readfile(filepath, fsx.Open)
		if err != nil {
			return nil, err
		}
		// See https://github.com/ooni/probe-engine/issues/1123.
		if len(extra) <= 0 {
			return nil, fmt.Errorf("%w: %s", ErrDetectedEmptyFile, filepath)
		}
		inputs = append(inputs, extra...)
	}
	return inputs, nil
}

// inputLoaderOpenFn is the type of the function to open a file.
type inputLoaderOpenFn func(filepath string) (fs.File, error)

// readfile reads inputs from the specified file. The open argument should be
// compatible with stdlib's fs.Open and helps us with unit testing.
func (il *InputLoader) readfile(filepath string, open inputLoaderOpenFn) ([]model.URLInfo, error) {
	inputs := []model.URLInfo{}
	filep, err := open(filepath)
	if err != nil {
		return nil, err
	}
	defer filep.Close()
	// Implementation note: when you save file with vim, you have newline at
	// end of file and you don't want to consider that an input line. While there
	// ignore any other empty line that may occur inside the file.
	scanner := bufio.NewScanner(filep)
	for scanner.Scan() {
		line := scanner.Text()
		if line != "" {
			inputs = append(inputs, model.URLInfo{URL: line})
		}
	}
	if scanner.Err() != nil {
		return nil, scanner.Err()
	}
	return inputs, nil
}

// loadRemote loads inputs from a remote source.
func (il *InputLoader) loadRemote(ctx context.Context) ([]model.URLInfo, error) {
	config := il.CheckInConfig
	if config == nil {
		// Note: Session.CheckIn documentation says it will fill in
		// any field with a required value with a reasonable default
		// if such value is missing. So, here we just need to be
		// concerned about NOT passing it a NULL pointer.
		config = &model.CheckInConfig{}
	}
	reply, err := il.checkIn(ctx, config)
	if err != nil {
		return nil, err
	}
	if reply.WebConnectivity == nil || len(reply.WebConnectivity.URLs) <= 0 {
		return nil, ErrNoURLsReturned
	}
	return reply.WebConnectivity.URLs, nil
}

// checkIn executes the check-in and filters the returned URLs to exclude
// the URLs that are not part of the requested categories. This is done for
// robustness, just in case we or the API do something wrong.
func (il *InputLoader) checkIn(
	ctx context.Context, config *model.CheckInConfig) (*model.CheckInInfo, error) {
	reply, err := il.Session.CheckIn(ctx, config)
	if err != nil {
		return nil, err
	}
	// Note: safe to assume that reply is not nil if err is nil
	if reply.WebConnectivity != nil && len(reply.WebConnectivity.URLs) > 0 {
		reply.WebConnectivity.URLs = il.preventMistakes(
			reply.WebConnectivity.URLs, config.WebConnectivity.CategoryCodes,
		)
	}
	return reply, nil
}

// preventMistakes makes the code more robust with respect to any possible
// integration issue where the backend returns to us URLs that don't
// belong to the category codes we requested.
func (il *InputLoader) preventMistakes(input []model.URLInfo, categories []string) (output []model.URLInfo) {
	if len(categories) <= 0 {
		return input
	}
	for _, entry := range input {
		var found bool
		for _, cat := range categories {
			if entry.CategoryCode == cat {
				found = true
				break
			}
		}
		if !found {
			il.logger().Warnf("URL %+v not in %+v; skipping", entry, categories)
			continue
		}
		output = append(output, entry)
	}
	return
}

// logger returns the configured logger or apex/log's default.
func (il *InputLoader) logger() InputLoaderLogger {
	if il.Logger != nil {
		return il.Logger
	}
	return log.Log
}