ooni-probe-cli/internal/engine/experiment/urlgetter/multi.go

package urlgetter

import (
	"context"
	"fmt"
	"time"

	"github.com/ooni/probe-cli/v3/internal/model"
)

// MultiInput describes a single measurement target submitted to Multi.Run().
// Each MultiInput is turned into one Getter invocation by the workers.
type MultiInput struct {
	// Config is the urlgetter configuration to use for this target. It
	// is copied verbatim into the Getter that measures the target.
	Config Config

	// Target is the target URL to measure.
	Target string
}

// MultiOutput is the result that Multi.Run() posts on its output channel
// for each MultiInput it has measured.
type MultiOutput struct {
	// Input is the input this result refers to.
	Input MultiInput

	// Err is the error returned by the getter for this input.
	Err error

	// TestKeys contains the test keys returned by the getter for this input.
	TestKeys TestKeys
}

// MultiGetter is the signature of the function that Multi calls to measure a
// single target. Providing a custom MultiGetter allows to override the
// behaviour of Multi for testing purposes.
type MultiGetter func(ctx context.Context, g Getter) (TestKeys, error)

// DefaultMultiGetter is the MultiGetter used when Multi.Getter is nil. It
// simply runs g.Get with the given context and forwards its results.
func DefaultMultiGetter(ctx context.Context, g Getter) (TestKeys, error) {
	tk, err := g.Get(ctx)
	return tk, err
}

// Multi allows to run several urlgetters in parallel. Its methods use a
// value receiver, so each call operates on a copy of these settings.
type Multi struct {
	// Begin is the time when the experiment begun. If you do not
	// set this field, every target is measured independently.
	Begin time.Time

	// Getter is the MultiGetter func to be used. If this is nil we use
	// DefaultMultiGetter, which is what you typically want.
	Getter MultiGetter

	// Parallelism is the optional number of concurrent workers. If this
	// is zero, or negative, we use a reasonable default.
	Parallelism int

	// Session is the session to be used. If this is nil, the Run
	// method will panic with a nil pointer error.
	Session model.ExperimentSession
}

// Run measures every entry of inputs using a pool of worker goroutines and
// returns the channel on which each result is posted. The pool size is
// m.Parallelism when positive and a small built-in default otherwise.
//
// All the requested measurements are always attempted: when ctx is canceled
// or its deadline expires the workers keep going, so you should expect the
// remaining measurements to fail rather than to be skipped. The returned
// channel is not closed by this method, hence you know you're done when you've
// read exactly len(inputs) results from it.
func (m Multi) Run(ctx context.Context, inputs []MultiInput) <-chan MultiOutput {
	const defaultParallelism = 3
	workers := defaultParallelism
	if m.Parallelism > 0 {
		workers = m.Parallelism
	}
	jobs := make(chan MultiInput)
	results := make(chan MultiOutput)
	// A single producer feeds the jobs channel; all workers share it and
	// terminate once the producer closes it.
	go m.source(inputs, jobs)
	for ; workers > 0; workers-- {
		go m.do(ctx, jobs, results)
	}
	return results
}

// Collect runs urlgetter on every provided input, reports progress through
// callbacks, and posts each result on the returned channel, which is closed
// once all the results have been delivered. Progress is computed relative
// to len(inputs), i.e., this call is treated as the whole experiment.
func (m Multi) Collect(ctx context.Context, inputs []MultiInput,
	prefix string, callbacks model.ExperimentCallbacks) <-chan MultiOutput {
	total := len(inputs)
	return m.CollectOverall(ctx, inputs, 0, total, prefix, callbacks)
}

// CollectOverall runs urlgetter on every input in inputChunk and posts each
// result on the returned channel, which is closed when the chunk is done.
// Use this method when an experiment performs multiple collection tasks:
// overallStartIndex is the number of measurements already accounted for by
// previous chunks and overallCount is the total across all chunks, so that
// the progress reported through callbacks reflects the whole experiment.
func (m Multi) CollectOverall(ctx context.Context, inputChunk []MultiInput, overallStartIndex int, overallCount int,
	prefix string, callbacks model.ExperimentCallbacks) <-chan MultiOutput {
	forwarded := make(chan MultiOutput)
	// Start the measurements right away; the collector goroutine below
	// relays their results and emits the progress updates.
	results := m.Run(ctx, inputChunk)
	chunkSize := len(inputChunk)
	go m.collect(chunkSize, overallStartIndex, overallCount, prefix, callbacks, results, forwarded)
	return forwarded
}

// collect reads exactly expect results from inputch. For each of them it
// emits a progress update through callbacks and then forwards the result
// to outputch. The progress is (overallStartIndex + results read so far)
// divided by overallCount. When done, this function closes outputch to
// notify the caller.
func (m Multi) collect(expect int, overallStartIndex int, overallCount int, prefix string, callbacks model.ExperimentCallbacks,
	inputch <-chan MultiOutput, outputch chan<- MultiOutput) {
	defer close(outputch)
	for received := 0; received < expect; received++ {
		result := <-inputch
		// received is zero based, hence the +1 to count this result as done.
		completed := overallStartIndex + received + 1
		progress := float64(completed) / float64(overallCount)
		status := fmt.Sprintf(
			"%s: measure %s: %+v", prefix, result.Input.Target, model.ErrorToStringOrOK(result.Err),
		)
		callbacks.OnProgress(progress, status)
		outputch <- result
	}
}

// source feeds every element of inputs, in order, into inputch. Once all
// the inputs have been posted, it closes inputch so that readers ranging
// over the channel know there is nothing left to do.
func (m Multi) source(inputs []MultiInput, inputch chan<- MultiInput) {
	defer close(inputch)
	for idx := range inputs {
		inputch <- inputs[idx]
	}
}

// do is the body of a worker: it measures each input received from the in
// channel and posts the corresponding MultiOutput on the out channel until
// in is closed. The measurement is performed by m.Getter or, when that is
// nil, by DefaultMultiGetter. This function does not check whether the
// context is done: if the context is canceled, or its deadline expires,
// the remaining inputs are still handed to the getter, which is expected
// to fail them.
func (m Multi) do(ctx context.Context, in <-chan MultiInput, out chan<- MultiOutput) {
	// m is a copy owned by this call, so the getter can be chosen once.
	measure := m.Getter
	if measure == nil {
		measure = DefaultMultiGetter
	}
	for {
		input, more := <-in
		if !more {
			return
		}
		var result MultiOutput
		result.Input = input
		result.TestKeys, result.Err = measure(ctx, Getter{
			Begin:   m.Begin,
			Config:  input.Config,
			Session: m.Session,
			Target:  input.Target,
		})
		out <- result
	}
}
chore: merge probe-engine into probe-cli (#201) This is how I did it: 1. `git clone https://github.com/ooni/probe-engine internal/engine` 2. ``` (cd internal/engine && git describe --tags) v0.23.0 ``` 3. `nvim go.mod` (merging `go.mod` with `internal/engine/go.mod` 4. `rm -rf internal/.git internal/engine/go.{mod,sum}` 5. `git add internal/engine` 6. `find . -type f -name \*.go -exec sed -i 's@/ooni/probe-engine@/ooni/probe-cli/v3/internal/engine@g' {} \;` 7. `go build ./...` (passes) 8. `go test -race ./...` (temporary failure on RiseupVPN) 9. `go mod tidy` 10. this commit message Once this piece of work is done, we can build a new version of `ooniprobe` that is using `internal/engine` directly. We need to do more work to ensure all the other functionality in `probe-engine` (e.g. making mobile packages) are still WAI. Part of https://github.com/ooni/probe/issues/1335 2021-02-02 12:05:47 +01:00			`package urlgetter`

			`import (`
			`"context"`
			`"fmt"`
			`"time"`

refactor: interfaces and data types into the model package (#642) ## Checklist - [x] I have read the [contribution guidelines](https://github.com/ooni/probe-cli/blob/master/CONTRIBUTING.md) - [x] reference issue for this pull request: https://github.com/ooni/probe/issues/1885 - [x] related ooni/spec pull request: N/A Location of the issue tracker: https://github.com/ooni/probe ## Description This PR contains a set of changes to move important interfaces and data types into the `./internal/model` package. The criteria for including an interface or data type in here is roughly that the type should be important and used by several packages. We are especially interested to move more interfaces here to increase modularity. An additional side effect is that, by reading this package, one should be able to understand more quickly how different parts of the codebase interact with each other. This is what I want to move in `internal/model`: - [x] most important interfaces from `internal/netxlite` - [x] everything that was previously part of `internal/engine/model` - [x] mocks from `internal/netxlite/mocks` should also be moved in here as a subpackage 2022-01-03 13:53:23 +01:00			`"github.com/ooni/probe-cli/v3/internal/model"`
chore: merge probe-engine into probe-cli (#201) This is how I did it: 1. `git clone https://github.com/ooni/probe-engine internal/engine` 2. ``` (cd internal/engine && git describe --tags) v0.23.0 ``` 3. `nvim go.mod` (merging `go.mod` with `internal/engine/go.mod` 4. `rm -rf internal/.git internal/engine/go.{mod,sum}` 5. `git add internal/engine` 6. `find . -type f -name \*.go -exec sed -i 's@/ooni/probe-engine@/ooni/probe-cli/v3/internal/engine@g' {} \;` 7. `go build ./...` (passes) 8. `go test -race ./...` (temporary failure on RiseupVPN) 9. `go mod tidy` 10. this commit message Once this piece of work is done, we can build a new version of `ooniprobe` that is using `internal/engine` directly. We need to do more work to ensure all the other functionality in `probe-engine` (e.g. making mobile packages) are still WAI. Part of https://github.com/ooni/probe/issues/1335 2021-02-02 12:05:47 +01:00			`)`

			`// MultiInput is the input for Multi.Run().`
			`type MultiInput struct {`
			`// Config contains the configuration for this target.`
			`Config Config`

			`// Target contains the target URL to measure.`
			`Target string`
			`}`

			`// MultiOutput is the output returned by Multi.Run()`
			`type MultiOutput struct {`
			`// Input is the input for which we measured.`
			`Input MultiInput`

			`// Err contains the measurement error.`
			`Err error`

			`// TestKeys contains the measured test keys.`
			`TestKeys TestKeys`
			`}`

			`// MultiGetter allows to override the behaviour of Multi for testing purposes.`
			`type MultiGetter func(ctx context.Context, g Getter) (TestKeys, error)`

			`// DefaultMultiGetter is the default MultiGetter`
			`func DefaultMultiGetter(ctx context.Context, g Getter) (TestKeys, error) {`
			`return g.Get(ctx)`
			`}`

			`// Multi allows to run several urlgetters in paraller.`
			`type Multi struct {`
			`// Begin is the time when the experiment begun. If you do not`
			`// set this field, every target is measured independently.`
			`Begin time.Time`

			`// Getter is the Getter func to be used. If this is nil we use`
			`// the default getter, which is what you typically want.`
			`Getter MultiGetter`

			`// Parallelism is the optional parallelism to be used. If this is`
			`// zero, or negative, we use a reasonable default.`
			`Parallelism int`

			`// Session is the session to be used. If this is nil, the Run`
			`// method will panic with a nil pointer error.`
			`Session model.ExperimentSession`
			`}`

			`// Run performs several urlgetters in parallel. This function returns a channel`
			`// where each result is posted. This function will always perform all the requested`
			`// measurements: if the ctx is canceled or its deadline expires, then you will see`
			`// a bunch of failed measurements. Since all measurements are always performed,`
			`// you know you're done when you've read len(inputs) results in output.`
			`func (m Multi) Run(ctx context.Context, inputs []MultiInput) <-chan MultiOutput {`
			`parallelism := m.Parallelism`
			`if parallelism <= 0 {`
			`const defaultParallelism = 3`
			`parallelism = defaultParallelism`
			`}`
			`inputch := make(chan MultiInput)`
			`outputch := make(chan MultiOutput)`
			`go m.source(inputs, inputch)`
			`for i := 0; i < parallelism; i++ {`
			`go m.do(ctx, inputch, outputch)`
			`}`
			`return outputch`
			`}`

			`// Collect prints on the output channel the result of running urlgetter`
			`// on every provided input. It closes the output channel when done.`
			`func (m Multi) Collect(ctx context.Context, inputs []MultiInput,`
			`prefix string, callbacks model.ExperimentCallbacks) <-chan MultiOutput {`
			`return m.CollectOverall(ctx, inputs, 0, len(inputs), prefix, callbacks)`
			`}`

			`// CollectOverall prints on the output channel the result of running urlgetter`
			`// on every provided input. You can use this method if you perform multiple collection`
			`// tasks within one experiment as it allows to calculate the overall progress correctly`
			`func (m Multi) CollectOverall(ctx context.Context, inputChunk []MultiInput, overallStartIndex int, overallCount int,`
			`prefix string, callbacks model.ExperimentCallbacks) <-chan MultiOutput {`
			`outputch := make(chan MultiOutput)`
			`go m.collect(len(inputChunk), overallStartIndex, overallCount, prefix, callbacks, m.Run(ctx, inputChunk), outputch)`
			`return outputch`
			`}`

			`// collect drains inputch, prints progress, and emits to outputch. When done, this`
			`// function will close outputch to notify the calller.`
			`func (m Multi) collect(expect int, overallStartIndex int, overallCount int, prefix string, callbacks model.ExperimentCallbacks,`
			`inputch <-chan MultiOutput, outputch chan<- MultiOutput) {`
			`count := overallStartIndex`
			`var index int`
			`defer close(outputch)`
			`for index < expect {`
			`entry := <-inputch`
			`index++`
			`count++`
			`percentage := float64(count) / float64(overallCount)`
			`callbacks.OnProgress(percentage, fmt.Sprintf(`
cleanup: use ErrorToStringOrOK func in other tests that returns nil (#701) Reference issue: https://github.com/ooni/probe/issues/2040 2022-03-08 11:59:44 +01:00			`"%s: measure %s: %+v", prefix, entry.Input.Target, model.ErrorToStringOrOK(entry.Err),`
chore: merge probe-engine into probe-cli (#201) This is how I did it: 1. `git clone https://github.com/ooni/probe-engine internal/engine` 2. ``` (cd internal/engine && git describe --tags) v0.23.0 ``` 3. `nvim go.mod` (merging `go.mod` with `internal/engine/go.mod` 4. `rm -rf internal/.git internal/engine/go.{mod,sum}` 5. `git add internal/engine` 6. `find . -type f -name \*.go -exec sed -i 's@/ooni/probe-engine@/ooni/probe-cli/v3/internal/engine@g' {} \;` 7. `go build ./...` (passes) 8. `go test -race ./...` (temporary failure on RiseupVPN) 9. `go mod tidy` 10. this commit message Once this piece of work is done, we can build a new version of `ooniprobe` that is using `internal/engine` directly. We need to do more work to ensure all the other functionality in `probe-engine` (e.g. making mobile packages) are still WAI. Part of https://github.com/ooni/probe/issues/1335 2021-02-02 12:05:47 +01:00			`))`
			`outputch <- entry`
			`}`
			`}`

			`// source posts all the inputs in the inputch. When done, this`
			`// method will close the input channel to notify the reader.`
			`func (m Multi) source(inputs []MultiInput, inputch chan<- MultiInput) {`
			`defer close(inputch)`
			`for _, input := range inputs {`
			`inputch <- input`
			`}`
			`}`

			`// do performs urlgetter on all the inputs read from the in channel and`
			`// writes the results on the out channel. If the context is canceled, or`
			`// its deadline expires, this function will continue performing all the`
			`// required measurements, which will all fail.`
			`func (m Multi) do(ctx context.Context, in <-chan MultiInput, out chan<- MultiOutput) {`
			`for input := range in {`
			`g := Getter{`
			`Begin: m.Begin,`
			`Config: input.Config,`
			`Session: m.Session,`
			`Target: input.Target,`
			`}`
			`fn := m.Getter`
			`if fn == nil {`
			`fn = DefaultMultiGetter`
			`}`
			`tk, err := fn(ctx, g)`
			`out <- MultiOutput{Input: input, Err: err, TestKeys: tk}`
			`}`
			`}`