2018-09-05 18:40:37 +02:00
package database
import (
2018-09-07 12:55:27 +02:00
"database/sql"
2018-09-10 12:41:28 +02:00
"encoding/json"
2020-01-28 11:53:00 +01:00
"fmt"
2021-01-11 19:59:20 +01:00
"net/http"
2021-01-22 12:14:19 +01:00
"net/url"
2018-09-17 17:30:29 +02:00
"os"
2020-01-28 11:53:00 +01:00
"path/filepath"
2018-09-10 16:29:14 +02:00
"reflect"
2018-09-05 18:40:37 +02:00
"time"
"github.com/apex/log"
2022-11-15 10:35:30 +01:00
"github.com/ooni/probe-cli/v3/internal/engine"
2018-09-05 18:40:37 +02:00
"github.com/pkg/errors"
2022-05-06 13:05:24 +02:00
"github.com/upper/db/v4"
2018-09-05 18:40:37 +02:00
)
2022-11-16 15:51:41 +01:00
// Open calls Connect with dbpath and wraps the session it returns in a
// Database. An error from Connect is returned unchanged, with a nil Database.
func Open(dbpath string) (*Database, error) {
	sess, err := Connect(dbpath)
	if err != nil {
		return nil, err
	}
	database := &Database{sess: sess}
	return database, nil
}
// Database is the handle used to store and query measurements. It wraps a
// single upper/db session.
type Database struct {
	// sess is the session every method of Database runs its queries on.
	sess db.Session
}
// Session exposes the upper/db session held by d.
func (d *Database) Session() db.Session {
	return d.sess
}
2018-09-05 18:40:37 +02:00
// ListMeasurements given a result ID
2022-11-16 15:51:41 +01:00
func ( d * Database ) ListMeasurements ( resultID int64 ) ( [ ] MeasurementURLNetwork , error ) {
2018-09-07 15:16:20 +02:00
measurements := [ ] MeasurementURLNetwork { }
2022-11-16 15:51:41 +01:00
req := d . sess . SQL ( ) . Select (
2018-09-07 15:16:20 +02:00
db . Raw ( "networks.*" ) ,
db . Raw ( "urls.*" ) ,
db . Raw ( "measurements.*" ) ,
2018-09-12 15:41:54 +02:00
db . Raw ( "results.*" ) ,
2018-09-07 15:16:20 +02:00
) . From ( "results" ) .
2018-09-17 17:30:38 +02:00
Join ( "measurements" ) . On ( "results.result_id = measurements.result_id" ) .
Join ( "networks" ) . On ( "results.network_id = networks.network_id" ) .
LeftJoin ( "urls" ) . On ( "urls.url_id = measurements.url_id" ) .
OrderBy ( "measurements.measurement_start_time" ) .
Where ( "results.result_id = ?" , resultID )
2018-09-07 15:16:20 +02:00
if err := req . All ( & measurements ) ; err != nil {
log . Errorf ( "failed to run query %s: %v" , req . String ( ) , err )
return measurements , err
}
2018-09-05 18:40:37 +02:00
return measurements , nil
}
2019-10-03 11:18:07 +02:00
// GetMeasurementJSON returns a map[string]interface{} given a database and a measurementID
2022-11-16 15:51:41 +01:00
func ( d * Database ) GetMeasurementJSON ( measurementID int64 ) ( map [ string ] interface { } , error ) {
2019-10-02 18:23:14 +02:00
var (
measurement MeasurementURLNetwork
2019-10-03 11:18:07 +02:00
msmtJSON map [ string ] interface { }
2019-10-02 18:23:14 +02:00
)
2022-11-16 15:51:41 +01:00
req := d . sess . SQL ( ) . Select (
2019-10-02 18:23:14 +02:00
db . Raw ( "urls.*" ) ,
db . Raw ( "measurements.*" ) ,
) . From ( "measurements" ) .
LeftJoin ( "urls" ) . On ( "urls.url_id = measurements.url_id" ) .
Where ( "measurements.measurement_id= ?" , measurementID )
if err := req . One ( & measurement ) ; err != nil {
log . Errorf ( "failed to run query %s: %v" , req . String ( ) , err )
return nil , err
}
2021-04-30 17:08:16 +02:00
if measurement . Measurement . IsUploaded {
2021-01-11 19:59:20 +01:00
// TODO(bassosimone): this should be a function exposed by probe-engine
reportID := measurement . Measurement . ReportID . String
2021-01-22 12:14:19 +01:00
measurementURL := & url . URL {
Scheme : "https" ,
Host : "api.ooni.io" ,
Path : "/api/v1/raw_measurement" ,
}
query := url . Values { }
query . Add ( "report_id" , reportID )
2022-11-15 10:35:30 +01:00
if measurement . URL . URL . Valid {
2021-01-22 12:14:19 +01:00
query . Add ( "input" , measurement . URL . URL . String )
2021-01-11 19:59:20 +01:00
}
2021-01-22 12:14:19 +01:00
measurementURL . RawQuery = query . Encode ( )
log . Debugf ( "using %s" , measurementURL . String ( ) )
resp , err := http . Get ( measurementURL . String ( ) )
2021-01-11 19:59:20 +01:00
if err != nil {
log . Errorf ( "failed to fetch the measurement %s %s" , reportID , measurement . URL . URL . String )
return nil , err
}
defer resp . Body . Close ( )
if err := json . NewDecoder ( resp . Body ) . Decode ( & msmtJSON ) ; err != nil {
log . Error ( "failed to unmarshal the measurement_json" )
return nil , err
}
return msmtJSON , nil
}
2020-01-29 18:48:12 +01:00
// MeasurementFilePath might be NULL because the measurement from a
// 3.0.0-beta install
2022-11-15 10:35:30 +01:00
if ! measurement . Measurement . MeasurementFilePath . Valid {
2020-01-29 10:43:08 +01:00
log . Error ( "invalid measurement_file_path" )
log . Error ( "backup your OONI_HOME and run `ooniprobe reset`" )
return nil , errors . New ( "cannot access measurement file" )
}
2020-01-28 11:53:00 +01:00
measurementFilePath := measurement . Measurement . MeasurementFilePath . String
2021-06-15 14:01:45 +02:00
b , err := os . ReadFile ( measurementFilePath )
2019-10-02 18:23:14 +02:00
if err != nil {
return nil , err
}
2020-01-28 11:53:00 +01:00
if err := json . Unmarshal ( b , & msmtJSON ) ; err != nil {
2020-02-05 18:28:10 +01:00
log . Error ( "failed to unmarshal the measurement_json" )
log . Error ( "backup your OONI_HOME and run `ooniprobe reset`" )
2020-01-28 11:53:00 +01:00
return nil , err
2019-10-02 18:23:14 +02:00
}
2020-01-28 11:53:00 +01:00
return msmtJSON , nil
2019-10-02 18:23:14 +02:00
}
2018-09-05 18:40:37 +02:00
// ListResults return the list of results
2022-11-16 15:51:41 +01:00
func ( d * Database ) ListResults ( ) ( [ ] ResultNetwork , [ ] ResultNetwork , error ) {
2018-09-07 15:16:20 +02:00
doneResults := [ ] ResultNetwork { }
incompleteResults := [ ] ResultNetwork { }
2022-11-16 15:51:41 +01:00
req := d . sess . SQL ( ) . Select (
2022-01-14 11:24:43 +01:00
db . Raw ( "networks.network_name" ) ,
db . Raw ( "networks.network_type" ) ,
db . Raw ( "networks.ip" ) ,
db . Raw ( "networks.asn" ) ,
db . Raw ( "networks.network_country_code" ) ,
db . Raw ( "results.result_id" ) ,
db . Raw ( "results.test_group_name" ) ,
db . Raw ( "results.result_start_time" ) ,
db . Raw ( "results.network_id" ) ,
db . Raw ( "results.result_is_viewed" ) ,
db . Raw ( "results.result_runtime" ) ,
db . Raw ( "results.result_is_done" ) ,
db . Raw ( "results.result_is_uploaded" ) ,
db . Raw ( "results.result_data_usage_up" ) ,
db . Raw ( "results.result_data_usage_down" ) ,
db . Raw ( "results.measurement_dir" ) ,
db . Raw ( "COUNT(CASE WHEN measurements.is_anomaly = TRUE THEN 1 END) as anomaly_count" ) ,
db . Raw ( "COUNT() as total_count" ) ,
// The test_keys column are concanetated with the "|" character as a separator.
// We consider this to be safe since we only really care about values of the
// performance test_keys where the values are all numbers and none of the keys
// contain the "|" character.
db . Raw ( "group_concat(test_keys, '|') as test_keys" ) ,
2018-09-07 15:16:20 +02:00
) . From ( "results" ) .
2018-09-17 17:30:29 +02:00
Join ( "networks" ) . On ( "results.network_id = networks.network_id" ) .
2022-01-14 11:24:43 +01:00
Join ( "measurements" ) . On ( "measurements.result_id = results.result_id" ) .
OrderBy ( "results.result_start_time" ) .
GroupBy (
db . Raw ( "networks.network_name" ) ,
db . Raw ( "networks.network_type" ) ,
db . Raw ( "networks.ip" ) ,
db . Raw ( "networks.asn" ) ,
db . Raw ( "networks.network_country_code" ) ,
db . Raw ( "results.result_id" ) ,
db . Raw ( "results.test_group_name" ) ,
db . Raw ( "results.result_start_time" ) ,
db . Raw ( "results.network_id" ) ,
db . Raw ( "results.result_is_viewed" ) ,
db . Raw ( "results.result_runtime" ) ,
db . Raw ( "results.result_is_done" ) ,
db . Raw ( "results.result_is_uploaded" ) ,
db . Raw ( "results.result_data_usage_up" ) ,
db . Raw ( "results.result_data_usage_down" ) ,
db . Raw ( "results.measurement_dir" ) ,
)
2018-09-17 17:30:29 +02:00
if err := req . Where ( "result_is_done = true" ) . All ( & doneResults ) ; err != nil {
2018-09-07 15:16:20 +02:00
return doneResults , incompleteResults , errors . Wrap ( err , "failed to get result done list" )
}
2018-09-17 17:30:29 +02:00
if err := req . Where ( "result_is_done = false" ) . All ( & incompleteResults ) ; err != nil {
2018-09-07 15:16:20 +02:00
return doneResults , incompleteResults , errors . Wrap ( err , "failed to get result done list" )
}
2018-09-05 18:40:37 +02:00
return doneResults , incompleteResults , nil
}
2018-09-17 17:30:29 +02:00
// DeleteResult will delete a particular result and the relative measurement on
// disk.
2022-11-16 15:51:41 +01:00
func ( d * Database ) DeleteResult ( resultID int64 ) error {
2018-09-17 17:30:29 +02:00
var result Result
2022-11-16 15:51:41 +01:00
res := d . sess . Collection ( "results" ) . Find ( "result_id" , resultID )
2018-09-17 17:30:29 +02:00
if err := res . One ( & result ) ; err != nil {
2018-09-18 09:54:27 +02:00
if err == db . ErrNoMoreRows {
return err
}
2018-09-17 17:30:29 +02:00
log . WithError ( err ) . Error ( "error in obtaining the result" )
return err
}
if err := res . Delete ( ) ; err != nil {
log . WithError ( err ) . Error ( "failed to delete the result directory" )
return err
}
os . RemoveAll ( result . MeasurementDir )
return nil
}
2021-04-30 17:08:16 +02:00
// UpdateUploadedStatus will check if all the measurements inside of a given result set have been uploaded and if so will set the is_uploaded flag to true
2022-11-16 15:51:41 +01:00
func ( d * Database ) UpdateUploadedStatus ( result * Result ) error {
err := d . sess . Tx ( func ( tx db . Session ) error {
2022-05-06 13:05:24 +02:00
uploadedTotal := UploadedTotalCount { }
req := tx . SQL ( ) . Select (
db . Raw ( "SUM(measurements.measurement_is_uploaded)" ) ,
db . Raw ( "COUNT(*)" ) ,
) . From ( "results" ) .
Join ( "measurements" ) . On ( "measurements.result_id = results.result_id" ) .
Where ( "results.result_id = ?" , result . ID )
2021-04-30 17:08:16 +02:00
2022-05-06 13:05:24 +02:00
err := req . One ( & uploadedTotal )
if err != nil {
log . WithError ( err ) . Error ( "failed to retrieve total vs uploaded counts" )
return err
}
if uploadedTotal . UploadedCount == uploadedTotal . TotalCount {
result . IsUploaded = true
} else {
result . IsUploaded = false
}
err = tx . Collection ( "results" ) . Find ( "result_id" , result . ID ) . Update ( result )
if err != nil {
log . WithError ( err ) . Error ( "failed to update result" )
return errors . Wrap ( err , "updating result" )
}
return nil
} )
2021-04-30 17:08:16 +02:00
if err != nil {
log . WithError ( err ) . Error ( "Failed to write to the results table" )
return err
}
return nil
}
2018-09-05 18:40:37 +02:00
// CreateMeasurement writes the measurement to the database a returns a pointer
// to the Measurement
2022-11-16 15:51:41 +01:00
func ( d * Database ) CreateMeasurement ( reportID sql . NullString , testName string , measurementDir string , idx int , resultID int64 , urlID sql . NullInt64 ) ( * Measurement , error ) {
2020-02-05 18:26:16 +01:00
// TODO we should look into generating this file path in a more robust way.
// If there are two identical test_names in the same test group there is
// going to be a clash of test_name
msmtFilePath := filepath . Join ( measurementDir , fmt . Sprintf ( "msmt-%s-%d.json" , testName , idx ) )
2018-09-07 12:55:27 +02:00
msmt := Measurement {
2020-01-28 11:53:00 +01:00
ReportID : reportID ,
TestName : testName ,
ResultID : resultID ,
MeasurementFilePath : sql . NullString { String : msmtFilePath , Valid : true } ,
URLID : urlID ,
IsFailed : false ,
IsDone : false ,
2018-09-07 12:55:27 +02:00
// XXX Do we want to have this be part of something else?
StartTime : time . Now ( ) . UTC ( ) ,
TestKeys : "" ,
}
2018-09-05 18:40:37 +02:00
2022-11-16 15:51:41 +01:00
newID , err := d . sess . Collection ( "measurements" ) . Insert ( msmt )
2018-09-05 18:40:37 +02:00
if err != nil {
return nil , errors . Wrap ( err , "creating measurement" )
}
2022-05-06 13:05:24 +02:00
msmt . ID = newID . ID ( ) . ( int64 )
2018-09-07 12:55:27 +02:00
return & msmt , nil
2018-09-05 18:40:37 +02:00
}
// CreateResult writes the Result to the database a returns a pointer
// to the Result
2022-11-16 15:51:41 +01:00
func ( d * Database ) CreateResult ( homePath string , testGroupName string , networkID int64 ) ( * Result , error ) {
2018-09-07 12:55:27 +02:00
startTime := time . Now ( ) . UTC ( )
2018-09-05 18:40:37 +02:00
2022-11-15 10:35:30 +01:00
p , err := makeResultsDir ( homePath , testGroupName , startTime )
2018-09-05 18:40:37 +02:00
if err != nil {
return nil , err
}
2018-09-07 12:55:27 +02:00
result := Result {
TestGroupName : testGroupName ,
StartTime : startTime ,
NetworkID : networkID ,
}
result . MeasurementDir = p
log . Debugf ( "Creating result %v" , result )
2022-11-16 15:51:41 +01:00
newID , err := d . sess . Collection ( "results" ) . Insert ( result )
2018-09-05 18:40:37 +02:00
if err != nil {
return nil , errors . Wrap ( err , "creating result" )
}
2022-05-06 13:05:24 +02:00
result . ID = newID . ID ( ) . ( int64 )
2018-09-07 12:55:27 +02:00
return & result , nil
}
// CreateNetwork will create a new network in the network table
2022-11-16 15:51:41 +01:00
func ( d * Database ) CreateNetwork ( loc engine . LocationProvider ) ( * Network , error ) {
2018-09-07 12:55:27 +02:00
network := Network {
2019-10-03 09:43:25 +02:00
ASN : loc . ProbeASN ( ) ,
CountryCode : loc . ProbeCC ( ) ,
NetworkName : loc . ProbeNetworkName ( ) ,
2018-09-07 14:06:08 +02:00
// On desktop we consider it to always be wifi
NetworkType : "wifi" ,
2019-10-03 09:43:25 +02:00
IP : loc . ProbeIP ( ) ,
2018-09-07 12:55:27 +02:00
}
2022-11-16 15:51:41 +01:00
newID , err := d . sess . Collection ( "networks" ) . Insert ( network )
2018-09-07 12:55:27 +02:00
if err != nil {
return nil , err
}
2022-05-06 13:05:24 +02:00
network . ID = newID . ID ( ) . ( int64 )
2018-09-07 12:55:27 +02:00
return & network , nil
2018-09-05 18:40:37 +02:00
}
2018-09-07 15:23:29 +02:00
// CreateOrUpdateURL will create a new URL entry to the urls table if it doesn't
// exists, otherwise it will update the category code of the one already in
// there.
2022-11-16 15:51:41 +01:00
func ( d * Database ) CreateOrUpdateURL ( urlStr string , categoryCode string , countryCode string ) ( int64 , error ) {
2018-09-12 18:47:57 +02:00
var url URL
2018-09-07 15:23:29 +02:00
2022-11-16 15:51:41 +01:00
err := d . sess . Tx ( func ( tx db . Session ) error {
2022-05-06 13:05:24 +02:00
res := tx . Collection ( "urls" ) . Find (
db . Cond { "url" : urlStr , "url_country_code" : countryCode } ,
)
err := res . One ( & url )
2018-09-07 15:23:29 +02:00
2022-05-06 13:05:24 +02:00
if err == db . ErrNoMoreRows {
url = URL {
URL : sql . NullString { String : urlStr , Valid : true } ,
CategoryCode : sql . NullString { String : categoryCode , Valid : true } ,
CountryCode : sql . NullString { String : countryCode , Valid : true } ,
}
newID , insErr := tx . Collection ( "urls" ) . Insert ( url )
if insErr != nil {
log . Error ( "Failed to insert into the URLs table" )
return insErr
}
url . ID = sql . NullInt64 { Int64 : newID . ID ( ) . ( int64 ) , Valid : true }
} else if err != nil {
log . WithError ( err ) . Error ( "Failed to get single result" )
return err
} else {
url . CategoryCode = sql . NullString { String : categoryCode , Valid : true }
res . Update ( url )
2018-09-07 15:23:29 +02:00
}
2018-09-12 18:47:57 +02:00
2022-05-06 13:05:24 +02:00
return nil
} )
2018-09-12 18:47:57 +02:00
if err != nil {
log . WithError ( err ) . Error ( "Failed to write to the URL table" )
return 0 , err
2018-09-07 15:23:29 +02:00
}
2018-09-12 18:47:57 +02:00
return url . ID . Int64 , nil
2018-09-07 15:23:29 +02:00
}
2018-09-10 12:41:28 +02:00
// AddTestKeys writes the summary to the measurement
2022-11-16 15:51:41 +01:00
func ( d * Database ) AddTestKeys ( msmt * Measurement , tk interface { } ) error {
2018-09-10 16:29:14 +02:00
var (
isAnomaly bool
isAnomalyValid bool
)
2018-09-10 12:41:28 +02:00
tkBytes , err := json . Marshal ( tk )
if err != nil {
log . WithError ( err ) . Error ( "failed to serialize summary" )
}
2018-09-10 16:29:14 +02:00
// This is necessary so that we can extract from the the opaque testKeys just
// the IsAnomaly field of bool type.
// Maybe generics are not so bad after-all, heh golang?
isAnomalyValue := reflect . ValueOf ( tk ) . FieldByName ( "IsAnomaly" )
2022-11-15 10:35:30 +01:00
if isAnomalyValue . IsValid ( ) && isAnomalyValue . Kind ( ) == reflect . Bool {
2018-09-10 16:29:14 +02:00
isAnomaly = isAnomalyValue . Bool ( )
isAnomalyValid = true
}
2018-09-10 12:41:28 +02:00
msmt . TestKeys = string ( tkBytes )
2018-09-10 16:29:14 +02:00
msmt . IsAnomaly = sql . NullBool { Bool : isAnomaly , Valid : isAnomalyValid }
2018-09-10 12:41:28 +02:00
2022-11-16 15:51:41 +01:00
err = d . sess . Collection ( "measurements" ) . Find ( "measurement_id" , msmt . ID ) . Update ( msmt )
2018-09-10 12:41:28 +02:00
if err != nil {
2018-09-10 16:29:14 +02:00
log . WithError ( err ) . Error ( "failed to update measurement" )
2018-09-10 12:41:28 +02:00
return errors . Wrap ( err , "updating measurement" )
}
return nil
}
2022-11-16 15:51:41 +01:00
// Close closes the underlying database session and returns the error, if any,
// reported by it.
func (d *Database) Close() error {
	err := d.sess.Close()
	return err
}