Use the geostatistical toolbox container¶

This short tutorial shows how you can interact with the container with the help of toolbox-runner. The runner is still under development, and interacting with the result files is still a bit messy. Right now, the results are placed into a temporary folder and the result files are extracted by hand. In a stable release, toolbox-runner will have helper functions for that purpose.

In [1]:
from toolbox_runner import list_tools
import io
import tempfile
import json
import pandas as pd
import xarray as xr
import numpy as np
from pprint import pprint

Use list_tools to get a list or dict of all tool images present on your system. Additionally, a CSV file with example data is loaded. The list_tools function also accepts a prefix, in case you did not prefix your tool containers with 'tbr_'.

In [2]:
tools = list_tools(as_dict=True)
pprint(tools)
{'cross-validation': cross-validation: Leave-one-out cross-validation  FROM tbr_skgstat:latest VERSION: 1.0,
 'kriging': kriging: Kriging interpolation  FROM tbr_skgstat:latest VERSION: 1.0,
 'profile': profile: Dataset Profile  FROM tbr_profile:latest VERSION: 0.2,
 'sample': sample: Sample field data  FROM tbr_skgstat:latest VERSION: 1.0,
 'simulation': simulation: Geostatistical simulation  FROM tbr_skgstat:latest VERSION: 1.0,
 'variogram': variogram: Variogram fitting  FROM tbr_skgstat:latest VERSION: 1.1}
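If your images are tagged with a different prefix, it can be passed to list_tools explicitly. This is a minimal sketch; the keyword name prefix is an assumption here, so check the toolbox-runner documentation for the exact signature.

# hypothetical prefix 'my_'; only needed if your images are not tagged 'tbr_*'
custom_tools = list_tools(prefix='my_', as_dict=True)
pprint(custom_tools)
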
In [4]:
# load some data samples
df = pd.read_csv('in/meuse.csv')
coords = df[['x', 'y']].values
vals = df.lead.values
print(df.head())

ds = xr.open_dataset('in/cmip_prec.nc')
ds
        x       y  lead
0  181072  333611   299
1  181025  333558   277
2  181165  333537   199
3  181298  333484   116
4  181307  333330   117
Out[4]:
<xarray.Dataset>
Dimensions:  (x: 50, y: 105)
Dimensions without coordinates: x, y
Data variables:
    prec     (x, y) float32 0.4414 0.4412 0.4426 0.4426 ... 3.987 3.998 3.992

    General info about tools¶

    Although using toolbox-runner is not mandatory and the tool containers are self-contained, there are some helpful functions. You can get structured metadata about each tool:

    In [5]:
    vario = tools.get('variogram')
    
    print(vario.title)
    print(vario.description)
    print('\nPARAMETERS\n---------\n')
    for key, conf in vario.parameters.items():
        print(f"## {key}")
        print(f"input type:    {conf['type']}")
        print(f"description:   {conf.get('description', 'no description provided')}")
        print()
    
    Variogram fitting
    Estimate an empirical variogram and fit a model
    
    PARAMETERS
    ---------
    
    ## coordinates
    input type:    file
    description:   Pass either a (N, D) shaped numpy array, or a .mat file containing the matrix of observation location coordinates
    
    ## values
    input type:    file
    description:   Pass either a (N, 1) shaped numpy array or a .mat file containing the matrix of observations
    
    ## n_lags
    input type:    integer
    description:   no description provided
    
    ## bin_func
    input type:    enum
    description:   no description provided
    
    ## model
    input type:    enum
    description:   no description provided
    
    ## estimator
    input type:    enum
    description:   no description provided
    
    ## maxlag
    input type:    string
    description:   Can be 'median', 'mean', a number < 1 for a ratio of maximum separating distance or a number > 1 for an absolute distance
    
    ## fit_method
    input type:    enum
    description:   no description provided
    
    ## use_nugget
    input type:    bool
    description:   Enable the nugget parameter. Defaults to False, which will set the nugget parameter to 0.
    
    ## fit_range
    input type:    float
    description:   Only valid if fit_method='manual'. The variogram effective range.
    
    ## fit_sill
    input type:    float
    description:   Only valid if fit_method='manual'. The variogram sill.
    
    ## fit_nugget
    input type:    float
    description:   Only valid if fit_method='manual'. The variogram nugget.
    
    ## fit_sigma
    input type:    enum
    description:   Use a distance dependent weight on fits to favor closer bins. Do not set for
    
    
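    Since tools is a plain dictionary, the same attributes can be used for a quick one-line overview of every tool:

    # one-line summary per tool, using the title attribute shown above
    for name, tool in tools.items():
        print(f"{name:<20} {tool.title}")
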

    Variogram¶

    Run the variogram tool to estimate a base variogram, which will then be used by the other tools. The variogram parameterization is stored as './out/variogram.json' in the result tarball. Right now, we have to read and parse that JSON ourselves (a manual sketch follows at the end of this section).

    The container also produces HTML and JSON plotly figures, a PDF figure, and a copy of all settings of the skg.Variogram instance.

    In [12]:
    # get the tool
    vario = tools.get('variogram')
    
    # run
    step = vario.run(result_path='./test', coordinates=coords, values=vals, maxlag='median', model='exponential', bin_func='scott')
    
    # the parameters are stored in variogram.json
    vario_params = step.get('variogram.json')
    pprint(vario_params)
    
    {'bin_func': 'scott',
     'dist_func': 'euclidean',
     'estimator': 'matheron',
     'fit_method': 'trf',
     'fit_sigma': None,
     'maxlag': 1372.6660191029719,
     'model': 'exponential',
     'n_lags': 10,
     'normalize': False,
     'use_nugget': False,
     'verbose': False}
    
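    The step.get('variogram.json') call above already extracted and parsed the file for us. Doing it by hand would look roughly like the sketch below; the tarball path is hypothetical and the archive layout is assumed to follow the './out/variogram.json' convention mentioned above.

    import tarfile

    # hypothetical path to the result tarball written by the variogram run
    with tarfile.open('./test/variogram_result.tar.gz', 'r:gz') as tar:
        # 'out/variogram.json' is the assumed member name inside the archive
        fobj = tar.extractfile('out/variogram.json')
        vario_params_manual = json.load(fobj)

    pprint(vario_params_manual)
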

    Simulation¶

    Run a geostatistical simulation using the variogram from the last step. The simulation results are returned as .dat files. Right now, we need to put these into a file-like object and read them using numpy (a manual sketch follows after the run below); that will be improved in the future.

    The current version of the tool can only return the mean and standard deviation of the simulated fields. A future version will optionally export a netCDF file containing every simulation.

    In [13]:
    simu = tools.get('simulation')
    
    # run a simulation
    step = simu.run(result_path='./test', coordinates=coords, values=vals, variogram=vario_params, n_simulations=10, grid='50x50')
    
    # print the log
    print(step.log)
    
    # print all outputs
    print(step.outputs)
    
    # .dat files can be loaded into numpy. Load the mean of all simulations
    mean = step.get('simulation_mean.dat')
    
    mean
    
    Estimating variogram...
    exponential Variogram
    ---------------------
    Estimator:         matheron
    Effective Range:   1372.67
    Sill:              17740.72
    Nugget:            0.00
            
    Starting 10 iterations seeded 42
    
    ['STDERR.log', 'STDOUT.log', 'result.json', 'simulation_mean.dat', 'simulation_std.dat', 'variogram.html', 'variogram.json', 'variogram.pdf']
    
    Out[13]:
    array([[171.82328018, 177.51347383, 164.87992787, ..., 121.29299786,
            146.47249759, 181.21038282],
           [173.66173153, 187.09362293, 169.08039522, ..., 129.47851113,
            138.09837756, 168.64932544],
           [192.41124558, 199.84261111, 184.90009063, ..., 178.66673437,
            147.14080855, 184.20576035],
           ...,
           [154.72179719, 184.44261002, 185.62482896, ...,  98.83145353,
            112.23031823, 130.52692855],
           [192.51765686, 180.46835029, 158.42274169, ...,  92.45844056,
            106.70579276, 125.39187714],
           [165.30685215, 169.39535479, 187.13380149, ..., 112.48571592,
            116.8817058 , 129.10454359]])

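    For completeness, the manual route mentioned above (file-like object plus numpy) could look like this; the path below is hypothetical, as step.get() already handled the extraction for us.

    # hypothetical path to the extracted .dat file from the result tarball
    raw = open('./test/out/simulation_mean.dat', 'rb').read()
    mean_manual = np.loadtxt(io.BytesIO(raw))
    print(mean_manual.shape)  # should be (50, 50) for grid='50x50'
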
    Plot the simulation result¶

    The tool does not yet include an automatic plot in its results (like the variogram tool does), but we can easily build one using plotly.

    In [14]:
    import plotly.offline as py
    import plotly.graph_objects as go
    py.init_notebook_mode(connected=True)
    
    In [15]:
    fig = go.Figure()
    
    xx = np.mgrid[coords[:,0].min():coords[:,0].max():50j]
    yy = np.mgrid[coords[:,1].min():coords[:,1].max():50j]
    
    fig.add_trace(go.Heatmap(z=mean.T, x=xx, y=yy))
    fig.add_trace(go.Scatter(x=coords[:,0], y=coords[:,1], mode='markers', marker=dict(color=vals)))
    py.iplot(fig)
    

    Kriging¶

    We can pass the same variogram to the kriging tool and compare the interpolation with the simulation.

    In [17]:
    krig = tools.get('kriging')
    
    # run kriging with the same variogram
    step = krig.run(result_path='./test', coordinates=coords, values=vals, variogram=vario_params, algorithm='ordinary', grid='50x50')
    
    # print outputs
    print(f'OUTPUTS: {step.outputs}')
    
    # get the kriging field
    krig_field = step.get('kriging.dat')
    
    OUTPUTS: ['STDERR.log', 'STDOUT.log', 'kriging.dat', 'result.json', 'sigma.dat', 'variogram.html', 'variogram.json', 'variogram.pdf']
    
    In [18]:
    fig = go.Figure()
    
    xx = np.mgrid[coords[:,0].min():coords[:,0].max():50j]
    yy = np.mgrid[coords[:,1].min():coords[:,1].max():50j]
    
    fig.add_trace(go.Heatmap(z=krig_field.T, x=xx, y=yy))
    fig.add_trace(go.Scatter(x=coords[:,0], y=coords[:,1], mode='markers', marker=dict(color=vals)))
    py.iplot(fig)
    
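    To actually compare the interpolation with the simulation, a difference map between the kriging field and the simulation mean can be built the same way; both fields were computed on the same 50x50 grid, so they can be subtracted directly.

    # difference between the kriging interpolation and the mean of the simulations
    diff = krig_field - mean

    fig = go.Figure()
    fig.add_trace(go.Heatmap(z=diff.T, x=xx, y=yy, colorscale='RdBu', zmid=0))
    py.iplot(fig)
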

    Sampling¶

    We can use the geostatistics toolbox for sampling as well. We need any kind of field input and specify the number of samples. Right now, the toolbox supports random sampling and sampling on a regular grid.
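
    If you are unsure which values the method parameter accepts, the same parameter metadata shown in the 'General info about tools' section can be inspected for the sample tool:

    # inspect the sample tool's parameter metadata before running it
    pprint(tools.get('sample').parameters.get('method'))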

    In [19]:
    sample = tools.get('sample')
    
    # run the step and pass the values of the netCDF DataArray in; passing netCDF directly is not yet supported
    step = sample.run(result_path='./test', field=ds.prec.values, sample_size=200, method='random', seed=42)
    
    sam_coords = step.get('coordinates.dat')
    sam_values = step.get('values.dat')
    
    print(sam_coords[:5], sam_values[:5])
    
    [[46. 37.]
     [30. 52.]
     [36. 46.]
     [20.  9.]
     [19.  9.]] [1.56643319 1.21410108 1.09151483 0.90275711 0.79714686]
    

    These coordinates and values can now be used in the variogram tool again.

    Cross-validation¶

    The geostatistics toolbox can also be used for cross-validation of variograms. Here, we will use the new samples from the last part and derive two new variograms. Both are then assessed using cross-validation.

    In [21]:
    cv = tools.get('cross-validation')
    
    # calculate the two variograms
    v1 = vario.run(result_path='./test', coordinates=sam_coords, values=sam_values, maxlag='median', n_lags=25, model='exponential')
    v2 = vario.run(result_path='./test', coordinates=sam_coords, values=sam_values, maxlag='median', n_lags=25, model='gaussian')
    
    v1_params = v1.get('variogram.json')
    v2_params = v2.get('variogram.json')
    
    pprint(v1_params)
    pprint(v2_params)
    
    {'bin_func': 'even',
     'dist_func': 'euclidean',
     'estimator': 'matheron',
     'fit_method': 'trf',
     'fit_sigma': None,
     'maxlag': 36.61966684720111,
     'model': 'exponential',
     'n_lags': 25,
     'normalize': False,
     'use_nugget': False,
     'verbose': False}
    {'bin_func': 'even',
     'dist_func': 'euclidean',
     'estimator': 'matheron',
     'fit_method': 'trf',
     'fit_sigma': None,
     'maxlag': 36.61966684720111,
     'model': 'gaussian',
     'n_lags': 25,
     'normalize': False,
     'use_nugget': False,
     'verbose': False}
    
    In [30]:
    cv1 = cv.run(result_path='./test', coordinates=sam_coords, values=sam_values, variogram=v1_params, measure='rmse')
    cv2 = cv.run(result_path='./test', coordinates=sam_coords, values=sam_values, variogram=v2_params, measure='rmse')
    
    print(cv1.log)
    print(cv2.log)
    
    Estimating variogram...
    exponential Variogram
    ---------------------
    Estimator:         matheron
    Effective Range:   36.62
    Sill:              0.61
    Nugget:            0.00
            
    Took 2.56 seconds.
    RMSE: 1.0448182745681351
    
    Estimating variogram...
    gaussian Variogram
    ------------------
    Estimator:         matheron
    Effective Range:   36.62
    Sill:              0.71
    Nugget:            0.00
            
    Took 2.82 seconds.
    RMSE: 17.731107405500516
    
    
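    To compare the scores programmatically instead of reading the logs, the RMSE line can be parsed from the log text; this sketch assumes the log format stays exactly as printed above.

    import re

    def rmse_from_log(log: str) -> float:
        # grab the value from the trailing 'RMSE: <value>' line of a cross-validation log
        return float(re.findall(r"RMSE:\s*([\d.]+)", log)[-1])

    scores = {'exponential': rmse_from_log(cv1.log), 'gaussian': rmse_from_log(cv2.log)}
    print(scores)
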

    You can see that the cross-validation of the variogram models clearly favors the exponential over the gaussian model for the random sample taken in the last step.