# 5.3 - Probability Density Transformation.¶

# Importing GemPy
import gempy as gp
from gempy.bayesian import plot_posterior as pp
from gempy.bayesian.plot_posterior import default_red, default_blue

# Importing auxiliary libraries
import numpy as np
import matplotlib.pyplot as plt
import pymc3 as pm
import arviz as az
import os


## Model definition¶

Same problem as before: let's assume the observations are layer-thickness measurements taken on an outcrop. In the previous example we chose a prior for the mean arbitrarily: $$\mu \sim \text{Normal}(\mu=10.0,\ \sigma=5.0)$$ — something that made sense for this specific data set. If we do not have any further information, keeping an uninformative prior and letting the data dictate the final value of the inference is the sensible way forward. However, this also enables us to add information to the system by setting informative priors.

Imagine we get a borehole with the tops of the two interfaces of interest. Each of these data points will be a random variable itself, since the accuracy of the exact 3D location is always limited. Notice that these two data points refer to depth, not to thickness — the unit of the rest of the observations. Therefore, the first step is to transform these parameters into the observation space; naturally, in this example a simple subtraction will suffice.

Now we can define the probabilistic models:

This is to make it work in sphinx gallery

# Resolve the tutorial data directory: works both when run from the
# repository root and when executed inside the examples tree (sphinx-gallery).
cwd = os.getcwd()
if 'examples' not in cwd:
    path_dir = cwd + '/examples/tutorials/ch5_probabilistic_modeling'
else:
    path_dir = cwd


Define plotting function

def plot_geo_setting_well(geo_model):
    """Render the 2D geological setting with the two wells and the measuring device."""
    # x/y/z location of the measuring device.
    device_xyz = np.array([[6e3, 0, 3700]])
    section = gp.plot_2d(geo_model, show_topography=True, legend=False)
    ax = section.axes[0]

    # Depth of the interface top seen in each well.
    top_in_well_1 = 3.41e3
    top_in_well_2 = 3.6e3

    # Well heads (triangles) and the device location (cross).
    ax.scatter([3e3], [top_in_well_1], marker='^', s=400, c='#71a4b3', zorder=10)
    ax.scatter([9e3], [top_in_well_2], marker='^', s=400, c='#71a4b3', zorder=10)
    ax.scatter(device_xyz[:, 0], device_xyz[:, 2], marker='x', s=400,
               c='#DA8886', zorder=10)

    # Well trajectories: thick gray line plus a default-styled overlay.
    ax.vlines(3e3, .5e3, top_in_well_1, linewidth=4, color='gray')
    ax.vlines(9e3, .5e3, top_in_well_2, linewidth=4, color='gray')
    ax.vlines(3e3, .5e3, top_in_well_1)
    ax.vlines(9e3, .5e3, top_in_well_2)
    ax.set_xlim(2900, 3100)

    plt.show()

# Load the pre-computed 2-layer model from disk and recompile its theano graph.
geo_model = gp.load_model(r'2-layers', path=path_dir + r'/2-layers', recompile=True)
plot_geo_setting_well(geo_model=geo_model)


Out:

Active grids: ['regular']
Active grids: ['regular' 'topography']
Setting kriging parameters to their default values.
Compiling theano function...
Level of Optimization:  fast_compile
Device:  cpu
Precision:  float64
Number of faults:  0
Compilation Done!
Kriging values:
values
range            1.3e+04
$C_o$            4.2e+06
drift equations      [3]

# Thickness observations measured on the outcrop (same data as the previous example).
y_obs = [2.12]
y_obs_list = [2.12, 2.06, 2.08, 2.05, 2.08, 2.09,
              2.19, 2.07, 2.16, 2.11, 2.13, 1.92]

# Fix the RNG so the sampling below is reproducible.
np.random.seed(4003)


# .. image:: /../../_static/computational_graph1.png

with pm.Model() as model:
    # Priors for the absolute depth of each interface, informed by the borehole.
    # Raw strings avoid the invalid escape sequences (\m, \s) that plain
    # '$\mu_{top}$' literals would emit warnings for; the runtime strings
    # (and hence the variable names) are byte-identical to the originals.
    mu_top = pm.Normal(r'$\mu_{top}$', 3.05, .2)
    sigma_top = pm.Gamma(r'$\sigma_{top}$', 0.3, 3)
    y_top = pm.Normal('y_{top}', mu=mu_top, sd=sigma_top, observed=[3.02])

    mu_bottom = pm.Normal(r'$\mu_{bottom}$', 1.02, .2)
    sigma_bottom = pm.Gamma(r'$\sigma_{bottom}$', 0.3, 3)
    y_bottom = pm.Normal('y_{bottom}', mu=mu_bottom, sd=sigma_bottom,
                         observed=[1.02])

    # Transformation into the observation space: thickness = top - bottom.
    mu_t = pm.Deterministic(r'$\mu_{thickness}$', mu_top - mu_bottom)
    sigma_thick = pm.Gamma(r'$\sigma_{thickness}$', 0.3, 3)
    y = pm.Normal('y_{thickness}', mu=mu_t, sd=sigma_thick, observed=y_obs_list)


## Sampling¶

with model:
    # Draw from the prior, run NUTS (keeping tuning draws), then simulate
    # the posterior predictive for each observed variable.
    prior = pm.sample_prior_predictive(1000)
    trace = pm.sample(1000, discard_tuned_samples=False, cores=1)
    post = pm.sample_posterior_predictive(trace)


Out:

█
█
█

# Bundle prior, trace and posterior predictive into a single ArviZ InferenceData.
data = az.from_pymc3(trace=trace,
                     prior=prior,
                     posterior_predictive=post)
data

arviz.InferenceData
• <xarray.Dataset>
Dimensions:               (chain: 2, draw: 1000)
Coordinates:
* chain                 (chain) int64 0 1
* draw                  (draw) int64 0 1 2 3 4 5 6 ... 994 995 996 997 998 999
Data variables:
$\mu_{top}$           (chain, draw) float64 3.02 3.02 3.02 ... 3.027 3.111
$\mu_{bottom}$        (chain, draw) float64 0.9388 0.9388 ... 0.9506 1.019
$\sigma_{top}$        (chain, draw) float64 0.006996 0.006996 ... 0.1242
$\sigma_{bottom}$     (chain, draw) float64 0.1121 0.1121 ... 0.1016 0.01844
$\mu_{thickness}$     (chain, draw) float64 2.082 2.082 ... 2.076 2.092
$\sigma_{thickness}$  (chain, draw) float64 0.08336 0.08336 ... 0.06289
Attributes:
created_at:                 2020-12-07T15:46:06.995223
arviz_version:              0.10.0
inference_library:          pymc3
inference_library_version:  3.9.3
sampling_time:              11.50733232498169
tuning_steps:               1000

• <xarray.Dataset>
Dimensions:              (chain: 1, draw: 4000, y_{bottom}_dim_0: 1, y_{thickness}_dim_0: 12, y_{top}_dim_0: 1)
Coordinates:
* chain                (chain) int64 0
* draw                 (draw) int64 0 1 2 3 4 5 ... 3995 3996 3997 3998 3999
* y_{top}_dim_0        (y_{top}_dim_0) int64 0
* y_{bottom}_dim_0     (y_{bottom}_dim_0) int64 0
* y_{thickness}_dim_0  (y_{thickness}_dim_0) int64 0 1 2 3 4 5 6 7 8 9 10 11
Data variables:
y_{top}              (chain, draw, y_{top}_dim_0) float64 4.004 ... 2.959
y_{bottom}           (chain, draw, y_{bottom}_dim_0) float64 0.7223 ... 1...
y_{thickness}        (chain, draw, y_{thickness}_dim_0) float64 3.08 ... ...
Attributes:
created_at:                 2020-12-07T15:46:07.429328
arviz_version:              0.10.0
inference_library:          pymc3
inference_library_version:  3.9.3

• <xarray.Dataset>
Dimensions:              (chain: 2, draw: 1000, y_{bottom}_dim_0: 1, y_{thickness}_dim_0: 12, y_{top}_dim_0: 1)
Coordinates:
* chain                (chain) int64 0 1
* draw                 (draw) int64 0 1 2 3 4 5 6 ... 994 995 996 997 998 999
* y_{top}_dim_0        (y_{top}_dim_0) int64 0
* y_{bottom}_dim_0     (y_{bottom}_dim_0) int64 0
* y_{thickness}_dim_0  (y_{thickness}_dim_0) int64 0 1 2 3 4 5 6 7 8 9 10 11
Data variables:
y_{top}              (chain, draw, y_{top}_dim_0) float64 4.042 ... 0.8977
y_{bottom}           (chain, draw, y_{bottom}_dim_0) float64 1.007 ... 3.072
y_{thickness}        (chain, draw, y_{thickness}_dim_0) float64 1.459 ......
Attributes:
created_at:                 2020-12-07T15:46:07.426739
arviz_version:              0.10.0
inference_library:          pymc3
inference_library_version:  3.9.3

• <xarray.Dataset>
Dimensions:             (chain: 2, draw: 1000)
Coordinates:
* chain               (chain) int64 0 1
* draw                (draw) int64 0 1 2 3 4 5 6 ... 994 995 996 997 998 999
Data variables:
depth               (chain, draw) int64 4 3 1 3 2 2 3 2 ... 4 3 3 3 4 5 4 5
mean_tree_accept    (chain, draw) float64 0.9827 0.1244 ... 0.943 0.9833
lp                  (chain, draw) float64 15.71 15.71 15.71 ... 14.45 15.59
process_time_diff   (chain, draw) float64 0.002001 0.001627 ... 0.0047
perf_counter_diff   (chain, draw) float64 0.002001 0.001627 ... 0.004703
perf_counter_start  (chain, draw) float64 2.857e+04 2.857e+04 ... 2.857e+04
step_size_bar       (chain, draw) float64 0.2812 0.2812 ... 0.3079 0.3079
energy_error        (chain, draw) float64 -0.04127 0.0 ... -0.274 -0.1443
energy              (chain, draw) float64 -15.34 -13.86 ... -11.97 -14.32
diverging           (chain, draw) bool True False False ... False False
max_energy_error    (chain, draw) float64 1.999e+03 29.62 ... 25.86 216.0
step_size           (chain, draw) float64 0.217 0.217 ... 0.3396 0.3396
tree_size           (chain, draw) float64 9.0 7.0 1.0 5.0 ... 19.0 15.0 23.0
Attributes:
created_at:                 2020-12-07T15:46:07.000188
arviz_version:              0.10.0
inference_library:          pymc3
inference_library_version:  3.9.3
sampling_time:              11.50733232498169
tuning_steps:               1000

• <xarray.Dataset>
Dimensions:                     (chain: 1, draw: 1000)
Coordinates:
* chain                       (chain) int64 0
* draw                        (draw) int64 0 1 2 3 4 5 ... 995 996 997 998 999
Data variables:
$\sigma_{bottom}$           (chain, draw) float64 0.001322 ... 2.649e-06
$\mu_{bottom}$              (chain, draw) float64 1.09 0.7497 ... 1.035
$\sigma_{top}$_log__        (chain, draw) float64 -1.74 -0.8758 ... -6.446
$\sigma_{bottom}$_log__     (chain, draw) float64 -6.629 -3.144 ... -12.84
$\mu_{thickness}$           (chain, draw) float64 2.238 2.343 ... 2.13 1.897
$\mu_{top}$                 (chain, draw) float64 3.328 3.092 ... 2.931
$\sigma_{thickness}$        (chain, draw) float64 4.401e-07 ... 0.0009651
$\sigma_{thickness}$_log__  (chain, draw) float64 -14.64 -2.327 ... -6.943
$\sigma_{top}$              (chain, draw) float64 0.1756 0.4165 ... 0.001586
Attributes:
created_at:                 2020-12-07T15:46:07.432004
arviz_version:              0.10.0
inference_library:          pymc3
inference_library_version:  3.9.3

• <xarray.Dataset>
Dimensions:              (chain: 1, draw: 1000, y_{bottom}_dim_0: 1, y_{thickness}_dim_0: 12, y_{top}_dim_0: 1)
Coordinates:
* chain                (chain) int64 0
* draw                 (draw) int64 0 1 2 3 4 5 6 ... 994 995 996 997 998 999
* y_{top}_dim_0        (y_{top}_dim_0) int64 0
* y_{bottom}_dim_0     (y_{bottom}_dim_0) int64 0
* y_{thickness}_dim_0  (y_{thickness}_dim_0) int64 0 1 2 3 4 5 6 7 8 9 10 11
Data variables:
y_{top}              (chain, draw, y_{top}_dim_0) float64 3.429 ... 2.93
y_{bottom}           (chain, draw, y_{bottom}_dim_0) float64 1.088 ... 1.035
y_{thickness}        (chain, draw, y_{thickness}_dim_0) float64 2.238 ......
Attributes:
created_at:                 2020-12-07T15:46:07.436438
arviz_version:              0.10.0
inference_library:          pymc3
inference_library_version:  3.9.3

• <xarray.Dataset>
Dimensions:              (y_{bottom}_dim_0: 1, y_{thickness}_dim_0: 12, y_{top}_dim_0: 1)
Coordinates:
* y_{top}_dim_0        (y_{top}_dim_0) int64 0
* y_{bottom}_dim_0     (y_{bottom}_dim_0) int64 0
* y_{thickness}_dim_0  (y_{thickness}_dim_0) int64 0 1 2 3 4 5 6 7 8 9 10 11
Data variables:
y_{top}              (y_{top}_dim_0) float64 3.02
y_{bottom}           (y_{bottom}_dim_0) float64 1.02
y_{thickness}        (y_{thickness}_dim_0) float64 2.12 2.06 ... 2.13 1.92
Attributes:
created_at:                 2020-12-07T15:46:07.437687
arviz_version:              0.10.0
inference_library:          pymc3
inference_library_version:  3.9.3

# Trace plot: marginal densities (left) and sampled values per draw (right).
az.plot_trace(data)
plt.show()


# Use the third figure produced by this script as the gallery thumbnail.
sphinx_gallery_thumbnail_number = 3

# Overlay posterior (red) and prior (blue) densities for every latent parameter.
az.plot_density([data, data.prior], shade=.9, data_labels=["Posterior", "Prior"],
                var_names=[
                    '$\\mu_{top}$',
                    '$\\mu_{bottom}$',
                    '$\\mu_{thickness}$',
                    '$\\sigma_{top}$',
                    '$\\sigma_{bottom}$',
                    '$\\sigma_{thickness}$'
                ],
                colors=[default_red, default_blue], bw=5);
plt.show()

# Joint + marginal posterior of the two depth parameters (70% credible interval).
p = pp.PlotPosterior(data)

p.create_figure(figsize=(9, 5), joyplot=False, marginal=True, likelihood=False)
p.plot_marginal(var_names=['$\\mu_{top}$', '$\\mu_{bottom}$'],
                plot_trace=False, credible_interval=.70, kind='kde',
                marginal_kwargs={"bw": 1}
                )
plt.show()

# Joyplot view of the full inference at a single iteration of the chain.
p = pp.PlotPosterior(data)
p.create_figure(figsize=(9, 6), joyplot=True)

iteration = 500
# NOTE: double backslashes match the style used elsewhere in this script and
# avoid the invalid escape sequences (\m, \s) of the original single-backslash
# literals; the runtime strings are byte-identical.
p.plot_posterior(['$\\mu_{top}$', '$\\mu_{bottom}$'],
                 ['$\\mu_{thickness}$', '$\\sigma_{thickness}$'],
                 'y_{thickness}', iteration,
                 marginal_kwargs={"credible_interval": 0.94,
                                  'marginal_kwargs': {"bw": 1},
                                  'joint_kwargs': {"bw": 1}})
plt.show()

# Pairwise scatter of the two depth parameters (divergent draws hidden).
az.plot_pair(data, divergences=False, var_names=['$\\mu_{top}$', '$\\mu_{bottom}$'])
plt.show()


Total running time of the script: ( 0 minutes 33.042 seconds)

Gallery generated by Sphinx-Gallery