# Variable used in the notebook to determine which site
# is used to run the simulations.
import os
SALVUS_FLOW_SITE_NAME = os.environ.get("SITE_NAME", "local")
PROJECT_DIR = "project"
import time
import pathlib
import matplotlib.pyplot as plt
import numpy as np
import obspy
import xarray as xr
from salvus import namespace as sn
# Define a spherical chunk domain.
lat_c, lon_c = -27.5, 135
lat_e, lon_e = 25.5, 42.0
# Uncomment the following line to start from scratch
# !rm -rf project
d = sn.domain.dim3.SphericalChunkDomain(
lat_center=lat_c,
lat_extent=lat_e,
lon_center=lon_c,
lon_extent=lon_e,
radius_in_meter=6371e3,
)
p = sn.Project.from_domain(path=PROJECT_DIR, domain=d, load_if_exists=True)
# Add an event by parsing some prepared data.
e = sn.Event(
sources=sn.simple_config.source.seismology.parse(
"data/event.txt", dimensions=3
),
receivers=sn.simple_config.receiver.seismology.parse(
obspy.read_inventory("data/stations.txt"),
dimensions=3,
fields=["displacement"],
),
)
p += e
lat = np.linspace(lat_c - lat_e / 2.0, lat_c + lat_e / 2.0, 50)
lon = np.linspace(lon_c - lon_e / 2.0, lon_c + lon_e / 2.0, 50)
depth = np.linspace(0, 1000.0, 40)
_, lon_grid, lat_grid = np.meshgrid(depth, lon, lat, indexing="ij")
# Distance from the center of the domain in degrees.
d = ((lon_grid - lon_c) ** 2 + (lat_grid - lat_c) ** 2) ** 0.5
# Apply Gaussian
sigma = 5.0
d = 1.0 / (sigma * np.sqrt(2 * np.pi)) * np.exp(-0.5 * (d / sigma) ** 2)
# Normalize so the peak perturbation is 10 %.
d /= d.max()
d *= 10.0
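# The two steps above scale the Gaussian so that its largest value on the
# grid is exactly 10 %, i.e. the relative perturbation is roughly
#   dv(Delta) ~ 10 * exp(-0.5 * (Delta / sigma) ** 2)  [percent]
# with Delta the angular distance from the chunk center in degrees.
# Quick illustrative sanity check on the peak value:
assert np.isclose(d.max(), 10.0)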
ds = xr.Dataset(
data_vars={
"vs": (["depth", "longitude", "latitude"], d),
"vp": (["depth", "longitude", "latitude"], d),
},
coords={"depth": depth, "latitude": lat, "longitude": lon},
)
ds.vs.attrs["units"] = "%"
ds.vp.attrs["units"] = "%"
# Same for the coordinate axes.
ds.depth.attrs["units"] = "km"
ds.latitude.attrs["units"] = "degrees_north"
ds.longitude.attrs["units"] = "degrees_east"
ds.attrs["geospatial_lat_units"] = "degrees_north"
ds.attrs["geospatial_lon_units"] = "degrees_east"
ds.attrs["geospatial_vertical_units"] = "km"
ds.vs.isel(depth=0).T.plot()
plt.show()
ec = sn.EventConfiguration(
wavelet=sn.simple_config.stf.GaussianRate(half_duration_in_seconds=100.0),
waveform_simulation_configuration=sn.WaveformSimulationConfiguration(
end_time_in_seconds=1800.0
),
)
mc = sn.ModelConfiguration(background_model="prem_iso_no_crust")
p += sn.SimulationConfiguration(
tensor_order=1,
name="initial_model",
elements_per_wavelength=1.0,
min_period_in_seconds=200,
max_depth_in_meters=500e3,
    model_configuration=mc,
event_configuration=ec,
)
p += sn.model.volume.seismology.GenericModel(name="vs_blob", data=ds)
p += sn.SimulationConfiguration(
tensor_order=1,
name="target_model",
elements_per_wavelength=1.0,
min_period_in_seconds=200,
max_depth_in_meters=500e3,
model_configuration=sn.ModelConfiguration(
background_model="prem_iso_no_crust", volume_models=["vs_blob"]
),
event_configuration=ec,
)
p.viz.nb.simulation_setup("target_model", events=p.events.list())
[2024-07-05 10:17:55,913] INFO: Creating mesh. Hang on.
Interpolating model: vs_blob.
<salvus.flow.simple_config.simulation.waveform.Waveform object at 0x7fadbd28de10>
for simulation in ["initial_model", "target_model"]:
p.simulations.launch(
ranks_per_job=2,
site_name=SALVUS_FLOW_SITE_NAME,
events=p.events.list()[0],
wall_time_in_seconds_per_job=3600,
simulation_configuration=simulation,
)
[2024-07-05 10:17:56,813] INFO: Creating mesh. Hang on.
[2024-07-05 10:17:57,218] INFO: Submitting job ...
Uploading 1 files...
🚀 Submitted job_2407051017324675_e174795cdb@local
[2024-07-05 10:17:57,418] INFO: Submitting job ...
Uploading 1 files...
🚀 Submitted job_2407051017422081_045845550d@local
p.simulations.query(block=True, ping_interval_in_seconds=1.0)
True
def process(st, inv, receiver, sources):
    # This particular function adds some random noise to
    # the data. To make it reproducible, a deterministic
    # seed is derived from the receiver name and the
    # peak-to-peak amplitude of the first trace.
np.random.seed(
        int.from_bytes((receiver.name + str(np.ptp(st[0].data))).encode(), "big")
% 2**32
)
    # Add normally distributed noise with a standard deviation
    # of 5 % of each trace's peak-to-peak amplitude.
for tr in st:
tr.data += np.random.normal(
            scale=0.05 * np.ptp(tr.data), size=tr.data.shape
)
    # Low-pass filter again so the added noise does not contain
    # unrealistically high frequencies.
st.filter("lowpass", freq=1.0 / 80.0, zerophase=True)
return st
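# Purely illustrative sanity check of the noise recipe above, outside the
# Salvus workflow: apply `process` to a synthetic obspy trace. The
# `_DummyReceiver` class and the test trace are assumptions made here, only
# needed because the seed is derived from `receiver.name`.
_test_tr = obspy.Trace(data=np.sin(np.linspace(0.0, 20.0 * np.pi, 2000)))
_test_tr.stats.sampling_rate = 1.0
class _DummyReceiver:
    name = "XX.TEST."
_noisy_st = process(
    obspy.Stream([_test_tr]), inv=None, receiver=_DummyReceiver(), sources=None
)
# The noisy trace keeps the same length and sampling rate as the input.
print(_noisy_st[0].stats)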
p += sn.processing.seismology.SeismologyProcessingConfiguration(
name="target_model_with_noise",
data_source_name="SYNTHETIC_DATA:target_model",
processing_function=process,
)
# ### Pick Windows
#
# Real-world applications usually require some form of data selection. In seismology this often takes the form of picking time windows, and Salvus offers utilities to select these automatically.
if not p.entities.has(
"data_selection_configuration", "initial_model_100_to_300s"
):
p.actions.seismology.pick_windows(
data_selection_configuration="initial_model_100_to_300s",
observed_data_name="PROCESSED_DATA:target_model_with_noise",
synthetic_data_name="initial_model",
events=p.events.list(),
receiver_field="displacement",
window_taper_width_in_seconds=50.0,
window_picking_function="built-in",
window_picking_function_kwargs={
"minimum_period_in_seconds": 100.0,
"maximum_period_in_seconds": 300.0,
},
)
p.viz.nb.waveforms(
data=[
"SYNTHETIC_DATA:initial_model",
"PROCESSED_DATA:target_model_with_noise",
],
receiver_field="displacement",
data_selection_configuration="initial_model_100_to_300s",
)
p.viz.seismology.windows("initial_model_100_to_300s")
def l2_misfit_and_adjoint_source(
data_synthetic: np.ndarray,
data_observed: np.ndarray,
sampling_rate_in_hertz: float,
):
    # Scaling factor to avoid numerically tiny values.
f = 1e10
assert data_synthetic.shape == data_observed.shape
    # The adjoint source is the negative derivative of the misfit
    # w.r.t. the synthetic data, i.e., the sign is flipped here.
diff = data_observed - data_synthetic
misfit = 0.5 * (diff**2).sum() / sampling_rate_in_hertz * f
adj_src = diff / sampling_rate_in_hertz * f
return misfit, adj_src
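# Quick numerical check of the misfit function above with toy data; the
# numbers are illustrative assumptions, not part of the workflow. For a
# constant unit residual, misfit = 0.5 * n_samples * dt * 1e10.
_syn = np.zeros(100)
_obs = np.ones(100)
_m, _adj = l2_misfit_and_adjoint_source(_syn, _obs, sampling_rate_in_hertz=1.0)
assert np.isclose(_m, 0.5 * 100 * 1e10)
assert np.allclose(_adj, 1e10)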
# Add a misfit configuration to the project.
# Note that this is independent of the synthetic data of the forward run;
# it only defines how to compare two sets of data.
p += sn.MisfitConfiguration(
name="L2-misfit-to-target-model",
observed_data="PROCESSED_DATA:target_model_with_noise",
misfit_function=l2_misfit_and_adjoint_source,
receiver_field="displacement",
data_selection_configuration="initial_model_100_to_300s",
)
# Misfits can be computed before running an adjoint simulation.
misfits = None
while not misfits:
misfits = p.actions.inversion.compute_misfits(
simulation_configuration="initial_model",
misfit_configuration="L2-misfit-to-target-model",
events=p.events.list()[0],
ranks_per_job=4,
site_name=SALVUS_FLOW_SITE_NAME,
)
time.sleep(5.0)
print(misfits)
{'event_SOUTHEAST_OF_LOYALTY_ISLANDS_Mag_5.59_2010-08-03-22-30': 0.10154278998881669}
# Actually compute the gradients.
while not p.actions.inversion.compute_gradients(
simulation_configuration="initial_model",
misfit_configuration="L2-misfit-to-target-model",
wavefield_compression=sn.WavefieldCompression(
forward_wavefield_sampling_interval=5
),
events=p.events.list()[0],
ranks_per_job=4,
site_name=SALVUS_FLOW_SITE_NAME,
):
time.sleep(5.0)
p.viz.nb.gradients(
simulation_configuration="initial_model",
misfit_configuration="L2-misfit-to-target-model",
wavefield_compression=sn.WavefieldCompression(
forward_wavefield_sampling_interval=5
),
events=p.events.list(),
)
[2024-07-05 10:18:08,300] INFO: The following events have been simulated before, but checkpoints are not available for this combination of `site_name` and `ranks_per_job` and wavefield compression settings. They will be run again: ['event_SOUTHEAST_OF_LOYALTY_ISLANDS_Mag_5.59_2010-08-03-22-30']
[2024-07-05 10:18:08,319] INFO: Submitting job ...
[2024-07-05 10:18:08,367] INFO: Launched simulations for 1 events. Please check again to see if they are finished.
[2024-07-05 10:18:13,509] INFO: Submitting job ...
Uploading 1 files...
🚀 Submitted job_2407051018511124_bf6e717f2d@local
[2024-07-05 10:18:13,552] INFO: Launched adjoint simulations for 1 events. Please check again to see if they are finished.
[2024-07-05 10:18:18,604] INFO: 1 events have already been submitted. They will not be submitted again.
# Look at the misfits from the different receivers.
p.viz.nb.misfit_comparison(
reference_data="initial_model",
misfit_configuration="L2-misfit-to-target-model",
event=p.events.list()[0],
)
|          | initial_model (reference) |
|----------|---------------------------|
| IU.CTAO. | 5.7134e-03 |
| IU.HNR.  | 1.6906e-02 |
| IU.MBWA. | 2.5158e-02 |
| IU.NWAO. | 3.5260e-02 |
| IU.PMG.  | 6.9113e-03 |
| IU.SNZO. | 1.1595e-02 |
<pandas.io.formats.style.Styler at 0x7fadbca22150>
# Can also be shown on a map.
p.viz.seismology.misfit_map(
reference_data="initial_model",
misfit_configuration="L2-misfit-to-target-model",
)
# Or have a detailed look at how the individual misfits are computed.
p.viz.nb.misfits(
simulation_configuration="initial_model",
misfit_configuration="L2-misfit-to-target-model",
)
p.viz.misfit_histogram(
simulation_configuration_a="initial_model",
misfit_configuration="L2-misfit-to-target-model",
events=p.events.list(),
)