"""
inversion
=========
Module containing the main inversion class, MTfit.inversion.Inversion class.
"""
# **Restricted: For Non-Commercial Use Only**
# This code is protected intellectual property and is available solely for teaching
# and non-commercially funded academic research purposes.
#
# Applications for commercial use should be made to Schlumberger or the University of Cambridge.
import sys
import os
import glob
import operator
import time
import gc
import warnings
import multiprocessing
import traceback
import logging
import copy
try:
import cPickle as pickle
except ImportError:
import pickle
from math import ceil
import numpy as np
from .utilities.multiprocessing_helper import JobPool
from .utilities.file_io import parse_hyp
from .utilities.file_io import parse_csv
from .utilities.file_io import full_pdf_output_dicts
from .utilities.file_io import hyp_output_dicts
from .utilities.file_io import MATLAB_output
from .utilities.file_io import pickle_output
from .utilities.file_io import hyp_output
from .utilities.file_io import read_binary_output
from .utilities.file_io import read_sf_output
from .sampling import ln_bayesian_evidence
from .algorithms import BaseAlgorithm
from .algorithms import IterationSample
from .algorithms import TimeSample
from .algorithms import MarkovChainMonteCarloAlgorithmCreator
from .probability import polarity_ln_pdf
from .probability import LnPDF
from .probability import polarity_probability_ln_pdf
from .probability import amplitude_ratio_ln_pdf
from .probability import relative_amplitude_ratio_ln_pdf
from .probability import ln_marginalise
from .probability import dkl_estimate
from .utilities.extensions import get_extensions
from .extensions.scatangle import parse_scatangle
logger = logging.getLogger('MTfit.inversion')
try:
from .probability import cprobability
except ImportError:
cprobability = False
except Exception:
logger.exception('Error importing c extension')
cprobability = False
_MEMTEST = False
_DEBUG = False
_VERBOSITY = 0
_CYTHON_TESTS = False
_COMBINED_TESTS = False
def memory_profile_test(memtest=_MEMTEST):
"""
Decorator for running memory profiler (memory_profiler module) to test memory requirements and bottlenecks.
"""
def decorator(function):
"""Memory Profile Test Actual Decorator"""
if memtest:
try:
from memory_profiler import profile
return profile(function)
except Exception:
return function
return function
return decorator
#
# Task Objects
#
class ForwardTask(object):
"""
Forward modelling task
Task which carries out forward modelling and returns results dictionary containing PDF, MTs and Number of forward modelled Samples
Callable object.
"""
def __init__(self, mt, a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio, amplitude_ratio, percentage_error1_amplitude_ratio,
percentage_error2_amplitude_ratio, a_polarity_prob, polarity_prob, location_sample_multipliers=False, incorrect_polarity_prob=0, return_zero=False,
reuse=False, marginalise=True, generate_samples=100000, cutoff=100000000, dc=False, extension_data={}):
"""
ForwardTask initialisation
Args
mt: numpy matrix object containing moment tensor 6 vectors.
a_polarity: Polarity observations station-ray 6 vector.
error_polarity: Polarity observations error.
a1_amplitude_ratio: Amplitude ratio observations numerator station-ray 6 vector.
a2_amplitude_ratio: Amplitude ratio observations denominator station-ray 6 vector.
amplitude_ratio: Observed amplitude ratio.
percentage_error1_amplitude_ratio: Amplitude ratio observations percentage error for the numerator.
percentage_error2_amplitude_ratio: Amplitude ratio observations percentage error for the denominator.
a_polarity_prob: Polarity PDF observations station-ray 6 vector.
polarity_prob: Polarity PDF probability
return_zero:[False] Boolean flag to return zero probability samples.
reuse:[False] Boolean flag as to whether task is reused (mt changed and re-run...)
marginalise:[True] Boolean flag as to whether pdf is marginalised over location/model uncertainty or not.
generate_samples:[100000] Number of samples to generate when generating samples.
cutoff:[100000000] Max number of samples to try when generating samples.
dc:[False] DC or MT when generating samples.
extension_data:[{}] A dictionary of processed data for use by an MTfit.data_types extension.
"""
self.mt = mt
self.a_polarity = a_polarity
self.error_polarity = error_polarity
self.a1_amplitude_ratio = a1_amplitude_ratio
self.a2_amplitude_ratio = a2_amplitude_ratio
self.amplitude_ratio = amplitude_ratio
self.percentage_error1_amplitude_ratio = percentage_error1_amplitude_ratio
self.percentage_error2_amplitude_ratio = percentage_error2_amplitude_ratio
self.a_polarity_prob = a_polarity_prob
self.polarity_prob = polarity_prob
self.incorrect_polarity_prob = incorrect_polarity_prob
self.location_sample_multipliers = location_sample_multipliers
self._return_zero = return_zero
self._reuse = reuse
self.marginalise = marginalise
self.dc = dc
self.cutoff = cutoff
self.generate_samples = 0
self.extension_data = extension_data
if isinstance(self.mt, bool) and cprobability is not False:
self.generate_samples = generate_samples
@memory_profile_test(_MEMTEST)
def __call__(self):
"""
Runs the ForwardTask and returns the result as a dictionary
Returns
Dictionary of results as {'moment_tensors':MTs,'ln_pdf':prob,'n':numberOfInitialSamples}
"""
global _VERBOSITY
global _DEBUG
try:
if _VERBOSITY >= 3:
try:
import memory_profiler
logger.info('Memory Usage - Initial: {}'.format(memory_profiler.memory_usage()))
except ImportError:
memory_profiler = False
if self.generate_samples or len(self.mt):
_return = False # set _return flag to False before starting
if cprobability and not _CYTHON_TESTS and not _COMBINED_TESTS:
try:
# Get number of samples if not generating them
if not self.generate_samples:
N = self.mt.shape[1]
# Check if there are location PDF samples.
location_samples = False
n_location_samples = 1
if isinstance(self.a_polarity, np.ndarray) and len(self.a_polarity.shape) > 2 and self.a_polarity.shape[1] > 1:
location_samples = True
n_location_samples = self.a_polarity.shape[1]
elif isinstance(self.a1_amplitude_ratio, np.ndarray) and len(self.a1_amplitude_ratio.shape) > 2 and self.a1_amplitude_ratio.shape[1] > 1:
location_samples = True
n_location_samples = self.a1_amplitude_ratio.shape[1]
elif isinstance(self.a_polarity_prob, np.ndarray) and len(self.a_polarity_prob.shape) > 2 and self.a_polarity_prob.shape[1] > 1:
location_samples = True
n_location_samples = self.a_polarity_prob.shape[1]
# Check location sample probabilities (easier on memory and computation to have 2 samples in the same place have double the probability)
if self.location_sample_multipliers:
ln_location_sample_multipliers = np.log(np.array(self.location_sample_multipliers).astype(np.float64, copy=False))
else:
ln_location_sample_multipliers = np.zeros(n_location_samples)
# Try to use cprobabilities combined PDF code:
if self.generate_samples:
ln_p_total, mt, N = cprobability.combined_ln_pdf(self.mt, self.a_polarity, self.error_polarity, self.a1_amplitude_ratio,
self.a2_amplitude_ratio, self.amplitude_ratio, self.percentage_error1_amplitude_ratio,
self.percentage_error2_amplitude_ratio, self.a_polarity_prob, self.polarity_prob,
self.incorrect_polarity_prob, generate_samples=self.generate_samples, dc=self.dc,
cutoff=self.cutoff, marginalised=int(self.marginalise), location_samples_multipliers=ln_location_sample_multipliers)
self.mt = np.asarray(mt)
else:
ln_p_total = cprobability.combined_ln_pdf(self.mt, self.a_polarity, self.error_polarity, self.a1_amplitude_ratio, self.a2_amplitude_ratio, self.amplitude_ratio,
self.percentage_error1_amplitude_ratio, self.percentage_error2_amplitude_ratio, self.a_polarity_prob, self.polarity_prob,
self.incorrect_polarity_prob, generate_samples=self.generate_samples, dc=self.dc, cutoff=self.cutoff,
marginalised=int(self.marginalise), location_samples_multipliers=ln_location_sample_multipliers)
# Handle extensions
if len(self.extension_data):
extension_names, extensions = get_extensions('MTfit.data_types')
for key in self.extension_data.keys():
try:
if key in extension_names and 'relative' not in key:
ln_p_ext = extensions[key](self.mt, **self.extension_data[key])
ln_p_total = cprobability.ln_combine(ln_p_total, ln_p_ext)
else:
raise KeyError('Extension {} function not found.'.format(key))
except Exception:
logger.exception('Exception for extension: {}'.format(key))
# Set nans to inf
ln_p_total[np.isnan(ln_p_total)] = -np.inf
# Check if any probabilities are non zero otherwise return
if not np.prod(ln_p_total.shape) or not ln_p_total.max() > -np.inf:
if not self._return_zero:
return {'moment_tensors': np.matrix([]), 'ln_pdf': LnPDF(np.matrix([])), 'n': N}
return {'moment_tensors': self.mt, 'ln_pdf': LnPDF(-np.inf*np.ones(self.mt.shape[1])), 'n': N}
# Otherwise probability evaluation is complete so set return flag to True
_return = True
except Exception:
# Exception in cprobabilities combined PDF - print error message
logger.exception('Cython Combined PDFs failed')
# Check if the combined PDF has been evaluated, otherwise evaluate individual PDFs
if not _return:
# Check if generating samples (MT is bool rather than np.array)
if isinstance(self.mt, bool):
if self.dc:
self.mt = cprobability.random_dc(self.generate_samples)
else:
self.mt = cprobability.random_mt(self.generate_samples)
_return = False
location_samples = False
N = self.mt.shape[1]
# Initialise ln_p_total and check location_samples
if len(self.mt.shape) > 2:
ln_p_total = np.zeros((1, self.mt.shape[1], self.mt.shape[2]))
elif isinstance(self.a_polarity, np.ndarray) and len(self.a_polarity.shape) > 2 and self.a_polarity.shape[1] > 1:
location_samples = True
ln_p_total = np.zeros((self.a_polarity.shape[1], self.mt.shape[1]))
elif isinstance(self.a1_amplitude_ratio, np.ndarray) and len(self.a1_amplitude_ratio.shape) > 2 and self.a1_amplitude_ratio.shape[1] > 1:
location_samples = True
ln_p_total = np.zeros((self.a1_amplitude_ratio.shape[1], self.mt.shape[1]))
elif isinstance(self.a_polarity_prob, np.ndarray) and len(self.a_polarity_prob.shape) > 2 and self.a_polarity_prob.shape[1] > 1:
location_samples = True
ln_p_total = np.zeros((self.a_polarity_prob.shape[1], self.mt.shape[1]))
else:
ln_p_total = np.zeros((1, self.mt.shape[1]))
# Set manual polarities flag
manual_polarities = False
try:
# Ignore np.divide errors
with warnings.catch_warnings() and np.errstate(divide='ignore'):
warnings.simplefilter("ignore")
# polarity ln_pdf (will try to use cython again, and only fall back to python if necessary)
ln_p_total = polarity_ln_pdf(self.a_polarity, self.mt, self.error_polarity, self.incorrect_polarity_prob)
# Set manual_polarities (don't use auto-polarities) and return flags (have evaluated a PDF)
manual_polarities = True
_return = True
# Print number of non zero samples (Verbosity>=3)
logger.debug('Polarity non-zero samples = {}'.format(sum(ln_p_total > -np.inf)))
except Exception:
# Exception - check if data exists and print output, otherwise ignore
if not isinstance(self.error_polarity, np.ndarray) and self.error_polarity:
logger.exception('Polarity Exception')
# Force garbage collect to tidy memory
gc.collect()
# If manual polarity data didn't work or doesn't exist, try automated polarities
if not manual_polarities:
try:
# Ignore np.divide errors
with warnings.catch_warnings() and np.errstate(divide='ignore'):
warnings.simplefilter("ignore")
# Polarity probability ln_pdf (will try to use cython again, and only fall back to python if necessary)
ln_p_total = polarity_probability_ln_pdf(np.tensordot(self.a_polarity_prob, self.mt, 1), self.polarity_prob[0], self.polarity_prob[1], self.incorrect_polarity_prob)
_return = True
# Print number of non zero samples (Verbosity>=3)
logger.debug('Polarity probability non-zero samples = {}'.format(sum(ln_p_total > -np.inf)))
except Exception:
# Exception - check if data exists and print output, otherwise ignore
if not isinstance(self.a_polarity_prob, np.ndarray) and self.a_polarity_prob:
logging.exception('Polarity PDF Exception')
# Force garbage collect to tidy memory
gc.collect()
# Check if any probabilities are non zero otherwise return
if not ln_p_total.max() > -np.inf:
if not self._return_zero:
return {'moment_tensors': np.matrix([]), 'ln_pdf': LnPDF(np.matrix([])), 'n': N}
if location_samples and self.marginalise:
# Return marginalised zero PDF
return {'moment_tensors': self.mt, 'ln_pdf': LnPDF(-np.inf*np.ones(self.mt.shape[1])), 'n': N}
return {'moment_tensors': self.mt, 'ln_pdf': ln_p_total, 'n': N}
try:
# Ignore np.divide errors
with warnings.catch_warnings() and np.errstate(divide='ignore'):
warnings.simplefilter("ignore")
# amplitude ratio ln_pdf (will try to use cython again, and only fall back to python if necessary)
ln_p_amp_rat = amplitude_ratio_ln_pdf(self.amplitude_ratio, self.mt, self.a1_amplitude_ratio, self.a2_amplitude_ratio, self.percentage_error1_amplitude_ratio,
self.percentage_error2_amplitude_ratio)
if _return:
# if Cython and polarity data exists, combine using Cython
if cprobability and not _CYTHON_TESTS:
ln_p_total = cprobability.ln_combine(ln_p_total, ln_p_amp_rat)
else:
ln_p_total = ln_p_total+ln_p_amp_rat
else:
ln_p_total = ln_p_amp_rat
# free ln_p_amp_rat memory
del ln_p_amp_rat
_return = True
# Print number of non zero samples (Verbosity>=3)
if _VERBOSITY >= 3:
logger.info('Amplitude Ratio non-zero samples:{}. Total Non zero: {}'.format(sum(ln_p_amp_rat > -np.inf), sum(ln_p_total > -np.inf)))
except Exception:
if (not isinstance(self.percentage_error1_amplitude_ratio, np.ndarray) and self.percentage_error1_amplitude_ratio) or (not isinstance(self.percentage_error2_amplitude_ratio, np.ndarray) and self.percentage_error2_amplitude_ratio):
logger.exception('Amplitude Ratio Exception')
# Force garbage collection
# Handle extensions
if len(self.extension_data):
extension_names, extensions = get_extensions('MTfit.data_types')
for key in self.extension_data.keys():
try:
if key in extension_names and 'relative' not in key:
ln_p_ext = extensions[key](self.mt, **self.extension_data[key])
if cprobability and not _CYTHON_TESTS:
ln_p_total = cprobability.ln_combine(ln_p_total, ln_p_ext)
else:
ln_p_total = ln_p_total+ln_p_ext
else:
raise KeyError('Extension {} function not found.'.format(key))
except Exception:
logging.exception('Exception for extension: {}'.format(key))
# Clear the memory
gc.collect()
if not _return:
# No data has been used, so print exceptions
logging.error('No data used in polarity, amplitude ratio or polarity PDF')
# If location PDF samples exist and sample PDF multipliers exist (increased Prob) - For Oct -tree sampling sample density correspond to PDF values (more samples in higher prob). Grid sampling needs location multipliers set (can be done using Scat2Angle with 'grid' option in command line)
if location_samples and self.location_sample_multipliers and sum(self.location_sample_multipliers) != len(self.location_sample_multipliers):
if cprobability:
location_sample_multipliers = np.array(self.location_sample_multipliers)
location_sample_multipliers = location_sample_multipliers.astype(np.float64, copy=False)
ln_p_total = cprobability.ln_multipliers(ln_p_total, location_sample_multipliers)
else:
ln_p_total += np.log(np.matrix(self.location_sample_multipliers).T) # Conversion to matrix and transpose to correct for list order to ln_p_total dimensions (0 is sample dimension)
# If there are location samples and the marginalise flag is set, then marginalise, trying to use Cython
if location_samples and self.marginalise:
ln_p_total = ln_marginalise(ln_p_total)
# Delete arrays to free memory if not reusing
if not self._reuse:
del self.a_polarity
del self.a_polarity_prob
del self.a1_amplitude_ratio
del self.a2_amplitude_ratio
del self.amplitude_ratio
del self.polarity_prob
del self.percentage_error1_amplitude_ratio
del self.percentage_error2_amplitude_ratio
del self.error_polarity
del self.incorrect_polarity_prob
del self.location_sample_multipliers
gc.collect()
# Print end memory usage (Verbosity >=3)
if _VERBOSITY >= 3 and memory_profiler:
print('end usage {}'.format(memory_profiler.memory_usage()))
# Check results
if _return:
# Flag for nan errors if _DEBUG is set
if np.isnan(ln_p_total).any() and _DEBUG:
raise ValueError('NaN result')
ln_p_total = LnPDF(ln_p_total)
# remove zeros if _return_zero flag not set.
if not self._return_zero:
self.mt = self.mt[:, ln_p_total.nonzero()]
ln_p_total = ln_p_total[:, ln_p_total.nonzero()]
return {'moment_tensors': self.mt, 'ln_pdf': LnPDF(ln_p_total), 'n': N}
else:
return {'moment_tensors': np.matrix([]), 'ln_pdf': LnPDF(np.matrix([])), 'n': N}
return False
except Exception as e:
# Overall error catch
warnings.warn('Error in forward task:{}\n\nReturning no result and continuing [no action required].'.format(e), RuntimeWarning)
traceback.print_exc()
return {'moment_tensors': np.matrix([]), 'ln_pdf': LnPDF(np.matrix([])), 'n': 0}
class McMCForwardTask(object):
"""
Markov chain Monte Carlo Forward modelling task
Task which carries out Markov chain Monte Carlo forward modelling and returns results dictionary containing PDF, MTs and Number of forward modelled Samples
Runs for whole Markov chain. Callable object.
"""
def __init__(self, algorithm_kwargs, a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio, amplitude_ratio, percentage_error1_amplitude_ratio,
percentage_error2_amplitude_ratio, a_polarity_prob, polarity_prob, incorrect_polarity_prob=0, fid=False, event_data=False, location_samples=False,
location_sample_multipliers=False, marginalise=True, normalise=True, convert=False, extension_data={}):
"""
McMCForwardTask initialisation
Args
algorithm_kwargs: Algorithm Keyword Arguments for running Markov chain Monte Carlo inversion with.
a_polarity: Polarity observations station-ray 6 vector.
error_polarity: Polarity observations error.
a1_amplitude_ratio: Amplitude ratio observations numerator station-ray 6 vector.
a2_amplitude_ratio: Amplitude ratio observations denominator station-ray 6 vector.
amplitude_ratio: Observed amplitude ratio.
percentage_error1_amplitude_ratio: Amplitude ratio observations percentage error for the numerator.
percentage_error2_amplitude_ratio: Amplitude ratio observations percentage error for the denominator.
a_polarity_prob: Polarity PDF observations station-ray 6 vector.
polarity_prob: Polarity PDF probability
fid:[False] File name for output.
event_data:[False] event_data data for output.
location_samples:[False] Station angle scatter samples for output.
location_sample_multipliers:[False] Station angle probability samples for output.
marginalise:[True] Boolean flag as to whether pdf is marginalised over location/model uncertainty.
normalise:[True] Normalise output (doesn't matter for McMC main output, but will affect probability output)
convert:[False] Convert output data to tape, hudson and fault plane coordinates.
extension_data:[{}] A dictionary of processed data for use by an MTfit.data_types extension.
"""
self.algorithm_kwargs = algorithm_kwargs
self.a_polarity = a_polarity
self.error_polarity = error_polarity
self.a1_amplitude_ratio = a1_amplitude_ratio
self.a2_amplitude_ratio = a2_amplitude_ratio
self.amplitude_ratio = amplitude_ratio
self.percentage_error1_amplitude_ratio = percentage_error1_amplitude_ratio
self.percentage_error2_amplitude_ratio = percentage_error2_amplitude_ratio
self.a_polarity_prob = a_polarity_prob
self.polarity_prob = polarity_prob
self.incorrect_polarity_prob = incorrect_polarity_prob
self.fid = fid
self.event_data = event_data
self.location_samples = location_samples
self.location_sample_multipliers = location_sample_multipliers
self.marginalise = marginalise
self.normalise = normalise
self.convert = convert
self.extension_data = extension_data
def __call__(self):
"""
Runs the McMCForwardTask and returns the result as a dictionary
Returns
Dictionary of results as {'algorithm_output_data':algorithm_data,'event_data':event_data,'location_samples':location_samples,'location_sample_multipliers':location_sample_multipliers}
"""
# Set algorithm
self.algorithm = MarkovChainMonteCarloAlgorithmCreator(**self.algorithm_kwargs)
# Initialise algorithm
mts, end = self.algorithm.initialise()
# Run forward tasks for MT samples
forward = ForwardTask(mts, self.a_polarity, self.error_polarity, self.a1_amplitude_ratio, self.a2_amplitude_ratio, self.amplitude_ratio, self.percentage_error1_amplitude_ratio,
self.percentage_error2_amplitude_ratio, self.a_polarity_prob, self.polarity_prob, self.location_sample_multipliers, self.incorrect_polarity_prob, return_zero=True,
reuse=True, marginalise=self.marginalise, extension_data=self.extension_data)
while not end:
forward.mt = mts
result = forward()
mts, end = self.algorithm.iterate(result)
# Close logger
# Get output data and print results
output_data, output_string = self.algorithm.output(self.normalise, self.convert, 0)
return {'algorithm_output_data': output_data, 'event_data': self.event_data, 'location_samples': self.location_samples,
'location_sample_multipliers': self.location_sample_multipliers}
class MultipleEventsForwardTask(object):
"""
Multiple Events Forward modelling task
Task which carries out forward modelling with relative amplitudes and returns dictionary containing PDF, MTs and Number of forward modelled Samples
Callable object.
"""
def __init__(self, mts, a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio, amplitude_ratio, percentage_error1_amplitude_ratio, percentage_error2_amplitude_ratio,
a_polarity_prob, polarity_prob, a_relative_amplitude, relative_amplitude, percentage_error_relative_amplitude, relative_amplitude_stations, location_sample_multipliers=False,
incorrect_polarity_prob=0, minimum_number_intersections=2, return_zero=False, reuse=False, relative=False, location_sample_size=1, marginalise_relative=False,
combine=True, extension_data=[]):
"""
Initialisation of MultipleEventsForwardTask
Used to calculate forward model parameters for multiple event samples (e.g. using relative amplitudes).
Args
mts: List of numpy matrix objects containing moment tensor 6 vectors for each event.
a_polarity: List of Polarity observations station-ray 6 vector for each event.
error_polarity: List of Polarity observations error for each event.
a1_amplitude_ratio: List of Amplitude ratio observations numerator station-ray 6 vector for each event.
a2_amplitude_ratio: List of Amplitude ratio observations denominator station-ray 6 vector for each event.
amplitude_ratio: List of Observed amplitude ratios for each event.
percentage_error1_amplitude_ratio: List of Amplitude ratio observations percentage_error for the numerator for each event.
percentage_error2_amplitude_ratio: List of Amplitude ratio observations percentage_error for the denominator for each event.
a_polarity_prob: List of Polarity PDF observations station-ray 6 vector for each event.
polarity_prob: List of Polarity PDF probability for each event.
a_relative_amplitude: List of numpy matrix objects for station-ray 6 vector for each event
relative_amplitude: List of Observed amplitude values for relative amplitude calculation for each event
percentage_error_relative_amplitude: List of Amplitude ratio observations percentage_error for the relative amplitude calculation for each event
relative_amplitude_stations: List of relative amplitude stations allowing for intersections when evaluating the relative amplitudes
return_zero:[False] Boolean flag to return zero probability samples.
reuse:[False] Boolean flag as to whether task is reused (mt changed and re-run...)
relative:[False] Use relative amplitude on multiple
location_samples:[1] Integer number of station samples, (default is 1)
marginalise_relative:[False] Boolean flag to marginalise the location uncertainty between absolute and relative data.
combine:[False] Boolean flag to combine the probabilities between multiple events.
extension_data:[] A list of dictionaries of processed data for use by an MTfit.data_types extension.
"""
self.mts = mts
self.a_polarity = a_polarity
self.error_polarity = error_polarity
self.a1_amplitude_ratio = a1_amplitude_ratio
self.a2_amplitude_ratio = a2_amplitude_ratio
self.amplitude_ratio = amplitude_ratio
self.percentage_error1_amplitude_ratio = percentage_error1_amplitude_ratio
self.percentage_error2_amplitude_ratio = percentage_error2_amplitude_ratio
self.a_polarity_prob = a_polarity_prob
self.polarity_prob = polarity_prob
self.incorrect_polarity_prob = incorrect_polarity_prob
self.a_relative_amplitude = a_relative_amplitude
self.relative_amplitude_stations = relative_amplitude_stations
self.relative_amplitude = relative_amplitude
self.percentage_error_relative_amplitude = percentage_error_relative_amplitude
self.location_sample_multipliers = location_sample_multipliers
self._return_zero = return_zero
self._reuse = reuse
self._relative = relative
self.forward_tasks = []
self.minimum_number_intersections = minimum_number_intersections
self.location_sample_size = location_sample_size
self._marginalise_relative = marginalise_relative
self._combine = combine
# Need to implement marginalisation of scale factor for self._marginalise_relative and location_samples
if self._relative and self._marginalise_relative and self.location_sample_size > 1:
raise NotImplementedError('Marginalisation of location samples on relative data not implemented yet - we may well expect location uncertainty between the co-located events to be zero')
# https://github.com/djpugh/MTfit/issues/11
# Scale factor is location/amplitude specific
# We would expect amplitude ratio location uncertainty to be minimal between co-located events
# So probably need to remove any location uncertainty for the relative amplitude terms
self.extension_data = extension_data
if self._relative and not self._combine and self._return_zero:
self._combine = True
@memory_profile_test(_MEMTEST)
def __call__(self):
"""
Runs the MultipleEventsForwardTask and returns the result as a dictionary
Returns
Dictionary of results as {'mt1':mt1,'mt2':mt2,'PDF':pdf}
"""
# If combining outputs make a single ln_pdf else make a list of them
if self._combine:
# Assuming that location uncertainty matches for each event (i.e if samples used, event is still co-located...)
ln_p_total = np.matrix(np.zeros((self.location_sample_size, self.mts[0].shape[1])))
else:
ln_p_total = []
# Set non-zero array
non_zeros = np.ones((self.mts[0].shape[1]), dtype=bool)
# Set scale factors and scale factor uncertainties
scale_factor = np.array(np.zeros((self.location_sample_size, self.mts[0].shape[1], len(self.mts), len(self.mts))))
scale_factor_uncertainty = np.array(np.zeros((self.location_sample_size, self.mts[0].shape[1], len(self.mts), len(self.mts))))
N = self.mts[0].shape[1]
# Loop over MTs
extension_scale = {}
for i, mt in enumerate(self.mts):
# Set diagonal scale_factors and scale_factor_uncertainties
scale_factor[:, :, i, i] = 1
scale_factor_uncertainty[:, :, i, i] = 0
# Get station coefficients
if isinstance(self.a_polarity, list):
a_polarity = self.a_polarity[i]
else:
a_polarity = self.a_polarity
if isinstance(self.a1_amplitude_ratio, list):
a1_amplitude_ratio = self.a1_amplitude_ratio[i]
else:
a1_amplitude_ratio = self.a1_amplitude_ratio
if isinstance(self.a2_amplitude_ratio, list):
a2_amplitude_ratio = self.a2_amplitude_ratio[i]
else:
a2_amplitude_ratio = self.a2_amplitude_ratio
if isinstance(self.a_polarity_prob, list):
a_polarity_prob = self.a_polarity_prob[i]
else:
a_polarity_prob = self.a_polarity_prob
if isinstance(self.incorrect_polarity_prob, list):
incorrect_polarity_prob = self.incorrect_polarity_prob[i]
else:
incorrect_polarity_prob = self.incorrect_polarity_prob
if len(self.extension_data) and len(self.extension_data) >= i:
extension_data = self.extension_data[i]
else:
extension_data = {}
# Update mt with non-zeros if combining
if not self._return_zero and self._combine:
self.mts[i] = np.ascontiguousarray(self.mts[i][:, non_zeros])
mt = self.mts[i]
# If forward tasks are being re-used and one doesn't exist or no tasks are re-used
if self._reuse and len(self.forward_tasks) < len(self.mts) or not self._reuse:
forward_task = ForwardTask(mt, a_polarity, self.error_polarity[i], a1_amplitude_ratio, a2_amplitude_ratio, self.amplitude_ratio[i],
self.percentage_error1_amplitude_ratio[i], self.percentage_error2_amplitude_ratio[i], a_polarity_prob,
self.polarity_prob[i], self.location_sample_multipliers, incorrect_polarity_prob, return_zero=True,
reuse=True, marginalise=self._marginalise_relative, extension_data=extension_data)
if self._reuse:
self.forward_tasks.append(forward_task)
elif self._reuse:
self.forward_tasks[i].mt = mt
forward_task = self.forward_tasks[i]
# Run forward task
result = forward_task()
# Process results amd combine
ln_pdf = result['ln_pdf']
if isinstance(result['ln_pdf'], LnPDF):
ln_pdf = result['ln_pdf']._ln_pdf
# If not combining add to ln_pdf list
if not self._combine:
if not (ln_pdf > -np.inf).any() and self._relative and ((isinstance(self.a_relative_amplitude, list) and len(self.a_relative_amplitude)) or not isinstance(self.a_relative_amplitude, (bool, list))):
# in relative loop and all zero prob
return {'moment_tensors': np.matrix([]), 'ln_pdf': LnPDF(np.matrix([])), 'n': N}
ln_p_total.append(ln_pdf)
continue
# all zero so breaking
if ln_pdf.shape[1] == 0:
ln_p_total = 0*ln_p_total-np.inf
break
ln_p_total = ln_p_total+ln_pdf
# return if ln_p_total is all zero
if not (ln_p_total > -np.inf).any():
# return zeros
if not self._return_zero:
return {'moment_tensors': np.matrix([]), 'ln_pdf': LnPDF(np.matrix([])), 'n': N}
return {'moment_tensors': self.mts, 'ln_pdf': ln_p_total, 'n': N, 'scale_factor': scale_factor}
# Update mts
if not self._return_zero:
# Update ln_pdfs etc. for non-zeros
if sum(non_zeros) != sum(np.array(np.sum(ln_p_total, 0) > -np.inf).flatten()):
non_zeros[non_zeros] = np.array(np.sum(ln_p_total, 0) > -np.inf).flatten() # update non zeros
for j in range(i+1):
self.mts[j] = np.ascontiguousarray(self.mts[j][:, np.array(np.sum(ln_p_total, 0) > -np.inf). flatten()])
scale_factor = np.ascontiguousarray(scale_factor[:, np.array(np.sum(ln_p_total, 0) > -np.inf).flatten(), :, :])
scale_factor_uncertainty = np.ascontiguousarray(scale_factor_uncertainty[:, np.array(np.sum(ln_p_total, 0) > -np.inf).flatten(), :, :])
ln_p_total = ln_p_total[:, np.array(np.sum(ln_p_total, 0) > -np.inf).flatten()]
mt = self.mts[i]
#####
if self._relative and ((isinstance(self.a_relative_amplitude, list) and len(self.a_relative_amplitude)) or not isinstance(self.a_relative_amplitude, (bool, list))) and not self._combine:
# Do relative amplitude calculations if not combining PDFs - Relative loop over non-zero samples, introduces some bias but not much, and should reduce computation time
# Set non zeros
non_zero = []
for i, ln_p in enumerate(ln_p_total):
nonzero_p = LnPDF(ln_p).nonzero()
self.mts[i] = self.mts[i][:, nonzero_p]
ln_p_total[i] = LnPDF(ln_p)[:, nonzero_p]
non_zero.append(len(nonzero_p))
# Get fewest number of mts and set all non-zero mts to that
min_mts = min(non_zero)
if min_mts == 0:
return {'moment_tensors': np.matrix([]), 'ln_pdf': LnPDF(np.matrix([])), 'n': N}
for i, ln_p in enumerate(ln_p_total):
if i == 0:
ln_p_t = ln_p[:, 0: min_mts]
else:
ln_p_t += ln_p[:, 0: min_mts]
self.mts[i] = np.ascontiguousarray(self.mts[i][:, 0: min_mts])
# Set up ln_p_total to run amp ratios through
ln_p_total = LnPDF(ln_p_t)
if not self._return_zero:
nonzero_p = ln_p_total.nonzero()
self.mts = [mts[:, nonzero_p] for mts in self.mts]
ln_p_total = ln_p_total._ln_pdf
if self._relative and ((isinstance(self.a_relative_amplitude, list) and len(self.a_relative_amplitude)) or not isinstance(self.a_relative_amplitude, (bool, list))):
for j in range(i):
# Go backwards to ease calculation as there are fewer events at start when most non-zero mts
try:
# Get amplitude ratio station coefficients
if isinstance(self.a_relative_amplitude, list) and len(self.a_relative_amplitude):
a1_relative_amplitude_ratio = self.a_relative_amplitude[i]
a2_relative_amplitude_ratio = self.a_relative_amplitude[j]
else:
a1_relative_amplitude_ratio = self.a_relative_amplitude
a2_relative_amplitude_ratio = self.a_relative_amplitude
# Intersect stations
intersected_data = _intersect_stations(self.relative_amplitude_stations[i], self.relative_amplitude_stations[j], a1_relative_amplitude_ratio,
a2_relative_amplitude_ratio, self.relative_amplitude[i], self.relative_amplitude[j],
self.percentage_error_relative_amplitude[i], self.percentage_error_relative_amplitude[j])
a1_relative_amplitude_ratio = intersected_data[0]
a2_relative_amplitude_ratio = intersected_data[1]
relative_amplitude_i = intersected_data[2]
relative_amplitude_j = intersected_data[3]
percentage_error_relative_amplitude_i = intersected_data[4]
percentage_error_relative_amplitude_j = intersected_data[5]
n_intersections = intersected_data[6]
# Check number of intersections
if n_intersections >= self.minimum_number_intersections:
# Evaluate relative amplitude ratio pdf
ln_p_amp_rat, scale, scale_uncertainty = relative_amplitude_ratio_ln_pdf(relative_amplitude_i, relative_amplitude_j, np.ascontiguousarray(mt),
np.ascontiguousarray(self.mts[j]), a1_relative_amplitude_ratio,
a2_relative_amplitude_ratio, percentage_error_relative_amplitude_i,
percentage_error_relative_amplitude_j)
# Set scale_factor and scale_factor_uncertainties
scale_factor[:, :, i, j] = scale
scale_factor[:, :, j, i] = scale
scale_factor_uncertainty[:, :, j, i] = scale_uncertainty
scale_factor_uncertainty[:, :, i, j] = scale_uncertainty
# Handle location samples PDFs
if self.location_sample_multipliers and sum(self.location_sample_multipliers) != len(self.location_sample_multipliers):
if cprobability:
location_sample_multipliers = np.array(self.location_sample_multipliers)
location_sample_multipliers = location_sample_multipliers.astype(np.float64, copy=False)
ln_p_amp_rat = cprobability.ln_multipliers(ln_p_amp_rat, location_sample_multipliers)
else:
ln_p_amp_rat += np.log(np.matrix(self.location_sample_multipliers).T)
# Marginalise if marginalise_relative flag is True
if self.location_sample_size > 1 and self._marginalise_relative:
ln_p_amp_rat = ln_marginalise(ln_p_amp_rat, _cython=cprobability is not False)
# Combine ln_p_total and amp rat
if cprobability and not _CYTHON_TESTS:
ln_p_total = cprobability.ln_combine(np.ascontiguousarray(ln_p_total), ln_p_amp_rat)
else:
ln_p_total = ln_p_total+ln_p_amp_rat
# return zeros
if not (ln_p_total > -np.inf).any():
if not self._return_zero:
return {'moment_tensors': np.matrix([]), 'ln_pdf': LnPDF(np.matrix([])), 'n': N}
return {'moment_tensors': self.mts, 'ln_pdf': ln_p_total, 'n': N}
# Update mts for non-zeros
if not self._return_zero:
non_zero_indices = np.array(np.sum(ln_p_total, 0) > -np.inf).flatten()
if sum(non_zeros) != sum(non_zero_indices):
non_zeros[non_zeros] = non_zero_indices
for j in range(i+1):
self.mts[j] = np.ascontiguousarray(self.mts[j][:, non_zero_indices])
ln_p_total = np.ascontiguousarray(ln_p_total[:, non_zero_indices])
scale_factor = np.ascontiguousarray(scale_factor[:, non_zero_indices, :, :])
scale_factor_uncertainty = np.ascontiguousarray(scale_factor_uncertainty[:, non_zero_indices, :, :])
except Exception as e:
print('Relative Amplitude Ratio Exception {}'.format(e))
# Exception in cprobabilities combined PDF - print error message
if _VERBOSITY >= 4:
traceback.print_exc()
# Handle relative inversion extensions
if self._relative and len(self.extension_data):
extension_names, extensions = get_extensions('MTfit.data_types')
extension_scale = {}
extension_scale_uncertainty = {}
for key in self.extension_data[i].keys():
extension_scale[key] = np.array(np.zeros((self.location_sample_size, self.mts[0].shape[1], len(self.mts), len(self.mts))))
extension_scale_uncertainty[key] = np.array(np.zeros((self.location_sample_size, self.mts[0].shape[1], len(self.mts), len(self.mts))))
extension_scale[key][:, :, i, i] = 1
extension_scale_uncertainty[key][:, :, i, i] = 0
for j in range(i):
for key in self.extension_data[i].keys():
if key in self.extension_data[j].keys():
try:
if key in extension_names and 'relative' in key:
extension_data_1, extension_data_2, n_intersections = _intersect_stations_extension_data(self.extension_data[i][key].copy(),
self.extension_data[j][key].copy())
if not n_intersections:
continue
ln_p_ext, scale, scale_uncertainty = extensions[key](self.mt, extension_data_1, extension_data_2)
extension_scale[key][:, :, i, j] = scale
extension_scale[key][:, :, j, i] = scale
extension_scale_uncertainty[key][:, :, j, i] = scale_uncertainty
extension_scale_uncertainty[key][:, :, i, j] = scale_uncertainty
if self.location_sample_multipliers and sum(self.location_sample_multipliers) != len(self.location_sample_multipliers):
if cprobability:
location_sample_multipliers = np.array(self.location_sample_multipliers)
location_sample_multipliers = location_sample_multipliers.astype(np.float64, copy=False)
ln_p_extensions = cprobability.ln_multipliers(ln_p_ext, location_sample_multipliers)
else:
ln_p_extensions += np.log(np.matrix(self.location_sample_multipliers).T)
# Marginalise if marginalise_relative flag is True
if self.location_sample_size > 1 and self._marginalise_relative:
ln_p_extensions = ln_marginalise(ln_p_extensions, _cython=cprobability is not False)
if cprobability and not _CYTHON_TESTS:
ln_p_total = cprobability.ln_combine(ln_p_total, ln_p_extensions)
else:
ln_p_total = ln_p_total+ln_p_extensions
if not (ln_p_total > -np.inf).any():
if not self._return_zero:
return {'moment_tensors': np.matrix([]), 'ln_pdf': LnPDF(np.matrix([])), 'n': N}
return {'moment_tensors': self.mts, 'ln_pdf': ln_p_total, 'n': N}
# Update mts for non-zeros
if not self._return_zero:
non_zero_indices = np.array(np.sum(ln_p_total, 0) > -np.inf).flatten()
if sum(non_zeros) != sum(non_zero_indices):
non_zeros[non_zeros] = non_zero_indices
for j in range(i+1):
self.mts[j] = np.ascontiguousarray(self.mts[j][:, non_zero_indices])
ln_p_total = np.ascontiguousarray(ln_p_total[:, non_zero_indices])
for key in extension_scale.keys():
extension_scale[key] = np.ascontiguousarray(extension_scale[key][:, non_zero_indices, :, :])
extension_scale_uncertainty[key] = np.ascontiguousarray(extension_scale_uncertainty[key][:, non_zero_indices, :, :])
else:
raise KeyError('Extension {} function not found.'.format(key))
except Exception as e:
print('Exception for relative extension: {} - {}'.format(key, e))
# Delete attributes if not reusing and force garbage collection
if not self._reuse:
del self.a_polarity
del self.a_polarity_prob
del self.a1_amplitude_ratio
del self.a2_amplitude_ratio
del self.amplitude_ratio
del self.polarity_prob
del self.percentage_error1_amplitude_ratio
del self.percentage_error2_amplitude_ratio
del self.error_polarity
del self.incorrect_polarity_prob
del self.extension_data
gc.collect()
# Take non-zeros here?
if np.isnan(ln_p_total).any() and _DEBUG:
raise ValueError('NaN result')
# Set up results dict
if self.location_sample_size > 1 and not self._marginalise_relative:
# Relative not marginalised already
ln_scale = 0
if -ln_p_total.max() > 0 and ln_p_total.max() > -np.inf:
ln_scale = -ln_p_total.max()
scale_factor = np.array([{'mu': scale_factor[:, i], 'ln_p': ln_p_total[:, i], 'sigma': scale_factor_uncertainty[:, i]} for i in range(scale_factor.shape[1])])
try:
for key in extension_scale.keys():
extension_scale[key] = np.array([{'mu': extension_scale[key][:, i], 'ln_p': ln_p_total[:, i], 'sigma': extension_scale_uncertainty[key][:, i]} for i in range(extension_scale[key].shape[1])])
except Exception:
extension_scale = {}
p_total = np.exp(ln_p_total+ln_scale)
# Marginalise
p_total = np.matrix(np.sum(p_total, 0))
ln_p_total = np.log(p_total)-ln_scale
elif self._relative: # marginalise_relative is true
if scale_factor.shape[0] == 1:
scale_factor = scale_factor.squeeze(0)
scale_factor_uncertainty = scale_factor_uncertainty.squeeze(0)
elif scale_factor.shape[1] == 1:
scale_factor = scale_factor.squeeze(1)
scale_factor_uncertainty = scale_factor_uncertainty.squeeze(1)
scale_factor = np.array([{'mu': scale_factor[i, :, :], 'sigma': scale_factor_uncertainty[i, :, :]} for i in range(scale_factor.shape[0])])
try:
for key in extension_scale.keys():
extension_scale[key] = extension_scale[key].squeeze(0)
extension_scale_uncertainty[key] = extension_scale_uncertainty[key].squeeze(0)
extension_scale[key] = np.array([{'mu': extension_scale[key][i, :, :], 'ln_p': ln_p_total[:, i], 'sigma': extension_scale_uncertainty[key][i, :, :]} for i in range(extension_scale[key].shape[0])])
except Exception:
extension_scale = {}
else:
scale_factor = False
ln_p_total = LnPDF(ln_p_total)
if not self._return_zero:
nonzero_p = ln_p_total.nonzero()
self.mts = [mts[:, nonzero_p] for mts in self.mts]
ln_p_total = ln_p_total[:, nonzero_p]
if not isinstance(scale_factor, bool):
scale_factor = scale_factor[nonzero_p]
return {'moment_tensors': self.mts, 'ln_pdf': ln_p_total, 'n': N, 'scale_factor': scale_factor, 'extensions_scale_factor': extension_scale}
class MultipleEventsMcMCForwardTask(McMCForwardTask):
"""
Multiple Events McMC Forward modelling task
Task which carries out Markov chain Monte Carlo forward modelling which carries out forward modelling for multiple events and returns dictionary containing PDF, MTs and Number of forward modelled Samples
Callable object.
"""
# Carry out McMC on multiple events using relative amplitudes to link them together.
def __init__(self, algorithm_kwargs, a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio, amplitude_ratio, percentage_error1_amplitude_ratio, percentage_error2_amplitude_ratio,
a_polarity_prob, polarity_prob, a_relative_amplitude, relative_amplitude, percentage_error_relative_amplitude, relative_amplitude_stations, incorrect_polarity_prob=0, relative=True,
minimum_number_intersections=2, fid=False, event_data=False, location_samples=False, location_sample_multipliers=False, marginalise_relative=True, normalise=True, convert=False, extension_data=[]):
"""
Initialisation of MultipleEventsMcMCForwardTask
Args
algorithm_kwargs: Algorithm Keyword Arguments for running Markov chain Monte Carlo inversion with.
a_polarity: List or numpy matrix of polarity observations station-ray 6 vector.
error_polarity: List of numpy matrices of polarity observations error.
a1_amplitude_ratio: List or numpy matrix of amplitude ratio observations numerator station-ray 6 vector.
a2_amplitude_ratio: List or numpy matrix of amplitude ratio observations denominator station-ray 6 vector.
amplitude_ratio: List of numpy matrices of observed amplitude ratio.
percentage_error1_amplitude_ratio: List of numpy matrices amplitude ratio observations percentage error for the numerator.
percentage_error2_amplitude_ratio: List of numpy matrices amplitude ratio observations percentage error for the denominator.
a_polarity_prob: List or numpy matrix of polarity PDF observations station-ray 6 vector.
polarity_prob: List of numpy matrices of polarity PDF probability.
a_relative_amplitude: List of numpy matrix objects for station-ray 6 vector for each event
relative_amplitude: List of Observed amplitude values for relative amplitude calculation for each event
percentage_error_relative_amplitude: List of Amplitude ratio observations percentage_error for the relative amplitude calculation for each event
relative_amplitude_stations: List of relative amplitude stations allowing for intersections when evaluating the relative amplitudes
relative:[True] Carry out relative amplitude step in inversion.
minimum_number_intersections:[2] Integer - minimum number of station intersections required to include the relative amplitude information.
fid:[False] File name for output.
event_data:[False] event_data data for output.
location_samples:[False] Station angle scatter samples for output.
location_sample_multipliers:[False] Station angle probability samples for output.
marginalise_relative:[False] Boolean flag as to whether pdf is marginalised over location/model uncertainty - IGNORED.
normalise:[True] Normalise output (doesn't matter for McMC main output, but will affect probability output)
convert:[False] Convert output data to tape, hudson and fault plane coordinates.
extension_data:[] A list of dictionaries of processed data for use by an MTfit.data_types extension.
"""
self.algorithm_kwargs = algorithm_kwargs
self.a_polarity = a_polarity
self.error_polarity = error_polarity
self.a1_amplitude_ratio = a1_amplitude_ratio
self.a2_amplitude_ratio = a2_amplitude_ratio
self.amplitude_ratio = amplitude_ratio
self.percentage_error1_amplitude_ratio = percentage_error1_amplitude_ratio
self.percentage_error2_amplitude_ratio = percentage_error2_amplitude_ratio
self.a_polarity_prob = a_polarity_prob
self.polarity_prob = polarity_prob
self.incorrect_polarity_prob = incorrect_polarity_prob
self.a_relative_amplitude = a_relative_amplitude
self.relative_amplitude = relative_amplitude
self.percentage_error_relative_amplitude = percentage_error_relative_amplitude
self.relative_amplitude_stations = relative_amplitude_stations
self._relative = relative
self.fid = fid
self.event_data = event_data
self.location_samples = location_samples
self.location_sample_multipliers = location_sample_multipliers
self.minimum_number_intersections = minimum_number_intersections
self._marginalise_relative = True # Setting this to False causes issues at the moment marginalise_relative
# TODO fix thie marginalise relative handling in the McMC
self.normalise = normalise
self.convert = convert
self.extension_data = extension_data
# Set logger
def __call__(self):
"""
Runs the multiple events McMC forward task
Runs the MultipleEventsMcMCForwardTask and returns the result as a dictionary
Returns
Dictionary of results as {'algorithm_output_data':algorithm_data,'event_data':event_data,'location_samples':location_samples,'location_sample_multipliers':location_sample_multipliers}
"""
location_sample_size = 1
if self.location_samples:
location_sample_size = len(self.location_samples)
# Set algorithm
self.algorithm = MarkovChainMonteCarloAlgorithmCreator(**self.algorithm_kwargs)
# Initialise algorithm
mts, end = self.algorithm.initialise()
iteration = 1
# Initialise algorithms separately (no relative data)
while self.algorithm._initialising and self.algorithm._initialiser:
results = []
# Sort station angle coefficients (list or array)
for i, mt in enumerate(mts):
if isinstance(self.a_polarity, list):
a_polarity = self.a_polarity[i]
else:
a_polarity = self.a_polarity
if isinstance(self.error_polarity, list):
error_polarity = self.error_polarity[i]
else:
error_polarity = self.error_polarity
if isinstance(self.a1_amplitude_ratio, list):
a1_amplitude_ratio = self.a1_amplitude_ratio[i]
else:
a1_amplitude_ratio = self.a1_amplitude_ratio
if isinstance(self.a2_amplitude_ratio, list):
a2_amplitude_ratio = self.a2_amplitude_ratio[i]
else:
a2_amplitude_ratio = self.a2_amplitude_ratio
if isinstance(self.a_polarity_prob, list):
a_polarity_prob = self.a_polarity_prob[i]
else:
a_polarity_prob = self.a_polarity_prob
if isinstance(self.incorrect_polarity_prob, list):
incorrect_polarity_prob = self.incorrect_polarity_prob[i]
else:
incorrect_polarity_prob = self.incorrect_polarity_prob
if len(self.extension_data) and len(self.extension_data) >= i:
extension_data = self.extension_data[i]
else:
extension_data = {}
if isinstance(self.polarity_prob, list) and len(self.polarity_prob):
polarity_prob = self.polarity_prob[i]
else:
polarity_prob = None
if isinstance(self.amplitude_ratio, list):
amplitude_ratio = self.amplitude_ratio[i]
else:
amplitude_ratio = self.amplitude_ratio
if isinstance(self.percentage_error1_amplitude_ratio, list):
percentage_error1_amplitude_ratio = self.percentage_error1_amplitude_ratio[i]
else:
percentage_error1_amplitude_ratio = self.percentage_error1_amplitude_ratio
if isinstance(self.percentage_error2_amplitude_ratio, list):
percentage_error2_amplitude_ratio = self.percentage_error2_amplitude_ratio[i]
else:
percentage_error2_amplitude_ratio = self.percentage_error2_amplitude_ratio
# Run forward task
forward_task = ForwardTask(mt, a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio,
amplitude_ratio, percentage_error1_amplitude_ratio,
percentage_error2_amplitude_ratio, a_polarity_prob,
polarity_prob, self.location_sample_multipliers, incorrect_polarity_prob,
return_zero=False, reuse=False, extension_data=extension_data)
results.append(forward_task())
mts, end = self.algorithm.iterate(results)
iteration += 1
# Run Relative amplitude data multiple events
multiple_events_forward_task = MultipleEventsForwardTask(mts, self.a_polarity, self.error_polarity, self.a1_amplitude_ratio,
self.a2_amplitude_ratio, self.amplitude_ratio, self.percentage_error1_amplitude_ratio,
self.percentage_error2_amplitude_ratio, self.a_polarity_prob, self.polarity_prob,
self.a_relative_amplitude, self.relative_amplitude,
self.percentage_error_relative_amplitude, self.relative_amplitude_stations,
self.location_sample_multipliers, self.incorrect_polarity_prob,
self.minimum_number_intersections, return_zero=True, reuse=True,
relative=self._relative, location_sample_size=location_sample_size,
marginalise_relative=self._marginalise_relative,
extension_data=self.extension_data)
while not end:
multiple_events_forward_task.mts = mts
result = multiple_events_forward_task()
mts, end = self.algorithm.iterate(result)
# Close logger
# Output data
output_data, output_string = self.algorithm.output(self.normalise, self.convert, 0)
# Print output string
print(output_string)
return {'algorithm_output_data': output_data, 'event_data': self.event_data, 'location_samples': self.location_samples,
'location_sample_multipliers': self.location_sample_multipliers}
class CombineMpiOutputTask(object):
"""
Task for combining MPI output
Combines and saves mpi (hyp) output (including scale factors from relative inversion)
Initialisation
Args
uid: UID for combining.
format: ['matlab'] Output format
results_format: ['full_pdf'] Set result format
"""
def __init__(self, uid, format='matlab', results_format='full_pdf', parallel=False, binary_file_version=2):
"""
Initialisation of CombineMpiOutputTask
Args
uid: UID for combining.
format: Output format
"""
self.uid = uid
self.format = format
self.results_format = results_format
self.parallel = parallel
self.binary_file_version = binary_file_version
def __call__(self):
"""
Runs the mpi output combining task
Runs the CombineMpiOutputTask and returns a result code.
Returns
resultCode: 10 if successful.
"""
print('\n\tCombining {}'.format(self.uid))
# Get hyp, mt and sf files
hypfiles = glob.glob(self.uid+'.*.hyp')
fids = glob.glob(self.uid+'.*.mt')
scale_factor_fids = glob.glob(self.uid+'.*.sf')
inv_fid = glob.glob(self.uid.replace('MT', '').replace('DC', '')+'.inv')
# Try to use the pkl file as input
try:
data = pickle.load(open(self.uid+'.0.pkl'))
# Check fids exist (haven't moved the files)
if all([os.path.exists(fid) for fid in data['fids']]):
fids = data['fids']
data = data['event_data']
if len(inv_fid):
inversion = Inversion(data_file=inv_fid[0], parallel=self.parallel)
else:
inversion = Inversion(data=data, parallel=self.parallel)
# Otherwise use the datafile or hypfile
except Exception:
data = False
if len(inv_fid):
inversion = Inversion(data_file=inv_fid[0], parallel=self.parallel)
elif len(hypfiles):
inversion = Inversion(data_file=hypfiles[0], parallel=self.parallel)
# Combine MPI output
try:
inversion._combine_mpi_output(inversion.data[0], fids, scale_factor_fids, self.uid+'.mat', format=self.format, results_format=self.results_format, parallel=False, mpi=False, binary_file_version=self.binary_file_version)
except Exception:
traceback.print_exc()
return 10
#
# Inversion object
#
[docs]class Inversion(object):
"""
Main Inversion object
Runs the MT inversion, follwing a parameterisation set on initialisation. Parameters can be set for the algorithm to use in the kwargs.
Algorithm options are:
* Time - Monte Carlo random sampling - runs until time limit reached.
* Iterate - Monte Carlo random sampling - runs until sample limit reached.
* McMC - Markov chain Monte Carlo sampling.
* TransDMcMC - Markov chain Monte Carlo sampling.
These are discussed in more detail in the MTfit.algorithms documentation.
The inversion is run by calling the forward function (MTfit.inversion.Inversion.forward)
**Data Format**
The inversion expects a python dictionary of the data in the format::
>>data={'PPolarity':{'Measured':numpy.matrix([[-1],[-1]...]),
'Error':numpy.matrix([[0.01],[0.02],...]),
'Stations':{'Name':[Station1,Station2,...]
'Azimuth':numpy.matrix([[248.0],[122.3]...]),
'TakeOffAngle':numpy.matrix([[24.5],[2.8]...])
}
},
'PSHAmplitudeRatio':{...},
...
'UID':'Event1'
}
The initial key arguments correspond to the data types that can be used in the inversion. The inversion uses polarity observations and amplitude ratios, and can also use relative amplitudes.
This means that the useable data types are:
**Polarity**
* PPolarity
* SHPolarity
* SVPolarity
Polarity observations made manually or automatically. The corresponding data dictionary for polarities needs the following keys:
* *Measured*: numpy matrix of polarity observations
* *Error*: numpy matrix of fractional uncertainty in the polarity observations.
* *Stations*: dictionary of station information with keys:
* *Name*: list of station names
* *Azimuth*: numpy matrix of azimuth values in degrees
* *TakeOffAngle*: numpy matrix of take off angle values in degrees - 0 down (NED coordinate system).
As with all of these dictionaries, the indexes of the observations and errors must correspond to the stations, i.e. `data['Measured'][0,0]` -> from `data['Stations']['Name'][0]` with `error data['Error'][0,:]` etc.
If polarity probabilities are being used, the keys are:
* PPolarityProbability
* SHPolarityProbability
* SVPolarityProbability
With a similar structure to the Polarity data, except the measured matrix has an additional dimension, i.e. `data['Measured'][0,0]` is the positive polarity probability and `data['Measured'][0,1]` is the negative polarity probability.
**Amplitude ratios**
* P/SHRMSAmplitudeRatio
* P/SVRMSAmplitudeRatio
* SH/SVRMSAmplitudeRatio
* P/SHQRMSAmplitudeRatio
* P/SVQRMSAmplitudeRatio
* SH/SVQRMSAmplitudeRatio
* P/SHAmplitudeRatio
* P/SVAmplitudeRatio
* SH/SVAmplitudeRatio
* P/SHQAmplitudeRatio
* P/SVQAmplitudeRatio
* SH/SVQAmplitudeRatio
Amplitude ratio observations made manually or automatically. The Q is not necessary but is useful to label the amplitudes with a Q correction.
The corresponding data dictionary for amplitude ratios needs the following keys:
* *Measured*: numpy matrix of corrected numerator and denominator amplitude ratio observations, needs to have two columns, one for the numerator and one for the denominator.
* *Error*: numpy matrix of uncertainty (standard deviation) in the amplitude ratio observations, needs to have two columns, one for the numerator and one for the denominator.
* *Stations*: dictionary of station information with keys:
* *Name*: list of station names
* *Azimuth*: numpy matrix of azimuth values in degrees
* *TakeOffAngle*: numpy matrix of take off angle values in degrees - 0 down (NED coordinate system).
As with all of these dictionaries, the indexes of the observations and errors must correspond to the stations, i.e. `data['Measured'][0,0]` -> from `data['Stations']['Name'][0]` with `error data['Error'][0,:]` etc.
**Relative Amplitude Ratios**
* PAmplitude
* SHAmplitude
* SVAmplitude
* PQAmplitude
* SHQAmplitude
* SVQAmplitude
* PRMSAmplitude
* SHRMSAmplitude
* SVRMSAmplitude
* PQRMSAmplitude
* SHQRMSAmplitude
* SVQRMSAmplitude
Relative Amplitude ratios use amplitude observations for different events made manually or automatically. The Q is not necessary but is useful to label the amplitudes with a Q correction.
The corresponding data dictionary for amplitude ratios needs the following keys:
* *Measured*: numpy matrix of amplitude observations for the event.
* *Error*: numpy matrix of uncertainty (standard deviation) in the amplitude observations.
* *Stations*: dictionary of station information with keys:
* *Name*: list of station names
* *Azimuth*: numpy matrix of azimuth values in degrees
* *TakeOffAngle*: numpy matrix of take off angle values in degrees - 0 down (NED coordinate system).
As with all of these dictionaries, the indexes of the observations and errors must correspond to the stations, i.e. `data['Measured'][0,0]` -> from `data['Stations']['Name'][0]` with `error data['Error'][0,:]` etc.
**Angle Scatter Format**
The angle scatter files can be generated using a utility Scat2Angle based on the NonLinLoc angle code. The angle scatter file is a text file with samples separated by blank lines.
The expected format is for samples from the location PDF that have been converted into take off and azimuth angles (in degrees) for the stations, along with a probability value. It is important that the samples are drawn
from the location PDF as a Monte Carlo based integration approach is used for marginalising over the uncertainty.
It is possible to use XYZ2Angle to samples drawn from a location PDF using the NonLinLoc angle approach.
Expected format is::
Probability
StationName Azimuth TakeOffAngle
StationName Azimuth TakeOffAngle
Probability
.
.
.
With TakeOffAngle as 0 down (NED coordinate system).
e.g.::
504.7
S0271 231.1 154.7
S0649 42.9 109.7
S0484 21.2 145.4
S0263 256.4 122.7
S0142 197.4 137.6
S0244 229.7 148.1
S0415 75.6 122.8
S0065 187.5 126.1
S0362 85.3 128.2
S0450 307.5 137.7
S0534 355.8 138.2
S0641 14.7 120.2
S0155 123.5 117
S0162 231.8 127.5
S0650 45.9 108.2
S0195 193.8 147.3
S0517 53.7 124.2
S0004 218.4 109.8
S0588 12.9 128.6
S0377 325.5 165.3
S0618 29.4 120.5
S0347 278.9 149.5
S0529 326.1 131.7
S0083 223.7 118.2
S0595 42.6 117.8
S0236 253.6 118.6
502.7
S0271 233.1 152.7
S0649 45.9 101.7
S0484 25.2 141.4
S0263 258.4 120.7
.
.
.
"""
@memory_profile_test(_MEMTEST)
def __init__(self, data={}, data_file=False, location_pdf_file_path=False, algorithm='iterate', parallel=True, n=0, phy_mem=8, dc=False, **kwargs):
"""
Initialisation of inversion object
Args:
data (dict/list): Dictionary or list of dictionaries containing data for inversion. Can be ignored if a data_file is passed as an argument (for data format, see below).
data_file (str): Path or list of file paths containing (binary) pickled data dictionaries.
location_pdf_file_path (str): Path or list of file paths to angle scatter files (for file format, see above - other format extensions can be added using setuptools entry points).
algorithm (str):['iterate'] algorithm selector
parallel (bool):[True] Run the inversion in parallel using multiprocessing
n (int):[0] Number of workers, default is to use as many as returned by multiprocessing.cpu_count
phy_mem (int):[8] Estimated physical memory to use (used for determining array sizes, it is likely that more memory will be used, and if so no errors are forced).
*On python versions <2.7.4 there is a bug (http://bugs.python.org/issue13555) with pickle that limits the total number of samples when running in parallel, so large (>20GB) phy_mem allocations per process are ignored.*
dc (bool):[False] Boolean flag as to run inversion constrained to double-couple or allowed to explore the full moment tensor space.
Keyword Arguments:
number_stations (int):[0] Used for estimating sample sizes in the Monte Carlo random sampling algorithms (Time,Iterate) if set.
number_location_samples (int):[0] Used for estimating sample sizes in the Monte Carlo random sampling algorithms (Time,Iterate) if set.
path (str): File path for output. Default is current working dir (interactive) or PBS workdir.
fid (str): File name root for output, default is to use MTfitOutput or the event UID if set in the data dictionary.
inversion_options (list): List of data types to be used in the iversion, if not set, the inversion uses all the data types in the data dictionary, irrespective of independence.
diagnostic_output (bool): [False] Boolean flag to output diagnostic information in MATLAB save file.
marginalise_relative (bool): [False] Boolean flag to marginalise over location/model uncertainty during relative amplitude inversion.
mpi (bool): [False] Boolean flag to run using MPI from mpi4py (useful on cluster).
multiple_events (bool): [False] Boolean flag to run all the events in the inversion in one single joint inversion.
relative_amplitude (bool): [False] Boolean flag to run multiple_events including relative amplitude inversion.
output_format (str): ['matlab'] str format style.
minimum_number_intersections (int): [2] Integer minimum number of station intersections between events in relative amplitude inversion.
quality_check (bool): [False] Boolean flag/Float value for maximum non-zero percentage check (stops inversion if event quality is poor)
recover (bool): [False] Tries to recover pre-existing inversions, and does not re-run if an output file exists and is readable.
file_sample (bool): [False] Saves samples to a mat file (allowing for easier recovery and reduced memory requirements but can slow the inversion down as requires disk writing every non-zero iteration. Only works well for recovery with random sampling methods)
results_format (str): ['full_pdf'] Data format for the output
marginalise_relative (bool): [False] Boolean flag to marginalise the location uncertainty between absolute and relative data.
file_sample (bool): [False] save samples to file rather than keeping in memory (not necessary in normal use).
normalise (bool): [True] normalise output Probability (doesn't affect ln_pdf output).
convert (bool): Convert output moment tensors to Tape parameters, Hudson u&v coordinates and strike-dip-rake triples.
discard (bool): [False] Probability cut-off for discarding samples Discarding samples - samples less than 1/(discard*n_samples) of the maximum likelihood value are discarded as negligeable. False means no samples are discarded.
c_generate (bool): [False] Generate samples in the probability calculation when using Cython.
generate_cutoff (int): Set number of samples to cut-off at when using c_generate (Default is the value of max_samples)
relative_loop (bool): [False] Loop over non-zero samples when using relative amplitudes.
bin_angle_coefficient_samples (int): [0] Bin size in degrees when binning angle coefficients (All station angle differences must be within this range for samples to fall in the same bin)
no_station_distribution (bool): [True] Boolean flag to output station distribution or not.
max_samples (int): [6000000] Max number of samples when using the iterate algorithm.
max_time (int): [600] Max time when using the time algorithm.
verbosity (int): [0] Set verbosity level (0-4) high numbers mean more logging output and verbosity==4 means the debugger will be called on errors.
debug (bool): [False] Sets debug on or off (True means verbosity set to 4).
Other kwargs are passed to the algorithm - see MTfit.algorithms documentation for help on those.
"""
data_file_path = data_file
self.dc = dc
self._marginalise_relative = kwargs.get('marginalise_relative', False)
self.file_sample = kwargs.get('file_sample', False)
# If data and data_file doesn't exist, try to read sys.argv
if not len(data) and not data_file:
try:
data_file = sys.argv[1]
except Exception:
pass
# Set MPI parameters
self._MPI = kwargs.get('mpi', False)
self.comm = False
if self._MPI:
try:
from mpi4py import MPI
self.comm = MPI.COMM_WORLD
self._print('Running MTfit using MPI')
except Exception:
self._MPI = False
# Set DC parameters
if self.dc:
self._print('\nDouble Couple Constrained Inversion\n')
else:
self._print('\nFull Moment Tensor Inversion\n')
# Parse data_files to data_dict
if data_file:
if isinstance(data_file, list):
self._print('\n*********\ndata files: '+','.join(data_file))
data = []
for filename in data_file:
file_data = self._load(filename)
if isinstance(file_data, list):
data.extend(file_data)
else:
data.append(file_data)
else:
self._print('\n*********\ndata file: '+data_file)
data = self._load(data_file)
# Raise error if no data
if not data or not len(data):
raise ValueError('Inversion requires an input data_file or data dictionary')
# Set data parameters
if isinstance(data, list):
number_events = len(data)
self.data = data
else:
self.data = [data.copy()]
number_events = 1
# Check multiple events flag
self.multiple_events = kwargs.get('multiple_events', False)
# Check number of events vs. multiple events
if self.multiple_events and number_events == 1:
self.multiple_events = False
kwargs['multiple_events'] = False
if not self.multiple_events:
number_events = 1
# Set more parameters from kwargs
self.number_events = number_events
kwargs['number_events'] = number_events
self._relative = kwargs.get('relative_amplitude', False)
self.normalise = kwargs.get('normalise', True)
self.convert = kwargs.get('convert', False)
self.discard = kwargs.get('discard', False)
self.c_generate = kwargs.get('c_generate', False)
self._relative_loop = kwargs.get('relative_loop', False)
self.bin_angle_coefficient_samples = kwargs.get('bin_angle_coefficient_samples', 0)
number_stations = 40
self.location_pdf_files = False
self.number_location_samples = 0
# Get Debug and verbositu arguments
global _DEBUG
global _VERBOSITY
_VERBOSITY = kwargs.get('verbosity', 0)
_DEBUG = kwargs.get('debug', False)
if _DEBUG:
_VERBOSITY = 4
try:
kwargs.pop('debug')
except Exception:
pass
# Handle location PDFs
if location_pdf_file_path:
# Get number of location samples
number_location_samples = int(kwargs.get('number_location_samples', 0))
self.number_location_samples = number_location_samples
# Get number of stations
number_stations = int(kwargs.get('number_stations', 0))
if isinstance(location_pdf_file_path, list):
self.location_pdf_files = location_pdf_file_path
else:
# Search in location_pdf_file_path
self.location_pdf_files = glob.glob(location_pdf_file_path)
# Get number of stations and number of location samples
if not number_stations or not number_location_samples:
location_samples, location_sample_multipliers = self._read_location([u for u in self.location_pdf_files if len(u)][0])
if not number_location_samples:
number_location_samples = len(location_samples)
number_stations = len(location_samples[0])
# Clear results and garbage collect
del location_samples
del location_sample_multipliers
gc.collect()
else:
# No location PDFs
number_location_samples = 1
self.number_stations = number_stations
self.number_events = number_events
# Print out location PDF files
if self.location_pdf_files:
self._print('Angle Scatter files: '+','.join(self.location_pdf_files))
# Set run parameters
self._print('\n*********\n\nRun parameters\n')
self._algorithm_name = algorithm
self.output_format = kwargs.get('output_format', 'matlab')
self.results_format = kwargs.get('results_format', 'full_pdf')
self._output_station_distribution = not kwargs.get('no_station_distribution', False)
self.max_samples = kwargs.get('max_samples', 6000000)
self.max_time = kwargs.get('max_time', 600)
# Set worker parameters
self._worker_params(parallel, n, phy_mem)
# Check parallel parameters
if self.parallel and (self.pool or self._MPI):
if self.pool:
number_workers = len(self.pool)
else:
number_workers = self._number_workers
if self._MPI:
self._print('MPI: '+str(number_workers)+' workers')
else:
self._print('Multi-threaded: '+str(number_workers)+' workers')
else:
self._print('Single-threaded')
self.minimum_number_intersections = kwargs.get('minimum_number_intersections', 2)
self._quality_check = kwargs.get('quality_check', False)
self.kwargs = kwargs
self.kwargs['number_samples'] = self.number_samples
self.generate_samples = 0
self.generate_cutoff = kwargs.get('generate_cutoff', self.max_samples)
self.file_sample = self.kwargs.get('file_sample', False)
self._set_algorithm(**self.kwargs)
self.inversion_options = self.kwargs.get('inversion_options', False)
self.fid = self.kwargs.get('fid', False)
self._recover = self.kwargs.get('recover', False)
self.kwargs['file_sample'] = self.file_sample
if isinstance(data_file_path, list):
data_file_path = data_file_path[0]
# Check if the shell is interactive and get local path,otherwise get workdir path
if ('PBS_ENVIRONMENT' in os.environ.keys() and 'interactive' in os.environ['PBS_ENVIRONMENT'].lower()) or len([env for env in os.environ.keys() if 'PBS' in env and env != 'PBS_DEFAULT']) < 0:
default_path = os.getcwd()
else:
default_path = os.environ.get('PBS_O_WORKDIR', os.getcwd())
# If path set in kwargs, get that path instead
if 'path' in kwargs:
self._path = kwargs.get('path', default_path)
elif data_file_path and os.path.isdir(data_file_path):
self._path = data_file_path
elif data_file_path and os.path.isdir(os.path.split(data_file_path)[0]):
self._path = os.path.split(data_file_path)[0]
else:
self._path = default_path
self._diagnostic_output = kwargs.get('diagnostic_output', False)
self.mpi_output = kwargs.get('mpi_output', True)
self._print('Algorithm: '+str(self._algorithm_name))
self._print('Algorithm Options: \n\t'+'\n\t'.join([str(u)+'\t'+str(v) for u, v in self.kwargs.items()]))
self._print('\n*********\n')
def _print(self, string, force=False):
"""MPI safe print (rank 0 only unless forced)"""
# If MPI, only print if rank 0
if not(self._MPI) or (self._MPI and self.comm.Get_rank() == 0 and not force):
print(string)
def _load(self, filename):
"""
Function to load data
Args
filename: filename of binary pickled dictionary or csv file to load
Returns
data: Loaded data from filename or False if exception thrown.
"""
try:
with open(filename, 'rb') as f:
data = pickle.load(f)
except Exception:
try:
with open(filename, 'r') as f:
data = pickle.load(f)
except Exception:
data = False
# Parser plug-in extensions.
parser_names, parsers = get_extensions('MTfit.parsers', {'.csv': parse_csv, '.hyp': parse_hyp})
try:
try:
ext = os.path.splitext(filename)[1]
data = parsers[ext](filename)
# Else try all the parsers
except Exception:
for parser in parsers.values():
try:
data = parser(filename)
except Exception:
pass
except Exception:
# If errors then try to parse csv and then parse hyp
try:
data = parse_csv(filename)
except Exception:
try:
data = parse_hyp(filename)
except Exception:
print('Parsers available are: {}'.format(','.join(list(parsers.keys()))))
traceback.print_exc()
if isinstance(data, bool):
print("No data available using available parsers:\n\t"+'\n\t'.join(parser_names))
return data
def _read_location(self, filename):
"""
Reads the location PDF file
Function allows for extension to other location format types (default is scatangle using parse_scatangle)
Expected plugin type is to match a file extension and return the sample_records,sample_probability where sample records is a list of the sample records::
sample_records=[{'Name':['S001','S002',...],'Azimuth':np.matrix([[121.],[37.],...]),'TakeOffAngle':np.matrix([[88.],[12.],...])},
{'Name':['S001','S002',...],'Azimuth':np.matrix([[120.],[36.],...]),'TakeOffAngle':np.matrix([[87.],[11.],...])}]
sample_probability=[0.8,1.2,...]
"""
parser_names, parsers = get_extensions('MTfit.location_pdf_parsers', {'.scatangle': parse_scatangle})
try:
# Try to call the plugin for the correct extension
try:
ext = os.path.splitext(filename)[1]
return parsers[ext](filename)
# Else try with all the parsers
except Exception:
for parser in parsers.values():
try:
return parser(filename)
except Exception:
pass
return False
except Exception:
if len(parser_names) == 0: # If not built correctly, can load scatangle file
return parse_scatangle(filename)
traceback.print_exc()
return False
def _set_logger(self, fid):
"""Sets file loggers up"""
# now = datetime.datetime.now()
# If mpi, only set loggers if rank 0
if (self._MPI and self.comm.Get_rank() == 0) or not self._MPI:
self.output_logger = None
self.error_logger = None
def _close_logger(self):
"""Closes file loggers"""
# If mpi, only close loggers if rank 0 (no other loggers opened])
if (self._MPI and self.comm.Get_rank() == 0) or not self._MPI:
self.output_logger.close()
self.error_logger.close()
def _set_algorithm(self, single=False, **kwargs):
"""
Sets up algorithm for inversion
Keyword Arguments
keyword arguments for algorithm configuration
For information on the kwargs, see the MTfit.Algorithm docstrings.
"""
self.McMC = False
# If no algorithm is set - use BaseAlgorithm (Will not behave as expected, but used for testing)
if not self._algorithm_name:
self.algorithm = BaseAlgorithm(number_samples=self.number_samples, dc=self.dc, quality_check=self._quality_check,
number_events=self.number_events, file_sample=self.file_sample,
fname=self.kwargs.get('fid', 'MTfit_run'), file_safe=not self.kwargs.get('no_file_safe', False),
sampling=self.kwargs.get('sampling', False), sampling_prior=self.kwargs.get('sampling_prior', False),
sample_distribution=self.kwargs.get('sample_distribution', False))
# Iterate algorithm
elif self._algorithm_name.lower() in ['iterate']:
max_samples = self.max_samples
number_samples = self.number_samples
if self._MPI and single:
# set max samples divided by n_workers
max_samples = self.max_samples/self._number_workers
number_samples = min(self.number_samples, max_samples)
self._print('Iterate algorithm chosen, maximum samples: '+str(self.max_samples)+' across '+str(self._number_workers)+' processes - '+str(max_samples)+' per process')
else:
self._print('Iterate algorithm chosen, maximum samples: '+str(self.max_samples))
self.algorithm = IterationSample(number_samples=number_samples, dc=self.dc, max_samples=max_samples,
quality_check=self._quality_check, number_events=self.number_events,
file_sample=self.file_sample, fname=self.kwargs.get('fid', 'MTfit_run'),
file_safe=not self.kwargs.get('no_file_safe', False),
generate=single and self.c_generate, sampling=self.kwargs.get('sampling', False),
sampling_prior=self.kwargs.get('sampling_prior', False),
sample_distribution=self.kwargs.get('sample_distribution', False))
if single and self.c_generate:
# Set generate cutoff size
self.generate_samples = self.number_samples/5
self.generate_cutoff = max_samples
# Time algorithm
elif self._algorithm_name.lower() in ['time']:
self._print('Time algorithm chosen, maximum time: '+str(self.max_time))
self.algorithm = TimeSample(number_samples=self.number_samples, dc=self.dc, max_time=self.max_time,
quality_check=self._quality_check, number_events=self.number_events,
file_sample=self.file_sample, fname=self.kwargs.get('fid', 'MTfit_run'),
file_safe=not self.kwargs.get('no_file_safe', False),
generate=single and self.c_generate, sampling=self.kwargs.get('sampling', False),
sampling_prior=self.kwargs.get('sampling_prior', False),
sample_distribution=self.kwargs.get('sample_distribution', False))
if single and self.c_generate:
# Set generate sample size, cutoff is default or set in initialisation
self.generate_samples = self.number_samples/5
# Algorithm extensions
elif self._algorithm_name.lower() in get_extensions('MTfit.parallel_algorithms')[0]:
extension_algorithms = get_extensions('MTfit.parallel_algorithms')[0]
kwargs['number_samples'] = self.number_samples
self.kwargs['number_samples'] = self.number_samples
self.algorithm = extension_algorithms[self._algorithm_name.lower()](**kwargs)
# McMC algorithm
elif 'mcmc' in self._algorithm_name.lower() or self._algorithm_name.lower() in get_extensions('MTfit.directed_algorithms')[0]:
if 'transd' in self._algorithm_name.lower():
kwargs['trans_dimensional'] = True
self.kwargs['trans_dimensional'] = True
kwargs['number_samples'] = self.number_samples
self.kwargs['number_samples'] = self.number_samples
if self._algorithm_name.lower() in get_extensions('MTfit.directed_algorithms')[0]:
kwargs['mode'] = self._algorithm_name.lower()
self.McMC = True
self.algorithm = MarkovChainMonteCarloAlgorithmCreator(**kwargs)
def _update_samples(self, show=True, _cython=cprobability is not False):
"""
Update number of samples based on available memory etc.
Args
show:[True] Print result to stdout
_cython: If cython code is being used - default is set by the parameter cprobability which is """+str(cprobability is not False)+'\n\n'
# Get number of floats from available memory
nfloats = self._floatmem/8.
# Get number of location PDF samples
try:
number_location_samples = len(self.location_sample_multipliers)
except Exception:
number_location_samples = self.number_location_samples
if not number_location_samples:
number_location_samples = 1
A = number_location_samples*6*self.number_events*self._max_data_size
D = 5*self._max_data_size
# Calculation allows for a bit of overflow...
# Calculation depends on Cython or not.
if _cython:
if self._relative:
number_samples = 0.8*(nfloats-(self._number_workers+1)*(4*A+6*D))/((self._number_workers+1)*6*self.number_events)
else:
number_samples = 0.8*(nfloats-(self._number_workers+1)*(4*A+6*D))/((self._number_workers+1)*6*self.number_events)
else:
if self._relative:
number_samples = 0.01*(nfloats-(self._number_workers+1)*(4*A+6*D))/((self._number_workers+1)*6*self.number_events+4*(self._number_workers+1)*self._max_data_size*number_location_samples*self.number_events+4*(self._number_workers+1)*number_location_samples*self.number_events*self.number_events)
else:
number_samples = 0.8*(nfloats-(self._number_workers+1)*(4*A+6*D))/((self._number_workers+1)*6*self.number_events+4*(self._number_workers+1)*self._max_data_size*number_location_samples*self.number_events)
if self._MPI:
number_samples /= 2
if self.parallel and not self._MPI and sys.version_info[:2] <= (2, 7, 4) and (number_samples*6*self.number_events*8*8 < 2**30):
# Check for pickle bug #13555 http://bugs.python.org/issue13555 which seems linked to multiprocessing issue #17560 http://bugs.python.org/issue17560
# cannot pickle files longer than 2**31 (32 bit encoding used for pickle length)
# Possible fix https://stackoverflow.com/questions/15118344/system-error-while-running-subprocesses-using-multiprocessing
number_samples = min([((2**30)/(6*8*8*self.number_events)), number_samples])
# Bodge to prevent memory usage above limit
number_samples /= 20
# If estiamted total number of samples <1 raise an error.
if number_samples < 1:
self._print('Warning - possible memory error, number of samples less than 1')
number_samples = 1
# If iterate, check against max samples set (don't want to evaluate more than that in one sample).
if hasattr(self, '_algorithm_name') and isinstance(self._algorithm_name, str) and self._algorithm_name.lower() in ['iterate']:
number_samples = min(number_samples, self.max_samples)
if show:
self._print('Number of Random Samples: '+str(int(number_samples))+' with '+str(number_location_samples)+' location samples and '+str(self._max_data_size)+' stations')
self.number_samples = int(number_samples)
try:
self.kwargs['number_samples'] = self.number_samples
except Exception:
pass
def _worker_params(self, parallel=True, n=0, phy_mem=0):
"""Determines worker parameters for inversion
Determines the sample sizes and worker parameters for the inversion.
Args
parallel:[True] Boolean flag for running in parallel
n:[0] Number of workers to generate, if 0 uses the number returned by multiprocessing.cpu_count()
phy_mem:[0] Estimated physical memory to use (used for determining array sizes, it is likely that more memory will be used, and if so no errors are forced).
number_location_samples:[1] Number of station samples (angle scatter samples) used in the inversion - again used for determing array sizes.
number_events:[1] Number of events - used for determing sample sizes
number_stations:[40] Number of stations - used for determing sample sizes
"""
# Check if on cluster or otherwise.
if len([u for u in os.environ.keys() if 'PBS_' in u]):
self.PBS = True
else:
self.PBS = False
# Check for pool
if hasattr(self, 'pool'):
self._close_pool()
# Get number of processors available
if self.PBS:
# If PBS get number of nodes
try:
n = int(os.environ['PBS_NUM_PPN'])*int(os.environ['PBS_NUM_NODES'])
except KeyError:
n = 1
if not parallel:
n = 1
# Check parallel - close pool if MPI or only on processor
if not parallel or n == 1 or self._MPI:
self._close_pool()
self.pool = False
elif n:
# Set pool for bumber of workers
self.pool = JobPool(n, task=ForwardTask)
else:
# Get number of workers from multiprocessing
n = multiprocessing.cpu_count() or 1 # Check CPU count if errors use 1
self.pool = JobPool(n, task=ForwardTask)
self._number_workers = n
# Amount of memory available for calculations (Can cause problems for very large non-zero percentages)
mem_scale = 0.9
# Set available float memory
if not phy_mem:
try:
import psutil
self._floatmem = mem_scale*psutil.virtual_memory().available/n
except Exception:
phy_mem = 8
self._floatmem = mem_scale*phy_mem*(1024*1024*1024.)
else:
self._floatmem = mem_scale*phy_mem*(1024*1024*1024.)
self.parallel = parallel
# calculate data size
max_data = 0
max_data_size = 0
for data in self.data:
if isinstance(data, dict):
for key in data:
if key not in ['UID', 'hyp_file'] and len(data[key]):
max_data_size = max([max_data, len(data[key]['Stations']['Name'])])
self._max_data_size = max_data_size
# Calculate Sample Size
self._update_samples(show=False)
def __del__(self):
"""Closes pool and exits, allowing object to be deleted."""
try:
self._close_pool()
except Exception:
traceback.print_exc()
gc.collect()
def _close_pool(self):
"""Closes and removes JobPool
"""
try:
if hasattr(self, 'pool') and self.pool:
self.pool.close()
del self.pool
self.pool = False
except Exception:
traceback.print_exc()
gc.collect()
def _combine_mpi_output(self, event_data, fids, scale_factor_fids=[], fid='MTfitOutput.mat', output_data=False, location_samples=False,
location_sample_multipliers=False, format='matlab', binary_file_version=2, *args, **kwargs):
"""
Combine mpi output binary files - Rank 0 only
Args:
event_data: Event data dictionary
fids: storage fids
scale_factor_fids:[[]] Scale_factor fids if relative amplitudes are used.
fid=[MTfitOutput.mat] Output filename.
output_data:[False] Output data (Stations etc.)
location_samples:[False] Location PDF samples
location_sample_multipliers:[False] Location PDF sample probabilities (should be one for Oct-tree samples)
format=[matlab] Output format.
"""
# Get old results format
old_results_format = self.results_format
# Check kwargs for update format
self.results_format = kwargs.get('results_format', old_results_format)
print('---------Combining MPI Output-----------')
output = {}
# Loop over fids
for i, mpi_fid in enumerate(fids):
# Get mt files
mt_fid = os.path.splitext(mpi_fid)[0]+'.mt'
# Read mt output in.
if not len(output):
# No files read
try:
output = read_binary_output(mt_fid, version=binary_file_version)
except Exception:
continue
if isinstance(output, list) and len(output) == 1:
output = output[0]
else:
try:
binary_output = read_binary_output(mt_fid)
except Exception:
continue
if isinstance(binary_output, list) and len(binary_output) == 1:
binary_output = binary_output[0]
if isinstance(binary_output, list) and len(binary_output) > 1:
for i, event_output in enumerate(binary_output):
for key in event_output.keys():
if key in output[i].keys():
if key == 'total_number_samples':
output[i][key] += event_output[key]
elif key in ['ln_bayesian_evidence', 'dkl']:
output[i][key] = 0 # Updated later
else:
output[i][key] = np.append(output[i][key], event_output[key], 1)
else:
for key in binary_output.keys():
if key in output.keys():
if key == 'total_number_samples':
output[key] += binary_output[key]
elif key in ['ln_bayesian_evidence', 'dkl']:
output[key] = 0 # Updated later
else:
output[key] = np.append(output[key], binary_output[key], 1)
# Handle Scale factors if they exist.
if len(scale_factor_fids):
for i, fid in enumerate(scale_factor_fids):
fid = os.path.splitext(fid)[0]+'.sf'
# Read scale_factors
if not len(output):
try:
output = read_sf_output(fid)
except Exception:
continue
if isinstance(output, list) and len(output) == 1:
output = output[0]
else:
try:
scale_factors_output = read_sf_output(fid)
except Exception:
continue
if isinstance(scale_factors_output, list) and len(scale_factors_output) == 1:
scale_factors_output = scale_factors_output[0]
if isinstance(scale_factors_output, list) and len(scale_factors_output) > 1:
for i, event_scale_factors in enumerate(scale_factors_output):
for key in event_scale_factors.keys():
if key in output[i].keys():
if key == 'total_number_samples':
output[i][key] += event_scale_factors[key]
elif key in ['ln_bayesian_evidence', 'dkl']:
output[i][key] = 0 # Updated later
else:
output[i][key] = np.append(output[i][key], event_scale_factors[key], 1)
else:
for key in scale_factors_output.keys():
if key in output.keys():
if key == 'total_number_samples':
output[key] += scale_factors_output[key]
elif key in ['ln_bayesian_evidence', 'dkl']:
output[key] = 0 # Updated later
else:
output[key] = np.append(output[key], scale_factors_output[key], 1)
# Output - handle special cases
if isinstance(output, list):
for i, out in enumerate(output):
if len(output):
print(output[i]['total_number_samples'])
output[i]['ln_bayesian_evidence'] = ln_bayesian_evidence(output[i], output[i]['total_number_samples'])
try:
if np.max(output[i]['g'])-np.min(output[i]['g']) < 0.000001 and np.abs(np.mean(output[i]['g'])) < 0.000001 and np.max(output[i]['d'])-np.min(output[i]['d']) < 0.000001 and np.abs(np.mean(output[i]['d'])) < 0.000001:
V = (2*np.pi*np.pi)
else:
V = (np.pi*np.pi*np.pi)
output[i]['dkl'] = dkl_estimate(output[i]['ln_pdf'], V, output[i]['total_number_samples'])
except Exception:
pass
output[i]['dV'] = 1
else:
if not len(output):
return
output['dV'] = 1
output['ln_bayesian_evidence'] = ln_bayesian_evidence(output, output['total_number_samples'])
try:
if np.max(output['g'])-np.min(output['g']) < 0.000001 and np.abs(np.mean(output['g'])) < 0.000001 and np.max(output['d'])-np.min(output['d']) < 0.000001 and np.abs(np.mean(output['d'])) < 0.000001:
V = (2*np.pi*np.pi)
else:
V = (np.pi*np.pi*np.pi)
output['dkl'] = dkl_estimate(output['ln_pdf'], V, output['total_number_samples'])
except Exception:
pass
if os.path.splitext(fid)[0].split('.')[-1] == str(0):
fid = '.'.join(os.path.splitext(fid)[0].split('.')[:-1])+os.path.splitext(fid)[1]
# format specifics - output for MATLAB only is first
if isinstance(output, list):
for i, event_output in enumerate(output):
fid_i = os.path.splitext(fid)[0]+'_'+str(i)+os.path.splitext(fid)[1]
self.output(event_data, fid_i, event_output, location_samples, location_sample_multipliers, format, *args, **kwargs)
else:
self.output(event_data, fid, output, location_samples, location_sample_multipliers, format, *args, **kwargs)
# Reset output format
self.results_format = old_results_format
def output(self, event_data, fid='MTfitOutput.mat', output_data=False, location_samples=False, location_sample_multipliers=False,
output_format='matlab', *args, **kwargs):
"""
Outputs event_data results to fid
Default output format is matlab.
Args
event_data: data dictionary for the event_data.
fid:['MTfitOutput.mat'] Filename for output.
Algorithm:[False] Algorith for output - only needed if multiple event_datas/algorithms used
location_samples:[False] Station angle scatter samples.
location_sample_multipliers:[False] Station angle scatter probabilities.
output_format:['matlab'] output format.
Keyword Arguments
station_only:[False] Only output station data
"""
self._print('Saving')
# Check location samples
if location_samples and location_sample_multipliers and self._output_station_distribution:
pass
elif not hasattr(self, 'location_samples') or not hasattr(self, 'location_sample_multipliers'):
location_samples = False
location_sample_multipliers = False
elif self.location_samples and self.location_sample_multipliers_original and self._output_station_distribution:
location_samples = self.location_samples
location_sample_multipliers = self.location_sample_multipliers_original
elif not self._output_station_distribution:
location_samples = False
location_sample_multipliers = False
# Check if output data exists (or just outputting data)
if not output_data and not kwargs.get('station_only', False):
output_data, output_string = self.algorithm.output(self.normalise, self.convert, self.discard)
if not self._MPI or (self._MPI and (self.mpi_output or(not isinstance(self.comm, bool) and self.comm.Get_rank()))) == 0:
if self._MPI and self.mpi_output:
output_string.replace('-MTfit Forward Model Output-', '-MTfit Forward Model Output Process '+str(self.comm.Get_rank())+'-')
print(output_string)
try:
if (not isinstance(output_data, dict) or not len(output_data['probability'])) and not kwargs.get('station_only', False):
return
except TypeError:
return
# get results format
output_data_names, output_data_formats = get_extensions('MTfit.output_data_formats', {'full_pdf': full_pdf_output_dicts, 'hyp': hyp_output_dicts})
try:
# Try to call the plugin for the correct extension'
if output_format and output_format.lower() == 'hyp':
out_data = output_data_formats[output_format.lower()](event_data, self.inversion_options, output_data, location_samples, location_sample_multipliers, self.multiple_events, self._diagnostic_output, *args, **kwargs)
else:
out_data = output_data_formats[self.results_format](event_data, self.inversion_options, output_data, location_samples, location_sample_multipliers, self.multiple_events, self._diagnostic_output, *args, **kwargs)
# Else try with full pdf output
except Exception:
traceback.print_exc()
out_data = full_pdf_output_dicts(event_data, self.inversion_options, output_data, location_samples, location_sample_multipliers, self.multiple_events, self._diagnostic_output, *args, **kwargs)
if output_format and output_format.lower() == 'matlab' and self.results_format == 'full_pdf':
try:
if out_data[0] and out_data[0]['Events']['Probability'].shape[-1]*len([key for key in out_data[0]['Events'].keys() if 'MTSpace' in key]) < 20000000:
kwargs['version'] = '7'
except Exception:
out_data[0] = False
# output data
output_data_names, output_data_formats = get_extensions('MTfit.output_formats', {'matlab': MATLAB_output, 'pickle': pickle_output, 'hyp': hyp_output})
try:
try:
# Try to call the plugin for the correct extension
output_string, fid = output_data_formats[output_format.lower()](out_data, fid, self.pool, *args, **kwargs)
# Else try with full pdf output
except Exception:
traceback.print_exc()
if output_format and output_format.lower() == 'matlab':
version = kwargs.get('version', '7.3')
kwargs['version'] = version
kwargs.pop('version')
output_string, fid = MATLAB_output(out_data, fid, self.pool, version=version, *args, **kwargs)
if output_format and output_format.lower() in ['pickle']:
output_string, fid = pickle_output(out_data, fid, self.pool, *args, **kwargs)
self._print(output_string)
except Exception:
self._print(output_format+' output')
traceback.print_exc()
def _station_angles(self, event, i):
"""Convert event station angles to GF six vector
Converts the station azimuth and take-off angle to the six vector corresponding to the homogeneous greens functions, as presented in ######################
Args
event: Event data dictionary.
i: Event index for scatter files.
Returns
a_polarity,error_polarity,a1_amplitude_ratio,a2_amplitude_ratio,amplitude_ratio,percentage_error1_amplitude_ratio,percentage_error2_amplitude_ratio,a_polarity_prob,polarity_prob
a_polarity: Polarity station angles matrix.
error_polarity: Polarity error matrix.
a1_amplitude_ratio: Amplitude ratio numerator station angles matrix.
a2_amplitude_ratio: Amplitude ratio denominator station angles matrix.
amplitude_ratio: Amplitude Ratio.
percentage_error1_amplitude_ratio: Amplitude ratio numerator percentage uncertainty matrix.
percentage_error2_amplitude_ratio: Amplitude ratio denominator percentage uncertainty matrix.
a_polarity_prob: Polarity PDF station angles matrix.
polarity_prob: Polarity PDF probabilities
"""
location_samples = False
location_sample_multipliers = False
# Check location PDF samples and set if they exist
if self.location_pdf_files and len(self.location_pdf_files[i]):
location_samples, location_sample_multipliers = self._read_location(self.location_pdf_files[i])
self.location_samples = location_samples
self.location_sample_multipliers = location_sample_multipliers
self.location_sample_multipliers_original = self.location_sample_multipliers[:]
elif self.location_pdf_files and self._relative and not self._marginalise_relative:
location_pdf_file = [location_pdf_file for location_pdf_file in self.location_pdf_files if len(location_pdf_file)][0]
location_samples, location_sample_multipliers = self._read_location(location_pdf_file)
self.location_samples = location_samples
self.location_sample_multipliers = location_sample_multipliers
self.location_sample_multipliers_original = self.location_sample_multipliers[:]
else:
self.location_samples = False
self.location_sample_multipliers = False
self.location_sample_multipliers_original = False
# Get station angles and measurements from data
a_polarity, error_polarity, incorrect_polarity_probability = polarity_matrix(event, location_samples)
a1_amplitude_ratio, a2_amplitude_ratio, amplitude_ratio, percentage_error1_amplitude_ratio, percentage_error2_amplitude_ratio = amplitude_ratio_matrix(event, location_samples)
a_polarity_probability, polarity_probability, incorrect_polarity_probability_2 = polarity_probability_matrix(event, location_samples)
# Extensions:
extension_data = {}
extension_names, extensions = get_extensions('MTfit.process_data_types')
for ext in extension_names:
extension_data[ext] = extensions[ext](event)
# Bin station location PDF samples
if self.bin_angle_coefficient_samples > 0 and location_samples:
(a_polarity, a1_amplitude_ratio, a2_amplitude_ratio, a_polarity_probability, extension_data,
self.location_sample_multipliers) = bin_angle_coefficient_samples(a_polarity, a1_amplitude_ratio, a2_amplitude_ratio, a_polarity_probability, self.location_sample_multipliers,
self.bin_angle_coefficient_samples, extension_data)
if isinstance(a_polarity, bool):
incorrect_polarity_probability = incorrect_polarity_probability_2
return (a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio, amplitude_ratio, percentage_error1_amplitude_ratio, percentage_error2_amplitude_ratio,
a_polarity_probability, polarity_probability, incorrect_polarity_probability, extension_data)
def _fid(self, event, i, source='MT', single=False):
"""Generates event fid
Args
event: event data dictionary
i: event index
source: 'MT' or 'dc'
Returns
fid string
"""
# Try to make fid from attribute
if self.fid:
fid = self.fid.split('.mat')[0]+str(i)+source+'.mat'
# Otherwise try to use UID or fall back to MTfitOutput as default
else:
try:
fid = self._path+os.path.sep+str(event['UID'])+source+'.mat'
except Exception:
fid = self._path+os.path.sep+'MTfitOutput'+source+'.mat'
# Add rank to MPI file output (so they don't overwrite)
if self._MPI and single and self.mpi_output:
fid = os.path.splitext(fid)[0]+'.'+str(self.comm.Get_rank())+os.path.splitext(fid)[1]
return fid
def _recover_test(self, fid):
"""
Tests if the output file exists in the correct format
Args:
fid: Filename to check for.
Returns
Boolean: True if exists.
"""
recover = False
# If recover option set
if self._recover:
# Check MPI and rank
if not self._MPI or self.comm.Get_rank() == 0:
# Either Rank 0 or no MPI
# Check MPI output
if self._MPI and self.mpi_output:
if len(glob.glob(fid.split('.mat')[0]+'.pkl')) or len(glob.glob(fid.split('.mat')[0]+'.0.pkl')):
recover = True
# Check if MATLAB format
if not recover and self.output_format.lower() == 'matlab':
try:
try:
import h5py
if h5py.is_hdf5(fid.split('.mat')[0]+'.mat'):
from hdf5storage import loadmat
else:
from scipy.io import loadmat
except Exception as e:
self._print(e)
from scipy.io import loadmat
loadmat(fid.split('.mat')[0]+'.mat')
gc.collect()
self._print('Recover option enabled and output file exists: '+fid)
recover = True
except Exception as e:
self._print('Recover option enabled and output file does not exist or is corrupted')
self._print(e)
recover = False
# Broadcast recover if MPI
if self._MPI:
recover = self.comm.bcast(recover, root=0)
return recover
def _file_sample_test(self, fid):
"""
Recover test if file sample option enabled
Args:
fid: Filename to check for.
Returns
Boolean: True if exists.
"""
if self._recover and self.file_sample:
try:
from hdf5storage import loadmat
loadmat(fid.split('.mat')[0]+'_in_progress.mat')
gc.collect()
self._print('Recover option enabled and in progress file exists, returning to this point') # How to handle mcmc?
return True
except Exception:
return False
elif not self._recover:
try:
os.remove(fid.split('.mat')[0]+'_in_progress.mat')
except Exception:
pass
return False
@memory_profile_test(_MEMTEST)
def _mcmc_forward(self, source_type='MT', **kwargs):
"""
Markov chain Monte Carlo event forward function
Runs event forward model using Markov chain Monte Carlo approach. Runs multiple events in parallel.
Args
source_type:['MT'] 'MT' or 'dc' for fid
"""
# Loop over events in self.data list
for i, event in enumerate(self.data):
# Set output fid
fid = self._fid(event, i, source_type)
self._print('\n\nEvent '+str(i+1)+'\n--------\n')
try:
self._print('UID: '+str(event['UID'])+'\n')
except Exception:
self._print('No UID\n')
# Do recovery test
if self._recover_test(fid):
continue
try:
# Check data
if not event:
self._print('No Data')
continue
try:
event = self._trim_data(event)
except ValueError:
self._print('No Data')
continue
del self.algorithm
gc.collect()
self.kwargs['fid'] = fid
# Get station angle coefficients and data
(a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio, amplitude_ratio, percentage_error1_amplitude_ratio, percentage_error2_amplitude_ratio, a_polarity_probability,
polarity_probability, incorrect_polarity_probability, extension_data) = self._station_angles(event, i)
# Update sample numbers
self._update_samples()
# Set algorithms
self._set_algorithm(**self.kwargs)
# Check parallel etc. and run McMCForwardTask
if self.pool:
if i > len(self.pool):
result = self.pool.result()
self._print('Inversion completed, '+str(result['algorithm_output_data']['total_number_samples'])+' samples evaluated: '+str((result['algorithm_output_data']['accepted'])) +
' accepted samples: '+str((float(result['algorithm_output_data']['accepted'])/float(result['algorithm_output_data']['total_number_samples']))*100)[:4]+'%')
try:
self.output(result['event_data'], result['fid'], result['algorithm_output_data'], result['location_samples'], result['location_sample_multipliers'], output_format=self.output_format)
except Exception:
self._print('Output Error')
traceback.print_exc()
self.pool.custom_task(McMCForwardTask, self.kwargs, a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio, amplitude_ratio, percentage_error1_amplitude_ratio,
percentage_error2_amplitude_ratio, a_polarity_probability, polarity_probability, incorrect_polarity_probability, fid, event, self.location_samples,
self.location_sample_multipliers, self.normalise, self.convert, extension_data)
else:
result = McMCForwardTask(self.kwargs, a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio, amplitude_ratio, percentage_error1_amplitude_ratio,
percentage_error2_amplitude_ratio, a_polarity_probability, polarity_probability, incorrect_polarity_probability, fid, normalise=self.normalise,
convert=self.convert, extension_data=extension_data)()
try:
accepted = (float(result['algorithm_output_data']['accepted'])/float(result['algorithm_output_data']['total_number_samples']))*100
except ZeroDivisionError:
accepted = 0
self._print('Inversion completed, '+str(result['algorithm_output_data']['total_number_samples'])+' samples evaluated: '+str((result['algorithm_output_data']['accepted'])) +
' accepted samples: '+str(accepted)[:4]+'%')
try:
self.output(event, fid, result['algorithm_output_data'], a_polarity=a_polarity, error_polarity=error_polarity, a1_amplitude_ratio=a1_amplitude_ratio,
a2_amplitude_ratio=a2_amplitude_ratio, amplitude_ratio=amplitude_ratio, percentage_error1_amplitude_ratio=percentage_error1_amplitude_ratio,
percentage_error2_amplitude_ratio=percentage_error2_amplitude_ratio, output_format=self.output_format)
except Exception:
self._print('Output Error')
traceback.print_exc()
except NotImplementedError as e:
raise e
except Exception:
traceback.print_exc()
# Get pool results
if self.pool:
results = self.pool.all_results()
for result in results:
try:
accepted = (float(result['algorithm_output_data']['accepted'])/float(result['algorithm_output_data']['total_number_samples']))*100
except ZeroDivisionError:
accepted = 0
self._print('Inversion completed, '+str(result['algorithm_output_data']['total_number_samples'])+' samples evaluated: '+str((result['algorithm_output_data']['accepted'])) +
' accepted samples: '+str(accepted)[:4]+'%')
try:
self.output(result['event_data'], result['fid'], result['algorithm_output_data'], result['location_samples'], result['location_sample_multipliers'], output_format=self.output_format)
except Exception:
self._print('Output Error')
traceback.print_exc()
@memory_profile_test(_MEMTEST)
def _mcmc_multiple_forward(self, source_type='MT', **kwargs):
"""
Markov chain Monte Carlo event forward function for multiple event joint pdf
Runs event forward model using Markov chain Monte Carlo approach for multiple events.
Args
source_type:['MT'] 'MT' or 'dc' for fid
"""
# Get fid
fid = self._fid({}, '', '_joint_inversion'+source_type)
# Recover test
if self._recover_test(fid):
return
# Sort station angles and data for multiple event
a_polarity = []
error_polarity = []
a1_amplitude_ratio = []
a2_amplitude_ratio = []
amplitude_ratio = []
percentage_error1_amplitude_ratio = []
percentage_error2_amplitude_ratio = []
a_polarity_probability = []
polarity_probability = []
a_relative_amplitude = []
relative_amplitude = []
percentage_error_relative_amplitude = []
relative_amplitude_stations = []
incorrect_polarity_probability = []
extension_data = []
for i, event in enumerate(self.data):
try:
# Check event data
if not event:
self._print('No Data')
continue
try:
event = self._trim_data(event)
except ValueError:
self._print('No Data')
continue
# Get event station angles and data etc. for event and add to list
(a_polarity_i, error_polarity_i, a1_amplitude_ratio_i, a2_amplitude_ratio_i, amplitude_ratio_i, percentage_error1_amplitude_ratio_i,
percentage_error2_amplitude_ratio_i, a_polarity_probability_i, polarity_probability_i, incorrect_polarity_probability_i, extension_data_i) = self._station_angles(event, i)
a_polarity.append(a_polarity_i)
error_polarity.append(error_polarity_i)
a1_amplitude_ratio.append(a1_amplitude_ratio_i)
a2_amplitude_ratio.append(a2_amplitude_ratio_i)
amplitude_ratio.append(amplitude_ratio_i)
percentage_error1_amplitude_ratio.append(percentage_error1_amplitude_ratio_i)
percentage_error2_amplitude_ratio.append(percentage_error2_amplitude_ratio_i)
a_polarity_probability.append(a_polarity_probability_i)
polarity_probability.append(polarity_probability_i)
incorrect_polarity_probability.append(incorrect_polarity_probability_i)
extension_data.append(extension_data_i)
# If running relative get relative data
if self._relative:
a_relative_amplitude_i, relative_amplitude_i, percentage_error_relative_amplitude_i, relative_amplitude_stations_i = relative_amplitude_ratio_matrix(event, self.location_samples)
a_relative_amplitude.append(a_relative_amplitude_i)
relative_amplitude.append(relative_amplitude_i)
percentage_error_relative_amplitude.append(percentage_error_relative_amplitude_i)
relative_amplitude_stations.append(relative_amplitude_stations_i)
except NotImplementedError as e:
raise e
except Exception:
traceback.print_exc()
self._print('\n\nJoint Inversion: '+str(self.number_events)+' Events\n--------\n')
try:
# Set algorithm
del self.algorithm
gc.collect()
self._update_samples()
self._set_algorithm(**self.kwargs)
# Run forward model using MultipleEventsMcMCForwardTask
if self.pool:
self.pool.custom_task(MultipleEventsMcMCForwardTask, self.kwargs, a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio,
amplitude_ratio, percentage_error1_amplitude_ratio, percentage_error2_amplitude_ratio, a_polarity_probability,
polarity_probability, a_relative_amplitude, relative_amplitude, percentage_error_relative_amplitude,
relative_amplitude_stations, incorrect_polarity_probability, self._relative, self.minimum_number_intersections,
fid, event, self.location_samples, self.location_sample_multipliers, self._marginalise_relative, self.normalise,
self.convert, extension_data)
result = self.pool.result()
try:
accepted = (float(result['algorithm_output_data']['accepted'])/float(result['algorithm_output_data']['total_number_samples']))*100
except ZeroDivisionError:
accepted = 0
self._print('Inversion completed, '+str(result['algorithm_output_data']['total_number_samples'])+' samples evaluated: '+str((result['algorithm_output_data']['accepted'])) +
' accepted samples: '+str(accepted)[:4]+'%')
try:
self.output(result['event_data'], result['fid'], result['algorithm_output_data'], result['location_samples'], result['location_sample_multipliers'],
output_format=self.output_format)
except NotImplementedError as e:
raise e
except Exception:
self._print('Output Error')
traceback.print_exc()
else:
result = MultipleEventsMcMCForwardTask(self.kwargs, a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio,
amplitude_ratio, percentage_error1_amplitude_ratio, percentage_error2_amplitude_ratio, a_polarity_probability, polarity_probability,
a_relative_amplitude, relative_amplitude, percentage_error_relative_amplitude, relative_amplitude_stations, incorrect_polarity_probability,
relative=self._relative, minimum_number_intersections=self.minimum_number_intersections, fid=fid, location_samples=self.location_samples,
marginalise_relative=self._marginalise_relative, normalise=self.normalise, convert=self.convert, extension_data=extension_data)()
self._print('Inversion completed, '+str(result['algorithm_output_data']['total_number_samples'])+' samples evaluated: '+str((result['algorithm_output_data']['accepted'])) +
' accepted samples: '+str((float(result['algorithm_output_data']['accepted'])/float(result['algorithm_output_data']['total_number_samples']))*100)[:4]+'%')
try:
self.output(self.data, fid, result['algorithm_output_data'], a_polarity=a_polarity, error_polarity=error_polarity, a1_amplitude_ratio=a1_amplitude_ratio,
a2_amplitude_ratio=a2_amplitude_ratio, amplitude_ratio=amplitude_ratio, percentage_error1_amplitude_ratio=percentage_error1_amplitude_ratio,
percentage_error2_amplitude_ratio=percentage_error2_amplitude_ratio, output_format=self.output_format)
except NotImplementedError as e:
raise e
except Exception:
self._print('Output Error')
traceback.print_exc()
except NotImplementedError as e:
raise e
except Exception:
traceback.print_exc()
@memory_profile_test(_MEMTEST)
def _random_sampling_forward(self, source_type='MT', return_zero=False):
"""
Monte Carlo random sampling event forward function
Runs event forward model using Monte Carlo random sampling approach - ie parallel for multiple samples, single event.
Args
source_type:['MT'] 'MT' or 'DC' for fid
return_zero:[True] Return zero probability samples in task result.
"""
# Loop over events
for i, event in enumerate(self.data):
# Get fid
fid = self._fid(event, i, source_type, single=True)
# Set logger
self._set_logger(fid)
self._print('\n\nEvent '+str(i+1)+'\n--------\n')
try:
self._print('UID: '+str(event['UID'])+'\n')
except Exception:
self._print('No UID\n')
# Recover test
if self._recover_test(fid):
continue
# File sample recover test
if self._file_sample_test(fid):
self._print('Continuing from previous sampling')
self.kwargs['fid'] = fid
# Check event data
if not event:
self._print('No Data')
continue
try:
try:
event = self._trim_data(event)
except ValueError:
self._print('No Data')
continue
# Get station angle coefficients and data etc.
(a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio, amplitude_ratio, percentage_error1_amplitude_ratio,
percentage_error2_amplitude_ratio, a_polarity_probability, polarity_probability, incorrect_polarity_probability, extension_data) = self._station_angles(event, i)
# Update sample size
self._update_samples()
# Set algorithm
del self.algorithm
gc.collect()
self._set_algorithm(single=True, **self.kwargs)
self._print('\nInitialisation Complete\n\nBeginning Inversion\n')
# Run ForwardTasks
if self.pool:
# initialise all tasks
for i in range(self.pool.number_workers):
MTs, end = self._parse_job_result(False)
self.pool.task(MTs, a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio, amplitude_ratio, percentage_error1_amplitude_ratio,
percentage_error2_amplitude_ratio, a_polarity_probability, polarity_probability, self.location_sample_multipliers,
incorrect_polarity_probability, return_zero, False, True, self.generate_samples, self.generate_cutoff, self.dc, extension_data)
elif self._MPI:
end = False
# Carried out in each worker
MTs, ignored_end = self._parse_job_result(False)
result = ForwardTask(MTs, a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio, amplitude_ratio, percentage_error1_amplitude_ratio,
percentage_error2_amplitude_ratio, a_polarity_probability, polarity_probability, self.location_sample_multipliers,
incorrect_polarity_probability, return_zero, generate_samples=self.generate_samples, cutoff=self.generate_cutoff,
dc=self.dc, extension_data=extension_data)()
# Return to initiator algorithm
else:
MTs, end = self._parse_job_result(False)
# Continue until max samples/time reached (end = = True)
while not end:
if self.pool:
result = self.pool.result()
MTs, end = self._parse_job_result(result)
self.pool.task(MTs, a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio, amplitude_ratio, percentage_error1_amplitude_ratio,
percentage_error2_amplitude_ratio, a_polarity_probability, polarity_probability, self.location_sample_multipliers,
incorrect_polarity_probability, return_zero, False, True, self.generate_samples, self.generate_cutoff, self.dc, extension_data)
elif self._MPI:
if not self.mpi_output:
# Handle result split and together using gather
mts = self.comm.gather(result['moment_tensors'], 0)
Ps = self.comm.gather(result['ln_pdf'], 0)
Ns = self.comm.gather(result['n'], 0)
if self.comm.Get_rank() == 0:
end = False
for i, mt in enumerate(mts):
result = {'moment_tensors': mts[i], 'ln_pdf': Ps[i], 'n': Ns[i]}
ignoredMTs, end = self._parse_job_result(result)
if end:
end = True # Process all results before ending
else:
end = None
end = self.comm.bcast(end, root=0) # Broadcast end to all mpis
_iteration = self.algorithm.iteration
_start_time = False
try:
_start_time = self.algorithm.start_time
except Exception:
pass
MTs, ignored_end = self._parse_job_result(False) # Get new random MTs
self.algorithm.iteration = _iteration
if _start_time:
self.algorithm.start_time = _start_time
else:
# Just run in parallel
MTs, end = self._parse_job_result(result)
end = self.comm.bcast(end, root=0)
result = ForwardTask(MTs, a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio, amplitude_ratio, percentage_error1_amplitude_ratio,
percentage_error2_amplitude_ratio, a_polarity_probability, polarity_probability, self.location_sample_multipliers,
incorrect_polarity_probability, return_zero, generate_samples=self.generate_samples, cutoff=self.generate_cutoff, dc=self.dc,
extension_data=extension_data)()
else:
result = ForwardTask(MTs, a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio, amplitude_ratio, percentage_error1_amplitude_ratio,
percentage_error2_amplitude_ratio, a_polarity_probability, polarity_probability, self.location_sample_multipliers,
incorrect_polarity_probability, return_zero, generate_samples=self.generate_samples, cutoff=self.generate_cutoff, dc=self.dc,
extension_data=extension_data)()
MTs, end = self._parse_job_result(result)
# Get left over pool results
if self.pool:
results = self.pool.all_results()
for result in results:
if result:
MTs, end = self._parse_job_result(result)
try:
self._print('Inversion completed\n\t'+'Elapsed time: '+str(time.time()-self.algorithm.start_time).split('.')[0]+' seconds\n\t'+str(self.algorithm.pdf_sample.n) +
' samples evaluated\n\t'+str(len(self.algorithm.pdf_sample.nonzero())) +
' non-zero samples\n\t'+'{:f}'.format((float(len(self.algorithm.pdf_sample.nonzero()))/float(self.algorithm.pdf_sample.n))*100)+'%')
except ZeroDivisionError:
self._print('Inversion completed\n\t'+'Elapsed time: '+str(time.time()-self.algorithm.start_time).split('.')[0]+' seconds\n\t'+str(self.algorithm.pdf_sample.n) +
' samples evaluated\n\t'+str(len(self.algorithm.pdf_sample.nonzero()))+' non-zero samples\n\t'+'{:f}'.format(0)+'%')
self._print('Algorithm max value: '+self.algorithm.max_value())
output_format = self.output_format
if self._MPI and self.mpi_output:
# MPI output (hyp format)
output_format = 'hyp'
results_format = self.results_format
self.results_format = 'hyp'
normalise = self.normalise
self.normalise = False
# Output results
if (self._MPI and not self.mpi_output and self.comm.Get_rank() == 0) or (self._MPI and self.mpi_output) or not self._MPI:
try:
self.output(event, fid, a_polarity=a_polarity, error_polarity=error_polarity, a1_amplitude_ratio=a1_amplitude_ratio, a2_amplitude_ratio=a2_amplitude_ratio,
amplitude_ratio=amplitude_ratio, percentage_error1_amplitude_ratio=percentage_error1_amplitude_ratio,
percentage_error2_amplitude_ratio=percentage_error2_amplitude_ratio, output_format=output_format)
except Exception:
self._print('Output Error')
traceback.print_exc()
if self._MPI and self.mpi_output:
# Output pkl file with data
self.normalise = normalise
self.results_format = results_format
fids = self.comm.gather(fid, 0)
if isinstance(fids, list):
self._print('All moment tensor samples outputted: '+str(len(fids))+' files')
else:
self._print('Error with fids '+str(fids)+' type:'+str(type(fids)))
if self.comm.Get_rank() == 0:
with open(os.path.splitext(fid)[0]+'.pkl', 'wb') as f:
pickle.dump({'fids': fids, 'event_data': event}, f)
except Exception:
traceback.print_exc()
@memory_profile_test(_MEMTEST)
def _random_sampling_multiple_forward(self, source_type='MT', **kwargs):
"""
Monte Carlo event forward function for multiple event joint pdf
Runs event forward model using Markov chain Monte Carlo approach for multiple events.
Args
source_type:['MT'] 'MT' or 'dc' for fid
"""
# Set fid
fid = self._fid({}, '', '_joint_inversion'+source_type, single=True)
# Set logger
self._set_logger(fid)
# Recovery test
if self._recover_test(fid):
return
# File sample recovery test
if self._file_sample_test(fid):
self._print('Continuing from previous sampling')
self.kwargs['fid'] = fid
# Get station angle coefficients, data etc. for multiple events
a_polarity = []
error_polarity = []
a1_amplitude_ratio = []
a2_amplitude_ratio = []
amplitude_ratio = []
percentage_error1_amplitude_ratio = []
percentage_error2_amplitude_ratio = []
a_polarity_probability = []
polarity_probability = []
a_relative_amplitude = []
relative_amplitude = []
percentage_error_relative_amplitude = []
relative_amplitude_stations = []
incorrect_polarity_probability = []
extension_data = []
for i, event in enumerate(self.data):
try:
# Check event data
if not event:
self._print('No Data')
continue
try:
event = self._trim_data(event)
except ValueError:
self._print('No Data')
continue
# Get event station angles and data etc. for event and add to list
(a_polarity_i, error_polarity_i, a1_amplitude_ratio_i, a2_amplitude_ratio_i, amplitude_ratio_i, percentage_error1_amplitude_ratio_i,
percentage_error2_amplitude_ratio_i, a_polarity_probability_i, polarity_probability_i, incorrect_polarity_probability_i, extension_data_i) = self._station_angles(event, i)
a_polarity.append(a_polarity_i)
error_polarity.append(error_polarity_i)
a1_amplitude_ratio.append(a1_amplitude_ratio_i)
a2_amplitude_ratio.append(a2_amplitude_ratio_i)
amplitude_ratio.append(amplitude_ratio_i)
percentage_error1_amplitude_ratio.append(percentage_error1_amplitude_ratio_i)
percentage_error2_amplitude_ratio.append(percentage_error2_amplitude_ratio_i)
a_polarity_probability.append(a_polarity_probability_i)
polarity_probability.append(polarity_probability_i)
incorrect_polarity_probability.append(incorrect_polarity_probability_i)
extension_data.append(extension_data_i)
# If running relative get relative data
if self._relative:
a_relative_amplitude_i, relative_amplitude_i, percentage_error_relative_amplitude_i, relative_amplitude_stations_i = relative_amplitude_ratio_matrix(event, self.location_samples)
a_relative_amplitude.append(a_relative_amplitude_i)
relative_amplitude.append(relative_amplitude_i)
percentage_error_relative_amplitude.append(percentage_error_relative_amplitude_i)
relative_amplitude_stations.append(relative_amplitude_stations_i)
except Exception:
traceback.print_exc()
self._print('\n\nJoint Inversion: '+str(self.number_events)+' Events\n--------\n')
self._update_samples()
self._set_algorithm(single=True, **self.kwargs)
location_sample_size = 1
if self.location_samples:
location_sample_size = len(self.location_samples)
try:
# Run forward model using MultipleEventsForwardTask
if self.pool:
# initialise all tasks
for i in range(self.pool.number_workers):
MTs, end = self._parse_job_result(False)
self.pool.custom_task(MultipleEventsForwardTask, MTs, a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio, amplitude_ratio,
percentage_error1_amplitude_ratio, percentage_error2_amplitude_ratio, a_polarity_probability, polarity_probability, a_relative_amplitude,
relative_amplitude, percentage_error_relative_amplitude, relative_amplitude_stations, self.location_sample_multipliers, incorrect_polarity_probability, self.minimum_number_intersections,
False, False, self._relative, location_sample_size, self._marginalise_relative, not self._relative_loop, extension_data)
elif self._MPI:
end = False
# Carried out in each worker
MTs, ignored_end = self._parse_job_result(False)
result = MultipleEventsForwardTask(MTs, a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio, amplitude_ratio, percentage_error1_amplitude_ratio,
percentage_error2_amplitude_ratio, a_polarity_probability, polarity_probability, a_relative_amplitude, relative_amplitude,
percentage_error_relative_amplitude, relative_amplitude_stations, self.location_sample_multipliers, incorrect_polarity_probability,
self.minimum_number_intersections, relative=self._relative, location_sample_size=location_sample_size,
marginalise_relative=self._marginalise_relative, combine=not self._relative_loop, extension_data=extension_data)()
else:
MTs, end = self._parse_job_result(False)
while not end:
if self.pool:
result = self.pool.result()
MTs, end = self._parse_job_result(result)
self.pool.custom_task(MultipleEventsForwardTask, MTs, a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio, amplitude_ratio,
percentage_error1_amplitude_ratio, percentage_error2_amplitude_ratio, a_polarity_probability, polarity_probability,
a_relative_amplitude, relative_amplitude, percentage_error_relative_amplitude, relative_amplitude_stations, self.location_sample_multipliers, incorrect_polarity_probability,
self.minimum_number_intersections, False, False, self._relative, location_sample_size, self._marginalise_relative, not self._relative_loop, extension_data)
elif self._MPI:
if not self.mpi_output:
items = self.comm.Gather(result, 0)
if self.comm.Get_rank() == 0:
end = False
for result in items:
MTs, end = self._parse_job_result(result)
if end:
end = True
else:
end = None
# Broadcast end to all mpis
end = self.comm.bcast(end, root=0)
_iteration = self.algorithm.iteration
_start_time = False
try:
_start_time = self.algorithm.start_time
except Exception:
pass
# Get new random MTs
MTs, ignored_end = self._parse_job_result(False)
self.algorithm.iteration = _iteration
if _start_time:
self.algorithm.start_time = _start_time
else:
MTs, end = self._parse_job_result(result)
end = self.comm.bcast(end, root=0)
result = MultipleEventsForwardTask(MTs, a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio, amplitude_ratio, percentage_error1_amplitude_ratio,
percentage_error2_amplitude_ratio, a_polarity_probability, polarity_probability, a_relative_amplitude, relative_amplitude,
percentage_error_relative_amplitude, relative_amplitude_stations, self.location_sample_multipliers, incorrect_polarity_probability,
self.minimum_number_intersections, relative=self._relative, location_sample_size=location_sample_size,
marginalise_relative=self._marginalise_relative, combine=not self._relative_loop, extension_data=extension_data)()
else:
result = MultipleEventsForwardTask(MTs, a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio, amplitude_ratio, percentage_error1_amplitude_ratio,
percentage_error2_amplitude_ratio, a_polarity_probability, polarity_probability, a_relative_amplitude, relative_amplitude,
percentage_error_relative_amplitude, relative_amplitude_stations, self.location_sample_multipliers, incorrect_polarity_probability,
self.minimum_number_intersections, relative=self._relative, location_sample_size=location_sample_size,
marginalise_relative=self._marginalise_relative, combine=not self._relative_loop, extension_data=extension_data)()
MTs, end = self._parse_job_result(result)
# Get all pool results
if self.pool:
results = self.pool.all_results()
for result in results:
if result:
MTs, end = self._parse_job_result(result)
try:
self._print('Inversion completed\n\t'+'Elapsed time: '+str(time.time()-self.algorithm.start_time).split('.')[0]+' seconds\n\t'+str(self.algorithm.pdf_sample.n)+' samples evaluated\n\t'+str(len(self.algorithm.pdf_sample.nonzero()))+' non-zero samples\n\t'+'{:f}'.format((float(len(self.algorithm.pdf_sample.nonzero()))/float(self.algorithm.pdf_sample.n))*100)+'%')
except ZeroDivisionError:
self._print('Inversion completed\n\t'+'Elapsed time: '+str(time.time()-self.algorithm.start_time).split('.')[0]+' seconds\n\t'+str(self.algorithm.pdf_sample.n)+' samples evaluated\n\t'+str(len(self.algorithm.pdf_sample.nonzero()))+' non-zero samples\n\t'+'{:f}'.format(0)+'%')
self._print('Algorithm max value: '+self.algorithm.max_value())
# Handle mpi output
output_format = self.output_format
if self._MPI and self.mpi_output:
output_format = 'hyp'
results_format = self.results_format
self.results_format = 'hyp'
normalise = self.normalise
self.normalise = False
# Output results
if (self._MPI and not self.mpi_output and self.comm.Get_rank() == 0) or (self._MPI and self.mpi_output) or not self._MPI:
try:
print(fid)
self.output(self.data, fid, a_polarity=a_polarity, error_polarity=error_polarity, a1_amplitude_ratio=a1_amplitude_ratio, a2_amplitude_ratio=a2_amplitude_ratio, amplitude_ratio=amplitude_ratio, percentage_error1_amplitude_ratio=percentage_error1_amplitude_ratio, percentage_error2_amplitude_ratio=percentage_error2_amplitude_ratio, output_format=output_format)
except Exception:
self._print('Output Error')
traceback.print_exc()
# Output mpi output pkl if self.mpi_output
if self._MPI and self.mpi_output:
self.normalise = normalise
self.results_format = results_format
fids = self.comm.gather(fid, 0)
if isinstance(fids, list):
self._print('All moment tensor samples outputted: '+str(len(fids))+' files')
else:
self._print('Error with fids '+str(fids)+' type:'+str(type(fids)))
if self.comm.Get_rank() == 0:
with open(os.path.splitext(fid)[0]+'.pkl', 'wb') as f:
pickle.dump({'fids': fids, 'event_data': event}, f)
except Exception:
traceback.print_exc()
[docs] def forward(self):
"""
Runs event forward model using the arguments when the Inversion object was initialised.
Depending on the algorithm selection uses either random sampling or Markov chain Monte Carlo sampling - for more information on the different algorithms see MTfit.algorithms documentation.
"""
source_type = 'MT'
if self.dc:
source_type = 'DC'
t0 = time.time()
# Choose type of forward model to run (McMC or Random sampling, single event or joint events)
if self.McMC:
if self.multiple_events:
self._mcmc_multiple_forward(source_type)
else:
self._mcmc_forward(source_type)
else:
if self.multiple_events:
self._random_sampling_multiple_forward(source_type)
else:
self._random_sampling_forward(source_type)
# Print run time
t1 = time.time()
self._print('\n--------\n{} inversion complete, elapsed time: {}'.format(source_type, t1-t0))
self._close_pool()
def _parse_job_result(self, job_result=False):
"""
Parse job results
Checks and parses the job results and either iterates or re-initialises.
Args
job_result:[False] result from foward task.
"""
# Check job result with algorithm
if job_result and not isinstance(job_result, Exception):
# in loop so process and return new results
# Process result
return self.algorithm.iterate(job_result)
else:
return self.algorithm.initialise()
def _trim_data(self, data):
"""
Remove unrequired data
Checks and removes un-required data (not in inversion_options if set) from the event data dictionary.
Args
data: Event data dictionary.
Returns
data: Trimmed event data dictionary.
Raises
ValueError: No remaining data for selected inversion options.
"""
# Check inverion options
if self.inversion_options:
for key in list(data.keys()):
# Remove data_type not in inversion options
if key not in self.inversion_options and key not in ['UID']:
data.pop(key)
if not len(data) or list(data.keys()) == ['UID']:
raise ValueError('No data remaining for the selected inversion options:\n\t'+'\n\t'.join(self.inversion_options))
else:
return data
else:
# use all options
return data
#
# Station angle coefficient functions and helpers
#
def polarity_matrix(data, location_samples=False):
"""
Generates the polarity observation matrices from the data.
Args:
data: Event data dictionary
location_samples:[False] Station angle scatter samples.
Returns
a,error
a: numpy matrix of station angle MT coefficients multiplied by the polarity.
error: numpy matrix of fractional error.
"""
a = False
_a = False
error = False
incorrect_polarity_prob = 0
# Check location samples
if location_samples:
original_samples = [u.copy() for u in location_samples]
# Get polarity data
for key in sorted([u for u in data.keys() if 'polarity' in u.lower() and 'prob' not in u.lower()]):
mode = key.lower().split('polarity')[0]
if location_samples:
location_samples = [u.copy() for u in original_samples]
selected_stations = sorted(list(set(location_samples[0]['Name']) & set(data[key]['Stations']['Name'])))
n_stations = len(selected_stations)
indices = [location_samples[0]['Name'].index(u) for u in selected_stations]
angles = np.zeros((n_stations, len(location_samples), 6))
for i, sample in enumerate(location_samples):
sample['Name'] = operator.itemgetter(*indices)(sample['Name'])
sample['TakeOffAngle'] = sample['TakeOffAngle'][indices]
sample['Azimuth'] = sample['Azimuth'][indices]
angles[:, i, :] = station_angles(sample, mode)
angles = np.array(angles)
# Fix for station order change with location samples
indices = [data[key]['Stations']['Name'].index(u) for u in selected_stations]
# Get measured and errors from location sample indices
measured = data[key]['Measured'][indices]
_error = np.array(data[key]['Error'][indices]).flatten()
# Try to get incorrect polarity probability
if data[key].__contains__('IncorrectPolarityProbability'):
_incorrect_polarity_prob = np.array(data[key]['IncorrectPolarityProbability'][indices]).flatten()
else:
_incorrect_polarity_prob = np.array([0])
else:
# Otherwise get angles and measured directly
n_stations = np.prod(data[key]['Stations']['TakeOffAngle'].shape)
angles = np.zeros((n_stations, 1, 6))
angles[:, 0, :] = station_angles(data[key]['Stations'], mode)
measured = data[key]['Measured']
_error = np.array(data[key]['Error']).flatten()
if data[key].__contains__('IncorrectPolarityProbability'):
_incorrect_polarity_prob = np.array(data[key]['IncorrectPolarityProbability']).flatten()
else:
_incorrect_polarity_prob = np.array([0])
# If angles have multiple dimensions (location samples), expand the measurements
if angles.ndim > 2:
measured = np.array(measured)
measured = np.expand_dims(measured, 1)
# Set or append to outputs
if not _a:
_a = True
a = np.multiply(angles, measured)
error = _error
incorrect_polarity_prob = _incorrect_polarity_prob
else:
a = np.append(a, np.multiply(angles, measured), 0)
error = np.append(error, _error, 0)
if len(_incorrect_polarity_prob) != n_stations:
# Make sure this is the same length as the data as we are appending it
_incorrect_polarity_prob = np.kron(_incorrect_polarity_prob, np.ones(n_stations))
incorrect_polarity_prob = np.append(incorrect_polarity_prob, _incorrect_polarity_prob, 0)
# Set incorrect polarity prob if default
if np.sum(np.abs(incorrect_polarity_prob)) == 0:
incorrect_polarity_prob = 0
return a, error, incorrect_polarity_prob
def polarity_probability_matrix(data, location_samples=False):
"""
Generates the polarity PDF observation matrices from the data.
Args:
data: Event data dictionary
location_samples:[False] Station angle scatter samples.
Returns
a,probability,error
a: numpy matrix of station angle MT coefficients.
probability: numpy matrix of polarity probabilities.
error: numpy matrix of fractional error.
"""
a = False
positive_probability = False
negative_probability = False
_a = False
incorrect_polarity_prob = 0
# Check location samples and get sample
if location_samples:
original_samples = [u.copy() for u in location_samples]
# Loop over polarity prob data
for key in sorted([u for u in data.keys() if 'polarity' in u.lower() and 'prob' in u.lower()]):
mode = key.lower().split('polarity')[0]
# If location samples, then select stations appropriately
if location_samples:
location_samples = [u.copy() for u in original_samples]
selected_stations = sorted(list(set(location_samples[0]['Name']) & set(data[key]['Stations']['Name'])))
indices = [location_samples[0]['Name'].index(u) for u in selected_stations]
angles = np.zeros((len(selected_stations), len(location_samples), 6))
for i, sample in enumerate(location_samples):
sample['Name'] = operator.itemgetter(*indices)(sample['Name'])
sample['TakeOffAngle'] = sample['TakeOffAngle'][indices]
sample['Azimuth'] = sample['Azimuth'][indices]
angles[:, i, :] = station_angles(sample, mode)
angles = np.array(angles)
indices = [data[key]['Stations']['Name'].index(u) for u in selected_stations]
measured = data[key]['Measured'][indices]
if data[key].__contains__('IncorrectPolarityProbability'):
_incorrect_polarity_prob = np.array(data[key]['IncorrectPolarityProbability'][indices]).flatten()
else:
_incorrect_polarity_prob = np.array([0])
# Otherwise get angles and measeured
else:
angles = np.zeros((np.prod(data[key]['Stations']['TakeOffAngle'].shape), 1, 6))
angles[:, 0, :] = station_angles(data[key]['Stations'], mode)
measured = data[key]['Measured']
if data[key].__contains__('IncorrectPolarityProbability'):
_incorrect_polarity_prob = np.array(data[key]['IncorrectPolarityProbability']).flatten()
else:
_incorrect_polarity_prob = np.array([0])
# Set or append to outputs
if not _a:
_a = True
a = angles
positive_probability = np.array(measured[:, 0]).flatten()
negative_probability = np.array(measured[:, 1]).flatten()
incorrect_polarity_prob = _incorrect_polarity_prob
else:
a = np.append(a, angles, 0)
positive_probability = np.append(positive_probability, np.array(measured[:, 0]).flatten())
negative_probability = np.append(positive_probability, np.array(measured[:, 1]).flatten())
incorrect_polarity_prob = np.append(incorrect_polarity_prob, _incorrect_polarity_prob, 0)
# Set incorrect polarity prob if default
if np.sum(np.abs(incorrect_polarity_prob)) == 0:
incorrect_polarity_prob = 0
return a, (positive_probability, negative_probability), incorrect_polarity_prob
def amplitude_ratio_matrix(data, location_samples=False):
"""
Generates the amplitude ratio observation matrices from the data.
Args:
data: Event data dictionary
location_samples:[False] Station angle scatter samples.
Returns
a1,a2,amplitude_ratio,percentage_error1,percentage_error2
a1: numpy matrix of station angle MT coefficients for ratio numerator.
a2: numpy matrix of station angle MT coefficients for ratio denominator.
amplitude_ratio: numpy matrix of observed ratios.
percentage_error1: numpy matrix of ratio numerator percentage error.
percentage_error2: numpy matrix of ratio denominator percentage error.
"""
a1 = False
a2 = False
a = False
percentage_error1 = False
percentage_error2 = False
amplitude_ratio = False
if location_samples:
original_samples = [u.copy() for u in location_samples]
# Loop over data and get amplitude ratio data
for key in sorted([u for u in data.keys() if 'amplituderatio' in u.lower() or 'amplitude_ratio' in u.lower()]):
phase = key.replace('_', '').lower().split('amplituderatio')[0]
phase = phase.rstrip('rms')
phase = phase.rstrip('q')
phase.replace('_', '')
# If location samples, get intersection stations and data
if location_samples:
location_samples = [u.copy() for u in original_samples]
selected_stations = sorted(list(set(location_samples[0]['Name']) & set(data[key]['Stations']['Name'])))
indices = [location_samples[0]['Name'].index(u) for u in selected_stations]
angles1 = np.zeros((len(selected_stations), len(location_samples), 6))
angles2 = np.zeros((len(selected_stations), len(location_samples), 6))
for i, sample in enumerate(location_samples):
sample['Name'] = operator.itemgetter(*indices)(sample['Name'])
sample['TakeOffAngle'] = sample['TakeOffAngle'][indices]
sample['Azimuth'] = sample['Azimuth'][indices]
angles1[:, i, :] = station_angles(sample, phase.split('/')[0])
angles2[:, i, :] = station_angles(sample, phase.split('/')[1])
angles1 = np.array(angles1)
angles2 = np.array(angles2)
angles = [angles1, angles2]
indices = [data[key]['Stations']['Name'].index(u) for u in selected_stations]
_measured = data[key]['Measured'][indices]
error = data[key]['Error'][indices]
# Otherwise get angles and measurements
else:
angles1 = np.zeros((np.prod(data[key]['Stations']['TakeOffAngle'].shape), 1, 6))
angles2 = np.zeros((np.prod(data[key]['Stations']['TakeOffAngle'].shape), 1, 6))
angles1[:, 0, :] = station_angles(data[key]['Stations'], phase.split('/')[0])
angles2[:, 0, :] = station_angles(data[key]['Stations'], phase.split('/')[1])
angles1 = np.array(angles1)
angles2 = np.array(angles2)
angles = [angles1, angles2]
_measured = data[key]['Measured']
error = data[key]['Error']
# Set or append to outputs
if not a:
a = True
a1 = angles[0]
a2 = angles[1]
amplitude_ratio = np.array(np.abs(np.divide(_measured[:, 0], _measured[:, 1]))).flatten()
percentage_error1 = np.array(np.divide(error[:, 0], np.abs(_measured[:, 0]))).flatten()
percentage_error2 = np.array(np.divide(error[:, 1], np.abs(_measured[:, 1]))).flatten()
else:
a1 = np.append(a1, angles[0], 0)
a2 = np.append(a2, angles[1], 0)
percentage_error1 = np.append(percentage_error1, np.array(np.divide(error[:, 0], np.abs(_measured[:, 0]))).flatten(), 0)
percentage_error2 = np.append(percentage_error2, np.array(np.divide(error[:, 1], np.abs(_measured[:, 1]))).flatten(), 0)
amplitude_ratio = np.append(amplitude_ratio, np.array(np.abs(np.divide(_measured[:, 0], _measured[:, 1]))).flatten(), 0)
return a1, a2, amplitude_ratio, percentage_error1, percentage_error2
def relative_amplitude_ratio_matrix(data, location_samples=False):
"""
Generates the relative amplitude ratio observation matrices from the data.
Args:
data: Event data dictionary
location_samples:[False] Station angle scatter samples.
Returns
a_relative_amplitude,relative_amplitude,percentage_error
a_relative_amplitude: numpy matrix of station angle MT coefficients for ratio numerator.
relative_amplitude: numpy matrix of observations.
percentage_error: numpy matrix of relative amplitude percentage error.
"""
a_relative_amplitude = False
a = False
percentage_error = False
relative_amplitude_stations = []
relative_amplitude = False
if location_samples:
original_samples = [u.copy() for u in location_samples]
# Loop over data and get amplitude data
for key in sorted([u for u in data.keys() if 'amplitude' in u.lower() and 'ratio' not in u.lower()]):
phase = key.replace('_', '').lower().split('amplitude')[0]
phase = phase.rstrip('q')
phase = phase.rstrip('rms')
phase = phase.rstrip('q')
# If location samples, get intersection stations and data
if location_samples:
location_samples = [u.copy() for u in original_samples]
selected_stations = sorted(list(set(location_samples[0]['Name']) & set(data[key]['Stations']['Name'])))
indices = [location_samples[0]['Name'].index(u) for u in selected_stations]
angles = np.zeros((len(selected_stations), len(location_samples), 6))
for i, sample in enumerate(location_samples):
sample['Name'] = operator.itemgetter(*indices)(sample['Name'])
sample['TakeOffAngle'] = sample['TakeOffAngle'][indices]
sample['Azimuth'] = sample['Azimuth'][indices]
angles[:, i, :] = station_angles(sample, phase)
angles = np.array(angles)
relative_amplitude_stations.extend(selected_stations)
indices = [data[key]['Stations']['Name'].index(u) for u in selected_stations]
measured = data[key]['Measured'][indices]
error = data[key]['Error'][indices]
# Otherwise get angles and measurements
else:
angles = np.zeros((np.prod(data[key]['Stations']['TakeOffAngle'].shape), 1, 6))
angles[:, 0, :] = station_angles(data[key]['Stations'], phase)
relative_amplitude_stations.extend(data[key]['Stations']['Name'])
measured = np.array(data[key]['Measured']).flatten()
error = np.array(data[key]['Error']).flatten()
# Set or append to outputs
if not a:
a = True
a_relative_amplitude = angles
relative_amplitude = np.array(np.abs(measured)).flatten()
percentage_error = np.array(np.divide(error, np.abs(measured))).flatten()
else:
a_relative_amplitude = np.append(a_relative_amplitude, angles, 0)
relative_amplitude = np.array(np.abs(np.append(relative_amplitude, measured, 0))).flatten()
percentage_error = np.array(np.abs(np.append(percentage_error, np.divide(error, measured), 0))).flatten()
if not len(relative_amplitude_stations):
relative_amplitude_stations = False
return a_relative_amplitude, relative_amplitude, percentage_error, relative_amplitude_stations
def _intersect_stations(relative_amplitude_stations_i, relative_amplitude_stations_j, a_relative_amplitude_ratio_i, a_relative_amplitude_ratio_j,
relative_amplitude_i, relative_amplitude_j, percentage_error_relative_amplitude_i, percentage_error_relative_amplitude_j):
"""
Determine the intersection of the two lists of stations, and then returns the input angles and amplitudes etc for the intersection.
Args
relative_amplitude_stations_i: List of stations corresponding to the data from event_i
relative_amplitude_stations_j: List of stations corresponding to the data from event_j
a_relative_amplitude_ratio_i: numpy matrix of station GFs from event_i
a_relative_amplitude_ratio_j: numpy matrix of station GFs from event_j
relative_amplitude_i: numpy matrix of amplitude observations from event_i
relative_amplitude_j: numpy matrix of amplitude observations from event_j
percentage_error_relative_amplitude_i: numpy matrix of percentage errors from event_i
percentage_error_relative_amplitude_j: numpy matrix of percentage errors from event_j
Returns
a_relative_amplitude_ratio_i: numpy matrix of station GFs for event_i and event_j intersection corresponding to event_i
a_relative_amplitude_ratio_j: numpy matrix of station GFs for event_i and event_j intersection corresponding to event_j
relative_amplitude_i: numpy matrix of amplitude observations for event_i and event_j intersection corresponding to event_i
relative_amplitude_j: numpy matrix of amplitude observations for event_i and event_j intersection corresponding to event_j
percentage_error_relative_amplitude_i: numpy matrix of percentage errors for event_i and event_j intersection corresponding to event_i
percentage_error_relative_amplitude_j: numpy matrix of percentage errors for event_i and event_j intersection corresponding to event_j
number_intersections: integer number of intersections between event_i and event_j.
"""
# Get intersection of stations between events
selected_stations = list(set(relative_amplitude_stations_i) & set(relative_amplitude_stations_j))
# Get indices for each event
indices_i = [i for i, u in enumerate(relative_amplitude_stations_i) if u in selected_stations]
indices_j = [i for i, u in enumerate(relative_amplitude_stations_j) if u in selected_stations]
# Get parameters for event i
a_relative_amplitude_ratio_i = a_relative_amplitude_ratio_i[indices_i]
relative_amplitude_i = relative_amplitude_i[indices_i]
percentage_error_relative_amplitude_i = percentage_error_relative_amplitude_i[indices_i]
# Get parameters for event j
a_relative_amplitude_ratio_j = a_relative_amplitude_ratio_j[indices_j]
relative_amplitude_j = relative_amplitude_j[indices_j]
percentage_error_relative_amplitude_j = percentage_error_relative_amplitude_j[indices_j]
return (np.ascontiguousarray(a_relative_amplitude_ratio_i), np.ascontiguousarray(a_relative_amplitude_ratio_j),
np.ascontiguousarray(relative_amplitude_i), np.ascontiguousarray(relative_amplitude_j),
np.ascontiguousarray(percentage_error_relative_amplitude_i), np.ascontiguousarray(percentage_error_relative_amplitude_j),
len(indices_i))
def _intersect_stations_extension_data(extension_data_i, extension_data_j):
"""
Determines the intersection of the two lists of stations, and then returns the input angles and amplitudes etc for the intersection.
Args
extension_data_i: extension data from event_i
extension_data_j: extension data from event_j
Returns
extension_data_i: extension data for event_i and event_j intersection corresponding to event_i
extension_data_j: extension data for event_i and event_j intersection corresponding to event_j
number_intersections: integer number of intersections between event_i and event_j.
"""
# Get intersection of stations between events
stations_i = extension_data_i['stations']
stations_j = extension_data_j['stations']
selected_stations = list(set(stations_i) & set(stations_j))
# Get indices for each event
indices_i = [i for i, u in enumerate(stations_i) if u in selected_stations]
indices_j = [i for i, u in enumerate(stations_j) if u in selected_stations]
# Get parameters for event i
for key in extension_data_i:
extension_data_i[key] = np.ascontiguousarray(extension_data_i[key][indices_i])
for key in extension_data_j:
extension_data_j[key] = np.ascontiguousarray(extension_data_j[key][indices_j])
# Get parameters for event j
return extension_data_i, extension_data_j, len(indices_i)
def station_angles(stations, phase, radians=False):
"""
Calculates the station MT coefficients from the station angles. ###############
TakeOffAngle 0 down (as this is positive z-axis in NED system.
Args
stations: station dictionary (format is the station component of the data dictionary (MTfit.inversion.Inversion docstrings))
phase: 'P','SH','SV' - the component for which to calculate the station angles, can be a ratio separated with a \.
radians:[False] Boolean flag to set radians true or false
Returns
station angles MT coefficients.
"""
# Get azimuth and takeoff angles
try:
azimuth = np.matrix(stations['Azimuth'])
takeoff_angle = np.matrix(stations['TakeOffAngle'])
except Exception:
azimuth = stations[0] # stations is angle tuple
takeoff_angle = stations[1]
# Transpose to correct orientation if necessary
if azimuth.shape[0] < azimuth.shape[1]:
azimuth = azimuth.T
if takeoff_angle.shape[0] < takeoff_angle.shape[1]:
takeoff_angle = takeoff_angle.T
# Radian conversion if not radians
if not radians:
azimuth = azimuth*np.pi/180
if not radians:
takeoff_angle = takeoff_angle*np.pi/180
# Set arrays
azimuth = np.array(azimuth)
takeoff_angle = np.array(takeoff_angle)
# Get phase
phase = phase.lower().rstrip('q')
# Calculate angles
if phase.lower() == 'p':
return np.matrix(np.array([np.cos(azimuth)*np.cos(azimuth)*np.sin(takeoff_angle)*np.sin(takeoff_angle),
np.sin(azimuth)*np.sin(azimuth)*np.sin(takeoff_angle)*np.sin(takeoff_angle),
np.cos(takeoff_angle)*np.cos(takeoff_angle),
(np.sqrt(2))*np.sin(azimuth)*np.cos(azimuth)*np.sin(takeoff_angle)*np.sin(takeoff_angle),
(np.sqrt(2))*np.cos(azimuth)*np.cos(takeoff_angle)*np.sin(takeoff_angle),
(np.sqrt(2))*np.sin(azimuth)*np.cos(takeoff_angle)*np.sin(takeoff_angle)]).T)
elif phase.lower() == 'sh':
return np.matrix(np.array([-np.sin(azimuth)*np.cos(azimuth)*np.sin(takeoff_angle),
np.sin(azimuth)*np.cos(azimuth)*np.sin(takeoff_angle),
0*azimuth,
(1/np.sqrt(2))*np.cos(2*azimuth)*np.sin(takeoff_angle),
-(1/np.sqrt(2))*np.sin(azimuth)*np.cos(takeoff_angle),
(1/np.sqrt(2))*np.cos(azimuth)*np.cos(takeoff_angle)]).T)
elif phase.lower() == 'sv':
return np.matrix(np.array([np.cos(azimuth)*np.cos(azimuth)*np.sin(takeoff_angle)*np.cos(takeoff_angle),
np.sin(azimuth)*np.sin(azimuth)*np.sin(takeoff_angle)*np.cos(takeoff_angle),
-np.sin(takeoff_angle)*np.cos(takeoff_angle),
np.sqrt(2)*np.cos(azimuth)*np.sin(azimuth)*np.sin(takeoff_angle)*np.cos(takeoff_angle),
(1/np.sqrt(2))*np.cos(azimuth)*np.cos(2*takeoff_angle),
(1/np.sqrt(2))*np.sin(azimuth)*np.cos(2*takeoff_angle)]).T)
elif len(phase.split('/')) == 2:
numerator = station_angles(stations, phase.split('/')[0], radians)
denominator = station_angles(stations, phase.split('/')[1], radians)
return (numerator, denominator)
else:
raise ValueError('{} phase not recognised.'.format(phase))
#
# Solution misfit and probability check functions
#
def _polarity_misfit_check(polarity, azimuth, takeoffangle, phase, mt):
"""
Check polarity misfit for a given MT (private function)
Args
polarity: np.array of polarity observations.
azimuth: np.matrix of azimuths.
takeoffangle: np.matrix of takeoffangles.
phase: Phase of polarity measurements (P, SH, SV).
mt: np.matrix moment tensor six vector Mxx Myy Mzz sqrt(2)*Mxy sqrt(2)*Mxz sqrt(2)*Myz.
Returns
indices of differing model and observed polarities or boolean false if no polarities.
"""
# If polarity exists then check model polarities vs observed polarities
if polarity != 0:
a_station = station_angles({'Azimuth': [azimuth], 'TakeOffAngle': [takeoffangle]}, phase)
model_polarity = float(a_station*mt)
return model_polarity*polarity <= 0
return False
def polarity_misfit_check(mt, data={}, data_file=False, inversion_options=['PPolarity']):
"""
Checks polarity misfit for a given MT and data dictionary
Args
mt: np.matrix moment tensor six vector Mxx Myy Mzz sqrt(2)*Mxy sqrt(2)*Mxz sqrt(2)*Myz.
data: data dictionary or list of dictionaries.
data_file: data input file (data dict or data_file must be provided).
inversion_options: polarity inversion_options list .
Returns
list of names of misfitting stations
"""
# Copy data
copied_data = copy.copy(data)
# Set up inverson
inversion = Inversion(copied_data, data_file, False, parallel=False, inversion_options=inversion_options)
# Loop over events
for i, event in enumerate(inversion.data):
# Check data
if not event:
print('No Data')
continue
# Set algorithm
inversion._set_algorithm()
try:
event = inversion._trim_data(event)
except ValueError:
print('No Data')
continue
# Get observed polarities/inversion_options in data
observed_polarity = False
stations = []
for i, key in enumerate(inversion_options):
if key in event:
if isinstance(observed_polarity, bool):
observed_polarity = event[key]['Measured'].copy()
else:
observed_polarity = np.append(observed_polarity, event[key]['Measured'].copy(), 0)
event[key]['Measured'] /= event[key]['Measured']
stations.extend(event[key]['Stations']['Name'])
# Create model polarities
observed_polarity = np.array(observed_polarity).flatten()
(a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio, amplitude_ratio, percentage_error1_amplitude_ratio,
percentage_error2_amplitude_ratio, a_polarity_probability, polarity_probability, incorrect_polarity_probability) = inversion._station_angles(event, i)
model_polarity = np.sign(np.tensordot(a_polarity, mt, 1)).flatten()
# Compare with observed polarities
return sorted(list(np.array(stations)[(model_polarity*observed_polarity) < 0]))
return []
def probability_mt_check(mt, data={}, data_file=False, location_pdf_file_path=False, inversion_options=['PPolarity', 'P/SHAmplitudeRatio', 'P/SVAmplitudeRatio'], RMS=False, Q=False):
"""
Evaluate results for a given mt and data
Args
mt: np.matrix moment tensor six vector Mxx Myy Mzz sqrt(2)*Mxy sqrt(2)*Mxz sqrt(2)*Myz.
data: data dictionary or list of dictionaries.
data_file: data input file (data dict or data_file must be provided).
location_pdf_file_path: location PDF file path.
inversion_options:['PPolarity','P/SHAmplitudeRatio','P/SVAmplitudeRatio'] polarity inversion_options list.
RMS:[False] Use RMS amplitudes (if not set in inversion options).
Q:[False] Use Q (if not set in inversion options).
Returns
results list
"""
# Copy data
copied_data = copy.copy(data)
# Get amplitude ratio options
for i, u in enumerate(inversion_options):
if 'AmplitudeRatio' in u:
if RMS and 'RMS' not in u:
inversion_options[i] = u.split('AmplitudeRatio')[0]+'RMSAmplitudeRatio'
if Q and 'Q' not in u:
inversion_options[i] = u.split('AmplitudeRatio')[0]+'QAmplitudeRatio'
inversion = Inversion(copied_data, data_file, location_pdf_file_path, parallel=False, inversion_options=inversion_options)
results = []
for i, event in enumerate(inversion.data):
if not event:
print('No Data')
continue
inversion._set_algorithm()
try:
event = inversion._trim_data(event)
except ValueError:
print('No Data')
continue
(a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio, amplitude_ratio, percentage_error1_amplitude_ratio,
percentage_error2_amplitude_ratio, a_polarity_probability, polarity_probability, incorrect_polarity_probability) = inversion._station_angles(event, i)
results.append(ForwardTask(mt, a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio, amplitude_ratio,
percentage_error1_amplitude_ratio, percentage_error2_amplitude_ratio, a_polarity_probability,
polarity_probability, inversion.location_sample_multipliers, incorrect_polarity_probability, True)())
return results
def polarity_mt_check(mt, data={}, data_file=False, location_pdf_file_path=False, inversion_options=['PPolarity', 'SHPolarity', 'SVPolarity']):
"""
Evaluate polarity results for a given mt and data
Args
mt: np.matrix moment tensor six vector Mxx Myy Mzz sqrt(2)*Mxy sqrt(2)*Mxz sqrt(2)*Myz
data: data dictionary or list of dictionaries
data_file: data input file (data dict or data_file must be provided)
location_pdf_file_path: location PDF file path
inversion_options:['PPolarity','P/SHAmplitudeRatio','P/SVAmplitudeRatio'] inversion_options list
Returns
results list
"""
# Copy data
copied_data = copy.copy(data)
# Get polarity inversion options
processed_inversion_options = []
for i, u in enumerate(inversion_options):
if 'polarity' in u.lower() and 'prob' not in u:
processed_inversion_options.append(u)
# Create inversion object
inversion = Inversion(copied_data, data_file, location_pdf_file_path, parallel=False, inversion_options=processed_inversion_options)
results = []
for i, event in enumerate(inversion.data):
if not event:
print('No Data')
continue
# Set algorithms
inversion._set_algorithm()
try:
event = inversion._trim_data(event)
except ValueError:
print('No Data')
continue
# Get angles and data
(a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio, amplitude_ratio, percentage_error1_amplitude_ratio,
percentage_error2_amplitude_ratio, a_polarity_probability, polarity_probability, incorrect_polarity_probability) = inversion._station_angles(event, i)
# Get results
results.append(ForwardTask(mt, a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio, amplitude_ratio,
percentage_error1_amplitude_ratio, percentage_error2_amplitude_ratio, a_polarity_probability,
polarity_probability, inversion.location_sample_multipliers, incorrect_polarity_probability, True)())
return results
def polarity_probability_mt_check(mt, data={}, data_file=False, location_pdf_file_path=False, inversion_options=['PPolarityProbability', 'SHPolarityProbability']):
"""
Evaluate Polarity probability results for a given mt and data
Args
mt: np.matrix moment tensor six vector Mxx Myy Mzz sqrt(2)*Mxy sqrt(2)*Mxz sqrt(2)*Myz
data: data dictionary or list of dictionaries
data_file: data input file (data dict or data_file must be provided)
location_pdf_file_path: location PDF file path
inversion_options:['PPolarityProbability','SHPolarityProbability'] polarity inversion_options list
Returns
results list
"""
# Copy data
copied_data = copy.copy(data)
# Get polarity inversion options
processed_inversion_options = []
for i, u in enumerate(inversion_options):
if 'polarity' in u.lower() and 'prob' in u:
processed_inversion_options.append(u)
# Create inversion object
inversion = Inversion(copied_data, data_file, location_pdf_file_path, parallel=False, inversion_options=processed_inversion_options)
results = []
for i, event in enumerate(inversion.data):
if not event:
print('No Data')
continue
# Set algorithms
inversion._set_algorithm()
try:
event = inversion._trim_data(event)
except ValueError:
print('No Data')
continue
# Get angles and data
(a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio, amplitude_ratio, percentage_error1_amplitude_ratio,
percentage_error2_amplitude_ratio, a_polarity_probability, polarity_probability, incorrect_polarity_probability) = inversion._station_angles(event, i)
# Get results
results.append(ForwardTask(mt, a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio, amplitude_ratio,
percentage_error1_amplitude_ratio, percentage_error2_amplitude_ratio, a_polarity_probability,
polarity_probability, inversion.location_sample_multipliers, incorrect_polarity_probability, True)())
return results
def amplitude_ratio_probability_mt_check(mt, data={}, data_file=False, location_pdf_file_path=False, inversion_options=['P/SHAmplitudeRatio', 'P/SVAmplitudeRatio'], RMS=False, Q=False):
"""
Evaluate amplitude ratio results for a given mt and data
Args
mt: np.matrix moment tensor six vector Mxx Myy Mzz sqrt(2)*Mxy sqrt(2)*Mxz sqrt(2)*Myz.
data: data dictionary or list of dictionaries.
data_file: data input file (data dict or data_file must be provided).
location_pdf_file_path: location PDF file path.
inversion_options:['P/SHAmplitudeRatio','P/SVAmplitudeRatio'] inversion_options list.
RMS:[False] Use RMS amplitudes (if not set in inversion options).
Q:[False] Use Q (if not set in inversion options).
Returns
results list
"""
# Copy data
copied_data = copy.copy(data)
# Get polarity inversion options
processed_inversion_options = []
for i, u in enumerate(inversion_options):
if 'amplituderatio' in u.lower():
if RMS and 'rms' not in u.lower():
u = u.split('AmplitudeRatio')[0]+'RMSAmplitudeRatio'
if Q and 'q' not in u.lower():
u = u.split('AmplitudeRatio')[0]+'QAmplitudeRatio'
processed_inversion_options.append(u)
# Create inversion object
inversion = Inversion(copied_data, data_file, location_pdf_file_path, parallel=False, inversion_options=processed_inversion_options)
results = []
for i, event in enumerate(inversion.data):
if not event:
print('No Data')
continue
# Set algorithms
inversion._set_algorithm()
try:
event = inversion._trim_data(event)
except ValueError:
print('No Data')
continue
# Get angles and data
(a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio, amplitude_ratio,
percentage_error1_amplitude_ratio, percentage_error2_amplitude_ratio, a_polarity_probability,
polarity_probability, incorrect_polarity_probability) = inversion._station_angles(event, i)
# Get results
results.append(ForwardTask(mt, a_polarity, error_polarity, a1_amplitude_ratio, a2_amplitude_ratio, amplitude_ratio,
percentage_error1_amplitude_ratio, percentage_error2_amplitude_ratio, a_polarity_probability,
polarity_probability, inversion.location_sample_multipliers, incorrect_polarity_probability, True)())
return results
def _pop_station(data, key, station):
"""Remove station from data type (key)"""
# Get index
index = data[key]['Stations']['Name'].index(station)
# Delete indices
data[key]['Measured'] = np.delete(data[key]['Measured'], index, 0)
data[key]['Error'] = np.delete(data[key]['Error'], index, 0)
data[key]['Stations']['Azimuth'] = np.delete(data[key]['Stations']['Azimuth'], index, 0)
data[key]['Stations']['TakeOffAngle'] = np.delete(data[key]['Stations']['TakeOffAngle'], index, 0)
data[key]['Stations']['Name'].pop(index)
return data
#
# Combine hyp event output from MPI
#
def combine_mpi_output(filepath='', output_format='matlab', parallel=False, mpi=False, binary_file_version=2, **kwargs):
"""
Combine MPI output into single output
MPI output is hyp and mt files for each mpi process, this function combines them into a single output.
Args:
filepath:[.] Input filepath.
output_format:[matlab] Output format.
parallel:[False] Boolean flag to run in parallel using job pool (Not MPI).
mpi:[False] Boolean MPI flag.
"""
# Check MPI
if mpi:
try:
from mpi4py import MPI
comm = MPI.COMM_WORLD
except Exception:
mpi = False
# Set filepath to folder
if os.path.isdir(filepath):
filepath += os.path.sep
# Get hyp files
hypfiles = glob.glob(filepath+'*.hyp')
# Get UIDs
UIDs = sorted(list(set(['.'.join(os.path.splitext(u)[0].split('.')[:-1]) for u in hypfiles if len('.'.join(os.path.splitext(u)[0].split('.')[:-1]))])))
print('---------Combining Files------------')
print('\t{}\n\n'.format('\n\t'.join(UIDs)))
# Run combination
# Run in parallel
if parallel:
job_pool = JobPool(task=CombineMpiOutputTask)
number_workers = len(job_pool)
if number_workers == 1:
job_pool.close()
parallel = False
# Run using MPI
if mpi:
# split by workers
sample = len(UIDs)/float(comm.Get_size())
# Get the UIDS for this worker
UIDs = UIDs[int(comm.Get_rank()*ceil(sample)):min(int((comm.Get_rank()+1)*ceil(sample)), len(UIDs)-1)]
# Loop over UIDs and run combine task
for uid in UIDs:
if parallel:
job_pool.task(uid, output_format, kwargs.get('results_format', 'full_pdf'), binary_file_version=binary_file_version)
else:
CombineMpiOutputTask(uid, output_format, kwargs.get('results_format', 'full_pdf'), binary_file_version=binary_file_version)()
if parallel:
job_pool.close()
#
# Location pdf parsing and binning
#
def bin_angle_coefficient_samples(a_polarity, a1_amplitude_ratio, a2_amplitude_ratio, a_polarity_prob, location_sample_multipliers, extension_data, epsilon=0):
"""
Carry out the binning over the station ray path coefficent samples
Unlike the bin_scatangle function, this accounts for variations in angle dependencies.
Args
a_polarity: np.array of polarity station coefficent data.
a1_amplitude_ratio: np.array of amplitude ratio numerator station coefficent data.
a2_amplitude_ratio: np.array of amplitude ratio denominator station coefficient data.
a_polarity_prob: np.array of polarity prob stationcoefficient data.
location_sample_multipliers: location sample probabilities.
epsilon:[0] allowed difference for the samples to be binned together.
Returns
a_polarity,a1_amplitude_ratio,a2_amplitude_ratio,a_polarity_prob,multipliers
np.arrays of station coefficents for each data type and the location sample multipliers.
"""
if epsilon > 0:
import time
if cprobability:
print('C')
t0 = time.time()
a_polarity, a1_amplitude_ratio, a2_amplitude_ratio, a_polarity_prob, extension_data, multipliers = cprobability.bin_angle_coefficient_samples(a_polarity, a1_amplitude_ratio,
a2_amplitude_ratio, a_polarity_prob,
location_sample_multipliers, extension_data,
epsilon)
print('Ctime = {}'.format(time.time()-t0))
else:
print('Py')
t0 = time.time()
multipliers = []
no_pop = np.ones((len(location_sample_multipliers),), dtype=bool)
for i in range(len(location_sample_multipliers)):
multiplier = location_sample_multipliers[i]
for j in range(i, len(location_sample_multipliers)):
if no_pop[j]:
diff_polarity = 0
if not isinstance(a_polarity, bool):
diff_polarity = a_polarity[:, i, :]-a_polarity[:, j, :]
diff_polarity = 0
if not isinstance(a1_amplitude_ratio, bool):
diff_a1_amplitude_ratio = a1_amplitude_ratio[:, i, :]-a1_amplitude_ratio[:, j, :]
diff_polarity = 0
if not isinstance(a2_amplitude_ratio, bool):
diff_a2_amplitude_ratio = a2_amplitude_ratio[:, i, :]-a2_amplitude_ratio[:, j, :]
diff_polarity = 0
if not isinstance(a_polarity_prob, bool):
diff_polarity_prob = a_polarity_prob[:, i, :]-a_polarity_prob[:, j, :]
if np.max([np.max(np.abs(diff_polarity)), np.max(np.abs(diff_a1_amplitude_ratio)), np.max(np.abs(diff_a2_amplitude_ratio)), np.max(np.abs(diff_polarity_prob))]) < epsilon:
multiplier += location_sample_multipliers[j]
no_pop[j] = 0
multipliers.append(multiplier)
print('Pytime = {}'.format(time.time()-t0))
if not isinstance(a_polarity, bool):
a_polarity = a_polarity[:, no_pop, :]
if not isinstance(a1_amplitude_ratio, bool):
a1_amplitude_ratio = a1_amplitude_ratio[:, no_pop, :]
if not isinstance(a2_amplitude_ratio, bool):
a2_amplitude_ratio = a2_amplitude_ratio[:, no_pop, :]
if not isinstance(a_polarity_prob, bool):
a_polarity_prob = a_polarity_prob[:, no_pop, :]
for key in extension_data.keys():
for k in extension_data[key].keys():
if k[:2] == 'a_' or k[0] == 'a' and k[2] == '_':
extension_data[key][k] = extension_data[key][k][:, no_pop, :]
print('Epsilon = {} reduced {} samples to {} records.'.format(epsilon, len(location_sample_multipliers), len(multipliers)))
return a_polarity, a1_amplitude_ratio, a2_amplitude_ratio, a_polarity_prob, extension_data, multipliers