3.1.7.1. MTfit.extensions.scatangleΒΆ
"""
scatangle.py
************
Extension for handling scatangle files (installed with the main module, but provides an example of an extension)
"""
# **Restricted: For Non-Commercial Use Only**
# This code is protected intellectual property and is available solely for teaching
# and non-commercially funded academic research purposes.
#
# Applications for commercial use should be made to Schlumberger or the University of Cambridge.
import os
import glob
import time
import logging
import operator
import numpy as np
from ..utilities.multiprocessing_helper import JobPool
logger = logging.getLogger('MTfit.extensions.scatangle')
try:
from . import cscatangle
except ImportError:
cscatangle = None
except Exception:
logger.exception('Error importing c extension')
cscatangle = None
try:
import argparse # noqa F401
_argparse = True # noqa F811
except Exception:
_argparse = False
def parse_scatangle(filename, number_location_samples=0, bin_size=0, _use_c=True):
"""
Read station angles scatter file
Reads the station angle scatter file. Expected format is given below. TakeOffAngle is 0 down (NED coordinate system).
The probabilities are read in, so if Oct-tree or metropolis sampling is used, the probability values should be set to one.
Args
filename: Angle scatter file name
location_samples:[Default=0] Number of samples to take from the full location PDF (0 means all samples)
bin_size:[Default=1] Bin size for sample weighting in degrees (if 0 no binning used) - reduces the number of samples required for well constrained locations.
Returns
Records,Probability: Angle Records and the probability for each sample.
Expected format is:
Probability
StationName Azimuth TakeOffAngle
StationName Azimuth TakeOffAngle
Probability
.
.
.
e.g.
504.7
S0271 231.1 154.7
S0649 42.9 109.7
S0484 21.2 145.4
S0263 256.4 122.7
S0142 197.4 137.6
S0244 229.7 148.1
S0415 75.6 122.8
S0065 187.5 126.1
S0362 85.3 128.2
S0450 307.5 137.7
S0534 355.8 138.2
S0641 14.7 120.2
S0155 123.5 117
S0162 231.8 127.5
S0650 45.9 108.2
S0195 193.8 147.3
S0517 53.7 124.2
S0004 218.4 109.8
S0588 12.9 128.6
S0377 325.5 165.3
S0618 29.4 120.5
S0347 278.9 149.5
S0529 326.1 131.7
S0083 223.7 118.2
S0595 42.6 117.8
S0236 253.6 118.6
502.7
S0271 233.1 152.7
S0649 45.9 101.7
S0484 25.2 141.4
S0263 258.4 120.7
"""
# Read file
with open(filename, 'r') as f:
station_file = f.readlines()
multipliers = []
sample_records = []
record = {'Name': [], 'Azimuth': [], 'TakeOffAngle': []}
multiplier = 1.0
# Loop over lines
for line in station_file:
if line.lstrip('\r') == '\n':
if len(record['Name']) and multiplier:
record['Azimuth'] = np.matrix(record['Azimuth']).T
record['TakeOffAngle'] = np.matrix(record['TakeOffAngle']).T
sample_records.append(record)
# Using multipliers therefore prob = 1
multipliers.append(multiplier)
record = {'Name': [], 'Azimuth': [], 'TakeOffAngle': []}
elif len(line.rstrip().rstrip('\r').split()) == 1:
try:
multiplier = float(line.rstrip().rstrip('\r'))
except Exception:
multiplier = 1.0
else:
record['Name'].append(line.split()[0])
record['Azimuth'].append(float(line.split()[1]))
record['TakeOffAngle'].append(float(line.rstrip().rstrip('\r').split()[2]))
if len(record['Name']):
record['Azimuth'] = np.matrix(record['Azimuth']).T
record['TakeOffAngle'] = np.matrix(record['TakeOffAngle']).T
sample_records.append(record)
multipliers.append(multiplier)
if number_location_samples and number_location_samples < len(sample_records):
try:
samples = np.random.choice(len(sample_records), number_location_samples, False)
except AttributeError:
i = 0
samples = np.array(list(set(np.random.randint(0, len(sample_records), len(sample_records)).tolist())))
while len(samples) < number_location_samples and i < 100:
samples = np.array(list(set(np.random.randint(0, len(sample_records), len(sample_records)).tolist())))
i += 1
if len(samples) < number_location_samples:
raise ValueError("Couldn't sample angle PDF")
else:
samples = samples[:len(sample_records)]
# Sample randomly from records, probability
sample_records = list(np.array(sample_records)[samples])
multipliers = list(np.array(multipliers)[samples])
new_multipliers = []
if bin_size:
old_size = len(sample_records)
if cscatangle and _use_c:
logger.info('C code used')
t0 = time.time()
sample_records, multipliers = cscatangle.bin_scatangle(sample_records, np.array(multipliers), bin_size)
logger.info('Elapsed time = {}'.format(time.time()-t0))
else:
logger.info('Python code used')
t0 = time.time()
for i, record in enumerate(sample_records):
if not (i+1) % 10:
logger.info('{} records completed'.format(i+1))
j = i+1
multiplier = multipliers[i]
while j < len(sample_records):
toa_diff = sample_records[j]['TakeOffAngle']-record['TakeOffAngle']
az_diff = sample_records[j]['Azimuth']-record['Azimuth']
if np.max(np.abs(toa_diff)) < bin_size/2.0 and np.max(np.abs(az_diff)) < bin_size/2.0:
multiplier += multipliers.pop(j)
sample_records.pop(j)
else:
j += 1
new_multipliers.append(multiplier)
multipliers = new_multipliers
logger.info('Elapsed time = {}'.format(time.time()-t0))
logger.info('{} degree binning reduced {} samples to {} samples.'.format(bin_size, old_size, len(sample_records)))
return sample_records, multipliers
def _output_scatangle(filename, samples, probabilities):
"""
Output scatangle file from samples and probabilities.
Args
filename: str name of file to output to.
samples: list of location samples.
probabilities: list of sample probabilities.
"""
output = []
# Loop over samples
for i, sample in enumerate(samples):
# Append multiplier (probability)
output.append(str(probabilities[i]))
# Loop over stations
for j, st in enumerate(sample['Name']):
output.append(st+'\t'+str(float(sample['Azimuth'][j]))+'\t'+str(float(sample['TakeOffAngle'][j])))
output.append('')
with open(filename, 'w') as f:
f.write('\n'.join(output))
def bin_scatangle(filename, number_location_samples=0, bin_size=1):
"""
Bin scatangle samples into bins of size given by the bin size argument, if all the differences in angles for each station between the two samples are within that range
Args
filename: str of filename to read.
number_location_samples:[0} integer number of location samples to sub-sample (0 means to use all).
bin_size:[1.0] float size of bin to stack samples over.
"""
# Parse scatangle and bin
sample_records, multipliers = parse_scatangle(filename, number_location_samples, bin_size)
old_filename = filename
# Add _bin_ to filename
filename = ('_bin_'+str(bin_size)).join(os.path.splitext(filename))
# Output to disk
_output_scatangle(filename, sample_records, multipliers)
return old_filename, filename
class BinScatangleTask(object):
"""
Scatangle Binning task
Bins and Saves scatangle file
Initialisation
Args
fid: Filename for MATLAB output.
number_location_samples: number_location_samples.
bin_size: bin size.
"""
def __init__(self, fid, number_location_samples=0, bin_size=1.0):
"""
Initialisation of MatlabOutputTask
Args
fid: Filename for MATLAB output.
output: Dictionary of output to be saved to fid.
"""
self.fid = fid
self.number_location_samples = number_location_samples
self.bin_size = bin_size
def __call__(self):
"""
Runs the MATLAB output task
Runs the MatlabOutputTask and returns a result code.
Returns
resultCode: 10 if successful, 20 if an exception is thrown.
"""
try:
return bin_scatangle(self.fid, self.number_location_samples, self.bin_size)
except Exception:
logger.exception('Scatangle Bin Error')
return 20
def bin_scatangle_files(files, number_location_samples=0, bin_scatangle_size=1.0, parallel=True, mpi=False, **kwargs):
"""
Bin scatangle samples into bins of size given by the bin size argument,
if all the differences in angles for each station between the two samples are within that range
Args
filename: str of filename to read.
number_location_samples:[0} integer number of location samples to sub-sample (0 means to use all).
bin_scatangle_size:[1.0] float size of bin to stack samples over.
parallel:[True] boolean to run in parallel using job pool (overridden by mpi option).
mpi: [False] boolean to run using MPI (ignores parallel flag).
Returns
new_files:list of new file names
"""
# check path and glob
if not isinstance(files, list) and os.path.isdir(files):
files = glob.glob('*.'+kwargs['angle_extension'])
elif not isinstance(files, list) and '*' in files:
files = glob.glob(files)
# Make sure the files are a list
if not isinstance(files, list):
files = [files]
# Set parallel flag
parallel = parallel and not len(files) == 1
# Get MPI parameters if running using MPI
if mpi:
try:
from mpi4py import MPI
comm = MPI.COMM_WORLD
except Exception:
mpi = False
# Otherwise create jobpool if running in parallel
elif parallel:
job_pool = JobPool(task=BinScatangleTask)
nworkers = len(job_pool)
if nworkers == 1:
job_pool.close()
parallel = False
new_files = files[:]
if mpi:
# Sort files into size lists
data = [[] for i in range(comm.Get_size())]
j = 0
for fn in files:
data[j].append(fn)
j = (j+1) % comm.Get_size()
# Loop over files for each process
for fn in data[comm.Get_rank()]:
# If the filename is not blank, then bin it.
if len(fn):
new_files[new_files.index(fn)] = bin_scatangle(fn, number_location_samples, bin_scatangle_size)[1]
end = True
# Wait for end
items = comm.gather(end, 0)
if comm.Get_rank() == 0:
for i in items:
assert i is True
elif parallel:
# Loop over pool running tasks
for fn in files:
job_pool.task(fn, number_location_samples, bin_scatangle_size)
# Get results
results = job_pool.all_results()
for new_fn in results:
new_files[new_files.index(new_fn[0])] = new_fn[1]
job_pool.close()
else:
# Run bin for each file
for fn in files:
new_files[new_files.index(fn)] = bin_scatangle(fn, number_location_samples, bin_scatangle_size)[1]
return new_files
def convert_scatangle_to_MATLAB(scatangle_file, fid=False, data_file=False):
"""
Converts scatangle file to MATLAB station distribution format
Converts the scatangle file to the station distribution format separately from the inversion.
Args
scatangle_file: Scatangle filename.
fid:[False] Output filename.
data_file:[False] Data file path (means that only stations with observations are outputted).
"""
from ..inversion import Inversion
# Parse scatangle file
X = parse_scatangle(scatangle_file)
if not fid:
fid = scatangle_file
if data_file:
# Check Stations for only those with observations
data = Inversion({'UID': 1}, parallel=False)._load(data_file)
original_samples = [u.copy() for u in X[0]]
stations = []
for key in [u for u in data.keys() if 'amplituderatio' in u.lower() or 'amplitude_ratio' in u.lower() or 'polarity' in u.lower()]:
stations.extend(data[key]['Stations']['Name'])
stations_set = set(stations)
location_samples = [u.copy() for u in original_samples]
selected_stations = list(set(location_samples[0]['Name']) & stations_set)
indices = [location_samples[0]['Name'].index(u) for u in selected_stations]
for i, sample in enumerate(location_samples):
sample['Name'] = operator.itemgetter(*indices)(sample['Name'])
sample['TakeOffAngle'] = sample['TakeOffAngle'][indices]
sample['Azimuth'] = sample['Azimuth'][indices]
# Output results
Inversion(data_file=data_file, parallel=False).__output__({}, fid=fid, location_samples=location_samples,
location_sample_multipliers=X[1], station_only=True)
else:
# Output results
Inversion({'UID': 1}, parallel=False).__output__({}, fid=fid, location_samples=X[0],
location_sample_multipliers=X[1], station_only=True)
def parser_check(parser, options, defaults):
flags = []
if options['bin_scatangle']:
if not options['location_pdf_file_path']:
options['location_pdf_file_path'] = glob.glob(options['data_file']+os.path.sep+'*'+options['angle_extension'])
if not isinstance(options['location_pdf_file_path'], list):
options['location_pdf_file_path'] = [options['location_pdf_file_path']]
flags = ['no_location_update']
return options, flags
# MTfit pkg_resources EntryPoint functions
PARSER_DEFAULTS = {
'bin_scatangle': False,
'bin_size': 1.0,
}
PARSER_DEFAULT_TYPES = {'bin_scatangle': [bool], 'bin_size': [float]}
def cmd_defaults():
return(PARSER_DEFAULTS, PARSER_DEFAULT_TYPES)
def cmd_opts(group, argparser=_argparse, defaults=PARSER_DEFAULTS):
"""
Adds parser group for scatangle arguments
Returns
group: argparse or optparse argument group
"""
if argparser:
group.add_argument("--bin-scatangle", "--binscatangle", "--bin_scatangle", action="store_true",
default=defaults['bin_scatangle'], help="Bin the scatangle file to reduce the number of samples [default=False]. --bin-size Sets the bin size parameter .",
dest="bin_scatangle")
group.add_argument("--bin-size", "--binsize", "--bin_size", type=float, default=defaults['bin_size'],
help="Sets the scatangle bin size parameter [default={}]".format(defaults['bin_size']), dest="bin_scatangle_size")
else:
group.add_option("--bin-scatangle", "--binscatangle", "--bin_scatangle", action="store_true",
default=defaults['bin_scatangle'], help="Bin the scatangle file to reduce the number of samples [default=False]. --bin-size Sets the bin size parameter .",
dest="bin_scatangle")
group.add_option("--bin-size", "--binsize", "--bin_size", type=float, default=defaults['bin_size'],
help="Sets the scatangle bin size parameter [default={}]".format(defaults['bin_size']), dest="bin_scatangle_size")
return group, parser_check
def pre_inversion(**kwargs):
if kwargs.get('bin_scatangle', False):
try:
kwargs['location_pdf_file_path'] = bin_scatangle_files(kwargs.get('location_pdf_file_path'), **kwargs)
kwargs.pop('number_location_samples')
except Exception:
pass
return kwargs
def location_pdf_parser(*args, **kwargs):
return parse_scatangle(*args, **kwargs)
There are also Cython functions:
# cython: infer_types=True
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
# **Restricted: For Non-Commercial Use Only**
# This code is protected intellectual property and is available solely for teaching
# and non-commercially funded academic research purposes.
#
# Applications for commercial use should be made to Schlumberger or the University of Cambridge.
cimport cython
import numpy as np
cimport numpy as np
from cython.view cimport array as cvarray
from cpython cimport bool
# DTYPE=np.float64
# ctypedef np.float64_t DTYPE_t
ctypedef double DTYPE_t
ctypedef long long LONG
# ctypedef long long
from libc.stdlib cimport rand, RAND_MAX
IF UNAME_SYSNAME == "Windows":
from libc.math cimport HUGE_VAL as inf
ELSE:
from libc.math cimport INFINITY as inf
from libc.math cimport fmax
from libc.math cimport fmin
from libc.math cimport erf
from libc.math cimport sqrt
from libc.math cimport exp
from libc.math cimport log
from libc.math cimport fabs
from libc.math cimport cos
from libc.math cimport sin
from libc.math cimport M_PI as pi
from libc.math cimport M_SQRT2 as sqrt2
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.nonecheck(False)
@cython.cdivision(True)
cdef DTYPE_t[::1] get_multipliers(DTYPE_t[:,:,::1] angles,DTYPE_t bin_size,DTYPE_t [::1] multipliers) nogil:
cdef Py_ssize_t umax=angles.shape[0]
cdef Py_ssize_t nsta=angles.shape[2]
cdef Py_ssize_t u
cdef Py_ssize_t w
cdef int ok=1
for u in range(umax):
if multipliers[u]==-1:
continue
for v in range(u+1,umax):
if multipliers[v]>-1:
ok=1
for w in range(nsta):
ok=ok*(fabs(angles[u,0,w]-angles[v,0,w])<bin_size/2.)*(fabs(angles[u,1,w]-angles[v,1,w])<bin_size/2.)
if ok==0:
break
if w==nsta-1 and ok>0:
multipliers[u]+=multipliers[v]
if v>u:
multipliers[v]=-1
return multipliers
cpdef bin_scatangle(sample_records,multipliers,bin_size):
cdef DTYPE_t[:,:,::1] angles=np.empty((len(sample_records),2,len(sample_records[0]['Name'])))
cdef Py_ssize_t i
cdef Py_ssize_t j
new_records=[]
new_multipliers=[]
for i,record in enumerate(sample_records):
# print i,record
for j in range(len(record['Name'])):
angles[i,0,j]=record['TakeOffAngle'][j,0]
angles[i,1,j]=record['Azimuth'][j,0]
# print multipliers,bin_size,angles
multipliers=np.asarray(get_multipliers(angles,bin_size,multipliers)).flatten()
for i,record in enumerate(sample_records):
if multipliers[i]>0:
new_records.append(record)
new_multipliers.append(multipliers[i])
return new_records,new_multipliers