Blame | Last modification | View Log | Download | RSS feed
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import scipy
import netCDF4
import datetime
import magic
import copy
'''
TODO :
-Make write_netcdf work when usedatetime=False
'''
class Tra(object):
"""
Class to work with LAGRANTO output
Read trajectories from a LAGRANTO file and return a structured numpy array
filename : file containing lagranto trajectories
return : structured numpy array traj(ntra,ntime) with variables as tuple.
Example:
trajs = Tra(filename)
trajs['lon'][0,:] : return the longitudes for the first trajectory.
Author : Nicolas Piaget, ETH Zurich , 2014
Sebastiaan Crezee, ETH Zurich , 2014
"""
read_format = {
'NetCDF Data Format data': 'netcdf',
'ASCII text': 'ascii',
}
def __init__(self, filename, typefile=None, usedatetime=True):
"""
Read a file based on its type.
By default typefile is determined automatically,
but it can also be specified explicitly
as a string argument:'netcdf' or 'ascii'
"""
if typefile is None:
typefile = magic.from_file(filename)
error = "Unkown fileformat. Known formats are {}".format(
":".join(self.read_format.keys())
)
typefile = self.read_format[typefile]
else:
error = "Unkown fileformat. Known formats are ascii or netcdf"
try:
function = '_read_{}'.format(typefile)
self._array = globals()[function](filename,
usedatetime=usedatetime)
except IOError:
raise IOError(error)
def __len__(self):
return len(self._array)
def __getattr__(self, attr):
if attr in self.__dict__:
return getattr(self, attr)
return getattr(self._array, attr)
def __getitem__(self, key):
return self._array[key]
def __setitem__(self, key, item):
if type(key) is slice:
self._array = item
else:
formats = self._array.dtype.descr + [(key, item.dtype.descr[0][1])]
formats = [(form[0].encode('ascii', 'ignore'),
form[1]) for form in formats]
newarr = scipy.zeros(self._array.shape, dtype=formats)
for var in self.variables:
newarr[var] = self._array[var]
newarr[key] = item
self._array = newarr
def __repr__(self):
string = " \
{} trajectories with {} time steps. \n \
Available fields: {}\n \
total duration: {} minutes".format(
self.ntra, self.ntime,
"/".join(self.variables),
self.duration
)
return string
@property
def ntra(self):
if self.ndim < 2:
print(" \
Be careful with the dimensions, \
you may want to change the shape: \n \
either shape + (1,) or (1,)+shape \
")
return None
return self.shape[0]
@property
def ntime(self):
if self.ndim < 2:
print(" \
Be careful with the dimensions,\
you may want to change the shape: \n \
either shape + (1,) or (1,)+shape \
")
return None
return self.shape[1]
@property
def variables(self):
return list(self.dtype.names)
@property
def duration(self):
""" time duration in minutes """
end = self['time'][0, -1]
end = end.astype(datetime.datetime)
start = self['time'][0, 0]
start = start.astype(datetime.datetime)
delta = end - start
return delta.total_seconds() / 60.
@property
def initial(self):
""" give the initial time of the trajectories """
starttime = self['time'][0, 0]
return starttime
def set_array(self, array):
""" To change the trajectories array """
self._array = array
def concatenate(self, trajs):
""" To concatenate trajectories together
return Tra object
"""
if type(trajs) is not tuple:
trajs = (trajs,)
trajstuple = tuple(tra._array for tra in trajs)
trajstuple += (self._array,)
newtrajs = copy.copy(self)
test = scipy.concatenate(trajstuple)
newtrajs._array = test
return newtrajs
def write(self, filename, fileformat='netcdf'):
globals()['_write_{}'.format(fileformat)](self, filename)
# -----------------------------------------------------------------------------
# read a netcdf lsl file
# -----------------------------------------------------------------------------
def _read_netcdf(filename, usedatetime):
""" Read a netcdf lsl file """
# read the netcdf, the formats and the variables names
ncfile = netCDF4.Dataset(filename)
if usedatetime:
formats = [var[1].dtype if var[0] != 'time' else 'datetime64[s]'
for var in list(ncfile.variables.items())]
else:
formats = [var[1].dtype for var in list(ncfile.variables.items())]
variables = list(ncfile.variables.keys())
# set the numbers of trajectories and time step
try:
ntra = len(ncfile.dimensions['dimx_lon'])
except:
try:
ntra = len(ncfile.dimensions['id'])
except:
try:
ntra = len(ncfile.dimensions['ntra'])
except:
raise Exception('Cannot read the number of trajectories,\
not one of dimx_lon, id or ntra')
try:
ntime = len(ncfile.dimensions['time'])
except:
ntime = len(ncfile.dimensions['ntim'])
# change names of the coordinates if necessary
nvariables = list(variables)
if "longitude" in variables:
nvariables[variables.index("longitude")] = "lon"
if "latitude" in variables:
nvariables[variables.index("latitude")] = "lat"
# create a structured array using numpy
array = scipy.zeros((ntra, ntime),
dtype={'names': nvariables, 'formats': formats})
for variable in variables:
if variable == 'BASEDATE':
continue
nvariable = variable
if variable == 'longitude':
nvariable = 'lon'
if variable == 'latitude':
nvariable = 'lat'
if len(ncfile.variables[variable].shape) == 4:
vardata = ncfile.variables[variable][:, 0, 0, :][:].T
else:
vardata = ncfile.variables[variable][:].T
vardata[vardata < -990] = scipy.nan
array[nvariable] = vardata
# read the starting date and duration
try:
date = [int(i) for i in ncfile.variables['BASEDATE'][0, 0, 0, :]]
starttime = datetime.datetime(date[0], date[1],
date[2], date[3], date[4])
except:
starttime = datetime.datetime(ncfile.ref_year,
ncfile.ref_month,
ncfile.ref_day,
ncfile.ref_hour,
ncfile.ref_min)
# find characteristic of trajectories
timestep = ncfile.variables['time'][1] - ncfile.variables['time'][0]
period = ncfile.variables['time'][-1] - ncfile.variables['time'][0]
# True if time = hh.mm
cond1 = 0.00 < (ncfile.variables['time'][:] % 1).max() <= 0.60
if cond1:
timestep = scipy.floor(timestep) + ((timestep % 1) / .60)
timestep = scipy.around(timestep, 6)
period = scipy.floor(period) + ((period % 1) / .60)
period = scipy.around(period, 6)
# transform the times into datetime object
# special treatment for online trajectories (time given in minutes)
if usedatetime:
try:
time = scipy.array(
[scipy.datetime64(starttime + datetime.timedelta(hours=t))
for t in scipy.arange(0, period + timestep, timestep)]
)
except AttributeError:
time = scipy.array(
[scipy.datetime64(starttime + datetime.timedelta(seconds=t))
for t in scipy.arange(0, period + timestep, timestep)]
)
time.shape = (1,) + time.shape
time = time.repeat(array.shape[0], axis=0)
array['time'] = time
ncfile.close()
return array
# ------------------------------------------------------------------------------
# read an ASCII lsl file
# -----------------------------------------------------------------------------
def _read_ascii(filename, usedatetime, nhead=5):
""" Read a lsl file from ASCII """
# open the file
open_file = open(filename, 'r')
# get the header
file_lines = open_file.readlines()
nvariables = file_lines[2].strip().split()
head = file_lines[0].split()
# read starting time new and old formats
try:
starttime = datetime.datetime.strptime(head[2], '%Y%m%d_%H%M')
except ValueError:
try:
starttime = datetime.datetime.strptime(
head[2] + '_' + head[3], '%Y%m%d_%H'
)
except:
print("Warning: could not retrieve starttime from header,\
setting to default value ")
starttime = datetime.datetime(1970, 1, 1)
if usedatetime:
dtypes = ['f8' if var != 'time' else datetime.datetime
for var in nvariables]
else:
dtypes = ['f8' for var in nvariables]
# read the content as numpy array
array = scipy.genfromtxt(filename,
dtype=dtypes,
names=nvariables,
skip_header=nhead,
missing_values=-999.99)
# find characteristic of trajectories
timestep = float(array[1][0]) - float(array[0][0])
period = float(array[-1][0]) - float(array[0][0])
# Convert minutes to decimal hours
if max((array['time'].astype(float)) % 1) <= 0.60:
timestep = scipy.floor(timestep) + ((timestep % 1) / .60)
period = scipy.floor(period) + ((period % 1) / .60)
# period/timestep gives strange offset (related to precision??)
# so use scipy.around
ntime = int(1 + scipy.around(period / timestep))
ntra = int(array.size / ntime)
# reshape traj file
array = scipy.reshape(array, (ntra, ntime))
if usedatetime:
# change time into datetime object
time = scipy.array(
[scipy.datetime64(starttime + datetime.timedelta(hours=t))
for t in scipy.arange(0, period + timestep, timestep)]
)
time.shape = (1,) + time.shape
time = time.repeat(array.shape[0], axis=0)
array['time'] = 0
newdtypes = [descr if descr[0] != 'time' else ('time', 'datetime64[s]')
for descr in array.dtype.descr]
array = array.astype(newdtypes)
array['time'] = time
return array
# ------------------------------------------------------------------------------
# write trajectories to a netcdf file
# ------------------------------------------------------------------------------
def _write_netcdf(Tra, filename):
ncfile = netCDF4.Dataset(filename, 'w', format='NETCDF3_CLASSIC')
ncfile.createDimension('ntra', Tra.ntra)
ncfile.createDimension('ntim', Tra.ntime)
ncfile.ref_year, ncfile.ref_month, ncfile.ref_day, ncfile.ref_hour,\
ncfile.ref_min = Tra.initial.astype(datetime.datetime).timetuple()[0:5]
vararray = ncfile.createVariable('time', 'f4', ('ntim', ))
delta = Tra['time'][0, :] - Tra.initial
time = [int(a.astype(datetime.datetime).total_seconds() / 3600) +
(a.astype(datetime.datetime).total_seconds() -
int(a.astype(datetime.datetime).total_seconds() / 3600) * 3600)
/ 60 * 0.01 for a in delta]
vararray[:] = time
# add variables
for var in Tra.variables:
if var == 'time':
continue
vararray = ncfile.createVariable(var, Tra[var].dtype, ('ntim', 'ntra'))
vararray[:] = Tra[var].T
ncfile.close()