import datetime
import os
import pathlib
import re
import pandas as pd
from flexiznam.schema.datasets import Dataset
class ScanimageData(Dataset):
    """Dataset grouping the tif files of one scanimage acquisition and its csv files."""

    DATASET_TYPE = 'scanimage'

    @staticmethod
    def from_folder(folder, verbose=True, mouse=None, session=None, recording=None,
                    flm_session=None):
        """Create scanimage datasets by loading info from folder

        Tif files named `<name>_<acqnum>_<filenum>.tif(f)` are grouped by name and
        acquisition number. Csv files starting with the same name and ending with
        `<acqnum>.csv` are attached to the corresponding acquisition.

        Args:
            folder: Path to the folder to scan (sub-folders are not searched)
            verbose: If True, print the tif files that do not follow the scanimage
                     naming and the csv files that matched no acquisition
            mouse: Value set as `mouse` attribute of every dataset created
            session: Value set as `session` attribute of every dataset created
            recording: Value set as `recording` attribute of every dataset created
            flm_session: authentication session passed to every dataset created

        Returns:
            Dictionary of ScanimageData. Keys are the acquisition identifiers (file
            name concatenated with the acquisition number)

        Raises:
            IOError: if the folder contains no tif file, if no tif file follows the
                     scanimage naming or if a csv file matches two acquisitions
        """
        fnames = os.listdir(folder)
        tif_files = [f for f in fnames if f.endswith(('.tif', '.tiff'))]
        csv_files = [f for f in fnames if f.endswith('.csv')]
        if not tif_files:
            raise IOError('Cannot find any tif file')

        # scanimage files finish with _acqnum_filenum.tif. All files with the same
        # filename until acqnum are grouped together. The extension dot is escaped
        # and the pattern anchored so that only the real end of the name can match
        pattern = re.compile(r'(.*)_(\d+)_(\d+)\.tiff?$')
        matches = [pattern.match(f) for f in tif_files]
        if verbose:
            non_si_tiff = sorted(f for f, m in zip(tif_files, matches) if not m)
            if non_si_tiff:
                print('Found %d tif files that are NOT scanimage data.' %
                      len(non_si_tiff))
                for s in non_si_tiff:
                    print(' %s' % s)

        tif_rows = [dict(filename=f,
                         fname=m.group(1),
                         acq_num=m.group(2),
                         file_num=m.group(3))
                    for f, m in zip(tif_files, matches) if m]
        if not tif_rows:
            # an empty DataFrame has no `fname` column, fail with a clear error
            raise IOError('Cannot find any scanimage tif file')
        tif_df = pd.DataFrame(tif_rows)
        tif_df['acq_identifier'] = tif_df.fname + tif_df.acq_num

        # attributes copied verbatim on every dataset created
        common_attributes = dict(mouse=mouse, session=session, recording=recording)
        output = {}
        matched_csv = set()
        for acq_id, acq_df in tif_df.groupby('acq_identifier'):
            # find if there is any corresponding csv
            fname = acq_df.fname.iloc[0]
            acq_num = acq_df.acq_num.iloc[0]
            associated_csv = {f for f in csv_files if f.startswith(fname) and
                              f.endswith(acq_num + '.csv')}
            # a csv must belong to a single acquisition: compare the two sets of
            # names (testing `set in set` would never be True)
            if not associated_csv.isdisjoint(matched_csv):
                raise IOError('A csv file matched with 2 scanimage tif datasets')
            matched_csv.update(associated_csv)
            # key is what remains of the csv name once the acquisition name, the
            # acquisition number and the '.csv' extension (4 characters) are removed
            associated_csv = {f[len(fname):-(len(acq_num) + 4)].strip('_'): f for f in
                              associated_csv}
            # use the modification time of one tif file as creation date
            example_tif = pathlib.Path(folder) / acq_df.filename.iloc[0]
            created = datetime.datetime.fromtimestamp(example_tif.stat().st_mtime)
            dataset = ScanimageData(path=folder,
                                    tif_files=list(acq_df.filename.values),
                                    csv_files=associated_csv,
                                    created=created.strftime('%Y-%m-%d %H:%M:%S'),
                                    flm_session=flm_session)
            for field, value in common_attributes.items():
                setattr(dataset, field, value)
            dataset.dataset_name = acq_id
            output[acq_id] = dataset

        if verbose:
            unmatched = sorted(set(csv_files) - matched_csv)
            if unmatched:
                print('%d csv files did not match any scanimage acquisition:' % len(unmatched))
                for m in unmatched:
                    print(' %s' % m)
        return output

    @staticmethod
    def from_flexilims(project=None, name=None, data_series=None, flm_session=None):
        """Create a scanimage dataset from flexilims entry (not implemented)"""
        raise NotImplementedError

    def __init__(self, path, name=None, tif_files=None, csv_files=None,
                 extra_attributes=None, created=None, project=None, is_raw=True,
                 flm_session=None):
        """Create a Scanimage dataset

        Args:
            name: Identifier. Unique name on flexilims. When imported from folder,
                  default to the acquisition name
            path: Path to the folder containing all the files
            tif_files: List of file names associated with this dataset
            csv_files: Dictionary of csv files associated to the binary file. Keys are
                       identifier provided for convenience, values are the full file name
            extra_attributes: Other optional attributes (from or for flexilims)
            created: Date of creation. Default to the creation date of a tif file
            project: name or hexadecimal id of the project to which the dataset belongs
            is_raw: default to True. Is it processed data or raw data?
            flm_session: authentication session for connecting to flexilims

        Raises:
            IOError: if one of the tif or csv files is not found in `path`
        """
        super().__init__(name=name, path=path, is_raw=is_raw,
                         dataset_type=ScanimageData.DATASET_TYPE,
                         extra_attributes=extra_attributes, created=created,
                         project=project, flm_session=flm_session)
        # csv_files must be set first: the tif_files setter validates both
        self.csv_files = csv_files
        self.tif_files = tif_files

    @property
    def tif_files(self):
        """List of tif files, sorted alphabetically (automatically)"""
        return self._tif_files

    @tif_files.setter
    def tif_files(self, value):
        if value is None:
            self._tif_files = None
            return
        if isinstance(value, str):
            value = [value]
        value = sorted(value)
        if not self.is_valid(tif_files=value):
            raise IOError('One or more file do not exist. Set self._tif_files if you want'
                          ' to skip check')
        self._tif_files = value

    def is_valid(self, tif_files=None):
        """Check that associated files exist

        Args:
            tif_files: File names to check. Default to `self.tif_files`

        Returns:
            True if all the tif files and all the csv files are found in `self.path`,
            False otherwise
        """
        if tif_files is None:
            tif_files = self.tif_files
        # checking file one by one is long, compare sets. A dataset without tif
        # files (None) has nothing to check
        tif_files = set(tif_files or ())
        existing_file = {f for f in os.listdir(self.path)
                         if f.endswith(('.tif', '.tiff'))}
        if tif_files - existing_file:
            return False
        # csv_files defaults to None in the constructor, treat it as no csv file
        folder = pathlib.Path(self.path)
        return all((folder / file_path).exists()
                   for file_path in (self.csv_files or {}).values())

    def __len__(self):
        """Number of tif files in the dataset (0 if no tif file is set)"""
        return len(self.tif_files or ())