Source code for flexiznam.schema.scanimage_data

import datetime
import os
import pathlib
import re

import pandas as pd
from flexiznam.schema.datasets import Dataset


[docs]class ScanimageData(Dataset): DATASET_TYPE = 'scanimage'
[docs] @staticmethod def from_folder(folder, verbose=True, mouse=None, session=None, recording=None, flm_session=None): """Create a scanimage dataset by loading info from folder""" fnames = [f for f in os.listdir(folder) if f.endswith(('.csv', '.tiff', '.tif'))] tif_files = [f for f in fnames if f.endswith(('.tif', '.tiff'))] csv_files = [f for f in fnames if f.endswith('.csv')] if not tif_files: raise IOError('Cannot find any tif file') # scanimage files finish with _acqnum_filenum.tif. All files with the same # filename until acqnum are grouped together pattern = r'(.*)_(\d*)_(\d*).tiff?' matches = [re.match(pattern, f) for f in tif_files] if verbose: non_si_tiff = {f for f, m in zip(tif_files, matches) if not m} if non_si_tiff: print('Found %d tif files that are NOT scanimage data.' % len(non_si_tiff)) for s in non_si_tiff: print(' %s' % s) tif_df = [dict(filename=f, fname=m.groups()[0], acq_num=m.groups()[1], file_num=m.groups()[2]) for f, m in zip(tif_files, matches) if m] tif_df = pd.DataFrame(tif_df) tif_df['acq_identifier'] = tif_df.fname + tif_df.acq_num output = {} matched_csv = set() for acq_id, acq_df in tif_df.groupby('acq_identifier'): # find if there is any corresponding csv fname = acq_df.fname.iloc[0] acq_num = acq_df.acq_num.iloc[0] associated_csv = {f for f in csv_files if f.startswith(fname) and f.endswith(acq_num + '.csv')} if associated_csv in matched_csv: raise IOError('A csv file matched with 2 scanimage tif datasets') matched_csv.update(associated_csv) associated_csv = {f[len(fname):-(len(acq_num) + 4)].strip('_'): f for f in associated_csv} example_tif = pathlib.Path(folder) / acq_df.filename.iloc[0] created = datetime.datetime.fromtimestamp(example_tif.stat().st_mtime) output[acq_id] = ScanimageData(path=folder, tif_files=list(acq_df.filename.values), csv_files=associated_csv, created=created.strftime('%Y-%m-%d %H:%M:%S'), flm_session=flm_session) for field in ('mouse', 'session', 'recording'): setattr(output[acq_id], field, locals()[field]) output[acq_id].dataset_name = acq_id if verbose: unmatched = set(csv_files) - matched_csv if unmatched and verbose: print('%d csv files did not match any scanimage acquisition:' % len(unmatched)) for m in unmatched: print(' %s' % m) return output
[docs] def from_flexilims(project=None, name=None, data_series=None, flm_session=None): """Create a camera dataset from flexilims entry""" raise NotImplementedError
def __init__(self, path, name=None, tif_files=None, csv_files=None, extra_attributes=None, created=None, project=None, is_raw=True, flm_session=None): """Create a Scanimage dataset Args: name: Identifier. Unique name on flexilims. When imported from folder, default to the acquisition name path: Path to the folder containing all the files tif_files: List of file names associated with this dataset csv_files: Dictionary of csv files associated to the binary file. Keys are identifier provided for convenience, values are the full file name extra_attributes: Other optional attributes (from or for flexilims) created: Date of creation. Default to the creation date of a tif file project: name of hexadecimal id of the project to which the dataset belongs is_raw: default to True. Is it processed data or raw data? flm_session: authentication session for connecting to flexilims """ super().__init__(name=name, path=path, is_raw=is_raw, dataset_type=ScanimageData.DATASET_TYPE, extra_attributes=extra_attributes, created=created, project=project, flm_session=flm_session) self.csv_files = csv_files self.tif_files = tif_files @property def tif_files(self): """List of tif files, sorted alphabetically (automatically)""" return self._tif_files @tif_files.setter def tif_files(self, value): if value is None: self._tif_files = None return if isinstance(value, str): value = [value] value = list(sorted(value)) if not self.is_valid(tif_files=value): raise IOError('One or more file do not exist. Set self._tif_files if you want' ' to skip check') self._tif_files = value
[docs] def is_valid(self, tif_files=None): """Check that associated files exist""" if tif_files is None: tif_files = self.tif_files # checking file one by one is long, compare sets tif_files = set(tif_files) existing_file = {f for f in os.listdir(self.path) if f.endswith(('tif', '.tiff'))} if tif_files - existing_file: return False for _, file_path in self.csv_files.items(): if not (pathlib.Path(self.path) / file_path).exists(): return False return True
def __len__(self): """Number of tif files in the dataset""" return len(self.tif_files)