"""
Identifier:     KSC-SJ4-csst_common/data_manager.py
Name:           data_manager.py
Description:    file path generator
Author:         Bo Zhang
Created:        2022-09-13
Modified-History:
    2022-09-13, Bo Zhang, created
    2022-09-13, Bo Zhang, added CsstMbiDataManager
    2022-09-29, Bo Zhang, favor CsstMsDataManager instead of CsstMbiDataManager
    2022-10-26, Bo Zhang, reconstruct CsstMsDataManager, deprecate CsstMbiDataManager
"""

import os
import glob
import re

from astropy.io import fits
from astropy.table import Table

from .params import CSST_PARAMS as CP


class CsstMsDataManager:
    """ CSST MS data manager, including MBI and SLS.

    ``CsstMsDataManager`` provides an interface to switch between DFS and local file system.
    To initialize ``CsstMsDataManager`` from local directory, use ``CsstMsDataManager.from_dir()``
    To initialize ``CsstMsDataManager`` on ``dandelion`` or ``PM node``, ``CsstMsDataManager.quickstart()``.
    To initialize ``CsstMsDataManager`` from DFS, use ``CsstMsDataManager.from_dfs()``.
    To generate L0 and L1 file paths, use ``CsstMsDataManager.l0_detector()``,
    ``CsstMsDataManager.l1_detector()``, etc.

    Here are some examples for simulation with different versions.

    C3:
        MSC_MS_210525220000_100000020_06_raw.fits
        MSC_CRS_210525220000_100000020_06_raw.fits
        MSC_210525120000_0000020_06.cat

    C5.1:
        CSST_MSC_MS_SCI_20270810081950_20270810082220_100000100_06_L0_1.fits
        CSST_MSC_MS_CRS_20270810081950_20270810082220_100000100_06_L0_1.fits
        MSC_10000100_chip_06_filt_y.cat
        MSC_10000100_chip_06_filt_y.log

    C5.2
        CSST_MSC_MS_SCI_20270810081950_20270810082220_100000100_06_L0_1.fits
        CSST_MSC_MS_CRS_20270810081950_20270810082220_100000100_06_L0_1.fits
        MSC_100000100_chip_06_filt_y.cat
        MSC_100000100_chip_06_filt_y.log

    Parameters
    ----------
    ver_sim : str
        The version of simulation data, see ``csst_common.params.CP``.
    datatype : str
        The options are {"mbi", "sls", "all"}.
        The "all" option is used for QC in particular.
        Note that in this case methods like ``get_bias`` are unavailable.
    available_detectors : list
        The list of available detector serial numbers of available images.
    target_detectors : list
        The list of target detector serial numbers of available images.
    dir_l0 : str
        The L0 directory.
    dir_l1 : str
        The L1 directory.
    path_aux : str
        The aux data directory (bias, flat, dark).
    dir_pcref : str
        The position calibration reference data directory.
        Will be removed in the next version.
    _exp_id : int
        The exposure ID.
    _exp_start : int
        The exposure start time in ``yyyymmddhhmmss`` format.
    _exp_stop : int
        The exposure stop time in ``yyyymmddhhmmss`` format.
    _telescope : str
        The telescope name. Defaults to ``CSST`` for C5.2 simulation.
    _instrument : str
        The instrument name. Defaults to ``MSC`` for C5.2 simulation.
    _survey : str
        The survey name. Defaults to ``MS`` for C5.2 simulation.
    _imagetype : str
        The image type signature for science images. Defaults to ``SCI`` for C5.2 simulation.
    _l0_post : str
        The postfix. Defaults to ``L0_1`` for C5.2 simulation.

    Examples
    --------
    >>> dm_mbi = CsstMsDataManager(...)
    >>> # access L0 directory
    >>> dm_mbi.dir_l0
    >>> # access L1 directory
    >>> dm_mbi.dir_l1
    >>> # access dir_pcref
    >>> dm_mbi.dir_pcref
    >>> # access path_aux
    >>> dm_mbi.path_aux
    >>> # access ver_sim
    >>> dm_mbi.ver_sim
    >>> # access target detectors
    >>> dm_mbi.target_detectors
    >>> # access available detectors
    >>> dm_mbi.available_detectors
    >>> # define an L1 file (detector-specified)
    >>> dm_mbi.l1_detector(detector=6)
    >>> # define an L1 file (non-detector-specified)
    >>> dm_mbi.l1_file("flipped_image.fits")
    """

    def __init__(self,
                 ver_sim="C5.2",
                 datatype="mbi",
                 available_detectors=None,
                 target_detectors=None,
                 dir_l0=".",
                 dir_l1=".",
                 path_aux="",  # bias dark flat
                 dir_pcref="",  # deprecated
                 _exp_id=100000100,
                 _exp_start=20270810081950,
                 _exp_stop=20270810082220,
                 _telescope="CSST",
                 _instrument="MSC",
                 _survey="MS",
                 _imagetype="SCI",
                 _l0_post="L0_1",
                 ):
        # version
        assert ver_sim in CP["sim"]["versions"]
        self.ver_sim = ver_sim

        # datatype, valid_detectors, detector2filter
        assert datatype in ["mbi", "sls", "all"]
        self.datatype = datatype
        if datatype == "mbi":
            # MBI
            self.valid_detectors = CP["mbi"]["detectors"]
            self.detector2filter = CP["mbi"]["detector2filter"]
        elif datatype == "sls":
            # SLS
            self.valid_detectors = CP["sls"]["detectors"]
            self.detector2filter = CP["sls"]["detector2filter"]
        else:
            # ALL
            self.valid_detectors = CP["all"]["detectors"]
            self.detector2filter = CP["all"]["detector2filter"]

        # available_detectors
        self.available_detectors = available_detectors
        # set all available detectors by default
        self.target_detectors = []
        self.set_detectors(target_detectors)

        # exposure info
        self._exp_id = _exp_id
        self._exp_start = _exp_start
        self._exp_stop = _exp_stop

        # file name components
        self._telescope = _telescope
        self._instrument = _instrument
        self._survey = _survey
        self._imagetype = _imagetype
        self._l0_post = _l0_post

        # data directory
        self.dir_l0 = dir_l0
        self.dir_l1 = dir_l1
        self.dir_pcref = dir_pcref
        self.path_aux = path_aux

        # record hard code names in history
        self.hardcode_history = []

    @staticmethod
    def from_dfs(ver_sim="C5.2", data_type="mbi", exp_id=10000100, dir_l1="."):
        """ Initialize the data manager from DFS (not implemented yet).

        Raises
        ------
        NotImplementedError
            Always.
        """
        raise NotImplementedError("from_dfs is currently not available!")

    @staticmethod
    def from_dir(ver_sim="C5.2", datatype="mbi", dir_l0=".", dir_l1=".", dir_pcref="", path_aux=""):
        """ Initialize the data manager from a local L0 directory.

        The science images in ``dir_l0`` are globbed; the detector numbers are
        parsed from every file name and the exposure info / file name
        components are parsed from the first one (sorted by name).

        Parameters
        ----------
        ver_sim : str
            The version of simulation data. Only "C5.2" is accepted.
        datatype : str
            The options are {"mbi", "sls", "all"}.
        dir_l0 : str
            The L0 directory.
        dir_l1 : str
            The L1 directory.
        dir_pcref : str
            The position calibration reference data directory (deprecated).
        path_aux : str
            The aux data path pattern (bias, flat, dark).

        Returns
        -------
        CsstMsDataManager
            The data manager instance.

        Raises
        ------
        FileNotFoundError
            If no science image is found in ``dir_l0``.
        """
        assert ver_sim in ["C5.2", ]
        # glob files
        fps_img = CsstMsDataManager.glob_image(dir_l0, ver_sim=ver_sim)
        if not fps_img:
            raise FileNotFoundError(f"No file found in dir_l0: {dir_l0}")

        # available detectors: the 8th component of the file name
        available_detectors = sorted(int(re.split(r"[_.]", fp)[7]) for fp in fps_img)

        # parse info; everything between the detector and the extension is the postfix
        _telescope, _instrument, _survey, _imagetype, \
            _exp_start, _exp_stop, _exp_id, \
            _detector, *_l0_post, _ext = re.split(r"[_.]", fps_img[0])
        _exp_start = int(_exp_start)
        _exp_stop = int(_exp_stop)
        _exp_id = int(_exp_id)

        return CsstMsDataManager(ver_sim=ver_sim,
                                 datatype=datatype,
                                 available_detectors=available_detectors,
                                 target_detectors=None,
                                 dir_l0=dir_l0,
                                 dir_l1=dir_l1,
                                 path_aux=path_aux,  # bias dark flat
                                 dir_pcref=dir_pcref,  # deprecated
                                 _exp_id=_exp_id,
                                 _exp_start=_exp_start,
                                 _exp_stop=_exp_stop,
                                 _telescope=_telescope,
                                 _instrument=_instrument,
                                 _survey=_survey,
                                 _imagetype=_imagetype,
                                 _l0_post="_".join(_l0_post),
                                 )

    @staticmethod
    def glob_image(dir_l0, ver_sim="C5.2"):
        """ Glob science image files in the L0 data directory.

        Parameters
        ----------
        dir_l0 : str
            The L0 directory.
        ver_sim : str
            The version of simulation data, one of {"C3", "C5.1", "C5.2"}.
            The default used to be "C5", which was rejected by the version
            check below, so it now defaults to "C5.2".

        Returns
        -------
        list
            The sorted base names of the matched files.
        """
        if ver_sim == "C3":
            pattern = os.path.join(dir_l0, "MSC_MS_*_raw.fits")
        else:
            assert ver_sim in ["C5.1", "C5.2"]
            pattern = os.path.join(dir_l0, "CSST_MSC_MS_SCI_*.fits")
        fps = sorted(os.path.basename(fp) for fp in glob.glob(pattern))
        print("@DM.glob_dir: {} files found with pattern: {}".format(len(fps), pattern))
        return fps

    @staticmethod
    def glob_cat(dir_l0, ver_sim="C5.2"):
        """ Glob input catalogs in the L0 data directory.

        Parameters
        ----------
        dir_l0 : str
            The L0 directory.
        ver_sim : str
            The version of simulation data, one of {"C5.1", "C5.2"}.
            The default used to be "C5", which was rejected by the version
            check below, so it now defaults to "C5.2".

        Returns
        -------
        list
            The sorted base names of the matched catalogs.
        """
        assert ver_sim in ["C5.1", "C5.2"]
        pattern = os.path.join(dir_l0, "MSC_*.cat")
        fps = sorted(os.path.basename(fp) for fp in glob.glob(pattern))
        print("@DM.glob_dir: {} files found with pattern: {}".format(len(fps), pattern))
        return fps

    def l0_cat(self, detector=6):
        """ The L0 catalog file path of a detector (C5.2 only). """
        assert self.ver_sim == "C5.2"
        fn = "{}_{}_chip_{:02d}_filt_{}.cat".format(
            self._instrument, self._exp_id, detector, self.detector2filter[detector])
        return os.path.join(self.dir_l0, fn)

    def l0_log(self, detector=6):
        """ The L0 log file path of a detector (C5.2 only). """
        assert self.ver_sim == "C5.2"
        fn = "{}_{}_chip_{:02d}_filt_{}.log".format(
            self._instrument, self._exp_id, detector, self.detector2filter[detector])
        return os.path.join(self.dir_l0, fn)

    def l0_detector(self, detector=6):
        """ The L0 detector-specific science image file path.

        The image type and the postfix come from ``self._imagetype`` and
        ``self._l0_post`` (``SCI`` and ``L0_1`` by default), so the path
        agrees with the file names parsed by ``from_dir``.
        """
        assert self.ver_sim in ["C5.1", "C5.2"]
        fn = "{}_{}_{}_{}_{}_{}_{}_{:02d}_{}.fits".format(
            self._telescope, self._instrument, self._survey, self._imagetype,
            self._exp_start, self._exp_stop, self._exp_id, detector, self._l0_post)
        return os.path.join(self.dir_l0, fn)

    def l0_crs(self, detector=6):
        """ The L0 cosmic ray file path.

        The image type is always ``CRS``; the postfix comes from
        ``self._l0_post`` (``L0_1`` by default).
        """
        assert self.ver_sim in ["C5.1", "C5.2"]
        fn = "{}_{}_{}_CRS_{}_{}_{}_{:02d}_{}.fits".format(
            self._telescope, self._instrument, self._survey,
            self._exp_start, self._exp_stop, self._exp_id, detector, self._l0_post)
        return os.path.join(self.dir_l0, fn)

    def l1_detector(self, detector=6, post="img.fits"):
        """ Generate the L1 detector-specific file path.

        Parameters
        ----------
        detector : int
            The detector ID.
        post : str
            The postfix, e.g., {"img.fits", "wht.fits", "flg.fits",
            "img_L1.fits", "wht_L1.fits", "flg_L1.fits"}.

        Returns
        -------
        str
            The L1 file path.
        """
        assert self.ver_sim in ["C5.1", "C5.2"]
        fn = "{}_{}_{}_{}_{}_{}_{}_{:02d}_{}".format(
            self._telescope, self._instrument, self._survey, self._imagetype,
            self._exp_start, self._exp_stop, self._exp_id, detector, post)
        return os.path.join(self.dir_l1, fn)

    def set_detectors(self, detectors=None):
        """ Set the target detectors.

        Parameters
        ----------
        detectors : list or None
            The requested detectors. If None, all available detectors are
            targeted. Requested detectors that are not available are dropped
            (with a message) instead of raising.
        """
        if detectors is None:
            # default detectors
            self.target_detectors = self.available_detectors
            return

        requested = list(detectors)
        # an unset (None) list of available detectors means nothing is available
        available = set(self.available_detectors or ())

        # explicit check rather than ``assert``, which is stripped under ``python -O``
        if set(requested).issubset(available):
            self.target_detectors = requested
            return

        print("@DM: available detectors are ", self.available_detectors)
        print("@DM: target detectors are ", detectors)
        print("@DM: final target detectors are ", set(requested) & available)
        # keep the requested order and drop duplicates so the result is deterministic
        self.target_detectors = [d for d in dict.fromkeys(requested) if d in available]
        print("final target detector IDs are ", self.target_detectors)
        return

    def get_bias(self, detector=6):
        """ Get bias data.

        For "mbi" the first file matching the ``path_aux`` pattern is read and
        its data returned; otherwise the formatted file path is returned.
        """
        if self.datatype == "mbi":
            fp = glob.glob(self.path_aux.format("CLB", detector))[0]
            return fits.getdata(fp)
        else:
            return self.path_aux.format(detector, "bias")

    def get_dark(self, detector=6):
        """ Get dark data.

        For "mbi" the first file matching the ``path_aux`` pattern is read and
        its data returned; otherwise the formatted file path is returned.
        """
        if self.datatype == "mbi":
            fp = glob.glob(self.path_aux.format("CLD", detector))[0]
            return fits.getdata(fp)
        else:
            return self.path_aux.format(detector, "dark")

    def get_flat(self, detector=6):
        """ Get flat data.

        For "mbi" the first file matching the ``path_aux`` pattern is read and
        its data returned; otherwise the formatted file path is returned.
        """
        if self.datatype == "mbi":
            fp = glob.glob(self.path_aux.format("CLF", detector))[0]
            return fits.getdata(fp)
        else:
            return self.path_aux.format(detector, "flat")

    def l1_file(self, name="", comment=""):
        """ L1 file path

        Parameters
        ----------
        name : str
            file name
        comment : str
            use the function name plz

        Returns
        -------
        fp: str
            the synthetic file path
        """
        fp = os.path.join(self.dir_l1, name)
        # record hardcode history
        self.hardcode_history.append(dict(hdcd=fp, comment=comment))
        return fp

    def get_sls_info(self, use_dfs=False):
        """ Get the header (extension 1) of the L0 image of the single target detector.

        Raises
        ------
        NotImplementedError
            If ``use_dfs`` is True.
        """
        if use_dfs:
            raise NotImplementedError()
        else:
            assert len(self.target_detectors) == 1
            header = fits.getheader(self.l0_detector(self.target_detectors[0]), ext=1)
        return header

    def get_mbi_info(self, use_dfs=False):
        """ Read the MBI L1 info table from its hard-coded location.

        Raises
        ------
        NotImplementedError
            If ``use_dfs`` is True.
        """
        if use_dfs:
            raise NotImplementedError()
        else:
            info = Table.read("/nfsdata/share/csst_simulation_data/Cycle-5-SimuData/slitlessSpectroscopy/t_mbi_l1.fits")
        return info

    @staticmethod
    def quickstart(ver_sim="C5.2", datatype="mbi", dir_l1=".", exposure_id=100):
        """Quick dataset generator for tests on dandelion or PMO

        Parameters
        ----------
        ver_sim : str
            {"C5.2"}
        datatype : str
            {"mbi", "sls"}
        dir_l1 : str
            output directory
        exposure_id : int
            The serial number of the exposure. 20-154 for C5.2.

        Returns
        -------
        CsstMsDataManager
            The Main Survey Data Manager instance.

        """
        assert datatype in ["mbi", "sls"]

        # auto identify node name
        hostname = os.uname()[1]
        assert hostname in ["dandelion", "ubuntu"]

        # dandelion
        if hostname == "dandelion" and datatype == "mbi":
            dir_l0 = "/nfsdata/share/csst_simulation_data/Cycle-5-SimuData/multipleBandsImaging/" \
                     "NGP_AstrometryON_shearOFF/MSC_{:07d}/".format(exposure_id)
            path_aux = "/nfsdata/users/cham/L1Test/ref_C5.2/MSC_{}_*_{:02d}_combine.fits"
            dir_pcref = "/nfsdata/users/csstpipeline/L1Pipeline/msc/gaia_dr3/"
        elif hostname == "dandelion" and datatype == "sls":
            dir_l0 = "/nfsdata/share/csst_simulation_data/Cycle-5-SimuData/slitlessSpectroscopy/" \
                     "NGP_AstrometryON_shearOFF_Spec/MSC_{:07d}/".format(exposure_id)
            path_aux = "/nfsdata/share/csst_simulation_data/Cycle-5-SimuData/slitlessSpectroscopy/csst_{:02d}{}.fits"
            dir_pcref = ""

        # PMO
        elif hostname == "ubuntu" and datatype == "mbi":
            dir_l0 = "/share/simudata/CSSOSDataProductsSims/data/CSSTSimImage_C5/" \
                     "NGP_AstrometryON_shearOFF/MSC_{:07d}/".format(exposure_id)
            path_aux = "/data/sim_data/MSC_0000100/ref/MSC_{}_*_{:02d}_combine.fits"
            dir_pcref = "/home/user/L1Pipeline/msc/gaia_dr3/"
        elif hostname == "ubuntu" and datatype == "sls":
            dir_l0 = "/share/simudata/CSSOSDataProductsSims/data/CSSTSimImage_C5/" \
                     "NGP_AstrometryON_shearOFF_Spec/MSC_{:07d}/".format(exposure_id)
            path_aux = ""
            dir_pcref = ""
        else:
            raise ValueError("@DM: invalid hostname {} or datatype {}!".format(hostname, datatype))

        return CsstMsDataManager.from_dir(
            ver_sim=ver_sim, datatype=datatype, dir_l0=dir_l0, dir_l1=dir_l1, dir_pcref=dir_pcref, path_aux=path_aux)


class CsstMbiDataManager:
    """ Deprecated MBI data manager; instantiation always raises ``DeprecationWarning``. """

    def __init__(self, ver_sim="C5.2", dir_l0="", dir_l1="", dir_pcref="", path_aux="", force_all_detectors=False):
        """ initialize the multi-band imaging data manager

        Parameters
        ----------
        ver_sim: str
            version of simulation data, see csst_common.params.CP
        dir_l0: str
            L0 directory
        dir_l1: str
            L1 directory
        dir_pcref: str
            position calibration data directory
        path_aux: str
            aux data directory (bias, flat, dark)
        force_all_detectors: bool
            if True, assert data for all detectors are available

        Examples
        --------
        >>> dm_mbi = CsstMbiDataManager(...)
        >>> # access L0 directory
        >>> dm_mbi.dir_l0
        >>> # access L1 directory
        >>> dm_mbi.dir_l1
        >>> # access dir_pcref
        >>> dm_mbi.dir_pcref
        >>> # access path_aux
        >>> dm_mbi.path_aux
        >>> # access ver_sim
        >>> dm_mbi.ver_sim
        >>> # access target detectors
        >>> dm_mbi.target_detectors
        >>> # access available detectors
        >>> dm_mbi.available_detectors
        >>> # define an L1 file (detector-specified)
        >>> dm_mbi.l1_detector(detector=6)
        >>> # define an L1 file (non-detector-specified)
        >>> dm_mbi.l1_file("flipped_image.fits")
        """
        raise DeprecationWarning(
            "CsstMbiDataManager will no longer be available in some days, please use CsstMsDataManager instead.")