# data_manager.py
import glob
import os
import re

from astropy.io import fits

from .params import CSST_PARAMS as CP


class CsstMbiDataManager:
    """ this class defines the file format of the input & output of CSST MSC L1 pipeline

    C3:
        MSC_MS_210525220000_100000020_06_raw.fits
        MSC_CRS_210525220000_100000020_06_raw.fits
        MSC_210525120000_0000020_06.cat

    C5.1:
        CSST_MSC_MS_SCI_20270810081950_20270810082220_100000100_06_L0_1.fits
        CSST_MSC_MS_CRS_20270810081950_20270810082220_100000100_06_L0_1.fits
        MSC_10000100_chip_06_filt_y.cat
        MSC_10000100_chip_06_filt_y.log

    C5.2:
        CSST_MSC_MS_SCI_20270810081950_20270810082220_100000100_06_L0_1.fits
        CSST_MSC_MS_CRS_20270810081950_20270810082220_100000100_06_L0_1.fits
        MSC_100000100_chip_06_filt_y.cat
        MSC_100000100_chip_06_filt_y.log

    """

    def __init__(self, ver_sim="C5.2", dir_l0="", dir_l1="", dir_pcref="", path_aux="", force_all_detectors=False):
        """ set up the multi-band imaging data manager from an L0 directory

        The L0 directory is globbed for science images and input catalogs.
        The first image name is split on "_" and "." to recover the naming
        fields (instrument, survey, exposure start, exposure ID, ...), and the
        detector IDs of all images found are collected in ``available_detectors``.

        Parameters
        ----------
        ver_sim: str
            version of simulation data, must be listed in CP["sim"]["versions"]
            (see csst_common.params.CP)
        dir_l0: str
            L0 directory
        dir_l1: str
            L1 directory
        dir_pcref: str
            position calibration data directory
        path_aux: str
            path template of the aux data (bias, flat, dark); get_bias / get_dark /
            get_flat format it with a type code and a detector ID, then glob it
        force_all_detectors: bool
            if True, assert that the number of images found equals the number of
            detectors in CP["mbi"]["detectors"]; otherwise only assert that at
            least one image is found

        Raises
        ------
        AssertionError
            if ver_sim is not a known version or the image-count check fails
        ValueError
            if the first image name does not split into the expected number of
            fields (7 for C3, 11 for C5.1 / C5.2) or an exposure field is not an integer
        IndexError
            if no input catalog is found (C3, C5.1 and C5.2)

        Examples
        --------
        >>> dm = CsstMbiDataManager(...)
        >>> # access L0 directory
        >>> dm.dir_l0
        >>> # access L1 directory
        >>> dm.dir_l1
        >>> # access dir_pcref
        >>> dm.dir_pcref
        >>> # access path_aux
        >>> dm.path_aux
        >>> # access ver_sim
        >>> dm.ver_sim
        >>> # access target detectors
        >>> dm.target_detectors
        >>> # access available detectors
        >>> dm.available_detectors
        >>> # define an L1 file (detector-specified)
        >>> dm.l1_detector(detector=6)
        >>> # define an L1 file (non-detector-specified)
        >>> dm.l1_file("flipped_image.fits")
        """
        assert ver_sim in CP["sim"]["versions"]

        # plain configuration
        self.dir_l0 = dir_l0
        self.dir_l1 = dir_l1
        self.dir_pcref = dir_pcref
        self.path_aux = path_aux
        self.ver_sim = ver_sim
        self.target_detectors = []

        # paths handed out by l1_file() are recorded here
        self.hardcode_history = []

        image_names = self.glob_image(dir_l0, ver_sim=ver_sim)
        catalog_names = self.glob_cat(dir_l0, ver_sim=ver_sim)

        n_images = len(image_names)
        if not force_all_detectors:
            assert n_images > 0
        else:
            assert n_images == len(CP["mbi"]["detectors"])

        def split_name(file_name):
            # break a file name into its "_" / "." separated fields
            return re.split(r"[_.]", file_name)

        if ver_sim == "C3":
            # e.g. MSC_MS_210525220000_100000020_06_raw.fits
            (self._instrument, self._survey,
             exp_start, exp_id,
             _detector, self._l0_suffix, _ext) = split_name(image_names[0])
            self._cat_id = split_name(catalog_names[0])[1]

            self._exp_start = int(exp_start)
            self._exp_id = int(exp_id)

            # available detectors: field 4 of every image name
            self.available_detectors = sorted(int(split_name(name)[4]) for name in image_names)

        elif ver_sim in ["C5.1", "C5.2"]:
            # e.g. CSST_MSC_MS_SCI_20270810081950_20270810082220_100000100_06_L0_1.fits
            (self._telescope, self._instrument, self._survey, self._imagetype,
             exp_start, exp_stop, exp_id,
             _detector, self._l0_suffix, self._version, _ext) = split_name(image_names[0])
            self._cat_id = split_name(catalog_names[0])[1]

            self._exp_start = int(exp_start)
            self._exp_stop = int(exp_stop)
            self._exp_id = int(exp_id)

            # available detectors: field 7 of every image name
            self.available_detectors = sorted(int(split_name(name)[7]) for name in image_names)

    @staticmethod
    def glob_image(dir_l0, ver_sim="C5"):
        """ glob files in L0 data directory """
        if ver_sim == "C3":
            pattern = os.path.join(dir_l0, "MSC_MS_*_raw.fits")
        else:
            assert ver_sim in ["C5.1", "C5.2"]
            pattern = os.path.join(dir_l0, "CSST_MSC_MS_SCI_*.fits")
        fps = glob.glob(pattern)
        fps = [os.path.basename(fp) for fp in fps]
        fps.sort()

        print("@DM.glob_dir: {} files found with pattern: {}".format(len(fps), pattern))
        return fps

    @staticmethod
    def glob_cat(dir_l0, ver_sim="C5"):
        """ glob input catalogs in L0 data directory """
        if ver_sim == "C3":
            pattern = os.path.join(dir_l0, "MSC_*.cat")
        else:
            assert ver_sim in ["C5.1", "C5.2"]
            pattern = os.path.join(dir_l0, "MSC_*.cat")
        fps = glob.glob(pattern)
        fps = [os.path.basename(fp) for fp in fps]
        fps.sort()

        print("@DM.glob_dir: {} files found with pattern: {}".format(len(fps), pattern))
        return fps

    def l0_cat(self, detector=6):
        """ the L0 cat file path"""
        if self.ver_sim == "C3":
            fn = "{}_{}_{:07d}_{:02d}.cat".format(
                self._instrument, self._cat_id, self._exp_id - 100000000, detector)
        elif self.ver_sim == "C5.1":
            fn = "{}_{}_chip_{:02d}_filt_{}.cat".format(
BO ZHANG's avatar
BO ZHANG committed
158
                self._instrument, self._exp_id - 90000000, detector, CP["mbi"]["detector2filter"])
BO ZHANG's avatar
BO ZHANG committed
159
160
        elif self.ver_sim == "C5.2":
            fn = "{}_{}_chip_{:02d}_filt_{}.cat".format(
BO ZHANG's avatar
BO ZHANG committed
161
                self._instrument, self._exp_id, detector, CP["mbi"]["detector2filter"])
BO ZHANG's avatar
BO ZHANG committed
162
163
164
165
166
167
        return os.path.join(self.dir_l0, fn)

    def l0_log(self, detector=6):
        """ L0 log file path """
        if self.ver_sim == "C5.1":
            fn = "{}_{}_chip_{:02d}_filt_{}.log".format(
BO ZHANG's avatar
BO ZHANG committed
168
                self._instrument, self._exp_id - 90000000, detector, CP["mbi"]["detector2filter"])
BO ZHANG's avatar
BO ZHANG committed
169
170
        elif self.ver_sim == "C5.2":
            fn = "{}_{}_chip_{:02d}_filt_{}.log".format(
BO ZHANG's avatar
BO ZHANG committed
171
                self._instrument, self._exp_id, detector, CP["mbi"]["detector2filter"])
BO ZHANG's avatar
BO ZHANG committed
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
        return os.path.join(self.dir_l0, fn)

    def l0_detector(self, detector=6):
        """ L0 detector-specific image file path """
        if self.ver_sim == "C3":
            fn = "{}_{}_{}_{}_{:02d}_raw.fits".format(
                self._instrument, self._survey, self._exp_start, self._exp_id, detector)
        else:
            assert self.ver_sim in ["C5.1", "C5.2"]
            fn = "{}_{}_{}_SCI_{}_{}_{}_{:02d}_L0_1.fits".format(
                self._telescope, self._instrument, self._survey,
                self._exp_start, self._exp_stop, self._exp_id, detector)
        return os.path.join(self.dir_l0, fn)

    def l0_crs(self, detector=6):
        """ L0 cosmic ray file path """
        if self.ver_sim == "C3":
            fn = "{}_CRS_{}_{}_{:02d}_raw.fits".format(
                self._instrument, self._exp_start, self._exp_id, detector)
        else:
            assert self.ver_sim in ["C5.1", "C5.2"]
            fn = "{}_{}_{}_CRS_{}_{}_{}_{:02d}_L0_1.fits".format(
                self._telescope, self._instrument, self._survey,
                self._exp_start, self._exp_stop, self._exp_id, detector)
        return os.path.join(self.dir_l0, fn)

    def l1_detector(self, detector=6, post="img.fits"):
        """ generate L1 file path

        Parameters
        ----------
        detector:
            detector ID
        post:
            postfix
            {"img.fits", "wht.fits", "flg.fits", "img_L1.fits", "wht_L1.fits", "flg_L1.fits"}

        Returns
        -------
        L1 file path

        """
        if self.ver_sim == "C3":
            fn = "{}_{}_{}_{}_{:02d}_{}".format(
                self._instrument, self._survey,
                self._exp_start, self._exp_id, detector, post)
        else:
            assert self.ver_sim in ["C5.1", "C5.2"]
            fn = "{}_{}_{}_SCI_{}_{}_{}_{:02d}_{}".format(
                self._telescope, self._instrument, self._survey,
                self._exp_start, self._exp_stop, self._exp_id, detector, post)
        return os.path.join(self.dir_l1, fn)

    def set_detectors(self, detectors=None):
        """ set target detector """
        if detectors is None:
            # default detectors
            self.target_detectors = self.available_detectors
        else:
            try:
                # assert detectors is a subset of available detectors
                assert set(detectors).issubset(set(self.available_detectors))
BO ZHANG's avatar
BO ZHANG committed
234
                self.target_detectors = list(detectors)
BO ZHANG's avatar
BO ZHANG committed
235
            except AssertionError as ae:
BO ZHANG's avatar
BO ZHANG committed
236
237
238
239
                print("@DM: available detector are ", self.available_detectors)
                print("@DM: target detector are ", detectors)

                print("@DM: final target detectors are ", set(detectors) & set(self.available_detectors))
BO ZHANG's avatar
BO ZHANG committed
240
                # raise ae
BO ZHANG's avatar
BO ZHANG committed
241
                self.target_detectors = set(detectors) & set(self.available_detectors)
BO ZHANG's avatar
BO ZHANG committed
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
        print("final target detector IDs are ", self.target_detectors)
        return

    def get_bias(self, detector=6):
        """ read the data of the first file matching path_aux formatted with "CLB" and the detector ID

        Raises IndexError if no file matches.
        """
        pattern = self.path_aux.format("CLB", detector)
        matches = glob.glob(pattern)
        return fits.getdata(matches[0])

    def get_dark(self, detector=6):
        """ read the data of the first file matching path_aux formatted with "CLD" and the detector ID

        Raises IndexError if no file matches.
        """
        pattern = self.path_aux.format("CLD", detector)
        matches = glob.glob(pattern)
        return fits.getdata(matches[0])

    def get_flat(self, detector=6):
        """ read the data of the first file matching path_aux formatted with "CLF" and the detector ID

        Raises IndexError if no file matches.
        """
        pattern = self.path_aux.format("CLF", detector)
        matches = glob.glob(pattern)
        return fits.getdata(matches[0])

    def l1_file(self, name="", comment=""):
        """

        Parameters
        ----------
        name: str
            file name
        comment: str
            use the function name plz

        Returns
        -------
        fp: str
            the synthetic file path

        """
        fp = os.path.join(self.dir_l1, name)
        # record hardcode history
        self.hardcode_history.append(dict(hdcd=fp, comment=comment))
        return fp

    @staticmethod
    def __testddl__():
        """ test function

        Builds a C5.2 data manager on a fixed simulation directory (all
        detectors required) and prints, for the first two available
        detectors, the L0 / L1 paths together with whether the L0 files exist.

        Returns
        -------
        CsstMbiDataManager
            the data manager built for the test
        """
        dm = CsstMbiDataManager(
            ver_sim="C5.2",
            dir_l0="/nfsdata/share/csst_simulation_data/Cycle-5-SimuData/multipleBandsImaging"
                   "/NGP_AstrometryON_shearOFF/MSC_0000100",
            dir_l1=".",
            force_all_detectors=True,
        )
        print("----- available detectors -----")
        print(dm.available_detectors)
        for detector in dm.available_detectors[:2]:
            print("----- L0 images -----")
            print(dm.l0_detector(detector=detector))
            print(os.path.exists(dm.l0_detector(detector=detector)))
            print("----- L0 crs -----")
            print(dm.l0_crs(detector=detector))
            # check the cosmic ray file itself (this used to re-check the image path)
            print(os.path.exists(dm.l0_crs(detector=detector)))
            print("----- L0 input cat -----")
            print(dm.l0_cat(detector=detector))
            print(os.path.exists(dm.l0_cat(detector=detector)))
            print("----- L0 input log -----")
            print(dm.l0_log(detector=detector))
            print(os.path.exists(dm.l0_log(detector=detector)))
            print("----- L1 images -----")
            print(dm.l1_detector(detector, post="img.fits"))
        return dm
    @staticmethod
    def quickstart(ver_sim="C5.2", dir_l1=".", exposure_id=100):
        """ quick dataset generator for tests on dandelion or PMO

        The node is identified by its host name; "dandelion" and "ubuntu"
        (PMO node) are the only ones supported.

        Parameters
        ----------
        ver_sim:
            {"C5.2"}
        dir_l1:
            output directory
        exposure_id:
            21-154 for C5.2

        Returns
        -------
        CsstMbiDataManager
            the MBI data manager instance

        Raises
        ------
        ValueError
            if the host name is neither "dandelion" nor "ubuntu"
        """
        # auto identify node name
        node = os.uname()[1]

        # per-node settings: (L0 directory template, aux file pattern, position calibration directory)
        # NOTE(review): the aux pattern of the PMO node is fixed to MSC_0000100
        # whatever exposure_id is -- confirm this is intended
        node_settings = {
            # dandelion node
            "dandelion": (
                "/nfsdata/share/csst_simulation_data/Cycle-5-SimuData/multipleBandsImaging/"
                "NGP_AstrometryON_shearOFF/MSC_{:07d}/",
                "/nfsdata/users/cham/L1Test/ref_C5.2/MSC_{}_*_{:02d}_combine.fits",
                "/nfsdata/users/csstpipeline/L1Pipeline/msc/gaia_dr3/",
            ),
            # PMO node
            "ubuntu": (
                "/share/simudata/CSSOSDataProductsSims/data/CSSTSimImage_C5/"
                "NGP_AstrometryON_shearOFF/MSC_{:07d}/",
                "/data/sim_data/MSC_0000100/ref/MSC_{}_*_{:02d}_combine.fits",
                "/home/user/L1Pipeline/msc/gaia_dr3/",
            ),
        }
        if node not in node_settings:
            raise ValueError("@DM: invalid hostname {}!".format(node))

        l0_template, path_aux, dir_pcref = node_settings[node]
        dir_l0 = l0_template.format(exposure_id)

        return CsstMbiDataManager(ver_sim=ver_sim, dir_l0=dir_l0, dir_l1=dir_l1, dir_pcref=dir_pcref, path_aux=path_aux)

# Simulation L0 directory templates keyed by (simulation version, host name);
# each template takes the exposure ID through ``str.format``.
# NOTE(review): the "ubuntu" entry is identical to the "dandelion" one, whereas
# CsstMbiDataManager.quickstart uses a different L0 root on "ubuntu" -- confirm
# which one is intended.
DIR_SIM = {
    ("C5.2", "dandelion"): (
        "/nfsdata/share/csst_simulation_data/Cycle-5-SimuData/multipleBandsImaging/"
        "NGP_AstrometryON_shearOFF/MSC_{:07d}/"
    ),
    ("C5.2", "ubuntu"): (
        "/nfsdata/share/csst_simulation_data/Cycle-5-SimuData/multipleBandsImaging/"
        "NGP_AstrometryON_shearOFF/MSC_{:07d}/"
    ),
}

if __name__ == "__main__":
    # Manual smoke tests: print the generated paths and whether the L0 files exist.
    # `os` comes from the module-level imports.

    # test C3
    dm = CsstMbiDataManager(
        ver_sim="C3", dir_l0="/data/L1Pipeline/msc/MSC_0000020", dir_l1="/data/L1Pipeline/msc/work")
    print("----- L0 images -----")
    print(dm.l0_detector(detector=6))
    print(os.path.exists(dm.l0_detector(detector=6)))
    print("----- L0 crs -----")
    print(dm.l0_crs(detector=6))
    # check the cosmic ray file that was just printed (this used to check an image path)
    print(os.path.exists(dm.l0_crs(detector=6)))
    print("----- L0 input cat -----")
    print(dm.l0_cat(8))
    print(os.path.exists(dm.l0_cat(detector=8)))
    print("----- available detectors -----")
    print(dm.available_detectors)
    print("----- L1 images -----")
    print(dm.l1_detector(25, "img.fits"))

    # test C5.1
    dm = CsstMbiDataManager(
        ver_sim="C5.1", dir_l0="/data/sim_data/MSC_0000100", dir_l1="/home/user/L1Pipeline/msc/work")
    print("----- available detectors -----")
    print(dm.available_detectors)
    for detector in dm.available_detectors[:2]:
        print("----- L0 images -----")
        print(dm.l0_detector(detector=detector))
        print(os.path.exists(dm.l0_detector(detector=detector)))
        print("----- L0 crs -----")
        print(dm.l0_crs(detector=detector))
        # check the cosmic ray file itself (this used to re-check the image path)
        print(os.path.exists(dm.l0_crs(detector=detector)))
        print("----- L0 input cat -----")
        print(dm.l0_cat(detector=detector))
        print(os.path.exists(dm.l0_cat(detector=detector)))
        print("----- L0 input log -----")
        print(dm.l0_log(detector=detector))
        print(os.path.exists(dm.l0_log(detector=detector)))
        print("----- L1 images -----")
        print(dm.l1_detector(detector, post="img.fits"))