inspect.py 3.39 KB
Newer Older
BO ZHANG's avatar
BO ZHANG committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
"""
Aim
---
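Inspect a dataset: print prc_status statistics for level-0 or level-1 data
and optionally reset prc_status for the selected data.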

Example
-------
python -m csst_dag.cli.inspect -h

python -m csst_dag.cli.inspect \
    --dataset=csst-msc-c9-25sqdeg-v3 \
    --instrument=MSC \
    --obs-type=WIDE \
    --obs-group=W2 \
    --obs-id=10100232366 \
    --detector=09 \
    --prc-status=0
"""

import argparse

import numpy as np
from astropy import table

from csst_dag import Dispatcher, dfs

# Command-line interface of the inspector. Option values are forwarded
# unchanged to the Dispatcher queries further down in this script.
parser = argparse.ArgumentParser(
    description="Inspector for CSST datasets.",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)

# data level
parser.add_argument("--level", type=int, help="Data level", default=0)
# level0 data parameters
parser.add_argument("--dataset", type=str, help="Dataset name")
parser.add_argument("--instrument", type=str, help="Instrument name", default=None)
parser.add_argument("--obs-type", type=str, help="Observation type", default=None)
parser.add_argument("--obs-group", type=str, help="Observation group", default=None)
parser.add_argument("--obs-id", type=str, help="Observation ID", default=None)
parser.add_argument("--detector", type=str, help="Detector name", default=None)
# The level-0 query reads ``args.prc_status`` and the module docstring
# advertises ``--prc-status``; without this option the namespace has no such
# attribute and level-0 inspection fails with AttributeError.
parser.add_argument("--prc-status", type=int, help="Processing status", default=None)
# level1 data parameters
parser.add_argument("--data-model", type=str, help="Data model", default=None)
parser.add_argument("--batch-id", type=str, help="Batch ID", default=None)
# reset prc_status
parser.add_argument("--reset", type=int, help="Reset prc_status", default=None)

args = parser.parse_args()

# Debugging aid: uncomment to bypass the command line and use fixed
# parameters instead.
#
# from csst_dag import DotDict
#
# args = DotDict(
#     dataset="csst-msc-c9-25sqdeg-v3",
#     instrument="MSC",
#     obs_type="WIDE",
#     obs_group="W2",
#     obs_id=None,
#     detector=None,
#     prc_status=None,
# )

print("CLI parameters: ", args)

# Filters accepted by both the level-0 and the level-1 query.
shared_filters = dict(
    dataset=args.dataset,
    instrument=args.instrument,
    obs_type=args.obs_type,
    obs_group=args.obs_group,
    obs_id=args.obs_id,
    detector=args.detector,
)

# Any level other than 0 is inspected as level-1 data.
if args.level != 0:
    print("Inspecting level1 data...")
    plan_basis, data_basis = Dispatcher.find_plan_level1_basis(
        **shared_filters,
        data_model=args.data_model,
        batch_id=args.batch_id,
    )
else:
    print("Inspecting level0 data...")
    plan_basis, data_basis = Dispatcher.find_plan_level0_basis(
        **shared_filters,
        prc_status=args.prc_status,
    )

n_plan, n_data = len(plan_basis), len(data_basis)
print(f"{n_plan} plan basis, {n_data} data basis found")
# Keep the record ids before the "_id" column is dropped: the optional reset
# at the end of the script needs them, and reading data_basis["_id"] after
# the removal raises KeyError.
data_ids = list(data_basis["_id"])
data_basis.remove_columns(["file_name", "_id"])

# zero / non-zero prc_status: count how many rows carry each distinct value
u_prc_status, c_prc_status = np.unique(data_basis["prc_status"].data, return_counts=True)
t_prc_status = table.Table([u_prc_status, c_prc_status], names=["prc_status", "count"])
print("Prc status statistics:")
t_prc_status.pprint_all()

# For each statistics dimension, count the rows per unique combination of
# the listed columns and print the combinations together with their counts.
stats_dims = {
    "obs_type": ["dataset", "instrument", "obs_type", "prc_status"],
    "obs_id": ["dataset", "instrument", "obs_type", "obs_id", "prc_status"],
    "detector": ["dataset", "instrument", "obs_type", "detector", "prc_status"],
}
for stats_type, stats_keys in stats_dims.items():
    print("\n>>> STATS DIM: ", stats_type)
    u_data, c_data = np.unique(data_basis[stats_keys], return_counts=True)
    u_table = table.Table(u_data)
    u_table.add_column(table.Column(c_data, name="count"))
    u_table.pprint_all()

# Optional reset: only performed when --reset was given on the command line.
if args.reset is not None:
    print("Resetting prc_status to ", args.reset)
    dfs.update_prc_status_by_ids(data_ids, args.reset)