Commit c1df3efa authored by BO ZHANG's avatar BO ZHANG 🏀
Browse files

init dispatcher.py

parent 6336cc21
import numpy as np
from astropy.table import Table
from csst_dfs_client import plan, level0, level1
from .._csst import csst
# THESE ARE GENERAL PARAMETERS!
# Default query-parameter templates. Each dict below is the full set of
# keys accepted by the corresponding DFS query; `override_common_keys`
# overlays caller kwargs onto these defaults before the query is issued.
# A value of None means "no filter on this key".

# Keys accepted by `plan.find` (observation-plan queries).
PLAN_PARAMS = {
    "dataset": None,
    "instrument": None,
    "obs_type": None,
    "obs_group": None,
    "obs_id": None,
    "proposal_id": None,
}
# Keys accepted by `level0.find` (raw Level-0 data queries).
# prc_status=-1024 appears to be a sentinel default — TODO confirm meaning.
LEVEL0_PARAMS = {
    "dataset": None,
    "instrument": None,
    "obs_type": None,
    "obs_group": None,
    "obs_id": None,
    "detector": None,
    "prc_status": -1024,
}
# Keys accepted by `level1.find` (processed Level-1 data queries).
LEVEL1_PARAMS = {
    "dataset": None,
    "instrument": None,
    "obs_type": None,
    "obs_group": None,
    "obs_id": None,
    "detector": None,
    "prc_status": -1024,
    "data_model": None,
    "batch_id": "default_batch",
}
# Default processing options attached to dispatched jobs.
PROC_PARAMS = {
    "priority": 1,
    "batch_id": "default_batch",
    "pmapname": "pmapname",
    "final_prc_status": -2,
    "demo": False,
}
def override_common_keys(d1: dict, d2: dict) -> dict:
    """
    Construct a new dictionary with the keys of ``d1``, overriding each
    value with the one from ``d2`` when the key is present there.

    Keys that exist only in ``d2`` are ignored, so the result always has
    exactly the keys of ``d1`` (used to sanitize caller kwargs against a
    parameter template such as ``PLAN_PARAMS``).

    Parameters
    ----------
    d1 : dict
        The template dictionary providing the key set and default values.
    d2 : dict
        The overriding dictionary.

    Returns
    -------
    dict
        The merged dictionary.
    """
    # dict.get with the template value as fallback replaces the
    # `k in d2.keys()` double lookup of the original
    return {k: d2.get(k, v) for k, v in d1.items()}
# def extract_basis(dlist: list[dict], basis_keys: tuple) -> np.ndarray:
# """Extract basis key-value pairs from a list of dictionaries."""
# return Table([{k: d.get(k) for k in basis_keys} for d in dlist]).as_array()
def extract_basis(dlist: list[dict], basis_keys: tuple) -> np.typing.NDArray:
"""Extract basis key-value pairs from a list of dictionaries."""
return np.array([{k: d.get(k) for k in basis_keys} for d in dlist], dtype=dict)
class Dispatcher:
    """
    Dispatch pipeline tasks based on DFS plan / Level-0 data queries.

    Each ``dispatch_level0_*`` static method queries the DFS client for
    records matching the caller's kwargs and assembles task dicts at the
    corresponding granularity (file / detector / obs_id).
    """

    @staticmethod
    def dispatch_level0_file(**kwargs) -> dict:
        """
        Dispatch one task per matching Level-0 file record.

        Parameters
        ----------
        **kwargs
            Query overrides for ``LEVEL0_PARAMS`` (e.g. ``dataset``,
            ``instrument``, ``obs_id``).

        Returns
        -------
        dict
            ``{"task_list": list[dict], "relevant_data_id_list": list}``.
            ``relevant_data_id_list`` is not populated at file granularity.
        """
        data_recs = level0.find(**override_common_keys(LEVEL0_PARAMS, kwargs))
        # consistent with dispatch_level0_detector: check the query result
        # and iterate the record payload, not the response object
        assert data_recs.success, data_recs
        # construct results
        task_list = []
        for data_rec in data_recs.data:
            # construct task
            task = dict(
                dataset=data_rec["dataset"],
                instrument=data_rec["instrument"],
                obs_type=data_rec["obs_type"],
                obs_group=data_rec["obs_group"],
                obs_id=data_rec["obs_id"],
                detector=data_rec["detector"],
                file_name=data_rec["file_name"],
            )
            # BUG FIX: the constructed task was previously discarded,
            # so task_list was always returned empty
            task_list.append(task)
        return dict(
            task_list=task_list,
            relevant_data_id_list=[],
        )

    @staticmethod
    def dispatch_level0_detector(**kwargs) -> dict:
        """
        Dispatch one task per (observation plan, effective detector) whose
        expected number of Level-0 files has fully arrived.

        Parameters
        ----------
        **kwargs
            Query overrides for ``PLAN_PARAMS`` / ``LEVEL0_PARAMS``.
            Must contain a valid ``instrument``.

        Returns
        -------
        dict
            ``{"task_list": list[dict], "relevant_data_id_list": list}``
            where ``relevant_data_id_list`` holds the ``_id`` of every
            Level-0 record consumed by the dispatched tasks.
        """
        # instrument is mandatory and must be a known CSST instrument
        assert "instrument" in kwargs.keys(), f"{kwargs} does not have key 'instrument'"
        instrument = kwargs.get("instrument")
        assert instrument in ("MSC", "MCI", "IFS", "CPIC", "HSTDM")
        # query for plan and data
        plan_recs = plan.find(**override_common_keys(PLAN_PARAMS, kwargs))
        assert plan_recs.success, plan_recs
        data_recs = level0.find(**override_common_keys(LEVEL0_PARAMS, kwargs))
        assert data_recs.success, data_recs
        # BUG FIX: removed leftover debug code that overwrote the query
        # results with local joblib dumps ("dagtest/*.dump"), hard-coded
        # instrument = "MSC", and re-imported `csst` (already imported at
        # module level).
        print(f"{len(plan_recs.data)} plan records")
        print(f"{len(data_recs.data)} data records")
        effective_detector_names = csst[instrument].effective_detector_names
        # extract the identifying key-value pairs from both record sets
        plan_basis = extract_basis(
            plan_recs.data,
            (
                "dataset",
                "instrument",
                "obs_type",
                "obs_group",
                "obs_id",
            ),
        )
        data_basis = extract_basis(
            data_recs.data,
            (
                "dataset",
                "instrument",
                "obs_type",
                "obs_group",
                "obs_id",
                "detector",
            ),
        )
        # select plan basis relevant to data via `obs_id`
        u_data_obsid = np.unique([_["obs_id"] for _ in data_basis])
        relevant_plan_basis = [_ for _ in plan_basis if _["obs_id"] in u_data_obsid]
        print(f"{len(relevant_plan_basis)} relevant plan records")
        # NOTE: tasks are enumerated per (plan, detector), so there are far
        # more detector-level tasks than plan records.
        task_list = []
        relevant_data_id_list = []
        # loop over plan
        for i_plan_basis, this_plan_basis in enumerate(relevant_plan_basis):
            print(f"Processing {i_plan_basis + 1}/{len(relevant_plan_basis)}")
            # span over `detector`
            for this_detector in effective_detector_names:
                # construct this_task
                this_task = dict(
                    dataset=this_plan_basis["dataset"],
                    instrument=this_plan_basis["instrument"],
                    obs_type=this_plan_basis["obs_type"],
                    obs_group=this_plan_basis["obs_group"],
                    obs_id=this_plan_basis["obs_id"],
                    detector=this_detector,
                )
                # locate this plan basis in the full plan table
                # (object-array elementwise dict comparison; first match)
                idx_this_plan_basis = np.argwhere(
                    plan_basis == this_plan_basis
                ).flatten()[0]
                # get n_frame, calculate n_file_expected:
                # HSTDM plans bundle several frames per exposure; other
                # instruments expect exactly one file per detector.
                # NOTE(review): "num_epec_frame" looks like a typo for
                # "num_exp_frame" — confirm against the plan schema.
                if instrument == "HSTDM":
                    n_file_expected = plan_recs.data[idx_this_plan_basis]["params"][
                        "num_epec_frame"
                    ]
                else:
                    n_file_expected = 1
                # count files found in data_basis
                idx_files_found = np.argwhere(data_basis == this_task).flatten()
                n_file_found = len(idx_files_found)
                # dispatch only complete tasks (found == expected)
                if n_file_found == n_file_expected:
                    task_list.append(this_task)
                    relevant_data_id_list.extend(
                        [data_recs.data[_]["_id"] for _ in idx_files_found]
                    )
        return dict(task_list=task_list, relevant_data_id_list=relevant_data_id_list)

    @staticmethod
    def dispatch_level0_obsid(**kwargs) -> list[dict]:
        """Dispatch tasks at obs_id granularity. Not implemented yet."""
        pass
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment