update dispatcher.py

df44de32 · BO ZHANG · 3ddcd5a6 · df44de32 · df44de32 · df44de32
Commit df44de32 authored Jul 08, 2025 by BO ZHANG 🏀
--- a/csst_dag/_csst/dict.py
+++ b/csst_dag/_csst/dict.py
@@ -9,7 +9,7 @@ class DotDict(dict):
    def __getattr__(self, key):
        """属性访问优先级：1. 内置属性 → 2. 键值 → 3. 报错"""
        try:
-            # 优先返回内置属性（如 keys, items 等方法）
+            # Built-in attributes take priority (e.g. methods such as keys, items)
            return object.__getattribute__(self, key)
        except AttributeError:
            if key in self:

--- a/csst_dag/_csst/instrument.py
+++ b/csst_dag/_csst/instrument.py
@@ -137,6 +137,11 @@ class Telescope(DotDict):
    def n_instrument(self):
        return len(self.instruments)

+    # def plan_to_detector(self, plan_data):
+    #     # convert to dict
+    #     plan_dict = dict(plan_data)
+    #     if plan_dict["instrument"] == "HSTDM":
+

 mbi = SimpleInstrument(
    name="MBI",

--- a/csst_dag/dag/__init__.py
+++ b/csst_dag/dag/__init__.py
 from ._base_dag import BaseDAG
 from ._dag_list import DAG_LIST
-from .dags import GeneralDAGViaObsid
+from .dags import GeneralDAGViaObsid, GeneralDAGViaObsgroup
+from .dispatcher import Dispatcher


 class CsstDAGs(dict):
@@ -21,9 +22,9 @@ class CsstDAGs(dict):
        "csst-msc-l1-sls": GeneralDAGViaObsid(
            dag_group="msc-l1", dag="csst-msc-l1-sls", use_detector=True
        ),
-        # "csst-msc-l1-ooc": GeneralDAGViaObsgroup(
-        #     dag_group="msc-l1", dag="csst-msc-l1-ooc"
-        # ),
+        "csst-msc-l1-ooc": GeneralDAGViaObsgroup(
+            dag_group="msc-l1-ooc", dag="csst-msc-l1-ooc"
+        ),
        "csst-cpic-l1": GeneralDAGViaObsid(
            dag_group="cpic-l1", dag="csst-cpic-l1", use_detector=True
        ),

--- a/csst_dag/dag/dispatcher.py
+++ b/csst_dag/dag/dispatcher.py
@@ -128,6 +128,9 @@ DATA_BASIS_KEYS = (
    "_id",
 )

+# join_type for data x plan
+PLAN_JOIN_TYPE = "inner"
+

 class Dispatcher:
    """
@@ -188,8 +191,8 @@ class Dispatcher:
        plan_basis: table.Table,
        data_basis: table.Table,
    ) -> list[dict]:
-        # unique obsid
-        u_obsid = table.unique(data_basis["dataset", "obs_id"])
+        # unique obsid --> useless
+        # u_obsid = table.unique(data_basis["dataset", "obs_id"])

        # initialize task list
        task_list = []
@@ -201,9 +204,10 @@ class Dispatcher:
            dynamic_ncols=True,
        ):
            # i_data_basis = 1
+            this_task = dict(data_basis[i_data_basis])
            this_data_basis = data_basis[i_data_basis : i_data_basis + 1]
            this_relevant_plan = table.join(
-                u_obsid,
+                this_data_basis,
                plan_basis,
                keys=["dataset", "obs_id"],
                join_type="inner",
@@ -211,10 +215,12 @@ class Dispatcher:
            # append this task
            task_list.append(
                dict(
-                    task=this_data_basis,
+                    task=this_task,
                    success=True,
                    relevant_plan=this_relevant_plan,
                    relevant_data=data_basis[i_data_basis : i_data_basis + 1],
+                    n_relevant_plan=len(this_relevant_plan),
+                    n_relevant_data=1,
                )
            )

@@ -251,7 +257,7 @@ class Dispatcher:
            u_obsid,
            plan_basis,
            keys=["dataset", "obs_id"],
-            join_type="left",
+            join_type=PLAN_JOIN_TYPE,
        )
        print(f"{len(relevant_plan)} relevant plan records")

@@ -303,7 +309,7 @@ class Dispatcher:
                    "obs_group",
                    "obs_id",
                ],
-                join_type="left",
+                join_type=PLAN_JOIN_TYPE,
            )

            # whether detector effective
@@ -312,7 +318,12 @@ class Dispatcher:
            this_detector_effective = (
                this_detector in csst[this_instrument].effective_detector_names
            )
-            n_files_expected = this_data_detector_plan["n_frame"][0]
+
+            n_files_expected = (
+                this_data_detector_plan["n_frame"][0]
+                if len(this_data_detector_plan) > 0
+                else 0
+            )
            n_files_found = len(this_data_detector_files)
            # append this task
            task_list.append(
@@ -326,6 +337,8 @@ class Dispatcher:
                    ),
                    relevant_plan=this_data_detector_plan,
                    relevant_data=this_data_detector_files,
+                    n_relevant_plan=len(this_data_detector_plan),
+                    n_relevant_data=len(this_data_detector_files),
                )
            )
        return task_list
@@ -350,7 +363,7 @@ class Dispatcher:
            u_obsid,
            plan_basis,
            keys=["dataset", "obs_id"],
-            join_type="left",
+            join_type=PLAN_JOIN_TYPE,
        )
        print(f"{len(relevant_plan)} relevant plan records")

@@ -373,12 +386,12 @@ class Dispatcher:
            unit="task",
            dynamic_ncols=True,
        ):
-            i_data_obsid = 2
+            # i_data_obsid = 2
            this_task = dict(u_data_obsid[i_data_obsid])
            this_data_obsid = u_data_obsid[i_data_obsid : i_data_obsid + 1]

            # join data and plan
-            this_data_obsid_files = table.join(
+            this_data_obsid_file = table.join(
                this_data_obsid,
                data_basis,
                keys=[
@@ -400,13 +413,32 @@ class Dispatcher:
                    "obs_group",
                    "obs_id",
                ],
-                join_type="left",
+                join_type=PLAN_JOIN_TYPE,
            )

            # whether effective detectors all there
            this_instrument = this_data_obsid["instrument"][0]
-            this_success = set(csst[this_instrument].effective_detector_names).issubset(
-                set(this_data_obsid_files["detector"])
+            this_n_frame = (
+                this_data_obsid_plan["n_frame"][0] if len(this_data_obsid_plan) else 0
+            )
+            this_effective_detector_names = csst[
+                this_instrument
+            ].effective_detector_names
+
+            if this_instrument == "HSTDM":
+                # HSTDM may end up with 1 or 2 detectors, so accept n_frame or 2 * n_frame files
+                this_n_file_found = len(this_data_obsid_file)
+                this_n_file_expected = (this_n_frame, this_n_frame * 2)
+                this_success = this_n_file_found in this_n_file_expected
+            else:
+                # for other instruments, e.g., MSC
+                # n_file_found = len(this_obsgroup_obsid_file)
+                # n_file_expected = len(effective_detector_names)
+                # this_success &= n_file_found == n_file_expected
+
+                # or more strictly, expected files are a subset of files found
+                this_success = set(this_effective_detector_names) <= set(
+                    this_data_obsid_file["detector"]
                )

            # append this task
@@ -415,7 +447,9 @@ class Dispatcher:
                    task=this_task,
                    success=this_success,
                    relevant_plan=this_data_obsid_plan,
-                    relevant_data=this_data_obsid_files,
+                    relevant_data=this_data_obsid_file,
+                    n_relevant_plan=len(this_data_obsid_plan),
+                    n_relevant_data=len(this_data_obsid_file),
                )
            )

@@ -452,14 +486,14 @@ class Dispatcher:
            this_task = dict(obsgroup_basis[i_obsgroup])
            this_success = True

-            this_obsgroup_obsid = table.join(
+            this_obsgroup_plan = table.join(
                obsgroup_basis[i_obsgroup : i_obsgroup + 1],  # this obsgroup
                plan_basis,
                keys=["dataset", "instrument", "obs_type", "obs_group"],
-                join_type="left",
+                join_type=PLAN_JOIN_TYPE,
            )
            this_obsgroup_file = table.join(
-                this_obsgroup_obsid,
+                this_obsgroup_plan,
                data_basis,
                keys=["dataset", "instrument", "obs_type", "obs_group", "obs_id"],
                join_type="inner",
@@ -467,36 +501,37 @@ class Dispatcher:
            )

            # loop over obsid
-            for i_obsid in range(len(this_obsgroup_obsid)):
+            for i_obsid in range(len(this_obsgroup_plan)):
                # i_obsid = 1
                # print(i_obsid)
-                instrument = this_obsgroup_obsid[i_obsid]["instrument"]
-                n_frame = this_obsgroup_obsid[i_obsid]["n_frame"]
-                effective_detector_names = csst[instrument].effective_detector_names
+                this_instrument = this_obsgroup_plan[i_obsid]["instrument"]
+                this_n_frame = this_obsgroup_plan[i_obsid]["n_frame"]
+                this_effective_detector_names = csst[
+                    this_instrument
+                ].effective_detector_names

                this_obsgroup_obsid_file = table.join(
-                    this_obsgroup_obsid[i_obsid : i_obsid + 1],  # this obsid
+                    this_obsgroup_plan[i_obsid : i_obsid + 1],  # this obsid
                    data_basis,
                    keys=["dataset", "instrument", "obs_type", "obs_group", "obs_id"],
                    join_type="inner",
                    table_names=["plan", "data"],
                )

-                if instrument == "HSTDM":  # 我也不知道太赫兹要怎么玩
-                    # this_success &= (
-                    #     len(this_obsgroup_obsid_file) == n_frame
-                    #     or len(this_obsgroup_obsid_file) == n_frame * 2
-                    # )
-                    # or simply
-                    this_success &= len(this_obsgroup_obsid_file) % n_frame == 0
+                if this_instrument == "HSTDM":
+                    # 不确定以后是1个探测器还是2个探测器
+                    this_n_file_found = len(this_obsgroup_obsid_file)
+                    this_n_file_expected = (this_n_frame, this_n_frame * 2)
+                    this_success &= this_n_file_found in this_n_file_expected
                else:
-                    # n_detector == n_file
-                    # this_success &= len(this_obsgroup_obsid_file) == len(
-                    #     effective_detector_names
-                    # )
-                    # or more strictly, each detector matches
-                    this_success &= set(this_obsgroup_obsid_file["detector"]) == set(
-                        effective_detector_names
+                    # for other instruments, e.g., MSC
+                    # n_file_found = len(this_obsgroup_obsid_file)
+                    # n_file_expected = len(effective_detector_names)
+                    # this_success &= n_file_found == n_file_expected
+
+                    # or more strictly, expected files are a subset of files found
+                    this_success &= set(this_effective_detector_names) <= set(
+                        this_obsgroup_obsid_file["detector"]
                    )

            # append this task
@@ -504,8 +539,10 @@ class Dispatcher:
                dict(
                    task=this_task,
                    success=this_success,
-                    relevant_plan=this_obsgroup_obsid,
+                    relevant_plan=this_obsgroup_plan,
                    relevant_data=this_obsgroup_file,
+                    n_relevant_plan=len(this_obsgroup_plan),
+                    n_relevant_data=len(this_obsgroup_file),
                )
            )
        return task_list
@@ -522,6 +559,7 @@ class Dispatcher:
            _["n_frame"] = (
                _["params"]["n_epec_frame"] if _["instrument"] == "HSTDM" else 1
            )
+        # 未来如果HSTDM的设定简化一些，这里n_frame可以改成n_file，更直观
        plan_basis = extract_basis_table(
            plan_recs.data,
            PLAN_BASIS_KEYS,
@@ -531,24 +569,3 @@ class Dispatcher:
            DATA_BASIS_KEYS,
        )
        return plan_basis, data_basis
-
-
-# # 1221 plan recs, 36630 data recs
-# plan_basis, data_basis = Dispatcher.load_test_data()
-#
-# # 430 task/s
-# task_list_via_file = Dispatcher.dispatch_file(plan_basis, data_basis)
-#
-# # 13 task/s @n_jobs=1, 100*10 task/s @n_jobs=10 (max)
-# task_list_via_detector = Dispatcher.dispatch_detector(plan_basis, data_basis, n_jobs=10)
-#
-# # 16 task/s @n_jobs=1, 130*10 tasks/s @n_jobs=10 (max) 🔼
-# task_list_via_obsid = Dispatcher.dispatch_obsid(plan_basis, data_basis, n_jobs=10)
-#
-# # 13s/task
-# task_list_via_obsgroup = Dispatcher.dispatch_obsgroup(plan_basis, data_basis)
-# print(
-#     sum(_["success"] for _ in task_list_via_obsgroup),
-#     "/",
-#     len(task_list_via_obsgroup),
-# )