See if grouping the data table with Table.group_by solves the n_file bug

3d80bb19 · BO ZHANG · a9e5e5b9 · 3d80bb19
Commit 3d80bb19 authored Jul 22, 2025 by BO ZHANG 🏀
--- a/csst_dag/dag/_dispatcher.py
+++ b/csst_dag/dag/_dispatcher.py
@@ -466,113 +466,47 @@ class Dispatcher:
        if len(plan_basis) == 0 or len(data_basis) == 0:
            return []

-        obsid_basis = data_basis.group_by([""])
-
-        # unique obsid
-        u_obsid = table.unique(data_basis["dataset", "obs_id"])
-        relevant_plan = table.join(
-            u_obsid,
-            plan_basis,
-            keys=["dataset", "obs_id"],
-            join_type=PLAN_JOIN_TYPE,
-        )
-        print(f"{len(relevant_plan)} relevant plan records")
-
-        u_data_obsid = table.unique(
-            data_basis[
-                "dataset",
-                "instrument",
-                "obs_type",
-                "obs_group",
-                "obs_id",
-            ]
-        )
+        group_keys = ["dataset", "instrument", "obs_type", "obs_group", "obs_id"]
+        obsid_basis = data_basis.group_by(group_keys)

        # initialize task list
        task_list = []
-
-        # loop over plan
-        for i_data_obsid in trange(len(u_data_obsid), **TQDM_KWARGS):
-            # i_data_obsid = 2
-            this_task = dict(u_data_obsid[i_data_obsid])
-            this_data_obsid = u_data_obsid[i_data_obsid : i_data_obsid + 1]
-
-            # join data and plan
-            this_data_obsid_file = table.join(
-                this_data_obsid,
-                data_basis,
-                keys=[
-                    "dataset",
-                    "instrument",
-                    "obs_type",
-                    "obs_group",
-                    "obs_id",
-                ],
-                join_type="inner",
-            )
-            # print(this_data_obsid_file.colnames)
-            this_data_obsid_plan = table.join(
-                this_data_obsid,
-                relevant_plan,
-                keys=[
-                    "dataset",
-                    "instrument",
-                    "obs_type",
-                    "obs_group",
-                    "obs_id",
-                ],
+        # loop over obsid
+        for this_obsid_basis in obsid_basis.groups:
+            this_relevant_plan_basis = table.join(
+                this_obsid_basis[group_keys][:1],
+                plan_basis,
+                keys=group_keys,
                join_type=PLAN_JOIN_TYPE,
            )
-
-            # whether effective detectors all there
-            this_instrument = this_data_obsid["instrument"][0]
-            this_n_file = (
-                this_data_obsid_plan["n_file"] if len(this_data_obsid_plan) > 0 else 0
+            assert len(this_relevant_plan_basis) == 1
+            n_file_expected = this_relevant_plan_basis[0]["n_file"]
+            n_file_found = len(this_obsid_basis)
+            this_instrument = this_relevant_plan_basis[0]["instrument"]
+            detectors_found = set(this_obsid_basis["detector"])
+            detectors_expected = set(csst[this_instrument].effective_detector_names)
+            this_success = (
+                n_file_expected == n_file_found
+                and detectors_found == detectors_expected
            )
-            this_effective_detector_names = csst[
-                this_instrument
-            ].effective_detector_names
-
-            if this_instrument == "HSTDM":
-                # 不确定以后是1个探测器还是2个探测器
-                this_n_file_found = len(this_data_obsid_file)
-                this_n_file_expected = (this_n_file, this_n_file * 2)
-                this_success = this_n_file_found in this_n_file_expected
-            else:
-                # for other instruments, e.g., MSC
-                # n_file_found = len(this_obsgroup_obsid_file)
-                # n_file_expected = len(effective_detector_names)
-                # this_success &= n_file_found == n_file_expected
-
-                # or more strictly, expected files are a subset of files found
-                this_success = set(this_effective_detector_names) <= set(
-                    this_data_obsid_file["detector"]
-                )
-
-            n_file_expected = int(this_data_obsid_plan["n_file"].sum())
-            n_file_found = len(this_data_obsid_file)
-            # set n_file_expected and n_file_found
-            this_task["n_file_expected"] = n_file_expected
-            this_task["n_file_found"] = n_file_found
            # append this task
            task_list.append(
                dict(
-                    task=this_task,
+                    task=dict(this_relevant_plan_basis[group_keys][0]),
                    success=this_success,
-                    relevant_plan=this_data_obsid_plan,
-                    relevant_data=this_data_obsid_file,
-                    n_relevant_plan=len(this_data_obsid_plan),
-                    n_relevant_data=len(this_data_obsid_file),
+                    relevant_plan=this_relevant_plan_basis,
+                    relevant_data=this_obsid_basis,
+                    n_relevant_plan=len(this_relevant_plan_basis),
+                    n_relevant_data=len(this_obsid_basis),
                    relevant_data_id_list=(
                        []
-                        if len(this_data_obsid_file) == 0
-                        else list(this_data_obsid_file["_id"])
+                        if len(this_obsid_basis) == 0
+                        else list(this_obsid_basis["_id"])
                    ),
-                    n_file_expected=this_data_obsid_plan["n_file"].sum(),
-                    n_file_found=len(this_data_obsid_file),
+                    n_file_expected=n_file_expected,
+                    n_file_found=n_file_found,
                )
            )
-
        return task_list

    @staticmethod