# Source code for gdt.missions.gifts.catalogs

#
# Copyright 2026 by University College Dublin. All rights reserved.
#
# Developed by: Derek O'Callaghan
#               University College Dublin
#               https://www.ucd.ie/
#
# Builds on:
#               Gamma-ray Data Tools - Core Components (https://github.com/USRA-STI/gdt-core)
#               Gamma-ray Data Tools - Fermi mission components (https://github.com/USRA-STI/gdt-fermi/)
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the License for the specific language governing permissions and limitations under the
# License.
#

from abc import ABC
from dataclasses import dataclass, field
import os
from typing import ClassVar, List, Optional, Type

import numpy as np
import pandas as pd

from gdt.core.file import FitsFileContextManager

from gdt.missions.gifts.detectors import GiftsDetectors
from gdt.missions.gifts.localization import GiftsHealPix
from gdt.missions.gifts.tcat import Tcat
from gdt.missions.gifts.trigdat import Trigdat
from gdt.missions.gifts.tte import GiftsTte

__all__ = ['TriggerCatalog', 'BurstCatalog']

@dataclass
class FsspecStorage():
    """Shared fsspec settings for reading files out of the catalog archive.

    Files are addressed with chained fsspec URLs: a ``tar://`` member path
    chained onto ``_catalog_archive_url``. The options for the ``simplecache``
    layer of that chain are kept in ``fsspec_kwargs``.

    ``catalog_type`` must be set (normally by a subclass) before the instance
    is initialised: it names both the top-level directory inside the archive
    and the sub-directory of the local cache.
    """

    # Per-protocol options passed to fsspec; built in __post_init__
    fsspec_kwargs: dict = field(init=False, repr=False)
    # Catalog name; None here, expected to be overridden by concrete catalogs
    catalog_type: Optional[str] = field(init=False, default=None)
    config: Optional[str] = field(default=None, repr=False)
    # TODO: the config will contain the archive and cache path, from which the base url is constructed

    # Cache path for all files retrieved with fsspec
    _base_cache_path: str = field(repr=False, default="./giftscache")
    # TODO: "zenodo_url...*.gz" should be provided by a config, or an init parameter
    _catalog_archive_url: str = field(repr=False,
                                      default="simplecache://::https://zenodo.org/records/17295200/files/gifts_bursts_catalog_v0_1_0.tar.gz")

    def __post_init__(self):
        """Build the fsspec options once all fields have been assigned.

        Raises:
            TypeError: If ``catalog_type`` has not been set.
        """
        # Without this check os.path.join() fails with an opaque
        # "expected str ... not NoneType" message. The exception type is kept
        # as TypeError so existing handlers keep working.
        if self.catalog_type is None:
            raise TypeError(
                f"{type(self).__name__}.catalog_type is not set; use a "
                "catalog class that defines catalog_type"
            )
        self.fsspec_kwargs = {
            "simplecache": {
                # Each catalog type gets its own cache sub-directory
                "cache_storage": os.path.join(self._base_cache_path, self.catalog_type),
                "same_names": True
            }
        }

    def _catalog_file_path(self, file_path) -> str:
        """Return the chained fsspec URL of a file inside the catalog archive.

        Args:
            file_path: Path of the file relative to the ``catalog_type``
                       directory of the archive.

        Returns:
            URL of the form ``tar://<catalog_type>/<file_path>::<archive url>``
        """
        return f"tar://{self.catalog_type}/{file_path}::{self._catalog_archive_url}"


@dataclass
class Catalog(FsspecStorage):
    """Base class for GIFTS catalogs.

    On construction the ``catalog.csv`` metadata table is read from the
    catalog archive into ``catalog``. The ``get_*`` methods open the FITS
    products of an individual trigger from the same archive.
    """

    # Catalog metadata table. Excluded from the generated __eq__
    # (compare=False): `==` between two DataFrames returns a DataFrame whose
    # truth value is ambiguous, so comparing two catalog instances raised
    # ValueError while the field took part in the comparison.
    catalog: pd.DataFrame = field(init=False, default=None, repr=False, compare=False)

    def __post_init__(self):
        """Build the fsspec options, then load the catalog metadata table."""
        super().__post_init__()
        self.catalog = pd.read_csv(self._catalog_file_path(file_path="catalog.csv"),
                                   dtype={"triggered_detector_mask": "O"},
                                   storage_options=self.fsspec_kwargs,
                                   # Need to specify this or fsspec won't be used for tar uncompression, as it
                                   # looks like pandas will assume tar compression due to it appearing in the URL
                                   compression=None
                                  )

    @property
    def columns(self) -> np.ndarray:
        """The names of the columns available in the catalog"""
        return self.catalog.columns.values

    @property
    def num_columns(self) -> int:
        """The total number of columns (fields) in the catalog"""
        return self.catalog.shape[1]

    @property
    def num_rows(self) -> int:
        """The total number of rows in the catalog"""
        return self.catalog.shape[0]

    def column_range(self, column: str) -> tuple:
        """Return the data range for a specified column, in the form of (low, high).

        Args:
            column: The column name

        Returns:
            Tuple containing low (min) and high (max) values

        Raises:
            KeyError: If the column is not in the catalog
        """
        # KeyError will be raised for unknown columns
        col_data = self.catalog[column]
        return col_data.min(), col_data.max()

    def get_table(self, columns: Optional[List[str]]=None) -> np.recarray:
        """Return the table data as a numpy record array.

        Args:
            columns (list of str, optional): The columns to return. If omitted,
                                             returns all columns.

        Returns:
            (np.recarray)

        Raises:
            KeyError: If any requested column is not in the catalog
        """
        # KeyError will be raised for unknown columns
        column_data = self.catalog if columns is None else self.catalog[columns]
        # The DataFrame index is not part of the catalog data, so leave it out
        return column_data.to_records(index=False)

    def to_dataframe(self) -> pd.DataFrame:
        """
        Returns a copy of the catalog as a Pandas DataFrame, which can be used for
        subsequent slicing/filtering operations.
        """
        return self.catalog.copy(deep=True)

    def _product_path(self,
                      trigger_name: str,
                      product_type: str,
                      detector: str) -> str:
        """Constructs the catalog path for the specified trigger product type.

        Args:
            trigger_name: required trigger/burst
            product_type: product label used in the file name
            detector: detector label used in the file name

        Returns:
            Chained fsspec URL of the product file. The two-digit version in
            the file name is left as the pattern ``[0-9][0-9]``.
        """
        # NOTE(review): assumes characters 2-3 of the trigger name are the
        # two-digit year, and that all triggers are from 20xx - confirm
        # against the trigger naming convention.
        trigger_year = f"20{trigger_name[2:4]}"
        return self._catalog_file_path(file_path=f"{trigger_year}/{trigger_name}/gifts_{product_type}_{detector}_{trigger_name}_v[0-9][0-9].fit")

    def _open_product(self,
                      trigger_name: str,
                      product_type: str,
                      product_cls: Type[FitsFileContextManager],
                      detector: str="all") -> FitsFileContextManager:
        """Opens the catalog FITS product for the specified trigger product type.

        Args:
            trigger_name: required trigger/burst
            product_type: product label used in the file name
            product_cls: class whose ``open()`` is used to read the file
            detector: detector label used in the file name

        Returns:
            The object returned by ``product_cls.open()``
        """
        return product_cls.open(self._product_path(trigger_name, product_type, detector),
                                use_fsspec=True,
                                fsspec_kwargs=self.fsspec_kwargs)

    def get_tcat(self, trigger_name: str) -> Tcat:
        """
        Retrieve and open a Tcat product.

        Args:
            trigger_name: required trigger/burst

        Returns:
            GIFTS Tcat
        """
        return self._open_product(trigger_name, "tcat", Tcat)

    def get_trigdat(self, trigger_name: str) -> Trigdat:
        """
        Retrieve and open a Trigdat product.

        Args:
            trigger_name: required trigger/burst

        Returns:
            GIFTS Trigdat
        """
        return self._open_product(trigger_name, "trigdat", Trigdat)

    def get_tte(self, trigger_name: str) -> List[GiftsTte]:
        """
        Retrieve and open TTE products.

        Args:
            trigger_name: required trigger/burst

        Returns:
            List of GIFTS TTEs, one per detector.
        """
        # One file is opened for every member of GiftsDetectors
        return [self._open_product(trigger_name, "tte", GiftsTte, d.name) for d in GiftsDetectors]

    def get_healpix(self, trigger_name: str) -> GiftsHealPix:
        """
        Retrieve and open a localization HEALPix product.

        Args:
            trigger_name: required trigger/burst

        Returns:
            GiftsHealPix product
        """
        return self._open_product(trigger_name, "healpix", GiftsHealPix)


@dataclass
class TriggerCatalog(Catalog):
    """GIFTS catalog for triggers"""

    # Overrides the inherited catalog_type field with this catalog's name
    catalog_type: str = "triggers"


@dataclass
class BurstCatalog(Catalog):
    """GIFTS catalog for detected and confirmed bursts"""

    # Overrides the inherited catalog_type field with this catalog's name
    catalog_type: str = "bursts"