Coverage for lisacattools/catalog.py: 78%
98 statements
« prev ^ index » next coverage.py v7.0.5, created at 2023-02-06 17:36 +0000
« prev ^ index » next coverage.py v7.0.5, created at 2023-02-06 17:36 +0000
1# -*- coding: utf-8 -*-
2# Copyright (C) 2021 - James I. Thorpe, Tyson B. Littenberg, Jean-Christophe
3# Malapert
4#
5# This file is part of lisacattools.
6#
7# lisacattools is free software: you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation, either version 3 of the License, or
10# (at your option) any later version.
11#
12# lisacattools is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with lisacattools. If not, see <https://www.gnu.org/licenses/>.
19"""This module is the interface for gravitational wave source catalogs. It is
20responsible for :
21 - registering new catalog implementations as plugins
22 - loading detections and source posterior samples
23"""
24import importlib
25from abc import ABC
26from abc import abstractmethod
27from dataclasses import dataclass
28from typing import List
29from typing import Optional
30from typing import Union
32import pandas as pd
34from .custom_logging import UtilsLogs # noqa: F401
35from .monitoring import UtilsMonitoring # noqa: F401
class GWCatalogType:
    """Registry of the known GW catalog implementations.

    Every implementation is exposed as a class attribute holding a
    ``GWCatalogPlugin`` record. Additional implementations can be attached
    as attributes using the register method of the GWCatalogs class.
    """

    @dataclass
    class GWCatalogPlugin:
        """Record describing where a GW catalog implementation lives.

        Attributes:
            module_name (str): the module name
            class_name (str): the class name
        """

        module_name: str
        class_name: str

    # Implementations shipped as class attributes.
    MBH = GWCatalogPlugin(
        module_name="lisacattools.plugins.mbh",
        class_name="MbhCatalogs",
    )
    UCB = GWCatalogPlugin(
        module_name="lisacattools.plugins.ucb",
        class_name="UcbCatalogs",
    )
class GWCatalog:
    """Interface for handling a GW catalog.

    Note:
        This class does not derive from ``abc.ABC``. As a consequence the
        ``@abstractmethod`` markers are not enforced at instantiation time
        and ``__subclasshook__`` is not consulted by ``isinstance`` /
        ``issubclass``; it only takes effect when called explicitly.
    """

    @classmethod
    def __subclasshook__(cls, subclass):
        """Check whether ``subclass`` structurally provides the interface.

        Args:
            subclass (type): class to inspect

        Returns:
            True when every property and method of the interface is
            available on ``subclass``, NotImplemented otherwise.
        """
        # ``name`` and ``location`` are declared as properties below. A
        # property object retrieved from a class is not callable, so only
        # the presence of these attributes is required.
        required_properties = ("name", "location")
        required_methods = (
            "get_detections",
            "get_attr_detections",
            "get_median_source",
            "get_source_samples",
            "get_attr_source_samples",
            "describe_source_samples",
        )
        has_properties = all(
            hasattr(subclass, prop) for prop in required_properties
        )
        has_methods = all(
            callable(getattr(subclass, method, None))
            for method in required_methods
        )
        if has_properties and has_methods:
            return True
        return NotImplemented

    @property
    @abstractmethod
    def name(self) -> str:
        """Returns the name of the GW catalog.

        Raises:
            NotImplementedError: Not implemented

        Returns:
            str: name of the GW catalog
        """
        raise NotImplementedError("Not implemented")

    @property
    @abstractmethod
    def location(self) -> str:
        """Returns the location of the GW catalog.

        Raises:
            NotImplementedError: Not implemented

        Returns:
            str: the location of the GW catalog
        """
        raise NotImplementedError("Not implemented")

    @abstractmethod
    def get_detections(
        self, attr: Optional[Union[List[str], str]] = None
    ) -> Union[List[str], pd.DataFrame, pd.Series]:
        """Returns the GW detections.

        When no argument is provided, the name of each detection is returned.
        When arguments are provided, each detection is returned with the
        attributes.

        Args:
            attr (Union[List[str], str], optional): List of attributes or
                single attribute. Defaults to None.

        Raises:
            NotImplementedError: Not implemented

        Returns:
            Union[List[str], pd.DataFrame, pd.Series]: the name of each
            detection or the requested attributes of each detection
        """
        raise NotImplementedError("Not implemented")

    @abstractmethod
    def get_attr_detections(self) -> List[str]:
        """Returns the attributes of the catalog.

        Raises:
            NotImplementedError: Not implemented

        Returns:
            List[str]: the list of attributes
        """
        raise NotImplementedError("Not implemented")

    @abstractmethod
    def get_median_source(self, attr: str) -> pd.DataFrame:
        """Returns the source corresponding to the median of the specified
        attribute.

        Args:
            attr (str): attribute name

        Raises:
            NotImplementedError: Not implemented

        Returns:
            pd.DataFrame: the source for which the median is computed on the
            attribute
        """
        raise NotImplementedError("Not implemented")

    @abstractmethod
    def get_source_samples(
        self, source_name: str, attr: List[str]
    ) -> pd.DataFrame:
        """Returns the posterior samples of the source.

        Args:
            source_name (str): source name
            attr (List[str]): the list of attributes to return in the result

        Raises:
            NotImplementedError: Not implemented

        Returns:
            pd.DataFrame: the posterior samples of the source
        """
        raise NotImplementedError("Not implemented")

    @abstractmethod
    def get_attr_source_samples(self, source_name: str) -> List[str]:
        """Returns the attributes of the source posterior samples.

        Args:
            source_name (str): source name

        Raises:
            NotImplementedError: Not implemented

        Returns:
            List[str]: the attributes
        """
        raise NotImplementedError("Not implemented")

    @abstractmethod
    def describe_source_samples(self, source_name: str) -> pd.DataFrame:
        """Statistical summary of the source posterior samples.

        Args:
            source_name (str): source name

        Raises:
            NotImplementedError: Not implemented

        Returns:
            pd.DataFrame: statistics
        """
        raise NotImplementedError("Not implemented")
class GWCatalogs(ABC):
    """Interface for handling time-evolving GW catalogs."""

    @classmethod
    def __subclasshook__(cls, subclass):
        """Check whether ``subclass`` structurally provides the interface.

        Args:
            subclass (type): class to inspect

        Returns:
            True when every property and method of the interface is
            available on ``subclass``, NotImplemented otherwise (which lets
            the regular inheritance-based check decide).
        """
        # The hook is inherited by concrete implementations and ABCMeta
        # calls it for them as well. Restrict the structural check to the
        # interface itself so that two unrelated implementations are not
        # reported as subclasses of each other.
        if cls is not GWCatalogs:
            return NotImplemented
        # ``metadata``, ``count`` and ``files`` are declared as properties
        # below. A property object retrieved from a class is not callable,
        # so only the presence of these attributes is required.
        required_properties = ("metadata", "count", "files")
        required_methods = (
            "get_catalogs_name",
            "get_first_catalog",
            "get_last_catalog",
            "get_catalog",
            "get_catalog_by",
            "get_lineage",
            "get_lineage_data",
        )
        has_properties = all(
            hasattr(subclass, prop) for prop in required_properties
        )
        has_methods = all(
            callable(getattr(subclass, method, None))
            for method in required_methods
        )
        if has_properties and has_methods:
            return True
        return NotImplemented

    @staticmethod
    def register(type: str, nodule_name: str, class_name: str):
        """Register a new implementation of GWCatalogs.

        The implementation is stored as an attribute of GWCatalogType, named
        after ``type``.

        Args:
            type (str): name of the implementation
            nodule_name (str): module name where the implementation is done
            class_name (str): class name of the implementation
        """
        setattr(
            GWCatalogType,
            str(type),
            GWCatalogType.GWCatalogPlugin(nodule_name, class_name),
        )

    @staticmethod
    def create(
        type: GWCatalogType.GWCatalogPlugin,
        directory: str,
        accepted_pattern: Optional[str] = None,
        rejected_pattern: Optional[str] = None,
        *args,
        **kwargs
    ) -> "GWCatalogs":
        """Create a new object for handling a set of specific catalogs.

        Catalogs are loaded according to a set of filters: the accepted and
        rejected patterns.

        Note:
        -----
        The `extra_directories` parameter can be given as input in order to
        load catalogs in other directories. A list of directories is expected
        for the `extra_directories` parameter. For example:

        ```
        GWCatalogs.create(
            GWCatalogType.UCB,
            "/tmp",
            "*.h5",
            "*chain*",
            extra_directories=[".", "./tutorial"]
        )
        ```

        Args:
            type (GWCatalogType.GWCatalogPlugin): Type of catalog
            directory (str) : Directory where the data are located
            accepted_pattern (str, optional) : pattern to select files in the
                directory (e.g. '*.h5'). Default None
            rejected_pattern (str, optional) : pattern to reject from the list
                built using accepted_pattern. Default None
            *args: extra positional arguments forwarded to the implementation
            **kwargs: extra keyword arguments forwarded to the implementation

        Returns:
            GWCatalogs: the object implementing a set of specific catalogs
        """
        # Resolve the implementation class from the plugin description.
        module = importlib.import_module(type.module_name)
        my_class = getattr(module, type.class_name)
        # Patterns left to None are not forwarded, so the implementation can
        # apply its own defaults.
        # NOTE(review): when accepted_pattern is None but rejected_pattern is
        # given, rejected_pattern becomes the second positional argument --
        # confirm this is what the plugin constructors expect.
        arguments = [directory]
        if accepted_pattern is not None:
            arguments.append(accepted_pattern)
        if rejected_pattern is not None:
            arguments.append(rejected_pattern)
        return my_class(*arguments, *args, **kwargs)

    @property
    @abstractmethod
    def metadata(self) -> pd.DataFrame:
        """metadata.

        :getter: Returns the metadata of the catalog set
        :type: pd.DataFrame
        """
        raise NotImplementedError("Not implemented")

    @property
    @abstractmethod
    def count(self) -> int:
        """Count the number of catalogs in the catalog set.

        :getter: Returns the number of catalogs in the catalog set
        :type: int
        """
        raise NotImplementedError("Not implemented")

    @property
    @abstractmethod
    def files(self) -> List[str]:
        """Returns the list of files matching the accepted and rejected
        pattern.

        :getter: Returns the list of files matching the accepted and
            rejected pattern
        :type: List[str]
        """
        raise NotImplementedError("Not implemented")

    @abstractmethod
    def get_catalogs_name(self) -> List[str]:
        """Returns the name of each catalog included in the catalog set.

        Raises:
            NotImplementedError: When the method is not implemented

        Returns:
            List[str]: name of each catalog
        """
        raise NotImplementedError("Not implemented")

    @abstractmethod
    def get_first_catalog(self) -> GWCatalog:
        """Returns the first catalog from the catalog set.

        Raises:
            NotImplementedError: When the method is not implemented

        Returns:
            GWCatalog: the first catalog
        """
        raise NotImplementedError("Not implemented")

    @abstractmethod
    def get_last_catalog(self) -> GWCatalog:
        """Returns the last catalog from the catalog set.

        Raises:
            NotImplementedError: When the method is not implemented

        Returns:
            GWCatalog: the last catalog
        """
        raise NotImplementedError("Not implemented")

    @abstractmethod
    def get_catalog(self, idx: int) -> GWCatalog:
        """Returns a catalog based on its position in the catalog set.

        Args:
            idx (int): position of the catalog (first idx: 0)

        Raises:
            NotImplementedError: When the method is not implemented

        Returns:
            GWCatalog: catalog
        """
        raise NotImplementedError("Not implemented")

    @abstractmethod
    def get_catalog_by(self, name: str) -> GWCatalog:
        """Returns a catalog based on its name in the catalog set.

        Args:
            name (str): name of the catalog

        Raises:
            NotImplementedError: When the method is not implemented

        Returns:
            GWCatalog: the catalog
        """
        raise NotImplementedError("Not implemented")

    @abstractmethod
    def get_lineage(self, cat_name: str, src_name: str) -> pd.DataFrame:
        """Returns the history of a source (src_name: str) including metadata
        and point estimates through a series of preceding catalogs. Series
        starts at current catalog (cat_name: str) and traces a
        source's history back.

        Args:
            cat_name (str): catalog from which the lineage starts
            src_name (str): particular source

        Raises:
            NotImplementedError: When the method is not implemented

        Returns:
            pd.DataFrame: history of a particular source including metadata
            and point estimates through a series of preceding catalogs.
        """
        raise NotImplementedError("Not implemented")

    @abstractmethod
    def get_lineage_data(self, lineage: pd.DataFrame) -> pd.DataFrame:
        """Returns the posterior samples of a particular source at all
        different epochs of observation in the DataFrame returned by
        get_lineage(). The samples are concatenated into a single DataFrame.

        Args:
            lineage (pd.DataFrame): time-dependent catalog for the evolution
                of a particular source in a series of catalogs returned by
                get_lineage()

        Raises:
            NotImplementedError: When the method is not implemented

        Returns:
            pd.DataFrame: posterior samples of a particular source at all
            different epochs of observation in the DataFrame returned by
            get_lineage().
        """
        raise NotImplementedError("Not implemented")