Coverage for lisacattools/catalog.py: 78%

98 statements  

« prev     ^ index     » next       coverage.py v7.0.5, created at 2023-02-06 17:36 +0000

1# -*- coding: utf-8 -*- 

2# Copyright (C) 2021 - James I. Thorpe, Tyson B. Littenberg, Jean-Christophe 

3# Malapert 

4# 

5# This file is part of lisacattools. 

6# 

7# lisacattools is free software: you can redistribute it and/or modify 

8# it under the terms of the GNU General Public License as published by 

9# the Free Software Foundation, either version 3 of the License, or 

10# (at your option) any later version. 

11# 

12# lisacattools is distributed in the hope that it will be useful, 

13# but WITHOUT ANY WARRANTY; without even the implied warranty of 

14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

15# GNU General Public License for more details. 

16# 

17# You should have received a copy of the GNU General Public License 

18# along with lisacattools. If not, see <https://www.gnu.org/licenses/>. 

19"""This module is the interface for gravitational wave source catalogs. It is 

20responsible for : 

21 - registering new catalog implementations as plugins 

22 - loading detections and source posterior samples 

23""" 

24import importlib 

25from abc import ABC 

26from abc import abstractmethod 

27from dataclasses import dataclass 

28from typing import List 

29from typing import Optional 

30from typing import Union 

31 

32import pandas as pd 

33 

34from .custom_logging import UtilsLogs # noqa: F401 

35from .monitoring import UtilsMonitoring # noqa: F401 

36 

37 

class GWCatalogType:
    """Registry of the known GW catalog implementations.

    Every implementation is exposed as a class attribute holding a
    ``GWCatalogPlugin`` record. Additional implementations can be attached
    as new attributes through the ``register`` method of the ``GWCatalogs``
    class.
    """

    @dataclass
    class GWCatalogPlugin:
        """Record describing how to load a plugin implementing a GW catalog.

        Attributes:
            module_name (str): name of the module to import
            class_name (str): name of the class to look up in that module
        """

        module_name: str
        class_name: str

    # Implementations available out of the box.
    MBH = GWCatalogPlugin(
        module_name="lisacattools.plugins.mbh",
        class_name="MbhCatalogs",
    )
    UCB = GWCatalogPlugin(
        module_name="lisacattools.plugins.ucb",
        class_name="UcbCatalogs",
    )

59 

60 

class GWCatalog:
    """Interface for handling a GW catalog.

    NOTE(review): this class does not derive from ``abc.ABC``, so the
    ``@abstractmethod`` markers below are not enforced when a subclass is
    instantiated and ``__subclasshook__`` is not consulted by
    ``issubclass``/``isinstance``. Confirm whether that is intended before
    changing the base class, since doing so would affect existing
    implementations.
    """

    @classmethod
    def __subclasshook__(cls, subclass):
        """Check whether ``subclass`` exposes the whole catalog interface.

        ``name`` and ``location`` are declared as properties; a property
        object looked up on a class is not callable, so only their presence
        is required. Every other member must be callable.

        Args:
            subclass: class to inspect

        Returns:
            True when every member of the interface is available, otherwise
            NotImplemented.
        """
        # Only answer for the interface itself, never on behalf of a
        # concrete implementation that inherits this hook.
        if cls is not GWCatalog:
            return NotImplemented
        properties = ("name", "location")
        methods = (
            "get_detections",
            "get_attr_detections",
            "get_median_source",
            "get_source_samples",
            "get_attr_source_samples",
            "describe_source_samples",
        )
        complete = all(
            hasattr(subclass, member) for member in properties
        ) and all(
            callable(getattr(subclass, member, None)) for member in methods
        )
        return complete or NotImplemented

    @property
    @abstractmethod
    def name(self) -> str:
        """Returns the name of the GW catalog.

        Raises:
            NotImplementedError: Not implemented

        Returns:
            str: name of the GW catalog
        """
        raise NotImplementedError("Not implemented")

    @property
    @abstractmethod
    def location(self) -> str:
        """Returns the location of the GW catalog.

        Raises:
            NotImplementedError: Not implemented

        Returns:
            str: the location of the GW catalog
        """
        raise NotImplementedError("Not implemented")

    @abstractmethod
    def get_detections(
        self, attr: Optional[Union[List[str], str]] = None
    ) -> Union[List[str], pd.DataFrame, pd.Series]:
        """Returns the GW detections.

        When no argument is provided, the name of each detection is returned.
        When arguments are provided, each detection is returned with the
        attributes.

        Args:
            attr (Union[List[str], str], optional): List of attributes or
            single attribute. Defaults to None.

        Raises:
            NotImplementedError: Not implemented

        Returns:
            Union[List[str], pd.DataFrame, pd.Series]: the name of each
            detection or the requested attributes of each detection
        """
        raise NotImplementedError("Not implemented")

    @abstractmethod
    def get_attr_detections(self) -> List[str]:
        """Returns the attributes of the catalog.

        Raises:
            NotImplementedError: Not implemented

        Returns:
            List[str]: the list of attributes
        """
        raise NotImplementedError("Not implemented")

    @abstractmethod
    def get_median_source(self, attr: str) -> pd.DataFrame:
        """Returns the source corresponding to the median of the specified
        attribute.

        Args:
            attr (str): attribute name

        Raises:
            NotImplementedError: Not implemented

        Returns:
            pd.DataFrame: the source for which the median is computed on the
            attribute
        """
        raise NotImplementedError("Not implemented")

    @abstractmethod
    def get_source_samples(
        self, source_name: str, attr: List[str]
    ) -> pd.DataFrame:
        """Returns the posterior samples of the source.

        Args:
            source_name (str): source name
            attr (List[str]): the list of attributes to return in the result

        Raises:
            NotImplementedError: Not implemented

        Returns:
            pd.DataFrame: the posterior samples of the source
        """
        raise NotImplementedError("Not implemented")

    @abstractmethod
    def get_attr_source_samples(self, source_name: str) -> List[str]:
        """Returns the attributes of the source posterior samples.

        Args:
            source_name (str): source name

        Raises:
            NotImplementedError: Not implemented

        Returns:
            List[str]: the attributes
        """
        raise NotImplementedError("Not implemented")

    @abstractmethod
    def describe_source_samples(self, source_name: str) -> pd.DataFrame:
        """Statistical summary of the source posterior samples.

        Args:
            source_name (str): source name

        Raises:
            NotImplementedError: Not implemented

        Returns:
            pd.DataFrame: statistics
        """
        raise NotImplementedError("Not implemented")

211 

212 

class GWCatalogs(ABC):
    """Interface for handling time-evolving GW catalogs."""

    @classmethod
    def __subclasshook__(cls, subclass):
        """Check whether ``subclass`` exposes the whole catalog-set interface.

        ``metadata``, ``count`` and ``files`` are declared as properties; a
        property object looked up on a class is not callable, so only their
        presence is required. Every other member must be callable.

        Args:
            subclass: class to inspect

        Returns:
            True when every member of the interface is available, otherwise
            NotImplemented so that the regular subclass checks apply.
        """
        # Concrete implementations inherit this hook; without the guard they
        # would accept any other class that merely exposes the same members.
        if cls is not GWCatalogs:
            return NotImplemented
        properties = ("metadata", "count", "files")
        methods = (
            "get_catalogs_name",
            "get_first_catalog",
            "get_last_catalog",
            "get_catalog",
            "get_catalog_by",
            "get_lineage",
            "get_lineage_data",
        )
        complete = all(
            hasattr(subclass, member) for member in properties
        ) and all(
            callable(getattr(subclass, member, None)) for member in methods
        )
        return complete or NotImplemented

    @staticmethod
    def register(type: str, nodule_name: str, class_name: str):
        """Register a new implementation of GWCatalogs.

        The implementation is stored as an attribute of ``GWCatalogType``
        named after ``type``.

        Args:
            type (str): name of the implementation
            nodule_name (str): module name where the implementation is done
                (the parameter name is misspelled; it is kept as is for
                callers passing it by keyword)
            class_name (str): class name of the implementation
        """
        setattr(
            GWCatalogType,
            str(type),
            GWCatalogType.GWCatalogPlugin(nodule_name, class_name),
        )

    @staticmethod
    def create(
        type: GWCatalogType.GWCatalogPlugin,
        directory: str,
        accepted_pattern: Optional[str] = None,
        rejected_pattern: Optional[str] = None,
        *args,
        **kwargs
    ) -> "GWCatalogs":
        """Create a new object for handling a set of specific catalogs.

        Catalogs are loaded according to a set of filters: the accepted and
        rejected patterns.

        Note:
        -----
        The `extra_directories` parameter can be given as input in order to
        load catalogs in other directories. A list of directories is expected
        for the `extra_directories` parameter. For example:

        ```
        GWCatalogs.create(
            GWCatalogType.UCB,
            "/tmp",
            "*.h5",
            "*chain*",
            extra_directories=[".", "./tutorial"]
        )
        ```

        Args:
            type (GWCatalogType.GWCatalogPlugin): Type of catalog
            directory (str) : Directory where the data are located
            accepted_pattern (str, optional) : pattern to select files in the
            directory (e.g. '*.h5'). Default None
            rejected_pattern (str, optional) : pattern to reject from the list
            built using accepted_pattern. Default None
            *args: extra positional arguments forwarded to the implementation
            **kwargs: extra keyword arguments forwarded to the implementation

        Raises:
            ImportError: when the module of the plugin cannot be imported
            AttributeError: when the module does not define the plugin class

        Returns:
            GWCatalogs: the object implementing a set of specific catalogs
        """
        # The implementation is resolved at call time from the plugin record.
        module = importlib.import_module(type.module_name)
        my_class = getattr(module, type.class_name)
        # Patterns left to None are omitted so that the implementation falls
        # back on its own defaults.
        # NOTE(review): the patterns are passed positionally, so giving only
        # rejected_pattern puts it in the slot right after directory -
        # presumably the accepted pattern; verify against the plugin
        # constructors.
        arguments = [directory]
        if accepted_pattern is not None:
            arguments.append(accepted_pattern)
        if rejected_pattern is not None:
            arguments.append(rejected_pattern)
        return my_class(*arguments, *args, **kwargs)

    @property
    @abstractmethod
    def metadata(self) -> pd.DataFrame:
        """metadata.

        :getter: Returns the metadata of the catalog set
        :type: pd.DataFrame
        """
        raise NotImplementedError("Not implemented")

    @property
    @abstractmethod
    def count(self) -> int:
        """Count the number of catalogs in the catalog set.

        :getter: Returns the number of catalogs in the catalog set
        :type: int
        """
        raise NotImplementedError("Not implemented")

    @property
    @abstractmethod
    def files(self) -> List[str]:
        """Returns the list of files matching the accepted and rejected
        pattern.

        :getter: Returns the list of files matching the accepted and
            rejected pattern
        :type: List[str]
        """
        raise NotImplementedError("Not implemented")

    @abstractmethod
    def get_catalogs_name(self) -> List[str]:
        """Returns the name of each catalog included in the catalog set.

        Raises:
            NotImplementedError: When the method is not implemented

        Returns:
            List[str]: name of each catalog
        """
        raise NotImplementedError("Not implemented")

    @abstractmethod
    def get_first_catalog(self) -> GWCatalog:
        """Returns the first catalog from the catalog set.

        Raises:
            NotImplementedError: When the method is not implemented

        Returns:
            GWCatalog: the first catalog
        """
        raise NotImplementedError("Not implemented")

    @abstractmethod
    def get_last_catalog(self) -> GWCatalog:
        """Returns the last catalog from the catalog set.

        Raises:
            NotImplementedError: When the method is not implemented

        Returns:
            GWCatalog: the last catalog
        """
        raise NotImplementedError("Not implemented")

    @abstractmethod
    def get_catalog(self, idx: int) -> GWCatalog:
        """Returns a catalog based on its position in the catalog set.

        Args:
            idx (int): position of the catalog (first idx: 0)

        Raises:
            NotImplementedError: When the method is not implemented

        Returns:
            GWCatalog: catalog
        """
        raise NotImplementedError("Not implemented")

    @abstractmethod
    def get_catalog_by(self, name: str) -> GWCatalog:
        """Returns a catalog based on its name in the catalog set.

        Args:
            name (str): name of the catalog

        Raises:
            NotImplementedError: When the method is not implemented

        Returns:
            GWCatalog: the catalog
        """
        raise NotImplementedError("Not implemented")

    @abstractmethod
    def get_lineage(self, cat_name: str, src_name: str) -> pd.DataFrame:
        """Returns the history of a source (src_name: str) including metadata
        and point estimates through a series of preceding catalogs. Series
        starts at current catalog (cat_name: str) and traces a
        source's history back.

        Args:
            cat_name (str): catalog from which the lineage starts
            src_name (str): particular source

        Raises:
            NotImplementedError: When the method is not implemented

        Returns:
            pd.DataFrame: history of a particular source including metadata
            and point estimates through a series of preceding catalogs.
        """
        raise NotImplementedError("Not implemented")

    @abstractmethod
    def get_lineage_data(self, lineage: pd.DataFrame) -> pd.DataFrame:
        """Returns the posterior samples of a particular source at all
        different epochs of observation in the DataFrame returned by
        get_lineage(). The samples are concatenated into a single DataFrame.

        Args:
            lineage (pd.DataFrame): time-dependent catalog for the evolution
            of a particular source in a series of catalogs returned by
            get_lineage()

        Raises:
            NotImplementedError: When the method is not implemented

        Returns:
            pd.DataFrame: posterior samples of a particular source at all
            different epochs of observation in the DataFrame returned by
            get_lineage().
        """
        raise NotImplementedError("Not implemented")