#!/usr/bin/env python # coding: utf-8 # # Explore astropy get_constellation bugs and Table ASCII CDS format # # This notebook explores various code related to an astropy bug: [get\_constellation\(\) should use exact HMS, DMS boundaries, not 4\-decimal approximations · Issue \#9855 · astropy/astropy](https://github.com/astropy/astropy/issues/9855) # # Here we compare the original astropy get_constellation() code, vs that same code with a fix for the PrecessedGeocentric bug identified in that issue, and aso the fast [SkyField](https://rhodesmill.org/skyfield/) code which seems to work at full floating-point resolution. # # We do several million trials of picking a uniformly random point in the sky, and finding the constellation via each routine. The results do closely correspond to the relative area of each constellation in the sky, as shown. # The original roundoff issue identified in the bug leads to an error rate (assuming SkyField is always right) of 17 failures per million. But the improper use of PrecessedGeocentric leads to more like 311 failures per million, at least under the circumstances and timing in play here. # # The most common error from the rounded-off Roman data seen here lies in the four gaps along the long border between Pegasus and Andromeda, e.g. along the meridian between RA 0.0166666666 to 0.0167, from declination 22 to 28 degrees. # See the map at http://pbarbier.com/constellations/8N.pdf # # After that, we briefly explore Pierre Barbier's reproduction of Delporte's original standardized constellation boundaries. It could evidently be used as the basis for a fully corrected astropy function, but the approach used by SkyField is probably quite a bit faster and should be considered. # # Along the way, we see that astropy.table's support for the [CDS Standards for Astronomical Catalogues](http://cdsarc.u-strasbg.fr/submit/catstd/catstd-3.1.htx) is a pretty nice alternative to CSV format for data with units! See [Unified File Read/Write Interface — Astropy v5\.0\.4](https://docs.astropy.org/en/stable/io/unified.html#getting-started-with-table-i-o) # # Note, these are rough, in-progress notes - expect errors and messes.... # # TODO: # * Confirm that all the errors are being properly attributed by checking more examples. # * Figure out what is underneath the PrecessedGeocentric bug: how does it work? Is it affected by time of day or year? What does it affect most? What distance is assumed for the constellations? # * Compare the speeds of the two main algorithms here # In[811]: from datetime import datetime from astropy.table import Table from astropy.coordinates import Angle from astropy import units as u # In[5]: import glob # In[812]: import pandas as pd # # Fix PrecessedGeocentric bug in astropy # get_constellation should use FK5, not PrecessedGeocentric. Here I've just pulled the function out of the module and fixed it to work stand-alone, so we can run both the buggy and fixed versions at the same time. # In[ ]: constel_coord = coord.transform_to(PrecessedGeocentric(equinox='B1875')) => # In[ ]: compare_const(randomRaDec()) # In[656]: import astropy.coordinates.funcs as funcs # In[664]: compare_const(randomRaDec()) # In[663]: def fix_get_constellation(coord, short_name=False, constellation_list='iau'): """ Determines the constellation(s) a given coordinate object contains. Parameters ---------- coord : coordinate object The object to determine the constellation of. short_name : bool If True, the returned names are the IAU-sanctioned abbreviated names. Otherwise, full names for the constellations are used. constellation_list : str The set of constellations to use. Currently only ``'iau'`` is supported, meaning the 88 "modern" constellations endorsed by the IAU. Returns ------- constellation : str or string array If ``coords`` contains a scalar coordinate, returns the name of the constellation. If it is an array coordinate object, it returns an array of names. Notes ----- To determine which constellation a point on the sky is in, this precesses to B1875, and then uses the Delporte boundaries of the 88 modern constellations, as tabulated by `Roman 1987 `_. """ # TODO: drop this which allows it to work outside the module _constellation_data = funcs._constellation_data import warnings from astropy import _erfa as erfa if constellation_list != 'iau': raise ValueError("only 'iau' us currently supported for constellation_list") # read the data files and cache them if they haven't been already if not _constellation_data: cdata = data.get_pkg_data_contents('data/constellation_data_roman87.dat') ctable = ascii.read(cdata, names=['ral', 'rau', 'decl', 'name']) cnames = data.get_pkg_data_contents('data/constellation_names.dat', encoding='UTF8') cnames_short_to_long = dict([(l[:3], l[4:]) for l in cnames.split('\n') if not l.startswith('#')]) cnames_long = np.array([cnames_short_to_long[nm] for nm in ctable['name']]) _constellation_data['ctable'] = ctable _constellation_data['cnames_long'] = cnames_long else: ctable = _constellation_data['ctable'] cnames_long = _constellation_data['cnames_long'] isscalar = coord.isscalar # if it is geocentric, we reproduce the frame but with the 1875 equinox, # which is where the constellations are defined # this yields a "dubious year" warning because ERFA considers the year 1875 # "dubious", probably because UTC isn't well-defined then and precession # models aren't precisely calibrated back to then. But it's plenty # sufficient for constellations with warnings.catch_warnings(): warnings.simplefilter('ignore', erfa.ErfaWarning) # Wrong constel_coord = coord.transform_to(PrecessedGeocentric(equinox='B1875')) constel_coord = coord.transform_to(FK5(equinox='B1875')) if isscalar: rah = constel_coord.ra.ravel().hour decd = constel_coord.dec.ravel().deg else: rah = constel_coord.ra.hour decd = constel_coord.dec.deg constellidx = -np.ones(len(rah), dtype=int) notided = constellidx == -1 # should be all for i, row in enumerate(ctable): msk = (row['ral'] < rah) & (rah < row['rau']) & (decd > row['decl']) constellidx[notided & msk] = i notided = constellidx == -1 if np.sum(notided) == 0: break else: raise ValueError('Could not find constellation for coordinates {}'.format(constel_coord[notided])) if short_name: names = ctable['name'][constellidx] else: names = cnames_long[constellidx] if isscalar: return names[0] else: return names # # Compare skyfield binary search approach with Roman method # Use millions of random points in the sky. # In[651]: from astropy.coordinates import SkyCoord, FK5, get_constellation, PrecessedGeocentric # In[157]: import numpy as np import skyfield as sf from skyfield.api import position_of_radec, load_constellation_map # In[625]: from skyfield.api import load ts = load.timescale() # In[158]: constellation_at = load_constellation_map() north_pole = position_of_radec(0, 90) constellation_at(north_pole) # In[627]: B1875 = ts.J(1875) # In[554]: position_of_radec(10, 90).radec() # In[754]: def randomRaDec(): """Return a random point in the sky: (ra, dec) in hr, degrees Thanks to https://astronomy.stackexchange.com/a/22399/16634 """ ran1, ran2 = np.random.random(2) ra = 24.0 * ran1 dec = np.arcsin(2.*(ran2 - 0.5)) * 180. / np.pi return (ra, dec) # Quick check of a small sample - are the results reasonable? # In[815]: df = pd.DataFrame([randomRaDec() for _ in range(10000)]) # In[816]: df.describe() # In[817]: def fix_constl_astropy(radec): "Run the fixed version of get_constallations from astropy with given (ra, dec) in hours,degs" ra, dec = radec p = FK5(ra*u.hour, dec*u.deg, equinox='B1875') return fix_get_constellation(p, short_name=True) # In[581]: def constl_astropy(radec): ra, dec = radec p = FK5(ra*u.hour, dec*u.deg, equinox='B1875') return get_constellation(p, short_name=True) # In[634]: from collections import Counter # Save previous results if necessary via: # # oldcounts, oldtrials = counts, trials # In[789]: counts = Counter() # In[790]: trials = [] # In[690]: def compare_const(radec): ra, dec = radec c_ap = constl_astropy((ra, dec)) c_fap = fix_constl_astropy((ra, dec)) c_sf = constellation_at(position_of_radec(ra, dec, epoch=B1875)) return (c_fap == c_sf, c_ap, c_fap, c_sf, radec) # In[818]: len(trials) # In[801]: def sim(n): for i in range(n): res = compare_const(randomRaDec()) trials.append(res) counts[res[1]] += 1 if not res[0]: print(i, res) # ~ 14 min for 100k trials, or 4 m overnight # In[830]: print(datetime.now()) get_ipython().run_line_magic('time', 'sim(4900000)') # In[895]: def which_constellations(trials, wrong, right): "Pull out and identify the top wrong and right constellation names from a given selection of trials" cnts = Counter([t[wrong] for t in trials]) n = sum(cnts.values()) print(f'Top wrong ids: {cnts.most_common(5)} out of {n=}') cnts = Counter([t[right] for t in trials]) print(f'Top right ids: {cnts.most_common(5)} out of {n=}') cnts = Counter([f'{t[wrong]} => {t[right]}' for t in trials]) commons = "\n ".join([str(t) for t in cnts.most_common(10)]) print(f'Top fixed ids out of {n=}:\n {commons}') # In[861]: precessedGeocentric_bug = [t for t in trials if t[1]!=t[2]] round_bug = [t for t in trials if t[2]!=t[3]] # Note that, as indicated in the later analysis, there are several errors of Peg for And, and Cet for Aqr. # In[897]: which_constellations(round_bug, 2, 3) # In[896]: which_constellations(precessedGeocentric_bug, 1, 2) # In[901]: def stats(trials): "Run some stats on trials. Note t[1] is c_ap, 2: c_fap, 3: c_sf" cnts = Counter(trials) n = sum(counts.values()) precessedGeocentric_bug = [t for t in trials if t[1]!=t[2]] pgbugrate = len(precessedGeocentric_bug)/ n round_bug = [t for t in trials if t[2]!=t[3]] roundbugrate = len(round_bug)/ n both_bug = [t for t in trials if t[1]!=t[3]] bothbugrate = len(both_bug)/ n print(f'{n=}, {roundbugrate=:.4%}, {pgbugrate=:.4%}, {bothbugrate=:.4%}') # In[902]: stats(trials) # In[831]: sum(counts.values()) # In[832]: [t for t in trials if t[3]!=t[2] ] # In[833]: len(_) # In[834]: [t for t in trials if t[1]!=t[3] ] # In[835]: len(_) # In[836]: [t for t in trials if t[1]!=t[2] ] # In[837]: len(_) # Compare percentages with https://en.wikipedia.org/wiki/IAU_designated_constellations_by_area # In[838]: [(i, c / sum(counts.values()) * 100.0) for i, c in counts.most_common()] # # Explore SkyField data structures # In[140]: indexed_abbreviations = np.load('skyfield/indexed_abbreviations.npy') # In[154]: len(indexed_abbreviations) # In[141]: indexed_abbreviations # In[142]: radec_to_index = np.load('skyfield/radec_to_index.npy') # In[151]: len(radec_to_index) # In[143]: radec_to_index # In[145]: sorted_dec = np.load('skyfield/sorted_dec.npy') # In[152]: len(sorted_dec) # In[149]: sorted_dec # In[146]: sorted_ra = np.load('skyfield/sorted_ra.npy') # In[153]: len(sorted_ra) # In[150]: sorted_ra # # Parse pbarbier's versions of Delporte's standard # Note: we had to fix (and report) a bug which appeared this way: # # InconsistentTableError: Can't find table edges_18.txt in pbarbier.com/constellations/ReadMe # # since the original ReadMe used the keyword "files" rather than "file" in cases where two files were described by the same format: # # Changing the lines like this in the ReadMe: # # Byte-by-byte Description of files: lines_18.txt and lines_in_18.txt # # to use the singular "file:": # # Byte-by-byte Description of file: lines_18.txt and lines_in_18.txt # # fixes the problem. # # The plural form is accepted by the [anafile](https://vizier.u-strasbg.fr/doc/anafile.htx) code, but not by astropy tables. Pierre kindly adjusted his ReadMe to fit the stricter standard, but it might also make sense for astropy to accept the looser definition. # In[7]: for f in glob.glob("pbarbier.com/constellations/*.txt"): try: t = Table.read(f, readme="pbarbier.com/constellations/ReadMe", format="ascii.cds") print("Succeed on", f) except Exception as e: print("Fail,", e) # # Look at gaps caused by rounding off angles, assess their sizes crudely # # FIXME: This is a very crude analysis, ignoring the fact that gaps in RA (measured in hours) are much bigger than gaps in declination, and that distances in RA vary from the equator to the poles. It also sometimes takes arcs the long way around, e.g. from RA 0 to 23.75 rather than 23.75 to 24.00. # # Note that we modify the description of the table so that it is more convenient to parse the angles in the sexagesimal format they are recorded in, rather than the format forced by CDS which requires parsing out the hours, degrees, minutes and seconds separately. If we use this data in astropy, it would probably make sense to define a custom unit to handle all that automatically. # # ``` # Byte-by-byte Description of file: edges_18.txt and bound_edges_18.txt # -------------------------------------------------------------------------------- # Bytes Format Units Label Explanations # -------------------------------------------------------------------------------- # 1- 3 A3 --- Key1 Key of 1st vertex # 4 A1 --- D1 [:] delimiter # 5- 7 A3 --- Key2 Key of 2nd vertex # 9 A1 --- Type Edge type: [M]eridian or [P]arallel # 10 A1 --- Dir Edge direction: + increasing or - decreasing # 12- 20 A9 ra RA1 Right ascension B1875 of 1st vertex # 21- 29 A9 dec DE1 Declination B1875 (degrees) of 1st vertex # 31- 38 A8 ra RA2 Right ascension B1875 of 2nd vertex # 40- 48 A9 dec DE2 Declination B1875 of 2nd vertex # 50- 57 A8 --- Cons Constellation abbreviations # -------------------------------------------------------------------------------- # ``` # # In[15]: t = Table.read("pbarbier.com/constellations/edges_18.txt", readme="pbarbier.com/constellations/ReadMe", format="ascii.cds") # In[21]: t.info # In[27]: t # In[19]: t.meta # In[917]: # FIXME: return spherical area, not rectangular # Note current output is in degree-hours def gaps(t): "For each constellation boundary, return the area of gap between edge of constellation and rounded off edge" for i, r in enumerate(t): ra1 = Angle(r['RA1'], unit=u.hour).hour ra2 = Angle(r['RA2'], unit=u.hour).hour dec1 = Angle(r['DE1'], unit=u.deg).deg dec2 = Angle(r['DE2'], unit=u.deg).deg if ra1 == ra2: delta = ra2 - ra2.round(4) if delta != 0: yield ((dec2 - dec1) * delta, "RA", i, ra1, ra2, dec1, dec2, r['Cons']) if dec1 == dec2: delta = dec2 - dec2.round(4) if delta != 0: if ra2 - ra1 >= 12. or ra2 - ra1 <= -12.: print("wrong arc?", r) yield ((ra2 - ra1) * delta, "Dec", i, ra1, ra2, dec1, dec2, r['Cons']) # print(r, type(r)) # In[918]: gs = list(gaps(t)) # In[115]: ra2 = gs[0][3] # In[116]: ra2 # In[117]: ra2.round(4) # In[118]: (ra2.round(4) - ra2) * 18 # In[849]: gs[:10] # In[121]: len(g) # In[129]: len(t) # Note that, as seen in the earlier analysis, some of the big gaps are Peg for And, and Cet for Aqr. # But note that this size estimate is way off: treating the length of the parallel as 23.75 hours, not 0.25 hours... I doubt that that affects the actual astropy calculations, and it seem unrelated to the large number of errors along the border between PEG and AND. That is simply a long border with several gaps. # In[122]: min(gs) # In[908]: .0007916666 / .0000333333 # In[905]: .000033333 * .25*15 # In[123]: max(gs) # In[904]: [g for g in gs if g[7] == "AND PEG"] # In[903]: [g for g in gs if g[7] == "PEG AND"] # In[913]: gsorted = sorted([g[0] for g in gs]) # In[914]: gsorted # In[135]: sum(abs(g[0]) for g in gs) # In[137]: _ / 41252 # In[130]: 180*24 # In[133]: 41252/15 # In[138]: 4320/2750 # 41252 real square degrees # # In[126]: sum(abs(g[0]) for g in gs if g[1] == "RA") # In[127]: sum(abs(g[0]) for g in gs if g[1] == "Dec") # In[73]: sum(abs(g[0]) for g in gs) # In[49]: ra1 # In[42]: ra1.deg - ra1.deg.round(4) # In[41]: ra1.deg.round(4) # In[45]: de1 # In[46]: type(de1) # In[31]: de1.round(3) # In[38]: de1.round(6) # Original ReadMe format # In[18]: t.show_in_browser(jsviewer=True) # In[41]: t.columns # In[43]: for r in t: print(r) # In[46]: r.keys() # In[48]: r['RA1_h'] # In[35]: t # In[12]: t = Table.read("pbarbier.com/constellations/edges_18.txt", readme="pbarbier.com/constellations/ReadMe", format="ascii.cds") # # Load Example Table over the web # In[2]: t = Table.read("ftp://cdsarc.u-strasbg.fr/pub/cats/VII/253/snrs.dat", readme="ftp://cdsarc.u-strasbg.fr/pub/cats/VII/253/ReadMe", format="ascii.cds") # In[3]: t # In[4]: type(t)