#!/usr/bin/env python
# coding: utf-8

# # Asteroid Detection Module
#
# **Lecturer:** Quanzhi Ye
# **Jupyter Notebook Authors:** Quanzhi Ye, Ashish Mahabal, Dmitry Duev, & Cameron Hummels
#
# This is a Jupyter notebook lesson taken from the GROWTH Summer School 2019. For other lessons and their accompanying lectures, please see: http://growth.caltech.edu/growth-astro-school-2019-resources.html
#
# ## Objective
# Compute the detection rate of fast-moving asteroids: asteroids that pass within ~20 lunar distances of Earth and leave trails/streaks on astronomical images.
#
# ## Key steps
# - Figure out which known asteroids will show up as streaks in ZTF images, and
# - Examine the streak catalog to see how many of them are detected.
# - [Optional] Run DeepStreaks, a convolutional-neural-network, deep-learning system designed to efficiently identify streaking fast-moving near-Earth objects detected in the ZTF data.
#
# ## Required dependencies
#
# See the GROWTH school webpage for detailed instructions on how to install these modules and packages. Nominally, you should be able to install the Python modules with `pip install`. External astromatic packages are most easily installed using package managers (e.g., `rpm`, `apt-get`).
#
# ### Python modules
# * python 3
# * astropy
# * numpy
# * pandas
# * astroquery
# * matplotlib
# * pillow
#
# ### External packages
# None

# In[1]:

import requests
import numpy as np
import pandas as pd
import glob
import matplotlib.pyplot as plt
import os
import tarfile
from astroquery.jplhorizons import Horizons
import datetime
from astropy.time import Time
from IPython.display import Image as Image_jupyter
from IPython.display import display
import pdb
import warnings

# If you are missing dependencies, pip-install them from this notebook:

# In[2]:

# !pip install astroquery
# !pip install pillow

# ## Step 1: fast-moving asteroids that fall on ZTF images
#
# We will loop over the catalog of known NEAs (Near-Earth Asteroids) and make a series of queries to JPL's Horizons service and IPAC's IRSA service to determine if and when they will be visible at Palomar (where ZTF runs from).
#
# Let's download the catalog of known NEAs from the Minor Planet Center (MPC).

# In[3]:

# !wget -O data/NEA.txt https://www.minorplanetcenter.net/iau/MPCORB/NEA.txt
get_ipython().system('ls -lhtr data')

# In[4]:

path_data = 'data'  # all relevant data are in the data directory
path_nea = os.path.join(path_data, 'NEA.txt')

print('retrieving MPCORB...')
with open(path_nea, 'r') as f:
    mpcorb = f.read()
mpcorb = mpcorb.split("\n")
print('MPCORB file retrieved.')

# Let's convert the catalog into a `pandas` DataFrame for convenience. The format of the MPC catalog is described [here](https://www.minorplanetcenter.net/iau/info/MPOrbitFormat.html); it is designed for machines but is also somewhat human-readable.

# In[5]:

df = pd.read_fwf(path_nea,
                 colspecs=[(0, 7), (8, 13), (14, 19), (20, 25), (26, 35), (37, 46),
                           (48, 57), (59, 68), (70, 79), (80, 91), (92, 103), (105, 106),
                           (107, 116), (117, 122), (123, 126), (127, 131), (131, 132),
                           (132, 136), (137, 141), (142, 145), (146, 149), (150, 160),
                           (161, 165), (166, 194), (194, 202)],
                 names=['num', 'H', 'G', 'epoch', 'n', 'omega', 'Omega', 'i', 'e',
                        'n_daily', 'a', 'U', 'reference', 'num_obs', 'num_opp',
                        'date_1', 'date_2', 'date_3', 'rms_residual', 'coarse_ind_per',
                        'precise_ind_per', 'computer_name', 'hex_flags', 'designation',
                        'date_last_obs'])

# display first and last five entries
display(df.head())
display(df.tail())
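# As a quick sanity check that the fixed-width column boundaries line up, we can compare one raw catalog record against its parsed row (a small check using only the `mpcorb` list and `df` from above):

# In[ ]:

# raw MPCORB record...
print(mpcorb[0])
# ...and the corresponding parsed DataFrame row
display(df.iloc[0])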
# Now let us loop over the MPC catalog to check the visibility of every NEA. We will do it for NEA 2018 CL (the first NEA found by ZTF); it's up to you to write a loop over the full catalog.
#
# First, we create an instance for the Horizons query. We only need to tell Horizons the name of the object and the time window we want to query, then let Horizons do the heavy lifting. Oh, and the MPC observatory code for ZTF is `I41`, as described [here](https://www.minorplanetcenter.net/iau/lists/ObsCodesF.html). If the query returns zero entries, it means that the object is not observable at Palomar during the suggested time interval.

# In[6]:

ZTF_code = 'I41'

# First, let us define the time window we would like to query. Our default is 20180205, the day ZTF science observations started:

# In[7]:

start_date = '20180205'
end_date = '20180206'

# Then let's do all the tasks described above. For now, we run our test with 2018 CL (the first NEA found by ZTF).

# In[8]:

# display db entry for 2018 CL:
display(df[df.designation == '2018 CL'])
obj_name = '2018 CL'

# In[9]:

obj = Horizons(id=obj_name, location=ZTF_code,
               epochs={'start': '{}-{}-{}'.format(start_date[0:4], start_date[4:6], start_date[6:8]),
                       'stop': '{}-{}-{}'.format(end_date[0:4], end_date[4:6], end_date[6:8]),
                       'step': '1h'})
eph = obj.ephemerides(skip_daylight=True, airmass_lessthan=5.0)
if len(eph) <= 0:
    print('object {} was not observable at Palomar'.format(obj_name))

# We also want to calculate the motion of the asteroid to see if it will show up as a streak in our images.

# In[10]:

# RA_rate and DEC_rate are returned by Horizons in arcsec/hour; convert to arcsec/sec
rate = np.sqrt(eph['RA_rate']**2 + eph['DEC_rate']**2) / 3600.

# Now, since we just discovered the power of JPL Horizons... why don't we take a short detour and do something fun? Let's say we want to see the predicted trajectory of 2018 CL...

# In[11]:

fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(eph['RA'], eph['DEC'], 'o-')
ax.set_title('trajectory of 2018 CL from %s to %s' % (start_date, end_date))
ax.set_xlabel('RA (deg)')
ax.set_ylabel('DEC (deg)')
for i, xy in enumerate(zip(eph['RA'], eph['DEC'])):
    ax.annotate('%s' % eph['datetime_str'][i], xy=xy, textcoords='data')
plt.show()

# What about the change of azimuth and elevation angle?

# In[12]:

fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(eph['AZ'], eph['EL'], 'o-')
ax.set_title('azimuth/elevation angle of 2018 CL from %s to %s' % (start_date, end_date))
ax.set_xlabel('Azimuth (deg)')
ax.set_ylabel('Elevation (deg)')
for i, xy in enumerate(zip(eph['AZ'], eph['EL'])):
    ax.annotate('%s' % eph['datetime_str'][i], xy=xy, textcoords='data')
plt.show()

# The complete list of values supported by `astroquery.jplhorizons` can be found [here](https://astroquery.readthedocs.io/en/latest/api/astroquery.jplhorizons.HorizonsClass.html#astroquery.jplhorizons.HorizonsClass.ephemerides). Play with it and be creative! For example, can you plot the change of the RA/DEC rate over time? And what about the change of the predicted V magnitude?
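# Here is a minimal sketch for those two exercises, reusing the `rate` array we just computed and assuming the ephemerides table contains the default `V` magnitude column (it should, for an asteroid):

# In[ ]:

# sky-motion rate and predicted V magnitude over the query window
fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
t = eph['datetime_jd'] - eph['datetime_jd'][0]  # days since the first epoch
ax1.plot(t, rate, 'o-')
ax1.set_ylabel('rate (arcsec/sec)')
ax2.plot(t, eph['V'], 'o-')
ax2.set_ylabel('predicted V (mag)')
ax2.set_xlabel('days since {}'.format(eph['datetime_str'][0]))
plt.show()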
# Now let's come back to our main journey. How fast does the asteroid need to be in order to show up as a streak? Let's take anything longer than 5 full-width-half-maximums (FWHM) as a streak. We know the exposure time of ZTF is 30 seconds, and the typical FWHM is 2''. Therefore, the minimum rate of motion is not too difficult to calculate:

# In[13]:

# 5 FWHM x 2 arcsec per FWHM, covered within one 30-second exposure
min_rate = 2*5/30
print('minimum rate of motion for a streak: {:.2f} arcsec/sec.'.format(min_rate))

# If the object becomes a fast mover at some point within the queried time window, we use IPAC's Moving Object Search Tool (MOST) to get a list of the images on which the object appears as a fast mover.

# In[14]:

if max(rate) > min_rate:
    params = {'catalog': 'ztf', 'input_type': 'name_input', 'obj_name': '{}'.format(obj_name),
              'obs_begin': '{}-{}-{}'.format(start_date[0:4], start_date[4:6], start_date[6:8]),
              'obs_end': '{}-{}-{}'.format(end_date[0:4], end_date[4:6], end_date[6:8]),
              'output_mode': 'Brief'}
    irsa_return = requests.get('https://irsa.ipac.caltech.edu/cgi-bin/MOST/nph-most', params=params)

# ...and MOST did give us something, right?

# In[15]:

from io import StringIO

r = irsa_return.text.split("\n")
# collapse repeated whitespace so the table can be parsed with a single-space separator
r = [' '.join(rr.split()) for rr in r]
data_str = '\n'.join(r[20:])
# display(data_str)
column_names = [cn.strip() for cn in r[18].split('|')[1:-1]]
# display(column_names)
dff = pd.read_csv(StringIO(data_str), sep=' ', parse_dates=[[22, 23]], header=None)
column_names.remove('obsdate')
dff.columns = ['obsdate'] + column_names
dff

# Here is a loop over the query result from MOST to find out which images will contain the target as a fast mover. Note that we want the predicted magnitude of the object to be <20, since that is the typical depth of ZTF.

# In[16]:

limiting_magnitude = 20

# We also make another query to Horizons to ensure the object really is a fast mover on these dates. We do this as a double check, since Horizons produces the most precise ephemerides for NEAs. Horizons also tells us the expected positional uncertainty. Obviously, we don't want the positional uncertainty to be too big -- say, over 20".

# In[17]:

max_unc = 20

# We then print out all the potentially useful metadata, stopping after the first hit (otherwise the output would get very long!)

# In[18]:

w = dff.vmag < limiting_magnitude
for index, row in dff.loc[w].iterrows():
    obj_this = Horizons(id=obj_name, location=ZTF_code, epochs=row.obsjd)
    eph = obj_this.ephemerides()
    # rates are in arcsec/hour; convert to arcsec/sec before comparing to min_rate
    ztf_entry_rate = np.sqrt(eph['RA_rate'][0]**2 + eph['DEC_rate'][0]**2) / 3600.
    if ztf_entry_rate > min_rate:
        # quadrature sum of the 3-sigma uncertainties in RA and DEC, in arcsec
        ztf_entry_unc = np.sqrt(eph['RA_3sigma'][0]**2 + eph['DEC_3sigma'][0]**2)
        if ztf_entry_unc < max_unc:
            print('hit: object {}'.format(obj_name))
            print('object name:', obj_name)
            print('image date (UT):', row.obsdate)
            print('fracday:', row.filefracday)
            print('filter code:', row.filtercode)
            print('distance to the Sun:', row.sun_dist, 'AU')
            print('distance to the Earth:', row.geo_dist, 'AU')
            print('predicted V magnitude:', row.vmag)
            print('ecliptic latitude:', eph['ObsEclLat'][0])
            print('positional uncertainty: {:.2f} arcsec'.format(ztf_entry_unc))
            break

# Voila! You made it. Note that it takes quite a bit of time (a day or two) to loop over the entire NEA catalog -- this is because Horizons has to calculate the ephemerides for each of these ~15,000 NEAs, and that is slow! Therefore, we provide a pre-generated result file so you can proceed to Step 2. But do take a few known asteroids and see what you get for them; a minimal skeleton for such a loop is sketched below.
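# A sketch only, assuming the `designation` strings in `df` resolve in Horizons (unnumbered designations like `2018 CL` do; numbered objects may need their number instead), and restricted to a handful of objects so the test run finishes quickly:

# In[ ]:

for desig in df['designation'].head(5):
    try:
        obj_i = Horizons(id=desig, location=ZTF_code,
                         epochs={'start': '{}-{}-{}'.format(start_date[0:4], start_date[4:6], start_date[6:8]),
                                 'stop': '{}-{}-{}'.format(end_date[0:4], end_date[4:6], end_date[6:8]),
                                 'step': '1h'})
        eph_i = obj_i.ephemerides(skip_daylight=True, airmass_lessthan=5.0)
    except Exception as e:  # unresolvable designation or failed query
        print('{}: query failed ({})'.format(desig, e))
        continue
    if len(eph_i) == 0:
        print('{}: not observable at Palomar'.format(desig))
        continue
    # rates come back in arcsec/hour; convert to arcsec/sec
    rate_i = np.sqrt(eph_i['RA_rate']**2 + eph_i['DEC_rate']**2) / 3600.
    if rate_i.max() > min_rate:
        print('{}: potential streaker, peak rate {:.2f} arcsec/sec'.format(desig, rate_i.max()))
    else:
        print('{}: too slow ({:.4f} arcsec/sec)'.format(desig, rate_i.max()))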
# ## Step 2: compare the catalog derived from Step 1 to the source catalog generated by the ZTF streak detection pipeline
#
# Let's read in the result of Step 1... (OK, we cheated, it is pre-generated.)

# In[19]:

path_fmo = os.path.join(path_data, 'check_fmo.txt')
check_fmo = np.genfromtxt(path_fmo, dtype=None,
                          delimiter=(12, 10, 6, 10, 10, 10, 5, 10, 10, 10),
                          encoding='utf-8',
                          names=["object", "dateUT", "filter", "rate", "sundist",
                                 "geodist", "vmag", "ecllat", "unc", "fracday"])[:-1]

# convert to a pandas dataframe
df_check_fmo = pd.DataFrame(check_fmo)
# strip whitespace from object names:
df_check_fmo['object'] = df_check_fmo['object'].str.strip()
# compute JDs: dateUT gives the JD of the start of the night, fracday the 6-digit fractional day
df_check_fmo['jd'] = df_check_fmo['dateUT'].apply(lambda x: Time(datetime.datetime.strptime(str(x), '%Y%m%d')).jd)
df_check_fmo['jd'] = df_check_fmo['jd'] + df_check_fmo['fracday']/1000000

# And we also read the source files from ZTF. Essentially, these are a large number of "streaks_qa" files; each file contains the candidate streaks in one image. We want to loop over them and see if any candidate coincides with the position of a known streaking NEA. We also record the rate and magnitude of detections and non-detections for diagnostic purposes.
#
# For a demonstration, let us pick one of them to show how this whole thing works. Let us start with our old friend, `2018 CL`.

# In[20]:

# entries for 2018 CL in the pre-generated results:
ww = df_check_fmo.object == '2018 CL'
df_check_fmo.loc[ww]

# What are the uncertainty ranges (in arcsec) for these entries?

# In[21]:

df_check_fmo.loc[ww, 'unc'].values

# This is very precise! Well, we know it should be small, because the orbit from JPL was calculated using ZTF observations :)
#
# Now, let us obtain the predicted coordinates of `2018 CL` at these epochs, using what we just learned in Step 1.

# In[22]:

expTimeJD = df_check_fmo.loc[ww, 'jd'].values
expTimeJD

# In[23]:

obj_this = Horizons(id=obj_name, location=ZTF_code, epochs=expTimeJD)
eph = obj_this.ephemerides()
eph

# A simplistic approach is to loop through each small folder generated by findStreaks and see if there is any candidate matching the predicted RA and DEC:

# In[24]:

detectionRadius = 5/3600  # 5 arcsec, in degrees

# In[25]:

path_streaks = os.path.join(path_data, 'run_merged', 'ztf*')
d = sorted(glob.glob(path_streaks))
# display(d)

# let's look at the first epoch:
targetRA = eph['RA'][0]
targetDec = eph['DEC'][0]

for di in d:
    fileName = os.path.basename(di)
    streaksQA_path = '{}/{}_streaksqa.txt'.format(di, fileName)
    # skip (near-)empty QA files
    if os.stat(streaksQA_path).st_size > 500:
        try:
            streaksQA = np.loadtxt(streaksQA_path, delimiter=',', comments='\\')
            for streaksQA_item in streaksQA:
                # columns 1-4 hold the RA/DEC of the two streak endpoints
                if (abs(streaksQA_item[1]-targetRA) < detectionRadius and
                        abs(streaksQA_item[2]-targetDec) < detectionRadius) or \
                   (abs(streaksQA_item[3]-targetRA) < detectionRadius and
                        abs(streaksQA_item[4]-targetDec) < detectionRadius):
                    print('YES! Streak found in', streaksQA_path)
        except Exception:
            # malformed or single-row QA files are skipped
            continue

# Wonderful! We have a hit!
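# One caveat: the box match above compares raw RA/DEC differences, which ignores the cos(DEC) compression of RA and is only strictly safe for small radii near the celestial equator. A more rigorous alternative is a proper angular separation -- a sketch using `astropy` (already one of our dependencies):

# In[ ]:

from astropy.coordinates import SkyCoord
import astropy.units as u

target = SkyCoord(targetRA * u.deg, targetDec * u.deg)

def within_detection_radius(ra_deg, dec_deg, radius_arcsec=5.):
    """True if (ra_deg, dec_deg) lies within radius_arcsec of the target,
    using the on-sky angular separation instead of a coordinate box."""
    return target.separation(SkyCoord(ra_deg * u.deg, dec_deg * u.deg)) < radius_arcsec * u.arcsec

# trivial check: the target matches itself
print(within_detection_radius(targetRA, targetDec))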
# We can even take a look at the cutout to see what it looks like:

# In[26]:

for di in d:
    fileName = os.path.basename(di)
    streaksQA_path = '{}/{}_streaksqa.txt'.format(di, fileName)
    if os.stat(streaksQA_path).st_size > 500:
        try:
            streaksQA = np.loadtxt(streaksQA_path, delimiter=',', comments='\\')
            for streaksQA_item in streaksQA:
                if (abs(streaksQA_item[1]-targetRA) < detectionRadius and
                        abs(streaksQA_item[2]-targetDec) < detectionRadius) or \
                   (abs(streaksQA_item[3]-targetRA) < detectionRadius and
                        abs(streaksQA_item[4]-targetDec) < detectionRadius):
                    folder_name = di.split('/')[-1]
                    print(folder_name)
                    path_jpgs = os.path.join(path_data, 'run_merged', folder_name, folder_name + '_cutouts', '*jpg')
                    jpgs = glob.glob(path_jpgs)
                    print(jpgs[0])
                    display(Image_jupyter(filename=jpgs[0], width=157, height=157))
        except Exception:
            continue

# Voila! Now, homework for you: can you use the example shown above to work out the detection efficiency of ZTF? A possible starting point is sketched below.
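# A sketch of that homework, refactoring the matching loop above into a function. The efficiency here is simply (matched epochs) / (predicted epochs) for the `2018 CL` epochs in `eph`; extending it to the full `df_check_fmo` catalog is the actual exercise:

# In[ ]:

def streak_detected(target_ra, target_dec, radius=detectionRadius):
    """Return True if any findStreaks candidate endpoint falls within
    `radius` degrees (box match) of the predicted position."""
    for di in d:
        fileName = os.path.basename(di)
        streaksQA_path = '{}/{}_streaksqa.txt'.format(di, fileName)
        if os.stat(streaksQA_path).st_size <= 500:
            continue
        try:
            streaksQA = np.loadtxt(streaksQA_path, delimiter=',', comments='\\')
        except Exception:
            continue
        # atleast_2d also handles QA files that contain a single row
        for s in np.atleast_2d(streaksQA):
            if (abs(s[1]-target_ra) < radius and abs(s[2]-target_dec) < radius) or \
               (abs(s[3]-target_ra) < radius and abs(s[4]-target_dec) < radius):
                return True
    return False

hits = [streak_detected(ra, dec) for ra, dec in zip(eph['RA'], eph['DEC'])]
print('detected {} of {} predicted streaks: efficiency {:.0%}'.format(sum(hits), len(hits), sum(hits)/len(hits)))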
# ## Step 3: run a light version of DeepStreaks [optional]
#
# `DeepStreaks` is a convolutional-neural-network, deep-learning system designed to efficiently identify streaking fast-moving near-Earth objects detected in the ZTF data.
#
# For details, please see [Duev et al., MNRAS.486.4158D, 2019](https://academic.oup.com/mnras/article-abstract/486/3/4158/5472913), [arXiv:1904.05920](https://arxiv.org/pdf/1904.05920.pdf), and [this GitHub repo](https://github.com/dmitryduev/DeepStreaks).
#
# We will load a light version of DeepStreaks and run it on the streaks to see if they would have been identified. `DeepStreaks` is implemented using Google's `TensorFlow` library, so we will need it for this part.

# In[27]:

#!pip install tensorflow
import tensorflow as tf
from PIL import Image, ImageOps

# In[28]:

def load_model_helper(path, model_base_name):
    """Reassemble a Keras model from its JSON architecture and HDF5 weights."""
    with open(os.path.join(path, f'{model_base_name}.architecture.json'), 'r') as json_file:
        loaded_model_json = json_file.read()
    m = tf.keras.models.model_from_json(loaded_model_json)
    m.load_weights(os.path.join(path, f'{model_base_name}.weights.h5'))
    return m

# `DeepStreaks` consists of three sets of binary classifiers: the first (real/bogus, or $rb$) decides if a particular cutout contains a streak-like object (which includes actual streaks, but also cosmic rays, for example); the second (short/long, or $sl$) decides if a streak is long (i.e. caused by something that moves too fast, like a LEO satellite) or short; and the third (keep/ditch, or $kd$) makes the final judgement. If all three parts output a score >0.5, the streak is designated a plausible FMO candidate.
#
# Download the models from GitHub if necessary:

# In[29]:

model_names = {"rb_vgg6": "5b96af9c0354c9000b0aea36_VGG6_20181207_151757",
               "sl_vgg6": "5b99b2c6aec3c500103a14de_VGG6_20181207_182618",
               "kd_vgg6": "5be0ae7958830a0018821794_VGG6_20190210_011644"}

base_url = 'https://raw.githubusercontent.com/dmitryduev/DeepStreaks/master/service/models/'

for model in model_names.keys():
    print('fetching {}'.format(model))
    for part in ('.architecture.json', '.weights.h5'):
        r = requests.get(os.path.join(base_url, model_names[model] + part))
        with open(os.path.join(path_data, model + part), 'wb') as f:
            f.write(r.content)

# Load models:

# In[30]:

models = {m: load_model_helper(path_data, m) for m in model_names.keys()}

# Now let us load the cutout image and resize it to the shape expected by the models.

# In[31]:

def load_cutout(path_image, resize=(144, 144)):
    """Load a cutout as a grayscale array in [0, 1] with shape (144, 144, 1)."""
    img = np.array(ImageOps.grayscale(Image.open(path_image)).resize(resize, Image.BILINEAR)) / 255.
    img = np.expand_dims(img, 2)
    return img

# In[32]:

img = load_cutout(jpgs[0])
# add a batch dimension: the models expect input of shape (n_images, 144, 144, 1)
img = np.expand_dims(img, 0)
img.shape

# In[33]:

for mn, m in models.items():
    score = m.predict(img)
    print('{} score: {:.2f}'.format(mn, score[0][0]))

# Great! This streak would have been successfully identified by `DeepStreaks`!

# #### Run `DeepStreaks` on some random streaks

# In[34]:

path_jpgs = os.path.join(path_data, 'run_merged', '*', '*', '*jpg')
cutouts = glob.glob(path_jpgs)

# set a random seed for result repeatability
np.random.seed(5)
sample = np.random.choice(cutouts, size=10)
# sample
df_sample = pd.DataFrame.from_records([{'name': os.path.basename(ii)} for ii in sample])

# In[35]:

for ni, ii in enumerate(sample):
    print('{}: {}'.format(ni, os.path.basename(ii)))
    display(Image_jupyter(filename=ii, width=157, height=157))

# In[36]:

imgs = np.array([load_cutout(x) for x in sample])
imgs.shape

# Let us now compute scores in batches and see if `DeepStreaks` would declare any of these streaks plausible candidates.

# In[37]:

for mn, m in models.items():
    scores = m.predict(imgs)
    df_sample[mn] = scores[:, 0]  # flatten the (10, 1) model output into a column

display(df_sample)

print('Plausible candidates:')
w = (df_sample['rb_vgg6'] > 0.5) & (df_sample['sl_vgg6'] > 0.5) & (df_sample['kd_vgg6'] > 0.5)
display(df_sample.loc[w])

# Hooray! Streaks #4 and #8 were correctly identified!
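# If you want to reuse this trio elsewhere (for instance, in the Step 2 detection-efficiency homework), the three checks can be wrapped into a single helper -- a small sketch assuming the `models` dict and `load_cutout` defined above:

# In[ ]:

def deepstreaks_scores(path_image, threshold=0.5):
    """Run all three DeepStreaks classifiers on one cutout.

    Returns (is_candidate, scores): is_candidate is True only if every
    model scores above `threshold`, mirroring the rb/sl/kd logic above.
    """
    img = np.expand_dims(load_cutout(path_image), 0)  # add batch dimension
    scores = {mn: float(m.predict(img)[0][0]) for mn, m in models.items()}
    return all(s > threshold for s in scores.values()), scores

# example: re-score the first random cutout from the sample above
is_candidate, scores = deepstreaks_scores(sample[0])
print(is_candidate, scores)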