Owner: Yao-Yuan Mao, Scott Daniel (with help from Anže Slosar, Bhairav Valera, HyeYun Park)
Last Verified to Run: 2018-07-23
Notes:
After completing and studying this Notebook, you should be able to:
filters
and native_filters
appropriatelyadd_quantity_modifier
and get_quantity_modifier
FoFCatalogMatching
to do Friends-of-friends catalog matchingimport numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from astropy.coordinates import SkyCoord
import FoFCatalogMatching
import GCRCatalogs
GCRCatalogs.available_catalogs
# load coadd catalog (for a single tract)
coadd_cat = GCRCatalogs.load_catalog('dc2_object_run1.2i_all_columns')
# Let's first visually inspect the footprint of one tract of the coadd catalog.
# When `return_iterator` is turned on, the method `get_quantities` will return an
# iterator, and each element in the iterator will be the quantities we requested in
# different chunks of the dataset.
# For coadd catalogs, the different chunks happen to be different patches,
# resulting in a different color for each patch in the scatter plot below.
for coadd_data in coadd_cat.get_quantities(['ra', 'dec'], return_iterator=True):
plt.scatter(coadd_data['ra'], coadd_data['dec'], s=1, rasterized=True);
plt.xlabel('RA');
plt.ylabel('Dec');
# Let's choose a small RA and Dec range to do the matching so that it won't take too long!
ra_min, ra_max = 55.5, 56.0
dec_min, dec_max = -29.0, -28.5
coord_filters = [
'ra >= {}'.format(ra_min),
'ra < {}'.format(ra_max),
'dec >= {}'.format(dec_min),
'dec < {}'.format(dec_max),
]
from GCR import GCRQuery
star_sprinkled_filter = [
~GCRQuery('star'),
~GCRQuery('sprinkled'),
~GCRQuery('agn')
]
# Lensing cuts based on Mandelbaum 2017 (arxiv 1705.06745)
lensing_cuts = [
~GCRQuery((np.isnan, 'i_modelfit_CModel_instFlux')), # (from this and below) remove nan entries
~GCRQuery((np.isnan, 'ext_shapeHSM_HsmShapeRegauss_resolution')),
~GCRQuery((np.isnan, 'ext_shapeHSM_HsmShapeRegauss_e1')),
~GCRQuery((np.isnan, 'ext_shapeHSM_HsmShapeRegauss_e2')),
GCRQuery('detect_isPrimary'), # (from this and below) basic flag cuts
~GCRQuery('deblend_skipped'),
~GCRQuery('base_PixelFlags_flag_edge'),
~GCRQuery('base_PixelFlags_flag_interpolatedCenter'),
~GCRQuery('base_PixelFlags_flag_saturatedCenter'),
~GCRQuery('base_PixelFlags_flag_crCenter'),
~GCRQuery('base_PixelFlags_flag_bad'),
~GCRQuery('base_PixelFlags_flag_suspectCenter'),
~GCRQuery('base_PixelFlags_flag_clipped'),
~GCRQuery('ext_shapeHSM_HsmShapeRegauss_flag'),
GCRQuery('HSM_res >= 0.3'),
GCRQuery('HSM_ell < 2.0'),
GCRQuery('ext_shapeHSM_HsmShapeRegauss_sigma <= 0.4'),
GCRQuery('mag_i_cModel < 24.5'), # FIXME: Doesnt have exinction correction
GCRQuery('base_Blendedness_abs_instFlux < 10**(-0.375)'),
]
# GCRQuery('i_SN_cmodel >= 10'), # (from this and below) cut on object properties
# Let's also define a magnitude cut
mag_filters = [
(np.isfinite, 'mag_i'),
'mag_i < 24.5',
]
# let's add total ellipticity for later use (not needed for now)
coadd_cat.add_derived_quantity('shape_hsm_regauss_etot', np.hypot, 'ext_shapeHSM_HsmShapeRegauss_e1', 'ext_shapeHSM_HsmShapeRegauss_e2')
print(sorted(coadd_cat.list_all_quantities()))
print(sorted(coadd_cat.list_all_native_quantities()))
coadd_cat.add_quantity_modifier('i_SN_cmodel',
(np.divide, 'i_modelfit_CModel_instFlux', 'i_modelfit_CModel_instFluxErr'),
overwrite=True)
coadd_cat.add_quantity_modifier('HSM_res',
'ext_shapeHSM_HsmShapeRegauss_resolution',
overwrite=True)
coadd_cat.add_quantity_modifier('HSM_ell',
(np.hypot, 'ext_shapeHSM_HsmShapeRegauss_e1', 'ext_shapeHSM_HsmShapeRegauss_e2'),
overwrite=True)
coadd_cat.add_quantity_modifier('psf_size',
(lambda xx, yy, xy: 0.168*2.355*(xx*yy - xy*xy)**0.25, 'i_base_SdssShape_psf_xx', 'i_base_SdssShape_psf_yy', 'i_base_SdssShape_psf_xy'),
overwrite=True)
# Load ra and dec from coadd, using both of the filters we just defined. (why not also grab e1 and e2 for later use?)
coadd_data = coadd_cat.get_quantities(['ra', 'dec', 'objectId','mag_i_cModel','mag_u_cModel','mag_g_cModel','mag_r_cModel',
'mag_y_cModel','mag_z_cModel',
'magerr_i','magerr_u','magerr_g','magerr_r','magerr_y',
'magerr_z','shape_hsm_regauss_etot'])#,filters=(lensing_cuts))#, filters=(coord_filters + mag_filters))
# Let's now turn to the truth catalog, turn of md5 sum check to save time
truth_cat = GCRCatalogs.load_catalog('dc2_truth_run1.2_static', {'md5': None})
# for a reason that we will soon see, let's inspect the quantities in truth catalog
print(sorted(truth_cat.list_all_quantities()))
print('---')
print(sorted(truth_cat.list_all_native_quantities()))
# so we see there is not mag_i, but only mag_true_i (i.e., magnitude before lensing), and it maps to `i`
truth_cat.get_quantity_modifier('mag_true_i')
'i'
# to make our `mag_filters` work, let's define mag_i for the truth catalog
truth_cat.add_quantity_modifier('mag_i', 'i')
# get ra and dec from truth catalog
# note that we add i < 24.5 to the native filter to speed up load time
#truth_native_filters = (coord_filters + ['i < 24.5'])
truth_data = truth_cat.get_quantities(['ra', 'dec', 'object_id', 'star', 'sprinkled','agn','redshift','mag_true_i',
'g','mag_true_g','mag_true_r', 'mag_true_u', 'mag_true_y', 'mag_true_z'],filters=star_sprinkled_filter)#, filters=mag_filters, native_filters=truth_native_filters)
# We will use the object_id, star, and sprinkled columns when cross-referencing truth information with the extragalactic catalog.
print (len(coadd_data['ra']))
print (len(truth_data['ra']))
2840487 8881781
cond_notstar = ~truth_data['star'][:]
cond_notagn = ~truth_data['agn'][:]
cond_notsprinkled = ~truth_data['sprinkled'][:]
cond = cond_notstar*cond_notagn*cond_notsprinkled
print(len(cond),np.sum(cond))
8881781 8881781
# now we can really do the matching!
# FoFCatalogMatching.match takes a dictionary of catalogs to match, a friends-of-friends linking length.
# Because our "catalog" is not an astropy table or pandas dataframe,
# `len(truth_coord)` won't give the actual length of the table
# so we need to specify `catalog_len_getter` so that the code knows how to get the length of the catalog.
results = FoFCatalogMatching.match(
catalog_dict={'truth': truth_data, 'coadd': coadd_data},
linking_lengths=1.0,
catalog_len_getter=lambda x: len(x['ra']),
)
# return of FoFCatalogMatching.match is an astropy table
results
row_index | catalog_key | group_id |
---|---|---|
int64 | str5 | int64 |
0 | truth | 0 |
9166 | coadd | 0 |
1 | truth | 1 |
9843 | coadd | 1 |
2 | truth | 2 |
10045 | coadd | 2 |
3 | truth | 3 |
9932 | coadd | 3 |
4 | truth | 4 |
640 | coadd | 4 |
... | ... | ... |
21938 | coadd | 24481 |
21975 | coadd | 24482 |
21987 | coadd | 24483 |
22010 | coadd | 24484 |
22041 | coadd | 24485 |
22044 | coadd | 24486 |
22078 | coadd | 24487 |
22081 | coadd | 24488 |
22083 | coadd | 24489 |
22093 | coadd | 24490 |
# now we want to count the number of truth and coadd objects *for each group*
# but instead of looping over groups, we can do this in a smart (and very fast) way
# first we need to know which rows are from the truth catalog and which are from the coadd
truth_mask = results['catalog_key'] == 'truth'
coadd_mask = ~truth_mask
# then np.bincount will give up the number of id occurrences (like historgram but with integer input)
n_groups = results['group_id'].max() + 1
n_truth = np.bincount(results['group_id'][truth_mask], minlength=n_groups)
n_coadd = np.bincount(results['group_id'][coadd_mask], minlength=n_groups)
# now n_truth and n_coadd are the number of truth/coadd objects in each group
# we want to make a 2d histrogram of (n_truth, n_coadd).
n_max = max(n_truth.max(), n_coadd.max()) + 1
hist_2d = np.bincount(n_coadd * n_max + n_truth, minlength=n_max*n_max).reshape(n_max, n_max)
plt.imshow(np.log10(hist_2d+1), extent=(-0.5, n_max-0.5, -0.5, n_max-0.5), origin='lower');
plt.xlabel('Number of truth objects');
plt.ylabel('Number of coadd objects');
plt.colorbar(label=r'$\log(N_{\rm groups} \, + \, 1)$');
# Let's further inspect the objects in the groups that have 1-to-1 truth/coadd match.
# first, let's find our the IDs of the groups that have 1-to-1 truth/coadd match:
one_to_one_group_mask = np.in1d(results['group_id'], np.flatnonzero((n_truth == 1) & (n_coadd == 1)))
# and then we can find the row indices in the *original* truth/coadd catalogs for those 1-to-1 groups
truth_idx = results['row_index'][one_to_one_group_mask & truth_mask]
coadd_idx = results['row_index'][one_to_one_group_mask & coadd_mask]
print (len(truth_idx))
1077538
truth_sc = SkyCoord(truth_data['ra'][truth_idx], truth_data['dec'][truth_idx], unit="deg")
coadd_sc = SkyCoord(coadd_data['ra'][coadd_idx], coadd_data['dec'][coadd_idx], unit="deg")
delta_ra = (coadd_sc.ra.arcsec - truth_sc.ra.arcsec) * np.cos(np.deg2rad(0.5*(truth_sc.dec.deg + coadd_sc.dec.deg)))
delta_dec = coadd_sc.dec.arcsec - truth_sc.dec.arcsec
delta_arcsec = coadd_sc.separation(truth_sc).arcsec
truth_z=truth_data['redshift'][truth_idx]
truth_mag_i=truth_data['mag_true_i'][truth_idx]
truth_mag_u=truth_data['mag_true_u'][truth_idx]
truth_mag_g=truth_data['mag_true_g'][truth_idx]
#mag_g=truth_data['g']#[truth_idx]
truth_mag_r=truth_data['mag_true_r'][truth_idx]
truth_mag_y=truth_data['mag_true_y'][truth_idx]
truth_mag_z=truth_data['mag_true_z'][truth_idx]
#print (truth_z)
#print (len(truth_z), len(truth_mag_i))
#print (len(truth_idx))
print (truth_data['redshift'][truth_idx])
print (np.min(truth_data['redshift'][truth_idx]), np.max(truth_data['redshift'][truth_idx]))
trainingfile=open("DC2.train", "w")
trainingfile.write("#redshift u g r i z u-g g-r r-i i-z\n")
testfile=open("DC2.test","w")
testfile.write("#redshift u g r i z u-g g-r r-i i-z\n")
index=np.random.choice(len(truth_z),len(truth_z),replace=False)
for i in index[0:len(index)//2]:
string='%.6f %.5f %.5f %.5f %.5f %.5f %.5f %.5f %.5f %.5f\n'%(truth_z[i],truth_mag_u[i],truth_mag_g[i],
truth_mag_r[i],truth_mag_i[i],truth_mag_z[i],
truth_mag_u[i]-truth_mag_g[i], truth_mag_g[i]-truth_mag_r[i],
truth_mag_r[i]-truth_mag_i[i], truth_mag_i[i]-truth_mag_z[i])
trainingfile.write(string)
trainingfile.close()
for i in index[len(index)//2+1:len(index)]:
string='%.6f %.5f %.5f %.5f %.5f %.5f %.5f %.5f %.5f %.5f\n'%(truth_z[i],truth_mag_u[i],truth_mag_g[i],
truth_mag_r[i],truth_mag_i[i],truth_mag_z[i],
truth_mag_u[i]-truth_mag_g[i], truth_mag_g[i]-truth_mag_r[i],
truth_mag_r[i]-truth_mag_i[i], truth_mag_i[i]-truth_mag_z[i])
testfile.write(string)
testfile.close()
truth_z=truth_data['redshift'][truth_idx]
coadd_mag_i=coadd_data['mag_i_cModel'][coadd_idx]
coadd_mag_u=coadd_data['mag_u_cModel'][coadd_idx]
coadd_mag_r=coadd_data['mag_r_cModel'][coadd_idx]
coadd_mag_g=coadd_data['mag_g_cModel'][coadd_idx]
coadd_mag_y=coadd_data['mag_y_cModel'][coadd_idx]
coadd_mag_z=coadd_data['mag_z_cModel'][coadd_idx]
mag_error_i=coadd_data['magerr_i'][coadd_idx]
mag_error_u=coadd_data['magerr_u'][coadd_idx]
mag_error_r=coadd_data['magerr_r'][coadd_idx]
mag_error_g=coadd_data['magerr_g'][coadd_idx]
mag_error_y=coadd_data['magerr_y'][coadd_idx]
mag_error_z=coadd_data['magerr_z'][coadd_idx]
trainingfile=open("DC2_out_ss_lensingcuts_SN.train", "w")
trainingfile.write("#redshift u g r i y z u-g g-r r-i i-z eu eg er ei ey ez\n")
testfile=open("DC2_out_ss_lensingcuts_SN.test","w")
testfile.write("#redshift u g r i y z u-g g-r r-i i-z eu eg er ei ey ez\n")
index=np.random.choice(len(truth_z),len(truth_z),replace=False)
for i in index[0:len(index)//2]:
if not np.any(np.isnan([truth_z[i],coadd_mag_i[i], coadd_mag_u[i], coadd_mag_r[i], coadd_mag_g[i], coadd_mag_y[i],coadd_mag_z[i], mag_error_i[i], mag_error_u[i], mag_error_r[i], mag_error_g[i], mag_error_y[i], mag_error_z[i]])):
string='%.6f %.5f %.5f %.5f %.5f %.5f %.5f %.5f %.5f %.5f %.5f %.9f %.9f %.9f %.9f %.9f %.9f\n'%(truth_z[i],coadd_mag_u[i],coadd_mag_g[i],coadd_mag_r[i],
coadd_mag_i[i],coadd_mag_y[i],coadd_mag_z[i],coadd_mag_u[i]-coadd_mag_g[i],
coadd_mag_g[i]-coadd_mag_r[i],coadd_mag_r[i]-coadd_mag_i[i],
coadd_mag_i[i]-coadd_mag_z[i],mag_error_u[i], mag_error_g[i],
mag_error_r[i], mag_error_i[i],mag_error_y[i],mag_error_z[i])
trainingfile.write(string)
trainingfile.close()
for i in index[len(index)//2+1:len(index)]:
if not np.any(np.isnan([truth_z[i],coadd_mag_i[i], coadd_mag_u[i], coadd_mag_r[i], coadd_mag_g[i], coadd_mag_y[i],coadd_mag_z[i], mag_error_i[i], mag_error_u[i], mag_error_r[i], mag_error_g[i], mag_error_y[i], mag_error_z[i]])):
string='%.6f %.5f %.5f %.5f %.5f %.5f %.5f %.5f %.5f %.5f %.5f %.9f %.9f %.9f %.9f %.9f %.9f\n'%(truth_z[i], coadd_mag_u[i],coadd_mag_g[i],
coadd_mag_r[i],coadd_mag_i[i],coadd_mag_y[i], coadd_mag_z[i],coadd_mag_u[i]-coadd_mag_g[i], coadd_mag_g[i]-coadd_mag_r[i],coadd_mag_r[i]-coadd_mag_i[i], coadd_mag_i[i]-coadd_mag_z[i],mag_error_u[i], mag_error_g[i],mag_error_r[i], mag_error_i[i],mag_error_y[i],mag_error_z[i])
testfile.write(string)
testfile.close()
data = np.vstack((truth_z,truth_mag_u,truth_mag_g,truth_mag_r,truth_mag_i,truth_mag_z,truth_mag_y,coadd_mag_u,coadd_mag_g,coadd_mag_r,coadd_mag_i,coadd_mag_z,coadd_mag_y,mag_error_u,mag_error_g,mag_error_r,mag_error_i,mag_error_z,mag_error_y))
data = np.transpose(data)
np.save('chi-ting',data)
print (np.isnan([np.nan, 1,2,3]))
print (np.any(np.isnan([np.nan,1,2,3])))
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-1-b3dac184320a> in <module> ----> 1 print (np.isnan([np.nan, 1,2,3])) 2 print (np.any(np.isnan([np.nan,1,2,3]))) NameError: name 'np' is not defined
plt.figure(figsize=(7.3, 6)) # Pick a figuresize that will result in a square equal-axis plus colorbar
plt.hist2d(delta_ra, delta_dec, bins=40, range=((-0.5, +0.5), (-0.5, +0.5)));
plt.xlabel(r'$\Delta$ RA [arcsec]');
plt.ylabel(r'$\Delta$ Dec [arcsec]');
plt.colorbar();
plt.xlim(-0.5, +0.5)
plt.ylim(-0.5, +0.5)
plt.axis('equal');
#Plotting Delta angle for the outputs
plt.hist(delta_arcsec, bins=80);
plt.xlim(0, 0.4);
plt.xlabel(r'$\Delta$ angle [arcsec]');
Most of the astrophysical truth information about the objects in DC2 is actually stored in the extragalactic catalog. The truth catalog only contains the positions and magnitudes that we believe should have been produced by the image simulators. To access further truth information (redshift, shape parameters, mass), we must cross-reference the truth catalog with the extragalactic catalog. This can be done via the column object_id
in the truth catalog, which maps directly to galaxy_id
in the extragalactic catalog.
Let's use pandas for this part to make our lives a bit easier :)
import pandas as pd
# convert truth_data and coadd_data to pandas dataframe and select the matched one
truth_matched = pd.DataFrame(truth_data).iloc[truth_idx].reset_index(drop=True)
coadd_matched = pd.DataFrame(coadd_data).iloc[coadd_idx].reset_index(drop=True)
matched = pd.merge(truth_matched, coadd_matched, left_index=True, right_index=True, suffixes=('_truth', '_coadd'))
# Select only those truth objects that are galaxies which were not sprinkled
# (stars and sprinkled objects do not occur in the extragalactic catalog)
matched_gals = matched.query('~star').query('~sprinkled')
# First, load the extragalactic catalog that was used for this simulation (using _test to skip md5 check)
extragalactic_cat = GCRCatalogs.load_catalog('proto-dc2_v2.1.2_test')
/global/common/software/lsst/common/miniconda/current/lib/python3.6/site-packages/GCRCatalogs/alphaq.py:105: UserWarning: No md5 sum specified in the config file warnings.warn('No md5 sum specified in the config file')
# load redshift and shear parameters from the extragalactic catalog, only for galaxise that are already in `matched_gals`
extragalactic_data = extragalactic_cat.get_quantities(
['galaxy_id', 'mag_i', 'ellipticity_true', 'shear_1', 'shear_2'],
filters=[(lambda x: np.in1d(x, matched_gals['object_id'].values, True), 'galaxy_id')]
)
# merge extragalactic_data to matched_gals
matched_gals = pd.merge(matched_gals, pd.DataFrame(extragalactic_data), 'left', left_on='object_id', right_on='galaxy_id', suffixes=('', '_extra'))
np.testing.assert_array_equal(matched_gals['object_id'], matched_gals['galaxy_id'])
trainingfile=open("DC2_output.train", "w")
trainingfile.write("#zs u g r i z u-g g-r r-i i-z eu eg er ei ez\n")
testfile=open("DC2_output.test","w")
testfile.write("#zs u g r i z u-g g-r r-i i-z eu eg er ei ez\n")
index=np.random.choice(len(matched_gals['redshift']),len(matched_gals['redshift']),replace=False)
for i in index[0:len(index)//2]:
string='%.6f %.5f %.5f %.5f %.5f %.5f %.5f %.5f %.5f %.5f %.9f %.9f %.9f %.9f %.9f\n'%(matched_gals['redshift'][i],
matched_gals['mag_u'][i],
matched_gals['mag_g'][i],
matched_gals['mag_r'][i],
matched_gals['mag_i'][i],
matched_gals['mag_z'][i],
matched_gals['mag_u'][i]-matched_gals['mag_g'][i],
matched_gals['mag_g'][i]-matched_gals['mag_r'][i],
matched_gals['mag_r'][i]-matched_gals['mag_i'][i],
matched_gals['mag_i'][i]-matched_gals['mag_z'][i],
matched_gals['magerr_u'][i],
matched_gals['magerr_g'][i],
matched_gals['magerr_r'][i],
matched_gals['magerr_i'][i],
matched_gals['magerr_z'][i])
trainingfile.write(string)
trainingfile.close()
for i in index[len(index)//2+1:len(index)]:
string='%.6f %.5f %.5f %.5f %.5f %.5f %.5f %.5f %.5f %.5f %.9f %.9f %.9f %.9f %.9f\n'%(matched_gals['redshift'][i],
matched_gals['mag_u'][i],
matched_gals['mag_g'][i],
matched_gals['mag_r'][i],
matched_gals['mag_i'][i],
matched_gals['mag_z'][i],
matched_gals['mag_u'][i]-matched_gals['mag_g'][i],
matched_gals['mag_g'][i]-matched_gals['mag_r'][i],
matched_gals['mag_r'][i]-matched_gals['mag_i'][i],
matched_gals['mag_i'][i]-matched_gals['mag_z'][i],
matched_gals['magerr_u'][i],
matched_gals['magerr_g'][i],
matched_gals['magerr_r'][i],
matched_gals['magerr_i'][i],
matched_gals['magerr_z'][i])
testfile.write(string)
testfile.close()
# check out the table
matched_gals['magerr_i']
File "<ipython-input-114-b2efd47be35d>", line 2 print ([x in matched_gals['magerr_i'] if np.isnan(x)]) ^ SyntaxError: invalid syntax
# compare the magnitude
plt.figure(figsize=(5,5));
plt.scatter(matched_gals['mag_i'], matched_gals['mag_i_extra'], s=1);
lims = [14, 26]
plt.plot(lims, lims, c='k', lw=0.5);
plt.xlabel('$i$ coadd');
plt.ylabel('$i$ extragalactic');
plt.xlim(lims);
plt.ylim(lims);
# compare the ellipticity
plt.figure(figsize=(5,5));
plt.scatter(matched_gals['shape_hsm_regauss_etot'], matched_gals['ellipticity_true'], s=1);
lims = [0, 1]
plt.plot(lims, lims, c='k', lw=0.5);
plt.xlabel('ellipticity coadd');
plt.ylabel('ellipticity extragalactic');
plt.xlim(lims);
plt.ylim(lims);