import pandas as pd
import geopandas as gpd
import os, shutil
# Fire perimeter datasets for the US can be found via GeoMAC at
# https://rmgsc.cr.usgs.gov/outgoing/GeoMAC/current_year_fire_data/current_year_all_states/
# Fire perimeters for Canada can be found at
# https://catalogue.data.gov.bc.ca/dataset/fire-perimeters-current
# Define which state/province we are downloading L8 data for
state = 'WY'
# Define the filepaths where the shapefile data resides
CA = r'D:\data\FirePerimeters\2018_2019_Canada_perimeters.shp'
US = r'D:\data\FirePerimeters\perimeters_dd83.shp'
stateboundaries = (os.path.join(r'D:\data\boundaries',state + '.shp'))
wrsfile = r'D:\data\l8\wrs2_descending.shp'
# Define where the resultant l8 scenes and metadata files will go
l8out = r'D:\data\imagery'
sceneinfo = r'D:\data\l8'
if state == 'BC' or state == 'AB':
    country = 'Canada'
    fire = gpd.GeoDataFrame.from_file(CA)
    # Uses estimated perimeter data from http://cwfis.cfs.nrcan.gc.ca/downloads/hotspots/ (merged 2018/2019)
else:
    country = 'US'
    fire = gpd.GeoDataFrame.from_file(US)
# Read the state boundary shapefile and the wrs path/row shapefile
# State files need to be the same projection as the WRS file
bounds = gpd.read_file(stateboundaries)
wrs = gpd.GeoDataFrame.from_file(wrsfile)
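# The intersects() test below silently misbehaves if the state boundary is in a different
# CRS than the WRS-2 grid. A minimal guard, assuming both files carry a valid .crs:
if bounds.crs != wrs.crs:
    bounds = bounds.to_crs(wrs.crs)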
# Select the Landsat path/rows that intersect with the state of interest
wrs_intersection = wrs[wrs.intersects(bounds.geometry[0])]
# Spatially join the fires to the WRS grid (keeping fires wholly within a path/row) to later determine the needed imagery date
fires = gpd.sjoin(fire, wrs, how='inner', op='within')
# Change the date format to match AWS's Landsat metadata date format (the dataframe is sorted by date later)
if country == 'Canada':
    fires['enddate'] = fires['LASTDATE'] + '.000000'
else:
    fires['enddate'] = fires['DATECRNT'] + ' 00:00:00.000000'
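# Note: keeping enddate in zero-padded 'YYYY-MM-DD HH:MM:SS.ffffff' form means plain
# string comparison sorts chronologically, which the scene search below relies on.
# A quick illustration of the property:
assert '2019-08-17 00:00:00.000000' > '2019-06-01 00:00:00.000000'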
# empty gdf for most recent fire perimeter date
recent_fire = gpd.GeoDataFrame()
# select just fires in the state, make lowercase strings for consistent matching of fire names
if country == 'US':
    fires = fires.loc[fires.STATE == state]
    fires.FIRENAME = fires.FIRENAME.str.lower()
    fires = fires[['FIRENAME', 'PATH', 'ROW', 'enddate']]
else:
    print 'Skipping firename'
    fires = fires[['PATH', 'ROW', 'enddate']]
fires['PR'] = fires['PATH'].astype(str)+' '+fires['ROW'].astype(str)
# for each fire, pick the latest date
if country == 'US':
    for firename in fires['FIRENAME'].unique():
        rec_fire = fires.loc[fires.FIRENAME == firename]
        # sort_values returns a copy, so assign it back before taking the last (latest) row
        rec_fire = rec_fire.sort_values('enddate').tail(1)
        recent_fire = recent_fire.append(rec_fire)
else:
    rec_fire = fires
    recent_fire = recent_fire.append(rec_fire)
# then find the latest fire date for the path/row
pr_date = gpd.GeoDataFrame()
for pr in recent_fire['PR'].unique():
    prdate = recent_fire.loc[recent_fire.PR == pr]
    prdate = prdate.sort_values('enddate').tail(1)
    pr_date = pr_date.append(prdate)
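# Equivalent one-liner sketch: sort once, then keep the last (latest-enddate) row per
# path/row group. This recomputes pr_date and should match the loop above.
pr_date = recent_fire.sort_values('enddate').groupby('PR').tail(1)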
# OPTIONAL: view folium map of the path/rows selected to visualize coverage
import folium
import numpy as np
xy = np.asarray(bounds.centroid[0].xy).squeeze()
center = list(xy[::-1])
zoom = 6
m = folium.Map(location=center, zoom_start=zoom, control_scale=True)
m.add_child(folium.GeoJson(bounds.__geo_interface__, name='Path/Row Coverage',
                           style_function=lambda x: {'color': 'red', 'fillOpacity': 0}))
for i, row in wrs_intersection.iterrows():
    # Create a string for the name containing the path and row of this Polygon
    name = 'path: %03d, row: %03d' % (row.PATH, row.ROW)
    # Create the folium geometry of this Polygon
    g = folium.GeoJson(row.geometry.__geo_interface__, name=name)
    # Add a folium Popup object with the name string
    g.add_child(folium.Popup(name))
    # Add the object to the map
    g.add_to(m)
folium.LayerControl().add_to(m)
m
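# If running as a plain script rather than in a notebook, the bare `m` above won't display
# anything; folium maps can instead be saved to HTML and opened in a browser. The output
# path here is an assumption:
m.save(os.path.join(sceneinfo, state + '_pathrow_coverage.html'))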
paths, rows = wrs_intersection['PATH'].values, wrs_intersection['ROW'].values
# Count how many path/row combinations there are to download imagery for
count_images = len(paths)
print str(count_images) + ' scenes'
24 scenes
# Read AWS metadata csv for l8 into a dataframe. This is the data we will use to select scenes matching our requirements.
s3_scenes = pd.read_csv('http://landsat-pds.s3.amazonaws.com/c1/L8/scene_list.gz', compression='gzip')
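# Quick peek at the metadata schema before filtering; the columns used below
# (productId, entityId, acquisitionDate, cloudCover, path, row, download_url)
# should all be present:
print s3_scenes.columns.tolist()
print str(len(s3_scenes)) + ' scene records'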
# bulk download list
bulk_list = []
not_found = []
n = 0
# Find scenes for each path/row
for path, row in zip(paths, rows):
    n = n + 1
    print n
    ## Define the thresholds for date range and cloud cover:
    datelowest = '2019-06-01 00:00:00.000000'
    datehigh = '2019-09-30 00:00:00.000000'
    cloudcover = 10
    print 'Path: ' + str(path) + ' Row: ' + str(row)
    # Check if the path/row has a recent fire; if so, use the fire's end date as the datelow for the L8 scene search
    pr = str(path) + ' ' + str(row)
    prloc = pr_date.loc[pr_date.PR == pr]
    print prloc.head()
    if prloc.shape[0] != 1:
        datelow = datelowest
        print 'No fire - daterange unchanged ' + datelow[:-15] + 'to ' + datehigh[:-16]
    elif prloc['enddate'].values[0] > datelowest:  # Ensure the most recent year's imagery is used, if using more than one fire year
        datelow = prloc['enddate'].values[0]
        print 'Fire occurred - new scene daterange ' + datelow[:-15] + 'to ' + datehigh[:-16]
    else:
        datelow = datelowest
        print 'Fires present from previous year, using current year imagery ' + datelow[:-15] + 'to ' + datehigh[:-16]
    if datelow == '':
        datelow = datelowest
    # Filter the Landsat Amazon S3 table for images matching the path/row and cloudcover parameters.
    tries = 10
    # Ideally, imagery will be <10% scene cloud cover. The code below loops through the imagery in increments
    # of 10% cover until a 100% threshold is reached. Change the threshold requirements as needed.
    # Currently there is no way to look at cloud cover within the fire perimeter/AOI before download, but this
    # method (looking at total scene cover) should be adequate for most purposes.
    while tries >= 10 and tries <= 90:
        if tries > 10:
            ntries = tries / 10
            cloudcover = tries
            print 'Try #' + str(ntries) + ': ' + str(cloudcover) + '% cloudcover threshold'
        scenes = s3_scenes[(s3_scenes.path == path) & (s3_scenes.row == row) &
                           (s3_scenes.cloudCover <= cloudcover) &
                           (s3_scenes.acquisitionDate >= datelow) &
                           (s3_scenes.acquisitionDate <= datehigh) &
                           # We don't want any Tier 2 or real-time (uncorrected) data
                           (~s3_scenes.productId.str.contains('_T2')) &
                           (~s3_scenes.productId.str.contains('_RT'))]
        print 'Found {} images\n'.format(len(scenes))
        if len(scenes) == 0:
            tries = tries + 10
            print 'Retry with higher cloudcover threshold:'
        else:
            tries = 100
    # Select the scene that meets the date and cloud cover criteria
    if len(scenes) > 0:
        # select a scene from the middle of the date range if possible - for my purposes, full-leaf imagery is ideal
        sc = len(scenes)
        sd = sc / 2
        sl = sc - sd
        if sd > 2 and sl < 2:
            sl = -1
        else:
            sl = sl * -1
        # pick the middle-date scene
        scene = scenes.sort_values('acquisitionDate').iloc[sl]
        # Add the selected scene to the bulk download list.
        bulk_list.append(scene)
    else:
        # if no scenes are found even after raising the cloudcover threshold, record the path/row so it can be found manually
        print 'No scenes were selected for this path/row'
        nf = str(path) + ',' + str(row)
        not_found.append(nf)
1  Path: 36 Row: 29
      FIRENAME  PATH  ROW                     enddate     PR
2103  valley 2    36   29  2019-08-17 00:00:00.000000  36 29
Fire occurred - new scene daterange 2019-08-17 to 2019-09-30
Found 1 images

2  Path: 36 Row: 30
     FIRENAME  PATH  ROW                     enddate     PR
2328  sawmill    36   30  2019-08-13 00:00:00.000000  36 30
Fire occurred - new scene daterange 2019-08-13 to 2019-09-30
Found 2 images

3  Path: 36 Row: 31
Empty DataFrame
Columns: [FIRENAME, PATH, ROW, enddate, PR]
Index: []
No fire - daterange unchanged 2019-06-01 to 2019-09-30
Found 5 images

4  Path: 36 Row: 32
Empty DataFrame
Columns: [FIRENAME, PATH, ROW, enddate, PR]
Index: []
No fire - daterange unchanged 2019-06-01 to 2019-09-30
Found 5 images

5  Path: 34 Row: 29
              FIRENAME  PATH  ROW                     enddate     PR
1306  prairie dog draw    34   29  2019-09-10 00:00:00.000000  34 29
Fire occurred - new scene daterange 2019-09-10 to 2019-09-30
Found 1 images

6  Path: 34 Row: 30
           FIRENAME  PATH  ROW                     enddate     PR
1784  spring canyon    34   30  2019-08-26 00:00:00.000000  34 30
Fire occurred - new scene daterange 2019-08-26 to 2019-09-30
Found 2 images

7  Path: 34 Row: 31
         FIRENAME  PATH  ROW                     enddate     PR
2304  ashenfelder    34   31  2019-09-06 00:00:00.000000  34 31
Fire occurred - new scene daterange 2019-09-06 to 2019-09-30
Found 1 images

8  Path: 34 Row: 32
Empty DataFrame
Columns: [FIRENAME, PATH, ROW, enddate, PR]
Index: []
No fire - daterange unchanged 2019-06-01 to 2019-09-30
Found 1 images

9  Path: 39 Row: 29
Empty DataFrame
Columns: [FIRENAME, PATH, ROW, enddate, PR]
Index: []
No fire - daterange unchanged 2019-06-01 to 2019-09-30
Found 0 images
Retry with higher cloudcover threshold:
Try #2: 20% cloudcover threshold
Found 2 images

10  Path: 37 Row: 29
       FIRENAME  PATH  ROW                     enddate     PR
2871  brimstone    37   29  2019-09-17 00:00:00.000000  37 29
Fire occurred - new scene daterange 2019-09-17 to 2019-09-30
Found 0 images
Retry with higher cloudcover threshold:
Try #2: 20% cloudcover threshold
Found 1 images

11  Path: 37 Row: 30
         FIRENAME  PATH  ROW                     enddate     PR
2459  bomber lake    37   30  2019-08-29 00:00:00.000000  37 30
Fire occurred - new scene daterange 2019-08-29 to 2019-09-30
Found 0 images
Retry with higher cloudcover threshold:
Try #2: 20% cloudcover threshold
Found 2 images

12  Path: 37 Row: 31
     FIRENAME  PATH  ROW                     enddate     PR
2280  currant    37   31  2019-09-03 00:00:00.000000  37 31
Fire occurred - new scene daterange 2019-09-03 to 2019-09-30
Found 2 images

13  Path: 37 Row: 32
Empty DataFrame
Columns: [FIRENAME, PATH, ROW, enddate, PR]
Index: []
No fire - daterange unchanged 2019-06-01 to 2019-09-30
Found 5 images

14  Path: 35 Row: 29
                 FIRENAME  PATH  ROW                     enddate     PR
1299  fortification creek    35   29  2019-08-22 00:00:00.000000  35 29
Fire occurred - new scene daterange 2019-08-22 to 2019-09-30
Found 1 images

15  Path: 35 Row: 30
     FIRENAME  PATH  ROW                     enddate     PR
1849  whipsaw    35   30  2019-08-19 00:00:00.000000  35 30
Fire occurred - new scene daterange 2019-08-19 to 2019-09-30
Found 2 images

16  Path: 35 Row: 31
            FIRENAME  PATH  ROW                     enddate     PR
2850  pedro mountain    35   31  2019-08-28 00:00:00.000000  35 31
Fire occurred - new scene daterange 2019-08-28 to 2019-09-30
Found 0 images
Retry with higher cloudcover threshold:
Try #2: 20% cloudcover threshold
Found 2 images

17  Path: 35 Row: 32
Empty DataFrame
Columns: [FIRENAME, PATH, ROW, enddate, PR]
Index: []
No fire - daterange unchanged 2019-06-01 to 2019-09-30
Found 2 images

18  Path: 33 Row: 29
Empty DataFrame
Columns: [FIRENAME, PATH, ROW, enddate, PR]
Index: []
No fire - daterange unchanged 2019-06-01 to 2019-09-30
Found 1 images

19  Path: 33 Row: 30
Empty DataFrame
Columns: [FIRENAME, PATH, ROW, enddate, PR]
Index: []
No fire - daterange unchanged 2019-06-01 to 2019-09-30
Found 2 images

20  Path: 33 Row: 31
      FIRENAME  PATH  ROW                     enddate     PR
1428  finkbine    33   31  2019-07-24 00:00:00.000000  33 31
Fire occurred - new scene daterange 2019-07-24 to 2019-09-30
Found 0 images
Retry with higher cloudcover threshold:
Try #2: 20% cloudcover threshold
Found 1 images

21  Path: 33 Row: 32
Empty DataFrame
Columns: [FIRENAME, PATH, ROW, enddate, PR]
Index: []
No fire - daterange unchanged 2019-06-01 to 2019-09-30
Found 3 images

22  Path: 38 Row: 29
       FIRENAME  PATH  ROW                     enddate     PR
1819  box creek    38   29  2019-09-11 00:00:00.000000  38 29
Fire occurred - new scene daterange 2019-09-11 to 2019-09-30
Found 0 images
Retry with higher cloudcover threshold:
Try #2: 20% cloudcover threshold
Found 1 images

23  Path: 38 Row: 30
          FIRENAME  PATH  ROW                     enddate     PR
1587  saddle butte    38   30  2019-09-03 00:00:00.000000  38 30
Fire occurred - new scene daterange 2019-09-03 to 2019-09-30
Found 0 images
Retry with higher cloudcover threshold:
Try #2: 20% cloudcover threshold
Found 1 images

24  Path: 38 Row: 31
Empty DataFrame
Columns: [FIRENAME, PATH, ROW, enddate, PR]
Index: []
No fire - daterange unchanged 2019-06-01 to 2019-09-30
Found 4 images
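# Aside: an alternative to the stepped cloudcover retries above is to query once with no
# cloud ceiling and keep the least-cloudy scene in the date range. A sketch of that
# selection (not used for the run above; it reuses the last loop iteration's path/row/datelow):
alt = s3_scenes[(s3_scenes.path == path) & (s3_scenes.row == row) &
                (s3_scenes.acquisitionDate >= datelow) &
                (s3_scenes.acquisitionDate <= datehigh) &
                (~s3_scenes.productId.str.contains('_T2')) &
                (~s3_scenes.productId.str.contains('_RT'))]
alt = alt.sort_values('cloudCover').head(1)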
# Concatenate the scene info into two frames: scenes that have no match, and scenes we want to download.
bulk_frame = pd.concat(bulk_list, axis=1).T
nf_frame = pd.DataFrame(not_found)
nf_frame.to_csv(os.path.join(sceneinfo, state + 'scenes_missing.txt'), sep='\t', index=False, header=False)
bulk_frame.head(10)
 | productId | entityId | acquisitionDate | cloudCover | processingLevel | path | row | min_lat | min_lon | max_lat | max_lon | download_url
---|---|---|---|---|---|---|---|---|---|---|---|---
1698771 | LC08_L1TP_036029_20190831_20190916_01_T1 | LC80360292019243LGN00 | 2019-08-31 17:54:50.649040 | 1.11 | L1TP | 36 | 29 | 43.4775 | -108.983 | 45.6465 | -105.894 | https://s3-us-west-2.amazonaws.com/landsat-pds...
1698381 | LC08_L1TP_036030_20190831_20190916_01_T1 | LC80360302019243LGN00 | 2019-08-31 17:55:14.535844 | 0.52 | L1TP | 36 | 30 | 42.0485 | -109.453 | 44.2195 | -106.418 | https://s3-us-west-2.amazonaws.com/landsat-pds...
1641340 | LC08_L1TP_036031_20190714_20190720_01_T1 | LC80360312019195LGN00 | 2019-07-14 17:55:23.158334 | 1.07 | L1TP | 36 | 31 | 40.6706 | -109.78 | 42.8245 | -107.077 | https://s3-us-west-2.amazonaws.com/landsat-pds...
1641341 | LC08_L1TP_036032_20190714_20190720_01_T1 | LC80360322019195LGN00 | 2019-07-14 17:55:47.045136 | 2.04 | L1TP | 36 | 32 | 39.2485 | -110.209 | 41.3938 | -107.557 | https://s3-us-west-2.amazonaws.com/landsat-pds...
1715327 | LC08_L1TP_034029_20190918_20190926_01_T1 | LC80340292019261LGN00 | 2019-09-18 17:42:34.829412 | 0 | L1TP | 34 | 29 | 43.5227 | -105.811 | 45.6673 | -102.901 | https://s3-us-west-2.amazonaws.com/landsat-pds...
1715084 | LC08_L1TP_034030_20190918_20190926_01_T1 | LC80340302019261LGN00 | 2019-09-18 17:42:58.716216 | 0.01 | L1TP | 34 | 30 | 42.1046 | -106.284 | 44.2397 | -103.42 | https://s3-us-west-2.amazonaws.com/landsat-pds...
1715409 | LC08_L1TP_034031_20190918_20190926_01_T1 | LC80340312019261LGN00 | 2019-09-18 17:43:22.611492 | 5.09 | L1TP | 34 | 31 | 40.6765 | -106.738 | 42.8102 | -103.92 | https://s3-us-west-2.amazonaws.com/landsat-pds...
1697556 | LC08_L1TP_034032_20190902_20190916_01_T1 | LC80340322019245LGN00 | 2019-09-02 17:43:40.977337 | 7.84 | L1TP | 34 | 32 | 39.2426 | -107.169 | 41.379 | -104.394 | https://s3-us-west-2.amazonaws.com/landsat-pds...
1701659 | LC08_L1TP_039029_20190905_20190917_01_T1 | LC80390292019248LGN00 | 2019-09-05 18:13:23.833895 | 14.55 | L1TP | 39 | 29 | 43.5041 | -113.579 | 45.6572 | -110.568 | https://s3-us-west-2.amazonaws.com/landsat-pds...
1712605 | LC08_L1TP_037029_20190923_20190926_01_T1 | LC80370292019266LGN00 | 2019-09-23 18:01:08.866094 | 14.89 | L1TP | 37 | 29 | 43.5178 | -110.43 | 45.674 | -107.58 | https://s3-us-west-2.amazonaws.com/landsat-pds...
# Option 1 - get the scene list to upload to earthexplorer.usgs.gov/filelist
bulklist = bulk_frame[['entityId']]
bulklist.to_csv((os.path.join(sceneinfo, state + 'pathrowlist.txt')),sep='\t', index=False, header=False)
bulk_frame.to_csv((os.path.join(sceneinfo, state + 'frame.txt')),sep='\t', index=False)
# Option 2 - download the data directly
import requests
from bs4 import BeautifulSoup
LANDSAT_PATH = os.path.join(l8out, state, 'l8imagery')
# For each row
for i, row in bulk_frame.iterrows():
    entity_dir = os.path.join(LANDSAT_PATH, row.productId)
    # Skip the scene if it has already been downloaded - check and re-download any files that
    # may be corrupted if a download was interrupted
    if os.path.isdir(entity_dir):
        print 'Skipping ' + entity_dir + ' as it already exists'
    else:
        # Print the product ID
        print '\n', 'EntityId:', row.productId, '\n'
        # Request the html text of the download_url from the Amazon server.
        response = requests.get(row.download_url)
        # If the response status code is fine (200)
        if response.status_code == 200:
            # Import the html to BeautifulSoup
            html = BeautifulSoup(response.content, 'html.parser')
            # Create the dir where we will put this image's files.
            if not os.path.exists(entity_dir):
                os.makedirs(entity_dir)
            # Second loop: for each band of this image that we find using the html <li> tag
            for li in html.find_all('li'):
                # Get the href tag
                file = li.find_next('a').get('href')
                filestring = str(file)
                filen = os.path.join(entity_dir, filestring)
                # only download the .TIF and metadata files; other formats (.IMD) aren't necessary for what I need
                if filestring[-4:] == '.TIF' or filestring[-8:] == '_MTL.txt' or filestring[-8:] == '_ANG.txt':
                    if not os.path.isfile(filen):  # skip anything already downloaded
                        print ' Downloading: {}'.format(file)
                        # Download the files
                        response = requests.get(row.download_url.replace('index.html', file), stream=True)
                        with open(filen, 'wb') as output:
                            shutil.copyfileobj(response.raw, output)
                        del response
                    else:
                        print filestring + ' exists'
Skipping D:\data\imagery\WY\l8imagery\LC08_L1TP_036029_20190831_20190916_01_T1 as it already exists
Skipping D:\data\imagery\WY\l8imagery\LC08_L1TP_036030_20190831_20190916_01_T1 as it already exists
Skipping D:\data\imagery\WY\l8imagery\LC08_L1TP_036031_20190714_20190720_01_T1 as it already exists

EntityId: LC08_L1TP_036032_20190714_20190720_01_T1

 Downloading: LC08_L1TP_036032_20190714_20190720_01_T1_B6.TIF
 Downloading: LC08_L1TP_036032_20190714_20190720_01_T1_B8.TIF
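# Once the bulk download finishes, it's worth confirming each scene directory is complete
# before processing. A minimal sketch, treating the presence of the scene's _MTL.txt
# metadata file as the completeness check (adjust to your own definition of complete):
for i, row in bulk_frame.iterrows():
    mtl = os.path.join(LANDSAT_PATH, row.productId, row.productId + '_MTL.txt')
    if not os.path.isfile(mtl):
        print 'Incomplete or missing scene: ' + row.productId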