#!/usr/bin/env python
# coding: utf-8

# In[1]:

import os
import shutil

import pandas as pd
import geopandas as gpd


# In[2]:

# Fire perimeter datasets for the US can be found via GeoMAC at
# https://rmgsc.cr.usgs.gov/outgoing/GeoMAC/current_year_fire_data/current_year_all_states/
# Fire perimeters for Canada can be found at
# https://catalogue.data.gov.bc.ca/dataset/fire-perimeters-current


# In[3]:

# Define which state/province we are downloading L8 data for
state = 'WY'

# Define the filepaths where the shapefile data resides
CA = r'D:\data\FirePerimeters\2018_2019_Canada_perimeters.shp'
US = r'D:\data\FirePerimeters\perimeters_dd83.shp'
stateboundaries = os.path.join(r'D:\data\boundaries', state + '.shp')
wrsfile = r'D:\data\l8\wrs2_descending.shp'

# Define where the resultant L8 scenes and metadata files will go
l8out = r'D:\data\imagery'
sceneinfo = r'D:\data\l8'


# In[4]:

if state == 'BC' or state == 'AB':
    country = 'Canada'
    # Uses perimeter data from http://cwfis.cfs.nrcan.gc.ca/downloads/hotspots/ (merged 2018/2019)
    fire = gpd.GeoDataFrame.from_file(CA)
else:
    country = 'US'
    fire = gpd.GeoDataFrame.from_file(US)


# In[5]:

# Read the state boundary shapefile and the WRS path/row shapefile
# State files need to be in the same projection as the WRS file
bounds = gpd.read_file(stateboundaries)
wrs = gpd.GeoDataFrame.from_file(wrsfile)

# Select the Landsat path/rows that intersect with the state of interest
wrs_intersection = wrs[wrs.intersects(bounds.geometry[0])]


# In[ ]:

# Select the fires that intersect to later determine the needed imagery date
fires = gpd.sjoin(fire, wrs, how='inner', op='within')

# Sort the dataframe by most recent date; change the date format to match AWS's Landsat metadata date format
if country == 'Canada':
    fires['enddate'] = fires['LASTDATE'] + '.000000'
else:
    fires['enddate'] = fires['DATECRNT'] + ' 00:00:00.000000'

# Empty GeoDataFrame for the most recent fire perimeter date
recent_fire = gpd.GeoDataFrame()

# Select just the fires in the state; make lowercase strings for consistent matching of fire names
if country == 'US':
    fires = fires.loc[fires.STATE == state]
    fires.FIRENAME = fires.FIRENAME.str.lower()
    fires = fires[['FIRENAME', 'PATH', 'ROW', 'enddate']]
else:
    print('Skipping firename')
    fires = fires[['PATH', 'ROW', 'enddate']]

fires['PR'] = fires['PATH'].astype(str) + ' ' + fires['ROW'].astype(str)

# For each fire, pick the latest perimeter date
if country == 'US':
    for firename in fires['FIRENAME'].unique():
        rec_fire = fires.loc[fires.FIRENAME == firename]
        rec_fire = rec_fire.sort_values('enddate').tail(1)
        recent_fire = pd.concat([recent_fire, rec_fire])
else:
    rec_fire = fires
    recent_fire = pd.concat([recent_fire, rec_fire])

# Then find the latest fire date for each path/row
pr_date = gpd.GeoDataFrame()
for pr in recent_fire['PR'].unique():
    prdate = recent_fire.loc[recent_fire.PR == pr]
    prdate = prdate.sort_values('enddate').tail(1)
    pr_date = pd.concat([pr_date, prdate])
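
# In[ ]:

# OPTIONAL (not part of the original workflow): the two loops above can also be written with
# groupby, which can be easier to read for large fire datasets. This is a minimal sketch that
# assumes the 'FIRENAME', 'PR', and 'enddate' columns built above; the *_alt names are
# illustrative only and are not used downstream.
if country == 'US':
    # latest perimeter date per fire name
    recent_fire_alt = fires.sort_values('enddate').groupby('FIRENAME').tail(1)
else:
    recent_fire_alt = fires
# latest fire date per path/row
pr_date_alt = recent_fire_alt.sort_values('enddate').groupby('PR').tail(1)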
# In[7]:

# OPTIONAL: view a folium map of the selected path/rows to visualize coverage
import folium
import numpy as np

xy = np.asarray(bounds.centroid[0].xy).squeeze()
center = list(xy[::-1])
zoom = 6

m = folium.Map(location=center, zoom_start=zoom, control_scale=True)
m.add_child(folium.GeoJson(bounds.__geo_interface__, name='Path/Row Coverage',
                           style_function=lambda x: {'color': 'red', 'alpha': 0}))

for i, row in wrs_intersection.iterrows():
    # Create a string for the name containing the path and row of this polygon
    name = 'path: %03d, row: %03d' % (row.PATH, row.ROW)
    # Create the folium geometry of this polygon
    g = folium.GeoJson(row.geometry.__geo_interface__, name=name)
    # Add a folium Popup object with the name string
    g.add_child(folium.Popup(name))
    # Add the object to the map
    g.add_to(m)

folium.LayerControl().add_to(m)
m


# In[8]:

paths, rows = wrs_intersection['PATH'].values, wrs_intersection['ROW'].values


# In[9]:

# Count how many path/rows there are to download imagery for
count_images = 0
for path, row in zip(paths, rows):
    count_images = count_images + 1
print(str(count_images) + ' scenes')


# In[10]:

# Read the AWS metadata csv for L8 into a dataframe. This is the data we will use to select
# scenes matching our requirements.
s3_scenes = pd.read_csv('http://landsat-pds.s3.amazonaws.com/c1/L8/scene_list.gz',
                        compression='gzip')
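
# In[ ]:

# OPTIONAL (not in the original notebook): a quick sanity check of the scene list before the
# selection loop below. Only the columns the loop and download step actually rely on are shown;
# the column names are taken from the code that follows.
print(s3_scenes[['productId', 'entityId', 'acquisitionDate', 'cloudCover', 'path', 'row']].head())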
# In[11]:

# Bulk download list
bulk_list = []
not_found = []
n = 0

# Find scenes for each path/row
for path, row in zip(paths, rows):
    n = n + 1
    print(n)

    # Define the thresholds for date range and cloud cover:
    datelowest = '2019-06-01 00:00:00.000000'
    datehigh = '2019-09-30 00:00:00.000000'
    cloudcover = 10

    print('Path: ' + str(path) + ' Row: ' + str(row))

    # Check if the path/row has a recent fire; if so, use the fire's end date as the lower bound for the L8 scene search
    pr = str(path) + ' ' + str(row)
    prloc = pr_date.loc[pr_date.PR == pr]
    print(prloc.head())
    if prloc.shape[0] != 1:
        datelow = datelowest
        print('No fire - daterange unchanged ' + datelow[:-15] + 'to ' + datehigh[:-16])
    elif prloc['enddate'].values[0] > datelowest:
        # Ensure the most recent year's imagery is used, if using more than 1 fire year
        datelow = prloc['enddate'].values[0]
        print('Fire occurred - new scene daterange ' + datelow[:-15] + 'to ' + datehigh[:-16])
    else:
        datelow = datelowest
        print('Fires present from previous year, using current year imagery ' + datelow[:-15] + 'to ' + datehigh[:-16])
    if datelow == '':
        datelow = datelowest

    # Filter the Landsat Amazon S3 table for images matching the path/row and cloudcover parameters.
    # Ideally, imagery will be <10% scene cloud cover. The loop below raises the threshold in
    # increments of 10% cover until scenes are found or a 90% threshold has been tried. Change the
    # threshold requirements as needed. Currently there is no way to look at cloud cover within the
    # fire perimeter/AOI before download - but this method (looking at total scene cover) should be
    # adequate for most purposes.
    tries = 10
    while tries >= 10 and tries <= 90:
        if tries > 10:
            ntries = tries // 10
            cloudcover = tries
            print('Try #' + str(ntries) + ': ' + str(cloudcover) + '% cloudcover threshold')
        scenes = s3_scenes[(s3_scenes.path == path) &
                           (s3_scenes.row == row) &
                           (s3_scenes.cloudCover <= cloudcover) &
                           (s3_scenes.acquisitionDate >= datelow) &
                           (s3_scenes.acquisitionDate <= datehigh) &
                           # We don't want any tier 2 / uncorrected data
                           (~s3_scenes.productId.str.contains('_T2')) &
                           (~s3_scenes.productId.str.contains('_RT'))]
        print('Found {} images\n'.format(len(scenes)))
        if len(scenes) == 0:
            tries = tries + 10
            print('Retry with higher cloudcover threshold:')
        else:
            tries = 100

    # Select the scenes that meet the date and cloud cover criteria
    if len(scenes) > 0:
        # Select a scene in the middle of the date range if possible - for my purposes, full-leaf imagery is ideal
        sc = len(scenes)
        sd = sc // 2
        sl = sc - sd
        if sd > 2 and sl < 2:
            sl = -1
        else:
            sl = sl * -1
        # Pick the middle-date scene
        scene = scenes.sort_values('acquisitionDate').iloc[sl]
        # Add the selected scene to the bulk download list.
        bulk_list.append(scene)
    else:
        # If no scenes are found even after raising the cloudcover threshold, add the path/row to a list (find manually)
        print('No scenes were selected for this path/row')
        nf = str(path) + ',' + str(row)
        not_found.append(nf)


# In[12]:

# Concatenate the scene info into two lists: scenes that have no match, and scenes we want to download.
bulk_frame = pd.concat(bulk_list, axis=1).T
nf_frame = pd.DataFrame(not_found)
nf_frame.to_csv(os.path.join(sceneinfo, state + 'scenes_missing.txt'), sep='\t', index=False, header=False)
bulk_frame.head(10)


# In[13]:

# Option 1 - get the scene list to upload to earthexplorer.usgs.gov/filelist
bulklist = bulk_frame[['entityId']]
bulklist.to_csv(os.path.join(sceneinfo, state + 'pathrowlist.txt'), sep='\t', index=False, header=False)
bulk_frame.to_csv(os.path.join(sceneinfo, state + 'frame.txt'), sep='\t', index=False)
# In[ ]:

# Option 2 - download the data directly
import requests
from bs4 import BeautifulSoup

LANDSAT_PATH = os.path.join(l8out, state, 'l8imagery')

# For each scene in the bulk download list
for i, row in bulk_frame.iterrows():

    entity_dir = os.path.join(LANDSAT_PATH, row.productId)

    # Skip the scene if it has already been downloaded - check and re-download any files that may be
    # corrupted if the download was interrupted
    if os.path.isdir(entity_dir):
        print('Skipping ' + entity_dir + ' as it already exists')
    else:
        # Print the product ID
        print('\n', 'EntityId:', row.productId, '\n')

        # Request the html text of the download_url from the Amazon server.
        response = requests.get(row.download_url)

        # If the response status code is fine (200)
        if response.status_code == 200:

            # Import the html to BeautifulSoup
            html = BeautifulSoup(response.content, 'html.parser')

            # Create the dir where we will put this image's files.
            if not os.path.exists(entity_dir):
                os.makedirs(entity_dir)

            # Second loop: for each band of this image that we find using the html <li> tag
            for li in html.find_all('li'):

                # Get the href tag
                file = li.find_next('a').get('href')
                filestring = str(file)
                filen = os.path.join(entity_dir, filestring)

                # Only download the .TIF and metadata files; other formats (.IMD) aren't necessary for what I need
                if filestring.endswith('.TIF') or filestring.endswith('_MTL.txt') or filestring.endswith('_ANG.txt'):
                    if not os.path.isfile(filen):  # skip anything already downloaded
                        print('  Downloading: {}'.format(file))

                        # Download the file
                        response = requests.get(row.download_url.replace('index.html', file), stream=True)
                        with open(os.path.join(entity_dir, file), 'wb') as output:
                            shutil.copyfileobj(response.raw, output)
                        del response
                    else:
                        print(filestring + ' exists')
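

# In[ ]:

# OPTIONAL (not part of the original notebook): a minimal sketch that checks what actually
# landed on disk, useful when a download was interrupted and you want to know which product
# folders to delete and re-run. It only assumes the LANDSAT_PATH layout created above
# (one folder per productId containing .TIF band files plus the _MTL.txt/_ANG.txt metadata).
import glob

if os.path.isdir(LANDSAT_PATH):
    for product in sorted(os.listdir(LANDSAT_PATH)):
        product_dir = os.path.join(LANDSAT_PATH, product)
        if os.path.isdir(product_dir):
            tifs = glob.glob(os.path.join(product_dir, '*.TIF'))
            mtl = glob.glob(os.path.join(product_dir, '*_MTL.txt'))
            print('{}: {} TIF bands, MTL present: {}'.format(product, len(tifs), bool(mtl)))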