#!/usr/bin/env python
# coding: utf-8

# #### Microsoft Azure open data program: https://azure.microsoft.com/en-us/services/open-datasets/catalog/naip/

# #### This code uses the capabilities of Cloud Optimized GeoTIFFs to analyze NAIP tiles covering a large area of interest. Instead of downloading each tile manually for later analysis, each tile can be streamed directly from an Azure blob.

# #### The analysis (in this case NDVI or MSAVI classification) can be completed and saved for later use, while also saving the limited drive space available to me.

# In[1]:


import pandas as pd
import geopandas as gpd
import os, shutil
import rasterio
import urllib
import re
import fiona.transform
import requests
import numpy as np
import gdal

gdal.SetConfigOption("GDAL_HTTP_UNSAFESSL", "YES")
gdal.VSICurlClearCache()


# In[3]:


# Put the quad and aoi files in the infilepath directory; define a different outpath if desired
infilepath = r'D:\code\NAIP download'
outpath = infilepath

# text appended to the output filename of each NAIP quad's completed analysis
outfilen_append = '_ndvi.tif'

# shapefile from Microsoft Azure with the NAIP quads
# https://naipeuwest.blob.core.windows.net/naip/v002/ca/2018/ca_shpfl_2018/index.html
inNAIPquad = 'NAIP_18_CA.shp'

# file with polygons/points of interest. If using the whole state: aoi_file = 'centroid' (to get the centroid of each scene)
aoi_file = 'CA_aoi.gpkg'
#aoi_file = 'centroid'


# In[4]:


# NAIP metadata
year = '2018'
state = 'ca'  # lowercase
resolution = '060cm'
blob_root = 'https://naipeuwest.blob.core.windows.net/naip'


# In[5]:


## Read the aoi file. If using the whole state, the code will create a list of tiles based on centroids later
if aoi_file == 'centroid':
    aoi = 'centroid'
else:
    aoi = gpd.read_file(os.path.join(infilepath, aoi_file))


# In[6]:


def createListofTiles(inpath, NAIPquad, inaoi):
    # NAIP quad file
    ntiles = gpd.GeoDataFrame.from_file(os.path.join(inpath, NAIPquad))

    # if the aoi was passed as the string 'centroid', build a point AOI from the tile centroids
    if isinstance(inaoi, str):
        print('centroid')
        aoicreate = gpd.GeoDataFrame(ntiles.geometry.centroid, columns=['geometry'])
        inaoi = gpd.sjoin(aoicreate, ntiles, how="inner", op='intersects')
        inaoi = inaoi[['geometry']]

    # reproject the AOI to the quad CRS if needed
    if inaoi.crs != ntiles.crs:
        print(inaoi.crs, ntiles.crs)
        c = ntiles.crs
        print(c)
        inaoi = inaoi.to_crs(c)

    # sort so that the images with the most AOIs get processed first
    if (inaoi.geom_type == 'Point').iloc[0]:
        # Intersect with the AOI to get the names of all the tiles needed
        quad_intersection = gpd.sjoin(ntiles, inaoi, how="right", op='intersects')
        print(quad_intersection)
        print('Sorting by counts of points in each quad')
        quad_intersection['counts'] = quad_intersection.groupby(['FileName'])['FileName'].transform('count')
        quad_intersection = quad_intersection.sort_values(by='counts', ascending=False)
    else:
        # Intersect with the AOI to get the names of all the tiles needed
        quad_intersection = gpd.sjoin(inaoi, ntiles, how="left", op='intersects')
        print(quad_intersection)

    quadlist = quad_intersection.FileName.unique()
    return quadlist


# In[7]:


ql = createListofTiles(infilepath, inNAIPquad, aoi)
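# In[ ]:


# #### Optional sanity check (an added sketch, not part of the original workflow): confirm that the first
# #### quad in the list resolves to a readable blob URL before launching the full processing loop. It
# #### assumes `ql` is non-empty and reuses the same URL pattern as the processing loop below.

if len(ql) > 0:
    check_file = ql[0]
    check_url = blob_root + '/v002/' + state + '/' + year + '/' + state + '_' + resolution + \
        '_' + year + '/' + check_file[2:7] + '/' + check_file
    with rasterio.open(check_url) as src:
        print(check_url)
        print('CRS:', src.crs)
        print('Size:', src.width, 'x', src.height)
        print('Bands:', src.count)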
# In[8]:


# Selected indexes for analysis

def calcNDVI(src):
    # read red and nir bands, change dtype to float64 for the output ndvi calc
    red, nir = src.read(1).astype('float64'), src.read(4).astype('float64')
    # Calculate NDVI
    print('Calculate NDVI')
    ndvi = np.where((nir + red) == 0, 0, (nir - red) / (nir + red))
    return ndvi


def calcClassedNDVI(src):
    # read red and nir bands, change dtype to float64 for the output ndvi calc
    red, nir = src.read(1).astype('float64'), src.read(4).astype('float64')
    # Calculate NDVI
    print('Calculate NDVI')
    ndvi = np.where((nir + red) == 0, 0, (nir - red) / (nir + red))
    # Return NDVI reclassified into fuel (1) or no fuel (0)
    # Change the NDVI threshold where applicable
    ndvi[ndvi > .2] = 1
    ndvi[ndvi <= .2] = 0
    return ndvi


def calcMSAVI2(src):
    # read red and nir bands, change dtype to float64 for the output calc
    red, nir = src.read(1).astype('float64'), src.read(4).astype('float64')
    print('Calculate MSAVI2')
    # MSAVI2 = (2*NIR + 1 - sqrt((2*NIR + 1)^2 - 8*(NIR - Red))) / 2
    msavi2 = (2 * (nir + 1) - np.sqrt((2 * nir + 1)**2 - 8 * (nir - red))) / 2
    return msavi2


# In[9]:


for n, filen in enumerate(ql):
    print(str(n) + ' of ' + str(len(ql)))

    # Retrieve the quadrangle folder name and build the link to the tile
    quadrangle = filen[2:7]
    filename = filen
    tile_url = blob_root + '/v002/' + state + '/' + year + '/' + state + '_' + resolution + \
        '_' + year + '/' + quadrangle + '/' + filename
    print(tile_url)

    # Output file name
    outputanalysis = os.path.join(outpath, filename[:-4] + outfilen_append)

    # If the file has not already been completed previously, stream the data,
    # perform the calculation, and save the output
    if not os.path.exists(outputanalysis):
        with rasterio.open(tile_url) as f:

            #### Update the analysis to be run
            analysis = calcNDVI(f)

            # set transform
            affine = f.transform

            with rasterio.open(outputanalysis, 'w',
                               driver='GTiff',
                               height=f.shape[0],
                               width=f.shape[1],
                               compress='lzw',  # lossless
                               count=1,
                               dtype='float64',  # 'uint8' for scaled ndvi or reclass
                               crs=f.crs,
                               transform=affine) as dst:
                dst.write(analysis, 1)

        print(filename[:-4] + outfilen_append + ' Complete')


# In[ ]:
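# In[ ]:


# #### Optional check of a finished tile (an added sketch, not part of the original workflow): open one
# #### saved analysis GeoTIFF and report its value range to confirm the write succeeded. It assumes at
# #### least one output matching `outfilen_append` already exists in `outpath`.

import glob

finished = glob.glob(os.path.join(outpath, '*' + outfilen_append))
if finished:
    with rasterio.open(finished[0]) as chk:
        vals = chk.read(1)
        print(finished[0])
        print('min:', vals.min(), 'max:', vals.max(), 'mean:', vals.mean())
else:
    print('No completed analysis files found in ' + outpath)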