'''
Final Project Code
Britnee Pannell
Created on: 04/29/2020
Updated on: 05/14/2020

Source:
Code derived from python and arcpy tool reference websites unless specified by a citation above code developed by 
others. 
'''

#The Plan: Batch the process of pulling out EVI data from multidimensional
# raster files obtained from MODIS TERRA's MOD13Q1 sensor
# Save these files to a new folder (paying attention to years and dates obtained)
# Batch mask these files, look into normalizing data- based on NASA site multiply raw values by
# .0001 conversion factor to get normalized data (map algebra)
# Batch map algebra the files to null and 1 (no veg and veg) based on .1 <= cell_value = vegetation present
# Research shows most set lower bar for vegetation at .2, however upon examining other images during wet desert years,
# .1 is a more accurate number for presence of vegetation in a desert biome for the purposes of this project
# Output a new table for each date containing % veg cover and make a graphic
# Write up the project info and submit for each class

# Demonstrate using arcGIS Pro how EVI data is calculated using Landsat data and bands at Sutter Buttes
# Take care of batch preparing (clipping) Landsat data using existing code (i.e. below).



import arcpy, sys, os, traceback
from arcpy.sa import *

'''
Section 0:
Set path variables for entire project.
Hardcode known needed file paths after making file trees via windows explorer

'''
#Root folder of the project on disk, and the geodatabase location relative to that root
basepath = "C:\\Users\\Zaryla\\Desktop\\GIS_FINAL_PROJECTS\\"
database = "Final_Project_GIS_Data\\GIS_Final_Project\\GIS_Final_Project.gdb"

#Full path to the geodatabase; used as the arcpy workspace below
workspace_path = os.path.join(basepath, database)

arcpy.env.workspace = workspace_path #set GIS workspace
#MODIS data comes in Sinusoidal Projection natively; this does not play well with grids, so setting the output projection fixes this
arcpy.env.outputCoordinateSystem = arcpy.SpatialReference("WGS 1984 UTM Zone 11N")
#There were some masking errors; hard coding the Preserve boundary as the processing extent is an attempt to fix them
arcpy.env.extent = "Mojave_Preserve_Boundary"

#Hardcoded folder names; the folders themselves were made by hand through Windows Explorer
#Each name ends with a path separator because later code appends file names to these paths by plain string concatenation
raw_data = "Raw_Images\\"
processed_data = "EVI_images\\"

#Additional Landsat data to clip for a different portion of the project is kept here for convenience
landsat_data = "Landsat_Data\\"

non_sb = "2015\\" #non superbloom year data
sb = "2017\\"     #superbloom year data

#Full input (raw images) and output (EVI images) folders, built with os.path.join
base_input = os.path.join(basepath, raw_data)
base_output = os.path.join(basepath, processed_data)

'''
Section 1: Extract EVI data from multidimensional raster files

'''
#Write functions to reduce error risk and simplify code

def Make_Path(full_path):
    """Create the folder ``full_path`` (including missing parents) and report it.

    os.makedirs is called without ``exist_ok``, so it raises if the folder
    already exists; Test_Path checks for existence before calling this.
    """
    os.makedirs(full_path)
    #Informs the user that a new folder has been written
    notice = "Made new folder " + full_path
    print(notice)
    
def Test_Path(input_path, path_extension):
    """Join ``input_path`` and ``path_extension``, make sure the folder exists, and return the joined path.

    If the joined path is missing it is created through Make_Path; otherwise a
    message saying it exists is printed. The joined path is returned either way
    so the call can be nested directly inside other function calls.
    """
    full_path = os.path.join(input_path, path_extension)
    if os.path.exists(full_path):
        print(full_path + " exists.")
    else:
        Make_Path(full_path)
    return full_path

def Extract_EVI_from_HDF(input_directory, output_directory):
    """Extract one subdataset from every HDF file in a folder and save it as a .tif.

    Arguments for the input and output folders allow data from multiple
    sources (e.g. different years) to be processed with the same function.

    input_directory  -- folder containing the .hdf files
    output_directory -- folder that receives the extracted .tif files

    The output name is the first 10 characters of the input file name plus
    ".tif". Files whose output already exists are skipped, and files that do
    not have an .hdf extension are ignored.
    """
    for filename in os.listdir(input_directory):
        #Only HDF files can be handed to ExtractSubDataset; ignore anything else sitting in the folder
        if not filename.lower().endswith(".hdf"):
            continue
        #os.path.join works whether or not the folder arguments end with a path separator
        out_tif = os.path.join(output_directory, filename[0:10] + ".tif")
        #checks to see if processed file already exists before attempting processing
        if os.path.exists(out_tif):
            print(filename + " has previously been converted to .tif format.") #Tracking files the function skipped
            continue
        #Paths are passed to arcpy as str (not bytes); "1" is the subdataset index to extract, used here for the EVI layer
        arcpy.ExtractSubDataset_management(os.path.join(input_directory, filename), out_tif, "1")
        print(filename + " has been converted to .tif format.") #Tracking progress of function

'''
Section 2: Use Extract by Mask to limit the extent of the rasters to the Mojave Desert Preserve boundaries. Saving the data must be specified
separately after extracting by mask - it will not save by default. The file type to be read must also be specified, since the Extract function
made 4 new files for every layer extracted from an HDF file.
Also clip the Landsat data used to demonstrate calculating EVI in ArcGIS Pro - because automation wins

'''
# Set needed paths for next section
# The Mojave Desert mask has already been added to the workspace/geodatabase from within ArcGIS Pro; with the workspace defined as the geodatabase, the mask can simply be referenced by name
boundary = "Mojave_Preserve_Boundary"
#Mask used by Clip_Rasters_Landsat; for testing EVI calculations within ArcGIS Pro
Landsat_boundary = "Sutter_Buttes"
#Subfolder name for clipped rasters; the functions do not require it, but storing it in a variable makes mistakes less likely
clip_file = "Clips\\"

def Clip_Rasters(input_directory, output_directory):
    """Clip every .tif in a folder to the hardcoded ``boundary`` mask.

    input_directory  -- folder holding the extracted .tif files (the output folder of the extraction step)
    output_directory -- folder that receives the clipped rasters

    Each result is saved as the first 10 characters of the input file name
    plus "_clip.tif". Inputs whose clipped file already exists are skipped.
    """
    for filename in os.listdir(input_directory):
        #focuses function on .tif files, not the other sidecar files made while extracting layers from the HDF file
        if not filename.endswith(".tif"):
            continue
        #os.path.join works whether or not the folder arguments end with a path separator
        clip_path = os.path.join(output_directory, filename[0:10] + "_clip.tif")
        #checks to see if a clipped file has been made, if not runs the clip
        if os.path.exists(clip_path):
            print(filename + " has previously been clipped.") #Tracks files the function skipped
            continue
        #clipping a raster with a boundary requires Extract by Mask; the file that was actually listed is read,
        #rather than rebuilding its name from the first 10 characters
        outExtractByMask = ExtractByMask(os.path.join(input_directory, filename), boundary)
        #Extract by Mask does not save by default, so save (and rename) the result explicitly
        outExtractByMask.save(clip_path)
        print(filename + " has been clipped.") #tracks progress

def Clip_Rasters_Landsat(input_directory, output_directory):
    """Clip every .TIF in a folder to the hardcoded ``Landsat_boundary`` mask.

    Variant of Clip_Rasters rewritten for Landsat naming conventions: the
    output name is built from characters 17-25 and 38-43 of the input file
    name plus "_clip.tif". Both folder arguments are joined to file names by
    plain concatenation, so they must end with a path separator.
    """
    for filename in os.listdir(input_directory):
        #focuses function on .TIF files
        if not filename.endswith(".TIF"):
            continue
        clip_path = output_directory + filename[17:26] + filename[38:44] + "_clip.tif"
        if os.path.exists(clip_path):
            print(filename + " has previously been clipped.")
            continue
        clipped = ExtractByMask(input_directory + filename, Landsat_boundary)
        clipped.save(clip_path)
        print(filename + " has been clipped.")

'''
Section 3:
Map Algebra, it is a two parter.
Go through Clips folder
First need to normalize the clipped .tif files (after specifying again to focus only on .tif files) by multiplying each cell value by .0001 (as found on the NASA site for MODIS data)
Second, go through Normalized folder:
need to use the con function : Con('raster' >= .1, 1) to make a raster displaying only areas considered "vegetated" IE with EVI values >= .1 (after normalization)
I could skip the first step, but I want the normalized images for reference, and all papers displaying EVI data I have seen utilize the normalized data.
Put each set of data into its own folder and subfolder within the EVI folder

'''
#hardcoded subfolder names used by the map algebra steps below
#NOTE(review): "Nomalized" looks like a misspelling of "Normalized"; it is a runtime folder name, so confirm no existing data depends on it before renaming
normalize_file = "Nomalized\\"
veg_data = "Vegetation_Presence\\"

def Normalize_Raster(input_directory, output_directory, scale_factor=.0001):
    """Normalize every .tif in a folder by multiplying its cell values by a scale factor.

    Normalizes MODIS EVI data to a -1 to 1 scale; the default scale factor of
    .0001 is the one directed by the MODIS product guide.

    input_directory  -- folder holding the clipped .tif files
    output_directory -- folder that receives the normalized rasters
    scale_factor     -- multiplier applied to every cell (defaults to .0001)

    Each result is saved as the first 10 characters of the input file name
    plus "_norm.tif". Inputs whose normalized file already exists are skipped.
    """
    for filename in os.listdir(input_directory):
        if not filename.endswith(".tif"):
            continue
        #os.path.join works whether or not the folder arguments end with a path separator
        norm_path = os.path.join(output_directory, filename[0:10] + "_norm.tif")
        if os.path.exists(norm_path):
            print(filename + " has already been normalized.")
            continue
        inRaster = os.path.join(input_directory, filename)
        outRaster = Raster(inRaster) * scale_factor #map algebra function
        outRaster.save(norm_path) #map algebra results are saved explicitly to the output folder
        print(filename + " has been normalized.")

def Does_Veg_Exist(input_directory, output_directory, threshold=.1):
    """Write a vegetation-presence raster for every .tif in a folder.

    In each output raster, cells whose value is >= ``threshold`` become 1
    (vegetation) and all other cells get no value (null).

    input_directory  -- folder holding the normalized .tif files
    output_directory -- folder that receives the vegetation rasters
    threshold        -- lowest cell value counted as vegetation (defaults to .1)

    Each result is saved as the first 10 characters of the input file name
    plus "_veg.tif". Inputs whose vegetation file already exists are skipped.
    """
    for filename in os.listdir(input_directory):
        if not filename.endswith(".tif"):
            continue
        #os.path.join works whether or not the folder arguments end with a path separator
        veg_path = os.path.join(output_directory, filename[0:10] + "_veg.tif")
        if os.path.exists(veg_path):
            print(filename + " has already been processed.")
            continue
        inRaster = os.path.join(input_directory, filename)
        #conditional map algebra - if cell is >= threshold returns 1, if not no value is returned
        outRaster = Con(Raster(inRaster) >= threshold, 1)
        outRaster.save(veg_path)
        print(filename + " has been processed.")

'''
Section 4:
Calculate using data from the vegetation files' tables and export into one table such that:
Name of file / %coverage to allow for a simple x and y graph to display data

Current table lay out:
OID/ Value/ Count

Initial Plan:
Using Vegetation rasters only:
Make new column in each table called Veg_Area
Use field calculator to multiply Count by cell size (m^2) to get area of vegetation- transform to km^2 by dividing by 10^6

Make new column for Moj_Area- fill it with the value for the preserve boundary's area (in km^2)

Make new column for % coverage
Use field calculator to go through and divide Veg_Area by Preserve_Area * 100 to get % coverage- won't need the spaces for all of the numbers, only 2 digits for % value

Make new column for Date using the file names as the date entries - this will need a separate function because I'm going to have to use cursors here




'''
#in_table = obtained from veg directory 'for file in directory' code
#reference code for double checking entries
#arcpy.AddField_management(in_table, field_name, field_type, {field_precision}, {field_scale}, {field_length}, {field_alias}, {field_is_nullable}, {field_is_required}, {field_domain})

#hard coded values for ease
#size of each grid cell on output vegetation presence rasters (in m) according to file properties
veg_cell = 231.66
#area of preserve shapefile in sq km - via calculation within arcGIS pro
mojave_area = 6425.62

#field to calculate area covered in vegetation
veg_area = "Veg_km2"
#field for area of Mojave Preserve, from boundary file and Calculate Geometric Attributes function
moj_area = "Moj_km2"
#field for % coverage calculation
cover = "Coverage"


#The expressions below are handed to Calculate Field as text, so the numeric values are written into the
#text itself; naming the script variables inside the expression would rely on those names being visible
#wherever the geoprocessing tool evaluates it.
#expression for veg area cell: cell area (m^2) * cell count, converted to km^2
veg_calculation = "(({0} * {0}) * !Count!) / 1000000".format(veg_cell)
#expression for moj area cell: the constant preserve area in km^2
moj_calculation = str(mojave_area)
#expression for % coverage cell
cover_calculation = "(!Veg_km2! / !Moj_km2!) * 100"

def Add_Field(input_directory, field, data_type, prec, length):
    """Add a field to the table of every .tif in a folder, replacing it if it already exists.

    input_directory -- folder holding the rasters whose tables are edited
    field           -- name of the field to add
    data_type       -- field type passed to AddField (e.g. "DOUBLE", "SHORT", "TEXT")
    prec            -- field precision passed to AddField ("" to leave unset)
    length          -- field length passed to AddField ("" to leave unset)

    WARNING: an existing field with the same name is deleted first, so any
    data already stored in it is lost. Only run this to create the field.
    """
    for filename in os.listdir(input_directory):
        if not filename.endswith(".tif"):
            continue
        #a full path is needed because the rasters live in file folders, not in the arcpy workspace
        fullpath = os.path.join(input_directory, filename)
        #names of the fields currently in the raster's table
        existing_names = [header.name for header in arcpy.ListFields(fullpath)]
        if field in existing_names:
            arcpy.DeleteField_management(fullpath, field) #deletes field if it already exists
            print(field + " has been deleted from " + filename[0:10] + ".")
        #the field is always (re)created, whether or not an old copy was just deleted
        arcpy.AddField_management(fullpath, field, data_type, prec, "", length, "", "", "", "")
        print(field + " has been added to the table in " + filename[0:10] + ".") #tracks progress

def Fill_Field(input_directory, field, expression):
    """Run Calculate Field on ``field`` for the table of every .tif in a folder.

    ``expression`` is evaluated with the "PYTHON3" expression type.
    ``input_directory`` is joined to each file name by plain concatenation,
    so it must end with a path separator.

    NOTE(review): the original comment said this will only work on INT
    fields, yet the script also calls it on DOUBLE fields - confirm.
    """
    tif_names = (name for name in os.listdir(input_directory) if name.endswith(".tif"))
    for filename in tif_names:
        raster_path = input_directory + filename
        arcpy.CalculateField_management(raster_path, field, expression, "PYTHON3")
        #tracks progress of function
        print(filename[0:10] + " table field " + field + " updated with new value.")

def Update_Field(input_directory, field_name):
    """Write each raster's file-name prefix into ``field_name`` for every row of its table.

    For every .tif in ``input_directory``, the first 10 characters of the
    file name are stored in the given field of every table row.
    ``input_directory`` is joined to each file name by plain concatenation,
    so it must end with a path separator.
    """
    for filename in os.listdir(input_directory):
        if not filename.endswith(".tif"):
            continue
        label = filename[0:10]
        #the cursor exposes a single field, so the replacement row is a one-item list
        new_row = [str(label)]
        with arcpy.da.UpdateCursor(input_directory + filename, field_name) as cursor:
            for _ in cursor: #goes through each row in the table
                cursor.updateRow(new_row) #saves the changed row value
                print(field_name + " field in " + label + " updated with value " + str(new_row)) #tracks progress of function
'''
Section 5
Run all code within a try except block to make sure errors can be properly traced back

Export data from tables into one table
Go through each table with cursors and pull out line OID = 0 fields [Date, Cover] for every table and save it to a new line in a new table for each year
-should allow for making the 2 graphics I need to compare years

'''
#hard coded names for the aggregation tables - they are saved to the geodatabase workspace so no full path is needed
out_name = "Data_2015" #new table for year 2015
out_name_sb = "Data_2017" #new table for year 2017

#fields read from each vegetation raster table, and the aggregate table fields they are written to (same order)
fieldnames = ["Date", "Coverage"]
fieldnames_out = ["Date", "Per_Cover"]


def _year_directories(year):
    """Return the working folders for one study year, creating any that are missing."""
    evi_dir = Test_Path(base_output, year)
    clip_dir = Test_Path(evi_dir, clip_file)
    return {
        "raw": Test_Path(base_input, year),
        "evi": evi_dir,
        "clips": clip_dir,
        "normalized": Test_Path(clip_dir, normalize_file),
        "vegetation": Test_Path(clip_dir, veg_data),
    }


def _build_coverage_table(table_name, veg_directory):
    """(Re)create one aggregate table and fill it with Date / coverage rows from every vegetation raster."""
    #if the table exists, delete it so the run starts from an empty table
    if arcpy.Exists(table_name):
        arcpy.Delete_management(table_name)
        print("Deleted " + table_name)
    #create aggregate table and add fields
    arcpy.CreateTable_management(workspace_path, table_name)
    arcpy.AddField_management(table_name, "Date", "TEXT", "", "", 10, "", "", "", "")
    print("Made data field.")
    arcpy.AddField_management(table_name, "Per_Cover", "SHORT", 2, "", "", "", "", "", "")
    print("Made coverage field.")
    arcpy.AddField_management(table_name, "Dates", "DATE", "", "", "", "", "", "", "")
    print("Made date field.")

    #pull the fields named in fieldnames from each raster table and insert them row by row into the aggregate table
    for filename in os.listdir(veg_directory):
        if not filename.endswith(".tif"):
            continue
        fullpath = os.path.join(veg_directory, filename)
        #Code below for compiling table data sourced from Stack Exchange user Jason:  https://gis.stackexchange.com/a/90376
        with arcpy.da.SearchCursor(fullpath, fieldnames) as sCursor:
            with arcpy.da.InsertCursor(table_name, fieldnames_out) as iCursor:
                for row in sCursor:
                    iCursor.insertRow(row)
                    print("Inserted " + str(row))

    #Reformat the text 'Date' values into the date-typed 'Dates' field; graphing % coverage needs a date formatted field
    #arguments: source table, source field, the custom format the source text is stored in, output field, output field type
    arcpy.ConvertTimeField_management(table_name, "Date", "MM'_'dd'_'yyyy", "Dates", "DATE")
    print("Date values converted and inserted into new field.")


try:
    #resolve every working folder once per study year; order is non superbloom year first, then superbloom year
    year_dirs = [_year_directories(year) for year in (non_sb, sb)]

    #extract .tif files from HDFs for the 2 study years
    for dirs in year_dirs:
        Extract_EVI_from_HDF(dirs["raw"], dirs["evi"])

    #clip extracted .tif files for study years and landsat data
    for dirs in year_dirs:
        Clip_Rasters(dirs["evi"], dirs["clips"])
    #Clip_Rasters_Landsat(Test_Path(basepath, landsat_data), Test_Path(Test_Path(basepath, landsat_data), "Landsat_Clips\\"))

    #normalize clipped rasters
    for dirs in year_dirs:
        Normalize_Raster(dirs["clips"], dirs["normalized"])

    #make a raster that only shows cells with value >= .1
    for dirs in year_dirs:
        Does_Veg_Exist(dirs["normalized"], dirs["vegetation"])

    #add fields to the tables in the vegetation rasters: (field name, data type, precision, length)
    new_fields = [
        (veg_area, "DOUBLE", "", ""),
        (moj_area, "DOUBLE", "", ""),
        (cover, "SHORT", 2, ""), #only keeping 2 digits for whole percentage value
        ("Date", "TEXT", "", 10), #last min addition of text field
    ]
    for dirs in year_dirs:
        for field, data_type, prec, length in new_fields:
            Add_Field(dirs["vegetation"], field, data_type, prec, length)

    #fill the new fields for both years, one field at a time: vegetated area, preserve area, then % coverage
    #(coverage is last because its expression reads the two area fields)
    calculations = [
        (veg_area, veg_calculation),
        (moj_area, moj_calculation),
        (cover, cover_calculation),
    ]
    for field, expression in calculations:
        for dirs in year_dirs:
            Fill_Field(dirs["vegetation"], field, expression)

    #fill out created date field with file name as date value for each file
    for dirs in year_dirs:
        Update_Field(dirs["vegetation"], "Date")

    #build one aggregate table per year, in the same year order as year_dirs
    for table_name, dirs in zip((out_name, out_name_sb), year_dirs):
        _build_coverage_table(table_name, dirs["vegetation"])

except Exception:
    # https://pro.arcgis.com/en/pro-app/arcpy/get-started/error-handling-with-python.htm
    #Exception (not a bare except) so Ctrl+C and interpreter exit are not swallowed
    exc_value, tb = sys.exc_info()[1:]
    #report every traceback frame so the failing line inside a helper function is visible, not just the top-level call
    tbinfo = "".join(traceback.format_tb(tb))
    pymsg = "PYTHON ERRORS:\nTraceback Info:\n" + tbinfo + "\nError Info:\n" + str(exc_value)
    msgs = "ARCPY ERRORS:\n" + arcpy.GetMessages(2) + "\n"

    arcpy.AddError(msgs)
    arcpy.AddError(pymsg)

    print(msgs)
    print(pymsg)

    arcpy.AddMessage(arcpy.GetMessages(1))
    print(arcpy.GetMessages(1))

