###############################################################################
# Master Recession Curve (MRC) Fitting Program MRCfit 
# John Nimmo
# Adaptation started 3/16/17
# Based on code MRC_create by Lara Mitchell, Lasted updated 2/2/14
# This R program generates parametric Master Recession Curves (MRCs) from 
# water-level data by fitting to the time derivative of water-level elevation. 
###############################################################################

#### User inputs ##############################################################

# Folders for input data and output:
inputlocation <- "C:/Users/kperkins/Desktop/class/"   
outputlocation = "C:/Users/kperkins/Desktop/class/"

# File names for input data and for output:
inputfile <- "sampledata-masserNW95.csv" # units are h, m, mm
# If no output file is desired, use an empty string ("") for the file name
outputfile <- "" 

# Desired degree of the fitted polynomial (=1 for exponential E(t)):
poly_degree = 1

# Force the fit through the origin? (TRUE for yes, FALSE for no):
through_origin = FALSE

# The time increment between successive data points is not required to be 
# constant through the file. State here the normal time increment between  
# points (i.e. where there is no data gap) in input file (units of time):
tinc = .5

# Minimum duration of interval between significant precip and recession 
# start (units of time): 
mindrytime = 48 

# Negligibility-of-precip criterion (units of precip):
maxdelprec = .5

# Duration of segments for linearization (units of time):
tseg_given = 6 

# Maximum allowable elevation-increase for linearized segments to be used 
# for fitting the MRC (units of elevation/time):
maxslope = .002 

# Desired bin size (units of elevation) to bin the calculated dE/dt for 
# fitting purposes (0 for no binning) 
bin_size = .1 

# Make extra plot, slope vs. time (useful for evaluating time-dependenc)? 
plotslopetime = FALSE

#### End user inputs ##########################################################

# Read data

rawData <- read.csv(paste(inputlocation,inputfile,sep=""), header=TRUE)
dataname = substr(inputfile, 1, (nchar(inputfile)-4)) 

# Test for complete data set:
test <- is.na(rawData)
if(sum(test) > 0) {
  cat("Warning: Values are missing from data file. \n\n")
}

tim <- rawData[,1]
elev <- rawData[,2]
precip <- rawData[,3]

np = length(tim) 
nmindry = as.integer(mindrytime / tinc)
lseg = as.integer(tseg_given / tinc)    # The number of points in a segment
lsegm1 = lseg - 1
tseg = tinc * lseg       # Reset segment duration for consistency with lseg 
tdeltarget = tinc * lsegm1
tnegl = tinc / 3         # Negligible amount of time in segment partitioning 
cat(tinc, nmindry, lseg, tseg_given, tseg, "times  \n" )
# Verify that cumulative precipitation increases monotonically 
check_precip <- which( diff(precip) < 0 )
if(length(check_precip) > 0){
  cat("Warning: Decrease in cumulative precipitation detected...\n")
  for(ind in check_precip){
    # ind = index of element right before the problem occurs (call it Elem1)
    # Find ALL values less than Elem1 that occur AFTER Elem1:
    ind_replace = (precip < precip[ind])
    ind_replace[1:ind] = FALSE				
    # Replace those problem elements with the value in Elem1:
    precip[ind_replace] = precip[ind];
  }
  cat("Precipitation data adjusted\n")
}

# Identify points satisfying the precip criterion to be considered within a 
# recessionary interval. 
ipc = which( (diff(precip, lag=nmindry) < maxdelprec) ) + nmindry
npc = length(ipc)  # total number of data that meet precip criterion
nseg = as.integer(npc / lseg)

# Identify segments provisionally as groups of lseg consecutive points.
# Next line: indices of initial points of each provisional segment 
iseg = ipc[seq(1, npc-lseg, by=lseg)]   
nb = nseg + 1 

# Identify those of the provisional segments that have no data-point gaps 
# and have no significant precip within them.
isegcindx = which( (tim[iseg + lsegm1] - tim[iseg] - tdeltarget < tnegl) & 
                     (precip[iseg + lsegm1] - precip[iseg] < maxdelprec) ) 
isegc = iseg[isegcindx]   
# isegc has indices at start of gapless segments with negligible precip
nsegc = length(isegc)
##cat("+++", nseg, "  iseg ", iseg[1:120], "iseg \n")
##cat("--- ", length(isegcindx), "isegcindx ", isegcindx, "isegcindx \n")
##cat("+++", nsegc, "isegc ", isegc[1:120], "isegc \n")

# Compute linear fits to data within gapless segments 

irec = vector(mode = "integer", length = 0) 
ti = vector(mode = "double", length = lseg) 
ei = vector(mode = "double", length = lseg) 
segslo = vector(mode = "double", length = nsegc) 
segele = vector(mode = "double", length = nsegc) 
for (i1 in (1:nsegc)) {
  ti[1:lseg] = tim[isegc[i1]:(isegc[i1]+lsegm1)]
  ei[1:lseg] = elev[isegc[i1]:(isegc[i1]+lsegm1)] 
  irec = c(irec, isegc[i1]:(isegc[i1]+lsegm1))
  segdata = data.frame(ti, ei) 
  segment = lm(formula = ei ~ ti, data = segdata)
  segslo[i1] = segment$coefficients[2] 
  segele[i1] = mean(ei) 
}
timrec = tim[irec]
elevrec = elev[irec]

# Distinguish between points whose slopes are or are not less than the 
# established maximum

# Identify indices of initial points of selected segments
isegcfindx = which( segslo <= maxslope )
isegcf = isegc[which( segslo <= maxslope )]   
nsegcf = length(isegcf)
# Identify indices of initial points of excluded segments
iseghighslope = isegc[which( segslo > maxslope )] 
nseghighslope = length(iseghighslope)
# Identify data for use 
segslope = vector(mode = "double", length = nsegcf) 
segelev = vector(mode = "double", length = nsegcf) 
segslope = segslo[isegcfindx]
segelev = segele[isegcfindx] 
# Identify precip data for plotting
irecf = vector(mode = "integer", length = 0) 
for (i1 in (1:nsegcf)) {
  irecf = c(irecf, isegcf[i1]:(isegcf[i1]+lsegm1))
}
timrecf = tim[irecf]
preciprecf = precip[irecf] 
elevrecf = elev[irecf] 

# Average within bins, if called for

if(bin_size > 0) {
  temp <- sort(segelev,index.return=TRUE)   
##  print(temp[[2]])
  segelev_sort <- temp[[1]]            # sort E from smallest to largest
  segslope_sort <- segslope[temp[[2]]]   # sort dE/dt according to z_sort
  Hmax <- segelev_sort[length(segelev_sort)]  
  Hmin <- segelev_sort[1]
  if( (Hmax - Hmin)/bin_size <= 1) {
    cat("Bin size is too large.\n\n")
  } else {
    bin_num <- ceiling( (Hmax - Hmin)/bin_size )
  }
  segelev_avg <- rep(0,bin_num)
  segslope_avg <- rep(0,bin_num)
  j <- length(segelev_sort)
  for(i in 1:bin_num) {
    bmin <- Hmax - i*bin_size
    temp <- which(bmin < segelev_sort[1:j])
    j <- min(temp) - 1
    segelev_avg[i] <- sum(segelev_sort[temp])/length(temp)
    segslope_avg[i] <- sum(segslope_sort[temp])/length(temp) 
  }
} else {
  segelev_avg <- segelev
  segslope_avg <- segslope 
}
######################################################################

# Fit polynomial to the selected recession data, yielding coefficients 
# ordered from highest to lowest order 

if(through_origin) {
  use_formula <- switch(poly_degree,
    '1' = {segslope_avg ~ segelev_avg + 0},
    '2' = {segslope_avg ~ segelev_avg + I(segelev_avg^2) + 0},
    '3' = {segslope_avg ~ segelev_avg + I(segelev_avg^2) + I(segelev_avg^3) + 0}
  )	# don't  go above 3 
} else {
  use_formula <- switch(poly_degree,
    '1' = {segslope_avg ~ segelev_avg},
    '2' = {segslope_avg ~ segelev_avg + I(segelev_avg^2)},
    '3' = {segslope_avg ~ segelev_avg + I(segelev_avg^2) + I(segelev_avg^3)}
  )	# don't go above 3 
}
fit <- lm(use_formula)
adj_R2 <- summary(fit)[["adj.r.squared"]]

# Show results

n <- poly_degree
if(through_origin) {
  output_df <- data.frame("Estimated_coefficients"=rep(NA,n))
  rownames(output_df) <- paste("p[",1:n,"]",sep="")
  output_df[,1] = fit[["coefficients"]]		## FIT!
  
  output_formula <- paste("p[",1:n,"]*X^",1:n,sep="")
  output_formula <- paste(output_formula,collapse=" + ")
} else {
  output_df <- data.frame("Estimated_coefficients"=rep(NA,n+1))
  rownames(output_df) <- paste("p[",0:n,"]",sep="")
  output_df[,1] = fit[["coefficients"]]		## FIT!
  
  output_formula <- paste("p[",1:n,"]*X^",1:n,sep="")
  output_formula <- c("p[0]",output_formula)
  output_formula <- paste(output_formula,collapse=" + ")
}
print(output_formula)
print(output_df) 

######################################################################

# Create unconventional data frame to save as the output csv file:
if(nchar(outputfile) > 0) {
  final_df <- data.frame("Coefficient"=rownames(output_df),
      "Estimated_coefficients"=output_df[,1], stringsAsFactors=FALSE)
  final_df[nrow(final_df) + 1,1:2] <- rep("",2)
  final_df[nrow(final_df) + 1,1:2] <- c("Formula","")
  final_df[nrow(final_df) + 1,1:2] <- c(output_formula,"")
  
  write.csv(final_df, file=paste(outputlocation, outputfile, sep=""), 
      row.names=FALSE)
}
######################################################################

# Plot cumulative precipitation vs. time:
dev.new(width=15,height=10)
par(mfrow=c(1,1), mar=c(4.5,4.5,3,2), oma=c(0,0,0,1))
plot(tim, precip, type="l", main=paste("Precip vs. Time  ", dataname), 
     xlab="Time", ylab="Cumulative precipitation")
points(timrecf, preciprecf, col="red", pch=20, cex=.8)
points(tim[isegcf], precip[isegcf], col="blue", pch="|", cex=1)
legend("topleft",c("Cumulative precip","Recession periods"),  
       col=c("black","red"), lty=c(1, NA), pch=c(NA, 20))

# Plot WT Elevation E vs. time:
dev.new(width=15, height=10)
par(mfrow=c(1,1), mar=c(4.5,4.5,3,2), oma=c(0,0,0,1))
plot(tim, elev, type="l", main=paste("Elev vs. Time  ", dataname), 
     xlab="Time", ylab="WT Elevation E")
points(timrecf, elevrecf, col="red", pch=20, cex=.8)
points(tim[isegcf], elev[isegcf], col="blue", pch="+", cex=1.2)
legend("topright",c("WT Elevation E","Recession periods"), 
       col=c("black","red"), lty=c(1,NA), pch=c(NA, 20))

# Plot recession data, bin-averaged recession data, and fitted curve:			
dev.new(width=15,height=10)
par(mfrow=c(1,1), mar=c(4.5,4.5,3,2), oma=c(0,0,0,1))
xlim1 = min(segslope); xlim2 = maxslope + (maxslope-xlim1)/10
plot(segslope, segelev, type="p", xlab="dE/dt", 
    ylab="WT Elevation E", xlim=c(xlim1, xlim2), pch=5, cex=1., col="blue",	
    main=paste("Master Recession Curve  ", dataname))
points(segslo[which(segslo > maxslope)], segele[which(segslo > maxslope)], 
    pch=5, cex=.6, col=616)
points(segslope_avg, segelev_avg, pch=16, cex=1., col="red")
idx.sort <- sort(segelev_avg, index.return=TRUE)[["ix"]]
plot.H <- segelev_avg[idx.sort]
plot.fitted.dedt <- fit[["fitted.values"]][idx.sort]
lines(plot.fitted.dedt, plot.H,  col="black", lwd=3)		
legend("bottomleft", c("Selected slopes","Bin averages", "Fitted curve",
  "Nonselected slopes"), col=c("blue","red","black",616), pch=c(5,16,NA,5), 
  pt.cex=c(1,1,NA,.6), cex=1, lty=c(NA,NA,1,NA), lwd=c(NA,NA,3,NA), bty="n")
mtext(paste("GoF =", round(adj_R2,digits=3)), side=3, adj=0, 
      at=range(segslope)[1]*1.03, line=-.9, cex=.8)	
for (i1 in 1:(poly_degree+1)) {
  mtext( paste("p[", (i1-1), "] ", signif(output_df[i1, 1], digits = 6)), 
      cex=.8, side=3, adj=0, at=range(segslope)[1]*1.03, line=(-.9-.9*i1)) 
}
# If called for, plot segment slopes vs. time:
if (plotslopetime) {
  dev.new(width=15, height=10)
  par(mfrow=c(1,1), mar=c(4.5,4.5,3,2), oma=c(0,0,0,1))
  plot(tim[isegc+lseg/2], segslope, type="p", col="red", pch=20, cex=1.4, 
       main=paste("Segment slope vs. Time  ", dataname), xlab="Time", 
       ylab="Segment slope")
}
if (poly_degree == 1) { 
  asymp = -output_df[1,1] / output_df[2,1] 
  cat( "Asymptote approached by master recession: ", asymp, " \n")
  mtext(paste("Asymptote", round(asymp, digits = 3)), side=3, adj=0, 
        at=range(segslope)[1]*1.03, line=(-.9-.9*3), cex=.8) 
}
controlparams = c( paste(sprintf("%.3f", mindrytime), nmindry), 
  paste(sprintf("%.3f", maxdelprec)), 
  paste(sprintf("%.3f", tseg), lseg), 
  paste(sprintf("%.3f", maxslope)) ) 
for (i1 in 1:4) {
  mtext(controlparams[i1], cex=.8, side=3, adj=0, 
         at=range(segslope)[1]*1.03, line=(-.9-.9*(i1+3))) 
}
######################################################################