############################################################################### # Master Recession Curve (MRC) Fitting Program MRCfit # John Nimmo # Adaptation started 3/16/17 # Based on code MRC_create by Lara Mitchell, Lasted updated 2/2/14 # This R program generates parametric Master Recession Curves (MRCs) from # water-level data by fitting to the time derivative of water-level elevation. ############################################################################### #### User inputs ############################################################## # Folders for input data and output: inputlocation <- "C:/Users/kperkins/Desktop/class/" outputlocation = "C:/Users/kperkins/Desktop/class/" # File names for input data and for output: inputfile <- "sampledata-masserNW95.csv" # units are h, m, mm # If no output file is desired, use an empty string ("") for the file name outputfile <- "" # Desired degree of the fitted polynomial (=1 for exponential E(t)): poly_degree = 1 # Force the fit through the origin? (TRUE for yes, FALSE for no): through_origin = FALSE # The time increment between successive data points is not required to be # constant through the file. State here the normal time increment between # points (i.e. where there is no data gap) in input file (units of time): tinc = .5 # Minimum duration of interval between significant precip and recession # start (units of time): mindrytime = 48 # Negligibility-of-precip criterion (units of precip): maxdelprec = .5 # Duration of segments for linearization (units of time): tseg_given = 6 # Maximum allowable elevation-increase for linearized segments to be used # for fitting the MRC (units of elevation/time): maxslope = .002 # Desired bin size (units of elevation) to bin the calculated dE/dt for # fitting purposes (0 for no binning) bin_size = .1 # Make extra plot, slope vs. time (useful for evaluating time-dependenc)? plotslopetime = FALSE #### End user inputs ########################################################## # Read data rawData <- read.csv(paste(inputlocation,inputfile,sep=""), header=TRUE) dataname = substr(inputfile, 1, (nchar(inputfile)-4)) # Test for complete data set: test <- is.na(rawData) if(sum(test) > 0) { cat("Warning: Values are missing from data file. \n\n") } tim <- rawData[,1] elev <- rawData[,2] precip <- rawData[,3] np = length(tim) nmindry = as.integer(mindrytime / tinc) lseg = as.integer(tseg_given / tinc) # The number of points in a segment lsegm1 = lseg - 1 tseg = tinc * lseg # Reset segment duration for consistency with lseg tdeltarget = tinc * lsegm1 tnegl = tinc / 3 # Negligible amount of time in segment partitioning cat(tinc, nmindry, lseg, tseg_given, tseg, "times \n" ) # Verify that cumulative precipitation increases monotonically check_precip <- which( diff(precip) < 0 ) if(length(check_precip) > 0){ cat("Warning: Decrease in cumulative precipitation detected...\n") for(ind in check_precip){ # ind = index of element right before the problem occurs (call it Elem1) # Find ALL values less than Elem1 that occur AFTER Elem1: ind_replace = (precip < precip[ind]) ind_replace[1:ind] = FALSE # Replace those problem elements with the value in Elem1: precip[ind_replace] = precip[ind]; } cat("Precipitation data adjusted\n") } # Identify points satisfying the precip criterion to be considered within a # recessionary interval. ipc = which( (diff(precip, lag=nmindry) < maxdelprec) ) + nmindry npc = length(ipc) # total number of data that meet precip criterion nseg = as.integer(npc / lseg) # Identify segments provisionally as groups of lseg consecutive points. # Next line: indices of initial points of each provisional segment iseg = ipc[seq(1, npc-lseg, by=lseg)] nb = nseg + 1 # Identify those of the provisional segments that have no data-point gaps # and have no significant precip within them. isegcindx = which( (tim[iseg + lsegm1] - tim[iseg] - tdeltarget < tnegl) & (precip[iseg + lsegm1] - precip[iseg] < maxdelprec) ) isegc = iseg[isegcindx] # isegc has indices at start of gapless segments with negligible precip nsegc = length(isegc) ##cat("+++", nseg, " iseg ", iseg[1:120], "iseg \n") ##cat("--- ", length(isegcindx), "isegcindx ", isegcindx, "isegcindx \n") ##cat("+++", nsegc, "isegc ", isegc[1:120], "isegc \n") # Compute linear fits to data within gapless segments irec = vector(mode = "integer", length = 0) ti = vector(mode = "double", length = lseg) ei = vector(mode = "double", length = lseg) segslo = vector(mode = "double", length = nsegc) segele = vector(mode = "double", length = nsegc) for (i1 in (1:nsegc)) { ti[1:lseg] = tim[isegc[i1]:(isegc[i1]+lsegm1)] ei[1:lseg] = elev[isegc[i1]:(isegc[i1]+lsegm1)] irec = c(irec, isegc[i1]:(isegc[i1]+lsegm1)) segdata = data.frame(ti, ei) segment = lm(formula = ei ~ ti, data = segdata) segslo[i1] = segment$coefficients[2] segele[i1] = mean(ei) } timrec = tim[irec] elevrec = elev[irec] # Distinguish between points whose slopes are or are not less than the # established maximum # Identify indices of initial points of selected segments isegcfindx = which( segslo <= maxslope ) isegcf = isegc[which( segslo <= maxslope )] nsegcf = length(isegcf) # Identify indices of initial points of excluded segments iseghighslope = isegc[which( segslo > maxslope )] nseghighslope = length(iseghighslope) # Identify data for use segslope = vector(mode = "double", length = nsegcf) segelev = vector(mode = "double", length = nsegcf) segslope = segslo[isegcfindx] segelev = segele[isegcfindx] # Identify precip data for plotting irecf = vector(mode = "integer", length = 0) for (i1 in (1:nsegcf)) { irecf = c(irecf, isegcf[i1]:(isegcf[i1]+lsegm1)) } timrecf = tim[irecf] preciprecf = precip[irecf] elevrecf = elev[irecf] # Average within bins, if called for if(bin_size > 0) { temp <- sort(segelev,index.return=TRUE) ## print(temp[[2]]) segelev_sort <- temp[[1]] # sort E from smallest to largest segslope_sort <- segslope[temp[[2]]] # sort dE/dt according to z_sort Hmax <- segelev_sort[length(segelev_sort)] Hmin <- segelev_sort[1] if( (Hmax - Hmin)/bin_size <= 1) { cat("Bin size is too large.\n\n") } else { bin_num <- ceiling( (Hmax - Hmin)/bin_size ) } segelev_avg <- rep(0,bin_num) segslope_avg <- rep(0,bin_num) j <- length(segelev_sort) for(i in 1:bin_num) { bmin <- Hmax - i*bin_size temp <- which(bmin < segelev_sort[1:j]) j <- min(temp) - 1 segelev_avg[i] <- sum(segelev_sort[temp])/length(temp) segslope_avg[i] <- sum(segslope_sort[temp])/length(temp) } } else { segelev_avg <- segelev segslope_avg <- segslope } ###################################################################### # Fit polynomial to the selected recession data, yielding coefficients # ordered from highest to lowest order if(through_origin) { use_formula <- switch(poly_degree, '1' = {segslope_avg ~ segelev_avg + 0}, '2' = {segslope_avg ~ segelev_avg + I(segelev_avg^2) + 0}, '3' = {segslope_avg ~ segelev_avg + I(segelev_avg^2) + I(segelev_avg^3) + 0} ) # don't go above 3 } else { use_formula <- switch(poly_degree, '1' = {segslope_avg ~ segelev_avg}, '2' = {segslope_avg ~ segelev_avg + I(segelev_avg^2)}, '3' = {segslope_avg ~ segelev_avg + I(segelev_avg^2) + I(segelev_avg^3)} ) # don't go above 3 } fit <- lm(use_formula) adj_R2 <- summary(fit)[["adj.r.squared"]] # Show results n <- poly_degree if(through_origin) { output_df <- data.frame("Estimated_coefficients"=rep(NA,n)) rownames(output_df) <- paste("p[",1:n,"]",sep="") output_df[,1] = fit[["coefficients"]] ## FIT! output_formula <- paste("p[",1:n,"]*X^",1:n,sep="") output_formula <- paste(output_formula,collapse=" + ") } else { output_df <- data.frame("Estimated_coefficients"=rep(NA,n+1)) rownames(output_df) <- paste("p[",0:n,"]",sep="") output_df[,1] = fit[["coefficients"]] ## FIT! output_formula <- paste("p[",1:n,"]*X^",1:n,sep="") output_formula <- c("p[0]",output_formula) output_formula <- paste(output_formula,collapse=" + ") } print(output_formula) print(output_df) ###################################################################### # Create unconventional data frame to save as the output csv file: if(nchar(outputfile) > 0) { final_df <- data.frame("Coefficient"=rownames(output_df), "Estimated_coefficients"=output_df[,1], stringsAsFactors=FALSE) final_df[nrow(final_df) + 1,1:2] <- rep("",2) final_df[nrow(final_df) + 1,1:2] <- c("Formula","") final_df[nrow(final_df) + 1,1:2] <- c(output_formula,"") write.csv(final_df, file=paste(outputlocation, outputfile, sep=""), row.names=FALSE) } ###################################################################### # Plot cumulative precipitation vs. time: dev.new(width=15,height=10) par(mfrow=c(1,1), mar=c(4.5,4.5,3,2), oma=c(0,0,0,1)) plot(tim, precip, type="l", main=paste("Precip vs. Time ", dataname), xlab="Time", ylab="Cumulative precipitation") points(timrecf, preciprecf, col="red", pch=20, cex=.8) points(tim[isegcf], precip[isegcf], col="blue", pch="|", cex=1) legend("topleft",c("Cumulative precip","Recession periods"), col=c("black","red"), lty=c(1, NA), pch=c(NA, 20)) # Plot WT Elevation E vs. time: dev.new(width=15, height=10) par(mfrow=c(1,1), mar=c(4.5,4.5,3,2), oma=c(0,0,0,1)) plot(tim, elev, type="l", main=paste("Elev vs. Time ", dataname), xlab="Time", ylab="WT Elevation E") points(timrecf, elevrecf, col="red", pch=20, cex=.8) points(tim[isegcf], elev[isegcf], col="blue", pch="+", cex=1.2) legend("topright",c("WT Elevation E","Recession periods"), col=c("black","red"), lty=c(1,NA), pch=c(NA, 20)) # Plot recession data, bin-averaged recession data, and fitted curve: dev.new(width=15,height=10) par(mfrow=c(1,1), mar=c(4.5,4.5,3,2), oma=c(0,0,0,1)) xlim1 = min(segslope); xlim2 = maxslope + (maxslope-xlim1)/10 plot(segslope, segelev, type="p", xlab="dE/dt", ylab="WT Elevation E", xlim=c(xlim1, xlim2), pch=5, cex=1., col="blue", main=paste("Master Recession Curve ", dataname)) points(segslo[which(segslo > maxslope)], segele[which(segslo > maxslope)], pch=5, cex=.6, col=616) points(segslope_avg, segelev_avg, pch=16, cex=1., col="red") idx.sort <- sort(segelev_avg, index.return=TRUE)[["ix"]] plot.H <- segelev_avg[idx.sort] plot.fitted.dedt <- fit[["fitted.values"]][idx.sort] lines(plot.fitted.dedt, plot.H, col="black", lwd=3) legend("bottomleft", c("Selected slopes","Bin averages", "Fitted curve", "Nonselected slopes"), col=c("blue","red","black",616), pch=c(5,16,NA,5), pt.cex=c(1,1,NA,.6), cex=1, lty=c(NA,NA,1,NA), lwd=c(NA,NA,3,NA), bty="n") mtext(paste("GoF =", round(adj_R2,digits=3)), side=3, adj=0, at=range(segslope)[1]*1.03, line=-.9, cex=.8) for (i1 in 1:(poly_degree+1)) { mtext( paste("p[", (i1-1), "] ", signif(output_df[i1, 1], digits = 6)), cex=.8, side=3, adj=0, at=range(segslope)[1]*1.03, line=(-.9-.9*i1)) } # If called for, plot segment slopes vs. time: if (plotslopetime) { dev.new(width=15, height=10) par(mfrow=c(1,1), mar=c(4.5,4.5,3,2), oma=c(0,0,0,1)) plot(tim[isegc+lseg/2], segslope, type="p", col="red", pch=20, cex=1.4, main=paste("Segment slope vs. Time ", dataname), xlab="Time", ylab="Segment slope") } if (poly_degree == 1) { asymp = -output_df[1,1] / output_df[2,1] cat( "Asymptote approached by master recession: ", asymp, " \n") mtext(paste("Asymptote", round(asymp, digits = 3)), side=3, adj=0, at=range(segslope)[1]*1.03, line=(-.9-.9*3), cex=.8) } controlparams = c( paste(sprintf("%.3f", mindrytime), nmindry), paste(sprintf("%.3f", maxdelprec)), paste(sprintf("%.3f", tseg), lseg), paste(sprintf("%.3f", maxslope)) ) for (i1 in 1:4) { mtext(controlparams[i1], cex=.8, side=3, adj=0, at=range(segslope)[1]*1.03, line=(-.9-.9*(i1+3))) } ######################################################################