R Script: Parametric Regression

# Fit parametric regression models to observations
# Plot resulting curve fits with binned data

# Create storage list
modlist.glm <- as.list(rep(NA, times = length(taxa.names)))

for (i in 1:length(taxa.names)) {

# Create a logical vector that is true if taxon is
# present and false if taxon is absent.

resp <- dfmerge[,taxa.names[i]]>0

# Fit the regression model and store the results in a list.
# Here, poly(temp,2) specifies that the
# model is fitting using a second order polynomial of the
# explanatory variable. glm calls the function that fits
# Generalized Linear Models. We specify in this case that
# the response variable is distributed binomially.
modlist.glm[[i]] <- glm(resp ~ poly(temp,2), data = dfmerge,
family = "binomial")

print(summary(modlist.glm[[i]]))
}

par(mfrow = c(1,3), pty = "s")
# Specify 3 plots per page
for (i in 1:length(taxa.names)) {
# Compute mean predicted probability of occurrence
# and standard errors about this predicted probability.
predres <- predict(modlist.glm[[i]], type= "link", se.fit = T)

# Compute upper and lower 90% confidence limits
up.bound.link <- predres$fit + 1.65*predres$se.fit
low.bound.link <- predres$fit - 1.65*predres$se.fit
mean.resp.link <- predres$fit

# Convert from logit transformed values to probability.
up.bound <- exp(up.bound.link)/(1+exp(up.bound.link))
low.bound <- exp(low.bound.link)/(1+exp(low.bound.link))
mean.resp <- exp(mean.resp.link)/(1+exp(mean.resp.link))

iord <- order(dfmerge$temp)
# Sort the environmental variable.

# Define bins to summarize observational data as
# probabilities of occurrence

# Define the number of bins
nbin <- 20

# Define bin boundaries so each bin has approximately the same
# number of observations.
cutp <- quantile(dfmerge$temp,probs = seq(from = 0, to = 1, length = 20))

# Compute the midpoint of each bin
cutm <- 0.5*(cutp[-1] + cutp[-nbin])

# Assign a factor to each bin
cutf <- cut(dfmerge$temp, cutp, include.lowest = T)

# Compute the mean of the presence/absence data within each
# bin.
vals <- tapply(dfmerge[, taxa.names[i]] > 0, cutf, mean)

# Now generate the plot, # Plot binned observational data as symbols.
plot(cutm, vals, xlab = "Temperature",
ylab = "Probability of occurrence", ylim = c(0,1),
main = taxa.names[i])

# Plot mean fit as a solid line.
lines(dfmerge$temp[iord], mean.resp[iord])

# Plot confidence limits as dotted lines.
lines(dfmerge$temp[iord], up.bound[iord], lty = 2)
lines(dfmerge$temp[iord], low.bound[iord], lty = 2)
}

R Script: Parametric Regression

Causal Analysis/Diagnosis Decision Information System (CADDIS)