# Set environment ---------------------------------------------------------
# Deliberately no rm(list = ls()): it would wipe the caller's workspace without
# giving a truly clean session (attached packages and options() survive it).
# Every object used below is created by this script itself.
#
# Switch to the course folder only when it exists, so the script does not
# abort on machines that lack this path. Otherwise run the script from the
# project root: the data file below is loaded through a relative path.
local({
  course_dir <- "/home/antonio/MEGA/Lavoro_sync/Didattica/2020_2021/GLMs/"
  if (dir.exists(course_dir)) {
    setwd(course_dir)
  }
})

# Case study 1: Data ------------------------------------------------------
# load() restores every object stored in the file and returns their names;
# check that `Ohio` is really among them so a stale object with the same name
# already sitting in the workspace cannot be analysed by mistake.
loaded_names <- load(file = "data/Ohio.RData")
if (!"Ohio" %in% loaded_names) {
  stop("data/Ohio.RData does not contain an object named 'Ohio'",
       call. = FALSE)
}

# Quick look at the structure and the first rows. head() is printed
# explicitly so the rows also show up when the script is run via source().
str(Ohio)
print(head(Ohio))
# The dataset refers to a subset of the six-city study (n=2148), a longitudinal study of the health effects of
# air pollution on children. The variables are as follows:
# resp: an indicator of wheeze status (1=yes, 0=no)
# id: a numeric vector for subject id
# age: a (rescaled) numeric vector of age: {-2,-1,0,1} <-> {7,8,9,10} (0 corresponds to 9 years)
# smoke: an indicator of maternal smoking at the first year of the study

# Recode the two 0/1 indicators as labelled factors; the level coded 0 comes
# first in both, so it is the reference level.
Ohio[["resp"]] <- factor(
  x = Ohio[["resp"]],
  levels = c(0, 1),
  labels = c("no wheeze", "wheeze")
)
Ohio[["smoke"]] <- factor(
  x = Ohio[["smoke"]],
  levels = c(0, 1),
  labels = c("no smoke", "smoke")
)

# First exploratory analyses
# Three-way table resp x age x smoke, turned into proportions of each wheeze
# status within every age-by-smoke cell (margins 2 and 3 are held fixed, so
# the two resp proportions of a cell sum to 1).
p_tab <- prop.table(table(Ohio$resp, Ohio$age, Ohio$smoke), margin = c(2, 3))
print(p_tab)

# Age-by-smoke slices for each wheeze status. They are indexed by level name
# rather than position and printed explicitly, so the output also appears
# when the script is run via source().
print(p_tab["no wheeze", , ])  # proportions of "no wheeze"
print(p_tab["wheeze", , ])     # proportions of "wheeze"

# Plot the proportion of children with wheeze against age, one line per
# maternal-smoking group.
agex <- 7:10  # actual ages matching the rescaled age levels of the table
wheeze_no_smoke <- p_tab["wheeze", , "no smoke"]
wheeze_smoke <- p_tab["wheeze", , "smoke"]

plot(agex, wheeze_no_smoke,
     bty = "n", type = "b", pch = 1, lty = 1, ylim = c(0.10, 0.25),
     xlab = "age", ylab = "prop wheeze")               # no smoke
points(agex, wheeze_smoke, type = "b", pch = 2, lty = 2)  # smoke
legend("topright", legend = c("no smoke", "smoke"),
       bty = "n", pch = c(1, 2), lty = c(1, 2))
# The proportion of wheeze is higher in the group with smoke=1 (regardless of age), with higher proportions at age 8.


# Case study 1: Models ----------------------------------------------------
# Make the reference categories explicit: "no smoke" and "no wheeze".
Ohio$smoke <- relevel(Ohio$smoke, ref = "no smoke")
Ohio$resp <- relevel(Ohio$resp, ref = "no wheeze")

# Logistic GLMMs for the repeated measures: a random intercept per child (id)
# accounts for the dependence among observations of the same subject.
# mod1: additive effects of age and smoke; mod2: adds their interaction.
mod1 <- lme4::glmer(
  formula = resp ~ age + smoke + (1 | id),
  data = Ohio,
  family = binomial
)
mod2 <- lme4::glmer(
  formula = resp ~ age * smoke + (1 | id),
  data = Ohio,
  family = binomial
)

# Likelihood-ratio comparison of the two nested models. Results in this
# section are printed explicitly so they also appear under source().
print(anova(mod1, mod2))

# ICC coefficient:
# the proportion of the variance explained by the grouping structure in the
# population. It ranges from 0, if the grouping conveys no information, to 1,
# if all observations in a group are identical. It is computed by dividing the
# random effect variance by the total variance, i.e. the sum of the random
# effect variance and the residual variance.
print(performance::icc(model = mod1))

# Plot of the model (effect displays for the terms of mod1)
plot(effects::allEffects(mod1))

print(summary(mod1))
# Author's interpretation of the summary: for a fixed subject, the probability
# of wheeze decreases with age, and it does not differ significantly between
# the smoke and no-smoke groups.





