# Set environment ---------------------------------------------------------
# Start from a clean workspace so that objects left over from a previous
# session cannot leak into the analyses below.
rm(list = ls())
# Move to the course folder only when it exists on this machine. On any other
# machine the script keeps the current working directory, which must then
# contain 'utilities.R' and 'data1.Rda'.
local({
  course_dir <- "/home/antonio/MEGA/Lavoro_sync/Didattica/2021_2022/glms/"
  if (dir.exists(course_dir)) {
    setwd(course_dir)
  } else {
    message("Folder '", course_dir, "' not found; using ", getwd())
  }
})
# Load the course helper functions (presumably including exploratory_plots(),
# which is called in the descriptive analyses -- TODO confirm).
source("utilities.R")


# Data --------------------------------------------------------------------
## Consider the data stored in 'data1.Rda'. They refer to n=50 participants practicing sports and J=3 variables
## about personality, quality of life, and sport/exercise addiction. In particular:
## 'eai' is the total score from Exercise Addiction Inventory (EAI), a short screening tool consisting of six questions based upon 
## six general components of addiction. In general, a score greater than 24 indicates exercise addiction;
## 'extrav' is the score of the subscale Extraversion from NEO PI-R personality inventory;
## 'social' is the subscale Sociality from Short-Form 36 (SF-36), a questionnaire used to assess health-related quality of life. The subscale codifies the social functioning.
## The goal here is to predict the exercise addiction score (eai) as a function of extraversion and social functioning.
# Load the dataset; the file is expected to provide the data frame 'datax'
# that is used throughout the script.
load("data1.Rda")
str(datax)


# Descriptive analyses ----------------------------------------------------
# Summary statistics for every variable in the dataset.
psych::describe(datax)

# Histograms of the three variables, side by side.
# Note: dev.new() opens a new plot window with the default graphics device of
# the current platform, so the same code runs on Linux, Windows, and Mac
# (no need to switch between x11() and quartz()).
dev.new()
par(mfrow = c(1, 3))
hist(datax$eai, xlab = "eai", main = "")
hist(datax$extrav, xlab = "extrav", main = "")
hist(datax$social, xlab = "social", main = "")

# Scatterplots of the outcome (eai) against each predictor.
dev.new()
par(mfrow = c(1, 2))
plot(datax$extrav, datax$eai, main = "", xlab = "extrav", ylab = "eai", bty = "n")
plot(datax$social, datax$eai, main = "", xlab = "social", ylab = "eai", bty = "n")

# Alternatively (helper presumably defined in 'utilities.R'; the call assumes
# that 'eai' is the first column of datax -- TODO confirm):
exploratory_plots(y = datax$eai, X = datax[, -1])


# Normal linear model -----------------------------------------------------
## We want to predict 'eai' as a function of 'extrav' and 'social' by means of a Normal linear model. 
## The scatterplots of 'eai' against each predictor (see the 'Descriptive analyses' section above) suggest that the outcome varies linearly as a function of the predictors.
## Then, a Normal linear model can be adequately defined:
## eai = b0 + extrav*b1 + social*b2 + e
## e ~ N(0,sigma_y)
## eai ~ N(mu,sigma_y), with mu = b0 + extrav*b1 + social*b2 
# Additive model: eai as a function of the main effects of extrav and social.
defMod <- eai ~ extrav + social
# Fit the Normal linear model on the loaded data.
estMod <- lm(defMod, data = datax)
# Print the coefficient table and the fit statistics of the estimated model.
summary(estMod)

# The output can be interpreted as follows:
# Residuals: statistics for the residuals of the model -- we will see it later
# Coeffs: extrav (b1), social (b2), Intercept (b0)
## Estimate: regression coefficients
## Std. Error: standard errors of regression coefficients
## t value: statistic (in this case: t-statistic) used to make inference on betas under H0: beta_j=0
## Pr(>|t|): probability associated to the t value under H0 (p-value)
## Residual standard error: sigma_y of the Normal linear model
## degrees of freedom: n-p-1 with p being the number of predictors (in this case, p=2) -- we will see it later
## F-statistic: statistic associated to the R2 of the model (omnibus test) -- we will see it later

# 95% confidence intervals for the regression coefficients. The confint()
# generic is called (rather than the method confint.lm() directly); for an
# object fitted with lm() it dispatches to the 'lm' method.
confint(estMod, level = 0.95)

## Partial regression plots (see slides 23 and 48, Module B) via the avPlots() function of the 'car' library.
car::avPlots(estMod, main = "", id = FALSE)


# Adding a categorical variable -------------------------------------------
# Median split of 'social': start from a vector of zeros, set to 1 the rows
# whose score is strictly above the sample median, then store it as a factor.
# which() keeps only the positions where the comparison is TRUE, so every
# other row stays at 0.
datax$social_categ <- as.factor(
  replace(numeric(NROW(datax)), which(datax$social > median(datax$social)), 1)
)

#..additively
# The model is given as a formula object (not a character string), as done for
# the first model of the script; the formula then carries the environment in
# which it is written.
mod2 <- lm(formula = eai ~ extrav + social_categ, data = datax)
summary(mod2) # visualize and compute relevant statistics of the fitted model
# Effect displays for the terms of the model ('effects' package).
plot(effects::allEffects(mod2))

#..with an interaction (a.k.a., moderation)
# Note: Sometimes, it is important to mean center both your moderator and your IV to reduce multicollinearity and make interpretation easier.
# Centering can be done using the scale function: scale(x, center = TRUE, scale = FALSE) subtracts the mean of a variable from each of its values
# (with the default scale = TRUE the values are also divided by the standard deviation). See ?scale for further details.
# The model is given as a formula object (not a character string), as done for
# the first model of the script; 'extrav*social_categ' expands to both main
# effects plus their interaction.
mod3 <- lm(formula = eai ~ extrav * social_categ, data = datax)
summary(mod3)
# Effect displays for the terms of the model ('effects' package).
plot(effects::allEffects(mod3))

#alternatively:
# Regression lines of eai on extrav, one for each level of social_categ.
rockchalk::plotSlopes(model = mod3, plotx = "extrav", modx = "social_categ")

#..by using the variable social as is
# Same interaction model, with the moderator 'social' kept on its original
# (non-dichotomized) scale. The model is given as a formula object (not a
# character string), as done for the first model of the script.
mod4 <- lm(formula = eai ~ extrav * social, data = datax)
summary(mod4)
# Effect displays for the terms of the model ('effects' package).
plot(effects::allEffects(mod4))
# Regression lines of eai on extrav at selected values of 'social'; here the
# values are chosen with the "quantile" option of plotSlopes().
rockchalk::plotSlopes(model = mod4, plotx = "extrav", modx = "social", modxVals = "quantile") #see also: ..modxVals = "std.dev"




