# NOTE(review): this clears every object in the calling environment before
# the simulations start.
rm(list = ls())

# Data1 -------------------------------------------------------------------
# Gaussian linear model: intercept plus two independent normal predictors
# (mean 26, variance 2.5), residual sd `sigmay`.
set.seed(210904)
n <- 50
b <- c(-2.3, 1.9, -0.78)
sigmay <- 1.67
X <- mvtnorm::rmvnorm(n, mean = c(26, 26), sigma = diag(2.5, 2))
# Prepend the intercept column so that X matches the three coefficients in b.
X <- model.matrix(~ X)
y <- X %*% b + rnorm(n, sd = sigmay)
datax <- data.frame(
  eai = y,
  extrav = X[, 2],
  social = X[, 3]
)
save(
  datax,
  file = "/home/antonio/MEGA/Lavoro_sync/Didattica/2021_2022/smda/labs/data/data1.Rda"
)
# eai: Total score from the Exercise Addiction Inventory (EAI), a short screening tool consisting of six questions based upon
# six general components of addiction. In general, a score greater than 24 indicates exercise addiction.
# extrav: Subscale Extraversion from NEO PI-R personality inventory
# social: Subscale Sociality from Short-Form 36 (SF-36), a questionnaire used to assess health-related quality of life. 
# The subscale refers to social functioning.
# Data were inspired by: Lichtenstein, M. B., Christiansen, E., Elklit, A., Bilenberg, N., & Støving, R. K. (2014). Exercise addiction: a study of eating disorder symptoms, quality of life, personality traits and attachment styles. Psychiatry research, 215(2), 410-416.


# Data2 -------------------------------------------------------------------
# Nine correlated, integer-rounded predictors; the response is a positive,
# skewed transform of a Gaussian linear predictor.
set.seed(220904)
n <- 120
J <- 9
b <- runif(J + 1, -0.15, 0.35)
sigmay <- 2.11
R <- clusterGeneration::rcorrmatrix(J, 0.5)
X <- mvtnorm::rmvnorm(n, mean = rep(5, J), sigma = R * 2.5)
# Round the predictors to integers and add the intercept column.
X <- model.matrix(~ round(X))
mu <- X %*% b
y0 <- rnorm(n, mu, sigmay)
#y0[y0<0]=0
# Rescale by the sample mean, then exponentiate to get strictly positive values.
y <- exp(y0 / mean(y0))
datax <- data.frame(
  rts = y,
  novelty_seek = X[, 2],
  harm_avoid = X[, 3],
  reward_dep = X[, 4],
  anger = X[, 5],
  confusion = X[, 6],
  tension = X[, 7],
  fatigue = X[, 8],
  depress = X[, 9],
  activity = X[, 10]
)
save(
  datax,
  file = "/home/antonio/MEGA/Lavoro_sync/Didattica/2021_2022/smda/labs/data/data2.Rda"
)
# rts: Reaction times as computed using the Multi-Attribute Task Battery, a set of tasks used in laboratory studies of operator performance and workload.
# The Multi-Attribute Task Battery is sensitive to sleep-deprivation effects, especially the tracking task and the reaction time measures embedded within the monitoring subtasks.
# The outcome variable has been recorded during sleep deprivation.
# Personality measures -traits: novelty seeking (novelty_seek), harm avoidance (harm_avoid), reward dependence (reward_dep).
# Personality measures -states: anger, confusion, tension, fatigue, depression, activity.
# Data were inspired by: Carlozzi, N. E., Horner, M. D., Kose, S., Yamanaka, K., Mishory, A., Mu, Q., ... & George, M. S. (2010). Personality and reaction time after sleep deprivation. Current Psychology, 29(1), 24-33.



# Data3 -------------------------------------------------------------------
# Two composite scores (row means of correlated items) plus a binary group;
# the mean structure has a cubic term and a score-by-group interaction.
set.seed(220906)
n <- 250
J <- 9
b <- c(0, runif(4, -1.25, 2.35))
sigmay <- 2.11
R <- clusterGeneration::rcorrmatrix(J, 0.5)
X <- mvtnorm::rmvnorm(n, mean = rep(5, J), sigma = R * 2.5)
# Composite scores: mean of items 1-3 and mean of items 4-9.
x1 <- apply(X[, 1:3], MARGIN = 1, FUN = mean)
x2 <- apply(X[, 4:9], MARGIN = 1, FUN = mean)
x3 <- factor(rbinom(n, 1, 0.5), labels = c("M", "F"))
# Five design columns: intercept, x1, x2^3, group, x1-by-group.
X <- model.matrix(~ x1 + I(x2^3) + x3 + x1:x3)
mu <- X %*% b
y <- rnorm(n, mu, sigmay)
y <- exp(y / mean(y))
datax <- data.frame(
  rts = y,
  pers_traits = x1,
  pers_states = x2,
  group = x3
)
save(
  datax,
  file = "/home/antonio/MEGA/Lavoro_sync/Didattica/2021_2022/smda/labs/data/data3.Rda"
)
# rts: Reaction times as computed using the Multi-Attribute Task Battery, a set of tasks used in laboratory studies of operator performance and workload.
# The Multi-Attribute Task Battery is sensitive to sleep-deprivation effects, especially the tracking task and the reaction time measures embedded within the monitoring subtasks.
# The outcome variable has been recorded during sleep deprivation.
# Personality measures -traits: mean of novelty seeking (novelty_seek), harm avoidance (harm_avoid), reward dependence (reward_dep).
# Personality measures -states: mean of anger, confusion, tension, fatigue, depression, activity.




# Data4 -------------------------------------------------------------------
# Repeated-measures design: n subjects, M measurements each, with a
# subject-level random intercept, a between-subject group factor and a
# within-subject continuous dose.
set.seed(210907)
n <- 100 # no. of statistical units (subjects)
M <- 15 # no. of repeated measures (for each statistical unit)
b <- c(0.98, -2.1, 0.8, 1.78)
# One random intercept per subject (not per measurement occasion), so the
# effect lines up with the `sbj` identifier built below.
# NOTE(review): this changes the simulated values relative to earlier runs
# of the script, where the intercept was shared by blocks of n rows.
eta <- rnorm(n, 0, 1.25)
e <- rnorm(n * M, 0, 2.65)
# Group is constant within subject: draw once per subject, repeat M times.
x1 <- factor(rep(rbinom(n, 1, 0.5), each = M), labels = c("LD", "HD"))
# Dose varies across the M trials of every subject.
x2 <- runif(n * M, 27, 102)
# Columns: intercept, group, dose, group-by-dose (matches the 4 entries of b).
X <- model.matrix(~ x1 + x2 + x1:x2)
# Rows are ordered subject by subject, hence `each = M` for the random effect.
y <- X %*% b + rep(eta, each = M) + e
datax <- data.frame(sbj = as.character(rep(1:n, each = M)), x1, x2, y)
colnames(datax) <- c("sbj", "group", "xenazine", "whodas")
save(datax, file = "/home/antonio/MEGA/Lavoro_sync/Didattica/2021_2022/smda/labs/data/data4.Rda")
# whodas: WHO Disability Assessment Schedule (total score), used to provide a measure of daily
# functioning and disability in neuropsychiatric disorders.
# group: Boolean variable indicating patients with low Huntington disease vs. high Huntington disease.
# xenazine: VMAT2 inhibitors dose for each m=1,..,M trial
# sbj: code indicating the repeated measures for each patient.
# Inspired by: Downing, N. R., Kim, J. I., Williams, J. K., Long, J. D., Mills, J. A., & Paulsen, J. S. (2014). WHODAS 2.0 in prodromal Huntington disease: measures of functioning in neuropsychiatric disease. European Journal of Human Genetics, 22(8), 958-963.



# Data5 -------------------------------------------------------------------
# Heteroscedastic linear model: the residual sd grows exponentially with x2.
set.seed(210910)
n <- 100
J <- 4
b <- c(0, runif(4, -2, 2))
# R and XJ are not used further down in this section, but both calls draw
# random numbers, so they are kept to leave the RNG stream untouched.
R <- clusterGeneration::rcorrmatrix(3, 0.5)
XJ <- mvtnorm::rmvnorm(n, mean = rep(5, 3), sigma = R * 2.5)
x1 <- factor(rbinom(n, 1, 0.5), labels = c("S", "C"))
x2 <- rnorm(n, 6, 2)
x3 <- runif(n, 1, 6)
X <- model.matrix(~ x1 + x2 + x3 + x1:x2)
# One draw per row: mean from the linear predictor, sd = exp(0.2 * x2).
y <- vapply(
  seq_len(n),
  function(i) rnorm(1, mean = X[i, ] %*% b, sd = exp(0.2 * x2[i])),
  numeric(1)
)
datax <- data.frame(y = y, x1 = x1, x2 = x2, x3 = x3)
#save(datax,file = "/home/antonio/MEGA/Lavoro_sync/Didattica/2021_2022/smda/labs/data/data5.Rda")
# Quick check: OLS estimates next to the true coefficients.
summary(lm(y ~ x1 + x2 + x3 + x1:x2, data = datax))
b

# Baseline OLS fit and graphical/formal checks for non-constant variance.
mod0 <- lm(y ~ x1 + x2 + x3 + x1:x2, data = datax)
plot(performance::check_heteroscedasticity(mod0))
lmtest::bptest(mod0)
# Residuals against each regressor in turn.
plot(residuals(mod0) ~ x1)
plot(x2, residuals(mod0))
plot(x3, residuals(mod0))
# Fifth design column is the x1:x2 interaction. Take it from the model
# matrix: `mod0$qr$qr` holds the compact QR factorisation, not the regressors.
plot(model.matrix(mod0)[, 5], residuals(mod0))

# Same mean model with heteroscedasticity-consistent (HC1) standard errors.
estimatr::lm_robust(formula = y ~ x1 + x2 + x3 + x1:x2, data = datax, se_type = "HC1")

# GLS fits that model the variance pattern explicitly through nlme variance
# functions, compared by AIC.
library(nlme)
# Variance as an exponential function of the fitted values (varExp default).
mod1 <- nlme::gls(
  model = y ~ x1 + x2 + x3 + x1:x2, data = datax,
  weights = nlme::varExp()
)
# Constant plus proportional variance.
mod1b <- nlme::gls(
  model = y ~ x1 + x2 + x3 + x1:x2, data = datax,
  weights = nlme::varConstProp()
)
# varComb() multiplies the variance functions it is given, so it needs at
# least one; here: a separate variance per x1 group times an exponential
# function of x2.
# NOTE(review): the components are a reviewer's choice - confirm they match
# the model intended for the lab.
mod1c <- nlme::gls(
  model = y ~ x1 + x2 + x3 + x1:x2, data = datax,
  weights = nlme::varComb(
    nlme::varIdent(form = ~ 1 | x1),
    nlme::varExp(form = ~ x2)
  )
)
plot(mod1)
AIC(mod1, mod1b, mod1c)

#boxy=caret::BoxCoxTrans(datax$y)
#datax$ynew=predict(boxy, datax$y)
#mod2 = lm(ynew~x1+x2+x3+x1:x2,data=datax)
#plot(performance::check_heteroscedasticity(mod2))

# Weighted least squares. Weights must vary with the error variance to have
# any effect: a constant weight such as 1 / var(y) reproduces the OLS fit.
# The data in this section were simulated with sd = exp(0.2 * x2), so the
# inverse-variance weights are exp(-0.4 * x2).
mod2 <- lm(
  formula = y ~ x1 + x2 + x3 + x1:x2, data = datax,
  weights = exp(-0.4 * x2)
)
# Show the OLS and WLS fits side by side.
mod0
mod2
plot(mod2, which = 3)
lmtest::bptest(mod2)

# modeling the pattern of non-constant variance: nlme::gls() by specifying varFun() function
# correct standard errors of estimates for robust inference

# No placeholder refit of mod0 here: a bare lm() call stops with a
# missing-formula error and would overwrite the fitted baseline model.

# Best-subset search ranked by adjusted R^2. leaps() works on a numeric
# matrix of predictors, so the factor x1 is dummy-coded through model.matrix()
# and the intercept column is dropped (leaps adds its own via `int = TRUE`).
leaps::leaps(
  x = model.matrix(~ x1 + x2 + x3, data = datax)[, -1],
  y = datax$y,
  int = TRUE,
  method = "adjr2"
)






