#######################################################################
## Testing psicologico (PSP6075525)
### Modelli e metodi statistici per la misurazione in psicologia
## A.A. 2023/2024
## prof. Antonio Calcagnì (antonio.calcagni@unipd.it)
#######################################################################


## CONTENUTO DEL CODICE ###############################################
# (A) Modello Parallelo, Tau-Equivalente, Congenerico
# (B) Modello Sovra-ordinato
# (C) Modello Bi-factor
#######################################################################


# Workspace initialisation ---------------------------------------------------
# Start from a clean session: remove the objects listed by ls() and close
# every open graphics device.
rm(list = ls())
graphics.off()

# The relative paths used below ("utilities.R", "data/...") are resolved
# against this directory -- change it according to your local path!
setwd("~/MEGA/Lavoro_sync/Didattica/2023_2024/testing_psicologico/laboratorio/")

# Packages attached by the script, followed by the local helper functions.
library(lavaan)
library(semPlot)
source("utilities.R")


# (A) Parallel, Tau-Equivalent and Congeneric models -------------------------
## Source: https://ccaudek.github.io/cfa_book/cfa-confronto-tra-modelli.html

## Data
# Per the original author's note, the file provides `datax`, a list with two
# elements: `cov` (covariance matrix) and `n` (sample size).
load(file = "data/brows2015.Rdata")
str(object = datax)


## (I) Parallel model
## E[y] = 0 (mu = 0 for simplicity)
## Cov[y] = 1*phi*1 + I*tht
##   -- or: Cov[y] = l0*phi*l0 + I*theta (l0 a real parameter to be estimated,
##      different from 1)
# Within each factor the indicators share one loading label (l1 or l2) and one
# residual-variance label (tht1 or tht2), so both are constrained to be equal.
model_parallel <- "auditorymemory =~ l1*x1 + l1*x2 + l1*x3 \n visualmemory   =~ l2*x4 + l2*x5 + l2*x6
x1 ~~ tht1*x1
x2 ~~ tht1*x2
x3 ~~ tht1*x3
x4 ~~ tht2*x4
x5 ~~ tht2*x5
x6 ~~ tht2*x6
"

# The model is fitted from summary statistics (covariance matrix + sample
# size); std.lv = TRUE identifies the model by standardising the latent
# variables rather than fixing a marker loading.
fit_parallel <- cfa(
  model       = model_parallel,
  sample.cov  = datax$cov,
  sample.nobs = datax$n,
  std.lv      = TRUE
)

## (II) Tau-equivalent model
## E[y] = 0 (mu = 0 for simplicity)
## Cov[y] = 1*phi*1 + Theta_delta
##   -- or: Cov[y] = l0*phi*l0 + I*theta (l0 a real parameter to be estimated,
##      different from 1)
# Loadings are still equal within each factor (labels l1, l2), but every
# indicator now has its own residual-variance label (tht1 ... tht6).
# The six `x ~~ tht*x` lines of the model string can be omitted: per the
# original author's note, lavaan then estimates as many residual-variance
# parameters as there are observed indicators.
model_taueq <- "auditorymemory =~ l1*x1 + l1*x2 + l1*x3 \n visualmemory   =~ l2*x4 + l2*x5 + l2*x6
x1 ~~ tht1*x1
x2 ~~ tht2*x2
x3 ~~ tht3*x3
x4 ~~ tht4*x4
x5 ~~ tht5*x5
x6 ~~ tht6*x6
"

fit_taueq <- cfa(
  model       = model_taueq,
  sample.cov  = datax$cov,
  sample.nobs = datax$n,
  std.lv      = TRUE
)


## (III) Congeneric model
## E[y] = 0 (mu = 0 for simplicity)
## Cov[y] = l*phi*l + Theta_delta
# Every indicator has its own loading label (l1 ... l6) and its own
# residual-variance label (tht1 ... tht6): no equality constraints.
# The six `x ~~ tht*x` lines of the model string can be omitted: per the
# original author's note, lavaan then estimates as many residual-variance
# parameters as there are observed indicators.
model_cong <- "auditorymemory =~ l1*x1 + l2*x2 + l3*x3 \n visualmemory   =~ l4*x4 + l5*x5 + l6*x6
x1 ~~ tht1*x1
x2 ~~ tht2*x2
x3 ~~ tht3*x3
x4 ~~ tht4*x4
x5 ~~ tht5*x5
x6 ~~ tht6*x6
"

fit_cong <- cfa(
  model       = model_cong,
  sample.cov  = datax$cov,
  sample.nobs = datax$n,
  std.lv      = TRUE
)

# Model comparison
# Compare the three fits (parallel, tau-equivalent, congeneric) in one table.
# Original author's reading of the output: the parallel model fits the data
# better.
anova(
  fit_parallel,
  fit_taueq,
  fit_cong,
  test = "chisq"
)



# (B) Modello sovra-ordinato ----------------------------------------------
## More info at: https://www.frontiersin.org/articles/10.3389/fpsyg.2020.01357/full#h4
# The higher-order model (Thurstone, 1944) incorporates at least one superordinate (higher-order) factor and a series of subordinate factors upon which specified sub-group of items load.
# The higher-order model estimates two sets of loadings: those showing the relationships between the observed variables and the relevant grouping, or subordinate, factor, plus those showing the relationship between the higher-order factor and each of the subordinate factors. 
# Higher-order models are often used for theory testing (Brown, 2015), and they enable the researcher to explore theoretical understandings of the relationship between a series of sub-tests as distinct from one another, but also united by a common factor, which attempts to explain the scores in the higher-order factor.
# If the loadings between the higher-order and subordinate factors are satisfactorily high, it can be concluded that there is enough commonality between the sub-skills to justify this reporting both sub-scores and an overall score.
# It is important to note that in this model the observed variables act as indicators of the subordinate factors, and therefore, the commonality modeled by the higher-order factor is between the scales already established for each sub-group. This mediating role for the subordinate factors means that the higher-order factor, therefore, represents a “distilled” estimate of general ability rather than a more direct estimate, which accounts for commonalities between all observed variables as per the unidimensional model.

## Data
# We use the 'HolzingerSwineford1939' dataset shipped with the lavaan package.
str(HolzingerSwineford1939)

## Higher-order model (second-order CFA)
# Three first-order factors (visual, textual, speed) measured by x1-x9, plus a
# second-order factor `eta` measured by the three first-order factors.
model1 <- "visual  =~ x1 + x2 + x3 \n textual =~ x4 + x5 + x6 \n speed   =~ x7 + x8 + x9 \n eta =~ visual + textual + speed"
model1_fit <- cfa(model = model1, data = HolzingerSwineford1939)

# Standardised estimates of the model matrices.
out1 <- lavInspect(object = model1_fit, what = "std")
out1$psi   # estimated Phi matrix (original note: Phi = 0 by definition of the model)
out1$beta  # presumably the loadings of the first-order factors on `eta` -- confirm from the output

# Summary of the estimated parameter matrices. The argument is spelled out in
# full (`standardized`) instead of relying on partial matching of
# `standardize`.
summary(model1_fit, standardized = TRUE)

fitMeasures(
  object = model1_fit,
  fit.measures = c("CFI", "RMSEA", "AIC", "chisq", "npar")
)

# Reliability index for second-order models.
# NOTE(review): recent semTools releases appear to deprecate reliabilityL2()
# in favour of compRelSEM() -- confirm against the installed version.
semTools::reliabilityL2(object = model1_fit, secondFactor = "eta")


# (C) Modello Bi-factor ---------------------------------------------------
## More info at: https://www.frontiersin.org/articles/10.3389/fpsyg.2020.01357/full#h4
# The bifactor model (Holzinger and Swineford, 1937), also described as a nested-factor (NF) model (Gustafsson and Åberg-Bengtsson, 2010; Brunner et al., 2012), incorporates a general factor, which loads directly onto all of the observed variables in the model.
# One of the defining features of the bifactor model is that the grouping factors in the model are hypothesized to be orthogonal (uncorrelated) with the general factor.
# The bifactor model does not offer a “simple structure” solution in which each observed variable only loads onto a single factor (Gustafsson and Åberg-Bengtsson, 2010). Observed variables, by design, in this model load onto more than one factor, meaning that the variance explanation is split between (at least) two latencies. Each observed variable in the bifactor model is an indicator of both the general factor and one grouping factor. This means that each observed variable has two loading estimates in the model; the first will show its relationship with the general factor and the second with its allocated grouping factor.
# A key distinguishing feature between the bifactor model and the higher-order model is that the general factor is hypothesized to load directly on each of the observed variables.

## Data
load(file = "data/databi.Rdata")
str(datax)  # original note: the data frame holds eight categorical (at most ordinal) variables

# The variables must be stored in R as ordered factors. To do so we use:
datax <- as.factor_dataframe(X = datax, type = "all", ordered = TRUE)
str(datax)

# Notes: (i) as.factor_dataframe() is defined in the file utilities.R
#       (ii) The call above converts every variable of the input dataset. To
#            convert only a subset of them (e.g. only the first, third and
#            fifth variables), proceed as follows:
#            datax = as.factor_dataframe(X = datax,type = "subset",subset=c(1,3,5),ordered = TRUE)

# Descriptive statistics after the recoding: relative frequencies of the
# response categories (rows) for each item (columns).
# The columns are visited with sapply() rather than apply(datax, 2, ...):
# apply() first coerces the data frame to a character matrix, so the ordered
# factor levels are lost (categories get re-sorted alphabetically and
# categories never observed for an item are dropped). sapply() simplifies its
# result the same way apply() does, so the table layout is unchanged whenever
# the items share the same levels.
sapply(datax, function(x) prop.table(table(x)))


## Correlated two-factor model (reference baseline)
# eta1 is measured by item1-item4 and eta2 by item5-item8; no covariance
# between the two factors is fixed in the model string.
model0 <- "eta1 =~ item1+item2+item3+item4 \n eta2 =~ item5+item6+item7+item8"

# All columns of datax are declared as ordered; estimation uses DWLS.
model0_fit <- cfa(
  model     = model0,
  data      = datax,
  ordered   = colnames(datax),
  estimator = "DWLS"
)

## Bifactor model (eta0 is the general factor)
# Every item loads on the general factor eta0 and on one specific factor
# (eta1 or eta2). The three `~~ 0*` lines fix all covariances among the
# factors to zero (full orthogonality).
model1 <- "eta1 =~ item1+item2+item3+item4 \n eta2 =~ item5+item6+item7+item8 
eta0 =~ item1+item2+item3+item4+item5+item6+item7+item8 
eta0~~0*eta1 \n eta0~~0*eta2 \n 
eta1~~0*eta2
"

model1_fit <- cfa(
  model     = model1,
  data      = datax,
  ordered   = colnames(datax),
  estimator = "DWLS"
)
summary(object = model1_fit)

# Note: full orthogonality can also be obtained as follows. The model string
# carries no covariance constraints; the extra argument orthogonal = TRUE in
# the cfa() call requests them instead.
# This overwrites the `model1` and `model1_fit` objects defined earlier.
model1 <- "eta1 =~ item1+item2+item3+item4 \n eta2 =~ item5+item6+item7+item8 
eta0 =~ item1+item2+item3+item4+item5+item6+item7+item8"

model1_fit <- cfa(
  model      = model1,
  data       = datax,
  ordered    = colnames(datax),
  estimator  = "DWLS",
  orthogonal = TRUE
)

# Relax the full-orthogonality constraint to help the estimation algorithm
# converge: only the covariances between the general factor eta0 and the two
# specific factors stay fixed to zero; eta1 ~~ eta2 is no longer constrained.
model1b <- "eta1 =~ item1+item2+item3+item4 \n eta2 =~ item5+item6+item7+item8 
eta0 =~ item1+item2+item3+item4+item5+item6+item7+item8 
eta0~~0*eta1 \n eta0~~0*eta2
"

model1b_fit <- cfa(
  model     = model1b,
  data      = datax,
  ordered   = colnames(datax),
  estimator = "DWLS"
)
summary(object = model1b_fit)

# Pull the standardised estimates out of the partially orthogonal bifactor fit
# and hand the loading (lambda), residual (theta) and factor covariance (psi)
# matrices to the bifactor-indices calculator.
out <- lavInspect(object = model1b_fit, what = "std")
Lambda      <- out[["lambda"]]
Theta_delta <- out[["theta"]]
Phi         <- out[["psi"]]

res <- BifactorIndicesCalculator::bifactorIndices(
  Lambda       = Lambda,
  Theta        = Theta_delta,
  Phi          = Phi,
  standardized = TRUE
)
print(res)

# ECV index: proportion of common variance of the items in each specific factor which is due to the general factor.
# ECV_GS:    ECV of the general factor with respect to a specific factor **
# ECV_SG:    ECV of the specific factor with respect to a general factor
# ECV_SS:    ECV of a specific factor with respect to itself **
# Omega:     the omega reliability estimate for all factors **
# OmegaH:    coefficient omegaH estimates the proportion of variance in total scores that can be attributed to a single general factor **
# H:         construct replicability conceptualized by Hancock and Mueller (2001) which 
#            "represent[s] the correlation between a factor and an optimally-weighted item composite… 
#            high H values (> .80) suggest a well-defined latent variable."




