###########################################################################
### PSQ4106864 DATAVIS
### A.Y. 2024/2025
### prof. Antonio Calcagni' (antonio.calcagni@unipd.it)
###########################################################################


### CONTENTS ###########################################
## (A) Plotting in a nutshell
## (B) A step-by-step approach
########################################################


# Set environment ---------------------------------------------------------
# NOTE(review): rm(list = ls()) wipes the whole workspace and setwd() points to
# an absolute path on the author's machine. Both are kept because the relative
# dataset path below depends on the working directory: adapt the path to your
# own machine before running the script.
rm(list = ls())
setwd("/home/antonio/MEGA/Lavoro_sync/Didattica/2024_2025/datavis/")

## Data import
# Arguments are passed by name with '='. Writing 'file <- ...' inside the call
# would pass them by position only and, as a side effect, create the stray
# objects 'file', 'header' and 'sep' in the workspace.
datax <- read.csv(file = "datasets/data_anxiety.csv", header = TRUE, sep = ",")
str(datax) # compact overview of the imported data frame



# (A) Plotting in a nutshell ----------------------------------------------

# Open a new graphics window that will host the plots
# (macOS users should call quartz() instead of x11()).
x11()

# Basic scatter plot of income against age:
#  - bty = "n" removes the external frame (box) around the plot
#  - xlab / ylab set the labels of the two axes
plot(
  x = datax$age, y = datax$income,
  bty = "n", xlab = "age", ylab = "income"
)

# pch selects the plotting symbol: 20 gives filled dots ...
plot(
  x = datax$age, y = datax$income,
  bty = "n", xlab = "age", ylab = "income",
  pch = 20
)

# ... whereas 2 gives triangles.
plot(
  x = datax$age, y = datax$income,
  bty = "n", xlab = "age", ylab = "income",
  pch = 2
)

## See the Moodle file 'r_pch.pdf' for more information about the pch parameters.

# cex magnifies the plotting symbols, col changes their color.
plot(
  x = datax$age, y = datax$income,
  bty = "n", xlab = "age", ylab = "income",
  pch = 2, cex = 3, col = "red"
)

## Colors can also be specified by means of their HEX code (see: https://r-charts.com/colors/)
plot(
  x = datax$age, y = datax$income,
  bty = "n", xlab = "age", ylab = "income",
  pch = 2, cex = 3, col = "#458B74"
)

# type = "b" draws both the points (here: filled dots) and the lines
# connecting them.
plot(
  x = datax$age, y = datax$income,
  bty = "n", xlab = "age", ylab = "income",
  pch = 20, cex = 1.252, col = "#458B74",
  type = "b"
)

# type = "l" hides the points and draws the connecting lines only.
plot(
  x = datax$age, y = datax$income,
  bty = "n", xlab = "age", ylab = "income",
  pch = 20, cex = 1.252, col = "#458B74",
  type = "l"
)



# (B) A step-by-step approach ---------------------------------------------

## Suppose we are interested in creating a scatter plot for the variable 'anxiety_score' as a function of the variable 'gender'.
## In this case, the (basic) plot shows the statistical units on the x-axis and the variable being studied on the y-axis.
## Before constructing the plot, we need to set up the relevant quantities that will be used in the next lines:
summary(datax$anxiety_score)
unique(datax$gender)

# na.rm = TRUE keeps the axis limits finite even if some scores are missing
ymin <- min(datax$anxiety_score, na.rm = TRUE) # lower limit of the y-axis
ymax <- max(datax$anxiety_score, na.rm = TRUE) # upper limit of the y-axis
x <- seq_len(nrow(datax)) # variable on the x-axis (unit index); seq_len() is safe with zero rows
y <- datax$anxiety_score # y variable
iid_male <- datax$gender == "Male" # indicator (logical) variable for the first group
iid_female <- datax$gender == "Female" # indicator (logical) variable for the second group
col_male <- "#A52A2A" # color for the first group
col_female <- "#66CD00" # color for the second group

x11()
# Empty plot set up for the current case: type = "n" builds the axes without
# drawing anything, so the dummy point (0,0) never shows up in the plot region.
plot(0, 0, type = "n", xlim = c(min(x), max(x)), ylim = c(ymin, ymax),
     bty = "n", xlab = "stat units", ylab = "anxiety")
# points() adds to an existing plot: it works only if plot() has been called before
points(x[iid_male], y[iid_male], col = col_male, pch = 20, cex = 1.25)
points(x[iid_female], y[iid_female], col = col_female, pch = 20, cex = 1.25)
title(main = "Anxiety", cex.main = 1.35, line = 0, adj = 0.25) # add the title
# line moves the text along the y-axis (e.g., -1, 0, 1)
# adj moves the text along the x-axis, it is a number between 0 (left) and 1 (right)

# ..the same as before but now using lines:
x11()
# Empty plot set up for the current case: type = "n" builds the axes without
# drawing the dummy point (0,0).
plot(0, 0, type = "n", xlim = c(min(x), max(x)), ylim = c(ymin, ymax),
     bty = "n", xlab = "stat units", ylab = "anxiety")
# lines() adds to an existing plot: it works only if plot() has been called before.
# pch and cex are not passed here because they only affect point symbols,
# which lines() does not draw.
lines(x[iid_male], y[iid_male], col = col_male)
lines(x[iid_female], y[iid_female], col = col_female)
title(main = "Anxiety", cex.main = 1.35, line = 0, adj = 0.25) # add the title

# ..the same as before but now using dots and lines:
x11()
# Empty plot set up for the current case: type = "n" builds the axes without
# drawing the dummy point (0,0).
plot(0, 0, type = "n", xlim = c(min(x), max(x)), ylim = c(ymin, ymax),
     bty = "n", xlab = "stat units", ylab = "anxiety")
# points() adds to an existing plot; type = "b" draws the dots and the lines connecting them
points(x[iid_male], y[iid_male], col = col_male, pch = 20, cex = 1.25, type = "b")
points(x[iid_female], y[iid_female], col = col_female, pch = 20, cex = 1.25, type = "b")
title(main = "Anxiety", cex.main = 1.35, line = 0, adj = 0.25) # add the title

# ..adding extra elements (e.g., average line for the y variable or median value of x)
abline(h = mean(y), col = "black", lty = 2) # horizontal line at the mean of y; lty = 2 dashed line
abline(v = median(x), col = "black", lty = 1) # vertical line at the median of x; lty = 1 solid line

# ..manually add axes
x11()
# Empty plot without axes: type = "n" avoids drawing the dummy point (0,0),
# axes = FALSE removes the default axes so that they can be added by hand below.
plot(0, 0, type = "n", xlim = c(min(x), max(x)), ylim = c(ymin, ymax),
     bty = "n", xlab = "stat units", ylab = "anxiety", axes = FALSE)
# points() adds to an existing plot; type = "b" draws the dots and the lines connecting them
points(x[iid_male], y[iid_male], col = col_male, pch = 20, cex = 1.25, type = "b")
points(x[iid_female], y[iid_female], col = col_female, pch = 20, cex = 1.25, type = "b")
title(main = "Anxiety", cex.main = 1.35, line = 0, adj = 0) # add the title (adj = 0: left-aligned)

# x-axis with three manually chosen ticks (units 1, 100 and 200)
# NOTE(review): assumes the dataset has at least 200 rows - confirm
axis(side = 1, at = c(x[1], x[100], x[200]), labels = c(x[1], x[100], x[200]))

# y-axis with ten equally spaced ticks between min(y) and max(y), rounded to two decimals
yyd <- round(seq(from = min(y), to = max(y), length.out = 10), 2)
axis(side = 2, at = yyd, labels = yyd)

# ..add some floating text
text(x = 100, y = max(y), labels = "here I am", cex = 2, col = "#8B4513") # the coordinates (x,y) are on the current x and y scales

lbl <- paste0("Avg=", round(mean(y), 3)) # label reporting the mean of y
text(x = 100, y = min(y), labels = lbl, cex = 1.5, col = "#8B4513")
  
## As with points(), lines() and title(), further graphical parameters are also available for axis().
## See the file 'r_par.pdf' on the Moodle page of the course.








