install and load multiple packages
Copy while (dev.cur() > 1) dev.off()
# Packages used throughout this cheat sheet.
packages <- c(
  "corrplot", "tidyverse", "ggpubr",
  "Hmisc", "parameters", "performance",
  "psych", "see", "sjlabelled", "sjmisc", "sjPlot"
)

# Snapshot of the installed packages, taken once before the loop starts.
installed_packages <- rownames(installed.packages())

for (pkg in packages) {
  if (pkg %in% installed_packages) {
    message(paste("Package already installed:", pkg))
  } else {
    message(paste("Installing package:", pkg))
    install.packages(pkg, dependencies = TRUE)
  }
  # Attach the package whether it was just installed or was already present.
  library(pkg, character.only = TRUE)
}
install and load a single package
# Install the package only when it is not available yet, then attach it.
# requireNamespace() checks availability without attaching the package and
# without the warning that require() emits for a missing package; library()
# then fails loudly if the installation did not succeed.
if (!requireNamespace("PackageNameHere", quietly = TRUE)) {
  install.packages("PackageNameHere", dependencies = TRUE)
}
library("PackageNameHere")
load GSS
required package(s): "sjlabelled"
# Download the zipped GSS archive into a temporary file.
temp <- tempfile(fileext = ".zip")
download.file("https://drive.google.com/uc?export=download&id=1mF7gMY4aU9amTgYLSVOyVQaHT_opDUbj", temp, mode = "wb")

# Extract only the 2022 Stata file. The archive member path already starts
# with "OrigData/", so extracting into exdir = "OrigData" creates the nested
# "OrigData/OrigData/..." path that is read below.
unzip(temp, files = "OrigData/2022/GSS2022.dta", exdir = "OrigData")

# The archive is no longer needed once the file has been extracted.
unlink(temp)

# Read the Stata file and build a lookup table of variable labels.
gss <- haven::read_dta(file.path("OrigData", "OrigData", "2022", "GSS2022.dta"))
key <- as.data.frame(get_label(gss))
frequency table (for categorical variables)
required package(s): "sjmisc"
Copy frq(gss$variable_here, out = "v")
descriptive table (for continuous variables)
required package(s): "sjmisc"
Copy descr(gss$variable_here, out = "v", show = "short")
recoding
required package(s): "sjmisc"
(1) merging values (categorical to categorical)
1.2. recoding (merging values with 2 values)
Copy gss$new_variable_here <- rec(gss$original_variable_here, rec =
"1,2=1 [label1];
3,4=2 [label2]", append = FALSE)
1.3. recoding (merging values with 3 values)
# Merge the original codes into three groups. Each original value appears in
# exactly one rule, and each group gets its own target code (1, 2, 3).
gss$new_variable_here <- rec(gss$original_variable_here, rec =
"1,2,3=1 [label1];
4,5=2 [label2];
6,7,8=3 [label3]", append = FALSE)
1.4. recoding (merging values with 4 values)
# Merge the original codes into four groups (target codes 1 to 4).
# Adjust the original values on the left-hand side to match your variable.
gss$new_variable_here <- rec(gss$original_variable_here, rec =
"1,2=1 [label1];
3,4=2 [label2];
5,6=3 [label3];
7,8=4 [label4]", append = FALSE)
1.5. recoding (merging values with 5 values)
# Merge the original codes into five groups (target codes 1 to 5), each with
# its own label.
gss$new_variable_here <- rec(gss$original_variable_here, rec =
"1,2=1 [label1];
3,4,5=2 [label2];
6=3 [label3];
7,8=4 [label4];
9,10=5 [label5]", append = FALSE)
1.6. recoding (merging values with 6 values)
Copy gss$new_variable_here <- rec(gss$original_variable_here, rec =
"1,2=1 [label1];
3,4,5=2 [label2];
6,7,8=3 [label3];
9,10=4 [label4];
11,12=5 [label5];
13,14,15=6 [label6]", append = FALSE)
1.7. recoding (merging values with 7 values)
Copy gss$new_variable_here <- rec(gss$original_variable_here, rec =
"1,2=1 [label1];
3,4,5=2 [label2];
6,7,8=3 [label3];
9,10=4 [label4];
11,12=5 [label5];
13,14,15=6 [label6];
16,17,18=7 [label7]", append = FALSE)
(2) reversing values (categorical to categorical)
2.2. recoding (reversing values with 2 values)
Copy gss$new_variable_here <- rec(gss$original_variable_here, rec =
"1=2 [label1];
2=1 [label2]", append = FALSE)
2.3. recoding (reversing values with 3 values)
Copy gss$new_variable_here <- rec(gss$original_variable_here, rec =
"1=3 [label1];
2=2 [label2];
3=1 [label3]", append = FALSE)
2.4. recoding (reversing values with 4 values)
Copy gss$new_variable_here <- rec(gss$original_variable_here, rec =
"1=4 [label1];
2=3 [label2];
3=2 [label3];
4=1 [label4]", append = FALSE)
2.5. recoding (reversing values with 5 values)
Copy gss$new_variable_here <- rec(gss$original_variable_here, rec =
"1=5 [label1];
2=4 [label2];
3=3 [label3];
4=2 [label4];
5=1 [label5]", append = FALSE)
2.6. recoding (reversing values with 6 values)
Copy gss$new_variable_here <- rec(gss$original_variable_here, rec =
"1=6 [label1];
2=5 [label2];
3=4 [label3];
4=3 [label4];
5=2 [label5];
6=1 [label6]", append = FALSE)
2.7. recoding (reversing values with 7 values)
Copy gss$new_variable_here <- rec(gss$original_variable_here, rec =
"1=7 [label1];
2=6 [label2];
3=5 [label3];
4=4 [label4];
5=3 [label5];
6=2 [label6];
7=1 [label7]", append = FALSE)
(3) transforming continuous variables into groups (continuous to categorical)
3.2. recoding (transforming continuous variables into groups with 2 values)
Copy gss$new_variable_here <- rec(gss$original_variable_here, rec =
"0:40=1 [label1];
41:100=2 [label2]", append = FALSE)
3.3. recoding (transforming continuous variables into groups with 3 values)
Copy gss$new_variable_here <- rec(gss$original_variable_here, rec =
"1:10=1 [label1];
11:20=2 [label2];
21:100=3 [label3]", append = FALSE)
3.4. recoding (transforming continuous variables into groups with 4 values)
Copy gss$new_variable_here <- rec(gss$original_variable_here, rec =
"0:10=1 [label1];
11:20=2 [label2];
21:40=3 [label3];
41:100=4 [label4]", append = FALSE)
3.5. recoding (transforming continuous variables into groups with 5 values)
Copy gss$new_variable_here <- rec(gss$original_variable_here, rec =
"1:10=1 [label1];
11:20=2 [label2];
21:30=3 [label3];
31:40=4 [label4];
41:100=5 [label5]", append = FALSE)
3.6. recoding (transforming continuous variables into groups with 6 values)
Copy gss$new_variable_here <- rec(gss$original_variable_here, rec =
"1:10=1 [label1];
11:20=2 [label2];
21:30=3 [label3];
31:40=4 [label4];
41:50=5 [label5];
51:100=6 [label6]", append = FALSE)
3.7. recoding (transforming continuous variables into groups with 7 values)
Copy gss$new_variable_here <- rec(gss$original_variable_here, rec =
"1:10=1 [label1];
11:20=2 [label2];
21:30=3 [label3];
31:40=4 [label4];
41:50=5 [label5];
51:60=6 [label6];
61:100=7 [label7]", append = FALSE)
3.8. recoding (transforming continuous variables into groups with 8 values)
Copy gss$new_variable_here <- rec(gss$original_variable_here, rec =
"1:10=1 [label1];
11:20=2 [label2];
21:30=3 [label3];
31:40=4 [label4];
41:50=5 [label5];
51:60=6 [label6];
61:70=7 [label7];
71:100=8 [label8]", append = FALSE)
computing
required package(s): "tidyverse"
computing 1
# Compute the mean of several variables for each respondent (row).
# rowwise() makes mean() operate within each row; ungroup() removes that
# row-wise grouping afterwards so later steps work on whole columns again.
gss <- gss %>%
  rowwise() %>%
  mutate(new_variable_here = mean(c(variable_1_here, variable_2_here, variable_3_here))) %>%
  ungroup()
computing 2 (with recoding) - sample 1
Copy frq(gss$happy, out = "v")
gss$happynew <- rec(gss$happy, rec =
"1=3 [very happy];
2=2 [pretty happy];
3=1 [not too happy]", append = FALSE)
frq(gss$life, out = "v")
gss$lifenew <- rec(gss$life, rec =
"1=3 [exciting];
2=2 [routine];
3=1 [dull]", append = FALSE)
frq(gss$satfin, out = "v")
gss$satfinnew <- rec(gss$satfin, rec =
"1=3 [satisfied];
2=2 [more or less];
3=1 [not at all]", append = FALSE)
# Happiness index: per-respondent mean of the three reversed items.
# ungroup() drops the row-wise grouping so it does not leak into later steps.
gss <- gss %>%
  rowwise() %>%
  mutate(hapindex = mean(c(happynew, lifenew, satfinnew))) %>%
  ungroup()
computing 3 (with recoding) - sample 2
Copy frq(gss$socrel, out = "v")
gss$socrelnew <- rec(gss$socrel, rec =
"1=7 [almost daily];
2=6 [once or twice a week];
3=5 [several times a month];
4=4 [about once a month];
5=3 [several times a year];
6=2 [about once a year];
7=1 [never]", append = FALSE)
frq(gss$socommun, out = "v")
gss$socommunnew <- rec(gss$socommun, rec =
"1=7 [almost daily];
2=6 [once or twice a week];
3=5 [several times a month];
4=4 [about once a month];
5=3 [several times a year];
6=2 [about once a year];
7=1 [never]", append = FALSE)
frq(gss$socfrend, out = "v")
gss$socfrendnew <- rec(gss$socfrend, rec =
"1=7 [almost daily];
2=6 [once or twice a week];
3=5 [several times a month];
4=4 [about once a month];
5=3 [several times a year];
6=2 [about once a year];
7=1 [never]", append = FALSE)
frq(gss$socbar, out = "v")
gss$socbarnew <- rec(gss$socbar, rec =
"1=7 [almost daily];
2=6 [once or twice a week];
3=5 [several times a month];
4=4 [about once a month];
5=3 [several times a year];
6=2 [about once a year];
7=1 [never]", append = FALSE)
# Social life index: per-respondent mean of the four reversed items.
# ungroup() drops the row-wise grouping so it does not leak into later steps.
gss <- gss %>%
  rowwise() %>%
  mutate(sociallifeindex = mean(c(socrelnew, socommunnew, socfrendnew, socbarnew))) %>%
  ungroup()
chi square
required package(s): "sjPlot"
Copy sjt.xtab(gss$independent_variable_here, gss$dependent_variable_here, show.row.prc = TRUE)
sampling: data creation for subsamples
required package(s): no package needed (the descr() examples in the comments use "sjmisc")
non-random (last 100 cases)
# Take the last 100 rows, whatever the number of rows in gss is
# (instead of hard-coding the row indices of one particular data release).
gsslast100 <- tail(gss, 100)
# use "gsslast100" dataset instead of "gss" in the codes.
# for example: descr(gsslast100$variable_here, out = "v", show = "short")
25% simple random sample
# Draw a simple random sample of 25% of the rows without replacement.
# The sample size is derived from nrow(gss) rather than hard-coded, and
# seq_len() stays correct even for an empty data frame.
gssrandom25per <- gss[sample(seq_len(nrow(gss)), round(0.25 * nrow(gss)), replace = FALSE), ]
# use "gssrandom25per" dataset instead of "gss" in the codes.
# for example: descr(gssrandom25per$variable_here, out = "v", show = "short")
10% systematic random sample
# Systematic sample: keep every 10th row, starting with the first one.
gss10persystematic <- gss[seq(1, nrow(gss), by = 10), ]
# use "gss10persystematic" dataset instead of "gss" in the codes.
# for example: descr(gss10persystematic$variable_here, out = "v", show = "short")
ttest
required package(s): "tidyverse" | "parameters"
Copy t.test(dependent_variable_here ~ independent_variable_here, data = gss) %>%
parameters() %>%
display(format="html")
visualization
bar graph (for categorical variables)
required package(s): "sjPlot"
Copy plot_frq(gss$variable_here, type = "bar", geom.colors = "#336699")
histogram (for continuous variables)
required package(s): "sjPlot"
Copy plot_frq(gss$educ, type = "hist",
geom.colors = "#336699",
normal.curve = TRUE,
normal.curve.color = "#9b2226")
stacked bar graphs for multiple variables
required package(s): "sjPlot" | "tidyverse"
Copy graph <- gss %>%
select (variable_1_here, variable_2_here, variable_3_here, variable_4_here, variable_5_here) %>%
plot_stackfrq(sort.frq = "first.asc", coord.flip = TRUE, geom.colors = "Blues", show.total = FALSE,
title = "type graph title here")
graph + theme(
axis.text.x = element_text(size=14), # change font size of x-axis labels
axis.text.y = element_text(size=14), # change font size of y-axis labels
plot.title=element_text(size=20), # change font size of plot title
legend.text = element_text(size=14)) # change font size of legend
stacked bar graphs for multiple variables (flip coordination)
required package(s): "sjPlot" | "tidyverse"
Copy graph <- gss %>%
select (variable_1_here, variable_2_here, variable_3_here, variable_4_here, variable_5_here) %>%
plot_stackfrq(sort.frq = "first.asc", coord.flip = FALSE, geom.colors = "Blues", show.total = FALSE,
title = "type graph title here")
graph + theme(
axis.text.x = element_text(size=14), # change font size of x-axis labels
axis.text.y = element_text(size=14), # change font size of y-axis labels
plot.title=element_text(size=20), # change font size of plot title
legend.text = element_text(size=14)) # change font size of legend
stacked bar graphs by different groups
required package(s): "sjPlot"
Copy plot_xtab(gss$variable_1_here, gss$variable_2_here, show.total=FALSE, show.n = FALSE)
bar graphs between groups (margin=row)
required package(s): "sjPlot"
Copy plot_xtab(gss$variable_1_here, gss$variable_2_here, show.total=FALSE, show.n = FALSE, margin = "row")
scatterplot with two continuous variables
required package(s): "sjPlot"
Copy plot_scatter(gss, variable_1_here, variable_2_here, jitter = TRUE,
fit.grps = lm, show.ci = TRUE, grid = TRUE,
title = "type graph title here")
scatterplot with two continuous variables by groups
required package(s): "sjPlot"
Copy plot_scatter(gss, variable_1_here, variable_2_here, categoricalvariable_here, jitter = TRUE,
fit.grps = lm, show.ci = TRUE, grid = TRUE,
title = "type graph title here")
correlation analysis
correlation analysis structure
Correlation analysis examines the linear relationship of two continuous variables.
If the p-value is statistically significant (p < 0.05), interpret the strength of the correlation coefficient r as follows:
| r | < 0.3 … weak correlation
0.3 ≤ | r | < 0.5 … moderate correlation
| r | ≥ 0.5 … strong correlation
The order of the variables does not matter.
(1) correlation analysis table
required package(s): "sjPlot"
Copy tab_corr (gss[, c("variable_1_here", "variable_2_here")],
wrap.labels = 30, p.numeric = TRUE, triangle="lower", na.deletion = "pairwise")
(2) correlation scatterplot graph
xlab: "what it measures column" of variable 1 (x)
ylab: "what it measures column" of variable 2 (y)
required package(s): "ggpubr"
# Scatterplot of two variables showing only the loess fit line with its
# confidence band (the individual points are hidden via point = FALSE).
scatterplot <- ggscatter(gss, x = "variable_1_here", y = "variable_2_here",
  add = "loess", conf.int = TRUE, color = "black", point = FALSE,
  xlab = "what it measures of variable_1", ylab = "what it measures of variable_2")
# Add the correlation coefficient and its p-value to the plot.
scatterplot + stat_cor(p.accuracy = 0.001, r.accuracy = 0.01)
(3) correlation matrix
required package(s): "sjPlot"
Copy tab_corr (gss[, c("variable_1_here", "variable_2_here", "variable_3_here", "variable_4_here", "variable_5_here")],
wrap.labels = 30, p.numeric = TRUE, triangle="lower", na.deletion = "pairwise")
(4) scatterplot matrix
required package(s): "psych"
# Scatterplot matrix of the selected variables. TRUE/FALSE are spelled out
# because T and F are ordinary variables that can be reassigned.
pairs.panels(gss[, c("variable_1_here", "variable_2_here", "variable_3_here", "variable_4_here", "variable_5_here")],
  ellipses = FALSE, scale = FALSE, show.points = FALSE, stars = TRUE, ci = TRUE)
(5) correlogram
required package(s): "corrplot" | "Hmisc"
# Variables to include in the correlogram.
selectedvariables <- c("variable_1_here", "variable_2_here", "variable_3_here", "variable_4_here", "variable_5_here")
# p-values for every pair of variables (used to blank out insignificant cells).
testRes <- cor.mtest(gss[, selectedvariables])
# Correlation coefficients for every pair of variables.
gssrcorr <- rcorr(as.matrix(gss[, selectedvariables]))
gsscoeff <- gssrcorr$r
# Draw the lower triangle as pies with the coefficients printed on top.
corrplot(gsscoeff, p.mat = testRes$p, method = "pie", type = "lower", insig = "blank",
  addCoef.col = "black", order = "original", diag = FALSE)$corrPos
linear regression
required package(s): "sjPlot"
(1) linear regression with 1 independent variable
# Fit the linear model, then print it as a regression table.
model1 <- lm(depvar ~ indepvar1, data = gss)
tab_model(model1, show.std = TRUE, show.ci = FALSE, collapse.se = TRUE, p.style = "stars")
(2) linear regression with 2 independent variables
# Fit the linear model, then print it as a regression table.
model2 <- lm(depvar ~ indepvar1 + indepvar2, data = gss)
tab_model(model2, show.std = TRUE, show.ci = FALSE, collapse.se = TRUE, p.style = "stars")
(3) linear regression with 3 independent variables
# Fit the linear model, then print it as a regression table.
model3 <- lm(depvar ~ indepvar1 + indepvar2 + indepvar3, data = gss)
tab_model(model3, show.std = TRUE, show.ci = FALSE, collapse.se = TRUE, p.style = "stars")
(4) linear regression with 4 independent variables
# Fit the linear model, then print it as a regression table.
model4 <- lm(depvar ~ indepvar1 + indepvar2 + indepvar3 + indepvar4, data = gss)
tab_model(model4, show.std = TRUE, show.ci = FALSE, collapse.se = TRUE, p.style = "stars")
add more independent variables with a plus (+)
logistic regression
(1) logistic regression with 1 independent variable
# Fit the logistic model, then tabulate that same model (model1).
model1 <- glm(dummydepvar ~ indepvar1, data = gss, family = binomial(link = "logit"))
tab_model(model1, show.std = TRUE, show.ci = FALSE, collapse.se = TRUE, p.style = "stars")
(2) logistic regression with 2 independent variables
# Fit the logistic model, then tabulate that same model (model2).
model2 <- glm(dummydepvar ~ indepvar1 + indepvar2, data = gss, family = binomial(link = "logit"))
tab_model(model2, show.std = TRUE, show.ci = FALSE, collapse.se = TRUE, p.style = "stars")
(3) logistic regression with 3 independent variables
# Fit the logistic model, then tabulate that same model (model3).
model3 <- glm(dummydepvar ~ indepvar1 + indepvar2 + indepvar3, data = gss, family = binomial(link = "logit"))
tab_model(model3, show.std = TRUE, show.ci = FALSE, collapse.se = TRUE, p.style = "stars")
(4) logistic regression with 4 independent variables
Copy model4 <- glm(dummydepvar ~ indepvar1 + indepvar2 + indepvar3 + indepvar4, data = gss, family = binomial(link="logit"))
tab_model(model4, show.std = TRUE, show.ci = FALSE, collapse.se = TRUE, p.style = "stars")
add more independent variables with a plus (+)
dummy variables
DUMMY EXAMPLE
First step: Check the frequency distribution of the original variable to see what the values (1, 2, 3, etc.) mean.
Code:
Copy frq(gss$happy, out = "v")
Second step : Create dummy variables for each category.
Codes:
Copy gss$veryhappy <- ifelse(gss$happy == 1, 1, 0)
gss$prettyhappy <- ifelse(gss$happy == 2, 1, 0)
gss$nottoohappy <- ifelse(gss$happy == 3, 1, 0)
Third step: Do not include (omit) one of the dummy variables in your model. The omitted dummy variable is called “comparison category” and should be used in interpretation as well.
required package(s): no package needed
dummy variable: categorical (binary)
Copy gss$var1newname <- ifelse(gss$var1 == 1, 1, 0)
gss$var2newname <- ifelse(gss$var1 == 2, 1, 0)
dummy variable: nominal/ordinal 1
Copy gss$var1newname <- ifelse(gss$var1 == 1, 1, 0)
gss$var2newname <- ifelse(gss$var1 == 2, 1, 0)
gss$var3newname <- ifelse(gss$var1 == 3, 1, 0)
dummy variable: nominal/ordinal 1 (merging categories)
Copy gss$var1newname <- ifelse(gss$var1 == 1, 1, 0)
gss$var2newname <- ifelse(gss$var1 == 2 | gss$var1 == 3 | gss$var1 == 4, 1, 0)
gss$var3newname <- ifelse(gss$var1 == 5, 1, 0)
dummy variable: nominal/ordinal 2 (merging categories)
# One dummy variable per merged group of categories. Each dummy needs its own
# name; assigning all of them to the same column would keep only the last one.
gss$var1newname <- ifelse(gss$var1 == 1 | gss$var1 == 2, 1, 0)
gss$var2newname <- ifelse(gss$var1 == 3 | gss$var1 == 4, 1, 0)
gss$var3newname <- ifelse(gss$var1 == 5 | gss$var1 == 6 | gss$var1 == 7, 1, 0)
gss$var4newname <- ifelse(gss$var1 == 8 | gss$var1 == 9 | gss$var1 == 10 | gss$var1 == 11, 1, 0)
gss$var5newname <- ifelse(gss$var1 == 12 | gss$var1 == 13 | gss$var1 == 14 | gss$var1 == 15 | gss$var1 == 16, 1, 0)
dummy variable: continuous
# Dummy variables for three ranges of a continuous variable. The boundaries
# are contiguous (<= 10, > 10 to 15, > 15), so non-integer values such as
# 10.5 or 15.5 also fall into exactly one group. For whole-number data the
# groups are the same as 0-10, 11-15 and 16+.
gss$var1newname <- ifelse(gss$var1 <= 10, 1, 0)
gss$var2newname <- ifelse(gss$var1 > 10 & gss$var1 <= 15, 1, 0)
gss$var3newname <- ifelse(gss$var1 > 15, 1, 0)
scientific notations (e.g., 2e-16)
Copy options(digits=5, scipen=15)
mean centering
# Mean-center age (subtract the mean, do not divide by the standard deviation).
# scale() returns a one-column matrix; as.numeric() turns it into a plain
# numeric vector so the new column behaves like any other variable.
gss$age_cent <- as.numeric(scale(gss$age, center = TRUE, scale = FALSE))
delete the environment
remove categories from a variable
# Drop the rows whose marital code is 2 or 3. Rows with a missing marital
# value are kept as they are: without the is.na() check, an NA in the row
# filter would be returned as a row consisting entirely of NA values.
gss <- gss[is.na(gss$marital) | (gss$marital != 2 & gss$marital != 3), ]
remove label
Copy gss$educ <- remove_all_labels(gss$educ)
rename variables
# Rename the column "agenew" to "age_groups" (dplyr syntax: new_name = old_name).
# NOTE(review): the original call used newvariablename(), which is not defined
# anywhere in this file and does not appear to be a function of the packages
# loaded above; the rename direction is assumed to be agenew -> age_groups -
# confirm.
gss <- dplyr::rename(gss, age_groups = agenew)
change the variable from continuous to categorical
Copy gss$var1 <- to_factor(gss$var1)
change the variable from categorical to continuous
Copy gss$var1 <- as.numeric(as.character(gss$var1))
show codebook
Copy view_df(gss, show.frq = TRUE, show.prc = TRUE, use.viewer=FALSE, show.na = TRUE, max.len = 300)
remove packages
Copy remove.packages ("package_you_want_to_remove")
assigning labels
Copy gss$sex<- set_labels(gss$sex,
labels = c("male" = 1,
"female" = 2))