# Compute a new variable as the per-respondent mean of several variables.
# rowwise() makes mean() run once per row instead of once over the whole
# columns. ungroup() then removes the row-wise grouping again so that later
# dplyr verbs applied to `gss` work on whole columns as usual.
# Note: mean() is called without na.rm, so a row with a missing value in any
# of the listed variables gets NA for the new variable.
gss <- gss %>%
  rowwise() %>%
  mutate(
    new_variable_here = mean(
      c(variable_1_here, variable_2_here, variable_3_here)
    )
  ) %>%
  ungroup()
computing 2 (with recoding) - sample 1
# Happiness index (sample 1).
# Each of the three items is reverse-coded (1 <-> 3) so that a higher value
# stands for the more positive answer, and the recoded items are then averaged
# per respondent. frq() is called on each original item first so its coding
# can be checked before recoding.

frq(gss$happy, out = "v")
gss$happynew <- rec(
  gss$happy,
  rec = "1=3 [very happy]; 2=2 [pretty happy]; 3=1 [not too happy]",
  append = FALSE
)

frq(gss$life, out = "v")
gss$lifenew <- rec(
  gss$life,
  rec = "1=3 [exciting]; 2=2 [routine]; 3=1 [dull]",
  append = FALSE
)

frq(gss$satfin, out = "v")
gss$satfinnew <- rec(
  gss$satfin,
  rec = "1=3 [satisfied]; 2=2 [more or less]; 3=1 [not at all]",
  append = FALSE
)

# Row-wise mean of the three recoded items; ungroup() drops the row-wise
# grouping afterwards so later dplyr verbs on `gss` are not run row by row.
# A respondent with a missing value on any item gets NA for hapindex.
gss <- gss %>%
  rowwise() %>%
  mutate(hapindex = mean(c(happynew, lifenew, satfinnew))) %>%
  ungroup()
computing 3 (with recoding) - sample 2
# Social life index (sample 2).
# The four 7-point items are reverse-coded (1 <-> 7) so that a higher value
# stands for more frequent contact, and the recoded items are then averaged
# per respondent. frq() is called on each original item first so its coding
# can be checked before recoding.

# All four items share the same recoding scheme, so it is defined once.
reverse_7pt_codes <- "1=7 [almost daily];2=6 [once or twice a week]; 3=5 [several times a month]; 4=4 [about once a month]; 5=3 [several times a year]; 6=2 [about once a year];7=1 [never]"

frq(gss$socrel, out = "v")
gss$socrelnew <- rec(gss$socrel, rec = reverse_7pt_codes, append = FALSE)

frq(gss$socommun, out = "v")
gss$socommunnew <- rec(gss$socommun, rec = reverse_7pt_codes, append = FALSE)

frq(gss$socfrend, out = "v")
gss$socfrendnew <- rec(gss$socfrend, rec = reverse_7pt_codes, append = FALSE)

frq(gss$socbar, out = "v")
gss$socbarnew <- rec(gss$socbar, rec = reverse_7pt_codes, append = FALSE)

# Row-wise mean of the four recoded items; ungroup() drops the row-wise
# grouping afterwards so later dplyr verbs on `gss` are not run row by row.
# A respondent with a missing value on any item gets NA for sociallifeindex.
gss <- gss %>%
  rowwise() %>%
  mutate(
    sociallifeindex = mean(
      c(socrelnew, socommunnew, socfrendnew, socbarnew)
    )
  ) %>%
  ungroup()
# Subsample made of the last 100 cases of `gss`.
# tail() takes the final 100 rows whatever the number of rows is; for a data
# set with 3544 rows this is the same selection as rows 3445:3544.
# Use the "gsslast100" dataset instead of "gss" in the codes, for example:
#   descr(gsslast100$variable_here, out = "v", show = "short")
gsslast100 <- tail(gss, 100)
25% simple random sample
# 25% simple random sample of the cases, drawn without replacement.
# The sample size is derived from the number of rows (886 when `gss` has 3544
# rows) rather than hard-coded, and seq_len() keeps the row index valid even
# for an empty data set.
# The draw differs on every run unless set.seed() is called beforehand.
# Use the "gssrandom25per" dataset instead of "gss" in the codes, for example:
#   descr(gssrandom25per$variable_here, out = "v", show = "short")
gssrandom25per <- gss[
  sample(
    seq_len(nrow(gss)),
    size = round(0.25 * nrow(gss)),
    replace = FALSE
  ),
]
10% systematic random sample
# 10% systematic sample: every 10th case, starting with the first row.
# Use the "gss10persystematic" dataset instead of "gss" in the codes,
# for example:
#   descr(gss10persystematic$variable_here, out = "v", show = "short")
gss10persystematic <- gss[seq(1, nrow(gss), by = 10), ]
t-test
required package(s): "tidyverse" | "parameters"
# t-test of the dependent variable across the groups of the independent
# variable; the test result is converted to a parameters table and displayed
# in HTML format.
ttest_result <- t.test(
  dependent_variable_here ~ independent_variable_here,
  data = gss
)
ttest_table <- parameters(ttest_result)
display(ttest_table, format = "html")
visualization
bar graph (for categorical variables)
required package(s): "sjPlot"
# Bar graph of the frequencies of one categorical variable.
# geom.colors sets the colour of the bars (hex code).
plot_frq(
  gss$variable_here,
  geom.colors = "#336699",
  type = "bar"
)
histogram (for continuous variables)
required package(s): "sjPlot"
# Histogram of a continuous variable (here: educ) with a normal curve
# overlaid; the two hex codes set the colour of the histogram and of the curve.
plot_frq(
  gss$educ,
  normal.curve.color = "#9b2226",
  normal.curve = TRUE,
  geom.colors = "#336699",
  type = "hist"
)
stacked bar graphs for multiple variables
required package(s): "sjPlot" | "tidyverse"
# Stacked bar graph comparing the answer distributions of several variables.
# Step 1: keep only the variables to plot and build the graph
# (coord.flip = TRUE).
graph <- gss %>%
  select(
    variable_1_here, variable_2_here, variable_3_here,
    variable_4_here, variable_5_here
  ) %>%
  plot_stackfrq(
    sort.frq = "first.asc",
    coord.flip = TRUE,
    geom.colors = "Blues",
    show.total = FALSE,
    title = "type graph title here"
  )

# Step 2: print the graph with larger fonts.
graph +
  theme(
    axis.text.x = element_text(size = 14), # change font size of x-axis labels
    axis.text.y = element_text(size = 14), # change font size of y-axis labels
    plot.title = element_text(size = 20),  # change font size of plot title
    legend.text = element_text(size = 14)  # change font size of legend
  )
stacked bar graphs for multiple variables (flipped coordinates)
required package(s): "sjPlot" | "tidyverse"
# Stacked bar graph comparing the answer distributions of several variables,
# drawn with coord.flip = FALSE.
# Step 1: keep only the variables to plot and build the graph.
graph <- gss %>%
  select(
    variable_1_here, variable_2_here, variable_3_here,
    variable_4_here, variable_5_here
  ) %>%
  plot_stackfrq(
    sort.frq = "first.asc",
    coord.flip = FALSE,
    geom.colors = "Blues",
    show.total = FALSE,
    title = "type graph title here"
  )

# Step 2: print the graph with larger fonts.
graph +
  theme(
    axis.text.x = element_text(size = 14), # change font size of x-axis labels
    axis.text.y = element_text(size = 14), # change font size of y-axis labels
    plot.title = element_text(size = 20),  # change font size of plot title
    legend.text = element_text(size = 14)  # change font size of legend
  )
# Scatterplot of two continuous variables with a loess line.
# NOTE(review): ggscatter() and stat_cor() are presumably provided by the
# "ggpubr" package - confirm it is loaded before running this chunk.
# Step 1: build the plot. add = "loess" requests a loess fit and
# conf.int = TRUE its confidence interval; point = FALSE turns off the
# individual points.
scatterplot <- ggscatter(
  gss,
  x = "variable_1_here",
  y = "variable_2_here",
  add = "loess",
  conf.int = TRUE,
  color = "black",
  point = FALSE,
  xlab = "what it measures of variable_1",
  ylab = "what it measures of variable_2"
)

# Step 2: print the plot with the correlation statistics added
# (p.accuracy and r.accuracy set the rounding of p and r).
scatterplot +
  stat_cor(p.accuracy = 0.001, r.accuracy = 0.01)
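Third step: Do not include (omit) one of the dummy variables in your model. The omitted dummy variable is called the “comparison category”; it should be used in the interpretation as well, because the coefficient of each included dummy variable is read relative to this omitted category.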