Test whether you understand all of the code below:

# Set the working directory.
# NOTE(review): setwd() is called without a directory here; pass the target
# path, e.g. setwd("path/to/folder") — confirm the intended location.
setwd()

# Report the current working directory.
getwd()

# Character vector of ten diabetes-type labels.
# The assignment has to start on the same line as the name: a line break
# before the operator ends the expression early and leaves `=c(...)` as a
# statement that cannot be parsed.
diabetes <- c(
  "Type1", "Type2", "Type1", "Type2", "Type2",
  "Type2", "Type1", "Type1", "Type2", "Type1"
)

# Install the tidyverse collection of packages.
# The package name must be a quoted string and spelled "tidyverse"; the bare
# symbol `tydiverse` is looked up as an object and the call fails.
install.packages("tidyverse")

# Attach dplyr so its functions can be called without the dplyr:: prefix.
library(dplyr)

# Demonstrate removing a package from the search path.
detach("package:stats", unload = TRUE)

# Re-attach stats straight away: sd() and shapiro.test() further down are
# stats functions and cannot be found while the package is detached.
library(stats)

# Load the USArrests data set and inspect it.
data("USArrests")

head(USArrests)

summary(USArrests)

# Standard deviation of the Murder column.
sd(USArrests$Murder)

# Open the data viewer.
View(USArrests)

# NOTE(review): UCBAdmissions is presumably already a table of counts, so
# table() would tabulate the cell values rather than raw cases — confirm intent.
table(UCBAdmissions)

# Compact display of the object's structure.
str(UCBAdmissions)

# NOTE(review): gss_cat is not created or loaded in the visible code;
# presumably it comes from the forcats package — verify it is attached.
levels(gss_cat$rincome)

# Shapiro-Wilk normality test on the Murder column.
shapiro.test(USArrests$Murder)

# load() only restores R objects written by save() (.RData/.rda files); it
# cannot read a Stata .dta file, so the call is kept for reference only and the
# import is done with haven::read_dta() below.
# load("E:/RGEB/2015 Millennium Cohort Study/mcs.dta")

library(haven)

# Import the Stata file (path is relative to the working directory).
mcs <- read_dta("2015 Millennium Cohort Study/mcs.dta")

# Turn the coded answers into a factor so the codes can be relabelled.
mcs$math <- as.factor(mcs$mths)

# recode() needs the vector to relabel as its first argument; without it the
# call has nothing to work on and fails.
mcs$math <- recode(
  mcs$math,
  "1" = "Strongly Disagree",
  "2" = "Disagree",
  "3" = "Agree",
  "4" = "Strongly Agree"
)

# NOTE(review): mcs$math already carries the text labels at this point, so
# this recode presumably finds no "1".."4" codes and mcs$science ends up with
# the same values as mcs$math — confirm which source column was intended.
mcs$science <- recode(
  mcs$math,
  "1" = "Strongly Disagree",
  "2" = "Disagree",
  "3" = "Agree",
  "4" = "Strongly Agree"
)

# Birth year is the age subtracted from 2014.
gfk_cleaned_eul$birthyear <- 2014 - gfk_cleaned_eul$age

# Bin the birth years into five intervals. The interval-style labels assigned
# here are only intermediate names; the recode() call below replaces them.
gfk_cleaned_eul$birthyear_cat <- cut(
  gfk_cleaned_eul$birthyear,
  breaks = c(-Inf, 1945, 1965, 1985, 1997, Inf),
  labels = c(
    "(-Inf,1945]",
    "(1945,1964]",
    "(1965,1984]",
    "(1985,1996]",
    "(1997,Inf]"
  )
)

# Swap the interval labels for generation names.
gfk_cleaned_eul$birthyear_cat <- recode(
  gfk_cleaned_eul$birthyear_cat,
  "(-Inf,1945]" = "born in 1945 or before",
  "(1945,1964]" = "Boomers",
  "(1965,1984]" = "GenX",
  "(1985,1996]" = "Millenium",
  "(1997,Inf]" = "GenZ"
)

# Read the Excel version of the data.
# NOTE(review): read_excel() presumably comes from the readxl package, which
# is not attached in the visible code — verify it is loaded beforehand.
gfk_excel_version <- read_excel("gfk_excel_version.xls")

# Treat the answer "Refused" as missing.
gfk_excel_version$hhincome <- na_if(gfk_excel_version$hhincome, "Refused")

# Convert hhincome to a factor whose levels run from the highest income band
# to the lowest. Values that match none of the listed levels become NA.
# NOTE(review): "25 000 - 29 999" (spaces instead of commas) and
# "5,000 - 10,000" break the pattern of the other bands — confirm they match
# the spelling used in the data.
gfk_excel_version$hhincome <- factor(
  gfk_excel_version$hhincome,
  levels = c(
    "Over 200,000",
    "150,000 - 199,999",
    "100,000 - 149,999",
    "95,000 - 99,999",
    "90,000 - 94,999",
    "85,000 - 89,999",
    "80,000 - 84,999",
    "75,000 - 79,999",
    "70,000 - 74,999",
    "65,000 - 69,999",
    "60,000 - 64,999",
    "55,000 - 59,999",
    "50,000 - 54,999",
    "45,000 - 49,999",
    "40,000 - 44,999",
    "35,000 - 39,999",
    "30,000 - 34,999",
    "25 000 - 29 999",
    "20,000 - 24,999",
    "15,000 - 19,999",
    "10,000 - 14,999",
    "5,000 - 10,000",
    "Under 5,000"
  )
)

# Number of levels of the resulting factor.
nlevels(gfk_excel_version$hhincome)

# Smallest value of the birthyear column.
min_birthyear <- min(gfk_cleaned_eul$birthyear)

print(min_birthyear)

# Open the SSE student data in the data viewer.
View(SSE_students_data)

# Stack the SME and SSE student data row-wise into one object.
cuhksz_students_h <- rbind(SME_students_data,SSE_students_data)

# Mean of the BMI column of the combined data.
mean(cuhksz_students_h$BMI)

# Drop the 7th column (negative index removes that position).
Survey_GE_class_choice_2<-Survey_GE_class_choice_2[,-7]

# Join the two survey tables on their shared ID column.
CUHK_employement_1<-merge(CUHKSZ_employment_survey_1,CUHKSZ_employment_survey_1b, by="ID")


# Name (as a string) under which assign() below stores the object.
new_name <- "CUHKSZ_employment_survey_2"

# NOTE(review): new_name holds the name the object already has, so this
# assign() binds CUHKSZ_employment_survey_2 to itself — presumably a different
# target name was intended; confirm.
assign(new_name, CUHKSZ_employment_survey_2)

# Drop the last column.
# NOTE(review): CUHK_employement_2 is not created in the visible code — verify
# where it is defined.
CUHK_employement_2 <- CUHK_employement_2[, -ncol(CUHK_employement_2)]

# Position(s) in column_names equal to "Month_salary_22.x".
# NOTE(review): column_names and new_column_name are not defined in the visible
# code — presumably set earlier; verify.
column_index_to_change <- which(column_names == "Month_salary_22.x")

# Overwrite the matched entry with the new name.
column_names[column_index_to_change] <- new_column_name

# Open the help page for recode.
help(recode)

# Bin the weekly coffee count into four labelled groups.
# NOTE(review): with these breaks the first interval presumably excludes 0 and
# values above 11 fall outside every bin (both become NA) — confirm that
# matches the data.
coffeenew$newcofnumcat <- cut(
  coffeenew$nrb_coffee_week,
  breaks = c(0, 2, 5, 7, 11),
  labels = c("0_2", "2_5", "5_7", "more_than_7")
)

Answer:

# Setting and getting working directory
setwd()
# function_name: "setwd"; function: "set the working directory"; format: "setwd(...)"; NOTE(review): no directory is passed here — supply the target path, e.g. setwd("path/to/folder").
getwd()
# function_name: "getwd"; function: "return the current working directory"; format: "getwd()"

# Creating a vector 'diabetes'
diabetes <- c("Type1", "Type2", "Type1", "Type2", "Type2", "Type2", "Type1", "Type1", "Type2", "Type1")  
# function_name: "c"; function: "combine elements into a vector"; format: "new_vector <- c(...)"

# Installing and loading necessary packages
install.packages("tidyverse")  
# function_name: "install.packages"; function: "install specified packages"; format: "install.packages(...)"
library(dplyr)  
# function_name: "library"; function: "load and attach the specified package"; format: "library(...)"
detach("package:stats", unload = TRUE)  
# function_name: "detach"; function: "remove the specified package from the search path"; format: "detach(..., unload = TRUE)"
library(stats)  
# function_name: "library"; function: "re-attach stats, because sd() and shapiro.test() used below are stats functions and cannot be found while it is detached"; format: "library(...)"

# Loading the 'USArrests' dataset and performing basic operations
data("USArrests")  
# function_name: "data"; function: "load the named dataset"; format: "data(...)"
head(USArrests)  
# function_name: "head"; function: "display the first few rows of a dataset"; format: "head(...)"
summary(USArrests)  
# function_name: "summary"; function: "display summary statistics of a dataset"; format: "summary(...)"
sd(USArrests$Murder)  
# function_name: "sd"; function: "calculate the standard deviation of a numeric vector"; format: "new_variable <- sd(...)"
View(USArrests)  
# function_name: "View"; function: "open a viewer for a dataset"; format: "View(...)"

# Exploring data in 'UCBAdmissions'
table(UCBAdmissions)  
# function_name: "table"; function: "create a table of counts"; format: "table(...)"; NOTE(review): UCBAdmissions is presumably already a table of counts, so this would tabulate its cell values — confirm intent.

# Checking levels in a categorical variable in 'gss_cat'
levels(gss_cat$rincome)  
# function_name: "levels"; function: "get the levels of a factor variable"; format: "levels(...)"; NOTE(review): gss_cat is not loaded in the visible code — presumably from the forcats package; verify.

# Conducting a Shapiro-Wilk test on 'Murder' column in 'USArrests'
shapiro.test(USArrests$Murder)  
# function_name: "shapiro.test"; function: "conduct the Shapiro-Wilk normality test"; format: "shapiro.test(...)"

# Loading and manipulating data from the '2015 Millennium Cohort Study'
# load("E:/RGEB/2015 Millennium Cohort Study/mcs.dta")
# function_name: "load"; function: "restore R objects from a file written by save() (.RData/.rda)"; format: "load(...)"; Note: disabled here because mcs.dta is a Stata file, which load() cannot read; read_dta() below imports it instead.
library(haven)  
# function_name: "library"; function: "load and attach the specified package"; format: "library(...)"
mcs <- read_dta("2015 Millennium Cohort Study/mcs.dta")  
# function_name: "read_dta"; function: "read data from a Stata file"; format: "new_dataset <- read_dta(...)"

# Creating a new variable 'birthyear' and categorizing it
gfk_cleaned_eul$birthyear <- 2014 - gfk_cleaned_eul$age  
# operator: "-"; function: "element-wise subtraction (here: 2014 minus each age)"; format: "new_variable <- ... - ..."
gfk_cleaned_eul$birthyear_cat <- cut(gfk_cleaned_eul$birthyear, breaks = c(-Inf, 1945, 1965, 1985, 1997, Inf), labels = c("(-Inf,1945]", "(1945,1964]", "(1965,1984]", "(1985,1996]", "(1997,Inf]"))  
# function_name: "cut"; function: "create categorical variable by cutting a numeric variable at the given breaks"; format: "new_variable <- cut(...)"; Note: six breaks give five intervals, matched by five labels.
gfk_cleaned_eul$birthyear_cat <- recode(gfk_cleaned_eul$birthyear_cat, "(-Inf,1945]"="born in 1945 or before", "(1945,1964]"="Boomers", "(1965,1984]"="GenX", "(1985,1996]"="Millenium", "(1997,Inf]"="GenZ")  
# function_name: "recode"; function: "replace old values/levels with new ones (old = new pairs)"; format: "new_variable <- recode(...)"

# Reading an Excel file and processing 'hhincome' variable
gfk_excel_version <- read_excel("gfk_excel_version.xls")  
# function_name: "read_excel"; function: "read data from an Excel file"; format: "new_dataset <- read_excel(...)"
gfk_excel_version$hhincome <- na_if(gfk_excel_version$hhincome, "Refused")  
# function_name: "na_if"; function: "replace specific values with NA"; format: "new_variable <- na_if(...)"
gfk_excel_version$hhincome <- factor(
  gfk_excel_version$hhincome,
  levels = c(
    "Over 200,000", "150,000 - 199,999", "100,000 - 149,999",
    "95,000 - 99,999", "90,000 - 94,999", "85,000 - 89,999",
    "80,000 - 84,999", "75,000 - 79,999", "70,000 - 74,999",
    "65,000 - 69,999", "60,000 - 64,999", "55,000 - 59,999",
    "50,000 - 54,999", "45,000 - 49,999", "40,000 - 44,999",
    "35,000 - 39,999", "30,000 - 34,999", "25 000 - 29 999",
    "20,000 - 24,999", "15,000 - 19,999", "10,000 - 14,999",
    "5,000 - 10,000", "Under 5,000"
  )
)
# function_name: "factor"; function: "convert a variable to a factor with specified levels"; format: "new_variable <- factor(...)"; Note: every level is written out, because a literal `...` inside c() is not valid at the top level of a script.

# Merging and cleaning datasets
CUHK_employement_1 <- merge(CUHKSZ_employment_survey_1, CUHKSZ_employment_survey_1b, by="ID")  
# function_name: "merge"; function: "merge datasets by a common variable"; format: "new_dataset <- merge(...)"
new_name <- "CUHKSZ_employment_survey_2"  
# operator: "<-"; function: "store a character string (the target object name) in a variable"; format: "new_variable <- \"...\""
assign(new_name, CUHKSZ_employment_survey_2)  
# function_name: "assign"; function: "assign a value to the variable whose name is given as a string"; format: "assign(..., ...)"; NOTE(review): new_name holds the name the object already has, so this binds the object to itself — confirm the intended target name.
CUHK_employement_2 <- CUHK_employement_2[, -ncol(CUHK_employement_2)]  
# operator: "[ , -n]"; function: "remove the last column using a negative column index"; format: "new_dataset <- old_dataset[, -ncol(old_dataset)]"; NOTE(review): CUHK_employement_2 is not created in the visible code — verify where it is defined.

# Renaming an entry in a vector of column names
# NOTE(review): column_names and new_column_name are not defined in the visible code — presumably set earlier; verify.
column_index_to_change <- which(column_names == "Month_salary_22.x")  
# function_name: "which"; function: "get the index of elements that satisfy a condition"; format: "new_index <- which(...)"
column_names[column_index_to_change] <- new_column_name  
# operator: "[<-"; function: "replace the element(s) at the given index"; format: "vector[index] <- new_value"

# Looking up help and manipulating data in 'coffeenew'
help(recode)  # function_name: "help"; function: "display help documentation"; format: "help(...)"
coffeenew$newcofnumcat <- cut(coffeenew$nrb_coffee_week, breaks=c(0,2,5,7,11), labels=c("0_2","2_5","5_7","more_than_7"))  
# function_name: "cut"; function: "create categorical variable by cutting a numeric variable"; format: "new_variable <- cut(...)"; Note: five breaks give four intervals, matched by four labels.

 

Quiz:

  1. Question: What R function is used to set the working directory?

    • a) setdir()
    • b) setwd()
    • c) setworking()
    • d) workdir()
  2. Question: Which function installs specified R packages?

    • a) load.packages()
    • b) install.library()
    • c) install.packages()
    • d) library.install()
  3. Question: What function is used to load a specified library in R?

    • a) load()
    • b) library()
    • c) load.library()
    • d) import.library()
  4. Question: In R, which function is used to calculate the standard deviation of a numeric variable?

    • a) calculate_sd()
    • b) std_dev()
    • c) sd()
    • d) variance()
  5. Question: What function opens a viewer for a dataset in R?

    • a) explore()
    • b) browse()
    • c) view()
    • d) View()
  6. Question: In R, what function creates a table of counts for categorical data?

    • a) tabulate()
    • b) table()
    • c) count()
    • d) crosstab()
  7. Question: Which function is used to get the levels of a factor variable in R?

    • a) getlevels()
    • b) factorlevels()
    • c) levels()
    • d) factor_levels()
  8. Question: What R function is used to conduct the Shapiro-Wilk test?

    • a) shapiro()
    • b) wilks.test()
    • c) shapiro.test()
    • d) test.shapiro()
  9. Question: Which function reads data from a Stata file in R?

    • a) read_spss()
    • b) read_stata()
    • c) read_sas()
    • d) read_dta()
  10. Question: In R, what function is used to create a categorical variable by cutting a numeric variable into bins?

  • a) bin()
  • b) create_cat()
  • c) cut()
  • d) category()

11. Question: Which function is used to merge datasets by a common variable in R?

  • a) combine()
  • b) merge()
  • c) join()
  • d) concat()

12. Question: What function is used to replace specific values with NA in R?

  • a) replace_na()
  • b) na_replace()
  • c) na_if()
  • d) replace_with_na()

13. Question: In R, which function is used to convert a variable to a factor with specified levels?

  • a) convert_factor()
  • b) to_factor()
  • c) factorize()
  • d) factor()

14. Question: What R function is used to remove specified columns from a dataset?

  • a) remove_cols()
  • b) subset()
  • c) drop_cols()
  • d) exclude()

15. Question: Which function in R is used to get the index of elements that satisfy a condition?

  • a) find()
  • b) locate()
  • c) index()
  • d) which()

16. Question: In R, what function is used to create a categorical variable by cutting a numeric variable into bins with labels?

  • a) categorize()
  • b) label_cut()
  • c) create_category()
  • d) cut()

17. Question: Which function in R displays help documentation for a specified function?

  • a) help()
  • b) info()
  • c) documentation()
  • d) assist()

18. Question: What function is used to replace specific values with new values in R?

  • a) replace_values()
  • b) change()
  • c) recode()
  • d) modify()

19. Question: In R, what function is used to create a new categorical variable based on the values of a numeric variable?

  • a) category_from_numeric()
  • b) create_categorical()
  • c) label_numeric()
  • d) cut()

20. Question: Which base R function replaces the values at specified positions (an index or a logical condition) of a vector with a new value?

  • a) replace()
  • b) modify()
  • c) recalculate()
  • d) recode()

Answer for quiz

  1. Answer: b) setwd()

    • Example: setwd("/path/to/your/directory")
  2. Answer: c) install.packages()

    • Example: install.packages("tidyverse")
  3. Answer: b) library()

    • Example: library(dplyr)
  4. Answer: c) sd()

    • Example: standard_deviation <- sd(data$variable)
  5. Answer: d) View()

    • Example: View(data)
  6. Answer: b) table()

    • Example: table(factor_data)
  7. Answer: c) levels()

    • Example: factor_levels <- levels(factor_data)
  8. Answer: c) shapiro.test()

    • Example: shapiro.test(data$numeric_variable)
  9. Answer: d) read_dta()

    • Example: dataset <- read_dta("file.dta")
  10. Answer: c) cut()

    • Example: cut_variable <- cut(data$numeric_variable, breaks = c(0, 25, 50, 75, 100))

  11. Answer: b) merge()

    • Example: merged_data <- merge(data1, data2, by="common_variable")
  12. Answer: c) na_if()

    • Example: data$variable <- na_if(data$variable, "specific_value")
  13. Answer: d) factor()

    • Example: data$variable <- factor(data$variable, levels = c("level1", "level2", "level3"))
  14. Answer: b) subset()

    • Example: new_data <- subset(data, select = -c(column_to_remove))
  15. Answer: d) which()

    • Example: index <- which(data$condition == TRUE)
  16. Answer: d) cut()

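    • Example: category_variable <- cut(data$numeric_variable, breaks = c(0, 25, 50, 75, 100), labels = c("Low", "Medium", "High", "Very High")) (Note: five break points define four intervals, so labels must contain exactly four entries. With only three labels, cut() stops with an error rather than leaving the last interval (75, 100] unlabelled.)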
  17. Answer: a) help()

    • Example: help(function_name)
  18. Answer: c) recode()

    • Example: data$variable <- recode(data$variable, "old_value" = "new_value")
  19. Answer: d) cut()

    • Example: category_variable <- cut(data$numeric_variable, breaks = c(0, 25, 50, 75, 100), labels = c("Low", "Medium", "High", "Very High"))
  20. Answer: a) replace()

    • Example: data$variable <- replace(data$variable, data$condition, new_value)
02-19 18:10