A:以下的4个检验都会自动判断数据里面的数据类型,然后选择对应的变量作统计检验;
B:检验结果中有的是捕获了检验警告信息的,帮助选择检验结果数据;
C:所有检验均只返回统计检验结果数据,不判断使用哪种检验结果作为最终需要的数据,有需要这一层判断自行添加逻辑
1、单样本检验
R
#date_base为需要检验变量的数据,虽然单样本检验为连续型变量,
#但这里不用去选择变量的数据类型,函数里面会去判断
one_independent_test <- function(data_base){
data_base %>% select(where(is.double)) %>%
imap(~ {
shapiro_p <- shapiro.test(.x)$p.value
t_or_wil_test=if_else(shapiro.test(.x)$p.value > 0.05,
t.test(.x) %>% tidy(),
wilcox.test(.x) %>% tidy())
bind_cols(tibble(shapiro_p,t_or_wil_test))
}
) %>%
bind_rows( .id = "variable")
}
2、两独立样本检验
R
#date_base为需要检验变量的数据,group_var为分组的变量,
#虽然两独立样本检验为连续型变量,但这里不用去选择变量的数据类型,函数里面会去判断
two_independent_test <- function(data_base,group_var){
group_vec <- data_base %>% pull({{group_var}})
test_var_list=data_base %>% select(where(is.double))
all_test_res=test_var_list %>%
imap(~{
shapiro_res=data_base %>%
group_by({{group_var}}) %>%
summarise(
test_result = list(shapiro.test(!!sym(.y)))) %>%
mutate(
results = map(test_result, tidy)) %>%
unnest(results)
levene_res=leveneTest(.x~group_vec,data = data_base)%>% tidy() %>%
mutate(method="Levene's Test for Homogeneity")
t_res_vareaual=t.test(.x~group_vec,data = data_base,var.equal=TRUE) %>% tidy()
t_res_varuneaual=t.test(.x~group_vec,data = data_base,var.equal=FALSE) %>% tidy()
wilcox_res=wilcox.test(.x~group_vec,data = data_base)%>% tidy()
per_test_res=bind_rows(shapiro_res,
levene_res,
t_res_vareaual,
t_res_varuneaual,
wilcox_res)
}
)%>%
bind_rows(.id = "variable")
}
3、配对样本检验
R
#date_base为需要检验变量的数据,
#配对样本检验为连续型变量,这里不用去选择变量的数据类型,函数里面会去判断,
#但需要配对检验的变量在判断完数据类型后是相邻的,否则得到结果不是预期的
two_paired_test <- function(data_base){
test_var_list=data_base %>% select(where(is.double)) %>% names()
var_pair_former=head(test_var_list,-1)
var_pair_latter=tail(test_var_list,-1)
names(var_pair_former) <- var_pair_former
names(var_pair_latter) <- var_pair_latter
all_test_res=map2(var_pair_former,var_pair_latter,~{
x <- data_base[[.x]]
y <- data_base[[.y]]
z <- x-y
shapiro_res=bind_rows(shapiro.test(x) %>% tidy() %>% mutate(variable_format=.x),
shapiro.test(y) %>% tidy() %>% mutate(variable_format=.y),
shapiro.test(z) %>% tidy() %>% mutate(variable_format=str_c('diff',.x,'-',.y)))
t_paired_res=t.test(x,y,data = data_base,paired=TRUE) %>%
tidy() %>%
mutate(variable_format=paste0(.x,'~',.y))
wilcox_paired_res=wilcox.test(x, y,data = data_base,paired=TRUE)%>%
tidy() %>%
mutate(variable_format=paste0(.x,'~',.y))
per_test_paired_res=bind_rows(
shapiro_res,
t_paired_res,
wilcox_paired_res)
}
)%>%
bind_rows( .id= "variable")
}
4、分类样本检验
R
#date_base为需要检验变量的数据,group_var为分组变量
#分类样本检验为分类变量,这里不用去选择变量的数据类型,函数里面会去判断
multi_category_test <- function(data_base,group_var){
test_var_list=data_base %>% select(where(is.character)) %>%
select(-{{group_var}}) %>% names()
names(test_var_list) <- test_var_list
all_test_res=test_var_list %>%
imap(~{
var_distinct=data_base %>% select(.x) %>% distinct()
var_distinct_mum=data_base %>% select(.x) %>%n_distinct()
data_base_nomiss=if(str_detect(var_distinct,"missing")&var_distinct_mum==2){
data_base %>%
filter(is.na(!!sym(.x))==FALSE)
}else{
data_base %>%
filter(!!sym(.x)!='missing') %>%
filter(is.na(!!sym(.x))==FALSE)
}
chisq_base_table=data_base_nomiss %>%
tabyl(!!sym(.x),{{group_var}})
var_distinct_mum_deal=data_base_nomiss %>% select(.x) %>%n_distinct()
warn_msg <- NA_character_
chisq_not_correct_res=if(var_distinct_mum_deal==1){
return(tibble(
test_tag = NA_character_,
method = NA_character_,
p.value = NA_real_,
statistic = NA_real_,
warning_message = "Only one unique value"))
}else{
withCallingHandlers(
{
test_res <- chisq.test(chisq_base_table, correct = F) %>%
tidy()%>% remove_rownames()
},
warning = function(w) {
warn_msg <<- conditionMessage(w)
invokeRestart("muffleWarning") # 阻止警告打印
}
)}
chisq_not_correct_res$warning_message <- warn_msg
chisq_not_correct=chisq.test(chisq_base_table,correct=F)
chisq_not_correct_observed=chisq_not_correct$observed %>%
mutate(test_tag='observed')%>% select(-.x)
print(chisq_not_correct_observed)
chisq_not_correct_expected=chisq_not_correct$expected %>%
mutate(test_tag='expected')%>% select(-.x)
chisq_correct_res=chisq.test(chisq_base_table,correct=T) %>%
tidy()%>% remove_rownames()
fisher_test_res=fisher.test(chisq_base_table) %>% tidy()%>% remove_rownames()
per_test_res=bind_rows(
chisq_not_correct_res,
chisq_not_correct_observed,
chisq_not_correct_expected,
chisq_correct_res,
fisher_test_res)
}
) %>%
bind_rows(.id = "variable") %>% remove_rownames()
}