修改了结果,并提交了r文件
This commit is contained in:
parent
c71365d1ed
commit
19540467ee
|
@ -6,3 +6,4 @@ IEEE.pdf
|
|||
!latex/acmcopyright.sty
|
||||
!latex/sigproc.bib
|
||||
!latex/sig-alternate-05-2015.cls
|
||||
!*.r
|
||||
|
|
|
@ -56,6 +56,9 @@ where \textit{average F-measure} as $f_{avg}$, \textit{F-measure} of bug (nonbug
|
|||
SVM vs. NB & 0.133 & 0.021 & 0.286 & -8.58631110 & 0.000000e+00 \\
|
||||
SVM vs. LR & 0.326 & 0.077 & 0.219 & -4.20208441 & 2.366319e-04 \\
|
||||
SVM vs. RF & 0.317 & 0.224 & 0.428 & -4.44821666 & 1.250544e-04 \\
|
||||
\bottomrule
|
||||
Our approach vs. 1st Baseline & 0.429 & 0.338 & 0.524 & -1.7422153 & 0.0188184 \\
|
||||
Our approach vs. 2nd Baseline & 0.435 & 0.344 & 0.530 & -1.5946872 & 0.0248742 \\
|
||||
\bottomrule
|
||||
\end{tabular}
|
||||
\end{table*}
|
||||
|
@ -193,7 +196,7 @@ and some key words may be enough to build an effective classification model.
|
|||
$log(confuse\_count + 0.5)$ & -1.83346 &***& 134.623 &*** \\
|
||||
$log(med\_word\_count)$ & 0.12505 & & 0.067 & \\ \hline
|
||||
|
||||
marginal R-squared & \multicolumn{4}{c}{XXX} \\
|
||||
marginal R-squared & \multicolumn{4}{c}{0.6798150} \\
|
||||
conditional R-squared & \multicolumn{4}{c}{0.9251896} \\ \hline
|
||||
|
||||
\multicolumn{5}{l}{signif.: $p<0.001$ `***', $p<0.01$ `**', $p<0.05$ `*'} \\
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
# Boxplots of mean_svm_performance, lg_performance and lg_performance_noconfuse
# from result.csv: first on every row, then on two low-SVM subsets.
result <- read.csv("E:/work/paper/new/lunwen/reference/result.csv")

boxplot(result)
boxplot(result$mean_svm_performance, result$lg_performance,
        result$lg_performance_noconfuse)

summary(result$mean_svm_performance)

# Rows whose mean_svm_performance is at most 0.7596.
result_1 <- subset(result, mean_svm_performance <= 0.7596)
boxplot(result_1$mean_svm_performance, result_1$lg_performance,
        result_1$lg_performance_noconfuse)

# Rows whose mean_svm_performance is below 0.8. The subset is created before
# it is summarised; the summary used to run first and failed on a fresh
# session because result_2 did not exist yet.
result_2 <- subset(result, mean_svm_performance < 0.8)
summary(result_2$mean_svm_performance)
boxplot(result_2$mean_svm_performance, result_2$lg_performance,
        result_2$lg_performance_noconfuse)
|
||||
|
||||
|
||||
# Same comparison on result_final.csv, using its svm, di and pi columns.
result <- read.csv("E:/work/paper/new/lunwen/reference/result_final.csv")
summary(result[["svm"]])
boxplot(result[["svm"]], result[["di"]], result[["pi"]])

# Subset 1: rows with svm at most 0.7596.
result_1 <- subset(result, svm <= 0.7596)
boxplot(result_1[["svm"]], result_1[["di"]], result_1[["pi"]])

# Subset 2: rows with svm below 0.7922.
result_2 <- subset(result, svm < 0.7922)
boxplot(result_2[["svm"]], result_2[["di"]], result_2[["pi"]])

# Five-number summaries of the three columns in subset 1 ...
summary(result_1[["svm"]])
summary(result_1[["di"]])
summary(result_1[["pi"]])

# ... and in subset 2.
summary(result_2[["svm"]])
summary(result_2[["di"]])
summary(result_2[["pi"]])
|
||||
|
||||
|
||||
library(ggplot2)
|
||||
# Grouped bar chart of the six hard-coded scores below: three methods in each
# of two cases.
#
# A leftover `table(Arthritis$Improved, Arthritis$Treatment)` call used to sit
# here. `Arthritis` is not defined anywhere in the visible script and the
# resulting `counts` was never used, so the call only raised an error.
method <- rep(c("1st baseline", "2nd baseline", "our approach"), times = 2)
case <- rep(c("case 1", "case 2"), each = 3)
data <- c(0.7378, 0.7394, 0.7555, 0.7568, 0.7584, 0.7705)
final_result <- data.frame(method, case, data)

# theme_set() returns the theme that was active *before* the call, so adding
# its result to a plot applies the old theme instead of theme_bw(). The global
# default is still switched here, and theme_bw() is added to the plot
# explicitly.
theme_set(theme_bw())

p <- ggplot(final_result, mapping = aes(x = case, y = data, fill = method)) +
  geom_bar(position = "dodge", stat = "identity") +
  ylab("performance") +
  xlab("") +
  theme_bw() +
  theme(panel.grid.major = element_line(colour = NA))
p

# Same chart with the y axis zoomed to 0.7-0.8, the range holding all six
# values.
p + coord_cartesian(ylim = c(0.7, 0.8))
|
||||
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
library(MASS)
|
||||
library(car)
|
||||
library(lme4)
|
||||
library(MuMIn)
|
||||
library(lmerTest)
|
||||
library(scales)
|
||||
# Log-odds transform: log(x / (1 - x)), applied element-wise to x.
logit <- function(x) {
  odds <- x / (1 - x)
  log(odds)
}
|
||||
|
||||
# Exploratory look at r2, then a linear mixed model of logit(svm_accuracy)
# with a random intercept per project.
# NOTE(review): r2 is not created anywhere in the visible script; it has to be
# loaded into the session before this section runs.
summary(r2)

# Histograms of the variables on the scale they take in the model below.
hist(log(r2$commits), breaks = 100)
# svm_accuracy is shifted by 0.00001 before the logit, matching the model
# response (presumably so an accuracy of exactly 1 stays finite - TODO confirm).
hist(logit(r2$svm_accuracy - 0.00001), breaks = 100)
# contributors enters the model as log(contributors), so it is inspected on
# that scale. The previous logit() of this column yields NaN for every value
# above 1. The second, identical log(commits) histogram was dropped.
hist(log(r2$contributors), breaks = 100)

r2$proj_id <- as.factor(r2$proj_id)

# Fixed effects are listed in the original order so the coefficient table
# keeps its row order.
mix_svm <- lmer(
  logit(svm_accuracy - 0.00001) ~
    log(star + watch) +
    log(post_num) +
    log(contributors) +
    log(age + 0.5) +
    log(commits) +
    log(confuse_count + 0.5) +
    log(med_word_count) +
    (1 | proj_id),
  data = r2
)
|
||||
#--------------------------------
#' Variance inflation factors for the fixed effects of a fitted model
#'
#' Converts the fixed-effect covariance matrix to a correlation matrix and
#' returns the diagonal of its inverse.
#'
#' @param fit A fitted model for which `vcov()` and `fixef()` are available.
#' @return A numeric vector of variance inflation factors, named after the
#'   non-intercept fixed effects.
vif.lme <- function(fit) {
  ## adapted from rms::vif
  v <- as.matrix(vcov(fit))
  nam <- names(fixef(fit))
  ## exclude intercepts (selected by name, so their position does not matter)
  keep <- !(nam %in% c("Intercept", "(Intercept)"))
  v <- v[keep, keep, drop = FALSE]
  nam <- nam[keep]
  ## scale the covariance matrix to a correlation matrix, then invert it
  d <- sqrt(diag(v))
  v <- diag(solve(v / (d %o% d)))
  names(v) <- nam
  v
}

# Model diagnostics. The helper is now defined before its first use; the VIF
# call used to precede the definition and failed when the script was run top
# to bottom.
vif.lme(mix_svm)
summary(mix_svm)
anova(mix_svm)
r.squaredGLMM(mix_svm)

# A trailing `vif.lme(lt)` call was removed: no object named `lt` is created
# anywhere in the visible script.
|
|
@ -0,0 +1,118 @@
|
|||
# Wilcoxon tests and boxplots over the per-classifier columns of result.csv.
# The result$... argument expressions are kept verbatim because wilcox.test()
# prints them as the "data:" line of its output.
result <- read.csv("C:/Users/qiangge/Desktop/result.csv")

# mean_svm_performance against each of the other three classifier columns.
wilcox.test(result$mean_svm_performance, result$mean_rf_performance)
wilcox.test(result$mean_svm_performance, result$mean_nb_performance)
wilcox.test(result$mean_svm_performance, result$mean_lg_performance)

# Every classifier column against the baseline column.
wilcox.test(result$mean_svm_performance, result$baseline)
wilcox.test(result$mean_nb_performance, result$baseline)
wilcox.test(result$mean_lg_performance, result$baseline)
wilcox.test(result$mean_rf_performance, result$baseline)

# mean_svm_performance against lg_performance.
wilcox.test(result$mean_svm_performance, result$lg_performance)
boxplot(result$mean_svm_performance, result$lg_performance)

# One box per column, baseline first.
boxplot(
  result$baseline,
  result$mean_nb_performance,
  result$mean_lg_performance,
  result$mean_rf_performance,
  result$mean_svm_performance,
  names = c("Base Line", "NB", "LR", "RF", "SVM"),
  ylab = "performance"
)

# Row-wise difference between the SVM and RF columns.
summary(result$mean_svm_performance - result$mean_rf_performance)
|
||||
|
||||
|
||||
# Comparison of mean_svm_performance with the lg_performance variants.
# Each read below overwrites `result`, so only false.csv is analysed when the
# script is run top to bottom.
result <- read.csv("C:/Users/qiangge/Desktop/improve_result.csv")
result <- read.csv("C:/Users/qiangge/Desktop/3_result.csv")
result <- read.csv("C:/Users/qiangge/Desktop/false.csv")

# Row-wise differences between the three columns.
summary(result$lg_performance - result$mean_svm_performance)
summary(result$lg_performance_noconfuse - result$mean_svm_performance)
summary(result$lg_performance - result$lg_performance_noconfuse)

wilcox.test(result$mean_svm_performance, result$lg_performance)
wilcox.test(result$lg_performance_noconfuse, result$lg_performance)
wilcox.test(result$mean_svm_performance, result$lg_performance_noconfuse)

boxplot(result$lg_performance_noconfuse, result$lg_performance)

table(result$mean_svm_performance)

# Keep the rows whose SVM score is below 0.8. The filter used to name a column
# `svm`, but every other statement in this section reads the score from
# mean_svm_performance, so the filter now uses that column as well.
result <- subset(result, mean_svm_performance < 0.8)

boxplot(
  result$mean_svm_performance,
  result$lg_performance_noconfuse,
  result$lg_performance,
  names = c("SVM", "DI+SVM", "MI+DI+SVM"),
  ylab = "performance"
)
boxplot(
  result$mean_svm_performance,
  result$GREATEST.lg_performance.lg_performance_change.lg_performance_noconfuse.lg_performance_perplexity.
)

summary(result$mean_svm_performance)
summary(result$GREATEST.lg_performance.lg_performance_change.lg_performance_noconfuse.lg_performance_perplexity.)
|
||||
|
||||
|
||||
|
||||
# svm / di / pi comparison on the rows with svm below 0.75.
# The three reads overwrite one another; result_final.csv is the one in effect
# for the statements that follow.
result <- read.csv("C:/Users/qiangge/Desktop/0.75.csv")
result <- read.csv("C:/Users/qiangge/Desktop/0.8.csv")
result <- read.csv("C:/Users/qiangge/Desktop/result_final.csv")

result <- subset(result, svm < 0.75)

boxplot(
  result$svm,
  result$di,
  result$pi,
  names = c("SVM", "DI+SVM", "PI+DI+SVM"),
  ylab = "performance"
)

# Row-wise differences between the three columns.
summary(result$di - result$svm)
summary(result$pi - result$svm)
summary(result$pi - result$di)

# Pairwise Wilcoxon tests; argument expressions are kept as-is because they
# appear in the printed "data:" line.
wilcox.test(result$di, result$svm)
wilcox.test(result$pi, result$svm)
wilcox.test(result$pi, result$di)
|
||||
|
||||
|
||||
# Nonparametric multiple comparison (nparcomp, Tukey-type contrasts) of the
# pi, svm and di columns.
result <- read.csv("C:/Users/qiangge/Desktop/result.csv")
result <- result[, c("mean_svm_performance", "mean_rf_performance",
                     "mean_nb_performance", "mean_lg_performance", "baseline")]
library(reshape)

# `result_final` was used here without ever being created in the visible
# script. It is loaded from the result_final.csv file that the earlier section
# reads, unless an object of that name already exists in the session.
if (!exists("result_final")) {
  result_final <- read.csv("C:/Users/qiangge/Desktop/result_final.csv")
}

# Long format for nparcomp(): melt() yields the `variable` and `value` columns
# that the formula below refers to.
result <- melt(result_final[, c("pi", "svm", "di")])
library(nparcomp)
npar <- nparcomp(value ~ variable, data = result, type = "Tukey")
summary(npar)

# The boxplot needs the wide columns. `result` is in long format at this point
# and no longer has svm/di/pi, so the columns are taken from result_final.
boxplot(result_final$svm, result_final$di, result_final$pi)

# Same procedure on the built-in ChickWeight data set.
library(nparcomp)
data(ChickWeight)
summary(ChickWeight)
npar <- nparcomp(weight ~ Diet, data = ChickWeight, type = "Tukey")
summary(npar)
|
||||
|
||||
|
||||
|
||||
|
||||
# compare plot
# One line per performance column across projects, with projects ordered along
# the x axis by reorder(tolower(proj_id), value).
# NOTE(review): this section expects `result` to carry proj_id, mean_machine
# and the per-classifier columns - confirm which CSV is loaded at this point.

result <- result[order(result$mean_machine), ]
fx <- result[, c(
  "proj_id",
  "mean_nb_performance",
  "mean_lg_performance",
  "mean_rf_performance",
  "mean_svm_performance",
  "baseline"
)]
#View(fx)
library(reshape2)
fx <- melt(fx)
library(ggplot2)
# The melted `variable` column is renamed to MLA, which the plot uses for
# grouping and colour.
names(fx)[names(fx) == "variable"] <- "MLA"

npg <- ggplot(
  data = fx,
  aes(x = reorder(tolower(proj_id), value), y = value, group = MLA, colour = MLA)
) +
  geom_line() +
  geom_point() +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(colour = "grey60", linetype = "dashed")
  )
npg
|
||||
|
||||
|
||||
# Side-by-side boxplot and summaries of the `diff` column of feature.csv and
# bug.csv.
feature <- read.csv("C:/Users/qiangge/Desktop/feature.csv")
bug <- read.csv("C:/Users/qiangge/Desktop/bug.csv")

boxplot(
  feature$diff,
  bug$diff,
  names = c("feature", "bug"),
  ylab = "timestamp"
)

summary(feature$diff)
summary(bug$diff)
|
||||
|
Loading…
Reference in New Issue