修改了结果,并提交了r文件

This commit is contained in:
qiangge 2017-01-23 08:55:39 +08:00
parent c71365d1ed
commit 19540467ee
5 changed files with 229 additions and 1 deletions

1
.gitignore vendored
View File

@ -6,3 +6,4 @@ IEEE.pdf
!latex/acmcopyright.sty
!latex/sigproc.bib
!latex/sig-alternate-05-2015.cls
!*.r

View File

@ -56,6 +56,9 @@ where \textit{average F-measure} as $f_{avg}$, \textit{F-measure} of bug (nonbug
SVM vs. NB & 0.133 & 0.021 & 0.286 & -8.58631110 & 0.000000e+00 \\
SVM vs. LR & 0.326 & 0.077 & 0.219 & -4.20208441 & 2.366319e-04 \\
SVM vs. RF & 0.317 & 0.224 & 0.428 & -4.44821666 & 1.250544e-04 \\
\midrule
Our approach vs. 1st Baseline & 0.429 & 0.338 & 0.524 & -1.7422153 & 0.0188184 \\
Our approach vs. 2nd Baseline & 0.435 & 0.344 & 0.530 & -1.5946872 & 0.0248742 \\
\bottomrule
\end{tabular}
\end{table*}
@ -193,7 +196,7 @@ and some key words may be enough to build an effective classification model.
$log(confuse\_count + 0.5)$ & -1.83346 &***& 134.623 &*** \\
$log(med\_word\_count)$ & 0.12505 & & 0.067 & \\ \hline
marginal R-squared & \multicolumn{4}{c}{XXX} \\
marginal R-squared & \multicolumn{4}{c}{0.6798150} \\
conditional R-squared & \multicolumn{4}{c}{0.9251896} \\ \hline
\multicolumn{5}{l}{signif.: $p<0.001$ `***', $p<0.01$ `**', $p<0.05$ `*'} \\

View File

@ -0,0 +1,57 @@
# Exploratory comparison of the SVM scores against the other score columns:
# box plots and summaries for several SVM cut-offs, followed by a grouped bar
# chart built from hand-entered figures.

# Directory that holds the exported result tables. Change this single line to
# run the script on another machine.
data_dir <- "E:/work/paper/new/lunwen/reference"

# ---- First export: result.csv ----
result <- read.csv(file.path(data_dir, "result.csv"))
boxplot(result)
boxplot(
  result$mean_svm_performance,
  result$lg_performance,
  result$lg_performance_noconfuse
)
summary(result$mean_svm_performance)

# Rows whose SVM score is at or below the 0.7596 cut-off.
result_1 <- subset(result, mean_svm_performance <= 0.7596)
boxplot(
  result_1$mean_svm_performance,
  result_1$lg_performance,
  result_1$lg_performance_noconfuse
)

# Rows whose SVM score is below 0.8. The subset has to be created before it
# is summarised, otherwise a fresh session stops with "object not found".
result_2 <- subset(result, mean_svm_performance < 0.8)
summary(result_2$mean_svm_performance)
boxplot(
  result_2$mean_svm_performance,
  result_2$lg_performance,
  result_2$lg_performance_noconfuse
)

# ---- Second export: result_final.csv (columns svm, di, pi) ----
result <- read.csv(file.path(data_dir, "result_final.csv"))
summary(result$svm)
boxplot(result$svm, result$di, result$pi)

result_1 <- subset(result, svm <= 0.7596)
boxplot(result_1$svm, result_1$di, result_1$pi)

result_2 <- subset(result, svm < 0.7922)
boxplot(result_2$svm, result_2$di, result_2$pi)

summary(result_1$svm)
summary(result_1$di)
summary(result_1$pi)
summary(result_2$svm)
summary(result_2$di)
summary(result_2$pi)

# ---- Grouped bar chart of the hand-entered figures ----
library(ggplot2)

# One row per (method, case) pair; the values are typed in by hand.
final_result <- data.frame(
  method = rep(c("1st baseline", "2nd baseline", "our approach"), times = 2),
  case = rep(c("case 1", "case 2"), each = 3),
  data = c(0.7378, 0.7394, 0.7555, 0.7568, 0.7584, 0.7705)
)

# theme_bw() is added to the plot directly. theme_set() must not be used in a
# `+` chain: it returns the previously active theme, so the plot would be
# drawn with the old theme while the session default is changed as a side
# effect.
p <- ggplot(final_result, mapping = aes(x = case, y = data, fill = method)) +
  geom_bar(position = "dodge", stat = "identity") +
  ylab("performance") + xlab("") +
  theme_bw() +
  theme(panel.grid.major = element_line(colour = NA))
p

# Same chart with the y axis restricted to 0.7-0.8 so that the small
# differences between the bars are visible.
p + coord_cartesian(ylim = c(0.7, 0.8))

49
r scrip/fq.R Normal file
View File

@ -0,0 +1,49 @@
library(MASS)
library(car)
library(lme4)
library(MuMIn)
library(lmerTest)
library(scales)
# Logit transform, log(x / (1 - x)). Finite only for 0 < x < 1.
logit <- function(x) log(x / (1 - x))

#' Variance inflation factors for the fixed effects of a fitted mixed model
#'
#' Scales the fixed-effect covariance matrix `vcov(fit)` to a correlation
#' matrix, inverts it and returns the diagonal. Intercept terms are dropped
#' first. Defined before its first use so the script can be sourced top to
#' bottom.
#'
#' @param fit A fitted model for which `vcov()` and `fixef()` are available.
#' @return A numeric vector with one value per non-intercept fixed effect,
#'   named after the corresponding coefficient.
vif.lme <- function(fit) {
  ## adapted from rms::vif
  v <- vcov(fit)
  nam <- names(fixef(fit))
  ## exclude intercepts
  ns <- sum(nam %in% c("Intercept", "(Intercept)"))
  if (ns > 0) {
    keep <- -seq_len(ns)
    v <- v[keep, keep, drop = FALSE]
    nam <- nam[keep]
  }
  d <- diag(v)^0.5
  v <- diag(solve(v / (d %o% d)))
  names(v) <- nam
  v
}

#--------------------------------
# The data frame `r2` is not created anywhere in this script. Fail early with
# a clear message instead of an "object not found" error further down.
if (!exists("r2")) {
  stop("data frame `r2` must be loaded before running this script",
       call. = FALSE)
}

summary(r2)

# Distributions of the transformed variables, as they enter the model below.
hist(log(r2$commits), breaks = 100)
# Subtracting 0.00001 presumably keeps the logit finite when the accuracy is
# exactly 1 (logit(1) is Inf).
hist(logit(r2$svm_accuracy - 0.00001), breaks = 100)
# NOTE(review): the original applied logit() here, which yields NaN for any
# value above 1. The model uses log(contributors), so the same transform is
# plotted. Confirm that `contributors` is a count and not a proportion.
hist(log(r2$contributors), breaks = 100)

# proj_id is a grouping label, not a number.
r2$proj_id <- as.factor(r2$proj_id)

# Linear mixed model for the logit of the SVM accuracy with a random
# intercept per proj_id.
mix_svm <- lmer(
  logit(svm_accuracy - 0.00001) ~
    log(star + watch) +
    log(post_num) +
    log(contributors) +
    log(age + 0.5) +
    log(commits) +
    log(confuse_count + 0.5) +
    log(med_word_count) +
    (1 | proj_id),
  data = r2
)

vif.lme(mix_svm)
summary(mix_svm)
anova(mix_svm)
r.squaredGLMM(mix_svm)

# NOTE(review): the original ended with `vif.lme(lt)`. `lt` is not defined
# anywhere in this script, so that call was dropped. Re-add it once the
# model it refers to is fitted here.

118
r scrip/result.r Normal file
View File

@ -0,0 +1,118 @@
# Mean performance of the four classifiers and the baseline, read from
# result.csv.
result <- read.csv("C:/Users/qiangge/Desktop/result.csv")

# SVM against each of the other classifiers.
wilcox.test(result$mean_svm_performance, result$mean_rf_performance)
wilcox.test(result$mean_svm_performance, result$mean_nb_performance)
wilcox.test(result$mean_svm_performance, result$mean_lg_performance)

# Every classifier against the baseline.
wilcox.test(result$mean_svm_performance, result$baseline)
wilcox.test(result$mean_nb_performance, result$baseline)
wilcox.test(result$mean_lg_performance, result$baseline)
wilcox.test(result$mean_rf_performance, result$baseline)

# SVM against the lg_performance column.
wilcox.test(result$mean_svm_performance, result$lg_performance)
boxplot(result$mean_svm_performance, result$lg_performance)

# All five score columns side by side; the list names become the box labels.
boxplot(
  list(
    "Base Line" = result$baseline,
    "NB" = result$mean_nb_performance,
    "LR" = result$mean_lg_performance,
    "RF" = result$mean_rf_performance,
    "SVM" = result$mean_svm_performance
  ),
  ylab = "performance"
)

# Distribution of the SVM minus RF difference.
summary(result$mean_svm_performance - result$mean_rf_performance)
# Effect of the additional features relative to the plain SVM scores.
#
# The original loaded three files in a row, so only the last one was ever
# used. The other two are kept here as alternative inputs:
#   "C:/Users/qiangge/Desktop/improve_result.csv"
#   "C:/Users/qiangge/Desktop/3_result.csv"
result <- read.csv("C:/Users/qiangge/Desktop/false.csv")

# Pairwise score differences.
summary(result$lg_performance - result$mean_svm_performance)
summary(result$lg_performance_noconfuse - result$mean_svm_performance)
summary(result$lg_performance - result$lg_performance_noconfuse)

wilcox.test(result$mean_svm_performance, result$lg_performance)
wilcox.test(result$lg_performance_noconfuse, result$lg_performance)
wilcox.test(result$mean_svm_performance, result$lg_performance_noconfuse)

boxplot(result$lg_performance_noconfuse, result$lg_performance)
table(result$mean_svm_performance)

# Keep only rows whose SVM score is below 0.8. The original filtered on
# `svm`, but this table is addressed through `mean_svm_performance` on every
# other line of this section.
result <- subset(result, mean_svm_performance < 0.8)
boxplot(
  result$mean_svm_performance,
  result$lg_performance_noconfuse,
  result$lg_performance,
  names = c("SVM", "DI+SVM", "MI+DI+SVM"), ylab = "performance"
)

# The long column name is what read.csv() makes of a header containing
# punctuation.
# NOTE(review): presumably a GREATEST(...) expression in the export; confirm.
boxplot(
  result$mean_svm_performance,
  result$GREATEST.lg_performance.lg_performance_change.lg_performance_noconfuse.lg_performance_perplexity.
)
summary(result$mean_svm_performance)
summary(result$GREATEST.lg_performance.lg_performance_change.lg_performance_noconfuse.lg_performance_perplexity.)
# Comparison of the svm, di and pi columns of the final result table.
#
# The original loaded three files in a row, so only the last one was ever
# used. The other two are kept here as alternative inputs:
#   "C:/Users/qiangge/Desktop/0.75.csv"
#   "C:/Users/qiangge/Desktop/0.8.csv"
result <- read.csv("C:/Users/qiangge/Desktop/result_final.csv")

# Restrict to rows whose svm score is below 0.75.
result <- subset(result, svm < 0.75)

boxplot(
  result$svm, result$di, result$pi,
  names = c("SVM", "DI+SVM", "PI+DI+SVM"), ylab = "performance"
)

# Pairwise score differences.
summary(result$di - result$svm)
summary(result$pi - result$svm)
summary(result$pi - result$di)

wilcox.test(result$di, result$svm)
wilcox.test(result$pi, result$svm)
wilcox.test(result$pi, result$di)
# Nonparametric all-pairs (Tukey-type) comparison of the pi, svm and di
# columns.
#
# reshape2 is used for melt() because a later part of this script loads it as
# well. Loading both reshape and reshape2 makes it unclear which melt() runs.
library(reshape2)
library(nparcomp)

# The original first loaded result.csv and selected its classifier columns,
# then overwrote that selection before using it. Kept as an alternative
# input:
#   result <- read.csv("C:/Users/qiangge/Desktop/result.csv")
#   result <- result[, c("mean_svm_performance", "mean_rf_performance",
#                        "mean_nb_performance", "mean_lg_performance",
#                        "baseline")]

# `result_final` was used without ever being created. It is read here from
# the file of the same name.
# NOTE(review): the unfiltered table is used; confirm whether a cut-off on
# `svm` should be applied first.
result_final <- read.csv("C:/Users/qiangge/Desktop/result_final.csv")
result <- result_final[, c("pi", "svm", "di")]

# Box plot of the wide columns. This has to happen before melt(), which
# replaces them with a variable/value pair.
boxplot(result$svm, result$di, result$pi)

# Long format: one row per score, `variable` holds the source column name.
result <- melt(result, measure.vars = c("pi", "svm", "di"))
npar <- nparcomp(value ~ variable, data = result, type = "Tukey")
summary(npar)

# Same call on the built-in ChickWeight data, kept as a reference run. It is
# stored under its own name so the result above is not overwritten.
data(ChickWeight)
summary(ChickWeight)
npar_demo <- nparcomp(weight ~ Diet, data = ChickWeight, type = "Tukey")
summary(npar_demo)
# compare plot
# Line chart with one line per classifier (plus the baseline) across
# proj_id values.

# Reload the wide table. By this point `result` has been reshaped or reduced
# to other columns by the preceding analysis, and no longer holds proj_id or
# the classifier columns.
# NOTE(review): assumes result.csv contains a proj_id column; confirm.
result <- read.csv("C:/Users/qiangge/Desktop/result.csv")

# Sort by mean_machine only when that column exists. Ordering by a missing
# column would silently leave zero rows.
if ("mean_machine" %in% names(result)) {
  result <- result[order(result$mean_machine), ]
}

fx <- result[, c("proj_id", "mean_nb_performance", "mean_lg_performance",
                 "mean_rf_performance", "mean_svm_performance", "baseline")]
#View(fx)

# Long format: one row per (proj_id, score column). The column holding the
# score's source is named MLA directly.
library(reshape2)
fx <- melt(fx, id.vars = "proj_id", variable.name = "MLA")

library(ggplot2)
# The x axis is ordered through reorder() on the lower-cased proj_id, using
# the score values.
npg <- ggplot(
  data = fx,
  aes(x = reorder(tolower(proj_id), value), y = value,
      group = MLA, colour = MLA)
) +
  geom_line() +
  geom_point() +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(colour = "grey60", linetype = "dashed")
  )
npg
# Compare the `diff` column of the feature table with that of the bug table.
feature <- read.csv("C:/Users/qiangge/Desktop/feature.csv")
bug <- read.csv("C:/Users/qiangge/Desktop/bug.csv")

# The list names become the box labels.
boxplot(
  list(feature = feature$diff, bug = bug$diff),
  ylab = "timestamp"
)

summary(feature$diff)
summary(bug$diff)