修改了结果，并提交了r文件

2017-01-23 08:55:39 +08:00 · 2017-01-23 08:55:39 +08:00 · 19540467ee
parent c71365d1ed
commit 19540467ee
5 changed files with 229 additions and 1 deletions
--- a/.gitignore
+++ b/.gitignore
@ -6,3 +6,4 @@ IEEE.pdf
 !latex/acmcopyright.sty
 !latex/sigproc.bib
 !latex/sig-alternate-05-2015.cls
+!*.r
--- a/latex/result.tex
+++ b/latex/result.tex
@ -56,6 +56,9 @@ where \textit{average F-measure} as $f_{avg}$, \textit{F-measure} of bug (nonbug
    SVM vs. NB & 0.133 & 0.021 & 0.286 & -8.58631110 & 0.000000e+00 \\
    SVM vs. LR & 0.326 & 0.077 & 0.219 & -4.20208441 & 2.366319e-04 \\
    SVM vs. RF & 0.317 & 0.224 & 0.428 & -4.44821666 & 1.250544e-04 \\
+  \midrule
+    Our approach vs. 1st Baseline & 0.429 & 0.338 & 0.524 & -1.7422153 & 0.0188184 \\
+    Our approach vs. 2nd Baseline & 0.435 & 0.344 & 0.530 & -1.5946872 & 0.0248742 \\
  \bottomrule
 \end{tabular}
 \end{table*}
@ -193,7 +196,7 @@ and some key words may be enough to build an effective classification model.
    $log(confuse\_count + 0.5)$ & -1.83346 &***& 134.623 &*** \\
    $log(med\_word\_count)$     & 0.12505  &   & 0.067   &  \\ \hline

-    marginal R-squared          & \multicolumn{4}{c}{XXX}   \\
+    marginal R-squared          & \multicolumn{4}{c}{0.6798150}  \\
    conditional R-squared       & \multicolumn{4}{c}{0.9251896}  \\   \hline

  \multicolumn{5}{l}{signif.: $p<0.001$ `***', $p<0.01$ `**',  $p<0.05$  `*'} \\
--- a/classifier.r
+++ b/classifier.r
@ -0,0 +1,57 @@
+# Exploratory look at the scores stored in result.csv.
+result <- read.csv("E:/work/paper/new/lunwen/reference/result.csv")
+
+# Box plot of every column, then of the three columns compared below.
+boxplot(result)
+
+boxplot(result$mean_svm_performance, result$lg_performance, result$lg_performance_noconfuse)
+
+
+summary(result$mean_svm_performance)
+
+
+# Rows whose mean_svm_performance is at most 0.7596.
+result_1 <- subset(result, mean_svm_performance <= 0.7596)
+boxplot(result_1$mean_svm_performance, result_1$lg_performance, result_1$lg_performance_noconfuse)
+
+# Rows whose mean_svm_performance is below 0.8. The summary has to come after
+# the subset is created; it previously ran before `result_2` existed.
+result_2 <- subset(result, mean_svm_performance < 0.8)
+summary(result_2$mean_svm_performance)
+boxplot(result_2$mean_svm_performance, result_2$lg_performance, result_2$lg_performance_noconfuse)
+
+
+# Same exploration on result_final.csv, using its svm, di and pi columns.
+result <- read.csv("E:/work/paper/new/lunwen/reference/result_final.csv")
+summary(result$svm)
+boxplot(result$svm, result$di, result$pi)
+
+# Rows with svm at most 0.7596.
+result_1 <- subset(result, svm <= 0.7596)
+boxplot(result_1$svm, result_1$di, result_1$pi)
+
+
+# Rows with svm below 0.7922.
+result_2 <- subset(result, svm < 0.7922)
+boxplot(result_2$svm, result_2$di, result_2$pi)
+
+# Five-number summaries of the three columns in the first subset ...
+summary(result_1$svm)
+summary(result_1$di)
+summary(result_1$pi)
+
+# ... and in the second subset.
+summary(result_2$svm)
+summary(result_2$di)
+summary(result_2$pi)
+
+
+library(ggplot2)
+
+# One score per (method, case) pair, laid out for a grouped bar chart.
+method <- c("1st baseline", "2nd baseline", "our approach", "1st baseline", "2nd baseline", "our approach")
+case <- c("case 1", "case 1", "case 1", "case 2", "case 2", "case 2")
+data <- c(0.7378, 0.7394, 0.7555, 0.7568, 0.7584, 0.7705)
+final_result <- data.frame(method, case, data)
+
+# theme_set() returns the *previous* theme, so adding its result to a plot with
+# `+` applies the old theme rather than theme_bw(). Set the session theme as a
+# statement of its own and let the plot pick it up.
+theme_set(theme_bw())
+
+# Bars dodged by method within each case; major grid lines are blanked out.
+p <- ggplot(final_result, mapping = aes(x = case, y = data, fill = method)) +
+  geom_bar(position = "dodge", stat = "identity") +
+  ylab("performance") + xlab("") +
+  theme(panel.grid.major = element_line(colour = NA))
+p
+# Same plot with the y axis zoomed to 0.7-0.8 (zooms without dropping data).
+p + coord_cartesian(ylim = c(0.7, 0.8))
+
+
--- a/scrip/fq.R
+++ b/scrip/fq.R
@ -0,0 +1,49 @@
+library(MASS)
+library(car)
+library(lme4)
+library(MuMIn)
+library(lmerTest)
+library(scales)
+# Logit transform: log(x / (1 - x)).
+logit <- function(x) log(x / (1 - x))
+
+# Variance inflation factors for the fixed effects of a fitted mixed model
+# (adapted from rms::vif).
+#
+# fit: a fitted model for which vcov() and fixef() are available.
+# Returns a named vector with one value per non-intercept fixed effect: the
+# diagonal of the inverse of the correlation matrix of the coefficient
+# estimates.
+#
+# Defined ahead of the analysis so a top-to-bottom run can call it; it was
+# previously defined only after its first use.
+vif.lme <- function(fit) {
+  v <- vcov(fit)
+  nam <- names(fixef(fit))
+  ## exclude intercepts (matched by name, wherever they appear)
+  keep <- !(nam %in% c("Intercept", "(Intercept)"))
+  v <- v[keep, keep, drop = FALSE]
+  nam <- nam[keep]
+  ## scale the covariance matrix to a correlation matrix, then invert it
+  d <- sqrt(diag(v))
+  v <- diag(solve(v / outer(d, d)))
+  names(v) <- nam
+  v
+}
+
+# NOTE(review): `r2` is not created anywhere in this script; it has to exist in
+# the session already -- confirm where it is loaded.
+summary(r2)
+hist(log(r2$commits), breaks = 100)
+# Subtracting 0.00001 keeps a value of exactly 1 from mapping to Inf.
+hist(logit(r2$svm_accuracy - 0.00001), breaks = 100)
+# NOTE(review): `contributors` is log-transformed in the model below, so logit
+# may not be the intended transform here -- confirm.
+hist(logit(r2$contributors - 0.00001), breaks = 100)
+hist(log(r2$commits), breaks = 100)
+
+r2$proj_id <- as.factor(r2$proj_id)
+
+# Mixed model for logit(svm_accuracy) with a random intercept per proj_id.
+mix_svm <- lmer(
+  logit(svm_accuracy - 0.00001) ~
+    log(star + watch) +
+    log(post_num) +
+    log(contributors) +
+    log(age + 0.5) +
+    log(commits) +
+    log(confuse_count + 0.5) +
+    log(med_word_count) +
+    (1 | proj_id),
+  data = r2
+)
+vif.lme(mix_svm)
+summary(mix_svm)
+anova(mix_svm)
+r.squaredGLMM(mix_svm)
+
+#--------------------------------
+# NOTE(review): `lt` is not defined in this script -- confirm which fitted
+# model this call is meant to inspect.
+vif.lme(lt)
--- a/scrip/result.r
+++ b/scrip/result.r
@ -0,0 +1,118 @@
+# Wilcoxon tests and box plots over the score columns of result.csv.
+result <- read.csv("C:/Users/qiangge/Desktop/result.csv")
+
+# SVM against each of the other three classifiers.
+wilcox.test(result$mean_svm_performance, result$mean_rf_performance)
+wilcox.test(result$mean_svm_performance, result$mean_nb_performance)
+wilcox.test(result$mean_svm_performance, result$mean_lg_performance)
+
+# Every classifier against the baseline column, SVM first.
+wilcox.test(result$mean_svm_performance, result$baseline)
+
+wilcox.test(result$mean_nb_performance, result$baseline)
+wilcox.test(result$mean_lg_performance, result$baseline)
+wilcox.test(result$mean_rf_performance, result$baseline)
+
+
+# SVM against the lg_performance column: test, then box plot.
+wilcox.test(result$mean_svm_performance, result$lg_performance)
+
+boxplot(result$mean_svm_performance, result$lg_performance)
+
+# Baseline and the four classifiers side by side.
+boxplot(
+  result$baseline,
+  result$mean_nb_performance,
+  result$mean_lg_performance,
+  result$mean_rf_performance,
+  result$mean_svm_performance,
+  names = c("Base Line", "NB", "LR", "RF", "SVM"),
+  ylab = "performance"
+)
+
+# Distribution of the per-row SVM minus RF difference.
+summary(result$mean_svm_performance - result$mean_rf_performance)
+
+
+# Each read.csv() below replaces `result`; when run top to bottom only the
+# last file read (false.csv) is analysed.
+result <- read.csv("C:/Users/qiangge/Desktop/improve_result.csv")
+
+result <- read.csv("C:/Users/qiangge/Desktop/3_result.csv")
+
+
+result <- read.csv("C:/Users/qiangge/Desktop/false.csv")
+
+# Per-row differences between the three score columns.
+summary(result$lg_performance - result$mean_svm_performance)
+summary(result$lg_performance_noconfuse - result$mean_svm_performance)
+summary(result$lg_performance - result$lg_performance_noconfuse)
+
+# Pairwise Wilcoxon tests on the same three columns.
+wilcox.test(result$mean_svm_performance, result$lg_performance)
+wilcox.test(result$lg_performance_noconfuse, result$lg_performance)
+wilcox.test(result$mean_svm_performance, result$lg_performance_noconfuse)
+
+boxplot(result$lg_performance_noconfuse, result$lg_performance)
+
+table(result$mean_svm_performance)
+
+# Keep rows with mean_svm_performance below 0.8. The filter previously named a
+# column `svm`, which is not how the SVM score is referred to anywhere else in
+# this section.
+result <- subset(result, mean_svm_performance < 0.8)
+
+boxplot(result$mean_svm_performance, result$lg_performance_noconfuse, result$lg_performance,
+        names = c("SVM", "DI+SVM", "MI+DI+SVM"), ylab = "performance")
+boxplot(result$mean_svm_performance, result$GREATEST.lg_performance.lg_performance_change.lg_performance_noconfuse.lg_performance_perplexity.)
+
+summary(result$mean_svm_performance)
+summary(result$GREATEST.lg_performance.lg_performance_change.lg_performance_noconfuse.lg_performance_perplexity.)
+
+
+
+# Each read.csv() replaces `result`; when run top to bottom the frame used
+# below comes from result_final.csv.
+result <- read.csv("C:/Users/qiangge/Desktop/0.75.csv")
+result <- read.csv("C:/Users/qiangge/Desktop/0.8.csv")
+result <- read.csv("C:/Users/qiangge/Desktop/result_final.csv")
+
+# Restrict to rows with svm below 0.75 and compare the three score columns.
+result <- subset(result, svm < 0.75)
+boxplot(
+  result$svm, result$di, result$pi,
+  names = c("SVM", "DI+SVM", "PI+DI+SVM"),
+  ylab = "performance"
+)
+
+# Per-row differences between the columns.
+summary(result$di - result$svm)
+summary(result$pi - result$svm)
+summary(result$pi - result$di)
+# Pairwise Wilcoxon tests on the same pairs.
+wilcox.test(result$di, result$svm)
+wilcox.test(result$pi, result$svm)
+wilcox.test(result$pi, result$di)
+
+
+# NOTE(review): this five-column selection is replaced by the assignment to
+# `result` a few lines below before anything uses it -- confirm whether a
+# separate comparison of these columns was intended.
+result <- read.csv("C:/Users/qiangge/Desktop/result.csv")
+result <- result[, c("mean_svm_performance", "mean_rf_performance", "mean_nb_performance", "mean_lg_performance", "baseline")]
+library(reshape)
+# `result_final` was referenced without being created in this script; read it
+# from the result_final.csv path the script already uses.
+result_final <- read.csv("C:/Users/qiangge/Desktop/result_final.csv")
+result <- result_final[, c("pi", "svm", "di")]
+# Melt into a separate long-format frame (columns `variable` and `value`) so
+# the wide svm/di/pi columns stay available for the box plot below.
+result_long <- melt(result)
+library(nparcomp)
+# Nonparametric all-pairs (Tukey-type) comparison of the three columns.
+npar <- nparcomp(value ~ variable, data = result_long, type = "Tukey")
+summary(npar)
+
+
+boxplot(result$svm, result$di, result$pi)
+
+
+# The same nparcomp call on the built-in ChickWeight data set.
+library(nparcomp)
+data(ChickWeight)
+summary(ChickWeight)
+npar <- nparcomp(weight ~ Diet, data = ChickWeight, type = "Tukey")
+summary(npar)
+
+
+
+
+# compare plot
+# NOTE(review): expects `result` to hold proj_id, mean_machine, baseline and
+# the four mean_*_performance columns -- confirm which CSV is loaded before
+# this section is run.
+
+# Order rows by mean_machine, then keep the project id and the score columns.
+result <- result[order(result$mean_machine), ]
+fx <- result[, c("proj_id", "mean_nb_performance", "mean_lg_performance", "mean_rf_performance", "mean_svm_performance", "baseline")]
+#View(fx)
+library(reshape2)
+# Long format: one row per (project, score column) pair.
+fx <- melt(fx)
+library(ggplot2)
+# The melted `variable` column is shown in the legend as "MLA".
+names(fx)[names(fx) == "variable"] <- "MLA"
+
+
+# One line per MLA across projects; x axis is the lower-cased project id,
+# reordered by value.
+npg <- ggplot(
+  data = fx,
+  aes(x = reorder(tolower(proj_id), value), y = value, group = MLA, colour = MLA)
+) +
+  geom_line() +
+  geom_point() +
+  theme_bw() +
+  theme(
+    axis.text.x = element_text(angle = 90, hjust = 1),
+    panel.grid.major.x = element_blank(),
+    panel.grid.minor.x = element_blank(),
+    panel.grid.major.y = element_line(colour = "grey60", linetype = "dashed")
+  )
+npg
+
+
+# Compare the `diff` column of the feature and bug CSV exports.
+feature <- read.csv("C:/Users/qiangge/Desktop/feature.csv")
+bug <- read.csv("C:/Users/qiangge/Desktop/bug.csv")
+
+boxplot(
+  feature$diff, bug$diff,
+  names = c("feature", "bug"),
+  ylab = "timestamp"
+)
+summary(feature$diff)
+summary(bug$diff)
+