Compare commits
No commits in common. "master" and "master" have entirely different histories.
@@ -1,10 +1,7 @@
import jsonlines
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import json
import math
import mysql_search


def auto_label(current_bar):
@@ -16,62 +13,33 @@ def auto_label(current_bar):
        plt.text(current_bar[i].get_x()+current_bar[i].get_width()/2.-0.6, height, '%s' % int(pow(10,height)), color="b", size=7, rotation=10)


def star_fork():
    with open("./search_result.txt", 'r') as f:
        reader = jsonlines.Reader(f)
        star_num, fork_num = np.zeros(100), np.zeros(100)
        # star_num, fork_num = {}, {}
        for item in reader:
            star_num[item['stars']] += 1
            fork_num[item['forks']] += 1

    # Print each star count (from high to low) that occurs at least once.
    for i in range(1, len(star_num) + 1):
        if star_num[-i] != 0:
            print(100 - i)
    x = np.arange(0, 100, 1)
    a = plt.bar(x, np.log10(star_num))
    auto_label(a)
    plt.xlabel("#stars")
    plt.ylabel("#repository (log)")
    plt.title("Distributions on stars of repositories forked from rails/rails")
    plt.show()

    # Print each fork count (from high to low) that occurs at least once.
    for i in range(1, len(fork_num) + 1):
        if fork_num[-i] != 0:
            print(100 - i)
    b = plt.bar(x, np.log10(fork_num))
    auto_label(b)
    plt.xlabel("#forks")
    plt.ylabel("#repository (log)")
    plt.title("Distributions on forks of repositories forked from rails/rails")
    plt.show()


if __name__ == '__main__':
with open("./pr_fork_com_dev_result.txt", 'r') as f:
|
||||
content = json.load(f)
|
||||
x = mysql_search.create_time_point()
|
||||
for i in range(0,len(x)):
|
||||
x[i] = x[i][:7]
|
||||
|
||||
|
||||
|
||||
plt.plot(x, content["pr"])
|
||||
plt.ylabel("#PR")
|
||||
plt.xticks(rotation=90, fontsize=7)
|
||||
plt.show()
|
||||
|
||||
plt.plot(x, content["commit"])
|
||||
plt.ylabel("#Commit")
|
||||
plt.xticks(rotation=90, fontsize=7)
|
||||
plt.show()
|
||||
|
||||
plt.plot(x, content["fork"])
|
||||
plt.ylabel("#Fork")
|
||||
plt.xticks(rotation=90, fontsize=7)
|
||||
plt.show()
|
||||
|
||||
plt.plot(x, content["developer"])
|
||||
plt.ylabel("#Developer")
|
||||
plt.xticks(rotation=90, fontsize=7)
|
||||
plt.show()
|
mysql_search.py
@@ -7,151 +7,34 @@ mysql: root / 11111111
import pymysql
from tqdm import tqdm
import jsonlines
from datetime import datetime
import json


def mysql_search(sql):
    try:
        with mysql_conn.cursor() as cursor:
            cursor.execute(sql)
            search_result = cursor.fetchall()
            # print(select_result)
    except Exception as e:
        print(e)
    return search_result


def mysql_time_search(sql_, time_point):
    result_list = []
    for end_time in time_point:
        sql = sql_ % end_time
        result = mysql_search(sql)
        result_list.append(result[0][0])  # each query returns a single COUNT(*) row
    return result_list


with open("./search_result.txt",'w') as f:
|
||||
writer = jsonlines.Writer(f)
|
||||
writer.write_all(sf_repo)
|
||||
writer.close()
|
||||
|
||||
def search_fork():
    '''Query all projects forked from the rails project.'''
    fork_repos = mysql_search("SELECT * FROM projects WHERE forked_from = 1334")
    return fork_repos


def search_pr_in_time(time_point):
    '''Query the number of PRs of the rails project up to each time point.'''
    sql = "SELECT count(pr.id) " \
          "from pull_requests pr, pull_request_history prh " \
          "where pr.id=prh.pull_request_id " \
          "and pr.base_repo_id = 1334 " \
          "and prh.created_at>'2008-04-11' " \
          "and prh.created_at<'%s'"
    pr_time = mysql_time_search(sql, time_point)
    return pr_time


def search_fork_in_time(time_point):
    '''Query the number of forks of the rails project up to each time point.'''
    sql = "SELECT count(*) FROM projects " \
          "WHERE forked_from = 1334 " \
          "and projects.created_at>'2008-4-11' " \
          "and projects.created_at<'%s'"
    fork_time = mysql_time_search(sql, time_point)
    return fork_time


def search_commit_in_time(time_point):
    '''Query the number of commits of the rails project up to each time point.'''
    sql = "SELECT count(*) FROM commits " \
          "WHERE project_id = 1334 " \
          "and commits.created_at>'2008-04-11' " \
          "and commits.created_at<'%s'"
    commit_time = mysql_time_search(sql, time_point)
    return commit_time


def search_develop_in_tim(time_point):
    '''
    1. Look up all commits of the project.
    2. For each commit, look up its author and that author's first commit time,
       which is treated as the time the author joined the project.
    '''

    author_list, participate_list = (), ()
    commit_list = mysql_search("SELECT commit_id FROM project_commits WHERE project_id = 1334")
    for commit in tqdm(commit_list):
        commit_id = commit[0]
        author_id_t = mysql_search("SELECT author_id FROM commits WHERE id=%s" % commit_id)
        if author_id_t == ():
            continue
        author_id = author_id_t[0]
        if author_id[0] not in author_list:
            author_list += author_id
            participate_time_t = mysql_search("SELECT created_at FROM commits WHERE author_id = %s and project_id = 1334 ORDER BY created_at ASC limit 1" % author_id)
            if participate_time_t == ():
                continue
            participate_time = participate_time_t[0]
            participate_list += participate_time
        else:
            pass

    # Count how many developers joined in each interval between consecutive
    # time points, then accumulate the per-interval counts into a running total.
    date_list, frag_developer_time, developer_time = [], [], []
    for time in time_point:
        date_list.append(datetime.strptime(time, "%Y-%m-%d"))
        frag_developer_time.append(0)
        developer_time.append(0)

    for p_time in participate_list:
        for i in range(0, len(date_list) - 1):
            if p_time > date_list[i] and p_time < date_list[i + 1]:
                frag_developer_time[i + 1] += 1
                continue

    for i in range(0, len(frag_developer_time)):
        developer_time[i] = sum(frag_developer_time[:i + 1])

    return developer_time


def search_second_star_fork(fork_repos):
    '''Query the star count and fork count of every project forked from the rails project.'''
    sf_repo = []
    for i in tqdm(range(0, len(fork_repos) - 1)):
        repo_id = fork_repos[i][0]
        # mysql_search() takes a single SQL string, so substitute repo_id before calling it;
        # its fetchall() result is a tuple of rows, hence [0][0] to get the bare count.
        stars = mysql_search("SELECT COUNT(*) FROM watchers WHERE repo_id=%s" % repo_id)
        forks = mysql_search("SELECT COUNT(*) FROM projects WHERE forked_from=%s" % repo_id)
        sf_repo.append(
            {"repo_id": fork_repos[i][0], "repo_name": fork_repos[i][3], "stars": stars[0][0], "forks": forks[0][0]})
    return sf_repo


def save_jsonline_to_file(file_path, content):
    with open(file_path, 'w') as f:
        writer = jsonlines.Writer(f)
        writer.write_all(content)
        writer.close()


def save_json_to_file(file_path, content):
    with open(file_path, 'w') as f:
        json.dump(content, f)


def create_time_point():
    '''Build the list of quarterly time points from 2008-04-11 through 2019-06-01.'''
    time_point = ['2008-04-11', '2008-07-01', '2008-10-01']
    for j in range(2009, 2019, 1):
        for i in ['01', '04', '07', '10']:
            time_point.append(str(j) + "-" + i + "-01")
    time_point += ["2019-01-01", "2019-04-01", "2019-06-01"]
    return time_point


if __name__ == '__main__':
    mysql_conn = pymysql.connect(host='192.168.8.100', port=3306, user='root', passwd='11111111', db='ghtorrent0619')

    time_point = create_time_point()

    print("pr")
    pr_time = search_pr_in_time(time_point)
    print("fork")
    fork_time = search_fork_in_time(time_point)
    print("commit")
    commit_time = search_commit_in_time(time_point)
    print("developer")
    developer_time = search_develop_in_tim(time_point)
    content = {"pr": pr_time, "fork": fork_time, "commit": commit_time, "developer": developer_time}
    save_json_to_file("./pr_fork_com_dev_result.txt", content)

    # fork_repos = search_fork()
    #
    # sf_repo = search_second_star_fork(fork_repos)
    # save_jsonline_to_file("./search_result.txt", sf_repo)

    mysql_conn.close()
@@ -1 +0,0 @@
{"pr": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 118, 275, 479, 2311, 4110, 5795, 42772, 80241, 82371, 84025, 86330, 88385, 90459, 92475, 93920, 96144, 97828, 99435, 101305, 102961, 104865, 106414, 108443, 110032, 111323, 112787, 114683, 116313, 118526, 120020, 121292, 122636, 123986, 125346, 127007, 127943], "fork": [0, 59, 104, 159, 222, 303, 375, 443, 505, 593, 701, 814, 985, 1353, 1751, 2085, 2566, 3135, 3842, 4617, 5716, 6600, 7469, 8220, 9044, 9617, 9830, 10182, 11067, 12021, 12248, 12498, 12733, 13169, 13941, 14267, 14789, 15312, 16015, 16621, 17229, 17841, 18423, 18958, 19483, 19838], "commit": [0, 577, 1243, 1917, 2310, 2589, 3117, 3962, 5413, 6543, 8189, 9918, 11037, 13391, 15185, 17293, 18764, 20250, 21193, 22097, 23254, 24528, 25951, 27513, 28761, 31294, 33177, 35025, 36878, 38185, 39595, 40801, 42245, 43527, 44609, 45581, 46717, 47768, 48901, 49657, 50612, 51361, 52243, 52947, 54012, 54479], "developer": [0, 106, 189, 255, 333, 364, 431, 456, 532, 613, 722, 786, 876, 1023, 1159, 1283, 1352, 1443, 1494, 1555, 1631, 1695, 1748, 1783, 1848, 1940, 2033, 2130, 2195, 2256, 2325, 2369, 2411, 2462, 2491, 2522, 2550, 2571, 2595, 2618, 2650, 2670, 2704, 2736, 2779, 2794]}