ADD file via upload
This commit is contained in:
parent
0b6c02c121
commit
47627e5444
|
@ -0,0 +1,68 @@
|
|||
import requests
|
||||
import openpyxl
|
||||
import time
|
||||
|
||||
def search_repo(q, page):
    """Request one page of results from the GitHub repository search API.

    Results are sorted by star count in descending order, 100 per page.

    Args:
        q: Search query string including any qualifiers, e.g.
            ``'created:2021-01-01..2021-12-31'``.
        page: Page number to request.

    Returns:
        The ``requests.Response`` for the call, whatever its status code;
        the caller is responsible for inspecting the JSON body.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    # Function-scope import so this block does not depend on file-level edits.
    import os

    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Accept': 'application/json',
    }
    # Credentials must never live in source control: the access token is
    # read from the GITHUB_TOKEN environment variable. Without it the
    # request is sent unauthenticated.
    token = os.environ.get('GITHUB_TOKEN')
    if token:
        headers['Authorization'] = 'token ' + token

    url = 'https://api.github.com/search/repositories'
    # Search parameters: the query goes through ``params`` so that requests
    # URL-encodes it; any date filter belongs inside ``q`` as a qualifier.
    params = {
        'q': q,
        'sort': 'stars',
        'order': 'desc',
        'per_page': 100,
        'page': page,
    }
    # A timeout prevents a stalled connection from hanging the crawl forever.
    reponse = requests.get(url, headers=headers, params=params, timeout=30)
    if reponse.status_code == 200:
        print(reponse.status_code, "响应成功!")
    return reponse
|
||||
|
||||
|
||||
|
||||
# Fetch the 1000 most-starred repositories created in each year and export
# them to one Excel workbook per year.
time_list = [2021, 2022, 2023]

_PAGES_PER_YEAR = 10   # 10 pages x 100 results per page
_MAX_ATTEMPTS = 30     # upper bound on tries per page
_RETRY_DELAY = 2       # seconds to wait between tries


def _fetch_year_pages(year):
    """Return a list of result pages (each a list of repo dicts) for *year*.

    A page is retried up to ``_MAX_ATTEMPTS`` times when the request fails
    or the JSON body has no ``'items'`` key; after that the page is skipped
    so that a permanent error cannot stall the crawl forever.
    """
    query = 'created:{0}-01-01..{0}-12-31'.format(year)
    pages = []
    # GitHub numbers result pages from 1; asking for page 0 would repeat
    # page 1 and the tenth page would never be requested.
    for page in range(1, _PAGES_PER_YEAR + 1):
        for _ in range(_MAX_ATTEMPTS):
            payload = None
            try:
                payload = search_repo(query, page).json()
                items = payload['items']
            except (KeyError, ValueError, requests.RequestException):
                print('第{0}页获取失败'.format(page))
                if payload is not None:
                    print(payload)
                time.sleep(_RETRY_DELAY)  # back off before retrying
            else:
                pages.append(items)
                break
        else:
            print('第{0}页重试{1}次后仍失败,已跳过'.format(page, _MAX_ATTEMPTS))
        print("已获取页数:", len(pages))
    return pages


def _export_year(year, pages):
    """Write every repository in *pages* to ``<year>GitHub数据.xlsx``."""
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.append(['名称', '作者', '创建时间', '更新时间', 'push时间',
               'Stars', '语言', '网址', '标签', '开源许可证'])
    for items in pages:
        for repo in items:
            # ``license`` is None when the repository declares no license.
            license_info = repo['license']
            license_name = None
            if license_info is not None:
                license_name = license_info['name']
            ws.append([
                repo['name'],
                repo['owner']['login'],
                repo['created_at'],
                repo['updated_at'],
                repo['pushed_at'],
                repo['stargazers_count'],
                repo['language'],
                repo['html_url'],
                ','.join(repo.get('topics') or []),
                license_name,
            ])
    wb.save("{0}GitHub数据.xlsx".format(year))


for year in time_list:
    print('-------------{0}开始爬取-------------'.format(year))
    repo_list = _fetch_year_pages(year)
    print('-------------爬取结束-------------')
    # Count the items actually received; pages are not guaranteed to be full.
    print("获取库数:", sum(len(items) for items in repo_list))
    _export_year(year, repo_list)
    print("-----------------Done-----------------")
print("-----------------Finish-----------------")
|
||||
|
Loading…
Reference in New Issue