cscw_2021_sponsor/crawler_sponsor_page.py

25 lines
620 B
Python

# aim: crawl the sponsor page
# date: 2021-03-31
# author: zhangxunhui
import pymysql, yaml, math, datetime
from utils import *
import seaborn as sns
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
# Load the database settings. The context manager closes the file handle as
# soon as the YAML text has been read (the handle used to stay open for the
# lifetime of the process).
with open('config.yaml', 'r') as f:
    # BaseLoader performs no implicit type resolution, so every scalar in
    # `config` is a plain string.
    config = yaml.load(f.read(), Loader=yaml.BaseLoader)

# connectMysqlDB is provided by the project's `utils` module.
conn = connectMysqlDB(config, autocommit = True)
# DictCursor returns each row as a dict keyed by column name.
cur = conn.cursor(pymysql.cursors.DictCursor)

# NOTE(review): not referenced anywhere in the visible code; presumably a
# per-hour request budget for the crawler -- confirm before relying on it.
time_per_hour = 50

# Select the login of every sponsor listing row where deleted=0.
cur.execute("select login from github_sponsor_listing where deleted=0")
items = cur.fetchall()

for item in items:
    login = item['login']
    # GitHub Sponsors page address for this login.
    # NOTE(review): the URL is built but not used in the visible code.
    url = "https://github.com/sponsors/" + login