130 lines
6.4 KiB
Python
130 lines
6.4 KiB
Python
# aim: find users(maintainer, sponsor, others)
|
|
# author: zxh
|
|
# date: 2021-06-02
|
|
|
|
import pymysql, yaml, math, datetime, json
|
|
from utils import *
|
|
import seaborn as sns
|
|
import pandas as pd
|
|
from matplotlib import pyplot as plt
|
|
import numpy as np
|
|
import threading, queue # 多线程搜集数据
|
|
from ghapi.all import GhApi
|
|
from pymongo import MongoClient
|
|
|
|
f = open('config.yaml', 'r')
|
|
config = yaml.load(f.read(), Loader=yaml.BaseLoader)
|
|
conn = connectMysqlDB(config, autocommit = False)
|
|
cur = conn.cursor(pymysql.cursors.DictCursor)
|
|
|
|
import random
|
|
random.seed( 10 )
|
|
|
|
# find maintainers
|
|
cur.execute("select gsl.login, mdre.name, mdre.email from github_sponsor_listing gsl, middle_data_recent_emails mdre where gsl.deleted=0 and gsl.login=mdre.login and mdre.deleted=0 and mdre.name is not null and mdre.name != '' and mdre.email not like '%noreply%'")
|
|
items = cur.fetchall()
|
|
query_dict = {}
|
|
all_sponsor_account_users = []
|
|
for item in items:
|
|
query_dict[item['login']] = {
|
|
"name": item['name'],
|
|
"email": item['email']
|
|
}
|
|
all_sponsor_account_users.append(item['login'])
|
|
all_sponsor_account_users = list(set(all_sponsor_account_users))
|
|
|
|
# read first round user logins
|
|
cur.execute("select login from middle_data_questionnaire_users_firstround where type='maintainer' and which_round=1")
|
|
first_round = cur.fetchall()
|
|
first_round = [u['login'] for u in first_round]
|
|
# 2. the rest 80% for second round
|
|
second_round = list(set(all_sponsor_account_users) - set(first_round))
|
|
|
|
for user in first_round:
|
|
if user in query_dict:
|
|
cur.execute("insert into middle_data_questionnaire_users (login, type, which_round, name, email) values (%s, %s, %s, %s, %s)", (user, "maintainer", 1, query_dict[user]['name'], query_dict[user]['email']))
|
|
else:
|
|
cur.execute("insert into middle_data_questionnaire_users (login, type, which_round) values (%s, %s, %s)", (user, "maintainer", 1))
|
|
|
|
for user in second_round:
|
|
if user in query_dict:
|
|
cur.execute("insert into middle_data_questionnaire_users (login, type, which_round, name, email) values (%s, %s, %s, %s, %s)", (user, "maintainer", 2, query_dict[user]['name'], query_dict[user]['email']))
|
|
else:
|
|
cur.execute("insert into middle_data_questionnaire_users (login, type, which_round) values (%s, %s, %s)", (user, "maintainer", 2))
|
|
conn.commit()
|
|
|
|
# find sponsors
|
|
cur.execute("select gsam.sponsor_login, mdre.name, mdre.email from github_sponsorships_as_maintainer gsam, middle_data_recent_emails mdre where gsam.flag=0 and gsam.sponsor_login=mdre.login and mdre.deleted=0 and mdre.name is not null and mdre.name != '' and mdre.email not like '%noreply%'")
|
|
items = cur.fetchall()
|
|
query_dict = {}
|
|
all_sponsors = []
|
|
for item in items:
|
|
query_dict[item['sponsor_login']] = {
|
|
"name": item['name'],
|
|
"email": item['email']
|
|
}
|
|
all_sponsors.append(item['sponsor_login'])
|
|
all_sponsors = list(set(all_sponsors) - set(all_sponsor_account_users))
|
|
|
|
# read first round user logins
|
|
cur.execute("select login from middle_data_questionnaire_users_firstround where type='sponsors' and which_round=1")
|
|
first_round = cur.fetchall()
|
|
first_round = [u['login'] for u in first_round]
|
|
# 2. the rest
|
|
second_round = list(set(all_sponsors) - set(first_round))
|
|
|
|
for user in first_round:
|
|
if user in query_dict:
|
|
cur.execute("insert into middle_data_questionnaire_users (login, type, which_round, name, email) values (%s, %s, %s, %s, %s)", (user, "sponsors", 1, query_dict[user]['name'], query_dict[user]['email']))
|
|
else:
|
|
cur.execute("insert into middle_data_questionnaire_users (login, type, which_round) values (%s, %s, %s)", (user, "sponsors", 1))
|
|
|
|
for user in second_round:
|
|
if user in query_dict:
|
|
cur.execute("insert into middle_data_questionnaire_users (login, type, which_round, name, email) values (%s, %s, %s, %s, %s)", (user, "sponsors", 2, query_dict[user]['name'], query_dict[user]['email']))
|
|
else:
|
|
cur.execute("insert into middle_data_questionnaire_users (login, type, which_round) values (%s, %s, %s)", (user, "sponsors", 2))
|
|
conn.commit()
|
|
|
|
|
|
# find others
|
|
cur.execute("select md.login, mdre.name, mdre.email from middle_data_no_sponsor_account_users md, middle_data_recent_emails mdre, middle_data_recent_stars mdrs where md.login=mdre.login and mdre.login=mdrs.login and mdre.deleted=0 and mdre.name is not null and mdre.name != '' and mdre.email not like '%noreply%' and mdrs.total_own_repo_stars>=10 and mdrs.deleted=0")
|
|
items = cur.fetchall()
|
|
query_dict = {}
|
|
all_others = []
|
|
for item in items:
|
|
query_dict[item['login']] = {
|
|
"name": item['name'],
|
|
"email": item['email']
|
|
}
|
|
all_others.append(item['login'])
|
|
all_others = list(set(all_others) - set(all_sponsor_account_users) - set(all_sponsors))
|
|
|
|
# read first round user logins
|
|
cur.execute("select login from middle_data_questionnaire_users_firstround where type='others' and which_round=1")
|
|
first_round = cur.fetchall()
|
|
first_round = [u['login'] for u in first_round]
|
|
# 2. random 7500
|
|
second_round = random.sample(list(set(all_others) - set(first_round)), 7500)
|
|
# 3. random 7500 for the third time
|
|
third_round = random.sample(list((set(all_others) - set(first_round)) - set(second_round)), 7500)
|
|
|
|
for user in first_round:
|
|
if user in query_dict:
|
|
cur.execute("insert into middle_data_questionnaire_users (login, type, which_round, name, email) values (%s, %s, %s, %s, %s)", (user, "others", 1, query_dict[user]['name'], query_dict[user]['email']))
|
|
else:
|
|
cur.execute("insert into middle_data_questionnaire_users (login, type, which_round) values (%s, %s, %s)", (user, "others", 1))
|
|
|
|
for user in second_round:
|
|
if user in query_dict:
|
|
cur.execute("insert into middle_data_questionnaire_users (login, type, which_round, name, email) values (%s, %s, %s, %s, %s)", (user, "others", 2, query_dict[user]['name'], query_dict[user]['email']))
|
|
else:
|
|
cur.execute("insert into middle_data_questionnaire_users (login, type, which_round) values (%s, %s, %s)", (user, "others", 2))
|
|
|
|
|
|
for user in third_round:
|
|
if user in query_dict:
|
|
cur.execute("insert into middle_data_questionnaire_users (login, type, which_round, name, email) values (%s, %s, %s, %s, %s)", (user, "others", 3, query_dict[user]['name'], query_dict[user]['email']))
|
|
else:
|
|
cur.execute("insert into middle_data_questionnaire_users (login, type, which_round) values (%s, %s, %s)", (user, "others", 3))
|
|
conn.commit() |