2021-06-23 19:25:10 +08:00
# aim: find users(maintainer, sponsor, others)
# author: zxh
# date: 2021-06-02
import pymysql , yaml , math , datetime , json
from utils import *
import seaborn as sns
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import threading , queue # 多线程搜集数据
from ghapi . all import GhApi
from pymongo import MongoClient
f = open ( ' config.yaml ' , ' r ' )
config = yaml . load ( f . read ( ) , Loader = yaml . BaseLoader )
conn = connectMysqlDB ( config , autocommit = False )
cur = conn . cursor ( pymysql . cursors . DictCursor )
import random
random . seed ( 10 )
# find maintainers
cur . execute ( " select gsl.login, mdre.name, mdre.email from github_sponsor_listing gsl, middle_data_recent_emails mdre where gsl.deleted=0 and gsl.login=mdre.login and mdre.deleted=0 and mdre.name is not null and mdre.name != ' ' and mdre.email not like ' % noreply % ' " )
items = cur . fetchall ( )
query_dict = { }
all_sponsor_account_users = [ ]
for item in items :
query_dict [ item [ ' login ' ] ] = {
" name " : item [ ' name ' ] ,
" email " : item [ ' email ' ]
}
all_sponsor_account_users . append ( item [ ' login ' ] )
all_sponsor_account_users = list ( set ( all_sponsor_account_users ) )
2021-06-29 14:25:32 +08:00
# read first round user logins
cur . execute ( " select login from middle_data_questionnaire_users_firstround where type= ' maintainer ' and which_round=1 " )
first_round = cur . fetchall ( )
first_round = [ u [ ' login ' ] for u in first_round ]
2021-06-23 19:25:10 +08:00
# 2. the rest 80% for second round
second_round = list ( set ( all_sponsor_account_users ) - set ( first_round ) )
for user in first_round :
if user in query_dict :
cur . execute ( " insert into middle_data_questionnaire_users (login, type, which_round, name, email) values ( %s , %s , %s , %s , %s ) " , ( user , " maintainer " , 1 , query_dict [ user ] [ ' name ' ] , query_dict [ user ] [ ' email ' ] ) )
else :
cur . execute ( " insert into middle_data_questionnaire_users (login, type, which_round) values ( %s , %s , %s ) " , ( user , " maintainer " , 1 ) )
for user in second_round :
if user in query_dict :
cur . execute ( " insert into middle_data_questionnaire_users (login, type, which_round, name, email) values ( %s , %s , %s , %s , %s ) " , ( user , " maintainer " , 2 , query_dict [ user ] [ ' name ' ] , query_dict [ user ] [ ' email ' ] ) )
else :
cur . execute ( " insert into middle_data_questionnaire_users (login, type, which_round) values ( %s , %s , %s ) " , ( user , " maintainer " , 2 ) )
conn . commit ( )
# find sponsors
cur . execute ( " select gsam.sponsor_login, mdre.name, mdre.email from github_sponsorships_as_maintainer gsam, middle_data_recent_emails mdre where gsam.flag=0 and gsam.sponsor_login=mdre.login and mdre.deleted=0 and mdre.name is not null and mdre.name != ' ' and mdre.email not like ' % noreply % ' " )
items = cur . fetchall ( )
query_dict = { }
all_sponsors = [ ]
for item in items :
query_dict [ item [ ' sponsor_login ' ] ] = {
" name " : item [ ' name ' ] ,
" email " : item [ ' email ' ]
}
all_sponsors . append ( item [ ' sponsor_login ' ] )
all_sponsors = list ( set ( all_sponsors ) - set ( all_sponsor_account_users ) )
2021-06-29 14:25:32 +08:00
# read first round user logins
cur . execute ( " select login from middle_data_questionnaire_users_firstround where type= ' sponsors ' and which_round=1 " )
first_round = cur . fetchall ( )
first_round = [ u [ ' login ' ] for u in first_round ]
2021-06-23 19:25:10 +08:00
# 2. the rest
second_round = list ( set ( all_sponsors ) - set ( first_round ) )
for user in first_round :
if user in query_dict :
cur . execute ( " insert into middle_data_questionnaire_users (login, type, which_round, name, email) values ( %s , %s , %s , %s , %s ) " , ( user , " sponsors " , 1 , query_dict [ user ] [ ' name ' ] , query_dict [ user ] [ ' email ' ] ) )
else :
cur . execute ( " insert into middle_data_questionnaire_users (login, type, which_round) values ( %s , %s , %s ) " , ( user , " sponsors " , 1 ) )
for user in second_round :
if user in query_dict :
cur . execute ( " insert into middle_data_questionnaire_users (login, type, which_round, name, email) values ( %s , %s , %s , %s , %s ) " , ( user , " sponsors " , 2 , query_dict [ user ] [ ' name ' ] , query_dict [ user ] [ ' email ' ] ) )
else :
cur . execute ( " insert into middle_data_questionnaire_users (login, type, which_round) values ( %s , %s , %s ) " , ( user , " sponsors " , 2 ) )
conn . commit ( )
# find others
2021-06-29 14:25:32 +08:00
cur . execute ( " select md.login, mdre.name, mdre.email from middle_data_no_sponsor_account_users md, middle_data_recent_emails mdre, middle_data_recent_stars mdrs where md.login=mdre.login and mdre.login=mdrs.login and mdre.deleted=0 and mdre.name is not null and mdre.name != ' ' and mdre.email not like ' % noreply % ' and mdrs.total_own_repo_stars>=10 and mdrs.deleted=0 " )
2021-06-23 19:25:10 +08:00
items = cur . fetchall ( )
query_dict = { }
all_others = [ ]
for item in items :
query_dict [ item [ ' login ' ] ] = {
" name " : item [ ' name ' ] ,
" email " : item [ ' email ' ]
}
all_others . append ( item [ ' login ' ] )
all_others = list ( set ( all_others ) - set ( all_sponsor_account_users ) - set ( all_sponsors ) )
2021-06-29 14:25:32 +08:00
# read first round user logins
cur . execute ( " select login from middle_data_questionnaire_users_firstround where type= ' others ' and which_round=1 " )
first_round = cur . fetchall ( )
first_round = [ u [ ' login ' ] for u in first_round ]
# 2. random 7500
second_round = random . sample ( list ( set ( all_others ) - set ( first_round ) ) , 7500 )
# 3. random 7500 for the third time
third_round = random . sample ( list ( ( set ( all_others ) - set ( first_round ) ) - set ( second_round ) ) , 7500 )
2021-06-23 19:25:10 +08:00
for user in first_round :
if user in query_dict :
cur . execute ( " insert into middle_data_questionnaire_users (login, type, which_round, name, email) values ( %s , %s , %s , %s , %s ) " , ( user , " others " , 1 , query_dict [ user ] [ ' name ' ] , query_dict [ user ] [ ' email ' ] ) )
else :
cur . execute ( " insert into middle_data_questionnaire_users (login, type, which_round) values ( %s , %s , %s ) " , ( user , " others " , 1 ) )
for user in second_round :
if user in query_dict :
cur . execute ( " insert into middle_data_questionnaire_users (login, type, which_round, name, email) values ( %s , %s , %s , %s , %s ) " , ( user , " others " , 2 , query_dict [ user ] [ ' name ' ] , query_dict [ user ] [ ' email ' ] ) )
else :
cur . execute ( " insert into middle_data_questionnaire_users (login, type, which_round) values ( %s , %s , %s ) " , ( user , " others " , 2 ) )
2021-06-29 14:25:32 +08:00
for user in third_round :
if user in query_dict :
cur . execute ( " insert into middle_data_questionnaire_users (login, type, which_round, name, email) values ( %s , %s , %s , %s , %s ) " , ( user , " others " , 3 , query_dict [ user ] [ ' name ' ] , query_dict [ user ] [ ' email ' ] ) )
else :
cur . execute ( " insert into middle_data_questionnaire_users (login, type, which_round) values ( %s , %s , %s ) " , ( user , " others " , 3 ) )
2021-06-23 19:25:10 +08:00
conn . commit ( )