2021-06-03 07:55:52 +08:00
# aim: find users(maintainer, sponsor, others)
# author: zxh
# date: 2021-06-02
import pymysql , yaml , math , datetime , json
from utils import *
import seaborn as sns
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import threading , queue # 多线程搜集数据
from ghapi . all import GhApi
from pymongo import MongoClient
f = open ( ' config.yaml ' , ' r ' )
config = yaml . load ( f . read ( ) , Loader = yaml . BaseLoader )
conn = connectMysqlDB ( config , autocommit = False )
cur = conn . cursor ( pymysql . cursors . DictCursor )
import random
random . seed ( 10 )
# find maintainers
2021-06-07 22:27:36 +08:00
cur . execute ( " select gsl.login, mdre.name, mdre.email from github_sponsor_listing gsl, middle_data_recent_emails mdre where gsl.deleted=0 and gsl.login=mdre.login and mdre.deleted=0 and mdre.name is not null and mdre.name != ' ' and mdre.email not like ' % noreply % ' " )
2021-06-03 07:55:52 +08:00
items = cur . fetchall ( )
query_dict = { }
all_sponsor_account_users = [ ]
for item in items :
query_dict [ item [ ' login ' ] ] = {
" name " : item [ ' name ' ] ,
" email " : item [ ' email ' ]
}
all_sponsor_account_users . append ( item [ ' login ' ] )
2021-06-07 22:27:36 +08:00
all_sponsor_account_users = list ( set ( all_sponsor_account_users ) )
2021-06-03 07:55:52 +08:00
2021-06-07 22:27:36 +08:00
all_sponsor_account_users = random . sample ( all_sponsor_account_users , 2000 ) # firstly random 2000
2021-06-03 07:55:52 +08:00
# 1. random 20% for first round
first_round = random . sample ( all_sponsor_account_users , int ( len ( all_sponsor_account_users ) * 0.2 ) )
# 2. the rest 80% for second round
second_round = list ( set ( all_sponsor_account_users ) - set ( first_round ) )
for user in first_round :
if user in query_dict :
cur . execute ( " insert into middle_data_questionnaire_users (login, type, which_round, name, email) values ( %s , %s , %s , %s , %s ) " , ( user , " maintainer " , 1 , query_dict [ user ] [ ' name ' ] , query_dict [ user ] [ ' email ' ] ) )
else :
cur . execute ( " insert into middle_data_questionnaire_users (login, type, which_round) values ( %s , %s , %s ) " , ( user , " maintainer " , 1 ) )
for user in second_round :
if user in query_dict :
cur . execute ( " insert into middle_data_questionnaire_users (login, type, which_round, name, email) values ( %s , %s , %s , %s , %s ) " , ( user , " maintainer " , 2 , query_dict [ user ] [ ' name ' ] , query_dict [ user ] [ ' email ' ] ) )
else :
cur . execute ( " insert into middle_data_questionnaire_users (login, type, which_round) values ( %s , %s , %s ) " , ( user , " maintainer " , 2 ) )
conn . commit ( )
# find sponsors
2021-06-07 22:27:36 +08:00
cur . execute ( " select gsam.sponsor_login, mdre.name, mdre.email from github_sponsorships_as_maintainer gsam, middle_data_recent_emails mdre where gsam.flag=0 and gsam.sponsor_login=mdre.login and mdre.deleted=0 and mdre.name is not null and mdre.name != ' ' and mdre.email not like ' % noreply % ' " )
2021-06-03 07:55:52 +08:00
items = cur . fetchall ( )
query_dict = { }
all_sponsors = [ ]
for item in items :
query_dict [ item [ ' sponsor_login ' ] ] = {
" name " : item [ ' name ' ] ,
" email " : item [ ' email ' ]
}
all_sponsors . append ( item [ ' sponsor_login ' ] )
all_sponsors = list ( set ( all_sponsors ) - set ( all_sponsor_account_users ) )
# 1. random 20% for first round
2021-06-07 22:27:36 +08:00
all_sponsors = random . sample ( all_sponsors , 2000 )
2021-06-03 07:55:52 +08:00
first_round = random . sample ( all_sponsors , int ( len ( all_sponsors ) * 0.2 ) )
# 2. the rest 80% for second round
second_round = list ( set ( all_sponsors ) - set ( first_round ) )
for user in first_round :
if user in query_dict :
cur . execute ( " insert into middle_data_questionnaire_users (login, type, which_round, name, email) values ( %s , %s , %s , %s , %s ) " , ( user , " sponsors " , 1 , query_dict [ user ] [ ' name ' ] , query_dict [ user ] [ ' email ' ] ) )
else :
cur . execute ( " insert into middle_data_questionnaire_users (login, type, which_round) values ( %s , %s , %s ) " , ( user , " sponsors " , 1 ) )
for user in second_round :
if user in query_dict :
cur . execute ( " insert into middle_data_questionnaire_users (login, type, which_round, name, email) values ( %s , %s , %s , %s , %s ) " , ( user , " sponsors " , 2 , query_dict [ user ] [ ' name ' ] , query_dict [ user ] [ ' email ' ] ) )
else :
cur . execute ( " insert into middle_data_questionnaire_users (login, type, which_round) values ( %s , %s , %s ) " , ( user , " sponsors " , 2 ) )
conn . commit ( )
# find others
2021-06-07 22:27:36 +08:00
cur . execute ( " select md.login, mdre.name, mdre.email from middle_data_no_sponsor_account_users md, middle_data_recent_emails mdre where md.login=mdre.login and mdre.deleted=0 and mdre.name is not null and mdre.name != ' ' and mdre.email not like ' % noreply % ' " )
2021-06-03 07:55:52 +08:00
items = cur . fetchall ( )
query_dict = { }
all_others = [ ]
for item in items :
query_dict [ item [ ' login ' ] ] = {
" name " : item [ ' name ' ] ,
" email " : item [ ' email ' ]
}
all_others . append ( item [ ' login ' ] )
all_others = list ( set ( all_others ) - set ( all_sponsor_account_users ) - set ( all_sponsors ) )
2021-06-07 22:27:36 +08:00
# first random 2000 users
all_others = random . sample ( all_others , 2000 )
2021-06-03 07:55:52 +08:00
# 1. random 20% for first round
first_round = random . sample ( all_others , int ( len ( all_others ) * 0.2 ) )
# 2. the rest 80% for second round
second_round = list ( set ( all_others ) - set ( first_round ) )
for user in first_round :
if user in query_dict :
cur . execute ( " insert into middle_data_questionnaire_users (login, type, which_round, name, email) values ( %s , %s , %s , %s , %s ) " , ( user , " others " , 1 , query_dict [ user ] [ ' name ' ] , query_dict [ user ] [ ' email ' ] ) )
else :
cur . execute ( " insert into middle_data_questionnaire_users (login, type, which_round) values ( %s , %s , %s ) " , ( user , " others " , 1 ) )
for user in second_round :
if user in query_dict :
cur . execute ( " insert into middle_data_questionnaire_users (login, type, which_round, name, email) values ( %s , %s , %s , %s , %s ) " , ( user , " others " , 2 , query_dict [ user ] [ ' name ' ] , query_dict [ user ] [ ' email ' ] ) )
else :
cur . execute ( " insert into middle_data_questionnaire_users (login, type, which_round) values ( %s , %s , %s ) " , ( user , " others " , 2 ) )
conn . commit ( )