cscw_2021_sponsor/analysis_questionnaire_2_st...

{
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  },
  "orig_nbformat": 4,
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3.8.5 64-bit ('base': conda)"
  },
  "interpreter": {
   "hash": "b3ba2566441a7c06988d0923437866b63cedc61552a5af99d1f4fb67d367b25f"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2,
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "source": [
    "# 统计reasons for maintainer\r\n",
    "# author: zxh\r\n",
    "# date: 2021-07-13\r\n",
    "import pymysql, yaml\r\n",
    "from utils import *\r\n",
    "import seaborn as sns\r\n",
    "import pandas as pd\r\n",
    "from matplotlib import pyplot as plt\r\n",
    "\r\n",
    "f = open('config.yaml', 'r')\r\n",
    "config = yaml.load(f.read(), Loader=yaml.BaseLoader)\r\n",
    "conn = connectMysqlDB(config, autocommit = False)\r\n",
    "cur = conn.cursor(pymysql.cursors.DictCursor)\r\n",
    "\r\n",
    "all_reasons = [\r\n",
    "    \"It allows users of my projects to express thanks/appreciation\",\r\n",
    "    \"Sponsorship can motivate my future OSS contribution\",\r\n",
    "    \"Side income for OSS contribution\",\r\n",
    "    \"It can reflect community recognition for my work\",\r\n",
    "    \"Just for fun\",\r\n",
    "    \"I deserve to be rewarded for my past OSS contribution\",\r\n",
    "    \"I am able to prioritize the requirements of sponsors (e.g., fixing bugs)\",\r\n",
    "    \"It’s a way for me to make a living\",\r\n",
    "    \"Other\"\r\n",
    "]\r\n",
    "\r\n",
    "cur.execute(\"select count(distinct login) as num from questionnaire_results_maintainer_2_set_up_reasons where value is not null\")\r\n",
    "all_num = cur.fetchone()[\"num\"]\r\n",
    "\r\n",
    "for reason in all_reasons:\r\n",
    "    if reason == \"Other\":\r\n",
    "        cur.execute(\"select count(distinct login) as num from questionnaire_results_maintainer_2_set_up_reasons where value like 'Other (please specify)%'\")\r\n",
    "        num = cur.fetchone()[\"num\"]\r\n",
    "    else:\r\n",
    "        cur.execute(\"select count(distinct login) as num from questionnaire_results_maintainer_2_set_up_reasons where value=%s\", (reason,))\r\n",
    "        num = cur.fetchone()[\"num\"]\r\n",
    "    print(round(num / all_num * 100, 1))"
   ],
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "64.9\n",
      "63.1\n",
      "60.6\n",
      "39.9\n",
      "28.9\n",
      "21.8\n",
      "18.8\n",
      "13.1\n",
      "1.9\n"
     ]
    }
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "source": [
    "# 统计reasons for sponsors\r\n",
    "# author: zxh\r\n",
    "# date: 2021-07-13\r\n",
    "import pymysql, yaml\r\n",
    "from utils import *\r\n",
    "import seaborn as sns\r\n",
    "import pandas as pd\r\n",
    "from matplotlib import pyplot as plt\r\n",
    "\r\n",
    "f = open('config.yaml', 'r')\r\n",
    "config = yaml.load(f.read(), Loader=yaml.BaseLoader)\r\n",
    "conn = connectMysqlDB(config, autocommit = False)\r\n",
    "cur = conn.cursor(pymysql.cursors.DictCursor)\r\n",
    "\r\n",
    "all_reasons = [\r\n",
    "    \"Because I benefit from the developer’s projects\",\r\n",
    "    \"To encourage the developer to continue the contribution\",\r\n",
    "    \"To show my recognition of the developer’s work\",\r\n",
    "    \"Because I’m interested in the developer’s projects\",\r\n",
    "    \"To motivate the developer to work harder on a specific feature\",\r\n",
    "    \"Because I know the developer\",\r\n",
    "    \"Other\"\r\n",
    "]\r\n",
    "\r\n",
    "cur.execute(\"select count(distinct login) as num from questionnaire_results_sponsor_2_sponsor_reasons where value is not null\")\r\n",
    "all_num = cur.fetchone()[\"num\"]\r\n",
    "\r\n",
    "for reason in all_reasons:\r\n",
    "    if reason == \"Other\":\r\n",
    "        cur.execute(\"select count(distinct login) as num from questionnaire_results_sponsor_2_sponsor_reasons where value like 'Other (please specify)%'\")\r\n",
    "        num = cur.fetchone()[\"num\"]\r\n",
    "    else:\r\n",
    "        cur.execute(\"select count(distinct login) as num from questionnaire_results_sponsor_2_sponsor_reasons where value=%s\", (reason,))\r\n",
    "        num = cur.fetchone()[\"num\"]\r\n",
    "    print(round(num / all_num * 100, 1))"
   ],
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "85.8\n",
      "78.4\n",
      "69.5\n",
      "49.0\n",
      "9.4\n",
      "8.9\n",
      "1.0\n"
     ]
    }
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "source": [
    "# 统计reasons for others\r\n",
    "# author: zxh\r\n",
    "# date: 2021-07-13\r\n",
    "import pymysql, yaml\r\n",
    "from utils import *\r\n",
    "import seaborn as sns\r\n",
    "import pandas as pd\r\n",
    "from matplotlib import pyplot as plt\r\n",
    "\r\n",
    "f = open('config.yaml', 'r')\r\n",
    "config = yaml.load(f.read(), Loader=yaml.BaseLoader)\r\n",
    "conn = connectMysqlDB(config, autocommit = False)\r\n",
    "cur = conn.cursor(pymysql.cursors.DictCursor)\r\n",
    "\r\n",
    "all_reasons = [\r\n",
    "    \"No need to be sponsored\",\r\n",
    "    \"I contribute to OSS not for money\",\r\n",
    "    \"My work is not worth being sponsored\",\r\n",
    "    \"Never heard of it\",\r\n",
    "    \"It’s cumbersome\",\r\n",
    "    \"Not available in my region\",\r\n",
    "    \"Other\"\r\n",
    "]\r\n",
    "\r\n",
    "cur.execute(\"select count(distinct login) as num from questionnaire_results_others_2_not_set_reasons where value is not null\")\r\n",
    "all_num = cur.fetchone()[\"num\"]\r\n",
    "\r\n",
    "for reason in all_reasons:\r\n",
    "    if reason == \"Other\":\r\n",
    "        cur.execute(\"select count(distinct login) as num from questionnaire_results_others_2_not_set_reasons where value like 'Other (please specify)%'\")\r\n",
    "        num = cur.fetchone()[\"num\"]\r\n",
    "    else:\r\n",
    "        cur.execute(\"select count(distinct login) as num from questionnaire_results_others_2_not_set_reasons where value=%s\", (reason,))\r\n",
    "        num = cur.fetchone()[\"num\"]\r\n",
    "    print(round(num / all_num * 100, 1))"
   ],
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "39.3\n",
      "38.3\n",
      "28.4\n",
      "26.4\n",
      "8.5\n",
      "2.0\n",
      "10.4\n"
     ]
    }
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "source": [
    "# 计算选项之间的相关性（即选择一个选项的人多大概率会选择另外一个选项）\r\n",
    "# author: zxh\r\n",
    "# date: 2021-07-14\r\n",
    "import pymysql, yaml\r\n",
    "from utils import *\r\n",
    "import seaborn as sns\r\n",
    "import pandas as pd\r\n",
    "import numpy\r\n",
    "from matplotlib import pyplot as plt\r\n",
    "from itertools import combinations\r\n",
    "from upsetplot import from_memberships\r\n",
    "from matplotlib import pyplot as plt\r\n",
    "from upsetplot import generate_counts, plot\r\n",
    "\r\n",
    "f = open('config.yaml', 'r')\r\n",
    "config = yaml.load(f.read(), Loader=yaml.BaseLoader)\r\n",
    "conn = connectMysqlDB(config, autocommit = False)\r\n",
    "cur = conn.cursor(pymysql.cursors.DictCursor)\r\n",
    "\r\n",
    "def upsetplot(table_name):\r\n",
    "    option_login_dict = {} # key: option, value: login list\r\n",
    "    distinct_values = []\r\n",
    "\r\n",
    "    cur.execute(\"select login, value from \" + table_name + \" where value is not null\")\r\n",
    "    items = cur.fetchall()\r\n",
    "    for item in items:\r\n",
    "        login = item['login']\r\n",
    "        value = item['value']\r\n",
    "        if value.startswith(\"Other (please specify)\"):\r\n",
    "            value = \"Other\"\r\n",
    "        option_login_dict.setdefault(value, [])\r\n",
    "        option_login_dict[value].append(login)\r\n",
    "        distinct_values.append(value)\r\n",
    "\r\n",
    "    # 形成排列组合\r\n",
    "    distinct_values = list(set(distinct_values))\r\n",
    "    combs = []\r\n",
    "    nums = []\r\n",
    "    for i in range(2, len(distinct_values), 1):\r\n",
    "        combs.extend(list(combinations(distinct_values, i)))\r\n",
    "    xList = []\r\n",
    "    for comb in combs:\r\n",
    "        xs = []\r\n",
    "        for i in range(0, len(comb)):\r\n",
    "            xs.append(comb[i])\r\n",
    "        xList.append(xs)\r\n",
    "\r\n",
    "    yList = []\r\n",
    "    for xs in xList:\r\n",
    "        yTmp = set()\r\n",
    "        for i in range(len(xs)):\r\n",
    "            x = xs[i]\r\n",
    "            if i == 0:\r\n",
    "                yTmp = option_login_dict[x]\r\n",
    "            else:\r\n",
    "                yTmp = set(yTmp) & set(option_login_dict[x])\r\n",
    "        yList.append(len(yTmp))\r\n",
    "    # 保留y>0的组合\r\n",
    "    xList_final_tmp = []\r\n",
    "    yList_final_tmp = []\r\n",
    "    for i in range(len(yList)):\r\n",
    "        if yList[i] >= 1:\r\n",
    "            xList_final_tmp.append(xList[i])\r\n",
    "            yList_final_tmp.append(yList[i])\r\n",
    "    xList_final_tmp = numpy.array(xList_final_tmp)\r\n",
    "    yList_final_tmp = numpy.array(yList_final_tmp)\r\n",
    "    sort_index = numpy.argsort(yList_final_tmp)\r\n",
    "    xList_final = []\r\n",
    "    yList_final = []\r\n",
    "    for index in sort_index:\r\n",
    "        xList_final.append(xList_final_tmp[index])\r\n",
    "        yList_final.append(yList_final_tmp[index])\r\n",
    "\r\n",
    "    \r\n",
    "    example = from_memberships(xList_final, data=yList_final)\r\n",
    "    plot(example, sort_by='cardinality', show_counts='%d')\r\n",
    "    plt.savefig(\"C:\\\\Users\\\\zhang\\\\Documents\\\\Trustie-on-blockchain\\\\papers\\\\CSCW-2021-sponsor-assist\\\\pics\\\\\" + table_name + \"_upset.png\")\r\n",
    "\r\n",
    "# upsetplot(\"questionnaire_results_maintainer_2_set_up_reasons\")\r\n",
    "# upsetplot(\"questionnaire_results_sponsor_2_sponsor_reasons\")\r\n",
    "# upsetplot(\"questionnaire_results_others_2_not_set_reasons\")\r\n",
    "upsetplot(\"questionnaire_results_maintainer_2_how_get_more\")"
   ],
   "outputs": [
    {
     "output_type": "stream",
     "name": "stderr",
     "text": [
      "<ipython-input-1-83ddfca3f4c1>:65: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n",
      "  xList_final_tmp = numpy.array(xList_final_tmp)\n"
     ]
    },
    {
     "output_type": "display_data",
     "data": {
      "text/plain": [
       "<Figure size 33069.8x544 with 4 Axes>"
      ],
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAZEUAAAG+CAYAAAAloiz9AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAGRFElEQVR4nOzWfYzeZb3n8c91d4AFs2UpdepALUjTo7XTB3BEEaFoT7FbGkCWQyzKAiV4fDjrLvIg5xA8ICIYUFBhA6mggKtGTsUiaXroSktJVlIGHNoCgSJtKFKpxbqEVsHitX/s0AyWgjO08+vD65Xc6X1/r999Xe/fTPKbllprAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA2D21mg4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACgOa2mAwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGhOq+kAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAmtNqOgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIDmtJoOAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAoDmtpgMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABoTqvpAICdUSnl5lLK2lLK8j6zS0opvyml9PS+pvdZm1BK+WUp5ZFSyrJSyn9ophwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA4LVKrbXpBoCdTinl6CQvJrm11trZO7skyYu11qv/6tq2JA8lOa3W+nApZf8kf6i1vjLI2QAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAW2g1HQCwM6q1Lk7y+7/x8mOTLK21Ptz73edrra9stzgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgH5oNR0AsIv5p1LK0lLKzaWU/Xpnf5ekllL+vZTyUCnlgiYDAQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD6ams6AGBnMG3atDp//vzXzFauXJkZM2YkSU2S3/72txk+fHhKKbn44ovHr1mz5swkueqqq3L99dfngQceyD777JMpU6Yc+otf/OLrU6ZM2ep5s2bNyl133ZX29vYsX748SXLJJZdk9uzZefvb354k+drXvpbp06dnwYIFufDCC/Pyyy9nzz33zFVXXZWPfvSj2+PHAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADsvMrWFlqDWQGws1q3bt2bXjNixIgMGTIkrVYrZ599dpYsWZIkGTlyZCZPnpzhw4dnn332yfTp0/PQQw+94V5nnHFG5s+fv8X8nHPOSU9PT3p6ejJ9+vQkyfDhw/Pzn/88y5Ytyy233JLTTjttAHcIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAOyuWk0HAOwq1qxZs/n9HXfckc7OziTJxz72sSxdujQbN27Mpk2bcu+99+a9733vG+519NFHZ9iwYX/TuYceemgOOOCAJMm4cePypz/9KS+99NKbfm/WrFlpb2/f3NnX1VdfnVJK1q1blyR5+eWXc+aZZ2b8+PGZOHFiFi1a9De1AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA7vlbTAQA7o5kzZ+aII47I448/npEjR+amm27KBRdckPHjx2fChAlZuHBhrrnmmiTJfvvtly9+8Yt5//vfn0mTJuWwww7LcccdN6Bzr7vuukyYMCGzZs3K+vXrt1ifM2dODj300Oy1115vutcZZ5yR+fPnbzFfvXp1FixYkFGjRm2ezZ49O0mybNmyLFiwIOeee27+8pe/DOgeAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAdS6m1Nt0AsMPr6uqq3d3dg3rmqlWrMmPGjCxfvjxJ8txzz2X48OEppeTiiy/OmjVrcvPNN2++/pFHHsnxxx+fu+++O6NHjx7QGUly8skn5+KLL84JJ5yQ7u7uDB8+PJ///OdzxBFH5FOf+lSSZMqUKbniiity+OGHb8M7BgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC2o7K1hdZgVgAwcCNGjMiQIUPSarVy9tlnZ8mSJZvXnnnmmXz84x/PrbfemtGjRw/4jDvvvDMHHnhgJk6c+Jr5xIkTM3fu3GzatCkrV67Mgw8+mNWrVw/4HAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABgx9HWdAAAf5s1a9ako6MjSXLHHXeks7MzSfKHP/whxx13XK644ooceeSRA95/48aNufzyy3P33XdvsTZr1qw89thj6erqykEHHZQPfehDaWvzJwQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
     },
     "metadata": {
      "needs_background": "light"
     }
    }
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "source": [
    "# 查看一个题的选择会与下一个题目之间的选择有没有关系（桑基图），例如：self-description与参与原因之间的关系\r\n",
    "# author: zxh\r\n",
    "# date: 2021-07-14\r\n",
    "\r\n",
    "import pymysql, yaml\r\n",
    "from utils import *\r\n",
    "import seaborn as sns\r\n",
    "import pandas as pd\r\n",
    "import numpy\r\n",
    "from matplotlib import pyplot as plt\r\n",
    "from itertools import combinations\r\n",
    "from upsetplot import from_memberships\r\n",
    "from matplotlib import pyplot as plt\r\n",
    "from upsetplot import generate_counts, plot\r\n",
    "from pyecharts.charts import  Sankey\r\n",
    "from pyecharts import options as opts\r\n",
    "from pyecharts_snapshot.main import make_a_snapshot\r\n",
    "from snapshot_phantomjs import snapshot\r\n",
    "from pyecharts.render import make_snapshot\r\n",
    "\r\n",
    "f = open('config.yaml', 'r')\r\n",
    "config = yaml.load(f.read(), Loader=yaml.BaseLoader)\r\n",
    "conn = connectMysqlDB(config, autocommit = False)\r\n",
    "cur = conn.cursor(pymysql.cursors.DictCursor)\r\n",
    "\r\n",
    "def sankeyplot(table_name_list, column_names):\r\n",
    "    # read all the nodes for all tables\r\n",
    "    node_level = {} # 用于记录节点属于哪个level\r\n",
    "    node_author_dict_list = [] # values are the dict for each table and column\r\n",
    "    for i in range(len(table_name_list)):\r\n",
    "        node_author_dict = {} # key: node, value: author list\r\n",
    "        table_name = table_name_list[i]\r\n",
    "        column_name = column_names[i]\r\n",
    "\r\n",
    "        cur.execute(\"select login,\" + column_name + \" as value from \" + table_name + \" where \" + column_name + \" is not null\")\r\n",
    "        items = cur.fetchall()\r\n",
    "        for item in items:\r\n",
    "            login = item['login']\r\n",
    "            value = item['value']\r\n",
    "            if value.startswith(\"Other (please specify)\"):\r\n",
    "                value = \"Other_\" + str(i+1)\r\n",
    "            node_author_dict.setdefault(value, [])\r\n",
    "            node_author_dict[value].append(login)\r\n",
    "            node_level[value] = i+1\r\n",
    "        node_author_dict_list.append(node_author_dict)\r\n",
    "\r\n",
    "    # form nodes: \r\n",
    "    '''\r\n",
    "    [{'name': '遥控'}, {'name': '非遥控'}, {'name': '机器人'}, {'name': '猛击赛车'}, {'name': '莱肯赛车'}]\r\n",
    "    '''\r\n",
    "    nodes = []\r\n",
    "    for dict_list in node_author_dict_list:\r\n",
    "        for key in dict_list.keys(): \r\n",
    "            nodes.append({\"name\": key})\r\n",
    "    print(nodes)\r\n",
    "    # form links:\r\n",
    "    '''\r\n",
    "    [{'source': '遥控', 'target': '机器人', 'value': 15},{'source': '遥控', 'target': '猛击赛车', 'value': 23},{'source': '遥控', 'target': '莱肯赛车', 'value': 36},{'source': '非遥控', 'target': '机器人', 'value': 48},{'source': '非遥控', 'target': '猛击赛车', 'value': 21},{'source': '非遥控', 'target': '莱肯赛车', 'value': 11}]\r\n",
    "    '''\r\n",
    "    links = []\r\n",
    "    for i in range(len(node_author_dict_list)):\r\n",
    "        source_node_dict = node_author_dict_list[i]\r\n",
    "        for source_node, source_authors in source_node_dict.items():\r\n",
    "            for j in range(i+1, len(node_author_dict_list)):\r\n",
    "                target_node_dict = node_author_dict_list[j]\r\n",
    "                # 如果根据level target_node_level-source_node_level!=1 就不考虑了\r\n",
    "                if node_level[list(target_node_dict.keys())[0]] - node_level[list(source_node_dict.keys())[0]] != 1:\r\n",
    "                    continue\r\n",
    "                for target_node, target_authors in target_node_dict.items():\r\n",
    "                    num = len(set(source_authors) & set(target_authors))\r\n",
    "                    links.append({\"source\": source_node, \"target\": target_node, \"value\": num})\r\n",
    "    print(links)\r\n",
    "\r\n",
    "\r\n",
    "    pic=(\r\n",
    "        Sankey().add(\r\n",
    "            '',#图例名称\r\n",
    "            nodes,\r\n",
    "            links,\r\n",
    "            linestyle_opt=opts.LineStyleOpts(opacity=0.3,curve=0.5,color='source'),\r\n",
    "            label_opts=opts.LabelOpts(position='right'),\r\n",
    "            node_gap=30,\r\n",
    "        )\r\n",
    "        .set_global_opts(title_opts=opts.TitleOpts(title=''))\r\n",
    "    )\r\n",
    "    filename = \"_\".join(table_name_list)\r\n",
    "    pic.render(path=\"C:\\\\Users\\\\zhang\\\\Documents\\\\Trustie-on-blockchain\\\\papers\\\\CSCW-2021-sponsor-assist\\\\pics\\\\\" + filename + \"_sankey.html\")\r\n",
    "    make_a_snapshot(\"C:\\\\Users\\\\zhang\\\\Documents\\\\Trustie-on-blockchain\\\\papers\\\\CSCW-2021-sponsor-assist\\\\pics\\\\\" + filename + \"_sankey.html\", \"C:\\\\Users\\\\zhang\\\\Documents\\\\Trustie-on-blockchain\\\\papers\\\\CSCW-2021-sponsor-assist\\\\pics\\\\\" + filename + \"_sankey.pdf\")\r\n",
    "    # make_snapshot(\"C:\\\\Users\\\\zhang\\\\Documents\\\\Trustie-on-blockchain\\\\papers\\\\CSCW-2021-sponsor-assist\\\\pics\\\\\" + filename + \"_sankey.html\", \".html\",\".pdf\")\r\n",
    "\r\n",
    "sankeyplot([\"questionnaire_results_others_2_basic\", \"questionnaire_results_others_2_not_set_reasons\"], [\"self_description\", \"value\"])"
   ],
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "[{'name': 'Developer working in industry'}, {'name': 'Student'}, {'name': 'Academic researcher'}, {'name': 'Full time independent developer'}, {'name': 'Other_1'}, {'name': 'Never heard of it'}, {'name': 'My work is not worth being sponsored'}, {'name': 'It’s cumbersome'}, {'name': 'I contribute to OSS not for money'}, {'name': 'No need to be sponsored'}, {'name': 'Other_2'}, {'name': 'Not available in my region'}]\n",
      "[{'source': 'Developer working in industry', 'target': 'Never heard of it', 'value': 29}, {'source': 'Developer working in industry', 'target': 'My work is not worth being sponsored', 'value': 38}, {'source': 'Developer working in industry', 'target': 'It’s cumbersome', 'value': 11}, {'source': 'Developer working in industry', 'target': 'I contribute to OSS not for money', 'value': 51}, {'source': 'Developer working in industry', 'target': 'No need to be sponsored', 'value': 57}, {'source': 'Developer working in industry', 'target': 'Other_2', 'value': 16}, {'source': 'Developer working in industry', 'target': 'Not available in my region', 'value': 1}, {'source': 'Student', 'target': 'Never heard of it', 'value': 3}, {'source': 'Student', 'target': 'My work is not worth being sponsored', 'value': 5}, {'source': 'Student', 'target': 'It’s cumbersome', 'value': 2}, {'source': 'Student', 'target': 'I contribute to OSS not for money', 'value': 5}, {'source': 'Student', 'target': 'No need to be sponsored', 'value': 1}, {'source': 'Student', 'target': 'Other_2', 'value': 0}, {'source': 'Student', 'target': 'Not available in my region', 'value': 0}, {'source': 'Academic researcher', 'target': 'Never heard of it', 'value': 12}, {'source': 'Academic researcher', 'target': 'My work is not worth being sponsored', 'value': 6}, {'source': 'Academic researcher', 'target': 'It’s cumbersome', 'value': 2}, {'source': 'Academic researcher', 'target': 'I contribute to OSS not for money', 'value': 14}, {'source': 'Academic researcher', 'target': 'No need to be sponsored', 'value': 12}, {'source': 'Academic researcher', 'target': 'Other_2', 'value': 3}, {'source': 'Academic researcher', 'target': 'Not available in my region', 'value': 1}, {'source': 'Full time independent developer', 'target': 'Never heard of it', 'value': 6}, {'source': 'Full time independent developer', 'target': 'My work is not worth being sponsored', 'value': 4}, {'source': 'Full time independent developer', 'target': 'It’s cumbersome', 'value': 2}, {'source': 'Full time independent developer', 'target': 'I contribute to OSS not for money', 'value': 3}, {'source': 'Full time independent developer', 'target': 'No need to be sponsored', 'value': 5}, {'source': 'Full time independent developer', 'target': 'Other_2', 'value': 2}, {'source': 'Full time independent developer', 'target': 'Not available in my region', 'value': 2}, {'source': 'Other_1', 'target': 'Never heard of it', 'value': 3}, {'source': 'Other_1', 'target': 'My work is not worth being sponsored', 'value': 3}, {'source': 'Other_1', 'target': 'It’s cumbersome', 'value': 0}, {'source': 'Other_1', 'target': 'I contribute to OSS not for money', 'value': 4}, {'source': 'Other_1', 'target': 'No need to be sponsored', 'value': 4}, {'source': 'Other_1', 'target': 'Other_2', 'value': 0}, {'source': 'Other_1', 'target': 'Not available in my region', 'value': 0}]\n",
      "<ipython-input-20-4730b367d0cf>:83: RuntimeWarning: coroutine 'make_a_snapshot' was never awaited\n",
      "  make_a_snapshot(\"C:\\\\Users\\\\zhang\\\\Documents\\\\Trustie-on-blockchain\\\\papers\\\\CSCW-2021-sponsor-assist\\\\pics\\\\\" + filename + \"_sankey.html\", \"C:\\\\Users\\\\zhang\\\\Documents\\\\Trustie-on-blockchain\\\\papers\\\\CSCW-2021-sponsor-assist\\\\pics\\\\\" + filename + \"_sankey.pdf\")\n",
      "RuntimeWarning: Enable tracemalloc to get the object allocation traceback\n"
     ]
    }
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "source": [
    "# 绘制likert-scale for maintainer motivation and motivate_extent\r\n",
    "# author: zxh\r\n",
    "# date: 2021-07-21\r\n",
    "\r\n",
    "import pymysql, yaml\r\n",
    "from utils import *\r\n",
    "import seaborn as sns\r\n",
    "import pandas as pd\r\n",
    "import numpy\r\n",
    "from matplotlib import pyplot as plt\r\n",
    "import plot_likert\r\n",
    "\r\n",
    "motivate_extent_scale = [\r\n",
    "    \"Not at all\",\r\n",
    "    \"A little\",\r\n",
    "    \"A moderate amount\",\r\n",
    "    \"A lot\",\r\n",
    "    \"A great deal\"\r\n",
    "]\r\n",
    "\r\n",
    "reasons = [\r\n",
    "    \"It allows users of my projects to express thanks/appreciation\",\r\n",
    "    \"Sponsorship can motivate my future OSS contribution\",\r\n",
    "    \"Side income for OSS contribution\",\r\n",
    "    \"It can reflect community recognition for my work\",\r\n",
    "    \"Just for fun\",\r\n",
    "    \"I deserve to be rewarded for my past OSS contribution\",\r\n",
    "    \"I am able to prioritize the requirements of sponsors (e.g., fixing bugs)\",\r\n",
    "    \"It’s a way for me to make a living\"\r\n",
    "]\r\n",
    "\r\n",
    "''' 形成数据结果\r\n",
    "    Q1\tQ2 (questions)\r\n",
    "0\tStrongly disagree\tAgree\r\n",
    "1\tAgree\tNeither agree nor disagree\r\n",
    "2\tNeither agree nor disagree\tStrongly agree\r\n",
    "3\tStrongly disagree\tAgree\r\n",
    "4\tDisagree\tStrongly disagree\r\n",
    "5\tNeither agree nor disagree\tStrongly agree\r\n",
    "6\tAgree\tAgree\r\n",
    "7\tAgree\tAgree\r\n",
    "8\tNeither agree nor disagree\tStrongly disagree\r\n",
    "9\tStrongly agree\tNeither agree nor disagree\r\n",
    "'''\r\n",
    "\r\n",
    "# 找到每个问题的likert-scale选择情况\r\n",
    "f = open('config.yaml', 'r')\r\n",
    "config = yaml.load(f.read(), Loader=yaml.BaseLoader)\r\n",
    "conn = connectMysqlDB(config, autocommit = False)\r\n",
    "cur = conn.cursor(pymysql.cursors.DictCursor)\r\n",
    "\r\n",
    "# 读取所有的信息\r\n",
    "sql = \"select basic.login as login, reason.value as question, basic.motivate_extent as opt from questionnaire_results_maintainer_2_set_up_reasons reason, questionnaire_results_maintainer_2_basic basic where reason.login=basic.login\"\r\n",
    "cur.execute(sql)\r\n",
    "items = cur.fetchall()\r\n",
    "df = pd.DataFrame.from_dict(items)\r\n",
    "\r\n",
    "def form_results(opts, questions, df):\r\n",
    "    results = {}\r\n",
    "    for question in questions:\r\n",
    "        result_col = df.loc[df['question']==question]['opt']\r\n",
    "        results[question] = result_col\r\n",
    "    return results\r\n",
    "\r\n",
    "data = form_results(opts=motivate_extent_scale, questions=reasons, df=df)\r\n",
    "data = pd.DataFrame(data)\r\n",
    "plot_likert.plot_likert(data, motivate_extent_scale, plot_percentage=True, colors=plot_likert.colors.likert5);"
   ],
   "outputs": [
    {
     "output_type": "stream",
     "name": "stderr",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\plot_likert\\plot_likert.py:172: UserWarning: In your data, not all questions have the same number of responses. i.e., different numbers of people answered each question. Therefore, the percentages aren't directly comparable: X% for one question represents a different number of responses than X% for another question, yet they will appear the same in the percentage graph. This may be misleading to your reader.\n",
      "  warn(\n"
     ]
    },
    {
     "output_type": "display_data",
     "data": {
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ],
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAApsAAAEGCAYAAAAqgl5kAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAABsvElEQVR4nO3dd5xU1fnH8c+XDtJEERFEFOlIXTEoKlE0JhobGguJYn5GMTFGDRoTjS2aaGwJMYolihCs2DEiFhQsiEtfugUsiGJDkL77/P44Z2AYZrbg7M7u8rxfr3ntzLnnnnPunUWfPffe88jMcM4555xzrjzUyPUAnHPOOedc9eXBpnPOOeecKzcebDrnnHPOuXLjwaZzzjnnnCs3Hmw655xzzrlyUyvXA3CuOLvuuqu1bds218NwFejLL78EYJdddsnxSJyruqZNm/aFmTXP9TicAw82XSXXtm1b8vPzcz0MV4FGjhwJwJAhQ3I6DueqMklLcz0G5xL8MrpzzjnnnCs3PrPpnKtUDjnkkFwPwTnnXBZ5sOmcq1T22WefXA/BOedcFnmw6ZyrVJYvXw7A7rvvnuOROOdSTZs2bbdatWrdC3TDb8VzWxQBBZs2bTq7T58+n6duzEqwKWm1mTWU1BY40Mwe/B77jzOzbtkYV1UhqTkwDqgDXGBmk3M8pLQkvWlmB27HfscDi8xsXvZH5aqb8ePHA/6AkHOVUa1ate7dfffdOzdv3vzrGjVqWK7H4yqHoqIirVixosvy5cvvBY5N3Z7tmc22wOlAmYLNqk6SAJlZ0XY2cTiwwMzOzOKwtoukmmZWmG7b9gSa0fGEYLraBJsLHyz7qWhx0A3lMJLs+XhVs1wPAYC8vDwACgoKcjySLT5v3CDXQ8i5/8yZkesh5MyYowflegiVSTcPNF2qGjVqWPPmzVcuX7487WRhtqfAbwAOljRT0kXJGyQ1lPSypOmS5kg6rriGJNWTdH+sO0PSD2P5/yR1j+9nSLoyvv+LpLMltZQ0KY6hQNLBadpeImnX+D5P0qvx/aFxv5mx7Uax/BJJ70iaLemaWNZW0nxJdwDTgT0ljYx9zkk9/rjPXvEczI4/20jqCfwd+Enst36asf5V0luS8iX1lvSCpPckDY11RiefT0ljJB2b0s6AeF6elDRP0ghJNeK21ZKulfQ20E/SxfE4CiRdmNTG6qT325yTWH5GLJsVx3Ug4a+cm+LxtZN0QRzDbEkPF/d74JxzrlKp4YGmSyf+XqSNK7M9s3kZMMzMjkmzbR1wgpl9GwO9KZKeMbNMv7S/ATCz/SR1AiZI6gBMIgS0S4BNwEGxfn/gv4SZ1RfM7HpJNYGyTEkMA35jZm9Iagisk3Qk0B7oCwh4RtIhwIdAR+AsM/u1pD5Aq8QtAJKapmn/dmCUmT0g6ZfAcDM7PgbMeWZ2foZxfWRm/STdBoyMx1wPmAuMAO4FLgKeltQEOBBIN0vaF+gCLAXGAycCY4GdgAIzuzIex1nAAfF435b0mpltntYo5px8CVwOHGRmX0hqZmZfSXqGcHvE2Lj/ZcDeZrY+w3lyzjnnXDVRkQ8ICfhrDEqKgFZAC2B5hvr9gX8BmNkChQVqOwCTgQuAD4DngCMkNQDamtlCSS2A+yTVBp4ys5llGOMbwK2SxgBPmNnHMbA6EkgEWw0JgdaHwFIzmxLL3wf2kfSvOK4JadrvRwjwAEYTZjRL45n4cw7Q0MxWAaskrZPU1Mxek/RvSbvF9h83s01p2plqZu8DSHqIcI7HAoXA47FOf+BJM/su1nsCODjp+CGcj3TnpAcw1sy+ADCzrzIcz2xgjKSngKdKeQ6cc85VMgsfPLBPNtvrePqb00qqI6nP2Wef/dk999zzMcCVV17ZYvXq1TVvvfXWZZn2GT16dNMuXbqs69Onz7rSjmXcuHGN6tatW3TEEUd8V5Z9brnllhYTJ058d/jw4bvk5+fvNGrUqA9Lu391VZFPkg0GmgN9zKwn8Blhdi4TZSh/B8gjBECTCAHPr4BpAGY2CTgE+AQYLemMNG1sYsuxbx6Dmd0AnA3UJ8y8dorj+JuZ9Yyvfc3sP3GX75L2/ZoQbL1KmJW9t5hj27xbKeoArI8/i5LeJz4n/mAYTTjHZwH3l7K/xOd1SfdpZjrvyTKdE6XpI52jgX8DfYBpknxVBOecc6VSp04d+9///rfzp59+Wur/dzz11FNNZ8+eXb/kmlu88sorjSZPntyw7CN0qbIdbK4CGmXY1gT43Mw2xvsv9yqhrUmE4Il4+bwNsNDMNgAfAT8DphBmOofFn0jaK/ZzD/AfoHeatpcQAh2AzXd+S2pnZnPM7EYgH+gEvAD8Ml5WR1KrOIO4lXhrQA0zexz4c4Z+3wROje8HA6+XcA7KYiRwIYCZzc1Qp6+kveO9mqdk6H8ScLykBpJ2Ak4gntskmc7Jy8DPJO0SyxNPnGz+vYh972lmE4FLgaaEmVHnnHOuRDVr1rQzzjhjxV//+tcWqdsWLVpUp1+/fh06dOjQpV+/fh0WL15c58UXX9zppZdeanrFFVe07tSpU5e5c+fWTd7nwQcfbNK9e/dOnTt37nLggQd2+Oijj2otXLiwzqhRo5qPGDGiRadOnbqMHz9+q/9PTZw4sUGvXr06de7cuUuvXr06zZo1qy4uo2wHm7OBTfHhkNQHZMYAeZLyCYHWghLaugOoKWkO8AgwxMwSs3qTgc/MbE1835otAdEAYKakGYRA8p9p2r4G+KekyYRLyAkXxodiZgFrgefNbALh6fq34ljGkj6gbgW8KmkmIfD7Y5o6FwBnSZoN/AL4XQnnoNTM7DNgPplnNQHeIjzEVUC4DeHJNO1MJ4x/KvA2cG/S/ZoW66Q9JzHIvR54LZ7DW+N+DwOXxO+kPfDfuN8M4DYz+2Y7D9s559wO6JJLLvn8iSeeaPbll1/WTC4fOnRom9NPP/3LRYsWzTvllFO+PO+88/Y84ogjvhs4cOA311133ccLFiyY17Vr1+QrhBxxxBGrZ86cuWD+/PnzTjrppK+uvfba3Tt27LjhjDPOWDF06NDPFixYMO+oo45anbxPjx491k2dOnXB/Pnz51111VWfXHrppa0r4rirqqxcvjSzhvHnRsIyPunqfEG4Z7G4/ZcQForFzNYBQzLU/zNh9hAzW0bSpV8zewB4oITxTibc/5la/tsM9f9J+qC1W1KdWaSfzUxuZwlwWJrykYQAL90+bTPVS94W71ttDzxUzBDWmNkpafpomPL5VrYEion2dwG+SqqT9pykO/9m9gbhwaSE/sWMEUnnAOcAtGnTpriqzjnndkDNmjUrOvnkk7+84YYbdqtfv/7mZQdnzJix0/PPP/8ewHnnnffVNddcU2IQ+MEHH9Q5/vjjW69YsaL2hg0bauy5557rS9rnq6++qnnKKafsvWTJknqSbOPGjaW5BW2H5av/VwOSBhJmiv9lZivLof09CLOiN2e77XTM7G4zyzOzvObNm1dEl84556qYP/7xj589+OCDu3733XffK5Y5//zz2/z617/+fNGiRfNuv/32pevXry+xvT/84Q+tDj300FWLFy+e++yzz767YcMGj6eK4SenGjCzl8ysjZn9o5g6r2ZYkqo07S8zsw5m9q/tHqRzzjmXRS1atCj86U9/+vWDDz64a6KsV69e39177707A9x1113N8vLyVgM0bNiw8Ntvv00b86xatapmmzZtNgKMHDlyl0R5o0aNCletWlUz3T7ffvttzdatW2+I/eyaro7botjL6HENxNPN7I6KGU7FUVhMfQ8z+1/8fDWw2sy+9+ydpJEkrStZivotgQfM7Mjv23e2qITUlKm/G3H2c7iZnVRBQ3TOOZdjpVmqqDxdfvnlyx944IHNl8DuvPPOD88888y2//znP3ffZZddNo0aNWoJwODBg78677zz2o4YMaLF2LFj30u+b/Pyyy9fdtppp7Vr0aLFhry8vO8+/PDDugCDBg365qSTTmr3/PPPN/3HP/7xYfJ9m3/4wx+Wn3322XsPHz5894MPPvjbCjzkKkmZ11QPWXKoJLnKVUwaxe1sbwhJC6n
     },
     "metadata": {
      "needs_background": "light"
     }
    }
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "source": [
    "# 计算所有人的参与对自己的激励情况\r\n",
    "import pymysql, yaml\r\n",
    "from utils import *\r\n",
    "import seaborn as sns\r\n",
    "import pandas as pd\r\n",
    "import numpy\r\n",
    "from matplotlib import pyplot as plt\r\n",
    "import plot_likert\r\n",
    "\r\n",
    "motivate_extent_scale = [\r\n",
    "    \"Not at all\",\r\n",
    "    \"A little\",\r\n",
    "    \"A moderate amount\",\r\n",
    "    \"A lot\",\r\n",
    "    \"A great deal\"\r\n",
    "]\r\n",
    "\r\n",
    "reasons = [\r\n",
    "    \"Because I benefit from the developer’s projects\",\r\n",
    "    \"To encourage the developer to continue the contribution\",\r\n",
    "    \"To show my recognition of the developer’s work\",\r\n",
    "    \"Because I’m interested in the developer’s projects\",\r\n",
    "    \"To motivate the developer to work harder on a specific feature\",\r\n",
    "    \"Because I know the developer\"\r\n",
    "]\r\n",
    "\r\n",
    "''' 形成数据结果\r\n",
    "    Q1\tQ2 (questions)\r\n",
    "0\tStrongly disagree\tAgree\r\n",
    "1\tAgree\tNeither agree nor disagree\r\n",
    "2\tNeither agree nor disagree\tStrongly agree\r\n",
    "3\tStrongly disagree\tAgree\r\n",
    "4\tDisagree\tStrongly disagree\r\n",
    "5\tNeither agree nor disagree\tStrongly agree\r\n",
    "6\tAgree\tAgree\r\n",
    "7\tAgree\tAgree\r\n",
    "8\tNeither agree nor disagree\tStrongly disagree\r\n",
    "9\tStrongly agree\tNeither agree nor disagree\r\n",
    "'''\r\n",
    "\r\n",
    "# 找到每个问题的likert-scale选择情况\r\n",
    "f = open('config.yaml', 'r')\r\n",
    "config = yaml.load(f.read(), Loader=yaml.BaseLoader)\r\n",
    "conn = connectMysqlDB(config, autocommit = False)\r\n",
    "cur = conn.cursor(pymysql.cursors.DictCursor)\r\n",
    "\r\n",
    "# 读取所有的信息\r\n",
    "sql = \"select basic.login as login, reason.value as question, basic.expectation_extent as opt from questionnaire_results_sponsor_2_sponsor_reasons reason, questionnaire_results_sponsor_2_basic basic where reason.login=basic.login\"\r\n",
    "cur.execute(sql)\r\n",
    "items = cur.fetchall()\r\n",
    "df = pd.DataFrame.from_dict(items)\r\n",
    "\r\n",
    "def form_results(opts, questions, df):\r\n",
    "    results = {}\r\n",
    "    for question in questions:\r\n",
    "        result_col = df.loc[df['question']==question]['opt']\r\n",
    "        results[question] = result_col\r\n",
    "    return results\r\n",
    "\r\n",
    "data = form_results(opts=motivate_extent_scale, questions=reasons, df=df)\r\n",
    "data = pd.DataFrame(data)\r\n",
    "plot_likert.plot_likert(data, motivate_extent_scale, plot_percentage=True, colors=plot_likert.colors.likert5);"
   ],
   "outputs": [
    {
     "output_type": "stream",
     "name": "stderr",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\plot_likert\\plot_likert.py:172: UserWarning: In your data, not all questions have the same number of responses. i.e., different numbers of people answered each question. Therefore, the percentages aren't directly comparable: X% for one question represents a different number of responses than X% for another question, yet they will appear the same in the percentage graph. This may be misleading to your reader.\n",
      "  warn(\n"
     ]
    },
    {
     "output_type": "display_data",
     "data": {
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ],
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAApkAAAEGCAYAAAAud45ZAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAABca0lEQVR4nO3dd5xU1fnH8c+XpXdRRAURRenSdkVRUELQaIwVEJVEMDEKiS22aDR2o0aT+COJYokiKwYEu0bECiioLH2pFsCKIugK0nef3x/3jIzDbGNnd5bd5/16zWtnzj33lDuT+HBueWRmOOecc845l0o10j0A55xzzjlX9XiQ6ZxzzjnnUs6DTOecc845l3IeZDrnnHPOuZTzINM555xzzqVczXQPwLlk9tprL2vTpk26h+FKYO3atQDsueeeaR6Jc2727Nlfm1nzdI/DOfAg01VSbdq0IScnJ93DcCUwZswYAIYPH57WcTjnQNKqdI/BuRg/Xe6cc84551LOVzKdc2Vy9NFHp3sIzjnnKiEPMp1zZXLQQQelewjOOecqIQ8ynXNlsnr1agD22WefNI/EOVdeZs+evXfNmjUfArrgl9q5HQqA3O3bt5+XmZn5VeJGDzKdc2UyefJkwG/8ca4qq1mz5kP77LNPx+bNm39To0YNS/d4XOVQUFCgNWvWdFq9evVDwMmJ2/1fI84555wrTpfmzZt/5wGmi1ejRg1r3rx5HtEK987bK3g8zjnnnNv91PAA0yUTfhdJ40k/Xe5cnGWPH5m0vMVRd1TYGD5d36zC+kqFrKwsAHJzc9M8kpL7qnH9dA+hyvrPwrnpHsJuY9yJA9M9BOfKVYUHmZLygYWAgHzgQjObUdHjKA1JNwIbzOzuhPIxwAtmNqkc++4AjAcMGARkm9mRktoAR5rZ44Xsdxfwc+B/ZnZlOY7vT2b2l/C+DdHxSLps7pxzrmpY9viRmalsr/3ZM2YXV0dS5nnnnfflgw8++CnA9ddf32LDhg0Zf//73z8vbJ/s7OymnTp12pyZmbm5pGN54YUXGtWpU6fg2GOP/b40+/ztb39r8cYbb3wwatSoPXNychqMHTv245LuX1Wl43T5JjPrbmbdgGuA29Mwht3JqcCzZtbDzD40s9hSWxvg7CL2uwDomRhgSkr1Pyz+lOL2nHPOuZ3Url3b/ve//+3xxRdflPi/Y88880zTBQsW1CtNP6+//nqj6dOnNyz9CF2idF+T2Rj4JvZB0pWSZklaIOmmuPJzQtl8Sdmh7CRJ70qaK+lVSS1C+Y2SrojbN1dSG0kNJL0Y2siVNCRsz5Q0VdJsSS9L2reUcxggabqk5ZJ+EdrMkHRX3FwuCOX9JL0paZKkpZLGSVJh45D0c+BS4DxJb4R6G0K/dwB9Jc2T9If4AUl6DmgAvCtpiKQxkv4e2rhTUndJ74SxPS1pj7Dfm5L+IWmapCWSDpP0lKT3Jd2aOHFJdwD1whjGheIMSQ9KWiRpiqR6oW5bSZPD/KaHFVrnnHOuRDIyMuycc85Z85e//KVF4rbly5fX7t27d7t27dp16t27d7v333+/9iuvvNLg1VdfbXrddde16tChQ6dFixbVid/n8ccfb9K1a9cOHTt27HTkkUe2++STT2ouW7as9tixY5uPHj26RYcOHTpNnjz5R8HmG2+8Ub9Hjx4dOnbs2KlHjx4d5s+fXwdXqHQEmbGgZCnwEHALgKTjgEOAXkB3IFPS0ZI6A9cC/cPq5yWhnbeAI8ysB9Hp5KuK6fd44HMz6xZO506WVAv4JzDIzDKBh4HbSjmfNsAxwInAaEl1gd8AeWZ2GHAY8FtJB4b6PYgCx07AQcBRhY3DzP4HjAb+YWY/Sej3amB6WBX+R/wGMzuZHSvGE0JxO2CAmV0OjAX+aGZdiS5duCFu961mdnTo91ng90R3jQ2XtGdCP1fH9TM0FB8C/NvMOgPfArGLjh4ALgrzuwK4t8ij6pxzziW48sorv3rqqaearV27NiO+fMSIEa3PPvvstcuXL188ZMiQtSNHjtz/2GOP/X7AgAHf3nrrrZ8uXbp0cefOnbfE73PsscdumDdv3tIlS5YsHjRo0Lqbb755n/bt228955xz1owYMeLLpUuXLj7++OM3xO/TrVu3ze+9997SJUuWLL7hhhs+u+qqq1pVxLx3V+m48WeTmXUHkNQbGCupC3BceMWuGm9IFLB0AyaZ2dcAZrYubG8FTAgrj7WBFcX0uxC4W9KdRNcNTg/9dgFeCQuKGcAXpZzPE2ZWALwv6SOgQ5hHV0mDQp0mYS5bgffM7NMw/3lEQeq3KRhHcSaaWb6kJkBTM5sayh8FJsbVey78XQgsMrMvwlg/AvYH1hbTzwozmxfezwbaSGoIHAlMDPMD8H/9OeecK5VmzZoVDB48eO0dd9yxd7169Qpi5XPnzm3w0ksvfQgwcuTIdTfddFOxwd+KFStqn3rqqa3WrFlTa+vWrTX233//LcXts27duowhQ4YcuHLlyrqSbNu2bSpun+osrafLzWwmsBfQnOhGoNvDqlh3MzvYzP4TypM9NuGfwL/M7FCi6w/rhvLt/HhedUNfy4FMouDpdknXh7YXxfV5qJkdV9ppJPksolW7WLsHmtmUsD3+R5xPFOinYhzFKekFzLHxFfDjsRZQsn+UJJtfDeDbuPl1N7OOJRyPc84594Nrrrnmy8cff3yv77//vkwxzIUXXtj6d7/73VfLly9f/K9//WvVli1bim3vj3/8Y8tjjjlm/fvvv7/o+eef/2Dr1q3pvuywUkvrwQnX5WUQrY69DPw6rHohqaWkvYHXgDNip2olxZ7v0gT4LLwfFtfsSqBnqNsTODC83w/YaGaPAXeHOsuA5mFFFUm1wun50hgsqYaktkSnv5eFuYwMp8GR1E5SgyLa2JVxrAcalXKsmFke8I2kvqHoV8DUInYpzrbYPIvo8ztghaTBAIp0K0OfzjnnqqkWLVrkn3TSSd88/vjje8XKevTo8f1DDz20B8D999/fLCsrawNAw4YN87/77ruksc769eszWrduvQ1gzJgxP1wO1qhRo/z169dnJNvnu+++y2jVqtXW0M9eyeq4HdJxurxeOE0M0QreMDPLB6ZI6gjMDKdUNwC/NLNFkm4Dpip6/NFcYDhwI9Hp18+AdwjBJPAkcE7oYxawPJQfCtwlqQDYBow0s63hlPaocBq5JnAPsKgU81lGFKS1AEaY2WZJDxGdBp8TbuxZQ3SXeFK7OI4FwHZJ84ExiddlFmMY0fWj9YGPgHNLsW+iB4AFkuYQXTtbmKHAfZKuA2oRXUc7vwz9OuecS5OSPHKoPF177bWrH3300eaxz/fdd9/Hw4YNa/N///d/++y5557bx44duxJg6NCh60aOHNlm9OjRLSZNmvRh/HWZ11577ednnXVW2xYtWmzNysr6/uOPP64DMHDgwG8HDRrU9qWXXmp6zz33fBx/XeYf//jH1eedd96Bo0aN2qdv377fVeCUd0sy8wf4u8pB0vnA+QCtW7fOXLVqVYWPwR/GXj34w9jLjz+MveTK42HskmabWVaq250/f/7Kbt26fZ3qdl3VMH/+/L26devWJrHcryVwlYaZPWBmWWaW1bx58+J3cM4551yl5UGmc84555xLOQ8ynXPOOedcylXbIFMJmYEqa5uF9PNxuFu+3EkaIemcXdivqaTflceYnHPOOVf5Vdsgc3ckqWZ4+PxcM/s8le0Wts3MRpvZ2F1otingQaZzzjlXTVWrIFPStZKWSXoVaB9XvlNebUlNJK2UVCPUqS/pk/AMy2LzcKvo/OD3SJqhKId6r1DeQNLDivKdz5V0SigfLmmipOeBKUAecE7cPjvlY08YR2H93SjpAUlTiLIuHSDptTDe1yS1jqt3RWHHKZS3CHOcH15HEuVWb6sohehdinKxTwufc+Oe0+mcc865Kigdz8lMC0mZwJlEucNrAnOI0h5C9KzHEWb2vqTDgXvNrH94BuUxwBvAScDLZrZN0k71gf4JXY4lyvozVdLNRPnBLw3bGpjZkZKOJspT3oXoGZOvm9mvJTUF3gvBMEBvoGt
     },
     "metadata": {
      "needs_background": "light"
     }
    }
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "source": [
    "# 绘制maintainer: participant reason, satisfaction, motivate_extent的桑基图\r\n",
    "# author: zxh\r\n",
    "# date: 2021-07-21\r\n",
    "\r\n",
    "import pymysql, yaml\r\n",
    "from utils import *\r\n",
    "import seaborn as sns\r\n",
    "import pandas as pd\r\n",
    "import numpy\r\n",
    "from matplotlib import pyplot as plt\r\n",
    "from itertools import combinations\r\n",
    "from upsetplot import from_memberships\r\n",
    "from matplotlib import pyplot as plt\r\n",
    "from upsetplot import generate_counts, plot\r\n",
    "from pyecharts.charts import  Sankey\r\n",
    "from pyecharts import options as opts\r\n",
    "from pyecharts_snapshot.main import make_a_snapshot\r\n",
    "from snapshot_phantomjs import snapshot\r\n",
    "from pyecharts.render import make_snapshot\r\n",
    "\r\n",
    "f = open('config.yaml', 'r')\r\n",
    "config = yaml.load(f.read(), Loader=yaml.BaseLoader)\r\n",
    "conn = connectMysqlDB(config, autocommit = False)\r\n",
    "cur = conn.cursor(pymysql.cursors.DictCursor)\r\n",
    "\r\n",
    "def sankeyplot(table_name_list, column_names):\r\n",
    "    # read all the nodes for all tables\r\n",
    "    node_level = {} # 用于记录节点属于哪个level\r\n",
    "    node_author_dict_list = [] # values are the dict for each table and column\r\n",
    "    for i in range(len(table_name_list)):\r\n",
    "        node_author_dict = {} # key: node, value: author list\r\n",
    "        table_name = table_name_list[i]\r\n",
    "        column_name = column_names[i]\r\n",
    "\r\n",
    "        cur.execute(\"select login,\" + column_name + \" as value from \" + table_name + \" where \" + column_name + \" is not null\")\r\n",
    "        items = cur.fetchall()\r\n",
    "        for item in items:\r\n",
    "            login = item['login']\r\n",
    "            value = item['value']\r\n",
    "            if value.startswith(\"Other (please specify)\"):\r\n",
    "                value = \"Other_\" + str(i+1)\r\n",
    "            node_author_dict.setdefault(value, [])\r\n",
    "            node_author_dict[value].append(login)\r\n",
    "            node_level[value] = i+1\r\n",
    "        node_author_dict_list.append(node_author_dict)\r\n",
    "\r\n",
    "    # form nodes: \r\n",
    "    '''\r\n",
    "    [{'name': '遥控'}, {'name': '非遥控'}, {'name': '机器人'}, {'name': '猛击赛车'}, {'name': '莱肯赛车'}]\r\n",
    "    '''\r\n",
    "    nodes = []\r\n",
    "    for dict_list in node_author_dict_list:\r\n",
    "        for key in dict_list.keys(): \r\n",
    "            nodes.append({\"name\": key})\r\n",
    "    print(nodes)\r\n",
    "    # form links:\r\n",
    "    '''\r\n",
    "    [{'source': '遥控', 'target': '机器人', 'value': 15},{'source': '遥控', 'target': '猛击赛车', 'value': 23},{'source': '遥控', 'target': '莱肯赛车', 'value': 36},{'source': '非遥控', 'target': '机器人', 'value': 48},{'source': '非遥控', 'target': '猛击赛车', 'value': 21},{'source': '非遥控', 'target': '莱肯赛车', 'value': 11}]\r\n",
    "    '''\r\n",
    "    links = []\r\n",
    "    for i in range(len(node_author_dict_list)):\r\n",
    "        source_node_dict = node_author_dict_list[i]\r\n",
    "        for source_node, source_authors in source_node_dict.items():\r\n",
    "            for j in range(i+1, len(node_author_dict_list)):\r\n",
    "                target_node_dict = node_author_dict_list[j]\r\n",
    "                # 如果根据level target_node_level-source_node_level!=1 就不考虑了\r\n",
    "                if node_level[list(target_node_dict.keys())[0]] - node_level[list(source_node_dict.keys())[0]] != 1:\r\n",
    "                    continue\r\n",
    "                for target_node, target_authors in target_node_dict.items():\r\n",
    "                    num = len(set(source_authors) & set(target_authors))\r\n",
    "                    links.append({\"source\": source_node, \"target\": target_node, \"value\": num})\r\n",
    "    print(links)\r\n",
    "\r\n",
    "\r\n",
    "    pic=(\r\n",
    "        Sankey().add(\r\n",
    "            '',#图例名称\r\n",
    "            nodes,\r\n",
    "            links,\r\n",
    "            linestyle_opt=opts.LineStyleOpts(opacity=0.3,curve=0.5,color='source'),\r\n",
    "            label_opts=opts.LabelOpts(position='right'),\r\n",
    "            node_gap=30,\r\n",
    "        )\r\n",
    "        .set_global_opts(title_opts=opts.TitleOpts(title=''))\r\n",
    "    )\r\n",
    "    filename = \"_\".join(table_name_list)\r\n",
    "    pic.render(path=\"C:\\\\Users\\\\zhang\\\\Documents\\\\Trustie-on-blockchain\\\\papers\\\\CSCW-2021-sponsor-assist\\\\pics\\\\\" + filename + \"_sankey.html\")\r\n",
    "    make_a_snapshot(\"C:\\\\Users\\\\zhang\\\\Documents\\\\Trustie-on-blockchain\\\\papers\\\\CSCW-2021-sponsor-assist\\\\pics\\\\\" + filename + \"_sankey.html\", \"C:\\\\Users\\\\zhang\\\\Documents\\\\Trustie-on-blockchain\\\\papers\\\\CSCW-2021-sponsor-assist\\\\pics\\\\\" + filename + \"_sankey.pdf\")\r\n",
    "    # make_snapshot(\"C:\\\\Users\\\\zhang\\\\Documents\\\\Trustie-on-blockchain\\\\papers\\\\CSCW-2021-sponsor-assist\\\\pics\\\\\" + filename + \"_sankey.html\", \".html\",\".pdf\")\r\n",
    "\r\n",
    "sankeyplot([\"questionnaire_results_maintainer_2_set_up_reasons\", \"questionnaire_results_maintainer_2_basic\"], [\"value\", \"motivate_extent\"])"
   ],
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "[{'name': 'Neither satisfied nor dissatisfied'}, {'name': 'Satisfied'}, {'name': 'Very dissatisfied'}, {'name': 'Dissatisfied'}, {'name': 'Very satisfied'}, {'name': 'A little'}, {'name': 'A lot'}, {'name': 'A great deal'}, {'name': 'A moderate amount'}, {'name': 'Not at all'}]\n",
      "[{'source': 'Neither satisfied nor dissatisfied', 'target': 'A little', 'value': 48}, {'source': 'Neither satisfied nor dissatisfied', 'target': 'A lot', 'value': 48}, {'source': 'Neither satisfied nor dissatisfied', 'target': 'A great deal', 'value': 32}, {'source': 'Neither satisfied nor dissatisfied', 'target': 'A moderate amount', 'value': 64}, {'source': 'Neither satisfied nor dissatisfied', 'target': 'Not at all', 'value': 18}, {'source': 'Satisfied', 'target': 'A little', 'value': 10}, {'source': 'Satisfied', 'target': 'A lot', 'value': 21}, {'source': 'Satisfied', 'target': 'A great deal', 'value': 16}, {'source': 'Satisfied', 'target': 'A moderate amount', 'value': 20}, {'source': 'Satisfied', 'target': 'Not at all', 'value': 0}, {'source': 'Very dissatisfied', 'target': 'A little', 'value': 11}, {'source': 'Very dissatisfied', 'target': 'A lot', 'value': 23}, {'source': 'Very dissatisfied', 'target': 'A great deal', 'value': 30}, {'source': 'Very dissatisfied', 'target': 'A moderate amount', 'value': 18}, {'source': 'Very dissatisfied', 'target': 'Not at all', 'value': 7}, {'source': 'Dissatisfied', 'target': 'A little', 'value': 7}, {'source': 'Dissatisfied', 'target': 'A lot', 'value': 25}, {'source': 'Dissatisfied', 'target': 'A great deal', 'value': 17}, {'source': 'Dissatisfied', 'target': 'A moderate amount', 'value': 19}, {'source': 'Dissatisfied', 'target': 'Not at all', 'value': 1}, {'source': 'Very satisfied', 'target': 'A little', 'value': 1}, {'source': 'Very satisfied', 'target': 'A lot', 'value': 6}, {'source': 'Very satisfied', 'target': 'A great deal', 'value': 16}, {'source': 'Very satisfied', 'target': 'A moderate amount', 'value': 5}, {'source': 'Very satisfied', 'target': 'Not at all', 'value': 1}]\n"
     ]
    },
    {
     "output_type": "stream",
     "name": "stderr",
     "text": [
      "<ipython-input-38-de444cf2be5d>:88: RuntimeWarning: coroutine 'make_a_snapshot' was never awaited\n",
      "  make_a_snapshot(\"C:\\\\Users\\\\zhang\\\\Documents\\\\Trustie-on-blockchain\\\\papers\\\\CSCW-2021-sponsor-assist\\\\pics\\\\\" + filename + \"_sankey.html\", \"C:\\\\Users\\\\zhang\\\\Documents\\\\Trustie-on-blockchain\\\\papers\\\\CSCW-2021-sponsor-assist\\\\pics\\\\\" + filename + \"_sankey.pdf\")\n",
      "RuntimeWarning: Enable tracemalloc to get the object allocation traceback\n"
     ]
    }
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "source": [
    "# 统计ways for maintainers\r\n",
    "# author: zxh\r\n",
    "# date: 2021-07-22\r\n",
    "import pymysql, yaml\r\n",
    "from utils import *\r\n",
    "import seaborn as sns\r\n",
    "import pandas as pd\r\n",
    "from matplotlib import pyplot as plt\r\n",
    "\r\n",
    "f = open('config.yaml', 'r')\r\n",
    "config = yaml.load(f.read(), Loader=yaml.BaseLoader)\r\n",
    "conn = connectMysqlDB(config, autocommit = False)\r\n",
    "cur = conn.cursor(pymysql.cursors.DictCursor)\r\n",
    "\r\n",
    "all_reasons = [\r\n",
    "    \"Producing useful projects\",\r\n",
    "    \"Staying active and contributing more in the community\",\r\n",
    "    \"Advertising myself or my work to the community\",\r\n",
    "    \"Producing valuable code\",\r\n",
    "    \"Getting involved in popular projects\",\r\n",
    "    \"Getting involved in projects adopted by companies\",\r\n",
    "    \"Getting involved in long-term projects\",\r\n",
    "    \"Getting involved in less maintained yet important projects\",\r\n",
    "    \"Getting involved in projects led by companies\",\r\n",
    "    \"Providing localized content\",\r\n",
    "    \"Other\"\r\n",
    "]\r\n",
    "\r\n",
    "cur.execute(\"select count(distinct login) as num from questionnaire_results_maintainer_2_how_get_more where value is not null\")\r\n",
    "all_num = cur.fetchone()[\"num\"]\r\n",
    "\r\n",
    "for reason in all_reasons:\r\n",
    "    if reason == \"Other\":\r\n",
    "        cur.execute(\"select count(distinct login) as num from questionnaire_results_maintainer_2_how_get_more where value like 'Other (please specify)%'\")\r\n",
    "        num = cur.fetchone()[\"num\"]\r\n",
    "    else:\r\n",
    "        cur.execute(\"select count(distinct login) as num from questionnaire_results_maintainer_2_how_get_more where value=%s\", (reason,))\r\n",
    "        num = cur.fetchone()[\"num\"]\r\n",
    "    print(round(num / all_num * 100, 1))"
   ],
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "62.6\n",
      "54.5\n",
      "54.1\n",
      "38.5\n",
      "29.1\n",
      "25.5\n",
      "21.6\n",
      "19.1\n",
      "8.8\n",
      "7.4\n",
      "3.6\n"
     ]
    }
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "source": [
    "# 统计ways for sponsors\r\n",
    "# author: zxh\r\n",
    "# date: 2021-07-22\r\n",
    "import pymysql, yaml\r\n",
    "from utils import *\r\n",
    "import seaborn as sns\r\n",
    "import pandas as pd\r\n",
    "from matplotlib import pyplot as plt\r\n",
    "\r\n",
    "f = open('config.yaml', 'r')\r\n",
    "config = yaml.load(f.read(), Loader=yaml.BaseLoader)\r\n",
    "conn = connectMysqlDB(config, autocommit = False)\r\n",
    "cur = conn.cursor(pymysql.cursors.DictCursor)\r\n",
    "\r\n",
    "all_reasons = [\r\n",
    "    \"Developers whose projects I benefit from\",\r\n",
    "    \"Developers whose projects I’m interested in\",\r\n",
    "    \"Developers who make important contributions\",\r\n",
    "    \"Developers who are active in community\",\r\n",
    "    \"Independent developers\",\r\n",
    "    \"Developers who haven’t received much sponsorship\",\r\n",
    "    \"Developers who are in hardship\",\r\n",
    "    \"Developers who I know\",\r\n",
    "    \"Other\"\r\n",
    "]\r\n",
    "\r\n",
    "cur.execute(\"select count(distinct login) as num from questionnaire_results_sponsor_2_sponsor_kinds where value is not null\")\r\n",
    "all_num = cur.fetchone()[\"num\"]\r\n",
    "\r\n",
    "for reason in all_reasons:\r\n",
    "    if reason == \"Other\":\r\n",
    "        cur.execute(\"select count(distinct login) as num from questionnaire_results_sponsor_2_sponsor_kinds where value like 'Other (please specify)%'\")\r\n",
    "        num = cur.fetchone()[\"num\"]\r\n",
    "    else:\r\n",
    "        cur.execute(\"select count(distinct login) as num from questionnaire_results_sponsor_2_sponsor_kinds where value=%s\", (reason,))\r\n",
    "        num = cur.fetchone()[\"num\"]\r\n",
    "    print(round(num / all_num * 100, 1))"
   ],
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "85.1\n",
      "60.3\n",
      "50.9\n",
      "42.0\n",
      "31.1\n",
      "24.1\n",
      "18.7\n",
      "15.4\n",
      "1.0\n"
     ]
    }
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "source": [
    "# 分析maintainer给出的way中选择了多种情况的人所占的比例\r\n",
    "'''\r\n",
    "统计table_name中colname列出现了>=n个选项的有多少个login\r\n",
    "'''\r\n",
    "\r\n",
    "import pymysql, yaml\r\n",
    "from utils import *\r\n",
    "import seaborn as sns\r\n",
    "import pandas as pd\r\n",
    "from matplotlib import pyplot as plt\r\n",
    "\r\n",
    "f = open('config.yaml', 'r')\r\n",
    "config = yaml.load(f.read(), Loader=yaml.BaseLoader)\r\n",
    "conn = connectMysqlDB(config, autocommit = False)\r\n",
    "cur = conn.cursor(pymysql.cursors.DictCursor)\r\n",
    "\r\n",
    "def multi_choice_perc(table_name, colname, n):\r\n",
    "    cur.execute(\"select distinct login from \" + table_name + \" where \" + colname + \" is not null group by login having count(*)>=\" + str(n))\r\n",
    "    num = len(cur.fetchall())\r\n",
    "\r\n",
    "    cur.execute(\"select count(distinct login) as num from \" + table_name + \" where \" + colname + \" is not null\")\r\n",
    "    all_num = cur.fetchone()['num']\r\n",
    "\r\n",
    "    print(\"table: %s, col: %s, >=%s, perc: %s\" % (table_name, colname, str(n), str(round(num / all_num * 100, 1))))\r\n",
    "\r\n",
    "multi_choice_perc(\"questionnaire_results_maintainer_2_how_get_more\", \"value\", 5)"
   ],
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "table: questionnaire_results_maintainer_2_how_get_more, col: value, >=2, perc: 85.6\n"
     ]
    }
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "source": [
    "# 计算maintainer shortcoming所占比例\r\n",
    "# author: zxh\r\n",
    "# date: 2021-07-26\r\n",
    "import pymysql, yaml\r\n",
    "from utils import *\r\n",
    "import seaborn as sns\r\n",
    "import pandas as pd\r\n",
    "from matplotlib import pyplot as plt\r\n",
    "\r\n",
    "f = open('config.yaml', 'r')\r\n",
    "config = yaml.load(f.read(), Loader=yaml.BaseLoader)\r\n",
    "conn = connectMysqlDB(config, autocommit = False)\r\n",
    "cur = conn.cursor(pymysql.cursors.DictCursor)\r\n",
    "\r\n",
    "all_reasons = [\r\n",
    "    \"It’s hard for others to discover me for sponsorship\",\r\n",
    "    \"I can’t interact with my sponsors on GitHub (e.g., for expressing appreciation)\",\r\n",
    "    \"Lack of a wide range of payment options (e.g., one-time/yearly/quarterly payment)\",\r\n",
    "    \"GitHub does not distinctly mark my sponsors (e.g., I cannot easily tell whether an issue submitter is my sponsor)\",\r\n",
    "    \"I have to pay taxes\",\r\n",
    "    \"None. It's perfect to me\",\r\n",
    "    \"It is not supported in many regions\",\r\n",
    "    \"I can’t declare how I dealt with the received money\",\r\n",
    "    \"Other\"\r\n",
    "]\r\n",
    "\r\n",
    "cur.execute(\"select count(distinct login) as num from questionnaire_results_maintainer_2_shortcomings where value is not null\")\r\n",
    "all_num = cur.fetchone()[\"num\"]\r\n",
    "\r\n",
    "for reason in all_reasons:\r\n",
    "    if reason == \"Other\":\r\n",
    "        cur.execute(\"select count(distinct login) as num from questionnaire_results_maintainer_2_shortcomings where value like 'Other (please specify)%'\")\r\n",
    "        num = cur.fetchone()[\"num\"]\r\n",
    "    else:\r\n",
    "        cur.execute(\"select count(distinct login) as num from questionnaire_results_maintainer_2_shortcomings where value=%s\", (reason,))\r\n",
    "        num = cur.fetchone()[\"num\"]\r\n",
    "    print(round(num / all_num * 100, 1))"
   ],
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "51.3\n",
      "29.4\n",
      "25.1\n",
      "20.7\n",
      "19.3\n",
      "13.1\n",
      "11.0\n",
      "10.1\n",
      "9.4\n"
     ]
    }
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# 计算sponsor shortcoming所占比例\r\n",
    "# author: zxh\r\n",
    "# date: 2021-07-26\r\n",
    "import pymysql, yaml\r\n",
    "from utils import *\r\n",
    "import seaborn as sns\r\n",
    "import pandas as pd\r\n",
    "from matplotlib import pyplot as plt\r\n",
    "\r\n",
    "f = open('config.yaml', 'r')\r\n",
    "config = yaml.load(f.read(), Loader=yaml.BaseLoader)\r\n",
    "conn = connectMysqlDB(config, autocommit = False)\r\n",
    "cur = conn.cursor(pymysql.cursors.DictCursor)\r\n",
    "\r\n",
    "all_reasons = [\r\n",
    "    \"I cannot assess how urgently a developer needs to be sponsored\",\r\n",
    "    \"None. It’s perfect\",\r\n",
    "    \"It’s hard for me to find the developer I should sponsor\",\r\n",
    "    \"It is not supported in many regions\",\r\n",
    "    \"I can’t interact with the developer I sponsored on GitHub\",\r\n",
    "    \"I’m not distinctly marked in the projects whose maintainers have been sponsored by me (e.g., when I submit an issue)\",\r\n",
    "    \"Other\"\r\n",
    "]\r\n",
    "\r\n",
    "cur.execute(\"select count(distinct login) as num from questionnaire_results_sponsor_2_shortcomings where value is not null\")\r\n",
    "all_num = cur.fetchone()[\"num\"]\r\n",
    "\r\n",
    "for reason in all_reasons:\r\n",
    "    if reason == \"Other\":\r\n",
    "        cur.execute(\"select count(distinct login) as num from questionnaire_results_sponsor_2_shortcomings where value like 'Other (please specify)%'\")\r\n",
    "        num = cur.fetchone()[\"num\"]\r\n",
    "    else:\r\n",
    "        cur.execute(\"select count(distinct login) as num from questionnaire_results_sponsor_2_shortcomings where value=%s\", (reason,))\r\n",
    "        num = cur.fetchone()[\"num\"]\r\n",
    "    print(round(num / all_num * 100, 1))"
   ],
   "outputs": [],
   "metadata": {}
  }
 ]
}