UCI糖尿病数据利用逻辑回归算法进行训练和预测
jupyter
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"diabetes_data = pd.read_csv('diabetes.csv')"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Pregnancies</th>\n",
" <th>Glucose</th>\n",
" <th>BloodPressure</th>\n",
" <th>SkinThickness</th>\n",
" <th>Insulin</th>\n",
" <th>BMI</th>\n",
" <th>DiabetesPedigreeFunction</th>\n",
" <th>Age</th>\n",
" <th>Outcome</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>6</td>\n",
" <td>148</td>\n",
" <td>72</td>\n",
" <td>35</td>\n",
" <td>0</td>\n",
" <td>33.6</td>\n",
" <td>0.627</td>\n",
" <td>50</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>85</td>\n",
" <td>66</td>\n",
" <td>29</td>\n",
" <td>0</td>\n",
" <td>26.6</td>\n",
" <td>0.351</td>\n",
" <td>31</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>8</td>\n",
" <td>183</td>\n",
" <td>64</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>23.3</td>\n",
" <td>0.672</td>\n",
" <td>32</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>89</td>\n",
" <td>66</td>\n",
" <td>23</td>\n",
" <td>94</td>\n",
" <td>28.1</td>\n",
" <td>0.167</td>\n",
" <td>21</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>137</td>\n",
" <td>40</td>\n",
" <td>35</td>\n",
" <td>168</td>\n",
" <td>43.1</td>\n",
" <td>2.288</td>\n",
" <td>33</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n",
"0 6 148 72 35 0 33.6 \n",
"1 1 85 66 29 0 26.6 \n",
"2 8 183 64 0 0 23.3 \n",
"3 1 89 66 23 94 28.1 \n",
"4 0 137 40 35 168 43.1 \n",
"\n",
" DiabetesPedigreeFunction Age Outcome \n",
"0 0.627 50 1 \n",
"1 0.351 31 0 \n",
"2 0.672 32 1 \n",
"3 0.167 21 0 \n",
"4 2.288 33 1 "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"diabetes_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def sigmoid(z):\n",
" return 1 / (1 + np.exp(-z))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.5"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sigmoid(0)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[<matplotlib.lines.Line2D at 0x1dd0d80a908>]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAHelJREFUeJzt3Xt8VPWd//HXh4SLCoiCpCoWoaAuVrcaxKK2QnUtUITaFcSu1l7Z/rr0113b3wraosW2q7v667Yr2iurpR0Q6w01Sisk+rCKAq2goEikXrIgeEMIKBDy2T++ExziJDOZ25nL+/l4nEfOmfOdyZuTySdfvnPO+Zq7IyIi5aVb1AFERCT3VNxFRMqQiruISBlScRcRKUMq7iIiZUjFXUSkDKm4i4iUIRV3EZEypOIuIlKGqqP6xgMGDPBjjz02o+fu3LmTQw45JLeBckj5sqN82Sv2jMqXuVWrVr3h7kekbOjukSy1tbWeqfr6+oyfWwjKlx3ly16xZ1S+zAErPY0aq2EZEZEypOIuIlKGVNxFRMqQiruISBlScRcRKUMpi7uZzTOzrWb2bAf7zcx+amaNZrbGzE7NfUwREemKdHrutwLjOtk/HhgeX6YDt2QfS0REspHyIiZ3f9TMju2kyWTgN/HzL5ebWT8zO9LdN+coo4hI4A7vvgu7d0NLC+zd27Wl7TmtrQcu7gdsH/X88/Dssx9s19Y2cWnL1ZX188+H007L66HKxRWqRwOvJmw3xR/7QHE3s+mE3j01NTU0NDRk9A2bm5szfm4hKF92lC97xZqxescOem3eTM/XX+eZxx6jetcuqnbt2v815fq772KtrXnPeVweX9vN2NDczKadO/P4XXJT3C3JY0ln3Xb3XwC/ABg5cqSPGTMmo2/Y0NBAps8tBOXLjvJlL7KM7rB1KzQ2huXFFw9cf+utjp9bVQV9+0KfPmHp2xf6939/ve3x3r2hZ0/o3r3rS3V1+FpVBd26hcXs/fX48qfHH+fMT3ziA49j9n57eH873XVCwTyO/P4BgdwU9ybgmITtQcCmHLyuiBSj1lZoajqweCcW8cQeabduMHgwDBsGF10EH/kIDB3KX15+mVPOPvvAwt2r1wFFMEp7DzsMBgyIOkZWclHcFwMzzGwhcDrwjsbbRcpMSwssXQrz58M99xxYwHv0gCFDQgEfMyYU8GHDwjJ4cNjfzjsNDXDKKQWLX4lSFnczWwCMAQaYWRNwNdAdwN1/BtQBE4BGYBfwpXyFFZECcofVq0NBj8XgtdegXz/4/Oehtvb9Aj5oUBjmkKKSztkyF6fY78A/5SyRiESrqSkU8/nzwxkj3bvDZz4Dl14avvbsGXVCSUNk93MXkSKyYwfcdVco6MuWhV776NFw880wdWr4YFNKioq7SKVqaYGHHw4F/e67w/njQ4fC7NlwySVhyEVKloq7SKVZvRpuuy0MvWzZAocdBpddFoZdRo8umjNWJDsq7iKVwh1+8IPQM+/eHSZODAV9wgSNo5chFXeRStDSAt/4Bvzyl6Gg/+d/wuGHR51K8kjFXaTc7dwZLiB64AG48srQe9fQS9lTcRcpZ1u3huGXVavgllvg61+POpEUiIq7SLlqbIRx42DTpnA2zKRJUSeSAlJxFylDfdatgylTwsayZfDxj0cbSApO0+yJlJv77uNjl18ebsj1+OMq7BVKxV2knPz85/DZz7Lz2GNDYR8+POpEEhEVd5Fy4A7f/W74wHTcOFb/+MdQUxN1KomQirtIqdu7F770JfjhD+ErX4F772XfQQdFnUoipuIuUsp27AinOt52G1xzTbhIqVrnSYjOlhEpXZs3h1vwrlkDv/41fPnLUSeSIqLiLlKKnn8+nMP+xhtw330wfnzUiaTIqLiLlJo//SlckFRdDQ0NMHJk1ImkCGnMXaSUrFkD554bJs944gkVdumQeu4ipeRf/xUOOggeewwGDow6jRQxFXeRUrF0KSxZAjfcoMIuKWlYRqQUtLbCFVfAhz8M/6T56CU19dxFSsGiReG2vbfdBr16RZ1GSoB67iLFbs8euOoqOOkk+Id/iDqNlAj13EWK3c9/Dhs3Ql0dVFVFnUZKhHruIsVs+3aYMwfGjg0XLYmkScVdpJjdcEO4CvX66zXvqXSJirtIsXrtNbjxRpg6FU47Leo0UmJU3EWK1fe/Hz5M/eEPo04iJUjFXaQYvfBCuH3vP/4jDBsWdRopQSruIsXoyivDbQa+972ok0iJSqu4m9k4M1tvZo1mNjPJ/g+bWb2Z/cXM1pjZhNxHFakQy5fDnXfCd76jqfIkYymLu5lVAXOB8cAI4GIzG9Gu2XeBRe5+CjANuDnXQUUqgnu4zcDAgXD55VGnkRKWzkVMo4BGd98IYGYLgcnAuoQ2DvSNrx8KbMplSJGKUVcHjz4Kc+dCnz5Rp5ESlk5xPxp4NWG7CTi9XZtrgD+Y2TeBQ4Bzc5JOpJLs2wczZ4YPUL/2tajTSIkzd++8gdkU4NPu/tX49qXAKHf/ZkKby+OvdaOZjQZ+DXzU3VvbvdZ0YDpATU1N7cKFCzMK3dzcTO/evTN6biEoX3YqNd+HHnqIE66/nrVXX83rY8Zk9VqVegxzpZjzjR07dpW7p56lxd07XYDRwJKE7VnArHZt1gLHJGxvBAZ29rq1tbWeqfr6+oyfWwjKl52KzLdrl/ugQe6jRrm3tmb9chV5DHOomPMBKz1F3Xb3tM6WWQEMN7MhZtaD8IHp4nZtXgHOATCzvwF6Aa+n8doiAnDTTdDUpNsMSM6kLO7u3gLMAJYAzxHOillrZnPMbFK82beBr5nZamAB8MX4XxgRSeWtt+BHP4IJEyDL4RiRNmnd8tfd64C6do/NTlhfB5yZ22giFeK66+Cdd+Df/i3qJFJGdIWqSJReeQV++lP4whfg5JOjTiNlRMVdJEpXXx2+zpkTbQ4pOyruIlF55pkwJ+o3vxkmvhbJIRV3kajMnAmHHgqzZkWdRMqQ5lAViUJDQ7jVwPXXw+GHR51GypB67iKF1nZzsEGDwpCMSB6o5y5SaHfeCU89BfPmhXu2i+SBeu4ihbR3b5iI48QTw+mPInminrtIId1zD2zYEL5WVUWdRsqYeu4ihRSLwZFHwsSJUSeRMqfiLlIo27aFM2Quuki9dsk7FXeRQrnrLtizBz7/+aiTSAVQcRcplFgszLI0MvU8CyLZUnEXKYTNm2HZstBr1/3apQBU3EUKYdGicPHSxRdHnUQqhIq7SCHEYnDKKXDCCVEnkQqh4i6Sb42N4YpUfZAqBaTiLpJvCxaEcfZp06JOIhVExV0kn9zDkMwnPxluFCZSICruIvm0ejU8/7w+SJWCU3EXyadYDKqr4cILo04iFUbFXSRfWlvDePu4cdC/f9RppMKouIvky2OPQVOTzpKRSKi4i+TLggVw8MEwaVLUSaQCqbiL5MOePeGq1MmT4ZBDok4jFUjFXSQf/vhHeOstDclIZFTcRfIhFoPDD4fzzos6iVQoFXeRXNu5E+69F6ZMgR49ok4jFUrFXSTX7rsvFHhduCQRUnEXybVYDI4+Gj7xiaiTSAVTcRfJpTffhAcfDL32bvr1kuik9e4zs3Fmtt7MGs1sZgdtpprZOjNba2ax3MYUKRF33gktLTpLRiJXnaqBmVUBc4G/A5qAFWa22N3XJbQZDswCznT3t81sYL4CixS1BQvg+OPhYx+LOolUuHR67qOARnff6O57gIXA5HZtvgbMdfe3Adx9a25jipSApiZ45BHNkypFwdy98wZmFwLj3P2r8e1LgdPdfUZCm3uAF4AzgSrgGnd/KMlrTQemA9TU1NQuXLgwo9DNzc307t07o+cWgvJlp1TzDVq0iGG33MKT8+fzbsT3bi/VY1gsijnf2LFjV7n7yJQN3b3TBZgC/Cph+1Lgv9q1uR+4G+gODCEM3/Tr7HVra2s9U/X19Rk/txCULzslm+/UU91PO62gWTpSssewSBRzPmClp6jb7p7WsEwTcEzC9iBgU5I297r7Xnf/K7AeGJ7Ga4uUh/Xr4c9/1gepUjTSKe4rgOFmNsTMegDTgMXt2twDjAUwswHAccDGXAYVKWpt86ROnRp1EhEgjeLu7i3ADGAJ8BywyN3XmtkcM2u7l+kS4E0zWwfUA//P3d/MV2iRotI2T+rYsXDUUVGnEQHSOBUSwN3rgLp2j81OWHfg8vgiUllWrYING+CKK6JOIrKfLqETyVYsFm4Q9rnPRZ1EZD8Vd5Fs7NsHt98O48fDYYdFnUZkPxV3kWw8+ihs2qSzZKToqLiLZCMWg969YeLEqJOIHEDFXSRTu3fD738PF1wQJsIWKSIq7iKZWrIEtm3TkIwUJRV3kUzFYjBgAJxzTtRJRD5AxV0kEzt2wOLF4YrU7t2jTiPyASruIpm49154910NyUjRUnEXycSCBTB4MIweHXUSkaRU3EW6qPu2beHD1GnTNE+qFC29M0W66IhHHglXpmpIRoqYirtIFw1cuhROPBFOOinqKCIdUnEX6YpXXqHfM89onlQpeiruIl3RNu/vtGnR5hBJQcVdpCtiMd4ZMQKGDo06iUinVNxF0rV2LaxezVZdkSolQMVdJF0LFkC3bmwdMybqJCIpqbiLpMM9FPdzz2Xv4YdHnUYkJRV3kXQ89RRs3AgXXxx1EpG0qLiLpCMWg549w73bRUqAirtIKi0tYZ7UiRPh0EOjTiOSFhV3kVQaGmDLFt1uQEqKirtIKrEY9O0LEyZEnUQkbSruIp157z2480743OegV6+o04ikTcVdpDN1dbB9u4ZkpOSouIt0JhaDmhoYOzbqJCJdouIu0pHt2+H+++Gii6C6Ouo0Il2i4i7Skbvvht27deGSlCQVd5GOxGIwZAicfnrUSUS6LK3ibmbjzGy9mTWa2cxO2l1oZm5mI3MXUSQCW7bAww9rUg4pWSmLu5lVAXOB8cAI4GIzG5GkXR/g/wJP5jqkSMHdcQe0tuosGSlZ6fTcRwGN7r7R3fcAC4HJSdpdC/w78F4O84lEIxaDk0+GER/ox4iUhHSK+9HAqwnbTfHH9jOzU4Bj3P3+HGYTicbGjfDEE+q1S0lL5/yuZAOOvn+nWTfgx8AXU76Q2XRgOkBNTQ0NDQ1phWyvubk54+cWgvJlJ+p8H/7tbxkKPDF4MLuT5Ig6XzqKPaPyFYC7d7oAo4ElCduzgFkJ24cCbwAvxZf3gE3AyM5et7a21jNVX1+f8XMLQfmyE3m+E090P+usDndHni8NxZ5R+TIHrPQUddvd0xqWWQEMN7MhZtYDmAYsTvjj8I67D3D3Y939WGA5MMndV+bij49IQT3zTJgrVee2S4lLWdzdvQWYASwBngMWuftaM5tjZpPyHVCkoGIxqKqCKVOiTiKSlbSuqXb3OqCu3WOzO2g7JvtYIhFobQ3zpJ53HhxxRNRpRLKiK1RF2jzxBLz8ss6SkbKg4i7SZsECOOggmJzsMg6R0qLiLgKwdy8sWgTnnw99+kSdRiRrKu4iAEuXwuuva0hGyoaKuwiEs2T69YNx46JOIpITKu4iu3aFe7dfeCH07Bl1GpGcUHEXeeABaG7WhUtSVlTcRWIxOPJIOPvsqJOI5IyKu1S2t9+GujqYNi1cmSpSJlTcpbLddRfs2aOzZKTsqLhLZVuwAIYNg9raqJOI5JSKu1SuzZth2TLNkyplScVdKtftt4O7zpKRsqTiLpUrFoNTT4UTTog6iUjOqbhLZdqwAVas0AepUrZU3KUyLVwYxtkvuijqJCJ5oeIulccdfvc7+OQnYdCgqNOI5IWKu1Sep5+G9es1JCNlTcVdKk8sBt27w9//fdRJRPJGxV0qS2trGG//9Kehf/+o04jkjYq7VJaHH4amJg3JSNlTcZfK4Q5XXQXHHAMXXBB1GpG8qo46gEjB3HEHrFwJt94KvXpFnUYkr9Rzl8qwdy9ceSWcdBJccknUaUTyTj13qQy/+AW8+GKYdUn3bZcKoJ67lL8dO2DOnDDT0vjxUacRKQj13KX83XgjbN0K992nW/tKxVDPXcrbli1www1w4YUwalTUaUQKRsVdytucObB7N/zoR1EnESkoFXcpXxs2hA9Sp0+H4cOjTiNSUCruUr6uugp69oTZs6NOIlJwaRV3MxtnZuvNrNHMZibZf7mZrTOzNWa21MwG5z6qSBc89VS4aOk734GamqjTiBRcyuJuZlXAXGA8MAK42MxGtGv2F2Cku58M/B7491wHFUmbO1xxBQwcCN/+dtRpRCKRTs99FNDo7hvdfQ+wEJic2MDd6919V3xzOaAZECQ6Dz0EDQ1hOKZPn6jTiETC3L3zBmYXAuPc/avx7UuB0919RgftbwJec/cfJNk3HZgOUFNTU7tw4cKMQjc3N9O7d++MnlsIypedrPLt28fI6dOpeu89nrr1Vrx799yGo/iPHxR/RuXL3NixY1e5+8iUDd290wWYAvwqYftS4L86aHsJoefeM9Xr1tbWeqbq6+szfm4hKF92ssp3663u4H777TnL016xHz/34s+ofJkDVnqK+uruaV2h2gQck7A9CNjUvpGZnQtcBZzt7rvTeF2R3HrvPfje92DkyHDRkkgFS6e4rwCGm9kQ4H+AacABMx2Y2SnAzwnDN1tznlIkHTfdBK++CrfdBt10lq9UtpS/Ae7eAswAlgDPAYvcfa2ZzTGzSfFm/wH0Bu4ws6fNbHHeEosk8/bb4SrUceNg7Nio04hELq0bh7l7HVDX7rHZCevn5jiXSNdcdx1s2xa+ioiuUJUy8Oqr8JOfhEk4/vZvo04jUhRU3KX0XX11uHDp2mujTiJSNFTcpbQ9+2z4AHXGDBisu16ItFFxl9I2a1a4CvXKK6NOIlJUVNyldD36KNx/P8ycCf37R51GpKiouEtpars52NFHw7e+FXUakaKjOVSlNN19NyxfDr/6FRx0UNRpRIqOeu5SevbuDWPtI0bAZZdFnUakKKnnLqVn3jx44QW4916o1ltYJBn13KW0NDfDNdfAWWfB+edHnUakaKnbI6XjjTdg0iTYsgXuugvMok4kUrRU3KU0bNwI48fDyy+HuVFHj446kUhRU3GX4rdqFUyYED5IXboUzjwz6kQiRU9j7lLcHnwQzj47nO74+OMq7CJpUnGX4jVvXvjQ9Ljj4Ikn4IQTok4kUjJU3KX4uMP3vw9f+Qqccw488ggceWTUqURKisbcpbi0tHDcjTfCAw+EC5R++Uvo3j3qVCIlRz13KR7NzTB5Mkc98AB897vw3/+twi6SIRV3KQ5btoS5Tx96iPX/8i9h4g2dxy6SMQ3LSPReeCGcw755M9xzD5v79OH4qDOJlDj13CVay5fDGWfA9u3Q0KBbCojkiIq7RGfxYvjUp6Bfv3Cq46hRUScSKRsq7hKNW26BCy6Aj340XJw0bFjUiUTKioq7FFZLS5jv9BvfCLcUqK+HgQOjTiVSdvSBquSfOzz9NMyfDwsWwGuvwfTpMHeu7scukif6zZL8aWqCWCwU9WefDeesf+Yz8MUvhlv36lRHkbxRcZfc2rEj3Gt9/nxYtiz02kePhptvhqlToX//qBOKVAQVd8leSwv88Y/w29+GiavffReGDoXZs+GSS/RhqUgEVNwlM+7wl7+8P46+ZQscdli4H8yll4beuoZdRCKj4i7p2bcPXn0VGhthxYrQS1+3LoyjT5wYCvqECdCzZ9RJRYQ0i7uZjQN+AlQBv3L369rt7wn8BqgF3gQucveXchtV8m7PHnjppVDAGxvhxRffX//rX8NMSG3OOCOcqz51Khx+eGSRRSS5lMXdzKqAucDfAU3ACjNb7O7rEpp9BXjb3YeZ2TTgeuCifASWDLW2hrsu7tjBIRs3wrZtHyzgr7wS2rXp3TuMl590UrjgaNgw+MhH4Pjj4aijovu3iEhK6fTcRwGN7r4RwMwWApOBxOI+Gbgmvv574CYzM3f3HGYtD+6hgO7de+DS0vLBxzpa9uwJZ6Vs3x6+prPe3Lw/wmmJefr3DwX7jDPgC18I621FfOBAjZuLlKh0ivvRwKsJ203A6R21cfcWM3sH6A+8kYuQB5g3j9PmzIGDDw7b7f9+JPt7kqxN22Op1jva19oalsT1+HJWS0soiomPt7XLx9+7Hj2gb1/o0ycsffvCEUeEM1baHk/Yv3bTJk6cODEU8MMOy30eEYlcOsU9WdetfYVKpw1mNh2YDlBTU0NDQ0Ma3/5A/Tdvpv+gQexMvLKxfe8ySW/Tk/VA2x4zO3B/Z4/Ht71bt/f3d+t2wNc9LS1079kz7O8Wv8NDuzZeXY1XV9NaVYVXVe3f3v9YZ9vV1ew7+GD2HXwwLQcdhPfo0aVj2HzUUbze3AyrV3fpeYXS3Nyc0XujUIo9HxR/RuUrAHfvdAFGA0sStmcBs9q1WQKMjq9XE3rs1tnr1tbWeqbq6+szfm4hKF92lC97xZ5R+TIHrPQUddvd07px2ApguJkNMbMewDRgcbs2i4HL4usXAsviIUREJAIph2U8jKHPIPTOq4B57r7WzOYQ/oIsBn4NzDezRuAtwh8AERGJSFrnubt7HVDX7rHZCevvAVNyG01ERDKl+7mLiJQhFXcRkTKk4i4iUoZU3EVEypCKu4hIGbKoTkc3s9eBlzN8+gDycWuD3FG+7Chf9oo9o/JlbrC7H5GqUWTFPRtmttLdR0adoyPKlx3ly16xZ1S+/NOwjIhIGVJxFxEpQ6Va3H8RdYAUlC87ype9Ys+ofHlWkmPuIiLSuVLtuYuISCeKtrib2RQzW2tmrWY2st2+WWbWaGbrzezTHTx/iJk9aWYbzOz2+O2K85X1djN7Or68ZGZPd9DuJTN7Jt5uZb7yJPm+15jZ/yRknNBBu3HxY9poZjMLmO8/zOx5M1tjZnebWb8O2hX0+KU6HmbWM/6zb4y/147Nd6aE732MmdWb2XPx35NvJWkzxszeSfi5z072WnnM2OnPy4Kfxo/fGjM7tYDZjk84Lk+b2XYz++d2bSI9fllL56bvUSzA3wDHAw3AyITHRwCrgZ7AEOBFoCrJ8xcB0+LrPwP+T4Fy3wjM7mDfS8CACI7lNcB3UrSpih/LoUCP+DEeUaB85wHV8fXrgeujPn7pHA/gG8DP4uvTgNsL+DM9Ejg1vt4HeCFJvjHA/YV+v6X78wImAA8SZnL7OPBkRDmrgNcI548XzfHLdinanru7P+fu65PsmgwsdPfd7v5XoJEwifd+ZmbApwiTdQPcBnw2n3kTvu9UYEG+v1ce7J8I3d33AG0Toeedu//B3Vvim8uBQYX4vimkczwmE95bEN5r58TfA3nn7pvd/c/x9R3Ac4S5jEvJZOA3HiwH+pnZkRHkOAd40d0zvaiyKBVtce9Esgm727+p+wPbEgpGsjb58Algi7tv6GC/A38ws1Xx+WQLaUb8v77zzCzZrNjpHNdC+DKhN5dMIY9fOsfjgInhgbaJ4QsqPhx0CvBkkt2jzWy1mT1oZicWNFjqn1exvOem0XGHLMrjl5W0JuvIFzN7GPhQkl1Xufu9HT0tyWMZTdjdFWlmvZjOe+1nuvsmMxsI/NHMnnf3R7PJlU4+4BbgWsIxuJYwdPTl9i+R5Lk5O5UqneNnZlcBLcDvOniZvB2/JCJ5n3WVmfUG7gT+2d23t9v9Z8JQQ3P8c5Z7gOEFjJfq51UMx68HMIkwN3R7UR+/rERa3N393Aye1gQck7A9CNjUrs0bhP/iVcd7VMnadEmqrGZWDXwOqO3kNTbFv241s7sJ//XPSXFK91ia2S+B+5PsSue4ZiyN43cZMBE4x+MDnkleI2/HL4l0jkdbm6b4z/9QwjSTBWFm3QmF/Xfuflf7/YnF3t3rzOxmMxvg7gW5Z0oaP6+8vufSNB74s7tvab8j6uOXrVIcllkMTIufqTCE8Jf0qcQG8eJQT5isG8Lk3R39TyBXzgWed/emZDvN7BAz69O2TvgQ8dk8Z2r73onjmBd08H3TmQg9X/nGAVcAk9x9VwdtCn38inpi+PjY/q+B59z9/3fQ5kNtnwGY2SjC7/ubBcqXzs9rMfCF+FkzHwfecffNhciXoMP/bUd5/HIi6k90O1oIRagJ2A1sAZYk7LuKcCbDemB8wuN1wFHx9aGEot8I3AH0zHPeW4Gvt3vsKKAuIc/q+LKWMBxRqGM5H3gGWEP4hTqyfb749gTCWRcvFjhfI2Hs9en48rP2+aI4fsmOBzCH8EcIoFf8vdUYf68NLeAxO4swhLEm4bhNAL7e9j4EZsSP1WrCB9VnFDBf0p9Xu3wGzI0f32dIOCuuQBkPJhTrQxMeK4rjl4tFV6iKiJShUhyWERGRFFTcRUTKkIq7iEgZUnEXESlDKu4iImVIxV1EpAypuIuIlCEVdxGRMvS/Tq7yTBjr6swAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"xx = np.arange(-10,10,step=1)\n",
"yy = sigmoid(xx)\n",
"plt.grid()\n",
"plt.plot(xx, yy, 'r')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"def logistic_model(X, theta):\n",
" temp = np.dot(X,theta.T)\n",
" return sigmoid(temp)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"diabetes_data.insert(0,'Ones',1)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Ones</th>\n",
" <th>Pregnancies</th>\n",
" <th>Glucose</th>\n",
" <th>BloodPressure</th>\n",
" <th>SkinThickness</th>\n",
" <th>Insulin</th>\n",
" <th>BMI</th>\n",
" <th>DiabetesPedigreeFunction</th>\n",
" <th>Age</th>\n",
" <th>Outcome</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>148</td>\n",
" <td>72</td>\n",
" <td>35</td>\n",
" <td>0</td>\n",
" <td>33.6</td>\n",
" <td>0.627</td>\n",
" <td>50</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>85</td>\n",
" <td>66</td>\n",
" <td>29</td>\n",
" <td>0</td>\n",
" <td>26.6</td>\n",
" <td>0.351</td>\n",
" <td>31</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" <td>183</td>\n",
" <td>64</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>23.3</td>\n",
" <td>0.672</td>\n",
" <td>32</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>89</td>\n",
" <td>66</td>\n",
" <td>23</td>\n",
" <td>94</td>\n",
" <td>28.1</td>\n",
" <td>0.167</td>\n",
" <td>21</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>137</td>\n",
" <td>40</td>\n",
" <td>35</td>\n",
" <td>168</td>\n",
" <td>43.1</td>\n",
" <td>2.288</td>\n",
" <td>33</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Ones Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n",
"0 1 6 148 72 35 0 33.6 \n",
"1 1 1 85 66 29 0 26.6 \n",
"2 1 8 183 64 0 0 23.3 \n",
"3 1 1 89 66 23 94 28.1 \n",
"4 1 0 137 40 35 168 43.1 \n",
"\n",
" DiabetesPedigreeFunction Age Outcome \n",
"0 0.627 50 1 \n",
"1 0.351 31 0 \n",
"2 0.672 32 1 \n",
"3 0.167 21 0 \n",
"4 2.288 33 1 "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"diabetes_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"orig_data = diabetes_data.values\n",
"cols = orig_data.shape[1]\n",
"X = orig_data[:,0:cols-1] #X大写,表示是一个矩阵\n",
"y = orig_data[:,cols-1:cols] #y小写,表示矢量\n",
"theta = np.zeros([1,cols-1]) #theta设为行向量"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0., 0., 0., 0., 0., 0., 0., 0., 0.]])"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"theta"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"def cost(X, y, theta):\n",
" item1 = np.multiply(y, np.log(logistic_model(X,theta))) \n",
" item2 = np.multiply(1-y, np.log(1 - logistic_model(X,theta))) \n",
" return np.sum(item1 - item2) / (len(X))"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.20938821079415015"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cost(X, y, theta)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"def gradient(X, y, theta):\n",
" grad = np.zeros(theta.shape) #一个梯度是对theta求导的\n",
" error = (logistic_model(X,theta) - y).ravel()\n",
" for j in range(len(theta.ravel())):\n",
" temp = np.multiply(error, X[:,j])\n",
" grad[0,j] = np.sum(temp) / len(X)\n",
" return grad"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"#拟合 fit,求模型的参数,也称为训练的过程,\n",
"def fit(X, y, theta, iter_num = 5000, alpha=0.00001):\n",
" #梯度下降求解\n",
" i = 0 # 迭代次数\n",
" grad = np.zeros(theta.shape) # 计算的梯度\n",
" costs = [cost(X, y, theta)] # 损失值\n",
" while True:\n",
" grad = gradient(X, y, theta)\n",
" theta = theta - alpha*grad # 参数更新\n",
" costs.append(cost(X, y, theta)) # 计算新的损失\n",
" i += 1 \n",
" if i % (iter_num / 10) == 0: print(costs[i])\n",
" if i > iter_num: break\n",
" \n",
" return theta"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"D:\\Anaconda\\lib\\site-packages\\ipykernel_launcher.py:3: RuntimeWarning: divide by zero encountered in log\n",
" This is separate from the ipykernel package so we can avoid doing imports until\n",
"D:\\Anaconda\\lib\\site-packages\\ipykernel_launcher.py:3: RuntimeWarning: invalid value encountered in multiply\n",
" This is separate from the ipykernel package so we can avoid doing imports until\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-5.511409235893268\n",
"-5.31859458354884\n",
"-5.063153190867181\n",
"-4.696751678117883\n",
"-4.074011252701483\n",
"-3.902875016698084\n",
"-3.744762960211906\n",
"-3.601077279719243\n",
"-3.4711575061505564\n",
"-3.352062637279824\n"
]
}
],
"source": [
"# 调参,只能根据经验来,炼丹\n",
"theta = fit(X, y, theta,iter_num = 500000, alpha=0.0015)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"D:\\Anaconda\\lib\\site-packages\\sklearn\\model_selection\\_split.py:2026: FutureWarning: From version 0.21, test_size will always complement train_size unless both are specified.\n",
" FutureWarning)\n"
]
}
],
"source": [
"from sklearn.model_selection import train_test_split\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = .8)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(614, 9)"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train.shape"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"theta = np.zeros([1,cols-1])"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"D:\\Anaconda\\lib\\site-packages\\ipykernel_launcher.py:3: RuntimeWarning: divide by zero encountered in log\n",
" This is separate from the ipykernel package so we can avoid doing imports until\n",
"D:\\Anaconda\\lib\\site-packages\\ipykernel_launcher.py:3: RuntimeWarning: invalid value encountered in multiply\n",
" This is separate from the ipykernel package so we can avoid doing imports until\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-3.4953304853973584\n",
"-4.075793679628199\n",
"-3.7134641993740907\n",
"-3.402073232763518\n",
"-3.155158733286961\n",
"-2.9550791865014996\n",
"-2.785268118118423\n",
"-2.6323045981679933\n",
"-2.4989764065989086\n",
"-2.3919272008733246\n"
]
}
],
"source": [
"theta = fit(X_train, y_train, theta,iter_num = 1000000, alpha=0.0015)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"# 测试过程\n",
"def predict(X,theta, threshold = 0.5):\n",
" p = logistic_model(X, theta)\n",
" #分类,如果这个概率大于0.5,分类为1,否则为0\n",
" y = np.where(p > threshold, 1, 0)\n",
" return y"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"# shift + enter\n",
"myPredict_y = predict(X_test, theta, threshold = 0.5)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>MyPredict</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>124</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>125</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>126</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>127</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>128</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>129</th>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>130</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>131</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>132</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>133</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>134</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>135</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>136</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>137</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>138</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>139</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>140</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>141</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>142</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>143</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>144</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>145</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>146</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>147</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>148</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>149</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>150</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>151</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>152</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>153</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>154 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" 0 MyPredict\n",
"0 0.0 0\n",
"1 1.0 1\n",
"2 1.0 1\n",
"3 0.0 0\n",
"4 0.0 0\n",
"5 0.0 1\n",
"6 1.0 1\n",
"7 1.0 1\n",
"8 0.0 0\n",
"9 1.0 1\n",
"10 0.0 0\n",
"11 0.0 1\n",
"12 0.0 0\n",
"13 1.0 1\n",
"14 0.0 1\n",
"15 0.0 1\n",
"16 0.0 0\n",
"17 0.0 0\n",
"18 0.0 1\n",
"19 1.0 1\n",
"20 0.0 1\n",
"21 1.0 1\n",
"22 0.0 0\n",
"23 0.0 1\n",
"24 0.0 1\n",
"25 0.0 0\n",
"26 0.0 0\n",
"27 1.0 1\n",
"28 1.0 1\n",
"29 0.0 0\n",
".. ... ...\n",
"124 1.0 1\n",
"125 1.0 1\n",
"126 1.0 1\n",
"127 1.0 1\n",
"128 0.0 1\n",
"129 1.0 0\n",
"130 0.0 1\n",
"131 0.0 0\n",
"132 1.0 1\n",
"133 0.0 1\n",
"134 0.0 1\n",
"135 1.0 1\n",
"136 1.0 1\n",
"137 1.0 1\n",
"138 0.0 0\n",
"139 0.0 1\n",
"140 0.0 0\n",
"141 0.0 0\n",
"142 0.0 1\n",
"143 0.0 1\n",
"144 0.0 0\n",
"145 0.0 0\n",
"146 0.0 0\n",
"147 0.0 0\n",
"148 0.0 0\n",
"149 0.0 0\n",
"150 0.0 1\n",
"151 1.0 1\n",
"152 0.0 1\n",
"153 0.0 1\n",
"\n",
"[154 rows x 2 columns]"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 要看预测的值和真实值y_test之间的差别\n",
"yy = pd.DataFrame(y_test)\n",
"yy[\"MyPredict\"] = myPredict_y\n",
"yy\n",
"#20个数据对了16个, 80%准确率"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"D:\\Anaconda\\lib\\site-packages\\sklearn\\utils\\validation.py:578: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" y = column_or_1d(y, warn=True)\n"
]
},
{
"data": {
"text/plain": [
"LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
" intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n",
" penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n",
" verbose=0, warm_start=False)"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 看sklearn的函数的表现\n",
"from sklearn.linear_model import LogisticRegression\n",
"\n",
"lr = LogisticRegression()\n",
"\n",
"lr.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"lr_predict = lr.predict(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0., 0., 1., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0.,\n",
" 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1.,\n",
" 0., 0., 1., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0.,\n",
" 0., 1., 0., 1., 1., 0., 0., 1., 0., 0., 0., 0., 0., 1., 1., 0., 1.,\n",
" 0., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1.,\n",
" 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0.,\n",
" 1.])"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lr_predict"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>MyPredict</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>124</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>125</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>126</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>127</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>128</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>129</th>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>130</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>131</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>132</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>133</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>134</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>135</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>136</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>137</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>138</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>139</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>140</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>141</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>142</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>143</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>144</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>145</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>146</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>147</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>148</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>149</th>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>150</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>151</th>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>152</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>153</th>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>154 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" 0 MyPredict\n",
"0 0.0 0\n",
"1 1.0 1\n",
"2 1.0 1\n",
"3 0.0 0\n",
"4 0.0 0\n",
"5 0.0 1\n",
"6 1.0 1\n",
"7 1.0 1\n",
"8 0.0 0\n",
"9 1.0 1\n",
"10 0.0 0\n",
"11 0.0 1\n",
"12 0.0 0\n",
"13 1.0 1\n",
"14 0.0 1\n",
"15 0.0 1\n",
"16 0.0 0\n",
"17 0.0 0\n",
"18 0.0 1\n",
"19 1.0 1\n",
"20 0.0 1\n",
"21 1.0 1\n",
"22 0.0 0\n",
"23 0.0 1\n",
"24 0.0 1\n",
"25 0.0 0\n",
"26 0.0 0\n",
"27 1.0 1\n",
"28 1.0 1\n",
"29 0.0 0\n",
".. ... ...\n",
"124 1.0 1\n",
"125 1.0 1\n",
"126 1.0 1\n",
"127 1.0 1\n",
"128 0.0 1\n",
"129 1.0 0\n",
"130 0.0 1\n",
"131 0.0 0\n",
"132 1.0 1\n",
"133 0.0 1\n",
"134 0.0 1\n",
"135 1.0 1\n",
"136 1.0 1\n",
"137 1.0 1\n",
"138 0.0 0\n",
"139 0.0 1\n",
"140 0.0 0\n",
"141 0.0 0\n",
"142 0.0 1\n",
"143 0.0 1\n",
"144 0.0 0\n",
"145 0.0 0\n",
"146 0.0 0\n",
"147 0.0 0\n",
"148 0.0 0\n",
"149 0.0 0\n",
"150 0.0 1\n",
"151 1.0 1\n",
"152 0.0 1\n",
"153 0.0 1\n",
"\n",
"[154 rows x 2 columns]"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"yy"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}