Untitled.ipynb

YI

Uploaded on: April 21, 2019, 10:22 a.m.
.python

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the training and test CSV files; both paths are relative to the working directory.\n",
    "df = pd.read_csv(filepath_or_buffer=\"train.csv\")\n",
    "df_test = pd.read_csv(filepath_or_buffer=\"test.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>sex</th>\n",
       "      <th>length</th>\n",
       "      <th>diameter</th>\n",
       "      <th>height</th>\n",
       "      <th>whole_weight</th>\n",
       "      <th>shucked_weight</th>\n",
       "      <th>viscera_weight</th>\n",
       "      <th>shell_weight</th>\n",
       "      <th>rings</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1721</td>\n",
       "      <td>M</td>\n",
       "      <td>0.655</td>\n",
       "      <td>0.550</td>\n",
       "      <td>0.180</td>\n",
       "      <td>1.2740</td>\n",
       "      <td>0.5860</td>\n",
       "      <td>0.2810</td>\n",
       "      <td>0.3650</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3757</td>\n",
       "      <td>I</td>\n",
       "      <td>0.520</td>\n",
       "      <td>0.410</td>\n",
       "      <td>0.140</td>\n",
       "      <td>0.6990</td>\n",
       "      <td>0.3395</td>\n",
       "      <td>0.1290</td>\n",
       "      <td>0.1945</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3723</td>\n",
       "      <td>I</td>\n",
       "      <td>0.470</td>\n",
       "      <td>0.355</td>\n",
       "      <td>0.120</td>\n",
       "      <td>0.4915</td>\n",
       "      <td>0.1765</td>\n",
       "      <td>0.1125</td>\n",
       "      <td>0.1325</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2005</td>\n",
       "      <td>I</td>\n",
       "      <td>0.395</td>\n",
       "      <td>0.290</td>\n",
       "      <td>0.095</td>\n",
       "      <td>0.3000</td>\n",
       "      <td>0.1580</td>\n",
       "      <td>0.0680</td>\n",
       "      <td>0.0780</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1279</td>\n",
       "      <td>I</td>\n",
       "      <td>0.495</td>\n",
       "      <td>0.380</td>\n",
       "      <td>0.130</td>\n",
       "      <td>0.5125</td>\n",
       "      <td>0.2185</td>\n",
       "      <td>0.1160</td>\n",
       "      <td>0.1600</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1230</td>\n",
       "      <td>I</td>\n",
       "      <td>0.365</td>\n",
       "      <td>0.270</td>\n",
       "      <td>0.085</td>\n",
       "      <td>0.1960</td>\n",
       "      <td>0.0825</td>\n",
       "      <td>0.0375</td>\n",
       "      <td>0.0600</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>4037</td>\n",
       "      <td>I</td>\n",
       "      <td>0.540</td>\n",
       "      <td>0.415</td>\n",
       "      <td>0.155</td>\n",
       "      <td>0.7020</td>\n",
       "      <td>0.3220</td>\n",
       "      <td>0.1670</td>\n",
       "      <td>0.1900</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>2296</td>\n",
       "      <td>F</td>\n",
       "      <td>0.535</td>\n",
       "      <td>0.450</td>\n",
       "      <td>0.135</td>\n",
       "      <td>0.8075</td>\n",
       "      <td>0.3220</td>\n",
       "      <td>0.1810</td>\n",
       "      <td>0.2500</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>1811</td>\n",
       "      <td>M</td>\n",
       "      <td>0.650</td>\n",
       "      <td>0.525</td>\n",
       "      <td>0.190</td>\n",
       "      <td>1.6125</td>\n",
       "      <td>0.7770</td>\n",
       "      <td>0.3685</td>\n",
       "      <td>0.3965</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>3580</td>\n",
       "      <td>F</td>\n",
       "      <td>0.620</td>\n",
       "      <td>0.480</td>\n",
       "      <td>0.165</td>\n",
       "      <td>1.0430</td>\n",
       "      <td>0.4835</td>\n",
       "      <td>0.2210</td>\n",
       "      <td>0.3100</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>4098</td>\n",
       "      <td>F</td>\n",
       "      <td>0.650</td>\n",
       "      <td>0.495</td>\n",
       "      <td>0.160</td>\n",
       "      <td>1.3105</td>\n",
       "      <td>0.5770</td>\n",
       "      <td>0.3315</td>\n",
       "      <td>0.3550</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>2466</td>\n",
       "      <td>M</td>\n",
       "      <td>0.425</td>\n",
       "      <td>0.325</td>\n",
       "      <td>0.120</td>\n",
       "      <td>0.3755</td>\n",
       "      <td>0.1420</td>\n",
       "      <td>0.1065</td>\n",
       "      <td>0.1050</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>2667</td>\n",
       "      <td>F</td>\n",
       "      <td>0.585</td>\n",
       "      <td>0.450</td>\n",
       "      <td>0.150</td>\n",
       "      <td>0.9380</td>\n",
       "      <td>0.4670</td>\n",
       "      <td>0.2030</td>\n",
       "      <td>0.2250</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>3437</td>\n",
       "      <td>I</td>\n",
       "      <td>0.395</td>\n",
       "      <td>0.300</td>\n",
       "      <td>0.090</td>\n",
       "      <td>0.2790</td>\n",
       "      <td>0.1340</td>\n",
       "      <td>0.0490</td>\n",
       "      <td>0.0750</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>1354</td>\n",
       "      <td>I</td>\n",
       "      <td>0.600</td>\n",
       "      <td>0.475</td>\n",
       "      <td>0.150</td>\n",
       "      <td>1.1200</td>\n",
       "      <td>0.5650</td>\n",
       "      <td>0.2465</td>\n",
       "      <td>0.2700</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>2122</td>\n",
       "      <td>F</td>\n",
       "      <td>0.435</td>\n",
       "      <td>0.350</td>\n",
       "      <td>0.120</td>\n",
       "      <td>0.4585</td>\n",
       "      <td>0.1920</td>\n",
       "      <td>0.1000</td>\n",
       "      <td>0.1300</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>249</td>\n",
       "      <td>I</td>\n",
       "      <td>0.345</td>\n",
       "      <td>0.270</td>\n",
       "      <td>0.110</td>\n",
       "      <td>0.2135</td>\n",
       "      <td>0.0820</td>\n",
       "      <td>0.0545</td>\n",
       "      <td>0.0700</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>1088</td>\n",
       "      <td>I</td>\n",
       "      <td>0.450</td>\n",
       "      <td>0.340</td>\n",
       "      <td>0.120</td>\n",
       "      <td>0.4925</td>\n",
       "      <td>0.2410</td>\n",
       "      <td>0.1075</td>\n",
       "      <td>0.1200</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>2937</td>\n",
       "      <td>M</td>\n",
       "      <td>0.625</td>\n",
       "      <td>0.515</td>\n",
       "      <td>0.165</td>\n",
       "      <td>1.2170</td>\n",
       "      <td>0.6670</td>\n",
       "      <td>0.2065</td>\n",
       "      <td>0.3115</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>3516</td>\n",
       "      <td>F</td>\n",
       "      <td>0.700</td>\n",
       "      <td>0.575</td>\n",
       "      <td>0.200</td>\n",
       "      <td>1.7365</td>\n",
       "      <td>0.7755</td>\n",
       "      <td>0.3965</td>\n",
       "      <td>0.4610</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>3890</td>\n",
       "      <td>M</td>\n",
       "      <td>0.515</td>\n",
       "      <td>0.400</td>\n",
       "      <td>0.140</td>\n",
       "      <td>0.7365</td>\n",
       "      <td>0.2955</td>\n",
       "      <td>0.1840</td>\n",
       "      <td>0.1850</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>129</td>\n",
       "      <td>M</td>\n",
       "      <td>0.710</td>\n",
       "      <td>0.540</td>\n",
       "      <td>0.165</td>\n",
       "      <td>1.9590</td>\n",
       "      <td>0.7665</td>\n",
       "      <td>0.2610</td>\n",
       "      <td>0.7800</td>\n",
       "      <td>18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>2729</td>\n",
       "      <td>I</td>\n",
       "      <td>0.405</td>\n",
       "      <td>0.305</td>\n",
       "      <td>0.100</td>\n",
       "      <td>0.2680</td>\n",
       "      <td>0.1145</td>\n",
       "      <td>0.0530</td>\n",
       "      <td>0.0850</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>3690</td>\n",
       "      <td>M</td>\n",
       "      <td>0.640</td>\n",
       "      <td>0.500</td>\n",
       "      <td>0.175</td>\n",
       "      <td>1.2730</td>\n",
       "      <td>0.5065</td>\n",
       "      <td>0.2925</td>\n",
       "      <td>0.4050</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>1638</td>\n",
       "      <td>I</td>\n",
       "      <td>0.575</td>\n",
       "      <td>0.445</td>\n",
       "      <td>0.170</td>\n",
       "      <td>0.8015</td>\n",
       "      <td>0.3475</td>\n",
       "      <td>0.1465</td>\n",
       "      <td>0.2500</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>2337</td>\n",
       "      <td>M</td>\n",
       "      <td>0.560</td>\n",
       "      <td>0.455</td>\n",
       "      <td>0.165</td>\n",
       "      <td>0.8600</td>\n",
       "      <td>0.4015</td>\n",
       "      <td>0.1695</td>\n",
       "      <td>0.2450</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>3139</td>\n",
       "      <td>I</td>\n",
       "      <td>0.335</td>\n",
       "      <td>0.260</td>\n",
       "      <td>0.090</td>\n",
       "      <td>0.1835</td>\n",
       "      <td>0.0780</td>\n",
       "      <td>0.0240</td>\n",
       "      <td>0.0650</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>1303</td>\n",
       "      <td>F</td>\n",
       "      <td>0.535</td>\n",
       "      <td>0.410</td>\n",
       "      <td>0.130</td>\n",
       "      <td>0.7145</td>\n",
       "      <td>0.3350</td>\n",
       "      <td>0.1440</td>\n",
       "      <td>0.2075</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>587</td>\n",
       "      <td>F</td>\n",
       "      <td>0.550</td>\n",
       "      <td>0.410</td>\n",
       "      <td>0.145</td>\n",
       "      <td>0.8285</td>\n",
       "      <td>0.3095</td>\n",
       "      <td>0.1905</td>\n",
       "      <td>0.2500</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>3772</td>\n",
       "      <td>M</td>\n",
       "      <td>0.575</td>\n",
       "      <td>0.465</td>\n",
       "      <td>0.120</td>\n",
       "      <td>1.0535</td>\n",
       "      <td>0.5160</td>\n",
       "      <td>0.2185</td>\n",
       "      <td>0.2350</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3311</th>\n",
       "      <td>2229</td>\n",
       "      <td>M</td>\n",
       "      <td>0.370</td>\n",
       "      <td>0.280</td>\n",
       "      <td>0.095</td>\n",
       "      <td>0.2225</td>\n",
       "      <td>0.0805</td>\n",
       "      <td>0.0510</td>\n",
       "      <td>0.0750</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3312</th>\n",
       "      <td>2243</td>\n",
       "      <td>M</td>\n",
       "      <td>0.465</td>\n",
       "      <td>0.360</td>\n",
       "      <td>0.130</td>\n",
       "      <td>0.5265</td>\n",
       "      <td>0.2105</td>\n",
       "      <td>0.1185</td>\n",
       "      <td>0.1650</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3313</th>\n",
       "      <td>4100</td>\n",
       "      <td>F</td>\n",
       "      <td>0.675</td>\n",
       "      <td>0.520</td>\n",
       "      <td>0.175</td>\n",
       "      <td>1.4940</td>\n",
       "      <td>0.7365</td>\n",
       "      <td>0.3055</td>\n",
       "      <td>0.3700</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3314</th>\n",
       "      <td>431</td>\n",
       "      <td>M</td>\n",
       "      <td>0.600</td>\n",
       "      <td>0.470</td>\n",
       "      <td>0.155</td>\n",
       "      <td>1.0360</td>\n",
       "      <td>0.4375</td>\n",
       "      <td>0.1960</td>\n",
       "      <td>0.3250</td>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3315</th>\n",
       "      <td>1143</td>\n",
       "      <td>M</td>\n",
       "      <td>0.575</td>\n",
       "      <td>0.445</td>\n",
       "      <td>0.145</td>\n",
       "      <td>0.8470</td>\n",
       "      <td>0.4150</td>\n",
       "      <td>0.1945</td>\n",
       "      <td>0.2200</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3316</th>\n",
       "      <td>3504</td>\n",
       "      <td>F</td>\n",
       "      <td>0.620</td>\n",
       "      <td>0.510</td>\n",
       "      <td>0.180</td>\n",
       "      <td>1.2330</td>\n",
       "      <td>0.5920</td>\n",
       "      <td>0.2740</td>\n",
       "      <td>0.3220</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3317</th>\n",
       "      <td>2925</td>\n",
       "      <td>I</td>\n",
       "      <td>0.605</td>\n",
       "      <td>0.480</td>\n",
       "      <td>0.155</td>\n",
       "      <td>0.9995</td>\n",
       "      <td>0.4250</td>\n",
       "      <td>0.1985</td>\n",
       "      <td>0.3000</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3318</th>\n",
       "      <td>1036</td>\n",
       "      <td>F</td>\n",
       "      <td>0.660</td>\n",
       "      <td>0.505</td>\n",
       "      <td>0.185</td>\n",
       "      <td>1.5280</td>\n",
       "      <td>0.6900</td>\n",
       "      <td>0.3025</td>\n",
       "      <td>0.4410</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3319</th>\n",
       "      <td>3962</td>\n",
       "      <td>F</td>\n",
       "      <td>0.720</td>\n",
       "      <td>0.575</td>\n",
       "      <td>0.195</td>\n",
       "      <td>2.1505</td>\n",
       "      <td>1.0745</td>\n",
       "      <td>0.3820</td>\n",
       "      <td>0.5850</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3320</th>\n",
       "      <td>1047</td>\n",
       "      <td>F</td>\n",
       "      <td>0.705</td>\n",
       "      <td>0.535</td>\n",
       "      <td>0.180</td>\n",
       "      <td>1.6850</td>\n",
       "      <td>0.6930</td>\n",
       "      <td>0.4200</td>\n",
       "      <td>0.4045</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3321</th>\n",
       "      <td>1273</td>\n",
       "      <td>I</td>\n",
       "      <td>0.475</td>\n",
       "      <td>0.380</td>\n",
       "      <td>0.120</td>\n",
       "      <td>0.4410</td>\n",
       "      <td>0.1785</td>\n",
       "      <td>0.0885</td>\n",
       "      <td>0.1505</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3322</th>\n",
       "      <td>3682</td>\n",
       "      <td>M</td>\n",
       "      <td>0.620</td>\n",
       "      <td>0.500</td>\n",
       "      <td>0.180</td>\n",
       "      <td>1.3915</td>\n",
       "      <td>0.7260</td>\n",
       "      <td>0.2795</td>\n",
       "      <td>0.3320</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3323</th>\n",
       "      <td>383</td>\n",
       "      <td>M</td>\n",
       "      <td>0.470</td>\n",
       "      <td>0.375</td>\n",
       "      <td>0.120</td>\n",
       "      <td>0.5565</td>\n",
       "      <td>0.2260</td>\n",
       "      <td>0.1220</td>\n",
       "      <td>0.1950</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3324</th>\n",
       "      <td>3205</td>\n",
       "      <td>M</td>\n",
       "      <td>0.335</td>\n",
       "      <td>0.265</td>\n",
       "      <td>0.095</td>\n",
       "      <td>0.1975</td>\n",
       "      <td>0.0795</td>\n",
       "      <td>0.0375</td>\n",
       "      <td>0.0700</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3325</th>\n",
       "      <td>1917</td>\n",
       "      <td>M</td>\n",
       "      <td>0.600</td>\n",
       "      <td>0.475</td>\n",
       "      <td>0.150</td>\n",
       "      <td>0.9900</td>\n",
       "      <td>0.3860</td>\n",
       "      <td>0.2195</td>\n",
       "      <td>0.3105</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3326</th>\n",
       "      <td>650</td>\n",
       "      <td>M</td>\n",
       "      <td>0.255</td>\n",
       "      <td>0.180</td>\n",
       "      <td>0.065</td>\n",
       "      <td>0.0790</td>\n",
       "      <td>0.0340</td>\n",
       "      <td>0.0140</td>\n",
       "      <td>0.0250</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3327</th>\n",
       "      <td>4074</td>\n",
       "      <td>I</td>\n",
       "      <td>0.520</td>\n",
       "      <td>0.400</td>\n",
       "      <td>0.140</td>\n",
       "      <td>0.6220</td>\n",
       "      <td>0.2780</td>\n",
       "      <td>0.1455</td>\n",
       "      <td>0.1690</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3328</th>\n",
       "      <td>4084</td>\n",
       "      <td>F</td>\n",
       "      <td>0.575</td>\n",
       "      <td>0.480</td>\n",
       "      <td>0.170</td>\n",
       "      <td>1.1000</td>\n",
       "      <td>0.5060</td>\n",
       "      <td>0.2485</td>\n",
       "      <td>0.3100</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3329</th>\n",
       "      <td>340</td>\n",
       "      <td>M</td>\n",
       "      <td>0.575</td>\n",
       "      <td>0.455</td>\n",
       "      <td>0.145</td>\n",
       "      <td>1.1650</td>\n",
       "      <td>0.5810</td>\n",
       "      <td>0.2275</td>\n",
       "      <td>0.3000</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3330</th>\n",
       "      <td>3526</td>\n",
       "      <td>I</td>\n",
       "      <td>0.335</td>\n",
       "      <td>0.260</td>\n",
       "      <td>0.085</td>\n",
       "      <td>0.1920</td>\n",
       "      <td>0.0970</td>\n",
       "      <td>0.0300</td>\n",
       "      <td>0.0540</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3331</th>\n",
       "      <td>748</td>\n",
       "      <td>M</td>\n",
       "      <td>0.535</td>\n",
       "      <td>0.420</td>\n",
       "      <td>0.130</td>\n",
       "      <td>0.8055</td>\n",
       "      <td>0.3010</td>\n",
       "      <td>0.1810</td>\n",
       "      <td>0.2800</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3332</th>\n",
       "      <td>3952</td>\n",
       "      <td>I</td>\n",
       "      <td>0.315</td>\n",
       "      <td>0.235</td>\n",
       "      <td>0.080</td>\n",
       "      <td>0.1800</td>\n",
       "      <td>0.0800</td>\n",
       "      <td>0.0450</td>\n",
       "      <td>0.0470</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3333</th>\n",
       "      <td>1382</td>\n",
       "      <td>F</td>\n",
       "      <td>0.625</td>\n",
       "      <td>0.515</td>\n",
       "      <td>0.160</td>\n",
       "      <td>1.2640</td>\n",
       "      <td>0.5715</td>\n",
       "      <td>0.3260</td>\n",
       "      <td>0.3210</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3334</th>\n",
       "      <td>579</td>\n",
       "      <td>F</td>\n",
       "      <td>0.630</td>\n",
       "      <td>0.480</td>\n",
       "      <td>0.175</td>\n",
       "      <td>1.3675</td>\n",
       "      <td>0.5015</td>\n",
       "      <td>0.3035</td>\n",
       "      <td>0.5150</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3335</th>\n",
       "      <td>3562</td>\n",
       "      <td>F</td>\n",
       "      <td>0.570</td>\n",
       "      <td>0.420</td>\n",
       "      <td>0.160</td>\n",
       "      <td>0.8875</td>\n",
       "      <td>0.4315</td>\n",
       "      <td>0.1915</td>\n",
       "      <td>0.2230</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3336</th>\n",
       "      <td>1311</td>\n",
       "      <td>I</td>\n",
       "      <td>0.550</td>\n",
       "      <td>0.430</td>\n",
       "      <td>0.145</td>\n",
       "      <td>0.7895</td>\n",
       "      <td>0.3745</td>\n",
       "      <td>0.1710</td>\n",
       "      <td>0.2230</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3337</th>\n",
       "      <td>99</td>\n",
       "      <td>F</td>\n",
       "      <td>0.475</td>\n",
       "      <td>0.375</td>\n",
       "      <td>0.125</td>\n",
       "      <td>0.5785</td>\n",
       "      <td>0.2775</td>\n",
       "      <td>0.0850</td>\n",
       "      <td>0.1550</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3338</th>\n",
       "      <td>2535</td>\n",
       "      <td>F</td>\n",
       "      <td>0.640</td>\n",
       "      <td>0.500</td>\n",
       "      <td>0.180</td>\n",
       "      <td>1.4995</td>\n",
       "      <td>0.5930</td>\n",
       "      <td>0.3140</td>\n",
       "      <td>0.4310</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3339</th>\n",
       "      <td>3253</td>\n",
       "      <td>I</td>\n",
       "      <td>0.430</td>\n",
       "      <td>0.350</td>\n",
       "      <td>0.105</td>\n",
       "      <td>0.3660</td>\n",
       "      <td>0.1705</td>\n",
       "      <td>0.0855</td>\n",
       "      <td>0.1100</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3340</th>\n",
       "      <td>1789</td>\n",
       "      <td>F</td>\n",
       "      <td>0.545</td>\n",
       "      <td>0.385</td>\n",
       "      <td>0.150</td>\n",
       "      <td>1.1185</td>\n",
       "      <td>0.5425</td>\n",
       "      <td>0.2445</td>\n",
       "      <td>0.2845</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3341 rows × 10 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        ID sex  length  diameter  height  whole_weight  shucked_weight  \\\n",
       "0     1721   M   0.655     0.550   0.180        1.2740          0.5860   \n",
       "1     3757   I   0.520     0.410   0.140        0.6990          0.3395   \n",
       "2     3723   I   0.470     0.355   0.120        0.4915          0.1765   \n",
       "3     2005   I   0.395     0.290   0.095        0.3000          0.1580   \n",
       "4     1279   I   0.495     0.380   0.130        0.5125          0.2185   \n",
       "5     1230   I   0.365     0.270   0.085        0.1960          0.0825   \n",
       "6     4037   I   0.540     0.415   0.155        0.7020          0.3220   \n",
       "7     2296   F   0.535     0.450   0.135        0.8075          0.3220   \n",
       "8     1811   M   0.650     0.525   0.190        1.6125          0.7770   \n",
       "9     3580   F   0.620     0.480   0.165        1.0430          0.4835   \n",
       "10    4098   F   0.650     0.495   0.160        1.3105          0.5770   \n",
       "11    2466   M   0.425     0.325   0.120        0.3755          0.1420   \n",
       "12    2667   F   0.585     0.450   0.150        0.9380          0.4670   \n",
       "13    3437   I   0.395     0.300   0.090        0.2790          0.1340   \n",
       "14    1354   I   0.600     0.475   0.150        1.1200          0.5650   \n",
       "15    2122   F   0.435     0.350   0.120        0.4585          0.1920   \n",
       "16     249   I   0.345     0.270   0.110        0.2135          0.0820   \n",
       "17    1088   I   0.450     0.340   0.120        0.4925          0.2410   \n",
       "18    2937   M   0.625     0.515   0.165        1.2170          0.6670   \n",
       "19    3516   F   0.700     0.575   0.200        1.7365          0.7755   \n",
       "20    3890   M   0.515     0.400   0.140        0.7365          0.2955   \n",
       "21     129   M   0.710     0.540   0.165        1.9590          0.7665   \n",
       "22    2729   I   0.405     0.305   0.100        0.2680          0.1145   \n",
       "23    3690   M   0.640     0.500   0.175        1.2730          0.5065   \n",
       "24    1638   I   0.575     0.445   0.170        0.8015          0.3475   \n",
       "25    2337   M   0.560     0.455   0.165        0.8600          0.4015   \n",
       "26    3139   I   0.335     0.260   0.090        0.1835          0.0780   \n",
       "27    1303   F   0.535     0.410   0.130        0.7145          0.3350   \n",
       "28     587   F   0.550     0.410   0.145        0.8285          0.3095   \n",
       "29    3772   M   0.575     0.465   0.120        1.0535          0.5160   \n",
       "...    ...  ..     ...       ...     ...           ...             ...   \n",
       "3311  2229   M   0.370     0.280   0.095        0.2225          0.0805   \n",
       "3312  2243   M   0.465     0.360   0.130        0.5265          0.2105   \n",
       "3313  4100   F   0.675     0.520   0.175        1.4940          0.7365   \n",
       "3314   431   M   0.600     0.470   0.155        1.0360          0.4375   \n",
       "3315  1143   M   0.575     0.445   0.145        0.8470          0.4150   \n",
       "3316  3504   F   0.620     0.510   0.180        1.2330          0.5920   \n",
       "3317  2925   I   0.605     0.480   0.155        0.9995          0.4250   \n",
       "3318  1036   F   0.660     0.505   0.185        1.5280          0.6900   \n",
       "3319  3962   F   0.720     0.575   0.195        2.1505          1.0745   \n",
       "3320  1047   F   0.705     0.535   0.180        1.6850          0.6930   \n",
       "3321  1273   I   0.475     0.380   0.120        0.4410          0.1785   \n",
       "3322  3682   M   0.620     0.500   0.180        1.3915          0.7260   \n",
       "3323   383   M   0.470     0.375   0.120        0.5565          0.2260   \n",
       "3324  3205   M   0.335     0.265   0.095        0.1975          0.0795   \n",
       "3325  1917   M   0.600     0.475   0.150        0.9900          0.3860   \n",
       "3326   650   M   0.255     0.180   0.065        0.0790          0.0340   \n",
       "3327  4074   I   0.520     0.400   0.140        0.6220          0.2780   \n",
       "3328  4084   F   0.575     0.480   0.170        1.1000          0.5060   \n",
       "3329   340   M   0.575     0.455   0.145        1.1650          0.5810   \n",
       "3330  3526   I   0.335     0.260   0.085        0.1920          0.0970   \n",
       "3331   748   M   0.535     0.420   0.130        0.8055          0.3010   \n",
       "3332  3952   I   0.315     0.235   0.080        0.1800          0.0800   \n",
       "3333  1382   F   0.625     0.515   0.160        1.2640          0.5715   \n",
       "3334   579   F   0.630     0.480   0.175        1.3675          0.5015   \n",
       "3335  3562   F   0.570     0.420   0.160        0.8875          0.4315   \n",
       "3336  1311   I   0.550     0.430   0.145        0.7895          0.3745   \n",
       "3337    99   F   0.475     0.375   0.125        0.5785          0.2775   \n",
       "3338  2535   F   0.640     0.500   0.180        1.4995          0.5930   \n",
       "3339  3253   I   0.430     0.350   0.105        0.3660          0.1705   \n",
       "3340  1789   F   0.545     0.385   0.150        1.1185          0.5425   \n",
       "\n",
       "      viscera_weight  shell_weight  rings  \n",
       "0             0.2810        0.3650     10  \n",
       "1             0.1290        0.1945     10  \n",
       "2             0.1125        0.1325      9  \n",
       "3             0.0680        0.0780      7  \n",
       "4             0.1160        0.1600      7  \n",
       "5             0.0375        0.0600      7  \n",
       "6             0.1670        0.1900     10  \n",
       "7             0.1810        0.2500     13  \n",
       "8             0.3685        0.3965     11  \n",
       "9             0.2210        0.3100     10  \n",
       "10            0.3315        0.3550      9  \n",
       "11            0.1065        0.1050      9  \n",
       "12            0.2030        0.2250      7  \n",
       "13            0.0490        0.0750      8  \n",
       "14            0.2465        0.2700     10  \n",
       "15            0.1000        0.1300     11  \n",
       "16            0.0545        0.0700      7  \n",
       "17            0.1075        0.1200      6  \n",
       "18            0.2065        0.3115     10  \n",
       "19            0.3965        0.4610     11  \n",
       "20            0.1840        0.1850     16  \n",
       "21            0.2610        0.7800     18  \n",
       "22            0.0530        0.0850      7  \n",
       "23            0.2925        0.4050     13  \n",
       "24            0.1465        0.2500      9  \n",
       "25            0.1695        0.2450     11  \n",
       "26            0.0240        0.0650     11  \n",
       "27            0.1440        0.2075      9  \n",
       "28            0.1905        0.2500     13  \n",
       "29            0.2185        0.2350      9  \n",
       "...              ...           ...    ...  \n",
       "3311          0.0510        0.0750      7  \n",
       "3312          0.1185        0.1650     10  \n",
       "3313          0.3055        0.3700      9  \n",
       "3314          0.1960        0.3250     20  \n",
       "3315          0.1945        0.2200      9  \n",
       "3316          0.2740        0.3220     10  \n",
       "3317          0.1985        0.3000     10  \n",
       "3318          0.3025        0.4410     11  \n",
       "3319          0.3820        0.5850     10  \n",
       "3320          0.4200        0.4045     12  \n",
       "3321          0.0885        0.1505      8  \n",
       "3322          0.2795        0.3320     11  \n",
       "3323          0.1220        0.1950     12  \n",
       "3324          0.0375        0.0700      9  \n",
       "3325          0.2195        0.3105     10  \n",
       "3326          0.0140        0.0250      5  \n",
       "3327          0.1455        0.1690      8  \n",
       "3328          0.2485        0.3100     10  \n",
       "3329          0.2275        0.3000     14  \n",
       "3330          0.0300        0.0540      6  \n",
       "3331          0.1810        0.2800     14  \n",
       "3332          0.0450        0.0470      5  \n",
       "3333          0.3260        0.3210      9  \n",
       "3334          0.3035        0.5150     17  \n",
       "3335          0.1915        0.2230      8  \n",
       "3336          0.1710        0.2230     11  \n",
       "3337          0.0850        0.1550     10  \n",
       "3338          0.3140        0.4310     11  \n",
       "3339          0.0855        0.1100      6  \n",
       "3340          0.2445        0.2845      9  \n",
       "\n",
       "[3341 rows x 10 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ID                0\n",
       "sex               0\n",
       "length            0\n",
       "diameter          0\n",
       "height            0\n",
       "whole_weight      0\n",
       "shucked_weight    0\n",
       "viscera_weight    0\n",
       "shell_weight      0\n",
       "rings             0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-column count of missing values in the training frame.\n",
    "df.isna().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['F', 'I', 'M'], dtype=object)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Sorted distinct labels found in the categorical `sex` column.\n",
    "np.unique(df.loc[:, \"sex\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LabelEncoder()"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.preprocessing import LabelEncoder\n",
    "\n",
    "# Learn the label -> integer mapping from the training labels; `lb` is reused\n",
    "# later to encode the test frame with the same mapping.\n",
    "lb = LabelEncoder().fit(df[\"sex\"])\n",
    "lb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace the string labels in `sex` with the integer codes learned by `lb`.\n",
    "# The encoded array is assigned directly (positionally) instead of through an\n",
    "# intermediate DataFrame, which would be re-aligned on the index and could\n",
    "# yield NaN if `df` ever had a non-default index.\n",
    "# The dtype guard makes the cell safe to re-run: once the column is numeric\n",
    "# there is nothing left to encode, and transform() would raise on the codes.\n",
    "if not pd.api.types.is_numeric_dtype(df[\"sex\"]):\n",
    "    df[\"sex\"] = lb.transform(df[\"sex\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Target: the ring count.\n",
    "y = df[\"rings\"]\n",
    "\n",
    "# Features: everything except the target and the row identifier. `ID` is\n",
    "# removed from the test features as well, so it must be removed here for the\n",
    "# fitted model to accept the test frame (same columns at fit and predict time).\n",
    "X = df.drop(columns=[\"ID\", \"rings\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(3341, 9)"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (rows, columns) of the training frame.\n",
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2672, 8) (2672,)\n",
      "(669, 8) (669,)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "4.720478325859491"
      ]
     },
     "execution_count": 98,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "from sklearn import datasets, linear_model\n",
    "from sklearn.metrics import mean_squared_error, r2_score\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# Hold out 20% of the rows for validation. The fixed seed keeps the split --\n",
    "# and therefore every score computed from it below -- identical across re-runs.\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=0.2, random_state=42\n",
    ")\n",
    "print(X_train.shape, y_train.shape)\n",
    "print(X_test.shape, y_test.shape)\n",
    "\n",
    "# Baseline: ordinary least squares fitted on the training split.\n",
    "regr = linear_model.LinearRegression()\n",
    "regr.fit(X_train, y_train)\n",
    "\n",
    "# The target is an integer count, so score the predictions after rounding\n",
    "# them to the nearest integer.\n",
    "y_pred = np.rint(regr.predict(X_test))\n",
    "\n",
    "# Validation MSE of the rounded predictions (displayed as the cell output).\n",
    "mean_squared_error(y_test, y_pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4.800862261642637"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Linear regression scored on the raw (unrounded) predictions. The fitted\n",
    "# estimator is kept as `model` for the test-set prediction further down.\n",
    "lm = linear_model.LinearRegression()\n",
    "model = lm.fit(X_train, y_train)\n",
    "y_pred = model.predict(X_test)\n",
    "\n",
    "# Only the last expression of a cell is displayed, so the R^2 score has to be\n",
    "# printed explicitly or it is computed and thrown away.\n",
    "print(\"R^2:\", model.score(X_test, y_test))\n",
    "\n",
    "# Validation MSE (displayed as the cell output).\n",
    "mean_squared_error(y_test, y_pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Test-set features: a new frame without the identifier column.\n",
    "# `df_test` itself is left untouched because its IDs are needed for the submission.\n",
    "X_kk = df_test.drop(columns=[\"ID\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Encode the test labels with the encoder fitted on the training labels so\n",
    "# both frames share one label -> integer mapping.\n",
    "# The encoded array is assigned directly (positionally) instead of through an\n",
    "# intermediate DataFrame, which would be re-aligned on the index.\n",
    "# The dtype guard makes the cell safe to re-run after the column is encoded.\n",
    "if not pd.api.types.is_numeric_dtype(X_kk[\"sex\"]):\n",
    "    X_kk[\"sex\"] = lb.transform(X_kk[\"sex\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2672, 8)"
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (rows, columns) of the training split.\n",
    "X_train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(835, 8)"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (rows, columns) of the test features; compare the column count with X_train.shape.\n",
    "X_kk.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Predict on the test features and round every value to the nearest integer.\n",
    "prediction = [round(raw_value) for raw_value in model.predict(X_kk)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Place the test IDs and the rounded predictions side by side, one row per test sample.\n",
    "submit = pd.concat(\n",
    "    objs=[df_test[\"ID\"], pd.DataFrame(data=prediction)],\n",
    "    axis=\"columns\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Name the two submission columns: the identifier and the predicted target.\n",
    "submit.columns = pd.Index([\"ID\", \"rings\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write exactly the two named columns. Without index=False pandas also writes\n",
    "# the row index as an extra, unnamed first column.\n",
    "submit.to_csv(\"submit.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4.938434539226715"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import statsmodels.api as sm\n",
    "\n",
    "# statsmodels takes (endog, exog) -- the reverse of sklearn's fit(X, y) -- and\n",
    "# fits no intercept unless a constant column is added explicitly; add one so\n",
    "# the result is comparable with LinearRegression.\n",
    "# The fit is stored as `ols_model` rather than `model`, so re-running the\n",
    "# test-set prediction cell after this one still uses the sklearn estimator.\n",
    "ols_model = sm.OLS(y_train, sm.add_constant(X_train, has_constant=\"add\")).fit()\n",
    "\n",
    "# The validation features need the same constant column as the training design matrix.\n",
    "y_pred = ols_model.predict(sm.add_constant(X_test, has_constant=\"add\"))\n",
    "\n",
    "# Validation MSE (displayed as the cell output).\n",
    "mean_squared_error(y_test, y_pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "6.589766517294562"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn import linear_model\n",
    "\n",
    "# Lasso regression with alpha=0.1, fitted on the training split.\n",
    "clf = linear_model.Lasso(alpha=0.1).fit(X_train, y_train)\n",
    "y_pred = clf.predict(X_test)\n",
    "\n",
    "# Validation MSE (displayed as the cell output).\n",
    "mean_squared_error(y_true=y_test, y_pred=y_pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.29126584701343283"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}