diff --git a/notebooks/Block_0/Exercise Sheet - Basics Numpy.ipynb b/notebooks/Block_0/Exercise Sheet - Basics Numpy.ipynb index 1448f33090fe73ef340591659ef141bcfa3809f3..8dff11b44153eca7940c63d131916d238016b355 100644 --- a/notebooks/Block_0/Exercise Sheet - Basics Numpy.ipynb +++ b/notebooks/Block_0/Exercise Sheet - Basics Numpy.ipynb @@ -717,21 +717,542 @@ "metadata": {}, "source": [ "# Problem 4: Fuel Consumption\n", - "In this exercise, you will analyse a dataset containing information on Fuel consumption of cars in the eighties. The data contains 3 columns, the weight of the car in Pounds(1 Pound = 0.454 kg), the range in Miles per Gallon(1 mile=1.61 km; 1 gallon=3.79 l), and the type of car. Download the dataset and save it somewhere practically, possibly in the same folder as this notebook." + "In this exercise, you will analyse a dataset containing information on the fuel consumption of cars in the eighties. Besides a running index (X), the data contains 3 columns: the weight of the car in pounds (1 pound = 0.454 kg), the range in miles per gallon (1 mile = 1.61 km; 1 gallon = 3.79 l), and the type of car. Download the dataset and save it somewhere practical, possibly in the same folder as this notebook." 
] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 44, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>X</th>\n", + " <th>weight</th>\n", + " <th>mpg</th>\n", + " <th>type</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>2560</td>\n", + " <td>33</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>2</td>\n", + " <td>2345</td>\n", + " <td>33</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>3</td>\n", + " <td>1845</td>\n", + " <td>37</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>4</td>\n", + " <td>2260</td>\n", + " <td>32</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>5</td>\n", + " <td>2440</td>\n", + " <td>32</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>6</td>\n", + " <td>2285</td>\n", + " <td>26</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>7</td>\n", + " <td>2275</td>\n", + " <td>33</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>8</td>\n", + " <td>2350</td>\n", + " <td>28</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>9</td>\n", + " <td>2295</td>\n", + " <td>25</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>10</td>\n", + " <td>1900</td>\n", + " <td>34</td>\n", + " <td>Small</td>\n", + 
" </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>11</td>\n", + " <td>2390</td>\n", + " <td>29</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>12</td>\n", + " <td>2075</td>\n", + " <td>35</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>13</td>\n", + " <td>2330</td>\n", + " <td>26</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>14</td>\n", + " <td>3320</td>\n", + " <td>20</td>\n", + " <td>Sporty</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>15</td>\n", + " <td>2885</td>\n", + " <td>27</td>\n", + " <td>Sporty</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>16</td>\n", + " <td>3310</td>\n", + " <td>19</td>\n", + " <td>Sporty</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16</th>\n", + " <td>17</td>\n", + " <td>2695</td>\n", + " <td>30</td>\n", + " <td>Sporty</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17</th>\n", + " <td>18</td>\n", + " <td>2170</td>\n", + " <td>33</td>\n", + " <td>Sporty</td>\n", + " </tr>\n", + " <tr>\n", + " <th>18</th>\n", + " <td>19</td>\n", + " <td>2710</td>\n", + " <td>27</td>\n", + " <td>Sporty</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td>20</td>\n", + " <td>2775</td>\n", + " <td>24</td>\n", + " <td>Sporty</td>\n", + " </tr>\n", + " <tr>\n", + " <th>20</th>\n", + " <td>21</td>\n", + " <td>2840</td>\n", + " <td>26</td>\n", + " <td>Sporty</td>\n", + " </tr>\n", + " <tr>\n", + " <th>21</th>\n", + " <td>22</td>\n", + " <td>2485</td>\n", + " <td>28</td>\n", + " <td>Sporty</td>\n", + " </tr>\n", + " <tr>\n", + " <th>22</th>\n", + " <td>23</td>\n", + " <td>2670</td>\n", + " <td>27</td>\n", + " <td>Compact</td>\n", + " </tr>\n", + " <tr>\n", + " <th>23</th>\n", + " <td>24</td>\n", + " <td>2640</td>\n", + " <td>23</td>\n", + " <td>Compact</td>\n", + " </tr>\n", + " <tr>\n", + " <th>24</th>\n", + " <td>25</td>\n", + " <td>2655</td>\n", + " <td>26</td>\n", + " <td>Compact</td>\n", + " 
</tr>\n", + " <tr>\n", + " <th>25</th>\n", + " <td>26</td>\n", + " <td>3065</td>\n", + " <td>25</td>\n", + " <td>Compact</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26</th>\n", + " <td>27</td>\n", + " <td>2750</td>\n", + " <td>24</td>\n", + " <td>Compact</td>\n", + " </tr>\n", + " <tr>\n", + " <th>27</th>\n", + " <td>28</td>\n", + " <td>2920</td>\n", + " <td>26</td>\n", + " <td>Compact</td>\n", + " </tr>\n", + " <tr>\n", + " <th>28</th>\n", + " <td>29</td>\n", + " <td>2780</td>\n", + " <td>24</td>\n", + " <td>Compact</td>\n", + " </tr>\n", + " <tr>\n", + " <th>29</th>\n", + " <td>30</td>\n", + " <td>2745</td>\n", + " <td>25</td>\n", + " <td>Compact</td>\n", + " </tr>\n", + " <tr>\n", + " <th>30</th>\n", + " <td>31</td>\n", + " <td>3110</td>\n", + " <td>21</td>\n", + " <td>Compact</td>\n", + " </tr>\n", + " <tr>\n", + " <th>31</th>\n", + " <td>32</td>\n", + " <td>2920</td>\n", + " <td>21</td>\n", + " <td>Compact</td>\n", + " </tr>\n", + " <tr>\n", + " <th>32</th>\n", + " <td>33</td>\n", + " <td>2645</td>\n", + " <td>23</td>\n", + " <td>Compact</td>\n", + " </tr>\n", + " <tr>\n", + " <th>33</th>\n", + " <td>34</td>\n", + " <td>2575</td>\n", + " <td>24</td>\n", + " <td>Compact</td>\n", + " </tr>\n", + " <tr>\n", + " <th>34</th>\n", + " <td>35</td>\n", + " <td>2935</td>\n", + " <td>23</td>\n", + " <td>Compact</td>\n", + " </tr>\n", + " <tr>\n", + " <th>35</th>\n", + " <td>36</td>\n", + " <td>2920</td>\n", + " <td>27</td>\n", + " <td>Compact</td>\n", + " </tr>\n", + " <tr>\n", + " <th>36</th>\n", + " <td>37</td>\n", + " <td>2985</td>\n", + " <td>23</td>\n", + " <td>Compact</td>\n", + " </tr>\n", + " <tr>\n", + " <th>37</th>\n", + " <td>38</td>\n", + " <td>3265</td>\n", + " <td>20</td>\n", + " <td>Medium</td>\n", + " </tr>\n", + " <tr>\n", + " <th>38</th>\n", + " <td>39</td>\n", + " <td>2880</td>\n", + " <td>21</td>\n", + " <td>Medium</td>\n", + " </tr>\n", + " <tr>\n", + " <th>39</th>\n", + " <td>40</td>\n", + " <td>2975</td>\n", + " <td>22</td>\n", + " 
<td>Medium</td>\n", + " </tr>\n", + " <tr>\n", + " <th>40</th>\n", + " <td>41</td>\n", + " <td>3450</td>\n", + " <td>22</td>\n", + " <td>Medium</td>\n", + " </tr>\n", + " <tr>\n", + " <th>41</th>\n", + " <td>42</td>\n", + " <td>3145</td>\n", + " <td>22</td>\n", + " <td>Medium</td>\n", + " </tr>\n", + " <tr>\n", + " <th>42</th>\n", + " <td>43</td>\n", + " <td>3190</td>\n", + " <td>22</td>\n", + " <td>Medium</td>\n", + " </tr>\n", + " <tr>\n", + " <th>43</th>\n", + " <td>44</td>\n", + " <td>3610</td>\n", + " <td>23</td>\n", + " <td>Medium</td>\n", + " </tr>\n", + " <tr>\n", + " <th>44</th>\n", + " <td>45</td>\n", + " <td>2885</td>\n", + " <td>23</td>\n", + " <td>Medium</td>\n", + " </tr>\n", + " <tr>\n", + " <th>45</th>\n", + " <td>46</td>\n", + " <td>3480</td>\n", + " <td>21</td>\n", + " <td>Medium</td>\n", + " </tr>\n", + " <tr>\n", + " <th>46</th>\n", + " <td>47</td>\n", + " <td>3200</td>\n", + " <td>22</td>\n", + " <td>Medium</td>\n", + " </tr>\n", + " <tr>\n", + " <th>47</th>\n", + " <td>48</td>\n", + " <td>2765</td>\n", + " <td>21</td>\n", + " <td>Medium</td>\n", + " </tr>\n", + " <tr>\n", + " <th>48</th>\n", + " <td>49</td>\n", + " <td>3220</td>\n", + " <td>21</td>\n", + " <td>Medium</td>\n", + " </tr>\n", + " <tr>\n", + " <th>49</th>\n", + " <td>50</td>\n", + " <td>3480</td>\n", + " <td>23</td>\n", + " <td>Medium</td>\n", + " </tr>\n", + " <tr>\n", + " <th>50</th>\n", + " <td>51</td>\n", + " <td>3325</td>\n", + " <td>23</td>\n", + " <td>Large</td>\n", + " </tr>\n", + " <tr>\n", + " <th>51</th>\n", + " <td>52</td>\n", + " <td>3855</td>\n", + " <td>18</td>\n", + " <td>Large</td>\n", + " </tr>\n", + " <tr>\n", + " <th>52</th>\n", + " <td>53</td>\n", + " <td>3850</td>\n", + " <td>20</td>\n", + " <td>Large</td>\n", + " </tr>\n", + " <tr>\n", + " <th>53</th>\n", + " <td>54</td>\n", + " <td>3195</td>\n", + " <td>18</td>\n", + " <td>Van</td>\n", + " </tr>\n", + " <tr>\n", + " <th>54</th>\n", + " <td>55</td>\n", + " <td>3735</td>\n", + " <td>18</td>\n", + " 
<td>Van</td>\n", + " </tr>\n", + " <tr>\n", + " <th>55</th>\n", + " <td>56</td>\n", + " <td>3665</td>\n", + " <td>18</td>\n", + " <td>Van</td>\n", + " </tr>\n", + " <tr>\n", + " <th>56</th>\n", + " <td>57</td>\n", + " <td>3735</td>\n", + " <td>19</td>\n", + " <td>Van</td>\n", + " </tr>\n", + " <tr>\n", + " <th>57</th>\n", + " <td>58</td>\n", + " <td>3415</td>\n", + " <td>20</td>\n", + " <td>Van</td>\n", + " </tr>\n", + " <tr>\n", + " <th>58</th>\n", + " <td>59</td>\n", + " <td>3185</td>\n", + " <td>20</td>\n", + " <td>Van</td>\n", + " </tr>\n", + " <tr>\n", + " <th>59</th>\n", + " <td>60</td>\n", + " <td>3690</td>\n", + " <td>19</td>\n", + " <td>Van</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " X weight mpg type\n", + "0 1 2560 33 Small\n", + "1 2 2345 33 Small\n", + "2 3 1845 37 Small\n", + "3 4 2260 32 Small\n", + "4 5 2440 32 Small\n", + "5 6 2285 26 Small\n", + "6 7 2275 33 Small\n", + "7 8 2350 28 Small\n", + "8 9 2295 25 Small\n", + "9 10 1900 34 Small\n", + "10 11 2390 29 Small\n", + "11 12 2075 35 Small\n", + "12 13 2330 26 Small\n", + "13 14 3320 20 Sporty\n", + "14 15 2885 27 Sporty\n", + "15 16 3310 19 Sporty\n", + "16 17 2695 30 Sporty\n", + "17 18 2170 33 Sporty\n", + "18 19 2710 27 Sporty\n", + "19 20 2775 24 Sporty\n", + "20 21 2840 26 Sporty\n", + "21 22 2485 28 Sporty\n", + "22 23 2670 27 Compact\n", + "23 24 2640 23 Compact\n", + "24 25 2655 26 Compact\n", + "25 26 3065 25 Compact\n", + "26 27 2750 24 Compact\n", + "27 28 2920 26 Compact\n", + "28 29 2780 24 Compact\n", + "29 30 2745 25 Compact\n", + "30 31 3110 21 Compact\n", + "31 32 2920 21 Compact\n", + "32 33 2645 23 Compact\n", + "33 34 2575 24 Compact\n", + "34 35 2935 23 Compact\n", + "35 36 2920 27 Compact\n", + "36 37 2985 23 Compact\n", + "37 38 3265 20 Medium\n", + "38 39 2880 21 Medium\n", + "39 40 2975 22 Medium\n", + "40 41 3450 22 Medium\n", + "41 42 3145 22 Medium\n", + "42 43 3190 22 Medium\n", + "43 44 3610 23 Medium\n", + "44 45 
2885 23 Medium\n", + "45 46 3480 21 Medium\n", + "46 47 3200 22 Medium\n", + "47 48 2765 21 Medium\n", + "48 49 3220 21 Medium\n", + "49 50 3480 23 Medium\n", + "50 51 3325 23 Large\n", + "51 52 3855 18 Large\n", + "52 53 3850 20 Large\n", + "53 54 3195 18 Van\n", + "54 55 3735 18 Van\n", + "55 56 3665 18 Van\n", + "56 57 3735 19 Van\n", + "57 58 3415 20 Van\n", + "58 59 3185 20 Van\n", + "59 60 3690 19 Van" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# First import Pandas\n", "import pandas as pd\n", "\n", "# load the database using pandas.read_csv. The file location is different for everyone. If you made a new folder in this folder named \"data\", the path would be ./data/d.fuel.dat\n", "# Example: data = pd.read_csv(\"/data/d.fuel.dat\")\n", - "data = None\n", + "data = pd.read_csv(\"./data/d.fuel.dat\")\n", "\n", "data" ] @@ -747,20 +1268,184 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 47, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>X</th>\n", + " <th>weight</th>\n", + " <th>mpg</th>\n", + " <th>type</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>2560</td>\n", + " <td>33</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>2</td>\n", + " <td>2345</td>\n", + " <td>33</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>3</td>\n", + " <td>1845</td>\n", + " <td>37</td>\n", + " 
<td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>4</td>\n", + " <td>2260</td>\n", + " <td>32</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>5</td>\n", + " <td>2440</td>\n", + " <td>32</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " X weight mpg type\n", + "0 1 2560 33 Small\n", + "1 2 2345 33 Small\n", + "2 3 1845 37 Small\n", + "3 4 2260 32 Small\n", + "4 5 2440 32 Small" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Print the first 5 rows using data.loc\n" + "# Print the first 5 rows using data.loc\n", + "data.loc[:4,]" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 46, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>X</th>\n", + " <th>weight</th>\n", + " <th>mpg</th>\n", + " <th>type</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>2560</td>\n", + " <td>33</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>2</td>\n", + " <td>2345</td>\n", + " <td>33</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>3</td>\n", + " <td>1845</td>\n", + " <td>37</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>4</td>\n", + " <td>2260</td>\n", + " <td>32</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " 
<td>5</td>\n", + " <td>2440</td>\n", + " <td>32</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " X weight mpg type\n", + "0 1 2560 33 Small\n", + "1 2 2345 33 Small\n", + "2 3 1845 37 Small\n", + "3 4 2260 32 Small\n", + "4 5 2440 32 Small" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# print the first 5 rows using data.head()\n" + "# print the first 5 rows using data.head()\n", + "data.head()" ] }, { @@ -775,7 +1460,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 76, "metadata": {}, "outputs": [ { @@ -783,17 +1468,17 @@ "output_type": "stream", "text": [ "Average miles per galon is: \n", - "0 \n", + "24.58 \n", "Average miles per galon for all Medium type cars is: \n", - "0\n" + "21.77\n" ] } ], "source": [ - "avg_mpg = 0\n", - "avg_medium = 0\n", + "avg_mpg = data.loc[:, \"mpg\"].mean()\n", + "avg_medium = data[data[\"type\"].isin([\"Medium\"])].loc[:, \"mpg\"].mean()\n", "\n", - "print(\"Average miles per galon is: \\n{}\".format(round(avg_mpg, 2)), \"\\nAverage miles per galon for all Medium type cars is: \\n{}\".format(round(avg_medium,2)))" + "print(\"Average miles per galon is: \\n{}\".format(round(avg_mpg, 2)), \"\\nAverage miles per galon for all Medium type cars is: \\n{}\".format(round(avg_medium, 2)))" ] }, { @@ -807,30 +1492,36 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 78, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", + "0 14.018470\n", + "1 14.018470\n", + "2 15.717678\n", + "3 13.593668\n", + "4 13.593668\n", + "Name: mpg, dtype: float64\n", + "0 1162.24\n", + "1 1064.63\n", + "2 837.63\n", + "3 1026.04\n", + "4 1107.76\n", + "Name: weight, dtype: float64\n", "\n", "Average Kilometer per liter is: \n", - 
"Series([], dtype: float64) \n", + "10.44 \n", "Average weight in kilogram is: \n", - "Series([], dtype: float64)\n" + "1316.98\n" ] } ], "source": [ - "t_kml = pd.DataFrame()\n", - "t_kg = pd.DataFrame()\n", + "t_kml = data.loc[:, \"mpg\"]*1.61/3.79\n", + "t_kg = data.loc[:, \"weight\"]*0.454\n", "\n", "print(t_kml.head())\n", "print(t_kg.head())\n",