diff --git a/data/Block 0/Daten_Serie_1.zip b/data/Block 0/Daten_Serie_1.zip new file mode 100644 index 0000000000000000000000000000000000000000..39f181fd828efc6a053309c4085dd77789304f85 Binary files /dev/null and b/data/Block 0/Daten_Serie_1.zip differ diff --git a/notebooks/Block 0/Checking_Correct_Installation.ipynb b/notebooks/Block 0/Checking_Correct_Installation.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..67bad2a39ef329a88674babb3f4076d720b2b4f6 --- /dev/null +++ b/notebooks/Block 0/Checking_Correct_Installation.ipynb @@ -0,0 +1,131 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Checking your installation\n", + "\n", + "Please check that the notebook below runs smoothly." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'3.7.7 (default, May 6 2020, 11:45:54) [MSC v.1916 64 bit (AMD64)]'" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import sys\n", + "sys.version #Should work and give 3.7." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'2.1.0'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import tensorflow as tf\n", + "tf.__version__ #Should work and give 2.1.0" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'1.19.1'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numpy as np \n", + "np.__version__ #Should work and give something > 1.19" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<matplotlib.collections.PathCollection at 0x17a7a929508>" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 432x288 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "plt.scatter(range(100), np.sin(0.1 * np.array(range(100))))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/Block 0/Exercise Sheet - Basics Numpy.ipynb b/notebooks/Block 0/Exercise Sheet - Basics Numpy.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..ce2bb78b87ba4652d57442c3bbf0448f18a0d259 --- /dev/null +++ b/notebooks/Block 0/Exercise Sheet - Basics 
Numpy.ipynb @@ -0,0 +1,473 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Problem 1 - Introduction Numpy" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is just a short programming quiz that asks you use a few NumPy features. It is meant to give you a little practice if you don't have NumPy experience" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Input as Array: None\n", + "Input minus min: None\n", + "Input Array: None\n", + "Multiply 1:\n", + "None\n", + "Multiply 2:\n", + "None\n", + "Multiply 3:\n", + "None\n", + "Mean == None\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "\n", + "\n", + "def prepare_inputs(inputs):\n", + " # TODO: create a 2-dimensional ndarray from the given 1-dimensional list;\n", + " # assign it to input_array\n", + " input_array = None\n", + " \n", + " # TODO: find the minimum value in input_array and subtract that\n", + " # value from all the elements of input_array. Store the\n", + " # result in inputs_minus_min\n", + " inputs_minus_min = None\n", + "\n", + " # TODO: find the maximum value in inputs_minus_min and divide\n", + " # all of the values in inputs_minus_min by the maximum value.\n", + " # Store the results in inputs_div_max.\n", + " inputs_div_max = None\n", + "\n", + " # return the three arrays we've created\n", + " return input_array, inputs_minus_min, inputs_div_max\n", + " \n", + "\n", + "def multiply_inputs(m1, m2):\n", + " # TODO: Check the shapes of the matrices m1 and m2. \n", + " # m1 and m2 will be ndarray objects.\n", + " #\n", + " # Return False if the shapes cannot be used for matrix\n", + " # multiplication. You may not use a transpose\n", + " pass\n", + "\n", + "\n", + " # TODO: If you have not returned False, then calculate the matrix product\n", + " # of m1 and m2 and return it. 
Do not use a transpose,\n", + " # but you may swap their order if necessary\n", + " pass\n", + " \n", + "\n", + "def find_mean(values):\n", + " # TODO: Return the average of the values in the given Python list\n", + " pass\n", + "\n", + "\n", + "input_array, inputs_minus_min, inputs_div_max = prepare_inputs([-1,2,7])\n", + "print(\"Input as Array: {}\".format(input_array))\n", + "print(\"Input minus min: {}\".format(inputs_minus_min))\n", + "print(\"Input Array: {}\".format(inputs_div_max))\n", + "\n", + "print(\"Multiply 1:\\n{}\".format(multiply_inputs(np.array([[1,2,3],[4,5,6]]), np.array([[1],[2],[3],[4]]))))\n", + "print(\"Multiply 2:\\n{}\".format(multiply_inputs(np.array([[1,2,3],[4,5,6]]), np.array([[1],[2],[3]]))))\n", + "print(\"Multiply 3:\\n{}\".format(multiply_inputs(np.array([[1,2,3],[4,5,6]]), np.array([[1,2]]))))\n", + "\n", + "print(\"Mean == {}\".format(find_mean([1,3,4])))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Problem 2 - Body Mass Index" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The goal is to write a function which can calculate the Body Mass Index (BMI) of a given dataset. 
BMI is given by:\n", + "\n", + "$$ BMI = \\frac{Weight}{Length^2} $$\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The given weights and lengths result in BMI's: \n", + "None\n" + ] + } + ], + "source": [ + "# import numpy\n", + "import numpy as np\n", + "np.set_printoptions(precision=2)\n", + "\n", + "def BMI(weight, length):\n", + " # Function returning the BMI of input vectors m and l\n", + " return \n", + "\n", + "\n", + "m_example = [60, 72, 57, 90, 95, 72]\n", + "l_example = [1.75, 1.80, 1.65, 1.90, 1.74, 1.91]\n", + "print(\"The given weights and lengths result in BMI's: \\n{}\".format(BMI(m_example, l_example)))\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Problem 3 - Using Pandas: Weather" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this problem you will have to make use of **Pandas** to load and analyse a small, fictional database weather.csv (the same as used in the Script).\n", + "The data contains the temperature in 4 cities over 6 months. Download the dataset and save it somewhere practical, possibly in the same folder as this notebook." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# First import Pandas\n", + "import pandas as pd\n", + "\n", + "# load the database using pandas.read_csv. The file location is different for everyone. 
If you made a new folder in this folder named \"data\", the path would be ./data/weather.csv\n", + "# Example: data = pd.read_csv(\"./data/weather.csv\")\n", + "data = None\n", + "\n", + "data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Viewing Data using Indices:\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using the names of rows and columns (not integer indices), find:\n", + "- Temperature in Basel for all months\n", + "- Temperature in Chur in February\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Temperature in Basel is:\n", + " None \n", + "\n", + "Temperature in Chur in February is:\n", + " None\n" + ] + } + ], + "source": [ + "t_basel = None\n", + "t_chur_feb = None\n", + "\n", + "print(\"Temperature in Basel is:\\n\", t_basel, \"\\n\")\n", + "print(\"Temperature in Chur in February is:\\n\", t_chur_feb)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Changing Indices" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we will look at the indices themselves. 
\n", + "- Find the name of column 3 using Python\n", + "- Change \"Basel\" to \"Bern\"" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "None\n" + ] + } + ], + "source": [ + "# Find the name of column 3:\n", + "name_c3 = None\n", + "print(name_c3)\n", + " \n", + "# Change \"Basel\" to \"Bern\"\n", + "\n", + "data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### .mean() Method:\n", + "Now find :\n", + "- the average temperature in Chur (for the given Period)\n", + "- the average temperature in Spring in Switzerland (spring is Mar, Apr, May, mean of all cities)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average temperature in Chur was: None\n", + "Average temperature in Spring was: 0\n" + ] + } + ], + "source": [ + "t_chur_mean = None\n", + "\n", + "\n", + "t_spring_mean = 0\n", + "# Hint: mean() on a DataFrame gives the result in a Series. 
The axis of the function to be applied on can be set with axis={index (0), columns (1)}.\n", + "# To find the mean of the whole matrix, the mean method can be applied twice\n", + "\n", + "\n", + "print(\"Average temperature in Chur was: {}\".format(t_chur_mean))\n", + "print(\"Average temperature in Spring was: {}\".format(round(t_spring_mean, 1)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### .sort_values() Method:\n", + "- Sort the data based on the temperature in Zürich\n", + "- Sort the data based on decreasing temperature in Basel" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Sort data based on Zurich" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# Sort data based on Basel, descending with temperature" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Problem 4: Fuel Consumption\n", + "In this exercise, you will analyse a dataset containing information on Fuel consumption of cars in the eighties. The data contains 3 columns, the weight of the car in Pounds(1 Pound = 0.454 kg), the range in Miles per Gallon(1 mile=1.61 km; 1 gallon=3.79 l), and the type of car. Download the dataset and save it somewhere practical, possibly in the same folder as this notebook." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "# First import Pandas\n", + "import pandas as pd\n", + "\n", + "# load the database using pandas.read_csv. The file location is different for everyone. If you made a new folder in this folder named \"data\", the path would be ./data/d.fuel.dat\n", + "# Example: data = pd.read_csv(\"./data/d.fuel.dat\")\n", + "data = None\n", + "\n", + "data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " To get a quick overview, we can view only the first 5 rows of the dataset. 
Print the first five rows using:\n", + " - **dataframe.loc**\n", + " - **DataFrame.head()** " + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# Print the first 5 rows using data.loc\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# print the first 5 rows using data.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### .mean() Method:\n", + "Now find :\n", + "- the average range of all cars\n", + "- the average range of all cars with type \"Medium\" (hint, select all rows with a certain constraint using **DataFrame[DataFrame[** *column* **].isin([** *values* **])]**\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average miles per galon is: \n", + "0 \n", + "Average miles per galon for all Medium type cars is: \n", + "0\n" + ] + } + ], + "source": [ + "avg_mpg = 0\n", + "avg_medium = 0\n", + "\n", + "print(\"Average miles per galon is: \\n{}\".format(round(avg_mpg, 2)), \"\\nAverage miles per galon for all Medium type cars is: \\n{}\".format(round(avg_medium,2)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Conversion to SI units\n", + "- Create a Series containing the range in km/l and another Series containing the weight in kg.\n", + "- Find the average of these new Vectors\n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "\n", + "Average Kilometer per liter is: \n", + "Series([], dtype: float64) \n", + "Average weight in kilogram is: \n", + "Series([], dtype: float64)\n" + ] + } + ], + "source": [ + "t_kml = 
pd.DataFrame()\n", + "t_kg = pd.DataFrame()\n", + "\n", + "print(t_kml.head())\n", + "print(t_kg.head())\n", + "\n", + "avg_kml = t_kml.mean()\n", + "avg_kg = t_kg.mean()\n", + "print(\"\\nAverage Kilometer per liter is: \\n{}\".format(round(avg_kml, 2)), \"\\nAverage weight in kilogram is: \\n{}\".format(round(avg_kg,2)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [conda env:root]", + "language": "python", + "name": "conda-root-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/Block 0/Solution - Basics Numpy.ipynb b/notebooks/Block 0/Solution - Basics Numpy.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..fd04a3a2c286252570ce427931eff90e4ede4242 --- /dev/null +++ b/notebooks/Block 0/Solution - Basics Numpy.ipynb @@ -0,0 +1,1507 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Solution to Problem 1" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# Use the numpy library\n", + "import numpy as np\n", + "np.set_printoptions(precision=2)\n", + "\n", + "\n", + "######################################################\n", + "#\n", + "# MESSAGE TO STUDENTS:\n", + "#\n", + "# This file contains a solution to the coding quiz. 
Feel free\n", + "# to look at it when you are stuck, but try to solve the\n", + "# problem on your own first.\n", + "#\n", + "######################################################\n", + "\n", + "\n", + "def prepare_inputs(inputs):\n", + " # TODO: create a 2-dimensional ndarray from the given 1-dimensional list;\n", + " # assign it to input_array\n", + " input_array = np.array([inputs])\n", + " \n", + " # TODO: find the minimum value in input_array and subtract that\n", + " # value from all the elements of input_array. Store the\n", + " # result in inputs_minus_min\n", + " # We can use NumPy's min function and element-wise subtraction\n", + " inputs_minus_min = input_array - np.min(input_array)\n", + "\n", + " # TODO: find the maximum value in inputs_minus_min and divide\n", + " # all of the values in inputs_minus_min by the maximum value.\n", + " # Store the results in inputs_div_max.\n", + " # We can use NumPy's max function and element-wise division\n", + " inputs_div_max = inputs_minus_min / np.max(inputs_minus_min)\n", + "\n", + " return input_array, inputs_minus_min, inputs_div_max\n", + " \n", + "\n", + "def multiply_inputs(m1, m2):\n", + " # Check the shapes of the matrices m1 and m2. \n", + " # m1 and m2 will be ndarray objects.\n", + " #\n", + " # Return False if the shapes cannot be used for matrix\n", + " # multiplication. You may not use a transpose\n", + " if m1.shape[0] != m2.shape[1] and m1.shape[1] != m2.shape[0]: \n", + " return False\n", + "\n", + " # Have not returned False, so calculate the matrix product\n", + " # of m1 and m2 and return it. 
Do not use a transpose,\n", + " # but you swap their order if necessary\n", + " if m1.shape[1] == m2.shape[0]:\n", + " return np.matmul(m1, m2) \n", + " else:\n", + " return np.matmul(m2, m1) \n", + "\n", + "\n", + "def find_mean(values):\n", + " # Return the average of the values in the given Python list\n", + " # NumPy has a lot of helpful methods like this.\n", + " return np.mean(values)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Input as Array: [[-1 2 7]]\n", + "Input minus min: [[0 3 8]]\n", + "Input Array: [[0. 0.38 1. ]]\n", + "Multiply 1:\n", + "False\n", + "Multiply 2:\n", + "[[14]\n", + " [32]]\n", + "Multiply 3:\n", + "[[ 9 12 15]]\n", + "Mean == 2.6666666666666665\n" + ] + } + ], + "source": [ + "\n", + "input_array, inputs_minus_min, inputs_div_max = prepare_inputs([-1,2,7])\n", + "print(\"Input as Array: {}\".format(input_array))\n", + "print(\"Input minus min: {}\".format(inputs_minus_min))\n", + "print(\"Input Array: {}\".format(inputs_div_max))\n", + "\n", + "print(\"Multiply 1:\\n{}\".format(multiply_inputs(np.array([[1,2,3],[4,5,6]]), np.array([[1],[2],[3],[4]]))))\n", + "print(\"Multiply 2:\\n{}\".format(multiply_inputs(np.array([[1,2,3],[4,5,6]]), np.array([[1],[2],[3]]))))\n", + "print(\"Multiply 3:\\n{}\".format(multiply_inputs(np.array([[1,2,3],[4,5,6]]), np.array([[1,2]]))))\n", + "\n", + "print(\"Mean == {}\".format(find_mean([1,3,4])))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Solution to Problem 2 - Body Mass Index" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The given weights and lengths result in BMI's: \n", + "[19.59 22.22 20.94 24.93 31.38 19.74]\n" + ] + } + ], + "source": [ + "# import numpy\n", + "import numpy as np\n", + "np.set_printoptions(precision=2)\n", + 
"\n", + "def BMI(weight, length):\n", + " # Function returning the BMI of input vectors m and l\n", + " m = np.array(weight) # Create arrays from inputs\n", + " l = np.array(length)\n", + " \n", + " # Calculate all BMI's simultaniously\n", + " BMI = np.divide(m, l**2) \n", + " # Or just\n", + " BMI = m/l**2 \n", + " \n", + " return BMI\n", + "\n", + "m_example = [60, 72, 57, 90, 95, 72]\n", + "l_example = [1.75, 1.80, 1.65, 1.90, 1.74, 1.91]\n", + "print(\"The given weights and lengths result in BMI's: \\n{}\".format(BMI(m_example, l_example)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Solution to Problem 3 - Using Pandas: Weather" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Luzern</th>\n", + " <th>Basel</th>\n", + " <th>Chur</th>\n", + " <th>Zurich</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>Jan</th>\n", + " <td>2</td>\n", + " <td>5</td>\n", + " <td>-3</td>\n", + " <td>4</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Feb</th>\n", + " <td>5</td>\n", + " <td>6</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Mar</th>\n", + " <td>10</td>\n", + " <td>11</td>\n", + " <td>13</td>\n", + " <td>8</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Apr</th>\n", + " <td>16</td>\n", + " <td>12</td>\n", + " <td>14</td>\n", + " <td>17</td>\n", + " </tr>\n", + " <tr>\n", + " <th>May</th>\n", + " <td>21</td>\n", + " <td>23</td>\n", + " <td>21</td>\n", + " <td>20</td>\n", + " </tr>\n", + " 
<tr>\n", + " <th>Jun</th>\n", + " <td>25</td>\n", + " <td>21</td>\n", + " <td>23</td>\n", + " <td>27</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Luzern Basel Chur Zurich\n", + "Jan 2 5 -3 4\n", + "Feb 5 6 1 0\n", + "Mar 10 11 13 8\n", + "Apr 16 12 14 17\n", + "May 21 23 21 20\n", + "Jun 25 21 23 27" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# First import Pandas\n", + "import pandas as pd\n", + "\n", + "# load the database using pandas.read_csv \n", + "data = pd.read_csv(\"./Daten_Serie_1/weather.csv\")\n", + "data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Viewing Data using Indices:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Temperature in Basel is:\n", + " Jan 5\n", + "Feb 6\n", + "Mar 11\n", + "Apr 12\n", + "May 23\n", + "Jun 21\n", + "Name: Basel, dtype: int64 \n", + "\n", + "Temperature in Chur in February is:\n", + " 1\n" + ] + } + ], + "source": [ + "t_basel = data[\"Basel\"]\n", + "t_chur_feb = data.loc[\"Feb\", \"Chur\"]\n", + "\n", + "print(\"Temperature in Basel is:\\n\", t_basel, \"\\n\")\n", + "print(\"Temperature in Chur in February is:\\n\", t_chur_feb)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Changing Indices" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Zurich\n" + ] + }, + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" 
class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Luzern</th>\n", + " <th>Bern</th>\n", + " <th>Chur</th>\n", + " <th>Zurich</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>Jan</th>\n", + " <td>2</td>\n", + " <td>5</td>\n", + " <td>-3</td>\n", + " <td>4</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Feb</th>\n", + " <td>5</td>\n", + " <td>6</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Mar</th>\n", + " <td>10</td>\n", + " <td>11</td>\n", + " <td>13</td>\n", + " <td>8</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Apr</th>\n", + " <td>16</td>\n", + " <td>12</td>\n", + " <td>14</td>\n", + " <td>17</td>\n", + " </tr>\n", + " <tr>\n", + " <th>May</th>\n", + " <td>21</td>\n", + " <td>23</td>\n", + " <td>21</td>\n", + " <td>20</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Jun</th>\n", + " <td>25</td>\n", + " <td>21</td>\n", + " <td>23</td>\n", + " <td>27</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Luzern Bern Chur Zurich\n", + "Jan 2 5 -3 4\n", + "Feb 5 6 1 0\n", + "Mar 10 11 13 8\n", + "Apr 16 12 14 17\n", + "May 21 23 21 20\n", + "Jun 25 21 23 27" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Find the name of column 3:\n", + "all_columns = data.columns\n", + "name_c3 = all_columns[3]\n", + "# Oder \n", + "name_c3 = data.columns.values[3]\n", + "print(name_c3)\n", + " \n", + "# Change \"Basel\" to \"Bern\"\n", + "data.columns.values[1] = \"Bern\"\n", + "data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### .mean() Method:\n", + "Now find :\n", + "- the average temperature in Chur (for the given Period)\n", + "- the average temperature in Spring in Switzerland (spring is Mar, Apr, May, mean of all cities)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "Average temperature in Chur was: 11.5\n", + "Average temperature in Spring was: 15.5\n" + ] + } + ], + "source": [ + "t_chur_mean = data.loc[:, \"Chur\"].mean()\n", + "t_spring_mean = data.loc[\"Mar\":\"May\", :].mean().mean()\n", + "# Hint: mean() on a DataFrame gives the result in a Serries. The axis of the function to be applied on can be set with axis={index (0), columns (1)}.\n", + "# To find the mean of the whole matrix, the mean method can be applied twice\n", + "\n", + "\n", + "print(\"Average temperature in Chur was: {}\".format(t_chur_mean))\n", + "print(\"Average temperature in Spring was: {}\".format(round(t_spring_mean, 1)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### .sort_values() Method:\n", + "- Sort the data based on the temperature in Zürich\n", + "- Sort the data based on decreasing temperature in Basel" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Luzern</th>\n", + " <th>Bern</th>\n", + " <th>Chur</th>\n", + " <th>Zurich</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>Feb</th>\n", + " <td>5</td>\n", + " <td>6</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Jan</th>\n", + " <td>2</td>\n", + " <td>5</td>\n", + " <td>-3</td>\n", + " <td>4</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Mar</th>\n", + " <td>10</td>\n", + " <td>11</td>\n", + " <td>13</td>\n", + " <td>8</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>Apr</th>\n", + " <td>16</td>\n", + " <td>12</td>\n", + " <td>14</td>\n", + " <td>17</td>\n", + " </tr>\n", + " <tr>\n", + " <th>May</th>\n", + " <td>21</td>\n", + " <td>23</td>\n", + " <td>21</td>\n", + " <td>20</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Jun</th>\n", + " <td>25</td>\n", + " <td>21</td>\n", + " <td>23</td>\n", + " <td>27</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Luzern Bern Chur Zurich\n", + "Feb 5 6 1 0\n", + "Jan 2 5 -3 4\n", + "Mar 10 11 13 8\n", + "Apr 16 12 14 17\n", + "May 21 23 21 20\n", + "Jun 25 21 23 27" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.sort_values(\"Zurich\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Luzern</th>\n", + " <th>Bern</th>\n", + " <th>Chur</th>\n", + " <th>Zurich</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>May</th>\n", + " <td>21</td>\n", + " <td>23</td>\n", + " <td>21</td>\n", + " <td>20</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Jun</th>\n", + " <td>25</td>\n", + " <td>21</td>\n", + " <td>23</td>\n", + " <td>27</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Apr</th>\n", + " <td>16</td>\n", + " <td>12</td>\n", + " <td>14</td>\n", + " <td>17</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Mar</th>\n", + " <td>10</td>\n", + " <td>11</td>\n", + " <td>13</td>\n", + " <td>8</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Feb</th>\n", + " <td>5</td>\n", + " <td>6</td>\n", 
+ " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Jan</th>\n", + " <td>2</td>\n", + " <td>5</td>\n", + " <td>-3</td>\n", + " <td>4</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Luzern Bern Chur Zurich\n", + "May 21 23 21 20\n", + "Jun 25 21 23 27\n", + "Apr 16 12 14 17\n", + "Mar 10 11 13 8\n", + "Feb 5 6 1 0\n", + "Jan 2 5 -3 4" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.sort_values(\"Basel\", ascending=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Solution to Problem 4: Fuel Consumption" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>X</th>\n", + " <th>weight</th>\n", + " <th>mpg</th>\n", + " <th>type</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>2560</td>\n", + " <td>33</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>2</td>\n", + " <td>2345</td>\n", + " <td>33</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>3</td>\n", + " <td>1845</td>\n", + " <td>37</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>4</td>\n", + " <td>2260</td>\n", + " <td>32</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>5</td>\n", + " <td>2440</td>\n", + " <td>32</td>\n", + " <td>Small</td>\n", + " 
</tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>6</td>\n", + " <td>2285</td>\n", + " <td>26</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>7</td>\n", + " <td>2275</td>\n", + " <td>33</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>8</td>\n", + " <td>2350</td>\n", + " <td>28</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>9</td>\n", + " <td>2295</td>\n", + " <td>25</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>10</td>\n", + " <td>1900</td>\n", + " <td>34</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>11</td>\n", + " <td>2390</td>\n", + " <td>29</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>12</td>\n", + " <td>2075</td>\n", + " <td>35</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>13</td>\n", + " <td>2330</td>\n", + " <td>26</td>\n", + " <td>Small</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>14</td>\n", + " <td>3320</td>\n", + " <td>20</td>\n", + " <td>Sporty</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>15</td>\n", + " <td>2885</td>\n", + " <td>27</td>\n", + " <td>Sporty</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>16</td>\n", + " <td>3310</td>\n", + " <td>19</td>\n", + " <td>Sporty</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16</th>\n", + " <td>17</td>\n", + " <td>2695</td>\n", + " <td>30</td>\n", + " <td>Sporty</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17</th>\n", + " <td>18</td>\n", + " <td>2170</td>\n", + " <td>33</td>\n", + " <td>Sporty</td>\n", + " </tr>\n", + " <tr>\n", + " <th>18</th>\n", + " <td>19</td>\n", + " <td>2710</td>\n", + " <td>27</td>\n", + " <td>Sporty</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td>20</td>\n", + " <td>2775</td>\n", + " <td>24</td>\n", + " <td>Sporty</td>\n", + " </tr>\n", + " 
<tr>\n", + " <th>20</th>\n", + " <td>21</td>\n", + " <td>2840</td>\n", + " <td>26</td>\n", + " <td>Sporty</td>\n", + " </tr>\n", + " <tr>\n", + " <th>21</th>\n", + " <td>22</td>\n", + " <td>2485</td>\n", + " <td>28</td>\n", + " <td>Sporty</td>\n", + " </tr>\n", + " <tr>\n", + " <th>22</th>\n", + " <td>23</td>\n", + " <td>2670</td>\n", + " <td>27</td>\n", + " <td>Compact</td>\n", + " </tr>\n", + " <tr>\n", + " <th>23</th>\n", + " <td>24</td>\n", + " <td>2640</td>\n", + " <td>23</td>\n", + " <td>Compact</td>\n", + " </tr>\n", + " <tr>\n", + " <th>24</th>\n", + " <td>25</td>\n", + " <td>2655</td>\n", + " <td>26</td>\n", + " <td>Compact</td>\n", + " </tr>\n", + " <tr>\n", + " <th>25</th>\n", + " <td>26</td>\n", + " <td>3065</td>\n", + " <td>25</td>\n", + " <td>Compact</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26</th>\n", + " <td>27</td>\n", + " <td>2750</td>\n", + " <td>24</td>\n", + " <td>Compact</td>\n", + " </tr>\n", + " <tr>\n", + " <th>27</th>\n", + " <td>28</td>\n", + " <td>2920</td>\n", + " <td>26</td>\n", + " <td>Compact</td>\n", + " </tr>\n", + " <tr>\n", + " <th>28</th>\n", + " <td>29</td>\n", + " <td>2780</td>\n", + " <td>24</td>\n", + " <td>Compact</td>\n", + " </tr>\n", + " <tr>\n", + " <th>29</th>\n", + " <td>30</td>\n", + " <td>2745</td>\n", + " <td>25</td>\n", + " <td>Compact</td>\n", + " </tr>\n", + " <tr>\n", + " <th>30</th>\n", + " <td>31</td>\n", + " <td>3110</td>\n", + " <td>21</td>\n", + " <td>Compact</td>\n", + " </tr>\n", + " <tr>\n", + " <th>31</th>\n", + " <td>32</td>\n", + " <td>2920</td>\n", + " <td>21</td>\n", + " <td>Compact</td>\n", + " </tr>\n", + " <tr>\n", + " <th>32</th>\n", + " <td>33</td>\n", + " <td>2645</td>\n", + " <td>23</td>\n", + " <td>Compact</td>\n", + " </tr>\n", + " <tr>\n", + " <th>33</th>\n", + " <td>34</td>\n", + " <td>2575</td>\n", + " <td>24</td>\n", + " <td>Compact</td>\n", + " </tr>\n", + " <tr>\n", + " <th>34</th>\n", + " <td>35</td>\n", + " <td>2935</td>\n", + " <td>23</td>\n", + " <td>Compact</td>\n", + " 
</tr>\n", + " <tr>\n", + " <th>35</th>\n", + " <td>36</td>\n", + " <td>2920</td>\n", + " <td>27</td>\n", + " <td>Compact</td>\n", + " </tr>\n", + " <tr>\n", + " <th>36</th>\n", + " <td>37</td>\n", + " <td>2985</td>\n", + " <td>23</td>\n", + " <td>Compact</td>\n", + " </tr>\n", + " <tr>\n", + " <th>37</th>\n", + " <td>38</td>\n", + " <td>3265</td>\n", + " <td>20</td>\n", + " <td>Medium</td>\n", + " </tr>\n", + " <tr>\n", + " <th>38</th>\n", + " <td>39</td>\n", + " <td>2880</td>\n", + " <td>21</td>\n", + " <td>Medium</td>\n", + " </tr>\n", + " <tr>\n", + " <th>39</th>\n", + " <td>40</td>\n", + " <td>2975</td>\n", + " <td>22</td>\n", + " <td>Medium</td>\n", + " </tr>\n", + " <tr>\n", + " <th>40</th>\n", + " <td>41</td>\n", + " <td>3450</td>\n", + " <td>22</td>\n", + " <td>Medium</td>\n", + " </tr>\n", + " <tr>\n", + " <th>41</th>\n", + " <td>42</td>\n", + " <td>3145</td>\n", + " <td>22</td>\n", + " <td>Medium</td>\n", + " </tr>\n", + " <tr>\n", + " <th>42</th>\n", + " <td>43</td>\n", + " <td>3190</td>\n", + " <td>22</td>\n", + " <td>Medium</td>\n", + " </tr>\n", + " <tr>\n", + " <th>43</th>\n", + " <td>44</td>\n", + " <td>3610</td>\n", + " <td>23</td>\n", + " <td>Medium</td>\n", + " </tr>\n", + " <tr>\n", + " <th>44</th>\n", + " <td>45</td>\n", + " <td>2885</td>\n", + " <td>23</td>\n", + " <td>Medium</td>\n", + " </tr>\n", + " <tr>\n", + " <th>45</th>\n", + " <td>46</td>\n", + " <td>3480</td>\n", + " <td>21</td>\n", + " <td>Medium</td>\n", + " </tr>\n", + " <tr>\n", + " <th>46</th>\n", + " <td>47</td>\n", + " <td>3200</td>\n", + " <td>22</td>\n", + " <td>Medium</td>\n", + " </tr>\n", + " <tr>\n", + " <th>47</th>\n", + " <td>48</td>\n", + " <td>2765</td>\n", + " <td>21</td>\n", + " <td>Medium</td>\n", + " </tr>\n", + " <tr>\n", + " <th>48</th>\n", + " <td>49</td>\n", + " <td>3220</td>\n", + " <td>21</td>\n", + " <td>Medium</td>\n", + " </tr>\n", + " <tr>\n", + " <th>49</th>\n", + " <td>50</td>\n", + " <td>3480</td>\n", + " <td>23</td>\n", + " <td>Medium</td>\n", + " 
</tr>\n", + " <tr>\n", + " <th>50</th>\n", + " <td>51</td>\n", + " <td>3325</td>\n", + " <td>23</td>\n", + " <td>Large</td>\n", + " </tr>\n", + " <tr>\n", + " <th>51</th>\n", + " <td>52</td>\n", + " <td>3855</td>\n", + " <td>18</td>\n", + " <td>Large</td>\n", + " </tr>\n", + " <tr>\n", + " <th>52</th>\n", + " <td>53</td>\n", + " <td>3850</td>\n", + " <td>20</td>\n", + " <td>Large</td>\n", + " </tr>\n", + " <tr>\n", + " <th>53</th>\n", + " <td>54</td>\n", + " <td>3195</td>\n", + " <td>18</td>\n", + " <td>Van</td>\n", + " </tr>\n", + " <tr>\n", + " <th>54</th>\n", + " <td>55</td>\n", + " <td>3735</td>\n", + " <td>18</td>\n", + " <td>Van</td>\n", + " </tr>\n", + " <tr>\n", + " <th>55</th>\n", + " <td>56</td>\n", + " <td>3665</td>\n", + " <td>18</td>\n", + " <td>Van</td>\n", + " </tr>\n", + " <tr>\n", + " <th>56</th>\n", + " <td>57</td>\n", + " <td>3735</td>\n", + " <td>19</td>\n", + " <td>Van</td>\n", + " </tr>\n", + " <tr>\n", + " <th>57</th>\n", + " <td>58</td>\n", + " <td>3415</td>\n", + " <td>20</td>\n", + " <td>Van</td>\n", + " </tr>\n", + " <tr>\n", + " <th>58</th>\n", + " <td>59</td>\n", + " <td>3185</td>\n", + " <td>20</td>\n", + " <td>Van</td>\n", + " </tr>\n", + " <tr>\n", + " <th>59</th>\n", + " <td>60</td>\n", + " <td>3690</td>\n", + " <td>19</td>\n", + " <td>Van</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " X weight mpg type\n", + "0 1 2560 33 Small\n", + "1 2 2345 33 Small\n", + "2 3 1845 37 Small\n", + "3 4 2260 32 Small\n", + "4 5 2440 32 Small\n", + "5 6 2285 26 Small\n", + "6 7 2275 33 Small\n", + "7 8 2350 28 Small\n", + "8 9 2295 25 Small\n", + "9 10 1900 34 Small\n", + "10 11 2390 29 Small\n", + "11 12 2075 35 Small\n", + "12 13 2330 26 Small\n", + "13 14 3320 20 Sporty\n", + "14 15 2885 27 Sporty\n", + "15 16 3310 19 Sporty\n", + "16 17 2695 30 Sporty\n", + "17 18 2170 33 Sporty\n", + "18 19 2710 27 Sporty\n", + "19 20 2775 24 Sporty\n", + "20 21 2840 26 Sporty\n", + "21 22 2485 28 Sporty\n", + "22 
# First import Pandas
import pandas as pd

# Load the fuel data set with the options the exercise asks for:
#   sep=","      -> the file is comma separated
#   index_col=0  -> the first column ("X") is only a running row number
#                   (1, 2, 3, ...), so use it as the row index instead of
#                   keeping it as a data column.
# With this index, label-based selections such as data.loc[1:5, :] address
# the first five cars.
data = pd.read_csv("./Daten_Serie_1/d.fuel.dat", sep=",", index_col=0)

data
# Print the first 5 rows using data.loc
#
# .loc selects by index *label*, not by position. A hard-coded slice like
# 1:5 therefore only returns the first five rows if the index happens to
# start at 1; with the default 0-based index it skips the first row.
# Taking the first five labels from the index itself is correct for any index.
data.loc[data.index[:5], :]
# %% print the first 5 rows using data.head()
data.head()

# %% .mean(): average mpg of all cars and of the "Medium" cars
# Average the "mpg" column directly. Calling .mean() on the whole frame also
# tries to average the string column "type", which raises a TypeError on
# pandas >= 2.0, and computes averages that are never used.
avg_mpg = data["mpg"].mean()

# Boolean mask selecting the rows whose type is "Medium".
is_medium = data["type"].isin(["Medium"])
avg_medium = data.loc[is_medium, "mpg"].mean()

print("Average miles per galon is: \n{}".format(round(avg_mpg, 2)), "\nAverage miles per galon for all Medium type cars is: \n{}".format(round(avg_medium,2)))

# %% Conversion to SI units
# Conversion factors (rounded) used throughout this exercise.
KM_PER_MILE = 1.61
LITRE_PER_GALLON = 3.79
KG_PER_POUND = 0.454  # assumes "weight" is given in pounds - TODO confirm

# miles per gallon -> kilometres per litre
t_kml = data["mpg"] * KM_PER_MILE / LITRE_PER_GALLON
# weight -> kilograms
t_kg = data["weight"] * KG_PER_POUND

print(t_kml.head())
print(t_kg.head())

avg_kml = t_kml.mean()
avg_kg = t_kg.mean()
print("\nAverage Kilometer per liter is: \n{}".format(round(avg_kml, 2)), "\nAverage weight in kilogram is: \n{}".format(round(avg_kg,2)))
"name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/requirements.txt b/requirements.txt index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..30d634363c8c94c1fe5412d74cedcc061c4e5571 100644 --- a/requirements.txt +++ b/requirements.txt @@ -0,0 +1,5 @@ +numpy==1.19.1 +pandas==1.1.3 +matplotlib==3.3.1 +opencv-python==4.1.2.30 +tensorflow==2.1.0 \ No newline at end of file