From 4a890dfc731439531e9d8f6aa215464741a0dc05 Mon Sep 17 00:00:00 2001 From: Mirko Birbaumer <mirko.birbaumer@hslu.ch> Date: Fri, 16 Sep 2022 11:34:05 +0000 Subject: [PATCH] Added code for Numpy Intro --- .../Preliminaries_Numpy_Pandas.ipynb | 710 ++++++++++++++++-- 1 file changed, 638 insertions(+), 72 deletions(-) diff --git a/notebooks/Block_0/Examples script/Preliminaries_Numpy_Pandas.ipynb b/notebooks/Block_0/Examples script/Preliminaries_Numpy_Pandas.ipynb index 0fcbd39..eb29636 100644 --- a/notebooks/Block_0/Examples script/Preliminaries_Numpy_Pandas.ipynb +++ b/notebooks/Block_0/Examples script/Preliminaries_Numpy_Pandas.ipynb @@ -4,16 +4,25 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 3.2 Funktions, Conditionals, and Iteration in Python\n", + "## Functions, Conditionals, and Iteration in Python\n", "\n", "Let us create a Python function, and call it from a loop." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hello World, x was < 10\n", + "3\n" + ] + } + ], "source": [ "def HelloWorldXY(x, y):\n", " if (x < 10):\n", @@ -24,7 +33,7 @@ " print(\"Hello World, x was >= 20\")\n", " return x + y\n", "\n", - "print(HelloWorldXY(1,2))\n" + "print(HelloWorldXY(1,2))" ] }, { @@ -36,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -44,18 +53,21 @@ "output_type": "stream", "text": [ "\n", - "--- Now running with i: 8\n" - ] - }, - { - "ename": "NameError", - "evalue": "name 'HelloWorldXY' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-1-7ea2350df544>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m 
\u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m8\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m25\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# i=8, 13, 18, 23 (start, stop, step)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"\\n--- Now running with i: {}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mHelloWorldXY\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Result from HelloWorld: {}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mNameError\u001b[0m: name 'HelloWorldXY' is not defined" + "--- Now running with i: 8\n", + "Hello World, x was < 10\n", + "Result from HelloWorld: 16\n", + "\n", + "--- Now running with i: 13\n", + "Hello World, x was >= 10 but < 20\n", + "Result from HelloWorld: 26\n", + "\n", + "--- Now running with i: 18\n", + "Hello World, x was >= 10 but < 20\n", + "Result from HelloWorld: 36\n", + "\n", + "--- Now running with i: 23\n", + "Hello World, x was >= 20\n", + "Result from HelloWorld: 46\n" ] } ], @@ -75,7 +87,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -87,13 +99,7 @@ "1\n", "Iterate over an actual list.\n", "0\n", - "1\n", - "While 
works\n", - "0\n", - "1\n", - "Python supports standard key words like continue and break\n", - "Entered while\n", - "while broken\n" + "1\n" ] } ], @@ -104,8 +110,28 @@ "\n", "print(\"Iterate over an actual list.\")\n", "for i in [0,1]:\n", - " print(i)\n", - "\n", + " print(i)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "While works\n", + "0\n", + "1\n", + "Python supports standard key words like continue and break\n", + "Entered while\n", + "while broken\n" + ] + } + ], + "source": [ "print(\"While works\")\n", "i = 0\n", "while i < 2:\n", @@ -123,63 +149,603 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 3.3 Data in Numpy" + "## NumPy\n", + "\n", + "### Introducing NumPy\n", + "\n", + "Python is convenient, but it can also be slow. However, it does \n", + "allow you to access libraries that execute faster code written in \n", + "languages like C. NumPy is one such library: it provides fast alternatives \n", + "to math operations in Python and is designed to work efficiently with \n", + "groups of numbers - like matrices.\n", + "\n", + "NumPy is a large library and we are only going to scratch the surface \n", + "of it here. 
If you plan on doing much math with Python, you should \n", + "definitely spend some time exploring its documentation to learn more.\n", + "\n", + "### Importing NumPy\n", + "\n", + "When importing the NumPy library, the convention you will see \n", + "used most often - including here - is to name it `np`, like so:" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now you can use the library by prefixing the names of functions and \n", + "types with `np`, which you will see in the following examples.\n", + "\n", + "### Data Types and Shapes\n", + "\n", + "The most common way to work with numbers in NumPy is through `ndarray` \n", + "objects. They are similar to Python lists, but can have any number of \n", + "dimensions. Also, `ndarray` supports fast math operations, which \n", + "is just what we want.\n", + "\n", + "Since it can store any number of dimensions, you can use `ndarrays` \n", + "to represent any of the data types: scalars, vectors, \n", + "matrices, or tensors. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Scalars\n", + "\n", + "Scalars in NumPy are a bit more involved than in Python. Instead of \n", + "Python's basic types like `int`, `float`, etc., NumPy lets \n", + "you specify signed and unsigned types, as well as different sizes.\n", + "So instead of Python's `int`, you have access to types \n", + "like `uint8`, `int8`, `uint16`, `int16`, and so on.\n", + "\n", + "These types are important because every object you make \n", + "(vectors, matrices, tensors) eventually stores scalars. And when you \n", + "create a NumPy array, you can specify the type - _but every item in the \n", + "array must have the same type_.
In this regard, NumPy arrays are more \n", + "like C arrays than Python lists.\n", + "\n", + "If you want to create a NumPy array that holds a scalar, you do so \n", + "by passing the value to NumPy's `array` function, as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Shape scaler () \n", - "Shape vector (3,) \n", - "Shape matrix (3, 3) \n", - "Shape tensor (3, 3, 2, 1)\n", - "Type scalar or array <class 'numpy.ndarray'> \n", - "Type after addition with integer <class 'numpy.int64'>\n", - "v[1:] = [ 2 10] \n", - "m[1:][2:] = \n", - " [[5 6]\n", - " [8 9]]\n", - "[ 1 2 10] [[ 1 2 10]] [[ 1 2 10]]\n", - "(3,) (1, 3) (1, 3)\n" - ] + "data": { + "text/plain": [ + "array(5)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "import numpy as np\n", - "\n", "# Scalar\n", "s = np.array(5)\n", - "# Vector\n", - "v = np.array([1, 2, 10])\n", - "# Matrix\n", - "m = np.array([[1,2,3], \n", - " [4,5,6], \n", - " [7,8,9]])\n", - "# Tensor:\n", - "t = np.array([[[[1],[2]], [[3],[4]], [[5],[6]]],\n", - " [[[7],[8]], [[9],[10]], [[11],[12]]],\n", - " [[[13],[14]], [[15],[16]], [[17],[17]]]])\n", - "\n", - "# Shape\n", - "print(\"Shape scaler\", s.shape, \"\\nShape vector\", v.shape, \"\\nShape matrix\", m.shape, \"\\nShape tensor\", t.shape)\n", - "\n", - "# Type\n", - "print(\"Type scalar or array\", type(s), \"\\nType after addition with integer\", type(s + 3))\n", - "\n", - "# Slicing\n", - "print(\"v[1:] = \", v[1:], \"\\nm[1:][2:] = \\n\", m[1:,1:])\n", - "\n", - "# Reshape arrays\n", - "x = v.reshape(1, 3)\n", - "y = v[None, :]\n", - "print(v, x, y)\n", - "print(v.shape, x.shape, y.shape)\n" + "s" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can display the number of axes of a NumPy `array` via the `ndim` attribute;\n", + "a scalar array has $0$ axes (`ndim` == 0). 
The number of axes of an array is also\n", + "called its _rank_. " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s.ndim" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can see the shape of your arrays by checking their `shape` attribute. So if \n", + "you executed this code:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "()" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "it would print out the result, an empty pair of parentheses, `()`. This \n", + "indicates that it has zero dimensions.\n", + "\n", + "Even though scalars are inside arrays, you still use them like a normal scalar. \n", + "So you could type:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "numpy.int64" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x = s + 3\n", + "type(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "and $x$ would now equal $8$. If you were to check the type of \n", + "$x$, you would find it is probably `numpy.int64`, because \n", + "it is working with NumPy types, not Python types.\n", + "\n", + "By the way, even scalar types support most of the array functions. \n", + "So you can call `x.shape` and it would return `()` because \n", + "it has zero dimensions, even though it is not an array. If you tried \n", + "that with a normal Python scalar, you would get an error."
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Vectors\n", + "\n", + "To create a vector, you would pass a Python list to the `array` function, like this:" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 2, 3])" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "v = np.array([1,2,3])\n", + "v" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This vector has three entries and so is called a 3-dimensional vector. If you check a vector's `shape` attribute, it will return a tuple containing a single number: the \n", + "vector's length along its only axis. In the above example, `v.shape` is `(3,)`:" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(3,)" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "v.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Don’t confuse a 3D vector with a 3D array. A 3D vector has only one axis and has three dimensions along its axis, whereas a 3D array has three axes (and may have any number of dimensions along each axis).
" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "v.ndim" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Dimensionality can denote either the number of entries along a specific\n", + "axis (as in the case of our 3D vector) or the number of axes in an array (such as a\n", + "3D array), which can be confusing at times.\n", + "\n", + "You can access an element within the vector using indices, like this:" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "v[1]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "NumPy also supports more flexible indexing, such as slicing. For example, to access the items from the \n", + "second element onward, you would say:" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([2, 3])" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "v[1:]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "NumPy slicing is quite powerful, \n", + "allowing you to access any combination of items in an `ndarray`. But it can also be a bit complicated, \n", + "so you should read up on it in the documentation." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Matrices\n", + "\n", + "You create matrices using `NumPy`'s array function, just as you did for vectors. However, instead \n", + "of just passing in a list, you need to supply a list of lists, where each list represents \n", + "a row.
So to create a $3\\times 3$ matrix containing the numbers one through nine, you could \n", + "do this:" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1, 2, 3],\n", + " [4, 5, 6],\n", + " [7, 8, 9]])" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m = np.array([[1,2,3], [4,5,6], [7,8,9]])\n", + "m" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The entries from the first axis are called the _rows_, and the entries from \n", + "the second axis are called the _columns_. A matrix thus has two axes or _rank_ 2:" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m.ndim" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Checking its `shape` attribute would return the tuple `(3, 3)` to indicate it has two dimensions, each of length $3$:" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(3, 3)" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can access elements of matrices just like vectors, but using additional index values.
So to find \n", + "the number $6$ in the above matrix, you would access" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "6" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m[1][2]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Tensors\n", + "\n", + "Tensors are just like vectors and matrices, but they can have more dimensions. For example, to \n", + "create a $3\\times 3\\times 2\\times 1$ tensor, you could do the following:" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[[[ 1],\n", + " [ 2]],\n", + "\n", + " [[ 3],\n", + " [ 4]],\n", + "\n", + " [[ 5],\n", + " [ 6]]],\n", + "\n", + "\n", + " [[[ 7],\n", + " [ 8]],\n", + "\n", + " [[ 9],\n", + " [10]],\n", + "\n", + " [[11],\n", + " [12]]],\n", + "\n", + "\n", + " [[[13],\n", + " [14]],\n", + "\n", + " [[15],\n", + " [16]],\n", + "\n", + " [[17],\n", + " [17]]]])" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t = np.array([[[[1],[2]],[[3],[4]],[[5],[6]]],[[[7],[8]],\\\n", + " [[9],[10]],[[11],[12]]],[[[13],[14]],[[15],[16]],[[17],[17]]]])\n", + "t" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And `t.shape` returns `(3, 3, 2, 1)` and `t.ndim` indicates that we are dealing with a rank 4 tensor." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(3, 3, 2, 1)" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t.ndim" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can access items just like with matrices, but with more indices. So " + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "16" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t[2][1][1][0]" ] }, { -- GitLab