diff --git a/notebooks/Block_5/Jupyter Notebook Block 5 - Object Detection and Segmentation.ipynb b/notebooks/Block_5/Jupyter Notebook Block 5 - Object Detection and Segmentation.ipynb index 8f8f6b84c47fe19560e937a93d7737c443a8023a..6315722a0aef24b727b2686257d65699ba46b57c 100644 --- a/notebooks/Block_5/Jupyter Notebook Block 5 - Object Detection and Segmentation.ipynb +++ b/notebooks/Block_5/Jupyter Notebook Block 5 - Object Detection and Segmentation.ipynb @@ -550,7 +550,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -569,12 +569,13 @@ "from tensorflow.keras.models import Sequential\n", "from tensorflow.keras.layers import Conv2D, MaxPooling2D\n", "from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense\n", - "from tensorflow.keras.callbacks import TensorBoard " + "from tensorflow.keras.callbacks import TensorBoard \n", + "from tensorflow.keras import layers" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -584,7 +585,7 @@ "<IPython.core.display.Image object>" ] }, - "execution_count": 6, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -856,7 +857,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -887,9 +888,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(480, 4, 4, 512)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "train_features.shape" ] @@ -903,18 +915,38 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(480, 8)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } 
+ ], "source": [ "train_labels.shape" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(80, 4, 4, 512)\n", + "(80, 8)\n" + ] + } + ], "source": [ "print(val_features.shape)\n", "print(val_labels.shape)" @@ -922,7 +954,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -939,18 +971,111 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"model\"\n", + "_________________________________________________________________\n", + " Layer (type) Output Shape Param # \n", + "=================================================================\n", + " input_3 (InputLayer) [(None, 4, 4, 512)] 0 \n", + " \n", + " flatten (Flatten) (None, 8192) 0 \n", + " \n", + " dense (Dense) (None, 256) 2097408 \n", + " \n", + " dropout (Dropout) (None, 256) 0 \n", + " \n", + " dense_1 (Dense) (None, 8) 2056 \n", + " \n", + "=================================================================\n", + "Total params: 2,099,464\n", + "Trainable params: 2,099,464\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n" + ] + } + ], "source": [ "model.summary()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/30\n", + "15/15 [==============================] - 2s 121ms/step - loss: 45.5870 - accuracy: 0.3187 - val_loss: 24.1916 - val_accuracy: 0.3875\n", + "Epoch 2/30\n", + "15/15 [==============================] - 1s 91ms/step - loss: 15.6600 - accuracy: 0.6167 - val_loss: 18.9074 - val_accuracy: 0.4750\n", + "Epoch 3/30\n", + "15/15 
[==============================] - 1s 90ms/step - loss: 10.6316 - accuracy: 0.7083 - val_loss: 19.3461 - val_accuracy: 0.5375\n", + "Epoch 4/30\n", + "15/15 [==============================] - 1s 104ms/step - loss: 10.1559 - accuracy: 0.7333 - val_loss: 29.1282 - val_accuracy: 0.5000\n", + "Epoch 5/30\n", + "15/15 [==============================] - 1s 88ms/step - loss: 6.2820 - accuracy: 0.7917 - val_loss: 26.4884 - val_accuracy: 0.5500\n", + "Epoch 6/30\n", + "15/15 [==============================] - 1s 80ms/step - loss: 4.9714 - accuracy: 0.8542 - val_loss: 23.7884 - val_accuracy: 0.5250\n", + "Epoch 7/30\n", + "15/15 [==============================] - 1s 89ms/step - loss: 4.7241 - accuracy: 0.8542 - val_loss: 25.3600 - val_accuracy: 0.5375\n", + "Epoch 8/30\n", + "15/15 [==============================] - 1s 98ms/step - loss: 4.3037 - accuracy: 0.8729 - val_loss: 28.7259 - val_accuracy: 0.4875\n", + "Epoch 9/30\n", + "15/15 [==============================] - 1s 89ms/step - loss: 2.3747 - accuracy: 0.8979 - val_loss: 30.9324 - val_accuracy: 0.4875\n", + "Epoch 10/30\n", + "15/15 [==============================] - 1s 89ms/step - loss: 2.8623 - accuracy: 0.9000 - val_loss: 28.1495 - val_accuracy: 0.5000\n", + "Epoch 11/30\n", + "15/15 [==============================] - 1s 95ms/step - loss: 2.3595 - accuracy: 0.9187 - val_loss: 35.2087 - val_accuracy: 0.4375\n", + "Epoch 12/30\n", + "15/15 [==============================] - 1s 84ms/step - loss: 2.6335 - accuracy: 0.9104 - val_loss: 30.2336 - val_accuracy: 0.5000\n", + "Epoch 13/30\n", + "15/15 [==============================] - 1s 89ms/step - loss: 3.3222 - accuracy: 0.9083 - val_loss: 32.3224 - val_accuracy: 0.4875\n", + "Epoch 14/30\n", + "15/15 [==============================] - 1s 100ms/step - loss: 1.9744 - accuracy: 0.9312 - val_loss: 29.7901 - val_accuracy: 0.5125\n", + "Epoch 15/30\n", + "15/15 [==============================] - 1s 83ms/step - loss: 2.5093 - accuracy: 0.9396 - val_loss: 28.1908 - val_accuracy: 
0.5625\n", + "Epoch 16/30\n", + "15/15 [==============================] - 1s 90ms/step - loss: 2.2067 - accuracy: 0.9271 - val_loss: 26.1439 - val_accuracy: 0.5375\n", + "Epoch 17/30\n", + "15/15 [==============================] - 1s 89ms/step - loss: 1.4664 - accuracy: 0.9479 - val_loss: 27.0301 - val_accuracy: 0.5500\n", + "Epoch 18/30\n", + "15/15 [==============================] - 1s 96ms/step - loss: 1.9414 - accuracy: 0.9333 - val_loss: 28.5395 - val_accuracy: 0.5375\n", + "Epoch 19/30\n", + "15/15 [==============================] - 1s 85ms/step - loss: 0.8723 - accuracy: 0.9583 - val_loss: 32.3182 - val_accuracy: 0.5125\n", + "Epoch 20/30\n", + "15/15 [==============================] - 1s 88ms/step - loss: 0.7459 - accuracy: 0.9604 - val_loss: 34.9039 - val_accuracy: 0.4250\n", + "Epoch 21/30\n", + "15/15 [==============================] - 1s 92ms/step - loss: 1.0338 - accuracy: 0.9583 - val_loss: 32.4511 - val_accuracy: 0.5125\n", + "Epoch 22/30\n", + "15/15 [==============================] - 1s 85ms/step - loss: 1.3923 - accuracy: 0.9458 - val_loss: 35.8044 - val_accuracy: 0.4750\n", + "Epoch 23/30\n", + "15/15 [==============================] - 1s 88ms/step - loss: 0.8510 - accuracy: 0.9667 - val_loss: 41.1432 - val_accuracy: 0.5000\n", + "Epoch 24/30\n", + "15/15 [==============================] - 1s 101ms/step - loss: 1.3980 - accuracy: 0.9563 - val_loss: 37.2629 - val_accuracy: 0.4875\n", + "Epoch 25/30\n", + "15/15 [==============================] - 1s 92ms/step - loss: 0.8034 - accuracy: 0.9708 - val_loss: 36.0811 - val_accuracy: 0.4750\n", + "Epoch 26/30\n", + "15/15 [==============================] - 1s 86ms/step - loss: 1.5871 - accuracy: 0.9667 - val_loss: 34.1562 - val_accuracy: 0.5625\n", + "Epoch 27/30\n", + "15/15 [==============================] - 1s 104ms/step - loss: 1.5226 - accuracy: 0.9521 - val_loss: 35.3834 - val_accuracy: 0.5000\n", + "Epoch 28/30\n", + "15/15 [==============================] - 1s 91ms/step - loss: 1.0952 - accuracy: 
0.9625 - val_loss: 37.5495 - val_accuracy: 0.4750\n", + "Epoch 29/30\n", + "15/15 [==============================] - 1s 83ms/step - loss: 1.4550 - accuracy: 0.9417 - val_loss: 37.8654 - val_accuracy: 0.4625\n", + "Epoch 30/30\n", + "15/15 [==============================] - 1s 101ms/step - loss: 0.8663 - accuracy: 0.9750 - val_loss: 33.1618 - val_accuracy: 0.5750\n" + ] + } + ], "source": [ "model.compile(loss=\"categorical_crossentropy\",\n", " optimizer=\"rmsprop\",\n", @@ -997,9 +1122,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 432x288 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 432x288 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], "source": [ "plt.plot(history.history['accuracy'])\n", "plt.plot(history.history['val_accuracy'])\n", @@ -1019,18 +1169,36 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3/3 [==============================] - 0s 9ms/step - loss: 33.1618 - accuracy: 0.5750\n" + ] + }, + { + "data": { + "text/plain": [ + "[33.161827087402344, 0.574999988079071]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "model.evaluate(validation_dataset)" + "model.evaluate(val_features, val_labels)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "We reach a validation accuracy of about 46% — much worse than we achieved in the\n", + "We reach a validation accuracy of about 57% — much worse than we achieved in the\n", "previous section with the small model trained from scratch. 
\n", "\n", "The learning curves indicate that we’re overfitting almost from the start—\n", @@ -1096,7 +1264,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 35, "metadata": {}, "outputs": [], "source": [ @@ -1120,7 +1288,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 36, "metadata": {}, "outputs": [], "source": [ @@ -1129,7 +1297,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 37, "metadata": {}, "outputs": [ { @@ -1146,7 +1314,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 38, "metadata": {}, "outputs": [], "source": [ @@ -1155,7 +1323,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 39, "metadata": {}, "outputs": [ { @@ -1192,7 +1360,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 40, "metadata": {}, "outputs": [], "source": [ @@ -1207,7 +1375,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 41, "metadata": {}, "outputs": [], "source": [ @@ -1250,7 +1418,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 42, "metadata": {}, "outputs": [], "source": [ @@ -1265,7 +1433,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1273,105 +1441,33 @@ "output_type": "stream", "text": [ "Epoch 1/50\n", - "15/15 [==============================] - 170s 11s/step - loss: 40.4872 - accuracy: 0.3104 - val_loss: 22.2523 - val_accuracy: 0.4375\n", + "15/15 [==============================] - 169s 11s/step - loss: 81.2648 - accuracy: 0.5354 - val_loss: 96.7737 - val_accuracy: 0.1250\n", "Epoch 2/50\n", - "15/15 [==============================] - 163s 11s/step - loss: 16.3431 - accuracy: 0.5542 - val_loss: 23.0516 - val_accuracy: 0.4875\n", + "15/15 [==============================] - 166s 11s/step - loss: 35.0252 - accuracy: 0.6292 - val_loss: 102.6046 - val_accuracy: 0.2375\n", "Epoch 3/50\n", - 
"15/15 [==============================] - 162s 11s/step - loss: 16.4505 - accuracy: 0.6062 - val_loss: 30.8938 - val_accuracy: 0.4125\n", + "15/15 [==============================] - 167s 11s/step - loss: 37.1056 - accuracy: 0.5312 - val_loss: 103.8217 - val_accuracy: 0.2000\n", "Epoch 4/50\n", - "15/15 [==============================] - 165s 11s/step - loss: 14.6992 - accuracy: 0.6229 - val_loss: 21.7834 - val_accuracy: 0.5125\n", + "15/15 [==============================] - 168s 11s/step - loss: 26.6293 - accuracy: 0.6083 - val_loss: 94.3766 - val_accuracy: 0.2375\n", "Epoch 5/50\n", - "15/15 [==============================] - 167s 11s/step - loss: 11.4793 - accuracy: 0.6729 - val_loss: 27.7946 - val_accuracy: 0.4500\n", + "15/15 [==============================] - 165s 11s/step - loss: 19.5419 - accuracy: 0.6479 - val_loss: 83.0323 - val_accuracy: 0.2500\n", "Epoch 6/50\n", - "15/15 [==============================] - 168s 11s/step - loss: 12.4257 - accuracy: 0.6729 - val_loss: 25.0661 - val_accuracy: 0.4125\n", + "15/15 [==============================] - 162s 11s/step - loss: 19.0512 - accuracy: 0.6625 - val_loss: 76.1191 - val_accuracy: 0.3000\n", "Epoch 7/50\n", - "15/15 [==============================] - 164s 11s/step - loss: 8.7416 - accuracy: 0.7250 - val_loss: 26.6285 - val_accuracy: 0.5125\n", + "15/15 [==============================] - 168s 11s/step - loss: 17.0602 - accuracy: 0.6771 - val_loss: 70.0092 - val_accuracy: 0.3750\n", "Epoch 8/50\n", - "15/15 [==============================] - 162s 11s/step - loss: 8.3796 - accuracy: 0.7250 - val_loss: 24.7431 - val_accuracy: 0.4375\n", + "15/15 [==============================] - 168s 11s/step - loss: 20.1183 - accuracy: 0.6687 - val_loss: 78.0569 - val_accuracy: 0.3000\n", "Epoch 9/50\n", - "15/15 [==============================] - 163s 11s/step - loss: 8.1311 - accuracy: 0.7417 - val_loss: 20.8759 - val_accuracy: 0.5000\n", + "15/15 [==============================] - 161s 11s/step - loss: 15.5537 - accuracy: 
0.6750 - val_loss: 71.6261 - val_accuracy: 0.3625\n", "Epoch 10/50\n", - "15/15 [==============================] - 161s 11s/step - loss: 8.5853 - accuracy: 0.7604 - val_loss: 27.0080 - val_accuracy: 0.5250\n", + "15/15 [==============================] - 164s 11s/step - loss: 13.9609 - accuracy: 0.7333 - val_loss: 83.8828 - val_accuracy: 0.3125\n", "Epoch 11/50\n", - "15/15 [==============================] - 161s 11s/step - loss: 7.2803 - accuracy: 0.7750 - val_loss: 25.0368 - val_accuracy: 0.4875\n", + "15/15 [==============================] - 160s 11s/step - loss: 15.3212 - accuracy: 0.7125 - val_loss: 79.7747 - val_accuracy: 0.3250\n", "Epoch 12/50\n", - "15/15 [==============================] - 161s 11s/step - loss: 8.3474 - accuracy: 0.7708 - val_loss: 24.2293 - val_accuracy: 0.5000\n", + "15/15 [==============================] - 167s 11s/step - loss: 16.1321 - accuracy: 0.7021 - val_loss: 80.1300 - val_accuracy: 0.3125\n", "Epoch 13/50\n", - "15/15 [==============================] - 163s 11s/step - loss: 7.2649 - accuracy: 0.7937 - val_loss: 27.9647 - val_accuracy: 0.4875\n", + "15/15 [==============================] - 166s 11s/step - loss: 17.3188 - accuracy: 0.6896 - val_loss: 72.9308 - val_accuracy: 0.3375\n", "Epoch 14/50\n", - "15/15 [==============================] - 161s 11s/step - loss: 5.6787 - accuracy: 0.8000 - val_loss: 25.7642 - val_accuracy: 0.5625\n", - "Epoch 15/50\n", - "15/15 [==============================] - 161s 11s/step - loss: 9.5175 - accuracy: 0.7750 - val_loss: 25.2030 - val_accuracy: 0.5875\n", - "Epoch 16/50\n", - "15/15 [==============================] - 159s 11s/step - loss: 5.8167 - accuracy: 0.8125 - val_loss: 27.7553 - val_accuracy: 0.4500\n", - "Epoch 17/50\n", - "15/15 [==============================] - 160s 11s/step - loss: 5.7371 - accuracy: 0.8375 - val_loss: 33.3951 - val_accuracy: 0.4875\n", - "Epoch 18/50\n", - "15/15 [==============================] - 162s 11s/step - loss: 5.5106 - accuracy: 0.8062 - val_loss: 24.9338 
- val_accuracy: 0.5250\n", - "Epoch 19/50\n", - "15/15 [==============================] - 160s 11s/step - loss: 5.6374 - accuracy: 0.8208 - val_loss: 31.7434 - val_accuracy: 0.5375\n", - "Epoch 20/50\n", - "15/15 [==============================] - 181s 12s/step - loss: 5.2847 - accuracy: 0.8500 - val_loss: 27.8406 - val_accuracy: 0.5250\n", - "Epoch 21/50\n", - "15/15 [==============================] - 160s 11s/step - loss: 3.9255 - accuracy: 0.8604 - val_loss: 24.6560 - val_accuracy: 0.5500\n", - "Epoch 22/50\n", - "15/15 [==============================] - 162s 11s/step - loss: 4.5439 - accuracy: 0.8667 - val_loss: 28.7081 - val_accuracy: 0.4875\n", - "Epoch 23/50\n", - "15/15 [==============================] - 157s 10s/step - loss: 5.1945 - accuracy: 0.8333 - val_loss: 28.1471 - val_accuracy: 0.5250\n", - "Epoch 24/50\n", - "15/15 [==============================] - 157s 11s/step - loss: 5.4685 - accuracy: 0.8292 - val_loss: 27.2732 - val_accuracy: 0.4875\n", - "Epoch 25/50\n", - "15/15 [==============================] - 156s 10s/step - loss: 4.9645 - accuracy: 0.8458 - val_loss: 28.9958 - val_accuracy: 0.4625\n", - "Epoch 26/50\n", - "15/15 [==============================] - 158s 11s/step - loss: 4.2419 - accuracy: 0.8604 - val_loss: 31.0298 - val_accuracy: 0.5250\n", - "Epoch 27/50\n", - "15/15 [==============================] - 177s 12s/step - loss: 4.6601 - accuracy: 0.8667 - val_loss: 35.1135 - val_accuracy: 0.5000\n", - "Epoch 28/50\n", - "15/15 [==============================] - 158s 11s/step - loss: 4.3568 - accuracy: 0.8729 - val_loss: 35.3589 - val_accuracy: 0.4750\n", - "Epoch 29/50\n", - "15/15 [==============================] - 182s 12s/step - loss: 4.6585 - accuracy: 0.8479 - val_loss: 39.2265 - val_accuracy: 0.4500\n", - "Epoch 30/50\n", - "15/15 [==============================] - 165s 11s/step - loss: 4.8581 - accuracy: 0.8583 - val_loss: 30.4833 - val_accuracy: 0.5375\n", - "Epoch 31/50\n", - "15/15 [==============================] - 165s 11s/step 
- loss: 3.3673 - accuracy: 0.8833 - val_loss: 31.5158 - val_accuracy: 0.5125\n", - "Epoch 32/50\n", - "15/15 [==============================] - 160s 11s/step - loss: 5.6675 - accuracy: 0.8625 - val_loss: 29.5487 - val_accuracy: 0.5375\n", - "Epoch 33/50\n", - "15/15 [==============================] - 157s 10s/step - loss: 3.5243 - accuracy: 0.8771 - val_loss: 29.5125 - val_accuracy: 0.5500\n", - "Epoch 34/50\n", - "15/15 [==============================] - 158s 11s/step - loss: 3.1388 - accuracy: 0.8833 - val_loss: 31.3419 - val_accuracy: 0.5125\n", - "Epoch 35/50\n", - "15/15 [==============================] - 164s 11s/step - loss: 4.4508 - accuracy: 0.8771 - val_loss: 28.2114 - val_accuracy: 0.5375\n", - "Epoch 36/50\n", - "15/15 [==============================] - 159s 11s/step - loss: 3.7561 - accuracy: 0.8667 - val_loss: 27.7479 - val_accuracy: 0.5250\n", - "Epoch 37/50\n", - "15/15 [==============================] - 160s 11s/step - loss: 2.8535 - accuracy: 0.8958 - val_loss: 30.9036 - val_accuracy: 0.5125\n", - "Epoch 38/50\n", - "15/15 [==============================] - 158s 11s/step - loss: 2.7719 - accuracy: 0.8896 - val_loss: 28.7516 - val_accuracy: 0.5500\n", - "Epoch 39/50\n", - "15/15 [==============================] - 162s 11s/step - loss: 4.3758 - accuracy: 0.8562 - val_loss: 30.6686 - val_accuracy: 0.5500\n", - "Epoch 40/50\n", - "15/15 [==============================] - 182s 12s/step - loss: 3.4704 - accuracy: 0.8750 - val_loss: 26.9565 - val_accuracy: 0.5750\n", - "Epoch 41/50\n", - "15/15 [==============================] - 169s 11s/step - loss: 3.2676 - accuracy: 0.9042 - val_loss: 33.6229 - val_accuracy: 0.5000\n", - "Epoch 42/50\n", - "15/15 [==============================] - 167s 11s/step - loss: 3.6569 - accuracy: 0.8792 - val_loss: 30.1333 - val_accuracy: 0.5250\n", - "Epoch 43/50\n", - "15/15 [==============================] - 164s 11s/step - loss: 3.4009 - accuracy: 0.9062 - val_loss: 30.1136 - val_accuracy: 0.5625\n", - "Epoch 44/50\n", - 
"15/15 [==============================] - 157s 11s/step - loss: 3.0915 - accuracy: 0.9083 - val_loss: 33.6270 - val_accuracy: 0.5500\n", - "Epoch 45/50\n", - "15/15 [==============================] - 177s 12s/step - loss: 3.6787 - accuracy: 0.8562 - val_loss: 32.5366 - val_accuracy: 0.5625\n", - "Epoch 46/50\n", - "15/15 [==============================] - 160s 11s/step - loss: 2.6454 - accuracy: 0.9042 - val_loss: 35.2887 - val_accuracy: 0.5375\n", - "Epoch 47/50\n", - "15/15 [==============================] - 165s 11s/step - loss: 4.6315 - accuracy: 0.8771 - val_loss: 26.3066 - val_accuracy: 0.5750\n", - "Epoch 48/50\n", - "15/15 [==============================] - 179s 12s/step - loss: 3.1069 - accuracy: 0.9021 - val_loss: 27.8295 - val_accuracy: 0.6000\n", - "Epoch 49/50\n", - "15/15 [==============================] - 163s 11s/step - loss: 2.7718 - accuracy: 0.8854 - val_loss: 35.5000 - val_accuracy: 0.5125\n", - "Epoch 50/50\n", - "15/15 [==============================] - 159s 11s/step - loss: 3.1988 - accuracy: 0.8938 - val_loss: 33.8036 - val_accuracy: 0.5625\n" + "15/15 [==============================] - ETA: 0s - loss: 13.4744 - accuracy: 0.7479" ] } ], @@ -1392,34 +1488,9 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "<Figure size 432x288 with 1 Axes>" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "<Figure size 432x288 with 1 Axes>" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "plt.plot(history.history['accuracy'])\n", "plt.plot(history.history['val_accuracy'])\n", @@ -1439,27 +1510,9 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - 
"text": [ - "3/3 [==============================] - 23s 7s/step - loss: 33.8036 - accuracy: 0.5625\n" - ] - }, - { - "data": { - "text/plain": [ - "[33.8035888671875, 0.5625]" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "model.evaluate(validation_dataset)" ] @@ -1468,7 +1521,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "As you can see, we reach a validation accuracy of over 56%. This is a strong improvement over the previous model." + "As you can see, we reach a validation accuracy of over 56%. " ] }, { @@ -1707,7 +1760,7 @@ }, { "cell_type": "code", - "execution_count": 111, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -1716,7 +1769,7 @@ }, { "cell_type": "code", - "execution_count": 112, + "execution_count": 25, "metadata": { "colab": {}, "colab_type": "code", @@ -1725,17 +1778,14 @@ }, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "[ 2 0 1 0 0 2 0 5 ], angelina jolie\n", - "[ 0 3 0 0 4 0 3 0 ], brad pitt\n", - "[ 0 0 8 0 0 0 0 2 ], catherine deneuve\n", - "[ 0 0 1 6 1 0 1 1 ], johnny depp\n", - "[ 0 2 0 0 7 0 1 0 ], leonardo dicaprio\n", - "[ 2 0 1 0 0 3 0 4 ], marion cotillard\n", - "[ 0 1 0 0 0 0 9 0 ], robert de niro\n", - "[ 1 0 0 0 0 2 0 7 ], sandra bullock\n" + "ename": "NameError", + "evalue": "name 'num_valid_images' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-25-d6a92e7cceeb>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mY_valid\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mzeros\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnum_valid_images\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mstep\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnum_valid_images\u001b[0m \u001b[0;34m//\u001b[0m \u001b[0mnum_classes\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mind\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnum_classes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mY_valid\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mind\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mind\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mind\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'num_valid_images' is not defined" ] } ], @@ -1904,6 +1954,262 @@ " " ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Part III : Semantic Segmentation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this section, we’ll focus on semantic segmentation: we’ll be looking once again at\n", + "images of cats and dogs, and this time we’ll learn how to tell apart the main subject\n", + "and its background.\n", + "\n", + "We’ll work with the Oxford-IIIT Pets dataset (www.robots.ox.ac.uk/~vgg/data/pets/), which contains 7'390 pictures of various breeds of 
cats and dogs, together with foreground-background segmentation masks for \n", + "each picture. A segmentation mask is the image-segmentation equivalent of a label: it’s an image the same size as the input image, with a single color channel where each integer value corresponds to the class of the corresponding pixel in the input image. In our case, the pixels of our segmentation\n", + "masks can take one of three integer values:\n", + "1. (foreground)\n", + "2. (background)\n", + "3. (contour)\n", + "\n", + "Let’s start by downloading and uncompressing our dataset, using the `wget` and `tar`\n", + "shell utilities:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!wget http://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz\n", + "!wget http://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz\n", + "!tar -xf images.tar.gz\n", + "!tar -xf annotations.tar.gz" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The input pictures are stored as JPG files in the `images/` folder \n", + "(such as images/Abyssinian_1.jpg), and the corresponding segmentation mask is stored as a PNG file with\n", + "the same name in the `annotations/trimaps/` folder (such as annotations/trimaps/Abyssinian_1.png).\n", + "Let’s prepare the list of input file paths, as well as the list of the corresponding\n", + "mask file paths:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "input_dir = \"images/\"\n", + "target_dir = \"annotations/trimaps/\"\n", + "input_img_paths = sorted(\n", + " [os.path.join(input_dir, fname)\n", + " for fname in os.listdir(input_dir)\n", + " if fname.endswith(\".jpg\")])\n", + " \n", + "target_paths = sorted([os.path.join(target_dir, fname)\n", + " for fname in os.listdir(target_dir)\n", + " if fname.endswith(\".png\") and not fname.startswith(\".\")])" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "Now, what does one of these inputs and its mask look like? Let’s take a quick look.\n", + "Here’s a sample image:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "from tensorflow.keras.utils import load_img, img_to_array\n", + "plt.axis(\"off\")\n", + "# Display input image number 9\n", + "plt.imshow(load_img(input_img_paths[9]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And here is its corresponding target:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def display_target(target_array):\n", + " # The original labels are 1, 2, and 3. We subtract 1 so that the\n", + " # labels range from 0 to 2, and then we multiply by 127 so that\n", + " # the labels become 0 (black), 127 (gray), 254 (near-white).\n", + " normalized_array = (target_array.astype(\"uint8\") - 1) * 127\n", + " plt.axis(\"off\")\n", + " plt.imshow(normalized_array[:, :, 0])\n", + "\n", + "# We use color_mode=\"grayscale\" so that the image we load is treated as\n", + "# having a single color channel. \n", + "img = img_to_array(load_img(target_paths[9], color_mode=\"grayscale\"))\n", + "display_target(img)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, let’s load our inputs and targets into two NumPy arrays, and let’s split the arrays\n", + "into a training and a validation set. 
Since the dataset is very small, we can just load\n", + "everything into memory:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import random\n", + "\n", + "# We resize everything to 200x200\n", + "img_size = (200, 200)\n", + "# Total number of samples in the data\n", + "num_imgs = len(input_img_paths)\n", + "\n", + "# Shuffle the file paths (they were originally sorted by breed). We \n", + "# use the same seed (1337) in both statements to ensure that the input \n", + "# paths and target paths stay in the same order\n", + "\n", + "random.Random(1337).shuffle(input_img_paths)\n", + "random.Random(1337).shuffle(target_paths)\n", + "\n", + "def path_to_input_image(path):\n", + " return img_to_array(load_img(path, target_size=img_size))\n", + "\n", + "def path_to_target(path):\n", + " img = img_to_array(\n", + " load_img(path, target_size=img_size, color_mode=\"grayscale\"))\n", + " # Subtract 1 so that our labels become 0, 1, and 2\n", + " img = img.astype(\"uint8\") - 1\n", + " return img\n", + "\n", + "# Load all images in the input_imgs float32 array and their masks in the\n", + "# targets uint8 array (same order). 
The inputs have three channels (RGB values)\n", + "# and the targets have a single channel (which contains integer labels)\n", + "input_imgs = np.zeros((num_imgs,) + img_size + (3,), dtype=\"float32\")\n", + "targets = np.zeros((num_imgs,) + img_size + (1,), dtype=\"uint8\")\n", + "for i in range(num_imgs):\n", + " input_imgs[i] = path_to_input_image(input_img_paths[i])\n", + " targets[i] = path_to_target(target_paths[i])\n", + " \n", + "# Reserve 1000 samples for validation\n", + "num_val_samples = 1000\n", + "\n", + "# Split the data into a training and a\n", + "# validation set\n", + "train_input_imgs = input_imgs[:-num_val_samples]\n", + "train_targets = targets[:-num_val_samples]\n", + "val_input_imgs = input_imgs[-num_val_samples:]\n", + "val_targets = targets[-num_val_samples:]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now it’s time to define our model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_model(img_size, num_classes):\n", + " inputs = keras.Input(shape=img_size + (3,))\n", + " # Don’t forget to rescale input images to the [0-1] range\n", + " x = layers.Rescaling(1./255)(inputs)\n", + " ### [First half of the network: downsampling inputs] ###\n", + "\n", + " # Entry block\n", + " x = layers.Conv2D(32, 3, strides=2, padding=\"same\")(x)\n", + " x = layers.BatchNormalization()(x)\n", + " x = layers.Activation(\"relu\")(x)\n", + "\n", + " previous_block_activation = x # Set aside residual\n", + "\n", + " # Blocks 1, 2, 3 are identical apart from the feature depth.\n", + " for filters in [64, 128, 256]:\n", + " x = layers.Activation(\"relu\")(x)\n", + " x = layers.SeparableConv2D(filters, 3, padding=\"same\")(x)\n", + " x = layers.BatchNormalization()(x)\n", + "\n", + " x = layers.Activation(\"relu\")(x)\n", + " x = layers.SeparableConv2D(filters, 3, padding=\"same\")(x)\n", + " x = layers.BatchNormalization()(x)\n", + "\n", + " x = 
layers.MaxPooling2D(3, strides=2, padding=\"same\")(x)\n", + "\n", + " # Project residual\n", + " residual = layers.Conv2D(filters, 1, strides=2, padding=\"same\")(\n", + " previous_block_activation\n", + " )\n", + " x = layers.add([x, residual]) # Add back residual\n", + " previous_block_activation = x # Set aside next residual\n", + "\n", + " ### [Second half of the network: upsampling inputs] ###\n", + "\n", + " for filters in [256, 128, 64, 32]:\n", + " x = layers.Activation(\"relu\")(x)\n", + " x = layers.Conv2DTranspose(filters, 3, padding=\"same\")(x)\n", + " x = layers.BatchNormalization()(x)\n", + "\n", + " x = layers.Activation(\"relu\")(x)\n", + " x = layers.Conv2DTranspose(filters, 3, padding=\"same\")(x)\n", + " x = layers.BatchNormalization()(x)\n", + "\n", + " x = layers.UpSampling2D(2)(x)\n", + "\n", + " # Project residual\n", + " residual = layers.UpSampling2D(2)(previous_block_activation)\n", + " residual = layers.Conv2D(filters, 1, padding=\"same\")(residual)\n", + " x = layers.add([x, residual]) # Add back residual\n", + " previous_block_activation = x # Set aside next residual\n", + "\n", + " # Add a per-pixel classification layer\n", + " outputs = layers.Conv2D(num_classes, 3, activation=\"softmax\", padding=\"same\")(x)\n", + "\n", + " # Define the model\n", + " model = keras.Model(inputs, outputs)\n", + " return model\n", + "\n", + "\n", + "# Free up RAM in case the model definition cells were run multiple times\n", + "keras.backend.clear_session()\n", + "\n", + "# Build model\n", + "model = get_model(img_size, num_classes)\n", + "model.summary()" + ] + }, { "cell_type": "code", "execution_count": null, @@ -1918,6 +2224,27 @@ "outputs": [], "source": [] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Part 
IV : Object Detection with YOLO" + ] + }, { "cell_type": "markdown", "metadata": { @@ -1925,7 +2252,7 @@ "id": "R0dfpdDOGhM2" }, "source": [ - "# Part IV : Object Detection with Mask R-CNN\n", + "# Part V : Instance Segmentation with Mask R-CNN\n", "\n", "### Please run this section on Colab !" ]