diff --git a/notebooks/Linear Regression/LR_7_1.ipynb b/notebooks/Linear Regression/LR_7_1.ipynb index 035b0ad2962c43391f815021e5512a7ebbdc7b22..7e8c8bbeb39ccd355efd03b13479325456074aba 100644 --- a/notebooks/Linear Regression/LR_7_1.ipynb +++ b/notebooks/Linear Regression/LR_7_1.ipynb @@ -10,18 +10,10 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 15, "id": "6645d58f-73f2-4bb4-ae9f-17a0b334a652", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING (pytensor.tensor.blas): Using NumPy C-API based implementation for BLAS functions.\n" - ] - } - ], + "outputs": [], "source": [ "import arviz as az\n", "import matplotlib.pyplot as plt\n", @@ -35,7 +27,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 16, "id": "a76946c6-bb24-4032-9b40-35a2303dec27", "metadata": {}, "outputs": [ @@ -63,8 +55,8 @@ }, { "cell_type": "code", - "execution_count": 3, - "id": "97ef4d21-6ffb-4b9e-a973-94c6815600b3", + "execution_count": 17, + "id": "305f1c77-27b3-4c5f-ac93-24d87111d0af", "metadata": {}, "outputs": [ { @@ -74,7 +66,7 @@ "Auto-assigning NUTS sampler...\n", "Initializing NUTS using jitter+adapt_diag...\n", "Multiprocess sampling (4 chains in 4 jobs)\n", - "NUTS: [sigma, Intercept, TV, Zeitung, Radio]\n" + "NUTS: [sigma, Intercept, TV]\n" ] }, { @@ -106,86 +98,90 @@ "name": "stderr", "output_type": "stream", "text": [ - "Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 5 seconds.\n" + "Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 4 seconds.\n" ] } ], "source": [ "import bambi as bmb\n", - "model_trz = bmb.Model(\"Verkauf ~ TV + Zeitung + Radio\", werbung)\n", - "idata_trz = model_trz.fit(random_seed=123)" + "model_t = bmb.Model(\"Verkauf ~ TV\", werbung)\n", + "idata_t = model_t.fit(random_seed=123)" ] }, { "cell_type": "code", - "execution_count": 5, - "id": 
"27f1854b-e66e-432e-aa61-66b928e73f16", + "execution_count": 18, + "id": "b1e588a7-0840-4b8b-a41d-523901096600", "metadata": {}, "outputs": [], "source": [ "# Extract posterior means of coefficients\n", - "beta_0 = idata_trz.posterior[\"Intercept\"].mean().item()\n", - "beta_tv = idata_trz.posterior[\"TV\"].mean().item()\n", - "beta_zeitung = idata_trz.posterior[\"Zeitung\"].mean().item()\n", - "beta_radio = idata_trz.posterior[\"Radio\"].mean().item()\n", + "beta_0 = idata_t.posterior[\"Intercept\"].mean().item()\n", + "beta_tv = idata_t.posterior[\"TV\"].mean().item()\n", + "\n", "\n", "# Compute predictions (ŷ)\n", - "y_pred = beta_0 + beta_tv * werbung[\"TV\"] + beta_zeitung * werbung[\"Zeitung\"] + beta_radio * werbung[\"Radio\"]" + "y_pred = beta_0 + beta_tv * werbung[\"TV\"] \n", + "\n", + "# Actual y values\n", + "y_obs = werbung[\"Verkauf\"].values" ] }, { "cell_type": "code", - "execution_count": 6, - "id": "45172e19-c1df-48ed-a524-cb8408a603e4", + "execution_count": 19, + "id": "8313a376-068f-4a41-84e8-e0cfc2d6fcb2", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "R²: 0.8972105391174441\n" - ] - } - ], + "outputs": [], "source": [ - "# Actual y values\n", - "y_obs = werbung[\"Verkauf\"].values\n", + "# Compute residual sum of squares (RSS)\n", "\n", - "# Compute total sum of squares (TSS) and residual sum of squares (RSS)\n", - "ss_total = np.sum((y_obs - np.mean(y_obs))**2)\n", - "ss_residual = np.sum((y_obs - y_pred) ** 2)\n", - "\n", - "# Compute R²\n", - "r2 = 1 - (ss_residual / ss_total)\n", - "print(\"R²:\", r2)" + "ss_residual = np.sum((y_obs - y_pred) ** 2)" ] }, { "cell_type": "code", - "execution_count": 7, - "id": "e078416c-69fb-4504-8798-983486f039e8", + "execution_count": 20, + "id": "63d5da55-b5e9-4177-bc6e-29d71a969382", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "RSE: 1.6855111856055986\n" + "RSE: 3.258656512155862\n" ] } ], "source": [ "n = 
len(y_obs) # Number of observations\n", - "p = 3 # Number of predictors (TV, Zeitung, Radio)\n", + "p = 1 # Number of predictors (TV)\n", "\n", "rse = np.sqrt(ss_residual / (n - p - 1))\n", "print(\"RSE:\", rse)" ] }, + { + "cell_type": "markdown", + "id": "0bf3b298-2779-4cab-a454-ca5a266db0b7", + "metadata": {}, + "source": [ + "Der RSE-Wert für das einfache lineare Regressionsmodell mit der Prädiktorvariablen `TV` beträgt also 3.26. Das heisst, dass die eigentlichen Verkäufe in den 200 Märkten durchschnittlich um $3260$ Einheiten von der wahren Regressionsgeraden abweichen. \n", + "Wäre also unser Modell korrekt und wären die an sich unbekannten Koeffizienten $\\beta_0$ und $\\beta_1$ exakt bekannt, würde jede Vorhersage des Verkaufs auf der Basis der TV-Werbeausgaben im Durchschnitt immer noch um $3260$ Einheiten abweichen. \n", + "\n", + "Ob nun eine Abweichung von $3260$ Einheiten von der Vorhersage viel ist oder nicht, hängt vom Problemkontext ab. In diesem Beispiel beträgt der Mittelwert von `Verkauf` etwa $14000$ Einheiten, und damit macht der prozentuale Fehler\n", + "\\begin{equation*}\n", + "\\dfrac{3260}{14000}\n", + "\\approx 0.23\n", + "=23\\%\n", + "\\end{equation*} \n", + "aus." 
+ ] + }, { "cell_type": "code", - "execution_count": 8, - "id": "fd86c259-21a6-4ebc-a2c9-6d7165f6a13a", + "execution_count": 21, + "id": "97ef4d21-6ffb-4b9e-a973-94c6815600b3", "metadata": {}, "outputs": [ { @@ -233,64 +229,52 @@ ], "source": [ "import bambi as bmb\n", - "model_tr = bmb.Model(\"Verkauf ~ TV + Radio\", werbung)\n", - "idata_tr = model_trz.fit(random_seed=123)" + "model_trz = bmb.Model(\"Verkauf ~ TV + Zeitung + Radio\", werbung)\n", + "idata_trz = model_trz.fit(random_seed=123)" ] }, { "cell_type": "code", - "execution_count": 9, - "id": "125cbdce-ae89-456c-945b-b603d8cf3d7d", + "execution_count": 22, + "id": "27f1854b-e66e-432e-aa61-66b928e73f16", "metadata": {}, "outputs": [], "source": [ "# Extract posterior means of coefficients\n", "beta_0 = idata_trz.posterior[\"Intercept\"].mean().item()\n", "beta_tv = idata_trz.posterior[\"TV\"].mean().item()\n", + "beta_zeitung = idata_trz.posterior[\"Zeitung\"].mean().item()\n", "beta_radio = idata_trz.posterior[\"Radio\"].mean().item()\n", "\n", "# Compute predictions (ŷ)\n", - "y_pred = beta_0 + beta_tv * werbung[\"TV\"] + beta_radio * werbung[\"Radio\"]" + "y_pred = beta_0 + beta_tv * werbung[\"TV\"] + beta_zeitung * werbung[\"Zeitung\"] + beta_radio * werbung[\"Radio\"]" ] }, { "cell_type": "code", - "execution_count": 10, - "id": "4990d7e2-8761-469b-aca8-7dc283fd8c99", + "execution_count": 23, + "id": "45172e19-c1df-48ed-a524-cb8408a603e4", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "R²: 0.897153312886268\n" - ] - } - ], + "outputs": [], "source": [ "# Actual y values\n", "y_obs = werbung[\"Verkauf\"].values\n", "\n", "# Compute total sum of squares (TSS) and residual sum of squares (RSS)\n", - "ss_total = np.sum((y_obs - np.mean(y_obs))**2)\n", - "ss_residual = np.sum((y_obs - y_pred) ** 2)\n", - "\n", - "# Compute R²\n", - "r2 = 1 - (ss_residual / ss_total)\n", - "print(\"R²:\", r2)" + "ss_residual = np.sum((y_obs - y_pred) ** 2)" ] }, { 
"cell_type": "code", - "execution_count": 11, - "id": "d28de8da-8beb-49bb-93af-4e03eb2340b4", + "execution_count": 24, + "id": "e078416c-69fb-4504-8798-983486f039e8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "RSE: 1.6859803097292891\n" + "RSE: 1.6855111856055986\n" ] } ], @@ -301,14 +285,6 @@ "rse = np.sqrt(ss_residual / (n - p - 1))\n", "print(\"RSE:\", rse)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4092c15c-bc63-4a8b-8f0a-6e4aee6a2240", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": {