{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Varying regularization in Multi-layer Perceptron\n\nA comparison of different values for regularization parameter 'alpha' on\nsynthetic datasets. The plot shows that different alphas yield different\ndecision functions.\n\nAlpha is a parameter for regularization term, aka penalty term, that combats\noverfitting by constraining the size of the weights. Increasing alpha may fix\nhigh variance (a sign of overfitting) by encouraging smaller weights, resulting\nin a decision boundary plot that appears with lesser curvatures.\nSimilarly, decreasing alpha may fix high bias (a sign of underfitting) by\nencouraging larger weights, potentially resulting in a more complicated\ndecision boundary.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Authors: The scikit-learn developers\n# SPDX-License-Identifier: BSD-3-Clause\n\nimport numpy as np\nfrom matplotlib import pyplot as plt\nfrom matplotlib.colors import ListedColormap\n\nfrom sklearn.datasets import make_circles, make_classification, make_moons\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.neural_network import MLPClassifier\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.preprocessing import StandardScaler\n\nh = 0.02  # step size in the mesh\n\nalphas = np.logspace(-1, 1, 5)\n\nclassifiers = []\nnames = []\nfor alpha in alphas:\n    classifiers.append(\n        make_pipeline(\n            StandardScaler(),\n            MLPClassifier(\n                solver=\"lbfgs\",\n                alpha=alpha,\n                random_state=1,\n                max_iter=2000,\n                early_stopping=True,\n                hidden_layer_sizes=[10, 10],\n            ),\n        )\n    )\n    names.append(f\"alpha {alpha:.2f}\")\n\nX, y = make_classification(\n    n_features=2, n_redundant=0, n_informative=2, random_state=0, n_clusters_per_class=1\n)\nrng = np.random.RandomState(2)\nX += 2 * rng.uniform(size=X.shape)\nlinearly_separable = (X, y)\n\ndatasets = [\n    make_moons(noise=0.3, random_state=0),\n    make_circles(noise=0.2, factor=0.5, random_state=1),\n    linearly_separable,\n]\n\nfigure = plt.figure(figsize=(17, 9))\ni = 1\n# iterate over datasets\nfor X, y in datasets:\n    # split into training and test part\n    X_train, X_test, y_train, y_test = train_test_split(\n        X, y, test_size=0.4, random_state=42\n    )\n\n    x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5\n    y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5\n    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))\n\n    # just plot the dataset first\n    cm = plt.cm.RdBu\n    cm_bright = ListedColormap([\"#FF0000\", \"#0000FF\"])\n    ax = plt.subplot(len(datasets), len(classifiers) + 1, i)\n    # Plot the training points\n    ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)\n    # and testing points\n    ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6)\n    ax.set_xlim(xx.min(), xx.max())\n    ax.set_ylim(yy.min(), yy.max())\n    ax.set_xticks(())\n    ax.set_yticks(())\n    i += 1\n\n    # iterate over classifiers\n    for name, clf in zip(names, classifiers):\n        ax = plt.subplot(len(datasets), len(classifiers) + 1, i)\n        clf.fit(X_train, y_train)\n        score = clf.score(X_test, y_test)\n\n        # Plot the decision boundary. For that, we will assign a color to each\n        # point in the mesh [x_min, x_max] x [y_min, y_max].\n        if hasattr(clf, \"decision_function\"):\n            Z = clf.decision_function(np.column_stack([xx.ravel(), yy.ravel()]))\n        else:\n            Z = clf.predict_proba(np.column_stack([xx.ravel(), yy.ravel()]))[:, 1]\n\n        # Put the result into a color plot\n        Z = Z.reshape(xx.shape)\n        ax.contourf(xx, yy, Z, cmap=cm, alpha=0.8)\n\n        # Plot also the training points\n        ax.scatter(\n            X_train[:, 0],\n            X_train[:, 1],\n            c=y_train,\n            cmap=cm_bright,\n            edgecolors=\"black\",\n            s=25,\n        )\n        # and testing points\n        ax.scatter(\n            X_test[:, 0],\n            X_test[:, 1],\n            c=y_test,\n            cmap=cm_bright,\n            alpha=0.6,\n            edgecolors=\"black\",\n            s=25,\n        )\n\n        ax.set_xlim(xx.min(), xx.max())\n        ax.set_ylim(yy.min(), yy.max())\n        ax.set_xticks(())\n        ax.set_yticks(())\n        ax.set_title(name)\n        ax.text(\n            xx.max() - 0.3,\n            yy.min() + 0.3,\n            f\"{score:.3f}\".lstrip(\"0\"),\n            size=15,\n            horizontalalignment=\"right\",\n        )\n        i += 1\n\nfigure.subplots_adjust(left=0.02, right=0.98)\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.11.14"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}