{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# MNIST classification using multinomial logistic + L1\n\nHere we fit a multinomial logistic regression with L1 penalty on a subset of\nthe MNIST digits classification task. We use the SAGA algorithm for this\npurpose: this a solver that is fast when the number of samples is significantly\nlarger than the number of features and is able to finely optimize non-smooth\nobjective functions which is the case with the l1-penalty. Test accuracy\nreaches > 0.8, while weight vectors remains *sparse* and therefore more easily\n*interpretable*.\n\nNote that this accuracy of this l1-penalized linear model is significantly\nbelow what can be reached by an l2-penalized linear model or a non-linear\nmulti-layer perceptron model on this dataset.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Authors: The scikit-learn developers\n# SPDX-License-Identifier: BSD-3-Clause\n\nimport time\n\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom sklearn.datasets import fetch_openml\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.utils import check_random_state\n\n# Turn down for faster convergence\nt0 = time.time()\ntrain_samples = 5000\n\n# Load data from https://www.openml.org/d/554\nX, y = fetch_openml(\"mnist_784\", version=1, return_X_y=True, as_frame=False)\n\nrandom_state = check_random_state(0)\npermutation = random_state.permutation(X.shape[0])\nX = X[permutation]\ny = y[permutation]\nX = X.reshape((X.shape[0], -1))\n\nX_train, X_test, y_train, y_test = train_test_split(\n    X, y, train_size=train_samples, test_size=10000\n)\n\nscaler = StandardScaler()\nX_train = scaler.fit_transform(X_train)\nX_test = scaler.transform(X_test)\n\n# Turn up tolerance for faster convergence\nclf = LogisticRegression(C=50.0 / train_samples, l1_ratio=1, solver=\"saga\", tol=0.1)\nclf.fit(X_train, y_train)\nsparsity = np.mean(clf.coef_ == 0) * 100\nscore = clf.score(X_test, y_test)\n# print('Best C % .4f' % clf.C_)\nprint(\"Sparsity with L1 penalty: %.2f%%\" % sparsity)\nprint(\"Test score with L1 penalty: %.4f\" % score)\n\ncoef = clf.coef_.copy()\nplt.figure(figsize=(10, 5))\nscale = np.abs(coef).max()\nfor i in range(10):\n    l1_plot = plt.subplot(2, 5, i + 1)\n    l1_plot.imshow(\n        coef[i].reshape(28, 28),\n        interpolation=\"nearest\",\n        cmap=plt.cm.RdBu,\n        vmin=-scale,\n        vmax=scale,\n    )\n    l1_plot.set_xticks(())\n    l1_plot.set_yticks(())\n    l1_plot.set_xlabel(f\"Class {i}\")\nplt.suptitle(\"Classification vector for...\")\n\nrun_time = time.time() - t0\nprint(\"Example run in %.3f s\" % run_time)\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.11.14"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}