{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Kernel Density Estimate of Species Distributions\nThis shows an example of a neighbors-based query (in particular a kernel\ndensity estimate) on geospatial data, using a Ball Tree built upon the\nHaversine distance metric -- i.e. distances over points in latitude/longitude.\nThe dataset is provided by Phillips et. al. (2006) [1]_.\nIf available, the example uses\n[basemap](https://matplotlib.org/basemap/)\nto plot the coast lines and national boundaries of South America.\n\nThis example does not perform any learning over the data (see\n`sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py` for an\nexample of classification based on the attributes in this dataset).  It simply shows the\nkernel density estimate of observed data points in geospatial coordinates.\n\nThe two species are:\n\n- [\"Bradypus variegatus\"](https://www.iucnredlist.org/species/3038/47437046) ,\n  the Brown-throated Sloth.\n\n- [\"Microryzomys minutus\"](http://www.iucnredlist.org/details/13408/0) ,\n  also known as the Forest Small Rice Rat, a rodent that lives in Peru,\n  Colombia, Ecuador, Peru, and Venezuela.\n\n## References\n\n.. [1] [\"Maximum entropy modeling of species geographic distributions\"](http://rob.schapire.net/papers/ecolmod.pdf)\n       S. J. Phillips, R. P. Anderson, R. E. Schapire - Ecological Modelling,\n       190:231-259, 2006.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Authors: The scikit-learn developers\n# SPDX-License-Identifier: BSD-3-Clause\n\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom sklearn.datasets import fetch_species_distributions\nfrom sklearn.neighbors import KernelDensity\n\n# if basemap is available, we'll use it.\n# otherwise, we'll improvise later...\ntry:\n    from mpl_toolkits.basemap import Basemap\n\n    basemap = True\nexcept ImportError:\n    basemap = False\n\n\ndef construct_grids(batch):\n    \"\"\"Construct the map grid from the batch object\n\n    Parameters\n    ----------\n    batch : Batch object\n        The object returned by :func:`fetch_species_distributions`\n\n    Returns\n    -------\n    (xgrid, ygrid) : 1-D arrays\n        The grid corresponding to the values in batch.coverages\n    \"\"\"\n    # x,y coordinates for corner cells\n    xmin = batch.x_left_lower_corner + batch.grid_size\n    xmax = xmin + (batch.Nx * batch.grid_size)\n    ymin = batch.y_left_lower_corner + batch.grid_size\n    ymax = ymin + (batch.Ny * batch.grid_size)\n\n    # x coordinates of the grid cells\n    xgrid = np.arange(xmin, xmax, batch.grid_size)\n    # y coordinates of the grid cells\n    ygrid = np.arange(ymin, ymax, batch.grid_size)\n\n    return (xgrid, ygrid)\n\n\n# Get matrices/arrays of species IDs and locations\ndata = fetch_species_distributions()\nspecies_names = [\"Bradypus Variegatus\", \"Microryzomys Minutus\"]\n\nXtrain = np.vstack([data[\"train\"][\"dd lat\"], data[\"train\"][\"dd long\"]]).T\nytrain = np.array(\n    [d.decode(\"ascii\").startswith(\"micro\") for d in data[\"train\"][\"species\"]],\n    dtype=\"int\",\n)\nXtrain *= np.pi / 180.0  # Convert lat/long to radians\n\n# Set up the data grid for the contour plot\nxgrid, ygrid = construct_grids(data)\nX, Y = np.meshgrid(xgrid[::5], ygrid[::5][::-1])\nland_reference = data.coverages[6][::5, ::5]\nland_mask = (land_reference > -9999).ravel()\n\nxy = np.vstack([Y.ravel(), X.ravel()]).T\nxy = xy[land_mask]\nxy *= np.pi / 180.0\n\n# Plot map of South America with distributions of each species\nfig = plt.figure()\nfig.subplots_adjust(left=0.05, right=0.95, wspace=0.05)\n\nfor i in range(2):\n    plt.subplot(1, 2, i + 1)\n\n    # construct a kernel density estimate of the distribution\n    print(\" - computing KDE in spherical coordinates\")\n    kde = KernelDensity(\n        bandwidth=0.04, metric=\"haversine\", kernel=\"gaussian\", algorithm=\"ball_tree\"\n    )\n    kde.fit(Xtrain[ytrain == i])\n\n    # evaluate only on the land: -9999 indicates ocean\n    Z = np.full(land_mask.shape[0], -9999, dtype=\"int\")\n    Z[land_mask] = np.exp(kde.score_samples(xy))\n    Z = Z.reshape(X.shape)\n\n    # plot contours of the density\n    levels = np.linspace(0, Z.max(), 25)\n    plt.contourf(X, Y, Z, levels=levels, cmap=plt.cm.Reds)\n\n    if basemap:\n        print(\" - plot coastlines using basemap\")\n        m = Basemap(\n            projection=\"cyl\",\n            llcrnrlat=Y.min(),\n            urcrnrlat=Y.max(),\n            llcrnrlon=X.min(),\n            urcrnrlon=X.max(),\n            resolution=\"c\",\n        )\n        m.drawcoastlines()\n        m.drawcountries()\n    else:\n        print(\" - plot coastlines from coverage\")\n        plt.contour(\n            X, Y, land_reference, levels=[-9998], colors=\"k\", linestyles=\"solid\"\n        )\n        plt.xticks([])\n        plt.yticks([])\n\n    plt.title(species_names[i])\n\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.11.14"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}