diff --git a/.circleci/config.yml b/.circleci/config.yml index 4aaead4..44a4dab 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -27,3 +27,27 @@ jobs: sudo pip install . python -m unittest discover -v -s test/integration + - add_ssh_keys: + fingerprints: + - "79:16:39:74:e9:b3:39:52:87:2c:90:aa:ee:3c:09:13" + + - run: + name: Deploy documentation + command: | + if [ "${CIRCLE_BRANCH}" == "${PRODUCTION_BRANCH}" ]; then + cd docs + make html + cd _build/html + git init + git config user.name "Devseed-CI" + git config user.email "dev@developmentseed.org" + touch .nojekyll # Add this so GitHub doesn't try and build site + git add . + git commit -m "CI deploy [skip ci]" + git remote add origin git@github.com:developmentseed/label-maker.git + git fetch + git push origin --force --quiet HEAD:gh-pages + rm -rf .git + else + echo "Not the branch you're looking for, skipping documentation deploy" + fi diff --git a/.gitignore b/.gitignore index 8a4ec3d..9f48021 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ config.json stdout* /integration* .idea/ +docs/_build/ diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..298ea9e --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,19 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..50f4f3f --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,182 @@ +# -*- coding: utf-8 -*- +# +# Configuration file for the Sphinx documentation builder. +# +# This file does only contain a selection of the most common options. For a +# full list see the documentation: +# http://www.sphinx-doc.org/en/master/config + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +import os +import sys +sys.path.insert(0, os.path.abspath('.')) +sys.path.insert(0, os.path.abspath('../label_maker')) + + +# -- Project information ----------------------------------------------------- + +project = 'label-maker' +copyright = '2018, Development Seed' +author = 'Development Seed' + +# The short X.Y version +version = '0.3.2' +# The full version, including alpha/beta/rc tags +release = '0.3.2' + + +# -- General configuration --------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.viewcode', + 'sphinx.ext.napoleon', + 'sphinx.ext.githubpages', + 'sphinxcontrib.fulltoc' +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. 
+# You can specify multiple suffix as a list of string: +# +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The master toctree document. +master_doc = 'index' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = None + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'alabaster' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Custom sidebar templates, must be a dictionary that maps document names +# to template names. +# +# The default sidebars (for documents that don't match any pattern) are +# defined by theme itself. Builtin themes are using these templates by +# default: ``['localtoc.html', 'relations.html', 'sourcelink.html', +# 'searchbox.html']``. +# +# html_sidebars = {} + + +# -- Options for HTMLHelp output --------------------------------------------- + +# Output file base name for HTML help builder. 
+htmlhelp_basename = 'label-makerdoc' + + +# -- Options for LaTeX output ------------------------------------------------ + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'label-maker.tex', 'label-maker Documentation', + 'Development Seed', 'manual'), +] + + +# -- Options for manual page output ------------------------------------------ + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + (master_doc, 'label-maker', 'label-maker Documentation', + [author], 1) +] + + +# -- Options for Texinfo output ---------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'label-maker', 'label-maker Documentation', + author, 'label-maker', 'Data preparation for machine learning on overhead imagery.', + 'Miscellaneous'), +] + + +# -- Options for Epub output ------------------------------------------------- + +# Bibliographic Dublin Core info. +epub_title = project + +# The unique identifier of the text. This can be a ISBN number +# or the project homepage. +# +# epub_identifier = '' + +# A unique identification for the text. +# +# epub_uid = '' + +# A list of files that should not be packed into the epub file. 
+epub_exclude_files = ['search.html'] + + +# -- Extension configuration ------------------------------------------------- diff --git a/docs/contributing.rst b/docs/contributing.rst new file mode 100644 index 0000000..6f478de --- /dev/null +++ b/docs/contributing.rst @@ -0,0 +1,32 @@ +Contributing +============ + +A list of issues and ongoing work is available on the Label Maker `issues page `_. If you want to contribute code, the best way is to coordinate with the core developers via an issue or pull request conversation. + +Development installation +^^^^^^^^^^^^^^^^^^^^^^^^ +Fork Label Maker into your GitHub account. Then, clone the repo and install it locally with pip as follows: + +.. code-block:: bash + + $ git clone git@github.com:your_user_name/label-maker.git + $ cd label-maker + $ pip install -e . + +Testing +^^^^^^^ +Label Maker runs tests using ``unittest``. You can find unit tests at ``test/unit`` and integration tests at ``test/integration``. + +Run a single test with: + +.. code-block:: bash + + python -m unittest test/unit/test_validate.py + +or an entire folder using: + +.. code-block:: bash + + python -m unittest discover -v -s test/unit + +More details on using ``unittest`` are `here `_. \ No newline at end of file diff --git a/docs/examples.rst b/docs/examples.rst new file mode 100644 index 0000000..a143f48 --- /dev/null +++ b/docs/examples.rst @@ -0,0 +1,6 @@ +Examples +######## + +Examples live `here `_ + +ToDo: move these to their own page within this docs website. \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..5c47446 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,160 @@ +.. label-maker documentation master file, created by + sphinx-quickstart on Sun Sep 16 11:05:39 2018. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. 
+ +Label Maker Documentation +######################### + +Label Maker generates training data for ML algorithms focused on overhead imagery (e.g., from satellites or drones). It downloads OpenStreetMap QA Tile information and overhead imagery tiles and saves them as a NumPy `.npz `_ file for easy use in ML pipelines. For more details, see the `inaugural blog post `_. + +Requirements +============ +* `Python 3.6 `_ +* `tippecanoe `_ + +Standard pip install +==================== + +.. code-block:: bash + + pip install label-maker + +.. note:: + + Label Maker requires ``tippecanoe`` to be available from your command line. Confirm this before proceeding. + +Configuration +============= +Before you can use Label Maker, you must specify inputs to the data-creation process within a ``config.json`` file. Below is a simple example. To see the complete list of parameters and options for imagery access, check out the `parameters page `_. + +.. code-block:: json + + { + "country": "togo", + "bounding_box": [1.09725, 6.05520, 1.34582, 6.30915], + "zoom": 12, + "classes": [ + { "name": "Roads", "filter": ["has", "highway"] }, + { "name": "Buildings", "filter": ["has", "building"] } + ], + "imagery": "http://a.tiles.mapbox.com/v4/mapbox.satellite/{z}/{x}/{y}.jpg?access_token=ACCESS_TOKEN", + "background_ratio": 1, + "ml_type": "classification" + } + +Before using this configuration, make sure to replace ``ACCESS_TOKEN`` with your `Mapbox Access Token `_ + +Command line interface (CLI) +============================ + +Label Maker is most easily used as a command line tool. There are five commands documented below. You should run them in order, as each operation builds on the previous one. All commands accept two flags: + + ``-d`` or ``--dest``: string + Directory for storing output files. Defaults to ``'./data'`` + ``-c`` or ``--config``: string + Location of ``config.json`` file. 
Defaults to ``'./config.json'`` + +CLI Step 1: download +^^^^^^^^^^^^^^^^^^^^ +Downloads and unzips OSM QA tiles containing feature information. + +.. code-block:: bash + + $ label-maker download + Saving QA tiles to data/ghana.mbtiles + 100% 18.6 MiB 1.8 MiB/s 0:00:00 ETA + +CLI Step 2: labels +^^^^^^^^^^^^^^^^^^ +Retiles the OSM data to the desired zoom level, creates label data (``labels.npz``), calculates class statistics, and creates visual label files (either GeoJSON or PNG files depending upon ``ml_type``). Requires the mbtiles file from the ``label-maker download`` step. + +Accepts one additional flag: + + ``-s`` or ``--sparse``: boolean + Specifies if features in the class of interest are sparse. If ``True``, only save labels for up to ``n`` background tiles, where ``n`` is equal to ``background_ratio`` times the number of tiles with a class label. Defaults to ``False``. + +.. code-block:: bash + + $ label-maker labels + Determining labels for each tile + --- + Residential: 638 tiles + Total tiles: 1189 + Write out labels to data/labels.npz + +CLI Step 3: preview (optional) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Downloads example overhead images for each class. Requires the ``labels.npz`` file from the ``label-maker labels`` step. + +Accepts one additional flag: + + ``-n`` or ``--number``: int + Specifies the number of example images to create per class. Defaults to ``5``. + +.. code-block:: bash + + $ label-maker preview -n 10 + Writing example images to data/examples + Downloading 10 tiles for class Residential + +CLI Step 4: images +^^^^^^^^^^^^^^^^^^ + +Downloads all imagery tiles needed to create the training data. Requires the ``labels.npz`` file from the ``label-maker labels`` step. + +.. code-block:: bash + + $ label-maker images + Downloading 1189 tiles to data/tiles + +CLI Step 5: package +^^^^^^^^^^^^^^^^^^^ +Bundles the images and OSM labels to create a final ``data.npz`` file. 
Requires the ``labels.npz`` file from the ``label-maker labels`` step and downloaded image tiles from the ``label-maker images`` step. + +.. code-block:: bash + + $ label-maker package + Saving packaged file to data/data.npz + +Using the packaged data +======================= +Once you have created a ``data.npz`` file using the above commands, you can use `numpy.load `_ to load it. For example, you can supply the created data to a `Keras `_ ``Model`` as follows: + +.. code-block:: python + + # Load the data, shuffled and split between train and test sets + npz = np.load('data.npz') + x_train = npz['x_train'] + y_train = npz['y_train'] + x_test = npz['x_test'] + y_test = npz['y_test'] + + # Define your model here, example usage in Keras + model = Sequential() + # ... + model.compile(...) + + # Train + model.fit(x_train, y_train, batch_size=16, epochs=50) + model.evaluate(x_test, y_test, batch_size=16) + +For more detailed walkthroughs, see the `examples page `_. + +Acknowledgements +================ + +This library builds on the concepts of `skynet-data `_. It wouldn't be possible without the excellent data from OpenStreetMap and Mapbox under the following licenses: + +* OSM QA tile data `copyright OpenStreetMap contributors `_ and licensed under `ODbL `_. +* Mapbox Satellite data can be `traced for noncommercial purposes `_. + + +.. toctree:: + :hidden: + :maxdepth: 2 + + parameters + examples + contributing \ No newline at end of file diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..27f573b --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. 
Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% + +:end +popd diff --git a/docs/parameters.rst b/docs/parameters.rst new file mode 100644 index 0000000..89eb1ed --- /dev/null +++ b/docs/parameters.rst @@ -0,0 +1,50 @@ +Parameters +---------- +Here is the full list of configuration parameters you can specify in a ``config.json`` file. + +**country**: string + The `OSM QA Tile `_ extract to download. The string should be a country matching one of the options in ``label_maker/countries.txt``. + +**bounding_box**: list of floats + The bounding box to create images from. This should be given in the form: ``[xmin, ymin, xmax, ymax]`` as longitude and latitude values between ``[-180, 180]`` and ``[-90, 90]``, respectively. Values should use the WGS84 datum, with longitude and latitude units in decimal degrees. + +**zoom**: int + The `zoom level `_ used to create images. This functions as a rough proxy for resolution. Value should be given as an int on the interval [0, 19]. + +**classes**: list of dicts + The training classes. Each class is defined as a dict object with two required keys: + + **name**: string + The class name. + **filter**: list of strings + A `Mapbox GL Filter `_ to define any vector features matching this class. Filters are applied with the standalone `featureFilter `_ from Mapbox GL JS. + +**buffer**: int + Optional parameter to buffer labels in ``'object-detection'`` and ``'segmentation'`` tasks by an arbitrary number of pixels. Accepts both positive and negative integers. It uses `Shapely object.buffer `_ to calculate the final geometry. 
You can verify that your buffer options create the desired labels by inspecting the files created in ``data/labels/`` after running the ``label-maker labels`` command. + +**imagery**: string + Label Maker expects to receive imagery tiles that are 256 x 256 pixels. You can specify the source of the imagery with one of: + + A template string for a tiled imagery service. Note that you will generally need an API key to obtain images and there may be associated costs. The above example requires a `Mapbox access token `_. Also see `OpenAerialMap `_ for open imagery. + + A GeoTIFF file location. Works with both local and remote files. Ex: ``'http://oin-hotosm.s3.amazonaws.com/593ede5ee407d70011386139/0/3041615b-2bdb-40c5-b834-36f580baca29.tif'`` + + A `WMS endpoint `_ ``GetMap`` request. Fill out all necessary parameters except ``bbox`` which should be set as ``{bbox}``. Ex: ``'https://basemap.nationalmap.gov/arcgis/services/USGSImageryOnly/MapServer/WMSServer?SERVICE=WMS&REQUEST=GetMap&VERSION=1.1.1&LAYERS=0&STYLES=&FORMAT=image%2Fjpeg&TRANSPARENT=false&HEIGHT=256&WIDTH=256&SRS=EPSG%3A3857&BBOX={bbox}'`` + +**background_ratio**: float + Specify how many background (or "negative") training examples to create when there is only one class specified with the ``classes`` parameter. Label Maker will generate ``background_ratio`` times the number of images matching the one class. + +**ml_type**: string + One of ``'classification'``, ``'object-detection'``, or ``'segmentation'``. This defines the output format for the final label numpy arrays (``y_train`` and ``y_test``). + + ``'classification'`` + Output is an array of the same length as ``classes``. Each array value will be either ``1`` or ``0`` based on whether it matches the class at the same index. + + ``'object-detection'`` + Output is an array of bounding boxes of the form ``[xmin, ymin, width, height, class_index]``. 
In this case, the values are pixel values measured from the upper left-hand corner (not latitude and longitude values). Each feature is tested against each class, so if a feature matches two or more classes, it will have the corresponding number of bounding boxes created. + + ``'segmentation'`` + Output is an array of shape ``(256, 256)`` with values matching the class index label at that position. The classes are applied sequentially according to ``config.json`` so later classes will overwrite earlier class labels if there is overlap. + +**imagery_offset**: list of ints + An optional list of integers representing the number of pixels to offset imagery. For example, ``[15, -5]`` will move the images 15 pixels right and 5 pixels up relative to the requested tile bounds. diff --git a/requirements-dev.txt b/requirements-dev.txt index a66d85d..d1a6b31 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,3 +1,6 @@ astroid>=1.6.0 isort>=4.2.15 pylint==1.8.1 +Sphinx==1.8.0 +sphinx-autobuild==0.7.1 +sphinxcontrib-fulltoc==1.2.0 diff --git a/requirements.txt b/requirements.txt index 14ccb59..aa97863 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,6 +15,6 @@ pycurl==7.43.0.1 pyproj==1.9.5.1 rasterio==1.0a12 requests==2.11.0 -Shapely==1.6.3 +Shapely>=1.6.3 six==1.10.0 tilepie==0.2.1