refactor: excel parse
This commit is contained in:
@@ -0,0 +1,29 @@
|
||||
from geopandas._config import options
|
||||
|
||||
from geopandas.geoseries import GeoSeries
|
||||
from geopandas.geodataframe import GeoDataFrame
|
||||
from geopandas.array import points_from_xy
|
||||
|
||||
from geopandas.io.file import _read_file as read_file
|
||||
from geopandas.io.file import _list_layers as list_layers
|
||||
from geopandas.io.arrow import _read_parquet as read_parquet
|
||||
from geopandas.io.arrow import _read_feather as read_feather
|
||||
from geopandas.io.sql import _read_postgis as read_postgis
|
||||
from geopandas.tools import sjoin, sjoin_nearest
|
||||
from geopandas.tools import overlay
|
||||
from geopandas.tools._show_versions import show_versions
|
||||
from geopandas.tools import clip
|
||||
|
||||
|
||||
import geopandas.datasets
|
||||
|
||||
|
||||
# make the interactive namespace easier to use
|
||||
# for `from geopandas import *` demos.
|
||||
import geopandas as gpd
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
from . import _version
|
||||
|
||||
__version__ = _version.get_versions()["version"]
|
||||
@@ -0,0 +1,92 @@
|
||||
import importlib
|
||||
from packaging.version import Version
|
||||
|
||||
import pandas as pd
|
||||
|
||||
import shapely
|
||||
import shapely.geos
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# pandas compat
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Parse the installed pandas version once; every flag below compares against it.
_PANDAS_VERSION = Version(pd.__version__)

PANDAS_GE_14 = _PANDAS_VERSION >= Version("1.4.0rc0")
PANDAS_GE_15 = _PANDAS_VERSION >= Version("1.5.0")
PANDAS_GE_20 = _PANDAS_VERSION >= Version("2.0.0")
PANDAS_GE_202 = _PANDAS_VERSION >= Version("2.0.2")
PANDAS_GE_21 = _PANDAS_VERSION >= Version("2.1.0")
PANDAS_GE_22 = _PANDAS_VERSION >= Version("2.2.0")
PANDAS_GE_30 = _PANDAS_VERSION >= Version("3.0.0.dev0")


# -----------------------------------------------------------------------------
# Shapely / GEOS compat
# -----------------------------------------------------------------------------

SHAPELY_GE_204 = Version(shapely.__version__) >= Version("2.0.4")

# GEOS version tuple as reported by shapely; compared element-wise below.
_GEOS_VERSION = shapely.geos.geos_version

GEOS_GE_390 = _GEOS_VERSION >= (3, 9, 0)
GEOS_GE_310 = _GEOS_VERSION >= (3, 10, 0)
|
||||
|
||||
|
||||
def import_optional_dependency(name: str, extra: str = ""):
    """
    Import an optional dependency.

    Adapted from pandas.compat._optional::import_optional_dependency

    Raises a formatted ImportError if the module is not present.

    Parameters
    ----------
    name : str
        The module name.
    extra : str
        Additional text to include in the ImportError message.

    Returns
    -------
    module

    Raises
    ------
    ValueError
        If ``name`` is not a string.
    ImportError
        If the module cannot be imported.
    """
    # Validate the argument before doing anything else with it.
    if not isinstance(name, str):
        raise ValueError(
            "Invalid module name: '{name}'; must be a string".format(name=name)
        )

    try:
        return importlib.import_module(name)
    except ImportError:
        # Adjacent literals are concatenated into a single-line message.
        # (A triple-quoted template would leak quote characters and a line
        # break into the text shown to the user.)
        msg = (
            f"Missing optional dependency '{name}'. {extra} "
            f"Use pip or conda to install {name}."
        )
        # ``from None`` hides the original traceback; the message is
        # self-explanatory.
        raise ImportError(msg) from None
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# pyproj compat
|
||||
# -----------------------------------------------------------------------------
|
||||
try:
    import pyproj  # noqa: F401

    HAS_PYPROJ = True

except ImportError as err:
    HAS_PYPROJ = False
    # Kept so the decorator below can report why the import failed.
    pyproj_import_error = str(err)


def requires_pyproj(func):
    """
    Decorate ``func`` so that calling it fails early when pyproj is missing.

    Parameters
    ----------
    func : callable
        The callable to guard.

    Returns
    -------
    callable
        A wrapper that raises ``ImportError`` when ``HAS_PYPROJ`` is false and
        otherwise forwards all arguments to ``func``. The wrapper carries the
        name, docstring and other metadata of ``func``.
    """
    # Local import: only needed at decoration time.
    import functools

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        if not HAS_PYPROJ:
            raise ImportError(
                f"The 'pyproj' package is required for {func.__name__} to work. "
                "Install it and initialize the object with a CRS before using it."
                f"\nImporting pyproj resulted in: {pyproj_import_error}"
            )
        return func(*args, **kwargs)

    return wrapper
|
||||
@@ -0,0 +1,133 @@
|
||||
"""
|
||||
Lightweight options machinery.
|
||||
|
||||
Based on https://github.com/topper-123/optioneer, but simplified (don't deal
|
||||
with nested options, deprecated options, ..), just the attribute-style dict
|
||||
like holding the options and giving a nice repr.
|
||||
"""
|
||||
|
||||
import textwrap
|
||||
import warnings
|
||||
from collections import namedtuple
|
||||
|
||||
# One registered option: its key, default, description, and optional hooks.
Option = namedtuple("Option", "key default_value doc validator callback")


class Options:
    """Provide attribute-style access to configuration dict."""

    def __init__(self, options):
        # Bypass our own ``__setattr__``, which only accepts registered keys.
        super().__setattr__("_options", options)
        # populate with default values
        config = {key: option.default_value for key, option in options.items()}
        super().__setattr__("_config", config)

    def __setattr__(self, key, value):
        """
        Set the value of an existing option.

        The option's validator (if any) is called with the new value before it
        is stored; the option's callback (if any) is called with the key and
        value afterwards.

        Raises
        ------
        AttributeError
            If ``key`` is not a registered option.
        """
        # you can't set new keys
        if key not in self._config:
            msg = "You can only set the value of existing options"
            raise AttributeError(msg)

        option = self._options[key]
        if option.validator:
            option.validator(value)
        self._config[key] = value
        if option.callback:
            option.callback(key, value)

    def __getattr__(self, key):
        """
        Return the current value of option ``key``.

        Raises
        ------
        AttributeError
            If there is no such option.
        """
        # Only called when normal lookup fails. Read the store through
        # ``__dict__`` so an instance without ``_config`` (e.g. one being
        # rebuilt by ``copy``) gets AttributeError instead of recursing here.
        config = self.__dict__.get("_config", {})
        try:
            return config[key]
        except KeyError:
            raise AttributeError("No such option") from None

    def __dir__(self):
        return list(self._config.keys())

    def __repr__(self):
        cls = self.__class__.__name__
        description = ""
        for key, option in self._options.items():
            descr = "{key}: {cur!r} [default: {default!r}]\n".format(
                key=key, cur=self._config[key], default=option.default_value
            )
            description += descr

            if option.doc:
                doc_text = "\n".join(textwrap.wrap(option.doc, width=70))
            else:
                doc_text = "No description available."
            doc_text = textwrap.indent(doc_text, prefix=" ")
            description += doc_text + "\n"
        space = "\n "
        description = description.replace("\n", space)
        return "{}({}{})".format(cls, space, description)
|
||||
|
||||
|
||||
def _validate_display_precision(value):
|
||||
if value is not None:
|
||||
if not isinstance(value, int) or not (0 <= value <= 16):
|
||||
raise ValueError("Invalid value, needs to be an integer [0-16]")
|
||||
|
||||
|
||||
# Option controlling coordinate precision; validated by
# ``_validate_display_precision`` and has no callback.
display_precision = Option(
    key="display_precision",
    default_value=None,
    validator=_validate_display_precision,
    callback=None,
    doc=(
        "The precision (maximum number of decimals) of the coordinates "
        "in the WKT representation in the Series/DataFrame display. "
        "By default (None), it tries to infer and use 3 decimals "
        "for projected coordinates and 5 decimals for geographic coordinates."
    ),
)
|
||||
|
||||
|
||||
def _warn_use_pygeos_deprecated(_value):
|
||||
warnings.warn(
|
||||
"pygeos support was removed in 1.0. "
|
||||
"geopandas.use_pygeos is a no-op and will be removed in geopandas 1.1.",
|
||||
stacklevel=3,
|
||||
)
|
||||
|
||||
|
||||
def _validate_io_engine(value):
|
||||
if value is not None:
|
||||
if value not in ("pyogrio", "fiona"):
|
||||
raise ValueError(f"Expected 'pyogrio' or 'fiona', got '{value}'")
|
||||
|
||||
|
||||
# Option selecting the default IO engine; validated by ``_validate_io_engine``
# and has no callback.
io_engine = Option(
    key="io_engine",
    default_value=None,
    validator=_validate_io_engine,
    callback=None,
    doc=(
        "The default engine for ``read_file`` and ``to_file``. Options "
        "are 'pyogrio' and 'fiona'."
    ),
)
|
||||
|
||||
# TODO: deprecate this
use_pygeos = Option(
    key="use_pygeos",
    default_value=False,
    validator=_warn_use_pygeos_deprecated,
    callback=None,
    doc=(
        "Deprecated option previously used to enable PyGEOS. It will be "
        "removed in GeoPandas 1.1."
    ),
)

# The module-level options object, holding the three registered options.
options = Options(
    {
        display_precision.key: display_precision,
        use_pygeos.key: use_pygeos,
        io_engine.key: io_engine,
    }
)
|
||||
@@ -0,0 +1,52 @@
|
||||
from textwrap import dedent
|
||||
from typing import Callable, Union
|
||||
|
||||
# doc decorator function ported with modifications from Pandas
|
||||
# https://github.com/pandas-dev/pandas/blob/master/pandas/util/_decorators.py
|
||||
|
||||
|
||||
def doc(*docstrings: Union[str, Callable], **params) -> Callable:
    """
    Build a decorator that assembles a docstring from templates.

    The decorated callable's own docstring (dedented, if present) comes first,
    followed by the given ``docstrings`` in order. String components are
    formatted with ``params``; callable components contribute their dedented
    ``__doc__`` without formatting. The list of components is stored on the
    decorated callable as ``_docstring_components`` so that it can itself be
    passed to ``doc`` later.

    Parameters
    ----------
    *docstrings : str or callable
        The string / docstring / docstring template to be appended in order
        after default docstring under callable.
    **params
        The string which would be used to format docstring template.
    """

    def decorator(decorated: Callable) -> Callable:
        # collecting docstring and docstring templates
        components: list[Union[str, Callable]] = []
        own_doc = decorated.__doc__
        if own_doc:
            components.append(dedent(own_doc))

        for item in docstrings:
            if hasattr(item, "_docstring_components"):
                # Reuse the raw templates of an already decorated callable.
                components += item._docstring_components
            elif isinstance(item, str) or item.__doc__:
                components.append(item)

        # formatting templates and concatenating docstring
        rendered = []
        for part in components:
            if isinstance(part, str):
                rendered.append(part.format(**params))
            else:
                rendered.append(dedent(part.__doc__ or ""))
        decorated.__doc__ = "".join(rendered)

        decorated._docstring_components = components
        return decorated

    return decorator
|
||||
@@ -0,0 +1,21 @@
|
||||
|
||||
# This file was generated by 'versioneer.py' (0.29) from
|
||||
# revision-control system data, or from the parent directory name of an
|
||||
# unpacked source archive. Distribution tarballs contain a pre-generated copy
|
||||
# of this file.
|
||||
|
||||
import json
|
||||
|
||||
version_json = '''
{
"date": "2024-07-02T14:23:16+0200",
"dirty": false,
"error": null,
"full-revisionid": "747d66ee6fcf00b819c08f11ecded53736c4652b",
"version": "1.0.1"
}
'''  # END VERSION_JSON


def get_versions():
    """Return the version information in ``version_json`` decoded from JSON."""
    versions = json.loads(version_json)
    return versions
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,47 @@
|
||||
import os.path
|
||||
|
||||
import geopandas
|
||||
|
||||
import pytest
|
||||
from geopandas.tests.util import _NATURALEARTH_CITIES, _NATURALEARTH_LOWRES, _NYBB
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def add_geopandas(doctest_namespace):
    """Make the ``geopandas`` module available to doctests as ``geopandas``."""
    doctest_namespace.update(geopandas=geopandas)
|
||||
|
||||
|
||||
# Datasets used in our tests
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def naturalearth_lowres() -> str:
    """Return the Natural Earth lowres dataset path, or skip if it is missing."""
    # skip if data missing, unless on github actions
    usable = os.path.isfile(_NATURALEARTH_LOWRES) or os.getenv("GITHUB_ACTIONS")
    if not usable:
        pytest.skip("Naturalearth lowres dataset not found")
    return _NATURALEARTH_LOWRES
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def naturalearth_cities() -> str:
    """Return the Natural Earth cities dataset path, or skip if it is missing."""
    # skip if data missing, unless on github actions
    usable = os.path.isfile(_NATURALEARTH_CITIES) or os.getenv("GITHUB_ACTIONS")
    if not usable:
        pytest.skip("Naturalearth cities dataset not found")
    return _NATURALEARTH_CITIES
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def nybb_filename() -> str:
    """Return the NYBB dataset path (``_NYBB``), or skip if the file is missing."""
    # ``_NYBB`` is checked on disk with its leading "zip://"-length prefix removed.
    archive_path = _NYBB[len("zip://") :]
    # skip if data missing, unless on github actions
    usable = os.path.isfile(archive_path) or os.getenv("GITHUB_ACTIONS")
    if not usable:
        pytest.skip("NYBB dataset not found")
    return _NYBB
|
||||
|
||||
|
||||
@pytest.fixture(scope="class")
def _setup_class_nybb_filename(nybb_filename, request):
    """Attach nybb_filename class attribute for unittest style setup_method"""
    setattr(request.cls, "nybb_filename", nybb_filename)
|
||||
@@ -0,0 +1,25 @@
|
||||
__all__ = []
|
||||
available = [] # previously part of __all__
|
||||
_prev_available = ["naturalearth_cities", "naturalearth_lowres", "nybb"]
|
||||
|
||||
|
||||
def get_path(dataset):
|
||||
ne_message = "https://www.naturalearthdata.com/downloads/110m-cultural-vectors/."
|
||||
nybb_message = (
|
||||
"the geodatasets package.\n\nfrom geodatasets import get_path\n"
|
||||
"path_to_file = get_path('nybb')\n"
|
||||
)
|
||||
error_msg = (
|
||||
"The geopandas.dataset has been deprecated and was removed in GeoPandas "
|
||||
f"1.0. You can get the original '{dataset}' data from "
|
||||
f"{ne_message if 'natural' in dataset else nybb_message}"
|
||||
)
|
||||
if dataset in _prev_available:
|
||||
raise AttributeError(error_msg)
|
||||
else:
|
||||
error_msg = (
|
||||
"The geopandas.dataset has been deprecated and "
|
||||
"was removed in GeoPandas 1.0. New sample datasets are now available "
|
||||
"in the geodatasets package (https://geodatasets.readthedocs.io/en/latest/)"
|
||||
)
|
||||
raise AttributeError(error_msg)
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,614 @@
|
||||
import json
|
||||
from packaging.version import Version
|
||||
from typing import Dict, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pyarrow as pa
|
||||
from numpy.typing import NDArray
|
||||
|
||||
import shapely
|
||||
from shapely import GeometryType
|
||||
|
||||
from geopandas import GeoDataFrame
|
||||
from geopandas._compat import SHAPELY_GE_204
|
||||
from geopandas.array import from_shapely, from_wkb
|
||||
|
||||
GEOARROW_ENCODINGS = [
|
||||
"point",
|
||||
"linestring",
|
||||
"polygon",
|
||||
"multipoint",
|
||||
"multilinestring",
|
||||
"multipolygon",
|
||||
]
|
||||
|
||||
|
||||
## GeoPandas -> GeoArrow
|
||||
|
||||
|
||||
class ArrowTable:
    """
    Thin wrapper exposing a table through the Arrow PyCapsule interface.

    The wrapper implements the `Arrow PyCapsule Protocol`_ by providing an
    ``__arrow_c_stream__`` method that delegates to the wrapped object, so it
    can be handed to any Arrow implementation that supports this protocol.

    .. _Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html

    Example
    -------
    >>> import pyarrow as pa
    >>> pa.table(gdf.to_arrow())  # doctest: +SKIP

    """

    def __init__(self, pa_table):
        # The wrapped object; it must provide ``__arrow_c_stream__`` itself.
        self._pa_table = pa_table

    def __arrow_c_stream__(self, requested_schema=None):
        """Forward the stream export request to the wrapped table."""
        wrapped = self._pa_table
        return wrapped.__arrow_c_stream__(requested_schema=requested_schema)
|
||||
|
||||
|
||||
class GeoArrowArray:
    """
    Thin wrapper exposing a geometry array through the Arrow PyCapsule interface.

    The wrapper implements the `Arrow PyCapsule Protocol`_ by providing an
    ``__arrow_c_array__`` method, so it can be handed to any Arrow
    implementation that supports this protocol.

    .. _Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html

    Example
    -------
    >>> import pyarrow as pa
    >>> pa.array(ser.to_arrow())  # doctest: +SKIP

    """

    def __init__(self, pa_field, pa_array):
        self._pa_array = pa_array
        self._pa_field = pa_field

    def __arrow_c_array__(self, requested_schema=None):
        """
        Return a ``(schema, array)`` pair.

        The schema is exported from the stored field, so it is the field's
        description that accompanies the data; the array part is the second
        item of the stored array's own export.

        Raises
        ------
        NotImplementedError
            If ``requested_schema`` is given.
        """
        if requested_schema is not None:
            raise NotImplementedError(
                "Requested schema is not supported for geometry arrays"
            )
        schema_capsule = self._pa_field.__arrow_c_schema__()
        array_capsule = self._pa_array.__arrow_c_array__()[1]
        return (schema_capsule, array_capsule)
|
||||
|
||||
|
||||
def geopandas_to_arrow(
    df,
    index=None,
    geometry_encoding="WKB",
    interleaved=True,
    include_z=None,
):
    """
    Convert GeoDataFrame to a pyarrow.Table.

    Parameters
    ----------
    df : GeoDataFrame
        The GeoDataFrame to convert.
    index : bool, default None
        If ``True``, always include the dataframe's index(es) as columns
        in the file output.
        If ``False``, the index(es) will not be written to the file.
        If ``None``, the index(es) will be included as columns in the file
        output except `RangeIndex` which is stored as metadata only.
    geometry_encoding : {'WKB', 'geoarrow' }, default 'WKB'
        The GeoArrow encoding to use for the data conversion
        (matched case-insensitively).
    interleaved : bool, default True
        Only relevant for 'geoarrow' encoding. If True, the geometries'
        coordinates are interleaved in a single fixed size list array.
        If False, the coordinates are stored as separate arrays in a
        struct type.
    include_z : bool, default None
        Only relevant for 'geoarrow' encoding (for WKB, the dimensionality
        of the individual geometries is preserved).
        If False, return 2D geometries. If True, include the third dimension
        in the output (if a geometry has no third dimension, the z-coordinates
        will be NaN). By default, will infer the dimensionality from the
        input geometries. Note that this inference can be unreliable with
        empty geometries (for a guaranteed result, it is recommended to
        specify the keyword).

    Returns
    -------
    table : pyarrow.Table
        The converted table, with every geometry column replaced by its
        encoded form.
    geometry_encoding_dict : dict
        Maps each geometry column name to the encoding used for it: ``"WKB"``,
        or the GeoArrow extension name without its ``"geoarrow."`` prefix.

    Raises
    ------
    ValueError
        If ``geometry_encoding`` is not recognised, or if 'geoarrow' is
        requested with pyarrow < 10.0.

    """
    # Validate the request before the attribute table is converted, so an
    # invalid call fails fast.
    encoding = geometry_encoding.lower()
    if encoding not in ("wkb", "geoarrow"):
        raise ValueError(
            f"Expected geometry encoding 'WKB' or 'geoarrow' got {geometry_encoding}"
        )
    if encoding == "geoarrow" and Version(pa.__version__) < Version("10.0.0"):
        raise ValueError("Converting to 'geoarrow' requires pyarrow >= 10.0.")

    mask = df.dtypes == "geometry"
    geometry_columns = df.columns[mask]
    geometry_indices = np.asarray(mask).nonzero()[0]

    df_attr = pd.DataFrame(df.copy(deep=False))

    # replace geometry columns with dummy values -> will get converted to
    # Arrow null column (not holding any memory), so we can afterwards
    # fill the resulting table with the correct geometry fields
    for col in geometry_columns:
        df_attr[col] = None

    table = pa.Table.from_pandas(df_attr, preserve_index=index)

    geometry_encoding_dict = {}

    for i, col in zip(geometry_indices, geometry_columns):
        # ``np.asarray`` avoids an extra copy; the encoders only read it.
        geometries = np.asarray(df[col].array)
        crs = df[col].crs

        if encoding == "geoarrow":
            field, encoded = construct_geometry_array(
                geometries,
                include_z=include_z,
                field_name=col,
                crs=crs,
                interleaved=interleaved,
            )
            # e.g. b"geoarrow.point" -> "point"
            geometry_encoding_dict[col] = (
                field.metadata[b"ARROW:extension:name"]
                .decode()
                .removeprefix("geoarrow.")
            )
        else:
            field, encoded = construct_wkb_array(
                geometries, field_name=col, crs=crs
            )
            geometry_encoding_dict[col] = "WKB"

        table = table.set_column(i, field, encoded)

    return table, geometry_encoding_dict
|
||||
|
||||
|
||||
def construct_wkb_array(
    shapely_arr: NDArray[np.object_],
    *,
    field_name: str = "geometry",
    crs: Optional[str] = None,
) -> Tuple[pa.Field, pa.Array]:
    """
    Encode an array of shapely geometries as a WKB Arrow array.

    Parameters
    ----------
    shapely_arr : numpy.ndarray
        Object array of shapely geometries.
    field_name : str, default "geometry"
        Name given to the returned field.
    crs : optional
        Coordinate reference system; when given, its ``to_json()`` output is
        stored in the field's extension metadata.
        NOTE(review): annotated as ``str`` but used via ``.to_json()`` -
        confirm the intended type.

    Returns
    -------
    (pyarrow.Field, pyarrow.Array)
        A nullable binary field tagged as ``geoarrow.wkb`` and the WKB values.

    Raises
    ------
    ValueError
        If the array contains 3D geometries and GEOS is older than 3.10.
    """
    # ISO-flavoured WKB is used from GEOS 3.10.0 onwards (inclusive), matching
    # the "GEOS<3.10" wording of the error below.
    if shapely.geos_version >= (3, 10, 0):
        kwargs = {"flavor": "iso"}
    else:
        if shapely.has_z(shapely_arr).any():
            raise ValueError("Cannot write 3D geometries with GEOS<3.10")
        kwargs = {}

    wkb_arr = shapely.to_wkb(shapely_arr, **kwargs)
    extension_metadata = {"ARROW:extension:name": "geoarrow.wkb"}
    if crs is not None:
        extension_metadata["ARROW:extension:metadata"] = json.dumps(
            {"crs": crs.to_json()}
        )
    else:
        # In theory this should not be needed, but otherwise pyarrow < 17
        # crashes on receiving such data through C Data Interface
        # https://github.com/apache/arrow/issues/41741
        extension_metadata["ARROW:extension:metadata"] = "{}"

    field = pa.field(
        field_name, type=pa.binary(), nullable=True, metadata=extension_metadata
    )
    parr = pa.array(np.asarray(wkb_arr), pa.binary())
    return field, parr
|
||||
|
||||
|
||||
def _convert_inner_coords(coords, interleaved, dims, mask=None):
    """
    Convert a coordinate array into an Arrow array of points.

    With ``interleaved`` true the result is a fixed size list array built from
    the flattened coordinates; otherwise it is a struct array with one
    non-nullable float64 field per axis ("x", "y" and, unless ``dims`` is
    "xy", "z"). ``mask`` is forwarded to the pyarrow constructor.
    """
    if not interleaved:
        axes = "xy" if dims == "xy" else "xyz"
        fields = [pa.field(axis, pa.float64(), nullable=False) for axis in axes]
        # Each column is copied out of ``coords`` before being handed to pyarrow.
        columns = [coords[:, position].copy() for position in range(len(axes))]
        return pa.StructArray.from_arrays(columns, fields=fields, mask=mask)

    coords_field = pa.field(dims, pa.float64(), nullable=False)
    typ = pa.list_(coords_field, len(dims))
    # mask keyword only added in pyarrow 15.0.0, so pass it only when needed
    extra = {} if mask is None else {"mask": mask}
    return pa.FixedSizeListArray.from_arrays(coords.ravel(), type=typ, **extra)
|
||||
|
||||
|
||||
def _linestring_type(point_type):
    """Return a list type whose non-nullable items are named "vertices"."""
    vertices = pa.field("vertices", point_type, nullable=False)
    return pa.list_(vertices)
|
||||
|
||||
|
||||
def _polygon_type(point_type):
    """Return a list of non-nullable "rings", each a list of "vertices"."""
    vertices = pa.field("vertices", point_type, nullable=False)
    rings = pa.field("rings", pa.list_(vertices), nullable=False)
    return pa.list_(rings)
|
||||
|
||||
|
||||
def _multipoint_type(point_type):
    """Return a list type whose non-nullable items are named "points"."""
    points = pa.field("points", point_type, nullable=False)
    return pa.list_(points)
|
||||
|
||||
|
||||
def _multilinestring_type(point_type):
    """Return a list of non-nullable "linestrings" built on ``_linestring_type``."""
    linestrings = pa.field(
        "linestrings", _linestring_type(point_type), nullable=False
    )
    return pa.list_(linestrings)
|
||||
|
||||
|
||||
def _multipolygon_type(point_type):
    """Return a list of non-nullable "polygons" built on ``_polygon_type``."""
    polygons = pa.field("polygons", _polygon_type(point_type), nullable=False)
    return pa.list_(polygons)
|
||||
|
||||
|
||||
def _build_geoarrow_field(field_name, parr, extension_metadata, encoding):
    """Tag the metadata as ``geoarrow.<encoding>`` and pair a nullable field with ``parr``."""
    extension_metadata["ARROW:extension:name"] = f"geoarrow.{encoding}"
    field = pa.field(
        field_name,
        parr.type,
        nullable=True,
        metadata=extension_metadata,
    )
    return field, parr


def construct_geometry_array(
    shapely_arr: NDArray[np.object_],
    include_z: Optional[bool] = None,
    *,
    field_name: str = "geometry",
    crs: Optional[str] = None,
    interleaved: bool = True,
) -> Tuple[pa.Field, pa.Array]:
    """
    Encode an array of shapely geometries as a native GeoArrow array.

    Parameters
    ----------
    shapely_arr : numpy.ndarray
        Object array of shapely geometries.
    include_z : bool, optional
        Forwarded to ``shapely.to_ragged_array``.
    field_name : str, default "geometry"
        Name given to the returned field.
    crs : optional
        Coordinate reference system; when given, its ``to_json()`` output is
        stored in the field's extension metadata.
        NOTE(review): annotated as ``str`` but used via ``.to_json()`` -
        confirm the intended type.
    interleaved : bool, default True
        Forwarded to ``_convert_inner_coords`` to select the point layout.

    Returns
    -------
    (pyarrow.Field, pyarrow.Array)
        A nullable field carrying the GeoArrow extension metadata, and the
        encoded geometries.

    Raises
    ------
    ValueError
        For interleaved points with missing values on pyarrow < 15.0.0, for
        coordinates that are neither 2D nor 3D, or for a geometry type that
        has no GeoArrow encoding here.
    """
    # NOTE: this implementation returns a (field, array) pair so that it can set the
    # extension metadata on the field without instantiating extension types into the
    # global pyarrow registry
    geom_type, coords, offsets = shapely.to_ragged_array(
        shapely_arr, include_z=include_z
    )

    mask = shapely.is_missing(shapely_arr)
    if mask.any():
        if (
            geom_type == GeometryType.POINT
            and interleaved
            and Version(pa.__version__) < Version("15.0.0")
        ):
            raise ValueError(
                "Converting point geometries with missing values is not supported "
                "for interleaved coordinates with pyarrow < 15.0.0. Please "
                "upgrade to a newer version of pyarrow."
            )
        mask = pa.array(mask, type=pa.bool_())

        if geom_type == GeometryType.POINT and not SHAPELY_GE_204:
            # bug in shapely < 2.0.4, see https://github.com/shapely/shapely/pull/2034
            # this workaround only works if there are no empty points
            indices = np.nonzero(mask)[0]
            indices = indices - np.arange(len(indices))
            coords = np.insert(coords, indices, np.nan, axis=0)

    else:
        mask = None

    if coords.shape[-1] == 2:
        dims = "xy"
    elif coords.shape[-1] == 3:
        dims = "xyz"
    else:
        raise ValueError(f"Unexpected coords dimensions: {coords.shape}")

    extension_metadata: Dict[str, str] = {}
    if crs is not None:
        extension_metadata["ARROW:extension:metadata"] = json.dumps(
            {"crs": crs.to_json()}
        )
    else:
        # In theory this should not be needed, but otherwise pyarrow < 17
        # crashes on receiving such data through C Data Interface
        # https://github.com/apache/arrow/issues/41741
        extension_metadata["ARROW:extension:metadata"] = "{}"

    if geom_type == GeometryType.POINT:
        # Points carry the validity mask on the coordinate array itself.
        parr = _convert_inner_coords(coords, interleaved, dims, mask=mask)
        return _build_geoarrow_field(field_name, parr, extension_metadata, "point")

    elif geom_type == GeometryType.LINESTRING:
        assert len(offsets) == 1, "Expected one offsets array"
        (geom_offsets,) = offsets
        _parr = _convert_inner_coords(coords, interleaved, dims)
        parr = pa.ListArray.from_arrays(
            pa.array(geom_offsets), _parr, _linestring_type(_parr.type), mask=mask
        )
        return _build_geoarrow_field(
            field_name, parr, extension_metadata, "linestring"
        )

    elif geom_type == GeometryType.POLYGON:
        assert len(offsets) == 2, "Expected two offsets arrays"
        ring_offsets, geom_offsets = offsets
        _parr = _convert_inner_coords(coords, interleaved, dims)
        _parr1 = pa.ListArray.from_arrays(pa.array(ring_offsets), _parr)
        parr = pa.ListArray.from_arrays(pa.array(geom_offsets), _parr1, mask=mask)
        # Cast to the type with the named, non-nullable child fields.
        parr = parr.cast(_polygon_type(_parr.type))
        return _build_geoarrow_field(field_name, parr, extension_metadata, "polygon")

    elif geom_type == GeometryType.MULTIPOINT:
        assert len(offsets) == 1, "Expected one offsets array"
        (geom_offsets,) = offsets
        _parr = _convert_inner_coords(coords, interleaved, dims)
        parr = pa.ListArray.from_arrays(
            pa.array(geom_offsets), _parr, type=_multipoint_type(_parr.type), mask=mask
        )
        return _build_geoarrow_field(
            field_name, parr, extension_metadata, "multipoint"
        )

    elif geom_type == GeometryType.MULTILINESTRING:
        assert len(offsets) == 2, "Expected two offsets arrays"
        ring_offsets, geom_offsets = offsets
        _parr = _convert_inner_coords(coords, interleaved, dims)
        _parr1 = pa.ListArray.from_arrays(pa.array(ring_offsets), _parr)
        parr = pa.ListArray.from_arrays(pa.array(geom_offsets), _parr1, mask=mask)
        parr = parr.cast(_multilinestring_type(_parr.type))
        return _build_geoarrow_field(
            field_name, parr, extension_metadata, "multilinestring"
        )

    elif geom_type == GeometryType.MULTIPOLYGON:
        assert len(offsets) == 3, "Expected three offsets arrays"
        ring_offsets, polygon_offsets, geom_offsets = offsets
        _parr = _convert_inner_coords(coords, interleaved, dims)
        _parr1 = pa.ListArray.from_arrays(pa.array(ring_offsets), _parr)
        _parr2 = pa.ListArray.from_arrays(pa.array(polygon_offsets), _parr1)
        parr = pa.ListArray.from_arrays(pa.array(geom_offsets), _parr2, mask=mask)
        parr = parr.cast(_multipolygon_type(_parr.type))
        return _build_geoarrow_field(
            field_name, parr, extension_metadata, "multipolygon"
        )

    else:
        raise ValueError(f"Unsupported type for geoarrow: {geom_type}")
|
||||
|
||||
|
||||
## GeoArrow -> GeoPandas
|
||||
|
||||
|
||||
def _get_arrow_geometry_field(field):
|
||||
if (meta := field.metadata) is not None:
|
||||
if (ext_name := meta.get(b"ARROW:extension:name", None)) is not None:
|
||||
if ext_name.startswith(b"geoarrow."):
|
||||
if (
|
||||
ext_meta := meta.get(b"ARROW:extension:metadata", None)
|
||||
) is not None:
|
||||
ext_meta = json.loads(ext_meta.decode())
|
||||
return ext_name.decode(), ext_meta
|
||||
|
||||
if isinstance(field.type, pa.ExtensionType):
|
||||
ext_name = field.type.extension_name
|
||||
if ext_name.startswith("geoarrow."):
|
||||
ext_meta_ser = field.type.__arrow_ext_serialize__()
|
||||
if ext_meta_ser:
|
||||
ext_meta = json.loads(ext_meta_ser.decode())
|
||||
else:
|
||||
ext_meta = None
|
||||
return ext_name, ext_meta
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def arrow_to_geopandas(table, geometry=None):
    """
    Build a GeoDataFrame from an Arrow table containing GeoArrow columns.

    Parameters
    ----------
    table : pyarrow.Table
        The Arrow table to convert. Anything that is not a ``pyarrow.Table``
        is first passed through ``pyarrow.table``.
    geometry : str, default None
        The name of the geometry column to set as the active geometry
        column. If None, the first geometry column found will be used.

    Returns
    -------
    GeoDataFrame

    Raises
    ------
    ValueError
        If the table has no geometry column.
    TypeError
        If a geometry column uses an unknown GeoArrow extension type.

    """
    if not isinstance(table, pa.Table):
        table = pa.table(table)

    # (position, column name, extension name, extension metadata) per geometry
    geom_fields = []
    for position, field in enumerate(table.schema):
        info = _get_arrow_geometry_field(field)
        if info is None:
            continue
        ext_name, ext_meta = info
        geom_fields.append((position, field.name, ext_name, ext_meta))

    if not geom_fields:
        raise ValueError("No geometry column found in the Arrow table.")

    # Convert the attribute columns on their own, then re-insert the decoded
    # geometry columns at their original positions.
    geometry_names = [name for _, name, _, _ in geom_fields]
    df = table.drop(geometry_names).to_pandas()

    for position, col, ext_name, ext_meta in geom_fields:
        has_crs = ext_meta is not None and "crs" in ext_meta
        crs = ext_meta["crs"] if has_crs else None

        if ext_name == "geoarrow.wkb":
            geom_arr = from_wkb(np.array(table[col]), crs=crs)
        elif ext_name.split(".")[1] in GEOARROW_ENCODINGS:
            shapely_arr = construct_shapely_array(
                table[col].combine_chunks(), ext_name
            )
            geom_arr = from_shapely(shapely_arr, crs=crs)
        else:
            raise TypeError(f"Unknown GeoArrow extension type: {ext_name}")

        df.insert(position, col, geom_arr)

    return GeoDataFrame(df, geometry=geometry or geom_fields[0][1])
|
||||
|
||||
|
||||
def arrow_to_geometry_array(arr):
    """
    Convert a single GeoArrow array into a geopandas GeometryArray.

    The input is read through its ``__arrow_c_array__`` method and
    re-imported as a pyarrow array. Specifically for GeoSeries.from_arrow.

    Raises
    ------
    ValueError
        If pyarrow is older than 14.0.0, if the array's field is not a
        GeoArrow geometry field, or if its extension name is not supported.
    """
    if Version(pa.__version__) < Version("14.0.0"):
        raise ValueError("Importing from Arrow requires pyarrow >= 14.0.")

    schema_capsule, array_capsule = arr.__arrow_c_array__()
    field = pa.Field._import_from_c_capsule(schema_capsule)
    pa_arr = pa.Array._import_from_c_capsule(field.__arrow_c_schema__(), array_capsule)

    geom_info = _get_arrow_geometry_field(field)
    if geom_info is None:
        raise ValueError("No GeoArrow geometry field found.")
    ext_name, ext_meta = geom_info

    if ext_meta is not None and "crs" in ext_meta:
        crs = ext_meta["crs"]
    else:
        crs = None

    if ext_name == "geoarrow.wkb":
        return from_wkb(np.array(pa_arr), crs=crs)

    if ext_name.split(".")[1] in GEOARROW_ENCODINGS:
        return from_shapely(construct_shapely_array(pa_arr, ext_name), crs=crs)

    raise ValueError(f"Unknown GeoArrow extension type: {ext_name}")
|
||||
|
||||
|
||||
def _get_inner_coords(arr):
    """
    Return the coordinates held by the innermost array as a 2D NumPy array.

    Struct arrays are read field by field ("x", "y", plus "z" unless the
    struct has exactly two fields) and stacked as columns. Any other array
    is treated as a fixed size list whose flat values are reshaped to one
    row per element of ``arr``.
    """
    if not pa.types.is_struct(arr.type):
        # fixed size list
        return np.asarray(arr.values).reshape(len(arr), -1)

    dims = ("x", "y") if arr.type.num_fields == 2 else ("x", "y", "z")
    return np.column_stack([np.asarray(arr.field(dim)) for dim in dims])
|
||||
|
||||
|
||||
def construct_shapely_array(arr: pa.Array, extension_name: str):
    """
    Construct a NumPy array of shapely geometries from a pyarrow.Array
    with GeoArrow extension type.

    Parameters
    ----------
    arr : pyarrow.Array
        The GeoArrow array; for an extension array its storage is used.
    extension_name : str
        One of the "geoarrow.<type>" names handled below.

    Raises
    ------
    ValueError
        If ``extension_name`` is not one of the handled names.
    """
    if isinstance(arr, pa.ExtensionArray):
        arr = arr.storage

    # Geometry type and number of list levels wrapped around the coordinates.
    layouts = {
        "geoarrow.point": (GeometryType.POINT, 0),
        "geoarrow.linestring": (GeometryType.LINESTRING, 1),
        "geoarrow.polygon": (GeometryType.POLYGON, 2),
        "geoarrow.multipoint": (GeometryType.MULTIPOINT, 1),
        "geoarrow.multilinestring": (GeometryType.MULTILINESTRING, 2),
        "geoarrow.multipolygon": (GeometryType.MULTIPOLYGON, 3),
    }
    if extension_name not in layouts:
        raise ValueError(extension_name)
    geometry_type, depth = layouts[extension_name]

    # Descend through the list levels to reach the coordinate array.
    innermost = arr
    for _ in range(depth):
        innermost = innermost.values
    coords = _get_inner_coords(innermost)

    if depth == 0:
        offsets = None
    else:
        # Collected from the outermost level inwards, then reversed so the
        # innermost offsets come first in the tuple.
        per_level = []
        level = arr
        for _ in range(depth):
            per_level.append(np.asarray(level.offsets))
            level = level.values
        offsets = tuple(reversed(per_level))

    result = shapely.from_ragged_array(geometry_type, coords, offsets)

    # apply validity mask
    if arr.null_count:
        null_mask = np.asarray(arr.is_null())
        result = np.where(null_mask, None, result)

    return result
|
||||
@@ -0,0 +1,72 @@
|
||||
from packaging.version import Version
|
||||
|
||||
import pyarrow
|
||||
|
||||
_ERROR_MSG = """\
|
||||
Disallowed deserialization of 'arrow.py_extension_type':
|
||||
storage_type = {storage_type}
|
||||
serialized = {serialized}
|
||||
pickle disassembly:\n{pickle_disassembly}
|
||||
|
||||
Reading of untrusted Parquet or Feather files with a PyExtensionType column
|
||||
allows arbitrary code execution.
|
||||
If you trust this file, you can enable reading the extension type by one of:
|
||||
|
||||
- upgrading to pyarrow >= 14.0.1, and call `pa.PyExtensionType.set_auto_load(True)`
|
||||
- install pyarrow-hotfix (`pip install pyarrow-hotfix`) and disable it by running
|
||||
`import pyarrow_hotfix; pyarrow_hotfix.uninstall()`
|
||||
|
||||
We strongly recommend updating your Parquet/Feather files to use extension types
|
||||
derived from `pyarrow.ExtensionType` instead, and register this type explicitly.
|
||||
See https://arrow.apache.org/docs/dev/python/extending_types.html#defining-extension-types-user-defined-types
|
||||
for more details.
|
||||
"""
|
||||
|
||||
|
||||
def patch_pyarrow():
    """
    Forbid deserialization of 'arrow.py_extension_type' on older pyarrow.

    Does nothing when pyarrow is >= 14.0.1, when the ``pyarrow_hotfix``
    package can be imported, or when ``pyarrow._hotfix_installed`` is already
    set. Otherwise the registered 'arrow.py_extension_type' is replaced by an
    extension type whose deserialization raises ``RuntimeError``, and
    ``pyarrow._hotfix_installed`` is set to True.
    """
    # starting from pyarrow 14.0.1, it has its own mechanism
    if Version(pyarrow.__version__) >= Version("14.0.1"):
        return

    # If the user has pyarrow_hotfix (https://github.com/pitrou/pyarrow-hotfix)
    # installed, defer to it (which also keeps things working if they had
    # called `pyarrow_hotfix.uninstall()`).
    try:
        import pyarrow_hotfix  # noqa: F401
    except ImportError:
        external_hotfix_present = False
    else:
        external_hotfix_present = True
    if external_hotfix_present:
        return

    # if the hotfix is already installed and enabled
    if getattr(pyarrow, "_hotfix_installed", False):
        return

    class ForbiddenExtensionType(pyarrow.ExtensionType):
        def __arrow_ext_serialize__(self):
            return b""

        @classmethod
        def __arrow_ext_deserialize__(cls, storage_type, serialized):
            import io
            import pickletools

            # Disassemble (never load) the pickle payload so the error shows
            # what the file tried to execute.
            disassembly = io.StringIO()
            pickletools.dis(serialized, disassembly)
            message = _ERROR_MSG.format(
                storage_type=storage_type,
                serialized=serialized,
                pickle_disassembly=disassembly.getvalue(),
            )
            raise RuntimeError(message)

    pyarrow.unregister_extension_type("arrow.py_extension_type")
    forbidden_type = ForbiddenExtensionType(pyarrow.null(), "arrow.py_extension_type")
    pyarrow.register_extension_type(forbidden_type)

    pyarrow._hotfix_installed = True


patch_pyarrow()
|
||||
@@ -0,0 +1,913 @@
|
||||
import json
|
||||
import warnings
|
||||
from packaging.version import Version
|
||||
|
||||
import numpy as np
|
||||
from pandas import DataFrame, Series
|
||||
|
||||
import shapely
|
||||
|
||||
import geopandas
|
||||
from geopandas import GeoDataFrame
|
||||
from geopandas._compat import import_optional_dependency
|
||||
from geopandas.array import from_shapely, from_wkb
|
||||
|
||||
from .file import _expand_user
|
||||
|
||||
# Metadata version written when no other version is requested.
METADATA_VERSION = "1.0.0"
# Values accepted for ``schema_version``.
SUPPORTED_VERSIONS = ["0.1.0", "0.4.0", "1.0.0-beta.1", "1.0.0", "1.1.0"]
# Encoding names that map to a "geoarrow.<name>" layout.
GEOARROW_ENCODINGS = [
    "point",
    "linestring",
    "polygon",
    "multipoint",
    "multilinestring",
    "multipolygon",
]
# Every value accepted for a column's "encoding" metadata key.
SUPPORTED_ENCODINGS = ["WKB", *GEOARROW_ENCODINGS]
|
||||
|
||||
# reference: https://github.com/opengeospatial/geoparquet

# Metadata structure:
# {
#     "geo": {
#         "columns": {
#             "<name>": {
#                 "encoding": "WKB"
#                 "geometry_types": <list of str: REQUIRED>
#                 "crs": "<PROJJSON or None: OPTIONAL>",
#                 "orientation": "<'counterclockwise' or None: OPTIONAL>"
#                 "edges": "planar"
#                 "bbox": <list of [xmin, ymin, xmax, ymax]: OPTIONAL>
#                 "epoch": <float: OPTIONAL>
#             }
#         },
#         "primary_column": "<str: REQUIRED>",
#         "version": "<METADATA_VERSION>",
#
#         # Additional GeoPandas specific metadata (not in metadata spec)
#         "creator": {
#             "library": "geopandas",
#             "version": "<geopandas.__version__>"
#         }
#     }
# }
|
||||
|
||||
|
||||
def _is_fsspec_url(url):
    """
    Return True if ``url`` is a string containing "://" that does not start
    with "http://" or "https://", and False otherwise.
    """
    if not isinstance(url, str):
        return False
    if "://" not in url:
        return False
    return not url.startswith(("http://", "https://"))
|
||||
|
||||
|
||||
def _remove_id_from_member_of_ensembles(json_dict):
    """
    Strip the "id" entry from datum ensemble members, in place.

    Older PROJ versions will not recognize IDs of datum ensemble members that
    were added in more recent PROJ database versions.

    Cf https://github.com/opengeospatial/geoparquet/discussions/110
    and https://github.com/OSGeo/PROJ/pull/3221

    Mimicking the patch to GDAL from https://github.com/OSGeo/gdal/pull/5872

    Only nested dicts are descended into; a list is touched only when it is
    stored under the key "members".
    """
    pending = [json_dict]
    while pending:
        current = pending.pop()
        for key, value in current.items():
            if isinstance(value, dict):
                pending.append(value)
            elif key == "members" and isinstance(value, list):
                for member in value:
                    member.pop("id", None)
|
||||
|
||||
|
||||
# Names indexed by type id: ids 0 to 7 first, followed by the same names with
# a " Z" suffix at ids 8 to 15.
# NOTE(review): id 2 is LinearRing in shapely's type-id numbering; it is listed
# as "LineString" here, which looks intentional - confirm.
_geometry_type_names = [
    "Point",
    "LineString",
    "LineString",
    "Polygon",
    "MultiPoint",
    "MultiLineString",
    "MultiPolygon",
    "GeometryCollection",
]
_geometry_type_names.extend([f"{name} Z" for name in _geometry_type_names])
|
||||
|
||||
|
||||
def _get_geometry_types(series):
    """
    Get the sorted, unique geometry type names present in a GeoSeries.

    Parameters
    ----------
    series : GeoSeries

    Returns
    -------
    list of str
        Names taken from ``_geometry_type_names``; geometries with a Z
        dimension get the "... Z" variant. Missing values are ignored.
    """
    geoms = series.array._data
    arr_geometry_types = shapely.get_type_id(geoms)
    # ensure to include "... Z" for 3D geometries (names at ids 8 to 15)
    arr_geometry_types[shapely.has_z(geoms)] += 8

    type_ids = Series(arr_geometry_types).unique().tolist()

    # Two type ids share the name "LineString", so unique ids do not guarantee
    # unique names: collect into a set before sorting.
    # Missing values are dropped (shapely.get_type_id returns -1 for those).
    names = {_geometry_type_names[idx] for idx in type_ids if idx != -1}
    return sorted(names)
|
||||
|
||||
|
||||
def _create_metadata(
    df, schema_version=None, geometry_encoding=None, write_covering_bbox=False
):
    """Create the geo metadata dict for a GeoDataFrame.

    The result is a plain dict; it is not JSON-encoded here.

    Parameters
    ----------
    df : GeoDataFrame
    schema_version : {'0.1.0', '0.4.0', '1.0.0-beta.1', '1.0.0', '1.1.0', None}
        GeoParquet specification version. If None, '1.1.0' is used when any
        column has a non-WKB encoding, otherwise ``METADATA_VERSION``.
    geometry_encoding : dict, default None
        Mapping of geometry column name to its encoding name. Columns
        without an entry (including when this is None) are recorded as "WKB".
    write_covering_bbox : bool, default False
        If True, add a "covering" entry to each column's metadata that points
        at the 'xmin', 'ymin', 'xmax' and 'ymax' fields of a 'bbox' column.
        Only the metadata is produced here, not the column itself.

    Returns
    -------
    dict

    Raises
    ------
    ValueError
        If ``schema_version`` is not one of ``SUPPORTED_VERSIONS``.
    """
    # The default of None used to fail on the per-column lookup below; treat
    # it as "no explicit encodings", i.e. everything is WKB.
    encodings = geometry_encoding or {}

    if schema_version is None:
        uses_native_encoding = any(
            encoding != "WKB" for encoding in encodings.values()
        )
        schema_version = "1.1.0" if uses_native_encoding else METADATA_VERSION

    if schema_version not in SUPPORTED_VERSIONS:
        raise ValueError(
            f"schema_version must be one of: {', '.join(SUPPORTED_VERSIONS)}"
        )

    # Construct metadata for each geometry
    column_metadata = {}
    for col in df.columns[df.dtypes == "geometry"]:
        series = df[col]

        geometry_types = _get_geometry_types(series)
        if schema_version[0] == "0":
            # 0.x versions use the singular key and a bare string when there
            # is exactly one type
            geometry_types_name = "geometry_type"
            if len(geometry_types) == 1:
                geometry_types = geometry_types[0]
        else:
            geometry_types_name = "geometry_types"

        crs = None
        if series.crs:
            if schema_version == "0.1.0":
                crs = series.crs.to_wkt()
            else:  # version >= 0.4.0
                crs = series.crs.to_json_dict()
                _remove_id_from_member_of_ensembles(crs)

        column_metadata[col] = {
            "encoding": encodings.get(col, "WKB"),
            "crs": crs,
            geometry_types_name: geometry_types,
        }

        bbox = series.total_bounds.tolist()
        if np.isfinite(bbox).all():
            # don't add bbox with NaNs for empty / all-NA geometry column
            column_metadata[col]["bbox"] = bbox

        if write_covering_bbox:
            column_metadata[col]["covering"] = {
                "bbox": {
                    "xmin": ["bbox", "xmin"],
                    "ymin": ["bbox", "ymin"],
                    "xmax": ["bbox", "xmax"],
                    "ymax": ["bbox", "ymax"],
                },
            }

    return {
        "primary_column": df._geometry_column_name,
        "columns": column_metadata,
        "version": schema_version,
        "creator": {"library": "geopandas", "version": geopandas.__version__},
    }
|
||||
|
||||
|
||||
def _encode_metadata(metadata):
    """Serialize a metadata dict to JSON and encode it as UTF-8 bytes.

    Parameters
    ----------
    metadata : dict

    Returns
    -------
    bytes
        UTF-8 encoded JSON document
    """
    as_json = json.dumps(metadata)
    return as_json.encode("utf-8")
|
||||
|
||||
|
||||
def _decode_metadata(metadata_str):
    """Parse UTF-8 encoded JSON bytes into a Python object.

    Parameters
    ----------
    metadata_str : bytes (UTF-8 encoded JSON) or None

    Returns
    -------
    dict
        The parsed JSON, or None when ``metadata_str`` is None.
    """
    if metadata_str is not None:
        return json.loads(metadata_str.decode("utf-8"))
    return None
|
||||
|
||||
|
||||
def _validate_dataframe(df):
|
||||
"""Validate that the GeoDataFrame conforms to requirements for writing
|
||||
to Parquet format.
|
||||
|
||||
Raises `ValueError` if the GeoDataFrame is not valid.
|
||||
|
||||
copied from `pandas.io.parquet`
|
||||
|
||||
Parameters
|
||||
----------
|
||||
df : GeoDataFrame
|
||||
"""
|
||||
|
||||
if not isinstance(df, DataFrame):
|
||||
raise ValueError("Writing to Parquet/Feather only supports IO with DataFrames")
|
||||
|
||||
# must have value column names (strings only)
|
||||
if df.columns.inferred_type not in {"string", "unicode", "empty"}:
|
||||
raise ValueError("Writing to Parquet/Feather requires string column names")
|
||||
|
||||
# index level names must be strings
|
||||
valid_names = all(
|
||||
isinstance(name, str) for name in df.index.names if name is not None
|
||||
)
|
||||
if not valid_names:
|
||||
raise ValueError("Index level names must be strings")
|
||||
|
||||
|
||||
def _validate_geo_metadata(metadata):
    """Validate decoded geo metadata.

    The metadata must not be empty and must carry a version, a
    "primary_column" and a "columns" dict whose entries each have a supported
    "encoding". A column whose "edges" value is "spherical" triggers a
    UserWarning rather than an error.

    Parameters
    ----------
    metadata : dict

    Raises
    ------
    ValueError
        If the metadata is empty or does not have the structure above, or if
        a "covering" "bbox" entry lacks one of xmin/ymin/xmax/ymax.
    """
    if not metadata:
        raise ValueError("Missing or malformed geo metadata in Parquet/Feather file")

    # version was schema_version in 0.1.0
    version = metadata.get("version", metadata.get("schema_version"))
    if not version:
        raise ValueError(
            "'geo' metadata in Parquet/Feather file is missing required key: "
            "'version'"
        )

    for key in ("primary_column", "columns"):
        if metadata.get(key, None) is None:
            raise ValueError(
                "'geo' metadata in Parquet/Feather file is missing required key: "
                f"'{key}'"
            )

    columns = metadata["columns"]
    if not isinstance(columns, dict):
        raise ValueError("'columns' in 'geo' metadata must be a dict")

    for col, column_metadata in columns.items():
        # "encoding" is the only key required per column; "geometry_type" is
        # left out for compatibility with 0.1
        if "encoding" not in column_metadata:
            raise ValueError(
                "'geo' metadata in Parquet/Feather file is missing required key "
                f"'encoding' for column '{col}'"
            )

        encoding = column_metadata["encoding"]
        if encoding not in SUPPORTED_ENCODINGS:
            raise ValueError(
                "Only WKB geometry encoding or one of the native encodings "
                f"({GEOARROW_ENCODINGS!r}) are supported, "
                f"got: {encoding}"
            )

        if column_metadata.get("edges", "planar") == "spherical":
            warnings.warn(
                f"The geo metadata indicate that column '{col}' has spherical edges, "
                "but because GeoPandas currently does not support spherical "
                "geometry, it ignores this metadata and will interpret the edges of "
                "the geometries as planar.",
                UserWarning,
                stacklevel=4,
            )

        if "covering" in column_metadata:
            covering = column_metadata["covering"]
            if "bbox" in covering:
                bbox = covering["bbox"]
                corners = ("xmin", "ymin", "xmax", "ymax")
                if any(corner not in bbox.keys() for corner in corners):
                    raise ValueError("Metadata for bbox column is malformed.")
|
||||
|
||||
|
||||
def _geopandas_to_arrow(
    df,
    index=None,
    geometry_encoding="WKB",
    schema_version=None,
    write_covering_bbox=None,
):
    """
    Helper function with main, shared logic for to_parquet/to_feather.

    Validates ``df``, converts it to an Arrow table, optionally appends a
    'bbox' struct column built from ``df.bounds``, and stores the encoded geo
    metadata under the ``b"geo"`` key of the table's schema metadata.

    Raises
    ------
    ValueError
        If a non-WKB encoding is combined with an explicit schema version
        other than "1.1.0", or if ``write_covering_bbox`` is set while
        ``df`` already has a column named 'bbox'.
    """
    from pyarrow import StructArray

    from geopandas.io._geoarrow import geopandas_to_arrow

    _validate_dataframe(df)

    explicit_version = schema_version is not None
    if explicit_version and geometry_encoding != "WKB" and schema_version != "1.1.0":
        raise ValueError(
            "'geoarrow' encoding is only supported with schema version >= 1.1.0"
        )

    table, geometry_encoding_dict = geopandas_to_arrow(
        df, geometry_encoding=geometry_encoding, index=index, interleaved=False
    )
    geo_metadata = _create_metadata(
        df,
        schema_version=schema_version,
        geometry_encoding=geometry_encoding_dict,
        write_covering_bbox=write_covering_bbox,
    )

    if write_covering_bbox:
        if "bbox" in df.columns:
            raise ValueError(
                "An existing column 'bbox' already exists in the dataframe. "
                "Please rename to write covering bbox."
            )
        bounds = df.bounds
        corner_arrays = [
            bounds["minx"],
            bounds["miny"],
            bounds["maxx"],
            bounds["maxy"],
        ]
        bbox_array = StructArray.from_arrays(
            corner_arrays, names=["xmin", "ymin", "xmax", "ymax"]
        )
        table = table.append_column("bbox", bbox_array)

    # Store geopandas specific file-level metadata
    # This must be done AFTER creating the table or it is not persisted
    schema_metadata = table.schema.metadata
    schema_metadata.update({b"geo": _encode_metadata(geo_metadata)})

    return table.replace_schema_metadata(schema_metadata)
|
||||
|
||||
|
||||
def _to_parquet(
    df,
    path,
    index=None,
    compression="snappy",
    geometry_encoding="WKB",
    schema_version=None,
    write_covering_bbox=False,
    **kwargs,
):
    """
    Write a GeoDataFrame to the Parquet format.

    Geometry columns are encoded according to ``geometry_encoding`` (WKB by
    default).

    Requires 'pyarrow'.

    This is tracking version 1.0.0 of the GeoParquet specification at:
    https://github.com/opengeospatial/geoparquet. Writing other supported
    versions is possible using the `schema_version` keyword.

    .. versionadded:: 0.8

    Parameters
    ----------
    path : str, path object
    index : bool, default None
        If ``True``, always include the dataframe's index(es) as columns
        in the file output.
        If ``False``, the index(es) will not be written to the file.
        If ``None``, the index(es) will be included as columns in the file
        output except `RangeIndex` which is stored as metadata only.
    compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy'
        Name of the compression to use. Use ``None`` for no compression.
    geometry_encoding : {'WKB', 'geoarrow'}, default 'WKB'
        The encoding to use for the geometry columns. Defaults to "WKB"
        for maximum interoperability. Specify "geoarrow" to use one of the
        native GeoArrow-based single-geometry type encodings.
    schema_version : {'0.1.0', '0.4.0', '1.0.0-beta.1', '1.0.0', '1.1.0', None}
        GeoParquet specification version; if not provided, a default is
        chosen when the metadata is created.
    write_covering_bbox : bool, default False
        Writes the bounding box column for each row entry with column
        name 'bbox'. Writing a bbox column can be computationally
        expensive, hence its default setting is False.
    **kwargs
        Additional keyword arguments passed to pyarrow.parquet.write_table().
    """
    parquet = import_optional_dependency(
        "pyarrow.parquet", extra="pyarrow is required for Parquet support."
    )

    target = _expand_user(path)
    arrow_table = _geopandas_to_arrow(
        df,
        index=index,
        geometry_encoding=geometry_encoding,
        schema_version=schema_version,
        write_covering_bbox=write_covering_bbox,
    )
    parquet.write_table(arrow_table, target, compression=compression, **kwargs)
|
||||
|
||||
|
||||
def _to_feather(df, path, index=None, compression=None, schema_version=None, **kwargs):
    """
    Write a GeoDataFrame to the Feather format.

    Geometry columns are written with the default geometry encoding of the
    shared Arrow conversion helper (WKB).

    Requires 'pyarrow' >= 0.17.

    This is tracking version 1.0.0 of the GeoParquet specification for
    the metadata at: https://github.com/opengeospatial/geoparquet. Writing
    other supported versions is possible using the `schema_version` keyword.

    .. versionadded:: 0.8

    Parameters
    ----------
    path : str, path object
    index : bool, default None
        If ``True``, always include the dataframe's index(es) as columns
        in the file output.
        If ``False``, the index(es) will not be written to the file.
        If ``None``, the index(es) will be included as columns in the file
        output except `RangeIndex` which is stored as metadata only.
    compression : {'zstd', 'lz4', 'uncompressed'}, optional
        Name of the compression to use. Use ``"uncompressed"`` for no
        compression. By default uses LZ4 if available, otherwise uncompressed.
    schema_version : {'0.1.0', '0.4.0', '1.0.0', None}
        GeoParquet specification version for the metadata; if not provided,
        a default is chosen when the metadata is created.
    kwargs
        Additional keyword arguments passed to pyarrow.feather.write_feather().

    Raises
    ------
    ImportError
        If the installed pyarrow is older than 0.17.0.
    """
    feather = import_optional_dependency(
        "pyarrow.feather", extra="pyarrow is required for Feather support."
    )
    # TODO move this into `import_optional_dependency`
    import pyarrow

    installed = Version(pyarrow.__version__)
    if installed < Version("0.17.0"):
        raise ImportError("pyarrow >= 0.17 required for Feather support")

    target = _expand_user(path)
    arrow_table = _geopandas_to_arrow(df, index=index, schema_version=schema_version)
    feather.write_feather(arrow_table, target, compression=compression, **kwargs)
|
||||
|
||||
|
||||
def _arrow_to_geopandas(table, geo_metadata=None):
    """
    Helper function with main, shared logic for read_parquet/read_feather.

    Converts the non-geometry columns of ``table`` to pandas, decodes every
    geometry column listed in ``geo_metadata`` that is present in the table,
    and re-inserts each at its original position.

    Raises
    ------
    ValueError
        If none of the geometry columns named in the metadata is present in
        ``table``.
    """
    if geo_metadata is None:
        # Note: this path of not passing metadata is also used by dask-geopandas
        geo_metadata = _validate_and_decode_metadata(table.schema.metadata)

    # Column order of the resulting frame, taken from a zero-row conversion.
    result_column_names = list(table.slice(0, 0).to_pandas().columns)

    # Find all geometry columns that were read from the file. May
    # be a subset if 'columns' parameter is used.
    geometry_columns = sorted(
        (col for col in geo_metadata["columns"] if col in table.column_names),
        key=result_column_names.index,
    )

    if not geometry_columns:
        raise ValueError(
            """No geometry columns are included in the columns read from
the Parquet/Feather file. To read this file without geometry columns,
use pandas.read_parquet/read_feather() instead."""
        )

    geometry = geo_metadata["primary_column"]

    # Missing geometry likely indicates a subset of columns was read;
    # promote the first available geometry to the primary geometry.
    if geometry not in geometry_columns:
        geometry = geometry_columns[0]

        # if there are multiple non-primary geometry columns, raise a warning
        if len(geometry_columns) > 1:
            warnings.warn(
                "Multiple non-primary geometry columns read from Parquet/Feather "
                "file. The first column read was promoted to the primary geometry.",
                stacklevel=3,
            )

    df = table.drop(geometry_columns).to_pandas()

    # Convert the geometry columns that are present back to geometry.
    for col in geometry_columns:
        col_metadata = geo_metadata["columns"][col]

        # per the GeoParquet spec, missing CRS is to be interpreted as
        # OGC:CRS84
        crs = col_metadata.get("crs", "OGC:CRS84")
        if isinstance(crs, dict):
            _remove_id_from_member_of_ensembles(crs)

        if col_metadata["encoding"] == "WKB":
            geom_arr = from_wkb(np.array(table[col]), crs=crs)
        else:
            from geopandas.io._geoarrow import construct_shapely_array

            extension_name = "geoarrow." + col_metadata["encoding"]
            shapely_geoms = construct_shapely_array(
                table[col].combine_chunks(), extension_name
            )
            geom_arr = from_shapely(shapely_geoms, crs=crs)

        df.insert(result_column_names.index(col), col, geom_arr)

    return GeoDataFrame(df, geometry=geometry)
|
||||
|
||||
|
||||
def _get_filesystem_path(path, filesystem=None, storage_options=None):
    """
    Get the filesystem and path for a given filesystem and path.

    If the filesystem is not None then it's just returned as is.

    Parameters
    ----------
    path : str or path object
    filesystem : filesystem object, default None
    storage_options : dict, default None
        Keyword arguments forwarded to ``fsspec.core.url_to_fs`` when
        ``path`` is an fsspec URL.

    Returns
    -------
    tuple
        ``(filesystem, path)``; ``filesystem`` may still be None.

    Raises
    ------
    ValueError
        If ``storage_options`` is given but no filesystem could be resolved
        for ``path``.
    """
    import pyarrow

    try_native_filesystem = (
        isinstance(path, str)
        and storage_options is None
        and filesystem is None
        and Version(pyarrow.__version__) >= Version("5.0.0")
    )
    if try_native_filesystem:
        # Use the native pyarrow filesystem if possible.
        try:
            from pyarrow.fs import FileSystem

            filesystem, path = FileSystem.from_uri(path)
        except Exception:
            # Deliberate best-effort: fall back to get_handle / fsspec for
            # filesystems that pyarrow doesn't support.
            pass

    if _is_fsspec_url(path) and filesystem is None:
        fsspec = import_optional_dependency(
            "fsspec", extra="fsspec is required for 'storage_options'."
        )
        filesystem, path = fsspec.core.url_to_fs(path, **(storage_options or {}))

    if filesystem is None and storage_options:
        raise ValueError(
            f"Cannot provide 'storage_options' with non-fsspec path '{path}'"
        )

    return filesystem, path
|
||||
|
||||
|
||||
def _ensure_arrow_fs(filesystem):
    """
    Simplified version of pyarrow.fs._ensure_filesystem. This is only needed
    below because `pyarrow.parquet.read_metadata` does not yet accept a
    filesystem keyword (https://issues.apache.org/jira/browse/ARROW-16719)

    A ``pyarrow.fs.FileSystem`` is returned unchanged, an fsspec filesystem
    is wrapped in a ``PyFileSystem``, and anything else (including the case
    where fsspec cannot be imported) is returned as given.
    """
    from pyarrow import fs

    if isinstance(filesystem, fs.FileSystem):
        return filesystem

    # handle fsspec-compatible filesystems
    try:
        import fsspec
    except ImportError:
        return filesystem

    if isinstance(filesystem, fsspec.AbstractFileSystem):
        return fs.PyFileSystem(fs.FSSpecHandler(filesystem))

    return filesystem
|
||||
|
||||
|
||||
def _validate_and_decode_metadata(metadata):
    """
    Decode and validate the ``b"geo"`` entry of a schema metadata mapping.

    Parameters
    ----------
    metadata : dict or None
        Schema metadata with bytes keys.

    Returns
    -------
    dict
        The decoded geo metadata.

    Raises
    ------
    ValueError
        If there is no ``b"geo"`` entry, if its value cannot be decoded as
        UTF-8 JSON, or if the decoded metadata fails validation.
    """
    if metadata is None or b"geo" not in metadata:
        raise ValueError(
            """Missing geo metadata in Parquet/Feather file.
Use pandas.read_parquet/read_feather() instead."""
        )

    # check for malformed metadata
    try:
        decoded_geo_metadata = _decode_metadata(metadata.get(b"geo", b""))
    except (TypeError, UnicodeDecodeError, json.decoder.JSONDecodeError) as err:
        # Bytes that are not valid UTF-8 are malformed metadata as well; keep
        # the underlying error attached as the cause.
        raise ValueError(
            "Missing or malformed geo metadata in Parquet/Feather file"
        ) from err

    _validate_geo_metadata(decoded_geo_metadata)
    return decoded_geo_metadata
|
||||
|
||||
|
||||
def _read_parquet_schema_and_metadata(path, filesystem):
    """
    Opening the Parquet file/dataset a first time to get the schema and metadata.

    TODO: we should look into how we can reuse opened dataset for reading the
    actual data, to avoid discovering the dataset twice (problem right now is
    that the ParquetDataset interface doesn't allow passing the filters on read)

    Returns
    -------
    tuple
        ``(schema, metadata)``; ``metadata`` is the schema metadata, or the
        raw file metadata when the schema metadata has no ``b"geo"`` entry
        and the file metadata could be read.
    """
    import pyarrow
    from pyarrow import parquet

    # ``use_legacy_dataset`` is only passed for pyarrow older than 15.0.0
    if Version(pyarrow.__version__) < Version("15.0.0"):
        dataset_kwargs = dict(use_legacy_dataset=False)
    else:
        dataset_kwargs = {}

    try:
        dataset = parquet.ParquetDataset(path, filesystem=filesystem, **dataset_kwargs)
        schema = dataset.schema
    except Exception:
        # best-effort fallback to reading the schema directly
        schema = parquet.read_schema(path, filesystem=filesystem)

    metadata = schema.metadata
    if metadata is not None and b"geo" in metadata:
        return schema, metadata

    # read metadata separately to get the raw Parquet FileMetaData metadata
    # (pyarrow doesn't properly expose those in schema.metadata for files
    # created by GDAL - https://issues.apache.org/jira/browse/ARROW-16688)
    try:
        metadata = parquet.read_metadata(path, filesystem=filesystem).metadata
    except Exception:
        # keep whatever the schema provided
        pass

    return schema, metadata
|
||||
|
||||
|
||||
def _read_parquet(path, columns=None, storage_options=None, bbox=None, **kwargs):
    """
    Load a Parquet object from the file path, returning a GeoDataFrame.

    You can read a subset of columns in the file using the ``columns`` parameter.
    However, the structure of the returned GeoDataFrame will depend on which
    columns you read:

    * if no geometry columns are read, this will raise a ``ValueError`` - you
      should use the pandas `read_parquet` method instead.
    * if the primary geometry column saved to this file is not included in
      columns, the first available geometry column will be set as the geometry
      column of the returned GeoDataFrame.

    If 'crs' key is not present in the GeoParquet metadata associated with the
    Parquet object, it will default to "OGC:CRS84" according to the specification.

    Requires 'pyarrow'.

    .. versionadded:: 0.8

    Parameters
    ----------
    path : str, path object
    columns : list-like of strings, default=None
        If not None, only these columns will be read from the file. If
        the primary geometry column is not included, the first secondary
        geometry read from the file will be set as the geometry column
        of the returned GeoDataFrame. If no geometry columns are present,
        a ``ValueError`` will be raised.
    storage_options : dict, optional
        Extra options for a particular storage connection. For fsspec URLs
        (a "scheme://" path other than http/https) the key-value pairs are
        forwarded to fsspec.

        When no storage options are provided, the ``pyarrow.fs`` filesystem
        is tried first for string paths. Provide an instantiated filesystem
        using the ``filesystem`` keyword if you wish to use it instead.
    bbox : tuple, optional
        Bounding box to be used to filter selection from geoparquet data. This
        is only usable if the data was saved with the bbox covering metadata.
        Input is of the tuple format (xmin, ymin, xmax, ymax).

    **kwargs
        Any additional kwargs passed to :func:`pyarrow.parquet.read_table`.
        ``filesystem`` is consumed here, and ``filters`` is combined with the
        bbox filter before being passed on.

    Returns
    -------
    GeoDataFrame

    Examples
    --------
    >>> df = geopandas.read_parquet("data.parquet")  # doctest: +SKIP

    Specifying columns to read:

    >>> df = geopandas.read_parquet(
    ...     "data.parquet",
    ...     columns=["geometry", "pop_est"]
    ... )  # doctest: +SKIP
    """
    parquet = import_optional_dependency(
        "pyarrow.parquet", extra="pyarrow is required for Parquet support."
    )
    import geopandas.io._pyarrow_hotfix  # noqa: F401

    # TODO(https://github.com/pandas-dev/pandas/pull/41194): see if pandas
    # adds filesystem as a keyword and match that.
    user_filesystem = kwargs.pop("filesystem", None)
    filesystem, path = _get_filesystem_path(
        path, filesystem=user_filesystem, storage_options=storage_options
    )
    path = _expand_user(path)

    schema, metadata = _read_parquet_schema_and_metadata(path, filesystem)
    geo_metadata = _validate_and_decode_metadata(metadata)

    if bbox is not None:
        bbox_filter = _get_parquet_bbox_filter(geo_metadata, bbox)
    else:
        bbox_filter = None

    if_bbox_column_exists = _check_if_covering_in_geo_metadata(geo_metadata)

    # by default, bbox column is not read in, so must specify which
    # columns are read in if it exists.
    if not columns and if_bbox_column_exists:
        columns = _get_non_bbox_columns(schema, geo_metadata)

    # if both bbox and filters kwargs are used, must splice together.
    if "filters" in kwargs:
        filters = _splice_bbox_and_filters(kwargs.pop("filters"), bbox_filter)
    else:
        filters = bbox_filter

    kwargs["use_pandas_metadata"] = True

    table = parquet.read_table(
        path, columns=columns, filesystem=filesystem, filters=filters, **kwargs
    )

    return _arrow_to_geopandas(table, geo_metadata)
|
||||
|
||||
|
||||
def _read_feather(path, columns=None, **kwargs):
    """
    Read a Feather file and return its contents as a GeoDataFrame.

    The ``columns`` parameter restricts which columns are loaded, and the
    shape of the result depends on that selection:

    * when none of the selected columns is a geometry column a
      ``ValueError`` is raised - use the pandas `read_feather` method for
      such data instead.
    * when the primary geometry column stored in the file is left out, the
      first geometry column that was read becomes the geometry column of
      the returned GeoDataFrame.

    Supports versions 0.1.0, 0.4.0 and 1.0.0 of the GeoParquet
    specification at: https://github.com/opengeospatial/geoparquet

    If the 'crs' key is missing from the geo metadata stored with the file,
    the CRS defaults to "OGC:CRS84" according to the specification.

    Requires 'pyarrow' >= 0.17.

    .. versionadded:: 0.8

    Parameters
    ----------
    path : str, path object
    columns : list-like of strings, default=None
        If not None, only these columns will be read from the file. If
        the primary geometry column is not included, the first secondary
        geometry read from the file will be set as the geometry column
        of the returned GeoDataFrame. If no geometry columns are present,
        a ``ValueError`` will be raised.
    **kwargs
        Any additional kwargs passed to pyarrow.feather.read_table().

    Returns
    -------
    GeoDataFrame

    Examples
    --------
    >>> df = geopandas.read_feather("data.feather")  # doctest: +SKIP

    Specifying columns to read:

    >>> df = geopandas.read_feather(
    ...     "data.feather",
    ...     columns=["geometry", "pop_est"]
    ... )  # doctest: +SKIP
    """
    feather = import_optional_dependency(
        "pyarrow.feather", extra="pyarrow is required for Feather support."
    )
    # TODO move this into `import_optional_dependency`
    import pyarrow

    import geopandas.io._pyarrow_hotfix  # noqa: F401

    installed_pyarrow = Version(pyarrow.__version__)
    minimum_pyarrow = Version("0.17.0")
    if installed_pyarrow < minimum_pyarrow:
        raise ImportError("pyarrow >= 0.17 required for Feather support")

    expanded_path = _expand_user(path)
    return _arrow_to_geopandas(
        feather.read_table(expanded_path, columns=columns, **kwargs)
    )
|
||||
|
||||
|
||||
def _get_parquet_bbox_filter(geo_metadata, bbox):
    """
    Build the row filter that restricts a Parquet read to ``bbox``.

    Parameters
    ----------
    geo_metadata : dict
        Decoded geo metadata; ``"primary_column"`` and ``"columns"`` are read.
    bbox : sequence
        Indexed as ``(xmin, ymin, xmax, ymax)``.

    Returns
    -------
    The filter built by ``_convert_bbox_to_parquet_filter`` when the primary
    column has a covering entry, otherwise a pyarrow compute expression on
    the ``x``/``y`` fields of a 'point' encoded primary column.

    Raises
    ------
    ValueError
        If the primary column has neither a covering entry nor 'point' encoding.
    """
    geometry_name = geo_metadata["primary_column"]

    # preferred path: the file carries a bbox covering column
    if _check_if_covering_in_geo_metadata(geo_metadata):
        return _convert_bbox_to_parquet_filter(
            bbox, _get_bbox_encoding_column_name(geo_metadata)
        )

    if geo_metadata["columns"][geometry_name]["encoding"] != "point":
        raise ValueError(
            "Specifying 'bbox' not supported for this Parquet file (it should either "
            "have a bbox covering column or use 'point' encoding)."
        )

    import pyarrow.compute as pc

    # 'point' encoding: filter directly on the coordinate fields
    x_field = pc.field((geometry_name, "x"))
    y_field = pc.field((geometry_name, "y"))
    return (
        (x_field >= bbox[0])
        & (x_field <= bbox[2])
        & (y_field >= bbox[1])
        & (y_field <= bbox[3])
    )
|
||||
|
||||
|
||||
def _convert_bbox_to_parquet_filter(bbox, bbox_column_name):
    """
    Return a pyarrow expression selecting rows whose bbox struct overlaps ``bbox``.

    ``bbox`` is indexed as ``(xmin, ymin, xmax, ymax)``; ``bbox_column_name``
    names the column whose ``xmin``/``ymin``/``xmax``/``ymax`` fields are
    compared. A row is kept unless its box lies entirely on one side of the
    query box.
    """
    import pyarrow.compute as pc

    def bound(name):
        return pc.field((bbox_column_name, name))

    lies_outside = (
        (bound("xmin") > bbox[2])
        | (bound("ymin") > bbox[3])
        | (bound("xmax") < bbox[0])
        | (bound("ymax") < bbox[1])
    )
    return ~lies_outside
|
||||
|
||||
|
||||
def _check_if_covering_in_geo_metadata(geo_metadata):
    """Return True if the primary column's metadata has a ``"covering"`` key."""
    primary_name = geo_metadata["primary_column"]
    primary_metadata = geo_metadata["columns"][primary_name]
    return "covering" in primary_metadata
|
||||
|
||||
|
||||
def _get_bbox_encoding_column_name(geo_metadata):
    """
    Return the name of the bbox covering column of the primary geometry column.

    The name is the first element of the ``covering -> bbox -> xmin`` entry
    in the primary column's metadata.
    """
    primary_name = geo_metadata["primary_column"]
    covering = geo_metadata["columns"][primary_name]["covering"]
    xmin_path = covering["bbox"]["xmin"]
    return xmin_path[0]
|
||||
|
||||
|
||||
def _get_non_bbox_columns(schema, geo_metadata):
    """
    Return ``schema.names`` with the bbox covering column removed, if present.

    The list obtained from ``schema.names`` is modified in place and returned.
    """
    bbox_name = _get_bbox_encoding_column_name(geo_metadata)
    names = schema.names
    try:
        names.remove(bbox_name)
    except ValueError:
        # the covering column is not part of the schema: nothing to drop
        pass
    return names
|
||||
|
||||
|
||||
def _splice_bbox_and_filters(kwarg_filters, bbox_filter):
    """
    Combine the user supplied ``filters`` keyword with the bbox filter.

    Parameters
    ----------
    kwarg_filters
        Value of the ``filters`` keyword given by the caller; may be None.
    bbox_filter
        Filter expression derived from ``bbox``, or None if no bbox was given.

    Returns
    -------
    ``kwarg_filters`` unchanged when there is no bbox filter, ``bbox_filter``
    unchanged when ``kwarg_filters`` is None, otherwise the logical AND of the
    bbox filter and ``kwarg_filters`` converted to an expression.
    """
    if bbox_filter is None:
        return kwarg_filters
    if kwarg_filters is None:
        # an explicit ``filters=None`` means "no extra filter"; there is
        # nothing to convert, so the bbox filter is the complete filter
        return bbox_filter

    # only import pyarrow.parquet when a conversion is actually needed
    parquet = import_optional_dependency(
        "pyarrow.parquet", extra="pyarrow is required for Parquet support."
    )
    filters_expression = parquet.filters_to_expression(kwarg_filters)
    return bbox_filter & filters_expression
|
||||
@@ -0,0 +1,851 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import urllib.request
|
||||
import warnings
|
||||
from io import IOBase
|
||||
from packaging.version import Version
|
||||
from pathlib import Path
|
||||
|
||||
# Adapted from pandas.io.common
|
||||
from urllib.parse import urlparse as parse_url
|
||||
from urllib.parse import uses_netloc, uses_params, uses_relative
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pandas.api.types import is_integer_dtype
|
||||
|
||||
import shapely
|
||||
from shapely.geometry import mapping
|
||||
from shapely.geometry.base import BaseGeometry
|
||||
|
||||
from geopandas import GeoDataFrame, GeoSeries
|
||||
from geopandas._compat import HAS_PYPROJ, PANDAS_GE_20
|
||||
from geopandas.io.util import vsi_path
|
||||
|
||||
# URL schemes known to urllib, minus the empty scheme and "file":
# file:// URIs are supported by fiona/pyogrio -> don't already open + read
# the file here
_VALID_URLS = set(uses_relative + uses_netloc + uses_params) - {"", "file"}
|
||||
|
||||
fiona = None
|
||||
fiona_env = None
|
||||
fiona_import_error = None
|
||||
FIONA_GE_19 = False
|
||||
|
||||
|
||||
def _import_fiona():
|
||||
global fiona
|
||||
global fiona_env
|
||||
global fiona_import_error
|
||||
global FIONA_GE_19
|
||||
|
||||
if fiona is None:
|
||||
try:
|
||||
import fiona
|
||||
|
||||
# only try to import fiona.Env if the main fiona import succeeded
|
||||
# (otherwise you can get confusing "AttributeError: module 'fiona'
|
||||
# has no attribute '_loading'" / partially initialized module errors)
|
||||
try:
|
||||
from fiona import Env as fiona_env
|
||||
except ImportError:
|
||||
try:
|
||||
from fiona import drivers as fiona_env
|
||||
except ImportError:
|
||||
fiona_env = None
|
||||
|
||||
FIONA_GE_19 = Version(Version(fiona.__version__).base_version) >= Version(
|
||||
"1.9.0"
|
||||
)
|
||||
|
||||
except ImportError as err:
|
||||
fiona = False
|
||||
fiona_import_error = str(err)
|
||||
|
||||
|
||||
# Lazily populated by `_import_pyogrio`: `pyogrio` is None until the first
# import attempt, then the module on success or False on failure.
pyogrio = None
pyogrio_import_error = None


def _import_pyogrio():
    """
    Try to import pyogrio once and record the outcome in module globals.

    On success ``pyogrio`` is the module; on failure it is set to False and
    ``pyogrio_import_error`` holds the error text. Later calls are no-ops.
    """
    global pyogrio, pyogrio_import_error

    if pyogrio is not None:
        return

    try:
        import pyogrio
    except ImportError as err:
        pyogrio = False
        pyogrio_import_error = str(err)
|
||||
|
||||
|
||||
def _check_fiona(func):
    """
    Raise ImportError if fiona is not available.

    ``func`` is a description of the caller that is interpolated into the
    error message together with the recorded fiona import error.
    """
    if fiona:
        return

    message = (
        f"the {func} requires the 'fiona' package, but it is not installed or does "
        f"not import correctly.\nImporting fiona resulted in: {fiona_import_error}"
    )
    raise ImportError(message)
|
||||
|
||||
|
||||
def _check_pyogrio(func):
    """
    Raise ImportError if pyogrio is not available.

    Parameters
    ----------
    func : str
        Description of the caller, interpolated into the error message.

    Raises
    ------
    ImportError
        If the module-level ``pyogrio`` is falsy (not imported yet, or the
        import failed). The message includes the recorded import error.
    """
    if not pyogrio:
        raise ImportError(
            f"the {func} requires the 'pyogrio' package, but it is not installed "
            "or does not import correctly."
            # this fragment must be an f-string too, otherwise the placeholder
            # is printed literally instead of the actual import error
            f"\nImporting pyogrio resulted in: {pyogrio_import_error}"
        )
|
||||
|
||||
|
||||
def _check_metadata_supported(
    metadata: dict[str, str] | None, engine: str, driver: str
) -> None:
    """
    Validate that ``metadata`` can be written with the given engine and driver.

    Parameters
    ----------
    metadata : dict[str, str] or None
        Metadata requested by the caller; None means nothing to validate.
    engine : str
        Name of the I/O engine ("pyogrio" or "fiona").
    driver : str
        Name of the OGR driver used for writing.

    Raises
    ------
    NotImplementedError
        If metadata is given and the driver is not "GPKG", or the engine is
        "fiona" and the installed Fiona is older than 1.9.
    """
    if metadata is None:
        return
    if driver != "GPKG":
        raise NotImplementedError(
            "The 'metadata' keyword is only supported for the GPKG driver."
        )

    if engine == "fiona" and not FIONA_GE_19:
        raise NotImplementedError(
            "The 'metadata' keyword is only supported for Fiona >= 1.9."
        )
|
||||
|
||||
|
||||
def _check_engine(engine, func):
    """
    Resolve which I/O engine to use and make sure it can be imported.

    If ``engine`` is None the ``geopandas.options.io_engine`` option is used;
    if that is None too, "pyogrio" is chosen when it imports, otherwise
    "fiona". ``func`` describes the caller for error messages.

    Returns the resolved engine name; any other explicitly given value is
    returned unchanged. Raises ImportError when the selected engine, or
    neither engine, can be imported.
    """
    if engine is None:
        import geopandas

        engine = geopandas.options.io_engine

    if engine is None:
        # nothing requested through keyword or option: prefer pyogrio and
        # fall back to fiona
        _import_pyogrio()
        if pyogrio:
            engine = "pyogrio"
        else:
            _import_fiona()
            if not fiona:
                raise ImportError(
                    f"The {func} requires the 'pyogrio' or 'fiona' package, "
                    "but neither is installed or imports correctly."
                    f"\nImporting pyogrio resulted in: {pyogrio_import_error}"
                    f"\nImporting fiona resulted in: {fiona_import_error}"
                )
            engine = "fiona"

    if engine == "pyogrio":
        _import_pyogrio()
        _check_pyogrio(func)
    elif engine == "fiona":
        _import_fiona()
        _check_fiona(func)

    return engine
|
||||
|
||||
|
||||
# Maps a lower-case file extension (including the dot) to the OGR driver name,
# built from one (driver, extensions) entry per driver.
_EXTENSION_TO_DRIVER = {
    extension: driver
    for driver, extensions in (
        ("BNA", (".bna",)),
        ("DXF", (".dxf",)),
        ("CSV", (".csv",)),
        ("ESRI Shapefile", (".shp", ".dbf")),
        ("GeoJSON", (".json", ".geojson")),
        ("GeoJSONSeq", (".geojsonl", ".geojsons")),
        ("GPKG", (".gpkg",)),
        ("GML", (".gml", ".xml")),
        ("GPX", (".gpx",)),
        ("GPSTrackMaker", (".gtm", ".gtz")),
        ("MapInfo File", (".tab", ".mif", ".mid")),
        ("DGN", (".dgn",)),
        ("FlatGeobuf", (".fgb",)),
    )
    for extension in extensions
}
|
||||
|
||||
|
||||
def _expand_user(path):
    """Expand a leading ``~`` in str and Path inputs; return anything else as is."""
    if isinstance(path, Path):
        return path.expanduser()
    if isinstance(path, str):
        return os.path.expanduser(path)
    return path
|
||||
|
||||
|
||||
def _is_url(url):
    """Return True if *url* parses to a scheme listed in ``_VALID_URLS``.

    Any exception raised while parsing or looking up the scheme yields False.
    """
    try:
        scheme = parse_url(url).scheme
        return scheme in _VALID_URLS
    except Exception:
        return False
|
||||
|
||||
|
||||
def _read_file(
    filename, bbox=None, mask=None, columns=None, rows=None, engine=None, **kwargs
):
    """
    Returns a GeoDataFrame from a file or URL.

    Parameters
    ----------
    filename : str, path object or file-like object
        Either the absolute or relative path to the file or URL to
        be opened, or any object with a read() method (such as an open file
        or StringIO)
    bbox : tuple | GeoDataFrame or GeoSeries | shapely Geometry, default None
        Filter features by given bounding box, GeoSeries, GeoDataFrame or a shapely
        geometry. With engine="fiona", CRS mis-matches are resolved if given a GeoSeries
        or GeoDataFrame. With engine="pyogrio", bbox must be in the same CRS as the
        dataset. Tuple is (minx, miny, maxx, maxy) to match the bounds property of
        shapely geometry objects. Cannot be used with mask.
    mask : dict | GeoDataFrame or GeoSeries | shapely Geometry, default None
        Filter for features that intersect with the given dict-like geojson
        geometry, GeoSeries, GeoDataFrame or shapely geometry.
        CRS mis-matches are resolved if given a GeoSeries or GeoDataFrame.
        Cannot be used with bbox. If multiple geometries are passed, this will
        first union all geometries, which may be computationally expensive.
    columns : list, optional
        List of column names to import from the data source. Column names
        must exactly match the names in the data source. To avoid reading
        any columns (besides the geometry column), pass an empty list-like.
        By default reads all columns.
    rows : int or slice, default None
        Load in specific rows by passing an integer (first `n` rows) or a
        slice() object.
    engine : str,  "pyogrio" or "fiona"
        The underlying library that is used to read the file. Currently, the
        supported options are "pyogrio" and "fiona". Defaults to "pyogrio" if
        installed, otherwise tries "fiona". Engine can also be set globally
        with the ``geopandas.options.io_engine`` option.
    **kwargs :
        Keyword args to be passed to the engine, and can be used to read
        from multi-layer data, data stored within archives (zip files), etc.
        In case of the "pyogrio" engine, the keyword arguments are passed to
        `pyogrio.read_dataframe`. In case of the "fiona" engine, the keyword
        arguments are passed to `fiona.open`. For more information on possible
        keywords, type: ``import pyogrio; help(pyogrio.read_dataframe)``.

    Returns
    -------
    :obj:`geopandas.GeoDataFrame` or :obj:`pandas.DataFrame` :
        If `ignore_geometry=True` a :obj:`pandas.DataFrame` will be returned.

    Raises
    ------
    ValueError
        If ``engine`` resolves to something other than "pyogrio" or "fiona".

    Examples
    --------
    >>> df = geopandas.read_file("nybb.shp")  # doctest: +SKIP

    Specifying layer of GPKG:

    >>> df = geopandas.read_file("file.gpkg", layer='cities')  # doctest: +SKIP

    Reading only first 10 rows:

    >>> df = geopandas.read_file("nybb.shp", rows=10)  # doctest: +SKIP

    Reading only geometries intersecting ``mask``:

    >>> df = geopandas.read_file("nybb.shp", mask=polygon)  # doctest: +SKIP

    Reading only geometries intersecting ``bbox``:

    >>> df = geopandas.read_file("nybb.shp", bbox=(0, 0, 10, 20))  # doctest: +SKIP

    Notes
    -----
    The format drivers will attempt to detect the encoding of your data, but
    may fail. In this case, the proper encoding can be specified explicitly
    by using the encoding keyword parameter, e.g. ``encoding='utf-8'``.

    When specifying a URL, geopandas will check if the server supports reading
    partial data and in that case pass the URL as is to the underlying engine,
    which will then use the network file system handler of GDAL to read from
    the URL. Otherwise geopandas will download the data from the URL and pass
    all data in-memory to the underlying engine.
    If you need more control over how the URL is read, you can specify the
    GDAL virtual filesystem manually (e.g. ``/vsicurl/https://...``). See the
    GDAL documentation on filesystems for more details
    (https://gdal.org/user/virtual_file_systems.html#vsicurl-http-https-ftp-files-random-access).

    """
    engine = _check_engine(engine, "'read_file' function")
    if engine not in ("pyogrio", "fiona"):
        # fail fast: reject an unknown engine before a URL is opened and
        # possibly downloaded in full below
        raise ValueError(f"unknown engine '{engine}'")

    filename = _expand_user(filename)

    from_bytes = False
    if _is_url(filename):
        # if it is a url that supports random access -> pass through to
        # pyogrio/fiona as is (to support downloading only part of the file)
        # otherwise still download manually because pyogrio/fiona don't support
        # all types of urls (https://github.com/geopandas/geopandas/issues/2908)
        with urllib.request.urlopen(filename) as response:
            if response.headers.get("Accept-Ranges") != "bytes":
                filename = response.read()
                from_bytes = True

    if engine == "pyogrio":
        return _read_file_pyogrio(
            filename, bbox=bbox, mask=mask, columns=columns, rows=rows, **kwargs
        )

    # engine == "fiona": file-like objects are read fully and passed as bytes
    if pd.api.types.is_file_like(filename):
        data = filename.read()
        path_or_bytes = data.encode("utf-8") if isinstance(data, str) else data
        from_bytes = True
    else:
        path_or_bytes = filename

    return _read_file_fiona(
        path_or_bytes,
        from_bytes,
        bbox=bbox,
        mask=mask,
        columns=columns,
        rows=rows,
        **kwargs,
    )
|
||||
|
||||
|
||||
def _read_file_fiona(
    path_or_bytes,
    from_bytes,
    bbox=None,
    mask=None,
    columns=None,
    rows=None,
    where=None,
    **kwargs,
):
    """
    Read a data source with the fiona engine.

    Parameters
    ----------
    path_or_bytes : str, path object or bytes
        Path passed to ``fiona.open`` or, if ``from_bytes`` is True, raw bytes
        passed to ``fiona.BytesCollection``.
    from_bytes : bool
        Whether ``path_or_bytes`` holds the file content rather than a path.
    bbox : tuple | GeoDataFrame or GeoSeries | shapely Geometry, default None
        Bounding box filter; (Geo)pandas objects are reprojected to the CRS
        of the data source and reduced to their total bounds.
    mask : dict | GeoDataFrame or GeoSeries | shapely Geometry, default None
        Geometry filter; only converted when no ``bbox`` is given.
    columns : sequence of str, default None
        Columns to read; forwarded to fiona as ``include_fields``.
    rows : int or slice, default None
        Integer (first ``n`` rows) or slice of rows to read.
    where : str, default None
        Passed to the fiona feature filter as ``where``; requires fiona 1.9+.
    **kwargs
        Passed to the fiona reader.

    Returns
    -------
    GeoDataFrame or pandas.DataFrame
        A plain DataFrame if ``ignore_geometry`` is set or the schema has no
        geometry.

    Raises
    ------
    NotImplementedError
        If ``where`` or ``columns`` is used with fiona older than 1.9.
    ValueError
        If both ``columns`` and ``include_fields`` are given.
    TypeError
        If ``rows`` is neither an integer nor a slice.
    """
    if where is not None and not FIONA_GE_19:
        raise NotImplementedError("where requires fiona 1.9+")

    if columns is not None:
        if "include_fields" in kwargs:
            raise ValueError(
                "Cannot specify both 'include_fields' and 'columns' keywords"
            )
        if not FIONA_GE_19:
            raise NotImplementedError("'columns' keyword requires fiona 1.9+")
        kwargs["include_fields"] = columns
    elif "include_fields" in kwargs:
        # alias to columns, as this variable is used below to specify column order
        # in the dataframe creation
        columns = kwargs["include_fields"]

    if not from_bytes:
        # Opening a file via URL or file-like-object above automatically detects a
        # zipped file. In order to match that behavior, attempt to add a zip scheme
        # if missing.
        path_or_bytes = vsi_path(str(path_or_bytes))

    if from_bytes:
        reader = fiona.BytesCollection
    else:
        reader = fiona.open

    with fiona_env():
        with reader(path_or_bytes, **kwargs) as features:
            crs = features.crs_wkt
            # attempt to get EPSG code
            try:
                # fiona 1.9+
                epsg = features.crs.to_epsg(confidence_threshold=100)
                if epsg is not None:
                    crs = epsg
            except AttributeError:
                # fiona <= 1.8
                try:
                    crs = features.crs["init"]
                except (TypeError, KeyError):
                    pass

            # handle loading the bounding box
            if bbox is not None:
                if isinstance(bbox, (GeoDataFrame, GeoSeries)):
                    bbox = tuple(bbox.to_crs(crs).total_bounds)
                elif isinstance(bbox, BaseGeometry):
                    bbox = bbox.bounds
                assert len(bbox) == 4
            # handle loading the mask
            elif isinstance(mask, (GeoDataFrame, GeoSeries)):
                mask = mapping(mask.to_crs(crs).union_all())
            elif isinstance(mask, BaseGeometry):
                mask = mapping(mask)

            filters = {}
            if bbox is not None:
                filters["bbox"] = bbox
            if mask is not None:
                filters["mask"] = mask
            if where is not None:
                filters["where"] = where

            # setup the data loading filter
            if rows is not None:
                if isinstance(rows, int):
                    rows = slice(rows)
                elif not isinstance(rows, slice):
                    raise TypeError("'rows' must be an integer or a slice.")
                f_filt = features.filter(rows.start, rows.stop, rows.step, **filters)
            elif filters:
                f_filt = features.filter(**filters)
            else:
                f_filt = features
            # get list of columns; always materialise a list so that a tuple
            # (or other sequence) can be extended with "geometry" below
            columns = list(columns) if columns else list(features.schema["properties"])
            datetime_fields = [
                k for (k, v) in features.schema["properties"].items() if v == "datetime"
            ]
            if (
                kwargs.get("ignore_geometry", False)
                or features.schema["geometry"] == "None"
            ):
                df = pd.DataFrame(
                    [record["properties"] for record in f_filt], columns=columns
                )
            else:
                df = GeoDataFrame.from_features(
                    f_filt, crs=crs, columns=columns + ["geometry"]
                )
            for k in datetime_fields:
                as_dt = None
                # plain try catch for when pandas will raise in the future
                # TODO we can tighten the exception type in future when it does
                try:
                    with warnings.catch_warnings():
                        # pandas 2.x does not yet enforce this behaviour but raises a
                        # warning -> we want to to suppress this warning for our users,
                        # and do this by turning it into an error so we take the
                        # `except` code path to try again with utc=True
                        warnings.filterwarnings(
                            "error",
                            "In a future version of pandas, parsing datetimes with "
                            "mixed time zones will raise an error",
                            FutureWarning,
                        )
                        as_dt = pd.to_datetime(df[k])
                except Exception:
                    pass
                if as_dt is None or as_dt.dtype == "object":
                    # if to_datetime failed, try again for mixed timezone offsets
                    # This can still fail if there are invalid datetimes
                    try:
                        as_dt = pd.to_datetime(df[k], utc=True)
                    except Exception:
                        pass
                # if to_datetime succeeded, round datetimes as
                # fiona only supports up to ms precision (any microseconds are
                # floating point rounding error)
                if as_dt is not None and not (as_dt.dtype == "object"):
                    if PANDAS_GE_20:
                        df[k] = as_dt.dt.as_unit("ms")
                    else:
                        df[k] = as_dt.dt.round(freq="ms")
            return df
|
||||
|
||||
|
||||
def _read_file_pyogrio(path_or_bytes, bbox=None, mask=None, rows=None, **kwargs):
    """
    Read a data source with the pyogrio engine.

    Translates the geopandas keywords into their pyogrio equivalents and
    calls ``pyogrio.read_dataframe``.

    Parameters
    ----------
    path_or_bytes : str, path object, bytes or file-like object
        Data source handed to pyogrio.
    bbox : tuple | GeoDataFrame or GeoSeries | shapely Geometry, default None
        Bounding box filter; reduced to a length-4 tuple of bounds.
    mask : dict | GeoDataFrame or GeoSeries | shapely Geometry, default None
        Geometry filter; converted to a single shapely geometry.
    rows : int or slice, default None
        Translated to ``max_features`` / ``skip_features``.
    **kwargs
        Passed to ``pyogrio.read_dataframe``. ``ignore_geometry``,
        ``ignore_fields`` and ``include_fields`` are translated for
        compatibility with the fiona engine.

    Raises
    ------
    ValueError
        For a negative slice start, a slice step, ``bbox`` together with
        ``mask``, a ``bbox`` that is not of length 4, or conflicting
        column-selection keywords.
    TypeError
        If ``rows`` is neither an integer nor a slice.
    """
    import pyogrio

    def _read_info_and_rewind():
        # reading the dataset info may advance a file-like object, so put it
        # back at the start for the read that follows
        info = pyogrio.read_info(path_or_bytes)
        if isinstance(path_or_bytes, IOBase):
            path_or_bytes.seek(0)
        return info

    fields_deprecation_message = (
        "The 'include_fields' and 'ignore_fields' keywords are deprecated, and "
        "will be removed in a future release. You can use the 'columns' keyword "
        "instead to select which columns to read."
    )

    if rows is not None:
        if isinstance(rows, int):
            kwargs["max_features"] = rows
        elif isinstance(rows, slice):
            if rows.start is not None:
                if rows.start < 0:
                    raise ValueError(
                        "Negative slice start not supported with the 'pyogrio' engine."
                    )
                kwargs["skip_features"] = rows.start
            if rows.stop is not None:
                kwargs["max_features"] = rows.stop - (rows.start or 0)
            if rows.step is not None:
                raise ValueError("slice with step is not supported")
        else:
            raise TypeError("'rows' must be an integer or a slice.")

    if bbox is not None and mask is not None:
        # match error message from Fiona
        raise ValueError("mask and bbox can not be set together")

    if bbox is not None:
        if isinstance(bbox, (GeoDataFrame, GeoSeries)):
            crs = _read_info_and_rewind().get("crs")
            bbox = tuple(bbox.to_crs(crs).total_bounds)
        elif isinstance(bbox, BaseGeometry):
            bbox = bbox.bounds
        if len(bbox) != 4:
            raise ValueError("'bbox' should be a length-4 tuple.")

    if mask is not None:
        # NOTE: mask cannot be used at same time as bbox keyword
        if isinstance(mask, (GeoDataFrame, GeoSeries)):
            crs = _read_info_and_rewind().get("crs")
            mask = shapely.unary_union(mask.to_crs(crs).geometry.values)
        elif isinstance(mask, BaseGeometry):
            mask = shapely.unary_union(mask)
        elif isinstance(mask, dict) or hasattr(mask, "__geo_interface__"):
            # convert GeoJSON to shapely geometry
            mask = shapely.geometry.shape(mask)

        kwargs["mask"] = mask

    if kwargs.pop("ignore_geometry", False):
        kwargs["read_geometry"] = False

    # translate `ignore_fields`/`include_fields` keyword for back compat with fiona
    if "ignore_fields" in kwargs and "include_fields" in kwargs:
        raise ValueError("Cannot specify both 'ignore_fields' and 'include_fields'")
    elif "ignore_fields" in kwargs:
        if kwargs.get("columns", None) is not None:
            raise ValueError(
                "Cannot specify both 'columns' and 'ignore_fields' keywords"
            )
        warnings.warn(
            fields_deprecation_message,
            DeprecationWarning,
            stacklevel=3,
        )
        ignore_fields = kwargs.pop("ignore_fields")
        # rewind afterwards, as done for the bbox and mask lookups above
        fields = _read_info_and_rewind()["fields"]
        include_fields = [col for col in fields if col not in ignore_fields]
        kwargs["columns"] = include_fields
    elif "include_fields" in kwargs:
        # translate `include_fields` keyword for back compat with fiona engine
        if kwargs.get("columns", None) is not None:
            raise ValueError(
                "Cannot specify both 'columns' and 'include_fields' keywords"
            )
        warnings.warn(
            fields_deprecation_message,
            DeprecationWarning,
            stacklevel=3,
        )
        kwargs["columns"] = kwargs.pop("include_fields")

    return pyogrio.read_dataframe(path_or_bytes, bbox=bbox, **kwargs)
|
||||
|
||||
|
||||
def _detect_driver(path):
    """
    Attempt to auto-detect driver based on the extension

    ``path`` may be a path or an object with a ``name`` attribute (such as a
    file handle). Unknown or missing extensions fall back to
    "ESRI Shapefile".
    """
    # in case the path is a file handle, use its name
    location = getattr(path, "name", path)
    suffix = Path(location).suffix.lower()
    # Assume it is a shapefile folder for now. In the future,
    # will likely raise an exception when the expected
    # folder writing behavior is more clearly defined.
    return _EXTENSION_TO_DRIVER.get(suffix, "ESRI Shapefile")
|
||||
|
||||
|
||||
def _to_file(
    df,
    filename,
    driver=None,
    schema=None,
    index=None,
    mode="w",
    crs=None,
    engine=None,
    metadata=None,
    **kwargs,
):
    """
    Write this GeoDataFrame to an OGR data source

    A dictionary of supported OGR providers is available via:

    >>> import pyogrio
    >>> pyogrio.list_drivers()  # doctest: +SKIP

    Parameters
    ----------
    df : GeoDataFrame to be written
    filename : string
        File path or file handle to write to. The path may specify a
        GDAL VSI scheme.
    driver : string, default None
        The OGR format driver used to write the vector file.
        If not specified, it attempts to infer it from the file extension.
        If no extension is specified, it saves ESRI Shapefile to a folder.
    schema : dict, default None
        If specified, the schema dictionary is passed to Fiona to
        better control how the file is written. If None, GeoPandas
        will determine the schema based on each column's dtype.
        Not supported for the "pyogrio" engine.
    index : bool, default None
        If True, write index into one or more columns (for MultiIndex).
        Default None writes the index into one or more columns only if
        the index is named, is a MultiIndex, or has a non-integer data
        type. If False, no index is written.

        .. versionadded:: 0.7
            Previously the index was not written.
    mode : string, default 'w'
        The write mode, 'w' to overwrite the existing file and 'a' to append;
        when using the pyogrio engine, you can also pass ``append=True``.
        Not all drivers support appending. For the fiona engine, the drivers
        that support appending are listed in fiona.supported_drivers or
        https://github.com/Toblerity/Fiona/blob/master/fiona/drvsupport.py.
        For the pyogrio engine, you should be able to use any driver that
        is available in your installation of GDAL that supports append
        capability; see the specific driver entry at
        https://gdal.org/drivers/vector/index.html for more information.
    crs : pyproj.CRS, default None
        If specified, the CRS is passed to Fiona to
        better control how the file is written. If None, GeoPandas
        will determine the crs based on crs df attribute.
        The value can be anything accepted
        by :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
        such as an authority string (eg "EPSG:4326") or a WKT string.
        Not supported for the "pyogrio" engine.
    engine : str,  "pyogrio" or "fiona"
        The underlying library that is used to write the file. Currently, the
        supported options are "pyogrio" and "fiona". Defaults to "pyogrio" if
        installed, otherwise tries "fiona". Engine can also be set globally
        with the ``geopandas.options.io_engine`` option.
    metadata : dict[str, str], default None
        Optional metadata to be stored in the file. Keys and values must be
        strings. Only supported for the "GPKG" driver
        (requires Fiona >= 1.9 or pyogrio >= 0.6).
    **kwargs :
        Keyword args to be passed to the engine, and can be used to write
        to multi-layer data, store data within archives (zip files), etc.
        In case of the "fiona" engine, the keyword arguments are passed to
        `fiona.open`. For more information on possible keywords, type:
        ``import fiona; help(fiona.open)``. In case of the "pyogrio" engine,
        the keyword arguments are passed to `pyogrio.write_dataframe`.

    Raises
    ------
    ValueError
        If the frame has more than one geometry column, ``mode`` is not
        'w' or 'a', or the engine is unknown.

    Notes
    -----
    The format drivers will attempt to detect the encoding of your data, but
    may fail. In this case, the proper encoding can be specified explicitly
    by using the encoding keyword parameter, e.g. ``encoding='utf-8'``.
    """
    engine = _check_engine(engine, "'to_file' method")

    filename = _expand_user(filename)

    if index is None:
        # Determine if index attribute(s) should be saved to file
        # (only if they are named or are non-integer)
        index = list(df.index.names) != [None] or not is_integer_dtype(df.index.dtype)
    if index:
        df = df.reset_index(drop=False)

    if driver is None:
        driver = _detect_driver(filename)

    # only string labels have a length to check; other label types (e.g.
    # integers) must not make this advisory warning crash
    if driver == "ESRI Shapefile" and any(
        isinstance(c, str) and len(c) > 10 for c in df.columns.tolist()
    ):
        warnings.warn(
            "Column names longer than 10 characters will be truncated when saved to "
            "ESRI Shapefile.",
            stacklevel=3,
        )

    if (df.dtypes == "geometry").sum() > 1:
        raise ValueError(
            "GeoDataFrame contains multiple geometry columns but GeoDataFrame.to_file "
            "supports only a single geometry column. Use a GeoDataFrame.to_parquet or "
            "GeoDataFrame.to_feather, drop additional geometry columns or convert them "
            "to a supported format like a well-known text (WKT) using "
            "`GeoSeries.to_wkt()`.",
        )
    _check_metadata_supported(metadata, engine, driver)

    if mode not in ("w", "a"):
        raise ValueError(f"'mode' should be one of 'w' or 'a', got '{mode}' instead")

    if engine == "pyogrio":
        _to_file_pyogrio(df, filename, driver, schema, crs, mode, metadata, **kwargs)
    elif engine == "fiona":
        _to_file_fiona(df, filename, driver, schema, crs, mode, metadata, **kwargs)
    else:
        raise ValueError(f"unknown engine '{engine}'")
|
||||
|
||||
|
||||
def _to_file_fiona(df, filename, driver, schema, crs, mode, metadata, **kwargs):
    """Write ``df`` to ``filename`` through the fiona engine.

    Parameters
    ----------
    df : GeoDataFrame
        Frame to write; records come from ``df.iterfeatures()``.
    filename : str or path-like
        Destination passed to ``fiona.open``.
    driver : str
        OGR driver name passed to ``fiona.open``.
    schema : dict or None
        Fiona schema. When None it is derived with ``infer_schema(df)``.
    crs : object or None
        CRS override. A truthy value is parsed with
        ``pyproj.CRS.from_user_input``; otherwise ``df.crs`` is used.
    mode : str
        Mode passed to ``fiona.open``.
    metadata : dict or None
        When not None, written to the collection with ``update_tags``.
    **kwargs
        Forwarded unchanged to ``fiona.open``.

    Raises
    ------
    ImportError
        If a CRS is passed while pyproj is not available.
    """
    if not HAS_PYPROJ and crs:
        raise ImportError(
            "The 'pyproj' package is required to write a file with a CRS, but it is not"
            " installed or does not import correctly."
        )

    schema = infer_schema(df) if schema is None else schema

    if crs:
        from pyproj import CRS

        target_crs = CRS.from_user_input(crs)
    else:
        target_crs = df.crs

    with fiona_env():
        try:
            release_name = fiona.env.get_gdal_release_name().strip("e")  # GH3147
            gdal_version = Version(release_name)
        except (AttributeError, ValueError):
            # just assume it is not the latest
            gdal_version = Version("2.0.0")

        # GDAL < 3 is handed the WKT1_GDAL flavour; newer versions the default.
        if not target_crs:
            crs_wkt = None
        elif gdal_version >= Version("3.0.0"):
            crs_wkt = target_crs.to_wkt()
        else:
            crs_wkt = target_crs.to_wkt("WKT1_GDAL")

        with fiona.open(
            filename, mode=mode, driver=driver, crs_wkt=crs_wkt, schema=schema, **kwargs
        ) as collection:
            if metadata is not None:
                collection.update_tags(metadata)
            collection.writerecords(df.iterfeatures())
|
||||
|
||||
|
||||
def _to_file_pyogrio(df, filename, driver, schema, crs, mode, metadata, **kwargs):
    """Write ``df`` to ``filename`` through the pyogrio engine.

    ``schema`` and ``crs`` are not supported by this engine and must be None.
    ``mode == "a"`` is translated into ``append=True`` for
    ``pyogrio.write_dataframe``; all remaining keyword arguments are
    forwarded unchanged.

    Raises
    ------
    ValueError
        If ``schema`` or ``crs`` is given, or if ``df`` has duplicated
        column names.
    """
    import pyogrio

    if schema is not None:
        raise ValueError(
            "The 'schema' argument is not supported with the 'pyogrio' engine."
        )
    if crs is not None:
        raise ValueError("Passing 'crs' is not supported with the 'pyogrio' engine.")
    # for the fiona engine, this check is done in gdf.iterfeatures()
    if not df.columns.is_unique:
        raise ValueError("GeoDataFrame cannot contain duplicated column names.")

    write_options = dict(kwargs)
    if mode == "a":
        write_options["append"] = True

    pyogrio.write_dataframe(
        df, filename, driver=driver, metadata=metadata, **write_options
    )
|
||||
|
||||
|
||||
def infer_schema(df):
    """Build a fiona-style schema dictionary for ``df``.

    Every column except the active geometry column is mapped to a type name;
    the geometry entry comes from ``_geometry_types(df)``. A ``UserWarning``
    is emitted when ``df`` is empty.

    Returns
    -------
    dict
        ``{"geometry": <type or list of types>, "properties": OrderedDict}``
    """
    from collections import OrderedDict

    # TODO: test pandas string type and boolean type once released
    dtype_names = {
        "Int32": "int32",
        "int32": "int32",
        "Int64": "int",
        "string": "str",
        "boolean": "bool",
    }

    def schema_type(dtype):
        """Return the schema type name for a single column dtype."""
        if dtype == object:
            return "str"
        if dtype.name.startswith("datetime64"):
            # numpy datetime type regardless of frequency
            return "datetime"
        key = str(dtype)
        if key in dtype_names:
            name = dtype_names[key]
        else:
            # name of the Python scalar type this dtype converts to
            name = type(np.zeros(1, dtype).item()).__name__
        return "int" if name == "long" else name

    properties = OrderedDict()
    for col, dtype in zip(df.columns, df.dtypes):
        if col != df._geometry_column_name:
            properties[col] = schema_type(dtype)

    if df.empty:
        warnings.warn(
            "You are attempting to write an empty DataFrame to file. "
            "For some drivers, this operation may fail.",
            UserWarning,
            stacklevel=3,
        )

    # Since https://github.com/Toblerity/Fiona/issues/446 resolution,
    # Fiona allows a list of geometry types
    return {"geometry": _geometry_types(df), "properties": properties}
|
||||
|
||||
|
||||
def _geometry_types(df):
    """
    Determine the geometry types in the GeoDataFrame for the schema.

    Types of geometries with a Z coordinate are prefixed with ``"3D "`` and
    listed before the 2D types. Returns ``"Unknown"`` when no type is found,
    a single string when exactly one type is found, otherwise a list.
    """
    has_z = df.geometry.has_z
    flat_types = [
        name for name in df[~has_z].geometry.geom_type.unique() if name is not None
    ]
    z_types = [
        "3D " + name
        for name in df[has_z].geometry.geom_type.unique()
        if name is not None
    ]
    found = z_types + flat_types

    if not found:
        # Default geometry type supported by Fiona
        # (Since https://github.com/Toblerity/Fiona/issues/446 resolution)
        return "Unknown"
    if len(found) == 1:
        return found[0]
    return found
|
||||
|
||||
|
||||
def _list_layers(filename) -> pd.DataFrame:
    """List layers available in a file.

    Gives an overview of the layers found in a file or URL together with
    their geometry types. When supported by the data source, this includes
    both spatial and non-spatial layers. Non-spatial layers are indicated by
    the ``"geometry_type"`` column being ``None``. GeoPandas will not read
    such layers but they can be read into a pd.DataFrame using
    :func:`pyogrio.read_dataframe`.

    Parameters
    ----------
    filename : str, path object or file-like object
        Either the absolute or relative path to the file or URL to
        be opened, or any object with a read() method (such as an open file
        or StringIO)

    Returns
    -------
    pandas.DataFrame
        A DataFrame with columns "name" and "geometry_type" and one row per layer.
    """
    _import_pyogrio()
    _check_pyogrio("list_layers")

    import pyogrio

    layers = pyogrio.list_layers(filename)
    return pd.DataFrame(layers, columns=["name", "geometry_type"])
|
||||
@@ -0,0 +1,473 @@
|
||||
import warnings
|
||||
from contextlib import contextmanager
|
||||
from functools import lru_cache
|
||||
|
||||
import pandas as pd
|
||||
|
||||
import shapely
|
||||
import shapely.wkb
|
||||
|
||||
from geopandas import GeoDataFrame
|
||||
|
||||
|
||||
@contextmanager
def _get_conn(conn_or_engine):
    """
    Yield a connection within a transaction context.

    Engine.begin() returns a Connection with an implicit Transaction while
    Connection.begin() returns the Transaction. This helper hides that
    difference: it always yields a Connection with an implicit (possibly
    nested) Transaction.

    Parameters
    ----------
    conn_or_engine : Connection or Engine
        A sqlalchemy Connection or Engine instance

    Returns
    -------
    Connection

    Raises
    ------
    ValueError
        If ``conn_or_engine`` is neither a Connection nor an Engine.
    """
    from sqlalchemy.engine.base import Connection, Engine

    if isinstance(conn_or_engine, Connection):
        if conn_or_engine.in_transaction():
            # reuse the transaction that is already open
            yield conn_or_engine
            return
        with conn_or_engine.begin():
            yield conn_or_engine
        return

    if isinstance(conn_or_engine, Engine):
        with conn_or_engine.begin() as connection:
            yield connection
        return

    raise ValueError(f"Unknown Connectable: {conn_or_engine}")
|
||||
|
||||
|
||||
def _df_to_geodf(df, geom_col="geom", crs=None, con=None):
    """
    Transform a pandas DataFrame into a GeoDataFrame.

    The column ``geom_col`` must hold geometries in WKB representation, either
    as ``bytes`` or as hex-encoded text; which decoder is used is decided from
    the first non-null value. Meant for converting the result of
    ``pd.read_sql`` into a GeoDataFrame.

    Note that the geometry column of ``df`` is overwritten in place with the
    decoded geometries.

    Parameters
    ----------
    df : DataFrame
        pandas DataFrame with geometry column in WKB representation.
    geom_col : string, default 'geom'
        column name to convert to shapely geometries
    crs : pyproj.CRS, optional
        CRS to use for the returned GeoDataFrame. The value can be anything accepted
        by :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
        such as an authority string (eg "EPSG:4326") or a WKT string.
        If not set, tries to determine CRS from the SRID associated with the
        first geometry in the database, and assigns that to all geometries.
    con : sqlalchemy.engine.Connection or sqlalchemy.engine.Engine
        Active connection to the database to query.

    Returns
    -------
    GeoDataFrame

    Raises
    ------
    ValueError
        If ``geom_col`` is missing from ``df`` or appears more than once.
    """
    if geom_col not in df:
        raise ValueError(f"Query missing geometry column '{geom_col}'")

    if df.columns.to_list().count(geom_col) > 1:
        raise ValueError(
            f"Duplicate geometry column '{geom_col}' detected in SQL query output. "
            "Only one geometry column is allowed."
        )

    geoms = df[geom_col].dropna()

    if not geoms.empty:
        if isinstance(geoms.iat[0], bytes):
            # Load from Python 3 binary.
            load_geom = shapely.wkb.loads
        else:

            def load_geom(x):
                """Load from binary encoded as text."""
                return shapely.wkb.loads(str(x), hex=True)

        df[geom_col] = geoms = geoms.apply(load_geom)
        if crs is None:
            srid = shapely.get_srid(geoms.iat[0])
            # if no defined SRID in geodatabase, returns SRID of 0
            if srid != 0:
                try:
                    spatial_ref_sys_df = _get_spatial_ref_sys_df(con, srid)
                except pd.errors.DatabaseError:
                    warnings.warn(
                        "Could not find the spatial reference system table "
                        "(spatial_ref_sys) in PostGIS. "
                        f"Trying epsg:{srid} as a fallback.",
                        UserWarning,
                        stacklevel=3,
                    )
                    crs = f"epsg:{srid}"
                else:
                    if not spatial_ref_sys_df.empty:
                        auth_name = spatial_ref_sys_df["auth_name"].item()
                        crs = f"{auth_name}:{srid}"
                    else:
                        warnings.warn(
                            f"Could not find srid {srid} in the "
                            "spatial_ref_sys table. "
                            f"Trying epsg:{srid} as a fallback.",
                            UserWarning,
                            stacklevel=3,
                        )
                        crs = f"epsg:{srid}"

    return GeoDataFrame(df, crs=crs, geometry=geom_col)
|
||||
|
||||
|
||||
def _read_postgis(
    sql,
    con,
    geom_col="geom",
    crs=None,
    index_col=None,
    coerce_float=True,
    parse_dates=None,
    params=None,
    chunksize=None,
):
    """
    Return a GeoDataFrame built from the result of a SQL query.

    The query result must contain a geometry column in WKB representation.

    It is also possible to use :meth:`~GeoDataFrame.read_file` to read from a database.
    Especially for file geodatabases like GeoPackage or SpatiaLite this can be easier.

    Parameters
    ----------
    sql : string
        SQL query to execute in selecting entries from database, or name
        of the table to read from the database.
    con : sqlalchemy.engine.Connection or sqlalchemy.engine.Engine
        Active connection to the database to query.
    geom_col : string, default 'geom'
        column name to convert to shapely geometries
    crs : dict or str, optional
        CRS to use for the returned GeoDataFrame; if not set, tries to
        determine CRS from the SRID associated with the first geometry in
        the database, and assigns that to all geometries.
    chunksize : int, default None
        If specified, return an iterator where chunksize is the number of rows to
        include in each chunk.

    See the documentation for pandas.read_sql for further explanation
    of the following parameters:
    index_col, coerce_float, parse_dates, params, chunksize

    Returns
    -------
    GeoDataFrame
        A single GeoDataFrame when ``chunksize`` is None, otherwise a
        generator yielding one GeoDataFrame per chunk.

    Examples
    --------
    PostGIS

    >>> from sqlalchemy import create_engine  # doctest: +SKIP
    >>> db_connection_url = "postgresql://myusername:mypassword@myhost:5432/mydatabase"
    >>> con = create_engine(db_connection_url)  # doctest: +SKIP
    >>> sql = "SELECT geom, highway FROM roads"
    >>> df = geopandas.read_postgis(sql, con)  # doctest: +SKIP

    SpatiaLite

    >>> sql = "SELECT ST_AsBinary(geom) AS geom, highway FROM roads"
    >>> df = geopandas.read_postgis(sql, con)  # doctest: +SKIP
    """
    # The pandas call is the same for both modes; only the handling of its
    # result (one frame vs. an iterator of frames) differs.
    result = pd.read_sql(
        sql,
        con,
        index_col=index_col,
        coerce_float=coerce_float,
        parse_dates=parse_dates,
        params=params,
        chunksize=chunksize,
    )

    if chunksize is None:
        # everything was read in one chunk: return a single GeoDataFrame
        return _df_to_geodf(result, geom_col=geom_col, crs=crs, con=con)

    # data is read in chunks: return a generator
    return (
        _df_to_geodf(chunk, geom_col=geom_col, crs=crs, con=con) for chunk in result
    )
|
||||
|
||||
|
||||
def _get_geometry_type(gdf):
    """
    Get basic geometry type of a GeoDataFrame. See more info from:
    https://geoalchemy-2.readthedocs.io/en/latest/types.html#geoalchemy2.types._GISType

    Following rules apply:
     - if geometries all share the same geometry-type,
       geometries are inserted with the given GeometryType with following types:
        - Point, LineString, Polygon, MultiPoint, MultiLineString, MultiPolygon,
          GeometryCollection.
        - LinearRing geometries will be converted into LineString -objects.
     - in all other cases, geometries will be inserted with type GEOMETRY:
        - a mix of Polygons and MultiPolygons in GeoSeries
        - a mix of Points and LineStrings in GeoSeries
        - geometry is of type GeometryCollection,
          such as GeometryCollection([Point, LineStrings])
     - if any of the geometries has Z-coordinate, all records will
       be written with 3D.

    Returns
    -------
    tuple of (str, bool)
        The target geometry type name and whether any LinearRing was found.

    Raises
    ------
    ValueError
        If the only geometry type present is None.
    """
    geom_types = list(gdf.geometry.geom_type.unique())
    has_curve = any(
        name is not None and "LinearRing" in name for name in geom_types
    )

    if len(geom_types) != 1:
        target_geom_type = "GEOMETRY"
    elif has_curve:
        target_geom_type = "LINESTRING"
    elif geom_types[0] is None:
        raise ValueError("No valid geometries in the data.")
    else:
        target_geom_type = geom_types[0].upper()

    # Check for 3D-coordinates
    if any(gdf.geometry.has_z):
        target_geom_type += "Z"

    return target_geom_type, has_curve
|
||||
|
||||
|
||||
def _get_srid_from_crs(gdf):
    """
    Get EPSG code from CRS if available. If not, return 0.

    The CRS is matched at decreasing confidence levels (100, 70, 25). At each
    level the EPSG code is tried first, then an ESRI authority code. When no
    code can be determined (no CRS, no match, or the lookup raised), a single
    ``UserWarning`` is emitted and 0 is returned.

    Parameters
    ----------
    gdf : GeoDataFrame
        Frame whose ``crs`` attribute is inspected.

    Returns
    -------
    int
        The SRID, or 0 when it could not be determined.
    """
    # Use geoalchemy2 default for srid
    # Note: undefined srid in PostGIS is 0
    srid = None
    if gdf.crs is not None:
        try:
            for confidence in (100, 70, 25):
                srid = gdf.crs.to_epsg(min_confidence=confidence)
                if srid is not None:
                    break
                auth_srid = gdf.crs.to_authority(
                    auth_name="ESRI", min_confidence=confidence
                )
                if auth_srid is not None:
                    srid = int(auth_srid[1])
                    break
        except Exception:
            # Best-effort lookup: a failure is treated as "no SRID found" and
            # is reported exactly once by the warning below.
            srid = None

    if srid is None:
        warnings.warn(
            "Could not parse CRS from the GeoDataFrame. "
            "Inserting data without defined CRS.",
            UserWarning,
            stacklevel=2,
        )
        return 0

    return srid
|
||||
|
||||
|
||||
def _convert_linearring_to_linestring(gdf, geom_name):
    """Replace LinearRing geometries in column ``geom_name`` with LineStrings.

    ``gdf`` is modified in place and also returned.
    """
    from shapely.geometry import LineString

    # Todo: Use shapely function once it's implemented:
    # https://github.com/shapely/shapely/issues/1617

    is_ring = gdf.geom_type == "LinearRing"
    rings = gdf.loc[is_ring, geom_name]
    gdf.loc[is_ring, geom_name] = rings.apply(lambda ring: LineString(ring))
    return gdf
|
||||
|
||||
|
||||
def _convert_to_ewkb(gdf, geom_name, srid):
    """Convert geometries to ewkb.

    Returns a plain ``pandas.DataFrame`` in which column ``geom_name`` holds
    the hex-encoded EWKB (SRID included) of the geometries of ``gdf``.
    """
    with_srid = shapely.set_srid(gdf[geom_name].values._data, srid=srid)
    ewkb_hex = shapely.to_wkb(with_srid, hex=True, include_srid=True)

    # The gdf will warn that the geometry column doesn't hold in-memory geometries
    # now that they are EWKB, so convert back to a regular dataframe to avoid warning
    # the user that the dtypes are unexpected.
    plain = pd.DataFrame(gdf, copy=False)
    plain[geom_name] = ewkb_hex
    return plain
|
||||
|
||||
|
||||
def _psql_insert_copy(tbl, conn, keys, data_iter):
    """Insert rows with ``COPY ... FROM STDIN WITH CSV``.

    The rows of ``data_iter`` are serialised to an in-memory CSV buffer and
    streamed into table ``tbl.table`` over the DBAPI connection behind
    ``conn``. ``keys`` are the column names written.
    """
    import csv
    import io

    buffer = io.StringIO()
    csv.writer(buffer).writerows(data_iter)
    buffer.seek(0)

    quoted_columns = ", ".join(f'"{key}"' for key in keys)

    dbapi_conn = conn.connection
    copy_sql = (
        f'COPY "{tbl.table.schema}"."{tbl.table.name}" '
        f"({quoted_columns}) FROM STDIN WITH CSV"
    )
    with dbapi_conn.cursor() as cursor:
        cursor_copy = getattr(cursor, "copy", None)
        if callable(cursor_copy):
            # Use psycopg method if it's available
            with cursor_copy(copy_sql) as copy:
                copy.write(buffer.read())
        else:
            # otherwise use psycopg2 method
            cursor.copy_expert(copy_sql, buffer)
|
||||
|
||||
|
||||
def _write_postgis(
    gdf,
    name,
    con,
    schema=None,
    if_exists="fail",
    index=False,
    index_label=None,
    chunksize=None,
    dtype=None,
):
    """
    Upload GeoDataFrame into PostGIS database.

    This method requires SQLAlchemy and GeoAlchemy2, and a PostgreSQL
    Python driver (e.g. psycopg2) to be installed.

    Parameters
    ----------
    gdf : GeoDataFrame
        Frame to upload. It is copied first, so the caller's frame is not
        modified.
    name : str
        Name of the target table.
    con : sqlalchemy.engine.Connection or sqlalchemy.engine.Engine
        Active connection to the PostGIS database.
    if_exists : {'fail', 'replace', 'append'}, default 'fail'
        How to behave if the table already exists:

        - fail: Raise a ValueError.
        - replace: Drop the table before inserting new values.
        - append: Insert new values to the existing table.
    schema : string, optional
        Specify the schema. If None, use default schema: 'public'.
    index : bool, default False
        Write DataFrame index as a column.
        Uses *index_label* as the column name in the table.
    index_label : string or sequence, default None
        Column label for index column(s).
        If None is given (default) and index is True,
        then the index names are used.
    chunksize : int, optional
        Rows will be written in batches of this size at a time.
        By default, all rows will be written at once.
    dtype : dict of column name to SQL type, default None
        Specifying the datatype for columns.
        The keys should be the column names and the values
        should be the SQLAlchemy types. The dictionary passed in is not
        modified; the geometry column type is added to a copy.

    Raises
    ------
    ImportError
        If geoalchemy2 / sqlalchemy cannot be imported.
    ValueError
        If ``if_exists="append"`` and the SRID of the existing table differs
        from the SRID of ``gdf``.

    Examples
    --------

    >>> from sqlalchemy import create_engine  # doctest: +SKIP
    >>> engine = create_engine(
    ...     "postgresql://myusername:mypassword@myhost:5432/mydatabase"
    ... )  # doctest: +SKIP
    >>> gdf.to_postgis("my_table", engine)  # doctest: +SKIP
    """
    try:
        from geoalchemy2 import Geometry
        from sqlalchemy import text
    except ImportError as err:
        raise ImportError("'to_postgis()' requires geoalchemy2 package.") from err

    gdf = gdf.copy()
    geom_name = gdf.geometry.name

    # Get srid
    srid = _get_srid_from_crs(gdf)

    # Get geometry type and info whether data contains LinearRing.
    geometry_type, has_curve = _get_geometry_type(gdf)

    # Build dtype with Geometry; copy so the caller's mapping is left untouched
    geometry_dtype = Geometry(geometry_type=geometry_type, srid=srid)
    if dtype is None:
        dtype = {geom_name: geometry_dtype}
    else:
        dtype = {**dtype, geom_name: geometry_dtype}

    # Convert LinearRing geometries to LineString
    if has_curve:
        gdf = _convert_linearring_to_linestring(gdf, geom_name)

    # Convert geometries to EWKB
    gdf = _convert_to_ewkb(gdf, geom_name, srid)

    schema_name = schema if schema is not None else "public"

    if if_exists == "append":
        # Check that the geometry srid matches with the current GeoDataFrame
        with _get_conn(con) as connection:
            # Only check SRID if table exists
            if connection.dialect.has_table(connection, name, schema):
                # Bound parameters keep names containing quotes from breaking
                # (or altering) the statement.
                target_srid = connection.execute(
                    text("SELECT Find_SRID(:schema, :table, :geom_col);"),
                    {
                        "schema": str(schema_name),
                        "table": str(name),
                        "geom_col": str(geom_name),
                    },
                ).fetchone()[0]

                if target_srid != srid:
                    raise ValueError(
                        f"The CRS of the target table (EPSG:{target_srid}) differs "
                        f"from the CRS of current GeoDataFrame (EPSG:{srid})."
                    )

    with _get_conn(con) as connection:
        gdf.to_sql(
            name,
            connection,
            schema=schema_name,
            if_exists=if_exists,
            index=index,
            index_label=index_label,
            chunksize=chunksize,
            dtype=dtype,
            method=_psql_insert_copy,
        )
|
||||
|
||||
|
||||
@lru_cache
def _get_spatial_ref_sys_df(con, srid):
    """Read the ``srid`` and ``auth_name`` columns of ``spatial_ref_sys`` for ``srid``.

    Results are memoised per ``(con, srid)`` pair by ``lru_cache``.
    """
    query = "SELECT srid, auth_name FROM spatial_ref_sys WHERE srid = {}".format(srid)
    return pd.read_sql(query, con)
|
||||
+100
@@ -0,0 +1,100 @@
|
||||
"""
|
||||
Script to create the data and write legacy storage (pickle) files.
|
||||
|
||||
Based on pandas' generate_legacy_storage_files.py script.
|
||||
|
||||
To use this script, create an environment for which you want to
|
||||
generate pickles, activate the environment, and run this script as:
|
||||
|
||||
$ python geopandas/geopandas/io/tests/generate_legacy_storage_files.py \
|
||||
geopandas/geopandas/io/tests/data/pickle/ pickle
|
||||
|
||||
This script generates a storage file for the current arch, system,
and python version.
|
||||
|
||||
The idea here is you are using the *current* version of the
|
||||
generate_legacy_storage_files with an *older* version of geopandas to
|
||||
generate a pickle file. We will then check this file into a current
|
||||
branch, and test using test_pickle.py. This will load the *older*
|
||||
pickles and test versus the current data that is generated
|
||||
(with master). These are then compared.
|
||||
|
||||
"""
|
||||
|
||||
import os
|
||||
import pickle
|
||||
import platform
|
||||
import sys
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from shapely.geometry import Point
|
||||
|
||||
import geopandas
|
||||
|
||||
|
||||
def create_pickle_data():
    """Create the pickle data.

    Returns a dict with two GeoDataFrames: ``"gdf_the_geom"`` (custom geometry
    column name) and ``"gdf_crs"`` (CRS set to EPSG:4326).
    """
    frames = {}

    # custom geometry column name
    frames["gdf_the_geom"] = geopandas.GeoDataFrame(
        {"a": [1, 2, 3], "the_geom": [Point(i, i) for i in (1, 2, 3)]},
        geometry="the_geom",
    )

    # with crs
    frames["gdf_crs"] = geopandas.GeoDataFrame(
        {"a": [0.1, 0.2, 0.3], "geometry": [Point(i, i) for i in (1, 2, 3)]},
        crs="EPSG:4326",
    )

    return frames
|
||||
|
||||
|
||||
def platform_name():
    """Return an identifier built from the geopandas, pandas and Python
    versions plus the machine and (lower-cased) system name, joined by ``_``.
    """
    parts = (
        str(geopandas.__version__),
        f"pd-{pd.__version__}",
        f"py-{platform.python_version()}",
        str(platform.machine()),
        str(platform.system().lower()),
    )
    return "_".join(parts)
|
||||
|
||||
|
||||
def write_legacy_pickles(output_dir):
    """Write the legacy pickle file for the current platform into ``output_dir``.

    The file is named ``<platform_name()>.pickle`` and contains the dictionary
    returned by ``create_pickle_data()``. Progress is reported on stdout.
    """
    print(
        "This script generates a storage file for the current arch, system, "
        "and python version"
    )
    # format the string, not the (None) return value of print()
    print("geopandas version: {}".format(geopandas.__version__))
    print(" output dir : {}".format(output_dir))
    print(" storage format: pickle")

    pth = "{}.pickle".format(platform_name())

    # the context manager closes the file even if pickling fails
    with open(os.path.join(output_dir, pth), "wb") as fh:
        pickle.dump(create_pickle_data(), fh, pickle.DEFAULT_PROTOCOL)

    print("created pickle file: {}".format(pth))
|
||||
|
||||
|
||||
def main():
    """Command-line entry point.

    Expects exactly two arguments, ``<output_dir> <storage_type>``; the only
    supported storage type is ``pickle``. Exits with a message otherwise.
    """
    if len(sys.argv) != 3:
        sys.exit(
            "Specify output directory and storage type: generate_legacy_"
            "storage_files.py <output_dir> <storage_type> "
        )

    _, output_dir, storage_type = (str(arg) for arg in sys.argv)

    if storage_type != "pickle":
        sys.exit("storage_type must be one of {'pickle'}")

    write_legacy_pickles(output_dir=output_dir)


if __name__ == "__main__":
    main()
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
+328
@@ -0,0 +1,328 @@
|
||||
import os
|
||||
|
||||
from shapely.geometry import (
|
||||
LineString,
|
||||
MultiLineString,
|
||||
MultiPoint,
|
||||
MultiPolygon,
|
||||
Point,
|
||||
Polygon,
|
||||
)
|
||||
|
||||
import geopandas
|
||||
from geopandas import GeoDataFrame
|
||||
|
||||
from .test_file import FIONA_MARK, PYOGRIO_MARK
|
||||
|
||||
import pytest
|
||||
from geopandas.testing import assert_geodataframe_equal
|
||||
|
||||
# Credit: Polygons below come from Montreal city Open Data portal
|
||||
# http://donnees.ville.montreal.qc.ca/dataset/unites-evaluation-fonciere
|
||||
# Footprint polygons (closed rings: first and last vertex coincide).
city_hall_boundaries = Polygon(
    [
        (-73.5541107525234, 45.5091983609661),
        (-73.5546126200639, 45.5086813829106),
        (-73.5540185061397, 45.5084409343852),
        (-73.5539986525799, 45.5084323044531),
        (-73.5535801792994, 45.5089539203786),
        (-73.5541107525234, 45.5091983609661),
    ]
)
vauquelin_place = Polygon(
    [
        (-73.5542465586147, 45.5081555487952),
        (-73.5540185061397, 45.5084409343852),
        (-73.5546126200639, 45.5086813829106),
        (-73.5548825850032, 45.5084033554357),
        (-73.5542465586147, 45.5081555487952),
    ]
)

# Two open line strings traced along the city hall boundary vertices.
city_hall_walls = [
    LineString(
        [
            (-73.5541107525234, 45.5091983609661),
            (-73.5546126200639, 45.5086813829106),
            (-73.5540185061397, 45.5084409343852),
        ]
    ),
    LineString(
        [
            (-73.5539986525799, 45.5084323044531),
            (-73.5535801792994, 45.5089539203786),
            (-73.5541107525234, 45.5091983609661),
        ]
    ),
]

# 2D points
city_hall_entrance = Point(-73.553785, 45.508722)
city_hall_balcony = Point(-73.554138, 45.509080)
city_hall_council_chamber = Point(-73.554246, 45.508931)

# same location as the entrance, with a Z coordinate
point_3D = Point(-73.553785, 45.508722, 300)
|
||||
|
||||
|
||||
# *****************************************
|
||||
# TEST TOOLING
|
||||
|
||||
|
||||
class _ExpectedError:
    """Pair of an exception type and the message pattern expected with it."""

    def __init__(self, error_type, error_message_match):
        # exposed as ``type`` and ``match``
        self.type, self.match = error_type, error_message_match
|
||||
|
||||
|
||||
class _ExpectedErrorBuilder:
    """Registers the error expected for one (GeoDataFrame, driver) key."""

    def __init__(self, composite_key):
        self.composite_key = composite_key

    def to_raise(self, error_type, error_match):
        """Record in ``_expected_exceptions`` that this key should raise."""
        expectation = _ExpectedError(error_type, error_match)
        _expected_exceptions[self.composite_key] = expectation
|
||||
|
||||
|
||||
def _expect_writing(gdf, ogr_driver):
    """Return a builder for declaring the error expected when writing ``gdf``
    with ``ogr_driver``.
    """
    key = _composite_key(gdf, ogr_driver)
    return _ExpectedErrorBuilder(key)
|
||||
|
||||
|
||||
def _composite_key(gdf, ogr_driver):
    """Return a hashable key made of the identity of ``gdf`` and the driver name."""
    return frozenset((id(gdf), ogr_driver))
|
||||
|
||||
|
||||
def _expected_error_on(gdf, ogr_driver):
    """Return the error registered for ``gdf`` with ``ogr_driver``, or None."""
    return _expected_exceptions.get(_composite_key(gdf, ogr_driver))
|
||||
|
||||
|
||||
# *****************************************
|
||||
# TEST CASES
|
||||
# Registry of frames to round-trip, and of the errors expected for specific
# (frame, driver) combinations. Expectations are keyed on the identity of the
# frame, so each one is registered right after its frame is created.
_geodataframes_to_write = []
_expected_exceptions = {}
_CRS = "epsg:4326"

# ------------------
# Points only
gdf = GeoDataFrame(
    {"a": [1, 2]}, geometry=[city_hall_entrance, city_hall_balcony], crs=_CRS
)
_geodataframes_to_write.append(gdf)

# ------------------
# MultiPoints only
gdf = GeoDataFrame(
    {"a": [1, 2]},
    geometry=[
        MultiPoint([city_hall_balcony, city_hall_council_chamber]),
        MultiPoint([city_hall_entrance, city_hall_balcony, city_hall_council_chamber]),
    ],
    crs=_CRS,
)
_geodataframes_to_write.append(gdf)

# ------------------
# Points mixed with MultiPoints
gdf = GeoDataFrame(
    {"a": [1, 2]},
    geometry=[MultiPoint([city_hall_entrance, city_hall_balcony]), city_hall_balcony],
    crs=_CRS,
)
_geodataframes_to_write.append(gdf)
# 'ESRI Shapefile' driver supports writing LineString/MultiLinestring and
# Polygon/MultiPolygon but does not mention Point/MultiPoint
# see https://www.gdal.org/drv_shapefile.html
_expect_writing(gdf, "ESRI Shapefile").to_raise(RuntimeError, "Failed to write record")

# ------------------
# LineStrings only
gdf = GeoDataFrame({"a": [1, 2]}, geometry=city_hall_walls, crs=_CRS)
_geodataframes_to_write.append(gdf)

# ------------------
# MultiLineStrings only
gdf = GeoDataFrame(
    {"a": [1, 2]},
    geometry=[MultiLineString(city_hall_walls), MultiLineString(city_hall_walls)],
    crs=_CRS,
)
_geodataframes_to_write.append(gdf)

# ------------------
# LineStrings mixed with MultiLineStrings
gdf = GeoDataFrame(
    {"a": [1, 2]},
    geometry=[MultiLineString(city_hall_walls), city_hall_walls[0]],
    crs=_CRS,
)
_geodataframes_to_write.append(gdf)

# ------------------
# Polygons only
gdf = GeoDataFrame(
    {"a": [1, 2]}, geometry=[city_hall_boundaries, vauquelin_place], crs=_CRS
)
_geodataframes_to_write.append(gdf)

# ------------------
# a single MultiPolygon
gdf = GeoDataFrame(
    {"a": [1]},
    geometry=[MultiPolygon((city_hall_boundaries, vauquelin_place))],
    crs=_CRS,
)
_geodataframes_to_write.append(gdf)

# ------------------
# Polygon mixed with MultiPolygon
gdf = GeoDataFrame(
    {"a": [1, 2]},
    geometry=[
        MultiPolygon((city_hall_boundaries, vauquelin_place)),
        city_hall_boundaries,
    ],
    crs=_CRS,
)
_geodataframes_to_write.append(gdf)

# ------------------
# null geometry followed by a Point
gdf = GeoDataFrame({"a": [1, 2]}, geometry=[None, city_hall_entrance], crs=_CRS)
_geodataframes_to_write.append(gdf)

# ------------------
# null geometry followed by a 3D Point
gdf = GeoDataFrame({"a": [1, 2]}, geometry=[None, point_3D], crs=_CRS)
_geodataframes_to_write.append(gdf)

# ------------------
# null geometries only
gdf = GeoDataFrame({"a": [1, 2]}, geometry=[None, None], crs=_CRS)
_geodataframes_to_write.append(gdf)

# ------------------
# all shape types mixed together
gdf = GeoDataFrame(
    {"a": [1, 2, 3, 4, 5, 6]},
    geometry=[
        MultiPolygon((city_hall_boundaries, vauquelin_place)),
        city_hall_entrance,
        MultiLineString(city_hall_walls),
        city_hall_walls[0],
        MultiPoint([city_hall_entrance, city_hall_balcony]),
        city_hall_balcony,
    ],
    crs=_CRS,
)
_geodataframes_to_write.append(gdf)
# Not supported by 'ESRI Shapefile' driver
_expect_writing(gdf, "ESRI Shapefile").to_raise(RuntimeError, "Failed to write record")

# ------------------
# all 2D shape types plus a 3D Point mixed together
gdf = GeoDataFrame(
    {"a": [1, 2, 3, 4, 5, 6, 7]},
    geometry=[
        MultiPolygon((city_hall_boundaries, vauquelin_place)),
        city_hall_entrance,
        MultiLineString(city_hall_walls),
        city_hall_walls[0],
        MultiPoint([city_hall_entrance, city_hall_balcony]),
        city_hall_balcony,
        point_3D,
    ],
    crs=_CRS,
)
_geodataframes_to_write.append(gdf)
# Not supported by 'ESRI Shapefile' driver
_expect_writing(gdf, "ESRI Shapefile").to_raise(RuntimeError, "Failed to write record")
|
||||
|
||||
|
||||
@pytest.fixture(params=_geodataframes_to_write)
def geodataframe(request):
    """Parametrized fixture: one entry of ``_geodataframes_to_write`` per run."""
    frame = request.param
    return frame
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        ("GeoJSON", ".geojson"),
        ("ESRI Shapefile", ".shp"),
        ("GPKG", ".gpkg"),
        ("SQLite", ".sqlite"),
    ]
)
def ogr_driver(request):
    """Parametrized fixture returning a ``(driver name, file extension)`` pair."""
    driver_and_extension = request.param
    return driver_and_extension
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        pytest.param("fiona", marks=FIONA_MARK),
        pytest.param("pyogrio", marks=PYOGRIO_MARK),
    ]
)
def engine(request):
    """Parametrized fixture returning the I/O engine name ("fiona" or "pyogrio")."""
    engine_name = request.param
    return engine_name
|
||||
|
||||
|
||||
def test_to_file_roundtrip(tmpdir, geodataframe, ogr_driver, engine):
    """Write ``geodataframe`` to disk and check that reading it back is lossless.

    Parameters
    ----------
    tmpdir : pytest tmpdir fixture
        Directory that receives the output file.
    geodataframe : GeoDataFrame
        Frame to write (see the ``geodataframe`` fixture).
    ogr_driver : tuple of (str, str)
        OGR driver name and the matching file extension.
    engine : str
        I/O engine passed to ``to_file`` / ``read_file``.

    When ``_expected_error_on`` reports an expected failure for the
    frame/driver combination, the write must raise ``RuntimeError`` instead.
    """
    driver, ext = ogr_driver
    output_file = os.path.join(str(tmpdir), "output_file" + ext)
    write_kwargs = {}
    if driver == "SQLite":
        write_kwargs["spatialite"] = True

        # This if statement can be removed once minimal fiona version >= 1.8.20
        if engine == "fiona":
            from packaging.version import Version

            import fiona

            if Version(fiona.__version__) < Version("1.8.20"):
                pytest.skip("SQLite driver only available from version 1.8.20")

        # If only 3D Points, geometry_type needs to be specified for spatialite at the
        # moment. This if can be removed once the following PR is released:
        # https://github.com/geopandas/pyogrio/pull/223
        if (
            engine == "pyogrio"
            # the length check must be on the frame itself; the previous
            # ``len(geodataframe == 2)`` measured an element-wise comparison
            # and was therefore true for every non-empty frame
            and len(geodataframe) == 2
            and geodataframe.geometry[0] is None
            and geodataframe.geometry[1] is not None
            and geodataframe.geometry[1].has_z
        ):
            write_kwargs["geometry_type"] = "Point Z"

    expected_error = _expected_error_on(geodataframe, driver)
    if expected_error:
        with pytest.raises(
            RuntimeError, match="Failed to write record|Could not add feature to layer"
        ):
            geodataframe.to_file(
                output_file, driver=driver, engine=engine, **write_kwargs
            )
    else:
        if driver == "SQLite" and engine == "pyogrio":
            try:
                geodataframe.to_file(
                    output_file, driver=driver, engine=engine, **write_kwargs
                )
            except ValueError as e:
                if "unrecognized option 'SPATIALITE'" in str(e):
                    pytest.xfail(
                        "pyogrio wheels from PyPI do not come with SpatiaLite support. "
                        f"Error: {e}"
                    )
                raise
        else:
            geodataframe.to_file(
                output_file, driver=driver, engine=engine, **write_kwargs
            )

        reloaded = geopandas.read_file(output_file, engine=engine)

        if driver == "GeoJSON" and engine == "pyogrio":
            # For GeoJSON files, the int64 column comes back as int32
            reloaded["a"] = reloaded["a"].astype("int64")

        assert_geodataframe_equal(geodataframe, reloaded, check_column_type="equiv")
|
||||
@@ -0,0 +1,537 @@
|
||||
import contextlib
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
from packaging.version import Version
|
||||
|
||||
import numpy as np
|
||||
|
||||
import shapely
|
||||
from shapely import MultiPoint, Point, box
|
||||
|
||||
from geopandas import GeoDataFrame, GeoSeries
|
||||
|
||||
import pytest
|
||||
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
|
||||
|
||||
pytest.importorskip("pyarrow")
|
||||
import pyarrow as pa
|
||||
import pyarrow.compute as pc
|
||||
from pyarrow import feather
|
||||
|
||||
# Directory named "data" next to this test module
DATA_PATH = pathlib.Path(os.path.dirname(__file__)).joinpath("data")
|
||||
|
||||
|
||||
def pa_table(table):
    """Turn the object returned by ``to_arrow`` into a ``pyarrow.Table``.

    With pyarrow >= 14.0.0 the object is passed to ``pa.table``; with older
    versions the private ``_pa_table`` attribute is returned instead.
    """
    if Version(pa.__version__) >= Version("14.0.0"):
        return pa.table(table)
    return table._pa_table
|
||||
|
||||
|
||||
def pa_array(array):
    """Turn the object returned by ``to_arrow`` into a ``pyarrow.Array``.

    With pyarrow >= 14.0.0 the object is passed to ``pa.array``; with older
    versions the private ``_pa_array`` attribute is returned instead.
    """
    if Version(pa.__version__) >= Version("14.0.0"):
        return pa.array(array)
    return array._pa_array
|
||||
|
||||
|
||||
def assert_table_equal(left, right, check_metadata=True):
    """Assert that two pyarrow tables with a "geometry" column are equal.

    Parameters
    ----------
    left, right : pyarrow.Table
        Tables to compare; both must contain a column named "geometry".
    check_metadata : bool, default True
        Whether schema and field metadata must match as well.

    Raises
    ------
    AssertionError
        With a message naming the schema, metadata or column that differs.
    """
    geom_type = left["geometry"].type
    # in case of Points (directly the inner fixed_size_list or struct type)
    # -> there are NaNs for empties -> we need to compare them separately
    # and then fill, because pyarrow.Table.equals considers NaNs as not equal
    if pa.types.is_fixed_size_list(geom_type):
        left_values = left["geometry"].chunk(0).values
        right_values = right["geometry"].chunk(0).values
        assert pc.is_nan(left_values).equals(pc.is_nan(right_values))
        left_geoms = pa.FixedSizeListArray.from_arrays(
            pc.replace_with_mask(left_values, pc.is_nan(left_values), 0.0),
            type=left["geometry"].type,
        )
        right_geoms = pa.FixedSizeListArray.from_arrays(
            pc.replace_with_mask(right_values, pc.is_nan(right_values), 0.0),
            type=right["geometry"].type,
        )
        # look the column position up by name instead of assuming index 1
        left = left.set_column(
            left.schema.get_field_index("geometry"),
            left.schema.field("geometry"),
            left_geoms,
        )
        right = right.set_column(
            right.schema.get_field_index("geometry"),
            right.schema.field("geometry"),
            right_geoms,
        )

    elif pa.types.is_struct(geom_type):
        left_arr = left["geometry"].chunk(0)
        right_arr = right["geometry"].chunk(0)

        for i in range(left_arr.type.num_fields):
            assert pc.is_nan(left_arr.field(i)).equals(pc.is_nan(right_arr.field(i)))

        left_geoms = pa.StructArray.from_arrays(
            [
                pc.replace_with_mask(
                    left_arr.field(i), pc.is_nan(left_arr.field(i)), 0.0
                )
                for i in range(left_arr.type.num_fields)
            ],
            fields=list(left["geometry"].type),
        )
        right_geoms = pa.StructArray.from_arrays(
            [
                pc.replace_with_mask(
                    right_arr.field(i), pc.is_nan(right_arr.field(i)), 0.0
                )
                for i in range(right_arr.type.num_fields)
            ],
            fields=list(right["geometry"].type),
        )

        # look the column position up by name instead of assuming index 1
        left = left.set_column(
            left.schema.get_field_index("geometry"),
            left.schema.field("geometry"),
            left_geoms,
        )
        right = right.set_column(
            right.schema.get_field_index("geometry"),
            right.schema.field("geometry"),
            right_geoms,
        )

    if left.equals(right, check_metadata=check_metadata):
        return

    # The tables differ: narrow down where, to give a useful error message.
    if not left.schema.equals(right.schema):
        raise AssertionError(
            f"Schema not equal\nLeft:\n{left.schema}\nRight:\n{right.schema}"
        )

    if check_metadata:
        if not left.schema.equals(right.schema, check_metadata=True):
            if not left.schema.metadata == right.schema.metadata:
                raise AssertionError(
                    "Metadata not equal\n"
                    f"Left:\n{left.schema.metadata}\nRight:\n{right.schema.metadata}"
                )
        for col in left.schema.names:
            assert left.schema.field(col).equals(
                right.schema.field(col), check_metadata=True
            )

    for col in left.column_names:
        a_left = pa.concat_arrays(left.column(col).chunks)
        a_right = pa.concat_arrays(right.column(col).chunks)
        if not a_left.equals(a_right):
            raise AssertionError(
                f"Column '{col}' not equal:\n{a_left.diff(a_right)}"
            )

    raise AssertionError("Tables not equal for unknown reason")
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    shapely.geos_version < (3, 9, 0),
    reason="Checking for empty is buggy with GEOS<3.9",
)  # an old GEOS is installed in the CI builds with the defaults channel
@pytest.mark.parametrize(
    "dim",
    [
        "xy",
        pytest.param(
            "xyz",
            marks=pytest.mark.skipif(
                shapely.geos_version < (3, 10, 0),
                reason="Cannot write 3D geometries with GEOS<3.10",
            ),
        ),
    ],
)
@pytest.mark.parametrize(
    "geometry_type",
    ["point", "linestring", "polygon", "multipoint", "multilinestring", "multipolygon"],
)
@pytest.mark.parametrize(
    "geometry_encoding, interleaved",
    [("WKB", None), ("geoarrow", True), ("geoarrow", False)],
    ids=["WKB", "geoarrow-interleaved", "geoarrow-separated"],
)
def test_geoarrow_export(geometry_type, dim, geometry_encoding, interleaved):
    """Export the example data to Arrow and compare with the stored files."""
    geoarrow_dir = DATA_PATH / "geoarrow"
    suffix = geometry_type + "_z" if dim == "xyz" else geometry_type

    # Build the input frame from the WKB example file
    df = feather.read_feather(geoarrow_dir / f"example-{suffix}-wkb.arrow")
    df["geometry"] = GeoSeries.from_wkb(df["geometry"])
    df["row_number"] = df["row_number"].astype("int32")
    df = GeoDataFrame(df)
    df.geometry.array.crs = None

    # Pick the file holding the expected result for this encoding
    if geometry_encoding == "WKB":
        expected_name = f"example-{suffix}-wkb.arrow"
    elif interleaved:
        expected_name = f"example-{suffix}-interleaved.arrow"
    else:
        expected_name = f"example-{suffix}.arrow"
    expected = feather.read_table(geoarrow_dir / expected_name)

    # GeoDataFrame -> Arrow Table
    exported = df.to_arrow(geometry_encoding=geometry_encoding, interleaved=interleaved)
    # drop the "pandas" metadata
    result = pa_table(exported).replace_schema_metadata(None)

    mask_nonempty = None
    is_3d_wkb_collection = (
        geometry_encoding == "WKB"
        and dim == "xyz"
        and geometry_type.startswith("multi")
    )
    if is_3d_wkb_collection:
        # for collections with z dimension, drop the empties because those don't
        # roundtrip correctly to WKB
        # (https://github.com/libgeos/geos/issues/888)
        mask_nonempty = pa.array(np.asarray(~df.geometry.is_empty))
        result = result.filter(mask_nonempty)
        expected = expected.filter(mask_nonempty)

    assert_table_equal(result, expected)

    # GeoSeries -> Arrow array
    if geometry_encoding != "WKB" and geometry_type == "point":
        # points would need the NaN handling again; that was already covered
        # by the table comparison above, so stop here
        return
    result_arr = pa_array(
        df.geometry.to_arrow(
            geometry_encoding=geometry_encoding, interleaved=interleaved
        )
    )
    if mask_nonempty is not None:
        result_arr = result_arr.filter(mask_nonempty)
    assert result_arr.equals(expected["geometry"].chunk(0))
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    Version(shapely.__version__) < Version("2.0.2"),
    reason="from_ragged_array failing with read-only array input",
)
@pytest.mark.parametrize("encoding", ["WKB", "geoarrow"])
def test_geoarrow_multiple_geometry_crs(encoding):
    """Each geometry column must carry its own CRS through an Arrow round trip."""
    pytest.importorskip("pyproj")
    # ensure each geometry column has its own crs
    gdf = GeoDataFrame(geometry=[box(0, 0, 10, 10)], crs="epsg:4326")
    gdf["geom2"] = gdf.geometry.to_crs("epsg:3857")

    result = pa_table(gdf.to_arrow(geometry_encoding=encoding))
    meta1 = json.loads(
        result.schema.field("geometry").metadata[b"ARROW:extension:metadata"]
    )
    assert json.loads(meta1["crs"])["id"]["code"] == 4326
    meta2 = json.loads(
        result.schema.field("geom2").metadata[b"ARROW:extension:metadata"]
    )
    assert json.loads(meta2["crs"])["id"]["code"] == 3857

    roundtripped = GeoDataFrame.from_arrow(result)
    assert_geodataframe_equal(gdf, roundtripped)
    # check the CRS of the round-tripped frame; asserting on the input ``gdf``
    # would pass regardless of what ``from_arrow`` returned
    assert roundtripped.geometry.crs == "epsg:4326"
    assert roundtripped.geom2.crs == "epsg:3857"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("encoding", ["WKB", "geoarrow"])
def test_geoarrow_series_name_crs(encoding):
    """The exported Arrow field must carry the series name, extension name and CRS."""
    pytest.importorskip("pyproj")
    pytest.importorskip("pyarrow", minversion="14.0.0")

    gser = GeoSeries([box(0, 0, 10, 10)], crs="epsg:4326", name="geom")
    schema_capsule, _ = gser.to_arrow(geometry_encoding=encoding).__arrow_c_array__()
    field = pa.Field._import_from_c_capsule(schema_capsule)
    assert field.name == "geom"
    # Select the expected name first: ``assert (a == b if cond else c)`` parses
    # as ``(a == b) if cond else c`` and would always pass for "geoarrow".
    expected_extension_name = (
        b"geoarrow.wkb" if encoding == "WKB" else b"geoarrow.polygon"
    )
    assert field.metadata[b"ARROW:extension:name"] == expected_extension_name
    meta = json.loads(field.metadata[b"ARROW:extension:metadata"])
    assert json.loads(meta["crs"])["id"]["code"] == 4326

    # ensure it also works without a name
    gser = GeoSeries([box(0, 0, 10, 10)])
    schema_capsule, _ = gser.to_arrow(geometry_encoding=encoding).__arrow_c_array__()
    field = pa.Field._import_from_c_capsule(schema_capsule)
    assert field.name == ""
|
||||
|
||||
|
||||
def test_geoarrow_unsupported_encoding():
    """An unknown ``geometry_encoding`` raises ValueError for frame and series."""
    frame = GeoDataFrame(geometry=[box(0, 0, 10, 10)], crs="epsg:4326")
    message = "Expected geometry encoding"

    # GeoDataFrame export
    with pytest.raises(ValueError, match=message):
        frame.to_arrow(geometry_encoding="invalid")

    # GeoSeries export
    with pytest.raises(ValueError, match=message):
        frame.geometry.to_arrow(geometry_encoding="invalid")
|
||||
|
||||
|
||||
def test_geoarrow_mixed_geometry_types():
    """Point + Polygon is rejected; Point + MultiPoint exports as multipoint."""
    point_and_polygon = GeoDataFrame(
        {"geometry": [Point(0, 0), box(0, 0, 10, 10)]},
        crs="epsg:4326",
    )
    with pytest.raises(ValueError, match="Geometry type combination is not supported"):
        point_and_polygon.to_arrow(geometry_encoding="geoarrow")

    point_and_multipoint = GeoDataFrame(
        {"geometry": [Point(0, 0), MultiPoint([(0, 0), (1, 1)])]},
        crs="epsg:4326",
    )
    table = pa_table(point_and_multipoint.to_arrow(geometry_encoding="geoarrow"))
    extension_name = table.schema.field("geometry").metadata[b"ARROW:extension:name"]
    assert extension_name == b"geoarrow.multipoint"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("geom_type", ["point", "polygon"])
@pytest.mark.parametrize(
    "encoding, interleaved", [("WKB", True), ("geoarrow", True), ("geoarrow", False)]
)
def test_geoarrow_missing(encoding, interleaved, geom_type):
    """Missing geometries are exported as Arrow nulls (or rejected, see below)."""
    # dummy test for single geometry type until missing values are included
    # in the test data for test_geoarrow_export
    present = Point(0, 0) if geom_type == "point" else box(0, 0, 10, 10)
    gdf = GeoDataFrame(geometry=[present, None], crs="epsg:4326")

    # interleaved points with missing values are expected to be rejected
    # when running against pyarrow < 15.0.0
    if (
        encoding == "geoarrow"
        and geom_type == "point"
        and interleaved
        and Version(pa.__version__) < Version("15.0.0")
    ):
        expected_message = (
            "Converting point geometries with missing values is not supported"
        )
        with pytest.raises(ValueError, match=expected_message):
            gdf.to_arrow(geometry_encoding=encoding, interleaved=interleaved)
        return

    table = pa_table(gdf.to_arrow(geometry_encoding=encoding, interleaved=interleaved))
    geometry_column = table["geometry"]
    assert geometry_column.null_count == 1
    assert geometry_column.is_null().to_pylist() == [False, True]
|
||||
|
||||
|
||||
def test_geoarrow_include_z():
    """``include_z`` controls whether the exported coordinates are xy or xyz."""
    # 2D input: xy by default, xyz (with NaN z values) when requested
    flat = GeoDataFrame({"geometry": [Point(0, 0), Point(1, 1), Point()]})

    geom_type = pa_table(flat.to_arrow(geometry_encoding="geoarrow"))["geometry"].type
    assert geom_type.value_field.name == "xy"
    assert geom_type.list_size == 2

    forced_3d = pa_table(flat.to_arrow(geometry_encoding="geoarrow", include_z=True))
    geom_type = forced_3d["geometry"].type
    assert geom_type.value_field.name == "xyz"
    assert geom_type.list_size == 3
    z_values = forced_3d["geometry"].chunk(0).values.to_numpy()[2::3]
    assert np.isnan(z_values).all()

    # 3D input: xyz by default, xy when z is explicitly excluded
    solid = GeoDataFrame({"geometry": [Point(0, 0, 0), Point(1, 1, 1), Point()]})

    geom_type = pa_table(solid.to_arrow(geometry_encoding="geoarrow"))["geometry"].type
    assert geom_type.value_field.name == "xyz"
    assert geom_type.list_size == 3

    forced_2d = pa_table(solid.to_arrow(geometry_encoding="geoarrow", include_z=False))
    geom_type = forced_2d["geometry"].type
    assert geom_type.value_field.name == "xy"
    assert geom_type.list_size == 2
|
||||
|
||||
|
||||
@contextlib.contextmanager
def with_geoarrow_extension_types():
    """Register the geoarrow-pyarrow extension types for the ``with`` body.

    The calling test is skipped when ``geoarrow.pyarrow`` cannot be imported.
    The types are unregistered again on exit, even if the body raises.
    """
    geoarrow_pyarrow = pytest.importorskip("geoarrow.pyarrow")
    geoarrow_pyarrow.register_extension_types()
    try:
        yield
    finally:
        geoarrow_pyarrow.unregister_extension_types()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dim", ["xy", "xyz"])
@pytest.mark.parametrize(
    "geometry_type",
    ["point", "linestring", "polygon", "multipoint", "multilinestring", "multipolygon"],
)
def test_geoarrow_export_with_extension_types(geometry_type, dim):
    """Exported geometry columns are recognized as extension types."""
    # ensure the exported data can be imported by geoarrow-pyarrow and are
    # recognized as extension types
    geoarrow_dir = DATA_PATH / "geoarrow"
    suffix = geometry_type + "_z" if dim == "xyz" else geometry_type

    # Build the input frame from the WKB example file
    df = feather.read_feather(geoarrow_dir / f"example-{suffix}-wkb.arrow")
    df["geometry"] = GeoSeries.from_wkb(df["geometry"])
    df["row_number"] = df["row_number"].astype("int32")
    df = GeoDataFrame(df)
    df.geometry.array.crs = None

    pytest.importorskip("geoarrow.pyarrow")

    export_options = (
        {"geometry_encoding": "WKB"},
        {"geometry_encoding": "geoarrow"},
        {"geometry_encoding": "geoarrow", "interleaved": False},
    )
    with with_geoarrow_extension_types():
        for options in export_options:
            table = pa_table(df.to_arrow(**options))
            assert isinstance(table["geometry"].type, pa.ExtensionType)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    Version(shapely.__version__) < Version("2.0.2"),
    reason="from_ragged_array failing with read-only array input",
)
@pytest.mark.parametrize("dim", ["xy", "xyz"])
@pytest.mark.parametrize(
    "geometry_type",
    [
        "point",
        "linestring",
        "polygon",
        "multipoint",
        "multilinestring",
        "multipolygon",
    ],
)
def test_geoarrow_import(geometry_type, dim):
    """All three stored encodings import to the same GeoDataFrame."""
    geoarrow_dir = DATA_PATH / "geoarrow"
    suffix = geometry_type + "_z" if dim == "xyz" else geometry_type

    # Build the reference frame from the WKB example file
    df = feather.read_feather(geoarrow_dir / f"example-{suffix}-wkb.arrow")
    df["geometry"] = GeoSeries.from_wkb(df["geometry"])
    df = GeoDataFrame(df)
    df.geometry.crs = None

    # WKB, interleaved geoarrow and separated geoarrow, in that order
    example_files = (
        f"example-{suffix}-wkb.arrow",
        f"example-{suffix}-interleaved.arrow",
        f"example-{suffix}.arrow",
    )
    for filename in example_files:
        table = feather.read_table(geoarrow_dir / filename)
        imported = GeoDataFrame.from_arrow(table)
        assert_geodataframe_equal(imported, df)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    Version(shapely.__version__) < Version("2.0.2"),
    reason="from_ragged_array failing with read-only array input",
)
@pytest.mark.parametrize("encoding", ["WKB", "geoarrow"])
def test_geoarrow_import_geometry_column(encoding):
    """Check which column ``from_arrow`` selects as the active geometry."""
    pytest.importorskip("pyproj")
    gdf = GeoDataFrame(geometry=[box(0, 0, 10, 10)])
    gdf["centroid"] = gdf.geometry.centroid

    # both geometry columns present, no explicit choice
    full_table = pa_table(gdf.to_arrow(geometry_encoding=encoding))
    result = GeoDataFrame.from_arrow(full_table)
    assert_geodataframe_equal(result, gdf)
    assert result.active_geometry_name == "geometry"

    # only the "centroid" column present
    centroid_table = pa_table(gdf[["centroid"]].to_arrow(geometry_encoding=encoding))
    result = GeoDataFrame.from_arrow(centroid_table)
    assert result.active_geometry_name == "centroid"

    # both columns present, "centroid" requested explicitly
    full_table = pa_table(gdf.to_arrow(geometry_encoding=encoding))
    result = GeoDataFrame.from_arrow(full_table, geometry="centroid")
    assert result.active_geometry_name == "centroid"
    assert_geodataframe_equal(result, gdf.set_geometry("centroid"))
|
||||
|
||||
|
||||
def test_geoarrow_import_missing_geometry():
    """Importing Arrow data without any geometry raises ValueError."""
    pytest.importorskip("pyarrow", minversion="14.0.0")

    plain_table = pa.table({"a": [0, 1, 2], "b": [0.1, 0.2, 0.3]})

    # table without a geometry column
    with pytest.raises(ValueError, match="No geometry column found"):
        GeoDataFrame.from_arrow(plain_table)

    # array that is not a GeoArrow geometry field
    plain_array = plain_table["a"].chunk(0)
    with pytest.raises(ValueError, match="No GeoArrow geometry field found"):
        GeoSeries.from_arrow(plain_array)
|
||||
|
||||
|
||||
def test_geoarrow_import_capsule_interface():
    """``from_arrow`` accepts the ``to_arrow`` result without a pyarrow conversion."""
    # ensure we can import non-pyarrow object
    pytest.importorskip("pyarrow", minversion="14.0.0")
    original = GeoDataFrame({"col": [1]}, geometry=[box(0, 0, 10, 10)])

    exported = original.to_arrow()
    imported = GeoDataFrame.from_arrow(exported)
    assert_geodataframe_equal(imported, original)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dim", ["xy", "xyz"])
@pytest.mark.parametrize(
    "geometry_type",
    ["point", "linestring", "polygon", "multipoint", "multilinestring", "multipolygon"],
)
def test_geoarrow_import_from_extension_types(geometry_type, dim):
    """Round trip through Arrow while the geoarrow extension types are registered."""
    # ensure the exported data can be imported by geoarrow-pyarrow and are
    # recognized as extension types
    pytest.importorskip("pyproj")
    geoarrow_dir = DATA_PATH / "geoarrow"
    suffix = geometry_type + "_z" if dim == "xyz" else geometry_type

    # Build the reference frame from the WKB example file
    df = feather.read_feather(geoarrow_dir / f"example-{suffix}-wkb.arrow")
    df["geometry"] = GeoSeries.from_wkb(df["geometry"])
    df = GeoDataFrame(df, crs="EPSG:3857")

    pytest.importorskip("geoarrow.pyarrow")

    export_options = (
        {"geometry_encoding": "WKB"},
        {"geometry_encoding": "geoarrow"},
        {"geometry_encoding": "geoarrow", "interleaved": False},
    )
    with with_geoarrow_extension_types():
        for options in export_options:
            imported = GeoDataFrame.from_arrow(pa_table(df.to_arrow(**options)))
            assert_geodataframe_equal(imported, df)
|
||||
|
||||
|
||||
def test_geoarrow_import_geoseries():
    """``GeoSeries.from_arrow`` accepts geoarrow-pyarrow arrays."""
    pytest.importorskip("pyproj")
    gp = pytest.importorskip("geoarrow.pyarrow")
    ser = GeoSeries.from_wkt(["POINT (1 1)", "POINT (2 2)"], crs="EPSG:3857")

    with with_geoarrow_extension_types():
        for encoding in ("WKB", "geoarrow"):
            arr = gp.array(ser.to_arrow(geometry_encoding=encoding))
            assert_geoseries_equal(GeoSeries.from_arrow(arr), ser)

        # the name is lost when going through a pyarrow.Array
        ser.name = "name"
        arr = gp.array(ser.to_arrow())
        unnamed = GeoSeries.from_arrow(arr)
        assert unnamed.name is None
        # we can specify the name as one of the kwargs
        named = GeoSeries.from_arrow(arr, name="test")
        assert_geoseries_equal(named, ser)
|
||||
|
||||
|
||||
def test_geoarrow_import_unknown_geoarrow_type():
    """A geometry field with an unrecognized extension name raises TypeError."""
    gdf = GeoDataFrame({"col": [1]}, geometry=[box(0, 0, 10, 10)])
    table = pa_table(gdf.to_arrow())
    original_schema = table.schema

    # relabel the geometry field with an extension name that does not exist
    bogus_metadata = {
        b"ARROW:extension:name": b"geoarrow.unknown",
        b"ARROW:extension:metadata": b"{}",
    }
    bogus_field = original_schema.field("geometry").with_metadata(bogus_metadata)
    bogus_schema = pa.schema([original_schema.field(0), bogus_field])
    bogus_table = table.cast(bogus_schema)

    with pytest.raises(TypeError, match="Unknown GeoArrow extension type"):
        GeoDataFrame.from_arrow(bogus_table)
|
||||
@@ -0,0 +1,306 @@
|
||||
from collections import OrderedDict
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from shapely.geometry import (
|
||||
LineString,
|
||||
MultiLineString,
|
||||
MultiPoint,
|
||||
MultiPolygon,
|
||||
Point,
|
||||
Polygon,
|
||||
)
|
||||
|
||||
from geopandas import GeoDataFrame
|
||||
from geopandas.io.file import infer_schema
|
||||
|
||||
import pytest
|
||||
|
||||
# Credit: Polygons below come from Montreal city Open Data portal
# http://donnees.ville.montreal.qc.ca/dataset/unites-evaluation-fonciere

# --- 2D polygons ---
city_hall_boundaries = Polygon(
    [
        (-73.5541107525234, 45.5091983609661),
        (-73.5546126200639, 45.5086813829106),
        (-73.5540185061397, 45.5084409343852),
        (-73.5539986525799, 45.5084323044531),
        (-73.5535801792994, 45.5089539203786),
        (-73.5541107525234, 45.5091983609661),
    ]
)
vauquelin_place = Polygon(
    [
        (-73.5542465586147, 45.5081555487952),
        (-73.5540185061397, 45.5084409343852),
        (-73.5546126200639, 45.5086813829106),
        (-73.5548825850032, 45.5084033554357),
        (-73.5542465586147, 45.5081555487952),
    ]
)

# --- 2D linestrings ---
city_hall_walls = [
    LineString(
        [
            (-73.5541107525234, 45.5091983609661),
            (-73.5546126200639, 45.5086813829106),
            (-73.5540185061397, 45.5084409343852),
        ]
    ),
    LineString(
        [
            (-73.5539986525799, 45.5084323044531),
            (-73.5535801792994, 45.5089539203786),
            (-73.5541107525234, 45.5091983609661),
        ]
    ),
]

# --- 2D points ---
city_hall_entrance = Point(-73.553785, 45.508722)
city_hall_balcony = Point(-73.554138, 45.509080)
city_hall_council_chamber = Point(-73.554246, 45.508931)

# --- 3D geometries (third coordinate is 300 everywhere) ---
point_3D = Point(-73.553785, 45.508722, 300)
linestring_3D = LineString(
    [
        (-73.5541107525234, 45.5091983609661, 300),
        (-73.5546126200639, 45.5086813829106, 300),
        (-73.5540185061397, 45.5084409343852, 300),
    ]
)
polygon_3D = Polygon(
    [
        (-73.5541107525234, 45.5091983609661, 300),
        (-73.5535801792994, 45.5089539203786, 300),
        (-73.5541107525234, 45.5091983609661, 300),
    ]
)
|
||||
|
||||
|
||||
def test_infer_schema_only_points():
    """Two 2D Points infer the single geometry type "Point"."""
    gdf = GeoDataFrame(geometry=[city_hall_entrance, city_hall_balcony])
    expected = {"geometry": "Point", "properties": OrderedDict()}

    assert infer_schema(gdf) == expected
|
||||
|
||||
|
||||
def test_infer_schema_points_and_multipoints():
    """A MultiPoint plus a Point infer the list ["MultiPoint", "Point"]."""
    geometries = [
        MultiPoint([city_hall_entrance, city_hall_balcony]),
        city_hall_balcony,
    ]
    gdf = GeoDataFrame(geometry=geometries)
    expected = {
        "geometry": ["MultiPoint", "Point"],
        "properties": OrderedDict(),
    }

    assert infer_schema(gdf) == expected
|
||||
|
||||
|
||||
def test_infer_schema_only_multipoints():
    """A single MultiPoint infers the geometry type "MultiPoint"."""
    multipoint = MultiPoint(
        [city_hall_entrance, city_hall_balcony, city_hall_council_chamber]
    )
    gdf = GeoDataFrame(geometry=[multipoint])
    expected = {"geometry": "MultiPoint", "properties": OrderedDict()}

    assert infer_schema(gdf) == expected
|
||||
|
||||
|
||||
def test_infer_schema_only_linestrings():
    """Two 2D LineStrings infer the geometry type "LineString"."""
    gdf = GeoDataFrame(geometry=city_hall_walls)
    expected = {"geometry": "LineString", "properties": OrderedDict()}

    assert infer_schema(gdf) == expected
|
||||
|
||||
|
||||
def test_infer_schema_linestrings_and_multilinestrings():
    """A MultiLineString plus a LineString infer both types as a list."""
    geometries = [MultiLineString(city_hall_walls), city_hall_walls[0]]
    gdf = GeoDataFrame(geometry=geometries)
    expected = {
        "geometry": ["MultiLineString", "LineString"],
        "properties": OrderedDict(),
    }

    assert infer_schema(gdf) == expected
|
||||
|
||||
|
||||
def test_infer_schema_only_multilinestrings():
    """A single MultiLineString infers the geometry type "MultiLineString"."""
    gdf = GeoDataFrame(geometry=[MultiLineString(city_hall_walls)])
    expected = {
        "geometry": "MultiLineString",
        "properties": OrderedDict(),
    }

    assert infer_schema(gdf) == expected
|
||||
|
||||
|
||||
def test_infer_schema_only_polygons():
    """Two 2D Polygons infer the geometry type "Polygon"."""
    gdf = GeoDataFrame(geometry=[city_hall_boundaries, vauquelin_place])
    expected = {"geometry": "Polygon", "properties": OrderedDict()}

    assert infer_schema(gdf) == expected
|
||||
|
||||
|
||||
def test_infer_schema_polygons_and_multipolygons():
    """A MultiPolygon plus a Polygon infer both types as a list."""
    geometries = [
        MultiPolygon([city_hall_boundaries, vauquelin_place]),
        city_hall_boundaries,
    ]
    gdf = GeoDataFrame(geometry=geometries)
    expected = {
        "geometry": ["MultiPolygon", "Polygon"],
        "properties": OrderedDict(),
    }

    assert infer_schema(gdf) == expected
|
||||
|
||||
|
||||
def test_infer_schema_only_multipolygons():
    """A single MultiPolygon infers the geometry type "MultiPolygon"."""
    multipolygon = MultiPolygon([city_hall_boundaries, vauquelin_place])
    gdf = GeoDataFrame(geometry=[multipolygon])
    expected = {"geometry": "MultiPolygon", "properties": OrderedDict()}

    assert infer_schema(gdf) == expected
|
||||
|
||||
|
||||
def test_infer_schema_multiple_shape_types():
    """All six 2D shape types in one frame are all listed in the schema."""
    geometries = [
        MultiPolygon([city_hall_boundaries, vauquelin_place]),
        city_hall_boundaries,
        MultiLineString(city_hall_walls),
        city_hall_walls[0],
        MultiPoint([city_hall_entrance, city_hall_balcony]),
        city_hall_balcony,
    ]
    gdf = GeoDataFrame(geometry=geometries)
    expected_types = [
        "MultiPolygon",
        "Polygon",
        "MultiLineString",
        "LineString",
        "MultiPoint",
        "Point",
    ]

    assert infer_schema(gdf) == {
        "geometry": expected_types,
        "properties": OrderedDict(),
    }
|
||||
|
||||
|
||||
def test_infer_schema_mixed_3D_shape_type():
    """A 3D Point among all 2D shape types adds "3D Point" to the schema."""
    geometries = [
        MultiPolygon([city_hall_boundaries, vauquelin_place]),
        city_hall_boundaries,
        MultiLineString(city_hall_walls),
        city_hall_walls[0],
        MultiPoint([city_hall_entrance, city_hall_balcony]),
        city_hall_balcony,
        point_3D,
    ]
    gdf = GeoDataFrame(geometry=geometries)
    # the 3D type is expected first, followed by the 2D types
    expected_types = [
        "3D Point",
        "MultiPolygon",
        "Polygon",
        "MultiLineString",
        "LineString",
        "MultiPoint",
        "Point",
    ]

    assert infer_schema(gdf) == {
        "geometry": expected_types,
        "properties": OrderedDict(),
    }
|
||||
|
||||
|
||||
def test_infer_schema_mixed_3D_Point():
    """A 2D Point plus a 3D Point infer ["3D Point", "Point"]."""
    gdf = GeoDataFrame(geometry=[city_hall_balcony, point_3D])
    expected = {
        "geometry": ["3D Point", "Point"],
        "properties": OrderedDict(),
    }

    assert infer_schema(gdf) == expected
|
||||
|
||||
|
||||
def test_infer_schema_only_3D_Points():
    """Only 3D Points infer the single geometry type "3D Point"."""
    gdf = GeoDataFrame(geometry=[point_3D, point_3D])
    expected = {"geometry": "3D Point", "properties": OrderedDict()}

    assert infer_schema(gdf) == expected
|
||||
|
||||
|
||||
def test_infer_schema_mixed_3D_linestring():
    """A 2D plus a 3D LineString infer ["3D LineString", "LineString"]."""
    gdf = GeoDataFrame(geometry=[city_hall_walls[0], linestring_3D])
    expected = {
        "geometry": ["3D LineString", "LineString"],
        "properties": OrderedDict(),
    }

    assert infer_schema(gdf) == expected
|
||||
|
||||
|
||||
def test_infer_schema_only_3D_linestrings():
    """Only 3D LineStrings infer the single geometry type "3D LineString"."""
    gdf = GeoDataFrame(geometry=[linestring_3D, linestring_3D])
    expected = {
        "geometry": "3D LineString",
        "properties": OrderedDict(),
    }

    assert infer_schema(gdf) == expected
|
||||
|
||||
|
||||
def test_infer_schema_mixed_3D_Polygon():
    """A 2D plus a 3D Polygon infer ["3D Polygon", "Polygon"]."""
    gdf = GeoDataFrame(geometry=[city_hall_boundaries, polygon_3D])
    expected = {
        "geometry": ["3D Polygon", "Polygon"],
        "properties": OrderedDict(),
    }

    assert infer_schema(gdf) == expected
|
||||
|
||||
|
||||
def test_infer_schema_only_3D_Polygons():
    """Only 3D Polygons infer the single geometry type "3D Polygon"."""
    gdf = GeoDataFrame(geometry=[polygon_3D, polygon_3D])
    expected = {"geometry": "3D Polygon", "properties": OrderedDict()}

    assert infer_schema(gdf) == expected
|
||||
|
||||
|
||||
def test_infer_schema_null_geometry_and_2D_point():
    """A missing geometry next to a 2D Point still infers "Point"."""
    gdf = GeoDataFrame(geometry=[None, city_hall_entrance])

    # the None geometry does not contribute a geometry type
    expected = {"geometry": "Point", "properties": OrderedDict()}
    assert infer_schema(gdf) == expected
|
||||
|
||||
|
||||
def test_infer_schema_null_geometry_and_3D_point():
    """A missing geometry next to a 3D Point still infers "3D Point"."""
    gdf = GeoDataFrame(geometry=[None, point_3D])

    # the None geometry does not contribute a geometry type
    expected = {"geometry": "3D Point", "properties": OrderedDict()}
    assert infer_schema(gdf) == expected
|
||||
|
||||
|
||||
def test_infer_schema_null_geometry_all():
    """A frame with only missing geometries infers the type "Unknown"."""
    gdf = GeoDataFrame(geometry=[None, None])

    # None geometry type is then replaced by 'Unknown'
    # (default geometry type supported by Fiona)
    expected = {"geometry": "Unknown", "properties": OrderedDict()}
    assert infer_schema(gdf) == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "array_data,dtype", [([1, 2**31 - 1], np.int32), ([1, np.nan], pd.Int32Dtype())]
)
def test_infer_schema_int32(array_data, dtype):
    """A 32-bit integer column must be reported as ``"int32"``.

    Parametrized over a numpy ``int32`` array and a pandas ``Int32Dtype``
    array that contains a missing value.
    """
    frame = GeoDataFrame(geometry=[city_hall_entrance, city_hall_balcony])
    frame["int32_column"] = pd.array(data=array_data, dtype=dtype)

    expected_schema = {
        "geometry": "Point",
        "properties": OrderedDict([("int32_column", "int32")]),
    }
    assert infer_schema(frame) == expected_schema
|
||||
|
||||
|
||||
def test_infer_schema_int64():
    """A pandas ``Int64Dtype`` column with a missing value must map to ``"int"``."""
    frame = GeoDataFrame(geometry=[city_hall_entrance, city_hall_balcony])
    frame["int64_column"] = pd.array([1, np.nan], dtype=pd.Int64Dtype())

    expected_schema = {
        "geometry": "Point",
        "properties": OrderedDict([("int64_column", "int")]),
    }
    assert infer_schema(frame) == expected_schema
|
||||
@@ -0,0 +1,56 @@
|
||||
"""
|
||||
See generate_legacy_storage_files.py for the creation of the legacy files.
|
||||
|
||||
"""
|
||||
|
||||
import glob
|
||||
import os
|
||||
import pathlib
|
||||
|
||||
import pandas as pd
|
||||
|
||||
import pytest
|
||||
from geopandas.testing import assert_geodataframe_equal
|
||||
|
||||
# Directory named "data" located next to this test module.
DATA_PATH = pathlib.Path(__file__).parent / "data"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def current_pickle_data():
    """Return the pickle data generated by the currently installed version.

    The generator module is imported lazily, inside the fixture, and the
    result is shared by every test in this module (``scope="module"``).
    """
    from . import generate_legacy_storage_files as legacy_generator

    return legacy_generator.create_pickle_data()
|
||||
|
||||
|
||||
# Every legacy pickle file shipped in the "pickle" sub-directory of DATA_PATH.
files = glob.glob(str(DATA_PATH / "pickle" / "*.pickle"))


# os.path.basename understands the platform's path separator, so the test ids
# are the bare file names on every OS (splitting on "/" would keep the whole
# path on Windows, where glob returns backslash-separated paths).
@pytest.fixture(params=files, ids=[os.path.basename(p) for p in files])
def legacy_pickle(request):
    """Parametrized fixture yielding the path of one legacy pickle file."""
    return request.param
|
||||
|
||||
|
||||
@pytest.mark.skip(
    reason=(
        "shapely 2.0/pygeos-based unpickling currently only works for "
        "shapely-2.0/pygeos-written files"
    ),
)
def test_legacy_pickles(current_pickle_data, legacy_pickle):
    """Compare each object of a legacy pickle with its freshly generated twin.

    Currently skipped unconditionally (see the ``skip`` reason above).
    """
    unpickled = pd.read_pickle(legacy_pickle)

    for key, legacy_frame in unpickled.items():
        assert_geodataframe_equal(legacy_frame, current_pickle_data[key])
|
||||
|
||||
|
||||
def test_round_trip_current(tmpdir, current_pickle_data):
    """Pickle every current object to ``tmpdir`` and read it back unchanged."""
    for key, frame in current_pickle_data.items():
        target = str(tmpdir / "{}.pickle".format(key))
        frame.to_pickle(target)

        restored = pd.read_pickle(target)

        assert_geodataframe_equal(restored, frame)
        # has_sindex must still answer with a plain bool after unpickling.
        assert isinstance(restored.has_sindex, bool)
|
||||
@@ -0,0 +1,878 @@
|
||||
"""
|
||||
Tests here include reading/writing to different types of spatial databases.
|
||||
The spatial database tests may not work without additional system
|
||||
configuration. PostGIS tests require a test database to have been set up;
|
||||
see geopandas.tests.util for more information.
|
||||
"""
|
||||
|
||||
import os
|
||||
import warnings
|
||||
from importlib.util import find_spec
|
||||
|
||||
import pandas as pd
|
||||
|
||||
import geopandas
|
||||
import geopandas._compat as compat
|
||||
from geopandas import GeoDataFrame, read_file, read_postgis
|
||||
from geopandas._compat import HAS_PYPROJ
|
||||
from geopandas.io.sql import _get_conn as get_conn
|
||||
from geopandas.io.sql import _write_postgis as write_postgis
|
||||
|
||||
import pytest
|
||||
from geopandas.tests.util import (
|
||||
create_postgis,
|
||||
create_spatialite,
|
||||
mock,
|
||||
validate_boro_df,
|
||||
)
|
||||
|
||||
# ``text`` wraps raw SQL strings for SQLAlchemy. Importing it once at module
# level avoids a local import in each SQLAlchemy-based test.
try:
    from sqlalchemy import text
except ImportError:
    # Without SQLAlchemy fall back to ``str`` so the module still imports.
    # All tests using ``text`` use engine_postgis, which ensures sqlalchemy
    # is available.
    text = str
|
||||
|
||||
|
||||
@pytest.fixture
def df_nybb(nybb_filename):
    """Read the file provided by the ``nybb_filename`` fixture with ``read_file``."""
    return read_file(nybb_filename)
|
||||
|
||||
|
||||
def check_available_postgis_drivers() -> list[str]:
    """Return the names of the importable drivers among psycopg and psycopg2.

    This prevents tests running if the relevant package isn't installed
    (rather than being skipped, as skips are treated as failures during
    postgis CI).
    """
    candidates = ("psycopg", "psycopg2")
    return [name for name in candidates if find_spec(name)]


# Evaluated once at import time; used to parametrize the postgis fixtures.
POSTGIS_DRIVERS = check_available_postgis_drivers()
|
||||
|
||||
|
||||
def prepare_database_credentials() -> dict:
    """Gather postgres connection credentials from environment variables.

    The database name is fixed to ``"test_geopandas"``; every other entry is
    read from the environment and is ``None`` when the variable is unset.
    """
    env_names = {
        "user": "PGUSER",
        "password": "PGPASSWORD",
        "host": "PGHOST",
        "port": "PGPORT",
    }
    credentials = {"dbname": "test_geopandas"}
    for field, variable in env_names.items():
        credentials[field] = os.environ.get(variable)
    return credentials
|
||||
|
||||
|
||||
@pytest.fixture()
def connection_postgis(request):
    """Yield a postgres connection opened with either psycopg2 or psycopg.

    Use this as an indirect fixture, where the request parameter is
    POSTGIS_DRIVERS. The test is skipped when the driver cannot be imported
    or the connection attempt raises the driver's ``OperationalError``.
    """
    driver = pytest.importorskip(request.param)

    try:
        connection = driver.connect(**prepare_database_credentials())
    except driver.OperationalError:
        pytest.skip("Cannot connect with postgresql database")

    # Silence the pandas warning about non-SQLAlchemy connectables for as
    # long as the test body is running.
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore", message="pandas only supports SQLAlchemy connectable.*"
        )
        yield connection
    connection.close()
|
||||
|
||||
|
||||
@pytest.fixture()
def engine_postgis(request):
    """
    Yield a sqlalchemy engine that uses either psycopg2 or psycopg.

    Use this as an indirect fixture, where the request parameter is
    POSTGIS_DRIVERS. The test is skipped when sqlalchemy is missing or when
    building the engine / opening a first connection fails for any reason.
    """
    sqlalchemy = pytest.importorskip("sqlalchemy")
    from sqlalchemy.engine.url import URL

    creds = prepare_database_credentials()
    try:
        url = URL.create(
            drivername=f"postgresql+{request.param}",
            username=creds["user"],
            database=creds["dbname"],
            password=creds["password"],
            host=creds["host"],
            port=creds["port"],
        )
        engine = sqlalchemy.create_engine(url)
        # Probe the database right away so an unreachable server leads to a
        # skip instead of a failure inside the test.
        engine.connect()
    except Exception:
        pytest.skip("Cannot connect with postgresql database")

    yield engine
    engine.dispose()
|
||||
|
||||
|
||||
@pytest.fixture()
def connection_spatialite():
    """
    Return a memory-based SQLite3 connection with SpatiaLite enabled & initialized.

    `The sqlite3 module must be built with loadable extension support
    <https://docs.python.org/3/library/sqlite3.html#f1>`_ and
    `SpatiaLite <https://www.gaia-gis.it/fossil/libspatialite/index>`_
    must be available on the system as a SQLite module.
    Packages available on Anaconda meet requirements.

    Any exception raised while opening or preparing the database (for
    example ``AttributeError`` on missing support for loadable SQLite
    extensions, or ``sqlite3.OperationalError`` on missing SpatiaLite) is
    caught and the test is skipped.
    """
    sqlite3 = pytest.importorskip("sqlite3")

    # Open the connection on its own so that ``con`` is always bound when the
    # clean-up in the second handler runs (otherwise a failing connect would
    # surface as an UnboundLocalError instead of a skip).
    try:
        con = sqlite3.connect(":memory:")
    except Exception:
        pytest.skip("Cannot setup spatialite database")

    try:
        # ``with con`` commits on success and rolls back on error; it does
        # not close the connection.
        with con:
            con.enable_load_extension(True)
            con.load_extension("mod_spatialite")
            con.execute("SELECT InitSpatialMetaData(TRUE)")
    except Exception:
        con.close()
        pytest.skip("Cannot setup spatialite database")

    yield con
    con.close()
|
||||
|
||||
|
||||
def drop_table_if_exists(conn_or_engine, table):
    """Drop ``table`` through ``conn_or_engine`` if the inspector reports it.

    Requires sqlalchemy (the calling test is skipped otherwise). Does nothing
    when the table is not present.
    """
    sqlalchemy = pytest.importorskip("sqlalchemy")

    if not sqlalchemy.inspect(conn_or_engine).has_table(table):
        return

    metadata = sqlalchemy.MetaData()
    with warnings.catch_warnings():
        # Reflection warns about the geometry column type; that is
        # irrelevant for dropping the table.
        warnings.filterwarnings(
            "ignore", message="Did not recognize type 'geometry' of column.*"
        )
        metadata.reflect(conn_or_engine)

    reflected = metadata.tables.get(table)
    if reflected is not None:
        reflected.drop(conn_or_engine, checkfirst=True)
|
||||
|
||||
|
||||
@pytest.fixture
def df_mixed_single_and_multi():
    """GeoDataFrame (epsg:4326) with a LineString, a MultiLineString and a Point."""
    from shapely.geometry import LineString, MultiLineString, Point

    geometries = [
        LineString([(0, 0), (1, 1)]),
        MultiLineString([[(0, 0), (1, 1)], [(2, 2), (3, 3)]]),
        Point(0, 1),
    ]
    return geopandas.GeoDataFrame({"geometry": geometries}, crs="epsg:4326")
|
||||
|
||||
|
||||
@pytest.fixture
def df_geom_collection():
    """GeoDataFrame (epsg:4326) holding one GeometryCollection.

    The collection bundles a Polygon, a LineString and a Point.
    """
    from shapely.geometry import GeometryCollection, LineString, Point, Polygon

    members = [
        Polygon([(0, 0), (1, 1), (0, 1)]),
        LineString([(0, 0), (1, 1)]),
        Point(0, 0),
    ]
    return geopandas.GeoDataFrame(
        {"geometry": [GeometryCollection(members)]},
        crs="epsg:4326",
    )
|
||||
|
||||
|
||||
@pytest.fixture
def df_linear_ring():
    """GeoDataFrame (epsg:4326) holding a single LinearRing."""
    from shapely.geometry import LinearRing

    ring = LinearRing(((0, 0), (0, 1), (1, 1), (1, 0)))
    return geopandas.GeoDataFrame({"geometry": [ring]}, crs="epsg:4326")
|
||||
|
||||
|
||||
@pytest.fixture
def df_3D_geoms():
    """GeoDataFrame (epsg:4326) with a LineString, Polygon and Point built from xyz."""
    from shapely.geometry import LineString, Point, Polygon

    geometries = [
        LineString([(0, 0, 0), (1, 1, 1)]),
        Polygon([(0, 0, 0), (1, 1, 1), (0, 1, 1)]),
        Point(0, 1, 2),
    ]
    return geopandas.GeoDataFrame({"geometry": geometries}, crs="epsg:4326")
|
||||
|
||||
|
||||
class TestIO:
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_get_conn(self, engine_postgis):
    """``get_conn`` yields a Connection for engines and connections alike."""
    connection_cls = pytest.importorskip("sqlalchemy.engine.base").Connection

    # An engine is accepted ...
    with get_conn(engine_postgis) as from_engine:
        assert isinstance(from_engine, connection_cls)

    # ... and so is an already opened connection.
    with engine_postgis.connect() as opened, get_conn(opened) as from_connection:
        assert isinstance(from_connection, connection_cls)

    # Any other object is rejected with a ValueError.
    with pytest.raises(ValueError), get_conn(object()):
        pass
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_postgis_default(self, connection_postgis, df_nybb):
    """Read the nybb table with default arguments and expect no CRS."""
    create_postgis(connection_postgis, df_nybb)

    result = read_postgis("SELECT * FROM nybb;", connection_postgis)

    validate_boro_df(result)
    # no crs defined on the created geodatabase, and none specified
    # by user; should not be set to 0, as from get_srid failure
    assert result.crs is None
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_postgis_custom_geom_col(self, connection_postgis, df_nybb):
    """Read a table whose geometry column is named ``the_geom``."""
    geometry_column = "the_geom"
    create_postgis(connection_postgis, df_nybb, geom_col=geometry_column)

    result = read_postgis(
        "SELECT * FROM nybb;", connection_postgis, geom_col=geometry_column
    )

    validate_boro_df(result)
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_postgis_select_geom_as(self, connection_postgis, df_nybb):
    """Tests that a SELECT {geom} AS {some_other_geom} works."""
    stored_name = "geom"
    aliased_name = "the_geom"
    create_postgis(connection_postgis, df_nybb, geom_col=stored_name)

    # The geometry column is selected under an alias; read_postgis is then
    # pointed at the alias rather than the stored column name.
    query = """SELECT borocode, boroname, shape_leng, shape_area,
{} as {} FROM nybb;""".format(
        stored_name, aliased_name
    )
    result = read_postgis(query, connection_postgis, geom_col=aliased_name)

    validate_boro_df(result)
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_postgis_get_srid(self, connection_postgis, df_nybb):
    """Tests that an SRID can be read from a geodatabase (GH #451)."""
    expected_crs = "epsg:4269"
    reprojected = df_nybb.to_crs(expected_crs)
    create_postgis(connection_postgis, reprojected, srid=4269)

    result = read_postgis("SELECT * FROM nybb;", connection_postgis)

    validate_boro_df(result)
    assert result.crs == expected_crs
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_postgis_override_srid(self, connection_postgis, df_nybb):
    """Tests that a user specified CRS overrides the geodatabase SRID."""
    requested_crs = df_nybb.crs
    # The table is created with SRID 4269, the read asks for the frame's CRS.
    create_postgis(connection_postgis, df_nybb, srid=4269)

    result = read_postgis(
        "SELECT * FROM nybb;", connection_postgis, crs=requested_crs
    )

    validate_boro_df(result)
    assert result.crs == requested_crs
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_from_postgis_default(self, connection_postgis, df_nybb):
    """``GeoDataFrame.from_postgis`` reads the nybb table with defaults."""
    create_postgis(connection_postgis, df_nybb)

    result = GeoDataFrame.from_postgis("SELECT * FROM nybb;", connection_postgis)

    validate_boro_df(result, case_sensitive=False)
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_from_postgis_custom_geom_col(self, connection_postgis, df_nybb):
    """``GeoDataFrame.from_postgis`` honours a geometry column named ``the_geom``."""
    geometry_column = "the_geom"
    create_postgis(connection_postgis, df_nybb, geom_col=geometry_column)

    result = GeoDataFrame.from_postgis(
        "SELECT * FROM nybb;", connection_postgis, geom_col=geometry_column
    )

    validate_boro_df(result, case_sensitive=False)
|
||||
|
||||
def test_read_postgis_null_geom(self, connection_spatialite, df_nybb):
    """Tests that geometry with NULL is accepted."""
    geometry_column = df_nybb.geometry.name
    # Blank out the first geometry before loading the frame into SpatiaLite.
    df_nybb.geometry.iat[0] = None
    create_spatialite(connection_spatialite, df_nybb)

    query = (
        "SELECT ogc_fid, borocode, boroname, shape_leng, shape_area, "
        'AsEWKB("{0}") AS "{0}" FROM nybb'.format(geometry_column)
    )
    result = read_postgis(query, connection_spatialite, geom_col=geometry_column)

    validate_boro_df(result)
|
||||
|
||||
def test_read_postgis_binary(self, connection_spatialite, df_nybb):
    """Tests that geometry read as binary is accepted."""
    geometry_column = df_nybb.geometry.name
    create_spatialite(connection_spatialite, df_nybb)

    query = (
        "SELECT ogc_fid, borocode, boroname, shape_leng, shape_area, "
        'ST_AsBinary("{0}") AS "{0}" FROM nybb'.format(geometry_column)
    )
    result = read_postgis(query, connection_spatialite, geom_col=geometry_column)

    validate_boro_df(result)
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_postgis_chunksize(self, connection_postgis, df_nybb):
    """Test chunksize argument"""
    create_postgis(connection_postgis, df_nybb)

    # With chunksize the reader yields partial frames; glue them together.
    chunks = read_postgis("SELECT * FROM nybb;", connection_postgis, chunksize=2)
    result = pd.concat(chunks)

    validate_boro_df(result)
    # no crs defined on the created geodatabase, and none specified
    # by user; should not be set to 0, as from get_srid failure
    assert result.crs is None
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_default(self, engine_postgis, df_nybb):
    """Tests that GeoDataFrame can be written to PostGIS with defaults."""
    table_name = "nybb"

    # Start from a clean slate: if_exists="fail" requires the table to be absent.
    drop_table_if_exists(engine_postgis, table_name)

    write_postgis(df_nybb, con=engine_postgis, name=table_name, if_exists="fail")

    # Read the table back and validate its content.
    query = text("SELECT * FROM {table};".format(table=table_name))
    written = read_postgis(query, engine_postgis, geom_col="geometry")
    validate_boro_df(written)
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_uppercase_tablename(self, engine_postgis, df_nybb):
    """Tests writing GeoDataFrame to PostGIS with uppercase tablename."""
    table_name = "aTestTable"

    # Start from a clean slate: if_exists="fail" requires the table to be absent.
    drop_table_if_exists(engine_postgis, table_name)

    write_postgis(df_nybb, con=engine_postgis, name=table_name, if_exists="fail")

    # Read the table back (name quoted in the query) and validate its content.
    query = text('SELECT * FROM "{table}";'.format(table=table_name))
    written = read_postgis(query, engine_postgis, geom_col="geometry")
    validate_boro_df(written)
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_sqlalchemy_connection(self, engine_postgis, df_nybb):
    """Tests that GeoDataFrame can be written through a connection from ``begin()``."""
    table_name = "nybb_con"

    with engine_postgis.begin() as connection:
        # Start from a clean slate: if_exists="fail" requires the table
        # to be absent.
        drop_table_if_exists(connection, table_name)

        write_postgis(df_nybb, con=connection, name=table_name, if_exists="fail")

        # Read the table back over the same connection and validate it.
        query = text("SELECT * FROM {table};".format(table=table_name))
        written = read_postgis(query, connection, geom_col="geometry")
        validate_boro_df(written)
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_fail_when_table_exists(self, engine_postgis, df_nybb):
    """
    Tests that uploading the same table raises error when: if_exists='fail'.
    """
    engine = engine_postgis

    table = "nybb"

    # Ensure table exists
    write_postgis(df_nybb, con=engine, name=table, if_exists="replace")

    # pytest.raises fails the test when no ValueError is raised at all,
    # which a plain try/except around the call would silently let pass.
    with pytest.raises(ValueError, match="already exists"):
        write_postgis(df_nybb, con=engine, name=table, if_exists="fail")
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_replace_when_table_exists(self, engine_postgis, df_nybb):
    """
    Tests that replacing a table is possible when: if_exists='replace'.
    """
    table_name = "nybb"

    # The first write makes sure the table exists, the second overwrites it.
    for _ in range(2):
        write_postgis(
            df_nybb, con=engine_postgis, name=table_name, if_exists="replace"
        )

    # Read the table back and validate its content.
    query = text("SELECT * FROM {table};".format(table=table_name))
    written = read_postgis(query, engine_postgis, geom_col="geometry")
    validate_boro_df(written)
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_append_when_table_exists(self, engine_postgis, df_nybb):
    """
    Tests that appending to existing table produces correct results when:
    if_exists='append'.
    """
    engine = engine_postgis

    table = "nybb"

    orig_rows, orig_cols = df_nybb.shape
    write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
    write_postgis(df_nybb, con=engine, name=table, if_exists="append")
    # Validate
    sql = text("SELECT * FROM {table};".format(table=table))
    df = read_postgis(sql, engine, geom_col="geometry")
    new_rows, new_cols = df.shape

    # There should be twice as many rows in the new table.
    # The messages are plain strings: a trailing comma inside the
    # parentheses would turn them into 1-tuples.
    assert (
        new_rows == orig_rows * 2
    ), "There should be {target} rows,found: {current}".format(
        target=orig_rows * 2, current=new_rows
    )
    # Number of columns should stay the same
    assert (
        new_cols == orig_cols
    ), "There should be {target} columns,found: {current}".format(
        target=orig_cols, current=new_cols
    )
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_without_crs(self, engine_postgis, df_nybb):
    """
    Tests that GeoDataFrame can be written to PostGIS without CRS information.
    """
    table_name = "nybb"

    # Strip the CRS, then write; a UserWarning about the CRS is expected.
    df_nybb.geometry.array.crs = None
    with pytest.warns(UserWarning, match="Could not parse CRS from the GeoDataF"):
        write_postgis(
            df_nybb, con=engine_postgis, name=table_name, if_exists="replace"
        )

    # The SRID registered for the geometry column must be 0.
    srid_query = text(
        "SELECT Find_SRID('{schema}', '{table}', '{geom_col}');".format(
            schema="public", table=table_name, geom_col="geometry"
        )
    )
    with engine_postgis.connect() as connection:
        target_srid = connection.execute(srid_query).fetchone()[0]
    assert target_srid == 0, "SRID should be 0, found %s" % target_srid
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_with_esri_authority(self, engine_postgis, df_nybb):
    """
    Tests that GeoDataFrame can be written to PostGIS with ESRI Authority
    CRS information (GH #2414).
    """
    table_name = "nybb"

    # Reproject to an ESRI-authority CRS and write the result.
    esri_frame = df_nybb.to_crs("ESRI:102003")
    write_postgis(
        esri_frame, con=engine_postgis, name=table_name, if_exists="replace"
    )

    # The SRID registered for the geometry column must be 102003.
    srid_query = text(
        "SELECT Find_SRID('{schema}', '{table}', '{geom_col}');".format(
            schema="public", table=table_name, geom_col="geometry"
        )
    )
    with engine_postgis.connect() as connection:
        target_srid = connection.execute(srid_query).fetchone()[0]
    assert target_srid == 102003, "SRID should be 102003, found %s" % target_srid
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_geometry_collection(
    self, engine_postgis, df_geom_collection
):
    """
    Tests that writing a GeometryCollection is possible.
    """
    table_name = "geomtype_tests"

    write_postgis(
        df_geom_collection, con=engine_postgis, name=table_name, if_exists="replace"
    )

    # Ask the database for the stored geometry type ...
    type_query = text(
        "SELECT DISTINCT(GeometryType(geometry)) FROM {table} ORDER BY 1;".format(
            table=table_name
        )
    )
    with engine_postgis.connect() as connection:
        stored_type = connection.execute(type_query).fetchone()[0]

    # ... and read the rows back to check the type seen by geopandas.
    rows_query = text("SELECT * FROM {table};".format(table=table_name))
    written = read_postgis(rows_query, engine_postgis, geom_col="geometry")

    assert stored_type.upper() == "GEOMETRYCOLLECTION"
    assert written.geom_type.unique()[0] == "GeometryCollection"
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_mixed_geometry_types(
    self, engine_postgis, df_mixed_single_and_multi
):
    """
    Tests that writing a mix of single and MultiGeometries is possible.
    """
    table_name = "geomtype_tests"

    write_postgis(
        df_mixed_single_and_multi,
        con=engine_postgis,
        name=table_name,
        if_exists="replace",
    )

    # The distinct geometry types, sorted by the query, must match in order.
    type_query = text(
        "SELECT DISTINCT GeometryType(geometry) FROM {table} ORDER BY 1;".format(
            table=table_name
        )
    )
    with engine_postgis.connect() as connection:
        rows = connection.execute(type_query).fetchall()

    for position, expected in enumerate(("LINESTRING", "MULTILINESTRING", "POINT")):
        assert rows[position][0].upper() == expected
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_linear_ring(self, engine_postgis, df_linear_ring):
    """
    Tests that a LinearRing is written and stored as a LINESTRING.
    """
    table_name = "geomtype_tests"

    write_postgis(
        df_linear_ring, con=engine_postgis, name=table_name, if_exists="replace"
    )

    # Ask the database for the stored geometry type.
    type_query = text(
        "SELECT DISTINCT(GeometryType(geometry)) FROM {table} ORDER BY 1;".format(
            table=table_name
        )
    )
    with engine_postgis.connect() as connection:
        stored_type = connection.execute(type_query).fetchone()[0]

    assert stored_type.upper() == "LINESTRING"
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_in_chunks(self, engine_postgis, df_mixed_single_and_multi):
    """
    Tests that writing with ``chunksize=1`` stores every row and geometry type.
    """
    table_name = "geomtype_tests"

    write_postgis(
        df_mixed_single_and_multi,
        con=engine_postgis,
        name=table_name,
        if_exists="replace",
        chunksize=1,
    )

    # All three rows must have arrived.
    count_query = text("SELECT COUNT(geometry) FROM {table};".format(table=table_name))
    with engine_postgis.connect() as connection:
        row_count = connection.execute(count_query).fetchone()[0]
    assert row_count == 3

    # The distinct geometry types, sorted by the query, must match in order.
    type_query = text(
        "SELECT DISTINCT GeometryType(geometry) FROM {table} ORDER BY 1;".format(
            table=table_name
        )
    )
    with engine_postgis.connect() as connection:
        rows = connection.execute(type_query).fetchall()

    for position, expected in enumerate(("LINESTRING", "MULTILINESTRING", "POINT")):
        assert rows[position][0].upper() == expected
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_to_different_schema(self, engine_postgis, df_nybb):
    """
    Tests writing data to alternative schema.
    """
    table_name = "nybb"
    schema_name = "test"

    # Make sure the target schema is there before writing into it.
    create_schema = text(
        "CREATE SCHEMA IF NOT EXISTS {schema};".format(schema=schema_name)
    )
    with engine_postgis.begin() as connection:
        connection.execute(create_schema)

    write_postgis(
        df_nybb,
        con=engine_postgis,
        name=table_name,
        if_exists="replace",
        schema=schema_name,
    )

    # Read the schema-qualified table back and validate its content.
    query = text(
        "SELECT * FROM {schema}.{table};".format(schema=schema_name, table=table_name)
    )
    written = read_postgis(query, engine_postgis, geom_col="geometry")
    validate_boro_df(written)
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_to_different_schema_when_table_exists(
    self, engine_postgis, df_nybb
):
    """
    Tests writing data to alternative schema when the table may already exist.
    """
    table_name = "nybb"
    schema_name = "test"

    # Make sure the target schema is there before writing into it.
    create_schema = text(
        "CREATE SCHEMA IF NOT EXISTS {schema};".format(schema=schema_name)
    )
    with engine_postgis.begin() as connection:
        connection.execute(create_schema)

    try:
        write_postgis(
            df_nybb,
            con=engine_postgis,
            name=table_name,
            if_exists="fail",
            schema=schema_name,
        )
        # The write went through: validate what was stored.
        query = text(
            "SELECT * FROM {schema}.{table};".format(
                schema=schema_name, table=table_name
            )
        )
        written = read_postgis(query, engine_postgis, geom_col="geometry")
        validate_boro_df(written)
    except ValueError:
        # Should raise a ValueError when table exists
        pass

    # Try with replace flag on
    write_postgis(
        df_nybb,
        con=engine_postgis,
        name=table_name,
        if_exists="replace",
        schema=schema_name,
    )

    # Read the schema-qualified table back and validate its content.
    query = text(
        "SELECT * FROM {schema}.{table};".format(schema=schema_name, table=table_name)
    )
    written = read_postgis(query, engine_postgis, geom_col="geometry")
    validate_boro_df(written)
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_3D_geometries(self, engine_postgis, df_3D_geoms):
    """
    Tests that writing geometries with 3 dimensions works.
    """
    table_name = "geomtype_tests"

    write_postgis(
        df_3D_geoms, con=engine_postgis, name=table_name, if_exists="replace"
    )

    # Every geometry read back must still carry a z coordinate.
    query = text("SELECT * FROM {table};".format(table=table_name))
    written = read_postgis(query, engine_postgis, geom_col="geometry")
    assert list(written.geometry.has_z) == [True, True, True]
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_row_order(self, engine_postgis, df_nybb):
    """
    Tests that the row order in db table follows the order of the original frame.
    """
    table_name = "row_order_test"
    expected_order = df_nybb["BoroCode"].tolist()

    write_postgis(df_nybb, con=engine_postgis, name=table_name, if_exists="replace")

    # The BoroCode sequence read back must equal the one that was written.
    query = text("SELECT * FROM {table};".format(table=table_name))
    written = read_postgis(query, engine_postgis, geom_col="geometry")
    assert written["BoroCode"].tolist() == expected_order
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_append_before_table_exists(self, engine_postgis, df_nybb):
    """
    Tests that insert works with if_exists='append' when table does not exist yet.
    """
    table_name = "nybb"

    # Make sure the table is absent so that "append" has to create it.
    drop_table_if_exists(engine_postgis, table_name)

    write_postgis(df_nybb, con=engine_postgis, name=table_name, if_exists="append")

    # Read the table back and validate its content.
    query = text("SELECT * FROM {table};".format(table=table_name))
    written = read_postgis(query, engine_postgis, geom_col="geometry")
    validate_boro_df(written)
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_append_with_different_crs(self, engine_postgis, df_nybb):
    """
    Tests that a ValueError is raised if table CRS differs from frame.
    """
    table_name = "nybb"
    write_postgis(df_nybb, con=engine_postgis, name=table_name, if_exists="replace")

    # Appending a reprojected copy must be refused.
    reprojected = df_nybb.to_crs(epsg=4326)
    with pytest.raises(ValueError, match="CRS of the target table"):
        write_postgis(
            reprojected, con=engine_postgis, name=table_name, if_exists="append"
        )
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_append_without_crs(self, engine_postgis, df_nybb):
    """Append a CRS-less frame to a table that was written without a CRS."""
    # This test was included in #3328 when the default value for no
    # CRS was changed from an SRID of -1 to 0. This resolves issues
    # of appending dataframes to postgis that have no CRS as postgis
    # no CRS value is 0.
    table_name = "nybb"
    frame_without_crs = df_nybb.set_crs(None, allow_override=True)

    write_postgis(
        frame_without_crs, con=engine_postgis, name=table_name, if_exists="replace"
    )
    # append the same CRS-less dataframe once more
    write_postgis(
        frame_without_crs, con=engine_postgis, name=table_name, if_exists="append"
    )
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
@pytest.mark.xfail(
    compat.PANDAS_GE_20 and not compat.PANDAS_GE_202,
    reason="Duplicate columns are dropped in read_sql with pandas 2.0.0 and 2.0.1",
)
def test_duplicate_geometry_column_fails(self, engine_postgis):
    """
    Tests that a ValueError is raised if an SQL query returns two geometry columns.
    """
    # Both selected columns share the name "geom".
    query = "select ST_MakePoint(0, 0) as geom, ST_MakePoint(0, 0) as geom;"

    with pytest.raises(ValueError):
        read_postgis(query, engine_postgis, geom_col="geom")
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_non_epsg_crs(self, connection_postgis, df_nybb):
    """A table stored with SRID 54052 must be read back with CRS ESRI:54052."""
    esri_frame = df_nybb.to_crs(crs="esri:54052")
    create_postgis(connection_postgis, esri_frame, srid=54052)

    result = read_postgis("SELECT * FROM nybb;", connection_postgis)

    validate_boro_df(result)
    assert result.crs == "ESRI:54052"
|
||||
|
||||
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
@mock.patch("shapely.get_srid")
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_srid_not_in_table(self, mock_get_srid, connection_postgis, df_nybb):
    """Edge case: shapely reports an SRID that the PostGIS table does not know.

    ``shapely.get_srid`` is patched to return 99999; reading is then expected
    to warn that the SRID could not be found and to fail with a CRSError.
    """
    pyproj = pytest.importorskip("pyproj")

    # Pretend every geometry carries an SRID that does not exist.
    mock_get_srid.return_value = 99999

    create_postgis(connection_postgis, df_nybb.to_crs(crs="epsg:4326"))

    query = "SELECT * FROM nybb;"
    # The warning is emitted inside the call that eventually raises, hence
    # the warns-context is nested inside the raises-context.
    with pytest.raises(pyproj.exceptions.CRSError, match="crs not found"):
        with pytest.warns(UserWarning, match="Could not find srid 99999"):
            read_postgis(query, connection_postgis)
|
||||
|
||||
@mock.patch("geopandas.io.sql._get_spatial_ref_sys_df")
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_no_spatial_ref_sys_table_in_postgis(
    self, mock_get_spatial_ref_sys_df, connection_postgis, df_nybb
):
    """Reading still yields the right CRS when spatial_ref_sys is unavailable.

    The lookup of the spatial reference system table is patched to raise a
    ``pandas.errors.DatabaseError``; a UserWarning is expected and the CRS
    of the result must still be EPSG:4326.
    """
    # Simulate a database without a spatial_ref_sys table.
    mock_get_spatial_ref_sys_df.side_effect = pd.errors.DatabaseError

    create_postgis(connection_postgis, df_nybb.to_crs(crs="epsg:4326"), srid=4326)

    query = "SELECT * FROM nybb;"
    expected_warning = "Could not find the spatial reference system table"
    with pytest.warns(UserWarning, match=expected_warning):
        result = read_postgis(query, connection_postgis)

    assert result.crs == "EPSG:4326"
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_non_epsg_crs_chunksize(self, connection_postgis, df_nybb):
    """Chunked reading (``chunksize``) of a table with a non-EPSG CRS."""
    create_postgis(connection_postgis, df_nybb.to_crs(crs="esri:54052"), srid=54052)

    # With ``chunksize`` the reader yields several frames; glue them together
    # before validating the content.
    chunks = read_postgis("SELECT * FROM nybb;", connection_postgis, chunksize=2)
    result = pd.concat(chunks)

    validate_boro_df(result)
    assert result.crs == "ESRI:54052"
|
||||
@@ -0,0 +1,118 @@
|
||||
"""Vendored, cut down version of pyogrio/util.py for use with fiona"""
|
||||
|
||||
import re
|
||||
import sys
|
||||
from urllib.parse import urlparse
|
||||
|
||||
|
||||
def vsi_path(path: str) -> str:
    """
    Return ``path`` as either a plain local path or a GDAL ``/vsi...`` path.

    Paths that already start with ``/vsi`` are returned unchanged. Anything
    else is parsed; if it carries a URI scheme, an archive part, or ends in
    ``.zip`` it is converted to a VSI path, otherwise it is returned as is.
    """
    # Nothing to do for a path that is already in GDAL's format.
    if path.startswith("/vsi"):
        return path

    # A Windows drive prefix (e.g. "C:\") looks like a URL scheme to
    # `urlparse`, so such paths are dealt with before any parsing happens.
    has_drive_letter = sys.platform == "win32" and re.match("^[a-zA-Z]\\:", path)
    if has_drive_letter:
        before_bang = path.split("!")[0]
        if not before_bang.endswith(".zip"):
            return path

        # Prefix with the zip scheme, then continue with the normal parsing.
        path = f"zip://{path}"

    path, archive, scheme = _parse_uri(path)

    needs_conversion = scheme or archive or path.endswith(".zip")
    if needs_conversion:
        return _construct_vsi_path(path, archive, scheme)
    return path
|
||||
|
||||
|
||||
# URI schemes this module understands. Each one maps to the suffix that is
# appended to "vsi" to form the name of the matching GDAL handler.
SCHEMES = {
    "file": "file",
    "zip": "zip",
    "tar": "tar",
    "gzip": "gzip",
    "http": "curl",
    "https": "curl",
    "ftp": "curl",
    "s3": "s3",
    "gs": "gs",
    "az": "az",
    "adls": "adls",
    "adl": "adls",  # spelling used by fsspec
    "hdfs": "hdfs",
    "webhdfs": "webhdfs",
    # GDAL can also talk to oss and swift remote filesystems, but those are
    # not (yet) accepted as URI schemes here.
}

# The subset of schemes that is mapped to the "curl" handler.
CURLSCHEMES = {scheme for scheme in SCHEMES if SCHEMES[scheme] == "curl"}
|
||||
|
||||
|
||||
def _parse_uri(path: str):
|
||||
"""
|
||||
Parse a URI
|
||||
|
||||
Returns a tuples of (path, archive, scheme)
|
||||
|
||||
path : str
|
||||
Parsed path. Includes the hostname and query string in the case
|
||||
of a URI.
|
||||
archive : str
|
||||
Parsed archive path.
|
||||
scheme : str
|
||||
URI scheme such as "https" or "zip+s3".
|
||||
"""
|
||||
parts = urlparse(path, allow_fragments=False)
|
||||
|
||||
# if the scheme is not one of GDAL's supported schemes, return raw path
|
||||
if parts.scheme and not all(p in SCHEMES for p in parts.scheme.split("+")):
|
||||
return path, "", ""
|
||||
|
||||
# we have a URI
|
||||
path = parts.path
|
||||
scheme = parts.scheme or ""
|
||||
|
||||
if parts.query:
|
||||
path += "?" + parts.query
|
||||
|
||||
if parts.scheme and parts.netloc:
|
||||
path = parts.netloc + path
|
||||
|
||||
parts = path.split("!")
|
||||
path = parts.pop() if parts else ""
|
||||
archive = parts.pop() if parts else ""
|
||||
return (path, archive, scheme)
|
||||
|
||||
|
||||
def _construct_vsi_path(path, archive, scheme) -> str:
    """Build a GDAL VSI path from an already parsed path, archive and scheme.

    ``scheme`` may combine several schemes with "+". A "zip" scheme is added
    in front when the archive or the path ends in ".zip" and none was given.
    If no VSI prefix results, ``path`` is returned unchanged.
    """
    schemes = scheme.split("+")

    zipped = archive.endswith(".zip") or path.endswith(".zip")
    if zipped and "zip" not in schemes:
        schemes.insert(0, "zip")

    prefix = ""
    suffix = ""
    if schemes:
        # Empty entries and the "file" scheme do not contribute a handler.
        handlers = [
            "vsi{0}".format(SCHEMES[p]) for p in schemes if p and p != "file"
        ]
        prefix = "/".join(handlers)

        # For curl-backed schemes the original "<scheme>://" is kept in
        # front of the remote location.
        if schemes[-1] in CURLSCHEMES:
            suffix = f"{schemes[-1]}://"

    if not prefix:
        return path

    if archive:
        return "/{}/{}{}/{}".format(prefix, suffix, archive, path.lstrip("/"))
    return "/{}/{}{}".format(prefix, suffix, path)
|
||||
@@ -0,0 +1,977 @@
|
||||
import warnings
|
||||
from packaging.version import Version
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pandas import CategoricalDtype
|
||||
from pandas.plotting import PlotAccessor
|
||||
|
||||
import geopandas
|
||||
|
||||
from ._decorator import doc
|
||||
|
||||
|
||||
def _sanitize_geoms(geoms, prefix="Multi"):
|
||||
"""
|
||||
Returns Series like geoms and index, except that any Multi geometries
|
||||
are split into their components and indices are repeated for all component
|
||||
in the same Multi geometry. At the same time, empty or missing geometries are
|
||||
filtered out. Maintains 1:1 matching of geometry to value.
|
||||
|
||||
Prefix specifies type of geometry to be flatten. 'Multi' for MultiPoint and similar,
|
||||
"Geom" for GeometryCollection.
|
||||
|
||||
Returns
|
||||
-------
|
||||
components : list of geometry
|
||||
|
||||
component_index : index array
|
||||
indices are repeated for all components in the same Multi geometry
|
||||
"""
|
||||
# TODO(shapely) look into simplifying this with
|
||||
# shapely.get_parts(geoms, return_index=True) from shapely 2.0
|
||||
components, component_index = [], []
|
||||
|
||||
if (
|
||||
not geoms.geom_type.str.startswith(prefix).any()
|
||||
and not geoms.is_empty.any()
|
||||
and not geoms.isna().any()
|
||||
):
|
||||
return geoms, np.arange(len(geoms))
|
||||
|
||||
for ix, geom in enumerate(geoms):
|
||||
if geom is not None and geom.geom_type.startswith(prefix) and not geom.is_empty:
|
||||
for poly in geom.geoms:
|
||||
components.append(poly)
|
||||
component_index.append(ix)
|
||||
elif geom is None or geom.is_empty:
|
||||
continue
|
||||
else:
|
||||
components.append(geom)
|
||||
component_index.append(ix)
|
||||
|
||||
return components, np.array(component_index)
|
||||
|
||||
|
||||
def _expand_kwargs(kwargs, multiindex):
    """
    Expand list-like style arguments so they line up with flattened geometries.

    Plot arguments are either one (single) value or a sequence of values.
    Each entry of 'kwargs' that is list-like, and does not already look like
    a valid single value for its key, is replaced (in place) by
    ``np.take(value, multiindex, axis=0)``.
    """
    from typing import Iterable

    from matplotlib.colors import is_color_like

    # Keys that only ever take one value and therefore are never expanded.
    single_value_only = ("marker", "path_effects")

    for att, value in kwargs.items():
        if "color" in att:  # color(s), edgecolor(s), facecolor(s)
            is_single_value = is_color_like(value)
        elif "linestyle" in att:  # linestyle(s)
            # One linestyle may itself be a 2-tuple: a number plus an iterable.
            is_single_value = (
                isinstance(value, tuple)
                and len(value) == 2
                and isinstance(value[1], Iterable)
            )
        else:
            is_single_value = att in single_value_only

        if is_single_value:
            continue

        if pd.api.types.is_list_like(value):
            kwargs[att] = np.take(value, multiindex, axis=0)
|
||||
|
||||
|
||||
def _PolygonPatch(polygon, **kwargs):
    """Build a matplotlib patch for a Polygon geometry.

    The exterior ring and all interior rings are combined into one compound
    path (only the first two coordinate dimensions are used). `kwargs` are
    passed on to the matplotlib.patches.PathPatch constructor, and the
    resulting matplotlib.patches.PathPatch instance is returned.

    Example (using Shapely Point and a matplotlib axes)::

        b = shapely.geometry.Point(0, 0).buffer(1.0)
        patch = _PolygonPatch(b, fc='blue', ec='blue', alpha=0.5)
        ax.add_patch(patch)

    GeoPandas originally relied on the descartes package by Sean Gillies
    (BSD license, https://pypi.org/project/descartes) for PolygonPatch, but
    this dependency was removed in favor of the below matplotlib code.
    """
    from matplotlib.patches import PathPatch
    from matplotlib.path import Path

    def _ring_to_path(ring):
        # Keep x and y only, dropping any further coordinate dimension.
        return Path(np.asarray(ring.coords)[:, :2])

    shell = _ring_to_path(polygon.exterior)
    holes = [_ring_to_path(ring) for ring in polygon.interiors]
    compound = Path.make_compound_path(shell, *holes)
    return PathPatch(compound, **kwargs)
|
||||
|
||||
|
||||
def _plot_polygon_collection(
    ax,
    geoms,
    values=None,
    color=None,
    cmap=None,
    vmin=None,
    vmax=None,
    autolim=True,
    **kwargs,
):
    """
    Draw Polygon and MultiPolygon geometries on `ax` as one patch collection.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        where shapes will be plotted
    geoms : a sequence of `N` Polygons and/or MultiPolygons (can be mixed)

    values : a sequence of `N` values, optional
        Values will be mapped to colors using vmin/vmax/cmap. They should
        have 1:1 correspondence with the geometries (not their components).
        Otherwise follows `color` / `facecolor` kwargs.
    edgecolor : single color or sequence of `N` colors
        Color for the edge of the polygons (passed through `**kwargs`)
    facecolor : single color or sequence of `N` colors
        Color to fill the polygons (passed through `**kwargs`). Cannot be
        used together with `values`.
    color : single color or sequence of `N` colors
        Sets both `edgecolor` and `facecolor`
    autolim : bool (default True)
        Update axes data limits to contain the new geometries.
    **kwargs
        Additional keyword arguments passed to the collection;
        ``markersize`` and ``marker`` are dropped.

    Returns
    -------
    collection : matplotlib.collections.Collection that was plotted
    """
    from matplotlib.collections import PatchCollection

    geoms, multiindex = _sanitize_geoms(geoms)
    if values is not None:
        # Repeat each value for every part of its (multi-part) geometry.
        values = np.take(values, multiindex, axis=0)

    # PatchCollection does not accept these point-only options.
    rejected = ("markersize", "marker")
    kwargs = {key: val for key, val in kwargs.items() if key not in rejected}

    # `color` travels with the other kwargs so it is expanded like them.
    if color is not None:
        kwargs["color"] = color

    _expand_kwargs(kwargs, multiindex)

    patches = [_PolygonPatch(poly) for poly in geoms]
    collection = PatchCollection(patches, **kwargs)

    if values is not None:
        collection.set_array(np.asarray(values))
        collection.set_cmap(cmap)
        # vmin/vmax are only applied when no explicit norm was supplied.
        if "norm" not in kwargs:
            collection.set_clim(vmin, vmax)

    ax.add_collection(collection, autolim=autolim)
    ax.autoscale_view()
    return collection
|
||||
|
||||
|
||||
def _plot_linestring_collection(
    ax,
    geoms,
    values=None,
    color=None,
    cmap=None,
    vmin=None,
    vmax=None,
    autolim=True,
    **kwargs,
):
    """
    Draw LineString and MultiLineString geometries on `ax` as one collection.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        where shapes will be plotted
    geoms : a sequence of `N` LineStrings and/or MultiLineStrings (can be
            mixed)
    values : a sequence of `N` values, optional
        Values will be mapped to colors using vmin/vmax/cmap. They should
        have 1:1 correspondence with the geometries (not their components).
    color : single color or sequence of `N` colors
        Cannot be used together with `values`.
    autolim : bool (default True)
        Update axes data limits to contain the new geometries.
    **kwargs
        Additional keyword arguments passed to the collection;
        ``markersize`` and ``marker`` are dropped.

    Returns
    -------
    collection : matplotlib.collections.Collection that was plotted
    """
    from matplotlib.collections import LineCollection

    geoms, multiindex = _sanitize_geoms(geoms)
    if values is not None:
        # Repeat each value for every part of its (multi-part) geometry.
        values = np.take(values, multiindex, axis=0)

    # LineCollection does not accept these point-only options.
    rejected = ("markersize", "marker")
    kwargs = {key: val for key, val in kwargs.items() if key not in rejected}

    # `color` travels with the other kwargs so it is expanded like them.
    if color is not None:
        kwargs["color"] = color

    _expand_kwargs(kwargs, multiindex)

    # One (n, 2) coordinate array per line; extra dimensions are dropped.
    segments = []
    for linestring in geoms:
        segments.append(np.array(linestring.coords)[:, :2])
    collection = LineCollection(segments, **kwargs)

    if values is not None:
        collection.set_array(np.asarray(values))
        collection.set_cmap(cmap)
        # vmin/vmax are only applied when no explicit norm was supplied.
        if "norm" not in kwargs:
            collection.set_clim(vmin, vmax)

    ax.add_collection(collection, autolim=autolim)
    ax.autoscale_view()
    return collection
|
||||
|
||||
|
||||
def _plot_point_collection(
    ax,
    geoms,
    values=None,
    color=None,
    cmap=None,
    vmin=None,
    vmax=None,
    marker="o",
    markersize=None,
    **kwargs,
):
    """
    Draw Point and MultiPoint geometries on `ax` with ``ax.scatter``.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        where shapes will be plotted
    geoms : sequence of `N` Points or MultiPoints

    values : a sequence of `N` values, optional
        Values mapped to colors using vmin, vmax, and cmap.
        Cannot be specified together with `color`.
    markersize : scalar or array-like, optional
        Size of the markers. Note that under the hood ``scatter`` is
        used, so the specified value will be proportional to the
        area of the marker (size in points^2).

    Returns
    -------
    collection : matplotlib.collections.Collection that was plotted

    Raises
    ------
    ValueError
        If both `values` and `color` are given.
    """
    if values is not None and color is not None:
        raise ValueError("Can only specify one of 'values' and 'color' kwargs")

    geoms, multiindex = _sanitize_geoms(geoms)
    # `values` is not expanded here; it is expanded further down as kwargs["c"].

    x, y = [], []
    for point in geoms:
        x.append(point.x if not point.is_empty else None)
        y.append(point.y if not point.is_empty else None)

    # Optional arguments are only forwarded when set: matplotlib 1.4 does not
    # support c=None, and < 2.0 does not support s=None.
    optional = {"c": values, "s": markersize, "color": color, "marker": marker}
    for key, val in optional.items():
        if val is not None:
            kwargs[key] = val

    _expand_kwargs(kwargs, multiindex)

    # vmin/vmax are left out when an explicit norm is supplied.
    if "norm" in kwargs:
        collection = ax.scatter(x, y, cmap=cmap, **kwargs)
    else:
        collection = ax.scatter(x, y, vmin=vmin, vmax=vmax, cmap=cmap, **kwargs)

    return collection
|
||||
|
||||
|
||||
def plot_series(
    s,
    cmap=None,
    color=None,
    ax=None,
    figsize=None,
    aspect="auto",
    autolim=True,
    **style_kwds,
):
    """
    Plot a GeoSeries.

    Draw the geometries of a GeoSeries with matplotlib. GeometryCollections
    are split into their members; polygons, lines and points are then each
    drawn as one collection.

    Parameters
    ----------
    s : Series
        The GeoSeries to be plotted. Currently Polygon,
        MultiPolygon, LineString, MultiLineString, Point and MultiPoint
        geometries can be plotted.
    cmap : str (default None)
        The name of a colormap recognized by matplotlib. Any
        colormap will work, but categorical colormaps are
        generally recommended. Examples of useful discrete
        colormaps include:

            tab10, tab20, Accent, Dark2, Paired, Pastel1, Set1, Set2

    color : str, np.array, pd.Series, List (default None)
        If specified, all objects will be colored uniformly.
    ax : matplotlib.pyplot.Artist (default None)
        axes on which to draw the plot
    figsize : pair of floats (default None)
        Size of the resulting matplotlib.figure.Figure. If the argument
        ax is given explicitly, figsize is ignored.
    aspect : 'auto', 'equal', None or float (default 'auto')
        Set aspect of axis. If 'auto', the default aspect for map plots is 'equal'; if
        however data are not projected (coordinates are long/lat), the aspect is by
        default set to 1/cos(s_y * pi/180) with s_y the y coordinate of the middle of
        the GeoSeries (the mean of the y range of bounding box) so that a long/lat
        square appears square in the middle of the plot. This implies an
        Equirectangular projection. If None, the aspect of `ax` won't be changed. It can
        also be set manually (float) as the ratio of y-unit to x-unit.
    autolim : bool (default True)
        Update axes data limits to contain the new geometries.
    **style_kwds : dict
        Color options to be passed on to the actual plot function, such
        as ``edgecolor``, ``facecolor``, ``linewidth``, ``markersize``,
        ``alpha``.

    Returns
    -------
    ax : matplotlib axes instance
    """

    try:
        import matplotlib.pyplot as plt
    except ImportError:
        raise ImportError(
            "The matplotlib package is required for plotting in geopandas. "
            "You can install it using 'conda install -c conda-forge matplotlib' or "
            "'pip install matplotlib'."
        )

    if ax is None:
        _, ax = plt.subplots(figsize=figsize)

    if aspect == "auto":
        if s.crs and s.crs.is_geographic:
            bounds = s.total_bounds
            mid_y = np.mean([bounds[1], bounds[3]])
            # formula ported from R package sp
            # https://github.com/edzer/sp/blob/master/R/mapasp.R
            ax.set_aspect(1 / np.cos(mid_y * np.pi / 180))
        else:
            ax.set_aspect("equal")
    elif aspect is not None:
        ax.set_aspect(aspect)

    # Nothing to draw: warn and hand back the (possibly new) axes.
    if s.empty:
        warnings.warn(
            "The GeoSeries you are attempting to plot is "
            "empty. Nothing has been displayed.",
            UserWarning,
            stacklevel=3,
        )
        return ax

    if s.is_empty.all():
        warnings.warn(
            "The GeoSeries you are attempting to plot is "
            "composed of empty geometries. Nothing has been displayed.",
            UserWarning,
            stacklevel=3,
        )
        return ax

    # Was one color supplied per geometry (rather than a single color)?
    color_given = pd.api.types.is_list_like(color) and len(color) == len(s)

    # With a cmap every geometry gets its own running number as value,
    # wrapped around the number of colors if the colormap exposes it.
    values = None
    if cmap is not None:
        values = np.arange(len(s))
        if hasattr(cmap, "N"):
            values = values % cmap.N
        style_kwds.setdefault("vmin", values.min())
        style_kwds.setdefault("vmax", values.max())

    # Split GeometryCollections into their members and keep values and
    # per-geometry colors aligned with the resulting geometries.
    geoms, multiindex = _sanitize_geoms(s.geometry, prefix="Geom")
    values = np.take(values, multiindex, axis=0) if cmap else None
    if color_given and isinstance(color, pd.Series):
        # make sure the colors follow the index of the series
        color = color.reindex(s.index)
    expl_color = np.take(color, multiindex, axis=0) if color_given else color
    expl_series = geopandas.GeoSeries(geoms)

    geom_types = expl_series.geom_type
    poly_idx = np.asarray(geom_types.isin(["Polygon", "MultiPolygon"]))
    line_idx = np.asarray(
        geom_types.isin(["LineString", "MultiLineString", "LinearRing"])
    )
    point_idx = np.asarray(geom_types.isin(["Point", "MultiPoint"]))

    # Polygons and the parts of MultiPolygons share one collection.
    polys = expl_series[poly_idx]
    if not polys.empty:
        # `color` would override both face and edge color. To keep edgecolor
        # usable on its own, `color` is handed over as facecolor instead.
        facecolor = style_kwds.pop("facecolor", None)
        poly_color = expl_color[poly_idx] if color_given else color
        if color is not None:
            facecolor = poly_color

        poly_values = values[poly_idx] if cmap else None
        _plot_polygon_collection(
            ax,
            polys,
            poly_values,
            facecolor=facecolor,
            cmap=cmap,
            autolim=autolim,
            **style_kwds,
        )

    # LineStrings and the parts of MultiLineStrings share one collection.
    lines = expl_series[line_idx]
    if not lines.empty:
        line_values = values[line_idx] if cmap else None
        line_color = expl_color[line_idx] if color_given else color

        _plot_linestring_collection(
            ax,
            lines,
            line_values,
            color=line_color,
            cmap=cmap,
            autolim=autolim,
            **style_kwds,
        )

    # All points share one collection.
    points = expl_series[point_idx]
    if not points.empty:
        point_values = values[point_idx] if cmap else None
        point_color = expl_color[point_idx] if color_given else color

        _plot_point_collection(
            ax, points, point_values, color=point_color, cmap=cmap, **style_kwds
        )

    ax.figure.canvas.draw_idle()
    return ax
|
||||
|
||||
|
||||
def plot_dataframe(
|
||||
df,
|
||||
column=None,
|
||||
cmap=None,
|
||||
color=None,
|
||||
ax=None,
|
||||
cax=None,
|
||||
categorical=False,
|
||||
legend=False,
|
||||
scheme=None,
|
||||
k=5,
|
||||
vmin=None,
|
||||
vmax=None,
|
||||
markersize=None,
|
||||
figsize=None,
|
||||
legend_kwds=None,
|
||||
categories=None,
|
||||
classification_kwds=None,
|
||||
missing_kwds=None,
|
||||
aspect="auto",
|
||||
autolim=True,
|
||||
**style_kwds,
|
||||
):
|
||||
"""
|
||||
Plot a GeoDataFrame.
|
||||
|
||||
Generate a plot of a GeoDataFrame with matplotlib. If a
|
||||
column is specified, the plot coloring will be based on values
|
||||
in that column.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
column : str, np.array, pd.Series (default None)
|
||||
The name of the dataframe column, np.array, or pd.Series to be plotted.
|
||||
If np.array or pd.Series are used then it must have same length as
|
||||
dataframe. Values are used to color the plot. Ignored if `color` is
|
||||
also set.
|
||||
kind: str
|
||||
The kind of plots to produce. The default is to create a map ("geo").
|
||||
Other supported kinds of plots from pandas:
|
||||
|
||||
- 'line' : line plot
|
||||
- 'bar' : vertical bar plot
|
||||
- 'barh' : horizontal bar plot
|
||||
- 'hist' : histogram
|
||||
- 'box' : BoxPlot
|
||||
- 'kde' : Kernel Density Estimation plot
|
||||
- 'density' : same as 'kde'
|
||||
- 'area' : area plot
|
||||
- 'pie' : pie plot
|
||||
- 'scatter' : scatter plot
|
||||
- 'hexbin' : hexbin plot.
|
||||
cmap : str (default None)
|
||||
The name of a colormap recognized by matplotlib.
|
||||
color : str, np.array, pd.Series (default None)
|
||||
If specified, all objects will be colored uniformly.
|
||||
ax : matplotlib.pyplot.Artist (default None)
|
||||
axes on which to draw the plot
|
||||
cax : matplotlib.pyplot Artist (default None)
|
||||
axes on which to draw the legend in case of color map.
|
||||
categorical : bool (default False)
|
||||
If False, cmap will reflect numerical values of the
|
||||
column being plotted. For non-numerical columns, this
|
||||
will be set to True.
|
||||
legend : bool (default False)
|
||||
Plot a legend. Ignored if no `column` is given, or if `color` is given.
|
||||
scheme : str (default None)
|
||||
Name of a choropleth classification scheme (requires mapclassify).
|
||||
A mapclassify.MapClassifier object will be used
|
||||
under the hood. Supported are all schemes provided by mapclassify (e.g.
|
||||
'BoxPlot', 'EqualInterval', 'FisherJenks', 'FisherJenksSampled',
|
||||
'HeadTailBreaks', 'JenksCaspall', 'JenksCaspallForced',
|
||||
'JenksCaspallSampled', 'MaxP', 'MaximumBreaks',
|
||||
'NaturalBreaks', 'Quantiles', 'Percentiles', 'StdMean',
|
||||
'UserDefined'). Arguments can be passed in classification_kwds.
|
||||
k : int (default 5)
|
||||
Number of classes (ignored if scheme is None)
|
||||
vmin : None or float (default None)
|
||||
Minimum value of cmap. If None, the minimum data value
|
||||
in the column to be plotted is used.
|
||||
vmax : None or float (default None)
|
||||
Maximum value of cmap. If None, the maximum data value
|
||||
in the column to be plotted is used.
|
||||
markersize : str or float or sequence (default None)
|
||||
Only applies to point geometries within a frame.
|
||||
If a str, will use the values in the column of the frame specified
|
||||
by markersize to set the size of markers. Otherwise can be a value
|
||||
to apply to all points, or a sequence of the same length as the
|
||||
number of points.
|
||||
figsize : tuple of integers (default None)
|
||||
Size of the resulting matplotlib.figure.Figure. If the argument
|
||||
axes is given explicitly, figsize is ignored.
|
||||
legend_kwds : dict (default None)
|
||||
Keyword arguments to pass to :func:`matplotlib.pyplot.legend` or
|
||||
:func:`matplotlib.pyplot.colorbar`.
|
||||
Additional accepted keywords when `scheme` is specified:
|
||||
|
||||
fmt : string
|
||||
A formatting specification for the bin edges of the classes in the
|
||||
legend. For example, to have no decimals: ``{"fmt": "{:.0f}"}``.
|
||||
labels : list-like
|
||||
A list of legend labels to override the auto-generated labels.
|
||||
Needs to have the same number of elements as the number of
|
||||
classes (`k`).
|
||||
interval : boolean (default False)
|
||||
An option to control brackets from mapclassify legend.
|
||||
If True, open/closed interval brackets are shown in the legend.
|
||||
categories : list-like
|
||||
Ordered list-like object of categories to be used for categorical plot.
|
||||
classification_kwds : dict (default None)
|
||||
Keyword arguments to pass to mapclassify
|
||||
missing_kwds : dict (default None)
|
||||
Keyword arguments specifying color options (as style_kwds)
|
||||
to be passed on to geometries with missing values in addition to
|
||||
or overwriting other style kwds. If None, geometries with missing
|
||||
values are not plotted.
|
||||
aspect : 'auto', 'equal', None or float (default 'auto')
|
||||
Set aspect of axis. If 'auto', the default aspect for map plots is 'equal'; if
|
||||
however data are not projected (coordinates are long/lat), the aspect is by
|
||||
default set to 1/cos(df_y * pi/180) with df_y the y coordinate of the middle of
|
||||
the GeoDataFrame (the mean of the y range of bounding box) so that a long/lat
|
||||
square appears square in the middle of the plot. This implies an
|
||||
Equirectangular projection. If None, the aspect of `ax` won't be changed. It can
|
||||
also be set manually (float) as the ratio of y-unit to x-unit.
|
||||
autolim : bool (default True)
|
||||
Update axes data limits to contain the new geometries.
|
||||
**style_kwds : dict
|
||||
Style options to be passed on to the actual plot function, such
|
||||
as ``edgecolor``, ``facecolor``, ``linewidth``, ``markersize``,
|
||||
``alpha``.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ax : matplotlib axes instance
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> import geodatasets
|
||||
>>> df = geopandas.read_file(geodatasets.get_path("nybb"))
|
||||
>>> df.head() # doctest: +SKIP
|
||||
BoroCode ... geometry
|
||||
0 5 ... MULTIPOLYGON (((970217.022 145643.332, 970227....
|
||||
1 4 ... MULTIPOLYGON (((1029606.077 156073.814, 102957...
|
||||
2 3 ... MULTIPOLYGON (((1021176.479 151374.797, 102100...
|
||||
3 1 ... MULTIPOLYGON (((981219.056 188655.316, 980940....
|
||||
4 2 ... MULTIPOLYGON (((1012821.806 229228.265, 101278...
|
||||
|
||||
>>> df.plot("BoroName", cmap="Set1") # doctest: +SKIP
|
||||
|
||||
See the User Guide page :doc:`../../user_guide/mapping` for details.
|
||||
|
||||
"""
|
||||
if column is not None and color is not None:
|
||||
warnings.warn(
|
||||
"Only specify one of 'column' or 'color'. Using 'color'.",
|
||||
UserWarning,
|
||||
stacklevel=3,
|
||||
)
|
||||
column = None
|
||||
|
||||
try:
|
||||
import matplotlib.pyplot as plt
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"The matplotlib package is required for plotting in geopandas. "
|
||||
"You can install it using 'conda install -c conda-forge matplotlib' or "
|
||||
"'pip install matplotlib'."
|
||||
)
|
||||
|
||||
if ax is None:
|
||||
if cax is not None:
|
||||
raise ValueError("'ax' can not be None if 'cax' is not.")
|
||||
fig, ax = plt.subplots(figsize=figsize)
|
||||
|
||||
if aspect == "auto":
|
||||
if df.crs and df.crs.is_geographic:
|
||||
bounds = df.total_bounds
|
||||
y_coord = np.mean([bounds[1], bounds[3]])
|
||||
ax.set_aspect(1 / np.cos(y_coord * np.pi / 180))
|
||||
# formula ported from R package sp
|
||||
# https://github.com/edzer/sp/blob/master/R/mapasp.R
|
||||
else:
|
||||
ax.set_aspect("equal")
|
||||
elif aspect is not None:
|
||||
ax.set_aspect(aspect)
|
||||
|
||||
# GH 1555
|
||||
# if legend_kwds set, copy so we don't update it in place
|
||||
if legend_kwds is not None:
|
||||
legend_kwds = legend_kwds.copy()
|
||||
|
||||
if df.empty:
|
||||
warnings.warn(
|
||||
"The GeoDataFrame you are attempting to plot is "
|
||||
"empty. Nothing has been displayed.",
|
||||
UserWarning,
|
||||
stacklevel=3,
|
||||
)
|
||||
return ax
|
||||
|
||||
if isinstance(markersize, str):
|
||||
markersize = df[markersize].values
|
||||
|
||||
if column is None:
|
||||
return plot_series(
|
||||
df.geometry,
|
||||
cmap=cmap,
|
||||
color=color,
|
||||
ax=ax,
|
||||
figsize=figsize,
|
||||
markersize=markersize,
|
||||
aspect=aspect,
|
||||
autolim=autolim,
|
||||
**style_kwds,
|
||||
)
|
||||
|
||||
# To accept pd.Series and np.arrays as column
|
||||
if isinstance(column, (np.ndarray, pd.Series)):
|
||||
if column.shape[0] != df.shape[0]:
|
||||
raise ValueError(
|
||||
"The dataframe and given column have different number of rows."
|
||||
)
|
||||
else:
|
||||
values = column
|
||||
|
||||
# Make sure index of a Series matches index of df
|
||||
if isinstance(values, pd.Series):
|
||||
values = values.reindex(df.index)
|
||||
else:
|
||||
values = df[column]
|
||||
|
||||
if isinstance(values.dtype, CategoricalDtype):
|
||||
if categories is not None:
|
||||
raise ValueError(
|
||||
"Cannot specify 'categories' when column has categorical dtype"
|
||||
)
|
||||
categorical = True
|
||||
elif (
|
||||
pd.api.types.is_object_dtype(values.dtype)
|
||||
or pd.api.types.is_bool_dtype(values.dtype)
|
||||
or pd.api.types.is_string_dtype(values.dtype)
|
||||
or categories
|
||||
):
|
||||
categorical = True
|
||||
|
||||
nan_idx = np.asarray(pd.isna(values), dtype="bool")
|
||||
|
||||
if scheme is not None:
|
||||
mc_err = (
|
||||
"The 'mapclassify' package (>= 2.4.0) is "
|
||||
"required to use the 'scheme' keyword."
|
||||
)
|
||||
try:
|
||||
import mapclassify
|
||||
|
||||
except ImportError:
|
||||
raise ImportError(mc_err)
|
||||
|
||||
if Version(mapclassify.__version__) < Version("2.4.0"):
|
||||
raise ImportError(mc_err)
|
||||
|
||||
if classification_kwds is None:
|
||||
classification_kwds = {}
|
||||
if "k" not in classification_kwds:
|
||||
classification_kwds["k"] = k
|
||||
|
||||
binning = mapclassify.classify(
|
||||
np.asarray(values[~nan_idx]), scheme, **classification_kwds
|
||||
)
|
||||
# set categorical to True for creating the legend
|
||||
categorical = True
|
||||
if legend_kwds is not None and "labels" in legend_kwds:
|
||||
if len(legend_kwds["labels"]) != binning.k:
|
||||
raise ValueError(
|
||||
"Number of labels must match number of bins, "
|
||||
"received {} labels for {} bins".format(
|
||||
len(legend_kwds["labels"]), binning.k
|
||||
)
|
||||
)
|
||||
else:
|
||||
labels = list(legend_kwds.pop("labels"))
|
||||
else:
|
||||
fmt = "{:.2f}"
|
||||
if legend_kwds is not None and "fmt" in legend_kwds:
|
||||
fmt = legend_kwds.pop("fmt")
|
||||
|
||||
labels = binning.get_legend_classes(fmt)
|
||||
if legend_kwds is not None:
|
||||
show_interval = legend_kwds.pop("interval", False)
|
||||
else:
|
||||
show_interval = False
|
||||
if not show_interval:
|
||||
labels = [c[1:-1] for c in labels]
|
||||
|
||||
values = pd.Categorical(
|
||||
[np.nan] * len(values), categories=binning.bins, ordered=True
|
||||
)
|
||||
values[~nan_idx] = pd.Categorical.from_codes(
|
||||
binning.yb, categories=binning.bins, ordered=True
|
||||
)
|
||||
if cmap is None:
|
||||
cmap = "viridis"
|
||||
|
||||
# Define `values` as a Series
|
||||
if categorical:
|
||||
if cmap is None:
|
||||
cmap = "tab10"
|
||||
|
||||
cat = pd.Categorical(values, categories=categories)
|
||||
categories = list(cat.categories)
|
||||
|
||||
# values missing in the Categorical but not in original values
|
||||
missing = list(np.unique(values[~nan_idx & cat.isna()]))
|
||||
if missing:
|
||||
raise ValueError(
|
||||
"Column contains values not listed in categories. "
|
||||
"Missing categories: {}.".format(missing)
|
||||
)
|
||||
|
||||
values = cat.codes[~nan_idx]
|
||||
vmin = 0 if vmin is None else vmin
|
||||
vmax = len(categories) - 1 if vmax is None else vmax
|
||||
|
||||
# fill values with placeholder where were NaNs originally to map them properly
|
||||
# (after removing them in categorical or scheme)
|
||||
if categorical:
|
||||
for n in np.where(nan_idx)[0]:
|
||||
values = np.insert(values, n, values[0])
|
||||
|
||||
mn = values[~np.isnan(values)].min() if vmin is None else vmin
|
||||
mx = values[~np.isnan(values)].max() if vmax is None else vmax
|
||||
|
||||
# decompose GeometryCollections
|
||||
geoms, multiindex = _sanitize_geoms(df.geometry, prefix="Geom")
|
||||
values = np.take(values, multiindex, axis=0)
|
||||
nan_idx = np.take(nan_idx, multiindex, axis=0)
|
||||
expl_series = geopandas.GeoSeries(geoms)
|
||||
|
||||
geom_types = expl_series.geom_type
|
||||
poly_idx = np.asarray((geom_types == "Polygon") | (geom_types == "MultiPolygon"))
|
||||
line_idx = np.asarray(
|
||||
(geom_types == "LineString")
|
||||
| (geom_types == "MultiLineString")
|
||||
| (geom_types == "LinearRing")
|
||||
)
|
||||
point_idx = np.asarray((geom_types == "Point") | (geom_types == "MultiPoint"))
|
||||
|
||||
# plot all Polygons and all MultiPolygon components in the same collection
|
||||
polys = expl_series[poly_idx & np.invert(nan_idx)]
|
||||
subset = values[poly_idx & np.invert(nan_idx)]
|
||||
if not polys.empty:
|
||||
_plot_polygon_collection(
|
||||
ax,
|
||||
polys,
|
||||
subset,
|
||||
vmin=mn,
|
||||
vmax=mx,
|
||||
cmap=cmap,
|
||||
autolim=autolim,
|
||||
**style_kwds,
|
||||
)
|
||||
|
||||
# plot all LineStrings and MultiLineString components in same collection
|
||||
lines = expl_series[line_idx & np.invert(nan_idx)]
|
||||
subset = values[line_idx & np.invert(nan_idx)]
|
||||
if not lines.empty:
|
||||
_plot_linestring_collection(
|
||||
ax,
|
||||
lines,
|
||||
subset,
|
||||
vmin=mn,
|
||||
vmax=mx,
|
||||
cmap=cmap,
|
||||
autolim=autolim,
|
||||
**style_kwds,
|
||||
)
|
||||
|
||||
# plot all Points in the same collection
|
||||
points = expl_series[point_idx & np.invert(nan_idx)]
|
||||
subset = values[point_idx & np.invert(nan_idx)]
|
||||
if not points.empty:
|
||||
if isinstance(markersize, np.ndarray):
|
||||
markersize = np.take(markersize, multiindex, axis=0)
|
||||
markersize = markersize[point_idx & np.invert(nan_idx)]
|
||||
_plot_point_collection(
|
||||
ax,
|
||||
points,
|
||||
subset,
|
||||
vmin=mn,
|
||||
vmax=mx,
|
||||
markersize=markersize,
|
||||
cmap=cmap,
|
||||
**style_kwds,
|
||||
)
|
||||
|
||||
missing_data = not expl_series[nan_idx].empty
|
||||
if missing_kwds is not None and missing_data:
|
||||
if color:
|
||||
if "color" not in missing_kwds:
|
||||
missing_kwds["color"] = color
|
||||
|
||||
merged_kwds = style_kwds.copy()
|
||||
merged_kwds.update(missing_kwds)
|
||||
|
||||
plot_series(expl_series[nan_idx], ax=ax, **merged_kwds)
|
||||
|
||||
if legend and not color:
|
||||
if legend_kwds is None:
|
||||
legend_kwds = {}
|
||||
if "fmt" in legend_kwds:
|
||||
legend_kwds.pop("fmt")
|
||||
|
||||
from matplotlib import cm
|
||||
from matplotlib.colors import Normalize
|
||||
from matplotlib.lines import Line2D
|
||||
|
||||
norm = style_kwds.get("norm", None)
|
||||
if not norm:
|
||||
norm = Normalize(vmin=mn, vmax=mx)
|
||||
n_cmap = cm.ScalarMappable(norm=norm, cmap=cmap)
|
||||
if categorical:
|
||||
if scheme is not None:
|
||||
categories = labels
|
||||
patches = []
|
||||
for i in range(len(categories)):
|
||||
patches.append(
|
||||
Line2D(
|
||||
[0],
|
||||
[0],
|
||||
linestyle="none",
|
||||
marker="o",
|
||||
alpha=style_kwds.get("alpha", 1),
|
||||
markersize=10,
|
||||
markerfacecolor=n_cmap.to_rgba(i),
|
||||
markeredgewidth=0,
|
||||
)
|
||||
)
|
||||
if missing_kwds is not None and missing_data:
|
||||
if "color" in merged_kwds:
|
||||
merged_kwds["facecolor"] = merged_kwds["color"]
|
||||
patches.append(
|
||||
Line2D(
|
||||
[0],
|
||||
[0],
|
||||
linestyle="none",
|
||||
marker="o",
|
||||
alpha=merged_kwds.get("alpha", 1),
|
||||
markersize=10,
|
||||
markerfacecolor=merged_kwds.get("facecolor", None),
|
||||
markeredgecolor=merged_kwds.get("edgecolor", None),
|
||||
markeredgewidth=merged_kwds.get(
|
||||
"linewidth", 1 if merged_kwds.get("edgecolor", False) else 0
|
||||
),
|
||||
)
|
||||
)
|
||||
categories.append(merged_kwds.get("label", "NaN"))
|
||||
legend_kwds.setdefault("numpoints", 1)
|
||||
legend_kwds.setdefault("loc", "best")
|
||||
legend_kwds.setdefault("handles", patches)
|
||||
legend_kwds.setdefault("labels", categories)
|
||||
ax.legend(**legend_kwds)
|
||||
else:
|
||||
if cax is not None:
|
||||
legend_kwds.setdefault("cax", cax)
|
||||
else:
|
||||
legend_kwds.setdefault("ax", ax)
|
||||
|
||||
n_cmap.set_array(np.array([]))
|
||||
ax.get_figure().colorbar(n_cmap, **legend_kwds)
|
||||
|
||||
ax.figure.canvas.draw_idle()
|
||||
return ax
|
||||
|
||||
|
||||
@doc(plot_dataframe)
class GeoplotAccessor(PlotAccessor):
    # NOTE: intentionally no class docstring here; the ``doc`` decorator is
    # applied with ``plot_dataframe``, so the documentation is presumably
    # taken from that function.

    # Plot kinds that are delegated to the pandas accessor.
    _pandas_kinds = PlotAccessor._all_kinds

    def __call__(self, *args, **kwargs):
        # Dispatch on ``kind`` (default "geo"): "geo" goes to
        # ``plot_dataframe``, any pandas kind goes to ``PlotAccessor``,
        # everything else is rejected with a ValueError.
        data = self._parent.copy()
        kind = kwargs.pop("kind", "geo")
        if kind == "geo":
            return plot_dataframe(data, *args, **kwargs)
        if kind in self._pandas_kinds:
            # Access pandas plots (positional arguments are not forwarded)
            return PlotAccessor(data)(kind=kind, **kwargs)
        # not a geo plot and not a pandas plot kind
        raise ValueError(f"{kind} is not a valid plot kind")

    def geo(self, *args, **kwargs):
        # Shortcut for calling the accessor with ``kind="geo"``.
        return self(kind="geo", *args, **kwargs)  # noqa: B026
|
||||
@@ -0,0 +1,505 @@
|
||||
import numpy as np
|
||||
|
||||
import shapely
|
||||
from shapely.geometry.base import BaseGeometry
|
||||
|
||||
from . import _compat as compat
|
||||
from . import array, geoseries
|
||||
|
||||
# Predicate names accepted by ``SpatialIndex.query``: every member of
# shapely's ``BinaryPredicate`` enum plus ``None`` (no predicate).
PREDICATES = {p.name for p in shapely.strtree.BinaryPredicate} | {None}

if compat.GEOS_GE_310:
    # 'dwithin' is only advertised when the GEOS >= 3.10 compat flag is set
    PREDICATES.add("dwithin")
|
||||
|
||||
|
||||
class SpatialIndex:
    """A simple wrapper around Shapely's STRTree.


    Parameters
    ----------
    geometry : np.array of Shapely geometries
        Geometries from which to build the spatial index.
    """

    # values accepted for the ``output_format`` argument of ``query``
    _OUTPUT_FORMATS = ("tuple", "sparse", "dense")

    def __init__(self, geometry):
        # set empty geometries to None to avoid segfault on GEOS <= 3.6
        # see:
        # https://github.com/pygeos/pygeos/issues/146
        # https://github.com/pygeos/pygeos/issues/147
        # (replacing instead of dropping them maintains positional indexing)
        non_empty = geometry.copy()
        non_empty[shapely.is_empty(non_empty)] = None
        self._tree = shapely.STRtree(non_empty)
        # store geometries, including empty geometries for user access
        self.geometries = geometry.copy()

    @property
    def valid_query_predicates(self):
        """Returns valid predicates for the spatial index.

        Returns
        -------
        set
            Set of valid predicates for this spatial index.

        Examples
        --------
        >>> from shapely.geometry import Point
        >>> s = geopandas.GeoSeries([Point(0, 0), Point(1, 1)])
        >>> s.sindex.valid_query_predicates  # doctest: +SKIP
        {None, "contains", "contains_properly", "covered_by", "covers", \
"crosses", "dwithin", "intersects", "overlaps", "touches", "within"}
        """
        return PREDICATES

    def query(
        self, geometry, predicate=None, sort=False, distance=None, output_format="tuple"
    ):
        """
        Return the integer indices of all combinations of each input geometry
        and tree geometries where the bounding box of each input geometry
        intersects the bounding box of a tree geometry.

        If the input geometry is a scalar, this returns an array of shape (n, ) with
        the indices of the matching tree geometries.  If the input geometry is an
        array_like, this returns an array with shape (2,n) where the subarrays
        correspond to the indices of the input geometries and indices of the
        tree geometries associated with each.  To generate an array of pairs of
        input geometry index and tree geometry index, simply transpose the
        result.

        If a predicate is provided, the tree geometries are first queried based
        on the bounding box of the input geometry and then are further filtered
        to those that meet the predicate when comparing the input geometry to
        the tree geometry: ``predicate(geometry, tree_geometry)``.

        The 'dwithin' predicate requires GEOS >= 3.10.

        Bounding boxes are limited to two dimensions and are axis-aligned
        (equivalent to the ``bounds`` property of a geometry); any Z values
        present in input geometries are ignored when querying the tree.

        Any input geometry that is None or empty will never match geometries in
        the tree.

        Parameters
        ----------
        geometry : shapely.Geometry or array-like of geometries \
(numpy.ndarray, GeoSeries, GeometryArray)
            A single shapely geometry or array of geometries to query against
            the spatial index. For array-like, accepts both GeoPandas geometry
            iterables (GeoSeries, GeometryArray) or a numpy array of Shapely
            geometries.
        predicate : {None, "contains", "contains_properly", "covered_by", "covers", \
"crosses", "intersects", "overlaps", "touches", "within", "dwithin"}, optional
            If predicate is provided, the input geometries are tested
            using the predicate function against each item in the tree
            whose extent intersects the envelope of the input geometry:
            ``predicate(input_geometry, tree_geometry)``.
            If possible, prepared geometries are used to help speed up the
            predicate operation.
        sort : bool, default False
            If True, the results will be sorted in ascending order. In case
            of 2D array, the result is sorted lexicographically using the
            geometries' indexes as the primary key and the sindex's indexes
            as the secondary key.
            If False, no additional sorting is applied (results are often
            sorted but there is no guarantee).
        distance : number or array_like, optional
            Distances around each input geometry within which to query the tree for
            the 'dwithin' predicate. If array_like, shape must be broadcastable to shape
            of geometry. Required if ``predicate='dwithin'``.
        output_format : {"tuple", "sparse", "dense"}, default "tuple"
            Format of the result. ``"tuple"`` returns the integer indices
            described under Returns. ``"sparse"`` and ``"dense"`` return a
            boolean matrix of shape ``(number of tree geometries, number of
            input geometries)`` in which entry ``[i, j]`` is True when tree
            geometry ``i`` matches input geometry ``j``; a scalar input
            geometry counts as a single input (one column). ``"sparse"``
            requires scipy.

        Returns
        -------
        ndarray with shape (n,) if geometry is a scalar
            Integer indices for matching geometries from the spatial index
            tree geometries.

        OR

        ndarray with shape (2, n) if geometry is an array_like
            The first subarray contains input geometry integer indices.
            The second subarray contains tree geometry integer indices.

        OR

        scipy.sparse.coo_array (``output_format="sparse"``) or boolean
        ndarray (``output_format="dense"``)
            Match matrix with one row per tree geometry and one column per
            input geometry.

        Raises
        ------
        ValueError
            If ``output_format`` or ``predicate`` is not a supported value, if
            ``predicate="dwithin"`` is used without ``distance``, or if
            ``distance`` is given with any other predicate.

        Examples
        --------
        >>> from shapely.geometry import Point, box
        >>> s = geopandas.GeoSeries(geopandas.points_from_xy(range(10), range(10)))
        >>> s
        0    POINT (0 0)
        1    POINT (1 1)
        2    POINT (2 2)
        3    POINT (3 3)
        4    POINT (4 4)
        5    POINT (5 5)
        6    POINT (6 6)
        7    POINT (7 7)
        8    POINT (8 8)
        9    POINT (9 9)
        dtype: geometry

        Querying the tree with a scalar geometry:

        >>> s.sindex.query(box(1, 1, 3, 3))
        array([1, 2, 3])

        >>> s.sindex.query(box(1, 1, 3, 3), predicate="contains")
        array([2])

        Querying the tree with an array of geometries:

        >>> s2 = geopandas.GeoSeries([box(2, 2, 4, 4), box(5, 5, 6, 6)])
        >>> s2
        0    POLYGON ((4 2, 4 4, 2 4, 2 2, 4 2))
        1    POLYGON ((6 5, 6 6, 5 6, 5 5, 6 5))
        dtype: geometry

        >>> s.sindex.query(s2)
        array([[0, 0, 0, 1, 1],
               [2, 3, 4, 5, 6]])

        >>> s.sindex.query(s2, predicate="contains")
        array([[0],
               [3]])

        >>> s.sindex.query(box(1, 1, 3, 3), predicate="dwithin", distance=0)
        array([1, 2, 3])

        >>> s.sindex.query(box(1, 1, 3, 3), predicate="dwithin", distance=2)
        array([0, 1, 2, 3, 4])

        Notes
        -----
        In the context of a spatial join, input geometries are the "left"
        geometries that determine the order of the results, and tree geometries
        are "right" geometries that are joined against the left geometries. This
        effectively performs an inner join, where only those combinations of
        geometries that can be joined based on overlapping bounding boxes or
        optional predicate are returned.
        """
        # reject an unknown output format up front instead of after the
        # (potentially expensive) tree query has already run
        if output_format not in self._OUTPUT_FORMATS:
            raise ValueError("Invalid output_format: {}".format(output_format))

        if predicate not in self.valid_query_predicates:
            if predicate == "dwithin":
                raise ValueError("predicate = 'dwithin' requires GEOS >= 3.10.0")

            raise ValueError(
                "Got predicate='{}'; ".format(predicate)
                + "`predicate` must be one of {}".format(self.valid_query_predicates)
            )

        # distance argument requirement of predicate `dwithin`
        # and only valid for predicate `dwithin`
        kwargs = {}
        if predicate == "dwithin":
            if distance is None:
                # the distance parameter is needed
                raise ValueError(
                    "'distance' parameter is required for 'dwithin' predicate"
                )
            # add distance to kwargs
            kwargs["distance"] = distance

        elif distance is not None:
            # distance parameter is invalid
            raise ValueError(
                "'distance' parameter is only supported in combination with "
                "'dwithin' predicate"
            )

        geometry = self._as_geometry_array(geometry)

        indices = self._tree.query(geometry, predicate=predicate, **kwargs)

        if output_format != "tuple":
            # matrix outputs are always built from sorted index pairs
            sort = True

        if sort:
            if indices.ndim == 1:
                indices = np.sort(indices)
            else:
                # sort by first array (geometry) and then second (tree)
                geo_idx, tree_idx = indices
                sort_indexer = np.lexsort((tree_idx, geo_idx))
                indices = np.vstack((geo_idx[sort_indexer], tree_idx[sort_indexer]))

        if output_format == "tuple":
            return indices

        # a 1-D result means the query geometry was a scalar: one input column
        n_inputs = 1 if indices.ndim == 1 else len(geometry)
        return self._matches_to_matrix(
            indices, len(self.geometries), n_inputs, output_format
        )

    @staticmethod
    def _matches_to_matrix(indices, n_tree, n_inputs, output_format):
        """Build a boolean (tree x input) match matrix from query indices.

        Parameters
        ----------
        indices : np.ndarray
            Either a 1-D array of tree indices (scalar query geometry) or a
            (2, n) array whose first row holds input geometry indices and
            whose second row holds tree geometry indices.
        n_tree : int
            Number of rows of the result.
        n_inputs : int
            Number of columns of the result.
        output_format : {"sparse", "dense"}
            ``"sparse"`` returns a ``scipy.sparse.coo_array``, anything else a
            dense boolean ``numpy.ndarray``.

        Returns
        -------
        scipy.sparse.coo_array or np.ndarray
            Boolean matrix of shape ``(n_tree, n_inputs)``.
        """
        if indices.ndim == 1:
            # scalar query: every match belongs to the single input column 0
            tree_idx = indices
            geo_idx = np.zeros_like(indices)
        else:
            geo_idx, tree_idx = indices
        shape = (n_tree, n_inputs)

        if output_format == "sparse":
            from scipy.sparse import coo_array

            return coo_array(
                (np.ones(len(tree_idx), dtype=np.bool_), (tree_idx, geo_idx)),
                shape=shape,
                dtype=np.bool_,
            )

        dense = np.zeros(shape, dtype=bool)
        # index with a (rows, cols) pair; indexing with the raw 2-D array
        # would select whole rows instead of individual cells
        dense[tree_idx, geo_idx] = True
        return dense

    @staticmethod
    def _as_geometry_array(geometry):
        """Convert geometry into a numpy array of Shapely geometries.

        Parameters
        ----------
        geometry
            An array-like of Shapely geometries, a GeoPandas GeoSeries/GeometryArray,
            shapely.geometry or list of shapely geometries.

        Returns
        -------
        np.ndarray
            A numpy array of Shapely geometries. A single shapely geometry
            and ``None`` are returned unchanged.
        """
        if isinstance(geometry, np.ndarray):
            return array.from_shapely(geometry)._data
        if isinstance(geometry, geoseries.GeoSeries):
            return geometry.values._data
        if isinstance(geometry, array.GeometryArray):
            return geometry._data
        if isinstance(geometry, BaseGeometry) or geometry is None:
            # scalars pass through untouched
            return geometry
        return np.asarray(geometry)

    def nearest(
        self,
        geometry,
        return_all=True,
        max_distance=None,
        return_distance=False,
        exclusive=False,
    ):
        """
        Return the nearest geometry in the tree for each input geometry in
        ``geometry``.

        If multiple tree geometries have the same distance from an input geometry,
        multiple results will be returned for that input geometry by default.
        Specify ``return_all=False`` to only get a single nearest geometry
        (non-deterministic which nearest is returned).

        In the context of a spatial join, input geometries are the "left"
        geometries that determine the order of the results, and tree geometries
        are "right" geometries that are joined against the left geometries.
        If ``max_distance`` is not set, this will effectively be a left join
        because every geometry in ``geometry`` will have a nearest geometry in
        the tree. However, if ``max_distance`` is used, this becomes an
        inner join, since some geometries in ``geometry`` may not have a match
        in the tree.

        For performance reasons, it is highly recommended that you set
        the ``max_distance`` parameter.

        Parameters
        ----------
        geometry : {shapely.geometry, GeoSeries, GeometryArray, numpy.array of Shapely \
geometries}
            A single shapely geometry, one of the GeoPandas geometry iterables
            (GeoSeries, GeometryArray), or a numpy array of Shapely geometries to query
            against the spatial index.
        return_all : bool, default True
            If there are multiple equidistant or intersecting nearest
            geometries, return all those geometries instead of a single
            nearest geometry.
        max_distance : float, optional
            Maximum distance within which to query for nearest items in tree.
            Must be greater than 0. By default None, indicating no distance limit.
        return_distance : bool, optional
            If True, will return distances in addition to indexes. By default False
        exclusive : bool, optional
            if True, the nearest geometries that are equal to the input geometry
            will not be returned. By default False.  Requires Shapely >= 2.0.

        Returns
        -------
        Indices or tuple of (indices, distances)
            Indices is an ndarray of shape (2,n) and distances (if present) an
            ndarray of shape (n).
            The first subarray of indices contains input geometry indices.
            The second subarray of indices contains tree geometry indices.

        Examples
        --------
        >>> from shapely.geometry import Point, box
        >>> s = geopandas.GeoSeries(geopandas.points_from_xy(range(10), range(10)))
        >>> s.head()
        0    POINT (0 0)
        1    POINT (1 1)
        2    POINT (2 2)
        3    POINT (3 3)
        4    POINT (4 4)
        dtype: geometry

        >>> s.sindex.nearest(Point(1, 1))
        array([[0],
               [1]])

        >>> s.sindex.nearest([box(4.9, 4.9, 5.1, 5.1)])
        array([[0],
               [5]])

        >>> s2 = geopandas.GeoSeries(geopandas.points_from_xy([7.6, 10], [7.6, 10]))
        >>> s2
        0    POINT (7.6 7.6)
        1      POINT (10 10)
        dtype: geometry

        >>> s.sindex.nearest(s2)
        array([[0, 1],
               [8, 9]])
        """
        geometry = self._as_geometry_array(geometry)
        if isinstance(geometry, BaseGeometry) or geometry is None:
            # wrap scalars so the tree is always queried with a sequence
            geometry = [geometry]

        result = self._tree.query_nearest(
            geometry,
            max_distance=max_distance,
            return_distance=return_distance,
            all_matches=return_all,
            exclusive=exclusive,
        )
        if return_distance:
            indices, distances = result
            return indices, distances
        return result

    def intersection(self, coordinates):
        """Compatibility wrapper for rtree.index.Index.intersection,
        use ``query`` instead.

        Parameters
        ----------
        coordinates : sequence or array
            Sequence of the form (min_x, min_y, max_x, max_y)
            to query a rectangle or (x, y) to query a point.

        Raises
        ------
        TypeError
            If ``coordinates`` is not iterable or does not have 2 or 4 items.

        Examples
        --------
        >>> from shapely.geometry import Point, box
        >>> s = geopandas.GeoSeries(geopandas.points_from_xy(range(10), range(10)))
        >>> s
        0    POINT (0 0)
        1    POINT (1 1)
        2    POINT (2 2)
        3    POINT (3 3)
        4    POINT (4 4)
        5    POINT (5 5)
        6    POINT (6 6)
        7    POINT (7 7)
        8    POINT (8 8)
        9    POINT (9 9)
        dtype: geometry

        >>> s.sindex.intersection(box(1, 1, 3, 3).bounds)
        array([1, 2, 3])

        Alternatively, you can use ``query``:

        >>> s.sindex.query(box(1, 1, 3, 3))
        array([1, 2, 3])

        """
        # TODO: we should deprecate this
        # convert bounds to geometry
        # the old API uses tuples of bound, but Shapely uses geometries
        invalid_msg = (
            "Invalid coordinates, must be iterable in format "
            "(minx, miny, maxx, maxy) (for bounds) or (x, y) (for points). "
            "Got `coordinates` = {}."
        )
        try:
            iter(coordinates)
        except TypeError:
            # likely not an iterable
            # this is a check that rtree does, we mimic it
            # to ensure a useful failure message
            raise TypeError(invalid_msg.format(coordinates)) from None

        # need to convert tuple of bounds to a geometry object
        if len(coordinates) == 4:
            return self._tree.query(shapely.box(*coordinates))
        if len(coordinates) == 2:
            return self._tree.query(shapely.points(*coordinates))
        raise TypeError(invalid_msg.format(coordinates))

    @property
    def size(self):
        """Size of the spatial index

        Number of leaves (input geometries) in the index.

        Examples
        --------
        >>> from shapely.geometry import Point
        >>> s = geopandas.GeoSeries(geopandas.points_from_xy(range(10), range(10)))
        >>> s
        0    POINT (0 0)
        1    POINT (1 1)
        2    POINT (2 2)
        3    POINT (3 3)
        4    POINT (4 4)
        5    POINT (5 5)
        6    POINT (6 6)
        7    POINT (7 7)
        8    POINT (8 8)
        9    POINT (9 9)
        dtype: geometry

        >>> s.sindex.size
        10
        """
        return len(self._tree)

    @property
    def is_empty(self):
        """Check if the spatial index is empty

        Examples
        --------
        >>> from shapely.geometry import Point
        >>> s = geopandas.GeoSeries(geopandas.points_from_xy(range(10), range(10)))
        >>> s
        0    POINT (0 0)
        1    POINT (1 1)
        2    POINT (2 2)
        3    POINT (3 3)
        4    POINT (4 4)
        5    POINT (5 5)
        6    POINT (6 6)
        7    POINT (7 7)
        8    POINT (8 8)
        9    POINT (9 9)
        dtype: geometry

        >>> s.sindex.is_empty
        False

        >>> s2 = geopandas.GeoSeries()
        >>> s2.sindex.is_empty
        True
        """
        return len(self._tree) == 0

    def __len__(self):
        return len(self._tree)
|
||||
@@ -0,0 +1,358 @@
|
||||
"""
|
||||
Testing functionality for geopandas objects.
|
||||
"""
|
||||
|
||||
import warnings
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from geopandas import GeoDataFrame, GeoSeries
|
||||
from geopandas.array import GeometryDtype
|
||||
|
||||
|
||||
def _isna(this):
|
||||
"""isna version that works for both scalars and (Geo)Series"""
|
||||
with warnings.catch_warnings():
|
||||
# GeoSeries.isna will raise a warning about no longer returning True
|
||||
# for empty geometries. This helper is used below always in combination
|
||||
# with an is_empty check to preserve behaviour, and thus we ignore the
|
||||
# warning here to avoid it bubbling up to the user
|
||||
warnings.filterwarnings(
|
||||
"ignore", r"GeoSeries.isna\(\) previously returned", UserWarning
|
||||
)
|
||||
if hasattr(this, "isna"):
|
||||
return this.isna()
|
||||
elif hasattr(this, "isnull"):
|
||||
return this.isnull()
|
||||
else:
|
||||
return pd.isnull(this)
|
||||
|
||||
|
||||
def _geom_equals_mask(this, that):
    """
    Test for geometric equality. Empty or missing geometries are considered
    equal.

    Parameters
    ----------
    this, that : arrays of Geo objects (or anything that has an `is_empty`
                 attribute)

    Returns
    -------
    Series
        boolean Series, True if geometries in left equal geometries in right
    """
    # a pair counts as equal when the geometries are equal, when both are
    # empty, or when both are missing
    return (
        this.geom_equals(that)
        | (this.is_empty & that.is_empty)
        | (_isna(this) & _isna(that))
    )
|
||||
|
||||
|
||||
def geom_equals(this, that):
    """
    Test for geometric equality. Empty or missing geometries are considered
    equal.

    Parameters
    ----------
    this, that : arrays of Geo objects (or anything that has an `is_empty`
                 attribute)

    Returns
    -------
    bool
        True if all geometries in left equal geometries in right
    """
    # mirror ``geom_almost_equals``: two GeoDataFrames are compared through
    # their geometry columns so the mask stays a single boolean Series
    if isinstance(this, GeoDataFrame) and isinstance(that, GeoDataFrame):
        this = this.geometry
        that = that.geometry

    return _geom_equals_mask(this, that).all()
|
||||
|
||||
|
||||
def _geom_almost_equals_mask(this, that):
    """
    Test for 'almost' geometric equality. Empty or missing geometries
    considered equal.

    This method allows small difference in the coordinates, but this
    requires coordinates be in the same order for all components of a geometry.

    Parameters
    ----------
    this, that : arrays of Geo objects

    Returns
    -------
    Series
        boolean Series, True if geometries in left almost equal geometries in right
    """
    # a pair counts as almost equal when the geometries match within the
    # tolerance, when both are empty, or when both are missing
    return (
        this.geom_equals_exact(that, tolerance=0.5 * 10 ** (-6))
        | (this.is_empty & that.is_empty)
        | (_isna(this) & _isna(that))
    )
|
||||
|
||||
|
||||
def geom_almost_equals(this, that):
    """
    Test for 'almost' geometric equality. Empty or missing geometries
    considered equal.

    This method allows small difference in the coordinates, but this
    requires coordinates be in the same order for all components of a geometry.

    Parameters
    ----------
    this, that : arrays of Geo objects (or anything that has an `is_empty`
                 property)

    Returns
    -------
    bool
        True if all geometries in left almost equal geometries in right
    """
    # two GeoDataFrames are compared through their geometry columns
    if isinstance(this, GeoDataFrame) and isinstance(that, GeoDataFrame):
        this = this.geometry
        that = that.geometry

    return _geom_almost_equals_mask(this, that).all()
|
||||
|
||||
|
||||
def assert_geoseries_equal(
    left,
    right,
    check_dtype=True,
    check_index_type=False,
    check_series_type=True,
    check_less_precise=False,
    check_geom_type=False,
    check_crs=True,
    normalize=False,
):
    """
    Test util for checking that two GeoSeries are equal.

    Parameters
    ----------
    left, right : two GeoSeries
    check_dtype : bool, default True
        If True, check geo dtype [only included so it's a drop-in replacement
        for assert_series_equal].
    check_index_type : bool, default False
        Check that index types are equal.
    check_series_type : bool, default True
        Check that both are same type (*and* are GeoSeries). If False,
        will attempt to convert both into GeoSeries.
    check_less_precise : bool, default False
        If True, use geom_equals_exact with relative error of 0.5e-6.
        If False, use geom_equals.
    check_geom_type : bool, default False
        If True, check that all the geom types are equal.
    check_crs: bool, default True
        If `check_series_type` is True, then also check that the
        crs matches.
    normalize: bool, default False
        If True, normalize the geometries before comparing equality.
        Typically useful with ``check_less_precise=True``, which uses
        ``geom_equals_exact`` and requires exact coordinate order.

    Raises
    ------
    AssertionError
        If any of the enabled checks fails.
    """
    assert len(left) == len(right), "%d != %d" % (len(left), len(right))

    if check_dtype:
        msg = "dtype should be a GeometryDtype, got {0}"
        assert isinstance(left.dtype, GeometryDtype), msg.format(left.dtype)
        # report the offending (right) dtype, not the left one
        assert isinstance(right.dtype, GeometryDtype), msg.format(right.dtype)

    if check_index_type:
        assert isinstance(left.index, type(right.index))

    if check_series_type:
        assert isinstance(left, GeoSeries)
        assert isinstance(left, type(right))

        if check_crs:
            assert left.crs == right.crs
    else:
        # coerce both sides; a non-GeoSeries ``right`` is built on left's index
        if not isinstance(left, GeoSeries):
            left = GeoSeries(left)
        if not isinstance(right, GeoSeries):
            right = GeoSeries(right, index=left.index)

    assert left.index.equals(right.index), "index: %s != %s" % (left.index, right.index)

    if check_geom_type:
        assert (left.geom_type == right.geom_type).all(), "type: %s != %s" % (
            left.geom_type,
            right.geom_type,
        )

    if normalize:
        left = GeoSeries(left.array.normalize())
        right = GeoSeries(right.array.normalize())

    if not check_crs:
        # CRS is deliberately not compared, so silence the mismatch warning
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", "CRS mismatch", UserWarning)
            _check_equality(left, right, check_less_precise)
    else:
        _check_equality(left, right, check_less_precise)
|
||||
|
||||
|
||||
def _truncated_string(geom):
|
||||
"""Truncated WKT repr of geom"""
|
||||
s = str(geom)
|
||||
if len(s) > 100:
|
||||
return s[:100] + "..."
|
||||
else:
|
||||
return s
|
||||
|
||||
|
||||
def _check_equality(left, right, check_less_precise):
    """Raise ``AssertionError`` when ``left`` and ``right`` differ element-wise.

    With ``check_less_precise`` the comparison mask comes from
    ``_geom_almost_equals_mask``; otherwise from ``_geom_equals_mask``.
    The error message reports how many geometries differ, their index
    labels, and the first differing pair (truncated).
    """
    if check_less_precise:
        qualifier = "almost "
        matches = _geom_almost_equals_mask(left, right)
    else:
        qualifier = ""
        matches = _geom_equals_mask(left, right)

    if matches.all():
        return

    mismatched_left = left[~matches]
    mismatched_right = right[~matches]
    template = (
        "{0} out of {1} geometries are not {3}equal.\n"
        "Indices where geometries are not {3}equal: {2} \n"
        "The first not {3}equal geometry:\n"
        "Left: {4}\n"
        "Right: {5}\n"
    )
    message = template.format(
        len(mismatched_left),
        len(left),
        mismatched_left.index.to_list(),
        qualifier,
        _truncated_string(mismatched_left.iloc[0]),
        _truncated_string(mismatched_right.iloc[0]),
    )
    raise AssertionError(message)
|
||||
|
||||
|
||||
def assert_geodataframe_equal(
    left,
    right,
    check_dtype=True,
    check_index_type="equiv",
    check_column_type="equiv",
    check_frame_type=True,
    check_like=False,
    check_less_precise=False,
    check_geom_type=False,
    check_crs=True,
    normalize=False,
):
    """
    Check that two GeoDataFrames are equal.

    Parameters
    ----------
    left, right : two GeoDataFrames
    check_dtype : bool, default True
        Whether to check the DataFrame dtype is identical.
    check_index_type, check_column_type : bool, default 'equiv'
        Check that index types are equal.
    check_frame_type : bool, default True
        Check that both are same type (*and* are GeoDataFrames). If False,
        will attempt to convert both into GeoDataFrame.
    check_like : bool, default False
        If true, ignore the order of rows & columns
    check_less_precise : bool, default False
        If True, use geom_equals_exact. if False, use geom_equals.
    check_geom_type : bool, default False
        If True, check that all the geom types are equal.
    check_crs: bool, default True
        If `check_frame_type` is True, then also check that the
        crs matches.
    normalize: bool, default False
        If True, normalize the geometries before comparing equality.
        Typically useful with ``check_less_precise=True``, which uses
        ``geom_equals_exact`` and requires exact coordinate order.
    """
    try:
        # added from pandas 0.20
        from pandas.testing import assert_frame_equal, assert_index_equal
    except ImportError:
        from pandas.util.testing import assert_frame_equal, assert_index_equal

    # instance validation
    if not check_frame_type:
        # coerce plain inputs instead of validating their type
        if not isinstance(left, GeoDataFrame):
            left = GeoDataFrame(left)
        if not isinstance(right, GeoDataFrame):
            right = GeoDataFrame(right)
    else:
        assert isinstance(left, GeoDataFrame)
        assert isinstance(left, type(right))

        if check_crs:
            # The CRS comparison is skipped when (in this order, lazily):
            # neither frame has an active geometry column name, neither
            # active name is an actual column, or both CRS are falsy
            # (no crs can be either None or {}).
            crs_check_not_applicable = (
                (
                    left._geometry_column_name is None
                    and right._geometry_column_name is None
                )
                or (
                    left._geometry_column_name not in left.columns
                    and right._geometry_column_name not in right.columns
                )
                or (not left.crs and not right.crs)
            )
            if not crs_check_not_applicable:
                assert left.crs == right.crs

    # shape comparison
    assert left.shape == right.shape, (
        "GeoDataFrame shape mismatch, left: {lshape!r}, right: {rshape!r}.\n"
        "Left columns: {lcols!r}, right columns: {rcols!r}"
    ).format(
        lshape=left.shape, rshape=right.shape, lcols=left.columns, rcols=right.columns
    )

    if check_like:
        left = left.reindex_like(right)

    # column comparison
    assert_index_equal(
        left.columns, right.columns, exact=check_column_type, obj="GeoDataFrame.columns"
    )

    # geometry comparison: every geometry-typed column is compared as a series
    for column_name, column_dtype in left.dtypes.items():
        if not isinstance(column_dtype, GeometryDtype):
            continue
        assert_geoseries_equal(
            left[column_name],
            right[column_name],
            normalize=normalize,
            check_dtype=check_dtype,
            check_less_precise=check_less_precise,
            check_geom_type=check_geom_type,
            check_crs=check_crs,
        )

    # ensure the active geometry column is the same
    assert left._geometry_column_name == right._geometry_column_name

    # drop geometries and check remaining columns
    assert_frame_equal(
        left.select_dtypes(exclude="geometry"),
        right.select_dtypes(exclude="geometry"),
        check_dtype=check_dtype,
        check_index_type=check_index_type,
        check_column_type=check_column_type,
        obj="GeoDataFrame",
    )
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"type": "FeatureCollection",
|
||||
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
|
||||
|
||||
"features": [
|
||||
{ "type": "Feature", "properties": { "Name": "Null Geometry" }, "geometry": null },
|
||||
{ "type": "Feature", "properties": { "Name": "SF to NY" }, "geometry": { "type": "LineString", "coordinates": [ [ -122.4051293283311, 37.786780113640894 ], [ -73.859832357849271, 40.487594916296196 ] ] } }
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
|
||||
def test_no_additional_imports():
    """``import geopandas`` must not pull in optional or development dependencies.

    The check runs in a fresh interpreter; the child exits with the number
    of forbidden top-level modules found in ``sys.modules``.
    """
    forbidden = {
        "pytest",
        "py",
        "ipython",
        # fiona actually gets imported if installed (but error suppressed until used)
        # "fiona",
        # "matplotlib",  # matplotlib gets imported by pandas, see below
        "mapclassify",
        "sqlalchemy",
        "psycopg",
        "psycopg2",
        "geopy",
        "geoalchemy2",
        "matplotlib",
    }

    template = """
import sys
import geopandas
blacklist = {0!r}

mods = blacklist & set(m.split('.')[0] for m in sys.modules)
if mods:
    sys.stderr.write('err: geopandas should not import: {{}}'.format(', '.join(mods)))
    sys.exit(len(mods))
"""
    script = template.format(forbidden)
    completed = subprocess.run([sys.executable, "-c", script], check=False)
    assert completed.returncode == 0
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,30 @@
|
||||
from geopandas._compat import import_optional_dependency
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def test_import_optional_dependency_present():
    """An installed module is returned and matches the regular import."""
    # pandas is not optional, but we know it is present
    loaded = import_optional_dependency("pandas")
    assert loaded is not None

    # module imported normally must be same
    import pandas as regular_pandas

    assert loaded == regular_pandas
|
||||
|
||||
|
||||
def test_import_optional_dependency_absent():
    """A missing module raises ImportError, including any ``extra`` text."""
    missing = "foo"

    with pytest.raises(ImportError, match="Missing optional dependency 'foo'"):
        import_optional_dependency(missing)

    with pytest.raises(ImportError, match="foo is required"):
        import_optional_dependency(missing, extra="foo is required")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "bad_import", [["foo"], 0, False, True, {}, {"foo"}, {"foo": "bar"}]
)
def test_import_optional_dependency_invalid(bad_import):
    """Each parametrized non-string module name is rejected with ValueError."""
    expected_error = pytest.raises(ValueError, match="Invalid module name")
    with expected_error:
        import_optional_dependency(bad_import)
|
||||
@@ -0,0 +1,47 @@
|
||||
import geopandas
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def test_options():
    """The options object lists its option names and rejects unknown ones."""
    expected_names = {
        "display_precision",
        "use_pygeos",
        "io_engine",
    }

    assert "display_precision: " in repr(geopandas.options)
    assert set(dir(geopandas.options)) == expected_names

    # reading an unknown option fails
    with pytest.raises(AttributeError):
        geopandas.options.non_existing_option

    # so does assigning one
    with pytest.raises(AttributeError):
        geopandas.options.non_existing_option = 10
|
||||
|
||||
|
||||
def test_options_display_precision():
    """``display_precision`` defaults to None, accepts 5, rejects "abc" and -1.

    The option is process-global, so it is reset in a ``finally`` block: a
    failing assertion must not leave a non-default value behind for other
    tests.
    """
    assert geopandas.options.display_precision is None
    try:
        geopandas.options.display_precision = 5
        assert geopandas.options.display_precision == 5

        with pytest.raises(ValueError):
            geopandas.options.display_precision = "abc"

        with pytest.raises(ValueError):
            geopandas.options.display_precision = -1
    finally:
        geopandas.options.display_precision = None
|
||||
|
||||
|
||||
def test_options_io_engine():
    """``io_engine`` defaults to None, accepts "pyogrio", rejects "abc" and -1.

    The option is process-global, so it is reset in a ``finally`` block: a
    failing assertion must not leave a non-default value behind for other
    tests.
    """
    assert geopandas.options.io_engine is None
    try:
        geopandas.options.io_engine = "pyogrio"
        assert geopandas.options.io_engine == "pyogrio"

        with pytest.raises(ValueError):
            geopandas.options.io_engine = "abc"

        with pytest.raises(ValueError):
            geopandas.options.io_engine = -1
    finally:
        geopandas.options.io_engine = None
|
||||
@@ -0,0 +1,747 @@
|
||||
import random
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from shapely.geometry import LineString, Point, Polygon
|
||||
|
||||
from geopandas import GeoDataFrame, GeoSeries, points_from_xy, read_file
|
||||
from geopandas.array import GeometryArray, from_shapely, from_wkb, from_wkt
|
||||
|
||||
import pytest
|
||||
from geopandas.testing import assert_geodataframe_equal
|
||||
|
||||
pyproj = pytest.importorskip("pyproj")
|
||||
|
||||
|
||||
def _create_df(x, y=None, crs=None):
    """Build a point GeoDataFrame from coordinate sequences.

    Parameters
    ----------
    x : array-like
        X coordinates.
    y : array-like, optional
        Y coordinates; when omitted (``None``) ``x`` is reused.
    crs : optional
        Passed through to ``GeoDataFrame``.

    Returns
    -------
    GeoDataFrame
        Columns ``geometry`` (points from x/y), ``value1`` (x + y) and
        ``value2`` (x * y).
    """
    # Test for None explicitly: ``y or x`` would silently replace an empty
    # sequence and raises for multi-element numpy arrays.
    if y is None:
        y = x
    x = np.asarray(x)
    y = np.asarray(y)

    return GeoDataFrame(
        {"geometry": points_from_xy(x, y), "value1": x + y, "value2": x * y}, crs=crs
    )
|
||||
|
||||
|
||||
def df_epsg26918():
    """Return a 10-row point frame in EPSG:26918 built by ``_create_df``."""
    # EPSG:26918
    # Center coordinates
    # -1683723.64 6689139.23
    x_start = -1683723
    y_start = 6689139
    return _create_df(
        x=range(x_start, x_start + 10),
        y=range(y_start, y_start + 10),
        crs="epsg:26918",
    )
|
||||
|
||||
|
||||
def test_to_crs_transform():
    """26918 -> 4326 -> 26918 round trip compares equal (less precise)."""
    original = df_epsg26918()
    as_lonlat = original.to_crs(epsg=4326)
    roundtrip = as_lonlat.to_crs(epsg=26918)
    assert_geodataframe_equal(original, roundtrip, check_less_precise=True)
|
||||
|
||||
|
||||
def test_to_crs_transform__missing_data():
    """Round trip still compares equal when one geometry is None."""
    # https://github.com/geopandas/geopandas/issues/1573
    original = df_epsg26918()
    original.loc[3, "geometry"] = None
    as_lonlat = original.to_crs(epsg=4326)
    roundtrip = as_lonlat.to_crs(epsg=26918)
    assert_geodataframe_equal(original, roundtrip, check_less_precise=True)
|
||||
|
||||
|
||||
def test_to_crs_transform__empty_data():
    """Round trip of a zero-row frame compares equal."""
    empty = df_epsg26918().iloc[:0]
    as_lonlat = empty.to_crs(epsg=4326)
    roundtrip = as_lonlat.to_crs(epsg=26918)
    assert_geodataframe_equal(empty, roundtrip, check_less_precise=True)
|
||||
|
||||
|
||||
def test_to_crs_inplace():
    """``inplace=True`` yields the same frame as the returned copy."""
    frame = df_epsg26918()
    expected = frame.to_crs(epsg=4326)
    frame.to_crs(epsg=4326, inplace=True)
    assert_geodataframe_equal(frame, expected, check_less_precise=True)
|
||||
|
||||
|
||||
def test_to_crs_geo_column_name():
    """``to_crs`` keeps a non-default geometry column name."""
    # Test to_crs() with different geometry column name (GH#339)
    frame = df_epsg26918()
    frame = frame.rename(columns={"geometry": "geom"})
    frame.set_geometry("geom", inplace=True)

    as_lonlat = frame.to_crs(epsg=4326)
    roundtrip = as_lonlat.to_crs(epsg=26918)

    assert as_lonlat.geometry.name == "geom"
    assert roundtrip.geometry.name == "geom"
    assert_geodataframe_equal(frame, roundtrip, check_less_precise=True)
|
||||
|
||||
|
||||
def test_to_crs_dimension_z():
    """Reprojecting 3D points keeps the z dimension."""
    # preserve z dimension
    points = points_from_xy([1, 2], [2, 3], [3, 4], crs=4326)
    assert points.has_z.all()

    projected = points.to_crs(epsg=3857)
    assert projected.has_z.all()
|
||||
|
||||
|
||||
# pyproj + numpy 1.25 trigger warning for single-element array -> recommendation is
# to ignore the warning for now (https://github.com/pyproj4/pyproj/issues/1307)
@pytest.mark.filterwarnings("ignore:Conversion of an array with:DeprecationWarning")
def test_to_crs_dimension_mixed():
    """A series mixing 2D and 3D geometries keeps per-geometry dimensionality."""
    source = GeoSeries([Point(1, 2), LineString([(1, 2, 3), (4, 5, 6)])], crs=2056)
    projected = source.to_crs(epsg=4326)

    assert not projected[0].is_empty
    assert projected.has_z.tolist() == [False, True]

    back = projected.to_crs(epsg=2056)
    # TODO replace with assert_geoseries_equal once we expose tolerance keyword
    # assert_geoseries_equal(roundtrip, s, check_less_precise=True)
    for got, expected in zip(back, source):
        np.testing.assert_allclose(got.coords[:], expected.coords[:], atol=0.01)
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Test different supported formats for CRS specification
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        4326,
        "epsg:4326",
        pytest.param(
            {"init": "epsg:4326"},
        ),
        "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs",
        {"proj": "latlong", "ellps": "WGS84", "datum": "WGS84", "no_defs": True},
    ],
    ids=["epsg_number", "epsg_string", "epsg_dict", "proj4_string", "proj4_dict"],
)
def epsg4326(request):
    """Keyword arguments for ``to_crs``: ``epsg`` for ints, ``crs`` otherwise."""
    spec = request.param
    keyword = "epsg" if isinstance(spec, int) else "crs"
    return {keyword: spec}
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        26918,
        "epsg:26918",
        pytest.param(
            {"init": "epsg:26918", "no_defs": True},
        ),
        "+proj=utm +zone=18 +ellps=GRS80 +datum=NAD83 +units=m +no_defs ",
        {"proj": "utm", "zone": 18, "datum": "NAD83", "units": "m", "no_defs": True},
    ],
    ids=["epsg_number", "epsg_string", "epsg_dict", "proj4_string", "proj4_dict"],
)
def epsg26918(request):
    """Keyword arguments for ``to_crs``: ``epsg`` for ints, ``crs`` otherwise."""
    spec = request.param
    keyword = "epsg" if isinstance(spec, int) else "crs"
    return {keyword: spec}
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:'\\+init:DeprecationWarning")
@pytest.mark.filterwarnings("ignore:'\\+init:FutureWarning")
def test_transform2(epsg4326, epsg26918):
    """Round trip works for every combination of CRS specification formats."""
    # with PROJ >= 7, the transformation using EPSG code vs proj4 string is
    # slightly different due to use of grid files or not -> turn off network
    # to not use grid files at all for this test
    pyproj.network.set_network_enabled(False)

    original = df_epsg26918()
    as_lonlat = original.to_crs(**epsg4326)
    roundtrip = as_lonlat.to_crs(**epsg26918)

    # can't check for CRS equality, as the formats differ although representing
    # the same CRS
    assert_geodataframe_equal(
        original, roundtrip, check_less_precise=True, check_crs=False
    )
|
||||
|
||||
|
||||
# pyproj + numpy 1.25 trigger warning for single-element array -> recommendation is
# to ignore the warning for now (https://github.com/pyproj4/pyproj/issues/1307)
@pytest.mark.filterwarnings("ignore:Conversion of an array with:DeprecationWarning")
def test_crs_axis_order__always_xy():
    """Transformed coordinates come back in x/y (lon/lat) order."""
    source = GeoDataFrame(geometry=[Point(-1683723, 6689139)], crs="epsg:26918")
    expected = GeoDataFrame(
        geometry=[Point(-110.1399901, 55.1350011)], crs="epsg:4326"
    )

    result = source.to_crs("epsg:4326")
    assert_geodataframe_equal(result, expected, check_less_precise=True)
|
||||
|
||||
|
||||
def test_skip_exact_same():
    """``to_crs`` with the frame's own CRS returns an equal frame."""
    frame = df_epsg26918()
    same_crs = frame.to_crs(frame.crs)
    assert_geodataframe_equal(frame, same_crs, check_less_precise=True)
|
||||
|
||||
|
||||
# Test CRS on GeometryArray level
|
||||
class TestGeometryArrayCRS:
|
||||
def setup_method(self):
    """Create the reference CRS objects and geometry fixtures used by the tests."""
    self.osgb = pyproj.CRS(27700)
    self.wgs = pyproj.CRS(4326)

    self.geoms = [Point(0, 0), Point(1, 1)]

    # ten random triangles
    triangles = []
    for _ in range(10):
        vertices = [(random.random(), random.random()) for _ in range(3)]
        triangles.append(Polygon(vertices))
    self.polys = triangles

    self.arr = from_shapely(self.polys, crs=27700)
|
||||
|
||||
def test_array(self):
    """CRS can be set on a GeometryArray by attribute, factory or constructor."""
    # assigned after construction
    by_attribute = from_shapely(self.geoms)
    by_attribute.crs = 27700
    assert by_attribute.crs == self.osgb

    # passed to the factory
    by_factory = from_shapely(self.geoms, crs=27700)
    assert by_factory.crs == self.osgb

    # wrapping an array keeps its CRS
    wrapped = GeometryArray(by_factory)
    assert wrapped.crs == self.osgb

    # an explicit crs on the constructor wins
    overridden = GeometryArray(wrapped, crs=4326)
    assert overridden.crs == self.wgs
|
||||
|
||||
def test_series(self):
    """GeoSeries and its underlying array report the same, expected CRS."""

    def check(series, expected):
        assert series.crs == expected
        assert series.values.crs == expected

    check(GeoSeries(crs=27700), self.osgb)

    arr = from_shapely(self.geoms)
    s = GeoSeries(arr, crs=27700)
    check(s, self.osgb)

    # manually change CRS
    s = s.set_crs(4326, allow_override=True)
    check(s, self.wgs)

    s = GeoSeries(self.geoms, crs=27700)
    check(s, self.osgb)

    arr = from_shapely(self.geoms, crs=27700)
    s = GeoSeries(arr)
    check(s, self.osgb)

    with pytest.raises(
        ValueError,
        match="CRS mismatch between CRS of the passed geometries and 'crs'",
    ):
        s = GeoSeries(arr, crs=4326)
    # the failed construction above never rebinds ``s``
    assert s.crs == self.osgb
|
||||
|
||||
def test_dataframe(self):
    """CRS handling across the GeoDataFrame construction and set_geometry paths."""

    def check(frame, expected):
        # frame, active geometry series and its array must all agree
        assert frame.crs == expected
        assert frame.geometry.crs == expected
        assert frame.geometry.values.crs == expected

    no_geom_msg = "Assigning CRS to a GeoDataFrame without"

    arr = from_shapely(self.geoms, crs=27700)
    df = GeoDataFrame(geometry=arr)
    check(df, self.osgb)

    arr = from_shapely(self.geoms)
    s = GeoSeries(arr, crs=27700)
    df = GeoDataFrame(geometry=s)
    check(df, self.osgb)

    # different passed CRS than array CRS is now an error
    match_str = "CRS mismatch between CRS of the passed geometries and 'crs'"
    with pytest.raises(ValueError, match=match_str):
        df = GeoDataFrame(geometry=s, crs=4326)
    with pytest.raises(ValueError, match=match_str):
        GeoDataFrame(geometry=s, crs=4326)
    with pytest.raises(ValueError, match=match_str):
        GeoDataFrame({"data": [1, 2], "geometry": s}, crs=4326)
    with pytest.raises(ValueError, match=match_str):
        GeoDataFrame(df, crs=4326).crs

    # manually change CRS
    arr = from_shapely(self.geoms)
    s = GeoSeries(arr, crs=27700)
    df = GeoDataFrame(geometry=s)
    df = df.set_crs(crs="epsg:4326", allow_override=True)
    check(df, self.wgs)

    with pytest.raises(ValueError, match=no_geom_msg):
        GeoDataFrame(self.geoms, columns=["geom"], crs=27700)
    with pytest.raises(ValueError, match=no_geom_msg):
        GeoDataFrame(crs=27700)

    df = GeoDataFrame(self.geoms, columns=["geom"])
    df = df.set_geometry("geom", crs=27700)
    check(df, self.osgb)
    assert df.geom.crs == self.osgb
    assert df.geom.values.crs == self.osgb

    df = GeoDataFrame(geometry=self.geoms, crs=27700)
    check(df, self.osgb)

    # new geometry with set CRS has priority over GDF CRS
    df = GeoDataFrame(geometry=self.geoms, crs=27700)
    df = df.set_geometry(self.geoms, crs=4326)
    check(df, self.wgs)

    arr = from_shapely(self.geoms)
    s = GeoSeries(arr, crs=27700)
    df = GeoDataFrame()
    df = df.set_geometry(s)
    assert df._geometry_column_name == "geometry"
    check(df, self.osgb)

    arr = from_shapely(self.geoms, crs=27700)
    df = GeoDataFrame()
    df = df.set_geometry(arr)
    check(df, self.osgb)

    arr = from_shapely(self.geoms)
    df = GeoDataFrame({"col1": [1, 2], "geometry": arr}, crs=4326)
    check(df, self.wgs)

    arr = from_shapely(self.geoms, crs=4326)
    df = GeoDataFrame({"col1": [1, 2], "geometry": arr})
    check(df, self.wgs)

    # geometry column name None on init
    df = GeoDataFrame({"geometry": [0, 1]})
    with pytest.raises(
        ValueError,
        match="Assigning CRS to a GeoDataFrame without a geometry",
    ):
        df.crs = 27700

    # geometry column without geometry
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore", "Geometry column does not contain geometry", UserWarning
        )
        df = GeoDataFrame({"geometry": [Point(0, 1)]}).assign(geometry=[0])
    with pytest.raises(
        ValueError,
        match="Assigning CRS to a GeoDataFrame without an active geometry",
    ):
        df.crs = 27700
    with pytest.raises(
        AttributeError,
        match="The CRS attribute of a GeoDataFrame without an active",
    ):
        assert df.crs == self.osgb
|
||||
|
||||
def test_dataframe_getitem_without_geometry_column(self):
    """Accessing ``crs`` on a subset lacking the active geometry column raises."""
    frame = GeoDataFrame({"col": range(10)}, geometry=self.arr)
    frame["geom2"] = frame.geometry.centroid
    without_active = frame[["col", "geom2"]]

    expected_error = pytest.raises(
        AttributeError,
        match="The CRS attribute of a GeoDataFrame without an active",
    )
    with expected_error:
        assert without_active.crs == self.osgb
|
||||
|
||||
def test_dataframe_setitem(self):
    """Assigning geometry columns via ``__setitem__`` carries their CRS along."""

    def check_osgb(frame):
        assert frame.crs == self.osgb
        assert frame.geometry.crs == self.osgb
        assert frame.geometry.values.crs == self.osgb

    warning_match = "You are adding a column named 'geometry'"

    # new geometry CRS has priority over GDF CRS
    arr = from_shapely(self.geoms)
    s = GeoSeries(arr, crs=27700)
    df = GeoDataFrame()
    with pytest.warns(FutureWarning, match=warning_match):
        df["geometry"] = s
    check_osgb(df)

    arr = from_shapely(self.geoms, crs=27700)
    df = GeoDataFrame()
    with pytest.warns(FutureWarning, match=warning_match):
        df["geometry"] = arr
    check_osgb(df)

    # test to_crs case (GH1960)
    arr = from_shapely(self.geoms)
    df = GeoDataFrame({"col1": [1, 2], "geometry": arr}, crs=4326)
    df["geometry"] = df["geometry"].to_crs(27700)
    check_osgb(df)

    # test changing geometry crs not in the geometry column doesn't change the crs
    arr = from_shapely(self.geoms)
    df = GeoDataFrame(
        {"col1": [1, 2], "geometry": arr, "other_geom": arr}, crs=4326
    )
    df["other_geom"] = from_shapely(self.geoms, crs=27700)
    assert df.crs == self.wgs
    assert df.geometry.crs == self.wgs
    assert df["geometry"].crs == self.wgs
    assert df["other_geom"].crs == self.osgb
|
||||
|
||||
def test_dataframe_setitem_without_geometry_column(self):
    """After the geometry column is overwritten, a plain list has no CRS."""
    frame = GeoDataFrame(
        {"col1": [1, 2], "geometry": from_shapely(self.geoms)}, crs=4326
    )

    # override geometry with non geometry
    with pytest.warns(UserWarning):
        frame["geometry"] = 1

    # assigning a list of geometry object doesn't have cached access to 4326
    frame["geometry"] = self.geoms
    assert frame.crs is None
|
||||
|
||||
@pytest.mark.parametrize(
    "scalar", [None, Point(0, 0), LineString([(0, 0), (1, 1)])]
)
def test_scalar(self, scalar):
    """A scalar assigned as the geometry column can be given a CRS afterwards."""
    frame = GeoDataFrame()
    with pytest.warns(
        FutureWarning, match="You are adding a column named 'geometry'"
    ):
        frame["geometry"] = scalar

    frame = frame.set_crs(4326)
    for found in (frame.crs, frame.geometry.crs, frame.geometry.values.crs):
        assert found == self.wgs
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Accessing CRS")
def test_crs_with_no_geom_fails(self):
    """Assigning a CRS to a frame without a geometry column raises ValueError."""
    # Build the frame outside the ``raises`` block so that only the CRS
    # assignment can satisfy the expected exception.
    df = GeoDataFrame()
    with pytest.raises(ValueError, match="Assigning CRS to a GeoDataFrame without"):
        df.crs = 4326
|
||||
|
||||
def test_read_file(self, nybb_filename):
    """A file read with ``read_file`` exposes EPSG:2263 at every level."""
    frame = read_file(nybb_filename)
    expected = pyproj.CRS(2263)

    assert frame.crs == expected
    assert frame.geometry.crs == expected
    assert frame.geometry.values.crs == expected
|
||||
|
||||
def test_multiple_geoms(self):
    """Active and secondary geometry columns keep independent CRS."""
    active = from_shapely(self.geoms, crs=27700)
    secondary = GeoSeries(self.geoms, crs=4326)
    frame = GeoDataFrame(secondary, geometry=active, columns=["col1"])

    # active geometry
    assert frame.crs == self.osgb
    assert frame.geometry.crs == self.osgb
    assert frame.geometry.values.crs == self.osgb
    # secondary geometry column
    assert frame.col1.crs == self.wgs
    assert frame.col1.values.crs == self.wgs
|
||||
|
||||
def test_multiple_geoms_set_geom(self):
    """Switching the active geometry switches the frame-level CRS."""
    first = from_shapely(self.geoms, crs=27700)
    second = GeoSeries(self.geoms, crs=4326)
    frame = GeoDataFrame(second, geometry=first, columns=["col1"])
    frame = frame.set_geometry("col1")

    # the newly active column drives the frame CRS
    assert frame.crs == self.wgs
    assert frame.geometry.crs == self.wgs
    assert frame.geometry.values.crs == self.wgs
    # the previous geometry column keeps its own CRS
    assert frame["geometry"].crs == self.osgb
    assert frame["geometry"].values.crs == self.osgb
|
||||
|
||||
def test_assign_cols(self):
    """Extra geometry columns keep the CRS (or lack of one) of what was assigned."""
    active = from_shapely(self.geoms, crs=27700)
    wgs_series = GeoSeries(self.geoms, crs=4326)
    frame = GeoDataFrame(wgs_series, geometry=active, columns=["col1"])

    frame["geom2"] = wgs_series
    frame["geom3"] = wgs_series.values
    frame["geom4"] = from_shapely(self.geoms)

    assert frame.crs == self.osgb
    assert frame.geometry.crs == self.osgb
    assert frame.geometry.values.crs == self.osgb

    assert frame.geom2.crs == self.wgs
    assert frame.geom2.values.crs == self.wgs
    assert frame.geom3.crs == self.wgs
    assert frame.geom3.values.crs == self.wgs

    # assigned without a CRS
    assert frame.geom4.crs is None
    assert frame.geom4.values.crs is None
|
||||
|
||||
def test_copy(self):
    """``copy()`` preserves the CRS of arrays, series and frames."""
    arr = from_shapely(self.geoms, crs=27700)
    series = GeoSeries(self.geoms, crs=4326)
    frame = GeoDataFrame(series, geometry=arr, columns=["col1"])

    arr_clone = arr.copy()
    assert arr_clone.crs == arr.crs

    series_clone = series.copy()
    assert series_clone.crs == series.crs
    assert series_clone.values.crs == series.values.crs

    frame_clone = frame.copy()
    assert frame_clone.crs == frame.crs
    assert frame_clone.geometry.crs == frame.geometry.crs
    assert frame_clone.geometry.values.crs == frame.geometry.values.crs
    assert frame_clone.col1.crs == frame.col1.crs
    assert frame_clone.col1.values.crs == frame.col1.values.crs
|
||||
|
||||
def test_rename(self):
    """Renaming columns or the geometry keeps each column's CRS."""

    def check_active_is_osgb(frame):
        assert frame.crs == self.osgb
        assert frame.geometry.crs == self.osgb
        assert frame.geometry.values.crs == self.osgb

    arr = from_shapely(self.geoms, crs=27700)
    series = GeoSeries(self.geoms, crs=4326)
    df = GeoDataFrame(series, geometry=arr, columns=["col1"])

    df = df.rename(columns={"geometry": "geom"}).set_geometry("geom")
    check_active_is_osgb(df)

    df = df.rename_geometry("geom2")
    check_active_is_osgb(df)

    df = df.rename(columns={"col1": "column1"})
    assert df.column1.crs == self.wgs
    assert df.column1.values.crs == self.wgs
|
||||
|
||||
def test_geoseries_to_crs(self):
    """``to_crs`` updates the CRS of a series / the active geometry only."""
    series = GeoSeries(self.geoms, crs=27700).to_crs(4326)
    assert series.crs == self.wgs
    assert series.values.crs == self.wgs

    frame = GeoDataFrame(geometry=series)
    assert frame.crs == self.wgs

    frame = frame.to_crs(27700)
    assert frame.crs == self.osgb
    assert frame.geometry.crs == self.osgb
    assert frame.geometry.values.crs == self.osgb

    # make sure that only active geometry is transformed
    frame["col1"] = from_shapely(self.geoms, crs=4326)
    frame = frame.to_crs(3857)
    assert frame.col1.crs == self.wgs
    assert frame.col1.values.crs == self.wgs
|
||||
|
||||
def test_array_to_crs(self):
    """``GeometryArray.to_crs`` returns an array carrying the target CRS."""
    source = from_shapely(self.geoms, crs=27700)
    converted = source.to_crs(4326)
    assert converted.crs == self.wgs
|
||||
|
||||
def test_from_shapely(self):
    """``from_shapely`` attaches the CRS passed as ``crs``."""
    built = from_shapely(self.geoms, crs=27700)
    assert built.crs == self.osgb
|
||||
|
||||
def test_from_wkb(self):
    """``from_wkb`` attaches the CRS passed as ``crs``."""
    wkb_values = [geom.wkb for geom in self.geoms]
    built = from_wkb(wkb_values, crs=27700)
    assert built.crs == self.osgb
|
||||
|
||||
def test_from_wkt(self):
    """``from_wkt`` attaches the CRS passed as ``crs``."""
    wkt_values = [geom.wkt for geom in self.geoms]
    built = from_wkt(wkt_values, crs=27700)
    assert built.crs == self.osgb
|
||||
|
||||
def test_points_from_xy(self):
    """``points_from_xy`` attaches the CRS passed as ``crs``."""
    records = [{"x": value, "y": value, "z": value} for value in range(10)]
    coords = pd.DataFrame(records)
    points = points_from_xy(coords["x"], coords["y"], crs=27700)
    assert points.crs == self.osgb
|
||||
|
||||
# setting CRS in GeoSeries should not set it in passed array without CRS
|
||||
def test_original(self):
    """Giving a GeoSeries a CRS leaves the CRS-less source array untouched."""
    source = from_shapely(self.geoms)
    series = GeoSeries(source, crs=27700)

    assert source.crs is None
    assert series.crs == self.osgb
|
||||
|
||||
def test_ops(self):
    """Unary geometry operations return arrays with the source CRS."""
    arr = self.arr

    # property-style operations, checked in the original order
    for prop in ("boundary", "centroid", "convex_hull", "envelope", "exterior"):
        derived = getattr(arr, prop)
        assert derived.crs == self.osgb

    # method-style operation
    assert arr.representative_point().crs == self.osgb
|
||||
|
||||
def test_binary_ops(self):
    """Binary set operations between same-CRS arrays keep that CRS."""
    arr = self.arr

    # collect ten valid random quadrilaterals
    quads = []
    while len(quads) < 10:
        candidate = Polygon([(random.random(), random.random()) for _ in range(4)])
        if candidate.is_valid:
            quads.append(candidate)

    other = from_shapely(quads, crs=27700)

    operations = ("difference", "intersection", "symmetric_difference", "union")
    for op_name in operations:
        outcome = getattr(arr, op_name)(other)
        assert outcome.crs == self.osgb
|
||||
|
||||
def test_other(self):
    """buffer, interpolate and simplify results keep the source CRS."""
    arr = self.arr

    buffered = arr.buffer(5)
    assert buffered.crs == self.osgb

    interpolated = arr.exterior.interpolate(0.1)
    assert interpolated.crs == self.osgb

    simplified = arr.simplify(5)
    assert simplified.crs == self.osgb
|
||||
|
||||
@pytest.mark.parametrize(
    "attr, arg",
    [
        ("affine_transform", ([0, 1, 1, 0, 0, 0],)),
        ("translate", ()),
        ("rotate", (10,)),
        ("scale", ()),
        ("skew", ()),
    ],
)
def test_affinity_methods(self, attr, arg):
    """Each parametrized affine method returns an array with the source CRS."""
    method = getattr(self.arr, attr)
    transformed = method(*arg)

    assert transformed.crs == self.osgb
|
||||
|
||||
def test_slice(self):
    """Positional slicing keeps the CRS on series and on frame columns."""
    series = GeoSeries(self.arr, crs=27700)
    assert series.iloc[1:].values.crs == self.osgb

    frame = GeoDataFrame({"col1": self.arr}, geometry=series)
    assert frame.iloc[1:].geometry.values.crs == self.osgb
    assert frame.iloc[1:].col1.values.crs == self.osgb
|
||||
|
||||
def test_concat(self):
    """``pd.concat`` must keep the CRS of every geometry column."""
    series = GeoSeries(self.arr, crs=27700)
    stacked = pd.concat([series, series])
    assert stacked.values.crs == self.osgb

    # "col1" carries EPSG:4326 while the active geometry is built from ``series``.
    frame = GeoDataFrame(
        {"col1": from_shapely(self.geoms, crs=4326)}, geometry=series
    )
    assert pd.concat([frame, frame]).geometry.values.crs == self.osgb
    assert pd.concat([frame, frame]).col1.values.crs == self.wgs
|
||||
|
||||
def test_merge(self):
    """An index-aligned merge must keep the CRS of each geometry column."""
    geom_arr = from_shapely(self.geoms, crs=27700)
    wgs_series = GeoSeries(self.geoms, crs=4326)

    left = GeoDataFrame({"col1": wgs_series}, geometry=geom_arr)
    right = GeoDataFrame({"col2": wgs_series}, geometry=geom_arr)
    right = right.rename_geometry("geom")

    merged = left.merge(right, left_index=True, right_index=True)

    assert merged.col1.values.crs == self.wgs
    assert merged.geometry.values.crs == self.osgb
    assert merged.col2.values.crs == self.wgs
    assert merged.geom.values.crs == self.osgb
    assert merged.crs == self.osgb
|
||||
|
||||
# make sure that geometry column from list has CRS (__setitem__)
|
||||
def test_setitem_geometry(self):
    """Assigning to ``df["geometry"]`` from a list or from an array sets the CRS."""
    geom_arr = from_shapely(self.geoms, crs=27700)

    # Plain list of geometries: the frame must still report ``self.osgb``.
    frame = GeoDataFrame({"col1": [0, 1]}, geometry=geom_arr)
    frame["geometry"] = list(frame.geometry)
    assert frame.geometry.values.crs == self.osgb

    # Array built with a different CRS: the frame must report ``self.wgs``.
    other = GeoDataFrame({"col1": [0, 1]}, geometry=geom_arr)
    other["geometry"] = from_shapely(self.geoms, crs=4326)
    assert other.geometry.values.crs == self.wgs
|
||||
|
||||
def test_astype(self):
    """Casting a non-geometry column with ``astype`` must keep the frame CRS."""
    geom_arr = from_shapely(self.geoms, crs=27700)
    frame = GeoDataFrame({"col1": [0, 1]}, geometry=geom_arr)

    converted = frame.astype({"col1": str})

    assert converted.crs == self.osgb
|
||||
|
||||
def test_apply(self):
    """``GeoSeries.apply`` returning geometries must keep CRS 27700."""
    series = GeoSeries(self.arr)
    assert series.crs == 27700

    # apply preserves the CRS if the result is a GeoSeries
    centroids = series.apply(lambda geom: geom.centroid)
    assert centroids.crs == 27700
|
||||
|
||||
def test_apply_geodataframe(self):
    """Identity ``GeoDataFrame.apply`` along either axis must keep CRS 27700."""
    frame = GeoDataFrame({"col1": [0, 1]}, geometry=self.geoms, crs=27700)
    assert frame.crs == 27700

    # apply preserves the CRS if the result is a GeoDataFrame
    by_column = frame.apply(lambda col: col, axis=0)
    assert by_column.crs == 27700

    by_row = frame.apply(lambda row: row, axis=1)
    assert by_row.crs == 27700
|
||||
|
||||
|
||||
class TestSetCRS:
    """Checks ``set_crs`` on both GeoSeries and GeoDataFrame."""

    @pytest.mark.parametrize(
        "constructor",
        [
            lambda geoms, crs: GeoSeries(geoms, crs=crs),
            lambda geoms, crs: GeoDataFrame(geometry=geoms, crs=crs),
        ],
        ids=["geoseries", "geodataframe"],
    )
    def test_set_crs(self, constructor):
        """Walk through copy, inplace, override and reset-to-None behaviour."""
        points = [Point(0, 0), Point(1, 1)]

        naive = constructor(points, crs=None)
        assert naive.crs is None

        # by default returns a copy
        via_crs = naive.set_crs(crs="EPSG:4326")
        assert via_crs.crs == "EPSG:4326"
        assert naive.crs is None

        via_epsg = naive.set_crs(epsg=4326)
        assert via_epsg.crs == "EPSG:4326"
        assert naive.crs is None

        # with inplace=True
        same_obj = naive.set_crs(crs="EPSG:4326", inplace=True)
        assert same_obj is naive
        assert same_obj.crs == naive.crs == "EPSG:4326"

        # raise for non-naive when crs would be overridden
        non_naive = constructor([Point(0, 0), Point(1, 1)], crs="EPSG:4326")
        assert non_naive.crs == "EPSG:4326"
        with pytest.raises(ValueError, match="already has a CRS"):
            non_naive.set_crs("EPSG:3857")

        # allow for equal crs
        unchanged = non_naive.set_crs("EPSG:4326")
        assert unchanged.crs == "EPSG:4326"

        # replace with allow_override=True
        overridden = non_naive.set_crs("EPSG:3857", allow_override=True)
        assert non_naive.crs == "EPSG:4326"
        assert overridden.crs == "EPSG:3857"

        overridden = non_naive.set_crs(
            "EPSG:3857", allow_override=True, inplace=True
        )
        assert non_naive.crs == "EPSG:3857"
        assert overridden.crs == "EPSG:3857"

        # set CRS to None
        cleared = non_naive.set_crs(crs=None, allow_override=True)
        assert cleared.crs is None
        assert non_naive.crs == "EPSG:3857"
|
||||
@@ -0,0 +1,15 @@
|
||||
from geopandas import GeoDataFrame, read_file
|
||||
from geopandas.datasets import get_path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "test_dataset", ["naturalearth_lowres", "naturalearth_cities", "nybb", "foo"]
)
def test_read_paths(test_dataset):
    """Looking up any dataset name must raise the deprecation ``AttributeError``."""
    expected_message = r"The geopandas\.dataset has been deprecated and was removed"
    with pytest.raises(AttributeError, match=expected_message):
        path = get_path(test_dataset)
        assert isinstance(read_file(path), GeoDataFrame)
|
||||
@@ -0,0 +1,87 @@
|
||||
from textwrap import dedent
|
||||
|
||||
from geopandas._decorator import doc
|
||||
|
||||
|
||||
# Template function for the ``doc`` decorator tests: the ``{method}`` and
# ``{operation}`` placeholders are filled from the decorator keyword arguments
# (see the expected text in test_docstring_formatting). The docstring below is
# runtime data and must not be reworded.
@doc(method="cumsum", operation="sum")
def cumsum(whatever):
    """
    This is the {method} method.

    It computes the cumulative {operation}.
    """
|
||||
|
||||
|
||||
# Reuses the ``cumsum`` template and passes an extra "Examples" section; the
# resulting ``__doc__`` is asserted in test_docstring_appending. The function
# deliberately has no docstring of its own.
@doc(
    cumsum,
    dedent(
        """
        Examples
        --------

        >>> cumavg([1, 2, 3])
        2
        """
    ),
    method="cumavg",
    operation="average",
)
def cumavg(whatever): ...
|
||||
|
||||
|
||||
# Takes its docstring template from ``cumsum`` with different substitutions;
# the expected ``__doc__`` is asserted in test_doc_template_from_func.
@doc(cumsum, method="cummax", operation="maximum")
def cummax(whatever): ...
|
||||
|
||||
|
||||
# Takes its template from ``cummax`` (itself derived from ``cumsum``); the
# expected ``__doc__`` is asserted in test_inherit_doc_template.
@doc(cummax, method="cummin", operation="minimum")
def cummin(whatever): ...
|
||||
|
||||
|
||||
def test_docstring_formatting():
    """``cumsum.__doc__`` must be its template with the placeholders filled in."""
    expected = dedent(
        """
        This is the cumsum method.

        It computes the cumulative sum.
        """
    )
    actual = cumsum.__doc__
    assert actual == expected
|
||||
|
||||
|
||||
def test_docstring_appending():
    """``cumavg.__doc__`` must be the filled template followed by the Examples text."""
    expected = dedent(
        """
        This is the cumavg method.

        It computes the cumulative average.

        Examples
        --------

        >>> cumavg([1, 2, 3])
        2
        """
    )
    actual = cumavg.__doc__
    assert actual == expected
|
||||
|
||||
|
||||
def test_doc_template_from_func():
    """``cummax.__doc__`` must be the ``cumsum`` template with its own values."""
    expected = dedent(
        """
        This is the cummax method.

        It computes the cumulative maximum.
        """
    )
    actual = cummax.__doc__
    assert actual == expected
|
||||
|
||||
|
||||
def test_inherit_doc_template():
    """``cummin.__doc__`` must be the template inherited via ``cummax``."""
    expected = dedent(
        """
        This is the cummin method.

        It computes the cumulative minimum.
        """
    )
    actual = cummin.__doc__
    assert actual == expected
|
||||
@@ -0,0 +1,372 @@
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
import geopandas
|
||||
from geopandas import GeoDataFrame, read_file
|
||||
from geopandas._compat import HAS_PYPROJ, PANDAS_GE_15, PANDAS_GE_20, PANDAS_GE_30
|
||||
|
||||
import pytest
|
||||
from geopandas.testing import assert_geodataframe_equal, geom_almost_equals
|
||||
from pandas.testing import assert_frame_equal
|
||||
|
||||
|
||||
@pytest.fixture
def nybb_polydf(nybb_filename):
    """Read the nybb file and prepare it for the dissolve tests.

    Keeps three columns, renames the geometry column to "myshapes", adds a
    "manhattan_bronx" grouping column (5 everywhere, 6 for index labels 3-4)
    and casts "BoroCode" to int64.
    """
    frame = read_file(nybb_filename)[["geometry", "BoroName", "BoroCode"]]
    frame = frame.rename(columns={"geometry": "myshapes"})
    frame = frame.set_geometry("myshapes")

    frame["manhattan_bronx"] = 5
    frame.loc[3:4, "manhattan_bronx"] = 6

    frame["BoroCode"] = frame["BoroCode"].astype("int64")
    return frame
|
||||
|
||||
|
||||
@pytest.fixture
def merged_shapes(nybb_polydf):
    """Expected geometry-only dissolve result, indexed by "manhattan_bronx"."""
    # Merged geometry
    group_six = nybb_polydf.loc[3:4]
    group_five = nybb_polydf.loc[0:2]

    unions = [group_five.geometry.union_all(), group_six.geometry.union_all()]
    group_index = pd.Index([5, 6], name="manhattan_bronx")

    return GeoDataFrame(
        {"myshapes": unions},
        geometry="myshapes",
        index=group_index,
        crs=nybb_polydf.crs,
    )
|
||||
|
||||
|
||||
@pytest.fixture
def first(merged_shapes):
    """Copy of ``merged_shapes`` with the BoroName / BoroCode values used as the
    expected ``aggfunc="first"`` result."""
    expected = merged_shapes.copy()
    expected["BoroName"] = ["Staten Island", "Manhattan"]
    expected["BoroCode"] = [5, 1]
    return expected
|
||||
|
||||
|
||||
@pytest.fixture
def expected_mean(merged_shapes):
    """Copy of ``merged_shapes`` with the BoroCode values used as the expected
    ``aggfunc="mean"`` result."""
    expected = merged_shapes.copy()
    expected["BoroCode"] = [4, 1.5]
    return expected
|
||||
|
||||
|
||||
def test_geom_dissolve(nybb_polydf, first):
    """Dissolve keeps the geometry column name and yields the expected shapes."""
    dissolved = nybb_polydf.dissolve("manhattan_bronx")

    assert dissolved.geometry.name == "myshapes"
    assert geom_almost_equals(dissolved, first)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
def test_dissolve_retains_existing_crs(nybb_polydf):
    """A frame that has a CRS must still have one after dissolve."""
    assert nybb_polydf.crs is not None

    dissolved = nybb_polydf.dissolve("manhattan_bronx")

    assert dissolved.crs is not None
|
||||
|
||||
|
||||
def test_dissolve_retains_nonexisting_crs(nybb_polydf):
    """A frame whose CRS was cleared must have no CRS after dissolve."""
    # Clear the CRS directly on the underlying geometry array.
    nybb_polydf.geometry.array.crs = None

    dissolved = nybb_polydf.dissolve("manhattan_bronx")

    assert dissolved.crs is None
|
||||
|
||||
|
||||
def test_first_dissolve(nybb_polydf, first):
    """Dissolve with its default aggregation must equal the ``first`` fixture."""
    dissolved = nybb_polydf.dissolve("manhattan_bronx")
    assert_frame_equal(first, dissolved, check_column_type=False)
|
||||
|
||||
|
||||
def test_mean_dissolve(nybb_polydf, first, expected_mean):
    """Mean aggregation must match ``expected_mean`` on every supported pandas."""
    group_col = "manhattan_bronx"

    if PANDAS_GE_20:  # pandas 2.0
        by_name = nybb_polydf.dissolve(group_col, aggfunc="mean", numeric_only=True)
        # for non pandas "mean", numeric only cannot be applied. Drop columns manually
        numeric_only_frame = nybb_polydf.drop(columns=["BoroName"])
        by_func = numeric_only_frame.dissolve(group_col, aggfunc="mean")
    elif PANDAS_GE_15:
        # pandas 1.5.x: a FutureWarning is expected here.
        with pytest.warns(FutureWarning, match=".*used in dissolve is deprecated.*"):
            by_name = nybb_polydf.dissolve(group_col, aggfunc="mean")
            by_func = nybb_polydf.dissolve(group_col, aggfunc=np.mean)
    else:
        by_name = nybb_polydf.dissolve(group_col, aggfunc="mean")
        by_func = nybb_polydf.dissolve(group_col, aggfunc=np.mean)

    assert_frame_equal(expected_mean, by_name, check_column_type=False)
    assert_frame_equal(expected_mean, by_func, check_column_type=False)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not PANDAS_GE_15 or PANDAS_GE_20, reason="warning for pandas 1.5.x")
def test_mean_dissolve_warning_capture(nybb_polydf, first, expected_mean):
    """On pandas 1.5.x "mean" must warn while "first" must stay silent."""
    deprecation_pattern = ".*used in dissolve is deprecated.*"
    with pytest.warns(FutureWarning, match=deprecation_pattern):
        nybb_polydf.dissolve("manhattan_bronx", aggfunc="mean")

    # test no warning for aggfunc first which doesn't have numeric only semantics
    with warnings.catch_warnings():
        # Escalate any warning to an exception so a stray one fails the test.
        warnings.simplefilter("error")
        nybb_polydf.dissolve("manhattan_bronx", aggfunc="first")
|
||||
|
||||
|
||||
def test_dissolve_emits_other_warnings(nybb_polydf):
    """Warnings raised by a user aggregation function must reach the caller."""

    # we only do something special for pandas 1.5.x, but expect this
    # test to be true on any version
    def sum_and_warn(group):
        warnings.warn("foo")  # noqa: B028
        return group.sum(numeric_only=False) if PANDAS_GE_20 else group.sum()

    with pytest.warns(UserWarning, match="foo"):
        nybb_polydf.dissolve("manhattan_bronx", aggfunc=sum_and_warn)
|
||||
|
||||
|
||||
def test_multicolumn_dissolve(nybb_polydf, first):
    """Dissolving by two columns must produce a two-level index."""
    frame = nybb_polydf.copy()
    frame["dup_col"] = frame.manhattan_bronx
    dissolved = frame.dissolve(["manhattan_bronx", "dup_col"], aggfunc="first")

    # Build the expectation: ``first`` with its index duplicated as a second level.
    expected = first.copy()
    expected["dup_col"] = expected.index
    expected = expected.set_index([expected.index, "dup_col"])

    assert_frame_equal(dissolved, expected, check_column_type=False)
|
||||
|
||||
|
||||
def test_reset_index(nybb_polydf, first):
    """``as_index=False`` must match the default result after ``reset_index``."""
    dissolved = nybb_polydf.dissolve("manhattan_bronx", as_index=False)
    expected = first.reset_index()
    assert_frame_equal(expected, dissolved, check_column_type=False)
|
||||
|
||||
|
||||
def test_dissolve_none(nybb_polydf):
    """``by=None`` must collapse the whole frame into a single row."""
    dissolved = nybb_polydf.dissolve(by=None)

    geom_col = nybb_polydf.geometry.name
    columns = {
        geom_col: [nybb_polydf.geometry.union_all()],
        "BoroName": ["Staten Island"],
        "BoroCode": [5],
        "manhattan_bronx": [5],
    }
    expected = GeoDataFrame(
        columns,
        geometry=nybb_polydf.geometry.name,
        crs=nybb_polydf.crs,
    )

    assert_frame_equal(expected, dissolved, check_column_type=False)
|
||||
|
||||
|
||||
def test_dissolve_none_mean(nybb_polydf):
    """Whole-frame dissolve with a numeric-only mean must give one averaged row."""
    dissolved = nybb_polydf.dissolve(aggfunc="mean", numeric_only=True)

    geom_col = nybb_polydf.geometry.name
    columns = {
        geom_col: [nybb_polydf.geometry.union_all()],
        "BoroCode": [3.0],
        "manhattan_bronx": [5.4],
    }
    expected = GeoDataFrame(
        columns,
        geometry=nybb_polydf.geometry.name,
        crs=nybb_polydf.crs,
    )

    assert_frame_equal(expected, dissolved, check_column_type=False)
|
||||
|
||||
|
||||
def test_dissolve_level():
    """Dissolve on index levels, selected by position or by name."""
    gdf = geopandas.GeoDataFrame(
        {
            "a": [1, 1, 2, 2],
            "b": [3, 4, 4, 4],
            "c": [3, 4, 5, 6],
            "geometry": geopandas.array.from_wkt(
                ["POINT (0 0)", "POINT (1 1)", "POINT (2 2)", "POINT (3 3)"]
            ),
        }
    ).set_index(["a", "b", "c"])

    by_a = geopandas.GeoDataFrame(
        {
            "a": [1, 2],
            "geometry": geopandas.array.from_wkt(
                ["MULTIPOINT (0 0, 1 1)", "MULTIPOINT (2 2, 3 3)"]
            ),
        }
    ).set_index("a")
    by_b = geopandas.GeoDataFrame(
        {
            "b": [3, 4],
            "geometry": geopandas.array.from_wkt(
                ["POINT (0 0)", "MULTIPOINT (1 1, 2 2, 3 3)"]
            ),
        }
    ).set_index("b")
    by_ab = geopandas.GeoDataFrame(
        {
            "a": [1, 1, 2],
            "b": [3, 4, 4],
            "geometry": geopandas.array.from_wkt(
                ["POINT (0 0)", "POINT (1 1)", "MULTIPOINT (2 2, 3 3)"]
            ),
        }
    ).set_index(["a", "b"])

    # Every expectation is checked with a positional and a named level selector.
    cases = [
        (by_a, 0),
        (by_a, "a"),
        (by_b, 1),
        (by_b, "b"),
        (by_ab, [0, 1]),
        (by_ab, ["a", "b"]),
    ]
    for expected, level in cases:
        assert_frame_equal(expected, gdf.dissolve(level=level))
|
||||
|
||||
|
||||
def test_dissolve_sort():
    """Groups are sorted by default and keep first-seen order with ``sort=False``."""
    gdf = geopandas.GeoDataFrame(
        {
            "a": [2, 1, 1],
            "geometry": geopandas.array.from_wkt(
                ["POINT (0 0)", "POINT (1 1)", "POINT (2 2)"]
            ),
        }
    )

    in_input_order = geopandas.GeoDataFrame(
        {
            "a": [2, 1],
            "geometry": geopandas.array.from_wkt(
                ["POINT (0 0)", "MULTIPOINT (1 1, 2 2)"]
            ),
        }
    ).set_index("a")
    in_sorted_order = in_input_order.sort_index()

    assert_frame_equal(in_sorted_order, gdf.dissolve("a"))
    assert_frame_equal(in_input_order, gdf.dissolve("a", sort=False))
|
||||
|
||||
|
||||
def test_dissolve_categorical():
    """Dissolve by a categorical plus a plain column, with and without ``observed``."""
    group_cols = ["cat", "noncat"]
    gdf = geopandas.GeoDataFrame(
        {
            "cat": pd.Categorical(["a", "a", "b", "b"]),
            "noncat": [1, 1, 1, 2],
            "to_agg": [1, 2, 3, 4],
            "geometry": geopandas.array.from_wkt(
                ["POINT (0 0)", "POINT (1 1)", "POINT (2 2)", "POINT (3 3)"]
            ),
        }
    )

    # when observed=False we get an additional observation
    # that wasn't in the original data
    if PANDAS_GE_30:
        unobserved_geom = "GEOMETRYCOLLECTION EMPTY"
    else:
        unobserved_geom = None
    with_unobserved = geopandas.GeoDataFrame(
        {
            "cat": pd.Categorical(["a", "a", "b", "b"]),
            "noncat": [1, 2, 1, 2],
            "geometry": geopandas.array.from_wkt(
                [
                    "MULTIPOINT (0 0, 1 1)",
                    unobserved_geom,
                    "POINT (2 2)",
                    "POINT (3 3)",
                ]
            ),
            "to_agg": [1, None, 3, 4],
        }
    ).set_index(["cat", "noncat"])

    # when observed=True we do not get any additional observations
    observed_only = geopandas.GeoDataFrame(
        {
            "cat": pd.Categorical(["a", "b", "b"]),
            "noncat": [1, 1, 2],
            "geometry": geopandas.array.from_wkt(
                ["MULTIPOINT (0 0, 1 1)", "POINT (2 2)", "POINT (3 3)"]
            ),
            "to_agg": [1, 3, 4],
        }
    ).set_index(["cat", "noncat"])

    assert_frame_equal(with_unobserved, gdf.dissolve(["cat", "noncat"]))
    assert_frame_equal(observed_only, gdf.dissolve(group_cols, observed=True))
|
||||
|
||||
|
||||
def test_dissolve_dropna():
    """Rows with a missing group key are dropped unless ``dropna=False``."""
    gdf = geopandas.GeoDataFrame(
        {
            "a": [1, 1, None],
            "geometry": geopandas.array.from_wkt(
                ["POINT (0 0)", "POINT (1 1)", "POINT (2 2)"]
            ),
        }
    )

    keeping_na = geopandas.GeoDataFrame(
        {
            "a": [1.0, np.nan],
            "geometry": geopandas.array.from_wkt(
                ["MULTIPOINT (0 0, 1 1)", "POINT (2 2)"]
            ),
        }
    ).set_index("a")
    dropping_na = geopandas.GeoDataFrame(
        {
            "a": [1.0],
            "geometry": geopandas.array.from_wkt(["MULTIPOINT (0 0, 1 1)"]),
        }
    ).set_index("a")

    assert_frame_equal(keeping_na, gdf.dissolve("a", dropna=False))
    assert_frame_equal(dropping_na, gdf.dissolve("a"))
|
||||
|
||||
|
||||
def test_dissolve_dropna_warn(nybb_polydf):
    """A default dissolve must not emit the "dropna kwarg is not supported" warning."""
    # No warning with default params
    with warnings.catch_warnings(record=True) as captured:
        nybb_polydf.dissolve()

    for entry in captured:
        message_text = str(entry.message)
        assert "dropna kwarg is not supported" not in message_text
|
||||
|
||||
|
||||
def test_dissolve_multi_agg(nybb_polydf, merged_shapes):
    """A dict of aggregations yields tuple-named columns and no warnings."""
    # Expected values, keyed by (column, aggregation) tuples.
    expected_columns = (
        (("BoroCode", "min"), [3, 1]),
        (("BoroCode", "max"), [5, 2]),
        (("BoroName", "count"), [3, 2]),
    )
    for column_key, values in expected_columns:
        merged_shapes[column_key] = values

    aggregations = {
        "BoroCode": ["min", "max"],
        "BoroName": "count",
    }
    with warnings.catch_warnings(record=True) as captured:
        dissolved = nybb_polydf.dissolve(by="manhattan_bronx", aggfunc=aggregations)

    assert_geodataframe_equal(dissolved, merged_shapes)
    assert len(captured) == 0
|
||||
|
||||
|
||||
def test_coverage_dissolve(nybb_polydf):
    """``method="coverage"`` must match ``union_all(method="coverage")`` per group."""
    group_six = nybb_polydf.loc[3:4]
    group_five = nybb_polydf.loc[0:2]

    unions = [
        group_five.geometry.union_all(method="coverage"),
        group_six.geometry.union_all(method="coverage"),
    ]
    expected = GeoDataFrame(
        {"myshapes": unions},
        geometry="myshapes",
        index=pd.Index([5, 6], name="manhattan_bronx"),
        crs=nybb_polydf.crs,
    )
    expected["BoroName"] = ["Staten Island", "Manhattan"]
    expected["BoroCode"] = [5, 1]

    dissolved = nybb_polydf.dissolve("manhattan_bronx", method="coverage")

    assert_frame_equal(expected, dissolved, check_column_type=False)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,648 @@
|
||||
"""
|
||||
This file contains a minimal set of tests for compliance with the extension
|
||||
array interface test suite (by inheriting the pandas test suite), and should
|
||||
contain no other tests.
|
||||
Other tests (eg related to the spatial functionality or integration
|
||||
with GeoSeries/GeoDataFrame) should be added to test_array.py and others.
|
||||
|
||||
The tests in this file are inherited from the BaseExtensionTests, and only
|
||||
minimal tweaks should be applied to get the tests passing (by overwriting a
|
||||
parent method).
|
||||
|
||||
A set of fixtures is defined to provide data for the tests (the fixtures
|
||||
expected to be available to pytest by the inherited pandas tests).
|
||||
|
||||
"""
|
||||
|
||||
import itertools
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pandas.tests.extension import base as extension_tests
|
||||
|
||||
import shapely.geometry
|
||||
from shapely.geometry import Point
|
||||
|
||||
from geopandas._compat import PANDAS_GE_15, PANDAS_GE_21, PANDAS_GE_22
|
||||
from geopandas.array import GeometryArray, GeometryDtype, from_shapely
|
||||
|
||||
import pytest
|
||||
from pandas.testing import assert_frame_equal, assert_series_equal
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Compat with extension tests in older pandas versions
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
not_yet_implemented = pytest.mark.skip(reason="Not yet implemented")
|
||||
no_minmax = pytest.mark.skip(reason="Min/max not supported")
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Required fixtures
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
def dtype():
    """Return a fresh ``GeometryDtype``, the extension dtype under test."""
    geometry_dtype = GeometryDtype()
    return geometry_dtype
|
||||
|
||||
|
||||
def make_data():
    """Build a geometry array holding the 100 points ``Point(i, i)`` for i in 0..99."""
    # Fill a pre-allocated object array so numpy stores the points as scalars.
    points = np.empty(100, dtype=object)
    points[:] = [shapely.geometry.Point(i, i) for i in range(100)]
    return from_shapely(points)
|
||||
|
||||
|
||||
@pytest.fixture
def data():
    """Length-100 array produced by ``make_data``.

    The shared test suite expects:

    * data[0] and data[1] to both be non missing
    * data[0] and data[1] to differ from each other
    """
    array = make_data()
    return array
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_twos():
    """Placeholder for the "array of twos" fixture; always raises.

    Raises
    ------
    NotImplementedError
        Unconditionally, no such array is provided here.
    """
    raise NotImplementedError
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing():
    """Two-element array: a missing value followed by ``Point(1, 1)``."""
    elements = [None, shapely.geometry.Point(1, 1)]
    return from_shapely(elements)
|
||||
|
||||
|
||||
@pytest.fixture(params=["data", "data_missing"])
def all_data(request, data, data_missing):
    """Yield ``data`` or ``data_missing`` depending on the current parameter."""
    by_name = {"data": data, "data_missing": data_missing}
    # Any other parameter value falls through to None, as with an if/elif chain.
    return by_name.get(request.param)
|
||||
|
||||
|
||||
@pytest.fixture
def data_repeated(data):
    """
    Provide a factory that repeats the ``data`` fixture.

    Parameters
    ----------
    data : fixture implementing `data`

    Returns
    -------
    Callable[[int], Generator]:
        Called with ``count``, it returns a generator that yields the same
        ``data`` object ``count`` times.
    """

    def gen(count):
        yield from itertools.repeat(data, count)

    return gen
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_sorting():
    """Three points used by the sorting tests.

    The shared suite expects the layout [B, C, A] with A < B < C; here that is
    ``Point(0, 1)``, ``Point(1, 1)``, ``Point(0, 0)``.
    """
    ordered_bca = [Point(0, 1), Point(1, 1), Point(0, 0)]
    return from_shapely(ordered_bca)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing_for_sorting():
    """Three elements used by the sorting tests, the middle one missing.

    The shared suite expects the layout [B, NA, A] with A < B; here that is
    ``Point(1, 2)``, ``None``, ``Point(0, 0)``.
    """
    ordered_b_na_a = [Point(1, 2), None, Point(0, 0)]
    return from_shapely(ordered_b_na_a)
|
||||
|
||||
|
||||
@pytest.fixture
def na_cmp():
    """Return the binary predicate used to compare two NA scalars.

    The returned function takes two arguments and is true only when both of
    them are ``None``.
    """

    def both_none(x, y):
        return x is None and y is None

    return both_none
|
||||
|
||||
|
||||
@pytest.fixture
def na_value():
    """Scalar used as the missing value in these tests: ``None``."""
    missing = None
    return missing
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_grouping():
    """Array for the factorization, grouping and unique tests.

    The shared suite expects the layout [B, B, NA, NA, A, A, B, C]
    where A < B < C and NA is missing.
    """
    make_point = shapely.geometry.Point
    layout = [
        make_point(1, 1),
        make_point(1, 1),
        None,
        None,
        make_point(0, 0),
        make_point(0, 0),
        make_point(1, 1),
        make_point(2, 2),
    ]
    return from_shapely(layout)
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
def box_in_series(request):
    """Boolean switch: should the data be wrapped in a Series?"""
    wrap = request.param
    return wrap
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        lambda x: 1,
        lambda x: [1] * len(x),
        lambda x: pd.Series([1] * len(x)),
        lambda x: x,
    ],
    ids=["scalar", "list", "series", "object"],
)
def groupby_apply_op(request):
    """
    Callable handed to groupby.apply(); one of four variants that return a
    scalar, a list, a Series or the input itself.
    """
    operation = request.param
    return operation
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
def as_frame(request):
    """
    Boolean switch for tests comparing a Series with Series.to_frame().
    """
    flag = request.param
    return flag
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
def as_series(request):
    """
    Boolean switch for tests comparing arr with Series(arr).
    """
    flag = request.param
    return flag
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
def use_numpy(request):
    """
    Boolean switch for tests comparing an ExtensionDtype array with a
    numpy array.
    """
    flag = request.param
    return flag
|
||||
|
||||
|
||||
@pytest.fixture(params=["ffill", "bfill"])
def fillna_method(request):
    """
    Fill method name, either 'ffill' or 'bfill', for
    Series.fillna(method=<method>) testing.
    """
    method_name = request.param
    return method_name
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
def as_array(request):
    """
    Boolean switch for the ExtensionDtype _from_sequence method tests.
    """
    flag = request.param
    return flag
|
||||
|
||||
|
||||
@pytest.fixture
def invalid_scalar(data):
    """
    Return a scalar that this ExtensionArray *cannot* hold.

    A bare ``object`` instance is used. This default should suit most
    subclasses but is not guaranteed to; an array able to hold any item
    (i.e. object dtype) should use pytest.skip instead.
    """
    return object()
|
||||
|
||||
|
||||
# Fixtures defined in pandas/conftest.py that are also needed: defining them
|
||||
# here instead of importing for compatibility
|
||||
|
||||
|
||||
@pytest.fixture(
    params=["sum", "max", "min", "mean", "prod", "std", "var", "median", "kurt", "skew"]
)
def all_numeric_reductions(request):
    """
    Name of a numeric reduction, one per parametrized run.
    """
    reduction_name = request.param
    return reduction_name
|
||||
|
||||
|
||||
@pytest.fixture(params=["all", "any"])
def all_boolean_reductions(request):
    """
    Name of a boolean reduction, either "all" or "any".
    """
    reduction_name = request.param
    return reduction_name
|
||||
|
||||
|
||||
# only == and != are supported for GeometryArray
|
||||
# @pytest.fixture(params=["__eq__", "__ne__", "__le__", "__lt__", "__ge__", "__gt__"])
|
||||
@pytest.fixture(params=["__eq__", "__ne__"])
def all_compare_operators(request):
    """
    Dunder name of a comparison operator.

    Only the two parametrized names are produced:

    * ==  (``__eq__``)
    * !=  (``__ne__``)
    """
    dunder_name = request.param
    return dunder_name
|
||||
|
||||
|
||||
@pytest.fixture(params=[None, lambda x: x])
def sort_by_key(request):
    """
    Key argument for sorting tests.

    Either None (no key) or the identity function.
    """
    key = request.param
    return key
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Inherited tests
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestDtype(extension_tests.BaseDtypeTests):
    """Inherited dtype tests plus two geometry-specific checks."""

    # additional tests

    def test_array_type_with_arg(self, data, dtype):
        """The dtype must name ``GeometryArray`` as its array type."""
        array_type = dtype.construct_array_type()
        assert array_type is GeometryArray

    def test_registry(self, data, dtype):
        """Casting an object Series with the string "geometry" yields a GeometryArray."""
        as_objects = pd.Series(np.asarray(data), dtype=object)

        converted = as_objects.astype("geometry")

        assert isinstance(converted.array, GeometryArray)
        assert_series_equal(converted, pd.Series(data))
|
||||
|
||||
|
||||
class TestInterface(extension_tests.BaseInterfaceTests):
    """Inherited interface tests with ``test_contains`` overridden."""

    def test_contains(self, data, data_missing):
        """Membership checks for valid elements, ``None`` and ``pd.NaT``."""
        # overridden due to the inconsistency between
        # GeometryDtype.na_value = np.nan
        # and None being used as NA in array

        # ensure data without missing values
        not_missing = ~data.isna()
        data = data[not_missing]

        # first elements are non-missing
        first_valid = data[0]
        assert first_valid in data
        first_of_missing = data_missing[0]
        assert first_of_missing in data_missing

        assert None in data_missing
        assert None not in data
        assert pd.NaT not in data_missing
|
||||
|
||||
|
||||
class TestConstructors(extension_tests.BaseConstructorsTests):
    """Runs the inherited constructor tests unchanged."""
|
||||
|
||||
|
||||
class TestReshaping(extension_tests.BaseReshapingTests):
    """Inherited reshaping tests with ``test_unstack`` overridden."""

    # NOTE: this test is copied from pandas/tests/extension/base/reshaping.py
    # because starting with pandas 3.0 the assert_frame_equal is strict regarding
    # the exact missing value (None vs NaN)
    # Our `result` uses None, but the way the `expected` is created results in
    # NaNs (and specifying to use None as fill value in unstack also does not
    # help)
    # -> the only change compared to the upstream test is marked
    @pytest.mark.parametrize(
        "index",
        [
            # Two levels, uniform.
            pd.MultiIndex.from_product(([["A", "B"], ["a", "b"]]), names=["a", "b"]),
            # non-uniform
            pd.MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "b")]),
            # three levels, non-uniform
            pd.MultiIndex.from_product([("A", "B"), ("a", "b", "c"), (0, 1, 2)]),
            pd.MultiIndex.from_tuples(
                [
                    ("A", "a", 1),
                    ("A", "b", 0),
                    ("A", "a", 0),
                    ("B", "a", 0),
                    ("B", "c", 1),
                ]
            ),
        ],
    )
    @pytest.mark.parametrize("obj", ["series", "frame"])
    def test_unstack(self, data, index, obj):
        """Unstack over every permutation of index levels and compare with the
        object-dtype equivalent."""
        data = data[: len(index)]
        is_series = obj == "series"
        if is_series:
            ser = pd.Series(data, index=index)
        else:
            ser = pd.DataFrame({"A": data, "B": data}, index=index)

        n = index.nlevels
        levels = list(range(n))
        # [0, 1, 2]
        # [(0,), (1,), (2,), (0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]
        combinations = itertools.chain.from_iterable(
            itertools.permutations(levels, size) for size in range(1, n)
        )

        for level in combinations:
            result = ser.unstack(level=level)
            assert all(
                isinstance(result[col].array, type(data)) for col in result.columns
            )

            if is_series:
                # We should get the same result with to_frame+unstack+droplevel
                df = ser.to_frame()

                alt = df.unstack(level=level).droplevel(0, axis=1)
                assert_frame_equal(result, alt)

            obj_ser = ser.astype(object)

            expected = obj_ser.unstack(level=level, fill_value=data.dtype.na_value)
            if is_series:
                assert (expected.dtypes == object).all()
            # <------------ next line is added
            expected[expected.isna()] = None
            # ------------->

            result = result.astype(object)
            assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
class TestGetitem(extension_tests.BaseGetitemTests):
    """Runs the inherited getitem tests unchanged."""
|
||||
|
||||
|
||||
class TestSetitem(extension_tests.BaseSetitemTests):
    """Runs the inherited setitem tests unchanged."""
|
||||
|
||||
|
||||
class TestMissing(extension_tests.BaseMissingTests):
    """
    Missing-value tests.

    ``test_fillna_series`` is replaced by a local version; the remaining
    overrides only delegate to the parent test and are skipped unless
    ``PANDAS_GE_21`` is true.
    """

    def test_fillna_series(self, data_missing):
        """Fill a series with a scalar, an aligned series and a disjoint one."""
        fill_value = data_missing[1]
        ser = pd.Series(data_missing)
        # outcome expected whenever the fill actually takes place
        filled = pd.Series(data_missing._from_sequence([fill_value, fill_value]))

        # Fill with a scalar
        assert_series_equal(ser.fillna(fill_value), filled)

        # Fill with a series
        aligned = pd.Series(
            from_shapely(
                [
                    shapely.geometry.Point(1, 1),
                    shapely.geometry.Point(2, 2),
                ],
            )
        )
        assert_series_equal(ser.fillna(aligned), filled)

        # Fill with a series not affecting the missing values
        disjoint = pd.Series(
            from_shapely(
                [
                    shapely.geometry.Point(2, 2),
                    shapely.geometry.Point(1, 1),
                ]
            ),
            index=[10, 11],
        )
        assert_series_equal(ser.fillna(disjoint), ser)

        # More `GeoSeries.fillna` testcases are in
        # `geopandas\tests\test_pandas_methods.py::test_fillna_scalar`
        # and `geopandas\tests\test_pandas_methods.py::test_fillna_series`.

    @pytest.mark.skipif(
        not PANDAS_GE_21, reason="fillna method not supported with older pandas"
    )
    def test_fillna_limit_pad(self, data_missing):
        """Run the inherited test; skipped when ``PANDAS_GE_21`` is false."""
        super().test_fillna_limit_pad(data_missing)

    @pytest.mark.skipif(
        not PANDAS_GE_21, reason="fillna method not supported with older pandas"
    )
    def test_fillna_limit_backfill(self, data_missing):
        """Run the inherited test; skipped when ``PANDAS_GE_21`` is false."""
        super().test_fillna_limit_backfill(data_missing)

    @pytest.mark.skipif(
        not PANDAS_GE_21, reason="fillna method not supported with older pandas"
    )
    def test_fillna_series_method(self, data_missing, fillna_method):
        """Run the inherited test; skipped when ``PANDAS_GE_21`` is false."""
        super().test_fillna_series_method(data_missing, fillna_method)

    @pytest.mark.skipif(
        not PANDAS_GE_21, reason="fillna method not supported with older pandas"
    )
    def test_fillna_no_op_returns_copy(self, data):
        """Run the inherited test; skipped when ``PANDAS_GE_21`` is false."""
        super().test_fillna_no_op_returns_copy(data)
|
||||
|
||||
|
||||
if PANDAS_GE_22:
|
||||
from pandas.tests.extension.base import BaseReduceTests
|
||||
else:
|
||||
from pandas.tests.extension.base import BaseNoReduceTests as BaseReduceTests
|
||||
|
||||
|
||||
class TestReduce(BaseReduceTests):
    """Reduction tests inherited from whichever base class was imported above."""

    @pytest.mark.skip("boolean reduce (any/all) tested in test_pandas_methods")
    def test_reduce_series_boolean(self):
        """Skipped here; the skip marker states where any/all are tested."""
|
||||
|
||||
|
||||
_all_arithmetic_operators = [
|
||||
"__add__",
|
||||
"__radd__",
|
||||
# '__sub__', '__rsub__',
|
||||
"__mul__",
|
||||
"__rmul__",
|
||||
"__floordiv__",
|
||||
"__rfloordiv__",
|
||||
"__truediv__",
|
||||
"__rtruediv__",
|
||||
"__pow__",
|
||||
"__rpow__",
|
||||
"__mod__",
|
||||
"__rmod__",
|
||||
]
|
||||
|
||||
|
||||
@pytest.fixture(params=_all_arithmetic_operators)
def all_arithmetic_operators(request):
    """
    Yield, one per test run, each dunder name in ``_all_arithmetic_operators``.

    Adapted to exclude __sub__, as this is implemented as "difference".
    """
    return request.param
|
||||
|
||||
|
||||
# One of the tests inherited from pandas builds a Series from a list of
# geometries; Shapely then emits the warning below, which GeoPandas cannot
# control, so it is filtered for the whole class.
@pytest.mark.filterwarnings(
    "ignore:The array interface is deprecated and will no longer work in Shapely 2.0"
)
class TestArithmeticOps(extension_tests.BaseArithmeticOpsTests):
    """Inherited arithmetic-operator tests with two cases skipped."""

    @pytest.mark.skip(reason="not applicable")
    def test_divmod_series_array(self, data, data_for_twos):
        """Skipped as not applicable."""

    @pytest.mark.skip(reason="not applicable")
    def test_add_series_with_extension_array(self, data):
        """Skipped as not applicable."""
|
||||
|
||||
|
||||
# One of the tests inherited from pandas builds a Series from a list of
# geometries; Shapely then emits the warning below, which GeoPandas cannot
# control, so it is filtered for the whole class.
@pytest.mark.filterwarnings(
    "ignore:The array interface is deprecated and will no longer work in Shapely 2.0"
)
class TestComparisonOps(extension_tests.BaseComparisonOpsTests):
    """Comparison-operator tests with a local comparison helper."""

    def _compare_other(self, s, data, op_name, other):
        """Assert that applying the operator equals ``s.combine(other, op)``.

        ``op_name`` is a dunder name; stripping the underscores gives the
        attribute looked up on the ``operator`` module.
        """
        compare = getattr(operator, op_name.strip("_"))
        assert_series_equal(compare(s, other), s.combine(other, compare))

    def test_compare_scalar(self, data, all_compare_operators):
        """Compare the series against its own first element."""
        self._compare_other(pd.Series(data), data, all_compare_operators, data[0])

    def test_compare_array(self, data, all_compare_operators):
        """Compare the series against a series repeating its first element."""
        ser = pd.Series(data)
        repeated = pd.Series([data[0]] * len(data))
        self._compare_other(ser, data, all_compare_operators, repeated)
|
||||
|
||||
|
||||
class TestMethods(extension_tests.BaseMethodsTests):
    """Inherited method tests; most overrides here disable or re-mark a case."""

    @pytest.mark.skipif(
        not PANDAS_GE_15, reason="sorting index not yet working with older pandas"
    )
    @pytest.mark.parametrize("dropna", [True, False])
    def test_value_counts(self, all_data, dropna):
        """Override with an empty body (nothing is asserted)."""

    @pytest.mark.skipif(
        not PANDAS_GE_15, reason="sorting index not yet working with older pandas"
    )
    def test_value_counts_with_normalize(self, data):
        """Override with an empty body (nothing is asserted)."""

    @pytest.mark.parametrize("ascending", [True, False])
    def test_sort_values_frame(self, data_for_sorting, ascending):
        """Delegate to the inherited test for both sort directions."""
        super().test_sort_values_frame(data_for_sorting, ascending)

    @pytest.mark.skip(reason="searchsorted not supported")
    def test_searchsorted(self, data_for_sorting, as_series):
        """Skipped: searchsorted not supported."""

    @not_yet_implemented
    def test_combine_le(self):
        """Marked ``not_yet_implemented``; empty body."""

    @pytest.mark.skip(reason="addition not supported")
    def test_combine_add(self):
        """Skipped: addition not supported."""

    @not_yet_implemented
    def test_fillna_length_mismatch(self, data_missing):
        """Expect a ``ValueError`` when the fill value has a different length."""
        expected_message = "Length of 'value' does not match."
        with pytest.raises(ValueError, match=expected_message):
            data_missing.fillna(data_missing.take([1]))

    @no_minmax
    def test_argmin_argmax(self):
        """Marked ``no_minmax``; empty body."""

    @no_minmax
    def test_argmin_argmax_empty_array(self):
        """Marked ``no_minmax``; empty body."""

    @no_minmax
    def test_argmin_argmax_all_na(self):
        """Marked ``no_minmax``; empty body."""

    @no_minmax
    def test_argreduce_series(self):
        """Marked ``no_minmax``; empty body."""

    @no_minmax
    def test_argmax_argmin_no_skipna_notimplemented(self):
        """Marked ``no_minmax``; empty body."""
|
||||
|
||||
|
||||
class TestCasting(extension_tests.BaseCastingTests):
    """Inherit every test of ``extension_tests.BaseCastingTests`` unchanged."""
|
||||
|
||||
|
||||
class TestGroupby(extension_tests.BaseGroupbyTests):
    """Groupby tests; each override re-declares its parameters and delegates."""

    @pytest.mark.parametrize("as_index", [True, False])
    def test_groupby_extension_agg(self, as_index, data_for_grouping):
        """Delegate to the inherited test for both ``as_index`` values."""
        super().test_groupby_extension_agg(as_index, data_for_grouping)

    def test_groupby_extension_transform(self, data_for_grouping):
        """Delegate to the inherited test."""
        super().test_groupby_extension_transform(data_for_grouping)

    @pytest.mark.parametrize(
        "op",
        [
            lambda x: 1,
            lambda x: [1] * len(x),
            lambda x: pd.Series([1] * len(x)),
            lambda x: x,
        ],
        ids=["scalar", "list", "series", "object"],
    )
    def test_groupby_extension_apply(self, data_for_grouping, op):
        """Delegate to the inherited test once per ``op`` callable."""
        super().test_groupby_extension_apply(data_for_grouping, op)
|
||||
|
||||
|
||||
class TestPrinting(extension_tests.BasePrintingTests):
    """Inherit every test of ``extension_tests.BasePrintingTests`` unchanged."""
|
||||
|
||||
|
||||
@not_yet_implemented
class TestParsing(extension_tests.BaseParsingTests):
    """Inherited parsing tests, with ``not_yet_implemented`` applied class-wide."""
|
||||
@@ -0,0 +1,170 @@
|
||||
import pandas as pd
|
||||
|
||||
from shapely.geometry import Point
|
||||
|
||||
from geopandas import GeoDataFrame, GeoSeries
|
||||
from geopandas._compat import HAS_PYPROJ
|
||||
from geopandas.tools import geocode, reverse_geocode
|
||||
from geopandas.tools.geocoding import _prepare_geocode_result
|
||||
|
||||
import pytest
|
||||
from geopandas.testing import assert_geodataframe_equal
|
||||
from geopandas.tests.util import assert_geoseries_equal, mock
|
||||
from pandas.testing import assert_series_equal
|
||||
|
||||
geopy = pytest.importorskip("geopy")
|
||||
|
||||
|
||||
class ForwardMock(mock.MagicMock):
    """
    Stand-in for the forward geocoding function.

    Each call returns ``(address, (p, p + 0.5))`` where ``address`` is the
    first positional argument and ``p`` starts at 0.0 and grows by one with
    every call.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # running counter that drives the fake coordinates
        self._n = 0.0

    def __call__(self, *args, **kwargs):
        current = self._n
        # Set the canned answer first so the parent ``__call__`` both records
        # the call and hands this value back.
        self.return_value = args[0], (current, current + 0.5)
        self._n = current + 1
        return super().__call__(*args, **kwargs)
|
||||
|
||||
|
||||
class ReverseMock(mock.MagicMock):
    """
    Mock the reverse geocoding function.

    Each call returns ``("address{p}", point)`` where ``point`` is the first
    positional argument and ``p`` starts at 0 and increases by one with every
    call.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # number of calls answered so far; used as the address suffix
        self._n = 0

    def __call__(self, *args, **kwargs):
        # Set the canned answer first so the parent ``__call__`` both records
        # the call and hands this value back.
        self.return_value = f"address{self._n}", args[0]
        self._n += 1
        return super().__call__(*args, **kwargs)
|
||||
|
||||
|
||||
@pytest.fixture
def locations():
    """Return a list of two sample street-address strings."""
    return ["260 Broadway, New York, NY", "77 Massachusetts Ave, Cambridge, MA"]
|
||||
|
||||
|
||||
@pytest.fixture
def points():
    """Return a list of two sample ``Point`` geometries."""
    return [Point(-71.0597732, 42.3584308), Point(-77.0365305, 38.8977332)]
|
||||
|
||||
|
||||
def test_prepare_result():
    """Pass two sample geocoder results through ``_prepare_geocode_result``."""
    # Treat these as lat/lon
    p0 = Point(12.3, -45.6)
    p1 = Point(-23.4, 56.7)
    raw = {"a": ("address0", p0.coords[0]), "b": ("address1", p1.coords[0])}

    df = _prepare_geocode_result(raw)
    assert type(df) is GeoDataFrame
    if HAS_PYPROJ:
        assert df.crs == "EPSG:4326"
    assert len(df) == 2
    assert "address" in df

    # Output from the df should be lon/lat, i.e. swapped relative to the input
    for label, source in (("a", p0), ("b", p1)):
        got = df.loc[label]["geometry"].coords[0]
        given = source.coords[0]
        assert got[0] == pytest.approx(given[1])
        assert got[1] == pytest.approx(given[0])
|
||||
|
||||
|
||||
def test_prepare_result_none():
    """A ``(None, None)`` result yields an empty geometry and a ``None`` address."""
    p0 = Point(12.3, -45.6)  # Treat these as lat/lon
    raw = {"a": ("address0", p0.coords[0]), "b": (None, None)}

    df = _prepare_geocode_result(raw)
    assert type(df) is GeoDataFrame
    if HAS_PYPROJ:
        assert df.crs == "EPSG:4326"
    assert len(df) == 2
    assert "address" in df

    failed = df.loc["b"]

    # TODO we should probably replace this with a missing value instead of point?
    assert len(failed["geometry"].coords) == 0
    assert failed["geometry"].is_empty
    assert failed["address"] is None
|
||||
|
||||
|
||||
@pytest.mark.parametrize("geocode_result", (None, (None, None)))
def test_prepare_geocode_result_when_result_is(geocode_result):
    """Both ``None`` and ``(None, None)`` give one row with an empty point."""
    expected_output = GeoDataFrame(
        {"geometry": [Point()], "address": [None]},
        crs="EPSG:4326",
    )

    output = _prepare_geocode_result({0: geocode_result})

    assert_geodataframe_equal(output, expected_output)
|
||||
|
||||
|
||||
def test_bad_provider_forward():
    """``geocode`` with an unknown provider name raises ``GeocoderNotFound``."""
    from geopy.exc import GeocoderNotFound

    with pytest.raises(GeocoderNotFound):
        geocode(["cambridge, ma"], "badprovider")
|
||||
|
||||
|
||||
def test_bad_provider_reverse():
    """``reverse_geocode`` with an unknown provider raises ``GeocoderNotFound``."""
    from geopy.exc import GeocoderNotFound

    with pytest.raises(GeocoderNotFound):
        reverse_geocode([Point(0, 0)], "badprovider")
|
||||
|
||||
|
||||
def test_forward(locations, points):
    """Forward-geocode with a patched ``Photon.geocode``, by name and by class."""
    from geopy.geocoders import Photon

    for provider in ["photon", Photon]:
        with mock.patch("geopy.geocoders.Photon.geocode", ForwardMock()) as patched:
            gdf = geocode(locations, provider=provider, timeout=2)
            # one geocoder call per input address
            assert len(locations) == patched.call_count

        count = len(locations)
        assert isinstance(gdf, GeoDataFrame)
        # ForwardMock answers (n, n + 0.5); the expected points have the
        # two values in the opposite order.
        expected = GeoSeries(
            [Point(float(i) + 0.5, float(i)) for i in range(count)], crs="EPSG:4326"
        )
        assert_geoseries_equal(expected, gdf["geometry"])
        assert_series_equal(gdf["address"], pd.Series(locations, name="address"))
|
||||
|
||||
|
||||
def test_reverse(locations, points):
    """Reverse-geocode with a patched ``Photon.reverse``, by name and by class."""
    from geopy.geocoders import Photon

    for provider in ["photon", Photon]:
        with mock.patch("geopy.geocoders.Photon.reverse", ReverseMock()) as patched:
            gdf = reverse_geocode(points, provider=provider, timeout=2)
            # one geocoder call per input point
            assert len(points) == patched.call_count

        assert isinstance(gdf, GeoDataFrame)

        expected = GeoSeries(points, crs="EPSG:4326")
        assert_geoseries_equal(expected, gdf["geometry"])
        # ReverseMock numbers its answers "address0", "address1", ...
        addresses = pd.Series(
            ["address" + str(i) for i in range(len(points))], name="address"
        )
        assert_series_equal(gdf["address"], addresses)
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,747 @@
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import shutil
|
||||
import tempfile
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from shapely.geometry import (
|
||||
GeometryCollection,
|
||||
LineString,
|
||||
MultiLineString,
|
||||
MultiPoint,
|
||||
MultiPolygon,
|
||||
Point,
|
||||
Polygon,
|
||||
)
|
||||
from shapely.geometry.base import BaseGeometry
|
||||
|
||||
import geopandas._compat as compat
|
||||
from geopandas import GeoDataFrame, GeoSeries, clip, read_file
|
||||
from geopandas.array import GeometryArray, GeometryDtype
|
||||
|
||||
import pytest
|
||||
from geopandas.testing import assert_geoseries_equal, geom_almost_equals
|
||||
from geopandas.tests.util import geom_equals
|
||||
from numpy.testing import assert_array_equal
|
||||
from pandas.testing import assert_index_equal, assert_series_equal
|
||||
|
||||
|
||||
class TestSeries:
|
||||
def setup_method(self):
|
||||
self.tempdir = tempfile.mkdtemp()
|
||||
self.t1 = Polygon([(0, 0), (1, 0), (1, 1)])
|
||||
self.t2 = Polygon([(0, 0), (1, 1), (0, 1)])
|
||||
self.sq = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
|
||||
self.g1 = GeoSeries([self.t1, self.sq])
|
||||
self.g2 = GeoSeries([self.sq, self.t1])
|
||||
self.g3 = GeoSeries([self.t1, self.t2], crs="epsg:4326")
|
||||
self.g4 = GeoSeries([self.t2, self.t1])
|
||||
self.na = GeoSeries([self.t1, self.t2, Polygon()])
|
||||
self.na_none = GeoSeries([self.t1, self.t2, None])
|
||||
self.a1 = self.g1.copy()
|
||||
self.a1.index = ["A", "B"]
|
||||
self.a2 = self.g2.copy()
|
||||
self.a2.index = ["B", "C"]
|
||||
self.esb = Point(-73.9847, 40.7484)
|
||||
self.sol = Point(-74.0446, 40.6893)
|
||||
self.landmarks = GeoSeries([self.esb, self.sol], crs="epsg:4326")
|
||||
self.l1 = LineString([(0, 0), (0, 1), (1, 1)])
|
||||
self.l2 = LineString([(0, 0), (1, 0), (1, 1), (0, 1)])
|
||||
self.g5 = GeoSeries([self.l1, self.l2])
|
||||
self.esb3857 = Point(-8235939.130493107, 4975301.253789809)
|
||||
self.sol3857 = Point(-8242607.167991625, 4966620.938285081)
|
||||
self.landmarks3857 = GeoSeries([self.esb3857, self.sol3857], crs="epsg:3857")
|
||||
|
||||
def teardown_method(self):
|
||||
shutil.rmtree(self.tempdir)
|
||||
|
||||
def test_copy(self):
|
||||
gc = self.g3.copy()
|
||||
assert type(gc) is GeoSeries
|
||||
assert self.g3.name == gc.name
|
||||
assert self.g3.crs == gc.crs
|
||||
|
||||
def test_in(self):
|
||||
assert self.t1 in self.g1
|
||||
assert self.sq in self.g1
|
||||
assert self.t1 in self.a1
|
||||
assert self.t2 in self.g3
|
||||
assert self.sq not in self.g3
|
||||
assert 5 not in self.g3
|
||||
|
||||
def test_align(self):
|
||||
a1, a2 = self.a1.align(self.a2)
|
||||
assert isinstance(a1, GeoSeries)
|
||||
assert isinstance(a2, GeoSeries)
|
||||
assert a2["A"] is None
|
||||
assert a1["B"].equals(a2["B"])
|
||||
assert a1["C"] is None
|
||||
|
||||
@pytest.mark.skipif(not compat.HAS_PYPROJ, reason="pyproj not available")
|
||||
def test_align_crs(self):
|
||||
a1 = self.a1.set_crs("epsg:4326")
|
||||
a2 = self.a2.set_crs("epsg:31370")
|
||||
|
||||
res1, res2 = a1.align(a2)
|
||||
assert res1.crs == "epsg:4326"
|
||||
assert res2.crs == "epsg:31370"
|
||||
|
||||
res1, res2 = a1.align(a2.set_crs(None, allow_override=True))
|
||||
assert res1.crs == "epsg:4326"
|
||||
assert res2.crs is None
|
||||
|
||||
def test_align_mixed(self):
|
||||
a1 = self.a1
|
||||
s2 = pd.Series([1, 2], index=["B", "C"])
|
||||
res1, res2 = a1.align(s2)
|
||||
|
||||
exp2 = pd.Series([np.nan, 1, 2], index=["A", "B", "C"])
|
||||
assert_series_equal(res2, exp2)
|
||||
|
||||
def test_warning_if_not_aligned(self):
|
||||
# GH-816
|
||||
# Test that warning is issued when operating on non-aligned series
|
||||
|
||||
# _series_op
|
||||
with pytest.warns(UserWarning, match="The indices .+ not equal"):
|
||||
self.a1.contains(self.a2)
|
||||
|
||||
# _geo_op
|
||||
with pytest.warns(UserWarning, match="The indices .+ not equal"):
|
||||
self.a1.union(self.a2)
|
||||
|
||||
def test_no_warning_if_aligned(self):
|
||||
# GH-816
|
||||
# Test that warning is not issued when operating on aligned series
|
||||
a1, a2 = self.a1.align(self.a2)
|
||||
|
||||
with warnings.catch_warnings(record=True) as record:
|
||||
a1.contains(a2) # _series_op, explicitly aligned
|
||||
self.g1.intersects(self.g2) # _series_op, implicitly aligned
|
||||
a2.union(a1) # _geo_op, explicitly aligned
|
||||
self.g2.intersection(self.g1) # _geo_op, implicitly aligned
|
||||
|
||||
user_warnings = [w for w in record if w.category is UserWarning]
|
||||
assert not user_warnings, user_warnings[0].message
|
||||
|
||||
def test_geom_equals(self):
|
||||
assert np.all(self.g1.geom_equals(self.g1))
|
||||
assert_array_equal(self.g1.geom_equals(self.sq), [False, True])
|
||||
|
||||
def test_geom_equals_align(self):
|
||||
a = self.a1.geom_equals(self.a2, align=True)
|
||||
exp = pd.Series([False, True, False], index=["A", "B", "C"])
|
||||
assert_series_equal(a, exp)
|
||||
|
||||
a = self.a1.geom_equals(self.a2, align=False)
|
||||
exp = pd.Series([False, False], index=["A", "B"])
|
||||
assert_series_equal(a, exp)
|
||||
|
||||
@pytest.mark.filterwarnings(r"ignore:The 'geom_almost_equals\(\)':FutureWarning")
|
||||
def test_geom_almost_equals(self):
|
||||
# TODO: test decimal parameter
|
||||
assert np.all(self.g1.geom_almost_equals(self.g1))
|
||||
assert_array_equal(self.g1.geom_almost_equals(self.sq), [False, True])
|
||||
with warnings.catch_warnings():
|
||||
warnings.filterwarnings(
|
||||
"ignore",
|
||||
"The indices of the left and right GeoSeries' are not equal",
|
||||
UserWarning,
|
||||
)
|
||||
assert_array_equal(
|
||||
self.a1.geom_almost_equals(self.a2, align=True),
|
||||
[False, True, False],
|
||||
)
|
||||
assert_array_equal(
|
||||
self.a1.geom_almost_equals(self.a2, align=False), [False, False]
|
||||
)
|
||||
|
||||
def test_geom_equals_exact(self):
|
||||
# TODO: test tolerance parameter
|
||||
assert np.all(self.g1.geom_equals_exact(self.g1, 0.001))
|
||||
assert_array_equal(self.g1.geom_equals_exact(self.sq, 0.001), [False, True])
|
||||
with warnings.catch_warnings():
|
||||
warnings.filterwarnings(
|
||||
"ignore",
|
||||
"The indices of the left and right GeoSeries' are not equal",
|
||||
UserWarning,
|
||||
)
|
||||
assert_array_equal(
|
||||
self.a1.geom_equals_exact(self.a2, 0.001, align=True),
|
||||
[False, True, False],
|
||||
)
|
||||
assert_array_equal(
|
||||
self.a1.geom_equals_exact(self.a2, 0.001, align=False), [False, False]
|
||||
)
|
||||
|
||||
def test_equal_comp_op(self):
|
||||
s = GeoSeries([Point(x, x) for x in range(3)])
|
||||
res = s == Point(1, 1)
|
||||
exp = pd.Series([False, True, False])
|
||||
assert_series_equal(res, exp)
|
||||
|
||||
def test_to_file(self):
|
||||
"""Test to_file and from_file"""
|
||||
tempfilename = os.path.join(self.tempdir, "test.shp")
|
||||
self.g3.to_file(tempfilename)
|
||||
# Read layer back in?
|
||||
s = GeoSeries.from_file(tempfilename)
|
||||
assert all(self.g3.geom_equals(s))
|
||||
# TODO: compare crs
|
||||
|
||||
def test_to_json(self):
|
||||
"""
|
||||
Test whether GeoSeries.to_json works and returns an actual json file.
|
||||
"""
|
||||
json_str = self.g3.to_json()
|
||||
data = json.loads(json_str)
|
||||
assert "id" in data["features"][0].keys()
|
||||
assert "bbox" in data["features"][0].keys()
|
||||
# TODO : verify the output is a valid GeoJSON.
|
||||
|
||||
def test_to_json_drop_id(self):
|
||||
"""
|
||||
Test whether GeoSeries.to_json works when drop_id is True.
|
||||
"""
|
||||
json_str = self.g3.to_json(drop_id=True)
|
||||
data = json.loads(json_str)
|
||||
assert "id" not in data["features"][0].keys()
|
||||
|
||||
def test_to_json_no_bbox(self):
|
||||
"""
|
||||
Test whether GeoSeries.to_json works when show_bbox is False.
|
||||
"""
|
||||
json_str = self.g3.to_json(show_bbox=False)
|
||||
data = json.loads(json_str)
|
||||
assert "bbox" not in data["features"][0].keys()
|
||||
|
||||
def test_to_json_no_bbox_drop_id(self):
|
||||
"""
|
||||
Test whether GeoSeries.to_json works when show_bbox is False
|
||||
and drop_id is True.
|
||||
"""
|
||||
json_str = self.g3.to_json(show_bbox=False, drop_id=True)
|
||||
data = json.loads(json_str)
|
||||
assert "id" not in data["features"][0].keys()
|
||||
assert "bbox" not in data["features"][0].keys()
|
||||
|
||||
@pytest.mark.skipif(not compat.HAS_PYPROJ, reason="Requires pyproj")
|
||||
def test_to_json_wgs84(self):
|
||||
"""
|
||||
Test whether the wgs84 conversion works as intended.
|
||||
"""
|
||||
text = self.landmarks3857.to_json(to_wgs84=True)
|
||||
data = json.loads(text)
|
||||
assert data["type"] == "FeatureCollection"
|
||||
assert "id" in data["features"][0].keys()
|
||||
coord1 = data["features"][0]["geometry"]["coordinates"]
|
||||
coord2 = data["features"][1]["geometry"]["coordinates"]
|
||||
np.testing.assert_allclose(coord1, self.esb.coords[0])
|
||||
np.testing.assert_allclose(coord2, self.sol.coords[0])
|
||||
|
||||
def test_to_json_wgs84_false(self):
|
||||
"""
|
||||
Ensure no conversion to wgs84
|
||||
"""
|
||||
text = self.landmarks3857.to_json()
|
||||
data = json.loads(text)
|
||||
coord1 = data["features"][0]["geometry"]["coordinates"]
|
||||
coord2 = data["features"][1]["geometry"]["coordinates"]
|
||||
assert coord1 == [-8235939.130493107, 4975301.253789809]
|
||||
assert coord2 == [-8242607.167991625, 4966620.938285081]
|
||||
|
||||
def test_representative_point(self):
|
||||
assert np.all(self.g1.contains(self.g1.representative_point()))
|
||||
assert np.all(self.g2.contains(self.g2.representative_point()))
|
||||
assert np.all(self.g3.contains(self.g3.representative_point()))
|
||||
assert np.all(self.g4.contains(self.g4.representative_point()))
|
||||
|
||||
@pytest.mark.skipif(not compat.HAS_PYPROJ, reason="pyproj not available")
|
||||
def test_transform(self):
|
||||
utm18n = self.landmarks.to_crs(epsg=26918)
|
||||
lonlat = utm18n.to_crs(epsg=4326)
|
||||
assert geom_almost_equals(self.landmarks, lonlat)
|
||||
with pytest.raises(ValueError):
|
||||
self.g1.to_crs(epsg=4326)
|
||||
with pytest.raises(ValueError):
|
||||
self.landmarks.to_crs(crs=None, epsg=None)
|
||||
|
||||
def test_estimate_utm_crs__geographic(self):
|
||||
pyproj = pytest.importorskip("pyproj")
|
||||
assert self.landmarks.estimate_utm_crs() == pyproj.CRS("EPSG:32618")
|
||||
assert self.landmarks.estimate_utm_crs("NAD83") == pyproj.CRS("EPSG:26918")
|
||||
|
||||
def test_estimate_utm_crs__projected(self):
|
||||
pyproj = pytest.importorskip("pyproj")
|
||||
assert self.landmarks.to_crs("EPSG:3857").estimate_utm_crs() == pyproj.CRS(
|
||||
"EPSG:32618"
|
||||
)
|
||||
|
||||
@pytest.mark.skipif(not compat.HAS_PYPROJ, reason="pyproj not available")
|
||||
def test_estimate_utm_crs__out_of_bounds(self):
|
||||
with pytest.raises(RuntimeError, match="Unable to determine UTM CRS"):
|
||||
GeoSeries(
|
||||
[Polygon([(0, 90), (1, 90), (2, 90)])], crs="EPSG:4326"
|
||||
).estimate_utm_crs()
|
||||
|
||||
@pytest.mark.skipif(not compat.HAS_PYPROJ, reason="pyproj not available")
|
||||
def test_estimate_utm_crs__missing_crs(self):
|
||||
with pytest.raises(RuntimeError, match="crs must be set"):
|
||||
GeoSeries([Polygon([(0, 90), (1, 90), (2, 90)])]).estimate_utm_crs()
|
||||
|
||||
def test_fillna(self):
|
||||
# default is to fill with empty geometry
|
||||
na = self.na_none.fillna()
|
||||
assert isinstance(na[2], BaseGeometry)
|
||||
assert na[2].is_empty
|
||||
assert geom_equals(self.na_none[:2], na[:2])
|
||||
# XXX: method works inconsistently for different pandas versions
|
||||
# self.na_none.fillna(method='backfill')
|
||||
|
||||
def test_coord_slice(self):
|
||||
"""Test CoordinateSlicer"""
|
||||
# need some better test cases
|
||||
assert geom_equals(self.g3, self.g3.cx[:, :])
|
||||
assert geom_equals(self.g3[[True, False]], self.g3.cx[0.9:, :0.1])
|
||||
assert geom_equals(self.g3[[False, True]], self.g3.cx[0:0.1, 0.9:1.0])
|
||||
|
||||
def test_coord_slice_with_zero(self):
|
||||
# Test that CoordinateSlice correctly handles zero slice (#GH477).
|
||||
|
||||
gs = GeoSeries([Point(x, x) for x in range(-3, 4)])
|
||||
assert geom_equals(gs.cx[:0, :0], gs.loc[:3])
|
||||
assert geom_equals(gs.cx[:, :0], gs.loc[:3])
|
||||
assert geom_equals(gs.cx[:0, :], gs.loc[:3])
|
||||
assert geom_equals(gs.cx[0:, 0:], gs.loc[3:])
|
||||
assert geom_equals(gs.cx[0:, :], gs.loc[3:])
|
||||
assert geom_equals(gs.cx[:, 0:], gs.loc[3:])
|
||||
|
||||
def test_geoseries_geointerface(self):
|
||||
assert self.g1.__geo_interface__["type"] == "FeatureCollection"
|
||||
assert len(self.g1.__geo_interface__["features"]) == self.g1.shape[0]
|
||||
|
||||
@pytest.mark.skipif(not compat.HAS_PYPROJ, reason="pyproj not available")
|
||||
def test_proj4strings(self):
|
||||
# As string
|
||||
reprojected = self.g3.to_crs("+proj=utm +zone=30")
|
||||
reprojected_back = reprojected.to_crs(epsg=4326)
|
||||
assert geom_almost_equals(self.g3, reprojected_back)
|
||||
|
||||
# As dict
|
||||
reprojected = self.g3.to_crs({"proj": "utm", "zone": "30"})
|
||||
reprojected_back = reprojected.to_crs(epsg=4326)
|
||||
assert geom_almost_equals(self.g3, reprojected_back)
|
||||
|
||||
# Set to equivalent string, convert, compare to original
|
||||
copy = self.g3.copy().set_crs("epsg:4326", allow_override=True)
|
||||
reprojected = copy.to_crs({"proj": "utm", "zone": "30"})
|
||||
reprojected_back = reprojected.to_crs(epsg=4326)
|
||||
assert geom_almost_equals(self.g3, reprojected_back)
|
||||
|
||||
# Conversions by different format
|
||||
reprojected_string = self.g3.to_crs("+proj=utm +zone=30")
|
||||
reprojected_dict = self.g3.to_crs({"proj": "utm", "zone": "30"})
|
||||
assert geom_almost_equals(reprojected_string, reprojected_dict)
|
||||
|
||||
def test_from_wkb(self):
|
||||
assert_geoseries_equal(self.g1, GeoSeries.from_wkb([self.t1.wkb, self.sq.wkb]))
|
||||
|
||||
def test_from_wkb_on_invalid(self):
|
||||
# Single point LineString hex WKB: invalid
|
||||
invalid_wkb_hex = "01020000000100000000000000000008400000000000000840"
|
||||
message = "point array must contain 0 or >1 elements"
|
||||
|
||||
with pytest.raises(Exception, match=message):
|
||||
GeoSeries.from_wkb([invalid_wkb_hex], on_invalid="raise")
|
||||
|
||||
with pytest.warns(Warning, match=message):
|
||||
res = GeoSeries.from_wkb([invalid_wkb_hex], on_invalid="warn")
|
||||
assert res[0] is None
|
||||
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("error")
|
||||
res = GeoSeries.from_wkb([invalid_wkb_hex], on_invalid="ignore")
|
||||
assert res[0] is None
|
||||
|
||||
def test_from_wkb_series(self):
|
||||
s = pd.Series([self.t1.wkb, self.sq.wkb], index=[1, 2])
|
||||
expected = self.g1.copy()
|
||||
expected.index = pd.Index([1, 2])
|
||||
assert_geoseries_equal(expected, GeoSeries.from_wkb(s))
|
||||
|
||||
def test_from_wkb_series_with_index(self):
|
||||
index = [0]
|
||||
s = pd.Series([self.t1.wkb, self.sq.wkb], index=[0, 2])
|
||||
expected = self.g1.reindex(index)
|
||||
assert_geoseries_equal(expected, GeoSeries.from_wkb(s, index=index))
|
||||
|
||||
def test_from_wkt(self):
|
||||
assert_geoseries_equal(self.g1, GeoSeries.from_wkt([self.t1.wkt, self.sq.wkt]))
|
||||
|
||||
def test_from_wkt_on_invalid(self):
|
||||
# Single point LineString WKT: invalid
|
||||
invalid_wkt = "LINESTRING(0 0)"
|
||||
message = "point array must contain 0 or >1 elements"
|
||||
|
||||
with pytest.raises(Exception, match=message):
|
||||
GeoSeries.from_wkt([invalid_wkt], on_invalid="raise")
|
||||
|
||||
with pytest.warns(Warning, match=message):
|
||||
res = GeoSeries.from_wkt([invalid_wkt], on_invalid="warn")
|
||||
assert res[0] is None
|
||||
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("error")
|
||||
res = GeoSeries.from_wkt([invalid_wkt], on_invalid="ignore")
|
||||
assert res[0] is None
|
||||
|
||||
def test_from_wkt_series(self):
|
||||
s = pd.Series([self.t1.wkt, self.sq.wkt], index=[1, 2])
|
||||
expected = self.g1.copy()
|
||||
expected.index = pd.Index([1, 2])
|
||||
assert_geoseries_equal(expected, GeoSeries.from_wkt(s))
|
||||
|
||||
def test_from_wkt_series_with_index(self):
|
||||
index = [0]
|
||||
s = pd.Series([self.t1.wkt, self.sq.wkt], index=[0, 2])
|
||||
expected = self.g1.reindex(index)
|
||||
assert_geoseries_equal(expected, GeoSeries.from_wkt(s, index=index))
|
||||
|
||||
def test_to_wkb(self):
|
||||
assert_series_equal(pd.Series([self.t1.wkb, self.sq.wkb]), self.g1.to_wkb())
|
||||
assert_series_equal(
|
||||
pd.Series([self.t1.wkb_hex, self.sq.wkb_hex]), self.g1.to_wkb(hex=True)
|
||||
)
|
||||
|
||||
def test_to_wkt(self):
|
||||
assert_series_equal(pd.Series([self.t1.wkt, self.sq.wkt]), self.g1.to_wkt())
|
||||
|
||||
def test_clip(self, naturalearth_lowres, naturalearth_cities):
|
||||
left = read_file(naturalearth_cities)
|
||||
world = read_file(naturalearth_lowres)
|
||||
south_america = world[world["continent"] == "South America"]
|
||||
|
||||
expected = clip(left.geometry, south_america)
|
||||
result = left.geometry.clip(south_america)
|
||||
assert_geoseries_equal(result, expected)
|
||||
|
||||
def test_clip_sorting(self, naturalearth_cities, naturalearth_lowres):
|
||||
"""
|
||||
Test sorting of geodseries when clipping.
|
||||
"""
|
||||
cities = read_file(naturalearth_cities)
|
||||
world = read_file(naturalearth_lowres)
|
||||
south_america = world[world["continent"] == "South America"]
|
||||
|
||||
unsorted_clipped_cities = clip(cities, south_america, sort=False)
|
||||
sorted_clipped_cities = clip(cities, south_america, sort=True)
|
||||
|
||||
expected_sorted_index = pd.Index(
|
||||
[55, 59, 62, 88, 101, 114, 122, 169, 181, 189, 210, 230, 236, 238, 239]
|
||||
)
|
||||
|
||||
assert not (
|
||||
sorted(unsorted_clipped_cities.index) == unsorted_clipped_cities.index
|
||||
).all()
|
||||
assert (
|
||||
sorted(sorted_clipped_cities.index) == sorted_clipped_cities.index
|
||||
).all()
|
||||
assert_index_equal(expected_sorted_index, sorted_clipped_cities.index)
|
||||
|
||||
def test_from_xy_points(self):
|
||||
x = self.landmarks.x.values
|
||||
y = self.landmarks.y.values
|
||||
index = self.landmarks.index.tolist()
|
||||
crs = self.landmarks.crs
|
||||
assert_geoseries_equal(
|
||||
self.landmarks, GeoSeries.from_xy(x, y, index=index, crs=crs)
|
||||
)
|
||||
assert_geoseries_equal(
|
||||
self.landmarks,
|
||||
GeoSeries.from_xy(self.landmarks.x, self.landmarks.y, crs=crs),
|
||||
)
|
||||
|
||||
def test_from_xy_points_w_z(self):
    """from_xy with a z Series builds 3D points and keeps the shared index."""
    labels = [5, 6, 7]
    coords = [(0, 8, 5), (-1, 3, -6), (2, 1, 7)]

    # One Series per axis, all sharing the same non-default index.
    xs, ys, zs = (
        pd.Series([point[axis] for point in coords], index=labels)
        for axis in range(3)
    )

    expected = GeoSeries([Point(*point) for point in coords], index=labels)
    assert_geoseries_equal(expected, GeoSeries.from_xy(xs, ys, zs))
|
||||
|
||||
def test_from_xy_points_unequal_index(self):
    """from_xy with x and y Series whose indexes differ."""
    xs = self.landmarks.x
    ys = self.landmarks.y
    # Give y an index that does not match x.
    ys.index = -np.arange(len(ys))
    crs = self.landmarks.crs

    # An explicit index is used for the result.
    with_explicit_index = GeoSeries.from_xy(xs, ys, index=xs.index, crs=crs)
    assert_geoseries_equal(self.landmarks, with_explicit_index)

    # Without an index argument the result is compared against the
    # landmarks with a reset (default) index.
    expected_default = self.landmarks.copy()
    expected_default.reset_index(inplace=True, drop=True)
    without_index = GeoSeries.from_xy(xs, ys, crs=crs)
    assert_geoseries_equal(expected_default, without_index)
|
||||
|
||||
def test_from_xy_points_indexless(self):
    """from_xy accepts bare numpy arrays for x, y and z."""
    xs = np.array([0.0, 3.0])
    ys = np.array([2.0, 5.0])
    zs = np.array([-1.0, 4.0])

    built = GeoSeries.from_xy(xs, ys, zs)
    expected = GeoSeries([Point(0, 2, -1), Point(3, 5, 4)])
    assert_geoseries_equal(expected, built)
|
||||
|
||||
@pytest.mark.skipif(compat.HAS_PYPROJ, reason="pyproj installed")
def test_set_crs_pyproj_error(self):
    """Without pyproj, set_crs raises an ImportError naming the package."""
    expected_message = "The 'pyproj' package is required for set_crs"
    with pytest.raises(ImportError, match=expected_message):
        self.g1.set_crs(3857)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::UserWarning")
def test_missing_values():
    """Missing (None / NaN) and empty geometries are told apart."""
    s = GeoSeries([Point(1, 1), None, np.nan, GeometryCollection(), Polygon()])

    # construction -> missing values get normalized to None
    for position in (1, 2):
        assert s[position] is None
    # empty geometries stay geometries
    for position in (3, 4):
        assert s[position].is_empty

    # isna / is_empty
    assert s.isna().tolist() == [False, True, True, False, False]
    assert s.is_empty.tolist() == [False, False, False, True, True]
    assert s.notna().tolist() == [True, False, False, True, True]

    # fillna defaults to fill with empty geometry -> no missing values anymore
    filled = s.fillna()
    assert not filled.isna().any()

    # dropna drops the missing values
    remaining = s.dropna()
    assert not remaining.isna().any()
    assert len(remaining) == 3
|
||||
|
||||
|
||||
def test_isna_empty_geoseries():
    """isna() on an empty GeoSeries returns an empty Series of bool dtype."""
    expected = pd.Series([], dtype="bool")
    assert_series_equal(GeoSeries([]).isna(), expected)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not compat.HAS_PYPROJ, reason="pyproj not available")
def test_geoseries_crs():
    """set_crs accepts an authority string from outside the EPSG registry."""
    empty = GeoSeries()
    with_crs = empty.set_crs("IGNF:ETRS89UTM28")
    assert with_crs.crs.to_authority() == ("IGNF", "ETRS89UTM28")
|
||||
|
||||
|
||||
@pytest.mark.skipif(not compat.HAS_PYPROJ, reason="Requires pyproj")
def test_geoseries_override_existing_crs_warning():
    """Assigning .crs on a GeoSeries that already has one warns."""
    expected_warning = "Overriding the CRS of a GeoSeries that already has CRS"
    gs = GeoSeries(crs="epsg:4326")
    with pytest.warns(DeprecationWarning, match=expected_warning):
        gs.crs = "epsg:2100"
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# # Constructor tests
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def check_geoseries(s):
    """Assert that ``s`` is a GeoSeries backed by geometry dtype and array."""
    assert isinstance(s, GeoSeries)
    # Attributes are looked up one at a time so that a failure points at
    # the first attribute with an unexpected type.
    expected_attribute_types = (
        ("geometry", GeoSeries),
        ("dtype", GeometryDtype),
        ("values", GeometryArray),
    )
    for attribute, expected_type in expected_attribute_types:
        assert isinstance(getattr(s, attribute), expected_type)
|
||||
|
||||
|
||||
class TestConstructor:
    """Tests for building a GeoSeries and for a few reshaping methods."""

    def test_constructor(self):
        """A list of points yields a well-formed GeoSeries."""
        s = GeoSeries([Point(x, x) for x in range(3)])
        check_geoseries(s)

    def test_single_geom_constructor(self):
        """A single geometry gives length 1, or is repeated over an index."""
        p = Point(1, 2)
        line = LineString([(2, 3), (4, 5), (5, 6)])
        poly = Polygon(
            [(0, 0), (1, 0), (1, 1), (0, 1)], [[(0.1, 0.1), (0.9, 0.1), (0.9, 0.9)]]
        )
        mp = MultiPoint([(1, 2), (3, 4), (5, 6)])
        mline = MultiLineString([[(1, 2), (3, 4), (5, 6)], [(7, 8), (9, 10)]])

        poly2 = Polygon(
            [(0, 0), (0, -1), (-1, -1), (-1, 0)],
            [[(-0.1, -0.1), (-0.1, -0.5), (-0.5, -0.5), (-0.5, -0.1)]],
        )
        mpoly = MultiPolygon([poly, poly2])

        geoms = [p, line, poly, mp, mline, mpoly]
        index = ["a", "b", "c", "d"]

        for g in geoms:
            gs = GeoSeries(g)
            assert len(gs) == 1
            # accessing elements no longer give identical objects
            assert gs.iloc[0].equals(g)

            gs = GeoSeries(g, index=index)
            assert len(gs) == len(index)
            for x in gs:
                assert x.equals(g)

    def test_non_geometry_raises(self):
        """Booleans, strings and nested lists are rejected with TypeError."""
        for data in ([True, False, True], ["a", "b", "c"], [[1, 2], [3, 4]]):
            with pytest.raises(
                TypeError, match="Non geometry data passed to GeoSeries"
            ):
                GeoSeries(data)

    def test_empty(self):
        """Both an empty list and no data at all give a valid GeoSeries."""
        s = GeoSeries([])
        check_geoseries(s)

        s = GeoSeries()
        check_geoseries(s)

    def test_data_is_none(self):
        """An index without data still gives a valid GeoSeries."""
        s = GeoSeries(index=range(3))
        check_geoseries(s)

    def test_empty_array(self):
        """Empty arrays are accepted or rejected depending on their dtype."""
        # with empty data that have an explicit dtype, we use the fallback or
        # not depending on the dtype

        # dtypes that can never hold geometry-like data
        for arr in [
            np.array([], dtype="bool"),
            np.array([], dtype="int64"),
            np.array([], dtype="float32"),
            # this gets converted to object dtype by pandas
            # np.array([], dtype="str"),
        ]:
            with pytest.raises(
                TypeError, match="Non geometry data passed to GeoSeries"
            ):
                GeoSeries(arr)

        # dtypes that can potentially hold geometry-like data (object) or
        # can come from empty data (float64)
        for arr in [
            np.array([], dtype="object"),
            np.array([], dtype="float64"),
            np.array([], dtype="str"),
        ]:
            with warnings.catch_warnings(record=True) as record:
                s = GeoSeries(arr)
                assert not record
                assert isinstance(s, GeoSeries)

    def test_from_series(self):
        """A pandas Series of geometries converts with name and index kept."""
        shapes = [
            Polygon([(random.random(), random.random()) for _ in range(3)])
            for _ in range(10)
        ]

        s = pd.Series(shapes, index=list("abcdefghij"), name="foo")
        g = GeoSeries(s)
        check_geoseries(g)

        # all() is required: asserting the bare list would pass for any
        # non-empty list, whatever the comparison results are.
        assert all(a.equals(b) for a, b in zip(s, g))
        assert s.name == g.name
        assert s.index is g.index

    @pytest.mark.skipif(not compat.HAS_PYPROJ, reason="pyproj not available")
    def test_from_series_no_set_crs_on_construction(self):
        """Passing ``crs`` on construction must not set it on the input data."""
        # https://github.com/geopandas/geopandas/issues/2492
        # also when passing Series[geometry], ensure we don't change crs of
        # original data
        gs = GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)])
        s = pd.Series(gs)
        result = GeoSeries(s, crs=4326)
        assert s.values.crs is None
        assert gs.crs is None
        assert result.crs == "EPSG:4326"

    def test_copy(self):
        """Check when the constructor shares memory with the input array."""
        # default is to copy with CoW / pandas 3+
        arr = np.array([Point(x, x) for x in range(3)], dtype=object)
        result = GeoSeries(arr)
        # modifying result doesn't change original array
        result.loc[0] = Point(10, 10)
        if compat.PANDAS_GE_30 or getattr(pd.options.mode, "copy_on_write", False):
            assert arr[0] == Point(0, 0)
        else:
            assert arr[0] == Point(10, 10)

        # avoid copy with copy=False
        arr = np.array([Point(x, x) for x in range(3)], dtype=object)
        result = GeoSeries(arr, copy=False)
        assert result.array._data.flags.writeable
        # now modifying result also updates original array
        result.loc[0] = Point(10, 10)
        assert arr[0] == Point(10, 10)

    # GH 1216
    @pytest.mark.parametrize("name", [None, "geometry", "Points"])
    @pytest.mark.parametrize("crs", [None, "epsg:4326"])
    def test_reset_index(self, name, crs):
        """reset_index returns a GeoDataFrame keeping geometry name and CRS."""
        s = GeoSeries(
            [MultiPoint([(0, 0), (1, 1)]), MultiPoint([(2, 2), (3, 3), (4, 4)])],
            name=name,
            crs=crs,
        )
        s = s.explode(index_parts=True)
        df = s.reset_index()
        # exact type check on purpose: a subclass would not be acceptable
        assert type(df) is GeoDataFrame
        # name None -> 0, otherwise name preserved
        assert df.geometry.name == (name if name is not None else 0)
        assert df.crs == s.crs

    @pytest.mark.parametrize("name", [None, "geometry", "Points"])
    @pytest.mark.parametrize("crs", [None, "epsg:4326"])
    def test_to_frame(self, name, crs):
        """to_frame returns a GeoDataFrame keeping geometry name and CRS."""
        s = GeoSeries([Point(0, 0), Point(1, 1)], name=name, crs=crs)
        df = s.to_frame()
        assert type(df) is GeoDataFrame
        # name None -> 0, otherwise name preserved
        expected_name = name if name is not None else 0
        assert df.geometry.name == expected_name
        assert df._geometry_column_name == expected_name
        assert df.crs == s.crs

        # if name is provided to to_frame, it should override
        df2 = s.to_frame(name="geom")
        # check the renamed frame itself, not the first result again
        assert type(df2) is GeoDataFrame
        assert df2.geometry.name == "geom"
        assert df2.crs == s.crs

    def test_explode_without_multiindex(self):
        """explode(index_parts=False) repeats the original index labels."""
        s = GeoSeries(
            [MultiPoint([(0, 0), (1, 1)]), MultiPoint([(2, 2), (3, 3), (4, 4)])]
        )
        s = s.explode(index_parts=False)
        expected_index = pd.Index([0, 0, 1, 1, 1])
        assert_index_equal(s.index, expected_index)

    def test_explode_ignore_index(self):
        """explode(ignore_index=True) gives a fresh 0..n-1 index."""
        s = GeoSeries(
            [MultiPoint([(0, 0), (1, 1)]), MultiPoint([(2, 2), (3, 3), (4, 4)])]
        )
        s = s.explode(ignore_index=True)
        expected_index = pd.Index(range(len(s)))
        assert_index_equal(s.index, expected_index)

        # index_parts is ignored if ignore_index=True
        s = s.explode(index_parts=True, ignore_index=True)
        assert_index_equal(s.index, expected_index)
|
||||
@@ -0,0 +1,230 @@
|
||||
import warnings
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from shapely.geometry import Point
|
||||
|
||||
from geopandas import GeoDataFrame, GeoSeries
|
||||
from geopandas._compat import HAS_PYPROJ, PANDAS_GE_21
|
||||
|
||||
import pytest
|
||||
from geopandas.testing import assert_geodataframe_equal
|
||||
from pandas.testing import assert_index_equal
|
||||
|
||||
|
||||
class TestMerging:
    """Tests that pandas merge / concat keep GeoDataFrame type and metadata."""

    def setup_method(self):
        """Create the small geometry and non-geometry objects the tests share."""
        self.gseries = GeoSeries([Point(i, i) for i in range(3)])
        self.series = pd.Series([1, 2, 3])
        self.gdf = GeoDataFrame({"geometry": self.gseries, "values": range(3)})
        self.df = pd.DataFrame({"col1": [1, 2, 3], "col2": [0.1, 0.2, 0.3]})

    def _check_metadata(self, gdf, geometry_column_name="geometry", crs=None):
        """Assert the active geometry column name and the CRS of ``gdf``."""
        assert gdf._geometry_column_name == geometry_column_name
        assert gdf.crs == crs

    def test_merge(self):
        """merge() with a plain DataFrame returns a GeoDataFrame."""
        res = self.gdf.merge(self.df, left_on="values", right_on="col1")

        # check result is a GeoDataFrame
        assert isinstance(res, GeoDataFrame)

        # check geometry property gives GeoSeries
        assert isinstance(res.geometry, GeoSeries)

        # check metadata
        self._check_metadata(res)

        # test that crs and other geometry name are preserved
        self.gdf.crs = "epsg:4326"
        self.gdf = self.gdf.rename(columns={"geometry": "points"}).set_geometry(
            "points"
        )
        res = self.gdf.merge(self.df, left_on="values", right_on="col1")
        assert isinstance(res, GeoDataFrame)
        assert isinstance(res.geometry, GeoSeries)
        self._check_metadata(res, "points", self.gdf.crs)

    def test_concat_axis0(self):
        """Row-wise concat of frames and of geometry series keeps geo types."""
        # frame
        res = pd.concat([self.gdf, self.gdf])
        assert res.shape == (6, 2)
        assert isinstance(res, GeoDataFrame)
        assert isinstance(res.geometry, GeoSeries)
        self._check_metadata(res)
        exp = GeoDataFrame(pd.concat([pd.DataFrame(self.gdf), pd.DataFrame(self.gdf)]))
        assert_geodataframe_equal(exp, res)

        # series
        res = pd.concat([self.gdf.geometry, self.gdf.geometry])
        assert res.shape == (6,)
        assert isinstance(res, GeoSeries)
        assert isinstance(res.geometry, GeoSeries)

    @pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not available")
    def test_concat_axis0_crs(self):
        """Row-wise concat resolves, warns about or rejects CRS combinations."""
        # CRS not set for both GeoDataFrame
        res = pd.concat([self.gdf, self.gdf])
        self._check_metadata(res)

        # CRS set for both GeoDataFrame, same CRS
        res1 = pd.concat([self.gdf.set_crs("epsg:4326"), self.gdf.set_crs("epsg:4326")])
        self._check_metadata(res1, crs="epsg:4326")

        # CRS not set for one GeoDataFrame, but set for the other GeoDataFrame
        with pytest.warns(
            UserWarning, match=r"CRS not set for some of the concatenation inputs.*"
        ):
            res2 = pd.concat([self.gdf, self.gdf.set_crs("epsg:4326")])
            self._check_metadata(res2, crs="epsg:4326")

        # CRS set for both GeoDataFrame, different CRS
        with pytest.raises(
            ValueError, match=r"Cannot determine common CRS for concatenation inputs.*"
        ):
            pd.concat([self.gdf.set_crs("epsg:4326"), self.gdf.set_crs("epsg:4327")])

        # CRS not set for one GeoDataFrame, but set for the other GeoDataFrames,
        # same CRS
        with pytest.warns(
            UserWarning, match=r"CRS not set for some of the concatenation inputs.*"
        ):
            res3 = pd.concat(
                [self.gdf, self.gdf.set_crs("epsg:4326"), self.gdf.set_crs("epsg:4326")]
            )
            self._check_metadata(res3, crs="epsg:4326")

        # CRS not set for one GeoDataFrame, but set for the other GeoDataFrames,
        # different CRS
        with pytest.raises(
            ValueError, match=r"Cannot determine common CRS for concatenation inputs.*"
        ):
            pd.concat(
                [self.gdf, self.gdf.set_crs("epsg:4326"), self.gdf.set_crs("epsg:4327")]
            )

    @pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not available")
    def test_concat_axis0_unaligned_cols(self):
        """Concat of frames with different geometry columns must not warn."""
        # https://github.com/geopandas/geopandas/issues/2679
        gdf = self.gdf.set_crs("epsg:4326").assign(
            geom=self.gdf.geometry.set_crs("epsg:4327")
        )
        both_geom_cols = gdf[["geom", "geometry"]]
        single_geom_col = gdf[["geometry"]]
        # Turn every warning into an error so that any warning fails the test.
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            pd.concat([both_geom_cols, single_geom_col])
        # Check order of mismatch doesn't matter
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            pd.concat([single_geom_col, both_geom_cols])

        # Side effect of this fix, explicitly provided all none geoseries
        # will not be warned for (ideally this would still warn)
        explicit_all_none_case = gdf[["geometry"]].assign(
            geom=GeoSeries([None for _ in range(len(gdf))])
        )
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            pd.concat([both_geom_cols, explicit_all_none_case])

        # Check concat with partially None col is not affected by the special casing
        # for all None no CRS handling
        with pytest.warns(
            UserWarning, match=r"CRS not set for some of the concatenation inputs.*"
        ):
            partial_none_case = self.gdf[["geometry"]]
            partial_none_case.iloc[0] = None
            pd.concat([single_geom_col, partial_none_case])

    def test_concat_axis0_crs_wkt_mismatch(self):
        """CRS objects that are equal but hash differently still concatenate."""
        pyproj = pytest.importorskip("pyproj")

        # https://github.com/geopandas/geopandas/issues/326#issuecomment-1727958475
        wkt_template = """GEOGCRS["WGS 84",
        ENSEMBLE["World Geodetic System 1984 ensemble",
        MEMBER["World Geodetic System 1984 (Transit)"],
        MEMBER["World Geodetic System 1984 (G730)"],
        MEMBER["World Geodetic System 1984 (G873)"],
        MEMBER["World Geodetic System 1984 (G1150)"],
        MEMBER["World Geodetic System 1984 (G1674)"],
        MEMBER["World Geodetic System 1984 (G1762)"],
        MEMBER["World Geodetic System 1984 (G2139)"],
        ELLIPSOID["WGS 84",6378137,298.257223563,LENGTHUNIT["metre",1]],
        ENSEMBLEACCURACY[2.0]],PRIMEM["Greenwich",0,
        ANGLEUNIT["degree",0.0174532925199433]],CS[ellipsoidal,2],
        AXIS["geodetic latitude (Lat)",north,ORDER[1],
        ANGLEUNIT["degree",0.0174532925199433]],
        AXIS["geodetic longitude (Lon)",east,ORDER[2],
        ANGLEUNIT["degree",0.0174532925199433]],
        USAGE[SCOPE["Horizontal component of 3D system."],
        AREA["World.{}"],BBOX[-90,-180,90,180]],ID["EPSG",4326]]"""
        wkt_v1 = wkt_template.format("")
        wkt_v2 = wkt_template.format(" ")  # add additional whitespace
        crs1 = pyproj.CRS.from_wkt(wkt_v1)
        crs2 = pyproj.CRS.from_wkt(wkt_v2)
        # pyproj crs __hash__ based on WKT strings means these are distinct in a
        # set but equal by equality
        assert len({crs1, crs2}) == 2
        assert crs1 == crs2
        expected = pd.concat([self.gdf, self.gdf]).set_crs(crs1)
        res = pd.concat([self.gdf.set_crs(crs1), self.gdf.set_crs(crs2)])
        assert_geodataframe_equal(expected, res)

    def test_concat_axis1(self):
        """Column-wise concat with a plain DataFrame returns a GeoDataFrame."""
        res = pd.concat([self.gdf, self.df], axis=1)

        assert res.shape == (3, 4)
        assert isinstance(res, GeoDataFrame)
        assert isinstance(res.geometry, GeoSeries)
        self._check_metadata(res)

    def test_concat_axis1_multiple_geodataframes(self):
        """Column-wise concat with duplicated geometry names raises ValueError."""
        # https://github.com/geopandas/geopandas/issues/1230
        # Expect that concat should fail gracefully if duplicate column names belonging
        # to geometry columns are introduced.
        if PANDAS_GE_21:
            # _constructor_from_mgr changes mean we now get the concat specific error
            # message in this case too
            expected_err = (
                "Concat operation has resulted in multiple columns using the geometry "
                "column name 'geometry'."
            )
        else:
            expected_err = (
                "GeoDataFrame does not support multiple columns using the geometry"
                " column name 'geometry'"
            )
        with pytest.raises(ValueError, match=expected_err):
            pd.concat([self.gdf, self.gdf], axis=1)

        # Check case is handled if custom geometry column name is used
        df2 = self.gdf.rename_geometry("geom")
        expected_err2 = (
            "Concat operation has resulted in multiple columns using the geometry "
            "column name 'geom'."
        )
        with pytest.raises(ValueError, match=expected_err2):
            pd.concat([df2, df2], axis=1)

        if HAS_PYPROJ:
            # Check that two geometry columns is fine, if they have different names
            res3 = pd.concat([df2.set_crs("epsg:4326"), self.gdf], axis=1)
            # check metadata comes from first df
            self._check_metadata(res3, geometry_column_name="geom", crs="epsg:4326")

    @pytest.mark.filterwarnings("ignore:Accessing CRS")
    def test_concat_axis1_geoseries(self):
        """Column-wise concat of two GeoSeries sets no active geometry column."""
        gseries2 = GeoSeries([Point(i, i) for i in range(3, 6)], crs="epsg:4326")
        result = pd.concat([gseries2, self.gseries], axis=1)
        # Note this is not consistent with concat([gdf, gdf], axis=1) where the
        # left metadata is set on the result. This is deliberate for now.
        assert type(result) is GeoDataFrame
        assert result._geometry_column_name is None
        assert_index_equal(pd.Index([0, 1]), result.columns)

        gseries2.name = "foo"
        result2 = pd.concat([gseries2, self.gseries], axis=1)
        assert type(result2) is GeoDataFrame
        # check the second result; the first one was already verified above
        assert result2._geometry_column_name is None
        assert_index_equal(pd.Index(["foo", 0]), result2.columns)
|
||||
@@ -0,0 +1,411 @@
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from shapely.geometry import Point
|
||||
|
||||
import geopandas
|
||||
from geopandas import GeoDataFrame, GeoSeries
|
||||
|
||||
import pytest
|
||||
from geopandas.testing import assert_geodataframe_equal
|
||||
|
||||
# pyproj is needed to build the CRS constants below; skip the module without it.
pyproj = pytest.importorskip("pyproj")

# Reference CRS objects built from EPSG codes 4326 and 27700.
crs_wgs = pyproj.CRS(4326)
crs_osgb = pyproj.CRS(27700)

# Number of rows generated by the ``df`` fixture.
N = 10
|
||||
|
||||
|
||||
@pytest.fixture(params=["geometry", "point"])
def df(request):
    """
    GeoDataFrame with two value columns, an active geometry column named by
    the fixture parameter, and a secondary geometry column ``geometry2``.
    """
    active_col = request.param

    # x and y are always equal here, so one counter produces each record.
    records = [
        {"value1": i + i, "value2": i * i, active_col: Point(i, i)}
        for i in range(N)
    ]
    frame = GeoDataFrame(records, crs=crs_wgs, geometry=active_col)

    # want geometry2 to be a GeoSeries not Series, test behaviour of non geom col
    secondary = frame[active_col].set_crs(crs_osgb, allow_override=True)
    frame["geometry2"] = secondary
    return frame
|
||||
|
||||
|
||||
@pytest.fixture
def df2():
    """For constructor_sliced tests"""

    def diagonal_points():
        # A fresh GeoSeries on every call, so no column shares an object.
        return GeoSeries([Point(x, x) for x in range(3)])

    columns = {
        "geometry": diagonal_points(),
        "geometry2": diagonal_points(),
        "geometry3": diagonal_points(),
        "value": [1, 2, 1],
        "value_nan": np.nan,
    }
    return GeoDataFrame(columns)
|
||||
|
||||
|
||||
def _check_metadata_gdf(gdf, geo_name="geometry", crs=crs_wgs):
    """Assert the active geometry column name and CRS of a GeoDataFrame."""
    recorded_name = gdf._geometry_column_name
    assert recorded_name == geo_name
    # Looked up only after the recorded name has been checked.
    active_geometry = gdf.geometry
    assert active_geometry.name == geo_name
    assert gdf.crs == crs
|
||||
|
||||
|
||||
def _check_metadata_gs(gs, name="geometry", crs=crs_wgs):
    """Assert the name and CRS of a GeoSeries."""
    series_name = gs.name
    assert series_name == name
    series_crs = gs.crs
    assert series_crs == crs
|
||||
|
||||
|
||||
def assert_object(result, expected_type, geo_name="geometry", crs=crs_wgs):
    """
    Assert that ``result`` is exactly of ``expected_type``.

    For a GeoSeries the name and CRS are also checked against ``geo_name``
    and ``crs``. For a GeoDataFrame ``geo_name`` must not be None, and the
    active geometry column name and CRS are checked. Any other expected
    type gets the type check only.
    """
    assert type(result) is expected_type

    if expected_type == GeoSeries:
        _check_metadata_gs(result, name=geo_name, crs=crs)
        return

    if expected_type == GeoDataFrame:
        assert geo_name is not None
        _check_metadata_gdf(result, geo_name=geo_name, crs=crs)
|
||||
|
||||
|
||||
def assert_obj_no_active_geo_col(result, expected_type, geo_colname=None):
    """
    Assert that ``result`` is a GeoDataFrame without a usable active geometry.

    ``result._geometry_column_name`` must equal ``geo_colname`` (None when
    ``geo_colname`` is None), and accessing ``result.geometry.name`` must
    raise an AttributeError. The expected message depends on whether the
    recorded name is None (geometry column unset) or names a column that is
    not present.

    Only ``expected_type == GeoDataFrame`` is supported; anything else
    raises NotImplementedError.
    """
    if expected_type != GeoDataFrame:
        raise NotImplementedError()

    recorded_name = result._geometry_column_name
    if geo_colname is None:
        assert recorded_name is None
    else:
        assert geo_colname == recorded_name

    # Both messages share this opening.
    msg = "You are calling a geospatial method on the GeoDataFrame, but the active"
    if recorded_name is not None:
        msg += r" geometry column \(" rf"'{recorded_name}'\) is not present"

    with pytest.raises(AttributeError, match=msg):
        result.geometry.name  # be explicit that geometry is invalid here
|
||||
|
||||
|
||||
def test_getitem(df):
    """Selecting columns with ``[]`` gives the expected class and metadata."""
    active = df.geometry.name

    # only value columns -> plain DataFrame
    for cols in (["value1", "value2"], ["value1"]):
        assert_object(df[cols], pd.DataFrame)

    # active geometry column included -> GeoDataFrame with it still active
    for cols in ([active, "geometry2"], [active]):
        assert_object(df[cols], GeoDataFrame, active)

    # only the secondary geometry column -> GeoDataFrame, no active geometry
    for cols in (["geometry2", "value1"], ["geometry2"]):
        assert_obj_no_active_geo_col(df[cols], GeoDataFrame, active)

    # Series
    assert_object(df[active], GeoSeries, active)
    assert_object(df["geometry2"], GeoSeries, "geometry2", crs=crs_osgb)
    assert_object(df["value1"], pd.Series)
|
||||
|
||||
|
||||
def test_loc(df):
    """Selecting columns with ``.loc`` gives the expected class and metadata."""
    active = df.geometry.name

    # only value columns -> plain DataFrame
    for cols in (["value1", "value2"], ["value1"]):
        assert_object(df.loc[:, cols], pd.DataFrame)

    # active geometry column included -> GeoDataFrame with it still active
    for cols in ([active, "geometry2"], [active]):
        assert_object(df.loc[:, cols], GeoDataFrame, active)

    # only the secondary geometry column -> GeoDataFrame, no active geometry
    for cols in (["geometry2", "value1"], ["geometry2"]):
        assert_obj_no_active_geo_col(df.loc[:, cols], GeoDataFrame, active)

    # Series
    assert_object(df.loc[:, active], GeoSeries, active)
    assert_object(df.loc[:, "geometry2"], GeoSeries, "geometry2", crs=crs_osgb)
    assert_object(df.loc[:, "value1"], pd.Series)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "geom_name",
    [
        "geometry",
        pytest.param(
            "geom",
            marks=pytest.mark.xfail(
                reason="pre-regression behaviour only works for geometry col geometry"
            ),
        ),
    ],
)
def test_loc_add_row(geom_name, nybb_filename):
    """Adding a row through ``.loc`` keeps the geometry dtype."""
    # https://github.com/geopandas/geopandas/issues/3119

    boroughs = geopandas.read_file(nybb_filename)[["BoroCode", "geometry"]]
    if geom_name != "geometry":
        boroughs = boroughs.rename_geometry(geom_name)

    # add a new row, reusing the first geometry
    first_geometry = boroughs.geometry.iloc[0]
    boroughs.loc[5] = [6, first_geometry]

    assert boroughs.geometry.dtype == "geometry"
    # TODO this should be the original crs, regressed in #2373
    assert boroughs.crs is None
|
||||
|
||||
|
||||
def test_iloc(df):
    """Selecting columns with ``.iloc`` gives the expected class and metadata."""
    active = df.geometry.name

    # value columns only -> plain DataFrame
    for columns in (slice(0, 2), [0]):
        assert_object(df.iloc[:, columns], pd.DataFrame)

    # active geometry column included -> GeoDataFrame with it still active
    for columns in (slice(2, 4), [2]):
        assert_object(df.iloc[:, columns], GeoDataFrame, active)

    # secondary geometry column without the active one
    for columns in ([3, 0], [3]):
        assert_obj_no_active_geo_col(df.iloc[:, columns], GeoDataFrame, active)

    # Series
    assert_object(df.iloc[:, 2], GeoSeries, active)
    assert_object(df.iloc[:, 3], GeoSeries, "geometry2", crs=crs_osgb)
    assert_object(df.iloc[:, 0], pd.Series)
|
||||
|
||||
|
||||
def test_squeeze(df):
    """Squeezing a single-geometry-column frame gives a GeoSeries."""
    active = df.geometry.name

    squeezed_active = df[[active]].squeeze()
    assert_object(squeezed_active, GeoSeries, active)

    squeezed_secondary = df[["geometry2"]].squeeze()
    assert_object(squeezed_secondary, GeoSeries, "geometry2", crs=crs_osgb)
|
||||
|
||||
|
||||
def test_to_frame(df):
    """to_frame on a column gives a GeoDataFrame only for geometry columns."""
    active = df.geometry.name

    active_series = df[active]
    assert_object(active_series.to_frame(), GeoDataFrame, active, crs=active_series.crs)

    secondary_frame = df["geometry2"].to_frame()
    assert_object(secondary_frame, GeoDataFrame, "geometry2", crs=crs_osgb)

    value_frame = df["value1"].to_frame()
    assert_object(value_frame, pd.DataFrame)
|
||||
|
||||
|
||||
def test_reindex(df):
    """reindex over columns and/or rows gives the expected class and metadata."""
    active = df.geometry.name
    new_rows = [0, 1, 20]

    # value columns only -> plain DataFrame
    for cols in (["value1", "value2"], ["value1"]):
        assert_object(df.reindex(columns=cols), pd.DataFrame)

    # active geometry column kept (also next to a brand new column)
    for cols in ([active, "geometry2"], [active], ["new_col", active]):
        assert_object(df.reindex(columns=cols), GeoDataFrame, active)

    # secondary geometry column without the active one
    for cols in (["geometry2", "value1"], ["geometry2"]):
        assert_obj_no_active_geo_col(df.reindex(columns=cols), GeoDataFrame, active)

    # reindexing the rows always preserves the GeoDataFrame
    assert_object(df.reindex(index=new_rows), GeoDataFrame, active)

    # reindexing both rows and columns
    assert_object(df.reindex(index=new_rows, columns=[active]), GeoDataFrame, active)
    assert_object(df.reindex(index=new_rows, columns=["value1"]), pd.DataFrame)
|
||||
|
||||
|
||||
def test_drop(df):
    """Dropping columns gives the expected class and metadata."""
    active = df.geometry.name

    # every geometry column dropped -> plain DataFrame
    for dropped in ([active, "geometry2"], ["geometry2", "value2", active]):
        assert_object(df.drop(columns=dropped), pd.DataFrame)

    # active geometry column survives -> still a valid GeoDataFrame
    for dropped in (["value1", "value2"], ["value1", "value2", "geometry2"]):
        assert_object(df.drop(columns=dropped), GeoDataFrame, active)

    # active column dropped, secondary geometry column survives
    for dropped in ([active, "value2"], ["value1", "value2", active]):
        assert_obj_no_active_geo_col(df.drop(columns=dropped), GeoDataFrame, active)
|
||||
|
||||
|
||||
def test_apply(df):
    """apply() on frames and columns gives the expected class and metadata."""
    active = df.geometry.name

    def passthrough(obj):
        return obj

    def as_text(geom):
        return str(geom)

    value_only = (["value1", "value2"], ["value1"])
    with_active = ([active, "geometry2"], [active])
    secondary_only = (["geometry2", "value1"], ["geometry2"])

    # axis = 0
    for cols in value_only:
        assert_object(df[cols].apply(passthrough), pd.DataFrame)
    for cols in with_active:
        assert_object(df[cols].apply(passthrough), GeoDataFrame, active)
    for cols in secondary_only:
        applied = df[cols].apply(passthrough)
        assert_obj_no_active_geo_col(applied, GeoDataFrame, active)

    # axis = 0, Series
    assert_object(df[active].apply(passthrough), GeoSeries, active)
    assert_object(
        df["geometry2"].apply(passthrough), GeoSeries, "geometry2", crs=crs_osgb
    )
    assert_object(df["value1"].apply(passthrough), pd.Series)

    # axis = 0, Series, no longer geometry
    for col in (active, "geometry2"):
        assert_object(df[col].apply(as_text), pd.Series)

    # axis = 1
    for cols in value_only:
        assert_object(df[cols].apply(passthrough, axis=1), pd.DataFrame)
    for cols in with_active:
        assert_object(df[cols].apply(passthrough, axis=1), GeoDataFrame, active)
    # TODO below should be a GeoDataFrame to be consistent with new getitem logic
    # leave as follow up as quite complicated
    # FrameColumnApply.series_generator returns object dtypes Series, so will have
    # patch result of apply
    mixed = df[["geometry2", "value1"]].apply(passthrough, axis=1)
    assert_object(mixed, pd.DataFrame)
|
||||
|
||||
|
||||
def test_apply_axis1_secondary_geo_cols(df):
    """Row-wise apply on only the secondary geometry column."""
    active = df.geometry.name

    def passthrough(row):
        return row

    applied = df[["geometry2"]].apply(passthrough, axis=1)
    assert_obj_no_active_geo_col(applied, GeoDataFrame, active)
|
||||
|
||||
|
||||
def test_expanddim_in_apply():
    """apply() returning a numeric Series per geometry gives a plain DataFrame."""
    # https://github.com/geopandas/geopandas/pull/2296#issuecomment-1021966443

    def coordinates(point):
        return pd.Series([point.x, point.y])

    points = GeoSeries.from_xy([0, 1], [0, 1])
    expanded = points.apply(coordinates)
    assert_object(expanded, pd.DataFrame)
|
||||
|
||||
|
||||
def test_expandim_in_groupby_aggregate_multiple_funcs():
    """Check the output type of ``groupby().agg`` with lists of functions.

    Lists containing the geometry-returning ``union`` are checked with
    ``assert_obj_no_active_geo_col``; lists with only ``total_area`` are
    expected to give a plain ``pd.DataFrame``.
    """
    # https://github.com/geopandas/geopandas/pull/2296#issuecomment-1021966443
    # There are two calls to _constructor_expanddim here
    # SeriesGroupBy._aggregate_multiple_funcs() and
    # SeriesGroupBy._wrap_series_output() len(output) > 1

    # NOTE: the aggregation functions keep their original names because the
    # function objects themselves are handed to ``agg``.
    def union(geoms):
        return geoms.union_all()

    def total_area(geoms):
        return geoms.area.sum()

    points = GeoSeries.from_xy([0, 1, 2], [0, 1, 3])
    grouped = points.groupby([0, 1, 0])

    # a geometry-returning function is present, in either position
    for funcs in ([total_area, union], [union, total_area]):
        aggregated = grouped.agg(funcs)
        assert_obj_no_active_geo_col(aggregated, GeoDataFrame, geo_colname=None)

    # only scalar-returning functions
    for funcs in ([total_area, total_area], [total_area]):
        assert_object(grouped.agg(funcs), pd.DataFrame)
|
||||
|
||||
|
||||
def test_expanddim_in_unstack():
    """``GeoSeries.unstack`` is checked with and without the name "geometry"."""
    # https://github.com/geopandas/geopandas/pull/2296#issuecomment-1021966443
    two_level_index = pd.MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "a")])
    points = GeoSeries.from_xy([0, 1, 2], [0, 1, 3], index=two_level_index)
    expected_geo_name = None

    wide = points.unstack()
    assert_obj_no_active_geo_col(wide, GeoDataFrame, geo_colname=expected_geo_name)

    # https://github.com/geopandas/geopandas/issues/2486
    points.name = "geometry"
    wide = points.unstack()
    assert_obj_no_active_geo_col(wide, GeoDataFrame, expected_geo_name)
|
||||
|
||||
|
||||
# indexing / constructor_sliced tests
|
||||
|
||||
# Column selections used to parametrize the row-slice test; each inner list is
# one selection of columns.
test_case_column_sets = [
    ["geometry"],
    ["geometry2"],
    ["geometry", "geometry2"],
    # non active geo col case
    ["geometry", "value"],
    ["geometry", "value_nan"],
    ["geometry2", "value"],
    ["geometry2", "value_nan"],
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "column_set",
    test_case_column_sets,
    ids=[", ".join(i) for i in test_case_column_sets],
)
def test_constructor_sliced_row_slices(df2, column_set):
    """A row taken with ``.loc`` from a column subset is a plain ``pd.Series``.

    Parameters
    ----------
    df2 : fixture
        Frame that is subset by ``column_set``.
    column_set : list of str
        Columns selected before the row is extracted.
    """
    # https://github.com/geopandas/geopandas/issues/2282
    df_subset = df2[column_set]
    assert isinstance(df_subset, GeoDataFrame)
    res = df_subset.loc[0]
    # row slices shouldn't be GeoSeries, even if they have a geometry col.
    # An exact type check is intended (isinstance would also accept
    # subclasses), so compare identity of the type objects with ``is``.
    assert type(res) is pd.Series
    if "geometry" in column_set:
        assert not isinstance(res.geometry, pd.Series)
        assert res.geometry == Point(0, 0)
|
||||
|
||||
|
||||
def test_constructor_sliced_column_slices(df2):
    """``iloc`` column slices of the geometry column are exactly ``GeoSeries``.

    Row slices taken with ``iloc`` are expected to be exactly ``pd.Series``.
    Exact types are compared with ``is`` rather than ``==``.
    """
    # Note loc doesn't use _constructor_sliced so it's not tested here
    geo_idx = df2.columns.get_loc("geometry")
    sub = df2.head(1)
    # column slices should be GeoSeries if of geometry type
    assert type(sub.iloc[:, geo_idx]) is GeoSeries
    assert type(sub.iloc[[0], geo_idx]) is GeoSeries
    sub = df2.head(2)
    assert type(sub.iloc[:, geo_idx]) is GeoSeries
    assert type(sub.iloc[[0, 1], geo_idx]) is GeoSeries

    # check iloc row slices are pd.Series instead
    assert type(df2.iloc[0, :]) is pd.Series
|
||||
|
||||
|
||||
def test_constructor_sliced_in_pandas_methods(df2):
    """A sample of non-geometry reductions return exactly ``pd.Series``.

    Exact types are compared with ``is`` rather than ``==``.
    """
    # constructor sliced is used in many places, checking a sample of non
    # geometry cases are sensible
    assert type(df2.count()) is pd.Series
    # drop the secondary geometry columns as not hashable
    hashable_test_df = df2.drop(columns=["geometry2", "geometry3"])
    assert type(hashable_test_df.duplicated()) is pd.Series
    assert type(df2.quantile(numeric_only=True)) is pd.Series
    assert type(df2.memory_usage()) is pd.Series
|
||||
|
||||
|
||||
def test_merge_preserve_geodataframe():
    """Index-merging a GeoDataFrame with itself yields the suffixed columns."""
    # https://github.com/geopandas/geopandas/issues/2932
    points = GeoSeries.from_xy([1], [1])
    frame = GeoDataFrame({"geo": points})

    merged = frame.merge(frame, left_index=True, right_index=True)

    assert_obj_no_active_geo_col(merged, GeoDataFrame, geo_colname=None)
    assert_geodataframe_equal(
        GeoDataFrame({"geo_x": points, "geo_y": points}), merged
    )
|
||||
@@ -0,0 +1,891 @@
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from shapely import make_valid
|
||||
from shapely.geometry import GeometryCollection, LineString, Point, Polygon, box
|
||||
|
||||
import geopandas
|
||||
from geopandas import GeoDataFrame, GeoSeries, overlay, read_file
|
||||
from geopandas._compat import HAS_PYPROJ, PANDAS_GE_20
|
||||
|
||||
import pytest
|
||||
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
|
||||
|
||||
try:
    from fiona.errors import DriverError
except ImportError:

    class DriverError(Exception):
        """Placeholder so ``except DriverError`` works when fiona is not importable."""
|
||||
|
||||
|
||||
# Absolute path of the "data/overlay" directory located next to this module.
DATA = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "overlay")
|
||||
|
||||
|
||||
@pytest.fixture
def dfs(request):
    """Return two GeoDataFrames of two squares each.

    The first frame carries ``col1`` and the second ``col2``, both with the
    values ``[1, 2]``.
    """
    left_squares = [
        Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
        Polygon([(2, 2), (4, 2), (4, 4), (2, 4)]),
    ]
    right_squares = [
        Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
        Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
    ]
    df1 = GeoDataFrame({"col1": [1, 2], "geometry": GeoSeries(left_squares)})
    df2 = GeoDataFrame({"col2": [1, 2], "geometry": GeoSeries(right_squares)})
    return df1, df2
|
||||
|
||||
|
||||
@pytest.fixture(params=["default-index", "int-index", "string-index"])
def dfs_index(request, dfs):
    """Return the ``dfs`` frames, re-indexed according to the fixture param.

    "default-index" leaves both frames untouched, "int-index" assigns integer
    labels to both, and "string-index" assigns string labels to the first
    frame only.
    """
    df1, df2 = dfs
    index_kind = request.param
    if index_kind == "int-index":
        df1.index = [1, 2]
        df2.index = [0, 2]
    elif index_kind == "string-index":
        df1.index = ["row1", "row2"]
    return df1, df2
|
||||
|
||||
|
||||
@pytest.fixture(
    params=["union", "intersection", "difference", "symmetric_difference", "identity"]
)
def how(request):
    """Parametrized fixture providing each ``how`` value used by the overlay tests."""
    overlay_method = request.param
    return overlay_method
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
def keep_geom_type(request):
    """Parametrized fixture providing both boolean ``keep_geom_type`` values."""
    flag = request.param
    return flag
|
||||
|
||||
|
||||
def test_overlay(dfs_index, how):
    """
    Basic overlay test with small dummy example dataframes (from docs).

    Expected results were obtained using QGIS 2.16 (Vector -> Geoprocessing
    Tools -> Intersection / Union / ...) and saved to GeoJSON.
    """
    df1, df2 = dfs_index
    result = overlay(df1, df2, how=how)

    def _read(name):
        # Load a stored expectation, drop its CRS and widen int32 columns so
        # it can be compared with the overlay output.
        path = os.path.join(DATA, "polys", "df1_df2-{0}.geojson".format(name))
        frame = read_file(path)
        frame.geometry.array.crs = None
        int32_columns = frame.columns[frame.dtypes == "int32"]
        for col in int32_columns:
            frame[col] = frame[col].astype("int64")
        return frame

    # construction of the expected frame
    if how != "identity":
        expected = _read(how)
    else:
        # no stored file for identity: stack intersection and difference
        parts = [_read("intersection"), _read("difference")]
        expected = pd.concat(parts, ignore_index=True, sort=False)
        expected["col1"] = expected["col1"].astype(float)

    # TODO needed adaptations to result
    if how == "union":
        result = result.sort_values(["col1", "col2"]).reset_index(drop=True)
    elif how == "difference":
        result = result.reset_index(drop=True)

    assert_geodataframe_equal(result, expected, check_column_type=False)

    if how != "difference":
        return

    # for difference also reversed
    reversed_result = overlay(df2, df1, how=how).reset_index(drop=True)
    reversed_expected = _read("difference-inverse")
    assert_geodataframe_equal(
        reversed_result, reversed_expected, check_column_type=False
    )
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:GeoSeries crs mismatch:UserWarning")
def test_overlay_nybb(how, nybb_filename):
    """Overlay the nybb polygons with saved circles and compare to QGIS results.

    The expected shapefiles are read from the "nybb_qgis" data directory and
    adapted (rows dropped, sorted, columns reordered) before the comparison.
    """
    qgis_dir = os.path.join(DATA, "nybb_qgis")
    polydf = read_file(nybb_filename)

    # The circles have been constructed and saved at the time the expected
    # results were created (exact output of buffer algorithm can slightly
    # change over time -> use saved ones)
    # # construct circles dataframe
    # N = 10
    # b = [int(x) for x in polydf.total_bounds]
    # polydf2 = GeoDataFrame(
    #     [
    #         {"geometry": Point(x, y).buffer(10000), "value1": x + y, "value2": x - y}
    #         for x, y in zip(
    #             range(b[0], b[2], int((b[2] - b[0]) / N)),
    #             range(b[1], b[3], int((b[3] - b[1]) / N)),
    #         )
    #     ],
    #     crs=polydf.crs,
    # )
    polydf2 = read_file(os.path.join(qgis_dir, "polydf2.shp"))

    result = overlay(polydf, polydf2, how=how)

    cols = ["BoroCode", "BoroName", "Shape_Leng", "Shape_Area", "value1", "value2"]
    if how == "difference":
        # the value columns are left out of the sort keys for difference
        cols = cols[:-2]

    # expected result
    # for identity, read the union one; further down below we take the
    # appropriate subset
    expected_name = "union" if how == "identity" else how
    expected = read_file(os.path.join(qgis_dir, "qgis-{0}.shp".format(expected_name)))

    # The result of QGIS for 'union' contains incorrect geometries:
    # 24 is a full original circle overlapping with unioned geometries, and
    # 27 is a completely duplicated row)
    if how == "union":
        expected = expected.drop([24, 27]).reset_index(drop=True)
    # Eliminate observations without geometries (issue from QGIS)
    expected = expected[expected.is_valid].reset_index(drop=True)

    if how == "identity":
        expected = expected[expected.BoroCode.notnull()].copy()

    # Order GeoDataFrames
    expected = expected.sort_values(cols).reset_index(drop=True)

    # TODO needed adaptations to result
    result = result.sort_values(cols).reset_index(drop=True)

    if how in ("union", "identity"):
        # concat < 0.23 sorts, so changes the order of the columns
        # but at least we ensure 'geometry' is the last column
        assert result.columns[-1] == "geometry"
        assert len(result.columns) == len(expected.columns)
        result = result.reindex(columns=expected.columns)

    # the ordering of the spatial index results causes slight deviations
    # in the resultant geometries for multipolygons
    # for more details on the discussion, see:
    # https://github.com/geopandas/geopandas/pull/1338
    # https://github.com/geopandas/geopandas/issues/1337

    # Temporary workaround below:

    # simplify multipolygon geometry comparison
    # since the order of the constituent polygons depends on
    # the ordering of spatial indexing results, we cannot
    # compare symmetric_difference results directly when the
    # resultant geometry is a multipolygon

    # first, check that all bounds and areas are approx equal
    # this is a very rough check for multipolygon equality
    pd.testing.assert_series_equal(result.geometry.area, expected.geometry.area)
    pd.testing.assert_frame_equal(result.geometry.bounds, expected.geometry.bounds)

    # There are two cases where the multipolygon have a different number
    # of sub-geometries -> not solved by normalize (and thus drop for now)
    blanked_row = {"symmetric_difference": 9, "union": 24}.get(how)
    if blanked_row is not None:
        expected.loc[blanked_row, "geometry"] = None
        result.loc[blanked_row, "geometry"] = None

    # missing values get read as None in read_file for a string column, but
    # are introduced as NaN by overlay
    expected["BoroName"] = expected["BoroName"].fillna(np.nan)

    assert_geodataframe_equal(
        result,
        expected,
        normalize=True,
        check_crs=False,
        check_column_type=False,
        check_less_precise=True,
    )
|
||||
|
||||
|
||||
def test_overlay_overlap(how):
    """
    Overlay test with overlapping geometries in both dataframes.
    Test files are created with::

        import geopandas
        from geopandas import GeoSeries, GeoDataFrame
        from shapely.geometry import Point, Polygon, LineString

        s1 = GeoSeries([Point(0, 0), Point(1.5, 0)]).buffer(1, resolution=2)
        s2 = GeoSeries([Point(1, 1), Point(2, 2)]).buffer(1, resolution=2)

        df1 = GeoDataFrame({'geometry': s1, 'col1':[1,2]})
        df2 = GeoDataFrame({'geometry': s2, 'col2':[1, 2]})

        ax = df1.plot(alpha=0.5)
        df2.plot(alpha=0.5, ax=ax, color='C1')

        df1.to_file('geopandas/geopandas/tests/data/df1_overlap.geojson',
                    driver='GeoJSON')
        df2.to_file('geopandas/geopandas/tests/data/df2_overlap.geojson',
                    driver='GeoJSON')

    and then overlay results are obtained from using QGIS 2.16
    (Vector -> Geoprocessing Tools -> Intersection / Union / ...),
    saved to GeoJSON.
    """
    df1 = read_file(os.path.join(DATA, "overlap", "df1_overlap.geojson"))
    df2 = read_file(os.path.join(DATA, "overlap", "df2_overlap.geojson"))

    # The overlay still runs for every ``how`` (including "identity") before
    # any skip, so a crash in overlay itself is reported.
    result = overlay(df1, df2, how=how)

    if how == "identity":
        # pytest.skip() raises on its own; no ``raise`` statement is needed.
        pytest.skip("identity result is not compared in this test")

    expected = read_file(
        os.path.join(DATA, "overlap", "df1_df2_overlap-{0}.geojson".format(how))
    )

    if how == "union":
        # the QGIS result has the last row duplicated, so removing this
        expected = expected.iloc[:-1]

    # TODO needed adaptations to result
    result = result.reset_index(drop=True)
    if how == "union":
        result = result.sort_values(["col1", "col2"]).reset_index(drop=True)

    assert_geodataframe_equal(
        result,
        expected,
        normalize=True,
        check_column_type=False,
        check_less_precise=True,
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("other_geometry", [False, True])
def test_geometry_not_named_geometry(dfs, how, other_geometry):
    """Overlay gives the same result when the active geometry column is renamed.

    Each input frame in turn gets its active geometry column renamed; with
    ``other_geometry`` a centroid column called "geometry" is added as well.
    """
    # Issue #306
    # Add points and flip names
    df1, df2 = dfs

    # left frame with its active geometry column called "polygons"
    df3 = df1.copy().rename(columns={"geometry": "polygons"}).set_geometry("polygons")
    if other_geometry:
        df3["geometry"] = df1.centroid.geometry
    assert df3.geometry.name == "polygons"

    res1 = overlay(df1, df2, how=how)
    res2 = overlay(df3, df2, how=how)

    # the input frame must not have been modified by overlay
    assert df3.geometry.name == "polygons"

    if how == "difference":
        # in case of 'difference', column names of left frame are preserved
        assert res2.geometry.name == "polygons"
        if other_geometry:
            assert "geometry" in res2.columns
            assert_geoseries_equal(
                res2["geometry"], df3["geometry"], check_series_type=False
            )
            res2 = res2.drop(["geometry"], axis=1)
        res2 = res2.rename(columns={"polygons": "geometry"}).set_geometry("geometry")

    # TODO if existing column is overwritten -> geometry not last column
    if other_geometry and how == "intersection":
        res2 = res2.reindex(columns=res1.columns)
    assert_geodataframe_equal(res1, res2)

    # right frame with its active geometry column called "geom"
    df4 = df2.copy().rename(columns={"geometry": "geom"}).set_geometry("geom")
    if other_geometry:
        df4["geometry"] = df2.centroid.geometry
    assert df4.geometry.name == "geom"

    res1 = overlay(df1, df2, how=how)
    res2 = overlay(df1, df4, how=how)
    assert_geodataframe_equal(res1, res2)
|
||||
|
||||
|
||||
def test_bad_how(dfs):
    """An unknown ``how`` value makes ``overlay`` raise ``ValueError``."""
    left, right = dfs
    with pytest.raises(ValueError):
        overlay(left, right, how="spandex")
|
||||
|
||||
|
||||
def test_duplicate_column_name(dfs, how):
    """A column name shared by both inputs appears with ``_1`` / ``_2`` suffixes."""
    if how == "difference":
        pytest.skip("Difference uses columns from one df only.")
    df1, df2 = dfs
    # make the attribute column of the right frame clash with the left one
    clashing = df2.rename(columns={"col2": "col1"})
    res = overlay(df1, clashing, how=how)
    assert all(name in res.columns for name in ("col1_1", "col1_2"))
|
||||
|
||||
|
||||
def test_geoseries_warning(dfs):
    """Passing a GeoSeries as the second input raises ``NotImplementedError``."""
    # Issue #305
    df1, df2 = dfs
    right_geometries = df2.geometry
    with pytest.raises(NotImplementedError):
        overlay(df1, right_geometries, how="union")
|
||||
|
||||
|
||||
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not available")
def test_preserve_crs(dfs, how):
    """The overlay result has no CRS for CRS-less inputs, else the shared CRS."""
    df1, df2 = dfs
    assert overlay(df1, df2, how=how).crs is None

    crs = "epsg:4326"
    for frame in (df1, df2):
        frame.crs = crs
    assert overlay(df1, df2, how=how).crs == crs
|
||||
|
||||
|
||||
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not available")
def test_crs_mismatch(dfs, how):
    """Inputs with different CRS values make ``overlay`` emit a ``UserWarning``."""
    left, right = dfs
    left.crs = 4326
    right.crs = 3857
    with pytest.warns(UserWarning, match="CRS mismatch between the CRS"):
        overlay(left, right, how=how)
|
||||
|
||||
|
||||
def test_empty_intersection(dfs):
    """Overlaying with squares in the negative quadrant yields an empty frame."""
    df1, _ = dfs
    far_away_squares = GeoSeries(
        [
            Polygon([(-1, -1), (-3, -1), (-3, -3), (-1, -3)]),
            Polygon([(-3, -3), (-5, -3), (-5, -5), (-3, -5)]),
        ]
    )
    df3 = GeoDataFrame({"geometry": far_away_squares, "col3": [1, 2]})

    result = overlay(df1, df3)

    expected = GeoDataFrame([], columns=["col1", "col3", "geometry"])
    assert_geodataframe_equal(result, expected, check_dtype=False)
|
||||
|
||||
|
||||
def test_correct_index(dfs):
    """The overlay result matches an expected frame built with a default index."""
    # GH883 - case where the index was not properly reset
    _, df2 = dfs
    three_squares = GeoSeries(
        [
            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
            Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]),
            Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
        ]
    )
    df3 = GeoDataFrame({"geometry": three_squares, "col3": [1, 2, 3]})

    first_overlap = Polygon([(1, 1), (1, 3), (3, 3), (3, 1), (1, 1)])
    second_overlap = Polygon([(3, 3), (3, 5), (5, 5), (5, 3), (3, 3)])
    expected_rows = [[1, 1, first_overlap], [3, 2, second_overlap]]
    expected = GeoDataFrame(expected_rows, columns=["col3", "col2", "geometry"])

    result = overlay(df3, df2, keep_geom_type=True)
    assert_geodataframe_equal(result, expected)
|
||||
|
||||
|
||||
def test_warn_on_keep_geom_type(dfs):
    """``keep_geom_type=None`` triggers the ``keep_geom_type=True`` UserWarning."""
    _, df2 = dfs
    three_squares = [
        Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
        Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]),
        Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
    ]
    df3 = GeoDataFrame({"geometry": GeoSeries(three_squares)})

    with pytest.warns(UserWarning, match="`keep_geom_type=True` in overlay"):
        overlay(df2, df3, keep_geom_type=None)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "geom_types", ["polys", "poly_line", "poly_point", "line_poly", "point_poly"]
)
def test_overlay_strict(how, keep_geom_type, geom_types):
    """
    Test of mixed geometry types on input and output. Expected results initially
    generated using following snippet.

        polys1 = gpd.GeoSeries([Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
                                Polygon([(3, 3), (5, 3), (5, 5), (3, 5)])])
        df1 = gpd.GeoDataFrame({'col1': [1, 2], 'geometry': polys1})

        polys2 = gpd.GeoSeries([Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
                                Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]),
                                Polygon([(3, 3), (5, 3), (5, 5), (3, 5)])])
        df2 = gpd.GeoDataFrame({'geometry': polys2, 'col2': [1, 2, 3]})

        lines1 = gpd.GeoSeries([LineString([(2, 0), (2, 4), (6, 4)]),
                                LineString([(0, 3), (6, 3)])])
        df3 = gpd.GeoDataFrame({'col3': [1, 2], 'geometry': lines1})
        points1 = gpd.GeoSeries([Point((2, 2)),
                                 Point((3, 3))])
        df4 = gpd.GeoDataFrame({'col4': [1, 2], 'geometry': points1})

        params=["union", "intersection", "difference", "symmetric_difference",
                "identity"]
        stricts = [True, False]

        for p in params:
            for s in stricts:
                exp = gpd.overlay(df1, df2, how=p, keep_geom_type=s)
                if not exp.empty:
                    exp.to_file('polys_{p}_{s}.geojson'.format(p=p, s=s),
                                driver='GeoJSON')

        for p in params:
            for s in stricts:
                exp = gpd.overlay(df1, df3, how=p, keep_geom_type=s)
                if not exp.empty:
                    exp.to_file('poly_line_{p}_{s}.geojson'.format(p=p, s=s),
                                driver='GeoJSON')
        for p in params:
            for s in stricts:
                exp = gpd.overlay(df1, df4, how=p, keep_geom_type=s)
                if not exp.empty:
                    exp.to_file('poly_point_{p}_{s}.geojson'.format(p=p, s=s),
                                driver='GeoJSON')
    """
    polys1 = GeoSeries(
        [
            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
            Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
        ]
    )
    df1 = GeoDataFrame({"col1": [1, 2], "geometry": polys1})

    polys2 = GeoSeries(
        [
            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
            Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]),
            Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
        ]
    )
    df2 = GeoDataFrame({"geometry": polys2, "col2": [1, 2, 3]})
    lines1 = GeoSeries(
        [LineString([(2, 0), (2, 4), (6, 4)]), LineString([(0, 3), (6, 3)])]
    )
    df3 = GeoDataFrame({"col3": [1, 2], "geometry": lines1})
    points1 = GeoSeries([Point((2, 2)), Point((3, 3))])
    df4 = GeoDataFrame({"col4": [1, 2], "geometry": points1})

    # (left, right) overlay inputs for every parametrized combination; an
    # unknown key fails immediately with KeyError instead of leaving
    # ``result`` unbound.
    overlay_inputs = {
        "polys": (df1, df2),
        "poly_line": (df1, df3),
        "poly_point": (df1, df4),
        "line_poly": (df3, df1),
        "point_poly": (df4, df1),
    }
    left, right = overlay_inputs[geom_types]
    result = overlay(left, right, how=how, keep_geom_type=keep_geom_type)

    expected_path = os.path.join(
        DATA,
        "strict",
        "{t}_{h}_{s}.geojson".format(t=geom_types, h=how, s=keep_geom_type),
    )
    # Only the file read is guarded: an error raised by the comparison code
    # below must fail the test rather than be mistaken for a missing file.
    try:
        expected = read_file(expected_path)
    except (DriverError, OSError, RuntimeError):
        # The generating snippet stores no file when the overlay is empty, so
        # a failed read is acceptable only for an empty result.
        # DriverError: fiona >= 1.8, OSError: fiona < 1.8,
        # RuntimeError: pyogrio.DataSourceError
        assert result.empty
        return

    # the order depends on the spatial index used
    # so we sort the resultant dataframes to get a consistent order
    # independently of the spatial index implementation
    assert all(expected.columns == result.columns), "Column name mismatch"
    # sorted() keeps the sort-key order independent of set iteration order
    cols = sorted(set(result.columns) - {"geometry"})
    expected = expected.sort_values(cols, axis=0).reset_index(drop=True)
    result = result.sort_values(cols, axis=0).reset_index(drop=True)

    # some columns are all-NaN in the result, but get read as object dtype
    # column of None values in read_file
    for col in ["col1", "col3", "col4"]:
        if col in expected.columns and expected[col].isna().all():
            expected[col] = expected[col].astype("float64")

    assert_geodataframe_equal(
        result,
        expected,
        normalize=True,
        check_column_type=False,
        check_less_precise=True,
        check_crs=False,
        check_dtype=False,
    )
|
||||
|
||||
|
||||
def test_mixed_geom_error():
    """A polygon/line mix in one input with ``keep_geom_type=True`` raises."""
    first_square = Polygon([(1, 1), (3, 1), (3, 3), (1, 3)])
    second_ring_coords = [(3, 3), (5, 3), (5, 5), (3, 5)]

    polygons_only = GeoSeries([first_square, Polygon(second_ring_coords)])
    df1 = GeoDataFrame({"col1": [1, 2], "geometry": polygons_only})

    polygon_and_line = GeoSeries(
        [Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]), LineString(second_ring_coords)]
    )
    dfmixed = GeoDataFrame({"col1": [1, 2], "geometry": polygon_and_line})

    with pytest.raises(NotImplementedError):
        overlay(df1, dfmixed, keep_geom_type=True)
|
||||
|
||||
|
||||
def test_keep_geom_type_error():
    """A GeometryCollection left input with ``keep_geom_type=True`` raises TypeError."""
    collection = GeometryCollection(
        [
            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
            LineString([(3, 3), (5, 3), (5, 5), (3, 5)]),
        ]
    )
    dfcol = GeoDataFrame({"col1": [2], "geometry": GeoSeries(collection)})

    squares = GeoSeries(
        [
            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
            Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
        ]
    )
    df1 = GeoDataFrame({"col1": [1, 2], "geometry": squares})

    with pytest.raises(TypeError):
        overlay(dfcol, df1, keep_geom_type=True)
|
||||
|
||||
|
||||
def test_keep_geom_type_geometry_collection():
    """Check the single-row intersection for each ``keep_geom_type`` setting.

    ``None`` (with a warning) and ``True`` are expected to give a Polygon,
    ``False`` a GeometryCollection.
    """
    # GH 1581
    geom_type_dir = os.path.join(DATA, "geom_type")
    df1 = read_file(os.path.join(geom_type_dir, "df1.geojson"))
    df2 = read_file(os.path.join(geom_type_dir, "df2.geojson"))

    # default (None) behaves like True but warns about it
    with pytest.warns(UserWarning, match="`keep_geom_type=True` in overlay"):
        intersection = overlay(df1, df2, keep_geom_type=None)
    assert len(intersection) == 1
    assert (intersection.geom_type == "Polygon").all()

    explicit_cases = [(True, "Polygon"), (False, "GeometryCollection")]
    for keep, expected_type in explicit_cases:
        intersection = overlay(df1, df2, keep_geom_type=keep)
        assert len(intersection) == 1
        assert (intersection.geom_type == expected_type).all()
|
||||
|
||||
|
||||
def test_keep_geom_type_geometry_collection2():
    """Compare the overlay of two box-union frames for both ``keep_geom_type`` values."""
    left_geoms = [
        box(0, 0, 1, 1),
        box(1, 1, 3, 3).union(box(1, 3, 5, 5)),
    ]
    right_geoms = [
        box(0, 0, 1, 1),
        box(3, 1, 4, 2).union(box(4, 1, 5, 4)),
    ]
    df1 = GeoDataFrame({"left": [0, 1], "geometry": left_geoms})
    df2 = GeoDataFrame({"right": [0, 1], "geometry": right_geoms})

    # keep_geom_type=True: only polygonal pieces are expected
    kept = overlay(df1, df2, keep_geom_type=True)
    expected_kept = GeoDataFrame(
        {
            "left": [0, 1],
            "right": [0, 1],
            "geometry": [box(0, 0, 1, 1), box(4, 3, 5, 4)],
        }
    )
    assert_geodataframe_equal(kept, expected_kept)

    # keep_geom_type=False: a point and a mixed collection are expected too
    mixed_piece = GeometryCollection([box(4, 3, 5, 4), LineString([(3, 1), (3, 2)])])
    unfiltered = overlay(df1, df2, keep_geom_type=False)
    expected_unfiltered = GeoDataFrame(
        {
            "left": [0, 1, 1],
            "right": [0, 0, 1],
            "geometry": [box(0, 0, 1, 1), Point(1, 1), mixed_piece],
        }
    )
    assert_geodataframe_equal(unfiltered, expected_unfiltered)
|
||||
|
||||
|
||||
def test_keep_geom_type_geomcoll_different_types():
    """Compare overlay output for both ``keep_geom_type`` values on two-row inputs."""
    left_geoms = [box(0, 1, 1, 3), box(10, 10, 12, 12)]
    right_geoms = [
        Polygon([(1, 0), (3, 0), (3, 3), (1, 3), (1, 2), (2, 2), (2, 1), (1, 1)]),
        box(11, 11, 13, 13),
    ]
    df1 = GeoDataFrame({"left": [0, 1], "geometry": left_geoms})
    df2 = GeoDataFrame({"right": [0, 1], "geometry": right_geoms})

    # keep_geom_type=True: a single polygon row is expected
    result1 = overlay(df1, df2, keep_geom_type=True)
    expected1 = GeoDataFrame(
        {"left": [1], "right": [1], "geometry": [box(11, 11, 12, 12)]}
    )
    assert_geodataframe_equal(result1, expected1)

    # keep_geom_type=False: a line/point collection row is expected as well
    line_and_point = GeometryCollection([LineString([(1, 2), (1, 3)]), Point(1, 1)])
    result2 = overlay(df1, df2, keep_geom_type=False)
    expected2 = GeoDataFrame(
        {
            "left": [0, 1],
            "right": [0, 1],
            "geometry": [line_and_point, box(11, 11, 12, 12)],
        }
    )
    assert_geodataframe_equal(result2, expected2)
|
||||
|
||||
|
||||
def test_keep_geom_type_geometry_collection_difference():
    """``difference`` with ``keep_geom_type=True`` on an input with a tiny sliver."""
    # GH 2163
    left_geoms = [
        box(0, 0, 1, 1),
        box(1, 1, 2, 2),
    ]
    # the tiny sliver in the second geometry may be converted to a
    # linestring during the overlay process due to floating point errors
    # on some platforms
    sliver = box(2, 2, 3, 2.00000000000000001)
    right_geoms = [
        box(0, 0, 1, 1),
        box(1, 1, 2, 3).union(sliver),
    ]
    df1 = GeoDataFrame({"left": [0, 1], "geometry": left_geoms})
    df2 = GeoDataFrame({"right": [0, 1], "geometry": right_geoms})

    result1 = overlay(df2, df1, keep_geom_type=True, how="difference")

    expected1 = GeoDataFrame({"right": [1], "geometry": [box(1, 2, 2, 3)]})
    assert_geodataframe_equal(result1, expected1)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("should_make_valid", [True, False])
def test_overlap_make_valid(should_make_valid):
    """Overlay with an invalid bowtie polygon, with and without ``make_valid``.

    With ``make_valid`` enabled both result rows are expected to equal the
    repaired bowtie; otherwise ``overlay`` is expected to raise ``ValueError``.
    """
    bowtie = Polygon([(1, 1), (9, 9), (9, 1), (1, 9), (1, 1)])
    assert not bowtie.is_valid
    fixed_bowtie = make_valid(bowtie)
    assert fixed_bowtie.is_valid

    region = GeoSeries([box(0, 0, 10, 10)])
    df1 = GeoDataFrame({"col1": ["region"], "geometry": region})
    bowties = GeoSeries([bowtie, fixed_bowtie])
    df_bowtie = GeoDataFrame({"col1": ["invalid", "valid"], "geometry": bowties})

    if not should_make_valid:
        with pytest.raises(ValueError, match="1 invalid input geometries"):
            overlay(df1, df_bowtie, make_valid=should_make_valid)
        return

    df_overlay_bowtie = overlay(df1, df_bowtie, make_valid=should_make_valid)
    for row in (0, 1):
        assert df_overlay_bowtie.at[row, "geometry"].equals(fixed_bowtie)
|
||||
|
||||
|
||||
def test_empty_overlay_return_non_duplicated_columns(nybb_filename):
    """An empty overlay of nybb with a translated copy has suffixed columns once each."""
    nybb = geopandas.read_file(nybb_filename)
    # shift the copy so that it is compared against the empty expectation below
    nybb2 = nybb.copy()
    nybb2.geometry = nybb2.translate(20000000)

    result = geopandas.overlay(nybb, nybb2)

    attribute_names = ["BoroCode", "BoroName", "Shape_Leng", "Shape_Area"]
    expected_columns = (
        [name + "_1" for name in attribute_names]
        + [name + "_2" for name in attribute_names]
        + ["geometry"]
    )
    expected = GeoDataFrame(columns=expected_columns, crs=nybb.crs)
    assert_geodataframe_equal(result, expected, check_dtype=False)
|
||||
|
||||
|
||||
def test_non_overlapping(how):
    """Overlay of two disjoint squares, compared per ``how`` with a literal frame."""
    p1 = Polygon([(0, 0), (2, 0), (2, 2), (0, 2)])
    p2 = Polygon([(3, 3), (5, 3), (5, 5), (3, 5)])
    df1 = GeoDataFrame({"col1": [1], "geometry": [p1]})
    df2 = GeoDataFrame({"col2": [2], "geometry": [p2]})
    result = overlay(df1, df2, how=how)

    if how == "intersection":
        # the index of the empty expected frame depends on the pandas version
        index = None if PANDAS_GE_20 else pd.Index([], dtype="object")
        empty_ints = {name: np.array([], dtype="int64") for name in ("col1", "col2")}
        expected = GeoDataFrame({**empty_ints, "geometry": []}, index=index)
    elif how in ("union", "symmetric_difference"):
        # both inputs are expected unchanged, with NaN for the other's column
        expected = GeoDataFrame(
            {
                "col1": [1, np.nan],
                "col2": [np.nan, 2],
                "geometry": [p1, p2],
            }
        )
    elif how == "identity":
        expected = GeoDataFrame(
            {"col1": [1.0], "col2": [np.nan], "geometry": [p1]}
        )
    elif how == "difference":
        expected = GeoDataFrame({"col1": [1], "geometry": [p1]})

    assert_geodataframe_equal(result, expected)
|
||||
|
||||
|
||||
def test_no_intersection():
    """Intersection of buffered points with a translated copy gives an empty frame."""
    # overlapping bounds but non-overlapping geometries
    discs = GeoSeries([Point(x, x).buffer(0.1) for x in range(3)])
    shifted_discs = discs.translate(1)
    gdf1 = GeoDataFrame({"foo": ["a", "b", "c"]}, geometry=discs)
    gdf2 = GeoDataFrame({"bar": ["1", "3", "5"]}, geometry=shifted_discs)

    expected = GeoDataFrame(columns=["foo", "bar", "geometry"])
    result = overlay(gdf1, gdf2, how="intersection")
    assert_geodataframe_equal(result, expected, check_index_type=False)
|
||||
|
||||
|
||||
class TestOverlayWikiExample:
    """Overlay of two partially overlapping boxes, checked for each ``how``."""

    def setup_method(self):
        # Coordinate rings reused by several of the expected results:
        # the part of layer A outside B, and the part of layer B outside A.
        a_only = [(4, 2), (0, 2), (0, 6), (6, 6), (6, 4), (4, 4), (4, 2)]
        b_only = [(10, 0), (4, 0), (4, 2), (6, 2), (6, 4), (10, 4), (10, 0)]

        self.layer_a = GeoDataFrame(geometry=[box(0, 2, 6, 6)])
        self.layer_b = GeoDataFrame(geometry=[box(4, 0, 10, 4)])

        self.intersection = GeoDataFrame(geometry=[box(4, 2, 6, 4)])
        self.union = GeoDataFrame(
            geometry=[box(4, 2, 6, 4), Polygon(a_only), Polygon(b_only)]
        )
        self.a_difference_b = GeoDataFrame(geometry=[Polygon(a_only)])
        self.b_difference_a = GeoDataFrame(geometry=[Polygon(b_only)])
        self.symmetric_difference = GeoDataFrame(
            geometry=[Polygon(a_only), Polygon(b_only)]
        )
        self.a_identity_b = GeoDataFrame(
            geometry=[box(4, 2, 6, 4), Polygon(a_only)]
        )
        self.b_identity_a = GeoDataFrame(
            geometry=[box(4, 2, 6, 4), Polygon(b_only)]
        )

    def test_intersection(self):
        overlaid = overlay(self.layer_a, self.layer_b, how="intersection")
        assert overlaid.geom_equals(self.intersection).all()

    def test_union(self):
        overlaid = overlay(self.layer_a, self.layer_b, how="union")
        assert_geodataframe_equal(overlaid, self.union)

    def test_a_difference_b(self):
        overlaid = overlay(self.layer_a, self.layer_b, how="difference")
        assert_geodataframe_equal(overlaid, self.a_difference_b)

    def test_b_difference_a(self):
        overlaid = overlay(self.layer_b, self.layer_a, how="difference")
        assert_geodataframe_equal(overlaid, self.b_difference_a)

    def test_symmetric_difference(self):
        overlaid = overlay(self.layer_a, self.layer_b, how="symmetric_difference")
        assert_geodataframe_equal(overlaid, self.symmetric_difference)

    def test_a_identity_b(self):
        overlaid = overlay(self.layer_a, self.layer_b, how="identity")
        assert_geodataframe_equal(overlaid, self.a_identity_b)

    def test_b_identity_a(self):
        overlaid = overlay(self.layer_b, self.layer_a, how="identity")
        assert_geodataframe_equal(overlaid, self.b_identity_a)
|
||||
@@ -0,0 +1,890 @@
|
||||
import os
|
||||
import warnings
|
||||
from packaging.version import Version
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
import shapely
|
||||
from shapely.geometry import GeometryCollection, LinearRing, LineString, Point
|
||||
|
||||
import geopandas
|
||||
import geopandas._compat as compat
|
||||
from geopandas import GeoDataFrame, GeoSeries
|
||||
from geopandas.array import from_shapely
|
||||
|
||||
import pytest
|
||||
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
|
||||
from numpy.testing import assert_array_equal
|
||||
from pandas.testing import assert_frame_equal, assert_series_equal
|
||||
|
||||
|
||||
@pytest.fixture
def s():
    """GeoSeries holding the points (0, 0), (1, 1) and (2, 2)."""
    diagonal = [Point(i, i) for i in range(3)]
    return GeoSeries(diagonal)
|
||||
|
||||
|
||||
@pytest.fixture
def df():
    """GeoDataFrame with three diagonal points and two int64 value columns."""
    columns = {
        "geometry": [Point(x, x) for x in range(3)],
        "value1": np.arange(3, dtype="int64"),
        "value2": np.array([1, 2, 1], dtype="int64"),
    }
    return GeoDataFrame(columns)
|
||||
|
||||
|
||||
def test_repr(s, df):
    """The text and HTML representations contain the WKT of the points."""
    series_text = repr(s)
    frame_text = repr(df)
    frame_html = df._repr_html_()
    for rendered in (series_text, frame_text, frame_html):
        assert "POINT" in rendered
|
||||
|
||||
|
||||
@pytest.mark.skipif(shapely.geos_version < (3, 9, 0), reason="requires GEOS>=3.9")
def test_repr_boxed_display_precision():
    """Check the coordinate precision shown in reprs, default and configured.

    The global ``geopandas.options.display_precision`` option is changed
    during the test and restored afterwards, so the setting does not leak
    into tests that run later in the same session.
    """
    # geographic coordinates
    p1 = Point(10.123456789, 50.123456789)
    p2 = Point(4.123456789, 20.123456789)
    s1 = GeoSeries([p1, p2, None])
    assert "POINT (10.12346 50.12346)" in repr(s1)

    # geographic coordinates 4326
    s3 = GeoSeries([p1, p2], crs=4326)
    assert "POINT (10.12346 50.12346)" in repr(s3)

    # projected coordinates
    p1 = Point(3000.123456789, 3000.123456789)
    p2 = Point(4000.123456789, 4000.123456789)
    s2 = GeoSeries([p1, p2, None])
    assert "POINT (3000.123 3000.123)" in repr(s2)

    # projected geographic coordinate
    s4 = GeoSeries([p1, p2], crs=3857)
    assert "POINT (3000.123 3000.123)" in repr(s4)

    # Explicit precision overrides; always put the previous value back,
    # even when one of the assertions fails.
    previous_precision = geopandas.options.display_precision
    try:
        geopandas.options.display_precision = 1
        assert "POINT (10.1 50.1)" in repr(s1)

        geopandas.options.display_precision = 9
        assert "POINT (10.123456789 50.123456789)" in repr(s1)
    finally:
        geopandas.options.display_precision = previous_precision
|
||||
|
||||
|
||||
def test_repr_all_missing():
    """Reprs work when every geometry is missing."""
    # https://github.com/geopandas/geopandas/issues/1195
    missing = GeoSeries([None, None, None])
    assert "None" in repr(missing)

    frame = GeoDataFrame({"a": [1, 2, 3], "geometry": missing})
    assert "None" in repr(frame)
    assert "geometry" in frame._repr_html_()
|
||||
|
||||
|
||||
def test_repr_empty():
    """Reprs work for an empty GeoSeries and an empty GeoDataFrame."""
    # https://github.com/geopandas/geopandas/issues/1195
    empty_series = GeoSeries([])
    assert repr(empty_series) == "GeoSeries([], dtype: geometry)"

    empty_frame = GeoDataFrame({"a": [], "geometry": empty_series})
    assert "Empty GeoDataFrame" in repr(empty_frame)
    # https://github.com/geopandas/geopandas/issues/1184
    assert "geometry" in empty_frame._repr_html_()
|
||||
|
||||
|
||||
def test_repr_linearring():
    """LinearRing is kept as such, and LineStrings are not turned into one."""
    # https://github.com/geopandas/geopandas/pull/2689
    # specifically, checking internal shapely/wkt/wkb conversions
    # preserve LinearRing
    ring = LinearRing([(0, 0), (1, 1), (1, -1)])
    rings = GeoSeries([ring])
    assert "LINEARRING" in str(rings.iloc[0])  # shapely scalar repr
    assert "LINEARRING" in str(rings)  # GeoSeries repr

    # check something coercible to linearring is not converted
    open_line = LineString([(0, 0), (1, 1), (1, -1)])
    closed_line = LineString([(0, 0), (1, 1), (1, -1), (0, 0)])
    lines = GeoSeries([open_line, closed_line])
    assert "LINEARRING" not in str(lines)
|
||||
|
||||
|
||||
def test_indexing(s, df):
    """Scalar, list, boolean-mask and slice indexing on GeoSeries/GeoDataFrame."""
    # accessing scalar from the geometry (column)
    middle = Point(1, 1)
    scalars = [s[1], s.loc[1], s.iloc[1], df.loc[1, "geometry"], df.iloc[1, 0]]
    for scalar in scalars:
        assert scalar == middle

    # multiple values
    order = [2, 0]
    picked = GeoSeries([Point(2, 2), Point(0, 0)], index=[2, 0])
    selections = [
        s.loc[order],
        s.iloc[order],
        s.reindex(order),
        df.loc[order, "geometry"],
    ]
    for selection in selections:
        assert_geoseries_equal(selection, picked)
    # TODO here iloc does not return a GeoSeries
    assert_series_equal(
        df.iloc[order, 0], picked, check_series_type=False, check_names=False
    )

    # boolean indexing
    mask = np.array([True, False, True])
    kept = GeoSeries([Point(0, 0), Point(2, 2)], index=[0, 2])
    masked = [s[mask], s.loc[mask], df[mask]["geometry"], df.loc[mask, "geometry"]]
    for selection in masked:
        assert_geoseries_equal(selection, kept)

    # slices
    s.index = [1, 2, 3]
    tail = GeoSeries([Point(1, 1), Point(2, 2)], index=[2, 3])
    for sliced in (s[1:], s.iloc[1:], s.loc[2:]):
        assert_series_equal(sliced, tail)
|
||||
|
||||
|
||||
def test_reindex(s, df):
    """Reindexing rows and columns keeps or drops the geo type as appropriate."""
    # GeoSeries reindex
    res = s.reindex([1, 2, 3])
    exp = GeoSeries([Point(1, 1), Point(2, 2), None], index=[1, 2, 3])
    assert_geoseries_equal(res, exp)

    # GeoDataFrame reindex index
    res = df.reindex(index=[1, 2, 3])
    assert_geoseries_equal(res.geometry, exp)

    # GeoDataFrame reindex columns
    res = df.reindex(columns=["value1", "geometry"])
    assert isinstance(res, GeoDataFrame)
    assert isinstance(res.geometry, GeoSeries)
    assert_frame_equal(res, df[["value1", "geometry"]])

    # Without a geometry column the result must be exactly a pandas
    # DataFrame (not a subclass), hence the identity check on the type.
    res = df.reindex(columns=["value1", "value2"])
    assert type(res) is pd.DataFrame
    assert_frame_equal(res, df[["value1", "value2"]])
|
||||
|
||||
|
||||
def test_take(s, df):
    """``take`` on rows and columns returns the expected type and content."""
    positions = np.array([0, 2])

    # GeoSeries take
    taken = s.take(positions)
    assert isinstance(taken, GeoSeries)
    assert_geoseries_equal(taken, s.iloc[[0, 2]])

    # GeoDataFrame take axis 0
    taken = df.take(positions, axis=0)
    assert isinstance(taken, GeoDataFrame)
    assert_geodataframe_equal(taken, df.iloc[[0, 2], :])

    # GeoDataFrame take axis 1
    # ensure consistent order
    df = df.reindex(columns=["value1", "value2", "geometry"])
    taken = df.take(positions, axis=1)
    assert isinstance(taken, GeoDataFrame)
    assert_geodataframe_equal(taken, df[["value1", "geometry"]])

    # taking only the value columns
    taken = df.take(np.array([0, 1]), axis=1)
    assert isinstance(taken, pd.DataFrame)
    assert_frame_equal(taken, df[["value1", "value2"]])
|
||||
|
||||
|
||||
def test_take_empty(s, df):
    """Empty selections keep the GeoDataFrame type, columns and index type."""
    # ensure that index type is preserved in an empty take
    # https://github.com/geopandas/geopandas/issues/1190
    no_positions = np.array([], dtype="int64")

    # use non-default index
    df.index = pd.date_range("2012-01-01", periods=len(df))

    empty_results = [
        df.take(no_positions, axis=0),
        # the original bug report was an empty boolean mask
        df.loc[df["value1"] > 100],
        df[df["value1"] > 100],
    ]
    for result in empty_results:
        assert isinstance(result, GeoDataFrame)
        assert result.shape == (0, 3)
        assert isinstance(result.index, pd.DatetimeIndex)
|
||||
|
||||
|
||||
def test_assignment(s, df):
    """Setting a single geometry works through every indexer."""
    replacement = Point(10, 10)
    exp = GeoSeries([replacement, Point(1, 1), Point(2, 2)])

    # GeoSeries: plain setitem
    series_copy = s.copy()
    series_copy[0] = replacement
    assert_geoseries_equal(series_copy, exp)

    # GeoSeries: label-based
    series_copy = s.copy()
    series_copy.loc[0] = replacement
    assert_geoseries_equal(series_copy, exp)

    # GeoSeries: positional
    series_copy = s.copy()
    series_copy.iloc[0] = replacement
    assert_geoseries_equal(series_copy, exp)

    # GeoDataFrame: label-based
    frame_copy = df.copy()
    frame_copy.loc[0, "geometry"] = replacement
    assert_geoseries_equal(frame_copy["geometry"], exp)

    # GeoDataFrame: positional
    frame_copy = df.copy()
    frame_copy.iloc[0, 0] = replacement
    assert_geoseries_equal(frame_copy["geometry"], exp)
|
||||
|
||||
|
||||
def test_assign(df):
    """``assign`` returns a GeoDataFrame equal to adding the column by setitem."""
    expected = df.copy()
    expected["new"] = 1

    assigned = df.assign(new=1)
    assert isinstance(assigned, GeoDataFrame)
    assert_frame_equal(assigned, expected)
|
||||
|
||||
|
||||
def test_astype(s, df):
    """``astype`` on GeoSeries and GeoDataFrame returns the expected types."""
    # check geoseries functionality
    with pytest.raises(TypeError):
        s.astype(int)

    assert s.astype(str)[0] == "POINT (0 0)"

    as_object = s.astype(object)
    affected_pandas = (Version("2.1.0"), Version("2.1.1"))
    if Version(pd.__version__) not in affected_pandas:
        # https://github.com/geopandas/geopandas/issues/2948 - bug in pandas 2.1.0
        assert isinstance(as_object, pd.Series)
        assert not isinstance(as_object, GeoSeries)
        assert as_object.dtype == object

    df = df.rename_geometry("geom_list")

    # check whether returned object is a geodataframe
    converted = df.astype({"value1": float})
    assert isinstance(converted, GeoDataFrame)

    # check whether returned object is a dataframe
    for target in (str, {"geom_list": str}):
        converted = df.astype(target)
        assert isinstance(converted, pd.DataFrame)
        assert not isinstance(converted, GeoDataFrame)

    converted = df.astype(object)
    assert isinstance(converted, pd.DataFrame)
    assert not isinstance(converted, GeoDataFrame)
    assert converted["geom_list"].dtype == object
|
||||
|
||||
|
||||
def test_astype_invalid_geodataframe():
    """``astype`` works on a GeoDataFrame that has no geometry column."""
    # https://github.com/geopandas/geopandas/issues/1144
    # a GeoDataFrame without geometry column should not error in astype
    frame = GeoDataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    converted = frame.astype(object)

    assert isinstance(converted, pd.DataFrame)
    assert not isinstance(converted, GeoDataFrame)
    assert converted["a"].dtype == object
|
||||
|
||||
|
||||
def test_convert_dtypes(df):
    """``convert_dtypes`` keeps the GeoDataFrame type, crs and geometry name."""
    # https://github.com/geopandas/geopandas/issues/1870
    value_first = ["value1", "value2", "geometry"]

    # Test geometry col is first col, first, geom_col_name=geometry
    # (order is important in concat, used internally)
    expected1 = GeoDataFrame(
        pd.DataFrame(df).convert_dtypes(), crs=df.crs, geometry=df.geometry.name
    )
    # Checking type and metadata are right
    assert_geodataframe_equal(expected1, df.convert_dtypes())

    # Test geom last, geom_col_name=geometry
    geom_last = df[value_first].convert_dtypes()
    assert_geodataframe_equal(expected1[value_first], geom_last)

    if compat.HAS_PYPROJ:
        # Test again with crs set and custom geom col name
        df2 = df.set_crs(epsg=4326).rename_geometry("points")
        expected2 = GeoDataFrame(
            pd.DataFrame(df2).convert_dtypes(),
            crs=df2.crs,
            geometry=df2.geometry.name,
        )
        assert_geodataframe_equal(expected2, df2.convert_dtypes())

        # Test geom last, geom_col_name=points
        points_last = ["value1", "value2", "points"]
        geom_last = df2[points_last].convert_dtypes()
        assert_geodataframe_equal(expected2[points_last], geom_last)
|
||||
|
||||
|
||||
def test_to_csv(df):
    """CSV output writes geometries as WKT, using the platform line separator."""
    rows = [
        "geometry,value1,value2",
        "POINT (0 0),0,1",
        "POINT (1 1),1,2",
        "POINT (2 2),2,1",
    ]
    expected = "".join(row + os.linesep for row in rows)
    assert df.to_csv(index=False) == expected
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings(
    "ignore:Dropping of nuisance columns in DataFrame reductions"
)
def test_numerical_operations(s, df):
    """Numeric reductions skip geometry; arithmetic on geometry raises."""
    # df methods ignore the geometry column
    if compat.PANDAS_GE_20:
        totals = df.sum(numeric_only=True)
    else:
        totals = df.sum()
    assert_series_equal(totals, pd.Series([3, 4], index=["value1", "value2"]))

    # series methods raise error (not supported for geometry)
    with pytest.raises(TypeError):
        s.sum()

    with pytest.raises(TypeError):
        s.max()

    with pytest.raises((TypeError, ValueError)):
        # TODO: remove ValueError after pandas-dev/pandas#32749
        s.idxmax()

    # numerical ops raise an error
    with pytest.raises(TypeError):
        df + 1

    with pytest.raises(TypeError):
        s + 1

    # boolean comparisons work
    all_false = pd.DataFrame(False, index=df.index, columns=df.columns)
    assert_frame_equal(df == 100, all_false)
|
||||
|
||||
|
||||
def test_where(s):
    """``where`` replaces the masked-out geometry with a missing value."""
    keep = np.array([True, False, True])
    expected = GeoSeries([Point(0, 0), None, Point(2, 2)])
    assert_series_equal(s.where(keep), expected)
|
||||
|
||||
|
||||
def test_select_dtypes(df):
    """Selecting numeric dtypes returns only the two value columns."""
    numeric_only = df.select_dtypes(include=[np.number])
    assert_frame_equal(numeric_only, df[["value1", "value2"]])
|
||||
|
||||
|
||||
def test_equals(s, df):
    """``equals`` is True for copies and False once any value differs."""
    # https://github.com/geopandas/geopandas/issues/1420
    series_copy = s.copy()
    assert s.equals(series_copy) is True
    series_copy.iloc[0] = None
    assert s.equals(series_copy) is False

    frame_copy = df.copy()
    assert df.equals(frame_copy) is True

    # a differing geometry
    frame_copy.loc[0, "geometry"] = Point(10, 10)
    assert df.equals(frame_copy) is False

    # a differing attribute value
    frame_copy = df.copy()
    frame_copy.loc[0, "value1"] = 10
    assert df.equals(frame_copy) is False
|
||||
|
||||
|
||||
# Missing values
|
||||
|
||||
|
||||
def test_fillna_scalar(s, df):
    """``fillna`` with a scalar on a GeoSeries and on a GeoDataFrame."""
    with_gap = GeoSeries([Point(0, 0), None, Point(2, 2)])

    filled = with_gap.fillna(Point(1, 1))
    assert_geoseries_equal(filled, s)

    # allow np.nan although this does not change anything
    # https://github.com/geopandas/geopandas/issues/1149
    filled = with_gap.fillna(np.nan)
    assert_geoseries_equal(filled, with_gap)

    # raise exception if trying to fill missing geometry w/ non-geometry
    frame_with_gap = df.copy()
    frame_with_gap["geometry"] = with_gap
    filled = frame_with_gap.fillna(Point(1, 1))
    assert_geodataframe_equal(filled, df)
    with pytest.raises((NotImplementedError, TypeError)):  # GH2351
        frame_with_gap.fillna(0)

    # allow non-geometry fill value if there are no missing values
    # https://github.com/geopandas/geopandas/issues/1149
    frame_value_gap = df.copy()
    frame_value_gap.loc[0, "value1"] = np.nan
    filled = frame_value_gap.fillna(0)
    assert_geodataframe_equal(filled.astype({"value1": "int64"}), df)
|
||||
|
||||
|
||||
def test_fillna_series(s):
    """``fillna`` with a GeoSeries fills by index label."""
    # fill na with another GeoSeries
    with_gap = GeoSeries([Point(0, 0), None, Point(2, 2)])

    # check na filled with the same index
    filler = GeoSeries([Point(1, 1)] * 3)
    assert_geoseries_equal(with_gap.fillna(filler), s)

    # check na filled based on index, not position
    labels = [3, 2, 1]
    filler = GeoSeries([Point(i, i) for i in labels], index=labels)
    assert_geoseries_equal(with_gap.fillna(filler), s)

    # check na filled but the input length is different
    filler = GeoSeries([Point(1, 1)], index=[1])
    assert_geoseries_equal(with_gap.fillna(filler), s)

    # check na filled but the inputting index is different
    filler = GeoSeries([Point(1, 1)], index=[9])
    assert_geoseries_equal(with_gap.fillna(filler), with_gap)
|
||||
|
||||
|
||||
def test_fillna_inplace(s):
    """``fillna(..., inplace=True)`` fills the missing geometry."""
    with_gap = GeoSeries([Point(0, 0), None, Point(2, 2)])
    original_array = with_gap.array

    with_gap.fillna(Point(1, 1), inplace=True)
    assert_geoseries_equal(with_gap, s)

    if compat.PANDAS_GE_21:
        # starting from pandas 2.1, there is support to do this actually inplace
        assert with_gap.array is original_array
|
||||
|
||||
|
||||
def test_dropna():
    """``dropna`` removes the missing geometry and keeps the other labels."""
    with_gap = GeoSeries([Point(0, 0), None, Point(2, 2)])
    expected = with_gap.loc[[0, 2]]
    assert_geoseries_equal(with_gap.dropna(), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("NA", [None, np.nan])
def test_isna(NA):
    """``isna``/``isnull``/``notna``/``notnull`` flag the missing geometry."""
    s2 = GeoSeries([Point(0, 0), NA, Point(2, 2)], index=[2, 4, 5], name="tt")
    exp = pd.Series([False, True, False], index=[2, 4, 5], name="tt")

    res = s2.isnull()
    # The result must be exactly a pandas Series (not a subclass), hence the
    # identity check on the type.
    assert type(res) is pd.Series
    assert_series_equal(res, exp)

    res = s2.isna()
    assert_series_equal(res, exp)

    res = s2.notnull()
    assert_series_equal(res, ~exp)

    res = s2.notna()
    assert_series_equal(res, ~exp)
|
||||
|
||||
|
||||
# Any / all
|
||||
|
||||
|
||||
def test_any_all():
    """``any``/``all`` treat empty geometries as False."""
    empty = GeometryCollection([])
    point = Point(1, 1)

    # (geometries, expected all(), expected any())
    cases = [
        ([empty, point], False, True),
        ([point, point], True, True),
        ([empty, empty], False, False),
    ]
    for geometries, expect_all, expect_any in cases:
        series = GeoSeries(geometries)
        assert bool(series.all()) is expect_all
        assert bool(series.any()) is expect_any
|
||||
|
||||
|
||||
# Groupby / algos
|
||||
|
||||
|
||||
def test_sort_values():
    """Sorting points ascending and descending gives the expected label order."""
    points = GeoSeries([Point(0, 0), Point(2, 2), Point(0, 2)])

    ascending = points.sort_values()
    assert ascending.index.tolist() == [0, 2, 1]

    descending = points.sort_values(ascending=False)
    assert descending.index.tolist() == [1, 2, 0]

    # empty geoseries
    nothing = points.iloc[:0]
    assert_geoseries_equal(nothing.sort_values(), nothing)
|
||||
|
||||
|
||||
def test_sort_values_empty_missing():
    """Sort order of empty and missing geometries for each keyword combination."""
    mixed = GeoSeries([Point(0, 0), None, Point(), Point(1, 1)])
    keyword_cases = [
        # default: NA sorts last, empty first
        ({}, [2, 0, 3, 1]),
        # descending: NA sorts last, empty last
        ({"ascending": False}, [3, 0, 2, 1]),
        # NAs first, empty first after NAs
        ({"na_position": "first"}, [1, 2, 0, 3]),
        # NAs first, descending with empty last
        ({"ascending": False, "na_position": "first"}, [1, 3, 0, 2]),
    ]
    for keywords, order in keyword_cases:
        assert mixed.sort_values(**keywords).index.tolist() == order

    # all missing / empty
    default_cases = [
        ([None, None, None], [0, 1, 2]),
        ([Point(), Point(), Point()], [0, 1, 2]),
        ([Point(), None, Point()], [0, 2, 1]),
    ]
    for geometries, order in default_cases:
        assert GeoSeries(geometries).sort_values().index.tolist() == order
|
||||
|
||||
|
||||
def test_unique():
    """``unique`` drops the repeated point."""
    repeated = GeoSeries([Point(0, 0), Point(0, 0), Point(2, 2)])
    distinct = from_shapely([Point(0, 0), Point(2, 2)])
    # TODO should have specialized GeometryArray assert method
    assert_array_equal(repeated.unique(), distinct)
|
||||
|
||||
|
||||
def pd14_compat_index(index):
    """Return ``index`` passed through ``from_shapely`` on pandas >= 1.4.

    On older pandas the input is returned unchanged.
    """
    if not compat.PANDAS_GE_14:
        return index
    return from_shapely(index)
|
||||
|
||||
|
||||
def test_value_counts():
    """``value_counts`` for plain, crs-tagged, mixed and missing geometries."""
    # pandas >= 2.0 names the result "count"
    name = "count" if compat.PANDAS_GE_20 else None
    origin = Point(0, 0)
    line = LineString([[1, 1], [2, 2]])

    # each object is considered unique
    points = GeoSeries([Point(0, 0), Point(1, 1), Point(0, 0)])
    exp = pd.Series(
        [2, 1], index=pd14_compat_index([Point(0, 0), Point(1, 1)]), name=name
    )
    assert_series_equal(points.value_counts(), exp)

    # Check crs doesn't make a difference - note it is not kept in output index anyway
    with_crs = GeoSeries([Point(0, 0), Point(1, 1), Point(0, 0)], crs="EPSG:4326")
    assert_series_equal(with_crs.value_counts(), exp)
    if compat.PANDAS_GE_14:
        # TODO should/ can we fix CRS being lost
        assert with_crs.value_counts().index.array.crs is None

    # check mixed geometry
    mixed = GeoSeries([origin, line, Point(0, 0)])
    mixed_index = pd14_compat_index([Point(0, 0), LineString([[1, 1], [2, 2]])])
    exp_mixed = pd.Series([2, 1], index=mixed_index, name=name)
    assert_series_equal(mixed.value_counts(), exp_mixed)

    # check None is handled
    with_gap = GeoSeries([Point(0, 0), None, Point(0, 0)])
    exp_dropna = pd.Series([2], index=pd14_compat_index([Point(0, 0)]), name=name)
    assert_series_equal(with_gap.value_counts(dropna=True), exp_dropna)

    exp_keepna = pd.Series(
        [2, 1], index=pd14_compat_index([Point(0, 0), None]), name=name
    )
    assert_series_equal(with_gap.value_counts(dropna=False), exp_keepna)
|
||||
|
||||
|
||||
@pytest.mark.xfail(strict=False)
def test_drop_duplicates_series():
    """``drop_duplicates`` on two equal points should leave one (may xfail)."""
    # duplicated does not yet use EA machinery
    # (https://github.com/pandas-dev/pandas/issues/27264)
    # but relies on unstable hashing of unhashable objects in numpy array
    # giving flaky test (https://github.com/pandas-dev/pandas/issues/27035)
    twins = GeoSeries([Point(0, 0), Point(0, 0)])
    assert len(twins.drop_duplicates()) == 1
|
||||
|
||||
|
||||
@pytest.mark.xfail(strict=False)
def test_drop_duplicates_frame():
    """``drop_duplicates`` on a frame, by geometry and by all columns (may xfail)."""
    # duplicated does not yet use EA machinery, see above
    n_rows = 3
    same_point_frame = GeoDataFrame(
        {"geometry": [Point(0, 0) for _ in range(n_rows)], "value1": range(n_rows)}
    )

    by_geometry = same_point_frame.drop_duplicates(subset="geometry")
    assert len(by_geometry) == 1

    by_all_columns = same_point_frame.drop_duplicates()
    assert len(by_all_columns) == n_rows
|
||||
|
||||
|
||||
def test_groupby(df):
    """Group-wise count, sum and apply on a GeoDataFrame."""
    group_index = pd.Index([1, 2], name="value2")

    # counts work fine
    counts = df.groupby("value2").count()
    expected_counts = pd.DataFrame(
        {"geometry": [2, 1], "value1": [2, 1], "value2": [1, 2]}
    ).set_index("value2")
    assert_frame_equal(counts, expected_counts)

    # reductions ignore geometry column
    if compat.PANDAS_GE_20:
        sums = df.groupby("value2").sum(numeric_only=True)
    else:
        sums = df.groupby("value2").sum()
    expected_sums = pd.DataFrame(
        {"value1": [2, 1], "value2": [1, 2]}, dtype="int64"
    ).set_index("value2")
    assert_frame_equal(sums, expected_sums)

    # applying on the geometry column
    unions = df.groupby("value2")["geometry"].apply(lambda x: x.union_all())
    expected_unions = GeoSeries(
        [shapely.geometry.MultiPoint([(0, 0), (2, 2)]), Point(1, 1)],
        index=group_index,
        name="geometry",
    )
    assert_series_equal(unions, expected_unions)

    # apply on geometry column not resulting in new geometry
    areas = df.groupby("value2")["geometry"].apply(lambda x: x.union_all().area)
    expected_areas = pd.Series(
        [0.0, 0.0], index=pd.Index([1, 2], name="value2"), name="geometry"
    )
    assert_series_equal(areas, expected_areas)
|
||||
|
||||
|
||||
def test_groupby_groups(df):
    """``get_group`` returns a GeoDataFrame holding the rows of that group."""
    grouped = df.groupby("value2")
    group_one = grouped.get_group(1)

    assert isinstance(group_one, GeoDataFrame)
    assert_frame_equal(group_one, df.loc[[0, 2]])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("crs", [None, "EPSG:4326"])
@pytest.mark.parametrize("geometry_name", ["geometry", "geom"])
def test_groupby_metadata(crs, geometry_name):
    """Groups passed to ``groupby().apply`` are GeoDataFrames carrying the crs."""
    if crs and not compat.HAS_PYPROJ:
        pytest.skip("requires pyproj")
    # https://github.com/geopandas/geopandas/issues/2294
    data = {
        geometry_name: [Point(0, 0), Point(1, 1), Point(0, 0)],
        "value1": np.arange(3, dtype="int64"),
        "value2": np.array([1, 2, 1], dtype="int64"),
    }
    df = GeoDataFrame(data, crs=crs, geometry=geometry_name)

    # pandas is deprecating that the group key is present as column in the
    # dataframe passed to `func`. To suppress this warning, it introduced
    # a new include_groups keyword
    kwargs = {"include_groups": False} if compat.PANDAS_GE_22 else {}

    # dummy test asserting we can access the crs
    def func(group):
        assert isinstance(group, GeoDataFrame)
        assert group.crs == crs

    df.groupby("value2").apply(func, **kwargs)

    # selecting the non-group columns -> no need to pass the keyword
    selected = [geometry_name, "value1"]
    selection_works = (
        compat.PANDAS_GE_22
        or not compat.PANDAS_GE_20
        or geometry_name == "geometry"
    )
    if selection_works:
        df.groupby("value2")[selected].apply(func)
    else:
        # https://github.com/geopandas/geopandas/pull/2966#issuecomment-1878816712
        # with pandas 2.0 and 2.1 with geom col != geometry this is failing
        with pytest.raises(AttributeError):
            df.groupby("value2")[selected].apply(func)

    # actual test with functionality
    def self_join(group):
        return geopandas.sjoin(group, group[[geometry_name, "value1"]], how="inner")

    res = df.groupby("value2").apply(self_join, **kwargs)

    expected = df.take([0, 0, 2, 2, 1])
    expected = expected.set_index("value2", drop=compat.PANDAS_GE_22, append=True)
    expected = expected.swaplevel()
    expected = expected.rename(columns={"value1": "value1_left"})
    expected = expected.assign(value1_right=[0, 2, 0, 2, 1])
    assert_geodataframe_equal(res.drop(columns=["index_right"]), expected)
|
||||
|
||||
|
||||
def test_apply(s):
    """``GeoSeries.apply`` returns a GeoSeries only for geometry results."""

    # function that returns geometry preserves GeoSeries class
    def identity(geom):
        assert isinstance(geom, Point)
        return geom

    # function that returns non-geometry results in Series
    def x_coordinate(geom):
        assert isinstance(geom, Point)
        return geom.x

    geometries = s.apply(identity)
    assert isinstance(geometries, GeoSeries)
    assert_geoseries_equal(geometries, s)

    numbers = s.apply(x_coordinate)
    assert not isinstance(numbers, GeoSeries)
    assert_series_equal(numbers, pd.Series([0.0, 1.0, 2.0]))
|
||||
|
||||
|
||||
def test_apply_loc_len1(df):
    """``apply`` works on a length-1 geometry subset selected with ``loc``."""
    # subset of len 1 with loc -> bug in pandas with inconsistent Block ndim
    # resulting in bug in apply
    # https://github.com/geopandas/geopandas/issues/1078
    single = df.loc[[0], "geometry"]
    applied = single.apply(lambda geom: geom.is_empty)
    np.testing.assert_allclose(applied, single.is_empty)
|
||||
|
||||
|
||||
@pytest.mark.skipif(compat.PANDAS_GE_30, reason="convert_dtype is removed in pandas 3")
def test_apply_convert_dtypes_keyword(s):
    """``GeoSeries.apply`` accepts the ``convert_dtype`` keyword."""
    # From pandas 2.1 exactly one warning about the keyword is expected;
    # on older pandas no warning is expected.
    if compat.PANDAS_GE_21:
        recorder = pytest.warns()
    else:
        recorder = warnings.catch_warnings(record=True)

    with recorder as record:
        applied = s.apply(lambda x: x, convert_dtype=True, args=())
    assert_geoseries_equal(applied, s)

    if not compat.PANDAS_GE_21:
        assert len(record) == 0
    else:
        assert len(record) == 1
        assert "the convert_dtype parameter" in str(record[0].message)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("crs", [None, "EPSG:4326"])
def test_apply_no_geometry_result(df, crs):
    """``apply`` whose result has no geometry returns a plain pandas DataFrame."""
    if crs:
        if not compat.HAS_PYPROJ:
            pytest.skip("requires pyproj")
        df = df.set_crs(crs)

    expected = df.astype(str)

    by_column = df.apply(lambda col: col.astype(str), axis=0)
    assert type(by_column) is pd.DataFrame
    assert_frame_equal(by_column, expected)

    by_row = df.apply(lambda col: col.astype(str), axis=1)
    assert type(by_row) is pd.DataFrame
    assert_frame_equal(by_row, expected)
|
||||
|
||||
|
||||
def test_apply_preserves_geom_col_name(df):
    """A column-wise identity ``apply`` keeps the custom geometry column name."""
    renamed = df.rename_geometry("geom")
    applied = renamed.apply(lambda col: col, axis=0)
    assert applied.geometry.name == "geom"
|
||||
|
||||
|
||||
def test_df_apply_returning_series(df):
    """Row-wise ``apply`` picks a result type that matches the returned values."""
    # https://github.com/geopandas/geopandas/issues/2283
    geometries = df.apply(lambda row: row.geometry, axis=1)
    assert_geoseries_equal(geometries, df.geometry, check_crs=False)

    values = df.apply(lambda row: row.value1, axis=1)
    assert_series_equal(values, df["value1"].rename(None))

    # https://github.com/geopandas/geopandas/issues/2480
    nans = df.apply(lambda x: float("NaN"), axis=1)
    assert nans.dtype == "float64"

    # assert list of nones is not promoted to GeometryDtype
    nones = df.apply(lambda x: None, axis=1)
    assert nones.dtype == "object"

    # https://github.com/geopandas/geopandas/issues/2889
    # contrived case such that `from_shapely` receives an array of geodataframes
    frames = df.apply(lambda row: df.geometry.to_frame(), axis=1)
    assert frames.dtype == "object"
|
||||
|
||||
|
||||
def test_df_apply_geometry_dtypes(df):
    """Column-wise ``apply`` passes geometry columns to the function as GeoSeries."""
    # https://github.com/geopandas/geopandas/issues/1852
    seen = []

    def record_type(column):
        # remember each column's name and the class it arrived as
        seen.append((column.name, type(column)))

    df["geom2"] = df.geometry
    df.apply(record_type)

    assert seen == [
        ("geometry", GeoSeries),
        ("value1", pd.Series),
        ("value2", pd.Series),
        ("geom2", GeoSeries),
    ]
|
||||
|
||||
|
||||
def test_pivot(df):
    """``pivot`` on a GeoDataFrame matches pivoting the plain DataFrame."""
    # https://github.com/geopandas/geopandas/issues/2057
    # pivot failing due to creating a MultiIndex
    plain_pivot = pd.DataFrame(df).pivot(columns="value1")
    expected = GeoDataFrame(plain_pivot)
    assert_geodataframe_equal(df.pivot(columns="value1"), expected)
|
||||
|
||||
|
||||
def test_preserve_attrs(df):
    """``attrs`` set on a GeoDataFrame survive indexing and common methods."""
    # https://github.com/geopandas/geopandas/issues/1654
    df.attrs["name"] = "my_name"
    attrs = {"name": "my_name"}
    assert df.attrs == attrs

    # preserve attrs in indexing operations
    for subset in [df[:2], df[df["value1"] > 2], df[["value2", "geometry"]]]:
        # check the *result* of the indexing operation, not the source frame
        assert subset.attrs == attrs

    # preserve attrs in methods
    df2 = df.reset_index()
    assert df2.attrs == attrs

    # https://github.com/geopandas/geopandas/issues/1875
    df3 = df2.explode(index_parts=True)
    assert df3.attrs == attrs
|
||||
|
||||
|
||||
def test_preserve_flags(df):
    """``flags.allows_duplicate_labels`` survives indexing/methods and is enforced."""
    # https://github.com/geopandas/geopandas/issues/1654
    df = df.set_flags(allows_duplicate_labels=False)
    assert df.flags.allows_duplicate_labels is False

    # preserve flags in indexing operations
    for subset in [df[:2], df[df["value1"] > 2], df[["value2", "geometry"]]]:
        # check the *result* of the indexing operation, not the source frame
        assert subset.flags.allows_duplicate_labels is False

    # preserve flags in methods
    df2 = df.reset_index()
    assert df2.flags.allows_duplicate_labels is False

    # it is honored for operations that introduce duplicate labels
    with pytest.raises(ValueError):
        df.reindex([0, 0, 1])

    with pytest.raises(ValueError):
        df[["value1", "value1", "geometry"]]

    with pytest.raises(ValueError):
        pd.concat([df, df])
|
||||
|
||||
|
||||
def test_ufunc():
    """A shapely ufunc applied to a GeoSeries returns a writeable GeoSeries."""
    # this is calling a shapely ufunc, but we currently rely on pandas' implementation
    # of `__array_ufunc__` to wrap the result back into a GeoSeries
    points = GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)])
    buffered = shapely.buffer(points, 2)
    assert isinstance(buffered, GeoSeries)

    # ensure the result is still writeable
    # (https://github.com/geopandas/geopandas/issues/3178)
    backing_flags = buffered.array._data.flags
    assert backing_flags.writeable
    buffered.loc[0] = Point(10, 10)
    assert buffered.iloc[0] == Point(10, 10)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,51 @@
|
||||
from geopandas.tools._show_versions import (
|
||||
_get_C_info,
|
||||
_get_deps_info,
|
||||
_get_sys_info,
|
||||
show_versions,
|
||||
)
|
||||
|
||||
|
||||
def test_get_sys_info():
    """``_get_sys_info`` reports the interpreter, its executable and the machine."""
    info = _get_sys_info()

    for key in ("python", "executable", "machine"):
        assert key in info
|
||||
|
||||
|
||||
def test_get_c_info():
    """``_get_C_info`` has an entry for each C library and its lib/data location."""
    info = _get_C_info()

    expected_keys = (
        "GEOS",
        "GEOS lib",
        "GDAL",
        "GDAL data dir",
        "PROJ",
        "PROJ data dir",
    )
    for key in expected_keys:
        assert key in info
|
||||
|
||||
|
||||
def test_get_deps_info():
    """``_get_deps_info`` has an entry for every dependency that gets reported."""
    info = _get_deps_info()

    expected_packages = (
        "geopandas",
        "pandas",
        "fiona",
        "numpy",
        "shapely",
        "pyproj",
        "matplotlib",
        "mapclassify",
        "geopy",
        "psycopg",
        "psycopg2",
        "geoalchemy2",
    )
    for package in expected_packages:
        assert package in info
|
||||
|
||||
|
||||
def test_show_versions(capsys):
    """``show_versions`` prints system, C-library and Python-package sections."""
    show_versions()
    printed = capsys.readouterr().out

    for marker in ("python", "GEOS", "geopandas"):
        assert marker in printed
|
||||
@@ -0,0 +1,959 @@
|
||||
from math import sqrt
|
||||
|
||||
import numpy as np
|
||||
|
||||
import shapely
|
||||
from shapely.geometry import (
|
||||
GeometryCollection,
|
||||
LineString,
|
||||
MultiPolygon,
|
||||
Point,
|
||||
Polygon,
|
||||
box,
|
||||
)
|
||||
|
||||
import geopandas
|
||||
from geopandas import GeoDataFrame, GeoSeries, read_file
|
||||
from geopandas import _compat as compat
|
||||
|
||||
import pytest
|
||||
from numpy.testing import assert_array_equal
|
||||
|
||||
|
||||
class TestSeriesSindex:
    """Checks of the lazily built spatial index exposed through ``.sindex``."""

    def test_has_sindex(self):
        """``has_sindex`` follows whether an index is currently cached."""
        tri_lower = Polygon([(0, 0), (1, 0), (1, 1)])
        tri_upper = Polygon([(0, 0), (1, 1), (0, 1)])

        # GeoDataFrame: build, drop the cached index, build again
        frame = GeoDataFrame({"geom": [tri_lower, tri_upper]}, geometry="geom")
        assert not frame.has_sindex
        frame.sindex
        assert frame.has_sindex
        frame.geometry.values._sindex = None
        assert not frame.has_sindex
        frame.sindex
        assert frame.has_sindex

        # GeoSeries: same sequence
        series = GeoSeries([tri_lower, tri_upper])
        assert not series.has_sindex
        series.sindex
        assert series.has_sindex
        series.values._sindex = None
        assert not series.has_sindex
        series.sindex
        assert series.has_sindex

    def test_empty_geoseries(self):
        """An empty GeoSeries gives a falsy, zero-length spatial index."""
        empty = GeoSeries(dtype=object)
        assert not empty.sindex
        assert len(empty.sindex) == 0

    def test_point(self):
        """A single point is found by an enclosing box and missed by a disjoint one."""
        series = GeoSeries([Point(0, 0)])
        assert series.sindex.size == 1

        enclosing = series.sindex.intersection((-1, -1, 1, 1))
        assert len(list(enclosing)) == 1

        disjoint = series.sindex.intersection((-2, -2, -1, -1))
        assert len(list(disjoint)) == 0

    def test_empty_point(self):
        """Tests that a single empty Point results in an empty tree."""
        series = GeoSeries([Point()])
        assert not series.sindex
        assert len(series.sindex) == 0

    def test_polygons(self):
        """Every polygon in the series is counted in the index size."""
        tri_lower = Polygon([(0, 0), (1, 0), (1, 1)])
        tri_upper = Polygon([(0, 0), (1, 1), (0, 1)])
        square = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
        series = GeoSeries([tri_lower, tri_upper, square])
        assert series.sindex.size == 3

    @pytest.mark.filterwarnings("ignore:The series.append method is deprecated")
    @pytest.mark.skipif(compat.PANDAS_GE_20, reason="append removed in pandas 2.0")
    def test_polygons_append(self):
        """The index of an appended series covers the geometries of both inputs."""
        tri_lower = Polygon([(0, 0), (1, 0), (1, 1)])
        tri_upper = Polygon([(0, 0), (1, 1), (0, 1)])
        square = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
        shapes = [tri_lower, tri_upper, square]

        head = GeoSeries(shapes)
        tail = GeoSeries(shapes, [3, 4, 5])
        combined = head.append(tail)
        assert len(combined) == 6
        assert combined.sindex.size == 6

    def test_lazy_build(self):
        """The index is only created on first access to ``.sindex``."""
        series = GeoSeries([Point(0, 0)])
        assert series.values._sindex is None
        assert series.sindex.size == 1
        assert series.values._sindex is not None

    def test_rebuild_on_item_change(self):
        """Assigning an element yields a new index object on the next access."""
        series = GeoSeries([Point(0, 0)])
        index_before = series.sindex
        series.iloc[0] = Point(0, 0)
        assert series.sindex is not index_before

    def test_rebuild_on_slice(self):
        """Only a full, in-order slice hands back the very same index object."""
        series = GeoSeries([Point(0, 0), Point(0, 0)])
        index_before = series.sindex

        # Select a couple of rows
        partial = series.iloc[:1]
        assert partial.sindex is not index_before

        # Select all rows
        everything = series.iloc[:]
        assert everything.sindex is index_before

        # Select all rows and flip
        flipped = series.iloc[::-1]
        assert flipped.sindex is not index_before
|
||||
|
||||
|
||||
class TestFrameSindex:
    """Checks of ``.sindex`` caching and invalidation on a ``GeoDataFrame``."""

    def setup_method(self):
        """Create a five-row frame with points (0, 0) .. (4, 4) in column ``geom``."""
        columns = {
            "A": range(5),
            "B": range(-5, 0),
            "geom": [Point(i, i) for i in range(5)],
        }
        self.df = GeoDataFrame(columns, geometry="geom")

    def test_sindex(self):
        """The frame's index has one entry per row and answers a bounds query."""
        self.df.crs = "epsg:4326"
        assert self.df.sindex.size == 5

        found = list(self.df.sindex.intersection((2.5, 2.5, 4, 4)))
        assert len(found) == 2
        assert found[0] == 3

    def test_lazy_build(self):
        """The index is only created on first access to ``.sindex``."""
        assert self.df.geometry.values._sindex is None
        assert self.df.sindex.size == 5
        assert self.df.geometry.values._sindex is not None

    def test_sindex_rebuild_on_set_geometry(self):
        """Replacing the geometry in place yields a different index object."""
        # First build the sindex
        assert self.df.sindex is not None
        index_before = self.df.sindex

        shifted_points = [Point(i, i) for i in range(5, 10)]
        self.df.set_geometry(shifted_points, inplace=True)
        assert self.df.sindex is not index_before

    def test_rebuild_on_row_slice(self):
        """Only a full, in-order row slice hands back the very same index object."""
        # Select a subset of rows rebuilds
        index_before = self.df.sindex
        partial = self.df.iloc[:1]
        assert partial.sindex is not index_before

        # Slicing all does not rebuild
        index_before = self.df.sindex
        everything = self.df.iloc[:]
        assert everything.sindex is index_before

        # Re-ordering rebuilds
        flipped = self.df.iloc[::-1]
        assert flipped.sindex is not index_before

    def test_rebuild_on_single_col_selection(self):
        """Selecting a single column should not rebuild the spatial index."""
        # Selecting geometry column preserves the index
        index_before = self.df.sindex

        by_name = self.df["geom"]
        assert by_name.sindex is index_before

        by_accessor = self.df.geometry
        assert by_accessor.sindex is index_before

    def test_rebuild_on_multiple_col_selection(self):
        """Whether a column subset shares the index depends on the pandas version."""
        index_before = self.df.sindex
        # Selecting a subset of columns preserves the index for pandas < 2.0
        # with pandas 2.0, the column is now copied, losing the index. But
        # with pandas >= 3.0 and Copy-on-Write this is preserved again
        for column_order in (["geom", "A"], ["A", "geom"]):
            subset = self.df[column_order]
            if compat.PANDAS_GE_20 and not compat.PANDAS_GE_30:
                assert subset.sindex is not index_before
            else:
                assert subset.sindex is index_before

    def test_rebuild_on_update_inplace(self):
        """An in-place sort drops the cached index; the next access builds a new one."""
        frame = self.df.copy()
        index_before = frame.sindex

        # sorting in place
        frame.sort_values("A", ascending=False, inplace=True)

        # spatial index should be invalidated
        assert not frame.has_sindex

        # and should be different
        index_after = frame.sindex
        assert index_after is not index_before

        # sorting should still have happened though
        assert frame.index.tolist() == [4, 3, 2, 1, 0]

    def test_update_inplace_no_rebuild(self):
        """An in-place column rename keeps the cached index object."""
        frame = self.df.copy()
        index_before = frame.sindex

        frame.rename(columns={"A": "AA"}, inplace=True)

        # a rename shouldn't invalidate the index
        assert frame.has_sindex

        # and the "new" should be the same
        index_after = frame.sindex
        assert index_before is index_after
|
||||
|
||||
|
||||
# Skip to accommodate Shapely geometries being unhashable # TODO unskip?
@pytest.mark.skip
@pytest.mark.usefixtures("_setup_class_nybb_filename")
class TestJoinSindex:
    """Spatial-index lookups on the nybb boroughs before and after a merge.

    The whole class is currently skipped (see the marker above).
    """

    # coordinates probed by every lookup in this class
    PROBE = (1012821.80, 229228.26)

    def setup_method(self):
        """Load the boroughs from the file named by ``self.nybb_filename``."""
        self.boros = read_file(self.nybb_filename)

    def _boro_names_at_probe(self, frame):
        """Return ``BoroName`` for each row of *frame* whose index entry hits PROBE."""
        hits = frame.sindex.intersection(self.PROBE)
        return [frame.iloc[hit]["BoroName"] for hit in hits]

    def test_merge_geo(self):
        """Sub-frames and their outer merge each resolve the probe correctly."""
        # First check that we gets hits from the boros frame.
        assert self._boro_names_at_probe(self.boros) == ["Bronx", "Queens"]

        # Check that we only get the Bronx from this view.
        first = self.boros[self.boros["BoroCode"] < 3]
        assert self._boro_names_at_probe(first) == ["Bronx"]

        # Check that we only get Queens from this view.
        # (the original wrapped this list in a 1-tuple via a stray trailing
        # comma, so the comparison against a list could never succeed)
        second = self.boros[self.boros["BoroCode"] >= 3]
        assert self._boro_names_at_probe(second) == ["Queens"]

        # Get both the Bronx and Queens again.
        merged = first.merge(second, how="outer")
        assert len(merged) == 5
        assert merged.sindex.size == 5
        assert self._boro_names_at_probe(merged) == ["Bronx", "Queens"]
|
||||
|
||||
|
||||
class TestShapelyInterface:
|
||||
def setup_method(self):
    """Build a frame of points (0, 0) .. (4, 4) followed by one box at index 5."""
    geoms = [Point(i, i) for i in range(5)]
    geoms.append(box(10, 10, 20, 20))  # include a box geometry
    self.df = GeoDataFrame({"geom": geoms}, geometry="geom")
    self.expected_size = len(geoms)
|
||||
|
||||
# --------------------------- `intersection` tests -------------------------- #
|
||||
@pytest.mark.parametrize(
    "test_geom, expected",
    [
        ((-1, -1, -0.5, -0.5), []),
        ((-0.5, -0.5, 0.5, 0.5), [0]),
        ((0, 0, 1, 1), [0, 1]),
        ((0, 0), [0]),
    ],
)
def test_intersection_bounds_tuple(self, test_geom, expected):
    """Tests the `intersection` method with valid inputs."""
    hits = self.df.sindex.intersection(test_geom)
    assert_array_equal(list(hits), expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "test_geom",
    [(-1, -1, -0.5), -0.5, None, Point(0, 0)],
)
def test_intersection_invalid_bounds_tuple(self, test_geom):
    """Tests the `intersection` method with invalid inputs."""
    tree = self.df.sindex
    # we raise a useful TypeError
    with pytest.raises(TypeError):
        tree.intersection(test_geom)
|
||||
|
||||
# ------------------------------ `query` tests ------------------------------ #
|
||||
@pytest.mark.parametrize(
    "predicate, test_geom, expected",
    [
        # --- no predicate: bounding boxes only ---
        # bbox does not intersect
        (None, box(-1, -1, -0.5, -0.5), []),
        # bbox intersects
        (None, box(-0.5, -0.5, 0.5, 0.5), [0]),
        # bbox intersects multiple
        (None, box(0, 0, 1, 1), [0, 1]),
        # bbox intersects but not geometry
        (None, LineString([(0, 1), (1, 0)]), [0, 1]),
        # --- intersects ---
        # bbox does not intersect
        ("intersects", box(-1, -1, -0.5, -0.5), []),
        # bbox and geometry intersect
        ("intersects", box(-0.5, -0.5, 0.5, 0.5), [0]),
        # bbox and geometry intersect multiple
        ("intersects", box(0, 0, 1, 1), [0, 1]),
        # bbox intersects but not geometry
        ("intersects", LineString([(0, 1), (1, 0)]), []),
        # --- within ---
        # does not intersect
        ("within", box(0.25, 0.28, 0.75, 0.75), []),
        # intersects but is not within
        ("within", box(0, 0, 10, 10), []),
        # intersects and is within
        ("within", box(11, 11, 12, 12), [5]),
        # intersects but not within
        ("within", LineString([(0, 1), (1, 0)]), []),
        # --- contains ---
        # intersects but does not contain
        ("contains", box(0, 0, 1, 1), []),
        # intersects and contains
        ("contains", box(0, 0, 1.001, 1.001), [1]),
        # intersects and contains
        ("contains", box(0.5, 0.5, 1.5, 1.5), [1]),
        # intersects and contains multiple
        ("contains", box(-1, -1, 2, 2), [0, 1]),
        # intersects but not contains
        ("contains", LineString([(0, 1), (1, 0)]), []),
        # --- touches ---
        # bbox intersects and touches
        ("touches", box(-1, -1, 0, 0), [0]),
        # bbox intersects but geom does not touch
        ("touches", box(-0.5, -0.5, 1.5, 1.5), []),
        # --- contains (box geometry) ---
        # contains but does not contains_properly
        ("contains", box(10, 10, 20, 20), [5]),
        # --- covers ---
        # covers (0, 0) and (1, 1)
        ("covers", box(-0.5, -0.5, 1, 1), [0, 1]),
        # does not cover any
        ("covers", box(0.001, 0.001, 0.99, 0.99), []),
        # covers but does not contain
        ("covers", box(0, 0, 1, 1), [0, 1]),
        # --- contains_properly ---
        # intersects but does not contain
        ("contains_properly", box(0, 0, 1, 1), []),
        # intersects 2 and contains 1
        ("contains_properly", box(0, 0, 1.001, 1.001), [1]),
        # intersects 1 and contains 1
        ("contains_properly", box(0.5, 0.5, 1.001, 1.001), [1]),
        # intersects and contains
        ("contains_properly", box(0.5, 0.5, 1.5, 1.5), [1]),
        # intersects and contains multiple
        ("contains_properly", box(-1, -1, 2, 2), [0, 1]),
        # contains but does not contains_properly
        ("contains_properly", box(10, 10, 20, 20), []),
    ],
)
def test_query(self, predicate, test_geom, expected):
    """Tests the `query` method with valid inputs and valid predicates."""
    tree = self.df.sindex
    matches = tree.query(test_geom, predicate=predicate)
    assert_array_equal(matches, expected)
|
||||
|
||||
def test_query_invalid_geometry(self):
    """Tests the `query` method with invalid geometry."""
    tree = self.df.sindex
    # a plain string is not a geometry and must be rejected
    with pytest.raises(TypeError):
        tree.query("notavalidgeom")
|
||||
|
||||
@pytest.mark.skipif(not compat.GEOS_GE_310, reason="Requires GEOS 3.10")
@pytest.mark.parametrize(
    "distance, test_geom, expected",
    [
        # bounds don't intersect and not within distance=0
        (0, box(9.0, 9.0, 9.9, 9.9), []),
        # bounds don't intersect but is within distance=1
        (1, box(9.0, 9.0, 9.9, 9.9), [5]),
        # within 1-D absolute distance in both axes, but not euclidean distance
        (0.5, Point(0.5, 0.5), []),
        # same as before but within euclidean distance
        (sqrt(2 * 0.5**2) + 1e-9, Point(0.5, 0.5), [0, 1]),
        # less than euclidean distance between points, multi-object
        (
            sqrt(2) - 1e-9,
            # multi-object test
            [Polygon([(0, 0), (1, 0), (1, 1)]), Polygon([(1, 1), (2, 1), (2, 2)])],
            [[0, 0, 1, 1], [0, 1, 1, 2]],
        ),
        # more than euclidean distance between points, multi-object
        (
            sqrt(2) + 1e-9,
            [Polygon([(0, 0), (1, 0), (1, 1)]), Polygon([(1, 1), (2, 1), (2, 2)])],
            [[0, 0, 0, 1, 1, 1, 1], [0, 1, 2, 0, 1, 2, 3]],
        ),
        # distance is array-like, broadcastable to geometry
        (
            [2, 10],
            [Point(0.5, 0.5), Point(1, 1)],
            [[0, 0, 1, 1, 1, 1, 1], [0, 1, 0, 1, 2, 3, 4]],
        ),
    ],
)
def test_query_dwithin(self, distance, test_geom, expected):
    """Tests the `query` method with predicates that require keyword arguments."""
    tree = self.df.sindex
    matches = tree.query(test_geom, predicate="dwithin", distance=distance)
    assert_array_equal(matches, expected)
|
||||
|
||||
@pytest.mark.skipif(not compat.GEOS_GE_310, reason="Requires GEOS 3.10")
def test_dwithin_no_distance(self):
    """Tests that `query` with 'dwithin' but no `distance` raises a ValueError."""
    expected_msg = "'distance' parameter is required for 'dwithin' predicate"
    with pytest.raises(ValueError, match=expected_msg):
        self.df.sindex.query(Point(0, 0), predicate="dwithin")
|
||||
|
||||
@pytest.mark.parametrize(
    "predicate",
    (
        None,
        "contains",
        "contains_properly",
        "covered_by",
        "covers",
        "crosses",
        "intersects",
        "overlaps",
        "touches",
        "within",
    ),
)
def test_query_distance_invalid(self, predicate):
    """Tests that passing `distance` with any non-'dwithin' predicate raises."""
    tree = self.df.sindex
    with pytest.raises(
        ValueError,
        match="'distance' parameter is only supported in combination with 'dwithin'",
    ):
        tree.query(Point(0, 0), predicate=predicate, distance=0)
|
||||
|
||||
@pytest.mark.skipif(
    compat.GEOS_GE_310, reason="Test for 'dwithin'-incompatible versions of GEOS"
)
def test_dwithin_requirements(self):
    """Tests that 'dwithin' raises a ValueError when GEOS is older than 3.10.

    Only runs when ``compat.GEOS_GE_310`` is false (see the skip marker).
    """
    expected_msg = "predicate = 'dwithin' requires GEOS >= 3.10.0"
    with pytest.raises(ValueError, match=expected_msg):
        self.df.sindex.query(Point(0, 0), predicate="dwithin", distance=0)
|
||||
|
||||
@pytest.mark.parametrize(
    "test_geom, expected_value",
    (
        (None, []),
        (GeometryCollection(), []),
        (Point(), []),
        (MultiPolygon(), []),
        (Polygon(), []),
    ),
)
def test_query_empty_geometry(self, test_geom, expected_value):
    """Tests the `query` method with empty geometry."""
    tree = self.df.sindex
    matches = tree.query(test_geom)
    assert_array_equal(matches, expected_value)
|
||||
|
||||
def test_query_invalid_predicate(self):
    """Tests the `query` method with invalid predicates."""
    tree = self.df.sindex
    query_geom = box(-1, -1, -0.5, -0.5)
    # "test" is not a known predicate name
    with pytest.raises(ValueError):
        tree.query(query_geom, predicate="test")
|
||||
|
||||
@pytest.mark.parametrize(
    "sort, expected",
    (
        (True, [[0, 0, 0], [0, 1, 2]]),
        # False could be anything, at least we'll know if it changes
        (False, [[0, 0, 0], [0, 1, 2]]),
    ),
)
def test_query_sorting(self, sort, expected):
    """Check that results from `query` don't depend on the
    order of geometries.

    ``expected`` is parametrized as two rows (input indices, tree indices).
    This test passes a single geometry, and the result is compared with the
    tree-index row only.
    """
    # these geometries come from a reported issue:
    # https://github.com/geopandas/geopandas/issues/1337
    # there is no theoretical reason they were chosen
    test_polys = GeoSeries([Polygon([(1, 1), (3, 1), (3, 3), (1, 3)])])
    tree_polys = GeoSeries(
        [
            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
            Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]),
            Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
        ]
    )
    # derive the expectation from the parametrized value instead of
    # overwriting it with a hard-coded list (second row == [0, 1, 2])
    expected_tree = expected[1]

    test_geo = test_polys.values[0]
    res = tree_polys.sindex.query(test_geo, sort=sort)

    # asserting the same elements
    assert sorted(res) == sorted(expected_tree)
    # asserting the exact array can fail if sort=False
    try:
        assert_array_equal(res, expected_tree)
    except AssertionError:
        if sort is not False:
            raise
        pytest.xfail(
            "rtree results are known to be unordered, see "
            "https://github.com/geopandas/geopandas/issues/1337\n"
            "Expected:\n {}\n".format(expected_tree)
            + "Got:\n {}\n".format(res.tolist())
        )
|
||||
|
||||
# ------------------------- `query_bulk` tests -------------------------- #
|
||||
@pytest.mark.parametrize(
    "predicate, test_geom, expected",
    [
        # --- no predicate ---
        (None, [(-1, -1, -0.5, -0.5)], [[], []]),
        (None, [(-0.5, -0.5, 0.5, 0.5)], [[0], [0]]),
        (None, [(0, 0, 1, 1)], [[0, 0], [0, 1]]),
        # --- intersects ---
        ("intersects", [(-1, -1, -0.5, -0.5)], [[], []]),
        ("intersects", [(-0.5, -0.5, 0.5, 0.5)], [[0], [0]]),
        ("intersects", [(0, 0, 1, 1)], [[0, 0], [0, 1]]),
        # only second geom intersects
        ("intersects", [(-1, -1, -0.5, -0.5), (-0.5, -0.5, 0.5, 0.5)], [[1], [0]]),
        # both geoms intersect
        (
            "intersects",
            [(-1, -1, 1, 1), (-0.5, -0.5, 0.5, 0.5)],
            [[0, 0, 1], [0, 1, 0]],
        ),
        # --- within ---
        # does not intersect
        ("within", [(0.25, 0.28, 0.75, 0.75)], [[], []]),
        # intersects but is not within
        ("within", [(0, 0, 10, 10)], [[], []]),
        # intersects and is within
        ("within", [(11, 11, 12, 12)], [[0], [5]]),
        # --- contains ---
        # intersects and covers, but does not contain
        ("contains", [(0, 0, 1, 1)], [[], []]),
        # intersects 2 and contains 1
        ("contains", [(0, 0, 1.001, 1.001)], [[0], [1]]),
        # intersects 1 and contains 1
        ("contains", [(0.5, 0.5, 1.001, 1.001)], [[0], [1]]),
        # intersects and contains
        ("contains", [(0.5, 0.5, 1.5, 1.5)], [[0], [1]]),
        # intersects and contains multiple
        ("contains", [(-1, -1, 2, 2)], [[0, 0], [0, 1]]),
        # contains but does not contains_properly
        ("contains", [(10, 10, 20, 20)], [[0], [5]]),
        # --- touches ---
        # bbox intersects and touches
        ("touches", [(-1, -1, 0, 0)], [[0], [0]]),
        # bbox intersects but geom does not touch
        ("touches", [(-0.5, -0.5, 1.5, 1.5)], [[], []]),
        # --- covers ---
        # covers (0, 0) and (1, 1)
        ("covers", [(-0.5, -0.5, 1, 1)], [[0, 0], [0, 1]]),
        # does not cover any
        ("covers", [(0.001, 0.001, 0.99, 0.99)], [[], []]),
        # covers but does not contain
        ("covers", [(0, 0, 1, 1)], [[0, 0], [0, 1]]),
        # --- contains_properly ---
        # intersects but does not contain
        ("contains_properly", [(0, 0, 1, 1)], [[], []]),
        # intersects 2 and contains 1
        ("contains_properly", [(0, 0, 1.001, 1.001)], [[0], [1]]),
        # intersects 1 and contains 1
        ("contains_properly", [(0.5, 0.5, 1.001, 1.001)], [[0], [1]]),
        # intersects and contains
        ("contains_properly", [(0.5, 0.5, 1.5, 1.5)], [[0], [1]]),
        # intersects and contains multiple
        ("contains_properly", [(-1, -1, 2, 2)], [[0, 0], [0, 1]]),
        # contains but does not contains_properly
        ("contains_properly", [(10, 10, 20, 20)], [[], []]),
    ],
)
def test_query_bulk(self, predicate, test_geom, expected):
    """Tests the `query` method with a list of valid
    input geometries and valid predicates.
    """
    # each parametrized entry is a bounds tuple; turn it into a box geometry
    query_geoms = [box(*bounds) for bounds in test_geom]
    matches = self.df.sindex.query(query_geoms, predicate=predicate)
    assert_array_equal(matches, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "test_geoms, expected_value",
    (
        # single empty geometry
        ([GeometryCollection()], [[], []]),
        # None should be skipped
        ([GeometryCollection(), None], [[], []]),
        ([None], [[], []]),
        ([None, box(-0.5, -0.5, 0.5, 0.5), None], [[1], [0]]),
    ),
)
def test_query_bulk_empty_geometry(self, test_geoms, expected_value):
    """Tests the `query` method with lists containing empty or missing geometries."""
    tree = self.df.sindex
    matches = tree.query(test_geoms)
    assert_array_equal(matches, expected_value)
|
||||
|
||||
def test_query_bulk_empty_input_array(self):
    """Tests the `query` method with an empty input array."""
    no_geoms = np.array([], dtype=object)
    matches = self.df.sindex.query(no_geoms)
    # two empty rows are expected
    assert_array_equal(matches, [[], []])
|
||||
|
||||
def test_query_bulk_invalid_input_geometry(self):
    """Tests the `query` method with invalid input for the `geometry` parameter."""
    tree = self.df.sindex
    not_geometries = "notanarray"
    with pytest.raises(TypeError):
        tree.query(not_geometries)
|
||||
|
||||
def test_query_bulk_invalid_predicate(self):
    """Tests the `query` method with invalid predicates."""
    tree = self.df.sindex
    query_geoms = [box(-1, -1, -0.5, -0.5)]
    # "test" is not a known predicate name
    with pytest.raises(ValueError):
        tree.query(query_geoms, predicate="test")
|
||||
|
||||
@pytest.mark.parametrize(
    "predicate, test_geom, expected",
    (
        (None, (-1, -1, -0.5, -0.5), [[], []]),
        ("intersects", (-1, -1, -0.5, -0.5), [[], []]),
        ("contains", (-1, -1, 1, 1), [[0], [0]]),
    ),
)
def test_query_bulk_input_type(self, predicate, test_geom, expected):
    """Tests that query can accept a GeoSeries, GeometryArray or
    numpy array.

    The same single box is passed in each container form and every form must
    give the same result.
    """
    # pass through GeoSeries to test input type
    series = geopandas.GeoSeries([box(*test_geom)], index=["0"])

    # each distinct input form is queried exactly once (the original repeated
    # the numpy-array query verbatim)
    input_forms = (
        series,  # GeoSeries
        series.geometry,  # `.geometry` accessor of the series
        series.geometry.values,  # GeometryArray
        series.geometry.values.to_numpy(),  # numpy array
    )
    for geoms in input_forms:
        res = self.df.sindex.query(geoms, predicate=predicate)
        assert_array_equal(res, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "sort, expected",
    [
        (True, [[0, 0, 0], [0, 1, 2]]),
        # False could be anything, at least we'll know if it changes
        (False, [[0, 0, 0], [0, 1, 2]]),
    ],
)
def test_query_bulk_sorting(self, sort, expected):
    """Check that results from a bulk `query` don't depend
    on the order of geometries.
    """
    # these geometries come from a reported issue:
    # https://github.com/geopandas/geopandas/issues/1337
    # there is no theoretical reason they were chosen
    query_polys = GeoSeries([Polygon([(1, 1), (3, 1), (3, 3), (1, 3)])])
    indexed_polys = GeoSeries(
        [
            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
            Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]),
            Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
        ]
    )

    res = indexed_polys.sindex.query(query_polys, sort=sort)

    # asserting the same elements
    assert sorted(res[0]) == sorted(expected[0])
    assert sorted(res[1]) == sorted(expected[1])
    # asserting the exact array can fail if sort=False
    try:
        assert_array_equal(res, expected)
    except AssertionError:
        # an ordering mismatch is only tolerated for unsorted queries
        if sort is not False:
            raise
        pytest.xfail(
            "rtree results are known to be unordered, see "
            "https://github.com/geopandas/geopandas/issues/1337\n"
            "Expected:\n {}\n".format(expected)
            + "Got:\n {}\n".format(res.tolist())
        )
|
||||
|
||||
# ------------------------- `nearest` tests ------------------------- #
|
||||
@pytest.mark.parametrize("return_all", [True, False])
@pytest.mark.parametrize(
    "geometry,expected",
    (
        ([0.25, 0.25], [[0], [0]]),
        ([0.75, 0.75], [[0], [1]]),
    ),
)
def test_nearest_single(self, geometry, expected, return_all):
    """``nearest`` for one point, given as a shapely ``Point`` or via ``shapely.points``."""
    diagonal = shapely.points(np.arange(10), np.arange(10))
    tree = geopandas.GeoDataFrame({"geometry": diagonal}).sindex

    as_point = Point(geometry)
    found = tree.nearest(as_point, return_all=return_all)
    assert_array_equal(found, expected)

    via_shapely = shapely.points(geometry)
    found = tree.nearest(via_shapely, return_all=return_all)
    assert_array_equal(found, expected)
|
||||
|
||||
@pytest.mark.parametrize("return_all", [True, False])
@pytest.mark.parametrize(
    "geometry,expected",
    (
        ([(1, 1), (0, 0)], [[0, 1], [1, 0]]),
        ([(1, 1), (0.25, 1)], [[0, 1], [1, 1]]),
    ),
)
def test_nearest_multi(self, geometry, expected, return_all):
    """``nearest`` for several points passed in four different container forms."""
    diagonal = shapely.points(np.arange(10), np.arange(10))
    tree = geopandas.GeoDataFrame({"geometry": diagonal}).sindex

    # list of shapely Points
    point_list = [Point(coords) for coords in geometry]
    found = tree.nearest(point_list, return_all=return_all)
    assert_array_equal(found, expected)

    # result of shapely.points
    point_array = shapely.points(geometry)
    found = tree.nearest(point_array, return_all=return_all)
    assert_array_equal(found, expected)

    # GeoSeries built from that result
    point_series = geopandas.GeoSeries(point_array)
    found = tree.nearest(point_series, return_all=return_all)
    assert_array_equal(found, expected)

    # result of points_from_xy
    xs, ys = zip(*geometry)
    from_xy = geopandas.points_from_xy(xs, ys)
    found = tree.nearest(from_xy, return_all=return_all)
    assert_array_equal(found, expected)
|
||||
|
||||
@pytest.mark.parametrize("return_all", [True, False])
@pytest.mark.parametrize(
    "geometry,expected",
    (
        (None, [[], []]),
        ([None], [[], []]),
    ),
)
def test_nearest_none(self, geometry, expected, return_all):
    """``nearest`` with ``None`` (bare or in a list) gives two empty rows."""
    diagonal = shapely.points(np.arange(10), np.arange(10))
    tree = geopandas.GeoDataFrame({"geometry": diagonal}).sindex

    found = tree.nearest(geometry, return_all=return_all)
    assert_array_equal(found, expected)
|
||||
|
||||
@pytest.mark.parametrize("return_distance", [True, False])
@pytest.mark.parametrize(
    "return_all,max_distance,expected",
    [
        (True, None, ([[0, 0, 1], [0, 1, 5]], [sqrt(0.5), sqrt(0.5), sqrt(50)])),
        (False, None, ([[0, 1], [0, 5]], [sqrt(0.5), sqrt(50)])),
        (True, 1, ([[0, 0], [0, 1]], [sqrt(0.5), sqrt(0.5)])),
        (False, 1, ([[0], [0]], [sqrt(0.5)])),
    ],
)
def test_nearest_max_distance(
    self, expected, max_distance, return_all, return_distance
):
    """Check ``sindex.nearest`` with and without a ``max_distance`` cut-off.

    ``expected`` is an ``(indices, distances)`` pair. The distances are only
    compared when ``return_distance`` is true.
    """
    diagonal_x, diagonal_y = np.arange(10), np.arange(10)
    df = geopandas.GeoDataFrame(
        {"geometry": shapely.points(diagonal_x, diagonal_y)}
    )
    queries = [Point(0.5, 0.5), Point(0, 10)]
    expected_indices = expected[0]

    res = df.sindex.nearest(
        queries,
        return_all=return_all,
        max_distance=max_distance,
        return_distance=return_distance,
    )

    if not return_distance:
        # only the index pairs come back
        assert_array_equal(res, expected_indices)
        return

    assert_array_equal(res[0], expected_indices)
    assert_array_equal(res[1], expected[1])
|
||||
|
||||
@pytest.mark.parametrize("return_distance", [True, False])
@pytest.mark.parametrize(
    "return_all,max_distance,exclusive,expected",
    [
        (False, None, False, ([[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], 5 * [0])),
        (False, None, True, ([[0, 1, 2, 3, 4], [1, 0, 1, 2, 3]], 5 * [sqrt(2)])),
        (True, None, False, ([[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], 5 * [0])),
        (
            True,
            None,
            True,
            ([[0, 1, 1, 2, 2, 3, 3, 4], [1, 0, 2, 1, 3, 2, 4, 3]], 8 * [sqrt(2)]),
        ),
        (False, 1.1, True, ([[1, 2, 5], [5, 5, 1]], 3 * [1])),
        (True, 1.1, True, ([[1, 2, 5, 5], [5, 5, 1, 2]], 4 * [1])),
    ],
)
def test_nearest_exclusive(
    self, expected, max_distance, return_all, return_distance, exclusive
):
    """Check the ``exclusive`` flag of ``sindex.nearest``.

    The indexed geometries are queried against themselves. ``expected`` is an
    ``(indices, distances)`` pair. The distances are only compared when
    ``return_distance`` is true.
    """
    geoms = shapely.points(np.arange(5), np.arange(5))
    if max_distance:
        # add a non grid point
        geoms = np.append(geoms, [Point(1, 2)])

    df = geopandas.GeoDataFrame({"geometry": geoms})
    expected_indices = expected[0]

    res = df.sindex.nearest(
        geoms,
        return_all=return_all,
        max_distance=max_distance,
        return_distance=return_distance,
        exclusive=exclusive,
    )

    if not return_distance:
        assert_array_equal(res, expected_indices)
        return

    assert_array_equal(res[0], expected_indices)
    assert_array_equal(res[1], expected[1])
|
||||
|
||||
# --------------------------- misc tests ---------------------------- #
|
||||
|
||||
def test_empty_tree_geometries(self):
    """Query an index built from valid, missing and empty geometries.

    The only geometry equal to the query point sits at position 3, after a
    ``None`` and an empty ``Point``, so the first hit must be 3.
    """
    mixed = [Point(0, 0), None, Point(), Point(1, 1), Point()]
    tree = geopandas.GeoDataFrame(geometry=mixed).sindex
    hits = tree.query(Point(1, 1))
    assert hits[0] == 3
|
||||
|
||||
def test_size(self):
    """The index ``size`` property must equal ``self.expected_size``."""
    tree = self.df.sindex
    assert tree.size == self.expected_size
|
||||
|
||||
def test_len(self):
    """``len()`` of the spatial index must equal ``self.expected_size``."""
    tree = self.df.sindex
    assert len(tree) == self.expected_size
|
||||
|
||||
def test_is_empty(self):
    """Check the ``is_empty`` property of the spatial index."""
    # Inputs that must give an empty tree: no rows, a missing geometry,
    # and an empty geometry.
    empty_inputs = (
        ([], {"dtype": object}),
        ([None], {}),
        ([Point()], {}),
    )
    for data, kwargs in empty_inputs:
        empty = geopandas.GeoSeries(data, **kwargs)
        assert empty.sindex.is_empty

    # A single real point gives a non-empty tree.
    non_empty = geopandas.GeoSeries([Point(0, 0)])
    assert not non_empty.sindex.is_empty
|
||||
|
||||
@pytest.mark.parametrize(
    "predicate, expected_shape",
    [
        (None, (2, 471)),
        ("intersects", (2, 213)),
        ("within", (2, 213)),
        ("contains", (2, 0)),
        ("overlaps", (2, 0)),
        ("crosses", (2, 0)),
        ("touches", (2, 0)),
    ],
)
def test_integration_natural_earth(
    self, predicate, expected_shape, naturalearth_lowres, naturalearth_cities
):
    """Check the result shape of a bulk query on the naturalearth datasets.

    The cities' geometries are queried against the index of the lowres
    layer, and only the shape of the returned array is compared.
    """
    world, capitals = (
        read_file(path) for path in (naturalearth_lowres, naturalearth_cities)
    )

    matches = world.sindex.query(capitals.geometry, predicate)
    assert matches.shape == expected_shape
|
||||
@@ -0,0 +1,186 @@
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, Series
|
||||
|
||||
from shapely.geometry import Point, Polygon
|
||||
|
||||
from geopandas import GeoDataFrame, GeoSeries
|
||||
from geopandas._compat import HAS_PYPROJ
|
||||
from geopandas.array import from_shapely
|
||||
|
||||
import pytest
|
||||
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
|
||||
|
||||
# Module-level fixtures shared by the tests below.

# s1 and s2 hold the same two squares; the first ring of s2 starts at a
# different vertex and runs in the opposite direction.
s1 = GeoSeries(
    [
        Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
        Polygon([(2, 2), (4, 2), (4, 4), (2, 4)]),
    ]
)
s2 = GeoSeries(
    [
        Polygon([(0, 2), (0, 0), (2, 0), (2, 2)]),
        Polygon([(2, 2), (4, 2), (4, 4), (2, 4)]),
    ]
)

# Same polygons as s2, stored in a plain pandas Series.
s3 = Series(
    [
        Polygon([(0, 2), (0, 0), (2, 0), (2, 2)]),
        Polygon([(2, 2), (4, 2), (4, 4), (2, 4)]),
    ]
)

# Same polygons as s2, converted with from_shapely.
a = from_shapely(
    [
        Polygon([(0, 2), (0, 0), (2, 0), (2, 2)]),
        Polygon([(2, 2), (4, 2), (4, 4), (2, 4)]),
    ]
)

# NOTE(review): ``s4`` is rebound to ``s1.copy()`` a few lines below, before
# any test function runs, so this ``Series(a)`` value is never what the
# tests see. Confirm whether that is intended.
s4 = Series(a)

df1 = GeoDataFrame({"col1": [1, 2], "geometry": s1})
df2 = GeoDataFrame({"col1": [1, 2], "geometry": s2})

# Copies of s1 / s2 whose underlying arrays are given different CRS values.
s4 = s1.copy()
s4.array.crs = 4326
s5 = s2.copy()
s5.array.crs = 27700

# Like s2, except that the first vertex of the first polygon is (0, 3).
s6 = GeoSeries(
    [
        Polygon([(0, 3), (0, 0), (2, 0), (2, 2)]),
        Polygon([(2, 2), (4, 2), (4, 4), (2, 4)]),
    ]
)

# df4 and df5 carry the same columns; only the order of geom2 / geom3 differs.
df4 = GeoDataFrame(
    {"col1": [1, 2], "geometry": s1.copy(), "geom2": s4.copy(), "geom3": s5.copy()},
    crs=3857,
)
df5 = GeoDataFrame(
    {"col1": [1, 2], "geometry": s1.copy(), "geom3": s5.copy(), "geom2": s4.copy()},
    crs=3857,
)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::UserWarning")
def test_geoseries():
    """Exercise ``assert_geoseries_equal`` on the module-level series.

    Covers the passing comparisons first, then the two failure modes and the
    text of the ``AssertionError`` each one raises.
    """
    relaxed = {"check_series_type": False, "check_dtype": False}

    assert_geoseries_equal(s1, s2)
    assert_geoseries_equal(s1, s3, **relaxed)
    assert_geoseries_equal(s3, s2, **relaxed)
    assert_geoseries_equal(s1, s4, check_series_type=False)

    # s1 and s2 differ in vertex order, which the less precise check rejects
    with pytest.raises(AssertionError) as error:
        assert_geoseries_equal(s1, s2, check_less_precise=True)
    message = str(error.value)
    assert "1 out of 2 geometries are not almost equal" in message
    assert "not almost equal: [0]" in message

    # s6 has a different first polygon than s2
    with pytest.raises(AssertionError) as error:
        assert_geoseries_equal(s2, s6, check_less_precise=False)
    message = str(error.value)
    assert "1 out of 2 geometries are not equal" in message
    assert "not equal: [0]" in message
|
||||
|
||||
|
||||
def test_geodataframe():
    """Exercise ``assert_geodataframe_equal`` on the module-level frames."""
    swapped_columns = ["geometry", "col1"]

    assert_geodataframe_equal(df1, df2)

    with pytest.raises(AssertionError):
        assert_geodataframe_equal(df1, df2, check_less_precise=True)

    # column order matters unless check_like is set
    with pytest.raises(AssertionError):
        assert_geodataframe_equal(df1, df2[swapped_columns])

    assert_geodataframe_equal(df1, df2[swapped_columns], check_like=True)

    # a changed attribute value must be detected
    altered = df2.copy()
    altered.loc[0, "col1"] = 10
    with pytest.raises(AssertionError):
        assert_geodataframe_equal(df1, altered)

    assert_geodataframe_equal(df5, df4, check_like=True)
    if HAS_PYPROJ:
        # NOTE: this rebinds a column of the module-level ``df5`` in place.
        df5["geom2"] = df5.geom2.set_crs(3857, allow_override=True)
        with pytest.raises(AssertionError):
            assert_geodataframe_equal(df5, df4, check_like=True)
|
||||
|
||||
|
||||
def test_equal_nans():
    """A series containing NaN compares equal to its own copy in both modes."""
    series = GeoSeries([Point(0, 0), np.nan])
    for options in ({}, {"check_less_precise": True}):
        assert_geoseries_equal(series, series.copy(), **options)
|
||||
|
||||
|
||||
def test_no_crs():
    """Frames built with ``crs=None`` and ``crs={}`` must compare equal."""
    with_none, with_empty_dict = (
        GeoDataFrame({"col1": [1, 2], "geometry": s1}, crs=crs_value)
        for crs_value in (None, {})
    )
    assert_geodataframe_equal(with_none, with_empty_dict)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not available")
def test_ignore_crs_mismatch():
    """``check_crs=False`` must allow frames that differ only in CRS."""
    frame_4326 = GeoDataFrame(
        {"col1": [1, 2], "geometry": s1.copy()}, crs="EPSG:4326"
    )
    frame_31370 = GeoDataFrame({"col1": [1, 2], "geometry": s1}, crs="EPSG:31370")

    with pytest.raises(AssertionError):
        assert_geodataframe_equal(frame_4326, frame_31370)

    # With `check_crs=False` the comparison must pass and must not emit any
    # warning about comparing geometries with different crs.
    with warnings.catch_warnings(record=True) as record:
        assert_geodataframe_equal(frame_4326, frame_31370, check_crs=False)

    assert not record
|
||||
|
||||
|
||||
def test_almost_equal_but_not_equal():
    """A 1e-7 offset passes the less precise check but fails the exact one."""
    origin = GeoSeries([Point(0, 0)])
    nearly_origin = GeoSeries([Point(0.0000001, 0)])

    assert_geoseries_equal(origin, nearly_origin, check_less_precise=True)

    with pytest.raises(AssertionError):
        assert_geoseries_equal(origin, nearly_origin)
|
||||
|
||||
|
||||
def test_geodataframe_no_active_geometry_column():
    """``assert_geodataframe_equal`` must work without an active geometry."""

    def build():
        gdf = GeoDataFrame({"value": [1, 2], "geometry": [Point(1, 1), Point(2, 2)]})
        gdf["geom2"] = GeoSeries([Point(3, 3), Point(4, 4)])
        return gdf

    def build_without_active_geometry():
        gdf = build()
        gdf._geometry_column_name = None
        return gdf

    # no active geometry column (None)
    left = build_without_active_geometry()
    right = build_without_active_geometry()
    assert_geodataframe_equal(left, right)

    # active geometry column ("geometry") not present
    kept = ["value", "geom2"]
    left = build()[kept]
    right = build()[kept]
    assert_geodataframe_equal(left, right)

    # only the non-geometry column, wrapped in a GeoDataFrame again
    left = GeoDataFrame(build()[["value"]])
    right = GeoDataFrame(build()[["value"]])
    assert_geodataframe_equal(left, right)
|
||||
|
||||
|
||||
def test_geodataframe_multiindex():
    """``assert_geodataframe_equal`` must accept MultiIndex columns."""

    def build(clear_active_geometry=False):
        points = DataFrame([[Point(0, 0), Point(1, 1)], [Point(2, 2), Point(3, 3)]])
        gdf = GeoDataFrame(points.astype("geometry"))
        gdf.columns = pd.MultiIndex.from_product([["geometry"], [0, 1]])
        if clear_active_geometry:
            gdf._geometry_column_name = None
        return gdf

    # compare once as built, once with the active geometry name cleared
    for clear in (False, True):
        left = build(clear_active_geometry=clear)
        right = build(clear_active_geometry=clear)
        assert_geodataframe_equal(left, right)
|
||||
@@ -0,0 +1,85 @@
|
||||
from pandas import DataFrame, Series
|
||||
|
||||
from shapely.geometry import Point
|
||||
|
||||
from geopandas import GeoDataFrame, GeoSeries
|
||||
|
||||
|
||||
class TestSeries:
    """Selection operations on a ``GeoSeries`` must return a ``GeoSeries``."""

    def setup_method(self):
        self.N = 10
        radius = 0.5
        points = [Point(x, y) for x, y in zip(range(self.N), range(self.N))]
        self.pts = GeoSeries(points)
        self.polys = self.pts.buffer(radius)

    def test_slice(self):
        # evaluate each slice right before it is checked
        for series, key in (
            (self.pts, slice(None, 2)),
            (self.pts, slice(None, None, 2)),
            (self.polys, slice(None, 2)),
        ):
            assert type(series[key]) is GeoSeries

    def test_head(self):
        result = self.pts.head()
        assert type(result) is GeoSeries

    def test_tail(self):
        result = self.pts.tail()
        assert type(result) is GeoSeries

    def test_sort_index(self):
        result = self.pts.sort_index()
        assert type(result) is GeoSeries

    def test_loc(self):
        result = self.pts.loc[5:]
        assert type(result) is GeoSeries

    def test_iloc(self):
        result = self.pts.iloc[5:]
        assert type(result) is GeoSeries

    def test_fancy(self):
        # boolean mask selecting the odd index labels
        odd_labels = (self.pts.index.to_series() % 2).astype(bool)
        assert type(self.pts[odd_labels]) is GeoSeries

    def test_take(self):
        even_positions = list(range(0, self.N, 2))
        assert type(self.pts.take(even_positions)) is GeoSeries

    def test_groupby(self):
        for _key, group in self.pts.groupby(lambda label: label % 2):
            assert type(group) is GeoSeries
|
||||
|
||||
|
||||
class TestDataFrame:
    """Selection on a ``GeoDataFrame`` must return the expected types."""

    def setup_method(self):
        N = 10
        records = []
        for x, y in zip(range(N), range(N)):
            records.append({"geometry": Point(x, y), "value1": x + y, "value2": x * y})
        self.df = GeoDataFrame(records)

    def test_geometry(self):
        assert type(self.df.geometry) is GeoSeries

        # still GeoSeries if different name
        data = {
            "coords": [Point(x, y) for x, y in zip(range(5), range(5))],
            "nums": range(5),
        }
        df2 = GeoDataFrame(data, geometry="coords")
        assert type(df2.geometry) is GeoSeries
        assert type(df2["coords"]) is GeoSeries

    def test_nongeometry(self):
        column = self.df["value1"]
        assert type(column) is Series

    def test_geometry_multiple(self):
        subset = self.df[["geometry", "value1"]]
        assert type(subset) is GeoDataFrame

    def test_nongeometry_multiple(self):
        subset = self.df[["value1", "value2"]]
        assert type(subset) is DataFrame

    def test_slice(self):
        for key in (slice(None, 2), slice(None, None, 2)):
            assert type(self.df[key]) is GeoDataFrame

    def test_fancy(self):
        # boolean mask selecting the odd index labels
        odd_labels = (self.df.index.to_series() % 2).astype(bool)
        assert type(self.df[odd_labels]) is GeoDataFrame
|
||||
@@ -0,0 +1,151 @@
|
||||
import os.path
|
||||
|
||||
from pandas import Series
|
||||
|
||||
from geopandas import GeoDataFrame
|
||||
|
||||
from geopandas.testing import ( # noqa: F401
|
||||
assert_geoseries_equal,
|
||||
geom_almost_equals,
|
||||
geom_equals,
|
||||
)
|
||||
|
||||
# Directory containing this module, and the directory two levels above it.
HERE = os.path.abspath(os.path.dirname(__file__))
PACKAGE_DIR = os.path.dirname(os.path.dirname(HERE))

# Paths to the bundled test datasets under <PACKAGE_DIR>/geopandas/tests/data.
_TEST_DATA_DIR = os.path.join(PACKAGE_DIR, "geopandas", "tests", "data")
# The nybb archive is addressed through a "zip://" prefixed path.
_NYBB = "zip://" + os.path.join(_TEST_DATA_DIR, "nybb_16a.zip")
_NATURALEARTH_CITIES = os.path.join(
    _TEST_DATA_DIR, "naturalearth_cities", "naturalearth_cities.shp"
)
_NATURALEARTH_LOWRES = os.path.join(
    _TEST_DATA_DIR, "naturalearth_lowres", "naturalearth_lowres.shp"
)
|
||||
|
||||
|
||||
# mock not used here, but the import from here is used in other modules
|
||||
try:
|
||||
from unittest import mock
|
||||
except ImportError:
|
||||
import mock # noqa: F401
|
||||
|
||||
|
||||
def validate_boro_df(df, case_sensitive=False):
    """Assert that ``df`` looks like the nybb dataset.

    Checks that ``df`` is a GeoDataFrame with five rows, that the four
    expected attribute columns are present, and that every non-missing
    geometry is a MultiPolygon.

    Parameters
    ----------
    df : GeoDataFrame
        Frame to validate.
    case_sensitive : bool, default False
        If True, column names must match exactly; otherwise they are
        compared after lower-casing both sides.
    """
    assert isinstance(df, GeoDataFrame)
    assert len(df) == 5

    expected_columns = ("BoroCode", "BoroName", "Shape_Leng", "Shape_Area")
    if case_sensitive:
        for name in expected_columns:
            assert name in df.columns
    else:
        for name in expected_columns:
            lowered = [present.lower() for present in df.columns]
            assert name.lower() in lowered

    geom_types = Series(df.geometry.geom_type).dropna()
    assert geom_types.eq("MultiPolygon").all()
|
||||
|
||||
|
||||
def get_srid(df):
    """Return the EPSG code of ``df.crs``, falling back to 0.

    0 is returned when ``df.crs`` is None or when ``to_epsg()`` gives a
    falsy value.
    """
    if df.crs is None:
        return 0
    epsg = df.crs.to_epsg()
    return epsg if epsg else 0
|
||||
|
||||
|
||||
def create_spatialite(con, df):
    """
    Create and fill the ``nybb`` table through a SpatiaLite connection.

    The table is created if missing, a geometry column and a spatial index
    are registered for it, and one row is inserted per row of ``df``.
    Everything runs inside ``with con:``. Nothing is returned.

    Parameters
    ----------
    `con`: ``sqlite3.Connection``
    `df`: ``GeoDataFrame``
    """

    def rows(srid):
        # One parameter tuple per record; ogc_fid is passed as None.
        for record in df.itertuples(index=False):
            wkt = record.geometry.wkt if record.geometry else None
            yield (
                None,
                record.BoroCode,
                record.BoroName,
                record.Shape_Leng,
                record.Shape_Area,
                wkt,
                srid,
            )

    with con:
        geom_col = df.geometry.name
        srid = get_srid(df)

        create_table = (
            "CREATE TABLE IF NOT EXISTS nybb "
            "( ogc_fid INTEGER PRIMARY KEY"
            ", borocode INTEGER"
            ", boroname TEXT"
            ", shape_leng REAL"
            ", shape_area REAL"
            ")"
        )
        con.execute(create_table)

        # The geometry type is taken from the first non-missing geometry.
        geom_type = df.geom_type.dropna().iat[0].upper()
        con.execute(
            "SELECT AddGeometryColumn(?, ?, ?, ?)",
            ("nybb", geom_col, srid, geom_type),
        )
        con.execute("SELECT CreateSpatialIndex(?, ?)", ("nybb", geom_col))

        insert_row = "INSERT INTO nybb VALUES(?, ?, ?, ?, ?, GeomFromText(?, ?))"
        con.executemany(insert_row, rows(srid))
|
||||
|
||||
|
||||
def create_postgis(con, df, srid=None, geom_col="geom"):
    """
    Create and fill a ``nybb`` table through the PostGIS connection ``con``.

    Any existing ``nybb`` table is dropped first. One row is inserted per
    row of ``df``, reading its ``geometry`` (sent as WKT), ``BoroCode``,
    ``BoroName``, ``Shape_Leng`` and ``Shape_Area`` values.

    Parameters
    ----------
    con : database connection
        Must provide ``cursor()`` and ``commit()``.
    df : GeoDataFrame
        Source rows.
    srid : int, optional
        If given, the geometry column is declared as
        ``geometry(MULTIPOLYGON, srid)`` and inserted values are wrapped in
        ``ST_SetSRID``. Otherwise a plain ``geometry`` column is used.
    geom_col : str, default "geom"
        Name of the geometry column in the created table.

    Returns
    -------
    None
        Errors raised by the connection or cursor propagate to the caller.
    """
    # If you'd like these tests to run, create a database called
    # 'test_geopandas' and enable postgis in it:
    # > createdb test_geopandas
    # > psql -c "CREATE EXTENSION postgis" -d test_geopandas
    if srid is not None:
        geom_schema = "geometry(MULTIPOLYGON, {})".format(srid)
        geom_insert = "ST_SetSRID(ST_GeometryFromText(%s), {})".format(srid)
    else:
        geom_schema = "geometry"
        geom_insert = "ST_GeometryFromText(%s)"

    create_sql = """CREATE TABLE nybb (
        {geom_col} {geom_schema},
        borocode integer,
        boroname varchar(40),
        shape_leng float,
        shape_area float
    );""".format(
        geom_col=geom_col, geom_schema=geom_schema
    )
    # The statement is the same for every row, so build it once.
    insert_sql = "INSERT INTO nybb VALUES ({}, %s, %s, %s, %s);".format(geom_insert)

    # Acquire the cursor before entering ``try``: if ``con.cursor()`` fails
    # there is nothing to close, and the original error must surface instead
    # of a NameError from the cleanup code.
    cursor = con.cursor()
    try:
        cursor.execute("DROP TABLE IF EXISTS nybb;")
        cursor.execute(create_sql)

        for _, row in df.iterrows():
            cursor.execute(
                insert_sql,
                (
                    row["geometry"].wkt,
                    row["BoroCode"],
                    row["BoroName"],
                    row["Shape_Leng"],
                    row["Shape_Area"],
                ),
            )
    finally:
        cursor.close()
        con.commit()
|
||||
@@ -0,0 +1,15 @@
|
||||
from .clip import clip
|
||||
from .geocoding import geocode, reverse_geocode
|
||||
from .overlay import overlay
|
||||
from .sjoin import sjoin, sjoin_nearest
|
||||
from .util import collect
|
||||
|
||||
# Names exported by ``from <this package> import *``; each one is imported
# at the top of this module.
__all__ = [
    "collect",
    "geocode",
    "overlay",
    "reverse_geocode",
    "sjoin",
    "sjoin_nearest",
    "clip",
]
|
||||
@@ -0,0 +1,84 @@
|
||||
from warnings import warn
|
||||
|
||||
import numpy
|
||||
|
||||
from shapely.geometry import MultiPoint
|
||||
|
||||
from geopandas.array import from_shapely, points_from_xy
|
||||
from geopandas.geoseries import GeoSeries
|
||||
|
||||
|
||||
def uniform(geom, size, rng=None):
    """
    Sample uniformly at random from a geometry.

    Polygons are sampled uniformly within their area and lines uniformly
    along their length. For multi-part geometries, the weights of each part
    are selected according to their relevant attribute (area for Polygons,
    length for LineStrings), and then points are sampled from each part
    uniformly.

    Any other geometry type (e.g. Point, GeometryCollection) is ignored: a
    ``UserWarning`` is emitted and an empty MultiPoint geometry is returned.
    A ``None`` or empty ``geom`` gives an empty MultiPoint without a warning.

    Parameters
    ----------
    geom : any shapely.geometry.BaseGeometry type
        the shape that describes the area in which to sample.

    size : integer
        an integer denoting how many points to sample

    rng : optional
        passed as ``seed`` to ``numpy.random.default_rng`` to build the
        generator used for sampling.

    Returns
    -------
    shapely.MultiPoint geometry containing the sampled points

    Examples
    --------
    >>> from shapely.geometry import box
    >>> square = box(0,0,1,1)
    >>> uniform(square, size=102) # doctest: +SKIP
    """
    generator = numpy.random.default_rng(seed=rng)

    if geom is None or geom.is_empty:
        return MultiPoint()

    # Pick the sampler from the geometry type.
    samplers = {
        "Polygon": _uniform_polygon,
        "MultiPolygon": _uniform_polygon,
        "LineString": _uniform_line,
        "MultiLineString": _uniform_line,
    }
    sampler = samplers.get(geom.geom_type)
    if sampler is not None:
        return sampler(geom, size=size, generator=generator)

    warn(
        f"Sampling is not supported for {geom.geom_type} geometry type.",
        UserWarning,
        stacklevel=8,
    )
    return MultiPoint()
|
||||
|
||||
|
||||
def _uniform_line(geom, size, generator):
    """
    Sample points from an input shapely linestring.

    ``size`` values are drawn with ``generator.uniform`` and used as
    normalized positions for ``geom.interpolate``; the resulting points are
    merged with ``union_all``.
    """
    fractions = generator.uniform(size=size)
    sampled = geom.interpolate(fractions, normalized=True)
    return from_shapely(sampled).union_all()
|
||||
|
||||
|
||||
def _uniform_polygon(geom, size, generator):
    """
    Sample uniformly from within a polygon using batched sampling.

    Each batch draws ``size`` candidate points inside the bounding box of
    ``geom`` and keeps those returned by a "contains" query. Batches repeat
    until at least ``size`` points are collected; the first ``size`` of
    them are merged with ``union_all``.
    """
    xmin, ymin, xmax, ymax = geom.bounds
    accepted = []
    while len(accepted) < size:
        xs = generator.uniform(xmin, xmax, size=size)
        ys = generator.uniform(ymin, ymax, size=size)
        batch = points_from_xy(x=xs, y=ys)
        inside = batch.sindex.query(geom, predicate="contains")
        accepted.extend(batch[inside])
    return GeoSeries(accepted[:size]).union_all()
|
||||
@@ -0,0 +1,169 @@
|
||||
import importlib
|
||||
import platform
|
||||
import sys
|
||||
|
||||
|
||||
def _get_sys_info():
|
||||
"""System information
|
||||
|
||||
Returns
|
||||
-------
|
||||
sys_info : dict
|
||||
system and Python version information
|
||||
"""
|
||||
python = sys.version.replace("\n", " ")
|
||||
|
||||
blob = [
|
||||
("python", python),
|
||||
("executable", sys.executable),
|
||||
("machine", platform.platform()),
|
||||
]
|
||||
|
||||
return dict(blob)
|
||||
|
||||
|
||||
def _get_C_info():
|
||||
"""Information on system PROJ, GDAL, GEOS
|
||||
Returns
|
||||
-------
|
||||
c_info: dict
|
||||
system PROJ information
|
||||
"""
|
||||
try:
|
||||
import pyproj
|
||||
|
||||
proj_version = pyproj.proj_version_str
|
||||
except Exception:
|
||||
proj_version = None
|
||||
try:
|
||||
import pyproj
|
||||
|
||||
proj_dir = pyproj.datadir.get_data_dir()
|
||||
except Exception:
|
||||
proj_dir = None
|
||||
|
||||
try:
|
||||
import shapely._buildcfg
|
||||
|
||||
geos_version = "{}.{}.{}".format(*shapely._buildcfg.geos_version)
|
||||
geos_dir = shapely._buildcfg.geos_library_path
|
||||
except Exception:
|
||||
try:
|
||||
from shapely import geos_version_string
|
||||
|
||||
geos_version = geos_version_string
|
||||
geos_dir = None
|
||||
except Exception:
|
||||
geos_version = None
|
||||
geos_dir = None
|
||||
|
||||
try:
|
||||
import pyogrio
|
||||
|
||||
gdal_version = pyogrio.__gdal_version_string__
|
||||
gdal_dir = pyogrio.get_gdal_data_path()
|
||||
except Exception:
|
||||
gdal_version = None
|
||||
gdal_dir = None
|
||||
|
||||
if gdal_version is None:
|
||||
try:
|
||||
import fiona
|
||||
|
||||
gdal_version = fiona.env.get_gdal_release_name()
|
||||
except Exception:
|
||||
gdal_version = None
|
||||
try:
|
||||
import fiona
|
||||
|
||||
gdal_dir = fiona.env.GDALDataFinder().search()
|
||||
except Exception:
|
||||
gdal_dir = None
|
||||
|
||||
blob = [
|
||||
("GEOS", geos_version),
|
||||
("GEOS lib", geos_dir),
|
||||
("GDAL", gdal_version),
|
||||
("GDAL data dir", gdal_dir),
|
||||
("PROJ", proj_version),
|
||||
("PROJ data dir", proj_dir),
|
||||
]
|
||||
|
||||
return dict(blob)
|
||||
|
||||
|
||||
def _get_deps_info():
|
||||
"""Overview of the installed version of main dependencies
|
||||
|
||||
Returns
|
||||
-------
|
||||
deps_info: dict
|
||||
version information on relevant Python libraries
|
||||
"""
|
||||
deps = [
|
||||
"geopandas",
|
||||
# required deps
|
||||
"numpy",
|
||||
"pandas",
|
||||
"pyproj",
|
||||
"shapely",
|
||||
# optional deps
|
||||
"pyogrio",
|
||||
"geoalchemy2",
|
||||
"geopy",
|
||||
"matplotlib",
|
||||
"mapclassify",
|
||||
"fiona",
|
||||
"psycopg",
|
||||
"psycopg2",
|
||||
"pyarrow",
|
||||
]
|
||||
|
||||
def get_version(module):
|
||||
return module.__version__
|
||||
|
||||
deps_info = {}
|
||||
|
||||
for modname in deps:
|
||||
try:
|
||||
if modname in sys.modules:
|
||||
mod = sys.modules[modname]
|
||||
else:
|
||||
mod = importlib.import_module(modname)
|
||||
ver = get_version(mod)
|
||||
deps_info[modname] = ver
|
||||
except Exception:
|
||||
deps_info[modname] = None
|
||||
|
||||
return deps_info
|
||||
|
||||
|
||||
def show_versions():
    """
    Print system information and installed module versions.

    Three sections are printed in order: system info, GEOS / GDAL / PROJ
    info, and Python dependencies. Keys are left-aligned to the longest key
    across all three sections, so every ``key: value`` line shares the same
    column layout.

    Examples
    --------

    ::

        $ python -c "import geopandas; geopandas.show_versions()"
    """
    sys_info = _get_sys_info()
    deps_info = _get_deps_info()
    proj_info = _get_C_info()

    # Measure every section: keys such as "GDAL data dir" are longer than
    # any dependency name and would otherwise break the alignment.
    maxlen = max(len(k) for info in (sys_info, proj_info, deps_info) for k in info)
    tpl = "{{k:<{maxlen}}}: {{stat}}".format(maxlen=maxlen)

    print("\nSYSTEM INFO")
    print("-----------")
    for k, stat in sys_info.items():
        print(tpl.format(k=k, stat=stat))

    print("\nGEOS, GDAL, PROJ INFO")
    print("---------------------")
    for k, stat in proj_info.items():
        print(tpl.format(k=k, stat=stat))

    print("\nPYTHON DEPENDENCIES")
    print("-------------------")
    for k, stat in deps_info.items():
        print(tpl.format(k=k, stat=stat))
|
||||
@@ -0,0 +1,257 @@
|
||||
"""
|
||||
geopandas.clip
|
||||
==============
|
||||
|
||||
A module to clip vector data using GeoPandas.
|
||||
|
||||
"""
|
||||
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pandas.api.types
|
||||
|
||||
from shapely.geometry import MultiPolygon, Polygon, box
|
||||
|
||||
from geopandas import GeoDataFrame, GeoSeries
|
||||
from geopandas.array import _check_crs, _crs_mismatch_warn
|
||||
|
||||
|
||||
def _mask_is_list_like_rectangle(mask):
    """Return True when ``mask`` is list-like but not a geometry container.

    GeoDataFrame, GeoSeries, Polygon and MultiPolygon masks are never
    treated as rectangles.
    """
    if isinstance(mask, (GeoDataFrame, GeoSeries, Polygon, MultiPolygon)):
        return False
    return pandas.api.types.is_list_like(mask)
|
||||
|
||||
|
||||
def _clip_gdf_with_mask(gdf, mask, sort=False):
    """Clip geometry to the polygon/rectangle extent.

    Rows of ``gdf`` are first narrowed to those whose geometry intersects
    the mask, then every non-point geometry is cut to the mask.

    Parameters
    ----------
    gdf : GeoDataFrame, GeoSeries
        Dataframe to clip.

    mask : (Multi)Polygon, list-like
        Reference polygon/rectangle for clipping.

    sort : boolean, default False
        If True, the results will be sorted in ascending order using the
        geometries' indexes as the primary key.

    Returns
    -------
    GeoDataFrame
        The returned GeoDataFrame is a clipped subset of gdf
        that intersects with polygon/rectangle.
    """
    clipping_by_rectangle = _mask_is_list_like_rectangle(mask)
    intersection_polygon = box(*mask) if clipping_by_rectangle else mask

    candidate_rows = gdf.sindex.query(
        intersection_polygon, predicate="intersects", sort=sort
    )
    gdf_sub = gdf.iloc[candidate_rows]

    # For performance reasons points don't need to be intersected with poly
    non_point_mask = gdf_sub.geom_type != "Point"
    if not non_point_mask.any():
        # only points, directly return
        return gdf_sub

    def cut(geometries):
        # Rectangle masks use the faster clip_by_rect.
        if clipping_by_rectangle:
            return geometries.clip_by_rect(*mask)
        return geometries.intersection(mask)

    # Clip the data with the polygon
    clipped = gdf_sub.copy()
    if isinstance(gdf_sub, GeoDataFrame):
        clipped.loc[non_point_mask, clipped._geometry_column_name] = cut(
            gdf_sub.geometry.values[non_point_mask]
        )
    else:
        # GeoSeries
        clipped[non_point_mask] = cut(gdf_sub.values[non_point_mask])

    if clipping_by_rectangle:
        # clip_by_rect might return empty geometry collections in edge cases
        clipped = clipped[~clipped.is_empty]
    return clipped
|
||||
|
||||
|
||||
def clip(gdf, mask, keep_geom_type=False, sort=False):
    """Clip points, lines, or polygon geometries to the mask extent.

    Both layers must be in the same Coordinate Reference System (CRS).
    The ``gdf`` will be clipped to the full extent of the clip object.

    If there are multiple polygons in mask, data from ``gdf`` will be
    clipped to the total boundary of all polygons in mask.

    If the ``mask`` is list-like with four elements ``(minx, miny, maxx, maxy)``, a
    faster rectangle clipping algorithm will be used. Note that this can lead to
    slightly different results in edge cases, e.g. if a line would be reduced to a
    point, this point might not be returned.
    The geometry is clipped in a fast but possibly dirty way. The output is not
    guaranteed to be valid. No exceptions will be raised for topological errors.

    Parameters
    ----------
    gdf : GeoDataFrame or GeoSeries
        Vector layer (point, line, polygon) to be clipped to mask.
    mask : GeoDataFrame, GeoSeries, (Multi)Polygon, list-like
        Polygon vector layer used to clip ``gdf``.
        The mask's geometry is dissolved into one geometric feature
        and intersected with ``gdf``.
        If the mask is list-like with four elements ``(minx, miny, maxx, maxy)``,
        ``clip`` will use a faster rectangle clipping (:meth:`~GeoSeries.clip_by_rect`),
        possibly leading to slightly different results.
    keep_geom_type : boolean, default False
        If True, return only geometries of original type in case of intersection
        resulting in multiple geometry types or GeometryCollections.
        If False, return all resulting geometries (potentially mixed-types).
    sort : boolean, default False
        If True, the results will be sorted in ascending order using the
        geometries' indexes as the primary key.

    Returns
    -------
    GeoDataFrame or GeoSeries
         Vector data (points, lines, polygons) from ``gdf`` clipped to
         polygon boundary from mask.

    Raises
    ------
    TypeError
        If ``gdf`` is not a GeoDataFrame or GeoSeries, if ``mask`` is none of
        the supported types, or if a list-like ``mask`` does not have exactly
        four values.

    See also
    --------
    GeoDataFrame.clip : equivalent GeoDataFrame method
    GeoSeries.clip : equivalent GeoSeries method

    Examples
    --------
    Clip points (grocery stores) with polygons (the Near West Side community):

    >>> import geodatasets
    >>> chicago = geopandas.read_file(
    ...     geodatasets.get_path("geoda.chicago_health")
    ... )
    >>> near_west_side = chicago[chicago["community"] == "NEAR WEST SIDE"]
    >>> groceries = geopandas.read_file(
    ...     geodatasets.get_path("geoda.groceries")
    ... ).to_crs(chicago.crs)
    >>> groceries.shape
    (148, 8)

    >>> nws_groceries = geopandas.clip(groceries, near_west_side)
    >>> nws_groceries.shape
    (7, 8)
    """
    if not isinstance(gdf, (GeoDataFrame, GeoSeries)):
        raise TypeError(
            "'gdf' should be GeoDataFrame or GeoSeries, got {}".format(type(gdf))
        )

    mask_is_list_like = _mask_is_list_like_rectangle(mask)
    if (
        not isinstance(mask, (GeoDataFrame, GeoSeries, Polygon, MultiPolygon))
        and not mask_is_list_like
    ):
        # The first fragment ends with a space: the two literals are
        # concatenated and previously rendered as "GeoSeries,(Multi)Polygon".
        raise TypeError(
            "'mask' should be GeoDataFrame, GeoSeries, "
            f"(Multi)Polygon or list-like, got {type(mask)}"
        )

    if mask_is_list_like and len(mask) != 4:
        raise TypeError(
            "If 'mask' is list-like, it must have four values (minx, miny, maxx, maxy)"
        )

    # Evaluated once; the same distinction drives the CRS check, the bounding
    # box lookup and the construction of the combined mask below.
    mask_is_geo = isinstance(mask, (GeoDataFrame, GeoSeries))

    if mask_is_geo:
        if not _check_crs(gdf, mask):
            _crs_mismatch_warn(gdf, mask, stacklevel=3)

    if mask_is_geo:
        box_mask = mask.total_bounds
    elif mask_is_list_like:
        box_mask = mask
    else:
        # Avoid empty tuple returned by .bounds when geometry is empty. A tuple of
        # all nan values is consistent with the behavior of
        # {GeoSeries, GeoDataFrame}.total_bounds for empty geometries.
        # TODO(shapely) can simply use mask.bounds once relying on Shapely 2.0
        box_mask = mask.bounds if not mask.is_empty else (np.nan,) * 4
    box_gdf = gdf.total_bounds

    # Cheap rejection: when the two bounding boxes do not overlap nothing can
    # be clipped, so return an empty slice of the input. Comparisons against
    # nan bounds are False, so empty inputs take this path as well.
    if not (
        ((box_mask[0] <= box_gdf[2]) and (box_gdf[0] <= box_mask[2]))
        and ((box_mask[1] <= box_gdf[3]) and (box_gdf[1] <= box_mask[3]))
    ):
        return gdf.iloc[:0]

    if mask_is_geo:
        combined_mask = mask.geometry.union_all()
    else:
        combined_mask = mask

    clipped = _clip_gdf_with_mask(gdf, combined_mask, sort=sort)

    if keep_geom_type:
        geomcoll_concat = (clipped.geom_type == "GeometryCollection").any()
        geomcoll_orig = (gdf.geom_type == "GeometryCollection").any()

        # Collections that only appeared as a result of the clipping
        new_collection = geomcoll_concat and not geomcoll_orig

        if geomcoll_orig:
            warnings.warn(
                "keep_geom_type can not be called on a "
                "GeoDataFrame with GeometryCollection.",
                stacklevel=2,
            )
        else:
            polys = ["Polygon", "MultiPolygon"]
            lines = ["LineString", "MultiLineString", "LinearRing"]
            points = ["Point", "MultiPoint"]

            # Check that the gdf for multiple geom types (points, lines and/or polys)
            orig_types_total = sum(
                [
                    gdf.geom_type.isin(polys).any(),
                    gdf.geom_type.isin(lines).any(),
                    gdf.geom_type.isin(points).any(),
                ]
            )

            # Check how many geometry types are in the clipped GeoDataFrame
            clip_types_total = sum(
                [
                    clipped.geom_type.isin(polys).any(),
                    clipped.geom_type.isin(lines).any(),
                    clipped.geom_type.isin(points).any(),
                ]
            )

            # Check there aren't any new geom types in the clipped GeoDataFrame
            more_types = orig_types_total < clip_types_total

            if orig_types_total > 1:
                warnings.warn(
                    "keep_geom_type can not be called on a mixed type GeoDataFrame.",
                    stacklevel=2,
                )
            elif new_collection or more_types:
                orig_type = gdf.geom_type.iloc[0]
                if new_collection:
                    # Split collections so their parts can be filtered by type
                    clipped = clipped.explode(index_parts=False)
                if orig_type in polys:
                    clipped = clipped.loc[clipped.geom_type.isin(polys)]
                elif orig_type in lines:
                    clipped = clipped.loc[clipped.geom_type.isin(lines)]

    return clipped
|
||||
@@ -0,0 +1,184 @@
|
||||
import time
|
||||
from collections import defaultdict
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from shapely.geometry import Point
|
||||
|
||||
import geopandas
|
||||
|
||||
|
||||
def _get_throttle_time(provider):
    """
    Amount of time to wait between requests to a geocoding API, for providers
    that specify rate limits in their terms of service.

    Parameters
    ----------
    provider : str or geopy geocoder class
        Either a geopy service name or the geocoder class itself. Service
        names are compared case-insensitively (presumably matching how geopy
        resolves them in ``get_geocoder_for_service`` -- verify against geopy).

    Returns
    -------
    int
        ``1`` (second) for Nominatim, ``0`` for every other provider.
    """
    import geopy.geocoders

    # ``geocode`` / ``reverse_geocode`` call this before the provider name is
    # resolved to a class, so the service name has to be recognised here too.
    # Comparing only against the class left ``provider="nominatim"`` unthrottled.
    if isinstance(provider, str):
        is_nominatim = provider.lower() == "nominatim"
    else:
        is_nominatim = provider == geopy.geocoders.Nominatim

    # https://operations.osmfoundation.org/policies/nominatim/
    if is_nominatim:
        return 1
    return 0
|
||||
|
||||
|
||||
def geocode(strings, provider=None, **kwargs):
    """
    Geocode a collection of address strings into a GeoDataFrame of points.

    Parameters
    ----------
    strings : list or Series of addresses to geocode
    provider : str or geopy.geocoder
        Geocoding service to use. When omitted, 'photon' is used
        (see the Photon's terms of service at:
        https://photon.komoot.io).

        Either the string name used by geopy (as specified in
        geopy.geocoders.SERVICE_TO_GEOCODER) or a geopy geocoder class
        (e.g., geopy.geocoders.Photon) may be used.
    **kwargs
        Forwarded to the geocoder constructor. Some providers require
        additional arguments such as access keys; see each geocoder's
        specific parameters in geopy.geocoders.

    Returns
    -------
    GeoDataFrame
        One row per input entry, with ``geometry`` and ``address`` columns.

    Notes
    -----
    Ensure proper use of the results by consulting the Terms of Service for
    your provider.

    Geocoding requires geopy. Install it using 'pip install geopy'. See also
    https://github.com/geopy/geopy

    Examples
    --------
    >>> df = geopandas.tools.geocode(  # doctest: +SKIP
    ...         ["boston, ma", "1600 pennsylvania ave. washington, dc"]
    ...     )
    >>> df  # doctest: +SKIP
                         geometry                                            address
    0  POINT (-71.05863 42.35899)                          Boston, MA, United States
    1  POINT (-77.03651 38.89766)  1600 Pennsylvania Ave NW, Washington, DC 20006...
    """
    service = "photon" if provider is None else provider
    delay = _get_throttle_time(service)

    # ``True`` selects forward geocoding (address -> point)
    return _query(strings, True, service, delay, **kwargs)
|
||||
|
||||
|
||||
def reverse_geocode(points, provider=None, **kwargs):
    """
    Reverse geocode a collection of points into a GeoDataFrame of addresses.

    Parameters
    ----------
    points : list or Series of Shapely Point objects.
        x coordinate is longitude
        y coordinate is latitude
    provider : str or geopy.geocoder (opt)
        Geocoding service to use. When omitted, 'photon' is used
        (see the Photon's terms of service at:
        https://photon.komoot.io).

        Either the string name used by geopy (as specified in
        geopy.geocoders.SERVICE_TO_GEOCODER) or a geopy geocoder class
        (e.g., geopy.geocoders.Photon) may be used.
    **kwargs
        Forwarded to the geocoder constructor. Some providers require
        additional arguments such as access keys; see each geocoder's
        specific parameters in geopy.geocoders.

    Returns
    -------
    GeoDataFrame
        One row per input point, with ``geometry`` and ``address`` columns.

    Notes
    -----
    Ensure proper use of the results by consulting the Terms of Service for
    your provider.

    Reverse geocoding requires geopy. Install it using 'pip install geopy'.
    See also https://github.com/geopy/geopy

    Examples
    --------
    >>> from shapely.geometry import Point
    >>> df = geopandas.tools.reverse_geocode(  # doctest: +SKIP
    ...     [Point(-71.0594869, 42.3584697), Point(-77.0365305, 38.8977332)]
    ... )
    >>> df  # doctest: +SKIP
                         geometry                                            address
    0  POINT (-71.05941 42.35837)       29 Court Sq, Boston, MA 02108, United States
    1  POINT (-77.03641 38.89766)  1600 Pennsylvania Ave NW, Washington, DC 20006...
    """
    service = "photon" if provider is None else provider
    delay = _get_throttle_time(service)

    # ``False`` selects reverse geocoding (point -> address)
    return _query(points, False, service, delay, **kwargs)
|
||||
|
||||
|
||||
def _query(data, forward, provider, throttle_time, **kwargs):
    """
    Generic wrapper for calls over lists to geopy Geocoders.

    Parameters
    ----------
    data : list-like, Series or GeoSeries
        Addresses (forward) or points (reverse) to look up.
    forward : bool
        True to geocode addresses, False to reverse geocode points.
    provider : str or geopy geocoder class
        Service name resolved through geopy, or the geocoder class itself.
    throttle_time : number
        Seconds to sleep after every request.
    **kwargs
        Passed to the geocoder constructor.

    Returns
    -------
    GeoDataFrame
        Result of ``_prepare_geocode_result`` for the collected responses.
    """
    from geopy.geocoders import get_geocoder_for_service
    from geopy.geocoders.base import GeocoderQueryError

    # Normalise the input so that ``.items()`` yields (index label, value)
    if forward and not isinstance(data, pd.Series):
        data = pd.Series(data)
    elif not forward and not isinstance(data, geopandas.GeoSeries):
        data = geopandas.GeoSeries(data)

    geocoder_cls = (
        get_geocoder_for_service(provider) if isinstance(provider, str) else provider
    )
    coder = geocoder_cls(**kwargs)

    def _lookup(item):
        if forward:
            return coder.geocode(item)
        # geopy expects (latitude, longitude), i.e. (y, x)
        return coder.reverse((item.y, item.x), exactly_one=True)

    results = {}
    for label, item in data.items():
        try:
            results[label] = _lookup(item)
        except (GeocoderQueryError, ValueError):
            # A failed lookup is recorded as "no address, no location"
            results[label] = (None, None)
        time.sleep(throttle_time)

    return _prepare_geocode_result(results)
|
||||
|
||||
|
||||
def _prepare_geocode_result(results):
    """
    Helper function for the geocode function.

    Turns a dict of geocoder responses into a GeoDataFrame in EPSG:4326.

    Parameters
    ----------
    results : dict
        Keys are index entries. Each value is either None or unpacks into
        ``(address, (lat, lon))``; the location part may itself be None.

    Returns
    -------
    GeoDataFrame
        Columns ``geometry`` and ``address``, indexed by the keys of
        ``results``. Missing locations become empty points.
    """
    # Column name -> list of values; stays an empty mapping for empty input
    columns = defaultdict(list)

    for response in results.values():
        address, loc = (None, None) if response is None else response

        # loc is (lat, lon) whereas Point takes (x=lon, y=lat)
        point = Point() if loc is None else Point(loc[1], loc[0])

        columns["geometry"].append(point)
        columns["address"].append(address)

    return geopandas.GeoDataFrame(columns, index=list(results.keys()), crs="EPSG:4326")
|
||||
@@ -0,0 +1,188 @@
|
||||
import numpy as np
|
||||
|
||||
|
||||
def _hilbert_distance(geoms, total_bounds=None, level=16):
    """
    Calculate the distance along a Hilbert curve.

    Each geometry is represented by the midpoint of its bounds, which is
    snapped to an integer grid and then encoded as a position on the curve.

    Parameters
    ----------
    geoms : GeometryArray
    total_bounds : 4-element array
        Total bounds of geometries - array. When None, the extent of the
        midpoints is used instead.
    level : int (1 - 16), default 16
        Determines the precision of the curve (points on the curve will
        have coordinates in the range [0, 2^level - 1]).

    Returns
    -------
    np.ndarray
        Array containing distances along the Hilbert curve

    Raises
    ------
    ValueError
        If any geometry is empty or missing.
    """
    if geoms.is_empty.any() or geoms.isna().any():
        raise ValueError(
            "Hilbert distance cannot be computed on a GeoSeries with empty or "
            "missing geometries.",
        )

    # Per-geometry bounds -> integer grid coordinates -> curve distance
    x, y = _continuous_to_discrete_coords(geoms.bounds, level, total_bounds)
    return _encode(level, x, y)
|
||||
|
||||
|
||||
def _continuous_to_discrete_coords(bounds, level, total_bounds):
    """
    Calculate the midpoints of the given bounds and return them as discrete
    grid coordinates.

    Parameters
    ----------
    bounds : array
        Bounds of each geometry, one ``(minx, miny, maxx, maxy)`` row each.
    level : int
        Level of the Hilbert curve; coordinates are scaled to the range
        [0, 2^level - 1].
    total_bounds : 4-element array or None
        Total bounds of geometries. When None, the extent of the midpoints
        (ignoring NaN) is used.

    Returns
    -------
    tuple of two arrays
        Discrete x and y coordinates of the midpoints.
    """
    # Largest coordinate available on a curve of this level
    max_coord = (2**level) - 1

    # Midpoint of every bounding box along each axis
    x_centres = (bounds[:, 0] + bounds[:, 2]) / 2.0
    y_centres = (bounds[:, 1] + bounds[:, 3]) / 2.0

    if total_bounds is None:
        xmin = np.nanmin(x_centres)
        ymin = np.nanmin(y_centres)
        xmax = np.nanmax(x_centres)
        ymax = np.nanmax(y_centres)
    else:
        xmin, ymin, xmax, ymax = total_bounds

    # Transform continuous value to discrete integer for each dimension
    return (
        _continuous_to_discrete(x_centres, (xmin, xmax), max_coord),
        _continuous_to_discrete(y_centres, (ymin, ymax), max_coord),
    )
|
||||
|
||||
|
||||
def _continuous_to_discrete(vals, val_range, n):
|
||||
"""
|
||||
Convert a continuous one-dimensional array to discrete integer values
|
||||
based their ranges
|
||||
|
||||
Parameters
|
||||
----------
|
||||
vals : Array of continuous values
|
||||
|
||||
val_range : Tuple containing range of continuous values
|
||||
|
||||
n : Number of discrete values
|
||||
|
||||
Returns
|
||||
-------
|
||||
One-dimensional array of discrete ints
|
||||
|
||||
"""
|
||||
width = val_range[1] - val_range[0]
|
||||
if width == 0:
|
||||
return np.zeros_like(vals, dtype=np.uint32)
|
||||
res = (vals - val_range[0]) * (n / width)
|
||||
|
||||
np.clip(res, 0, n, out=res)
|
||||
return res.astype(np.uint32)
|
||||
|
||||
|
||||
# Fast Hilbert curve algorithm by http://threadlocalmutex.com/
|
||||
# From C++ https://github.com/rawrunprotected/hilbert_curves
|
||||
# (public domain)
|
||||
|
||||
|
||||
# Highest curve level accepted by `_encode`, which raises ValueError above it.
MAX_LEVEL = 16
|
||||
|
||||
|
||||
def _interleave(x):
|
||||
x = (x | (x << 8)) & 0x00FF00FF
|
||||
x = (x | (x << 4)) & 0x0F0F0F0F
|
||||
x = (x | (x << 2)) & 0x33333333
|
||||
x = (x | (x << 1)) & 0x55555555
|
||||
return x
|
||||
|
||||
|
||||
def _encode(level, x, y):
    """
    Encode integer grid coordinates as distances along a Hilbert curve.

    Bit-parallel implementation operating on whole arrays at once (see the
    attribution comment above ``MAX_LEVEL`` for the algorithm's origin).

    Parameters
    ----------
    level : int
        Level of the curve; must not exceed ``MAX_LEVEL``.
    x, y : array-like
        Grid coordinates, converted to ``uint32``.
        NOTE(review): presumably expected to lie in [0, 2^level - 1] - confirm.

    Returns
    -------
    np.ndarray
        Curve distance for each ``(x, y)`` pair, using ``2 * level`` bits.

    Raises
    ------
    ValueError
        If ``level`` is greater than ``MAX_LEVEL``.
    """
    x = np.asarray(x, dtype="uint32")
    y = np.asarray(y, dtype="uint32")

    if level > MAX_LEVEL:
        raise ValueError("Level out of range")

    # Left-align the coordinates within 16 bits so every level shares the
    # same fixed-width scan below
    x = x << (16 - level)
    y = y << (16 - level)

    # Initial prefix scan round, prime with x and y
    a = x ^ y
    b = 0xFFFF ^ a
    c = 0xFFFF ^ (x | y)
    d = x & (y ^ 0xFFFF)

    A = a | (b >> 1)
    B = (a >> 1) ^ a
    C = ((c >> 1) ^ (b & (d >> 1))) ^ c
    D = ((a & (c >> 1)) ^ (d >> 1)) ^ d

    # Snapshot the previous round: C and D are updated in place (^=) below
    # and must be combined with the values from before the update
    a = A.copy()
    b = B.copy()
    c = C.copy()
    d = D.copy()

    # Scan round with stride 2
    A = (a & (a >> 2)) ^ (b & (b >> 2))
    B = (a & (b >> 2)) ^ (b & ((a ^ b) >> 2))
    C ^= (a & (c >> 2)) ^ (b & (d >> 2))
    D ^= (b & (c >> 2)) ^ ((a ^ b) & (d >> 2))

    a = A.copy()
    b = B.copy()
    c = C.copy()
    d = D.copy()

    # Scan round with stride 4
    A = (a & (a >> 4)) ^ (b & (b >> 4))
    B = (a & (b >> 4)) ^ (b & ((a ^ b) >> 4))
    C ^= (a & (c >> 4)) ^ (b & (d >> 4))
    D ^= (b & (c >> 4)) ^ ((a ^ b) & (d >> 4))

    # Final round and projection
    a = A.copy()
    b = B.copy()
    c = C.copy()
    d = D.copy()

    # Stride 8; A and B are not needed any more, so only C and D are updated
    C ^= (a & (c >> 8)) ^ (b & (d >> 8))
    D ^= (b & (c >> 8)) ^ ((a ^ b) & (d >> 8))

    # Undo transformation prefix scan
    a = C ^ (C >> 1)
    b = D ^ (D >> 1)

    # Recover index bits
    i0 = x ^ y
    i1 = b | (0xFFFF ^ (i0 | a))

    # Interleave the two 16-bit halves into 32 bits, then drop the low bits
    # introduced by the left-alignment above
    return ((_interleave(i1) << 1) | _interleave(i0)) >> (32 - 2 * level)
|
||||
@@ -0,0 +1,399 @@
|
||||
import warnings
|
||||
from functools import reduce
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from geopandas import GeoDataFrame, GeoSeries
|
||||
from geopandas._compat import PANDAS_GE_30
|
||||
from geopandas.array import _check_crs, _crs_mismatch_warn
|
||||
|
||||
|
||||
def _ensure_geometry_column(df):
    """
    Helper function to ensure the geometry column is called 'geometry'.

    If another column with that name exists, it will be dropped. With
    pandas >= 3 a new object is returned; otherwise ``df`` is modified in
    place and returned.
    """
    if df._geometry_column_name == "geometry":
        return df

    # A non-geometry column already occupying the name has to make room first
    name_taken = "geometry" in df.columns

    if PANDAS_GE_30:
        if name_taken:
            df = df.drop("geometry", axis=1)
        return df.rename_geometry("geometry")

    if name_taken:
        df.drop("geometry", axis=1, inplace=True)
    df.rename_geometry("geometry", inplace=True)
    return df
|
||||
|
||||
|
||||
def _overlay_intersection(df1, df2):
    """
    Overlay Intersection operation used in overlay function.

    Returns one row per intersecting pair of geometries, carrying the
    attributes of both inputs plus the positional ``__idx1`` / ``__idx2``
    of the rows the pair came from.
    """
    # Spatial Index to create intersections
    idx1, idx2 = df2.sindex.query(df1.geometry, predicate="intersects", sort=True)

    if idx1.size == 0 or idx2.size == 0:
        # Nothing intersects: build an empty frame with the expected columns
        empty = df1.iloc[:0].merge(
            df2.iloc[:0].drop(df2.geometry.name, axis=1),
            left_index=True,
            right_index=True,
            suffixes=("_1", "_2"),
        )
        empty["__idx1"] = np.nan
        empty["__idx2"] = np.nan
        geom_name = df1.geometry.name
        # keep the geometry column last
        return empty[empty.columns.drop(geom_name).tolist() + [geom_name]]

    # Create pairs of geometries in both dataframes to be intersected
    left = df1.geometry.take(idx1).reset_index(drop=True)
    right = df2.geometry.take(idx2).reset_index(drop=True)
    geom_intersect = left.intersection(right)
    poly_ix = geom_intersect.geom_type.isin(["Polygon", "MultiPolygon"])
    geom_intersect.loc[poly_ix] = geom_intersect[poly_ix].make_valid()

    # only keep actual intersecting geometries
    pairs_intersect = pd.DataFrame({"__idx1": idx1, "__idx2": idx2})

    # merge data for intersecting geometries
    df1 = df1.reset_index(drop=True)
    df2 = df2.reset_index(drop=True)
    left_attrs = df1.drop(df1._geometry_column_name, axis=1)
    right_attrs = df2.drop(df2._geometry_column_name, axis=1)

    dfinter = pairs_intersect.merge(left_attrs, left_on="__idx1", right_index=True)
    dfinter = dfinter.merge(
        right_attrs,
        left_on="__idx2",
        right_index=True,
        suffixes=("_1", "_2"),
    )

    return GeoDataFrame(dfinter, geometry=geom_intersect, crs=df1.crs)
|
||||
|
||||
|
||||
def _overlay_difference(df1, df2):
    """
    Overlay Difference operation used in overlay function.

    For every geometry in ``df1``, subtracts all geometries of ``df2`` that
    intersect it. Rows whose remainder is empty are dropped.

    Parameters
    ----------
    df1, df2 : GeoDataFrame

    Returns
    -------
    GeoDataFrame
        Copy of the surviving rows of ``df1`` with their geometry replaced
        by the difference.
    """
    # spatial index query to find intersections
    idx1, idx2 = df2.sindex.query(df1.geometry, predicate="intersects", sort=True)

    # Group the df2 positions by the df1 position they intersect. Splitting
    # at the first occurrence of each idx1 value assumes equal values are
    # contiguous, which is presumably what sort=True provides.
    idx1_unique, idx1_unique_indices = np.unique(idx1, return_index=True)
    idx2_split = np.split(idx2, idx1_unique_indices[1:])

    # A dict lookup replaces the former `idx in idx1_unique` array scan and
    # `list.pop(0)` per row, both of which are linear in the number of matches.
    neighbours_by_row = dict(zip(idx1_unique.tolist(), idx2_split))
    sidx = [neighbours_by_row.get(idx, []) for idx in range(df1.geometry.size)]

    # Create differences
    right_geoms = df2.geometry
    new_g = [
        # fold the neighbours into the geometry one difference at a time
        reduce(lambda x, y: x.difference(y), right_geoms.iloc[neighbours], geom)
        for geom, neighbours in zip(df1.geometry, sidx)
    ]
    differences = GeoSeries(new_g, index=df1.index, crs=df1.crs)
    poly_ix = differences.geom_type.isin(["Polygon", "MultiPolygon"])
    differences.loc[poly_ix] = differences[poly_ix].make_valid()

    # Drop rows that were erased completely
    non_empty = ~differences.is_empty
    geom_diff = differences[non_empty].copy()
    dfdiff = df1[non_empty].copy()
    dfdiff[dfdiff._geometry_column_name] = geom_diff
    return dfdiff
|
||||
|
||||
|
||||
def _overlay_symmetric_diff(df1, df2):
    """
    Overlay Symmetric Difference operation used in overlay function.

    Combines the parts of ``df1`` not covered by ``df2`` with the parts of
    ``df2`` not covered by ``df1`` into a single GeoDataFrame whose geometry
    column is called 'geometry'.
    """
    only_in_1 = _overlay_difference(df1, df2)
    only_in_2 = _overlay_difference(df2, df1)

    # Each side gets a running id of its own and NaN for the other side, so
    # the outer merge below never matches rows from the two frames
    only_in_1["__idx1"] = range(len(only_in_1))
    only_in_1["__idx2"] = np.nan
    only_in_2["__idx2"] = range(len(only_in_2))
    only_in_2["__idx1"] = np.nan

    # ensure geometry name (otherwise merge goes wrong)
    only_in_1 = _ensure_geometry_column(only_in_1)
    only_in_2 = _ensure_geometry_column(only_in_2)

    # combine both 'difference' dataframes
    dfsym = only_in_1.merge(
        only_in_2, on=["__idx1", "__idx2"], how="outer", suffixes=("_1", "_2")
    )

    # Rows coming from the second frame have no geometry_1
    from_second = dfsym.geometry_1.isnull()
    geometry = dfsym.geometry_1.copy()
    geometry.name = "geometry"
    # https://github.com/pandas-dev/pandas/issues/26468 use loc for now
    geometry.loc[from_second] = dfsym.loc[from_second, "geometry_2"]

    dfsym.drop(["geometry_1", "geometry_2"], axis=1, inplace=True)
    dfsym.reset_index(drop=True, inplace=True)
    return GeoDataFrame(dfsym, geometry=geometry, crs=df1.crs)
|
||||
|
||||
|
||||
def _overlay_union(df1, df2):
    """
    Overlay Union operation used in overlay function.

    Concatenates the intersection and the symmetric difference of the two
    frames and moves the 'geometry' column to the end.
    """
    parts = [_overlay_intersection(df1, df2), _overlay_symmetric_diff(df1, df2)]
    dfunion = pd.concat(parts, ignore_index=True, sort=False)

    # keep geometry column last
    ordered = list(dfunion.columns)
    ordered.append(ordered.pop(ordered.index("geometry")))
    return dfunion.reindex(columns=ordered)
|
||||
|
||||
|
||||
def overlay(df1, df2, how="intersection", keep_geom_type=None, make_valid=True):
    """Perform spatial overlay between two GeoDataFrames.

    Currently only supports data GeoDataFrames with uniform geometry types,
    i.e. containing only (Multi)Polygons, or only (Multi)Points, or a
    combination of (Multi)LineString and LinearRing shapes.
    Implements several methods that are all effectively subsets of the union.

    See the User Guide page :doc:`../../user_guide/set_operations` for details.

    Parameters
    ----------
    df1 : GeoDataFrame
    df2 : GeoDataFrame
    how : string
        Method of spatial overlay: 'intersection', 'union',
        'identity', 'symmetric_difference' or 'difference'.
    keep_geom_type : bool
        If True, return only geometries of the same geometry type as df1 has,
        if False, return all resulting geometries. Default is None,
        which will set keep_geom_type to True but warn upon dropping
        geometries.
    make_valid : bool, default True
        If True, any invalid input geometries are corrected with a call to make_valid(),
        if False, a `ValueError` is raised if any input geometries are invalid.

    Returns
    -------
    df : GeoDataFrame
        GeoDataFrame with new set of polygons and attributes
        resulting from the overlay

    Raises
    ------
    ValueError
        If ``how`` is not one of the supported methods, or if
        ``make_valid=False`` and an input contains invalid geometries.
    NotImplementedError
        If either input is a GeoSeries or contains mixed geometry types.
    TypeError
        If ``keep_geom_type`` is in effect and the geometry type of ``df1``
        is neither polygonal, linear nor point-like.

    Examples
    --------
    >>> from shapely.geometry import Polygon
    >>> polys1 = geopandas.GeoSeries([Polygon([(0,0), (2,0), (2,2), (0,2)]),
    ...                               Polygon([(2,2), (4,2), (4,4), (2,4)])])
    >>> polys2 = geopandas.GeoSeries([Polygon([(1,1), (3,1), (3,3), (1,3)]),
    ...                               Polygon([(3,3), (5,3), (5,5), (3,5)])])
    >>> df1 = geopandas.GeoDataFrame({'geometry': polys1, 'df1_data':[1,2]})
    >>> df2 = geopandas.GeoDataFrame({'geometry': polys2, 'df2_data':[1,2]})

    >>> geopandas.overlay(df1, df2, how='union')
       df1_data  df2_data                                           geometry
    0       1.0       1.0                POLYGON ((2 2, 2 1, 1 1, 1 2, 2 2))
    1       2.0       1.0                POLYGON ((2 2, 2 3, 3 3, 3 2, 2 2))
    2       2.0       2.0                POLYGON ((4 4, 4 3, 3 3, 3 4, 4 4))
    3       1.0       NaN      POLYGON ((2 0, 0 0, 0 2, 1 2, 1 1, 2 1, 2 0))
    4       2.0       NaN  MULTIPOLYGON (((3 4, 3 3, 2 3, 2 4, 3 4)), ((4...
    5       NaN       1.0  MULTIPOLYGON (((2 3, 2 2, 1 2, 1 3, 2 3)), ((3...
    6       NaN       2.0      POLYGON ((3 5, 5 5, 5 3, 4 3, 4 4, 3 4, 3 5))

    >>> geopandas.overlay(df1, df2, how='intersection')
       df1_data  df2_data                             geometry
    0         1         1  POLYGON ((2 2, 2 1, 1 1, 1 2, 2 2))
    1         2         1  POLYGON ((2 2, 2 3, 3 3, 3 2, 2 2))
    2         2         2  POLYGON ((4 4, 4 3, 3 3, 3 4, 4 4))

    >>> geopandas.overlay(df1, df2, how='symmetric_difference')
       df1_data  df2_data                                           geometry
    0       1.0       NaN      POLYGON ((2 0, 0 0, 0 2, 1 2, 1 1, 2 1, 2 0))
    1       2.0       NaN  MULTIPOLYGON (((3 4, 3 3, 2 3, 2 4, 3 4)), ((4...
    2       NaN       1.0  MULTIPOLYGON (((2 3, 2 2, 1 2, 1 3, 2 3)), ((3...
    3       NaN       2.0      POLYGON ((3 5, 5 5, 5 3, 4 3, 4 4, 3 4, 3 5))

    >>> geopandas.overlay(df1, df2, how='difference')
                                                geometry  df1_data
    0      POLYGON ((2 0, 0 0, 0 2, 1 2, 1 1, 2 1, 2 0))         1
    1  MULTIPOLYGON (((3 4, 3 3, 2 3, 2 4, 3 4)), ((4...         2

    >>> geopandas.overlay(df1, df2, how='identity')
       df1_data  df2_data                                           geometry
    0       1.0       1.0                POLYGON ((2 2, 2 1, 1 1, 1 2, 2 2))
    1       2.0       1.0                POLYGON ((2 2, 2 3, 3 3, 3 2, 2 2))
    2       2.0       2.0                POLYGON ((4 4, 4 3, 3 3, 3 4, 4 4))
    3       1.0       NaN      POLYGON ((2 0, 0 0, 0 2, 1 2, 1 1, 2 1, 2 0))
    4       2.0       NaN  MULTIPOLYGON (((3 4, 3 3, 2 3, 2 4, 3 4)), ((4...

    See also
    --------
    sjoin : spatial join
    GeoDataFrame.overlay : equivalent method

    Notes
    -----
    Every operation in GeoPandas is planar, i.e. the potential third
    dimension is not taken into account.
    """
    # Allowed operations
    allowed_hows = [
        "intersection",
        "union",
        "identity",
        "symmetric_difference",
        "difference",  # aka erase
    ]
    # Error Messages
    if how not in allowed_hows:
        raise ValueError(
            "`how` was '{0}' but is expected to be in {1}".format(how, allowed_hows)
        )

    if isinstance(df1, GeoSeries) or isinstance(df2, GeoSeries):
        raise NotImplementedError(
            "overlay currently only implemented for GeoDataFrames"
        )

    if not _check_crs(df1, df2):
        _crs_mismatch_warn(df1, df2, stacklevel=3)

    # None means "filter by geometry type, but warn when that drops anything"
    if keep_geom_type is None:
        keep_geom_type = True
        keep_geom_type_warning = True
    else:
        keep_geom_type_warning = False

    polys = ["Polygon", "MultiPolygon"]
    lines = ["LineString", "MultiLineString", "LinearRing"]
    points = ["Point", "MultiPoint"]
    for i, df in enumerate([df1, df2]):
        poly_check = df.geom_type.isin(polys).any()
        lines_check = df.geom_type.isin(lines).any()
        points_check = df.geom_type.isin(points).any()
        if sum([poly_check, lines_check, points_check]) > 1:
            raise NotImplementedError(
                "df{} contains mixed geometry types.".format(i + 1)
            )

    if how == "intersection":
        box_gdf1 = df1.total_bounds
        box_gdf2 = df2.total_bounds

        # Shortcut: with disjoint bounding boxes the intersection is empty,
        # so return an empty frame with the merged column layout right away
        if not (
            ((box_gdf1[0] <= box_gdf2[2]) and (box_gdf2[0] <= box_gdf1[2]))
            and ((box_gdf1[1] <= box_gdf2[3]) and (box_gdf2[1] <= box_gdf1[3]))
        ):
            result = df1.iloc[:0].merge(
                df2.iloc[:0].drop(df2.geometry.name, axis=1),
                left_index=True,
                right_index=True,
                suffixes=("_1", "_2"),
            )
            return result[
                result.columns.drop(df1.geometry.name).tolist() + [df1.geometry.name]
            ]

    # Computations
    def _make_valid(df):
        # Work on a copy so the caller's frame is never modified
        df = df.copy()
        if df.geom_type.isin(polys).all():
            mask = ~df.geometry.is_valid
            col = df._geometry_column_name
            if make_valid:
                df.loc[mask, col] = df.loc[mask, col].make_valid()
            elif mask.any():
                raise ValueError(
                    "You have passed make_valid=False along with "
                    f"{mask.sum()} invalid input geometries. "
                    "Use make_valid=True or make sure that all geometries "
                    "are valid before using overlay."
                )
        return df

    df1 = _make_valid(df1)
    df2 = _make_valid(df2)

    with warnings.catch_warnings():  # CRS checked above, suppress array-level warning
        warnings.filterwarnings("ignore", message="CRS mismatch between the CRS")
        if how == "difference":
            result = _overlay_difference(df1, df2)
        elif how == "intersection":
            result = _overlay_intersection(df1, df2)
        elif how == "symmetric_difference":
            result = _overlay_symmetric_diff(df1, df2)
        elif how == "union":
            result = _overlay_union(df1, df2)
        elif how == "identity":
            # identity = the part of the union that originates from df1
            dfunion = _overlay_union(df1, df2)
            result = dfunion[dfunion["__idx1"].notnull()].copy()

        # The helper index columns are internal bookkeeping only
        if how in ["intersection", "symmetric_difference", "union", "identity"]:
            result.drop(["__idx1", "__idx2"], axis=1, inplace=True)

    if keep_geom_type:
        geom_type = df1.geom_type.iloc[0]

        # First we filter the geometry types inside GeometryCollections objects
        # (e.g. GeometryCollection([polygon, point]) -> polygon)
        # we do this separately on only the relevant rows, as this is an expensive
        # operation (an expensive no-op for geometry types other than collections)
        is_collection = result.geom_type == "GeometryCollection"
        if is_collection.any():
            geom_col = result._geometry_column_name
            collections = result[[geom_col]][is_collection]

            exploded = collections.reset_index(drop=True).explode(index_parts=True)
            exploded = exploded.reset_index(level=0)

            if geom_type in polys:
                exploded.loc[~exploded.geom_type.isin(polys), geom_col] = None
            elif geom_type in lines:
                exploded.loc[~exploded.geom_type.isin(lines), geom_col] = None
            elif geom_type in points:
                exploded.loc[~exploded.geom_type.isin(points), geom_col] = None
            else:
                raise TypeError(
                    "`keep_geom_type` does not support {}.".format(geom_type)
                )
            # The parts blanked out above are the ones that were dropped.
            # (Subtracting this from the number of exploded parts, as was
            # done before, yields the number of parts *kept* instead.)
            num_dropped_collection = exploded.geometry.isna().sum()

            # level_0 created with above reset_index operation
            # and represents the original geometry collections
            # TODO avoiding dissolve to call union_all in this case could further
            # improve performance (we only need to collect geometries in their
            # respective Multi version)
            dissolved = exploded.dissolve(by="level_0")
            result.loc[is_collection, geom_col] = dissolved[geom_col].values
        else:
            num_dropped_collection = 0

        # Now we filter all geometries (in theory we don't need to do this
        # again for the rows handled above for GeometryCollections, but filtering
        # them out is probably more expensive as simply including them when this
        # is typically about only a few rows)
        orig_num_geoms = result.shape[0]
        if geom_type in polys:
            result = result.loc[result.geom_type.isin(polys)]
        elif geom_type in lines:
            result = result.loc[result.geom_type.isin(lines)]
        elif geom_type in points:
            result = result.loc[result.geom_type.isin(points)]
        else:
            raise TypeError("`keep_geom_type` does not support {}.".format(geom_type))
        num_dropped = orig_num_geoms - result.shape[0]

        if (num_dropped > 0 or num_dropped_collection > 0) and keep_geom_type_warning:
            warnings.warn(
                "`keep_geom_type=True` in overlay resulted in {} dropped "
                "geometries of different geometry types than df1 has. "
                "Set `keep_geom_type=False` to retain all "
                "geometries".format(num_dropped + num_dropped_collection),
                UserWarning,
                stacklevel=2,
            )

    result.reset_index(drop=True, inplace=True)
    return result
|
||||
@@ -0,0 +1,734 @@
|
||||
import warnings
|
||||
from functools import partial
|
||||
from typing import Optional
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from geopandas import GeoDataFrame
|
||||
from geopandas._compat import PANDAS_GE_30
|
||||
from geopandas.array import _check_crs, _crs_mismatch_warn
|
||||
|
||||
|
||||
def sjoin(
    left_df,
    right_df,
    how="inner",
    predicate="intersects",
    lsuffix="left",
    rsuffix="right",
    distance=None,
    on_attribute=None,
    **kwargs,
):
    """Spatial join of two GeoDataFrames.

    See the User Guide page :doc:`../../user_guide/mergingdata` for details.


    Parameters
    ----------
    left_df, right_df : GeoDataFrames
    how : string, default 'inner'
        The type of join:

        * 'left': use keys from left_df; retain only left_df geometry column
        * 'right': use keys from right_df; retain only right_df geometry column
        * 'inner': use intersection of keys from both dfs; retain only
          left_df geometry column
    predicate : string, default 'intersects'
        Binary predicate. Valid values are determined by the spatial index used.
        You can check the valid values in left_df or right_df as
        ``left_df.sindex.valid_query_predicates`` or
        ``right_df.sindex.valid_query_predicates``
        Replaces deprecated ``op`` parameter.
    lsuffix : string, default 'left'
        Suffix to apply to overlapping column names (left GeoDataFrame).
    rsuffix : string, default 'right'
        Suffix to apply to overlapping column names (right GeoDataFrame).
    distance : number or array_like, optional
        Distance(s) around each input geometry within which to query the tree
        for the 'dwithin' predicate. If array_like, must be
        one-dimensional with length equal to length of left GeoDataFrame.
        Required if ``predicate='dwithin'``.
    on_attribute : string, list or tuple
        Column name(s) to join on as an additional join restriction on top
        of the spatial predicate. These must be found in both DataFrames.
        If set, observations are joined only if the predicate applies
        and values in specified columns match.
    **kwargs
        Not supported; any extra keyword argument raises ``TypeError``.

    Returns
    -------
    GeoDataFrame
        The joined frame.

    Raises
    ------
    TypeError
        If an unexpected keyword argument is passed.

    Examples
    --------
    >>> import geodatasets
    >>> chicago = geopandas.read_file(
    ...     geodatasets.get_path("geoda.chicago_health")
    ... )
    >>> groceries = geopandas.read_file(
    ...     geodatasets.get_path("geoda.groceries")
    ... ).to_crs(chicago.crs)

    >>> chicago.head()  # doctest: +SKIP
        ComAreaID  ...                                           geometry
    0         35  ...  POLYGON ((-87.60914 41.84469, -87.60915 41.844...
    1         36  ...  POLYGON ((-87.59215 41.81693, -87.59231 41.816...
    2         37  ...  POLYGON ((-87.62880 41.80189, -87.62879 41.801...
    3         38  ...  POLYGON ((-87.60671 41.81681, -87.60670 41.816...
    4         39  ...  POLYGON ((-87.59215 41.81693, -87.59215 41.816...
    [5 rows x 87 columns]

    >>> groceries.head()  # doctest: +SKIP
        OBJECTID     Ycoord  ...  Category                         geometry
    0        16  41.973266  ...       NaN  MULTIPOINT (-87.65661 41.97321)
    1        18  41.696367  ...       NaN  MULTIPOINT (-87.68136 41.69713)
    2        22  41.868634  ...       NaN  MULTIPOINT (-87.63918 41.86847)
    3        23  41.877590  ...       new  MULTIPOINT (-87.65495 41.87783)
    4        27  41.737696  ...       NaN  MULTIPOINT (-87.62715 41.73623)
    [5 rows x 8 columns]

    >>> groceries_w_communities = geopandas.sjoin(groceries, chicago)
    >>> groceries_w_communities.head()  # doctest: +SKIP
       OBJECTID       community                           geometry
    0        16          UPTOWN  MULTIPOINT ((-87.65661 41.97321))
    1        18     MORGAN PARK  MULTIPOINT ((-87.68136 41.69713))
    2        22  NEAR WEST SIDE  MULTIPOINT ((-87.63918 41.86847))
    3        23  NEAR WEST SIDE  MULTIPOINT ((-87.65495 41.87783))
    4        27         CHATHAM  MULTIPOINT ((-87.62715 41.73623))
    [5 rows x 95 columns]

    See also
    --------
    overlay : overlay operation resulting in a new geometry
    GeoDataFrame.sjoin : equivalent method

    Notes
    -----
    Every operation in GeoPandas is planar, i.e. the potential third
    dimension is not taken into account.
    """
    # Reject unknown keywords explicitly so the error names the offending
    # argument (mirrors what Python reports for a plain signature).
    if kwargs:
        first = next(iter(kwargs))
        raise TypeError(f"sjoin() got an unexpected keyword argument '{first}'")

    # Accept a single column name, a tuple or a list for ``on_attribute``.
    on_attribute = _maybe_make_list(on_attribute)

    _basic_checks(left_df, right_df, how, lsuffix, rsuffix, on_attribute=on_attribute)

    indices = _geom_predicate_query(
        left_df, right_df, predicate, distance, on_attribute=on_attribute
    )

    # No distances are tracked for predicate joins, hence the ``None``.
    joined, _ = _frame_join(
        left_df,
        right_df,
        indices,
        None,
        how,
        lsuffix,
        rsuffix,
        predicate,
        on_attribute=on_attribute,
    )

    return joined
|
||||
|
||||
|
||||
def _maybe_make_list(obj):
|
||||
if isinstance(obj, tuple):
|
||||
return list(obj)
|
||||
if obj is not None and not isinstance(obj, list):
|
||||
return [obj]
|
||||
return obj
|
||||
|
||||
|
||||
def _basic_checks(left_df, right_df, how, lsuffix, rsuffix, on_attribute=None):
    """Validate the inputs of a spatial join.

    Both frames must be GeoDataFrames and `how` must be one of the supported
    join types. A CRS mismatch between the frames only triggers a warning.
    Every column listed in `on_attribute` must exist in both frames and must
    not be the active geometry column of either frame.

    Parameters
    ----------
    left_df : GeoDataFrame
    right_df : GeoDataFrame
    how : str, one of 'left', 'right', 'inner'
        join type
    lsuffix : str
        left index suffix (accepted for signature parity, not inspected here)
    rsuffix : str
        right index suffix (accepted for signature parity, not inspected here)
    on_attribute : list, default None
        list of column names to merge on along with geometry

    Raises
    ------
    ValueError
        If any of the checks described above fails.
    """
    if not isinstance(left_df, GeoDataFrame):
        left_type = type(left_df)
        raise ValueError("'left_df' should be GeoDataFrame, got {}".format(left_type))

    if not isinstance(right_df, GeoDataFrame):
        right_type = type(right_df)
        raise ValueError(
            "'right_df' should be GeoDataFrame, got {}".format(right_type)
        )

    allowed_hows = ["left", "right", "inner"]
    if how not in allowed_hows:
        message = '`how` was "{}" but is expected to be in {}'.format(
            how, allowed_hows
        )
        raise ValueError(message)

    crs_matches = _check_crs(left_df, right_df)
    if not crs_matches:
        _crs_mismatch_warn(left_df, right_df, stacklevel=4)

    # ``None`` and an empty list both mean "no attribute restriction".
    for attr in on_attribute or ():
        in_left = attr in left_df
        in_right = attr in right_df
        if not (in_left or in_right):
            raise ValueError(
                f"Expected column {attr} is missing from both of the dataframes."
            )
        if not in_left:
            raise ValueError(
                f"Expected column {attr} is missing from the left dataframe."
            )
        if not in_right:
            raise ValueError(
                f"Expected column {attr} is missing from the right dataframe."
            )
        if attr in (left_df.geometry.name, right_df.geometry.name):
            raise ValueError(
                "Active geometry column cannot be used as an input "
                "for on_attribute parameter."
            )
|
||||
|
||||
|
||||
def _geom_predicate_query(left_df, right_df, predicate, distance, on_attribute=None):
|
||||
"""Compute geometric comparisons and get matching indices.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
left_df : GeoDataFrame
|
||||
right_df : GeoDataFrame
|
||||
predicate : string
|
||||
Binary predicate to query.
|
||||
on_attribute: list, default None
|
||||
list of column names to merge on along with geometry
|
||||
|
||||
|
||||
Returns
|
||||
-------
|
||||
DataFrame
|
||||
DataFrame with matching indices in
|
||||
columns named `_key_left` and `_key_right`.
|
||||
"""
|
||||
|
||||
original_predicate = predicate
|
||||
|
||||
if predicate == "within":
|
||||
# within is implemented as the inverse of contains
|
||||
# contains is a faster predicate
|
||||
# see discussion at https://github.com/geopandas/geopandas/pull/1421
|
||||
predicate = "contains"
|
||||
sindex = left_df.sindex
|
||||
input_geoms = right_df.geometry
|
||||
else:
|
||||
# all other predicates are symmetric
|
||||
# keep them the same
|
||||
sindex = right_df.sindex
|
||||
input_geoms = left_df.geometry
|
||||
|
||||
if sindex:
|
||||
l_idx, r_idx = sindex.query(
|
||||
input_geoms, predicate=predicate, sort=False, distance=distance
|
||||
)
|
||||
else:
|
||||
# when sindex is empty / has no valid geometries
|
||||
l_idx, r_idx = np.array([], dtype=np.intp), np.array([], dtype=np.intp)
|
||||
|
||||
if original_predicate == "within":
|
||||
# within is implemented as the inverse of contains
|
||||
# flip back the results
|
||||
r_idx, l_idx = l_idx, r_idx
|
||||
indexer = np.lexsort((r_idx, l_idx))
|
||||
l_idx = l_idx[indexer]
|
||||
r_idx = r_idx[indexer]
|
||||
|
||||
if on_attribute:
|
||||
for attr in on_attribute:
|
||||
(l_idx, r_idx), _ = _filter_shared_attribute(
|
||||
left_df, right_df, l_idx, r_idx, attr
|
||||
)
|
||||
|
||||
return l_idx, r_idx
|
||||
|
||||
|
||||
def _reset_index_with_suffix(df, suffix, other):
    """
    Equivalent of df.reset_index(), but with adding 'suffix' to auto-generated
    column names.

    Returns the reset frame together with the new column labels as a
    ``pd.Index``; the labels are not assigned to the frame here. Raises
    ``ValueError`` when a generated label already exists in `df` or `other`.
    """
    original_names = df.index.names
    if PANDAS_GE_30:
        df_reset = df.reset_index()
    else:
        # we already made a copy of the dataframe in _frame_join before getting here
        df.reset_index(inplace=True)
        df_reset = df

    column_names = df_reset.columns.to_numpy(copy=True)
    for i, label in enumerate(original_names):
        # named index levels keep their name; only unnamed ones get a suffix
        if label is not None:
            continue

        auto_name = column_names[i]
        if "level" in auto_name:
            # reset_index of MultiIndex gives "level_i" names, preserve the "i"
            lev = auto_name.split("_")[1]
            new_label = f"index_{suffix}{lev}"
        else:
            new_label = f"index_{suffix}"

        # check new label will not be in other dataframe
        clashes = new_label in df.columns or new_label in other.columns
        if clashes:
            raise ValueError(
                "'{0}' cannot be a column name in the frames being"
                " joined".format(new_label)
            )
        column_names[i] = new_label

    return df_reset, pd.Index(column_names)
|
||||
|
||||
|
||||
def _process_column_names_with_suffix(
|
||||
left: pd.Index, right: pd.Index, suffixes, left_df, right_df
|
||||
):
|
||||
"""
|
||||
Add suffixes to overlapping labels (ignoring the geometry column).
|
||||
|
||||
This is based on pandas' merge logic at https://github.com/pandas-dev/pandas/blob/
|
||||
a0779adb183345a8eb4be58b3ad00c223da58768/pandas/core/reshape/merge.py#L2300-L2370
|
||||
"""
|
||||
to_rename = left.intersection(right)
|
||||
if len(to_rename) == 0:
|
||||
return left, right
|
||||
|
||||
lsuffix, rsuffix = suffixes
|
||||
|
||||
if not lsuffix and not rsuffix:
|
||||
raise ValueError(f"columns overlap but no suffix specified: {to_rename}")
|
||||
|
||||
def renamer(x, suffix, geometry):
|
||||
if x in to_rename and x != geometry and suffix is not None:
|
||||
return f"{x}_{suffix}"
|
||||
return x
|
||||
|
||||
lrenamer = partial(
|
||||
renamer,
|
||||
suffix=lsuffix,
|
||||
geometry=getattr(left_df, "_geometry_column_name", None),
|
||||
)
|
||||
rrenamer = partial(
|
||||
renamer,
|
||||
suffix=rsuffix,
|
||||
geometry=getattr(right_df, "_geometry_column_name", None),
|
||||
)
|
||||
|
||||
# TODO retain index name?
|
||||
left_renamed = pd.Index([lrenamer(lab) for lab in left])
|
||||
right_renamed = pd.Index([rrenamer(lab) for lab in right])
|
||||
|
||||
dups = []
|
||||
if not left_renamed.is_unique:
|
||||
# Only warn when duplicates are caused because of suffixes, already duplicated
|
||||
# columns in origin should not warn
|
||||
dups = left_renamed[(left_renamed.duplicated()) & (~left.duplicated())].tolist()
|
||||
if not right_renamed.is_unique:
|
||||
dups.extend(
|
||||
right_renamed[(right_renamed.duplicated()) & (~right.duplicated())].tolist()
|
||||
)
|
||||
# TODO turn this into an error (pandas has done so as well)
|
||||
if dups:
|
||||
warnings.warn(
|
||||
f"Passing 'suffixes' which cause duplicate columns {set(dups)} in the "
|
||||
f"result is deprecated and will raise a MergeError in a future version.",
|
||||
FutureWarning,
|
||||
stacklevel=4,
|
||||
)
|
||||
|
||||
return left_renamed, right_renamed
|
||||
|
||||
|
||||
def _restore_index(joined, index_names, index_names_original):
    """
    Move the columns listed in `index_names` back into the index of `joined`,
    and reset the name of each level to `None` where the corresponding entry
    of `index_names_original` is `None`.
    """
    index_columns = list(index_names)
    if PANDAS_GE_30:
        joined = joined.set_index(index_columns)
    else:
        joined.set_index(index_columns, inplace=True)

    # restore the fact that the index didn't have a name
    restored_names = list(joined.index.names)
    unnamed_levels = [
        pos for pos, label in enumerate(index_names_original) if label is None
    ]
    for pos in unnamed_levels:
        restored_names[pos] = None
    joined.index.names = restored_names
    return joined
|
||||
|
||||
|
||||
def _adjust_indexers(indices, distances, original_length, how, predicate):
|
||||
"""
|
||||
The left/right indexers from the query represents an inner join.
|
||||
For a left or right join, we need to adjust them to include the rows
|
||||
that would not be present in an inner join.
|
||||
"""
|
||||
# the indices represent an inner join, no adjustment needed
|
||||
if how == "inner":
|
||||
return indices, distances
|
||||
|
||||
l_idx, r_idx = indices
|
||||
|
||||
if how == "right":
|
||||
# re-sort so it is sorted by the right indexer
|
||||
indexer = np.lexsort((l_idx, r_idx))
|
||||
l_idx, r_idx = l_idx[indexer], r_idx[indexer]
|
||||
if distances is not None:
|
||||
distances = distances[indexer]
|
||||
|
||||
# switch order
|
||||
r_idx, l_idx = l_idx, r_idx
|
||||
|
||||
# determine which indices are missing and where they would need to be inserted
|
||||
idx = np.arange(original_length)
|
||||
l_idx_missing = idx[~np.isin(idx, l_idx)]
|
||||
insert_idx = np.searchsorted(l_idx, l_idx_missing)
|
||||
# for the left indexer, insert those missing indices
|
||||
l_idx = np.insert(l_idx, insert_idx, l_idx_missing)
|
||||
# for the right indexer, insert -1 -> to get missing values in pandas' reindexing
|
||||
r_idx = np.insert(r_idx, insert_idx, -1)
|
||||
# for the indices, already insert those missing values manually
|
||||
if distances is not None:
|
||||
distances = np.insert(distances, insert_idx, np.nan)
|
||||
|
||||
if how == "right":
|
||||
# switch back
|
||||
l_idx, r_idx = r_idx, l_idx
|
||||
|
||||
return (l_idx, r_idx), distances
|
||||
|
||||
|
||||
def _frame_join(
    left_df,
    right_df,
    indices,
    distances,
    how,
    lsuffix,
    rsuffix,
    predicate,
    on_attribute=None,
):
    """Join the GeoDataFrames at the DataFrame level.

    Parameters
    ----------
    left_df : GeoDataFrame
    right_df : GeoDataFrame
    indices : tuple of ndarray
        Indices returned by the geometric join. Tuple with integer
        indices representing the matches from `left_df` and `right_df`
        respectively.
    distances : ndarray, optional
        Passed through and adapted based on the indices, if needed.
    how : string
        The type of join to use on the DataFrame level.
    lsuffix : string
        Suffix to apply to overlapping column names (left GeoDataFrame).
    rsuffix : string
        Suffix to apply to overlapping column names (right GeoDataFrame).
    predicate : string or None
        Only forwarded to `_adjust_indexers`.
    on_attribute: list, default None
        list of column names to merge on along with geometry

    Returns
    -------
    tuple
        The joined GeoDataFrame and the (possibly adjusted) distances.
    """
    keep_left_geometry = how in ("inner", "left")

    if on_attribute:  # avoid renaming or duplicating shared column
        right_df = right_df.drop(on_attribute, axis=1)

    # only one geometry column survives the join
    if keep_left_geometry:
        right_df = right_df.drop(right_df.geometry.name, axis=1)
    else:  # how == 'right'
        left_df = left_df.drop(left_df.geometry.name, axis=1)

    # Shallow copies: the index reset below may happen in place.
    left_df = left_df.copy(deep=False)
    left_nlevels, left_index_original = left_df.index.nlevels, left_df.index.names
    left_df, left_labels = _reset_index_with_suffix(left_df, lsuffix, right_df)

    right_df = right_df.copy(deep=False)
    right_nlevels, right_index_original = (
        right_df.index.nlevels,
        right_df.index.names,
    )
    right_df, right_labels = _reset_index_with_suffix(right_df, rsuffix, left_df)

    # if conflicting names in left and right, add suffix
    left_labels, right_labels = _process_column_names_with_suffix(
        left_labels,
        right_labels,
        (lsuffix, rsuffix),
        left_df,
        right_df,
    )
    left_df.columns = left_labels
    right_df.columns = right_labels
    # after the reset, the former index levels are the leading columns
    left_index = left_df.columns[:left_nlevels]
    right_index = right_df.columns[:right_nlevels]

    # perform join on the dataframes
    if how == "right":
        original_length = len(right_df)
    else:
        original_length = len(left_df)
    (l_idx, r_idx), distances = _adjust_indexers(
        indices, distances, original_length, how, predicate
    )

    # the `take` method doesn't allow introducing NaNs with -1 indices
    # left = left_df.take(l_idx)
    # therefore we are using the private _reindex_with_indexers as workaround
    new_index = pd.RangeIndex(len(l_idx))
    left_part = left_df._reindex_with_indexers({0: (new_index, l_idx)})
    right_part = right_df._reindex_with_indexers({0: (new_index, r_idx)})

    concat_kwargs = {} if PANDAS_GE_30 else {"copy": False}
    joined = pd.concat([left_part, right_part], axis=1, **concat_kwargs)

    if keep_left_geometry:
        joined = _restore_index(joined, left_index, left_index_original)
    else:  # how == 'right'
        joined = joined.set_geometry(right_df.geometry.name)
        joined = _restore_index(joined, right_index, right_index_original)

    return joined, distances
|
||||
|
||||
|
||||
def _nearest_query(
    left_df: GeoDataFrame,
    right_df: GeoDataFrame,
    max_distance: Optional[float],
    how: str,
    return_distance: bool,
    exclusive: bool,
    on_attribute: Optional[list] = None,
):
    """Query the spatial index for the nearest geometries.

    Parameters
    ----------
    left_df, right_df : GeoDataFrame
    max_distance : float or None
        Forwarded to ``sindex.nearest``.
    how : str
        Join type; for ``"right"`` the index of `left_df` is queried with the
        geometries of `right_df`, otherwise the other way around.
    return_distance : bool
        Whether distances are requested from the spatial index.
    exclusive : bool
        Forwarded to ``sindex.nearest``.
    on_attribute : list, default None
        Column names whose values must additionally match between the paired
        rows.

    Returns
    -------
    tuple
        ``((l_idx, r_idx), distances)`` where `distances` is ``None`` when
        `return_distance` is False.
    """
    # use the opposite of the join direction for the index
    use_left_as_sindex = how == "right"
    if use_left_as_sindex:
        sindex = left_df.sindex
        query = right_df.geometry
    else:
        sindex = right_df.sindex
        query = left_df.geometry

    if sindex:
        res = sindex.nearest(
            query,
            return_all=True,
            max_distance=max_distance,
            return_distance=return_distance,
            exclusive=exclusive,
        )
        if return_distance:
            (input_idx, tree_idx), distances = res
        else:
            (input_idx, tree_idx) = res
            distances = None

        if use_left_as_sindex:
            # the tree holds the left geometries: map back and order by the
            # left indexer (stable, so ties keep their query order)
            l_idx, r_idx = tree_idx, input_idx
            sort_order = np.argsort(l_idx, kind="stable")
            l_idx, r_idx = l_idx[sort_order], r_idx[sort_order]
            if distances is not None:
                distances = distances[sort_order]
        else:
            l_idx, r_idx = input_idx, tree_idx
    else:
        # when sindex is empty / has no valid geometries
        l_idx, r_idx = np.array([], dtype=np.intp), np.array([], dtype=np.intp)
        distances = np.array([], dtype=np.float64) if return_distance else None

    if on_attribute:
        for attr in on_attribute:
            (l_idx, r_idx), shared_attribute_rows = _filter_shared_attribute(
                left_df, right_df, l_idx, r_idx, attr
            )
            # distances is None when no distances were requested; indexing
            # None would raise a TypeError
            if distances is not None:
                distances = distances[shared_attribute_rows]

    return (l_idx, r_idx), distances
|
||||
|
||||
|
||||
def _filter_shared_attribute(left_df, right_df, l_idx, r_idx, attribute):
|
||||
"""
|
||||
Returns the indices for the left and right dataframe that share the same entry
|
||||
in the attribute column. Also returns a Boolean `shared_attribute_rows` for rows
|
||||
with the same entry.
|
||||
"""
|
||||
shared_attribute_rows = (
|
||||
left_df[attribute].iloc[l_idx].values == right_df[attribute].iloc[r_idx].values
|
||||
)
|
||||
|
||||
l_idx = l_idx[shared_attribute_rows]
|
||||
r_idx = r_idx[shared_attribute_rows]
|
||||
return (l_idx, r_idx), shared_attribute_rows
|
||||
|
||||
|
||||
def sjoin_nearest(
    left_df: GeoDataFrame,
    right_df: GeoDataFrame,
    how: str = "inner",
    max_distance: Optional[float] = None,
    lsuffix: str = "left",
    rsuffix: str = "right",
    distance_col: Optional[str] = None,
    exclusive: bool = False,
) -> GeoDataFrame:
    """Spatial join of two GeoDataFrames based on the distance between their geometries.

    Results will include multiple output records for a single input record
    where there are multiple equidistant nearest or intersected neighbors.

    Distance is calculated in CRS units and can be returned using the
    `distance_col` parameter.

    See the User Guide page
    https://geopandas.readthedocs.io/en/latest/docs/user_guide/mergingdata.html
    for more details.


    Parameters
    ----------
    left_df, right_df : GeoDataFrames
    how : string, default 'inner'
        The type of join:

        * 'left': use keys from left_df; retain only left_df geometry column
        * 'right': use keys from right_df; retain only right_df geometry column
        * 'inner': use intersection of keys from both dfs; retain only
          left_df geometry column
    max_distance : float, default None
        Maximum distance within which to query for nearest geometry.
        Must be greater than 0.
        The max_distance used to search for nearest items in the tree may have a
        significant impact on performance by reducing the number of input
        geometries that are evaluated for nearest items in the tree.
    lsuffix : string, default 'left'
        Suffix to apply to overlapping column names (left GeoDataFrame).
    rsuffix : string, default 'right'
        Suffix to apply to overlapping column names (right GeoDataFrame).
    distance_col : string, default None
        If set, save the distances computed between matching geometries under a
        column of this name in the joined GeoDataFrame.
    exclusive : bool, default False
        If True, the nearest geometries that are equal to the input geometry
        will not be returned, default False.

    Examples
    --------
    >>> import geodatasets
    >>> groceries = geopandas.read_file(
    ...     geodatasets.get_path("geoda.groceries")
    ... )
    >>> chicago = geopandas.read_file(
    ...     geodatasets.get_path("geoda.chicago_health")
    ... ).to_crs(groceries.crs)

    >>> chicago.head()  # doctest: +SKIP
       ComAreaID  ...                                           geometry
    0         35  ...  POLYGON ((-87.60914 41.84469, -87.60915 41.844...
    1         36  ...  POLYGON ((-87.59215 41.81693, -87.59231 41.816...
    2         37  ...  POLYGON ((-87.62880 41.80189, -87.62879 41.801...
    3         38  ...  POLYGON ((-87.60671 41.81681, -87.60670 41.816...
    4         39  ...  POLYGON ((-87.59215 41.81693, -87.59215 41.816...
    [5 rows x 87 columns]

    >>> groceries.head()  # doctest: +SKIP
       OBJECTID     Ycoord  ...  Category                           geometry
    0        16  41.973266  ...       NaN  MULTIPOINT ((-87.65661 41.97321))
    1        18  41.696367  ...       NaN  MULTIPOINT ((-87.68136 41.69713))
    2        22  41.868634  ...       NaN  MULTIPOINT ((-87.63918 41.86847))
    3        23  41.877590  ...       new  MULTIPOINT ((-87.65495 41.87783))
    4        27  41.737696  ...       NaN  MULTIPOINT ((-87.62715 41.73623))
    [5 rows x 8 columns]

    >>> groceries_w_communities = geopandas.sjoin_nearest(groceries, chicago)
    >>> groceries_w_communities[["Chain", "community", "geometry"]].head(2)
                   Chain    community                                geometry
    0     VIET HOA PLAZA       UPTOWN   MULTIPOINT ((1168268.672 1933554.35))
    1  COUNTY FAIR FOODS  MORGAN PARK  MULTIPOINT ((1162302.618 1832900.224))


    To include the distances:

    >>> groceries_w_communities = geopandas.sjoin_nearest(groceries, chicago, \
distance_col="distances")
    >>> groceries_w_communities[["Chain", "community", \
"distances"]].head(2)
                   Chain    community  distances
    0     VIET HOA PLAZA       UPTOWN        0.0
    1  COUNTY FAIR FOODS  MORGAN PARK        0.0

    In the following example, we get multiple groceries for Uptown because all
    results are equidistant (in this case zero because they intersect).
    In fact, we get 4 results in total:

    >>> chicago_w_groceries = geopandas.sjoin_nearest(groceries, chicago, \
distance_col="distances", how="right")
    >>> uptown_results = \
chicago_w_groceries[chicago_w_groceries["community"] == "UPTOWN"]
    >>> uptown_results[["Chain", "community"]]
                Chain community
    30  VIET HOA PLAZA    UPTOWN
    30      JEWEL OSCO    UPTOWN
    30          TARGET    UPTOWN
    30       Mariano's    UPTOWN

    See also
    --------
    sjoin : binary predicate joins
    GeoDataFrame.sjoin_nearest : equivalent method

    Notes
    -----
    Since this join relies on distances, results will be inaccurate
    if your geometries are in a geographic CRS.

    Every operation in GeoPandas is planar, i.e. the potential third
    dimension is not taken into account.
    """
    _basic_checks(left_df, right_df, how, lsuffix, rsuffix)

    # run the geographic-CRS check on both inputs, left first
    for frame in (left_df, right_df):
        frame.geometry.values.check_geographic_crs(stacklevel=1)

    # distances are only computed when the caller asked for a distance column
    want_distances = distance_col is not None

    matches, match_distances = _nearest_query(
        left_df,
        right_df,
        max_distance,
        how,
        want_distances,
        exclusive,
    )
    # no predicate is involved in a nearest join, hence the trailing ``None``
    joined, match_distances = _frame_join(
        left_df,
        right_df,
        matches,
        match_distances,
        how,
        lsuffix,
        rsuffix,
        None,
    )

    if want_distances:
        joined[distance_col] = match_distances

    return joined
|
||||
@@ -0,0 +1,484 @@
|
||||
"""Tests for the clip module."""
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
import shapely
|
||||
from shapely.geometry import (
|
||||
GeometryCollection,
|
||||
LinearRing,
|
||||
LineString,
|
||||
MultiPoint,
|
||||
Point,
|
||||
Polygon,
|
||||
box,
|
||||
)
|
||||
|
||||
import geopandas
|
||||
from geopandas import GeoDataFrame, GeoSeries, clip
|
||||
from geopandas._compat import HAS_PYPROJ
|
||||
from geopandas.tools.clip import _mask_is_list_like_rectangle
|
||||
|
||||
import pytest
|
||||
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
|
||||
from pandas.testing import assert_index_equal
|
||||
|
||||
# Names of fixtures in this module that express the single-rectangle mask as a
# GeoDataFrame and as list / tuple / array bounds.
# NOTE(review): presumably consumed by parametrized tests that look the
# fixture up by name — confirm against the tests further down the file.
mask_variants_single_rectangle = [
    "single_rectangle_gdf",
    "single_rectangle_gdf_list_bounds",
    "single_rectangle_gdf_tuple_bounds",
    "single_rectangle_gdf_array_bounds",
]
# Names of fixtures in this module that express the larger rectangle mask as a
# GeoDataFrame and as its bounds.
mask_variants_large_rectangle = [
    "larger_single_rectangle_gdf",
    "larger_single_rectangle_gdf_bounds",
]
|
||||
|
||||
|
||||
@pytest.fixture
def point_gdf():
    """Return a GeoDataFrame holding four points (EPSG:3857)."""
    coords = np.array([[2, 2], [3, 4], [9, 8], [-12, -15]])
    points = [Point(xy) for xy in coords]
    return GeoDataFrame(points, columns=["geometry"], crs="EPSG:3857")
|
||||
|
||||
|
||||
@pytest.fixture
def point_gdf2():
    """Return a GeoDataFrame holding six diagonal points in shuffled order."""
    coords = np.array([[5, 5], [2, 2], [4, 4], [0, 0], [3, 3], [1, 1]])
    points = [Point(xy) for xy in coords]
    return GeoDataFrame(points, columns=["geometry"], crs="EPSG:3857")
|
||||
|
||||
|
||||
@pytest.fixture
def pointsoutside_nooverlap_gdf():
    """Return a point GeoDataFrame lying entirely outside the single rectangle.

    Neither the points nor their total bounds touch the single rectangle."""
    coords = np.array([[5, 15], [15, 15], [15, 20]])
    points = [Point(xy) for xy in coords]
    return GeoDataFrame(points, columns=["geometry"], crs="EPSG:3857")
|
||||
|
||||
|
||||
@pytest.fixture
def pointsoutside_overlap_gdf():
    """Return a point GeoDataFrame whose points are outside the single rectangle.

    Unlike the no-overlap variant, the total bounds of the points do overlap
    the bounds of the single rectangle."""
    coords = np.array([[5, 15], [15, 15], [15, 5]])
    points = [Point(xy) for xy in coords]
    return GeoDataFrame(points, columns=["geometry"], crs="EPSG:3857")
|
||||
|
||||
|
||||
@pytest.fixture
def single_rectangle_gdf():
    """Return a one-row GeoDataFrame with the 10 x 10 clipping rectangle."""
    rectangle = Polygon([(0, 0), (0, 10), (10, 10), (10, 0), (0, 0)])
    mask = GeoDataFrame([1], geometry=[rectangle], crs="EPSG:3857")
    mask["attr2"] = "site-boundary"
    return mask
|
||||
|
||||
|
||||
@pytest.fixture
def single_rectangle_gdf_tuple_bounds(single_rectangle_gdf):
    """Total bounds of the single rectangle, as a tuple."""
    bounds = single_rectangle_gdf.total_bounds
    return tuple(bounds)
|
||||
|
||||
|
||||
@pytest.fixture
def single_rectangle_gdf_list_bounds(single_rectangle_gdf):
    """Total bounds of the single rectangle, as a list."""
    bounds = single_rectangle_gdf.total_bounds
    return list(bounds)
|
||||
|
||||
|
||||
@pytest.fixture
def single_rectangle_gdf_array_bounds(single_rectangle_gdf):
    """Total bounds of the single rectangle, exactly as ``total_bounds`` returns them."""
    bounds = single_rectangle_gdf.total_bounds
    return bounds
|
||||
|
||||
|
||||
@pytest.fixture
def larger_single_rectangle_gdf():
    """Return a slightly larger rectangle for clipping.

    The smaller single rectangle is used to test the edge case where slivers
    are returned when you clip polygons. This larger rectangle eliminates
    those slivers from the clip result.
    """
    rectangle = Polygon([(-5, -5), (-5, 15), (15, 15), (15, -5), (-5, -5)])
    mask = GeoDataFrame([1], geometry=[rectangle], crs="EPSG:3857")
    mask["attr2"] = ["study area"]
    return mask
|
||||
|
||||
|
||||
@pytest.fixture
def larger_single_rectangle_gdf_bounds(larger_single_rectangle_gdf):
    """Total bounds of the larger single rectangle, as a tuple."""
    bounds = larger_single_rectangle_gdf.total_bounds
    return tuple(bounds)
|
||||
|
||||
|
||||
@pytest.fixture
def buffered_locations(point_gdf):
    """Buffer the points by 4 units to turn them into polygons.

    The incoming ``point_gdf`` object is modified in place and returned."""
    point_gdf["geometry"] = point_gdf.buffer(4)
    point_gdf["type"] = "plot"
    return point_gdf
|
||||
|
||||
|
||||
@pytest.fixture
def donut_geometry(buffered_locations, single_rectangle_gdf):
    """Make a geometry with a hole in the middle (a donut).

    Built as the symmetric difference of the buffered points and the single
    rectangle."""
    return geopandas.overlay(
        buffered_locations, single_rectangle_gdf, how="symmetric_difference"
    )
|
||||
|
||||
|
||||
@pytest.fixture
def two_line_gdf():
    """Return a GeoDataFrame with two LineStrings for testing."""
    lines = [
        LineString([(1, 1), (2, 2), (3, 2), (5, 3)]),
        LineString([(3, 4), (5, 7), (12, 2), (10, 5), (9, 7.5)]),
    ]
    return GeoDataFrame([1, 2], geometry=lines, crs="EPSG:3857")
|
||||
|
||||
|
||||
@pytest.fixture
def multi_poly_gdf(donut_geometry):
    """Return a one-row GeoDataFrame holding the union of the donut geometries."""
    merged = donut_geometry.union_all()
    result = GeoDataFrame(geometry=GeoSeries(merged), crs="EPSG:3857")
    result["attr"] = ["pool"]
    return result
|
||||
|
||||
|
||||
@pytest.fixture
def multi_line(two_line_gdf):
    """Return a two-row line GeoDataFrame.

    The first row is the union of the two input lines, the second row is a
    single regular line."""
    merged_lines = two_line_gdf.union_all()
    single_line = LineString([(2, 1), (3, 1), (4, 1), (5, 2)])
    geoms = GeoSeries([merged_lines, single_line])
    result = GeoDataFrame(geometry=geoms, crs="EPSG:3857")
    result["attr"] = ["road", "stream"]
    return result
|
||||
|
||||
|
||||
@pytest.fixture
def multi_point(point_gdf):
    """Build a GeoDataFrame with one merged point feature and three points.

    The first row is the union of all geometries in ``point_gdf``; the
    remaining rows are individual points.
    """
    merged_points = point_gdf.union_all()
    geometries = GeoSeries(
        [merged_points, Point(2, 5), Point(-11, -14), Point(-10, -12)]
    )
    points_gdf = GeoDataFrame(geometry=geometries, crs="EPSG:3857")
    points_gdf["attr"] = ["tree", "another tree", "shrub", "berries"]
    return points_gdf
|
||||
|
||||
|
||||
@pytest.fixture
def mixed_gdf():
    """Build a GeoDataFrame mixing a Point, Polygon, LineString and LinearRing."""
    geometries = [
        Point(2, 3),
        Polygon([(3, 4), (5, 2), (12, 2), (10, 5), (9, 7.5)]),
        LineString([(1, 1), (2, 2), (3, 2), (5, 3), (12, 1)]),
        LinearRing([(1, 1), (2, 2), (3, 2), (5, 3), (12, 1)]),
    ]
    return GeoDataFrame([1, 2, 3, 4], geometry=geometries, crs="EPSG:3857")
|
||||
|
||||
|
||||
@pytest.fixture
def geomcol_gdf():
    """Build a one-row GeoDataFrame holding a GeometryCollection.

    The collection combines a Point and a Polygon.
    """
    members = [
        Point(2, 3),
        Polygon([(3, 4), (5, 2), (12, 2), (10, 5), (9, 7.5)]),
    ]
    collection = GeometryCollection(members)
    return GeoDataFrame([1], geometry=[collection], crs="EPSG:3857")
|
||||
|
||||
|
||||
@pytest.fixture
def sliver_line():
    """Build two lines, one of which will create a point when clipped."""
    lines = [
        LineString([(10, 5), (13, 5), (15, 5)]),
        LineString([(1, 1), (2, 2), (3, 2), (5, 3), (12, 1)]),
    ]
    return GeoDataFrame([1, 2], geometry=lines, crs="EPSG:3857")
|
||||
|
||||
|
||||
def test_not_gdf(single_rectangle_gdf):
    """Invalid ``gdf`` or ``mask`` arguments make ``clip`` raise TypeError."""
    invalid_calls = [
        ((2, 3), single_rectangle_gdf),
        (single_rectangle_gdf, "foobar"),
        (single_rectangle_gdf, (1, 2, 3)),
        (single_rectangle_gdf, (1, 2, 3, 4, 5)),
    ]
    for gdf_arg, mask_arg in invalid_calls:
        with pytest.raises(TypeError):
            clip(gdf_arg, mask_arg)
|
||||
|
||||
|
||||
def test_non_overlapping_geoms():
    """Test that clipping returns an empty result if the extents don't overlap."""

    def _shift_right(geom):
        # Move the geometry 20 units along x, well clear of the unit box.
        return shapely.affinity.translate(geom, xoff=20)

    unit_box = Polygon([(0, 0), (0, 1), (1, 1), (1, 0), (0, 0)])
    unit_gdf = GeoDataFrame([1], geometry=[unit_box], crs="EPSG:3857")
    shifted_mask = unit_gdf.copy().geometry.apply(_shift_right)

    clipped_frame = clip(unit_gdf, shifted_mask)
    assert_geodataframe_equal(clipped_frame, unit_gdf.iloc[:0])

    clipped_series = clip(unit_gdf.geometry, shifted_mask)
    assert_geoseries_equal(clipped_series, GeoSeries(crs=unit_gdf.crs))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("mask_fixture_name", mask_variants_single_rectangle)
class TestClipWithSingleRectangleGdf:
    """Clip checks repeated for every name in ``mask_variants_single_rectangle``."""

    @pytest.fixture
    def mask(self, mask_fixture_name, request):
        """Look up the parametrized fixture name and return its value."""
        resolved_mask = request.getfixturevalue(mask_fixture_name)
        return resolved_mask

    def test_returns_gdf(self, point_gdf, mask):
        """Clipping a GeoDataFrame returns a GeoDataFrame (or GDF-like) object."""
        result = clip(point_gdf, mask)
        assert isinstance(result, GeoDataFrame)

    def test_returns_series(self, point_gdf, mask):
        """Clipping a GeoSeries returns a GeoSeries."""
        result = clip(point_gdf.geometry, mask)
        assert isinstance(result, GeoSeries)

    def test_clip_points(self, point_gdf, mask):
        """Clipping the points GDF yields exactly the three expected points."""
        actual = clip(point_gdf, mask)
        expected_coords = np.array([[2, 2], [3, 4], [9, 8]])
        expected = GeoDataFrame(
            [Point(xy) for xy in expected_coords],
            columns=["geometry"],
            crs="EPSG:3857",
        )
        assert_geodataframe_equal(actual, expected)

    def test_clip_points_geom_col_rename(self, point_gdf, mask):
        """Clipping points works when the geometry column has been renamed."""
        renamed = point_gdf.rename_geometry("geometry2")
        actual = clip(renamed, mask)
        expected_coords = np.array([[2, 2], [3, 4], [9, 8]])
        expected = GeoDataFrame(
            [Point(xy) for xy in expected_coords],
            columns=["geometry2"],
            crs="EPSG:3857",
            geometry="geometry2",
        )
        assert_geodataframe_equal(actual, expected)

    def test_clip_poly(self, buffered_locations, mask):
        """Clipping a polygon GDF returns three Polygon rows."""
        clipped = clip(buffered_locations, mask)
        assert len(clipped.geometry) == 3
        assert (clipped.geom_type == "Polygon").all()

    def test_clip_poly_geom_col_rename(self, buffered_locations, mask):
        """Clipping polygons keeps a renamed geometry column name."""
        renamed = buffered_locations.rename_geometry("geometry2")
        clipped = clip(renamed, mask)
        assert len(clipped.geometry) == 3
        column_names = clipped.keys()
        assert "geometry" not in column_names
        assert "geometry2" in column_names

    def test_clip_poly_series(self, buffered_locations, mask):
        """Clipping a polygon GeoSeries returns three Polygons."""
        clipped = clip(buffered_locations.geometry, mask)
        assert len(clipped) == 3
        assert (clipped.geom_type == "Polygon").all()

    def test_clip_multipoly_keep_geom_type(self, multi_poly_gdf, mask):
        """With ``keep_geom_type=True`` only (Multi)Polygons are returned.

        The bounds of the clipped object should also equal the bounds of the
        clip object if they fully overlap (as they do in these fixtures).
        """
        clipped = clip(multi_poly_gdf, mask, keep_geom_type=True)
        if _mask_is_list_like_rectangle(mask):
            # A list-like rectangle mask is compared against the bounds as is.
            expected_bounds = mask
        else:
            expected_bounds = mask.total_bounds
        assert np.array_equal(clipped.total_bounds, expected_bounds)
        # The returned data must not contain geometry collections.
        polygonal = clipped.geom_type.isin(["Polygon", "MultiPolygon"])
        assert polygonal.all()

    def test_clip_multiline(self, multi_line, mask):
        """Clipping a multiline feature with a polygon keeps it a MultiLineString."""
        clipped = clip(multi_line, mask)
        first_type = clipped.geom_type[0]
        assert first_type == "MultiLineString"

    def test_clip_multipoint(self, multi_point, mask):
        """Clipping a multipoint feature with a polygon works as expected.

        The first returned row should be a single multi point feature.
        """
        clipped = clip(multi_point, mask)
        assert clipped.geom_type[0] == "MultiPoint"
        assert hasattr(clipped, "attr")
        assert len(clipped) == 2

        expected_multipoint = MultiPoint([Point(2, 2), Point(3, 4), Point(9, 8)])
        assert clipped.iloc[0].geometry.wkt == expected_multipoint.wkt

        # All returned features should intersect the clip geometry.
        if _mask_is_list_like_rectangle(mask):
            clip_shape = box(*mask)
        else:
            clip_shape = mask.union_all()
        assert all(clipped.intersects(clip_shape))

    def test_clip_lines(self, two_line_gdf, mask):
        """Clipping a line GDF keeps both lines."""
        clipped = clip(two_line_gdf, mask)
        assert len(clipped.geometry) == 2

    def test_mixed_geom(self, mixed_gdf, mask):
        """Clipping a mixed GeoDataFrame preserves the first three geometry types."""
        clipped = clip(mixed_gdf, mask)
        expected_types = {0: "Point", 1: "Polygon", 2: "LineString"}
        for label, expected_type in expected_types.items():
            assert clipped.geom_type[label] == expected_type

    def test_mixed_series(self, mixed_gdf, mask):
        """Clipping a mixed GeoSeries preserves the first three geometry types."""
        clipped = clip(mixed_gdf.geometry, mask)
        expected_types = {0: "Point", 1: "Polygon", 2: "LineString"}
        for label, expected_type in expected_types.items():
            assert clipped.geom_type[label] == expected_type

    def test_clip_with_line_extra_geom(self, sliver_line, mask):
        """No geometry collections are returned with ``keep_geom_type=True``.

        This covers the case where clipping a line would otherwise return a
        geometry collection.
        """
        clipped = clip(sliver_line, mask, keep_geom_type=True)
        assert len(clipped.geometry) == 1
        # The returned data must not contain a geometry collection.
        is_collection = clipped.geom_type == "GeometryCollection"
        assert not is_collection.any()

    def test_clip_no_box_overlap(self, pointsoutside_nooverlap_gdf, mask):
        """Clip is empty when the intersection is empty and boxes do not overlap."""
        clipped = clip(pointsoutside_nooverlap_gdf, mask)
        assert len(clipped) == 0

    def test_clip_box_overlap(self, pointsoutside_overlap_gdf, mask):
        """Clip is empty when the intersection is empty and boxes do overlap."""
        clipped = clip(pointsoutside_overlap_gdf, mask)
        assert len(clipped) == 0

    def test_warning_extra_geoms_mixed(self, mixed_gdf, mask):
        """A UserWarning is raised if ``keep_geom_type`` is used on a mixed GDF."""
        with pytest.warns(UserWarning):
            clip(mixed_gdf, mask, keep_geom_type=True)

    def test_warning_geomcoll(self, geomcol_gdf, mask):
        """A UserWarning is raised if ``keep_geom_type`` is used on a GDF
        with a GeometryCollection."""
        with pytest.warns(UserWarning):
            clip(geomcol_gdf, mask, keep_geom_type=True)
|
||||
|
||||
|
||||
def test_clip_line_keep_slivers(sliver_line, single_rectangle_gdf):
    """Test the correct output if a point is returned from a line-only
    geometry type."""
    clipped = clip(sliver_line, single_rectangle_gdf)
    # Without keep_geom_type the sliver comes back as a Point next to the
    # remaining LineString.
    returned_types = clipped.geom_type
    assert returned_types[0] == "Point"
    assert returned_types[1] == "LineString"
|
||||
|
||||
|
||||
def test_clip_multipoly_keep_slivers(multi_poly_gdf, single_rectangle_gdf):
    """Test a multi poly object where the return includes a sliver.

    The bounds of the clipped object should also equal the bounds of the
    clip object if they fully overlap (as they do in these fixtures).
    """
    clipped = clip(multi_poly_gdf, single_rectangle_gdf)
    mask_bounds = single_rectangle_gdf.total_bounds
    assert np.array_equal(clipped.total_bounds, mask_bounds)
    # The sliver geometries turn the result into a geometry collection.
    first_type = clipped.geom_type[0]
    assert "GeometryCollection" in first_type
|
||||
|
||||
|
||||
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not available")
def test_warning_crs_mismatch(point_gdf, single_rectangle_gdf):
    """A UserWarning about the CRS mismatch is raised when the mask was
    reprojected to EPSG:4326."""
    with pytest.warns(UserWarning, match="CRS mismatch between the CRS"):
        reprojected_mask = single_rectangle_gdf.to_crs(4326)
        clip(point_gdf, reprojected_mask)
|
||||
|
||||
|
||||
def test_clip_with_polygon(single_rectangle_gdf):
    """Test clip when the mask is a plain shapely polygon."""
    triangle = Polygon([(0, 0), (5, 12), (10, 0), (0, 0)])
    square = Polygon([(0, 0), (0, 10), (10, 10), (10, 0), (0, 0)])

    clipped = clip(single_rectangle_gdf, triangle)

    expected = GeoDataFrame(
        [1], geometry=[triangle.intersection(square)], crs="EPSG:3857"
    )
    expected["attr2"] = "site-boundary"
    assert_geodataframe_equal(clipped, expected)
|
||||
|
||||
|
||||
def test_clip_with_multipolygon(buffered_locations, single_rectangle_gdf):
    """Test clipping a polygon with a multipolygon."""
    dissolved = buffered_locations.dissolve(by="type")
    multi_mask = dissolved.reset_index()
    clipped = clip(single_rectangle_gdf, multi_mask)
    first_type = clipped.geom_type[0]
    assert first_type == "Polygon"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "mask_fixture_name",
    mask_variants_large_rectangle,
)
def test_clip_single_multipoly_no_extra_geoms(
    buffered_locations, mask_fixture_name, request
):
    """When clipping a multi-polygon feature, no additional geom types
    should be returned."""
    large_mask = request.getfixturevalue(mask_fixture_name)
    dissolved = buffered_locations.dissolve(by="type")
    multi = dissolved.reset_index()
    clipped = clip(multi, large_mask)
    first_type = clipped.geom_type[0]
    assert first_type == "Polygon"
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:All-NaN slice encountered")
@pytest.mark.parametrize(
    "mask",
    [
        Polygon(),
        (np.nan,) * 4,
        (np.nan, 0, np.nan, 1),
        GeoSeries([Polygon(), Polygon()], crs="EPSG:3857"),
        GeoSeries([Polygon(), Polygon()], crs="EPSG:3857").to_frame(),
        GeoSeries([], crs="EPSG:3857"),
        GeoSeries([], crs="EPSG:3857").to_frame(),
    ],
)
def test_clip_empty_mask(buffered_locations, mask):
    """Test that clipping with an empty mask returns an empty result."""
    # GeoDataFrame input: an empty frame with the same columns and CRS.
    clipped_frame = clip(buffered_locations, mask)
    empty_frame = GeoDataFrame([], columns=["geometry", "type"], crs="EPSG:3857")
    assert_geodataframe_equal(clipped_frame, empty_frame, check_index_type=False)

    # GeoSeries input: an empty series with the same CRS.
    clipped_series = clip(buffered_locations.geometry, mask)
    empty_series = GeoSeries([], crs="EPSG:3857")
    assert_geoseries_equal(clipped_series, empty_series)
|
||||
|
||||
|
||||
def test_clip_sorting(point_gdf2):
    """Test the ``sort`` keyword of ``clip``."""
    bbox = shapely.geometry.box(0, 0, 2, 2)
    unsorted_index = point_gdf2.clip(bbox).index
    sorted_index = point_gdf2.clip(bbox, sort=True).index

    # By default the result index is not in sorted order ...
    assert not (sorted(unsorted_index) == unsorted_index).all()
    # ... while ``sort=True`` returns the rows ordered by index.
    assert (sorted(sorted_index) == sorted_index).all()
    assert_index_equal(pd.Index([1, 3, 5]), sorted_index)
|
||||
@@ -0,0 +1,76 @@
|
||||
import numpy as np
|
||||
|
||||
from shapely.geometry import Point
|
||||
from shapely.wkt import loads
|
||||
|
||||
import geopandas
|
||||
|
||||
import pytest
|
||||
from pandas.testing import assert_series_equal
|
||||
|
||||
|
||||
def test_hilbert_distance():
    """Check the Hilbert code algorithm against hardcoded values per level."""
    geoms = geopandas.GeoSeries.from_wkt(
        [
            "POINT (0 0)",
            "POINT (1 1)",
            "POINT (1 0)",
            "POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))",
        ]
    )
    expected_by_level = {
        2: [0, 10, 15, 2],
        3: [0, 42, 63, 10],
        16: [0, 2863311530, 4294967295, 715827882],
    }
    for level, expected in expected_by_level.items():
        distances = geoms.hilbert_distance(total_bounds=(0, 0, 1, 1), level=level)
        assert distances.tolist() == expected
|
||||
|
||||
|
||||
@pytest.fixture
def geoseries_points():
    """Build a GeoSeries of four points."""
    coordinates = [(1, 2), (2, 3), (3, 4), (4, 1)]
    return geopandas.GeoSeries([Point(x, y) for x, y in coordinates])
|
||||
|
||||
|
||||
def test_hilbert_distance_level(geoseries_points):
    """``hilbert_distance`` raises ValueError when called with ``level=20``."""
    rejected_level = 20
    with pytest.raises(ValueError):
        geoseries_points.hilbert_distance(level=rejected_level)
|
||||
|
||||
|
||||
def test_specified_total_bounds(geoseries_points):
    """Passing the series' own total bounds matches the default result."""
    own_bounds = geoseries_points.total_bounds
    with_bounds = geoseries_points.hilbert_distance(total_bounds=own_bounds)
    default = geoseries_points.hilbert_distance()
    assert_series_equal(with_bounds, default)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "empty",
    [
        None,
        loads("POLYGON EMPTY"),
    ],
)
def test_empty(geoseries_points, empty):
    """``hilbert_distance`` raises ValueError when the last entry is replaced
    by ``None`` or an empty polygon."""
    geoseries_points.iloc[-1] = empty
    expected_message = "cannot be computed on a GeoSeries with empty"
    with pytest.raises(ValueError, match=expected_message):
        geoseries_points.hilbert_distance()
|
||||
|
||||
|
||||
def test_zero_width():
    """All points on the same line must not trigger floating point errors.

    This special case should avoid warnings caused by division by 0 and the
    introduction of NaN.
    """
    collinear = geopandas.GeoSeries([Point(0, 0), Point(0, 2), Point(0, 1)])
    # Turn every numpy floating point warning into an error.
    with np.errstate(all="raise"):
        distances = collinear.hilbert_distance()
    order = np.argsort(np.array(distances)).tolist()
    assert order == [0, 2, 1]
|
||||
@@ -0,0 +1,67 @@
|
||||
import numpy
|
||||
|
||||
import geopandas
|
||||
from geopandas.tools._random import uniform
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
def multipolygons(nybb_filename):
    """Return the geometry column read from the ``nybb_filename`` file."""
    nybb = geopandas.read_file(nybb_filename)
    return nybb.geometry
|
||||
|
||||
|
||||
@pytest.fixture
def polygons(multipolygons):
    """Return the exploded parts of ``multipolygons`` with a fresh index."""
    exploded = multipolygons.explode(ignore_index=True)
    return exploded.geometry
|
||||
|
||||
|
||||
@pytest.fixture
def multilinestrings(multipolygons):
    """Return the boundaries of the ``multipolygons`` fixture."""
    boundaries = multipolygons.boundary
    return boundaries
|
||||
|
||||
|
||||
@pytest.fixture
def linestrings(polygons):
    """Return the boundaries of the ``polygons`` fixture."""
    boundaries = polygons.boundary
    return boundaries
|
||||
|
||||
|
||||
@pytest.fixture
def points(multipolygons):
    """Return the centroids of the ``multipolygons`` fixture."""
    centroids = multipolygons.centroid
    return centroids
|
||||
|
||||
|
||||
@pytest.mark.parametrize("size", [10, 100])
@pytest.mark.parametrize(
    "geom_fixture", ["multipolygons", "polygons", "multilinestrings", "linestrings"]
)
def test_uniform(geom_fixture, size, request):
    """``uniform`` returns ``size`` points that all intersect the geometry."""
    geom = request.getfixturevalue(geom_fixture)[0]
    sample = uniform(geom, size=size, rng=1)

    # Split the sample into its individual parts.
    exploded = geopandas.GeoSeries(sample).explode(index_parts=True)
    sample_points = exploded.reset_index(drop=True)
    assert len(sample_points) == size

    # Buffer by a tiny distance before running the intersects query.
    buffered = sample_points.buffer(0.00000001)
    hits = buffered.sindex.query(geom, predicate="intersects")
    assert len(hits) == size
|
||||
|
||||
|
||||
def test_uniform_unsupported(points):
    """Sampling from a point warns and returns an empty geometry."""
    expected_warning = "Sampling is not supported"
    with pytest.warns(UserWarning, match=expected_warning):
        sample = uniform(points[0], size=10, rng=1)
    assert sample.is_empty
|
||||
|
||||
|
||||
def test_uniform_generator(polygons):
    """Integer seeds are reproducible; a shared generator advances between calls."""
    polygon = polygons[0]

    # The same integer seed gives the same sample twice.
    seeded_first = uniform(polygon, size=10, rng=1)
    seeded_second = uniform(polygon, size=10, rng=1)
    assert seeded_first.equals(seeded_second)

    # A generator seeded with 1 reproduces the seeded sample on its first
    # use and yields a different sample on the second use.
    generator = numpy.random.default_rng(seed=1)
    generated_first = uniform(polygon, size=10, rng=generator)
    generated_second = uniform(polygon, size=10, rng=generator)

    assert seeded_first.equals(generated_first)
    assert not seeded_first.equals(generated_second)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,51 @@
|
||||
from shapely.geometry import LineString, MultiPoint, Point
|
||||
|
||||
from geopandas import GeoSeries
|
||||
from geopandas.tools import collect
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
class TestTools:
    """Tests for ``geopandas.tools.collect``."""

    def setup_method(self):
        """Create the point, multipoint and line geometries shared by the tests."""
        self.p1 = Point(0, 0)
        self.p2 = Point(1, 1)
        self.p3 = Point(2, 2)
        self.mpc = MultiPoint([self.p1, self.p2, self.p3])

        self.mp1 = MultiPoint([self.p1, self.p2])
        self.line1 = LineString([(3, 3), (4, 4)])

    def test_collect_single(self):
        """A single geometry is returned unchanged."""
        result = collect(self.p1)
        assert self.p1.equals(result)

    def test_collect_single_force_multi(self):
        """``multi=True`` wraps a single point into a MultiPoint."""
        result = collect(self.p1, multi=True)
        expected = MultiPoint([self.p1])
        assert expected.equals(result)

    def test_collect_multi(self):
        """A single MultiPoint is returned unchanged."""
        result = collect(self.mp1)
        assert self.mp1.equals(result)

    def test_collect_multi_force_multi(self):
        """A single MultiPoint is returned unchanged even with ``multi=True``."""
        # ``multi=True`` was missing here, which made this test an exact
        # duplicate of ``test_collect_multi``.
        result = collect(self.mp1, multi=True)
        assert self.mp1.equals(result)

    def test_collect_list(self):
        """A list of points is collected into a MultiPoint."""
        result = collect([self.p1, self.p2, self.p3])
        assert self.mpc.equals(result)

    def test_collect_GeoSeries(self):
        """A GeoSeries of points is collected into a MultiPoint."""
        s = GeoSeries([self.p1, self.p2, self.p3])
        result = collect(s)
        assert self.mpc.equals(result)

    def test_collect_mixed_types(self):
        """Mixing a point and a line raises ValueError."""
        with pytest.raises(ValueError):
            collect([self.p1, self.line1])

    def test_collect_mixed_multi(self):
        """Passing more than one Multi* geometry raises ValueError."""
        with pytest.raises(ValueError):
            collect([self.mpc, self.mp1])
|
||||
@@ -0,0 +1,45 @@
|
||||
import pandas as pd
|
||||
|
||||
from shapely.geometry import MultiLineString, MultiPoint, MultiPolygon
|
||||
from shapely.geometry.base import BaseGeometry
|
||||
|
||||
# Maps the ``geom_type`` name of a single-part geometry to the Multi* class
# that ``collect`` uses to combine geometries of that type.
_multi_type_map = {
    "Point": MultiPoint,
    "LineString": MultiLineString,
    "Polygon": MultiPolygon,
}
|
||||
|
||||
|
||||
def collect(x, multi=False):
    """
    Collect single part geometries into their Multi* counterpart

    Parameters
    ----------
    x : an iterable or Series of Shapely geometries, a GeoSeries, or
        a single Shapely geometry
    multi : boolean, default False
        if True, force returned geometries to be Multi* even if they
        only have one component.

    Returns
    -------
    The single input geometry itself when there is only one geometry and
    it is already Multi* or ``multi`` is False; otherwise the Multi*
    geometry built from all input geometries.

    Raises
    ------
    ValueError
        If ``x`` contains no geometries, if the geometries are not all of
        the same type, or if more than one Multi* geometry is passed.
    """
    if isinstance(x, BaseGeometry):
        x = [x]
    else:
        # Materialise Series, generators and any other iterable so that the
        # indexing and ``len`` calls below work for every documented input.
        x = list(x)

    if not x:
        # Fail with a clear message instead of an IndexError from ``x[0]``.
        raise ValueError("Cannot collect an empty sequence of geometries")

    # We cannot create GeometryCollection here so all types
    # must be the same. If there is more than one element,
    # they cannot be Multi*, i.e., can't pass in combination of
    # Point and MultiPoint... or even just MultiPoint
    t = x[0].geom_type
    if not all(g.geom_type == t for g in x):
        raise ValueError("Geometry type must be homogeneous")
    if len(x) > 1 and t.startswith("Multi"):
        raise ValueError(f"Cannot collect {t}. Must have single geometries")

    if len(x) == 1 and (t.startswith("Multi") or not multi):
        # If there's only one single part geom and we're not forcing to
        # multi, then just return it
        return x[0]
    return _multi_type_map[t](x)
|
||||
Reference in New Issue
Block a user