refactor: excel parse

2026-04-16 10:01:11 +08:00
parent 680ecc320f
commit f62f95ec02
7941 changed files with 2899112 additions and 0 deletions
@@ -0,0 +1,29 @@
+from geopandas._config import options
+
+from geopandas.geoseries import GeoSeries
+from geopandas.geodataframe import GeoDataFrame
+from geopandas.array import points_from_xy
+
+from geopandas.io.file import _read_file as read_file
+from geopandas.io.file import _list_layers as list_layers
+from geopandas.io.arrow import _read_parquet as read_parquet
+from geopandas.io.arrow import _read_feather as read_feather
+from geopandas.io.sql import _read_postgis as read_postgis
+from geopandas.tools import sjoin, sjoin_nearest
+from geopandas.tools import overlay
+from geopandas.tools._show_versions import show_versions
+from geopandas.tools import clip
+
+
+import geopandas.datasets
+
+
+# make the interactive namespace easier to use
+# for `from geopandas import *` demos.
+import geopandas as gpd
+import pandas as pd
+import numpy as np
+
+from . import _version
+
+# Package version string, read from the "version" entry returned by
+# ``_version.get_versions()``.
+__version__ = _version.get_versions()["version"]
@@ -0,0 +1,92 @@
+import importlib
+from packaging.version import Version
+
+import pandas as pd
+
+import shapely
+import shapely.geos
+
+# -----------------------------------------------------------------------------
+# pandas compat
+# -----------------------------------------------------------------------------
+
+# Boolean flags: each is True when the installed pandas version is at least
+# the version named in the flag. The 1.4 and 3.0 thresholds are written as
+# pre-release versions ("rc0" / "dev0"), so those pre-releases already count.
+PANDAS_GE_14 = Version(pd.__version__) >= Version("1.4.0rc0")
+PANDAS_GE_15 = Version(pd.__version__) >= Version("1.5.0")
+PANDAS_GE_20 = Version(pd.__version__) >= Version("2.0.0")
+PANDAS_GE_202 = Version(pd.__version__) >= Version("2.0.2")
+PANDAS_GE_21 = Version(pd.__version__) >= Version("2.1.0")
+PANDAS_GE_22 = Version(pd.__version__) >= Version("2.2.0")
+PANDAS_GE_30 = Version(pd.__version__) >= Version("3.0.0.dev0")
+
+
+# -----------------------------------------------------------------------------
+# Shapely / GEOS compat
+# -----------------------------------------------------------------------------
+
+# True when the installed shapely is at least 2.0.4.
+SHAPELY_GE_204 = Version(shapely.__version__) >= Version("2.0.4")
+
+# GEOS flags compare ``shapely.geos.geos_version`` against a
+# (major, minor, patch) tuple.
+GEOS_GE_390 = shapely.geos.geos_version >= (3, 9, 0)
+GEOS_GE_310 = shapely.geos.geos_version >= (3, 10, 0)
+
+
+def import_optional_dependency(name: str, extra: str = ""):
+    """
+    Import an optional dependency.
+
+    Adapted from pandas.compat._optional::import_optional_dependency
+
+    Parameters
+    ----------
+    name : str
+        The module name.
+    extra : str
+        Additional text to include in the ImportError message.
+
+    Returns
+    -------
+    module
+        The imported module.
+
+    Raises
+    ------
+    ValueError
+        If ``name`` is not a string.
+    ImportError
+        If the module cannot be imported. The message names the missing
+        module and includes ``extra``; the original import error is
+        suppressed from the traceback.
+    """
+    # Validate before doing anything else so a bad ``name`` is reported as such.
+    if not isinstance(name, str):
+        raise ValueError(
+            "Invalid module name: '{name}'; must be a string".format(name=name)
+        )
+
+    try:
+        return importlib.import_module(name)
+    except ImportError:
+        # Adjacent string literals (not one triple-quoted literal) keep stray
+        # quote characters, newlines and indentation out of the message.
+        msg = (
+            f"Missing optional dependency '{name}'. {extra} "
+            f"Use pip or conda to install {name}."
+        )
+        raise ImportError(msg) from None
+
+
+# -----------------------------------------------------------------------------
+# pyproj compat
+# -----------------------------------------------------------------------------
+# Probe for pyproj once at import time. HAS_PYPROJ records the outcome;
+# ``pyproj_import_error`` is only defined when the import failed and holds
+# the text of the ImportError.
+try:
+    import pyproj  # noqa: F401
+
+    HAS_PYPROJ = True
+
+except ImportError as err:
+    HAS_PYPROJ = False
+    pyproj_import_error = str(err)
+
+
+def requires_pyproj(func):
+    """
+    Decorate ``func`` so that calling it without pyproj raises ImportError.
+
+    The check happens at call time: when ``HAS_PYPROJ`` is False the wrapper
+    raises an ImportError that names ``func`` and includes the original
+    pyproj import failure; otherwise the call is forwarded unchanged.
+
+    Parameters
+    ----------
+    func : callable
+        The function that needs pyproj.
+
+    Returns
+    -------
+    callable
+        Wrapper with the same name, docstring and signature metadata as
+        ``func``.
+    """
+    import functools
+
+    # functools.wraps keeps __name__/__doc__/__wrapped__ of the decorated
+    # function instead of exposing the generic "wrapper".
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        if not HAS_PYPROJ:
+            raise ImportError(
+                f"The 'pyproj' package is required for {func.__name__} to work. "
+                "Install it and initialize the object with a CRS before using it."
+                f"\nImporting pyproj resulted in: {pyproj_import_error}"
+            )
+        return func(*args, **kwargs)
+
+    return wrapper
@@ -0,0 +1,133 @@
+"""
+Lightweight options machinery.
+
+Based on https://github.com/topper-123/optioneer, but simplified (don't deal
+with nested options, deprecated options, ..), just the attribute-style dict
+like holding the options and giving a nice repr.
+"""
+
+import textwrap
+import warnings
+from collections import namedtuple
+
+# Description of one configurable option:
+#   key           - attribute name of the option
+#   default_value - initial value stored in the config
+#   doc           - help text shown in the Options repr (may be empty/None)
+#   validator     - optional callable, called with the new value before it is stored
+#   callback      - optional callable, called as callback(key, value) after storing
+Option = namedtuple("Option", "key default_value doc validator callback")
+
+
+class Options(object):
+    """
+    Provide attribute-style access to a configuration dict.
+
+    Parameters
+    ----------
+    options : dict
+        Mapping of option name to an ``Option``-like object exposing
+        ``default_value``, ``doc``, ``validator`` and ``callback``.
+
+    Only the names given at construction can be read or assigned; anything
+    else raises AttributeError.
+    """
+
+    def __init__(self, options):
+        # Bypass our own __setattr__, which only accepts existing option keys.
+        super().__setattr__("_options", options)
+        # populate with default values
+        config = {}
+        for key, option in options.items():
+            config[key] = option.default_value
+
+        super().__setattr__("_config", config)
+
+    def __setattr__(self, key, value):
+        """Validate and store ``value`` for an existing option, then run its callback."""
+        # you can't set new keys
+        if key in self._config:
+            option = self._options[key]
+            if option.validator:
+                option.validator(value)
+            self._config[key] = value
+            if option.callback:
+                option.callback(key, value)
+        else:
+            msg = "You can only set the value of existing options"
+            raise AttributeError(msg)
+
+    def __getattr__(self, key):
+        """Return the current value of option ``key``."""
+        # Only reached when normal attribute lookup fails. Read ``_config``
+        # straight from the instance dict: going through ``self._config``
+        # would re-enter __getattr__ without bound on an instance whose
+        # __init__ has not run (e.g. while ``copy`` or ``pickle`` rebuilds
+        # it), ending in RecursionError instead of AttributeError.
+        try:
+            return self.__dict__["_config"][key]
+        except KeyError:
+            raise AttributeError("No such option")
+
+    def __dir__(self):
+        """List the option names (used for tab completion)."""
+        return list(self._config.keys())
+
+    def __repr__(self):
+        """Render every option with its current value, default and help text."""
+        cls = self.__class__.__name__
+        description = ""
+        for key, option in self._options.items():
+            descr = "{key}: {cur!r} [default: {default!r}]\n".format(
+                key=key, cur=self._config[key], default=option.default_value
+            )
+            description += descr
+
+            if option.doc:
+                doc_text = "\n".join(textwrap.wrap(option.doc, width=70))
+            else:
+                doc_text = "No description available."
+            doc_text = textwrap.indent(doc_text, prefix="    ")
+            description += doc_text + "\n"
+        # indent the whole body by two spaces inside "ClassName(...)"
+        space = "\n  "
+        description = description.replace("\n", space)
+        return "{}({}{})".format(cls, space, description)
+
+
+def _validate_display_precision(value):
+    """Accept None or an integer in [0, 16]; raise ValueError otherwise."""
+    if value is None:
+        return
+    if isinstance(value, int) and 0 <= value <= 16:
+        return
+    raise ValueError("Invalid value, needs to be an integer [0-16]")
+
+
+# Option controlling coordinate precision in the WKT display; new values are
+# checked by ``_validate_display_precision`` and no callback is attached.
+display_precision = Option(
+    key="display_precision",
+    default_value=None,
+    doc=(
+        "The precision (maximum number of decimals) of the coordinates in "
+        "the WKT representation in the Series/DataFrame display. "
+        "By default (None), it tries to infer and use 3 decimals for projected "
+        "coordinates and 5 decimals for geographic coordinates."
+    ),
+    validator=_validate_display_precision,
+    callback=None,
+)
+
+
+def _warn_use_pygeos_deprecated(_value):
+    """Warn that the ``use_pygeos`` option no longer has any effect.
+
+    The assigned value is ignored; the function only emits the warning.
+    """
+    message = (
+        "pygeos support was removed in 1.0. "
+        "geopandas.use_pygeos is a no-op and will be removed in geopandas 1.1."
+    )
+    warnings.warn(message, stacklevel=3)
+
+
+def _validate_io_engine(value):
+    """Accept None, 'pyogrio' or 'fiona'; raise ValueError for anything else."""
+    if value is None:
+        return
+    supported = ("pyogrio", "fiona")
+    if value not in supported:
+        raise ValueError(f"Expected 'pyogrio' or 'fiona', got '{value}'")
+
+
+# Option selecting the default I/O engine; None is the default and new
+# values are checked by ``_validate_io_engine``.
+io_engine = Option(
+    key="io_engine",
+    default_value=None,
+    doc=(
+        "The default engine for ``read_file`` and ``to_file``. "
+        "Options are 'pyogrio' and 'fiona'."
+    ),
+    validator=_validate_io_engine,
+    callback=None,
+)
+
+# Deprecated option kept as a no-op: the "validator" slot is used as a hook
+# that emits the deprecation warning whenever the option is assigned.
+# TODO: remove this option (its messages announce removal in GeoPandas 1.1).
+use_pygeos = Option(
+    key="use_pygeos",
+    default_value=False,
+    doc=(
+        "Deprecated option previously used to enable PyGEOS. "
+        "It will be removed in GeoPandas 1.1."
+    ),
+    validator=_warn_use_pygeos_deprecated,
+    callback=None,
+)
+
+# Module-level Options instance holding the three options defined in this
+# module, each starting at its default value.
+options = Options(
+    {
+        "display_precision": display_precision,
+        "use_pygeos": use_pygeos,
+        "io_engine": io_engine,
+    }
+)
@@ -0,0 +1,52 @@
+from textwrap import dedent
+from typing import Callable, Union
+
+# doc decorator function ported with modifications from Pandas
+# https://github.com/pandas-dev/pandas/blob/master/pandas/util/_decorators.py
+
+
+def doc(*docstrings: Union[str, Callable], **params) -> Callable:
+    """
+    A decorator that takes docstring templates, concatenates them and performs
+    string substitution on the result.
+
+    The decorator stores the collected pieces on the wrapped callable as
+    ``_docstring_components`` so that they can be reused by later ``doc``
+    calls. String pieces are kept as templates and formatted with ``params``;
+    callables are kept as-is and contribute their dedented ``__doc__``.
+
+    Parameters
+    ----------
+    *docstrings : str or callable
+        The string / docstring / docstring template to be appended in order
+        after the default docstring of the decorated callable.
+    **params
+        The values used to format the docstring templates.
+    """
+
+    def decorator(decorated: Callable) -> Callable:
+        # start with the decorated callable's own docstring, if any
+        components: list[Union[str, Callable]] = []
+        own_doc = decorated.__doc__
+        if own_doc:
+            components.append(dedent(own_doc))
+
+        for template in docstrings:
+            if hasattr(template, "_docstring_components"):
+                # already processed by ``doc``: reuse its raw pieces
+                components.extend(template._docstring_components)
+            elif isinstance(template, str) or template.__doc__:
+                components.append(template)
+
+        # format the templates and glue everything together
+        rendered = []
+        for component in components:
+            if isinstance(component, str):
+                rendered.append(component.format(**params))
+            else:
+                rendered.append(dedent(component.__doc__ or ""))
+        decorated.__doc__ = "".join(rendered)
+
+        decorated._docstring_components = components
+        return decorated
+
+    return decorator
@@ -0,0 +1,21 @@
+
+# This file was generated by 'versioneer.py' (0.29) from
+# revision-control system data, or from the parent directory name of an
+# unpacked source archive. Distribution tarballs contain a pre-generated copy
+# of this file.
+
+import json
+
+version_json = '''
+{
+ "date": "2024-07-02T14:23:16+0200",
+ "dirty": false,
+ "error": null,
+ "full-revisionid": "747d66ee6fcf00b819c08f11ecded53736c4652b",
+ "version": "1.0.1"
+}
+'''  # END VERSION_JSON
+
+
+def get_versions():
+    """Return the version information parsed from ``version_json`` as a dict."""
+    versions = json.loads(version_json)
+    return versions
@@ -0,0 +1,47 @@
+import os.path
+
+import geopandas
+
+import pytest
+from geopandas.tests.util import _NATURALEARTH_CITIES, _NATURALEARTH_LOWRES, _NYBB
+
+
+@pytest.fixture(autouse=True)
+def add_geopandas(doctest_namespace):
+    """Expose the ``geopandas`` module under that name in the doctest namespace."""
+    doctest_namespace.update(geopandas=geopandas)
+
+
+# Datasets used in our tests
+
+
+@pytest.fixture(scope="session")
+def naturalearth_lowres() -> str:
+    """Path of the Natural Earth lowres dataset used in the tests."""
+    # skip if data missing, unless on github actions
+    data_present = os.path.isfile(_NATURALEARTH_LOWRES)
+    if not data_present and not os.getenv("GITHUB_ACTIONS"):
+        pytest.skip("Naturalearth lowres dataset not found")
+    return _NATURALEARTH_LOWRES
+
+
+@pytest.fixture(scope="session")
+def naturalearth_cities() -> str:
+    """Path of the Natural Earth cities dataset used in the tests."""
+    # skip if data missing, unless on github actions
+    data_present = os.path.isfile(_NATURALEARTH_CITIES)
+    if not data_present and not os.getenv("GITHUB_ACTIONS"):
+        pytest.skip("Naturalearth cities dataset not found")
+    return _NATURALEARTH_CITIES
+
+
+@pytest.fixture(scope="session")
+def nybb_filename() -> str:
+    """``zip://`` path of the NYBB dataset used in the tests."""
+    # skip if data missing, unless on github actions
+    archive_path = _NYBB[len("zip://") :]  # on-disk path without the scheme
+    data_present = os.path.isfile(archive_path)
+    if not data_present and not os.getenv("GITHUB_ACTIONS"):
+        pytest.skip("NYBB dataset not found")
+    return _NYBB
+
+
+@pytest.fixture(scope="class")
+def _setup_class_nybb_filename(nybb_filename, request):
+    """Attach nybb_filename class attribute for unittest style setup_method"""
+    setattr(request.cls, "nybb_filename", nybb_filename)
@@ -0,0 +1,25 @@
+__all__ = []
+available = []  # previously part of __all__
+_prev_available = ["naturalearth_cities", "naturalearth_lowres", "nybb"]
+
+
+def get_path(dataset):
+    """
+    Always raise: the bundled datasets were removed in GeoPandas 1.0.
+
+    Parameters
+    ----------
+    dataset : object
+        Name of the requested dataset. Any value is accepted; it only
+        selects which error message is produced.
+
+    Raises
+    ------
+    AttributeError
+        Always. For one of the previously bundled names the message points
+        to where that data can be obtained; for anything else it points to
+        the geodatasets package.
+    """
+    # Handle unknown names first. The specific message below evaluates
+    # ``'natural' in dataset``, which is only safe for the known string
+    # names; building it eagerly made e.g. ``get_path(None)`` fail with a
+    # TypeError instead of the intended AttributeError.
+    if dataset not in _prev_available:
+        error_msg = (
+            "The geopandas.dataset has been deprecated and "
+            "was removed in GeoPandas 1.0. New sample datasets are now available "
+            "in the geodatasets package (https://geodatasets.readthedocs.io/en/latest/)"
+        )
+        raise AttributeError(error_msg)
+
+    ne_message = "https://www.naturalearthdata.com/downloads/110m-cultural-vectors/."
+    nybb_message = (
+        "the geodatasets package.\n\nfrom geodatasets import get_path\n"
+        "path_to_file = get_path('nybb')\n"
+    )
+    error_msg = (
+        "The geopandas.dataset has been deprecated and was removed in GeoPandas "
+        f"1.0. You can get the original '{dataset}' data from "
+        f"{ne_message if 'natural' in dataset else nybb_message}"
+    )
+    raise AttributeError(error_msg)
@@ -0,0 +1,614 @@
+import json
+from packaging.version import Version
+from typing import Dict, Optional, Tuple
+
+import numpy as np
+import pandas as pd
+import pyarrow as pa
+from numpy.typing import NDArray
+
+import shapely
+from shapely import GeometryType
+
+from geopandas import GeoDataFrame
+from geopandas._compat import SHAPELY_GE_204
+from geopandas.array import from_shapely, from_wkb
+
+# Native GeoArrow encodings: the part after "geoarrow." in an extension name
+# is looked up in this list when reading Arrow data back into geometries.
+GEOARROW_ENCODINGS = [
+    "point",
+    "linestring",
+    "polygon",
+    "multipoint",
+    "multilinestring",
+    "multipolygon",
+]
+
+
+## GeoPandas -> GeoArrow
+
+
+class ArrowTable:
+    """
+    Thin wrapper exposing tabular Arrow data.
+
+    The wrapper implements the `Arrow PyCapsule Protocol`_ through its
+    ``__arrow_c_stream__`` method, so any Arrow implementation supporting
+    that protocol can consume it.
+
+    .. _Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
+
+    Example
+    -------
+    >>> import pyarrow as pa
+    >>> pa.table(gdf.to_arrow())  # doctest: +SKIP
+
+    """
+
+    def __init__(self, pa_table):
+        # wrapped object; must itself provide ``__arrow_c_stream__``
+        self._pa_table = pa_table
+
+    def __arrow_c_stream__(self, requested_schema=None):
+        """Delegate to the wrapped table's ``__arrow_c_stream__``."""
+        wrapped = self._pa_table
+        return wrapped.__arrow_c_stream__(requested_schema=requested_schema)
+
+
+class GeoArrowArray:
+    """
+    Thin wrapper exposing a geometry array as Arrow data.
+
+    The wrapper implements the `Arrow PyCapsule Protocol`_ through its
+    ``__arrow_c_array__`` method, so any Arrow implementation supporting
+    that protocol can consume it.
+
+    .. _Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
+
+    Example
+    -------
+    >>> import pyarrow as pa
+    >>> pa.array(ser.to_arrow())  # doctest: +SKIP
+
+    """
+
+    def __init__(self, pa_field, pa_array):
+        self._pa_array = pa_array
+        self._pa_field = pa_field
+
+    def __arrow_c_array__(self, requested_schema=None):
+        """
+        Return the (schema capsule, array capsule) pair.
+
+        The schema comes from the wrapped field, the array from the second
+        item returned by the wrapped array's ``__arrow_c_array__``.
+        Passing ``requested_schema`` raises NotImplementedError.
+        """
+        if requested_schema is not None:
+            raise NotImplementedError(
+                "Requested schema is not supported for geometry arrays"
+            )
+        schema_capsule = self._pa_field.__arrow_c_schema__()
+        array_capsule = self._pa_array.__arrow_c_array__()[1]
+        return (schema_capsule, array_capsule)
+
+
+def geopandas_to_arrow(
+    df,
+    index=None,
+    geometry_encoding="WKB",
+    interleaved=True,
+    include_z=None,
+):
+    """
+    Convert a GeoDataFrame to a pyarrow.Table.
+
+    Parameters
+    ----------
+    df : GeoDataFrame
+        The GeoDataFrame to convert.
+    index : bool, default None
+        If ``True``, always include the dataframe's index(es) as columns
+        in the output.
+        If ``False``, the index(es) will not be written.
+        If ``None``, the index(es) will be included as columns in the
+        output except `RangeIndex` which is stored as metadata only.
+    geometry_encoding : {'WKB', 'geoarrow' }, default 'WKB'
+        The GeoArrow encoding to use for the data conversion
+        (matched case-insensitively).
+    interleaved : bool, default True
+        Only relevant for 'geoarrow' encoding. If True, the geometries'
+        coordinates are interleaved in a single fixed size list array.
+        If False, the coordinates are stored as separate arrays in a
+        struct type.
+    include_z : bool, default None
+        Only relevant for 'geoarrow' encoding (for WKB, the dimensionality
+        of the individual geometries is preserved).
+        If False, return 2D geometries. If True, include the third dimension
+        in the output (if a geometry has no third dimension, the z-coordinates
+        will be NaN). By default, will infer the dimensionality from the
+        input geometries. Note that this inference can be unreliable with
+        empty geometries (for a guaranteed result, it is recommended to
+        specify the keyword).
+
+    Returns
+    -------
+    table : pyarrow.Table
+        The converted table.
+    geometry_encoding_dict : dict
+        Maps each geometry column name to the encoding used for it
+        ("WKB", or the GeoArrow extension name without its "geoarrow."
+        prefix).
+
+    Raises
+    ------
+    ValueError
+        For an unknown ``geometry_encoding``, or for 'geoarrow' with
+        pyarrow < 10.0.
+
+    """
+    is_geometry = df.dtypes == "geometry"
+    geometry_columns = df.columns[is_geometry]
+    geometry_indices = np.asarray(is_geometry).nonzero()[0]
+
+    df_attr = pd.DataFrame(df.copy(deep=False))
+
+    # replace geometry columns with dummy values -> will get converted to
+    # Arrow null column (not holding any memory), so we can afterwards
+    # fill the resulting table with the correct geometry fields
+    for col in geometry_columns:
+        df_attr[col] = None
+
+    table = pa.Table.from_pandas(df_attr, preserve_index=index)
+
+    encoding = geometry_encoding.lower()
+    if encoding not in ("geoarrow", "wkb"):
+        raise ValueError(
+            f"Expected geometry encoding 'WKB' or 'geoarrow' got {geometry_encoding}"
+        )
+    use_geoarrow = encoding == "geoarrow"
+    if use_geoarrow and Version(pa.__version__) < Version("10.0.0"):
+        raise ValueError("Converting to 'geoarrow' requires pyarrow >= 10.0.")
+
+    geometry_encoding_dict = {}
+
+    # Swap every placeholder column for its encoded geometry column
+    for i, col in zip(geometry_indices, geometry_columns):
+        if use_geoarrow:
+            field, encoded = construct_geometry_array(
+                np.array(df[col].array),
+                include_z=include_z,
+                field_name=col,
+                crs=df[col].crs,
+                interleaved=interleaved,
+            )
+            extension_name = field.metadata[b"ARROW:extension:name"].decode()
+            encoding_label = extension_name.removeprefix("geoarrow.")
+        else:
+            field, encoded = construct_wkb_array(
+                np.asarray(df[col].array), field_name=col, crs=df[col].crs
+            )
+            encoding_label = "WKB"
+        table = table.set_column(i, field, encoded)
+        geometry_encoding_dict[col] = encoding_label
+
+    return table, geometry_encoding_dict
+
+
+def construct_wkb_array(
+    shapely_arr: NDArray[np.object_],
+    *,
+    field_name: str = "geometry",
+    crs: Optional[str] = None,
+) -> Tuple[pa.Field, pa.Array]:
+    """
+    Encode an array of shapely geometries as a ``geoarrow.wkb`` field/array pair.
+
+    Parameters
+    ----------
+    shapely_arr : numpy array of shapely geometries
+        The geometries to encode.
+    field_name : str, default "geometry"
+        Name given to the returned Arrow field.
+    crs : optional
+        When not None, ``crs.to_json()`` is stored under "crs" in the
+        extension metadata (so despite the annotation, an object with a
+        ``to_json`` method is expected).
+
+    Returns
+    -------
+    field : pyarrow.Field
+        Binary, nullable field carrying the GeoArrow extension metadata.
+    array : pyarrow.Array
+        Binary array with the WKB of each geometry.
+
+    Raises
+    ------
+    ValueError
+        If GEOS is older than 3.10 and any geometry has a z coordinate.
+    """
+    # ISO WKB needs GEOS 3.10.0 or later, so that exact version must be
+    # included (``>=``), matching the "GEOS<3.10" error below.
+    if shapely.geos_version >= (3, 10, 0):
+        kwargs = {"flavor": "iso"}
+    else:
+        if shapely.has_z(shapely_arr).any():
+            raise ValueError("Cannot write 3D geometries with GEOS<3.10")
+        kwargs = {}
+
+    wkb_arr = shapely.to_wkb(shapely_arr, **kwargs)
+    extension_metadata = {"ARROW:extension:name": "geoarrow.wkb"}
+    if crs is not None:
+        extension_metadata["ARROW:extension:metadata"] = json.dumps(
+            {"crs": crs.to_json()}
+        )
+    else:
+        # In theory this should not be needed, but otherwise pyarrow < 17
+        # crashes on receiving such data through C Data Interface
+        # https://github.com/apache/arrow/issues/41741
+        extension_metadata["ARROW:extension:metadata"] = "{}"
+
+    field = pa.field(
+        field_name, type=pa.binary(), nullable=True, metadata=extension_metadata
+    )
+    parr = pa.array(np.asarray(wkb_arr), pa.binary())
+    return field, parr
+
+
+def _convert_inner_coords(coords, interleaved, dims, mask=None):
+    """
+    Build the innermost Arrow coordinate array from ``coords``.
+
+    With ``interleaved`` the result is a fixed size list array over the
+    flattened coordinates (list size ``len(dims)``); otherwise it is a
+    struct array with one float64 child per axis ("x", "y" and, unless
+    ``dims`` is "xy", "z"). ``mask`` is forwarded as the validity mask.
+    """
+    if not interleaved:
+        axis_names = "xy" if dims == "xy" else "xyz"
+        fields = [pa.field(axis, pa.float64(), nullable=False) for axis in axis_names]
+        # copy each column so every child gets its own array
+        columns = [coords[:, pos].copy() for pos in range(len(axis_names))]
+        return pa.StructArray.from_arrays(columns, fields=fields, mask=mask)
+
+    coords_field = pa.field(dims, pa.float64(), nullable=False)
+    typ = pa.list_(coords_field, len(dims))
+    flat_coords = coords.ravel()
+    if mask is None:
+        # mask keyword only added in pyarrow 15.0.0
+        return pa.FixedSizeListArray.from_arrays(flat_coords, type=typ)
+    return pa.FixedSizeListArray.from_arrays(flat_coords, type=typ, mask=mask)
+
+
+def _linestring_type(point_type):
+    """Arrow list type of non-nullable "vertices" of ``point_type``."""
+    vertices = pa.field("vertices", point_type, nullable=False)
+    return pa.list_(vertices)
+
+
+def _polygon_type(point_type):
+    """Arrow list type of non-nullable "rings", each a list of "vertices"."""
+    vertices = pa.field("vertices", point_type, nullable=False)
+    rings = pa.field("rings", pa.list_(vertices), nullable=False)
+    return pa.list_(rings)
+
+
+def _multipoint_type(point_type):
+    """Arrow list type of non-nullable "points" of ``point_type``."""
+    points = pa.field("points", point_type, nullable=False)
+    return pa.list_(points)
+
+
+def _multilinestring_type(point_type):
+    """Arrow list type of non-nullable "linestrings" (see ``_linestring_type``)."""
+    linestrings = pa.field(
+        "linestrings", _linestring_type(point_type), nullable=False
+    )
+    return pa.list_(linestrings)
+
+
+def _multipolygon_type(point_type):
+    """Arrow list type of non-nullable "polygons" (see ``_polygon_type``)."""
+    polygons = pa.field("polygons", _polygon_type(point_type), nullable=False)
+    return pa.list_(polygons)
+
+
+def construct_geometry_array(
+    shapely_arr: NDArray[np.object_],
+    include_z: Optional[bool] = None,
+    *,
+    field_name: str = "geometry",
+    crs: Optional[str] = None,
+    interleaved: bool = True,
+) -> Tuple[pa.Field, pa.Array]:
+    """
+    Encode an array of shapely geometries in a native GeoArrow layout.
+
+    Parameters
+    ----------
+    shapely_arr : numpy array of shapely geometries
+        The geometries to encode; passed to ``shapely.to_ragged_array``.
+    include_z : bool, optional
+        Forwarded to ``shapely.to_ragged_array``.
+    field_name : str, default "geometry"
+        Name given to the returned Arrow field.
+    crs : optional
+        When not None, ``crs.to_json()`` is stored under "crs" in the
+        extension metadata.
+    interleaved : bool, default True
+        Coordinate layout, see ``_convert_inner_coords``.
+
+    Returns
+    -------
+    field : pyarrow.Field
+        Nullable field whose metadata holds the GeoArrow extension name
+        and extension metadata.
+    array : pyarrow.Array
+        The encoded geometries.
+
+    Raises
+    ------
+    ValueError
+        For missing point values with interleaved coordinates on
+        pyarrow < 15.0.0, for coordinates that are neither 2D nor 3D, and
+        for geometry types without a GeoArrow encoding here.
+    """
+    # NOTE: this implementation returns a (field, array) pair so that it can set the
+    # extension metadata on the field without instantiating extension types into the
+    # global pyarrow registry
+    geom_type, coords, offsets = shapely.to_ragged_array(
+        shapely_arr, include_z=include_z
+    )
+
+    mask = shapely.is_missing(shapely_arr)
+    if not mask.any():
+        mask = None
+    else:
+        if (
+            geom_type == GeometryType.POINT
+            and interleaved
+            and Version(pa.__version__) < Version("15.0.0")
+        ):
+            raise ValueError(
+                "Converting point geometries with missing values is not supported "
+                "for interleaved coordinates with pyarrow < 15.0.0. Please "
+                "upgrade to a newer version of pyarrow."
+            )
+        mask = pa.array(mask, type=pa.bool_())
+
+        if geom_type == GeometryType.POINT and not SHAPELY_GE_204:
+            # bug in shapely < 2.0.4, see https://github.com/shapely/shapely/pull/2034
+            # this workaround only works if there are no empty points
+            indices = np.nonzero(mask)[0]
+            indices = indices - np.arange(len(indices))
+            coords = np.insert(coords, indices, np.nan, axis=0)
+
+    n_dims = coords.shape[-1]
+    if n_dims == 2:
+        dims = "xy"
+    elif n_dims == 3:
+        dims = "xyz"
+    else:
+        raise ValueError(f"Unexpected coords dimensions: {coords.shape}")
+
+    extension_metadata: Dict[str, str] = {}
+    if crs is None:
+        # In theory this should not be needed, but otherwise pyarrow < 17
+        # crashes on receiving such data through C Data Interface
+        # https://github.com/apache/arrow/issues/41741
+        extension_metadata["ARROW:extension:metadata"] = "{}"
+    else:
+        extension_metadata["ARROW:extension:metadata"] = json.dumps(
+            {"crs": crs.to_json()}
+        )
+
+    # Each branch only builds the array and picks the extension name; the
+    # field is assembled once below. The validity mask always goes on the
+    # outermost array.
+    if geom_type == GeometryType.POINT:
+        parr = _convert_inner_coords(coords, interleaved, dims, mask=mask)
+        extension_name = "geoarrow.point"
+
+    elif geom_type == GeometryType.LINESTRING:
+        assert len(offsets) == 1, "Expected one offsets array"
+        (geom_offsets,) = offsets
+        points = _convert_inner_coords(coords, interleaved, dims)
+        parr = pa.ListArray.from_arrays(
+            pa.array(geom_offsets), points, _linestring_type(points.type), mask=mask
+        )
+        extension_name = "geoarrow.linestring"
+
+    elif geom_type == GeometryType.POLYGON:
+        assert len(offsets) == 2, "Expected two offsets arrays"
+        ring_offsets, geom_offsets = offsets
+        points = _convert_inner_coords(coords, interleaved, dims)
+        rings = pa.ListArray.from_arrays(pa.array(ring_offsets), points)
+        parr = pa.ListArray.from_arrays(pa.array(geom_offsets), rings, mask=mask)
+        parr = parr.cast(_polygon_type(points.type))
+        extension_name = "geoarrow.polygon"
+
+    elif geom_type == GeometryType.MULTIPOINT:
+        assert len(offsets) == 1, "Expected one offsets array"
+        (geom_offsets,) = offsets
+        points = _convert_inner_coords(coords, interleaved, dims)
+        parr = pa.ListArray.from_arrays(
+            pa.array(geom_offsets),
+            points,
+            type=_multipoint_type(points.type),
+            mask=mask,
+        )
+        extension_name = "geoarrow.multipoint"
+
+    elif geom_type == GeometryType.MULTILINESTRING:
+        assert len(offsets) == 2, "Expected two offsets arrays"
+        ring_offsets, geom_offsets = offsets
+        points = _convert_inner_coords(coords, interleaved, dims)
+        lines = pa.ListArray.from_arrays(pa.array(ring_offsets), points)
+        parr = pa.ListArray.from_arrays(pa.array(geom_offsets), lines, mask=mask)
+        parr = parr.cast(_multilinestring_type(points.type))
+        extension_name = "geoarrow.multilinestring"
+
+    elif geom_type == GeometryType.MULTIPOLYGON:
+        assert len(offsets) == 3, "Expected three offsets arrays"
+        ring_offsets, polygon_offsets, geom_offsets = offsets
+        points = _convert_inner_coords(coords, interleaved, dims)
+        rings = pa.ListArray.from_arrays(pa.array(ring_offsets), points)
+        polygons = pa.ListArray.from_arrays(pa.array(polygon_offsets), rings)
+        parr = pa.ListArray.from_arrays(pa.array(geom_offsets), polygons, mask=mask)
+        parr = parr.cast(_multipolygon_type(points.type))
+        extension_name = "geoarrow.multipolygon"
+
+    else:
+        raise ValueError(f"Unsupported type for geoarrow: {geom_type}")
+
+    extension_metadata["ARROW:extension:name"] = extension_name
+    field = pa.field(
+        field_name,
+        parr.type,
+        nullable=True,
+        metadata=extension_metadata,
+    )
+    return field, parr
+
+
+## GeoArrow -> GeoPandas
+
+
+def _get_arrow_geometry_field(field):
+    """
+    Return ``(extension_name, extension_metadata)`` for a GeoArrow field.
+
+    The field metadata is checked first, then a registered pyarrow
+    extension type. The metadata part is the decoded JSON, or None when
+    absent/empty. Returns None when the field is not a "geoarrow." field.
+    """
+    meta = field.metadata
+    if meta is not None:
+        ext_name = meta.get(b"ARROW:extension:name", None)
+        if ext_name is not None and ext_name.startswith(b"geoarrow."):
+            ext_meta = meta.get(b"ARROW:extension:metadata", None)
+            if ext_meta is not None:
+                ext_meta = json.loads(ext_meta.decode())
+            return ext_name.decode(), ext_meta
+
+    if isinstance(field.type, pa.ExtensionType):
+        ext_name = field.type.extension_name
+        if ext_name.startswith("geoarrow."):
+            serialized = field.type.__arrow_ext_serialize__()
+            ext_meta = json.loads(serialized.decode()) if serialized else None
+            return ext_name, ext_meta
+
+    return None
+
+
+def arrow_to_geopandas(table, geometry=None):
+    """
+    Convert Arrow table object to a GeoDataFrame based on GeoArrow extension types.
+
+    Parameters
+    ----------
+    table : pyarrow.Table
+        The Arrow table to convert. Anything that is not a pyarrow.Table
+        is first passed through ``pa.table``.
+    geometry : str, default None
+        The name of the geometry column to set as the active geometry
+        column. If None, the first geometry column found will be used.
+
+    Returns
+    -------
+    GeoDataFrame
+
+    Raises
+    ------
+    ValueError
+        If the table has no GeoArrow geometry column.
+    TypeError
+        If a geometry column uses an unknown GeoArrow extension type.
+
+    """
+    if not isinstance(table, pa.Table):
+        table = pa.table(table)
+
+    # (position, column name, extension name, extension metadata) per geometry column
+    geom_fields = [
+        (position, field.name, *geom_info)
+        for position, field in enumerate(table.schema)
+        if (geom_info := _get_arrow_geometry_field(field)) is not None
+    ]
+    if not geom_fields:
+        raise ValueError("No geometry column found in the Arrow table.")
+
+    # convert the attribute columns on their own, then re-insert geometries
+    table_attr = table.drop([entry[1] for entry in geom_fields])
+    df = table_attr.to_pandas()
+
+    for position, col, ext_name, ext_meta in geom_fields:
+        has_crs = ext_meta is not None and "crs" in ext_meta
+        crs = ext_meta["crs"] if has_crs else None
+
+        if ext_name == "geoarrow.wkb":
+            geom_arr = from_wkb(np.array(table[col]), crs=crs)
+        elif ext_name.split(".")[1] in GEOARROW_ENCODINGS:
+            shapely_arr = construct_shapely_array(
+                table[col].combine_chunks(), ext_name
+            )
+            geom_arr = from_shapely(shapely_arr, crs=crs)
+        else:
+            raise TypeError(f"Unknown GeoArrow extension type: {ext_name}")
+
+        df.insert(position, col, geom_arr)
+
+    return GeoDataFrame(df, geometry=geometry or geom_fields[0][1])
+
+
+def arrow_to_geometry_array(arr):
+    """
+    Convert Arrow array object (representing single GeoArrow array) to a
+    geopandas GeometryArray.
+
+    Specifically for GeoSeries.from_arrow.
+
+    Raises ValueError for pyarrow < 14.0, for input without a GeoArrow
+    geometry field, and for an unknown GeoArrow extension type.
+    """
+    if Version(pa.__version__) < Version("14.0.0"):
+        raise ValueError("Importing from Arrow requires pyarrow >= 14.0.")
+
+    # import the field and the array through the PyCapsule interface
+    schema_capsule, array_capsule = arr.__arrow_c_array__()
+    field = pa.Field._import_from_c_capsule(schema_capsule)
+    pa_arr = pa.Array._import_from_c_capsule(field.__arrow_c_schema__(), array_capsule)
+
+    geom_info = _get_arrow_geometry_field(field)
+    if geom_info is None:
+        raise ValueError("No GeoArrow geometry field found.")
+    ext_name, ext_meta = geom_info
+
+    has_crs = ext_meta is not None and "crs" in ext_meta
+    crs = ext_meta["crs"] if has_crs else None
+
+    if ext_name == "geoarrow.wkb":
+        return from_wkb(np.array(pa_arr), crs=crs)
+    if ext_name.split(".")[1] in GEOARROW_ENCODINGS:
+        return from_shapely(construct_shapely_array(pa_arr, ext_name), crs=crs)
+    raise ValueError(f"Unknown GeoArrow extension type: {ext_name}")
+
+
+def _get_inner_coords(arr):
+    """
+    Return the coordinates of the innermost Arrow array as a 2D numpy array.
+
+    Struct arrays contribute their "x", "y" (and, unless there are exactly
+    two fields, "z") children as columns; anything else is treated as a
+    fixed size list whose values are reshaped to one row per element.
+    """
+    if not pa.types.is_struct(arr.type):
+        # fixed size list
+        return np.asarray(arr.values).reshape(len(arr), -1)
+
+    axis_names = ("x", "y") if arr.type.num_fields == 2 else ("x", "y", "z")
+    return np.column_stack([np.asarray(arr.field(axis)) for axis in axis_names])
+
+
+def construct_shapely_array(arr: pa.Array, extension_name: str):
+    """
+    Construct a NumPy array of shapely geometries from a pyarrow.Array
+    with GeoArrow extension type.
+
+    ``extension_name`` selects the layout ("geoarrow.point", ...,
+    "geoarrow.multipolygon"); any other name raises ValueError. Null
+    entries of ``arr`` become None in the result.
+    """
+    if isinstance(arr, pa.ExtensionArray):
+        arr = arr.storage
+
+    # extension name -> (shapely geometry type, number of nested list levels)
+    layouts = {
+        "geoarrow.point": (GeometryType.POINT, 0),
+        "geoarrow.linestring": (GeometryType.LINESTRING, 1),
+        "geoarrow.polygon": (GeometryType.POLYGON, 2),
+        "geoarrow.multipoint": (GeometryType.MULTIPOINT, 1),
+        "geoarrow.multilinestring": (GeometryType.MULTILINESTRING, 2),
+        "geoarrow.multipolygon": (GeometryType.MULTIPOLYGON, 3),
+    }
+    if extension_name not in layouts:
+        raise ValueError(extension_name)
+    geom_type, depth = layouts[extension_name]
+
+    # Walk down the nested lists, collecting the offsets of every level
+    # (outermost first) until the coordinate array is reached.
+    level_offsets = []
+    inner = arr
+    for _ in range(depth):
+        level_offsets.append(np.asarray(inner.offsets))
+        inner = inner.values
+    coords = _get_inner_coords(inner)
+
+    # from_ragged_array takes the offsets innermost first; points have none
+    offsets = tuple(reversed(level_offsets)) if depth else None
+    result = shapely.from_ragged_array(geom_type, coords, offsets)
+
+    # apply validity mask
+    if arr.null_count:
+        mask = np.asarray(arr.is_null())
+        result = np.where(mask, None, result)
+
+    return result
@@ -0,0 +1,72 @@
+from packaging.version import Version
+
+import pyarrow
+
+# Message template for the RuntimeError raised by the patched
+# 'arrow.py_extension_type' deserializer in `patch_pyarrow`; it is filled in
+# with the `storage_type`, `serialized` and `pickle_disassembly` fields.
+_ERROR_MSG = """\
+Disallowed deserialization of 'arrow.py_extension_type':
+storage_type = {storage_type}
+serialized = {serialized}
+pickle disassembly:\n{pickle_disassembly}
+
+Reading of untrusted Parquet or Feather files with a PyExtensionType column
+allows arbitrary code execution.
+If you trust this file, you can enable reading the extension type by one of:
+
+- upgrading to pyarrow >= 14.0.1, and call `pa.PyExtensionType.set_auto_load(True)`
+- install pyarrow-hotfix (`pip install pyarrow-hotfix`) and disable it by running
+  `import pyarrow_hotfix; pyarrow_hotfix.uninstall()`
+
+We strongly recommend updating your Parquet/Feather files to use extension types
+derived from `pyarrow.ExtensionType` instead, and register this type explicitly.
+See https://arrow.apache.org/docs/dev/python/extending_types.html#defining-extension-types-user-defined-types
+for more details.
+"""
+
+
+def patch_pyarrow():
+    """Block deserialization of 'arrow.py_extension_type' on older pyarrow.
+
+    Returns without changing anything when pyarrow is >= 14.0.1, when the
+    ``pyarrow_hotfix`` package can be imported, or when
+    ``pyarrow._hotfix_installed`` is already truthy. Otherwise the
+    "arrow.py_extension_type" registration is replaced with an extension type
+    whose ``__arrow_ext_deserialize__`` always raises ``RuntimeError``, and
+    ``pyarrow._hotfix_installed`` is set to ``True``.
+    """
+    # starting from pyarrow 14.0.1, it has its own mechanism
+    if Version(pyarrow.__version__) >= Version("14.0.1"):
+        return
+
+    # if the user has pyarrow_hotfix (https://github.com/pitrou/pyarrow-hotfix)
+    # installed, use this instead (which also ensures it works if they had
+    # called `pyarrow_hotfix.uninstall()`)
+    try:
+        import pyarrow_hotfix  # noqa: F401
+    except ImportError:
+        pass
+    else:
+        return
+
+    # if the hotfix is already installed and enabled
+    if getattr(pyarrow, "_hotfix_installed", False):
+        return
+
+    class ForbiddenExtensionType(pyarrow.ExtensionType):
+        # stand-in registered under "arrow.py_extension_type": it serializes
+        # to nothing and refuses every deserialization attempt
+        def __arrow_ext_serialize__(self):
+            return b""
+
+        @classmethod
+        def __arrow_ext_deserialize__(cls, storage_type, serialized):
+            import io
+            import pickletools
+
+            # the payload is only disassembled for the error message; it is
+            # never unpickled
+            out = io.StringIO()
+            pickletools.dis(serialized, out)
+            raise RuntimeError(
+                _ERROR_MSG.format(
+                    storage_type=storage_type,
+                    serialized=serialized,
+                    pickle_disassembly=out.getvalue(),
+                )
+            )
+
+    # swap the existing registration for the forbidding type
+    pyarrow.unregister_extension_type("arrow.py_extension_type")
+    pyarrow.register_extension_type(
+        ForbiddenExtensionType(pyarrow.null(), "arrow.py_extension_type")
+    )
+
+    # flag checked at the top of this function so the patch is applied once
+    pyarrow._hotfix_installed = True
+
+
+# applied as a side effect of importing this module
+patch_pyarrow()
@@ -0,0 +1,913 @@
+import json
+import warnings
+from packaging.version import Version
+
+import numpy as np
+from pandas import DataFrame, Series
+
+import shapely
+
+import geopandas
+from geopandas import GeoDataFrame
+from geopandas._compat import import_optional_dependency
+from geopandas.array import from_shapely, from_wkb
+
+from .file import _expand_user
+
+# schema version written by `_create_metadata` when none is requested and
+# every geometry column is WKB-encoded
+METADATA_VERSION = "1.0.0"
+# values accepted for `schema_version` by `_create_metadata`
+SUPPORTED_VERSIONS = ["0.1.0", "0.4.0", "1.0.0-beta.1", "1.0.0", "1.1.0"]
+# native (non-WKB) encoding names; prefixed with "geoarrow." when columns are
+# decoded in `_arrow_to_geopandas`
+GEOARROW_ENCODINGS = [
+    "point",
+    "linestring",
+    "polygon",
+    "multipoint",
+    "multilinestring",
+    "multipolygon",
+]
+# every value `_validate_geo_metadata` accepts for a column's "encoding" key
+SUPPORTED_ENCODINGS = ["WKB"] + GEOARROW_ENCODINGS
+
+# reference: https://github.com/opengeospatial/geoparquet
+
+# Metadata structure:
+# {
+#     "geo": {
+#         "columns": {
+#             "<name>": {
+#                 "encoding": "WKB"
+#                 "geometry_types": <list of str: REQUIRED>
+#                 "crs": "<PROJJSON or None: OPTIONAL>",
+#                 "orientation": "<'counterclockwise' or None: OPTIONAL>"
+#                 "edges": "planar"
+#                 "bbox": <list of [xmin, ymin, xmax, ymax]: OPTIONAL>
+#                 "epoch": <float: OPTIONAL>
+#             }
+#         },
+#         "primary_column": "<str: REQUIRED>",
+#         "version": "<METADATA_VERSION>",
+#
+#         # Additional GeoPandas specific metadata (not in metadata spec)
+#         "creator": {
+#             "library": "geopandas",
+#             "version": "<geopandas.__version__>"
+#         }
+#     }
+# }
+
+
+def _is_fsspec_url(url):
+    """Return True for string URLs with a scheme other than http(s).
+
+    Non-string inputs and strings without "://" yield False.
+    """
+    if not isinstance(url, str):
+        return False
+    if "://" not in url:
+        return False
+    return not url.startswith(("http://", "https://"))
+
+
+def _remove_id_from_member_of_ensembles(json_dict):
+    """
+    Strip the "id" entry from every item of any "members" list, in place.
+
+    Older PROJ versions will not recognize IDs of datum ensemble members that
+    were added in more recent PROJ database versions.
+
+    Cf https://github.com/opengeospatial/geoparquet/discussions/110
+    and https://github.com/OSGeo/PROJ/pull/3221
+
+    Mimicking the patch to GDAL from https://github.com/OSGeo/gdal/pull/5872
+
+    Nested dict values are walked recursively; lists are only inspected when
+    they are stored under the key "members".
+    """
+    for name, node in json_dict.items():
+        if isinstance(node, dict):
+            _remove_id_from_member_of_ensembles(node)
+            continue
+        if name == "members" and isinstance(node, list):
+            for entry in node:
+                entry.pop("id", None)
+
+
+# type ids 0 to 7
+# NOTE(review): index 2 repeats "LineString" -- presumably that type id is
+# shapely's LinearRing, reported here under the LineString name; confirm
+# against the shapely.get_type_id documentation.
+_geometry_type_names = [
+    "Point",
+    "LineString",
+    "LineString",
+    "Polygon",
+    "MultiPoint",
+    "MultiLineString",
+    "MultiPolygon",
+    "GeometryCollection",
+]
+# indices 8 to 15: the same names with a " Z" suffix; `_get_geometry_types`
+# adds 8 to the type id of geometries that have a z coordinate
+_geometry_type_names += [geom_type + " Z" for geom_type in _geometry_type_names]
+
+
+def _get_geometry_types(series):
+    """
+    Return the sorted names of the distinct geometry types in a GeoSeries.
+
+    Geometries with a z coordinate are reported with a " Z" suffix; missing
+    values do not contribute a name.
+    """
+    type_ids = shapely.get_type_id(series.array._data)
+    # shift 3D geometries into the "... Z" half of the lookup table
+    type_ids[shapely.has_z(series.array._data)] += 8
+
+    # shapely.get_type_id returns -1 for missing values; leave those out
+    names = [
+        _geometry_type_names[type_id]
+        for type_id in Series(type_ids).unique().tolist()
+        if type_id != -1
+    ]
+    names.sort()
+    return names
+
+
+def _create_metadata(
+    df, schema_version=None, geometry_encoding=None, write_covering_bbox=False
+):
+    """Create the geo metadata dict (not yet JSON-encoded).
+
+    Parameters
+    ----------
+    df : GeoDataFrame
+    schema_version : {'0.1.0', '0.4.0', '1.0.0-beta.1', '1.0.0', '1.1.0', None}
+        GeoParquet specification version; if not provided, "1.1.0" is used
+        when any column has a non-WKB encoding and ``METADATA_VERSION``
+        otherwise.
+    geometry_encoding : dict, optional
+        Mapping of geometry column name to the encoding name stored under
+        that column's "encoding" key. If None, every geometry column is
+        recorded as "WKB".
+    write_covering_bbox : bool, default False
+        If True, add a "covering" entry to each geometry column's metadata
+        that refers to the fields xmin, ymin, xmax, ymax of a column named
+        'bbox'. The column itself is not created here.
+
+    Returns
+    -------
+    dict
+
+    Raises
+    ------
+    ValueError
+        If ``schema_version`` is not one of ``SUPPORTED_VERSIONS``.
+    """
+    if schema_version is None:
+        if geometry_encoding and any(
+            encoding != "WKB" for encoding in geometry_encoding.values()
+        ):
+            schema_version = "1.1.0"
+        else:
+            schema_version = METADATA_VERSION
+
+    if schema_version not in SUPPORTED_VERSIONS:
+        raise ValueError(
+            f"schema_version must be one of: {', '.join(SUPPORTED_VERSIONS)}"
+        )
+
+    geometry_columns = df.columns[df.dtypes == "geometry"]
+    if geometry_encoding is None:
+        # the signature allows omitting the mapping; record WKB for every
+        # column instead of failing on ``None[col]`` further down
+        geometry_encoding = dict.fromkeys(geometry_columns, "WKB")
+
+    # Construct metadata for each geometry
+    column_metadata = {}
+    for col in geometry_columns:
+        series = df[col]
+
+        geometry_types = _get_geometry_types(series)
+        if schema_version[0] == "0":
+            # 0.x versions use the singular key and a bare string when there
+            # is exactly one geometry type
+            geometry_types_name = "geometry_type"
+            if len(geometry_types) == 1:
+                geometry_types = geometry_types[0]
+        else:
+            geometry_types_name = "geometry_types"
+
+        crs = None
+        if series.crs:
+            if schema_version == "0.1.0":
+                crs = series.crs.to_wkt()
+            else:  # version >= 0.4.0
+                crs = series.crs.to_json_dict()
+                _remove_id_from_member_of_ensembles(crs)
+
+        column_metadata[col] = {
+            "encoding": geometry_encoding[col],
+            "crs": crs,
+            geometry_types_name: geometry_types,
+        }
+
+        bbox = series.total_bounds.tolist()
+        if np.isfinite(bbox).all():
+            # don't add bbox with NaNs for empty / all-NA geometry column
+            column_metadata[col]["bbox"] = bbox
+
+        if write_covering_bbox:
+            column_metadata[col]["covering"] = {
+                "bbox": {
+                    "xmin": ["bbox", "xmin"],
+                    "ymin": ["bbox", "ymin"],
+                    "xmax": ["bbox", "xmax"],
+                    "ymax": ["bbox", "ymax"],
+                },
+            }
+
+    return {
+        "primary_column": df._geometry_column_name,
+        "columns": column_metadata,
+        "version": schema_version,
+        "creator": {"library": "geopandas", "version": geopandas.__version__},
+    }
+
+
+def _encode_metadata(metadata):
+    """Serialize a metadata dict to JSON and encode it as UTF-8 bytes.
+
+    Parameters
+    ----------
+    metadata : dict
+
+    Returns
+    -------
+    bytes
+        UTF-8 encoded JSON document
+    """
+    json_text = json.dumps(metadata)
+    return json_text.encode("utf-8")
+
+
+def _decode_metadata(metadata_str):
+    """Parse UTF-8 encoded JSON bytes back into a Python object.
+
+    Parameters
+    ----------
+    metadata_str : bytes (UTF-8 encoded JSON) or None
+
+    Returns
+    -------
+    dict
+        The decoded JSON document, or None when ``metadata_str`` is None.
+    """
+    if metadata_str is not None:
+        return json.loads(metadata_str.decode("utf-8"))
+    return None
+
+
+def _validate_dataframe(df):
+    """Check that ``df`` can be written to Parquet/Feather.
+
+    copied from `pandas.io.parquet`
+
+    Parameters
+    ----------
+    df : GeoDataFrame
+
+    Raises
+    ------
+    ValueError
+        If ``df`` is not a DataFrame, if its column names are not strings,
+        or if a named index level has a non-string name.
+    """
+
+    if not isinstance(df, DataFrame):
+        raise ValueError("Writing to Parquet/Feather only supports IO with DataFrames")
+
+    # must have value column names (strings only)
+    if df.columns.inferred_type not in ("string", "unicode", "empty"):
+        raise ValueError("Writing to Parquet/Feather requires string column names")
+
+    # unnamed (None) index levels are allowed; named ones must be strings
+    for level_name in df.index.names:
+        if level_name is not None and not isinstance(level_name, str):
+            raise ValueError("Index level names must be strings")
+
+
+def _validate_geo_metadata(metadata):
+    """Check that decoded geo metadata has the structure specified above.
+
+    Raises ValueError if the metadata is empty or structurally invalid, and
+    emits a UserWarning for columns that declare spherical edges.
+
+    Parameters
+    ----------
+    metadata : dict
+    """
+
+    if not metadata:
+        raise ValueError("Missing or malformed geo metadata in Parquet/Feather file")
+
+    # version was schema_version in 0.1.0
+    if not metadata.get("version", metadata.get("schema_version")):
+        raise ValueError(
+            "'geo' metadata in Parquet/Feather file is missing required key: "
+            "'version'"
+        )
+
+    for required in ("primary_column", "columns"):
+        if metadata.get(required) is None:
+            raise ValueError(
+                "'geo' metadata in Parquet/Feather file is missing required key: "
+                f"'{required}'"
+            )
+
+    columns = metadata["columns"]
+    if not isinstance(columns, dict):
+        raise ValueError("'columns' in 'geo' metadata must be a dict")
+
+    for col, column_metadata in columns.items():
+        # "encoding" is the only per-column key enforced here;
+        # "geometry_type" is left out for compatibility with 0.1
+        if "encoding" not in column_metadata:
+            raise ValueError(
+                "'geo' metadata in Parquet/Feather file is missing required key "
+                f"'encoding' for column '{col}'"
+            )
+
+        encoding = column_metadata["encoding"]
+        if encoding not in SUPPORTED_ENCODINGS:
+            raise ValueError(
+                "Only WKB geometry encoding or one of the native encodings "
+                f"({GEOARROW_ENCODINGS!r}) are supported, "
+                f"got: {encoding}"
+            )
+
+        edges = column_metadata.get("edges", "planar")
+        if edges == "spherical":
+            warnings.warn(
+                f"The geo metadata indicate that column '{col}' has spherical edges, "
+                "but because GeoPandas currently does not support spherical "
+                "geometry, it ignores this metadata and will interpret the edges of "
+                "the geometries as planar.",
+                UserWarning,
+                stacklevel=4,
+            )
+
+        # a bbox covering, when present, must name all four corner fields
+        if "covering" not in column_metadata:
+            continue
+        covering = column_metadata["covering"]
+        if "bbox" not in covering:
+            continue
+        bbox = covering["bbox"]
+        for var in ("xmin", "ymin", "xmax", "ymax"):
+            if var not in bbox.keys():
+                raise ValueError("Metadata for bbox column is malformed.")
+
+
+def _geopandas_to_arrow(
+    df,
+    index=None,
+    geometry_encoding="WKB",
+    schema_version=None,
+    write_covering_bbox=None,
+):
+    """
+    Build the Arrow table, including the b"geo" schema metadata, that
+    to_parquet/to_feather write out.
+
+    Raises ValueError when a non-WKB encoding is combined with an explicit
+    schema version other than "1.1.0", or when a covering bbox is requested
+    while the dataframe already has a column named 'bbox'.
+    """
+    from pyarrow import StructArray
+
+    from geopandas.io._geoarrow import geopandas_to_arrow
+
+    _validate_dataframe(df)
+
+    if (
+        schema_version is not None
+        and geometry_encoding != "WKB"
+        and schema_version != "1.1.0"
+    ):
+        raise ValueError(
+            "'geoarrow' encoding is only supported with schema version >= 1.1.0"
+        )
+
+    table, encoding_by_column = geopandas_to_arrow(
+        df, geometry_encoding=geometry_encoding, index=index, interleaved=False
+    )
+    geo_metadata = _create_metadata(
+        df,
+        schema_version=schema_version,
+        geometry_encoding=encoding_by_column,
+        write_covering_bbox=write_covering_bbox,
+    )
+
+    if write_covering_bbox:
+        if "bbox" in df.columns:
+            raise ValueError(
+                "An existing column 'bbox' already exists in the dataframe. "
+                "Please rename to write covering bbox."
+            )
+        # per-row bounds become a struct column with xmin/ymin/xmax/ymax fields
+        row_bounds = df.bounds
+        corner_arrays = [
+            row_bounds["minx"],
+            row_bounds["miny"],
+            row_bounds["maxx"],
+            row_bounds["maxy"],
+        ]
+        table = table.append_column(
+            "bbox",
+            StructArray.from_arrays(
+                corner_arrays, names=["xmin", "ymin", "xmax", "ymax"]
+            ),
+        )
+
+    # Store geopandas specific file-level metadata
+    # This must be done AFTER creating the table or it is not persisted
+    schema_metadata = table.schema.metadata
+    schema_metadata[b"geo"] = _encode_metadata(geo_metadata)
+
+    return table.replace_schema_metadata(schema_metadata)
+
+
+def _to_parquet(
+    df,
+    path,
+    index=None,
+    compression="snappy",
+    geometry_encoding="WKB",
+    schema_version=None,
+    write_covering_bbox=False,
+    **kwargs,
+):
+    """
+    Write a GeoDataFrame to the Parquet format.
+
+    Any geometry columns present are serialized to WKB format in the file
+    unless a different ``geometry_encoding`` is requested.
+
+    Requires 'pyarrow'.
+
+    This is tracking version 1.0.0 of the GeoParquet specification at:
+    https://github.com/opengeospatial/geoparquet. Writing older versions is
+    supported using the `schema_version` keyword.
+
+    .. versionadded:: 0.8
+
+    Parameters
+    ----------
+    path : str, path object
+    index : bool, default None
+        If ``True``, always include the dataframe's index(es) as columns
+        in the file output.
+        If ``False``, the index(es) will not be written to the file.
+        If ``None``, the index(es) will be included as columns in the file
+        output except `RangeIndex` which is stored as metadata only.
+    compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy'
+        Name of the compression to use. Use ``None`` for no compression.
+    geometry_encoding : {'WKB', 'geoarrow'}, default 'WKB'
+        The encoding to use for the geometry columns. Defaults to "WKB"
+        for maximum interoperability. Specify "geoarrow" to use one of the
+        native GeoArrow-based single-geometry type encodings.
+    schema_version : {'0.1.0', '0.4.0', '1.0.0-beta.1', '1.0.0', '1.1.0', None}
+        GeoParquet specification version; if not provided will default to
+        latest supported version.
+    write_covering_bbox : bool, default False
+        Writes the bounding box column for each row entry with column
+        name 'bbox'. Writing a bbox column can be computationally
+        expensive, hence its default setting is False.
+    **kwargs
+        Additional keyword arguments passed to pyarrow.parquet.write_table().
+    """
+    parquet = import_optional_dependency(
+        "pyarrow.parquet", extra="pyarrow is required for Parquet support."
+    )
+
+    path = _expand_user(path)
+    # validation, geometry encoding and the "geo" metadata are all handled by
+    # the shared helper; only the final write is Parquet-specific
+    table = _geopandas_to_arrow(
+        df,
+        index=index,
+        geometry_encoding=geometry_encoding,
+        schema_version=schema_version,
+        write_covering_bbox=write_covering_bbox,
+    )
+    parquet.write_table(table, path, compression=compression, **kwargs)
+
+
+def _to_feather(df, path, index=None, compression=None, schema_version=None, **kwargs):
+    """
+    Write a GeoDataFrame to the Feather format.
+
+    Any geometry columns present are serialized to WKB format in the file.
+
+    Requires 'pyarrow' >= 0.17.
+
+    This is tracking version 1.0.0 of the GeoParquet specification for
+    the metadata at: https://github.com/opengeospatial/geoparquet. Writing
+    older versions is supported using the `schema_version` keyword.
+
+    .. versionadded:: 0.8
+
+    Parameters
+    ----------
+    path : str, path object
+    index : bool, default None
+        If ``True``, always include the dataframe's index(es) as columns
+        in the file output.
+        If ``False``, the index(es) will not be written to the file.
+        If ``None``, the index(es) will be included as columns in the file
+        output except `RangeIndex` which is stored as metadata only.
+    compression : {'zstd', 'lz4', 'uncompressed'}, optional
+        Name of the compression to use. Use ``"uncompressed"`` for no
+        compression. By default uses LZ4 if available, otherwise uncompressed.
+    schema_version : {'0.1.0', '0.4.0', '1.0.0-beta.1', '1.0.0', '1.1.0', None}
+        GeoParquet specification version for the metadata; if not provided
+        will default to latest supported version.
+    kwargs
+        Additional keyword arguments passed to pyarrow.feather.write_feather().
+
+    Raises
+    ------
+    ImportError
+        If the installed pyarrow is older than 0.17.0.
+    """
+    feather = import_optional_dependency(
+        "pyarrow.feather", extra="pyarrow is required for Feather support."
+    )
+    # TODO move this into `import_optional_dependency`
+    import pyarrow
+
+    if Version(pyarrow.__version__) < Version("0.17.0"):
+        raise ImportError("pyarrow >= 0.17 required for Feather support")
+
+    path = _expand_user(path)
+    # geometry_encoding is not forwarded, so the helper's "WKB" default applies
+    table = _geopandas_to_arrow(df, index=index, schema_version=schema_version)
+    feather.write_feather(table, path, compression=compression, **kwargs)
+
+
+def _arrow_to_geopandas(table, geo_metadata=None):
+    """
+    Helper function with main, shared logic for read_parquet/read_feather.
+
+    Parameters
+    ----------
+    table : Arrow table read from the file
+    geo_metadata : dict, optional
+        Decoded "geo" metadata. If None, it is decoded and validated from
+        ``table.schema.metadata``.
+
+    Returns
+    -------
+    GeoDataFrame
+
+    Raises
+    ------
+    ValueError
+        If none of the geometry columns listed in the metadata are present
+        in ``table``.
+    """
+    if geo_metadata is None:
+        # Note: this path of not passing metadata is also used by dask-geopandas
+        geo_metadata = _validate_and_decode_metadata(table.schema.metadata)
+
+    # Find all geometry columns that were read from the file.  May
+    # be a subset if 'columns' parameter is used.
+    geometry_columns = [
+        col for col in geo_metadata["columns"] if col in table.column_names
+    ]
+    # convert a zero-row slice to pandas just to learn the final column order
+    result_column_names = list(table.slice(0, 0).to_pandas().columns)
+    # process the geometry columns in that same order
+    geometry_columns.sort(key=result_column_names.index)
+
+    if not len(geometry_columns):
+        raise ValueError(
+            """No geometry columns are included in the columns read from
+            the Parquet/Feather file.  To read this file without geometry columns,
+            use pandas.read_parquet/read_feather() instead."""
+        )
+
+    geometry = geo_metadata["primary_column"]
+
+    # Missing geometry likely indicates a subset of columns was read;
+    # promote the first available geometry to the primary geometry.
+    if len(geometry_columns) and geometry not in geometry_columns:
+        geometry = geometry_columns[0]
+
+        # if there are multiple non-primary geometry columns, raise a warning
+        if len(geometry_columns) > 1:
+            warnings.warn(
+                "Multiple non-primary geometry columns read from Parquet/Feather "
+                "file. The first column read was promoted to the primary geometry.",
+                stacklevel=3,
+            )
+
+    # the non-geometry columns go through the regular pandas conversion;
+    # geometry columns are decoded separately and inserted below
+    table_attr = table.drop(geometry_columns)
+    df = table_attr.to_pandas()
+
+    # Convert the WKB columns that are present back to geometry.
+    for col in geometry_columns:
+        col_metadata = geo_metadata["columns"][col]
+        if "crs" in col_metadata:
+            crs = col_metadata["crs"]
+            if isinstance(crs, dict):
+                _remove_id_from_member_of_ensembles(crs)
+        else:
+            # per the GeoParquet spec, missing CRS is to be interpreted as
+            # OGC:CRS84
+            crs = "OGC:CRS84"
+
+        if col_metadata["encoding"] == "WKB":
+            geom_arr = from_wkb(np.array(table[col]), crs=crs)
+        else:
+            from geopandas.io._geoarrow import construct_shapely_array
+
+            # native encodings: the extension name passed on is "geoarrow."
+            # followed by the encoding from the metadata
+            geom_arr = from_shapely(
+                construct_shapely_array(
+                    table[col].combine_chunks(), "geoarrow." + col_metadata["encoding"]
+                ),
+                crs=crs,
+            )
+
+        # put the column back at its position in the full column order
+        df.insert(result_column_names.index(col), col, geom_arr)
+
+    return GeoDataFrame(df, geometry=geometry)
+
+
+def _get_filesystem_path(path, filesystem=None, storage_options=None):
+    """
+    Get the filesystem and path for a given filesystem and path.
+
+    If the filesystem is not None then it's just returned as is.
+
+    Parameters
+    ----------
+    path : str or path object
+    filesystem : filesystem object, optional
+    storage_options : dict, optional
+        Passed as keyword arguments to ``fsspec.core.url_to_fs`` when
+        ``path`` is an fsspec-style URL.
+
+    Returns
+    -------
+    tuple
+        ``(filesystem, path)``; ``filesystem`` may still be None.
+
+    Raises
+    ------
+    ValueError
+        If ``storage_options`` is given but no filesystem could be resolved.
+    """
+    import pyarrow
+
+    if (
+        isinstance(path, str)
+        and storage_options is None
+        and filesystem is None
+        and Version(pyarrow.__version__) >= Version("5.0.0")
+    ):
+        # Use the native pyarrow filesystem if possible.
+        try:
+            from pyarrow.fs import FileSystem
+
+            filesystem, path = FileSystem.from_uri(path)
+        except Exception:
+            # deliberate best-effort: fallback to use get_handle / fsspec
+            # for filesystems that pyarrow doesn't support
+            pass
+
+    if _is_fsspec_url(path) and filesystem is None:
+        fsspec = import_optional_dependency(
+            "fsspec", extra="fsspec is required for 'storage_options'."
+        )
+        filesystem, path = fsspec.core.url_to_fs(path, **(storage_options or {}))
+
+    if filesystem is None and storage_options:
+        raise ValueError(
+            "Cannot provide 'storage_options' with non-fsspec path '{}'".format(path)
+        )
+
+    return filesystem, path
+
+
+def _ensure_arrow_fs(filesystem):
+    """
+    Simplified version of pyarrow.fs._ensure_filesystem. This is only needed
+    below because `pyarrow.parquet.read_metadata` does not yet accept a
+    filesystem keyword (https://issues.apache.org/jira/browse/ARROW-16719)
+
+    pyarrow filesystems are returned unchanged, fsspec filesystems are
+    wrapped in a ``PyFileSystem``, and anything else is passed through as is.
+    """
+    from pyarrow import fs
+
+    if not isinstance(filesystem, fs.FileSystem):
+        # handle fsspec-compatible filesystems (only if fsspec is installed)
+        try:
+            import fsspec
+        except ImportError:
+            fsspec = None
+
+        if fsspec is not None and isinstance(filesystem, fsspec.AbstractFileSystem):
+            return fs.PyFileSystem(fs.FSSpecHandler(filesystem))
+
+    return filesystem
+
+
+def _validate_and_decode_metadata(metadata):
+    """Decode and validate the b"geo" entry of Arrow schema metadata.
+
+    Parameters
+    ----------
+    metadata : dict or None
+        Schema metadata with bytes keys; must contain b"geo".
+
+    Returns
+    -------
+    dict
+        The decoded geo metadata.
+
+    Raises
+    ------
+    ValueError
+        If the b"geo" entry is absent, cannot be decoded as UTF-8 JSON, or
+        fails `_validate_geo_metadata`.
+    """
+    if metadata is None or b"geo" not in metadata:
+        raise ValueError(
+            """Missing geo metadata in Parquet/Feather file.
+            Use pandas.read_parquet/read_feather() instead."""
+        )
+
+    # check for malformed metadata
+    try:
+        decoded_geo_metadata = _decode_metadata(metadata.get(b"geo", b""))
+    except (TypeError, ValueError) as err:
+        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError
+        # (bytes that are not valid UTF-8); keep the cause for debugging
+        raise ValueError(
+            "Missing or malformed geo metadata in Parquet/Feather file"
+        ) from err
+
+    _validate_geo_metadata(decoded_geo_metadata)
+    return decoded_geo_metadata
+
+
+def _read_parquet_schema_and_metadata(path, filesystem):
+    """
+    Opening the Parquet file/dataset a first time to get the schema and metadata.
+
+    TODO: we should look into how we can reuse opened dataset for reading the
+    actual data, to avoid discovering the dataset twice (problem right now is
+    that the ParquetDataset interface doesn't allow passing the filters on read)
+
+    Returns
+    -------
+    tuple
+        ``(schema, metadata)`` where ``metadata`` is ``schema.metadata`` or,
+        when that lacks a b"geo" entry and the fallback read succeeds, the
+        metadata returned by ``parquet.read_metadata``.
+    """
+    import pyarrow
+    from pyarrow import parquet
+
+    kwargs = {}
+    # `use_legacy_dataset=False` is only passed for pyarrow older than 15.0.0
+    if Version(pyarrow.__version__) < Version("15.0.0"):
+        kwargs = dict(use_legacy_dataset=False)
+
+    try:
+        schema = parquet.ParquetDataset(path, filesystem=filesystem, **kwargs).schema
+    except Exception:
+        # any failure to open as a dataset falls back to reading the schema
+        # directly
+        schema = parquet.read_schema(path, filesystem=filesystem)
+
+    metadata = schema.metadata
+
+    # read metadata separately to get the raw Parquet FileMetaData metadata
+    # (pyarrow doesn't properly exposes those in schema.metadata for files
+    # created by GDAL - https://issues.apache.org/jira/browse/ARROW-16688)
+    if metadata is None or b"geo" not in metadata:
+        try:
+            metadata = parquet.read_metadata(path, filesystem=filesystem).metadata
+        except Exception:
+            # best-effort: keep the schema-level metadata if this read fails
+            pass
+
+    return schema, metadata
+
+
+def _read_parquet(path, columns=None, storage_options=None, bbox=None, **kwargs):
+    """
+    Load a Parquet object from the file path, returning a GeoDataFrame.
+
+    You can read a subset of columns in the file using the ``columns`` parameter.
+    However, the structure of the returned GeoDataFrame will depend on which
+    columns you read:
+
+    * if no geometry columns are read, this will raise a ``ValueError`` - you
+      should use the pandas `read_parquet` method instead.
+    * if the primary geometry column saved to this file is not included in
+      columns, the first available geometry column will be set as the geometry
+      column of the returned GeoDataFrame.
+
+    Supports versions 0.1.0, 0.4.0 and 1.0.0 of the GeoParquet
+    specification at: https://github.com/opengeospatial/geoparquet
+
+    If 'crs' key is not present in the GeoParquet metadata associated with the
+    Parquet object, it will default to "OGC:CRS84" according to the specification.
+
+    Requires 'pyarrow'.
+
+    .. versionadded:: 0.8
+
+    Parameters
+    ----------
+    path : str, path object
+    columns : list-like of strings, default=None
+        If not None, only these columns will be read from the file.  If
+        the primary geometry column is not included, the first secondary
+        geometry read from the file will be set as the geometry column
+        of the returned GeoDataFrame.  If no geometry columns are present,
+        a ``ValueError`` will be raised.
+    storage_options : dict, optional
+        Extra options that make sense for a particular storage connection, e.g. host,
+        port, username, password, etc. For HTTP(S) URLs the key-value pairs are
+        forwarded to urllib as header options. For other URLs (e.g. starting with
+        "s3://", and "gcs://") the key-value pairs are forwarded to fsspec. Please
+        see fsspec and urllib for more details.
+
+        When no storage options are provided and a filesystem is implemented by
+        both ``pyarrow.fs`` and ``fsspec`` (e.g. "s3://") then the ``pyarrow.fs``
+        filesystem is preferred. Provide the instantiated fsspec filesystem using
+        the ``filesystem`` keyword if you wish to use its implementation.
+    bbox : tuple, optional
+        Bounding box to be used to filter selection from geoparquet data. This
+        is only usable if the data was saved with the bbox covering metadata
+        or if the primary geometry column uses the 'point' encoding.
+        Input is of the tuple format (xmin, ymin, xmax, ymax).
+
+    **kwargs
+        Any additional kwargs passed to :func:`pyarrow.parquet.read_table`.
+        A ``filters`` entry is combined with the ``bbox`` filter, and a
+        ``filesystem`` entry is used to resolve ``path``.
+
+    Returns
+    -------
+    GeoDataFrame
+
+    Examples
+    --------
+    >>> df = geopandas.read_parquet("data.parquet")  # doctest: +SKIP
+
+    Specifying columns to read:
+
+    >>> df = geopandas.read_parquet(
+    ...     "data.parquet",
+    ...     columns=["geometry", "pop_est"]
+    ... )  # doctest: +SKIP
+    """
+
+    parquet = import_optional_dependency(
+        "pyarrow.parquet", extra="pyarrow is required for Parquet support."
+    )
+    import geopandas.io._pyarrow_hotfix  # noqa: F401
+
+    # TODO(https://github.com/pandas-dev/pandas/pull/41194): see if pandas
+    # adds filesystem as a keyword and match that.
+    filesystem = kwargs.pop("filesystem", None)
+    filesystem, path = _get_filesystem_path(
+        path, filesystem=filesystem, storage_options=storage_options
+    )
+    path = _expand_user(path)
+    # first pass over the file: schema and metadata only, no row data
+    schema, metadata = _read_parquet_schema_and_metadata(path, filesystem)
+
+    geo_metadata = _validate_and_decode_metadata(metadata)
+
+    bbox_filter = (
+        _get_parquet_bbox_filter(geo_metadata, bbox) if bbox is not None else None
+    )
+
+    if_bbox_column_exists = _check_if_covering_in_geo_metadata(geo_metadata)
+
+    # by default, bbox column is not read in, so must specify which
+    # columns are read in if it exists.
+    if not columns and if_bbox_column_exists:
+        columns = _get_non_bbox_columns(schema, geo_metadata)
+
+    # if both bbox and filters kwargs are used, must splice together.
+    if "filters" in kwargs:
+        filters_kwarg = kwargs.pop("filters")
+        filters = _splice_bbox_and_filters(filters_kwarg, bbox_filter)
+    else:
+        filters = bbox_filter
+
+    # always set, overriding any caller-supplied value
+    kwargs["use_pandas_metadata"] = True
+
+    table = parquet.read_table(
+        path, columns=columns, filesystem=filesystem, filters=filters, **kwargs
+    )
+
+    return _arrow_to_geopandas(table, geo_metadata)
+
+
+def _read_feather(path, columns=None, **kwargs):
+    """
+    Load a Feather object from the file path, returning a GeoDataFrame.
+
+    You can read a subset of columns in the file using the ``columns`` parameter.
+    However, the structure of the returned GeoDataFrame will depend on which
+    columns you read:
+
+    * if no geometry columns are read, this will raise a ``ValueError`` - you
+      should use the pandas `read_feather` method instead.
+    * if the primary geometry column saved to this file is not included in
+      columns, the first available geometry column will be set as the geometry
+      column of the returned GeoDataFrame.
+
+    Supports versions 0.1.0, 0.4.0 and 1.0.0 of the GeoParquet
+    specification at: https://github.com/opengeospatial/geoparquet
+
+    If 'crs' key is not present in the geo metadata associated with the
+    Feather object, it will default to "OGC:CRS84" according to the specification.
+
+    Requires 'pyarrow' >= 0.17.
+
+    .. versionadded:: 0.8
+
+    Parameters
+    ----------
+    path : str, path object
+    columns : list-like of strings, default=None
+        If not None, only these columns will be read from the file.  If
+        the primary geometry column is not included, the first secondary
+        geometry read from the file will be set as the geometry column
+        of the returned GeoDataFrame.  If no geometry columns are present,
+        a ``ValueError`` will be raised.
+    **kwargs
+        Any additional kwargs passed to pyarrow.feather.read_table().
+
+    Returns
+    -------
+    GeoDataFrame
+
+    Raises
+    ------
+    ImportError
+        If the installed pyarrow is older than 0.17.0.
+
+    Examples
+    --------
+    >>> df = geopandas.read_feather("data.feather")  # doctest: +SKIP
+
+    Specifying columns to read:
+
+    >>> df = geopandas.read_feather(
+    ...     "data.feather",
+    ...     columns=["geometry", "pop_est"]
+    ... )  # doctest: +SKIP
+    """
+
+    feather = import_optional_dependency(
+        "pyarrow.feather", extra="pyarrow is required for Feather support."
+    )
+    # TODO move this into `import_optional_dependency`
+    import pyarrow
+
+    import geopandas.io._pyarrow_hotfix  # noqa: F401
+
+    if Version(pyarrow.__version__) < Version("0.17.0"):
+        raise ImportError("pyarrow >= 0.17 required for Feather support")
+
+    path = _expand_user(path)
+
+    table = feather.read_table(path, columns=columns, **kwargs)
+    # no metadata is passed, so it is decoded from the table's schema
+    return _arrow_to_geopandas(table)
+
+
+def _get_parquet_bbox_filter(geo_metadata, bbox):
+    """Build the pyarrow filter expression for a (xmin, ymin, xmax, ymax) bbox.
+
+    Uses the covering bbox column when the primary geometry column declares
+    one; otherwise filters on the x/y fields of a 'point'-encoded primary
+    column. Raises ValueError when neither is available.
+    """
+    primary_column = geo_metadata["primary_column"]
+
+    if _check_if_covering_in_geo_metadata(geo_metadata):
+        return _convert_bbox_to_parquet_filter(
+            bbox, _get_bbox_encoding_column_name(geo_metadata)
+        )
+
+    if geo_metadata["columns"][primary_column]["encoding"] != "point":
+        raise ValueError(
+            "Specifying 'bbox' not supported for this Parquet file (it should either "
+            "have a bbox covering column or use 'point' encoding)."
+        )
+
+    import pyarrow.compute as pc
+
+    x_field = pc.field((primary_column, "x"))
+    y_field = pc.field((primary_column, "y"))
+    return (
+        (x_field >= bbox[0])
+        & (x_field <= bbox[2])
+        & (y_field >= bbox[1])
+        & (y_field <= bbox[3])
+    )
+
+
+def _convert_bbox_to_parquet_filter(bbox, bbox_column_name):
+    """Return an expression selecting rows whose bbox struct is not entirely
+    to one side of ``bbox`` (given as xmin, ymin, xmax, ymax).
+    """
+    import pyarrow.compute as pc
+
+    def corner(field_name):
+        return pc.field((bbox_column_name, field_name))
+
+    entirely_outside = (
+        (corner("xmin") > bbox[2])
+        | (corner("ymin") > bbox[3])
+        | (corner("xmax") < bbox[0])
+        | (corner("ymax") < bbox[1])
+    )
+    return ~entirely_outside
+
+
+def _check_if_covering_in_geo_metadata(geo_metadata):
+    """Return True if the primary column's metadata has a "covering" key."""
+    primary_name = geo_metadata["primary_column"]
+    primary_metadata = geo_metadata["columns"][primary_name]
+    return "covering" in primary_metadata.keys()
+
+
+def _get_bbox_encoding_column_name(geo_metadata):
+    """Return the first element of the primary column's covering-bbox "xmin"
+    entry, i.e. the name of the column holding the bbox struct.
+    """
+    primary_name = geo_metadata["primary_column"]
+    xmin_path = geo_metadata["columns"][primary_name]["covering"]["bbox"]["xmin"]
+    return xmin_path[0]
+
+
+def _get_non_bbox_columns(schema, geo_metadata):
+    """Return ``schema.names`` with the covering bbox column name removed
+    (first occurrence only), if it is present.
+    """
+    names = schema.names
+    try:
+        names.remove(_get_bbox_encoding_column_name(geo_metadata))
+    except ValueError:
+        # the bbox column is not part of the schema; nothing to drop
+        pass
+    return names
+
+
+def _splice_bbox_and_filters(kwarg_filters, bbox_filter):
+    """Combine user-supplied ``filters`` with the bbox filter expression.
+
+    Without a bbox filter the user filters are returned untouched; otherwise
+    they are converted to an expression and AND-ed with the bbox filter.
+    """
+    parquet = import_optional_dependency(
+        "pyarrow.parquet", extra="pyarrow is required for Parquet support."
+    )
+    if bbox_filter is not None:
+        return bbox_filter & parquet.filters_to_expression(kwarg_filters)
+    return kwarg_filters
@@ -0,0 +1,851 @@
+from __future__ import annotations
+
+import os
+import urllib.request
+import warnings
+from io import IOBase
+from packaging.version import Version
+from pathlib import Path
+
+# Adapted from pandas.io.common
+from urllib.parse import urlparse as parse_url
+from urllib.parse import uses_netloc, uses_params, uses_relative
+
+import numpy as np
+import pandas as pd
+from pandas.api.types import is_integer_dtype
+
+import shapely
+from shapely.geometry import mapping
+from shapely.geometry.base import BaseGeometry
+
+from geopandas import GeoDataFrame, GeoSeries
+from geopandas._compat import HAS_PYPROJ, PANDAS_GE_20
+from geopandas.io.util import vsi_path
+
+# URL schemes for which ``_is_url`` returns True
+_VALID_URLS = set(uses_relative + uses_netloc + uses_params)
+_VALID_URLS.discard("")
+# file:// URIs are supported by fiona/pyogrio -> don't already open + read the file here
+_VALID_URLS.discard("file")
+
+# Lazily imported fiona state, populated by ``_import_fiona``. ``fiona`` is None
+# while no import has been attempted and is set to False when the import fails.
+fiona = None
+fiona_env = None
+fiona_import_error = None
+FIONA_GE_19 = False
+
+
+def _import_fiona():
+    """
+    Import fiona on first use and record the outcome in module-level globals.
+
+    On success ``fiona`` is the imported module, ``fiona_env`` is ``fiona.Env``
+    (falling back to ``fiona.drivers``, or None if neither can be imported) and
+    ``FIONA_GE_19`` tells whether the base version is at least 1.9.0. On failure
+    ``fiona`` is set to False and ``fiona_import_error`` to the error message.
+    Nothing is done when ``fiona`` is no longer None.
+    """
+    global fiona
+    global fiona_env
+    global fiona_import_error
+    global FIONA_GE_19
+
+    if fiona is None:
+        try:
+            import fiona
+
+            # only try to import fiona.Env if the main fiona import succeeded
+            # (otherwise you can get confusing "AttributeError: module 'fiona'
+            # has no attribute '_loading'" / partially initialized module errors)
+            try:
+                from fiona import Env as fiona_env
+            except ImportError:
+                try:
+                    from fiona import drivers as fiona_env
+                except ImportError:
+                    fiona_env = None
+
+            # compare on the base version so that pre/dev releases of 1.9 count
+            FIONA_GE_19 = Version(Version(fiona.__version__).base_version) >= Version(
+                "1.9.0"
+            )
+
+        except ImportError as err:
+            # False (rather than None) marks "import attempted and failed"
+            fiona = False
+            fiona_import_error = str(err)
+
+
+# Lazily imported pyogrio state, populated by ``_import_pyogrio``. ``pyogrio`` is
+# None while no import has been attempted and is set to False when it fails.
+pyogrio = None
+pyogrio_import_error = None
+
+
+def _import_pyogrio():
+    """
+    Import pyogrio on first use and record the outcome in module-level globals.
+
+    On success ``pyogrio`` is the imported module. On failure ``pyogrio`` is set
+    to False and ``pyogrio_import_error`` to the error message. Nothing is done
+    when ``pyogrio`` is no longer None.
+    """
+    global pyogrio
+    global pyogrio_import_error
+
+    if pyogrio is None:
+        try:
+            import pyogrio
+
+        except ImportError as err:
+            # False (rather than None) marks "import attempted and failed"
+            pyogrio = False
+            pyogrio_import_error = str(err)
+
+
+def _check_fiona(func):
+    """Raise an ImportError naming ``func`` if the module-level ``fiona`` is falsy."""
+    if fiona:
+        return
+    message = (
+        f"the {func} requires the 'fiona' package, but it is not installed or does "
+        f"not import correctly.\nImporting fiona resulted in: {fiona_import_error}"
+    )
+    raise ImportError(message)
+
+
+def _check_pyogrio(func):
+    """
+    Raise an ImportError naming ``func`` if the module-level ``pyogrio`` is falsy.
+
+    The message includes the error recorded in ``pyogrio_import_error``.
+    """
+    if not pyogrio:
+        raise ImportError(
+            f"the {func} requires the 'pyogrio' package, but it is not installed "
+            "or does not import correctly."
+            # this fragment must be an f-string too, otherwise the placeholder
+            # is shown literally instead of the recorded import error
+            f"\nImporting pyogrio resulted in: {pyogrio_import_error}"
+        )
+
+
+def _check_metadata_supported(metadata: str | None, engine: str, driver: str) -> None:
+    """
+    Raise NotImplementedError if ``metadata`` cannot be written.
+
+    Nothing is checked when ``metadata`` is None. Otherwise the driver has to be
+    "GPKG" and, for the "fiona" engine, ``FIONA_GE_19`` has to be true.
+    """
+    if metadata is not None:
+        if driver != "GPKG":
+            raise NotImplementedError(
+                "The 'metadata' keyword is only supported for the GPKG driver."
+            )
+
+        fiona_too_old = engine == "fiona" and not FIONA_GE_19
+        if fiona_too_old:
+            raise NotImplementedError(
+                "The 'metadata' keyword is only supported for Fiona >= 1.9."
+            )
+
+
+def _check_engine(engine, func):
+    """
+    Resolve which I/O engine to use and make sure it can be imported.
+
+    Parameters
+    ----------
+    engine : str or None
+        "pyogrio", "fiona" or None. None falls back to
+        ``geopandas.options.io_engine`` and, if that is None as well, to
+        whichever of pyogrio / fiona imports successfully (pyogrio first).
+    func : str
+        Description of the calling function, used in the error message.
+
+    Returns
+    -------
+    The resolved engine. Values other than "pyogrio", "fiona" or None are
+    returned unchanged without any check.
+
+    Raises
+    ------
+    ImportError
+        If the selected engine cannot be imported, or if no engine was selected
+        and neither pyogrio nor fiona can be imported.
+    """
+    # if not specified through keyword or option, then default to "pyogrio" if
+    # installed, otherwise try fiona
+    if engine is None:
+        import geopandas
+
+        engine = geopandas.options.io_engine
+
+    if engine is None:
+        _import_pyogrio()
+        if pyogrio:
+            engine = "pyogrio"
+        else:
+            _import_fiona()
+            if fiona:
+                engine = "fiona"
+
+    if engine == "pyogrio":
+        _import_pyogrio()
+        _check_pyogrio(func)
+    elif engine == "fiona":
+        _import_fiona()
+        _check_fiona(func)
+    elif engine is None:
+        # auto-detection above found neither package
+        raise ImportError(
+            f"The {func} requires the 'pyogrio' or 'fiona' package, "
+            "but neither is installed or imports correctly."
+            f"\nImporting pyogrio resulted in: {pyogrio_import_error}"
+            f"\nImporting fiona resulted in: {fiona_import_error}"
+        )
+
+    return engine
+
+
+# Lower-case file extension (including the leading dot) -> OGR driver name.
+# Looked up by ``_detect_driver`` when no driver is given explicitly.
+_EXTENSION_TO_DRIVER = {
+    ".bna": "BNA",
+    ".dxf": "DXF",
+    ".csv": "CSV",
+    ".shp": "ESRI Shapefile",
+    ".dbf": "ESRI Shapefile",
+    ".json": "GeoJSON",
+    ".geojson": "GeoJSON",
+    ".geojsonl": "GeoJSONSeq",
+    ".geojsons": "GeoJSONSeq",
+    ".gpkg": "GPKG",
+    ".gml": "GML",
+    ".xml": "GML",
+    ".gpx": "GPX",
+    ".gtm": "GPSTrackMaker",
+    ".gtz": "GPSTrackMaker",
+    ".tab": "MapInfo File",
+    ".mif": "MapInfo File",
+    ".mid": "MapInfo File",
+    ".dgn": "DGN",
+    ".fgb": "FlatGeobuf",
+}
+
+
+def _expand_user(path):
+    """Expand ``~`` in str and ``pathlib.Path`` paths; return anything else as is."""
+    if isinstance(path, Path):
+        return path.expanduser()
+    if isinstance(path, str):
+        return os.path.expanduser(path)
+    return path
+
+
+def _is_url(url):
+    """Return True if the scheme of *url* is one of the accepted URL protocols."""
+    try:
+        scheme = parse_url(url).scheme
+        is_known_scheme = scheme in _VALID_URLS
+    except Exception:
+        # anything that cannot be parsed is treated as "not a URL"
+        return False
+    return is_known_scheme
+
+
+def _read_file(
+    filename, bbox=None, mask=None, columns=None, rows=None, engine=None, **kwargs
+):
+    """
+    Returns a GeoDataFrame from a file or URL.
+
+    Parameters
+    ----------
+    filename : str, path object or file-like object
+        Either the absolute or relative path to the file or URL to
+        be opened, or any object with a read() method (such as an open file
+        or StringIO)
+    bbox : tuple | GeoDataFrame or GeoSeries | shapely Geometry, default None
+        Filter features by given bounding box, GeoSeries, GeoDataFrame or a shapely
+        geometry. With engine="fiona", CRS mis-matches are resolved if given a GeoSeries
+        or GeoDataFrame. With engine="pyogrio", bbox must be in the same CRS as the
+        dataset. Tuple is (minx, miny, maxx, maxy) to match the bounds property of
+        shapely geometry objects. Cannot be used with mask.
+    mask : dict | GeoDataFrame or GeoSeries | shapely Geometry, default None
+        Filter for features that intersect with the given dict-like geojson
+        geometry, GeoSeries, GeoDataFrame or shapely geometry.
+        CRS mis-matches are resolved if given a GeoSeries or GeoDataFrame.
+        Cannot be used with bbox. If multiple geometries are passed, this will
+        first union all geometries, which may be computationally expensive.
+    columns : list, optional
+        List of column names to import from the data source. Column names
+        must exactly match the names in the data source. To avoid reading
+        any columns (besides the geometry column), pass an empty list-like.
+        By default reads all columns.
+    rows : int or slice, default None
+        Load in specific rows by passing an integer (first `n` rows) or a
+        slice() object.
+    engine : str,  "pyogrio" or "fiona"
+        The underlying library that is used to read the file. Currently, the
+        supported options are "pyogrio" and "fiona". Defaults to "pyogrio" if
+        installed, otherwise tries "fiona". Engine can also be set globally
+        with the ``geopandas.options.io_engine`` option.
+    **kwargs :
+        Keyword args to be passed to the engine, and can be used to read
+        from multi-layer data, data stored within archives (zip files), etc.
+        In case of the "pyogrio" engine, the keyword arguments are passed to
+        `pyogrio.read_dataframe`. In case of the "fiona" engine, the keyword
+        arguments are passed to `fiona.open`. For more information on possible
+        keywords, type: ``import pyogrio; help(pyogrio.read_dataframe)``.
+
+
+    Examples
+    --------
+    >>> df = geopandas.read_file("nybb.shp")  # doctest: +SKIP
+
+    Specifying layer of GPKG:
+
+    >>> df = geopandas.read_file("file.gpkg", layer='cities')  # doctest: +SKIP
+
+    Reading only first 10 rows:
+
+    >>> df = geopandas.read_file("nybb.shp", rows=10)  # doctest: +SKIP
+
+    Reading only geometries intersecting ``mask``:
+
+    >>> df = geopandas.read_file("nybb.shp", mask=polygon)  # doctest: +SKIP
+
+    Reading only geometries intersecting ``bbox``:
+
+    >>> df = geopandas.read_file("nybb.shp", bbox=(0, 0, 10, 20))  # doctest: +SKIP
+
+    Returns
+    -------
+    :obj:`geopandas.GeoDataFrame` or :obj:`pandas.DataFrame` :
+        If `ignore_geometry=True` a :obj:`pandas.DataFrame` will be returned.
+
+    Notes
+    -----
+    The format drivers will attempt to detect the encoding of your data, but
+    may fail. In this case, the proper encoding can be specified explicitly
+    by using the encoding keyword parameter, e.g. ``encoding='utf-8'``.
+
+    When specifying a URL, geopandas will check if the server supports reading
+    partial data and in that case pass the URL as is to the underlying engine,
+    which will then use the network file system handler of GDAL to read from
+    the URL. Otherwise geopandas will download the data from the URL and pass
+    all data in-memory to the underlying engine.
+    If you need more control over how the URL is read, you can specify the
+    GDAL virtual filesystem manually (e.g. ``/vsicurl/https://...``). See the
+    GDAL documentation on filesystems for more details
+    (https://gdal.org/user/virtual_file_systems.html#vsicurl-http-https-ftp-files-random-access).
+
+    """
+    engine = _check_engine(engine, "'read_file' function")
+
+    filename = _expand_user(filename)
+
+    # set to True once ``filename`` holds the raw file contents instead of a path
+    from_bytes = False
+    if _is_url(filename):
+        # if it is a url that supports random access -> pass through to
+        # pyogrio/fiona as is (to support downloading only part of the file)
+        # otherwise still download manually because pyogrio/fiona don't support
+        # all types of urls (https://github.com/geopandas/geopandas/issues/2908)
+        with urllib.request.urlopen(filename) as response:
+            if not response.headers.get("Accept-Ranges") == "bytes":
+                filename = response.read()
+                from_bytes = True
+
+    if engine == "pyogrio":
+        return _read_file_pyogrio(
+            filename, bbox=bbox, mask=mask, columns=columns, rows=rows, **kwargs
+        )
+
+    elif engine == "fiona":
+        # fiona is handed either a path or bytes, so file-like objects are read
+        # fully here (text content is encoded as UTF-8)
+        if pd.api.types.is_file_like(filename):
+            data = filename.read()
+            path_or_bytes = data.encode("utf-8") if isinstance(data, str) else data
+            from_bytes = True
+        else:
+            path_or_bytes = filename
+
+        return _read_file_fiona(
+            path_or_bytes,
+            from_bytes,
+            bbox=bbox,
+            mask=mask,
+            columns=columns,
+            rows=rows,
+            **kwargs,
+        )
+
+    else:
+        raise ValueError(f"unknown engine '{engine}'")
+
+
+def _read_file_fiona(
+    path_or_bytes,
+    from_bytes,
+    bbox=None,
+    mask=None,
+    columns=None,
+    rows=None,
+    where=None,
+    **kwargs,
+):
+    """
+    Read a data source with the fiona engine.
+
+    Parameters
+    ----------
+    path_or_bytes : str, path object or bytes
+        Path opened with ``fiona.open``, or the raw contents of the data source
+        when ``from_bytes`` is True (opened with ``fiona.BytesCollection``).
+    from_bytes : bool
+        Whether ``path_or_bytes`` holds file contents rather than a path.
+    bbox, mask, columns, rows :
+        Same meaning as in ``_read_file``.
+    where : optional
+        Passed as the ``where`` filter to ``features.filter``; requires
+        fiona 1.9+.
+    **kwargs
+        Passed on to the fiona reader. ``include_fields`` is treated as an
+        alias of ``columns``.
+
+    Returns
+    -------
+    GeoDataFrame, or a pandas DataFrame if ``ignore_geometry`` is truthy or the
+    schema geometry is "None".
+    """
+    if where is not None and not FIONA_GE_19:
+        raise NotImplementedError("where requires fiona 1.9+")
+
+    if columns is not None:
+        if "include_fields" in kwargs:
+            raise ValueError(
+                "Cannot specify both 'include_fields' and 'columns' keywords"
+            )
+        if not FIONA_GE_19:
+            raise NotImplementedError("'columns' keyword requires fiona 1.9+")
+        kwargs["include_fields"] = columns
+    elif "include_fields" in kwargs:
+        # alias to columns, as this variable is used below to specify column order
+        # in the dataframe creation
+        columns = kwargs["include_fields"]
+
+    if not from_bytes:
+        # Opening a file via URL or file-like-object above automatically detects a
+        # zipped file. In order to match that behavior, attempt to add a zip scheme
+        # if missing.
+        path_or_bytes = vsi_path(str(path_or_bytes))
+
+    if from_bytes:
+        reader = fiona.BytesCollection
+    else:
+        reader = fiona.open
+
+    with fiona_env():
+        with reader(path_or_bytes, **kwargs) as features:
+            crs = features.crs_wkt
+            # attempt to get EPSG code
+            try:
+                # fiona 1.9+
+                epsg = features.crs.to_epsg(confidence_threshold=100)
+                if epsg is not None:
+                    crs = epsg
+            except AttributeError:
+                # fiona <= 1.8
+                try:
+                    crs = features.crs["init"]
+                except (TypeError, KeyError):
+                    pass
+
+            # handle loading the bounding box
+            if bbox is not None:
+                if isinstance(bbox, (GeoDataFrame, GeoSeries)):
+                    bbox = tuple(bbox.to_crs(crs).total_bounds)
+                elif isinstance(bbox, BaseGeometry):
+                    bbox = bbox.bounds
+                assert len(bbox) == 4
+            # handle loading the mask
+            elif isinstance(mask, (GeoDataFrame, GeoSeries)):
+                mask = mapping(mask.to_crs(crs).union_all())
+            elif isinstance(mask, BaseGeometry):
+                mask = mapping(mask)
+
+            # collect only the filters that were actually given
+            filters = {}
+            if bbox is not None:
+                filters["bbox"] = bbox
+            if mask is not None:
+                filters["mask"] = mask
+            if where is not None:
+                filters["where"] = where
+
+            # setup the data loading filter
+            if rows is not None:
+                if isinstance(rows, int):
+                    rows = slice(rows)
+                elif not isinstance(rows, slice):
+                    raise TypeError("'rows' must be an integer or a slice.")
+                f_filt = features.filter(rows.start, rows.stop, rows.step, **filters)
+            elif filters:
+                f_filt = features.filter(**filters)
+            else:
+                f_filt = features
+            # get list of columns
+            columns = columns or list(features.schema["properties"])
+            datetime_fields = [
+                k for (k, v) in features.schema["properties"].items() if v == "datetime"
+            ]
+            if (
+                kwargs.get("ignore_geometry", False)
+                or features.schema["geometry"] == "None"
+            ):
+                df = pd.DataFrame(
+                    [record["properties"] for record in f_filt], columns=columns
+                )
+            else:
+                df = GeoDataFrame.from_features(
+                    f_filt, crs=crs, columns=columns + ["geometry"]
+                )
+            # convert the columns the schema declares as datetime
+            for k in datetime_fields:
+                as_dt = None
+                # plain try catch for when pandas will raise in the future
+                # TODO we can tighten the exception type in future when it does
+                try:
+                    with warnings.catch_warnings():
+                        # pandas 2.x does not yet enforce this behaviour but raises a
+                        # warning  -> we want to to suppress this warning for our users,
+                        # and do this by turning it into an error so we take the
+                        # `except` code path to try again with utc=True
+                        warnings.filterwarnings(
+                            "error",
+                            "In a future version of pandas, parsing datetimes with "
+                            "mixed time zones will raise an error",
+                            FutureWarning,
+                        )
+                        as_dt = pd.to_datetime(df[k])
+                except Exception:
+                    pass
+                if as_dt is None or as_dt.dtype == "object":
+                    # if to_datetime failed, try again for mixed timezone offsets
+                    # This can still fail if there are invalid datetimes
+                    try:
+                        as_dt = pd.to_datetime(df[k], utc=True)
+                    except Exception:
+                        pass
+                # if to_datetime succeeded, round datetimes as
+                # fiona only supports up to ms precision (any microseconds are
+                # floating point rounding error)
+                if as_dt is not None and not (as_dt.dtype == "object"):
+                    if PANDAS_GE_20:
+                        df[k] = as_dt.dt.as_unit("ms")
+                    else:
+                        df[k] = as_dt.dt.round(freq="ms")
+            return df
+
+
+def _read_file_pyogrio(path_or_bytes, bbox=None, mask=None, rows=None, **kwargs):
+    """
+    Read a data source with the pyogrio engine.
+
+    Translates the geopandas / fiona style keywords into their pyogrio
+    equivalents and calls ``pyogrio.read_dataframe``.
+
+    Parameters
+    ----------
+    path_or_bytes : str, path object, bytes or file-like object
+        Data source handed to pyogrio.
+    bbox : tuple | GeoDataFrame or GeoSeries | shapely Geometry, default None
+        Bounding box filter. A GeoDataFrame / GeoSeries is reprojected to the CRS
+        reported by ``pyogrio.read_info`` and reduced to its total bounds.
+        Cannot be used together with ``mask``.
+    mask : dict | GeoDataFrame or GeoSeries | shapely Geometry, default None
+        Geometry filter, passed to pyogrio through the ``mask`` keyword.
+    rows : int or slice, default None
+        An integer is translated to ``max_features``, a slice to
+        ``skip_features`` / ``max_features``. Negative slice starts and slice
+        steps are not supported.
+    **kwargs
+        Passed on to ``pyogrio.read_dataframe``. ``ignore_geometry`` is
+        translated to ``read_geometry=False``; the deprecated
+        ``include_fields`` / ``ignore_fields`` are translated to ``columns``.
+
+    Returns
+    -------
+    The result of ``pyogrio.read_dataframe``.
+
+    Raises
+    ------
+    ValueError
+        For unsupported slices, if both ``bbox`` and ``mask`` are given, if
+        ``bbox`` does not have 4 elements, or for conflicting column keywords.
+    TypeError
+        If ``rows`` is neither an integer nor a slice.
+    """
+    import pyogrio
+
+    if rows is not None:
+        if isinstance(rows, int):
+            kwargs["max_features"] = rows
+        elif isinstance(rows, slice):
+            if rows.start is not None:
+                if rows.start < 0:
+                    raise ValueError(
+                        "Negative slice start not supported with the 'pyogrio' engine."
+                    )
+                kwargs["skip_features"] = rows.start
+            if rows.stop is not None:
+                kwargs["max_features"] = rows.stop - (rows.start or 0)
+            if rows.step is not None:
+                raise ValueError("slice with step is not supported")
+        else:
+            raise TypeError("'rows' must be an integer or a slice.")
+
+    if bbox is not None and mask is not None:
+        # match error message from Fiona
+        raise ValueError("mask and bbox can not be set together")
+
+    if bbox is not None:
+        if isinstance(bbox, (GeoDataFrame, GeoSeries)):
+            crs = pyogrio.read_info(path_or_bytes).get("crs")
+            # reading the info may have consumed a file-like object -> rewind
+            if isinstance(path_or_bytes, IOBase):
+                path_or_bytes.seek(0)
+
+            bbox = tuple(bbox.to_crs(crs).total_bounds)
+        elif isinstance(bbox, BaseGeometry):
+            bbox = bbox.bounds
+        if len(bbox) != 4:
+            raise ValueError("'bbox' should be a length-4 tuple.")
+
+    if mask is not None:
+        # NOTE: mask cannot be used at same time as bbox keyword
+        if isinstance(mask, (GeoDataFrame, GeoSeries)):
+            crs = pyogrio.read_info(path_or_bytes).get("crs")
+            # reading the info may have consumed a file-like object -> rewind
+            if isinstance(path_or_bytes, IOBase):
+                path_or_bytes.seek(0)
+
+            mask = shapely.unary_union(mask.to_crs(crs).geometry.values)
+        elif isinstance(mask, BaseGeometry):
+            mask = shapely.unary_union(mask)
+        elif isinstance(mask, dict) or hasattr(mask, "__geo_interface__"):
+            # convert GeoJSON to shapely geometry
+            mask = shapely.geometry.shape(mask)
+
+        kwargs["mask"] = mask
+
+    if kwargs.pop("ignore_geometry", False):
+        kwargs["read_geometry"] = False
+
+    # translate `ignore_fields`/`include_fields` keyword for back compat with fiona
+    if "ignore_fields" in kwargs and "include_fields" in kwargs:
+        raise ValueError("Cannot specify both 'ignore_fields' and 'include_fields'")
+    elif "ignore_fields" in kwargs:
+        if kwargs.get("columns", None) is not None:
+            raise ValueError(
+                "Cannot specify both 'columns' and 'ignore_fields' keywords"
+            )
+        warnings.warn(
+            "The 'include_fields' and 'ignore_fields' keywords are deprecated, and "
+            "will be removed in a future release. You can use the 'columns' keyword "
+            "instead to select which columns to read.",
+            DeprecationWarning,
+            stacklevel=3,
+        )
+        ignore_fields = kwargs.pop("ignore_fields")
+        fields = pyogrio.read_info(path_or_bytes)["fields"]
+        # same rewind as for bbox / mask above, so that the actual read below
+        # starts at the beginning of a file-like object again
+        if isinstance(path_or_bytes, IOBase):
+            path_or_bytes.seek(0)
+        include_fields = [col for col in fields if col not in ignore_fields]
+        kwargs["columns"] = include_fields
+    elif "include_fields" in kwargs:
+        # translate `include_fields` keyword for back compat with fiona engine
+        if kwargs.get("columns", None) is not None:
+            raise ValueError(
+                "Cannot specify both 'columns' and 'include_fields' keywords"
+            )
+        warnings.warn(
+            "The 'include_fields' and 'ignore_fields' keywords are deprecated, and "
+            "will be removed in a future release. You can use the 'columns' keyword "
+            "instead to select which columns to read.",
+            DeprecationWarning,
+            stacklevel=3,
+        )
+        kwargs["columns"] = kwargs.pop("include_fields")
+
+    return pyogrio.read_dataframe(path_or_bytes, bbox=bbox, **kwargs)
+
+
+def _detect_driver(path):
+    """
+    Guess the OGR driver from the file extension of ``path``.
+
+    If ``path`` has a ``name`` attribute (e.g. a file handle), that name is used
+    instead. Extensions missing from ``_EXTENSION_TO_DRIVER`` fall back to
+    "ESRI Shapefile".
+    """
+    # in case the path is a file handle
+    path = getattr(path, "name", path)
+    suffix = Path(path).suffix.lower()
+    # Unknown extension: assume it is a shapefile folder for now. In the future,
+    # will likely raise an exception when the expected folder writing behavior
+    # is more clearly defined.
+    return _EXTENSION_TO_DRIVER.get(suffix, "ESRI Shapefile")
+
+
+def _to_file(
+    df,
+    filename,
+    driver=None,
+    schema=None,
+    index=None,
+    mode="w",
+    crs=None,
+    engine=None,
+    metadata=None,
+    **kwargs,
+):
+    """
+    Write this GeoDataFrame to an OGR data source
+
+    A dictionary of supported OGR providers is available via:
+
+    >>> import pyogrio
+    >>> pyogrio.list_drivers()  # doctest: +SKIP
+
+    Parameters
+    ----------
+    df : GeoDataFrame to be written
+    filename : string
+        File path or file handle to write to. The path may specify a
+        GDAL VSI scheme.
+    driver : string, default None
+        The OGR format driver used to write the vector file.
+        If not specified, it attempts to infer it from the file extension.
+        If no extension is specified, it saves ESRI Shapefile to a folder.
+    schema : dict, default None
+        If specified, the schema dictionary is passed to Fiona to
+        better control how the file is written. If None, GeoPandas
+        will determine the schema based on each column's dtype.
+        Not supported for the "pyogrio" engine.
+    index : bool, default None
+        If True, write index into one or more columns (for MultiIndex).
+        Default None writes the index into one or more columns only if
+        the index is named, is a MultiIndex, or has a non-integer data
+        type. If False, no index is written.
+
+        .. versionadded:: 0.7
+            Previously the index was not written.
+    mode : string, default 'w'
+        The write mode, 'w' to overwrite the existing file and 'a' to append;
+        when using the pyogrio engine, you can also pass ``append=True``.
+        Not all drivers support appending. For the fiona engine, the drivers
+        that support appending are listed in fiona.supported_drivers or
+        https://github.com/Toblerity/Fiona/blob/master/fiona/drvsupport.py.
+        For the pyogrio engine, you should be able to use any driver that
+        is available in your installation of GDAL that supports append
+        capability; see the specific driver entry at
+        https://gdal.org/drivers/vector/index.html for more information.
+    crs : pyproj.CRS, default None
+        If specified, the CRS is passed to Fiona to
+        better control how the file is written. If None, GeoPandas
+        will determine the crs based on crs df attribute.
+        The value can be anything accepted
+        by :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
+        such as an authority string (eg "EPSG:4326") or a WKT string.
+        Not supported for the "pyogrio" engine.
+    engine : str,  "pyogrio" or "fiona"
+        The underlying library that is used to write the file. Currently, the
+        supported options are "pyogrio" and "fiona". Defaults to "pyogrio" if
+        installed, otherwise tries "fiona". Engine can also be set globally
+        with the ``geopandas.options.io_engine`` option.
+    metadata : dict[str, str], default None
+        Optional metadata to be stored in the file. Keys and values must be
+        strings. Only supported for the "GPKG" driver
+        (requires Fiona >= 1.9 or pyogrio >= 0.6).
+    **kwargs :
+        Keyword args to be passed to the engine, and can be used to write
+        to multi-layer data, store data within archives (zip files), etc.
+        In case of the "fiona" engine, the keyword arguments are passed to
+        `fiona.open`. For more information on possible keywords, type:
+        ``import fiona; help(fiona.open)``. In case of the "pyogrio" engine,
+        the keyword arguments are passed to `pyogrio.write_dataframe`.
+
+    Notes
+    -----
+    The format drivers will attempt to detect the encoding of your data, but
+    may fail. In this case, the proper encoding can be specified explicitly
+    by using the encoding keyword parameter, e.g. ``encoding='utf-8'``.
+    """
+    engine = _check_engine(engine, "'to_file' method")
+
+    filename = _expand_user(filename)
+
+    if index is None:
+        # Determine if index attribute(s) should be saved to file
+        # (only if they are named or are non-integer)
+        index = list(df.index.names) != [None] or not is_integer_dtype(df.index.dtype)
+    if index:
+        # the index is written as regular column(s)
+        df = df.reset_index(drop=False)
+
+    if driver is None:
+        driver = _detect_driver(filename)
+
+    if driver == "ESRI Shapefile" and any(len(c) > 10 for c in df.columns.tolist()):
+        warnings.warn(
+            "Column names longer than 10 characters will be truncated when saved to "
+            "ESRI Shapefile.",
+            stacklevel=3,
+        )
+
+    if (df.dtypes == "geometry").sum() > 1:
+        raise ValueError(
+            "GeoDataFrame contains multiple geometry columns but GeoDataFrame.to_file "
+            "supports only a single geometry column. Use a GeoDataFrame.to_parquet or "
+            "GeoDataFrame.to_feather, drop additional geometry columns or convert them "
+            "to a supported format like a well-known text (WKT) using "
+            "`GeoSeries.to_wkt()`.",
+        )
+    _check_metadata_supported(metadata, engine, driver)
+
+    if mode not in ("w", "a"):
+        raise ValueError(f"'mode' should be one of 'w' or 'a', got '{mode}' instead")
+
+    # dispatch to the engine-specific writer
+    if engine == "pyogrio":
+        _to_file_pyogrio(df, filename, driver, schema, crs, mode, metadata, **kwargs)
+    elif engine == "fiona":
+        _to_file_fiona(df, filename, driver, schema, crs, mode, metadata, **kwargs)
+    else:
+        raise ValueError(f"unknown engine '{engine}'")
+
+
+def _to_file_fiona(df, filename, driver, schema, crs, mode, metadata, **kwargs):
+    """
+    Write ``df`` to ``filename`` with the fiona engine.
+
+    Parameters
+    ----------
+    df : GeoDataFrame
+        Data to write; its features are produced by ``df.iterfeatures()``.
+    filename, driver, mode :
+        Passed to ``fiona.open``.
+    schema : dict or None
+        Schema passed to ``fiona.open``; inferred with ``infer_schema`` if None.
+    crs : optional
+        Converted with ``pyproj.CRS.from_user_input`` if truthy, otherwise
+        ``df.crs`` is used.
+    metadata : optional
+        If not None, stored through ``update_tags`` on the opened collection.
+    **kwargs
+        Passed on to ``fiona.open``.
+
+    Raises
+    ------
+    ImportError
+        If a ``crs`` is given but pyproj is not available.
+    """
+    if not HAS_PYPROJ and crs:
+        raise ImportError(
+            "The 'pyproj' package is required to write a file with a CRS, but it is not"
+            " installed or does not import correctly."
+        )
+
+    if schema is None:
+        schema = infer_schema(df)
+
+    if crs:
+        from pyproj import CRS
+
+        crs = CRS.from_user_input(crs)
+    else:
+        crs = df.crs
+
+    with fiona_env():
+        crs_wkt = None
+        try:
+            gdal_version = Version(
+                fiona.env.get_gdal_release_name().strip("e")
+            )  # GH3147
+        except (AttributeError, ValueError):
+            gdal_version = Version("2.0.0")  # just assume it is not the latest
+        # GDAL >= 3 gets the default WKT, older versions the WKT1_GDAL flavour
+        if gdal_version >= Version("3.0.0") and crs:
+            crs_wkt = crs.to_wkt()
+        elif crs:
+            crs_wkt = crs.to_wkt("WKT1_GDAL")
+        with fiona.open(
+            filename, mode=mode, driver=driver, crs_wkt=crs_wkt, schema=schema, **kwargs
+        ) as colxn:
+            if metadata is not None:
+                colxn.update_tags(metadata)
+            colxn.writerecords(df.iterfeatures())
+
+
+def _to_file_pyogrio(df, filename, driver, schema, crs, mode, metadata, **kwargs):
+    """
+    Write ``df`` to ``filename`` with ``pyogrio.write_dataframe``.
+
+    ``schema`` and ``crs`` are not supported by this engine and must be None;
+    ``mode="a"`` is translated to ``append=True``.
+
+    Raises
+    ------
+    ValueError
+        If ``schema`` or ``crs`` is given, or if ``df`` has duplicated column
+        names.
+    """
+    import pyogrio
+
+    if schema is not None:
+        raise ValueError(
+            "The 'schema' argument is not supported with the 'pyogrio' engine."
+        )
+    if crs is not None:
+        raise ValueError("Passing 'crs' is not supported with the 'pyogrio' engine.")
+    # for the fiona engine, this check is done in gdf.iterfeatures()
+    if not df.columns.is_unique:
+        raise ValueError("GeoDataFrame cannot contain duplicated column names.")
+
+    write_options = dict(kwargs)
+    if mode == "a":
+        write_options["append"] = True
+
+    pyogrio.write_dataframe(
+        df, filename, driver=driver, metadata=metadata, **write_options
+    )
+
+
+def infer_schema(df):
+    """
+    Build a Fiona style schema dictionary for ``df``.
+
+    Returns a dict with a "geometry" entry (from ``_geometry_types``) and a
+    "properties" entry, an OrderedDict mapping every non-geometry column to a
+    type name derived from its dtype. A UserWarning is emitted if ``df`` is
+    empty.
+    """
+    from collections import OrderedDict
+
+    # TODO: test pandas string type and boolean type once released
+    dtype_names = {
+        "Int32": "int32",
+        "int32": "int32",
+        "Int64": "int",
+        "string": "str",
+        "boolean": "bool",
+    }
+
+    def convert_type(column, in_type):
+        if in_type == object:
+            return "str"
+        if in_type.name.startswith("datetime64"):
+            # numpy datetime type regardless of frequency
+            return "datetime"
+        dtype_key = str(in_type)
+        if dtype_key in dtype_names:
+            out_type = dtype_names[dtype_key]
+        else:
+            # name of the Python scalar type backing the dtype
+            out_type = type(np.zeros(1, in_type).item()).__name__
+        return "int" if out_type == "long" else out_type
+
+    properties = OrderedDict()
+    for col, _type in zip(df.columns, df.dtypes):
+        if col != df._geometry_column_name:
+            properties[col] = convert_type(col, _type)
+
+    if df.empty:
+        warnings.warn(
+            "You are attempting to write an empty DataFrame to file. "
+            "For some drivers, this operation may fail.",
+            UserWarning,
+            stacklevel=3,
+        )
+
+    # Since https://github.com/Toblerity/Fiona/issues/446 resolution,
+    # Fiona allows a list of geometry types
+    return {"geometry": _geometry_types(df), "properties": properties}
+
+
+def _geometry_types(df):
+    """
+    Collect the geometry type names present in ``df`` for use in a schema.
+
+    Types of geometries with a z coordinate are prefixed with "3D " and listed
+    first. Returns "Unknown" if no type is found, a single string if exactly one
+    type is found and a list of strings otherwise.
+    """
+    has_z = df.geometry.has_z
+    flat_names = [
+        name for name in df[~has_z].geometry.geom_type.unique() if name is not None
+    ]
+    z_names = [
+        "3D " + name
+        for name in df[has_z].geometry.geom_type.unique()
+        if name is not None
+    ]
+    found = z_names + flat_names
+
+    if not found:
+        # Default geometry type supported by Fiona
+        # (Since https://github.com/Toblerity/Fiona/issues/446 resolution)
+        return "Unknown"
+    if len(found) == 1:
+        return found[0]
+    return found
+
+
+def _list_layers(filename) -> pd.DataFrame:
+    """List the layers of a data source.
+
+    Gives an overview of the layers found in a file or URL along with their
+    geometry types. Where the data source supports it, both spatial and
+    non-spatial layers are listed; non-spatial layers have ``None`` in the
+    ``"geometry_type"`` column. GeoPandas will not read such layers, but they can
+    be loaded into a pd.DataFrame with :func:`pyogrio.read_dataframe`.
+
+    Parameters
+    ----------
+    filename : str, path object or file-like object
+        Either the absolute or relative path to the file or URL to
+        be opened, or any object with a read() method (such as an open file
+        or StringIO)
+
+    Returns
+    -------
+    pandas.DataFrame
+        A DataFrame with columns "name" and "geometry_type" and one row per layer.
+    """
+    _import_pyogrio()
+    _check_pyogrio("list_layers")
+
+    import pyogrio
+
+    layers = pyogrio.list_layers(filename)
+    return pd.DataFrame(layers, columns=["name", "geometry_type"])
@@ -0,0 +1,473 @@
+import warnings
+from contextlib import contextmanager
+from functools import lru_cache
+
+import pandas as pd
+
+import shapely
+import shapely.wkb
+
+from geopandas import GeoDataFrame
+
+
+@contextmanager
+def _get_conn(conn_or_engine):
+    """
+    Yield a connection that is inside a transaction.
+
+    Engine.begin() returns a Connection with an implicit Transaction while
+    Connection.begin() returns the Transaction. This helper hides that
+    difference: whatever is passed in, the caller receives a Connection, and
+    a transaction is begun only if the connection is not already in one.
+
+    Parameters
+    ----------
+    conn_or_engine : Connection or Engine
+        A sqlalchemy Connection or Engine instance
+
+    Returns
+    -------
+    Connection
+
+    Raises
+    ------
+    ValueError
+        If ``conn_or_engine`` is neither a Connection nor an Engine.
+    """
+    from sqlalchemy.engine.base import Connection, Engine
+
+    if isinstance(conn_or_engine, Connection):
+        if conn_or_engine.in_transaction():
+            # Reuse the transaction the caller already opened.
+            yield conn_or_engine
+        else:
+            with conn_or_engine.begin():
+                yield conn_or_engine
+        return
+
+    if isinstance(conn_or_engine, Engine):
+        with conn_or_engine.begin() as connection:
+            yield connection
+        return
+
+    raise ValueError(f"Unknown Connectable: {conn_or_engine}")
+
+
+def _df_to_geodf(df, geom_col="geom", crs=None, con=None):
+    """
+    Transform a pandas DataFrame into a GeoDataFrame.
+
+    The column ``geom_col`` must hold geometries in WKB representation, either
+    as ``bytes`` or as hex-encoded text. Intended for converting the result of
+    ``pd.read_sql`` to a GeoDataFrame. When the column has non-null values,
+    ``df[geom_col]`` is overwritten with the decoded geometries.
+
+    Parameters
+    ----------
+    df : DataFrame
+        pandas DataFrame with geometry column in WKB representation.
+    geom_col : string, default 'geom'
+        column name to convert to shapely geometries
+    crs : pyproj.CRS, optional
+        CRS to use for the returned GeoDataFrame. The value can be anything accepted
+        by :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
+        such as an authority string (eg "EPSG:4326") or a WKT string.
+        If not set, tries to determine CRS from the SRID associated with the
+        first geometry in the database, and assigns that to all geometries.
+    con : sqlalchemy.engine.Connection or sqlalchemy.engine.Engine
+        Active connection to the database to query.
+
+    Returns
+    -------
+    GeoDataFrame
+
+    Raises
+    ------
+    ValueError
+        If ``geom_col`` is missing from ``df`` or appears more than once.
+    """
+
+    if geom_col not in df:
+        raise ValueError(f"Query missing geometry column '{geom_col}'")
+
+    if df.columns.to_list().count(geom_col) > 1:
+        raise ValueError(
+            f"Duplicate geometry column '{geom_col}' detected in SQL query output. "
+            "Only one geometry column is allowed."
+        )
+
+    geoms = df[geom_col].dropna()
+
+    if not geoms.empty:
+
+        def load_geom_text(x):
+            """Load from binary encoded as text."""
+            return shapely.wkb.loads(str(x), hex=True)
+
+        # The first non-null value decides how the whole column is decoded:
+        # raw bytes are loaded directly, anything else is treated as hex text.
+        if isinstance(geoms.iat[0], bytes):
+            load_geom = shapely.wkb.loads
+        else:
+            load_geom = load_geom_text
+
+        df[geom_col] = geoms = geoms.apply(load_geom)
+        if crs is None:
+            srid = shapely.get_srid(geoms.iat[0])
+            # if no defined SRID in geodatabase, returns SRID of 0
+            if srid != 0:
+                try:
+                    spatial_ref_sys_df = _get_spatial_ref_sys_df(con, srid)
+                except pd.errors.DatabaseError:
+                    warning_msg = (
+                        "Could not find the spatial reference system table "
+                        "(spatial_ref_sys) in PostGIS. "
+                        f"Trying epsg:{srid} as a fallback."
+                    )
+                    warnings.warn(warning_msg, UserWarning, stacklevel=3)
+                    crs = f"epsg:{srid}"
+                else:
+                    if not spatial_ref_sys_df.empty:
+                        auth_name = spatial_ref_sys_df["auth_name"].item()
+                        crs = f"{auth_name}:{srid}"
+                    else:
+                        warning_msg = (
+                            f"Could not find srid {srid} in the "
+                            "spatial_ref_sys table. "
+                            f"Trying epsg:{srid} as a fallback."
+                        )
+                        warnings.warn(warning_msg, UserWarning, stacklevel=3)
+                        crs = f"epsg:{srid}"
+
+    return GeoDataFrame(df, crs=crs, geometry=geom_col)
+
+
+def _read_postgis(
+    sql,
+    con,
+    geom_col="geom",
+    crs=None,
+    index_col=None,
+    coerce_float=True,
+    parse_dates=None,
+    params=None,
+    chunksize=None,
+):
+    """
+    Return a GeoDataFrame holding the result of a SQL query.
+
+    The query result must contain a geometry column in WKB representation.
+
+    It is also possible to use :func:`geopandas.read_file` to read from a database.
+    Especially for file geodatabases like GeoPackage or SpatiaLite this can be easier.
+
+    Parameters
+    ----------
+    sql : string
+        SQL query to execute in selecting entries from database, or name
+        of the table to read from the database.
+    con : sqlalchemy.engine.Connection or sqlalchemy.engine.Engine
+        Active connection to the database to query.
+    geom_col : string, default 'geom'
+        column name to convert to shapely geometries
+    crs : dict or str, optional
+        CRS to use for the returned GeoDataFrame; if not set, tries to
+        determine CRS from the SRID associated with the first geometry in
+        the database, and assigns that to all geometries.
+    chunksize : int, default None
+        If specified, return an iterator where chunksize is the number of rows to
+        include in each chunk.
+
+    See the documentation for pandas.read_sql for further explanation
+    of the following parameters:
+    index_col, coerce_float, parse_dates, params, chunksize
+
+    Returns
+    -------
+    GeoDataFrame
+        A single GeoDataFrame when ``chunksize`` is None; otherwise a generator
+        yielding one GeoDataFrame per chunk.
+
+    Examples
+    --------
+    PostGIS
+
+    >>> from sqlalchemy import create_engine  # doctest: +SKIP
+    >>> db_connection_url = "postgresql://myusername:mypassword@myhost:5432/mydatabase"
+    >>> con = create_engine(db_connection_url)  # doctest: +SKIP
+    >>> sql = "SELECT geom, highway FROM roads"
+    >>> df = geopandas.read_postgis(sql, con)  # doctest: +SKIP
+
+    SpatiaLite
+
+    >>> sql = "SELECT ST_AsBinary(geom) AS geom, highway FROM roads"
+    >>> df = geopandas.read_postgis(sql, con)  # doctest: +SKIP
+    """
+
+    # pandas returns one DataFrame when chunksize is None, else an iterator.
+    result = pd.read_sql(
+        sql,
+        con,
+        index_col=index_col,
+        coerce_float=coerce_float,
+        parse_dates=parse_dates,
+        params=params,
+        chunksize=chunksize,
+    )
+
+    if chunksize is None:
+        return _df_to_geodf(result, geom_col=geom_col, crs=crs, con=con)
+
+    # Convert lazily so each chunk is only decoded when it is consumed.
+    return (
+        _df_to_geodf(chunk, geom_col=geom_col, crs=crs, con=con) for chunk in result
+    )
+
+
+def _get_geometry_type(gdf):
+    """
+    Get the basic geometry type of a GeoDataFrame. See more info from:
+    https://geoalchemy-2.readthedocs.io/en/latest/types.html#geoalchemy2.types._GISType
+
+    The rules are:
+     - when every geometry shares one geometry type, that type is used
+       (upper-cased):
+        - Point, LineString, Polygon, MultiPoint, MultiLineString, MultiPolygon,
+          GeometryCollection.
+        - LinearRing is reported as LINESTRING, since such geometries get
+          converted to LineString objects.
+     - in every other case the type is GEOMETRY, e.g.:
+        - a mix of Polygons and MultiPolygons in GeoSeries
+        - a mix of Points and LineStrings in GeoSeries
+        - geometry is of type GeometryCollection,
+          such as GeometryCollection([Point, LineStrings])
+     - if any geometry has a Z-coordinate, "Z" is appended so that all
+       records are written as 3D.
+
+    Returns
+    -------
+    tuple of (str, bool)
+        The target geometry type name and whether LinearRing geometries occur.
+
+    Raises
+    ------
+    ValueError
+        If the only geometry type found is ``None``.
+    """
+    unique_types = list(gdf.geometry.geom_type.unique())
+    has_curve = any(
+        name is not None and "LinearRing" in name for name in unique_types
+    )
+
+    if len(unique_types) != 1:
+        target_geom_type = "GEOMETRY"
+    elif has_curve:
+        target_geom_type = "LINESTRING"
+    elif unique_types[0] is None:
+        raise ValueError("No valid geometries in the data.")
+    else:
+        target_geom_type = unique_types[0].upper()
+
+    # Check for 3D-coordinates
+    if any(gdf.geometry.has_z):
+        target_geom_type += "Z"
+
+    return target_geom_type, has_curve
+
+
+def _get_srid_from_crs(gdf):
+    """
+    Get an SRID from the CRS of ``gdf`` if available. If not, return 0.
+
+    For each confidence level (100, 70, 25) the EPSG code is tried first and
+    an ESRI authority code second; the first match wins. Any exception raised
+    during the lookup is treated the same as "no match".
+
+    Returns
+    -------
+    int
+        The SRID found, or 0 (with a single UserWarning) when ``gdf.crs`` is
+        None, no code could be determined, or the lookup failed.
+    """
+
+    # Use geoalchemy2 default for srid
+    # Note: undefined srid in PostGIS is 0
+    srid = None
+    warning_msg = (
+        "Could not parse CRS from the GeoDataFrame. "
+        "Inserting data without defined CRS."
+    )
+    if gdf.crs is not None:
+        try:
+            for confidence in (100, 70, 25):
+                srid = gdf.crs.to_epsg(min_confidence=confidence)
+                if srid is not None:
+                    break
+                auth_srid = gdf.crs.to_authority(
+                    auth_name="ESRI", min_confidence=confidence
+                )
+                if auth_srid is not None:
+                    srid = int(auth_srid[1])
+                    break
+        except Exception:
+            # Best effort: fall through to the fallback below, which issues
+            # the warning exactly once (warning here too would duplicate it).
+            srid = None
+
+    if srid is None:
+        srid = 0
+        warnings.warn(warning_msg, UserWarning, stacklevel=2)
+
+    return srid
+
+
+def _convert_linearring_to_linestring(gdf, geom_name):
+    """Replace LinearRing geometries in column ``geom_name`` with LineStrings.
+
+    ``gdf`` is modified in place and also returned.
+    """
+    from shapely.geometry import LineString
+
+    # Todo: Use shapely function once it's implemented:
+    # https://github.com/shapely/shapely/issues/1617
+
+    def _as_linestring(ring):
+        return LineString(ring)
+
+    is_ring = gdf.geom_type == "LinearRing"
+    rings = gdf.loc[is_ring, geom_name]
+    gdf.loc[is_ring, geom_name] = rings.apply(_as_linestring)
+    return gdf
+
+
+def _convert_to_ewkb(gdf, geom_name, srid):
+    """Convert the geometries in column ``geom_name`` to hex-encoded EWKB.
+
+    The SRID is set on every geometry before serialising, and the result is
+    returned as a plain pandas DataFrame.
+    """
+    with_srid = shapely.set_srid(gdf[geom_name].values._data, srid=srid)
+    ewkb = shapely.to_wkb(with_srid, hex=True, include_srid=True)
+
+    # The gdf will warn that the geometry column doesn't hold in-memory geometries
+    # now that they are EWKB, so convert back to a regular dataframe to avoid warning
+    # the user that the dtypes are unexpected.
+    plain = pd.DataFrame(gdf, copy=False)
+    plain[geom_name] = ewkb
+    return plain
+
+
+def _psql_insert_copy(tbl, conn, keys, data_iter):
+    """Insert rows with a PostgreSQL ``COPY ... FROM STDIN WITH CSV`` statement.
+
+    The rows from ``data_iter`` are serialised to an in-memory CSV buffer and
+    sent through the DBAPI cursor obtained from ``conn.connection``. The
+    signature matches what this module passes as ``method`` to ``to_sql``.
+
+    Parameters
+    ----------
+    tbl : object
+        Must expose ``tbl.table.schema`` and ``tbl.table.name``.
+    conn : object
+        Must expose the raw DBAPI connection as ``conn.connection``.
+    keys : iterable of str
+        Column names, in the order of the values in each row.
+    data_iter : iterable
+        Rows to write.
+    """
+    import csv
+    import io
+
+    csv_buffer = io.StringIO()
+    csv.writer(csv_buffer).writerows(data_iter)
+    csv_buffer.seek(0)
+
+    quoted_columns = ", ".join('"{}"'.format(key) for key in keys)
+    copy_sql = 'COPY "{}"."{}" ({}) FROM STDIN WITH CSV'.format(
+        tbl.table.schema, tbl.table.name, quoted_columns
+    )
+
+    with conn.connection.cursor() as cursor:
+        if callable(getattr(cursor, "copy", None)):
+            # Use psycopg method if it's available
+            with cursor.copy(copy_sql) as copy:
+                copy.write(csv_buffer.read())
+        else:
+            # otherwise use psycopg2 method
+            cursor.copy_expert(copy_sql, csv_buffer)
+
+
+def _write_postgis(
+    gdf,
+    name,
+    con,
+    schema=None,
+    if_exists="fail",
+    index=False,
+    index_label=None,
+    chunksize=None,
+    dtype=None,
+):
+    """
+    Upload GeoDataFrame into PostGIS database.
+
+    This method requires SQLAlchemy and GeoAlchemy2, and a PostgreSQL
+    Python driver (e.g. psycopg2) to be installed.
+
+    Parameters
+    ----------
+    gdf : GeoDataFrame
+        Data to upload. It is copied first, so the caller's frame is not
+        modified.
+    name : str
+        Name of the target table.
+    con : sqlalchemy.engine.Connection or sqlalchemy.engine.Engine
+        Active connection to the PostGIS database.
+    if_exists : {'fail', 'replace', 'append'}, default 'fail'
+        How to behave if the table already exists:
+
+        - fail: Raise a ValueError.
+        - replace: Drop the table before inserting new values.
+        - append: Insert new values to the existing table.
+    schema : string, optional
+        Specify the schema. If None, use default schema: 'public'.
+    index : bool, default False
+        Write DataFrame index as a column.
+        Uses *index_label* as the column name in the table.
+    index_label : string or sequence, default None
+        Column label for index column(s).
+        If None is given (default) and index is True,
+        then the index names are used.
+    chunksize : int, optional
+        Rows will be written in batches of this size at a time.
+        By default, all rows will be written at once.
+    dtype : dict of column name to SQL type, default None
+        Specifying the datatype for columns.
+        The keys should be the column names and the values
+        should be the SQLAlchemy types. The dict is not modified; the
+        geometry column's type is always set by this function.
+
+    Raises
+    ------
+    ImportError
+        If geoalchemy2 or sqlalchemy cannot be imported.
+    ValueError
+        If ``if_exists='append'`` and the SRID of the existing table differs
+        from the SRID derived from the GeoDataFrame.
+
+    Examples
+    --------
+
+    >>> from sqlalchemy import create_engine  # doctest: +SKIP
+    >>> engine = create_engine("postgresql://myusername:mypassword@myhost:5432\
+/mydatabase")  # doctest: +SKIP
+    >>> gdf.to_postgis("my_table", engine)  # doctest: +SKIP
+    """
+    try:
+        from geoalchemy2 import Geometry
+        from sqlalchemy import text
+    except ImportError:
+        raise ImportError("'to_postgis()' requires geoalchemy2 package.")
+
+    gdf = gdf.copy()
+    geom_name = gdf.geometry.name
+
+    # Get srid
+    srid = _get_srid_from_crs(gdf)
+
+    # Get geometry type and info whether data contains LinearRing.
+    geometry_type, has_curve = _get_geometry_type(gdf)
+
+    # Build dtype with Geometry on a copy, so the caller's dict is left untouched
+    dtype = {} if dtype is None else dict(dtype)
+    dtype[geom_name] = Geometry(geometry_type=geometry_type, srid=srid)
+
+    # Convert LinearRing geometries to LineString
+    if has_curve:
+        gdf = _convert_linearring_to_linestring(gdf, geom_name)
+
+    # Convert geometries to EWKB
+    gdf = _convert_to_ewkb(gdf, geom_name, srid)
+
+    if schema is not None:
+        schema_name = schema
+    else:
+        schema_name = "public"
+
+    if if_exists == "append":
+        # Check that the geometry srid matches with the current GeoDataFrame
+        with _get_conn(con) as connection:
+            # Only check SRID if table exists
+            if connection.dialect.has_table(connection, name, schema):
+                target_srid = connection.execute(
+                    text(
+                        "SELECT Find_SRID('{schema}', '{table}', '{geom_col}');".format(
+                            schema=schema_name, table=name, geom_col=geom_name
+                        )
+                    )
+                ).fetchone()[0]
+
+                if target_srid != srid:
+                    msg = (
+                        "The CRS of the target table (EPSG:{epsg_t}) differs from the "
+                        "CRS of current GeoDataFrame (EPSG:{epsg_src}).".format(
+                            epsg_t=target_srid, epsg_src=srid
+                        )
+                    )
+                    raise ValueError(msg)
+
+    with _get_conn(con) as connection:
+        gdf.to_sql(
+            name,
+            connection,
+            schema=schema_name,
+            if_exists=if_exists,
+            index=index,
+            index_label=index_label,
+            chunksize=chunksize,
+            dtype=dtype,
+            method=_psql_insert_copy,
+        )
+
+
+@lru_cache
+def _get_spatial_ref_sys_df(con, srid):
+    """Query ``spatial_ref_sys`` for ``srid`` and return the rows as a DataFrame.
+
+    The result has the columns ``srid`` and ``auth_name``. Results are
+    memoised per ``(con, srid)`` pair by ``lru_cache``.
+    """
+    query = f"SELECT srid, auth_name FROM spatial_ref_sys WHERE srid = {srid}"
+    return pd.read_sql(query, con)
@@ -0,0 +1,100 @@
+"""
+Script to create the data and write legacy storage (pickle) files.
+
+Based on pandas' generate_legacy_storage_files.py script.
+
+To use this script, create an environment for which you want to
+generate pickles, activate the environment, and run this script as:
+
+$ python geopandas/geopandas/io/tests/generate_legacy_storage_files.py \
+    geopandas/geopandas/io/tests/data/pickle/ pickle
+
+This script generates a storage file for the current arch, system,
+and python version.
+
+The idea here is you are using the *current* version of the
+generate_legacy_storage_files with an *older* version of geopandas to
+generate a pickle file. We will then check this file into a current
+branch, and test using test_pickle.py. This will load the *older*
+pickles and test versus the current data that is generated
+(with master). These are then compared.
+
+"""
+
+import os
+import pickle
+import platform
+import sys
+
+import pandas as pd
+
+from shapely.geometry import Point
+
+import geopandas
+
+
+def create_pickle_data():
+    """Build the GeoDataFrames that get pickled.
+
+    Returns a dict with two entries: ``"gdf_the_geom"`` (custom geometry
+    column name, no CRS) and ``"gdf_crs"`` (default geometry column with CRS
+    EPSG:4326).
+    """
+
+    def _sample_points():
+        # Fresh Point objects on every call: Point(1, 1), Point(2, 2), Point(3, 3)
+        return [Point(1, 1), Point(2, 2), Point(3, 3)]
+
+    frames = {}
+
+    # custom geometry column name
+    frames["gdf_the_geom"] = geopandas.GeoDataFrame(
+        {"a": [1, 2, 3], "the_geom": _sample_points()},
+        geometry="the_geom",
+    )
+
+    # with crs
+    frames["gdf_crs"] = geopandas.GeoDataFrame(
+        {"a": [0.1, 0.2, 0.3], "geometry": _sample_points()},
+        crs="EPSG:4326",
+    )
+
+    return frames
+
+
+def platform_name():
+    """Return an underscore-joined tag identifying the current environment.
+
+    The parts are, in order: geopandas version, ``pd-<pandas version>``,
+    ``py-<python version>``, machine type, and lower-cased system name.
+    """
+    parts = (
+        str(geopandas.__version__),
+        f"pd-{pd.__version__}",
+        f"py-{platform.python_version()}",
+        str(platform.machine()),
+        str(platform.system().lower()),
+    )
+    return "_".join(parts)
+
+
+def write_legacy_pickles(output_dir):
+    """Pickle the sample GeoDataFrames into a file inside ``output_dir``.
+
+    The file is named ``<platform_name()>.pickle`` and contains the dict
+    returned by ``create_pickle_data()``, dumped with
+    ``pickle.DEFAULT_PROTOCOL``. Progress information is printed to stdout.
+
+    Parameters
+    ----------
+    output_dir : str
+        Directory in which the pickle file is created.
+    """
+    print(
+        "This script generates a storage file for the current arch, system, "
+        "and python version"
+    )
+    # ``.format`` must be applied to the string, not to the return value of
+    # ``print`` (which is None and would raise AttributeError).
+    print("geopandas version: {}".format(geopandas.__version__))
+    print("   output dir    : {}".format(output_dir))
+    print("   storage format: pickle")
+
+    pth = "{}.pickle".format(platform_name())
+
+    # The context manager closes the file even if pickling fails.
+    with open(os.path.join(output_dir, pth), "wb") as fh:
+        pickle.dump(create_pickle_data(), fh, pickle.DEFAULT_PROTOCOL)
+
+    print("created pickle file: {}".format(pth))
+
+
+def main():
+    """Command-line entry point: ``<output_dir> <storage_type>``.
+
+    Exits with a usage message unless exactly two arguments are given, and
+    with an error message unless the storage type is ``pickle``.
+    """
+    argv = sys.argv
+    if len(argv) != 3:
+        sys.exit(
+            "Specify output directory and storage type: generate_legacy_"
+            "storage_files.py <output_dir> <storage_type> "
+        )
+
+    output_dir, storage_type = str(argv[1]), str(argv[2])
+
+    if storage_type != "pickle":
+        sys.exit("storage_type must be one of {'pickle'}")
+
+    write_legacy_pickles(output_dir=output_dir)
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,328 @@
+import os
+
+from shapely.geometry import (
+    LineString,
+    MultiLineString,
+    MultiPoint,
+    MultiPolygon,
+    Point,
+    Polygon,
+)
+
+import geopandas
+from geopandas import GeoDataFrame
+
+from .test_file import FIONA_MARK, PYOGRIO_MARK
+
+import pytest
+from geopandas.testing import assert_geodataframe_equal
+
+# Credit: Polygons below come from Montreal city Open Data portal
+# http://donnees.ville.montreal.qc.ca/dataset/unites-evaluation-fonciere
+# Closed ring: the last coordinate repeats the first.
+city_hall_boundaries = Polygon(
+    (
+        (-73.5541107525234, 45.5091983609661),
+        (-73.5546126200639, 45.5086813829106),
+        (-73.5540185061397, 45.5084409343852),
+        (-73.5539986525799, 45.5084323044531),
+        (-73.5535801792994, 45.5089539203786),
+        (-73.5541107525234, 45.5091983609661),
+    )
+)
+# Closed ring as well; two of its vertices also appear in city_hall_boundaries.
+vauquelin_place = Polygon(
+    (
+        (-73.5542465586147, 45.5081555487952),
+        (-73.5540185061397, 45.5084409343852),
+        (-73.5546126200639, 45.5086813829106),
+        (-73.5548825850032, 45.5084033554357),
+        (-73.5542465586147, 45.5081555487952),
+    )
+)
+
+# Two open LineStrings built from the vertices of city_hall_boundaries:
+# the first three coordinates and the last three coordinates respectively.
+city_hall_walls = [
+    LineString(
+        (
+            (-73.5541107525234, 45.5091983609661),
+            (-73.5546126200639, 45.5086813829106),
+            (-73.5540185061397, 45.5084409343852),
+        )
+    ),
+    LineString(
+        (
+            (-73.5539986525799, 45.5084323044531),
+            (-73.5535801792994, 45.5089539203786),
+            (-73.5541107525234, 45.5091983609661),
+        )
+    ),
+]
+
+# 2D points used alone and as members of MultiPoints below.
+city_hall_entrance = Point(-73.553785, 45.508722)
+city_hall_balcony = Point(-73.554138, 45.509080)
+city_hall_council_chamber = Point(-73.554246, 45.508931)
+
+# Same x/y as city_hall_entrance, with an added Z value of 300.
+point_3D = Point(-73.553785, 45.508722, 300)
+
+
+# *****************************************
+# TEST TOOLING
+
+
+class _ExpectedError:
+    """Record of the exception a write is expected to raise.
+
+    ``type`` holds the exception class and ``match`` the message pattern.
+    """
+
+    def __init__(self, error_type, error_message_match):
+        self.type, self.match = error_type, error_message_match
+
+
+class _ExpectedErrorBuilder:
+    """Fluent helper that registers an expected error under a composite key."""
+
+    def __init__(self, composite_key):
+        self.composite_key = composite_key
+
+    def to_raise(self, error_type, error_match):
+        """Store the expected error in the module-level registry."""
+        expected = _ExpectedError(error_type, error_match)
+        _expected_exceptions[self.composite_key] = expected
+
+
+def _expect_writing(gdf, ogr_driver):
+    """Start registering an expected error for writing ``gdf`` with ``ogr_driver``."""
+    key = _composite_key(gdf, ogr_driver)
+    return _ExpectedErrorBuilder(key)
+
+
+def _composite_key(gdf, ogr_driver):
+    """Build a hashable key from the identity of ``gdf`` and the driver value."""
+    return frozenset((id(gdf), ogr_driver))
+
+
+def _expected_error_on(gdf, ogr_driver):
+    """Return the registered expected error for this pair, or None."""
+    return _expected_exceptions.get(_composite_key(gdf, ogr_driver))
+
+
+# *****************************************
+# TEST CASES
+# GeoDataFrames used as params of the ``geodataframe`` fixture, in append order.
+_geodataframes_to_write = []
+# Registry filled through ``_expect_writing(...).to_raise(...)``. Keys come from
+# ``_composite_key`` and are based on ``id(gdf)``, so an error must be
+# registered with the very same object that was appended to the list above.
+_expected_exceptions = {}
+_CRS = "epsg:4326"
+
+# ------------------
+# gdf with Points
+gdf = GeoDataFrame(
+    {"a": [1, 2]}, crs=_CRS, geometry=[city_hall_entrance, city_hall_balcony]
+)
+_geodataframes_to_write.append(gdf)
+
+# ------------------
+# gdf with MultiPoints
+gdf = GeoDataFrame(
+    {"a": [1, 2]},
+    crs=_CRS,
+    geometry=[
+        MultiPoint([city_hall_balcony, city_hall_council_chamber]),
+        MultiPoint([city_hall_entrance, city_hall_balcony, city_hall_council_chamber]),
+    ],
+)
+_geodataframes_to_write.append(gdf)
+
+# ------------------
+# gdf with Points and MultiPoints
+gdf = GeoDataFrame(
+    {"a": [1, 2]},
+    crs=_CRS,
+    geometry=[MultiPoint([city_hall_entrance, city_hall_balcony]), city_hall_balcony],
+)
+_geodataframes_to_write.append(gdf)
+# The 'ESRI Shapefile' driver documents support for writing mixed
+# LineString/MultiLineString and Polygon/MultiPolygon layers, but does not
+# mention mixed Point/MultiPoint layers, so this write is expected to fail.
+# see https://www.gdal.org/drv_shapefile.html
+_expect_writing(gdf, "ESRI Shapefile").to_raise(RuntimeError, "Failed to write record")
+
+# ------------------
+# gdf with LineStrings
+gdf = GeoDataFrame({"a": [1, 2]}, crs=_CRS, geometry=city_hall_walls)
+_geodataframes_to_write.append(gdf)
+
+# ------------------
+# gdf with MultiLineStrings
+gdf = GeoDataFrame(
+    {"a": [1, 2]},
+    crs=_CRS,
+    geometry=[MultiLineString(city_hall_walls), MultiLineString(city_hall_walls)],
+)
+_geodataframes_to_write.append(gdf)
+
+# ------------------
+# gdf with LineStrings and MultiLineStrings
+gdf = GeoDataFrame(
+    {"a": [1, 2]},
+    crs=_CRS,
+    geometry=[MultiLineString(city_hall_walls), city_hall_walls[0]],
+)
+_geodataframes_to_write.append(gdf)
+
+# ------------------
+# gdf with Polygons
+gdf = GeoDataFrame(
+    {"a": [1, 2]}, crs=_CRS, geometry=[city_hall_boundaries, vauquelin_place]
+)
+_geodataframes_to_write.append(gdf)
+
+# ------------------
+# gdf with MultiPolygon
+gdf = GeoDataFrame(
+    {"a": [1]},
+    crs=_CRS,
+    geometry=[MultiPolygon((city_hall_boundaries, vauquelin_place))],
+)
+_geodataframes_to_write.append(gdf)
+
+# ------------------
+# gdf with Polygon and MultiPolygon
+gdf = GeoDataFrame(
+    {"a": [1, 2]},
+    crs=_CRS,
+    geometry=[
+        MultiPolygon((city_hall_boundaries, vauquelin_place)),
+        city_hall_boundaries,
+    ],
+)
+_geodataframes_to_write.append(gdf)
+
+# ------------------
+# gdf with null geometry and Point
+gdf = GeoDataFrame({"a": [1, 2]}, crs=_CRS, geometry=[None, city_hall_entrance])
+_geodataframes_to_write.append(gdf)
+
+# ------------------
+# gdf with null geometry and 3D Point
+gdf = GeoDataFrame({"a": [1, 2]}, crs=_CRS, geometry=[None, point_3D])
+_geodataframes_to_write.append(gdf)
+
+# ------------------
+# gdf with null geometries only
+gdf = GeoDataFrame({"a": [1, 2]}, crs=_CRS, geometry=[None, None])
+_geodataframes_to_write.append(gdf)
+
+# ------------------
+# gdf with all shape types mixed together
+gdf = GeoDataFrame(
+    {"a": [1, 2, 3, 4, 5, 6]},
+    crs=_CRS,
+    geometry=[
+        MultiPolygon((city_hall_boundaries, vauquelin_place)),
+        city_hall_entrance,
+        MultiLineString(city_hall_walls),
+        city_hall_walls[0],
+        MultiPoint([city_hall_entrance, city_hall_balcony]),
+        city_hall_balcony,
+    ],
+)
+_geodataframes_to_write.append(gdf)
+# Not supported by 'ESRI Shapefile' driver
+_expect_writing(gdf, "ESRI Shapefile").to_raise(RuntimeError, "Failed to write record")
+
+# ------------------
+# gdf with all 2D shape types and 3D Point mixed together
+gdf = GeoDataFrame(
+    {"a": [1, 2, 3, 4, 5, 6, 7]},
+    crs=_CRS,
+    geometry=[
+        MultiPolygon((city_hall_boundaries, vauquelin_place)),
+        city_hall_entrance,
+        MultiLineString(city_hall_walls),
+        city_hall_walls[0],
+        MultiPoint([city_hall_entrance, city_hall_balcony]),
+        city_hall_balcony,
+        point_3D,
+    ],
+)
+_geodataframes_to_write.append(gdf)
+# Not supported by 'ESRI Shapefile' driver
+_expect_writing(gdf, "ESRI Shapefile").to_raise(RuntimeError, "Failed to write record")
+
+
+@pytest.fixture(params=_geodataframes_to_write)
+def geodataframe(request):
+    """One of the module-level sample GeoDataFrames per parametrised run."""
+    sample = request.param
+    return sample
+
+
+@pytest.fixture(
+    params=[
+        ("GeoJSON", ".geojson"),
+        ("ESRI Shapefile", ".shp"),
+        ("GPKG", ".gpkg"),
+        ("SQLite", ".sqlite"),
+    ]
+)
+def ogr_driver(request):
+    """An ``(OGR driver name, file extension)`` pair per parametrised run."""
+    driver_and_extension = request.param
+    return driver_and_extension
+
+
+@pytest.fixture(
+    params=[
+        pytest.param("fiona", marks=FIONA_MARK),
+        pytest.param("pyogrio", marks=PYOGRIO_MARK),
+    ]
+)
+def engine(request):
+    """Name of the I/O engine (``"fiona"`` or ``"pyogrio"``) per parametrised run."""
+    engine_name = request.param
+    return engine_name
+
+
+def test_to_file_roundtrip(tmpdir, geodataframe, ogr_driver, engine):
+    """Write a sample GeoDataFrame to a file and check it reads back unchanged.
+
+    Runs for every combination of sample frame, OGR driver and engine. When an
+    error was registered for the frame/driver pair, the write itself is
+    expected to raise instead.
+    """
+    driver, ext = ogr_driver
+    output_file = os.path.join(str(tmpdir), "output_file" + ext)
+    write_kwargs = {}
+    if driver == "SQLite":
+        write_kwargs["spatialite"] = True
+
+        # This if statement can be removed once minimal fiona version >= 1.8.20
+        if engine == "fiona":
+            from packaging.version import Version
+
+            import fiona
+
+            if Version(fiona.__version__) < Version("1.8.20"):
+                pytest.skip("SQLite driver only available from version 1.8.20")
+
+        # If only 3D Points, geometry_type needs to be specified for spatialite at the
+        # moment. This if can be removed once the following PR is released:
+        # https://github.com/geopandas/pyogrio/pull/223
+        # The length check comes first so geometry[1] is only accessed on
+        # two-row frames.
+        if (
+            engine == "pyogrio"
+            and len(geodataframe) == 2
+            and geodataframe.geometry[0] is None
+            and geodataframe.geometry[1] is not None
+            and geodataframe.geometry[1].has_z
+        ):
+            write_kwargs["geometry_type"] = "Point Z"
+
+    expected_error = _expected_error_on(geodataframe, driver)
+    if expected_error:
+        # The pattern covers both alternatives listed, since the message
+        # depends on which engine performs the write.
+        with pytest.raises(
+            RuntimeError, match="Failed to write record|Could not add feature to layer"
+        ):
+            geodataframe.to_file(
+                output_file, driver=driver, engine=engine, **write_kwargs
+            )
+    else:
+        if driver == "SQLite" and engine == "pyogrio":
+            try:
+                geodataframe.to_file(
+                    output_file, driver=driver, engine=engine, **write_kwargs
+                )
+            except ValueError as e:
+                if "unrecognized option 'SPATIALITE'" in str(e):
+                    pytest.xfail(
+                        "pyogrio wheels from PyPI do not come with SpatiaLite support. "
+                        f"Error: {e}"
+                    )
+                raise
+        else:
+            geodataframe.to_file(
+                output_file, driver=driver, engine=engine, **write_kwargs
+            )
+
+        reloaded = geopandas.read_file(output_file, engine=engine)
+
+        if driver == "GeoJSON" and engine == "pyogrio":
+            # For GeoJSON files, the int64 column comes back as int32
+            reloaded["a"] = reloaded["a"].astype("int64")
+
+        assert_geodataframe_equal(geodataframe, reloaded, check_column_type="equiv")
@@ -0,0 +1,537 @@
+import contextlib
+import json
+import os
+import pathlib
+from packaging.version import Version
+
+import numpy as np
+
+import shapely
+from shapely import MultiPoint, Point, box
+
+from geopandas import GeoDataFrame, GeoSeries
+
+import pytest
+from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
+
+pytest.importorskip("pyarrow")
+import pyarrow as pa
+import pyarrow.compute as pc
+from pyarrow import feather
+
+# "data" directory located next to this test module; the tests below read
+# their example files from its "geoarrow" subdirectory.
+DATA_PATH = pathlib.Path(os.path.dirname(__file__)) / "data"
+
+
+def pa_table(table):
+    if Version(pa.__version__) < Version("14.0.0"):
+        return table._pa_table
+    else:
+        return pa.table(table)
+
+
+def pa_array(array):
+    if Version(pa.__version__) < Version("14.0.0"):
+        return array._pa_array
+    else:
+        return pa.array(array)
+
+
+def assert_table_equal(left, right, check_metadata=True):
+    geom_type = left["geometry"].type
+    # in case of Points (directly the inner fixed_size_list or struct type)
+    # -> there are NaNs for empties -> we need to compare them separately
+    # and then fill, because pyarrow.Table.equals considers NaNs as not equal
+    if pa.types.is_fixed_size_list(geom_type):
+        left_values = left["geometry"].chunk(0).values
+        right_values = right["geometry"].chunk(0).values
+        assert pc.is_nan(left_values).equals(pc.is_nan(right_values))
+        left_geoms = pa.FixedSizeListArray.from_arrays(
+            pc.replace_with_mask(left_values, pc.is_nan(left_values), 0.0),
+            type=left["geometry"].type,
+        )
+        right_geoms = pa.FixedSizeListArray.from_arrays(
+            pc.replace_with_mask(right_values, pc.is_nan(right_values), 0.0),
+            type=right["geometry"].type,
+        )
+        left = left.set_column(1, left.schema.field("geometry"), left_geoms)
+        right = right.set_column(1, right.schema.field("geometry"), right_geoms)
+
+    elif pa.types.is_struct(geom_type):
+        left_arr = left["geometry"].chunk(0)
+        right_arr = right["geometry"].chunk(0)
+
+        for i in range(left_arr.type.num_fields):
+            assert pc.is_nan(left_arr.field(i)).equals(pc.is_nan(right_arr.field(i)))
+
+        left_geoms = pa.StructArray.from_arrays(
+            [
+                pc.replace_with_mask(
+                    left_arr.field(i), pc.is_nan(left_arr.field(i)), 0.0
+                )
+                for i in range(left_arr.type.num_fields)
+            ],
+            fields=list(left["geometry"].type),
+        )
+        right_geoms = pa.StructArray.from_arrays(
+            [
+                pc.replace_with_mask(
+                    right_arr.field(i), pc.is_nan(right_arr.field(i)), 0.0
+                )
+                for i in range(right_arr.type.num_fields)
+            ],
+            fields=list(right["geometry"].type),
+        )
+
+        left = left.set_column(1, left.schema.field("geometry"), left_geoms)
+        right = right.set_column(1, right.schema.field("geometry"), right_geoms)
+
+    if left.equals(right, check_metadata=check_metadata):
+        return
+
+    if not left.schema.equals(right.schema):
+        raise AssertionError(
+            "Schema not equal\nLeft:\n{0}\nRight:\n{1}".format(
+                left.schema, right.schema
+            )
+        )
+
+    if check_metadata:
+        if not left.schema.equals(right.schema, check_metadata=True):
+            if not left.schema.metadata == right.schema.metadata:
+                raise AssertionError(
+                    "Metadata not equal\nLeft:\n{0}\nRight:\n{1}".format(
+                        left.schema.metadata, right.schema.metadata
+                    )
+                )
+        for col in left.schema.names:
+            assert left.schema.field(col).equals(
+                right.schema.field(col), check_metadata=True
+            )
+
+    for col in left.column_names:
+        a_left = pa.concat_arrays(left.column(col).chunks)
+        a_right = pa.concat_arrays(right.column(col).chunks)
+        if not a_left.equals(a_right):
+            raise AssertionError(
+                "Column '{0}' not equal:\n{1}".format(col, a_left.diff(a_right))
+            )
+
+    raise AssertionError("Tables not equal for unknown reason")
+
+
+@pytest.mark.skipif(
+    shapely.geos_version < (3, 9, 0),
+    reason="Checking for empty is buggy with GEOS<3.9",
+)  # an old GEOS is installed in the CI builds with the defaults channel
+@pytest.mark.parametrize(
+    "dim",
+    [
+        "xy",
+        pytest.param(
+            "xyz",
+            marks=pytest.mark.skipif(
+                shapely.geos_version < (3, 10, 0),
+                reason="Cannot write 3D geometries with GEOS<3.10",
+            ),
+        ),
+    ],
+)
+@pytest.mark.parametrize(
+    "geometry_type",
+    ["point", "linestring", "polygon", "multipoint", "multilinestring", "multipolygon"],
+)
+@pytest.mark.parametrize(
+    "geometry_encoding, interleaved",
+    [("WKB", None), ("geoarrow", True), ("geoarrow", False)],
+    ids=["WKB", "geoarrow-interleaved", "geoarrow-separated"],
+)
+def test_geoarrow_export(geometry_type, dim, geometry_encoding, interleaved):
+    base_path = DATA_PATH / "geoarrow"
+    suffix = geometry_type + ("_z" if dim == "xyz" else "")
+
+    # Read the example data
+    df = feather.read_feather(base_path / f"example-{suffix}-wkb.arrow")
+    df["geometry"] = GeoSeries.from_wkb(df["geometry"])
+    df["row_number"] = df["row_number"].astype("int32")
+    df = GeoDataFrame(df)
+    df.geometry.array.crs = None
+
+    # Read the expected data
+    if geometry_encoding == "WKB":
+        filename = f"example-{suffix}-wkb.arrow"
+    else:
+        filename = f"example-{suffix}{'-interleaved' if interleaved else ''}.arrow"
+    expected = feather.read_table(base_path / filename)
+
+    # GeoDataFrame -> Arrow Table
+    result = pa_table(
+        df.to_arrow(geometry_encoding=geometry_encoding, interleaved=interleaved)
+    )
+    # remove the "pandas" metadata
+    result = result.replace_schema_metadata(None)
+
+    mask_nonempty = None
+    if (
+        geometry_encoding == "WKB"
+        and dim == "xyz"
+        and geometry_type.startswith("multi")
+    ):
+        # for collections with z dimension, drop the empties because those don't
+        # roundtrip correctly to WKB
+        # (https://github.com/libgeos/geos/issues/888)
+        mask_nonempty = pa.array(np.asarray(~df.geometry.is_empty))
+        result = result.filter(mask_nonempty)
+        expected = expected.filter(mask_nonempty)
+
+    assert_table_equal(result, expected)
+
+    # GeoSeries -> Arrow array
+    if geometry_encoding != "WKB" and geometry_type == "point":
+        # for points, we again have to handle NaNs separately, we already did that
+        # for table so let's just skip this part
+        return
+    result_arr = pa_array(
+        df.geometry.to_arrow(
+            geometry_encoding=geometry_encoding, interleaved=interleaved
+        )
+    )
+    if mask_nonempty is not None:
+        result_arr = result_arr.filter(mask_nonempty)
+    assert result_arr.equals(expected["geometry"].chunk(0))
+
+
+@pytest.mark.skipif(
+    Version(shapely.__version__) < Version("2.0.2"),
+    reason="from_ragged_array failing with read-only array input",
+)
+@pytest.mark.parametrize("encoding", ["WKB", "geoarrow"])
+def test_geoarrow_multiple_geometry_crs(encoding):
+    pytest.importorskip("pyproj")
+    # ensure each geometry column has its own crs
+    gdf = GeoDataFrame(geometry=[box(0, 0, 10, 10)], crs="epsg:4326")
+    gdf["geom2"] = gdf.geometry.to_crs("epsg:3857")
+
+    result = pa_table(gdf.to_arrow(geometry_encoding=encoding))
+    meta1 = json.loads(
+        result.schema.field("geometry").metadata[b"ARROW:extension:metadata"]
+    )
+    assert json.loads(meta1["crs"])["id"]["code"] == 4326
+    meta2 = json.loads(
+        result.schema.field("geom2").metadata[b"ARROW:extension:metadata"]
+    )
+    assert json.loads(meta2["crs"])["id"]["code"] == 3857
+
+    roundtripped = GeoDataFrame.from_arrow(result)
+    assert_geodataframe_equal(gdf, roundtripped)
+    assert gdf.geometry.crs == "epsg:4326"
+    assert gdf.geom2.crs == "epsg:3857"
+
+
+@pytest.mark.parametrize("encoding", ["WKB", "geoarrow"])
+def test_geoarrow_series_name_crs(encoding):
+    pytest.importorskip("pyproj")
+    pytest.importorskip("pyarrow", minversion="14.0.0")
+
+    gser = GeoSeries([box(0, 0, 10, 10)], crs="epsg:4326", name="geom")
+    schema_capsule, _ = gser.to_arrow(geometry_encoding=encoding).__arrow_c_array__()
+    field = pa.Field._import_from_c_capsule(schema_capsule)
+    assert field.name == "geom"
+    assert (
+        field.metadata[b"ARROW:extension:name"] == b"geoarrow.wkb"
+        if encoding == "WKB"
+        else b"geoarrow.polygon"
+    )
+    meta = json.loads(field.metadata[b"ARROW:extension:metadata"])
+    assert json.loads(meta["crs"])["id"]["code"] == 4326
+
+    # ensure it also works without a name
+    gser = GeoSeries([box(0, 0, 10, 10)])
+    schema_capsule, _ = gser.to_arrow(geometry_encoding=encoding).__arrow_c_array__()
+    field = pa.Field._import_from_c_capsule(schema_capsule)
+    assert field.name == ""
+
+
+def test_geoarrow_unsupported_encoding():
+    gdf = GeoDataFrame(geometry=[box(0, 0, 10, 10)], crs="epsg:4326")
+
+    with pytest.raises(ValueError, match="Expected geometry encoding"):
+        gdf.to_arrow(geometry_encoding="invalid")
+
+    with pytest.raises(ValueError, match="Expected geometry encoding"):
+        gdf.geometry.to_arrow(geometry_encoding="invalid")
+
+
+def test_geoarrow_mixed_geometry_types():
+    gdf = GeoDataFrame(
+        {"geometry": [Point(0, 0), box(0, 0, 10, 10)]},
+        crs="epsg:4326",
+    )
+
+    with pytest.raises(ValueError, match="Geometry type combination is not supported"):
+        gdf.to_arrow(geometry_encoding="geoarrow")
+
+    gdf = GeoDataFrame(
+        {"geometry": [Point(0, 0), MultiPoint([(0, 0), (1, 1)])]},
+        crs="epsg:4326",
+    )
+    result = pa_table(gdf.to_arrow(geometry_encoding="geoarrow"))
+    assert (
+        result.schema.field("geometry").metadata[b"ARROW:extension:name"]
+        == b"geoarrow.multipoint"
+    )
+
+
+@pytest.mark.parametrize("geom_type", ["point", "polygon"])
+@pytest.mark.parametrize(
+    "encoding, interleaved", [("WKB", True), ("geoarrow", True), ("geoarrow", False)]
+)
+def test_geoarrow_missing(encoding, interleaved, geom_type):
+    # dummy test for single geometry type until missing values are included
+    # in the test data for test_geoarrow_export
+    gdf = GeoDataFrame(
+        geometry=[Point(0, 0) if geom_type == "point" else box(0, 0, 10, 10), None],
+        crs="epsg:4326",
+    )
+    if (
+        encoding == "geoarrow"
+        and geom_type == "point"
+        and interleaved
+        and Version(pa.__version__) < Version("15.0.0")
+    ):
+        with pytest.raises(
+            ValueError,
+            match="Converting point geometries with missing values is not supported",
+        ):
+            gdf.to_arrow(geometry_encoding=encoding, interleaved=interleaved)
+        return
+    result = pa_table(gdf.to_arrow(geometry_encoding=encoding, interleaved=interleaved))
+    assert result["geometry"].null_count == 1
+    assert result["geometry"].is_null().to_pylist() == [False, True]
+
+
+def test_geoarrow_include_z():
+    gdf = GeoDataFrame({"geometry": [Point(0, 0), Point(1, 1), Point()]})
+
+    table = pa_table(gdf.to_arrow(geometry_encoding="geoarrow"))
+    assert table["geometry"].type.value_field.name == "xy"
+    assert table["geometry"].type.list_size == 2
+
+    table = pa_table(gdf.to_arrow(geometry_encoding="geoarrow", include_z=True))
+    assert table["geometry"].type.value_field.name == "xyz"
+    assert table["geometry"].type.list_size == 3
+    assert np.isnan(table["geometry"].chunk(0).values.to_numpy()[2::3]).all()
+
+    gdf = GeoDataFrame({"geometry": [Point(0, 0, 0), Point(1, 1, 1), Point()]})
+
+    table = pa_table(gdf.to_arrow(geometry_encoding="geoarrow"))
+    assert table["geometry"].type.value_field.name == "xyz"
+    assert table["geometry"].type.list_size == 3
+
+    table = pa_table(gdf.to_arrow(geometry_encoding="geoarrow", include_z=False))
+    assert table["geometry"].type.value_field.name == "xy"
+    assert table["geometry"].type.list_size == 2
+
+
+@contextlib.contextmanager
+def with_geoarrow_extension_types():
+    gp = pytest.importorskip("geoarrow.pyarrow")
+    gp.register_extension_types()
+    try:
+        yield
+    finally:
+        gp.unregister_extension_types()
+
+
+@pytest.mark.parametrize("dim", ["xy", "xyz"])
+@pytest.mark.parametrize(
+    "geometry_type",
+    ["point", "linestring", "polygon", "multipoint", "multilinestring", "multipolygon"],
+)
+def test_geoarrow_export_with_extension_types(geometry_type, dim):
+    # ensure the exported data can be imported by geoarrow-pyarrow and are
+    # recognized as extension types
+    base_path = DATA_PATH / "geoarrow"
+    suffix = geometry_type + ("_z" if dim == "xyz" else "")
+
+    # Read the example data
+    df = feather.read_feather(base_path / f"example-{suffix}-wkb.arrow")
+    df["geometry"] = GeoSeries.from_wkb(df["geometry"])
+    df["row_number"] = df["row_number"].astype("int32")
+    df = GeoDataFrame(df)
+    df.geometry.array.crs = None
+
+    pytest.importorskip("geoarrow.pyarrow")
+
+    with with_geoarrow_extension_types():
+        result1 = pa_table(df.to_arrow(geometry_encoding="WKB"))
+        assert isinstance(result1["geometry"].type, pa.ExtensionType)
+
+        result2 = pa_table(df.to_arrow(geometry_encoding="geoarrow"))
+        assert isinstance(result2["geometry"].type, pa.ExtensionType)
+
+        result3 = pa_table(df.to_arrow(geometry_encoding="geoarrow", interleaved=False))
+        assert isinstance(result3["geometry"].type, pa.ExtensionType)
+
+
+@pytest.mark.skipif(
+    Version(shapely.__version__) < Version("2.0.2"),
+    reason="from_ragged_array failing with read-only array input",
+)
+@pytest.mark.parametrize("dim", ["xy", "xyz"])
+@pytest.mark.parametrize(
+    "geometry_type",
+    [
+        "point",
+        "linestring",
+        "polygon",
+        "multipoint",
+        "multilinestring",
+        "multipolygon",
+    ],
+)
+def test_geoarrow_import(geometry_type, dim):
+    base_path = DATA_PATH / "geoarrow"
+    suffix = geometry_type + ("_z" if dim == "xyz" else "")
+
+    # Read the example data
+    df = feather.read_feather(base_path / f"example-{suffix}-wkb.arrow")
+    df["geometry"] = GeoSeries.from_wkb(df["geometry"])
+    df = GeoDataFrame(df)
+    df.geometry.crs = None
+
+    table1 = feather.read_table(base_path / f"example-{suffix}-wkb.arrow")
+    result1 = GeoDataFrame.from_arrow(table1)
+    assert_geodataframe_equal(result1, df)
+
+    table2 = feather.read_table(base_path / f"example-{suffix}-interleaved.arrow")
+    result2 = GeoDataFrame.from_arrow(table2)
+    assert_geodataframe_equal(result2, df)
+
+    table3 = feather.read_table(base_path / f"example-{suffix}.arrow")
+    result3 = GeoDataFrame.from_arrow(table3)
+    assert_geodataframe_equal(result3, df)
+
+
+@pytest.mark.skipif(
+    Version(shapely.__version__) < Version("2.0.2"),
+    reason="from_ragged_array failing with read-only array input",
+)
+@pytest.mark.parametrize("encoding", ["WKB", "geoarrow"])
+def test_geoarrow_import_geometry_column(encoding):
+    pytest.importorskip("pyproj")
+    # ensure each geometry column has its own crs
+    gdf = GeoDataFrame(geometry=[box(0, 0, 10, 10)])
+    gdf["centroid"] = gdf.geometry.centroid
+
+    result = GeoDataFrame.from_arrow(pa_table(gdf.to_arrow(geometry_encoding=encoding)))
+    assert_geodataframe_equal(result, gdf)
+    assert result.active_geometry_name == "geometry"
+
+    result = GeoDataFrame.from_arrow(
+        pa_table(gdf[["centroid"]].to_arrow(geometry_encoding=encoding))
+    )
+    assert result.active_geometry_name == "centroid"
+
+    result = GeoDataFrame.from_arrow(
+        pa_table(gdf.to_arrow(geometry_encoding=encoding)), geometry="centroid"
+    )
+    assert result.active_geometry_name == "centroid"
+    assert_geodataframe_equal(result, gdf.set_geometry("centroid"))
+
+
+def test_geoarrow_import_missing_geometry():
+    pytest.importorskip("pyarrow", minversion="14.0.0")
+
+    table = pa.table({"a": [0, 1, 2], "b": [0.1, 0.2, 0.3]})
+    with pytest.raises(ValueError, match="No geometry column found"):
+        GeoDataFrame.from_arrow(table)
+
+    with pytest.raises(ValueError, match="No GeoArrow geometry field found"):
+        GeoSeries.from_arrow(table["a"].chunk(0))
+
+
+def test_geoarrow_import_capsule_interface():
+    # ensure we can import non-pyarrow object
+    pytest.importorskip("pyarrow", minversion="14.0.0")
+    gdf = GeoDataFrame({"col": [1]}, geometry=[box(0, 0, 10, 10)])
+
+    result = GeoDataFrame.from_arrow(gdf.to_arrow())
+    assert_geodataframe_equal(result, gdf)
+
+
+@pytest.mark.parametrize("dim", ["xy", "xyz"])
+@pytest.mark.parametrize(
+    "geometry_type",
+    ["point", "linestring", "polygon", "multipoint", "multilinestring", "multipolygon"],
+)
+def test_geoarrow_import_from_extension_types(geometry_type, dim):
+    # ensure the exported data can be imported by geoarrow-pyarrow and are
+    # recognized as extension types
+    pytest.importorskip("pyproj")
+    base_path = DATA_PATH / "geoarrow"
+    suffix = geometry_type + ("_z" if dim == "xyz" else "")
+
+    # Read the example data
+    df = feather.read_feather(base_path / f"example-{suffix}-wkb.arrow")
+    df["geometry"] = GeoSeries.from_wkb(df["geometry"])
+    df = GeoDataFrame(df, crs="EPSG:3857")
+
+    pytest.importorskip("geoarrow.pyarrow")
+
+    with with_geoarrow_extension_types():
+        result1 = GeoDataFrame.from_arrow(
+            pa_table(df.to_arrow(geometry_encoding="WKB"))
+        )
+        assert_geodataframe_equal(result1, df)
+
+        result2 = GeoDataFrame.from_arrow(
+            pa_table(df.to_arrow(geometry_encoding="geoarrow"))
+        )
+        assert_geodataframe_equal(result2, df)
+
+        result3 = GeoDataFrame.from_arrow(
+            pa_table(df.to_arrow(geometry_encoding="geoarrow", interleaved=False))
+        )
+        assert_geodataframe_equal(result3, df)
+
+
+def test_geoarrow_import_geoseries():
+    pytest.importorskip("pyproj")
+    gp = pytest.importorskip("geoarrow.pyarrow")
+    ser = GeoSeries.from_wkt(["POINT (1 1)", "POINT (2 2)"], crs="EPSG:3857")
+
+    with with_geoarrow_extension_types():
+        arr = gp.array(ser.to_arrow(geometry_encoding="WKB"))
+        result = GeoSeries.from_arrow(arr)
+        assert_geoseries_equal(result, ser)
+
+        arr = gp.array(ser.to_arrow(geometry_encoding="geoarrow"))
+        result = GeoSeries.from_arrow(arr)
+        assert_geoseries_equal(result, ser)
+
+        # the name is lost when going through a pyarrow.Array
+        ser.name = "name"
+        arr = gp.array(ser.to_arrow())
+        result = GeoSeries.from_arrow(arr)
+        assert result.name is None
+        # we can specify the name as one of the kwargs
+        result = GeoSeries.from_arrow(arr, name="test")
+        assert_geoseries_equal(result, ser)
+
+
+def test_geoarrow_import_unknown_geoarrow_type():
+    gdf = GeoDataFrame({"col": [1]}, geometry=[box(0, 0, 10, 10)])
+    table = pa_table(gdf.to_arrow())
+    schema = table.schema
+    new_field = schema.field("geometry").with_metadata(
+        {
+            b"ARROW:extension:name": b"geoarrow.unknown",
+            b"ARROW:extension:metadata": b"{}",
+        }
+    )
+
+    new_schema = pa.schema([schema.field(0), new_field])
+    new_table = table.cast(new_schema)
+
+    with pytest.raises(TypeError, match="Unknown GeoArrow extension type"):
+        GeoDataFrame.from_arrow(new_table)
@@ -0,0 +1,306 @@
+from collections import OrderedDict
+
+import numpy as np
+import pandas as pd
+
+from shapely.geometry import (
+    LineString,
+    MultiLineString,
+    MultiPoint,
+    MultiPolygon,
+    Point,
+    Polygon,
+)
+
+from geopandas import GeoDataFrame
+from geopandas.io.file import infer_schema
+
+import pytest
+
+# Credit: Polygons below come from Montreal city Open Data portal
+# http://donnees.ville.montreal.qc.ca/dataset/unites-evaluation-fonciere
+#
+# Module-level geometries shared by the infer_schema tests in this file.
+city_hall_boundaries = Polygon(
+    (
+        (-73.5541107525234, 45.5091983609661),
+        (-73.5546126200639, 45.5086813829106),
+        (-73.5540185061397, 45.5084409343852),
+        (-73.5539986525799, 45.5084323044531),
+        (-73.5535801792994, 45.5089539203786),
+        (-73.5541107525234, 45.5091983609661),
+    )
+)
+vauquelin_place = Polygon(
+    (
+        (-73.5542465586147, 45.5081555487952),
+        (-73.5540185061397, 45.5084409343852),
+        (-73.5546126200639, 45.5086813829106),
+        (-73.5548825850032, 45.5084033554357),
+        (-73.5542465586147, 45.5081555487952),
+    )
+)
+
+# Two LineStrings that together run through the vertices of
+# ``city_hall_boundaries`` (first three, then last three coordinates).
+city_hall_walls = [
+    LineString(
+        (
+            (-73.5541107525234, 45.5091983609661),
+            (-73.5546126200639, 45.5086813829106),
+            (-73.5540185061397, 45.5084409343852),
+        )
+    ),
+    LineString(
+        (
+            (-73.5539986525799, 45.5084323044531),
+            (-73.5535801792994, 45.5089539203786),
+            (-73.5541107525234, 45.5091983609661),
+        )
+    ),
+]
+
+# 2D points
+city_hall_entrance = Point(-73.553785, 45.508722)
+city_hall_balcony = Point(-73.554138, 45.509080)
+city_hall_council_chamber = Point(-73.554246, 45.508931)
+
+# 3D geometries; every coordinate has z = 300.
+point_3D = Point(-73.553785, 45.508722, 300)
+linestring_3D = LineString(
+    (
+        (-73.5541107525234, 45.5091983609661, 300),
+        (-73.5546126200639, 45.5086813829106, 300),
+        (-73.5540185061397, 45.5084409343852, 300),
+    )
+)
+# NOTE(review): the ring below is given with only three coordinates
+# (first == last) -- presumably enough for these schema tests; confirm.
+polygon_3D = Polygon(
+    (
+        (-73.5541107525234, 45.5091983609661, 300),
+        (-73.5535801792994, 45.5089539203786, 300),
+        (-73.5541107525234, 45.5091983609661, 300),
+    )
+)
+
+
+def test_infer_schema_only_points():
+    df = GeoDataFrame(geometry=[city_hall_entrance, city_hall_balcony])
+
+    assert infer_schema(df) == {"geometry": "Point", "properties": OrderedDict()}
+
+
+def test_infer_schema_points_and_multipoints():
+    df = GeoDataFrame(
+        geometry=[
+            MultiPoint([city_hall_entrance, city_hall_balcony]),
+            city_hall_balcony,
+        ]
+    )
+
+    assert infer_schema(df) == {
+        "geometry": ["MultiPoint", "Point"],
+        "properties": OrderedDict(),
+    }
+
+
+def test_infer_schema_only_multipoints():
+    df = GeoDataFrame(
+        geometry=[
+            MultiPoint(
+                [city_hall_entrance, city_hall_balcony, city_hall_council_chamber]
+            )
+        ]
+    )
+
+    assert infer_schema(df) == {"geometry": "MultiPoint", "properties": OrderedDict()}
+
+
+def test_infer_schema_only_linestrings():
+    df = GeoDataFrame(geometry=city_hall_walls)
+
+    assert infer_schema(df) == {"geometry": "LineString", "properties": OrderedDict()}
+
+
+def test_infer_schema_linestrings_and_multilinestrings():
+    df = GeoDataFrame(geometry=[MultiLineString(city_hall_walls), city_hall_walls[0]])
+
+    assert infer_schema(df) == {
+        "geometry": ["MultiLineString", "LineString"],
+        "properties": OrderedDict(),
+    }
+
+
+def test_infer_schema_only_multilinestrings():
+    df = GeoDataFrame(geometry=[MultiLineString(city_hall_walls)])
+
+    assert infer_schema(df) == {
+        "geometry": "MultiLineString",
+        "properties": OrderedDict(),
+    }
+
+
+def test_infer_schema_only_polygons():
+    df = GeoDataFrame(geometry=[city_hall_boundaries, vauquelin_place])
+
+    assert infer_schema(df) == {"geometry": "Polygon", "properties": OrderedDict()}
+
+
+def test_infer_schema_polygons_and_multipolygons():
+    df = GeoDataFrame(
+        geometry=[
+            MultiPolygon((city_hall_boundaries, vauquelin_place)),
+            city_hall_boundaries,
+        ]
+    )
+
+    assert infer_schema(df) == {
+        "geometry": ["MultiPolygon", "Polygon"],
+        "properties": OrderedDict(),
+    }
+
+
+def test_infer_schema_only_multipolygons():
+    df = GeoDataFrame(geometry=[MultiPolygon((city_hall_boundaries, vauquelin_place))])
+
+    assert infer_schema(df) == {"geometry": "MultiPolygon", "properties": OrderedDict()}
+
+
+def test_infer_schema_multiple_shape_types():
+    df = GeoDataFrame(
+        geometry=[
+            MultiPolygon((city_hall_boundaries, vauquelin_place)),
+            city_hall_boundaries,
+            MultiLineString(city_hall_walls),
+            city_hall_walls[0],
+            MultiPoint([city_hall_entrance, city_hall_balcony]),
+            city_hall_balcony,
+        ]
+    )
+
+    assert infer_schema(df) == {
+        "geometry": [
+            "MultiPolygon",
+            "Polygon",
+            "MultiLineString",
+            "LineString",
+            "MultiPoint",
+            "Point",
+        ],
+        "properties": OrderedDict(),
+    }
+
+
+def test_infer_schema_mixed_3D_shape_type():
+    df = GeoDataFrame(
+        geometry=[
+            MultiPolygon((city_hall_boundaries, vauquelin_place)),
+            city_hall_boundaries,
+            MultiLineString(city_hall_walls),
+            city_hall_walls[0],
+            MultiPoint([city_hall_entrance, city_hall_balcony]),
+            city_hall_balcony,
+            point_3D,
+        ]
+    )
+
+    assert infer_schema(df) == {
+        "geometry": [
+            "3D Point",
+            "MultiPolygon",
+            "Polygon",
+            "MultiLineString",
+            "LineString",
+            "MultiPoint",
+            "Point",
+        ],
+        "properties": OrderedDict(),
+    }
+
+
+def test_infer_schema_mixed_3D_Point():
+    df = GeoDataFrame(geometry=[city_hall_balcony, point_3D])
+
+    assert infer_schema(df) == {
+        "geometry": ["3D Point", "Point"],
+        "properties": OrderedDict(),
+    }
+
+
+def test_infer_schema_only_3D_Points():
+    df = GeoDataFrame(geometry=[point_3D, point_3D])
+
+    assert infer_schema(df) == {"geometry": "3D Point", "properties": OrderedDict()}
+
+
+def test_infer_schema_mixed_3D_linestring():
+    df = GeoDataFrame(geometry=[city_hall_walls[0], linestring_3D])
+
+    assert infer_schema(df) == {
+        "geometry": ["3D LineString", "LineString"],
+        "properties": OrderedDict(),
+    }
+
+
+def test_infer_schema_only_3D_linestrings():
+    df = GeoDataFrame(geometry=[linestring_3D, linestring_3D])
+
+    assert infer_schema(df) == {
+        "geometry": "3D LineString",
+        "properties": OrderedDict(),
+    }
+
+
+def test_infer_schema_mixed_3D_Polygon():
+    df = GeoDataFrame(geometry=[city_hall_boundaries, polygon_3D])
+
+    assert infer_schema(df) == {
+        "geometry": ["3D Polygon", "Polygon"],
+        "properties": OrderedDict(),
+    }
+
+
+def test_infer_schema_only_3D_Polygons():
+    df = GeoDataFrame(geometry=[polygon_3D, polygon_3D])
+
+    assert infer_schema(df) == {"geometry": "3D Polygon", "properties": OrderedDict()}
+
+
+def test_infer_schema_null_geometry_and_2D_point():
+    df = GeoDataFrame(geometry=[None, city_hall_entrance])
+
+    # None geometry type is then omitted
+    assert infer_schema(df) == {"geometry": "Point", "properties": OrderedDict()}
+
+
+def test_infer_schema_null_geometry_and_3D_point():
+    df = GeoDataFrame(geometry=[None, point_3D])
+
+    # None geometry type is then omitted
+    assert infer_schema(df) == {"geometry": "3D Point", "properties": OrderedDict()}
+
+
+def test_infer_schema_null_geometry_all():
+    df = GeoDataFrame(geometry=[None, None])
+
+    # None geometry type in then replaced by 'Unknown'
+    # (default geometry type supported by Fiona)
+    assert infer_schema(df) == {"geometry": "Unknown", "properties": OrderedDict()}
+
+
+@pytest.mark.parametrize(
+    "array_data,dtype", [([1, 2**31 - 1], np.int32), ([1, np.nan], pd.Int32Dtype())]
+)
+def test_infer_schema_int32(array_data, dtype):
+    int32col = pd.array(data=array_data, dtype=dtype)
+    df = GeoDataFrame(geometry=[city_hall_entrance, city_hall_balcony])
+    df["int32_column"] = int32col
+
+    assert infer_schema(df) == {
+        "geometry": "Point",
+        "properties": OrderedDict([("int32_column", "int32")]),
+    }
+
+
+def test_infer_schema_int64():
+    int64col = pd.array([1, np.nan], dtype=pd.Int64Dtype())
+    df = GeoDataFrame(geometry=[city_hall_entrance, city_hall_balcony])
+    df["int64_column"] = int64col
+
+    assert infer_schema(df) == {
+        "geometry": "Point",
+        "properties": OrderedDict([("int64_column", "int")]),
+    }
@@ -0,0 +1,56 @@
+"""
+See generate_legacy_storage_files.py for the creation of the legacy files.
+
+"""
+
+import glob
+import os
+import pathlib
+
+import pandas as pd
+
+import pytest
+from geopandas.testing import assert_geodataframe_equal
+
+# "data" directory located next to this test module; the pickle files used
+# below are looked up in its "pickle" subdirectory.
+DATA_PATH = pathlib.Path(os.path.dirname(__file__)) / "data"
+
+
+@pytest.fixture(scope="module")
+def current_pickle_data():
+    # our current version pickle data
+    from .generate_legacy_storage_files import create_pickle_data
+
+    return create_pickle_data()
+
+
+# Paths of all "*.pickle" files in DATA_PATH / "pickle"; evaluated at import
+# time and used to parametrize the ``legacy_pickle`` fixture.
+files = glob.glob(str(DATA_PATH / "pickle" / "*.pickle"))
+
+
+@pytest.fixture(params=files, ids=[p.split("/")[-1] for p in files])
+def legacy_pickle(request):
+    return request.param
+
+
+@pytest.mark.skip(
+    reason=(
+        "shapely 2.0/pygeos-based unpickling currently only works for "
+        "shapely-2.0/pygeos-written files"
+    ),
+)
+def test_legacy_pickles(current_pickle_data, legacy_pickle):
+    result = pd.read_pickle(legacy_pickle)
+
+    for name, value in result.items():
+        expected = current_pickle_data[name]
+        assert_geodataframe_equal(value, expected)
+
+
+def test_round_trip_current(tmpdir, current_pickle_data):
+    data = current_pickle_data
+
+    for name, value in data.items():
+        path = str(tmpdir / "{}.pickle".format(name))
+        value.to_pickle(path)
+        result = pd.read_pickle(path)
+        assert_geodataframe_equal(result, value)
+        assert isinstance(result.has_sindex, bool)
@@ -0,0 +1,878 @@
+"""
+Tests here include reading/writing to different types of spatial databases.
+The spatial database tests may not work without additional system
+configuration. PostGIS tests require a test database to have been set up;
+see geopandas.tests.util for more information.
+"""
+
+import os
+import warnings
+from importlib.util import find_spec
+
+import pandas as pd
+
+import geopandas
+import geopandas._compat as compat
+from geopandas import GeoDataFrame, read_file, read_postgis
+from geopandas._compat import HAS_PYPROJ
+from geopandas.io.sql import _get_conn as get_conn
+from geopandas.io.sql import _write_postgis as write_postgis
+
+import pytest
+from geopandas.tests.util import (
+    create_postgis,
+    create_spatialite,
+    mock,
+    validate_boro_df,
+)
+
+try:
+    from sqlalchemy import text
+except ImportError:
+    # Fall back to ``str`` so this module still imports without sqlalchemy and
+    # the individual tests do not need a local import of ``text``.
+    # All tests calling ``text`` request the ``engine_postgis`` fixture, which
+    # skips the test when sqlalchemy cannot be imported.
+    text = str
+
+
+@pytest.fixture
+def df_nybb(nybb_filename):
+    df = read_file(nybb_filename)
+    return df
+
+
+def check_available_postgis_drivers() -> list[str]:
+    """Work out which of psycopg2 and psycopg are available.
+    This prevents tests running if the relevant package isn't installed
+    (rather than being skipped, as skips are treated as failures during postgis CI)
+    """
+    drivers = []
+    if find_spec("psycopg"):
+        drivers.append("psycopg")
+    if find_spec("psycopg2"):
+        drivers.append("psycopg2")
+    return drivers
+
+
+POSTGIS_DRIVERS = check_available_postgis_drivers()
+
+
+def prepare_database_credentials() -> dict:
+    """Gather postgres connection credentials from environment variables."""
+    return {
+        "dbname": "test_geopandas",
+        "user": os.environ.get("PGUSER"),
+        "password": os.environ.get("PGPASSWORD"),
+        "host": os.environ.get("PGHOST"),
+        "port": os.environ.get("PGPORT"),
+    }
+
+
+@pytest.fixture()
+def connection_postgis(request):
+    """Create a postgres connection using either psycopg2 or psycopg.
+
+    Use this as an indirect fixture, where the request parameter is POSTGIS_DRIVERS."""
+    psycopg = pytest.importorskip(request.param)
+
+    try:
+        con = psycopg.connect(**prepare_database_credentials())
+    except psycopg.OperationalError:
+        pytest.skip("Cannot connect with postgresql database")
+    with warnings.catch_warnings():
+        warnings.filterwarnings(
+            "ignore", message="pandas only supports SQLAlchemy connectable.*"
+        )
+        yield con
+    con.close()
+
+
+@pytest.fixture()
+def engine_postgis(request):
+    """
+    Initiate a sqlalchemy connection engine using either psycopg2 or psycopg.
+
+    Use this as an indirect fixture, where the request parameter is POSTGIS_DRIVERS.
+    """
+    sqlalchemy = pytest.importorskip("sqlalchemy")
+    from sqlalchemy.engine.url import URL
+
+    credentials = prepare_database_credentials()
+    try:
+        con = sqlalchemy.create_engine(
+            URL.create(
+                drivername=f"postgresql+{request.param}",
+                username=credentials["user"],
+                database=credentials["dbname"],
+                password=credentials["password"],
+                host=credentials["host"],
+                port=credentials["port"],
+            )
+        )
+        con.connect()
+    except Exception:
+        pytest.skip("Cannot connect with postgresql database")
+
+    yield con
+    con.dispose()
+
+
+@pytest.fixture()
+def connection_spatialite():
+    """
+    Return a memory-based SQLite3 connection with SpatiaLite enabled & initialized.
+
+    `The sqlite3 module must be built with loadable extension support
+    <https://docs.python.org/3/library/sqlite3.html#f1>`_ and
+    `SpatiaLite <https://www.gaia-gis.it/fossil/libspatialite/index>`_
+    must be available on the system as a SQLite module.
+    Packages available on Anaconda meet requirements.
+
+    Exceptions
+    ----------
+    ``AttributeError`` on missing support for loadable SQLite extensions
+    ``sqlite3.OperationalError`` on missing SpatiaLite
+    """
+    sqlite3 = pytest.importorskip("sqlite3")
+    try:
+        with sqlite3.connect(":memory:") as con:
+            con.enable_load_extension(True)
+            con.load_extension("mod_spatialite")
+            con.execute("SELECT InitSpatialMetaData(TRUE)")
+    except Exception:
+        con.close()
+        pytest.skip("Cannot setup spatialite database")
+
+    yield con
+    con.close()
+
+
+def drop_table_if_exists(conn_or_engine, table):
+    sqlalchemy = pytest.importorskip("sqlalchemy")
+
+    if sqlalchemy.inspect(conn_or_engine).has_table(table):
+        metadata = sqlalchemy.MetaData()
+        with warnings.catch_warnings():
+            warnings.filterwarnings(
+                "ignore", message="Did not recognize type 'geometry' of column.*"
+            )
+            metadata.reflect(conn_or_engine)
+        table = metadata.tables.get(table)
+        if table is not None:
+            table.drop(conn_or_engine, checkfirst=True)
+
+
+@pytest.fixture
+def df_mixed_single_and_multi():
+    from shapely.geometry import LineString, MultiLineString, Point
+
+    df = geopandas.GeoDataFrame(
+        {
+            "geometry": [
+                LineString([(0, 0), (1, 1)]),
+                MultiLineString([[(0, 0), (1, 1)], [(2, 2), (3, 3)]]),
+                Point(0, 1),
+            ]
+        },
+        crs="epsg:4326",
+    )
+    return df
+
+
+@pytest.fixture
+def df_geom_collection():
+    from shapely.geometry import GeometryCollection, LineString, Point, Polygon
+
+    df = geopandas.GeoDataFrame(
+        {
+            "geometry": [
+                GeometryCollection(
+                    [
+                        Polygon([(0, 0), (1, 1), (0, 1)]),
+                        LineString([(0, 0), (1, 1)]),
+                        Point(0, 0),
+                    ]
+                )
+            ]
+        },
+        crs="epsg:4326",
+    )
+    return df
+
+
+@pytest.fixture
+def df_linear_ring():
+    from shapely.geometry import LinearRing
+
+    df = geopandas.GeoDataFrame(
+        {"geometry": [LinearRing(((0, 0), (0, 1), (1, 1), (1, 0)))]}, crs="epsg:4326"
+    )
+    return df
+
+
+@pytest.fixture
+def df_3D_geoms():
+    from shapely.geometry import LineString, Point, Polygon
+
+    df = geopandas.GeoDataFrame(
+        {
+            "geometry": [
+                LineString([(0, 0, 0), (1, 1, 1)]),
+                Polygon([(0, 0, 0), (1, 1, 1), (0, 1, 1)]),
+                Point(0, 1, 2),
+            ]
+        },
+        crs="epsg:4326",
+    )
+    return df
+
+
+class TestIO:
+    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_get_conn(self, engine_postgis):
+        Connection = pytest.importorskip("sqlalchemy.engine.base").Connection
+
+        engine = engine_postgis
+        with get_conn(engine) as output:
+            assert isinstance(output, Connection)
+        with engine.connect() as conn:
+            with get_conn(conn) as output:
+                assert isinstance(output, Connection)
+        with pytest.raises(ValueError):
+            with get_conn(object()):
+                pass
+
+    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_read_postgis_default(self, connection_postgis, df_nybb):
+        con = connection_postgis
+        create_postgis(con, df_nybb)
+
+        sql = "SELECT * FROM nybb;"
+        df = read_postgis(sql, con)
+
+        validate_boro_df(df)
+        # no crs defined on the created geodatabase, and none specified
+        # by user; should not be set to 0, as from get_srid failure
+        assert df.crs is None
+
+    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_read_postgis_custom_geom_col(self, connection_postgis, df_nybb):
+        con = connection_postgis
+        geom_col = "the_geom"
+        create_postgis(con, df_nybb, geom_col=geom_col)
+
+        sql = "SELECT * FROM nybb;"
+        df = read_postgis(sql, con, geom_col=geom_col)
+
+        validate_boro_df(df)
+
+    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_read_postgis_select_geom_as(self, connection_postgis, df_nybb):
+        """Tests that a SELECT {geom} AS {some_other_geom} works."""
+        con = connection_postgis
+        orig_geom = "geom"
+        out_geom = "the_geom"
+        create_postgis(con, df_nybb, geom_col=orig_geom)
+
+        sql = """SELECT borocode, boroname, shape_leng, shape_area,
+                    {} as {} FROM nybb;""".format(
+            orig_geom, out_geom
+        )
+        df = read_postgis(sql, con, geom_col=out_geom)
+
+        validate_boro_df(df)
+
+    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_read_postgis_get_srid(self, connection_postgis, df_nybb):
+        """Tests that an SRID can be read from a geodatabase (GH #451)."""
+        con = connection_postgis
+        crs = "epsg:4269"
+        df_reproj = df_nybb.to_crs(crs)
+        create_postgis(con, df_reproj, srid=4269)
+
+        sql = "SELECT * FROM nybb;"
+        df = read_postgis(sql, con)
+
+        validate_boro_df(df)
+        assert df.crs == crs
+
+    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_read_postgis_override_srid(self, connection_postgis, df_nybb):
+        """Tests that a user specified CRS overrides the geodatabase SRID."""
+        con = connection_postgis
+        orig_crs = df_nybb.crs
+        create_postgis(con, df_nybb, srid=4269)
+
+        sql = "SELECT * FROM nybb;"
+        df = read_postgis(sql, con, crs=orig_crs)
+
+        validate_boro_df(df)
+        assert df.crs == orig_crs
+
+    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_from_postgis_default(self, connection_postgis, df_nybb):
+        con = connection_postgis
+        create_postgis(con, df_nybb)
+
+        sql = "SELECT * FROM nybb;"
+        df = GeoDataFrame.from_postgis(sql, con)
+
+        validate_boro_df(df, case_sensitive=False)
+
+    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_from_postgis_custom_geom_col(self, connection_postgis, df_nybb):
+        con = connection_postgis
+        geom_col = "the_geom"
+        create_postgis(con, df_nybb, geom_col=geom_col)
+
+        sql = "SELECT * FROM nybb;"
+        df = GeoDataFrame.from_postgis(sql, con, geom_col=geom_col)
+
+        validate_boro_df(df, case_sensitive=False)
+
+    def test_read_postgis_null_geom(self, connection_spatialite, df_nybb):
+        """Tests that geometry with NULL is accepted."""
+        con = connection_spatialite
+        geom_col = df_nybb.geometry.name
+        df_nybb.geometry.iat[0] = None
+        create_spatialite(con, df_nybb)
+        sql = (
+            "SELECT ogc_fid, borocode, boroname, shape_leng, shape_area, "
+            'AsEWKB("{0}") AS "{0}" FROM nybb'.format(geom_col)
+        )
+        df = read_postgis(sql, con, geom_col=geom_col)
+        validate_boro_df(df)
+
+    def test_read_postgis_binary(self, connection_spatialite, df_nybb):
+        """Tests that geometry read as binary is accepted."""
+        con = connection_spatialite
+        geom_col = df_nybb.geometry.name
+        create_spatialite(con, df_nybb)
+        sql = (
+            "SELECT ogc_fid, borocode, boroname, shape_leng, shape_area, "
+            'ST_AsBinary("{0}") AS "{0}" FROM nybb'.format(geom_col)
+        )
+        df = read_postgis(sql, con, geom_col=geom_col)
+        validate_boro_df(df)
+
+    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_read_postgis_chunksize(self, connection_postgis, df_nybb):
+        """Test chunksize argument"""
+        chunksize = 2
+        con = connection_postgis
+        create_postgis(con, df_nybb)
+
+        sql = "SELECT * FROM nybb;"
+        df = pd.concat(read_postgis(sql, con, chunksize=chunksize))
+
+        validate_boro_df(df)
+        # no crs defined on the created geodatabase, and none specified
+        # by user; should not be set to 0, as from get_srid failure
+        assert df.crs is None
+
+    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_write_postgis_default(self, engine_postgis, df_nybb):
+        """Tests that GeoDataFrame can be written to PostGIS with defaults."""
+        engine = engine_postgis
+        table = "nybb"
+
+        # If table exists, delete it before trying to write with defaults
+        drop_table_if_exists(engine, table)
+
+        # Write to db
+        write_postgis(df_nybb, con=engine, name=table, if_exists="fail")
+        # Validate
+        sql = text("SELECT * FROM {table};".format(table=table))
+        df = read_postgis(sql, engine, geom_col="geometry")
+        validate_boro_df(df)
+
+    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_write_postgis_uppercase_tablename(self, engine_postgis, df_nybb):
+        """Tests writing GeoDataFrame to PostGIS with uppercase tablename."""
+        engine = engine_postgis
+        table = "aTestTable"
+
+        # If table exists, delete it before trying to write with defaults
+        drop_table_if_exists(engine, table)
+
+        # Write to db
+        write_postgis(df_nybb, con=engine, name=table, if_exists="fail")
+        # Validate
+        sql = text('SELECT * FROM "{table}";'.format(table=table))
+        df = read_postgis(sql, engine, geom_col="geometry")
+        validate_boro_df(df)
+
+    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_write_postgis_sqlalchemy_connection(self, engine_postgis, df_nybb):
+        """Tests that GeoDataFrame can be written to PostGIS with defaults."""
+        with engine_postgis.begin() as con:
+            table = "nybb_con"
+
+            # If table exists, delete it before trying to write with defaults
+            drop_table_if_exists(con, table)
+
+            # Write to db
+            write_postgis(df_nybb, con=con, name=table, if_exists="fail")
+            # Validate
+            sql = text("SELECT * FROM {table};".format(table=table))
+            df = read_postgis(sql, con, geom_col="geometry")
+            validate_boro_df(df)
+
+    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_write_postgis_fail_when_table_exists(self, engine_postgis, df_nybb):
+        """
+        Tests that uploading the same table raises error when: if_replace='fail'.
+        """
+        engine = engine_postgis
+
+        table = "nybb"
+
+        # Ensure table exists
+        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
+
+        try:
+            write_postgis(df_nybb, con=engine, name=table, if_exists="fail")
+        except ValueError as e:
+            if "already exists" in str(e):
+                pass
+            else:
+                raise e
+
+    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_write_postgis_replace_when_table_exists(self, engine_postgis, df_nybb):
+        """
+        Tests that replacing a table is possible when: if_replace='replace'.
+        """
+        engine = engine_postgis
+
+        table = "nybb"
+
+        # Ensure table exists
+        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
+        # Overwrite
+        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
+        # Validate
+        sql = text("SELECT * FROM {table};".format(table=table))
+        df = read_postgis(sql, engine, geom_col="geometry")
+        validate_boro_df(df)
+
+    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_write_postgis_append_when_table_exists(self, engine_postgis, df_nybb):
+        """
+        Tests that appending to existing table produces correct results when:
+        if_replace='append'.
+        """
+        engine = engine_postgis
+
+        table = "nybb"
+
+        orig_rows, orig_cols = df_nybb.shape
+        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
+        write_postgis(df_nybb, con=engine, name=table, if_exists="append")
+        # Validate
+        sql = text("SELECT * FROM {table};".format(table=table))
+        df = read_postgis(sql, engine, geom_col="geometry")
+        new_rows, new_cols = df.shape
+
+        # There should be twice as many rows in the new table
+        assert new_rows == orig_rows * 2, (
+            "There should be {target} rows,found: {current}".format(
+                target=orig_rows * 2, current=new_rows
+            ),
+        )
+        # Number of columns should stay the same
+        assert new_cols == orig_cols, (
+            "There should be {target} columns,found: {current}".format(
+                target=orig_cols, current=new_cols
+            ),
+        )
+
+    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_write_postgis_without_crs(self, engine_postgis, df_nybb):
+        """
+        Tests that GeoDataFrame can be written to PostGIS without CRS information.
+        """
+        engine = engine_postgis
+
+        table = "nybb"
+
+        # Write to db
+        df_nybb.geometry.array.crs = None
+        with pytest.warns(UserWarning, match="Could not parse CRS from the GeoDataF"):
+            write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
+        # Validate that srid is -1
+        sql = text(
+            "SELECT Find_SRID('{schema}', '{table}', '{geom_col}');".format(
+                schema="public", table=table, geom_col="geometry"
+            )
+        )
+        with engine.connect() as conn:
+            target_srid = conn.execute(sql).fetchone()[0]
+        assert target_srid == 0, "SRID should be 0, found %s" % target_srid
+
+    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_write_postgis_with_esri_authority(self, engine_postgis, df_nybb):
+        """
+        Tests that GeoDataFrame can be written to PostGIS with ESRI Authority
+        CRS information (GH #2414).
+        """
+        engine = engine_postgis
+
+        table = "nybb"
+
+        # Write to db
+        df_nybb_esri = df_nybb.to_crs("ESRI:102003")
+        write_postgis(df_nybb_esri, con=engine, name=table, if_exists="replace")
+        # Validate that srid is 102003
+        sql = text(
+            "SELECT Find_SRID('{schema}', '{table}', '{geom_col}');".format(
+                schema="public", table=table, geom_col="geometry"
+            )
+        )
+        with engine.connect() as conn:
+            target_srid = conn.execute(sql).fetchone()[0]
+        assert target_srid == 102003, "SRID should be 102003, found %s" % target_srid
+
+    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_write_postgis_geometry_collection(
+        self, engine_postgis, df_geom_collection
+    ):
+        """
+        Tests that writing a mix of different geometry types is possible.
+        """
+        engine = engine_postgis
+
+        table = "geomtype_tests"
+
+        write_postgis(df_geom_collection, con=engine, name=table, if_exists="replace")
+
+        # Validate geometry type
+        sql = text(
+            "SELECT DISTINCT(GeometryType(geometry)) FROM {table} ORDER BY 1;".format(
+                table=table
+            )
+        )
+        with engine.connect() as conn:
+            geom_type = conn.execute(sql).fetchone()[0]
+        sql = text("SELECT * FROM {table};".format(table=table))
+        df = read_postgis(sql, engine, geom_col="geometry")
+
+        assert geom_type.upper() == "GEOMETRYCOLLECTION"
+        assert df.geom_type.unique()[0] == "GeometryCollection"
+
+    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_write_postgis_mixed_geometry_types(
+        self, engine_postgis, df_mixed_single_and_multi
+    ):
+        """
+        Tests that writing a mix of single and MultiGeometries is possible.
+        """
+        engine = engine_postgis
+
+        table = "geomtype_tests"
+
+        write_postgis(
+            df_mixed_single_and_multi, con=engine, name=table, if_exists="replace"
+        )
+
+        # Validate geometry type
+        sql = text(
+            "SELECT DISTINCT GeometryType(geometry) FROM {table} ORDER BY 1;".format(
+                table=table
+            )
+        )
+        with engine.connect() as conn:
+            res = conn.execute(sql).fetchall()
+        assert res[0][0].upper() == "LINESTRING"
+        assert res[1][0].upper() == "MULTILINESTRING"
+        assert res[2][0].upper() == "POINT"
+
+    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_write_postgis_linear_ring(self, engine_postgis, df_linear_ring):
+        """
+        Tests that writing a LinearRing.
+        """
+        engine = engine_postgis
+
+        table = "geomtype_tests"
+
+        write_postgis(df_linear_ring, con=engine, name=table, if_exists="replace")
+
+        # Validate geometry type
+        sql = text(
+            "SELECT DISTINCT(GeometryType(geometry)) FROM {table} ORDER BY 1;".format(
+                table=table
+            )
+        )
+        with engine.connect() as conn:
+            geom_type = conn.execute(sql).fetchone()[0]
+
+        assert geom_type.upper() == "LINESTRING"
+
+    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_write_postgis_in_chunks(self, engine_postgis, df_mixed_single_and_multi):
+        """
+        Tests writing a LinearRing works.
+        """
+        engine = engine_postgis
+
+        table = "geomtype_tests"
+
+        write_postgis(
+            df_mixed_single_and_multi,
+            con=engine,
+            name=table,
+            if_exists="replace",
+            chunksize=1,
+        )
+        # Validate row count
+        sql = text("SELECT COUNT(geometry) FROM {table};".format(table=table))
+        with engine.connect() as conn:
+            row_cnt = conn.execute(sql).fetchone()[0]
+        assert row_cnt == 3
+
+        # Validate geometry type
+        sql = text(
+            "SELECT DISTINCT GeometryType(geometry) FROM {table} ORDER BY 1;".format(
+                table=table
+            )
+        )
+        with engine.connect() as conn:
+            res = conn.execute(sql).fetchall()
+        assert res[0][0].upper() == "LINESTRING"
+        assert res[1][0].upper() == "MULTILINESTRING"
+        assert res[2][0].upper() == "POINT"
+
+    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_write_postgis_to_different_schema(self, engine_postgis, df_nybb):
+        """
+        Tests writing data to alternative schema.
+        """
+        engine = engine_postgis
+
+        table = "nybb"
+        schema_to_use = "test"
+        sql = text("CREATE SCHEMA IF NOT EXISTS {schema};".format(schema=schema_to_use))
+        with engine.begin() as conn:
+            conn.execute(sql)
+
+        write_postgis(
+            df_nybb, con=engine, name=table, if_exists="replace", schema=schema_to_use
+        )
+        # Validate
+        sql = text(
+            "SELECT * FROM {schema}.{table};".format(schema=schema_to_use, table=table)
+        )
+
+        df = read_postgis(sql, engine, geom_col="geometry")
+        validate_boro_df(df)
+
+    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_write_postgis_to_different_schema_when_table_exists(
+        self, engine_postgis, df_nybb
+    ):
+        """
+        Tests writing data to alternative schema.
+        """
+        engine = engine_postgis
+
+        table = "nybb"
+        schema_to_use = "test"
+        sql = text("CREATE SCHEMA IF NOT EXISTS {schema};".format(schema=schema_to_use))
+        with engine.begin() as conn:
+            conn.execute(sql)
+
+        try:
+            write_postgis(
+                df_nybb, con=engine, name=table, if_exists="fail", schema=schema_to_use
+            )
+            # Validate
+            sql = text(
+                "SELECT * FROM {schema}.{table};".format(
+                    schema=schema_to_use, table=table
+                )
+            )
+
+            df = read_postgis(sql, engine, geom_col="geometry")
+            validate_boro_df(df)
+
+        # Should raise a ValueError when table exists
+        except ValueError:
+            pass
+
+        # Try with replace flag on
+        write_postgis(
+            df_nybb, con=engine, name=table, if_exists="replace", schema=schema_to_use
+        )
+        # Validate
+        sql = text(
+            "SELECT * FROM {schema}.{table};".format(schema=schema_to_use, table=table)
+        )
+
+        df = read_postgis(sql, engine, geom_col="geometry")
+        validate_boro_df(df)
+
+    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_write_postgis_3D_geometries(self, engine_postgis, df_3D_geoms):
+        """
+        Tests writing a geometries with 3 dimensions works.
+        """
+        engine = engine_postgis
+
+        table = "geomtype_tests"
+
+        write_postgis(df_3D_geoms, con=engine, name=table, if_exists="replace")
+
+        # Check that all geometries have 3 dimensions
+        sql = text("SELECT * FROM {table};".format(table=table))
+        df = read_postgis(sql, engine, geom_col="geometry")
+        assert list(df.geometry.has_z) == [True, True, True]
+
+    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_row_order(self, engine_postgis, df_nybb):
+        """
+        Tests that the row order in db table follows the order of the original frame.
+        """
+        engine = engine_postgis
+
+        table = "row_order_test"
+        correct_order = df_nybb["BoroCode"].tolist()
+
+        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
+
+        # Check that the row order matches
+        sql = text("SELECT * FROM {table};".format(table=table))
+        df = read_postgis(sql, engine, geom_col="geometry")
+        assert df["BoroCode"].tolist() == correct_order
+
+    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_append_before_table_exists(self, engine_postgis, df_nybb):
+        """
+        Tests that insert works with if_exists='append' when table does not exist yet.
+        """
+        engine = engine_postgis
+
+        table = "nybb"
+        # If table exists, delete it before trying to write with defaults
+        drop_table_if_exists(engine, table)
+
+        write_postgis(df_nybb, con=engine, name=table, if_exists="append")
+
+        # Check that the row order matches
+        sql = text("SELECT * FROM {table};".format(table=table))
+        df = read_postgis(sql, engine, geom_col="geometry")
+        validate_boro_df(df)
+
+    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_append_with_different_crs(self, engine_postgis, df_nybb):
+        """
+        Tests that the warning is raised if table CRS differs from frame.
+        """
+        engine = engine_postgis
+
+        table = "nybb"
+        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
+
+        # Reproject
+        df_nybb2 = df_nybb.to_crs(epsg=4326)
+
+        # Should raise error when appending
+        with pytest.raises(ValueError, match="CRS of the target table"):
+            write_postgis(df_nybb2, con=engine, name=table, if_exists="append")
+
+    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_append_without_crs(self, engine_postgis, df_nybb):
+        # This test was included in #3328 when the default value for no
+        # CRS was changed from an SRID of -1 to 0. This resolves issues
+        # of appending dataframes to postgis that have no CRS as postgis
+        # no CRS value is 0.
+        engine = engine_postgis
+        df_nybb = df_nybb.set_crs(None, allow_override=True)
+        table = "nybb"
+
+        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
+        # append another dataframe with no crs
+
+        df_nybb2 = df_nybb
+        write_postgis(df_nybb2, con=engine, name=table, if_exists="append")
+
+    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
+    @pytest.mark.xfail(
+        compat.PANDAS_GE_20 and not compat.PANDAS_GE_202,
+        reason="Duplicate columns are dropped in read_sql with pandas 2.0.0 and 2.0.1",
+    )
+    def test_duplicate_geometry_column_fails(self, engine_postgis):
+        """
+        Tests that a ValueError is raised if an SQL query returns two geometry columns.
+        """
+        engine = engine_postgis
+
+        sql = "select ST_MakePoint(0, 0) as geom, ST_MakePoint(0, 0) as geom;"
+
+        with pytest.raises(ValueError):
+            read_postgis(sql, engine, geom_col="geom")
+
+    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_read_non_epsg_crs(self, connection_postgis, df_nybb):
+        con = connection_postgis
+        df_nybb = df_nybb.to_crs(crs="esri:54052")
+        create_postgis(con, df_nybb, srid=54052)
+
+        sql = "SELECT * FROM nybb;"
+        df = read_postgis(sql, con)
+        validate_boro_df(df)
+        assert df.crs == "ESRI:54052"
+
+    @pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
+    @mock.patch("shapely.get_srid")
+    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_read_srid_not_in_table(self, mock_get_srid, connection_postgis, df_nybb):
+        # mock a non-existent srid for edge case if shapely has an srid
+        # not present in postgis table.
+        pyproj = pytest.importorskip("pyproj")
+
+        mock_get_srid.return_value = 99999
+
+        con = connection_postgis
+        df_nybb = df_nybb.to_crs(crs="epsg:4326")
+        create_postgis(con, df_nybb)
+
+        sql = "SELECT * FROM nybb;"
+        with pytest.raises(pyproj.exceptions.CRSError, match="crs not found"):
+            with pytest.warns(UserWarning, match="Could not find srid 99999"):
+                read_postgis(sql, con)
+
+    @mock.patch("geopandas.io.sql._get_spatial_ref_sys_df")
+    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_read_no_spatial_ref_sys_table_in_postgis(
+        self, mock_get_spatial_ref_sys_df, connection_postgis, df_nybb
+    ):
+        # mock for a non-existent spatial_ref_sys database
+
+        mock_get_spatial_ref_sys_df.side_effect = pd.errors.DatabaseError
+
+        con = connection_postgis
+        df_nybb = df_nybb.to_crs(crs="epsg:4326")
+        create_postgis(con, df_nybb, srid=4326)
+
+        sql = "SELECT * FROM nybb;"
+        with pytest.warns(
+            UserWarning, match="Could not find the spatial reference system table"
+        ):
+            df = read_postgis(sql, con)
+
+        assert df.crs == "EPSG:4326"
+
+    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
+    def test_read_non_epsg_crs_chunksize(self, connection_postgis, df_nybb):
+        """Test chunksize argument with non epsg crs"""
+        chunksize = 2
+        con = connection_postgis
+        df_nybb = df_nybb.to_crs(crs="esri:54052")
+
+        create_postgis(con, df_nybb, srid=54052)
+
+        sql = "SELECT * FROM nybb;"
+        df = pd.concat(read_postgis(sql, con, chunksize=chunksize))
+
+        validate_boro_df(df)
+        assert df.crs == "ESRI:54052"
@@ -0,0 +1,118 @@
+"""Vendored, cut down version of pyogrio/util.py for use with fiona"""
+
+import re
+import sys
+from urllib.parse import urlparse
+
+
+def vsi_path(path: str) -> str:
+    """
+    Return a local path unchanged, or convert the input to a GDAL-compatible
+    vsi path.
+
+    Input that already starts with ``/vsi`` is passed through untouched.
+    """
+    if path.startswith("/vsi"):
+        # already in GDAL format
+        return path
+
+    # `urlparse` would mistake a Windows drive letter (e.g. "C:\") for a URL
+    # scheme, so drive-letter paths are dealt with before any URI parsing.
+    has_drive_letter = sys.platform == "win32" and re.match("^[a-zA-Z]\\:", path)
+    if has_drive_letter:
+        archive_candidate = path.split("!")[0]
+        if not archive_candidate.endswith(".zip"):
+            return path
+
+        # zip archive on a drive: add the scheme, then parse like any other URI
+        path = f"zip://{path}"
+
+    inner_path, archive, scheme = _parse_uri(path)
+
+    needs_vsi = scheme or archive or inner_path.endswith(".zip")
+    if needs_vsi:
+        return _construct_vsi_path(inner_path, archive, scheme)
+    return inner_path
+
+
+# URI schemes accepted in input paths, each mapped to the suffix of the GDAL
+# VSI handler that serves it.
+SCHEMES = {
+    "file": "file",
+    "zip": "zip",
+    "tar": "tar",
+    "gzip": "gzip",
+    "http": "curl",
+    "https": "curl",
+    "ftp": "curl",
+    "s3": "s3",
+    "gs": "gs",
+    "az": "az",
+    "adls": "adls",
+    "adl": "adls",  # fsspec uses this
+    "hdfs": "hdfs",
+    "webhdfs": "webhdfs",
+    # GDAL additionally supports oss and swift for remote filesystems, but
+    # those are for now not added as supported URI
+}
+
+# The subset of schemes that is routed through the "curl" handler.
+CURLSCHEMES = {name for name, handler in SCHEMES.items() if handler == "curl"}
+
+
+def _parse_uri(path: str):
+    """
+    Split a path or URI into its path, archive and scheme components.
+
+    Returns a tuple of (path, archive, scheme)
+
+    path : str
+        Parsed path. Includes the hostname and query string in the case
+        of a URI.
+    archive : str
+        Parsed archive path: the segment right before the last "!", or ""
+        when the input contains no "!".
+    scheme : str
+        URI scheme such as "https" or "zip+s3", or "" when there is none.
+
+    Input whose scheme is not listed in SCHEMES is returned unchanged as the
+    path, with empty archive and scheme.
+    """
+    parsed = urlparse(path, allow_fragments=False)
+    scheme = parsed.scheme or ""
+
+    # a scheme GDAL does not support: hand the raw input back
+    if scheme and any(part not in SCHEMES for part in scheme.split("+")):
+        return path, "", ""
+
+    # rebuild the location as "<netloc><path>?<query>"
+    location = parsed.path
+    if parsed.query:
+        location = location + "?" + parsed.query
+    if scheme and parsed.netloc:
+        location = parsed.netloc + location
+
+    # "archive!member": the last piece is the path, the one before the archive
+    pieces = location.split("!")
+    member = pieces[-1]
+    archive = pieces[-2] if len(pieces) > 1 else ""
+    return (member, archive, scheme)
+
+
+def _construct_vsi_path(path, archive, scheme) -> str:
+    """
+    Build a GDAL VSI path from the parts of a parsed path.
+
+    `scheme` may chain several schemes with "+". A "zip" scheme is put in
+    front when `archive` or `path` ends in ".zip" and none was given. When no
+    VSI handler applies, `path` is returned unchanged.
+    """
+    schemes = scheme.split("+")
+
+    looks_zipped = archive.endswith(".zip") or path.endswith(".zip")
+    if looks_zipped and "zip" not in schemes:
+        schemes.insert(0, "zip")
+
+    # one "vsi<handler>" element per scheme; empty entries and plain "file"
+    # contribute nothing
+    handlers = [
+        "vsi{0}".format(SCHEMES[name]) for name in schemes if name and name != "file"
+    ]
+    if not handlers:
+        return path
+    prefix = "/".join(handlers)
+
+    # curl-backed schemes keep their "<scheme>://" in front of the location
+    last_scheme = schemes[-1]
+    suffix = f"{last_scheme}://" if last_scheme in CURLSCHEMES else ""
+
+    if archive:
+        return "/{}/{}{}/{}".format(prefix, suffix, archive, path.lstrip("/"))
+    return "/{}/{}{}".format(prefix, suffix, path)
@@ -0,0 +1,977 @@
+import warnings
+from packaging.version import Version
+
+import numpy as np
+import pandas as pd
+from pandas import CategoricalDtype
+from pandas.plotting import PlotAccessor
+
+import geopandas
+
+from ._decorator import doc
+
+
+def _sanitize_geoms(geoms, prefix="Multi"):
+    """
+    Flatten multi-part geometries and drop empty or missing ones.
+
+    Every geometry whose type starts with `prefix` is replaced by its parts,
+    and the position of the source geometry is repeated once per part, so
+    that each returned geometry can still be matched 1:1 to a value.
+
+    `prefix` selects what gets flattened: 'Multi' for MultiPoint and similar,
+    "Geom" for GeometryCollection.
+
+    Returns
+    -------
+    components : list of geometry
+        `geoms` itself is returned when there is nothing to flatten or drop.
+    component_index : index array
+        indices are repeated for all components in the same Multi geometry
+    """
+    # TODO(shapely) look into simplifying this with
+    # shapely.get_parts(geoms, return_index=True) from shapely 2.0
+    needs_work = (
+        geoms.geom_type.str.startswith(prefix).any()
+        or geoms.is_empty.any()
+        or geoms.isna().any()
+    )
+    if not needs_work:
+        # fast path: nothing to split and nothing to filter out
+        return geoms, np.arange(len(geoms))
+
+    components = []
+    component_index = []
+    for position, geom in enumerate(geoms):
+        if geom is None or geom.is_empty:
+            continue
+
+        if geom.geom_type.startswith(prefix):
+            parts = list(geom.geoms)
+            components.extend(parts)
+            component_index.extend([position] * len(parts))
+        else:
+            components.append(geom)
+            component_index.append(position)
+
+    return components, np.array(component_index)
+
+
+def _expand_kwargs(kwargs, multiindex):
+    """
+    Expand list-like style values in 'kwargs' (in place) with 'multiindex'.
+
+    Most arguments to the plot functions must be a (single) value, or a
+    sequence of values. Every list-like entry of 'kwargs' is re-indexed with
+    'multiindex', unless the entry already appears to be a valid (single)
+    value for its key.
+    """
+    from typing import Iterable
+
+    from matplotlib.colors import is_color_like
+
+    # For these attributes, only a single value is allowed, so never expand.
+    scalar_kwargs = ["marker", "path_effects"]
+
+    def _is_single_value(att, value):
+        """Tell whether 'value' has to be left alone for key 'att'."""
+        if "color" in att:  # color(s), edgecolor(s), facecolor(s)
+            return is_color_like(value)
+        if "linestyle" in att:  # linestyle(s)
+            # A single linestyle can be 2-tuple of a number and an iterable.
+            return (
+                isinstance(value, tuple)
+                and len(value) == 2
+                and isinstance(value[1], Iterable)
+            )
+        return att in scalar_kwargs
+
+    for att, value in kwargs.items():
+        if _is_single_value(att, value):
+            continue
+        if pd.api.types.is_list_like(value):
+            kwargs[att] = np.take(value, multiindex, axis=0)
+
+
+def _PolygonPatch(polygon, **kwargs):
+    """Build a matplotlib patch from a Polygon geometry
+
+    The exterior ring and all interior rings of `polygon` are combined into
+    one compound path; only the first two coordinate columns of each ring
+    are used. The `kwargs` are passed to the matplotlib.patches.PathPatch
+    constructor, and the resulting PathPatch instance is returned.
+
+    Example (using Shapely Point and a matplotlib axes)::
+
+        b = shapely.geometry.Point(0, 0).buffer(1.0)
+        patch = _PolygonPatch(b, fc='blue', ec='blue', alpha=0.5)
+        ax.add_patch(patch)
+
+    GeoPandas originally relied on the descartes package by Sean Gillies
+    (BSD license, https://pypi.org/project/descartes) for PolygonPatch, but
+    this dependency was removed in favor of the below matplotlib code.
+    """
+    from matplotlib.patches import PathPatch
+    from matplotlib.path import Path
+
+    def _ring_path(ring):
+        # drop any coordinate column beyond x and y
+        return Path(np.asarray(ring.coords)[:, :2])
+
+    shell = _ring_path(polygon.exterior)
+    holes = [_ring_path(ring) for ring in polygon.interiors]
+    compound = Path.make_compound_path(shell, *holes)
+    return PathPatch(compound, **kwargs)
+
+
+def _plot_polygon_collection(
+    ax,
+    geoms,
+    values=None,
+    color=None,
+    cmap=None,
+    vmin=None,
+    vmax=None,
+    autolim=True,
+    **kwargs,
+):
+    """
+    Draw Polygon and MultiPolygon geometries on `ax` as one patch collection
+
+    Parameters
+    ----------
+    ax : matplotlib.axes.Axes
+        where shapes will be plotted
+    geoms : a sequence of `N` Polygons and/or MultiPolygons (can be mixed)
+
+    values : a sequence of `N` values, optional
+        Values will be mapped to colors using vmin/vmax/cmap. They should
+        have 1:1 correspondence with the geometries (not their components).
+        Otherwise follows `color` / `facecolor` kwargs.
+    edgecolor : single color or sequence of `N` colors
+        Color for the edge of the polygons
+    facecolor : single color or sequence of `N` colors
+        Color to fill the polygons. Cannot be used together with `values`.
+    color : single color or sequence of `N` colors
+        Sets both `edgecolor` and `facecolor`
+    autolim : bool (default True)
+        Update axes data limits to contain the new geometries.
+    **kwargs
+        Additional keyword arguments passed to the collection
+        (`markersize` and `marker` are dropped first).
+
+    Returns
+    -------
+    collection : matplotlib.collections.Collection that was plotted
+    """
+    from matplotlib.collections import PatchCollection
+
+    parts, part_index = _sanitize_geoms(geoms)
+    mapped = None if values is None else np.take(values, part_index, axis=0)
+
+    # PatchCollection does not accept some kwargs.
+    collection_kwargs = dict(kwargs)
+    for unsupported in ("markersize", "marker"):
+        collection_kwargs.pop(unsupported, None)
+
+    # `color` travels with the other kwargs so it is expanded alongside them.
+    if color is not None:
+        collection_kwargs["color"] = color
+    _expand_kwargs(collection_kwargs, part_index)
+
+    patches = [_PolygonPatch(part) for part in parts]
+    collection = PatchCollection(patches, **collection_kwargs)
+
+    if mapped is not None:
+        collection.set_array(np.asarray(mapped))
+        collection.set_cmap(cmap)
+        # an explicit norm carries its own limits
+        if "norm" not in collection_kwargs:
+            collection.set_clim(vmin, vmax)
+
+    ax.add_collection(collection, autolim=autolim)
+    ax.autoscale_view()
+    return collection
+
+
+def _plot_linestring_collection(
+    ax,
+    geoms,
+    values=None,
+    color=None,
+    cmap=None,
+    vmin=None,
+    vmax=None,
+    autolim=True,
+    **kwargs,
+):
+    """
+    Draw LineString and MultiLineString geometries on `ax` as one line
+    collection
+
+    Parameters
+    ----------
+    ax : matplotlib.axes.Axes
+        where shapes will be plotted
+    geoms : a sequence of `N` LineStrings and/or MultiLineStrings (can be
+            mixed)
+    values : a sequence of `N` values, optional
+        Values will be mapped to colors using vmin/vmax/cmap. They should
+        have 1:1 correspondence with the geometries (not their components).
+    color : single color or sequence of `N` colors
+        Cannot be used together with `values`.
+    autolim : bool (default True)
+        Update axes data limits to contain the new geometries.
+    **kwargs
+        Additional keyword arguments passed to the collection
+        (`markersize` and `marker` are dropped first).
+
+    Returns
+    -------
+    collection : matplotlib.collections.Collection that was plotted
+    """
+    from matplotlib.collections import LineCollection
+
+    parts, part_index = _sanitize_geoms(geoms)
+    mapped = None if values is None else np.take(values, part_index, axis=0)
+
+    # LineCollection does not accept some kwargs.
+    collection_kwargs = dict(kwargs)
+    for unsupported in ("markersize", "marker"):
+        collection_kwargs.pop(unsupported, None)
+
+    # `color` travels with the other kwargs so it is expanded alongside them.
+    if color is not None:
+        collection_kwargs["color"] = color
+    _expand_kwargs(collection_kwargs, part_index)
+
+    # one (n, 2) vertex array per line; extra coordinate columns are dropped
+    segments = []
+    for line in parts:
+        segments.append(np.array(line.coords)[:, :2])
+    collection = LineCollection(segments, **collection_kwargs)
+
+    if mapped is not None:
+        collection.set_array(np.asarray(mapped))
+        collection.set_cmap(cmap)
+        # an explicit norm carries its own limits
+        if "norm" not in collection_kwargs:
+            collection.set_clim(vmin, vmax)
+
+    ax.add_collection(collection, autolim=autolim)
+    ax.autoscale_view()
+    return collection
+
+
+def _plot_point_collection(
+    ax,
+    geoms,
+    values=None,
+    color=None,
+    cmap=None,
+    vmin=None,
+    vmax=None,
+    marker="o",
+    markersize=None,
+    **kwargs,
+):
+    """
+    Draw Point and MultiPoint geometries on `ax` with a single scatter call
+
+    Parameters
+    ----------
+    ax : matplotlib.axes.Axes
+        where shapes will be plotted
+    geoms : sequence of `N` Points or MultiPoints
+
+    values : a sequence of `N` values, optional
+        Values mapped to colors using vmin, vmax, and cmap.
+        Cannot be specified together with `color`.
+    markersize : scalar or array-like, optional
+        Size of the markers. Note that under the hood ``scatter`` is
+        used, so the specified value will be proportional to the
+        area of the marker (size in points^2).
+
+    Returns
+    -------
+    collection : matplotlib.collections.Collection that was plotted
+
+    Raises
+    ------
+    ValueError
+        If both `values` and `color` are given.
+    """
+    if values is not None and color is not None:
+        raise ValueError("Can only specify one of 'values' and 'color' kwargs")
+
+    points, part_index = _sanitize_geoms(geoms)
+
+    x, y = [], []
+    for point in points:
+        if point.is_empty:
+            x.append(None)
+            y.append(None)
+        else:
+            x.append(point.x)
+            y.append(point.y)
+
+    # Only forward what was actually given: matplotlib 1.4 does not support
+    # c=None, and < 2.0 does not support s=None. Everything is routed through
+    # kwargs so that it is expanded to one entry per component below.
+    optional_settings = (
+        ("c", values),
+        ("s", markersize),
+        ("color", color),
+        ("marker", marker),
+    )
+    for key, setting in optional_settings:
+        if setting is not None:
+            kwargs[key] = setting
+    _expand_kwargs(kwargs, part_index)
+
+    # an explicit norm carries its own limits, so vmin/vmax are left out then
+    limits = {} if "norm" in kwargs else {"vmin": vmin, "vmax": vmax}
+    return ax.scatter(x, y, cmap=cmap, **limits, **kwargs)
+
+
+def plot_series(
+    s,
+    cmap=None,
+    color=None,
+    ax=None,
+    figsize=None,
+    aspect="auto",
+    autolim=True,
+    **style_kwds,
+):
+    """
+    Plot a GeoSeries.
+
+    Generate a plot of a GeoSeries geometry with matplotlib.
+
+    Parameters
+    ----------
+    s : Series
+        The GeoSeries to be plotted. Currently Polygon,
+        MultiPolygon, LineString, MultiLineString, Point and MultiPoint
+        geometries can be plotted.
+    cmap : str (default None)
+        The name of a colormap recognized by matplotlib. Any
+        colormap will work, but categorical colormaps are
+        generally recommended. Examples of useful discrete
+        colormaps include:
+
+            tab10, tab20, Accent, Dark2, Paired, Pastel1, Set1, Set2
+
+    color : str, np.array, pd.Series, List (default None)
+        If specified, all objects will be colored uniformly.
+    ax : matplotlib.pyplot.Artist (default None)
+        axes on which to draw the plot
+    figsize : pair of floats (default None)
+        Size of the resulting matplotlib.figure.Figure. If the argument
+        ax is given explicitly, figsize is ignored.
+    aspect : 'auto', 'equal', None or float (default 'auto')
+        Set aspect of axis. If 'auto', the default aspect for map plots is 'equal'; if
+        however data are not projected (coordinates are long/lat), the aspect is by
+        default set to 1/cos(s_y * pi/180) with s_y the y coordinate of the middle of
+        the GeoSeries (the mean of the y range of bounding box) so that a long/lat
+        square appears square in the middle of the plot. This implies an
+        Equirectangular projection. If None, the aspect of `ax` won't be changed. It can
+        also be set manually (float) as the ratio of y-unit to x-unit.
+    autolim : bool (default True)
+        Update axes data limits to contain the new geometries.
+    **style_kwds : dict
+        Color options to be passed on to the actual plot function, such
+        as ``edgecolor``, ``facecolor``, ``linewidth``, ``markersize``,
+        ``alpha``.
+
+    Returns
+    -------
+    ax : matplotlib axes instance
+    """
+
+    # matplotlib is imported lazily so that a missing install only fails when
+    # plotting is actually requested, with installation hints in the message.
+    try:
+        import matplotlib.pyplot as plt
+    except ImportError:
+        raise ImportError(
+            "The matplotlib package is required for plotting in geopandas. "
+            "You can install it using 'conda install -c conda-forge matplotlib' or "
+            "'pip install matplotlib'."
+        )
+
+    # no target axes supplied: create a new figure to draw on
+    if ax is None:
+        fig, ax = plt.subplots(figsize=figsize)
+
+    if aspect == "auto":
+        if s.crs and s.crs.is_geographic:
+            # total_bounds is indexed here as (minx, miny, maxx, maxy): take
+            # the middle of the y range
+            bounds = s.total_bounds
+            y_coord = np.mean([bounds[1], bounds[3]])
+            ax.set_aspect(1 / np.cos(y_coord * np.pi / 180))
+            # formula ported from R package sp
+            # https://github.com/edzer/sp/blob/master/R/mapasp.R
+        else:
+            ax.set_aspect("equal")
+    elif aspect is not None:
+        ax.set_aspect(aspect)
+
+    # Nothing to draw: warn and hand back the (already configured) axes.
+    if s.empty:
+        warnings.warn(
+            "The GeoSeries you are attempting to plot is "
+            "empty. Nothing has been displayed.",
+            UserWarning,
+            stacklevel=3,
+        )
+        return ax
+
+    if s.is_empty.all():
+        warnings.warn(
+            "The GeoSeries you are attempting to plot is "
+            "composed of empty geometries. Nothing has been displayed.",
+            UserWarning,
+            stacklevel=3,
+        )
+        return ax
+
+    # have colors been given for all geometries?
+    color_given = pd.api.types.is_list_like(color) and len(color) == len(s)
+
+    # if cmap is specified, create range of colors based on cmap
+    values = None
+    if cmap is not None:
+        # every geometry gets its position as value; for colormap objects that
+        # expose `N` the positions wrap around modulo `N`
+        values = np.arange(len(s))
+        if hasattr(cmap, "N"):
+            values = values % cmap.N
+        # caller-supplied vmin/vmax win over the range of the generated values
+        style_kwds["vmin"] = style_kwds.get("vmin", values.min())
+        style_kwds["vmax"] = style_kwds.get("vmax", values.max())
+
+    # decompose GeometryCollections
+    geoms, multiindex = _sanitize_geoms(s.geometry, prefix="Geom")
+    # repeat the value of a collection for each geometry it was split into
+    values = np.take(values, multiindex, axis=0) if cmap else None
+    # ensure indexes are consistent
+    if color_given and isinstance(color, pd.Series):
+        color = color.reindex(s.index)
+    # per-geometry colors are repeated the same way; a single color is kept
+    expl_color = np.take(color, multiindex, axis=0) if color_given else color
+    expl_series = geopandas.GeoSeries(geoms)
+
+    # boolean masks selecting the geometries handled by each plotting helper
+    geom_types = expl_series.geom_type
+    poly_idx = np.asarray((geom_types == "Polygon") | (geom_types == "MultiPolygon"))
+    line_idx = np.asarray(
+        (geom_types == "LineString")
+        | (geom_types == "MultiLineString")
+        | (geom_types == "LinearRing")
+    )
+    point_idx = np.asarray((geom_types == "Point") | (geom_types == "MultiPoint"))
+
+    # plot all Polygons and all MultiPolygon components in the same collection
+    polys = expl_series[poly_idx]
+    if not polys.empty:
+        # color overrides both face and edgecolor. As we want people to be
+        # able to use edgecolor as well, pass color to facecolor
+        # NOTE: `facecolor` is popped from style_kwds here, so it is no longer
+        # forwarded to the line and point helpers below once polygons exist.
+        facecolor = style_kwds.pop("facecolor", None)
+        color_ = expl_color[poly_idx] if color_given else color
+        if color is not None:
+            facecolor = color_
+
+        values_ = values[poly_idx] if cmap else None
+        _plot_polygon_collection(
+            ax,
+            polys,
+            values_,
+            facecolor=facecolor,
+            cmap=cmap,
+            autolim=autolim,
+            **style_kwds,
+        )
+
+    # plot all LineStrings and MultiLineString components in same collection
+    lines = expl_series[line_idx]
+    if not lines.empty:
+        values_ = values[line_idx] if cmap else None
+        color_ = expl_color[line_idx] if color_given else color
+
+        _plot_linestring_collection(
+            ax, lines, values_, color=color_, cmap=cmap, autolim=autolim, **style_kwds
+        )
+
+    # plot all Points in the same collection
+    points = expl_series[point_idx]
+    if not points.empty:
+        values_ = values[point_idx] if cmap else None
+        color_ = expl_color[point_idx] if color_given else color
+
+        # `autolim` is not forwarded for points
+        _plot_point_collection(
+            ax, points, values_, color=color_, cmap=cmap, **style_kwds
+        )
+
+    ax.figure.canvas.draw_idle()
+    return ax
+
+
+def plot_dataframe(
+    df,
+    column=None,
+    cmap=None,
+    color=None,
+    ax=None,
+    cax=None,
+    categorical=False,
+    legend=False,
+    scheme=None,
+    k=5,
+    vmin=None,
+    vmax=None,
+    markersize=None,
+    figsize=None,
+    legend_kwds=None,
+    categories=None,
+    classification_kwds=None,
+    missing_kwds=None,
+    aspect="auto",
+    autolim=True,
+    **style_kwds,
+):
+    """
+    Plot a GeoDataFrame.
+
+    Generate a plot of a GeoDataFrame with matplotlib.  If a
+    column is specified, the plot coloring will be based on values
+    in that column.
+
+    Parameters
+    ----------
+    column : str, np.array, pd.Series (default None)
+        The name of the dataframe column, np.array, or pd.Series to be plotted.
+        If np.array or pd.Series are used then it must have same length as
+        dataframe. Values are used to color the plot. Ignored if `color` is
+        also set.
+    kind: str
+        The kind of plots to produce. The default is to create a map ("geo").
+        Other supported kinds of plots from pandas:
+
+        - 'line' : line plot
+        - 'bar' : vertical bar plot
+        - 'barh' : horizontal bar plot
+        - 'hist' : histogram
+        - 'box' : BoxPlot
+        - 'kde' : Kernel Density Estimation plot
+        - 'density' : same as 'kde'
+        - 'area' : area plot
+        - 'pie' : pie plot
+        - 'scatter' : scatter plot
+        - 'hexbin' : hexbin plot.
+    cmap : str (default None)
+        The name of a colormap recognized by matplotlib.
+    color : str, np.array, pd.Series (default None)
+        If specified, all objects will be colored uniformly.
+    ax : matplotlib.pyplot.Artist (default None)
+        axes on which to draw the plot
+    cax : matplotlib.pyplot Artist (default None)
+        axes on which to draw the legend in case of color map.
+    categorical : bool (default False)
+        If False, cmap will reflect numerical values of the
+        column being plotted.  For non-numerical columns, this
+        will be set to True.
+    legend : bool (default False)
+        Plot a legend. Ignored if no `column` is given, or if `color` is given.
+    scheme : str (default None)
+        Name of a choropleth classification scheme (requires mapclassify).
+        A mapclassify.MapClassifier object will be used
+        under the hood. Supported are all schemes provided by mapclassify (e.g.
+        'BoxPlot', 'EqualInterval', 'FisherJenks', 'FisherJenksSampled',
+        'HeadTailBreaks', 'JenksCaspall', 'JenksCaspallForced',
+        'JenksCaspallSampled', 'MaxP', 'MaximumBreaks',
+        'NaturalBreaks', 'Quantiles', 'Percentiles', 'StdMean',
+        'UserDefined'). Arguments can be passed in classification_kwds.
+    k : int (default 5)
+        Number of classes (ignored if scheme is None)
+    vmin : None or float (default None)
+        Minimum value of cmap. If None, the minimum data value
+        in the column to be plotted is used.
+    vmax : None or float (default None)
+        Maximum value of cmap. If None, the maximum data value
+        in the column to be plotted is used.
+    markersize : str or float or sequence (default None)
+        Only applies to point geometries within a frame.
+        If a str, will use the values in the column of the frame specified
+        by markersize to set the size of markers. Otherwise can be a value
+        to apply to all points, or a sequence of the same length as the
+        number of points.
+    figsize : tuple of integers (default None)
+        Size of the resulting matplotlib.figure.Figure. If the argument
+        axes is given explicitly, figsize is ignored.
+    legend_kwds : dict (default None)
+        Keyword arguments to pass to :func:`matplotlib.pyplot.legend` or
+        :func:`matplotlib.pyplot.colorbar`.
+        Additional accepted keywords when `scheme` is specified:
+
+        fmt : string
+            A formatting specification for the bin edges of the classes in the
+            legend. For example, to have no decimals: ``{"fmt": "{:.0f}"}``.
+        labels : list-like
+            A list of legend labels to override the auto-generated labels.
+            Needs to have the same number of elements as the number of
+            classes (`k`).
+        interval : boolean (default False)
+            An option to control brackets from mapclassify legend.
+            If True, open/closed interval brackets are shown in the legend.
+    categories : list-like
+        Ordered list-like object of categories to be used for categorical plot.
+    classification_kwds : dict (default None)
+        Keyword arguments to pass to mapclassify
+    missing_kwds : dict (default None)
+        Keyword arguments specifying color options (as style_kwds)
+        to be passed on to geometries with missing values in addition to
+        or overwriting other style kwds. If None, geometries with missing
+        values are not plotted.
+    aspect : 'auto', 'equal', None or float (default 'auto')
+        Set aspect of axis. If 'auto', the default aspect for map plots is 'equal'; if
+        however data are not projected (coordinates are long/lat), the aspect is by
+        default set to 1/cos(df_y * pi/180) with df_y the y coordinate of the middle of
+        the GeoDataFrame (the mean of the y range of bounding box) so that a long/lat
+        square appears square in the middle of the plot. This implies an
+        Equirectangular projection. If None, the aspect of `ax` won't be changed. It can
+        also be set manually (float) as the ratio of y-unit to x-unit.
+    autolim : bool (default True)
+        Update axes data limits to contain the new geometries.
+    **style_kwds : dict
+        Style options to be passed on to the actual plot function, such
+        as ``edgecolor``, ``facecolor``, ``linewidth``, ``markersize``,
+        ``alpha``.
+
+    Returns
+    -------
+    ax : matplotlib axes instance
+
+    Examples
+    --------
+    >>> import geodatasets
+    >>> df = geopandas.read_file(geodatasets.get_path("nybb"))
+    >>> df.head()  # doctest: +SKIP
+       BoroCode  ...                                           geometry
+    0         5  ...  MULTIPOLYGON (((970217.022 145643.332, 970227....
+    1         4  ...  MULTIPOLYGON (((1029606.077 156073.814, 102957...
+    2         3  ...  MULTIPOLYGON (((1021176.479 151374.797, 102100...
+    3         1  ...  MULTIPOLYGON (((981219.056 188655.316, 980940....
+    4         2  ...  MULTIPOLYGON (((1012821.806 229228.265, 101278...
+
+    >>> df.plot("BoroName", cmap="Set1")  # doctest: +SKIP
+
+    See the User Guide page :doc:`../../user_guide/mapping` for details.
+
+    """
+    if column is not None and color is not None:
+        warnings.warn(
+            "Only specify one of 'column' or 'color'. Using 'color'.",
+            UserWarning,
+            stacklevel=3,
+        )
+        column = None
+
+    try:
+        import matplotlib.pyplot as plt
+    except ImportError:
+        raise ImportError(
+            "The matplotlib package is required for plotting in geopandas. "
+            "You can install it using 'conda install -c conda-forge matplotlib' or "
+            "'pip install matplotlib'."
+        )
+
+    if ax is None:
+        if cax is not None:
+            raise ValueError("'ax' can not be None if 'cax' is not.")
+        fig, ax = plt.subplots(figsize=figsize)
+
+    if aspect == "auto":
+        if df.crs and df.crs.is_geographic:
+            bounds = df.total_bounds
+            y_coord = np.mean([bounds[1], bounds[3]])
+            ax.set_aspect(1 / np.cos(y_coord * np.pi / 180))
+            # formula ported from R package sp
+            # https://github.com/edzer/sp/blob/master/R/mapasp.R
+        else:
+            ax.set_aspect("equal")
+    elif aspect is not None:
+        ax.set_aspect(aspect)
+
+    # GH 1555
+    # if legend_kwds set, copy so we don't update it in place
+    if legend_kwds is not None:
+        legend_kwds = legend_kwds.copy()
+
+    if df.empty:
+        warnings.warn(
+            "The GeoDataFrame you are attempting to plot is "
+            "empty. Nothing has been displayed.",
+            UserWarning,
+            stacklevel=3,
+        )
+        return ax
+
+    if isinstance(markersize, str):
+        markersize = df[markersize].values
+
+    if column is None:
+        return plot_series(
+            df.geometry,
+            cmap=cmap,
+            color=color,
+            ax=ax,
+            figsize=figsize,
+            markersize=markersize,
+            aspect=aspect,
+            autolim=autolim,
+            **style_kwds,
+        )
+
+    # To accept pd.Series and np.arrays as column
+    if isinstance(column, (np.ndarray, pd.Series)):
+        if column.shape[0] != df.shape[0]:
+            raise ValueError(
+                "The dataframe and given column have different number of rows."
+            )
+        else:
+            values = column
+
+            # Make sure index of a Series matches index of df
+            if isinstance(values, pd.Series):
+                values = values.reindex(df.index)
+    else:
+        values = df[column]
+
+    if isinstance(values.dtype, CategoricalDtype):
+        if categories is not None:
+            raise ValueError(
+                "Cannot specify 'categories' when column has categorical dtype"
+            )
+        categorical = True
+    elif (
+        pd.api.types.is_object_dtype(values.dtype)
+        or pd.api.types.is_bool_dtype(values.dtype)
+        or pd.api.types.is_string_dtype(values.dtype)
+        or categories
+    ):
+        categorical = True
+
+    nan_idx = np.asarray(pd.isna(values), dtype="bool")
+
+    if scheme is not None:
+        mc_err = (
+            "The 'mapclassify' package (>= 2.4.0) is "
+            "required to use the 'scheme' keyword."
+        )
+        try:
+            import mapclassify
+
+        except ImportError:
+            raise ImportError(mc_err)
+
+        if Version(mapclassify.__version__) < Version("2.4.0"):
+            raise ImportError(mc_err)
+
+        if classification_kwds is None:
+            classification_kwds = {}
+        if "k" not in classification_kwds:
+            classification_kwds["k"] = k
+
+        binning = mapclassify.classify(
+            np.asarray(values[~nan_idx]), scheme, **classification_kwds
+        )
+        # set categorical to True for creating the legend
+        categorical = True
+        if legend_kwds is not None and "labels" in legend_kwds:
+            if len(legend_kwds["labels"]) != binning.k:
+                raise ValueError(
+                    "Number of labels must match number of bins, "
+                    "received {} labels for {} bins".format(
+                        len(legend_kwds["labels"]), binning.k
+                    )
+                )
+            else:
+                labels = list(legend_kwds.pop("labels"))
+        else:
+            fmt = "{:.2f}"
+            if legend_kwds is not None and "fmt" in legend_kwds:
+                fmt = legend_kwds.pop("fmt")
+
+            labels = binning.get_legend_classes(fmt)
+            if legend_kwds is not None:
+                show_interval = legend_kwds.pop("interval", False)
+            else:
+                show_interval = False
+            if not show_interval:
+                labels = [c[1:-1] for c in labels]
+
+        values = pd.Categorical(
+            [np.nan] * len(values), categories=binning.bins, ordered=True
+        )
+        values[~nan_idx] = pd.Categorical.from_codes(
+            binning.yb, categories=binning.bins, ordered=True
+        )
+        if cmap is None:
+            cmap = "viridis"
+
+    # Define `values` as a Series
+    if categorical:
+        if cmap is None:
+            cmap = "tab10"
+
+        cat = pd.Categorical(values, categories=categories)
+        categories = list(cat.categories)
+
+        # values missing in the Categorical but not in original values
+        missing = list(np.unique(values[~nan_idx & cat.isna()]))
+        if missing:
+            raise ValueError(
+                "Column contains values not listed in categories. "
+                "Missing categories: {}.".format(missing)
+            )
+
+        values = cat.codes[~nan_idx]
+        vmin = 0 if vmin is None else vmin
+        vmax = len(categories) - 1 if vmax is None else vmax
+
+    # fill values with placeholder where were NaNs originally to map them properly
+    # (after removing them in categorical or scheme)
+    if categorical:
+        for n in np.where(nan_idx)[0]:
+            values = np.insert(values, n, values[0])
+
+    mn = values[~np.isnan(values)].min() if vmin is None else vmin
+    mx = values[~np.isnan(values)].max() if vmax is None else vmax
+
+    # decompose GeometryCollections
+    geoms, multiindex = _sanitize_geoms(df.geometry, prefix="Geom")
+    values = np.take(values, multiindex, axis=0)
+    nan_idx = np.take(nan_idx, multiindex, axis=0)
+    expl_series = geopandas.GeoSeries(geoms)
+
+    geom_types = expl_series.geom_type
+    poly_idx = np.asarray((geom_types == "Polygon") | (geom_types == "MultiPolygon"))
+    line_idx = np.asarray(
+        (geom_types == "LineString")
+        | (geom_types == "MultiLineString")
+        | (geom_types == "LinearRing")
+    )
+    point_idx = np.asarray((geom_types == "Point") | (geom_types == "MultiPoint"))
+
+    # plot all Polygons and all MultiPolygon components in the same collection
+    polys = expl_series[poly_idx & np.invert(nan_idx)]
+    subset = values[poly_idx & np.invert(nan_idx)]
+    if not polys.empty:
+        _plot_polygon_collection(
+            ax,
+            polys,
+            subset,
+            vmin=mn,
+            vmax=mx,
+            cmap=cmap,
+            autolim=autolim,
+            **style_kwds,
+        )
+
+    # plot all LineStrings and MultiLineString components in same collection
+    lines = expl_series[line_idx & np.invert(nan_idx)]
+    subset = values[line_idx & np.invert(nan_idx)]
+    if not lines.empty:
+        _plot_linestring_collection(
+            ax,
+            lines,
+            subset,
+            vmin=mn,
+            vmax=mx,
+            cmap=cmap,
+            autolim=autolim,
+            **style_kwds,
+        )
+
+    # plot all Points in the same collection
+    points = expl_series[point_idx & np.invert(nan_idx)]
+    subset = values[point_idx & np.invert(nan_idx)]
+    if not points.empty:
+        if isinstance(markersize, np.ndarray):
+            markersize = np.take(markersize, multiindex, axis=0)
+            markersize = markersize[point_idx & np.invert(nan_idx)]
+        _plot_point_collection(
+            ax,
+            points,
+            subset,
+            vmin=mn,
+            vmax=mx,
+            markersize=markersize,
+            cmap=cmap,
+            **style_kwds,
+        )
+
+    missing_data = not expl_series[nan_idx].empty
+    if missing_kwds is not None and missing_data:
+        if color:
+            if "color" not in missing_kwds:
+                missing_kwds["color"] = color
+
+        merged_kwds = style_kwds.copy()
+        merged_kwds.update(missing_kwds)
+
+        plot_series(expl_series[nan_idx], ax=ax, **merged_kwds)
+
+    if legend and not color:
+        if legend_kwds is None:
+            legend_kwds = {}
+        if "fmt" in legend_kwds:
+            legend_kwds.pop("fmt")
+
+        from matplotlib import cm
+        from matplotlib.colors import Normalize
+        from matplotlib.lines import Line2D
+
+        norm = style_kwds.get("norm", None)
+        if not norm:
+            norm = Normalize(vmin=mn, vmax=mx)
+        n_cmap = cm.ScalarMappable(norm=norm, cmap=cmap)
+        if categorical:
+            if scheme is not None:
+                categories = labels
+            patches = []
+            for i in range(len(categories)):
+                patches.append(
+                    Line2D(
+                        [0],
+                        [0],
+                        linestyle="none",
+                        marker="o",
+                        alpha=style_kwds.get("alpha", 1),
+                        markersize=10,
+                        markerfacecolor=n_cmap.to_rgba(i),
+                        markeredgewidth=0,
+                    )
+                )
+            if missing_kwds is not None and missing_data:
+                if "color" in merged_kwds:
+                    merged_kwds["facecolor"] = merged_kwds["color"]
+                patches.append(
+                    Line2D(
+                        [0],
+                        [0],
+                        linestyle="none",
+                        marker="o",
+                        alpha=merged_kwds.get("alpha", 1),
+                        markersize=10,
+                        markerfacecolor=merged_kwds.get("facecolor", None),
+                        markeredgecolor=merged_kwds.get("edgecolor", None),
+                        markeredgewidth=merged_kwds.get(
+                            "linewidth", 1 if merged_kwds.get("edgecolor", False) else 0
+                        ),
+                    )
+                )
+                categories.append(merged_kwds.get("label", "NaN"))
+            legend_kwds.setdefault("numpoints", 1)
+            legend_kwds.setdefault("loc", "best")
+            legend_kwds.setdefault("handles", patches)
+            legend_kwds.setdefault("labels", categories)
+            ax.legend(**legend_kwds)
+        else:
+            if cax is not None:
+                legend_kwds.setdefault("cax", cax)
+            else:
+                legend_kwds.setdefault("ax", ax)
+
+            n_cmap.set_array(np.array([]))
+            ax.get_figure().colorbar(n_cmap, **legend_kwds)
+
+    ax.figure.canvas.draw_idle()
+    return ax
+
+
+@doc(plot_dataframe)
+class GeoplotAccessor(PlotAccessor):
+    _pandas_kinds = PlotAccessor._all_kinds
+
+    def __call__(self, *args, **kwargs):
+        data = self._parent.copy()
+        kind = kwargs.pop("kind", "geo")
+        if kind == "geo":
+            return plot_dataframe(data, *args, **kwargs)
+        if kind in self._pandas_kinds:
+            # Access pandas plots
+            return PlotAccessor(data)(kind=kind, **kwargs)
+        else:
+            # raise error
+            raise ValueError(f"{kind} is not a valid plot kind")
+
+    def geo(self, *args, **kwargs):
+        return self(kind="geo", *args, **kwargs)  # noqa: B026
@@ -0,0 +1,505 @@
+import numpy as np
+
+import shapely
+from shapely.geometry.base import BaseGeometry
+
+from . import _compat as compat
+from . import array, geoseries
+
+# Predicate names accepted by ``SpatialIndex.query``: the name of every member
+# of ``shapely.strtree.BinaryPredicate`` plus ``None`` (the default value of
+# the ``predicate`` argument).
+PREDICATES = {p.name for p in shapely.strtree.BinaryPredicate} | {None}
+
+# "dwithin" is only registered when the GEOS >= 3.10 compat flag is set
+if compat.GEOS_GE_310:
+    PREDICATES.update(["dwithin"])
+
+
+class SpatialIndex:
+    """A simple wrapper around Shapely's STRtree.
+
+
+    Parameters
+    ----------
+    geometry : np.array of Shapely geometries
+        Geometries from which to build the spatial index.
+    """
+
+    def __init__(self, geometry):
+        # set empty geometries to None to avoid segfault on GEOS <= 3.6
+        # see:
+        # https://github.com/pygeos/pygeos/issues/146
+        # https://github.com/pygeos/pygeos/issues/147
+        non_empty = geometry.copy()
+        non_empty[shapely.is_empty(non_empty)] = None
+        # set empty geometries to None to maintain indexing
+        self._tree = shapely.STRtree(non_empty)
+        # store geometries, including empty geometries for user access
+        self.geometries = geometry.copy()
+
+    @property
+    def valid_query_predicates(self):
+        """Returns valid predicates for the spatial index.
+
+        The returned object is the module-level ``PREDICATES`` set itself, not
+        a copy, so it should be treated as read-only. ``"dwithin"`` is only
+        part of it when the GEOS >= 3.10 compatibility flag is set.
+
+        Returns
+        -------
+        set
+            Set of valid predicates for this spatial index.
+
+        Examples
+        --------
+        >>> from shapely.geometry import Point
+        >>> s = geopandas.GeoSeries([Point(0, 0), Point(1, 1)])
+        >>> s.sindex.valid_query_predicates  # doctest: +SKIP
+        {None, "contains", "contains_properly", "covered_by", "covers", \
+"crosses", "dwithin", "intersects", "overlaps", "touches", "within"}
+        """
+        return PREDICATES
+
+    def query(
+        self, geometry, predicate=None, sort=False, distance=None, output_format="tuple"
+    ):
+        """
+        Return the integer indices of all combinations of each input geometry
+        and tree geometries where the bounding box of each input geometry
+        intersects the bounding box of a tree geometry.
+
+        If the input geometry is a scalar, this returns an array of shape (n, ) with
+        the indices of the matching tree geometries.  If the input geometry is an
+        array_like, this returns an array with shape (2,n) where the subarrays
+        correspond to the indices of the input geometries and indices of the
+        tree geometries associated with each.  To generate an array of pairs of
+        input geometry index and tree geometry index, simply transpose the
+        result.
+
+        If a predicate is provided, the tree geometries are first queried based
+        on the bounding box of the input geometry and then are further filtered
+        to those that meet the predicate when comparing the input geometry to
+        the tree geometry: ``predicate(geometry, tree_geometry)``.
+
+        The 'dwithin' predicate requires GEOS >= 3.10.
+
+        Bounding boxes are limited to two dimensions and are axis-aligned
+        (equivalent to the ``bounds`` property of a geometry); any Z values
+        present in input geometries are ignored when querying the tree.
+
+        Any input geometry that is None or empty will never match geometries in
+        the tree.
+
+        Parameters
+        ----------
+        geometry : shapely.Geometry or array-like of geometries \
+(numpy.ndarray, GeoSeries, GeometryArray)
+            A single shapely geometry or array of geometries to query against
+            the spatial index. For array-like, accepts both GeoPandas geometry
+            iterables (GeoSeries, GeometryArray) or a numpy array of Shapely
+            geometries.
+        predicate : {None, "contains", "contains_properly", "covered_by", "covers", \
+"crosses", "intersects", "overlaps", "touches", "within", "dwithin"}, optional
+            If predicate is provided, the input geometries are tested
+            using the predicate function against each item in the tree
+            whose extent intersects the envelope of the input geometry:
+            ``predicate(input_geometry, tree_geometry)``.
+            If possible, prepared geometries are used to help speed up the
+            predicate operation.
+        sort : bool, default False
+            If True, the results will be sorted in ascending order. In case
+            of 2D array, the result is sorted lexicographically using the
+            geometries' indexes as the primary key and the sindex's indexes
+            as the secondary key.
+            If False, no additional sorting is applied (results are often
+            sorted but there is no guarantee).
+        distance : number or array_like, optional
+            Distances around each input geometry within which to query the tree for
+            the 'dwithin' predicate. If array_like, shape must be broadcastable to shape
+            of geometry. Required if ``predicate='dwithin'``.
+
+        Returns
+        -------
+        ndarray with shape (n,) if geometry is a scalar
+            Integer indices for matching geometries from the spatial index
+            tree geometries.
+
+        OR
+
+        ndarray with shape (2, n) if geometry is an array_like
+            The first subarray contains input geometry integer indices.
+            The second subarray contains tree geometry integer indices.
+
+        Examples
+        --------
+        >>> from shapely.geometry import Point, box
+        >>> s = geopandas.GeoSeries(geopandas.points_from_xy(range(10), range(10)))
+        >>> s
+        0    POINT (0 0)
+        1    POINT (1 1)
+        2    POINT (2 2)
+        3    POINT (3 3)
+        4    POINT (4 4)
+        5    POINT (5 5)
+        6    POINT (6 6)
+        7    POINT (7 7)
+        8    POINT (8 8)
+        9    POINT (9 9)
+        dtype: geometry
+
+        Querying the tree with a scalar geometry:
+
+        >>> s.sindex.query(box(1, 1, 3, 3))
+        array([1, 2, 3])
+
+        >>> s.sindex.query(box(1, 1, 3, 3), predicate="contains")
+        array([2])
+
+        Querying the tree with an array of geometries:
+
+        >>> s2 = geopandas.GeoSeries([box(2, 2, 4, 4), box(5, 5, 6, 6)])
+        >>> s2
+        0    POLYGON ((4 2, 4 4, 2 4, 2 2, 4 2))
+        1    POLYGON ((6 5, 6 6, 5 6, 5 5, 6 5))
+        dtype: geometry
+
+        >>> s.sindex.query(s2)
+        array([[0, 0, 0, 1, 1],
+               [2, 3, 4, 5, 6]])
+
+        >>> s.sindex.query(s2, predicate="contains")
+        array([[0],
+               [3]])
+
+        >>> s.sindex.query(box(1, 1, 3, 3), predicate="dwithin", distance=0)
+        array([1, 2, 3])
+
+        >>> s.sindex.query(box(1, 1, 3, 3), predicate="dwithin", distance=2)
+        array([0, 1, 2, 3, 4])
+
+        Notes
+        -----
+        In the context of a spatial join, input geometries are the "left"
+        geometries that determine the order of the results, and tree geometries
+        are "right" geometries that are joined against the left geometries. This
+        effectively performs an inner join, where only those combinations of
+        geometries that can be joined based on overlapping bounding boxes or
+        optional predicate are returned.
+        """
+        if predicate not in self.valid_query_predicates:
+            if predicate == "dwithin":
+                raise ValueError("predicate = 'dwithin' requires GEOS >= 3.10.0")
+
+            raise ValueError(
+                "Got predicate='{}'; ".format(predicate)
+                + "`predicate` must be one of {}".format(self.valid_query_predicates)
+            )
+
+        # distance argument requirement of predicate `dwithin`
+        # and only valid for predicate `dwithin`
+        kwargs = {}
+        if predicate == "dwithin":
+            if distance is None:
+                # the distance parameter is needed
+                raise ValueError(
+                    "'distance' parameter is required for 'dwithin' predicate"
+                )
+            # add distance to kwargs
+            kwargs["distance"] = distance
+
+        elif distance is not None:
+            # distance parameter is invalid
+            raise ValueError(
+                "'distance' parameter is only supported in combination with "
+                "'dwithin' predicate"
+            )
+
+        geometry = self._as_geometry_array(geometry)
+
+        indices = self._tree.query(geometry, predicate=predicate, **kwargs)
+
+        if output_format != "tuple":
+            sort = True
+
+        if sort:
+            if indices.ndim == 1:
+                indices = np.sort(indices)
+            else:
+                # sort by first array (geometry) and then second (tree)
+                geo_idx, tree_idx = indices
+                sort_indexer = np.lexsort((tree_idx, geo_idx))
+                indices = np.vstack((geo_idx[sort_indexer], tree_idx[sort_indexer]))
+
+        if output_format == "sparse":
+            from scipy.sparse import coo_array
+
+            return coo_array(
+                (np.ones(len(indices[0]), dtype=np.bool_), indices),
+                shape=(len(self.geometries), len(geometry)),
+                dtype=np.bool_,
+            )
+
+        if output_format == "dense":
+            dense = np.zeros((len(self.geometries), len(geometry)), dtype=bool)
+            dense[indices] = True
+            return dense
+
+        if output_format == "tuple":
+            return indices
+
+        raise ValueError("Invalid output_format: {}".format(output_format))
+
+    @staticmethod
+    def _as_geometry_array(geometry):
+        """Convert geometry into a numpy array of Shapely geometries.
+
+        Parameters
+        ----------
+        geometry
+            An array-like of Shapely geometries, a GeoPandas GeoSeries/GeometryArray,
+            shapely.geometry or list of shapely geometries.
+
+        Returns
+        -------
+        np.ndarray
+            A numpy array of Shapely geometries.
+        """
+        if isinstance(geometry, np.ndarray):
+            return array.from_shapely(geometry)._data
+        elif isinstance(geometry, geoseries.GeoSeries):
+            return geometry.values._data
+        elif isinstance(geometry, array.GeometryArray):
+            return geometry._data
+        elif isinstance(geometry, BaseGeometry):
+            return geometry
+        elif geometry is None:
+            return None
+        else:
+            return np.asarray(geometry)
+
+    def nearest(
+        self,
+        geometry,
+        return_all=True,
+        max_distance=None,
+        return_distance=False,
+        exclusive=False,
+    ):
+        """
+        Return the nearest geometry in the tree for each input geometry in
+        ``geometry``.
+
+        If multiple tree geometries have the same distance from an input geometry,
+        multiple results will be returned for that input geometry by default.
+        Specify ``return_all=False`` to only get a single nearest geometry
+        (non-deterministic which nearest is returned).
+
+        In the context of a spatial join, input geometries are the "left"
+        geometries that determine the order of the results, and tree geometries
+        are "right" geometries that are joined against the left geometries.
+        If ``max_distance`` is not set, this will effectively be a left join
+        because every geometry in ``geometry`` will have a nearest geometry in
+        the tree. However, if ``max_distance`` is used, this becomes an
+        inner join, since some geometries in ``geometry`` may not have a match
+        in the tree.
+
+        For performance reasons, it is highly recommended that you set
+        the ``max_distance`` parameter.
+
+        Parameters
+        ----------
+        geometry : {shapely.geometry, GeoSeries, GeometryArray, numpy.array of Shapely \
+geometries}
+            A single shapely geometry, one of the GeoPandas geometry iterables
+            (GeoSeries, GeometryArray), or a numpy array of Shapely geometries to query
+            against the spatial index.
+        return_all : bool, default True
+            If there are multiple equidistant or intersecting nearest
+            geometries, return all those geometries instead of a single
+            nearest geometry.
+        max_distance : float, optional
+            Maximum distance within which to query for nearest items in tree.
+            Must be greater than 0. By default None, indicating no distance limit.
+        return_distance : bool, optional
+            If True, will return distances in addition to indexes. By default False
+        exclusive : bool, optional
+            if True, the nearest geometries that are equal to the input geometry
+            will not be returned. By default False.  Requires Shapely >= 2.0.
+
+        Returns
+        -------
+        Indices or tuple of (indices, distances)
+            Indices is an ndarray of shape (2,n) and distances (if present) an
+            ndarray of shape (n).
+            The first subarray of indices contains input geometry indices.
+            The second subarray of indices contains tree geometry indices.
+
+        Examples
+        --------
+        >>> from shapely.geometry import Point, box
+        >>> s = geopandas.GeoSeries(geopandas.points_from_xy(range(10), range(10)))
+        >>> s.head()
+        0    POINT (0 0)
+        1    POINT (1 1)
+        2    POINT (2 2)
+        3    POINT (3 3)
+        4    POINT (4 4)
+        dtype: geometry
+
+        >>> s.sindex.nearest(Point(1, 1))
+        array([[0],
+               [1]])
+
+        >>> s.sindex.nearest([box(4.9, 4.9, 5.1, 5.1)])
+        array([[0],
+               [5]])
+
+        >>> s2 = geopandas.GeoSeries(geopandas.points_from_xy([7.6, 10], [7.6, 10]))
+        >>> s2
+        0    POINT (7.6 7.6)
+        1    POINT (10 10)
+        dtype: geometry
+
+        >>> s.sindex.nearest(s2)
+        array([[0, 1],
+               [8, 9]])
+        """
+        geometry = self._as_geometry_array(geometry)
+        if isinstance(geometry, BaseGeometry) or geometry is None:
+            geometry = [geometry]
+
+        result = self._tree.query_nearest(
+            geometry,
+            max_distance=max_distance,
+            return_distance=return_distance,
+            all_matches=return_all,
+            exclusive=exclusive,
+        )
+        if return_distance:
+            indices, distances = result
+        else:
+            indices = result
+
+        if return_distance:
+            return indices, distances
+        else:
+            return indices
+
+    def intersection(self, coordinates):
+        """Compatibility wrapper for rtree.index.Index.intersection,
+        use ``query`` instead.
+
+        Parameters
+        ----------
+        coordinates : sequence or array
+            Sequence of the form (min_x, min_y, max_x, max_y)
+            to query a rectangle or (x, y) to query a point.
+
+        Examples
+        --------
+        >>> from shapely.geometry import Point, box
+        >>> s = geopandas.GeoSeries(geopandas.points_from_xy(range(10), range(10)))
+        >>> s
+        0    POINT (0 0)
+        1    POINT (1 1)
+        2    POINT (2 2)
+        3    POINT (3 3)
+        4    POINT (4 4)
+        5    POINT (5 5)
+        6    POINT (6 6)
+        7    POINT (7 7)
+        8    POINT (8 8)
+        9    POINT (9 9)
+        dtype: geometry
+
+        >>> s.sindex.intersection(box(1, 1, 3, 3).bounds)
+        array([1, 2, 3])
+
+        Alternatively, you can use ``query``:
+
+        >>> s.sindex.query(box(1, 1, 3, 3))
+        array([1, 2, 3])
+
+        """
+        # TODO: we should deprecate this
+        # convert bounds to geometry
+        # the old API uses tuples of bound, but Shapely uses geometries
+        try:
+            iter(coordinates)
+        except TypeError:
+            # likely not an iterable
+            # this is a check that rtree does, we mimic it
+            # to ensure a useful failure message
+            raise TypeError(
+                "Invalid coordinates, must be iterable in format "
+                "(minx, miny, maxx, maxy) (for bounds) or (x, y) (for points). "
+                "Got `coordinates` = {}.".format(coordinates)
+            )
+
+        # need to convert tuple of bounds to a geometry object
+        if len(coordinates) == 4:
+            indexes = self._tree.query(shapely.box(*coordinates))
+        elif len(coordinates) == 2:
+            indexes = self._tree.query(shapely.points(*coordinates))
+        else:
+            raise TypeError(
+                "Invalid coordinates, must be iterable in format "
+                "(minx, miny, maxx, maxy) (for bounds) or (x, y) (for points). "
+                "Got `coordinates` = {}.".format(coordinates)
+            )
+
+        return indexes
+
+    @property
+    def size(self):
+        """Size of the spatial index
+
+        Number of leaves (input geometries) in the index.
+
+        Returns
+        -------
+        int
+            Length of the underlying tree.
+
+        Examples
+        --------
+        >>> from shapely.geometry import Point
+        >>> s = geopandas.GeoSeries(geopandas.points_from_xy(range(10), range(10)))
+        >>> s
+        0    POINT (0 0)
+        1    POINT (1 1)
+        2    POINT (2 2)
+        3    POINT (3 3)
+        4    POINT (4 4)
+        5    POINT (5 5)
+        6    POINT (6 6)
+        7    POINT (7 7)
+        8    POINT (8 8)
+        9    POINT (9 9)
+        dtype: geometry
+
+        >>> s.sindex.size
+        10
+        """
+        return len(self._tree)
+
+    @property
+    def is_empty(self):
+        """Check if the spatial index is empty
+
+        Examples
+        --------
+        >>> from shapely.geometry import Point
+        >>> s = geopandas.GeoSeries(geopandas.points_from_xy(range(10), range(10)))
+        >>> s
+        0    POINT (0 0)
+        1    POINT (1 1)
+        2    POINT (2 2)
+        3    POINT (3 3)
+        4    POINT (4 4)
+        5    POINT (5 5)
+        6    POINT (6 6)
+        7    POINT (7 7)
+        8    POINT (8 8)
+        9    POINT (9 9)
+        dtype: geometry
+
+        >>> s.sindex.is_empty
+        False
+
+        >>> s2 = geopandas.GeoSeries()
+        >>> s2.sindex.is_empty
+        True
+        """
+        return len(self._tree) == 0
+
+    def __len__(self):
+        """Return the length of the underlying tree (same value as ``size``)."""
+        return len(self._tree)
@@ -0,0 +1,358 @@
+"""
+Testing functionality for geopandas objects.
+"""
+
+import warnings
+
+import pandas as pd
+
+from geopandas import GeoDataFrame, GeoSeries
+from geopandas.array import GeometryDtype
+
+
+def _isna(this):
+    """isna version that works for both scalars and (Geo)Series"""
+    with warnings.catch_warnings():
+        # GeoSeries.isna will raise a warning about no longer returning True
+        # for empty geometries. This helper is used below always in combination
+        # with an is_empty check to preserve behaviour, and thus we ignore the
+        # warning here to avoid it bubbling up to the user
+        warnings.filterwarnings(
+            "ignore", r"GeoSeries.isna\(\) previously returned", UserWarning
+        )
+        if hasattr(this, "isna"):
+            return this.isna()
+        elif hasattr(this, "isnull"):
+            return this.isnull()
+        else:
+            return pd.isnull(this)
+
+
+def _geom_equals_mask(this, that):
+    """
+    Test for geometric equality. Empty or missing geometries are considered
+    equal.
+
+    Parameters
+    ----------
+    this, that : arrays of Geo objects (or anything that has an `is_empty`
+                 attribute)
+
+    Returns
+    -------
+    Series
+        boolean Series, True if geometries in left equal geometries in right
+    """
+
+    return (
+        this.geom_equals(that)
+        | (this.is_empty & that.is_empty)
+        | (_isna(this) & _isna(that))
+    )
+
+
+def geom_equals(this, that):
+    """
+    Test for geometric equality. Empty or missing geometries are considered
+    equal.
+
+    Reduces the element-wise mask from ``_geom_equals_mask`` with ``.all()``.
+
+    Parameters
+    ----------
+    this, that : arrays of Geo objects (or anything that has an `is_empty`
+                 attribute)
+
+    Returns
+    -------
+    bool
+        True if all geometries in `this` equal the geometries in `that`
+    """
+
+    return _geom_equals_mask(this, that).all()
+
+
+def _geom_almost_equals_mask(this, that):
+    """
+    Test for 'almost' geometric equality. Empty or missing geometries
+    considered equal.
+
+    This method allows small difference in the coordinates, but this
+    requires coordinates be in the same order for all components of a geometry.
+
+    Parameters
+    ----------
+    this, that : arrays of Geo objects
+
+    Returns
+    -------
+    Series
+        boolean Series, True if geometries in left almost equal geometries in right
+    """
+
+    return (
+        this.geom_equals_exact(that, tolerance=0.5 * 10 ** (-6))
+        | (this.is_empty & that.is_empty)
+        | (_isna(this) & _isna(that))
+    )
+
+
+def geom_almost_equals(this, that):
+    """
+    Test for 'almost' geometric equality. Empty or missing geometries
+    considered equal.
+
+    This method allows small difference in the coordinates, but this
+    requires coordinates be in the same order for all components of a geometry.
+
+    Parameters
+    ----------
+    this, that : arrays of Geo objects (or anything that has an `is_empty`
+                 property)
+
+    Returns
+    -------
+    bool
+        True if all geometries in left almost equal geometries in right
+    """
+    if isinstance(this, GeoDataFrame) and isinstance(that, GeoDataFrame):
+        this = this.geometry
+        that = that.geometry
+
+    return _geom_almost_equals_mask(this, that).all()
+
+
+def assert_geoseries_equal(
+    left,
+    right,
+    check_dtype=True,
+    check_index_type=False,
+    check_series_type=True,
+    check_less_precise=False,
+    check_geom_type=False,
+    check_crs=True,
+    normalize=False,
+):
+    """
+    Test util for checking that two GeoSeries are equal.
+
+    Parameters
+    ----------
+    left, right : two GeoSeries
+    check_dtype : bool, default False
+        If True, check geo dtype [only included so it's a drop-in replacement
+        for assert_series_equal].
+    check_index_type : bool, default False
+        Check that index types are equal.
+    check_series_type : bool, default True
+        Check that both are same type (*and* are GeoSeries). If False,
+        will attempt to convert both into GeoSeries.
+    check_less_precise : bool, default False
+        If True, use geom_equals_exact with relative error of 0.5e-6.
+        If False, use geom_equals.
+    check_geom_type : bool, default False
+        If True, check that all the geom types are equal.
+    check_crs: bool, default True
+        If `check_series_type` is True, then also check that the
+        crs matches.
+    normalize: bool, default False
+        If True, normalize the geometries before comparing equality.
+        Typically useful with ``check_less_precise=True``, which uses
+        ``geom_equals_exact`` and requires exact coordinate order.
+    """
+    assert len(left) == len(right), "%d != %d" % (len(left), len(right))
+
+    if check_dtype:
+        msg = "dtype should be a GeometryDtype, got {0}"
+        assert isinstance(left.dtype, GeometryDtype), msg.format(left.dtype)
+        assert isinstance(right.dtype, GeometryDtype), msg.format(left.dtype)
+
+    if check_index_type:
+        assert isinstance(left.index, type(right.index))
+
+    if check_series_type:
+        assert isinstance(left, GeoSeries)
+        assert isinstance(left, type(right))
+
+        if check_crs:
+            assert left.crs == right.crs
+    else:
+        if not isinstance(left, GeoSeries):
+            left = GeoSeries(left)
+        if not isinstance(right, GeoSeries):
+            right = GeoSeries(right, index=left.index)
+
+    assert left.index.equals(right.index), "index: %s != %s" % (left.index, right.index)
+
+    if check_geom_type:
+        assert (left.geom_type == right.geom_type).all(), "type: %s != %s" % (
+            left.geom_type,
+            right.geom_type,
+        )
+
+    if normalize:
+        left = GeoSeries(left.array.normalize())
+        right = GeoSeries(right.array.normalize())
+
+    if not check_crs:
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore", "CRS mismatch", UserWarning)
+            _check_equality(left, right, check_less_precise)
+    else:
+        _check_equality(left, right, check_less_precise)
+
+
+def _truncated_string(geom):
+    """Truncated WKT repr of geom"""
+    s = str(geom)
+    if len(s) > 100:
+        return s[:100] + "..."
+    else:
+        return s
+
+
+def _check_equality(left, right, check_less_precise):
+    assert_error_message = (
+        "{0} out of {1} geometries are not {3}equal.\n"
+        "Indices where geometries are not {3}equal: {2} \n"
+        "The first not {3}equal geometry:\n"
+        "Left: {4}\n"
+        "Right: {5}\n"
+    )
+    if check_less_precise:
+        precise = "almost "
+        equal = _geom_almost_equals_mask(left, right)
+    else:
+        precise = ""
+        equal = _geom_equals_mask(left, right)
+
+    if not equal.all():
+        unequal_left_geoms = left[~equal]
+        unequal_right_geoms = right[~equal]
+        raise AssertionError(
+            assert_error_message.format(
+                len(unequal_left_geoms),
+                len(left),
+                unequal_left_geoms.index.to_list(),
+                precise,
+                _truncated_string(unequal_left_geoms.iloc[0]),
+                _truncated_string(unequal_right_geoms.iloc[0]),
+            )
+        )
+
+
+def assert_geodataframe_equal(
+    left,
+    right,
+    check_dtype=True,
+    check_index_type="equiv",
+    check_column_type="equiv",
+    check_frame_type=True,
+    check_like=False,
+    check_less_precise=False,
+    check_geom_type=False,
+    check_crs=True,
+    normalize=False,
+):
+    """
+    Check that two GeoDataFrames are equal.
+
+    Parameters
+    ----------
+    left, right : two GeoDataFrames
+    check_dtype : bool, default True
+        Whether to check the DataFrame dtype is identical.
+    check_index_type, check_column_type : bool or 'equiv', default 'equiv'
+        Check that index types are equal. Both values are forwarded to the
+        pandas assertion helpers.
+    check_frame_type : bool, default True
+        Check that both are same type (*and* are GeoDataFrames). If False,
+        will attempt to convert both into GeoDataFrame.
+    check_like : bool, default False
+        If true, ignore the order of rows & columns
+    check_less_precise : bool, default False
+        If True, use geom_equals_exact. If False, use geom_equals.
+    check_geom_type : bool, default False
+        If True, check that all the geom types are equal.
+    check_crs: bool, default True
+        If `check_frame_type` is True, then also check that the
+        crs matches. The flag is also forwarded to the comparison of each
+        geometry column.
+    normalize: bool, default False
+        If True, normalize the geometries before comparing equality.
+        Typically useful with ``check_less_precise=True``, which uses
+        ``geom_equals_exact`` and requires exact coordinate order.
+
+    Raises
+    ------
+    AssertionError
+        If any of the enabled checks fails.
+    """
+    try:
+        # added from pandas 0.20
+        from pandas.testing import assert_frame_equal, assert_index_equal
+    except ImportError:
+        from pandas.util.testing import assert_frame_equal, assert_index_equal
+
+    # instance validation
+    if check_frame_type:
+        assert isinstance(left, GeoDataFrame)
+        assert isinstance(left, type(right))
+
+        # the frame-level CRS is only compared when the frame type is checked
+        if check_crs:
+            # allow if neither left and right has an active geometry column
+            if (
+                left._geometry_column_name is None
+                and right._geometry_column_name is None
+            ):
+                pass
+            elif (
+                left._geometry_column_name not in left.columns
+                and right._geometry_column_name not in right.columns
+            ):
+                pass
+            # no crs can be either None or {}
+            elif not left.crs and not right.crs:
+                pass
+            else:
+                assert left.crs == right.crs
+    else:
+        if not isinstance(left, GeoDataFrame):
+            left = GeoDataFrame(left)
+        if not isinstance(right, GeoDataFrame):
+            right = GeoDataFrame(right)
+
+    # shape comparison
+    assert left.shape == right.shape, (
+        "GeoDataFrame shape mismatch, left: {lshape!r}, right: {rshape!r}.\n"
+        "Left columns: {lcols!r}, right columns: {rcols!r}"
+    ).format(
+        lshape=left.shape, rshape=right.shape, lcols=left.columns, rcols=right.columns
+    )
+
+    if check_like:
+        # conform left to right's index and columns so ordering is ignored
+        left = left.reindex_like(right)
+
+    # column comparison
+    assert_index_equal(
+        left.columns, right.columns, exact=check_column_type, obj="GeoDataFrame.columns"
+    )
+
+    # geometry comparison
+    # every column with a geometry dtype is compared, not only the active one
+    for col, dtype in left.dtypes.items():
+        if isinstance(dtype, GeometryDtype):
+            assert_geoseries_equal(
+                left[col],
+                right[col],
+                normalize=normalize,
+                check_dtype=check_dtype,
+                check_less_precise=check_less_precise,
+                check_geom_type=check_geom_type,
+                check_crs=check_crs,
+            )
+
+    # ensure the active geometry column is the same
+    assert left._geometry_column_name == right._geometry_column_name
+
+    # drop geometries and check remaining columns
+    left2 = left.select_dtypes(exclude="geometry")
+    right2 = right.select_dtypes(exclude="geometry")
+    assert_frame_equal(
+        left2,
+        right2,
+        check_dtype=check_dtype,
+        check_index_type=check_index_type,
+        check_column_type=check_column_type,
+        obj="GeoDataFrame",
+    )
@@ -0,0 +1,9 @@
+{
+"type": "FeatureCollection",
+"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
+                                                                                
+"features": [
+{ "type": "Feature", "properties": { "Name": "Null Geometry" }, "geometry": null },
+{ "type": "Feature", "properties": { "Name": "SF to NY" }, "geometry": { "type": "LineString", "coordinates": [ [ -122.4051293283311, 37.786780113640894 ], [ -73.859832357849271, 40.487594916296196 ] ] } }
+]
+}
@@ -0,0 +1,38 @@
+import subprocess
+import sys
+
+
+def test_no_additional_imports():
+    # test that 'import geopandas' does not import any of the optional or
+    # development dependencies
+    blacklist = {
+        "pytest",
+        "py",
+        "ipython",
+        # fiona actually gets imported if installed (but error suppressed until used)
+        # "fiona",
+        # "matplotlib",  # matplotlib gets imported by pandas, see below
+        "mapclassify",
+        "sqlalchemy",
+        "psycopg",
+        "psycopg2",
+        "geopy",
+        "geoalchemy2",
+        "matplotlib",
+    }
+
+    code = """
+import sys
+import geopandas
+blacklist = {0!r}
+
+mods = blacklist & set(m.split('.')[0] for m in sys.modules)
+if mods:
+    sys.stderr.write('err: geopandas should not import: {{}}'.format(', '.join(mods)))
+    sys.exit(len(mods))
+""".format(
+        blacklist
+    )
+    call = [sys.executable, "-c", code]
+    returncode = subprocess.run(call, check=False).returncode
+    assert returncode == 0
@@ -0,0 +1,30 @@
+from geopandas._compat import import_optional_dependency
+
+import pytest
+
+
+def test_import_optional_dependency_present():
+    # pandas is not optional, but we know it is present
+    pandas = import_optional_dependency("pandas")
+    assert pandas is not None
+
+    # module imported normally must be same
+    import pandas as pd
+
+    assert pandas == pd
+
+
+def test_import_optional_dependency_absent():
+    with pytest.raises(ImportError, match="Missing optional dependency 'foo'"):
+        import_optional_dependency("foo")
+
+    with pytest.raises(ImportError, match="foo is required"):
+        import_optional_dependency("foo", extra="foo is required")
+
+
+@pytest.mark.parametrize(
+    "bad_import", [["foo"], 0, False, True, {}, {"foo"}, {"foo": "bar"}]
+)
+def test_import_optional_dependency_invalid(bad_import):
+    with pytest.raises(ValueError, match="Invalid module name"):
+        import_optional_dependency(bad_import)
@@ -0,0 +1,47 @@
+import geopandas
+
+import pytest
+
+
+def test_options():
+    assert "display_precision: " in repr(geopandas.options)
+
+    assert set(dir(geopandas.options)) == {
+        "display_precision",
+        "use_pygeos",
+        "io_engine",
+    }
+
+    with pytest.raises(AttributeError):
+        geopandas.options.non_existing_option
+
+    with pytest.raises(AttributeError):
+        geopandas.options.non_existing_option = 10
+
+
+def test_options_display_precision():
+    assert geopandas.options.display_precision is None
+    geopandas.options.display_precision = 5
+    assert geopandas.options.display_precision == 5
+
+    with pytest.raises(ValueError):
+        geopandas.options.display_precision = "abc"
+
+    with pytest.raises(ValueError):
+        geopandas.options.display_precision = -1
+
+    geopandas.options.display_precision = None
+
+
+def test_options_io_engine():
+    assert geopandas.options.io_engine is None
+    geopandas.options.io_engine = "pyogrio"
+    assert geopandas.options.io_engine == "pyogrio"
+
+    with pytest.raises(ValueError):
+        geopandas.options.io_engine = "abc"
+
+    with pytest.raises(ValueError):
+        geopandas.options.io_engine = -1
+
+    geopandas.options.io_engine = None
@@ -0,0 +1,747 @@
+import random
+import warnings
+
+import numpy as np
+import pandas as pd
+
+from shapely.geometry import LineString, Point, Polygon
+
+from geopandas import GeoDataFrame, GeoSeries, points_from_xy, read_file
+from geopandas.array import GeometryArray, from_shapely, from_wkb, from_wkt
+
+import pytest
+from geopandas.testing import assert_geodataframe_equal
+
+pyproj = pytest.importorskip("pyproj")
+
+
+def _create_df(x, y=None, crs=None):
+    y = y or x
+    x = np.asarray(x)
+    y = np.asarray(y)
+
+    return GeoDataFrame(
+        {"geometry": points_from_xy(x, y), "value1": x + y, "value2": x * y}, crs=crs
+    )
+
+
+def df_epsg26918():
+    # EPSG:26918
+    # Center coordinates
+    # -1683723.64 6689139.23
+    return _create_df(
+        x=range(-1683723, -1683723 + 10, 1),
+        y=range(6689139, 6689139 + 10, 1),
+        crs="epsg:26918",
+    )
+
+
+def test_to_crs_transform():
+    df = df_epsg26918()
+    lonlat = df.to_crs(epsg=4326)
+    utm = lonlat.to_crs(epsg=26918)
+    assert_geodataframe_equal(df, utm, check_less_precise=True)
+
+
+def test_to_crs_transform__missing_data():
+    # https://github.com/geopandas/geopandas/issues/1573
+    df = df_epsg26918()
+    df.loc[3, "geometry"] = None
+    lonlat = df.to_crs(epsg=4326)
+    utm = lonlat.to_crs(epsg=26918)
+    assert_geodataframe_equal(df, utm, check_less_precise=True)
+
+
+def test_to_crs_transform__empty_data():
+    df = df_epsg26918().iloc[:0]
+    lonlat = df.to_crs(epsg=4326)
+    utm = lonlat.to_crs(epsg=26918)
+    assert_geodataframe_equal(df, utm, check_less_precise=True)
+
+
+def test_to_crs_inplace():
+    df = df_epsg26918()
+    lonlat = df.to_crs(epsg=4326)
+    df.to_crs(epsg=4326, inplace=True)
+    assert_geodataframe_equal(df, lonlat, check_less_precise=True)
+
+
+def test_to_crs_geo_column_name():
+    # Test to_crs() with different geometry column name (GH#339)
+    df = df_epsg26918()
+    df = df.rename(columns={"geometry": "geom"})
+    df.set_geometry("geom", inplace=True)
+    lonlat = df.to_crs(epsg=4326)
+    utm = lonlat.to_crs(epsg=26918)
+    assert lonlat.geometry.name == "geom"
+    assert utm.geometry.name == "geom"
+    assert_geodataframe_equal(df, utm, check_less_precise=True)
+
+
+def test_to_crs_dimension_z():
+    # preserve z dimension
+    arr = points_from_xy([1, 2], [2, 3], [3, 4], crs=4326)
+    assert arr.has_z.all()
+    result = arr.to_crs(epsg=3857)
+    assert result.has_z.all()
+
+
+# pyproj + numpy 1.25 trigger a warning for single-element arrays -> the
+# recommendation is to ignore the warning for now
+# (https://github.com/pyproj4/pyproj/issues/1307)
+@pytest.mark.filterwarnings("ignore:Conversion of an array with:DeprecationWarning")
+def test_to_crs_dimension_mixed():
+    s = GeoSeries([Point(1, 2), LineString([(1, 2, 3), (4, 5, 6)])], crs=2056)
+    result = s.to_crs(epsg=4326)
+    assert not result[0].is_empty
+    assert result.has_z.tolist() == [False, True]
+    roundtrip = result.to_crs(epsg=2056)
+    # TODO replace with assert_geoseries_equal once we expose tolerance keyword
+    # assert_geoseries_equal(roundtrip, s, check_less_precise=True)
+    for a, b in zip(roundtrip, s):
+        np.testing.assert_allclose(a.coords[:], b.coords[:], atol=0.01)
+
+
+# -----------------------------------------------------------------------------
+# Test different supported formats for CRS specification
+
+
+@pytest.fixture(
+    params=[
+        4326,
+        "epsg:4326",
+        pytest.param(
+            {"init": "epsg:4326"},
+        ),
+        "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs",
+        {"proj": "latlong", "ellps": "WGS84", "datum": "WGS84", "no_defs": True},
+    ],
+    ids=["epsg_number", "epsg_string", "epsg_dict", "proj4_string", "proj4_dict"],
+)
+def epsg4326(request):
+    if isinstance(request.param, int):
+        return {"epsg": request.param}
+    return {"crs": request.param}
+
+
+@pytest.fixture(
+    params=[
+        26918,
+        "epsg:26918",
+        pytest.param(
+            {"init": "epsg:26918", "no_defs": True},
+        ),
+        "+proj=utm +zone=18 +ellps=GRS80 +datum=NAD83 +units=m +no_defs ",
+        {"proj": "utm", "zone": 18, "datum": "NAD83", "units": "m", "no_defs": True},
+    ],
+    ids=["epsg_number", "epsg_string", "epsg_dict", "proj4_string", "proj4_dict"],
+)
+def epsg26918(request):
+    if isinstance(request.param, int):
+        return {"epsg": request.param}
+    return {"crs": request.param}
+
+
+@pytest.mark.filterwarnings("ignore:'\\+init:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:'\\+init:FutureWarning")
+def test_transform2(epsg4326, epsg26918):
+    # with PROJ >= 7, the transformation using EPSG code vs proj4 string is
+    # slightly different due to use of grid files or not -> turn off network
+    # to not use grid files at all for this test
+    pyproj.network.set_network_enabled(False)
+
+    df = df_epsg26918()
+    lonlat = df.to_crs(**epsg4326)
+    utm = lonlat.to_crs(**epsg26918)
+    # can't check for CRS equality, as the formats differ although representing
+    # the same CRS
+    assert_geodataframe_equal(df, utm, check_less_precise=True, check_crs=False)
+
+
+# pyproj + numpy 1.25 trigger a warning for single-element arrays -> the
+# recommendation is to ignore the warning for now
+# (https://github.com/pyproj4/pyproj/issues/1307)
+@pytest.mark.filterwarnings("ignore:Conversion of an array with:DeprecationWarning")
+def test_crs_axis_order__always_xy():
+    df = GeoDataFrame(geometry=[Point(-1683723, 6689139)], crs="epsg:26918")
+    lonlat = df.to_crs("epsg:4326")
+    test_lonlat = GeoDataFrame(
+        geometry=[Point(-110.1399901, 55.1350011)], crs="epsg:4326"
+    )
+    assert_geodataframe_equal(lonlat, test_lonlat, check_less_precise=True)
+
+
+def test_skip_exact_same():
+    df = df_epsg26918()
+    utm = df.to_crs(df.crs)
+    assert_geodataframe_equal(df, utm, check_less_precise=True)
+
+
+# Test CRS on GeometryArray level
+class TestGeometryArrayCRS:
+    """Tests that the CRS stored on a GeometryArray is set and propagated.
+
+    ``self.osgb`` (EPSG:27700) and ``self.wgs`` (EPSG:4326) are the two
+    reference CRS objects the assertions compare against.
+    """
+
+    def setup_method(self):
+        """Create the reference CRS objects and the shared test geometries."""
+        self.osgb = pyproj.CRS(27700)
+        self.wgs = pyproj.CRS(4326)
+
+        self.geoms = [Point(0, 0), Point(1, 1)]
+        # ten triangles with random (unseeded) coordinates
+        self.polys = [
+            Polygon([(random.random(), random.random()) for i in range(3)])
+            for _ in range(10)
+        ]
+        self.arr = from_shapely(self.polys, crs=27700)
+
+    def test_array(self):
+        """CRS can be set by attribute or constructor and overridden on re-wrap."""
+        arr = from_shapely(self.geoms)
+        arr.crs = 27700
+        assert arr.crs == self.osgb
+
+        arr = from_shapely(self.geoms, crs=27700)
+        assert arr.crs == self.osgb
+
+        arr = GeometryArray(arr)
+        assert arr.crs == self.osgb
+
+        arr = GeometryArray(arr, crs=4326)
+        assert arr.crs == self.wgs
+
+    def test_series(self):
+        """GeoSeries takes its CRS from the ``crs`` argument or the array."""
+        s = GeoSeries(crs=27700)
+        assert s.crs == self.osgb
+        assert s.values.crs == self.osgb
+
+        arr = from_shapely(self.geoms)
+        s = GeoSeries(arr, crs=27700)
+        assert s.crs == self.osgb
+        assert s.values.crs == self.osgb
+
+        # manually change CRS
+        s = s.set_crs(4326, allow_override=True)
+        assert s.crs == self.wgs
+        assert s.values.crs == self.wgs
+
+        s = GeoSeries(self.geoms, crs=27700)
+        assert s.crs == self.osgb
+        assert s.values.crs == self.osgb
+
+        arr = from_shapely(self.geoms, crs=27700)
+        s = GeoSeries(arr)
+        assert s.crs == self.osgb
+        assert s.values.crs == self.osgb
+
+        with pytest.raises(
+            ValueError,
+            match="CRS mismatch between CRS of the passed geometries and 'crs'",
+        ):
+            s = GeoSeries(arr, crs=4326)
+        # ``s`` was not rebound because the constructor raised
+        assert s.crs == self.osgb
+
+    def test_dataframe(self):
+        """GeoDataFrame CRS handling across constructors and ``set_geometry``."""
+        arr = from_shapely(self.geoms, crs=27700)
+        df = GeoDataFrame(geometry=arr)
+        assert df.crs == self.osgb
+        assert df.geometry.crs == self.osgb
+        assert df.geometry.values.crs == self.osgb
+
+        arr = from_shapely(self.geoms)
+        s = GeoSeries(arr, crs=27700)
+        df = GeoDataFrame(geometry=s)
+        assert df.crs == self.osgb
+        assert df.geometry.crs == self.osgb
+        assert df.geometry.values.crs == self.osgb
+
+        # different passed CRS than array CRS is now an error
+        match_str = "CRS mismatch between CRS of the passed geometries and 'crs'"
+        with pytest.raises(ValueError, match=match_str):
+            df = GeoDataFrame(geometry=s, crs=4326)
+        with pytest.raises(ValueError, match=match_str):
+            GeoDataFrame(geometry=s, crs=4326)
+        with pytest.raises(ValueError, match=match_str):
+            GeoDataFrame({"data": [1, 2], "geometry": s}, crs=4326)
+        with pytest.raises(ValueError, match=match_str):
+            GeoDataFrame(df, crs=4326).crs
+
+        # manually change CRS
+        arr = from_shapely(self.geoms)
+        s = GeoSeries(arr, crs=27700)
+        df = GeoDataFrame(geometry=s)
+        df = df.set_crs(crs="epsg:4326", allow_override=True)
+        assert df.crs == self.wgs
+        assert df.geometry.crs == self.wgs
+        assert df.geometry.values.crs == self.wgs
+
+        # a CRS cannot be given when no geometry column is set
+        with pytest.raises(ValueError, match="Assigning CRS to a GeoDataFrame without"):
+            GeoDataFrame(self.geoms, columns=["geom"], crs=27700)
+        with pytest.raises(ValueError, match="Assigning CRS to a GeoDataFrame without"):
+            GeoDataFrame(crs=27700)
+
+        df = GeoDataFrame(self.geoms, columns=["geom"])
+        df = df.set_geometry("geom", crs=27700)
+        assert df.crs == self.osgb
+        assert df.geometry.crs == self.osgb
+        assert df.geometry.values.crs == self.osgb
+        assert df.geom.crs == self.osgb
+        assert df.geom.values.crs == self.osgb
+
+        df = GeoDataFrame(geometry=self.geoms, crs=27700)
+        assert df.crs == self.osgb
+        assert df.geometry.crs == self.osgb
+        assert df.geometry.values.crs == self.osgb
+
+        # new geometry with set CRS has priority over GDF CRS
+        df = GeoDataFrame(geometry=self.geoms, crs=27700)
+        df = df.set_geometry(self.geoms, crs=4326)
+        assert df.crs == self.wgs
+        assert df.geometry.crs == self.wgs
+        assert df.geometry.values.crs == self.wgs
+
+        arr = from_shapely(self.geoms)
+        s = GeoSeries(arr, crs=27700)
+        df = GeoDataFrame()
+        df = df.set_geometry(s)
+        assert df._geometry_column_name == "geometry"
+        assert df.crs == self.osgb
+        assert df.geometry.crs == self.osgb
+        assert df.geometry.values.crs == self.osgb
+
+        arr = from_shapely(self.geoms, crs=27700)
+        df = GeoDataFrame()
+        df = df.set_geometry(arr)
+        assert df.crs == self.osgb
+        assert df.geometry.crs == self.osgb
+        assert df.geometry.values.crs == self.osgb
+
+        arr = from_shapely(self.geoms)
+        df = GeoDataFrame({"col1": [1, 2], "geometry": arr}, crs=4326)
+        assert df.crs == self.wgs
+        assert df.geometry.crs == self.wgs
+        assert df.geometry.values.crs == self.wgs
+
+        arr = from_shapely(self.geoms, crs=4326)
+        df = GeoDataFrame({"col1": [1, 2], "geometry": arr})
+        assert df.crs == self.wgs
+        assert df.geometry.crs == self.wgs
+        assert df.geometry.values.crs == self.wgs
+
+        # geometry column name None on init
+        df = GeoDataFrame({"geometry": [0, 1]})
+        with pytest.raises(
+            ValueError,
+            match="Assigning CRS to a GeoDataFrame without a geometry",
+        ):
+            df.crs = 27700
+
+        # geometry column without geometry
+        with warnings.catch_warnings():
+            warnings.filterwarnings(
+                "ignore", "Geometry column does not contain geometry", UserWarning
+            )
+            df = GeoDataFrame({"geometry": [Point(0, 1)]}).assign(geometry=[0])
+        with pytest.raises(
+            ValueError,
+            match="Assigning CRS to a GeoDataFrame without an active geometry",
+        ):
+            df.crs = 27700
+        with pytest.raises(
+            AttributeError,
+            match="The CRS attribute of a GeoDataFrame without an active",
+        ):
+            assert df.crs == self.osgb
+
+    def test_dataframe_getitem_without_geometry_column(self):
+        """Accessing ``crs`` on a subset lacking the active geometry raises."""
+        df = GeoDataFrame({"col": range(10)}, geometry=self.arr)
+        df["geom2"] = df.geometry.centroid
+        subset = df[["col", "geom2"]]
+        with pytest.raises(
+            AttributeError,
+            match="The CRS attribute of a GeoDataFrame without an active",
+        ):
+            assert subset.crs == self.osgb
+
+    def test_dataframe_setitem(self):
+        """Assigning geometry columns via ``__setitem__`` carries their CRS."""
+        # new geometry CRS has priority over GDF CRS
+        arr = from_shapely(self.geoms)
+        s = GeoSeries(arr, crs=27700)
+        df = GeoDataFrame()
+        with pytest.warns(
+            FutureWarning, match="You are adding a column named 'geometry'"
+        ):
+            df["geometry"] = s
+        assert df.crs == self.osgb
+        assert df.geometry.crs == self.osgb
+        assert df.geometry.values.crs == self.osgb
+
+        arr = from_shapely(self.geoms, crs=27700)
+        df = GeoDataFrame()
+        with pytest.warns(
+            FutureWarning, match="You are adding a column named 'geometry'"
+        ):
+            df["geometry"] = arr
+        assert df.crs == self.osgb
+        assert df.geometry.crs == self.osgb
+        assert df.geometry.values.crs == self.osgb
+
+        # test to_crs case (GH1960)
+        arr = from_shapely(self.geoms)
+        df = GeoDataFrame({"col1": [1, 2], "geometry": arr}, crs=4326)
+        df["geometry"] = df["geometry"].to_crs(27700)
+        assert df.crs == self.osgb
+        assert df.geometry.crs == self.osgb
+        assert df.geometry.values.crs == self.osgb
+
+        # test changing geometry crs not in the geometry column doesn't change the crs
+        arr = from_shapely(self.geoms)
+        df = GeoDataFrame(
+            {"col1": [1, 2], "geometry": arr, "other_geom": arr}, crs=4326
+        )
+        df["other_geom"] = from_shapely(self.geoms, crs=27700)
+        assert df.crs == self.wgs
+        assert df.geometry.crs == self.wgs
+        assert df["geometry"].crs == self.wgs
+        assert df["other_geom"].crs == self.osgb
+
+    def test_dataframe_setitem_without_geometry_column(self):
+        """Overwriting the geometry with non-geometry data drops the CRS."""
+        arr = from_shapely(self.geoms)
+        df = GeoDataFrame({"col1": [1, 2], "geometry": arr}, crs=4326)
+
+        # override geometry with non geometry
+        with pytest.warns(UserWarning):
+            df["geometry"] = 1
+
+        # assigning a list of geometry object doesn't have cached access to 4326
+        df["geometry"] = self.geoms
+        assert df.crs is None
+
+    @pytest.mark.parametrize(
+        "scalar", [None, Point(0, 0), LineString([(0, 0), (1, 1)])]
+    )
+    def test_scalar(self, scalar):
+        """A geometry column assigned from a scalar can be given a CRS."""
+        df = GeoDataFrame()
+        with pytest.warns(
+            FutureWarning, match="You are adding a column named 'geometry'"
+        ):
+            df["geometry"] = scalar
+        df = df.set_crs(4326)
+        assert df.crs == self.wgs
+        assert df.geometry.crs == self.wgs
+        assert df.geometry.values.crs == self.wgs
+
+    @pytest.mark.filterwarnings("ignore:Accessing CRS")
+    def test_crs_with_no_geom_fails(self):
+        """Assigning a CRS to a frame without geometry raises ValueError."""
+        with pytest.raises(ValueError, match="Assigning CRS to a GeoDataFrame without"):
+            df = GeoDataFrame()
+            df.crs = 4326
+
+    def test_read_file(self, nybb_filename):
+        """The frame read from ``nybb_filename`` reports EPSG:2263 at every level."""
+        df = read_file(nybb_filename)
+        assert df.crs == pyproj.CRS(2263)
+        assert df.geometry.crs == pyproj.CRS(2263)
+        assert df.geometry.values.crs == pyproj.CRS(2263)
+
+    def test_multiple_geoms(self):
+        """Active and non-active geometry columns keep their own CRS."""
+        arr = from_shapely(self.geoms, crs=27700)
+        s = GeoSeries(self.geoms, crs=4326)
+        df = GeoDataFrame(s, geometry=arr, columns=["col1"])
+        assert df.crs == self.osgb
+        assert df.geometry.crs == self.osgb
+        assert df.geometry.values.crs == self.osgb
+        assert df.col1.crs == self.wgs
+        assert df.col1.values.crs == self.wgs
+
+    def test_multiple_geoms_set_geom(self):
+        """Switching the active geometry switches the frame-level CRS."""
+        arr = from_shapely(self.geoms, crs=27700)
+        s = GeoSeries(self.geoms, crs=4326)
+        df = GeoDataFrame(s, geometry=arr, columns=["col1"])
+        df = df.set_geometry("col1")
+        assert df.crs == self.wgs
+        assert df.geometry.crs == self.wgs
+        assert df.geometry.values.crs == self.wgs
+        assert df["geometry"].crs == self.osgb
+        assert df["geometry"].values.crs == self.osgb
+
+    def test_assign_cols(self):
+        """Newly assigned geometry columns keep the CRS of what was assigned."""
+        arr = from_shapely(self.geoms, crs=27700)
+        s = GeoSeries(self.geoms, crs=4326)
+        df = GeoDataFrame(s, geometry=arr, columns=["col1"])
+        df["geom2"] = s
+        df["geom3"] = s.values
+        df["geom4"] = from_shapely(self.geoms)
+        assert df.crs == self.osgb
+        assert df.geometry.crs == self.osgb
+        assert df.geometry.values.crs == self.osgb
+        assert df.geom2.crs == self.wgs
+        assert df.geom2.values.crs == self.wgs
+        assert df.geom3.crs == self.wgs
+        assert df.geom3.values.crs == self.wgs
+        assert df.geom4.crs is None
+        assert df.geom4.values.crs is None
+
+    def test_copy(self):
+        """``copy()`` preserves the CRS of arrays, series and frames."""
+        arr = from_shapely(self.geoms, crs=27700)
+        s = GeoSeries(self.geoms, crs=4326)
+        df = GeoDataFrame(s, geometry=arr, columns=["col1"])
+
+        arr_copy = arr.copy()
+        assert arr_copy.crs == arr.crs
+
+        s_copy = s.copy()
+        assert s_copy.crs == s.crs
+        assert s_copy.values.crs == s.values.crs
+
+        df_copy = df.copy()
+        assert df_copy.crs == df.crs
+        assert df_copy.geometry.crs == df.geometry.crs
+        assert df_copy.geometry.values.crs == df.geometry.values.crs
+        assert df_copy.col1.crs == df.col1.crs
+        assert df_copy.col1.values.crs == df.col1.values.crs
+
+    def test_rename(self):
+        """Renaming columns or the geometry keeps each column's CRS."""
+        arr = from_shapely(self.geoms, crs=27700)
+        s = GeoSeries(self.geoms, crs=4326)
+        df = GeoDataFrame(s, geometry=arr, columns=["col1"])
+        df = df.rename(columns={"geometry": "geom"}).set_geometry("geom")
+        assert df.crs == self.osgb
+        assert df.geometry.crs == self.osgb
+        assert df.geometry.values.crs == self.osgb
+
+        df = df.rename_geometry("geom2")
+        assert df.crs == self.osgb
+        assert df.geometry.crs == self.osgb
+        assert df.geometry.values.crs == self.osgb
+
+        df = df.rename(columns={"col1": "column1"})
+        assert df.column1.crs == self.wgs
+        assert df.column1.values.crs == self.wgs
+
+    def test_geoseries_to_crs(self):
+        """``to_crs`` updates the CRS of the series / active geometry only."""
+        s = GeoSeries(self.geoms, crs=27700)
+        s = s.to_crs(4326)
+        assert s.crs == self.wgs
+        assert s.values.crs == self.wgs
+
+        df = GeoDataFrame(geometry=s)
+        assert df.crs == self.wgs
+        df = df.to_crs(27700)
+        assert df.crs == self.osgb
+        assert df.geometry.crs == self.osgb
+        assert df.geometry.values.crs == self.osgb
+
+        # make sure that only active geometry is transformed
+        arr = from_shapely(self.geoms, crs=4326)
+        df["col1"] = arr
+        df = df.to_crs(3857)
+        assert df.col1.crs == self.wgs
+        assert df.col1.values.crs == self.wgs
+
+    def test_array_to_crs(self):
+        """``GeometryArray.to_crs`` returns an array with the target CRS."""
+        arr = from_shapely(self.geoms, crs=27700)
+        arr = arr.to_crs(4326)
+        assert arr.crs == self.wgs
+
+    def test_from_shapely(self):
+        """``from_shapely`` stores the given CRS."""
+        arr = from_shapely(self.geoms, crs=27700)
+        assert arr.crs == self.osgb
+
+    def test_from_wkb(self):
+        """``from_wkb`` stores the given CRS."""
+        L_wkb = [p.wkb for p in self.geoms]
+        arr = from_wkb(L_wkb, crs=27700)
+        assert arr.crs == self.osgb
+
+    def test_from_wkt(self):
+        """``from_wkt`` stores the given CRS."""
+        L_wkt = [p.wkt for p in self.geoms]
+        arr = from_wkt(L_wkt, crs=27700)
+        assert arr.crs == self.osgb
+
+    def test_points_from_xy(self):
+        """``points_from_xy`` stores the given CRS."""
+        df = pd.DataFrame([{"x": x, "y": x, "z": x} for x in range(10)])
+        arr = points_from_xy(df["x"], df["y"], crs=27700)
+        assert arr.crs == self.osgb
+
+    # setting CRS in GeoSeries should not set it in passed array without CRS
+    def test_original(self):
+        """The array passed to GeoSeries is not given the series' CRS."""
+        arr = from_shapely(self.geoms)
+        s = GeoSeries(arr, crs=27700)
+        assert arr.crs is None
+        assert s.crs == self.osgb
+
+    def test_ops(self):
+        """Unary geometry operations return arrays with the same CRS."""
+        arr = self.arr
+        bound = arr.boundary
+        assert bound.crs == self.osgb
+
+        cent = arr.centroid
+        assert cent.crs == self.osgb
+
+        hull = arr.convex_hull
+        assert hull.crs == self.osgb
+
+        envelope = arr.envelope
+        assert envelope.crs == self.osgb
+
+        exterior = arr.exterior
+        assert exterior.crs == self.osgb
+
+        representative_point = arr.representative_point()
+        assert representative_point.crs == self.osgb
+
+    def test_binary_ops(self):
+        """Binary set operations return arrays with the same CRS."""
+        arr = self.arr
+        # collect ten random quadrilaterals, keeping only valid ones
+        quads = []
+        while len(quads) < 10:
+            geom = Polygon([(random.random(), random.random()) for i in range(4)])
+            if geom.is_valid:
+                quads.append(geom)
+
+        arr2 = from_shapely(quads, crs=27700)
+
+        difference = arr.difference(arr2)
+        assert difference.crs == self.osgb
+
+        intersection = arr.intersection(arr2)
+        assert intersection.crs == self.osgb
+
+        symmetric_difference = arr.symmetric_difference(arr2)
+        assert symmetric_difference.crs == self.osgb
+
+        union = arr.union(arr2)
+        assert union.crs == self.osgb
+
+    def test_other(self):
+        """``buffer``, ``interpolate`` and ``simplify`` keep the CRS."""
+        arr = self.arr
+
+        buffer = arr.buffer(5)
+        assert buffer.crs == self.osgb
+
+        interpolate = arr.exterior.interpolate(0.1)
+        assert interpolate.crs == self.osgb
+
+        simplify = arr.simplify(5)
+        assert simplify.crs == self.osgb
+
+    @pytest.mark.parametrize(
+        "attr, arg",
+        [
+            ("affine_transform", ([0, 1, 1, 0, 0, 0],)),
+            ("translate", ()),
+            ("rotate", (10,)),
+            ("scale", ()),
+            ("skew", ()),
+        ],
+    )
+    def test_affinity_methods(self, attr, arg):
+        """Each affinity method returns an array with the same CRS."""
+        result = getattr(self.arr, attr)(*arg)
+
+        assert result.crs == self.osgb
+
+    def test_slice(self):
+        """Positional slicing keeps the CRS of every geometry column."""
+        s = GeoSeries(self.arr, crs=27700)
+        assert s.iloc[1:].values.crs == self.osgb
+
+        df = GeoDataFrame({"col1": self.arr}, geometry=s)
+        assert df.iloc[1:].geometry.values.crs == self.osgb
+        assert df.iloc[1:].col1.values.crs == self.osgb
+
+    def test_concat(self):
+        """``pd.concat`` keeps the CRS of every geometry column."""
+        s = GeoSeries(self.arr, crs=27700)
+        assert pd.concat([s, s]).values.crs == self.osgb
+
+        df = GeoDataFrame({"col1": from_shapely(self.geoms, crs=4326)}, geometry=s)
+        assert pd.concat([df, df]).geometry.values.crs == self.osgb
+        assert pd.concat([df, df]).col1.values.crs == self.wgs
+
+    def test_merge(self):
+        """``merge`` keeps the CRS of the geometry columns from both frames."""
+        arr = from_shapely(self.geoms, crs=27700)
+        s = GeoSeries(self.geoms, crs=4326)
+        df = GeoDataFrame({"col1": s}, geometry=arr)
+        df2 = GeoDataFrame({"col2": s}, geometry=arr).rename_geometry("geom")
+        merged = df.merge(df2, left_index=True, right_index=True)
+        assert merged.col1.values.crs == self.wgs
+        assert merged.geometry.values.crs == self.osgb
+        assert merged.col2.values.crs == self.wgs
+        assert merged.geom.values.crs == self.osgb
+        assert merged.crs == self.osgb
+
+    # make sure that geometry column from list has CRS (__setitem__)
+    def test_setitem_geometry(self):
+        """Re-assigning the geometry column keeps or replaces the CRS."""
+        arr = from_shapely(self.geoms, crs=27700)
+        df = GeoDataFrame({"col1": [0, 1]}, geometry=arr)
+
+        df["geometry"] = list(df.geometry)
+        assert df.geometry.values.crs == self.osgb
+
+        df2 = GeoDataFrame({"col1": [0, 1]}, geometry=arr)
+        df2["geometry"] = from_shapely(self.geoms, crs=4326)
+        assert df2.geometry.values.crs == self.wgs
+
+    def test_astype(self):
+        """``astype`` on a non-geometry column keeps the frame CRS."""
+        arr = from_shapely(self.geoms, crs=27700)
+        df = GeoDataFrame({"col1": [0, 1]}, geometry=arr)
+        df2 = df.astype({"col1": str})
+        assert df2.crs == self.osgb
+
+    def test_apply(self):
+        """``GeoSeries.apply`` returning geometries keeps the CRS."""
+        s = GeoSeries(self.arr)
+        assert s.crs == 27700
+
+        # apply preserves the CRS if the result is a GeoSeries
+        result = s.apply(lambda x: x.centroid)
+        assert result.crs == 27700
+
+    def test_apply_geodataframe(self):
+        """``GeoDataFrame.apply`` along either axis keeps the CRS."""
+        df = GeoDataFrame({"col1": [0, 1]}, geometry=self.geoms, crs=27700)
+        assert df.crs == 27700
+
+        # apply preserves the CRS if the result is a GeoDataFrame
+        result = df.apply(lambda col: col, axis=0)
+        assert result.crs == 27700
+        result = df.apply(lambda row: row, axis=1)
+        assert result.crs == 27700
+
+
+class TestSetCRS:
+    """Tests for ``set_crs`` on both GeoSeries and GeoDataFrame."""
+
+    @pytest.mark.parametrize(
+        "constructor",
+        [
+            lambda geoms, crs: GeoSeries(geoms, crs=crs),
+            lambda geoms, crs: GeoDataFrame(geometry=geoms, crs=crs),
+        ],
+        ids=["geoseries", "geodataframe"],
+    )
+    def test_set_crs(self, constructor):
+        """Exercise copy vs. in-place behaviour and the override rules.
+
+        The steps build on each other: the in-place calls mutate ``naive`` and
+        ``non_naive``, and later assertions rely on that state.
+        """
+        naive = constructor([Point(0, 0), Point(1, 1)], crs=None)
+        assert naive.crs is None
+
+        # by default returns a copy
+        result = naive.set_crs(crs="EPSG:4326")
+        assert result.crs == "EPSG:4326"
+        assert naive.crs is None
+
+        # the ``epsg`` keyword behaves like ``crs``
+        result = naive.set_crs(epsg=4326)
+        assert result.crs == "EPSG:4326"
+        assert naive.crs is None
+
+        # with inplace=True
+        result = naive.set_crs(crs="EPSG:4326", inplace=True)
+        assert result is naive
+        assert result.crs == naive.crs == "EPSG:4326"
+
+        # raise for non-naive when crs would be overridden
+        non_naive = constructor([Point(0, 0), Point(1, 1)], crs="EPSG:4326")
+        assert non_naive.crs == "EPSG:4326"
+        with pytest.raises(ValueError, match="already has a CRS"):
+            non_naive.set_crs("EPSG:3857")
+
+        # allow for equal crs
+        result = non_naive.set_crs("EPSG:4326")
+        assert result.crs == "EPSG:4326"
+
+        # replace with allow_override=True
+        result = non_naive.set_crs("EPSG:3857", allow_override=True)
+        assert non_naive.crs == "EPSG:4326"
+        assert result.crs == "EPSG:3857"
+
+        # in-place override changes ``non_naive`` itself
+        result = non_naive.set_crs("EPSG:3857", allow_override=True, inplace=True)
+        assert non_naive.crs == "EPSG:3857"
+        assert result.crs == "EPSG:3857"
+
+        # set CRS to None
+        result = non_naive.set_crs(crs=None, allow_override=True)
+        assert result.crs is None
+        assert non_naive.crs == "EPSG:3857"
@@ -0,0 +1,15 @@
+from geopandas import GeoDataFrame, read_file
+from geopandas.datasets import get_path
+
+import pytest
+
+
+@pytest.mark.parametrize(
+    "test_dataset", ["naturalearth_lowres", "naturalearth_cities", "nybb", "foo"]
+)
+def test_read_paths(test_dataset):
+    with pytest.raises(
+        AttributeError,
+        match=r"The geopandas\.dataset has been deprecated and was removed",
+    ):
+        assert isinstance(read_file(get_path(test_dataset)), GeoDataFrame)
@@ -0,0 +1,87 @@
+from textwrap import dedent
+
+from geopandas._decorator import doc
+
+
+# Base template for the tests in this module: the docstring below contains the
+# ``{method}`` and ``{operation}`` placeholders, and ``doc`` receives a value
+# for each of them as a keyword argument.
+@doc(method="cumsum", operation="sum")
+def cumsum(whatever):
+    """
+    This is the {method} method.
+
+    It computes the cumulative {operation}.
+    """
+
+
+# Passes ``cumsum`` plus an additional dedented "Examples" text to ``doc`` and
+# supplies new placeholder values; ``test_docstring_appending`` spells out the
+# docstring that is expected to result.
+@doc(
+    cumsum,
+    dedent(
+        """
+        Examples
+        --------
+
+        >>> cumavg([1, 2, 3])
+        2
+        """
+    ),
+    method="cumavg",
+    operation="average",
+)
+def cumavg(whatever): ...
+
+
+# Passes ``cumsum`` to ``doc`` with different placeholder values; the function
+# body itself is empty.
+@doc(cumsum, method="cummax", operation="maximum")
+def cummax(whatever): ...
+
+
+# Passes ``cummax`` (itself decorated from ``cumsum``) to ``doc``, so this
+# covers taking the template from an already decorated function.
+@doc(cummax, method="cummin", operation="minimum")
+def cummin(whatever): ...
+
+
+def test_docstring_formatting():
+    docstr = dedent(
+        """
+        This is the cumsum method.
+
+        It computes the cumulative sum.
+        """
+    )
+    assert cumsum.__doc__ == docstr
+
+
+def test_docstring_appending():
+    docstr = dedent(
+        """
+        This is the cumavg method.
+
+        It computes the cumulative average.
+
+        Examples
+        --------
+
+        >>> cumavg([1, 2, 3])
+        2
+        """
+    )
+    assert cumavg.__doc__ == docstr
+
+
+def test_doc_template_from_func():
+    docstr = dedent(
+        """
+        This is the cummax method.
+
+        It computes the cumulative maximum.
+        """
+    )
+    assert cummax.__doc__ == docstr
+
+
+def test_inherit_doc_template():
+    docstr = dedent(
+        """
+        This is the cummin method.
+
+        It computes the cumulative minimum.
+        """
+    )
+    assert cummin.__doc__ == docstr
@@ -0,0 +1,372 @@
+import warnings
+
+import numpy as np
+import pandas as pd
+
+import geopandas
+from geopandas import GeoDataFrame, read_file
+from geopandas._compat import HAS_PYPROJ, PANDAS_GE_15, PANDAS_GE_20, PANDAS_GE_30
+
+import pytest
+from geopandas.testing import assert_geodataframe_equal, geom_almost_equals
+from pandas.testing import assert_frame_equal
+
+
+@pytest.fixture
+def nybb_polydf(nybb_filename):
+    nybb_polydf = read_file(nybb_filename)
+    nybb_polydf = nybb_polydf[["geometry", "BoroName", "BoroCode"]]
+    nybb_polydf = nybb_polydf.rename(columns={"geometry": "myshapes"})
+    nybb_polydf = nybb_polydf.set_geometry("myshapes")
+    nybb_polydf["manhattan_bronx"] = 5
+    nybb_polydf.loc[3:4, "manhattan_bronx"] = 6
+    nybb_polydf["BoroCode"] = nybb_polydf["BoroCode"].astype("int64")
+    return nybb_polydf
+
+
+@pytest.fixture
+def merged_shapes(nybb_polydf):
+    # Merged geometry
+    manhattan_bronx = nybb_polydf.loc[3:4]
+    others = nybb_polydf.loc[0:2]
+
+    collapsed = [others.geometry.union_all(), manhattan_bronx.geometry.union_all()]
+    merged_shapes = GeoDataFrame(
+        {"myshapes": collapsed},
+        geometry="myshapes",
+        index=pd.Index([5, 6], name="manhattan_bronx"),
+        crs=nybb_polydf.crs,
+    )
+
+    return merged_shapes
+
+
+@pytest.fixture
+def first(merged_shapes):
+    first = merged_shapes.copy()
+    first["BoroName"] = ["Staten Island", "Manhattan"]
+    first["BoroCode"] = [5, 1]
+    return first
+
+
+@pytest.fixture
+def expected_mean(merged_shapes):
+    test_mean = merged_shapes.copy()
+    test_mean["BoroCode"] = [4, 1.5]
+    return test_mean
+
+
+def test_geom_dissolve(nybb_polydf, first):
+    test = nybb_polydf.dissolve("manhattan_bronx")
+    assert test.geometry.name == "myshapes"
+    assert geom_almost_equals(test, first)
+
+
+@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
+def test_dissolve_retains_existing_crs(nybb_polydf):
+    assert nybb_polydf.crs is not None
+    test = nybb_polydf.dissolve("manhattan_bronx")
+    assert test.crs is not None
+
+
+def test_dissolve_retains_nonexisting_crs(nybb_polydf):
+    nybb_polydf.geometry.array.crs = None
+    test = nybb_polydf.dissolve("manhattan_bronx")
+    assert test.crs is None
+
+
+def test_first_dissolve(nybb_polydf, first):
+    test = nybb_polydf.dissolve("manhattan_bronx")
+    assert_frame_equal(first, test, check_column_type=False)
+
+
+def test_mean_dissolve(nybb_polydf, first, expected_mean):
+    if not PANDAS_GE_15:
+        test = nybb_polydf.dissolve("manhattan_bronx", aggfunc="mean")
+        test2 = nybb_polydf.dissolve("manhattan_bronx", aggfunc=np.mean)
+    elif PANDAS_GE_15 and not PANDAS_GE_20:
+        with pytest.warns(FutureWarning, match=".*used in dissolve is deprecated.*"):
+            test = nybb_polydf.dissolve("manhattan_bronx", aggfunc="mean")
+            test2 = nybb_polydf.dissolve("manhattan_bronx", aggfunc=np.mean)
+    else:  # pandas 2.0
+        test = nybb_polydf.dissolve(
+            "manhattan_bronx", aggfunc="mean", numeric_only=True
+        )
+        # for non pandas "mean", numeric only cannot be applied. Drop columns manually
+        test2 = nybb_polydf.drop(columns=["BoroName"]).dissolve(
+            "manhattan_bronx", aggfunc="mean"
+        )
+
+    assert_frame_equal(expected_mean, test, check_column_type=False)
+    assert_frame_equal(expected_mean, test2, check_column_type=False)
+
+
+@pytest.mark.skipif(not PANDAS_GE_15 or PANDAS_GE_20, reason="warning for pandas 1.5.x")
+def test_mean_dissolve_warning_capture(nybb_polydf, first, expected_mean):
+    with pytest.warns(
+        FutureWarning,
+        match=".*used in dissolve is deprecated.*",
+    ):
+        nybb_polydf.dissolve("manhattan_bronx", aggfunc="mean")
+
+    # test no warning for aggfunc first which doesn't have numeric only semantics
+    with warnings.catch_warnings():
+        warnings.simplefilter("error")
+        nybb_polydf.dissolve("manhattan_bronx", aggfunc="first")
+
+
+def test_dissolve_emits_other_warnings(nybb_polydf):
+    # we only do something special for pandas 1.5.x, but expect this
+    # test to be true on any version
+    def sum_and_warn(group):
+        warnings.warn("foo")  # noqa: B028
+        if PANDAS_GE_20:
+            return group.sum(numeric_only=False)
+        else:
+            return group.sum()
+
+    with pytest.warns(UserWarning, match="foo"):
+        nybb_polydf.dissolve("manhattan_bronx", aggfunc=sum_and_warn)
+
+
+def test_multicolumn_dissolve(nybb_polydf, first):
+    multi = nybb_polydf.copy()
+    multi["dup_col"] = multi.manhattan_bronx
+    multi_test = multi.dissolve(["manhattan_bronx", "dup_col"], aggfunc="first")
+
+    first_copy = first.copy()
+    first_copy["dup_col"] = first_copy.index
+    first_copy = first_copy.set_index([first_copy.index, "dup_col"])
+
+    assert_frame_equal(multi_test, first_copy, check_column_type=False)
+
+
+def test_reset_index(nybb_polydf, first):
+    test = nybb_polydf.dissolve("manhattan_bronx", as_index=False)
+    comparison = first.reset_index()
+    assert_frame_equal(comparison, test, check_column_type=False)
+
+
+def test_dissolve_none(nybb_polydf):
+    test = nybb_polydf.dissolve(by=None)
+    expected = GeoDataFrame(
+        {
+            nybb_polydf.geometry.name: [nybb_polydf.geometry.union_all()],
+            "BoroName": ["Staten Island"],
+            "BoroCode": [5],
+            "manhattan_bronx": [5],
+        },
+        geometry=nybb_polydf.geometry.name,
+        crs=nybb_polydf.crs,
+    )
+    assert_frame_equal(expected, test, check_column_type=False)
+
+
+def test_dissolve_none_mean(nybb_polydf):
+    test = nybb_polydf.dissolve(aggfunc="mean", numeric_only=True)
+    expected = GeoDataFrame(
+        {
+            nybb_polydf.geometry.name: [nybb_polydf.geometry.union_all()],
+            "BoroCode": [3.0],
+            "manhattan_bronx": [5.4],
+        },
+        geometry=nybb_polydf.geometry.name,
+        crs=nybb_polydf.crs,
+    )
+    assert_frame_equal(expected, test, check_column_type=False)
+
+
+def test_dissolve_level():
+    gdf = geopandas.GeoDataFrame(
+        {
+            "a": [1, 1, 2, 2],
+            "b": [3, 4, 4, 4],
+            "c": [3, 4, 5, 6],
+            "geometry": geopandas.array.from_wkt(
+                ["POINT (0 0)", "POINT (1 1)", "POINT (2 2)", "POINT (3 3)"]
+            ),
+        }
+    ).set_index(["a", "b", "c"])
+
+    expected_a = geopandas.GeoDataFrame(
+        {
+            "a": [1, 2],
+            "geometry": geopandas.array.from_wkt(
+                ["MULTIPOINT (0 0, 1 1)", "MULTIPOINT (2 2, 3 3)"]
+            ),
+        }
+    ).set_index("a")
+    expected_b = geopandas.GeoDataFrame(
+        {
+            "b": [3, 4],
+            "geometry": geopandas.array.from_wkt(
+                ["POINT (0 0)", "MULTIPOINT (1 1, 2 2, 3 3)"]
+            ),
+        }
+    ).set_index("b")
+    expected_ab = geopandas.GeoDataFrame(
+        {
+            "a": [1, 1, 2],
+            "b": [3, 4, 4],
+            "geometry": geopandas.array.from_wkt(
+                ["POINT (0 0)", "POINT (1 1)", "MULTIPOINT (2 2, 3 3)"]
+            ),
+        }
+    ).set_index(["a", "b"])
+
+    assert_frame_equal(expected_a, gdf.dissolve(level=0))
+    assert_frame_equal(expected_a, gdf.dissolve(level="a"))
+    assert_frame_equal(expected_b, gdf.dissolve(level=1))
+    assert_frame_equal(expected_b, gdf.dissolve(level="b"))
+    assert_frame_equal(expected_ab, gdf.dissolve(level=[0, 1]))
+    assert_frame_equal(expected_ab, gdf.dissolve(level=["a", "b"]))
+
+
+def test_dissolve_sort():
+    gdf = geopandas.GeoDataFrame(
+        {
+            "a": [2, 1, 1],
+            "geometry": geopandas.array.from_wkt(
+                ["POINT (0 0)", "POINT (1 1)", "POINT (2 2)"]
+            ),
+        }
+    )
+
+    expected_unsorted = geopandas.GeoDataFrame(
+        {
+            "a": [2, 1],
+            "geometry": geopandas.array.from_wkt(
+                ["POINT (0 0)", "MULTIPOINT (1 1, 2 2)"]
+            ),
+        }
+    ).set_index("a")
+    expected_sorted = expected_unsorted.sort_index()
+
+    assert_frame_equal(expected_sorted, gdf.dissolve("a"))
+    assert_frame_equal(expected_unsorted, gdf.dissolve("a", sort=False))
+
+
+def test_dissolve_categorical():
+    gdf = geopandas.GeoDataFrame(
+        {
+            "cat": pd.Categorical(["a", "a", "b", "b"]),
+            "noncat": [1, 1, 1, 2],
+            "to_agg": [1, 2, 3, 4],
+            "geometry": geopandas.array.from_wkt(
+                ["POINT (0 0)", "POINT (1 1)", "POINT (2 2)", "POINT (3 3)"]
+            ),
+        }
+    )
+
+    # when observed=False we get an additional observation
+    # that wasn't in the original data
+    none_val = "GEOMETRYCOLLECTION EMPTY" if PANDAS_GE_30 else None
+    expected_gdf_observed_false = geopandas.GeoDataFrame(
+        {
+            "cat": pd.Categorical(["a", "a", "b", "b"]),
+            "noncat": [1, 2, 1, 2],
+            "geometry": geopandas.array.from_wkt(
+                [
+                    "MULTIPOINT (0 0, 1 1)",
+                    none_val,
+                    "POINT (2 2)",
+                    "POINT (3 3)",
+                ]
+            ),
+            "to_agg": [1, None, 3, 4],
+        }
+    ).set_index(["cat", "noncat"])
+
+    # when observed=True we do not get any additional observations
+    expected_gdf_observed_true = geopandas.GeoDataFrame(
+        {
+            "cat": pd.Categorical(["a", "b", "b"]),
+            "noncat": [1, 1, 2],
+            "geometry": geopandas.array.from_wkt(
+                ["MULTIPOINT (0 0, 1 1)", "POINT (2 2)", "POINT (3 3)"]
+            ),
+            "to_agg": [1, 3, 4],
+        }
+    ).set_index(["cat", "noncat"])
+
+    assert_frame_equal(expected_gdf_observed_false, gdf.dissolve(["cat", "noncat"]))
+    assert_frame_equal(
+        expected_gdf_observed_true, gdf.dissolve(["cat", "noncat"], observed=True)
+    )
+
+
+def test_dissolve_dropna():
+    gdf = geopandas.GeoDataFrame(
+        {
+            "a": [1, 1, None],
+            "geometry": geopandas.array.from_wkt(
+                ["POINT (0 0)", "POINT (1 1)", "POINT (2 2)"]
+            ),
+        }
+    )
+
+    expected_with_na = geopandas.GeoDataFrame(
+        {
+            "a": [1.0, np.nan],
+            "geometry": geopandas.array.from_wkt(
+                ["MULTIPOINT (0 0, 1 1)", "POINT (2 2)"]
+            ),
+        }
+    ).set_index("a")
+    expected_no_na = geopandas.GeoDataFrame(
+        {
+            "a": [1.0],
+            "geometry": geopandas.array.from_wkt(["MULTIPOINT (0 0, 1 1)"]),
+        }
+    ).set_index("a")
+
+    assert_frame_equal(expected_with_na, gdf.dissolve("a", dropna=False))
+    assert_frame_equal(expected_no_na, gdf.dissolve("a"))
+
+
+def test_dissolve_dropna_warn(nybb_polydf):
+    # No warning with default params
+    with warnings.catch_warnings(record=True) as record:
+        nybb_polydf.dissolve()
+
+    for r in record:
+        assert "dropna kwarg is not supported" not in str(r.message)
+
+
+def test_dissolve_multi_agg(nybb_polydf, merged_shapes):
+    merged_shapes[("BoroCode", "min")] = [3, 1]
+    merged_shapes[("BoroCode", "max")] = [5, 2]
+    merged_shapes[("BoroName", "count")] = [3, 2]
+
+    with warnings.catch_warnings(record=True) as record:
+        test = nybb_polydf.dissolve(
+            by="manhattan_bronx",
+            aggfunc={
+                "BoroCode": ["min", "max"],
+                "BoroName": "count",
+            },
+        )
+    assert_geodataframe_equal(test, merged_shapes)
+    assert len(record) == 0
+
+
+def test_coverage_dissolve(nybb_polydf):
+    manhattan_bronx = nybb_polydf.loc[3:4]
+    others = nybb_polydf.loc[0:2]
+
+    collapsed = [
+        others.geometry.union_all(method="coverage"),
+        manhattan_bronx.geometry.union_all(method="coverage"),
+    ]
+    merged_shapes = GeoDataFrame(
+        {"myshapes": collapsed},
+        geometry="myshapes",
+        index=pd.Index([5, 6], name="manhattan_bronx"),
+        crs=nybb_polydf.crs,
+    )
+
+    merged_shapes["BoroName"] = ["Staten Island", "Manhattan"]
+    merged_shapes["BoroCode"] = [5, 1]
+
+    test = nybb_polydf.dissolve("manhattan_bronx", method="coverage")
+    assert_frame_equal(merged_shapes, test, check_column_type=False)
@@ -0,0 +1,648 @@
+"""
+This file contains a minimal set of tests for compliance with the extension
+array interface test suite (by inheriting the pandas test suite), and should
+contain no other tests.
+Other tests (eg related to the spatial functionality or integration
+with GeoSeries/GeoDataFrame) should be added to test_array.py and others.
+
+The tests in this file are inherited from the BaseExtensionTests, and only
+minimal tweaks should be applied to get the tests passing (by overwriting a
+parent method).
+
+A set of fixtures are defined to provide data for the tests (the fixtures
+expected to be available to pytest by the inherited pandas tests).
+
+"""
+
+import itertools
+import operator
+
+import numpy as np
+import pandas as pd
+from pandas.tests.extension import base as extension_tests
+
+import shapely.geometry
+from shapely.geometry import Point
+
+from geopandas._compat import PANDAS_GE_15, PANDAS_GE_21, PANDAS_GE_22
+from geopandas.array import GeometryArray, GeometryDtype, from_shapely
+
+import pytest
+from pandas.testing import assert_frame_equal, assert_series_equal
+
+# -----------------------------------------------------------------------------
+# Compat with extension tests in older pandas versions
+# -----------------------------------------------------------------------------
+
+
+# Reusable skip markers; they are applied as decorators to individual tests
+# (and to one test class) further down in this module.
+not_yet_implemented = pytest.mark.skip(reason="Not yet implemented")
+no_minmax = pytest.mark.skip(reason="Min/max not supported")
+
+
+# -----------------------------------------------------------------------------
+# Required fixtures
+# -----------------------------------------------------------------------------
+
+
+@pytest.fixture
+def dtype():
+    """A fixture providing the ExtensionDtype to validate (a ``GeometryDtype``)."""
+    return GeometryDtype()
+
+
+def make_data():
+    a = np.empty(100, dtype=object)
+    a[:] = [shapely.geometry.Point(i, i) for i in range(100)]
+    ga = from_shapely(a)
+    return ga
+
+
+@pytest.fixture
+def data():
+    """Length-100 array for this type, as built by ``make_data``.
+
+    * data[0] and data[1] should both be non missing
+    * data[0] and data[1] should not be equal
+    """
+    return make_data()
+
+
+@pytest.fixture
+def data_for_twos():
+    """Length-100 array in which all the elements are two.
+
+    No such array is built here: the fixture raises ``NotImplementedError``.
+    """
+    raise NotImplementedError
+
+
+@pytest.fixture
+def data_missing():
+    """Length-2 array with [NA, Valid]: ``None`` followed by ``Point(1, 1)``."""
+    return from_shapely([None, shapely.geometry.Point(1, 1)])
+
+
+@pytest.fixture(params=["data", "data_missing"])
+def all_data(request, data, data_missing):
+    """Parametrized fixture giving 'data' and 'data_missing'"""
+    if request.param == "data":
+        return data
+    elif request.param == "data_missing":
+        return data_missing
+
+
+@pytest.fixture
+def data_repeated(data):
+    """
+    Generate many datasets.
+
+    Parameters
+    ----------
+    data : fixture implementing `data`
+
+    Returns
+    -------
+    Callable[[int], Generator]:
+        A callable that takes a `count` argument and
+        returns a generator yielding `count` datasets.
+    """
+
+    def gen(count):
+        for _ in range(count):
+            yield data
+
+    return gen
+
+
+@pytest.fixture
+def data_for_sorting():
+    """Length-3 array with a known sort order.
+
+    This should be three items [B, C, A] with
+    A < B < C
+
+    Here B is ``Point(0, 1)``, C is ``Point(1, 1)`` and A is ``Point(0, 0)``.
+    """
+    return from_shapely([Point(0, 1), Point(1, 1), Point(0, 0)])
+
+
+@pytest.fixture
+def data_missing_for_sorting():
+    """Length-3 array with a known sort order.
+
+    This should be three items [B, NA, A] with
+    A < B and NA missing.
+
+    Here B is ``Point(1, 2)``, NA is ``None`` and A is ``Point(0, 0)``.
+    """
+    return from_shapely([Point(1, 2), None, Point(0, 0)])
+
+
+@pytest.fixture
+def na_cmp():
+    """Binary operator for comparing NA values.
+
+    Returns a function of two arguments that is True only when both
+    arguments are ``None``.
+    """
+    return lambda x, y: x is None and y is None
+
+
+@pytest.fixture
+def na_value():
+    """The scalar missing value for this type: ``None``."""
+    return None
+
+
+@pytest.fixture
+def data_for_grouping():
+    """Data for factorization, grouping, and unique tests.
+
+    Expected to be like [B, B, NA, NA, A, A, B, C]
+
+    Where A < B < C and NA is missing
+    """
+    return from_shapely(
+        [
+            shapely.geometry.Point(1, 1),
+            shapely.geometry.Point(1, 1),
+            None,
+            None,
+            shapely.geometry.Point(0, 0),
+            shapely.geometry.Point(0, 0),
+            shapely.geometry.Point(1, 1),
+            shapely.geometry.Point(2, 2),
+        ]
+    )
+
+
+@pytest.fixture(params=[True, False])
+def box_in_series(request):
+    """Whether to box the data in a Series (parametrized: True, then False)."""
+    return request.param
+
+
+@pytest.fixture(
+    params=[
+        lambda x: 1,
+        lambda x: [1] * len(x),
+        lambda x: pd.Series([1] * len(x)),
+        lambda x: x,
+    ],
+    ids=["scalar", "list", "series", "object"],
+)
+def groupby_apply_op(request):
+    """
+    Functions to test groupby.apply().
+
+    One callable per run: it returns a scalar, a list, a Series, or its input
+    unchanged (matching the ``ids`` above).
+    """
+    return request.param
+
+
+@pytest.fixture(params=[True, False])
+def as_frame(request):
+    """
+    Boolean fixture to support Series and Series.to_frame() comparison testing.
+
+    Parametrized over True and False.
+    """
+    return request.param
+
+
+@pytest.fixture(params=[True, False])
+def as_series(request):
+    """
+    Boolean fixture to support arr and Series(arr) comparison testing.
+
+    Parametrized over True and False.
+    """
+    return request.param
+
+
+@pytest.fixture(params=[True, False])
+def use_numpy(request):
+    """
+    Boolean fixture to support comparison testing of ExtensionDtype array
+    and numpy array.
+
+    Parametrized over True and False.
+    """
+    return request.param
+
+
+@pytest.fixture(params=["ffill", "bfill"])
+def fillna_method(request):
+    """
+    Parametrized fixture giving method parameters 'ffill' and 'bfill' for
+    Series.fillna(method=<method>) testing.
+
+    Each run receives one of the two strings.
+    """
+    return request.param
+
+
+@pytest.fixture(params=[True, False])
+def as_array(request):
+    """
+    Boolean fixture to support ExtensionDtype _from_sequence method testing.
+
+    Parametrized over True and False.
+    """
+    return request.param
+
+
+@pytest.fixture
+def invalid_scalar(data):
+    """
+    A scalar that *cannot* be held by this ExtensionArray.
+
+    The default should work for most subclasses, but is not guaranteed.
+
+    If the array can hold any item (i.e. object dtype), then use pytest.skip.
+    """
+    return object.__new__(object)
+
+
+# Fixtures defined in pandas/conftest.py that are also needed: defining them
+# here instead of importing for compatibility
+
+
+@pytest.fixture(
+    params=["sum", "max", "min", "mean", "prod", "std", "var", "median", "kurt", "skew"]
+)
+def all_numeric_reductions(request):
+    """
+    Fixture for numeric reduction names
+
+    Each run receives one of the names listed in ``params`` as a string.
+    """
+    return request.param
+
+
+@pytest.fixture(params=["all", "any"])
+def all_boolean_reductions(request):
+    """
+    Fixture for boolean reduction names
+
+    Each run receives either "all" or "any".
+    """
+    return request.param
+
+
+# only == and != are supported for GeometryArray; the commented-out decorator
+# below shows the ordering operators that are left out
+# @pytest.fixture(params=["__eq__", "__ne__", "__le__", "__lt__", "__ge__", "__gt__"])
+@pytest.fixture(params=["__eq__", "__ne__"])
+def all_compare_operators(request):
+    """
+    Fixture for dunder names of the compare operations exercised here
+
+    * ==
+    * !=
+
+    The ordering operators (>=, >, <, <=) are not part of ``params``.
+    """
+    return request.param
+
+
+@pytest.fixture(params=[None, lambda x: x])
+def sort_by_key(request):
+    """
+    Simple fixture for testing keys in sorting methods.
+    Tests None (no key) and the identity key.
+
+    Each run receives one of those two values.
+    """
+    return request.param
+
+
+# -----------------------------------------------------------------------------
+# Inherited tests
+# -----------------------------------------------------------------------------
+
+
+class TestDtype(extension_tests.BaseDtypeTests):
+    # additional tests
+
+    def test_array_type_with_arg(self, data, dtype):
+        assert dtype.construct_array_type() is GeometryArray
+
+    def test_registry(self, data, dtype):
+        s = pd.Series(np.asarray(data), dtype=object)
+        result = s.astype("geometry")
+        assert isinstance(result.array, GeometryArray)
+        expected = pd.Series(data)
+        assert_series_equal(result, expected)
+
+
+class TestInterface(extension_tests.BaseInterfaceTests):
+    def test_contains(self, data, data_missing):
+        # overridden due to the inconsistency between
+        # GeometryDtype.na_value = np.nan
+        # and None being used as NA in array
+
+        # ensure data without missing values
+        data = data[~data.isna()]
+
+        # first elements are non-missing
+        assert data[0] in data
+        assert data_missing[0] in data_missing
+
+        assert None in data_missing
+        assert None not in data
+        assert pd.NaT not in data_missing
+
+
+class TestConstructors(extension_tests.BaseConstructorsTests):
+    # No overrides: every test comes from the base class and uses the fixtures
+    # defined in this module.
+    pass
+
+
+class TestReshaping(extension_tests.BaseReshapingTests):
+
+    # NOTE: this test is copied from pandas/tests/extension/base/reshaping.py
+    # because starting with pandas 3.0 the assert_frame_equal is strict regarding
+    # the exact missing value (None vs NaN)
+    # Our `result` uses None, but the way the `expected` is created results in
+    # NaNs (and specifying to use None as fill value in unstack also does not
+    # help)
+    # -> the only change compared to the upstream test is marked
+    @pytest.mark.parametrize(
+        "index",
+        [
+            # Two levels, uniform.
+            pd.MultiIndex.from_product(([["A", "B"], ["a", "b"]]), names=["a", "b"]),
+            # non-uniform
+            pd.MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "b")]),
+            # three levels, non-uniform
+            pd.MultiIndex.from_product([("A", "B"), ("a", "b", "c"), (0, 1, 2)]),
+            pd.MultiIndex.from_tuples(
+                [
+                    ("A", "a", 1),
+                    ("A", "b", 0),
+                    ("A", "a", 0),
+                    ("B", "a", 0),
+                    ("B", "c", 1),
+                ]
+            ),
+        ],
+    )
+    @pytest.mark.parametrize("obj", ["series", "frame"])
+    def test_unstack(self, data, index, obj):
+        """Unstack over level combinations and compare with the object-dtype result."""
+        # trim the fixture data to one element per index entry
+        data = data[: len(index)]
+        if obj == "series":
+            ser = pd.Series(data, index=index)
+        else:
+            ser = pd.DataFrame({"A": data, "B": data}, index=index)
+
+        n = index.nlevels
+        levels = list(range(n))
+        # [0, 1, 2]
+        # [(0,), (1,), (2,), (0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]
+        combinations = itertools.chain.from_iterable(
+            itertools.permutations(levels, i) for i in range(1, n)
+        )
+
+        for level in combinations:
+            result = ser.unstack(level=level)
+            # every resulting column must still be backed by the extension array
+            assert all(
+                isinstance(result[col].array, type(data)) for col in result.columns
+            )
+
+            if obj == "series":
+                # We should get the same result with to_frame+unstack+droplevel
+                df = ser.to_frame()
+
+                alt = df.unstack(level=level).droplevel(0, axis=1)
+                assert_frame_equal(result, alt)
+
+            obj_ser = ser.astype(object)
+
+            expected = obj_ser.unstack(level=level, fill_value=data.dtype.na_value)
+            if obj == "series":
+                assert (expected.dtypes == object).all()
+            # <------------ next line is added
+            expected[expected.isna()] = None
+            # ------------->
+
+            # compare on object dtype
+            result = result.astype(object)
+            assert_frame_equal(result, expected)
+
+
+class TestGetitem(extension_tests.BaseGetitemTests):
+    # No overrides: every test comes from the base class.
+    pass
+
+
+class TestSetitem(extension_tests.BaseSetitemTests):
+    # No overrides: every test comes from the base class.
+    pass
+
+
+class TestMissing(extension_tests.BaseMissingTests):
+    def test_fillna_series(self, data_missing):
+        fill_value = data_missing[1]
+        ser = pd.Series(data_missing)
+
+        # Fill with a scalar
+        result = ser.fillna(fill_value)
+        expected = pd.Series(data_missing._from_sequence([fill_value, fill_value]))
+        assert_series_equal(result, expected)
+
+        # Fill with a series
+        filler = pd.Series(
+            from_shapely(
+                [
+                    shapely.geometry.Point(1, 1),
+                    shapely.geometry.Point(2, 2),
+                ],
+            )
+        )
+        result = ser.fillna(filler)
+        expected = pd.Series(data_missing._from_sequence([fill_value, fill_value]))
+        assert_series_equal(result, expected)
+
+        # Fill with a series not affecting the missing values
+        filler = pd.Series(
+            from_shapely(
+                [
+                    shapely.geometry.Point(2, 2),
+                    shapely.geometry.Point(1, 1),
+                ]
+            ),
+            index=[10, 11],
+        )
+        result = ser.fillna(filler)
+        assert_series_equal(result, ser)
+
+        # More `GeoSeries.fillna` testcases are in
+        # `geopandas\tests\test_pandas_methods.py::test_fillna_scalar`
+        # and `geopandas\tests\test_pandas_methods.py::test_fillna_series`.
+
+    @pytest.mark.skipif(
+        not PANDAS_GE_21, reason="fillna method not supported with older pandas"
+    )
+    def test_fillna_limit_pad(self, data_missing):
+        super().test_fillna_limit_pad(data_missing)
+
+    @pytest.mark.skipif(
+        not PANDAS_GE_21, reason="fillna method not supported with older pandas"
+    )
+    def test_fillna_limit_backfill(self, data_missing):
+        super().test_fillna_limit_backfill(data_missing)
+
+    @pytest.mark.skipif(
+        not PANDAS_GE_21, reason="fillna method not supported with older pandas"
+    )
+    def test_fillna_series_method(self, data_missing, fillna_method):
+        super().test_fillna_series_method(data_missing, fillna_method)
+
+    @pytest.mark.skipif(
+        not PANDAS_GE_21, reason="fillna method not supported with older pandas"
+    )
+    def test_fillna_no_op_returns_copy(self, data):
+        super().test_fillna_no_op_returns_copy(data)
+
+
+if PANDAS_GE_22:
+    from pandas.tests.extension.base import BaseReduceTests
+else:
+    from pandas.tests.extension.base import BaseNoReduceTests as BaseReduceTests
+
+
+class TestReduce(BaseReduceTests):
+    # ``BaseReduceTests`` is whichever pandas base class the conditional import
+    # just above this class selected.
+    @pytest.mark.skip("boolean reduce (any/all) tested in test_pandas_methods")
+    def test_reduce_series_boolean(self):
+        pass
+
+
+# Dunder names handed to the ``all_arithmetic_operators`` fixture as its
+# ``params``; the subtraction pair is commented out.
+_all_arithmetic_operators = [
+    "__add__",
+    "__radd__",
+    # '__sub__', '__rsub__',
+    "__mul__",
+    "__rmul__",
+    "__floordiv__",
+    "__rfloordiv__",
+    "__truediv__",
+    "__rtruediv__",
+    "__pow__",
+    "__rpow__",
+    "__mod__",
+    "__rmod__",
+]
+
+
+@pytest.fixture(params=_all_arithmetic_operators)
+def all_arithmetic_operators(request):
+    """
+    Fixture for dunder names for common arithmetic operations
+
+    Adapted to exclude __sub__, as this is implemented as "difference".
+    Each run receives one name from ``_all_arithmetic_operators``.
+    """
+    return request.param
+
+
+# an inherited test from pandas creates a Series from a list of geometries, which
+# triggers the warning from Shapely, out of control of GeoPandas, so ignoring here
+@pytest.mark.filterwarnings(
+    "ignore:The array interface is deprecated and will no longer work in Shapely 2.0"
+)
+class TestArithmeticOps(extension_tests.BaseArithmeticOpsTests):
+    # The two overrides below are always skipped; their empty bodies only
+    # stand in for the inherited tests of the same name.
+    @pytest.mark.skip(reason="not applicable")
+    def test_divmod_series_array(self, data, data_for_twos):
+        pass
+
+    @pytest.mark.skip(reason="not applicable")
+    def test_add_series_with_extension_array(self, data):
+        pass
+
+
+# an inherited test from pandas creates a Series from a list of geometries, which
+# triggers the warning from Shapely, out of control of GeoPandas, so ignoring here
+@pytest.mark.filterwarnings(
+    "ignore:The array interface is deprecated and will no longer work in Shapely 2.0"
+)
+class TestComparisonOps(extension_tests.BaseComparisonOpsTests):
+    def _compare_other(self, s, data, op_name, other):
+        op = getattr(operator, op_name.strip("_"))
+        result = op(s, other)
+        expected = s.combine(other, op)
+        assert_series_equal(result, expected)
+
+    def test_compare_scalar(self, data, all_compare_operators):
+        op_name = all_compare_operators
+        s = pd.Series(data)
+        self._compare_other(s, data, op_name, data[0])
+
+    def test_compare_array(self, data, all_compare_operators):
+        op_name = all_compare_operators
+        s = pd.Series(data)
+        other = pd.Series([data[0]] * len(data))
+        self._compare_other(s, data, op_name, other)
+
+
+class TestMethods(extension_tests.BaseMethodsTests):
+    # Inherited method tests. The overrides below either skip a test, replace
+    # it with an empty body, or forward to the parent implementation.
+
+    # NOTE(review): when the skipif condition is false this override runs an
+    # empty body, so the inherited value_counts check is never exercised --
+    # confirm that this is intended.
+    @pytest.mark.skipif(
+        not PANDAS_GE_15, reason="sorting index not yet working with older pandas"
+    )
+    @pytest.mark.parametrize("dropna", [True, False])
+    def test_value_counts(self, all_data, dropna):
+        pass
+
+    # NOTE(review): same situation as ``test_value_counts``: an empty body
+    # whenever the test is not skipped.
+    @pytest.mark.skipif(
+        not PANDAS_GE_15, reason="sorting index not yet working with older pandas"
+    )
+    def test_value_counts_with_normalize(self, data):
+        pass
+
+    # Declares the ``ascending`` parametrization here and runs the parent test.
+    @pytest.mark.parametrize("ascending", [True, False])
+    def test_sort_values_frame(self, data_for_sorting, ascending):
+        super().test_sort_values_frame(data_for_sorting, ascending)
+
+    @pytest.mark.skip(reason="searchsorted not supported")
+    def test_searchsorted(self, data_for_sorting, as_series):
+        pass
+
+    @not_yet_implemented
+    def test_combine_le(self):
+        pass
+
+    @pytest.mark.skip(reason="addition not supported")
+    def test_combine_add(self):
+        pass
+
+    # Skipped through ``not_yet_implemented``; the body states the behaviour
+    # the test would check if it ran.
+    @not_yet_implemented
+    def test_fillna_length_mismatch(self, data_missing):
+        msg = "Length of 'value' does not match."
+        with pytest.raises(ValueError, match=msg):
+            data_missing.fillna(data_missing.take([1]))
+
+    # The remaining overrides are all skipped through the ``no_minmax`` marker.
+    @no_minmax
+    def test_argmin_argmax(self):
+        pass
+
+    @no_minmax
+    def test_argmin_argmax_empty_array(self):
+        pass
+
+    @no_minmax
+    def test_argmin_argmax_all_na(self):
+        pass
+
+    @no_minmax
+    def test_argreduce_series(self):
+        pass
+
+    @no_minmax
+    def test_argmax_argmin_no_skipna_notimplemented(self):
+        pass
+
+
+class TestCasting(extension_tests.BaseCastingTests):
+    # No overrides: every test comes from the base class.
+    pass
+
+
+class TestGroupby(extension_tests.BaseGroupbyTests):
+    # Each override declares its own parametrization (where it has one) and
+    # then forwards to the parent test with the same arguments.
+
+    @pytest.mark.parametrize("as_index", [True, False])
+    def test_groupby_extension_agg(self, as_index, data_for_grouping):
+        super().test_groupby_extension_agg(as_index, data_for_grouping)
+
+    def test_groupby_extension_transform(self, data_for_grouping):
+        super().test_groupby_extension_transform(data_for_grouping)
+
+    # ``op`` takes the same four callables and ids as the ``groupby_apply_op``
+    # fixture defined in this module.
+    @pytest.mark.parametrize(
+        "op",
+        [
+            lambda x: 1,
+            lambda x: [1] * len(x),
+            lambda x: pd.Series([1] * len(x)),
+            lambda x: x,
+        ],
+        ids=["scalar", "list", "series", "object"],
+    )
+    def test_groupby_extension_apply(self, data_for_grouping, op):
+        super().test_groupby_extension_apply(data_for_grouping, op)
+
+
+class TestPrinting(extension_tests.BasePrintingTests):
+    # No overrides: every test comes from the base class.
+    pass
+
+
+# The whole class carries the ``not_yet_implemented`` skip marker.
+@not_yet_implemented
+class TestParsing(extension_tests.BaseParsingTests):
+    pass
@@ -0,0 +1,170 @@
+import pandas as pd
+
+from shapely.geometry import Point
+
+from geopandas import GeoDataFrame, GeoSeries
+from geopandas._compat import HAS_PYPROJ
+from geopandas.tools import geocode, reverse_geocode
+from geopandas.tools.geocoding import _prepare_geocode_result
+
+import pytest
+from geopandas.testing import assert_geodataframe_equal
+from geopandas.tests.util import assert_geoseries_equal, mock
+from pandas.testing import assert_series_equal
+
+geopy = pytest.importorskip("geopy")
+
+
+class ForwardMock(mock.MagicMock):
+    """
+    Mock the forward geocoding function.
+    Returns the passed in address and (p, p+.5) where p increases
+    at each call
+
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._n = 0.0
+
+    def __call__(self, *args, **kwargs):
+        self.return_value = args[0], (self._n, self._n + 0.5)
+        self._n += 1
+        return super().__call__(*args, **kwargs)
+
+
+class ReverseMock(mock.MagicMock):
+    """
+    Mock the reverse geocoding function.
+    Returns the passed in point and 'address{p}' where p increases
+    at each call
+
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._n = 0
+
+    def __call__(self, *args, **kwargs):
+        self.return_value = "address{0}".format(self._n), args[0]
+        self._n += 1
+        return super().__call__(*args, **kwargs)
+
+
+@pytest.fixture
+def locations():
+    locations = ["260 Broadway, New York, NY", "77 Massachusetts Ave, Cambridge, MA"]
+    return locations
+
+
+@pytest.fixture
+def points():
+    points = [Point(-71.0597732, 42.3584308), Point(-77.0365305, 38.8977332)]
+    return points
+
+
+def test_prepare_result():
+    # Calls _prepare_result with sample results from the geocoder call
+    # loop
+    p0 = Point(12.3, -45.6)  # Treat these as lat/lon
+    p1 = Point(-23.4, 56.7)
+    d = {"a": ("address0", p0.coords[0]), "b": ("address1", p1.coords[0])}
+
+    df = _prepare_geocode_result(d)
+    assert type(df) is GeoDataFrame
+    if HAS_PYPROJ:
+        assert df.crs == "EPSG:4326"
+    assert len(df) == 2
+    assert "address" in df
+
+    coords = df.loc["a"]["geometry"].coords[0]
+    test = p0.coords[0]
+    # Output from the df should be lon/lat
+    assert coords[0] == pytest.approx(test[1])
+    assert coords[1] == pytest.approx(test[0])
+
+    coords = df.loc["b"]["geometry"].coords[0]
+    test = p1.coords[0]
+    assert coords[0] == pytest.approx(test[1])
+    assert coords[1] == pytest.approx(test[0])
+
+
+def test_prepare_result_none():
+    p0 = Point(12.3, -45.6)  # Treat these as lat/lon
+    d = {"a": ("address0", p0.coords[0]), "b": (None, None)}
+
+    df = _prepare_geocode_result(d)
+    assert type(df) is GeoDataFrame
+    if HAS_PYPROJ:
+        assert df.crs == "EPSG:4326"
+    assert len(df) == 2
+    assert "address" in df
+
+    row = df.loc["b"]
+
+    # TODO we should probably replace this with a missing value instead of point?
+    assert len(row["geometry"].coords) == 0
+    assert row["geometry"].is_empty
+    assert row["address"] is None
+
+
+@pytest.mark.parametrize("geocode_result", (None, (None, None)))
+def test_prepare_geocode_result_when_result_is(geocode_result):
+    result = {0: geocode_result}
+    expected_output = GeoDataFrame(
+        {"geometry": [Point()], "address": [None]},
+        crs="EPSG:4326",
+    )
+
+    output = _prepare_geocode_result(result)
+
+    assert_geodataframe_equal(output, expected_output)
+
+
+def test_bad_provider_forward():
+    from geopy.exc import GeocoderNotFound
+
+    with pytest.raises(GeocoderNotFound):
+        geocode(["cambridge, ma"], "badprovider")
+
+
+def test_bad_provider_reverse():
+    from geopy.exc import GeocoderNotFound
+
+    with pytest.raises(GeocoderNotFound):
+        reverse_geocode([Point(0, 0)], "badprovider")
+
+
+def test_forward(locations, points):
+    from geopy.geocoders import Photon
+
+    for provider in ["photon", Photon]:
+        with mock.patch("geopy.geocoders.Photon.geocode", ForwardMock()) as m:
+            g = geocode(locations, provider=provider, timeout=2)
+            assert len(locations) == m.call_count
+
+        n = len(locations)
+        assert isinstance(g, GeoDataFrame)
+        expected = GeoSeries(
+            [Point(float(x) + 0.5, float(x)) for x in range(n)], crs="EPSG:4326"
+        )
+        assert_geoseries_equal(expected, g["geometry"])
+        assert_series_equal(g["address"], pd.Series(locations, name="address"))
+
+
+def test_reverse(locations, points):
+    from geopy.geocoders import Photon
+
+    for provider in ["photon", Photon]:
+        with mock.patch("geopy.geocoders.Photon.reverse", ReverseMock()) as m:
+            g = reverse_geocode(points, provider=provider, timeout=2)
+            assert len(points) == m.call_count
+
+        assert isinstance(g, GeoDataFrame)
+
+        expected = GeoSeries(points, crs="EPSG:4326")
+        assert_geoseries_equal(expected, g["geometry"])
+        address = pd.Series(
+            ["address" + str(x) for x in range(len(points))], name="address"
+        )
+        assert_series_equal(g["address"], address)
@@ -0,0 +1,747 @@
+import json
+import os
+import random
+import shutil
+import tempfile
+import warnings
+
+import numpy as np
+import pandas as pd
+
+from shapely.geometry import (
+    GeometryCollection,
+    LineString,
+    MultiLineString,
+    MultiPoint,
+    MultiPolygon,
+    Point,
+    Polygon,
+)
+from shapely.geometry.base import BaseGeometry
+
+import geopandas._compat as compat
+from geopandas import GeoDataFrame, GeoSeries, clip, read_file
+from geopandas.array import GeometryArray, GeometryDtype
+
+import pytest
+from geopandas.testing import assert_geoseries_equal, geom_almost_equals
+from geopandas.tests.util import geom_equals
+from numpy.testing import assert_array_equal
+from pandas.testing import assert_index_equal, assert_series_equal
+
+
+class TestSeries:
+    def setup_method(self):
+        self.tempdir = tempfile.mkdtemp()
+        self.t1 = Polygon([(0, 0), (1, 0), (1, 1)])
+        self.t2 = Polygon([(0, 0), (1, 1), (0, 1)])
+        self.sq = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
+        self.g1 = GeoSeries([self.t1, self.sq])
+        self.g2 = GeoSeries([self.sq, self.t1])
+        self.g3 = GeoSeries([self.t1, self.t2], crs="epsg:4326")
+        self.g4 = GeoSeries([self.t2, self.t1])
+        self.na = GeoSeries([self.t1, self.t2, Polygon()])
+        self.na_none = GeoSeries([self.t1, self.t2, None])
+        self.a1 = self.g1.copy()
+        self.a1.index = ["A", "B"]
+        self.a2 = self.g2.copy()
+        self.a2.index = ["B", "C"]
+        self.esb = Point(-73.9847, 40.7484)
+        self.sol = Point(-74.0446, 40.6893)
+        self.landmarks = GeoSeries([self.esb, self.sol], crs="epsg:4326")
+        self.l1 = LineString([(0, 0), (0, 1), (1, 1)])
+        self.l2 = LineString([(0, 0), (1, 0), (1, 1), (0, 1)])
+        self.g5 = GeoSeries([self.l1, self.l2])
+        self.esb3857 = Point(-8235939.130493107, 4975301.253789809)
+        self.sol3857 = Point(-8242607.167991625, 4966620.938285081)
+        self.landmarks3857 = GeoSeries([self.esb3857, self.sol3857], crs="epsg:3857")
+
+    def teardown_method(self):
+        """Remove the temporary directory stored in ``self.tempdir``."""
+        shutil.rmtree(self.tempdir)
+
+    def test_copy(self):
+        gc = self.g3.copy()
+        assert type(gc) is GeoSeries
+        assert self.g3.name == gc.name
+        assert self.g3.crs == gc.crs
+
+    def test_in(self):
+        assert self.t1 in self.g1
+        assert self.sq in self.g1
+        assert self.t1 in self.a1
+        assert self.t2 in self.g3
+        assert self.sq not in self.g3
+        assert 5 not in self.g3
+
+    def test_align(self):
+        a1, a2 = self.a1.align(self.a2)
+        assert isinstance(a1, GeoSeries)
+        assert isinstance(a2, GeoSeries)
+        assert a2["A"] is None
+        assert a1["B"].equals(a2["B"])
+        assert a1["C"] is None
+
+    @pytest.mark.skipif(not compat.HAS_PYPROJ, reason="pyproj not available")
+    def test_align_crs(self):
+        a1 = self.a1.set_crs("epsg:4326")
+        a2 = self.a2.set_crs("epsg:31370")
+
+        res1, res2 = a1.align(a2)
+        assert res1.crs == "epsg:4326"
+        assert res2.crs == "epsg:31370"
+
+        res1, res2 = a1.align(a2.set_crs(None, allow_override=True))
+        assert res1.crs == "epsg:4326"
+        assert res2.crs is None
+
+    def test_align_mixed(self):
+        a1 = self.a1
+        s2 = pd.Series([1, 2], index=["B", "C"])
+        res1, res2 = a1.align(s2)
+
+        exp2 = pd.Series([np.nan, 1, 2], index=["A", "B", "C"])
+        assert_series_equal(res2, exp2)
+
+    def test_warning_if_not_aligned(self):
+        # GH-816
+        # Test that warning is issued when operating on non-aligned series
+
+        # _series_op
+        with pytest.warns(UserWarning, match="The indices .+ not equal"):
+            self.a1.contains(self.a2)
+
+        # _geo_op
+        with pytest.warns(UserWarning, match="The indices .+ not equal"):
+            self.a1.union(self.a2)
+
+    def test_no_warning_if_aligned(self):
+        # GH-816
+        # Test that warning is not issued when operating on aligned series
+        a1, a2 = self.a1.align(self.a2)
+
+        with warnings.catch_warnings(record=True) as record:
+            a1.contains(a2)  # _series_op, explicitly aligned
+            self.g1.intersects(self.g2)  # _series_op, implicitly aligned
+            a2.union(a1)  # _geo_op, explicitly aligned
+            self.g2.intersection(self.g1)  # _geo_op, implicitly aligned
+
+        user_warnings = [w for w in record if w.category is UserWarning]
+        assert not user_warnings, user_warnings[0].message
+
+    def test_geom_equals(self):
+        assert np.all(self.g1.geom_equals(self.g1))
+        assert_array_equal(self.g1.geom_equals(self.sq), [False, True])
+
+    def test_geom_equals_align(self):
+        a = self.a1.geom_equals(self.a2, align=True)
+        exp = pd.Series([False, True, False], index=["A", "B", "C"])
+        assert_series_equal(a, exp)
+
+        a = self.a1.geom_equals(self.a2, align=False)
+        exp = pd.Series([False, False], index=["A", "B"])
+        assert_series_equal(a, exp)
+
+    @pytest.mark.filterwarnings(r"ignore:The 'geom_almost_equals\(\)':FutureWarning")
+    def test_geom_almost_equals(self):
+        # TODO: test decimal parameter
+        assert np.all(self.g1.geom_almost_equals(self.g1))
+        assert_array_equal(self.g1.geom_almost_equals(self.sq), [False, True])
+        with warnings.catch_warnings():
+            warnings.filterwarnings(
+                "ignore",
+                "The indices of the left and right GeoSeries' are not equal",
+                UserWarning,
+            )
+            assert_array_equal(
+                self.a1.geom_almost_equals(self.a2, align=True),
+                [False, True, False],
+            )
+        assert_array_equal(
+            self.a1.geom_almost_equals(self.a2, align=False), [False, False]
+        )
+
+    def test_geom_equals_exact(self):
+        # TODO: test tolerance parameter
+        assert np.all(self.g1.geom_equals_exact(self.g1, 0.001))
+        assert_array_equal(self.g1.geom_equals_exact(self.sq, 0.001), [False, True])
+        with warnings.catch_warnings():
+            warnings.filterwarnings(
+                "ignore",
+                "The indices of the left and right GeoSeries' are not equal",
+                UserWarning,
+            )
+            assert_array_equal(
+                self.a1.geom_equals_exact(self.a2, 0.001, align=True),
+                [False, True, False],
+            )
+        assert_array_equal(
+            self.a1.geom_equals_exact(self.a2, 0.001, align=False), [False, False]
+        )
+
+    def test_equal_comp_op(self):
+        s = GeoSeries([Point(x, x) for x in range(3)])
+        res = s == Point(1, 1)
+        exp = pd.Series([False, True, False])
+        assert_series_equal(res, exp)
+
+    def test_to_file(self):
+        """Test to_file and from_file"""
+        tempfilename = os.path.join(self.tempdir, "test.shp")
+        self.g3.to_file(tempfilename)
+        # Read layer back in?
+        s = GeoSeries.from_file(tempfilename)
+        assert all(self.g3.geom_equals(s))
+        # TODO: compare crs
+
+    def test_to_json(self):
+        """
+        Test whether GeoSeries.to_json works and returns an actual json file.
+        """
+        json_str = self.g3.to_json()
+        data = json.loads(json_str)
+        assert "id" in data["features"][0].keys()
+        assert "bbox" in data["features"][0].keys()
+        # TODO : verify the output is a valid GeoJSON.
+
+    def test_to_json_drop_id(self):
+        """
+        Test whether GeoSeries.to_json works when drop_id is True.
+        """
+        json_str = self.g3.to_json(drop_id=True)
+        data = json.loads(json_str)
+        assert "id" not in data["features"][0].keys()
+
+    def test_to_json_no_bbox(self):
+        """
+        Test whether GeoSeries.to_json works when show_bbox is False.
+        """
+        json_str = self.g3.to_json(show_bbox=False)
+        data = json.loads(json_str)
+        assert "bbox" not in data["features"][0].keys()
+
+    def test_to_json_no_bbox_drop_id(self):
+        """
+        Test whether GeoSeries.to_json works when show_bbox is False
+        and drop_id is True.
+        """
+        json_str = self.g3.to_json(show_bbox=False, drop_id=True)
+        data = json.loads(json_str)
+        assert "id" not in data["features"][0].keys()
+        assert "bbox" not in data["features"][0].keys()
+
+    @pytest.mark.skipif(not compat.HAS_PYPROJ, reason="Requires pyproj")
+    def test_to_json_wgs84(self):
+        """
+        Test whether the wgs84 conversion works as intended.
+        """
+        text = self.landmarks3857.to_json(to_wgs84=True)
+        data = json.loads(text)
+        assert data["type"] == "FeatureCollection"
+        assert "id" in data["features"][0].keys()
+        coord1 = data["features"][0]["geometry"]["coordinates"]
+        coord2 = data["features"][1]["geometry"]["coordinates"]
+        np.testing.assert_allclose(coord1, self.esb.coords[0])
+        np.testing.assert_allclose(coord2, self.sol.coords[0])
+
+    def test_to_json_wgs84_false(self):
+        """
+        Ensure no conversion to wgs84
+        """
+        text = self.landmarks3857.to_json()
+        data = json.loads(text)
+        coord1 = data["features"][0]["geometry"]["coordinates"]
+        coord2 = data["features"][1]["geometry"]["coordinates"]
+        assert coord1 == [-8235939.130493107, 4975301.253789809]
+        assert coord2 == [-8242607.167991625, 4966620.938285081]
+
+    def test_representative_point(self):
+        assert np.all(self.g1.contains(self.g1.representative_point()))
+        assert np.all(self.g2.contains(self.g2.representative_point()))
+        assert np.all(self.g3.contains(self.g3.representative_point()))
+        assert np.all(self.g4.contains(self.g4.representative_point()))
+
+    @pytest.mark.skipif(not compat.HAS_PYPROJ, reason="pyproj not available")
+    def test_transform(self):
+        utm18n = self.landmarks.to_crs(epsg=26918)
+        lonlat = utm18n.to_crs(epsg=4326)
+        assert geom_almost_equals(self.landmarks, lonlat)
+        with pytest.raises(ValueError):
+            self.g1.to_crs(epsg=4326)
+        with pytest.raises(ValueError):
+            self.landmarks.to_crs(crs=None, epsg=None)
+
+    def test_estimate_utm_crs__geographic(self):
+        pyproj = pytest.importorskip("pyproj")
+        assert self.landmarks.estimate_utm_crs() == pyproj.CRS("EPSG:32618")
+        assert self.landmarks.estimate_utm_crs("NAD83") == pyproj.CRS("EPSG:26918")
+
+    def test_estimate_utm_crs__projected(self):
+        pyproj = pytest.importorskip("pyproj")
+        assert self.landmarks.to_crs("EPSG:3857").estimate_utm_crs() == pyproj.CRS(
+            "EPSG:32618"
+        )
+
+    @pytest.mark.skipif(not compat.HAS_PYPROJ, reason="pyproj not available")
+    def test_estimate_utm_crs__out_of_bounds(self):
+        with pytest.raises(RuntimeError, match="Unable to determine UTM CRS"):
+            GeoSeries(
+                [Polygon([(0, 90), (1, 90), (2, 90)])], crs="EPSG:4326"
+            ).estimate_utm_crs()
+
+    @pytest.mark.skipif(not compat.HAS_PYPROJ, reason="pyproj not available")
+    def test_estimate_utm_crs__missing_crs(self):
+        with pytest.raises(RuntimeError, match="crs must be set"):
+            GeoSeries([Polygon([(0, 90), (1, 90), (2, 90)])]).estimate_utm_crs()
+
+    def test_fillna(self):
+        # default is to fill with empty geometry
+        na = self.na_none.fillna()
+        assert isinstance(na[2], BaseGeometry)
+        assert na[2].is_empty
+        assert geom_equals(self.na_none[:2], na[:2])
+        # XXX: method works inconsistently for different pandas versions
+        # self.na_none.fillna(method='backfill')
+
+    def test_coord_slice(self):
+        """Test CoordinateSlicer"""
+        # need some better test cases
+        assert geom_equals(self.g3, self.g3.cx[:, :])
+        assert geom_equals(self.g3[[True, False]], self.g3.cx[0.9:, :0.1])
+        assert geom_equals(self.g3[[False, True]], self.g3.cx[0:0.1, 0.9:1.0])
+
+    def test_coord_slice_with_zero(self):
+        # Test that CoordinateSlice correctly handles zero slice (#GH477).
+
+        gs = GeoSeries([Point(x, x) for x in range(-3, 4)])
+        assert geom_equals(gs.cx[:0, :0], gs.loc[:3])
+        assert geom_equals(gs.cx[:, :0], gs.loc[:3])
+        assert geom_equals(gs.cx[:0, :], gs.loc[:3])
+        assert geom_equals(gs.cx[0:, 0:], gs.loc[3:])
+        assert geom_equals(gs.cx[0:, :], gs.loc[3:])
+        assert geom_equals(gs.cx[:, 0:], gs.loc[3:])
+
+    def test_geoseries_geointerface(self):
+        assert self.g1.__geo_interface__["type"] == "FeatureCollection"
+        assert len(self.g1.__geo_interface__["features"]) == self.g1.shape[0]
+
+    @pytest.mark.skipif(not compat.HAS_PYPROJ, reason="pyproj not available")
+    def test_proj4strings(self):
+        # As string
+        reprojected = self.g3.to_crs("+proj=utm +zone=30")
+        reprojected_back = reprojected.to_crs(epsg=4326)
+        assert geom_almost_equals(self.g3, reprojected_back)
+
+        # As dict
+        reprojected = self.g3.to_crs({"proj": "utm", "zone": "30"})
+        reprojected_back = reprojected.to_crs(epsg=4326)
+        assert geom_almost_equals(self.g3, reprojected_back)
+
+        # Set to equivalent string, convert, compare to original
+        copy = self.g3.copy().set_crs("epsg:4326", allow_override=True)
+        reprojected = copy.to_crs({"proj": "utm", "zone": "30"})
+        reprojected_back = reprojected.to_crs(epsg=4326)
+        assert geom_almost_equals(self.g3, reprojected_back)
+
+        # Conversions by different format
+        reprojected_string = self.g3.to_crs("+proj=utm +zone=30")
+        reprojected_dict = self.g3.to_crs({"proj": "utm", "zone": "30"})
+        assert geom_almost_equals(reprojected_string, reprojected_dict)
+
+    def test_from_wkb(self):
+        assert_geoseries_equal(self.g1, GeoSeries.from_wkb([self.t1.wkb, self.sq.wkb]))
+
+    def test_from_wkb_on_invalid(self):
+        # Single point LineString hex WKB: invalid
+        invalid_wkb_hex = "01020000000100000000000000000008400000000000000840"
+        message = "point array must contain 0 or >1 elements"
+
+        with pytest.raises(Exception, match=message):
+            GeoSeries.from_wkb([invalid_wkb_hex], on_invalid="raise")
+
+        with pytest.warns(Warning, match=message):
+            res = GeoSeries.from_wkb([invalid_wkb_hex], on_invalid="warn")
+        assert res[0] is None
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("error")
+            res = GeoSeries.from_wkb([invalid_wkb_hex], on_invalid="ignore")
+        assert res[0] is None
+
+    def test_from_wkb_series(self):
+        s = pd.Series([self.t1.wkb, self.sq.wkb], index=[1, 2])
+        expected = self.g1.copy()
+        expected.index = pd.Index([1, 2])
+        assert_geoseries_equal(expected, GeoSeries.from_wkb(s))
+
+    def test_from_wkb_series_with_index(self):
+        index = [0]
+        s = pd.Series([self.t1.wkb, self.sq.wkb], index=[0, 2])
+        expected = self.g1.reindex(index)
+        assert_geoseries_equal(expected, GeoSeries.from_wkb(s, index=index))
+
+    def test_from_wkt(self):
+        assert_geoseries_equal(self.g1, GeoSeries.from_wkt([self.t1.wkt, self.sq.wkt]))
+
+    def test_from_wkt_on_invalid(self):
+        # Single point LineString WKT: invalid
+        invalid_wkt = "LINESTRING(0 0)"
+        message = "point array must contain 0 or >1 elements"
+
+        with pytest.raises(Exception, match=message):
+            GeoSeries.from_wkt([invalid_wkt], on_invalid="raise")
+
+        with pytest.warns(Warning, match=message):
+            res = GeoSeries.from_wkt([invalid_wkt], on_invalid="warn")
+        assert res[0] is None
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("error")
+            res = GeoSeries.from_wkt([invalid_wkt], on_invalid="ignore")
+        assert res[0] is None
+
+    def test_from_wkt_series(self):
+        s = pd.Series([self.t1.wkt, self.sq.wkt], index=[1, 2])
+        expected = self.g1.copy()
+        expected.index = pd.Index([1, 2])
+        assert_geoseries_equal(expected, GeoSeries.from_wkt(s))
+
+    def test_from_wkt_series_with_index(self):
+        index = [0]
+        s = pd.Series([self.t1.wkt, self.sq.wkt], index=[0, 2])
+        expected = self.g1.reindex(index)
+        assert_geoseries_equal(expected, GeoSeries.from_wkt(s, index=index))
+
+    def test_to_wkb(self):
+        assert_series_equal(pd.Series([self.t1.wkb, self.sq.wkb]), self.g1.to_wkb())
+        assert_series_equal(
+            pd.Series([self.t1.wkb_hex, self.sq.wkb_hex]), self.g1.to_wkb(hex=True)
+        )
+
+    def test_to_wkt(self):
+        assert_series_equal(pd.Series([self.t1.wkt, self.sq.wkt]), self.g1.to_wkt())
+
+    def test_clip(self, naturalearth_lowres, naturalearth_cities):
+        left = read_file(naturalearth_cities)
+        world = read_file(naturalearth_lowres)
+        south_america = world[world["continent"] == "South America"]
+
+        expected = clip(left.geometry, south_america)
+        result = left.geometry.clip(south_america)
+        assert_geoseries_equal(result, expected)
+
+    def test_clip_sorting(self, naturalearth_cities, naturalearth_lowres):
+        """
+        Test sorting of geodseries when clipping.
+        """
+        cities = read_file(naturalearth_cities)
+        world = read_file(naturalearth_lowres)
+        south_america = world[world["continent"] == "South America"]
+
+        unsorted_clipped_cities = clip(cities, south_america, sort=False)
+        sorted_clipped_cities = clip(cities, south_america, sort=True)
+
+        expected_sorted_index = pd.Index(
+            [55, 59, 62, 88, 101, 114, 122, 169, 181, 189, 210, 230, 236, 238, 239]
+        )
+
+        assert not (
+            sorted(unsorted_clipped_cities.index) == unsorted_clipped_cities.index
+        ).all()
+        assert (
+            sorted(sorted_clipped_cities.index) == sorted_clipped_cities.index
+        ).all()
+        assert_index_equal(expected_sorted_index, sorted_clipped_cities.index)
+
+    def test_from_xy_points(self):
+        x = self.landmarks.x.values
+        y = self.landmarks.y.values
+        index = self.landmarks.index.tolist()
+        crs = self.landmarks.crs
+        assert_geoseries_equal(
+            self.landmarks, GeoSeries.from_xy(x, y, index=index, crs=crs)
+        )
+        assert_geoseries_equal(
+            self.landmarks,
+            GeoSeries.from_xy(self.landmarks.x, self.landmarks.y, crs=crs),
+        )
+
+    def test_from_xy_points_w_z(self):
+        index_values = [5, 6, 7]
+        x = pd.Series([0, -1, 2], index=index_values)
+        y = pd.Series([8, 3, 1], index=index_values)
+        z = pd.Series([5, -6, 7], index=index_values)
+        expected = GeoSeries(
+            [Point(0, 8, 5), Point(-1, 3, -6), Point(2, 1, 7)], index=index_values
+        )
+        assert_geoseries_equal(expected, GeoSeries.from_xy(x, y, z))
+
+    def test_from_xy_points_unequal_index(self):
+        x = self.landmarks.x
+        y = self.landmarks.y
+        y.index = -np.arange(len(y))
+        crs = self.landmarks.crs
+        assert_geoseries_equal(
+            self.landmarks, GeoSeries.from_xy(x, y, index=x.index, crs=crs)
+        )
+        unindexed_landmarks = self.landmarks.copy()
+        unindexed_landmarks.reset_index(inplace=True, drop=True)
+        assert_geoseries_equal(
+            unindexed_landmarks,
+            GeoSeries.from_xy(x, y, crs=crs),
+        )
+
+    def test_from_xy_points_indexless(self):
+        x = np.array([0.0, 3.0])
+        y = np.array([2.0, 5.0])
+        z = np.array([-1.0, 4.0])
+        expected = GeoSeries([Point(0, 2, -1), Point(3, 5, 4)])
+        assert_geoseries_equal(expected, GeoSeries.from_xy(x, y, z))
+
+    @pytest.mark.skipif(compat.HAS_PYPROJ, reason="pyproj installed")
+    def test_set_crs_pyproj_error(self):
+        with pytest.raises(
+            ImportError, match="The 'pyproj' package is required for set_crs"
+        ):
+            self.g1.set_crs(3857)
+
+
+@pytest.mark.filterwarnings("ignore::UserWarning")
+def test_missing_values():
+    s = GeoSeries([Point(1, 1), None, np.nan, GeometryCollection(), Polygon()])
+
+    # construction -> missing values get normalized to None
+    assert s[1] is None
+    assert s[2] is None
+    assert s[3].is_empty
+    assert s[4].is_empty
+
+    # isna / is_empty
+    assert s.isna().tolist() == [False, True, True, False, False]
+    assert s.is_empty.tolist() == [False, False, False, True, True]
+    assert s.notna().tolist() == [True, False, False, True, True]
+
+    # fillna defaults to fill with empty geometry -> no missing values anymore
+    assert not s.fillna().isna().any()
+
+    # dropna drops the missing values
+    assert not s.dropna().isna().any()
+    assert len(s.dropna()) == 3
+
+
+def test_isna_empty_geoseries():
+    # ensure that isna() result for empty GeoSeries has the correct bool dtype
+    s = GeoSeries([])
+    result = s.isna()
+    assert_series_equal(result, pd.Series([], dtype="bool"))
+
+
+@pytest.mark.skipif(not compat.HAS_PYPROJ, reason="pyproj not available")
+def test_geoseries_crs():
+    gs = GeoSeries().set_crs("IGNF:ETRS89UTM28")
+    assert gs.crs.to_authority() == ("IGNF", "ETRS89UTM28")
+
+
+@pytest.mark.skipif(not compat.HAS_PYPROJ, reason="Requires pyproj")
+def test_geoseries_override_existing_crs_warning():
+    gs = GeoSeries(crs="epsg:4326")
+    with pytest.warns(
+        DeprecationWarning,
+        match="Overriding the CRS of a GeoSeries that already has CRS",
+    ):
+        gs.crs = "epsg:2100"
+
+
+# -----------------------------------------------------------------------------
+# Constructor tests
+# -----------------------------------------------------------------------------
+
+
+def check_geoseries(s):
+    assert isinstance(s, GeoSeries)
+    assert isinstance(s.geometry, GeoSeries)
+    assert isinstance(s.dtype, GeometryDtype)
+    assert isinstance(s.values, GeometryArray)
+
+
+class TestConstructor:
+    def test_constructor(self):
+        s = GeoSeries([Point(x, x) for x in range(3)])
+        check_geoseries(s)
+
+    def test_single_geom_constructor(self):
+        p = Point(1, 2)
+        line = LineString([(2, 3), (4, 5), (5, 6)])
+        poly = Polygon(
+            [(0, 0), (1, 0), (1, 1), (0, 1)], [[(0.1, 0.1), (0.9, 0.1), (0.9, 0.9)]]
+        )
+        mp = MultiPoint([(1, 2), (3, 4), (5, 6)])
+        mline = MultiLineString([[(1, 2), (3, 4), (5, 6)], [(7, 8), (9, 10)]])
+
+        poly2 = Polygon(
+            [(0, 0), (0, -1), (-1, -1), (-1, 0)],
+            [[(-0.1, -0.1), (-0.1, -0.5), (-0.5, -0.5), (-0.5, -0.1)]],
+        )
+        mpoly = MultiPolygon([poly, poly2])
+
+        geoms = [p, line, poly, mp, mline, mpoly]
+        index = ["a", "b", "c", "d"]
+
+        for g in geoms:
+            gs = GeoSeries(g)
+            assert len(gs) == 1
+            # accessing elements no longer give identical objects
+            assert gs.iloc[0].equals(g)
+
+            gs = GeoSeries(g, index=index)
+            assert len(gs) == len(index)
+            for x in gs:
+                assert x.equals(g)
+
+    def test_non_geometry_raises(self):
+        with pytest.raises(TypeError, match="Non geometry data passed to GeoSeries"):
+            GeoSeries([True, False, True])
+
+        with pytest.raises(TypeError, match="Non geometry data passed to GeoSeries"):
+            GeoSeries(["a", "b", "c"])
+
+        with pytest.raises(TypeError, match="Non geometry data passed to GeoSeries"):
+            GeoSeries([[1, 2], [3, 4]])
+
+    def test_empty(self):
+        s = GeoSeries([])
+        check_geoseries(s)
+
+        s = GeoSeries()
+        check_geoseries(s)
+
+    def test_data_is_none(self):
+        s = GeoSeries(index=range(3))
+        check_geoseries(s)
+
+    def test_empty_array(self):
+        # with empty data that have an explicit dtype, we use the fallback or
+        # not depending on the dtype
+
+        # dtypes that can never hold geometry-like data
+        for arr in [
+            np.array([], dtype="bool"),
+            np.array([], dtype="int64"),
+            np.array([], dtype="float32"),
+            # this gets converted to object dtype by pandas
+            # np.array([], dtype="str"),
+        ]:
+            with pytest.raises(
+                TypeError, match="Non geometry data passed to GeoSeries"
+            ):
+                GeoSeries(arr)
+
+        # dtypes that can potentially hold geometry-like data (object) or
+        # can come from empty data (float64)
+        for arr in [
+            np.array([], dtype="object"),
+            np.array([], dtype="float64"),
+            np.array([], dtype="str"),
+        ]:
+            with warnings.catch_warnings(record=True) as record:
+                s = GeoSeries(arr)
+            assert not record
+            assert isinstance(s, GeoSeries)
+
+    def test_from_series(self):
+        shapes = [
+            Polygon([(random.random(), random.random()) for _ in range(3)])
+            for _ in range(10)
+        ]
+
+        s = pd.Series(shapes, index=list("abcdefghij"), name="foo")
+        g = GeoSeries(s)
+        check_geoseries(g)
+
+        assert [a.equals(b) for a, b in zip(s, g)]
+        assert s.name == g.name
+        assert s.index is g.index
+
+    @pytest.mark.skipif(not compat.HAS_PYPROJ, reason="pyproj not available")
+    def test_from_series_no_set_crs_on_construction(self):
+        # https://github.com/geopandas/geopandas/issues/2492
+        # also when passing Series[geometry], ensure we don't change crs of
+        # original data
+        gs = GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)])
+        s = pd.Series(gs)
+        result = GeoSeries(s, crs=4326)
+        assert s.values.crs is None
+        assert gs.crs is None
+        assert result.crs == "EPSG:4326"
+
+    def test_copy(self):
+        # default is to copy with CoW / pandas 3+
+        arr = np.array([Point(x, x) for x in range(3)], dtype=object)
+        result = GeoSeries(arr)
+        # modifying result doesn't change original array
+        result.loc[0] = Point(10, 10)
+        if compat.PANDAS_GE_30 or getattr(pd.options.mode, "copy_on_write", False):
+            assert arr[0] == Point(0, 0)
+        else:
+            assert arr[0] == Point(10, 10)
+
+        # avoid copy with copy=False
+        arr = np.array([Point(x, x) for x in range(3)], dtype=object)
+        result = GeoSeries(arr, copy=False)
+        assert result.array._data.flags.writeable
+        # now modifying result also updates original array
+        result.loc[0] = Point(10, 10)
+        assert arr[0] == Point(10, 10)
+
+    # GH 1216
+    @pytest.mark.parametrize("name", [None, "geometry", "Points"])
+    @pytest.mark.parametrize("crs", [None, "epsg:4326"])
+    def test_reset_index(self, name, crs):
+        s = GeoSeries(
+            [MultiPoint([(0, 0), (1, 1)]), MultiPoint([(2, 2), (3, 3), (4, 4)])],
+            name=name,
+            crs=crs,
+        )
+        s = s.explode(index_parts=True)
+        df = s.reset_index()
+        assert type(df) == GeoDataFrame
+        # name None -> 0, otherwise name preserved
+        assert df.geometry.name == (name if name is not None else 0)
+        assert df.crs == s.crs
+
+    @pytest.mark.parametrize("name", [None, "geometry", "Points"])
+    @pytest.mark.parametrize("crs", [None, "epsg:4326"])
+    def test_to_frame(self, name, crs):
+        s = GeoSeries([Point(0, 0), Point(1, 1)], name=name, crs=crs)
+        df = s.to_frame()
+        assert type(df) == GeoDataFrame
+        # name None -> 0, otherwise name preserved
+        expected_name = name if name is not None else 0
+        assert df.geometry.name == expected_name
+        assert df._geometry_column_name == expected_name
+        assert df.crs == s.crs
+
+        # if name is provided to to_frame, it should override
+        df2 = s.to_frame(name="geom")
+        assert type(df) == GeoDataFrame
+        assert df2.geometry.name == "geom"
+        assert df2.crs == s.crs
+
+    def test_explode_without_multiindex(self):
+        s = GeoSeries(
+            [MultiPoint([(0, 0), (1, 1)]), MultiPoint([(2, 2), (3, 3), (4, 4)])]
+        )
+        s = s.explode(index_parts=False)
+        expected_index = pd.Index([0, 0, 1, 1, 1])
+        assert_index_equal(s.index, expected_index)
+
+    def test_explode_ignore_index(self):
+        s = GeoSeries(
+            [MultiPoint([(0, 0), (1, 1)]), MultiPoint([(2, 2), (3, 3), (4, 4)])]
+        )
+        s = s.explode(ignore_index=True)
+        expected_index = pd.Index(range(len(s)))
+        assert_index_equal(s.index, expected_index)
+
+        # index_parts is ignored if ignore_index=True
+        s = s.explode(index_parts=True, ignore_index=True)
+        assert_index_equal(s.index, expected_index)
@@ -0,0 +1,230 @@
+import warnings
+
+import pandas as pd
+
+from shapely.geometry import Point
+
+from geopandas import GeoDataFrame, GeoSeries
+from geopandas._compat import HAS_PYPROJ, PANDAS_GE_21
+
+import pytest
+from geopandas.testing import assert_geodataframe_equal
+from pandas.testing import assert_index_equal
+
+
+class TestMerging:
+    def setup_method(self):
+        self.gseries = GeoSeries([Point(i, i) for i in range(3)])
+        self.series = pd.Series([1, 2, 3])
+        self.gdf = GeoDataFrame({"geometry": self.gseries, "values": range(3)})
+        self.df = pd.DataFrame({"col1": [1, 2, 3], "col2": [0.1, 0.2, 0.3]})
+
+    def _check_metadata(self, gdf, geometry_column_name="geometry", crs=None):
+        assert gdf._geometry_column_name == geometry_column_name
+        assert gdf.crs == crs
+
+    def test_merge(self):
+        res = self.gdf.merge(self.df, left_on="values", right_on="col1")
+
+        # check result is a GeoDataFrame
+        assert isinstance(res, GeoDataFrame)
+
+        # check geometry property gives GeoSeries
+        assert isinstance(res.geometry, GeoSeries)
+
+        # check metadata
+        self._check_metadata(res)
+
+        # test that crs and other geometry name are preserved
+        self.gdf.crs = "epsg:4326"
+        self.gdf = self.gdf.rename(columns={"geometry": "points"}).set_geometry(
+            "points"
+        )
+        res = self.gdf.merge(self.df, left_on="values", right_on="col1")
+        assert isinstance(res, GeoDataFrame)
+        assert isinstance(res.geometry, GeoSeries)
+        self._check_metadata(res, "points", self.gdf.crs)
+
+    def test_concat_axis0(self):
+        # frame
+        res = pd.concat([self.gdf, self.gdf])
+        assert res.shape == (6, 2)
+        assert isinstance(res, GeoDataFrame)
+        assert isinstance(res.geometry, GeoSeries)
+        self._check_metadata(res)
+        exp = GeoDataFrame(pd.concat([pd.DataFrame(self.gdf), pd.DataFrame(self.gdf)]))
+        assert_geodataframe_equal(exp, res)
+
+        # series
+        res = pd.concat([self.gdf.geometry, self.gdf.geometry])
+        assert res.shape == (6,)
+        assert isinstance(res, GeoSeries)
+        assert isinstance(res.geometry, GeoSeries)
+
+    @pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not available")
+    def test_concat_axis0_crs(self):
+        # CRS not set for both GeoDataFrame
+        res = pd.concat([self.gdf, self.gdf])
+        self._check_metadata(res)
+
+        # CRS set for both GeoDataFrame, same CRS
+        res1 = pd.concat([self.gdf.set_crs("epsg:4326"), self.gdf.set_crs("epsg:4326")])
+        self._check_metadata(res1, crs="epsg:4326")
+
+        # CRS not set for one GeoDataFrame, but set for the other GeoDataFrame
+        with pytest.warns(
+            UserWarning, match=r"CRS not set for some of the concatenation inputs.*"
+        ):
+            res2 = pd.concat([self.gdf, self.gdf.set_crs("epsg:4326")])
+            self._check_metadata(res2, crs="epsg:4326")
+
+        # CRS set for both GeoDataFrame, different CRS
+        with pytest.raises(
+            ValueError, match=r"Cannot determine common CRS for concatenation inputs.*"
+        ):
+            pd.concat([self.gdf.set_crs("epsg:4326"), self.gdf.set_crs("epsg:4327")])
+
+        # CRS not set for one GeoDataFrame, but set for the other GeoDataFrames,
+        # same CRS
+        with pytest.warns(
+            UserWarning, match=r"CRS not set for some of the concatenation inputs.*"
+        ):
+            res3 = pd.concat(
+                [self.gdf, self.gdf.set_crs("epsg:4326"), self.gdf.set_crs("epsg:4326")]
+            )
+            self._check_metadata(res3, crs="epsg:4326")
+
+        # CRS not set for one GeoDataFrame, but set for the other GeoDataFrames,
+        # different CRS
+        with pytest.raises(
+            ValueError, match=r"Cannot determine common CRS for concatenation inputs.*"
+        ):
+            pd.concat(
+                [self.gdf, self.gdf.set_crs("epsg:4326"), self.gdf.set_crs("epsg:4327")]
+            )
+
+    @pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not available")
+    def test_concat_axis0_unaligned_cols(self):
+        """Check row-wise concat of frames whose geometry columns do not line up.
+
+        The first three concats run with warnings escalated to errors, so any
+        warning raised there fails the test. The last concat is expected to
+        emit the "CRS not set" ``UserWarning``.
+        """
+        # https://github.com/geopandas/geopandas/issues/2679
+        gdf = self.gdf.set_crs("epsg:4326").assign(
+            geom=self.gdf.geometry.set_crs("epsg:4327")
+        )
+        both_geom_cols = gdf[["geom", "geometry"]]
+        single_geom_col = gdf[["geometry"]]
+        with warnings.catch_warnings():
+            # turn every warning into an exception inside this block
+            warnings.simplefilter("error")
+            pd.concat([both_geom_cols, single_geom_col])
+        # Check order of mismatch doesn't matter
+        with warnings.catch_warnings():
+            warnings.simplefilter("error")
+            pd.concat([single_geom_col, both_geom_cols])
+
+        # Side effect of this fix, explicitly provided all none geoseries
+        # will not be warned for (ideally this would still warn)
+        explicit_all_none_case = gdf[["geometry"]].assign(
+            geom=GeoSeries([None for _ in range(len(gdf))])
+        )
+        with warnings.catch_warnings():
+            warnings.simplefilter("error")
+            pd.concat([both_geom_cols, explicit_all_none_case])
+
+        # Check concat with partially None col is not affected by the special casing
+        # for all None no CRS handling
+        with pytest.warns(
+            UserWarning, match=r"CRS not set for some of the concatenation inputs.*"
+        ):
+            # built from self.gdf, which has no CRS, and blanked in its first row
+            partial_none_case = self.gdf[["geometry"]]
+            partial_none_case.iloc[0] = None
+            pd.concat([single_geom_col, partial_none_case])
+
+    def test_concat_axis0_crs_wkt_mismatch(self):
+        """Check concat of frames whose CRS compare equal but differ in WKT text.
+
+        Two CRS objects are built from WKT strings that differ only by one
+        extra space inside the AREA text. The concat result is expected to
+        equal a CRS-less concat with the first CRS set on it.
+        """
+        pyproj = pytest.importorskip("pyproj")
+
+        # https://github.com/geopandas/geopandas/issues/326#issuecomment-1727958475
+        wkt_template = """GEOGCRS["WGS 84",
+        ENSEMBLE["World Geodetic System 1984 ensemble",
+        MEMBER["World Geodetic System 1984 (Transit)"],
+        MEMBER["World Geodetic System 1984 (G730)"],
+        MEMBER["World Geodetic System 1984 (G873)"],
+        MEMBER["World Geodetic System 1984 (G1150)"],
+        MEMBER["World Geodetic System 1984 (G1674)"],
+        MEMBER["World Geodetic System 1984 (G1762)"],
+        MEMBER["World Geodetic System 1984 (G2139)"],
+        ELLIPSOID["WGS 84",6378137,298.257223563,LENGTHUNIT["metre",1]],
+        ENSEMBLEACCURACY[2.0]],PRIMEM["Greenwich",0,
+        ANGLEUNIT["degree",0.0174532925199433]],CS[ellipsoidal,2],
+        AXIS["geodetic latitude (Lat)",north,ORDER[1],
+        ANGLEUNIT["degree",0.0174532925199433]],
+        AXIS["geodetic longitude (Lon)",east,ORDER[2],
+        ANGLEUNIT["degree",0.0174532925199433]],
+        USAGE[SCOPE["Horizontal component of 3D system."],
+        AREA["World.{}"],BBOX[-90,-180,90,180]],ID["EPSG",4326]]"""
+        wkt_v1 = wkt_template.format("")
+        wkt_v2 = wkt_template.format(" ")  # add additional whitespace
+        crs1 = pyproj.CRS.from_wkt(wkt_v1)
+        crs2 = pyproj.CRS.from_wkt(wkt_v2)
+        # pyproj CRS __hash__ is based on the WKT string, so these two objects
+        # are distinct members of a set but still compare equal with ==
+        assert len({crs1, crs2}) == 2
+        assert crs1 == crs2
+        expected = pd.concat([self.gdf, self.gdf]).set_crs(crs1)
+        res = pd.concat([self.gdf.set_crs(crs1), self.gdf.set_crs(crs2)])
+        assert_geodataframe_equal(expected, res)
+
+    def test_concat_axis1(self):
+        res = pd.concat([self.gdf, self.df], axis=1)
+
+        assert res.shape == (3, 4)
+        assert isinstance(res, GeoDataFrame)
+        assert isinstance(res.geometry, GeoSeries)
+        self._check_metadata(res)
+
+    def test_concat_axis1_multiple_geodataframes(self):
+        # https://github.com/geopandas/geopandas/issues/1230
+        # Expect that concat should fail gracefully if duplicate column names belonging
+        # to geometry columns are introduced.
+        if PANDAS_GE_21:
+            # _constructor_from_mgr changes mean we now get the concat specific error
+            # message in this case too
+            expected_err = (
+                "Concat operation has resulted in multiple columns using the geometry "
+                "column name 'geometry'."
+            )
+        else:
+            expected_err = (
+                "GeoDataFrame does not support multiple columns using the geometry"
+                " column name 'geometry'"
+            )
+        with pytest.raises(ValueError, match=expected_err):
+            pd.concat([self.gdf, self.gdf], axis=1)
+
+        # Check case is handled if custom geometry column name is used
+        df2 = self.gdf.rename_geometry("geom")
+        expected_err2 = (
+            "Concat operation has resulted in multiple columns using the geometry "
+            "column name 'geom'."
+        )
+        with pytest.raises(ValueError, match=expected_err2):
+            pd.concat([df2, df2], axis=1)
+
+        if HAS_PYPROJ:
+            # Check that two geometry columns is fine, if they have different names
+            res3 = pd.concat([df2.set_crs("epsg:4326"), self.gdf], axis=1)
+            # check metadata comes from first df
+            self._check_metadata(res3, geometry_column_name="geom", crs="epsg:4326")
+
+    @pytest.mark.filterwarnings("ignore:Accessing CRS")
+    def test_concat_axis1_geoseries(self):
+        gseries2 = GeoSeries([Point(i, i) for i in range(3, 6)], crs="epsg:4326")
+        result = pd.concat([gseries2, self.gseries], axis=1)
+        # Note this is not consistent with concat([gdf, gdf], axis=1) where the
+        # left metadata is set on the result. This is deliberate for now.
+        assert type(result) is GeoDataFrame
+        assert result._geometry_column_name is None
+        assert_index_equal(pd.Index([0, 1]), result.columns)
+
+        gseries2.name = "foo"
+        result2 = pd.concat([gseries2, self.gseries], axis=1)
+        assert type(result2) is GeoDataFrame
+        assert result._geometry_column_name is None
+        assert_index_equal(pd.Index(["foo", 0]), result2.columns)
@@ -0,0 +1,411 @@
+import numpy as np
+import pandas as pd
+
+from shapely.geometry import Point
+
+import geopandas
+from geopandas import GeoDataFrame, GeoSeries
+
+import pytest
+from geopandas.testing import assert_geodataframe_equal
+
+# the whole module is skipped when pyproj cannot be imported
+pyproj = pytest.importorskip("pyproj")
+
+# two different CRS objects: crs_wgs is the default expected by the helper
+# functions below, crs_osgb is assigned to the secondary geometry column
+crs_osgb = pyproj.CRS(27700)
+crs_wgs = pyproj.CRS(4326)
+
+
+# number of rows built by the `df` fixture
+N = 10
+
+
+@pytest.fixture(params=["geometry", "point"])
+def df(request):
+    geo_name = request.param
+
+    df = GeoDataFrame(
+        [
+            {
+                "value1": x + y,
+                "value2": x * y,
+                geo_name: Point(x, y),  # rename this col in tests
+            }
+            for x, y in zip(range(N), range(N))
+        ],
+        crs=crs_wgs,
+        geometry=geo_name,
+    )
+    # want geometry2 to be a GeoSeries not Series, test behaviour of non geom col
+    df["geometry2"] = df[geo_name].set_crs(crs_osgb, allow_override=True)
+    return df
+
+
+@pytest.fixture
+def df2():
+    """For constructor_sliced tests"""
+    return GeoDataFrame(
+        {
+            "geometry": GeoSeries([Point(x, x) for x in range(3)]),
+            "geometry2": GeoSeries([Point(x, x) for x in range(3)]),
+            "geometry3": GeoSeries([Point(x, x) for x in range(3)]),
+            "value": [1, 2, 1],
+            "value_nan": np.nan,
+        }
+    )
+
+
+def _check_metadata_gdf(gdf, geo_name="geometry", crs=crs_wgs):
+    assert gdf._geometry_column_name == geo_name
+    assert gdf.geometry.name == geo_name
+    assert gdf.crs == crs
+
+
+def _check_metadata_gs(gs, name="geometry", crs=crs_wgs):
+    assert gs.name == name
+    assert gs.crs == crs
+
+
+def assert_object(result, expected_type, geo_name="geometry", crs=crs_wgs):
+    """
+    Helper method to make tests easier to read. Checks result is of the expected
+    type. If result is a GeoDataFrame or GeoSeries, checks geo_name
+    and crs match. If geo_name is None, then we expect a GeoDataFrame
+    where the geometry column is invalid/ isn't set. This is never desirable,
+    but is a reality of this first stage of implementation.
+    """
+    assert type(result) is expected_type
+
+    if expected_type == GeoDataFrame:
+        assert geo_name is not None
+        _check_metadata_gdf(result, geo_name=geo_name, crs=crs)
+
+    elif expected_type == GeoSeries:
+        _check_metadata_gs(result, name=geo_name, crs=crs)
+
+
+def assert_obj_no_active_geo_col(result, expected_type, geo_colname=None):
+    """
+    Helper method to make tests easier to read. Checks result is of the expected
+    type. Asserts that accessing result.geometry.name raises, corresponding to
+    _geometry_column_name being in an invalid state
+    (either None, or a column no longer present)
+    This amounts to testing the assertion raised (geometry column is unset, vs
+    old geometry column is missing)
+
+    We assert that _geometry_column_name = int_geo_colname
+
+    """
+    if expected_type == GeoDataFrame:
+        if geo_colname is None:
+            assert result._geometry_column_name is None
+        else:
+            assert geo_colname == result._geometry_column_name
+
+        if result._geometry_column_name is None:
+            msg = (
+                "You are calling a geospatial method on the GeoDataFrame, "
+                "but the active"
+            )
+        else:
+            msg = (
+                "You are calling a geospatial method on the GeoDataFrame, but "
+                r"the active geometry column \("
+                rf"'{result._geometry_column_name}'\) is not present"
+            )
+        with pytest.raises(AttributeError, match=msg):
+            result.geometry.name  # be explicit that geometry is invalid here
+    else:
+        raise NotImplementedError()
+
+
+def test_getitem(df):
+    geo_name = df.geometry.name
+    assert_object(df[["value1", "value2"]], pd.DataFrame)
+    assert_object(df[[geo_name, "geometry2"]], GeoDataFrame, geo_name)
+    assert_object(df[[geo_name]], GeoDataFrame, geo_name)
+    assert_obj_no_active_geo_col(df[["geometry2", "value1"]], GeoDataFrame, geo_name)
+    assert_obj_no_active_geo_col(df[["geometry2"]], GeoDataFrame, geo_name)
+    assert_object(df[["value1"]], pd.DataFrame)
+    # Series
+    assert_object(df[geo_name], GeoSeries, geo_name)
+    assert_object(df["geometry2"], GeoSeries, "geometry2", crs=crs_osgb)
+    assert_object(df["value1"], pd.Series)
+
+
+def test_loc(df):
+    geo_name = df.geometry.name
+    assert_object(df.loc[:, ["value1", "value2"]], pd.DataFrame)
+    assert_object(df.loc[:, [geo_name, "geometry2"]], GeoDataFrame, geo_name)
+    assert_object(df.loc[:, [geo_name]], GeoDataFrame, geo_name)
+    assert_obj_no_active_geo_col(
+        df.loc[:, ["geometry2", "value1"]], GeoDataFrame, geo_name
+    )
+    assert_obj_no_active_geo_col(df.loc[:, ["geometry2"]], GeoDataFrame, geo_name)
+    assert_object(df.loc[:, ["value1"]], pd.DataFrame)
+    # Series
+    assert_object(df.loc[:, geo_name], GeoSeries, geo_name)
+    assert_object(df.loc[:, "geometry2"], GeoSeries, "geometry2", crs=crs_osgb)
+    assert_object(df.loc[:, "value1"], pd.Series)
+
+
+@pytest.mark.parametrize(
+    "geom_name",
+    [
+        "geometry",
+        pytest.param(
+            "geom",
+            marks=pytest.mark.xfail(
+                reason="pre-regression behaviour only works for geometry col geometry"
+            ),
+        ),
+    ],
+)
+def test_loc_add_row(geom_name, nybb_filename):
+    # https://github.com/geopandas/geopandas/issues/3119
+
+    nybb = geopandas.read_file(nybb_filename)[["BoroCode", "geometry"]]
+    if geom_name != "geometry":
+        nybb = nybb.rename_geometry(geom_name)
+    # crs_orig = nybb.crs
+
+    # add a new row
+    nybb.loc[5] = [6, nybb.geometry.iloc[0]]
+    assert nybb.geometry.dtype == "geometry"
+    assert nybb.crs is None  # TODO this should be crs_orig, regressed in #2373
+
+
+def test_iloc(df):
+    geo_name = df.geometry.name
+    assert_object(df.iloc[:, 0:2], pd.DataFrame)
+    assert_object(df.iloc[:, 2:4], GeoDataFrame, geo_name)
+    assert_object(df.iloc[:, [2]], GeoDataFrame, geo_name)
+    assert_obj_no_active_geo_col(df.iloc[:, [3, 0]], GeoDataFrame, geo_name)
+    assert_obj_no_active_geo_col(df.iloc[:, [3]], GeoDataFrame, geo_name)
+    assert_object(df.iloc[:, [0]], pd.DataFrame)
+    # Series
+    assert_object(df.iloc[:, 2], GeoSeries, geo_name)
+    assert_object(df.iloc[:, 3], GeoSeries, "geometry2", crs=crs_osgb)
+    assert_object(df.iloc[:, 0], pd.Series)
+
+
+def test_squeeze(df):
+    geo_name = df.geometry.name
+    assert_object(df[[geo_name]].squeeze(), GeoSeries, geo_name)
+    assert_object(df[["geometry2"]].squeeze(), GeoSeries, "geometry2", crs=crs_osgb)
+
+
+def test_to_frame(df):
+    geo_name = df.geometry.name
+    res1 = df[geo_name].to_frame()
+    assert_object(res1, GeoDataFrame, geo_name, crs=df[geo_name].crs)
+
+    res2 = df["geometry2"].to_frame()
+    assert_object(res2, GeoDataFrame, "geometry2", crs=crs_osgb)
+
+    res3 = df["value1"].to_frame()
+    assert_object(res3, pd.DataFrame)
+
+
+def test_reindex(df):
+    geo_name = df.geometry.name
+    assert_object(df.reindex(columns=["value1", "value2"]), pd.DataFrame)
+    assert_object(df.reindex(columns=[geo_name, "geometry2"]), GeoDataFrame, geo_name)
+    assert_object(df.reindex(columns=[geo_name]), GeoDataFrame, geo_name)
+    assert_object(df.reindex(columns=["new_col", geo_name]), GeoDataFrame, geo_name)
+    assert_obj_no_active_geo_col(
+        df.reindex(columns=["geometry2", "value1"]), GeoDataFrame, geo_name
+    )
+    assert_obj_no_active_geo_col(
+        df.reindex(columns=["geometry2"]), GeoDataFrame, geo_name
+    )
+    assert_object(df.reindex(columns=["value1"]), pd.DataFrame)
+
+    # reindexing the rows always preserves the GeoDataFrame
+    assert_object(df.reindex(index=[0, 1, 20]), GeoDataFrame, geo_name)
+
+    # reindexing both rows and columns
+    assert_object(
+        df.reindex(index=[0, 1, 20], columns=[geo_name]), GeoDataFrame, geo_name
+    )
+    assert_object(df.reindex(index=[0, 1, 20], columns=["value1"]), pd.DataFrame)
+
+
+def test_drop(df):
+    geo_name = df.geometry.name
+    assert_object(df.drop(columns=[geo_name, "geometry2"]), pd.DataFrame)
+    assert_object(df.drop(columns=["value1", "value2"]), GeoDataFrame, geo_name)
+    cols = ["value1", "value2", "geometry2"]
+    assert_object(df.drop(columns=cols), GeoDataFrame, geo_name)
+    assert_obj_no_active_geo_col(
+        df.drop(columns=[geo_name, "value2"]), GeoDataFrame, geo_name
+    )
+    assert_obj_no_active_geo_col(
+        df.drop(columns=["value1", "value2", geo_name]), GeoDataFrame, geo_name
+    )
+    assert_object(df.drop(columns=["geometry2", "value2", geo_name]), pd.DataFrame)
+
+
+def test_apply(df):
+    geo_name = df.geometry.name
+
+    def identity(x):
+        return x
+
+    # axis = 0
+    assert_object(df[["value1", "value2"]].apply(identity), pd.DataFrame)
+    assert_object(df[[geo_name, "geometry2"]].apply(identity), GeoDataFrame, geo_name)
+    assert_object(df[[geo_name]].apply(identity), GeoDataFrame, geo_name)
+
+    res = df[["geometry2", "value1"]].apply(identity)
+    assert_obj_no_active_geo_col(res, GeoDataFrame, geo_name)
+    assert_obj_no_active_geo_col(
+        df[["geometry2"]].apply(identity), GeoDataFrame, geo_name
+    )
+    assert_object(df[["value1"]].apply(identity), pd.DataFrame)
+
+    # axis = 0, Series
+    assert_object(df[geo_name].apply(identity), GeoSeries, geo_name)
+    assert_object(df["geometry2"].apply(identity), GeoSeries, "geometry2", crs=crs_osgb)
+    assert_object(df["value1"].apply(identity), pd.Series)
+
+    # axis = 0, Series, no longer geometry
+    assert_object(df[geo_name].apply(lambda x: str(x)), pd.Series)
+    assert_object(df["geometry2"].apply(lambda x: str(x)), pd.Series)
+
+    # axis = 1
+    assert_object(df[["value1", "value2"]].apply(identity, axis=1), pd.DataFrame)
+    assert_object(
+        df[[geo_name, "geometry2"]].apply(identity, axis=1), GeoDataFrame, geo_name
+    )
+    assert_object(df[[geo_name]].apply(identity, axis=1), GeoDataFrame, geo_name)
+    # TODO below should be a GeoDataFrame to be consistent with new getitem logic
+    #   leave as follow up as quite complicated
+    #   FrameColumnApply.series_generator returns object dtypes Series, so will have
+    #   patch result of apply
+    assert_object(df[["geometry2", "value1"]].apply(identity, axis=1), pd.DataFrame)
+
+    assert_object(df[["value1"]].apply(identity, axis=1), pd.DataFrame)
+
+
+def test_apply_axis1_secondary_geo_cols(df):
+    geo_name = df.geometry.name
+
+    def identity(x):
+        return x
+
+    assert_obj_no_active_geo_col(
+        df[["geometry2"]].apply(identity, axis=1), GeoDataFrame, geo_name
+    )
+
+
+def test_expanddim_in_apply():
+    # https://github.com/geopandas/geopandas/pull/2296#issuecomment-1021966443
+    s = GeoSeries.from_xy([0, 1], [0, 1])
+    result = s.apply(lambda x: pd.Series([x.x, x.y]))
+    assert_object(result, pd.DataFrame)
+
+
+def test_expandim_in_groupby_aggregate_multiple_funcs():
+    # https://github.com/geopandas/geopandas/pull/2296#issuecomment-1021966443
+    # There are two calls to _constructor_expanddim here
+    # SeriesGroupBy._aggregate_multiple_funcs() and
+    # SeriesGroupBy._wrap_series_output() len(output) > 1
+
+    s = GeoSeries.from_xy([0, 1, 2], [0, 1, 3])
+
+    def union(s):
+        return s.union_all()
+
+    def total_area(s):
+        return s.area.sum()
+
+    grouped = s.groupby([0, 1, 0])
+    agg = grouped.agg([total_area, union])
+    assert_obj_no_active_geo_col(agg, GeoDataFrame, geo_colname=None)
+    result = grouped.agg([union, total_area])
+    assert_obj_no_active_geo_col(result, GeoDataFrame, geo_colname=None)
+    assert_object(grouped.agg([total_area, total_area]), pd.DataFrame)
+    assert_object(grouped.agg([total_area]), pd.DataFrame)
+
+
+def test_expanddim_in_unstack():
+    # https://github.com/geopandas/geopandas/pull/2296#issuecomment-1021966443
+    s = GeoSeries.from_xy(
+        [0, 1, 2],
+        [0, 1, 3],
+        index=pd.MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "a")]),
+    )
+    unstack = s.unstack()
+    expected_geo_name = None
+    assert_obj_no_active_geo_col(unstack, GeoDataFrame, geo_colname=expected_geo_name)
+
+    # https://github.com/geopandas/geopandas/issues/2486
+    s.name = "geometry"
+    unstack = s.unstack()
+    assert_obj_no_active_geo_col(unstack, GeoDataFrame, expected_geo_name)
+
+
+# indexing / constructor_sliced tests
+
+# column subsets of the `df2` fixture used to parametrize
+# test_constructor_sliced_row_slices
+test_case_column_sets = [
+    ["geometry"],
+    ["geometry2"],
+    ["geometry", "geometry2"],
+    # non active geo col case
+    ["geometry", "value"],
+    ["geometry", "value_nan"],
+    ["geometry2", "value"],
+    ["geometry2", "value_nan"],
+]
+
+
+@pytest.mark.parametrize(
+    "column_set",
+    test_case_column_sets,
+    ids=[", ".join(i) for i in test_case_column_sets],
+)
+def test_constructor_sliced_row_slices(df2, column_set):
+    # https://github.com/geopandas/geopandas/issues/2282
+    df_subset = df2[column_set]
+    assert isinstance(df_subset, GeoDataFrame)
+    res = df_subset.loc[0]
+    # row slices shouldn't be GeoSeries, even if they have a geometry col
+    assert type(res) == pd.Series
+    if "geometry" in column_set:
+        assert not isinstance(res.geometry, pd.Series)
+        assert res.geometry == Point(0, 0)
+
+
+def test_constructor_sliced_column_slices(df2):
+    # Note loc doesn't use _constructor_sliced so it's not tested here
+    geo_idx = df2.columns.get_loc("geometry")
+    sub = df2.head(1)
+    # column slices should be GeoSeries if of geometry type
+    assert type(sub.iloc[:, geo_idx]) == GeoSeries
+    assert type(sub.iloc[[0], geo_idx]) == GeoSeries
+    sub = df2.head(2)
+    assert type(sub.iloc[:, geo_idx]) == GeoSeries
+    assert type(sub.iloc[[0, 1], geo_idx]) == GeoSeries
+
+    # check iloc row slices are pd.Series instead
+    assert type(df2.iloc[0, :]) == pd.Series
+
+
+def test_constructor_sliced_in_pandas_methods(df2):
+    # constructor sliced is used in many places, checking a sample of non
+    # geometry cases are sensible
+    assert type(df2.count()) == pd.Series
+    # drop the secondary geometry columns as not hashable
+    hashable_test_df = df2.drop(columns=["geometry2", "geometry3"])
+    assert type(hashable_test_df.duplicated()) == pd.Series
+    assert type(df2.quantile(numeric_only=True)) == pd.Series
+    assert type(df2.memory_usage()) == pd.Series
+
+
+def test_merge_preserve_geodataframe():
+    # https://github.com/geopandas/geopandas/issues/2932
+    ser = GeoSeries.from_xy([1], [1])
+    df = GeoDataFrame({"geo": ser})
+    res = df.merge(df, left_index=True, right_index=True)
+    assert_obj_no_active_geo_col(res, GeoDataFrame, geo_colname=None)
+    expected = GeoDataFrame({"geo_x": ser, "geo_y": ser})
+    assert_geodataframe_equal(expected, res)
@@ -0,0 +1,891 @@
+import os
+
+import numpy as np
+import pandas as pd
+
+from shapely import make_valid
+from shapely.geometry import GeometryCollection, LineString, Point, Polygon, box
+
+import geopandas
+from geopandas import GeoDataFrame, GeoSeries, overlay, read_file
+from geopandas._compat import HAS_PYPROJ, PANDAS_GE_20
+
+import pytest
+from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
+
+# fiona is optional: when it cannot be imported, define a stand-in exception
+# class so the name DriverError still exists in this module
+try:
+    from fiona.errors import DriverError
+except ImportError:
+
+    class DriverError(Exception):
+        pass
+
+
+# directory with the overlay test data, located next to this test module
+DATA = os.path.join(os.path.abspath(os.path.dirname(__file__)), "data", "overlay")
+
+
+@pytest.fixture
+def dfs(request):
+    s1 = GeoSeries(
+        [
+            Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
+            Polygon([(2, 2), (4, 2), (4, 4), (2, 4)]),
+        ]
+    )
+    s2 = GeoSeries(
+        [
+            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
+            Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
+        ]
+    )
+    df1 = GeoDataFrame({"col1": [1, 2], "geometry": s1})
+    df2 = GeoDataFrame({"col2": [1, 2], "geometry": s2})
+    return df1, df2
+
+
+@pytest.fixture(params=["default-index", "int-index", "string-index"])
+def dfs_index(request, dfs):
+    df1, df2 = dfs
+    if request.param == "int-index":
+        df1.index = [1, 2]
+        df2.index = [0, 2]
+    if request.param == "string-index":
+        df1.index = ["row1", "row2"]
+    return df1, df2
+
+
+@pytest.fixture(
+    params=["union", "intersection", "difference", "symmetric_difference", "identity"]
+)
+def how(request):
+    return request.param
+
+
+@pytest.fixture(params=[True, False])
+def keep_geom_type(request):
+    return request.param
+
+
+def test_overlay(dfs_index, how):
+    """
+    Basic overlay test with small dummy example dataframes (from docs).
+    Results obtained using QGIS 2.16 (Vector -> Geoprocessing Tools ->
+    Intersection / Union / ...), saved to GeoJSON
+    """
+    df1, df2 = dfs_index
+    result = overlay(df1, df2, how=how)
+
+    # construction of result
+
+    def _read(name):
+        expected = read_file(
+            os.path.join(DATA, "polys", "df1_df2-{0}.geojson".format(name))
+        )
+        expected.geometry.array.crs = None
+        for col in expected.columns[expected.dtypes == "int32"]:
+            expected[col] = expected[col].astype("int64")
+        return expected
+
+    if how == "identity":
+        expected_intersection = _read("intersection")
+        expected_difference = _read("difference")
+        expected = pd.concat(
+            [expected_intersection, expected_difference], ignore_index=True, sort=False
+        )
+        expected["col1"] = expected["col1"].astype(float)
+    else:
+        expected = _read(how)
+
+    # TODO needed adaptations to result
+    if how == "union":
+        result = result.sort_values(["col1", "col2"]).reset_index(drop=True)
+    elif how == "difference":
+        result = result.reset_index(drop=True)
+
+    assert_geodataframe_equal(result, expected, check_column_type=False)
+
+    # for difference also reversed
+    if how == "difference":
+        result = overlay(df2, df1, how=how)
+        result = result.reset_index(drop=True)
+        expected = _read("difference-inverse")
+        assert_geodataframe_equal(result, expected, check_column_type=False)
+
+
+@pytest.mark.filterwarnings("ignore:GeoSeries crs mismatch:UserWarning")
+def test_overlay_nybb(how, nybb_filename):
+    """Compare overlay of the nybb polygons and saved circles with QGIS results.
+
+    The expected frames are read from the ``nybb_qgis`` data directory and
+    cleaned up before comparison. Areas and bounds are compared first, then
+    the full frames are compared with normalized geometries.
+    """
+    polydf = read_file(nybb_filename)
+
+    # The circles have been constructed and saved at the time the expected
+    # results were created (exact output of buffer algorithm can slightly
+    # change over time -> use saved ones)
+    # # construct circles dataframe
+    # N = 10
+    # b = [int(x) for x in polydf.total_bounds]
+    # polydf2 = GeoDataFrame(
+    #     [
+    #         {"geometry": Point(x, y).buffer(10000), "value1": x + y, "value2": x - y}
+    #         for x, y in zip(
+    #             range(b[0], b[2], int((b[2] - b[0]) / N)),
+    #             range(b[1], b[3], int((b[3] - b[1]) / N)),
+    #         )
+    #     ],
+    #     crs=polydf.crs,
+    # )
+    polydf2 = read_file(os.path.join(DATA, "nybb_qgis", "polydf2.shp"))
+
+    result = overlay(polydf, polydf2, how=how)
+
+    cols = ["BoroCode", "BoroName", "Shape_Leng", "Shape_Area", "value1", "value2"]
+    if how == "difference":
+        # the value1 / value2 columns are not used for sorting in this case
+        cols = cols[:-2]
+
+    # expected result
+
+    if how == "identity":
+        # read union one, further down below we take the appropriate subset
+        expected = read_file(os.path.join(DATA, "nybb_qgis", "qgis-union.shp"))
+    else:
+        expected = read_file(
+            os.path.join(DATA, "nybb_qgis", "qgis-{0}.shp".format(how))
+        )
+
+    # The result of QGIS for 'union' contains incorrect geometries:
+    # 24 is a full original circle overlapping with unioned geometries, and
+    # 27 is a completely duplicated row)
+    if how == "union":
+        expected = expected.drop([24, 27])
+        expected.reset_index(inplace=True, drop=True)
+    # Eliminate observations without geometries (issue from QGIS)
+    expected = expected[expected.is_valid]
+    expected.reset_index(inplace=True, drop=True)
+
+    if how == "identity":
+        # keep only the union rows that have a BoroCode value
+        expected = expected[expected.BoroCode.notnull()].copy()
+
+    # Order GeoDataFrames
+    expected = expected.sort_values(cols).reset_index(drop=True)
+
+    # TODO needed adaptations to result
+    result = result.sort_values(cols).reset_index(drop=True)
+
+    if how in ("union", "identity"):
+        # concat < 0.23 sorts, so changes the order of the columns
+        # but at least we ensure 'geometry' is the last column
+        assert result.columns[-1] == "geometry"
+        assert len(result.columns) == len(expected.columns)
+        result = result.reindex(columns=expected.columns)
+
+    # the ordering of the spatial index results causes slight deviations
+    # in the resultant geometries for multipolygons
+    # for more details on the discussion, see:
+    # https://github.com/geopandas/geopandas/pull/1338
+    # https://github.com/geopandas/geopandas/issues/1337
+
+    # Temporary workaround below:
+
+    # simplify multipolygon geometry comparison
+    # since the order of the constituent polygons depends on
+    # the ordering of spatial indexing results, we cannot
+    # compare symmetric_difference results directly when the
+    # resultant geometry is a multipolygon
+
+    # first, check that all bounds and areas are approx equal
+    # this is a very rough check for multipolygon equality
+    # no extra comparison options are passed at the moment
+    kwargs = {}
+    pd.testing.assert_series_equal(
+        result.geometry.area, expected.geometry.area, **kwargs
+    )
+    pd.testing.assert_frame_equal(
+        result.geometry.bounds, expected.geometry.bounds, **kwargs
+    )
+
+    # There are two cases where the multipolygon have a different number
+    # of sub-geometries -> not solved by normalize (and thus drop for now)
+    if how == "symmetric_difference":
+        expected.loc[9, "geometry"] = None
+        result.loc[9, "geometry"] = None
+
+    if how == "union":
+        expected.loc[24, "geometry"] = None
+        result.loc[24, "geometry"] = None
+
+    # missing values get read as None in read_file for a string column, but
+    # are introduced as NaN by overlay
+    expected["BoroName"] = expected["BoroName"].fillna(np.nan)
+
+    assert_geodataframe_equal(
+        result,
+        expected,
+        normalize=True,
+        check_crs=False,
+        check_column_type=False,
+        check_less_precise=True,
+    )
+
+
+def test_overlay_overlap(how):
+    """
+    Overlay test with overlapping geometries in both dataframes.
+    Test files are created with::
+
+        import geopandas
+        from geopandas import GeoSeries, GeoDataFrame
+        from shapely.geometry import Point, Polygon, LineString
+
+        s1 = GeoSeries([Point(0, 0), Point(1.5, 0)]).buffer(1, resolution=2)
+        s2 = GeoSeries([Point(1, 1), Point(2, 2)]).buffer(1, resolution=2)
+
+        df1 = GeoDataFrame({'geometry': s1, 'col1':[1,2]})
+        df2 = GeoDataFrame({'geometry': s2, 'col2':[1, 2]})
+
+        ax = df1.plot(alpha=0.5)
+        df2.plot(alpha=0.5, ax=ax, color='C1')
+
+        df1.to_file('geopandas/geopandas/tests/data/df1_overlap.geojson',
+                    driver='GeoJSON')
+        df2.to_file('geopandas/geopandas/tests/data/df2_overlap.geojson',
+                    driver='GeoJSON')
+
+    and then overlay results are obtained from using  QGIS 2.16
+    (Vector -> Geoprocessing Tools -> Intersection / Union / ...),
+    saved to GeoJSON.
+    """
+    df1 = read_file(os.path.join(DATA, "overlap", "df1_overlap.geojson"))
+    df2 = read_file(os.path.join(DATA, "overlap", "df2_overlap.geojson"))
+
+    result = overlay(df1, df2, how=how)
+
+    if how == "identity":
+        raise pytest.skip()
+
+    expected = read_file(
+        os.path.join(DATA, "overlap", "df1_df2_overlap-{0}.geojson".format(how))
+    )
+
+    if how == "union":
+        # the QGIS result has the last row duplicated, so removing this
+        expected = expected.iloc[:-1]
+
+    # TODO needed adaptations to result
+    result = result.reset_index(drop=True)
+    if how == "union":
+        result = result.sort_values(["col1", "col2"]).reset_index(drop=True)
+
+    assert_geodataframe_equal(
+        result,
+        expected,
+        normalize=True,
+        check_column_type=False,
+        check_less_precise=True,
+    )
+
+
+@pytest.mark.parametrize("other_geometry", [False, True])
+def test_geometry_not_named_geometry(dfs, how, other_geometry):
+    # Issue #306
+    # Add points and flip names
+    df1, df2 = dfs
+    df3 = df1.copy()
+    df3 = df3.rename(columns={"geometry": "polygons"})
+    df3 = df3.set_geometry("polygons")
+    if other_geometry:
+        df3["geometry"] = df1.centroid.geometry
+    assert df3.geometry.name == "polygons"
+
+    res1 = overlay(df1, df2, how=how)
+    res2 = overlay(df3, df2, how=how)
+
+    assert df3.geometry.name == "polygons"
+
+    if how == "difference":
+        # in case of 'difference', column names of left frame are preserved
+        assert res2.geometry.name == "polygons"
+        if other_geometry:
+            assert "geometry" in res2.columns
+            assert_geoseries_equal(
+                res2["geometry"], df3["geometry"], check_series_type=False
+            )
+            res2 = res2.drop(["geometry"], axis=1)
+        res2 = res2.rename(columns={"polygons": "geometry"})
+        res2 = res2.set_geometry("geometry")
+
+    # TODO if existing column is overwritten -> geometry not last column
+    if other_geometry and how == "intersection":
+        res2 = res2.reindex(columns=res1.columns)
+    assert_geodataframe_equal(res1, res2)
+
+    df4 = df2.copy()
+    df4 = df4.rename(columns={"geometry": "geom"})
+    df4 = df4.set_geometry("geom")
+    if other_geometry:
+        df4["geometry"] = df2.centroid.geometry
+    assert df4.geometry.name == "geom"
+
+    res1 = overlay(df1, df2, how=how)
+    res2 = overlay(df1, df4, how=how)
+    assert_geodataframe_equal(res1, res2)
+
+
+def test_bad_how(dfs):
+    df1, df2 = dfs
+    with pytest.raises(ValueError):
+        overlay(df1, df2, how="spandex")
+
+
+def test_duplicate_column_name(dfs, how):
+    if how == "difference":
+        pytest.skip("Difference uses columns from one df only.")
+    df1, df2 = dfs
+    df2r = df2.rename(columns={"col2": "col1"})
+    res = overlay(df1, df2r, how=how)
+    assert ("col1_1" in res.columns) and ("col1_2" in res.columns)
+
+
+def test_geoseries_warning(dfs):
+    df1, df2 = dfs
+    # Issue #305
+    with pytest.raises(NotImplementedError):
+        overlay(df1, df2.geometry, how="union")
+
+
+@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not available")
+def test_preserve_crs(dfs, how):
+    df1, df2 = dfs
+    result = overlay(df1, df2, how=how)
+    assert result.crs is None
+    crs = "epsg:4326"
+    df1.crs = crs
+    df2.crs = crs
+    result = overlay(df1, df2, how=how)
+    assert result.crs == crs
+
+
+@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not available")
+def test_crs_mismatch(dfs, how):
+    df1, df2 = dfs
+    df1.crs = 4326
+    df2.crs = 3857
+    with pytest.warns(UserWarning, match="CRS mismatch between the CRS"):
+        overlay(df1, df2, how=how)
+
+
+def test_empty_intersection(dfs):
+    df1, df2 = dfs
+    polys3 = GeoSeries(
+        [
+            Polygon([(-1, -1), (-3, -1), (-3, -3), (-1, -3)]),
+            Polygon([(-3, -3), (-5, -3), (-5, -5), (-3, -5)]),
+        ]
+    )
+    df3 = GeoDataFrame({"geometry": polys3, "col3": [1, 2]})
+    expected = GeoDataFrame([], columns=["col1", "col3", "geometry"])
+    result = overlay(df1, df3)
+    assert_geodataframe_equal(result, expected, check_dtype=False)
+
+
+def test_correct_index(dfs):
+    # GH883 - case where the index was not properly reset
+    df1, df2 = dfs
+    polys3 = GeoSeries(
+        [
+            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
+            Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]),
+            Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
+        ]
+    )
+    df3 = GeoDataFrame({"geometry": polys3, "col3": [1, 2, 3]})
+    i1 = Polygon([(1, 1), (1, 3), (3, 3), (3, 1), (1, 1)])
+    i2 = Polygon([(3, 3), (3, 5), (5, 5), (5, 3), (3, 3)])
+    expected = GeoDataFrame(
+        [[1, 1, i1], [3, 2, i2]], columns=["col3", "col2", "geometry"]
+    )
+    result = overlay(df3, df2, keep_geom_type=True)
+    assert_geodataframe_equal(result, expected)
+
+
+def test_warn_on_keep_geom_type(dfs):
+    df1, df2 = dfs
+    polys3 = GeoSeries(
+        [
+            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
+            Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]),
+            Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
+        ]
+    )
+    df3 = GeoDataFrame({"geometry": polys3})
+
+    with pytest.warns(UserWarning, match="`keep_geom_type=True` in overlay"):
+        overlay(df2, df3, keep_geom_type=None)
+
+
+@pytest.mark.parametrize(
+    "geom_types", ["polys", "poly_line", "poly_point", "line_poly", "point_poly"]
+)
+def test_overlay_strict(how, keep_geom_type, geom_types):
+    """
+    Test of mixed geometry types on input and output. Expected results initially
+    generated using following snippet.
+
+        polys1 = gpd.GeoSeries([Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
+                                Polygon([(3, 3), (5, 3), (5, 5), (3, 5)])])
+        df1 = gpd.GeoDataFrame({'col1': [1, 2], 'geometry': polys1})
+
+        polys2 = gpd.GeoSeries([Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
+                                Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]),
+                                Polygon([(3, 3), (5, 3), (5, 5), (3, 5)])])
+        df2 = gpd.GeoDataFrame({'geometry': polys2, 'col2': [1, 2, 3]})
+
+        lines1 = gpd.GeoSeries([LineString([(2, 0), (2, 4), (6, 4)]),
+                                LineString([(0, 3), (6, 3)])])
+        df3 = gpd.GeoDataFrame({'col3': [1, 2], 'geometry': lines1})
+        points1 = gpd.GeoSeries([Point((2, 2)),
+                                 Point((3, 3))])
+        df4 = gpd.GeoDataFrame({'col4': [1, 2], 'geometry': points1})
+
+        params=["union", "intersection", "difference", "symmetric_difference",
+                "identity"]
+        stricts = [True, False]
+
+        for p in params:
+            for s in stricts:
+                exp = gpd.overlay(df1, df2, how=p, keep_geom_type=s)
+                if not exp.empty:
+                    exp.to_file('polys_{p}_{s}.geojson'.format(p=p, s=s),
+                                driver='GeoJSON')
+
+        for p in params:
+            for s in stricts:
+                exp = gpd.overlay(df1, df3, how=p, keep_geom_type=s)
+                if not exp.empty:
+                    exp.to_file('poly_line_{p}_{s}.geojson'.format(p=p, s=s),
+                                driver='GeoJSON')
+        for p in params:
+            for s in stricts:
+                exp = gpd.overlay(df1, df4, how=p, keep_geom_type=s)
+                if not exp.empty:
+                    exp.to_file('poly_point_{p}_{s}.geojson'.format(p=p, s=s),
+                                driver='GeoJSON')
+    """
+    polys1 = GeoSeries(
+        [
+            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
+            Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
+        ]
+    )
+    df1 = GeoDataFrame({"col1": [1, 2], "geometry": polys1})
+
+    polys2 = GeoSeries(
+        [
+            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
+            Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]),
+            Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
+        ]
+    )
+    df2 = GeoDataFrame({"geometry": polys2, "col2": [1, 2, 3]})
+    lines1 = GeoSeries(
+        [LineString([(2, 0), (2, 4), (6, 4)]), LineString([(0, 3), (6, 3)])]
+    )
+    df3 = GeoDataFrame({"col3": [1, 2], "geometry": lines1})
+    points1 = GeoSeries([Point((2, 2)), Point((3, 3))])
+    df4 = GeoDataFrame({"col4": [1, 2], "geometry": points1})
+
+    if geom_types == "polys":
+        result = overlay(df1, df2, how=how, keep_geom_type=keep_geom_type)
+    elif geom_types == "poly_line":
+        result = overlay(df1, df3, how=how, keep_geom_type=keep_geom_type)
+    elif geom_types == "poly_point":
+        result = overlay(df1, df4, how=how, keep_geom_type=keep_geom_type)
+    elif geom_types == "line_poly":
+        result = overlay(df3, df1, how=how, keep_geom_type=keep_geom_type)
+    elif geom_types == "point_poly":
+        result = overlay(df4, df1, how=how, keep_geom_type=keep_geom_type)
+
+    try:
+        expected = read_file(
+            os.path.join(
+                DATA,
+                "strict",
+                "{t}_{h}_{s}.geojson".format(t=geom_types, h=how, s=keep_geom_type),
+            )
+        )
+
+        # the order depends on the spatial index used
+        # so we sort the resultant dataframes to get a consistent order
+        # independently of the spatial index implementation
+        assert all(expected.columns == result.columns), "Column name mismatch"
+        cols = list(set(result.columns) - {"geometry"})
+        expected = expected.sort_values(cols, axis=0).reset_index(drop=True)
+        result = result.sort_values(cols, axis=0).reset_index(drop=True)
+
+        # some columns are all-NaN in the result, but get read as object dtype
+        # column of None values in read_file
+        for col in ["col1", "col3", "col4"]:
+            if col in expected.columns and expected[col].isna().all():
+                expected[col] = expected[col].astype("float64")
+
+        assert_geodataframe_equal(
+            result,
+            expected,
+            normalize=True,
+            check_column_type=False,
+            check_less_precise=True,
+            check_crs=False,
+            check_dtype=False,
+        )
+
+    except DriverError:  # fiona >= 1.8
+        assert result.empty
+
+    except OSError:  # fiona < 1.8
+        assert result.empty
+
+    except RuntimeError:  # pyogrio.DataSourceError
+        assert result.empty
+
+
+def test_mixed_geom_error():
+    polys1 = GeoSeries(
+        [
+            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
+            Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
+        ]
+    )
+    df1 = GeoDataFrame({"col1": [1, 2], "geometry": polys1})
+    mixed = GeoSeries(
+        [
+            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
+            LineString([(3, 3), (5, 3), (5, 5), (3, 5)]),
+        ]
+    )
+    dfmixed = GeoDataFrame({"col1": [1, 2], "geometry": mixed})
+    with pytest.raises(NotImplementedError):
+        overlay(df1, dfmixed, keep_geom_type=True)
+
+
+def test_keep_geom_type_error():
+    gcol = GeoSeries(
+        GeometryCollection(
+            [
+                Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
+                LineString([(3, 3), (5, 3), (5, 5), (3, 5)]),
+            ]
+        )
+    )
+    dfcol = GeoDataFrame({"col1": [2], "geometry": gcol})
+    polys1 = GeoSeries(
+        [
+            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
+            Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
+        ]
+    )
+    df1 = GeoDataFrame({"col1": [1, 2], "geometry": polys1})
+    with pytest.raises(TypeError):
+        overlay(dfcol, df1, keep_geom_type=True)
+
+
+def test_keep_geom_type_geometry_collection():
+    # GH 1581
+
+    df1 = read_file(os.path.join(DATA, "geom_type", "df1.geojson"))
+    df2 = read_file(os.path.join(DATA, "geom_type", "df2.geojson"))
+
+    with pytest.warns(UserWarning, match="`keep_geom_type=True` in overlay"):
+        intersection = overlay(df1, df2, keep_geom_type=None)
+    assert len(intersection) == 1
+    assert (intersection.geom_type == "Polygon").all()
+
+    intersection = overlay(df1, df2, keep_geom_type=True)
+    assert len(intersection) == 1
+    assert (intersection.geom_type == "Polygon").all()
+
+    intersection = overlay(df1, df2, keep_geom_type=False)
+    assert len(intersection) == 1
+    assert (intersection.geom_type == "GeometryCollection").all()
+
+
+def test_keep_geom_type_geometry_collection2():
+    polys1 = [
+        box(0, 0, 1, 1),
+        box(1, 1, 3, 3).union(box(1, 3, 5, 5)),
+    ]
+
+    polys2 = [
+        box(0, 0, 1, 1),
+        box(3, 1, 4, 2).union(box(4, 1, 5, 4)),
+    ]
+    df1 = GeoDataFrame({"left": [0, 1], "geometry": polys1})
+    df2 = GeoDataFrame({"right": [0, 1], "geometry": polys2})
+
+    result1 = overlay(df1, df2, keep_geom_type=True)
+    expected1 = GeoDataFrame(
+        {
+            "left": [0, 1],
+            "right": [0, 1],
+            "geometry": [box(0, 0, 1, 1), box(4, 3, 5, 4)],
+        }
+    )
+    assert_geodataframe_equal(result1, expected1)
+
+    result1 = overlay(df1, df2, keep_geom_type=False)
+    expected1 = GeoDataFrame(
+        {
+            "left": [0, 1, 1],
+            "right": [0, 0, 1],
+            "geometry": [
+                box(0, 0, 1, 1),
+                Point(1, 1),
+                GeometryCollection([box(4, 3, 5, 4), LineString([(3, 1), (3, 2)])]),
+            ],
+        }
+    )
+    assert_geodataframe_equal(result1, expected1)
+
+
+def test_keep_geom_type_geomcoll_different_types():
+    polys1 = [box(0, 1, 1, 3), box(10, 10, 12, 12)]
+    polys2 = [
+        Polygon([(1, 0), (3, 0), (3, 3), (1, 3), (1, 2), (2, 2), (2, 1), (1, 1)]),
+        box(11, 11, 13, 13),
+    ]
+    df1 = GeoDataFrame({"left": [0, 1], "geometry": polys1})
+    df2 = GeoDataFrame({"right": [0, 1], "geometry": polys2})
+    result1 = overlay(df1, df2, keep_geom_type=True)
+    expected1 = GeoDataFrame(
+        {
+            "left": [1],
+            "right": [1],
+            "geometry": [box(11, 11, 12, 12)],
+        }
+    )
+    assert_geodataframe_equal(result1, expected1)
+
+    result2 = overlay(df1, df2, keep_geom_type=False)
+    expected2 = GeoDataFrame(
+        {
+            "left": [0, 1],
+            "right": [0, 1],
+            "geometry": [
+                GeometryCollection([LineString([(1, 2), (1, 3)]), Point(1, 1)]),
+                box(11, 11, 12, 12),
+            ],
+        }
+    )
+    assert_geodataframe_equal(result2, expected2)
+
+
+def test_keep_geom_type_geometry_collection_difference():
+    # GH 2163
+
+    polys1 = [
+        box(0, 0, 1, 1),
+        box(1, 1, 2, 2),
+    ]
+
+    # the tiny sliver in the second geometry may be converted to a
+    # linestring during the overlay process due to floating point errors
+    # on some platforms
+    polys2 = [
+        box(0, 0, 1, 1),
+        box(1, 1, 2, 3).union(box(2, 2, 3, 2.00000000000000001)),
+    ]
+    df1 = GeoDataFrame({"left": [0, 1], "geometry": polys1})
+    df2 = GeoDataFrame({"right": [0, 1], "geometry": polys2})
+
+    result1 = overlay(df2, df1, keep_geom_type=True, how="difference")
+    expected1 = GeoDataFrame(
+        {
+            "right": [1],
+            "geometry": [box(1, 2, 2, 3)],
+        },
+    )
+
+    assert_geodataframe_equal(result1, expected1)
+
+
+@pytest.mark.parametrize("should_make_valid", [True, False])
+def test_overlap_make_valid(should_make_valid):
+    bowtie = Polygon([(1, 1), (9, 9), (9, 1), (1, 9), (1, 1)])
+    assert not bowtie.is_valid
+    fixed_bowtie = make_valid(bowtie)
+    assert fixed_bowtie.is_valid
+
+    df1 = GeoDataFrame({"col1": ["region"], "geometry": GeoSeries([box(0, 0, 10, 10)])})
+    df_bowtie = GeoDataFrame(
+        {"col1": ["invalid", "valid"], "geometry": GeoSeries([bowtie, fixed_bowtie])}
+    )
+
+    if should_make_valid:
+        df_overlay_bowtie = overlay(df1, df_bowtie, make_valid=should_make_valid)
+        assert df_overlay_bowtie.at[0, "geometry"].equals(fixed_bowtie)
+        assert df_overlay_bowtie.at[1, "geometry"].equals(fixed_bowtie)
+    else:
+        with pytest.raises(ValueError, match="1 invalid input geometries"):
+            overlay(df1, df_bowtie, make_valid=should_make_valid)
+
+
+def test_empty_overlay_return_non_duplicated_columns(nybb_filename):
+    nybb = geopandas.read_file(nybb_filename)
+    nybb2 = nybb.copy()
+    nybb2.geometry = nybb2.translate(20000000)
+
+    result = geopandas.overlay(nybb, nybb2)
+
+    expected = GeoDataFrame(
+        columns=[
+            "BoroCode_1",
+            "BoroName_1",
+            "Shape_Leng_1",
+            "Shape_Area_1",
+            "BoroCode_2",
+            "BoroName_2",
+            "Shape_Leng_2",
+            "Shape_Area_2",
+            "geometry",
+        ],
+        crs=nybb.crs,
+    )
+    assert_geodataframe_equal(result, expected, check_dtype=False)
+
+
+def test_non_overlapping(how):
+    p1 = Polygon([(0, 0), (2, 0), (2, 2), (0, 2)])
+    p2 = Polygon([(3, 3), (5, 3), (5, 5), (3, 5)])
+    df1 = GeoDataFrame({"col1": [1], "geometry": [p1]})
+    df2 = GeoDataFrame({"col2": [2], "geometry": [p2]})
+    result = overlay(df1, df2, how=how)
+
+    if how == "intersection":
+        if PANDAS_GE_20:
+            index = None
+        else:
+            index = pd.Index([], dtype="object")
+
+        expected = GeoDataFrame(
+            {
+                "col1": np.array([], dtype="int64"),
+                "col2": np.array([], dtype="int64"),
+                "geometry": [],
+            },
+            index=index,
+        )
+    elif how == "union":
+        expected = GeoDataFrame(
+            {
+                "col1": [1, np.nan],
+                "col2": [np.nan, 2],
+                "geometry": [p1, p2],
+            }
+        )
+    elif how == "identity":
+        expected = GeoDataFrame(
+            {
+                "col1": [1.0],
+                "col2": [np.nan],
+                "geometry": [p1],
+            }
+        )
+    elif how == "symmetric_difference":
+        expected = GeoDataFrame(
+            {
+                "col1": [1, np.nan],
+                "col2": [np.nan, 2],
+                "geometry": [p1, p2],
+            }
+        )
+    elif how == "difference":
+        expected = GeoDataFrame(
+            {
+                "col1": [1],
+                "geometry": [p1],
+            }
+        )
+
+    assert_geodataframe_equal(result, expected)
+
+
+def test_no_intersection():
+    # overlapping bounds but non-overlapping geometries
+    gs = GeoSeries([Point(x, x).buffer(0.1) for x in range(3)])
+    gdf1 = GeoDataFrame({"foo": ["a", "b", "c"]}, geometry=gs)
+    gdf2 = GeoDataFrame({"bar": ["1", "3", "5"]}, geometry=gs.translate(1))
+
+    expected = GeoDataFrame(columns=["foo", "bar", "geometry"])
+    result = overlay(gdf1, gdf2, how="intersection")
+    assert_geodataframe_equal(result, expected, check_index_type=False)
+
+
+class TestOverlayWikiExample:
+    def setup_method(self):
+        self.layer_a = GeoDataFrame(geometry=[box(0, 2, 6, 6)])
+
+        self.layer_b = GeoDataFrame(geometry=[box(4, 0, 10, 4)])
+
+        self.intersection = GeoDataFrame(geometry=[box(4, 2, 6, 4)])
+
+        self.union = GeoDataFrame(
+            geometry=[
+                box(4, 2, 6, 4),
+                Polygon([(4, 2), (0, 2), (0, 6), (6, 6), (6, 4), (4, 4), (4, 2)]),
+                Polygon([(10, 0), (4, 0), (4, 2), (6, 2), (6, 4), (10, 4), (10, 0)]),
+            ]
+        )
+
+        self.a_difference_b = GeoDataFrame(
+            geometry=[Polygon([(4, 2), (0, 2), (0, 6), (6, 6), (6, 4), (4, 4), (4, 2)])]
+        )
+
+        self.b_difference_a = GeoDataFrame(
+            geometry=[
+                Polygon([(10, 0), (4, 0), (4, 2), (6, 2), (6, 4), (10, 4), (10, 0)])
+            ]
+        )
+
+        self.symmetric_difference = GeoDataFrame(
+            geometry=[
+                Polygon([(4, 2), (0, 2), (0, 6), (6, 6), (6, 4), (4, 4), (4, 2)]),
+                Polygon([(10, 0), (4, 0), (4, 2), (6, 2), (6, 4), (10, 4), (10, 0)]),
+            ]
+        )
+
+        self.a_identity_b = GeoDataFrame(
+            geometry=[
+                box(4, 2, 6, 4),
+                Polygon([(4, 2), (0, 2), (0, 6), (6, 6), (6, 4), (4, 4), (4, 2)]),
+            ]
+        )
+
+        self.b_identity_a = GeoDataFrame(
+            geometry=[
+                box(4, 2, 6, 4),
+                Polygon([(10, 0), (4, 0), (4, 2), (6, 2), (6, 4), (10, 4), (10, 0)]),
+            ]
+        )
+
+    def test_intersection(self):
+        df_result = overlay(self.layer_a, self.layer_b, how="intersection")
+        assert df_result.geom_equals(self.intersection).all()
+
+    def test_union(self):
+        df_result = overlay(self.layer_a, self.layer_b, how="union")
+        assert_geodataframe_equal(df_result, self.union)
+
+    def test_a_difference_b(self):
+        df_result = overlay(self.layer_a, self.layer_b, how="difference")
+        assert_geodataframe_equal(df_result, self.a_difference_b)
+
+    def test_b_difference_a(self):
+        df_result = overlay(self.layer_b, self.layer_a, how="difference")
+        assert_geodataframe_equal(df_result, self.b_difference_a)
+
+    def test_symmetric_difference(self):
+        df_result = overlay(self.layer_a, self.layer_b, how="symmetric_difference")
+        assert_geodataframe_equal(df_result, self.symmetric_difference)
+
+    def test_a_identity_b(self):
+        df_result = overlay(self.layer_a, self.layer_b, how="identity")
+        assert_geodataframe_equal(df_result, self.a_identity_b)
+
+    def test_b_identity_a(self):
+        df_result = overlay(self.layer_b, self.layer_a, how="identity")
+        assert_geodataframe_equal(df_result, self.b_identity_a)
@@ -0,0 +1,890 @@
+import os
+import warnings
+from packaging.version import Version
+
+import numpy as np
+import pandas as pd
+
+import shapely
+from shapely.geometry import GeometryCollection, LinearRing, LineString, Point
+
+import geopandas
+import geopandas._compat as compat
+from geopandas import GeoDataFrame, GeoSeries
+from geopandas.array import from_shapely
+
+import pytest
+from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
+from numpy.testing import assert_array_equal
+from pandas.testing import assert_frame_equal, assert_series_equal
+
+
+@pytest.fixture
+def s():
+    return GeoSeries([Point(x, y) for x, y in zip(range(3), range(3))])
+
+
+@pytest.fixture
+def df():
+    return GeoDataFrame(
+        {
+            "geometry": [Point(x, x) for x in range(3)],
+            "value1": np.arange(3, dtype="int64"),
+            "value2": np.array([1, 2, 1], dtype="int64"),
+        }
+    )
+
+
+def test_repr(s, df):
+    assert "POINT" in repr(s)
+    assert "POINT" in repr(df)
+    assert "POINT" in df._repr_html_()
+
+
+@pytest.mark.skipif(shapely.geos_version < (3, 9, 0), reason="requires GEOS>=3.9")
+def test_repr_boxed_display_precision():
+    # geographic coordinates
+    p1 = Point(10.123456789, 50.123456789)
+    p2 = Point(4.123456789, 20.123456789)
+    s1 = GeoSeries([p1, p2, None])
+    assert "POINT (10.12346 50.12346)" in repr(s1)
+
+    # geographic coordinates 4326
+    s3 = GeoSeries([p1, p2], crs=4326)
+    assert "POINT (10.12346 50.12346)" in repr(s3)
+
+    # projected coordinates
+    p1 = Point(3000.123456789, 3000.123456789)
+    p2 = Point(4000.123456789, 4000.123456789)
+    s2 = GeoSeries([p1, p2, None])
+    assert "POINT (3000.123 3000.123)" in repr(s2)
+
+    # projected geographic coordinate
+    s4 = GeoSeries([p1, p2], crs=3857)
+    assert "POINT (3000.123 3000.123)" in repr(s4)
+
+    geopandas.options.display_precision = 1
+    assert "POINT (10.1 50.1)" in repr(s1)
+
+    geopandas.options.display_precision = 9
+    assert "POINT (10.123456789 50.123456789)" in repr(s1)
+
+
+def test_repr_all_missing():
+    # https://github.com/geopandas/geopandas/issues/1195
+    s = GeoSeries([None, None, None])
+    assert "None" in repr(s)
+    df = GeoDataFrame({"a": [1, 2, 3], "geometry": s})
+    assert "None" in repr(df)
+    assert "geometry" in df._repr_html_()
+
+
+def test_repr_empty():
+    # https://github.com/geopandas/geopandas/issues/1195
+    s = GeoSeries([])
+    assert repr(s) == "GeoSeries([], dtype: geometry)"
+    df = GeoDataFrame({"a": [], "geometry": s})
+    assert "Empty GeoDataFrame" in repr(df)
+    # https://github.com/geopandas/geopandas/issues/1184
+    assert "geometry" in df._repr_html_()
+
+
+def test_repr_linearring():
+    # https://github.com/geopandas/geopandas/pull/2689
+    # specifically, checking internal shapely/wkt/wkb conversions
+    # preserve LinearRing
+    s = GeoSeries([LinearRing([(0, 0), (1, 1), (1, -1)])])
+    assert "LINEARRING" in str(s.iloc[0])  # shapely scalar repr
+    assert "LINEARRING" in str(s)  # GeoSeries repr
+
+    # check something coercible to linearring is not converted
+    s2 = GeoSeries(
+        [
+            LineString([(0, 0), (1, 1), (1, -1)]),
+            LineString([(0, 0), (1, 1), (1, -1), (0, 0)]),
+        ]
+    )
+    assert "LINEARRING" not in str(s2)
+
+
+def test_indexing(s, df):
+    # accessing scalar from the geometry (column)
+    exp = Point(1, 1)
+    assert s[1] == exp
+    assert s.loc[1] == exp
+    assert s.iloc[1] == exp
+    assert df.loc[1, "geometry"] == exp
+    assert df.iloc[1, 0] == exp
+
+    # multiple values
+    exp = GeoSeries([Point(2, 2), Point(0, 0)], index=[2, 0])
+    assert_geoseries_equal(s.loc[[2, 0]], exp)
+    assert_geoseries_equal(s.iloc[[2, 0]], exp)
+    assert_geoseries_equal(s.reindex([2, 0]), exp)
+    assert_geoseries_equal(df.loc[[2, 0], "geometry"], exp)
+    # TODO here iloc does not return a GeoSeries
+    assert_series_equal(
+        df.iloc[[2, 0], 0], exp, check_series_type=False, check_names=False
+    )
+
+    # boolean indexing
+    exp = GeoSeries([Point(0, 0), Point(2, 2)], index=[0, 2])
+    mask = np.array([True, False, True])
+    assert_geoseries_equal(s[mask], exp)
+    assert_geoseries_equal(s.loc[mask], exp)
+    assert_geoseries_equal(df[mask]["geometry"], exp)
+    assert_geoseries_equal(df.loc[mask, "geometry"], exp)
+
+    # slices
+    s.index = [1, 2, 3]
+    exp = GeoSeries([Point(1, 1), Point(2, 2)], index=[2, 3])
+    assert_series_equal(s[1:], exp)
+    assert_series_equal(s.iloc[1:], exp)
+    assert_series_equal(s.loc[2:], exp)
+
+
+def test_reindex(s, df):
+    # GeoSeries reindex
+    res = s.reindex([1, 2, 3])
+    exp = GeoSeries([Point(1, 1), Point(2, 2), None], index=[1, 2, 3])
+    assert_geoseries_equal(res, exp)
+
+    # GeoDataFrame reindex index
+    res = df.reindex(index=[1, 2, 3])
+    assert_geoseries_equal(res.geometry, exp)
+
+    # GeoDataFrame reindex columns
+    res = df.reindex(columns=["value1", "geometry"])
+    assert isinstance(res, GeoDataFrame)
+    assert isinstance(res.geometry, GeoSeries)
+    assert_frame_equal(res, df[["value1", "geometry"]])
+
+    res = df.reindex(columns=["value1", "value2"])
+    assert type(res) == pd.DataFrame
+    assert_frame_equal(res, df[["value1", "value2"]])
+
+
+def test_take(s, df):
+    inds = np.array([0, 2])
+
+    # GeoSeries take
+    result = s.take(inds)
+    expected = s.iloc[[0, 2]]
+    assert isinstance(result, GeoSeries)
+    assert_geoseries_equal(result, expected)
+
+    # GeoDataFrame take axis 0
+    result = df.take(inds, axis=0)
+    expected = df.iloc[[0, 2], :]
+    assert isinstance(result, GeoDataFrame)
+    assert_geodataframe_equal(result, expected)
+
+    # GeoDataFrame take axis 1
+    df = df.reindex(columns=["value1", "value2", "geometry"])  # ensure consistent order
+    result = df.take(inds, axis=1)
+    expected = df[["value1", "geometry"]]
+    assert isinstance(result, GeoDataFrame)
+    assert_geodataframe_equal(result, expected)
+
+    result = df.take(np.array([0, 1]), axis=1)
+    expected = df[["value1", "value2"]]
+    assert isinstance(result, pd.DataFrame)
+    assert_frame_equal(result, expected)
+
+
+def test_take_empty(s, df):
+    # ensure that index type is preserved in an empty take
+    # https://github.com/geopandas/geopandas/issues/1190
+    inds = np.array([], dtype="int64")
+
+    # use non-default index
+    df.index = pd.date_range("2012-01-01", periods=len(df))
+
+    result = df.take(inds, axis=0)
+    assert isinstance(result, GeoDataFrame)
+    assert result.shape == (0, 3)
+    assert isinstance(result.index, pd.DatetimeIndex)
+
+    # the original bug report was an empty boolean mask
+    for result in [df.loc[df["value1"] > 100], df[df["value1"] > 100]]:
+        assert isinstance(result, GeoDataFrame)
+        assert result.shape == (0, 3)
+        assert isinstance(result.index, pd.DatetimeIndex)
+
+
+def test_assignment(s, df):
+    exp = GeoSeries([Point(10, 10), Point(1, 1), Point(2, 2)])
+
+    s2 = s.copy()
+    s2[0] = Point(10, 10)
+    assert_geoseries_equal(s2, exp)
+
+    s2 = s.copy()
+    s2.loc[0] = Point(10, 10)
+    assert_geoseries_equal(s2, exp)
+
+    s2 = s.copy()
+    s2.iloc[0] = Point(10, 10)
+    assert_geoseries_equal(s2, exp)
+
+    df2 = df.copy()
+    df2.loc[0, "geometry"] = Point(10, 10)
+    assert_geoseries_equal(df2["geometry"], exp)
+
+    df2 = df.copy()
+    df2.iloc[0, 0] = Point(10, 10)
+    assert_geoseries_equal(df2["geometry"], exp)
+
+
+def test_assign(df):
+    res = df.assign(new=1)
+    exp = df.copy()
+    exp["new"] = 1
+    assert isinstance(res, GeoDataFrame)
+    assert_frame_equal(res, exp)
+
+
+def test_astype(s, df):
+    # check geoseries functionality
+    with pytest.raises(TypeError):
+        s.astype(int)
+
+    assert s.astype(str)[0] == "POINT (0 0)"
+
+    res = s.astype(object)
+    if not (
+        (Version(pd.__version__) == Version("2.1.0"))
+        or (Version(pd.__version__) == Version("2.1.1"))
+    ):
+        # https://github.com/geopandas/geopandas/issues/2948 (pandas 2.1.0 / 2.1.1 bug)
+        assert isinstance(res, pd.Series) and not isinstance(res, GeoSeries)
+        assert res.dtype == object
+
+    df = df.rename_geometry("geom_list")
+
+    # check whether returned object is a geodataframe
+    res = df.astype({"value1": float})
+    assert isinstance(res, GeoDataFrame)
+
+    # check whether returned object is a dataframe
+    res = df.astype(str)
+    assert isinstance(res, pd.DataFrame) and not isinstance(res, GeoDataFrame)
+
+    res = df.astype({"geom_list": str})
+    assert isinstance(res, pd.DataFrame) and not isinstance(res, GeoDataFrame)
+
+    res = df.astype(object)
+    assert isinstance(res, pd.DataFrame) and not isinstance(res, GeoDataFrame)
+    assert res["geom_list"].dtype == object
+
+
+def test_astype_invalid_geodataframe():
+    """``astype`` must not raise on a GeoDataFrame that has no geometry column."""
+    # https://github.com/geopandas/geopandas/issues/1144
+    no_geom = GeoDataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+    out = no_geom.astype(object)
+    assert isinstance(out, pd.DataFrame) and not isinstance(out, GeoDataFrame)
+    assert out["a"].dtype == object
+
+
+def test_convert_dtypes(df):
+    # https://github.com/geopandas/geopandas/issues/1870
+
+    # Test geom first, geom_col_name=geometry
+    # (order is important in concat, used internally)
+    res1 = df.convert_dtypes()
+
+    expected1 = GeoDataFrame(
+        pd.DataFrame(df).convert_dtypes(), crs=df.crs, geometry=df.geometry.name
+    )
+
+    # Checking type and metadata are right
+    assert_geodataframe_equal(expected1, res1)
+
+    # Test geom last, geom_col_name=geometry
+    res2 = df[["value1", "value2", "geometry"]].convert_dtypes()
+    assert_geodataframe_equal(expected1[["value1", "value2", "geometry"]], res2)
+
+    if compat.HAS_PYPROJ:
+        # Test again with crs set and custom geom col name
+        df2 = df.set_crs(epsg=4326).rename_geometry("points")
+        expected2 = GeoDataFrame(
+            pd.DataFrame(df2).convert_dtypes(), crs=df2.crs, geometry=df2.geometry.name
+        )
+        res3 = df2.convert_dtypes()
+        assert_geodataframe_equal(expected2, res3)
+
+        # Test geom last, geom_col_name=points
+        res4 = df2[["value1", "value2", "points"]].convert_dtypes()
+        assert_geodataframe_equal(expected2[["value1", "value2", "points"]], res4)
+
+
+def test_to_csv(df):
+    exp = "geometry,value1,value2\nPOINT (0 0),0,1\nPOINT (1 1),1,2\nPOINT (2 2),2,1\n"
+    # the literal uses "\n"; the expected output uses the platform line separator
+    written = df.to_csv(index=False)
+    assert written == exp.replace("\n", os.linesep)
+
+
+@pytest.mark.filterwarnings(
+    "ignore:Dropping of nuisance columns in DataFrame reductions"
+)
+def test_numerical_operations(s, df):
+    """Check numeric behaviour of geometry-bearing objects.
+
+    Frame reductions skip the geometry column, series reductions and
+    arithmetic raise, and element-wise comparison still works.
+    """
+    # df methods ignore the geometry column
+    column_sums = pd.Series([3, 4], index=["value1", "value2"])
+    if compat.PANDAS_GE_20:
+        summed = df.sum(numeric_only=True)
+    else:
+        summed = df.sum()
+    assert_series_equal(summed, column_sums)
+
+    # series methods raise error (not supported for geometry)
+    for reduction in (s.sum, s.max):
+        with pytest.raises(TypeError):
+            reduction()
+
+    # TODO: remove ValueError after pandas-dev/pandas#32749
+    with pytest.raises((TypeError, ValueError)):
+        s.idxmax()
+
+    # numerical ops raise an error
+    for operand in (df, s):
+        with pytest.raises(TypeError):
+            operand + 1
+
+    # boolean comparisons work
+    all_false = pd.DataFrame(False, index=df.index, columns=df.columns)
+    assert_frame_equal(df == 100, all_false)
+
+
+def test_where(s):
+    keep = np.array([True, False, True])  # the masked-out row becomes missing
+    expected = GeoSeries([Point(0, 0), None, Point(2, 2)])
+    assert_series_equal(s.where(keep), expected)
+
+
+def test_select_dtypes(df):
+    # only the two value columns are expected for a numeric selection
+    numeric_part = df.select_dtypes(include=[np.number])
+    assert_frame_equal(numeric_part, df[["value1", "value2"]])
+
+
+def test_equals(s, df):
+    """``equals`` is True for a copy and False once any value is changed."""
+    # https://github.com/geopandas/geopandas/issues/1420
+    series_copy = s.copy()
+    assert s.equals(series_copy) is True
+    series_copy.iloc[0] = None
+    assert s.equals(series_copy) is False
+
+    assert df.equals(df.copy()) is True
+    # a change in either the geometry or an attribute column breaks equality
+    for column, value in (("geometry", Point(10, 10)), ("value1", 10)):
+        modified = df.copy()
+        modified.loc[0, column] = value
+        assert df.equals(modified) is False
+
+
+# Missing values
+
+
+def test_fillna_scalar(s, df):
+    s2 = GeoSeries([Point(0, 0), None, Point(2, 2)])
+
+    res = s2.fillna(Point(1, 1))
+    assert_geoseries_equal(res, s)
+
+    # allow np.nan although this does not change anything
+    # https://github.com/geopandas/geopandas/issues/1149
+    res = s2.fillna(np.nan)
+    assert_geoseries_equal(res, s2)
+
+    # a geometry fill value works on the frame; a non-geometry one raises
+    df2 = df.copy()
+    df2["geometry"] = s2
+    res = df2.fillna(Point(1, 1))
+    assert_geodataframe_equal(res, df)
+    with pytest.raises((NotImplementedError, TypeError)):  # GH2351
+        df2.fillna(0)
+
+    # allow non-geometry fill value if the geometry column has no missing values
+    # https://github.com/geopandas/geopandas/issues/1149
+    df3 = df.copy()
+    df3.loc[0, "value1"] = np.nan
+    res = df3.fillna(0)
+    assert_geodataframe_equal(res.astype({"value1": "int64"}), df)
+
+
+def test_fillna_series(s):
+    # fill na with another GeoSeries
+    s2 = GeoSeries([Point(0, 0), None, Point(2, 2)])
+
+    # check na filled with the same index
+    res = s2.fillna(GeoSeries([Point(1, 1)] * 3))
+    assert_geoseries_equal(res, s)
+
+    # check na filled based on index, not position
+    index = [3, 2, 1]
+    res = s2.fillna(GeoSeries([Point(i, i) for i in index], index=index))
+    assert_geoseries_equal(res, s)
+
+    # check na filled although the input length is different
+    res = s2.fillna(GeoSeries([Point(1, 1)], index=[1]))
+    assert_geoseries_equal(res, s)
+
+    # check na is left untouched when the fill index has no matching label
+    res = s2.fillna(GeoSeries([Point(1, 1)], index=[9]))
+    assert_geoseries_equal(res, s2)
+
+
+def test_fillna_inplace(s):
+    with_gap = GeoSeries([Point(0, 0), None, Point(2, 2)])
+    array_before = with_gap.array
+    with_gap.fillna(Point(1, 1), inplace=True)
+    assert_geoseries_equal(with_gap, s)
+    # starting from pandas 2.1, there is support to do this actually inplace,
+    # so the underlying array object must then be the very same one
+    assert not compat.PANDAS_GE_21 or with_gap.array is array_before
+
+
+def test_dropna():
+    """``dropna`` keeps only the rows holding a geometry."""
+    with_gap = GeoSeries([Point(0, 0), None, Point(2, 2)])
+    present = with_gap.loc[[0, 2]]
+    assert_geoseries_equal(with_gap.dropna(), present)
+
+
+@pytest.mark.parametrize("NA", [None, np.nan])
+def test_isna(NA):
+    s2 = GeoSeries([Point(0, 0), NA, Point(2, 2)], index=[2, 4, 5], name="tt")
+    exp = pd.Series([False, True, False], index=[2, 4, 5], name="tt")
+    res = s2.isnull()
+    # the mask must be a plain Series, not a subclass; compare types by identity
+    assert type(res) is pd.Series
+    assert_series_equal(res, exp)
+    assert_series_equal(s2.isna(), exp)
+    # notnull / notna are expected to be the element-wise negation of that mask
+    res = s2.notnull()
+    assert_series_equal(res, ~exp)
+    assert_series_equal(s2.notna(), ~exp)
+
+
+# Any / all
+
+
+def test_any_all():
+    """Empty geometries count as falsy for ``all`` / ``any``."""
+    empty = GeometryCollection([])
+    # (geometries, expected all(), expected any())
+    cases = [
+        ([empty, Point(1, 1)], False, True),
+        ([Point(1, 1), Point(1, 1)], True, True),
+        ([empty, empty], False, False),
+    ]
+    for geoms, expect_all, expect_any in cases:
+        series = GeoSeries(geoms)
+        assert bool(series.all()) is expect_all
+        assert bool(series.any()) is expect_any
+
+
+# Groupby / algos
+
+
+def test_sort_values():
+    """Sorting orders the points as expected in both directions."""
+    points = GeoSeries([Point(0, 0), Point(2, 2), Point(0, 2)])
+    assert points.sort_values().index.tolist() == [0, 2, 1]
+    assert points.sort_values(ascending=False).index.tolist() == [1, 2, 0]
+
+    # empty geoseries
+    nothing = points.iloc[:0]
+    assert_geoseries_equal(nothing.sort_values(), nothing)
+
+
+def test_sort_values_empty_missing():
+    s = GeoSeries([Point(0, 0), None, Point(), Point(1, 1)])
+    # default: NA sorts last, empty first
+    res = s.sort_values()
+    assert res.index.tolist() == [2, 0, 3, 1]
+
+    # descending: NA sorts last, empty right before the NA
+    res = s.sort_values(ascending=False)
+    assert res.index.tolist() == [3, 0, 2, 1]
+
+    # NAs first, empty first after NAs
+    res = s.sort_values(na_position="first")
+    assert res.index.tolist() == [1, 2, 0, 3]
+
+    # NAs first, descending with empty last
+    res = s.sort_values(ascending=False, na_position="first")
+    assert res.index.tolist() == [1, 3, 0, 2]
+
+    # all missing / empty
+    s = GeoSeries([None, None, None])
+    res = s.sort_values()
+    assert res.index.tolist() == [0, 1, 2]
+
+    s = GeoSeries([Point(), Point(), Point()])
+    res = s.sort_values()
+    assert res.index.tolist() == [0, 1, 2]
+
+    s = GeoSeries([Point(), None, Point()])
+    res = s.sort_values()
+    assert res.index.tolist() == [0, 2, 1]
+
+
+def test_unique():
+    with_repeat = GeoSeries([Point(0, 0), Point(0, 0), Point(2, 2)])
+    distinct = with_repeat.unique()
+    # TODO should have specialized GeometryArray assert method
+    assert_array_equal(distinct, from_shapely([Point(0, 0), Point(2, 2)]))
+
+
+def pd14_compat_index(index):
+    # wrap with from_shapely only when running pandas >= 1.4
+    if not compat.PANDAS_GE_14:
+        return index
+    return from_shapely(index)
+
+
+def test_value_counts():
+    # equal geometries are grouped: Point(0, 0) occurs twice, Point(1, 1) once
+    s = GeoSeries([Point(0, 0), Point(1, 1), Point(0, 0)])
+    res = s.value_counts()
+    if compat.PANDAS_GE_20:
+        name = "count"
+    else:
+        name = None
+    exp = pd.Series(
+        [2, 1], index=pd14_compat_index([Point(0, 0), Point(1, 1)]), name=name
+    )
+    assert_series_equal(res, exp)
+    # Check crs doesn't make a difference - note it is not kept in output index anyway
+    s2 = GeoSeries([Point(0, 0), Point(1, 1), Point(0, 0)], crs="EPSG:4326")
+    res2 = s2.value_counts()
+    assert_series_equal(res2, exp)
+    if compat.PANDAS_GE_14:
+        # TODO should/ can we fix CRS being lost
+        assert s2.value_counts().index.array.crs is None
+
+    # check mixed geometry
+    s3 = GeoSeries([Point(0, 0), LineString([[1, 1], [2, 2]]), Point(0, 0)])
+    res3 = s3.value_counts()
+    index = pd14_compat_index([Point(0, 0), LineString([[1, 1], [2, 2]])])
+    exp3 = pd.Series([2, 1], index=index, name=name)
+    assert_series_equal(res3, exp3)
+
+    # check None is handled: dropped by default request, kept with dropna=False
+    s4 = GeoSeries([Point(0, 0), None, Point(0, 0)])
+    res4 = s4.value_counts(dropna=True)
+    exp4_dropna = pd.Series([2], index=pd14_compat_index([Point(0, 0)]), name=name)
+    assert_series_equal(res4, exp4_dropna)
+    exp4_keepna = pd.Series(
+        [2, 1], index=pd14_compat_index([Point(0, 0), None]), name=name
+    )
+    res4_keepna = s4.value_counts(dropna=False)
+    assert_series_equal(res4_keepna, exp4_keepna)
+
+
+@pytest.mark.xfail(strict=False)
+def test_drop_duplicates_series():
+    # duplicated does not yet use EA machinery
+    # (https://github.com/pandas-dev/pandas/issues/27264)
+    # but relies on unstable hashing of unhashable objects in numpy array
+    # giving flaky test (https://github.com/pandas-dev/pandas/issues/27035)
+    same_twice = GeoSeries([Point(0, 0), Point(0, 0)])
+    deduplicated = same_twice.drop_duplicates()
+    assert len(deduplicated) == 1
+
+
+@pytest.mark.xfail(strict=False)
+def test_drop_duplicates_frame():
+    # duplicated does not yet use EA machinery
+    # (https://github.com/pandas-dev/pandas/issues/27264)
+    n_rows = 3
+    same_points = [Point(0, 0) for _ in range(n_rows)]
+    frame = GeoDataFrame({"geometry": same_points, "value1": range(n_rows)})
+    # judged on the geometry column alone, a single row is expected to remain
+    assert len(frame.drop_duplicates(subset="geometry")) == 1
+    # judged on all columns, every row is expected to remain
+    assert len(frame.drop_duplicates()) == n_rows
+
+
+def test_groupby(df):
+    # counts work fine, the geometry column is counted as well
+    res = df.groupby("value2").count()
+    exp = pd.DataFrame(
+        {"geometry": [2, 1], "value1": [2, 1], "value2": [1, 2]}
+    ).set_index("value2")
+    assert_frame_equal(res, exp)
+
+    # reductions ignore geometry column
+    if not compat.PANDAS_GE_20:
+        res = df.groupby("value2").sum()
+    else:
+        res = df.groupby("value2").sum(numeric_only=True)
+    exp = pd.DataFrame({"value1": [2, 1], "value2": [1, 2]}, dtype="int64").set_index(
+        "value2"
+    )
+    assert_frame_equal(res, exp)
+
+    # applying on the geometry column, resulting in a GeoSeries of unions
+    res = df.groupby("value2")["geometry"].apply(lambda x: x.union_all())
+
+    exp = GeoSeries(
+        [shapely.geometry.MultiPoint([(0, 0), (2, 2)]), Point(1, 1)],
+        index=pd.Index([1, 2], name="value2"),
+        name="geometry",
+    )
+    assert_series_equal(res, exp)
+
+    # apply on geometry column not resulting in new geometry
+    res = df.groupby("value2")["geometry"].apply(lambda x: x.union_all().area)
+    exp = pd.Series([0.0, 0.0], index=pd.Index([1, 2], name="value2"), name="geometry")
+
+    assert_series_equal(res, exp)
+
+
+def test_groupby_groups(df):
+    # a single group pulled out of the groupby is still a GeoDataFrame
+    group_one = df.groupby("value2").get_group(1)
+    assert isinstance(group_one, GeoDataFrame)
+    rows_of_group_one = df.loc[[0, 2]]
+    assert_frame_equal(group_one, rows_of_group_one)
+
+
+@pytest.mark.parametrize("crs", [None, "EPSG:4326"])
+@pytest.mark.parametrize("geometry_name", ["geometry", "geom"])
+def test_groupby_metadata(crs, geometry_name):
+    if crs and not compat.HAS_PYPROJ:
+        pytest.skip("requires pyproj")
+    # https://github.com/geopandas/geopandas/issues/2294
+    df = GeoDataFrame(
+        {
+            geometry_name: [Point(0, 0), Point(1, 1), Point(0, 0)],
+            "value1": np.arange(3, dtype="int64"),
+            "value2": np.array([1, 2, 1], dtype="int64"),
+        },
+        crs=crs,
+        geometry=geometry_name,
+    )
+
+    kwargs = {}
+    if compat.PANDAS_GE_22:
+        # pandas is deprecating that the group key is present as column in the
+        # dataframe passed to `func`. To suppress this warning, it introduced
+        # a new include_groups keyword
+        kwargs = dict(include_groups=False)
+
+    # dummy function asserting each group is a GeoDataFrame with the expected crs
+    def func(group):
+        assert isinstance(group, GeoDataFrame)
+        assert group.crs == crs
+
+    df.groupby("value2").apply(func, **kwargs)
+    # selecting the non-group columns -> no need to pass the keyword
+    if (
+        compat.PANDAS_GE_22
+        or (compat.PANDAS_GE_20 and geometry_name == "geometry")
+        or not compat.PANDAS_GE_20
+    ):
+        df.groupby("value2")[[geometry_name, "value1"]].apply(func)
+    else:
+        # https://github.com/geopandas/geopandas/pull/2966#issuecomment-1878816712
+        # with pandas 2.0 and 2.1 with geom col != geometry this is failing
+        with pytest.raises(AttributeError):
+            df.groupby("value2")[[geometry_name, "value1"]].apply(func)
+
+    # actual test with functionality: a spatial self-join within each group
+    res = df.groupby("value2").apply(
+        lambda x: geopandas.sjoin(x, x[[geometry_name, "value1"]], how="inner"),
+        **kwargs,
+    )
+
+    expected = (
+        df.take([0, 0, 2, 2, 1])
+        .set_index("value2", drop=compat.PANDAS_GE_22, append=True)
+        .swaplevel()
+        .rename(columns={"value1": "value1_left"})
+        .assign(value1_right=[0, 2, 0, 2, 1])
+    )
+    assert_geodataframe_equal(res.drop(columns=["index_right"]), expected)
+
+
+def test_apply(s):
+    """``apply`` yields a GeoSeries only when the function returns geometries."""
+    def identity(geom):
+        assert isinstance(geom, Point)
+        return geom
+
+    def x_coordinate(geom):
+        assert isinstance(geom, Point)
+        return geom.x
+
+    # function that returns geometry preserves GeoSeries class
+    same_geoms = s.apply(identity)
+    assert isinstance(same_geoms, GeoSeries)
+    assert_geoseries_equal(same_geoms, s)
+    # function that returns non-geometry results in Series
+    xs = s.apply(x_coordinate)
+    assert not isinstance(xs, GeoSeries)
+    assert_series_equal(xs, pd.Series([0.0, 1.0, 2.0]))
+
+
+def test_apply_loc_len1(df):
+    # subset of len 1 with loc -> bug in pandas with inconsistent Block ndim
+    # resulting in bug in apply
+    # https://github.com/geopandas/geopandas/issues/1078
+    one_geom = df.loc[[0], "geometry"]
+    # element-wise apply must agree with the vectorised attribute
+    via_apply = one_geom.apply(lambda geom: geom.is_empty)
+    np.testing.assert_allclose(via_apply, one_geom.is_empty)
+
+
+@pytest.mark.skipif(compat.PANDAS_GE_30, reason="convert_dtype is removed in pandas 3")
+def test_apply_convert_dtypes_keyword(s):
+    # ensure the convert_dtype keyword is accepted (a warning is expected on pandas >= 2.1)
+    if not compat.PANDAS_GE_21:
+        recorder = warnings.catch_warnings(record=True)
+    else:
+        recorder = pytest.warns()
+
+    with recorder as record:
+        res = s.apply(lambda x: x, convert_dtype=True, args=())
+    assert_geoseries_equal(res, s)
+
+    if compat.PANDAS_GE_21:
+        assert len(record) == 1
+        assert "the convert_dtype parameter" in str(record[0].message)
+    else:
+        assert len(record) == 0
+
+
+@pytest.mark.parametrize("crs", [None, "EPSG:4326"])
+def test_apply_no_geometry_result(df, crs):
+    """A frame-wide ``apply`` producing no geometries returns a plain DataFrame."""
+    if crs and not compat.HAS_PYPROJ:
+        pytest.skip("requires pyproj")
+    if crs:
+        df = df.set_crs(crs)
+
+    as_text = df.astype(str)
+    # both the axis=0 and the axis=1 call are checked against the same result
+    for axis in (0, 1):
+        stringified = df.apply(lambda col: col.astype(str), axis=axis)
+        assert type(stringified) is pd.DataFrame
+        assert_frame_equal(stringified, as_text)
+
+
+def test_apply_preserves_geom_col_name(df):
+    renamed = df.rename_geometry("geom")
+    # an identity apply over the columns must keep the custom geometry name
+    assert renamed.apply(lambda col: col, axis=0).geometry.name == "geom"
+
+
+def test_df_apply_returning_series(df):
+    """Row-wise ``apply`` picks the result type from the returned values."""
+    # https://github.com/geopandas/geopandas/issues/2283
+    geoms = df.apply(lambda row: row.geometry, axis=1)
+    assert_geoseries_equal(geoms, df.geometry, check_crs=False)
+    values = df.apply(lambda row: row.value1, axis=1)
+    assert_series_equal(values, df["value1"].rename(None))
+    # https://github.com/geopandas/geopandas/issues/2480
+    nans = df.apply(lambda x: float("NaN"), axis=1)
+    assert nans.dtype == "float64"
+    # assert list of nones is not promoted to GeometryDtype
+    nones = df.apply(lambda x: None, axis=1)
+    assert nones.dtype == "object"
+
+    # https://github.com/geopandas/geopandas/issues/2889
+    # contrived case such that `from_shapely` receives an array of geodataframes
+    frames = df.apply(lambda row: df.geometry.to_frame(), axis=1)
+    assert frames.dtype == "object"
+
+
+def test_df_apply_geometry_dtypes(df):
+    """Columns handed to ``apply`` are GeoSeries exactly for geometry columns."""
+    # https://github.com/geopandas/geopandas/issues/1852
+    seen = []
+
+    def record(column):
+        seen.append((column.name, type(column)))
+
+    df["geom2"] = df.geometry
+    df.apply(record)
+    assert seen == [
+        ("geometry", GeoSeries),
+        ("value1", pd.Series),
+        ("value2", pd.Series),
+        ("geom2", GeoSeries),
+    ]
+
+
+def test_pivot(df):
+    # https://github.com/geopandas/geopandas/issues/2057
+    # pivot failing due to creating a MultiIndex
+    plain_pivot = pd.DataFrame(df).pivot(columns="value1")
+    geo_pivot = df.pivot(columns="value1")
+    assert_geodataframe_equal(geo_pivot, GeoDataFrame(plain_pivot))
+
+
+def test_preserve_attrs(df):
+    # https://github.com/geopandas/geopandas/issues/1654
+    df.attrs["name"] = "my_name"
+    attrs = {"name": "my_name"}
+    assert df.attrs == attrs
+
+    # preserve attrs in indexing operations (check each subset, not the source)
+    for subset in [df[:2], df[df["value1"] > 2], df[["value2", "geometry"]]]:
+        assert subset.attrs == attrs
+
+    # preserve attrs in methods
+    df2 = df.reset_index()
+    assert df2.attrs == attrs
+
+    # https://github.com/geopandas/geopandas/issues/1875
+    df3 = df2.explode(index_parts=True)
+    assert df3.attrs == attrs
+
+
+def test_preserve_flags(df):
+    # https://github.com/geopandas/geopandas/issues/1654
+    df = df.set_flags(allows_duplicate_labels=False)
+    assert df.flags.allows_duplicate_labels is False
+
+    # preserve flags in indexing operations (check each subset, not the source)
+    for subset in [df[:2], df[df["value1"] > 2], df[["value2", "geometry"]]]:
+        assert subset.flags.allows_duplicate_labels is False
+
+    # preserve flags in methods
+    df2 = df.reset_index()
+    assert df2.flags.allows_duplicate_labels is False
+
+    # it is honored for operations that introduce duplicate labels
+    with pytest.raises(ValueError):
+        df.reindex([0, 0, 1])
+
+    with pytest.raises(ValueError):
+        df[["value1", "value1", "geometry"]]
+
+    with pytest.raises(ValueError):
+        pd.concat([df, df])
+
+
+def test_ufunc():
+    # this is calling a shapely ufunc, but we currently rely on pandas' implementation
+    # of `__array_ufunc__` to wrap the result back into a GeoSeries
+    points = GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)])
+    buffered = shapely.buffer(points, 2)
+    assert isinstance(buffered, GeoSeries)
+    # ensure the result is still writeable
+    # (https://github.com/geopandas/geopandas/issues/3178)
+    assert buffered.array._data.flags.writeable
+    replacement = Point(10, 10)
+    buffered.loc[0] = replacement
+    assert buffered.iloc[0] == replacement
@@ -0,0 +1,51 @@
+from geopandas.tools._show_versions import (
+    _get_C_info,
+    _get_deps_info,
+    _get_sys_info,
+    show_versions,
+)
+
+
+def test_get_sys_info():
+    """The system report contains the python, executable and machine keys."""
+    reported = _get_sys_info()
+    # every one of these keys has to be present
+    for key in ("python", "executable", "machine"):
+        assert key in reported
+
+
+def test_get_c_info():
+    """The C-library report contains the GEOS, GDAL and PROJ keys."""
+    reported = _get_C_info()
+    assert "GEOS" in reported
+    assert "GEOS lib" in reported
+    assert "GDAL" in reported
+    assert "GDAL data dir" in reported
+    assert "PROJ" in reported
+    assert "PROJ data dir" in reported
+
+
+def test_get_deps_info():
+    """Every listed Python dependency has a key in the dependency report."""
+    reported = _get_deps_info()
+    assert "geopandas" in reported
+    assert "pandas" in reported
+    assert "fiona" in reported
+    assert "numpy" in reported
+    assert "shapely" in reported
+    assert "pyproj" in reported
+    assert "matplotlib" in reported
+    assert "mapclassify" in reported
+    assert "geopy" in reported
+    assert "psycopg" in reported
+    assert "psycopg2" in reported
+    assert "geoalchemy2" in reported
+
+
+def test_show_versions(capsys):
+    """The text printed by ``show_versions`` mentions python, GEOS and geopandas."""
+    show_versions()
+    printed = capsys.readouterr().out
+    # all three markers have to show up in the captured stdout
+    for marker in ("python", "GEOS", "geopandas"):
+        assert marker in printed
@@ -0,0 +1,959 @@
+from math import sqrt
+
+import numpy as np
+
+import shapely
+from shapely.geometry import (
+    GeometryCollection,
+    LineString,
+    MultiPolygon,
+    Point,
+    Polygon,
+    box,
+)
+
+import geopandas
+from geopandas import GeoDataFrame, GeoSeries, read_file
+from geopandas import _compat as compat
+
+import pytest
+from numpy.testing import assert_array_equal
+
+
+class TestSeriesSindex:
+    def test_has_sindex(self):
+        """Test the has_sindex attribute."""
+        t1 = Polygon([(0, 0), (1, 0), (1, 1)])
+        t2 = Polygon([(0, 0), (1, 1), (0, 1)])
+
+        d = GeoDataFrame({"geom": [t1, t2]}, geometry="geom")
+        assert not d.has_sindex
+        d.sindex
+        assert d.has_sindex
+        d.geometry.values._sindex = None
+        assert not d.has_sindex
+        d.sindex
+        assert d.has_sindex
+
+        s = GeoSeries([t1, t2])
+        assert not s.has_sindex
+        s.sindex
+        assert s.has_sindex
+        s.values._sindex = None
+        assert not s.has_sindex
+        s.sindex
+        assert s.has_sindex
+
+    def test_empty_geoseries(self):
+        """Tests creating a spatial index from an empty GeoSeries."""
+        s = GeoSeries(dtype=object)
+        assert not s.sindex
+        assert len(s.sindex) == 0
+
+    def test_point(self):
+        s = GeoSeries([Point(0, 0)])
+        assert s.sindex.size == 1
+        hits = s.sindex.intersection((-1, -1, 1, 1))
+        assert len(list(hits)) == 1
+        hits = s.sindex.intersection((-2, -2, -1, -1))
+        assert len(list(hits)) == 0
+
+    def test_empty_point(self):
+        """Tests that a single empty Point results in an empty tree."""
+        s = GeoSeries([Point()])
+        assert not s.sindex
+        assert len(s.sindex) == 0
+
+    def test_polygons(self):
+        t1 = Polygon([(0, 0), (1, 0), (1, 1)])
+        t2 = Polygon([(0, 0), (1, 1), (0, 1)])
+        sq = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
+        s = GeoSeries([t1, t2, sq])
+        assert s.sindex.size == 3
+
+    @pytest.mark.filterwarnings("ignore:The series.append method is deprecated")
+    @pytest.mark.skipif(compat.PANDAS_GE_20, reason="append removed in pandas 2.0")
+    def test_polygons_append(self):
+        t1 = Polygon([(0, 0), (1, 0), (1, 1)])
+        t2 = Polygon([(0, 0), (1, 1), (0, 1)])
+        sq = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
+        s = GeoSeries([t1, t2, sq])
+        t = GeoSeries([t1, t2, sq], [3, 4, 5])
+        s = s.append(t)
+        assert len(s) == 6
+        assert s.sindex.size == 6
+
+    def test_lazy_build(self):
+        s = GeoSeries([Point(0, 0)])
+        assert s.values._sindex is None
+        assert s.sindex.size == 1
+        assert s.values._sindex is not None
+
+    def test_rebuild_on_item_change(self):
+        s = GeoSeries([Point(0, 0)])
+        original_index = s.sindex
+        s.iloc[0] = Point(0, 0)
+        assert s.sindex is not original_index
+
+    def test_rebuild_on_slice(self):
+        s = GeoSeries([Point(0, 0), Point(0, 0)])
+        original_index = s.sindex
+        # Select a subset of rows
+        sliced = s.iloc[:1]
+        assert sliced.sindex is not original_index
+        # Select all rows
+        sliced = s.iloc[:]
+        assert sliced.sindex is original_index
+        # Select all rows and flip
+        sliced = s.iloc[::-1]
+        assert sliced.sindex is not original_index
+
+
+class TestFrameSindex:
+    def setup_method(self):
+        data = {
+            "A": range(5),
+            "B": range(-5, 0),
+            "geom": [Point(x, y) for x, y in zip(range(5), range(5))],
+        }
+        self.df = GeoDataFrame(data, geometry="geom")
+
+    def test_sindex(self):
+        self.df.crs = "epsg:4326"
+        assert self.df.sindex.size == 5
+        hits = list(self.df.sindex.intersection((2.5, 2.5, 4, 4)))
+        assert len(hits) == 2
+        assert hits[0] == 3
+
+    def test_lazy_build(self):
+        assert self.df.geometry.values._sindex is None
+        assert self.df.sindex.size == 5
+        assert self.df.geometry.values._sindex is not None
+
+    def test_sindex_rebuild_on_set_geometry(self):
+        # First build the sindex
+        assert self.df.sindex is not None
+        original_index = self.df.sindex
+        self.df.set_geometry(
+            [Point(x, y) for x, y in zip(range(5, 10), range(5, 10))], inplace=True
+        )
+        assert self.df.sindex is not original_index
+
+    def test_rebuild_on_row_slice(self):
+        # Selecting a subset of rows rebuilds
+        original_index = self.df.sindex
+        sliced = self.df.iloc[:1]
+        assert sliced.sindex is not original_index
+        # Slicing all does not rebuild
+        original_index = self.df.sindex
+        sliced = self.df.iloc[:]
+        assert sliced.sindex is original_index
+        # Re-ordering rebuilds
+        sliced = self.df.iloc[::-1]
+        assert sliced.sindex is not original_index
+
+    def test_rebuild_on_single_col_selection(self):
+        """Selecting a single column should not rebuild the spatial index."""
+        # Selecting geometry column preserves the index
+        original_index = self.df.sindex
+        geometry_col = self.df["geom"]
+        assert geometry_col.sindex is original_index
+        geometry_col = self.df.geometry
+        assert geometry_col.sindex is original_index
+
+    def test_rebuild_on_multiple_col_selection(self):
+        """Selecting a subset of columns preserves the index, except on pandas 2.x."""
+        original_index = self.df.sindex
+        # Selecting a subset of columns preserves the index for pandas < 2.0
+        # with pandas 2.0, the column is now copied, losing the index. But
+        # with pandas >= 3.0 and Copy-on-Write this is preserved again
+        subset1 = self.df[["geom", "A"]]
+        if compat.PANDAS_GE_20 and not compat.PANDAS_GE_30:
+            assert subset1.sindex is not original_index
+        else:
+            assert subset1.sindex is original_index
+        subset2 = self.df[["A", "geom"]]
+        if compat.PANDAS_GE_20 and not compat.PANDAS_GE_30:
+            assert subset2.sindex is not original_index
+        else:
+            assert subset2.sindex is original_index
+
+    def test_rebuild_on_update_inplace(self):
+        gdf = self.df.copy()
+        old_sindex = gdf.sindex
+        # sorting in place
+        gdf.sort_values("A", ascending=False, inplace=True)
+        # spatial index should be invalidated
+        assert not gdf.has_sindex
+        new_sindex = gdf.sindex
+        # and should be different
+        assert new_sindex is not old_sindex
+
+        # sorting should still have happened though
+        assert gdf.index.tolist() == [4, 3, 2, 1, 0]
+
+    def test_update_inplace_no_rebuild(self):
+        gdf = self.df.copy()
+        old_sindex = gdf.sindex
+        gdf.rename(columns={"A": "AA"}, inplace=True)
+        # a rename shouldn't invalidate the index
+        assert gdf.has_sindex
+        # and the "new" should be the same
+        new_sindex = gdf.sindex
+        assert old_sindex is new_sindex
+
+
+# Skip to accommodate Shapely geometries being unhashable # TODO unskip?
+@pytest.mark.skip
+@pytest.mark.usefixtures("_setup_class_nybb_filename")
+class TestJoinSindex:
+    def setup_method(self):
+        self.boros = read_file(self.nybb_filename)
+
+    def test_merge_geo(self):
+        # First check that we get hits from the boros frame.
+        tree = self.boros.sindex
+        hits = tree.intersection((1012821.80, 229228.26))
+        res = [self.boros.iloc[hit]["BoroName"] for hit in hits]
+        assert res == ["Bronx", "Queens"]
+
+        # Check that we only get the Bronx from this view.
+        first = self.boros[self.boros["BoroCode"] < 3]
+        tree = first.sindex
+        hits = tree.intersection((1012821.80, 229228.26))
+        res = [first.iloc[hit]["BoroName"] for hit in hits]
+        assert res == ["Bronx"]
+
+        # Check that we only get Queens from this view (res is a list, like above).
+        second = self.boros[self.boros["BoroCode"] >= 3]
+        tree = second.sindex
+        hits = tree.intersection((1012821.80, 229228.26))
+        res = [second.iloc[hit]["BoroName"] for hit in hits]
+        assert res == ["Queens"]
+
+        # Get both the Bronx and Queens again.
+        merged = first.merge(second, how="outer")
+        assert len(merged) == 5
+        assert merged.sindex.size == 5
+        tree = merged.sindex
+        hits = tree.intersection((1012821.80, 229228.26))
+        res = [merged.iloc[hit]["BoroName"] for hit in hits]
+        assert res == ["Bronx", "Queens"]
+
+
+class TestShapelyInterface:
+    def setup_method(self):
+        data = {
+            "geom": [Point(x, y) for x, y in zip(range(5), range(5))]
+            + [box(10, 10, 20, 20)]  # include a box geometry
+        }
+        self.df = GeoDataFrame(data, geometry="geom")
+        self.expected_size = len(data["geom"])
+
+    # --------------------------- `intersection` tests -------------------------- #
+    @pytest.mark.parametrize(
+        "test_geom, expected",
+        (
+            ((-1, -1, -0.5, -0.5), []),
+            ((-0.5, -0.5, 0.5, 0.5), [0]),
+            ((0, 0, 1, 1), [0, 1]),
+            ((0, 0), [0]),
+        ),
+    )
+    def test_intersection_bounds_tuple(self, test_geom, expected):
+        """Tests the `intersection` method with valid inputs."""
+        res = list(self.df.sindex.intersection(test_geom))
+        assert_array_equal(res, expected)
+
+    @pytest.mark.parametrize("test_geom", ((-1, -1, -0.5), -0.5, None, Point(0, 0)))
+    def test_intersection_invalid_bounds_tuple(self, test_geom):
+        """Tests the `intersection` method with invalid inputs."""
+        with pytest.raises(TypeError):
+            # we raise a useful TypeError
+            self.df.sindex.intersection(test_geom)
+
+    # ------------------------------ `query` tests ------------------------------ #
+    @pytest.mark.parametrize(
+        "predicate, test_geom, expected",
+        (
+            (None, box(-1, -1, -0.5, -0.5), []),  # bbox does not intersect
+            (None, box(-0.5, -0.5, 0.5, 0.5), [0]),  # bbox intersects
+            (None, box(0, 0, 1, 1), [0, 1]),  # bbox intersects multiple
+            (
+                None,
+                LineString([(0, 1), (1, 0)]),
+                [0, 1],
+            ),  # bbox intersects but not geometry
+            ("intersects", box(-1, -1, -0.5, -0.5), []),  # bbox does not intersect
+            (
+                "intersects",
+                box(-0.5, -0.5, 0.5, 0.5),
+                [0],
+            ),  # bbox and geometry intersect
+            (
+                "intersects",
+                box(0, 0, 1, 1),
+                [0, 1],
+            ),  # bbox and geometry intersect multiple
+            (
+                "intersects",
+                LineString([(0, 1), (1, 0)]),
+                [],
+            ),  # bbox intersects but not geometry
+            ("within", box(0.25, 0.28, 0.75, 0.75), []),  # does not intersect
+            ("within", box(0, 0, 10, 10), []),  # intersects but is not within
+            ("within", box(11, 11, 12, 12), [5]),  # intersects and is within
+            ("within", LineString([(0, 1), (1, 0)]), []),  # intersects but not within
+            ("contains", box(0, 0, 1, 1), []),  # intersects but does not contain
+            ("contains", box(0, 0, 1.001, 1.001), [1]),  # intersects and contains
+            ("contains", box(0.5, 0.5, 1.5, 1.5), [1]),  # intersects and contains
+            ("contains", box(-1, -1, 2, 2), [0, 1]),  # intersects and contains multiple
+            (
+                "contains",
+                LineString([(0, 1), (1, 0)]),
+                [],
+            ),  # intersects but not contains
+            ("touches", box(-1, -1, 0, 0), [0]),  # bbox intersects and touches
+            (
+                "touches",
+                box(-0.5, -0.5, 1.5, 1.5),
+                [],
+            ),  # bbox intersects but geom does not touch
+            (
+                "contains",
+                box(10, 10, 20, 20),
+                [5],
+            ),  # contains but does not contains_properly
+            (
+                "covers",
+                box(-0.5, -0.5, 1, 1),
+                [0, 1],
+            ),  # covers (0, 0) and (1, 1)
+            (
+                "covers",
+                box(0.001, 0.001, 0.99, 0.99),
+                [],
+            ),  # does not cover any
+            (
+                "covers",
+                box(0, 0, 1, 1),
+                [0, 1],
+            ),  # covers but does not contain
+            (
+                "contains_properly",
+                box(0, 0, 1, 1),
+                [],
+            ),  # intersects but does not contain
+            (
+                "contains_properly",
+                box(0, 0, 1.001, 1.001),
+                [1],
+            ),  # intersects 2 and contains 1
+            (
+                "contains_properly",
+                box(0.5, 0.5, 1.001, 1.001),
+                [1],
+            ),  # intersects 1 and contains 1
+            (
+                "contains_properly",
+                box(0.5, 0.5, 1.5, 1.5),
+                [1],
+            ),  # intersects and contains
+            (
+                "contains_properly",
+                box(-1, -1, 2, 2),
+                [0, 1],
+            ),  # intersects and contains multiple
+            (
+                "contains_properly",
+                box(10, 10, 20, 20),
+                [],
+            ),  # contains but does not contains_properly
+        ),
+    )
+    def test_query(self, predicate, test_geom, expected):
+        """Tests the `query` method with valid inputs and valid predicates."""
+        res = self.df.sindex.query(test_geom, predicate=predicate)
+        assert_array_equal(res, expected)
+
+    def test_query_invalid_geometry(self):
+        """Tests the `query` method with invalid geometry."""
+        with pytest.raises(TypeError):
+            self.df.sindex.query("notavalidgeom")
+
+    @pytest.mark.skipif(not compat.GEOS_GE_310, reason="Requires GEOS 3.10")
+    @pytest.mark.parametrize(
+        "distance, test_geom, expected",
+        (
+            # bounds don't intersect and not within distance=0
+            (
+                0,
+                box(9.0, 9.0, 9.9, 9.9),
+                [],
+            ),
+            # bounds don't intersect but is within distance=1
+            (
+                1,
+                box(9.0, 9.0, 9.9, 9.9),
+                [5],
+            ),
+            # within 1-D absolute distance in both axes, but not euclidean distance
+            (
+                0.5,
+                Point(0.5, 0.5),
+                [],
+            ),
+            # same as before but within euclidean distance
+            (
+                sqrt(2 * 0.5**2) + 1e-9,
+                Point(0.5, 0.5),
+                [0, 1],
+            ),
+            # less than euclidean distance between points, multi-object
+            (
+                sqrt(2) - 1e-9,
+                [
+                    Polygon([(0, 0), (1, 0), (1, 1)]),
+                    Polygon([(1, 1), (2, 1), (2, 2)]),
+                ],  # multi-object test
+                [[0, 0, 1, 1], [0, 1, 1, 2]],
+            ),
+            # more than euclidean distance between points, multi-object
+            (
+                sqrt(2) + 1e-9,
+                [
+                    Polygon([(0, 0), (1, 0), (1, 1)]),
+                    Polygon([(1, 1), (2, 1), (2, 2)]),
+                ],
+                [[0, 0, 0, 1, 1, 1, 1], [0, 1, 2, 0, 1, 2, 3]],
+            ),
+            # distance is array-like, broadcastable to geometry
+            (
+                [2, 10],
+                [Point(0.5, 0.5), Point(1, 1)],
+                [[0, 0, 1, 1, 1, 1, 1], [0, 1, 0, 1, 2, 3, 4]],
+            ),
+        ),
+    )
+    def test_query_dwithin(self, distance, test_geom, expected):
+        """Tests the `query` method with predicates that require keyword arguments."""
+        res = self.df.sindex.query(test_geom, predicate="dwithin", distance=distance)
+        assert_array_equal(res, expected)
+
+    @pytest.mark.skipif(not compat.GEOS_GE_310, reason="Requires GEOS 3.10")
+    def test_dwithin_no_distance(self):
+        """Tests the `query` method with keyword arguments that are
+        invalid for certain predicates."""
+        with pytest.raises(
+            ValueError, match="'distance' parameter is required for 'dwithin' predicate"
+        ):
+            self.df.sindex.query(Point(0, 0), predicate="dwithin")
+
+    @pytest.mark.parametrize(
+        "predicate",
+        [
+            None,
+            "contains",
+            "contains_properly",
+            "covered_by",
+            "covers",
+            "crosses",
+            "intersects",
+            "overlaps",
+            "touches",
+            "within",
+        ],
+    )
+    def test_query_distance_invalid(self, predicate):
+        """Tests the `query` method with keyword arguments that are
+        invalid for certain predicates."""
+        msg = "'distance' parameter is only supported in combination with 'dwithin'"
+        with pytest.raises(ValueError, match=msg):
+            self.df.sindex.query(Point(0, 0), predicate=predicate, distance=0)
+
+    @pytest.mark.skipif(
+        compat.GEOS_GE_310, reason="Test for 'dwithin'-incompatible versions of GEOS"
+    )
+    def test_dwithin_requirements(self):
+        """Tests whether a ValueError is raised when trying to use dwithin with
+        incompatible versions of shapely or pyGEOS
+        """
+        with pytest.raises(
+            ValueError, match="predicate = 'dwithin' requires GEOS >= 3.10.0"
+        ):
+            self.df.sindex.query(Point(0, 0), predicate="dwithin", distance=0)
+
+    @pytest.mark.parametrize(
+        "test_geom, expected_value",
+        [
+            (None, []),
+            (GeometryCollection(), []),
+            (Point(), []),
+            (MultiPolygon(), []),
+            (Polygon(), []),
+        ],
+    )
+    def test_query_empty_geometry(self, test_geom, expected_value):
+        """Tests the `query` method with empty geometry."""
+        res = self.df.sindex.query(test_geom)
+        assert_array_equal(res, expected_value)
+
+    def test_query_invalid_predicate(self):
+        """Tests the `query` method with invalid predicates."""
+        test_geom = box(-1, -1, -0.5, -0.5)
+        with pytest.raises(ValueError):
+            self.df.sindex.query(test_geom, predicate="test")
+
+    @pytest.mark.parametrize(
+        "sort, expected",
+        (
+            (True, [[0, 0, 0], [0, 1, 2]]),
+            # False could be anything, at least we'll know if it changes
+            (False, [[0, 0, 0], [0, 1, 2]]),
+        ),
+    )
+    def test_query_sorting(self, sort, expected):
+        """Check that results from `query` don't depend on the
+        order of geometries.
+        """
+        # these geometries come from a reported issue:
+        # https://github.com/geopandas/geopandas/issues/1337
+        # there is no theoretical reason they were chosen
+        test_polys = GeoSeries([Polygon([(1, 1), (3, 1), (3, 3), (1, 3)])])
+        tree_polys = GeoSeries(
+            [
+                Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
+                Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]),
+                Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
+            ]
+        )
+        expected = [0, 1, 2]
+
+        test_geo = test_polys.values[0]
+        res = tree_polys.sindex.query(test_geo, sort=sort)
+
+        # asserting the same elements
+        assert sorted(res) == sorted(expected)
+        # asserting the exact array can fail if sort=False
+        try:
+            assert_array_equal(res, expected)
+        except AssertionError as e:
+            if sort is False:
+                pytest.xfail(
+                    "rtree results are known to be unordered, see "
+                    "https://github.com/geopandas/geopandas/issues/1337\n"
+                    "Expected:\n {}\n".format(expected)
+                    + "Got:\n {}\n".format(res.tolist())
+                )
+            raise e
+
+    # ------------------------- `query_bulk` tests -------------------------- #
+    @pytest.mark.parametrize(
+        "predicate, test_geom, expected",
+        (
+            (None, [(-1, -1, -0.5, -0.5)], [[], []]),
+            (None, [(-0.5, -0.5, 0.5, 0.5)], [[0], [0]]),
+            (None, [(0, 0, 1, 1)], [[0, 0], [0, 1]]),
+            ("intersects", [(-1, -1, -0.5, -0.5)], [[], []]),
+            ("intersects", [(-0.5, -0.5, 0.5, 0.5)], [[0], [0]]),
+            ("intersects", [(0, 0, 1, 1)], [[0, 0], [0, 1]]),
+            # only second geom intersects
+            ("intersects", [(-1, -1, -0.5, -0.5), (-0.5, -0.5, 0.5, 0.5)], [[1], [0]]),
+            # both geoms intersect
+            (
+                "intersects",
+                [(-1, -1, 1, 1), (-0.5, -0.5, 0.5, 0.5)],
+                [[0, 0, 1], [0, 1, 0]],
+            ),
+            ("within", [(0.25, 0.28, 0.75, 0.75)], [[], []]),  # does not intersect
+            ("within", [(0, 0, 10, 10)], [[], []]),  # intersects but is not within
+            ("within", [(11, 11, 12, 12)], [[0], [5]]),  # intersects and is within
+            (
+                "contains",
+                [(0, 0, 1, 1)],
+                [[], []],
+            ),  # intersects and covers, but does not contain
+            (
+                "contains",
+                [(0, 0, 1.001, 1.001)],
+                [[0], [1]],
+            ),  # intersects 2 and contains 1
+            (
+                "contains",
+                [(0.5, 0.5, 1.001, 1.001)],
+                [[0], [1]],
+            ),  # intersects 1 and contains 1
+            ("contains", [(0.5, 0.5, 1.5, 1.5)], [[0], [1]]),  # intersects and contains
+            (
+                "contains",
+                [(-1, -1, 2, 2)],
+                [[0, 0], [0, 1]],
+            ),  # intersects and contains multiple
+            (
+                "contains",
+                [(10, 10, 20, 20)],
+                [[0], [5]],
+            ),  # contains but does not contains_properly
+            ("touches", [(-1, -1, 0, 0)], [[0], [0]]),  # bbox intersects and touches
+            (
+                "touches",
+                [(-0.5, -0.5, 1.5, 1.5)],
+                [[], []],
+            ),  # bbox intersects but geom does not touch
+            (
+                "covers",
+                [(-0.5, -0.5, 1, 1)],
+                [[0, 0], [0, 1]],
+            ),  # covers (0, 0) and (1, 1)
+            (
+                "covers",
+                [(0.001, 0.001, 0.99, 0.99)],
+                [[], []],
+            ),  # does not cover any
+            (
+                "covers",
+                [(0, 0, 1, 1)],
+                [[0, 0], [0, 1]],
+            ),  # covers but does not contain
+            (
+                "contains_properly",
+                [(0, 0, 1, 1)],
+                [[], []],
+            ),  # intersects but does not contain
+            (
+                "contains_properly",
+                [(0, 0, 1.001, 1.001)],
+                [[0], [1]],
+            ),  # intersects 2 and contains 1
+            (
+                "contains_properly",
+                [(0.5, 0.5, 1.001, 1.001)],
+                [[0], [1]],
+            ),  # intersects 1 and contains 1
+            (
+                "contains_properly",
+                [(0.5, 0.5, 1.5, 1.5)],
+                [[0], [1]],
+            ),  # intersects and contains
+            (
+                "contains_properly",
+                [(-1, -1, 2, 2)],
+                [[0, 0], [0, 1]],
+            ),  # intersects and contains multiple
+            (
+                "contains_properly",
+                [(10, 10, 20, 20)],
+                [[], []],
+            ),  # contains but does not contains_properly
+        ),
+    )
+    def test_query_bulk(self, predicate, test_geom, expected):
+        """Tests the `query` method with valid
+        inputs and valid predicates.
+        """
+        res = self.df.sindex.query(
+            [box(*geom) for geom in test_geom], predicate=predicate
+        )
+        assert_array_equal(res, expected)
+
+    @pytest.mark.parametrize(
+        "test_geoms, expected_value",
+        [
+            # single empty geometry
+            ([GeometryCollection()], [[], []]),
+            # None should be skipped
+            ([GeometryCollection(), None], [[], []]),
+            ([None], [[], []]),
+            ([None, box(-0.5, -0.5, 0.5, 0.5), None], [[1], [0]]),
+        ],
+    )
+    def test_query_bulk_empty_geometry(self, test_geoms, expected_value):
+        """Tests the `query` method with an empty geometries."""
+        res = self.df.sindex.query(test_geoms)
+        assert_array_equal(res, expected_value)
+
+    def test_query_bulk_empty_input_array(self):
+        """Tests the `query` method with an empty input array."""
+        test_array = np.array([], dtype=object)
+        expected_value = [[], []]
+        res = self.df.sindex.query(test_array)
+        assert_array_equal(res, expected_value)
+
+    def test_query_bulk_invalid_input_geometry(self):
+        """
+        Tests the `query` method with invalid input for the `geometry` parameter.
+        """
+        test_array = "notanarray"
+        with pytest.raises(TypeError):
+            self.df.sindex.query(test_array)
+
+    def test_query_bulk_invalid_predicate(self):
+        """Tests the `query` method with invalid predicates."""
+        test_geom_bounds = (-1, -1, -0.5, -0.5)
+        test_predicate = "test"
+
+        with pytest.raises(ValueError):
+            self.df.sindex.query([box(*test_geom_bounds)], predicate=test_predicate)
+
+    @pytest.mark.parametrize(
+        "predicate, test_geom, expected",
+        (
+            (None, (-1, -1, -0.5, -0.5), [[], []]),
+            ("intersects", (-1, -1, -0.5, -0.5), [[], []]),
+            ("contains", (-1, -1, 1, 1), [[0], [0]]),
+        ),
+    )
+    def test_query_bulk_input_type(self, predicate, test_geom, expected):
+        """Tests that query can accept a GeoSeries, GeometryArray or
+        numpy array.
+        """
+        # pass through GeoSeries to test input type
+        test_geom = geopandas.GeoSeries([box(*test_geom)], index=["0"])
+
+        # test GeoSeries
+        res = self.df.sindex.query(test_geom, predicate=predicate)
+        assert_array_equal(res, expected)
+
+        # test GeometryArray
+        res = self.df.sindex.query(test_geom.geometry, predicate=predicate)
+        assert_array_equal(res, expected)
+        res = self.df.sindex.query(test_geom.geometry.values, predicate=predicate)
+        assert_array_equal(res, expected)
+
+        # test numpy array
+        res = self.df.sindex.query(
+            test_geom.geometry.values.to_numpy(), predicate=predicate
+        )
+        assert_array_equal(res, expected)
+        res = self.df.sindex.query(
+            test_geom.geometry.values.to_numpy(), predicate=predicate
+        )
+        assert_array_equal(res, expected)
+
+    @pytest.mark.parametrize(
+        "sort, expected",
+        (
+            (True, [[0, 0, 0], [0, 1, 2]]),
+            # False could be anything, at least we'll know if it changes
+            (False, [[0, 0, 0], [0, 1, 2]]),
+        ),
+    )
+    def test_query_bulk_sorting(self, sort, expected):
+        """Check that results from `query` don't depend
+        on the order of geometries.
+        """
+        # these geometries come from a reported issue:
+        # https://github.com/geopandas/geopandas/issues/1337
+        # there is no theoretical reason they were chosen
+        test_polys = GeoSeries([Polygon([(1, 1), (3, 1), (3, 3), (1, 3)])])
+        tree_polys = GeoSeries(
+            [
+                Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
+                Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]),
+                Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
+            ]
+        )
+
+        res = tree_polys.sindex.query(test_polys, sort=sort)
+
+        # asserting the same elements
+        assert sorted(res[0]) == sorted(expected[0])
+        assert sorted(res[1]) == sorted(expected[1])
+        # asserting the exact array can fail if sort=False
+        try:
+            assert_array_equal(res, expected)
+        except AssertionError as e:
+            if sort is False:
+                pytest.xfail(
+                    "rtree results are known to be unordered, see "
+                    "https://github.com/geopandas/geopandas/issues/1337\n"
+                    "Expected:\n {}\n".format(expected)
+                    + "Got:\n {}\n".format(res.tolist())
+                )
+            raise e
+
+    # ------------------------- `nearest` tests ------------------------- #
+    @pytest.mark.parametrize("return_all", [True, False])
+    @pytest.mark.parametrize(
+        "geometry,expected",
+        [
+            ([0.25, 0.25], [[0], [0]]),
+            ([0.75, 0.75], [[0], [1]]),
+        ],
+    )
+    def test_nearest_single(self, geometry, expected, return_all):
+        geoms = shapely.points(np.arange(10), np.arange(10))
+        df = geopandas.GeoDataFrame({"geometry": geoms})
+
+        p = Point(geometry)
+        res = df.sindex.nearest(p, return_all=return_all)
+        assert_array_equal(res, expected)
+
+        p = shapely.points(geometry)
+        res = df.sindex.nearest(p, return_all=return_all)
+        assert_array_equal(res, expected)
+
+    @pytest.mark.parametrize("return_all", [True, False])
+    @pytest.mark.parametrize(
+        "geometry,expected",
+        [
+            ([(1, 1), (0, 0)], [[0, 1], [1, 0]]),
+            ([(1, 1), (0.25, 1)], [[0, 1], [1, 1]]),
+        ],
+    )
+    def test_nearest_multi(self, geometry, expected, return_all):
+        geoms = shapely.points(np.arange(10), np.arange(10))
+        df = geopandas.GeoDataFrame({"geometry": geoms})
+
+        ps = [Point(p) for p in geometry]
+        res = df.sindex.nearest(ps, return_all=return_all)
+        assert_array_equal(res, expected)
+
+        ps = shapely.points(geometry)
+        res = df.sindex.nearest(ps, return_all=return_all)
+        assert_array_equal(res, expected)
+
+        s = geopandas.GeoSeries(ps)
+        res = df.sindex.nearest(s, return_all=return_all)
+        assert_array_equal(res, expected)
+
+        x, y = zip(*geometry)
+        ga = geopandas.points_from_xy(x, y)
+        res = df.sindex.nearest(ga, return_all=return_all)
+        assert_array_equal(res, expected)
+
+    @pytest.mark.parametrize("return_all", [True, False])
+    @pytest.mark.parametrize(
+        "geometry,expected",
+        [
+            (None, [[], []]),
+            ([None], [[], []]),
+        ],
+    )
+    def test_nearest_none(self, geometry, expected, return_all):
+        geoms = shapely.points(np.arange(10), np.arange(10))
+        df = geopandas.GeoDataFrame({"geometry": geoms})
+
+        res = df.sindex.nearest(geometry, return_all=return_all)
+        assert_array_equal(res, expected)
+
+    @pytest.mark.parametrize("return_distance", [True, False])
+    @pytest.mark.parametrize(
+        "return_all,max_distance,expected",
+        [
+            (True, None, ([[0, 0, 1], [0, 1, 5]], [sqrt(0.5), sqrt(0.5), sqrt(50)])),
+            (False, None, ([[0, 1], [0, 5]], [sqrt(0.5), sqrt(50)])),
+            (True, 1, ([[0, 0], [0, 1]], [sqrt(0.5), sqrt(0.5)])),
+            (False, 1, ([[0], [0]], [sqrt(0.5)])),
+        ],
+    )
+    def test_nearest_max_distance(
+        self, expected, max_distance, return_all, return_distance
+    ):
+        geoms = shapely.points(np.arange(10), np.arange(10))
+        df = geopandas.GeoDataFrame({"geometry": geoms})
+
+        ps = [Point(0.5, 0.5), Point(0, 10)]
+        res = df.sindex.nearest(
+            ps,
+            return_all=return_all,
+            max_distance=max_distance,
+            return_distance=return_distance,
+        )
+        if return_distance:
+            assert_array_equal(res[0], expected[0])
+            assert_array_equal(res[1], expected[1])
+        else:
+            assert_array_equal(res, expected[0])
+
+    @pytest.mark.parametrize("return_distance", [True, False])
+    @pytest.mark.parametrize(
+        "return_all,max_distance,exclusive,expected",
+        [
+            (False, None, False, ([[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], 5 * [0])),
+            (False, None, True, ([[0, 1, 2, 3, 4], [1, 0, 1, 2, 3]], 5 * [sqrt(2)])),
+            (True, None, False, ([[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], 5 * [0])),
+            (
+                True,
+                None,
+                True,
+                ([[0, 1, 1, 2, 2, 3, 3, 4], [1, 0, 2, 1, 3, 2, 4, 3]], 8 * [sqrt(2)]),
+            ),
+            (False, 1.1, True, ([[1, 2, 5], [5, 5, 1]], 3 * [1])),
+            (True, 1.1, True, ([[1, 2, 5, 5], [5, 5, 1, 2]], 4 * [1])),
+        ],
+    )
+    def test_nearest_exclusive(
+        self, expected, max_distance, return_all, return_distance, exclusive
+    ):
+        geoms = shapely.points(np.arange(5), np.arange(5))
+        if max_distance:
+            # add a non grid point
+            geoms = np.append(geoms, [Point(1, 2)])
+
+        df = geopandas.GeoDataFrame({"geometry": geoms})
+
+        ps = geoms
+        res = df.sindex.nearest(
+            ps,
+            return_all=return_all,
+            max_distance=max_distance,
+            return_distance=return_distance,
+            exclusive=exclusive,
+        )
+        if return_distance:
+            assert_array_equal(res[0], expected[0])
+            assert_array_equal(res[1], expected[1])
+        else:
+            assert_array_equal(res, expected[0])
+
+    # --------------------------- misc tests ---------------------------- #
+
+    def test_empty_tree_geometries(self):
+        """Tests building sindex with interleaved empty geometries."""
+        geoms = [Point(0, 0), None, Point(), Point(1, 1), Point()]
+        df = geopandas.GeoDataFrame(geometry=geoms)
+        assert df.sindex.query(Point(1, 1))[0] == 3
+
+    def test_size(self):
+        """Tests the `size` property."""
+        assert self.df.sindex.size == self.expected_size
+
+    def test_len(self):
+        """Tests the `__len__` method of spatial indexes."""
+        assert len(self.df.sindex) == self.expected_size
+
+    def test_is_empty(self):
+        """Tests the `is_empty` property."""
+        # create empty tree
+        empty = geopandas.GeoSeries([], dtype=object)
+        assert empty.sindex.is_empty
+        empty = geopandas.GeoSeries([None])
+        assert empty.sindex.is_empty
+        empty = geopandas.GeoSeries([Point()])
+        assert empty.sindex.is_empty
+        # create a non-empty tree
+        non_empty = geopandas.GeoSeries([Point(0, 0)])
+        assert not non_empty.sindex.is_empty
+
+    @pytest.mark.parametrize(
+        "predicate, expected_shape",
+        [
+            (None, (2, 471)),
+            ("intersects", (2, 213)),
+            ("within", (2, 213)),
+            ("contains", (2, 0)),
+            ("overlaps", (2, 0)),
+            ("crosses", (2, 0)),
+            ("touches", (2, 0)),
+        ],
+    )
+    def test_integration_natural_earth(
+        self, predicate, expected_shape, naturalearth_lowres, naturalearth_cities
+    ):
+        """Tests output sizes for the naturalearth datasets."""
+        world = read_file(naturalearth_lowres)
+        capitals = read_file(naturalearth_cities)
+
+        res = world.sindex.query(capitals.geometry, predicate)
+        assert res.shape == expected_shape
@@ -0,0 +1,186 @@
+import warnings
+
+import numpy as np
+import pandas as pd
+from pandas import DataFrame, Series
+
+from shapely.geometry import Point, Polygon
+
+from geopandas import GeoDataFrame, GeoSeries
+from geopandas._compat import HAS_PYPROJ
+from geopandas.array import from_shapely
+
+import pytest
+from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
+
+# Module-level fixtures shared by the tests below.
+#
+# s1 and s2 hold the same two squares; the ring of the first square in s2
+# starts from a different vertex than in s1.
+s1 = GeoSeries(
+    [
+        Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
+        Polygon([(2, 2), (4, 2), (4, 4), (2, 4)]),
+    ]
+)
+s2 = GeoSeries(
+    [
+        Polygon([(0, 2), (0, 0), (2, 0), (2, 2)]),
+        Polygon([(2, 2), (4, 2), (4, 4), (2, 4)]),
+    ]
+)
+
+
+# Same polygons as s2, held in a plain pandas Series.
+s3 = Series(
+    [
+        Polygon([(0, 2), (0, 0), (2, 0), (2, 2)]),
+        Polygon([(2, 2), (4, 2), (4, 4), (2, 4)]),
+    ]
+)
+
+# Same polygons again, converted with `from_shapely`.
+a = from_shapely(
+    [
+        Polygon([(0, 2), (0, 0), (2, 0), (2, 2)]),
+        Polygon([(2, 2), (4, 2), (4, 4), (2, 4)]),
+    ]
+)
+
+# NOTE(review): this binding of s4 is replaced by `s1.copy()` a few lines
+# below, before any use visible in this module -- confirm whether it is
+# still needed.
+s4 = Series(a)
+
+df1 = GeoDataFrame({"col1": [1, 2], "geometry": s1})
+df2 = GeoDataFrame({"col1": [1, 2], "geometry": s2})
+
+# Copies of s1 / s2 with a CRS assigned directly on the underlying array.
+s4 = s1.copy()
+s4.array.crs = 4326
+s5 = s2.copy()
+s5.array.crs = 27700
+
+# Like s2, except the first vertex of the first polygon is (0, 3), not (0, 2).
+s6 = GeoSeries(
+    [
+        Polygon([(0, 3), (0, 0), (2, 0), (2, 2)]),
+        Polygon([(2, 2), (4, 2), (4, 4), (2, 4)]),
+    ]
+)
+
+# df4 and df5 are built from the same columns; only the order in which
+# `geom2` and `geom3` are listed differs.
+df4 = GeoDataFrame(
+    {"col1": [1, 2], "geometry": s1.copy(), "geom2": s4.copy(), "geom3": s5.copy()},
+    crs=3857,
+)
+df5 = GeoDataFrame(
+    {"col1": [1, 2], "geometry": s1.copy(), "geom3": s5.copy(), "geom2": s4.copy()},
+    crs=3857,
+)
+
+
+@pytest.mark.filterwarnings("ignore::UserWarning")
+def test_geoseries():
+    assert_geoseries_equal(s1, s2)
+    assert_geoseries_equal(s1, s3, check_series_type=False, check_dtype=False)
+    assert_geoseries_equal(s3, s2, check_series_type=False, check_dtype=False)
+    assert_geoseries_equal(s1, s4, check_series_type=False)
+
+    with pytest.raises(AssertionError) as error:
+        assert_geoseries_equal(s1, s2, check_less_precise=True)
+    assert "1 out of 2 geometries are not almost equal" in str(error.value)
+    assert "not almost equal: [0]" in str(error.value)
+
+    with pytest.raises(AssertionError) as error:
+        assert_geoseries_equal(s2, s6, check_less_precise=False)
+    assert "1 out of 2 geometries are not equal" in str(error.value)
+    assert "not equal: [0]" in str(error.value)
+
+
+def test_geodataframe():
+    assert_geodataframe_equal(df1, df2)
+
+    with pytest.raises(AssertionError):
+        assert_geodataframe_equal(df1, df2, check_less_precise=True)
+
+    with pytest.raises(AssertionError):
+        assert_geodataframe_equal(df1, df2[["geometry", "col1"]])
+
+    assert_geodataframe_equal(df1, df2[["geometry", "col1"]], check_like=True)
+
+    df3 = df2.copy()
+    df3.loc[0, "col1"] = 10
+    with pytest.raises(AssertionError):
+        assert_geodataframe_equal(df1, df3)
+
+    assert_geodataframe_equal(df5, df4, check_like=True)
+    if HAS_PYPROJ:
+        df5["geom2"] = df5.geom2.set_crs(3857, allow_override=True)
+        with pytest.raises(AssertionError):
+            assert_geodataframe_equal(df5, df4, check_like=True)
+
+
+def test_equal_nans():
+    s = GeoSeries([Point(0, 0), np.nan])
+    assert_geoseries_equal(s, s.copy())
+    assert_geoseries_equal(s, s.copy(), check_less_precise=True)
+
+
+def test_no_crs():
+    df1 = GeoDataFrame({"col1": [1, 2], "geometry": s1}, crs=None)
+    df2 = GeoDataFrame({"col1": [1, 2], "geometry": s1}, crs={})
+    assert_geodataframe_equal(df1, df2)
+
+
+@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not available")
+def test_ignore_crs_mismatch():
+    df1 = GeoDataFrame({"col1": [1, 2], "geometry": s1.copy()}, crs="EPSG:4326")
+    df2 = GeoDataFrame({"col1": [1, 2], "geometry": s1}, crs="EPSG:31370")
+
+    with pytest.raises(AssertionError):
+        assert_geodataframe_equal(df1, df2)
+
+    # assert that with `check_crs=False` the assert passes, and also does not
+    # generate any warning from comparing both geometries with different crs
+    with warnings.catch_warnings(record=True) as record:
+        assert_geodataframe_equal(df1, df2, check_crs=False)
+
+    assert len(record) == 0
+
+
+def test_almost_equal_but_not_equal():
+    s_origin = GeoSeries([Point(0, 0)])
+    s_almost_origin = GeoSeries([Point(0.0000001, 0)])
+    assert_geoseries_equal(s_origin, s_almost_origin, check_less_precise=True)
+    with pytest.raises(AssertionError):
+        assert_geoseries_equal(s_origin, s_almost_origin)
+
+
+def test_geodataframe_no_active_geometry_column():
+    def create_dataframe():
+        gdf = GeoDataFrame({"value": [1, 2], "geometry": [Point(1, 1), Point(2, 2)]})
+        gdf["geom2"] = GeoSeries([Point(3, 3), Point(4, 4)])
+        return gdf
+
+    # no active geometry column (None)
+    df1 = create_dataframe()
+    df1._geometry_column_name = None
+    df2 = create_dataframe()
+    df2._geometry_column_name = None
+    assert_geodataframe_equal(df1, df2)
+
+    # active geometry column ("geometry") not present
+    df1 = create_dataframe()[["value", "geom2"]]
+    df2 = create_dataframe()[["value", "geom2"]]
+    assert_geodataframe_equal(df1, df2)
+
+    df1 = GeoDataFrame(create_dataframe()[["value"]])
+    df2 = GeoDataFrame(create_dataframe()[["value"]])
+    assert_geodataframe_equal(df1, df2)
+
+
+def test_geodataframe_multiindex():
+    def create_dataframe():
+        gdf = DataFrame([[Point(0, 0), Point(1, 1)], [Point(2, 2), Point(3, 3)]])
+        gdf = GeoDataFrame(gdf.astype("geometry"))
+        gdf.columns = pd.MultiIndex.from_product([["geometry"], [0, 1]])
+        return gdf
+
+    df1 = create_dataframe()
+    df2 = create_dataframe()
+    assert_geodataframe_equal(df1, df2)
+
+    df1 = create_dataframe()
+    df1._geometry_column_name = None
+    df2 = create_dataframe()
+    df2._geometry_column_name = None
+    assert_geodataframe_equal(df1, df2)
@@ -0,0 +1,85 @@
+from pandas import DataFrame, Series
+
+from shapely.geometry import Point
+
+from geopandas import GeoDataFrame, GeoSeries
+
+
+class TestSeries:
+    def setup_method(self):
+        N = self.N = 10
+        r = 0.5
+        self.pts = GeoSeries([Point(x, y) for x, y in zip(range(N), range(N))])
+        self.polys = self.pts.buffer(r)
+
+    def test_slice(self):
+        assert type(self.pts[:2]) is GeoSeries
+        assert type(self.pts[::2]) is GeoSeries
+        assert type(self.polys[:2]) is GeoSeries
+
+    def test_head(self):
+        assert type(self.pts.head()) is GeoSeries
+
+    def test_tail(self):
+        assert type(self.pts.tail()) is GeoSeries
+
+    def test_sort_index(self):
+        assert type(self.pts.sort_index()) is GeoSeries
+
+    def test_loc(self):
+        assert type(self.pts.loc[5:]) is GeoSeries
+
+    def test_iloc(self):
+        assert type(self.pts.iloc[5:]) is GeoSeries
+
+    def test_fancy(self):
+        idx = (self.pts.index.to_series() % 2).astype(bool)
+        assert type(self.pts[idx]) is GeoSeries
+
+    def test_take(self):
+        assert type(self.pts.take(list(range(0, self.N, 2)))) is GeoSeries
+
+    def test_groupby(self):
+        for f, s in self.pts.groupby(lambda x: x % 2):
+            assert type(s) is GeoSeries
+
+
+class TestDataFrame:
+    def setup_method(self):
+        N = 10
+        self.df = GeoDataFrame(
+            [
+                {"geometry": Point(x, y), "value1": x + y, "value2": x * y}
+                for x, y in zip(range(N), range(N))
+            ]
+        )
+
+    def test_geometry(self):
+        assert type(self.df.geometry) is GeoSeries
+        # still GeoSeries if different name
+        df2 = GeoDataFrame(
+            {
+                "coords": [Point(x, y) for x, y in zip(range(5), range(5))],
+                "nums": range(5),
+            },
+            geometry="coords",
+        )
+        assert type(df2.geometry) is GeoSeries
+        assert type(df2["coords"]) is GeoSeries
+
+    def test_nongeometry(self):
+        assert type(self.df["value1"]) is Series
+
+    def test_geometry_multiple(self):
+        assert type(self.df[["geometry", "value1"]]) is GeoDataFrame
+
+    def test_nongeometry_multiple(self):
+        assert type(self.df[["value1", "value2"]]) is DataFrame
+
+    def test_slice(self):
+        assert type(self.df[:2]) is GeoDataFrame
+        assert type(self.df[::2]) is GeoDataFrame
+
+    def test_fancy(self):
+        idx = (self.df.index.to_series() % 2).astype(bool)
+        assert type(self.df[idx]) is GeoDataFrame
@@ -0,0 +1,151 @@
+import os.path
+
+from pandas import Series
+
+from geopandas import GeoDataFrame
+
+from geopandas.testing import (  # noqa: F401
+    assert_geoseries_equal,
+    geom_almost_equals,
+    geom_equals,
+)
+
+# Absolute path of the directory that contains this module.
+HERE = os.path.abspath(os.path.dirname(__file__))
+# Two directory levels above HERE; the data paths below are built from it.
+PACKAGE_DIR = os.path.dirname(os.path.dirname(HERE))
+
+# <PACKAGE_DIR>/geopandas/tests/data and the individual files addressed in it.
+_TEST_DATA_DIR = os.path.join(PACKAGE_DIR, "geopandas", "tests", "data")
+# The nybb archive is addressed with a "zip://" prefix rather than a bare path.
+_NYBB = "zip://" + os.path.join(_TEST_DATA_DIR, "nybb_16a.zip")
+_NATURALEARTH_CITIES = os.path.join(
+    _TEST_DATA_DIR, "naturalearth_cities", "naturalearth_cities.shp"
+)
+_NATURALEARTH_LOWRES = os.path.join(
+    _TEST_DATA_DIR, "naturalearth_lowres", "naturalearth_lowres.shp"
+)
+
+
+# mock not used here, but the import from here is used in other modules
+try:
+    from unittest import mock
+except ImportError:
+    import mock  # noqa: F401
+
+
+def validate_boro_df(df, case_sensitive=False):
+    """Tests a GeoDataFrame that has been read in from the nybb dataset."""
+    assert isinstance(df, GeoDataFrame)
+    # Make sure all the columns are there and the geometries
+    # were properly loaded as MultiPolygons
+    assert len(df) == 5
+    columns = ("BoroCode", "BoroName", "Shape_Leng", "Shape_Area")
+    if case_sensitive:
+        for col in columns:
+            assert col in df.columns
+    else:
+        for col in columns:
+            assert col.lower() in (dfcol.lower() for dfcol in df.columns)
+    assert Series(df.geometry.geom_type).dropna().eq("MultiPolygon").all()
+
+
+def get_srid(df):
+    """Return srid from `df.crs`."""
+    if df.crs is not None:
+        return df.crs.to_epsg() or 0
+    return 0
+
+
+def create_spatialite(con, df):
+    """
+    Return a SpatiaLite connection containing the nybb table.
+
+    Parameters
+    ----------
+    `con`: ``sqlite3.Connection``
+    `df`: ``GeoDataFrame``
+    """
+
+    with con:
+        geom_col = df.geometry.name
+        srid = get_srid(df)
+        con.execute(
+            "CREATE TABLE IF NOT EXISTS nybb "
+            "( ogc_fid INTEGER PRIMARY KEY"
+            ", borocode INTEGER"
+            ", boroname TEXT"
+            ", shape_leng REAL"
+            ", shape_area REAL"
+            ")"
+        )
+        con.execute(
+            "SELECT AddGeometryColumn(?, ?, ?, ?)",
+            ("nybb", geom_col, srid, df.geom_type.dropna().iat[0].upper()),
+        )
+        con.execute("SELECT CreateSpatialIndex(?, ?)", ("nybb", geom_col))
+        sql_row = "INSERT INTO nybb VALUES(?, ?, ?, ?, ?, GeomFromText(?, ?))"
+        con.executemany(
+            sql_row,
+            (
+                (
+                    None,
+                    row.BoroCode,
+                    row.BoroName,
+                    row.Shape_Leng,
+                    row.Shape_Area,
+                    row.geometry.wkt if row.geometry else None,
+                    srid,
+                )
+                for row in df.itertuples(index=False)
+            ),
+        )
+
+
+def create_postgis(con, df, srid=None, geom_col="geom"):
+    """
+    Create a nybb table in the test_geopandas PostGIS database.
+    Returns a boolean indicating whether the database table was successfully
+    created
+    """
+    # Try to create the database, skip the db tests if something goes
+    # wrong
+    # If you'd like these tests to run, create a database called
+    # 'test_geopandas' and enable postgis in it:
+    # > createdb test_geopandas
+    # > psql -c "CREATE EXTENSION postgis" -d test_geopandas
+    if srid is not None:
+        geom_schema = "geometry(MULTIPOLYGON, {})".format(srid)
+        geom_insert = "ST_SetSRID(ST_GeometryFromText(%s), {})".format(srid)
+    else:
+        geom_schema = "geometry"
+        geom_insert = "ST_GeometryFromText(%s)"
+    try:
+        cursor = con.cursor()
+        cursor.execute("DROP TABLE IF EXISTS nybb;")
+
+        sql = """CREATE TABLE nybb (
+            {geom_col}   {geom_schema},
+            borocode     integer,
+            boroname     varchar(40),
+            shape_leng   float,
+            shape_area   float
+            );""".format(
+            geom_col=geom_col, geom_schema=geom_schema
+        )
+        cursor.execute(sql)
+
+        for i, row in df.iterrows():
+            sql = """INSERT INTO nybb VALUES ({}, %s, %s, %s, %s
+            );""".format(
+                geom_insert
+            )
+            cursor.execute(
+                sql,
+                (
+                    row["geometry"].wkt,
+                    row["BoroCode"],
+                    row["BoroName"],
+                    row["Shape_Leng"],
+                    row["Shape_Area"],
+                ),
+            )
+    finally:
+        cursor.close()
+        con.commit()
@@ -0,0 +1,15 @@
+from .clip import clip
+from .geocoding import geocode, reverse_geocode
+from .overlay import overlay
+from .sjoin import sjoin, sjoin_nearest
+from .util import collect
+
+# Names exported by ``from <this package> import *``.
+__all__ = [
+    "collect",
+    "geocode",
+    "overlay",
+    "reverse_geocode",
+    "sjoin",
+    "sjoin_nearest",
+    "clip",
+]
@@ -0,0 +1,84 @@
+from warnings import warn
+
+import numpy
+
+from shapely.geometry import MultiPoint
+
+from geopandas.array import from_shapely, points_from_xy
+from geopandas.geoseries import GeoSeries
+
+
+def uniform(geom, size, rng=None):
+    """
+
+    Sample uniformly at random from a geometry.
+
+    For polygons, this samples uniformly within the area of the polygon. For lines,
+    this samples uniformly along the length of the linestring. For multi-part
+    geometries, the weights of each part are selected according to their relevant
+    attribute (area for Polygons, length for LineStrings), and then points are
+    sampled from each part uniformly.
+
+    Any other geometry type (e.g. Point, GeometryCollection) are ignored, and an
+    empty MultiPoint geometry is returned.
+
+    Parameters
+    ----------
+    geom : any shapely.geometry.BaseGeometry type
+        the shape that describes the area in which to sample.
+
+    size : integer
+        an integer denoting how many points to sample
+
+    Returns
+    -------
+    shapely.MultiPoint geometry containing the sampled points
+
+    Examples
+    --------
+    >>> from shapely.geometry import box
+    >>> square = box(0,0,1,1)
+    >>> uniform(square, size=102) # doctest: +SKIP
+    """
+    generator = numpy.random.default_rng(seed=rng)
+
+    if geom is None or geom.is_empty:
+        return MultiPoint()
+
+    if geom.geom_type in ("Polygon", "MultiPolygon"):
+        return _uniform_polygon(geom, size=size, generator=generator)
+
+    if geom.geom_type in ("LineString", "MultiLineString"):
+        return _uniform_line(geom, size=size, generator=generator)
+
+    warn(
+        f"Sampling is not supported for {geom.geom_type} geometry type.",
+        UserWarning,
+        stacklevel=8,
+    )
+    return MultiPoint()
+
+
+def _uniform_line(geom, size, generator):
+    """
+    Sample points from an input shapely linestring
+    """
+
+    fracs = generator.uniform(size=size)
+    return from_shapely(geom.interpolate(fracs, normalized=True)).union_all()
+
+
+def _uniform_polygon(geom, size, generator):
+    """
+    Sample uniformly from within a polygon using batched sampling.
+    """
+    xmin, ymin, xmax, ymax = geom.bounds
+    candidates = []
+    while len(candidates) < size:
+        batch = points_from_xy(
+            x=generator.uniform(xmin, xmax, size=size),
+            y=generator.uniform(ymin, ymax, size=size),
+        )
+        valid_samples = batch[batch.sindex.query(geom, predicate="contains")]
+        candidates.extend(valid_samples)
+    return GeoSeries(candidates[:size]).union_all()
@@ -0,0 +1,169 @@
+import importlib
+import platform
+import sys
+
+
+def _get_sys_info():
+    """System information
+
+    Returns
+    -------
+    sys_info : dict
+        system and Python version information
+    """
+    python = sys.version.replace("\n", " ")
+
+    blob = [
+        ("python", python),
+        ("executable", sys.executable),
+        ("machine", platform.platform()),
+    ]
+
+    return dict(blob)
+
+
+def _get_C_info():
+    """Information on system PROJ, GDAL, GEOS
+    Returns
+    -------
+    c_info: dict
+        system PROJ information
+    """
+    try:
+        import pyproj
+
+        proj_version = pyproj.proj_version_str
+    except Exception:
+        proj_version = None
+    try:
+        import pyproj
+
+        proj_dir = pyproj.datadir.get_data_dir()
+    except Exception:
+        proj_dir = None
+
+    try:
+        import shapely._buildcfg
+
+        geos_version = "{}.{}.{}".format(*shapely._buildcfg.geos_version)
+        geos_dir = shapely._buildcfg.geos_library_path
+    except Exception:
+        try:
+            from shapely import geos_version_string
+
+            geos_version = geos_version_string
+            geos_dir = None
+        except Exception:
+            geos_version = None
+            geos_dir = None
+
+    try:
+        import pyogrio
+
+        gdal_version = pyogrio.__gdal_version_string__
+        gdal_dir = pyogrio.get_gdal_data_path()
+    except Exception:
+        gdal_version = None
+        gdal_dir = None
+
+    if gdal_version is None:
+        try:
+            import fiona
+
+            gdal_version = fiona.env.get_gdal_release_name()
+        except Exception:
+            gdal_version = None
+        try:
+            import fiona
+
+            gdal_dir = fiona.env.GDALDataFinder().search()
+        except Exception:
+            gdal_dir = None
+
+    blob = [
+        ("GEOS", geos_version),
+        ("GEOS lib", geos_dir),
+        ("GDAL", gdal_version),
+        ("GDAL data dir", gdal_dir),
+        ("PROJ", proj_version),
+        ("PROJ data dir", proj_dir),
+    ]
+
+    return dict(blob)
+
+
+def _get_deps_info():
+    """Overview of the installed version of main dependencies
+
+    Returns
+    -------
+    deps_info: dict
+        version information on relevant Python libraries
+    """
+    deps = [
+        "geopandas",
+        # required deps
+        "numpy",
+        "pandas",
+        "pyproj",
+        "shapely",
+        # optional deps
+        "pyogrio",
+        "geoalchemy2",
+        "geopy",
+        "matplotlib",
+        "mapclassify",
+        "fiona",
+        "psycopg",
+        "psycopg2",
+        "pyarrow",
+    ]
+
+    def get_version(module):
+        return module.__version__
+
+    deps_info = {}
+
+    for modname in deps:
+        try:
+            if modname in sys.modules:
+                mod = sys.modules[modname]
+            else:
+                mod = importlib.import_module(modname)
+            ver = get_version(mod)
+            deps_info[modname] = ver
+        except Exception:
+            deps_info[modname] = None
+
+    return deps_info
+
+
+def show_versions():
+    """
+    Print system information and installed module versions.
+
+    Examples
+    --------
+
+    ::
+
+        $ python -c "import geopandas; geopandas.show_versions()"
+    """
+    sys_info = _get_sys_info()
+    deps_info = _get_deps_info()
+    proj_info = _get_C_info()
+
+    maxlen = max(len(x) for x in deps_info)
+    tpl = "{{k:<{maxlen}}}: {{stat}}".format(maxlen=maxlen)
+    print("\nSYSTEM INFO")
+    print("-----------")
+    for k, stat in sys_info.items():
+        print(tpl.format(k=k, stat=stat))
+    print("\nGEOS, GDAL, PROJ INFO")
+    print("---------------------")
+    for k, stat in proj_info.items():
+        print(tpl.format(k=k, stat=stat))
+    print("\nPYTHON DEPENDENCIES")
+    print("-------------------")
+    for k, stat in deps_info.items():
+        print(tpl.format(k=k, stat=stat))
@@ -0,0 +1,257 @@
+"""
+geopandas.clip
+==============
+
+A module to clip vector data using GeoPandas.
+
+"""
+
+import warnings
+
+import numpy as np
+import pandas.api.types
+
+from shapely.geometry import MultiPolygon, Polygon, box
+
+from geopandas import GeoDataFrame, GeoSeries
+from geopandas.array import _check_crs, _crs_mismatch_warn
+
+
+def _mask_is_list_like_rectangle(mask):
+    return pandas.api.types.is_list_like(mask) and not isinstance(
+        mask, (GeoDataFrame, GeoSeries, Polygon, MultiPolygon)
+    )
+
+
+def _clip_gdf_with_mask(gdf, mask, sort=False):
+    """Clip geometry to the polygon/rectangle extent.
+
+    Clip an input GeoDataFrame to the polygon extent of the polygon
+    parameter.
+
+    Parameters
+    ----------
+    gdf : GeoDataFrame, GeoSeries
+        Dataframe to clip.
+
+    mask : (Multi)Polygon, list-like
+        Reference polygon/rectangle for clipping.
+
+    sort : boolean, default False
+        If True, the results will be sorted in ascending order using the
+        geometries' indexes as the primary key.
+
+    Returns
+    -------
+    GeoDataFrame
+        The returned GeoDataFrame is a clipped subset of gdf
+        that intersects with polygon/rectangle.
+    """
+    # A list-like mask is treated as rectangle bounds and turned into a box
+    # for the spatial-index query; the raw bounds are reused for clip_by_rect.
+    clipping_by_rectangle = _mask_is_list_like_rectangle(mask)
+    if clipping_by_rectangle:
+        intersection_polygon = box(*mask)
+    else:
+        intersection_polygon = mask
+
+    # Keep only the rows whose geometry intersects the mask; the query result
+    # is used for positional (iloc) selection.
+    gdf_sub = gdf.iloc[
+        gdf.sindex.query(intersection_polygon, predicate="intersects", sort=sort)
+    ]
+
+    # For performance reasons points don't need to be intersected with poly
+    non_point_mask = gdf_sub.geom_type != "Point"
+
+    if not non_point_mask.any():
+        # only points, directly return
+        return gdf_sub
+
+    # Clip the data with the polygon
+    if isinstance(gdf_sub, GeoDataFrame):
+        # Work on a copy and overwrite only the non-point geometries.
+        clipped = gdf_sub.copy()
+        if clipping_by_rectangle:
+            clipped.loc[non_point_mask, clipped._geometry_column_name] = (
+                gdf_sub.geometry.values[non_point_mask].clip_by_rect(*mask)
+            )
+        else:
+            clipped.loc[non_point_mask, clipped._geometry_column_name] = (
+                gdf_sub.geometry.values[non_point_mask].intersection(mask)
+            )
+    else:
+        # GeoSeries
+        clipped = gdf_sub.copy()
+        if clipping_by_rectangle:
+            clipped[non_point_mask] = gdf_sub.values[non_point_mask].clip_by_rect(*mask)
+        else:
+            clipped[non_point_mask] = gdf_sub.values[non_point_mask].intersection(mask)
+
+    if clipping_by_rectangle:
+        # clip_by_rect might return empty geometry collections in edge cases
+        clipped = clipped[~clipped.is_empty]
+    return clipped
+
+
+def clip(gdf, mask, keep_geom_type=False, sort=False):
+    """Clip points, lines, or polygon geometries to the mask extent.
+
+    Both layers must be in the same Coordinate Reference System (CRS).
+    The ``gdf`` will be clipped to the full extent of the clip object.
+
+    If there are multiple polygons in mask, data from ``gdf`` will be
+    clipped to the total boundary of all polygons in mask.
+
+    If the ``mask`` is list-like with four elements ``(minx, miny, maxx, maxy)``, a
+    faster rectangle clipping algorithm will be used. Note that this can lead to
+    slightly different results in edge cases, e.g. if a line would be reduced to a
+    point, this point might not be returned.
+    The geometry is clipped in a fast but possibly dirty way. The output is not
+    guaranteed to be valid. No exceptions will be raised for topological errors.
+
+    Parameters
+    ----------
+    gdf : GeoDataFrame or GeoSeries
+        Vector layer (point, line, polygon) to be clipped to mask.
+    mask : GeoDataFrame, GeoSeries, (Multi)Polygon, list-like
+        Polygon vector layer used to clip ``gdf``.
+        The mask's geometry is dissolved into one geometric feature
+        and intersected with ``gdf``.
+        If the mask is list-like with four elements ``(minx, miny, maxx, maxy)``,
+        ``clip`` will use a faster rectangle clipping (:meth:`~GeoSeries.clip_by_rect`),
+        possibly leading to slightly different results.
+    keep_geom_type : boolean, default False
+        If True, return only geometries of original type in case of intersection
+        resulting in multiple geometry types or GeometryCollections.
+        If False, return all resulting geometries (potentially mixed-types).
+    sort : boolean, default False
+        If True, the results will be sorted in ascending order using the
+        geometries' indexes as the primary key.
+
+    Returns
+    -------
+    GeoDataFrame or GeoSeries
+         Vector data (points, lines, polygons) from ``gdf`` clipped to
+         polygon boundary from mask.
+
+    See also
+    --------
+    GeoDataFrame.clip : equivalent GeoDataFrame method
+    GeoSeries.clip : equivalent GeoSeries method
+
+    Examples
+    --------
+    Clip points (grocery stores) with polygons (the Near West Side community):
+
+    >>> import geodatasets
+    >>> chicago = geopandas.read_file(
+    ...     geodatasets.get_path("geoda.chicago_health")
+    ... )
+    >>> near_west_side = chicago[chicago["community"] == "NEAR WEST SIDE"]
+    >>> groceries = geopandas.read_file(
+    ...     geodatasets.get_path("geoda.groceries")
+    ... ).to_crs(chicago.crs)
+    >>> groceries.shape
+    (148, 8)
+
+    >>> nws_groceries = geopandas.clip(groceries, near_west_side)
+    >>> nws_groceries.shape
+    (7, 8)
+    """
+    if not isinstance(gdf, (GeoDataFrame, GeoSeries)):
+        raise TypeError(
+            "'gdf' should be GeoDataFrame or GeoSeries, got {}".format(type(gdf))
+        )
+
+    mask_is_list_like = _mask_is_list_like_rectangle(mask)
+    if (
+        not isinstance(mask, (GeoDataFrame, GeoSeries, Polygon, MultiPolygon))
+        and not mask_is_list_like
+    ):
+        raise TypeError(
+            "'mask' should be GeoDataFrame, GeoSeries,"
+            f"(Multi)Polygon or list-like, got {type(mask)}"
+        )
+
+    if mask_is_list_like and len(mask) != 4:
+        raise TypeError(
+            "If 'mask' is list-like, it must have four values (minx, miny, maxx, maxy)"
+        )
+
+    if isinstance(mask, (GeoDataFrame, GeoSeries)):
+        if not _check_crs(gdf, mask):
+            _crs_mismatch_warn(gdf, mask, stacklevel=3)
+
+    if isinstance(mask, (GeoDataFrame, GeoSeries)):
+        box_mask = mask.total_bounds
+    elif mask_is_list_like:
+        box_mask = mask
+    else:
+        # Avoid empty tuple returned by .bounds when geometry is empty. A tuple of
+        # all nan values is consistent with the behavior of
+        # {GeoSeries, GeoDataFrame}.total_bounds for empty geometries.
+        # TODO(shapely) can simpely use mask.bounds once relying on Shapely 2.0
+        box_mask = mask.bounds if not mask.is_empty else (np.nan,) * 4
+    box_gdf = gdf.total_bounds
+    if not (
+        ((box_mask[0] <= box_gdf[2]) and (box_gdf[0] <= box_mask[2]))
+        and ((box_mask[1] <= box_gdf[3]) and (box_gdf[1] <= box_mask[3]))
+    ):
+        return gdf.iloc[:0]
+
+    if isinstance(mask, (GeoDataFrame, GeoSeries)):
+        combined_mask = mask.geometry.union_all()
+    else:
+        combined_mask = mask
+
+    clipped = _clip_gdf_with_mask(gdf, combined_mask, sort=sort)
+
+    if keep_geom_type:
+        geomcoll_concat = (clipped.geom_type == "GeometryCollection").any()
+        geomcoll_orig = (gdf.geom_type == "GeometryCollection").any()
+
+        new_collection = geomcoll_concat and not geomcoll_orig
+
+        if geomcoll_orig:
+            warnings.warn(
+                "keep_geom_type can not be called on a "
+                "GeoDataFrame with GeometryCollection.",
+                stacklevel=2,
+            )
+        else:
+            polys = ["Polygon", "MultiPolygon"]
+            lines = ["LineString", "MultiLineString", "LinearRing"]
+            points = ["Point", "MultiPoint"]
+
+            # Check that the gdf for multiple geom types (points, lines and/or polys)
+            orig_types_total = sum(
+                [
+                    gdf.geom_type.isin(polys).any(),
+                    gdf.geom_type.isin(lines).any(),
+                    gdf.geom_type.isin(points).any(),
+                ]
+            )
+
+            # Check how many geometry types are in the clipped GeoDataFrame
+            clip_types_total = sum(
+                [
+                    clipped.geom_type.isin(polys).any(),
+                    clipped.geom_type.isin(lines).any(),
+                    clipped.geom_type.isin(points).any(),
+                ]
+            )
+
+            # Check there aren't any new geom types in the clipped GeoDataFrame
+            more_types = orig_types_total < clip_types_total
+
+            if orig_types_total > 1:
+                warnings.warn(
+                    "keep_geom_type can not be called on a mixed type GeoDataFrame.",
+                    stacklevel=2,
+                )
+            elif new_collection or more_types:
+                orig_type = gdf.geom_type.iloc[0]
+                if new_collection:
+                    clipped = clipped.explode(index_parts=False)
+                if orig_type in polys:
+                    clipped = clipped.loc[clipped.geom_type.isin(polys)]
+                elif orig_type in lines:
+                    clipped = clipped.loc[clipped.geom_type.isin(lines)]
+
+    return clipped
@@ -0,0 +1,184 @@
+import time
+from collections import defaultdict
+
+import pandas as pd
+
+from shapely.geometry import Point
+
+import geopandas
+
+
+def _get_throttle_time(provider):
+    """
+    Amount of time to wait between requests to a geocoding API, for providers
+    that specify rate limits in their terms of service.
+    """
+    import geopy.geocoders
+
+    # https://operations.osmfoundation.org/policies/nominatim/
+    if provider == geopy.geocoders.Nominatim:
+        return 1
+    else:
+        return 0
+
+
+def geocode(strings, provider=None, **kwargs):
+    """
+    Geocode a set of strings and get a GeoDataFrame of the resulting points.
+
+    Parameters
+    ----------
+    strings : list or Series of addresses to geocode
+    provider : str or geopy.geocoder
+        Specifies geocoding service to use. If none is provided,
+        will use 'photon' (see the Photon's terms of service at:
+        https://photon.komoot.io).
+
+        Either the string name used by geopy (as specified in
+        geopy.geocoders.SERVICE_TO_GEOCODER) or a geopy Geocoder instance
+        (e.g., geopy.geocoders.Photon) may be used.
+
+        Some providers require additional arguments such as access keys
+        See each geocoder's specific parameters in geopy.geocoders
+
+    Notes
+    -----
+    Ensure proper use of the results by consulting the Terms of Service for
+    your provider.
+
+    Geocoding requires geopy. Install it using 'pip install geopy'. See also
+    https://github.com/geopy/geopy
+
+    Examples
+    --------
+    >>> df = geopandas.tools.geocode(  # doctest: +SKIP
+    ...         ["boston, ma", "1600 pennsylvania ave. washington, dc"]
+    ...     )
+    >>> df  # doctest: +SKIP
+                        geometry                                            address
+    0  POINT (-71.05863 42.35899)                          Boston, MA, United States
+    1  POINT (-77.03651 38.89766)  1600 Pennsylvania Ave NW, Washington, DC 20006...
+    """
+
+    if provider is None:
+        provider = "photon"
+    throttle_time = _get_throttle_time(provider)
+
+    return _query(strings, True, provider, throttle_time, **kwargs)
+
+
+def reverse_geocode(points, provider=None, **kwargs):
+    """
+    Reverse geocode a set of points and get a GeoDataFrame of the resulting
+    addresses.
+
+    The points
+
+    Parameters
+    ----------
+    points : list or Series of Shapely Point objects.
+        x coordinate is longitude
+        y coordinate is latitude
+    provider : str or geopy.geocoder (opt)
+        Specifies geocoding service to use. If none is provided,
+        will use 'photon' (see the Photon's terms of service at:
+        https://photon.komoot.io).
+
+        Either the string name used by geopy (as specified in
+        geopy.geocoders.SERVICE_TO_GEOCODER) or a geopy Geocoder instance
+        (e.g., geopy.geocoders.Photon) may be used.
+
+        Some providers require additional arguments such as access keys
+        See each geocoder's specific parameters in geopy.geocoders
+
+    Notes
+    -----
+    Ensure proper use of the results by consulting the Terms of Service for
+    your provider.
+
+    Reverse geocoding requires geopy. Install it using 'pip install geopy'.
+    See also https://github.com/geopy/geopy
+
+    Examples
+    --------
+    >>> from shapely.geometry import Point
+    >>> df = geopandas.tools.reverse_geocode(  # doctest: +SKIP
+    ...     [Point(-71.0594869, 42.3584697), Point(-77.0365305, 38.8977332)]
+    ... )
+    >>> df  # doctest: +SKIP
+                         geometry                                            address
+    0  POINT (-71.05941 42.35837)       29 Court Sq, Boston, MA 02108, United States
+    1  POINT (-77.03641 38.89766)  1600 Pennsylvania Ave NW, Washington, DC 20006...
+    """
+
+    if provider is None:
+        provider = "photon"
+    throttle_time = _get_throttle_time(provider)
+
+    return _query(points, False, provider, throttle_time, **kwargs)
+
+
+def _query(data, forward, provider, throttle_time, **kwargs):
+    # generic wrapper for calls over lists to geopy Geocoders
+    from geopy.geocoders import get_geocoder_for_service
+    from geopy.geocoders.base import GeocoderQueryError
+
+    if forward:
+        if not isinstance(data, pd.Series):
+            data = pd.Series(data)
+    else:
+        if not isinstance(data, geopandas.GeoSeries):
+            data = geopandas.GeoSeries(data)
+
+    if isinstance(provider, str):
+        provider = get_geocoder_for_service(provider)
+
+    coder = provider(**kwargs)
+    results = {}
+    for i, s in data.items():
+        try:
+            if forward:
+                results[i] = coder.geocode(s)
+            else:
+                results[i] = coder.reverse((s.y, s.x), exactly_one=True)
+        except (GeocoderQueryError, ValueError):
+            results[i] = (None, None)
+        time.sleep(throttle_time)
+
+    df = _prepare_geocode_result(results)
+    return df
+
+
+def _prepare_geocode_result(results):
+    """
+    Helper function for the geocode function
+
+    Takes a dict where keys are index entries, values are tuples containing:
+    (address, (lat, lon))
+
+    """
+    # Prepare the data for the DataFrame as a dict of lists
+    d = defaultdict(list)
+    index = []
+
+    for i, s in results.items():
+        if s is None:
+            p = Point()
+            address = None
+
+        else:
+            address, loc = s
+
+            # loc is lat, lon and we want lon, lat
+            if loc is None:
+                p = Point()
+            else:
+                p = Point(loc[1], loc[0])
+
+        d["geometry"].append(p)
+        d["address"].append(address)
+        index.append(i)
+
+    df = geopandas.GeoDataFrame(d, index=index, crs="EPSG:4326")
+
+    return df
@@ -0,0 +1,188 @@
+import numpy as np
+
+
+def _hilbert_distance(geoms, total_bounds=None, level=16):
+    """
+    Calculate the distance along a Hilbert curve.
+
+    The distances are calculated for the midpoints of the geometries in the
+    GeoDataFrame.
+
+    Parameters
+    ----------
+    geoms : GeometryArray
+    total_bounds : 4-element array
+        Total bounds of geometries - array
+    level : int (1 - 16), default 16
+        Determines the precision of the curve (points on the curve will
+        have coordinates in the range [0, 2^level - 1]).
+
+    Returns
+    -------
+    np.ndarray
+        Array containing distances along the Hilbert curve
+
+    """
+    if geoms.is_empty.any() | geoms.isna().any():
+        raise ValueError(
+            "Hilbert distance cannot be computed on a GeoSeries with empty or "
+            "missing geometries.",
+        )
+    # Calculate bounds as numpy array
+    bounds = geoms.bounds
+
+    # Calculate discrete coords based on total bounds and bounds
+    x, y = _continuous_to_discrete_coords(bounds, level, total_bounds)
+    # Compute distance along hilbert curve
+    distances = _encode(level, x, y)
+
+    return distances
+
+
+def _continuous_to_discrete_coords(bounds, level, total_bounds):
+    """
+    Calculates mid points & ranges of geoms and returns
+    as discrete coords
+
+    Parameters
+    ----------
+
+    bounds : Bounds of each geometry - array
+
+    p : The number of iterations used in constructing the Hilbert curve
+
+    total_bounds : Total bounds of geometries - array
+
+    Returns
+    -------
+    Discrete two-dimensional numpy array
+    Two-dimensional array Array of hilbert distances for each geom
+
+    """
+    # Hilbert Side length
+    side_length = (2**level) - 1
+
+    # Calculate mid points for x and y bound coords - returns array
+    x_mids = (bounds[:, 0] + bounds[:, 2]) / 2.0
+    y_mids = (bounds[:, 1] + bounds[:, 3]) / 2.0
+
+    # Calculate x and y range of total bound coords - returns array
+    if total_bounds is None:
+        total_bounds = (
+            np.nanmin(x_mids),
+            np.nanmin(y_mids),
+            np.nanmax(x_mids),
+            np.nanmax(y_mids),
+        )
+
+    xmin, ymin, xmax, ymax = total_bounds
+
+    # Transform continuous value to discrete integer for each dimension
+    x_int = _continuous_to_discrete(x_mids, (xmin, xmax), side_length)
+    y_int = _continuous_to_discrete(y_mids, (ymin, ymax), side_length)
+
+    return x_int, y_int
+
+
+def _continuous_to_discrete(vals, val_range, n):
+    """
+    Convert a continuous one-dimensional array to discrete integer values
+    based their ranges
+
+    Parameters
+    ----------
+    vals : Array of continuous values
+
+    val_range : Tuple containing range of continuous values
+
+    n : Number of discrete values
+
+    Returns
+    -------
+    One-dimensional array of discrete ints
+
+    """
+    width = val_range[1] - val_range[0]
+    if width == 0:
+        return np.zeros_like(vals, dtype=np.uint32)
+    res = (vals - val_range[0]) * (n / width)
+
+    np.clip(res, 0, n, out=res)
+    return res.astype(np.uint32)
+
+
+# Fast Hilbert curve algorithm by http://threadlocalmutex.com/
+# From C++ https://github.com/rawrunprotected/hilbert_curves
+# (public domain)
+
+
+# Highest curve level accepted by `_encode`, which shifts its coordinates
+# by (16 - level) and raises ValueError for larger levels.
+MAX_LEVEL = 16
+
+
+def _interleave(x):
+    x = (x | (x << 8)) & 0x00FF00FF
+    x = (x | (x << 4)) & 0x0F0F0F0F
+    x = (x | (x << 2)) & 0x33333333
+    x = (x | (x << 1)) & 0x55555555
+    return x
+
+
+def _encode(level, x, y):
+    """
+    Compute Hilbert curve indices for discrete ``x``/``y`` coordinates.
+
+    Parameters
+    ----------
+    level : int
+        Level of the curve; must not exceed ``MAX_LEVEL``.
+    x, y : array_like
+        Discrete coordinates, converted to ``uint32`` arrays. Presumably
+        expected to lie in [0, 2^level - 1] (this is not checked here).
+
+    Returns
+    -------
+    np.ndarray
+        Index along the curve for every coordinate pair.
+
+    Raises
+    ------
+    ValueError
+        If ``level`` is greater than ``MAX_LEVEL``.
+    """
+    x = np.asarray(x, dtype="uint32")
+    y = np.asarray(y, dtype="uint32")
+
+    if level > MAX_LEVEL:
+        raise ValueError("Level out of range")
+
+    # Shift the coordinates up so the rounds below always work on 16 bits
+    x = x << (16 - level)
+    y = y << (16 - level)
+
+    # Initial prefix scan round, prime with x and y
+    a = x ^ y
+    b = 0xFFFF ^ a
+    c = 0xFFFF ^ (x | y)
+    d = x & (y ^ 0xFFFF)
+
+    A = a | (b >> 1)
+    B = (a >> 1) ^ a
+    C = ((c >> 1) ^ (b & (d >> 1))) ^ c
+    D = ((a & (c >> 1)) ^ (d >> 1)) ^ d
+
+    # Snapshot the state: the next round reads a..d while updating A..D
+    a = A.copy()
+    b = B.copy()
+    c = C.copy()
+    d = D.copy()
+
+    # Prefix scan round with a shift of 2
+    A = (a & (a >> 2)) ^ (b & (b >> 2))
+    B = (a & (b >> 2)) ^ (b & ((a ^ b) >> 2))
+    C ^= (a & (c >> 2)) ^ (b & (d >> 2))
+    D ^= (b & (c >> 2)) ^ ((a ^ b) & (d >> 2))
+
+    a = A.copy()
+    b = B.copy()
+    c = C.copy()
+    d = D.copy()
+
+    # Prefix scan round with a shift of 4
+    A = (a & (a >> 4)) ^ (b & (b >> 4))
+    B = (a & (b >> 4)) ^ (b & ((a ^ b) >> 4))
+    C ^= (a & (c >> 4)) ^ (b & (d >> 4))
+    D ^= (b & (c >> 4)) ^ ((a ^ b) & (d >> 4))
+
+    # Final round and projection
+    a = A.copy()
+    b = B.copy()
+    c = C.copy()
+    d = D.copy()
+
+    # Only C and D are needed from here on, so A and B are not updated
+    C ^= (a & (c >> 8)) ^ (b & (d >> 8))
+    D ^= (b & (c >> 8)) ^ ((a ^ b) & (d >> 8))
+
+    # Undo transformation prefix scan
+    a = C ^ (C >> 1)
+    b = D ^ (D >> 1)
+
+    # Recover index bits
+    i0 = x ^ y
+    i1 = b | (0xFFFF ^ (i0 | a))
+
+    # Interleave i1 (odd bits) with i0 (even bits), then shift away the low
+    # bits that the initial (16 - level) shift introduced
+    return ((_interleave(i1) << 1) | _interleave(i0)) >> (32 - 2 * level)
@@ -0,0 +1,399 @@
+import warnings
+from functools import reduce
+
+import numpy as np
+import pandas as pd
+
+from geopandas import GeoDataFrame, GeoSeries
+from geopandas._compat import PANDAS_GE_30
+from geopandas.array import _check_crs, _crs_mismatch_warn
+
+
+def _ensure_geometry_column(df):
+    """
+    Helper function to ensure the geometry column is called 'geometry'.
+    If another column with that name exists, it will be dropped.
+    """
+    if not df._geometry_column_name == "geometry":
+        if PANDAS_GE_30:
+            if "geometry" in df.columns:
+                df = df.drop("geometry", axis=1)
+            df = df.rename_geometry("geometry")
+        else:
+            if "geometry" in df.columns:
+                df.drop("geometry", axis=1, inplace=True)
+            df.rename_geometry("geometry", inplace=True)
+    return df
+
+
+def _overlay_intersection(df1, df2):
+    """
+    Overlay Intersection operation used in overlay function
+    """
+    # Spatial Index to create intersections
+    idx1, idx2 = df2.sindex.query(df1.geometry, predicate="intersects", sort=True)
+    # Create pairs of geometries in both dataframes to be intersected
+    if idx1.size > 0 and idx2.size > 0:
+        left = df1.geometry.take(idx1)
+        left.reset_index(drop=True, inplace=True)
+        right = df2.geometry.take(idx2)
+        right.reset_index(drop=True, inplace=True)
+        intersections = left.intersection(right)
+        poly_ix = intersections.geom_type.isin(["Polygon", "MultiPolygon"])
+        intersections.loc[poly_ix] = intersections[poly_ix].make_valid()
+
+        # only keep actual intersecting geometries
+        pairs_intersect = pd.DataFrame({"__idx1": idx1, "__idx2": idx2})
+        geom_intersect = intersections
+
+        # merge data for intersecting geometries
+        df1 = df1.reset_index(drop=True)
+        df2 = df2.reset_index(drop=True)
+        dfinter = pairs_intersect.merge(
+            df1.drop(df1._geometry_column_name, axis=1),
+            left_on="__idx1",
+            right_index=True,
+        )
+        dfinter = dfinter.merge(
+            df2.drop(df2._geometry_column_name, axis=1),
+            left_on="__idx2",
+            right_index=True,
+            suffixes=("_1", "_2"),
+        )
+
+        return GeoDataFrame(dfinter, geometry=geom_intersect, crs=df1.crs)
+    else:
+        result = df1.iloc[:0].merge(
+            df2.iloc[:0].drop(df2.geometry.name, axis=1),
+            left_index=True,
+            right_index=True,
+            suffixes=("_1", "_2"),
+        )
+        result["__idx1"] = np.nan
+        result["__idx2"] = np.nan
+        return result[
+            result.columns.drop(df1.geometry.name).tolist() + [df1.geometry.name]
+        ]
+
+
+def _overlay_difference(df1, df2):
+    """
+    Overlay Difference operation used in overlay function
+    """
+    # spatial index query to find intersections
+    idx1, idx2 = df2.sindex.query(df1.geometry, predicate="intersects", sort=True)
+    idx1_unique, idx1_unique_indices = np.unique(idx1, return_index=True)
+    idx2_split = np.split(idx2, idx1_unique_indices[1:])
+    sidx = [
+        idx2_split.pop(0) if idx in idx1_unique else []
+        for idx in range(df1.geometry.size)
+    ]
+    # Create differences
+    new_g = []
+    for geom, neighbours in zip(df1.geometry, sidx):
+        new = reduce(
+            lambda x, y: x.difference(y), [geom] + list(df2.geometry.iloc[neighbours])
+        )
+        new_g.append(new)
+    differences = GeoSeries(new_g, index=df1.index, crs=df1.crs)
+    poly_ix = differences.geom_type.isin(["Polygon", "MultiPolygon"])
+    differences.loc[poly_ix] = differences[poly_ix].make_valid()
+    geom_diff = differences[~differences.is_empty].copy()
+    dfdiff = df1[~differences.is_empty].copy()
+    dfdiff[dfdiff._geometry_column_name] = geom_diff
+    return dfdiff
+
+
+def _overlay_symmetric_diff(df1, df2):
+    """
+    Overlay Symmetric Difference operation used in overlay function
+    """
+    dfdiff1 = _overlay_difference(df1, df2)
+    dfdiff2 = _overlay_difference(df2, df1)
+    dfdiff1["__idx1"] = range(len(dfdiff1))
+    dfdiff2["__idx2"] = range(len(dfdiff2))
+    dfdiff1["__idx2"] = np.nan
+    dfdiff2["__idx1"] = np.nan
+    # ensure geometry name (otherwise merge goes wrong)
+    dfdiff1 = _ensure_geometry_column(dfdiff1)
+    dfdiff2 = _ensure_geometry_column(dfdiff2)
+    # combine both 'difference' dataframes
+    dfsym = dfdiff1.merge(
+        dfdiff2, on=["__idx1", "__idx2"], how="outer", suffixes=("_1", "_2")
+    )
+    geometry = dfsym.geometry_1.copy()
+    geometry.name = "geometry"
+    # https://github.com/pandas-dev/pandas/issues/26468 use loc for now
+    geometry.loc[dfsym.geometry_1.isnull()] = dfsym.loc[
+        dfsym.geometry_1.isnull(), "geometry_2"
+    ]
+    dfsym.drop(["geometry_1", "geometry_2"], axis=1, inplace=True)
+    dfsym.reset_index(drop=True, inplace=True)
+    dfsym = GeoDataFrame(dfsym, geometry=geometry, crs=df1.crs)
+    return dfsym
+
+
+def _overlay_union(df1, df2):
+    """
+    Overlay Union operation used in overlay function
+    """
+    dfinter = _overlay_intersection(df1, df2)
+    dfsym = _overlay_symmetric_diff(df1, df2)
+    dfunion = pd.concat([dfinter, dfsym], ignore_index=True, sort=False)
+    # keep geometry column last
+    columns = list(dfunion.columns)
+    columns.remove("geometry")
+    columns.append("geometry")
+    return dfunion.reindex(columns=columns)
+
+
+def overlay(df1, df2, how="intersection", keep_geom_type=None, make_valid=True):
+    """Perform spatial overlay between two GeoDataFrames.
+
+    Currently only supports data GeoDataFrames with uniform geometry types,
+    i.e. containing only (Multi)Polygons, or only (Multi)Points, or a
+    combination of (Multi)LineString and LinearRing shapes.
+    Implements several methods that are all effectively subsets of the union.
+
+    See the User Guide page :doc:`../../user_guide/set_operations` for details.
+
+    Parameters
+    ----------
+    df1 : GeoDataFrame
+    df2 : GeoDataFrame
+    how : string
+        Method of spatial overlay: 'intersection', 'union',
+        'identity', 'symmetric_difference' or 'difference'.
+    keep_geom_type : bool
+        If True, return only geometries of the same geometry type as df1 has,
+        if False, return all resulting geometries. Default is None,
+        which will set keep_geom_type to True but warn upon dropping
+        geometries.
+    make_valid : bool, default True
+        If True, any invalid input geometries are corrected with a call to make_valid(),
+        if False, a `ValueError` is raised if any input geometries are invalid.
+
+    Returns
+    -------
+    df : GeoDataFrame
+        GeoDataFrame with new set of polygons and attributes
+        resulting from the overlay
+
+    Examples
+    --------
+    >>> from shapely.geometry import Polygon
+    >>> polys1 = geopandas.GeoSeries([Polygon([(0,0), (2,0), (2,2), (0,2)]),
+    ...                               Polygon([(2,2), (4,2), (4,4), (2,4)])])
+    >>> polys2 = geopandas.GeoSeries([Polygon([(1,1), (3,1), (3,3), (1,3)]),
+    ...                               Polygon([(3,3), (5,3), (5,5), (3,5)])])
+    >>> df1 = geopandas.GeoDataFrame({'geometry': polys1, 'df1_data':[1,2]})
+    >>> df2 = geopandas.GeoDataFrame({'geometry': polys2, 'df2_data':[1,2]})
+
+    >>> geopandas.overlay(df1, df2, how='union')
+        df1_data  df2_data                                           geometry
+    0       1.0       1.0                POLYGON ((2 2, 2 1, 1 1, 1 2, 2 2))
+    1       2.0       1.0                POLYGON ((2 2, 2 3, 3 3, 3 2, 2 2))
+    2       2.0       2.0                POLYGON ((4 4, 4 3, 3 3, 3 4, 4 4))
+    3       1.0       NaN      POLYGON ((2 0, 0 0, 0 2, 1 2, 1 1, 2 1, 2 0))
+    4       2.0       NaN  MULTIPOLYGON (((3 4, 3 3, 2 3, 2 4, 3 4)), ((4...
+    5       NaN       1.0  MULTIPOLYGON (((2 3, 2 2, 1 2, 1 3, 2 3)), ((3...
+    6       NaN       2.0      POLYGON ((3 5, 5 5, 5 3, 4 3, 4 4, 3 4, 3 5))
+
+    >>> geopandas.overlay(df1, df2, how='intersection')
+       df1_data  df2_data                             geometry
+    0         1         1  POLYGON ((2 2, 2 1, 1 1, 1 2, 2 2))
+    1         2         1  POLYGON ((2 2, 2 3, 3 3, 3 2, 2 2))
+    2         2         2  POLYGON ((4 4, 4 3, 3 3, 3 4, 4 4))
+
+    >>> geopandas.overlay(df1, df2, how='symmetric_difference')
+        df1_data  df2_data                                           geometry
+    0       1.0       NaN      POLYGON ((2 0, 0 0, 0 2, 1 2, 1 1, 2 1, 2 0))
+    1       2.0       NaN  MULTIPOLYGON (((3 4, 3 3, 2 3, 2 4, 3 4)), ((4...
+    2       NaN       1.0  MULTIPOLYGON (((2 3, 2 2, 1 2, 1 3, 2 3)), ((3...
+    3       NaN       2.0      POLYGON ((3 5, 5 5, 5 3, 4 3, 4 4, 3 4, 3 5))
+
+    >>> geopandas.overlay(df1, df2, how='difference')
+                                                geometry  df1_data
+    0      POLYGON ((2 0, 0 0, 0 2, 1 2, 1 1, 2 1, 2 0))         1
+    1  MULTIPOLYGON (((3 4, 3 3, 2 3, 2 4, 3 4)), ((4...         2
+
+    >>> geopandas.overlay(df1, df2, how='identity')
+       df1_data  df2_data                                           geometry
+    0       1.0       1.0                POLYGON ((2 2, 2 1, 1 1, 1 2, 2 2))
+    1       2.0       1.0                POLYGON ((2 2, 2 3, 3 3, 3 2, 2 2))
+    2       2.0       2.0                POLYGON ((4 4, 4 3, 3 3, 3 4, 4 4))
+    3       1.0       NaN      POLYGON ((2 0, 0 0, 0 2, 1 2, 1 1, 2 1, 2 0))
+    4       2.0       NaN  MULTIPOLYGON (((3 4, 3 3, 2 3, 2 4, 3 4)), ((4...
+
+    See also
+    --------
+    sjoin : spatial join
+    GeoDataFrame.overlay : equivalent method
+
+    Notes
+    -----
+    Every operation in GeoPandas is planar, i.e. the potential third
+    dimension is not taken into account.
+    """
+    # Allowed operations
+    allowed_hows = [
+        "intersection",
+        "union",
+        "identity",
+        "symmetric_difference",
+        "difference",  # aka erase
+    ]
+    # Error Messages
+    if how not in allowed_hows:
+        raise ValueError(
+            "`how` was '{0}' but is expected to be in {1}".format(how, allowed_hows)
+        )
+
+    if isinstance(df1, GeoSeries) or isinstance(df2, GeoSeries):
+        raise NotImplementedError(
+            "overlay currently only implemented for GeoDataFrames"
+        )
+
+    if not _check_crs(df1, df2):
+        _crs_mismatch_warn(df1, df2, stacklevel=3)
+
+    if keep_geom_type is None:
+        keep_geom_type = True
+        keep_geom_type_warning = True
+    else:
+        keep_geom_type_warning = False
+
+    polys = ["Polygon", "MultiPolygon"]
+    lines = ["LineString", "MultiLineString", "LinearRing"]
+    points = ["Point", "MultiPoint"]
+    for i, df in enumerate([df1, df2]):
+        poly_check = df.geom_type.isin(polys).any()
+        lines_check = df.geom_type.isin(lines).any()
+        points_check = df.geom_type.isin(points).any()
+        if sum([poly_check, lines_check, points_check]) > 1:
+            raise NotImplementedError(
+                "df{} contains mixed geometry types.".format(i + 1)
+            )
+
+    if how == "intersection":
+        box_gdf1 = df1.total_bounds
+        box_gdf2 = df2.total_bounds
+
+        if not (
+            ((box_gdf1[0] <= box_gdf2[2]) and (box_gdf2[0] <= box_gdf1[2]))
+            and ((box_gdf1[1] <= box_gdf2[3]) and (box_gdf2[1] <= box_gdf1[3]))
+        ):
+            result = df1.iloc[:0].merge(
+                df2.iloc[:0].drop(df2.geometry.name, axis=1),
+                left_index=True,
+                right_index=True,
+                suffixes=("_1", "_2"),
+            )
+            return result[
+                result.columns.drop(df1.geometry.name).tolist() + [df1.geometry.name]
+            ]
+
+    # Computations
+    def _make_valid(df):
+        df = df.copy()
+        if df.geom_type.isin(polys).all():
+            mask = ~df.geometry.is_valid
+            col = df._geometry_column_name
+            if make_valid:
+                df.loc[mask, col] = df.loc[mask, col].make_valid()
+            elif mask.any():
+                raise ValueError(
+                    "You have passed make_valid=False along with "
+                    f"{mask.sum()} invalid input geometries. "
+                    "Use make_valid=True or make sure that all geometries "
+                    "are valid before using overlay."
+                )
+        return df
+
+    df1 = _make_valid(df1)
+    df2 = _make_valid(df2)
+
+    with warnings.catch_warnings():  # CRS checked above, suppress array-level warning
+        warnings.filterwarnings("ignore", message="CRS mismatch between the CRS")
+        if how == "difference":
+            result = _overlay_difference(df1, df2)
+        elif how == "intersection":
+            result = _overlay_intersection(df1, df2)
+        elif how == "symmetric_difference":
+            result = _overlay_symmetric_diff(df1, df2)
+        elif how == "union":
+            result = _overlay_union(df1, df2)
+        elif how == "identity":
+            dfunion = _overlay_union(df1, df2)
+            result = dfunion[dfunion["__idx1"].notnull()].copy()
+
+        if how in ["intersection", "symmetric_difference", "union", "identity"]:
+            result.drop(["__idx1", "__idx2"], axis=1, inplace=True)
+
+    if keep_geom_type:
+        geom_type = df1.geom_type.iloc[0]
+
+        # First we filter the geometry types inside GeometryCollections objects
+        # (e.g. GeometryCollection([polygon, point]) -> polygon)
+        # we do this separately on only the relevant rows, as this is an expensive
+        # operation (an expensive no-op for geometry types other than collections)
+        is_collection = result.geom_type == "GeometryCollection"
+        if is_collection.any():
+            geom_col = result._geometry_column_name
+            collections = result[[geom_col]][is_collection]
+
+            exploded = collections.reset_index(drop=True).explode(index_parts=True)
+            exploded = exploded.reset_index(level=0)
+
+            orig_num_geoms_exploded = exploded.shape[0]
+            if geom_type in polys:
+                exploded.loc[~exploded.geom_type.isin(polys), geom_col] = None
+            elif geom_type in lines:
+                exploded.loc[~exploded.geom_type.isin(lines), geom_col] = None
+            elif geom_type in points:
+                exploded.loc[~exploded.geom_type.isin(points), geom_col] = None
+            else:
+                raise TypeError(
+                    "`keep_geom_type` does not support {}.".format(geom_type)
+                )
+            num_dropped_collection = (
+                orig_num_geoms_exploded - exploded.geometry.isna().sum()
+            )
+
+            # level_0 created with above reset_index operation
+            # and represents the original geometry collections
+            # TODO avoiding dissolve to call union_all in this case could further
+            # improve performance (we only need to collect geometries in their
+            # respective Multi version)
+            dissolved = exploded.dissolve(by="level_0")
+            result.loc[is_collection, geom_col] = dissolved[geom_col].values
+        else:
+            num_dropped_collection = 0
+
+        # Now we filter all geometries (in theory we don't need to do this
+        # again for the rows handled above for GeometryCollections, but filtering
+        # them out is probably more expensive as simply including them when this
+        # is typically about only a few rows)
+        orig_num_geoms = result.shape[0]
+        if geom_type in polys:
+            result = result.loc[result.geom_type.isin(polys)]
+        elif geom_type in lines:
+            result = result.loc[result.geom_type.isin(lines)]
+        elif geom_type in points:
+            result = result.loc[result.geom_type.isin(points)]
+        else:
+            raise TypeError("`keep_geom_type` does not support {}.".format(geom_type))
+        num_dropped = orig_num_geoms - result.shape[0]
+
+        if (num_dropped > 0 or num_dropped_collection > 0) and keep_geom_type_warning:
+            warnings.warn(
+                "`keep_geom_type=True` in overlay resulted in {} dropped "
+                "geometries of different geometry types than df1 has. "
+                "Set `keep_geom_type=False` to retain all "
+                "geometries".format(num_dropped + num_dropped_collection),
+                UserWarning,
+                stacklevel=2,
+            )
+
+    result.reset_index(drop=True, inplace=True)
+    return result
@@ -0,0 +1,734 @@
+import warnings
+from functools import partial
+from typing import Optional
+
+import numpy as np
+import pandas as pd
+
+from geopandas import GeoDataFrame
+from geopandas._compat import PANDAS_GE_30
+from geopandas.array import _check_crs, _crs_mismatch_warn
+
+
+def sjoin(
+    left_df,
+    right_df,
+    how="inner",
+    predicate="intersects",
+    lsuffix="left",
+    rsuffix="right",
+    distance=None,
+    on_attribute=None,
+    **kwargs,
+):
+    """Spatial join of two GeoDataFrames.
+
+    See the User Guide page :doc:`../../user_guide/mergingdata` for details.
+
+
+    Parameters
+    ----------
+    left_df, right_df : GeoDataFrames
+    how : string, default 'inner'
+        The type of join:
+
+        * 'left': use keys from left_df; retain only left_df geometry column
+        * 'right': use keys from right_df; retain only right_df geometry column
+        * 'inner': use intersection of keys from both dfs; retain only
+          left_df geometry column
+    predicate : string, default 'intersects'
+        Binary predicate. Valid values are determined by the spatial index used.
+        You can check the valid values in left_df or right_df as
+        ``left_df.sindex.valid_query_predicates`` or
+        ``right_df.sindex.valid_query_predicates``
+        Replaces deprecated ``op`` parameter.
+    lsuffix : string, default 'left'
+        Suffix to apply to overlapping column names (left GeoDataFrame).
+    rsuffix : string, default 'right'
+        Suffix to apply to overlapping column names (right GeoDataFrame).
+    distance : number or array_like, optional
+        Distance(s) around each input geometry within which to query the tree
+        for the 'dwithin' predicate. If array_like, must be
+        one-dimesional with length equal to length of left GeoDataFrame.
+        Required if ``predicate='dwithin'``.
+    on_attribute : string, list or tuple
+        Column name(s) to join on as an additional join restriction on top
+        of the spatial predicate. These must be found in both DataFrames.
+        If set, observations are joined only if the predicate applies
+        and values in specified columns match.
+
+    Examples
+    --------
+    >>> import geodatasets
+    >>> chicago = geopandas.read_file(
+    ...     geodatasets.get_path("geoda.chicago_health")
+    ... )
+    >>> groceries = geopandas.read_file(
+    ...     geodatasets.get_path("geoda.groceries")
+    ... ).to_crs(chicago.crs)
+
+    >>> chicago.head()  # doctest: +SKIP
+        ComAreaID  ...                                           geometry
+    0         35  ...  POLYGON ((-87.60914 41.84469, -87.60915 41.844...
+    1         36  ...  POLYGON ((-87.59215 41.81693, -87.59231 41.816...
+    2         37  ...  POLYGON ((-87.62880 41.80189, -87.62879 41.801...
+    3         38  ...  POLYGON ((-87.60671 41.81681, -87.60670 41.816...
+    4         39  ...  POLYGON ((-87.59215 41.81693, -87.59215 41.816...
+    [5 rows x 87 columns]
+
+    >>> groceries.head()  # doctest: +SKIP
+        OBJECTID     Ycoord  ...  Category                         geometry
+    0        16  41.973266  ...       NaN  MULTIPOINT (-87.65661 41.97321)
+    1        18  41.696367  ...       NaN  MULTIPOINT (-87.68136 41.69713)
+    2        22  41.868634  ...       NaN  MULTIPOINT (-87.63918 41.86847)
+    3        23  41.877590  ...       new  MULTIPOINT (-87.65495 41.87783)
+    4        27  41.737696  ...       NaN  MULTIPOINT (-87.62715 41.73623)
+    [5 rows x 8 columns]
+
+    >>> groceries_w_communities = geopandas.sjoin(groceries, chicago)
+    >>> groceries_w_communities.head()  # doctest: +SKIP
+       OBJECTID       community                           geometry
+    0        16          UPTOWN  MULTIPOINT ((-87.65661 41.97321))
+    1        18     MORGAN PARK  MULTIPOINT ((-87.68136 41.69713))
+    2        22  NEAR WEST SIDE  MULTIPOINT ((-87.63918 41.86847))
+    3        23  NEAR WEST SIDE  MULTIPOINT ((-87.65495 41.87783))
+    4        27         CHATHAM  MULTIPOINT ((-87.62715 41.73623))
+    [5 rows x 95 columns]
+
+    See also
+    --------
+    overlay : overlay operation resulting in a new geometry
+    GeoDataFrame.sjoin : equivalent method
+
+    Notes
+    -----
+    Every operation in GeoPandas is planar, i.e. the potential third
+    dimension is not taken into account.
+    """
+    if kwargs:
+        first = next(iter(kwargs.keys()))
+        raise TypeError(f"sjoin() got an unexpected keyword argument '{first}'")
+
+    on_attribute = _maybe_make_list(on_attribute)
+
+    _basic_checks(left_df, right_df, how, lsuffix, rsuffix, on_attribute=on_attribute),
+
+    indices = _geom_predicate_query(
+        left_df, right_df, predicate, distance, on_attribute=on_attribute
+    )
+
+    joined, _ = _frame_join(
+        left_df,
+        right_df,
+        indices,
+        None,
+        how,
+        lsuffix,
+        rsuffix,
+        predicate,
+        on_attribute=on_attribute,
+    )
+
+    return joined
+
+
+def _maybe_make_list(obj):
+    if isinstance(obj, tuple):
+        return list(obj)
+    if obj is not None and not isinstance(obj, list):
+        return [obj]
+    return obj
+
+
+def _basic_checks(left_df, right_df, how, lsuffix, rsuffix, on_attribute=None):
+    """Checks the validity of join input parameters.
+
+    `how` must be one of the valid options.
+    `'index_'` concatenated with `lsuffix` or `rsuffix` must not already
+    exist as columns in the left or right data frames.
+
+    Parameters
+    ------------
+    left_df : GeoDataFrame
+    right_df : GeoData Frame
+    how : str, one of 'left', 'right', 'inner'
+        join type
+    lsuffix : str
+        left index suffix
+    rsuffix : str
+        right index suffix
+    on_attribute : list, default None
+        list of column names to merge on along with geometry
+    """
+    if not isinstance(left_df, GeoDataFrame):
+        raise ValueError(
+            "'left_df' should be GeoDataFrame, got {}".format(type(left_df))
+        )
+
+    if not isinstance(right_df, GeoDataFrame):
+        raise ValueError(
+            "'right_df' should be GeoDataFrame, got {}".format(type(right_df))
+        )
+
+    allowed_hows = ["left", "right", "inner"]
+    if how not in allowed_hows:
+        raise ValueError(
+            '`how` was "{}" but is expected to be in {}'.format(how, allowed_hows)
+        )
+
+    if not _check_crs(left_df, right_df):
+        _crs_mismatch_warn(left_df, right_df, stacklevel=4)
+
+    if on_attribute:
+        for attr in on_attribute:
+            if (attr not in left_df) and (attr not in right_df):
+                raise ValueError(
+                    f"Expected column {attr} is missing from both of the dataframes."
+                )
+            if attr not in left_df:
+                raise ValueError(
+                    f"Expected column {attr} is missing from the left dataframe."
+                )
+            if attr not in right_df:
+                raise ValueError(
+                    f"Expected column {attr} is missing from the right dataframe."
+                )
+            if attr in (left_df.geometry.name, right_df.geometry.name):
+                raise ValueError(
+                    "Active geometry column cannot be used as an input "
+                    "for on_attribute parameter."
+                )
+
+
+def _geom_predicate_query(left_df, right_df, predicate, distance, on_attribute=None):
+    """Compute geometric comparisons and get matching indices.
+
+    Parameters
+    ----------
+    left_df : GeoDataFrame
+    right_df : GeoDataFrame
+    predicate : string
+        Binary predicate to query.
+    on_attribute: list, default None
+        list of column names to merge on along with geometry
+
+
+    Returns
+    -------
+    DataFrame
+        DataFrame with matching indices in
+        columns named `_key_left` and `_key_right`.
+    """
+
+    original_predicate = predicate
+
+    if predicate == "within":
+        # within is implemented as the inverse of contains
+        # contains is a faster predicate
+        # see discussion at https://github.com/geopandas/geopandas/pull/1421
+        predicate = "contains"
+        sindex = left_df.sindex
+        input_geoms = right_df.geometry
+    else:
+        # all other predicates are symmetric
+        # keep them the same
+        sindex = right_df.sindex
+        input_geoms = left_df.geometry
+
+    if sindex:
+        l_idx, r_idx = sindex.query(
+            input_geoms, predicate=predicate, sort=False, distance=distance
+        )
+    else:
+        # when sindex is empty / has no valid geometries
+        l_idx, r_idx = np.array([], dtype=np.intp), np.array([], dtype=np.intp)
+
+    if original_predicate == "within":
+        # within is implemented as the inverse of contains
+        # flip back the results
+        r_idx, l_idx = l_idx, r_idx
+        indexer = np.lexsort((r_idx, l_idx))
+        l_idx = l_idx[indexer]
+        r_idx = r_idx[indexer]
+
+    if on_attribute:
+        for attr in on_attribute:
+            (l_idx, r_idx), _ = _filter_shared_attribute(
+                left_df, right_df, l_idx, r_idx, attr
+            )
+
+    return l_idx, r_idx
+
+
+def _reset_index_with_suffix(df, suffix, other):
+    """
+    Equivalent of df.reset_index(), but with adding 'suffix' to auto-generated
+    column names.
+
+    Parameters
+    ----------
+    df : DataFrame
+        Frame whose index levels are moved into columns. On pandas < 3.0 the
+        frame is modified in place.
+    suffix : str
+        Suffix used to build the replacement labels ``index_<suffix>`` (or
+        ``index_<suffix><level>``) for index levels that had no name.
+    other : DataFrame
+        The other frame taking part in the join; its columns are checked for a
+        clash with the generated labels.
+
+    Returns
+    -------
+    tuple
+        ``(df_reset, column_names)``: the frame with its index reset, and a
+        ``pd.Index`` with the labels to use for its columns. The labels are
+        returned separately; they are not assigned to ``df_reset`` here.
+
+    Raises
+    ------
+    ValueError
+        If a generated label is already a column of ``df`` or ``other``.
+    """
+    # remember the original level names: None marks levels that need a new label
+    index_original = df.index.names
+    if PANDAS_GE_30:
+        df_reset = df.reset_index()
+    else:
+        # we already made a copy of the dataframe in _frame_join before getting here
+        df_reset = df
+        df_reset.reset_index(inplace=True)
+    # writable copy of the labels; the former index levels come first
+    column_names = df_reset.columns.to_numpy(copy=True)
+    for i, label in enumerate(index_original):
+        # if the original label was None, add suffix to auto-generated name
+        if label is None:
+            new_label = column_names[i]
+            if "level" in new_label:
+                # reset_index of MultiIndex gives "level_i" names, preserve the "i"
+                lev = new_label.split("_")[1]
+                new_label = f"index_{suffix}{lev}"
+            else:
+                new_label = f"index_{suffix}"
+            # check new label will not be in other dataframe
+            # NOTE: on the in-place path `df` is the same object as `df_reset`,
+            # so `df.columns` already contains the auto-generated index columns
+            if new_label in df.columns or new_label in other.columns:
+                raise ValueError(
+                    "'{0}' cannot be a column name in the frames being"
+                    " joined".format(new_label)
+                )
+            column_names[i] = new_label
+    return df_reset, pd.Index(column_names)
+
+
+def _process_column_names_with_suffix(
+    left: pd.Index, right: pd.Index, suffixes, left_df, right_df
+):
+    """
+    Add suffixes to overlapping labels (ignoring the geometry column).
+
+    This is based on pandas' merge logic at https://github.com/pandas-dev/pandas/blob/
+    a0779adb183345a8eb4be58b3ad00c223da58768/pandas/core/reshape/merge.py#L2300-L2370
+
+    Parameters
+    ----------
+    left, right : pd.Index
+        Column labels of the left and right frame.
+    suffixes : tuple
+        ``(lsuffix, rsuffix)`` appended as ``<label>_<suffix>`` to labels that
+        occur on both sides.
+    left_df, right_df : DataFrame
+        Only used to look up ``_geometry_column_name``, whose label is never
+        renamed.
+
+    Returns
+    -------
+    tuple of pd.Index
+        The (possibly renamed) left and right labels. The inputs are returned
+        unchanged when no label overlaps.
+
+    Raises
+    ------
+    ValueError
+        If labels overlap and both suffixes are falsy.
+
+    Warns
+    -----
+    FutureWarning
+        If applying the suffixes introduces duplicate labels that were not
+        already duplicated in the input.
+    """
+    to_rename = left.intersection(right)
+    if len(to_rename) == 0:
+        return left, right
+
+    lsuffix, rsuffix = suffixes
+
+    if not lsuffix and not rsuffix:
+        raise ValueError(f"columns overlap but no suffix specified: {to_rename}")
+
+    def renamer(x, suffix, geometry):
+        # only overlapping, non-geometry labels are renamed; a suffix of None
+        # leaves the label as is
+        if x in to_rename and x != geometry and suffix is not None:
+            return f"{x}_{suffix}"
+        return x
+
+    # one renamer per side, bound to that side's suffix and geometry column name
+    lrenamer = partial(
+        renamer,
+        suffix=lsuffix,
+        geometry=getattr(left_df, "_geometry_column_name", None),
+    )
+    rrenamer = partial(
+        renamer,
+        suffix=rsuffix,
+        geometry=getattr(right_df, "_geometry_column_name", None),
+    )
+
+    # TODO retain index name?
+    left_renamed = pd.Index([lrenamer(lab) for lab in left])
+    right_renamed = pd.Index([rrenamer(lab) for lab in right])
+
+    dups = []
+    if not left_renamed.is_unique:
+        # Only warn when duplicates are caused because of suffixes, already duplicated
+        # columns in origin should not warn
+        dups = left_renamed[(left_renamed.duplicated()) & (~left.duplicated())].tolist()
+    if not right_renamed.is_unique:
+        dups.extend(
+            right_renamed[(right_renamed.duplicated()) & (~right.duplicated())].tolist()
+        )
+    # TODO turn this into an error (pandas has done so as well)
+    if dups:
+        warnings.warn(
+            f"Passing 'suffixes' which cause duplicate columns {set(dups)} in the "
+            f"result is deprecated and will raise a MergeError in a future version.",
+            FutureWarning,
+            stacklevel=4,
+        )
+
+    return left_renamed, right_renamed
+
+
+def _restore_index(joined, index_names, index_names_original):
+    """
+    Set back the the original index columns, and restoring their name as `None`
+    if they didn't have a name originally.
+    """
+    if PANDAS_GE_30:
+        joined = joined.set_index(list(index_names))
+    else:
+        joined.set_index(list(index_names), inplace=True)
+
+    # restore the fact that the index didn't have a name
+    joined_index_names = list(joined.index.names)
+    for i, label in enumerate(index_names_original):
+        if label is None:
+            joined_index_names[i] = None
+    joined.index.names = joined_index_names
+    return joined
+
+
+def _adjust_indexers(indices, distances, original_length, how, predicate):
+    """
+    The left/right indexers from the query represents an inner join.
+    For a left or right join, we need to adjust them to include the rows
+    that would not be present in an inner join.
+
+    Parameters
+    ----------
+    indices : tuple of ndarray
+        ``(l_idx, r_idx)`` positional indexers of the matching rows.
+    distances : ndarray or None
+        Distances aligned with ``indices``; kept aligned with every reordering
+        or insertion done here.
+    original_length : int
+        Number of rows of the frame whose rows must all be kept (the left frame
+        for ``how="left"``, the right frame for ``how="right"``).
+    how : str
+        Join type; ``"inner"`` returns the inputs untouched.
+    predicate : str
+        Not used in this function.
+
+    Returns
+    -------
+    tuple
+        ``((l_idx, r_idx), distances)`` where unmatched rows have ``-1`` in the
+        indexer of the other frame and ``nan`` in ``distances``.
+    """
+    # the indices represent an inner join, no adjustment needed
+    if how == "inner":
+        return indices, distances
+
+    l_idx, r_idx = indices
+
+    if how == "right":
+        # re-sort so it is sorted by the right indexer
+        indexer = np.lexsort((l_idx, r_idx))
+        l_idx, r_idx = l_idx[indexer], r_idx[indexer]
+        if distances is not None:
+            distances = distances[indexer]
+
+        # switch order
+        # (from here on `l_idx` is the indexer of the frame whose rows are all
+        # kept, so the code below is shared between left and right joins)
+        r_idx, l_idx = l_idx, r_idx
+
+    # determine which indices are missing and where they would need to be inserted
+    # NOTE(review): np.searchsorted assumes `l_idx` is sorted ascending here; the
+    # right-join path sorts above, the left-join path relies on the caller
+    idx = np.arange(original_length)
+    l_idx_missing = idx[~np.isin(idx, l_idx)]
+    insert_idx = np.searchsorted(l_idx, l_idx_missing)
+    # for the left indexer, insert those missing indices
+    l_idx = np.insert(l_idx, insert_idx, l_idx_missing)
+    # for the right indexer, insert -1 -> to get missing values in pandas' reindexing
+    r_idx = np.insert(r_idx, insert_idx, -1)
+    # for the distances, already insert those missing values manually
+    if distances is not None:
+        distances = np.insert(distances, insert_idx, np.nan)
+
+    if how == "right":
+        # switch back
+        l_idx, r_idx = r_idx, l_idx
+
+    return (l_idx, r_idx), distances
+
+
+def _frame_join(
+    left_df,
+    right_df,
+    indices,
+    distances,
+    how,
+    lsuffix,
+    rsuffix,
+    predicate,
+    on_attribute=None,
+):
+    """Join the GeoDataFrames at the DataFrame level.
+
+    Parameters
+    ----------
+    left_df : GeoDataFrame
+    right_df : GeoDataFrame
+    indices : tuple of ndarray
+        Indices returned by the geometric join. Tuple with integer
+        indices representing the matches from `left_df` and `right_df`
+        respectively.
+    distances : ndarray, optional
+        Passed through and adapted based on the indices, if needed.
+    how : string
+        The type of join to use on the DataFrame level.
+    lsuffix : string
+        Suffix to apply to overlapping column names (left GeoDataFrame).
+    rsuffix : string
+        Suffix to apply to overlapping column names (right GeoDataFrame).
+    predicate : string or None
+        Only forwarded to `_adjust_indexers`.
+    on_attribute: list, default None
+        list of column names to merge on along with geometry
+
+
+    Returns
+    -------
+    tuple
+        ``(joined, distances)``: the joined GeoDataFrame and the distances
+        adjusted to the rows of the joined frame.
+    """
+    if on_attribute:  # avoid renaming or duplicating shared column
+        right_df = right_df.drop(on_attribute, axis=1)
+
+    # only one geometry column is kept: the left one for inner/left joins,
+    # the right one for right joins
+    if how in ("inner", "left"):
+        right_df = right_df.drop(right_df.geometry.name, axis=1)
+    else:  # how == 'right':
+        left_df = left_df.drop(left_df.geometry.name, axis=1)
+
+    # shallow copies, so that resetting the index below does not touch the inputs
+    left_df = left_df.copy(deep=False)
+    left_nlevels = left_df.index.nlevels
+    left_index_original = left_df.index.names
+    left_df, left_column_names = _reset_index_with_suffix(left_df, lsuffix, right_df)
+
+    right_df = right_df.copy(deep=False)
+    right_nlevels = right_df.index.nlevels
+    right_index_original = right_df.index.names
+    right_df, right_column_names = _reset_index_with_suffix(right_df, rsuffix, left_df)
+
+    # if conflicting names in left and right, add suffix
+    left_column_names, right_column_names = _process_column_names_with_suffix(
+        left_column_names,
+        right_column_names,
+        (lsuffix, rsuffix),
+        left_df,
+        right_df,
+    )
+    left_df.columns = left_column_names
+    right_df.columns = right_column_names
+    # after the reset, the former index levels are the leading columns
+    left_index = left_df.columns[:left_nlevels]
+    right_index = right_df.columns[:right_nlevels]
+
+    # perform join on the dataframes
+    original_length = len(right_df) if how == "right" else len(left_df)
+    (l_idx, r_idx), distances = _adjust_indexers(
+        indices, distances, original_length, how, predicate
+    )
+    # the `take` method doesn't allow introducing NaNs with -1 indices
+    # left = left_df.take(l_idx)
+    # therefore we are using the private _reindex_with_indexers as workaround
+    new_index = pd.RangeIndex(len(l_idx))
+    left = left_df._reindex_with_indexers({0: (new_index, l_idx)})
+    right = right_df._reindex_with_indexers({0: (new_index, r_idx)})
+    # the `copy` keyword of concat is only passed on pandas < 3.0
+    if PANDAS_GE_30:
+        kwargs = {}
+    else:
+        kwargs = dict(copy=False)
+    joined = pd.concat([left, right], axis=1, **kwargs)
+
+    if how in ("inner", "left"):
+        joined = _restore_index(joined, left_index, left_index_original)
+    else:  # how == 'right':
+        joined = joined.set_geometry(right_df.geometry.name)
+        joined = _restore_index(joined, right_index, right_index_original)
+
+    return joined, distances
+
+
+def _nearest_query(
+    left_df: GeoDataFrame,
+    right_df: GeoDataFrame,
+    max_distance: float,
+    how: str,
+    return_distance: bool,
+    exclusive: bool,
+    on_attribute: Optional[list] = None,
+):
+    # use the opposite of the join direction for the index
+    use_left_as_sindex = how == "right"
+    if use_left_as_sindex:
+        sindex = left_df.sindex
+        query = right_df.geometry
+    else:
+        sindex = right_df.sindex
+        query = left_df.geometry
+    if sindex:
+        res = sindex.nearest(
+            query,
+            return_all=True,
+            max_distance=max_distance,
+            return_distance=return_distance,
+            exclusive=exclusive,
+        )
+        if return_distance:
+            (input_idx, tree_idx), distances = res
+        else:
+            (input_idx, tree_idx) = res
+            distances = None
+        if use_left_as_sindex:
+            l_idx, r_idx = tree_idx, input_idx
+            sort_order = np.argsort(l_idx, kind="stable")
+            l_idx, r_idx = l_idx[sort_order], r_idx[sort_order]
+            if distances is not None:
+                distances = distances[sort_order]
+        else:
+            l_idx, r_idx = input_idx, tree_idx
+    else:
+        # when sindex is empty / has no valid geometries
+        l_idx, r_idx = np.array([], dtype=np.intp), np.array([], dtype=np.intp)
+        if return_distance:
+            distances = np.array([], dtype=np.float64)
+        else:
+            distances = None
+
+    if on_attribute:
+        for attr in on_attribute:
+            (l_idx, r_idx), shared_attribute_rows = _filter_shared_attribute(
+                left_df, right_df, l_idx, r_idx, attr
+            )
+            distances = distances[shared_attribute_rows]
+
+    return (l_idx, r_idx), distances
+
+
+def _filter_shared_attribute(left_df, right_df, l_idx, r_idx, attribute):
+    """
+    Returns the indices for the left and right dataframe that share the same entry
+    in the attribute column. Also returns a Boolean `shared_attribute_rows` for rows
+    with the same entry.
+    """
+    shared_attribute_rows = (
+        left_df[attribute].iloc[l_idx].values == right_df[attribute].iloc[r_idx].values
+    )
+
+    l_idx = l_idx[shared_attribute_rows]
+    r_idx = r_idx[shared_attribute_rows]
+    return (l_idx, r_idx), shared_attribute_rows
+
+
+def sjoin_nearest(
+    left_df: GeoDataFrame,
+    right_df: GeoDataFrame,
+    how: str = "inner",
+    max_distance: Optional[float] = None,
+    lsuffix: str = "left",
+    rsuffix: str = "right",
+    distance_col: Optional[str] = None,
+    exclusive: bool = False,
+) -> GeoDataFrame:
+    """Spatial join of two GeoDataFrames based on the distance between their geometries.
+
+    Results will include multiple output records for a single input record
+    where there are multiple equidistant nearest or intersected neighbors.
+
+    Distance is calculated in CRS units and can be returned using the
+    `distance_col` parameter.
+
+    See the User Guide page
+    https://geopandas.readthedocs.io/en/latest/docs/user_guide/mergingdata.html
+    for more details.
+
+
+    Parameters
+    ----------
+    left_df, right_df : GeoDataFrames
+    how : string, default 'inner'
+        The type of join:
+
+        * 'left': use keys from left_df; retain only left_df geometry column
+        * 'right': use keys from right_df; retain only right_df geometry column
+        * 'inner': use intersection of keys from both dfs; retain only
+          left_df geometry column
+    max_distance : float, default None
+        Maximum distance within which to query for nearest geometry.
+        Must be greater than 0.
+        The max_distance used to search for nearest items in the tree may have a
+        significant impact on performance by reducing the number of input
+        geometries that are evaluated for nearest items in the tree.
+    lsuffix : string, default 'left'
+        Suffix to apply to overlapping column names (left GeoDataFrame).
+    rsuffix : string, default 'right'
+        Suffix to apply to overlapping column names (right GeoDataFrame).
+    distance_col : string, default None
+        If set, save the distances computed between matching geometries under a
+        column of this name in the joined GeoDataFrame.
+    exclusive : bool, default False
+        If True, the nearest geometries that are equal to the input geometry
+        will not be returned, default False.
+
+    Examples
+    --------
+    >>> import geodatasets
+    >>> groceries = geopandas.read_file(
+    ...     geodatasets.get_path("geoda.groceries")
+    ... )
+    >>> chicago = geopandas.read_file(
+    ...     geodatasets.get_path("geoda.chicago_health")
+    ... ).to_crs(groceries.crs)
+
+    >>> chicago.head()  # doctest: +SKIP
+       ComAreaID  ...                                           geometry
+    0         35  ...  POLYGON ((-87.60914 41.84469, -87.60915 41.844...
+    1         36  ...  POLYGON ((-87.59215 41.81693, -87.59231 41.816...
+    2         37  ...  POLYGON ((-87.62880 41.80189, -87.62879 41.801...
+    3         38  ...  POLYGON ((-87.60671 41.81681, -87.60670 41.816...
+    4         39  ...  POLYGON ((-87.59215 41.81693, -87.59215 41.816...
+    [5 rows x 87 columns]
+
+    >>> groceries.head()  # doctest: +SKIP
+       OBJECTID     Ycoord  ...  Category                           geometry
+    0        16  41.973266  ...       NaN  MULTIPOINT ((-87.65661 41.97321))
+    1        18  41.696367  ...       NaN  MULTIPOINT ((-87.68136 41.69713))
+    2        22  41.868634  ...       NaN  MULTIPOINT ((-87.63918 41.86847))
+    3        23  41.877590  ...       new  MULTIPOINT ((-87.65495 41.87783))
+    4        27  41.737696  ...       NaN  MULTIPOINT ((-87.62715 41.73623))
+    [5 rows x 8 columns]
+
+    >>> groceries_w_communities = geopandas.sjoin_nearest(groceries, chicago)
+    >>> groceries_w_communities[["Chain", "community", "geometry"]].head(2)
+                   Chain    community                                geometry
+    0     VIET HOA PLAZA       UPTOWN   MULTIPOINT ((1168268.672 1933554.35))
+    1  COUNTY FAIR FOODS  MORGAN PARK  MULTIPOINT ((1162302.618 1832900.224))
+
+
+    To include the distances:
+
+    >>> groceries_w_communities = geopandas.sjoin_nearest(groceries, chicago, \
+distance_col="distances")
+    >>> groceries_w_communities[["Chain", "community", \
+"distances"]].head(2)
+                   Chain    community  distances
+    0     VIET HOA PLAZA       UPTOWN        0.0
+    1  COUNTY FAIR FOODS  MORGAN PARK        0.0
+
+    In the following example, we get multiple groceries for Uptown because all
+    results are equidistant (in this case zero because they intersect).
+    In fact, we get 4 results in total:
+
+    >>> chicago_w_groceries = geopandas.sjoin_nearest(groceries, chicago, \
+distance_col="distances", how="right")
+    >>> uptown_results = \
+chicago_w_groceries[chicago_w_groceries["community"] == "UPTOWN"]
+    >>> uptown_results[["Chain", "community"]]
+                Chain community
+    30  VIET HOA PLAZA    UPTOWN
+    30      JEWEL OSCO    UPTOWN
+    30          TARGET    UPTOWN
+    30       Mariano's    UPTOWN
+
+    See also
+    --------
+    sjoin : binary predicate joins
+    GeoDataFrame.sjoin_nearest : equivalent method
+
+    Notes
+    -----
+    Since this join relies on distances, results will be inaccurate
+    if your geometries are in a geographic CRS.
+
+    Every operation in GeoPandas is planar, i.e. the potential third
+    dimension is not taken into account.
+    """
+
+    # argument validation is delegated to the shared helper
+    _basic_checks(left_df, right_df, how, lsuffix, rsuffix)
+
+    # presumably flags geographic CRS input (see Notes above); the behavior is
+    # defined by check_geographic_crs, not here
+    left_df.geometry.values.check_geographic_crs(stacklevel=1)
+    right_df.geometry.values.check_geographic_crs(stacklevel=1)
+
+    # distances are only requested from the query when they will be stored
+    return_distance = distance_col is not None
+
+    indices, distances = _nearest_query(
+        left_df,
+        right_df,
+        max_distance,
+        how,
+        return_distance,
+        exclusive,
+    )
+    # a nearest join has no predicate, hence the trailing None
+    joined, distances = _frame_join(
+        left_df,
+        right_df,
+        indices,
+        distances,
+        how,
+        lsuffix,
+        rsuffix,
+        None,
+    )
+
+    if return_distance:
+        joined[distance_col] = distances
+
+    return joined
@@ -0,0 +1,484 @@
+"""Tests for the clip module."""
+
+import numpy as np
+import pandas as pd
+
+import shapely
+from shapely.geometry import (
+    GeometryCollection,
+    LinearRing,
+    LineString,
+    MultiPoint,
+    Point,
+    Polygon,
+    box,
+)
+
+import geopandas
+from geopandas import GeoDataFrame, GeoSeries, clip
+from geopandas._compat import HAS_PYPROJ
+from geopandas.tools.clip import _mask_is_list_like_rectangle
+
+import pytest
+from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
+from pandas.testing import assert_index_equal
+
+# Names of the fixtures that describe the same 10x10 rectangle mask, either as a
+# GeoDataFrame or as its bounds (list / tuple / array). They are resolved at
+# test time through ``request.getfixturevalue``.
+mask_variants_single_rectangle = [
+    "single_rectangle_gdf",
+    "single_rectangle_gdf_list_bounds",
+    "single_rectangle_gdf_tuple_bounds",
+    "single_rectangle_gdf_array_bounds",
+]
+# Same idea for the larger rectangle mask: the GeoDataFrame and its bounds.
+mask_variants_large_rectangle = [
+    "larger_single_rectangle_gdf",
+    "larger_single_rectangle_gdf_bounds",
+]
+
+
+@pytest.fixture
+def point_gdf():
+    """Create a point GeoDataFrame."""
+    pts = np.array([[2, 2], [3, 4], [9, 8], [-12, -15]])
+    gdf = GeoDataFrame([Point(xy) for xy in pts], columns=["geometry"], crs="EPSG:3857")
+    return gdf
+
+
+@pytest.fixture
+def point_gdf2():
+    """Create a point GeoDataFrame."""
+    pts = np.array([[5, 5], [2, 2], [4, 4], [0, 0], [3, 3], [1, 1]])
+    gdf = GeoDataFrame([Point(xy) for xy in pts], columns=["geometry"], crs="EPSG:3857")
+    return gdf
+
+
+@pytest.fixture
+def pointsoutside_nooverlap_gdf():
+    """Create a point GeoDataFrame. Its points are all outside the single
+    rectangle, and its bounds are outside the single rectangle's."""
+    pts = np.array([[5, 15], [15, 15], [15, 20]])
+    gdf = GeoDataFrame([Point(xy) for xy in pts], columns=["geometry"], crs="EPSG:3857")
+    return gdf
+
+
+@pytest.fixture
+def pointsoutside_overlap_gdf():
+    """Create a point GeoDataFrame. Its points are all outside the single
+    rectangle, and its bounds are overlapping the single rectangle's."""
+    pts = np.array([[5, 15], [15, 15], [15, 5]])
+    gdf = GeoDataFrame([Point(xy) for xy in pts], columns=["geometry"], crs="EPSG:3857")
+    return gdf
+
+
+@pytest.fixture
+def single_rectangle_gdf():
+    """Create a single rectangle for clipping."""
+    poly_inters = Polygon([(0, 0), (0, 10), (10, 10), (10, 0), (0, 0)])
+    gdf = GeoDataFrame([1], geometry=[poly_inters], crs="EPSG:3857")
+    gdf["attr2"] = "site-boundary"
+    return gdf
+
+
+@pytest.fixture
+def single_rectangle_gdf_tuple_bounds(single_rectangle_gdf):
+    """Bounds of the created single rectangle"""
+    return tuple(single_rectangle_gdf.total_bounds)
+
+
+@pytest.fixture
+def single_rectangle_gdf_list_bounds(single_rectangle_gdf):
+    """Bounds of the created single rectangle"""
+    return list(single_rectangle_gdf.total_bounds)
+
+
+@pytest.fixture
+def single_rectangle_gdf_array_bounds(single_rectangle_gdf):
+    """Bounds of the created single rectangle"""
+    return single_rectangle_gdf.total_bounds
+
+
+@pytest.fixture
+def larger_single_rectangle_gdf():
+    """Create a slightly larger rectangle for clipping.
+    The smaller single rectangle is used to test the edge case where slivers
+    are returned when you clip polygons. This fixture is larger which
+    eliminates the slivers in the clip return.
+    """
+    poly_inters = Polygon([(-5, -5), (-5, 15), (15, 15), (15, -5), (-5, -5)])
+    gdf = GeoDataFrame([1], geometry=[poly_inters], crs="EPSG:3857")
+    gdf["attr2"] = ["study area"]
+    return gdf
+
+
+@pytest.fixture
+def larger_single_rectangle_gdf_bounds(larger_single_rectangle_gdf):
+    """Bounds of the created single rectangle"""
+    return tuple(larger_single_rectangle_gdf.total_bounds)
+
+
+@pytest.fixture
+def buffered_locations(point_gdf):
+    """Buffer points to create a multi-polygon."""
+    buffered_locs = point_gdf
+    buffered_locs["geometry"] = buffered_locs.buffer(4)
+    buffered_locs["type"] = "plot"
+    return buffered_locs
+
+
+@pytest.fixture
+def donut_geometry(buffered_locations, single_rectangle_gdf):
+    """Make a geometry with a hole in the middle (a donut)."""
+    donut = geopandas.overlay(
+        buffered_locations, single_rectangle_gdf, how="symmetric_difference"
+    )
+    return donut
+
+
+@pytest.fixture
+def two_line_gdf():
+    """Create Line Objects For Testing"""
+    linea = LineString([(1, 1), (2, 2), (3, 2), (5, 3)])
+    lineb = LineString([(3, 4), (5, 7), (12, 2), (10, 5), (9, 7.5)])
+    gdf = GeoDataFrame([1, 2], geometry=[linea, lineb], crs="EPSG:3857")
+    return gdf
+
+
+@pytest.fixture
+def multi_poly_gdf(donut_geometry):
+    """Create a multi-polygon GeoDataFrame."""
+    multi_poly = donut_geometry.union_all()
+    out_df = GeoDataFrame(geometry=GeoSeries(multi_poly), crs="EPSG:3857")
+    out_df["attr"] = ["pool"]
+    return out_df
+
+
+@pytest.fixture
+def multi_line(two_line_gdf):
+    """Create a multi-line GeoDataFrame.
+    This GDF has one multiline and one regular line."""
+    # Create a single and multi line object
+    multiline_feat = two_line_gdf.union_all()
+    linec = LineString([(2, 1), (3, 1), (4, 1), (5, 2)])
+    out_df = GeoDataFrame(geometry=GeoSeries([multiline_feat, linec]), crs="EPSG:3857")
+    out_df["attr"] = ["road", "stream"]
+    return out_df
+
+
+@pytest.fixture
+def multi_point(point_gdf):
+    """Create a multi-point GeoDataFrame."""
+    multi_point = point_gdf.union_all()
+    out_df = GeoDataFrame(
+        geometry=GeoSeries(
+            [multi_point, Point(2, 5), Point(-11, -14), Point(-10, -12)]
+        ),
+        crs="EPSG:3857",
+    )
+    out_df["attr"] = ["tree", "another tree", "shrub", "berries"]
+    return out_df
+
+
+@pytest.fixture
+def mixed_gdf():
+    """Create a Mixed Polygon and LineString For Testing"""
+    point = Point(2, 3)
+    line = LineString([(1, 1), (2, 2), (3, 2), (5, 3), (12, 1)])
+    poly = Polygon([(3, 4), (5, 2), (12, 2), (10, 5), (9, 7.5)])
+    ring = LinearRing([(1, 1), (2, 2), (3, 2), (5, 3), (12, 1)])
+    gdf = GeoDataFrame(
+        [1, 2, 3, 4], geometry=[point, poly, line, ring], crs="EPSG:3857"
+    )
+    return gdf
+
+
+@pytest.fixture
+def geomcol_gdf():
+    """Create a Mixed Polygon and LineString For Testing"""
+    point = Point(2, 3)
+    poly = Polygon([(3, 4), (5, 2), (12, 2), (10, 5), (9, 7.5)])
+    coll = GeometryCollection([point, poly])
+    gdf = GeoDataFrame([1], geometry=[coll], crs="EPSG:3857")
+    return gdf
+
+
+@pytest.fixture
+def sliver_line():
+    """Create a line that will create a point when clipped."""
+    linea = LineString([(10, 5), (13, 5), (15, 5)])
+    lineb = LineString([(1, 1), (2, 2), (3, 2), (5, 3), (12, 1)])
+    gdf = GeoDataFrame([1, 2], geometry=[linea, lineb], crs="EPSG:3857")
+    return gdf
+
+
+def test_not_gdf(single_rectangle_gdf):
+    """Inputs or masks of an unsupported type raise a TypeError."""
+    # first argument is not a GeoDataFrame / GeoSeries
+    with pytest.raises(TypeError):
+        clip((2, 3), single_rectangle_gdf)
+    # mask is a string
+    with pytest.raises(TypeError):
+        clip(single_rectangle_gdf, "foobar")
+    # list-like masks with 3 or 5 elements instead of the 4 bounds values
+    with pytest.raises(TypeError):
+        clip(single_rectangle_gdf, (1, 2, 3))
+    with pytest.raises(TypeError):
+        clip(single_rectangle_gdf, (1, 2, 3, 4, 5))
+
+
+def test_non_overlapping_geoms():
+    """Test that a bounding box returns empty if the extents don't overlap"""
+    unit_box = Polygon([(0, 0), (0, 1), (1, 1), (1, 0), (0, 0)])
+    unit_gdf = GeoDataFrame([1], geometry=[unit_box], crs="EPSG:3857")
+    non_overlapping_gdf = unit_gdf.copy()
+    non_overlapping_gdf = non_overlapping_gdf.geometry.apply(
+        lambda x: shapely.affinity.translate(x, xoff=20)
+    )
+    out = clip(unit_gdf, non_overlapping_gdf)
+    assert_geodataframe_equal(out, unit_gdf.iloc[:0])
+    out2 = clip(unit_gdf.geometry, non_overlapping_gdf)
+    assert_geoseries_equal(out2, GeoSeries(crs=unit_gdf.crs))
+
+
+@pytest.mark.parametrize("mask_fixture_name", mask_variants_single_rectangle)
+class TestClipWithSingleRectangleGdf:
+    """Clip tests that are run once for every variant of the single rectangle
+    mask (GeoDataFrame, list bounds, tuple bounds, array bounds)."""
+
+    @pytest.fixture
+    def mask(self, mask_fixture_name, request):
+        # resolve the parametrized fixture name into the actual mask object
+        return request.getfixturevalue(mask_fixture_name)
+
+    def test_returns_gdf(self, point_gdf, mask):
+        """Test that function returns a GeoDataFrame (or GDF-like) object."""
+        out = clip(point_gdf, mask)
+        assert isinstance(out, GeoDataFrame)
+
+    def test_returns_series(self, point_gdf, mask):
+        """Test that function returns a GeoSeries if GeoSeries is passed."""
+        out = clip(point_gdf.geometry, mask)
+        assert isinstance(out, GeoSeries)
+
+    def test_clip_points(self, point_gdf, mask):
+        """Test clipping a points GDF with a generic polygon geometry."""
+        clip_pts = clip(point_gdf, mask)
+        # the fourth point (-12, -15) is dropped
+        pts = np.array([[2, 2], [3, 4], [9, 8]])
+        exp = GeoDataFrame(
+            [Point(xy) for xy in pts], columns=["geometry"], crs="EPSG:3857"
+        )
+        assert_geodataframe_equal(clip_pts, exp)
+
+    def test_clip_points_geom_col_rename(self, point_gdf, mask):
+        """Test clipping a points GDF whose geometry column has been renamed."""
+        point_gdf_geom_col_rename = point_gdf.rename_geometry("geometry2")
+        clip_pts = clip(point_gdf_geom_col_rename, mask)
+        pts = np.array([[2, 2], [3, 4], [9, 8]])
+        exp = GeoDataFrame(
+            [Point(xy) for xy in pts],
+            columns=["geometry2"],
+            crs="EPSG:3857",
+            geometry="geometry2",
+        )
+        assert_geodataframe_equal(clip_pts, exp)
+
+    def test_clip_poly(self, buffered_locations, mask):
+        """Test clipping a polygon GDF with a generic polygon geometry."""
+        clipped_poly = clip(buffered_locations, mask)
+        assert len(clipped_poly.geometry) == 3
+        assert all(clipped_poly.geom_type == "Polygon")
+
+    def test_clip_poly_geom_col_rename(self, buffered_locations, mask):
+        """Test clipping a polygon GDF whose geometry column has been renamed."""
+
+        poly_gdf_geom_col_rename = buffered_locations.rename_geometry("geometry2")
+        clipped_poly = clip(poly_gdf_geom_col_rename, mask)
+        assert len(clipped_poly.geometry) == 3
+        # the renamed geometry column must survive the clip
+        assert "geometry" not in clipped_poly.keys()
+        assert "geometry2" in clipped_poly.keys()
+
+    def test_clip_poly_series(self, buffered_locations, mask):
+        """Test clipping a polygon GeoSeries with a generic polygon geometry."""
+        clipped_poly = clip(buffered_locations.geometry, mask)
+        assert len(clipped_poly) == 3
+        assert all(clipped_poly.geom_type == "Polygon")
+
+    def test_clip_multipoly_keep_geom_type(self, multi_poly_gdf, mask):
+        """Test a multi poly object where the return includes a sliver.
+        Also the bounds of the object should == the bounds of the clip object
+        if they fully overlap (as they do in these fixtures)."""
+        clipped = clip(multi_poly_gdf, mask, keep_geom_type=True)
+        # a list-like mask already is the bounds; otherwise take them from the mask
+        expected_bounds = (
+            mask if _mask_is_list_like_rectangle(mask) else mask.total_bounds
+        )
+        assert np.array_equal(clipped.total_bounds, expected_bounds)
+        # Assert returned data is not a geometry collection
+        assert (clipped.geom_type.isin(["Polygon", "MultiPolygon"])).all()
+
+    def test_clip_multiline(self, multi_line, mask):
+        """Test that clipping a multiline feature with a poly returns expected
+        output."""
+        clipped = clip(multi_line, mask)
+        assert clipped.geom_type[0] == "MultiLineString"
+
+    def test_clip_multipoint(self, multi_point, mask):
+        """Clipping a multipoint feature with a polygon works as expected.
+        The result keeps two features, the first one being a MultiPoint."""
+        clipped = clip(multi_point, mask)
+        assert clipped.geom_type[0] == "MultiPoint"
+        assert hasattr(clipped, "attr")
+        # All points should intersect the clip geom
+        assert len(clipped) == 2
+        clipped_mutltipoint = MultiPoint(
+            [
+                Point(2, 2),
+                Point(3, 4),
+                Point(9, 8),
+            ]
+        )
+        assert clipped.iloc[0].geometry.wkt == clipped_mutltipoint.wkt
+        # build a geometry from the mask, whatever form the mask has
+        shape_for_points = (
+            box(*mask) if _mask_is_list_like_rectangle(mask) else mask.union_all()
+        )
+        assert all(clipped.intersects(shape_for_points))
+
+    def test_clip_lines(self, two_line_gdf, mask):
+        """Test clipping a line GDF with the mask; both lines are kept."""
+        clip_line = clip(two_line_gdf, mask)
+        assert len(clip_line.geometry) == 2
+
+    def test_mixed_geom(self, mixed_gdf, mask):
+        """Test clipping a mixed GeoDataFrame"""
+        clipped = clip(mixed_gdf, mask)
+        assert (
+            clipped.geom_type[0] == "Point"
+            and clipped.geom_type[1] == "Polygon"
+            and clipped.geom_type[2] == "LineString"
+        )
+
+    def test_mixed_series(self, mixed_gdf, mask):
+        """Test clipping a mixed GeoSeries"""
+        clipped = clip(mixed_gdf.geometry, mask)
+        assert (
+            clipped.geom_type[0] == "Point"
+            and clipped.geom_type[1] == "Polygon"
+            and clipped.geom_type[2] == "LineString"
+        )
+
+    def test_clip_with_line_extra_geom(self, sliver_line, mask):
+        """When the output of a clipped line returns a geom collection,
+        and keep_geom_type is True, no geometry collections should be returned."""
+        clipped = clip(sliver_line, mask, keep_geom_type=True)
+        assert len(clipped.geometry) == 1
+        # Assert returned data is not a geometry collection
+        assert not (clipped.geom_type == "GeometryCollection").any()
+
+    def test_clip_no_box_overlap(self, pointsoutside_nooverlap_gdf, mask):
+        """Test clip when intersection is empty and boxes do not overlap."""
+        clipped = clip(pointsoutside_nooverlap_gdf, mask)
+        assert len(clipped) == 0
+
+    def test_clip_box_overlap(self, pointsoutside_overlap_gdf, mask):
+        """Test clip when intersection is empty and boxes do overlap."""
+        clipped = clip(pointsoutside_overlap_gdf, mask)
+        assert len(clipped) == 0
+
+    def test_warning_extra_geoms_mixed(self, mixed_gdf, mask):
+        """Test the correct warnings are raised if keep_geom_type is
+        called on a mixed GDF"""
+        with pytest.warns(UserWarning):
+            clip(mixed_gdf, mask, keep_geom_type=True)
+
+    def test_warning_geomcoll(self, geomcol_gdf, mask):
+        """Test the correct warnings are raised if keep_geom_type is
+        called on a GDF with GeometryCollection"""
+        with pytest.warns(UserWarning):
+            clip(geomcol_gdf, mask, keep_geom_type=True)
+
+
+def test_clip_line_keep_slivers(sliver_line, single_rectangle_gdf):
+    """Test the correct output if a point is returned
+    from a line only geometry type."""
+    clipped = clip(sliver_line, single_rectangle_gdf)
+    # Without keep_geom_type the sliver is kept: the first line is reduced to a
+    # Point, the second one stays a LineString
+    assert "Point" == clipped.geom_type[0]
+    assert "LineString" == clipped.geom_type[1]
+
+
+def test_clip_multipoly_keep_slivers(multi_poly_gdf, single_rectangle_gdf):
+    """Test a multi poly object where the return includes a sliver.
+    Also the bounds of the object should == the bounds of the clip object
+    if they fully overlap (as they do in these fixtures)."""
+    # keep_geom_type is not passed here, so sliver geometries are kept
+    clipped = clip(multi_poly_gdf, single_rectangle_gdf)
+    assert np.array_equal(clipped.total_bounds, single_rectangle_gdf.total_bounds)
+    # Assert returned data is a geometry collection given sliver geoms
+    assert "GeometryCollection" in clipped.geom_type[0]
+
+
+@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not available")
+def test_warning_crs_mismatch(point_gdf, single_rectangle_gdf):
+    """Test that a UserWarning is raised when the mask has another CRS."""
+    # the points are in EPSG:3857, the mask is reprojected to EPSG:4326
+    with pytest.warns(UserWarning, match="CRS mismatch between the CRS"):
+        clip(point_gdf, single_rectangle_gdf.to_crs(4326))
+
+
+def test_clip_with_polygon(single_rectangle_gdf):
+    """Test clip when using a shapely object"""
+    polygon = Polygon([(0, 0), (5, 12), (10, 0), (0, 0)])
+    clipped = clip(single_rectangle_gdf, polygon)
+    exp_poly = polygon.intersection(
+        Polygon([(0, 0), (0, 10), (10, 10), (10, 0), (0, 0)])
+    )
+    exp = GeoDataFrame([1], geometry=[exp_poly], crs="EPSG:3857")
+    exp["attr2"] = "site-boundary"
+    assert_geodataframe_equal(clipped, exp)
+
+
+def test_clip_with_multipolygon(buffered_locations, single_rectangle_gdf):
+    """Test clipping a polygon with a multipolygon."""
+    multi = buffered_locations.dissolve(by="type").reset_index()
+    clipped = clip(single_rectangle_gdf, multi)
+    assert clipped.geom_type[0] == "Polygon"
+
+
+@pytest.mark.parametrize(
+    "mask_fixture_name",
+    mask_variants_large_rectangle,
+)
+def test_clip_single_multipoly_no_extra_geoms(
+    buffered_locations, mask_fixture_name, request
+):
+    """When clipping a multi-polygon feature, no additional geom types
+    should be returned."""
+    masks = request.getfixturevalue(mask_fixture_name)
+    multi = buffered_locations.dissolve(by="type").reset_index()
+    clipped = clip(multi, masks)
+    assert clipped.geom_type[0] == "Polygon"
+
+
+# Masks that describe no area: an empty polygon, bounds containing NaN, and
+# GeoSeries / GeoDataFrame objects that are empty or hold only empty polygons.
+@pytest.mark.filterwarnings("ignore:All-NaN slice encountered")
+@pytest.mark.parametrize(
+    "mask",
+    [
+        Polygon(),
+        (np.nan,) * 4,
+        (np.nan, 0, np.nan, 1),
+        GeoSeries([Polygon(), Polygon()], crs="EPSG:3857"),
+        GeoSeries([Polygon(), Polygon()], crs="EPSG:3857").to_frame(),
+        GeoSeries([], crs="EPSG:3857"),
+        GeoSeries([], crs="EPSG:3857").to_frame(),
+    ],
+)
+def test_clip_empty_mask(buffered_locations, mask):
+    """Test that clipping with empty mask returns an empty result."""
+    # GeoDataFrame input: empty frame that keeps the columns and the CRS
+    clipped = clip(buffered_locations, mask)
+    assert_geodataframe_equal(
+        clipped,
+        GeoDataFrame([], columns=["geometry", "type"], crs="EPSG:3857"),
+        check_index_type=False,
+    )
+    # GeoSeries input: empty series that keeps the CRS
+    clipped = clip(buffered_locations.geometry, mask)
+    assert_geoseries_equal(clipped, GeoSeries([], crs="EPSG:3857"))
+
+
+def test_clip_sorting(point_gdf2):
+    """Test the sorting kwarg in clip"""
+    # the box covers the points (2, 2), (0, 0) and (1, 1), i.e. index 1, 3 and 5
+    bbox = shapely.geometry.box(0, 0, 2, 2)
+    unsorted_clipped_gdf = point_gdf2.clip(bbox)
+    sorted_clipped_gdf = point_gdf2.clip(bbox, sort=True)
+
+    expected_sorted_index = pd.Index([1, 3, 5])
+
+    # without sort=True the result is expected not to be in index order
+    assert not (sorted(unsorted_clipped_gdf.index) == unsorted_clipped_gdf.index).all()
+    assert (sorted(sorted_clipped_gdf.index) == sorted_clipped_gdf.index).all()
+    assert_index_equal(expected_sorted_index, sorted_clipped_gdf.index)
@@ -0,0 +1,76 @@
+import numpy as np
+
+from shapely.geometry import Point
+from shapely.wkt import loads
+
+import geopandas
+
+import pytest
+from pandas.testing import assert_series_equal
+
+
+def test_hilbert_distance():
+    # check the Hilbert code algorithm itself against hardcoded values,
+    # one expected list per curve level
+    wkts = [
+        "POINT (0 0)",
+        "POINT (1 1)",
+        "POINT (1 0)",
+        "POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))",
+    ]
+    geoms = geopandas.GeoSeries.from_wkt(wkts)
+
+    expected_by_level = {
+        2: [0, 10, 15, 2],
+        3: [0, 42, 63, 10],
+        16: [0, 2863311530, 4294967295, 715827882],
+    }
+    for level, expected in expected_by_level.items():
+        distances = geoms.hilbert_distance(total_bounds=(0, 0, 1, 1), level=level)
+        assert distances.tolist() == expected
+
+
+@pytest.fixture
+def geoseries_points():
+    """GeoSeries holding the points (1, 2), (2, 3), (3, 4) and (4, 1)."""
+    coords = [(1, 2), (2, 3), (3, 4), (4, 1)]
+    return geopandas.GeoSeries([Point(x, y) for x, y in coords])
+
+
+def test_hilbert_distance_level(geoseries_points):
+    """Requesting ``level=20`` must be rejected with a ``ValueError``."""
+    rejected_level = 20
+    with pytest.raises(ValueError):
+        geoseries_points.hilbert_distance(level=rejected_level)
+
+
+def test_specified_total_bounds(geoseries_points):
+    """Passing the series' own ``total_bounds`` must match the default call."""
+    bounds = geoseries_points.total_bounds
+    with_explicit_bounds = geoseries_points.hilbert_distance(total_bounds=bounds)
+    with_default_bounds = geoseries_points.hilbert_distance()
+    assert_series_equal(with_explicit_bounds, with_default_bounds)
+
+
+@pytest.mark.parametrize("empty", [None, loads("POLYGON EMPTY")])
+def test_empty(geoseries_points, empty):
+    """A missing or empty geometry in the series must raise ``ValueError``."""
+    # overwrite the last point with the missing/empty value under test
+    geoseries_points.iloc[-1] = empty
+    expected_message = "cannot be computed on a GeoSeries with empty"
+    with pytest.raises(ValueError, match=expected_message):
+        geoseries_points.hilbert_distance()
+
+
+def test_zero_width():
+    # special case: every point lies on the line x == 0, so the bounds have
+    # zero width -> must not warn about division by 0 or introduce NaN
+    points = [Point(0, y) for y in (0, 2, 1)]
+    series = geopandas.GeoSeries(points)
+    # turn any floating-point warning into an error for the duration of the call
+    with np.errstate(all="raise"):
+        distances = series.hilbert_distance()
+    order = np.argsort(np.array(distances)).tolist()
+    assert order == [0, 2, 1]
@@ -0,0 +1,67 @@
+import numpy
+
+import geopandas
+from geopandas.tools._random import uniform
+
+import pytest
+
+
+@pytest.fixture
+def multipolygons(nybb_filename):
+    """Geometry column of the file referenced by the ``nybb_filename`` fixture."""
+    nybb = geopandas.read_file(nybb_filename)
+    return nybb.geometry
+
+
+@pytest.fixture
+def polygons(multipolygons):
+    """Single-part geometries obtained by exploding ``multipolygons``."""
+    exploded = multipolygons.explode(ignore_index=True)
+    return exploded.geometry
+
+
+@pytest.fixture
+def multilinestrings(multipolygons):
+    """Boundaries of the ``multipolygons`` fixture."""
+    boundaries = multipolygons.boundary
+    return boundaries
+
+
+@pytest.fixture
+def linestrings(polygons):
+    """Boundaries of the ``polygons`` fixture."""
+    boundaries = polygons.boundary
+    return boundaries
+
+
+@pytest.fixture
+def points(multipolygons):
+    """Centroids of the ``multipolygons`` fixture."""
+    centroids = multipolygons.centroid
+    return centroids
+
+
+@pytest.mark.parametrize("size", [10, 100])
+@pytest.mark.parametrize(
+    "geom_fixture", ["multipolygons", "polygons", "multilinestrings", "linestrings"]
+)
+def test_uniform(geom_fixture, size, request):
+    """``uniform`` must return ``size`` points, all intersecting the geometry."""
+    geoms = request.getfixturevalue(geom_fixture)
+    geom = geoms[0]
+    sample = uniform(geom, size=size, rng=1)
+
+    # split the sample into its individual parts so they can be counted
+    exploded = geopandas.GeoSeries(sample).explode(index_parts=True)
+    sample_points = exploded.reset_index(drop=True)
+    assert len(sample_points) == size
+
+    # NOTE(review): the tiny buffer presumably absorbs floating-point error
+    # for points sampled on line geometries - confirm
+    buffered = sample_points.buffer(0.00000001)
+    hits = buffered.sindex.query(geom, predicate="intersects")
+    assert len(hits) == size
+
+
+def test_uniform_unsupported(points):
+    """Sampling from a point geometry must warn and return an empty result."""
+    expected_warning = "Sampling is not supported"
+    with pytest.warns(UserWarning, match=expected_warning):
+        result = uniform(points[0], size=10, rng=1)
+    assert result.is_empty
+
+
+def test_uniform_generator(polygons):
+    """Check ``rng`` handling for an integer seed and for a Generator object."""
+    polygon = polygons[0]
+
+    # the same integer seed must reproduce the same sample
+    seeded_first = uniform(polygon, size=10, rng=1)
+    seeded_second = uniform(polygon, size=10, rng=1)
+    assert seeded_first.equals(seeded_second)
+
+    # a Generator seeded with 1 matches the integer seed on its first draw
+    # and must give a different sample on its second draw
+    rng = numpy.random.default_rng(seed=1)
+    from_rng_first = uniform(polygon, size=10, rng=rng)
+    from_rng_second = uniform(polygon, size=10, rng=rng)
+
+    assert seeded_first.equals(from_rng_first)
+    assert not seeded_first.equals(from_rng_second)
@@ -0,0 +1,51 @@
+from shapely.geometry import LineString, MultiPoint, Point
+
+from geopandas import GeoSeries
+from geopandas.tools import collect
+
+import pytest
+
+
+class TestTools:
+    """Tests for ``geopandas.tools.collect``."""
+
+    def setup_method(self):
+        # Three single points and the MultiPoint made of all three
+        self.p1 = Point(0, 0)
+        self.p2 = Point(1, 1)
+        self.p3 = Point(2, 2)
+        self.mpc = MultiPoint([self.p1, self.p2, self.p3])
+
+        # A smaller MultiPoint and a LineString for the Multi*/mixed-type cases
+        self.mp1 = MultiPoint([self.p1, self.p2])
+        self.line1 = LineString([(3, 3), (4, 4)])
+
+    def test_collect_single(self):
+        # a single geometry is returned unchanged when multi is not forced
+        result = collect(self.p1)
+        assert self.p1.equals(result)
+
+    def test_collect_single_force_multi(self):
+        # multi=True wraps a single geometry into its Multi* counterpart
+        result = collect(self.p1, multi=True)
+        expected = MultiPoint([self.p1])
+        assert expected.equals(result)
+
+    def test_collect_multi(self):
+        # a single Multi* geometry is returned unchanged
+        result = collect(self.mp1)
+        assert self.mp1.equals(result)
+
+    def test_collect_multi_force_multi(self):
+        # multi=True on an already-Multi* geometry must also return it unchanged
+        result = collect(self.mp1, multi=True)
+        assert self.mp1.equals(result)
+
+    def test_collect_list(self):
+        result = collect([self.p1, self.p2, self.p3])
+        assert self.mpc.equals(result)
+
+    def test_collect_GeoSeries(self):
+        s = GeoSeries([self.p1, self.p2, self.p3])
+        result = collect(s)
+        assert self.mpc.equals(result)
+
+    def test_collect_mixed_types(self):
+        # different geometry types cannot be collected together
+        with pytest.raises(ValueError):
+            collect([self.p1, self.line1])
+
+    def test_collect_mixed_multi(self):
+        # more than one Multi* geometry cannot be collected
+        with pytest.raises(ValueError):
+            collect([self.mpc, self.mp1])
@@ -0,0 +1,45 @@
+import pandas as pd
+
+from shapely.geometry import MultiLineString, MultiPoint, MultiPolygon
+from shapely.geometry.base import BaseGeometry
+
+_multi_type_map = {  # single-part geom_type name -> Multi* class
+    "Point": MultiPoint,
+    "LineString": MultiLineString,
+    "Polygon": MultiPolygon,
+}
+
+
+def collect(x, multi=False):
+    """
+    Collect single part geometries into their Multi* counterpart
+
+    Parameters
+    ----------
+    x : an iterable or Series of Shapely geometries, a GeoSeries, or
+        a single Shapely geometry
+    multi : boolean, default False
+        if True, force returned geometries to be Multi* even if they
+        only have one component.
+
+    Returns
+    -------
+    Shapely geometry
+        The input geometry itself when there is exactly one geometry and it
+        is already Multi* or ``multi`` is False; otherwise the Multi*
+        geometry built from all input geometries.
+
+    Raises
+    ------
+    ValueError
+        If ``x`` contains no geometries, if the geometries are not all of
+        the same type, or if more than one Multi* geometry is passed.
+    """
+    if isinstance(x, BaseGeometry):
+        geoms = [x]
+    else:
+        # Materialize once so that Series with any index and one-shot
+        # iterables (e.g. generators) can be indexed and iterated repeatedly
+        geoms = list(x)
+
+    if not geoms:
+        raise ValueError("Cannot collect an empty sequence of geometries")
+
+    # We cannot create GeometryCollection here so all types
+    # must be the same. If there is more than one element,
+    # they cannot be Multi*, i.e., can't pass in combination of
+    # Point and MultiPoint... or even just MultiPoint
+    t = geoms[0].geom_type
+    if any(g.geom_type != t for g in geoms):
+        raise ValueError("Geometry type must be homogeneous")
+    if len(geoms) > 1 and t.startswith("Multi"):
+        raise ValueError(f"Cannot collect {t}. Must have single geometries")
+
+    if len(geoms) == 1 and (t.startswith("Multi") or not multi):
+        # If there's only one single part geom and we're not forcing to
+        # multi, then just return it
+        return geoms[0]
+    return _multi_type_map[t](geoms)