refactor: excel parse
@@ -0,0 +1,15 @@
from .clip import clip
from .geocoding import geocode, reverse_geocode
from .overlay import overlay
from .sjoin import sjoin, sjoin_nearest
from .util import collect

__all__ = [
    "collect",
    "geocode",
    "overlay",
    "reverse_geocode",
    "sjoin",
    "sjoin_nearest",
    "clip",
]
@@ -0,0 +1,84 @@
from warnings import warn

import numpy

from shapely.geometry import MultiPoint

from geopandas.array import from_shapely, points_from_xy
from geopandas.geoseries import GeoSeries


def uniform(geom, size, rng=None):
    """
    Sample uniformly at random from a geometry.

    For polygons, this samples uniformly within the area of the polygon. For lines,
    this samples uniformly along the length of the linestring. For multi-part
    geometries, the weights of each part are selected according to their relevant
    attribute (area for Polygons, length for LineStrings), and then points are
    sampled from each part uniformly.

    Any other geometry type (e.g. Point, GeometryCollection) is ignored, and an
    empty MultiPoint geometry is returned.

    Parameters
    ----------
    geom : any shapely.geometry.BaseGeometry type
        the shape that describes the area in which to sample.
    size : integer
        an integer denoting how many points to sample
    rng : int or numpy.random.Generator, optional
        seed (or generator) passed to ``numpy.random.default_rng`` for
        reproducible sampling

    Returns
    -------
    shapely.MultiPoint geometry containing the sampled points

    Examples
    --------
    >>> from shapely.geometry import box
    >>> square = box(0, 0, 1, 1)
    >>> uniform(square, size=102)  # doctest: +SKIP
    """
    generator = numpy.random.default_rng(seed=rng)

    if geom is None or geom.is_empty:
        return MultiPoint()

    if geom.geom_type in ("Polygon", "MultiPolygon"):
        return _uniform_polygon(geom, size=size, generator=generator)

    if geom.geom_type in ("LineString", "MultiLineString"):
        return _uniform_line(geom, size=size, generator=generator)

    warn(
        f"Sampling is not supported for {geom.geom_type} geometry type.",
        UserWarning,
        stacklevel=8,
    )
    return MultiPoint()


def _uniform_line(geom, size, generator):
    """
    Sample points from an input shapely linestring.
    """
    fracs = generator.uniform(size=size)
    return from_shapely(geom.interpolate(fracs, normalized=True)).union_all()


def _uniform_polygon(geom, size, generator):
    """
    Sample uniformly from within a polygon using batched rejection sampling.
    """
    xmin, ymin, xmax, ymax = geom.bounds
    candidates = []
    while len(candidates) < size:
        batch = points_from_xy(
            x=generator.uniform(xmin, xmax, size=size),
            y=generator.uniform(ymin, ymax, size=size),
        )
        valid_samples = batch[batch.sindex.query(geom, predicate="contains")]
        candidates.extend(valid_samples)
    return GeoSeries(candidates[:size]).union_all()
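
# Added usage sketch (not part of the commit): drawing a reproducible sample
# from a unit square with uniform() above.
#   from shapely.geometry import box
#   pts = uniform(box(0, 0, 1, 1), size=10, rng=42)
#   len(pts.geoms)  # 10 points, uniformly distributed over the square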
@@ -0,0 +1,169 @@
import importlib
import platform
import sys


def _get_sys_info():
    """System information

    Returns
    -------
    sys_info : dict
        system and Python version information
    """
    python = sys.version.replace("\n", " ")

    blob = [
        ("python", python),
        ("executable", sys.executable),
        ("machine", platform.platform()),
    ]

    return dict(blob)


def _get_C_info():
    """Information on system PROJ, GDAL, GEOS

    Returns
    -------
    c_info: dict
        system PROJ, GDAL and GEOS information
    """
    try:
        import pyproj

        proj_version = pyproj.proj_version_str
    except Exception:
        proj_version = None
    try:
        import pyproj

        proj_dir = pyproj.datadir.get_data_dir()
    except Exception:
        proj_dir = None

    try:
        import shapely._buildcfg

        geos_version = "{}.{}.{}".format(*shapely._buildcfg.geos_version)
        geos_dir = shapely._buildcfg.geos_library_path
    except Exception:
        try:
            from shapely import geos_version_string

            geos_version = geos_version_string
            geos_dir = None
        except Exception:
            geos_version = None
            geos_dir = None

    try:
        import pyogrio

        gdal_version = pyogrio.__gdal_version_string__
        gdal_dir = pyogrio.get_gdal_data_path()
    except Exception:
        gdal_version = None
        gdal_dir = None

    if gdal_version is None:
        try:
            import fiona

            gdal_version = fiona.env.get_gdal_release_name()
        except Exception:
            gdal_version = None
        try:
            import fiona

            gdal_dir = fiona.env.GDALDataFinder().search()
        except Exception:
            gdal_dir = None

    blob = [
        ("GEOS", geos_version),
        ("GEOS lib", geos_dir),
        ("GDAL", gdal_version),
        ("GDAL data dir", gdal_dir),
        ("PROJ", proj_version),
        ("PROJ data dir", proj_dir),
    ]

    return dict(blob)


def _get_deps_info():
    """Overview of the installed version of main dependencies

    Returns
    -------
    deps_info: dict
        version information on relevant Python libraries
    """
    deps = [
        "geopandas",
        # required deps
        "numpy",
        "pandas",
        "pyproj",
        "shapely",
        # optional deps
        "pyogrio",
        "geoalchemy2",
        "geopy",
        "matplotlib",
        "mapclassify",
        "fiona",
        "psycopg",
        "psycopg2",
        "pyarrow",
    ]

    def get_version(module):
        return module.__version__

    deps_info = {}

    for modname in deps:
        try:
            if modname in sys.modules:
                mod = sys.modules[modname]
            else:
                mod = importlib.import_module(modname)
            ver = get_version(mod)
            deps_info[modname] = ver
        except Exception:
            deps_info[modname] = None

    return deps_info


def show_versions():
    """
    Print system information and installed module versions.

    Examples
    --------

    ::

        $ python -c "import geopandas; geopandas.show_versions()"
    """
    sys_info = _get_sys_info()
    deps_info = _get_deps_info()
    proj_info = _get_C_info()

    maxlen = max(len(x) for x in deps_info)
    tpl = "{{k:<{maxlen}}}: {{stat}}".format(maxlen=maxlen)
    print("\nSYSTEM INFO")
    print("-----------")
    for k, stat in sys_info.items():
        print(tpl.format(k=k, stat=stat))
    print("\nGEOS, GDAL, PROJ INFO")
    print("---------------------")
    for k, stat in proj_info.items():
        print(tpl.format(k=k, stat=stat))
    print("\nPYTHON DEPENDENCIES")
    print("-------------------")
    for k, stat in deps_info.items():
        print(tpl.format(k=k, stat=stat))
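
# Added note (illustrative, not part of the commit): tpl left-aligns keys to the
# longest dependency name so the printed values line up, e.g. with maxlen=6:
#   "{k:<6}: {stat}".format(k="numpy", stat="1.26.4")  # -> "numpy : 1.26.4"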
@@ -0,0 +1,257 @@
"""
geopandas.clip
==============

A module to clip vector data using GeoPandas.

"""

import warnings

import numpy as np
import pandas.api.types

from shapely.geometry import MultiPolygon, Polygon, box

from geopandas import GeoDataFrame, GeoSeries
from geopandas.array import _check_crs, _crs_mismatch_warn


def _mask_is_list_like_rectangle(mask):
    return pandas.api.types.is_list_like(mask) and not isinstance(
        mask, (GeoDataFrame, GeoSeries, Polygon, MultiPolygon)
    )


def _clip_gdf_with_mask(gdf, mask, sort=False):
    """Clip geometry to the polygon/rectangle extent.

    Clip an input GeoDataFrame to the polygon extent of the polygon
    parameter.

    Parameters
    ----------
    gdf : GeoDataFrame, GeoSeries
        Dataframe to clip.
    mask : (Multi)Polygon, list-like
        Reference polygon/rectangle for clipping.
    sort : boolean, default False
        If True, the results will be sorted in ascending order using the
        geometries' indexes as the primary key.

    Returns
    -------
    GeoDataFrame
        The returned GeoDataFrame is a clipped subset of gdf
        that intersects with polygon/rectangle.
    """
    clipping_by_rectangle = _mask_is_list_like_rectangle(mask)
    if clipping_by_rectangle:
        intersection_polygon = box(*mask)
    else:
        intersection_polygon = mask

    gdf_sub = gdf.iloc[
        gdf.sindex.query(intersection_polygon, predicate="intersects", sort=sort)
    ]

    # For performance reasons points don't need to be intersected with poly
    non_point_mask = gdf_sub.geom_type != "Point"

    if not non_point_mask.any():
        # only points, directly return
        return gdf_sub

    # Clip the data with the polygon
    if isinstance(gdf_sub, GeoDataFrame):
        clipped = gdf_sub.copy()
        if clipping_by_rectangle:
            clipped.loc[non_point_mask, clipped._geometry_column_name] = (
                gdf_sub.geometry.values[non_point_mask].clip_by_rect(*mask)
            )
        else:
            clipped.loc[non_point_mask, clipped._geometry_column_name] = (
                gdf_sub.geometry.values[non_point_mask].intersection(mask)
            )
    else:
        # GeoSeries
        clipped = gdf_sub.copy()
        if clipping_by_rectangle:
            clipped[non_point_mask] = gdf_sub.values[non_point_mask].clip_by_rect(*mask)
        else:
            clipped[non_point_mask] = gdf_sub.values[non_point_mask].intersection(mask)

    if clipping_by_rectangle:
        # clip_by_rect might return empty geometry collections in edge cases
        clipped = clipped[~clipped.is_empty]
    return clipped


def clip(gdf, mask, keep_geom_type=False, sort=False):
    """Clip points, lines, or polygon geometries to the mask extent.

    Both layers must be in the same Coordinate Reference System (CRS).
    The ``gdf`` will be clipped to the full extent of the clip object.

    If there are multiple polygons in mask, data from ``gdf`` will be
    clipped to the total boundary of all polygons in mask.

    If the ``mask`` is list-like with four elements ``(minx, miny, maxx, maxy)``, a
    faster rectangle clipping algorithm will be used. Note that this can lead to
    slightly different results in edge cases, e.g. if a line would be reduced to a
    point, this point might not be returned.

    The geometry is clipped in a fast but possibly dirty way. The output is not
    guaranteed to be valid. No exceptions will be raised for topological errors.

    Parameters
    ----------
    gdf : GeoDataFrame or GeoSeries
        Vector layer (point, line, polygon) to be clipped to mask.
    mask : GeoDataFrame, GeoSeries, (Multi)Polygon, list-like
        Polygon vector layer used to clip ``gdf``.
        The mask's geometry is dissolved into one geometric feature
        and intersected with ``gdf``.
        If the mask is list-like with four elements ``(minx, miny, maxx, maxy)``,
        ``clip`` will use a faster rectangle clipping
        (:meth:`~GeoSeries.clip_by_rect`), possibly leading to slightly
        different results.
    keep_geom_type : boolean, default False
        If True, return only geometries of original type in case of intersection
        resulting in multiple geometry types or GeometryCollections.
        If False, return all resulting geometries (potentially mixed types).
    sort : boolean, default False
        If True, the results will be sorted in ascending order using the
        geometries' indexes as the primary key.

    Returns
    -------
    GeoDataFrame or GeoSeries
        Vector data (points, lines, polygons) from ``gdf`` clipped to
        polygon boundary from mask.

    See also
    --------
    GeoDataFrame.clip : equivalent GeoDataFrame method
    GeoSeries.clip : equivalent GeoSeries method

    Examples
    --------
    Clip points (grocery stores) with polygons (the Near West Side community):

    >>> import geodatasets
    >>> chicago = geopandas.read_file(
    ...     geodatasets.get_path("geoda.chicago_health")
    ... )
    >>> near_west_side = chicago[chicago["community"] == "NEAR WEST SIDE"]
    >>> groceries = geopandas.read_file(
    ...     geodatasets.get_path("geoda.groceries")
    ... ).to_crs(chicago.crs)
    >>> groceries.shape
    (148, 8)

    >>> nws_groceries = geopandas.clip(groceries, near_west_side)
    >>> nws_groceries.shape
    (7, 8)
    """
    if not isinstance(gdf, (GeoDataFrame, GeoSeries)):
        raise TypeError(
            "'gdf' should be GeoDataFrame or GeoSeries, got {}".format(type(gdf))
        )

    mask_is_list_like = _mask_is_list_like_rectangle(mask)
    if (
        not isinstance(mask, (GeoDataFrame, GeoSeries, Polygon, MultiPolygon))
        and not mask_is_list_like
    ):
        raise TypeError(
            "'mask' should be GeoDataFrame, GeoSeries, "
            f"(Multi)Polygon or list-like, got {type(mask)}"
        )

    if mask_is_list_like and len(mask) != 4:
        raise TypeError(
            "If 'mask' is list-like, it must have four values (minx, miny, maxx, maxy)"
        )

    if isinstance(mask, (GeoDataFrame, GeoSeries)):
        if not _check_crs(gdf, mask):
            _crs_mismatch_warn(gdf, mask, stacklevel=3)

    if isinstance(mask, (GeoDataFrame, GeoSeries)):
        box_mask = mask.total_bounds
    elif mask_is_list_like:
        box_mask = mask
    else:
        # Avoid empty tuple returned by .bounds when geometry is empty. A tuple of
        # all nan values is consistent with the behavior of
        # {GeoSeries, GeoDataFrame}.total_bounds for empty geometries.
        # TODO(shapely) can simply use mask.bounds once relying on Shapely 2.0
        box_mask = mask.bounds if not mask.is_empty else (np.nan,) * 4
    box_gdf = gdf.total_bounds
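    # Added note: two axis-aligned boxes overlap iff each box's min is <= the
    # other's max on both axes; when the bounds are disjoint the clip is
    # necessarily empty, so an empty slice is returned without any geometry work.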
    if not (
        ((box_mask[0] <= box_gdf[2]) and (box_gdf[0] <= box_mask[2]))
        and ((box_mask[1] <= box_gdf[3]) and (box_gdf[1] <= box_mask[3]))
    ):
        return gdf.iloc[:0]

    if isinstance(mask, (GeoDataFrame, GeoSeries)):
        combined_mask = mask.geometry.union_all()
    else:
        combined_mask = mask

    clipped = _clip_gdf_with_mask(gdf, combined_mask, sort=sort)

    if keep_geom_type:
        geomcoll_concat = (clipped.geom_type == "GeometryCollection").any()
        geomcoll_orig = (gdf.geom_type == "GeometryCollection").any()

        new_collection = geomcoll_concat and not geomcoll_orig

        if geomcoll_orig:
            warnings.warn(
                "keep_geom_type can not be called on a "
                "GeoDataFrame with GeometryCollection.",
                stacklevel=2,
            )
        else:
            polys = ["Polygon", "MultiPolygon"]
            lines = ["LineString", "MultiLineString", "LinearRing"]
            points = ["Point", "MultiPoint"]

            # Check the gdf for multiple geom types (points, lines and/or polys)
            orig_types_total = sum(
                [
                    gdf.geom_type.isin(polys).any(),
                    gdf.geom_type.isin(lines).any(),
                    gdf.geom_type.isin(points).any(),
                ]
            )

            # Check how many geometry types are in the clipped GeoDataFrame
            clip_types_total = sum(
                [
                    clipped.geom_type.isin(polys).any(),
                    clipped.geom_type.isin(lines).any(),
                    clipped.geom_type.isin(points).any(),
                ]
            )

            # Check there aren't any new geom types in the clipped GeoDataFrame
            more_types = orig_types_total < clip_types_total

            if orig_types_total > 1:
                warnings.warn(
                    "keep_geom_type can not be called on a mixed type GeoDataFrame.",
                    stacklevel=2,
                )
            elif new_collection or more_types:
                orig_type = gdf.geom_type.iloc[0]
                if new_collection:
                    clipped = clipped.explode(index_parts=False)
                if orig_type in polys:
                    clipped = clipped.loc[clipped.geom_type.isin(polys)]
                elif orig_type in lines:
                    clipped = clipped.loc[clipped.geom_type.isin(lines)]

    return clipped
@@ -0,0 +1,184 @@
import time
from collections import defaultdict

import pandas as pd

from shapely.geometry import Point

import geopandas


def _get_throttle_time(provider):
    """
    Amount of time to wait between requests to a geocoding API, for providers
    that specify rate limits in their terms of service.
    """
    import geopy.geocoders

    # https://operations.osmfoundation.org/policies/nominatim/
    if provider == geopy.geocoders.Nominatim:
        return 1
    else:
        return 0


def geocode(strings, provider=None, **kwargs):
    """
    Geocode a set of strings and get a GeoDataFrame of the resulting points.

    Parameters
    ----------
    strings : list or Series of addresses to geocode
    provider : str or geopy.geocoder
        Specifies geocoding service to use. If none is provided,
        will use 'photon' (see Photon's terms of service at:
        https://photon.komoot.io).

        Either the string name used by geopy (as specified in
        geopy.geocoders.SERVICE_TO_GEOCODER) or a geopy Geocoder instance
        (e.g., geopy.geocoders.Photon) may be used.

        Some providers require additional arguments such as access keys.
        See each geocoder's specific parameters in geopy.geocoders.

    Notes
    -----
    Ensure proper use of the results by consulting the Terms of Service for
    your provider.

    Geocoding requires geopy. Install it using 'pip install geopy'. See also
    https://github.com/geopy/geopy

    Examples
    --------
    >>> df = geopandas.tools.geocode(  # doctest: +SKIP
    ...     ["boston, ma", "1600 pennsylvania ave. washington, dc"]
    ... )
    >>> df  # doctest: +SKIP
                         geometry                                            address
    0  POINT (-71.05863 42.35899)                          Boston, MA, United States
    1  POINT (-77.03651 38.89766)  1600 Pennsylvania Ave NW, Washington, DC 20006...
    """
    if provider is None:
        provider = "photon"
    throttle_time = _get_throttle_time(provider)

    return _query(strings, True, provider, throttle_time, **kwargs)


def reverse_geocode(points, provider=None, **kwargs):
    """
    Reverse geocode a set of points and get a GeoDataFrame of the resulting
    addresses.

    The points should be in geographic coordinates: the x coordinate is
    interpreted as longitude and the y coordinate as latitude.

    Parameters
    ----------
    points : list or Series of Shapely Point objects.
        x coordinate is longitude
        y coordinate is latitude
    provider : str or geopy.geocoder (opt)
        Specifies geocoding service to use. If none is provided,
        will use 'photon' (see Photon's terms of service at:
        https://photon.komoot.io).

        Either the string name used by geopy (as specified in
        geopy.geocoders.SERVICE_TO_GEOCODER) or a geopy Geocoder instance
        (e.g., geopy.geocoders.Photon) may be used.

        Some providers require additional arguments such as access keys.
        See each geocoder's specific parameters in geopy.geocoders.

    Notes
    -----
    Ensure proper use of the results by consulting the Terms of Service for
    your provider.

    Reverse geocoding requires geopy. Install it using 'pip install geopy'.
    See also https://github.com/geopy/geopy

    Examples
    --------
    >>> from shapely.geometry import Point
    >>> df = geopandas.tools.reverse_geocode(  # doctest: +SKIP
    ...     [Point(-71.0594869, 42.3584697), Point(-77.0365305, 38.8977332)]
    ... )
    >>> df  # doctest: +SKIP
                         geometry                                            address
    0  POINT (-71.05941 42.35837)       29 Court Sq, Boston, MA 02108, United States
    1  POINT (-77.03641 38.89766)  1600 Pennsylvania Ave NW, Washington, DC 20006...
    """
    if provider is None:
        provider = "photon"
    throttle_time = _get_throttle_time(provider)

    return _query(points, False, provider, throttle_time, **kwargs)


def _query(data, forward, provider, throttle_time, **kwargs):
    # generic wrapper for calls over lists to geopy Geocoders
    from geopy.geocoders import get_geocoder_for_service
    from geopy.geocoders.base import GeocoderQueryError

    if forward:
        if not isinstance(data, pd.Series):
            data = pd.Series(data)
    else:
        if not isinstance(data, geopandas.GeoSeries):
            data = geopandas.GeoSeries(data)

    if isinstance(provider, str):
        provider = get_geocoder_for_service(provider)

    coder = provider(**kwargs)
    results = {}
    for i, s in data.items():
        try:
            if forward:
                results[i] = coder.geocode(s)
            else:
                results[i] = coder.reverse((s.y, s.x), exactly_one=True)
        except (GeocoderQueryError, ValueError):
            results[i] = (None, None)
        time.sleep(throttle_time)

    df = _prepare_geocode_result(results)
    return df


def _prepare_geocode_result(results):
    """
    Helper function for the geocode function

    Takes a dict where keys are index entries, values are tuples containing:
    (address, (lat, lon))

    """
    # Prepare the data for the DataFrame as a dict of lists
    d = defaultdict(list)
    index = []

    for i, s in results.items():
        if s is None:
            p = Point()
            address = None

        else:
            address, loc = s

            # loc is lat, lon and we want lon, lat
            if loc is None:
                p = Point()
            else:
                p = Point(loc[1], loc[0])

        d["geometry"].append(p)
        d["address"].append(address)
        index.append(i)

    df = geopandas.GeoDataFrame(d, index=index, crs="EPSG:4326")

    return df
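
# Added illustration (not part of the commit): _prepare_geocode_result expects
# {index: (address, (lat, lon))} and swaps the pair into Point(lon, lat), e.g.
#   _prepare_geocode_result({0: ("Boston, MA", (42.3584, -71.0598))})
#   # -> row 0: geometry POINT (-71.0598 42.3584), address "Boston, MA"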
@@ -0,0 +1,188 @@
import numpy as np


def _hilbert_distance(geoms, total_bounds=None, level=16):
    """
    Calculate the distance along a Hilbert curve.

    The distances are calculated for the midpoints of the geometries in the
    GeoDataFrame.

    Parameters
    ----------
    geoms : GeometryArray
    total_bounds : 4-element array
        Total bounds of geometries - array
    level : int (1 - 16), default 16
        Determines the precision of the curve (points on the curve will
        have coordinates in the range [0, 2^level - 1]).

    Returns
    -------
    np.ndarray
        Array containing distances along the Hilbert curve

    """
    if geoms.is_empty.any() | geoms.isna().any():
        raise ValueError(
            "Hilbert distance cannot be computed on a GeoSeries with empty or "
            "missing geometries.",
        )
    # Calculate bounds as numpy array
    bounds = geoms.bounds

    # Calculate discrete coords based on total bounds and bounds
    x, y = _continuous_to_discrete_coords(bounds, level, total_bounds)
    # Compute distance along hilbert curve
    distances = _encode(level, x, y)

    return distances


def _continuous_to_discrete_coords(bounds, level, total_bounds):
    """
    Calculate the midpoints of the geometry bounds and convert them to
    discrete integer coordinates.

    Parameters
    ----------
    bounds : Bounds of each geometry - array
    level : The number of iterations used in constructing the Hilbert curve
    total_bounds : Total bounds of geometries - array

    Returns
    -------
    Tuple of two one-dimensional arrays
        Discrete x and y coordinates (one value per geometry)

    """
    # Hilbert side length
    side_length = (2**level) - 1

    # Calculate mid points for x and y bound coords - returns array
    x_mids = (bounds[:, 0] + bounds[:, 2]) / 2.0
    y_mids = (bounds[:, 1] + bounds[:, 3]) / 2.0

    # Calculate x and y range of total bound coords - returns array
    if total_bounds is None:
        total_bounds = (
            np.nanmin(x_mids),
            np.nanmin(y_mids),
            np.nanmax(x_mids),
            np.nanmax(y_mids),
        )

    xmin, ymin, xmax, ymax = total_bounds

    # Transform continuous value to discrete integer for each dimension
    x_int = _continuous_to_discrete(x_mids, (xmin, xmax), side_length)
    y_int = _continuous_to_discrete(y_mids, (ymin, ymax), side_length)

    return x_int, y_int


def _continuous_to_discrete(vals, val_range, n):
    """
    Convert a continuous one-dimensional array to discrete integer values
    based on its range.

    Parameters
    ----------
    vals : Array of continuous values
    val_range : Tuple containing range of continuous values
    n : Largest discrete value (the side length of the discrete grid)

    Returns
    -------
    One-dimensional array of discrete ints

    """
    width = val_range[1] - val_range[0]
    if width == 0:
        return np.zeros_like(vals, dtype=np.uint32)
    res = (vals - val_range[0]) * (n / width)

    np.clip(res, 0, n, out=res)
    return res.astype(np.uint32)
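
# Added worked example (illustrative values): with val_range=(0, 10) and n=3,
# vals [0, 5, 10] map to (vals - 0) * (3 / 10) = [0.0, 1.5, 3.0], which after
# clipping and the uint32 cast become [0, 1, 3].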

# Fast Hilbert curve algorithm by http://threadlocalmutex.com/
# From C++ https://github.com/rawrunprotected/hilbert_curves
# (public domain)


MAX_LEVEL = 16


def _interleave(x):
    x = (x | (x << 8)) & 0x00FF00FF
    x = (x | (x << 4)) & 0x0F0F0F0F
    x = (x | (x << 2)) & 0x33333333
    x = (x | (x << 1)) & 0x55555555
    return x
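
# Added explanatory note: _interleave spreads the low 16 bits of x into the even
# bit positions of a 32-bit word (Morton-style), e.g. 0b1011 -> 0b1000101, so the
# two index halves can be merged into a single curve index in _encode below.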


def _encode(level, x, y):
    x = np.asarray(x, dtype="uint32")
    y = np.asarray(y, dtype="uint32")

    if level > MAX_LEVEL:
        raise ValueError("Level out of range")

    x = x << (16 - level)
    y = y << (16 - level)

    # Initial prefix scan round, prime with x and y
    a = x ^ y
    b = 0xFFFF ^ a
    c = 0xFFFF ^ (x | y)
    d = x & (y ^ 0xFFFF)

    A = a | (b >> 1)
    B = (a >> 1) ^ a
    C = ((c >> 1) ^ (b & (d >> 1))) ^ c
    D = ((a & (c >> 1)) ^ (d >> 1)) ^ d

    a = A.copy()
    b = B.copy()
    c = C.copy()
    d = D.copy()

    A = (a & (a >> 2)) ^ (b & (b >> 2))
    B = (a & (b >> 2)) ^ (b & ((a ^ b) >> 2))
    C ^= (a & (c >> 2)) ^ (b & (d >> 2))
    D ^= (b & (c >> 2)) ^ ((a ^ b) & (d >> 2))

    a = A.copy()
    b = B.copy()
    c = C.copy()
    d = D.copy()

    A = (a & (a >> 4)) ^ (b & (b >> 4))
    B = (a & (b >> 4)) ^ (b & ((a ^ b) >> 4))
    C ^= (a & (c >> 4)) ^ (b & (d >> 4))
    D ^= (b & (c >> 4)) ^ ((a ^ b) & (d >> 4))

    # Final round and projection
    a = A.copy()
    b = B.copy()
    c = C.copy()
    d = D.copy()

    C ^= (a & (c >> 8)) ^ (b & (d >> 8))
    D ^= (b & (c >> 8)) ^ ((a ^ b) & (d >> 8))

    # Undo transformation prefix scan
    a = C ^ (C >> 1)
    b = D ^ (D >> 1)

    # Recover index bits
    i0 = x ^ y
    i1 = b | (0xFFFF ^ (i0 | a))

    return ((_interleave(i1) << 1) | _interleave(i0)) >> (32 - 2 * level)
@@ -0,0 +1,399 @@
import warnings
from functools import reduce

import numpy as np
import pandas as pd

from geopandas import GeoDataFrame, GeoSeries
from geopandas._compat import PANDAS_GE_30
from geopandas.array import _check_crs, _crs_mismatch_warn


def _ensure_geometry_column(df):
    """
    Helper function to ensure the geometry column is called 'geometry'.
    If another column with that name exists, it will be dropped.
    """
    if not df._geometry_column_name == "geometry":
        if PANDAS_GE_30:
            if "geometry" in df.columns:
                df = df.drop("geometry", axis=1)
            df = df.rename_geometry("geometry")
        else:
            if "geometry" in df.columns:
                df.drop("geometry", axis=1, inplace=True)
            df.rename_geometry("geometry", inplace=True)
    return df


def _overlay_intersection(df1, df2):
    """
    Overlay Intersection operation used in overlay function
    """
    # Spatial Index to create intersections
    idx1, idx2 = df2.sindex.query(df1.geometry, predicate="intersects", sort=True)
    # Create pairs of geometries in both dataframes to be intersected
    if idx1.size > 0 and idx2.size > 0:
        left = df1.geometry.take(idx1)
        left.reset_index(drop=True, inplace=True)
        right = df2.geometry.take(idx2)
        right.reset_index(drop=True, inplace=True)
        intersections = left.intersection(right)
        poly_ix = intersections.geom_type.isin(["Polygon", "MultiPolygon"])
        intersections.loc[poly_ix] = intersections[poly_ix].make_valid()

        # only keep actual intersecting geometries
        pairs_intersect = pd.DataFrame({"__idx1": idx1, "__idx2": idx2})
        geom_intersect = intersections

        # merge data for intersecting geometries
        df1 = df1.reset_index(drop=True)
        df2 = df2.reset_index(drop=True)
        dfinter = pairs_intersect.merge(
            df1.drop(df1._geometry_column_name, axis=1),
            left_on="__idx1",
            right_index=True,
        )
        dfinter = dfinter.merge(
            df2.drop(df2._geometry_column_name, axis=1),
            left_on="__idx2",
            right_index=True,
            suffixes=("_1", "_2"),
        )

        return GeoDataFrame(dfinter, geometry=geom_intersect, crs=df1.crs)
    else:
        result = df1.iloc[:0].merge(
            df2.iloc[:0].drop(df2.geometry.name, axis=1),
            left_index=True,
            right_index=True,
            suffixes=("_1", "_2"),
        )
        result["__idx1"] = np.nan
        result["__idx2"] = np.nan
        return result[
            result.columns.drop(df1.geometry.name).tolist() + [df1.geometry.name]
        ]


def _overlay_difference(df1, df2):
    """
    Overlay Difference operation used in overlay function
    """
    # spatial index query to find intersections
    idx1, idx2 = df2.sindex.query(df1.geometry, predicate="intersects", sort=True)
    idx1_unique, idx1_unique_indices = np.unique(idx1, return_index=True)
    idx2_split = np.split(idx2, idx1_unique_indices[1:])
    sidx = [
        idx2_split.pop(0) if idx in idx1_unique else []
        for idx in range(df1.geometry.size)
    ]
    # Create differences
    new_g = []
    for geom, neighbours in zip(df1.geometry, sidx):
        new = reduce(
            lambda x, y: x.difference(y), [geom] + list(df2.geometry.iloc[neighbours])
        )
        new_g.append(new)
    differences = GeoSeries(new_g, index=df1.index, crs=df1.crs)
    poly_ix = differences.geom_type.isin(["Polygon", "MultiPolygon"])
    differences.loc[poly_ix] = differences[poly_ix].make_valid()
    geom_diff = differences[~differences.is_empty].copy()
    dfdiff = df1[~differences.is_empty].copy()
    dfdiff[dfdiff._geometry_column_name] = geom_diff
    return dfdiff
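
# Added note: the reduce(...) in _overlay_difference successively erases every
# intersecting df2 geometry from each df1 geometry, i.e. it computes
# geom.difference(g_1).difference(g_2)... for the matched neighbours g_i.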

def _overlay_symmetric_diff(df1, df2):
    """
    Overlay Symmetric Difference operation used in overlay function
    """
    dfdiff1 = _overlay_difference(df1, df2)
    dfdiff2 = _overlay_difference(df2, df1)
    dfdiff1["__idx1"] = range(len(dfdiff1))
    dfdiff2["__idx2"] = range(len(dfdiff2))
    dfdiff1["__idx2"] = np.nan
    dfdiff2["__idx1"] = np.nan
    # ensure geometry name (otherwise merge goes wrong)
    dfdiff1 = _ensure_geometry_column(dfdiff1)
    dfdiff2 = _ensure_geometry_column(dfdiff2)
    # combine both 'difference' dataframes
    dfsym = dfdiff1.merge(
        dfdiff2, on=["__idx1", "__idx2"], how="outer", suffixes=("_1", "_2")
    )
    geometry = dfsym.geometry_1.copy()
    geometry.name = "geometry"
    # https://github.com/pandas-dev/pandas/issues/26468 use loc for now
    geometry.loc[dfsym.geometry_1.isnull()] = dfsym.loc[
        dfsym.geometry_1.isnull(), "geometry_2"
    ]
    dfsym.drop(["geometry_1", "geometry_2"], axis=1, inplace=True)
    dfsym.reset_index(drop=True, inplace=True)
    dfsym = GeoDataFrame(dfsym, geometry=geometry, crs=df1.crs)
    return dfsym


def _overlay_union(df1, df2):
    """
    Overlay Union operation used in overlay function
    """
    dfinter = _overlay_intersection(df1, df2)
    dfsym = _overlay_symmetric_diff(df1, df2)
    dfunion = pd.concat([dfinter, dfsym], ignore_index=True, sort=False)
    # keep geometry column last
    columns = list(dfunion.columns)
    columns.remove("geometry")
    columns.append("geometry")
    return dfunion.reindex(columns=columns)


def overlay(df1, df2, how="intersection", keep_geom_type=None, make_valid=True):
    """Perform spatial overlay between two GeoDataFrames.

    Currently only supports GeoDataFrames with uniform geometry types,
    i.e. containing only (Multi)Polygons, or only (Multi)Points, or a
    combination of (Multi)LineString and LinearRing shapes.
    Implements several methods that are all effectively subsets of the union.

    See the User Guide page :doc:`../../user_guide/set_operations` for details.

    Parameters
    ----------
    df1 : GeoDataFrame
    df2 : GeoDataFrame
    how : string
        Method of spatial overlay: 'intersection', 'union',
        'identity', 'symmetric_difference' or 'difference'.
    keep_geom_type : bool
        If True, return only geometries of the same geometry type as df1 has,
        if False, return all resulting geometries. Default is None,
        which will set keep_geom_type to True but warn upon dropping
        geometries.
    make_valid : bool, default True
        If True, any invalid input geometries are corrected with a call to
        make_valid(), if False, a `ValueError` is raised if any input geometries
        are invalid.

    Returns
    -------
    df : GeoDataFrame
        GeoDataFrame with new set of polygons and attributes
        resulting from the overlay

    Examples
    --------
    >>> from shapely.geometry import Polygon
    >>> polys1 = geopandas.GeoSeries([Polygon([(0,0), (2,0), (2,2), (0,2)]),
    ...                               Polygon([(2,2), (4,2), (4,4), (2,4)])])
    >>> polys2 = geopandas.GeoSeries([Polygon([(1,1), (3,1), (3,3), (1,3)]),
    ...                               Polygon([(3,3), (5,3), (5,5), (3,5)])])
    >>> df1 = geopandas.GeoDataFrame({'geometry': polys1, 'df1_data':[1,2]})
    >>> df2 = geopandas.GeoDataFrame({'geometry': polys2, 'df2_data':[1,2]})

    >>> geopandas.overlay(df1, df2, how='union')
       df1_data  df2_data                                           geometry
    0       1.0       1.0                POLYGON ((2 2, 2 1, 1 1, 1 2, 2 2))
    1       2.0       1.0                POLYGON ((2 2, 2 3, 3 3, 3 2, 2 2))
    2       2.0       2.0                POLYGON ((4 4, 4 3, 3 3, 3 4, 4 4))
    3       1.0       NaN      POLYGON ((2 0, 0 0, 0 2, 1 2, 1 1, 2 1, 2 0))
    4       2.0       NaN  MULTIPOLYGON (((3 4, 3 3, 2 3, 2 4, 3 4)), ((4...
    5       NaN       1.0  MULTIPOLYGON (((2 3, 2 2, 1 2, 1 3, 2 3)), ((3...
    6       NaN       2.0      POLYGON ((3 5, 5 5, 5 3, 4 3, 4 4, 3 4, 3 5))

    >>> geopandas.overlay(df1, df2, how='intersection')
       df1_data  df2_data                             geometry
    0         1         1  POLYGON ((2 2, 2 1, 1 1, 1 2, 2 2))
    1         2         1  POLYGON ((2 2, 2 3, 3 3, 3 2, 2 2))
    2         2         2  POLYGON ((4 4, 4 3, 3 3, 3 4, 4 4))

    >>> geopandas.overlay(df1, df2, how='symmetric_difference')
       df1_data  df2_data                                           geometry
    0       1.0       NaN      POLYGON ((2 0, 0 0, 0 2, 1 2, 1 1, 2 1, 2 0))
    1       2.0       NaN  MULTIPOLYGON (((3 4, 3 3, 2 3, 2 4, 3 4)), ((4...
    2       NaN       1.0  MULTIPOLYGON (((2 3, 2 2, 1 2, 1 3, 2 3)), ((3...
    3       NaN       2.0      POLYGON ((3 5, 5 5, 5 3, 4 3, 4 4, 3 4, 3 5))

    >>> geopandas.overlay(df1, df2, how='difference')
                                                geometry  df1_data
    0      POLYGON ((2 0, 0 0, 0 2, 1 2, 1 1, 2 1, 2 0))         1
    1  MULTIPOLYGON (((3 4, 3 3, 2 3, 2 4, 3 4)), ((4...         2

    >>> geopandas.overlay(df1, df2, how='identity')
       df1_data  df2_data                                           geometry
    0       1.0       1.0                POLYGON ((2 2, 2 1, 1 1, 1 2, 2 2))
    1       2.0       1.0                POLYGON ((2 2, 2 3, 3 3, 3 2, 2 2))
    2       2.0       2.0                POLYGON ((4 4, 4 3, 3 3, 3 4, 4 4))
    3       1.0       NaN      POLYGON ((2 0, 0 0, 0 2, 1 2, 1 1, 2 1, 2 0))
    4       2.0       NaN  MULTIPOLYGON (((3 4, 3 3, 2 3, 2 4, 3 4)), ((4...

    See also
    --------
    sjoin : spatial join
    GeoDataFrame.overlay : equivalent method

    Notes
    -----
    Every operation in GeoPandas is planar, i.e. the potential third
    dimension is not taken into account.
    """
    # Allowed operations
    allowed_hows = [
        "intersection",
        "union",
        "identity",
        "symmetric_difference",
        "difference",  # aka erase
    ]
    # Error Messages
    if how not in allowed_hows:
        raise ValueError(
            "`how` was '{0}' but is expected to be in {1}".format(how, allowed_hows)
        )

    if isinstance(df1, GeoSeries) or isinstance(df2, GeoSeries):
        raise NotImplementedError(
            "overlay currently only implemented for GeoDataFrames"
        )

    if not _check_crs(df1, df2):
        _crs_mismatch_warn(df1, df2, stacklevel=3)

    if keep_geom_type is None:
        keep_geom_type = True
        keep_geom_type_warning = True
    else:
        keep_geom_type_warning = False

    polys = ["Polygon", "MultiPolygon"]
    lines = ["LineString", "MultiLineString", "LinearRing"]
    points = ["Point", "MultiPoint"]
    for i, df in enumerate([df1, df2]):
        poly_check = df.geom_type.isin(polys).any()
        lines_check = df.geom_type.isin(lines).any()
        points_check = df.geom_type.isin(points).any()
        if sum([poly_check, lines_check, points_check]) > 1:
            raise NotImplementedError(
                "df{} contains mixed geometry types.".format(i + 1)
            )

    if how == "intersection":
        box_gdf1 = df1.total_bounds
        box_gdf2 = df2.total_bounds

        if not (
            ((box_gdf1[0] <= box_gdf2[2]) and (box_gdf2[0] <= box_gdf1[2]))
            and ((box_gdf1[1] <= box_gdf2[3]) and (box_gdf2[1] <= box_gdf1[3]))
        ):
            result = df1.iloc[:0].merge(
                df2.iloc[:0].drop(df2.geometry.name, axis=1),
                left_index=True,
                right_index=True,
                suffixes=("_1", "_2"),
            )
            return result[
                result.columns.drop(df1.geometry.name).tolist() + [df1.geometry.name]
            ]

    # Computations
    def _make_valid(df):
        df = df.copy()
        if df.geom_type.isin(polys).all():
            mask = ~df.geometry.is_valid
            col = df._geometry_column_name
            if make_valid:
                df.loc[mask, col] = df.loc[mask, col].make_valid()
            elif mask.any():
                raise ValueError(
                    "You have passed make_valid=False along with "
                    f"{mask.sum()} invalid input geometries. "
                    "Use make_valid=True or make sure that all geometries "
                    "are valid before using overlay."
                )
        return df

    df1 = _make_valid(df1)
    df2 = _make_valid(df2)

    with warnings.catch_warnings():  # CRS checked above, suppress array-level warning
        warnings.filterwarnings("ignore", message="CRS mismatch between the CRS")
        if how == "difference":
            result = _overlay_difference(df1, df2)
        elif how == "intersection":
            result = _overlay_intersection(df1, df2)
        elif how == "symmetric_difference":
            result = _overlay_symmetric_diff(df1, df2)
        elif how == "union":
            result = _overlay_union(df1, df2)
        elif how == "identity":
            dfunion = _overlay_union(df1, df2)
            result = dfunion[dfunion["__idx1"].notnull()].copy()

        if how in ["intersection", "symmetric_difference", "union", "identity"]:
            result.drop(["__idx1", "__idx2"], axis=1, inplace=True)

    if keep_geom_type:
        geom_type = df1.geom_type.iloc[0]

        # First we filter the geometry types inside GeometryCollections objects
        # (e.g. GeometryCollection([polygon, point]) -> polygon)
        # we do this separately on only the relevant rows, as this is an expensive
        # operation (an expensive no-op for geometry types other than collections)
        is_collection = result.geom_type == "GeometryCollection"
        if is_collection.any():
            geom_col = result._geometry_column_name
            collections = result[[geom_col]][is_collection]

            exploded = collections.reset_index(drop=True).explode(index_parts=True)
            exploded = exploded.reset_index(level=0)

            orig_num_geoms_exploded = exploded.shape[0]
            if geom_type in polys:
                exploded.loc[~exploded.geom_type.isin(polys), geom_col] = None
            elif geom_type in lines:
                exploded.loc[~exploded.geom_type.isin(lines), geom_col] = None
            elif geom_type in points:
                exploded.loc[~exploded.geom_type.isin(points), geom_col] = None
            else:
                raise TypeError(
                    "`keep_geom_type` does not support {}.".format(geom_type)
                )
            num_dropped_collection = (
                orig_num_geoms_exploded - exploded.geometry.isna().sum()
            )

            # level_0 created with above reset_index operation
            # and represents the original geometry collections
            # TODO avoiding dissolve to call union_all in this case could further
            # improve performance (we only need to collect geometries in their
            # respective Multi version)
            dissolved = exploded.dissolve(by="level_0")
            result.loc[is_collection, geom_col] = dissolved[geom_col].values
        else:
            num_dropped_collection = 0

        # Now we filter all geometries (in theory we don't need to do this
        # again for the rows handled above for GeometryCollections, but filtering
        # them out is probably more expensive than simply including them, as this
        # typically concerns only a few rows)
        orig_num_geoms = result.shape[0]
        if geom_type in polys:
            result = result.loc[result.geom_type.isin(polys)]
        elif geom_type in lines:
            result = result.loc[result.geom_type.isin(lines)]
        elif geom_type in points:
            result = result.loc[result.geom_type.isin(points)]
        else:
            raise TypeError("`keep_geom_type` does not support {}.".format(geom_type))
        num_dropped = orig_num_geoms - result.shape[0]

        if (num_dropped > 0 or num_dropped_collection > 0) and keep_geom_type_warning:
            warnings.warn(
                "`keep_geom_type=True` in overlay resulted in {} dropped "
                "geometries of different geometry types than df1 has. "
                "Set `keep_geom_type=False` to retain all "
                "geometries".format(num_dropped + num_dropped_collection),
                UserWarning,
                stacklevel=2,
            )

    result.reset_index(drop=True, inplace=True)
    return result
@@ -0,0 +1,734 @@
import warnings
from functools import partial
from typing import Optional

import numpy as np
import pandas as pd

from geopandas import GeoDataFrame
from geopandas._compat import PANDAS_GE_30
from geopandas.array import _check_crs, _crs_mismatch_warn


def sjoin(
    left_df,
    right_df,
    how="inner",
    predicate="intersects",
    lsuffix="left",
    rsuffix="right",
    distance=None,
    on_attribute=None,
    **kwargs,
):
    """Spatial join of two GeoDataFrames.

    See the User Guide page :doc:`../../user_guide/mergingdata` for details.

    Parameters
    ----------
    left_df, right_df : GeoDataFrames
    how : string, default 'inner'
        The type of join:

        * 'left': use keys from left_df; retain only left_df geometry column
        * 'right': use keys from right_df; retain only right_df geometry column
        * 'inner': use intersection of keys from both dfs; retain only
          left_df geometry column
    predicate : string, default 'intersects'
        Binary predicate. Valid values are determined by the spatial index used.
        You can check the valid values in left_df or right_df as
        ``left_df.sindex.valid_query_predicates`` or
        ``right_df.sindex.valid_query_predicates``.
        Replaces deprecated ``op`` parameter.
    lsuffix : string, default 'left'
        Suffix to apply to overlapping column names (left GeoDataFrame).
    rsuffix : string, default 'right'
        Suffix to apply to overlapping column names (right GeoDataFrame).
    distance : number or array_like, optional
        Distance(s) around each input geometry within which to query the tree
        for the 'dwithin' predicate. If array_like, must be
        one-dimensional with length equal to length of left GeoDataFrame.
        Required if ``predicate='dwithin'``.
    on_attribute : string, list or tuple
        Column name(s) to join on as an additional join restriction on top
        of the spatial predicate. These must be found in both DataFrames.
        If set, observations are joined only if the predicate applies
        and values in specified columns match.

    Examples
    --------
    >>> import geodatasets
    >>> chicago = geopandas.read_file(
    ...     geodatasets.get_path("geoda.chicago_health")
    ... )
    >>> groceries = geopandas.read_file(
    ...     geodatasets.get_path("geoda.groceries")
    ... ).to_crs(chicago.crs)

    >>> chicago.head()  # doctest: +SKIP
       ComAreaID  ...                                           geometry
    0         35  ...  POLYGON ((-87.60914 41.84469, -87.60915 41.844...
    1         36  ...  POLYGON ((-87.59215 41.81693, -87.59231 41.816...
    2         37  ...  POLYGON ((-87.62880 41.80189, -87.62879 41.801...
    3         38  ...  POLYGON ((-87.60671 41.81681, -87.60670 41.816...
    4         39  ...  POLYGON ((-87.59215 41.81693, -87.59215 41.816...
    [5 rows x 87 columns]

    >>> groceries.head()  # doctest: +SKIP
       OBJECTID     Ycoord  ...  Category                         geometry
    0        16  41.973266  ...       NaN  MULTIPOINT (-87.65661 41.97321)
    1        18  41.696367  ...       NaN  MULTIPOINT (-87.68136 41.69713)
    2        22  41.868634  ...       NaN  MULTIPOINT (-87.63918 41.86847)
    3        23  41.877590  ...       new  MULTIPOINT (-87.65495 41.87783)
    4        27  41.737696  ...       NaN  MULTIPOINT (-87.62715 41.73623)
    [5 rows x 8 columns]

    >>> groceries_w_communities = geopandas.sjoin(groceries, chicago)
    >>> groceries_w_communities.head()  # doctest: +SKIP
       OBJECTID       community                           geometry
    0        16          UPTOWN  MULTIPOINT ((-87.65661 41.97321))
    1        18     MORGAN PARK  MULTIPOINT ((-87.68136 41.69713))
    2        22  NEAR WEST SIDE  MULTIPOINT ((-87.63918 41.86847))
    3        23  NEAR WEST SIDE  MULTIPOINT ((-87.65495 41.87783))
    4        27         CHATHAM  MULTIPOINT ((-87.62715 41.73623))
    [5 rows x 95 columns]

    See also
    --------
    overlay : overlay operation resulting in a new geometry
    GeoDataFrame.sjoin : equivalent method

    Notes
    -----
    Every operation in GeoPandas is planar, i.e. the potential third
    dimension is not taken into account.
    """
    if kwargs:
        first = next(iter(kwargs.keys()))
        raise TypeError(f"sjoin() got an unexpected keyword argument '{first}'")

    on_attribute = _maybe_make_list(on_attribute)

    _basic_checks(left_df, right_df, how, lsuffix, rsuffix, on_attribute=on_attribute)

    indices = _geom_predicate_query(
        left_df, right_df, predicate, distance, on_attribute=on_attribute
    )

    joined, _ = _frame_join(
        left_df,
        right_df,
        indices,
        None,
        how,
        lsuffix,
        rsuffix,
        predicate,
        on_attribute=on_attribute,
    )

    return joined


def _maybe_make_list(obj):
    if isinstance(obj, tuple):
        return list(obj)
    if obj is not None and not isinstance(obj, list):
        return [obj]
    return obj
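
# Added illustration: _maybe_make_list("pop") -> ["pop"] and ("a", "b") -> ["a", "b"],
# while None and objects that are already lists pass through unchanged.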

def _basic_checks(left_df, right_df, how, lsuffix, rsuffix, on_attribute=None):
    """Checks the validity of join input parameters.

    `how` must be one of the valid options.
    `'index_'` concatenated with `lsuffix` or `rsuffix` must not already
    exist as columns in the left or right data frames.

    Parameters
    ----------
    left_df : GeoDataFrame
    right_df : GeoDataFrame
    how : str, one of 'left', 'right', 'inner'
        join type
    lsuffix : str
        left index suffix
    rsuffix : str
        right index suffix
    on_attribute : list, default None
        list of column names to merge on along with geometry
    """
    if not isinstance(left_df, GeoDataFrame):
        raise ValueError(
            "'left_df' should be GeoDataFrame, got {}".format(type(left_df))
        )

    if not isinstance(right_df, GeoDataFrame):
        raise ValueError(
            "'right_df' should be GeoDataFrame, got {}".format(type(right_df))
        )

    allowed_hows = ["left", "right", "inner"]
    if how not in allowed_hows:
        raise ValueError(
            '`how` was "{}" but is expected to be in {}'.format(how, allowed_hows)
        )

    if not _check_crs(left_df, right_df):
        _crs_mismatch_warn(left_df, right_df, stacklevel=4)

    if on_attribute:
        for attr in on_attribute:
            if (attr not in left_df) and (attr not in right_df):
                raise ValueError(
                    f"Expected column {attr} is missing from both of the dataframes."
                )
            if attr not in left_df:
                raise ValueError(
                    f"Expected column {attr} is missing from the left dataframe."
                )
            if attr not in right_df:
                raise ValueError(
                    f"Expected column {attr} is missing from the right dataframe."
                )
            if attr in (left_df.geometry.name, right_df.geometry.name):
                raise ValueError(
                    "Active geometry column cannot be used as an input "
                    "for on_attribute parameter."
                )


def _geom_predicate_query(left_df, right_df, predicate, distance, on_attribute=None):
    """Compute geometric comparisons and get matching indices.

    Parameters
    ----------
    left_df : GeoDataFrame
    right_df : GeoDataFrame
    predicate : string
        Binary predicate to query.
    on_attribute : list, default None
        list of column names to merge on along with geometry

    Returns
    -------
    tuple of ndarray
        Integer indices of the matching rows in `left_df` and `right_df`,
        respectively.
    """
    original_predicate = predicate

    if predicate == "within":
        # within is implemented as the inverse of contains
        # contains is a faster predicate
        # see discussion at https://github.com/geopandas/geopandas/pull/1421
        predicate = "contains"
        sindex = left_df.sindex
        input_geoms = right_df.geometry
    else:
        # all other predicates are symmetric
        # keep them the same
        sindex = right_df.sindex
        input_geoms = left_df.geometry

    if sindex:
        l_idx, r_idx = sindex.query(
            input_geoms, predicate=predicate, sort=False, distance=distance
        )
    else:
        # when sindex is empty / has no valid geometries
        l_idx, r_idx = np.array([], dtype=np.intp), np.array([], dtype=np.intp)

    if original_predicate == "within":
        # within is implemented as the inverse of contains
        # flip back the results
        r_idx, l_idx = l_idx, r_idx
        indexer = np.lexsort((r_idx, l_idx))
        l_idx = l_idx[indexer]
        r_idx = r_idx[indexer]

    if on_attribute:
        for attr in on_attribute:
            (l_idx, r_idx), _ = _filter_shared_attribute(
                left_df, right_df, l_idx, r_idx, attr
            )

    return l_idx, r_idx


def _reset_index_with_suffix(df, suffix, other):
    """
    Equivalent of df.reset_index(), but with adding 'suffix' to auto-generated
    column names.
    """
    index_original = df.index.names
    if PANDAS_GE_30:
        df_reset = df.reset_index()
    else:
        # we already made a copy of the dataframe in _frame_join before getting here
        df_reset = df
        df_reset.reset_index(inplace=True)
    column_names = df_reset.columns.to_numpy(copy=True)
    for i, label in enumerate(index_original):
        # if the original label was None, add suffix to auto-generated name
        if label is None:
            new_label = column_names[i]
            if "level" in new_label:
                # reset_index of MultiIndex gives "level_i" names, preserve the "i"
                lev = new_label.split("_")[1]
                new_label = f"index_{suffix}{lev}"
            else:
                new_label = f"index_{suffix}"
            # check new label will not be in other dataframe
            if new_label in df.columns or new_label in other.columns:
                raise ValueError(
                    "'{0}' cannot be a column name in the frames being"
                    " joined".format(new_label)
                )
            column_names[i] = new_label
    return df_reset, pd.Index(column_names)
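
# Added illustration (hypothetical frames): resetting an unnamed single index with
# suffix "left" yields a column "index_left"; for an unnamed MultiIndex, pandas'
# auto-generated "level_0", "level_1", ... become "index_left0", "index_left1", ...
# via the renaming loop above.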
|
||||
|
||||
def _process_column_names_with_suffix(
|
||||
left: pd.Index, right: pd.Index, suffixes, left_df, right_df
|
||||
):
|
||||
"""
|
||||
Add suffixes to overlapping labels (ignoring the geometry column).
|
||||
|
||||
This is based on pandas' merge logic at https://github.com/pandas-dev/pandas/blob/
|
||||
a0779adb183345a8eb4be58b3ad00c223da58768/pandas/core/reshape/merge.py#L2300-L2370
|
||||
"""
|
||||
to_rename = left.intersection(right)
|
||||
if len(to_rename) == 0:
|
||||
return left, right
|
||||
|
||||
lsuffix, rsuffix = suffixes
|
||||
|
||||
if not lsuffix and not rsuffix:
|
||||
raise ValueError(f"columns overlap but no suffix specified: {to_rename}")
|
||||
|
||||
def renamer(x, suffix, geometry):
|
||||
if x in to_rename and x != geometry and suffix is not None:
|
||||
return f"{x}_{suffix}"
|
||||
return x
|
||||
|
||||
lrenamer = partial(
|
||||
renamer,
|
||||
suffix=lsuffix,
|
||||
geometry=getattr(left_df, "_geometry_column_name", None),
|
||||
)
|
||||
rrenamer = partial(
|
||||
renamer,
|
||||
suffix=rsuffix,
|
||||
geometry=getattr(right_df, "_geometry_column_name", None),
|
||||
)
|
||||
|
||||
# TODO retain index name?
|
||||
left_renamed = pd.Index([lrenamer(lab) for lab in left])
|
||||
right_renamed = pd.Index([rrenamer(lab) for lab in right])
|
||||
|
||||
dups = []
|
||||
if not left_renamed.is_unique:
|
||||
# Only warn when duplicates are caused because of suffixes, already duplicated
|
||||
# columns in origin should not warn
|
||||
dups = left_renamed[(left_renamed.duplicated()) & (~left.duplicated())].tolist()
|
||||
if not right_renamed.is_unique:
|
||||
dups.extend(
|
||||
right_renamed[(right_renamed.duplicated()) & (~right.duplicated())].tolist()
|
||||
)
|
||||
# TODO turn this into an error (pandas has done so as well)
|
||||
if dups:
|
||||
warnings.warn(
|
||||
f"Passing 'suffixes' which cause duplicate columns {set(dups)} in the "
|
||||
f"result is deprecated and will raise a MergeError in a future version.",
|
||||
FutureWarning,
|
||||
stacklevel=4,
|
||||
)
|
||||
|
||||
return left_renamed, right_renamed
|
||||
|
||||
|
||||
def _restore_index(joined, index_names, index_names_original):
    """
    Set back the original index columns, restoring their name as `None`
    if they didn't have a name originally.
    """
    if PANDAS_GE_30:
        joined = joined.set_index(list(index_names))
    else:
        joined.set_index(list(index_names), inplace=True)

    # restore the fact that the index didn't have a name
    joined_index_names = list(joined.index.names)
    for i, label in enumerate(index_names_original):
        if label is None:
            joined_index_names[i] = None
    joined.index.names = joined_index_names
    return joined

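
# Illustrative sketch (not part of this commit): restoring an unnamed index
# after the join. The temporary "index_left" column becomes the index again,
# and its auto-generated name is dropped.
#
#   import pandas as pd
#   joined = pd.DataFrame({"index_left": [0, 1], "a": [10, 20]})
#   joined = joined.set_index(["index_left"])
#   joined.index.names = [None]   # the original index had no name
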
def _adjust_indexers(indices, distances, original_length, how, predicate):
    """
    The left/right indexers from the query represent an inner join.
    For a left or right join, we need to adjust them to include the rows
    that would not be present in an inner join.
    """
    # the indices represent an inner join, no adjustment needed
    if how == "inner":
        return indices, distances

    l_idx, r_idx = indices

    if how == "right":
        # re-sort so it is sorted by the right indexer
        indexer = np.lexsort((l_idx, r_idx))
        l_idx, r_idx = l_idx[indexer], r_idx[indexer]
        if distances is not None:
            distances = distances[indexer]

        # switch order
        r_idx, l_idx = l_idx, r_idx

    # determine which indices are missing and where they would need to be inserted
    idx = np.arange(original_length)
    l_idx_missing = idx[~np.isin(idx, l_idx)]
    insert_idx = np.searchsorted(l_idx, l_idx_missing)
    # for the left indexer, insert those missing indices
    l_idx = np.insert(l_idx, insert_idx, l_idx_missing)
    # for the right indexer, insert -1 -> to get missing values in pandas' reindexing
    r_idx = np.insert(r_idx, insert_idx, -1)
    # for the distances, insert the missing values (NaN) manually
    if distances is not None:
        distances = np.insert(distances, insert_idx, np.nan)

    if how == "right":
        # switch back
        l_idx, r_idx = r_idx, l_idx

    return (l_idx, r_idx), distances

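
# Illustrative sketch (not part of this commit) of the adjustment above with
# plain numpy: left rows 0..3 exist, but only rows 0 and 2 matched anything.
#
#   import numpy as np
#   l_idx, r_idx = np.array([0, 2]), np.array([5, 7])
#   idx = np.arange(4)                           # original_length == 4
#   missing = idx[~np.isin(idx, l_idx)]          # array([1, 3])
#   insert_at = np.searchsorted(l_idx, missing)  # array([1, 2])
#   np.insert(l_idx, insert_at, missing)         # array([0, 1, 2, 3])
#   np.insert(r_idx, insert_at, -1)              # array([ 5, -1,  7, -1])
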
def _frame_join(
    left_df,
    right_df,
    indices,
    distances,
    how,
    lsuffix,
    rsuffix,
    predicate,
    on_attribute=None,
):
    """Join the GeoDataFrames at the DataFrame level.

    Parameters
    ----------
    left_df : GeoDataFrame
    right_df : GeoDataFrame
    indices : tuple of ndarray
        Indices returned by the geometric join. Tuple with integer
        indices representing the matches from `left_df` and `right_df`
        respectively.
    distances : ndarray, optional
        Passed through and adapted based on the indices, if needed.
    how : string
        The type of join to use on the DataFrame level.
    lsuffix : string
        Suffix to apply to overlapping column names (left GeoDataFrame).
    rsuffix : string
        Suffix to apply to overlapping column names (right GeoDataFrame).
    on_attribute : list, default None
        List of column names to merge on along with geometry.

    Returns
    -------
    GeoDataFrame
        Joined GeoDataFrame.
    """
    if on_attribute:  # avoid renaming or duplicating shared column
        right_df = right_df.drop(on_attribute, axis=1)

    if how in ("inner", "left"):
        right_df = right_df.drop(right_df.geometry.name, axis=1)
    else:  # how == 'right':
        left_df = left_df.drop(left_df.geometry.name, axis=1)

    left_df = left_df.copy(deep=False)
    left_nlevels = left_df.index.nlevels
    left_index_original = left_df.index.names
    left_df, left_column_names = _reset_index_with_suffix(left_df, lsuffix, right_df)

    right_df = right_df.copy(deep=False)
    right_nlevels = right_df.index.nlevels
    right_index_original = right_df.index.names
    right_df, right_column_names = _reset_index_with_suffix(right_df, rsuffix, left_df)

    # if conflicting names in left and right, add suffix
    left_column_names, right_column_names = _process_column_names_with_suffix(
        left_column_names,
        right_column_names,
        (lsuffix, rsuffix),
        left_df,
        right_df,
    )
    left_df.columns = left_column_names
    right_df.columns = right_column_names
    left_index = left_df.columns[:left_nlevels]
    right_index = right_df.columns[:right_nlevels]

    # perform join on the dataframes
    original_length = len(right_df) if how == "right" else len(left_df)
    (l_idx, r_idx), distances = _adjust_indexers(
        indices, distances, original_length, how, predicate
    )
    # the `take` method doesn't allow introducing NaNs with -1 indices
    # left = left_df.take(l_idx)
    # therefore we are using the private _reindex_with_indexers as workaround
    new_index = pd.RangeIndex(len(l_idx))
    left = left_df._reindex_with_indexers({0: (new_index, l_idx)})
    right = right_df._reindex_with_indexers({0: (new_index, r_idx)})
    if PANDAS_GE_30:
        kwargs = {}
    else:
        kwargs = dict(copy=False)
    joined = pd.concat([left, right], axis=1, **kwargs)

    if how in ("inner", "left"):
        joined = _restore_index(joined, left_index, left_index_original)
    else:  # how == 'right':
        joined = joined.set_geometry(right_df.geometry.name)
        joined = _restore_index(joined, right_index, right_index_original)

    return joined, distances

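
# Illustrative sketch (not part of this commit) of why the workaround above is
# needed: in pandas' low-level reindexing a -1 in the indexer means "missing"
# and yields NaN, whereas DataFrame.take() interprets -1 as the last row.
#
#   import numpy as np
#   import pandas as pd
#   df = pd.DataFrame({"a": [10, 20]})
#   df.take([0, -1])["a"].tolist()      # [10, 20]  (-1 selects the last row)
#   df._reindex_with_indexers(          # private pandas API, as used above
#       {0: (pd.RangeIndex(3), np.array([0, -1, 1]))}
#   )["a"].tolist()                     # [10.0, nan, 20.0]
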
def _nearest_query(
    left_df: GeoDataFrame,
    right_df: GeoDataFrame,
    max_distance: float,
    how: str,
    return_distance: bool,
    exclusive: bool,
    on_attribute: Optional[list] = None,
):
    # use the opposite of the join direction for the index
    use_left_as_sindex = how == "right"
    if use_left_as_sindex:
        sindex = left_df.sindex
        query = right_df.geometry
    else:
        sindex = right_df.sindex
        query = left_df.geometry
    if sindex:
        res = sindex.nearest(
            query,
            return_all=True,
            max_distance=max_distance,
            return_distance=return_distance,
            exclusive=exclusive,
        )
        if return_distance:
            (input_idx, tree_idx), distances = res
        else:
            (input_idx, tree_idx) = res
            distances = None
        if use_left_as_sindex:
            l_idx, r_idx = tree_idx, input_idx
            sort_order = np.argsort(l_idx, kind="stable")
            l_idx, r_idx = l_idx[sort_order], r_idx[sort_order]
            if distances is not None:
                distances = distances[sort_order]
        else:
            l_idx, r_idx = input_idx, tree_idx
    else:
        # when sindex is empty / has no valid geometries
        l_idx, r_idx = np.array([], dtype=np.intp), np.array([], dtype=np.intp)
        if return_distance:
            distances = np.array([], dtype=np.float64)
        else:
            distances = None

    if on_attribute:
        for attr in on_attribute:
            (l_idx, r_idx), shared_attribute_rows = _filter_shared_attribute(
                left_df, right_df, l_idx, r_idx, attr
            )
            distances = distances[shared_attribute_rows]

    return (l_idx, r_idx), distances

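
# Illustrative sketch (not part of this commit) of the sindex.nearest call
# above: it returns a 2 x n array of (query geometry, tree geometry) positions,
# plus an array of distances when return_distance=True.
#
#   import geopandas
#   from shapely.geometry import Point
#   query = geopandas.GeoSeries([Point(0, 0), Point(3, 3)])
#   tree = geopandas.GeoSeries([Point(0, 1), Point(3, 4)])
#   (input_idx, tree_idx), dist = tree.sindex.nearest(
#       query, return_all=True, return_distance=True
#   )
#   # input_idx -> [0, 1], tree_idx -> [0, 1], dist -> [1.0, 1.0]
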
def _filter_shared_attribute(left_df, right_df, l_idx, r_idx, attribute):
    """
    Return the indices for the left and right dataframe that share the same entry
    in the attribute column. Also returns the Boolean mask `shared_attribute_rows`
    selecting the rows with matching entries.
    """
    shared_attribute_rows = (
        left_df[attribute].iloc[l_idx].values == right_df[attribute].iloc[r_idx].values
    )

    l_idx = l_idx[shared_attribute_rows]
    r_idx = r_idx[shared_attribute_rows]
    return (l_idx, r_idx), shared_attribute_rows

def sjoin_nearest(
    left_df: GeoDataFrame,
    right_df: GeoDataFrame,
    how: str = "inner",
    max_distance: Optional[float] = None,
    lsuffix: str = "left",
    rsuffix: str = "right",
    distance_col: Optional[str] = None,
    exclusive: bool = False,
) -> GeoDataFrame:
    """Spatial join of two GeoDataFrames based on the distance between their geometries.

    Results will include multiple output records for a single input record
    where there are multiple equidistant nearest or intersected neighbors.

    Distance is calculated in CRS units and can be returned using the
    `distance_col` parameter.

    See the User Guide page
    https://geopandas.readthedocs.io/en/latest/docs/user_guide/mergingdata.html
    for more details.

    Parameters
    ----------
    left_df, right_df : GeoDataFrames
    how : string, default 'inner'
        The type of join:

        * 'left': use keys from left_df; retain only left_df geometry column
        * 'right': use keys from right_df; retain only right_df geometry column
        * 'inner': use intersection of keys from both dfs; retain only
          left_df geometry column
    max_distance : float, default None
        Maximum distance within which to query for nearest geometry.
        Must be greater than 0.
        The max_distance used to search for nearest items in the tree may have a
        significant impact on performance by reducing the number of input
        geometries that are evaluated for nearest items in the tree.
    lsuffix : string, default 'left'
        Suffix to apply to overlapping column names (left GeoDataFrame).
    rsuffix : string, default 'right'
        Suffix to apply to overlapping column names (right GeoDataFrame).
    distance_col : string, default None
        If set, save the distances computed between matching geometries under a
        column of this name in the joined GeoDataFrame.
    exclusive : bool, default False
        If True, the nearest geometries that are equal to the input geometry
        will not be returned.

    Examples
    --------
    >>> import geodatasets
    >>> groceries = geopandas.read_file(
    ...     geodatasets.get_path("geoda.groceries")
    ... )
    >>> chicago = geopandas.read_file(
    ...     geodatasets.get_path("geoda.chicago_health")
    ... ).to_crs(groceries.crs)

    >>> chicago.head()  # doctest: +SKIP
       ComAreaID  ...                                           geometry
    0         35  ...  POLYGON ((-87.60914 41.84469, -87.60915 41.844...
    1         36  ...  POLYGON ((-87.59215 41.81693, -87.59231 41.816...
    2         37  ...  POLYGON ((-87.62880 41.80189, -87.62879 41.801...
    3         38  ...  POLYGON ((-87.60671 41.81681, -87.60670 41.816...
    4         39  ...  POLYGON ((-87.59215 41.81693, -87.59215 41.816...
    [5 rows x 87 columns]

    >>> groceries.head()  # doctest: +SKIP
       OBJECTID     Ycoord  ...  Category                           geometry
    0        16  41.973266  ...       NaN  MULTIPOINT ((-87.65661 41.97321))
    1        18  41.696367  ...       NaN  MULTIPOINT ((-87.68136 41.69713))
    2        22  41.868634  ...       NaN  MULTIPOINT ((-87.63918 41.86847))
    3        23  41.877590  ...       new  MULTIPOINT ((-87.65495 41.87783))
    4        27  41.737696  ...       NaN  MULTIPOINT ((-87.62715 41.73623))
    [5 rows x 8 columns]

    >>> groceries_w_communities = geopandas.sjoin_nearest(groceries, chicago)
    >>> groceries_w_communities[["Chain", "community", "geometry"]].head(2)
                   Chain    community                                geometry
    0     VIET HOA PLAZA       UPTOWN   MULTIPOINT ((1168268.672 1933554.35))
    1  COUNTY FAIR FOODS  MORGAN PARK  MULTIPOINT ((1162302.618 1832900.224))

    To include the distances:

    >>> groceries_w_communities = geopandas.sjoin_nearest(groceries, chicago, \
distance_col="distances")
    >>> groceries_w_communities[["Chain", "community", \
"distances"]].head(2)
                   Chain    community  distances
    0     VIET HOA PLAZA       UPTOWN        0.0
    1  COUNTY FAIR FOODS  MORGAN PARK        0.0

    In the following example, we get multiple groceries for Uptown because all
    results are equidistant (in this case zero because they intersect).
    In fact, we get 4 results in total:

    >>> chicago_w_groceries = geopandas.sjoin_nearest(groceries, chicago, \
distance_col="distances", how="right")
    >>> uptown_results = \
chicago_w_groceries[chicago_w_groceries["community"] == "UPTOWN"]
    >>> uptown_results[["Chain", "community"]]
                 Chain community
    30  VIET HOA PLAZA    UPTOWN
    30      JEWEL OSCO    UPTOWN
    30          TARGET    UPTOWN
    30       Mariano's    UPTOWN

    See also
    --------
    sjoin : binary predicate joins
    GeoDataFrame.sjoin_nearest : equivalent method

    Notes
    -----
    Since this join relies on distances, results will be inaccurate
    if your geometries are in a geographic CRS.

    Every operation in GeoPandas is planar, i.e. the potential third
    dimension is not taken into account.
    """
    _basic_checks(left_df, right_df, how, lsuffix, rsuffix)

    left_df.geometry.values.check_geographic_crs(stacklevel=1)
    right_df.geometry.values.check_geographic_crs(stacklevel=1)

    return_distance = distance_col is not None

    indices, distances = _nearest_query(
        left_df,
        right_df,
        max_distance,
        how,
        return_distance,
        exclusive,
    )
    joined, distances = _frame_join(
        left_df,
        right_df,
        indices,
        distances,
        how,
        lsuffix,
        rsuffix,
        None,
    )

    if return_distance:
        joined[distance_col] = distances

    return joined
@@ -0,0 +1,484 @@
"""Tests for the clip module."""

import numpy as np
import pandas as pd

import shapely
from shapely.geometry import (
    GeometryCollection,
    LinearRing,
    LineString,
    MultiPoint,
    Point,
    Polygon,
    box,
)

import geopandas
from geopandas import GeoDataFrame, GeoSeries, clip
from geopandas._compat import HAS_PYPROJ
from geopandas.tools.clip import _mask_is_list_like_rectangle

import pytest
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
from pandas.testing import assert_index_equal

mask_variants_single_rectangle = [
    "single_rectangle_gdf",
    "single_rectangle_gdf_list_bounds",
    "single_rectangle_gdf_tuple_bounds",
    "single_rectangle_gdf_array_bounds",
]
mask_variants_large_rectangle = [
    "larger_single_rectangle_gdf",
    "larger_single_rectangle_gdf_bounds",
]


@pytest.fixture
def point_gdf():
    """Create a point GeoDataFrame."""
    pts = np.array([[2, 2], [3, 4], [9, 8], [-12, -15]])
    gdf = GeoDataFrame([Point(xy) for xy in pts], columns=["geometry"], crs="EPSG:3857")
    return gdf


@pytest.fixture
def point_gdf2():
    """Create a point GeoDataFrame."""
    pts = np.array([[5, 5], [2, 2], [4, 4], [0, 0], [3, 3], [1, 1]])
    gdf = GeoDataFrame([Point(xy) for xy in pts], columns=["geometry"], crs="EPSG:3857")
    return gdf


@pytest.fixture
def pointsoutside_nooverlap_gdf():
    """Create a point GeoDataFrame. Its points are all outside the single
    rectangle, and its bounds are outside the single rectangle's."""
    pts = np.array([[5, 15], [15, 15], [15, 20]])
    gdf = GeoDataFrame([Point(xy) for xy in pts], columns=["geometry"], crs="EPSG:3857")
    return gdf


@pytest.fixture
def pointsoutside_overlap_gdf():
    """Create a point GeoDataFrame. Its points are all outside the single
    rectangle, and its bounds are overlapping the single rectangle's."""
    pts = np.array([[5, 15], [15, 15], [15, 5]])
    gdf = GeoDataFrame([Point(xy) for xy in pts], columns=["geometry"], crs="EPSG:3857")
    return gdf


@pytest.fixture
def single_rectangle_gdf():
    """Create a single rectangle for clipping."""
    poly_inters = Polygon([(0, 0), (0, 10), (10, 10), (10, 0), (0, 0)])
    gdf = GeoDataFrame([1], geometry=[poly_inters], crs="EPSG:3857")
    gdf["attr2"] = "site-boundary"
    return gdf


@pytest.fixture
def single_rectangle_gdf_tuple_bounds(single_rectangle_gdf):
    """Bounds of the created single rectangle"""
    return tuple(single_rectangle_gdf.total_bounds)


@pytest.fixture
def single_rectangle_gdf_list_bounds(single_rectangle_gdf):
    """Bounds of the created single rectangle"""
    return list(single_rectangle_gdf.total_bounds)


@pytest.fixture
def single_rectangle_gdf_array_bounds(single_rectangle_gdf):
    """Bounds of the created single rectangle"""
    return single_rectangle_gdf.total_bounds


@pytest.fixture
def larger_single_rectangle_gdf():
    """Create a slightly larger rectangle for clipping.
    The smaller single rectangle is used to test the edge case where slivers
    are returned when you clip polygons. This fixture is larger which
    eliminates the slivers in the clip return.
    """
    poly_inters = Polygon([(-5, -5), (-5, 15), (15, 15), (15, -5), (-5, -5)])
    gdf = GeoDataFrame([1], geometry=[poly_inters], crs="EPSG:3857")
    gdf["attr2"] = ["study area"]
    return gdf


@pytest.fixture
def larger_single_rectangle_gdf_bounds(larger_single_rectangle_gdf):
    """Bounds of the created single rectangle"""
    return tuple(larger_single_rectangle_gdf.total_bounds)


@pytest.fixture
def buffered_locations(point_gdf):
    """Buffer points to create a multi-polygon."""
    buffered_locs = point_gdf
    buffered_locs["geometry"] = buffered_locs.buffer(4)
    buffered_locs["type"] = "plot"
    return buffered_locs


@pytest.fixture
def donut_geometry(buffered_locations, single_rectangle_gdf):
    """Make a geometry with a hole in the middle (a donut)."""
    donut = geopandas.overlay(
        buffered_locations, single_rectangle_gdf, how="symmetric_difference"
    )
    return donut


@pytest.fixture
def two_line_gdf():
    """Create line objects for testing."""
    linea = LineString([(1, 1), (2, 2), (3, 2), (5, 3)])
    lineb = LineString([(3, 4), (5, 7), (12, 2), (10, 5), (9, 7.5)])
    gdf = GeoDataFrame([1, 2], geometry=[linea, lineb], crs="EPSG:3857")
    return gdf


@pytest.fixture
def multi_poly_gdf(donut_geometry):
    """Create a multi-polygon GeoDataFrame."""
    multi_poly = donut_geometry.union_all()
    out_df = GeoDataFrame(geometry=GeoSeries(multi_poly), crs="EPSG:3857")
    out_df["attr"] = ["pool"]
    return out_df


@pytest.fixture
def multi_line(two_line_gdf):
    """Create a multi-line GeoDataFrame.
    This GDF has one multiline and one regular line."""
    # Create a single and multi line object
    multiline_feat = two_line_gdf.union_all()
    linec = LineString([(2, 1), (3, 1), (4, 1), (5, 2)])
    out_df = GeoDataFrame(geometry=GeoSeries([multiline_feat, linec]), crs="EPSG:3857")
    out_df["attr"] = ["road", "stream"]
    return out_df


@pytest.fixture
def multi_point(point_gdf):
    """Create a multi-point GeoDataFrame."""
    multi_point = point_gdf.union_all()
    out_df = GeoDataFrame(
        geometry=GeoSeries(
            [multi_point, Point(2, 5), Point(-11, -14), Point(-10, -12)]
        ),
        crs="EPSG:3857",
    )
    out_df["attr"] = ["tree", "another tree", "shrub", "berries"]
    return out_df


@pytest.fixture
def mixed_gdf():
    """Create a mixed point, polygon, line, and ring GeoDataFrame for testing."""
    point = Point(2, 3)
    line = LineString([(1, 1), (2, 2), (3, 2), (5, 3), (12, 1)])
    poly = Polygon([(3, 4), (5, 2), (12, 2), (10, 5), (9, 7.5)])
    ring = LinearRing([(1, 1), (2, 2), (3, 2), (5, 3), (12, 1)])
    gdf = GeoDataFrame(
        [1, 2, 3, 4], geometry=[point, poly, line, ring], crs="EPSG:3857"
    )
    return gdf


@pytest.fixture
def geomcol_gdf():
    """Create a GeometryCollection (point and polygon) GeoDataFrame for testing."""
    point = Point(2, 3)
    poly = Polygon([(3, 4), (5, 2), (12, 2), (10, 5), (9, 7.5)])
    coll = GeometryCollection([point, poly])
    gdf = GeoDataFrame([1], geometry=[coll], crs="EPSG:3857")
    return gdf


@pytest.fixture
def sliver_line():
    """Create a line that will create a point when clipped."""
    linea = LineString([(10, 5), (13, 5), (15, 5)])
    lineb = LineString([(1, 1), (2, 2), (3, 2), (5, 3), (12, 1)])
    gdf = GeoDataFrame([1, 2], geometry=[linea, lineb], crs="EPSG:3857")
    return gdf

def test_not_gdf(single_rectangle_gdf):
    """Non-GeoDataFrame inputs raise a TypeError."""
    with pytest.raises(TypeError):
        clip((2, 3), single_rectangle_gdf)
    with pytest.raises(TypeError):
        clip(single_rectangle_gdf, "foobar")
    with pytest.raises(TypeError):
        clip(single_rectangle_gdf, (1, 2, 3))
    with pytest.raises(TypeError):
        clip(single_rectangle_gdf, (1, 2, 3, 4, 5))


def test_non_overlapping_geoms():
    """Test that a bounding box returns empty if the extents don't overlap"""
    unit_box = Polygon([(0, 0), (0, 1), (1, 1), (1, 0), (0, 0)])
    unit_gdf = GeoDataFrame([1], geometry=[unit_box], crs="EPSG:3857")
    non_overlapping_gdf = unit_gdf.copy()
    non_overlapping_gdf = non_overlapping_gdf.geometry.apply(
        lambda x: shapely.affinity.translate(x, xoff=20)
    )
    out = clip(unit_gdf, non_overlapping_gdf)
    assert_geodataframe_equal(out, unit_gdf.iloc[:0])
    out2 = clip(unit_gdf.geometry, non_overlapping_gdf)
    assert_geoseries_equal(out2, GeoSeries(crs=unit_gdf.crs))

@pytest.mark.parametrize("mask_fixture_name", mask_variants_single_rectangle)
|
||||
class TestClipWithSingleRectangleGdf:
|
||||
@pytest.fixture
|
||||
def mask(self, mask_fixture_name, request):
|
||||
return request.getfixturevalue(mask_fixture_name)
|
||||
|
||||
def test_returns_gdf(self, point_gdf, mask):
|
||||
"""Test that function returns a GeoDataFrame (or GDF-like) object."""
|
||||
out = clip(point_gdf, mask)
|
||||
assert isinstance(out, GeoDataFrame)
|
||||
|
||||
def test_returns_series(self, point_gdf, mask):
|
||||
"""Test that function returns a GeoSeries if GeoSeries is passed."""
|
||||
out = clip(point_gdf.geometry, mask)
|
||||
assert isinstance(out, GeoSeries)
|
||||
|
||||
def test_clip_points(self, point_gdf, mask):
|
||||
"""Test clipping a points GDF with a generic polygon geometry."""
|
||||
clip_pts = clip(point_gdf, mask)
|
||||
pts = np.array([[2, 2], [3, 4], [9, 8]])
|
||||
exp = GeoDataFrame(
|
||||
[Point(xy) for xy in pts], columns=["geometry"], crs="EPSG:3857"
|
||||
)
|
||||
assert_geodataframe_equal(clip_pts, exp)
|
||||
|
||||
def test_clip_points_geom_col_rename(self, point_gdf, mask):
|
||||
"""Test clipping a points GDF with a generic polygon geometry."""
|
||||
point_gdf_geom_col_rename = point_gdf.rename_geometry("geometry2")
|
||||
clip_pts = clip(point_gdf_geom_col_rename, mask)
|
||||
pts = np.array([[2, 2], [3, 4], [9, 8]])
|
||||
exp = GeoDataFrame(
|
||||
[Point(xy) for xy in pts],
|
||||
columns=["geometry2"],
|
||||
crs="EPSG:3857",
|
||||
geometry="geometry2",
|
||||
)
|
||||
assert_geodataframe_equal(clip_pts, exp)
|
||||
|
||||
def test_clip_poly(self, buffered_locations, mask):
|
||||
"""Test clipping a polygon GDF with a generic polygon geometry."""
|
||||
clipped_poly = clip(buffered_locations, mask)
|
||||
assert len(clipped_poly.geometry) == 3
|
||||
assert all(clipped_poly.geom_type == "Polygon")
|
||||
|
||||
def test_clip_poly_geom_col_rename(self, buffered_locations, mask):
|
||||
"""Test clipping a polygon GDF with a generic polygon geometry."""
|
||||
|
||||
poly_gdf_geom_col_rename = buffered_locations.rename_geometry("geometry2")
|
||||
clipped_poly = clip(poly_gdf_geom_col_rename, mask)
|
||||
assert len(clipped_poly.geometry) == 3
|
||||
assert "geometry" not in clipped_poly.keys()
|
||||
assert "geometry2" in clipped_poly.keys()
|
||||
|
||||
def test_clip_poly_series(self, buffered_locations, mask):
|
||||
"""Test clipping a polygon GDF with a generic polygon geometry."""
|
||||
clipped_poly = clip(buffered_locations.geometry, mask)
|
||||
assert len(clipped_poly) == 3
|
||||
assert all(clipped_poly.geom_type == "Polygon")
|
||||
|
||||
def test_clip_multipoly_keep_geom_type(self, multi_poly_gdf, mask):
|
||||
"""Test a multi poly object where the return includes a sliver.
|
||||
Also the bounds of the object should == the bounds of the clip object
|
||||
if they fully overlap (as they do in these fixtures)."""
|
||||
clipped = clip(multi_poly_gdf, mask, keep_geom_type=True)
|
||||
expected_bounds = (
|
||||
mask if _mask_is_list_like_rectangle(mask) else mask.total_bounds
|
||||
)
|
||||
assert np.array_equal(clipped.total_bounds, expected_bounds)
|
||||
# Assert returned data is a not geometry collection
|
||||
assert (clipped.geom_type.isin(["Polygon", "MultiPolygon"])).all()
|
||||
|
||||
def test_clip_multiline(self, multi_line, mask):
|
||||
"""Test that clipping a multiline feature with a poly returns expected
|
||||
output."""
|
||||
clipped = clip(multi_line, mask)
|
||||
assert clipped.geom_type[0] == "MultiLineString"
|
||||
|
||||
def test_clip_multipoint(self, multi_point, mask):
|
||||
"""Clipping a multipoint feature with a polygon works as expected.
|
||||
should return a geodataframe with a single multi point feature"""
|
||||
clipped = clip(multi_point, mask)
|
||||
assert clipped.geom_type[0] == "MultiPoint"
|
||||
assert hasattr(clipped, "attr")
|
||||
# All points should intersect the clip geom
|
||||
assert len(clipped) == 2
|
||||
clipped_mutltipoint = MultiPoint(
|
||||
[
|
||||
Point(2, 2),
|
||||
Point(3, 4),
|
||||
Point(9, 8),
|
||||
]
|
||||
)
|
||||
assert clipped.iloc[0].geometry.wkt == clipped_mutltipoint.wkt
|
||||
shape_for_points = (
|
||||
box(*mask) if _mask_is_list_like_rectangle(mask) else mask.union_all()
|
||||
)
|
||||
assert all(clipped.intersects(shape_for_points))
|
||||
|
||||
def test_clip_lines(self, two_line_gdf, mask):
|
||||
"""Test what happens when you give the clip_extent a line GDF."""
|
||||
clip_line = clip(two_line_gdf, mask)
|
||||
assert len(clip_line.geometry) == 2
|
||||
|
||||
def test_mixed_geom(self, mixed_gdf, mask):
|
||||
"""Test clipping a mixed GeoDataFrame"""
|
||||
clipped = clip(mixed_gdf, mask)
|
||||
assert (
|
||||
clipped.geom_type[0] == "Point"
|
||||
and clipped.geom_type[1] == "Polygon"
|
||||
and clipped.geom_type[2] == "LineString"
|
||||
)
|
||||
|
||||
def test_mixed_series(self, mixed_gdf, mask):
|
||||
"""Test clipping a mixed GeoSeries"""
|
||||
clipped = clip(mixed_gdf.geometry, mask)
|
||||
assert (
|
||||
clipped.geom_type[0] == "Point"
|
||||
and clipped.geom_type[1] == "Polygon"
|
||||
and clipped.geom_type[2] == "LineString"
|
||||
)
|
||||
|
||||
def test_clip_with_line_extra_geom(self, sliver_line, mask):
|
||||
"""When the output of a clipped line returns a geom collection,
|
||||
and keep_geom_type is True, no geometry collections should be returned."""
|
||||
clipped = clip(sliver_line, mask, keep_geom_type=True)
|
||||
assert len(clipped.geometry) == 1
|
||||
# Assert returned data is a not geometry collection
|
||||
assert not (clipped.geom_type == "GeometryCollection").any()
|
||||
|
||||
def test_clip_no_box_overlap(self, pointsoutside_nooverlap_gdf, mask):
|
||||
"""Test clip when intersection is empty and boxes do not overlap."""
|
||||
clipped = clip(pointsoutside_nooverlap_gdf, mask)
|
||||
assert len(clipped) == 0
|
||||
|
||||
def test_clip_box_overlap(self, pointsoutside_overlap_gdf, mask):
|
||||
"""Test clip when intersection is empty and boxes do overlap."""
|
||||
clipped = clip(pointsoutside_overlap_gdf, mask)
|
||||
assert len(clipped) == 0
|
||||
|
||||
def test_warning_extra_geoms_mixed(self, mixed_gdf, mask):
|
||||
"""Test the correct warnings are raised if keep_geom_type is
|
||||
called on a mixed GDF"""
|
||||
with pytest.warns(UserWarning):
|
||||
clip(mixed_gdf, mask, keep_geom_type=True)
|
||||
|
||||
def test_warning_geomcoll(self, geomcol_gdf, mask):
|
||||
"""Test the correct warnings are raised if keep_geom_type is
|
||||
called on a GDF with GeometryCollection"""
|
||||
with pytest.warns(UserWarning):
|
||||
clip(geomcol_gdf, mask, keep_geom_type=True)
|
||||
|
||||
|
||||
def test_clip_line_keep_slivers(sliver_line, single_rectangle_gdf):
    """Test that a point sliver is kept when clipping a line-only
    geometry type."""
    clipped = clip(sliver_line, single_rectangle_gdf)
    # The point sliver is kept alongside the line geometry
    assert "Point" == clipped.geom_type[0]
    assert "LineString" == clipped.geom_type[1]


def test_clip_multipoly_keep_slivers(multi_poly_gdf, single_rectangle_gdf):
    """Test a multi poly object where the return includes a sliver.
    Also the bounds of the object should == the bounds of the clip object
    if they fully overlap (as they do in these fixtures)."""
    clipped = clip(multi_poly_gdf, single_rectangle_gdf)
    assert np.array_equal(clipped.total_bounds, single_rectangle_gdf.total_bounds)
    # Assert returned data is a geometry collection given sliver geoms
    assert "GeometryCollection" in clipped.geom_type[0]


@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not available")
def test_warning_crs_mismatch(point_gdf, single_rectangle_gdf):
    with pytest.warns(UserWarning, match="CRS mismatch between the CRS"):
        clip(point_gdf, single_rectangle_gdf.to_crs(4326))


def test_clip_with_polygon(single_rectangle_gdf):
    """Test clip when using a shapely object"""
    polygon = Polygon([(0, 0), (5, 12), (10, 0), (0, 0)])
    clipped = clip(single_rectangle_gdf, polygon)
    exp_poly = polygon.intersection(
        Polygon([(0, 0), (0, 10), (10, 10), (10, 0), (0, 0)])
    )
    exp = GeoDataFrame([1], geometry=[exp_poly], crs="EPSG:3857")
    exp["attr2"] = "site-boundary"
    assert_geodataframe_equal(clipped, exp)


def test_clip_with_multipolygon(buffered_locations, single_rectangle_gdf):
    """Test clipping a polygon with a multipolygon."""
    multi = buffered_locations.dissolve(by="type").reset_index()
    clipped = clip(single_rectangle_gdf, multi)
    assert clipped.geom_type[0] == "Polygon"


@pytest.mark.parametrize(
    "mask_fixture_name",
    mask_variants_large_rectangle,
)
def test_clip_single_multipoly_no_extra_geoms(
    buffered_locations, mask_fixture_name, request
):
    """When clipping a multi-polygon feature, no additional geom types
    should be returned."""
    masks = request.getfixturevalue(mask_fixture_name)
    multi = buffered_locations.dissolve(by="type").reset_index()
    clipped = clip(multi, masks)
    assert clipped.geom_type[0] == "Polygon"


@pytest.mark.filterwarnings("ignore:All-NaN slice encountered")
@pytest.mark.parametrize(
    "mask",
    [
        Polygon(),
        (np.nan,) * 4,
        (np.nan, 0, np.nan, 1),
        GeoSeries([Polygon(), Polygon()], crs="EPSG:3857"),
        GeoSeries([Polygon(), Polygon()], crs="EPSG:3857").to_frame(),
        GeoSeries([], crs="EPSG:3857"),
        GeoSeries([], crs="EPSG:3857").to_frame(),
    ],
)
def test_clip_empty_mask(buffered_locations, mask):
    """Test that clipping with empty mask returns an empty result."""
    clipped = clip(buffered_locations, mask)
    assert_geodataframe_equal(
        clipped,
        GeoDataFrame([], columns=["geometry", "type"], crs="EPSG:3857"),
        check_index_type=False,
    )
    clipped = clip(buffered_locations.geometry, mask)
    assert_geoseries_equal(clipped, GeoSeries([], crs="EPSG:3857"))


def test_clip_sorting(point_gdf2):
    """Test the sort kwarg in clip"""
    bbox = shapely.geometry.box(0, 0, 2, 2)
    unsorted_clipped_gdf = point_gdf2.clip(bbox)
    sorted_clipped_gdf = point_gdf2.clip(bbox, sort=True)

    expected_sorted_index = pd.Index([1, 3, 5])

    assert not (sorted(unsorted_clipped_gdf.index) == unsorted_clipped_gdf.index).all()
    assert (sorted(sorted_clipped_gdf.index) == sorted_clipped_gdf.index).all()
    assert_index_equal(expected_sorted_index, sorted_clipped_gdf.index)
@@ -0,0 +1,76 @@
import numpy as np

from shapely.geometry import Point
from shapely.wkt import loads

import geopandas

import pytest
from pandas.testing import assert_series_equal


def test_hilbert_distance():
    # test the actual Hilbert Code algorithm against some hardcoded values
    geoms = geopandas.GeoSeries.from_wkt(
        [
            "POINT (0 0)",
            "POINT (1 1)",
            "POINT (1 0)",
            "POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))",
        ]
    )
    result = geoms.hilbert_distance(total_bounds=(0, 0, 1, 1), level=2)
    assert result.tolist() == [0, 10, 15, 2]

    result = geoms.hilbert_distance(total_bounds=(0, 0, 1, 1), level=3)
    assert result.tolist() == [0, 42, 63, 10]

    result = geoms.hilbert_distance(total_bounds=(0, 0, 1, 1), level=16)
    assert result.tolist() == [0, 2863311530, 4294967295, 715827882]

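
# Note (not part of this commit): with total_bounds (0, 0, 1, 1) and level p,
# the Hilbert curve above visits a 2**p x 2**p grid, so distances fall in
# [0, 4**p - 1]: level 2 -> 0..15, level 3 -> 0..63, and
# level 16 -> 0..4294967295, consistent with the hardcoded values.
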
@pytest.fixture
def geoseries_points():
    p1 = Point(1, 2)
    p2 = Point(2, 3)
    p3 = Point(3, 4)
    p4 = Point(4, 1)
    return geopandas.GeoSeries([p1, p2, p3, p4])


def test_hilbert_distance_level(geoseries_points):
    with pytest.raises(ValueError):
        geoseries_points.hilbert_distance(level=20)


def test_specified_total_bounds(geoseries_points):
    result = geoseries_points.hilbert_distance(
        total_bounds=geoseries_points.total_bounds
    )
    expected = geoseries_points.hilbert_distance()
    assert_series_equal(result, expected)


@pytest.mark.parametrize(
    "empty",
    [
        None,
        loads("POLYGON EMPTY"),
    ],
)
def test_empty(geoseries_points, empty):
    s = geoseries_points
    s.iloc[-1] = empty
    with pytest.raises(
        ValueError, match="cannot be computed on a GeoSeries with empty"
    ):
        s.hilbert_distance()


def test_zero_width():
    # special case of all points on the same line -> avoid warnings because
    # of division by 0 and introducing NaN
    s = geopandas.GeoSeries([Point(0, 0), Point(0, 2), Point(0, 1)])
    with np.errstate(all="raise"):
        result = s.hilbert_distance()
    assert np.array(result).argsort().tolist() == [0, 2, 1]
@@ -0,0 +1,67 @@
import numpy

import geopandas
from geopandas.tools._random import uniform

import pytest


@pytest.fixture
def multipolygons(nybb_filename):
    return geopandas.read_file(nybb_filename).geometry


@pytest.fixture
def polygons(multipolygons):
    return multipolygons.explode(ignore_index=True).geometry


@pytest.fixture
def multilinestrings(multipolygons):
    return multipolygons.boundary


@pytest.fixture
def linestrings(polygons):
    return polygons.boundary


@pytest.fixture
def points(multipolygons):
    return multipolygons.centroid


@pytest.mark.parametrize("size", [10, 100])
@pytest.mark.parametrize(
    "geom_fixture", ["multipolygons", "polygons", "multilinestrings", "linestrings"]
)
def test_uniform(geom_fixture, size, request):
    geom = request.getfixturevalue(geom_fixture)[0]
    sample = uniform(geom, size=size, rng=1)
    sample_series = (
        geopandas.GeoSeries(sample).explode(index_parts=True).reset_index(drop=True)
    )
    assert len(sample_series) == size
    sample_in_geom = sample_series.buffer(0.00000001).sindex.query(
        geom, predicate="intersects"
    )
    assert len(sample_in_geom) == size


def test_uniform_unsupported(points):
    with pytest.warns(UserWarning, match="Sampling is not supported"):
        sample = uniform(points[0], size=10, rng=1)
    assert sample.is_empty


def test_uniform_generator(polygons):
    sample = uniform(polygons[0], size=10, rng=1)
    sample2 = uniform(polygons[0], size=10, rng=1)
    assert sample.equals(sample2)

    generator = numpy.random.default_rng(seed=1)
    gen_sample = uniform(polygons[0], size=10, rng=generator)
    gen_sample2 = uniform(polygons[0], size=10, rng=generator)

    assert sample.equals(gen_sample)
    assert not sample.equals(gen_sample2)
File diff suppressed because it is too large
@@ -0,0 +1,51 @@
from shapely.geometry import LineString, MultiPoint, Point

from geopandas import GeoSeries
from geopandas.tools import collect

import pytest


class TestTools:
    def setup_method(self):
        self.p1 = Point(0, 0)
        self.p2 = Point(1, 1)
        self.p3 = Point(2, 2)
        self.mpc = MultiPoint([self.p1, self.p2, self.p3])

        self.mp1 = MultiPoint([self.p1, self.p2])
        self.line1 = LineString([(3, 3), (4, 4)])

    def test_collect_single(self):
        result = collect(self.p1)
        assert self.p1.equals(result)

    def test_collect_single_force_multi(self):
        result = collect(self.p1, multi=True)
        expected = MultiPoint([self.p1])
        assert expected.equals(result)

    def test_collect_multi(self):
        result = collect(self.mp1)
        assert self.mp1.equals(result)

    def test_collect_multi_force_multi(self):
        result = collect(self.mp1, multi=True)
        assert self.mp1.equals(result)

    def test_collect_list(self):
        result = collect([self.p1, self.p2, self.p3])
        assert self.mpc.equals(result)

    def test_collect_GeoSeries(self):
        s = GeoSeries([self.p1, self.p2, self.p3])
        result = collect(s)
        assert self.mpc.equals(result)

    def test_collect_mixed_types(self):
        with pytest.raises(ValueError):
            collect([self.p1, self.line1])

    def test_collect_mixed_multi(self):
        with pytest.raises(ValueError):
            collect([self.mpc, self.mp1])
@@ -0,0 +1,45 @@
import pandas as pd

from shapely.geometry import MultiLineString, MultiPoint, MultiPolygon
from shapely.geometry.base import BaseGeometry

_multi_type_map = {
    "Point": MultiPoint,
    "LineString": MultiLineString,
    "Polygon": MultiPolygon,
}


def collect(x, multi=False):
    """
    Collect single part geometries into their Multi* counterpart.

    Parameters
    ----------
    x : an iterable or Series of Shapely geometries, a GeoSeries, or
        a single Shapely geometry
    multi : boolean, default False
        if True, force returned geometries to be Multi* even if they
        only have one component.

    """
    if isinstance(x, BaseGeometry):
        x = [x]
    elif isinstance(x, pd.Series):
        x = list(x)

    # We cannot create a GeometryCollection here, so all geometry types must
    # be the same. If there is more than one element, none of them can be
    # Multi*: a mix of Point and MultiPoint is rejected, and so is a list of
    # several MultiPoints.
    t = x[0].geom_type
    if not all(g.geom_type == t for g in x):
        raise ValueError("Geometry type must be homogeneous")
    if len(x) > 1 and t.startswith("Multi"):
        raise ValueError("Cannot collect {0}. Must have single geometries".format(t))

    if len(x) == 1 and (t.startswith("Multi") or not multi):
        # If there's only one geometry and we're not forcing to multi,
        # or it already is a Multi* geometry, just return it
        return x[0]
    return _multi_type_map[t](x)

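
# Usage sketch (not part of this commit):
#
#   from shapely.geometry import Point
#   collect([Point(0, 0), Point(1, 1)])   # MultiPoint of two points
#   collect(Point(0, 0))                  # the Point itself, unchanged
#   collect(Point(0, 0), multi=True)      # MultiPoint with one component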