refactor: excel parse
@@ -0,0 +1,614 @@
import json
from packaging.version import Version
from typing import Dict, Optional, Tuple

import numpy as np
import pandas as pd
import pyarrow as pa
from numpy.typing import NDArray

import shapely
from shapely import GeometryType

from geopandas import GeoDataFrame
from geopandas._compat import SHAPELY_GE_204
from geopandas.array import from_shapely, from_wkb

GEOARROW_ENCODINGS = [
    "point",
    "linestring",
    "polygon",
    "multipoint",
    "multilinestring",
    "multipolygon",
]
|
||||
|
||||
|
||||
## GeoPandas -> GeoArrow
|
||||
|
||||
|
||||
class ArrowTable:
|
||||
"""
|
||||
Wrapper class for Arrow data.
|
||||
|
||||
This class implements the `Arrow PyCapsule Protocol`_ (i.e. having an
|
||||
``__arrow_c_stream__`` method). This object can then be consumed by
|
||||
your Arrow implementation of choice that supports this protocol.
|
||||
|
||||
.. _Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
|
||||
|
||||
Example
|
||||
-------
|
||||
>>> import pyarrow as pa
|
||||
>>> pa.table(gdf.to_arrow()) # doctest: +SKIP
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, pa_table):
|
||||
self._pa_table = pa_table
|
||||
|
||||
def __arrow_c_stream__(self, requested_schema=None):
|
||||
return self._pa_table.__arrow_c_stream__(requested_schema=requested_schema)
|
||||
|
||||
|
||||
class GeoArrowArray:
|
||||
"""
|
||||
Wrapper class for a geometry array as Arrow data.
|
||||
|
||||
This class implements the `Arrow PyCapsule Protocol`_ (i.e. having an
|
||||
``__arrow_c_array/stream__`` method). This object can then be consumed by
|
||||
your Arrow implementation of choice that supports this protocol.
|
||||
|
||||
.. _Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
|
||||
|
||||
Example
|
||||
-------
|
||||
>>> import pyarrow as pa
|
||||
>>> pa.array(ser.to_arrow()) # doctest: +SKIP
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, pa_field, pa_array):
|
||||
self._pa_array = pa_array
|
||||
self._pa_field = pa_field
|
||||
|
||||
def __arrow_c_array__(self, requested_schema=None):
|
||||
if requested_schema is not None:
|
||||
raise NotImplementedError(
|
||||
"Requested schema is not supported for geometry arrays"
|
||||
)
|
||||
return (
|
||||
self._pa_field.__arrow_c_schema__(),
|
||||
self._pa_array.__arrow_c_array__()[1],
|
||||
)
|
||||
|
||||
|
||||
def geopandas_to_arrow(
|
||||
df,
|
||||
index=None,
|
||||
geometry_encoding="WKB",
|
||||
interleaved=True,
|
||||
include_z=None,
|
||||
):
|
||||
"""
|
||||
Convert GeoDataFrame to a pyarrow.Table.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
df : GeoDataFrame
|
||||
The GeoDataFrame to convert.
|
||||
index : bool, default None
|
||||
If ``True``, always include the dataframe's index(es) as columns
|
||||
in the file output.
|
||||
If ``False``, the index(es) will not be written to the file.
|
||||
If ``None``, the index(es) will be included as columns in the file
|
||||
output except `RangeIndex` which is stored as metadata only.
|
||||
geometry_encoding : {'WKB', 'geoarrow' }, default 'WKB'
|
||||
The GeoArrow encoding to use for the data conversion.
|
||||
interleaved : bool, default True
|
||||
Only relevant for 'geoarrow' encoding. If True, the geometries'
|
||||
coordinates are interleaved in a single fixed size list array.
|
||||
If False, the coordinates are stored as separate arrays in a
|
||||
struct type.
|
||||
include_z : bool, default None
|
||||
Only relevant for 'geoarrow' encoding (for WKB, the dimensionality
|
||||
of the individual geometries is preserved).
|
||||
If False, return 2D geometries. If True, include the third dimension
|
||||
in the output (if a geometry has no third dimension, the z-coordinates
|
||||
will be NaN). By default, will infer the dimensionality from the
|
||||
input geometries. Note that this inference can be unreliable with
|
||||
empty geometries (for a guaranteed result, it is recommended to
|
||||
specify the keyword).
|
||||
|
||||
"""
|
||||
mask = df.dtypes == "geometry"
|
||||
geometry_columns = df.columns[mask]
|
||||
geometry_indices = np.asarray(mask).nonzero()[0]
|
||||
|
||||
df_attr = pd.DataFrame(df.copy(deep=False))
|
||||
|
||||
# replace geometry columns with dummy values -> will get converted to
|
||||
# Arrow null column (not holding any memory), so we can afterwards
|
||||
# fill the resulting table with the correct geometry fields
|
||||
for col in geometry_columns:
|
||||
df_attr[col] = None
|
||||
|
||||
table = pa.Table.from_pandas(df_attr, preserve_index=index)
|
||||
|
||||
geometry_encoding_dict = {}
|
||||
|
||||
if geometry_encoding.lower() == "geoarrow":
|
||||
if Version(pa.__version__) < Version("10.0.0"):
|
||||
raise ValueError("Converting to 'geoarrow' requires pyarrow >= 10.0.")
|
||||
|
||||
# Encode all geometry columns to GeoArrow
|
||||
for i, col in zip(geometry_indices, geometry_columns):
|
||||
field, geom_arr = construct_geometry_array(
|
||||
np.array(df[col].array),
|
||||
include_z=include_z,
|
||||
field_name=col,
|
||||
crs=df[col].crs,
|
||||
interleaved=interleaved,
|
||||
)
|
||||
table = table.set_column(i, field, geom_arr)
|
||||
geometry_encoding_dict[col] = (
|
||||
field.metadata[b"ARROW:extension:name"]
|
||||
.decode()
|
||||
.removeprefix("geoarrow.")
|
||||
)
|
||||
|
||||
elif geometry_encoding.lower() == "wkb":
|
||||
# Encode all geometry columns to WKB
|
||||
for i, col in zip(geometry_indices, geometry_columns):
|
||||
field, wkb_arr = construct_wkb_array(
|
||||
np.asarray(df[col].array), field_name=col, crs=df[col].crs
|
||||
)
|
||||
table = table.set_column(i, field, wkb_arr)
|
||||
geometry_encoding_dict[col] = "WKB"
|
||||
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Expected geometry encoding 'WKB' or 'geoarrow' got {geometry_encoding}"
|
||||
)
|
||||
return table, geometry_encoding_dict
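# Illustrative usage sketch (the names ``gdf``, ``table`` and ``encodings``
# below are hypothetical, not part of this module):
# >>> import geopandas
# >>> from shapely.geometry import Point
# >>> gdf = geopandas.GeoDataFrame(
# ...     {"a": [1, 2]}, geometry=[Point(0, 0), Point(1, 1)], crs="EPSG:4326"
# ... )
# >>> table, encodings = geopandas_to_arrow(gdf, geometry_encoding="WKB")
# >>> encodings  # doctest: +SKIP
# {'geometry': 'WKB'}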
|
||||
|
||||
|
||||
def construct_wkb_array(
|
||||
shapely_arr: NDArray[np.object_],
|
||||
*,
|
||||
field_name: str = "geometry",
|
||||
crs: Optional[str] = None,
|
||||
) -> Tuple[pa.Field, pa.Array]:
|
||||
|
||||
if shapely.geos_version > (3, 10, 0):
|
||||
kwargs = {"flavor": "iso"}
|
||||
else:
|
||||
if shapely.has_z(shapely_arr).any():
|
||||
raise ValueError("Cannot write 3D geometries with GEOS<3.10")
|
||||
kwargs = {}
|
||||
|
||||
wkb_arr = shapely.to_wkb(shapely_arr, **kwargs)
|
||||
extension_metadata = {"ARROW:extension:name": "geoarrow.wkb"}
|
||||
if crs is not None:
|
||||
extension_metadata["ARROW:extension:metadata"] = json.dumps(
|
||||
{"crs": crs.to_json()}
|
||||
)
|
||||
else:
|
||||
# In theory this should not be needed, but otherwise pyarrow < 17
|
||||
# crashes on receiving such data through C Data Interface
|
||||
# https://github.com/apache/arrow/issues/41741
|
||||
extension_metadata["ARROW:extension:metadata"] = "{}"
|
||||
|
||||
field = pa.field(
|
||||
field_name, type=pa.binary(), nullable=True, metadata=extension_metadata
|
||||
)
|
||||
parr = pa.array(np.asarray(wkb_arr), pa.binary())
|
||||
return field, parr
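# Rough sketch of what this helper returns (assuming GEOS >= 3.10 so ISO WKB
# is used; the input array is hypothetical):
# >>> arr = np.array([shapely.Point(0, 0), shapely.Point(1, 1)], dtype=object)
# >>> field, parr = construct_wkb_array(arr)
# >>> field.metadata[b"ARROW:extension:name"]
# b'geoarrow.wkb'
# >>> parr.type
# DataType(binary)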
|
||||
|
||||
|
||||
def _convert_inner_coords(coords, interleaved, dims, mask=None):
|
||||
if interleaved:
|
||||
coords_field = pa.field(dims, pa.float64(), nullable=False)
|
||||
typ = pa.list_(coords_field, len(dims))
|
||||
if mask is None:
|
||||
# mask keyword only added in pyarrow 15.0.0
|
||||
parr = pa.FixedSizeListArray.from_arrays(coords.ravel(), type=typ)
|
||||
else:
|
||||
parr = pa.FixedSizeListArray.from_arrays(
|
||||
coords.ravel(), type=typ, mask=mask
|
||||
)
|
||||
else:
|
||||
if dims == "xy":
|
||||
fields = [
|
||||
pa.field("x", pa.float64(), nullable=False),
|
||||
pa.field("y", pa.float64(), nullable=False),
|
||||
]
|
||||
parr = pa.StructArray.from_arrays(
|
||||
[coords[:, 0].copy(), coords[:, 1].copy()], fields=fields, mask=mask
|
||||
)
|
||||
else:
|
||||
fields = [
|
||||
pa.field("x", pa.float64(), nullable=False),
|
||||
pa.field("y", pa.float64(), nullable=False),
|
||||
pa.field("z", pa.float64(), nullable=False),
|
||||
]
|
||||
parr = pa.StructArray.from_arrays(
|
||||
[coords[:, 0].copy(), coords[:, 1].copy(), coords[:, 2].copy()],
|
||||
fields=fields,
|
||||
mask=mask,
|
||||
)
|
||||
return parr
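# Illustrative behaviour for a small 2D coordinate array (hypothetical input;
# exact reprs omitted):
# >>> coords = np.array([[0.0, 1.0], [2.0, 3.0]])
# >>> _convert_inner_coords(coords, interleaved=True, dims="xy")  # doctest: +SKIP
# fixed_size_list<xy: double not null>[2] array with 2 elements
# >>> _convert_inner_coords(coords, interleaved=False, dims="xy")  # doctest: +SKIP
# struct<x: double not null, y: double not null> array with 2 elements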
|
||||
|
||||
|
||||
def _linestring_type(point_type):
|
||||
return pa.list_(pa.field("vertices", point_type, nullable=False))
|
||||
|
||||
|
||||
def _polygon_type(point_type):
|
||||
return pa.list_(
|
||||
pa.field(
|
||||
"rings",
|
||||
pa.list_(pa.field("vertices", point_type, nullable=False)),
|
||||
nullable=False,
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def _multipoint_type(point_type):
|
||||
return pa.list_(pa.field("points", point_type, nullable=False))
|
||||
|
||||
|
||||
def _multilinestring_type(point_type):
|
||||
return pa.list_(
|
||||
pa.field("linestrings", _linestring_type(point_type), nullable=False)
|
||||
)
|
||||
|
||||
|
||||
def _multipolygon_type(point_type):
|
||||
return pa.list_(pa.field("polygons", _polygon_type(point_type), nullable=False))
|
||||
|
||||
|
||||
def construct_geometry_array(
|
||||
shapely_arr: NDArray[np.object_],
|
||||
include_z: Optional[bool] = None,
|
||||
*,
|
||||
field_name: str = "geometry",
|
||||
crs: Optional[str] = None,
|
||||
interleaved: bool = True,
|
||||
) -> Tuple[pa.Field, pa.Array]:
|
||||
# NOTE: this implementation returns a (field, array) pair so that it can set the
|
||||
# extension metadata on the field without instantiating extension types into the
|
||||
# global pyarrow registry
|
||||
geom_type, coords, offsets = shapely.to_ragged_array(
|
||||
shapely_arr, include_z=include_z
|
||||
)
|
||||
|
||||
mask = shapely.is_missing(shapely_arr)
|
||||
if mask.any():
|
||||
if (
|
||||
geom_type == GeometryType.POINT
|
||||
and interleaved
|
||||
and Version(pa.__version__) < Version("15.0.0")
|
||||
):
|
||||
raise ValueError(
|
||||
"Converting point geometries with missing values is not supported "
|
||||
"for interleaved coordinates with pyarrow < 15.0.0. Please "
|
||||
"upgrade to a newer version of pyarrow."
|
||||
)
|
||||
mask = pa.array(mask, type=pa.bool_())
|
||||
|
||||
if geom_type == GeometryType.POINT and not SHAPELY_GE_204:
|
||||
# bug in shapely < 2.0.4, see https://github.com/shapely/shapely/pull/2034
|
||||
# this workaround only works if there are no empty points
|
||||
indices = np.nonzero(mask)[0]
|
||||
indices = indices - np.arange(len(indices))
|
||||
coords = np.insert(coords, indices, np.nan, axis=0)
|
||||
|
||||
else:
|
||||
mask = None
|
||||
|
||||
if coords.shape[-1] == 2:
|
||||
dims = "xy"
|
||||
elif coords.shape[-1] == 3:
|
||||
dims = "xyz"
|
||||
else:
|
||||
raise ValueError(f"Unexpected coords dimensions: {coords.shape}")
|
||||
|
||||
extension_metadata: Dict[str, str] = {}
|
||||
if crs is not None:
|
||||
extension_metadata["ARROW:extension:metadata"] = json.dumps(
|
||||
{"crs": crs.to_json()}
|
||||
)
|
||||
else:
|
||||
# In theory this should not be needed, but otherwise pyarrow < 17
|
||||
# crashes on receiving such data through C Data Interface
|
||||
# https://github.com/apache/arrow/issues/41741
|
||||
extension_metadata["ARROW:extension:metadata"] = "{}"
|
||||
|
||||
if geom_type == GeometryType.POINT:
|
||||
parr = _convert_inner_coords(coords, interleaved, dims, mask=mask)
|
||||
extension_metadata["ARROW:extension:name"] = "geoarrow.point"
|
||||
field = pa.field(
|
||||
field_name,
|
||||
parr.type,
|
||||
nullable=True,
|
||||
metadata=extension_metadata,
|
||||
)
|
||||
return field, parr
|
||||
|
||||
elif geom_type == GeometryType.LINESTRING:
|
||||
assert len(offsets) == 1, "Expected one offsets array"
|
||||
(geom_offsets,) = offsets
|
||||
_parr = _convert_inner_coords(coords, interleaved, dims)
|
||||
parr = pa.ListArray.from_arrays(
|
||||
pa.array(geom_offsets), _parr, _linestring_type(_parr.type), mask=mask
|
||||
)
|
||||
extension_metadata["ARROW:extension:name"] = "geoarrow.linestring"
|
||||
field = pa.field(
|
||||
field_name,
|
||||
parr.type,
|
||||
nullable=True,
|
||||
metadata=extension_metadata,
|
||||
)
|
||||
return field, parr
|
||||
|
||||
elif geom_type == GeometryType.POLYGON:
|
||||
assert len(offsets) == 2, "Expected two offsets arrays"
|
||||
ring_offsets, geom_offsets = offsets
|
||||
_parr = _convert_inner_coords(coords, interleaved, dims)
|
||||
_parr1 = pa.ListArray.from_arrays(pa.array(ring_offsets), _parr)
|
||||
parr = pa.ListArray.from_arrays(pa.array(geom_offsets), _parr1, mask=mask)
|
||||
parr = parr.cast(_polygon_type(_parr.type))
|
||||
extension_metadata["ARROW:extension:name"] = "geoarrow.polygon"
|
||||
field = pa.field(
|
||||
field_name,
|
||||
parr.type,
|
||||
nullable=True,
|
||||
metadata=extension_metadata,
|
||||
)
|
||||
return field, parr
|
||||
|
||||
elif geom_type == GeometryType.MULTIPOINT:
|
||||
assert len(offsets) == 1, "Expected one offsets array"
|
||||
(geom_offsets,) = offsets
|
||||
_parr = _convert_inner_coords(coords, interleaved, dims)
|
||||
parr = pa.ListArray.from_arrays(
|
||||
pa.array(geom_offsets), _parr, type=_multipoint_type(_parr.type), mask=mask
|
||||
)
|
||||
extension_metadata["ARROW:extension:name"] = "geoarrow.multipoint"
|
||||
field = pa.field(
|
||||
field_name,
|
||||
parr.type,
|
||||
nullable=True,
|
||||
metadata=extension_metadata,
|
||||
)
|
||||
return field, parr
|
||||
|
||||
elif geom_type == GeometryType.MULTILINESTRING:
|
||||
assert len(offsets) == 2, "Expected two offsets arrays"
|
||||
ring_offsets, geom_offsets = offsets
|
||||
_parr = _convert_inner_coords(coords, interleaved, dims)
|
||||
_parr1 = pa.ListArray.from_arrays(pa.array(ring_offsets), _parr)
|
||||
parr = pa.ListArray.from_arrays(pa.array(geom_offsets), _parr1, mask=mask)
|
||||
parr = parr.cast(_multilinestring_type(_parr.type))
|
||||
extension_metadata["ARROW:extension:name"] = "geoarrow.multilinestring"
|
||||
field = pa.field(
|
||||
field_name,
|
||||
parr.type,
|
||||
nullable=True,
|
||||
metadata=extension_metadata,
|
||||
)
|
||||
return field, parr
|
||||
|
||||
elif geom_type == GeometryType.MULTIPOLYGON:
|
||||
assert len(offsets) == 3, "Expected three offsets arrays"
|
||||
ring_offsets, polygon_offsets, geom_offsets = offsets
|
||||
_parr = _convert_inner_coords(coords, interleaved, dims)
|
||||
_parr1 = pa.ListArray.from_arrays(pa.array(ring_offsets), _parr)
|
||||
_parr2 = pa.ListArray.from_arrays(pa.array(polygon_offsets), _parr1)
|
||||
parr = pa.ListArray.from_arrays(pa.array(geom_offsets), _parr2, mask=mask)
|
||||
parr = parr.cast(_multipolygon_type(_parr.type))
|
||||
extension_metadata["ARROW:extension:name"] = "geoarrow.multipolygon"
|
||||
field = pa.field(
|
||||
field_name,
|
||||
parr.type,
|
||||
nullable=True,
|
||||
metadata=extension_metadata,
|
||||
)
|
||||
return field, parr
|
||||
|
||||
else:
|
||||
raise ValueError(f"Unsupported type for geoarrow: {geom_type}")
|
||||
|
||||
|
||||
## GeoArrow -> GeoPandas
|
||||
|
||||
|
||||
def _get_arrow_geometry_field(field):
|
||||
if (meta := field.metadata) is not None:
|
||||
if (ext_name := meta.get(b"ARROW:extension:name", None)) is not None:
|
||||
if ext_name.startswith(b"geoarrow."):
|
||||
if (
|
||||
ext_meta := meta.get(b"ARROW:extension:metadata", None)
|
||||
) is not None:
|
||||
ext_meta = json.loads(ext_meta.decode())
|
||||
return ext_name.decode(), ext_meta
|
||||
|
||||
if isinstance(field.type, pa.ExtensionType):
|
||||
ext_name = field.type.extension_name
|
||||
if ext_name.startswith("geoarrow."):
|
||||
ext_meta_ser = field.type.__arrow_ext_serialize__()
|
||||
if ext_meta_ser:
|
||||
ext_meta = json.loads(ext_meta_ser.decode())
|
||||
else:
|
||||
ext_meta = None
|
||||
return ext_name, ext_meta
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def arrow_to_geopandas(table, geometry=None):
|
||||
"""
|
||||
Convert Arrow table object to a GeoDataFrame based on GeoArrow extension types.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
table : pyarrow.Table
|
||||
The Arrow table to convert.
|
||||
geometry : str, default None
|
||||
The name of the geometry column to set as the active geometry
|
||||
column. If None, the first geometry column found will be used.
|
||||
|
||||
Returns
|
||||
-------
|
||||
GeoDataFrame
|
||||
|
||||
"""
|
||||
if not isinstance(table, pa.Table):
|
||||
table = pa.table(table)
|
||||
|
||||
geom_fields = []
|
||||
|
||||
for i, field in enumerate(table.schema):
|
||||
geom = _get_arrow_geometry_field(field)
|
||||
if geom is not None:
|
||||
geom_fields.append((i, field.name, *geom))
|
||||
|
||||
if len(geom_fields) == 0:
|
||||
raise ValueError("No geometry column found in the Arrow table.")
|
||||
|
||||
table_attr = table.drop([f[1] for f in geom_fields])
|
||||
df = table_attr.to_pandas()
|
||||
|
||||
for i, col, ext_name, ext_meta in geom_fields:
|
||||
crs = None
|
||||
if ext_meta is not None and "crs" in ext_meta:
|
||||
crs = ext_meta["crs"]
|
||||
|
||||
if ext_name == "geoarrow.wkb":
|
||||
geom_arr = from_wkb(np.array(table[col]), crs=crs)
|
||||
elif ext_name.split(".")[1] in GEOARROW_ENCODINGS:
|
||||
|
||||
geom_arr = from_shapely(
|
||||
construct_shapely_array(table[col].combine_chunks(), ext_name), crs=crs
|
||||
)
|
||||
else:
|
||||
raise TypeError(f"Unknown GeoArrow extension type: {ext_name}")
|
||||
|
||||
df.insert(i, col, geom_arr)
|
||||
|
||||
return GeoDataFrame(df, geometry=geometry or geom_fields[0][1])
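# Illustrative round trip through Arrow (``gdf`` is a hypothetical
# GeoDataFrame, e.g. the one from the sketch after ``geopandas_to_arrow``):
# >>> table, _ = geopandas_to_arrow(gdf, geometry_encoding="geoarrow")
# >>> arrow_to_geopandas(table)  # doctest: +SKIP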
|
||||
|
||||
|
||||
def arrow_to_geometry_array(arr):
|
||||
"""
|
||||
Convert Arrow array object (representing single GeoArrow array) to a
|
||||
geopandas GeometryArray.
|
||||
|
||||
Specifically for GeoSeries.from_arrow.
|
||||
"""
|
||||
if Version(pa.__version__) < Version("14.0.0"):
|
||||
raise ValueError("Importing from Arrow requires pyarrow >= 14.0.")
|
||||
|
||||
schema_capsule, array_capsule = arr.__arrow_c_array__()
|
||||
field = pa.Field._import_from_c_capsule(schema_capsule)
|
||||
pa_arr = pa.Array._import_from_c_capsule(field.__arrow_c_schema__(), array_capsule)
|
||||
|
||||
geom_info = _get_arrow_geometry_field(field)
|
||||
if geom_info is None:
|
||||
raise ValueError("No GeoArrow geometry field found.")
|
||||
ext_name, ext_meta = geom_info
|
||||
|
||||
crs = None
|
||||
if ext_meta is not None and "crs" in ext_meta:
|
||||
crs = ext_meta["crs"]
|
||||
|
||||
if ext_name == "geoarrow.wkb":
|
||||
geom_arr = from_wkb(np.array(pa_arr), crs=crs)
|
||||
elif ext_name.split(".")[1] in GEOARROW_ENCODINGS:
|
||||
|
||||
geom_arr = from_shapely(construct_shapely_array(pa_arr, ext_name), crs=crs)
|
||||
else:
|
||||
raise ValueError(f"Unknown GeoArrow extension type: {ext_name}")
|
||||
|
||||
return geom_arr
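# Illustrative sketch (requires pyarrow >= 14): any object implementing the
# Arrow PyCapsule protocol can be passed, e.g. the wrapper returned by
# ``GeoSeries.to_arrow()``. The series ``ser`` is hypothetical.
# >>> ser = GeoDataFrame(geometry=[shapely.Point(0, 0)]).geometry
# >>> arrow_to_geometry_array(ser.to_arrow())  # doctest: +SKIP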
|
||||
|
||||
|
||||
def _get_inner_coords(arr):
|
||||
if pa.types.is_struct(arr.type):
|
||||
if arr.type.num_fields == 2:
|
||||
coords = np.column_stack(
|
||||
[np.asarray(arr.field("x")), np.asarray(arr.field("y"))]
|
||||
)
|
||||
else:
|
||||
coords = np.column_stack(
|
||||
[
|
||||
np.asarray(arr.field("x")),
|
||||
np.asarray(arr.field("y")),
|
||||
np.asarray(arr.field("z")),
|
||||
]
|
||||
)
|
||||
return coords
|
||||
else:
|
||||
# fixed size list
|
||||
return np.asarray(arr.values).reshape(len(arr), -1)
|
||||
|
||||
|
||||
def construct_shapely_array(arr: pa.Array, extension_name: str):
|
||||
"""
|
||||
Construct a NumPy array of shapely geometries from a pyarrow.Array
|
||||
with GeoArrow extension type.
|
||||
|
||||
"""
|
||||
if isinstance(arr, pa.ExtensionArray):
|
||||
arr = arr.storage
|
||||
|
||||
if extension_name == "geoarrow.point":
|
||||
coords = _get_inner_coords(arr)
|
||||
result = shapely.from_ragged_array(GeometryType.POINT, coords, None)
|
||||
|
||||
elif extension_name == "geoarrow.linestring":
|
||||
coords = _get_inner_coords(arr.values)
|
||||
offsets1 = np.asarray(arr.offsets)
|
||||
offsets = (offsets1,)
|
||||
result = shapely.from_ragged_array(GeometryType.LINESTRING, coords, offsets)
|
||||
|
||||
elif extension_name == "geoarrow.polygon":
|
||||
coords = _get_inner_coords(arr.values.values)
|
||||
offsets2 = np.asarray(arr.offsets)
|
||||
offsets1 = np.asarray(arr.values.offsets)
|
||||
offsets = (offsets1, offsets2)
|
||||
result = shapely.from_ragged_array(GeometryType.POLYGON, coords, offsets)
|
||||
|
||||
elif extension_name == "geoarrow.multipoint":
|
||||
coords = _get_inner_coords(arr.values)
|
||||
offsets1 = np.asarray(arr.offsets)
|
||||
offsets = (offsets1,)
|
||||
result = shapely.from_ragged_array(GeometryType.MULTIPOINT, coords, offsets)
|
||||
|
||||
elif extension_name == "geoarrow.multilinestring":
|
||||
coords = _get_inner_coords(arr.values.values)
|
||||
offsets2 = np.asarray(arr.offsets)
|
||||
offsets1 = np.asarray(arr.values.offsets)
|
||||
offsets = (offsets1, offsets2)
|
||||
result = shapely.from_ragged_array(
|
||||
GeometryType.MULTILINESTRING, coords, offsets
|
||||
)
|
||||
|
||||
elif extension_name == "geoarrow.multipolygon":
|
||||
coords = _get_inner_coords(arr.values.values.values)
|
||||
offsets3 = np.asarray(arr.offsets)
|
||||
offsets2 = np.asarray(arr.values.offsets)
|
||||
offsets1 = np.asarray(arr.values.values.offsets)
|
||||
offsets = (offsets1, offsets2, offsets3)
|
||||
result = shapely.from_ragged_array(GeometryType.MULTIPOLYGON, coords, offsets)
|
||||
|
||||
else:
|
||||
raise ValueError(extension_name)
|
||||
|
||||
# apply validity mask
|
||||
if arr.null_count:
|
||||
mask = np.asarray(arr.is_null())
|
||||
result = np.where(mask, None, result)
|
||||
|
||||
return result
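# Illustrative inverse of construct_geometry_array (names hypothetical):
# >>> field, parr = construct_geometry_array(
# ...     np.array([shapely.Point(0, 0)], dtype=object)
# ... )
# >>> construct_shapely_array(parr, "geoarrow.point")  # doctest: +SKIP
# array([<POINT (0 0)>], dtype=object)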
|
||||
@@ -0,0 +1,72 @@
|
||||
from packaging.version import Version
|
||||
|
||||
import pyarrow
|
||||
|
||||
_ERROR_MSG = """\
|
||||
Disallowed deserialization of 'arrow.py_extension_type':
|
||||
storage_type = {storage_type}
|
||||
serialized = {serialized}
|
||||
pickle disassembly:\n{pickle_disassembly}
|
||||
|
||||
Reading of untrusted Parquet or Feather files with a PyExtensionType column
|
||||
allows arbitrary code execution.
|
||||
If you trust this file, you can enable reading the extension type by one of:
|
||||
|
||||
- upgrade to pyarrow >= 14.0.1 and call `pa.PyExtensionType.set_auto_load(True)`
|
||||
- install pyarrow-hotfix (`pip install pyarrow-hotfix`) and disable it by running
|
||||
`import pyarrow_hotfix; pyarrow_hotfix.uninstall()`
|
||||
|
||||
We strongly recommend updating your Parquet/Feather files to use extension types
|
||||
derived from `pyarrow.ExtensionType` instead, and register this type explicitly.
|
||||
See https://arrow.apache.org/docs/dev/python/extending_types.html#defining-extension-types-user-defined-types
|
||||
for more details.
|
||||
"""
|
||||
|
||||
|
||||
def patch_pyarrow():
|
||||
# starting from pyarrow 14.0.1, it has its own mechanism
|
||||
if Version(pyarrow.__version__) >= Version("14.0.1"):
|
||||
return
|
||||
|
||||
# if the user has pyarrow_hotfix (https://github.com/pitrou/pyarrow-hotfix)
|
||||
# installed, use this instead (which also ensures it works if they had
|
||||
# called `pyarrow_hotfix.uninstall()`)
|
||||
try:
|
||||
import pyarrow_hotfix # noqa: F401
|
||||
except ImportError:
|
||||
pass
|
||||
else:
|
||||
return
|
||||
|
||||
# if the hotfix is already installed and enabled
|
||||
if getattr(pyarrow, "_hotfix_installed", False):
|
||||
return
|
||||
|
||||
class ForbiddenExtensionType(pyarrow.ExtensionType):
|
||||
def __arrow_ext_serialize__(self):
|
||||
return b""
|
||||
|
||||
@classmethod
|
||||
def __arrow_ext_deserialize__(cls, storage_type, serialized):
|
||||
import io
|
||||
import pickletools
|
||||
|
||||
out = io.StringIO()
|
||||
pickletools.dis(serialized, out)
|
||||
raise RuntimeError(
|
||||
_ERROR_MSG.format(
|
||||
storage_type=storage_type,
|
||||
serialized=serialized,
|
||||
pickle_disassembly=out.getvalue(),
|
||||
)
|
||||
)
|
||||
|
||||
pyarrow.unregister_extension_type("arrow.py_extension_type")
|
||||
pyarrow.register_extension_type(
|
||||
ForbiddenExtensionType(pyarrow.null(), "arrow.py_extension_type")
|
||||
)
|
||||
|
||||
pyarrow._hotfix_installed = True
|
||||
|
||||
|
||||
patch_pyarrow()
|
||||
@@ -0,0 +1,913 @@
|
||||
import json
|
||||
import warnings
|
||||
from packaging.version import Version
|
||||
|
||||
import numpy as np
|
||||
from pandas import DataFrame, Series
|
||||
|
||||
import shapely
|
||||
|
||||
import geopandas
|
||||
from geopandas import GeoDataFrame
|
||||
from geopandas._compat import import_optional_dependency
|
||||
from geopandas.array import from_shapely, from_wkb
|
||||
|
||||
from .file import _expand_user
|
||||
|
||||
METADATA_VERSION = "1.0.0"
|
||||
SUPPORTED_VERSIONS = ["0.1.0", "0.4.0", "1.0.0-beta.1", "1.0.0", "1.1.0"]
|
||||
GEOARROW_ENCODINGS = [
|
||||
"point",
|
||||
"linestring",
|
||||
"polygon",
|
||||
"multipoint",
|
||||
"multilinestring",
|
||||
"multipolygon",
|
||||
]
|
||||
SUPPORTED_ENCODINGS = ["WKB"] + GEOARROW_ENCODINGS
|
||||
|
||||
# reference: https://github.com/opengeospatial/geoparquet
|
||||
|
||||
# Metadata structure:
|
||||
# {
|
||||
# "geo": {
|
||||
# "columns": {
|
||||
# "<name>": {
|
||||
# "encoding": "WKB"
|
||||
# "geometry_types": <list of str: REQUIRED>
|
||||
# "crs": "<PROJJSON or None: OPTIONAL>",
|
||||
# "orientation": "<'counterclockwise' or None: OPTIONAL>"
|
||||
# "edges": "planar"
|
||||
# "bbox": <list of [xmin, ymin, xmax, ymax]: OPTIONAL>
|
||||
# "epoch": <float: OPTIONAL>
|
||||
# }
|
||||
# },
|
||||
# "primary_column": "<str: REQUIRED>",
|
||||
# "version": "<METADATA_VERSION>",
|
||||
#
|
||||
# # Additional GeoPandas specific metadata (not in metadata spec)
|
||||
# "creator": {
|
||||
# "library": "geopandas",
|
||||
# "version": "<geopandas.__version__>"
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
|
||||
|
||||
def _is_fsspec_url(url):
|
||||
return (
|
||||
isinstance(url, str)
|
||||
and "://" in url
|
||||
and not url.startswith(("http://", "https://"))
|
||||
)
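# For reference, a few hypothetical inputs and the expected results:
# >>> _is_fsspec_url("s3://bucket/data.parquet")
# True
# >>> _is_fsspec_url("https://example.com/data.parquet")
# False
# >>> _is_fsspec_url("/local/path/data.parquet")
# False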
|
||||
|
||||
|
||||
def _remove_id_from_member_of_ensembles(json_dict):
|
||||
"""
|
||||
Older PROJ versions will not recognize IDs of datum ensemble members that
|
||||
were added in more recent PROJ database versions.
|
||||
|
||||
Cf https://github.com/opengeospatial/geoparquet/discussions/110
|
||||
and https://github.com/OSGeo/PROJ/pull/3221
|
||||
|
||||
Mimicking the patch to GDAL from https://github.com/OSGeo/gdal/pull/5872
|
||||
"""
|
||||
for key, value in json_dict.items():
|
||||
if isinstance(value, dict):
|
||||
_remove_id_from_member_of_ensembles(value)
|
||||
elif key == "members" and isinstance(value, list):
|
||||
for member in value:
|
||||
member.pop("id", None)
|
||||
|
||||
|
||||
# type ids 0 to 7
|
||||
_geometry_type_names = [
|
||||
"Point",
|
||||
"LineString",
|
||||
"LineString",
|
||||
"Polygon",
|
||||
"MultiPoint",
|
||||
"MultiLineString",
|
||||
"MultiPolygon",
|
||||
"GeometryCollection",
|
||||
]
|
||||
_geometry_type_names += [geom_type + " Z" for geom_type in _geometry_type_names]
|
||||
|
||||
|
||||
def _get_geometry_types(series):
|
||||
"""
|
||||
Get unique geometry types from a GeoSeries.
|
||||
"""
|
||||
arr_geometry_types = shapely.get_type_id(series.array._data)
|
||||
# ensure to include "... Z" for 3D geometries
|
||||
has_z = shapely.has_z(series.array._data)
|
||||
arr_geometry_types[has_z] += 8
|
||||
|
||||
geometry_types = Series(arr_geometry_types).unique().tolist()
|
||||
# drop missing values (shapely.get_type_id returns -1 for those)
|
||||
if -1 in geometry_types:
|
||||
geometry_types.remove(-1)
|
||||
|
||||
return sorted([_geometry_type_names[idx] for idx in geometry_types])
|
||||
|
||||
|
||||
def _create_metadata(
|
||||
df, schema_version=None, geometry_encoding=None, write_covering_bbox=False
|
||||
):
|
||||
"""Create and encode geo metadata dict.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
df : GeoDataFrame
|
||||
schema_version : {'0.1.0', '0.4.0', '1.0.0-beta.1', '1.0.0', '1.1.0', None}
|
||||
GeoParquet specification version; if not provided will default to
|
||||
latest supported version.
|
||||
write_covering_bbox : bool, default False
|
||||
Writes the bounding box column for each row entry with column
|
||||
name 'bbox'. Writing a bbox column can be computationally
|
||||
expensive, hence its default setting is False.
|
||||
|
||||
Returns
|
||||
-------
|
||||
dict
|
||||
"""
|
||||
if schema_version is None:
|
||||
if geometry_encoding and any(
|
||||
encoding != "WKB" for encoding in geometry_encoding.values()
|
||||
):
|
||||
schema_version = "1.1.0"
|
||||
else:
|
||||
schema_version = METADATA_VERSION
|
||||
|
||||
if schema_version not in SUPPORTED_VERSIONS:
|
||||
raise ValueError(
|
||||
f"schema_version must be one of: {', '.join(SUPPORTED_VERSIONS)}"
|
||||
)
|
||||
|
||||
# Construct metadata for each geometry
|
||||
column_metadata = {}
|
||||
for col in df.columns[df.dtypes == "geometry"]:
|
||||
series = df[col]
|
||||
|
||||
geometry_types = _get_geometry_types(series)
|
||||
if schema_version[0] == "0":
|
||||
geometry_types_name = "geometry_type"
|
||||
if len(geometry_types) == 1:
|
||||
geometry_types = geometry_types[0]
|
||||
else:
|
||||
geometry_types_name = "geometry_types"
|
||||
|
||||
crs = None
|
||||
if series.crs:
|
||||
if schema_version == "0.1.0":
|
||||
crs = series.crs.to_wkt()
|
||||
else: # version >= 0.4.0
|
||||
crs = series.crs.to_json_dict()
|
||||
_remove_id_from_member_of_ensembles(crs)
|
||||
|
||||
column_metadata[col] = {
|
||||
"encoding": geometry_encoding[col],
|
||||
"crs": crs,
|
||||
geometry_types_name: geometry_types,
|
||||
}
|
||||
|
||||
bbox = series.total_bounds.tolist()
|
||||
if np.isfinite(bbox).all():
|
||||
# don't add bbox with NaNs for empty / all-NA geometry column
|
||||
column_metadata[col]["bbox"] = bbox
|
||||
|
||||
if write_covering_bbox:
|
||||
column_metadata[col]["covering"] = {
|
||||
"bbox": {
|
||||
"xmin": ["bbox", "xmin"],
|
||||
"ymin": ["bbox", "ymin"],
|
||||
"xmax": ["bbox", "xmax"],
|
||||
"ymax": ["bbox", "ymax"],
|
||||
},
|
||||
}
|
||||
|
||||
return {
|
||||
"primary_column": df._geometry_column_name,
|
||||
"columns": column_metadata,
|
||||
"version": schema_version,
|
||||
"creator": {"library": "geopandas", "version": geopandas.__version__},
|
||||
}
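# Illustrative sketch of the resulting metadata for a single WKB-encoded
# geometry column (``gdf`` is a hypothetical GeoDataFrame with one geometry
# column named "geometry"):
# >>> meta = _create_metadata(gdf, geometry_encoding={"geometry": "WKB"})
# >>> meta["columns"]["geometry"]["encoding"]
# 'WKB'
# >>> meta["version"]
# '1.0.0'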
|
||||
|
||||
|
||||
def _encode_metadata(metadata):
|
||||
"""Encode metadata dict to UTF-8 JSON string
|
||||
|
||||
Parameters
|
||||
----------
|
||||
metadata : dict
|
||||
|
||||
Returns
|
||||
-------
|
||||
UTF-8 encoded JSON string
|
||||
"""
|
||||
return json.dumps(metadata).encode("utf-8")
|
||||
|
||||
|
||||
def _decode_metadata(metadata_str):
|
||||
"""Decode a UTF-8 encoded JSON string to dict
|
||||
|
||||
Parameters
|
||||
----------
|
||||
metadata_str : string (UTF-8 encoded)
|
||||
|
||||
Returns
|
||||
-------
|
||||
dict
|
||||
"""
|
||||
if metadata_str is None:
|
||||
return None
|
||||
|
||||
return json.loads(metadata_str.decode("utf-8"))
|
||||
|
||||
|
||||
def _validate_dataframe(df):
|
||||
"""Validate that the GeoDataFrame conforms to requirements for writing
|
||||
to Parquet format.
|
||||
|
||||
Raises `ValueError` if the GeoDataFrame is not valid.
|
||||
|
||||
copied from `pandas.io.parquet`
|
||||
|
||||
Parameters
|
||||
----------
|
||||
df : GeoDataFrame
|
||||
"""
|
||||
|
||||
if not isinstance(df, DataFrame):
|
||||
raise ValueError("Writing to Parquet/Feather only supports IO with DataFrames")
|
||||
|
||||
# must have value column names (strings only)
|
||||
if df.columns.inferred_type not in {"string", "unicode", "empty"}:
|
||||
raise ValueError("Writing to Parquet/Feather requires string column names")
|
||||
|
||||
# index level names must be strings
|
||||
valid_names = all(
|
||||
isinstance(name, str) for name in df.index.names if name is not None
|
||||
)
|
||||
if not valid_names:
|
||||
raise ValueError("Index level names must be strings")
|
||||
|
||||
|
||||
def _validate_geo_metadata(metadata):
|
||||
"""Validate geo metadata.
|
||||
Must not be empty, and must contain the structure specified above.
|
||||
|
||||
Raises ValueError if metadata is not valid.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
metadata : dict
|
||||
"""
|
||||
|
||||
if not metadata:
|
||||
raise ValueError("Missing or malformed geo metadata in Parquet/Feather file")
|
||||
|
||||
# version was schema_version in 0.1.0
|
||||
version = metadata.get("version", metadata.get("schema_version"))
|
||||
if not version:
|
||||
raise ValueError(
|
||||
"'geo' metadata in Parquet/Feather file is missing required key: "
|
||||
"'version'"
|
||||
)
|
||||
|
||||
required_keys = ("primary_column", "columns")
|
||||
for key in required_keys:
|
||||
if metadata.get(key, None) is None:
|
||||
raise ValueError(
|
||||
"'geo' metadata in Parquet/Feather file is missing required key: "
|
||||
"'{key}'".format(key=key)
|
||||
)
|
||||
|
||||
if not isinstance(metadata["columns"], dict):
|
||||
raise ValueError("'columns' in 'geo' metadata must be a dict")
|
||||
|
||||
# Validate that geometry columns have required metadata and values
|
||||
# leaving out "geometry_type" for compatibility with 0.1
|
||||
required_col_keys = ("encoding",)
|
||||
for col, column_metadata in metadata["columns"].items():
|
||||
for key in required_col_keys:
|
||||
if key not in column_metadata:
|
||||
raise ValueError(
|
||||
"'geo' metadata in Parquet/Feather file is missing required key "
|
||||
"'{key}' for column '{col}'".format(key=key, col=col)
|
||||
)
|
||||
|
||||
if column_metadata["encoding"] not in SUPPORTED_ENCODINGS:
|
||||
raise ValueError(
|
||||
"Only WKB geometry encoding or one of the native encodings "
|
||||
f"({GEOARROW_ENCODINGS!r}) are supported, "
|
||||
f"got: {column_metadata['encoding']}"
|
||||
)
|
||||
|
||||
if column_metadata.get("edges", "planar") == "spherical":
|
||||
warnings.warn(
|
||||
f"The geo metadata indicate that column '{col}' has spherical edges, "
|
||||
"but because GeoPandas currently does not support spherical "
|
||||
"geometry, it ignores this metadata and will interpret the edges of "
|
||||
"the geometries as planar.",
|
||||
UserWarning,
|
||||
stacklevel=4,
|
||||
)
|
||||
|
||||
if "covering" in column_metadata:
|
||||
covering = column_metadata["covering"]
|
||||
if "bbox" in covering:
|
||||
bbox = covering["bbox"]
|
||||
for var in ["xmin", "ymin", "xmax", "ymax"]:
|
||||
if var not in bbox.keys():
|
||||
raise ValueError("Metadata for bbox column is malformed.")
|
||||
|
||||
|
||||
def _geopandas_to_arrow(
|
||||
df,
|
||||
index=None,
|
||||
geometry_encoding="WKB",
|
||||
schema_version=None,
|
||||
write_covering_bbox=None,
|
||||
):
|
||||
"""
|
||||
Helper function with main, shared logic for to_parquet/to_feather.
|
||||
"""
|
||||
from pyarrow import StructArray
|
||||
|
||||
from geopandas.io._geoarrow import geopandas_to_arrow
|
||||
|
||||
_validate_dataframe(df)
|
||||
|
||||
if schema_version is not None:
|
||||
if geometry_encoding != "WKB" and schema_version != "1.1.0":
|
||||
raise ValueError(
|
||||
"'geoarrow' encoding is only supported with schema version >= 1.1.0"
|
||||
)
|
||||
|
||||
table, geometry_encoding_dict = geopandas_to_arrow(
|
||||
df, geometry_encoding=geometry_encoding, index=index, interleaved=False
|
||||
)
|
||||
geo_metadata = _create_metadata(
|
||||
df,
|
||||
schema_version=schema_version,
|
||||
geometry_encoding=geometry_encoding_dict,
|
||||
write_covering_bbox=write_covering_bbox,
|
||||
)
|
||||
|
||||
if write_covering_bbox:
|
||||
if "bbox" in df.columns:
|
||||
raise ValueError(
|
||||
"An existing column 'bbox' already exists in the dataframe. "
|
||||
"Please rename to write covering bbox."
|
||||
)
|
||||
bounds = df.bounds
|
||||
bbox_array = StructArray.from_arrays(
|
||||
[bounds["minx"], bounds["miny"], bounds["maxx"], bounds["maxy"]],
|
||||
names=["xmin", "ymin", "xmax", "ymax"],
|
||||
)
|
||||
table = table.append_column("bbox", bbox_array)
|
||||
|
||||
# Store geopandas specific file-level metadata
|
||||
# This must be done AFTER creating the table or it is not persisted
|
||||
metadata = table.schema.metadata
|
||||
metadata.update({b"geo": _encode_metadata(geo_metadata)})
|
||||
|
||||
return table.replace_schema_metadata(metadata)
|
||||
|
||||
|
||||
def _to_parquet(
|
||||
df,
|
||||
path,
|
||||
index=None,
|
||||
compression="snappy",
|
||||
geometry_encoding="WKB",
|
||||
schema_version=None,
|
||||
write_covering_bbox=False,
|
||||
**kwargs,
|
||||
):
|
||||
"""
|
||||
Write a GeoDataFrame to the Parquet format.
|
||||
|
||||
Any geometry columns present are serialized to WKB format in the file.
|
||||
|
||||
Requires 'pyarrow'.
|
||||
|
||||
This is tracking version 1.0.0 of the GeoParquet specification at:
|
||||
https://github.com/opengeospatial/geoparquet. Writing older versions is
|
||||
supported using the `schema_version` keyword.
|
||||
|
||||
.. versionadded:: 0.8
|
||||
|
||||
Parameters
|
||||
----------
|
||||
path : str, path object
|
||||
index : bool, default None
|
||||
If ``True``, always include the dataframe's index(es) as columns
|
||||
in the file output.
|
||||
If ``False``, the index(es) will not be written to the file.
|
||||
If ``None``, the index(es) will be included as columns in the file
|
||||
output except `RangeIndex` which is stored as metadata only.
|
||||
compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy'
|
||||
Name of the compression to use. Use ``None`` for no compression.
|
||||
geometry_encoding : {'WKB', 'geoarrow'}, default 'WKB'
|
||||
The encoding to use for the geometry columns. Defaults to "WKB"
|
||||
for maximum interoperability. Specify "geoarrow" to use one of the
|
||||
native GeoArrow-based single-geometry type encodings.
|
||||
schema_version : {'0.1.0', '0.4.0', '1.0.0', None}
|
||||
GeoParquet specification version; if not provided will default to
|
||||
latest supported version.
|
||||
write_covering_bbox : bool, default False
|
||||
Writes the bounding box column for each row entry with column
|
||||
name 'bbox'. Writing a bbox column can be computationally
|
||||
expensive, hence its default setting is False.
|
||||
**kwargs
|
||||
Additional keyword arguments passed to pyarrow.parquet.write_table().
|
||||
"""
|
||||
parquet = import_optional_dependency(
|
||||
"pyarrow.parquet", extra="pyarrow is required for Parquet support."
|
||||
)
|
||||
|
||||
path = _expand_user(path)
|
||||
table = _geopandas_to_arrow(
|
||||
df,
|
||||
index=index,
|
||||
geometry_encoding=geometry_encoding,
|
||||
schema_version=schema_version,
|
||||
write_covering_bbox=write_covering_bbox,
|
||||
)
|
||||
parquet.write_table(table, path, compression=compression, **kwargs)
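# Typical use goes through the public GeoDataFrame.to_parquet wrapper
# (illustrative; the file name and ``gdf`` are hypothetical):
# >>> gdf.to_parquet("data.parquet", geometry_encoding="geoarrow")  # doctest: +SKIP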
|
||||
|
||||
|
||||
def _to_feather(df, path, index=None, compression=None, schema_version=None, **kwargs):
|
||||
"""
|
||||
Write a GeoDataFrame to the Feather format.
|
||||
|
||||
Any geometry columns present are serialized to WKB format in the file.
|
||||
|
||||
Requires 'pyarrow' >= 0.17.
|
||||
|
||||
This is tracking version 1.0.0 of the GeoParquet specification for
|
||||
the metadata at: https://github.com/opengeospatial/geoparquet. Writing
|
||||
older versions is supported using the `schema_version` keyword.
|
||||
|
||||
.. versionadded:: 0.8
|
||||
|
||||
Parameters
|
||||
----------
|
||||
path : str, path object
|
||||
index : bool, default None
|
||||
If ``True``, always include the dataframe's index(es) as columns
|
||||
in the file output.
|
||||
If ``False``, the index(es) will not be written to the file.
|
||||
If ``None``, the index(es) will be included as columns in the file
|
||||
output except `RangeIndex` which is stored as metadata only.
|
||||
compression : {'zstd', 'lz4', 'uncompressed'}, optional
|
||||
Name of the compression to use. Use ``"uncompressed"`` for no
|
||||
compression. By default uses LZ4 if available, otherwise uncompressed.
|
||||
schema_version : {'0.1.0', '0.4.0', '1.0.0', None}
|
||||
GeoParquet specification version for the metadata; if not provided
|
||||
will default to latest supported version.
|
||||
kwargs
|
||||
Additional keyword arguments passed to pyarrow.feather.write_feather().
|
||||
"""
|
||||
feather = import_optional_dependency(
|
||||
"pyarrow.feather", extra="pyarrow is required for Feather support."
|
||||
)
|
||||
# TODO move this into `import_optional_dependency`
|
||||
import pyarrow
|
||||
|
||||
if Version(pyarrow.__version__) < Version("0.17.0"):
|
||||
raise ImportError("pyarrow >= 0.17 required for Feather support")
|
||||
|
||||
path = _expand_user(path)
|
||||
table = _geopandas_to_arrow(df, index=index, schema_version=schema_version)
|
||||
feather.write_feather(table, path, compression=compression, **kwargs)
|
||||
|
||||
|
||||
def _arrow_to_geopandas(table, geo_metadata=None):
|
||||
"""
|
||||
Helper function with main, shared logic for read_parquet/read_feather.
|
||||
"""
|
||||
if geo_metadata is None:
|
||||
# Note: this path of not passing metadata is also used by dask-geopandas
|
||||
geo_metadata = _validate_and_decode_metadata(table.schema.metadata)
|
||||
|
||||
# Find all geometry columns that were read from the file. May
|
||||
# be a subset if 'columns' parameter is used.
|
||||
geometry_columns = [
|
||||
col for col in geo_metadata["columns"] if col in table.column_names
|
||||
]
|
||||
result_column_names = list(table.slice(0, 0).to_pandas().columns)
|
||||
geometry_columns.sort(key=result_column_names.index)
|
||||
|
||||
if not len(geometry_columns):
|
||||
raise ValueError(
|
||||
"""No geometry columns are included in the columns read from
|
||||
the Parquet/Feather file. To read this file without geometry columns,
|
||||
use pandas.read_parquet/read_feather() instead."""
|
||||
)
|
||||
|
||||
geometry = geo_metadata["primary_column"]
|
||||
|
||||
# Missing geometry likely indicates a subset of columns was read;
|
||||
# promote the first available geometry to the primary geometry.
|
||||
if len(geometry_columns) and geometry not in geometry_columns:
|
||||
geometry = geometry_columns[0]
|
||||
|
||||
# if there are multiple non-primary geometry columns, raise a warning
|
||||
if len(geometry_columns) > 1:
|
||||
warnings.warn(
|
||||
"Multiple non-primary geometry columns read from Parquet/Feather "
|
||||
"file. The first column read was promoted to the primary geometry.",
|
||||
stacklevel=3,
|
||||
)
|
||||
|
||||
table_attr = table.drop(geometry_columns)
|
||||
df = table_attr.to_pandas()
|
||||
|
||||
# Convert the WKB columns that are present back to geometry.
|
||||
for col in geometry_columns:
|
||||
col_metadata = geo_metadata["columns"][col]
|
||||
if "crs" in col_metadata:
|
||||
crs = col_metadata["crs"]
|
||||
if isinstance(crs, dict):
|
||||
_remove_id_from_member_of_ensembles(crs)
|
||||
else:
|
||||
# per the GeoParquet spec, missing CRS is to be interpreted as
|
||||
# OGC:CRS84
|
||||
crs = "OGC:CRS84"
|
||||
|
||||
if col_metadata["encoding"] == "WKB":
|
||||
geom_arr = from_wkb(np.array(table[col]), crs=crs)
|
||||
else:
|
||||
from geopandas.io._geoarrow import construct_shapely_array
|
||||
|
||||
geom_arr = from_shapely(
|
||||
construct_shapely_array(
|
||||
table[col].combine_chunks(), "geoarrow." + col_metadata["encoding"]
|
||||
),
|
||||
crs=crs,
|
||||
)
|
||||
|
||||
df.insert(result_column_names.index(col), col, geom_arr)
|
||||
|
||||
return GeoDataFrame(df, geometry=geometry)
|
||||
|
||||
|
||||
def _get_filesystem_path(path, filesystem=None, storage_options=None):
|
||||
"""
|
||||
Get the filesystem and path for a given filesystem and path.
|
||||
|
||||
If the filesystem is not None then it's just returned as is.
|
||||
"""
|
||||
import pyarrow
|
||||
|
||||
if (
|
||||
isinstance(path, str)
|
||||
and storage_options is None
|
||||
and filesystem is None
|
||||
and Version(pyarrow.__version__) >= Version("5.0.0")
|
||||
):
|
||||
# Use the native pyarrow filesystem if possible.
|
||||
try:
|
||||
from pyarrow.fs import FileSystem
|
||||
|
||||
filesystem, path = FileSystem.from_uri(path)
|
||||
except Exception:
|
||||
# fallback to use get_handle / fsspec for filesystems
|
||||
# that pyarrow doesn't support
|
||||
pass
|
||||
|
||||
if _is_fsspec_url(path) and filesystem is None:
|
||||
fsspec = import_optional_dependency(
|
||||
"fsspec", extra="fsspec is requred for 'storage_options'."
|
||||
)
|
||||
filesystem, path = fsspec.core.url_to_fs(path, **(storage_options or {}))
|
||||
|
||||
if filesystem is None and storage_options:
|
||||
raise ValueError(
|
||||
"Cannot provide 'storage_options' with non-fsspec path '{}'".format(path)
|
||||
)
|
||||
|
||||
return filesystem, path
|
||||
|
||||
|
||||
def _ensure_arrow_fs(filesystem):
|
||||
"""
|
||||
Simplified version of pyarrow.fs._ensure_filesystem. This is only needed
|
||||
below because `pyarrow.parquet.read_metadata` does not yet accept a
|
||||
filesystem keyword (https://issues.apache.org/jira/browse/ARROW-16719)
|
||||
"""
|
||||
from pyarrow import fs
|
||||
|
||||
if isinstance(filesystem, fs.FileSystem):
|
||||
return filesystem
|
||||
|
||||
# handle fsspec-compatible filesystems
|
||||
try:
|
||||
import fsspec
|
||||
except ImportError:
|
||||
pass
|
||||
else:
|
||||
if isinstance(filesystem, fsspec.AbstractFileSystem):
|
||||
return fs.PyFileSystem(fs.FSSpecHandler(filesystem))
|
||||
|
||||
return filesystem
|
||||
|
||||
|
||||
def _validate_and_decode_metadata(metadata):
|
||||
if metadata is None or b"geo" not in metadata:
|
||||
raise ValueError(
|
||||
"""Missing geo metadata in Parquet/Feather file.
|
||||
Use pandas.read_parquet/read_feather() instead."""
|
||||
)
|
||||
|
||||
# check for malformed metadata
|
||||
try:
|
||||
decoded_geo_metadata = _decode_metadata(metadata.get(b"geo", b""))
|
||||
except (TypeError, json.decoder.JSONDecodeError):
|
||||
raise ValueError("Missing or malformed geo metadata in Parquet/Feather file")
|
||||
|
||||
_validate_geo_metadata(decoded_geo_metadata)
|
||||
return decoded_geo_metadata
|
||||
|
||||
|
||||
def _read_parquet_schema_and_metadata(path, filesystem):
|
||||
"""
|
||||
Opening the Parquet file/dataset a first time to get the schema and metadata.
|
||||
|
||||
TODO: we should look into how we can reuse opened dataset for reading the
|
||||
actual data, to avoid discovering the dataset twice (problem right now is
|
||||
that the ParquetDataset interface doesn't allow passing the filters on read)
|
||||
|
||||
"""
|
||||
import pyarrow
|
||||
from pyarrow import parquet
|
||||
|
||||
kwargs = {}
|
||||
if Version(pyarrow.__version__) < Version("15.0.0"):
|
||||
kwargs = dict(use_legacy_dataset=False)
|
||||
|
||||
try:
|
||||
schema = parquet.ParquetDataset(path, filesystem=filesystem, **kwargs).schema
|
||||
except Exception:
|
||||
schema = parquet.read_schema(path, filesystem=filesystem)
|
||||
|
||||
metadata = schema.metadata
|
||||
|
||||
# read metadata separately to get the raw Parquet FileMetaData metadata
|
||||
# (pyarrow doesn't properly expose those in schema.metadata for files
|
||||
# created by GDAL - https://issues.apache.org/jira/browse/ARROW-16688)
|
||||
if metadata is None or b"geo" not in metadata:
|
||||
try:
|
||||
metadata = parquet.read_metadata(path, filesystem=filesystem).metadata
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return schema, metadata
|
||||
|
||||
|
||||
def _read_parquet(path, columns=None, storage_options=None, bbox=None, **kwargs):
|
||||
"""
|
||||
Load a Parquet object from the file path, returning a GeoDataFrame.
|
||||
|
||||
You can read a subset of columns in the file using the ``columns`` parameter.
|
||||
However, the structure of the returned GeoDataFrame will depend on which
|
||||
columns you read:
|
||||
|
||||
* if no geometry columns are read, this will raise a ``ValueError`` - you
|
||||
should use the pandas `read_parquet` method instead.
|
||||
* if the primary geometry column saved to this file is not included in
|
||||
columns, the first available geometry column will be set as the geometry
|
||||
column of the returned GeoDataFrame.
|
||||
|
||||
Supports versions 0.1.0, 0.4.0, 1.0.0 and 1.1.0 of the GeoParquet
|
||||
specification at: https://github.com/opengeospatial/geoparquet
|
||||
|
||||
If 'crs' key is not present in the GeoParquet metadata associated with the
|
||||
Parquet object, it will default to "OGC:CRS84" according to the specification.
|
||||
|
||||
Requires 'pyarrow'.
|
||||
|
||||
.. versionadded:: 0.8
|
||||
|
||||
Parameters
|
||||
----------
|
||||
path : str, path object
|
||||
columns : list-like of strings, default=None
|
||||
If not None, only these columns will be read from the file. If
|
||||
the primary geometry column is not included, the first secondary
|
||||
geometry read from the file will be set as the geometry column
|
||||
of the returned GeoDataFrame. If no geometry columns are present,
|
||||
a ``ValueError`` will be raised.
|
||||
storage_options : dict, optional
|
||||
Extra options that make sense for a particular storage connection, e.g. host,
|
||||
port, username, password, etc. For HTTP(S) URLs the key-value pairs are
|
||||
forwarded to urllib as header options. For other URLs (e.g. starting with
|
||||
"s3://", and "gcs://") the key-value pairs are forwarded to fsspec. Please
|
||||
see fsspec and urllib for more details.
|
||||
|
||||
When no storage options are provided and a filesystem is implemented by
|
||||
both ``pyarrow.fs`` and ``fsspec`` (e.g. "s3://") then the ``pyarrow.fs``
|
||||
filesystem is preferred. Provide the instantiated fsspec filesystem using
|
||||
the ``filesystem`` keyword if you wish to use its implementation.
|
||||
bbox : tuple, optional
|
||||
Bounding box to be used to filter selection from geoparquet data. This
|
||||
is only usable if the data was saved with the bbox covering metadata.
|
||||
Input is of the tuple format (xmin, ymin, xmax, ymax).
|
||||
|
||||
**kwargs
|
||||
Any additional kwargs passed to :func:`pyarrow.parquet.read_table`.
|
||||
|
||||
Returns
|
||||
-------
|
||||
GeoDataFrame
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> df = geopandas.read_parquet("data.parquet") # doctest: +SKIP
|
||||
|
||||
Specifying columns to read:
|
||||
|
||||
>>> df = geopandas.read_parquet(
|
||||
... "data.parquet",
|
||||
... columns=["geometry", "pop_est"]
|
||||
... ) # doctest: +SKIP
|
||||
"""
|
||||
|
||||
parquet = import_optional_dependency(
|
||||
"pyarrow.parquet", extra="pyarrow is required for Parquet support."
|
||||
)
|
||||
import geopandas.io._pyarrow_hotfix # noqa: F401
|
||||
|
||||
# TODO(https://github.com/pandas-dev/pandas/pull/41194): see if pandas
|
||||
# adds filesystem as a keyword and match that.
|
||||
filesystem = kwargs.pop("filesystem", None)
|
||||
filesystem, path = _get_filesystem_path(
|
||||
path, filesystem=filesystem, storage_options=storage_options
|
||||
)
|
||||
path = _expand_user(path)
|
||||
schema, metadata = _read_parquet_schema_and_metadata(path, filesystem)
|
||||
|
||||
geo_metadata = _validate_and_decode_metadata(metadata)
|
||||
|
||||
bbox_filter = (
|
||||
_get_parquet_bbox_filter(geo_metadata, bbox) if bbox is not None else None
|
||||
)
|
||||
|
||||
if_bbox_column_exists = _check_if_covering_in_geo_metadata(geo_metadata)
|
||||
|
||||
# by default, bbox column is not read in, so must specify which
|
||||
# columns are read in if it exists.
|
||||
if not columns and if_bbox_column_exists:
|
||||
columns = _get_non_bbox_columns(schema, geo_metadata)
|
||||
|
||||
# if both bbox and filters kwargs are used, must splice together.
|
||||
if "filters" in kwargs:
|
||||
filters_kwarg = kwargs.pop("filters")
|
||||
filters = _splice_bbox_and_filters(filters_kwarg, bbox_filter)
|
||||
else:
|
||||
filters = bbox_filter
|
||||
|
||||
kwargs["use_pandas_metadata"] = True
|
||||
|
||||
table = parquet.read_table(
|
||||
path, columns=columns, filesystem=filesystem, filters=filters, **kwargs
|
||||
)
|
||||
|
||||
return _arrow_to_geopandas(table, geo_metadata)
|
||||
|
||||
|
||||
def _read_feather(path, columns=None, **kwargs):
|
||||
"""
|
||||
Load a Feather object from the file path, returning a GeoDataFrame.
|
||||
|
||||
You can read a subset of columns in the file using the ``columns`` parameter.
|
||||
However, the structure of the returned GeoDataFrame will depend on which
|
||||
columns you read:
|
||||
|
||||
* if no geometry columns are read, this will raise a ``ValueError`` - you
|
||||
should use the pandas `read_feather` method instead.
|
||||
* if the primary geometry column saved to this file is not included in
|
||||
columns, the first available geometry column will be set as the geometry
|
||||
column of the returned GeoDataFrame.
|
||||
|
||||
Supports versions 0.1.0, 0.4.0, 1.0.0 and 1.1.0 of the GeoParquet
|
||||
specification at: https://github.com/opengeospatial/geoparquet
|
||||
|
||||
If 'crs' key is not present in the Feather metadata associated with the
|
||||
Parquet object, it will default to "OGC:CRS84" according to the specification.
|
||||
|
||||
Requires 'pyarrow' >= 0.17.
|
||||
|
||||
.. versionadded:: 0.8
|
||||
|
||||
Parameters
|
||||
----------
|
||||
path : str, path object
|
||||
columns : list-like of strings, default=None
|
||||
If not None, only these columns will be read from the file. If
|
||||
the primary geometry column is not included, the first secondary
|
||||
geometry read from the file will be set as the geometry column
|
||||
of the returned GeoDataFrame. If no geometry columns are present,
|
||||
a ``ValueError`` will be raised.
|
||||
**kwargs
|
||||
Any additional kwargs passed to pyarrow.feather.read_table().
|
||||
|
||||
Returns
|
||||
-------
|
||||
GeoDataFrame
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> df = geopandas.read_feather("data.feather") # doctest: +SKIP
|
||||
|
||||
Specifying columns to read:
|
||||
|
||||
>>> df = geopandas.read_feather(
|
||||
... "data.feather",
|
||||
... columns=["geometry", "pop_est"]
|
||||
... ) # doctest: +SKIP
|
||||
"""
|
||||
|
||||
feather = import_optional_dependency(
|
||||
"pyarrow.feather", extra="pyarrow is required for Feather support."
|
||||
)
|
||||
# TODO move this into `import_optional_dependency`
|
||||
import pyarrow
|
||||
|
||||
import geopandas.io._pyarrow_hotfix # noqa: F401
|
||||
|
||||
if Version(pyarrow.__version__) < Version("0.17.0"):
|
||||
raise ImportError("pyarrow >= 0.17 required for Feather support")
|
||||
|
||||
path = _expand_user(path)
|
||||
|
||||
table = feather.read_table(path, columns=columns, **kwargs)
|
||||
return _arrow_to_geopandas(table)
|
||||
|
||||
|
||||
def _get_parquet_bbox_filter(geo_metadata, bbox):
|
||||
primary_column = geo_metadata["primary_column"]
|
||||
|
||||
if _check_if_covering_in_geo_metadata(geo_metadata):
|
||||
bbox_column_name = _get_bbox_encoding_column_name(geo_metadata)
|
||||
return _convert_bbox_to_parquet_filter(bbox, bbox_column_name)
|
||||
|
||||
elif geo_metadata["columns"][primary_column]["encoding"] == "point":
|
||||
import pyarrow.compute as pc
|
||||
|
||||
return (
|
||||
(pc.field((primary_column, "x")) >= bbox[0])
|
||||
& (pc.field((primary_column, "x")) <= bbox[2])
|
||||
& (pc.field((primary_column, "y")) >= bbox[1])
|
||||
& (pc.field((primary_column, "y")) <= bbox[3])
|
||||
)
|
||||
|
||||
else:
|
||||
raise ValueError(
|
||||
"Specifying 'bbox' not supported for this Parquet file (it should either "
|
||||
"have a bbox covering column or use 'point' encoding)."
|
||||
)
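# Illustrative bbox-filtered read via the public API (file name hypothetical;
# works when the file has a bbox covering column or uses 'point' encoding):
# >>> geopandas.read_parquet(
# ...     "data.parquet", bbox=(0, 0, 10, 10)
# ... )  # doctest: +SKIP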
|
||||
|
||||
|
||||
def _convert_bbox_to_parquet_filter(bbox, bbox_column_name):
|
||||
import pyarrow.compute as pc
|
||||
|
||||
return ~(
|
||||
(pc.field((bbox_column_name, "xmin")) > bbox[2])
|
||||
| (pc.field((bbox_column_name, "ymin")) > bbox[3])
|
||||
| (pc.field((bbox_column_name, "xmax")) < bbox[0])
|
||||
| (pc.field((bbox_column_name, "ymax")) < bbox[1])
|
||||
)
|
||||
|
||||
|
||||
def _check_if_covering_in_geo_metadata(geo_metadata):
|
||||
primary_column = geo_metadata["primary_column"]
|
||||
return "covering" in geo_metadata["columns"][primary_column].keys()
|
||||
|
||||
|
||||
def _get_bbox_encoding_column_name(geo_metadata):
|
||||
primary_column = geo_metadata["primary_column"]
|
||||
return geo_metadata["columns"][primary_column]["covering"]["bbox"]["xmin"][0]
|
||||
|
||||
|
||||
def _get_non_bbox_columns(schema, geo_metadata):
|
||||
|
||||
bbox_column_name = _get_bbox_encoding_column_name(geo_metadata)
|
||||
columns = schema.names
|
||||
if bbox_column_name in columns:
|
||||
columns.remove(bbox_column_name)
|
||||
return columns
|
||||
|
||||
|
||||
def _splice_bbox_and_filters(kwarg_filters, bbox_filter):
|
||||
parquet = import_optional_dependency(
|
||||
"pyarrow.parquet", extra="pyarrow is required for Parquet support."
|
||||
)
|
||||
if bbox_filter is None:
|
||||
return kwarg_filters
|
||||
|
||||
filters_expression = parquet.filters_to_expression(kwarg_filters)
|
||||
return bbox_filter & filters_expression
|
||||
@@ -0,0 +1,851 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import urllib.request
|
||||
import warnings
|
||||
from io import IOBase
|
||||
from packaging.version import Version
|
||||
from pathlib import Path
|
||||
|
||||
# Adapted from pandas.io.common
|
||||
from urllib.parse import urlparse as parse_url
|
||||
from urllib.parse import uses_netloc, uses_params, uses_relative
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pandas.api.types import is_integer_dtype
|
||||
|
||||
import shapely
|
||||
from shapely.geometry import mapping
|
||||
from shapely.geometry.base import BaseGeometry
|
||||
|
||||
from geopandas import GeoDataFrame, GeoSeries
|
||||
from geopandas._compat import HAS_PYPROJ, PANDAS_GE_20
|
||||
from geopandas.io.util import vsi_path
|
||||
|
||||
_VALID_URLS = set(uses_relative + uses_netloc + uses_params)
|
||||
_VALID_URLS.discard("")
|
||||
# file:// URIs are supported by fiona/pyogrio -> don't already open + read the file here
|
||||
_VALID_URLS.discard("file")
|
||||
|
||||
fiona = None
|
||||
fiona_env = None
|
||||
fiona_import_error = None
|
||||
FIONA_GE_19 = False
|
||||
|
||||
|
||||
def _import_fiona():
|
||||
global fiona
|
||||
global fiona_env
|
||||
global fiona_import_error
|
||||
global FIONA_GE_19
|
||||
|
||||
if fiona is None:
|
||||
try:
|
||||
import fiona
|
||||
|
||||
# only try to import fiona.Env if the main fiona import succeeded
|
||||
# (otherwise you can get confusing "AttributeError: module 'fiona'
|
||||
# has no attribute '_loading'" / partially initialized module errors)
|
||||
try:
|
||||
from fiona import Env as fiona_env
|
||||
except ImportError:
|
||||
try:
|
||||
from fiona import drivers as fiona_env
|
||||
except ImportError:
|
||||
fiona_env = None
|
||||
|
||||
FIONA_GE_19 = Version(Version(fiona.__version__).base_version) >= Version(
|
||||
"1.9.0"
|
||||
)
|
||||
|
||||
except ImportError as err:
|
||||
fiona = False
|
||||
fiona_import_error = str(err)
|
||||
|
||||
|
||||
pyogrio = None
|
||||
pyogrio_import_error = None
|
||||
|
||||
|
||||
def _import_pyogrio():
|
||||
global pyogrio
|
||||
global pyogrio_import_error
|
||||
|
||||
if pyogrio is None:
|
||||
try:
|
||||
import pyogrio
|
||||
|
||||
except ImportError as err:
|
||||
pyogrio = False
|
||||
pyogrio_import_error = str(err)
|
||||
|
||||
|
||||
def _check_fiona(func):
|
||||
if not fiona:
|
||||
raise ImportError(
|
||||
f"the {func} requires the 'fiona' package, but it is not installed or does "
|
||||
f"not import correctly.\nImporting fiona resulted in: {fiona_import_error}"
|
||||
)
|
||||
|
||||
|
||||
def _check_pyogrio(func):
|
||||
if not pyogrio:
|
||||
raise ImportError(
|
||||
f"the {func} requires the 'pyogrio' package, but it is not installed "
|
||||
"or does not import correctly."
|
||||
"\nImporting pyogrio resulted in: {pyogrio_import_error}"
|
||||
)
|
||||
|
||||
|
||||
def _check_metadata_supported(metadata: str | None, engine: str, driver: str) -> None:
|
||||
if metadata is None:
|
||||
return
|
||||
if driver != "GPKG":
|
||||
raise NotImplementedError(
|
||||
"The 'metadata' keyword is only supported for the GPKG driver."
|
||||
)
|
||||
|
||||
if engine == "fiona" and not FIONA_GE_19:
|
||||
raise NotImplementedError(
|
||||
"The 'metadata' keyword is only supported for Fiona >= 1.9."
|
||||
)
|
||||
|
||||
|
||||
def _check_engine(engine, func):
|
||||
# if not specified through keyword or option, then default to "pyogrio" if
|
||||
# installed, otherwise try fiona
|
||||
if engine is None:
|
||||
import geopandas
|
||||
|
||||
engine = geopandas.options.io_engine
|
||||
|
||||
if engine is None:
|
||||
_import_pyogrio()
|
||||
if pyogrio:
|
||||
engine = "pyogrio"
|
||||
else:
|
||||
_import_fiona()
|
||||
if fiona:
|
||||
engine = "fiona"
|
||||
|
||||
if engine == "pyogrio":
|
||||
_import_pyogrio()
|
||||
_check_pyogrio(func)
|
||||
elif engine == "fiona":
|
||||
_import_fiona()
|
||||
_check_fiona(func)
|
||||
elif engine is None:
|
||||
raise ImportError(
|
||||
f"The {func} requires the 'pyogrio' or 'fiona' package, "
|
||||
"but neither is installed or imports correctly."
|
||||
f"\nImporting pyogrio resulted in: {pyogrio_import_error}"
|
||||
f"\nImporting fiona resulted in: {fiona_import_error}"
|
||||
)
|
||||
|
||||
return engine
|
||||
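# Illustrative sketch of how the resolution above plays out from user code; the
# file name is hypothetical. Setting ``geopandas.options.io_engine`` pins the
# engine globally, while the ``engine=`` keyword overrides it per call.
def _example_engine_selection():
    import geopandas

    geopandas.options.io_engine = "pyogrio"  # resolved by _check_engine()
    df_default = geopandas.read_file("nybb.shp")                # uses pyogrio
    df_fiona = geopandas.read_file("nybb.shp", engine="fiona")  # per-call override
    return df_default, df_fiona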
|
||||
|
||||
_EXTENSION_TO_DRIVER = {
|
||||
".bna": "BNA",
|
||||
".dxf": "DXF",
|
||||
".csv": "CSV",
|
||||
".shp": "ESRI Shapefile",
|
||||
".dbf": "ESRI Shapefile",
|
||||
".json": "GeoJSON",
|
||||
".geojson": "GeoJSON",
|
||||
".geojsonl": "GeoJSONSeq",
|
||||
".geojsons": "GeoJSONSeq",
|
||||
".gpkg": "GPKG",
|
||||
".gml": "GML",
|
||||
".xml": "GML",
|
||||
".gpx": "GPX",
|
||||
".gtm": "GPSTrackMaker",
|
||||
".gtz": "GPSTrackMaker",
|
||||
".tab": "MapInfo File",
|
||||
".mif": "MapInfo File",
|
||||
".mid": "MapInfo File",
|
||||
".dgn": "DGN",
|
||||
".fgb": "FlatGeobuf",
|
||||
}
|
||||
|
||||
|
||||
def _expand_user(path):
|
||||
"""Expand paths that use ~."""
|
||||
if isinstance(path, str):
|
||||
path = os.path.expanduser(path)
|
||||
elif isinstance(path, Path):
|
||||
path = path.expanduser()
|
||||
return path
|
||||
|
||||
|
||||
def _is_url(url):
|
||||
"""Check to see if *url* has a valid protocol."""
|
||||
try:
|
||||
return parse_url(url).scheme in _VALID_URLS
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _read_file(
|
||||
filename, bbox=None, mask=None, columns=None, rows=None, engine=None, **kwargs
|
||||
):
|
||||
"""
|
||||
Returns a GeoDataFrame from a file or URL.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
filename : str, path object or file-like object
|
||||
Either the absolute or relative path to the file or URL to
|
||||
be opened, or any object with a read() method (such as an open file
|
||||
or StringIO)
|
||||
bbox : tuple | GeoDataFrame or GeoSeries | shapely Geometry, default None
|
||||
Filter features by given bounding box, GeoSeries, GeoDataFrame or a shapely
|
||||
geometry. With engine="fiona", CRS mis-matches are resolved if given a GeoSeries
|
||||
or GeoDataFrame. With engine="pyogrio", bbox must be in the same CRS as the
|
||||
dataset. Tuple is (minx, miny, maxx, maxy) to match the bounds property of
|
||||
shapely geometry objects. Cannot be used with mask.
|
||||
mask : dict | GeoDataFrame or GeoSeries | shapely Geometry, default None
|
||||
Filter for features that intersect with the given dict-like geojson
|
||||
geometry, GeoSeries, GeoDataFrame or shapely geometry.
|
||||
CRS mis-matches are resolved if given a GeoSeries or GeoDataFrame.
|
||||
Cannot be used with bbox. If multiple geometries are passed, this will
|
||||
first union all geometries, which may be computationally expensive.
|
||||
columns : list, optional
|
||||
List of column names to import from the data source. Column names
|
||||
must exactly match the names in the data source. To avoid reading
|
||||
any columns (besides the geometry column), pass an empty list-like.
|
||||
By default reads all columns.
|
||||
rows : int or slice, default None
|
||||
Load in specific rows by passing an integer (first `n` rows) or a
|
||||
slice() object.
|
||||
engine : str, "pyogrio" or "fiona"
|
||||
The underlying library that is used to read the file. Currently, the
|
||||
supported options are "pyogrio" and "fiona". Defaults to "pyogrio" if
|
||||
installed, otherwise tries "fiona". Engine can also be set globally
|
||||
with the ``geopandas.options.io_engine`` option.
|
||||
    **kwargs :
        Keyword args to be passed to the engine, and can be used to read
        multi-layer data, data stored within archives (zip files), etc.
        In case of the "pyogrio" engine, the keyword arguments are passed to
        `pyogrio.read_dataframe`. In case of the "fiona" engine, the keyword
        arguments are passed to `fiona.open`. For more information on possible
        keywords, type: ``import pyogrio; help(pyogrio.read_dataframe)``.
|
||||
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> df = geopandas.read_file("nybb.shp") # doctest: +SKIP
|
||||
|
||||
Specifying layer of GPKG:
|
||||
|
||||
>>> df = geopandas.read_file("file.gpkg", layer='cities') # doctest: +SKIP
|
||||
|
||||
Reading only first 10 rows:
|
||||
|
||||
>>> df = geopandas.read_file("nybb.shp", rows=10) # doctest: +SKIP
|
||||
|
||||
Reading only geometries intersecting ``mask``:
|
||||
|
||||
>>> df = geopandas.read_file("nybb.shp", mask=polygon) # doctest: +SKIP
|
||||
|
||||
Reading only geometries intersecting ``bbox``:
|
||||
|
||||
>>> df = geopandas.read_file("nybb.shp", bbox=(0, 0, 10, 20)) # doctest: +SKIP
|
||||
|
||||
Returns
|
||||
-------
|
||||
:obj:`geopandas.GeoDataFrame` or :obj:`pandas.DataFrame` :
|
||||
If `ignore_geometry=True` a :obj:`pandas.DataFrame` will be returned.
|
||||
|
||||
Notes
|
||||
-----
|
||||
The format drivers will attempt to detect the encoding of your data, but
|
||||
may fail. In this case, the proper encoding can be specified explicitly
|
||||
by using the encoding keyword parameter, e.g. ``encoding='utf-8'``.
|
||||
|
||||
When specifying a URL, geopandas will check if the server supports reading
|
||||
partial data and in that case pass the URL as is to the underlying engine,
|
||||
which will then use the network file system handler of GDAL to read from
|
||||
the URL. Otherwise geopandas will download the data from the URL and pass
|
||||
all data in-memory to the underlying engine.
|
||||
If you need more control over how the URL is read, you can specify the
|
||||
GDAL virtual filesystem manually (e.g. ``/vsicurl/https://...``). See the
|
||||
GDAL documentation on filesystems for more details
|
||||
(https://gdal.org/user/virtual_file_systems.html#vsicurl-http-https-ftp-files-random-access).
|
||||
|
||||
"""
|
||||
engine = _check_engine(engine, "'read_file' function")
|
||||
|
||||
filename = _expand_user(filename)
|
||||
|
||||
from_bytes = False
|
||||
if _is_url(filename):
|
||||
# if it is a url that supports random access -> pass through to
|
||||
# pyogrio/fiona as is (to support downloading only part of the file)
|
||||
# otherwise still download manually because pyogrio/fiona don't support
|
||||
# all types of urls (https://github.com/geopandas/geopandas/issues/2908)
|
||||
with urllib.request.urlopen(filename) as response:
|
||||
if not response.headers.get("Accept-Ranges") == "bytes":
|
||||
filename = response.read()
|
||||
from_bytes = True
|
||||
|
||||
if engine == "pyogrio":
|
||||
return _read_file_pyogrio(
|
||||
filename, bbox=bbox, mask=mask, columns=columns, rows=rows, **kwargs
|
||||
)
|
||||
|
||||
elif engine == "fiona":
|
||||
if pd.api.types.is_file_like(filename):
|
||||
data = filename.read()
|
||||
path_or_bytes = data.encode("utf-8") if isinstance(data, str) else data
|
||||
from_bytes = True
|
||||
else:
|
||||
path_or_bytes = filename
|
||||
|
||||
return _read_file_fiona(
|
||||
path_or_bytes,
|
||||
from_bytes,
|
||||
bbox=bbox,
|
||||
mask=mask,
|
||||
columns=columns,
|
||||
rows=rows,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
else:
|
||||
raise ValueError(f"unknown engine '{engine}'")
|
||||
|
||||
|
||||
def _read_file_fiona(
|
||||
path_or_bytes,
|
||||
from_bytes,
|
||||
bbox=None,
|
||||
mask=None,
|
||||
columns=None,
|
||||
rows=None,
|
||||
where=None,
|
||||
**kwargs,
|
||||
):
|
||||
if where is not None and not FIONA_GE_19:
|
||||
raise NotImplementedError("where requires fiona 1.9+")
|
||||
|
||||
if columns is not None:
|
||||
if "include_fields" in kwargs:
|
||||
raise ValueError(
|
||||
"Cannot specify both 'include_fields' and 'columns' keywords"
|
||||
)
|
||||
if not FIONA_GE_19:
|
||||
raise NotImplementedError("'columns' keyword requires fiona 1.9+")
|
||||
kwargs["include_fields"] = columns
|
||||
elif "include_fields" in kwargs:
|
||||
# alias to columns, as this variable is used below to specify column order
|
||||
# in the dataframe creation
|
||||
columns = kwargs["include_fields"]
|
||||
|
||||
if not from_bytes:
|
||||
# Opening a file via URL or file-like-object above automatically detects a
|
||||
# zipped file. In order to match that behavior, attempt to add a zip scheme
|
||||
# if missing.
|
||||
path_or_bytes = vsi_path(str(path_or_bytes))
|
||||
|
||||
if from_bytes:
|
||||
reader = fiona.BytesCollection
|
||||
else:
|
||||
reader = fiona.open
|
||||
|
||||
with fiona_env():
|
||||
with reader(path_or_bytes, **kwargs) as features:
|
||||
crs = features.crs_wkt
|
||||
# attempt to get EPSG code
|
||||
try:
|
||||
# fiona 1.9+
|
||||
epsg = features.crs.to_epsg(confidence_threshold=100)
|
||||
if epsg is not None:
|
||||
crs = epsg
|
||||
except AttributeError:
|
||||
# fiona <= 1.8
|
||||
try:
|
||||
crs = features.crs["init"]
|
||||
except (TypeError, KeyError):
|
||||
pass
|
||||
|
||||
# handle loading the bounding box
|
||||
if bbox is not None:
|
||||
if isinstance(bbox, (GeoDataFrame, GeoSeries)):
|
||||
bbox = tuple(bbox.to_crs(crs).total_bounds)
|
||||
elif isinstance(bbox, BaseGeometry):
|
||||
bbox = bbox.bounds
|
||||
assert len(bbox) == 4
|
||||
# handle loading the mask
|
||||
elif isinstance(mask, (GeoDataFrame, GeoSeries)):
|
||||
mask = mapping(mask.to_crs(crs).union_all())
|
||||
elif isinstance(mask, BaseGeometry):
|
||||
mask = mapping(mask)
|
||||
|
||||
filters = {}
|
||||
if bbox is not None:
|
||||
filters["bbox"] = bbox
|
||||
if mask is not None:
|
||||
filters["mask"] = mask
|
||||
if where is not None:
|
||||
filters["where"] = where
|
||||
|
||||
# setup the data loading filter
|
||||
if rows is not None:
|
||||
if isinstance(rows, int):
|
||||
rows = slice(rows)
|
||||
elif not isinstance(rows, slice):
|
||||
raise TypeError("'rows' must be an integer or a slice.")
|
||||
f_filt = features.filter(rows.start, rows.stop, rows.step, **filters)
|
||||
elif filters:
|
||||
f_filt = features.filter(**filters)
|
||||
else:
|
||||
f_filt = features
|
||||
# get list of columns
|
||||
columns = columns or list(features.schema["properties"])
|
||||
datetime_fields = [
|
||||
k for (k, v) in features.schema["properties"].items() if v == "datetime"
|
||||
]
|
||||
if (
|
||||
kwargs.get("ignore_geometry", False)
|
||||
or features.schema["geometry"] == "None"
|
||||
):
|
||||
df = pd.DataFrame(
|
||||
[record["properties"] for record in f_filt], columns=columns
|
||||
)
|
||||
else:
|
||||
df = GeoDataFrame.from_features(
|
||||
f_filt, crs=crs, columns=columns + ["geometry"]
|
||||
)
|
||||
for k in datetime_fields:
|
||||
as_dt = None
|
||||
# plain try catch for when pandas will raise in the future
|
||||
# TODO we can tighten the exception type in future when it does
|
||||
try:
|
||||
with warnings.catch_warnings():
|
||||
# pandas 2.x does not yet enforce this behaviour but raises a
|
||||
                        # warning -> we want to suppress this warning for our users,
|
||||
# and do this by turning it into an error so we take the
|
||||
# `except` code path to try again with utc=True
|
||||
warnings.filterwarnings(
|
||||
"error",
|
||||
"In a future version of pandas, parsing datetimes with "
|
||||
"mixed time zones will raise an error",
|
||||
FutureWarning,
|
||||
)
|
||||
as_dt = pd.to_datetime(df[k])
|
||||
except Exception:
|
||||
pass
|
||||
if as_dt is None or as_dt.dtype == "object":
|
||||
# if to_datetime failed, try again for mixed timezone offsets
|
||||
# This can still fail if there are invalid datetimes
|
||||
try:
|
||||
as_dt = pd.to_datetime(df[k], utc=True)
|
||||
except Exception:
|
||||
pass
|
||||
# if to_datetime succeeded, round datetimes as
|
||||
# fiona only supports up to ms precision (any microseconds are
|
||||
# floating point rounding error)
|
||||
if as_dt is not None and not (as_dt.dtype == "object"):
|
||||
if PANDAS_GE_20:
|
||||
df[k] = as_dt.dt.as_unit("ms")
|
||||
else:
|
||||
df[k] = as_dt.dt.round(freq="ms")
|
||||
return df
|
||||
|
||||
|
||||
def _read_file_pyogrio(path_or_bytes, bbox=None, mask=None, rows=None, **kwargs):
|
||||
import pyogrio
|
||||
|
||||
if rows is not None:
|
||||
if isinstance(rows, int):
|
||||
kwargs["max_features"] = rows
|
||||
elif isinstance(rows, slice):
|
||||
if rows.start is not None:
|
||||
if rows.start < 0:
|
||||
raise ValueError(
|
||||
"Negative slice start not supported with the 'pyogrio' engine."
|
||||
)
|
||||
kwargs["skip_features"] = rows.start
|
||||
if rows.stop is not None:
|
||||
kwargs["max_features"] = rows.stop - (rows.start or 0)
|
||||
if rows.step is not None:
|
||||
raise ValueError("slice with step is not supported")
|
||||
else:
|
||||
raise TypeError("'rows' must be an integer or a slice.")
|
||||
|
||||
if bbox is not None and mask is not None:
|
||||
# match error message from Fiona
|
||||
raise ValueError("mask and bbox can not be set together")
|
||||
|
||||
if bbox is not None:
|
||||
if isinstance(bbox, (GeoDataFrame, GeoSeries)):
|
||||
crs = pyogrio.read_info(path_or_bytes).get("crs")
|
||||
if isinstance(path_or_bytes, IOBase):
|
||||
path_or_bytes.seek(0)
|
||||
|
||||
bbox = tuple(bbox.to_crs(crs).total_bounds)
|
||||
elif isinstance(bbox, BaseGeometry):
|
||||
bbox = bbox.bounds
|
||||
if len(bbox) != 4:
|
||||
raise ValueError("'bbox' should be a length-4 tuple.")
|
||||
|
||||
if mask is not None:
|
||||
# NOTE: mask cannot be used at same time as bbox keyword
|
||||
if isinstance(mask, (GeoDataFrame, GeoSeries)):
|
||||
crs = pyogrio.read_info(path_or_bytes).get("crs")
|
||||
if isinstance(path_or_bytes, IOBase):
|
||||
path_or_bytes.seek(0)
|
||||
|
||||
mask = shapely.unary_union(mask.to_crs(crs).geometry.values)
|
||||
elif isinstance(mask, BaseGeometry):
|
||||
mask = shapely.unary_union(mask)
|
||||
elif isinstance(mask, dict) or hasattr(mask, "__geo_interface__"):
|
||||
# convert GeoJSON to shapely geometry
|
||||
mask = shapely.geometry.shape(mask)
|
||||
|
||||
kwargs["mask"] = mask
|
||||
|
||||
if kwargs.pop("ignore_geometry", False):
|
||||
kwargs["read_geometry"] = False
|
||||
|
||||
# translate `ignore_fields`/`include_fields` keyword for back compat with fiona
|
||||
if "ignore_fields" in kwargs and "include_fields" in kwargs:
|
||||
raise ValueError("Cannot specify both 'ignore_fields' and 'include_fields'")
|
||||
elif "ignore_fields" in kwargs:
|
||||
if kwargs.get("columns", None) is not None:
|
||||
raise ValueError(
|
||||
"Cannot specify both 'columns' and 'ignore_fields' keywords"
|
||||
)
|
||||
warnings.warn(
|
||||
"The 'include_fields' and 'ignore_fields' keywords are deprecated, and "
|
||||
"will be removed in a future release. You can use the 'columns' keyword "
|
||||
"instead to select which columns to read.",
|
||||
DeprecationWarning,
|
||||
stacklevel=3,
|
||||
)
|
||||
ignore_fields = kwargs.pop("ignore_fields")
|
||||
fields = pyogrio.read_info(path_or_bytes)["fields"]
|
||||
include_fields = [col for col in fields if col not in ignore_fields]
|
||||
kwargs["columns"] = include_fields
|
||||
elif "include_fields" in kwargs:
|
||||
# translate `include_fields` keyword for back compat with fiona engine
|
||||
if kwargs.get("columns", None) is not None:
|
||||
raise ValueError(
|
||||
"Cannot specify both 'columns' and 'include_fields' keywords"
|
||||
)
|
||||
warnings.warn(
|
||||
"The 'include_fields' and 'ignore_fields' keywords are deprecated, and "
|
||||
"will be removed in a future release. You can use the 'columns' keyword "
|
||||
"instead to select which columns to read.",
|
||||
DeprecationWarning,
|
||||
stacklevel=3,
|
||||
)
|
||||
kwargs["columns"] = kwargs.pop("include_fields")
|
||||
|
||||
return pyogrio.read_dataframe(path_or_bytes, bbox=bbox, **kwargs)
|
||||
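# Illustrative sketch of the ``rows`` translation above (no I/O involved): an
# integer becomes ``max_features`` and a slice becomes ``skip_features`` plus
# ``max_features``; slices with a step raise.
def _example_rows_to_pyogrio_kwargs():
    rows = slice(5, 20)
    kwargs = {
        "skip_features": rows.start,             # 5
        "max_features": rows.stop - rows.start,  # 15
    }
    return kwargs  # rows=10 would instead map to {"max_features": 10}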
|
||||
|
||||
def _detect_driver(path):
|
||||
"""
|
||||
Attempt to auto-detect driver based on the extension
|
||||
"""
|
||||
try:
|
||||
# in case the path is a file handle
|
||||
path = path.name
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
return _EXTENSION_TO_DRIVER[Path(path).suffix.lower()]
|
||||
except KeyError:
|
||||
# Assume it is a shapefile folder for now. In the future,
|
||||
# will likely raise an exception when the expected
|
||||
# folder writing behavior is more clearly defined.
|
||||
return "ESRI Shapefile"
|
||||
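# Illustrative checks against the extension mapping above (paths are
# hypothetical and nothing is read from or written to disk):
def _example_detect_driver():
    assert _detect_driver("cities.gpkg") == "GPKG"
    assert _detect_driver("cities.GeoJSON") == "GeoJSON"  # suffix is lower-cased
    assert _detect_driver("cities") == "ESRI Shapefile"   # no extension -> fallback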
|
||||
|
||||
def _to_file(
|
||||
df,
|
||||
filename,
|
||||
driver=None,
|
||||
schema=None,
|
||||
index=None,
|
||||
mode="w",
|
||||
crs=None,
|
||||
engine=None,
|
||||
metadata=None,
|
||||
**kwargs,
|
||||
):
|
||||
"""
|
||||
Write this GeoDataFrame to an OGR data source
|
||||
|
||||
A dictionary of supported OGR providers is available via:
|
||||
|
||||
>>> import pyogrio
|
||||
>>> pyogrio.list_drivers() # doctest: +SKIP
|
||||
|
||||
Parameters
|
||||
----------
|
||||
df : GeoDataFrame to be written
|
||||
filename : string
|
||||
File path or file handle to write to. The path may specify a
|
||||
GDAL VSI scheme.
|
||||
driver : string, default None
|
||||
The OGR format driver used to write the vector file.
|
||||
If not specified, it attempts to infer it from the file extension.
|
||||
If no extension is specified, it saves ESRI Shapefile to a folder.
|
||||
schema : dict, default None
|
||||
If specified, the schema dictionary is passed to Fiona to
|
||||
better control how the file is written. If None, GeoPandas
|
||||
will determine the schema based on each column's dtype.
|
||||
Not supported for the "pyogrio" engine.
|
||||
index : bool, default None
|
||||
If True, write index into one or more columns (for MultiIndex).
|
||||
Default None writes the index into one or more columns only if
|
||||
the index is named, is a MultiIndex, or has a non-integer data
|
||||
type. If False, no index is written.
|
||||
|
||||
.. versionadded:: 0.7
|
||||
Previously the index was not written.
|
||||
mode : string, default 'w'
|
||||
The write mode, 'w' to overwrite the existing file and 'a' to append;
|
||||
when using the pyogrio engine, you can also pass ``append=True``.
|
||||
Not all drivers support appending. For the fiona engine, the drivers
|
||||
that support appending are listed in fiona.supported_drivers or
|
||||
https://github.com/Toblerity/Fiona/blob/master/fiona/drvsupport.py.
|
||||
For the pyogrio engine, you should be able to use any driver that
|
||||
is available in your installation of GDAL that supports append
|
||||
capability; see the specific driver entry at
|
||||
https://gdal.org/drivers/vector/index.html for more information.
|
||||
crs : pyproj.CRS, default None
|
||||
If specified, the CRS is passed to Fiona to
|
||||
better control how the file is written. If None, GeoPandas
|
||||
will determine the crs based on crs df attribute.
|
||||
The value can be anything accepted
|
||||
by :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
|
||||
such as an authority string (eg "EPSG:4326") or a WKT string.
|
||||
engine : str, "pyogrio" or "fiona"
|
||||
The underlying library that is used to read the file. Currently, the
|
||||
supported options are "pyogrio" and "fiona". Defaults to "pyogrio" if
|
||||
installed, otherwise tries "fiona". Engine can also be set globally
|
||||
with the ``geopandas.options.io_engine`` option.
|
||||
metadata : dict[str, str], default None
|
||||
Optional metadata to be stored in the file. Keys and values must be
|
||||
strings. Only supported for the "GPKG" driver
|
||||
(requires Fiona >= 1.9 or pyogrio >= 0.6).
|
||||
**kwargs :
|
||||
Keyword args to be passed to the engine, and can be used to write
|
||||
to multi-layer data, store data within archives (zip files), etc.
|
||||
In case of the "fiona" engine, the keyword arguments are passed to
|
||||
        `fiona.open`. For more information on possible keywords, type:
|
||||
``import fiona; help(fiona.open)``. In case of the "pyogrio" engine,
|
||||
the keyword arguments are passed to `pyogrio.write_dataframe`.
|
||||
|
||||
Notes
|
||||
-----
|
||||
The format drivers will attempt to detect the encoding of your data, but
|
||||
may fail. In this case, the proper encoding can be specified explicitly
|
||||
by using the encoding keyword parameter, e.g. ``encoding='utf-8'``.
|
||||
"""
|
||||
engine = _check_engine(engine, "'to_file' method")
|
||||
|
||||
filename = _expand_user(filename)
|
||||
|
||||
if index is None:
|
||||
# Determine if index attribute(s) should be saved to file
|
||||
# (only if they are named or are non-integer)
|
||||
index = list(df.index.names) != [None] or not is_integer_dtype(df.index.dtype)
|
||||
if index:
|
||||
df = df.reset_index(drop=False)
|
||||
|
||||
if driver is None:
|
||||
driver = _detect_driver(filename)
|
||||
|
||||
if driver == "ESRI Shapefile" and any(len(c) > 10 for c in df.columns.tolist()):
|
||||
warnings.warn(
|
||||
"Column names longer than 10 characters will be truncated when saved to "
|
||||
"ESRI Shapefile.",
|
||||
stacklevel=3,
|
||||
)
|
||||
|
||||
if (df.dtypes == "geometry").sum() > 1:
|
||||
raise ValueError(
|
||||
"GeoDataFrame contains multiple geometry columns but GeoDataFrame.to_file "
|
||||
"supports only a single geometry column. Use a GeoDataFrame.to_parquet or "
|
||||
"GeoDataFrame.to_feather, drop additional geometry columns or convert them "
|
||||
"to a supported format like a well-known text (WKT) using "
|
||||
"`GeoSeries.to_wkt()`.",
|
||||
)
|
||||
_check_metadata_supported(metadata, engine, driver)
|
||||
|
||||
if mode not in ("w", "a"):
|
||||
raise ValueError(f"'mode' should be one of 'w' or 'a', got '{mode}' instead")
|
||||
|
||||
if engine == "pyogrio":
|
||||
_to_file_pyogrio(df, filename, driver, schema, crs, mode, metadata, **kwargs)
|
||||
elif engine == "fiona":
|
||||
_to_file_fiona(df, filename, driver, schema, crs, mode, metadata, **kwargs)
|
||||
else:
|
||||
raise ValueError(f"unknown engine '{engine}'")
|
||||
|
||||
|
||||
def _to_file_fiona(df, filename, driver, schema, crs, mode, metadata, **kwargs):
|
||||
if not HAS_PYPROJ and crs:
|
||||
raise ImportError(
|
||||
"The 'pyproj' package is required to write a file with a CRS, but it is not"
|
||||
" installed or does not import correctly."
|
||||
)
|
||||
|
||||
if schema is None:
|
||||
schema = infer_schema(df)
|
||||
|
||||
if crs:
|
||||
from pyproj import CRS
|
||||
|
||||
crs = CRS.from_user_input(crs)
|
||||
else:
|
||||
crs = df.crs
|
||||
|
||||
with fiona_env():
|
||||
crs_wkt = None
|
||||
try:
|
||||
gdal_version = Version(
|
||||
fiona.env.get_gdal_release_name().strip("e")
|
||||
) # GH3147
|
||||
except (AttributeError, ValueError):
|
||||
gdal_version = Version("2.0.0") # just assume it is not the latest
|
||||
if gdal_version >= Version("3.0.0") and crs:
|
||||
crs_wkt = crs.to_wkt()
|
||||
elif crs:
|
||||
crs_wkt = crs.to_wkt("WKT1_GDAL")
|
||||
with fiona.open(
|
||||
filename, mode=mode, driver=driver, crs_wkt=crs_wkt, schema=schema, **kwargs
|
||||
) as colxn:
|
||||
if metadata is not None:
|
||||
colxn.update_tags(metadata)
|
||||
colxn.writerecords(df.iterfeatures())
|
||||
|
||||
|
||||
def _to_file_pyogrio(df, filename, driver, schema, crs, mode, metadata, **kwargs):
|
||||
import pyogrio
|
||||
|
||||
if schema is not None:
|
||||
raise ValueError(
|
||||
"The 'schema' argument is not supported with the 'pyogrio' engine."
|
||||
)
|
||||
|
||||
if mode == "a":
|
||||
kwargs["append"] = True
|
||||
|
||||
if crs is not None:
|
||||
raise ValueError("Passing 'crs' is not supported with the 'pyogrio' engine.")
|
||||
|
||||
# for the fiona engine, this check is done in gdf.iterfeatures()
|
||||
if not df.columns.is_unique:
|
||||
raise ValueError("GeoDataFrame cannot contain duplicated column names.")
|
||||
|
||||
pyogrio.write_dataframe(df, filename, driver=driver, metadata=metadata, **kwargs)
|
||||
|
||||
|
||||
def infer_schema(df):
|
||||
from collections import OrderedDict
|
||||
|
||||
# TODO: test pandas string type and boolean type once released
|
||||
types = {
|
||||
"Int32": "int32",
|
||||
"int32": "int32",
|
||||
"Int64": "int",
|
||||
"string": "str",
|
||||
"boolean": "bool",
|
||||
}
|
||||
|
||||
def convert_type(column, in_type):
|
||||
if in_type == object:
|
||||
return "str"
|
||||
if in_type.name.startswith("datetime64"):
|
||||
# numpy datetime type regardless of frequency
|
||||
return "datetime"
|
||||
if str(in_type) in types:
|
||||
out_type = types[str(in_type)]
|
||||
else:
|
||||
out_type = type(np.zeros(1, in_type).item()).__name__
|
||||
if out_type == "long":
|
||||
out_type = "int"
|
||||
return out_type
|
||||
|
||||
properties = OrderedDict(
|
||||
[
|
||||
(col, convert_type(col, _type))
|
||||
for col, _type in zip(df.columns, df.dtypes)
|
||||
if col != df._geometry_column_name
|
||||
]
|
||||
)
|
||||
|
||||
if df.empty:
|
||||
warnings.warn(
|
||||
"You are attempting to write an empty DataFrame to file. "
|
||||
"For some drivers, this operation may fail.",
|
||||
UserWarning,
|
||||
stacklevel=3,
|
||||
)
|
||||
|
||||
# Since https://github.com/Toblerity/Fiona/issues/446 resolution,
|
||||
# Fiona allows a list of geometry types
|
||||
geom_types = _geometry_types(df)
|
||||
|
||||
schema = {"geometry": geom_types, "properties": properties}
|
||||
|
||||
return schema
|
||||
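# Illustrative sketch of what infer_schema() produces for a small frame of
# Points with one integer and one string column (column names are made up):
def _example_infer_schema():
    from shapely.geometry import Point

    from geopandas import GeoDataFrame

    gdf = GeoDataFrame(
        {"pop": [100, 200], "name": ["a", "b"]},
        geometry=[Point(0, 0), Point(1, 1)],
    )
    return infer_schema(gdf)
    # -> {"geometry": "Point",
    #     "properties": OrderedDict([("pop", "int"), ("name", "str")])}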
|
||||
|
||||
def _geometry_types(df):
|
||||
"""
|
||||
Determine the geometry types in the GeoDataFrame for the schema.
|
||||
"""
|
||||
geom_types_2D = df[~df.geometry.has_z].geometry.geom_type.unique()
|
||||
geom_types_2D = [gtype for gtype in geom_types_2D if gtype is not None]
|
||||
geom_types_3D = df[df.geometry.has_z].geometry.geom_type.unique()
|
||||
geom_types_3D = ["3D " + gtype for gtype in geom_types_3D if gtype is not None]
|
||||
geom_types = geom_types_3D + geom_types_2D
|
||||
|
||||
if len(geom_types) == 0:
|
||||
# Default geometry type supported by Fiona
|
||||
# (Since https://github.com/Toblerity/Fiona/issues/446 resolution)
|
||||
return "Unknown"
|
||||
|
||||
if len(geom_types) == 1:
|
||||
geom_types = geom_types[0]
|
||||
|
||||
return geom_types
|
||||
|
||||
|
||||
def _list_layers(filename) -> pd.DataFrame:
|
||||
"""List layers available in a file.
|
||||
|
||||
Provides an overview of layers available in a file or URL together with their
|
||||
geometry types. When supported by the data source, this includes both spatial and
|
||||
non-spatial layers. Non-spatial layers are indicated by the ``"geometry_type"``
|
||||
column being ``None``. GeoPandas will not read such layers but they can be read into
|
||||
a pd.DataFrame using :func:`pyogrio.read_dataframe`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
filename : str, path object or file-like object
|
||||
Either the absolute or relative path to the file or URL to
|
||||
be opened, or any object with a read() method (such as an open file
|
||||
or StringIO)
|
||||
|
||||
Returns
|
||||
-------
|
||||
pandas.DataFrame
|
||||
A DataFrame with columns "name" and "geometry_type" and one row per layer.
|
||||
"""
|
||||
_import_pyogrio()
|
||||
_check_pyogrio("list_layers")
|
||||
|
||||
import pyogrio
|
||||
|
||||
return pd.DataFrame(
|
||||
pyogrio.list_layers(filename), columns=["name", "geometry_type"]
|
||||
)
|
||||
@@ -0,0 +1,473 @@
|
||||
import warnings
|
||||
from contextlib import contextmanager
|
||||
from functools import lru_cache
|
||||
|
||||
import pandas as pd
|
||||
|
||||
import shapely
|
||||
import shapely.wkb
|
||||
|
||||
from geopandas import GeoDataFrame
|
||||
|
||||
|
||||
@contextmanager
|
||||
def _get_conn(conn_or_engine):
|
||||
"""
|
||||
Yield a connection within a transaction context.
|
||||
|
||||
Engine.begin() returns a Connection with an implicit Transaction while
|
||||
Connection.begin() returns the Transaction. This helper will always return a
|
||||
Connection with an implicit (possibly nested) Transaction.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
conn_or_engine : Connection or Engine
|
||||
A sqlalchemy Connection or Engine instance
|
||||
Returns
|
||||
-------
|
||||
Connection
|
||||
"""
|
||||
from sqlalchemy.engine.base import Connection, Engine
|
||||
|
||||
if isinstance(conn_or_engine, Connection):
|
||||
if not conn_or_engine.in_transaction():
|
||||
with conn_or_engine.begin():
|
||||
yield conn_or_engine
|
||||
else:
|
||||
yield conn_or_engine
|
||||
elif isinstance(conn_or_engine, Engine):
|
||||
with conn_or_engine.begin() as conn:
|
||||
yield conn
|
||||
else:
|
||||
raise ValueError(f"Unknown Connectable: {conn_or_engine}")
|
||||
|
||||
|
||||
def _df_to_geodf(df, geom_col="geom", crs=None, con=None):
|
||||
"""
|
||||
Transforms a pandas DataFrame into a GeoDataFrame.
|
||||
The column 'geom_col' must be a geometry column in WKB representation.
|
||||
To be used to convert df based on pd.read_sql to gdf.
|
||||
Parameters
|
||||
----------
|
||||
df : DataFrame
|
||||
pandas DataFrame with geometry column in WKB representation.
|
||||
geom_col : string, default 'geom'
|
||||
column name to convert to shapely geometries
|
||||
crs : pyproj.CRS, optional
|
||||
CRS to use for the returned GeoDataFrame. The value can be anything accepted
|
||||
by :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
|
||||
such as an authority string (eg "EPSG:4326") or a WKT string.
|
||||
If not set, tries to determine CRS from the SRID associated with the
|
||||
first geometry in the database, and assigns that to all geometries.
|
||||
con : sqlalchemy.engine.Connection or sqlalchemy.engine.Engine
|
||||
Active connection to the database to query.
|
||||
Returns
|
||||
-------
|
||||
GeoDataFrame
|
||||
"""
|
||||
|
||||
if geom_col not in df:
|
||||
raise ValueError("Query missing geometry column '{}'".format(geom_col))
|
||||
|
||||
if df.columns.to_list().count(geom_col) > 1:
|
||||
raise ValueError(
|
||||
f"Duplicate geometry column '{geom_col}' detected in SQL query output. Only"
|
||||
"one geometry column is allowed."
|
||||
)
|
||||
|
||||
geoms = df[geom_col].dropna()
|
||||
|
||||
if not geoms.empty:
|
||||
load_geom_bytes = shapely.wkb.loads
|
||||
"""Load from Python 3 binary."""
|
||||
|
||||
def load_geom_text(x):
|
||||
"""Load from binary encoded as text."""
|
||||
return shapely.wkb.loads(str(x), hex=True)
|
||||
|
||||
if isinstance(geoms.iat[0], bytes):
|
||||
load_geom = load_geom_bytes
|
||||
else:
|
||||
load_geom = load_geom_text
|
||||
|
||||
df[geom_col] = geoms = geoms.apply(load_geom)
|
||||
if crs is None:
|
||||
srid = shapely.get_srid(geoms.iat[0])
|
||||
# if no defined SRID in geodatabase, returns SRID of 0
|
||||
if srid != 0:
|
||||
try:
|
||||
spatial_ref_sys_df = _get_spatial_ref_sys_df(con, srid)
|
||||
except pd.errors.DatabaseError:
|
||||
warning_msg = (
|
||||
f"Could not find the spatial reference system table "
|
||||
f"(spatial_ref_sys) in PostGIS."
|
||||
f"Trying epsg:{srid} as a fallback."
|
||||
)
|
||||
warnings.warn(warning_msg, UserWarning, stacklevel=3)
|
||||
crs = "epsg:{}".format(srid)
|
||||
else:
|
||||
if not spatial_ref_sys_df.empty:
|
||||
auth_name = spatial_ref_sys_df["auth_name"].item()
|
||||
crs = f"{auth_name}:{srid}"
|
||||
else:
|
||||
warning_msg = (
|
||||
f"Could not find srid {srid} in the "
|
||||
f"spatial_ref_sys table. "
|
||||
f"Trying epsg:{srid} as a fallback."
|
||||
)
|
||||
warnings.warn(warning_msg, UserWarning, stacklevel=3)
|
||||
crs = "epsg:{}".format(srid)
|
||||
|
||||
return GeoDataFrame(df, crs=crs, geometry=geom_col)
|
||||
|
||||
|
||||
def _read_postgis(
|
||||
sql,
|
||||
con,
|
||||
geom_col="geom",
|
||||
crs=None,
|
||||
index_col=None,
|
||||
coerce_float=True,
|
||||
parse_dates=None,
|
||||
params=None,
|
||||
chunksize=None,
|
||||
):
|
||||
"""
|
||||
Returns a GeoDataFrame corresponding to the result of the query
|
||||
string, which must contain a geometry column in WKB representation.
|
||||
|
||||
It is also possible to use :meth:`~GeoDataFrame.read_file` to read from a database.
|
||||
Especially for file geodatabases like GeoPackage or SpatiaLite this can be easier.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
sql : string
|
||||
SQL query to execute in selecting entries from database, or name
|
||||
of the table to read from the database.
|
||||
con : sqlalchemy.engine.Connection or sqlalchemy.engine.Engine
|
||||
Active connection to the database to query.
|
||||
geom_col : string, default 'geom'
|
||||
column name to convert to shapely geometries
|
||||
crs : dict or str, optional
|
||||
CRS to use for the returned GeoDataFrame; if not set, tries to
|
||||
determine CRS from the SRID associated with the first geometry in
|
||||
the database, and assigns that to all geometries.
|
||||
chunksize : int, default None
|
||||
If specified, return an iterator where chunksize is the number of rows to
|
||||
include in each chunk.
|
||||
|
||||
See the documentation for pandas.read_sql for further explanation
|
||||
of the following parameters:
|
||||
index_col, coerce_float, parse_dates, params, chunksize
|
||||
|
||||
Returns
|
||||
-------
|
||||
GeoDataFrame
|
||||
|
||||
Examples
|
||||
--------
|
||||
PostGIS
|
||||
|
||||
>>> from sqlalchemy import create_engine # doctest: +SKIP
|
||||
>>> db_connection_url = "postgresql://myusername:mypassword@myhost:5432/mydatabase"
|
||||
>>> con = create_engine(db_connection_url) # doctest: +SKIP
|
||||
>>> sql = "SELECT geom, highway FROM roads"
|
||||
>>> df = geopandas.read_postgis(sql, con) # doctest: +SKIP
|
||||
|
||||
SpatiaLite
|
||||
|
||||
>>> sql = "SELECT ST_AsBinary(geom) AS geom, highway FROM roads"
|
||||
>>> df = geopandas.read_postgis(sql, con) # doctest: +SKIP
|
||||
"""
|
||||
|
||||
if chunksize is None:
|
||||
# read all in one chunk and return a single GeoDataFrame
|
||||
df = pd.read_sql(
|
||||
sql,
|
||||
con,
|
||||
index_col=index_col,
|
||||
coerce_float=coerce_float,
|
||||
parse_dates=parse_dates,
|
||||
params=params,
|
||||
chunksize=chunksize,
|
||||
)
|
||||
return _df_to_geodf(df, geom_col=geom_col, crs=crs, con=con)
|
||||
|
||||
else:
|
||||
# read data in chunks and return a generator
|
||||
df_generator = pd.read_sql(
|
||||
sql,
|
||||
con,
|
||||
index_col=index_col,
|
||||
coerce_float=coerce_float,
|
||||
parse_dates=parse_dates,
|
||||
params=params,
|
||||
chunksize=chunksize,
|
||||
)
|
||||
return (
|
||||
_df_to_geodf(df, geom_col=geom_col, crs=crs, con=con) for df in df_generator
|
||||
)
|
||||
|
||||
|
||||
def _get_geometry_type(gdf):
|
||||
"""
|
||||
Get basic geometry type of a GeoDataFrame. See more info from:
|
||||
https://geoalchemy-2.readthedocs.io/en/latest/types.html#geoalchemy2.types._GISType
|
||||
|
||||
Following rules apply:
|
||||
- if geometries all share the same geometry-type,
|
||||
geometries are inserted with the given GeometryType with following types:
|
||||
- Point, LineString, Polygon, MultiPoint, MultiLineString, MultiPolygon,
|
||||
GeometryCollection.
|
||||
        - LinearRing geometries will be converted into LineString objects.
|
||||
- in all other cases, geometries will be inserted with type GEOMETRY:
|
||||
- a mix of Polygons and MultiPolygons in GeoSeries
|
||||
- a mix of Points and LineStrings in GeoSeries
|
||||
- geometry is of type GeometryCollection,
|
||||
such as GeometryCollection([Point, LineStrings])
|
||||
- if any of the geometries has Z-coordinate, all records will
|
||||
be written with 3D.
|
||||
"""
|
||||
geom_types = list(gdf.geometry.geom_type.unique())
|
||||
has_curve = False
|
||||
|
||||
for gt in geom_types:
|
||||
if gt is None:
|
||||
continue
|
||||
elif "LinearRing" in gt:
|
||||
has_curve = True
|
||||
|
||||
if len(geom_types) == 1:
|
||||
if has_curve:
|
||||
target_geom_type = "LINESTRING"
|
||||
else:
|
||||
if geom_types[0] is None:
|
||||
raise ValueError("No valid geometries in the data.")
|
||||
else:
|
||||
target_geom_type = geom_types[0].upper()
|
||||
else:
|
||||
target_geom_type = "GEOMETRY"
|
||||
|
||||
# Check for 3D-coordinates
|
||||
if any(gdf.geometry.has_z):
|
||||
target_geom_type += "Z"
|
||||
|
||||
return target_geom_type, has_curve
|
||||
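# Illustrative sketch of the rules above: a uniform GeoSeries keeps its own
# (upper-cased) type, while mixing types falls back to the generic GEOMETRY.
def _example_get_geometry_type():
    from shapely.geometry import MultiPoint, Point

    from geopandas import GeoDataFrame

    uniform = GeoDataFrame(geometry=[Point(0, 0), Point(1, 1)])
    mixed = GeoDataFrame(geometry=[Point(0, 0), MultiPoint([(1, 1), (2, 2)])])
    return _get_geometry_type(uniform), _get_geometry_type(mixed)
    # -> (("POINT", False), ("GEOMETRY", False))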
|
||||
|
||||
def _get_srid_from_crs(gdf):
|
||||
"""
|
||||
Get EPSG code from CRS if available. If not, return 0.
|
||||
"""
|
||||
|
||||
# Use geoalchemy2 default for srid
|
||||
# Note: undefined srid in PostGIS is 0
|
||||
srid = None
|
||||
warning_msg = (
|
||||
"Could not parse CRS from the GeoDataFrame. "
|
||||
"Inserting data without defined CRS."
|
||||
)
|
||||
if gdf.crs is not None:
|
||||
try:
|
||||
for confidence in (100, 70, 25):
|
||||
srid = gdf.crs.to_epsg(min_confidence=confidence)
|
||||
if srid is not None:
|
||||
break
|
||||
auth_srid = gdf.crs.to_authority(
|
||||
auth_name="ESRI", min_confidence=confidence
|
||||
)
|
||||
if auth_srid is not None:
|
||||
srid = int(auth_srid[1])
|
||||
break
|
||||
except Exception:
|
||||
warnings.warn(warning_msg, UserWarning, stacklevel=2)
|
||||
|
||||
if srid is None:
|
||||
srid = 0
|
||||
warnings.warn(warning_msg, UserWarning, stacklevel=2)
|
||||
|
||||
return srid
|
||||
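# Illustrative sketch (requires pyproj): a GeoDataFrame with an EPSG CRS
# resolves to its numeric code, while a missing CRS falls back to 0 with a
# warning.
def _example_get_srid_from_crs():
    from shapely.geometry import Point

    from geopandas import GeoDataFrame

    gdf = GeoDataFrame(geometry=[Point(0, 0)], crs="EPSG:4326")
    return _get_srid_from_crs(gdf)  # -> 4326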
|
||||
|
||||
def _convert_linearring_to_linestring(gdf, geom_name):
|
||||
from shapely.geometry import LineString
|
||||
|
||||
# Todo: Use shapely function once it's implemented:
|
||||
# https://github.com/shapely/shapely/issues/1617
|
||||
|
||||
mask = gdf.geom_type == "LinearRing"
|
||||
gdf.loc[mask, geom_name] = gdf.loc[mask, geom_name].apply(
|
||||
lambda geom: LineString(geom)
|
||||
)
|
||||
return gdf
|
||||
|
||||
|
||||
def _convert_to_ewkb(gdf, geom_name, srid):
|
||||
"""Convert geometries to ewkb."""
|
||||
geoms = shapely.to_wkb(
|
||||
shapely.set_srid(gdf[geom_name].values._data, srid=srid),
|
||||
hex=True,
|
||||
include_srid=True,
|
||||
)
|
||||
|
||||
# The gdf will warn that the geometry column doesn't hold in-memory geometries
|
||||
# now that they are EWKB, so convert back to a regular dataframe to avoid warning
|
||||
# the user that the dtypes are unexpected.
|
||||
df = pd.DataFrame(gdf, copy=False)
|
||||
df[geom_name] = geoms
|
||||
return df
|
||||
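# Illustrative sketch of the EWKB conversion for a single geometry: tag the
# SRID on the geometry, then dump hex-encoded WKB with the SRID embedded.
def _example_single_geometry_to_ewkb():
    import shapely
    from shapely.geometry import Point

    geom = shapely.set_srid(Point(1, 2), 4326)
    return shapely.to_wkb(geom, hex=True, include_srid=True)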
|
||||
|
||||
def _psql_insert_copy(tbl, conn, keys, data_iter):
|
||||
import csv
|
||||
import io
|
||||
|
||||
s_buf = io.StringIO()
|
||||
writer = csv.writer(s_buf)
|
||||
writer.writerows(data_iter)
|
||||
s_buf.seek(0)
|
||||
|
||||
columns = ", ".join('"{}"'.format(k) for k in keys)
|
||||
|
||||
dbapi_conn = conn.connection
|
||||
sql = 'COPY "{}"."{}" ({}) FROM STDIN WITH CSV'.format(
|
||||
tbl.table.schema, tbl.table.name, columns
|
||||
)
|
||||
with dbapi_conn.cursor() as cur:
|
||||
# Use psycopg method if it's available
|
||||
if hasattr(cur, "copy") and callable(cur.copy):
|
||||
with cur.copy(sql) as copy:
|
||||
copy.write(s_buf.read())
|
||||
else: # otherwise use psycopg2 method
|
||||
cur.copy_expert(sql, s_buf)
|
||||
|
||||
|
||||
def _write_postgis(
|
||||
gdf,
|
||||
name,
|
||||
con,
|
||||
schema=None,
|
||||
if_exists="fail",
|
||||
index=False,
|
||||
index_label=None,
|
||||
chunksize=None,
|
||||
dtype=None,
|
||||
):
|
||||
"""
|
||||
Upload GeoDataFrame into PostGIS database.
|
||||
|
||||
This method requires SQLAlchemy and GeoAlchemy2, and a PostgreSQL
|
||||
Python driver (e.g. psycopg2) to be installed.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name : str
|
||||
Name of the target table.
|
||||
con : sqlalchemy.engine.Connection or sqlalchemy.engine.Engine
|
||||
Active connection to the PostGIS database.
|
||||
if_exists : {'fail', 'replace', 'append'}, default 'fail'
|
||||
How to behave if the table already exists:
|
||||
|
||||
- fail: Raise a ValueError.
|
||||
- replace: Drop the table before inserting new values.
|
||||
- append: Insert new values to the existing table.
|
||||
schema : string, optional
|
||||
Specify the schema. If None, use default schema: 'public'.
|
||||
    index : bool, default False
|
||||
Write DataFrame index as a column.
|
||||
Uses *index_label* as the column name in the table.
|
||||
index_label : string or sequence, default None
|
||||
Column label for index column(s).
|
||||
If None is given (default) and index is True,
|
||||
then the index names are used.
|
||||
chunksize : int, optional
|
||||
Rows will be written in batches of this size at a time.
|
||||
By default, all rows will be written at once.
|
||||
dtype : dict of column name to SQL type, default None
|
||||
Specifying the datatype for columns.
|
||||
The keys should be the column names and the values
|
||||
should be the SQLAlchemy types.
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
>>> from sqlalchemy import create_engine # doctest: +SKIP
|
||||
>>> engine = create_engine("postgresql://myusername:mypassword@myhost:5432\
|
||||
/mydatabase";) # doctest: +SKIP
|
||||
>>> gdf.to_postgis("my_table", engine) # doctest: +SKIP
|
||||
"""
|
||||
try:
|
||||
from geoalchemy2 import Geometry
|
||||
from sqlalchemy import text
|
||||
except ImportError:
|
||||
raise ImportError("'to_postgis()' requires geoalchemy2 package.")
|
||||
|
||||
gdf = gdf.copy()
|
||||
geom_name = gdf.geometry.name
|
||||
|
||||
# Get srid
|
||||
srid = _get_srid_from_crs(gdf)
|
||||
|
||||
# Get geometry type and info whether data contains LinearRing.
|
||||
geometry_type, has_curve = _get_geometry_type(gdf)
|
||||
|
||||
# Build dtype with Geometry
|
||||
if dtype is not None:
|
||||
dtype[geom_name] = Geometry(geometry_type=geometry_type, srid=srid)
|
||||
else:
|
||||
dtype = {geom_name: Geometry(geometry_type=geometry_type, srid=srid)}
|
||||
|
||||
# Convert LinearRing geometries to LineString
|
||||
if has_curve:
|
||||
gdf = _convert_linearring_to_linestring(gdf, geom_name)
|
||||
|
||||
# Convert geometries to EWKB
|
||||
gdf = _convert_to_ewkb(gdf, geom_name, srid)
|
||||
|
||||
if schema is not None:
|
||||
schema_name = schema
|
||||
else:
|
||||
schema_name = "public"
|
||||
|
||||
if if_exists == "append":
|
||||
# Check that the geometry srid matches with the current GeoDataFrame
|
||||
with _get_conn(con) as connection:
|
||||
# Only check SRID if table exists
|
||||
if connection.dialect.has_table(connection, name, schema):
|
||||
target_srid = connection.execute(
|
||||
text(
|
||||
"SELECT Find_SRID('{schema}', '{table}', '{geom_col}');".format(
|
||||
schema=schema_name, table=name, geom_col=geom_name
|
||||
)
|
||||
)
|
||||
).fetchone()[0]
|
||||
|
||||
if target_srid != srid:
|
||||
msg = (
|
||||
"The CRS of the target table (EPSG:{epsg_t}) differs from the "
|
||||
"CRS of current GeoDataFrame (EPSG:{epsg_src}).".format(
|
||||
epsg_t=target_srid, epsg_src=srid
|
||||
)
|
||||
)
|
||||
raise ValueError(msg)
|
||||
|
||||
with _get_conn(con) as connection:
|
||||
gdf.to_sql(
|
||||
name,
|
||||
connection,
|
||||
schema=schema_name,
|
||||
if_exists=if_exists,
|
||||
index=index,
|
||||
index_label=index_label,
|
||||
chunksize=chunksize,
|
||||
dtype=dtype,
|
||||
method=_psql_insert_copy,
|
||||
)
|
||||
|
||||
|
||||
@lru_cache
|
||||
def _get_spatial_ref_sys_df(con, srid):
|
||||
spatial_ref_sys_sql = (
|
||||
f"SELECT srid, auth_name FROM spatial_ref_sys WHERE srid = {srid}"
|
||||
)
|
||||
return pd.read_sql(spatial_ref_sys_sql, con)
|
||||
@@ -0,0 +1,100 @@
|
||||
"""
|
||||
Script to create the data and write legacy storage (pickle) files.
|
||||
|
||||
Based on pandas' generate_legacy_storage_files.py script.
|
||||
|
||||
To use this script, create an environment for which you want to
|
||||
generate pickles, activate the environment, and run this script as:
|
||||
|
||||
$ python geopandas/geopandas/io/tests/generate_legacy_storage_files.py \
|
||||
geopandas/geopandas/io/tests/data/pickle/ pickle
|
||||
|
||||
This script generates a storage file for the current arch, system, and python version.
|
||||
|
||||
The idea here is you are using the *current* version of the
|
||||
generate_legacy_storage_files with an *older* version of geopandas to
|
||||
generate a pickle file. We will then check this file into a current
|
||||
branch, and test using test_pickle.py. This will load the *older*
|
||||
pickles and test versus the current data that is generated
|
||||
(with master). These are then compared.
|
||||
|
||||
"""
|
||||
|
||||
import os
|
||||
import pickle
|
||||
import platform
|
||||
import sys
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from shapely.geometry import Point
|
||||
|
||||
import geopandas
|
||||
|
||||
|
||||
def create_pickle_data():
|
||||
"""create the pickle data"""
|
||||
|
||||
# custom geometry column name
|
||||
gdf_the_geom = geopandas.GeoDataFrame(
|
||||
{"a": [1, 2, 3], "the_geom": [Point(1, 1), Point(2, 2), Point(3, 3)]},
|
||||
geometry="the_geom",
|
||||
)
|
||||
|
||||
# with crs
|
||||
gdf_crs = geopandas.GeoDataFrame(
|
||||
{"a": [0.1, 0.2, 0.3], "geometry": [Point(1, 1), Point(2, 2), Point(3, 3)]},
|
||||
crs="EPSG:4326",
|
||||
)
|
||||
|
||||
return {"gdf_the_geom": gdf_the_geom, "gdf_crs": gdf_crs}
|
||||
|
||||
|
||||
def platform_name():
|
||||
return "_".join(
|
||||
[
|
||||
str(geopandas.__version__),
|
||||
"pd-" + str(pd.__version__),
|
||||
"py-" + str(platform.python_version()),
|
||||
str(platform.machine()),
|
||||
str(platform.system().lower()),
|
||||
]
|
||||
)
|
||||
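# Illustrative sketch only: on a hypothetical setup the name built above might
# look like "0.14.0_pd-2.1.1_py-3.11.5_x86_64_linux", to which
# write_legacy_pickles() appends ".pickle".
def _example_platform_name():
    return platform_name()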
|
||||
|
||||
def write_legacy_pickles(output_dir):
|
||||
print(
|
||||
"This script generates a storage file for the current arch, system, "
|
||||
"and python version"
|
||||
)
|
||||
print("geopandas version: {}").format(geopandas.__version__)
|
||||
print(" output dir : {}".format(output_dir))
|
||||
print(" storage format: pickle")
|
||||
|
||||
pth = "{}.pickle".format(platform_name())
|
||||
|
||||
fh = open(os.path.join(output_dir, pth), "wb")
|
||||
pickle.dump(create_pickle_data(), fh, pickle.DEFAULT_PROTOCOL)
|
||||
fh.close()
|
||||
|
||||
print("created pickle file: {}".format(pth))
|
||||
|
||||
|
||||
def main():
|
||||
if len(sys.argv) != 3:
|
||||
sys.exit(
|
||||
"Specify output directory and storage type: generate_legacy_"
|
||||
"storage_files.py <output_dir> <storage_type> "
|
||||
)
|
||||
|
||||
output_dir = str(sys.argv[1])
|
||||
storage_type = str(sys.argv[2])
|
||||
|
||||
if storage_type == "pickle":
|
||||
write_legacy_pickles(output_dir=output_dir)
|
||||
else:
|
||||
sys.exit("storage_type must be one of {'pickle'}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -0,0 +1,328 @@
|
||||
import os
|
||||
|
||||
from shapely.geometry import (
|
||||
LineString,
|
||||
MultiLineString,
|
||||
MultiPoint,
|
||||
MultiPolygon,
|
||||
Point,
|
||||
Polygon,
|
||||
)
|
||||
|
||||
import geopandas
|
||||
from geopandas import GeoDataFrame
|
||||
|
||||
from .test_file import FIONA_MARK, PYOGRIO_MARK
|
||||
|
||||
import pytest
|
||||
from geopandas.testing import assert_geodataframe_equal
|
||||
|
||||
# Credit: Polygons below come from Montreal city Open Data portal
|
||||
# http://donnees.ville.montreal.qc.ca/dataset/unites-evaluation-fonciere
|
||||
city_hall_boundaries = Polygon(
|
||||
(
|
||||
(-73.5541107525234, 45.5091983609661),
|
||||
(-73.5546126200639, 45.5086813829106),
|
||||
(-73.5540185061397, 45.5084409343852),
|
||||
(-73.5539986525799, 45.5084323044531),
|
||||
(-73.5535801792994, 45.5089539203786),
|
||||
(-73.5541107525234, 45.5091983609661),
|
||||
)
|
||||
)
|
||||
vauquelin_place = Polygon(
|
||||
(
|
||||
(-73.5542465586147, 45.5081555487952),
|
||||
(-73.5540185061397, 45.5084409343852),
|
||||
(-73.5546126200639, 45.5086813829106),
|
||||
(-73.5548825850032, 45.5084033554357),
|
||||
(-73.5542465586147, 45.5081555487952),
|
||||
)
|
||||
)
|
||||
|
||||
city_hall_walls = [
|
||||
LineString(
|
||||
(
|
||||
(-73.5541107525234, 45.5091983609661),
|
||||
(-73.5546126200639, 45.5086813829106),
|
||||
(-73.5540185061397, 45.5084409343852),
|
||||
)
|
||||
),
|
||||
LineString(
|
||||
(
|
||||
(-73.5539986525799, 45.5084323044531),
|
||||
(-73.5535801792994, 45.5089539203786),
|
||||
(-73.5541107525234, 45.5091983609661),
|
||||
)
|
||||
),
|
||||
]
|
||||
|
||||
city_hall_entrance = Point(-73.553785, 45.508722)
|
||||
city_hall_balcony = Point(-73.554138, 45.509080)
|
||||
city_hall_council_chamber = Point(-73.554246, 45.508931)
|
||||
|
||||
point_3D = Point(-73.553785, 45.508722, 300)
|
||||
|
||||
|
||||
# *****************************************
|
||||
# TEST TOOLING
|
||||
|
||||
|
||||
class _ExpectedError:
|
||||
def __init__(self, error_type, error_message_match):
|
||||
self.type = error_type
|
||||
self.match = error_message_match
|
||||
|
||||
|
||||
class _ExpectedErrorBuilder:
|
||||
def __init__(self, composite_key):
|
||||
self.composite_key = composite_key
|
||||
|
||||
def to_raise(self, error_type, error_match):
|
||||
_expected_exceptions[self.composite_key] = _ExpectedError(
|
||||
error_type, error_match
|
||||
)
|
||||
|
||||
|
||||
def _expect_writing(gdf, ogr_driver):
|
||||
return _ExpectedErrorBuilder(_composite_key(gdf, ogr_driver))
|
||||
|
||||
|
||||
def _composite_key(gdf, ogr_driver):
|
||||
return frozenset([id(gdf), ogr_driver])
|
||||
|
||||
|
||||
def _expected_error_on(gdf, ogr_driver):
|
||||
composite_key = _composite_key(gdf, ogr_driver)
|
||||
return _expected_exceptions.get(composite_key, None)
|
||||
|
||||
|
||||
# *****************************************
|
||||
# TEST CASES
|
||||
_geodataframes_to_write = []
|
||||
_expected_exceptions = {}
|
||||
_CRS = "epsg:4326"
|
||||
|
||||
# ------------------
|
||||
# gdf with Points
|
||||
gdf = GeoDataFrame(
|
||||
{"a": [1, 2]}, crs=_CRS, geometry=[city_hall_entrance, city_hall_balcony]
|
||||
)
|
||||
_geodataframes_to_write.append(gdf)
|
||||
|
||||
# ------------------
|
||||
# gdf with MultiPoints
|
||||
gdf = GeoDataFrame(
|
||||
{"a": [1, 2]},
|
||||
crs=_CRS,
|
||||
geometry=[
|
||||
MultiPoint([city_hall_balcony, city_hall_council_chamber]),
|
||||
MultiPoint([city_hall_entrance, city_hall_balcony, city_hall_council_chamber]),
|
||||
],
|
||||
)
|
||||
_geodataframes_to_write.append(gdf)
|
||||
|
||||
# ------------------
|
||||
# gdf with Points and MultiPoints
|
||||
gdf = GeoDataFrame(
|
||||
{"a": [1, 2]},
|
||||
crs=_CRS,
|
||||
geometry=[MultiPoint([city_hall_entrance, city_hall_balcony]), city_hall_balcony],
|
||||
)
|
||||
_geodataframes_to_write.append(gdf)
|
||||
# 'ESRI Shapefile' driver supports writing LineString/MultiLinestring and
|
||||
# Polygon/MultiPolygon but does not mention Point/MultiPoint
|
||||
# see https://www.gdal.org/drv_shapefile.html
|
||||
_expect_writing(gdf, "ESRI Shapefile").to_raise(RuntimeError, "Failed to write record")
|
||||
|
||||
# ------------------
|
||||
# gdf with LineStrings
|
||||
gdf = GeoDataFrame({"a": [1, 2]}, crs=_CRS, geometry=city_hall_walls)
|
||||
_geodataframes_to_write.append(gdf)
|
||||
|
||||
# ------------------
|
||||
# gdf with MultiLineStrings
|
||||
gdf = GeoDataFrame(
|
||||
{"a": [1, 2]},
|
||||
crs=_CRS,
|
||||
geometry=[MultiLineString(city_hall_walls), MultiLineString(city_hall_walls)],
|
||||
)
|
||||
_geodataframes_to_write.append(gdf)
|
||||
|
||||
# ------------------
|
||||
# gdf with LineStrings and MultiLineStrings
|
||||
gdf = GeoDataFrame(
|
||||
{"a": [1, 2]},
|
||||
crs=_CRS,
|
||||
geometry=[MultiLineString(city_hall_walls), city_hall_walls[0]],
|
||||
)
|
||||
_geodataframes_to_write.append(gdf)
|
||||
|
||||
# ------------------
|
||||
# gdf with Polygons
|
||||
gdf = GeoDataFrame(
|
||||
{"a": [1, 2]}, crs=_CRS, geometry=[city_hall_boundaries, vauquelin_place]
|
||||
)
|
||||
_geodataframes_to_write.append(gdf)
|
||||
|
||||
# ------------------
|
||||
# gdf with MultiPolygon
|
||||
gdf = GeoDataFrame(
|
||||
{"a": [1]},
|
||||
crs=_CRS,
|
||||
geometry=[MultiPolygon((city_hall_boundaries, vauquelin_place))],
|
||||
)
|
||||
_geodataframes_to_write.append(gdf)
|
||||
|
||||
# ------------------
|
||||
# gdf with Polygon and MultiPolygon
|
||||
gdf = GeoDataFrame(
|
||||
{"a": [1, 2]},
|
||||
crs=_CRS,
|
||||
geometry=[
|
||||
MultiPolygon((city_hall_boundaries, vauquelin_place)),
|
||||
city_hall_boundaries,
|
||||
],
|
||||
)
|
||||
_geodataframes_to_write.append(gdf)
|
||||
|
||||
# ------------------
|
||||
# gdf with null geometry and Point
|
||||
gdf = GeoDataFrame({"a": [1, 2]}, crs=_CRS, geometry=[None, city_hall_entrance])
|
||||
_geodataframes_to_write.append(gdf)
|
||||
|
||||
# ------------------
|
||||
# gdf with null geometry and 3D Point
|
||||
gdf = GeoDataFrame({"a": [1, 2]}, crs=_CRS, geometry=[None, point_3D])
|
||||
_geodataframes_to_write.append(gdf)
|
||||
|
||||
# ------------------
|
||||
# gdf with null geometries only
|
||||
gdf = GeoDataFrame({"a": [1, 2]}, crs=_CRS, geometry=[None, None])
|
||||
_geodataframes_to_write.append(gdf)
|
||||
|
||||
# ------------------
|
||||
# gdf with all shape types mixed together
|
||||
gdf = GeoDataFrame(
|
||||
{"a": [1, 2, 3, 4, 5, 6]},
|
||||
crs=_CRS,
|
||||
geometry=[
|
||||
MultiPolygon((city_hall_boundaries, vauquelin_place)),
|
||||
city_hall_entrance,
|
||||
MultiLineString(city_hall_walls),
|
||||
city_hall_walls[0],
|
||||
MultiPoint([city_hall_entrance, city_hall_balcony]),
|
||||
city_hall_balcony,
|
||||
],
|
||||
)
|
||||
_geodataframes_to_write.append(gdf)
|
||||
# Not supported by 'ESRI Shapefile' driver
|
||||
_expect_writing(gdf, "ESRI Shapefile").to_raise(RuntimeError, "Failed to write record")
|
||||
|
||||
# ------------------
|
||||
# gdf with all 2D shape types and 3D Point mixed together
|
||||
gdf = GeoDataFrame(
|
||||
{"a": [1, 2, 3, 4, 5, 6, 7]},
|
||||
crs=_CRS,
|
||||
geometry=[
|
||||
MultiPolygon((city_hall_boundaries, vauquelin_place)),
|
||||
city_hall_entrance,
|
||||
MultiLineString(city_hall_walls),
|
||||
city_hall_walls[0],
|
||||
MultiPoint([city_hall_entrance, city_hall_balcony]),
|
||||
city_hall_balcony,
|
||||
point_3D,
|
||||
],
|
||||
)
|
||||
_geodataframes_to_write.append(gdf)
|
||||
# Not supported by 'ESRI Shapefile' driver
|
||||
_expect_writing(gdf, "ESRI Shapefile").to_raise(RuntimeError, "Failed to write record")
|
||||
|
||||
|
||||
@pytest.fixture(params=_geodataframes_to_write)
|
||||
def geodataframe(request):
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(
|
||||
params=[
|
||||
("GeoJSON", ".geojson"),
|
||||
("ESRI Shapefile", ".shp"),
|
||||
("GPKG", ".gpkg"),
|
||||
("SQLite", ".sqlite"),
|
||||
]
|
||||
)
|
||||
def ogr_driver(request):
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(
|
||||
params=[
|
||||
pytest.param("fiona", marks=FIONA_MARK),
|
||||
pytest.param("pyogrio", marks=PYOGRIO_MARK),
|
||||
]
|
||||
)
|
||||
def engine(request):
|
||||
return request.param
|
||||
|
||||
|
||||
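# Write each sample GeoDataFrame with every driver/engine combination, read it
# back, and check that the roundtripped frame matches; known driver limitations
# are expected to raise instead.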
def test_to_file_roundtrip(tmpdir, geodataframe, ogr_driver, engine):
|
||||
driver, ext = ogr_driver
|
||||
output_file = os.path.join(str(tmpdir), "output_file" + ext)
|
||||
write_kwargs = {}
|
||||
if driver == "SQLite":
|
||||
write_kwargs["spatialite"] = True
|
||||
|
||||
# This if statement can be removed once the minimal fiona version is >= 1.8.20
|
||||
if engine == "fiona":
|
||||
from packaging.version import Version
|
||||
|
||||
import fiona
|
||||
|
||||
if Version(fiona.__version__) < Version("1.8.20"):
|
||||
pytest.skip("SQLite driver only available from version 1.8.20")
|
||||
|
||||
# If only 3D Points are present, geometry_type currently needs to be specified
# explicitly for spatialite. This block can be removed once the following PR
# is released:
|
||||
# https://github.com/geopandas/pyogrio/pull/223
|
||||
if (
|
||||
engine == "pyogrio"
|
||||
and len(geodataframe) == 2
|
||||
and geodataframe.geometry[0] is None
|
||||
and geodataframe.geometry[1] is not None
|
||||
and geodataframe.geometry[1].has_z
|
||||
):
|
||||
write_kwargs["geometry_type"] = "Point Z"
|
||||
|
||||
expected_error = _expected_error_on(geodataframe, driver)
|
||||
if expected_error:
|
||||
with pytest.raises(
|
||||
RuntimeError, match="Failed to write record|Could not add feature to layer"
|
||||
):
|
||||
geodataframe.to_file(
|
||||
output_file, driver=driver, engine=engine, **write_kwargs
|
||||
)
|
||||
else:
|
||||
if driver == "SQLite" and engine == "pyogrio":
|
||||
try:
|
||||
geodataframe.to_file(
|
||||
output_file, driver=driver, engine=engine, **write_kwargs
|
||||
)
|
||||
except ValueError as e:
|
||||
if "unrecognized option 'SPATIALITE'" in str(e):
|
||||
pytest.xfail(
|
||||
"pyogrio wheels from PyPI do not come with SpatiaLite support. "
|
||||
f"Error: {e}"
|
||||
)
|
||||
raise
|
||||
else:
|
||||
geodataframe.to_file(
|
||||
output_file, driver=driver, engine=engine, **write_kwargs
|
||||
)
|
||||
|
||||
reloaded = geopandas.read_file(output_file, engine=engine)
|
||||
|
||||
if driver == "GeoJSON" and engine == "pyogrio":
|
||||
# For GeoJSON files, the int64 column comes back as int32
|
||||
reloaded["a"] = reloaded["a"].astype("int64")
|
||||
|
||||
assert_geodataframe_equal(geodataframe, reloaded, check_column_type="equiv")
|
||||
@@ -0,0 +1,537 @@
|
||||
import contextlib
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
from packaging.version import Version
|
||||
|
||||
import numpy as np
|
||||
|
||||
import shapely
|
||||
from shapely import MultiPoint, Point, box
|
||||
|
||||
from geopandas import GeoDataFrame, GeoSeries
|
||||
|
||||
import pytest
|
||||
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
|
||||
|
||||
pytest.importorskip("pyarrow")
|
||||
import pyarrow as pa
|
||||
import pyarrow.compute as pc
|
||||
from pyarrow import feather
|
||||
|
||||
DATA_PATH = pathlib.Path(os.path.dirname(__file__)) / "data"
|
||||
|
||||
|
||||
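# Helpers to unwrap the Arrow wrapper objects returned by to_arrow(): pyarrow
# >= 14 can consume the Arrow PyCapsule protocol directly via pa.table()/pa.array(),
# while older versions fall back to the private underlying pyarrow objects.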
def pa_table(table):
|
||||
if Version(pa.__version__) < Version("14.0.0"):
|
||||
return table._pa_table
|
||||
else:
|
||||
return pa.table(table)
|
||||
|
||||
|
||||
def pa_array(array):
|
||||
if Version(pa.__version__) < Version("14.0.0"):
|
||||
return array._pa_array
|
||||
else:
|
||||
return pa.array(array)
|
||||
|
||||
|
||||
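# Compare two pyarrow Tables while treating the NaN coordinates used for empty
# geometries as equal, since pyarrow.Table.equals considers NaN != NaN.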
def assert_table_equal(left, right, check_metadata=True):
|
||||
geom_type = left["geometry"].type
|
||||
# in case of Points (directly the inner fixed_size_list or struct type)
|
||||
# -> there are NaNs for empties -> we need to compare them separately
|
||||
# and then fill, because pyarrow.Table.equals considers NaNs as not equal
|
||||
if pa.types.is_fixed_size_list(geom_type):
|
||||
left_values = left["geometry"].chunk(0).values
|
||||
right_values = right["geometry"].chunk(0).values
|
||||
assert pc.is_nan(left_values).equals(pc.is_nan(right_values))
|
||||
left_geoms = pa.FixedSizeListArray.from_arrays(
|
||||
pc.replace_with_mask(left_values, pc.is_nan(left_values), 0.0),
|
||||
type=left["geometry"].type,
|
||||
)
|
||||
right_geoms = pa.FixedSizeListArray.from_arrays(
|
||||
pc.replace_with_mask(right_values, pc.is_nan(right_values), 0.0),
|
||||
type=right["geometry"].type,
|
||||
)
|
||||
left = left.set_column(1, left.schema.field("geometry"), left_geoms)
|
||||
right = right.set_column(1, right.schema.field("geometry"), right_geoms)
|
||||
|
||||
elif pa.types.is_struct(geom_type):
|
||||
left_arr = left["geometry"].chunk(0)
|
||||
right_arr = right["geometry"].chunk(0)
|
||||
|
||||
for i in range(left_arr.type.num_fields):
|
||||
assert pc.is_nan(left_arr.field(i)).equals(pc.is_nan(right_arr.field(i)))
|
||||
|
||||
left_geoms = pa.StructArray.from_arrays(
|
||||
[
|
||||
pc.replace_with_mask(
|
||||
left_arr.field(i), pc.is_nan(left_arr.field(i)), 0.0
|
||||
)
|
||||
for i in range(left_arr.type.num_fields)
|
||||
],
|
||||
fields=list(left["geometry"].type),
|
||||
)
|
||||
right_geoms = pa.StructArray.from_arrays(
|
||||
[
|
||||
pc.replace_with_mask(
|
||||
right_arr.field(i), pc.is_nan(right_arr.field(i)), 0.0
|
||||
)
|
||||
for i in range(right_arr.type.num_fields)
|
||||
],
|
||||
fields=list(right["geometry"].type),
|
||||
)
|
||||
|
||||
left = left.set_column(1, left.schema.field("geometry"), left_geoms)
|
||||
right = right.set_column(1, right.schema.field("geometry"), right_geoms)
|
||||
|
||||
if left.equals(right, check_metadata=check_metadata):
|
||||
return
|
||||
|
||||
if not left.schema.equals(right.schema):
|
||||
raise AssertionError(
|
||||
"Schema not equal\nLeft:\n{0}\nRight:\n{1}".format(
|
||||
left.schema, right.schema
|
||||
)
|
||||
)
|
||||
|
||||
if check_metadata:
|
||||
if not left.schema.equals(right.schema, check_metadata=True):
|
||||
if not left.schema.metadata == right.schema.metadata:
|
||||
raise AssertionError(
|
||||
"Metadata not equal\nLeft:\n{0}\nRight:\n{1}".format(
|
||||
left.schema.metadata, right.schema.metadata
|
||||
)
|
||||
)
|
||||
for col in left.schema.names:
|
||||
assert left.schema.field(col).equals(
|
||||
right.schema.field(col), check_metadata=True
|
||||
)
|
||||
|
||||
for col in left.column_names:
|
||||
a_left = pa.concat_arrays(left.column(col).chunks)
|
||||
a_right = pa.concat_arrays(right.column(col).chunks)
|
||||
if not a_left.equals(a_right):
|
||||
raise AssertionError(
|
||||
"Column '{0}' not equal:\n{1}".format(col, a_left.diff(a_right))
|
||||
)
|
||||
|
||||
raise AssertionError("Tables not equal for unknown reason")
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
shapely.geos_version < (3, 9, 0),
|
||||
reason="Checking for empty is buggy with GEOS<3.9",
|
||||
) # an old GEOS is installed in the CI builds with the defaults channel
|
||||
@pytest.mark.parametrize(
|
||||
"dim",
|
||||
[
|
||||
"xy",
|
||||
pytest.param(
|
||||
"xyz",
|
||||
marks=pytest.mark.skipif(
|
||||
shapely.geos_version < (3, 10, 0),
|
||||
reason="Cannot write 3D geometries with GEOS<3.10",
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"geometry_type",
|
||||
["point", "linestring", "polygon", "multipoint", "multilinestring", "multipolygon"],
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"geometry_encoding, interleaved",
|
||||
[("WKB", None), ("geoarrow", True), ("geoarrow", False)],
|
||||
ids=["WKB", "geoarrow-interleaved", "geoarrow-separated"],
|
||||
)
|
||||
def test_geoarrow_export(geometry_type, dim, geometry_encoding, interleaved):
|
||||
base_path = DATA_PATH / "geoarrow"
|
||||
suffix = geometry_type + ("_z" if dim == "xyz" else "")
|
||||
|
||||
# Read the example data
|
||||
df = feather.read_feather(base_path / f"example-{suffix}-wkb.arrow")
|
||||
df["geometry"] = GeoSeries.from_wkb(df["geometry"])
|
||||
df["row_number"] = df["row_number"].astype("int32")
|
||||
df = GeoDataFrame(df)
|
||||
df.geometry.array.crs = None
|
||||
|
||||
# Read the expected data
|
||||
if geometry_encoding == "WKB":
|
||||
filename = f"example-{suffix}-wkb.arrow"
|
||||
else:
|
||||
filename = f"example-{suffix}{'-interleaved' if interleaved else ''}.arrow"
|
||||
expected = feather.read_table(base_path / filename)
|
||||
|
||||
# GeoDataFrame -> Arrow Table
|
||||
result = pa_table(
|
||||
df.to_arrow(geometry_encoding=geometry_encoding, interleaved=interleaved)
|
||||
)
|
||||
# remove the "pandas" metadata
|
||||
result = result.replace_schema_metadata(None)
|
||||
|
||||
mask_nonempty = None
|
||||
if (
|
||||
geometry_encoding == "WKB"
|
||||
and dim == "xyz"
|
||||
and geometry_type.startswith("multi")
|
||||
):
|
||||
# for collections with z dimension, drop the empties because those don't
|
||||
# roundtrip correctly to WKB
|
||||
# (https://github.com/libgeos/geos/issues/888)
|
||||
mask_nonempty = pa.array(np.asarray(~df.geometry.is_empty))
|
||||
result = result.filter(mask_nonempty)
|
||||
expected = expected.filter(mask_nonempty)
|
||||
|
||||
assert_table_equal(result, expected)
|
||||
|
||||
# GeoSeries -> Arrow array
|
||||
if geometry_encoding != "WKB" and geometry_type == "point":
|
||||
# for points, we again have to handle NaNs separately; we already did that
# for the table above, so just skip this part
|
||||
return
|
||||
result_arr = pa_array(
|
||||
df.geometry.to_arrow(
|
||||
geometry_encoding=geometry_encoding, interleaved=interleaved
|
||||
)
|
||||
)
|
||||
if mask_nonempty is not None:
|
||||
result_arr = result_arr.filter(mask_nonempty)
|
||||
assert result_arr.equals(expected["geometry"].chunk(0))
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
Version(shapely.__version__) < Version("2.0.2"),
|
||||
reason="from_ragged_array failing with read-only array input",
|
||||
)
|
||||
@pytest.mark.parametrize("encoding", ["WKB", "geoarrow"])
|
||||
def test_geoarrow_multiple_geometry_crs(encoding):
|
||||
pytest.importorskip("pyproj")
|
||||
# ensure each geometry column has its own crs
|
||||
gdf = GeoDataFrame(geometry=[box(0, 0, 10, 10)], crs="epsg:4326")
|
||||
gdf["geom2"] = gdf.geometry.to_crs("epsg:3857")
|
||||
|
||||
result = pa_table(gdf.to_arrow(geometry_encoding=encoding))
|
||||
meta1 = json.loads(
|
||||
result.schema.field("geometry").metadata[b"ARROW:extension:metadata"]
|
||||
)
|
||||
assert json.loads(meta1["crs"])["id"]["code"] == 4326
|
||||
meta2 = json.loads(
|
||||
result.schema.field("geom2").metadata[b"ARROW:extension:metadata"]
|
||||
)
|
||||
assert json.loads(meta2["crs"])["id"]["code"] == 3857
|
||||
|
||||
roundtripped = GeoDataFrame.from_arrow(result)
|
||||
assert_geodataframe_equal(gdf, roundtripped)
|
||||
assert gdf.geometry.crs == "epsg:4326"
|
||||
assert gdf.geom2.crs == "epsg:3857"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("encoding", ["WKB", "geoarrow"])
|
||||
def test_geoarrow_series_name_crs(encoding):
|
||||
pytest.importorskip("pyproj")
|
||||
pytest.importorskip("pyarrow", minversion="14.0.0")
|
||||
|
||||
gser = GeoSeries([box(0, 0, 10, 10)], crs="epsg:4326", name="geom")
|
||||
schema_capsule, _ = gser.to_arrow(geometry_encoding=encoding).__arrow_c_array__()
|
||||
field = pa.Field._import_from_c_capsule(schema_capsule)
|
||||
assert field.name == "geom"
|
||||
assert (
|
||||
field.metadata[b"ARROW:extension:name"] == b"geoarrow.wkb"
|
||||
if encoding == "WKB"
|
||||
else b"geoarrow.polygon"
|
||||
)
|
||||
meta = json.loads(field.metadata[b"ARROW:extension:metadata"])
|
||||
assert json.loads(meta["crs"])["id"]["code"] == 4326
|
||||
|
||||
# ensure it also works without a name
|
||||
gser = GeoSeries([box(0, 0, 10, 10)])
|
||||
schema_capsule, _ = gser.to_arrow(geometry_encoding=encoding).__arrow_c_array__()
|
||||
field = pa.Field._import_from_c_capsule(schema_capsule)
|
||||
assert field.name == ""
|
||||
|
||||
|
||||
def test_geoarrow_unsupported_encoding():
|
||||
gdf = GeoDataFrame(geometry=[box(0, 0, 10, 10)], crs="epsg:4326")
|
||||
|
||||
with pytest.raises(ValueError, match="Expected geometry encoding"):
|
||||
gdf.to_arrow(geometry_encoding="invalid")
|
||||
|
||||
with pytest.raises(ValueError, match="Expected geometry encoding"):
|
||||
gdf.geometry.to_arrow(geometry_encoding="invalid")
|
||||
|
||||
|
||||
def test_geoarrow_mixed_geometry_types():
|
||||
gdf = GeoDataFrame(
|
||||
{"geometry": [Point(0, 0), box(0, 0, 10, 10)]},
|
||||
crs="epsg:4326",
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError, match="Geometry type combination is not supported"):
|
||||
gdf.to_arrow(geometry_encoding="geoarrow")
|
||||
|
||||
gdf = GeoDataFrame(
|
||||
{"geometry": [Point(0, 0), MultiPoint([(0, 0), (1, 1)])]},
|
||||
crs="epsg:4326",
|
||||
)
|
||||
result = pa_table(gdf.to_arrow(geometry_encoding="geoarrow"))
|
||||
assert (
|
||||
result.schema.field("geometry").metadata[b"ARROW:extension:name"]
|
||||
== b"geoarrow.multipoint"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("geom_type", ["point", "polygon"])
|
||||
@pytest.mark.parametrize(
|
||||
"encoding, interleaved", [("WKB", True), ("geoarrow", True), ("geoarrow", False)]
|
||||
)
|
||||
def test_geoarrow_missing(encoding, interleaved, geom_type):
|
||||
# dummy test for single geometry type until missing values are included
|
||||
# in the test data for test_geoarrow_export
|
||||
gdf = GeoDataFrame(
|
||||
geometry=[Point(0, 0) if geom_type == "point" else box(0, 0, 10, 10), None],
|
||||
crs="epsg:4326",
|
||||
)
|
||||
if (
|
||||
encoding == "geoarrow"
|
||||
and geom_type == "point"
|
||||
and interleaved
|
||||
and Version(pa.__version__) < Version("15.0.0")
|
||||
):
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match="Converting point geometries with missing values is not supported",
|
||||
):
|
||||
gdf.to_arrow(geometry_encoding=encoding, interleaved=interleaved)
|
||||
return
|
||||
result = pa_table(gdf.to_arrow(geometry_encoding=encoding, interleaved=interleaved))
|
||||
assert result["geometry"].null_count == 1
|
||||
assert result["geometry"].is_null().to_pylist() == [False, True]
|
||||
|
||||
|
||||
def test_geoarrow_include_z():
|
||||
gdf = GeoDataFrame({"geometry": [Point(0, 0), Point(1, 1), Point()]})
|
||||
|
||||
table = pa_table(gdf.to_arrow(geometry_encoding="geoarrow"))
|
||||
assert table["geometry"].type.value_field.name == "xy"
|
||||
assert table["geometry"].type.list_size == 2
|
||||
|
||||
table = pa_table(gdf.to_arrow(geometry_encoding="geoarrow", include_z=True))
|
||||
assert table["geometry"].type.value_field.name == "xyz"
|
||||
assert table["geometry"].type.list_size == 3
|
||||
assert np.isnan(table["geometry"].chunk(0).values.to_numpy()[2::3]).all()
|
||||
|
||||
gdf = GeoDataFrame({"geometry": [Point(0, 0, 0), Point(1, 1, 1), Point()]})
|
||||
|
||||
table = pa_table(gdf.to_arrow(geometry_encoding="geoarrow"))
|
||||
assert table["geometry"].type.value_field.name == "xyz"
|
||||
assert table["geometry"].type.list_size == 3
|
||||
|
||||
table = pa_table(gdf.to_arrow(geometry_encoding="geoarrow", include_z=False))
|
||||
assert table["geometry"].type.value_field.name == "xy"
|
||||
assert table["geometry"].type.list_size == 2
|
||||
|
||||
|
||||
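# Temporarily register the geoarrow.pyarrow extension types and always
# unregister them afterwards so the global pyarrow registry is not leaked
# into other tests.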
@contextlib.contextmanager
|
||||
def with_geoarrow_extension_types():
|
||||
gp = pytest.importorskip("geoarrow.pyarrow")
|
||||
gp.register_extension_types()
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
gp.unregister_extension_types()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dim", ["xy", "xyz"])
|
||||
@pytest.mark.parametrize(
|
||||
"geometry_type",
|
||||
["point", "linestring", "polygon", "multipoint", "multilinestring", "multipolygon"],
|
||||
)
|
||||
def test_geoarrow_export_with_extension_types(geometry_type, dim):
|
||||
# ensure the exported data can be imported by geoarrow-pyarrow and is
# recognized as extension types
|
||||
base_path = DATA_PATH / "geoarrow"
|
||||
suffix = geometry_type + ("_z" if dim == "xyz" else "")
|
||||
|
||||
# Read the example data
|
||||
df = feather.read_feather(base_path / f"example-{suffix}-wkb.arrow")
|
||||
df["geometry"] = GeoSeries.from_wkb(df["geometry"])
|
||||
df["row_number"] = df["row_number"].astype("int32")
|
||||
df = GeoDataFrame(df)
|
||||
df.geometry.array.crs = None
|
||||
|
||||
pytest.importorskip("geoarrow.pyarrow")
|
||||
|
||||
with with_geoarrow_extension_types():
|
||||
result1 = pa_table(df.to_arrow(geometry_encoding="WKB"))
|
||||
assert isinstance(result1["geometry"].type, pa.ExtensionType)
|
||||
|
||||
result2 = pa_table(df.to_arrow(geometry_encoding="geoarrow"))
|
||||
assert isinstance(result2["geometry"].type, pa.ExtensionType)
|
||||
|
||||
result3 = pa_table(df.to_arrow(geometry_encoding="geoarrow", interleaved=False))
|
||||
assert isinstance(result3["geometry"].type, pa.ExtensionType)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
Version(shapely.__version__) < Version("2.0.2"),
|
||||
reason="from_ragged_array failing with read-only array input",
|
||||
)
|
||||
@pytest.mark.parametrize("dim", ["xy", "xyz"])
|
||||
@pytest.mark.parametrize(
|
||||
"geometry_type",
|
||||
[
|
||||
"point",
|
||||
"linestring",
|
||||
"polygon",
|
||||
"multipoint",
|
||||
"multilinestring",
|
||||
"multipolygon",
|
||||
],
|
||||
)
|
||||
def test_geoarrow_import(geometry_type, dim):
|
||||
base_path = DATA_PATH / "geoarrow"
|
||||
suffix = geometry_type + ("_z" if dim == "xyz" else "")
|
||||
|
||||
# Read the example data
|
||||
df = feather.read_feather(base_path / f"example-{suffix}-wkb.arrow")
|
||||
df["geometry"] = GeoSeries.from_wkb(df["geometry"])
|
||||
df = GeoDataFrame(df)
|
||||
df.geometry.crs = None
|
||||
|
||||
table1 = feather.read_table(base_path / f"example-{suffix}-wkb.arrow")
|
||||
result1 = GeoDataFrame.from_arrow(table1)
|
||||
assert_geodataframe_equal(result1, df)
|
||||
|
||||
table2 = feather.read_table(base_path / f"example-{suffix}-interleaved.arrow")
|
||||
result2 = GeoDataFrame.from_arrow(table2)
|
||||
assert_geodataframe_equal(result2, df)
|
||||
|
||||
table3 = feather.read_table(base_path / f"example-{suffix}.arrow")
|
||||
result3 = GeoDataFrame.from_arrow(table3)
|
||||
assert_geodataframe_equal(result3, df)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
Version(shapely.__version__) < Version("2.0.2"),
|
||||
reason="from_ragged_array failing with read-only array input",
|
||||
)
|
||||
@pytest.mark.parametrize("encoding", ["WKB", "geoarrow"])
|
||||
def test_geoarrow_import_geometry_column(encoding):
|
||||
pytest.importorskip("pyproj")
|
||||
# ensure a table with multiple geometry columns can be imported and the
# active geometry column selected
|
||||
gdf = GeoDataFrame(geometry=[box(0, 0, 10, 10)])
|
||||
gdf["centroid"] = gdf.geometry.centroid
|
||||
|
||||
result = GeoDataFrame.from_arrow(pa_table(gdf.to_arrow(geometry_encoding=encoding)))
|
||||
assert_geodataframe_equal(result, gdf)
|
||||
assert result.active_geometry_name == "geometry"
|
||||
|
||||
result = GeoDataFrame.from_arrow(
|
||||
pa_table(gdf[["centroid"]].to_arrow(geometry_encoding=encoding))
|
||||
)
|
||||
assert result.active_geometry_name == "centroid"
|
||||
|
||||
result = GeoDataFrame.from_arrow(
|
||||
pa_table(gdf.to_arrow(geometry_encoding=encoding)), geometry="centroid"
|
||||
)
|
||||
assert result.active_geometry_name == "centroid"
|
||||
assert_geodataframe_equal(result, gdf.set_geometry("centroid"))
|
||||
|
||||
|
||||
def test_geoarrow_import_missing_geometry():
|
||||
pytest.importorskip("pyarrow", minversion="14.0.0")
|
||||
|
||||
table = pa.table({"a": [0, 1, 2], "b": [0.1, 0.2, 0.3]})
|
||||
with pytest.raises(ValueError, match="No geometry column found"):
|
||||
GeoDataFrame.from_arrow(table)
|
||||
|
||||
with pytest.raises(ValueError, match="No GeoArrow geometry field found"):
|
||||
GeoSeries.from_arrow(table["a"].chunk(0))
|
||||
|
||||
|
||||
def test_geoarrow_import_capsule_interface():
|
||||
# ensure we can import a non-pyarrow object (via the Arrow PyCapsule protocol)
|
||||
pytest.importorskip("pyarrow", minversion="14.0.0")
|
||||
gdf = GeoDataFrame({"col": [1]}, geometry=[box(0, 0, 10, 10)])
|
||||
|
||||
result = GeoDataFrame.from_arrow(gdf.to_arrow())
|
||||
assert_geodataframe_equal(result, gdf)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dim", ["xy", "xyz"])
|
||||
@pytest.mark.parametrize(
|
||||
"geometry_type",
|
||||
["point", "linestring", "polygon", "multipoint", "multilinestring", "multipolygon"],
|
||||
)
|
||||
def test_geoarrow_import_from_extension_types(geometry_type, dim):
|
||||
# ensure data stored with geoarrow-pyarrow extension types can be imported
# back into a GeoDataFrame
|
||||
pytest.importorskip("pyproj")
|
||||
base_path = DATA_PATH / "geoarrow"
|
||||
suffix = geometry_type + ("_z" if dim == "xyz" else "")
|
||||
|
||||
# Read the example data
|
||||
df = feather.read_feather(base_path / f"example-{suffix}-wkb.arrow")
|
||||
df["geometry"] = GeoSeries.from_wkb(df["geometry"])
|
||||
df = GeoDataFrame(df, crs="EPSG:3857")
|
||||
|
||||
pytest.importorskip("geoarrow.pyarrow")
|
||||
|
||||
with with_geoarrow_extension_types():
|
||||
result1 = GeoDataFrame.from_arrow(
|
||||
pa_table(df.to_arrow(geometry_encoding="WKB"))
|
||||
)
|
||||
assert_geodataframe_equal(result1, df)
|
||||
|
||||
result2 = GeoDataFrame.from_arrow(
|
||||
pa_table(df.to_arrow(geometry_encoding="geoarrow"))
|
||||
)
|
||||
assert_geodataframe_equal(result2, df)
|
||||
|
||||
result3 = GeoDataFrame.from_arrow(
|
||||
pa_table(df.to_arrow(geometry_encoding="geoarrow", interleaved=False))
|
||||
)
|
||||
assert_geodataframe_equal(result3, df)
|
||||
|
||||
|
||||
def test_geoarrow_import_geoseries():
|
||||
pytest.importorskip("pyproj")
|
||||
gp = pytest.importorskip("geoarrow.pyarrow")
|
||||
ser = GeoSeries.from_wkt(["POINT (1 1)", "POINT (2 2)"], crs="EPSG:3857")
|
||||
|
||||
with with_geoarrow_extension_types():
|
||||
arr = gp.array(ser.to_arrow(geometry_encoding="WKB"))
|
||||
result = GeoSeries.from_arrow(arr)
|
||||
assert_geoseries_equal(result, ser)
|
||||
|
||||
arr = gp.array(ser.to_arrow(geometry_encoding="geoarrow"))
|
||||
result = GeoSeries.from_arrow(arr)
|
||||
assert_geoseries_equal(result, ser)
|
||||
|
||||
# the name is lost when going through a pyarrow.Array
|
||||
ser.name = "name"
|
||||
arr = gp.array(ser.to_arrow())
|
||||
result = GeoSeries.from_arrow(arr)
|
||||
assert result.name is None
|
||||
# we can specify the name as one of the kwargs
|
||||
result = GeoSeries.from_arrow(arr, name="test")
|
||||
assert_geoseries_equal(result, ser)
|
||||
|
||||
|
||||
def test_geoarrow_import_unknown_geoarrow_type():
|
||||
gdf = GeoDataFrame({"col": [1]}, geometry=[box(0, 0, 10, 10)])
|
||||
table = pa_table(gdf.to_arrow())
|
||||
schema = table.schema
|
||||
new_field = schema.field("geometry").with_metadata(
|
||||
{
|
||||
b"ARROW:extension:name": b"geoarrow.unknown",
|
||||
b"ARROW:extension:metadata": b"{}",
|
||||
}
|
||||
)
|
||||
|
||||
new_schema = pa.schema([schema.field(0), new_field])
|
||||
new_table = table.cast(new_schema)
|
||||
|
||||
with pytest.raises(TypeError, match="Unknown GeoArrow extension type"):
|
||||
GeoDataFrame.from_arrow(new_table)
|
||||
@@ -0,0 +1,306 @@
|
||||
from collections import OrderedDict
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from shapely.geometry import (
|
||||
LineString,
|
||||
MultiLineString,
|
||||
MultiPoint,
|
||||
MultiPolygon,
|
||||
Point,
|
||||
Polygon,
|
||||
)
|
||||
|
||||
from geopandas import GeoDataFrame
|
||||
from geopandas.io.file import infer_schema
|
||||
|
||||
import pytest
|
||||
|
||||
# Credit: Polygons below come from Montreal city Open Data portal
|
||||
# http://donnees.ville.montreal.qc.ca/dataset/unites-evaluation-fonciere
|
||||
city_hall_boundaries = Polygon(
|
||||
(
|
||||
(-73.5541107525234, 45.5091983609661),
|
||||
(-73.5546126200639, 45.5086813829106),
|
||||
(-73.5540185061397, 45.5084409343852),
|
||||
(-73.5539986525799, 45.5084323044531),
|
||||
(-73.5535801792994, 45.5089539203786),
|
||||
(-73.5541107525234, 45.5091983609661),
|
||||
)
|
||||
)
|
||||
vauquelin_place = Polygon(
|
||||
(
|
||||
(-73.5542465586147, 45.5081555487952),
|
||||
(-73.5540185061397, 45.5084409343852),
|
||||
(-73.5546126200639, 45.5086813829106),
|
||||
(-73.5548825850032, 45.5084033554357),
|
||||
(-73.5542465586147, 45.5081555487952),
|
||||
)
|
||||
)
|
||||
|
||||
city_hall_walls = [
|
||||
LineString(
|
||||
(
|
||||
(-73.5541107525234, 45.5091983609661),
|
||||
(-73.5546126200639, 45.5086813829106),
|
||||
(-73.5540185061397, 45.5084409343852),
|
||||
)
|
||||
),
|
||||
LineString(
|
||||
(
|
||||
(-73.5539986525799, 45.5084323044531),
|
||||
(-73.5535801792994, 45.5089539203786),
|
||||
(-73.5541107525234, 45.5091983609661),
|
||||
)
|
||||
),
|
||||
]
|
||||
|
||||
city_hall_entrance = Point(-73.553785, 45.508722)
|
||||
city_hall_balcony = Point(-73.554138, 45.509080)
|
||||
city_hall_council_chamber = Point(-73.554246, 45.508931)
|
||||
|
||||
point_3D = Point(-73.553785, 45.508722, 300)
|
||||
linestring_3D = LineString(
|
||||
(
|
||||
(-73.5541107525234, 45.5091983609661, 300),
|
||||
(-73.5546126200639, 45.5086813829106, 300),
|
||||
(-73.5540185061397, 45.5084409343852, 300),
|
||||
)
|
||||
)
|
||||
polygon_3D = Polygon(
|
||||
(
|
||||
(-73.5541107525234, 45.5091983609661, 300),
|
||||
(-73.5535801792994, 45.5089539203786, 300),
|
||||
(-73.5541107525234, 45.5091983609661, 300),
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def test_infer_schema_only_points():
|
||||
df = GeoDataFrame(geometry=[city_hall_entrance, city_hall_balcony])
|
||||
|
||||
assert infer_schema(df) == {"geometry": "Point", "properties": OrderedDict()}
|
||||
|
||||
|
||||
def test_infer_schema_points_and_multipoints():
|
||||
df = GeoDataFrame(
|
||||
geometry=[
|
||||
MultiPoint([city_hall_entrance, city_hall_balcony]),
|
||||
city_hall_balcony,
|
||||
]
|
||||
)
|
||||
|
||||
assert infer_schema(df) == {
|
||||
"geometry": ["MultiPoint", "Point"],
|
||||
"properties": OrderedDict(),
|
||||
}
|
||||
|
||||
|
||||
def test_infer_schema_only_multipoints():
|
||||
df = GeoDataFrame(
|
||||
geometry=[
|
||||
MultiPoint(
|
||||
[city_hall_entrance, city_hall_balcony, city_hall_council_chamber]
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
assert infer_schema(df) == {"geometry": "MultiPoint", "properties": OrderedDict()}
|
||||
|
||||
|
||||
def test_infer_schema_only_linestrings():
|
||||
df = GeoDataFrame(geometry=city_hall_walls)
|
||||
|
||||
assert infer_schema(df) == {"geometry": "LineString", "properties": OrderedDict()}
|
||||
|
||||
|
||||
def test_infer_schema_linestrings_and_multilinestrings():
|
||||
df = GeoDataFrame(geometry=[MultiLineString(city_hall_walls), city_hall_walls[0]])
|
||||
|
||||
assert infer_schema(df) == {
|
||||
"geometry": ["MultiLineString", "LineString"],
|
||||
"properties": OrderedDict(),
|
||||
}
|
||||
|
||||
|
||||
def test_infer_schema_only_multilinestrings():
|
||||
df = GeoDataFrame(geometry=[MultiLineString(city_hall_walls)])
|
||||
|
||||
assert infer_schema(df) == {
|
||||
"geometry": "MultiLineString",
|
||||
"properties": OrderedDict(),
|
||||
}
|
||||
|
||||
|
||||
def test_infer_schema_only_polygons():
|
||||
df = GeoDataFrame(geometry=[city_hall_boundaries, vauquelin_place])
|
||||
|
||||
assert infer_schema(df) == {"geometry": "Polygon", "properties": OrderedDict()}
|
||||
|
||||
|
||||
def test_infer_schema_polygons_and_multipolygons():
|
||||
df = GeoDataFrame(
|
||||
geometry=[
|
||||
MultiPolygon((city_hall_boundaries, vauquelin_place)),
|
||||
city_hall_boundaries,
|
||||
]
|
||||
)
|
||||
|
||||
assert infer_schema(df) == {
|
||||
"geometry": ["MultiPolygon", "Polygon"],
|
||||
"properties": OrderedDict(),
|
||||
}
|
||||
|
||||
|
||||
def test_infer_schema_only_multipolygons():
|
||||
df = GeoDataFrame(geometry=[MultiPolygon((city_hall_boundaries, vauquelin_place))])
|
||||
|
||||
assert infer_schema(df) == {"geometry": "MultiPolygon", "properties": OrderedDict()}
|
||||
|
||||
|
||||
def test_infer_schema_multiple_shape_types():
|
||||
df = GeoDataFrame(
|
||||
geometry=[
|
||||
MultiPolygon((city_hall_boundaries, vauquelin_place)),
|
||||
city_hall_boundaries,
|
||||
MultiLineString(city_hall_walls),
|
||||
city_hall_walls[0],
|
||||
MultiPoint([city_hall_entrance, city_hall_balcony]),
|
||||
city_hall_balcony,
|
||||
]
|
||||
)
|
||||
|
||||
assert infer_schema(df) == {
|
||||
"geometry": [
|
||||
"MultiPolygon",
|
||||
"Polygon",
|
||||
"MultiLineString",
|
||||
"LineString",
|
||||
"MultiPoint",
|
||||
"Point",
|
||||
],
|
||||
"properties": OrderedDict(),
|
||||
}
|
||||
|
||||
|
||||
def test_infer_schema_mixed_3D_shape_type():
|
||||
df = GeoDataFrame(
|
||||
geometry=[
|
||||
MultiPolygon((city_hall_boundaries, vauquelin_place)),
|
||||
city_hall_boundaries,
|
||||
MultiLineString(city_hall_walls),
|
||||
city_hall_walls[0],
|
||||
MultiPoint([city_hall_entrance, city_hall_balcony]),
|
||||
city_hall_balcony,
|
||||
point_3D,
|
||||
]
|
||||
)
|
||||
|
||||
assert infer_schema(df) == {
|
||||
"geometry": [
|
||||
"3D Point",
|
||||
"MultiPolygon",
|
||||
"Polygon",
|
||||
"MultiLineString",
|
||||
"LineString",
|
||||
"MultiPoint",
|
||||
"Point",
|
||||
],
|
||||
"properties": OrderedDict(),
|
||||
}
|
||||
|
||||
|
||||
def test_infer_schema_mixed_3D_Point():
|
||||
df = GeoDataFrame(geometry=[city_hall_balcony, point_3D])
|
||||
|
||||
assert infer_schema(df) == {
|
||||
"geometry": ["3D Point", "Point"],
|
||||
"properties": OrderedDict(),
|
||||
}
|
||||
|
||||
|
||||
def test_infer_schema_only_3D_Points():
|
||||
df = GeoDataFrame(geometry=[point_3D, point_3D])
|
||||
|
||||
assert infer_schema(df) == {"geometry": "3D Point", "properties": OrderedDict()}
|
||||
|
||||
|
||||
def test_infer_schema_mixed_3D_linestring():
|
||||
df = GeoDataFrame(geometry=[city_hall_walls[0], linestring_3D])
|
||||
|
||||
assert infer_schema(df) == {
|
||||
"geometry": ["3D LineString", "LineString"],
|
||||
"properties": OrderedDict(),
|
||||
}
|
||||
|
||||
|
||||
def test_infer_schema_only_3D_linestrings():
|
||||
df = GeoDataFrame(geometry=[linestring_3D, linestring_3D])
|
||||
|
||||
assert infer_schema(df) == {
|
||||
"geometry": "3D LineString",
|
||||
"properties": OrderedDict(),
|
||||
}
|
||||
|
||||
|
||||
def test_infer_schema_mixed_3D_Polygon():
|
||||
df = GeoDataFrame(geometry=[city_hall_boundaries, polygon_3D])
|
||||
|
||||
assert infer_schema(df) == {
|
||||
"geometry": ["3D Polygon", "Polygon"],
|
||||
"properties": OrderedDict(),
|
||||
}
|
||||
|
||||
|
||||
def test_infer_schema_only_3D_Polygons():
|
||||
df = GeoDataFrame(geometry=[polygon_3D, polygon_3D])
|
||||
|
||||
assert infer_schema(df) == {"geometry": "3D Polygon", "properties": OrderedDict()}
|
||||
|
||||
|
||||
def test_infer_schema_null_geometry_and_2D_point():
|
||||
df = GeoDataFrame(geometry=[None, city_hall_entrance])
|
||||
|
||||
# None geometry type is then omitted
|
||||
assert infer_schema(df) == {"geometry": "Point", "properties": OrderedDict()}
|
||||
|
||||
|
||||
def test_infer_schema_null_geometry_and_3D_point():
|
||||
df = GeoDataFrame(geometry=[None, point_3D])
|
||||
|
||||
# None geometry type is then omitted
|
||||
assert infer_schema(df) == {"geometry": "3D Point", "properties": OrderedDict()}
|
||||
|
||||
|
||||
def test_infer_schema_null_geometry_all():
|
||||
df = GeoDataFrame(geometry=[None, None])
|
||||
|
||||
# None geometry type is then replaced by 'Unknown'
|
||||
# (default geometry type supported by Fiona)
|
||||
assert infer_schema(df) == {"geometry": "Unknown", "properties": OrderedDict()}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"array_data,dtype", [([1, 2**31 - 1], np.int32), ([1, np.nan], pd.Int32Dtype())]
|
||||
)
|
||||
def test_infer_schema_int32(array_data, dtype):
|
||||
int32col = pd.array(data=array_data, dtype=dtype)
|
||||
df = GeoDataFrame(geometry=[city_hall_entrance, city_hall_balcony])
|
||||
df["int32_column"] = int32col
|
||||
|
||||
assert infer_schema(df) == {
|
||||
"geometry": "Point",
|
||||
"properties": OrderedDict([("int32_column", "int32")]),
|
||||
}
|
||||
|
||||
|
||||
def test_infer_schema_int64():
|
||||
int64col = pd.array([1, np.nan], dtype=pd.Int64Dtype())
|
||||
df = GeoDataFrame(geometry=[city_hall_entrance, city_hall_balcony])
|
||||
df["int64_column"] = int64col
|
||||
|
||||
assert infer_schema(df) == {
|
||||
"geometry": "Point",
|
||||
"properties": OrderedDict([("int64_column", "int")]),
|
||||
}
|
||||
@@ -0,0 +1,56 @@
"""
See generate_legacy_storage_files.py for the creation of the legacy files.

"""

import glob
import os
import pathlib

import pandas as pd

import pytest
from geopandas.testing import assert_geodataframe_equal

DATA_PATH = pathlib.Path(os.path.dirname(__file__)) / "data"


@pytest.fixture(scope="module")
def current_pickle_data():
    # our current version pickle data
    from .generate_legacy_storage_files import create_pickle_data

    return create_pickle_data()


files = glob.glob(str(DATA_PATH / "pickle" / "*.pickle"))


@pytest.fixture(params=files, ids=[p.split("/")[-1] for p in files])
def legacy_pickle(request):
    return request.param


@pytest.mark.skip(
    reason=(
        "shapely 2.0/pygeos-based unpickling currently only works for "
        "shapely-2.0/pygeos-written files"
    ),
)
def test_legacy_pickles(current_pickle_data, legacy_pickle):
    result = pd.read_pickle(legacy_pickle)

    for name, value in result.items():
        expected = current_pickle_data[name]
        assert_geodataframe_equal(value, expected)


def test_round_trip_current(tmpdir, current_pickle_data):
    data = current_pickle_data

    for name, value in data.items():
        path = str(tmpdir / "{}.pickle".format(name))
        value.to_pickle(path)
        result = pd.read_pickle(path)
        assert_geodataframe_equal(result, value)
        assert isinstance(result.has_sindex, bool)
@@ -0,0 +1,878 @@
|
||||
"""
|
||||
Tests here include reading/writing to different types of spatial databases.
|
||||
The spatial database tests may not work without additional system
|
||||
configuration. PostGIS tests require a test database to have been set up;
|
||||
see geopandas.tests.util for more information.
|
||||
"""
|
||||
|
||||
import os
|
||||
import warnings
|
||||
from importlib.util import find_spec
|
||||
|
||||
import pandas as pd
|
||||
|
||||
import geopandas
|
||||
import geopandas._compat as compat
|
||||
from geopandas import GeoDataFrame, read_file, read_postgis
|
||||
from geopandas._compat import HAS_PYPROJ
|
||||
from geopandas.io.sql import _get_conn as get_conn
|
||||
from geopandas.io.sql import _write_postgis as write_postgis
|
||||
|
||||
import pytest
|
||||
from geopandas.tests.util import (
|
||||
create_postgis,
|
||||
create_spatialite,
|
||||
mock,
|
||||
validate_boro_df,
|
||||
)
|
||||
|
||||
try:
|
||||
from sqlalchemy import text
|
||||
except ImportError:
|
||||
# Avoid local imports for text in all sqlalchemy tests
|
||||
# all tests using text use engine_postgis, which ensures sqlalchemy is available
|
||||
text = str
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def df_nybb(nybb_filename):
|
||||
df = read_file(nybb_filename)
|
||||
return df
|
||||
|
||||
|
||||
def check_available_postgis_drivers() -> list[str]:
|
||||
"""Work out which of psycopg2 and psycopg are available.
|
||||
This prevents tests running if the relevant package isn't installed
|
||||
(rather than being skipped, as skips are treated as failures during postgis CI)
|
||||
"""
|
||||
drivers = []
|
||||
if find_spec("psycopg"):
|
||||
drivers.append("psycopg")
|
||||
if find_spec("psycopg2"):
|
||||
drivers.append("psycopg2")
|
||||
return drivers
|
||||
|
||||
|
||||
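# evaluated at import time so the available drivers can parametrize the
# connection/engine fixtures below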
POSTGIS_DRIVERS = check_available_postgis_drivers()
|
||||
|
||||
|
||||
def prepare_database_credentials() -> dict:
|
||||
"""Gather postgres connection credentials from environment variables."""
|
||||
return {
|
||||
"dbname": "test_geopandas",
|
||||
"user": os.environ.get("PGUSER"),
|
||||
"password": os.environ.get("PGPASSWORD"),
|
||||
"host": os.environ.get("PGHOST"),
|
||||
"port": os.environ.get("PGPORT"),
|
||||
}
|
||||
|
||||
|
||||
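# The connection fixtures below read their credentials from the standard libpq
# environment variables via prepare_database_credentials(). A typical local
# setup (hypothetical values) might look like:
#   createdb test_geopandas
#   psql test_geopandas -c "CREATE EXTENSION postgis;"
#   export PGUSER=postgres PGPASSWORD=postgres PGHOST=localhost PGPORT=5432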
@pytest.fixture()
|
||||
def connection_postgis(request):
|
||||
"""Create a postgres connection using either psycopg2 or psycopg.
|
||||
|
||||
Use this as an indirect fixture, where the request parameter is POSTGIS_DRIVERS."""
|
||||
psycopg = pytest.importorskip(request.param)
|
||||
|
||||
try:
|
||||
con = psycopg.connect(**prepare_database_credentials())
|
||||
except psycopg.OperationalError:
|
||||
pytest.skip("Cannot connect with postgresql database")
|
||||
with warnings.catch_warnings():
|
||||
warnings.filterwarnings(
|
||||
"ignore", message="pandas only supports SQLAlchemy connectable.*"
|
||||
)
|
||||
yield con
|
||||
con.close()
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def engine_postgis(request):
|
||||
"""
|
||||
Initiate a sqlalchemy connection engine using either psycopg2 or psycopg.
|
||||
|
||||
Use this as an indirect fixture, where the request parameter is POSTGIS_DRIVERS.
|
||||
"""
|
||||
sqlalchemy = pytest.importorskip("sqlalchemy")
|
||||
from sqlalchemy.engine.url import URL
|
||||
|
||||
credentials = prepare_database_credentials()
|
||||
try:
|
||||
con = sqlalchemy.create_engine(
|
||||
URL.create(
|
||||
drivername=f"postgresql+{request.param}",
|
||||
username=credentials["user"],
|
||||
database=credentials["dbname"],
|
||||
password=credentials["password"],
|
||||
host=credentials["host"],
|
||||
port=credentials["port"],
|
||||
)
|
||||
)
|
||||
con.connect()
|
||||
except Exception:
|
||||
pytest.skip("Cannot connect with postgresql database")
|
||||
|
||||
yield con
|
||||
con.dispose()
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def connection_spatialite():
|
||||
"""
|
||||
Return a memory-based SQLite3 connection with SpatiaLite enabled & initialized.
|
||||
|
||||
`The sqlite3 module must be built with loadable extension support
|
||||
<https://docs.python.org/3/library/sqlite3.html#f1>`_ and
|
||||
`SpatiaLite <https://www.gaia-gis.it/fossil/libspatialite/index>`_
|
||||
must be available on the system as a SQLite module.
|
||||
Packages available on Anaconda meet requirements.
|
||||
|
||||
Exceptions
|
||||
----------
|
||||
``AttributeError`` on missing support for loadable SQLite extensions
|
||||
``sqlite3.OperationalError`` on missing SpatiaLite
|
||||
"""
|
||||
sqlite3 = pytest.importorskip("sqlite3")
|
||||
try:
|
||||
with sqlite3.connect(":memory:") as con:
|
||||
con.enable_load_extension(True)
|
||||
con.load_extension("mod_spatialite")
|
||||
con.execute("SELECT InitSpatialMetaData(TRUE)")
|
||||
except Exception:
|
||||
con.close()
|
||||
pytest.skip("Cannot setup spatialite database")
|
||||
|
||||
yield con
|
||||
con.close()
|
||||
|
||||
|
||||
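# Drop the given table via SQLAlchemy reflection if it exists, silencing the
# "Did not recognize type 'geometry'" warning raised for PostGIS columns.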
def drop_table_if_exists(conn_or_engine, table):
|
||||
sqlalchemy = pytest.importorskip("sqlalchemy")
|
||||
|
||||
if sqlalchemy.inspect(conn_or_engine).has_table(table):
|
||||
metadata = sqlalchemy.MetaData()
|
||||
with warnings.catch_warnings():
|
||||
warnings.filterwarnings(
|
||||
"ignore", message="Did not recognize type 'geometry' of column.*"
|
||||
)
|
||||
metadata.reflect(conn_or_engine)
|
||||
table = metadata.tables.get(table)
|
||||
if table is not None:
|
||||
table.drop(conn_or_engine, checkfirst=True)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def df_mixed_single_and_multi():
|
||||
from shapely.geometry import LineString, MultiLineString, Point
|
||||
|
||||
df = geopandas.GeoDataFrame(
|
||||
{
|
||||
"geometry": [
|
||||
LineString([(0, 0), (1, 1)]),
|
||||
MultiLineString([[(0, 0), (1, 1)], [(2, 2), (3, 3)]]),
|
||||
Point(0, 1),
|
||||
]
|
||||
},
|
||||
crs="epsg:4326",
|
||||
)
|
||||
return df
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def df_geom_collection():
|
||||
from shapely.geometry import GeometryCollection, LineString, Point, Polygon
|
||||
|
||||
df = geopandas.GeoDataFrame(
|
||||
{
|
||||
"geometry": [
|
||||
GeometryCollection(
|
||||
[
|
||||
Polygon([(0, 0), (1, 1), (0, 1)]),
|
||||
LineString([(0, 0), (1, 1)]),
|
||||
Point(0, 0),
|
||||
]
|
||||
)
|
||||
]
|
||||
},
|
||||
crs="epsg:4326",
|
||||
)
|
||||
return df
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def df_linear_ring():
|
||||
from shapely.geometry import LinearRing
|
||||
|
||||
df = geopandas.GeoDataFrame(
|
||||
{"geometry": [LinearRing(((0, 0), (0, 1), (1, 1), (1, 0)))]}, crs="epsg:4326"
|
||||
)
|
||||
return df
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def df_3D_geoms():
|
||||
from shapely.geometry import LineString, Point, Polygon
|
||||
|
||||
df = geopandas.GeoDataFrame(
|
||||
{
|
||||
"geometry": [
|
||||
LineString([(0, 0, 0), (1, 1, 1)]),
|
||||
Polygon([(0, 0, 0), (1, 1, 1), (0, 1, 1)]),
|
||||
Point(0, 1, 2),
|
||||
]
|
||||
},
|
||||
crs="epsg:4326",
|
||||
)
|
||||
return df
|
||||
|
||||
|
||||
class TestIO:
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_get_conn(self, engine_postgis):
|
||||
Connection = pytest.importorskip("sqlalchemy.engine.base").Connection
|
||||
|
||||
engine = engine_postgis
|
||||
with get_conn(engine) as output:
|
||||
assert isinstance(output, Connection)
|
||||
with engine.connect() as conn:
|
||||
with get_conn(conn) as output:
|
||||
assert isinstance(output, Connection)
|
||||
with pytest.raises(ValueError):
|
||||
with get_conn(object()):
|
||||
pass
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_read_postgis_default(self, connection_postgis, df_nybb):
|
||||
con = connection_postgis
|
||||
create_postgis(con, df_nybb)
|
||||
|
||||
sql = "SELECT * FROM nybb;"
|
||||
df = read_postgis(sql, con)
|
||||
|
||||
validate_boro_df(df)
|
||||
# no crs defined on the created geodatabase, and none specified by the user;
# the crs should not be set to 0, as a get_srid failure would do
|
||||
assert df.crs is None
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_read_postgis_custom_geom_col(self, connection_postgis, df_nybb):
|
||||
con = connection_postgis
|
||||
geom_col = "the_geom"
|
||||
create_postgis(con, df_nybb, geom_col=geom_col)
|
||||
|
||||
sql = "SELECT * FROM nybb;"
|
||||
df = read_postgis(sql, con, geom_col=geom_col)
|
||||
|
||||
validate_boro_df(df)
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_read_postgis_select_geom_as(self, connection_postgis, df_nybb):
|
||||
"""Tests that a SELECT {geom} AS {some_other_geom} works."""
|
||||
con = connection_postgis
|
||||
orig_geom = "geom"
|
||||
out_geom = "the_geom"
|
||||
create_postgis(con, df_nybb, geom_col=orig_geom)
|
||||
|
||||
sql = """SELECT borocode, boroname, shape_leng, shape_area,
|
||||
{} as {} FROM nybb;""".format(
|
||||
orig_geom, out_geom
|
||||
)
|
||||
df = read_postgis(sql, con, geom_col=out_geom)
|
||||
|
||||
validate_boro_df(df)
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_read_postgis_get_srid(self, connection_postgis, df_nybb):
|
||||
"""Tests that an SRID can be read from a geodatabase (GH #451)."""
|
||||
con = connection_postgis
|
||||
crs = "epsg:4269"
|
||||
df_reproj = df_nybb.to_crs(crs)
|
||||
create_postgis(con, df_reproj, srid=4269)
|
||||
|
||||
sql = "SELECT * FROM nybb;"
|
||||
df = read_postgis(sql, con)
|
||||
|
||||
validate_boro_df(df)
|
||||
assert df.crs == crs
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_read_postgis_override_srid(self, connection_postgis, df_nybb):
|
||||
"""Tests that a user specified CRS overrides the geodatabase SRID."""
|
||||
con = connection_postgis
|
||||
orig_crs = df_nybb.crs
|
||||
create_postgis(con, df_nybb, srid=4269)
|
||||
|
||||
sql = "SELECT * FROM nybb;"
|
||||
df = read_postgis(sql, con, crs=orig_crs)
|
||||
|
||||
validate_boro_df(df)
|
||||
assert df.crs == orig_crs
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_from_postgis_default(self, connection_postgis, df_nybb):
|
||||
con = connection_postgis
|
||||
create_postgis(con, df_nybb)
|
||||
|
||||
sql = "SELECT * FROM nybb;"
|
||||
df = GeoDataFrame.from_postgis(sql, con)
|
||||
|
||||
validate_boro_df(df, case_sensitive=False)
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_from_postgis_custom_geom_col(self, connection_postgis, df_nybb):
|
||||
con = connection_postgis
|
||||
geom_col = "the_geom"
|
||||
create_postgis(con, df_nybb, geom_col=geom_col)
|
||||
|
||||
sql = "SELECT * FROM nybb;"
|
||||
df = GeoDataFrame.from_postgis(sql, con, geom_col=geom_col)
|
||||
|
||||
validate_boro_df(df, case_sensitive=False)
|
||||
|
||||
def test_read_postgis_null_geom(self, connection_spatialite, df_nybb):
|
||||
"""Tests that geometry with NULL is accepted."""
|
||||
con = connection_spatialite
|
||||
geom_col = df_nybb.geometry.name
|
||||
df_nybb.geometry.iat[0] = None
|
||||
create_spatialite(con, df_nybb)
|
||||
sql = (
|
||||
"SELECT ogc_fid, borocode, boroname, shape_leng, shape_area, "
|
||||
'AsEWKB("{0}") AS "{0}" FROM nybb'.format(geom_col)
|
||||
)
|
||||
df = read_postgis(sql, con, geom_col=geom_col)
|
||||
validate_boro_df(df)
|
||||
|
||||
def test_read_postgis_binary(self, connection_spatialite, df_nybb):
|
||||
"""Tests that geometry read as binary is accepted."""
|
||||
con = connection_spatialite
|
||||
geom_col = df_nybb.geometry.name
|
||||
create_spatialite(con, df_nybb)
|
||||
sql = (
|
||||
"SELECT ogc_fid, borocode, boroname, shape_leng, shape_area, "
|
||||
'ST_AsBinary("{0}") AS "{0}" FROM nybb'.format(geom_col)
|
||||
)
|
||||
df = read_postgis(sql, con, geom_col=geom_col)
|
||||
validate_boro_df(df)
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_read_postgis_chunksize(self, connection_postgis, df_nybb):
|
||||
"""Test chunksize argument"""
|
||||
chunksize = 2
|
||||
con = connection_postgis
|
||||
create_postgis(con, df_nybb)
|
||||
|
||||
sql = "SELECT * FROM nybb;"
|
||||
df = pd.concat(read_postgis(sql, con, chunksize=chunksize))
|
||||
|
||||
validate_boro_df(df)
|
||||
# no crs defined on the created geodatabase, and none specified by the user;
# the crs should not be set to 0, as a get_srid failure would do
|
||||
assert df.crs is None
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_write_postgis_default(self, engine_postgis, df_nybb):
|
||||
"""Tests that GeoDataFrame can be written to PostGIS with defaults."""
|
||||
engine = engine_postgis
|
||||
table = "nybb"
|
||||
|
||||
# If table exists, delete it before trying to write with defaults
|
||||
drop_table_if_exists(engine, table)
|
||||
|
||||
# Write to db
|
||||
write_postgis(df_nybb, con=engine, name=table, if_exists="fail")
|
||||
# Validate
|
||||
sql = text("SELECT * FROM {table};".format(table=table))
|
||||
df = read_postgis(sql, engine, geom_col="geometry")
|
||||
validate_boro_df(df)
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_write_postgis_uppercase_tablename(self, engine_postgis, df_nybb):
|
||||
"""Tests writing GeoDataFrame to PostGIS with uppercase tablename."""
|
||||
engine = engine_postgis
|
||||
table = "aTestTable"
|
||||
|
||||
# If table exists, delete it before trying to write with defaults
|
||||
drop_table_if_exists(engine, table)
|
||||
|
||||
# Write to db
|
||||
write_postgis(df_nybb, con=engine, name=table, if_exists="fail")
|
||||
# Validate
|
||||
sql = text('SELECT * FROM "{table}";'.format(table=table))
|
||||
df = read_postgis(sql, engine, geom_col="geometry")
|
||||
validate_boro_df(df)
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_write_postgis_sqlalchemy_connection(self, engine_postgis, df_nybb):
|
||||
"""Tests that GeoDataFrame can be written to PostGIS with defaults."""
|
||||
with engine_postgis.begin() as con:
|
||||
table = "nybb_con"
|
||||
|
||||
# If table exists, delete it before trying to write with defaults
|
||||
drop_table_if_exists(con, table)
|
||||
|
||||
# Write to db
|
||||
write_postgis(df_nybb, con=con, name=table, if_exists="fail")
|
||||
# Validate
|
||||
sql = text("SELECT * FROM {table};".format(table=table))
|
||||
df = read_postgis(sql, con, geom_col="geometry")
|
||||
validate_boro_df(df)
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_write_postgis_fail_when_table_exists(self, engine_postgis, df_nybb):
|
||||
"""
|
||||
Tests that uploading the same table raises an error when if_exists='fail'.
|
||||
"""
|
||||
engine = engine_postgis
|
||||
|
||||
table = "nybb"
|
||||
|
||||
# Ensure table exists
|
||||
write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
|
||||
|
||||
try:
|
||||
write_postgis(df_nybb, con=engine, name=table, if_exists="fail")
|
||||
except ValueError as e:
|
||||
if "already exists" in str(e):
|
||||
pass
|
||||
else:
|
||||
raise e
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_write_postgis_replace_when_table_exists(self, engine_postgis, df_nybb):
|
||||
"""
|
||||
Tests that replacing a table is possible when if_exists='replace'.
|
||||
"""
|
||||
engine = engine_postgis
|
||||
|
||||
table = "nybb"
|
||||
|
||||
# Ensure table exists
|
||||
write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
|
||||
# Overwrite
|
||||
write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
|
||||
# Validate
|
||||
sql = text("SELECT * FROM {table};".format(table=table))
|
||||
df = read_postgis(sql, engine, geom_col="geometry")
|
||||
validate_boro_df(df)
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_write_postgis_append_when_table_exists(self, engine_postgis, df_nybb):
|
||||
"""
|
||||
Tests that appending to an existing table produces correct results when
if_exists='append'.
|
||||
"""
|
||||
engine = engine_postgis
|
||||
|
||||
table = "nybb"
|
||||
|
||||
orig_rows, orig_cols = df_nybb.shape
|
||||
write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
|
||||
write_postgis(df_nybb, con=engine, name=table, if_exists="append")
|
||||
# Validate
|
||||
sql = text("SELECT * FROM {table};".format(table=table))
|
||||
df = read_postgis(sql, engine, geom_col="geometry")
|
||||
new_rows, new_cols = df.shape
|
||||
|
||||
# There should be twice as many rows in the new table
|
||||
assert new_rows == orig_rows * 2, (
|
||||
"There should be {target} rows,found: {current}".format(
|
||||
target=orig_rows * 2, current=new_rows
|
||||
),
|
||||
)
|
||||
# Number of columns should stay the same
|
||||
assert new_cols == orig_cols, (
|
||||
"There should be {target} columns,found: {current}".format(
|
||||
target=orig_cols, current=new_cols
|
||||
),
|
||||
)
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_write_postgis_without_crs(self, engine_postgis, df_nybb):
|
||||
"""
|
||||
Tests that GeoDataFrame can be written to PostGIS without CRS information.
|
||||
"""
|
||||
engine = engine_postgis
|
||||
|
||||
table = "nybb"
|
||||
|
||||
# Write to db
|
||||
df_nybb.geometry.array.crs = None
|
||||
with pytest.warns(UserWarning, match="Could not parse CRS from the GeoDataF"):
|
||||
write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
|
||||
# Validate that srid is 0
|
||||
sql = text(
|
||||
"SELECT Find_SRID('{schema}', '{table}', '{geom_col}');".format(
|
||||
schema="public", table=table, geom_col="geometry"
|
||||
)
|
||||
)
|
||||
with engine.connect() as conn:
|
||||
target_srid = conn.execute(sql).fetchone()[0]
|
||||
assert target_srid == 0, "SRID should be 0, found %s" % target_srid
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_write_postgis_with_esri_authority(self, engine_postgis, df_nybb):
|
||||
"""
|
||||
Tests that GeoDataFrame can be written to PostGIS with ESRI Authority
|
||||
CRS information (GH #2414).
|
||||
"""
|
||||
engine = engine_postgis
|
||||
|
||||
table = "nybb"
|
||||
|
||||
# Write to db
|
||||
df_nybb_esri = df_nybb.to_crs("ESRI:102003")
|
||||
write_postgis(df_nybb_esri, con=engine, name=table, if_exists="replace")
|
||||
# Validate that srid is 102003
|
||||
sql = text(
|
||||
"SELECT Find_SRID('{schema}', '{table}', '{geom_col}');".format(
|
||||
schema="public", table=table, geom_col="geometry"
|
||||
)
|
||||
)
|
||||
with engine.connect() as conn:
|
||||
target_srid = conn.execute(sql).fetchone()[0]
|
||||
assert target_srid == 102003, "SRID should be 102003, found %s" % target_srid
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_write_postgis_geometry_collection(
|
||||
self, engine_postgis, df_geom_collection
|
||||
):
|
||||
"""
|
||||
Tests that writing a mix of different geometry types is possible.
|
||||
"""
|
||||
engine = engine_postgis
|
||||
|
||||
table = "geomtype_tests"
|
||||
|
||||
write_postgis(df_geom_collection, con=engine, name=table, if_exists="replace")
|
||||
|
||||
# Validate geometry type
|
||||
sql = text(
|
||||
"SELECT DISTINCT(GeometryType(geometry)) FROM {table} ORDER BY 1;".format(
|
||||
table=table
|
||||
)
|
||||
)
|
||||
with engine.connect() as conn:
|
||||
geom_type = conn.execute(sql).fetchone()[0]
|
||||
sql = text("SELECT * FROM {table};".format(table=table))
|
||||
df = read_postgis(sql, engine, geom_col="geometry")
|
||||
|
||||
assert geom_type.upper() == "GEOMETRYCOLLECTION"
|
||||
assert df.geom_type.unique()[0] == "GeometryCollection"
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_write_postgis_mixed_geometry_types(
|
||||
self, engine_postgis, df_mixed_single_and_multi
|
||||
):
|
||||
"""
|
||||
Tests that writing a mix of single and MultiGeometries is possible.
|
||||
"""
|
||||
engine = engine_postgis
|
||||
|
||||
table = "geomtype_tests"
|
||||
|
||||
write_postgis(
|
||||
df_mixed_single_and_multi, con=engine, name=table, if_exists="replace"
|
||||
)
|
||||
|
||||
# Validate geometry type
|
||||
sql = text(
|
||||
"SELECT DISTINCT GeometryType(geometry) FROM {table} ORDER BY 1;".format(
|
||||
table=table
|
||||
)
|
||||
)
|
||||
with engine.connect() as conn:
|
||||
res = conn.execute(sql).fetchall()
|
||||
assert res[0][0].upper() == "LINESTRING"
|
||||
assert res[1][0].upper() == "MULTILINESTRING"
|
||||
assert res[2][0].upper() == "POINT"
|
||||
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_write_postgis_linear_ring(self, engine_postgis, df_linear_ring):
|
||||
"""
|
||||
Tests that writing a LinearRing works.
|
||||
"""
|
||||
engine = engine_postgis
|
||||
|
||||
table = "geomtype_tests"
|
||||
|
||||
write_postgis(df_linear_ring, con=engine, name=table, if_exists="replace")
|
||||
|
||||
# Validate geometry type
|
||||
sql = text(
|
||||
"SELECT DISTINCT(GeometryType(geometry)) FROM {table} ORDER BY 1;".format(
|
||||
table=table
|
||||
)
|
||||
)
|
||||
with engine.connect() as conn:
|
||||
geom_type = conn.execute(sql).fetchone()[0]
|
||||
|
||||
assert geom_type.upper() == "LINESTRING"
|
||||
|
||||
    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_in_chunks(self, engine_postgis, df_mixed_single_and_multi):
        """
        Tests that writing in chunks (chunksize argument) works.
        """
        engine = engine_postgis

        table = "geomtype_tests"

        write_postgis(
            df_mixed_single_and_multi,
            con=engine,
            name=table,
            if_exists="replace",
            chunksize=1,
        )
        # Validate row count
        sql = text("SELECT COUNT(geometry) FROM {table};".format(table=table))
        with engine.connect() as conn:
            row_cnt = conn.execute(sql).fetchone()[0]
        assert row_cnt == 3

        # Validate geometry type
        sql = text(
            "SELECT DISTINCT GeometryType(geometry) FROM {table} ORDER BY 1;".format(
                table=table
            )
        )
        with engine.connect() as conn:
            res = conn.execute(sql).fetchall()
        assert res[0][0].upper() == "LINESTRING"
        assert res[1][0].upper() == "MULTILINESTRING"
        assert res[2][0].upper() == "POINT"

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_to_different_schema(self, engine_postgis, df_nybb):
        """
        Tests writing data to an alternative schema.
        """
        engine = engine_postgis

        table = "nybb"
        schema_to_use = "test"
        sql = text("CREATE SCHEMA IF NOT EXISTS {schema};".format(schema=schema_to_use))
        with engine.begin() as conn:
            conn.execute(sql)

        write_postgis(
            df_nybb, con=engine, name=table, if_exists="replace", schema=schema_to_use
        )
        # Validate
        sql = text(
            "SELECT * FROM {schema}.{table};".format(schema=schema_to_use, table=table)
        )

        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_to_different_schema_when_table_exists(
        self, engine_postgis, df_nybb
    ):
        """
        Tests writing data to an alternative schema when the table already exists.
        """
        engine = engine_postgis

        table = "nybb"
        schema_to_use = "test"
        sql = text("CREATE SCHEMA IF NOT EXISTS {schema};".format(schema=schema_to_use))
        with engine.begin() as conn:
            conn.execute(sql)

        try:
            write_postgis(
                df_nybb, con=engine, name=table, if_exists="fail", schema=schema_to_use
            )
            # Validate
            sql = text(
                "SELECT * FROM {schema}.{table};".format(
                    schema=schema_to_use, table=table
                )
            )

            df = read_postgis(sql, engine, geom_col="geometry")
            validate_boro_df(df)

        # Should raise a ValueError when table exists
        except ValueError:
            pass

        # Try with replace flag on
        write_postgis(
            df_nybb, con=engine, name=table, if_exists="replace", schema=schema_to_use
        )
        # Validate
        sql = text(
            "SELECT * FROM {schema}.{table};".format(schema=schema_to_use, table=table)
        )

        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_3D_geometries(self, engine_postgis, df_3D_geoms):
        """
        Tests that writing geometries with 3 dimensions works.
        """
        engine = engine_postgis

        table = "geomtype_tests"

        write_postgis(df_3D_geoms, con=engine, name=table, if_exists="replace")

        # Check that all geometries have 3 dimensions
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        assert list(df.geometry.has_z) == [True, True, True]

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_row_order(self, engine_postgis, df_nybb):
        """
        Tests that the row order in db table follows the order of the original frame.
        """
        engine = engine_postgis

        table = "row_order_test"
        correct_order = df_nybb["BoroCode"].tolist()

        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")

        # Check that the row order matches
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        assert df["BoroCode"].tolist() == correct_order

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_append_before_table_exists(self, engine_postgis, df_nybb):
        """
        Tests that insert works with if_exists='append' when table does not exist yet.
        """
        engine = engine_postgis

        table = "nybb"
        # If table exists, delete it before trying to write with defaults
        drop_table_if_exists(engine, table)

        write_postgis(df_nybb, con=engine, name=table, if_exists="append")

        # Check that the table was created and the data was written
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_append_with_different_crs(self, engine_postgis, df_nybb):
        """
        Tests that an error is raised if the table CRS differs from the frame's CRS.
        """
        engine = engine_postgis

        table = "nybb"
        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")

        # Reproject
        df_nybb2 = df_nybb.to_crs(epsg=4326)

        # Should raise error when appending
        with pytest.raises(ValueError, match="CRS of the target table"):
            write_postgis(df_nybb2, con=engine, name=table, if_exists="append")

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_append_without_crs(self, engine_postgis, df_nybb):
        # This test was added in #3328 when the default SRID for a frame with
        # no CRS was changed from -1 to 0. PostGIS uses an SRID of 0 for
        # geometries without a CRS, so appending dataframes that have no CRS
        # now works.
        engine = engine_postgis
        df_nybb = df_nybb.set_crs(None, allow_override=True)
        table = "nybb"

        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
        # append another dataframe with no crs

        df_nybb2 = df_nybb
        write_postgis(df_nybb2, con=engine, name=table, if_exists="append")
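
        # Illustrative note (not part of the original test): given the SRID-0
        # convention described above, a Find_SRID check like the one used
        # earlier in this module would be expected to report 0 here, e.g.
        #   SELECT Find_SRID('public', 'nybb', 'geometry');  -- expected: 0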

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    @pytest.mark.xfail(
        compat.PANDAS_GE_20 and not compat.PANDAS_GE_202,
        reason="Duplicate columns are dropped in read_sql with pandas 2.0.0 and 2.0.1",
    )
    def test_duplicate_geometry_column_fails(self, engine_postgis):
        """
        Tests that a ValueError is raised if an SQL query returns two geometry columns.
        """
        engine = engine_postgis

        sql = "select ST_MakePoint(0, 0) as geom, ST_MakePoint(0, 0) as geom;"

        with pytest.raises(ValueError):
            read_postgis(sql, engine, geom_col="geom")

    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_read_non_epsg_crs(self, connection_postgis, df_nybb):
        con = connection_postgis
        df_nybb = df_nybb.to_crs(crs="esri:54052")
        create_postgis(con, df_nybb, srid=54052)

        sql = "SELECT * FROM nybb;"
        df = read_postgis(sql, con)
        validate_boro_df(df)
        assert df.crs == "ESRI:54052"

    @pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
    @mock.patch("shapely.get_srid")
    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_read_srid_not_in_table(self, mock_get_srid, connection_postgis, df_nybb):
        # Mock a non-existent srid to cover the edge case where shapely reports
        # an srid that is not present in the postgis table.
        pyproj = pytest.importorskip("pyproj")

        mock_get_srid.return_value = 99999

        con = connection_postgis
        df_nybb = df_nybb.to_crs(crs="epsg:4326")
        create_postgis(con, df_nybb)

        sql = "SELECT * FROM nybb;"
        with pytest.raises(pyproj.exceptions.CRSError, match="crs not found"):
            with pytest.warns(UserWarning, match="Could not find srid 99999"):
                read_postgis(sql, con)

    @mock.patch("geopandas.io.sql._get_spatial_ref_sys_df")
    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_read_no_spatial_ref_sys_table_in_postgis(
        self, mock_get_spatial_ref_sys_df, connection_postgis, df_nybb
    ):
        # Mock a database where the spatial_ref_sys table does not exist

        mock_get_spatial_ref_sys_df.side_effect = pd.errors.DatabaseError

        con = connection_postgis
        df_nybb = df_nybb.to_crs(crs="epsg:4326")
        create_postgis(con, df_nybb, srid=4326)

        sql = "SELECT * FROM nybb;"
        with pytest.warns(
            UserWarning, match="Could not find the spatial reference system table"
        ):
            df = read_postgis(sql, con)

        assert df.crs == "EPSG:4326"

    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_read_non_epsg_crs_chunksize(self, connection_postgis, df_nybb):
        """Test chunksize argument with a non-EPSG CRS"""
        chunksize = 2
        con = connection_postgis
        df_nybb = df_nybb.to_crs(crs="esri:54052")

        create_postgis(con, df_nybb, srid=54052)

        sql = "SELECT * FROM nybb;"
        df = pd.concat(read_postgis(sql, con, chunksize=chunksize))

        validate_boro_df(df)
        assert df.crs == "ESRI:54052"
@@ -0,0 +1,118 @@
"""Vendored, cut down version of pyogrio/util.py for use with fiona"""

import re
import sys
from urllib.parse import urlparse


def vsi_path(path: str) -> str:
    """
    Ensure path is a local path or a GDAL-compatible vsi path.
    """

    # path is already in GDAL format
    if path.startswith("/vsi"):
        return path

    # Windows drive letters (e.g. "C:\") confuse `urlparse` as they look like
    # URL schemes
    if sys.platform == "win32" and re.match("^[a-zA-Z]\\:", path):
        if not path.split("!")[0].endswith(".zip"):
            return path

        # prefix the path so the remaining parsing can proceed
        path = f"zip://{path}"

    path, archive, scheme = _parse_uri(path)

    if scheme or archive or path.endswith(".zip"):
        return _construct_vsi_path(path, archive, scheme)

    return path


# Supported URI schemes and their mapping to GDAL's VSI suffix.
SCHEMES = {
    "file": "file",
    "zip": "zip",
    "tar": "tar",
    "gzip": "gzip",
    "http": "curl",
    "https": "curl",
    "ftp": "curl",
    "s3": "s3",
    "gs": "gs",
    "az": "az",
    "adls": "adls",
    "adl": "adls",  # fsspec uses this
    "hdfs": "hdfs",
    "webhdfs": "webhdfs",
    # GDAL additionally supports oss and swift for remote filesystems, but
    # those are not added as supported URI schemes for now
}

CURLSCHEMES = {k for k, v in SCHEMES.items() if v == "curl"}
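# The set above resolves to {"http", "https", "ftp"}: the schemes that map to
# GDAL's /vsicurl/ handler and therefore get their "<scheme>://" prefix
# re-attached in _construct_vsi_path below.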


def _parse_uri(path: str):
    """
    Parse a URI

    Returns a tuple of (path, archive, scheme)

    path : str
        Parsed path. Includes the hostname and query string in the case
        of a URI.
    archive : str
        Parsed archive path.
    scheme : str
        URI scheme such as "https" or "zip+s3".
    """
    parts = urlparse(path, allow_fragments=False)

    # if the scheme is not one of GDAL's supported schemes, return raw path
    if parts.scheme and not all(p in SCHEMES for p in parts.scheme.split("+")):
        return path, "", ""

    # we have a URI
    path = parts.path
    scheme = parts.scheme or ""

    if parts.query:
        path += "?" + parts.query

    if parts.scheme and parts.netloc:
        path = parts.netloc + path

    parts = path.split("!")
    path = parts.pop() if parts else ""
    archive = parts.pop() if parts else ""
    return (path, archive, scheme)
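
# For illustration (hypothetical input, not part of the vendored module):
# _parse_uri("zip+s3://bucket/archive.zip!layer.shp") returns
# ("layer.shp", "bucket/archive.zip", "zip+s3").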


def _construct_vsi_path(path, archive, scheme) -> str:
    """Convert a parsed path to a GDAL VSI path"""

    prefix = ""
    suffix = ""
    schemes = scheme.split("+")

    if "zip" not in schemes and (archive.endswith(".zip") or path.endswith(".zip")):
        schemes.insert(0, "zip")

    if schemes:
        prefix = "/".join(
            "vsi{0}".format(SCHEMES[p]) for p in schemes if p and p != "file"
        )

        if schemes[-1] in CURLSCHEMES:
            suffix = f"{schemes[-1]}://"

    if prefix:
        if archive:
            return "/{}/{}{}/{}".format(prefix, suffix, archive, path.lstrip("/"))
        else:
            return "/{}/{}{}".format(prefix, suffix, path)

    return path
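
# Illustrative sketch (hypothetical inputs, not part of the vendored module)
# of how vsi_path resolves a few common cases:
#
#   vsi_path("https://example.com/data.geojson")
#   -> "/vsicurl/https://example.com/data.geojson"
#   vsi_path("zip://data/archive.zip!layer.shp")
#   -> "/vsizip/data/archive.zip/layer.shp"
#   vsi_path("s3://bucket/key/data.shp")
#   -> "/vsis3/bucket/key/data.shp"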