refactor: excel parse
@@ -0,0 +1,100 @@
"""
Script to create the data and write legacy storage (pickle) files.

Based on pandas' generate_legacy_storage_files.py script.

To use this script, create an environment for which you want to
generate pickles, activate the environment, and run this script as:

$ python geopandas/geopandas/io/tests/generate_legacy_storage_files.py \
    geopandas/geopandas/io/tests/data/pickle/ pickle

This script generates a storage file for the current arch, system,
and python version.

The idea here is you are using the *current* version of the
generate_legacy_storage_files with an *older* version of geopandas to
generate a pickle file. We will then check this file into a current
branch, and test using test_pickle.py. This will load the *older*
pickles and test versus the current data that is generated
(with master). These are then compared.

"""

import os
import pickle
import platform
import sys

import pandas as pd

from shapely.geometry import Point

import geopandas


def create_pickle_data():
    """create the pickle data"""

    # custom geometry column name
    gdf_the_geom = geopandas.GeoDataFrame(
        {"a": [1, 2, 3], "the_geom": [Point(1, 1), Point(2, 2), Point(3, 3)]},
        geometry="the_geom",
    )

    # with crs
    gdf_crs = geopandas.GeoDataFrame(
        {"a": [0.1, 0.2, 0.3], "geometry": [Point(1, 1), Point(2, 2), Point(3, 3)]},
        crs="EPSG:4326",
    )

    return {"gdf_the_geom": gdf_the_geom, "gdf_crs": gdf_crs}


def platform_name():
    return "_".join(
        [
            str(geopandas.__version__),
            "pd-" + str(pd.__version__),
            "py-" + str(platform.python_version()),
            str(platform.machine()),
            str(platform.system().lower()),
        ]
    )


def write_legacy_pickles(output_dir):
    print(
        "This script generates a storage file for the current arch, system, "
        "and python version"
    )
    print("geopandas version: {}".format(geopandas.__version__))
    print("  output dir    : {}".format(output_dir))
    print("  storage format: pickle")

    pth = "{}.pickle".format(platform_name())

    with open(os.path.join(output_dir, pth), "wb") as fh:
        pickle.dump(create_pickle_data(), fh, pickle.DEFAULT_PROTOCOL)

    print("created pickle file: {}".format(pth))


def main():
    if len(sys.argv) != 3:
        sys.exit(
            "Specify output directory and storage type: generate_legacy_"
            "storage_files.py <output_dir> <storage_type>"
        )

    output_dir = str(sys.argv[1])
    storage_type = str(sys.argv[2])

    if storage_type == "pickle":
        write_legacy_pickles(output_dir=output_dir)
    else:
        sys.exit("storage_type must be one of {'pickle'}")


if __name__ == "__main__":
    main()
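
For context on consuming the output: a generated file is an ordinary pickle holding the dict returned by create_pickle_data(), so it can be read back with plain pandas. A minimal sketch; the filename below is hypothetical (real names are produced by platform_name()):

# Minimal sketch of reading a generated file back. The filename is
# hypothetical; real names encode the geopandas/pandas/Python versions
# plus machine and system, via platform_name().
import pandas as pd

data = pd.read_pickle(
    "geopandas/geopandas/io/tests/data/pickle/"
    "0.14.0_pd-2.1.0_py-3.11.4_x86_64_linux.pickle"  # hypothetical name
)
for name, gdf in data.items():  # {"gdf_the_geom": ..., "gdf_crs": ...}
    print(name, type(gdf).__name__, len(gdf))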
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -0,0 +1,328 @@
import os

from shapely.geometry import (
    LineString,
    MultiLineString,
    MultiPoint,
    MultiPolygon,
    Point,
    Polygon,
)

import geopandas
from geopandas import GeoDataFrame

from .test_file import FIONA_MARK, PYOGRIO_MARK

import pytest
from geopandas.testing import assert_geodataframe_equal

# Credit: Polygons below come from Montreal city Open Data portal
# http://donnees.ville.montreal.qc.ca/dataset/unites-evaluation-fonciere
city_hall_boundaries = Polygon(
    (
        (-73.5541107525234, 45.5091983609661),
        (-73.5546126200639, 45.5086813829106),
        (-73.5540185061397, 45.5084409343852),
        (-73.5539986525799, 45.5084323044531),
        (-73.5535801792994, 45.5089539203786),
        (-73.5541107525234, 45.5091983609661),
    )
)
vauquelin_place = Polygon(
    (
        (-73.5542465586147, 45.5081555487952),
        (-73.5540185061397, 45.5084409343852),
        (-73.5546126200639, 45.5086813829106),
        (-73.5548825850032, 45.5084033554357),
        (-73.5542465586147, 45.5081555487952),
    )
)

city_hall_walls = [
    LineString(
        (
            (-73.5541107525234, 45.5091983609661),
            (-73.5546126200639, 45.5086813829106),
            (-73.5540185061397, 45.5084409343852),
        )
    ),
    LineString(
        (
            (-73.5539986525799, 45.5084323044531),
            (-73.5535801792994, 45.5089539203786),
            (-73.5541107525234, 45.5091983609661),
        )
    ),
]

city_hall_entrance = Point(-73.553785, 45.508722)
city_hall_balcony = Point(-73.554138, 45.509080)
city_hall_council_chamber = Point(-73.554246, 45.508931)

point_3D = Point(-73.553785, 45.508722, 300)


# *****************************************
# TEST TOOLING


class _ExpectedError:
    def __init__(self, error_type, error_message_match):
        self.type = error_type
        self.match = error_message_match


class _ExpectedErrorBuilder:
    def __init__(self, composite_key):
        self.composite_key = composite_key

    def to_raise(self, error_type, error_match):
        _expected_exceptions[self.composite_key] = _ExpectedError(
            error_type, error_match
        )


def _expect_writing(gdf, ogr_driver):
    return _ExpectedErrorBuilder(_composite_key(gdf, ogr_driver))


def _composite_key(gdf, ogr_driver):
    return frozenset([id(gdf), ogr_driver])


def _expected_error_on(gdf, ogr_driver):
    composite_key = _composite_key(gdf, ogr_driver)
    return _expected_exceptions.get(composite_key, None)


# *****************************************
# TEST CASES
_geodataframes_to_write = []
_expected_exceptions = {}
_CRS = "epsg:4326"

# ------------------
# gdf with Points
gdf = GeoDataFrame(
    {"a": [1, 2]}, crs=_CRS, geometry=[city_hall_entrance, city_hall_balcony]
)
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with MultiPoints
gdf = GeoDataFrame(
    {"a": [1, 2]},
    crs=_CRS,
    geometry=[
        MultiPoint([city_hall_balcony, city_hall_council_chamber]),
        MultiPoint([city_hall_entrance, city_hall_balcony, city_hall_council_chamber]),
    ],
)
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with Points and MultiPoints
gdf = GeoDataFrame(
    {"a": [1, 2]},
    crs=_CRS,
    geometry=[MultiPoint([city_hall_entrance, city_hall_balcony]), city_hall_balcony],
)
_geodataframes_to_write.append(gdf)
# 'ESRI Shapefile' driver supports writing LineString/MultiLineString and
# Polygon/MultiPolygon but does not mention Point/MultiPoint
# see https://www.gdal.org/drv_shapefile.html
_expect_writing(gdf, "ESRI Shapefile").to_raise(RuntimeError, "Failed to write record")

# ------------------
# gdf with LineStrings
gdf = GeoDataFrame({"a": [1, 2]}, crs=_CRS, geometry=city_hall_walls)
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with MultiLineStrings
gdf = GeoDataFrame(
    {"a": [1, 2]},
    crs=_CRS,
    geometry=[MultiLineString(city_hall_walls), MultiLineString(city_hall_walls)],
)
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with LineStrings and MultiLineStrings
gdf = GeoDataFrame(
    {"a": [1, 2]},
    crs=_CRS,
    geometry=[MultiLineString(city_hall_walls), city_hall_walls[0]],
)
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with Polygons
gdf = GeoDataFrame(
    {"a": [1, 2]}, crs=_CRS, geometry=[city_hall_boundaries, vauquelin_place]
)
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with MultiPolygon
gdf = GeoDataFrame(
    {"a": [1]},
    crs=_CRS,
    geometry=[MultiPolygon((city_hall_boundaries, vauquelin_place))],
)
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with Polygon and MultiPolygon
gdf = GeoDataFrame(
    {"a": [1, 2]},
    crs=_CRS,
    geometry=[
        MultiPolygon((city_hall_boundaries, vauquelin_place)),
        city_hall_boundaries,
    ],
)
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with null geometry and Point
gdf = GeoDataFrame({"a": [1, 2]}, crs=_CRS, geometry=[None, city_hall_entrance])
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with null geometry and 3D Point
gdf = GeoDataFrame({"a": [1, 2]}, crs=_CRS, geometry=[None, point_3D])
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with null geometries only
gdf = GeoDataFrame({"a": [1, 2]}, crs=_CRS, geometry=[None, None])
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with all shape types mixed together
gdf = GeoDataFrame(
    {"a": [1, 2, 3, 4, 5, 6]},
    crs=_CRS,
    geometry=[
        MultiPolygon((city_hall_boundaries, vauquelin_place)),
        city_hall_entrance,
        MultiLineString(city_hall_walls),
        city_hall_walls[0],
        MultiPoint([city_hall_entrance, city_hall_balcony]),
        city_hall_balcony,
    ],
)
_geodataframes_to_write.append(gdf)
# Not supported by 'ESRI Shapefile' driver
_expect_writing(gdf, "ESRI Shapefile").to_raise(RuntimeError, "Failed to write record")

# ------------------
# gdf with all 2D shape types and 3D Point mixed together
gdf = GeoDataFrame(
    {"a": [1, 2, 3, 4, 5, 6, 7]},
    crs=_CRS,
    geometry=[
        MultiPolygon((city_hall_boundaries, vauquelin_place)),
        city_hall_entrance,
        MultiLineString(city_hall_walls),
        city_hall_walls[0],
        MultiPoint([city_hall_entrance, city_hall_balcony]),
        city_hall_balcony,
        point_3D,
    ],
)
_geodataframes_to_write.append(gdf)
# Not supported by 'ESRI Shapefile' driver
_expect_writing(gdf, "ESRI Shapefile").to_raise(RuntimeError, "Failed to write record")


@pytest.fixture(params=_geodataframes_to_write)
def geodataframe(request):
    return request.param


@pytest.fixture(
    params=[
        ("GeoJSON", ".geojson"),
        ("ESRI Shapefile", ".shp"),
        ("GPKG", ".gpkg"),
        ("SQLite", ".sqlite"),
    ]
)
def ogr_driver(request):
    return request.param


@pytest.fixture(
    params=[
        pytest.param("fiona", marks=FIONA_MARK),
        pytest.param("pyogrio", marks=PYOGRIO_MARK),
    ]
)
def engine(request):
    return request.param


def test_to_file_roundtrip(tmpdir, geodataframe, ogr_driver, engine):
    driver, ext = ogr_driver
    output_file = os.path.join(str(tmpdir), "output_file" + ext)
    write_kwargs = {}
    if driver == "SQLite":
        write_kwargs["spatialite"] = True

        # This if statement can be removed once minimal fiona version >= 1.8.20
        if engine == "fiona":
            from packaging.version import Version

            import fiona

            if Version(fiona.__version__) < Version("1.8.20"):
                pytest.skip("SQLite driver only available from version 1.8.20")

        # If only 3D Points, geometry_type needs to be specified for spatialite at the
        # moment. This if can be removed once the following PR is released:
        # https://github.com/geopandas/pyogrio/pull/223
        if (
            engine == "pyogrio"
            and len(geodataframe) == 2
            and geodataframe.geometry[0] is None
            and geodataframe.geometry[1] is not None
            and geodataframe.geometry[1].has_z
        ):
            write_kwargs["geometry_type"] = "Point Z"

    expected_error = _expected_error_on(geodataframe, driver)
    if expected_error:
        with pytest.raises(
            RuntimeError, match="Failed to write record|Could not add feature to layer"
        ):
            geodataframe.to_file(
                output_file, driver=driver, engine=engine, **write_kwargs
            )
    else:
        if driver == "SQLite" and engine == "pyogrio":
            try:
                geodataframe.to_file(
                    output_file, driver=driver, engine=engine, **write_kwargs
                )
            except ValueError as e:
                if "unrecognized option 'SPATIALITE'" in str(e):
                    pytest.xfail(
                        "pyogrio wheels from PyPI do not come with SpatiaLite support. "
                        f"Error: {e}"
                    )
                raise
        else:
            geodataframe.to_file(
                output_file, driver=driver, engine=engine, **write_kwargs
            )

        reloaded = geopandas.read_file(output_file, engine=engine)

        if driver == "GeoJSON" and engine == "pyogrio":
            # For GeoJSON files, the int64 column comes back as int32
            reloaded["a"] = reloaded["a"].astype("int64")

        assert_geodataframe_equal(geodataframe, reloaded, check_column_type="equiv")
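
The expected-failure registry above keys each expectation on frozenset([id(gdf), driver]), so lookups need neither ordering nor a hashable GeoDataFrame. A standalone sketch of the same pattern; the names here are illustrative, not part of the test module:

# Standalone sketch of the registry pattern used by _expect_writing above.
# Keys combine id(obj) with the driver name in a frozenset, so lookup works
# without hashing the (unhashable) GeoDataFrame itself. The registered
# objects must stay alive (the module-level list does this), otherwise
# id() values could be reused.
_registry = {}

def expect_failure(obj, driver, error_type, match):
    _registry[frozenset([id(obj), driver])] = (error_type, match)

def expected_failure(obj, driver):
    return _registry.get(frozenset([id(obj), driver]))

sentinel = object()  # stands in for a GeoDataFrame
expect_failure(sentinel, "ESRI Shapefile", RuntimeError, "Failed to write record")
assert expected_failure(sentinel, "ESRI Shapefile") == (
    RuntimeError,
    "Failed to write record",
)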
@@ -0,0 +1,537 @@
import contextlib
import json
import os
import pathlib
from packaging.version import Version

import numpy as np

import shapely
from shapely import MultiPoint, Point, box

from geopandas import GeoDataFrame, GeoSeries

import pytest
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal

pytest.importorskip("pyarrow")
import pyarrow as pa
import pyarrow.compute as pc
from pyarrow import feather

DATA_PATH = pathlib.Path(os.path.dirname(__file__)) / "data"


def pa_table(table):
    if Version(pa.__version__) < Version("14.0.0"):
        return table._pa_table
    else:
        return pa.table(table)


def pa_array(array):
    if Version(pa.__version__) < Version("14.0.0"):
        return array._pa_array
    else:
        return pa.array(array)


def assert_table_equal(left, right, check_metadata=True):
    geom_type = left["geometry"].type
    # in case of Points (directly the inner fixed_size_list or struct type)
    # -> there are NaNs for empties -> we need to compare them separately
    # and then fill, because pyarrow.Table.equals considers NaNs as not equal
    if pa.types.is_fixed_size_list(geom_type):
        left_values = left["geometry"].chunk(0).values
        right_values = right["geometry"].chunk(0).values
        assert pc.is_nan(left_values).equals(pc.is_nan(right_values))
        left_geoms = pa.FixedSizeListArray.from_arrays(
            pc.replace_with_mask(left_values, pc.is_nan(left_values), 0.0),
            type=left["geometry"].type,
        )
        right_geoms = pa.FixedSizeListArray.from_arrays(
            pc.replace_with_mask(right_values, pc.is_nan(right_values), 0.0),
            type=right["geometry"].type,
        )
        left = left.set_column(1, left.schema.field("geometry"), left_geoms)
        right = right.set_column(1, right.schema.field("geometry"), right_geoms)

    elif pa.types.is_struct(geom_type):
        left_arr = left["geometry"].chunk(0)
        right_arr = right["geometry"].chunk(0)

        for i in range(left_arr.type.num_fields):
            assert pc.is_nan(left_arr.field(i)).equals(pc.is_nan(right_arr.field(i)))

        left_geoms = pa.StructArray.from_arrays(
            [
                pc.replace_with_mask(
                    left_arr.field(i), pc.is_nan(left_arr.field(i)), 0.0
                )
                for i in range(left_arr.type.num_fields)
            ],
            fields=list(left["geometry"].type),
        )
        right_geoms = pa.StructArray.from_arrays(
            [
                pc.replace_with_mask(
                    right_arr.field(i), pc.is_nan(right_arr.field(i)), 0.0
                )
                for i in range(right_arr.type.num_fields)
            ],
            fields=list(right["geometry"].type),
        )

        left = left.set_column(1, left.schema.field("geometry"), left_geoms)
        right = right.set_column(1, right.schema.field("geometry"), right_geoms)

    if left.equals(right, check_metadata=check_metadata):
        return

    if not left.schema.equals(right.schema):
        raise AssertionError(
            "Schema not equal\nLeft:\n{0}\nRight:\n{1}".format(
                left.schema, right.schema
            )
        )

    if check_metadata:
        if not left.schema.equals(right.schema, check_metadata=True):
            if not left.schema.metadata == right.schema.metadata:
                raise AssertionError(
                    "Metadata not equal\nLeft:\n{0}\nRight:\n{1}".format(
                        left.schema.metadata, right.schema.metadata
                    )
                )
        for col in left.schema.names:
            assert left.schema.field(col).equals(
                right.schema.field(col), check_metadata=True
            )

    for col in left.column_names:
        a_left = pa.concat_arrays(left.column(col).chunks)
        a_right = pa.concat_arrays(right.column(col).chunks)
        if not a_left.equals(a_right):
            raise AssertionError(
                "Column '{0}' not equal:\n{1}".format(col, a_left.diff(a_right))
            )

    raise AssertionError("Tables not equal for unknown reason")


@pytest.mark.skipif(
    shapely.geos_version < (3, 9, 0),
    reason="Checking for empty is buggy with GEOS<3.9",
)  # an old GEOS is installed in the CI builds with the defaults channel
@pytest.mark.parametrize(
    "dim",
    [
        "xy",
        pytest.param(
            "xyz",
            marks=pytest.mark.skipif(
                shapely.geos_version < (3, 10, 0),
                reason="Cannot write 3D geometries with GEOS<3.10",
            ),
        ),
    ],
)
@pytest.mark.parametrize(
    "geometry_type",
    ["point", "linestring", "polygon", "multipoint", "multilinestring", "multipolygon"],
)
@pytest.mark.parametrize(
    "geometry_encoding, interleaved",
    [("WKB", None), ("geoarrow", True), ("geoarrow", False)],
    ids=["WKB", "geoarrow-interleaved", "geoarrow-separated"],
)
def test_geoarrow_export(geometry_type, dim, geometry_encoding, interleaved):
    base_path = DATA_PATH / "geoarrow"
    suffix = geometry_type + ("_z" if dim == "xyz" else "")

    # Read the example data
    df = feather.read_feather(base_path / f"example-{suffix}-wkb.arrow")
    df["geometry"] = GeoSeries.from_wkb(df["geometry"])
    df["row_number"] = df["row_number"].astype("int32")
    df = GeoDataFrame(df)
    df.geometry.array.crs = None

    # Read the expected data
    if geometry_encoding == "WKB":
        filename = f"example-{suffix}-wkb.arrow"
    else:
        filename = f"example-{suffix}{'-interleaved' if interleaved else ''}.arrow"
    expected = feather.read_table(base_path / filename)

    # GeoDataFrame -> Arrow Table
    result = pa_table(
        df.to_arrow(geometry_encoding=geometry_encoding, interleaved=interleaved)
    )
    # remove the "pandas" metadata
    result = result.replace_schema_metadata(None)

    mask_nonempty = None
    if (
        geometry_encoding == "WKB"
        and dim == "xyz"
        and geometry_type.startswith("multi")
    ):
        # for collections with z dimension, drop the empties because those don't
        # roundtrip correctly to WKB
        # (https://github.com/libgeos/geos/issues/888)
        mask_nonempty = pa.array(np.asarray(~df.geometry.is_empty))
        result = result.filter(mask_nonempty)
        expected = expected.filter(mask_nonempty)

    assert_table_equal(result, expected)

    # GeoSeries -> Arrow array
    if geometry_encoding != "WKB" and geometry_type == "point":
        # for points, we again have to handle NaNs separately, we already did that
        # for the table so let's just skip this part
        return
    result_arr = pa_array(
        df.geometry.to_arrow(
            geometry_encoding=geometry_encoding, interleaved=interleaved
        )
    )
    if mask_nonempty is not None:
        result_arr = result_arr.filter(mask_nonempty)
    assert result_arr.equals(expected["geometry"].chunk(0))


@pytest.mark.skipif(
    Version(shapely.__version__) < Version("2.0.2"),
    reason="from_ragged_array failing with read-only array input",
)
@pytest.mark.parametrize("encoding", ["WKB", "geoarrow"])
def test_geoarrow_multiple_geometry_crs(encoding):
    pytest.importorskip("pyproj")
    # ensure each geometry column has its own crs
    gdf = GeoDataFrame(geometry=[box(0, 0, 10, 10)], crs="epsg:4326")
    gdf["geom2"] = gdf.geometry.to_crs("epsg:3857")

    result = pa_table(gdf.to_arrow(geometry_encoding=encoding))
    meta1 = json.loads(
        result.schema.field("geometry").metadata[b"ARROW:extension:metadata"]
    )
    assert json.loads(meta1["crs"])["id"]["code"] == 4326
    meta2 = json.loads(
        result.schema.field("geom2").metadata[b"ARROW:extension:metadata"]
    )
    assert json.loads(meta2["crs"])["id"]["code"] == 3857

    roundtripped = GeoDataFrame.from_arrow(result)
    assert_geodataframe_equal(gdf, roundtripped)
    assert gdf.geometry.crs == "epsg:4326"
    assert gdf.geom2.crs == "epsg:3857"


@pytest.mark.parametrize("encoding", ["WKB", "geoarrow"])
def test_geoarrow_series_name_crs(encoding):
    pytest.importorskip("pyproj")
    pytest.importorskip("pyarrow", minversion="14.0.0")

    gser = GeoSeries([box(0, 0, 10, 10)], crs="epsg:4326", name="geom")
    schema_capsule, _ = gser.to_arrow(geometry_encoding=encoding).__arrow_c_array__()
    field = pa.Field._import_from_c_capsule(schema_capsule)
    assert field.name == "geom"
    assert field.metadata[b"ARROW:extension:name"] == (
        b"geoarrow.wkb" if encoding == "WKB" else b"geoarrow.polygon"
    )
    meta = json.loads(field.metadata[b"ARROW:extension:metadata"])
    assert json.loads(meta["crs"])["id"]["code"] == 4326

    # ensure it also works without a name
    gser = GeoSeries([box(0, 0, 10, 10)])
    schema_capsule, _ = gser.to_arrow(geometry_encoding=encoding).__arrow_c_array__()
    field = pa.Field._import_from_c_capsule(schema_capsule)
    assert field.name == ""


def test_geoarrow_unsupported_encoding():
    gdf = GeoDataFrame(geometry=[box(0, 0, 10, 10)], crs="epsg:4326")

    with pytest.raises(ValueError, match="Expected geometry encoding"):
        gdf.to_arrow(geometry_encoding="invalid")

    with pytest.raises(ValueError, match="Expected geometry encoding"):
        gdf.geometry.to_arrow(geometry_encoding="invalid")


def test_geoarrow_mixed_geometry_types():
    gdf = GeoDataFrame(
        {"geometry": [Point(0, 0), box(0, 0, 10, 10)]},
        crs="epsg:4326",
    )

    with pytest.raises(ValueError, match="Geometry type combination is not supported"):
        gdf.to_arrow(geometry_encoding="geoarrow")

    gdf = GeoDataFrame(
        {"geometry": [Point(0, 0), MultiPoint([(0, 0), (1, 1)])]},
        crs="epsg:4326",
    )
    result = pa_table(gdf.to_arrow(geometry_encoding="geoarrow"))
    assert (
        result.schema.field("geometry").metadata[b"ARROW:extension:name"]
        == b"geoarrow.multipoint"
    )


@pytest.mark.parametrize("geom_type", ["point", "polygon"])
@pytest.mark.parametrize(
    "encoding, interleaved", [("WKB", True), ("geoarrow", True), ("geoarrow", False)]
)
def test_geoarrow_missing(encoding, interleaved, geom_type):
    # dummy test for single geometry type until missing values are included
    # in the test data for test_geoarrow_export
    gdf = GeoDataFrame(
        geometry=[Point(0, 0) if geom_type == "point" else box(0, 0, 10, 10), None],
        crs="epsg:4326",
    )
    if (
        encoding == "geoarrow"
        and geom_type == "point"
        and interleaved
        and Version(pa.__version__) < Version("15.0.0")
    ):
        with pytest.raises(
            ValueError,
            match="Converting point geometries with missing values is not supported",
        ):
            gdf.to_arrow(geometry_encoding=encoding, interleaved=interleaved)
        return
    result = pa_table(gdf.to_arrow(geometry_encoding=encoding, interleaved=interleaved))
    assert result["geometry"].null_count == 1
    assert result["geometry"].is_null().to_pylist() == [False, True]


def test_geoarrow_include_z():
    gdf = GeoDataFrame({"geometry": [Point(0, 0), Point(1, 1), Point()]})

    table = pa_table(gdf.to_arrow(geometry_encoding="geoarrow"))
    assert table["geometry"].type.value_field.name == "xy"
    assert table["geometry"].type.list_size == 2

    table = pa_table(gdf.to_arrow(geometry_encoding="geoarrow", include_z=True))
    assert table["geometry"].type.value_field.name == "xyz"
    assert table["geometry"].type.list_size == 3
    assert np.isnan(table["geometry"].chunk(0).values.to_numpy()[2::3]).all()

    gdf = GeoDataFrame({"geometry": [Point(0, 0, 0), Point(1, 1, 1), Point()]})

    table = pa_table(gdf.to_arrow(geometry_encoding="geoarrow"))
    assert table["geometry"].type.value_field.name == "xyz"
    assert table["geometry"].type.list_size == 3

    table = pa_table(gdf.to_arrow(geometry_encoding="geoarrow", include_z=False))
    assert table["geometry"].type.value_field.name == "xy"
    assert table["geometry"].type.list_size == 2


@contextlib.contextmanager
def with_geoarrow_extension_types():
    gp = pytest.importorskip("geoarrow.pyarrow")
    gp.register_extension_types()
    try:
        yield
    finally:
        gp.unregister_extension_types()


@pytest.mark.parametrize("dim", ["xy", "xyz"])
@pytest.mark.parametrize(
    "geometry_type",
    ["point", "linestring", "polygon", "multipoint", "multilinestring", "multipolygon"],
)
def test_geoarrow_export_with_extension_types(geometry_type, dim):
    # ensure the exported data can be imported by geoarrow-pyarrow and are
    # recognized as extension types
    base_path = DATA_PATH / "geoarrow"
    suffix = geometry_type + ("_z" if dim == "xyz" else "")

    # Read the example data
    df = feather.read_feather(base_path / f"example-{suffix}-wkb.arrow")
    df["geometry"] = GeoSeries.from_wkb(df["geometry"])
    df["row_number"] = df["row_number"].astype("int32")
    df = GeoDataFrame(df)
    df.geometry.array.crs = None

    pytest.importorskip("geoarrow.pyarrow")

    with with_geoarrow_extension_types():
        result1 = pa_table(df.to_arrow(geometry_encoding="WKB"))
        assert isinstance(result1["geometry"].type, pa.ExtensionType)

        result2 = pa_table(df.to_arrow(geometry_encoding="geoarrow"))
        assert isinstance(result2["geometry"].type, pa.ExtensionType)

        result3 = pa_table(df.to_arrow(geometry_encoding="geoarrow", interleaved=False))
        assert isinstance(result3["geometry"].type, pa.ExtensionType)


@pytest.mark.skipif(
    Version(shapely.__version__) < Version("2.0.2"),
    reason="from_ragged_array failing with read-only array input",
)
@pytest.mark.parametrize("dim", ["xy", "xyz"])
@pytest.mark.parametrize(
    "geometry_type",
    [
        "point",
        "linestring",
        "polygon",
        "multipoint",
        "multilinestring",
        "multipolygon",
    ],
)
def test_geoarrow_import(geometry_type, dim):
    base_path = DATA_PATH / "geoarrow"
    suffix = geometry_type + ("_z" if dim == "xyz" else "")

    # Read the example data
    df = feather.read_feather(base_path / f"example-{suffix}-wkb.arrow")
    df["geometry"] = GeoSeries.from_wkb(df["geometry"])
    df = GeoDataFrame(df)
    df.geometry.crs = None

    table1 = feather.read_table(base_path / f"example-{suffix}-wkb.arrow")
    result1 = GeoDataFrame.from_arrow(table1)
    assert_geodataframe_equal(result1, df)

    table2 = feather.read_table(base_path / f"example-{suffix}-interleaved.arrow")
    result2 = GeoDataFrame.from_arrow(table2)
    assert_geodataframe_equal(result2, df)

    table3 = feather.read_table(base_path / f"example-{suffix}.arrow")
    result3 = GeoDataFrame.from_arrow(table3)
    assert_geodataframe_equal(result3, df)


@pytest.mark.skipif(
    Version(shapely.__version__) < Version("2.0.2"),
    reason="from_ragged_array failing with read-only array input",
)
@pytest.mark.parametrize("encoding", ["WKB", "geoarrow"])
def test_geoarrow_import_geometry_column(encoding):
    pytest.importorskip("pyproj")
    # ensure each geometry column has its own crs
    gdf = GeoDataFrame(geometry=[box(0, 0, 10, 10)])
    gdf["centroid"] = gdf.geometry.centroid

    result = GeoDataFrame.from_arrow(pa_table(gdf.to_arrow(geometry_encoding=encoding)))
    assert_geodataframe_equal(result, gdf)
    assert result.active_geometry_name == "geometry"

    result = GeoDataFrame.from_arrow(
        pa_table(gdf[["centroid"]].to_arrow(geometry_encoding=encoding))
    )
    assert result.active_geometry_name == "centroid"

    result = GeoDataFrame.from_arrow(
        pa_table(gdf.to_arrow(geometry_encoding=encoding)), geometry="centroid"
    )
    assert result.active_geometry_name == "centroid"
    assert_geodataframe_equal(result, gdf.set_geometry("centroid"))


def test_geoarrow_import_missing_geometry():
    pytest.importorskip("pyarrow", minversion="14.0.0")

    table = pa.table({"a": [0, 1, 2], "b": [0.1, 0.2, 0.3]})
    with pytest.raises(ValueError, match="No geometry column found"):
        GeoDataFrame.from_arrow(table)

    with pytest.raises(ValueError, match="No GeoArrow geometry field found"):
        GeoSeries.from_arrow(table["a"].chunk(0))


def test_geoarrow_import_capsule_interface():
    # ensure we can import non-pyarrow object
    pytest.importorskip("pyarrow", minversion="14.0.0")
    gdf = GeoDataFrame({"col": [1]}, geometry=[box(0, 0, 10, 10)])

    result = GeoDataFrame.from_arrow(gdf.to_arrow())
    assert_geodataframe_equal(result, gdf)


@pytest.mark.parametrize("dim", ["xy", "xyz"])
@pytest.mark.parametrize(
    "geometry_type",
    ["point", "linestring", "polygon", "multipoint", "multilinestring", "multipolygon"],
)
def test_geoarrow_import_from_extension_types(geometry_type, dim):
    # ensure the exported data can be imported by geoarrow-pyarrow and are
    # recognized as extension types
    pytest.importorskip("pyproj")
    base_path = DATA_PATH / "geoarrow"
    suffix = geometry_type + ("_z" if dim == "xyz" else "")

    # Read the example data
    df = feather.read_feather(base_path / f"example-{suffix}-wkb.arrow")
    df["geometry"] = GeoSeries.from_wkb(df["geometry"])
    df = GeoDataFrame(df, crs="EPSG:3857")

    pytest.importorskip("geoarrow.pyarrow")

    with with_geoarrow_extension_types():
        result1 = GeoDataFrame.from_arrow(
            pa_table(df.to_arrow(geometry_encoding="WKB"))
        )
        assert_geodataframe_equal(result1, df)

        result2 = GeoDataFrame.from_arrow(
            pa_table(df.to_arrow(geometry_encoding="geoarrow"))
        )
        assert_geodataframe_equal(result2, df)

        result3 = GeoDataFrame.from_arrow(
            pa_table(df.to_arrow(geometry_encoding="geoarrow", interleaved=False))
        )
        assert_geodataframe_equal(result3, df)


def test_geoarrow_import_geoseries():
    pytest.importorskip("pyproj")
    gp = pytest.importorskip("geoarrow.pyarrow")
    ser = GeoSeries.from_wkt(["POINT (1 1)", "POINT (2 2)"], crs="EPSG:3857")

    with with_geoarrow_extension_types():
        arr = gp.array(ser.to_arrow(geometry_encoding="WKB"))
        result = GeoSeries.from_arrow(arr)
        assert_geoseries_equal(result, ser)

        arr = gp.array(ser.to_arrow(geometry_encoding="geoarrow"))
        result = GeoSeries.from_arrow(arr)
        assert_geoseries_equal(result, ser)

        # the name is lost when going through a pyarrow.Array
        ser.name = "name"
        arr = gp.array(ser.to_arrow())
        result = GeoSeries.from_arrow(arr)
        assert result.name is None
        # we can specify the name as one of the kwargs
        result = GeoSeries.from_arrow(arr, name="name")
        assert_geoseries_equal(result, ser)


def test_geoarrow_import_unknown_geoarrow_type():
    gdf = GeoDataFrame({"col": [1]}, geometry=[box(0, 0, 10, 10)])
    table = pa_table(gdf.to_arrow())
    schema = table.schema
    new_field = schema.field("geometry").with_metadata(
        {
            b"ARROW:extension:name": b"geoarrow.unknown",
            b"ARROW:extension:metadata": b"{}",
        }
    )

    new_schema = pa.schema([schema.field(0), new_field])
    new_table = table.cast(new_schema)

    with pytest.raises(TypeError, match="Unknown GeoArrow extension type"):
        GeoDataFrame.from_arrow(new_table)
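
For orientation, the two point layouts that assert_table_equal branches on can be built directly with pyarrow. A minimal pure-pyarrow sketch (not geopandas code): interleaved coordinates live in a fixed-size list, separated coordinates in a struct.

# Minimal pyarrow sketch (not geopandas code) of the two GeoArrow point
# layouts: "interleaved" stores [x, y] pairs in a FixedSizeListArray,
# while "separated" stores one child array per dimension in a StructArray.
import pyarrow as pa

interleaved = pa.FixedSizeListArray.from_arrays(
    pa.array([1.0, 1.0, 2.0, 2.0]), 2  # x0, y0, x1, y1 with list_size 2
)
separated = pa.StructArray.from_arrays(
    [pa.array([1.0, 2.0]), pa.array([1.0, 2.0])], names=["x", "y"]
)

assert pa.types.is_fixed_size_list(interleaved.type)
assert pa.types.is_struct(separated.type)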
@@ -0,0 +1,306 @@
from collections import OrderedDict

import numpy as np
import pandas as pd

from shapely.geometry import (
    LineString,
    MultiLineString,
    MultiPoint,
    MultiPolygon,
    Point,
    Polygon,
)

from geopandas import GeoDataFrame
from geopandas.io.file import infer_schema

import pytest

# Credit: Polygons below come from Montreal city Open Data portal
# http://donnees.ville.montreal.qc.ca/dataset/unites-evaluation-fonciere
city_hall_boundaries = Polygon(
    (
        (-73.5541107525234, 45.5091983609661),
        (-73.5546126200639, 45.5086813829106),
        (-73.5540185061397, 45.5084409343852),
        (-73.5539986525799, 45.5084323044531),
        (-73.5535801792994, 45.5089539203786),
        (-73.5541107525234, 45.5091983609661),
    )
)
vauquelin_place = Polygon(
    (
        (-73.5542465586147, 45.5081555487952),
        (-73.5540185061397, 45.5084409343852),
        (-73.5546126200639, 45.5086813829106),
        (-73.5548825850032, 45.5084033554357),
        (-73.5542465586147, 45.5081555487952),
    )
)

city_hall_walls = [
    LineString(
        (
            (-73.5541107525234, 45.5091983609661),
            (-73.5546126200639, 45.5086813829106),
            (-73.5540185061397, 45.5084409343852),
        )
    ),
    LineString(
        (
            (-73.5539986525799, 45.5084323044531),
            (-73.5535801792994, 45.5089539203786),
            (-73.5541107525234, 45.5091983609661),
        )
    ),
]

city_hall_entrance = Point(-73.553785, 45.508722)
city_hall_balcony = Point(-73.554138, 45.509080)
city_hall_council_chamber = Point(-73.554246, 45.508931)

point_3D = Point(-73.553785, 45.508722, 300)
linestring_3D = LineString(
    (
        (-73.5541107525234, 45.5091983609661, 300),
        (-73.5546126200639, 45.5086813829106, 300),
        (-73.5540185061397, 45.5084409343852, 300),
    )
)
polygon_3D = Polygon(
    (
        (-73.5541107525234, 45.5091983609661, 300),
        (-73.5535801792994, 45.5089539203786, 300),
        (-73.5541107525234, 45.5091983609661, 300),
    )
)


def test_infer_schema_only_points():
    df = GeoDataFrame(geometry=[city_hall_entrance, city_hall_balcony])

    assert infer_schema(df) == {"geometry": "Point", "properties": OrderedDict()}


def test_infer_schema_points_and_multipoints():
    df = GeoDataFrame(
        geometry=[
            MultiPoint([city_hall_entrance, city_hall_balcony]),
            city_hall_balcony,
        ]
    )

    assert infer_schema(df) == {
        "geometry": ["MultiPoint", "Point"],
        "properties": OrderedDict(),
    }


def test_infer_schema_only_multipoints():
    df = GeoDataFrame(
        geometry=[
            MultiPoint(
                [city_hall_entrance, city_hall_balcony, city_hall_council_chamber]
            )
        ]
    )

    assert infer_schema(df) == {"geometry": "MultiPoint", "properties": OrderedDict()}


def test_infer_schema_only_linestrings():
    df = GeoDataFrame(geometry=city_hall_walls)

    assert infer_schema(df) == {"geometry": "LineString", "properties": OrderedDict()}


def test_infer_schema_linestrings_and_multilinestrings():
    df = GeoDataFrame(geometry=[MultiLineString(city_hall_walls), city_hall_walls[0]])

    assert infer_schema(df) == {
        "geometry": ["MultiLineString", "LineString"],
        "properties": OrderedDict(),
    }


def test_infer_schema_only_multilinestrings():
    df = GeoDataFrame(geometry=[MultiLineString(city_hall_walls)])

    assert infer_schema(df) == {
        "geometry": "MultiLineString",
        "properties": OrderedDict(),
    }


def test_infer_schema_only_polygons():
    df = GeoDataFrame(geometry=[city_hall_boundaries, vauquelin_place])

    assert infer_schema(df) == {"geometry": "Polygon", "properties": OrderedDict()}


def test_infer_schema_polygons_and_multipolygons():
    df = GeoDataFrame(
        geometry=[
            MultiPolygon((city_hall_boundaries, vauquelin_place)),
            city_hall_boundaries,
        ]
    )

    assert infer_schema(df) == {
        "geometry": ["MultiPolygon", "Polygon"],
        "properties": OrderedDict(),
    }


def test_infer_schema_only_multipolygons():
    df = GeoDataFrame(geometry=[MultiPolygon((city_hall_boundaries, vauquelin_place))])

    assert infer_schema(df) == {"geometry": "MultiPolygon", "properties": OrderedDict()}


def test_infer_schema_multiple_shape_types():
    df = GeoDataFrame(
        geometry=[
            MultiPolygon((city_hall_boundaries, vauquelin_place)),
            city_hall_boundaries,
            MultiLineString(city_hall_walls),
            city_hall_walls[0],
            MultiPoint([city_hall_entrance, city_hall_balcony]),
            city_hall_balcony,
        ]
    )

    assert infer_schema(df) == {
        "geometry": [
            "MultiPolygon",
            "Polygon",
            "MultiLineString",
            "LineString",
            "MultiPoint",
            "Point",
        ],
        "properties": OrderedDict(),
    }


def test_infer_schema_mixed_3D_shape_type():
    df = GeoDataFrame(
        geometry=[
            MultiPolygon((city_hall_boundaries, vauquelin_place)),
            city_hall_boundaries,
            MultiLineString(city_hall_walls),
            city_hall_walls[0],
            MultiPoint([city_hall_entrance, city_hall_balcony]),
            city_hall_balcony,
            point_3D,
        ]
    )

    assert infer_schema(df) == {
        "geometry": [
            "3D Point",
            "MultiPolygon",
            "Polygon",
            "MultiLineString",
            "LineString",
            "MultiPoint",
            "Point",
        ],
        "properties": OrderedDict(),
    }


def test_infer_schema_mixed_3D_Point():
    df = GeoDataFrame(geometry=[city_hall_balcony, point_3D])

    assert infer_schema(df) == {
        "geometry": ["3D Point", "Point"],
        "properties": OrderedDict(),
    }


def test_infer_schema_only_3D_Points():
    df = GeoDataFrame(geometry=[point_3D, point_3D])

    assert infer_schema(df) == {"geometry": "3D Point", "properties": OrderedDict()}


def test_infer_schema_mixed_3D_linestring():
    df = GeoDataFrame(geometry=[city_hall_walls[0], linestring_3D])

    assert infer_schema(df) == {
        "geometry": ["3D LineString", "LineString"],
        "properties": OrderedDict(),
    }


def test_infer_schema_only_3D_linestrings():
    df = GeoDataFrame(geometry=[linestring_3D, linestring_3D])

    assert infer_schema(df) == {
        "geometry": "3D LineString",
        "properties": OrderedDict(),
    }


def test_infer_schema_mixed_3D_Polygon():
    df = GeoDataFrame(geometry=[city_hall_boundaries, polygon_3D])

    assert infer_schema(df) == {
        "geometry": ["3D Polygon", "Polygon"],
        "properties": OrderedDict(),
    }


def test_infer_schema_only_3D_Polygons():
    df = GeoDataFrame(geometry=[polygon_3D, polygon_3D])

    assert infer_schema(df) == {"geometry": "3D Polygon", "properties": OrderedDict()}


def test_infer_schema_null_geometry_and_2D_point():
    df = GeoDataFrame(geometry=[None, city_hall_entrance])

    # None geometry type is then omitted
    assert infer_schema(df) == {"geometry": "Point", "properties": OrderedDict()}


def test_infer_schema_null_geometry_and_3D_point():
    df = GeoDataFrame(geometry=[None, point_3D])

    # None geometry type is then omitted
    assert infer_schema(df) == {"geometry": "3D Point", "properties": OrderedDict()}


def test_infer_schema_null_geometry_all():
    df = GeoDataFrame(geometry=[None, None])

    # None geometry type is then replaced by 'Unknown'
    # (default geometry type supported by Fiona)
    assert infer_schema(df) == {"geometry": "Unknown", "properties": OrderedDict()}


@pytest.mark.parametrize(
    "array_data,dtype", [([1, 2**31 - 1], np.int32), ([1, np.nan], pd.Int32Dtype())]
)
def test_infer_schema_int32(array_data, dtype):
    int32col = pd.array(data=array_data, dtype=dtype)
    df = GeoDataFrame(geometry=[city_hall_entrance, city_hall_balcony])
    df["int32_column"] = int32col

    assert infer_schema(df) == {
        "geometry": "Point",
        "properties": OrderedDict([("int32_column", "int32")]),
    }


def test_infer_schema_int64():
    int64col = pd.array([1, np.nan], dtype=pd.Int64Dtype())
    df = GeoDataFrame(geometry=[city_hall_entrance, city_hall_balcony])
    df["int64_column"] = int64col

    assert infer_schema(df) == {
        "geometry": "Point",
        "properties": OrderedDict([("int64_column", "int")]),
    }
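
The schema dicts asserted above are consumed by Fiona when writing files. A hedged sketch, assuming Fiona is installed; the output path is hypothetical:

# Hedged sketch (assumes Fiona is installed; the output path is
# hypothetical): the dict produced by infer_schema has the same shape as
# the "schema" argument Fiona expects when opening a file for writing.
from collections import OrderedDict

from shapely.geometry import Point

import fiona
from geopandas import GeoDataFrame
from geopandas.io.file import infer_schema

df = GeoDataFrame(geometry=[Point(0, 0), Point(1, 1)])
schema = infer_schema(df)
assert schema == {"geometry": "Point", "properties": OrderedDict()}

with fiona.open(
    "/tmp/points.shp",  # hypothetical path
    mode="w",
    driver="ESRI Shapefile",
    schema=schema,
    crs="EPSG:4326",
) as sink:
    pass  # feature records would be written here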
@@ -0,0 +1,56 @@
"""
See generate_legacy_storage_files.py for the creation of the legacy files.

"""

import glob
import os
import pathlib

import pandas as pd

import pytest
from geopandas.testing import assert_geodataframe_equal

DATA_PATH = pathlib.Path(os.path.dirname(__file__)) / "data"


@pytest.fixture(scope="module")
def current_pickle_data():
    # our current version pickle data
    from .generate_legacy_storage_files import create_pickle_data

    return create_pickle_data()


files = glob.glob(str(DATA_PATH / "pickle" / "*.pickle"))


@pytest.fixture(params=files, ids=[os.path.basename(p) for p in files])
def legacy_pickle(request):
    return request.param


@pytest.mark.skip(
    reason=(
        "shapely 2.0/pygeos-based unpickling currently only works for "
        "shapely-2.0/pygeos-written files"
    ),
)
def test_legacy_pickles(current_pickle_data, legacy_pickle):
    result = pd.read_pickle(legacy_pickle)

    for name, value in result.items():
        expected = current_pickle_data[name]
        assert_geodataframe_equal(value, expected)


def test_round_trip_current(tmpdir, current_pickle_data):
    data = current_pickle_data

    for name, value in data.items():
        path = str(tmpdir / "{}.pickle".format(name))
        value.to_pickle(path)
        result = pd.read_pickle(path)
        assert_geodataframe_equal(result, value)
        assert isinstance(result.has_sindex, bool)
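
The final assertion guards against unpickled frames carrying a broken spatial-index state. A minimal sketch of the property it exercises (an assumption about intent, not part of the test file):

# Minimal sketch of the property exercised by the last assertion above:
# has_sindex reports whether a spatial index has been built, and accessing
# .sindex builds one on demand.
from shapely.geometry import Point
from geopandas import GeoDataFrame

gdf = GeoDataFrame(geometry=[Point(0, 0), Point(1, 1)])
assert gdf.has_sindex is False  # nothing built yet
gdf.sindex                      # building happens on first access
assert gdf.has_sindex is True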
@@ -0,0 +1,878 @@
|
||||
"""
|
||||
Tests here include reading/writing to different types of spatial databases.
|
||||
The spatial database tests may not work without additional system
|
||||
configuration. postGIS tests require a test database to have been setup;
|
||||
see geopandas.tests.util for more information.
|
||||
"""
|
||||
|
||||
import os
|
||||
import warnings
|
||||
from importlib.util import find_spec
|
||||
|
||||
import pandas as pd
|
||||
|
||||
import geopandas
|
||||
import geopandas._compat as compat
|
||||
from geopandas import GeoDataFrame, read_file, read_postgis
|
||||
from geopandas._compat import HAS_PYPROJ
|
||||
from geopandas.io.sql import _get_conn as get_conn
|
||||
from geopandas.io.sql import _write_postgis as write_postgis
|
||||
|
||||
import pytest
|
||||
from geopandas.tests.util import (
|
||||
create_postgis,
|
||||
create_spatialite,
|
||||
mock,
|
||||
validate_boro_df,
|
||||
)
|
||||
|
||||
try:
|
||||
from sqlalchemy import text
|
||||
except ImportError:
|
||||
# Avoid local imports for text in all sqlalchemy tests
|
||||
# all tests using text use engine_postgis, which ensures sqlalchemy is available
|
||||
text = str
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def df_nybb(nybb_filename):
|
||||
df = read_file(nybb_filename)
|
||||
return df
|
||||
|
||||
|
||||
def check_available_postgis_drivers() -> list[str]:
|
||||
"""Work out which of psycopg2 and psycopg are available.
|
||||
This prevents tests running if the relevant package isn't installed
|
||||
(rather than being skipped, as skips are treated as failures during postgis CI)
|
||||
"""
|
||||
drivers = []
|
||||
if find_spec("psycopg"):
|
||||
drivers.append("psycopg")
|
||||
if find_spec("psycopg2"):
|
||||
drivers.append("psycopg2")
|
||||
return drivers
|
||||
|
||||
|
||||
POSTGIS_DRIVERS = check_available_postgis_drivers()
|
||||
|
||||
|
||||
def prepare_database_credentials() -> dict:
|
||||
"""Gather postgres connection credentials from environment variables."""
|
||||
return {
|
||||
"dbname": "test_geopandas",
|
||||
"user": os.environ.get("PGUSER"),
|
||||
"password": os.environ.get("PGPASSWORD"),
|
||||
"host": os.environ.get("PGHOST"),
|
||||
"port": os.environ.get("PGPORT"),
|
||||
}
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def connection_postgis(request):
|
||||
"""Create a postgres connection using either psycopg2 or psycopg.
|
||||
|
||||
Use this as an indirect fixture, where the request parameter is POSTGIS_DRIVERS."""
|
||||
psycopg = pytest.importorskip(request.param)
|
||||
|
||||
try:
|
||||
con = psycopg.connect(**prepare_database_credentials())
|
||||
except psycopg.OperationalError:
|
||||
pytest.skip("Cannot connect with postgresql database")
|
||||
with warnings.catch_warnings():
|
||||
warnings.filterwarnings(
|
||||
"ignore", message="pandas only supports SQLAlchemy connectable.*"
|
||||
)
|
||||
yield con
|
||||
con.close()
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def engine_postgis(request):
|
||||
"""
|
||||
Initiate a sqlalchemy connection engine using either psycopg2 or psycopg.
|
||||
|
||||
Use this as an indirect fixture, where the request parameter is POSTGIS_DRIVERS.
|
||||
"""
|
||||
sqlalchemy = pytest.importorskip("sqlalchemy")
|
||||
from sqlalchemy.engine.url import URL
|
||||
|
||||
credentials = prepare_database_credentials()
|
||||
try:
|
||||
con = sqlalchemy.create_engine(
|
||||
URL.create(
|
||||
drivername=f"postgresql+{request.param}",
|
||||
username=credentials["user"],
|
||||
database=credentials["dbname"],
|
||||
password=credentials["password"],
|
||||
host=credentials["host"],
|
||||
port=credentials["port"],
|
||||
)
|
||||
)
|
||||
con.connect()
|
||||
except Exception:
|
||||
pytest.skip("Cannot connect with postgresql database")
|
||||
|
||||
yield con
|
||||
con.dispose()
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def connection_spatialite():
|
||||
"""
|
||||
Return a memory-based SQLite3 connection with SpatiaLite enabled & initialized.
|
||||
|
||||
`The sqlite3 module must be built with loadable extension support
|
||||
<https://docs.python.org/3/library/sqlite3.html#f1>`_ and
|
||||
`SpatiaLite <https://www.gaia-gis.it/fossil/libspatialite/index>`_
|
||||
must be available on the system as a SQLite module.
|
||||
Packages available on Anaconda meet requirements.
|
||||
|
||||
Exceptions
|
||||
----------
|
||||
``AttributeError`` on missing support for loadable SQLite extensions
|
||||
``sqlite3.OperationalError`` on missing SpatiaLite
|
||||
"""
|
||||
sqlite3 = pytest.importorskip("sqlite3")
|
||||
try:
|
||||
with sqlite3.connect(":memory:") as con:
|
||||
con.enable_load_extension(True)
|
||||
con.load_extension("mod_spatialite")
|
||||
con.execute("SELECT InitSpatialMetaData(TRUE)")
|
||||
except Exception:
|
||||
con.close()
|
||||
pytest.skip("Cannot setup spatialite database")
|
||||
|
||||
yield con
|
||||
con.close()
|
||||
|
||||
|
||||
def drop_table_if_exists(conn_or_engine, table):
|
||||
sqlalchemy = pytest.importorskip("sqlalchemy")
|
||||
|
||||
if sqlalchemy.inspect(conn_or_engine).has_table(table):
|
||||
metadata = sqlalchemy.MetaData()
|
||||
with warnings.catch_warnings():
|
||||
warnings.filterwarnings(
|
||||
"ignore", message="Did not recognize type 'geometry' of column.*"
|
||||
)
|
||||
metadata.reflect(conn_or_engine)
|
||||
table = metadata.tables.get(table)
|
||||
if table is not None:
|
||||
table.drop(conn_or_engine, checkfirst=True)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def df_mixed_single_and_multi():
|
||||
from shapely.geometry import LineString, MultiLineString, Point
|
||||
|
||||
df = geopandas.GeoDataFrame(
|
||||
{
|
||||
"geometry": [
|
||||
LineString([(0, 0), (1, 1)]),
|
||||
MultiLineString([[(0, 0), (1, 1)], [(2, 2), (3, 3)]]),
|
||||
Point(0, 1),
|
||||
]
|
||||
},
|
||||
crs="epsg:4326",
|
||||
)
|
||||
return df
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def df_geom_collection():
|
||||
from shapely.geometry import GeometryCollection, LineString, Point, Polygon
|
||||
|
||||
df = geopandas.GeoDataFrame(
|
||||
{
|
||||
"geometry": [
|
||||
GeometryCollection(
|
||||
[
|
||||
Polygon([(0, 0), (1, 1), (0, 1)]),
|
||||
LineString([(0, 0), (1, 1)]),
|
||||
Point(0, 0),
|
||||
]
|
||||
)
|
||||
]
|
||||
},
|
||||
crs="epsg:4326",
|
||||
)
|
||||
return df
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def df_linear_ring():
|
||||
from shapely.geometry import LinearRing
|
||||
|
||||
df = geopandas.GeoDataFrame(
|
||||
{"geometry": [LinearRing(((0, 0), (0, 1), (1, 1), (1, 0)))]}, crs="epsg:4326"
|
||||
)
|
||||
return df
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def df_3D_geoms():
|
||||
from shapely.geometry import LineString, Point, Polygon
|
||||
|
||||
df = geopandas.GeoDataFrame(
|
||||
{
|
||||
"geometry": [
|
||||
LineString([(0, 0, 0), (1, 1, 1)]),
|
||||
Polygon([(0, 0, 0), (1, 1, 1), (0, 1, 1)]),
|
||||
Point(0, 1, 2),
|
||||
]
|
||||
},
|
||||
crs="epsg:4326",
|
||||
)
|
||||
return df
|
||||
|
||||
|
||||
class TestIO:
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_get_conn(self, engine_postgis):
|
||||
Connection = pytest.importorskip("sqlalchemy.engine.base").Connection
|
||||
|
||||
engine = engine_postgis
|
||||
with get_conn(engine) as output:
|
||||
assert isinstance(output, Connection)
|
||||
with engine.connect() as conn:
|
||||
with get_conn(conn) as output:
|
||||
assert isinstance(output, Connection)
|
||||
with pytest.raises(ValueError):
|
||||
with get_conn(object()):
|
||||
pass
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_read_postgis_default(self, connection_postgis, df_nybb):
|
||||
con = connection_postgis
|
||||
create_postgis(con, df_nybb)
|
||||
|
||||
sql = "SELECT * FROM nybb;"
|
||||
df = read_postgis(sql, con)
|
||||
|
||||
validate_boro_df(df)
|
||||
# no crs defined on the created geodatabase, and none specified
|
||||
# by user; should not be set to 0, as from get_srid failure
|
||||
assert df.crs is None
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_read_postgis_custom_geom_col(self, connection_postgis, df_nybb):
|
||||
con = connection_postgis
|
||||
geom_col = "the_geom"
|
||||
create_postgis(con, df_nybb, geom_col=geom_col)
|
||||
|
||||
sql = "SELECT * FROM nybb;"
|
||||
df = read_postgis(sql, con, geom_col=geom_col)
|
||||
|
||||
validate_boro_df(df)
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_read_postgis_select_geom_as(self, connection_postgis, df_nybb):
|
||||
"""Tests that a SELECT {geom} AS {some_other_geom} works."""
|
||||
con = connection_postgis
|
||||
orig_geom = "geom"
|
||||
out_geom = "the_geom"
|
||||
create_postgis(con, df_nybb, geom_col=orig_geom)
|
||||
|
||||
sql = """SELECT borocode, boroname, shape_leng, shape_area,
|
||||
{} as {} FROM nybb;""".format(
|
||||
orig_geom, out_geom
|
||||
)
|
||||
df = read_postgis(sql, con, geom_col=out_geom)
|
||||
|
||||
validate_boro_df(df)
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_read_postgis_get_srid(self, connection_postgis, df_nybb):
|
||||
"""Tests that an SRID can be read from a geodatabase (GH #451)."""
|
||||
con = connection_postgis
|
||||
crs = "epsg:4269"
|
||||
df_reproj = df_nybb.to_crs(crs)
|
||||
create_postgis(con, df_reproj, srid=4269)
|
||||
|
||||
sql = "SELECT * FROM nybb;"
|
||||
df = read_postgis(sql, con)
|
||||
|
||||
validate_boro_df(df)
|
||||
assert df.crs == crs
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_read_postgis_override_srid(self, connection_postgis, df_nybb):
|
||||
"""Tests that a user specified CRS overrides the geodatabase SRID."""
|
||||
con = connection_postgis
|
||||
orig_crs = df_nybb.crs
|
||||
create_postgis(con, df_nybb, srid=4269)
|
||||
|
||||
sql = "SELECT * FROM nybb;"
|
||||
df = read_postgis(sql, con, crs=orig_crs)
|
||||
|
||||
validate_boro_df(df)
|
||||
assert df.crs == orig_crs
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_from_postgis_default(self, connection_postgis, df_nybb):
|
||||
con = connection_postgis
|
||||
create_postgis(con, df_nybb)
|
||||
|
||||
sql = "SELECT * FROM nybb;"
|
||||
df = GeoDataFrame.from_postgis(sql, con)
|
||||
|
||||
validate_boro_df(df, case_sensitive=False)
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_from_postgis_custom_geom_col(self, connection_postgis, df_nybb):
|
||||
con = connection_postgis
|
||||
geom_col = "the_geom"
|
||||
create_postgis(con, df_nybb, geom_col=geom_col)
|
||||
|
||||
sql = "SELECT * FROM nybb;"
|
||||
df = GeoDataFrame.from_postgis(sql, con, geom_col=geom_col)
|
||||
|
||||
validate_boro_df(df, case_sensitive=False)
|
||||
|
||||
def test_read_postgis_null_geom(self, connection_spatialite, df_nybb):
|
||||
"""Tests that geometry with NULL is accepted."""
|
||||
con = connection_spatialite
|
||||
geom_col = df_nybb.geometry.name
|
||||
df_nybb.geometry.iat[0] = None
|
||||
create_spatialite(con, df_nybb)
|
||||
        sql = (
            "SELECT ogc_fid, borocode, boroname, shape_leng, shape_area, "
            'AsEWKB("{0}") AS "{0}" FROM nybb'.format(geom_col)
        )
        df = read_postgis(sql, con, geom_col=geom_col)
        validate_boro_df(df)

    def test_read_postgis_binary(self, connection_spatialite, df_nybb):
        """Tests that geometry read as binary is accepted."""
        con = connection_spatialite
        geom_col = df_nybb.geometry.name
        create_spatialite(con, df_nybb)
        sql = (
            "SELECT ogc_fid, borocode, boroname, shape_leng, shape_area, "
            'ST_AsBinary("{0}") AS "{0}" FROM nybb'.format(geom_col)
        )
        df = read_postgis(sql, con, geom_col=geom_col)
        validate_boro_df(df)

    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_read_postgis_chunksize(self, connection_postgis, df_nybb):
        """Test chunksize argument"""
        chunksize = 2
        con = connection_postgis
        create_postgis(con, df_nybb)

        sql = "SELECT * FROM nybb;"
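        # with chunksize set, read_postgis yields GeoDataFrame chunks,
        # so concatenate them back into a single frame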
        df = pd.concat(read_postgis(sql, con, chunksize=chunksize))

        validate_boro_df(df)
        # no crs defined on the created geodatabase, and none specified
        # by user; should not be set to 0, as from get_srid failure
        assert df.crs is None

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_default(self, engine_postgis, df_nybb):
        """Tests that GeoDataFrame can be written to PostGIS with defaults."""
        engine = engine_postgis
        table = "nybb"

        # If table exists, delete it before trying to write with defaults
        drop_table_if_exists(engine, table)

        # Write to db
        write_postgis(df_nybb, con=engine, name=table, if_exists="fail")
        # Validate
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_uppercase_tablename(self, engine_postgis, df_nybb):
        """Tests writing GeoDataFrame to PostGIS with uppercase tablename."""
        engine = engine_postgis
        table = "aTestTable"

        # If table exists, delete it before trying to write with defaults
        drop_table_if_exists(engine, table)

        # Write to db
        write_postgis(df_nybb, con=engine, name=table, if_exists="fail")
        # Validate
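        # the mixed-case name must be double-quoted in the query, otherwise
        # PostgreSQL folds the identifier to lowercase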
        sql = text('SELECT * FROM "{table}";'.format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_sqlalchemy_connection(self, engine_postgis, df_nybb):
        """Tests writing a GeoDataFrame to PostGIS via an SQLAlchemy connection."""
        with engine_postgis.begin() as con:
            table = "nybb_con"

            # If table exists, delete it before trying to write with defaults
            drop_table_if_exists(con, table)

            # Write to db
            write_postgis(df_nybb, con=con, name=table, if_exists="fail")
            # Validate
            sql = text("SELECT * FROM {table};".format(table=table))
            df = read_postgis(sql, con, geom_col="geometry")
            validate_boro_df(df)

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_fail_when_table_exists(self, engine_postgis, df_nybb):
        """
        Tests that writing to an existing table raises an error when
        if_exists='fail'.
        """
        engine = engine_postgis

        table = "nybb"

        # Ensure table exists
        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")

        # Writing again with if_exists='fail' must raise
        with pytest.raises(ValueError, match="already exists"):
            write_postgis(df_nybb, con=engine, name=table, if_exists="fail")

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_replace_when_table_exists(self, engine_postgis, df_nybb):
        """
        Tests that replacing an existing table is possible when
        if_exists='replace'.
        """
        engine = engine_postgis

        table = "nybb"

        # Ensure table exists
        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
        # Overwrite
        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
        # Validate
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_append_when_table_exists(self, engine_postgis, df_nybb):
        """
        Tests that appending to an existing table produces correct results when
        if_exists='append'.
        """
        engine = engine_postgis

        table = "nybb"

        orig_rows, orig_cols = df_nybb.shape
        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
        write_postgis(df_nybb, con=engine, name=table, if_exists="append")
        # Validate
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        new_rows, new_cols = df.shape

        # There should be twice as many rows in the new table
        assert new_rows == orig_rows * 2, (
            "There should be {target} rows, found: {current}".format(
                target=orig_rows * 2, current=new_rows
            )
        )
        # Number of columns should stay the same
        assert new_cols == orig_cols, (
            "There should be {target} columns, found: {current}".format(
                target=orig_cols, current=new_cols
            )
        )

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_without_crs(self, engine_postgis, df_nybb):
        """
        Tests that GeoDataFrame can be written to PostGIS without CRS information.
        """
        engine = engine_postgis

        table = "nybb"

        # Write to db
        df_nybb.geometry.array.crs = None
        with pytest.warns(UserWarning, match="Could not parse CRS from the GeoDataF"):
            write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
        # Validate that srid is 0
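        # Find_SRID looks up the SRID registered for the geometry column;
        # PostGIS uses 0 for an unknown CRS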
        sql = text(
            "SELECT Find_SRID('{schema}', '{table}', '{geom_col}');".format(
                schema="public", table=table, geom_col="geometry"
            )
        )
        with engine.connect() as conn:
            target_srid = conn.execute(sql).fetchone()[0]
        assert target_srid == 0, "SRID should be 0, found %s" % target_srid

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_with_esri_authority(self, engine_postgis, df_nybb):
        """
        Tests that GeoDataFrame can be written to PostGIS with ESRI Authority
        CRS information (GH #2414).
        """
        engine = engine_postgis

        table = "nybb"

        # Write to db
        df_nybb_esri = df_nybb.to_crs("ESRI:102003")
        write_postgis(df_nybb_esri, con=engine, name=table, if_exists="replace")
        # Validate that srid is 102003
        sql = text(
            "SELECT Find_SRID('{schema}', '{table}', '{geom_col}');".format(
                schema="public", table=table, geom_col="geometry"
            )
        )
        with engine.connect() as conn:
            target_srid = conn.execute(sql).fetchone()[0]
        assert target_srid == 102003, "SRID should be 102003, found %s" % target_srid

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_geometry_collection(
        self, engine_postgis, df_geom_collection
    ):
        """
        Tests that writing a mix of different geometry types is possible.
        """
        engine = engine_postgis

        table = "geomtype_tests"

        write_postgis(df_geom_collection, con=engine, name=table, if_exists="replace")

        # Validate geometry type
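        # GeometryType() reports the PostGIS type name per row; DISTINCT
        # should collapse them to a single GEOMETRYCOLLECTION entry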
        sql = text(
            "SELECT DISTINCT(GeometryType(geometry)) FROM {table} ORDER BY 1;".format(
                table=table
            )
        )
        with engine.connect() as conn:
            geom_type = conn.execute(sql).fetchone()[0]
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")

        assert geom_type.upper() == "GEOMETRYCOLLECTION"
        assert df.geom_type.unique()[0] == "GeometryCollection"

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_mixed_geometry_types(
        self, engine_postgis, df_mixed_single_and_multi
    ):
        """
        Tests that writing a mix of single and MultiGeometries is possible.
        """
        engine = engine_postgis

        table = "geomtype_tests"

        write_postgis(
            df_mixed_single_and_multi, con=engine, name=table, if_exists="replace"
        )

        # Validate geometry type
        sql = text(
            "SELECT DISTINCT GeometryType(geometry) FROM {table} ORDER BY 1;".format(
                table=table
            )
        )
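        # ORDER BY 1 sorts the distinct type names alphabetically, which
        # fixes the order asserted below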
        with engine.connect() as conn:
            res = conn.execute(sql).fetchall()
        assert res[0][0].upper() == "LINESTRING"
        assert res[1][0].upper() == "MULTILINESTRING"
        assert res[2][0].upper() == "POINT"

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_linear_ring(self, engine_postgis, df_linear_ring):
        """
        Tests that writing a LinearRing works.
        """
        engine = engine_postgis

        table = "geomtype_tests"

        write_postgis(df_linear_ring, con=engine, name=table, if_exists="replace")

        # Validate geometry type
        sql = text(
            "SELECT DISTINCT(GeometryType(geometry)) FROM {table} ORDER BY 1;".format(
                table=table
            )
        )
        with engine.connect() as conn:
            geom_type = conn.execute(sql).fetchone()[0]

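        # PostGIS has no LinearRing type, so the ring is expected to be
        # stored as a plain LineString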
        assert geom_type.upper() == "LINESTRING"

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_in_chunks(self, engine_postgis, df_mixed_single_and_multi):
        """
        Tests that writing in chunks works.
        """
        engine = engine_postgis

        table = "geomtype_tests"

        write_postgis(
            df_mixed_single_and_multi,
            con=engine,
            name=table,
            if_exists="replace",
            chunksize=1,
        )
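        # chunksize=1 writes the rows in batches of one; all three rows
        # should still end up in the table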
        # Validate row count
        sql = text("SELECT COUNT(geometry) FROM {table};".format(table=table))
        with engine.connect() as conn:
            row_cnt = conn.execute(sql).fetchone()[0]
        assert row_cnt == 3

        # Validate geometry type
        sql = text(
            "SELECT DISTINCT GeometryType(geometry) FROM {table} ORDER BY 1;".format(
                table=table
            )
        )
        with engine.connect() as conn:
            res = conn.execute(sql).fetchall()
        assert res[0][0].upper() == "LINESTRING"
        assert res[1][0].upper() == "MULTILINESTRING"
        assert res[2][0].upper() == "POINT"

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_to_different_schema(self, engine_postgis, df_nybb):
        """
        Tests writing data to an alternative schema.
        """
        engine = engine_postgis

        table = "nybb"
        schema_to_use = "test"
        sql = text("CREATE SCHEMA IF NOT EXISTS {schema};".format(schema=schema_to_use))
        with engine.begin() as conn:
            conn.execute(sql)

        write_postgis(
            df_nybb, con=engine, name=table, if_exists="replace", schema=schema_to_use
        )
        # Validate
        sql = text(
            "SELECT * FROM {schema}.{table};".format(schema=schema_to_use, table=table)
        )

        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_to_different_schema_when_table_exists(
        self, engine_postgis, df_nybb
    ):
        """
        Tests writing data to an alternative schema when the table already exists.
        """
        engine = engine_postgis

        table = "nybb"
        schema_to_use = "test"
        sql = text("CREATE SCHEMA IF NOT EXISTS {schema};".format(schema=schema_to_use))
        with engine.begin() as conn:
            conn.execute(sql)

        try:
            write_postgis(
                df_nybb, con=engine, name=table, if_exists="fail", schema=schema_to_use
            )
            # Validate
            sql = text(
                "SELECT * FROM {schema}.{table};".format(
                    schema=schema_to_use, table=table
                )
            )

            df = read_postgis(sql, engine, geom_col="geometry")
            validate_boro_df(df)

        # Raises a ValueError if the table already exists
        except ValueError:
            pass

        # Try with replace flag on
        write_postgis(
            df_nybb, con=engine, name=table, if_exists="replace", schema=schema_to_use
        )
        # Validate
        sql = text(
            "SELECT * FROM {schema}.{table};".format(schema=schema_to_use, table=table)
        )

        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_3D_geometries(self, engine_postgis, df_3D_geoms):
        """
        Tests that writing geometries with 3 dimensions works.
        """
        engine = engine_postgis

        table = "geomtype_tests"

        write_postgis(df_3D_geoms, con=engine, name=table, if_exists="replace")

        # Check that all geometries have 3 dimensions
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        assert list(df.geometry.has_z) == [True, True, True]

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_row_order(self, engine_postgis, df_nybb):
        """
        Tests that the row order in the db table follows the order of the
        original frame.
        """
        engine = engine_postgis

        table = "row_order_test"
        correct_order = df_nybb["BoroCode"].tolist()

        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")

        # Check that the row order matches
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        assert df["BoroCode"].tolist() == correct_order

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_append_before_table_exists(self, engine_postgis, df_nybb):
        """
        Tests that insert works with if_exists='append' when table does not exist yet.
        """
        engine = engine_postgis

        table = "nybb"
        # If table exists, delete it before trying to write with defaults
        drop_table_if_exists(engine, table)

        write_postgis(df_nybb, con=engine, name=table, if_exists="append")

        # Check that the table was created and its contents are valid
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_append_with_different_crs(self, engine_postgis, df_nybb):
        """
        Tests that an error is raised if the table CRS differs from the frame's.
        """
        engine = engine_postgis

        table = "nybb"
        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")

        # Reproject
        df_nybb2 = df_nybb.to_crs(epsg=4326)

        # Should raise an error when appending
        with pytest.raises(ValueError, match="CRS of the target table"):
            write_postgis(df_nybb2, con=engine, name=table, if_exists="append")

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_append_without_crs(self, engine_postgis, df_nybb):
        # This test was included in #3328 when the default value for no
        # CRS was changed from an SRID of -1 to 0. This resolves issues
        # with appending dataframes that have no CRS to postgis, since
        # PostGIS itself uses 0 as its no-CRS value.
        engine = engine_postgis
        df_nybb = df_nybb.set_crs(None, allow_override=True)
        table = "nybb"

        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")

        # append another dataframe with no crs
        df_nybb2 = df_nybb
        write_postgis(df_nybb2, con=engine, name=table, if_exists="append")

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    @pytest.mark.xfail(
        compat.PANDAS_GE_20 and not compat.PANDAS_GE_202,
        reason="Duplicate columns are dropped in read_sql with pandas 2.0.0 and 2.0.1",
    )
    def test_duplicate_geometry_column_fails(self, engine_postgis):
        """
        Tests that a ValueError is raised if an SQL query returns two geometry columns.
        """
        engine = engine_postgis

        sql = "select ST_MakePoint(0, 0) as geom, ST_MakePoint(0, 0) as geom;"

        with pytest.raises(ValueError):
            read_postgis(sql, engine, geom_col="geom")

    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_read_non_epsg_crs(self, connection_postgis, df_nybb):
        con = connection_postgis
        df_nybb = df_nybb.to_crs(crs="esri:54052")
        create_postgis(con, df_nybb, srid=54052)

        sql = "SELECT * FROM nybb;"
        df = read_postgis(sql, con)
        validate_boro_df(df)
        assert df.crs == "ESRI:54052"

    @pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
    @mock.patch("shapely.get_srid")
    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_read_srid_not_in_table(self, mock_get_srid, connection_postgis, df_nybb):
        # mock a non-existent SRID for the edge case where shapely reports
        # an SRID that is not present in the postgis table
        pyproj = pytest.importorskip("pyproj")

        mock_get_srid.return_value = 99999

        con = connection_postgis
        df_nybb = df_nybb.to_crs(crs="epsg:4326")
        create_postgis(con, df_nybb)

        sql = "SELECT * FROM nybb;"
        with pytest.raises(pyproj.exceptions.CRSError, match="crs not found"):
            with pytest.warns(UserWarning, match="Could not find srid 99999"):
                read_postgis(sql, con)

    @mock.patch("geopandas.io.sql._get_spatial_ref_sys_df")
    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_read_no_spatial_ref_sys_table_in_postgis(
        self, mock_get_spatial_ref_sys_df, connection_postgis, df_nybb
    ):
        # mock a database that is missing the spatial_ref_sys table

        mock_get_spatial_ref_sys_df.side_effect = pd.errors.DatabaseError

        con = connection_postgis
        df_nybb = df_nybb.to_crs(crs="epsg:4326")
        create_postgis(con, df_nybb, srid=4326)

        sql = "SELECT * FROM nybb;"
        with pytest.warns(
            UserWarning, match="Could not find the spatial reference system table"
        ):
            df = read_postgis(sql, con)

        assert df.crs == "EPSG:4326"

    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_read_non_epsg_crs_chunksize(self, connection_postgis, df_nybb):
        """Test chunksize argument with non epsg crs"""
        chunksize = 2
        con = connection_postgis
        df_nybb = df_nybb.to_crs(crs="esri:54052")

        create_postgis(con, df_nybb, srid=54052)

        sql = "SELECT * FROM nybb;"
        df = pd.concat(read_postgis(sql, con, chunksize=chunksize))

        validate_boro_df(df)
        assert df.crs == "ESRI:54052"