refactor: excel parse

This commit is contained in:
Blizzard
2026-04-16 10:01:11 +08:00
parent 680ecc320f
commit f62f95ec02
7941 changed files with 2899112 additions and 0 deletions
@@ -0,0 +1,57 @@
from datetime import datetime, timedelta, timezone
import pytest
from qdrant_client.local.datetime_utils import parse
# Each case maps an input string to the datetime the local parser must
# produce.  Inputs without an explicit offset are treated as UTC.
_DATE_CASES = [
    ("2021-01-01T00:00:00", datetime(2021, 1, 1, 0, 0, 0, tzinfo=timezone.utc)),
    ("2021-01-01T00:00:00Z", datetime(2021, 1, 1, 0, 0, 0, tzinfo=timezone.utc)),
    ("2021-01-01T00:00:00+00:00", datetime(2021, 1, 1, 0, 0, 0, tzinfo=timezone.utc)),
    ("2021-01-01T00:00:00.000000", datetime(2021, 1, 1, 0, 0, 0, tzinfo=timezone.utc)),
    ("2021-01-01T00:00:00.000000Z", datetime(2021, 1, 1, 0, 0, 0, tzinfo=timezone.utc)),
    (
        "2021-01-01T00:00:00.000000+01:00",
        datetime(2021, 1, 1, 0, 0, 0, tzinfo=timezone(timedelta(hours=1))),
    ),
    (
        "2021-01-01T00:00:00.000000-10:00",
        datetime(2021, 1, 1, 0, 0, 0, tzinfo=timezone(timedelta(hours=-10))),
    ),
    # Date-only and space-separated variants.
    ("2021-01-01", datetime(2021, 1, 1, 0, 0, 0, tzinfo=timezone.utc)),
    ("2021-01-01 00:00:00", datetime(2021, 1, 1, 0, 0, 0, tzinfo=timezone.utc)),
    ("2021-01-01 00:00:00Z", datetime(2021, 1, 1, 0, 0, 0, tzinfo=timezone.utc)),
    (
        "2021-01-01 00:00:00+0200",
        datetime(2021, 1, 1, 0, 0, 0, tzinfo=timezone(timedelta(hours=2))),
    ),
    ("2021-01-01 00:00:00.000000", datetime(2021, 1, 1, 0, 0, 0, tzinfo=timezone.utc)),
    ("2021-01-01 00:00:00.000000Z", datetime(2021, 1, 1, 0, 0, 0, tzinfo=timezone.utc)),
    (
        "2021-01-01 00:00:00.000000+00:30",
        datetime(2021, 1, 1, 0, 0, 0, tzinfo=timezone(timedelta(minutes=30))),
    ),
    (
        "2021-01-01 00:00:00.000009+00:30",
        datetime(2021, 1, 1, 0, 0, 0, 9, tzinfo=timezone(timedelta(minutes=30))),
    ),
    # NOTE(review): the original comment claimed hour-only offsets have no
    # specifier here, yet these cases expect them to parse — confirm intent.
    (
        "2021-01-01 00:00:00.000+01",
        datetime(2021, 1, 1, 0, 0, 0, tzinfo=timezone(timedelta(hours=1))),
    ),
    (
        "2021-01-01 00:00:00.000-10",
        datetime(2021, 1, 1, 0, 0, 0, tzinfo=timezone(timedelta(hours=-10))),
    ),
    (
        "2021-01-01 00:00:00-03:00",
        datetime(2021, 1, 1, 0, 0, 0, tzinfo=timezone(timedelta(hours=-3))),
    ),
]


@pytest.mark.parametrize("date_str, expected", _DATE_CASES)  # type: ignore
def test_parse_dates(date_str: str, expected: datetime):
    """parse() must accept the ISO-like formats above and default to UTC."""
    assert parse(date_str) == expected
@@ -0,0 +1,57 @@
import numpy as np
from qdrant_client.http import models
from qdrant_client.local.distances import calculate_distance
from qdrant_client.local.multi_distances import calculate_multi_distance
from qdrant_client.local.sparse_distances import calculate_distance_sparse
def test_distances() -> None:
    """Smoke-test dense, sparse and multi-vector distance computations.

    NOTE: assertion order within each dense section matters — per the
    original comments, the COSINE branch normalizes `vectors` in place,
    so it must stay the last check before `vectors` is reassigned.
    """
    # Identical query and storage vectors: dot = |v|^2, distances are zero.
    query = np.array([1.0, 2.0, 3.0])
    vectors = np.array([[1.0, 2.0, 3.0], [1.0, 2.0, 3.0]])
    assert np.allclose(calculate_distance(query, vectors, models.Distance.DOT), [14.0, 14.0])
    assert np.allclose(calculate_distance(query, vectors, models.Distance.EUCLID), [0.0, 0.0])
    assert np.allclose(calculate_distance(query, vectors, models.Distance.MANHATTAN), [0.0, 0.0])
    # cosine modifies vectors inplace
    assert np.allclose(calculate_distance(query, vectors, models.Distance.COSINE), [1.0, 1.0])
    # Non-trivial query: one aligned-ish vector, one orthogonal vector.
    query = np.array([1.0, 0.0, 1.0])
    vectors = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, 0.0]])
    assert np.allclose(
        calculate_distance(query, vectors, models.Distance.DOT), [4.0, 0.0], atol=0.0001
    )
    assert np.allclose(
        calculate_distance(query, vectors, models.Distance.EUCLID),
        [2.82842712, 1.7320508],
        atol=0.0001,
    )
    assert np.allclose(
        calculate_distance(query, vectors, models.Distance.MANHATTAN),
        [4.0, 3.0],
        atol=0.0001,
    )
    # cosine modifies vectors inplace
    assert np.allclose(
        calculate_distance(query, vectors, models.Distance.COSINE),
        [0.75592895, 0.0],
        atol=0.0001,
    )
    # Sparse vectors with disjoint indices score -inf (no overlap at all).
    sparse_query = models.SparseVector(indices=[1, 2], values=[1, 2])
    sparse_vectors = [models.SparseVector(indices=[10, 20], values=[1, 2])]
    assert calculate_distance_sparse(sparse_query, sparse_vectors) == [np.float32("-inf")]
    # Overlapping indices: plain dot product over shared indices.
    sparse_vectors = [
        models.SparseVector(indices=[1, 2], values=[3, 4]),
        models.SparseVector(indices=[1, 2, 3], values=[1, 2, 3]),
    ]
    assert np.allclose(
        calculate_distance_sparse(sparse_query, sparse_vectors), [11.0, 5], atol=0.0001
    )
    # Multi-vector (max-sim style) comparison over one stored document.
    multivector_query = np.array([[1, 2, 3], [3, 4, 5]])
    docs = [np.array([[1, 2, 3], [0, 1, 2]])]
    assert calculate_multi_distance(multivector_query, docs, models.Distance.DOT)[0] == 40.0
@@ -0,0 +1,189 @@
from qdrant_client.http.models import models
from qdrant_client.local.payload_filters import check_filter
def test_nested_payload_filters():
    """check_filter() must evaluate nested-object conditions against array payloads."""
    payload = {
        "country": {
            "name": "Germany",
            "capital": "Berlin",
            "cities": [
                {
                    "name": "Berlin",
                    "population": 3.7,
                    "location": {
                        "lon": 13.76116,
                        "lat": 52.33826,
                    },
                    "sightseeing": ["Brandenburg Gate", "Reichstag"],
                },
                {
                    "name": "Munich",
                    "population": 1.5,
                    "location": {
                        "lon": 11.57549,
                        "lat": 48.13743,
                    },
                    "sightseeing": ["Marienplatz", "Olympiapark"],
                },
                {
                    "name": "Hamburg",
                    "population": 1.8,
                    "location": {
                        "lon": 9.99368,
                        "lat": 53.55108,
                    },
                    "sightseeing": ["Reeperbahn", "Elbphilharmonie"],
                },
            ],
        }
    }

    def cities_filter(filter_body):
        # Wrap a raw filter body into a nested condition over "country.cities".
        return models.Filter(
            must=[{"nested": {"key": "country.cities", "filter": filter_body}}]
        )

    def population_at_least(threshold):
        return {"key": "population", "range": {"gte": threshold}}

    # Every city has two sightseeing entries, so the must_not clause
    # (values_count > 1) rejects all of them -> no match.
    query = cities_filter(
        {
            "must": [population_at_least(1.0)],
            "must_not": [{"key": "sightseeing", "values_count": {"gt": 1}}],
        }
    )
    assert check_filter(query, payload, 0, has_vector={}) is False

    # All three cities have population >= 1.0 -> match.
    query = cities_filter({"must": [population_at_least(1.0)]})
    assert check_filter(query, payload, 0, has_vector={}) is True

    # No city has more than two sightseeing values -> no match.
    query = cities_filter(
        {
            "must": [
                population_at_least(1.0),
                {"key": "sightseeing", "values_count": {"gt": 2}},
            ]
        }
    )
    assert check_filter(query, payload, 0, has_vector={}) is False

    # No city reaches population 9.0 -> no match.
    query = cities_filter({"must": [population_at_least(9.0)]})
    assert check_filter(query, payload, 0, has_vector={}) is False
def test_geo_polygon_filter_query():
    """A point inside the polygon matches; a point outside it does not."""
    # Square spanning roughly lon/lat 55..86; the ring is closed by
    # repeating the first point at the end, as the API requires.
    polygon_condition = {
        "key": "location",
        "geo_polygon": {
            "exterior": {
                "points": [
                    {"lon": 55.455868, "lat": 55.495862},
                    {"lon": 86.455868, "lat": 55.495862},
                    {"lon": 86.455868, "lat": 86.495862},
                    {"lon": 55.455868, "lat": 86.495862},
                    {"lon": 55.455868, "lat": 55.495862},
                ]
            },
        },
    }
    query = models.Filter(must=[polygon_condition])

    inside_payload = {
        "location": [
            {
                "lon": 70.0,
                "lat": 70.0,
            },
        ]
    }
    assert check_filter(query, inside_payload, 0, has_vector={}) is True

    outside_payload = {
        "location": [
            {
                "lon": 30.693738,
                "lat": 30.502165,
            },
        ]
    }
    assert check_filter(query, outside_payload, 0, has_vector={}) is False
@@ -0,0 +1,549 @@
from typing import Any
import pytest
from qdrant_client.local.json_path_parser import (
JsonPathItem,
JsonPathItemType,
parse_json_path,
)
from qdrant_client.local.payload_value_extractor import value_by_key
from qdrant_client.local.payload_value_setter import set_value_by_key
def test_parse_json_path() -> None:
    """parse_json_path() splits a JSON path into typed items and rejects malformed paths."""

    def key(name: str) -> JsonPathItem:
        return JsonPathItem(item_type=JsonPathItemType.KEY, key=name)

    def idx(i: int) -> JsonPathItem:
        return JsonPathItem(item_type=JsonPathItemType.INDEX, index=i)

    wildcard = JsonPathItem(item_type=JsonPathItemType.WILDCARD_INDEX, index=None)

    # (path, expected items) — quoted segments keep dots/brackets literal.
    valid_cases = [
        ("a", [key("a")]),
        ("a.b", [key("a"), key("b")]),
        ('a."a[b]".c', [key("a"), key("a[b]"), key("c")]),
        ("a[0]", [key("a"), idx(0)]),
        ("a[0].b", [key("a"), idx(0), key("b")]),
        ("a[0].b[1]", [key("a"), idx(0), key("b"), idx(1)]),
        ("a[][]", [key("a"), wildcard, wildcard]),
        ("a[0][1]", [key("a"), idx(0), idx(1)]),
        ("a[0][1].b", [key("a"), idx(0), idx(1), key("b")]),
        ('a."k.c"', [key("a"), key("k.c")]),
        ('a."c[][]".b', [key("a"), key("c[][]"), key("b")]),
        ('a."c..q".b', [key("a"), key("c..q"), key("b")]),
    ]
    for path, expected in valid_cases:
        assert parse_json_path(path) == expected, path

    # Every one of these malformed paths must raise ValueError.
    invalid_paths = [
        'a."k.c',  # unterminated quote
        'a."k.c".',  # trailing dot
        'a."k.c".[]',  # dot directly before brackets
        "a.'k.c'",  # single quotes are not accepted
        "a[",  # unclosed bracket
        "a]",  # stray closing bracket
        "a[]]",  # extra closing bracket
        "a[][].",  # trailing dot after brackets
        "a[][]b",  # key glued to brackets without a dot
        ".a",  # leading dot
        "a[x]",  # non-numeric index
        'a[]""',  # quoted key glued to brackets
        '""b',  # key glued to a quoted segment
        "[]",  # brackets with no key
        "a[.]",  # dot inside brackets
        'a["1"]',  # quoted index
        "",  # empty path
        "a..c",  # empty segment
        "a.c[]b[]",  # key glued between bracket groups
        "a.c[].[]",  # dot between bracket groups
    ]
    for path in invalid_paths:
        with pytest.raises(ValueError):
            parse_json_path(path)
def test_value_by_key() -> None:
    """Exercise value_by_key() lookups over a nested payload.

    Covers plain keys, dotted paths, concrete and wildcard array indices,
    quoted keys containing dots, and missing paths — in both the flattening
    (default) and non-flattening modes.

    Fixes over the previous revision: removed a duplicated
    `double-nest-array[0][0]` assertion and unified the nonexistent-key
    spelling to "not_exist" across both regions.
    """
    payload = {
        "name": "John",
        "age": 25,
        "counts": [1, 2, 3],
        "address": {
            "city": "New York",
        },
        "location": [
            {"name": "home", "counts": [1, 2, 3]},
            {"name": "work", "counts": [4, 5, 6]},
        ],
        "nested": [{"empty": []}, {"empty": []}, {"empty": None}],
        "the_null": None,
        "the": {"nested.key": "cuckoo"},
        "double-nest-array": [[1, 2], [3, 4], [5, 6]],
    }
    # region flat=True
    assert value_by_key(payload, "name") == ["John"]
    assert value_by_key(payload, "address.city") == ["New York"]
    assert value_by_key(payload, "location[].name") == ["home", "work"]
    assert value_by_key(payload, "location[0].name") == ["home"]
    assert value_by_key(payload, "location[1].name") == ["work"]
    # Out-of-range index / missing path resolves to None, not an error.
    assert value_by_key(payload, "location[2].name") is None
    assert value_by_key(payload, "location[].name[0]") is None
    assert value_by_key(payload, "location[0]") == [{"name": "home", "counts": [1, 2, 3]}]
    assert value_by_key(payload, "not_exist") is None
    assert value_by_key(payload, "address") == [{"city": "New York"}]
    assert value_by_key(payload, "address.city[0]") is None
    assert value_by_key(payload, "counts") == [1, 2, 3]
    # flat=True merges per-element lists into one flat list.
    assert value_by_key(payload, "location[].counts") == [1, 2, 3, 4, 5, 6]
    assert value_by_key(payload, "nested[].empty") == [None]
    assert value_by_key(payload, "the_null") == [None]
    # The quoted segment keeps the dot inside the key literal.
    assert value_by_key(payload, 'the."nested.key"') == ["cuckoo"]
    assert value_by_key(payload, "double-nest-array[][]") == [1, 2, 3, 4, 5, 6]
    assert value_by_key(payload, "double-nest-array[0][]") == [1, 2]
    assert value_by_key(payload, "double-nest-array[0][0]") == [1]
    assert value_by_key(payload, "double-nest-array[][1]") == [2, 4, 6]
    # endregion
    # region flat=False
    assert value_by_key(payload, "name", flat=False) == ["John"]
    assert value_by_key(payload, "address.city", flat=False) == ["New York"]
    assert value_by_key(payload, "location[].name", flat=False) == ["home", "work"]
    assert value_by_key(payload, "location[0].name", flat=False) == ["home"]
    assert value_by_key(payload, "location[1].name", flat=False) == ["work"]
    assert value_by_key(payload, "location[2].name", flat=False) is None
    assert value_by_key(payload, "location[].name[0]", flat=False) is None
    assert value_by_key(payload, "location[0]", flat=False) == [
        {"name": "home", "counts": [1, 2, 3]}
    ]
    assert value_by_key(payload, "not_exist", flat=False) is None
    assert value_by_key(payload, "address", flat=False) == [{"city": "New York"}]
    assert value_by_key(payload, "address.city[0]", flat=False) is None
    # flat=False keeps the per-element list structure intact.
    assert value_by_key(payload, "counts", flat=False) == [[1, 2, 3]]
    assert value_by_key(payload, "location[].counts", flat=False) == [
        [1, 2, 3],
        [4, 5, 6],
    ]
    assert value_by_key(payload, "nested[].empty", flat=False) == [[], [], None]
    assert value_by_key(payload, "the_null", flat=False) == [None]
    # Traversing through a scalar ("age" is an int) yields None.
    assert value_by_key(payload, "age.nested.not_exist") is None
    # endregion
def test_set_value_by_key() -> None:
    """Exercise set_value_by_key() mutations of nested payloads in place.

    Each scenario rebuilds `payload`, applies one parsed path with
    `new_value`, and asserts the exact resulting structure.  Observable
    conventions from the assertions below: dict targets are merged with
    `new_value` (existing keys overwritten); out-of-range indices are
    silently ignored; `[]` applies to every element; non-dict leaves are
    replaced by `new_value` (or by containers implied by the path).
    """
    # region valid keys
    payload: dict[str, Any] = {}
    new_value: dict[str, Any] = {}
    key = "a"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": {}}, payload
    payload = {"a": {"a": 2}}
    new_value = {}
    key = "a"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": {"a": 2}}, payload
    payload = {"a": {"a": 2}}
    new_value = {"b": 3}
    key = "a"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": {"a": 2, "b": 3}}, payload
    payload = {"a": {"a": 2}}
    new_value = {"a": 3}
    key = "a"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": {"a": 3}}, payload
    payload = {"a": {"a": 2}}
    new_value = {"a": 3}
    key = "a.a"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": {"a": {"a": 3}}}, payload
    payload = {"a": {"a": {"a": 1}}}
    new_value = {"b": 2}
    key = "a.a"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": {"a": {"a": 1, "b": 2}}}, payload
    payload = {"a": {"a": {"a": 1}}}
    new_value = {"a": 2}
    key = "a.a"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": {"a": {"a": 2}}}, payload
    # Indexing into an empty list is a no-op.
    payload = {"a": []}
    new_value = {"b": 2}
    key = "a[0]"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": []}, payload
    payload = {"a": [{}]}
    new_value = {"b": 2}
    key = "a[0]"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": [{"b": 2}]}, payload
    payload = {"a": [{"a": 1}]}
    new_value = {"b": 2}
    key = "a[0]"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": [{"a": 1, "b": 2}]}, payload
    # A non-dict element at a concrete index gets replaced by new_value.
    payload = {"a": [[]]}
    new_value = {"b": 2}
    key = "a[0]"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": [{"b": 2}]}, payload
    payload = {"a": [[]]}
    new_value = {"b": 2}
    key = "a[1]"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": [[]]}, payload
    payload = {"a": [{"a": []}]}
    new_value = {"b": 2}
    key = "a[0].a"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": [{"a": {"b": 2}}]}, payload
    payload = {"a": [{"a": []}]}
    new_value = {"b": 2}
    key = "a[].a"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": [{"a": {"b": 2}}]}, payload
    # Wildcard index applies the write to every list element.
    payload = {"a": [{"a": []}, {"a": []}]}
    new_value = {"b": 2}
    key = "a[].a"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": [{"a": {"b": 2}}, {"a": {"b": 2}}]}, payload
    payload = {"a": 1, "b": 2}
    new_value = {"c": 3}
    key = "c"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": 1, "b": 2, "c": {"c": 3}}, payload
    payload = {"a": {"b": {"c": 1}}}
    new_value = {"d": 2}
    key = "a.b.d"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": {"b": {"c": 1, "d": {"d": 2}}}}, payload
    payload = {"a": {"b": {"c": 1}}}
    new_value = {"c": 2}
    key = "a.b"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": {"b": {"c": 2}}}, payload
    payload = {"a": [{"b": 1}, {"b": 2}]}
    new_value = {"c": 3}
    key = "a[1]"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": [{"b": 1}, {"b": 2, "c": 3}]}, payload
    payload = {"a": []}
    new_value = {"b": {"c": 1}}
    key = "a[0]"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": []}, payload
    payload = {"a": {"b": {"c": {"d": {"e": 1}}}}}
    new_value = {"f": 2}
    key = "a.b.c.d"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": {"b": {"c": {"d": {"e": 1, "f": 2}}}}}, payload
    payload = {"a": {"b": {"c": 1}}}
    new_value = {"d": {"e": 2}}
    key = "a.b.c"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": {"b": {"c": {"d": {"e": 2}}}}}, payload
    payload = {"a": [{"b": 1}]}
    new_value = {"c": 2}
    key = "a[1]"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": [{"b": 1}]}, payload
    payload = {"a": {"b": [{"c": 1}, {"c": 2}]}}
    new_value = {"d": 3}
    key = "a.b[0].c"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": {"b": [{"c": {"d": 3}}, {"c": 2}]}}, payload
    payload = {"a": {"b": {"c": [{"d": 1}]}}}
    new_value = {"e": {"f": 2}}
    key = "a.b.c[0].d"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": {"b": {"c": [{"d": {"e": {"f": 2}}}]}}}, payload
    # Double indices address elements of nested lists.
    payload = {"a": [[{"b": 1}], [{"b": 2}]]}
    new_value = {"c": 3}
    key = "a[0][0]"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": [[{"b": 1, "c": 3}], [{"b": 2}]]}, payload
    payload = {"a": [[{"b": 1}], [{"b": 2}]]}
    new_value = {"c": 3}
    key = "a[1][0]"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": [[{"b": 1}], [{"b": 2, "c": 3}]]}, payload
    payload = {"a": [[{"b": 1}], [{"b": 2}]]}
    new_value = {"c": 3}
    key = "a[1][1]"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": [[{"b": 1}], [{"b": 2}]]}, payload
    payload = {"a": [[{"b": 1}], [{"b": 2}]]}
    new_value = {"c": 3}
    key = "a[][0]"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": [[{"b": 1, "c": 3}], [{"b": 2, "c": 3}]]}, payload
    payload = {"a": [[{"b": 1}], [{"b": 2}]]}
    new_value = {"c": 3}
    key = "a[][]"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": [[{"b": 1, "c": 3}], [{"b": 2, "c": 3}]]}, payload
    # Quoted segment: the dot inside "b.c" is part of the key itself.
    payload = {"a": []}
    new_value = {"c": 3}
    key = 'a."b.c"'
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": {"b.c": {"c": 3}}}, payload
    payload = {"a": {"c": [1]}}
    new_value = {"a": 1}
    key = "a.c[0]"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": {"c": [{"a": 1}]}}, payload
    payload = {"a": {"c": [1]}}
    new_value = {"a": 1}
    key = "a.c[0].d"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": {"c": [{"d": {"a": 1}}]}}, payload
    # The empty string is a legal (quoted) key.
    payload = {"": 2}
    new_value = {"a": 1}
    key = '""'
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"": {"a": 1}}, payload
    # endregion
    # region exceptions
    try:
        payload = {"a": []}
        new_value = {"c": 3}
        key = "a.'b.c'"
        set_value_by_key(payload, parse_json_path(key), new_value)
        assert False, f"Should've raised an exception due to the key with incorrect quotes: {key}"
    except Exception:
        assert True
    try:
        payload = {"a": [{"b": 1}, {"b": 2}]}
        new_value = {"c": 3}
        key = "a[-1]"
        set_value_by_key(payload, parse_json_path(key), new_value)
        assert False, "Negative indexation is not supported"
    except Exception:
        assert True
    try:
        payload = {"a": [{"b": 1}, {"b": 2}]}
        new_value = {"c": 3}
        key = "a["
        set_value_by_key(payload, parse_json_path(key), new_value)
        assert False, f"Should've raised an exception due to the incorrect key: {key}"
    except Exception:
        assert True
    try:
        payload = {"a": [{"b": 1}, {"b": 2}]}
        new_value = {"c": 3}
        key = "a]"
        set_value_by_key(payload, parse_json_path(key), new_value)
        assert False, f"Should've raise an exception due to the incorrect key: {key}"
    except Exception:
        assert True
    # endregion
    # region wrong keys
    # Paths that disagree with the existing structure rebuild it:
    # the mismatching container is replaced by what the path implies.
    payload = {"a": []}
    new_value = {}
    key = "a.b[0]"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": {"b": []}}, payload
    payload = {"a": []}
    new_value = {}
    key = "a.b"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": {"b": {}}}, payload
    payload = {"a": []}
    new_value = {"c": 2}
    key = "a.b"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": {"b": {"c": 2}}}, payload
    payload = {"a": [[{"a": 1}]]}
    new_value = {"a": 2}
    key = "a.b[0][0]"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": {"b": []}}, payload
    payload = {"a": {"c": 2}}
    new_value = {"a": 1}
    key = "a[]"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": []}, payload
    payload = {"a": {"c": 2}}
    new_value = {"a": 1}
    key = "a[].b"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": []}, payload
    payload = {"a": {"c": [1]}}
    new_value = {"a": 1}
    key = "a.c[][][0]"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": {"c": [[]]}}, payload
    payload = {"a": {"c": [{"d": 1}]}}
    new_value = {"a": 1}
    key = "a.c[][]"
    set_value_by_key(payload, parse_json_path(key), new_value)
    assert payload == {"a": {"c": [[]]}}, payload
    # endregion
@@ -0,0 +1,135 @@
import copy
import pytest
from qdrant_client.local.qdrant_local import QdrantLocal
from qdrant_client import models
@pytest.fixture(scope="module", autouse=True)
def client():
    """
    Module-scoped in-memory QdrantLocal client with two pre-filled collections:

    - "collection_default": a single unnamed dense vector (size 4, DOT),
      one point with id=1.
    - "collection_multiple_vectors": unnamed dense, "byte" (UINT8 datatype),
      "colbert" (multi-vector, MAX_SIM) and "sparse" vectors, one point
      with id=1 carrying all four.
    """
    client = QdrantLocal(":memory:")
    client.create_collection(
        "collection_default",
        vectors_config=models.VectorParams(
            size=4,
            distance=models.Distance.DOT,
        ),
    )
    client.create_collection(
        "collection_multiple_vectors",
        vectors_config={
            # "" is the default (unnamed) dense vector.
            "": models.VectorParams(
                size=4,
                distance=models.Distance.DOT,
            ),
            "byte": models.VectorParams(
                size=4, distance=models.Distance.DOT, datatype=models.Datatype.UINT8
            ),
            "colbert": models.VectorParams(
                size=4,
                distance=models.Distance.DOT,
                multivector_config=models.MultiVectorConfig(
                    comparator=models.MultiVectorComparator.MAX_SIM
                ),
            ),
        },
        sparse_vectors_config={"sparse": models.SparseVectorParams()},
    )
    client.upsert(
        "collection_default",
        [
            models.PointStruct(id=1, vector=[0.25, 0.0, 0.0, 0.0]),
        ],
    )
    client.upsert(
        "collection_multiple_vectors",
        [
            models.PointStruct(
                id=1,
                vector={
                    "": [0.0, 0.25, 0.0, 0.0],
                    "byte": [0, 25, 0, 0],
                    "colbert": [[0.0, 0.25, 0.0, 0.0], [0.0, 0.25, 0.0, 0.0]],
                    "sparse": models.SparseVector(indices=[1], values=[0.25]),
                },
            ),
        ],
    )
    return client
@pytest.mark.parametrize(
    "query",
    [
        models.NearestQuery(nearest=1),
        models.RecommendQuery(recommend=models.RecommendInput(positive=[1], negative=[1])),
        models.DiscoverQuery(
            discover=models.DiscoverInput(
                target=1, context=[models.ContextPair(**{"positive": 1, "negative": 1})]
            )
        ),
        models.ContextQuery(context=[models.ContextPair(**{"positive": 1, "negative": 1})]),
        # The last two query kinds carry no point ids, so resolution
        # must leave them untouched (handled by the `else` branch below).
        models.OrderByQuery(order_by=models.OrderBy(key="price", direction=models.Direction.ASC)),
        models.FusionQuery(fusion=models.Fusion.RRF),
    ],
)
@pytest.mark.parametrize(
    "using, lookup_from, expected, mentioned",
    [
        # (vector name to search with, where to look ids up, resolved vector,
        #  whether id 1 should be reported as mentioned in the home collection)
        (None, None, [0.25, 0.0, 0.0, 0.0], True),
        ("", None, [0.25, 0.0, 0.0, 0.0], True),
        (
            "byte",
            models.LookupLocation(collection="collection_multiple_vectors"),
            [0, 25, 0, 0],
            False,
        ),
        (
            "",
            models.LookupLocation(collection="collection_multiple_vectors", vector="colbert"),
            [[0.0, 0.25, 0.0, 0.0], [0.0, 0.25, 0.0, 0.0]],
            False,
        ),
        (
            None,
            models.LookupLocation(collection="collection_multiple_vectors", vector="sparse"),
            models.SparseVector(indices=[1], values=[0.25]),
            False,
        ),
    ],
)
def test_vector_dereferencing(client, query, using, lookup_from, expected, mentioned):
    """Point ids embedded in queries must be replaced by their stored vectors.

    Uses the fixture collections: id 1 resolves either from
    "collection_default" or, via `lookup_from`, from
    "collection_multiple_vectors" (optionally a specific named vector).
    """
    resolved, mentioned_ids = client._resolve_query_input(
        collection_name="collection_default",
        query=copy.deepcopy(query),
        using=using,
        lookup_from=lookup_from,
    )
    if isinstance(resolved, models.NearestQuery):
        assert resolved.nearest == expected
    elif isinstance(resolved, models.RecommendQuery):
        assert resolved.recommend.positive == [expected]
        assert resolved.recommend.negative == [expected]
    elif isinstance(resolved, models.DiscoverQuery):
        assert resolved.discover.target == expected
        assert resolved.discover.context[0].positive == expected
        assert resolved.discover.context[0].negative == expected
    elif isinstance(resolved, models.ContextQuery):
        assert resolved.context[0].positive == expected
        assert resolved.context[0].negative == expected
    else:
        # Id-free queries (OrderBy, Fusion) pass through unchanged and
        # cannot mention any point.
        mentioned = False
        assert resolved == query
    if mentioned:
        assert mentioned_ids == {1}
@@ -0,0 +1,21 @@
import random
from qdrant_client import models
from qdrant_client.local.local_collection import LocalCollection, DEFAULT_VECTOR_NAME
def test_get_vectors():
    """_get_vectors() honours the three with_vectors modes: by-name, all, none."""
    collection = LocalCollection(
        models.CreateCollection(
            vectors=models.VectorParams(size=2, distance=models.Distance.MANHATTAN)
        )
    )
    points = [
        models.PointStruct(id=point_id, vector=[random.random(), random.random()])
        for point_id in range(10)
    ]
    collection.upsert(points=points)

    # Requesting the default vector by name or all vectors returns data.
    assert collection._get_vectors(idx=1, with_vectors=DEFAULT_VECTOR_NAME)
    assert collection._get_vectors(idx=2, with_vectors=True)
    # with_vectors=False suppresses vector payloads entirely.
    assert collection._get_vectors(idx=3, with_vectors=False) is None