Skip to content

Commit 82467ab

Browse files
authored
Introduce datumaro integration tests for Geti Sample types (#6053)
1 parent c3acf9a commit 82467ab

4 files changed

Lines changed: 84 additions & 2 deletions

File tree

application/backend/Justfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ test-integration *ARGS: install-uv
5959

6060
# Run BDD tests with behave
6161
test-bdd *ARGS: install-uv
62-
uv run behave tests/bdd {{ ARGS }}
62+
uv run behave ./tests/bdd -v {{ ARGS }}
6363

6464
# -------------------------------------------------------------------------------------------------
6565
# Documentation
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# Copyright (C) 2026 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
import shutil
5+
import zipfile
6+
from pathlib import Path
7+
8+
import pytest
9+
import requests
10+
11+
# Public base URL under which the test dataset archives are hosted.
S3_URL = "https://storage.geti.intel.com/test-data/geti/datasets"
# File name of the archive, used both remotely and for the local copy.
OBJECT_NAME = "regression.zip"
# Directory containing this module; the archive is downloaded and unpacked here.
PARENT_DIR = Path(__file__).parent
# Directory that is searched for the per-dataset zip files used by the tests.
DATASETS_DIR = PARENT_DIR.joinpath("regression")
15+
16+
17+
def _download_regression_datasets(dest_dir: Path) -> None:
    """Download and unpack the regression dataset archive from its public location.

    The archive is fetched only when it is not already present in ``dest_dir``; it is
    then extracted into ``dest_dir`` on every call.

    Args:
        dest_dir: Directory that receives both the downloaded archive and its
            extracted contents.

    Raises:
        requests.RequestException: If the request fails, times out, or the server
            answers with an HTTP error status.
        zipfile.BadZipFile: If the archive on disk is not a valid zip file.
    """
    archive = dest_dir / OBJECT_NAME

    if not archive.exists():
        # Stream into a temporary sibling file and move it into place only once the
        # transfer has finished, so an interrupted download never leaves a truncated
        # archive behind that a later run would mistake for a complete one.
        partial = archive.with_name(f"{archive.name}.part")
        try:
            # (connect, read) timeouts keep the test session from hanging forever.
            with requests.get(f"{S3_URL}/{OBJECT_NAME}", stream=True, timeout=(10, 60)) as response:
                # Fail here instead of saving an error page as the "archive".
                response.raise_for_status()
                with open(partial, "wb") as f:
                    # iter_content applies content-encoding decoding, unlike response.raw.
                    for chunk in response.iter_content(chunk_size=1024 * 1024):
                        f.write(chunk)
            partial.replace(archive)
        finally:
            # No-op after a successful move; removes the leftover on failure.
            partial.unlink(missing_ok=True)

    with zipfile.ZipFile(archive, "r") as zf:
        zf.extractall(dest_dir)
28+
29+
30+
def pytest_configure() -> None:
    """Fetch the regression datasets up front, before pytest starts collecting tests."""
    _download_regression_datasets(dest_dir=PARENT_DIR)
33+
34+
35+
def pytest_generate_tests(metafunc: pytest.Metafunc) -> None:
    """Parametrize the ``archive`` argument with every zip file found in ``DATASETS_DIR``.

    Tests that do not request ``archive`` are left untouched. Each parameter is
    identified by the zip file's stem.

    Raises:
        pytest.UsageError: If ``DATASETS_DIR`` contains no ``*.zip`` file.
    """
    if "archive" not in metafunc.fixturenames:
        return

    archives = sorted(DATASETS_DIR.glob("*.zip"))
    if not archives:
        raise pytest.UsageError(
            f"No regression dataset archives were found in '{DATASETS_DIR}'. The dataset download/extraction may "
            f"have failed, or the archive structure may have changed."
        )

    test_ids = [path.stem for path in archives]
    metafunc.parametrize("archive", archives, ids=test_ids)
45+
46+
47+
def pytest_unconfigure() -> None:
    """Delete the downloaded archive and the unpacked dataset directory once the run is over."""
    downloaded = PARENT_DIR / OBJECT_NAME
    if downloaded.exists():
        downloaded.unlink()

    if not DATASETS_DIR.exists():
        return
    shutil.rmtree(DATASETS_DIR)
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Copyright (C) 2026 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
from pathlib import Path
5+
6+
from datumaro.experimental import Sample, import_dataset
7+
8+
from app.datumaro_converter import (
9+
DetectionImportExportSample,
10+
InstanceSegmentationImportExportSample,
11+
MulticlassClassificationImportExportSample,
12+
MultilabelClassificationImportExportSample,
13+
)
14+
15+
# Maps an annotation-type key to the sample schema a dataset is converted to.
# test_import_dataset looks the key up from the part of the archive's file stem
# that precedes the first "-".
ANNOTATION_TYPE_TO_SAMPLE: dict[str, type[Sample]] = {
    "bounding_box": DetectionImportExportSample,
    "multilabel": MultilabelClassificationImportExportSample,
    "single_label": MulticlassClassificationImportExportSample,
    "polygon": InstanceSegmentationImportExportSample,
}
21+
22+
23+
def test_import_dataset(archive: Path) -> None:
    """Check that a regression zip archive imports and converts to its mapped sample schema.

    The sample type is selected via ``ANNOTATION_TYPE_TO_SAMPLE`` using the part of the
    archive's file stem that precedes the first "-".
    """
    imported = import_dataset(archive)

    prefix, _rest = archive.stem.split("-", 1)
    target_schema = ANNOTATION_TYPE_TO_SAMPLE[prefix]

    converted = imported.convert_to_schema(target_schema)
    assert len(converted) > 0

application/backend/uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)