Skip to content

Commit 3563be2

Browse files
committed
changed the default attenuation_threshold
1 parent 3b844f8 commit 3563be2

2 files changed

Lines changed: 126 additions & 20 deletions

File tree

oceanstream/echodata/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ class DenoiseConfig:
260260
impulse_ping_lags: list[int] = field(default_factory=lambda: [1])
261261

262262
# Attenuation
263-
attenuation_threshold: float = 0.8 # Correlation threshold
263+
attenuation_threshold: float = 6.0 # dB below block median to flag
264264
attenuation_upper_limit: float = 180.0 # m
265265
attenuation_lower_limit: float = 280.0 # m
266266
attenuation_side_pings: int = 15

scripts/batch_processing/build_full_survey.py

Lines changed: 125 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@
4949
# Skip raw→Sv stages; use existing Sv zarrs from output container
5050
python build_full_survey.py --skip-raw
5151
52+
# Re-denoise from existing Sv zarrs (skip raw→Sv, re-run denoise→products)
53+
python build_full_survey.py --skip-sv
54+
5255
# 38 kHz only (skip 200 kHz)
5356
python build_full_survey.py --freq 38
5457
@@ -1593,9 +1596,12 @@ def plot_combined_echogram(
15931596

15941597

15951598
# ---------------------------------------------------------------------------
1596-
# List existing denoised zarrs (for --skip-denoise / --resume)
1599+
# List existing Sv / denoised zarrs
15971600
# ---------------------------------------------------------------------------
15981601

1602+
_SV_RE = re.compile(
1603+
r"(\d{4}-\d{2}-\d{2})--(\w+)\.zarr$"
1604+
)
15991605
_DENOISED_RE = re.compile(
16001606
r"(\d{4}-\d{2}-\d{2})--(\w+)--denoised\.zarr$"
16011607
)
@@ -1604,6 +1610,72 @@ def plot_combined_echogram(
16041610
)
16051611

16061612

1613+
def _list_sv_local(local_root: Path) -> list[tuple[str, str, str]]:
    """Collect Sv zarr directories stored under per-day folders on local disk.

    Only sub-directories of ``local_root`` named ``YYYY-MM-DD`` are searched.
    Within each one, an entry is reported when it is a directory, its name
    matches ``_SV_RE``, and its name contains none of the derived-product
    markers ``--denoised``, ``--mvbs`` or ``--nasc``.

    Args:
        local_root: Directory holding one sub-directory per survey day.

    Returns:
        Sorted list of ``(day_key, category, zarr_path)`` tuples. ``day_key``
        and ``category`` come from the matched entry name, and ``zarr_path``
        is ``"{day_key}/{entry name}"``.
    """
    derived_markers = ("--denoised", "--mvbs", "--nasc")
    found: list[tuple[str, str, str]] = []

    for day_folder in sorted(local_root.iterdir()):
        is_day_folder = day_folder.is_dir() and re.match(
            r"\d{4}-\d{2}-\d{2}$", day_folder.name
        )
        if not is_day_folder:
            continue

        for entry in day_folder.iterdir():
            entry_name = entry.name
            # Keep {day}--{category}.zarr; drop {day}--{category}--denoised.zarr etc.
            if any(marker in entry_name for marker in derived_markers):
                continue
            hit = _SV_RE.match(entry_name)
            if not hit or not entry.is_dir():
                continue
            date_part, category = hit.group(1), hit.group(2)
            found.append((date_part, category, f"{date_part}/{entry_name}"))

    return sorted(found)
1631+
1632+
1633+
def list_sv_zarrs(
    container: str, *, blob_prefix: str = "2023-"
) -> list[tuple[str, str, str]]:
    """List Sv zarr paths from local disk or Azure.

    Local disk is tried first: if the optional ``local_storage`` module can be
    imported and ``_OUTPUT_ROOT / container`` exists, the result of
    ``_list_sv_local`` is returned. Otherwise the Azure Blob container is
    listed and zarr stores are detected via their root metadata blob
    (``zarr.json``, ``.zmetadata`` or ``.zattrs``).

    Only returns raw Sv zarrs, not denoised/mvbs/nasc variants.

    Args:
        container: Name of the output container (also used as the local
            sub-directory name under ``_OUTPUT_ROOT``).
        blob_prefix: Passed to ``list_blobs`` as ``name_starts_with`` in the
            Azure fallback. Defaults to ``"2023-"``, the value that was
            previously hard-coded, so existing callers see no change.
            NOTE(review): callers processing other years must pass a
            matching prefix.

    Returns:
        Sorted list of ``(day_key, category, zarr_path)``.
    """
    # Try local disk first. Only the optional import is guarded, so an
    # ImportError raised while scanning is not mistaken for "module missing".
    try:
        from local_storage import _OUTPUT_ROOT
    except ImportError:
        pass
    else:
        local_root = _OUTPUT_ROOT / container
        if local_root.exists():
            return _list_sv_local(local_root)

    # Fall back to Azure Blob
    from azure.storage.blob import ContainerClient

    conn_str = _connection_string()
    client = ContainerClient.from_connection_string(conn_str, container)

    # Match {day}/{day}--{category}.zarr/<root metadata blob>
    pattern = re.compile(
        r"(\d{4}-\d{2}-\d{2})/\d{4}-\d{2}-\d{2}--(\w+)\.zarr/(zarr\.json|\.zmetadata|\.zattrs)$"
    )
    derived_markers = ("--denoised", "--mvbs", "--nasc")

    results: list[tuple[str, str, str]] = []
    seen: set[str] = set()
    for blob in client.list_blobs(name_starts_with=blob_prefix):
        blob_name = blob.name
        # Skip denoised / mvbs / nasc
        if any(marker in blob_name for marker in derived_markers):
            continue
        m = pattern.search(blob_name)
        if not m:
            continue
        day_key = m.group(1)
        category = m.group(2)
        # A store may expose several metadata blobs; report it only once.
        key = f"{day_key}/{category}"
        if key in seen:
            continue
        seen.add(key)
        # Strip the trailing metadata file name to get the store path.
        zarr_path = blob_name.rsplit("/", 1)[0]
        results.append((day_key, category, zarr_path))

    results.sort()
    return results
1677+
1678+
16071679
def _list_denoised_local(local_root: Path) -> list[tuple[str, str, str]]:
16081680
"""Scan local disk for denoised zarr directories."""
16091681
results: list[tuple[str, str, str]] = []
@@ -2442,29 +2514,33 @@ def run_full_pipeline(args: argparse.Namespace) -> None:
24422514
sys.exit(1)
24432515

24442516
# ── Stage 1: Discover raw EK80 files ────────────────────────
2445-
log.info("=" * 70)
2446-
log.info("STAGE 1: Discover raw EK80 files")
2447-
log.info("=" * 70)
2448-
t0 = time.time()
2517+
day_files: dict = {}
2518+
if not args.skip_sv and not args.skip_raw:
2519+
log.info("=" * 70)
2520+
log.info("STAGE 1: Discover raw EK80 files")
2521+
log.info("=" * 70)
2522+
t0 = time.time()
24492523

2450-
raw_files = discover_raw_files(
2451-
start_date, end_date,
2452-
file_share_name=args.file_share,
2453-
file_share_path=args.file_share_path,
2454-
)
2455-
if not raw_files:
2456-
log.error("No raw files found — check file share")
2457-
sys.exit(1)
2524+
raw_files = discover_raw_files(
2525+
start_date, end_date,
2526+
file_share_name=args.file_share,
2527+
file_share_path=args.file_share_path,
2528+
)
2529+
if not raw_files:
2530+
log.error("No raw files found — check file share")
2531+
sys.exit(1)
24582532

2459-
day_files = group_raw_by_day(raw_files)
2460-
log.info(
2461-
"Stage 1 complete: %d raw files across %d days (%.1fs)",
2462-
len(raw_files), len(day_files), time.time() - t0,
2463-
)
2533+
day_files = group_raw_by_day(raw_files)
2534+
log.info(
2535+
"Stage 1 complete: %d raw files across %d days (%.1fs)",
2536+
len(raw_files), len(day_files), time.time() - t0,
2537+
)
2538+
else:
2539+
log.info("Skipping Stage 1 (raw file discovery) — using existing zarrs")
24642540

24652541
# ── Stage 2: Download GPS GeoParquet ────────────────────────
24662542
gps_df = None
2467-
if not args.skip_gps:
2543+
if not args.skip_gps and not args.skip_sv:
24682544
log.info("=" * 70)
24692545
log.info("STAGE 2: Download GPS GeoParquet")
24702546
log.info("=" * 70)
@@ -2514,6 +2590,32 @@ def run_full_pipeline(args: argparse.Namespace) -> None:
25142590
day_denoised_zarrs.setdefault(day_key, {})[category] = zarr_path
25152591
sv_path = f"{day_key}/{day_key}--{category}.zarr"
25162592
day_sv_zarrs.setdefault(day_key, {})[category] = sv_path
2593+
elif args.skip_sv:
2594+
log.info("--skip-sv: loading existing Sv zarrs, re-running denoise → products")
2595+
sv_entries = list_sv_zarrs(output_container)
2596+
if start_date or end_date:
2597+
sv_entries = [
2598+
(d, c, p) for d, c, p in sv_entries
2599+
if (not start_date or datetime.fromisoformat(d) >= start_date)
2600+
and (not end_date or datetime.fromisoformat(d) <= end_date)
2601+
]
2602+
log.info(" Found %d Sv zarrs to re-denoise", len(sv_entries))
2603+
for day_key, category, sv_path in sv_entries:
2604+
day_sv_zarrs.setdefault(day_key, {})[category] = sv_path
2605+
2606+
# Re-denoise each Sv zarr
2607+
for day_key, categories in sorted(day_sv_zarrs.items()):
2608+
for category, sv_path in categories.items():
2609+
try:
2610+
denoised_path = denoise_day_zarr(
2611+
sv_path, output_container, day_key, category,
2612+
)
2613+
day_denoised_zarrs.setdefault(day_key, {})[category] = denoised_path
2614+
all_denoised.append((day_key, category, denoised_path))
2615+
except Exception as e:
2616+
log.error(" Denoise failed %s/%s: %s", day_key, category, e)
2617+
_release_memory()
2618+
log.info(" Re-denoised %d zarrs across %d days", len(all_denoised), len(day_denoised_zarrs))
25172619
else:
25182620
# Filter days for --resume
25192621
days_to_process = []
@@ -2861,6 +2963,10 @@ def main() -> None:
28612963
"--skip-raw", action="store_true",
28622964
help="Skip raw→Sv stages; use existing Sv + denoised zarrs from output container.",
28632965
)
2966+
parser.add_argument(
2967+
"--skip-sv", action="store_true",
2968+
help="Skip raw→Sv stages; re-run denoising from existing Sv zarrs, then MVBS/NASC/echograms.",
2969+
)
28642970
parser.add_argument(
28652971
"--skip-gps", action="store_true",
28662972
help="Skip GPS GeoParquet download.",

0 commit comments

Comments
 (0)