|
| 1 | +from re import compile |
| 2 | +from pathlib import Path |
| 3 | +from rq import Queue, Worker |
| 4 | +from rq.job import Job |
| 5 | +from podman import PodmanClient |
| 6 | +from shutil import rmtree |
| 7 | + |
| 8 | +from asu.config import settings |
| 9 | +from asu.util import log, get_podman |
| 10 | + |
# Length of a build-request hash: a SHA-256 hex digest (64 hex chars) — TODO confirm against request-hash producer
REQUEST_HASH_LENGTH = 64
# Root directory of published build artifacts; one subdirectory per request hash
store: Path = settings.public_path / "store"
# Shared podman client used for the prune calls in GCWorker.clean_podman
podman: PodmanClient = get_podman()
| 14 | + |
| 15 | + |
class GCWorker(Worker):
    """A Worker class that does periodic garbage collection on ASU's
    public store directory. We tie into the standard `Worker` maintenance
    sequence, so the period is controlled by the base class. You may change
    the garbage collection frequency in podman-compose.yml by adding a
    `--maintenance-interval` option to the startup command as follows (the
    default is 600 seconds).

    >>> command: rqworker ... --maintenance-interval 1800
    """

    # Matches directory names that are full-length lowercase hex request hashes.
    hash_match = compile(f"^[0-9a-f]{{{REQUEST_HASH_LENGTH}}}$")

    def clean_store(self) -> None:
        """Delete orphaned build directories from the public store.

        A store subdirectory is considered orphaned when no queue served by
        this worker still holds a job whose id equals the directory name.
        Directories whose names are not valid request hashes are ignored.

        For performance testing, the store directory was mounted on a
        slow external USB hard drive. A typical timing result showed ~1000
        directories deleted per second on that test system. The synthetic
        test directories were created containing 10 files in each.
        File count dominated the timing, with file size being relatively
        insignificant, likely due to `stat` calls being the bottleneck.
        (Just for comparison, tests against store mounted on a fast SSD
        were about twice as fast.)

        >>> Cleaning /mnt/slow/public/store: deleted 5000/5000 builds
        >>> Timing analysis for clean_store: 5.081s
        """

        deleted: int = 0
        total: int = 0
        build_dir: Path
        queue: Queue
        for build_dir in store.glob("*"):
            if not build_dir.is_dir() or not self.hash_match.match(build_dir.name):
                continue
            total += 1
            # A build is live while ANY queue still knows its job. The
            # previous code deleted as soon as one queue lacked the job,
            # which (a) removed builds still referenced by another queue
            # and (b) called rmtree repeatedly on the same path, raising
            # FileNotFoundError on the second call.
            live: bool = False
            for queue in self.queues:
                job: Job = queue.fetch_job(build_dir.name)
                log.info(f" Found {build_dir.name = } {job = }")
                if job is not None:
                    live = True
                    break
            if not live:
                rmtree(build_dir)
                deleted += 1

        log.info(f"Cleaning {store}: deleted {deleted}/{total} builds")

    def clean_podman(self) -> None:
        """Reclaim space from the various podman disk entities as they are orphaned."""
        removed = podman.containers.prune()
        log.info(f"Reclaimed {removed.get('SpaceReclaimed', 0):,d}B from containers")
        removed = podman.images.prune()
        log.info(f"Reclaimed {removed.get('SpaceReclaimed', 0):,d}B from images")
        removed = podman.volumes.prune()
        log.info(f"Reclaimed {removed.get('SpaceReclaimed', 0):,d}B from volumes")

    def run_maintenance_tasks(self):
        """Run the base Worker maintenance, then our GC passes."""
        super().run_maintenance_tasks()
        self.clean_store()
        self.clean_podman()