Skip to content

Commit f7a4bfa

Browse files
authored
fix: Improve the registry blob redirect evaluation (#10)
- Fix the calculation of IP addresses for redirects (not shown on HEAD requests). - Add support for both IPv4 and IPv6 resolution (configurable). - Remove the Hostname column, since the hostname can be inferred from the URLs. - Refactor the code to simplify it.
1 parent b17f479 commit f7a4bfa

4 files changed

Lines changed: 107 additions & 63 deletions

File tree

README.md

Lines changed: 37 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -134,31 +134,43 @@ useful when you need to configure firewall rules, proxy allowlists, or DNS polic
134134
pulls often hit multiple hosts (registry, auth server, CDN) that all need to be reachable:
135135

136136
```bash
137-
$ crpy resolve alpine:latest
138-
Endpoints for alpine:latest
139-
┏━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
140-
┃ Role ┃ Hostname ┃ IPs ┃ URL ┃
141-
┡━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
142-
│ registry │ index.docker.io │ 100.50.185.129, │ https://index.docker.io/v2/li │
143-
│ │ │ 174.129.222.113, 3.81.188.6, │ brary/alpine/manifests/latest │
144-
│ │ │ 44.208.12.140, 52.71.174.30, │ │
145-
│ │ │ 52.86.153.188, 54.147.201.31, │ │
146-
│ │ │ 54.196.196.77 │ │
147-
│ auth │ auth.docker.io │ 104.18.43.178, 172.64.144.78 │ https://auth.docker.io/token?
148-
│ │ │ │ service=registry.docker.io&sc │
149-
│ │ │ │ ope=repository:library/alpine │
150-
│ │ │ │ :pull │
151-
│ config │ index.docker.io │ 100.50.185.129, │ https://index.docker.io/v2/li │
152-
│ │ │ 174.129.222.113, 3.81.188.6, │ brary/alpine/blobs/sha256:a40 │
153-
│ │ │ 44.208.12.140, 52.71.174.30, │ c03cbb81c59bfb0e0887ab0b18597 │
154-
│ │ │ 52.86.153.188, 54.147.201.31, │ 27075da7b9cc576a1cec2c771f38c │
155-
│ │ │ 54.196.196.77 │ 5fb │
156-
│ layer-0 │ index.docker.io │ 100.50.185.129, │ https://index.docker.io/v2/li │
157-
│ │ │ 174.129.222.113, 3.81.188.6, │ brary/alpine/blobs/sha256:589 │
158-
│ │ │ 44.208.12.140, 52.71.174.30, │ 002ba0eaed121a1dbf42f6648f29e │
159-
│ │ │ 52.86.153.188, 54.147.201.31, │ 5be55d5c8a6ee0f8eaa0285cc21ac │
160-
│ │ │ 54.196.196.77 │ 153 │
161-
└──────────┴─────────────────┴───────────────────────────────┴───────────────────────────────┘
137+
$ crpy resolve -4 alpine:latest
138+
Endpoints for alpine:latest
139+
┏━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
140+
┃ Role ┃ IPs ┃ URL ┃
141+
┡━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
142+
│ auth │ 104.18.43.178, 172.64.144.78 │ https://auth.docker.io/token?service=regi │
143+
│ │ │ stry.docker.io&scope=repository:library/a │
144+
│ │ │ lpine:pull │
145+
├────────────────────┼───────────────────────────────────────────┼───────────────────────────────────────────┤
146+
│ manifest │ 32.192.123.231, 32.195.147.39, │ https://index.docker.io/v2/library/alpine │
147+
│ │ 34.206.220.186, 44.194.100.18, │ /manifests/latest │
148+
│ │ 52.71.123.245, 54.152.111.129, │ │
149+
│ │ 54.210.213.255, 98.94.122.193 │ │
150+
├────────────────────┼───────────────────────────────────────────┼───────────────────────────────────────────┤
151+
│ config │ 32.192.123.231, 32.195.147.39, │ https://index.docker.io/v2/library/alpine │
152+
│ │ 34.206.220.186, 44.194.100.18, │ /blobs/sha256:a40c03cbb81c59bfb0e0887ab0b │
153+
│ │ 52.71.123.245, 54.152.111.129, │ 1859727075da7b9cc576a1cec2c771f38c5fb │
154+
│ │ 54.210.213.255, 98.94.122.193 │ │
155+
├────────────────────┼───────────────────────────────────────────┼───────────────────────────────────────────┤
156+
│ config (redirect) │ 172.64.66.1 │ https://docker-images-prod.6aa30f8b08e164 │
157+
│ │ │ 09b46e0173d6de2f56.r2.cloudflarestorage.c │
158+
│ │ │ om/registry-v2/docker/registry/v2/blobs/s │
159+
│ │ │ ha256/a4/a40c03cbb81c59bfb0e0887ab0b18597 │
160+
│ │ │ 27075da7b9cc576a1cec2c771f38c5fb/data?... │
161+
├────────────────────┼───────────────────────────────────────────┼───────────────────────────────────────────┤
162+
│ layer-0 │ 32.192.123.231, 32.195.147.39, │ https://index.docker.io/v2/library/alpine │
163+
│ │ 34.206.220.186, 44.194.100.18, │ /blobs/sha256:589002ba0eaed121a1dbf42f664 │
164+
│ │ 52.71.123.245, 54.152.111.129, │ 8f29e5be55d5c8a6ee0f8eaa0285cc21ac153 │
165+
│ │ 54.210.213.255, 98.94.122.193 │ │
166+
├────────────────────┼───────────────────────────────────────────┼───────────────────────────────────────────┤
167+
│ layer-0 (redirect) │ 172.64.66.1 │ https://docker-images-prod.6aa30f8b08e164 │
168+
│ │ │ 09b46e0173d6de2f56.r2.cloudflarestorage.c │
169+
│ │ │ om/registry-v2/docker/registry/v2/blobs/s │
170+
│ │ │ ha256/58/589002ba0eaed121a1dbf42f6648f29e │
171+
│ │ │ 5be55d5c8a6ee0f8eaa0285cc21ac153/data?... │
172+
└────────────────────┴───────────────────────────────────────────┴───────────────────────────────────────────┘
173+
162174
```
163175

164176
# Why create this package?

crpy/cmd.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from rich.table import Table
99
from rich.text import Text
1010

11+
1112
from crpy.common import HTTPConnectionError, UnauthorizedError
1213
from crpy.registry import RegistryInfo
1314
from crpy.storage import (
@@ -133,18 +134,25 @@ async def _auth(args):
133134

134135
async def _resolve(args):
135136
ri = RegistryInfo.from_url(args.url[0], proxy=args.proxy, insecure=args.insecure)
136-
entries = await ri.resolve(args.architecture[0] if args.architecture else None)
137+
ip_version = 6 if args.ipv6 else (4 if args.ipv4 else 0)
138+
entries = await ri.resolve(args.architecture[0] if args.architecture else None, ip_version=ip_version)
137139

138140
table = Table(title=f"Endpoints for {args.url[0]}", title_style="bold")
139141
table.add_column("Role", style="magenta", no_wrap=True)
140-
table.add_column("Hostname", style="cyan")
141142
table.add_column("IPs", style="green", overflow="fold")
142143
table.add_column("URL", overflow="fold")
143144

144145
for entry in entries:
145146
url_text = Text(entry.url)
146147
url_text.stylize(f"link {entry.url}")
147-
table.add_row(entry.role, entry.hostname or "", ", ".join(entry.ips), url_text)
148+
table.add_row(entry.role, ", ".join(entry.ips), url_text)
149+
table.add_section()
150+
if entry.redirect:
151+
redirect_text = Text(entry.redirect.url)
152+
redirect_text.stylize(f"link {entry.redirect.url}")
153+
table.add_row(f"{entry.role} (redirect)", ", ".join(entry.redirect.ips), redirect_text)
154+
table.add_section()
155+
148156
print(table)
149157

150158

@@ -307,6 +315,9 @@ def main(*args):
307315
help="Architecture for the image.",
308316
default=None,
309317
)
318+
ip_group = resolve.add_mutually_exclusive_group()
319+
ip_group.add_argument("-4", dest="ipv4", action="store_true", default=False, help="Resolve IPv4 addresses only.")
320+
ip_group.add_argument("-6", dest="ipv6", action="store_true", default=False, help="Resolve IPv6 addresses only.")
310321
# version
311322
version = subparsers.add_parser("version", help="Displays the application version.")
312323
version.set_defaults(func=_version)

crpy/common.py

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ class Response:
1515
status: int
1616
data: bytes
1717
headers: Optional[dict] = None
18-
real_url: Optional[str] = None
1918

2019
def json(self) -> dict:
2120
return json.loads(self.data)
@@ -34,9 +33,7 @@ async def _request(
3433
async with aiohttp.ClientSession(trust_env=True) as session:
3534
method_fn = getattr(session, method)
3635
async with method_fn(url, headers=headers, params=params, data=data, **aiohttp_kwargs) as response:
37-
return Response(
38-
response.status, await response.read(), dict(response.headers), str(response.request_info.real_url)
39-
)
36+
return Response(response.status, await response.read(), dict(response.headers))
4037
except aiohttp.ClientConnectionError as e:
4138
raise HTTPConnectionError(str(e))
4239

@@ -107,19 +104,18 @@ def platform_from_dict(platform: dict) -> str:
107104
return base_str
108105

109106

110-
async def resolve_hostname(hostname: str) -> List[str]:
107+
async def resolve_hostname(hostname: str, family: int = socket.AF_UNSPEC) -> List[str]:
111108
"""
112109
Resolves a hostname to a sorted list of unique IP addresses.
113110
114111
:param hostname: the hostname to resolve.
112+
:param family: socket address family to filter results. Use ``socket.AF_INET`` for IPv4 only,
113+
``socket.AF_INET6`` for IPv6 only, or ``socket.AF_UNSPEC`` (default) for both.
115114
:return: sorted list of unique IP address strings.
116115
"""
117-
try:
118-
loop = asyncio.get_running_loop()
119-
results = await loop.getaddrinfo(hostname, None)
120-
return sorted({r[4][0] for r in results})
121-
except socket.gaierror:
122-
return []
116+
loop = asyncio.get_running_loop()
117+
results = await loop.getaddrinfo(hostname, None, family=family)
118+
return sorted({r[4][0] for r in results})
123119

124120

125121
# exceptions

crpy/registry.py

Lines changed: 49 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,12 @@
44
import json
55
import pathlib
66
import re
7+
import socket
78
import sys
89
import tarfile
910
import tempfile
1011
from dataclasses import dataclass, field
11-
from typing import List, Optional, Union
12+
from typing import List, Optional, Union, Literal
1213
from urllib.parse import urlparse
1314

1415
from async_lru import alru_cache
@@ -51,17 +52,16 @@
5152
@dataclass
5253
class FirewallEntry:
5354
role: str
54-
request_url: str
55+
url: str
5556
ips: List[str] = field(default_factory=list)
56-
redirect_url: Optional[str] = None
57+
redirect: Optional["FirewallEntry"] = None
5758

5859
@property
59-
def url(self) -> str:
60-
return self.redirect_url or self.request_url
61-
62-
@property
63-
def hostname(self) -> Optional[str]:
64-
return urlparse(self.url).hostname
60+
def hostname(self) -> str:
61+
parsed = urlparse(self.url)
62+
if parsed.hostname is None:
63+
raise ValueError(f"Could not parse url {self.url}")
64+
return parsed.hostname
6565

6666

6767
@dataclass
@@ -113,16 +113,19 @@ async def _request_with_auth(
113113
params: dict = None,
114114
data: Union[dict, bytes, None] = None,
115115
headers: dict = None,
116+
aiohttp_kwargs: dict = None,
116117
) -> Response:
117118
if not headers:
118119
headers = {}
120+
if not aiohttp_kwargs:
121+
aiohttp_kwargs = {}
119122
response = await _request(
120123
url,
121124
{**headers, **self._headers},
122125
params=params,
123126
data=data,
124127
method=method,
125-
aiohttp_kwargs=self._aiohttp_kwargs,
128+
aiohttp_kwargs=self._aiohttp_kwargs | aiohttp_kwargs,
126129
)
127130
if response.status == 401:
128131
www_auth = response.headers["WWW-Authenticate"]
@@ -620,12 +623,20 @@ async def delete_tag(self) -> Response:
620623
response = await self._request_with_auth(url, headers=self._headers, method="delete")
621624
return response
622625

623-
async def _head_entry(self, role: str, blob_url: str) -> FirewallEntry:
624-
response = await self._request_with_auth(blob_url, method="head", headers=self._headers)
625-
redirect_url = response.real_url if response.real_url and response.real_url != blob_url else None
626-
return FirewallEntry(role, blob_url, redirect_url=redirect_url)
626+
async def _resolve_entry(self, role: str, blob_url: str) -> FirewallEntry:
627+
# Use GET with allow_redirects=False to capture CDN redirects (e.g. cdn01.quay.io).
628+
response = await self._request_with_auth(
629+
blob_url, headers=self._headers, method="get", aiohttp_kwargs={"allow_redirects": False}
630+
)
631+
redirect_url = (
632+
str(response.headers.get("Location")) if response.headers and 300 <= response.status < 400 else None
633+
)
634+
redirect = FirewallEntry(role, redirect_url, redirect=None) if redirect_url else None
635+
return FirewallEntry(role, blob_url, redirect=redirect)
627636

628-
async def resolve(self, architecture: Union[str, "Platform", None] = None) -> List[FirewallEntry]:
637+
async def resolve(
638+
self, architecture: Union[str, "Platform", None] = None, ip_version: Literal[4, 6, 0] = 0
639+
) -> List[FirewallEntry]:
629640
"""
630641
Performs a dry-run pull to discover every network endpoint that a real pull would contact. Executes
631642
authentication, manifest fetch, and GET requests (without following redirects) for config and layer blobs — without downloading blob data from redirect targets.
@@ -641,27 +652,41 @@ async def resolve(self, architecture: Union[str, "Platform", None] = None) -> Li
641652
```
642653
643654
:param architecture: optional architecture for the image.
655+
:param ip_version: IP version filter. Use `4` for IPv4 only, `6` for IPv6 only, or `0` (default) for both.
644656
:return: list of FirewallEntry objects with role, URL, resolved IPs and, when the registry redirects, a nested redirect entry.
645657
"""
658+
# retrieving the manifest may take several requests, but they all go to the same URL
646659
manifest = await self.get_default_manifest(architecture)
647660

648-
entries: List[FirewallEntry] = [
649-
FirewallEntry("registry", self.manifest_url()),
650-
]
651-
661+
entries: List[FirewallEntry] = []
652662
if self.auth_server_url:
653663
entries.append(FirewallEntry("auth", self.auth_server_url))
664+
entries.append(FirewallEntry("manifest", self.manifest_url()))
654665

666+
# we retrieve the config
655667
config_digest = manifest["config"]["digest"]
656-
entries.append(await self._head_entry("config", f"{self.blobs_url()}/{config_digest}"))
668+
entries.append(await self._resolve_entry("config", f"{self.blobs_url()}/{config_digest}"))
657669

670+
# then each individual layer
658671
for idx, layer in enumerate(await self.get_layers(architecture)):
659-
entries.append(await self._head_entry(f"layer-{idx}", f"{self.blobs_url()}/{layer}"))
672+
entries.append(await self._resolve_entry(f"layer-{idx}", f"{self.blobs_url()}/{layer}"))
660673

661-
unique_hostnames = {e.hostname for e in entries if e.hostname}
662-
resolved = await asyncio.gather(*(resolve_hostname(h) for h in unique_hostnames))
663-
ip_map = dict(zip(unique_hostnames, resolved))
674+
# unwrap all entries once in case there were any redirects
675+
unwrapped_entries: List[FirewallEntry] = []
664676
for entry in entries:
677+
unwrapped_entries.append(entry)
678+
if entry.redirect:
679+
unwrapped_entries.append(entry.redirect)
680+
681+
# compute hostnames
682+
unique_hostnames = set()
683+
for e in unwrapped_entries:
684+
unique_hostnames.add(e.hostname)
685+
family = {4: socket.AF_INET, 6: socket.AF_INET6}.get(ip_version, socket.AF_UNSPEC)
686+
resolved = await asyncio.gather(*(resolve_hostname(h, family=family) for h in unique_hostnames))
687+
ip_map = dict(zip(unique_hostnames, resolved))
688+
for entry in unwrapped_entries:
665689
entry.ips = ip_map.get(entry.hostname, [])
666690

691+
# return the original entries object (nested)
667692
return entries

0 commit comments

Comments
 (0)