44import json
55import pathlib
66import re
7+ import socket
78import sys
89import tarfile
910import tempfile
1011from dataclasses import dataclass , field
11- from typing import List , Optional , Union
12+ from typing import List , Optional , Union , Literal
1213from urllib .parse import urlparse
1314
1415from async_lru import alru_cache
@dataclass
class FirewallEntry:
    """
    One network endpoint contacted during an image pull.

    An entry may carry a nested ``redirect`` entry describing the endpoint the
    request for ``url`` was redirected to.
    """

    # Label for the purpose of the request (the resolver uses values such as
    # "auth", "manifest", "config" and "layer-<n>").
    role: str
    # URL that is requested for this entry.
    url: str
    # IP addresses resolved for `hostname`; empty until resolution has run.
    ips: List[str] = field(default_factory=list)
    # Entry for the redirect target of `url`, or None when there is no redirect.
    redirect: Optional["FirewallEntry"] = None

    @property
    def hostname(self) -> str:
        """
        Return the host component of ``url``.

        :raises ValueError: if ``url`` has no parsable hostname.
        """
        parsed = urlparse(self.url)
        if parsed.hostname is None:
            raise ValueError(f"Could not parse url {self.url}")
        return parsed.hostname
6565
6666
6767@dataclass
@@ -113,16 +113,19 @@ async def _request_with_auth(
113113 params : dict = None ,
114114 data : Union [dict , bytes , None ] = None ,
115115 headers : dict = None ,
116+ aiohttp_kwargs : dict = None ,
116117 ) -> Response :
117118 if not headers :
118119 headers = {}
120+ if not aiohttp_kwargs :
121+ aiohttp_kwargs = {}
119122 response = await _request (
120123 url ,
121124 {** headers , ** self ._headers },
122125 params = params ,
123126 data = data ,
124127 method = method ,
125- aiohttp_kwargs = self ._aiohttp_kwargs ,
128+ aiohttp_kwargs = self ._aiohttp_kwargs | aiohttp_kwargs ,
126129 )
127130 if response .status == 401 :
128131 www_auth = response .headers ["WWW-Authenticate" ]
@@ -620,12 +623,20 @@ async def delete_tag(self) -> Response:
620623 response = await self ._request_with_auth (url , headers = self ._headers , method = "delete" )
621624 return response
622625
async def _resolve_entry(self, role: str, blob_url: str) -> FirewallEntry:
    """
    Request a blob URL without following redirects and record the redirect target, if any.

    Uses GET with ``allow_redirects=False`` to capture CDN redirects (e.g. cdn01.quay.io)
    from the ``Location`` header instead of following them.

    :param role: label stored on the returned entry (and on its nested redirect entry).
    :param blob_url: URL of the blob to request.
    :return: FirewallEntry for ``blob_url``; its ``redirect`` is a nested FirewallEntry for the
        redirect target when the response is a 3xx carrying a ``Location`` header, otherwise None.
    """
    # Local import: the visible module-level import only brings in `urlparse`.
    from urllib.parse import urljoin

    response = await self._request_with_auth(
        blob_url, headers=self._headers, method="get", aiohttp_kwargs={"allow_redirects": False}
    )
    location = response.headers.get("Location") if response.headers else None
    redirect = None
    # A 3xx without a Location header must not produce an entry for the literal string "None".
    if location and 300 <= response.status < 400:
        # Location may be a relative reference; resolve it against the request URL so that
        # the nested entry always has a parsable hostname.
        redirect = FirewallEntry(role, urljoin(blob_url, str(location)))
    return FirewallEntry(role, blob_url, redirect=redirect)
627636
628- async def resolve (self , architecture : Union [str , "Platform" , None ] = None ) -> List [FirewallEntry ]:
637+ async def resolve (
638+ self , architecture : Union [str , "Platform" , None ] = None , ip_version : Literal [4 , 6 , 0 ] = 0
639+ ) -> List [FirewallEntry ]:
629640 """
630641 Performs a dry-run pull to discover every network endpoint that a real pull would contact. Executes
631642 authentication, manifest fetch, and GET requests (redirects not followed) for config and layer blobs — capturing any CDN redirect instead of following it to the blob data.
@@ -641,27 +652,41 @@ async def resolve(self, architecture: Union[str, "Platform", None] = None) -> Li
641652 ```
642653
643654 :param architecture: optional architecture for the image.
655+ :param ip_version: IP version filter. Use `4` for IPv4 only, `6` for IPv6 only, or `0` (default) for both.
644656 :return: list of FirewallEntry objects with role, URL, hostname and resolved IPs; when a blob request was redirected, the entry carries a nested `redirect` FirewallEntry for the redirect target.
645657 """
658+ # multiple queries can be done for retrieving the manifest, but they all go to the same url
646659 manifest = await self .get_default_manifest (architecture )
647660
648- entries : List [FirewallEntry ] = [
649- FirewallEntry ("registry" , self .manifest_url ()),
650- ]
651-
661+ entries : List [FirewallEntry ] = []
652662 if self .auth_server_url :
653663 entries .append (FirewallEntry ("auth" , self .auth_server_url ))
664+ entries .append (FirewallEntry ("manifest" , self .manifest_url ()))
654665
666+ # we retrieve the config
655667 config_digest = manifest ["config" ]["digest" ]
656- entries .append (await self ._head_entry ("config" , f"{ self .blobs_url ()} /{ config_digest } " ))
668+ entries .append (await self ._resolve_entry ("config" , f"{ self .blobs_url ()} /{ config_digest } " ))
657669
670+ # then each individual layer
658671 for idx , layer in enumerate (await self .get_layers (architecture )):
659- entries .append (await self ._head_entry (f"layer-{ idx } " , f"{ self .blobs_url ()} /{ layer } " ))
672+ entries .append (await self ._resolve_entry (f"layer-{ idx } " , f"{ self .blobs_url ()} /{ layer } " ))
660673
661- unique_hostnames = {e .hostname for e in entries if e .hostname }
662- resolved = await asyncio .gather (* (resolve_hostname (h ) for h in unique_hostnames ))
663- ip_map = dict (zip (unique_hostnames , resolved ))
674+ # unwrap all entries once in case there were any redirects
675+ unwrapped_entries : List [FirewallEntry ] = []
664676 for entry in entries :
677+ unwrapped_entries .append (entry )
678+ if entry .redirect :
679+ unwrapped_entries .append (entry .redirect )
680+
681+ # compute hostnames
682+ unique_hostnames = set ()
683+ for e in unwrapped_entries :
684+ unique_hostnames .add (e .hostname )
685+ family = {4 : socket .AF_INET , 6 : socket .AF_INET6 }.get (ip_version , socket .AF_UNSPEC )
686+ resolved = await asyncio .gather (* (resolve_hostname (h , family = family ) for h in unique_hostnames ))
687+ ip_map = dict (zip (unique_hostnames , resolved ))
688+ for entry in unwrapped_entries :
665689 entry .ips = ip_map .get (entry .hostname , [])
666690
691+ # return the original entries object (nested)
667692 return entries
0 commit comments