@@ -870,6 +870,9 @@ def __init__(
870870 # Mapping from SCC id to corresponding SCC instance. This is populated
871871 # in process_graph().
872872 self .scc_by_id : dict [int , SCC ] = {}
873+ # Mapping from module id to the SCC it belongs to. This is populated
874+ # in process_graph().
875+ self .scc_by_mod_id : dict [str , SCC ] = {}
873876 # Global topological order for SCCs. This exists to make order of processing
874877 # SCCs more predictable.
875878 self .top_order : list [int ] = []
@@ -892,6 +895,8 @@ def __init__(
892895 # raw parsed trees not analyzed with mypy. We use these to find absolute
893896 # location of a symbol used as a location for an error message.
894897 self .extra_trees : dict [str , MypyFile ] = {}
898+ # Cache for transitive dependency check (expensive).
899+ self .transitive_deps_cache : dict [tuple [int , int ], bool ] = {}
895900
896901 def dump_stats (self ) -> None :
897902 if self .options .dump_build_stats :
@@ -1203,6 +1208,35 @@ def wait_for_done_workers(self) -> tuple[list[SCC], bool, dict[str, tuple[str, l
12031208 results ,
12041209 )
12051210
def is_transitive_scc_dep(self, from_scc_id: int, to_scc_id: int) -> bool:
    """Check if one SCC is a (transitive) dependency of another.

    Walks the ``deps`` of the SCC with id ``from_scc_id`` (looked up in
    ``self.scc_by_id``) breadth-first and reports whether ``to_scc_id``
    is reachable through them.

    Results are memoized in ``self.transitive_deps_cache`` keyed by
    ``(from_scc_id, to_scc_id)``. On a negative answer every SCC visited
    during the walk is negative-cached against ``to_scc_id`` as well.

    Args:
        from_scc_id: id of the SCC whose dependencies are searched.
        to_scc_id: id of the SCC being looked for.

    Returns:
        True if ``to_scc_id`` is found among the direct or indirect deps
        of ``from_scc_id``, False otherwise.

    Raises:
        KeyError: if a visited SCC id is missing from ``self.scc_by_id``.
    """
    cache = self.transitive_deps_cache
    edge = (from_scc_id, to_scc_id)
    if (cached := cache.get(edge)) is not None:
        return cached
    todo = self.scc_by_id[from_scc_id].deps
    seen: set[int] = set()
    while todo:
        # Mark the whole level as visited up front, so that neither this
        # level nor any earlier one is queued for expansion again.
        seen.update(todo)
        more: set[int] = set()
        # Breadth-first search seems to be better here, because all
        # "lower-level" SCCs are processed and some may be cached.
        for dep in todo:
            if dep == to_scc_id:
                cache[edge] = True
                return True
            known = cache.get((dep, to_scc_id))
            if known:
                cache[edge] = True
                return True
            if known is None:
                # Only expand SCCs with no cached answer, and only queue
                # SCCs not visited yet: this avoids re-expanding shared
                # dependencies and guarantees termination.
                more |= self.scc_by_id[dep].deps - seen
            # A cached False means nothing below `dep` can reach the
            # target, so there is no need to descend into it.
        todo = more
    cache[edge] = False
    for dep in seen:
        # We negative-cache all intermediate lookups, thus
        # trading time for space.
        cache[(dep, to_scc_id)] = False
    return False
1239+
12061240
def deps_to_json(x: dict[str, set[str]]) -> bytes:
    """Serialize a mapping of string sets with json_dumps().

    Every set value is converted to a list before the mapping is handed
    to json_dumps(); keys are passed through unchanged.
    """
    listed: dict[str, list[str]] = {}
    for key, members in x.items():
        listed[key] = list(members)
    return json_dumps(listed)
@@ -1841,6 +1875,7 @@ def write_cache(
18411875 dep_prios : list [int ],
18421876 dep_lines : list [int ],
18431877 old_interface_hash : bytes ,
1878+ trans_dep_hash : bytes ,
18441879 source_hash : str ,
18451880 ignore_all : bool ,
18461881 manager : BuildManager ,
@@ -1957,6 +1992,7 @@ def write_cache(
19571992 dep_prios = dep_prios ,
19581993 dep_lines = dep_lines ,
19591994 interface_hash = interface_hash ,
1995+ trans_dep_hash = trans_dep_hash ,
19601996 version_id = manager .version_id ,
19611997 ignore_all = ignore_all ,
19621998 plugin_data = plugin_data ,
@@ -2175,6 +2211,12 @@ class State:
21752211 # Contains a hash of the public interface in incremental mode
21762212 interface_hash : bytes = b""
21772213
2214+ # Hash of import structure that this module depends on. It is not 1:1 with
2215+ # transitive dependencies set, but if two hashes are equal, transitive
2216+ # dependencies are guaranteed to be identical. Some expensive checks can be
2217+ # skipped if this value is unchanged for a module.
2218+ trans_dep_hash : bytes = b""
2219+
21782220 # Options, specialized for this file
21792221 options : Options
21802222
@@ -2322,15 +2364,15 @@ def new_state(
23222364 if temporary :
23232365 state .load_tree (temporary = True )
23242366 if not manager .use_fine_grained_cache ():
2325- # Special case: if there were a previously missing package imported here
2367+ # Special case: if there were a previously missing package imported here,
23262368 # and it is not present, then we need to re-calculate dependencies.
23272369 # This is to support patterns like this:
23282370 # from missing_package import missing_module # type: ignore
23292371 # At first mypy doesn't know that `missing_module` is a module
23302372 # (it may be a variable, a class, or a function), so it is not added to
23312373 # suppressed dependencies. Therefore, when the package with module is added,
23322374 # we need to re-calculate dependencies.
2333- # NOTE: see comment below for why we skip this in fine grained mode.
2375+ # NOTE: see comment below for why we skip this in fine- grained mode.
23342376 if exist_added_packages (suppressed , manager , options ):
23352377 state .parse_file () # This is safe because the cache is anyway stale.
23362378 state .compute_dependencies ()
@@ -2350,6 +2392,7 @@ def new_state(
23502392 # We don't need parsed trees in coordinator process, we parse only to
23512393 # compute dependencies.
23522394 state .tree = None
2395+ del manager .ast_cache [id ]
23532396
23542397 return state
23552398
@@ -3012,6 +3055,7 @@ def write_cache(self) -> tuple[CacheMeta, str] | None:
30123055 dep_prios ,
30133056 dep_lines ,
30143057 self .interface_hash ,
3058+ self .trans_dep_hash ,
30153059 self .source_hash ,
30163060 self .ignore_all ,
30173061 self .manager ,
@@ -3774,6 +3818,27 @@ def order_ascc_ex(graph: Graph, ascc: SCC) -> list[str]:
37743818 return scc
37753819
37763820
def verify_transitive_deps(ascc: SCC, graph: Graph, manager: BuildManager) -> str | None:
    """Verify all indirect dependencies of this SCC are still reachable via direct ones.

    Modules whose current trans_dep_hash equals the one recorded in their
    cache meta are skipped. For the remaining modules, every dependency
    with priority PRI_INDIRECT that lives in a different SCC must be a
    transitive dependency of this SCC according to
    manager.is_transitive_scc_dep().

    Return first unreachable dependency id, or None.
    """
    for mod_id in ascc.mod_ids:
        state = graph[mod_id]
        assert state.meta is not None, "Must be called on fresh SCCs only"
        if state.trans_dep_hash != state.meta.trans_dep_hash:
            # Import graph changed for this module: re-check its indirect deps.
            indirect_deps = (
                dep for dep in state.dependencies if state.priorities.get(dep) == PRI_INDIRECT
            )
            for dep in indirect_deps:
                target_scc_id = manager.scc_by_mod_id[dep].id
                # Dependencies inside the same SCC need no reachability check.
                if target_scc_id != ascc.id and not manager.is_transitive_scc_dep(
                    ascc.id, target_scc_id
                ):
                    return dep
    return None
3840+
3841+
37773842def find_stale_sccs (
37783843 sccs : list [SCC ], graph : Graph , manager : BuildManager
37793844) -> tuple [list [SCC ], list [SCC ]]:
@@ -3782,7 +3847,8 @@ def find_stale_sccs(
37823847 Fresh SCCs are those where:
37833848 * We have valid cache files for all modules in the SCC.
37843849 * There are no changes in dependencies (files removed from/added to the build).
3785- * The interface hashes of direct dependents matches those recorded in the cache.
3850+ * The interface hashes of dependencies matches those recorded in the cache.
3851+ * All indirect dependencies are still reachable via direct ones.
37863852 The first and second conditions are verified by is_fresh().
37873853 """
37883854 stale_sccs = []
@@ -3799,6 +3865,15 @@ def find_stale_sccs(
37993865 stale_deps .add (dep )
38003866 fresh = fresh and not stale_deps
38013867
3868+ # Verify the invariant that indirect dependencies are a subset of transitive direct
3869+ # dependencies. Note: the case where indirect dependency is removed from the graph
3870+ # completely is already handled above.
3871+ stale_indirect = None
3872+ if fresh :
3873+ stale_indirect = verify_transitive_deps (ascc , graph , manager )
3874+ if stale_indirect is not None :
3875+ fresh = False
3876+
38023877 if fresh :
38033878 fresh_msg = "fresh"
38043879 elif stale_scc :
@@ -3807,8 +3882,11 @@ def find_stale_sccs(
38073882 fresh_msg += f" ({ ' ' .join (sorted (stale_scc ))} )"
38083883 if stale_deps :
38093884 fresh_msg += f" with stale deps ({ ' ' .join (sorted (stale_deps ))} )"
3810- else :
3885+ elif stale_deps :
38113886 fresh_msg = f"stale due to deps ({ ' ' .join (sorted (stale_deps ))} )"
3887+ else :
3888+ assert stale_indirect is not None
3889+ fresh_msg = f"stale due to stale indirect dep(s): first { stale_indirect } "
38123890
38133891 scc_str = " " .join (ascc .mod_ids )
38143892 if fresh :
@@ -3860,6 +3938,9 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
38603938 scc_by_id = {scc .id : scc for scc in sccs }
38613939 manager .scc_by_id = scc_by_id
38623940 manager .top_order = [scc .id for scc in sccs ]
3941+ for scc in sccs :
3942+ for mod_id in scc .mod_ids :
3943+ manager .scc_by_mod_id [mod_id ] = scc
38633944
38643945 # Broadcast SCC structure to the parallel workers, since they don't compute it.
38653946 sccs_message = SccsDataMessage (sccs = sccs )
@@ -3904,8 +3985,8 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
39043985 # type-checking this is already done and results should be empty here.
39053986 if not manager .workers :
39063987 assert not results
3907- for id , (interface_cache , errors ) in results .items ():
3908- new_hash = bytes .fromhex (interface_cache )
3988+ for id , (interface_hash , errors ) in results .items ():
3989+ new_hash = bytes .fromhex (interface_hash )
39093990 if new_hash != graph [id ].interface_hash :
39103991 graph [id ].mark_interface_stale ()
39113992 graph [id ].interface_hash = new_hash
@@ -3917,6 +3998,7 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
39173998 if not scc_by_id [dependent ].not_ready_deps :
39183999 not_ready .remove (scc_by_id [dependent ])
39194000 ready .append (scc_by_id [dependent ])
4001+ manager .trace (f"Transitive deps cache size: { sys .getsizeof (manager .transitive_deps_cache )} " )
39204002
39214003
39224004def order_ascc (graph : Graph , ascc : AbstractSet [str ], pri_max : int = PRI_INDIRECT ) -> list [str ]:
@@ -4168,6 +4250,11 @@ def sorted_components(graph: Graph) -> list[SCC]:
41684250 scc .size_hint = sum (graph [mid ].size_hint for mid in scc .mod_ids )
41694251 for dep in scc_dep_map [scc ]:
41704252 dep .direct_dependents .append (scc .id )
4253+ # We compute dependencies hash here since we know no direct
4254+ # dependencies will be added or suppressed after this point.
4255+ trans_dep_hash = transitive_dep_hash (scc , graph )
4256+ for id in scc .mod_ids :
4257+ graph [id ].trans_dep_hash = trans_dep_hash
41714258 res .extend (sorted_ready )
41724259 return res
41734260
@@ -4201,6 +4288,21 @@ def deps_filtered(graph: Graph, vertices: AbstractSet[str], id: str, pri_max: in
42014288 ]
42024289
42034290
def transitive_dep_hash(scc: SCC, graph: Graph) -> bytes:
    """Compute stable snapshot of transitive import structure for given SCC.

    Collects every dependency of the SCC's modules whose priority is not
    PRI_INDIRECT, maps each one to the hex form of its own trans_dep_hash
    (or to an empty string when the dependency belongs to this same SCC),
    and hashes the JSON dump of that mapping.

    Since this reads graph[dep].trans_dep_hash for dependencies outside
    the SCC, those values must already be populated when this is called.
    """
    direct_deps: set[str] = set()
    for mod_id in scc.mod_ids:
        state = graph[mod_id]
        for dep in state.dependencies:
            if state.priorities.get(dep) != PRI_INDIRECT:
                direct_deps.add(dep)

    snapshot: dict[str, str] = {}
    for dep_id in direct_deps:
        if dep_id in scc.mod_ids:
            # Intra-SCC dependency: contributes only its presence.
            snapshot[dep_id] = ""
        else:
            snapshot[dep_id] = graph[dep_id].trans_dep_hash.hex()
    return hash_digest_bytes(json_dumps(snapshot))
4304+
4305+
def missing_stubs_file(cache_dir: str) -> str:
    """Return the path of the "missing_stubs" file inside cache_dir."""
    stubs_path = os.path.join(cache_dir, "missing_stubs")
    return stubs_path
42064308
0 commit comments