1313from crowdgit .errors import CommandExecutionError , CrowdGitError
1414from crowdgit .models import CloneBatchInfo , Repository , ServiceExecution
1515from crowdgit .services .base .base_service import BaseService
16- from crowdgit .services .utils import get_default_branch , get_repo_name , run_shell_command
16+ from crowdgit .services .utils import (
17+ get_default_branch ,
18+ get_remote_default_branch ,
19+ get_repo_name ,
20+ run_shell_command ,
21+ )
1722
1823DEFAULT_CLONE_BATCH_DEPTH = 10
1924DEFAULT_STORAGE_OPTIMIZATION_THRESHOLD_MB = 2000
@@ -47,9 +52,9 @@ async def _check_if_final_batch(self, path: str, target_commit_hash: str | None)
4752 except CommandExecutionError :
4853 return False
4954
50- async def _init_minimal_clone (self , path : str , remote : str ) -> None :
55+ async def _perform_minimal_clone (self , path : str , remote : str ) -> None :
5156 """
52- Inits minimal clone of depth=1
57+ Perform minimal clone of depth=1
5358 """
5459 # increasing post buffer to avoid RPC failed error
5560 await run_shell_command (
@@ -140,6 +145,7 @@ async def _update_batch_info(
140145 For batched clones (clone_with_batches=True): Checks if target commit reached or full history fetched.
141146 """
142147 batch_info .repo_path = repo_path
148+ batch_info .clone_with_batches = clone_with_batches
143149
144150 if batch_info .is_first_batch :
145151 # Set latest commit only from first batch
@@ -271,16 +277,86 @@ async def _calculate_batch_depth(self, repo_path: str, remote: str) -> int:
271277 )
272278 return calculated_depth
273279
274- async def _clone_repo (self , repo_path : str , remote : str ):
275- """Perform full repository clone for new repositories that haven't been processed before"""
async def _perform_full_clone(self, repo_path: str, remote: str):
    """Run a full ``git clone`` of ``remote`` into ``repo_path``.

    The command is executed through ``run_shell_command`` with ``repo_path``
    as the working directory. An info message is logged before the clone
    starts and another once the command has returned.

    Args:
        repo_path: Local directory that receives the clone.
        remote: Remote repository URL passed to ``git clone``.
    """
    self.logger.info(f"Performing full clone for repo { remote } ...")
    clone_command = ["git", "clone", remote, repo_path]
    await run_shell_command(clone_command, cwd=repo_path)
    self.logger.info(f"Successfully completed full clone of repository: { remote } ")
278285
async def has_default_branch_changed(self, remote: str, saved_branch: str | None) -> bool:
    """Report whether the remote's default branch differs from the saved one.

    Args:
        remote: The remote repository URL.
        saved_branch: The branch currently saved for the repository (may be None).

    Returns:
        True only when both branch names are known and they differ, which
        callers treat as a signal to re-clone. False when the remote default
        branch cannot be determined, when no branch was saved, when the
        branches match, or when any exception is raised during the check.
    """
    try:
        remote_default_branch = await get_remote_default_branch(remote)

        if remote_default_branch is None:
            self.logger.warning(f"Could not determine default branch for { remote } ")
            changed = False
        elif saved_branch is None:
            self.logger.info(f"No saved branch for { remote } assuming it's not changed")
            changed = False
        elif saved_branch == remote_default_branch:
            self.logger.debug(f"Branch unchanged for { remote } : { saved_branch } ")
            changed = False
        else:
            self.logger.info(
                f"Branch changed for { remote } : saved='{ saved_branch } ' vs remote='{ remote_default_branch } '"
            )
            changed = True

        return changed

    except Exception as e:
        # Best effort: a failed check is treated as "no change" so that an
        # error here never triggers an unnecessary re-clone.
        self.logger.error(f"Error validating branch for { remote } : { e } ")
        return False
318+
319+ async def determine_clone_strategy (
320+ self , repo_path : str , remote : str , branch : str | None , last_processed_commit : str | None
321+ ) -> bool :
322+ """Determine whether to use full clone or minimal clone strategy based on repository state.
323+
324+ Args:
325+ repo_path: Local path where repository will be cloned
326+ remote: Remote repository URL (e.g., 'https://github.com/user/repo')
327+ branch: Current saved branch name or None for new repositories
328+ last_processed_commit: Last processed commit hash or None for new repositories
329+
330+ Returns: (clone_with_batches)
331+ bool: False for full clone (clone_with_batches=False), True for minimal clone (clone_with_batches=True)
332+
333+ Strategy:
334+ - Full clone: New repositories (last_processed_commit=None) or branch changed
335+ - Minimal clone: Existing repositories with unchanged branch for incremental processing
336+ """
337+
338+ self .logger .info (
339+ f"Starting clone decision for { remote } (branch: { branch } , last_commit: { last_processed_commit } )"
340+ )
341+
342+ default_branch_changed = await self .has_default_branch_changed (remote , branch )
343+
344+ if not last_processed_commit or default_branch_changed :
345+ reason = "new repository" if not last_processed_commit else "branch changed"
346+ self .logger .info (f"Performing full clone for { remote } - reason: { reason } " )
347+ await self ._perform_full_clone (repo_path , remote )
348+ return False
349+
350+ self .logger .info (
351+ f"Performing minimal clone for { remote } - existing repository with unchanged branch"
352+ )
353+ await self ._perform_minimal_clone (repo_path , remote )
354+ return True
355+
279356 async def clone_batches_generator (
280357 self ,
281358 repository : Repository ,
282359 working_dir_cleanup : bool | None = False ,
283- clone_with_batches : bool | None = True ,
284360 ) -> AsyncIterator [CloneBatchInfo ]:
285361 """
286362 Async generator that yields CloneBatchInfo for repository cloning.
@@ -302,22 +378,15 @@ async def clone_batches_generator(
302378 remote = remote ,
303379 is_final_batch = False ,
304380 is_first_batch = True ,
305- total_commits_count = 0 ,
306381 )
307382 try :
308383 temp_repo_path = tempfile .mkdtemp (prefix = f"{ get_repo_name (remote )} _" )
309- batch_info .repo_path = temp_repo_path
310384 batch_start_time = time .time ()
311385
312- if not clone_with_batches :
313- self .logger .info (f"Performing full clone for repo: { remote } " )
314- await self ._clone_repo (temp_repo_path , remote )
315- else :
316- # Incremental processing: start with minimal clone and fetch in batches
317- self .logger .info (
318- f"Performing incremental batched clone for existing repository: { remote } "
319- )
320- await self ._init_minimal_clone (temp_repo_path , remote )
386+ clone_with_batches = await self .determine_clone_strategy (
387+ temp_repo_path , remote , repository .branch , repository .last_processed_commit
388+ )
389+ if clone_with_batches :
321390 batch_depth = await self ._calculate_batch_depth (temp_repo_path , remote )
322391 await self ._update_batch_info (
323392 batch_info , temp_repo_path , repository .last_processed_commit , clone_with_batches
0 commit comments