Skip to content

Commit e2e09fa

Browse files
authored
fix: support formats other than utf-8 while decoding text [CM-1078] (#3968)
Signed-off-by: Mouad BANI <mouad-mb@outlook.com>
1 parent 31ddfc0 commit e2e09fa

2 files changed

Lines changed: 14 additions & 13 deletions

File tree

services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@
3838
from crowdgit.services.base.base_service import BaseService
3939
from crowdgit.services.maintainer.bedrock import invoke_bedrock
4040
from crowdgit.services.maintainer.section_extractor import SectionExtractor
41-
from crowdgit.services.utils import run_shell_command
41+
from crowdgit.services.utils import run_shell_command, safe_decode
4242
from crowdgit.settings import MAINTAINER_RETRY_INTERVAL_DAYS, MAINTAINER_UPDATE_INTERVAL_HOURS
4343

4444

@@ -140,6 +140,11 @@ class MaintainerService(BaseService):
140140

141141
_section_extractor = SectionExtractor()
142142

143+
@staticmethod
144+
async def _read_text_file(file_path: str) -> str:
145+
async with aiofiles.open(file_path, "rb") as f:
146+
return safe_decode(await f.read())
147+
143148
def make_role(self, title: str):
144149
title = title.lower()
145150
title = (
@@ -554,8 +559,7 @@ async def find_candidate_files(
554559
for candidate_path in all_paths:
555560
file_path = os.path.join(repo_path, candidate_path)
556561
try:
557-
async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
558-
content = await f.read()
562+
content = await self._read_text_file(file_path)
559563
except Exception as e:
560564
self.logger.warning(f"Failed to read candidate {candidate_path}: {repr(e)}")
561565
continue
@@ -638,9 +642,7 @@ async def try_saved_maintainer_file(
638642
f"Saved maintainer file exists, reading content: '{saved_maintainer_file}'"
639643
)
640644
try:
641-
async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
642-
content = await f.read()
643-
645+
content = await self._read_text_file(file_path)
644646
result = await self.analyze_and_build_result(saved_maintainer_file, content)
645647
cost += result.total_cost
646648
return result, cost
@@ -784,8 +786,7 @@ def _attach_metadata(result: MaintainerResult) -> MaintainerResult:
784786
)
785787
else:
786788
try:
787-
async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
788-
content = await f.read()
789+
content = await self._read_text_file(file_path)
789790
result = await self.analyze_and_build_result(ai_file_name, content)
790791
total_cost += result.total_cost
791792
return _attach_metadata(result)

services/apps/git_integration/src/crowdgit/services/utils.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
from crowdgit.logger import logger
1414

1515

16-
def _safe_decode(data: bytes) -> str:
16+
def safe_decode(data: bytes) -> str:
1717
"""
1818
Safely decode bytes to string, handling various encodings that might be present in git output.
1919
@@ -229,7 +229,7 @@ async def run_shell_command(
229229
async def _run_with_stderr_logging() -> bytes:
230230
async def _stream() -> None:
231231
async for raw_line in process.stderr:
232-
line = _safe_decode(raw_line).rstrip()
232+
line = safe_decode(raw_line).rstrip()
233233
if line:
234234
stderr_logger.log(stderr_log_level, line)
235235
stderr_lines.append(line)
@@ -240,7 +240,7 @@ async def _stream() -> None:
240240

241241
coro = _run_with_stderr_logging()
242242
stdout = await (asyncio.wait_for(coro, timeout=timeout) if timeout else coro)
243-
stdout_text = _safe_decode(stdout).strip() if stdout else ""
243+
stdout_text = safe_decode(stdout).strip() if stdout else ""
244244
stderr_text = "\n".join(stderr_lines)
245245
else:
246246
# Wait for completion with optional timeout
@@ -252,8 +252,8 @@ async def _stream() -> None:
252252
stdout, stderr = await process.communicate(input=stdin_input)
253253

254254
# Handle potentially non-UTF-8 encoded output from git commands
255-
stdout_text = _safe_decode(stdout).strip() if stdout else ""
256-
stderr_text = _safe_decode(stderr).strip() if stderr else ""
255+
stdout_text = safe_decode(stdout).strip() if stdout else ""
256+
stderr_text = safe_decode(stderr).strip() if stderr else ""
257257

258258
# Check return code
259259
if process.returncode == 0:

0 commit comments

Comments
 (0)