fix(sim): decode simulator output on Windows

Yahya Farhadi · LarsAsplund · commit 42f8358d1698 · 2026-03-07T23:48:51.000+01:00
The old code used a plain data.decode("utf-8"), which raised a raw UnicodeDecodeError whenever simulator output contained
non-UTF-8 bytes (e.g., text emitted in a Windows legacy code page such as cp1252). The error message looked like:

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe9 in position 42: invalid continuation byte

This was confusing because it gave no indication that the problem was with simulator output encoding, not with your source code or VUnit itself.

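The change tries a list of candidate encodings in order (UTF-8 with or without a BOM, the locale's preferred encoding, then cp1252) and returns the first successful decode. If all of them fail, it falls back to data.decode("utf-8", errors="replace"), which substitutes undecodable bytes with the Unicode replacement character (U+FFFD, �) instead of crashing.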

Before: A simulator emitting a single non-UTF-8 byte (e.g., accented character in a file path, or a copyright symbol in a vendor library message) would crash VUnit with an opaque UnicodeDecodeError — both on compile success output and on compile failure output (err.output).

After: The output is decoded gracefully using the most likely encoding, so VUnit continues normally and the user sees the actual compile pass/fail message instead of a decoding traceback.
diff --git a/vunit/sim_if/__init__.py b/vunit/sim_if/__init__.py
@@ -11,6 +11,7 @@
 import sys
 import os
 from os import environ, listdir, pathsep
+import locale
 import subprocess
 from pathlib import Path
 from typing import List
@@ -360,14 +361,36 @@ def check_output(command, env=None):
     """
     Wrapper arround subprocess.check_output
     """
+    def _decode(data: bytes) -> str:
+        """Decode tool output robustly across platforms.
+
+        Some simulators on Windows emit output in a legacy code page (e.g. cp1252),
+        which is not valid UTF-8. "utf-8-sig" goes first because it accepts plain
+        UTF-8 and additionally strips a leading BOM.
+        Never raises: as a last resort undecodable bytes are replaced with U+FFFD.
+        """
+        encodings_to_try = (
+            "utf-8-sig",
+            locale.getpreferredencoding(False) or "utf-8",
+            "cp1252",
+        )
+
+        for encoding in encodings_to_try:
+            try:
+                return data.decode(encoding)
+            except (UnicodeDecodeError, LookupError):  # LookupError: codec name unknown to Python
+                continue
+
+        return data.decode("utf-8", errors="replace")
+
     try:
         output = subprocess.check_output(  # pylint: disable=unexpected-keyword-arg
             command, env=env, stderr=subprocess.STDOUT
         )
     except subprocess.CalledProcessError as err:
-        err.output = err.output.decode("utf-8")
+        err.output = _decode(err.output)
         raise err
-    return output.decode("utf-8")
+    return _decode(output)
 
 
 def check_executable(simulator_name, prefix, executable_name):