Skip to content

Commit bfa9aa7

Browse files
committed
feat: add LibreOffice runtime probing and conversion helpers
- Introduced `libreoffice_runtime.py` to encapsulate functions for probing and converting documents using LibreOffice.
- Implemented functions to validate LibreOffice binaries, run conversions, and handle isolated runtime environments.
- Enhanced test suite to cover scenarios for detecting and handling unusable LibreOffice installations.
- Updated workspace bootstrap tests to rewrite LibreOffice runtime paths and validate conversion capabilities.
1 parent 4437fdb commit bfa9aa7

12 files changed

Lines changed: 1632 additions & 411 deletions

scripts/bootstrap-workspace.sh

Lines changed: 126 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -347,42 +347,78 @@ workspace_runtime_ready() {
347347
[[ -x "$ROOT/.venv/bin/python" && -f "$ROOT/runtime/bootstrap_state.json" ]]
348348
}
349349

350-
validated_soffice_binary() {
351-
local candidate="$1"
352-
local output_path=""
353-
local probe_pid=""
354-
local ticks=0
355-
356-
[[ -x "$candidate" ]] || return 1
357-
output_path="$(mktemp "${TMPDIR:-/tmp}/docmason-soffice-version.XXXXXX")" || return 1
350+
probe_libreoffice_validation() {
351+
local probe_python=""
352+
local shell_line=""
353+
local candidate=""
358354

359-
"$candidate" --version >"$output_path" 2>&1 &
360-
probe_pid="$!"
361-
while kill -0 "$probe_pid" >/dev/null 2>&1; do
362-
if (( ticks >= 150 )); then
363-
pkill -P "$probe_pid" >/dev/null 2>&1 || true
364-
kill "$probe_pid" >/dev/null 2>&1 || true
365-
sleep 0.1
366-
pkill -P "$probe_pid" >/dev/null 2>&1 || true
367-
kill -9 "$probe_pid" >/dev/null 2>&1 || true
368-
wait "$probe_pid" >/dev/null 2>&1 || true
369-
rm -f "$output_path"
370-
return 1
371-
fi
372-
sleep 0.1
373-
ticks=$((ticks + 1))
374-
done
375-
if ! wait "$probe_pid" >/dev/null 2>&1; then
376-
rm -f "$output_path"
377-
return 1
378-
fi
379-
if grep -qi "libreoffice" "$output_path"; then
380-
rm -f "$output_path"
381-
printf '%s\n' "$candidate"
355+
LIBREOFFICE_BINARY=""
356+
LIBREOFFICE_CANDIDATE_BINARY=""
357+
LIBREOFFICE_VALIDATION_DETAIL=""
358+
LIBREOFFICE_PROBE_CONTRACT=""
359+
LIBREOFFICE_DETECTED_BUT_UNUSABLE="0"
360+
LIBREOFFICE_VALIDATION_UNAVAILABLE="0"
361+
362+
probe_python="$(find_host_context_helper_python || true)"
363+
if [[ -z "$probe_python" ]]; then
364+
for candidate in \
365+
"$(command -v soffice 2>/dev/null || true)" \
366+
"$(command -v libreoffice 2>/dev/null || true)" \
367+
"/Applications/LibreOffice.app/Contents/MacOS/soffice"
368+
do
369+
[[ -n "$candidate" ]] || continue
370+
if [[ -e "$candidate" ]]; then
371+
LIBREOFFICE_CANDIDATE_BINARY="$candidate"
372+
LIBREOFFICE_VALIDATION_UNAVAILABLE="1"
373+
break
374+
fi
375+
done
376+
LIBREOFFICE_VALIDATION_DETAIL="No supported Python runtime was available to validate LibreOffice with the current smoke-probe contract."
382377
return 0
383378
fi
384-
rm -f "$output_path"
385-
return 1
379+
380+
while IFS= read -r shell_line; do
381+
eval "$shell_line"
382+
done < <(
383+
ROOT="$ROOT" PYTHONPATH="$ROOT/src${PYTHONPATH:+:$PYTHONPATH}" "$probe_python" - <<'PY'
384+
import os
385+
import shlex
386+
import sys
387+
388+
root = os.environ["ROOT"]
389+
sys.path.insert(0, os.path.join(root, "src"))
390+
391+
payload = {
392+
"ready": False,
393+
"binary": None,
394+
"detail": "",
395+
"probe_contract": "",
396+
"candidate_failures": [],
397+
}
398+
try:
399+
from docmason.libreoffice_runtime import LIBREOFFICE_PROBE_CONTRACT, validate_soffice_binary
400+
401+
payload = validate_soffice_binary(None)
402+
if not payload.get("probe_contract"):
403+
payload["probe_contract"] = LIBREOFFICE_PROBE_CONTRACT
404+
except Exception as exc: # pragma: no cover - launcher fallback path
405+
payload["detail"] = f"LibreOffice validation probe failed: {exc}"
406+
407+
408+
def emit(name: str, value: object) -> None:
409+
text = "" if value is None else str(value)
410+
print(f"{name}={shlex.quote(text)}")
411+
412+
413+
binary = payload.get("binary")
414+
ready = bool(payload.get("ready"))
415+
emit("LIBREOFFICE_BINARY", binary if ready else "")
416+
emit("LIBREOFFICE_CANDIDATE_BINARY", binary or "")
417+
emit("LIBREOFFICE_VALIDATION_DETAIL", payload.get("detail") or "")
418+
emit("LIBREOFFICE_PROBE_CONTRACT", payload.get("probe_contract") or "")
419+
emit("LIBREOFFICE_DETECTED_BUT_UNUSABLE", "1" if (not ready and binary) else "0")
420+
PY
421+
)
386422
}
387423

388424
scan_office_renderer_requirement() {
@@ -411,16 +447,21 @@ scan_office_renderer_requirement() {
411447

412448
probe_machine_baseline() {
413449
local platform_name=""
414-
local soffice_binary=""
415450
local missing=()
416-
local candidate=""
451+
local baseline_gap_detail=""
417452

418453
MACHINE_BASELINE_APPLICABLE=0
419454
MACHINE_BASELINE_READY=1
420455
MACHINE_BASELINE_STATUS="not-applicable"
421456
MACHINE_BASELINE_DETAIL="Native macOS machine-baseline policy is not active for this host surface."
457+
MACHINE_BASELINE_HOST_ACCESS_REASON=""
422458
BREW_BINARY=""
423459
LIBREOFFICE_BINARY=""
460+
LIBREOFFICE_CANDIDATE_BINARY=""
461+
LIBREOFFICE_VALIDATION_DETAIL=""
462+
LIBREOFFICE_PROBE_CONTRACT=""
463+
LIBREOFFICE_DETECTED_BUT_UNUSABLE="0"
464+
LIBREOFFICE_VALIDATION_UNAVAILABLE="0"
424465
OFFICE_RENDERER_REQUIRED=0
425466
MACHINE_BASELINE_MISSING_COMPONENTS=()
426467

@@ -434,19 +475,16 @@ probe_machine_baseline() {
434475
if command -v brew >/dev/null 2>&1; then
435476
BREW_BINARY="$(command -v brew)"
436477
fi
437-
for candidate in \
438-
"$(command -v soffice 2>/dev/null || true)" \
439-
"$(command -v libreoffice 2>/dev/null || true)" \
440-
"/Applications/LibreOffice.app/Contents/MacOS/soffice"
441-
do
442-
[[ -n "$candidate" ]] || continue
443-
soffice_binary="$(validated_soffice_binary "$candidate" || true)"
444-
[[ -n "$soffice_binary" ]] && break
445-
done
446-
LIBREOFFICE_BINARY="$soffice_binary"
478+
probe_libreoffice_validation
447479

448480
if [[ "$OFFICE_RENDERER_REQUIRED" == "1" && -z "$LIBREOFFICE_BINARY" ]]; then
449-
missing+=("LibreOffice")
481+
if [[ "$LIBREOFFICE_VALIDATION_UNAVAILABLE" == "1" ]]; then
482+
missing+=("LibreOffice")
483+
elif [[ "$LIBREOFFICE_DETECTED_BUT_UNUSABLE" == "1" ]]; then
484+
missing+=("LibreOffice (detected but unusable)")
485+
else
486+
missing+=("LibreOffice")
487+
fi
450488
fi
451489

452490
if (( ${#missing[@]} == 0 )); then
@@ -464,15 +502,39 @@ probe_machine_baseline() {
464502

465503
MACHINE_BASELINE_READY=0
466504
MACHINE_BASELINE_MISSING_COMPONENTS=("${missing[@]}")
505+
if [[ "$LIBREOFFICE_VALIDATION_UNAVAILABLE" == "1" ]]; then
506+
baseline_gap_detail="Native Codex machine baseline detected LibreOffice"
507+
if [[ -n "$LIBREOFFICE_CANDIDATE_BINARY" ]]; then
508+
baseline_gap_detail="$baseline_gap_detail at \`$LIBREOFFICE_CANDIDATE_BINARY\`"
509+
fi
510+
baseline_gap_detail="$baseline_gap_detail, but the current bootstrap path cannot execute the required smoke probe yet because no supported helper Python or bootstrap runtime is available."
511+
if [[ -n "$LIBREOFFICE_VALIDATION_DETAIL" ]]; then
512+
baseline_gap_detail="$baseline_gap_detail Validation detail: $LIBREOFFICE_VALIDATION_DETAIL"
513+
fi
514+
MACHINE_BASELINE_HOST_ACCESS_REASON="Native Codex machine baseline cannot yet validate LibreOffice for the current Office corpus because no supported helper Python or bootstrap runtime is available."
515+
elif [[ "$LIBREOFFICE_DETECTED_BUT_UNUSABLE" == "1" ]]; then
516+
baseline_gap_detail="Native Codex machine baseline detected LibreOffice"
517+
if [[ -n "$LIBREOFFICE_CANDIDATE_BINARY" ]]; then
518+
baseline_gap_detail="$baseline_gap_detail at \`$LIBREOFFICE_CANDIDATE_BINARY\`"
519+
fi
520+
baseline_gap_detail="$baseline_gap_detail, but it is not currently usable for the current Office corpus."
521+
if [[ -n "$LIBREOFFICE_VALIDATION_DETAIL" ]]; then
522+
baseline_gap_detail="$baseline_gap_detail Validation detail: $LIBREOFFICE_VALIDATION_DETAIL"
523+
fi
524+
MACHINE_BASELINE_HOST_ACCESS_REASON="Native Codex machine baseline detected LibreOffice, but it is not currently usable for the current Office corpus and needs machine-level repair."
525+
else
526+
baseline_gap_detail="Native Codex machine baseline is missing ${missing[*]} for the current Office corpus."
527+
MACHINE_BASELINE_HOST_ACCESS_REASON="Native Codex machine baseline is missing ${missing[*]} for the current Office corpus and needs machine-level installation."
528+
fi
467529
if [[ "$FULL_MACHINE_ACCESS" == "true" ]]; then
468530
MACHINE_BASELINE_STATUS="install-required"
469-
MACHINE_BASELINE_DETAIL="Native Codex machine baseline is missing ${missing[*]} for the current Office corpus."
531+
MACHINE_BASELINE_DETAIL="$baseline_gap_detail"
470532
else
471533
MACHINE_BASELINE_STATUS="host-access-upgrade-required"
472534
if [[ "$PERMISSION_MODE" == "default-permissions" ]]; then
473-
MACHINE_BASELINE_DETAIL="Native Codex machine baseline is missing ${missing[*]} for the current Office corpus, and the current thread is still in \`Default permissions\`."
535+
MACHINE_BASELINE_DETAIL="$baseline_gap_detail The current thread is still in \`Default permissions\`."
474536
else
475-
MACHINE_BASELINE_DETAIL="Native Codex machine baseline is missing ${missing[*]} for the current Office corpus, and the current turn does not expose \`Full access\` yet."
537+
MACHINE_BASELINE_DETAIL="$baseline_gap_detail The current turn does not expose \`Full access\` yet."
476538
fi
477539
fi
478540
}
@@ -507,6 +569,20 @@ emit_host_access_upgrade() {
507569
printf 'false,\n'
508570
fi
509571
printf ' "machine_baseline_status": "%s",\n' "$(json_escape "$MACHINE_BASELINE_STATUS")"
572+
printf ' "libreoffice_candidate_binary": '
573+
json_string_or_null "$LIBREOFFICE_CANDIDATE_BINARY"
574+
printf ',\n'
575+
printf ' "libreoffice_validation_detail": '
576+
json_string_or_null "$LIBREOFFICE_VALIDATION_DETAIL"
577+
printf ',\n'
578+
printf ' "libreoffice_probe_contract": '
579+
json_string_or_null "$LIBREOFFICE_PROBE_CONTRACT"
580+
printf ',\n'
581+
if [[ "$LIBREOFFICE_DETECTED_BUT_UNUSABLE" == "1" ]]; then
582+
printf ' "libreoffice_detected_but_unusable": true,\n'
583+
else
584+
printf ' "libreoffice_detected_but_unusable": false,\n'
585+
fi
510586
printf ' "bootstrap_source": '
511587
json_string_or_null "$BOOTSTRAP_SOURCE"
512588
printf ',\n'
@@ -584,7 +660,7 @@ probe_machine_baseline
584660
HOST_ACCESS_REASONS=()
585661
if [[ "$MACHINE_BASELINE_APPLICABLE" == "1" && "$MACHINE_BASELINE_READY" != "1" && "$FULL_MACHINE_ACCESS" != "true" ]]; then
586662
HOST_ACCESS_REASONS+=(
587-
"Native Codex machine baseline is missing ${MACHINE_BASELINE_MISSING_COMPONENTS[*]} and needs machine-level installation."
663+
"${MACHINE_BASELINE_HOST_ACCESS_REASON:-Native Codex machine baseline is not ready and needs machine-level repair.}"
588664
)
589665
fi
590666
if [[ "$WORKSPACE_RUNTIME_READY" != "1" ]]; then

skills/canonical/knowledge-base-sync/SKILL.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ If the agent cannot run local commands or inspect the resulting artifacts, stop
2929
- if `sync_status=waiting-shared-job`, treat the existing shared sync job as the legal owner and wait or retry rather than starting a second path
3030
- if `sync_status=action-required`, surface the blocker directly
3131
- when the blocker is missing sync capability, route the operator to `prepare`
32+
- when the blocker is a repairable Office machine-baseline gap and the host can provide `Full access`, continue through `docmason prepare --yes --json` and then resume the same sync task instead of stopping at a passive blocker report
3233
4. Treat successful `sync` as the deterministic truth-building path.
3334
- detect source changes
3435
- rebuild or reuse staged evidence
@@ -54,7 +55,7 @@ If the agent cannot run local commands or inspect the resulting artifacts, stop
5455
## Escalation Rules
5556

5657
- Do not invent a second approval surface. The public approval command is `docmason sync --yes`.
57-
- If Office rendering is required but unavailable, stop and return the concrete install step.
58+
- If Office rendering is required but unavailable, stop only when the governed `prepare` path still cannot repair or install LibreOffice honestly; otherwise continue through that repair path first.
5859
- If staged or hybrid follow-up work requires per-source editing, that bounded work may be parallelized, but the final rerun and final judgment remain on the main path.
5960
- Do not silently trigger this workflow from an ordinary answer path without surfacing the governed state transition.
6061

skills/canonical/workspace-bootstrap/SKILL.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,11 @@ If the agent cannot perform these capabilities, stop and explain that the enviro
4242
5. Run `docmason prepare --json --yes` when the launcher was not used, or when bootstrap needs an explicit rerun to repair or complete the repo-local environment.
4343
6. If `prepare` reports a degraded result, follow the reported next steps and rerun only the necessary deterministic command.
4444
7. Use `docs/setup/manual-workspace-recovery.md` only when the launcher or `prepare` still cannot finish honestly after the governed automatic path has already had enough access.
45-
8. If the corpus already contains PPTX, DOCX, or XLSX files and LibreOffice is missing:
45+
8. If the corpus already contains PPTX, DOCX, or XLSX files and LibreOffice is missing or detected but unusable:
4646
- LibreOffice is required only for an Office-rendering corpus; it is not a universal machine baseline dependency
4747
- on macOS with Homebrew already present, DocMason may use `brew install --cask libreoffice-still`
4848
- on macOS without Homebrew, DocMason should use the official LibreOffice installer path instead of trying to install Homebrew first
49+
- on macOS, if LibreOffice is already detected but fails the governed smoke probe, `prepare --yes` should treat that as a repair or reinstall case rather than as a fake ready state
4950
- on Linux, install LibreOffice with the distro package manager or the official packages, then ensure `soffice` is on `PATH`
5051
9. Run `docmason status --json` when you need to confirm the resulting workspace stage.
5152
10. Recommend `docmason sync --json` when source files are present and the user needs a usable knowledge base next.
@@ -59,7 +60,7 @@ If the agent cannot perform these capabilities, stop and explain that the enviro
5960
- If the platform or Python version is unsupported, stop and surface that blocker directly.
6061
- If `prepare` can only proceed through a higher-intrusion install step, explain it explicitly rather than hiding it inside automation.
6162
- If system-level installation requires additional permissions, request them when the current platform supports that flow; otherwise give the user the exact command or GUI step to run.
62-
- On native Codex/macOS, if the thread is still in `Default permissions` and higher access is required for downloads or machine-level setup, stop once with an explicit `Full access` upgrade instruction. Do not keep asking lower-level machine-inspection questions.
63+
- On native Codex/macOS, if the thread is still in `Default permissions` and higher access is required for downloads or machine-level setup, stop once with an explicit `Full access` upgrade instruction. After `Full access` is available, continue through the governed automatic repair or reinstall path instead of stopping at diagnosis alone.
6364
- For Claude Code or another compatibility host, keep the fallback wording short and host-generic; do not expand it into a second native bootstrap story.
6465
- Deterministic shell setup steps may run as background or main-agent commands, but the final environment judgment returns to the main agent.
6566

@@ -72,6 +73,7 @@ If the agent cannot perform these capabilities, stop and explain that the enviro
7273
- `prepare` bootstraps repo-local state only.
7374
- `./scripts/bootstrap-workspace.sh --yes` is the preferred zero-to-working launcher from a raw checkout because it can prepare `.venv` before the package is importable from the `src/` layout.
7475
- The launcher now performs governed preflight first, then probes bootstrap-Python liveness in bounded time and prefers repo-local candidates before shared ones.
76+
- The launcher and `prepare` now trust real LibreOffice smoke conversion, not only `soffice --version`, before declaring an Office-rendering machine baseline ready.
7577
- `runtime/bootstrap_state.json` is the cached ready marker that ordinary ask-time work should reuse.
7678
- The steady-state runtime is repo-local managed Python `3.13` under `.docmason/toolchain/python/`.
7779
- On the native Codex path, bootstrap should refresh repo-local skill shims under `.agents/skills/` rather than writing into `~/.codex/skills`.

0 commit comments

Comments
 (0)