def convert_file(
    input_path: str | Path,
    output_path: str | Path | None = None,
    *,
    input_format: str | None = None,
    output_format: str | None = None,
    instruction: str | None = None,
    max_retries: int = 3,
    model: str | None = None,
    teacher_fn: TeacherFn | None = None,
) -> ConversionResult:
    """Convert ``input_path`` to the target format, optionally writing ``output_path``.

    Formats are auto-detected from file extensions when not given explicitly. The
    pipeline makes one initial attempt plus up to ``max_retries`` retries, feeding
    the previous failure back to the teacher each time. ``teacher_fn`` can be
    injected to run the pipeline without the model (used in tests).

    Args:
        input_path: File to convert; must exist.
        output_path: Where to write the converted text. When ``output_format`` is
            not given, the output format is detected from this path.
        input_format: Explicit input format, passed to ``_detect_format``.
        output_format: Explicit output format (case-insensitive); must be a key
            of ``EXT``.
        instruction: Instruction handed to the teacher. Defaults to
            ``"Convert this <IN> to <OUT>."``.
        max_retries: Number of retries after the first attempt; must be >= 0.
        model: Passed to ``_default_teacher_fn`` when ``teacher_fn`` is not given.
        teacher_fn: Callable invoked as
            ``teacher_fn(envelope, instruction, out_fmt, feedback=...)``.

    Returns:
        The ``ConversionResult`` of the last attempt. ``accepted`` is only set on
        a result whose output scored 1.0 on both ``format_validity`` and
        ``loadability``. ``retries`` is the zero-based index of the attempt that
        produced the result.

    Raises:
        ValueError: If ``max_retries`` is negative, ``output_format`` is not
            supported, or neither ``output_format`` nor ``output_path`` is given.
        FileNotFoundError: If ``input_path`` does not exist.
    """
    # Reject a negative budget up front: range(max_retries + 1) would be empty
    # and the function would silently return the "not_run" placeholder.
    if max_retries < 0:
        raise ValueError(f"max_retries must be >= 0, got {max_retries!r}")

    input_path = Path(input_path)
    if not input_path.exists():
        raise FileNotFoundError(f"input file not found: {input_path}")

    in_fmt = _detect_format(input_path, input_format, "input")

    # An explicit output_format wins over whatever the output path suggests.
    if output_format:
        out_fmt = output_format.lower()
        if out_fmt not in EXT:
            raise ValueError(
                f"Unsupported output_format {output_format!r}; one of {SUPPORTED_FORMATS}."
            )
    elif output_path is not None:
        out_fmt = _detect_format(Path(output_path), None, "output")
    else:
        raise ValueError("Provide output_format=, or an output_path with a known extension.")

    if teacher_fn is None:
        teacher_fn = _default_teacher_fn(model)

    envelope = extract_envelope(input_path, in_fmt)
    envelope.pop("file_path", None)  # never leak local paths

    instruction = instruction or f"Convert this {in_fmt.upper()} to {out_fmt.upper()}."
    out_suffix = EXT[out_fmt]

    feedback: str | None = None
    result = ConversionResult("", in_fmt, out_fmt, error="not_run")

    for attempt in range(max_retries + 1):
        tr = teacher_fn(envelope, instruction, out_fmt, feedback=feedback)
        if not tr.ok:
            # The teacher produced no usable script; ask again with a hint.
            result = ConversionResult("", in_fmt, out_fmt, script=tr.script,
                                      retries=attempt, error="no_script")
            feedback = "Your previous response had no <script> block. Output one."
            continue

        sr = run_script(tr.script, input_path, output_suffix=out_suffix)
        if not sr.ok:
            # The script failed; feed a truncated stderr back to keep the prompt short.
            result = ConversionResult(sr.output_text, in_fmt, out_fmt, script=tr.script,
                                      retries=attempt, error=sr.error_kind)
            feedback = f"The script failed ({sr.error_kind}): {sr.stderr[:300]}. Fix it."
            continue

        out = sr.output_text
        scores = {
            "format_validity": format_validity(out, out_fmt),
            "loadability": loadability(out, out_fmt),
        }
        accepted = scores["format_validity"] == 1.0 and scores["loadability"] == 1.0
        result = ConversionResult(out, in_fmt, out_fmt, script=tr.script, scores=scores,
                                  accepted=accepted, retries=attempt, error=None)
        if accepted:
            break
        feedback = f"Output was not valid {out_fmt.upper()} (scores={scores}). Fix the script."

    # Any non-empty output from the last attempt is written, even when it was not
    # accepted; callers should check ``result.accepted`` before trusting the file.
    if output_path is not None and result.output_text:
        output_path = Path(output_path)
        # Create missing parent directories so a finished conversion is not lost
        # to a FileNotFoundError at the very last step.
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(result.output_text, encoding="utf-8")
        result.output_path = output_path

    return result