Skip to content

API reference

The public surface is small: one function, its result type, and the model resolver.

convert_file

datamorph.convert.convert_file(input_path, output_path=None, *, input_format=None, output_format=None, instruction=None, max_retries=3, model=None, teacher_fn=None)

Convert input_path to the target format, optionally writing output_path.

Formats are auto-detected from file extensions when not given explicitly. The pipeline retries up to max_retries times with error feedback. teacher_fn can be injected to run the pipeline without the model (used in tests).

Source code in datamorph/convert.py
def convert_file(
    input_path: str | Path,
    output_path: str | Path | None = None,
    *,
    input_format: str | None = None,
    output_format: str | None = None,
    instruction: str | None = None,
    max_retries: int = 3,
    model: str | None = None,
    teacher_fn: TeacherFn | None = None,
) -> ConversionResult:
    """Run the conversion pipeline on ``input_path`` and return its outcome.

    The input format comes from ``input_format`` or the input file's extension.
    The output format comes from ``output_format`` when given, otherwise from the
    extension of ``output_path``. The teacher is asked for a script up to
    ``max_retries + 1`` times in total; every failed attempt feeds a short error
    message into the next request. ``teacher_fn`` replaces the default
    model-backed teacher (used in tests).

    The returned ``ConversionResult`` describes the last attempt made. If
    ``output_path`` is given and that attempt produced non-empty text, the text is
    written there as UTF-8 whether or not it was accepted; check ``accepted``.

    Raises:
        FileNotFoundError: ``input_path`` does not exist.
        ValueError: ``output_format`` is not a supported format, or neither
            ``output_format`` nor ``output_path`` was provided.
    """
    input_path = Path(input_path)
    if not input_path.exists():
        raise FileNotFoundError(f"input file not found: {input_path}")

    src_fmt = _detect_format(input_path, input_format, "input")

    # Resolve the target format: explicit argument first, output extension second.
    if not output_format:
        if output_path is None:
            raise ValueError("Provide output_format=, or an output_path with a known extension.")
        dst_fmt = _detect_format(Path(output_path), None, "output")
    else:
        dst_fmt = output_format.lower()
        if dst_fmt not in EXT:
            raise ValueError(
                f"Unsupported output_format {output_format!r}; one of {SUPPORTED_FORMATS}."
            )

    if teacher_fn is None:
        teacher_fn = _default_teacher_fn(model)

    envelope = extract_envelope(input_path, src_fmt)
    envelope.pop("file_path", None)  # never leak local paths
    if not instruction:
        instruction = f"Convert this {src_fmt.upper()} to {dst_fmt.upper()}."
    suffix = EXT[dst_fmt]

    feedback: str | None = None
    # Sentinel returned untouched only when the loop body never executes.
    outcome = ConversionResult("", src_fmt, dst_fmt, error="not_run")
    for attempt in range(max_retries + 1):
        reply = teacher_fn(envelope, instruction, dst_fmt, feedback=feedback)
        if reply.ok:
            run = run_script(reply.script, input_path, output_suffix=suffix)
            if run.ok:
                text = run.output_text
                validity_score = format_validity(text, dst_fmt)
                load_score = loadability(text, dst_fmt)
                scores = {
                    "format_validity": validity_score,
                    "loadability": load_score,
                }
                # Both checks must score a perfect 1.0 for the output to count.
                accepted = validity_score == 1.0 and load_score == 1.0
                outcome = ConversionResult(
                    text,
                    src_fmt,
                    dst_fmt,
                    script=reply.script,
                    scores=scores,
                    accepted=accepted,
                    retries=attempt,
                    error=None,
                )
                if accepted:
                    break
                feedback = (
                    f"Output was not valid {dst_fmt.upper()} (scores={scores}). Fix the script."
                )
            else:
                # The script ran but failed; keep whatever text it produced.
                outcome = ConversionResult(
                    run.output_text,
                    src_fmt,
                    dst_fmt,
                    script=reply.script,
                    retries=attempt,
                    error=run.error_kind,
                )
                feedback = f"The script failed ({run.error_kind}): {run.stderr[:300]}. Fix it."
        else:
            # The teacher reply carried no usable script.
            outcome = ConversionResult(
                "",
                src_fmt,
                dst_fmt,
                script=reply.script,
                retries=attempt,
                error="no_script",
            )
            feedback = "Your previous response had no <script> block. Output one."

    if output_path is None or not outcome.output_text:
        return outcome

    destination = Path(output_path)
    destination.write_text(outcome.output_text, encoding="utf-8")
    outcome.output_path = destination
    return outcome

ConversionResult

datamorph.convert.ConversionResult dataclass

Outcome of a single convert_file call.

Source code in datamorph/convert.py
@dataclass
class ConversionResult:
    """Outcome of a single ``convert_file`` call."""

    output_text: str
    input_format: str
    output_format: str
    script: str = ""
    scores: dict[str, float] = field(default_factory=dict)
    accepted: bool = False
    retries: int = 0
    error: str | None = None
    output_path: Path | None = None

resolve_model

datamorph.model.resolve_model(model=None)

Return a local path to the MLX model to load (downloading it if needed).

Source code in datamorph/model.py
def resolve_model(model: str | os.PathLike[str] | None = None) -> str:
    """Return a local path to the MLX model to load (downloading it if needed)."""
    candidate = model or os.environ.get(ENV_VAR)
    if candidate:
        candidate = str(candidate)
        path = Path(candidate).expanduser()
        if path.exists():
            return str(path)
        if _looks_like_repo_id(candidate):
            return _download(candidate)
        raise FileNotFoundError(
            f"Model path not found: {path}. Pass a local directory, a Hugging Face "
            f"repo id, or set ${ENV_VAR}."
        )

    if DEFAULT_MODEL_DIR.exists():
        return str(DEFAULT_MODEL_DIR)

    # Nothing local — fall back to the published model on the Hub.
    return _download(DEFAULT_HF_REPO)