#!/usr/bin/env python3
from __future__ import annotations

import argparse
import os
import sys
from pathlib import Path
from urllib.parse import quote
from urllib.request import Request, urlopen


def iter_pdfs(paths: list[Path]) -> list[Path]:
    """Expand the given paths into a flat list of PDF files.

    A directory contributes its direct children that are regular files with
    a ``.pdf`` suffix (case-insensitive), in sorted order; subdirectories are
    not descended into. A regular file with a ``.pdf`` suffix is kept as-is.
    Anything else is reported on stderr as ``skip: <path>`` and ignored.

    Args:
        paths: Files and/or directories to inspect, in caller order.

    Returns:
        The matching PDF paths, grouped in the order of ``paths``.
    """

    def _is_pdf_file(candidate: Path) -> bool:
        # Regular file whose extension is ".pdf" in any letter case.
        return candidate.is_file() and candidate.suffix.lower() == ".pdf"

    collected: list[Path] = []
    for entry in paths:
        if entry.is_dir():
            children = [child for child in entry.iterdir() if _is_pdf_file(child)]
            children.sort()
            collected += children
            continue
        if _is_pdf_file(entry):
            collected.append(entry)
            continue
        print(f"skip: {entry}", file=sys.stderr)
    return collected


def upload(path: Path, *, endpoint: str, token: str) -> None:
    """POST one PDF to the upload endpoint and print the server's reply.

    The file name is sent as the ``filename`` query parameter and the raw
    file bytes form the request body. On success a line of the form
    ``<name>: <status> <body>`` is printed to stdout.

    Args:
        path: Local PDF file to send.
        endpoint: Upload URL; must use the ``http`` or ``https`` scheme.
        token: Value sent in the ``X-Corpus-Upload-Token`` header.

    Raises:
        ValueError: If ``endpoint`` is not an http(s) URL.
        OSError: If the file cannot be read or the request fails
            (``urllib.error.URLError`` and ``HTTPError`` are ``OSError``
            subclasses). For an ``HTTPError`` the server's response body,
            if any, is printed to stderr before the error is re-raised.
    """
    # Function-scope import so this block does not depend on a change to the
    # file's top-level imports.
    from urllib.error import HTTPError

    # urlopen also handles non-HTTP schemes such as file:// and ftp://, for
    # which an upload makes no sense; reject them up front with a clear error
    # instead of failing obscurely (or reading a local file) later.
    if not endpoint.lower().startswith(("http://", "https://")):
        raise ValueError(f"endpoint must be an http(s) URL: {endpoint!r}")

    base = endpoint.rstrip("/")
    # Extend an existing query string rather than starting a second one.
    separator = "&" if "?" in base else "?"
    url = f"{base}{separator}filename={quote(path.name)}"

    data = path.read_bytes()
    request = Request(
        url,
        data=data,
        method="POST",
        headers={
            "Content-Type": "application/pdf",
            "X-Corpus-Upload-Token": token,
            "X-Corpus-Created-By": "local_uploader",
        },
    )
    try:
        with urlopen(request, timeout=300) as response:
            body = response.read().decode("utf-8", errors="replace")
            print(f"{path.name}: {response.status} {body}")
    except HTTPError as exc:
        # The error response usually carries the server's explanation; show
        # it, since str(exc) only contains the status code and reason.
        try:
            detail = exc.read().decode("utf-8", errors="replace").strip()
        finally:
            exc.close()
        if detail:
            print(f"{path.name}: server response: {detail}", file=sys.stderr)
        raise


def main() -> int:
    """Parse the command line and upload every PDF that was found.

    The upload token comes from ``--token`` or, when that is omitted, the
    ``CORPUS_UPLOAD_TOKEN`` environment variable; an empty token is a usage
    error. A failing upload is reported on stderr and does not stop the
    remaining uploads.

    Returns:
        ``0`` when every upload succeeded; ``1`` when no PDFs were found or
        at least one upload failed.
    """
    cli = argparse.ArgumentParser(description="Upload local PDFs to corpus-pipeline staging.")
    cli.add_argument("paths", nargs="+", type=Path, help="PDF files or directories containing PDFs")
    cli.add_argument("--endpoint", default="https://corpus.phuyu.cloud/upload/pdf")
    cli.add_argument("--token", default=os.environ.get("CORPUS_UPLOAD_TOKEN", ""))
    options = cli.parse_args()

    if not options.token:
        # parser.error() prints the usage text and exits with status 2.
        cli.error("provide --token or set CORPUS_UPLOAD_TOKEN")

    documents = iter_pdfs(options.paths)
    if not documents:
        print("no PDFs found", file=sys.stderr)
        return 1

    failure_count = 0
    for document in documents:
        try:
            upload(document, endpoint=options.endpoint, token=options.token)
        except Exception as error:  # noqa: BLE001 - CLI should continue through a batch.
            print(f"{document.name}: failed: {error}", file=sys.stderr)
            failure_count += 1

    if failure_count:
        return 1
    return 0


# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())
