"""Command-line entry point for the ``mistral-ocr`` tool."""

import argparse
import os
import sys
from typing import Optional, Sequence

from mistral_ocr.commands import process, convert, markdown, version


def _add_markdown_options(parser, *, output_file_help, images_help):
    """Register the Markdown output options shared by ``convert`` and ``markdown``.

    Args:
        parser: The subcommand parser to extend.
        output_file_help: Help text for ``-o/--output-file`` (differs per command).
        images_help: Help text for ``--images`` (differs per command).
    """
    parser.add_argument(
        "-d",
        "--output-dir",
        default="markdown_output",
        help="Directory to store markdown files",
    )
    parser.add_argument("-o", "--output-file", help=output_file_help)
    parser.add_argument("--images", action="store_true", help=images_help)

    # A ``store_true`` flag whose default is already True can never become
    # False, so each such flag is paired with a ``--no-*`` counterpart that
    # writes False to the same destination. The positive flag is registered
    # first, so its default (True) is the one argparse applies.
    parser.add_argument(
        "--page-breaks",
        dest="page_breaks",
        action="store_true",
        default=True,
        help="Include page break indicators between pages",
    )
    parser.add_argument(
        "--no-page-breaks",
        dest="page_breaks",
        action="store_false",
        help="Do not include page break indicators between pages",
    )
    parser.add_argument(
        "--title-from-filename",
        dest="title_from_filename",
        action="store_true",
        default=True,
        help="Use filename as document title",
    )
    parser.add_argument(
        "--no-title-from-filename",
        dest="title_from_filename",
        action="store_false",
        help="Do not use filename as document title",
    )

    parser.add_argument(
        "--single-file",
        action="store_true",
        help="Create a single markdown file instead of one per page",
    )


def _build_parser():
    """Create the top-level argument parser with all subcommands attached."""
    parser = argparse.ArgumentParser(
        description="A CLI tool for performing OCR on documents using Mistral AI.",
        prog="mistral-ocr",
    )
    parser.add_argument(
        "--api-key",
        help="Mistral API key (defaults to MISTRAL_API_KEY env variable)",
    )

    subparsers = parser.add_subparsers(dest="command", help="Command to execute")

    # Process command
    process_parser = subparsers.add_parser("process", help="Process a document with OCR")
    process_parser.add_argument("file", help="File path or URL to process")
    process_parser.add_argument(
        "-o",
        "--output-file",
        help="Output JSON file path (default is stdout)",
    )
    process_parser.add_argument(
        "--include-images",
        action="store_true",
        help="Include base64 encoded images in the output",
    )

    # Convert command
    convert_parser = subparsers.add_parser(
        "convert", help="Convert OCR JSON output to Markdown"
    )
    convert_parser.add_argument("json_file", help="JSON file to convert")
    _add_markdown_options(
        convert_parser,
        output_file_help="Output filename for single file mode (default: document.md)",
        images_help="Include images in markdown (if available)",
    )

    # Markdown command
    markdown_parser = subparsers.add_parser(
        "markdown", help="Process document and convert to markdown in one step"
    )
    markdown_parser.add_argument("file_or_url", help="File path or URL to process")
    markdown_parser.add_argument(
        "-j",
        "--json-file",
        help="Save intermediate JSON to file (optional)",
    )
    _add_markdown_options(
        markdown_parser,
        output_file_help="Path for output markdown file (implies --single-file)",
        images_help="Include extracted images in markdown (if available)",
    )

    # Version command
    subparsers.add_parser("version", help="Print the version number")

    return parser


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Parse command-line arguments and run the selected subcommand.

    Args:
        argv: Argument list to parse, excluding the program name. When
            ``None`` (the default), argparse reads ``sys.argv[1:]``.

    Returns:
        ``0`` after a subcommand's ``run`` has been called, or ``1`` when no
        subcommand was given (the help text is printed in that case).
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    # Set API key from args or environment: an explicit --api-key overrides
    # whatever MISTRAL_API_KEY currently holds for the rest of the process.
    if args.api_key:
        os.environ["MISTRAL_API_KEY"] = args.api_key

    # Execute the appropriate command. The table is built at call time so the
    # handlers are looked up on the command modules when main() runs.
    handlers = {
        "process": process.run,
        "convert": convert.run,
        "markdown": markdown.run,
        "version": version.run,
    }
    handler = handlers.get(args.command)
    if handler is None:
        # No subcommand was supplied on the command line.
        parser.print_help()
        return 1

    handler(args)
    return 0


if __name__ == "__main__":
    sys.exit(main())