Files
mistral-ocr/mistral_ocr/__main__.py
T
schihei 5e891ef461 Add comprehensive documentation and code comments
This commit adds extensive documentation to the Mistral OCR CLI project:

- Add API.md with detailed API response format documentation
- Add CHANGELOG.md to track version changes
- Add CONTRIBUTING.md with guidelines for contributors
- Enhance README.md with more detailed usage examples and troubleshooting
- Add proper docstrings to all Python modules and functions
- Update requirements.txt with development dependencies
- Improve setup.py with better metadata

These changes make the project more accessible to users and contributors.
2025-04-24 21:11:41 +02:00

76 lines
3.8 KiB
Python

import sys
import argparse
import os
from mistral_ocr.commands import process, convert, markdown, version
def _add_default_on_flag(parser, flag, enable_help, disable_help):
    """
    Register a boolean option that defaults to True plus its "--no-" opposite.

    A lone ``store_true`` option whose default is already True can never be
    turned off from the command line. A second ``--no-<flag>`` option that
    stores False into the same destination makes the setting controllable
    while keeping the original option and its default intact.

    Args:
        parser: The argparse parser (or subparser) to extend.
        flag: Option name without leading dashes, e.g. "page-breaks".
        enable_help: Help text shown for ``--<flag>``.
        disable_help: Help text shown for ``--no-<flag>``.
    """
    dest = flag.replace("-", "_")
    parser.add_argument("--" + flag, dest=dest, action="store_true", default=True, help=enable_help)
    parser.add_argument("--no-" + flag, dest=dest, action="store_false", default=True, help=disable_help)


def _build_parser():
    """
    Build the argument parser for the CLI.

    Returns:
        argparse.ArgumentParser: Parser with the global ``--api-key`` option
        and the ``process``, ``convert``, ``markdown`` and ``version``
        subcommands. The chosen subcommand name is stored in ``args.command``.
    """
    parser = argparse.ArgumentParser(
        description="A CLI tool for performing OCR on documents using Mistral AI.",
        prog="mistral-ocr"
    )
    parser.add_argument("--api-key", help="Mistral API key (defaults to MISTRAL_API_KEY env variable)")
    subparsers = parser.add_subparsers(dest="command", help="Command to execute")

    # Process command
    process_parser = subparsers.add_parser("process", help="Process a document with OCR")
    process_parser.add_argument("file", help="File path or URL to process")
    process_parser.add_argument("-o", "--output-file", help="Output JSON file path (default is stdout)")
    process_parser.add_argument("--include-images", action="store_true", help="Include base64 encoded images in the output")

    # Convert command
    convert_parser = subparsers.add_parser("convert", help="Convert OCR JSON output to Markdown")
    convert_parser.add_argument("json_file", help="JSON file to convert")
    convert_parser.add_argument("-d", "--output-dir", default="markdown_output", help="Directory to store markdown files")
    convert_parser.add_argument("-o", "--output-file", help="Output filename for single file mode (default: document.md)")
    convert_parser.add_argument("--images", action="store_true", help="Include images in markdown (if available)")
    _add_default_on_flag(
        convert_parser,
        "page-breaks",
        "Include page break indicators between pages",
        "Omit page break indicators between pages",
    )
    _add_default_on_flag(
        convert_parser,
        "title-from-filename",
        "Use filename as document title",
        "Do not use filename as document title",
    )
    convert_parser.add_argument("--single-file", action="store_true", help="Create a single markdown file instead of one per page")

    # Markdown command
    markdown_parser = subparsers.add_parser("markdown", help="Process document and convert to markdown in one step")
    markdown_parser.add_argument("file_or_url", help="File path or URL to process")
    markdown_parser.add_argument("-j", "--json-file", help="Save intermediate JSON to file (optional)")
    markdown_parser.add_argument("-d", "--output-dir", default="markdown_output", help="Directory to store markdown files")
    markdown_parser.add_argument("-o", "--output-file", help="Path for output markdown file (implies --single-file)")
    markdown_parser.add_argument("--images", action="store_true", help="Include extracted images in markdown (if available)")
    _add_default_on_flag(
        markdown_parser,
        "page-breaks",
        "Include page break indicators between pages",
        "Omit page break indicators between pages",
    )
    _add_default_on_flag(
        markdown_parser,
        "title-from-filename",
        "Use filename as document title",
        "Do not use filename as document title",
    )
    markdown_parser.add_argument("--single-file", action="store_true", help="Create a single markdown file instead of one per page")

    # Version command
    subparsers.add_parser("version", help="Print the version number")

    return parser


def main(argv=None):
    """
    Main entry point for the Mistral OCR CLI.

    Parses command line arguments and dispatches to the appropriate command handler.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default), argparse reads the arguments from ``sys.argv``.

    Returns:
        int: 0 after a command handler has been run, 1 when no command was
        given (the help text is printed in that case). The handlers' own
        return values are not inspected here.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    # An explicit --api-key takes precedence: export it through the
    # environment variable named in the option's help text.
    if args.api_key:
        os.environ["MISTRAL_API_KEY"] = args.api_key

    # Execute the appropriate command
    if args.command == "process":
        process.run(args)
    elif args.command == "convert":
        convert.run(args)
    elif args.command == "markdown":
        markdown.run(args)
    elif args.command == "version":
        version.run(args)
    else:
        # No subcommand supplied: show usage and signal failure.
        parser.print_help()
        return 1
    return 0
if __name__ == "__main__":
    # Run the CLI and hand main()'s return value to the interpreter as the
    # process exit status.
    exit_code = main()
    sys.exit(exit_code)