"""Implementation of the ``process`` command: run OCR on a URL or a local file."""

import os
import json
import sys
from pathlib import Path
import urllib.parse

from mistral_ocr.client import MistralClient

# Extensions that are submitted to the OCR API as "image_url"; everything
# else is submitted as "document_url".
_IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".webp", ".gif")


def _has_image_extension(name):
    """Return True if *name* ends with a known image extension (case-insensitive)."""
    return name.lower().endswith(_IMAGE_EXTENSIONS)


def _doc_type_for_url(url):
    """Return "image_url" or "document_url" for a remote document URL."""
    try:
        url_path = urllib.parse.urlsplit(url).path
    except ValueError:
        # Malformed URL (e.g. unbalanced IPv6 brackets): fall back to the raw
        # string check below and let the API report the real problem.
        url_path = ""
    # Check the path component so a query string or fragment after the file
    # name ("scan.png?sig=...") does not hide the extension. The whole-URL
    # check is kept so every URL recognised before is still recognised.
    if _has_image_extension(url_path) or _has_image_extension(url):
        return "image_url"
    return "document_url"


def _doc_type_for_path(file_path):
    """Return "image_url" or "document_url" for a local file path."""
    return "image_url" if _has_image_extension(file_path) else "document_url"


def run(args):
    """
    Main entry point for the process command.

    Processes a document with OCR, either from a URL or a local file.

    Args:
        args: Command line arguments parsed by argparse. The attributes
            ``file``, ``output_file`` and ``include_images`` are read.
    """
    file_path = args.file

    # Anything starting with an http(s) scheme is treated as a remote
    # document; everything else is treated as a path on the local disk.
    if file_path.startswith(("http://", "https://")):
        process_url(file_path, args.output_file, args.include_images)
    else:
        process_local_file(file_path, args.output_file, args.include_images)


def process_url(url, output_file, include_image_base64):
    """
    Process a document from a URL.

    Args:
        url (str): URL of the document to process
        output_file (str): Path to save the OCR results, or None for stdout
        include_image_base64 (bool): Whether to include base64-encoded
            images in the output

    Raises:
        SystemExit: If an error occurs during processing
    """
    try:
        client = MistralClient()
        doc_type = _doc_type_for_url(url)
        resp_data = client.process_ocr(doc_type, url, include_image_base64)
        handle_output(resp_data, output_file)
    except Exception as e:
        # Top-level CLI boundary: report the failure and exit non-zero.
        print(f"Error processing document: {e}", file=sys.stderr)
        sys.exit(1)


def process_local_file(file_path, output_file, include_image_base64):
    """
    Process a local document file.

    The file is uploaded through the client, a URL for the uploaded file is
    requested, and that URL is then submitted for OCR.

    Progress messages are written to stderr so that stdout carries only the
    JSON result when no output file is given.

    Args:
        file_path (str): Path to the local file to process
        output_file (str): Path to save the OCR results, or None for stdout
        include_image_base64 (bool): Whether to include base64-encoded
            images in the output

    Raises:
        SystemExit: If the file does not exist or an error occurs during
            processing
    """
    try:
        print(f"Processing local file: {file_path}", file=sys.stderr)

        if not os.path.exists(file_path):
            print(f"Error: file '{file_path}' does not exist", file=sys.stderr)
            # SystemExit is not an Exception subclass, so the handler below
            # does not intercept it.
            sys.exit(1)

        client = MistralClient()

        # Upload the file, then obtain the signed URL used for processing.
        file_id = client.upload_file(file_path)
        print(f"File uploaded successfully with ID: {file_id}", file=sys.stderr)
        file_url = client.get_file_url(file_id)

        # The type is decided from the local file name, not from the signed URL.
        doc_type = _doc_type_for_path(file_path)
        print(f"Processing with signed file URL (type: {doc_type})", file=sys.stderr)

        resp_data = client.process_ocr(doc_type, file_url, include_image_base64)
        handle_output(resp_data, output_file)
    except Exception as e:
        # Top-level CLI boundary: report the failure and exit non-zero.
        print(f"Error processing document: {e}", file=sys.stderr)
        sys.exit(1)


def handle_output(data, output_file):
    """
    Handle the OCR response output.

    The response is parsed and re-serialised with a two-space indent, then
    either written to ``output_file`` or printed to stdout.

    Args:
        data (bytes): JSON response data from the OCR API (any input
            accepted by ``json.loads`` works)
        output_file (str): Path to save the OCR results, or None for stdout

    Raises:
        json.JSONDecodeError: If ``data`` is not valid JSON
        OSError: If the output directory or file cannot be written
    """
    pretty_json = json.dumps(json.loads(data), indent=2)

    if not output_file:
        print(pretty_json)
        return

    # Create the destination directory if it doesn't exist yet.
    output_path = Path(output_file)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    with output_path.open('w', encoding='utf-8') as f:
        f.write(pretty_json)

    print(f"OCR results saved to {output_file}")