Add comprehensive documentation and code comments

This commit adds extensive documentation to the Mistral OCR CLI project:

- Add API.md with detailed API response format documentation
- Add CHANGELOG.md to track version changes
- Add CONTRIBUTING.md with guidelines for contributors
- Enhance README.md with more detailed usage examples and troubleshooting
- Add proper docstrings to all Python modules and functions
- Update requirements.txt with development dependencies
- Improve setup.py with better metadata

These changes make the project more accessible to users and contributors.
2025-04-24 21:11:41 +02:00
parent 240d64023b
commit 5e891ef461
13 changed files with 786 additions and 15 deletions
@@ -1 +1,17 @@
-# Mistral OCR Python Package
+"""
+Mistral OCR Python Package
+
+A command-line tool for processing documents with Mistral AI's OCR capabilities.
+This package provides functionality for extracting text and structured content
+from PDF documents and images while preserving the original formatting and layout.
+
+Main components:
+- Client for interacting with the Mistral AI OCR API
+- Commands for processing documents, converting results to Markdown, and more
+- Utilities for handling file operations and formatting
+
+For usage information, see the README.md file or run:
+    mistral-ocr --help
+"""
+
+__version__ = "0.1.0"
@@ -4,6 +4,14 @@ import os
 from mistral_ocr.commands import process, convert, markdown, version

 def main():
+    """
+    Main entry point for the Mistral OCR CLI.
+    
+    Parses command line arguments and dispatches to the appropriate command handler.
+    
+    Returns:
+        int: Exit code (0 for success, 1 for error)
+    """
    parser = argparse.ArgumentParser(
        description="A CLI tool for performing OCR on documents using Mistral AI.",
        prog="mistral-ocr"
@@ -5,10 +5,33 @@ import requests
 from typing import Optional, Dict, Any, Tuple

 class MistralClient:
+    """
+    Client for interacting with the Mistral AI OCR API.
+    
+    This client handles authentication, file uploads, and OCR processing
+    requests to the Mistral AI API.
+    
+    Attributes:
+        BASE_URL (str): Base URL for the Mistral AI API
+        MAX_FILE_SIZE (int): Maximum allowed file size in bytes (52 MiB, i.e. 52 * 1024 * 1024)
+        api_key (str): Mistral AI API key for authentication
+        session (requests.Session): Session object for making HTTP requests
+    """
+    
    BASE_URL = "https://api.mistral.ai/v1"
    MAX_FILE_SIZE = 52 * 1024 * 1024  # 52 MB
    
    def __init__(self, api_key: Optional[str] = None):
+        """
+        Initialize the Mistral AI client.
+        
+        Args:
+            api_key (Optional[str]): Mistral AI API key. If not provided,
+                                    will look for MISTRAL_API_KEY environment variable.
+                                    
+        Raises:
+            ValueError: If no API key is provided or found in environment variables.
+        """
        self.api_key = api_key or os.environ.get("MISTRAL_API_KEY")
        if not self.api_key:
            raise ValueError("API key must be provided or set as MISTRAL_API_KEY environment variable")
@@ -20,7 +43,19 @@ class MistralClient:
        })
    
    def upload_file(self, file_path: str) -> str:
-        """Upload a file to Mistral API for OCR processing."""
+        """
+        Upload a file to Mistral API for OCR processing.
+        
+        Args:
+            file_path (str): Path to the local file to upload
+            
+        Returns:
+            str: File ID returned by the API
+            
+        Raises:
+            ValueError: If the file is too large or if the upload fails
+            requests.RequestException: If there's an error communicating with the API
+        """
        # Check file size
        file_size = os.path.getsize(file_path)
        if file_size > self.MAX_FILE_SIZE:
@@ -72,7 +107,19 @@ class MistralClient:
        raise last_error or ValueError(f"Failed to upload file after {max_retries} attempts")
    
    def get_file_url(self, file_id: str) -> str:
-        """Get a signed URL for an uploaded file."""
+        """
+        Get a signed URL for an uploaded file.
+        
+        Args:
+            file_id (str): ID of the file previously uploaded to the API
+            
+        Returns:
+            str: Signed URL that can be used for OCR processing
+            
+        Raises:
+            ValueError: If the API response does not contain a URL
+            requests.RequestException: If there's an error communicating with the API
+        """
        response = self.session.get(f"{self.BASE_URL}/files/{file_id}/url?expiry=24")
        response.raise_for_status()
        
@@ -85,7 +132,22 @@ class MistralClient:
        return url
    
    def process_ocr(self, doc_type: str, doc_source: str, include_image_base64: bool = False) -> bytes:
-        """Process a document with OCR."""
+        """
+        Process a document with OCR.
+        
+        Args:
+            doc_type (str): Type of document, either "document_url" or "image_url"
+            doc_source (str): URL of the document to process
+            include_image_base64 (bool, optional): Whether to include base64-encoded 
+                                                  images in the response. Defaults to False.
+            
+        Returns:
+            bytes: JSON response from the API containing OCR results
+            
+        Raises:
+            ValueError: If the document type is unsupported or if processing fails
+            requests.RequestException: If there's an error communicating with the API
+        """
        if doc_type not in ["document_url", "image_url"]:
            raise ValueError(f"Unsupported document type: {doc_type}")
        
@@ -5,6 +5,14 @@ import re
 from pathlib import Path

 def run(args):
+    """
+    Main entry point for the convert command.
+    
+    Converts OCR JSON results to Markdown format.
+    
+    Args:
+        args: Command line arguments parsed by argparse
+    """
    # If output file is specified, enable single file mode
    if args.output_file:
        args.single_file = True
@@ -13,11 +21,28 @@ def run(args):

 # OCR response structure classes
 class OCRResponseImage:
+    """
+    Represents an image in the OCR response.
+    
+    Attributes:
+        id (str): Unique identifier for the image
+        image_base64 (str): Base64-encoded image data
+    """
    def __init__(self, id, image_base64):
        self.id = id
        self.image_base64 = image_base64

 class OCRResponsePage:
+    """
+    Represents a page in the OCR response.
+    
+    Attributes:
+        index (int): Zero-based page index
+        markdown (str): Extracted text content in Markdown format
+        image (str, optional): Main page image (if available)
+        images (list): List of OCRResponseImage objects
+        dimensions (dict, optional): Page dimensions
+    """
    def __init__(self, index, markdown, image=None, images=None, dimensions=None):
        self.index = index
        self.markdown = markdown
@@ -26,6 +51,15 @@ class OCRResponsePage:
        self.dimensions = dimensions

 class OCRResponseMetadata:
+    """
+    Represents metadata in the OCR response.
+    
+    Attributes:
+        title (str, optional): Document title
+        author (str, optional): Document author
+        creation_date (str, optional): Document creation date
+        page_count (int, optional): Total number of pages
+    """
    def __init__(self, title=None, author=None, creation_date=None, page_count=None):
        self.title = title
        self.author = author
@@ -33,11 +67,29 @@ class OCRResponseMetadata:
        self.page_count = page_count

 class OCRResponse:
+    """
+    Represents the complete OCR response.
+    
+    Attributes:
+        pages (list): List of OCRResponsePage objects
+        metadata (OCRResponseMetadata): Document metadata
+    """
    def __init__(self, pages=None, metadata=None):
        self.pages = pages or []
        self.metadata = metadata or OCRResponseMetadata()

 def replace_image_references(content, images, include_images):
+    """
+    Replace image references in markdown content with base64 data.
+    
+    Args:
+        content (str): Markdown content with image references
+        images (list): List of OCRResponseImage objects
+        include_images (bool): Whether to include images in the output
+        
+    Returns:
+        str: Markdown content with image references replaced with base64 data, or the content unchanged if include_images is false or images is empty
+    """
    if not include_images or not images:
        return content
    
@@ -62,6 +114,16 @@ def replace_image_references(content, images, include_images):
    return content

 def convert_json_to_markdown(json_file, args):
+    """
+    Convert OCR JSON results to Markdown format.
+    
+    Args:
+        json_file (str): Path to the JSON file containing OCR results
+        args: Command line arguments containing conversion options
+        
+    Raises:
+        SystemExit: If an error occurs during conversion
+    """
    try:
        # Read JSON file
        with open(json_file, 'r', encoding='utf-8') as f:
@@ -5,6 +5,19 @@ from pathlib import Path
 from mistral_ocr.commands import process, convert

 def run(args):
+    """
+    Main entry point for the markdown command.
+    
+    Processes a document with OCR and converts the results to Markdown in one step.
+    This is a convenience command that combines the functionality of the 'process'
+    and 'convert' commands.
+    
+    Args:
+        args: Command line arguments parsed by argparse
+        
+    Raises:
+        SystemExit: If an error occurs during processing or conversion
+    """
    # Ensure that if --images is true, include_image_base64 is also true
    include_image_base64 = args.images
    
@@ -6,6 +6,14 @@ import urllib.parse
 from mistral_ocr.client import MistralClient

 def run(args):
+    """
+    Main entry point for the process command.
+    
+    Processes a document with OCR, either from a URL or a local file.
+    
+    Args:
+        args: Command line arguments parsed by argparse
+    """
    file_path = args.file
    
    # Determine if input is a URL or a local file
@@ -15,6 +23,17 @@ def run(args):
        process_local_file(file_path, args.output_file, args.include_images)

 def process_url(url, output_file, include_image_base64):
+    """
+    Process a document from a URL.
+    
+    Args:
+        url (str): URL of the document to process
+        output_file (str): Path to save the OCR results, or None for stdout
+        include_image_base64 (bool): Whether to include base64-encoded images in the output
+        
+    Raises:
+        SystemExit: If an error occurs during processing
+    """
    try:
        client = MistralClient()
        
@@ -35,6 +54,17 @@ def process_url(url, output_file, include_image_base64):
        sys.exit(1)

 def process_local_file(file_path, output_file, include_image_base64):
+    """
+    Process a local document file.
+    
+    Args:
+        file_path (str): Path to the local file to process
+        output_file (str): Path to save the OCR results, or None for stdout
+        include_image_base64 (bool): Whether to include base64-encoded images in the output
+        
+    Raises:
+        SystemExit: If an error occurs during processing
+    """
    try:
        print(f"Processing local file: {file_path}")
        
@@ -71,6 +101,13 @@ def process_local_file(file_path, output_file, include_image_base64):
        sys.exit(1)

 def handle_output(data, output_file):
+    """
+    Pretty-print the OCR JSON response and write it to the output file or stdout.
+    
+    Args:
+        data (bytes): JSON response data from the OCR API
+        output_file (str): Path to save the OCR results, or None for stdout
+    """
    # Pretty print the JSON response
    pretty_json = json.dumps(json.loads(data), indent=2)
    
@@ -1,6 +1,13 @@
 import sys
-
-VERSION = "0.1.0"
+from mistral_ocr import __version__

 def run(args):
-    print(f"Mistral OCR CLI v{VERSION}")
+    """
+    Main entry point for the version command.
+    
+    Prints the current version of the Mistral OCR CLI.
+    
+    Args:
+        args: Command line arguments parsed by argparse (not used)
+    """
+    print(f"Mistral OCR CLI v{__version__}")