Add comprehensive documentation and code comments

This commit adds extensive documentation to the Mistral OCR CLI project:

- Add API.md with detailed API response format documentation
- Add CHANGELOG.md to track version changes
- Add CONTRIBUTING.md with guidelines for contributors
- Enhance README.md with more detailed usage examples and troubleshooting
- Add proper docstrings to all Python modules and functions
- Update requirements.txt with development dependencies
- Improve setup.py with better metadata

These changes make the project more accessible to users and contributors.
This commit is contained in:
2025-04-24 21:11:41 +02:00
parent 240d64023b
commit 5e891ef461
13 changed files with 786 additions and 15 deletions
+62
View File
@@ -5,6 +5,14 @@ import re
from pathlib import Path
def run(args):
"""
Main entry point for the convert command.
Converts OCR JSON results to Markdown format.
Args:
args: Command line arguments parsed by argparse
"""
# If output file is specified, enable single file mode
if args.output_file:
args.single_file = True
@@ -13,11 +21,28 @@ def run(args):
# OCR response structure classes
class OCRResponseImage:
    """An image embedded in the OCR response.

    Attributes:
        id (str): Unique identifier for the image.
        image_base64 (str): Base64-encoded image data.
    """

    def __init__(self, id, image_base64):
        # NOTE: `id` shadows the builtin of the same name; the parameter name
        # is kept as-is because callers may pass it by keyword.
        self.id = id
        self.image_base64 = image_base64
class OCRResponsePage:
"""
Represents a page in the OCR response.
Attributes:
index (int): Zero-based page index
markdown (str): Extracted text content in Markdown format
image (str, optional): Main page image (if available)
images (list): List of OCRResponseImage objects
dimensions (dict, optional): Page dimensions
"""
def __init__(self, index, markdown, image=None, images=None, dimensions=None):
self.index = index
self.markdown = markdown
@@ -26,6 +51,15 @@ class OCRResponsePage:
self.dimensions = dimensions
class OCRResponseMetadata:
"""
Represents metadata in the OCR response.
Attributes:
title (str, optional): Document title
author (str, optional): Document author
creation_date (str, optional): Document creation date
page_count (int, optional): Total number of pages
"""
def __init__(self, title=None, author=None, creation_date=None, page_count=None):
self.title = title
self.author = author
@@ -33,11 +67,29 @@ class OCRResponseMetadata:
self.page_count = page_count
class OCRResponse:
    """The complete OCR response: its pages plus document metadata.

    Attributes:
        pages (list): List of OCRResponsePage objects.
        metadata (OCRResponseMetadata): Document metadata.
    """

    def __init__(self, pages=None, metadata=None):
        # Falsy arguments (None, or an empty list for `pages`) are replaced
        # with fresh defaults, so instances never share a mutable default.
        self.pages = pages or []
        self.metadata = metadata or OCRResponseMetadata()
def replace_image_references(content, images, include_images):
"""
Replace image references in markdown content with base64 data.
Args:
content (str): Markdown content with image references
images (list): List of OCRResponseImage objects
include_images (bool): Whether to include images in the output
Returns:
str: Markdown content with image references replaced with base64 data
"""
if not include_images or not images:
return content
@@ -62,6 +114,16 @@ def replace_image_references(content, images, include_images):
return content
def convert_json_to_markdown(json_file, args):
"""
Convert OCR JSON results to Markdown format.
Args:
json_file (str): Path to the JSON file containing OCR results
args: Command line arguments containing conversion options
Raises:
SystemExit: If an error occurs during conversion
"""
try:
# Read JSON file
with open(json_file, 'r', encoding='utf-8') as f: