Add comprehensive documentation and code comments
This commit adds extensive documentation to the Mistral OCR CLI project:

- Add API.md with detailed API response format documentation
- Add CHANGELOG.md to track version changes
- Add CONTRIBUTING.md with guidelines for contributors
- Enhance README.md with more detailed usage examples and troubleshooting
- Add proper docstrings to all Python modules and functions
- Update requirements.txt with development dependencies
- Improve setup.py with better metadata

These changes make the project more accessible to users and contributors.
This commit is contained in:
@@ -5,6 +5,14 @@ import re
|
||||
from pathlib import Path
|
||||
|
||||
def run(args):
|
||||
"""
|
||||
Main entry point for the convert command.
|
||||
|
||||
Converts OCR JSON results to Markdown format.
|
||||
|
||||
Args:
|
||||
args: Command line arguments parsed by argparse
|
||||
"""
|
||||
# If output file is specified, enable single file mode
|
||||
if args.output_file:
|
||||
args.single_file = True
|
||||
@@ -13,11 +21,28 @@ def run(args):
|
||||
|
||||
# OCR response structure classes
|
||||
class OCRResponseImage:
    """
    Represents an image in the OCR response.

    Attributes:
        id (str): Unique identifier for the image
        image_base64 (str): Base64-encoded image data
    """

    def __init__(self, id: str, image_base64: str) -> None:
        # NOTE: ``id`` shadows the builtin inside this method, but the
        # parameter name is part of the constructor's interface, so it is
        # kept unchanged for any caller passing it by keyword.
        self.id = id
        self.image_base64 = image_base64
|
||||
|
||||
class OCRResponsePage:
|
||||
"""
|
||||
Represents a page in the OCR response.
|
||||
|
||||
Attributes:
|
||||
index (int): Zero-based page index
|
||||
markdown (str): Extracted text content in Markdown format
|
||||
image (str, optional): Main page image (if available)
|
||||
images (list): List of OCRResponseImage objects
|
||||
dimensions (dict, optional): Page dimensions
|
||||
"""
|
||||
def __init__(self, index, markdown, image=None, images=None, dimensions=None):
|
||||
self.index = index
|
||||
self.markdown = markdown
|
||||
@@ -26,6 +51,15 @@ class OCRResponsePage:
|
||||
self.dimensions = dimensions
|
||||
|
||||
class OCRResponseMetadata:
|
||||
"""
|
||||
Represents metadata in the OCR response.
|
||||
|
||||
Attributes:
|
||||
title (str, optional): Document title
|
||||
author (str, optional): Document author
|
||||
creation_date (str, optional): Document creation date
|
||||
page_count (int, optional): Total number of pages
|
||||
"""
|
||||
def __init__(self, title=None, author=None, creation_date=None, page_count=None):
|
||||
self.title = title
|
||||
self.author = author
|
||||
@@ -33,11 +67,29 @@ class OCRResponseMetadata:
|
||||
self.page_count = page_count
|
||||
|
||||
class OCRResponse:
    """
    Represents the complete OCR response.

    Attributes:
        pages (list): List of OCRResponsePage objects
        metadata (OCRResponseMetadata): Document metadata
    """

    def __init__(self, pages=None, metadata=None):
        # A falsy ``pages`` (None or an empty list) is replaced by a fresh
        # list, so instances never share a default list object.
        self.pages = pages or []
        # A falsy ``metadata`` falls back to a default-constructed
        # OCRResponseMetadata.
        self.metadata = metadata or OCRResponseMetadata()
|
||||
|
||||
def replace_image_references(content, images, include_images):
|
||||
"""
|
||||
Replace image references in markdown content with base64 data.
|
||||
|
||||
Args:
|
||||
content (str): Markdown content with image references
|
||||
images (list): List of OCRResponseImage objects
|
||||
include_images (bool): Whether to include images in the output
|
||||
|
||||
Returns:
|
||||
str: Markdown content with image references replaced with base64 data
|
||||
"""
|
||||
if not include_images or not images:
|
||||
return content
|
||||
|
||||
@@ -62,6 +114,16 @@ def replace_image_references(content, images, include_images):
|
||||
return content
|
||||
|
||||
def convert_json_to_markdown(json_file, args):
|
||||
"""
|
||||
Convert OCR JSON results to Markdown format.
|
||||
|
||||
Args:
|
||||
json_file (str): Path to the JSON file containing OCR results
|
||||
args: Command line arguments containing conversion options
|
||||
|
||||
Raises:
|
||||
SystemExit: If an error occurs during conversion
|
||||
"""
|
||||
try:
|
||||
# Read JSON file
|
||||
with open(json_file, 'r', encoding='utf-8') as f:
|
||||
|
||||
@@ -5,6 +5,19 @@ from pathlib import Path
|
||||
from mistral_ocr.commands import process, convert
|
||||
|
||||
def run(args):
|
||||
"""
|
||||
Main entry point for the markdown command.
|
||||
|
||||
Processes a document with OCR and converts the results to Markdown in one step.
|
||||
This is a convenience command that combines the functionality of the 'process'
|
||||
and 'convert' commands.
|
||||
|
||||
Args:
|
||||
args: Command line arguments parsed by argparse
|
||||
|
||||
Raises:
|
||||
SystemExit: If an error occurs during processing or conversion
|
||||
"""
|
||||
# Ensure that if --images is true, include_image_base64 is also true
|
||||
include_image_base64 = args.images
|
||||
|
||||
|
||||
@@ -6,6 +6,14 @@ import urllib.parse
|
||||
from mistral_ocr.client import MistralClient
|
||||
|
||||
def run(args):
|
||||
"""
|
||||
Main entry point for the process command.
|
||||
|
||||
Processes a document with OCR, either from a URL or a local file.
|
||||
|
||||
Args:
|
||||
args: Command line arguments parsed by argparse
|
||||
"""
|
||||
file_path = args.file
|
||||
|
||||
# Determine if input is a URL or a local file
|
||||
@@ -15,6 +23,17 @@ def run(args):
|
||||
process_local_file(file_path, args.output_file, args.include_images)
|
||||
|
||||
def process_url(url, output_file, include_image_base64):
|
||||
"""
|
||||
Process a document from a URL.
|
||||
|
||||
Args:
|
||||
url (str): URL of the document to process
|
||||
output_file (str): Path to save the OCR results, or None for stdout
|
||||
include_image_base64 (bool): Whether to include base64-encoded images in the output
|
||||
|
||||
Raises:
|
||||
SystemExit: If an error occurs during processing
|
||||
"""
|
||||
try:
|
||||
client = MistralClient()
|
||||
|
||||
@@ -35,6 +54,17 @@ def process_url(url, output_file, include_image_base64):
|
||||
sys.exit(1)
|
||||
|
||||
def process_local_file(file_path, output_file, include_image_base64):
|
||||
"""
|
||||
Process a local document file.
|
||||
|
||||
Args:
|
||||
file_path (str): Path to the local file to process
|
||||
output_file (str): Path to save the OCR results, or None for stdout
|
||||
include_image_base64 (bool): Whether to include base64-encoded images in the output
|
||||
|
||||
Raises:
|
||||
SystemExit: If an error occurs during processing
|
||||
"""
|
||||
try:
|
||||
print(f"Processing local file: {file_path}")
|
||||
|
||||
@@ -71,6 +101,13 @@ def process_local_file(file_path, output_file, include_image_base64):
|
||||
sys.exit(1)
|
||||
|
||||
def handle_output(data, output_file):
|
||||
"""
|
||||
Handle the OCR response output.
|
||||
|
||||
Args:
|
||||
data (bytes): JSON response data from the OCR API
|
||||
output_file (str): Path to save the OCR results, or None for stdout
|
||||
"""
|
||||
# Pretty print the JSON response
|
||||
pretty_json = json.dumps(json.loads(data), indent=2)
|
||||
|
||||
|
||||
@@ -1,6 +1,13 @@
|
||||
import sys
|
||||
|
||||
VERSION = "0.1.0"
|
||||
from mistral_ocr import __version__
|
||||
|
||||
def run(args):
    """
    Main entry point for the version command.

    Prints the current version of the Mistral OCR CLI.

    Args:
        args: Command line arguments parsed by argparse (not used)
    """
    # The version string comes from the package-level ``__version__``
    # imported at the top of this module, not the older module-local
    # VERSION constant.
    print(f"Mistral OCR CLI v{__version__}")
|
||||
|
||||
Reference in New Issue
Block a user