Files
mistral-ocr/mistral_ocr/commands/markdown.py
T
schihei 5e891ef461 Add comprehensive documentation and code comments
This commit adds extensive documentation to the Mistral OCR CLI project:

- Add API.md with detailed API response format documentation
- Add CHANGELOG.md to track version changes
- Add CONTRIBUTING.md with guidelines for contributors
- Enhance README.md with more detailed usage examples and troubleshooting
- Add proper docstrings to all Python modules and functions
- Update requirements.txt with development dependencies
- Improve setup.py with better metadata

These changes make the project more accessible to users and contributors.
2025-04-24 21:11:41 +02:00

58 lines
1.9 KiB
Python

import os
import sys
import tempfile
from pathlib import Path
from mistral_ocr.commands import process, convert
def run(args):
    """
    Main entry point for the markdown command.

    Processes a document with OCR and converts the results to Markdown in one
    step. This is a convenience command that combines the functionality of the
    'process' and 'convert' commands.

    Args:
        args: Command line arguments parsed by argparse. This function reads
            ``images``, ``output_file``, ``json_file`` and ``file_or_url``,
            sets ``single_file`` to True when ``output_file`` is given, and
            forwards the whole namespace to
            ``convert.convert_json_to_markdown``.

    Raises:
        SystemExit: With exit code 1 if an error occurs during processing or
            conversion.
    """
    # The --images flag is passed straight through as the
    # include_image_base64 argument of the processing step.
    include_image_base64 = args.images

    # If output file is specified, enable single file mode
    if args.output_file:
        args.single_file = True

    # Use the caller-supplied JSON path when there is one; otherwise create a
    # temporary file that is removed again once conversion has finished.
    json_output_path = args.json_file
    temp_path = None
    if not json_output_path:
        # delete=False: only the path is needed here. The handle is closed
        # immediately so the processing step can write to the file by name.
        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as temp_file:
            temp_path = temp_file.name
        json_output_path = temp_path

    try:
        # Step 1: Process the document
        if args.file_or_url.startswith(("http://", "https://")):
            process.process_url(args.file_or_url, json_output_path, include_image_base64)
        else:
            process.process_local_file(args.file_or_url, json_output_path, include_image_base64)

        # Step 2: Convert the JSON to markdown
        print("Converting JSON to Markdown...")
        convert.convert_json_to_markdown(json_output_path, args)
    except Exception as e:
        print(f"Error processing and converting document: {e}", file=sys.stderr)
        sys.exit(1)
    finally:
        # Clean up the temporary file if we created one. Removal is
        # best-effort: a filesystem error here must not mask the real
        # outcome, but only OSError is suppressed so that KeyboardInterrupt
        # and SystemExit are never swallowed.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                pass