5e891ef461
This commit adds extensive documentation to the Mistral OCR CLI project: - Add API.md with detailed API response format documentation - Add CHANGELOG.md to track version changes - Add CONTRIBUTING.md with guidelines for contributors - Enhance README.md with more detailed usage examples and troubleshooting - Add proper docstrings to all Python modules and functions - Update requirements.txt with development dependencies - Improve setup.py with better metadata These changes make the project more accessible to users and contributors.
58 lines
1.9 KiB
Python
58 lines
1.9 KiB
Python
import os
|
|
import sys
|
|
import tempfile
|
|
from pathlib import Path
|
|
from mistral_ocr.commands import process, convert
|
|
|
|
def run(args):
    """
    Main entry point for the markdown command.

    Processes a document with OCR and converts the results to Markdown in one step.
    This is a convenience command that combines the functionality of the 'process'
    and 'convert' commands.

    The OCR result is first written as JSON, either to ``args.json_file`` when
    the caller supplied one, or to a temporary file that is removed before this
    function returns or exits. The JSON is then handed to the converter together
    with the full ``args`` namespace.

    Args:
        args: Command line arguments parsed by argparse. This function reads
            ``images``, ``output_file``, ``json_file`` and ``file_or_url``, and
            sets ``single_file`` to True when ``output_file`` is given.

    Raises:
        SystemExit: With exit code 1 if an error occurs during processing or
            conversion
    """
    # Ensure that if --images is true, include_image_base64 is also true
    include_image_base64 = args.images

    # If output file is specified, enable single file mode
    if args.output_file:
        args.single_file = True

    # Use the caller's JSON path when given; otherwise create a temporary file.
    # temp_json_path stays None unless this function created the file, so the
    # cleanup below can never delete a file the caller asked to keep.
    json_output_path = args.json_file
    temp_json_path = None

    if not json_output_path:
        # delete=False keeps the file on disk after the handle is closed, so
        # the processing step can reopen it by name.
        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as temp_file:
            temp_json_path = temp_file.name
        json_output_path = temp_json_path

    try:
        # Step 1: Process the document
        if args.file_or_url.startswith(("http://", "https://")):
            process.process_url(args.file_or_url, json_output_path, include_image_base64)
        else:
            process.process_local_file(args.file_or_url, json_output_path, include_image_base64)

        # Step 2: Convert the JSON to markdown
        print("Converting JSON to Markdown...")
        convert.convert_json_to_markdown(json_output_path, args)

    except Exception as e:
        print(f"Error processing and converting document: {e}", file=sys.stderr)
        sys.exit(1)
    finally:
        # Clean up temporary file if we created one
        if temp_json_path is not None:
            try:
                os.unlink(temp_json_path)
            except OSError:
                # Best-effort cleanup: a file that is already gone or cannot
                # be removed must not mask the real outcome of the command.
                pass
|