Files
mistral-ocr/mistral_ocr/client.py
T
schihei 5e891ef461 Add comprehensive documentation and code comments
This commit adds extensive documentation to the Mistral OCR CLI project:

- Add API.md with detailed API response format documentation
- Add CHANGELOG.md to track version changes
- Add CONTRIBUTING.md with guidelines for contributors
- Enhance README.md with more detailed usage examples and troubleshooting
- Add proper docstrings to all Python modules and functions
- Update requirements.txt with development dependencies
- Improve setup.py with better metadata

These changes make the project more accessible to users and contributors.
2025-04-24 21:11:41 +02:00

216 lines
8.1 KiB
Python

import os
import json
import time
import requests
from typing import Optional, Dict, Any, Tuple
class MistralClient:
    """
    Client for interacting with the Mistral AI OCR API.

    This client handles authentication, file uploads, and OCR processing
    requests to the Mistral AI API. It can be used as a context manager so
    the underlying HTTP session is closed when the block exits.

    Attributes:
        BASE_URL (str): Base URL for the Mistral AI API
        MAX_FILE_SIZE (int): Maximum allowed file size in bytes (52 MB)
        UPLOAD_MAX_RETRIES (int): Maximum number of upload attempts
        UPLOAD_RETRY_DELAY (int): Seconds to wait between upload attempts
        OCR_MAX_RETRIES (int): Maximum number of OCR request attempts
        OCR_RETRY_DELAY (int): Base number of seconds to wait between OCR attempts
        api_key (str): Mistral AI API key for authentication
        session (requests.Session): Session object for making HTTP requests
    """

    BASE_URL = "https://api.mistral.ai/v1"
    MAX_FILE_SIZE = 52 * 1024 * 1024  # 52 MB

    # Retry policy. These are class attributes (rather than locals buried in
    # the methods) so callers and subclasses can tune them.
    UPLOAD_MAX_RETRIES = 3
    UPLOAD_RETRY_DELAY = 3
    OCR_MAX_RETRIES = 5
    OCR_RETRY_DELAY = 10

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize the Mistral AI client.

        Args:
            api_key (Optional[str]): Mistral AI API key. If not provided,
                will look for MISTRAL_API_KEY environment variable.

        Raises:
            ValueError: If no API key is provided or found in environment variables.
        """
        self.api_key = api_key or os.environ.get("MISTRAL_API_KEY")
        if not self.api_key:
            raise ValueError("API key must be provided or set as MISTRAL_API_KEY environment variable")

        # One shared session so every request carries the auth headers.
        self.session = requests.Session()
        self.session.headers.update({
            "Authorization": f"Bearer {self.api_key}",
            "Accept": "application/json",
        })

    def __enter__(self) -> "MistralClient":
        """Return the client itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the HTTP session when leaving a ``with`` block."""
        self.close()

    def close(self) -> None:
        """Close the underlying HTTP session."""
        self.session.close()

    @staticmethod
    def _is_retryable_status(status_code: int) -> bool:
        """Return True for server errors (5xx) and rate limiting (429)."""
        return status_code >= 500 or status_code == 429

    @staticmethod
    def _pause_before_retry(attempt: int, max_retries: int, delay: float) -> None:
        """Sleep for ``delay`` seconds unless ``attempt`` was the final attempt."""
        # Sleeping after the last attempt would only postpone the error.
        if attempt < max_retries and delay > 0:
            time.sleep(delay)

    def upload_file(self, file_path: str) -> str:
        """
        Upload a file to Mistral API for OCR processing.

        Server errors (5xx), rate limiting (429), empty responses, invalid
        JSON and responses without a file ID are retried up to
        ``UPLOAD_MAX_RETRIES`` times. Other request errors are raised
        immediately.

        Args:
            file_path (str): Path to the local file to upload

        Returns:
            str: File ID returned by the API

        Raises:
            ValueError: If the file is too large or if the upload fails
            requests.RequestException: If there's an error communicating with the API
            OSError: If the file cannot be inspected or opened
        """
        # Check file size
        file_size = os.path.getsize(file_path)
        if file_size > self.MAX_FILE_SIZE:
            raise ValueError(f"File is too large ({file_size/1024/1024:.2f} MB). Maximum allowed size is {self.MAX_FILE_SIZE/1024/1024:.2f} MB")

        max_retries = self.UPLOAD_MAX_RETRIES
        retry_delay = self.UPLOAD_RETRY_DELAY
        last_error: Optional[Exception] = None

        for attempt in range(1, max_retries + 1):
            try:
                # Reopen the file on every attempt so each retry reads it
                # from the beginning.
                with open(file_path, "rb") as f:
                    response = self.session.post(
                        f"{self.BASE_URL}/files",
                        files={"file": f},
                        data={"purpose": "ocr"},
                    )
                response.raise_for_status()
            except requests.RequestException as e:
                last_error = e
                # Retry on server errors or rate limiting only
                failed_response = getattr(e, "response", None)
                if failed_response is not None and self._is_retryable_status(failed_response.status_code):
                    self._pause_before_retry(attempt, max_retries, retry_delay)
                    continue
                raise

            if not response.content:
                last_error = ValueError("Received empty response from API")
            else:
                try:
                    file_response = response.json()
                except ValueError:
                    # Every JSON decode error raised by requests derives
                    # from ValueError.
                    last_error = ValueError("Received invalid JSON response from API")
                else:
                    # Guard against JSON that is not an object (e.g. a list).
                    file_id = file_response.get("id") if isinstance(file_response, dict) else None
                    if file_id:
                        return file_id
                    last_error = ValueError("Received response without file ID")

            self._pause_before_retry(attempt, max_retries, retry_delay)

        if last_error is None:
            raise ValueError(f"Failed to upload file after {max_retries} attempts")
        raise last_error

    def get_file_url(self, file_id: str, expiry: int = 24) -> str:
        """
        Get a signed URL for an uploaded file.

        Args:
            file_id (str): ID of the file previously uploaded to the API
            expiry (int, optional): Value sent as the ``expiry`` query
                parameter (per the Mistral Files API reference, the number
                of hours the URL stays valid). Defaults to 24.

        Returns:
            str: Signed URL that can be used for OCR processing

        Raises:
            ValueError: If the API response does not contain a URL
            requests.RequestException: If there's an error communicating with the API
        """
        response = self.session.get(
            f"{self.BASE_URL}/files/{file_id}/url",
            params={"expiry": expiry},
        )
        response.raise_for_status()

        url_response = response.json()
        # Guard against JSON that is not an object (e.g. a list).
        url = url_response.get("url") if isinstance(url_response, dict) else None
        if not url:
            raise ValueError("API response did not contain a URL")
        return url

    def process_ocr(self, doc_type: str, doc_source: str, include_image_base64: bool = False) -> bytes:
        """
        Process a document with OCR.

        Connection errors, server errors (5xx), rate limiting (429), empty
        responses and undecodable responses are retried up to
        ``OCR_MAX_RETRIES`` times. Any other non-200 status fails immediately.

        Args:
            doc_type (str): Type of document, either "document_url" or "image_url"
            doc_source (str): URL of the document to process
            include_image_base64 (bool, optional): Whether to include base64-encoded
                images in the response. Defaults to False.

        Returns:
            bytes: JSON response from the API containing OCR results

        Raises:
            ValueError: If the document type is unsupported or if processing fails
            requests.RequestException: If there's an error communicating with the API
        """
        if doc_type not in ("document_url", "image_url"):
            raise ValueError(f"Unsupported document type: {doc_type}")

        # The source URL is stored under a key named after the document type.
        request_body = {
            "model": "mistral-ocr-latest",
            "document": {"type": doc_type, doc_type: doc_source},
            "include_image_base64": include_image_base64,
        }

        max_retries = self.OCR_MAX_RETRIES
        retry_delay = self.OCR_RETRY_DELAY
        last_error: Optional[Exception] = None

        for attempt in range(1, max_retries + 1):
            delay = retry_delay
            try:
                response = self.session.post(
                    f"{self.BASE_URL}/ocr",
                    json=request_body,
                    headers={"Content-Type": "application/json"},
                )
            except requests.RequestException as e:
                # Every request-level failure is retried.
                last_error = e
            else:
                if response.status_code != 200:
                    error_msg = response.text or response.reason
                    error = ValueError(f"API returned error status: {response.status_code} - {error_msg}")
                    # Only server errors and rate limiting are retried
                    if not self._is_retryable_status(response.status_code):
                        raise error
                    last_error = error
                elif not response.content:
                    last_error = ValueError("Received empty response from API")
                    # Empty bodies back off linearly with the attempt number.
                    delay = retry_delay * attempt
                else:
                    try:
                        # Validation only; the raw bytes are what is returned.
                        json.loads(response.content)
                    except ValueError:
                        # Covers json.JSONDecodeError and the
                        # UnicodeDecodeError raised for non-UTF-8 bodies.
                        last_error = ValueError("Received invalid JSON response from API")
                    else:
                        return response.content

            self._pause_before_retry(attempt, max_retries, delay)

        if last_error is None:
            raise ValueError(f"Failed after {max_retries} attempts")
        raise last_error