5e891ef461
This commit adds extensive documentation to the Mistral OCR CLI project: - Add API.md with detailed API response format documentation - Add CHANGELOG.md to track version changes - Add CONTRIBUTING.md with guidelines for contributors - Enhance README.md with more detailed usage examples and troubleshooting - Add proper docstrings to all Python modules and functions - Update requirements.txt with development dependencies - Improve setup.py with better metadata These changes make the project more accessible to users and contributors.
216 lines
8.1 KiB
Python
216 lines
8.1 KiB
Python
import os
|
|
import json
|
|
import time
|
|
import requests
|
|
from typing import Optional, Dict, Any, Tuple
|
|
|
|
class MistralClient:
    """
    Client for interacting with the Mistral AI OCR API.

    This client handles authentication, file uploads, and OCR processing
    requests to the Mistral AI API. It can be used as a context manager so
    the underlying HTTP session is closed when the block exits.

    Attributes:
        BASE_URL (str): Base URL for the Mistral AI API
        MAX_FILE_SIZE (int): Maximum allowed file size in bytes (52 MB)
        UPLOAD_MAX_RETRIES (int): Maximum number of upload attempts
        UPLOAD_RETRY_DELAY (int): Seconds to wait between upload attempts
        OCR_MAX_RETRIES (int): Maximum number of OCR request attempts
        OCR_RETRY_DELAY (int): Base number of seconds to wait between OCR attempts
        api_key (str): Mistral AI API key for authentication
        timeout: Value forwarded as ``timeout`` to every HTTP request
            (``None`` means wait indefinitely)
        session (requests.Session): Session object for making HTTP requests
    """

    BASE_URL = "https://api.mistral.ai/v1"
    MAX_FILE_SIZE = 52 * 1024 * 1024  # 52 MB

    # Retry policy. Kept as class attributes so callers and subclasses can
    # tune it without editing the request methods.
    UPLOAD_MAX_RETRIES = 3
    UPLOAD_RETRY_DELAY = 3
    OCR_MAX_RETRIES = 5
    OCR_RETRY_DELAY = 10

    # Class-level fallback so instances that never ran __init__ still behave
    # as "no timeout", which is what requests does by default.
    timeout = None

    def __init__(self, api_key: Optional[str] = None, timeout: Optional[float] = None):
        """
        Initialize the Mistral AI client.

        Args:
            api_key (Optional[str]): Mistral AI API key. If not provided,
                will look for MISTRAL_API_KEY environment variable.
            timeout (Optional[float]): Value passed unchanged as the ``timeout``
                argument of every HTTP request. Defaults to None, which keeps
                the previous behaviour of waiting indefinitely.

        Raises:
            ValueError: If no API key is provided or found in environment variables.
        """
        self.api_key = api_key or os.environ.get("MISTRAL_API_KEY")
        if not self.api_key:
            raise ValueError("API key must be provided or set as MISTRAL_API_KEY environment variable")

        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update({
            "Authorization": f"Bearer {self.api_key}",
            "Accept": "application/json"
        })

    def close(self) -> None:
        """Close the underlying HTTP session."""
        self.session.close()

    def __enter__(self) -> "MistralClient":
        """Return the client itself so it can be used in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the HTTP session when the ``with`` block exits."""
        self.close()

    @staticmethod
    def _wait_before_retry(delay: float, attempt: int, max_attempts: int) -> None:
        """Sleep for ``delay`` seconds unless ``attempt`` was the last one allowed."""
        # Sleeping after the final attempt would only postpone the error
        # that is about to be raised.
        if attempt < max_attempts:
            time.sleep(delay)

    def upload_file(self, file_path: str) -> str:
        """
        Upload a file to Mistral API for OCR processing.

        The upload is attempted up to UPLOAD_MAX_RETRIES times. An attempt is
        retried when the API answers with a 5xx or 429 status, with an empty
        body, or with a body that has no file ID. Any other request failure
        (including failures that carry no HTTP response) is raised immediately.

        Args:
            file_path (str): Path to the local file to upload

        Returns:
            str: File ID returned by the API

        Raises:
            OSError: If the file cannot be inspected or opened
            ValueError: If the file is too large or if the upload fails
            requests.RequestException: If there's an error communicating with the API
        """
        # Check file size
        file_size = os.path.getsize(file_path)
        if file_size > self.MAX_FILE_SIZE:
            raise ValueError(f"File is too large ({file_size/1024/1024:.2f} MB). Maximum allowed size is {self.MAX_FILE_SIZE/1024/1024:.2f} MB")

        max_retries = self.UPLOAD_MAX_RETRIES
        retry_delay = self.UPLOAD_RETRY_DELAY
        last_error = None

        for attempt in range(1, max_retries + 1):
            try:
                # Reopen the file on every attempt so a retry reads it from the start
                with open(file_path, 'rb') as f:
                    response = self.session.post(
                        f"{self.BASE_URL}/files",
                        files={'file': f},
                        data={'purpose': 'ocr'},
                        timeout=self.timeout
                    )
                response.raise_for_status()
            except requests.RequestException as e:
                last_error = e
                # Retry only on server errors or rate limiting. Compare with
                # None explicitly: an error Response object is falsy.
                failed = getattr(e, 'response', None)
                retryable = failed is not None and (
                    failed.status_code >= 500 or failed.status_code == 429
                )
                if not retryable:
                    raise
                self._wait_before_retry(retry_delay, attempt, max_retries)
                continue

            if not response.content:
                last_error = ValueError("Received empty response from API")
                self._wait_before_retry(retry_delay, attempt, max_retries)
                continue

            file_id = response.json().get('id')
            if not file_id:
                last_error = ValueError("Received response without file ID")
                self._wait_before_retry(retry_delay, attempt, max_retries)
                continue

            return file_id

        # The fallback covers the case where no attempt was made at all
        raise last_error or ValueError(f"Failed to upload file after {max_retries} attempts")

    def get_file_url(self, file_id: str) -> str:
        """
        Get a signed URL for an uploaded file.

        Args:
            file_id (str): ID of the file previously uploaded to the API

        Returns:
            str: Signed URL that can be used for OCR processing

        Raises:
            ValueError: If the API response does not contain a URL
            requests.RequestException: If there's an error communicating with the API
        """
        response = self.session.get(
            f"{self.BASE_URL}/files/{file_id}/url?expiry=24",
            timeout=self.timeout
        )
        response.raise_for_status()

        url = response.json().get('url')
        if not url:
            raise ValueError("API response did not contain a URL")

        return url

    def process_ocr(self, doc_type: str, doc_source: str, include_image_base64: bool = False) -> bytes:
        """
        Process a document with OCR.

        The request is attempted up to OCR_MAX_RETRIES times. An attempt is
        retried on a request exception, a 5xx or 429 status, an empty body,
        or a body that cannot be parsed as JSON. Any other non-200 status is
        raised immediately.

        Args:
            doc_type (str): Type of document, either "document_url" or "image_url"
            doc_source (str): URL of the document to process
            include_image_base64 (bool, optional): Whether to include base64-encoded
                images in the response. Defaults to False.

        Returns:
            bytes: Raw JSON response body from the API containing OCR results

        Raises:
            ValueError: If the document type is unsupported or if processing fails
            requests.RequestException: If the last attempt failed while
                communicating with the API
        """
        if doc_type not in ("document_url", "image_url"):
            raise ValueError(f"Unsupported document type: {doc_type}")

        request_body = {
            "model": "mistral-ocr-latest",
            # The key that carries the source has the same name as the type
            "document": {"type": doc_type, doc_type: doc_source},
            "include_image_base64": include_image_base64
        }

        max_retries = self.OCR_MAX_RETRIES
        retry_delay = self.OCR_RETRY_DELAY
        last_error = None

        for attempt in range(1, max_retries + 1):
            # Only the network call sits inside the try, so errors raised
            # while inspecting the response are never mistaken for request failures.
            try:
                response = self.session.post(
                    f"{self.BASE_URL}/ocr",
                    json=request_body,
                    headers={"Content-Type": "application/json"},
                    timeout=self.timeout
                )
            except requests.RequestException as e:
                last_error = e
                self._wait_before_retry(retry_delay, attempt, max_retries)
                continue

            if response.status_code != 200:
                error_msg = response.text or response.reason
                status_error = ValueError(f"API returned error status: {response.status_code} - {error_msg}")

                # Retry on server errors or rate limiting
                if response.status_code >= 500 or response.status_code == 429:
                    last_error = status_error
                    self._wait_before_retry(retry_delay, attempt, max_retries)
                    continue

                # For other errors, don't retry
                raise status_error

            if not response.content:
                last_error = ValueError("Received empty response from API")
                # Empty bodies wait longer on each successive attempt
                self._wait_before_retry(retry_delay * attempt, attempt, max_retries)
                continue

            try:
                json.loads(response.content)
            except ValueError:
                # ValueError covers json.JSONDecodeError and also the
                # UnicodeDecodeError raised for bytes that cannot be decoded.
                last_error = ValueError("Received invalid JSON response from API")
                self._wait_before_retry(retry_delay, attempt, max_retries)
                continue

            # If we got here, we have a valid response
            return response.content

        # The fallback covers the case where no attempt was made at all
        raise last_error or ValueError(f"Failed after {max_retries} attempts")
|