From 240d64023b6a9ffc6779cc4d1c6492efb34417d0 Mon Sep 17 00:00:00 2001
From: Heiko Joerg Schick <info@schihei.de>
Date: Thu, 24 Apr 2025 20:54:50 +0200
Subject: [PATCH] Initial commit

---
 .gitignore                       |  40 +++++++
 README.md                        | 168 +++++++++++++++++++++++++++
 build.sh                         |  20 ++++
 mistral_ocr/__init__.py          |   1 +
 mistral_ocr/__main__.py          |  67 +++++++++++
 mistral_ocr/client.py            | 153 ++++++++++++++++++++++++
 mistral_ocr/commands/__init__.py |   1 +
 mistral_ocr/commands/convert.py  | 193 +++++++++++++++++++++++++++++++
 mistral_ocr/commands/markdown.py |  44 +++++++
 mistral_ocr/commands/process.py  |  89 ++++++++++++++
 mistral_ocr/commands/version.py  |   6 +
 requirements.txt                 |   1 +
 setup.py                         |  28 +++++
 13 files changed, 811 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100755 build.sh
 create mode 100644 mistral_ocr/__init__.py
 create mode 100644 mistral_ocr/__main__.py
 create mode 100644 mistral_ocr/client.py
 create mode 100644 mistral_ocr/commands/__init__.py
 create mode 100644 mistral_ocr/commands/convert.py
 create mode 100644 mistral_ocr/commands/markdown.py
 create mode 100644 mistral_ocr/commands/process.py
 create mode 100644 mistral_ocr/commands/version.py
 create mode 100644 requirements.txt
 create mode 100644 setup.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8aafe96
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,40 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Virtual Environment
+venv/
+env/
+ENV/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# OS specific
+.DS_Store
+Thumbs.db
+
+# Mistral OCR specific
+markdown_output/
+*.json
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..d47a05b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,168 @@
+# Mistral OCR CLI (Python)
+
+A command-line tool for processing documents with Mistral AI's OCR capabilities, implemented in Python.
+
+## Features
+
+- Process PDF documents and images using Mistral AI's OCR
+- Extract text and structured content from documents
+- Process local files or files from URLs
+- Output results to stdout or to a file
+- Convert OCR results to Markdown format
+- Maintain document structure and formatting in the output
+
+## Installation
+
+### Requirements
+
+- Python 3.7 or later
+- pip (Python package installer)
+
+### Installing from source
+
+```bash
+git clone https://github.com/yourusername/mistral-ocr-python
+cd mistral-ocr-python
+pip install -e .
+```
+
+Alternatively, you can use the build script:
+
+```bash
+git clone https://github.com/yourusername/mistral-ocr-python
+cd mistral-ocr-python
+./build.sh
+```
+
+## Usage
+
+### Setting up your API key
+
+You can provide your Mistral API key in two ways:
+
+1. Environment variable:
+```bash
+export MISTRAL_API_KEY=your-api-key
+```
+
+2. Command line flag:
+```bash
+mistral-ocr --api-key=your-api-key [command]
+```
+
+### Commands
+
+#### Process a document
+
+Process a document file or URL:
+
+```bash
+# Process a local PDF file
+mistral-ocr process path/to/document.pdf
+
+# Process a document from a URL
+mistral-ocr process https://example.com/document.pdf
+
+# Process an image from a URL
+mistral-ocr process https://example.com/image.jpg
+
+# Save output to a file
+mistral-ocr process path/to/document.pdf --output-file results.json
+
+# Include base64 encoded images in the output
+mistral-ocr process path/to/document.pdf --include-images
+```
+
+#### Convert OCR JSON to Markdown
+
+Convert previously processed OCR JSON results to Markdown:
+
+```bash
+# Convert OCR JSON to Markdown
+mistral-ocr convert results.json
+
+# Specify output directory
+mistral-ocr convert results.json --output-dir output_folder
+
+# Create a single markdown file instead of one per page
+mistral-ocr convert results.json --single-file
+
+# Specify output filename for single file mode
+mistral-ocr convert results.json --output-file document.md
+
+# Include images in markdown (if available in JSON)
+mistral-ocr convert results.json --images
+```
+
+#### Process and Convert in One Step
+
+Process a document and convert to Markdown in a single command:
+
+```bash
+# Process document and generate markdown files
+mistral-ocr markdown path/to/document.pdf
+
+# Generate a single markdown file instead of separate files per page
+mistral-ocr markdown path/to/document.pdf --single-file
+
+# Specify output directory for markdown files
+mistral-ocr markdown https://example.com/document.pdf --output-dir docs
+
+# Specify a specific output file path (implies single file)
+mistral-ocr markdown path/to/document.pdf --output-file docs/result.md
+
+# Save intermediate JSON and generate markdown files
+mistral-ocr markdown path/to/document.pdf --json-file results.json --output-dir docs
+```
+
+This command combines the `process` and `convert` steps, creating markdown files directly from the document.
+
+#### Version information
+
+```bash
+mistral-ocr version
+```
+
+## Examples
+
+### Process a local PDF and save the output
+
+```bash
+mistral-ocr process ~/Documents/sample.pdf --output-file results.json
+```
+
+### Process a document from a URL
+
+```bash
+mistral-ocr process https://arxiv.org/pdf/2201.04234 > output.json
+```
+
+### Convert OCR JSON to Markdown files
+
+```bash
+# Create separate files (one per page)
+mistral-ocr convert output.json --output-dir markdown_docs
+
+# Create a single file with all pages
+mistral-ocr convert output.json --single-file --output-dir markdown_docs
+
+# Create a single file with a specific filename
+mistral-ocr convert output.json --output-file docs/paper.md
+```
+
+### Process a document and generate markdown files in one step
+
+```bash
+# Generate separate files (one per page)
+mistral-ocr markdown ~/Documents/research-paper.pdf --output-dir research_docs
+
+# Generate a single markdown file
+mistral-ocr markdown ~/Documents/research-paper.pdf --single-file --output-dir research_docs
+
+# Generate a single markdown file with specific filename
+mistral-ocr markdown ~/Documents/research-paper.pdf --output-file research_docs/paper.md
+```
+
+## License
+
+MIT
diff --git a/build.sh b/build.sh
new file mode 100755
index 0000000..aa57d1d
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+set -e  # abort on the first failing command
+
+# Create ./venv (relative to the current directory) unless it already exists
+if [ ! -d "venv" ]; then
+    python3 -m venv venv
+fi
+
+# Activate the virtual environment so pip/python below resolve inside it
+source venv/bin/activate
+
+# Install the package in editable mode (pulls in its declared dependencies)
+pip install -e .
+
+# Run the unittest suite only when a tests/ directory is present
+if [ -d "tests" ]; then
+    python -m unittest discover tests
+fi
+
+echo "Build completed successfully!"
diff --git a/mistral_ocr/__init__.py b/mistral_ocr/__init__.py
new file mode 100644
index 0000000..1abd1d2
--- /dev/null
+++ b/mistral_ocr/__init__.py
@@ -0,0 +1 @@
+# Mistral OCR Python Package
diff --git a/mistral_ocr/__main__.py b/mistral_ocr/__main__.py
new file mode 100644
index 0000000..e7b0232
--- /dev/null
+++ b/mistral_ocr/__main__.py
@@ -0,0 +1,67 @@
+import sys
+import argparse
+import os
+from mistral_ocr.commands import process, convert, markdown, version
+
+def _add_markdown_options(sub, output_file_help, images_help):
+    """Register the options shared by the ``convert`` and ``markdown`` parsers."""
+    sub.add_argument("-d", "--output-dir", default="markdown_output", help="Directory to store markdown files")
+    sub.add_argument("-o", "--output-file", help=output_file_help)
+    sub.add_argument("--images", action="store_true", help=images_help)
+    # Both toggles default to on; as plain store_true flags they could never be
+    # switched off, so each gets a --no-* counterpart writing to the same dest.
+    sub.add_argument("--page-breaks", action="store_true", default=True, help="Include page break indicators between pages")
+    sub.add_argument("--no-page-breaks", dest="page_breaks", action="store_false", help="Omit page break indicators between pages")
+    sub.add_argument("--title-from-filename", action="store_true", default=True, help="Use filename as document title")
+    sub.add_argument("--no-title-from-filename", dest="title_from_filename", action="store_false", help="Do not use filename as document title")
+    sub.add_argument("--single-file", action="store_true", help="Create a single markdown file instead of one per page")
+
+def main():
+    """Parse the command line and run the selected sub-command; return the exit code."""
+    parser = argparse.ArgumentParser(
+        description="A CLI tool for performing OCR on documents using Mistral AI.",
+        prog="mistral-ocr"
+    )
+    parser.add_argument("--api-key", help="Mistral API key (defaults to MISTRAL_API_KEY env variable)")
+    subparsers = parser.add_subparsers(dest="command", help="Command to execute")
+
+    process_parser = subparsers.add_parser("process", help="Process a document with OCR")
+    process_parser.add_argument("file", help="File path or URL to process")
+    process_parser.add_argument("-o", "--output-file", help="Output JSON file path (default is stdout)")
+    process_parser.add_argument("--include-images", action="store_true", help="Include base64 encoded images in the output")
+
+    convert_parser = subparsers.add_parser("convert", help="Convert OCR JSON output to Markdown")
+    convert_parser.add_argument("json_file", help="JSON file to convert")
+    _add_markdown_options(
+        convert_parser,
+        "Output filename for single file mode (default: document.md)",
+        "Include images in markdown (if available)",
+    )
+
+    markdown_parser = subparsers.add_parser("markdown", help="Process document and convert to markdown in one step")
+    markdown_parser.add_argument("file_or_url", help="File path or URL to process")
+    markdown_parser.add_argument("-j", "--json-file", help="Save intermediate JSON to file (optional)")
+    _add_markdown_options(
+        markdown_parser,
+        "Path for output markdown file (implies --single-file)",
+        "Include extracted images in markdown (if available)",
+    )
+
+    subparsers.add_parser("version", help="Print the version number")
+
+    args = parser.parse_args()
+
+    # Export a key given on the command line through the environment.
+    if args.api_key:
+        os.environ["MISTRAL_API_KEY"] = args.api_key
+
+    # No sub-command given: show usage and report failure.
+    if args.command is None:
+        parser.print_help()
+        return 1
+    commands = {"process": process, "convert": convert, "markdown": markdown, "version": version}
+    commands[args.command].run(args)
+    return 0
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/mistral_ocr/client.py b/mistral_ocr/client.py
new file mode 100644
index 0000000..6191dc1
--- /dev/null
+++ b/mistral_ocr/client.py
@@ -0,0 +1,153 @@
+import os
+import json
+import time
+import requests
+from typing import Optional, Dict, Any, Tuple
+
+class MistralClient:
+    """HTTP client for the Mistral file-upload and OCR endpoints."""
+
+    BASE_URL = "https://api.mistral.ai/v1"
+    MAX_FILE_SIZE = 52 * 1024 * 1024  # 52 MB
+    # (connect, read) timeouts in seconds. requests never times out unless told
+    # to, so without this a stalled connection would hang the CLI forever.
+    REQUEST_TIMEOUT = (10, 600)
+
+    def __init__(self, api_key: Optional[str] = None):
+        """Set up the session; raise ValueError when no API key is available."""
+        self.api_key = api_key or os.environ.get("MISTRAL_API_KEY")
+        if not self.api_key:
+            raise ValueError("API key must be provided or set as MISTRAL_API_KEY environment variable")
+
+        self.session = requests.Session()
+        self.session.headers.update({
+            "Authorization": f"Bearer {self.api_key}",
+            "Accept": "application/json"
+        })
+
+    @staticmethod
+    def _wait(attempt: int, max_retries: int, delay: float) -> None:
+        """Sleep ``delay`` seconds unless ``attempt`` was the last one."""
+        if attempt < max_retries:
+            time.sleep(delay)
+
+    def upload_file(self, file_path: str) -> str:
+        """Upload a file to Mistral API for OCR processing and return its file ID.
+
+        Raises:
+            ValueError: If the file exceeds MAX_FILE_SIZE, or every attempt
+                returned an empty body or a body without an ID.
+            requests.RequestException: On a failure that is not retried, or
+                when retries on 5xx/429 responses are exhausted.
+        """
+        file_size = os.path.getsize(file_path)
+        if file_size > self.MAX_FILE_SIZE:
+            raise ValueError(f"File is too large ({file_size/1024/1024:.2f} MB). Maximum allowed size is {self.MAX_FILE_SIZE/1024/1024:.2f} MB")
+
+        max_retries = 3
+        retry_delay = 3
+        last_error = None
+
+        for attempt in range(1, max_retries + 1):
+            try:
+                # Re-opened on every attempt so a retry sends the whole file.
+                with open(file_path, 'rb') as f:
+                    response = self.session.post(
+                        f"{self.BASE_URL}/files",
+                        files={'file': f},
+                        data={'purpose': 'ocr'},
+                        timeout=self.REQUEST_TIMEOUT,
+                    )
+                response.raise_for_status()
+
+                if not response.content:
+                    last_error = ValueError("Received empty response from API")
+                else:
+                    file_id = response.json().get('id')
+                    if file_id:
+                        return file_id
+                    last_error = ValueError("Received response without file ID")
+            except requests.RequestException as e:
+                last_error = e
+                # Only server errors (5xx) and rate limiting (429) are retried.
+                failed = getattr(e, 'response', None)
+                if failed is None or (failed.status_code < 500 and failed.status_code != 429):
+                    raise
+            self._wait(attempt, max_retries, retry_delay)
+
+        raise last_error or ValueError(f"Failed to upload file after {max_retries} attempts")
+
+    def get_file_url(self, file_id: str) -> str:
+        """Get a signed URL for an uploaded file; ValueError if the API returns none."""
+        response = self.session.get(
+            f"{self.BASE_URL}/files/{file_id}/url?expiry=24",
+            timeout=self.REQUEST_TIMEOUT,
+        )
+        response.raise_for_status()
+
+        url = response.json().get('url')
+        if not url:
+            raise ValueError("API response did not contain a URL")
+        return url
+
+    def process_ocr(self, doc_type: str, doc_source: str, include_image_base64: bool = False) -> bytes:
+        """Process a document with OCR.
+
+        Args:
+            doc_type: Either "document_url" or "image_url".
+            doc_source: URL of the document or image to process.
+            include_image_base64: Sent to the API as ``include_image_base64``.
+
+        Returns:
+            The raw response body, verified to parse as JSON.
+
+        Raises:
+            ValueError: For an unsupported ``doc_type``, a non-retryable error
+                status, or retries ending on a bad status, empty or invalid body.
+            requests.RequestException: When the final attempt failed in transport.
+        """
+        if doc_type not in ("document_url", "image_url"):
+            raise ValueError(f"Unsupported document type: {doc_type}")
+
+        request_body = {
+            "model": "mistral-ocr-latest",
+            # The source URL is stored under a key named after the document type.
+            "document": {"type": doc_type, doc_type: doc_source},
+            "include_image_base64": include_image_base64
+        }
+
+        max_retries = 5
+        retry_delay = 10
+        last_error = None
+
+        for attempt in range(1, max_retries + 1):
+            delay = retry_delay
+            try:
+                response = self.session.post(
+                    f"{self.BASE_URL}/ocr",
+                    json=request_body,
+                    headers={"Content-Type": "application/json"},
+                    timeout=self.REQUEST_TIMEOUT,
+                )
+                if response.status_code != 200:
+                    error_msg = response.text or response.reason
+                    error = ValueError(f"API returned error status: {response.status_code} - {error_msg}")
+                    # Client errors other than 429 will not succeed on retry.
+                    if response.status_code < 500 and response.status_code != 429:
+                        raise error
+                    last_error = error
+                elif not response.content:
+                    last_error = ValueError("Received empty response from API")
+                    delay = retry_delay * attempt  # longer pause after each empty body
+                else:
+                    try:
+                        json.loads(response.content)
+                    except json.JSONDecodeError:
+                        last_error = ValueError("Received invalid JSON response from API")
+                    else:
+                        return response.content
+            except requests.RequestException as e:
+                last_error = e
+            self._wait(attempt, max_retries, delay)
+
+        raise last_error or ValueError(f"Failed after {max_retries} attempts")
diff --git a/mistral_ocr/commands/__init__.py b/mistral_ocr/commands/__init__.py
new file mode 100644
index 0000000..b2f20af
--- /dev/null
+++ b/mistral_ocr/commands/__init__.py
@@ -0,0 +1 @@
+# Commands package initialization
diff --git a/mistral_ocr/commands/convert.py b/mistral_ocr/commands/convert.py
new file mode 100644
index 0000000..8f57b1f
--- /dev/null
+++ b/mistral_ocr/commands/convert.py
@@ -0,0 +1,193 @@
+import json
+import os
+import sys
+import re
+from pathlib import Path
+
+def run(args):
+    """Entry point for ``convert``; an explicit --output-file implies single-file mode."""
+    # Short-circuits to True when a file name was given, else keeps the flag.
+    args.single_file = bool(args.output_file) or args.single_file
+
+    convert_json_to_markdown(args.json_file, args)
+
+# OCR response structure classes
+class OCRResponseImage:
+    """An image attached to a page: its ID and its base64 payload."""
+    def __init__(self, id, image_base64):
+        self.id, self.image_base64 = id, image_base64
+
+class OCRResponsePage:
+    """One OCR page: index, markdown text and any images it carries."""
+    def __init__(self, index, markdown, image=None, images=None, dimensions=None):
+        self.index, self.markdown = index, markdown
+        self.image, self.dimensions = image, dimensions
+        # A fresh list per instance when none (or an empty one) is passed.
+        self.images = images if images else []
+
+class OCRResponseMetadata:
+    """Optional document-level metadata; every field defaults to None."""
+    def __init__(self, title=None, author=None, creation_date=None, page_count=None):
+        self.title, self.author = title, author
+        self.creation_date = creation_date
+        self.page_count = page_count
+
+class OCRResponse:
+    """Parsed OCR result: a list of pages plus document metadata."""
+    def __init__(self, pages=None, metadata=None):
+        self.pages, self.metadata = pages or [], metadata or OCRResponseMetadata()
+
+def replace_image_references(content, images, include_images):
+    """Inline base64 image data into the markdown image links of a page.
+
+    Every literal ``![<id>](<id>)`` in ``content`` gets its target replaced by
+    the data URI of the image with that ID; a bare base64 payload is wrapped
+    as ``data:image/jpeg;base64,...``. ``content`` is returned unchanged when
+    ``include_images`` is falsy or ``images`` is empty.
+    """
+    if not include_images or not images:
+        return content
+
+    # Images without data are skipped; the last image wins on duplicate IDs.
+    uris = {
+        img.id: img.image_base64 if img.image_base64.startswith("data:") else "data:image/jpeg;base64," + img.image_base64
+        for img in images if img.image_base64
+    }
+
+    # Plain str.replace on purpose: re.sub would interpret backslashes in the
+    # replacement text (e.g. coming from an ID) as escapes or group references.
+    for image_id, uri in uris.items():
+        content = content.replace(f"![{image_id}]({image_id})", f"![{image_id}]({uri})")
+
+    return content
+
+def _parse_ocr_response(data):
+    """Build an OCRResponse from the decoded OCR JSON object.
+
+    Args:
+        data: Dictionary obtained from ``json.load``.
+
+    Returns:
+        An OCRResponse. Missing or null "pages", "images" and "metadata"
+        entries are treated as empty; absent page and image fields fall
+        back to 0 or "".
+    """
+    ocr_response = OCRResponse()
+    for page_data in data.get("pages") or []:
+        page = OCRResponsePage(
+            index=page_data.get("index", 0),
+            markdown=page_data.get("markdown", ""),
+            image=page_data.get("image", "")
+        )
+        for img_data in page_data.get("images") or []:
+            page.images.append(OCRResponseImage(
+                id=img_data.get("id", ""),
+                image_base64=img_data.get("image_base64", "")
+            ))
+        ocr_response.pages.append(page)
+
+    metadata = data.get("metadata")
+    if metadata:
+        ocr_response.metadata = OCRResponseMetadata(
+            title=metadata.get("title", ""),
+            author=metadata.get("author", ""),
+            creation_date=metadata.get("creation_date", ""),
+            page_count=metadata.get("page_count", 0)
+        )
+    return ocr_response
+
+
+def _render_single_document(ocr_response, json_file, args):
+    """Return the combined markdown text for all pages of ``ocr_response``.
+
+    Args:
+        ocr_response: Parsed OCR result.
+        json_file: Path of the source JSON; its stem is the fallback title.
+        args: CLI options; ``title_from_filename``, ``images`` and
+            ``page_breaks`` are read.
+    """
+    metadata = ocr_response.metadata
+    # Title precedence: metadata title, then file stem, then "Document".
+    if metadata.title:
+        title = metadata.title
+    elif args.title_from_filename:
+        title = Path(json_file).stem
+    else:
+        title = "Document"
+    combined = [f"# {title}\n"]
+
+    # The metadata section is emitted only when at least one field is set.
+    if metadata.author or metadata.creation_date or metadata.page_count:
+        combined.append("## Document Metadata\n")
+        if metadata.author:
+            combined.append(f"**Author:** {metadata.author}\n")
+        if metadata.creation_date:
+            combined.append(f"**Creation Date:** {metadata.creation_date}\n")
+        if metadata.page_count:
+            combined.append(f"**Page Count:** {metadata.page_count}\n")
+        combined.append("\n")
+
+    last_position = len(ocr_response.pages) - 1
+    for position, page in enumerate(ocr_response.pages):
+        # Headers show the stored page index plus one.
+        combined.append(f"## Page {page.index + 1}\n")
+        # replace_image_references returns the text as-is unless args.images is set.
+        combined.append(replace_image_references(page.markdown, page.images, args.images))
+        combined.append("\n")
+        # Separator between pages, but not after the final one.
+        if args.page_breaks and position < last_position:
+            combined.append("\n---\n")
+    return "\n".join(combined)
+
+
+def convert_json_to_markdown(json_file, args):
+    """Convert an OCR JSON result file into markdown file(s).
+
+    Writes either one combined document or one ``<index>.md`` per page into
+    ``args.output_dir`` and prints the path of every file created.
+
+    Args:
+        json_file: Path of the OCR JSON file to read.
+        args: Parsed CLI options; ``output_dir``, ``output_file``,
+            ``single_file``, ``images``, ``page_breaks`` and
+            ``title_from_filename`` are read.
+
+    Any exception is reported on stderr and ends the process with exit
+    status 1.
+    """
+    try:
+        with open(json_file, 'r', encoding='utf-8') as f:
+            ocr_response = _parse_ocr_response(json.load(f))
+
+        # Create output directory if it doesn't exist
+        output_dir = Path(args.output_dir)
+        os.makedirs(output_dir, exist_ok=True)
+
+        if args.single_file:
+            document = _render_single_document(ocr_response, json_file, args)
+
+            # The file name is resolved relative to the output directory and
+            # may contain sub-directories, which are created on demand.
+            output_path = output_dir / (args.output_file or "document.md")
+            output_path.parent.mkdir(parents=True, exist_ok=True)
+            with open(output_path, 'w', encoding='utf-8') as f:
+                f.write(document)
+
+            print(f"Created single markdown file: {output_path}")
+        else:
+            # One file per page, named after the page index.
+            for page in ocr_response.pages:
+                output_path = output_dir / f"{page.index}.md"
+                content = replace_image_references(page.markdown, page.images, args.images)
+
+                with open(output_path, 'w', encoding='utf-8') as f:
+                    f.write(content)
+
+                print(f"Created markdown file: {output_path}")
+
+        print(f"Successfully converted {json_file} to markdown files in {args.output_dir}/")
+        print(f"Total pages: {len(ocr_response.pages)}")
+
+    except Exception as e:
+        print(f"Error converting JSON to markdown: {e}", file=sys.stderr)
+        sys.exit(1)
diff --git a/mistral_ocr/commands/markdown.py b/mistral_ocr/commands/markdown.py
new file mode 100644
index 0000000..57d89d6
--- /dev/null
+++ b/mistral_ocr/commands/markdown.py
@@ -0,0 +1,44 @@
+import os
+import sys
+import tempfile
+from pathlib import Path
+from mistral_ocr.commands import process, convert
+
+def run(args):
+    """Run OCR on a file or URL and convert the result to markdown.
+
+    The intermediate JSON goes to ``--json-file`` when given; otherwise a
+    temporary file is used and removed afterwards.
+    """
+    # Embedding images requires the OCR response to carry their base64 data.
+    include_image_base64 = args.images
+    # If output file is specified, enable single file mode
+    if args.output_file:
+        args.single_file = True
+
+    json_output_path = args.json_file
+    temp_path = None
+    if not json_output_path:
+        # delete=False: only the name is needed here; it is unlinked in ``finally``.
+        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as temp_file:
+            temp_path = json_output_path = temp_file.name
+
+    try:
+        # Step 1: Process the document
+        if args.file_or_url.startswith(("http://", "https://")):
+            process.process_url(args.file_or_url, json_output_path, include_image_base64)
+        else:
+            process.process_local_file(args.file_or_url, json_output_path, include_image_base64)
+
+        # Step 2: Convert the JSON to markdown
+        print("Converting JSON to Markdown...")
+        convert.convert_json_to_markdown(json_output_path, args)
+    except Exception as e:
+        print(f"Error processing and converting document: {e}", file=sys.stderr)
+        sys.exit(1)
+    finally:
+        if temp_path:
+            try:
+                os.unlink(temp_path)
+            except OSError:
+                pass  # best-effort cleanup; the file may already be gone
diff --git a/mistral_ocr/commands/process.py b/mistral_ocr/commands/process.py
new file mode 100644
index 0000000..393ab00
--- /dev/null
+++ b/mistral_ocr/commands/process.py
@@ -0,0 +1,89 @@
+import os
+import json
+import sys
+from pathlib import Path
+import urllib.parse
+from mistral_ocr.client import MistralClient
+
+def run(args):
+    """Entry point for ``process``: route the input to the URL or file handler."""
+    source = args.file
+
+    # Anything without an http(s) scheme prefix is treated as a local path.
+    is_remote = source.startswith("http://") or source.startswith("https://")
+    handler = process_url if is_remote else process_local_file
+    handler(source, args.output_file, args.include_images)
+
+def process_url(url, output_file, include_image_base64):
+    """Run OCR on a remote document or image and emit the JSON result.
+
+    Errors are reported on stderr and terminate the process with status 1.
+    """
+    try:
+        client = MistralClient()
+
+        # Classify by the URL *path* so that a query string or fragment
+        # (e.g. "photo.png?size=large") does not hide the image extension.
+        url_path = urllib.parse.urlparse(url).path.lower()
+        is_image = url_path.endswith((".jpg", ".jpeg", ".png", ".webp", ".gif"))
+        doc_type = "image_url" if is_image else "document_url"
+
+        resp_data = client.process_ocr(doc_type, url, include_image_base64)
+        handle_output(resp_data, output_file)
+    except Exception as e:
+        print(f"Error processing document: {e}", file=sys.stderr)
+        sys.exit(1)
+
+def process_local_file(file_path, output_file, include_image_base64):
+    """Upload a local file, run OCR on its signed URL and emit the result.
+
+    A missing file or any processing error is reported on stderr and ends the
+    process with exit status 1.
+    """
+    image_suffixes = (".jpg", ".jpeg", ".png", ".webp", ".gif")
+
+    try:
+        print(f"Processing local file: {file_path}")
+
+        if not os.path.exists(file_path):
+            print(f"Error: file '{file_path}' does not exist", file=sys.stderr)
+            sys.exit(1)
+
+        client = MistralClient()
+
+        # The OCR call takes a URL, so the file is uploaded first and then
+        # referenced through its signed URL.
+        file_id = client.upload_file(file_path)
+        print(f"File uploaded successfully with ID: {file_id}")
+        file_url = client.get_file_url(file_id)
+
+        # Image extensions select "image_url"; everything else is a document.
+        if file_path.lower().endswith(image_suffixes):
+            doc_type = "image_url"
+        else:
+            doc_type = "document_url"
+        print(f"Processing with signed file URL (type: {doc_type})")
+
+        resp_data = client.process_ocr(doc_type, file_url, include_image_base64)
+        handle_output(resp_data, output_file)
+    except Exception as e:
+        print(f"Error processing document: {e}", file=sys.stderr)
+        sys.exit(1)
+
+def handle_output(data, output_file):
+    """Pretty-print the OCR JSON ``data`` to ``output_file`` or to stdout.
+
+    ``data`` is the raw JSON text/bytes; it is re-serialised with a
+    two-space indent. Missing parent directories of ``output_file`` are
+    created.
+    """
+    pretty_json = json.dumps(json.loads(data), indent=2)
+
+    if not output_file:
+        print(pretty_json)
+        return
+
+    Path(output_file).parent.mkdir(parents=True, exist_ok=True)
+    with open(output_file, 'w', encoding='utf-8') as f:
+        f.write(pretty_json)
+    print(f"OCR results saved to {output_file}")
diff --git a/mistral_ocr/commands/version.py b/mistral_ocr/commands/version.py
new file mode 100644
index 0000000..c659810
--- /dev/null
+++ b/mistral_ocr/commands/version.py
@@ -0,0 +1,6 @@
+import sys
+
+VERSION = "0.1.0"
+
+def run(args):  # args is unused; accepted so the command is called like the others
+    print("Mistral OCR CLI v" + VERSION)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..4a5625c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+requests>=2.25.0
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..c68a0fe
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,28 @@
+from setuptools import setup, find_packages
+
+setup(
+    name="mistral-ocr",
+    version="0.1.0",  # same value as VERSION in mistral_ocr/commands/version.py
+    description="A CLI tool for performing OCR on documents using Mistral AI",
+    author="Mistral OCR Team",
+    packages=find_packages(),
+    install_requires=[
+        "requests>=2.25.0",  # same pin as requirements.txt
+    ],
+    entry_points={
+        "console_scripts": [
+            "mistral-ocr=mistral_ocr.__main__:main",  # `mistral-ocr` command -> main()
+        ],
+    },
+    classifiers=[
+        "Development Status :: 4 - Beta",
+        "Intended Audience :: Developers",
+        "License :: OSI Approved :: MIT License",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.7",
+        "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
+    ],
+    python_requires=">=3.7",
+)