11 Commits

Author SHA1 Message Date
  Ron Shakutai 8c453d4948 remove benchmark script. 9 hours ago
  Ron Shakutai c46929fd05
Merge branch 'main' into ronshakutai/gpu-optimizations 9 hours ago
  dependabot[bot] e0109d405c
Bump actions/cache from 4 to 5 (#1817) 9 hours ago
  Ron Shakutai 8d7804dca4 Add comprehensive error path mocking for device_detector and spacy GPU config tests 9 hours ago
  Ron Shakutai 754349be5e test: enhance GPU detection tests for DeviceDetector and SpacyNlpEngine 10 hours ago
  Ron Shakutai 0509b6a824 docs added around gpu 10 hours ago
  Ron Shakutai 03742ea2fd fix: reorder device_detector import for consistency 10 hours ago
  Ron Shakutai 62a1185844 Merge branch 'main' of https://github.com/microsoft/presidio into ronshakutai/gpu-optimizations 10 hours ago
  Ron Shakutai ffe66e0ded refactor: remove unused GPU result files and update device detector usage 10 hours ago
  Hoa Ngo 586eaa8083
fix(analyzer): Pass map_location to GLiNER.from_pretrained for GPU support (#1813) 1 day ago
  Ron Shakutai 2ac7a320ce
Change parameters in extraction in langextract (#1811) 4 days ago
27 changed files with 1147 additions and 892 deletions
Split View
  1. +2
    -2
      .github/workflows/ci.yml
  2. +0
    -606
      benchmark_presidio.py
  3. +19
    -0
      docs/getting_started/getting_started_text.md
  4. +16
    -0
      docs/installation.md
  5. +16
    -0
      presidio-analyzer/README.md
  6. +0
    -24
      presidio-analyzer/gpu_gliner_results.json
  7. +0
    -24
      presidio-analyzer/gpu_spacy_results.json
  8. +0
    -24
      presidio-analyzer/gpu_stanza_results.json
  9. +0
    -24
      presidio-analyzer/gpu_trans_results.json
  10. +7
    -15
      presidio-analyzer/presidio_analyzer/conf/langextract_config_azureopenai.yaml
  11. +11
    -1
      presidio-analyzer/presidio_analyzer/conf/langextract_config_ollama.yaml
  12. +2
    -2
      presidio-analyzer/presidio_analyzer/nlp_engine/__init__.py
  13. +35
    -60
      presidio-analyzer/presidio_analyzer/nlp_engine/device_detector.py
  14. +2
    -3
      presidio-analyzer/presidio_analyzer/nlp_engine/spacy_nlp_engine.py
  15. +94
    -12
      presidio-analyzer/presidio_analyzer/nlp_engine/stanza_nlp_engine.py
  16. +0
    -10
      presidio-analyzer/presidio_analyzer/nlp_engine/transformers_nlp_engine.py
  17. +5
    -3
      presidio-analyzer/presidio_analyzer/predefined_recognizers/ner/gliner_recognizer.py
  18. +21
    -39
      presidio-analyzer/presidio_analyzer/predefined_recognizers/third_party/azure_openai_langextract_recognizer.py
  19. +54
    -4
      presidio-analyzer/presidio_analyzer/predefined_recognizers/third_party/langextract_recognizer.py
  20. +22
    -36
      presidio-analyzer/presidio_analyzer/predefined_recognizers/third_party/ollama_langextract_recognizer.py
  21. +3
    -1
      presidio-analyzer/pyproject.toml
  22. +130
    -0
      presidio-analyzer/tests/test_azure_openai_langextract_recognizer.py
  23. +187
    -0
      presidio-analyzer/tests/test_device_detector.py
  24. +0
    -1
      presidio-analyzer/tests/test_gliner_recognizer.py
  25. +133
    -0
      presidio-analyzer/tests/test_ollama_recognizer.py
  26. +49
    -1
      presidio-analyzer/tests/test_spacy_nlp_engine.py
  27. +339
    -0
      presidio-analyzer/tests/test_stanza_batch_processing.py

+ 2
- 2
.github/workflows/ci.yml View File

@@ -327,7 +327,7 @@ jobs:
python-version: '3.10'

- name: Cache E2E dependencies
uses: actions/cache@v4
uses: actions/cache@v5
with:
path: |
~/.cache/pip
@@ -415,7 +415,7 @@ jobs:
python-version: '3.10'

- name: Cache E2E dependencies
uses: actions/cache@v4
uses: actions/cache@v5
with:
path: |
~/.cache/pip


+ 0
- 606
benchmark_presidio.py View File

@@ -1,606 +0,0 @@
#!/usr/bin/env python3
"""Comprehensive benchmark script for Presidio Analyzer performance testing.

Tests different dataset sizes and NLP engines (spaCy, Transformers, GLiNER).
Generates a markdown report.
"""

import argparse
import json
import logging
import sys
import time
import warnings

from presidio_analyzer import AnalyzerEngine
from presidio_analyzer.batch_analyzer_engine import BatchAnalyzerEngine

# Configure logging - suppress presidio-analyzer INFO logs
logging.basicConfig(
    level=logging.ERROR,  # only surface errors; benchmark output stays readable
    format='%(levelname)s - %(name)s - %(message)s',
    stream=sys.stderr  # keep log noise off stdout, where results are printed
)

# Suppress warnings from spacy_huggingface_pipelines and other libraries
warnings.filterwarnings('ignore')

# Optional imports for different NLP engines
try:
from presidio_analyzer.nlp_engine import NlpEngineProvider, TransformersNlpEngine
from presidio_analyzer.nlp_engine.ner_model_configuration import (
NerModelConfiguration,
)
HAS_TRANSFORMERS = True
except ImportError:
HAS_TRANSFORMERS = False

try:
from presidio_analyzer.predefined_recognizers import GLiNERRecognizer
HAS_GLINER = True
except ImportError:
HAS_GLINER = False

try:
from presidio_analyzer.nlp_engine import StanzaNlpEngine
HAS_STANZA = True
except ImportError:
HAS_STANZA = False

# Sample texts for testing - large dataset
# Templates with {placeholder} fields; generate_test_texts() fills them by
# cycling through the value lists below, so output is deterministic.
TEST_TEXT_TEMPLATES = [
    (
        "My name is {name} and my email is {email}. "
        "I work at {company} as a software engineer."
    ),
    "Patient information: Name: {name}, SSN: {ssn}, Phone: {phone}, Address: {address}",
    (
        "Dear {name}, your account {email} has been verified. "
        "Contact us at {phone} for support."
    ),
    "Employee ID: {id}, Name: {name}, Credit Card: {cc}, Expires: {exp_date}",
    "Contact {name} at {phone} or email {email}. Office located at {address}.",
    (
        "Medical record for {name}, born {dob}. "
        "Insurance details: Policy #{id}, contact {phone}."
    ),
    "Transaction approved for {name}. Card ending {cc_last4}. Receipt sent to {email}.",
    (
        "Hello {name}, your appointment at {address} is confirmed "
        "for {date} at {time}. Call {phone} if needed."
    ),
    "User profile: {name}, Username: {email}, Phone: {phone}, Registered: {date}",
    (
        "Billing statement for {name} at {address}. Amount due: $2,500. "
        "Questions? Email {email} or call {phone}."
    ),
    (
        "Dear Dr. {name}, patient consultation scheduled {date}. "
        "Patient contact: {phone}, Address: {address}"
    ),
    (
        "Account #{id} for {name} ({email}) shows activity on {date}. "
        "Security code sent to {phone}."
    ),
    (
        "Prescription refill for {name}, DOB: {dob}. Pharmacy: {address}. "
        "Insurance verification needed, call {phone}."
    ),
    (
        "Welcome {name}! Your credit card {cc} has been added. "
        "Billing address: {address}. Contact: {email}"
    ),
    (
        "Invoice #{id} - {name}, {company}. Payment to {address}. "
        "Due {date}. Support: {email}/{phone}"
    ),
]

# Synthetic person names for the {name} placeholder.
NAMES = [
    "John Smith",
    "Sarah Johnson",
    "Michael Brown",
    "Emily Davis",
    "James Wilson",
    "Jessica Martinez",
    "David Anderson",
    "Jennifer Taylor",
    "Robert Thomas",
    "Mary Garcia",
    "Christopher Lee",
    "Patricia Rodriguez",
    "Daniel White",
    "Linda Harris",
    "Matthew Clark",
    "Barbara Lewis",
    "Joseph Walker",
    "Susan Hall",
    "Charles Allen",
    "Karen Young",
]

# Synthetic email addresses for the {email} placeholder.
EMAILS = [
    "john.smith@example.com",
    "sarah.j@company.org",
    "mbrown@corp.net",
    "emily.davis@mail.com",
    "jwilson@business.io",
    "jmartinez@enterprise.com",
    "david.a@startup.tech",
    "jtaylor@firm.law",
    "rthomas@clinic.med",
    "mgarcia@university.edu",
    "clee@consulting.biz",
    "prodriguez@agency.gov",
    "dwhite@financial.com",
    "lharris@retail.store",
    "mclark@manufacturing.ind",
    "blewis@services.pro",
    "jwalker@healthcare.org",
    "shall@education.edu",
    "callen@technology.io",
    "kyoung@pharma.com",
]

# Synthetic phone numbers (mix of local and +1-prefixed formats).
PHONES = [
    "555-123-4567",
    "555-234-5678",
    "555-345-6789",
    "555-456-7890",
    "555-567-8901",
    "+1-555-678-9012",
    "+1-202-555-0173",
    "555-789-0123",
    "555-890-1234",
    "555-901-2345",
    "+1-415-555-0198",
    "+1-310-555-0142",
    "555-111-2222",
    "555-222-3333",
    "555-333-4444",
    "+1-713-555-0156",
    "+1-617-555-0187",
    "555-444-5555",
    "555-555-6666",
    "555-666-7777",
]

# Synthetic US Social Security numbers for the {ssn} placeholder.
SSNS = [
    "123-45-6789", "234-56-7890", "345-67-8901", "456-78-9012", "567-89-0123",
    "678-90-1234", "789-01-2345", "890-12-3456", "901-23-4567", "012-34-5678",
    "111-22-3333", "222-33-4444", "333-44-5555", "444-55-6666", "555-66-7777",
    "666-77-8888", "777-88-9999", "888-99-0000", "999-00-1111", "000-11-2222"
]

# Synthetic US street addresses for the {address} placeholder.
ADDRESSES = [
    "123 Main St, New York, NY 10001", "456 Oak Ave, Los Angeles, CA 90012",
    "789 Pine Rd, Chicago, IL 60601", "321 Elm St, Houston, TX 77001",
    "654 Maple Dr, Phoenix, AZ 85001", "987 Cedar Ln, Philadelphia, PA 19101",
    "147 Birch Way, San Antonio, TX 78201", "258 Spruce Ct, San Diego, CA 92101",
    "369 Willow Pl, Dallas, TX 75201", "741 Ash Blvd, San Jose, CA 95101",
    "852 Hickory St, Austin, TX 78701", "963 Walnut Ave, Jacksonville, FL 32099",
    "159 Chestnut Rd, Fort Worth, TX 76101", "357 Magnolia Dr, Columbus, OH 43004",
    "486 Sycamore Ln, Charlotte, NC 28201"
]

# Synthetic credit card numbers (varied network formats) for {cc}.
CREDIT_CARDS = [
    "4532-1234-5678-9010", "5425-2345-6789-0123", "3782-345678-90123",
    "6011-4567-8901-2345", "3056-567890-1234", "4916-6789-0123-4567",
    "5412-7890-1234-5678", "3714-890123-45678", "6011-9012-3456-7890"
]

# MM/DD/YYYY dates for the {date} and {exp_date} placeholders.
DATES = [
    "01/15/2024", "02/20/2024", "03/25/2024", "04/10/2024", "05/18/2024",
    "06/22/2024", "07/30/2024", "08/14/2024", "09/05/2024", "10/12/2024",
    "11/28/2024", "12/31/2024"
]

# Clock times for the {time} placeholder.
TIMES = [
    "10:30 AM",
    "2:15 PM",
    "9:00 AM",
    "4:45 PM",
    "11:20 AM",
    "3:30 PM",
    "8:15 AM",
]
# Dates of birth for the {dob} placeholder.
DOBS = [
    "05/15/1985",
    "08/22/1990",
    "03/10/1978",
    "11/05/1982",
    "07/30/1995",
    "12/18/1988",
]


def generate_test_texts(count):
    """Generate ``count`` synthetic texts containing PII placeholders.

    Templates and value lists are cycled by index, so the output is
    deterministic for a given ``count``.

    :param count: Number of texts to generate.
    :return: List of ``count`` formatted strings.
    """
    def _cycle(values, idx):
        # Wrap around each value list so any count is supported.
        return values[idx % len(values)]

    generated = []
    for idx in range(count):
        template = _cycle(TEST_TEXT_TEMPLATES, idx)
        generated.append(
            template.format(
                name=_cycle(NAMES, idx),
                email=_cycle(EMAILS, idx),
                phone=_cycle(PHONES, idx),
                ssn=_cycle(SSNS, idx),
                address=_cycle(ADDRESSES, idx),
                company=f"Company{idx % 50}",
                id=f"EMP{10000 + idx}",
                cc=_cycle(CREDIT_CARDS, idx),
                cc_last4=str(1000 + idx % 9000),
                exp_date=_cycle(DATES, idx),
                date=_cycle(DATES, idx),
                time=_cycle(TIMES, idx),
                dob=_cycle(DOBS, idx),
            )
        )
    return generated


def create_transformers_analyzer():
    """Build an ``AnalyzerEngine`` backed by the Transformers NLP engine.

    :raises ImportError: If the transformers extra is not installed.
    :return: AnalyzerEngine configured for English.
    """
    if not HAS_TRANSFORMERS:
        raise ImportError(
            "Transformers support not available. "
            "Install with: pip install 'presidio-analyzer[transformers]'"
        )

    # Simple inline configuration (same as previous working version);
    # performs better than loading from a config file.
    model_config = [
        {
            "lang_code": "en",
            "model_name": {
                "spacy": "en_core_web_sm",
                "transformers": "StanfordAIMI/stanford-deidentifier-base",
            },
        }
    ]

    # Entity mapping taken from the official transformers.yaml config.
    entity_translation = dict(
        PER="PERSON",
        PERSON="PERSON",
        LOC="LOCATION",
        LOCATION="LOCATION",
        GPE="LOCATION",
        ORG="ORGANIZATION",
        ORGANIZATION="ORGANIZATION",
        NORP="NRP",
        AGE="AGE",
        ID="ID",
        EMAIL="EMAIL",
        PATIENT="PERSON",
        STAFF="PERSON",
        HOSP="ORGANIZATION",
        PATORG="ORGANIZATION",
        DATE="DATE_TIME",
        TIME="DATE_TIME",
        PHONE="PHONE_NUMBER",
        HCW="PERSON",
        HOSPITAL="LOCATION",
        FACILITY="LOCATION",
        VENDOR="ORGANIZATION",
    )

    ner_config = NerModelConfiguration(
        model_to_presidio_entity_mapping=entity_translation,
        alignment_mode="strict",  # faster than "expand"
        aggregation_strategy="simple",  # faster than "max"
        labels_to_ignore=["O"],
    )

    engine = TransformersNlpEngine(
        models=model_config,
        ner_model_configuration=ner_config,
    )
    return AnalyzerEngine(nlp_engine=engine, supported_languages=["en"])


def create_gliner_analyzer():
    """Build an ``AnalyzerEngine`` that delegates NER to a GLiNER recognizer.

    :raises ImportError: If the gliner extra is not installed.
    :return: AnalyzerEngine with GLiNER added and the spaCy NER removed.
    """
    if not HAS_GLINER:
        raise ImportError(
            "GLiNER support not available. "
            "Install with: pip install 'presidio-analyzer[gliner]'"
        )

    # A small spaCy model suffices; its NER recognizer is replaced below.
    spacy_config = {
        "nlp_engine_name": "spacy",
        "models": [{"lang_code": "en", "model_name": "en_core_web_sm"}],
    }
    engine = NlpEngineProvider(nlp_configuration=spacy_config).create_engine()
    analyzer = AnalyzerEngine(nlp_engine=engine, supported_languages=["en"])

    # GLiNER label -> Presidio entity type.
    gliner_to_presidio = {
        "person": "PERSON",
        "name": "PERSON",
        "organization": "ORGANIZATION",
        "location": "LOCATION",
        "phone number": "PHONE_NUMBER",
        "email": "EMAIL_ADDRESS",
        "email address": "EMAIL_ADDRESS",
        "credit card number": "CREDIT_CARD",
        "social security number": "US_SSN",
        "date of birth": "DATE_TIME",
        "address": "LOCATION",
    }

    # The recognizer auto-detects GPU via DeviceDetector.
    recognizer = GLiNERRecognizer(
        model_name="urchade/gliner_multi_pii-v1",
        entity_mapping=gliner_to_presidio,
        flat_ner=False,
        multi_label=True,
    )

    # Swap spaCy's NER for GLiNER in the recognizer registry.
    analyzer.registry.add_recognizer(recognizer)
    analyzer.registry.remove_recognizer("SpacyRecognizer")
    return analyzer


def create_stanza_analyzer():
    """Build an ``AnalyzerEngine`` backed by the Stanza NLP engine.

    :raises ImportError: If the stanza extra is not installed.
    :return: AnalyzerEngine configured for English.
    """
    if not HAS_STANZA:
        raise ImportError(
            "Stanza support not available. "
            "Install with: pip install 'presidio-analyzer[stanza]'"
        )

    # Entity mapping taken from the stanza.yaml config.
    stanza_to_presidio = dict(
        PER="PERSON",
        PERSON="PERSON",
        NORP="NRP",
        FAC="LOCATION",
        LOC="LOCATION",
        LOCATION="LOCATION",
        GPE="LOCATION",
        ORG="ORGANIZATION",
        ORGANIZATION="ORGANIZATION",
        DATE="DATE_TIME",
        TIME="DATE_TIME",
    )

    ner_config = NerModelConfiguration(
        model_to_presidio_entity_mapping=stanza_to_presidio,
        labels_to_ignore=["O"],
    )

    # Stanza NLP engine; GPU usage is handled inside StanzaNlpEngine.
    engine = StanzaNlpEngine(
        models=[{"lang_code": "en", "model_name": "en"}],
        ner_model_configuration=ner_config,
    )
    return AnalyzerEngine(nlp_engine=engine, supported_languages=["en"])


def run_benchmark(num_texts, batch_size, engine_type="spacy"):
    """Benchmark one engine on a generated dataset.

    :param num_texts: Number of texts to process.
    :param batch_size: Batch size passed to the batch analyzer.
    :param engine_type: One of "spacy", "transformers", "gliner", "stanza".
    :return: Dict of timing, throughput and entity-count metrics.
    """
    separator = '=' * 80
    print(f"\n{separator}")
    print(
        f"Running benchmark: {num_texts} texts, "
        f"batch_size={batch_size}, engine={engine_type}"
    )
    print(separator)

    # Build the synthetic dataset.
    print(f"Generating {num_texts} test texts...")
    texts = generate_test_texts(num_texts)

    # Instantiate the requested analyzer and time its start-up cost.
    print(f"Initializing AnalyzerEngine ({engine_type})...")
    init_start = time.time()
    factories = {
        "transformers": create_transformers_analyzer,
        "gliner": create_gliner_analyzer,
        "stanza": create_stanza_analyzer,
    }
    analyzer = factories.get(engine_type, AnalyzerEngine)()  # default: spacy
    batch_analyzer = BatchAnalyzerEngine(analyzer)
    init_time = time.time() - init_start
    print(f" Initialization: {init_time:.2f}s")

    # Warm-up pass so one-time model loading costs don't skew the main run.
    print("Warm-up run...")
    warmup_start = time.time()
    _ = batch_analyzer.analyze_iterator(
        texts=texts[:min(10, num_texts)],
        language="en",
        batch_size=batch_size,
    )
    warmup_time = time.time() - warmup_start
    print(f" Warm-up: {warmup_time:.2f}s")

    # Timed run over the full dataset.
    # NOTE(review): assumes analyze_iterator returns a materialized sequence;
    # if it is lazy, the timing below would exclude the actual analysis work —
    # verify against BatchAnalyzerEngine.
    print(f"Processing {num_texts} texts...")
    analysis_start = time.time()
    results = batch_analyzer.analyze_iterator(
        texts=texts,
        language="en",
        batch_size=batch_size,
    )
    total_analysis_time = time.time() - analysis_start

    total_entities = sum(len(result) for result in results)
    avg_time = total_analysis_time / num_texts
    throughput = num_texts / total_analysis_time

    print(f" Complete: {total_analysis_time:.2f}s")
    print(f" Throughput: {throughput:.2f} texts/second")
    print(f" Entities found: {total_entities}")

    return {
        "num_texts": num_texts,
        "batch_size": batch_size,
        "engine_type": engine_type,
        "init_time": init_time,
        "warmup_time": warmup_time,
        "total_time": total_analysis_time,
        "avg_time_ms": avg_time * 1000,
        "throughput": throughput,
        "total_entities": total_entities,
    }


def main():
    """Run comprehensive benchmarks on Presidio Analyzer engines."""
    parser = argparse.ArgumentParser(
        description="Comprehensive Presidio Analyzer performance benchmark"
    )
    parser.add_argument(
        "--json",
        type=str,
        default="benchmark_results.json",
        help="Save results as JSON to this file (default: benchmark_results.json)",
    )
    parser.add_argument(
        "--engines",
        type=str,
        default="spacy",
        help=(
            "Comma-separated list of engines to test: "
            "spacy,transformers,gliner,stanza (default: spacy)"
        ),
    )
    parser.add_argument(
        "--sizes",
        type=str,
        default="50,500,5000",
        help="Comma-separated list of dataset sizes to test (default: 50,500,5000)",
    )
    args = parser.parse_args()

    # Availability table for optional engines:
    # name -> (available flag, display name, pip extra).
    optional_engines = {
        "transformers": (HAS_TRANSFORMERS, "Transformers", "transformers"),
        "gliner": (HAS_GLINER, "GLiNER", "gliner"),
        "stanza": (HAS_STANZA, "Stanza", "stanza"),
    }

    # Keep only engines that were requested AND are importable.
    available_engines = []
    for engine in (e.strip() for e in args.engines.split(',')):
        if engine == "spacy":
            # spaCy is a core dependency and is always available.
            available_engines.append("spacy")
        elif engine in optional_engines:
            is_available, display, extra = optional_engines[engine]
            if is_available:
                available_engines.append(engine)
            else:
                print(
                    f"⚠️ {display} engine requested but not available. "
                    f"Install with: pip install 'presidio-analyzer[{extra}]'"
                )
        else:
            print(f"⚠️ Unknown engine: {engine}. Skipping.")

    if not available_engines:
        print("❌ No valid engines available. Exiting.")
        sys.exit(1)

    # Parse dataset sizes.
    try:
        dataset_sizes = [int(s.strip()) for s in args.sizes.split(',')]
    except ValueError:
        print(
            "❌ Invalid dataset sizes format. "
            "Use comma-separated integers (e.g., 50,500,5000)"
        )
        sys.exit(1)

    # Batch size is currently fixed regardless of dataset size.
    def get_batch_size(num_texts):
        return 16

    # One (size, batch_size, engine) tuple per benchmark run.
    test_configs = [
        (size, get_batch_size(size), engine)
        for engine in available_engines
        for size in dataset_sizes
    ]

    banner = "=" * 80
    print(banner)
    print("PRESIDIO ANALYZER COMPREHENSIVE BENCHMARK")
    print(banner)
    print(f"\nEngines to test: {', '.join(available_engines)}")
    print(f"Dataset sizes: {', '.join(str(s) for s in dataset_sizes)}")
    print(f"Total tests: {len(test_configs)}")
    print("This may take several minutes...\n")

    all_results = []
    for num_texts, batch_size, engine in test_configs:
        try:
            all_results.append(run_benchmark(num_texts, batch_size, engine))
        except KeyboardInterrupt:
            print("\n\n⚠️ Benchmark interrupted by user")
            if all_results:
                print("Generating partial results...")
            else:
                print("No results to save.")
                sys.exit(1)
            break
        except Exception as e:
            # A single failing configuration shouldn't abort the whole sweep.
            print(
                f"\n❌ Error running benchmark for {num_texts} texts "
                f"with {engine} engine: {e}"
            )
            import traceback
            traceback.print_exc()
            continue

    if all_results:
        # Persist whatever completed (possibly partial after an interrupt).
        with open(args.json, 'w') as f:
            json.dump(all_results, f, indent=2)
        print(f"✅ JSON results saved to: {args.json}")

        print("\n" + banner)
        print("BENCHMARK COMPLETE")
        print(banner)
    else:
        print("\n❌ No results collected")
        sys.exit(1)


if __name__ == "__main__":
    # Top-level guard: translate Ctrl-C and unexpected failures into a
    # non-zero exit code without a raw traceback on interrupt.
    try:
        main()
    except KeyboardInterrupt:
        print("\n\n⚠️ Benchmark interrupted by user")
        sys.exit(1)
    except Exception as exc:
        print(f"\n❌ Fatal error: {exc}", file=sys.stderr)
        import traceback
        traceback.print_exc()
        sys.exit(1)

+ 19
- 0
docs/getting_started/getting_started_text.md View File

@@ -93,6 +93,25 @@ Using Presidio's modules as Python packages to get started:
!!! tip "Tip: Downloading models"
If not available, the transformers model and the spacy model would be downloaded on the first call to the `AnalyzerEngine`. To pre-download, see [this doc](../analyzer/nlp_engines/transformers.md#downloading-a-pre-trained-model).

=== "GPU Acceleration (Optional)"

GPU support is available for spaCy, Stanza, Transformers, and GLiNER.

- Install the `gpu` extra (includes `cupy-cuda12x`):
```sh
pip install "presidio-analyzer[gpu]"
pip install presidio-anonymizer
```

- Combine with other extras:
```sh
pip install "presidio-analyzer[transformers,gpu]"
pip install presidio-anonymizer
python -m spacy download en_core_web_sm
```

- Or install your own CUDA dependencies

## Simple flow - Docker container

Presidio provides Docker containers that you can use to de-identify text data. Each module, analyzer, and anonymizer, has its own Docker container. The containers are available on Docker Hub.


+ 16
- 0
docs/installation.md View File

@@ -61,6 +61,22 @@ with at least one NLP engine (`spaCy`, `transformers` or `stanza`):
Stanza models would be loaded lazily. To pre-load them, see: [Downloading a pre-trained model](./analyzer/nlp_engines/spacy_stanza.md#download-the-pre-trained-model).
### GPU acceleration (optional)
For GPU acceleration with spaCy, Stanza, Transformers, or GLiNER:
- Install the `gpu` extra (includes `cupy-cuda12x`):
```sh
pip install "presidio-analyzer[gpu]"
```
- Combine with other extras:
```sh
pip install "presidio-analyzer[transformers,gpu]"
```
- Or install your own CUDA dependencies
### PII redaction in images
For PII redaction in images


+ 16
- 0
presidio-analyzer/README.md View File

@@ -84,6 +84,22 @@ print(results)

```

## GPU Acceleration

GPU support is available for spaCy, Stanza, Transformers, and GLiNER.

- Install the `gpu` extra (includes `cupy-cuda12x`):
```bash
pip install "presidio-analyzer[gpu]"
```

- Combine with other extras:
```bash
pip install "presidio-analyzer[transformers,gpu]"
```

- Or install your own CUDA dependencies

## Documentation

Additional documentation on installation, usage and extending the Analyzer can be found under the [Analyzer](https://microsoft.github.io/presidio/analyzer/) section of [Presidio Documentation](https://microsoft.github.io/presidio)

+ 0
- 24
presidio-analyzer/gpu_gliner_results.json View File

@@ -1,24 +0,0 @@
[
{
"num_texts": 50,
"batch_size": 16,
"engine_type": "gliner",
"init_time": 9.893409252166748,
"warmup_time": 2.4725661277770996,
"total_time": 2.848795175552368,
"avg_time_ms": 56.97590351104736,
"throughput": 17.551279372096392,
"total_entities": 242
},
{
"num_texts": 500,
"batch_size": 16,
"engine_type": "gliner",
"init_time": 9.43524694442749,
"warmup_time": 0.5841579437255859,
"total_time": 45.80630612373352,
"avg_time_ms": 91.61261224746704,
"throughput": 10.915527627339854,
"total_entities": 2416
}
]

+ 0
- 24
presidio-analyzer/gpu_spacy_results.json View File

@@ -1,24 +0,0 @@
[
{
"num_texts": 50,
"batch_size": 16,
"engine_type": "spacy",
"init_time": 3.232339859008789,
"warmup_time": 1.648664951324463,
"total_time": 0.5361835956573486,
"avg_time_ms": 10.723671913146973,
"throughput": 93.25164067860219,
"total_entities": 235
},
{
"num_texts": 500,
"batch_size": 16,
"engine_type": "spacy",
"init_time": 1.908512830734253,
"warmup_time": 0.2146310806274414,
"total_time": 4.622352361679077,
"avg_time_ms": 9.244704723358154,
"throughput": 108.17003137735138,
"total_entities": 2377
}
]

+ 0
- 24
presidio-analyzer/gpu_stanza_results.json View File

@@ -1,24 +0,0 @@
[
{
"num_texts": 50,
"batch_size": 16,
"engine_type": "stanza",
"init_time": 5.2999162673950195,
"warmup_time": 2.0750410556793213,
"total_time": 7.569249153137207,
"avg_time_ms": 151.38498306274414,
"throughput": 6.60567501325764,
"total_entities": 253
},
{
"num_texts": 500,
"batch_size": 16,
"engine_type": "stanza",
"init_time": 6.010739803314209,
"warmup_time": 2.8395984172821045,
"total_time": 160.41057419776917,
"avg_time_ms": 320.82114839553833,
"throughput": 3.1170014975668203,
"total_entities": 2510
}
]

+ 0
- 24
presidio-analyzer/gpu_trans_results.json View File

@@ -1,24 +0,0 @@
[
{
"num_texts": 50,
"batch_size": 16,
"engine_type": "transformers",
"init_time": 1.8468782901763916,
"warmup_time": 1.5114922523498535,
"total_time": 0.7709858417510986,
"avg_time_ms": 15.419716835021973,
"throughput": 64.85203397047823,
"total_entities": 273
},
{
"num_texts": 500,
"batch_size": 16,
"engine_type": "transformers",
"init_time": 1.2162683010101318,
"warmup_time": 0.1569383144378662,
"total_time": 7.970991134643555,
"avg_time_ms": 15.941982269287111,
"throughput": 62.72745654262466,
"total_entities": 2746
}
]

+ 7
- 15
presidio-analyzer/presidio_analyzer/conf/langextract_config_azureopenai.yaml View File

@@ -1,17 +1,8 @@
# Azure OpenAI Configuration for LangExtract
#
# This config file is OPTIONAL for basic usage. You can pass model_id and credentials
# as parameters instead of using this file.
#
# Use this file when you need to customize:
# - Supported entities
# - Entity mappings
# - Prompts and examples
# - Detection parameters
#
# IMPORTANT: The model_id below is a placeholder. You can:
# 1. Pass model_id as a parameter: AzureOpenAILangExtractRecognizer(model_id="your-deployment-name")
# 2. OR update model_id below to match your Azure OpenAI deployment name
# Required parameters: model_id (deployment name)
# Auth parameters: azure_endpoint, api_key (via constructor or environment variables)
# Optional parameters use defaults if not specified

lm_recognizer:
supported_entities:
@@ -41,11 +32,12 @@ langextract:
examples_file: "presidio-analyzer/presidio_analyzer/conf/langextract_prompts/default_pii_phi_examples.yaml"
model:
# Azure OpenAI deployment name (e.g., "gpt-4", "gpt-4o", "my-gpt-deployment")
# This is the deployment name from Azure Portal, NOT the model name
# You can override this by passing model_id parameter to the recognizer
model_id: "gpt-4o"
temperature: null
# Optional: Uncomment to override defaults
# fence_output: true
# use_schema_constraints: false
entity_mappings:
person: PERSON


+ 11
- 1
presidio-analyzer/presidio_analyzer/conf/langextract_config_ollama.yaml View File

@@ -1,5 +1,8 @@
# Ollama Configuration
# Ollama Configuration for LangExtract
# https://github.com/google/langextract#using-local-llms-with-ollama
#
# Required parameters: model_id, model_url
# Optional parameters use defaults if not specified

lm_recognizer:
supported_entities:
@@ -30,6 +33,13 @@ langextract:
model_id: "qwen2.5:1.5b"
model_url: "http://localhost:11434"
temperature: null
# Optional: Uncomment to override defaults
# max_char_buffer: 400
# use_schema_constraints: false
# fence_output: false
# timeout: 240
# num_ctx: 8192
entity_mappings:
person: PERSON


+ 2
- 2
presidio-analyzer/presidio_analyzer/nlp_engine/__init__.py View File

@@ -1,6 +1,6 @@
"""NLP engine package. Performs text pre-processing."""

from .device_detector import DeviceDetector
from .device_detector import device_detector
from .ner_model_configuration import NerModelConfiguration
from .nlp_artifacts import NlpArtifacts
from .nlp_engine import NlpEngine
@@ -11,7 +11,7 @@ from .transformers_nlp_engine import TransformersNlpEngine
from .nlp_engine_provider import NlpEngineProvider # isort:skip

__all__ = [
"DeviceDetector",
"device_detector",
"NerModelConfiguration",
"NlpArtifacts",
"NlpEngine",


+ 35
- 60
presidio-analyzer/presidio_analyzer/nlp_engine/device_detector.py View File

@@ -1,4 +1,11 @@
"""GPU/CPU device detection singleton for Presidio NLP engines."""
"""GPU/CPU device detection for Presidio NLP engines.

This module creates a single, process-wide DeviceDetector instance.
Consumers may import and use the shared instance directly.

The detector is initialized once at import time and is intended to be
read-only in practice.
"""

import logging
from typing import Optional
@@ -7,84 +14,52 @@ logger = logging.getLogger("presidio-analyzer")


class DeviceDetector:
"""Singleton for GPU/CPU detection. Lazy initialization on first use."""

_instance: Optional["DeviceDetector"] = None
_torch_initialized: bool = False
_has_torch_gpu: bool = False
_torch_device: str = "cpu"
_torch_device_name: Optional[str] = None
"""Detect and expose PyTorch GPU/CPU availability.

def __new__(cls) -> "DeviceDetector":
"""Return singleton instance and detect torch GPU on first creation."""
if cls._instance is None:
cls._instance = super(DeviceDetector, cls).__new__(cls)
cls._instance._detect_torch_gpu()
return cls._instance
This class performs a one-time detection of CUDA availability and
exposes the result for reuse across the process.
"""

def _detect_torch_gpu(self) -> None:
"""Detect PyTorch GPU/CUDA once."""
if DeviceDetector._torch_initialized:
return
def __init__(self) -> None:
self._device = "cpu"
self._device_name: Optional[str] = None
self._detect()

def _detect(self) -> None:
"""Detect PyTorch CUDA support once."""
try:
import torch

if torch.cuda.is_available():
logger.info("GPU found, attempting CUDA initialization")


try:
# Force CUDA initialization
str(torch.tensor([1.0], device="cuda"))
DeviceDetector._torch_device_name = torch.cuda.get_device_name(0)
_ = str(torch.tensor([1.0], device="cuda"))
self._device_name = torch.cuda.get_device_name(0)
torch.cuda.get_device_capability(0)
torch.cuda.empty_cache()

DeviceDetector._has_torch_gpu = True
DeviceDetector._torch_device = "cuda"
self._device = "cuda"
logger.info(
"GPU and CUDA available. Device: "
f"{DeviceDetector._torch_device_name}"
"CUDA available. Device: %s",
self._device_name,
)

except Exception as e:
logger.warning(f"PyTorch Pre-Check: FAILED with error: {e}")
DeviceDetector._has_torch_gpu = False
DeviceDetector._torch_device = "cpu"
else:
logger.info("No GPU found, using CPU")
DeviceDetector._has_torch_gpu = False
DeviceDetector._torch_device = "cpu"

logger.warning(
"PyTorch CUDA initialization failed, falling back to CPU: %s",
e,
)
except ImportError:
logger.info("PyTorch not available, using CPU")
DeviceDetector._has_torch_gpu = False
DeviceDetector._torch_device = "cpu"

DeviceDetector._torch_initialized = True


def has_torch_gpu(self) -> bool:
"""Return True if PyTorch GPU is available."""
return DeviceDetector._has_torch_gpu

def get_torch_device(self) -> str:
"""Return torch device string: 'cuda:0' or 'cpu'."""
return DeviceDetector._torch_device

def get_torch_device_name(self) -> Optional[str]:
"""Return PyTorch GPU device name or None."""
return DeviceDetector._torch_device_name
def get_device(self) -> str:
"""Return device string ('cuda' or 'cpu')."""
return self._device

def get_torch_device_info(self) -> dict:
"""Return PyTorch device information."""
return {
"has_gpu": DeviceDetector._has_torch_gpu,
"device_name": DeviceDetector._torch_device_name,
"device": DeviceDetector._torch_device,
}
def get_gpu_device_name(self) -> Optional[str]:
"""Return GPU device name if available."""
return self._device_name


# Initialize singleton at module import to preload CUDA libraries if GPU available
DeviceDetector()
# Shared, process-wide instance
device_detector = DeviceDetector()

+ 2
- 3
presidio-analyzer/presidio_analyzer/nlp_engine/spacy_nlp_engine.py View File

@@ -7,10 +7,10 @@ from spacy.language import Language
from spacy.tokens import Doc, Span

from presidio_analyzer.nlp_engine import (
DeviceDetector,
NerModelConfiguration,
NlpArtifacts,
NlpEngine,
device_detector,
)

logger = logging.getLogger("presidio-analyzer")
@@ -56,8 +56,7 @@ class SpacyNlpEngine(NlpEngine):
logger.debug(f"Loading SpaCy models: {self.models}")

# Configure GPU if available
device_detector = DeviceDetector()
if device_detector.has_torch_gpu():
if device_detector.get_device() == "cuda":
try:
spacy.require_gpu()
logger.info("spaCy GPU configured successfully")


+ 94
- 12
presidio-analyzer/presidio_analyzer/nlp_engine/stanza_nlp_engine.py View File

@@ -1,6 +1,6 @@
import logging
import warnings
from typing import Dict, List, Optional, Union
from typing import Any, Dict, Generator, List, Optional, Tuple, Union

try:
import stanza
@@ -18,9 +18,10 @@ from spacy.tokens import Doc, Token
from spacy.util import registry

from presidio_analyzer.nlp_engine import (
DeviceDetector,
NerModelConfiguration,
NlpArtifacts,
SpacyNlpEngine,
device_detector,
)

logger = logging.getLogger("presidio-analyzer")
@@ -52,21 +53,13 @@ class StanzaNlpEngine(SpacyNlpEngine):
):
super().__init__(models, ner_model_configuration)
self.download_if_missing = download_if_missing
self.use_gpu = device_detector.get_device() == "cuda"

def load(self) -> None:
"""Load the NLP model."""

logger.debug(f"Loading Stanza models: {self.models}")

# Detect GPU availability
device_detector = DeviceDetector()
use_gpu = device_detector.has_torch_gpu()

if use_gpu:
logger.info("Stanza will use GPU")
else:
logger.info("Stanza will use CPU")

self.nlp = {}
for model in self.models:
self._validate_model_params(model)
@@ -76,9 +69,82 @@ class StanzaNlpEngine(SpacyNlpEngine):
download_method="DOWNLOAD_RESOURCES"
if self.download_if_missing
else None,
use_gpu=use_gpu,
use_gpu=self.use_gpu,
)

def process_batch(
    self,
    texts: Union[List[str], List[Tuple[str, object]]],
    language: str,
    batch_size: int = 1,
    n_process: int = 1,
    as_tuples: bool = False,
) -> Generator[
    Union[Tuple[Any, NlpArtifacts, Any], Tuple[Any, NlpArtifacts]], Any, None
]:
    """Execute the NLP pipeline on a batch of texts using Stanza's bulk processing.

    This method overrides SpacyNlpEngine.process_batch to leverage Stanza's
    efficient bulk_process method, which processes multiple documents together
    for better GPU utilization.

    Note: Stanza batches internally at the sentence/token level, not docs.
    For optimal GPU performance, use larger batch sizes (e.g., 16-32 docs).
    GPU utilization depends on total sentences/tokens across all docs in batch.

    :param texts: A list of texts to process. If as_tuples is set to True,
        texts should be a list of tuples (text, context).
    :param language: The language of the texts.
    :param batch_size: Number of documents per bulk_process call. Must be >= 1.
        Recommended: 16-32+ for GPU, lower values acceptable for CPU.
    :param n_process: Not used for Stanza (kept for API compatibility).
    :param as_tuples: If set to True, inputs should be a sequence of
        (text, context) tuples. Output will then be a sequence of
        (text, NlpArtifacts, context) tuples. Defaults to False.

    :return: A generator of tuples (text, NlpArtifacts, context) or
        (text, NlpArtifacts) depending on the value of as_tuples.
    :raises ValueError: If the engine is not loaded or batch_size < 1.
    """
    if not self.nlp:
        raise ValueError("NLP engine is not loaded. Consider calling .load()")

    # Guard against invalid batch sizes: previously a batch_size of 0 raised
    # a cryptic error from range(), and a negative value silently produced
    # an empty generator, dropping every input text.
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    # Get the StanzaTokenizer (which wraps the Stanza pipeline).
    # In spaCy, tokenizers are accessed via .tokenizer, not .get_pipe().
    stanza_tokenizer = self.nlp[language].tokenizer
    stanza_pipeline = stanza_tokenizer.snlp

    # Materialize the input once so it can be sliced into batches.
    text_list = list(texts) if not isinstance(texts, list) else texts

    for batch_start in range(0, len(text_list), batch_size):
        # Slicing clamps automatically at the end of the list.
        batch = text_list[batch_start:batch_start + batch_size]

        # Split out texts and (optionally) their caller-supplied contexts.
        if as_tuples:
            batch_texts = [str(text) for text, _context in batch]
            contexts = [context for _text, context in batch]
        else:
            batch_texts = [str(text) for text in batch]
            contexts = None

        # Create Stanza Document objects and process via bulk_process;
        # Stanza handles internal batching at the sentence/token level.
        stanza_docs = [stanza.Document([], text=text) for text in batch_texts]
        processed_stanza_docs = stanza_pipeline.bulk_process(stanza_docs)

        # Convert processed Stanza docs to spaCy docs using spacy-stanza's
        # conversion path (StanzaTokenizer._convert_doc), then wrap them in
        # NlpArtifacts for downstream recognizers.
        for idx, processed_stanza_doc in enumerate(processed_stanza_docs):
            spacy_doc = stanza_tokenizer._convert_doc(processed_stanza_doc)
            nlp_artifacts = self._doc_to_nlp_artifact(spacy_doc, language)

            if as_tuples:
                yield batch_texts[idx], nlp_artifacts, contexts[idx]
            else:
                yield batch_texts[idx], nlp_artifacts


# Code taken from https://github.com/explosion/spacy-stanza
# Supports Stanza > 1.7.0
@@ -226,6 +292,22 @@ class StanzaTokenizer(object):
return Doc(self.vocab, words=[text], spaces=[False])

snlp_doc = self.snlp(text)
return self._convert_doc(snlp_doc)

def _convert_doc(self, snlp_doc):
"""Convert a processed Stanza Document to a spaCy Doc.

This method contains the conversion logic separated from text processing,
allowing it to be called with already-processed Stanza documents.

:param snlp_doc: Processed Stanza Document
:return: spaCy Doc object
"""
if not snlp_doc.text:
return Doc(self.vocab)
elif snlp_doc.text.isspace():
return Doc(self.vocab, words=[snlp_doc.text], spaces=[False])

text = snlp_doc.text
snlp_tokens, snlp_heads = self.__get_tokens_with_heads(snlp_doc)
pos = []


+ 0
- 10
presidio-analyzer/presidio_analyzer/nlp_engine/transformers_nlp_engine.py View File

@@ -12,7 +12,6 @@ except ImportError:
transformers = None

from presidio_analyzer.nlp_engine import (
DeviceDetector,
NerModelConfiguration,
SpacyNlpEngine,
)
@@ -76,15 +75,6 @@ class TransformersNlpEngine(SpacyNlpEngine):

logger.debug(f"Loading SpaCy and transformers models: {self.models}")

# Configure GPU if available
device_detector = DeviceDetector()
if device_detector.has_torch_gpu():
try:
spacy.require_gpu()
logger.info("spaCy GPU configured successfully")
except Exception as e:
logger.warning(f"Failed to configure spaCy for GPU: {e}")

self.nlp = {}

for model in self.models:


+ 5
- 3
presidio-analyzer/presidio_analyzer/predefined_recognizers/ner/gliner_recognizer.py View File

@@ -8,9 +8,9 @@ from presidio_analyzer import (
RecognizerResult,
)
from presidio_analyzer.nlp_engine import (
DeviceDetector,
NerModelConfiguration,
NlpArtifacts,
device_detector,
)

try:
@@ -92,7 +92,7 @@ class GLiNERRecognizer(LocalRecognizer):
self.map_location = (
map_location
if map_location is not None
else DeviceDetector().get_torch_device()
else device_detector.get_device()
)

self.flat_ner = flat_ner
@@ -117,7 +117,9 @@ class GLiNERRecognizer(LocalRecognizer):
raise ImportError("GLiNER is not installed. Please install it.")

logger.info(f"Loading GLiNER model on device: {self.map_location}")
self.gliner = GLiNER.from_pretrained(self.model_name).to(self.map_location)
self.gliner = GLiNER.from_pretrained(
self.model_name, map_location=self.map_location
)

def analyze(
self,


+ 21
- 39
presidio-analyzer/presidio_analyzer/predefined_recognizers/third_party/azure_openai_langextract_recognizer.py View File

@@ -112,7 +112,13 @@ class AzureOpenAILangExtractRecognizer(LangExtractRecognizer):
super().__init__(
config_path=actual_config_path,
name="Azure OpenAI LangExtract PII",
supported_language=supported_language
supported_language=supported_language,
extract_params={
"extract": {
"fence_output": True,
"use_schema_constraints": False,
},
}
)

# Override model_id if provided as parameter (deployment name)
@@ -133,44 +139,20 @@ class AzureOpenAILangExtractRecognizer(LangExtractRecognizer):
f"See {AZURE_OPENAI_DOCS_URL} for details."
)

def _call_langextract(self, **kwargs):
"""
Call Azure OpenAI through LangExtract for PII extraction.

Uses LangExtract's provider registry system to instantiate the custom
Azure OpenAI provider. The model_id with 'azure:' prefix triggers the
provider registration.
"""
try:

model_id_with_prefix = f"azure:{self.model_id}"
def _get_provider_params(self):
"""Return Azure OpenAI-specific params."""
model_id_with_prefix = f"azure:{self.model_id}"

language_model_params = {
"azure_endpoint": self.azure_endpoint,
"api_version": self.api_version,
"azure_deployment": self.model_id,
}

if self.api_key:
language_model_params["api_key"] = self.api_key

extract_params = {
"text_or_documents": kwargs.pop("text"),
"prompt_description": kwargs.pop("prompt"),
"examples": kwargs.pop("examples"),
"model_id": model_id_with_prefix,
"language_model_params": language_model_params,
"fence_output": True,
"use_schema_constraints": False,
}
language_model_params = {
"azure_endpoint": self.azure_endpoint,
"api_version": self.api_version,
"azure_deployment": self.model_id,
}

extract_params.update(kwargs)
if self.api_key:
language_model_params["api_key"] = self.api_key

return lx.extract(**extract_params)

except Exception:
logger.exception(
"LangExtract extraction failed (Azure OpenAI at %s, model '%s')",
self.azure_endpoint, self.model_id
)
raise
return {
"model_id": model_id_with_prefix,
"language_model_params": language_model_params,
}

+ 54
- 4
presidio-analyzer/presidio_analyzer/predefined_recognizers/third_party/langextract_recognizer.py View File

@@ -1,6 +1,6 @@
import logging
from abc import ABC, abstractmethod
from typing import List
from typing import Any, Dict, List, Optional

from presidio_analyzer.llm_utils import (
check_langextract_available,
@@ -12,6 +12,7 @@ from presidio_analyzer.llm_utils import (
load_prompt_file,
load_yaml_examples,
load_yaml_file,
lx,
render_jinja_template,
validate_config_fields,
)
@@ -31,13 +32,16 @@ class LangExtractRecognizer(LMRecognizer, ABC):
self,
config_path: str,
name: str = "LangExtract LLM PII",
supported_language: str = "en"
supported_language: str = "en",
extract_params: Optional[Dict[str, Any]] = None,
):
"""Initialize LangExtract recognizer.

:param config_path: Path to configuration file.
:param name: Name of the recognizer (provided by subclass).
:param supported_language: Language this recognizer supports (default: "en").
:param extract_params: Dict with 'extract' and/or 'language_model'
keys containing param defaults.
"""
check_langextract_available()

@@ -102,6 +106,26 @@ class LangExtractRecognizer(LMRecognizer, ABC):

self.entity_mappings = langextract_config["entity_mappings"]
self.debug = langextract_config.get("debug", False)
self._model_config = model_config

# Process extract params with config override
self._extract_params = {}
self._language_model_params = {}

if extract_params:
if "extract" in extract_params:
for param_name, default_value in extract_params["extract"].items():
self._extract_params[param_name] = self._model_config.get(
param_name, default_value
)

if "language_model" in extract_params:
for param_name, default_value in (
extract_params["language_model"].items()
):
self._language_model_params[param_name] = (
self._model_config.get(param_name, default_value)
)

def _call_llm(self, text: str, entities: List[str], **kwargs):
"""Call LangExtract LLM."""
@@ -130,7 +154,33 @@ class LangExtractRecognizer(LMRecognizer, ABC):
recognizer_name=self.__class__.__name__
)

@abstractmethod
def _call_langextract(self, **kwargs):
"""Call provider-specific LangExtract implementation."""
"""Call LangExtract with configured parameters."""
try:
extract_params = {
"text_or_documents": kwargs.pop("text"),
"prompt_description": kwargs.pop("prompt"),
"examples": kwargs.pop("examples"),
}

extract_params.update(self._get_provider_params())
extract_params.update(self._extract_params)
if self._language_model_params:
extract_params["language_model_params"] = self._language_model_params
extract_params.update(kwargs)

return lx.extract(**extract_params)
except Exception:
logger.exception(
"LangExtract extraction failed (model '%s')",
self.model_id
)
raise

@abstractmethod
def _get_provider_params(self) -> Dict[str, Any]:
    """Return provider-specific params.

    Implementations return the keyword arguments that identify and configure
    the backing provider — e.g. ``model_id`` and ``model_url`` for Ollama, or
    a prefixed ``model_id`` plus ``language_model_params`` (endpoint,
    deployment, credentials) for Azure OpenAI. The returned dict is merged
    into the arguments passed to ``lx.extract`` by ``_call_langextract``.

    Examples: model_id, model_url, azure_endpoint, etc.
    """
    ...

+ 22
- 36
presidio-analyzer/presidio_analyzer/predefined_recognizers/third_party/ollama_langextract_recognizer.py View File

@@ -1,13 +1,13 @@
import logging
from pathlib import Path
from typing import Optional
from typing import Any, Dict, Optional

from presidio_analyzer.llm_utils import lx
from presidio_analyzer.predefined_recognizers.third_party.\
langextract_recognizer import LangExtractRecognizer

logger = logging.getLogger("presidio-analyzer")


class OllamaLangExtractRecognizer(LangExtractRecognizer):
"""LangExtract recognizer using Ollama backend."""

@@ -19,20 +19,9 @@ class OllamaLangExtractRecognizer(LangExtractRecognizer):
self,
config_path: Optional[str] = None,
supported_language: str = "en",
context: Optional[list] = None
context: Optional[list] = None,
):
"""Initialize Ollama LangExtract recognizer.

Note: Ollama server availability and model availability are not validated
during initialization. Any connectivity or model issues will be reported
when analyze() is first called.

:param config_path: Path to configuration file (optional).
:param supported_language: Language this recognizer supports
(optional, default: "en").
:param context: List of context words
(optional, currently not used by LLM recognizers).
"""
"""Initialize Ollama LangExtract recognizer."""
actual_config_path = (
config_path if config_path else str(self.DEFAULT_CONFIG_PATH)
)
@@ -40,7 +29,18 @@ class OllamaLangExtractRecognizer(LangExtractRecognizer):
super().__init__(
config_path=actual_config_path,
name="Ollama LangExtract PII",
supported_language=supported_language
supported_language=supported_language,
extract_params={
"extract": {
"use_schema_constraints": False,
"fence_output": False,
"max_char_buffer": 400,
},
"language_model": {
"timeout": 240,
"num_ctx": 8192,
}
}
)

model_config = self.config.get("model", {})
@@ -48,23 +48,9 @@ class OllamaLangExtractRecognizer(LangExtractRecognizer):
if not self.model_url:
raise ValueError("Ollama model configuration must contain 'model_url'")

def _call_langextract(self, **kwargs):
"""Call Ollama through LangExtract."""
try:
extract_params = {
"text_or_documents": kwargs.pop("text"),
"prompt_description": kwargs.pop("prompt"),
"examples": kwargs.pop("examples"),
"model_id": self.model_id,
"model_url": self.model_url,
}

extract_params.update(kwargs)

return lx.extract(**extract_params)
except Exception:
logger.exception(
"LangExtract extraction failed (Ollama at %s, model '%s')",
self.model_url, self.model_id
)
raise
def _get_provider_params(self) -> Dict[str, Any]:
"""Return Ollama-specific params."""
return {
"model_id": self.model_id,
"model_url": self.model_url,
}

+ 3
- 1
presidio-analyzer/pyproject.toml View File

@@ -29,10 +29,12 @@ dependencies = [
"pyyaml",
"phonenumbers (>=8.12,<10.0.0)",
"pydantic (>=2.0.0,<3.0.0)",
"cupy-cuda12x>=13.4.1",
]

[project.optional-dependencies]
gpu = [
"cupy-cuda12x>=13.4.1",
]
server = [
"flask (>=1.1)",
"gunicorn; platform_system != 'Windows'",


+ 130
- 0
presidio-analyzer/tests/test_azure_openai_langextract_recognizer.py View File

@@ -370,3 +370,133 @@ class TestAzureOpenAIProvider:
azure_endpoint="https://test.openai.azure.com/"
# No API key, so should try managed identity
)


class TestAzureOpenAILangExtractRecognizerParameterConfiguration:
    """Test parameter configuration with defaults and YAML overrides."""

    def test_when_no_config_params_then_uses_defaults(self, mock_langextract, tmp_path):
        """Test that default extract params are used when not in config."""
        import yaml
        # Minimal config: no fence_output / use_schema_constraints under
        # "model", so the recognizer should fall back to its Azure defaults.
        config = {
            "lm_recognizer": {
                "supported_entities": ["PERSON"],
            },
            "langextract": {
                "prompt_file": "presidio-analyzer/presidio_analyzer/conf/langextract_prompts/default_pii_phi_prompt.j2",
                "examples_file": "presidio-analyzer/presidio_analyzer/conf/langextract_prompts/default_pii_phi_examples.yaml",
                "entity_mappings": {"person": "PERSON"},
                "model": {
                    "model_id": "gpt-4o",
                }
            }
        }
        config_file = tmp_path / "test_config.yaml"
        with open(config_file, 'w') as f:
            yaml.dump(config, f)

        recognizer = AzureOpenAILangExtractRecognizer(
            config_path=str(config_file),
            azure_endpoint="https://test.openai.azure.com/",
            api_key="test-key"
        )
        # Verify Azure defaults are set (different from Ollama)
        assert recognizer._extract_params["fence_output"] is True
        assert recognizer._extract_params["use_schema_constraints"] is False

    def test_when_config_has_params_then_overrides_defaults(self, mock_langextract, tmp_path):
        """Test that config values override defaults."""
        import yaml
        config = {
            "lm_recognizer": {
                "supported_entities": ["PERSON"],
            },
            "langextract": {
                "prompt_file": "presidio-analyzer/presidio_analyzer/conf/langextract_prompts/default_pii_phi_prompt.j2",
                "examples_file": "presidio-analyzer/presidio_analyzer/conf/langextract_prompts/default_pii_phi_examples.yaml",
                "entity_mappings": {"person": "PERSON"},
                "model": {
                    "model_id": "gpt-4o",
                    "fence_output": False,  # Override default
                    "use_schema_constraints": True,  # Override default
                }
            }
        }
        config_file = tmp_path / "test_config.yaml"
        with open(config_file, 'w') as f:
            yaml.dump(config, f)

        recognizer = AzureOpenAILangExtractRecognizer(
            config_path=str(config_file),
            azure_endpoint="https://test.openai.azure.com/",
            api_key="test-key"
        )
        # Verify config values override defaults
        assert recognizer._extract_params["fence_output"] is False
        assert recognizer._extract_params["use_schema_constraints"] is True

    def test_when_analyze_called_then_params_passed_to_langextract(self, tmp_path):
        """Test that configured params are passed to langextract.extract()."""
        import yaml
        config = {
            "lm_recognizer": {
                "supported_entities": ["PERSON"],
            },
            "langextract": {
                "prompt_file": "presidio-analyzer/presidio_analyzer/conf/langextract_prompts/default_pii_phi_prompt.j2",
                "examples_file": "presidio-analyzer/presidio_analyzer/conf/langextract_prompts/default_pii_phi_examples.yaml",
                "entity_mappings": {"person": "PERSON"},
                "model": {
                    "model_id": "gpt-4o",
                    "fence_output": False,
                }
            }
        }
        config_file = tmp_path / "test_config.yaml"
        with open(config_file, 'w') as f:
            yaml.dump(config, f)

        recognizer = AzureOpenAILangExtractRecognizer(
            config_path=str(config_file),
            azure_endpoint="https://test.openai.azure.com/",
            api_key="test-key"
        )

        # Build a fake extraction result so analyze() completes without a
        # real LLM call; char_interval positions match "John Doe" in `text`.
        text = "My name is John Doe"
        mock_extraction = MagicMock()
        mock_extraction.extraction_class = "person"
        mock_extraction.extraction_text = "John Doe"
        mock_extraction.char_interval = MagicMock(start_pos=11, end_pos=19)
        mock_extraction.alignment_status = "MATCH_EXACT"
        mock_extraction.attributes = {}

        mock_result = MagicMock()
        mock_result.extractions = [mock_extraction]

        with patch('langextract.extract', return_value=mock_result) as mock_extract:
            recognizer.analyze(text)
            # Verify extract was called
            assert mock_extract.called
            call_kwargs = mock_extract.call_args[1]
            # Verify extract params were passed
            assert call_kwargs["fence_output"] is False
            assert call_kwargs["use_schema_constraints"] is False
            # Verify Azure-specific provider params
            assert call_kwargs["model_id"] == "azure:gpt-4o"
            assert "language_model_params" in call_kwargs
            assert call_kwargs["language_model_params"]["azure_endpoint"] == "https://test.openai.azure.com/"
            assert call_kwargs["language_model_params"]["azure_deployment"] == "gpt-4o"
            assert call_kwargs["language_model_params"]["api_key"] == "test-key"


+ 187
- 0
presidio-analyzer/tests/test_device_detector.py View File

@@ -0,0 +1,187 @@
"""Unit tests for DeviceDetector."""

from unittest.mock import MagicMock, patch

import pytest

from presidio_analyzer.nlp_engine.device_detector import DeviceDetector, device_detector


class TestDeviceDetectorErrorPaths:
"""Test suite for DeviceDetector error handling."""

def test_when_torch_import_fails_then_cpu_device(self):
"""Test that CPU is used when PyTorch import fails."""
with patch("builtins.__import__", side_effect=ImportError("No module named 'torch'")):
detector = DeviceDetector()
assert detector.get_device() == "cpu"
assert detector.get_gpu_device_name() is None

def test_when_cuda_not_available_then_cpu_device(self):
"""Test that CPU is used when CUDA is not available."""
mock_torch = MagicMock()
mock_torch.cuda.is_available.return_value = False
def mock_import(name, *args):
if name == "torch":
return mock_torch
return __builtins__.__import__(name, *args)
with patch("builtins.__import__", side_effect=mock_import):
detector = DeviceDetector()
assert detector.get_device() == "cpu"
assert detector.get_gpu_device_name() is None

def test_when_cuda_initialization_fails_then_fallback_to_cpu(self):
"""Test that CPU fallback occurs when CUDA initialization fails."""
mock_torch = MagicMock()
mock_torch.cuda.is_available.return_value = True
mock_torch.tensor.side_effect = RuntimeError("CUDA initialization error")
def mock_import(name, *args):
if name == "torch":
return mock_torch
return __builtins__.__import__(name, *args)
with patch("builtins.__import__", side_effect=mock_import):
detector = DeviceDetector()
assert detector.get_device() == "cpu"
assert detector.get_gpu_device_name() is None

def test_when_cuda_get_device_name_fails_then_fallback_to_cpu(self):
"""Test fallback when get_device_name fails."""
mock_torch = MagicMock()
mock_torch.cuda.is_available.return_value = True
mock_torch.tensor.return_value = MagicMock(__str__=lambda x: "tensor")
mock_torch.cuda.get_device_name.side_effect = RuntimeError("Device name error")
def mock_import(name, *args):
if name == "torch":
return mock_torch
return __builtins__.__import__(name, *args)
with patch("builtins.__import__", side_effect=mock_import):
detector = DeviceDetector()
assert detector.get_device() == "cpu"

def test_when_cuda_available_then_cuda_device(self):
"""Test successful CUDA detection."""
mock_torch = MagicMock()
mock_torch.cuda.is_available.return_value = True
mock_torch.tensor.return_value = MagicMock(__str__=lambda x: "tensor")
mock_torch.cuda.get_device_name.return_value = "Test GPU"
mock_torch.cuda.get_device_capability.return_value = (8, 0)
def mock_import(name, *args):
if name == "torch":
return mock_torch
return __builtins__.__import__(name, *args)
with patch("builtins.__import__", side_effect=mock_import):
detector = DeviceDetector()
assert detector.get_device() == "cuda"
assert detector.get_gpu_device_name() == "Test GPU"


class TestDeviceDetector:
    """Test suite for DeviceDetector functionality."""

    def test_when_get_device_then_returns_string(self):
        """Test that get_device() returns a valid device string."""
        result = DeviceDetector().get_device()
        assert isinstance(result, str)
        assert result in ("cpu", "cuda")

    def test_when_get_gpu_device_name_then_returns_optional_string(self):
        """Test that get_gpu_device_name() returns None or string."""
        name = DeviceDetector().get_gpu_device_name()
        # None (no GPU) and str (GPU present) are the only valid results.
        assert name is None or isinstance(name, str)

    def test_when_multiple_instances_then_same_values(self):
        """Test that multiple DeviceDetector instances have consistent values."""
        first, second = DeviceDetector(), DeviceDetector()
        # Both instances must agree on the detected device and its name.
        assert first.get_device() == second.get_device()
        assert first.get_gpu_device_name() == second.get_gpu_device_name()


class TestDeviceDetectorIntegration:
    """Integration tests for DeviceDetector usage in NLP engines."""

    def test_when_spacy_engine_loads_then_uses_device_detector(self):
        """Test that SpacyNlpEngine uses device_detector."""
        from presidio_analyzer.nlp_engine import SpacyNlpEngine
        # NOTE(review): `engine` is never asserted on; the construction only
        # verifies that instantiation does not raise. Consider asserting on
        # an engine attribute or removing the unused variable.
        engine = SpacyNlpEngine(
            models=[{"lang_code": "en", "model_name": "en_core_web_sm"}]
        )
        # Verify device_detector is accessible
        assert device_detector.get_device() in ["cpu", "cuda"]

    def test_when_stanza_engine_initializes_then_sets_use_gpu(self):
        """Test that StanzaNlpEngine correctly sets use_gpu from device_detector."""
        from presidio_analyzer.nlp_engine import StanzaNlpEngine
        engine = StanzaNlpEngine(
            models=[{"lang_code": "en", "model_name": "en"}]
        )
        # use_gpu should match device_detector
        expected_use_gpu = device_detector.get_device() == "cuda"
        assert engine.use_gpu == expected_use_gpu

    def test_when_gliner_recognizer_initializes_then_uses_correct_device(self):
        """Test that GLiNERRecognizer uses device from device_detector."""
        # Skip entirely when the optional gliner dependency is absent.
        pytest.importorskip("gliner")
        from presidio_analyzer.predefined_recognizers import GLiNERRecognizer
        recognizer = GLiNERRecognizer()
        # map_location should match device_detector.get_device()
        assert recognizer.map_location == device_detector.get_device()

    def test_when_stanza_engine_use_gpu_matches_device_detector(self):
        """Test that StanzaNlpEngine.use_gpu matches device_detector."""
        # NOTE(review): this duplicates
        # test_when_stanza_engine_initializes_then_sets_use_gpu above —
        # consider removing one of the two.
        from presidio_analyzer.nlp_engine import StanzaNlpEngine
        engine = StanzaNlpEngine(
            models=[{"lang_code": "en", "model_name": "en"}]
        )
        expected_use_gpu = device_detector.get_device() == "cuda"
        assert engine.use_gpu == expected_use_gpu


class TestDeviceDetectorBehavior:
    """Test suite for DeviceDetector runtime behavior."""

    def test_when_creating_new_instance_then_device_consistent(self):
        """Test that new instances have consistent device detection."""
        # Two fresh detections must agree, so the set collapses to one value.
        devices = {DeviceDetector().get_device() for _ in range(2)}
        assert len(devices) == 1

    def test_when_device_is_cuda_then_has_capabilities(self):
        """Test that CUDA device has expected capabilities."""
        if device_detector.get_device() != "cuda":
            return  # only meaningful on a GPU host
        gpu_name = device_detector.get_gpu_device_name()
        assert gpu_name is not None
        assert len(gpu_name) > 0

    def test_when_device_is_cpu_then_no_gpu_name(self):
        """Test that CPU device has no GPU name."""
        if device_detector.get_device() != "cpu":
            return  # only meaningful on a CPU-only host
        assert device_detector.get_gpu_device_name() is None

+ 0
- 1
presidio-analyzer/tests/test_gliner_recognizer.py View File

@@ -16,7 +16,6 @@ def mock_gliner():

# Mock the GLiNER class and its methods
mock_gliner_instance = MagicMock()
# Make .to() return the same mock instance (for device placement)
mock_gliner_instance.to.return_value = mock_gliner_instance
# Mock the from_pretrained method to return the mock instance
with patch("gliner.GLiNER.from_pretrained", return_value=mock_gliner_instance):


+ 133
- 0
presidio-analyzer/tests/test_ollama_recognizer.py View File

@@ -430,3 +430,136 @@ class TestOllamaLangExtractRecognizerAnalyze:

# Unknown entity type should be skipped when consolidation is disabled
assert len(results) == 0


class TestOllamaLangExtractRecognizerParameterConfiguration:
    """Test parameter configuration with defaults and YAML overrides."""

    def test_when_no_config_params_then_uses_defaults(self, tmp_path):
        """Test that default extract params are used when not in config."""
        import yaml
        config = create_test_config()
        # No extract params in config - should use defaults
        config_file = tmp_path / "test_config.yaml"
        with open(config_file, 'w') as f:
            yaml.dump(config, f)

        # NOTE(review): patching `lx` with return_value=Mock() replaces the
        # module object with a MagicMock; presumably only the replacement
        # matters here, not the return_value — confirm intent.
        with patch('presidio_analyzer.llm_utils.langextract_helper.lx',
                   return_value=Mock()):
            from presidio_analyzer.predefined_recognizers.third_party.ollama_langextract_recognizer import OllamaLangExtractRecognizer
            recognizer = OllamaLangExtractRecognizer(config_path=str(config_file))
            # Verify defaults are set
            assert recognizer._extract_params["max_char_buffer"] == 400
            assert recognizer._extract_params["use_schema_constraints"] is False
            assert recognizer._extract_params["fence_output"] is False
            assert recognizer._language_model_params["timeout"] == 240
            assert recognizer._language_model_params["num_ctx"] == 8192

    def test_when_config_has_params_then_overrides_defaults(self, tmp_path):
        """Test that config values override defaults."""
        import yaml
        config = create_test_config()
        # Add custom values to override defaults
        config["langextract"]["model"]["max_char_buffer"] = 1000
        config["langextract"]["model"]["use_schema_constraints"] = True
        config["langextract"]["model"]["fence_output"] = True
        config["langextract"]["model"]["timeout"] = 120
        config["langextract"]["model"]["num_ctx"] = 4096
        config_file = tmp_path / "test_config.yaml"
        with open(config_file, 'w') as f:
            yaml.dump(config, f)

        with patch('presidio_analyzer.llm_utils.langextract_helper.lx',
                   return_value=Mock()):
            from presidio_analyzer.predefined_recognizers.third_party.ollama_langextract_recognizer import OllamaLangExtractRecognizer
            recognizer = OllamaLangExtractRecognizer(config_path=str(config_file))
            # Verify config values override defaults
            assert recognizer._extract_params["max_char_buffer"] == 1000
            assert recognizer._extract_params["use_schema_constraints"] is True
            assert recognizer._extract_params["fence_output"] is True
            assert recognizer._language_model_params["timeout"] == 120
            assert recognizer._language_model_params["num_ctx"] == 4096

    def test_when_partial_config_params_then_uses_defaults_for_missing(self, tmp_path):
        """Test that only some params can be overridden."""
        import yaml
        config = create_test_config()
        # Override only some params
        config["langextract"]["model"]["max_char_buffer"] = 500
        config["langextract"]["model"]["timeout"] = 60
        config_file = tmp_path / "test_config.yaml"
        with open(config_file, 'w') as f:
            yaml.dump(config, f)

        with patch('presidio_analyzer.llm_utils.langextract_helper.lx',
                   return_value=Mock()):
            from presidio_analyzer.predefined_recognizers.third_party.ollama_langextract_recognizer import OllamaLangExtractRecognizer
            recognizer = OllamaLangExtractRecognizer(config_path=str(config_file))
            # Verify overridden values
            assert recognizer._extract_params["max_char_buffer"] == 500
            assert recognizer._language_model_params["timeout"] == 60
            # Verify defaults for non-overridden params
            assert recognizer._extract_params["use_schema_constraints"] is False
            assert recognizer._extract_params["fence_output"] is False
            assert recognizer._language_model_params["num_ctx"] == 8192

    def test_when_analyze_called_then_params_passed_to_langextract(self, tmp_path):
        """Test that configured params are passed to langextract.extract()."""
        import yaml
        config = create_test_config()
        config["langextract"]["model"]["max_char_buffer"] = 1500
        config["langextract"]["model"]["timeout"] = 180
        config_file = tmp_path / "test_config.yaml"
        with open(config_file, 'w') as f:
            yaml.dump(config, f)

        with patch('presidio_analyzer.llm_utils.langextract_helper.lx',
                   return_value=Mock()):
            from presidio_analyzer.predefined_recognizers.third_party.ollama_langextract_recognizer import OllamaLangExtractRecognizer
            recognizer = OllamaLangExtractRecognizer(config_path=str(config_file))

            # Fake extraction result so analyze() completes without a real
            # LLM call; char_interval positions match "John Doe" in `text`.
            text = "My name is John Doe"
            mock_extraction = Mock()
            mock_extraction.extraction_class = "person"
            mock_extraction.extraction_text = "John Doe"
            mock_extraction.char_interval = Mock(start_pos=11, end_pos=19)
            mock_extraction.alignment_status = "MATCH_EXACT"
            mock_extraction.attributes = {}

            mock_result = Mock()
            mock_result.extractions = [mock_extraction]

            with patch('langextract.extract', return_value=mock_result) as mock_extract:
                recognizer.analyze(text)
                # Verify extract was called
                assert mock_extract.called
                call_kwargs = mock_extract.call_args[1]
                # Verify extract params were passed
                assert call_kwargs["max_char_buffer"] == 1500
                assert call_kwargs["use_schema_constraints"] is False
                assert call_kwargs["fence_output"] is False
                # Verify language model params were passed
                assert "language_model_params" in call_kwargs
                assert call_kwargs["language_model_params"]["timeout"] == 180
                assert call_kwargs["language_model_params"]["num_ctx"] == 8192
                # Verify provider params
                assert call_kwargs["model_id"] == "qwen2.5:1.5b"
                assert call_kwargs["model_url"] == "http://localhost:11434"


+ 49
- 1
presidio-analyzer/tests/test_spacy_nlp_engine.py View File

@@ -1,5 +1,6 @@
import json
from typing import Iterator
from unittest.mock import MagicMock, patch

import pytest

@@ -102,4 +103,51 @@ def test_batch_processing_with_as_tuples_returns_context(spacy_nlp_engine, texts
else:
for text, nlp_artifacts in nlp_artifacts_batch:
assert text == "simple text"
assert len(nlp_artifacts.tokens) == 2
assert len(nlp_artifacts.tokens) == 2


def test_when_gpu_available_then_spacy_gpu_configured():
    """Test that spaCy GPU is configured when GPU is detected."""
    detector_patch = patch(
        "presidio_analyzer.nlp_engine.spacy_nlp_engine.device_detector"
    )
    spacy_patch = patch("presidio_analyzer.nlp_engine.spacy_nlp_engine.spacy")
    with detector_patch as fake_detector, spacy_patch as fake_spacy:
        # Simulate a CUDA host with a loadable model package.
        fake_detector.get_device.return_value = "cuda"
        fake_spacy.load.return_value = MagicMock()
        fake_spacy.util.is_package.return_value = True
        nlp_engine = SpacyNlpEngine(
            models=[{"lang_code": "en", "model_name": "en_core_web_sm"}]
        )
        nlp_engine.load()
        fake_spacy.require_gpu.assert_called_once()


def test_when_gpu_configuration_fails_then_warning_logged():
    """Test that warning is logged when GPU configuration fails."""
    with patch(
        "presidio_analyzer.nlp_engine.spacy_nlp_engine.device_detector"
    ) as fake_detector, patch(
        "presidio_analyzer.nlp_engine.spacy_nlp_engine.spacy"
    ) as fake_spacy, patch(
        "presidio_analyzer.nlp_engine.spacy_nlp_engine.logger"
    ) as fake_logger:
        # CUDA is detected, but require_gpu blows up during load().
        fake_detector.get_device.return_value = "cuda"
        fake_spacy.load.return_value = MagicMock()
        fake_spacy.util.is_package.return_value = True
        fake_spacy.require_gpu.side_effect = Exception("GPU error")
        nlp_engine = SpacyNlpEngine(
            models=[{"lang_code": "en", "model_name": "en_core_web_sm"}]
        )
        nlp_engine.load()
        # The failure must be logged, not raised.
        assert fake_logger.warning.called


def test_when_cpu_device_then_gpu_not_configured():
    """Verify spaCy stays on CPU when the detector reports a CPU device."""
    target = "presidio_analyzer.nlp_engine.spacy_nlp_engine"
    with patch(f"{target}.device_detector") as fake_detector, \
            patch(f"{target}.spacy") as fake_spacy:
        fake_detector.get_device.return_value = "cpu"
        fake_spacy.load.return_value = MagicMock()
        fake_spacy.util.is_package.return_value = True
        engine = SpacyNlpEngine(
            models=[{"lang_code": "en", "model_name": "en_core_web_sm"}]
        )
        engine.load()
        # No GPU detected, so require_gpu must never be invoked.
        fake_spacy.require_gpu.assert_not_called()

+ 339
- 0
presidio-analyzer/tests/test_stanza_batch_processing.py View File

@@ -0,0 +1,339 @@
"""Unit tests for StanzaNlpEngine.process_batch() and bulk_process integration."""

from typing import Iterator
from unittest.mock import Mock, MagicMock, patch

import pytest

from presidio_analyzer.nlp_engine import NlpArtifacts


@pytest.fixture(scope="module")
def stanza_nlp_engine(nlp_engines):
    """Return the loaded StanzaNlpEngine, or None when it is unavailable."""
    engine = nlp_engines.get("stanza_en", None)
    if not engine:
        # Tests carrying the skip_engine("stanza_en") marker handle this.
        return None
    engine.load()
    return engine


@pytest.mark.skip_engine("stanza_en")
class TestStanzaBatchProcessing:
    """Test suite for Stanza batch processing functionality."""

    def test_when_process_batch_with_strings_then_returns_iterator(
        self, stanza_nlp_engine
    ):
        """Batch processing of plain strings yields (text, artifacts) pairs."""
        inputs = ["Hello world", "This is a test"]
        batch = stanza_nlp_engine.process_batch(inputs, language="en", batch_size=2)
        assert isinstance(batch, Iterator)
        outputs = list(batch)
        assert len(outputs) == 2
        for doc_text, artifacts in outputs:
            assert isinstance(doc_text, str)
            assert isinstance(artifacts, NlpArtifacts)
            assert len(artifacts.tokens) > 0

    def test_when_process_batch_with_tuples_then_returns_context(
        self, stanza_nlp_engine
    ):
        """With as_tuples=True, each result carries its input context through."""
        inputs = [
            ("Hello world", {"id": 1}),
            ("This is a test", {"id": 2}),
        ]
        outputs = list(
            stanza_nlp_engine.process_batch(
                inputs, language="en", batch_size=2, as_tuples=True
            )
        )
        assert len(outputs) == 2
        for (doc_text, artifacts, ctx), (want_text, want_ctx) in zip(inputs and outputs, inputs):
            assert doc_text == want_text
            assert isinstance(artifacts, NlpArtifacts)
            assert ctx == want_ctx

    def test_when_process_batch_with_entities_then_extracts_correctly(
        self, stanza_nlp_engine
    ):
        """Entities are extracted for each text in the batch."""
        inputs = [
            "Barack Obama was born in Hawaii.",
            "John Smith lives in New York.",
        ]
        outputs = list(
            stanza_nlp_engine.process_batch(inputs, language="en", batch_size=2)
        )
        # Each sentence mentions a person and a location, so expect >= 2 each.
        for _, artifacts in outputs:
            assert len(artifacts.entities) >= 2

    def test_when_process_batch_with_different_batch_sizes_then_works(
        self, stanza_nlp_engine
    ):
        """All inputs are processed regardless of the chosen batch size."""
        inputs = ["Text one", "Text two", "Text three", "Text four", "Text five"]
        for size in (1, 2, 3, 10):
            outputs = list(
                stanza_nlp_engine.process_batch(
                    inputs, language="en", batch_size=size
                )
            )
            assert len(outputs) == 5
            assert all(
                isinstance(artifacts, NlpArtifacts) for _, artifacts in outputs
            )

    def test_when_process_batch_with_empty_list_then_returns_empty(
        self, stanza_nlp_engine
    ):
        """An empty input list produces an empty result."""
        outputs = list(stanza_nlp_engine.process_batch([], language="en"))
        assert len(outputs) == 0

    def test_when_process_batch_not_loaded_then_raises_error(self):
        """Calling process_batch before load() raises a ValueError."""
        from presidio_analyzer.nlp_engine import StanzaNlpEngine

        engine = StanzaNlpEngine(models=[{"lang_code": "en", "model_name": "en"}])
        # load() is deliberately never called here.
        with pytest.raises(ValueError, match="NLP engine is not loaded"):
            list(engine.process_batch(["test"], language="en"))

    def test_when_process_batch_with_whitespace_then_handles_correctly(
        self, stanza_nlp_engine
    ):
        """Texts with leading/trailing whitespace are still tokenized."""
        inputs = [
            " Leading whitespace",
            "Trailing whitespace ",
            " Multiple spaces ",
        ]
        outputs = list(
            stanza_nlp_engine.process_batch(inputs, language="en", batch_size=3)
        )
        assert len(outputs) == 3
        for _, artifacts in outputs:
            assert isinstance(artifacts, NlpArtifacts)
            # Tokens must be produced despite the surrounding whitespace.
            assert len(artifacts.tokens) > 0

    def test_when_process_batch_preserves_text_order(self, stanza_nlp_engine):
        """Results come back in the same order as the inputs."""
        inputs = [f"Text number {i}" for i in range(10)]
        outputs = stanza_nlp_engine.process_batch(
            inputs, language="en", batch_size=3
        )
        for expected, (doc_text, _) in zip(inputs, outputs):
            assert doc_text == expected

    def test_when_process_batch_with_special_chars_then_works(
        self, stanza_nlp_engine
    ):
        """Texts containing emails, phone numbers, and URLs are handled."""
        inputs = [
            "Email: test@example.com",
            "Phone: +1-555-1234",
            "URL: https://example.com",
        ]
        outputs = list(
            stanza_nlp_engine.process_batch(inputs, language="en", batch_size=3)
        )
        assert len(outputs) == 3
        assert all(
            isinstance(artifacts, NlpArtifacts) for _, artifacts in outputs
        )


@pytest.mark.skip_engine("stanza_en")
class TestStanzaTokenizerConvertDoc:
    """Test suite for StanzaTokenizer._convert_doc() method."""

    def test_when_convert_doc_called_then_returns_spacy_doc(
        self, stanza_nlp_engine
    ):
        """_convert_doc() turns a processed Stanza doc into a spaCy doc."""
        import stanza

        tokenizer = stanza_nlp_engine.nlp["en"].tokenizer
        text = "Barack Obama was born in Hawaii."
        processed = tokenizer.snlp(stanza.Document([], text=text))
        converted = tokenizer._convert_doc(processed)
        assert converted.text == text
        assert len(converted) > 0  # tokens exist
        # Note: Sentence boundaries require the full pipeline
        assert any(tok.is_sent_start for tok in converted)
        assert len(converted.ents) > 0  # entities carried over

    def test_when_convert_doc_with_empty_text_then_returns_empty_doc(
        self, stanza_nlp_engine
    ):
        """_convert_doc() on an empty document yields an empty spaCy Doc."""
        import stanza
        from spacy.tokens import Doc

        tokenizer = stanza_nlp_engine.nlp["en"].tokenizer
        converted = tokenizer._convert_doc(stanza.Document([], text=""))
        assert isinstance(converted, Doc)
        assert len(converted) == 0

    def test_when_convert_doc_with_whitespace_only_then_handles_correctly(
        self, stanza_nlp_engine
    ):
        """_convert_doc() does not fail on whitespace-only input."""
        import stanza
        from spacy.tokens import Doc

        tokenizer = stanza_nlp_engine.nlp["en"].tokenizer
        converted = tokenizer._convert_doc(stanza.Document([], text="   "))
        # Whitespace-only input should convert without raising.
        assert isinstance(converted, Doc)

    def test_when_convert_doc_preserves_linguistic_features(
        self, stanza_nlp_engine
    ):
        """POS tags, lemmas, and dependency heads survive the conversion."""
        import stanza

        tokenizer = stanza_nlp_engine.nlp["en"].tokenizer
        text = "The quick brown fox jumps."
        processed = tokenizer.snlp(stanza.Document([], text=text))
        converted = tokenizer._convert_doc(processed)
        for tok in converted:
            assert tok.pos_ is not None  # POS tag present
            assert tok.lemma_ is not None  # lemma present
            if tok.dep_:
                assert tok.head is not None  # dependency head present


@pytest.mark.skip_engine("stanza_en")
class TestStanzaBulkProcessIntegration:
    """Integration tests for Stanza's bulk_process usage."""

    @patch("stanza.Pipeline.bulk_process")
    def test_when_process_batch_then_calls_bulk_process(
        self, mock_bulk_process, stanza_nlp_engine
    ):
        """Test that process_batch() calls Stanza's bulk_process method."""
        import stanza
        # Setup mock to return processed docs
        mock_bulk_process.return_value = [
            Mock(text="Text 1", sentences=[], entities=[]),
            Mock(text="Text 2", sentences=[], entities=[])
        ]
        # Create mock for the conversion.  The module-scoped fixture shares
        # this tokenizer across tests, so the original method is kept and
        # restored in the finally block below.
        stanza_tokenizer = stanza_nlp_engine.nlp["en"].tokenizer
        original_convert = stanza_tokenizer._convert_doc
        def mock_convert(doc):
            # Return a minimal spaCy doc
            from spacy.tokens import Doc
            return Doc(stanza_tokenizer.vocab, words=["test"])
        stanza_tokenizer._convert_doc = mock_convert
        try:
            texts = ["Text 1", "Text 2"]
            result = list(stanza_nlp_engine.process_batch(
                texts, language="en", batch_size=2
            ))
            # Verify bulk_process was called
            assert mock_bulk_process.called
            # Verify the input to bulk_process
            call_args = mock_bulk_process.call_args[0][0]
            assert len(call_args) == 2
            assert all(isinstance(doc, stanza.Document) for doc in call_args)
        finally:
            # Restore original method
            stanza_tokenizer._convert_doc = original_convert

    def test_when_process_batch_with_large_batch_then_handles_correctly(
        self, stanza_nlp_engine
    ):
        """Test batch processing with a large number of texts."""
        num_texts = 100
        texts = [f"This is test text number {i}." for i in range(num_texts)]
        result = stanza_nlp_engine.process_batch(
            texts, language="en", batch_size=16
        )
        result_list = list(result)
        assert len(result_list) == num_texts
        # Verify all texts were processed (in input order)
        for i, (text, nlp_artifacts) in enumerate(result_list):
            assert f"number {i}" in text
            assert isinstance(nlp_artifacts, NlpArtifacts)

    def test_when_process_batch_batching_matches_batch_size(
        self, stanza_nlp_engine
    ):
        """Test that internal batching respects batch_size parameter."""
        texts = [f"Text {i}" for i in range(10)]
        # Process with different batch sizes
        for batch_size in [1, 3, 5, 10]:
            result = stanza_nlp_engine.process_batch(
                texts, language="en", batch_size=batch_size
            )
            result_list = list(result)
            # Should process all texts regardless of batch size
            assert len(result_list) == 10

Loading…
Cancel
Save
Baidu
map