Custom Conversion

Docling for IBM watsonx allows you to customize the options in your conversion request.

API Endpoint Usage

Change Output Format

You can specify the output format you would like for your conversion:

# POST a conversion request for one HTTP source to the /v1/convert/source/async
# endpoint, asking for HTML via options.to_formats.
curl --request POST "${DOCLING_SERVICE_URL}/v1/convert/source/async" \
  --header "X-Api-Key: ${DOCLING_API_KEY}" \
  --header "Content-Type: application/json" \
  --data '{
    "sources": [
      {
        "kind": "http",
        "url": "https://arxiv.org/pdf/2501.17887"
      }
    ],
    "options": {
      "to_formats": ["html"]
    }
  }'

Read the API reference to see the full list of to_formats options.

Python SDK Usage

Change Output Format

You can specify the output format you would like for your conversion:

from pathlib import Path
from docling.datamodel.service.options import ConvertDocumentsOptions
from docling.service_client import DoclingServiceClient
import os

# Service endpoint and credentials are read from the environment; os.getenv
# returns None for any variable that is not set.
SERVICE_URL = os.getenv("DOCLING_SERVICE_URL")
API_KEY = os.getenv("DOCLING_API_KEY")

with DoclingServiceClient(url=SERVICE_URL, api_key=API_KEY) as client:
    # Ask the service for HTML as the output format of this conversion.
    options = ConvertDocumentsOptions(
        to_formats=["html"],
    )
    result = client.convert(
        source=Path("path/to/doc.pdf"),
        options=options,
    )

    # Print the HTML export so the output matches the requested format
    # (the original printed Markdown despite requesting "html").
    # NOTE(review): assumes result.document offers export_to_html() alongside
    # export_to_markdown() -- confirm against the client's return type.
    print(result.document.export_to_html())

Additional OCR Options

This example includes alternative configurations with OCR engines (EasyOCR, Tesseract, system OCR, no OCR). Uncomment the lines for the configurations you want to try.

Accelerators: tune AcceleratorOptions to select CPU/GPU or threads.
Exports: JSON, plain text, Markdown, and doctags are saved in scratch/.

import json
import logging
import time
import os 
from pathlib import Path

from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import (
    OcrMacOptions,
    PdfBackend,
    PdfPipelineOptions,
    TableStructureOptions,
    TesseractCliOcrOptions,
    TesseractOcrOptions,
)
from docling.datamodel.service.options import ConvertDocumentsOptions
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.service_client import DoclingServiceClient

_log = logging.getLogger(__name__)


def main():
    """Convert one PDF through the Docling service and export the result.

    Builds a ``PdfPipelineOptions`` configuration (pick one of the sections
    below), copies its OCR, table-structure, cell-matching, and backend
    choices into ``ConvertDocumentsOptions``, sends the document to the
    service configured by the ``DOCLING_SERVICE_URL`` and ``DOCLING_API_KEY``
    environment variables, and writes JSON, plain-text, Markdown, and doctags
    exports into ``scratch/``.
    """
    logging.basicConfig(level=logging.INFO)

    def create_pipeline_options(
        do_ocr: bool, do_table_structure: bool, do_cell_matching: bool
    ) -> PdfPipelineOptions:
        """Return PdfPipelineOptions with the given OCR and table settings."""
        pipeline_options = PdfPipelineOptions()
        pipeline_options.do_ocr = do_ocr
        pipeline_options.do_table_structure = do_table_structure
        pipeline_options.table_structure_options = TableStructureOptions(
            do_cell_matching=do_cell_matching
        )
        return pipeline_options

    ###########################################################################

    # The sections below demo combinations of PdfPipelineOptions and backends.
    # Tip: Uncomment exactly one section at a time to compare outputs.
    #
    # NOTE(review): only do_ocr, do_table_structure, the cell-matching flag and
    # the backend choice are copied into the service request further down;
    # accelerator_options and ocr_options set here are not forwarded by this
    # script.

    # PyPdfium without EasyOCR
    # --------------------
    # pipeline_options = create_pipeline_options(do_ocr=False, do_table_structure=True, do_cell_matching=False)
    # backend = PyPdfiumDocumentBackend

    # PyPdfium with EasyOCR
    # -----------------
    # pipeline_options = create_pipeline_options(do_ocr=True, do_table_structure=True, do_cell_matching=True)
    # backend = PyPdfiumDocumentBackend

    # Docling Parse without EasyOCR
    # -------------------------
    # pipeline_options = create_pipeline_options(do_ocr=False, do_table_structure=True, do_cell_matching=True)
    # backend = None

    # Docling Parse with EasyOCR (default)
    # -------------------------------
    # Enables OCR and table structure with EasyOCR, using automatic device
    # selection via AcceleratorOptions.
    pipeline_options = create_pipeline_options(
        do_ocr=True, do_table_structure=True, do_cell_matching=True
    )
    pipeline_options.accelerator_options = AcceleratorOptions(
        num_threads=4, device=AcceleratorDevice.AUTO
    )
    backend = None

    # Docling Parse with EasyOCR (CPU only)
    # -------------------------------------
    # pipeline_options = create_pipeline_options(do_ocr=True, do_table_structure=True, do_cell_matching=True)
    # pipeline_options.ocr_options.use_gpu = False
    # backend = None

    # Docling Parse with Tesseract
    # ----------------------------
    # pipeline_options = create_pipeline_options(do_ocr=True, do_table_structure=True, do_cell_matching=True)
    # pipeline_options.ocr_options = TesseractOcrOptions()
    # backend = None

    # Docling Parse with Tesseract CLI
    # --------------------------------
    # pipeline_options = create_pipeline_options(do_ocr=True, do_table_structure=True, do_cell_matching=True)
    # pipeline_options.ocr_options = TesseractCliOcrOptions()
    # backend = None

    # Docling Parse with ocrmac (macOS only)
    # --------------------------------------
    # pipeline_options = create_pipeline_options(do_ocr=True, do_table_structure=True, do_cell_matching=True)
    # pipeline_options.ocr_options = OcrMacOptions()
    # backend = None

    ###########################################################################
    input_doc_path = "path/to/doc.pdf"

    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments the way time.time() differences can.
    start_time = time.perf_counter()

    service_url = os.getenv("DOCLING_SERVICE_URL")
    api_key = os.getenv("DOCLING_API_KEY")

    # Cell matching defaults to True when no table-structure options are set
    # or when they carry no do_cell_matching attribute.
    table_options = pipeline_options.table_structure_options
    table_cell_matching = (
        getattr(table_options, "do_cell_matching", True) if table_options else True
    )
    # Any backend other than PyPdfiumDocumentBackend (including None) maps to
    # DOCLING_PARSE.
    pdf_backend = (
        PdfBackend.PYPDFIUM2
        if backend == PyPdfiumDocumentBackend
        else PdfBackend.DOCLING_PARSE
    )
    options = ConvertDocumentsOptions(
        do_ocr=pipeline_options.do_ocr,
        do_table_structure=pipeline_options.do_table_structure,
        table_cell_matching=table_cell_matching,
        pdf_backend=pdf_backend,
    )
    with DoclingServiceClient(url=service_url, api_key=api_key) as client:
        conv_result = client.convert(Path(input_doc_path), options=options)

    _log.info(
        "Document converted in %.2f seconds.", time.perf_counter() - start_time
    )

    # Export results
    output_dir = Path("scratch")
    output_dir.mkdir(parents=True, exist_ok=True)
    doc_filename = Path(input_doc_path).stem
    document = conv_result.document

    # Export Docling document JSON format:
    (output_dir / f"{doc_filename}.json").write_text(
        json.dumps(document.export_to_dict()), encoding="utf-8"
    )

    # Export Text format (plain text via Markdown export):
    (output_dir / f"{doc_filename}.txt").write_text(
        document.export_to_markdown(strict_text=True), encoding="utf-8"
    )

    # Export Markdown format:
    (output_dir / f"{doc_filename}.md").write_text(
        document.export_to_markdown(), encoding="utf-8"
    )

    # Export Document Tags format:
    (output_dir / f"{doc_filename}.doctags").write_text(
        document.export_to_doctags(), encoding="utf-8"
    )


if __name__ == "__main__":
    main()

Java SDK Usage

Set conversion options on the request with ConvertDocumentOptions.builder():

import java.net.URI;

import ai.docling.serve.api.DoclingServeApi;
import ai.docling.serve.api.convert.request.ConvertDocumentRequest;
import ai.docling.serve.api.convert.request.options.ConvertDocumentOptions;
import ai.docling.serve.api.convert.request.options.ImageRefMode;
import ai.docling.serve.api.convert.request.options.OutputFormat;
import ai.docling.serve.api.convert.request.source.HttpSource;
import ai.docling.serve.api.convert.response.InBodyConvertDocumentResponse;

/**
 * Sends one HTTP-hosted PDF to the Docling service with custom conversion
 * options and prints the HTML content of the response.
 */
public class Main {
    /**
     * Reads the service URL and API key from the environment, builds the
     * request, and prints the converted document's HTML content.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Endpoint and credentials come from the environment.
        final String baseUrl = System.getenv("DOCLING_SERVICE_URL");
        final String key = System.getenv("DOCLING_API_KEY");

        final DoclingServeApi api = DoclingServeApi.builder()
            .baseUrl(baseUrl)
            .apiKey(key)
            .build();

        // The document to convert, referenced by URL.
        final HttpSource pdfSource = HttpSource.builder()
            .url(URI.create("https://arxiv.org/pdf/2501.17887"))
            .build();

        // Custom options: HTML output format, EMBEDDED image export mode, OCR on.
        final ConvertDocumentOptions conversionOptions = ConvertDocumentOptions.builder()
            .toFormat(OutputFormat.HTML)
            .imageExportMode(ImageRefMode.EMBEDDED)
            .doOcr(true)
            .build();

        final ConvertDocumentRequest request = ConvertDocumentRequest.builder()
            .source(pdfSource)
            .options(conversionOptions)
            .build();

        // The response is cast to the in-body variant to reach the document.
        final InBodyConvertDocumentResponse inBody =
            (InBodyConvertDocumentResponse) api.convertSource(request);
        System.out.println(inBody.getDocument().getHtmlContent());
    }
}

Custom Conversion

API Endpoint Usage

Change Output Format

Python SDK Usage

Change Output Format

Additional OCR Options

Java SDK Usage

On this page