Skip to main content

Processing Documents

After receiving a document via webhook, you need to fetch, parse, and process it. This guide covers best practices for document processing.

Fetch the Document

When you receive a webhook, fetch the full document content:

import requests

def fetch_document(
    document_id: str,
    api_key: str = "your_api_key",
    timeout: float = 30.0,
) -> dict:
    """Fetch a received document from GoRoute.

    Args:
        document_id: Identifier of the document, as delivered in the webhook.
        api_key: Value sent in the ``X-API-Key`` header.
        timeout: Seconds to wait for the server before giving up. Requests
            never times out on its own, so a stalled connection would
            otherwise block the caller indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(
        f"https://app.goroute.ai/peppol-api/api/v1/documents/{document_id}",
        headers={"X-API-Key": api_key},
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()

# Document response
# {
# "id": "doc_xyz789",
# "type": "Invoice",
# "received_at": "2024-01-15T10:30:45Z",
# "sender": {...},
# "receiver": {...},
# "xml": "<?xml version=\"1.0\"?>...",
# "parsed": {...}
# }

Document Response Structure

{
"id": "doc_xyz789",
"type": "Invoice",
"status": "received",
"received_at": "2024-01-15T10:30:45Z",
"sender": {
"scheme": "0106",
"identifier": "12345678",
"name": "Supplier BV",
"country": "NL"
},
"receiver": {
"scheme": "0106",
"identifier": "87654321",
"name": "Your Company BV",
"country": "NL"
},
"xml": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><Invoice>...</Invoice>",
"parsed": {
"invoice_id": "INV-2024-00123",
"issue_date": "2024-01-15",
"due_date": "2024-02-15",
"currency": "EUR",
"subtotal": "100.00",
"tax_amount": "21.00",
"total": "121.00",
"lines": [
{
"line_id": "1",
"description": "Consulting Services",
"quantity": "10",
"unit": "HUR",
"unit_price": "10.00",
"line_total": "100.00",
"tax_rate": "21"
}
],
"payment_info": {
"iban": "NL12ABCD3456789012",
"reference": "INV-2024-00123"
}
},
"attachments": [
{
"id": "att_001",
"filename": "invoice.pdf",
"mime_type": "application/pdf",
"size": 125000
}
],
"validation": {
"valid": true,
"errors": [],
"warnings": []
}
}

Parse UBL XML

If you need to parse the XML directly:

from lxml import etree
from dataclasses import dataclass
from decimal import Decimal
from typing import Optional

@dataclass
class InvoiceLine:
    """One line item of a parsed invoice.

    Monetary and quantity values are held as ``Decimal``; identifiers and
    codes are held as plain strings.
    """

    id: str               # line identifier
    description: str      # item name / description
    quantity: Decimal     # invoiced quantity
    unit: str             # unit code that accompanies the quantity
    unit_price: Decimal   # price per unit
    line_total: Decimal   # total amount for this line
    tax_rate: Decimal     # tax percentage applied to this line

@dataclass
class Invoice:
    """Header-level data of a parsed invoice together with its line items."""

    # Document identification and dates (dates are kept as strings).
    invoice_id: str
    issue_date: str
    due_date: Optional[str]   # None when the invoice carries no due date
    currency: str

    # Trading parties.
    seller_name: str
    seller_id: str
    buyer_name: str
    buyer_id: str

    # Monetary totals.
    subtotal: Decimal
    tax_amount: Decimal
    total: Decimal

    # Individual invoice lines.
    lines: list[InvoiceLine]

class UBLParser:
    """Parse UBL invoice XML into :class:`Invoice` objects."""

    # Prefix -> namespace URI map used for every XPath query below.
    NAMESPACES = {
        'inv': 'urn:oasis:names:specification:ubl:schema:xsd:Invoice-2',
        'cac': 'urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2',
        'cbc': 'urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2'
    }

    def parse_invoice(self, xml_content: str) -> Invoice:
        """Parse a UBL invoice XML string.

        Args:
            xml_content: The invoice XML as text.

        Returns:
            An ``Invoice`` populated from the document. Missing or empty
            header elements fall back to ``""`` (``None`` for the due date,
            ``0`` for monetary totals); a line without a tax percentage gets
            a tax rate of ``0``.

        Raises:
            lxml.etree.XMLSyntaxError: If ``xml_content`` is not well-formed.
            IndexError: If an invoice line lacks one of its required elements
                (ID, item name, quantity, unit code, price, line amount).
            decimal.InvalidOperation: If an amount is not a valid number.
        """
        # The XML originates from external senders: never expand entities or
        # touch the network while parsing (XXE / entity-expansion hardening).
        xml_parser = etree.XMLParser(resolve_entities=False, no_network=True)
        root = etree.fromstring(xml_content.encode(), parser=xml_parser)
        ns = self.NAMESPACES

        def xpath_text(path: str, default: str = "") -> str:
            result = root.xpath(path, namespaces=ns)
            if not result:
                return default
            # A present-but-empty element has ``text is None``; treat it like
            # a missing one so callers always receive a string.
            return result[0].text or default

        # Parse invoice lines
        lines = []
        for line_elem in root.xpath('.//cac:InvoiceLine', namespaces=ns):
            # The tax percentage is optional; index only after checking, so
            # the '0' default applies instead of an IndexError.
            percent = line_elem.xpath(
                './/cac:ClassifiedTaxCategory/cbc:Percent/text()', namespaces=ns
            )
            lines.append(InvoiceLine(
                id=line_elem.xpath('cbc:ID/text()', namespaces=ns)[0],
                description=line_elem.xpath('.//cac:Item/cbc:Name/text()', namespaces=ns)[0],
                quantity=Decimal(line_elem.xpath('cbc:InvoicedQuantity/text()', namespaces=ns)[0]),
                unit=line_elem.xpath('cbc:InvoicedQuantity/@unitCode', namespaces=ns)[0],
                unit_price=Decimal(line_elem.xpath('.//cac:Price/cbc:PriceAmount/text()', namespaces=ns)[0]),
                line_total=Decimal(line_elem.xpath('cbc:LineExtensionAmount/text()', namespaces=ns)[0]),
                tax_rate=Decimal(percent[0] if percent else '0')
            ))

        return Invoice(
            invoice_id=xpath_text('.//cbc:ID'),
            issue_date=xpath_text('.//cbc:IssueDate'),
            due_date=xpath_text('.//cbc:DueDate') or None,
            currency=xpath_text('.//cbc:DocumentCurrencyCode'),
            seller_name=xpath_text('.//cac:AccountingSupplierParty//cac:PartyLegalEntity/cbc:RegistrationName'),
            seller_id=xpath_text('.//cac:AccountingSupplierParty//cbc:EndpointID'),
            buyer_name=xpath_text('.//cac:AccountingCustomerParty//cac:PartyLegalEntity/cbc:RegistrationName'),
            buyer_id=xpath_text('.//cac:AccountingCustomerParty//cbc:EndpointID'),
            subtotal=Decimal(xpath_text('.//cac:LegalMonetaryTotal/cbc:LineExtensionAmount', '0')),
            tax_amount=Decimal(xpath_text('.//cac:TaxTotal/cbc:TaxAmount', '0')),
            total=Decimal(xpath_text('.//cac:LegalMonetaryTotal/cbc:PayableAmount', '0')),
            lines=lines
        )


# Usage: fetch one received document and parse its UBL payload.
parser = UBLParser()
document = fetch_document("doc_xyz789")
xml_payload = document["xml"]
invoice = parser.parse_invoice(xml_payload)

# Print a short summary: invoice number, seller, and payable total.
for label, value in (
    ("Invoice", invoice.invoice_id),
    ("From", invoice.seller_name),
    ("Total", f"{invoice.currency} {invoice.total}"),
):
    print(f"{label}: {value}")

Download Attachments

Fetch embedded attachments:

def download_attachment(
    document_id: str,
    attachment_id: str,
    api_key: str = "your_api_key",
    timeout: float = 30.0,
) -> bytes:
    """Download an attachment from a received document.

    Args:
        document_id: Identifier of the document that owns the attachment.
        attachment_id: Identifier of the attachment to download.
        api_key: Value sent in the ``X-API-Key`` header.
        timeout: Seconds to wait for the server before giving up. Requests
            never times out on its own, so a stalled connection would
            otherwise block the caller indefinitely.

    Returns:
        The raw bytes of the attachment.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(
        f"https://app.goroute.ai/peppol-api/api/v1/documents/{document_id}/attachments/{attachment_id}",
        headers={"X-API-Key": api_key},
        timeout=timeout,
    )
    response.raise_for_status()
    return response.content

# Download one attachment and write its bytes to a local file.
attachment_data = download_attachment("doc_xyz789", "att_001")
with open("invoice.pdf", mode="wb") as pdf_file:
    pdf_file.write(attachment_data)

Processing Pipeline

Implement a robust processing pipeline:

from enum import Enum
import logging

class ProcessingStatus(Enum):
    """Lifecycle states a document can be in while it is being processed."""

    # Progress states.
    RECEIVED = "received"
    VALIDATED = "validated"
    MATCHED = "matched"
    APPROVED = "approved"
    PAID = "paid"

    # Error state.
    FAILED = "failed"

class DocumentProcessor:
    """Drive a received document through the processing pipeline.

    ``process`` calls several methods that are not defined in this class
    (``fetch_document``, ``parse_invoice``, ``parse_credit_note``,
    ``store_document``, ``download_and_store_attachment``, ``update_status``,
    ``trigger_approval_workflow`` and ``notify_error``); the integrating
    application has to provide them, for example in a subclass.
    """

    def __init__(self, api_key: str):
        self.api_key = api_key
        self.logger = logging.getLogger(__name__)

    async def process(self, document_id: str):
        """Process a received document through the pipeline.

        Args:
            document_id: Identifier of the document to process.

        Returns:
            ``{"status": "success", "document_id": document_id}`` on success.

        Raises:
            Exception: Any error raised by a pipeline step is logged, the
                document is marked ``FAILED`` (best effort), and the original
                error is re-raised.
        """
        try:
            # 1. Fetch the document
            document = await self.fetch_document(document_id)
            self.logger.info(
                "Processing document %s: %s", document_id, document["type"]
            )

            # 2. Parse the content
            if document["type"] == "Invoice":
                parsed = self.parse_invoice(document["xml"])
            elif document["type"] == "CreditNote":
                parsed = self.parse_credit_note(document["xml"])
            else:
                raise ValueError(f"Unsupported document type: {document['type']}")

            # 3. Validate business rules
            self.validate_business_rules(parsed)

            # 4. Match to purchase order
            po_match = await self.match_to_po(parsed)

            # 5. Store in your system
            await self.store_document(document, parsed, po_match)

            # 6. Download attachments
            for attachment in document.get("attachments", []):
                await self.download_and_store_attachment(
                    document_id,
                    attachment["id"],
                    attachment["filename"]
                )

            # 7. Update status. Only claim MATCHED when a purchase order was
            # actually found; otherwise the document is merely validated.
            if po_match.get("matched"):
                status = ProcessingStatus.MATCHED
            else:
                status = ProcessingStatus.VALIDATED
            await self.update_status(document_id, status)

            # 8. Trigger workflow
            await self.trigger_approval_workflow(document_id)

            return {"status": "success", "document_id": document_id}

        except Exception as e:
            # logger.exception keeps the traceback alongside the message.
            self.logger.exception("Failed to process %s: %s", document_id, e)
            await self._report_failure(document_id, e)
            raise

    async def _report_failure(self, document_id: str, error: Exception) -> None:
        """Mark the document FAILED and notify, without raising.

        Each step is guarded separately so that a failure while reporting
        neither hides the original processing error nor skips the other step.
        """
        try:
            await self.update_status(document_id, ProcessingStatus.FAILED)
        except Exception:
            self.logger.exception(
                "Could not mark %s as failed", document_id
            )
        try:
            await self.notify_error(document_id, str(error))
        except Exception:
            self.logger.exception(
                "Could not send error notification for %s", document_id
            )

    def validate_business_rules(self, invoice: "Invoice"):
        """Apply custom business validation.

        Checks that ``subtotal + tax_amount`` equals ``total`` and that the
        due date, when present, is a valid ISO date that is not in the past.

        Raises:
            ValueError: If one or more checks fail; the message lists every
                failed check.
        """
        from datetime import date

        errors = []

        # Check totals match
        calculated_total = invoice.subtotal + invoice.tax_amount
        if calculated_total != invoice.total:
            errors.append(f"Total mismatch: {calculated_total} != {invoice.total}")

        # Check due date is in future
        if invoice.due_date:
            try:
                due = date.fromisoformat(invoice.due_date)
            except ValueError:
                # Report a malformed date together with the other findings
                # instead of aborting validation with a bare parse error.
                errors.append(f"Invalid due date: {invoice.due_date!r}")
            else:
                if due < date.today():
                    errors.append(f"Due date {due} is in the past")

        if errors:
            raise ValueError(f"Business validation failed: {errors}")

    async def match_to_po(self, invoice: "Invoice") -> dict:
        """Try to match invoice to a purchase order.

        Placeholder: always reports that no purchase order was matched.
        Replace with business-specific lookup logic.

        Returns:
            A dict with the keys ``matched`` (bool) and ``po_number``.
        """
        # Look for PO reference in invoice
        # This is business-specific logic
        return {"matched": False, "po_number": None}

Handle Different Document Types

async def handle_document(document: dict):
    """Dispatch a document to the handler registered for its ``type``.

    Returns whatever the selected handler returns. Raises ``ValueError``
    when no handler is registered for the document's type.
    """
    dispatch_table = {
        "Invoice": handle_invoice,
        "CreditNote": handle_credit_note,
        "Order": handle_order,
        "OrderResponse": handle_order_response,
        "DespatchAdvice": handle_despatch_advice
    }

    selected = dispatch_table.get(document["type"])
    if selected:
        return await selected(document)

    raise ValueError(f"No handler for {document['type']}")

async def handle_invoice(document: dict):
    """Handle an incoming invoice: parse it, record it, and notify the AP team."""
    invoice = parse_invoice(document["xml"])

    # Create the accounts payable record first, then tell the team about it.
    await create_ap_record(invoice)

    announcement = f"New invoice received: {invoice.invoice_id} - {invoice.total}"
    await notify_team("accounts_payable", announcement)

async def handle_credit_note(document: dict):
    """Handle an incoming credit note by applying it to the invoice it references."""
    credit_note = parse_credit_note(document["xml"])

    # Look up the invoice this credit note refers to; nothing to do if absent.
    referenced_invoice = await find_invoice(credit_note.invoice_reference)
    if not referenced_invoice:
        return

    await apply_credit(referenced_invoice, credit_note)

Error Handling

class DocumentProcessingError(Exception):
    """Error raised while processing a document.

    Attributes:
        document_id: Identifier of the document that failed.
        message: Human-readable description; also used as the exception text.
        recoverable: Whether the failure is considered recoverable
            (defaults to ``True``).
    """

    def __init__(self, document_id: str, message: str, recoverable: bool = True):
        # Initialise the base exception with the message, then attach details.
        super().__init__(message)
        self.recoverable = recoverable
        self.message = message
        self.document_id = document_id

async def process_with_error_handling(document_id: str):
    """Run document processing and route failures by kind.

    A ``DocumentProcessingError`` is handled here: recoverable ones are added
    to the retry queue, the rest to the manual review queue. Any other
    exception is logged, reported to the team, and re-raised.
    """
    try:
        await process_document(document_id)

    except DocumentProcessingError as err:
        if not err.recoverable:
            # Send to manual review
            await manual_review_queue.add(document_id)
            logger.error(f"Document {document_id} requires manual review: {err.message}")
        else:
            # Queue for retry
            await retry_queue.add(document_id)
            logger.warning(f"Document {document_id} queued for retry: {err.message}")

    except Exception:
        # Unexpected error - alert team
        logger.exception(f"Unexpected error processing {document_id}")
        await alert_team(f"Document processing failed: {document_id}")
        raise

Mark Document as Processed

Acknowledge document processing:

# Mark as processed in GoRoute.
# `document_id` is the identifier of the document that was just processed.
response = requests.post(
    f"https://app.goroute.ai/peppol-api/api/v1/documents/{document_id}/acknowledge",
    headers={
        "X-API-Key": "your_api_key",
        "Content-Type": "application/json"
    },
    json={
        "status": "processed",
        "internal_reference": "AP-2024-00456"
    },
    # Requests never times out on its own; bound the wait explicitly.
    timeout=30,
)
# Surface a rejected acknowledgement instead of silently ignoring it.
response.raise_for_status()

Best Practices

  1. Process Asynchronously — Don't block webhook responses
  2. Implement Idempotency — Handle duplicate deliveries
  3. Validate Business Rules — Apply your own checks
  4. Store Raw XML — Keep the original for audit
  5. Handle Attachments — Don't forget embedded PDFs
  6. Log Everything — Detailed logs help debugging

Next Steps