Attachments
Peppol documents can include attachments such as PDFs, images, or spreadsheets. This guide covers how to embed attachments correctly.
Attachment Types
| Type | Use Case | Max Size |
|---|---|---|
| Invoice PDF | Human-readable version | 10 MB |
| Supporting Docs | Contracts, timesheets | 10 MB |
| Product Images | Item photos | 5 MB |
| Specifications | Technical docs | 10 MB |
Embedding Attachments
Attachments are embedded as Base64-encoded binary content:
<Invoice>
<!-- ... invoice content ... -->
<!-- One AdditionalDocumentReference is used per attached file -->
<cac:AdditionalDocumentReference>
<cbc:ID>1</cbc:ID>
<cbc:DocumentDescription>Invoice PDF</cbc:DocumentDescription>
<cac:Attachment>
<!-- mimeCode and filename are required; the element text is the Base64-encoded file (truncated in this example) -->
<cbc:EmbeddedDocumentBinaryObject
mimeCode="application/pdf"
filename="INV-2024-00123.pdf">
JVBERi0xLjQKJeLjz9MKMyAwIG9iago8PC9MZW5n...
</cbc:EmbeddedDocumentBinaryObject>
</cac:Attachment>
</cac:AdditionalDocumentReference>
</Invoice>
Required Attributes
| Attribute | Description | Example |
|---|---|---|
| mimeCode | MIME type of the file | application/pdf |
| filename | Original filename | invoice.pdf |
Supported MIME Types
| MIME Type | Extension | Description |
|---|---|---|
| application/pdf | .pdf | PDF documents |
| image/png | .png | PNG images |
| image/jpeg | .jpeg, .jpg | JPEG images |
| image/gif | .gif | GIF images |
| text/csv | .csv | CSV data |
| application/vnd.openxmlformats-officedocument.spreadsheetml.sheet | .xlsx | Excel files |
| application/vnd.openxmlformats-officedocument.wordprocessingml.document | .docx | Word files |
| application/xml | .xml | XML files |
| text/plain | .txt | Plain text |
Adding Attachments in Python
import base64
from pathlib import Path
from lxml import etree
def add_attachment(
    invoice_xml: str,
    file_path: str,
    description: str = "Attachment"
) -> str:
    """Embed a file into a Peppol (UBL) invoice as a Base64 attachment.

    The file is read from disk, Base64-encoded and wrapped in a new
    ``cac:AdditionalDocumentReference`` element. Its ``cbc:ID`` is the number
    of ``cac:AdditionalDocumentReference`` elements already in the document
    plus one.

    UBL documents are validated against an ordered element sequence, so the
    new reference is placed where that sequence expects it: directly after
    the last existing top-level ``cac:AdditionalDocumentReference``, otherwise
    in front of the first top-level element that has to follow it
    (``cac:ProjectReference``, ``cac:Signature`` or
    ``cac:AccountingSupplierParty``). Only when none of these anchors exists
    does it fall back to inserting before ``cac:LegalMonetaryTotal`` or
    appending at the end of the document.

    Args:
        invoice_xml: The invoice XML document as a string.
        file_path: Path of the file to embed.
        description: Text written to ``cbc:DocumentDescription``.

    Returns:
        The serialized invoice (UTF-8, pretty-printed, with XML declaration)
        including the new attachment.

    Raises:
        OSError: If the file cannot be read.
        lxml.etree.XMLSyntaxError: If ``invoice_xml`` is not well-formed.
    """
    source = Path(file_path)

    # Read and encode the file
    encoded_content = base64.b64encode(source.read_bytes()).decode('utf-8')

    # Determine MIME type from the (case-insensitive) file extension.
    # NOTE(review): unknown extensions fall back to application/octet-stream;
    # confirm that receivers accept that code before relying on it.
    mime_types = {
        '.pdf': 'application/pdf',
        '.png': 'image/png',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.csv': 'text/csv',
        '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
    }
    mime_type = mime_types.get(source.suffix.lower(), 'application/octet-stream')

    # Parse from bytes: the string returned by a previous call starts with an
    # XML declaration, which lxml does not accept in str input.
    tree = etree.fromstring(invoice_xml.encode())
    ns = {
        'cac': 'urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2',
        'cbc': 'urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2'
    }
    cac = '{%s}' % ns['cac']
    cbc = '{%s}' % ns['cbc']

    # Build the reference: ID, description, then the embedded binary object
    doc_ref = etree.Element(cac + 'AdditionalDocumentReference')
    id_elem = etree.SubElement(doc_ref, cbc + 'ID')
    id_elem.text = str(len(tree.findall('.//cac:AdditionalDocumentReference', ns)) + 1)
    desc_elem = etree.SubElement(doc_ref, cbc + 'DocumentDescription')
    desc_elem.text = description
    attachment = etree.SubElement(doc_ref, cac + 'Attachment')
    binary_obj = etree.SubElement(attachment, cbc + 'EmbeddedDocumentBinaryObject')
    binary_obj.set('mimeCode', mime_type)
    binary_obj.set('filename', source.name)
    binary_obj.text = encoded_content

    # Place the reference at its position in the UBL element sequence.
    existing_refs = tree.findall('cac:AdditionalDocumentReference', ns)
    if existing_refs:
        existing_refs[-1].addnext(doc_ref)
    else:
        # Elements that come right after AdditionalDocumentReference in UBL.
        for tag in ('ProjectReference', 'Signature', 'AccountingSupplierParty'):
            following = tree.find('cac:' + tag, ns)
            if following is not None:
                following.addprevious(doc_ref)
                break
        else:
            # No known anchor: keep the legacy placement as a last resort.
            legal_total = tree.find('.//cac:LegalMonetaryTotal', ns)
            if legal_total is not None:
                legal_total.addprevious(doc_ref)
            else:
                tree.append(doc_ref)

    return etree.tostring(tree, pretty_print=True, xml_declaration=True, encoding='UTF-8').decode()
# Usage: `invoice_xml` must already hold the invoice document as a string
invoice_with_pdf = add_attachment(
    invoice_xml=invoice_xml,
    file_path="invoices/INV-2024-00123.pdf",
    description="Invoice PDF",
)
Multiple Attachments
Add multiple files to a single invoice:
def add_multiple_attachments(
    invoice_xml: str,
    attachments: list[dict]
) -> str:
    """Embed several files into one invoice, one after another.

    Each entry is handed to ``add_attachment`` in list order; the XML returned
    by one call is the input of the next.

    Args:
        invoice_xml: The invoice XML string.
        attachments: List of dicts with a required ``'path'`` key and an
            optional ``'description'`` key (defaults to ``'Attachment'``).

    Returns:
        The invoice XML with every attachment added. An empty list returns
        ``invoice_xml`` unchanged.
    """
    current_xml = invoice_xml
    for spec in attachments:
        source_path = spec['path']
        label = spec.get('description', 'Attachment')
        current_xml = add_attachment(current_xml, source_path, label)
    return current_xml
# Usage: every entry needs a 'path'; 'description' is optional
invoice_with_attachments = add_multiple_attachments(
    invoice_xml=invoice_xml,
    attachments=[
        dict(path='invoice.pdf', description='Invoice PDF'),
        dict(path='timesheet.xlsx', description='Timesheet'),
        dict(path='contract.pdf', description='Contract Reference'),
    ],
)
External References
For large files, you can include a URL instead of embedding:
<!-- The document is referenced by URL instead of being embedded as Base64 content -->
<cac:AdditionalDocumentReference>
<cbc:ID>external-doc-1</cbc:ID>
<cbc:DocumentDescription>Large Technical Specification</cbc:DocumentDescription>
<cac:Attachment>
<!-- ExternalReference takes the place of EmbeddedDocumentBinaryObject -->
<cac:ExternalReference>
<cbc:URI>https://your-server.com/docs/spec-123.pdf</cbc:URI>
</cac:ExternalReference>
</cac:Attachment>
</cac:AdditionalDocumentReference>
External References
Not all receivers support external references. The URL must be:
- Publicly accessible (or accessible to the receiver)
- Available for at least 7 years (legal requirement)
- Secure (HTTPS)
Best Practices
1. Keep Files Small
def check_attachment_size(file_path: str, max_mb: float = 10) -> bool:
    """Verify that a file does not exceed the attachment size limit.

    Args:
        file_path: Path of the file to measure.
        max_mb: Largest allowed size in megabytes (1 MB = 1024 * 1024 bytes).

    Returns:
        ``True`` when the file is within the limit.

    Raises:
        ValueError: If the file is larger than ``max_mb``.
    """
    bytes_per_mb = 1024 * 1024
    megabytes = Path(file_path).stat().st_size / bytes_per_mb
    limit_exceeded = megabytes > max_mb
    if limit_exceeded:
        raise ValueError(f"File too large: {megabytes:.2f}MB (max: {max_mb}MB)")
    return True
2. Compress PDFs
import subprocess
def compress_pdf(input_path: str, output_path: str) -> str:
    """Write a compressed copy of a PDF by running Ghostscript (``gs``).

    Args:
        input_path: Path of the PDF to compress.
        output_path: Path where Ghostscript writes the result.

    Returns:
        ``output_path``, unchanged.

    Raises:
        subprocess.CalledProcessError: If ``gs`` exits with a non-zero status.
    """
    gs_command = [
        'gs',
        '-sDEVICE=pdfwrite',
        '-dCompatibilityLevel=1.4',
        '-dPDFSETTINGS=/ebook',  # Good quality, smaller size
        '-dNOPAUSE',
        '-dQUIET',
        '-dBATCH',
        f'-sOutputFile={output_path}',
        input_path,
    ]
    # check=True turns a failed Ghostscript run into an exception
    subprocess.run(gs_command, check=True)
    return output_path
3. Use Meaningful Descriptions
<!-- ❌ Not helpful: a bare file name does not say what the document is -->
<cbc:DocumentDescription>file1.pdf</cbc:DocumentDescription>
<!-- ✅ Clear and descriptive: names the document and its purpose -->
<cbc:DocumentDescription>Invoice PDF - Human readable copy</cbc:DocumentDescription>
4. Validate MIME Types
import magic # python-magic library
def validate_mime_type(file_path: str, expected_extension: str) -> bool:
    """Validate that a file's detected content type matches its extension.

    The content type is detected with ``magic.from_file(..., mime=True)`` and
    compared with the MIME types registered for ``expected_extension``. The
    extension is matched case-insensitively and a missing leading dot is
    tolerated, so ``'.PDF'`` and ``'pdf'`` are checked like ``'.pdf'``.
    Extensions without a registered MIME type are not checked.

    Args:
        file_path: Path of the file to inspect.
        expected_extension: Extension the file claims to have, e.g. ``'.pdf'``.

    Returns:
        ``True`` when the content matches, or when the extension is unknown.

    Raises:
        ValueError: If the detected MIME type is not one of the types
            registered for the extension.
    """
    # Every value is a tuple so one membership test covers all extensions.
    expected_mimes = {
        '.pdf': ('application/pdf',),
        '.png': ('image/png',),
        '.jpg': ('image/jpeg',),
        '.jpeg': ('image/jpeg',),
        '.xlsx': ('application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',),
    }

    detected_mime = magic.from_file(file_path, mime=True)

    # Normalize so that '.PDF' or 'pdf' cannot silently skip validation.
    extension = (expected_extension or '').lower()
    if extension and not extension.startswith('.'):
        extension = '.' + extension

    allowed = expected_mimes.get(extension)
    if allowed and detected_mime not in allowed:
        raise ValueError(f"File content ({detected_mime}) doesn't match extension ({expected_extension})")
    return True
Size Limits
| Document Type | Max Attachment Size | Max Total Size |
|---|---|---|
| Invoice | 10 MB per file | 15 MB total |
| Credit Note | 10 MB per file | 15 MB total |
| Order | 10 MB per file | 15 MB total |
Optimize File Size
- Compress PDFs before attaching
- Use JPEG for photos (smaller than PNG)
- Consider linking to external storage for very large files
Complete Example
import base64
import mimetypes
from pathlib import Path
from lxml import etree
class InvoiceBuilder:
    """Collect file attachments and render them as UBL reference elements.

    Files are queued with ``add_attachment`` (checked, Base64-encoded and
    stored in ``self.attachments``) and turned into
    ``cac:AdditionalDocumentReference`` elements by ``build_attachments_xml``.
    """

    # Per-file size limit in megabytes, enforced when an attachment is queued.
    MAX_ATTACHMENT_MB = 10

    _CAC_NS = 'urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2'
    _CBC_NS = 'urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2'

    def __init__(self):
        # Queued attachments as dicts, in the order they were added
        self.attachments = []

    def add_attachment(
        self,
        file_path: str,
        description: str,
        document_type: str = None
    ):
        """Queue an attachment to be added.

        Args:
            file_path: Path of the file to attach.
            description: Text for ``cbc:DocumentDescription``.
            document_type: Optional value for ``cbc:DocumentTypeCode``; the
                element is omitted when this is empty or ``None``.

        Returns:
            The builder itself, so calls can be chained.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
            ValueError: If the file is larger than ``MAX_ATTACHMENT_MB``.
        """
        path = Path(file_path)

        # Validate file exists
        if not path.exists():
            raise FileNotFoundError(f"File not found: {file_path}")

        # Check size against the per-file limit
        size_mb = path.stat().st_size / (1024 * 1024)
        if size_mb > self.MAX_ATTACHMENT_MB:
            raise ValueError(
                f"File too large: {size_mb:.2f}MB (max: {self.MAX_ATTACHMENT_MB}MB)"
            )

        # Detect MIME type from the file name; fall back to a generic type
        mime_type, _ = mimetypes.guess_type(str(path))
        if not mime_type:
            mime_type = 'application/octet-stream'

        # Read and encode
        content = base64.b64encode(path.read_bytes()).decode('utf-8')

        self.attachments.append({
            'id': len(self.attachments) + 1,  # 1-based position in the queue
            'filename': path.name,
            'mime_type': mime_type,
            'description': description,
            'document_type': document_type,
            'content': content
        })
        return self

    def add_pdf_invoice(self, file_path: str):
        """Add the PDF version of the invoice.

        Queues the file with the description ``"Invoice PDF"`` and the
        document type ``"CommercialInvoice"``.
        NOTE(review): confirm that the receiving profile accepts this
        document type code.
        """
        return self.add_attachment(
            file_path,
            description="Invoice PDF",
            document_type="CommercialInvoice"
        )

    def build_attachments_xml(self) -> list[etree.Element]:
        """Generate XML elements for all attachments.

        Returns:
            One ``cac:AdditionalDocumentReference`` element per queued
            attachment, in queue order. Children follow the UBL element
            sequence: ``cbc:ID``, optional ``cbc:DocumentTypeCode``,
            ``cbc:DocumentDescription``, ``cac:Attachment``.
        """
        cac = '{%s}' % self._CAC_NS
        cbc = '{%s}' % self._CBC_NS

        elements = []
        for att in self.attachments:
            doc_ref = etree.Element(cac + 'AdditionalDocumentReference')
            etree.SubElement(doc_ref, cbc + 'ID').text = str(att['id'])

            # DocumentTypeCode has to precede DocumentDescription in UBL
            if att.get('document_type'):
                etree.SubElement(doc_ref, cbc + 'DocumentTypeCode').text = att['document_type']
            etree.SubElement(doc_ref, cbc + 'DocumentDescription').text = att['description']

            attachment = etree.SubElement(doc_ref, cac + 'Attachment')
            binary = etree.SubElement(attachment, cbc + 'EmbeddedDocumentBinaryObject')
            binary.set('mimeCode', att['mime_type'])
            binary.set('filename', att['filename'])
            binary.text = att['content']

            elements.append(doc_ref)
        return elements
# Usage: each add_* call returns the builder, so the calls can be chained
builder = (
    InvoiceBuilder()
    .add_pdf_invoice("invoices/INV-2024-00123.pdf")
    .add_attachment("supporting/timesheet.xlsx", "Project Timesheet")
    .add_attachment("supporting/photo.jpg", "Delivered Goods Photo")
)

# Get XML elements to insert into invoice
attachment_elements = builder.build_attachments_xml()