Annotations
When a Question Answering or Extraction job completes, each result contains a referenceIds field. These IDs point to annotation objects — structured records that identify the exact passages in your documents that the AI used to produce each answer or extracted value.
Fetching Annotations for a Job
A single endpoint returns all annotations for all results in a job. It aggregates every referenceIds value across all results and resolves them in one call.
- cURL
- Python (httpx)
- R
- SDK
# QA job: one call returns the annotations for all results of the job
curl --silent \
  --header "Authorization: Bearer <your-pat>" \
  "https://<api-domain>/api/core/qa/jobs/${JOB_ID}/annotations"

# Extraction job: same request against the extraction path
curl --silent \
  --header "Authorization: Bearer <your-pat>" \
  "https://<api-domain>/api/core/extraction/jobs/${JOB_ID}/annotations"
from typing import Any
from pydantic import BaseModel, Field
class AnnotationId(BaseModel):
    """Identifier object carried in the ``id`` field of an annotation."""

    # Raw identifier string.
    id: str
    # Read from the camelCase ``entityType`` key of the JSON payload.
    entity_type: str = Field(alias="entityType")
class BboxDto(BaseModel):
    """Four float coordinates describing the ``bbox`` of a BBOX position."""

    # First corner.
    x0: float
    y0: float
    # Opposite corner.
    x1: float
    y1: float
class EliseAnnotation(BaseModel):
    """One annotation record as parsed from the annotations endpoint."""

    id: AnnotationId
    type: str
    # Everything below is optional and falls back to None when absent.
    status: str | None = None
    data: dict[str, Any] | None = None
    # Timestamps are read from the camelCase keys of the JSON payload.
    created_time: int | None = Field(None, alias="createdTime")
    modified_time: int | None = Field(None, alias="modifiedTime")
# Replace "qa" with "extraction" for an extraction job
response = client.get(f"/qa/jobs/{job_id}/annotations")
response.raise_for_status()  # fail on HTTP errors before parsing the body
annotations = [EliseAnnotation.model_validate(a) for a in response.json()]

for ann in annotations:
    if ann.data is None:
        # No payload on this annotation, so there is nothing to print.
        continue

    # `or` fallbacks also cover keys that are present but null, which the
    # default argument of dict.get does not.
    content = ann.data.get("content") or ""
    print(f"[{ann.data.get('documentName')}] {content[:120]}")

    for pos in ann.data.get("positions") or []:
        pos_type = pos.get("type")
        if pos_type == "BBOX":
            print(f" Page {pos['pageNumber']}: ({pos['bbox']['x0']:.1f}, {pos['bbox']['y0']:.1f})")
        elif pos_type == "CELL":
            print(f" Sheet '{pos['sheetName']}' row {pos['row']} col {pos['col']}")
        elif pos_type == "LINE":
            print(f" Line {pos['lineNumber']} cols {pos['columnIndexStart']}–{pos['columnIndexStop']}")
# Replace "qa" with "extraction" for an extraction job
resp <- req_perform(
  req_url_path_append(base_req, "qa", "jobs", job_id, "annotations")
)
annotations <- resp_body_json(resp)

# Print each annotation's document name and excerpt, then one line per position.
for (ann in annotations) {
  if (is.null(ann$data)) next  # annotation without a payload

  cat(sprintf(
    "[%s] %s\n",
    ann$data$documentName,
    substr(ann$data$content, 1, 120)
  ))

  for (pos in ann$data$positions) {
    if (pos$type == "BBOX") {
      cat(sprintf(
        " Page %d: (%.1f, %.1f)\n",
        pos$pageNumber, pos$bbox$x0, pos$bbox$y0
      ))
    } else if (pos$type == "CELL") {
      cat(sprintf(
        " Sheet '%s' row %d col %d\n",
        pos$sheetName, pos$row, pos$col
      ))
    } else if (pos$type == "LINE") {
      cat(sprintf(
        " Line %d cols %d-%d\n",
        pos$lineNumber, pos$columnIndexStart, pos$columnIndexStop
      ))
    }
  }
}
import asyncio
from biolevate import BiolevateClient
async def main():
async with BiolevateClient(
base_url="https://<api-domain>",
token="<your-pat>",
) as client:
job_id = "<your-job-id>"
# QA job annotations
annotations = await client.qa.get_job_annotations(job_id)
for ann in annotations:
if ann.data is None:
continue
print(f"[{ann.data.document_name}] {ann.data.content[:120]}")
for pos in ann.data.positions or []:
if pos.get("type") == "BBOX":
print(f" Page {pos['pageNumber']}: ({pos['bbox']['x0']:.1f}, {pos['bbox']['y0']:.1f})")
elif pos.get("type") == "CELL":
print(f" Sheet '{pos['sheetName']}' row {pos['row']} col {pos['col']}")
elif pos.get("type") == "LINE":
print(f" Line {pos['lineNumber']} cols {pos['columnIndexStart']}–{pos['columnIndexStop']}")
asyncio.run(main())
Annotation Object Structure
Each annotation has the following top-level fields:
| Field | Description |
|---|---|
| `id` | Annotation identifier `{ id: string, entityType: string }` |
| `type` | Always `DOCUMENT_STATEMENT` for QA and Extraction jobs |
| `status` | `VALID` or `NOTVALID` |
| `data` | The document statement payload — see below |
| `createdTime` | Unix timestamp (ms) of annotation creation |
| `modifiedTime` | Unix timestamp (ms) of last modification |
Annotation Data
QA and Extraction jobs produce DOCUMENT_STATEMENT annotations. The data field contains:
| Field | Description |
|---|---|
| `content` | The text excerpt the AI used as evidence |
| `documentName` | Human-readable name of the source document |
| `documentId` | ID of the source EliseFile |
| `positions` | List of position objects — see Position Types |
Position Types
The positions array locates the excerpt within the document. Three formats are used depending on the file type.
BBOX — PDFs and images
| Field | Description |
|---|---|
| `pageNumber` | Zero-based page index |
| `bbox.x0` | Left edge (normalised, 0–1) |
| `bbox.y0` | Top edge (normalised, 0–1) |
| `bbox.x1` | Right edge (normalised, 0–1) |
| `bbox.y1` | Bottom edge (normalised, 0–1) |
{
"type": "BBOX",
"pageNumber": 2,
"bbox": { "x0": 0.12, "y0": 0.45, "x1": 0.88, "y1": 0.52 }
}
CELL — Spreadsheets
Used for XLSX, XLS, CSV, and ODS files.
| Field | Description |
|---|---|
| `sheetName` | Name of the sheet |
| `row` | Zero-based row index |
| `col` | Zero-based column index |
{
"type": "CELL",
"sheetName": "Sheet1",
"row": 4,
"col": 2
}
LINE — Plain text and structured text
Used for TXT, HTML, JSON, XML, and similar files.
| Field | Description |
|---|---|
| `lineNumber` | Zero-based line index |
| `columnIndexStart` | Start column of the match |
| `columnIndexStop` | End column of the match |
{
"type": "LINE",
"lineNumber": 17,
"columnIndexStart": 0,
"columnIndexStop": 84
}
Looking Up Source Passages
The referenceIds in each job result map directly to the id.id of the annotation objects. Build a lookup map to display source passages alongside results:
- Python (httpx)
- R
- SDK (QA)
- SDK (Extraction)
# Fetch results
results_response = client.get(f"/qa/jobs/{job_id}/results")
results_response.raise_for_status()
results = results_response.json().get("results") or []

# Fetch annotations and build a lookup map keyed by each annotation's id.id,
# which is the value the referenceIds of a result point at
ann_response = client.get(f"/qa/jobs/{job_id}/annotations")
ann_response.raise_for_status()
annotation_map = {a["id"]["id"]: a for a in ann_response.json()}

# Display answers with their source passages
for result in results:
    print(f"Q: {result['question']}")
    print(f"A: {result['rawValue']}")
    # `or []` also covers a key that is present but null
    for ref in result.get("referenceIds") or []:
        ann = annotation_map.get(ref["id"])
        # Skip unresolved references and annotations whose data is null;
        # indexing a null payload would raise TypeError
        if not ann or not ann.get("data"):
            continue
        data = ann["data"]
        print(f" [{data['documentName']}] {data['content'][:100]}")
        for pos in data.get("positions") or []:
            if pos.get("type") == "BBOX":
                print(f" -> page {pos['pageNumber']}")
    print()
# Fetch results
results_resp <- req_perform(
  req_url_path_append(base_req, "qa", "jobs", job_id, "results")
)
results <- resp_body_json(results_resp)$results

# Fetch annotations and build a lookup map named by each annotation's id$id
ann_resp <- req_perform(
  req_url_path_append(base_req, "qa", "jobs", job_id, "annotations")
)
annotations <- resp_body_json(ann_resp)
annotation_map <- annotations
names(annotation_map) <- sapply(annotations, function(a) a$id$id)

# Display answers with their source passages
for (result in results) {
  cat(sprintf("Q: %s\n", result$question))
  cat(sprintf("A: %s\n", result$rawValue))

  for (ref in result$referenceIds) {
    ann <- annotation_map[[ref$id]]
    if (is.null(ann)) next  # reference without a matching annotation

    cat(sprintf(
      " [%s] %s\n",
      ann$data$documentName,
      substr(ann$data$content, 1, 100)
    ))

    # Only BBOX positions are reported here
    for (pos in ann$data$positions) {
      if (pos$type == "BBOX") {
        cat(sprintf(" -> page %d\n", pos$pageNumber))
      }
    }
  }

  cat("\n")
}
import asyncio
from biolevate import BiolevateClient
async def main():
    """Print each QA answer followed by the source passages it references."""
    async with BiolevateClient(
        base_url="https://<api-domain>",
        token="<your-pat>",
    ) as client:
        job_id = "<your-qa-job-id>"

        outputs = await client.qa.get_job_outputs(job_id)
        annotations = await client.qa.get_job_annotations(job_id)

        # Index annotations by id.id so each reference resolves with one lookup.
        annotation_map = {a.id.id: a for a in annotations}

        for result in outputs.results:
            print(f"Q: {result.question}")
            print(f"A: {result.raw_value}")

            for ref in result.reference_ids:
                ann = annotation_map.get(ref.id)
                if not (ann and ann.data):
                    # Unresolved reference or annotation without a payload.
                    continue

                print(f" [{ann.data.document_name}] {ann.data.content[:100]}")

                for pos in ann.data.positions or []:
                    if pos.get("type") != "BBOX":
                        continue
                    print(f" -> page {pos['pageNumber']}")

            print()


asyncio.run(main())
import asyncio
from biolevate import BiolevateClient
async def main():
    """Print each extracted field followed by the source passages it references."""
    async with BiolevateClient(
        base_url="https://<api-domain>",
        token="<your-pat>",
    ) as client:
        job_id = "<your-extraction-job-id>"

        outputs = await client.extraction.get_job_outputs(job_id)
        annotations = await client.extraction.get_job_annotations(job_id)

        # Index annotations by id.id so each reference resolves with one lookup.
        annotation_map = {a.id.id: a for a in annotations}

        for result in outputs.results:
            print(f"Field: {result.meta}")
            print(f"Value: {result.raw_value}")

            for ref in result.reference_ids:
                ann = annotation_map.get(ref.id)
                if not (ann and ann.data):
                    # Unresolved reference or annotation without a payload.
                    continue

                print(f" [{ann.data.document_name}] {ann.data.content[:100]}")

                for pos in ann.data.positions or []:
                    if pos.get("type") != "BBOX":
                        continue
                    print(f" -> page {pos['pageNumber']}")

            print()


asyncio.run(main())
Next Steps
- Question Answering — submit questions and retrieve answers with source references
- Extraction — extract structured metadata fields with source references
- API Reference for complete endpoint schemas