Question Answering
This guide covers the Question Answering API, which lets you ask natural-language questions about your indexed documents and receive AI-generated answers with source references. Files must be indexed as EliseFiles first — see File Indexation.
How It Works
Question Answering runs as an asynchronous job. You submit a request with a list of files to analyse and a list of questions, and the platform processes them in the background. You then poll for the job status and retrieve the results when complete.
Creating a QA Job
Submit a QA job by providing a set of target files and a list of questions. You can target files by individual EliseFile IDs, by Collection IDs, or both.
- cURL
- Python (httpx)
- R
- SDK
# Create a QA job: POST the target files and the questions to answer.
# The response contains a jobId used for status polling and result retrieval.
curl -X POST "https://<api-domain>/api/core/qa/jobs" \
  -H "Authorization: Bearer <your-pat>" \
  -H "Content-Type: application/json" \
  -d '{
    "files": {
      "fileIds": ["a1b2c3d4-e5f6-7890-abcd-ef1234567890"],
      "collectionIds": []
    },
    "questions": [
      {
        "id": "q1",
        "question": "What is the main conclusion of this document?",
        "answerType": { "dataType": "STRING", "multiValued": false }
      },
      {
        "id": "q2",
        "question": "What is the publication date?",
        "answerType": { "dataType": "DATE", "multiValued": false }
      }
    ]
  }'
from pydantic import BaseModel, Field
from typing import Literal
class AnswerType(BaseModel):
    """Expected format of an answer (mirrors the API's `answerType` object)."""

    # One of STRING, INT, FLOAT, BOOL, DATE, ENUM — see the "Answer Types" table.
    data_type: str = Field(alias="dataType")
    # True when the answer may hold several values instead of a single one.
    multi_valued: bool = Field(default=False, alias="multiValued")
    # Allowed values; required when data_type is ENUM.
    enum_values: list[str] | None = Field(default=None, alias="enumValues")
class QuestionInput(BaseModel):
    """A single question submitted as part of a QA job."""

    # Unique identifier for this question within the job.
    id: str
    # The natural-language question to answer.
    question: str
    answer_type: AnswerType = Field(alias="answerType")
    # Optional extra instructions that guide the AI's answer.
    guidelines: str | None = None
    # Reference answer; when set, the API computes answerValidity against it.
    expected_answer: str | None = Field(default=None, alias="expectedAnswer")
    # IDs of other questions whose answers feed into this one.
    input_question_ids: list[str] | None = Field(default=None, alias="inputQuestionIds")
class FilesInput(BaseModel):
    """Target files for a QA job; fileIds and collectionIds may be combined."""

    # UUIDs of individual indexed EliseFiles.
    file_ids: list[str] = Field(default_factory=list, alias="fileIds")
    # UUIDs of Collections; all files in each collection are included.
    collection_ids: list[str] = Field(default_factory=list, alias="collectionIds")
class Job(BaseModel):
    """Status snapshot of an asynchronous QA job."""

    job_id: str = Field(alias="jobId")
    # PENDING | RUNNING | SUCCESS | FAILED | ABORTED (see "Job Status Values").
    status: str
    task_type: str | None = Field(default=None, alias="taskType")
    # Creation time as reported by the API — presumably an epoch timestamp; confirm units.
    created_time: int | None = Field(default=None, alias="createdTime")
    # Populated when the job reaches FAILED.
    error_message: str | None = Field(default=None, alias="errorMessage")
# Build the request body once, then submit it. `client` is an httpx client
# whose base URL points at the core API.
qa_payload = {
    "files": {
        "fileIds": ["a1b2c3d4-e5f6-7890-abcd-ef1234567890"],
        "collectionIds": [],
    },
    "questions": [
        {
            "id": "q1",
            "question": "What is the main conclusion of this document?",
            "answerType": {"dataType": "STRING", "multiValued": False},
        },
        {
            "id": "q2",
            "question": "What is the publication date?",
            "answerType": {"dataType": "DATE", "multiValued": False},
        },
    ],
}

response = client.post("/qa/jobs", json=qa_payload)
response.raise_for_status()

# The response body deserializes into the Job model defined above.
job = Job.model_validate(response.json())
print(f"Job created: {job.job_id} (status: {job.status})")
# Create a QA job; `base_req` is a pre-authenticated httr2 request object.
resp <- base_req |>
  req_url_path_append("qa", "jobs") |>
  req_body_json(list(
    files = list(
      fileIds = list("a1b2c3d4-e5f6-7890-abcd-ef1234567890"),
      collectionIds = list()
    ),
    questions = list(
      list(
        id = "q1",
        question = "What is the main conclusion of this document?",
        answerType = list(dataType = "STRING", multiValued = FALSE)
      ),
      list(
        id = "q2",
        question = "What is the publication date?",
        answerType = list(dataType = "DATE", multiValued = FALSE)
      )
    )
  )) |>
  req_perform()

# The response body carries jobId and status for polling.
job <- resp_body_json(resp)
cat(sprintf("Job created: %s (status: %s)\n", job$jobId, job$status))
import asyncio
from biolevate import BiolevateClient, QuestionInput
async def main():
    """Create a QA job against one indexed file using the SDK client."""
    async with BiolevateClient(
        base_url="https://<api-domain>",
        token="<your-pat>",
    ) as client:
        # answer_type accepts a plain dict matching the API's answerType object.
        job = await client.qa.create_job(
            questions=[
                QuestionInput(
                    id="q1",
                    question="What is the main conclusion of this document?",
                    answer_type={"dataType": "STRING", "multiValued": False},
                ),
                QuestionInput(
                    id="q2",
                    question="What is the publication date?",
                    answer_type={"dataType": "DATE", "multiValued": False},
                ),
            ],
            file_ids=["a1b2c3d4-e5f6-7890-abcd-ef1234567890"],
        )
        print(f"Job created: {job.job_id} (status: {job.status})")

asyncio.run(main())
Targeting Files
The files field accepts both fileIds and collectionIds simultaneously. All files are processed together in a single job.
| Field | Type | Description |
|---|---|---|
| fileIds | string[] | UUIDs of individual indexed EliseFiles |
| collectionIds | string[] | UUIDs of Collections — all files in each collection are included |
Defining Questions
Each question in the questions array is described by EliseQuestionInput:
| Field | Required | Description |
|---|---|---|
| id | Yes | Unique identifier for this question within the job |
| question | Yes | The natural-language question to answer |
| answerType | Yes | Expected answer format — see Answer Types below |
| guidelines | No | Additional instructions to guide the AI answer |
| expectedAnswer | No | Reference answer used to compute answerValidity |
| inputQuestionIds | No | IDs of other questions whose answers feed into this one |
Answer Types
The answerType.dataType field controls how the answer is formatted:
| dataType | Description | Example |
|---|---|---|
| STRING | Free text | "The study found..." |
| INT | Integer number | 42 |
| FLOAT | Decimal number | 3.14 |
| BOOL | Boolean yes/no | true |
| DATE | Date value | "2024-01-15" |
| ENUM | One of a fixed list of values | "HIGH" |
For ENUM types, provide the allowed values in answerType.enumValues.
Polling Job Status
The job runs asynchronously. Poll the job endpoint until the status reaches a terminal value — SUCCESS, FAILED, or ABORTED.
- cURL
- Python (httpx)
- R
- SDK
# Fetch the current status of a QA job.
curl -s "https://<api-domain>/api/core/qa/jobs/${JOB_ID}" \
  -H "Authorization: Bearer <your-pat>"
import time
def wait_for_job(job_id: str, poll_interval: float = 3.0, timeout: float | None = None) -> Job:
    """Poll a QA job until it reaches a terminal status.

    Args:
        job_id: Identifier returned when the job was created.
        poll_interval: Seconds to sleep between status checks.
        timeout: Optional overall limit in seconds. ``None`` (the default)
            polls forever, preserving the previous behavior.

    Returns:
        The final Job snapshot (status SUCCESS, FAILED, or ABORTED).

    Raises:
        TimeoutError: If ``timeout`` elapses before the job finishes.
        httpx.HTTPStatusError: If a status request fails.
    """
    deadline = None if timeout is None else time.monotonic() + timeout
    while True:
        response = client.get(f"/qa/jobs/{job_id}")
        response.raise_for_status()
        job = Job.model_validate(response.json())
        print(f"Status: {job.status}")
        # SUCCESS, FAILED and ABORTED are the terminal statuses.
        if job.status in ("SUCCESS", "FAILED", "ABORTED"):
            return job
        # Check the deadline before sleeping so we fail as early as possible.
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(f"QA job {job_id} did not finish within {timeout} seconds")
        time.sleep(poll_interval)
# Block until the job finishes, then surface any failure message.
completed_job = wait_for_job(job.job_id)
if completed_job.status != "SUCCESS":
    print(f"Job failed: {completed_job.error_message}")
# Poll the job endpoint every `poll_interval` seconds until a terminal status.
wait_for_job <- function(job_id, poll_interval = 3) {
  repeat {
    resp <- base_req |>
      req_url_path_append("qa", "jobs", job_id) |>
      req_perform()
    job <- resp_body_json(resp)
    cat(sprintf("Status: %s\n", job$status))
    # SUCCESS, FAILED and ABORTED are the terminal statuses.
    if (job$status %in% c("SUCCESS", "FAILED", "ABORTED")) {
      return(job)
    }
    Sys.sleep(poll_interval)
  }
}

# Block until the job finishes, then surface any failure message.
completed_job <- wait_for_job(job$jobId)
if (completed_job$status != "SUCCESS") {
  cat(sprintf("Job failed: %s\n", completed_job$errorMessage))
}
import asyncio
from biolevate import BiolevateClient
# Statuses after which the job will not change again.
TERMINAL_STATUSES = {"SUCCESS", "FAILED", "ABORTED"}

async def main():
    """Poll a QA job every 3 seconds until it reaches a terminal status."""
    async with BiolevateClient(
        base_url="https://<api-domain>",
        token="<your-pat>",
    ) as client:
        job_id = "<your-job-id>"
        while True:
            job = await client.qa.get_job(job_id)
            print(f"Status: {job.status}")
            if job.status in TERMINAL_STATUSES:
                break
            await asyncio.sleep(3)

asyncio.run(main())
Job Status Values
| Status | Description |
|---|---|
| PENDING | Job is queued and waiting to start |
| RUNNING | Job is currently being processed |
| SUCCESS | Job completed successfully |
| FAILED | Job encountered an error |
| ABORTED | Job was cancelled |
Retrieving Results
Once the job status is SUCCESS, fetch the answers.
- cURL
- Python (httpx)
- R
- SDK
# Retrieve the answers once the job status is SUCCESS.
curl -s "https://<api-domain>/api/core/qa/jobs/${JOB_ID}/results" \
  -H "Authorization: Bearer <your-pat>"
from pydantic import BaseModel, Field
class AnnotationId(BaseModel):
    """Reference to an annotation; resolve via the job's annotations endpoint."""

    id: str
    entity_type: str = Field(alias="entityType")
class QAResult(BaseModel):
    """One answered question from a QA job's results."""

    # The original question text.
    question: str | None = None
    # The AI-generated answer, always serialized as a string.
    raw_value: str | None = Field(default=None, alias="rawValue")
    # Why the AI gave this answer.
    explanation: str | None = None
    # Document excerpt the answer was derived from.
    sourced_content: str | None = Field(default=None, alias="sourcedContent")
    # Score between 0 and 1 versus expectedAnswer, when one was provided.
    answer_validity: float | None = Field(default=None, alias="answerValidity")
    # NOTE: the alias reproduces the API's own spelling ("Explaination") — do not "fix" it.
    validity_explanation: str | None = Field(default=None, alias="validityExplaination")
    # Source locations; resolve them through the annotations endpoint.
    reference_ids: list[AnnotationId] = Field(default_factory=list, alias="referenceIds")
class QAJobOutputs(BaseModel):
    """Envelope returned by GET /qa/jobs/{id}/results."""

    results: list[QAResult] = Field(default_factory=list)
# Fetch and print the answers for the completed job.
response = client.get(f"/qa/jobs/{job.job_id}/results")
response.raise_for_status()
outputs = QAJobOutputs.model_validate(response.json())
for result in outputs.results:
    print(f"Q: {result.question}")
    print(f"A: {result.raw_value}")
    if result.explanation:
        print(f" Explanation: {result.explanation}")
    # answer_validity is only present when the question had an expectedAnswer.
    if result.answer_validity is not None:
        print(f" Validity: {result.answer_validity:.2f}")
    print()
# Fetch the answers and print each question/answer pair.
resp <- base_req |>
  req_url_path_append("qa", "jobs", job$jobId, "results") |>
  req_perform()
outputs <- resp_body_json(resp)
for (result in outputs$results) {
  cat(sprintf("Q: %s\n", result$question))
  cat(sprintf("A: %s\n", result$rawValue))
  if (!is.null(result$explanation)) {
    cat(sprintf(" Explanation: %s\n", result$explanation))
  }
  # answerValidity is only set when the question had an expectedAnswer.
  if (!is.null(result$answerValidity)) {
    cat(sprintf(" Validity: %.2f\n", result$answerValidity))
  }
  cat("\n")
}
import asyncio
from biolevate import BiolevateClient
async def main():
    """Fetch a finished job's outputs and print each Q/A pair."""
    async with BiolevateClient(
        base_url="https://<api-domain>",
        token="<your-pat>",
    ) as client:
        job_id = "<your-job-id>"
        outputs = await client.qa.get_job_outputs(job_id)
        for result in outputs.results:
            print(f"Q: {result.question}")
            print(f"A: {result.raw_value}")
            if result.explanation:
                print(f" Explanation: {result.explanation}")

asyncio.run(main())
Result Fields
Each EliseQAResult in the results array contains:
| Field | Description |
|---|---|
| question | The original question text |
| rawValue | The AI-generated answer as a string |
| explanation | Why the AI gave this answer |
| sourcedContent | The document excerpt the answer was derived from |
| answerValidity | Score between 0 and 1 comparing the answer to expectedAnswer (if provided) |
| validityExplaination | Explanation of the validity score (field name spelling matches the API) |
| referenceIds | Annotation IDs pointing to the source locations in the documents |
Retrieving Inputs
Retrieve the original inputs submitted to the job — useful for auditing or displaying alongside results.
- cURL
- Python (httpx)
- R
- SDK
# Retrieve the files and questions originally submitted with the job.
curl -s "https://<api-domain>/api/core/qa/jobs/${JOB_ID}/inputs" \
  -H "Authorization: Bearer <your-pat>"
# Echo back the job's original inputs (useful for audit trails or UI display).
response = client.get(f"/qa/jobs/{job.job_id}/inputs")
response.raise_for_status()
inputs = response.json()
print(f"File IDs: {inputs['files']['fileIds']}")
print(f"Questions: {[q['question'] for q in inputs['questions']]}")
# Retrieve the job's original inputs.
resp <- base_req |>
  req_url_path_append("qa", "jobs", job$jobId, "inputs") |>
  req_perform()
inputs <- resp_body_json(resp)
cat(sprintf("File IDs: %s\n", paste(inputs$files$fileIds, collapse = ", ")))
import asyncio
from biolevate import BiolevateClient
async def main():
    """Fetch the questions originally submitted with a QA job."""
    async with BiolevateClient(
        base_url="https://<api-domain>",
        token="<your-pat>",
    ) as client:
        job_id = "<your-job-id>"
        inputs = await client.qa.get_job_inputs(job_id)
        print(f"Questions: {[q.question for q in inputs.questions]}")

asyncio.run(main())
Retrieving Annotations
Each QA result includes a referenceIds field identifying the exact document passages the AI used to produce each answer. Use the annotations endpoint to resolve those IDs into full objects with text excerpts, document names, and precise positions (page, bounding box, cell, or line).
- cURL
- SDK
# Resolve the referenceIds from the results into full annotation objects.
curl -s "https://<api-domain>/api/core/qa/jobs/${JOB_ID}/annotations" \
  -H "Authorization: Bearer <your-pat>"
import asyncio
from biolevate import BiolevateClient
async def main():
    """List the source annotations behind a job's answers."""
    async with BiolevateClient(
        base_url="https://<api-domain>",
        token="<your-pat>",
    ) as client:
        job_id = "<your-job-id>"
        annotations = await client.qa.get_job_annotations(job_id)
        for ann in annotations:
            # `data` may be absent; print the document name and an 80-char excerpt.
            if ann.data:
                print(f"[{ann.data.document_name}] {ann.data.content[:80]}")

asyncio.run(main())
For the full annotation data model, position types, and lookup patterns, see the Annotations guide.
Listing QA Jobs
List all QA jobs submitted by the current user, with pagination.
- cURL
- Python (httpx)
- R
- SDK
# List the current user's QA jobs, one page at a time.
curl -s "https://<api-domain>/api/core/qa/jobs?page=0&pageSize=20" \
  -H "Authorization: Bearer <your-pat>"
class JobPage(BaseModel):
    """One page of the paginated job listing."""

    data: list[Job]
    total_pages: int = Field(alias="totalPages")
    total_elements: int = Field(alias="totalElements")
    # True when further pages exist beyond this one.
    has_next: bool = Field(alias="hasNext")
# Request the first page (20 jobs) and print each job's status.
response = client.get("/qa/jobs", params={"page": 0, "pageSize": 20})
response.raise_for_status()
page = JobPage.model_validate(response.json())
for j in page.data:
    print(f"{j.job_id}: {j.status}")
# List the first page of QA jobs (20 per page).
resp <- base_req |>
  req_url_path_append("qa", "jobs") |>
  req_url_query(page = 0, pageSize = 20) |>
  req_perform()
page <- resp_body_json(resp)
for (j in page$data) {
  cat(sprintf("%s: %s\n", j$jobId, j$status))
}
import asyncio
from biolevate import BiolevateClient
async def main():
    """Page through the current user's QA jobs via the SDK."""
    async with BiolevateClient(
        base_url="https://<api-domain>",
        token="<your-pat>",
    ) as client:
        page = await client.qa.list_jobs(page=0, page_size=20)
        for job in page.data:
            print(f"{job.job_id}: {job.status}")

asyncio.run(main())
Next Steps
- Annotations — understand the full annotation data model and position types
- Extraction to pull structured metadata fields from documents
- Collections to organise files before running jobs on them
- API Reference for complete endpoint documentation