"""Main public client for the Open Targets Platform."""
from __future__ import annotations
from typing import TYPE_CHECKING, Any, Optional, cast
from ._cache import CacheBackend, TTLCache, _NoCache
from ._graphql import GraphQLClient
from ._queries.disease import DISEASE_QUERY, DISEASE_TARGETS_QUERY
from ._queries.drug import DRUG_CHEMBL_IDS_QUERY, DRUG_INDICATIONS_QUERY, DRUG_QUERY
from ._queries.search import SEARCH_QUERY
from ._queries.target import (
TARGET_ASSOCIATIONS_QUERY,
TARGET_CONSTRAINT_QUERY,
TARGET_DRUGS_QUERY,
TARGET_EXPRESSION_QUERY,
TARGET_QUERY,
TARGET_SAFETY_QUERY,
TARGET_TRACTABILITY_QUERY,
TARGETS_BATCH_QUERY,
)
from ._retry import DEFAULT_RETRY_CONFIG, RetryConfig
from .exceptions import NotFoundError
from .models import (
Association,
DatasourceScore,
Disease,
Drug,
DrugIndication,
GeneticConstraint,
ProteinExpression,
RnaExpression,
SafetyBiosample,
SafetyEffect,
SafetyLiability,
SearchResult,
Target,
TissueExpression,
TissueInfo,
Tractability,
)
if TYPE_CHECKING:
import pandas as pd
_DEFAULT_URL = "https://api.platform.opentargets.org/api/v4/graphql"
[docs]
class OpenTargetsClient:
"""Synchronous client for the Open Targets Platform GraphQL API.
Args:
base_url: GraphQL endpoint. Override for self-hosted instances.
timeout: HTTP timeout in seconds.
cache: Set to ``False`` to disable in-memory caching.
cache_ttl: Cache entry lifetime in seconds (default 5 min).
Example::
from opentargets import OpenTargetsClient
client = OpenTargetsClient()
target = client.get_target("EGFR")
print(target.approved_name)
"""
def __init__(
self,
base_url: str = _DEFAULT_URL,
timeout: float = 30.0,
cache: bool | CacheBackend = True,
cache_ttl: float = 300.0,
retry_config: Optional[RetryConfig] = None,
) -> None:
self._gql = GraphQLClient(
base_url=base_url,
timeout=timeout,
retry_config=retry_config
if retry_config is not None
else DEFAULT_RETRY_CONFIG,
)
if isinstance(cache, bool):
_sym: CacheBackend = TTLCache(ttl=cache_ttl) if cache else _NoCache()
_res: CacheBackend = TTLCache(ttl=cache_ttl) if cache else _NoCache()
else:
_sym = cache
_res = cache
self._symbol_cache = _sym
self._result_cache = _res
# ------------------------------------------------------------------
# Target queries
# ------------------------------------------------------------------
[docs]
def get_target(self, target_id: str) -> Target:
"""Return core annotations for a single gene target.
Retrieves approved name, biotype, and functional descriptions for the
given target. Accepts either an Ensembl stable ID or an HGNC gene
symbol; symbols are resolved automatically via a search call.
Args:
target_id: Ensembl gene ID like ``'ENSG00000146648'`` or HGNC
symbol like ``'EGFR'``.
Returns:
A :class:`~opentargets.models.Target` instance.
Raises:
NotFoundError: If no target matches *target_id*.
Example::
client = OpenTargetsClient()
target = client.get_target("EGFR")
print(target.approved_name) # epidermal growth factor receptor
print(target.id) # ENSG00000146648
"""
ensembl_id = self._resolve_target(target_id)
cache_key = f"target:{ensembl_id}"
cached = self._result_cache.get(cache_key)
if cached is not None:
return cast(Target, cached)
data = self._gql.execute(TARGET_QUERY, {"ensemblId": ensembl_id})
raw = data.get("target")
if not raw:
raise NotFoundError("target", target_id)
target = _parse_target(raw)
self._result_cache.set(cache_key, target)
return target
[docs]
def get_targets(self, target_ids: list[str]) -> list[Target]:
"""Return core annotations for multiple gene targets in one API call.
More efficient than calling :meth:`get_target` in a loop when you
already have a list of identifiers.
Args:
target_ids: List of Ensembl gene IDs like ``'ENSG00000146648'`` or
HGNC symbols like ``'EGFR'``. Mixed formats are accepted.
Returns:
List of :class:`~opentargets.models.Target` instances in the same
order as *target_ids* (targets not found are silently omitted).
Example::
client = OpenTargetsClient()
targets = client.get_targets(["EGFR", "BRAF", "TP53"])
for t in targets:
print(t.approved_symbol, t.biotype)
"""
ensembl_ids = [self._resolve_target(t) for t in target_ids]
data = self._gql.execute(TARGETS_BATCH_QUERY, {"ids": ensembl_ids})
raws: list[dict[str, Any]] = data.get("targets") or []
by_id = {r["id"]: _parse_target(r) for r in raws}
return [by_id[eid] for eid in ensembl_ids if eid in by_id]
[docs]
def get_target_associations(
self,
target_id: str,
limit: int = 25,
as_dataframe: bool = False,
) -> list[Association] | pd.DataFrame:
"""Return diseases associated with a target, ranked by association score.
Each association includes an overall score (0–1) and per-datasource
scores (genetics, literature, clinical trials, etc.). Results are
ordered by descending overall score.
Args:
target_id: Ensembl gene ID like ``'ENSG00000146648'`` or HGNC
symbol like ``'EGFR'``.
limit: Maximum number of associations to return (default ``25``).
as_dataframe: When ``True``, return a flat ``pandas.DataFrame``
instead of a list of model objects. Requires ``pandas``.
Returns:
List of :class:`~opentargets.models.Association` objects, or a
``pandas.DataFrame`` when *as_dataframe* is ``True``.
Example::
client = OpenTargetsClient()
assocs = client.get_target_associations("EGFR", limit=5)
for a in assocs:
print(a.disease_name, round(a.score, 3))
"""
ensembl_id = self._resolve_target(target_id)
rows = self._gql.paginate(
TARGET_ASSOCIATIONS_QUERY,
{"ensemblId": ensembl_id},
data_path=["target", "associatedDiseases"],
size=min(limit, 25),
)
rows = rows[:limit]
symbol = ""
data_raw = self._gql.execute(TARGET_QUERY, {"ensemblId": ensembl_id})
if data_raw.get("target"):
symbol = data_raw["target"].get("approvedSymbol", "")
associations = [_parse_target_association(r, ensembl_id, symbol) for r in rows]
if as_dataframe:
return _to_dataframe(associations)
return associations
[docs]
def get_target_drugs(self, target_id: str) -> list[Drug]:
"""Return approved drugs and clinical candidates that interact with a target.
Includes the drug name, type, mechanism of action, trade names, and
maximum clinical trial phase reached.
Args:
target_id: Ensembl gene ID like ``'ENSG00000146648'`` or HGNC
symbol like ``'EGFR'``.
Returns:
List of :class:`~opentargets.models.Drug` objects.
Example::
client = OpenTargetsClient()
drugs = client.get_target_drugs("EGFR")
for d in drugs:
print(d.name, d.maximum_clinical_stage)
"""
ensembl_id = self._resolve_target(target_id)
data = self._gql.execute(TARGET_DRUGS_QUERY, {"ensemblId": ensembl_id})
rows = (data.get("target") or {}).get("drugAndClinicalCandidates", {}).get(
"rows"
) or []
return [_parse_drug(r["drug"]) for r in rows if "drug" in r]
[docs]
def get_target_tractability(self, target_id: str) -> list[Tractability]:
"""Return tractability assessments indicating how druggable a target is.
Covers small-molecule, antibody, PROTAC, and other modalities, each
with a label and value indicating the assessment category (e.g.
``"Clinical precedence"``, ``"Discovery precedence"``).
Args:
target_id: Ensembl gene ID like ``'ENSG00000146648'`` or HGNC
symbol like ``'EGFR'``.
Returns:
List of :class:`~opentargets.models.Tractability` objects, one per
modality/label combination.
Example::
client = OpenTargetsClient()
tracts = client.get_target_tractability("EGFR")
for t in tracts:
print(t.modality, t.label, t.value)
"""
ensembl_id = self._resolve_target(target_id)
data = self._gql.execute(TARGET_TRACTABILITY_QUERY, {"ensemblId": ensembl_id})
rows: list[dict[str, Any]] = (data.get("target") or {}).get(
"tractability"
) or []
return [Tractability.model_validate(r) for r in rows]
[docs]
def get_target_safety(self, target_id: str) -> list[SafetyLiability]:
"""Return known safety liabilities for a target.
Safety liabilities describe adverse events associated with target
perturbation, the biosample in which they were observed, the
directional effect (activation/inhibition), and the source literature.
Args:
target_id: Ensembl gene ID like ``'ENSG00000146648'`` or HGNC
symbol like ``'EGFR'``.
Returns:
List of :class:`~opentargets.models.SafetyLiability` objects.
Example::
client = OpenTargetsClient()
liabilities = client.get_target_safety("EGFR")
for s in liabilities:
print(s.event, s.datasource)
"""
ensembl_id = self._resolve_target(target_id)
data = self._gql.execute(TARGET_SAFETY_QUERY, {"ensemblId": ensembl_id})
rows: list[dict[str, Any]] = (data.get("target") or {}).get(
"safetyLiabilities"
) or []
return [_parse_safety_liability(r) for r in rows]
[docs]
def get_target_expression(self, target_id: str) -> list[TissueExpression]:
"""Return baseline tissue-level RNA and protein expression for a target.
Data is sourced from GTEx (RNA) and the Human Protein Atlas (protein).
Each entry covers one tissue and includes RNA TPM value/z-score and
protein reliability/level.
Args:
target_id: Ensembl gene ID like ``'ENSG00000146648'`` or HGNC
symbol like ``'EGFR'``.
Returns:
List of :class:`~opentargets.models.TissueExpression` objects.
Example::
client = OpenTargetsClient()
expressions = client.get_target_expression("EGFR")
for e in expressions:
print(e.tissue.label, e.rna.value, e.protein.level)
"""
ensembl_id = self._resolve_target(target_id)
data = self._gql.execute(TARGET_EXPRESSION_QUERY, {"ensemblId": ensembl_id})
rows: list[dict[str, Any]] = (data.get("target") or {}).get("expressions") or []
return [_parse_tissue_expression(r) for r in rows]
[docs]
def get_target_constraint(self, target_id: str) -> list[GeneticConstraint]:
"""Return gnomAD genetic constraint metrics for a target.
Constraint metrics quantify intolerance to variation and are useful
when assessing whether perturbing a target is likely to be tolerated.
Typical entries cover synonymous (``syn``), missense (``mis``), and
loss-of-function (``lof``) variant classes with pLI, LOEUF, and Z-score
values.
Args:
target_id: Ensembl gene ID like ``'ENSG00000146648'`` or HGNC
symbol like ``'EGFR'``.
Returns:
List of :class:`~opentargets.models.GeneticConstraint` objects —
typically one entry each for ``syn``, ``mis``, and ``lof``.
Example::
client = OpenTargetsClient()
constraints = client.get_target_constraint("EGFR")
for c in constraints:
print(c.constraintType, c.pLI, c.loeuf)
"""
ensembl_id = self._resolve_target(target_id)
data = self._gql.execute(TARGET_CONSTRAINT_QUERY, {"ensemblId": ensembl_id})
rows: list[dict[str, Any]] = (data.get("target") or {}).get(
"geneticConstraint"
) or []
return [GeneticConstraint.model_validate(r) for r in rows]
# ------------------------------------------------------------------
# Disease queries
# ------------------------------------------------------------------
[docs]
def get_disease(self, disease_id: str) -> Disease:
"""Return core annotations for a single disease or phenotype.
Retrieves name, description, therapeutic area classification, and
cross-database references (OMIM, MeSH, MONDO, etc.).
Args:
disease_id: EFO ontology identifier like ``'EFO_0000311'``
(cancer) or ``'EFO_0003060'`` (lung carcinoma). MONDO and
OMIM IDs are also accepted where Open Targets indexes them.
Returns:
A :class:`~opentargets.models.Disease` instance.
Raises:
NotFoundError: If no disease matches *disease_id*.
Example::
client = OpenTargetsClient()
disease = client.get_disease("EFO_0000311")
print(disease.name) # cancer
print(disease.description)
"""
cache_key = f"disease:{disease_id}"
cached = self._result_cache.get(cache_key)
if cached is not None:
return cast(Disease, cached)
data = self._gql.execute(DISEASE_QUERY, {"efoId": disease_id})
raw = data.get("disease")
if not raw:
raise NotFoundError("disease", disease_id)
disease = _parse_disease(raw)
self._result_cache.set(cache_key, disease)
return disease
[docs]
def get_disease_targets(
self,
disease_id: str,
limit: int = 25,
as_dataframe: bool = False,
) -> list[Association] | pd.DataFrame:
"""Return targets associated with a disease, ranked by association score.
The inverse of :meth:`get_target_associations`. Each association
includes an overall score and per-datasource scores. Results are
ordered by descending overall score.
Args:
disease_id: EFO ontology identifier like ``'EFO_0000311'``
(cancer) or ``'EFO_0003060'`` (lung carcinoma).
limit: Maximum number of associations to return (default ``25``).
as_dataframe: When ``True``, return a flat ``pandas.DataFrame``
instead of a list of model objects. Requires ``pandas``.
Returns:
List of :class:`~opentargets.models.Association` objects or a
``pandas.DataFrame`` when *as_dataframe* is ``True``.
Example::
client = OpenTargetsClient()
assocs = client.get_disease_targets("EFO_0000311", limit=5)
for a in assocs:
print(a.target_symbol, round(a.score, 3))
"""
rows = self._gql.paginate(
DISEASE_TARGETS_QUERY,
{"efoId": disease_id},
data_path=["disease", "associatedTargets"],
size=min(limit, 25),
)
rows = rows[:limit]
disease_name = ""
data_raw = self._gql.execute(DISEASE_QUERY, {"efoId": disease_id})
if data_raw.get("disease"):
disease_name = data_raw["disease"].get("name", "")
associations = [
_parse_disease_association(r, disease_id, disease_name) for r in rows
]
if as_dataframe:
return _to_dataframe(associations)
return associations
# ------------------------------------------------------------------
# Drug queries
# ------------------------------------------------------------------
[docs]
def get_drug(self, drug_id: str) -> Drug:
"""Return core annotations for a single drug or clinical candidate.
Retrieves the drug name, type (small molecule, antibody, etc.),
mechanism of action, synonyms, trade names, and the highest clinical
trial phase reached.
Args:
drug_id: ChEMBL identifier like ``'CHEMBL941'`` (erlotinib) or
``'CHEMBL1421'`` (gefitinib).
Returns:
A :class:`~opentargets.models.Drug` instance.
Raises:
NotFoundError: If no drug matches *drug_id*.
Example::
client = OpenTargetsClient()
drug = client.get_drug("CHEMBL941")
print(drug.name) # ERLOTINIB
print(drug.maximum_clinical_stage) # 4
"""
cache_key = f"drug:{drug_id}"
cached = self._result_cache.get(cache_key)
if cached is not None:
return cast(Drug, cached)
data = self._gql.execute(DRUG_QUERY, {"chemblId": drug_id})
raw = data.get("drug")
if not raw:
raise NotFoundError("drug", drug_id)
drug = _parse_drug(raw)
self._result_cache.set(cache_key, drug)
return drug
[docs]
def get_drug_indications(self, drug_id: str) -> list[DrugIndication]:
"""Return approved and clinical-stage disease indications for a drug.
Each indication includes the disease name and the maximum clinical
trial phase associated with the drug–disease pair.
Args:
drug_id: ChEMBL identifier like ``'CHEMBL941'`` (erlotinib).
Returns:
List of :class:`~opentargets.models.DrugIndication` objects.
Example::
client = OpenTargetsClient()
indications = client.get_drug_indications("CHEMBL941")
for ind in indications:
print(ind.disease_name, ind.max_clinical_stage)
"""
data = self._gql.execute(DRUG_INDICATIONS_QUERY, {"chemblId": drug_id})
rows = (data.get("drug") or {}).get("indications", {}).get("rows") or []
return [_parse_drug_indication(r) for r in rows]
[docs]
def get_drug_chembl_ids(self, drug_id: str) -> list[str]:
"""Return all ChEMBL IDs linked to a drug via its cross-references.
The Open Targets ``Drug`` type stores external references in
``crossReferences`` (source + ids). This method returns only those
``ids`` belonging to sources that look like a ChEMBL reference — i.e.
any cross-reference whose ``ids`` list contains strings starting with
``CHEMBL``, plus the primary drug ID itself. Useful when a compound
has multiple ChEMBL entries (e.g. salt vs. free base).
Args:
drug_id: ChEMBL identifier like ``'CHEMBL941'`` (erlotinib) or
``'CHEMBL521'``.
Returns:
Deduplicated list of ChEMBL identifier strings, primary ID first.
Raises:
NotFoundError: If no drug matches *drug_id*.
Example::
client = OpenTargetsClient()
ids = client.get_drug_chembl_ids("CHEMBL941")
print(ids) # ['CHEMBL941', ...]
"""
data = self._gql.execute(DRUG_CHEMBL_IDS_QUERY, {"chemblId": drug_id})
raw = data.get("drug")
if not raw:
raise NotFoundError("drug", drug_id)
return _extract_chembl_ids(raw)
# ------------------------------------------------------------------
# Search
# ------------------------------------------------------------------
[docs]
def search(
self,
query_string: str,
entity_type: str | None = None,
limit: int = 10,
) -> list[SearchResult]:
"""Search the Open Targets Platform for targets, diseases, or drugs.
Performs a ranked free-text search. Each result carries the entity
type, stable ID, display name, and a relevance score. Useful for
resolving human-readable names to stable identifiers.
Args:
query_string: Free-text search string, e.g. ``'EGFR'``,
``'lung cancer'``, or ``'erlotinib'``.
entity_type: Restrict results to ``'target'``, ``'disease'``, or
``'drug'``. Pass ``None`` (default) to search all types.
limit: Maximum number of results to return (default ``10``).
Returns:
List of :class:`~opentargets.models.SearchResult` objects ordered
by relevance.
Example::
client = OpenTargetsClient()
results = client.search("lung cancer", entity_type="disease", limit=3)
for r in results:
print(r.id, r.name, r.entity)
"""
entity_names = [entity_type] if entity_type else []
data = self._gql.execute(
SEARCH_QUERY,
{
"queryString": query_string,
"entityNames": entity_names,
"page": {"index": 0, "size": limit},
},
)
hits: list[dict[str, Any]] = (data.get("search") or {}).get("hits") or []
return [SearchResult.model_validate(h) for h in hits]
# ------------------------------------------------------------------
# Association queries
# ------------------------------------------------------------------
[docs]
def get_associations(
self,
target_id: str,
disease_id: str,
) -> Association | None:
"""Return the association score between one specific target and disease.
Looks up the direct target–disease pair and returns its overall
association score together with per-datasource scores. Returns
``None`` if Open Targets does not record an association.
Args:
target_id: Ensembl gene ID like ``'ENSG00000146648'`` or HGNC
symbol like ``'EGFR'``.
disease_id: EFO ontology identifier like ``'EFO_0000311'``
(cancer) or ``'EFO_0003060'`` (lung carcinoma).
Returns:
An :class:`~opentargets.models.Association` with overall and
per-datasource scores, or ``None`` if no association exists.
Example::
client = OpenTargetsClient()
assoc = client.get_associations("EGFR", "EFO_0000311")
if assoc:
print(assoc.score) # e.g. 0.853
"""
ensembl_id = self._resolve_target(target_id)
rows = self._gql.paginate(
TARGET_ASSOCIATIONS_QUERY,
{"ensemblId": ensembl_id},
data_path=["target", "associatedDiseases"],
size=25,
)
match = next(
(r for r in rows if (r.get("disease") or {}).get("id") == disease_id),
None,
)
if match is None:
return None
symbol = ""
d = self._gql.execute(TARGET_QUERY, {"ensemblId": ensembl_id})
if d.get("target"):
symbol = d["target"].get("approvedSymbol", "")
return _parse_target_association(match, ensembl_id, symbol)
# ------------------------------------------------------------------
# Lifecycle
# ------------------------------------------------------------------
[docs]
def close(self) -> None:
"""Close the underlying HTTP connection pool."""
self._gql.close()
def __enter__(self) -> OpenTargetsClient:
return self
def __exit__(self, *_: object) -> None:
self.close()
# ------------------------------------------------------------------
# Internal helpers
# ------------------------------------------------------------------
def _resolve_target(self, target_id: str) -> str:
"""Return Ensembl ID for *target_id*, resolving gene symbols via search."""
if target_id.upper().startswith("ENSG"):
return target_id
cached = self._symbol_cache.get(target_id.upper())
if cached is not None:
return cast(str, cached)
results = self.search(target_id, entity_type="target", limit=1)
if not results:
raise NotFoundError("target", target_id)
ensembl_id = results[0].id
self._symbol_cache.set(target_id.upper(), ensembl_id)
return ensembl_id
# ------------------------------------------------------------------
# Parse helpers (keep client.py clean by handling field mapping here)
# ------------------------------------------------------------------
def _parse_target(raw: dict[str, Any]) -> Target:
descs: list[str] = raw.get("functionDescriptions") or []
return Target.model_validate(
{
"id": raw.get("id", ""),
"approvedSymbol": raw.get("approvedSymbol", ""),
"approvedName": raw.get("approvedName", ""),
"biotype": raw.get("biotype", ""),
"functionDescriptions": descs,
"description": descs[0] if descs else "",
}
)
def _parse_disease(raw: dict[str, Any]) -> Disease:
areas_raw: list[Any] = raw.get("therapeuticAreas") or []
areas = [a["name"] if isinstance(a, dict) else str(a) for a in areas_raw]
return Disease.model_validate(
{
"id": raw.get("id", ""),
"name": raw.get("name", ""),
"description": raw.get("description", ""),
"therapeuticAreas": areas,
"dbXRefs": raw.get("dbXRefs") or [],
}
)
def _parse_drug(raw: dict[str, Any]) -> Drug:
moa_obj = raw.get("mechanismsOfAction") or {}
moa_rows: list[dict[str, Any]] = moa_obj.get("rows") or []
moa_str = moa_rows[0].get("mechanismOfAction", "") if moa_rows else ""
return Drug.model_validate(
{
"id": raw.get("id", ""),
"name": raw.get("name", ""),
"drugType": raw.get("drugType", ""),
"mechanism_of_action": moa_str,
"synonyms": raw.get("synonyms") or [],
"tradeNames": raw.get("tradeNames") or [],
"maximumClinicalStage": raw.get("maximumClinicalStage"),
}
)
def _parse_target_association(
row: dict[str, Any],
target_id: str,
target_symbol: str,
) -> Association:
disease = row.get("disease") or {}
ds_scores = [
DatasourceScore(id=s["id"], score=s["score"])
for s in (row.get("datasourceScores") or [])
]
return Association(
target_id=target_id,
target_symbol=target_symbol,
disease_id=disease.get("id", ""),
disease_name=disease.get("name", ""),
score=row.get("score", 0.0),
datasource_scores=ds_scores,
)
def _parse_disease_association(
row: dict[str, Any],
disease_id: str,
disease_name: str,
) -> Association:
target = row.get("target") or {}
ds_scores = [
DatasourceScore(id=s["id"], score=s["score"])
for s in (row.get("datasourceScores") or [])
]
return Association(
target_id=target.get("id", ""),
target_symbol=target.get("approvedSymbol", ""),
disease_id=disease_id,
disease_name=disease_name,
score=row.get("score", 0.0),
datasource_scores=ds_scores,
)
def _parse_association_raw(raw: dict[str, Any]) -> Association:
target = raw.get("target") or {}
disease = raw.get("disease") or {}
ds_scores = [
DatasourceScore(id=s["id"], score=s["score"])
for s in (raw.get("datasourceScores") or [])
]
return Association(
target_id=target.get("id", ""),
target_symbol=target.get("approvedSymbol", ""),
disease_id=disease.get("id", ""),
disease_name=disease.get("name", ""),
score=raw.get("score", 0.0),
datasource_scores=ds_scores,
)
def _parse_drug_indication(row: dict[str, Any]) -> DrugIndication:
disease = row.get("disease") or {}
return DrugIndication.model_validate(
{
"disease_id": disease.get("id", ""),
"disease_name": disease.get("name", ""),
"maxClinicalStage": row.get("maxClinicalStage"),
}
)
def _parse_safety_liability(raw: dict[str, Any]) -> SafetyLiability:
biosamples = [
SafetyBiosample.model_validate(b) for b in (raw.get("biosamples") or [])
]
effects = [SafetyEffect.model_validate(e) for e in (raw.get("effects") or [])]
return SafetyLiability.model_validate(
{
"event": raw.get("event"),
"datasource": raw.get("datasource", ""),
"biosamples": biosamples,
"effects": effects,
"literature": raw.get("literature"),
"url": raw.get("url"),
"eventId": raw.get("eventId"),
}
)
def _parse_tissue_expression(raw: dict[str, Any]) -> TissueExpression:
tissue_raw = raw.get("tissue") or {}
rna_raw = raw.get("rna") or {}
protein_raw = raw.get("protein") or {}
return TissueExpression(
tissue=TissueInfo(
id=tissue_raw.get("id", ""),
label=tissue_raw.get("label", ""),
),
rna=RnaExpression(
value=rna_raw.get("value", 0.0),
level=rna_raw.get("level", 0),
zscore=rna_raw.get("zscore", 0),
unit=rna_raw.get("unit", ""),
),
protein=ProteinExpression(
level=protein_raw.get("level", -1),
reliability=protein_raw.get("reliability", False),
),
)
def _extract_chembl_ids(raw: dict[str, Any]) -> list[str]:
"""Return deduplicated ChEMBL IDs from a drug's crossReferences."""
seen: set[str] = set()
result: list[str] = []
primary_id = raw.get("id", "")
if primary_id:
seen.add(primary_id)
result.append(primary_id)
for ref in raw.get("crossReferences") or []:
for ref_id in ref.get("ids") or []:
if (
isinstance(ref_id, str)
and ref_id.upper().startswith("CHEMBL")
and ref_id not in seen
):
seen.add(ref_id)
result.append(ref_id)
return result
def _to_dataframe(associations: list[Association]) -> pd.DataFrame:
try:
import pandas as pd
except ImportError as exc:
raise ImportError(
"pandas is required for DataFrame output. "
"Install it with: pip install opentargets-py[pandas]"
) from exc
return pd.DataFrame([a.model_dump() for a in associations])