Skip to content

PubChem Lookup

Query compounds from the local PubChem cache exposed as emzed.db.pubchem.

"""Query the local PubChem cache table if available."""

import emzed

# Try to obtain the local PubChem table. If the attribute access raises
# IOError, report it and fall back to None so the example below is skipped.
try:
    pubchem = emzed.db.pubchem
except IOError as exc:
    pubchem = None
    print("local PubChem cache not available")
    print("run emzed.db.update_pubchem() once to download it")
    print(f"details: {exc}")

if pubchem is not None:
    print(f"loaded local pubchem table with {len(pubchem)} rows")
    print()

    # Example: look up compounds by molecular formula (caffeine's formula).
    formula = "C8H10N4O2"
    matches = pubchem.filter(pubchem.mf == formula)
    ordered = matches.sort_by("cid")

    # Keep only the columns of interest, then print the [:5] slice.
    columns = ("cid", "mf", "mw", "iupac", "is_in_kegg", "is_in_hmdb")
    preview = ordered.extract_columns(*columns)
    print(preview[:5])
loaded local pubchem table with 243175 rows



cid   mf         mw         iupac                            is_in_kegg  is_in_hmdb
str   str        float      str                              bool        bool
----  ---------  ---------  -------------------------------  ----------  ----------
1676  C8H10N4O2   194.1900  3-propyl-7H-purine-2,6-dione     True        True
2519  C8H10N4O2   194.1900  1,3,7-trimethylpurine-2,6-dione  True        True