Skip to content

Table Manipulations

Join tables and compute grouped summaries.

"""Show joins, grouped summaries, and column management for tables."""

import emzed

# Input 1: one row per (sample, compound) pair holding the measured peak area.
intensity_rows = [
    ["S1", "glucose", 8.10e5],
    ["S1", "lactate", 2.50e5],
    ["S2", "glucose", 6.40e5],
    ["S2", "lactate", 3.10e5],
]
intensities = emzed.Table.create_table(
    ["sample_id", "compound", "area"],
    [str, str, float],
    rows=intensity_rows,
)

# Input 2: one row per sample holding its experimental condition.
meta_rows = [
    ["S1", "control"],
    ["S2", "treated"],
]
meta = emzed.Table.create_table(
    ["sample_id", "condition"],
    [str, str],
    rows=meta_rows,
)

# Attach the sample metadata to every intensity row with the same sample_id.
same_sample = intensities.sample_id == meta.sample_id
joined = intensities.join(meta, same_sample)

# In the join result the left table's columns keep their names, while the
# columns coming from the right-hand table carry the postfix "__0". This
# applies to the key column "sample_id" as well as to "condition", which is
# why the renamed column "condition_meta" can be selected further below.
# Renaming the postfix gives the right-hand columns readable names:
#   sample_id__0 -> sample_id_meta
#   condition__0 -> condition_meta
joined.rename_postfixes(__0="_meta")

# Sum of "area" per sample_id group; as a column value the group total is
# repeated on every row that belongs to the group.
area_sum_per_sample = joined.group_by(joined.sample_id).sum(joined.area)
joined.add_column("total_area_per_sample", area_sum_per_sample, float)

# Share of each compound in its sample's total area, computed row by row.
area_share = joined.area / joined.total_area_per_sample
joined.add_column("relative_area", area_share, float)

# Keep only the columns of interest, ordered by sample and then by compound.
report_columns = (
    "sample_id",
    "condition_meta",
    "compound",
    "area",
    "total_area_per_sample",
    "relative_area",
)
selected = joined.extract_columns(*report_columns)
summary = selected.sort_by("sample_id", "compound")

print(summary)
sample_id  condition_meta  compound  area           total_area_per_sample  relative_area
str        str             str       float          float                  float
---------  --------------  --------  -------------  ---------------------  -------------
S1         control         glucose   810000.000000         1060000.000000       0.764151
S1         control         lactate   250000.000000         1060000.000000       0.235849
S2         treated         glucose   640000.000000          950000.000000       0.673684
S2         treated         lactate   310000.000000          950000.000000       0.326316