Table Expressions
Create derived columns using table expressions and filters.
"""Use column expressions for derived values and boolean filtering."""
import emzed
# Build a small example peak table: one row per compound.
column_names = ["compound", "mz", "rt", "area"]
column_types = [str, emzed.MzType, emzed.RtType, float]
peak_rows = [
    ["caffeine", 195.08765, 42.0, 2.40e6],
    ["glucose", 181.07066, 60.0, 8.10e5],
    ["arginine", 175.11895, 95.0, 1.20e6],
    ["alanine", 90.0550, 30.0, 4.10e5],
]
peaks = emzed.Table.create_table(column_names, column_types, rows=peak_rows)

# Derived numeric column: arithmetic on a column yields a column expression.
area_in_millions = peaks.area / 1e6
peaks.add_column("area_million", area_in_millions, float)

# Derived boolean column: comparisons on a column are expressions too.
elutes_late = peaks.rt > 50.0
peaks.add_column("is_late", elutes_late, bool)

# Conditional column: pick one of two labels per row from a boolean expression.
has_large_area = peaks.area > 1e6
peaks.add_column(
    "intensity_class",
    has_large_area.then_else("high", "mid_or_low"),
    str,
)

# Column computed by calling a plain Python function on each mz value.
rounded_mz = peaks.apply(lambda value: round(value, 2), peaks.mz)
peaks.add_column("mz_rounded", rounded_mz, float)

# Keep rows that satisfy both conditions, ordered by descending area.
rt_above_threshold = peaks.rt > 40.0
area_above_threshold = peaks.area > 7e5
filtered = peaks.filter(rt_above_threshold & area_above_threshold).sort_by(
    "area", ascending=False
)

print("full table with derived columns:")
print(peaks)
print()

# Show only a subset of the columns for the filtered result.
summary = filtered.extract_columns(
    "compound", "mz", "rt", "area_million", "is_late"
)
print("filtered result:")
print(summary)
full table with derived columns:
compound mz rt area area_million is_late intensity_class mz_rounded
str MzType RtType float float bool str float
-------- ----------- -------- -------------- ------------ ------- --------------- ----------
caffeine 195.087650 0.70 m 2400000.000000 2.400000 False high 195.090000
glucose 181.070660 1.00 m 810000.000000 0.810000 True mid_or_low 181.070000
arginine 175.118950 1.58 m 1200000.000000 1.200000 True high 175.120000
alanine 90.055000 0.50 m 410000.000000 0.410000 False mid_or_low 90.060000
filtered result:
compound mz rt area_million is_late
str MzType RtType float bool
-------- ----------- -------- ------------ -------
caffeine 195.087650 0.70 m 2.400000 False
arginine 175.118950 1.58 m 1.200000 True
glucose 181.070660 1.00 m 0.810000 True