Start a group by operation
Description
Start a group by operation
Usage
<LazyFrame>$group_by(..., .maintain_order = FALSE)
Arguments
...
|
<dynamic-dots> Column(s) to group by. Accepts expression
input. Strings are parsed as column names.
|
.maintain_order
|
Ensure that the order of the groups is consistent with the input data.
This is slower than a default group by. Setting this to
TRUE prevents the query from running on the streaming engine.
|
Value
A lazy group by object; call $agg() on it to compute the aggregations.
Examples
library("polars")
# Group by one column and call agg() to compute the grouped sum of another
# column.
lf <- pl$LazyFrame(
a = c("a", "b", "a", "b", "c"),
b = c(1, 2, 1, 3, 3),
c = c(5, 4, 3, 2, 1)
)
lf$group_by("a")$agg(pl$col("b")$sum())$collect()
#> shape: (3, 2)
#> ┌─────┬─────┐
#> │ a ┆ b │
#> │ --- ┆ --- │
#> │ str ┆ f64 │
#> ╞═════╪═════╡
#> │ b ┆ 5.0 │
#> │ a ┆ 2.0 │
#> │ c ┆ 3.0 │
#> └─────┴─────┘
# Set .maintain_order = TRUE to ensure the order of the groups is consistent
# with the input.
lf$group_by("a", .maintain_order = TRUE)$agg(pl$col("b")$sum())$collect()
#> shape: (3, 2)
#> ┌─────┬─────┐
#> │ a ┆ b │
#> │ --- ┆ --- │
#> │ str ┆ f64 │
#> ╞═════╪═════╡
#> │ a ┆ 2.0 │
#> │ b ┆ 5.0 │
#> │ c ┆ 3.0 │
#> └─────┴─────┘
# Group by multiple columns by passing a vector of column names.
lf$group_by(c("a", "b"))$agg(pl$col("c")$max())$collect()
#> shape: (4, 3)
#> ┌─────┬─────┬─────┐
#> │ a ┆ b ┆ c │
#> │ --- ┆ --- ┆ --- │
#> │ str ┆ f64 ┆ f64 │
#> ╞═════╪═════╪═════╡
#> │ b ┆ 3.0 ┆ 2.0 │
#> │ b ┆ 2.0 ┆ 4.0 │
#> │ c ┆ 3.0 ┆ 1.0 │
#> │ a ┆ 1.0 ┆ 5.0 │
#> └─────┴─────┴─────┘
# Or use positional arguments to group by multiple columns in the same way.
# Expressions are also accepted.
lf$
group_by("a", pl$col("b") / 2)$
agg(pl$col("c")$mean())$collect()
#> shape: (4, 3)
#> ┌─────┬─────┬─────┐
#> │ a ┆ b ┆ c │
#> │ --- ┆ --- ┆ --- │
#> │ str ┆ f64 ┆ f64 │
#> ╞═════╪═════╪═════╡
#> │ b ┆ 1.5 ┆ 2.0 │
#> │ c ┆ 1.5 ┆ 1.0 │
#> │ b ┆ 1.0 ┆ 4.0 │
#> │ a ┆ 0.5 ┆ 4.0 │
#> └─────┴─────┴─────┘