Modify/append column(s) of a LazyFrame
Description
Add columns or modify existing ones with expressions. This is similar to
`dplyr::mutate()` in that it keeps unmentioned columns (unlike `$select()`).
However, unlike `dplyr::mutate()`, one cannot use new variables in
subsequent expressions within the same `$with_columns()` call. For
instance, if you create a variable `x`, you will only be able to use it
in a subsequent `$with_columns()` or `$select()` call.
Usage
<LazyFrame>$with_columns(...)
Arguments
`...`: \<dynamic-dots\> Name-value pairs of objects to be
converted to polars expressions by the `as_polars_expr()`
function. Character strings are parsed as column names; other non-expression
inputs are parsed as literals. Each name will be used as the expression
name.
Value
A polars LazyFrame
Examples
library("polars")
# Pass an expression to add it as a new column.
lf <- pl$LazyFrame(
a = 1:4,
b = c(0.5, 4, 10, 13),
c = c(TRUE, TRUE, FALSE, TRUE),
)
lf$with_columns((pl$col("a")^2)$alias("a^2"))$collect()
#> shape: (4, 4)
#> ┌─────┬──────┬───────┬──────┐
#> │ a ┆ b ┆ c ┆ a^2 │
#> │ --- ┆ --- ┆ --- ┆ --- │
#> │ i32 ┆ f64 ┆ bool ┆ f64 │
#> ╞═════╪══════╪═══════╪══════╡
#> │ 1 ┆ 0.5 ┆ true ┆ 1.0 │
#> │ 2 ┆ 4.0 ┆ true ┆ 4.0 │
#> │ 3 ┆ 10.0 ┆ false ┆ 9.0 │
#> │ 4 ┆ 13.0 ┆ true ┆ 16.0 │
#> └─────┴──────┴───────┴──────┘
# Added columns will replace existing columns with the same name.
lf$with_columns(a = pl$col("a")$cast(pl$Float64))$collect()
#> shape: (4, 3)
#> ┌─────┬──────┬───────┐
#> │ a ┆ b ┆ c │
#> │ --- ┆ --- ┆ --- │
#> │ f64 ┆ f64 ┆ bool │
#> ╞═════╪══════╪═══════╡
#> │ 1.0 ┆ 0.5 ┆ true │
#> │ 2.0 ┆ 4.0 ┆ true │
#> │ 3.0 ┆ 10.0 ┆ false │
#> │ 4.0 ┆ 13.0 ┆ true │
#> └─────┴──────┴───────┘
# Multiple columns can be added
lf$with_columns(
(pl$col("a")^2)$alias("a^2"),
(pl$col("b") / 2)$alias("b/2"),
(pl$col("c")$not())$alias("not c"),
)$collect()
#> shape: (4, 6)
#> ┌─────┬──────┬───────┬──────┬──────┬───────┐
#> │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │
#> │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
#> │ i32 ┆ f64 ┆ bool ┆ f64 ┆ f64 ┆ bool │
#> ╞═════╪══════╪═══════╪══════╪══════╪═══════╡
#> │ 1 ┆ 0.5 ┆ true ┆ 1.0 ┆ 0.25 ┆ false │
#> │ 2 ┆ 4.0 ┆ true ┆ 4.0 ┆ 2.0 ┆ false │
#> │ 3 ┆ 10.0 ┆ false ┆ 9.0 ┆ 5.0 ┆ true │
#> │ 4 ┆ 13.0 ┆ true ┆ 16.0 ┆ 6.5 ┆ false │
#> └─────┴──────┴───────┴──────┴──────┴───────┘
# Name the expressions via argument names instead of using `$alias()`
lf$with_columns(
`a^2` = pl$col("a")^2,
`b/2` = pl$col("b") / 2,
`not c` = pl$col("c")$not(),
)$collect()
#> shape: (4, 6)
#> ┌─────┬──────┬───────┬──────┬──────┬───────┐
#> │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │
#> │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
#> │ i32 ┆ f64 ┆ bool ┆ f64 ┆ f64 ┆ bool │
#> ╞═════╪══════╪═══════╪══════╪══════╪═══════╡
#> │ 1 ┆ 0.5 ┆ true ┆ 1.0 ┆ 0.25 ┆ false │
#> │ 2 ┆ 4.0 ┆ true ┆ 4.0 ┆ 2.0 ┆ false │
#> │ 3 ┆ 10.0 ┆ false ┆ 9.0 ┆ 5.0 ┆ true │
#> │ 4 ┆ 13.0 ┆ true ┆ 16.0 ┆ 6.5 ┆ false │
#> └─────┴──────┴───────┴──────┴──────┴───────┘
# Expressions with multiple outputs can automatically be instantiated
# as Structs by enabling the experimental setting `POLARS_AUTO_STRUCTIFY`:
if (requireNamespace("withr", quietly = TRUE)) {
withr::with_envvar(c(POLARS_AUTO_STRUCTIFY = "1"), {
lf$drop("c")$with_columns(
diffs = pl$col("a", "b")$diff()$name$suffix("_diff"),
)$collect()
})
}
#> shape: (4, 3)
#> ┌─────┬──────┬─────────────┐
#> │ a ┆ b ┆ diffs │
#> │ --- ┆ --- ┆ --- │
#> │ i32 ┆ f64 ┆ struct[2] │
#> ╞═════╪══════╪═════════════╡
#> │ 1 ┆ 0.5 ┆ {null,null} │
#> │ 2 ┆ 4.0 ┆ {1,3.5} │
#> │ 3 ┆ 10.0 ┆ {1,6.0} │
#> │ 4 ┆ 13.0 ┆ {1,3.0} │
#> └─────┴──────┴─────────────┘