Modify/append column(s) of a DataFrame
Description
This runs all expressions sequentially instead of in parallel. Use this only when the work per expression is cheap.
Add columns or modify existing ones with expressions. This is similar to `dplyr::mutate()` as it keeps unmentioned columns (unlike `$select()`).
However, unlike `dplyr::mutate()`, one cannot use new variables in subsequent expressions in the same `$with_columns_seq()` call. For instance, if you create a variable `x`, you will only be able to use it in another `$with_columns_seq()` or `$select()` call.
Usage
<DataFrame>$with_columns_seq(...)
Arguments
`...`
<dynamic-dots> Name-value pairs of objects to be
converted to polars expressions by the `as_polars_expr()`
function. Characters are parsed as column names, other non-expression
inputs are parsed as literals. Each name will be used as the expression
name.
Value
A polars DataFrame
Examples
library("polars")

# Example frame: an integer, a double and a logical column.
df <- pl$DataFrame(
  a = 1:4,
  b = c(0.5, 4, 10, 13),
  c = c(TRUE, TRUE, FALSE, TRUE),
)

# An unnamed expression is appended as a new column; `$alias()` sets its name.
a_squared <- (pl$col("a")^2)$alias("a^2")
df$with_columns_seq(a_squared)
#> shape: (4, 4)
#> ┌─────┬──────┬───────┬──────┐
#> │ a ┆ b ┆ c ┆ a^2 │
#> │ --- ┆ --- ┆ --- ┆ --- │
#> │ i32 ┆ f64 ┆ bool ┆ f64 │
#> ╞═════╪══════╪═══════╪══════╡
#> │ 1 ┆ 0.5 ┆ true ┆ 1.0 │
#> │ 2 ┆ 4.0 ┆ true ┆ 4.0 │
#> │ 3 ┆ 10.0 ┆ false ┆ 9.0 │
#> │ 4 ┆ 13.0 ┆ true ┆ 16.0 │
#> └─────┴──────┴───────┴──────┘
# A named expression whose name matches an existing column replaces that
# column instead of appending a new one.
a_as_float <- pl$col("a")$cast(pl$Float64)
df$with_columns_seq(a = a_as_float)
#> shape: (4, 3)
#> ┌─────┬──────┬───────┐
#> │ a ┆ b ┆ c │
#> │ --- ┆ --- ┆ --- │
#> │ f64 ┆ f64 ┆ bool │
#> ╞═════╪══════╪═══════╡
#> │ 1.0 ┆ 0.5 ┆ true │
#> │ 2.0 ┆ 4.0 ┆ true │
#> │ 3.0 ┆ 10.0 ┆ false │
#> │ 4.0 ┆ 13.0 ┆ true │
#> └─────┴──────┴───────┘
# Several expressions can be passed in a single call; each one is added
# as its own column.
a_squared <- (pl$col("a")^2)$alias("a^2")
b_halved <- (pl$col("b") / 2)$alias("b/2")
c_negated <- pl$col("c")$not()$alias("not c")
df$with_columns_seq(a_squared, b_halved, c_negated)
#> shape: (4, 6)
#> ┌─────┬──────┬───────┬──────┬──────┬───────┐
#> │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │
#> │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
#> │ i32 ┆ f64 ┆ bool ┆ f64 ┆ f64 ┆ bool │
#> ╞═════╪══════╪═══════╪══════╪══════╪═══════╡
#> │ 1 ┆ 0.5 ┆ true ┆ 1.0 ┆ 0.25 ┆ false │
#> │ 2 ┆ 4.0 ┆ true ┆ 4.0 ┆ 2.0 ┆ false │
#> │ 3 ┆ 10.0 ┆ false ┆ 9.0 ┆ 5.0 ┆ true │
#> │ 4 ┆ 13.0 ┆ true ┆ 16.0 ┆ 6.5 ┆ false │
#> └─────┴──────┴───────┴──────┴──────┴───────┘
# Argument names can supply the column names instead of `$alias()`.
# Names that are not syntactic R identifiers need backticks.
squared <- pl$col("a")^2
halved <- pl$col("b") / 2
negated <- pl$col("c")$not()
df$with_columns_seq(`a^2` = squared, `b/2` = halved, `not c` = negated)
#> shape: (4, 6)
#> ┌─────┬──────┬───────┬──────┬──────┬───────┐
#> │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │
#> │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
#> │ i32 ┆ f64 ┆ bool ┆ f64 ┆ f64 ┆ bool │
#> ╞═════╪══════╪═══════╪══════╪══════╪═══════╡
#> │ 1 ┆ 0.5 ┆ true ┆ 1.0 ┆ 0.25 ┆ false │
#> │ 2 ┆ 4.0 ┆ true ┆ 4.0 ┆ 2.0 ┆ false │
#> │ 3 ┆ 10.0 ┆ false ┆ 9.0 ┆ 5.0 ┆ true │
#> │ 4 ┆ 13.0 ┆ true ┆ 16.0 ┆ 6.5 ┆ false │
#> └─────┴──────┴───────┴──────┴──────┴───────┘
# With the experimental `POLARS_AUTO_STRUCTIFY` setting enabled, an
# expression that yields several outputs can be collected automatically
# into a single Struct column. The variable is set only for the duration
# of the call via withr, which is optional and therefore checked first.
if (requireNamespace("withr", quietly = TRUE)) {
  withr::with_envvar(c(POLARS_AUTO_STRUCTIFY = "1"), {
    df_without_c <- df$drop("c")
    df_without_c$with_columns_seq(
      diffs = pl$col("a", "b")$diff()$name$suffix("_diff")
    )
  })
}
#> shape: (4, 3)
#> ┌─────┬──────┬─────────────┐
#> │ a ┆ b ┆ diffs │
#> │ --- ┆ --- ┆ --- │
#> │ i32 ┆ f64 ┆ struct[2] │
#> ╞═════╪══════╪═════════════╡
#> │ 1 ┆ 0.5 ┆ {null,null} │
#> │ 2 ┆ 4.0 ┆ {1,3.5} │
#> │ 3 ┆ 10.0 ┆ {1,6.0} │
#> │ 4 ┆ 13.0 ┆ {1,3.0} │
#> └─────┴──────┴─────────────┘