Skip to content

Modify/append column(s) of a DataFrame

Description

This will run all expressions sequentially instead of in parallel. Use this only when the work per expression is cheap.

Add columns or modify existing ones with expressions. This is similar to dplyr::mutate() as it keeps unmentioned columns (unlike $select()).

However, unlike dplyr::mutate(), one cannot use new variables in subsequent expressions in the same $with_columns_seq() call. For instance, if you create a variable x, you will only be able to use it in another $with_columns_seq() or $select() call.

Usage

<DataFrame>$with_columns_seq(...)

Arguments

...: \<dynamic-dots\> Name-value pairs of objects to be converted to polars expressions by the as_polars_expr() function. Characters are parsed as column names, other non-expression inputs are parsed as literals. Each name will be used as the expression name.

Value

A polars DataFrame

Examples

library("polars")

# Pass an expression to add it as a new column.
df <- pl$DataFrame(
  a = 1:4,
  b = c(0.5, 4, 10, 13),
  c = c(TRUE, TRUE, FALSE, TRUE),
)
df$with_columns_seq((pl$col("a")^2)$alias("a^2"))
#> shape: (4, 4)
#> ┌─────┬──────┬───────┬──────┐
#> │ a   ┆ b    ┆ c     ┆ a^2  │
#> │ --- ┆ ---  ┆ ---   ┆ ---  │
#> │ i32 ┆ f64  ┆ bool  ┆ f64  │
#> ╞═════╪══════╪═══════╪══════╡
#> │ 1   ┆ 0.5  ┆ true  ┆ 1.0  │
#> │ 2   ┆ 4.0  ┆ true  ┆ 4.0  │
#> │ 3   ┆ 10.0 ┆ false ┆ 9.0  │
#> │ 4   ┆ 13.0 ┆ true  ┆ 16.0 │
#> └─────┴──────┴───────┴──────┘
# Added columns will replace existing columns with the same name.
df$with_columns_seq(a = pl$col("a")$cast(pl$Float64))
#> shape: (4, 3)
#> ┌─────┬──────┬───────┐
#> │ a   ┆ b    ┆ c     │
#> │ --- ┆ ---  ┆ ---   │
#> │ f64 ┆ f64  ┆ bool  │
#> ╞═════╪══════╪═══════╡
#> │ 1.0 ┆ 0.5  ┆ true  │
#> │ 2.0 ┆ 4.0  ┆ true  │
#> │ 3.0 ┆ 10.0 ┆ false │
#> │ 4.0 ┆ 13.0 ┆ true  │
#> └─────┴──────┴───────┘
# Multiple columns can be added
df$with_columns_seq(
  (pl$col("a")^2)$alias("a^2"),
  (pl$col("b") / 2)$alias("b/2"),
  (pl$col("c")$not())$alias("not c"),
)
#> shape: (4, 6)
#> ┌─────┬──────┬───────┬──────┬──────┬───────┐
#> │ a   ┆ b    ┆ c     ┆ a^2  ┆ b/2  ┆ not c │
#> │ --- ┆ ---  ┆ ---   ┆ ---  ┆ ---  ┆ ---   │
#> │ i32 ┆ f64  ┆ bool  ┆ f64  ┆ f64  ┆ bool  │
#> ╞═════╪══════╪═══════╪══════╪══════╪═══════╡
#> │ 1   ┆ 0.5  ┆ true  ┆ 1.0  ┆ 0.25 ┆ false │
#> │ 2   ┆ 4.0  ┆ true  ┆ 4.0  ┆ 2.0  ┆ false │
#> │ 3   ┆ 10.0 ┆ false ┆ 9.0  ┆ 5.0  ┆ true  │
#> │ 4   ┆ 13.0 ┆ true  ┆ 16.0 ┆ 6.5  ┆ false │
#> └─────┴──────┴───────┴──────┴──────┴───────┘
# Name the expressions directly instead of using `$alias()`
df$with_columns_seq(
  `a^2` = pl$col("a")^2,
  `b/2` = pl$col("b") / 2,
  `not c` = pl$col("c")$not(),
)
#> shape: (4, 6)
#> ┌─────┬──────┬───────┬──────┬──────┬───────┐
#> │ a   ┆ b    ┆ c     ┆ a^2  ┆ b/2  ┆ not c │
#> │ --- ┆ ---  ┆ ---   ┆ ---  ┆ ---  ┆ ---   │
#> │ i32 ┆ f64  ┆ bool  ┆ f64  ┆ f64  ┆ bool  │
#> ╞═════╪══════╪═══════╪══════╪══════╪═══════╡
#> │ 1   ┆ 0.5  ┆ true  ┆ 1.0  ┆ 0.25 ┆ false │
#> │ 2   ┆ 4.0  ┆ true  ┆ 4.0  ┆ 2.0  ┆ false │
#> │ 3   ┆ 10.0 ┆ false ┆ 9.0  ┆ 5.0  ┆ true  │
#> │ 4   ┆ 13.0 ┆ true  ┆ 16.0 ┆ 6.5  ┆ false │
#> └─────┴──────┴───────┴──────┴──────┴───────┘
# Expressions with multiple outputs can automatically be instantiated
# as Structs by enabling the experimental setting `POLARS_AUTO_STRUCTIFY`:
if (requireNamespace("withr", quietly = TRUE)) {
  withr::with_envvar(c(POLARS_AUTO_STRUCTIFY = "1"), {
    df$drop("c")$with_columns_seq(
      diffs = pl$col("a", "b")$diff()$name$suffix("_diff"),
    )
  })
}
#> shape: (4, 3)
#> ┌─────┬──────┬─────────────┐
#> │ a   ┆ b    ┆ diffs       │
#> │ --- ┆ ---  ┆ ---         │
#> │ i32 ┆ f64  ┆ struct[2]   │
#> ╞═════╪══════╪═════════════╡
#> │ 1   ┆ 0.5  ┆ {null,null} │
#> │ 2   ┆ 4.0  ┆ {1,3.5}     │
#> │ 3   ┆ 10.0 ┆ {1,6.0}     │
#> │ 4   ┆ 13.0 ┆ {1,3.0}     │
#> └─────┴──────┴─────────────┘