Skip to content

Modify/append column(s) of a DataFrame

Description

Add columns or modify existing ones with expressions. This is similar to dplyr::mutate() in that it keeps unmentioned columns (unlike $select()).

However, unlike dplyr::mutate(), one cannot use new variables in subsequent expressions in the same $with_columns() call. For instance, if you create a variable x, you will only be able to use it in another $with_columns() or $select() call.

Usage

<DataFrame>$with_columns(...)

Arguments

...: <dynamic-dots> Name-value pairs of objects to be converted to polars expressions by the as_polars_expr() function. Characters are parsed as column names, other non-expression inputs are parsed as literals. Each name will be used as the expression name.

Value

A polars DataFrame

Examples

library("polars")

# Pass an expression to add it as a new column.
df <- pl$DataFrame(
  a = 1:4,
  b = c(0.5, 4, 10, 13),
  c = c(TRUE, TRUE, FALSE, TRUE),
)
df$with_columns((pl$col("a")^2)$alias("a^2"))
#> shape: (4, 4)
#> ┌─────┬──────┬───────┬──────┐
#> │ a   ┆ b    ┆ c     ┆ a^2  │
#> │ --- ┆ ---  ┆ ---   ┆ ---  │
#> │ i32 ┆ f64  ┆ bool  ┆ f64  │
#> ╞═════╪══════╪═══════╪══════╡
#> │ 1   ┆ 0.5  ┆ true  ┆ 1.0  │
#> │ 2   ┆ 4.0  ┆ true  ┆ 4.0  │
#> │ 3   ┆ 10.0 ┆ false ┆ 9.0  │
#> │ 4   ┆ 13.0 ┆ true  ┆ 16.0 │
#> └─────┴──────┴───────┴──────┘
# Added columns will replace existing columns with the same name.
df$with_columns(a = pl$col("a")$cast(pl$Float64))
#> shape: (4, 3)
#> ┌─────┬──────┬───────┐
#> │ a   ┆ b    ┆ c     │
#> │ --- ┆ ---  ┆ ---   │
#> │ f64 ┆ f64  ┆ bool  │
#> ╞═════╪══════╪═══════╡
#> │ 1.0 ┆ 0.5  ┆ true  │
#> │ 2.0 ┆ 4.0  ┆ true  │
#> │ 3.0 ┆ 10.0 ┆ false │
#> │ 4.0 ┆ 13.0 ┆ true  │
#> └─────┴──────┴───────┘
# Multiple columns can be added
df$with_columns(
  (pl$col("a")^2)$alias("a^2"),
  (pl$col("b") / 2)$alias("b/2"),
  (pl$col("c")$not())$alias("not c"),
)
#> shape: (4, 6)
#> ┌─────┬──────┬───────┬──────┬──────┬───────┐
#> │ a   ┆ b    ┆ c     ┆ a^2  ┆ b/2  ┆ not c │
#> │ --- ┆ ---  ┆ ---   ┆ ---  ┆ ---  ┆ ---   │
#> │ i32 ┆ f64  ┆ bool  ┆ f64  ┆ f64  ┆ bool  │
#> ╞═════╪══════╪═══════╪══════╪══════╪═══════╡
#> │ 1   ┆ 0.5  ┆ true  ┆ 1.0  ┆ 0.25 ┆ false │
#> │ 2   ┆ 4.0  ┆ true  ┆ 4.0  ┆ 2.0  ┆ false │
#> │ 3   ┆ 10.0 ┆ false ┆ 9.0  ┆ 5.0  ┆ true  │
#> │ 4   ┆ 13.0 ┆ true  ┆ 16.0 ┆ 6.5  ┆ false │
#> └─────┴──────┴───────┴──────┴──────┴───────┘
# Name expression instead of `$alias()`
df$with_columns(
  `a^2` = pl$col("a")^2,
  `b/2` = pl$col("b") / 2,
  `not c` = pl$col("c")$not(),
)
#> shape: (4, 6)
#> ┌─────┬──────┬───────┬──────┬──────┬───────┐
#> │ a   ┆ b    ┆ c     ┆ a^2  ┆ b/2  ┆ not c │
#> │ --- ┆ ---  ┆ ---   ┆ ---  ┆ ---  ┆ ---   │
#> │ i32 ┆ f64  ┆ bool  ┆ f64  ┆ f64  ┆ bool  │
#> ╞═════╪══════╪═══════╪══════╪══════╪═══════╡
#> │ 1   ┆ 0.5  ┆ true  ┆ 1.0  ┆ 0.25 ┆ false │
#> │ 2   ┆ 4.0  ┆ true  ┆ 4.0  ┆ 2.0  ┆ false │
#> │ 3   ┆ 10.0 ┆ false ┆ 9.0  ┆ 5.0  ┆ true  │
#> │ 4   ┆ 13.0 ┆ true  ┆ 16.0 ┆ 6.5  ┆ false │
#> └─────┴──────┴───────┴──────┴──────┴───────┘
# Expressions with multiple outputs can automatically be instantiated
# as Structs by enabling the experimental setting `POLARS_AUTO_STRUCTIFY`:
if (requireNamespace("withr", quietly = TRUE)) {
  withr::with_envvar(c(POLARS_AUTO_STRUCTIFY = "1"), {
    df$drop("c")$with_columns(
      diffs = pl$col("a", "b")$diff()$name$suffix("_diff"),
    )
  })
}
#> shape: (4, 3)
#> ┌─────┬──────┬─────────────┐
#> │ a   ┆ b    ┆ diffs       │
#> │ --- ┆ ---  ┆ ---         │
#> │ i32 ┆ f64  ┆ struct[2]   │
#> ╞═════╪══════╪═════════════╡
#> │ 1   ┆ 0.5  ┆ {null,null} │
#> │ 2   ┆ 4.0  ┆ {1,3.5}     │
#> │ 3   ┆ 10.0 ┆ {1,6.0}     │
#> │ 4   ┆ 13.0 ┆ {1,3.0}     │
#> └─────┴──────┴─────────────┘