Skip to content

Replace all values by different values

Description

This changes all the values in a column, either using a specific replacement or a default one. See $replace() to replace only a subset of values.

Usage

<Expr>$replace_strict(old, new, ..., default = NULL, return_dtype = NULL)

Arguments

old Value or vector of values to replace. Accepts expression input. Vectors are parsed as Series, other non-expression inputs are parsed as literals. Also accepts a list of values like list(old = new).
new Value or vector of values to use as replacements. Accepts expression input. Vectors are parsed as Series, other non-expression inputs are parsed as literals. Length must match the length of old or have length 1.
... These dots are for future extensions and must be empty.
default Set values that were not replaced to this value. If NULL (default), an error is raised if any non-null values were not replaced. Accepts expression input. Non-expression inputs are parsed as literals.
return_dtype The data type of the resulting expression. If NULL (default), the data type is determined automatically based on the other inputs.

Details

The global string cache must be enabled when replacing categorical values.

Value

A polars expression

Examples

library("polars")

df <- pl$DataFrame(a = c(1, 2, 2, 3))

# "old" and "new" can each be a single value, or vectors of the same length
df$with_columns(replaced = pl$col("a")$replace_strict(2, 100, default = 1))
#> shape: (4, 2)
#> ┌─────┬──────────┐
#> │ a   ┆ replaced │
#> │ --- ┆ ---      │
#> │ f64 ┆ f64      │
#> ╞═════╪══════════╡
#> │ 1.0 ┆ 1.0      │
#> │ 2.0 ┆ 100.0    │
#> │ 2.0 ┆ 100.0    │
#> │ 3.0 ┆ 1.0      │
#> └─────┴──────────┘
df$with_columns(
  replaced = pl$col("a")$replace_strict(c(2, 3), c(100, 200), default = 1)
)
#> shape: (4, 2)
#> ┌─────┬──────────┐
#> │ a   ┆ replaced │
#> │ --- ┆ ---      │
#> │ f64 ┆ f64      │
#> ╞═════╪══════════╡
#> │ 1.0 ┆ 1.0      │
#> │ 2.0 ┆ 100.0    │
#> │ 2.0 ┆ 100.0    │
#> │ 3.0 ┆ 200.0    │
#> └─────┴──────────┘
# "old" can be a named list where names are values to replace, and values are
# the replacements
mapping <- list(`2` = 100, `3` = 200)
df$with_columns(replaced = pl$col("a")$replace_strict(mapping, default = -1))
#> shape: (4, 2)
#> ┌─────┬──────────┐
#> │ a   ┆ replaced │
#> │ --- ┆ ---      │
#> │ f64 ┆ f64      │
#> ╞═════╪══════════╡
#> │ 1.0 ┆ -1.0     │
#> │ 2.0 ┆ 100.0    │
#> │ 2.0 ┆ 100.0    │
#> │ 3.0 ┆ 200.0    │
#> └─────┴──────────┘
# By default, an error is raised if any non-null values were not replaced.
# Specify a default to set all values that were not matched.
tryCatch(
  df$with_columns(replaced = pl$col("a")$replace_strict(mapping)),
  error = function(e) print(e)
)
#> <error/rlang_error>
#> Error in `df$with_columns()`:
#> ! Evaluation failed in `$with_columns()`.
#> Caused by error:
#> ! Evaluation failed in `$collect()`.
#> Caused by error:
#> ! Invalid operation: incomplete mapping specified for `replace_strict`
#> 
#> Hint: Pass a `default` value to set unmapped values.
#> ---
#> Backtrace:
#>      ▆
#>   1. ├─base::tryCatch(...)
#>   2. │ └─base (local) tryCatchList(expr, classes, parentenv, handlers)
#>   3. │   └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]])
#>   4. │     └─base (local) doTryCatch(return(expr), name, parentenv, handler)
#>   5. └─df$with_columns(replaced = pl$col("a")$replace_strict(mapping))
#>   6.   ├─polars:::wrap(self$lazy()$with_columns(...)$collect(`_eager` = TRUE)) at neo-r-polars/R/dataframe-frame.R:428:3
#>   7.   │ └─rlang::try_fetch(...) at neo-r-polars/R/utils-wrap.R:3:3
#>   8.   │   ├─base::tryCatch(...)
#>   9.   │   │ └─base (local) tryCatchList(expr, classes, parentenv, handlers)
#>  10.   │   │   └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]])
#>  11.   │   │     └─base (local) doTryCatch(return(expr), name, parentenv, handler)
#>  12.   │   └─base::withCallingHandlers(...)
#>  13.   └─self$lazy()$with_columns(...)$collect(`_eager` = TRUE) at neo-r-polars/R/utils-wrap.R:3:3
#>  14.     ├─polars:::wrap(...) at neo-r-polars/R/lazyframe-frame.R:284:3
#>  15.     │ └─rlang::try_fetch(...) at neo-r-polars/R/utils-wrap.R:3:3
#>  16.     │   ├─base::tryCatch(...)
#>  17.     │   │ └─base (local) tryCatchList(expr, classes, parentenv, handlers)
#>  18.     │   │   └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]])
#>  19.     │   │     └─base (local) doTryCatch(return(expr), name, parentenv, handler)
#>  20.     │   └─base::withCallingHandlers(...)
#>  21.     └─ldf$collect(engine) at neo-r-polars/R/lazyframe-frame.R:331:5
#>  22.       └─polars:::.savvy_wrap_PlRDataFrame(...) at neo-r-polars/R/000-wrappers.R:3579:5
# The data type to return can be specified explicitly instead of being
# inferred automatically
df$with_columns(
  replaced = pl$col("a")$replace_strict(
    mapping,
    default = 1, return_dtype = pl$Int32
  )
)
#> shape: (4, 2)
#> ┌─────┬──────────┐
#> │ a   ┆ replaced │
#> │ --- ┆ ---      │
#> │ f64 ┆ i32      │
#> ╞═════╪══════════╡
#> │ 1.0 ┆ 1        │
#> │ 2.0 ┆ 100      │
#> │ 2.0 ┆ 100      │
#> │ 3.0 ┆ 200      │
#> └─────┴──────────┘
# "old", "new", and "default" can take Expr
df <- pl$DataFrame(a = c(1, 2, 2, 3), b = c(1.5, 2.5, 5, 1))
df$with_columns(
  replaced = pl$col("a")$replace_strict(
    old = pl$col("a")$max(),
    new = pl$col("b")$sum(),
    default = pl$col("b"),
  )
)
#> shape: (4, 3)
#> ┌─────┬─────┬──────────┐
#> │ a   ┆ b   ┆ replaced │
#> │ --- ┆ --- ┆ ---      │
#> │ f64 ┆ f64 ┆ f64      │
#> ╞═════╪═════╪══════════╡
#> │ 1.0 ┆ 1.5 ┆ 1.5      │
#> │ 2.0 ┆ 2.5 ┆ 2.5      │
#> │ 2.0 ┆ 5.0 ┆ 5.0      │
#> │ 3.0 ┆ 1.0 ┆ 10.0     │
#> └─────┴─────┴──────────┘