Skip to content

Filter the rows in the LazyFrame based on a predicate expression

Description

The original order of the remaining rows is preserved. Rows where the filter does not evaluate to TRUE are discarded, including nulls.

Usage

<LazyFrame>$filter(...)

Arguments

\<dynamic-dots\> Expression that evaluates to a boolean Series.

Value

A polars LazyFrame

Examples

library("polars")

lf <- pl$LazyFrame(
  foo = c(1, 2, 3, NA, 4, NA, 0),
  bar = c(6, 7, 8, NA, NA, 9, 0),
  ham = c("a", "b", "c", NA, "d", "e", "f")
)

# Filter on one condition
lf$filter(pl$col("foo") > 1)$collect()
#> shape: (3, 3)
#> ┌─────┬──────┬─────┐
#> │ foo ┆ bar  ┆ ham │
#> │ --- ┆ ---  ┆ --- │
#> │ f64 ┆ f64  ┆ str │
#> ╞═════╪══════╪═════╡
#> │ 2.0 ┆ 7.0  ┆ b   │
#> │ 3.0 ┆ 8.0  ┆ c   │
#> │ 4.0 ┆ null ┆ d   │
#> └─────┴──────┴─────┘
# Filter on multiple conditions
lf$filter((pl$col("foo") < 3) & (pl$col("ham") == "a"))$collect()
#> shape: (1, 3)
#> ┌─────┬─────┬─────┐
#> │ foo ┆ bar ┆ ham │
#> │ --- ┆ --- ┆ --- │
#> │ f64 ┆ f64 ┆ str │
#> ╞═════╪═════╪═════╡
#> │ 1.0 ┆ 6.0 ┆ a   │
#> └─────┴─────┴─────┘
# Filter on an OR condition
lf$filter((pl$col("foo") == 1) | (pl$col("ham") == "c"))$collect()
#> shape: (2, 3)
#> ┌─────┬─────┬─────┐
#> │ foo ┆ bar ┆ ham │
#> │ --- ┆ --- ┆ --- │
#> │ f64 ┆ f64 ┆ str │
#> ╞═════╪═════╪═════╡
#> │ 1.0 ┆ 6.0 ┆ a   │
#> │ 3.0 ┆ 8.0 ┆ c   │
#> └─────┴─────┴─────┘
# Filter by comparing two columns against each other
lf$filter(pl$col("foo") == pl$col("bar"))$collect()
#> shape: (1, 3)
#> ┌─────┬─────┬─────┐
#> │ foo ┆ bar ┆ ham │
#> │ --- ┆ --- ┆ --- │
#> │ f64 ┆ f64 ┆ str │
#> ╞═════╪═════╪═════╡
#> │ 0.0 ┆ 0.0 ┆ f   │
#> └─────┴─────┴─────┘
lf$filter(pl$col("foo") != pl$col("bar"))$collect()
#> shape: (3, 3)
#> ┌─────┬─────┬─────┐
#> │ foo ┆ bar ┆ ham │
#> │ --- ┆ --- ┆ --- │
#> │ f64 ┆ f64 ┆ str │
#> ╞═════╪═════╪═════╡
#> │ 1.0 ┆ 6.0 ┆ a   │
#> │ 2.0 ┆ 7.0 ┆ b   │
#> │ 3.0 ┆ 8.0 ┆ c   │
#> └─────┴─────┴─────┘
# Notice how the row with null values is filtered out. In order to keep the
# rows with nulls, use:
lf$filter(pl$col("foo")$ne_missing(pl$col("bar")))$collect()
#> shape: (5, 3)
#> ┌──────┬──────┬─────┐
#> │ foo  ┆ bar  ┆ ham │
#> │ ---  ┆ ---  ┆ --- │
#> │ f64  ┆ f64  ┆ str │
#> ╞══════╪══════╪═════╡
#> │ 1.0  ┆ 6.0  ┆ a   │
#> │ 2.0  ┆ 7.0  ┆ b   │
#> │ 3.0  ┆ 8.0  ┆ c   │
#> │ 4.0  ┆ null ┆ d   │
#> │ null ┆ 9.0  ┆ e   │
#> └──────┴──────┴─────┘