Summary statistics for a DataFrame.
Description
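Compute summary statistics (count, null count, mean, standard deviation, minimum, selected percentiles, and maximum) for each column of a DataFrame.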
Usage
<DataFrame>$describe(
percentiles = c(0.25, 0.5, 0.75),
...,
interpolation = c("nearest", "higher", "lower", "midpoint", "linear")
)
Arguments
percentiles
|
One or more percentiles to include in the summary statistics. All values
must be in the range [0, 1].
|
...
|
These dots are for future extensions and must be empty. |
interpolation
|
Interpolation method for computing quantiles. Must be one of
"nearest", "higher", "lower", "midpoint", or "linear".
The first choice, "nearest", is the default.
|
Value
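A polars DataFrame with one row per summary statistic: a string column named statistic identifies each row, followed by one column for each column of the original DataFrame.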
Examples
library("polars")
df <- pl$DataFrame(
int = 1:3,
float = c(0.5, NA, 2.5),
string = c(letters[1:2], NA),
date = c(as.Date("2024-01-20"), as.Date("2024-01-21"), NA),
cat = factor(c(letters[1:2], NA)),
bool = c(TRUE, FALSE, NA)
)
df
#> shape: (3, 6)
#> ┌─────┬───────┬────────┬────────────┬──────┬───────┐
#> │ int ┆ float ┆ string ┆ date ┆ cat ┆ bool │
#> │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
#> │ i32 ┆ f64 ┆ str ┆ date ┆ cat ┆ bool │
#> ╞═════╪═══════╪════════╪════════════╪══════╪═══════╡
#> │ 1 ┆ 0.5 ┆ a ┆ 2024-01-20 ┆ a ┆ true │
#> │ 2 ┆ null ┆ b ┆ 2024-01-21 ┆ b ┆ false │
#> │ 3 ┆ 2.5 ┆ null ┆ null ┆ null ┆ null │
#> └─────┴───────┴────────┴────────────┴──────┴───────┘
# Show the default summary statistics:
df$describe()
#> shape: (9, 7)
#> ┌────────────┬─────┬──────────┬────────┬─────────────────────────┬──────┬──────┐
#> │ statistic ┆ int ┆ float ┆ string ┆ date ┆ cat ┆ bool │
#> │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
#> │ str ┆ f64 ┆ f64 ┆ str ┆ str ┆ str ┆ f64 │
#> ╞════════════╪═════╪══════════╪════════╪═════════════════════════╪══════╪══════╡
#> │ count ┆ 3.0 ┆ 2.0 ┆ 2 ┆ 2 ┆ 2 ┆ 2.0 │
#> │ null_count ┆ 0.0 ┆ 1.0 ┆ 1 ┆ 1 ┆ 1 ┆ 1.0 │
#> │ mean ┆ 2.0 ┆ 1.5 ┆ null ┆ 2024-01-20 12:00:00.000 ┆ null ┆ 0.5 │
#> │ std ┆ 1.0 ┆ 1.414214 ┆ null ┆ null ┆ null ┆ null │
#> │ min ┆ 1.0 ┆ 0.5 ┆ a ┆ 2024-01-20 ┆ a ┆ 0.0 │
#> │ 25% ┆ 2.0 ┆ 0.5 ┆ null ┆ 2024-01-20 ┆ null ┆ null │
#> │ 50% ┆ 2.0 ┆ 2.5 ┆ null ┆ 2024-01-21 ┆ null ┆ null │
#> │ 75% ┆ 3.0 ┆ 2.5 ┆ null ┆ 2024-01-21 ┆ null ┆ null │
#> │ max ┆ 3.0 ┆ 2.5 ┆ b ┆ 2024-01-21 ┆ b ┆ 1.0 │
#> └────────────┴─────┴──────────┴────────┴─────────────────────────┴──────┴──────┘
# Customize which percentiles are displayed, applying linear interpolation:
df$describe(
percentiles = c(0.1, 0.3, 0.5, 0.7, 0.9),
interpolation = "linear"
)
#> shape: (11, 7)
#> ┌────────────┬─────┬──────────┬────────┬─────────────────────────┬──────┬──────┐
#> │ statistic ┆ int ┆ float ┆ string ┆ date ┆ cat ┆ bool │
#> │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
#> │ str ┆ f64 ┆ f64 ┆ str ┆ str ┆ str ┆ f64 │
#> ╞════════════╪═════╪══════════╪════════╪═════════════════════════╪══════╪══════╡
#> │ count ┆ 3.0 ┆ 2.0 ┆ 2 ┆ 2 ┆ 2 ┆ 2.0 │
#> │ null_count ┆ 0.0 ┆ 1.0 ┆ 1 ┆ 1 ┆ 1 ┆ 1.0 │
#> │ mean ┆ 2.0 ┆ 1.5 ┆ null ┆ 2024-01-20 12:00:00.000 ┆ null ┆ 0.5 │
#> │ std ┆ 1.0 ┆ 1.414214 ┆ null ┆ null ┆ null ┆ null │
#> │ min ┆ 1.0 ┆ 0.5 ┆ a ┆ 2024-01-20 ┆ a ┆ 0.0 │
#> │ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │
#> │ 30% ┆ 1.6 ┆ 1.1 ┆ null ┆ 2024-01-20 ┆ null ┆ null │
#> │ 50% ┆ 2.0 ┆ 1.5 ┆ null ┆ 2024-01-20 ┆ null ┆ null │
#> │ 70% ┆ 2.4 ┆ 1.9 ┆ null ┆ 2024-01-20 ┆ null ┆ null │
#> │ 90% ┆ 2.8 ┆ 2.3 ┆ null ┆ 2024-01-20 ┆ null ┆ null │
#> │ max ┆ 3.0 ┆ 2.5 ┆ b ┆ 2024-01-21 ┆ b ┆ 1.0 │
#> └────────────┴─────┴──────────┴────────┴─────────────────────────┴──────┴──────┘