|
//! DataFrame describe() functionality for Polars in Rust
//!
//! This module provides a `describe()` method for both DataFrame and LazyFrame,
//! similar to pandas/Polars `describe()` in Python. It computes summary statistics
//! including count, null_count, mean, std, min, percentiles, and max.
//!
//! The implementation follows the Python Polars pattern closely, avoiding
//! unnecessary data collection when working with LazyFrames.
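//!
//! A minimal usage sketch (the import path `your_crate` below is a placeholder,
//! not an actual crate name):
//!
//! ```rust,ignore
//! use polars::prelude::*;
//! use your_crate::Describable;
//!
//! fn summarize(lf: LazyFrame) -> anyhow::Result<DataFrame> {
//!     // Statistics are computed lazily; only the aggregation result is collected.
//!     lf.describe(None)
//! }
//! ```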
|
|
|
use anyhow::Result;
use polars::prelude::*;

/// Trait for types that can produce descriptive statistics
pub trait Describable {
    /// Compute descriptive statistics.
    ///
    /// # Arguments
    ///
    /// * `percentiles` - Optional vector of percentiles to compute (values between 0.0 and 1.0).
    ///   Defaults to `[0.25, 0.50, 0.75]` if `None`.
    ///
    /// # Returns
    ///
    /// A DataFrame containing statistics for each column:
    /// - count: number of non-null values
    /// - null_count: number of null values
    /// - mean: average value (numeric/temporal/boolean columns)
    /// - std: standard deviation (numeric columns only)
    /// - min: minimum value
    /// - percentiles: requested percentiles
    /// - max: maximum value
    ///
    /// # Example
    ///
    /// ```rust,ignore
    /// use polars::prelude::*;
    /// use your_crate::Describable;
    ///
    /// let df = df! {
    ///     "ints" => [1, 2, 3, 4, 5],
    ///     "floats" => [1.0, 2.5, 3.0, 4.5, 5.0],
    ///     "strings" => ["a", "b", "c", "d", "e"],
    /// }?;
    ///
    /// let stats = df.describe(None)?;
    /// println!("{}", stats);
    /// ```
    fn describe(&self, percentiles: Option<Vec<f64>>) -> Result<DataFrame>;
}

/// Implementation for DataFrame
impl Describable for DataFrame {
    fn describe(&self, percentiles: Option<Vec<f64>>) -> Result<DataFrame> {
        // Convert to LazyFrame and use the efficient implementation
        let lf = self.clone().lazy();
        describe_lazy_impl(&lf, percentiles)
    }
}

/// Implementation for LazyFrame
impl Describable for LazyFrame {
    fn describe(&self, percentiles: Option<Vec<f64>>) -> Result<DataFrame> {
        describe_lazy_impl(self, percentiles)
    }
}

/// Internal implementation that works purely with a LazyFrame.
/// This follows the same pattern as the Python implementation.
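///
/// Every statistic for every column is computed as a single aliased expression
/// (e.g. `count:ints`, `mean:ints`, `0.25:0:ints`) and evaluated in one `select`,
/// producing a one-row wide DataFrame that is then reshaped into the familiar
/// one-row-per-statistic layout.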
|
#[allow(clippy::too_many_lines)]
fn describe_lazy_impl(lazy_frame: &LazyFrame, percentiles: Option<Vec<f64>>) -> Result<DataFrame> {
    use polars::lazy::dsl;
    use polars::prelude::{QuantileMethod, NULL};

    // Get the schema without collecting the data
    let mut lf_mut = lazy_frame.clone();
    let schema = lf_mut.collect_schema()?;

    if schema.is_empty() {
        return Err(anyhow::anyhow!(
            "cannot describe a LazyFrame that has no columns"
        ));
    }

    // Default percentiles if not provided
    let percentiles = percentiles.unwrap_or_else(|| vec![0.25, 0.50, 0.75]);

    // Build statistic row names (metrics)
    let mut metrics = vec![
        "count".to_string(),
        "null_count".to_string(),
        "mean".to_string(),
        "std".to_string(),
        "min".to_string(),
    ];
    for p in &percentiles {
        #[allow(clippy::cast_possible_truncation)]
        metrics.push(format!("{}%", (p * 100.0) as i32));
    }
    metrics.push("max".to_string());

    // Helper to check if we skip min/max for this dtype
    let skip_minmax = |dtype: &DataType| -> bool {
        dtype.is_nested()
            || matches!(
                dtype,
                DataType::Categorical(..) | DataType::Null | DataType::Unknown(_)
            )
    };

    // Build all metric expressions for all columns in a single pass
    let mut metric_exprs = Vec::new();

    // Loop over columns and datatypes (like Python: `for c, dtype in schema.items()`)
    for (col_name, dtype) in schema.iter() {
        let col_name_str = col_name.to_string();
        let col = dsl::col(&col_name_str);

        // Determine if numeric or temporal
        let is_numeric = dtype.is_numeric();
        let is_temporal = !is_numeric && dtype.is_temporal();

        // Count expressions - for all columns
        let count_expr = col.clone().count().alias(format!("count:{col_name_str}"));
        let null_count_expr = col
            .clone()
            .null_count()
            .alias(format!("null_count:{col_name_str}"));

        // Mean - for temporal, numeric, or boolean columns
        let mean_expr = if is_temporal || is_numeric || dtype == &DataType::Boolean {
            if dtype == &DataType::Boolean {
                col.clone().cast(DataType::Float64).mean()
            } else {
                col.clone().mean()
            }
        } else {
            dsl::lit(NULL).cast(DataType::Float64)
        };
        let mean_expr = mean_expr.alias(format!("mean:{col_name_str}"));

        // Standard deviation - only for numeric columns
        let std_expr = if is_numeric {
            col.clone().std(1) // ddof=1 for sample std
        } else {
            dsl::lit(NULL).cast(DataType::Float64)
        };
        let std_expr = std_expr.alias(format!("std:{col_name_str}"));

        // Min/max - based on skip_minmax
        let min_expr = if skip_minmax(dtype) {
            dsl::lit(NULL).cast(DataType::Float64)
        } else {
            col.clone().min()
        };
        let min_expr = min_expr.alias(format!("min:{col_name_str}"));

        let max_expr = if skip_minmax(dtype) {
            dsl::lit(NULL).cast(DataType::Float64)
        } else {
            col.clone().max()
        };
        let max_expr = max_expr.alias(format!("max:{col_name_str}"));

        // Percentiles - for numeric or temporal columns
        let mut pct_exprs = Vec::new();
        for (i, p) in percentiles.iter().enumerate() {
            let pct_expr = if is_numeric || is_temporal {
                // Temporal types might need special handling; for now, use quantile directly
                col.clone().quantile(dsl::lit(*p), QuantileMethod::Linear)
            } else {
                dsl::lit(NULL).cast(DataType::Float64)
            };
            pct_exprs.push(pct_expr.alias(format!("{p}:{i}:{col_name_str}")));
        }

        // Add all expressions for this column
        metric_exprs.push(count_expr);
        metric_exprs.push(null_count_expr);
        metric_exprs.push(mean_expr);
        metric_exprs.push(std_expr);
        metric_exprs.push(min_expr);
        metric_exprs.extend(pct_exprs);
        metric_exprs.push(max_expr);
    }

    // Execute all aggregations in a single pass
    let df_metrics = lazy_frame.clone().select(metric_exprs).collect()?;

    // Reshape the wide result into the final format
    let n_metrics = metrics.len();
    let mut result_series = Vec::new();

    // Add the statistic column first
    result_series.push(Series::new("statistic".into(), metrics.clone()));

    // Process each column's metrics
    for (col_name, dtype) in schema.iter() {
        let col_name_str = col_name.to_string();
        let mut col_values = Vec::new();

        // Dtypes whose mean/std should be formatted with fixed decimals
        let is_numeric_result = dtype.is_numeric()
            || dtype.is_nested()
            || matches!(dtype, DataType::Null | DataType::Boolean);

        // Extract each metric for this column by its aliased name
        for metric_idx in 0..n_metrics {
            let metric_name = match metric_idx {
                0 => format!("count:{col_name_str}"),
                1 => format!("null_count:{col_name_str}"),
                2 => format!("mean:{col_name_str}"),
                3 => format!("std:{col_name_str}"),
                4 => format!("min:{col_name_str}"),
                i if i < n_metrics - 1 => {
                    // Percentile
                    let pct_idx = i - 5;
                    let p = &percentiles[pct_idx];
                    format!("{p}:{pct_idx}:{col_name_str}")
                }
                _ => format!("max:{col_name_str}"),
            };

            // Get the value from df_metrics
            if let Ok(val) = df_metrics.column(&metric_name)?.get(0) {
                // Format based on type and metric
                let formatted = if val.is_null() {
                    "null".to_string()
                } else if metric_idx <= 1 {
                    // count and null_count - always as integer
                    format!("{val}")
                } else if is_numeric_result && (metric_idx == 2 || metric_idx == 3) {
                    // mean and std for numeric - format with decimals
                    format!("{val:.6}")
                } else {
                    // Everything else (including boolean min/max) uses the computed
                    // value's default formatting
                    format!("{val}")
                };

                col_values.push(formatted);
            } else {
                col_values.push("null".to_string());
            }
        }

        // Add this column's values to the result
        result_series.push(Series::new(col_name_str.into(), col_values));
    }

    DataFrame::new(result_series).map_err(Into::into)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_describe_numeric() -> Result<()> {
        let df = df! {
            "ints" => [1, 2, 3, 4, 5],
            "floats" => [1.0, 2.0, 3.0, 4.0, 5.0],
        }?;

        let stats = df.describe(None)?;

        // Check shape: 9 statistics x 3 columns (statistic + 2 data columns)
        assert_eq!(stats.shape(), (9, 3));

        // Check that the statistic column exists
        assert!(stats.column("statistic").is_ok());

        Ok(())
    }

    #[test]
    fn test_describe_with_custom_percentiles() -> Result<()> {
        let df = df! {
            "values" => [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        }?;

        let stats = df.describe(Some(vec![0.1, 0.5, 0.9]))?;

        // count, null_count, mean, std, min, 10%, 50%, 90%, max = 9 rows
        assert_eq!(stats.height(), 9);

        Ok(())
    }

    #[test]
    fn test_describe_mixed_types() -> Result<()> {
        let df = df! {
            "numbers" => [1, 2, 3],
            "strings" => ["a", "b", "c"],
            "bools" => [true, false, true],
        }?;

        let stats = df.describe(None)?;

        // Should not panic and should return stats for all columns
        assert_eq!(stats.width(), 4); // statistic + 3 data columns

        Ok(())
    }

    #[test]
    fn test_describe_lazy_frame() -> Result<()> {
        let df = df! {
            "a" => [1, 2, 3, 4, 5],
            "b" => [10.0, 20.0, 30.0, 40.0, 50.0],
        }?;

        let lf = df.lazy();
        let stats = lf.describe(None)?;

        // Should work with a LazyFrame without collecting first
        assert_eq!(stats.shape(), (9, 3));

        Ok(())
    }
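
    // A sketch of an additional check, assuming a frame with no columns should be
    // rejected: `describe_lazy_impl` returns an error when the schema is empty.
    #[test]
    fn test_describe_empty_frame_errors() {
        let df = DataFrame::empty();
        assert!(df.describe(None).is_err());
    }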
|
} |
This is a great tool! I didn't know how to share a fix to make describe.rs work with Polars 0.51.0, so I created a GitHub repository for it, giving credit. Claude Code Sonnet 4.5 helped with the fixes. https://github.com/GreatRedOne99/describe_df