Skip to main content

Course Progress

Loading...

SQL Aggregate Functions

Duration: 85 minutes
Module 3: Session 4

Learning Objectives

  • Master the five core aggregate functions
  • Understand NULL handling in aggregates
  • Use GROUP BY for data segmentation
  • Apply HAVING for group filtering
  • Combine aggregates with JOINs
  • Use DISTINCT with aggregate functions
  • Implement advanced aggregation patterns
  • Optimize aggregate query performance

Understanding Aggregate Functions

Aggregate functions perform calculations on a set of values and return a single value. They're essential for data analysis and reporting! 📊

💡
Key Points About Aggregates
  • Aggregate functions ignore NULL values (except COUNT(*))
  • Cannot mix aggregate and non-aggregate columns without GROUP BY
  • Use DISTINCT to aggregate unique values
  • Aggregates can be used in SELECT, HAVING, and ORDER BY
  • Performance depends on indexes and data volume

COUNT Function - Counting Rows

graph TB COUNT[COUNT Function] --> ALL[COUNT*] COUNT --> COL[COUNT column] COUNT --> DIST[COUNT DISTINCT] ALL --> A1[Counts all rows] COL --> C1[Counts non-NULL values] DIST --> D1[Counts unique values] style COUNT fill:#dcfce7 style ALL fill:#dbeafe style COL fill:#fef3c7 style DIST fill:#fee2e2

COUNT Function Examples

-- ========================================
-- COUNT VARIATIONS
-- ========================================

-- COUNT(*) - Counts all rows including NULLs
SELECT COUNT(*) AS total_orders
FROM orders;

-- COUNT(column) - Counts non-NULL values only
SELECT 
    COUNT(customer_id) AS customers_with_id,
    COUNT(phone) AS customers_with_phone,
    COUNT(*) AS total_customers
FROM customers;

-- COUNT(DISTINCT) - Counts unique values
SELECT 
    COUNT(DISTINCT customer_id) AS unique_customers,
    COUNT(DISTINCT product_id) AS unique_products,
    COUNT(DISTINCT DATE(order_date)) AS days_with_orders,
    COUNT(*) AS total_orders
FROM orders
WHERE order_date >= '2024-01-01';

-- COUNT with conditions (using CASE)
SELECT 
    COUNT(*) AS total_orders,
    COUNT(CASE WHEN status = 'completed' THEN 1 END) AS completed_orders,
    COUNT(CASE WHEN status = 'pending' THEN 1 END) AS pending_orders,
    COUNT(CASE WHEN status = 'cancelled' THEN 1 END) AS cancelled_orders
FROM orders;

-- COUNT with IF (MySQL specific)
SELECT 
    COUNT(IF(amount > 100, 1, NULL)) AS large_orders,
    COUNT(IF(amount <= 100, 1, NULL)) AS small_orders
FROM orders;

-- ========================================
-- COUNT WITH GROUP BY
-- ========================================

-- Count orders per customer
SELECT 
    customer_id,
    COUNT(*) AS order_count
FROM orders
GROUP BY customer_id
ORDER BY order_count DESC;

-- Count products per category
SELECT 
    c.category_name,
    COUNT(p.product_id) AS product_count,
    COUNT(DISTINCT p.brand) AS brand_count
FROM categories c
LEFT JOIN products p ON c.category_id = p.category_id
GROUP BY c.category_id;

-- Count with multiple grouping
SELECT 
    YEAR(order_date) AS year,
    MONTH(order_date) AS month,
    status,
    COUNT(*) AS order_count
FROM orders
GROUP BY YEAR(order_date), MONTH(order_date), status
ORDER BY year DESC, month DESC, status;

-- ========================================
-- ADVANCED COUNT PATTERNS
-- ========================================

-- Running count with variable
SET @row_number = 0;
SELECT 
    (@row_number:=@row_number + 1) AS row_num,
    customer_name,
    email
FROM customers
ORDER BY customer_name;

-- Count with percentage
SELECT 
    status,
    COUNT(*) AS count,
    ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM orders), 2) AS percentage
FROM orders
GROUP BY status;

-- Conditional counting for pivot
SELECT 
    customer_id,
    COUNT(CASE WHEN YEAR(order_date) = 2023 THEN 1 END) AS orders_2023,
    COUNT(CASE WHEN YEAR(order_date) = 2024 THEN 1 END) AS orders_2024,
    COUNT(*) AS total_orders
FROM orders
GROUP BY customer_id
HAVING total_orders > 5;

SUM and AVG Functions

SUM and AVG Examples

-- ========================================
-- SUM FUNCTION
-- ========================================

-- Basic SUM
SELECT 
    SUM(amount) AS total_sales,
    SUM(quantity) AS total_items_sold
FROM order_items;

-- SUM with conditions
SELECT 
    SUM(amount) AS total_revenue,
    SUM(CASE WHEN status = 'completed' THEN amount ELSE 0 END) AS completed_revenue,
    SUM(CASE WHEN status = 'refunded' THEN amount ELSE 0 END) AS refunded_amount
FROM orders;

-- SUM with GROUP BY
SELECT 
    c.category_name,
    SUM(oi.quantity) AS units_sold,
    SUM(oi.quantity * oi.unit_price) AS revenue,
    SUM(oi.quantity * (oi.unit_price - p.cost)) AS profit
FROM order_items oi
JOIN products p ON oi.product_id = p.product_id
JOIN categories c ON p.category_id = c.category_id
GROUP BY c.category_id
ORDER BY revenue DESC;

-- Running SUM (cumulative)
SELECT 
    order_date,
    amount,
    SUM(amount) OVER (ORDER BY order_date) AS running_total
FROM orders
WHERE customer_id = 123;

-- ========================================
-- AVG FUNCTION
-- ========================================

-- Basic AVG
SELECT 
    AVG(amount) AS avg_order_value,
    AVG(DISTINCT amount) AS avg_unique_amounts
FROM orders;

-- AVG with NULL handling
SELECT 
    AVG(rating) AS avg_rating_exclude_null,
    AVG(COALESCE(rating, 0)) AS avg_rating_null_as_zero,
    SUM(rating) / COUNT(*) AS manual_avg_with_nulls
FROM reviews;

-- Weighted average
SELECT 
    SUM(grade * credit_hours) / SUM(credit_hours) AS weighted_gpa
FROM courses
WHERE student_id = 123;

-- AVG with GROUP BY
SELECT 
    p.category_id,
    c.category_name,
    AVG(p.price) AS avg_price,
    ROUND(AVG(p.price), 2) AS avg_price_rounded,
    MIN(p.price) AS min_price,
    MAX(p.price) AS max_price,
    MAX(p.price) - MIN(p.price) AS price_range
FROM products p
JOIN categories c ON p.category_id = c.category_id
GROUP BY p.category_id
HAVING AVG(p.price) > 50
ORDER BY avg_price DESC;

-- Moving average
SELECT 
    order_date,
    amount,
    AVG(amount) OVER (
        ORDER BY order_date 
        ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
    ) AS moving_avg_7_days
FROM orders;

-- ========================================
-- COMBINED AGGREGATES
-- ========================================

-- Statistical summary
SELECT 
    COUNT(*) AS total_orders,
    SUM(amount) AS total_revenue,
    AVG(amount) AS mean_amount,
    MIN(amount) AS min_amount,
    MAX(amount) AS max_amount,
    MAX(amount) - MIN(amount) AS range_amount,
    STD(amount) AS std_deviation,
    VARIANCE(amount) AS variance
FROM orders
WHERE order_date >= DATE_SUB(CURDATE(), INTERVAL 30 DAY);

MIN and MAX Functions

MIN and MAX Applications

-- ========================================
-- MIN FUNCTION
-- ========================================

-- Basic MIN
SELECT 
    MIN(price) AS lowest_price,
    MIN(created_at) AS first_order_date,
    MIN(product_name) AS first_alphabetically
FROM products;

-- MIN with GROUP BY
SELECT 
    category_id,
    MIN(price) AS starting_price,
    MIN(created_at) AS oldest_product
FROM products
GROUP BY category_id;

-- Find products with minimum price per category
SELECT 
    p1.*
FROM products p1
INNER JOIN (
    SELECT category_id, MIN(price) AS min_price
    FROM products
    GROUP BY category_id
) p2 ON p1.category_id = p2.category_id 
    AND p1.price = p2.min_price;

-- ========================================
-- MAX FUNCTION
-- ========================================

-- Basic MAX
SELECT 
    MAX(amount) AS highest_sale,
    MAX(order_date) AS most_recent_order
FROM orders;

-- MAX with conditions
SELECT 
    MAX(CASE WHEN status = 'completed' THEN amount END) AS max_completed,
    MAX(CASE WHEN status = 'pending' THEN amount END) AS max_pending
FROM orders;

-- Find latest record per group
SELECT 
    c.customer_id,
    c.customer_name,
    MAX(o.order_date) AS last_order_date,
    DATEDIFF(CURDATE(), MAX(o.order_date)) AS days_since_last_order
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id;

-- ========================================
-- MIN/MAX WITH STRINGS AND DATES
-- ========================================

-- String comparisons
SELECT 
    MIN(customer_name) AS first_customer,
    MAX(customer_name) AS last_customer,
    MIN(email) AS first_email,
    MAX(email) AS last_email
FROM customers;

-- Date ranges
SELECT 
    product_id,
    product_name,
    MIN(review_date) AS first_review,
    MAX(review_date) AS latest_review,
    DATEDIFF(MAX(review_date), MIN(review_date)) AS review_span_days
FROM reviews r
JOIN products p ON r.product_id = p.product_id
GROUP BY p.product_id
HAVING COUNT(*) > 5;

-- ========================================
-- PRACTICAL MIN/MAX PATTERNS
-- ========================================

-- Find price range per brand
SELECT 
    brand,
    COUNT(*) AS product_count,
    MIN(price) AS min_price,
    MAX(price) AS max_price,
    CONCAT('$', MIN(price), ' - $', MAX(price)) AS price_range
FROM products
WHERE is_active = TRUE
GROUP BY brand
ORDER BY brand;

-- Identify outliers
SELECT 
    product_id,
    product_name,
    price,
    CASE 
        WHEN price = (SELECT MIN(price) FROM products) THEN 'Lowest'
        WHEN price = (SELECT MAX(price) FROM products) THEN 'Highest'
        ELSE 'Normal'
    END AS price_position
FROM products
WHERE price IN (
    (SELECT MIN(price) FROM products),
    (SELECT MAX(price) FROM products)
);

-- Session tracking
SELECT 
    user_id,
    MIN(login_time) AS session_start,
    MAX(activity_time) AS session_end,
    TIMEDIFF(MAX(activity_time), MIN(login_time)) AS session_duration
FROM user_activity
WHERE DATE(login_time) = CURDATE()
GROUP BY user_id, session_id;

Advanced Aggregate Patterns

🚀
Pro Aggregate Techniques
Master these advanced patterns for complex data analysis!

Advanced Aggregation Techniques

-- ========================================
-- GROUP_CONCAT - Aggregate strings
-- ========================================

-- Concatenate values into a single string
SELECT 
    customer_id,
    GROUP_CONCAT(product_name) AS products_ordered,
    GROUP_CONCAT(product_name SEPARATOR ', ') AS products_comma_separated,
    GROUP_CONCAT(DISTINCT category ORDER BY category) AS categories
FROM orders o
JOIN order_items oi ON o.order_id = oi.order_id
JOIN products p ON oi.product_id = p.product_id
GROUP BY customer_id;

-- Custom formatting with GROUP_CONCAT
SELECT 
    category_id,
    GROUP_CONCAT(
        CONCAT(product_name, ' ($', price, ')')
        ORDER BY price DESC
        SEPARATOR ' | '
    ) AS product_list
FROM products
GROUP BY category_id;

-- ========================================
-- STATISTICAL AGGREGATES
-- ========================================

SELECT 
    product_id,
    COUNT(rating) AS review_count,
    AVG(rating) AS mean_rating,
    STD(rating) AS std_dev,
    VARIANCE(rating) AS variance,
    -- Median approximation
    SUBSTRING_INDEX(
        SUBSTRING_INDEX(
            GROUP_CONCAT(rating ORDER BY rating SEPARATOR ','),
            ',',
            CEIL(COUNT(*) / 2)
        ),
        ',',
        -1
    ) AS median_rating
FROM reviews
GROUP BY product_id
HAVING review_count >= 10;

-- ========================================
-- CONDITIONAL AGGREGATES (Pivoting)
-- ========================================

-- Pivot table with conditional aggregates
SELECT 
    DATE(order_date) AS order_date,
    -- Revenue by status
    SUM(CASE WHEN status = 'completed' THEN amount ELSE 0 END) AS completed_revenue,
    SUM(CASE WHEN status = 'pending' THEN amount ELSE 0 END) AS pending_revenue,
    SUM(CASE WHEN status = 'cancelled' THEN amount ELSE 0 END) AS cancelled_revenue,
    -- Count by payment method
    COUNT(CASE WHEN payment_method = 'credit_card' THEN 1 END) AS credit_card_orders,
    COUNT(CASE WHEN payment_method = 'paypal' THEN 1 END) AS paypal_orders,
    COUNT(CASE WHEN payment_method = 'cash' THEN 1 END) AS cash_orders,
    -- Overall metrics
    COUNT(*) AS total_orders,
    SUM(amount) AS total_revenue
FROM orders
WHERE order_date >= DATE_SUB(CURDATE(), INTERVAL 7 DAY)
GROUP BY DATE(order_date)
ORDER BY order_date DESC;

-- ========================================
-- ROLLUP and CUBE (Subtotals)
-- ========================================

-- ROLLUP for hierarchical subtotals
SELECT 
    COALESCE(category, 'TOTAL') AS category,
    COALESCE(brand, 'Subtotal') AS brand,
    COUNT(*) AS product_count,
    AVG(price) AS avg_price,
    SUM(stock_quantity * price) AS inventory_value
FROM products
GROUP BY category, brand WITH ROLLUP;

-- ========================================
-- Window Functions with Aggregates
-- ========================================

-- Ranking with aggregates
SELECT 
    customer_id,
    total_spent,
    RANK() OVER (ORDER BY total_spent DESC) AS spending_rank,
    NTILE(4) OVER (ORDER BY total_spent DESC) AS quartile,
    total_spent / SUM(total_spent) OVER () * 100 AS percentage_of_total
FROM (
    SELECT 
        customer_id,
        SUM(amount) AS total_spent
    FROM orders
    WHERE status = 'completed'
    GROUP BY customer_id
) customer_totals;

-- ========================================
-- AGGREGATE PERFORMANCE OPTIMIZATION
-- ========================================

-- Use indexes on GROUP BY columns
CREATE INDEX idx_order_customer_date ON orders(customer_id, order_date);

-- Pre-aggregate for performance
CREATE TABLE daily_sales_summary AS
SELECT 
    DATE(order_date) AS sale_date,
    COUNT(*) AS order_count,
    SUM(amount) AS total_revenue,
    AVG(amount) AS avg_order_value
FROM orders
GROUP BY DATE(order_date);

-- Use materialized view (if supported)
CREATE VIEW monthly_product_stats AS
SELECT 
    product_id,
    YEAR(order_date) AS year,
    MONTH(order_date) AS month,
    COUNT(DISTINCT o.order_id) AS order_count,
    SUM(oi.quantity) AS units_sold,
    SUM(oi.quantity * oi.unit_price) AS revenue
FROM orders o
JOIN order_items oi ON o.order_id = oi.order_id
GROUP BY product_id, YEAR(order_date), MONTH(order_date);

Practice Exercise

💻
Aggregate Function Challenges
Solve these real-world aggregation problems:
  1. Calculate the average order value per customer segment
  2. Find the top 3 selling products in each category
  3. Calculate month-over-month growth percentage
  4. Find customers whose average order exceeds overall average
  5. Create a summary report with multiple aggregates
  6. Calculate running totals and moving averages
  7. Find the median order value per category
  8. Build a customer lifetime value calculation
💡 Click for Solutions
-- 1. Average order value per customer segment
SELECT 
    CASE 
        WHEN total_orders < 3 THEN 'New'
        WHEN total_orders < 10 THEN 'Regular'
        ELSE 'VIP'
    END AS customer_segment,
    COUNT(*) AS customer_count,
    AVG(avg_order_value) AS segment_avg_order_value,
    MIN(avg_order_value) AS min_avg_order,
    MAX(avg_order_value) AS max_avg_order
FROM (
    SELECT 
        customer_id,
        COUNT(*) AS total_orders,
        AVG(amount) AS avg_order_value
    FROM orders
    WHERE status = 'completed'
    GROUP BY customer_id
) customer_stats
GROUP BY customer_segment
ORDER BY segment_avg_order_value DESC;

-- 2. Top 3 products per category
WITH product_rankings AS (
    SELECT 
        p.category_id,
        c.category_name,
        p.product_id,
        p.product_name,
        SUM(oi.quantity) AS units_sold,
        SUM(oi.quantity * oi.unit_price) AS revenue,
        ROW_NUMBER() OVER (
            PARTITION BY p.category_id 
            ORDER BY SUM(oi.quantity * oi.unit_price) DESC
        ) AS rank_in_category
    FROM products p
    JOIN categories c ON p.category_id = c.category_id
    JOIN order_items oi ON p.product_id = oi.product_id
    JOIN orders o ON oi.order_id = o.order_id
    WHERE o.status = 'completed'
    GROUP BY p.category_id, p.product_id
)
SELECT *
FROM product_rankings
WHERE rank_in_category <= 3
ORDER BY category_id, rank_in_category;

-- 3. Month-over-month growth
WITH monthly_sales AS (
    SELECT 
        DATE_FORMAT(order_date, '%Y-%m') AS month,
        SUM(amount) AS revenue
    FROM orders
    WHERE status = 'completed'
    GROUP BY DATE_FORMAT(order_date, '%Y-%m')
)
SELECT 
    month,
    revenue,
    LAG(revenue) OVER (ORDER BY month) AS prev_month_revenue,
    ROUND(
        (revenue - LAG(revenue) OVER (ORDER BY month)) / 
        LAG(revenue) OVER (ORDER BY month) * 100, 
        2
    ) AS growth_percentage
FROM monthly_sales
ORDER BY month;

-- 4. Customers above average
WITH overall_avg AS (
    SELECT AVG(amount) AS avg_order_value
    FROM orders
    WHERE status = 'completed'
)
SELECT 
    c.customer_id,
    c.customer_name,
    COUNT(o.order_id) AS order_count,
    AVG(o.amount) AS customer_avg_order,
    (SELECT avg_order_value FROM overall_avg) AS overall_average,
    AVG(o.amount) - (SELECT avg_order_value FROM overall_avg) AS difference
FROM customers c
JOIN orders o ON c.customer_id = o.customer_id
WHERE o.status = 'completed'
GROUP BY c.customer_id
HAVING AVG(o.amount) > (SELECT avg_order_value FROM overall_avg)
ORDER BY customer_avg_order DESC;

-- 5. Comprehensive summary report
SELECT 
    'Overall Statistics' AS metric,
    COUNT(DISTINCT customer_id) AS unique_customers,
    COUNT(*) AS total_orders,
    SUM(amount) AS total_revenue,
    AVG(amount) AS avg_order_value,
    MIN(amount) AS min_order,
    MAX(amount) AS max_order,
    STD(amount) AS std_deviation
FROM orders
WHERE order_date >= DATE_SUB(CURDATE(), INTERVAL 30 DAY)

UNION ALL

SELECT 
    CONCAT('Status: ', status) AS metric,
    COUNT(DISTINCT customer_id),
    COUNT(*),
    SUM(amount),
    AVG(amount),
    MIN(amount),
    MAX(amount),
    STD(amount)
FROM orders
WHERE order_date >= DATE_SUB(CURDATE(), INTERVAL 30 DAY)
GROUP BY status;

-- 6. Running totals and moving averages
SELECT 
    order_date,
    amount,
    SUM(amount) OVER (ORDER BY order_date) AS running_total,
    AVG(amount) OVER (
        ORDER BY order_date 
        ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
    ) AS moving_avg_7_days,
    COUNT(*) OVER (
        ORDER BY order_date 
        ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
    ) AS orders_in_window
FROM orders
WHERE customer_id = 123
ORDER BY order_date;

-- 7. Median calculation
SELECT 
    category_id,
    AVG(price) AS mean_price,
    (
        SELECT AVG(price)
        FROM (
            SELECT price
            FROM products p2
            WHERE p2.category_id = p1.category_id
            ORDER BY price
            LIMIT 2 - (SELECT COUNT(*) FROM products p3 WHERE p3.category_id = p1.category_id) % 2
            OFFSET (SELECT (COUNT(*) - 1) / 2 FROM products p4 WHERE p4.category_id = p1.category_id)
        ) AS median_calc
    ) AS median_price
FROM products p1
GROUP BY category_id;

-- 8. Customer lifetime value
SELECT 
    c.customer_id,
    c.customer_name,
    DATE(c.created_at) AS acquisition_date,
    COUNT(DISTINCT o.order_id) AS total_orders,
    SUM(o.amount) AS lifetime_revenue,
    AVG(o.amount) AS avg_order_value,
    DATEDIFF(MAX(o.order_date), MIN(o.order_date)) AS customer_lifespan_days,
    SUM(o.amount) / GREATEST(DATEDIFF(CURDATE(), c.created_at), 1) AS daily_value,
    SUM(o.amount) / GREATEST(DATEDIFF(CURDATE(), c.created_at) / 30, 1) AS monthly_value
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id 
    AND o.status = 'completed'
GROUP BY c.customer_id
ORDER BY lifetime_revenue DESC;

Additional Resources