SQL Aggregate Functions
Learning Objectives
- Master the five core aggregate functions
- Understand NULL handling in aggregates
- Use GROUP BY for data segmentation
- Apply HAVING for group filtering
- Combine aggregates with JOINs
- Use DISTINCT with aggregate functions
- Implement advanced aggregation patterns
- Optimize aggregate query performance
Understanding Aggregate Functions
Aggregate functions perform calculations on a set of values and return a single value. They're essential for data analysis and reporting! 📊
Key Points About Aggregates
COUNT Function - Counting Rows
graph TB
COUNT[COUNT Function] --> ALL[COUNT*]
COUNT --> COL[COUNT column]
COUNT --> DIST[COUNT DISTINCT]
ALL --> A1[Counts all rows]
COL --> C1[Counts non-NULL values]
DIST --> D1[Counts unique values]
style COUNT fill:#dcfce7
style ALL fill:#dbeafe
style COL fill:#fef3c7
style DIST fill:#fee2e2
COUNT Function Examples
-- ========================================
-- COUNT VARIATIONS
-- ========================================
-- COUNT(*) - Counts all rows including NULLs
SELECT COUNT(*) AS total_orders
FROM orders;
-- COUNT(column) - Counts non-NULL values only
SELECT
COUNT(customer_id) AS customers_with_id,
COUNT(phone) AS customers_with_phone,
COUNT(*) AS total_customers
FROM customers;
-- COUNT(DISTINCT) - Counts unique values
SELECT
COUNT(DISTINCT customer_id) AS unique_customers,
COUNT(DISTINCT product_id) AS unique_products,
COUNT(DISTINCT DATE(order_date)) AS days_with_orders,
COUNT(*) AS total_orders
FROM orders
WHERE order_date >= '2024-01-01';
-- COUNT with conditions (using CASE)
SELECT
COUNT(*) AS total_orders,
COUNT(CASE WHEN status = 'completed' THEN 1 END) AS completed_orders,
COUNT(CASE WHEN status = 'pending' THEN 1 END) AS pending_orders,
COUNT(CASE WHEN status = 'cancelled' THEN 1 END) AS cancelled_orders
FROM orders;
-- COUNT with IF (MySQL specific)
SELECT
COUNT(IF(amount > 100, 1, NULL)) AS large_orders,
COUNT(IF(amount <= 100, 1, NULL)) AS small_orders
FROM orders;
-- ========================================
-- COUNT WITH GROUP BY
-- ========================================
-- Count orders per customer
SELECT
customer_id,
COUNT(*) AS order_count
FROM orders
GROUP BY customer_id
ORDER BY order_count DESC;
-- Count products per category
SELECT
c.category_name,
COUNT(p.product_id) AS product_count,
COUNT(DISTINCT p.brand) AS brand_count
FROM categories c
LEFT JOIN products p ON c.category_id = p.category_id
GROUP BY c.category_id;
-- Count with multiple grouping
SELECT
YEAR(order_date) AS year,
MONTH(order_date) AS month,
status,
COUNT(*) AS order_count
FROM orders
GROUP BY YEAR(order_date), MONTH(order_date), status
ORDER BY year DESC, month DESC, status;
-- ========================================
-- ADVANCED COUNT PATTERNS
-- ========================================
-- Running count with variable
SET @row_number = 0;
SELECT
(@row_number:=@row_number + 1) AS row_num,
customer_name,
email
FROM customers
ORDER BY customer_name;
-- Count with percentage
SELECT
status,
COUNT(*) AS count,
ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM orders), 2) AS percentage
FROM orders
GROUP BY status;
-- Conditional counting for pivot
SELECT
customer_id,
COUNT(CASE WHEN YEAR(order_date) = 2023 THEN 1 END) AS orders_2023,
COUNT(CASE WHEN YEAR(order_date) = 2024 THEN 1 END) AS orders_2024,
COUNT(*) AS total_orders
FROM orders
GROUP BY customer_id
HAVING total_orders > 5;
SUM and AVG Functions
SUM and AVG Examples
-- ========================================
-- SUM FUNCTION
-- ========================================
-- Basic SUM
SELECT
SUM(amount) AS total_sales,
SUM(quantity) AS total_items_sold
FROM order_items;
-- SUM with conditions
SELECT
SUM(amount) AS total_revenue,
SUM(CASE WHEN status = 'completed' THEN amount ELSE 0 END) AS completed_revenue,
SUM(CASE WHEN status = 'refunded' THEN amount ELSE 0 END) AS refunded_amount
FROM orders;
-- SUM with GROUP BY
SELECT
c.category_name,
SUM(oi.quantity) AS units_sold,
SUM(oi.quantity * oi.unit_price) AS revenue,
SUM(oi.quantity * (oi.unit_price - p.cost)) AS profit
FROM order_items oi
JOIN products p ON oi.product_id = p.product_id
JOIN categories c ON p.category_id = c.category_id
GROUP BY c.category_id
ORDER BY revenue DESC;
-- Running SUM (cumulative)
SELECT
order_date,
amount,
SUM(amount) OVER (ORDER BY order_date) AS running_total
FROM orders
WHERE customer_id = 123;
-- ========================================
-- AVG FUNCTION
-- ========================================
-- Basic AVG
SELECT
AVG(amount) AS avg_order_value,
AVG(DISTINCT amount) AS avg_unique_amounts
FROM orders;
-- AVG with NULL handling
SELECT
AVG(rating) AS avg_rating_exclude_null,
AVG(COALESCE(rating, 0)) AS avg_rating_null_as_zero,
SUM(rating) / COUNT(*) AS manual_avg_with_nulls
FROM reviews;
-- Weighted average
SELECT
SUM(grade * credit_hours) / SUM(credit_hours) AS weighted_gpa
FROM courses
WHERE student_id = 123;
-- AVG with GROUP BY
SELECT
p.category_id,
c.category_name,
AVG(p.price) AS avg_price,
ROUND(AVG(p.price), 2) AS avg_price_rounded,
MIN(p.price) AS min_price,
MAX(p.price) AS max_price,
MAX(p.price) - MIN(p.price) AS price_range
FROM products p
JOIN categories c ON p.category_id = c.category_id
GROUP BY p.category_id
HAVING AVG(p.price) > 50
ORDER BY avg_price DESC;
-- Moving average
SELECT
order_date,
amount,
AVG(amount) OVER (
ORDER BY order_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) AS moving_avg_7_days
FROM orders;
-- ========================================
-- COMBINED AGGREGATES
-- ========================================
-- Statistical summary
SELECT
COUNT(*) AS total_orders,
SUM(amount) AS total_revenue,
AVG(amount) AS mean_amount,
MIN(amount) AS min_amount,
MAX(amount) AS max_amount,
MAX(amount) - MIN(amount) AS range_amount,
STD(amount) AS std_deviation,
VARIANCE(amount) AS variance
FROM orders
WHERE order_date >= DATE_SUB(CURDATE(), INTERVAL 30 DAY);
MIN and MAX Functions
MIN and MAX Applications
-- ========================================
-- MIN FUNCTION
-- ========================================
-- Basic MIN
SELECT
MIN(price) AS lowest_price,
MIN(created_at) AS first_order_date,
MIN(product_name) AS first_alphabetically
FROM products;
-- MIN with GROUP BY
SELECT
category_id,
MIN(price) AS starting_price,
MIN(created_at) AS oldest_product
FROM products
GROUP BY category_id;
-- Find products with minimum price per category
SELECT
p1.*
FROM products p1
INNER JOIN (
SELECT category_id, MIN(price) AS min_price
FROM products
GROUP BY category_id
) p2 ON p1.category_id = p2.category_id
AND p1.price = p2.min_price;
-- ========================================
-- MAX FUNCTION
-- ========================================
-- Basic MAX
SELECT
MAX(amount) AS highest_sale,
MAX(order_date) AS most_recent_order
FROM orders;
-- MAX with conditions
SELECT
MAX(CASE WHEN status = 'completed' THEN amount END) AS max_completed,
MAX(CASE WHEN status = 'pending' THEN amount END) AS max_pending
FROM orders;
-- Find latest record per group
SELECT
c.customer_id,
c.customer_name,
MAX(o.order_date) AS last_order_date,
DATEDIFF(CURDATE(), MAX(o.order_date)) AS days_since_last_order
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id;
-- ========================================
-- MIN/MAX WITH STRINGS AND DATES
-- ========================================
-- String comparisons
SELECT
MIN(customer_name) AS first_customer,
MAX(customer_name) AS last_customer,
MIN(email) AS first_email,
MAX(email) AS last_email
FROM customers;
-- Date ranges
SELECT
product_id,
product_name,
MIN(review_date) AS first_review,
MAX(review_date) AS latest_review,
DATEDIFF(MAX(review_date), MIN(review_date)) AS review_span_days
FROM reviews r
JOIN products p ON r.product_id = p.product_id
GROUP BY p.product_id
HAVING COUNT(*) > 5;
-- ========================================
-- PRACTICAL MIN/MAX PATTERNS
-- ========================================
-- Find price range per brand
SELECT
brand,
COUNT(*) AS product_count,
MIN(price) AS min_price,
MAX(price) AS max_price,
CONCAT('$', MIN(price), ' - $', MAX(price)) AS price_range
FROM products
WHERE is_active = TRUE
GROUP BY brand
ORDER BY brand;
-- Identify outliers
SELECT
product_id,
product_name,
price,
CASE
WHEN price = (SELECT MIN(price) FROM products) THEN 'Lowest'
WHEN price = (SELECT MAX(price) FROM products) THEN 'Highest'
ELSE 'Normal'
END AS price_position
FROM products
WHERE price IN (
(SELECT MIN(price) FROM products),
(SELECT MAX(price) FROM products)
);
-- Session tracking
SELECT
user_id,
MIN(login_time) AS session_start,
MAX(activity_time) AS session_end,
TIMEDIFF(MAX(activity_time), MIN(login_time)) AS session_duration
FROM user_activity
WHERE DATE(login_time) = CURDATE()
GROUP BY user_id, session_id;
Advanced Aggregate Patterns
Pro Aggregate Techniques
Advanced Aggregation Techniques
-- ========================================
-- GROUP_CONCAT - Aggregate strings
-- ========================================
-- Concatenate values into a single string
SELECT
customer_id,
GROUP_CONCAT(product_name) AS products_ordered,
GROUP_CONCAT(product_name SEPARATOR ', ') AS products_comma_separated,
GROUP_CONCAT(DISTINCT category ORDER BY category) AS categories
FROM orders o
JOIN order_items oi ON o.order_id = oi.order_id
JOIN products p ON oi.product_id = p.product_id
GROUP BY customer_id;
-- Custom formatting with GROUP_CONCAT
SELECT
category_id,
GROUP_CONCAT(
CONCAT(product_name, ' ($', price, ')')
ORDER BY price DESC
SEPARATOR ' | '
) AS product_list
FROM products
GROUP BY category_id;
-- ========================================
-- STATISTICAL AGGREGATES
-- ========================================
SELECT
product_id,
COUNT(rating) AS review_count,
AVG(rating) AS mean_rating,
STD(rating) AS std_dev,
VARIANCE(rating) AS variance,
-- Median approximation
SUBSTRING_INDEX(
SUBSTRING_INDEX(
GROUP_CONCAT(rating ORDER BY rating SEPARATOR ','),
',',
CEIL(COUNT(*) / 2)
),
',',
-1
) AS median_rating
FROM reviews
GROUP BY product_id
HAVING review_count >= 10;
-- ========================================
-- CONDITIONAL AGGREGATES (Pivoting)
-- ========================================
-- Pivot table with conditional aggregates
SELECT
DATE(order_date) AS order_date,
-- Revenue by status
SUM(CASE WHEN status = 'completed' THEN amount ELSE 0 END) AS completed_revenue,
SUM(CASE WHEN status = 'pending' THEN amount ELSE 0 END) AS pending_revenue,
SUM(CASE WHEN status = 'cancelled' THEN amount ELSE 0 END) AS cancelled_revenue,
-- Count by payment method
COUNT(CASE WHEN payment_method = 'credit_card' THEN 1 END) AS credit_card_orders,
COUNT(CASE WHEN payment_method = 'paypal' THEN 1 END) AS paypal_orders,
COUNT(CASE WHEN payment_method = 'cash' THEN 1 END) AS cash_orders,
-- Overall metrics
COUNT(*) AS total_orders,
SUM(amount) AS total_revenue
FROM orders
WHERE order_date >= DATE_SUB(CURDATE(), INTERVAL 7 DAY)
GROUP BY DATE(order_date)
ORDER BY order_date DESC;
-- ========================================
-- ROLLUP and CUBE (Subtotals)
-- ========================================
-- ROLLUP for hierarchical subtotals
SELECT
COALESCE(category, 'TOTAL') AS category,
COALESCE(brand, 'Subtotal') AS brand,
COUNT(*) AS product_count,
AVG(price) AS avg_price,
SUM(stock_quantity * price) AS inventory_value
FROM products
GROUP BY category, brand WITH ROLLUP;
-- ========================================
-- Window Functions with Aggregates
-- ========================================
-- Ranking with aggregates
SELECT
customer_id,
total_spent,
RANK() OVER (ORDER BY total_spent DESC) AS spending_rank,
NTILE(4) OVER (ORDER BY total_spent DESC) AS quartile,
total_spent / SUM(total_spent) OVER () * 100 AS percentage_of_total
FROM (
SELECT
customer_id,
SUM(amount) AS total_spent
FROM orders
WHERE status = 'completed'
GROUP BY customer_id
) customer_totals;
-- ========================================
-- AGGREGATE PERFORMANCE OPTIMIZATION
-- ========================================
-- Use indexes on GROUP BY columns
CREATE INDEX idx_order_customer_date ON orders(customer_id, order_date);
-- Pre-aggregate for performance
CREATE TABLE daily_sales_summary AS
SELECT
DATE(order_date) AS sale_date,
COUNT(*) AS order_count,
SUM(amount) AS total_revenue,
AVG(amount) AS avg_order_value
FROM orders
GROUP BY DATE(order_date);
-- Use materialized view (if supported)
CREATE VIEW monthly_product_stats AS
SELECT
product_id,
YEAR(order_date) AS year,
MONTH(order_date) AS month,
COUNT(DISTINCT o.order_id) AS order_count,
SUM(oi.quantity) AS units_sold,
SUM(oi.quantity * oi.unit_price) AS revenue
FROM orders o
JOIN order_items oi ON o.order_id = oi.order_id
GROUP BY product_id, YEAR(order_date), MONTH(order_date);
Practice Exercise
Aggregate Function Challenges
💡 Click for Solutions
-- 1. Average order value per customer segment
SELECT
CASE
WHEN total_orders < 3 THEN 'New'
WHEN total_orders < 10 THEN 'Regular'
ELSE 'VIP'
END AS customer_segment,
COUNT(*) AS customer_count,
AVG(avg_order_value) AS segment_avg_order_value,
MIN(avg_order_value) AS min_avg_order,
MAX(avg_order_value) AS max_avg_order
FROM (
SELECT
customer_id,
COUNT(*) AS total_orders,
AVG(amount) AS avg_order_value
FROM orders
WHERE status = 'completed'
GROUP BY customer_id
) customer_stats
GROUP BY customer_segment
ORDER BY segment_avg_order_value DESC;
-- 2. Top 3 products per category
WITH product_rankings AS (
SELECT
p.category_id,
c.category_name,
p.product_id,
p.product_name,
SUM(oi.quantity) AS units_sold,
SUM(oi.quantity * oi.unit_price) AS revenue,
ROW_NUMBER() OVER (
PARTITION BY p.category_id
ORDER BY SUM(oi.quantity * oi.unit_price) DESC
) AS rank_in_category
FROM products p
JOIN categories c ON p.category_id = c.category_id
JOIN order_items oi ON p.product_id = oi.product_id
JOIN orders o ON oi.order_id = o.order_id
WHERE o.status = 'completed'
GROUP BY p.category_id, p.product_id
)
SELECT *
FROM product_rankings
WHERE rank_in_category <= 3
ORDER BY category_id, rank_in_category;
-- 3. Month-over-month growth
WITH monthly_sales AS (
SELECT
DATE_FORMAT(order_date, '%Y-%m') AS month,
SUM(amount) AS revenue
FROM orders
WHERE status = 'completed'
GROUP BY DATE_FORMAT(order_date, '%Y-%m')
)
SELECT
month,
revenue,
LAG(revenue) OVER (ORDER BY month) AS prev_month_revenue,
ROUND(
(revenue - LAG(revenue) OVER (ORDER BY month)) /
LAG(revenue) OVER (ORDER BY month) * 100,
2
) AS growth_percentage
FROM monthly_sales
ORDER BY month;
-- 4. Customers above average
WITH overall_avg AS (
SELECT AVG(amount) AS avg_order_value
FROM orders
WHERE status = 'completed'
)
SELECT
c.customer_id,
c.customer_name,
COUNT(o.order_id) AS order_count,
AVG(o.amount) AS customer_avg_order,
(SELECT avg_order_value FROM overall_avg) AS overall_average,
AVG(o.amount) - (SELECT avg_order_value FROM overall_avg) AS difference
FROM customers c
JOIN orders o ON c.customer_id = o.customer_id
WHERE o.status = 'completed'
GROUP BY c.customer_id
HAVING AVG(o.amount) > (SELECT avg_order_value FROM overall_avg)
ORDER BY customer_avg_order DESC;
-- 5. Comprehensive summary report
SELECT
'Overall Statistics' AS metric,
COUNT(DISTINCT customer_id) AS unique_customers,
COUNT(*) AS total_orders,
SUM(amount) AS total_revenue,
AVG(amount) AS avg_order_value,
MIN(amount) AS min_order,
MAX(amount) AS max_order,
STD(amount) AS std_deviation
FROM orders
WHERE order_date >= DATE_SUB(CURDATE(), INTERVAL 30 DAY)
UNION ALL
SELECT
CONCAT('Status: ', status) AS metric,
COUNT(DISTINCT customer_id),
COUNT(*),
SUM(amount),
AVG(amount),
MIN(amount),
MAX(amount),
STD(amount)
FROM orders
WHERE order_date >= DATE_SUB(CURDATE(), INTERVAL 30 DAY)
GROUP BY status;
-- 6. Running totals and moving averages
SELECT
order_date,
amount,
SUM(amount) OVER (ORDER BY order_date) AS running_total,
AVG(amount) OVER (
ORDER BY order_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) AS moving_avg_7_days,
COUNT(*) OVER (
ORDER BY order_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) AS orders_in_window
FROM orders
WHERE customer_id = 123
ORDER BY order_date;
-- 7. Median calculation
SELECT
category_id,
AVG(price) AS mean_price,
(
SELECT AVG(price)
FROM (
SELECT price
FROM products p2
WHERE p2.category_id = p1.category_id
ORDER BY price
LIMIT 2 - (SELECT COUNT(*) FROM products p3 WHERE p3.category_id = p1.category_id) % 2
OFFSET (SELECT (COUNT(*) - 1) / 2 FROM products p4 WHERE p4.category_id = p1.category_id)
) AS median_calc
) AS median_price
FROM products p1
GROUP BY category_id;
-- 8. Customer lifetime value
SELECT
c.customer_id,
c.customer_name,
DATE(c.created_at) AS acquisition_date,
COUNT(DISTINCT o.order_id) AS total_orders,
SUM(o.amount) AS lifetime_revenue,
AVG(o.amount) AS avg_order_value,
DATEDIFF(MAX(o.order_date), MIN(o.order_date)) AS customer_lifespan_days,
SUM(o.amount) / GREATEST(DATEDIFF(CURDATE(), c.created_at), 1) AS daily_value,
SUM(o.amount) / GREATEST(DATEDIFF(CURDATE(), c.created_at) / 30, 1) AS monthly_value
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
AND o.status = 'completed'
GROUP BY c.customer_id
ORDER BY lifetime_revenue DESC;