Mastering SQL JOINs
Learning Objectives
- Understand the concept of table relationships
- Master INNER JOIN for matching records
- Use LEFT/RIGHT JOIN for optional relationships
- Implement FULL OUTER JOIN (MySQL workaround)
- Apply CROSS JOIN and SELF JOIN
- Optimize JOIN performance with indexes
- Handle NULL values in JOINs
- Write complex multi-table queries
Understanding SQL JOINs
JOINs combine rows from two or more tables based on related columns. They're the heart of relational databases! 🔗
JOIN Quick Reference
INNER JOIN - The Foundation
graph LR
A[Table A] -->|matching
records| C[Result] B[Table B] -->|matching
records| C style A fill:#dbeafe style B fill:#dcfce7 style C fill:#fef3c7
records| C[Result] B[Table B] -->|matching
records| C style A fill:#dbeafe style B fill:#dcfce7 style C fill:#fef3c7
INNER JOIN Examples
-- ========================================
-- BASIC INNER JOIN
-- ========================================
-- Simple INNER JOIN
SELECT
c.customer_id,
c.customer_name,
o.order_id,
o.order_date,
o.total_amount
FROM customers c
INNER JOIN orders o ON c.customer_id = o.customer_id;
-- Multiple conditions in JOIN
SELECT
p.product_name,
oi.quantity,
oi.unit_price,
o.order_date
FROM products p
INNER JOIN order_items oi
ON p.product_id = oi.product_id
AND oi.unit_price > 100 -- Additional condition
INNER JOIN orders o
ON oi.order_id = o.order_id;
-- Using USING clause (when column names match)
SELECT
c.customer_name,
o.order_id,
o.total_amount
FROM customers c
INNER JOIN orders o USING (customer_id);
-- ========================================
-- MULTIPLE TABLE JOINS
-- ========================================
-- Joining 3 tables
SELECT
c.customer_name,
o.order_id,
p.product_name,
oi.quantity,
oi.unit_price
FROM customers c
INNER JOIN orders o ON c.customer_id = o.customer_id
INNER JOIN order_items oi ON o.order_id = oi.order_id
INNER JOIN products p ON oi.product_id = p.product_id
WHERE o.order_date >= '2024-01-01';
-- Complex join with aggregation
SELECT
c.customer_id,
c.customer_name,
COUNT(DISTINCT o.order_id) AS order_count,
SUM(oi.quantity * oi.unit_price) AS total_spent,
AVG(oi.quantity * oi.unit_price) AS avg_order_value
FROM customers c
INNER JOIN orders o ON c.customer_id = o.customer_id
INNER JOIN order_items oi ON o.order_id = oi.order_id
GROUP BY c.customer_id
HAVING total_spent > 1000
ORDER BY total_spent DESC;
-- ========================================
-- INNER JOIN WITH SUBQUERIES
-- ========================================
-- Join with derived table
SELECT
c.customer_name,
c.email,
customer_stats.total_orders,
customer_stats.total_spent
FROM customers c
INNER JOIN (
SELECT
customer_id,
COUNT(*) AS total_orders,
SUM(total_amount) AS total_spent
FROM orders
WHERE status = 'completed'
GROUP BY customer_id
) AS customer_stats ON c.customer_id = customer_stats.customer_id
WHERE customer_stats.total_orders > 5;
-- ========================================
-- NATURAL JOIN (use with caution!)
-- ========================================
-- Natural join automatically matches columns with same names
SELECT *
FROM customers
NATURAL JOIN orders; -- Automatically joins on customer_id
-- Equivalent to:
SELECT *
FROM customers c
INNER JOIN orders o USING (customer_id);
LEFT JOIN (LEFT OUTER JOIN)
LEFT JOIN Use Cases
-- ========================================
-- LEFT JOIN EXAMPLES
-- ========================================
-- Find all customers with their orders (including those with no orders)
SELECT
c.customer_id,
c.customer_name,
c.email,
COUNT(o.order_id) AS order_count,
COALESCE(SUM(o.total_amount), 0) AS total_spent
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id;
-- Find customers who have NEVER placed an order
SELECT
c.customer_id,
c.customer_name,
c.registration_date
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
WHERE o.order_id IS NULL; -- Key pattern for "not exists"
-- Products with optional reviews
SELECT
p.product_id,
p.product_name,
p.price,
COUNT(r.review_id) AS review_count,
COALESCE(AVG(r.rating), 0) AS avg_rating
FROM products p
LEFT JOIN reviews r ON p.product_id = r.product_id
GROUP BY p.product_id
ORDER BY avg_rating DESC;
-- ========================================
-- MULTIPLE LEFT JOINS
-- ========================================
-- Complete customer profile with optional data
SELECT
c.customer_id,
c.customer_name,
a.street_address,
a.city,
ph.phone_number,
COUNT(DISTINCT o.order_id) AS order_count,
COUNT(DISTINCT r.review_id) AS review_count
FROM customers c
LEFT JOIN addresses a ON c.customer_id = a.customer_id
AND a.is_primary = TRUE
LEFT JOIN phones ph ON c.customer_id = ph.customer_id
AND ph.phone_type = 'primary'
LEFT JOIN orders o ON c.customer_id = o.customer_id
LEFT JOIN reviews r ON c.customer_id = r.user_id
GROUP BY c.customer_id;
-- ========================================
-- LEFT JOIN WITH WHERE CONDITIONS
-- ========================================
-- Be careful with WHERE on LEFT JOIN!
-- Wrong: This turns LEFT JOIN into INNER JOIN
SELECT c.*, o.*
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
WHERE o.status = 'completed'; -- Filters out NULLs!
-- Correct: Put condition in ON clause
SELECT c.*, o.*
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
AND o.status = 'completed'; -- Preserves NULLs
-- Or use WHERE with NULL check
SELECT c.*, o.*
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
WHERE o.status = 'completed' OR o.status IS NULL;
-- ========================================
-- LEFT JOIN FOR GAP ANALYSIS
-- ========================================
-- Find products not in any active order
SELECT
p.product_id,
p.product_name,
p.stock_quantity
FROM products p
LEFT JOIN (
SELECT DISTINCT oi.product_id
FROM order_items oi
JOIN orders o ON oi.order_id = o.order_id
WHERE o.status IN ('pending', 'processing')
) AS active_products ON p.product_id = active_products.product_id
WHERE active_products.product_id IS NULL
AND p.stock_quantity > 0;
RIGHT JOIN and FULL OUTER JOIN
MySQL FULL OUTER JOIN
-- ========================================
-- RIGHT JOIN (Less Common)
-- ========================================
-- RIGHT JOIN example (rarely used, LEFT JOIN preferred)
SELECT
o.order_id,
o.order_date,
c.customer_name
FROM orders o
RIGHT JOIN customers c ON o.customer_id = c.customer_id;
-- Equivalent LEFT JOIN (more readable)
SELECT
o.order_id,
o.order_date,
c.customer_name
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id;
-- ========================================
-- FULL OUTER JOIN (MySQL Workaround)
-- ========================================
-- MySQL doesn't support FULL OUTER JOIN, use UNION
-- Find all customers and all orders, even unmatched ones
-- Method 1: UNION of LEFT and RIGHT JOIN
SELECT
c.customer_id,
c.customer_name,
o.order_id,
o.order_date
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
UNION
SELECT
c.customer_id,
c.customer_name,
o.order_id,
o.order_date
FROM customers c
RIGHT JOIN orders o ON c.customer_id = o.customer_id;
-- Method 2: UNION ALL with WHERE
(
SELECT
c.customer_id,
c.customer_name,
o.order_id,
o.order_date
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
)
UNION ALL
(
SELECT
c.customer_id,
c.customer_name,
o.order_id,
o.order_date
FROM orders o
LEFT JOIN customers c ON o.customer_id = c.customer_id
WHERE c.customer_id IS NULL -- Only non-matching orders
);
-- ========================================
-- EXCLUSIVE JOINS (Anti-Joins)
-- ========================================
-- LEFT EXCLUDING JOIN (A - B)
-- Customers with no orders
SELECT c.*
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
WHERE o.order_id IS NULL;
-- RIGHT EXCLUDING JOIN (B - A)
-- Orders with no valid customer (orphaned)
SELECT o.*
FROM customers c
RIGHT JOIN orders o ON c.customer_id = o.customer_id
WHERE c.customer_id IS NULL;
-- FULL OUTER EXCLUDING (A ∪ B) - (A ∩ B)
-- All non-matching records from both tables
(
SELECT c.customer_id, 'customer_only' AS source
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
WHERE o.order_id IS NULL
)
UNION ALL
(
SELECT o.customer_id, 'order_only' AS source
FROM orders o
LEFT JOIN customers c ON o.customer_id = c.customer_id
WHERE c.customer_id IS NULL
);
CROSS JOIN and SELF JOIN
Special Join Types
-- ========================================
-- CROSS JOIN (Cartesian Product)
-- ========================================
-- Generate all possible combinations
SELECT
s.size,
c.color,
CONCAT(c.color, ' - ', s.size) AS variant
FROM sizes s
CROSS JOIN colors c
ORDER BY c.color, s.size;
-- Implicit cross join (older syntax)
SELECT
s.size,
c.color
FROM sizes s, colors c;
-- Generate date range
SELECT
DATE_ADD('2024-01-01', INTERVAL n DAY) AS date
FROM (
SELECT 0 AS n UNION ALL SELECT 1 UNION ALL SELECT 2
UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5
UNION ALL SELECT 6
) numbers
CROSS JOIN (SELECT 0 AS m UNION ALL SELECT 7 UNION ALL SELECT 14) weeks;
-- Generate price matrix
SELECT
p.product_name,
d.discount_percent,
p.price AS original_price,
ROUND(p.price * (1 - d.discount_percent/100), 2) AS discounted_price
FROM products p
CROSS JOIN (
SELECT 10 AS discount_percent
UNION ALL SELECT 20
UNION ALL SELECT 30
UNION ALL SELECT 50
) d
WHERE p.category_id = 1;
-- ========================================
-- SELF JOIN
-- ========================================
-- Employee hierarchy (employees and their managers)
SELECT
e1.employee_id,
e1.employee_name AS employee,
e2.employee_name AS manager
FROM employees e1
LEFT JOIN employees e2 ON e1.manager_id = e2.employee_id;
-- Find duplicate records
SELECT
c1.customer_id AS id1,
c2.customer_id AS id2,
c1.email
FROM customers c1
INNER JOIN customers c2 ON c1.email = c2.email
AND c1.customer_id < c2.customer_id; -- Avoid duplicate pairs
-- Product comparisons within same category
SELECT
p1.product_name AS product_1,
p1.price AS price_1,
p2.product_name AS product_2,
p2.price AS price_2,
ABS(p1.price - p2.price) AS price_difference
FROM products p1
INNER JOIN products p2 ON p1.category_id = p2.category_id
AND p1.product_id < p2.product_id
AND ABS(p1.price - p2.price) < 10 -- Similar price range
ORDER BY price_difference;
-- Find customers in same city
SELECT
c1.customer_name AS customer_1,
c2.customer_name AS customer_2,
c1.city
FROM customers c1
INNER JOIN customers c2 ON c1.city = c2.city
AND c1.customer_id < c2.customer_id
ORDER BY c1.city;
-- Category hierarchy (parent-child relationships)
SELECT
c1.category_name AS category,
c2.category_name AS parent_category,
c3.category_name AS grandparent_category
FROM categories c1
LEFT JOIN categories c2 ON c1.parent_id = c2.category_id
LEFT JOIN categories c3 ON c2.parent_id = c3.category_id;
Complex JOIN Patterns
Advanced JOIN Patterns
-- ========================================
-- COMPLEX MULTI-TABLE QUERIES
-- ========================================
-- Complete order details with customer and product info
SELECT
c.customer_name,
o.order_id,
o.order_date,
p.product_name,
cat.category_name,
oi.quantity,
oi.unit_price,
(oi.quantity * oi.unit_price) AS line_total
FROM customers c
INNER JOIN orders o ON c.customer_id = o.customer_id
INNER JOIN order_items oi ON o.order_id = oi.order_id
INNER JOIN products p ON oi.product_id = p.product_id
LEFT JOIN categories cat ON p.category_id = cat.category_id
WHERE o.order_date >= DATE_SUB(NOW(), INTERVAL 30 DAY)
ORDER BY o.order_date DESC, o.order_id, oi.order_item_id;
-- Customer 360 view
SELECT
c.customer_id,
c.customer_name,
c.email,
COALESCE(order_summary.total_orders, 0) AS total_orders,
COALESCE(order_summary.total_spent, 0) AS total_spent,
COALESCE(order_summary.last_order_date, 'Never') AS last_order_date,
COALESCE(review_summary.total_reviews, 0) AS reviews_written,
COALESCE(review_summary.avg_rating_given, 0) AS avg_rating_given,
COALESCE(cart_items.items_in_cart, 0) AS current_cart_items
FROM customers c
LEFT JOIN (
SELECT
customer_id,
COUNT(*) AS total_orders,
SUM(total_amount) AS total_spent,
MAX(order_date) AS last_order_date
FROM orders
WHERE status NOT IN ('cancelled')
GROUP BY customer_id
) order_summary ON c.customer_id = order_summary.customer_id
LEFT JOIN (
SELECT
user_id,
COUNT(*) AS total_reviews,
AVG(rating) AS avg_rating_given
FROM reviews
GROUP BY user_id
) review_summary ON c.customer_id = review_summary.user_id
LEFT JOIN (
SELECT
user_id,
COUNT(*) AS items_in_cart
FROM cart
GROUP BY user_id
) cart_items ON c.customer_id = cart_items.user_id
ORDER BY total_spent DESC;
-- ========================================
-- JOIN PERFORMANCE OPTIMIZATION
-- ========================================
-- Optimize with proper index usage
EXPLAIN SELECT
c.customer_name,
COUNT(o.order_id) AS order_count
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id;
-- Use straight_join to force join order
SELECT STRAIGHT_JOIN
c.customer_name,
o.order_id,
oi.quantity
FROM customers c
INNER JOIN orders o ON c.customer_id = o.customer_id
INNER JOIN order_items oi ON o.order_id = oi.order_id
WHERE c.customer_id = 123;
-- Avoid SELECT * in joins
-- Bad:
SELECT * FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id;
-- Good:
SELECT
c.customer_id,
c.customer_name,
o.order_id,
o.order_date
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id;
Practice Exercise
JOIN Challenges
💡 Click for Solutions
-- 1. Products with optional categories
SELECT
p.product_id,
p.product_name,
p.price,
COALESCE(c.category_name, 'Uncategorized') AS category
FROM products p
LEFT JOIN categories c ON p.category_id = c.category_id
ORDER BY c.category_name, p.product_name;
-- 2. Customers who never wrote reviews
SELECT
c.customer_id,
c.customer_name,
c.email,
c.registration_date
FROM customers c
LEFT JOIN reviews r ON c.customer_id = r.user_id
WHERE r.review_id IS NULL
ORDER BY c.registration_date;
-- 3. Employee hierarchy (self-join)
SELECT
e.employee_id,
e.employee_name,
e.title,
COALESCE(m.employee_name, 'No Manager') AS manager_name,
m.title AS manager_title
FROM employees e
LEFT JOIN employees m ON e.manager_id = m.employee_id
ORDER BY m.employee_id, e.employee_id;
-- 4. All dates with sales data (using date table or generation)
WITH RECURSIVE date_range AS (
SELECT DATE('2024-01-01') AS date
UNION ALL
SELECT DATE_ADD(date, INTERVAL 1 DAY)
FROM date_range
WHERE date < DATE('2024-01-31')
)
SELECT
dr.date,
COUNT(o.order_id) AS orders_count,
COALESCE(SUM(o.total_amount), 0) AS daily_sales
FROM date_range dr
LEFT JOIN orders o ON DATE(o.order_date) = dr.date
GROUP BY dr.date
ORDER BY dr.date;
-- 5. Products never ordered
SELECT
p.product_id,
p.product_name,
p.price,
p.stock_quantity
FROM products p
LEFT JOIN order_items oi ON p.product_id = oi.product_id
WHERE oi.order_item_id IS NULL
AND p.is_active = TRUE
ORDER BY p.product_name;
-- 6. All product-coupon combinations
SELECT
p.product_id,
p.product_name,
p.price,
c.coupon_code,
c.discount_type,
c.discount_value,
CASE
WHEN c.discount_type = 'percentage'
THEN p.price * (1 - c.discount_value/100)
ELSE p.price - c.discount_value
END AS price_after_coupon
FROM products p
CROSS JOIN coupons c
WHERE c.is_active = TRUE
AND c.valid_from <= CURDATE()
AND c.valid_until >= CURDATE()
ORDER BY p.product_id, c.discount_value DESC;
-- 7. Customers who ordered every product in a category
SELECT c.customer_id, c.customer_name
FROM customers c
WHERE NOT EXISTS (
SELECT p.product_id
FROM products p
WHERE p.category_id = 1 -- Electronics category
AND NOT EXISTS (
SELECT 1
FROM orders o
JOIN order_items oi ON o.order_id = oi.order_id
WHERE o.customer_id = c.customer_id
AND oi.product_id = p.product_id
)
);
-- 8. This month vs last month comparison
WITH this_month AS (
SELECT
DATE(order_date) AS date,
COUNT(*) AS orders,
SUM(total_amount) AS sales
FROM orders
WHERE MONTH(order_date) = MONTH(CURDATE())
AND YEAR(order_date) = YEAR(CURDATE())
GROUP BY DATE(order_date)
),
last_month AS (
SELECT
DATE_ADD(DATE(order_date), INTERVAL 1 MONTH) AS date,
COUNT(*) AS orders,
SUM(total_amount) AS sales
FROM orders
WHERE MONTH(order_date) = MONTH(DATE_SUB(CURDATE(), INTERVAL 1 MONTH))
AND YEAR(order_date) = YEAR(DATE_SUB(CURDATE(), INTERVAL 1 MONTH))
GROUP BY DATE(order_date)
)
SELECT
COALESCE(t.date, l.date) AS date,
COALESCE(t.orders, 0) AS this_month_orders,
COALESCE(l.orders, 0) AS last_month_orders,
COALESCE(t.sales, 0) AS this_month_sales,
COALESCE(l.sales, 0) AS last_month_sales,
COALESCE(t.sales, 0) - COALESCE(l.sales, 0) AS difference
FROM this_month t
FULL OUTER JOIN last_month l ON t.date = l.date
ORDER BY date;