Skip to main content

Course Progress

Loading...

Mastering SQL JOINs

Duration: 90 minutes
Module 3: Session 4

Learning Objectives

  • Understand the concept of table relationships
  • Master INNER JOIN for matching records
  • Use LEFT/RIGHT JOIN for optional relationships
  • Implement FULL OUTER JOIN (MySQL workaround)
  • Apply CROSS JOIN and SELF JOIN
  • Optimize JOIN performance with indexes
  • Handle NULL values in JOINs
  • Write complex multi-table queries

Understanding SQL JOINs

JOINs combine rows from two or more tables based on related columns. They're the heart of relational databases! 🔗

💡
JOIN Quick Reference
  • INNER JOIN:Returns only matching rows from both tables
  • LEFT JOIN:Returns all rows from left table, matching from right
  • RIGHT JOIN:Returns all rows from right table, matching from left
  • FULL OUTER JOIN:Returns all rows from both tables
  • CROSS JOIN:Cartesian product of both tables

INNER JOIN - The Foundation

graph LR A[Table A] -->|matching
records| C[Result] B[Table B] -->|matching
records| C style A fill:#dbeafe style B fill:#dcfce7 style C fill:#fef3c7

INNER JOIN Examples

-- ========================================
-- BASIC INNER JOIN
-- ========================================

-- Simple INNER JOIN
SELECT 
    c.customer_id,
    c.customer_name,
    o.order_id,
    o.order_date,
    o.total_amount
FROM customers c
INNER JOIN orders o ON c.customer_id = o.customer_id;

-- Multiple conditions in JOIN
SELECT 
    p.product_name,
    oi.quantity,
    oi.unit_price,
    o.order_date
FROM products p
INNER JOIN order_items oi 
    ON p.product_id = oi.product_id
    AND oi.unit_price > 100  -- Additional condition
INNER JOIN orders o 
    ON oi.order_id = o.order_id;

-- Using USING clause (when column names match)
SELECT 
    c.customer_name,
    o.order_id,
    o.total_amount
FROM customers c
INNER JOIN orders o USING (customer_id);

-- ========================================
-- MULTIPLE TABLE JOINS
-- ========================================

-- Joining 3 tables
SELECT 
    c.customer_name,
    o.order_id,
    p.product_name,
    oi.quantity,
    oi.unit_price
FROM customers c
INNER JOIN orders o ON c.customer_id = o.customer_id
INNER JOIN order_items oi ON o.order_id = oi.order_id
INNER JOIN products p ON oi.product_id = p.product_id
WHERE o.order_date >= '2024-01-01';

-- Complex join with aggregation
SELECT 
    c.customer_id,
    c.customer_name,
    COUNT(DISTINCT o.order_id) AS order_count,
    SUM(oi.quantity * oi.unit_price) AS total_spent,
    AVG(oi.quantity * oi.unit_price) AS avg_order_value
FROM customers c
INNER JOIN orders o ON c.customer_id = o.customer_id
INNER JOIN order_items oi ON o.order_id = oi.order_id
GROUP BY c.customer_id
HAVING total_spent > 1000
ORDER BY total_spent DESC;

-- ========================================
-- INNER JOIN WITH SUBQUERIES
-- ========================================

-- Join with derived table
SELECT 
    c.customer_name,
    c.email,
    customer_stats.total_orders,
    customer_stats.total_spent
FROM customers c
INNER JOIN (
    SELECT 
        customer_id,
        COUNT(*) AS total_orders,
        SUM(total_amount) AS total_spent
    FROM orders
    WHERE status = 'completed'
    GROUP BY customer_id
) AS customer_stats ON c.customer_id = customer_stats.customer_id
WHERE customer_stats.total_orders > 5;

-- ========================================
-- NATURAL JOIN (use with caution!)
-- ========================================

-- Natural join automatically matches columns with same names
SELECT *
FROM customers
NATURAL JOIN orders;  -- Automatically joins on customer_id

-- Equivalent to:
SELECT *
FROM customers c
INNER JOIN orders o USING (customer_id);

LEFT JOIN (LEFT OUTER JOIN)

LEFT JOIN Use Cases

-- ========================================
-- LEFT JOIN EXAMPLES
-- ========================================

-- Find all customers with their orders (including those with no orders)
SELECT 
    c.customer_id,
    c.customer_name,
    c.email,
    COUNT(o.order_id) AS order_count,
    COALESCE(SUM(o.total_amount), 0) AS total_spent
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id;

-- Find customers who have NEVER placed an order
SELECT 
    c.customer_id,
    c.customer_name,
    c.registration_date
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
WHERE o.order_id IS NULL;  -- Key pattern for "not exists"

-- Products with optional reviews
SELECT 
    p.product_id,
    p.product_name,
    p.price,
    COUNT(r.review_id) AS review_count,
    COALESCE(AVG(r.rating), 0) AS avg_rating
FROM products p
LEFT JOIN reviews r ON p.product_id = r.product_id
GROUP BY p.product_id
ORDER BY avg_rating DESC;

-- ========================================
-- MULTIPLE LEFT JOINS
-- ========================================

-- Complete customer profile with optional data
SELECT 
    c.customer_id,
    c.customer_name,
    a.street_address,
    a.city,
    ph.phone_number,
    COUNT(DISTINCT o.order_id) AS order_count,
    COUNT(DISTINCT r.review_id) AS review_count
FROM customers c
LEFT JOIN addresses a ON c.customer_id = a.customer_id 
    AND a.is_primary = TRUE
LEFT JOIN phones ph ON c.customer_id = ph.customer_id 
    AND ph.phone_type = 'primary'
LEFT JOIN orders o ON c.customer_id = o.customer_id
LEFT JOIN reviews r ON c.customer_id = r.user_id
GROUP BY c.customer_id;

-- ========================================
-- LEFT JOIN WITH WHERE CONDITIONS
-- ========================================

-- Be careful with WHERE on LEFT JOIN!
-- Wrong: This turns LEFT JOIN into INNER JOIN
SELECT c.*, o.*
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
WHERE o.status = 'completed';  -- Filters out NULLs!

-- Correct: Put condition in ON clause
SELECT c.*, o.*
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
    AND o.status = 'completed';  -- Preserves NULLs

-- Or use WHERE with NULL check
SELECT c.*, o.*
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
WHERE o.status = 'completed' OR o.status IS NULL;

-- ========================================
-- LEFT JOIN FOR GAP ANALYSIS
-- ========================================

-- Find products not in any active order
SELECT 
    p.product_id,
    p.product_name,
    p.stock_quantity
FROM products p
LEFT JOIN (
    SELECT DISTINCT oi.product_id
    FROM order_items oi
    JOIN orders o ON oi.order_id = o.order_id
    WHERE o.status IN ('pending', 'processing')
) AS active_products ON p.product_id = active_products.product_id
WHERE active_products.product_id IS NULL
    AND p.stock_quantity > 0;

RIGHT JOIN and FULL OUTER JOIN

⚠️
MySQL FULL OUTER JOIN
MySQL doesn't support FULL OUTER JOIN directly. Use UNION of LEFT and RIGHT JOINs instead!
-- ========================================
-- RIGHT JOIN (Less Common)
-- ========================================

-- RIGHT JOIN example (rarely used, LEFT JOIN preferred)
SELECT 
    o.order_id,
    o.order_date,
    c.customer_name
FROM orders o
RIGHT JOIN customers c ON o.customer_id = c.customer_id;

-- Equivalent LEFT JOIN (more readable)
SELECT 
    o.order_id,
    o.order_date,
    c.customer_name
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id;

-- ========================================
-- FULL OUTER JOIN (MySQL Workaround)
-- ========================================

-- MySQL doesn't support FULL OUTER JOIN, use UNION
-- Find all customers and all orders, even unmatched ones

-- Method 1: UNION of LEFT and RIGHT JOIN
SELECT 
    c.customer_id,
    c.customer_name,
    o.order_id,
    o.order_date
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id

UNION

SELECT 
    c.customer_id,
    c.customer_name,
    o.order_id,
    o.order_date
FROM customers c
RIGHT JOIN orders o ON c.customer_id = o.customer_id;

-- Method 2: UNION ALL with WHERE
(
    SELECT 
        c.customer_id,
        c.customer_name,
        o.order_id,
        o.order_date
    FROM customers c
    LEFT JOIN orders o ON c.customer_id = o.customer_id
)
UNION ALL
(
    SELECT 
        c.customer_id,
        c.customer_name,
        o.order_id,
        o.order_date
    FROM orders o
    LEFT JOIN customers c ON o.customer_id = c.customer_id
    WHERE c.customer_id IS NULL  -- Only non-matching orders
);

-- ========================================
-- EXCLUSIVE JOINS (Anti-Joins)
-- ========================================

-- LEFT EXCLUDING JOIN (A - B)
-- Customers with no orders
SELECT c.*
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
WHERE o.order_id IS NULL;

-- RIGHT EXCLUDING JOIN (B - A)
-- Orders with no valid customer (orphaned)
SELECT o.*
FROM customers c
RIGHT JOIN orders o ON c.customer_id = o.customer_id
WHERE c.customer_id IS NULL;

-- FULL OUTER EXCLUDING (A ∪ B) - (A ∩ B)
-- All non-matching records from both tables
(
    SELECT c.customer_id, 'customer_only' AS source
    FROM customers c
    LEFT JOIN orders o ON c.customer_id = o.customer_id
    WHERE o.order_id IS NULL
)
UNION ALL
(
    SELECT o.customer_id, 'order_only' AS source
    FROM orders o
    LEFT JOIN customers c ON o.customer_id = c.customer_id
    WHERE c.customer_id IS NULL
);

CROSS JOIN and SELF JOIN

Special Join Types

-- ========================================
-- CROSS JOIN (Cartesian Product)
-- ========================================

-- Generate all possible combinations
SELECT 
    s.size,
    c.color,
    CONCAT(c.color, ' - ', s.size) AS variant
FROM sizes s
CROSS JOIN colors c
ORDER BY c.color, s.size;

-- Implicit cross join (older syntax)
SELECT 
    s.size,
    c.color
FROM sizes s, colors c;

-- Generate date range
SELECT 
    DATE_ADD('2024-01-01', INTERVAL n DAY) AS date
FROM (
    SELECT 0 AS n UNION ALL SELECT 1 UNION ALL SELECT 2 
    UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 
    UNION ALL SELECT 6
) numbers
CROSS JOIN (SELECT 0 AS m UNION ALL SELECT 7 UNION ALL SELECT 14) weeks;

-- Generate price matrix
SELECT 
    p.product_name,
    d.discount_percent,
    p.price AS original_price,
    ROUND(p.price * (1 - d.discount_percent/100), 2) AS discounted_price
FROM products p
CROSS JOIN (
    SELECT 10 AS discount_percent
    UNION ALL SELECT 20
    UNION ALL SELECT 30
    UNION ALL SELECT 50
) d
WHERE p.category_id = 1;

-- ========================================
-- SELF JOIN
-- ========================================

-- Employee hierarchy (employees and their managers)
SELECT 
    e1.employee_id,
    e1.employee_name AS employee,
    e2.employee_name AS manager
FROM employees e1
LEFT JOIN employees e2 ON e1.manager_id = e2.employee_id;

-- Find duplicate records
SELECT 
    c1.customer_id AS id1,
    c2.customer_id AS id2,
    c1.email
FROM customers c1
INNER JOIN customers c2 ON c1.email = c2.email
    AND c1.customer_id < c2.customer_id;  -- Avoid duplicate pairs

-- Product comparisons within same category
SELECT 
    p1.product_name AS product_1,
    p1.price AS price_1,
    p2.product_name AS product_2,
    p2.price AS price_2,
    ABS(p1.price - p2.price) AS price_difference
FROM products p1
INNER JOIN products p2 ON p1.category_id = p2.category_id
    AND p1.product_id < p2.product_id
    AND ABS(p1.price - p2.price) < 10  -- Similar price range
ORDER BY price_difference;

-- Find customers in same city
SELECT 
    c1.customer_name AS customer_1,
    c2.customer_name AS customer_2,
    c1.city
FROM customers c1
INNER JOIN customers c2 ON c1.city = c2.city
    AND c1.customer_id < c2.customer_id
ORDER BY c1.city;

-- Category hierarchy (parent-child relationships)
SELECT 
    c1.category_name AS category,
    c2.category_name AS parent_category,
    c3.category_name AS grandparent_category
FROM categories c1
LEFT JOIN categories c2 ON c1.parent_id = c2.category_id
LEFT JOIN categories c3 ON c2.parent_id = c3.category_id;

Complex JOIN Patterns

Advanced JOIN Patterns

-- ========================================
-- COMPLEX MULTI-TABLE QUERIES
-- ========================================

-- Complete order details with customer and product info
SELECT 
    c.customer_name,
    o.order_id,
    o.order_date,
    p.product_name,
    cat.category_name,
    oi.quantity,
    oi.unit_price,
    (oi.quantity * oi.unit_price) AS line_total
FROM customers c
INNER JOIN orders o ON c.customer_id = o.customer_id
INNER JOIN order_items oi ON o.order_id = oi.order_id
INNER JOIN products p ON oi.product_id = p.product_id
LEFT JOIN categories cat ON p.category_id = cat.category_id
WHERE o.order_date >= DATE_SUB(NOW(), INTERVAL 30 DAY)
ORDER BY o.order_date DESC, o.order_id, oi.order_item_id;

-- Customer 360 view
SELECT 
    c.customer_id,
    c.customer_name,
    c.email,
    COALESCE(order_summary.total_orders, 0) AS total_orders,
    COALESCE(order_summary.total_spent, 0) AS total_spent,
    COALESCE(order_summary.last_order_date, 'Never') AS last_order_date,
    COALESCE(review_summary.total_reviews, 0) AS reviews_written,
    COALESCE(review_summary.avg_rating_given, 0) AS avg_rating_given,
    COALESCE(cart_items.items_in_cart, 0) AS current_cart_items
FROM customers c
LEFT JOIN (
    SELECT 
        customer_id,
        COUNT(*) AS total_orders,
        SUM(total_amount) AS total_spent,
        MAX(order_date) AS last_order_date
    FROM orders
    WHERE status NOT IN ('cancelled')
    GROUP BY customer_id
) order_summary ON c.customer_id = order_summary.customer_id
LEFT JOIN (
    SELECT 
        user_id,
        COUNT(*) AS total_reviews,
        AVG(rating) AS avg_rating_given
    FROM reviews
    GROUP BY user_id
) review_summary ON c.customer_id = review_summary.user_id
LEFT JOIN (
    SELECT 
        user_id,
        COUNT(*) AS items_in_cart
    FROM cart
    GROUP BY user_id
) cart_items ON c.customer_id = cart_items.user_id
ORDER BY total_spent DESC;

-- ========================================
-- JOIN PERFORMANCE OPTIMIZATION
-- ========================================

-- Optimize with proper index usage
EXPLAIN SELECT 
    c.customer_name,
    COUNT(o.order_id) AS order_count
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id;

-- Use straight_join to force join order
SELECT STRAIGHT_JOIN
    c.customer_name,
    o.order_id,
    oi.quantity
FROM customers c
INNER JOIN orders o ON c.customer_id = o.customer_id
INNER JOIN order_items oi ON o.order_id = oi.order_id
WHERE c.customer_id = 123;

-- Avoid SELECT * in joins
-- Bad:
SELECT * FROM customers c 
LEFT JOIN orders o ON c.customer_id = o.customer_id;

-- Good:
SELECT 
    c.customer_id,
    c.customer_name,
    o.order_id,
    o.order_date
FROM customers c 
LEFT JOIN orders o ON c.customer_id = o.customer_id;

Practice Exercise

💻
JOIN Challenges
Write queries using appropriate JOINs for these scenarios:
  1. List all products with their category names (handle products without category)
  2. Find customers who have never written a review
  3. Show all employees with their manager names (self-join)
  4. Create a sales report showing all dates in a month, even those with no sales
  5. Find products that have never been ordered
  6. List all possible product-coupon combinations
  7. Find customers who have ordered every product in a category
  8. Create a full comparison of this month vs last month sales
💡 Click for Solutions
-- 1. Products with optional categories
SELECT 
    p.product_id,
    p.product_name,
    p.price,
    COALESCE(c.category_name, 'Uncategorized') AS category
FROM products p
LEFT JOIN categories c ON p.category_id = c.category_id
ORDER BY c.category_name, p.product_name;

-- 2. Customers who never wrote reviews
SELECT 
    c.customer_id,
    c.customer_name,
    c.email,
    c.registration_date
FROM customers c
LEFT JOIN reviews r ON c.customer_id = r.user_id
WHERE r.review_id IS NULL
ORDER BY c.registration_date;

-- 3. Employee hierarchy (self-join)
SELECT 
    e.employee_id,
    e.employee_name,
    e.title,
    COALESCE(m.employee_name, 'No Manager') AS manager_name,
    m.title AS manager_title
FROM employees e
LEFT JOIN employees m ON e.manager_id = m.employee_id
ORDER BY m.employee_id, e.employee_id;

-- 4. All dates with sales data (using date table or generation)
WITH RECURSIVE date_range AS (
    SELECT DATE('2024-01-01') AS date
    UNION ALL
    SELECT DATE_ADD(date, INTERVAL 1 DAY)
    FROM date_range
    WHERE date < DATE('2024-01-31')
)
SELECT 
    dr.date,
    COUNT(o.order_id) AS orders_count,
    COALESCE(SUM(o.total_amount), 0) AS daily_sales
FROM date_range dr
LEFT JOIN orders o ON DATE(o.order_date) = dr.date
GROUP BY dr.date
ORDER BY dr.date;

-- 5. Products never ordered
SELECT 
    p.product_id,
    p.product_name,
    p.price,
    p.stock_quantity
FROM products p
LEFT JOIN order_items oi ON p.product_id = oi.product_id
WHERE oi.order_item_id IS NULL
    AND p.is_active = TRUE
ORDER BY p.product_name;

-- 6. All product-coupon combinations
SELECT 
    p.product_id,
    p.product_name,
    p.price,
    c.coupon_code,
    c.discount_type,
    c.discount_value,
    CASE 
        WHEN c.discount_type = 'percentage' 
        THEN p.price * (1 - c.discount_value/100)
        ELSE p.price - c.discount_value
    END AS price_after_coupon
FROM products p
CROSS JOIN coupons c
WHERE c.is_active = TRUE
    AND c.valid_from <= CURDATE()
    AND c.valid_until >= CURDATE()
ORDER BY p.product_id, c.discount_value DESC;

-- 7. Customers who ordered every product in a category
SELECT c.customer_id, c.customer_name
FROM customers c
WHERE NOT EXISTS (
    SELECT p.product_id
    FROM products p
    WHERE p.category_id = 1  -- Electronics category
    AND NOT EXISTS (
        SELECT 1
        FROM orders o
        JOIN order_items oi ON o.order_id = oi.order_id
        WHERE o.customer_id = c.customer_id
        AND oi.product_id = p.product_id
    )
);

-- 8. This month vs last month comparison
WITH this_month AS (
    SELECT 
        DATE(order_date) AS date,
        COUNT(*) AS orders,
        SUM(total_amount) AS sales
    FROM orders
    WHERE MONTH(order_date) = MONTH(CURDATE())
        AND YEAR(order_date) = YEAR(CURDATE())
    GROUP BY DATE(order_date)
),
last_month AS (
    SELECT 
        DATE_ADD(DATE(order_date), INTERVAL 1 MONTH) AS date,
        COUNT(*) AS orders,
        SUM(total_amount) AS sales
    FROM orders
    WHERE MONTH(order_date) = MONTH(DATE_SUB(CURDATE(), INTERVAL 1 MONTH))
        AND YEAR(order_date) = YEAR(DATE_SUB(CURDATE(), INTERVAL 1 MONTH))
    GROUP BY DATE(order_date)
)
SELECT 
    COALESCE(t.date, l.date) AS date,
    COALESCE(t.orders, 0) AS this_month_orders,
    COALESCE(l.orders, 0) AS last_month_orders,
    COALESCE(t.sales, 0) AS this_month_sales,
    COALESCE(l.sales, 0) AS last_month_sales,
    COALESCE(t.sales, 0) - COALESCE(l.sales, 0) AS difference
FROM this_month t
FULL OUTER JOIN last_month l ON t.date = l.date
ORDER BY date;

Additional Resources