Skip to main content

Course Progress

Loading...

Homework: SQL Query Mastery

Estimated Time: 3-4 hours
Module 3: Comprehensive Assignment

Assignment Overview

Apply your SQL knowledge to solve real-world database challenges! Work with an e-commerce database to practice all DML operations.

🎯
Skills You'll Practice
  • Complex SELECT queries with filtering and sorting
  • Data manipulation with INSERT, UPDATE, DELETE
  • JOIN operations across multiple tables
  • Aggregate functions and grouping
  • Subqueries and advanced techniques
  • Performance optimization

Part A: Database Setup

📋 Task A.1: Create the Database

Create a database calledecommerce_hwwith the following schema:

-- Create and use database
CREATE DATABASE IF NOT EXISTS ecommerce_hw
    CHARACTER SET utf8mb4
    COLLATE utf8mb4_unicode_ci;

USE ecommerce_hw;

-- Users table
CREATE TABLE users (
    user_id INT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
    username VARCHAR(50) UNIQUE NOT NULL,
    email VARCHAR(255) UNIQUE NOT NULL,
    first_name VARCHAR(50),
    last_name VARCHAR(50),
    phone VARCHAR(20),
    registration_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    last_login TIMESTAMP NULL,
    is_active BOOLEAN DEFAULT TRUE,
    user_type ENUM('customer', 'admin', 'vendor') DEFAULT 'customer',
    INDEX idx_email (email),
    INDEX idx_registration (registration_date)
);

-- Categories table
CREATE TABLE categories (
    category_id INT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
    category_name VARCHAR(100) UNIQUE NOT NULL,
    parent_category_id INT UNSIGNED NULL,
    description TEXT,
    is_active BOOLEAN DEFAULT TRUE,
    FOREIGN KEY (parent_category_id) REFERENCES categories(category_id)
);

-- Products table
CREATE TABLE products (
    product_id INT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
    sku VARCHAR(50) UNIQUE NOT NULL,
    product_name VARCHAR(200) NOT NULL,
    category_id INT UNSIGNED,
    description TEXT,
    price DECIMAL(10,2) NOT NULL,
    cost DECIMAL(10,2),
    weight DECIMAL(8,3),
    is_active BOOLEAN DEFAULT TRUE,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (category_id) REFERENCES categories(category_id),
    INDEX idx_category (category_id),
    INDEX idx_price (price),
    FULLTEXT ft_search (product_name, description)
);

-- Inventory table
CREATE TABLE inventory (
    inventory_id INT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
    product_id INT UNSIGNED UNIQUE NOT NULL,
    quantity_available INT UNSIGNED DEFAULT 0,
    quantity_reserved INT UNSIGNED DEFAULT 0,
    reorder_level INT UNSIGNED DEFAULT 10,
    last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    FOREIGN KEY (product_id) REFERENCES products(product_id)
);

-- Orders table
CREATE TABLE orders (
    order_id INT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
    user_id INT UNSIGNED NOT NULL,
    order_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    status ENUM('pending', 'processing', 'shipped', 'delivered', 'cancelled') DEFAULT 'pending',
    total_amount DECIMAL(10,2) NOT NULL DEFAULT 0,
    shipping_address TEXT,
    payment_method VARCHAR(50),
    notes TEXT,
    FOREIGN KEY (user_id) REFERENCES users(user_id),
    INDEX idx_user (user_id),
    INDEX idx_status (status),
    INDEX idx_date (order_date)
);

-- Order items table
CREATE TABLE order_items (
    order_item_id INT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
    order_id INT UNSIGNED NOT NULL,
    product_id INT UNSIGNED NOT NULL,
    quantity INT UNSIGNED NOT NULL,
    unit_price DECIMAL(10,2) NOT NULL,
    discount_amount DECIMAL(10,2) DEFAULT 0,
    FOREIGN KEY (order_id) REFERENCES orders(order_id) ON DELETE CASCADE,
    FOREIGN KEY (product_id) REFERENCES products(product_id),
    INDEX idx_order (order_id),
    INDEX idx_product (product_id)
);

-- Reviews table
CREATE TABLE reviews (
    review_id INT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
    product_id INT UNSIGNED NOT NULL,
    user_id INT UNSIGNED NOT NULL,
    rating INT UNSIGNED NOT NULL CHECK (rating BETWEEN 1 AND 5),
    title VARCHAR(200),
    comment TEXT,
    is_verified_purchase BOOLEAN DEFAULT FALSE,
    helpful_count INT UNSIGNED DEFAULT 0,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (product_id) REFERENCES products(product_id),
    FOREIGN KEY (user_id) REFERENCES users(user_id),
    UNIQUE KEY unique_review (product_id, user_id),
    INDEX idx_product_rating (product_id, rating)
);

-- Shopping cart table
CREATE TABLE cart (
    cart_id INT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
    user_id INT UNSIGNED NOT NULL,
    product_id INT UNSIGNED NOT NULL,
    quantity INT UNSIGNED NOT NULL DEFAULT 1,
    added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (user_id) REFERENCES users(user_id) ON DELETE CASCADE,
    FOREIGN KEY (product_id) REFERENCES products(product_id),
    UNIQUE KEY unique_cart_item (user_id, product_id)
);

-- Coupons table
CREATE TABLE coupons (
    coupon_id INT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
    coupon_code VARCHAR(20) UNIQUE NOT NULL,
    discount_type ENUM('percentage', 'fixed') NOT NULL,
    discount_value DECIMAL(10,2) NOT NULL,
    minimum_order DECIMAL(10,2) DEFAULT 0,
    valid_from DATE NOT NULL,
    valid_until DATE NOT NULL,
    usage_limit INT UNSIGNED,
    times_used INT UNSIGNED DEFAULT 0,
    is_active BOOLEAN DEFAULT TRUE,
    INDEX idx_code (coupon_code),
    INDEX idx_dates (valid_from, valid_until)
);

📋 Task A.2: Insert Sample Data

Populate your database with sample data (you'll need this for the queries):

Click to see sample data insertion script
-- Insert sample users
INSERT INTO users (username, email, first_name, last_name, phone, user_type) VALUES
('johndoe', 'john@example.com', 'John', 'Doe', '555-0101', 'customer'),
('janesmith', 'jane@example.com', 'Jane', 'Smith', '555-0102', 'customer'),
('bobwilson', 'bob@example.com', 'Bob', 'Wilson', '555-0103', 'customer'),
('alicebrown', 'alice@example.com', 'Alice', 'Brown', '555-0104', 'customer'),
('admin', 'admin@example.com', 'Admin', 'User', '555-0100', 'admin'),
('vendor1', 'vendor1@example.com', 'Vendor', 'One', '555-0201', 'vendor');

-- Insert categories
INSERT INTO categories (category_name, parent_category_id, description) VALUES
('Electronics', NULL, 'Electronic devices and accessories'),
('Computers', 1, 'Desktop and laptop computers'),
('Phones', 1, 'Mobile phones and accessories'),
('Clothing', NULL, 'Apparel and fashion'),
('Men', 4, 'Men clothing'),
('Women', 4, 'Women clothing'),
('Books', NULL, 'Books and publications'),
('Home & Garden', NULL, 'Home improvement and gardening');

-- Insert products
INSERT INTO products (sku, product_name, category_id, description, price, cost) VALUES
('LAP001', 'Gaming Laptop Pro', 2, 'High-performance gaming laptop', 1499.99, 1200.00),
('LAP002', 'Business Laptop', 2, 'Professional business laptop', 899.99, 700.00),
('PHN001', 'Smartphone X', 3, 'Latest smartphone with 5G', 799.99, 600.00),
('PHN002', 'Budget Phone', 3, 'Affordable smartphone', 299.99, 200.00),
('SHT001', 'Men T-Shirt', 5, 'Cotton t-shirt', 29.99, 15.00),
('SHT002', 'Women Blouse', 6, 'Elegant blouse', 49.99, 25.00),
('BK001', 'SQL Mastery', 7, 'Complete guide to SQL', 39.99, 20.00),
('BK002', 'Web Development', 7, 'Modern web development', 44.99, 22.00),
('GDN001', 'Garden Tools Set', 8, 'Complete gardening toolkit', 89.99, 50.00),
('LAP003', 'Ultrabook', 2, 'Ultra-thin laptop', 1299.99, 1000.00);

-- Insert inventory
INSERT INTO inventory (product_id, quantity_available, quantity_reserved, reorder_level) VALUES
(1, 50, 5, 10),
(2, 75, 0, 15),
(3, 100, 10, 20),
(4, 200, 0, 30),
(5, 500, 20, 50),
(6, 300, 10, 40),
(7, 150, 0, 20),
(8, 100, 5, 15),
(9, 80, 0, 10),
(10, 30, 2, 5);

-- Continue with more sample data...

Part B: SELECT Query Challenges

📝
Task B: Write SELECT Queries
Complete the following 15 SELECT query challenges. Each query should be optimized and properly formatted.

SELECT Query Tasks

  1. Find Active Premium Customers
    List all customers who have placed more than 5 orders with a total spending over $1000. Include their name, email, order count, and total spent.
  2. Low Stock Alert
    Find all products where available quantity is below the reorder level. Show product name, SKU, available quantity, and reorder level.
  3. Category Sales Report
    Calculate total sales for each category in the last 30 days. Include category name, number of items sold, and total revenue.
  4. Top Rated Products
    Find the top 10 products by average rating (minimum 5 reviews). Show product name, average rating, and review count.
  5. Customer Purchase Pattern
    For each customer, find their most frequently purchased product category.
  6. Abandoned Carts
    List all cart items that have been in the cart for more than 7 days. Include user email, product name, and days in cart.
  7. Product Profitability
    Calculate profit margin for each product (price - cost) / price * 100. Order by profit margin descending.
  8. Monthly Sales Trend
    Show monthly sales totals for the current year. Include month name, order count, and total revenue.
  9. Cross-Sell Opportunities
    Find products frequently bought together (in the same order).
  10. User Activity Report
    List users who haven't logged in for more than 30 days but have items in cart.
  11. Review Analysis
    Find products with declining ratings (average of last 10 reviews < overall average).
  12. Coupon Effectiveness
    Analyze which coupons are most used and their impact on order value.
  13. Inventory Turnover
    Calculate how many times each product's inventory has been sold completely.
  14. Customer Lifetime Value
    Calculate the total value each customer has brought (orders + potential cart value).
  15. Product Search
    Implement a search that finds products by name or description, ordered by relevance.

Part C: Data Manipulation Tasks

📝
Task C: INSERT, UPDATE, DELETE Operations
Complete these data manipulation tasks using proper transactions and safety checks.

Data Manipulation Tasks

  1. Bulk Product Import
    Insert 10 new products with inventory in a single transaction. Ensure SKUs are unique and set appropriate inventory levels.
  2. Price Adjustment
    Apply a 15% discount to all products in the Electronics category. Log the original prices before updating.
  3. Order Processing
    Move all items from a user's cart to a new order. Update inventory quantities and clear the cart.
  4. User Data Update
    Update last_login for active users and mark inactive users (no login > 90 days).
  5. Review Cleanup
    Delete spam reviews (those with specific patterns) and update helpful_count.
  6. Inventory Restock
    Implement an UPSERT operation to restock products based on reorder levels.
  7. Coupon Generation
    Generate 20 unique coupon codes with different discount values and validity periods.
  8. Data Archival
    Archive orders older than 2 years to archive tables, then delete from main tables.
  9. Cart Merge
    Merge guest cart items with user cart when user logs in (handle duplicates).
  10. Cascading Delete
    Safely delete a product and handle all related data (reviews, cart items, etc.).

Part D: Advanced Query Challenges

📝
Task D: Complex Queries with JOINs and Subqueries
Solve these advanced scenarios using JOINs, subqueries, and aggregate functions.

Advanced Query Tasks

  1. Customer Segmentation
    Segment customers into tiers (Gold, Silver, Bronze) based on purchase history and frequency. Use CASE statements and aggregate functions.
  2. Product Recommendation Engine
    For a given product, find the top 5 related products based on co-purchase patterns. Use self-joins and subqueries.
  3. Sales Performance Dashboard
    Create a comprehensive query showing daily, weekly, and monthly sales comparisons. Include year-over-year growth percentages.
  4. Inventory Forecast
    Predict when products will run out of stock based on current sales velocity. Use date functions and calculations.
  5. Customer Behavior Analysis
    Find customers whose purchase patterns have changed significantly. Compare first 3 months vs last 3 months of activity.

Part E: Performance Optimization

Task E: Query Optimization
Optimize the provided slow queries and explain your optimization strategies.
-- Slow Query 1: Fix this performance issue
SELECT DISTINCT 
    u.username,
    COUNT(*) as order_count,
    (SELECT SUM(total_amount) FROM orders WHERE user_id = u.user_id) as total_spent
FROM users u, orders o
WHERE u.user_id = o.user_id
    AND YEAR(o.order_date) = 2024
GROUP BY u.user_id
HAVING total_spent > 500
ORDER BY RAND();

-- Slow Query 2: Optimize this search
SELECT * FROM products 
WHERE UPPER(product_name) LIKE UPPER('%laptop%')
    OR description LIKE '%laptop%'
    OR sku LIKE '%laptop%'
ORDER BY (price * 0.9);

-- Slow Query 3: Improve this reporting query
SELECT 
    p.*,
    (SELECT COUNT(*) FROM order_items WHERE product_id = p.product_id) as times_ordered,
    (SELECT AVG(rating) FROM reviews WHERE product_id = p.product_id) as avg_rating,
    (SELECT quantity_available FROM inventory WHERE product_id = p.product_id) as stock
FROM products p
WHERE p.category_id IN (
    SELECT category_id FROM categories 
    WHERE category_name LIKE '%Electronics%' 
        OR parent_category_id IN (
            SELECT category_id FROM categories WHERE category_name = 'Electronics'
        )
);

Submission Requirements

📦 What to Submit

📋
Deliverables Checklist
  1. SQL Script File(homework_solutions.sql)
    • All CREATE TABLE statements
    • Sample data INSERT statements
    • All query solutions with comments
    • Performance optimization explanations
  2. Query Results(results.txt or screenshots)
    • Output from each SELECT query
    • Row counts for DML operations
    • EXPLAIN output for optimized queries
  3. Documentation(README.md)
    • Approach for complex queries
    • Optimization strategies used
    • Challenges faced and solutions
    • Performance improvements achieved
  4. Bonus Challenge(optional)
    • Create stored procedures for common operations
    • Implement triggers for audit logging
    • Design views for reporting

Grading Rubric

Component Points Criteria
Database Setup 10 Correct schema, constraints, indexes
SELECT Queries 30 Correct results, efficient queries
DML Operations 20 Safe operations, transactions used
Advanced Queries 20 Complex JOINs, subqueries work
Optimization 15 Improved performance, explanations
Documentation 5 Clear explanations, well-commented
Total 100

Solution Guidelines

💡 Click for Example Solutions (Part B, Questions 1-3)
-- B.1: Find Active Premium Customers
SELECT 
    u.user_id,
    CONCAT(u.first_name, ' ', u.last_name) AS customer_name,
    u.email,
    COUNT(DISTINCT o.order_id) AS order_count,
    SUM(o.total_amount) AS total_spent,
    AVG(o.total_amount) AS avg_order_value
FROM users u
INNER JOIN orders o ON u.user_id = o.user_id
WHERE 
    o.status NOT IN ('cancelled', 'pending')
    AND u.is_active = TRUE
GROUP BY u.user_id
HAVING 
    order_count > 5 
    AND total_spent > 1000
ORDER BY total_spent DESC;

-- B.2: Low Stock Alert
SELECT 
    p.product_id,
    p.sku,
    p.product_name,
    c.category_name,
    i.quantity_available,
    i.quantity_reserved,
    i.reorder_level,
    (i.reorder_level - i.quantity_available) AS units_to_reorder
FROM products p
INNER JOIN inventory i ON p.product_id = i.product_id
LEFT JOIN categories c ON p.category_id = c.category_id
WHERE 
    i.quantity_available < i.reorder_level
    AND p.is_active = TRUE
ORDER BY 
    units_to_reorder DESC,
    p.product_name;

-- B.3: Category Sales Report (Last 30 Days)
SELECT 
    c.category_name,
    COALESCE(pc.category_name, 'Top Level') AS parent_category,
    COUNT(DISTINCT o.order_id) AS order_count,
    SUM(oi.quantity) AS items_sold,
    SUM(oi.quantity * oi.unit_price) AS gross_revenue,
    SUM(oi.discount_amount) AS total_discounts,
    SUM(oi.quantity * oi.unit_price - oi.discount_amount) AS net_revenue
FROM categories c
LEFT JOIN categories pc ON c.parent_category_id = pc.category_id
INNER JOIN products p ON c.category_id = p.category_id
INNER JOIN order_items oi ON p.product_id = oi.product_id
INNER JOIN orders o ON oi.order_id = o.order_id
WHERE 
    o.order_date >= DATE_SUB(NOW(), INTERVAL 30 DAY)
    AND o.status NOT IN ('cancelled', 'pending')
GROUP BY c.category_id
ORDER BY net_revenue DESC;

Helpful Resources

-->