top of page
  • Writer's pictureAlice Hoang

Online Sports Retail Revenue Analysis

ROP TABLE info;

CREATE TABLE info

(

product_name VARCHAR(100),

product_id VARCHAR(11) PRIMARY KEY,

description VARCHAR(700)

);

DROP TABLE finance;

CREATE TABLE finance

(

product_id VARCHAR(11) PRIMARY KEY,

listing_price FLOAT,

sale_price FLOAT,

discount FLOAT,

revenue FLOAT

);

DROP TABLE reviews;

CREATE TABLE reviews

(

product_id VARCHAR(11) PRIMARY KEY,

rating FLOAT,

reviews FLOAT

);

DROP TABLE traffic;

CREATE TABLE traffic

(

product_id VARCHAR(11) PRIMARY KEY,

last_visited TIMESTAMP

);

DROP TABLE brands;

CREATE TABLE brands

(

product_id VARCHAR(11) PRIMARY KEY,

brand VARCHAR(7)

);

\copy info FROM 'info_v2.csv' DELIMITER ',' CSV HEADER;

\copy finance FROM 'finance.csv' DELIMITER ',' CSV HEADER;

\copy reviews FROM 'reviews_v2.csv' DELIMITER ',' CSV HEADER;

\copy traffic FROM 'traffic_v3.csv' DELIMITER ',' CSV HEADER;

\copy brands FROM 'brands_v2.csv' DELIMITER ',' CSV HEADER;



-- Count all columns as total_rows

-- Count the number of non-missing entries for description, listing_price, and last_visited

-- Join info, finance, and traffic

SELECT COUNT(*) AS total_rows,

COUNT(i.description) as count_description,

COUNT(f.listing_price)as count_listing_price,

COUNT( t.last_visited) as count_last_visited

FROM info AS i

INNER JOIN finance AS f

ON i.product_id = f.product_id

INNER JOIN traffic AS t

ON f.product_id = t.product_id;



-- Select the brand, listing_price as an integer, and a count of all products in finance

-- Join brands to finance on product_id

-- Filter for products with a listing_price more than zero

-- Aggregate results by brand and listing_price, and sort the results by listing_price in descending order

SELECT b.brand,

f.listing_price :: integer,

COUNT(f.*)

FROM finance as f

INNER JOIN brands as b

ON b.product_id = f.product_id

WHERE listing_price > 0

GROUP BY b.brand, f.listing_price

ORDER BY listing_price desc;



-- Select the brand, a count of all products in the finance table, and total revenue

-- Create four labels for products based on their price range, aliasing as price_category

-- Join brands to finance on product_id and filter out products missing a value for brand

-- Group results by brand and price_category, sort by total_revenue

SELECT b.brand,

COUNT(*),

SUM(revenue) AS total_revenue,

CASE WHEN listing_price < 41 THEN 'Budget'

WHEN listing_price >= 42 AND listing_price < 74 THEN 'Average'

WHEN listing_price >= 74 AND listing_price < 129 THEN 'Expensive'

ELSE 'Elite' END AS price_category

FROM finance as f

left JOIN brands AS b

ON f.product_id = b.product_id

WHERE b.brand IS NOT NULL

GROUP BY b.brand, price_category

ORDER BY total_revenue desc;



-- Select brand and average_discount as a percentage

-- Join brands to finance on product_id

-- Aggregate by brand

-- Filter for products without missing values for brand

SELECT b.brand,

AVG(f.discount) * 100 as average_discount

FROM finance as f

INNER JOIN brands as b

ON f.product_id = b.product_id

GROUP BY b.brand

HAVING b.brand is not NULL;

-- Calculate the correlation between reviews and revenue as review_revenue_corr

-- Join the reviews and finance tables on product_id

SELECT corr(r.reviews, f.revenue) AS review_revenue_corr

FROM finance as f

INNER JOIN reviews as r

ON r.product_id = f.product_id;



-- Calculate description_length

-- Convert rating to a numeric data type and calculate average_rating

-- Join info to reviews on product_id and group the results by description_length

-- Filter for products without missing values for description, and sort results by description_length

SELECT TRUNC(length(i.description), -2) as description_length,

ROUND(AVG( r.rating::numeric), 2) as average_rating

FROM info as i

LEFT JOIN reviews as r

ON i.product_id = r.product_id

WHERE i.description IS NOT NULL

Group by description_length

Order by description_length;



-- Select brand, month from last_visited, and a count of all products in reviews aliased as num_reviews

-- Join traffic with reviews and brands on product_id

-- Group by brand and month, filtering out missing values for brand and month

-- Order the results by brand and month

Select b.brand,

date_part('month', t.last_visited) as month,

COUNT(*) as num_reviews

FROM traffic as t

LEFT JOIN reviews as r

ON t.product_id = r.product_id

LEFT JOIN brands as b

ON t.product_id = b.product_id

where b.brand IS NOT NULL

AND date_part('month', t.last_visited) IS NOT NULL

GROUP BY b.brand, month

ORDER BY b.brand, month;



-- Create the footwear CTE, containing description and revenue

-- Filter footwear for products with a description containing %shoe%, %trainer, or %foot%

-- Also filter for products that are not missing values for description

-- Calculate the number of products and median revenue for footwear products

WITH footwear AS (

SELECT i.description AS description,

f.revenue AS revenue

FROM info AS i

INNER JOIN finance AS f

ON i.product_id = f.product_id

WHERE description ILIKE '%shoe%'

OR description ILIKE '%trainer%'

OR description ILIKE '%foot%'

AND description IS NOT NULL)

SELECT COUNT(*) AS num_footwear_products,

PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY revenue) AS median_footwear_revenue

FROM footwear;



-- Copy the footwear CTE from the previous task

-- Calculate the number of products in info and median revenue from finance

-- Inner join info with finance on product_id

-- Filter the selection for products with a description not in footwear

SELECT COUNT(*) as num_clothing_products,

PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY f.revenue) as median_clothing_revenue

FROM info as i

INNER JOIN finance as f

ON i.product_id = f.product_id

WHERE i.description not in (

SELECT i.description as description

FROM info as i

INNER JOIN finance as f

ON i.product_id = f.product_id

WHERE description ILIKE '%shoe%'

OR description ILIKE '%trainer%'

OR description ILIKE '%foot%'

AND description IS NOT NULL);


2 views0 comments

Recent Posts

See All

Commentaires


bottom of page