Query Challenge #1
The orders table shows the number of products ordered each day. Query the data to return the most frequent item(s) ordered each day. Return multiple items if there are ties.
orders
order_id
|
date
|
product
|
1
|
2021-08-01
|
iPhone
|
2
|
2021-08-01
|
iPhone
|
3
|
2021-08-01
|
macbook
|
4
|
2021-08-01
|
macbook
|
5
|
2021-08-02
|
macbook
|
6
|
2021-08-02
|
macbook
|
7
|
2021-08-02
|
iPad
|
8
|
2021-08-03
|
airpod
|
9
|
2021-08-03
|
macbook
|
10
|
2021-08-03
|
macbook
|
11
|
2021-08-03
|
iWatch
|
12
|
2021-08-03
|
iWatch
|
13
|
2021-08-03
|
iWatch
|
14
|
2021-08-04
|
airpod
|
SQL
with product_ranking as (
select
*,
rank() over (partition by date order by product_count desc) as product_rank
from (
select
date,
product,
count(*) as product_count
from orders
group by date, product
)
)
select
date,
product,
product_count as n
from product_ranking
where product_rank = 1
order by date
date
|
product
|
n
|
2021-08-01
|
iPhone
|
2
|
2021-08-01
|
macbook
|
2
|
2021-08-02
|
macbook
|
2
|
2021-08-03
|
iWatch
|
3
|
2021-08-04
|
airpod
|
1
|
R
product_orders <- orders %>%
count(date, product) %>%
group_by(date) %>%
filter(n == max(n))
date
|
product
|
n
|
2021-08-01
|
iPhone
|
2
|
2021-08-01
|
macbook
|
2
|
2021-08-02
|
macbook
|
2
|
2021-08-03
|
iWatch
|
3
|
2021-08-04
|
airpod
|
1
|
Python
orders = r.orders
product_orders = (
r.orders
.groupby(['date', 'product'])
.size()
.reset_index(name='n')
.sort_values(by=['date', 'n'], ascending=(True, False))
)
product_orders['ranking'] = product_orders.groupby('date')['n'].rank(method='dense', ascending=False)
product_orders[product_orders["ranking"] == 1].drop(columns='ranking')
date product n
0 2021-08-01 iPhone 2
1 2021-08-01 macbook 2
3 2021-08-02 macbook 2
5 2021-08-03 iWatch 3
7 2021-08-04 airpod 1