-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCase Study 1- Job_data_analysis.sql
More file actions
64 lines (56 loc) · 2.01 KB
/
Case Study 1- Job_data_analysis.sql
File metadata and controls
64 lines (56 loc) · 2.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Database setup.
# "if not exists" keeps this step from aborting the script when project3 was
# already created by an earlier run.
create database if not exists project3;
show databases;
use project3;
# Table creation: one row per (date, job, actor, event) record.
# ds and org are the only columns that accept NULL.
create table job_data (
    ds          date,
    job_id      int          not null,
    actor_id    int          not null,
    event       varchar(15)  not null,
    language    varchar(15)  not null,
    time_spent  int          not null,  # NOTE(review): unit not stated here - presumably seconds; confirm
    org         char(2)
);

# Sanity check: list the table contents (empty right after creation).
select
    ds,
    job_id,
    actor_id,
    event,
    language,
    time_spent,
    org
from job_data;
# Value insertion: load the sample rows used by the analysis queries.
# String and date literals use single quotes: double-quoted literals are a
# MySQL extension that is parsed as an identifier when sql_mode includes
# ANSI_QUOTES, which would make this statement fail.
insert into job_data (ds, job_id, actor_id, event, language, time_spent, org)
values
    ('2020-11-30', 21, 1001, 'skip',     'English', 15,  'A'),
    ('2020-11-30', 22, 1006, 'transfer', 'Arabic',  25,  'B'),
    ('2020-11-29', 23, 1003, 'decision', 'Persian', 20,  'C'),
    ('2020-11-28', 23, 1005, 'transfer', 'Persian', 22,  'D'),
    ('2020-11-28', 25, 1002, 'decision', 'Hindi',   11,  'B'),
    ('2020-11-27', 11, 1007, 'decision', 'French',  104, 'D'),
    ('2020-11-26', 23, 1004, 'skip',     'Persian', 56,  'A'),
    ('2020-11-25', 20, 1003, 'transfer', 'Italian', 45,  'C');
/*Job_data_analysis*/
# 1. Jobs Reviewed Over Time: Calculate the number of jobs reviewed per hour for each day in November 2020.
# ds is a DATE column, so it carries no time of day: hour(ds) is 0 for every
# row and grouping by it only produces a per-day row count. The hourly rate is
# therefore derived from the review time instead:
#   jobs reviewed per hour = rows reviewed / (total time_spent in hours).
# NOTE(review): assumes time_spent is recorded in seconds (hence the 3600
# factor) - confirm against the data dictionary.
select
    ds as review_date,
    count(*) as jobs_reviewed,
    sum(time_spent) as total_time_spent,
    # nullif guards against a day whose total time_spent is 0.
    count(*) * 3600 / nullif(sum(time_spent), 0) as job_reviewed_per_hr_day
from job_data
# Half-open date range instead of month()/year() so an index on ds stays usable.
where ds >= date '2020-11-01'
  and ds < date '2020-12-01'
group by ds
order by review_date;
# 2. Throughput Analysis: Calculate the 7-day rolling average of throughput.
# Throughput here is the number of events recorded per day.
with daily_activity as (
    # One row per day. count(*) counts every event; count(distinct event)
    # would only count how many different event types occurred that day.
    select
        ds,
        count(*) as total_events,
        count(distinct job_id) as jobs_reviewed
    from job_data
    group by ds
)
select
    ds,
    jobs_reviewed,
    total_events,
    # A RANGE frame measures the window in calendar days, so a gap in ds does
    # not stretch the window past 7 days the way "rows between 6 preceding"
    # would. The average is taken over the days that have data in the window.
    avg(total_events) over (
        order by ds
        range between interval 6 day preceding and current row
    ) as avg_7day_rolling_throughput
from daily_activity
order by ds;
# 3. Language Share Analysis: Calculate the percentage share of each language over the last 30 days
# The 30-day window is anchored to the most recent ds in the table rather than
# to current_date: the rows loaded by this script are dated November 2020, so
# a wall-clock window would match nothing.
select
    language,
    count(*) as total_language,
    # Share of this language among all rows inside the window.
    count(*) * 100 / sum(count(*)) over () as percentage_share_language
from job_data
# Keeps ds in (latest - 30 days, latest], i.e. the latest 30 calendar days.
where ds > (
    select date_sub(max(ds), interval 30 day)
    from job_data
)
group by language
order by language desc;
# 4. Duplicate Rows Detection: Display duplicate rows from the job_data table.
# Rows are numbered within groups that match on every column, so the first
# occurrence of a row gets 1 and each repeated copy gets 2, 3, ...
# Filtering on > 1 returns only the repeated copies; partitioning by event
# alone with ">= 1" would return every row in the table.
with T as (
    select
        ds,
        job_id,
        actor_id,
        event,
        language,
        time_spent,
        org,
        row_number() over (
            partition by ds, job_id, actor_id, event, language, time_spent, org
        ) as Duplicate_rows
    from job_data
)
select
    ds,
    job_id,
    actor_id,
    event,
    language,
    time_spent,
    org,
    Duplicate_rows
from T
where Duplicate_rows > 1;