-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSQL code.sql
More file actions
408 lines (331 loc) · 12.3 KB
/
SQL code.sql
File metadata and controls
408 lines (331 loc) · 12.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
-- Отфильтровать потенциальных кандидатов на сокращение выделив запрос в подзапрос:
SELECT
max_s.id,
max_s.first_name,
max_s.department,
max_s.max_gross_salary
FROM
(SELECT
s.id,
s.first_name,
s.department,
s.gross_salary,
MAX(s.gross_salary) OVER(PARTITION BY s.department) AS max_gross_salary
FROM
windows_functions.salary AS s) AS max_s
WHERE
max_s.max_gross_salary = max_s.gross_salary
ORDER BY
max_s.id;
-- Показать пропорцию зарплат в отделе относительно суммы всех зарплат в этом отделе,
-- а также относительно всего фонда оплаты труда
select
id
, department
, first_name
, gross_salary
, MAX(gross_salary) over(partition by department) as max_salary
from windows_functions."salary"
--- Кто получает больше всего в каждом департаменте (дополнительно вывести идентификатор сотрудника и его имя)?
select
id
, department
, first_name
, gross_salary
, round(gross_salary::numeric / sum(gross_salary) over (partition by department), 2) as dep_ratio
, round(gross_salary::numeric / sum(gross_salary) over (), 2) as total_ratio
from windows_functions."salary"
---- Вернуть имя сотрудника у которого самая высокая зарплата в дерпартаменте используя оконные функции ---
select
id
, department
, first_name
, gross_salary
, first_value(first_name) over(partition by department order by gross_salary desc)
from windows_functions."salary"
---- Вернуть имя сотрудника у которого самая низкая зарплата в дерпартаменте используя оконные функции ---
SELECT
s.id,
s.first_name,
s.department,
s.gross_salary,
LAST_VALUE(s.first_name) OVER(PARTITION BY s.department ORDER BY s.gross_salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS lowest_paid_employee
FROM
windows_functions.salary AS s;
----- Вывести данные о сумме прироста последователей для акков инстаграм за весь период
----- добавим running_total, который будеи от обрадать нарастающую сумму последователей
----- из месяца в месяц в порядке возрастания
select
month
, change_in_followers
, sum(change_in_followers) over(order by month asc) as running_total
from windows_functions.social_media sm
where username = 'instagram'
----- найдеми кумулятивное среднее, изменив функции SUM на функции AVG
select
month
, change_in_followers
, avg(change_in_followers) over(order by month asc) as running_avg
from windows_functions.social_media sm
where username = 'instagram'
--- рассмотрим пример того как работает PARTITION BY в оконных функциях
select
username
, month
, change_in_followers
, sum(change_in_followers) over(PARTITION by username order by month asc) as running_total
, avg(change_in_followers) over(PARTITION by username order by month asc) as running_avg
from windows_functions.social_media sm
--- Пример использвания First_value()
select
username
, month
, posts
, first_value(posts) over (partition by username order by posts) as least_posts
from windows_functions.social_media sm
--- Пример использвания last_value()
select
username
, month
, posts
, last_value(posts) over (partition by username order by posts range between unbounded preceding and unbounded following) as least_posts
from windows_functions.social_media sm
--- lead lag
select
artist
, week
, streams_millions
, LAG(streams_millions, 2, 0.0) over (order by week asc) as previous_week_streams
from windows_functions.streams s
where artist = 'Lady Gaga'
--- lag
select
artist
, week
, streams_millions
, streams_millions - LAG(streams_millions, 1, streams_millions) over (order by week asc) as streams_millions_change
from windows_functions.streams s
where artist = 'Lady Gaga'
--- расчёт изменений streams_millions и chart_position от недели к неделе для всеъ артистов с помощью оконной функции LAG
select
artist
, week
, streams_millions
, streams_millions - LAG(streams_millions, 1, streams_millions) over (partition by artist order by week asc) as streams_millions_change
, chart_position
, LAG(chart_position, 1, chart_position) over (partition by artist order by week asc) - chart_position as chart_position_change
from windows_functions.streams s;
---
select
artist
, week
, streams_millions
, streams_millions - LAG(streams_millions, 1, streams_millions) over (partition by artist order by week asc) as streams_millions_change
, chart_position
, LAG(chart_position, 1, chart_position) over (partition by artist order by week asc) - chart_position as chart_position_change
from windows_functions.streams s;
--- lead
select
artist
, week
, streams_millions
, lead(streams_millions, 1) over (partition by artist order by week asc) - streams_millions as streams_millions_change
, chart_position
, chart_position - lead(chart_position, 1) over (partition by artist order by week asc) as chart_position_change
from windows_functions.streams s;
--- ROW_NUMBER
select
artist
, week
, streams_millions
, row_number() over(order by streams_millions asc) as row_number
from windows_functions.streams s
--- RANK, DENSE_RANK
select
artist
, week
, streams_millions
, rank() over(partition by week order by streams_millions asc) as rank_result
, dense_rank() over(partition by week order by streams_millions asc) as dense_rank_result
from windows_functions.streams s
--- NTILE - разбиение примерно разные группы
select
artist
, week
, streams_millions
, ntile(5) over(order by streams_millions desc) as weekly_streams_group
from windows_functions.streams s
select
artist
, week
, streams_millions
, ntile(4) over(partition by week order by streams_millions desc) as weekly_streams_group
from windows_functions.streams s
--------------
select *
from windows_functions.state_climate sc
--- Посмотрим как изменяется средняя температура с течением времени в каждом штате
select
state
, "year"
, tempf
, avg(tempf) over (partition by state order by year) as running_avg_tempf
, tempc
, avg(tempc) over (partition by state order by year) as running_avg_tempc
from windows_functions.state_climate sc
--- Найдем самую низкую температуру по каждому штату
select
state
, "year"
, tempf
, first_value (tempf) over (partition by state order by tempf ) as lowest_tempf
, tempc
, first_value (tempc) over (partition by state order by tempc ) as lowest_tempc
from windows_functions.state_climate sc
--- Найдем самую высокую температуру по каждому штату
select
state
, "year"
, tempf
, last_value (tempf) over (partition by state order by tempf range between unbounded preceding and unbounded following) as highest_tempf
, tempc
, last_value (tempc) over (partition by state order by tempc range between unbounded preceding and unbounded following) as highest_tempc
from windows_functions.state_climate sc
--- Посмотрим на сколько меняется температура каждый год в каждом штате
select
state
, "year"
, tempf
, tempf - lag (tempf, 1, tempf) over (partition by state order by year) as change_tempf
, tempc
, tempc - lag (tempc, 1, tempc) over (partition by state order by year) as change_tempc
from windows_functions.state_climate sc
--- найдем самую низкую температуру за всю историю
select
state
, "year"
, tempf
, rank() over(order by tempf asc) as coldest_rankf
, tempc
, rank() over(order by tempc asc) as coldest_rankc
from windows_functions.state_climate sc
--- найдем самую высокую температуру за всю историю
select
state
, "year"
, tempf
, rank() over(partition by state order by tempf desc) as warmest_rankf
, tempc
, rank() over(partition by state order by tempc desc) as warmest_rankc
from windows_functions.state_climate sc
-- выведем среднегодовые температуры в квартилях и квантилях, а не в рейтингах для каждого штата
select
state
, "year"
, tempf
, ntile(4) over(partition by state order by tempf asc) as quartile_f
, ntile(5) over(partition by state order by tempf asc) as quintile_f
, tempc
, ntile(4) over(partition by state order by tempf asc) as quartile_c
, ntile(5) over(partition by state order by tempf asc) as quintile_c
from windows_functions.state_climate sc
-- присвоим номер каждой выбранной записи с помощью оконной функции ROW_NUMBER()
select athlete
, event
, row_number() over() as row_number
from windows_functions.summer_medals sm
--Присвоим номер каждой выбранной записи с помощью оконной функции ROW_NUMBER()
SELECT
athlete,
event,
ROW_NUMBER() OVER() AS row_number
FROM
windows_functions.summer_medals
ORDER BY
row_number ASC;
--Найдем всех олимпийских чемпионов по теннису (мужчин и женщин отдельно), начиная с 2004 года,
--и для каждого из них выяснить, кто был предыдущим чемпионом.
select * from windows_functions.summer_medals
--Табличное выражение ищет теннисных чемпионов и выбирает нужные столбцы
--Оконная функция разделяет по полу и берёт чемпиона из предыдущей строки
WITH tennis_gold AS
(SELECT
athlete
, gender
, year
, country
FROM
windows_functions.summer_medals
WHERE
year >= 2004
AND
sport = 'Tennis'
AND
event = 'singles'
AND
medal = 'Gold')
SELECT
athlete as champion,
gender,
year,
LAG(athlete) OVER (PARTITION BY gender ORDER BY year ASC) AS last_champion
FROM
tennis_gold
ORDER BY
gender ASC,
year ASC;
--Найдем всех олимпийских чемпионов по теннису (мужчин и женщин отдельно), начиная с 2004 года,
--и для каждого из них выяснить, кто был cледующим чемпионом.
--Табличное выражение ищет теннисных чемпионов и выбирает нужные столбцы
WITH tennis_gold AS
(SELECT
athlete,
gender,
year,
country
FROM
windows_functions.summer_medals
WHERE
year >= 2004
AND
sport = 'Tennis'
AND
event = 'singles'
AND
medal = 'Gold')
--Оконная функция разделяет по полу и берёт чемпиона из следующей строки
SELECT
athlete as champion,
gender,
year,
LEAD(athlete) OVER (PARTITION BY gender ORDER BY year ASC) AS last_champion
FROM
tennis_gold
ORDER BY
gender ASC,
year ASC;
-- Ранжирование стран по числу олимпиад, в которых они участвовали, разными оконными функциями:
-- Табличное выражение выбирает страны и считает годы
WITH countries AS
(SELECT
sm.country,
COUNT(DISTINCT sm.year) AS participated
FROM
windows_functions.summer_medals AS sm
WHERE
sm.country
IN
('GBR', 'DEN', 'FRA', 'ITA','AUT')
GROUP BY
sm.country)
-- Разные оконные функции ранжируют страны
SELECT
c.country,
c.participated,
ROW_NUMBER() OVER(ORDER BY c.participated DESC) AS "row_number",
RANK() OVER(ORDER BY c.participated DESC) AS rank_number,
DENSE_RANK() OVER(ORDER BY c.participated DESC) AS "dense_rank"
FROM
countries AS c
ORDER BY
c.participated DESC;