-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathMichael Dask Workspace.py
More file actions
49 lines (28 loc) · 1.34 KB
/
Michael Dask Workspace.py
File metadata and controls
49 lines (28 loc) · 1.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# Dask Workspace
# %%
import dask.dataframe as dd
import time
# Location of the bird-survey CSV on the EDI data portal. The URL is split
# across adjacent string literals purely for readability; the joined value
# is the single address passed to the reader below.
birds_link = (
    'https://portal.edirepository.org/nis/dataviewer'
    '?packageid=knb-lter-cap.256.10'
    '&entityid=53edaa7a0e083013d9bf20322db1780e'
)
# Build the data frame with dask.dataframe (imported as dd), not pandas.
birds_dask = dd.read_csv(birds_link)
# Show the concrete class of the object that dd.read_csv returned.
print(type(birds_dask))
# %%
# Benchmark cell: stack the bird table on top of itself until it is 4096
# times as long, derive one scaled column, and print the elapsed time.
#
# NOTE: Dask DataFrame operations are lazy. The concat calls and the column
# assignment below only build up the pending computation; no rows are
# processed until something like .compute() is called. The printed duration
# therefore reflects graph construction, not data processing.
DOUBLINGS = 12  # each pass doubles the row count: 2 ** 12 == 4096

# perf_counter is monotonic, so the difference is a reliable interval even
# if the system clock is adjusted while the cell runs.
start_time = time.perf_counter()
birds_daskx4096 = birds_dask
for _ in range(DOUBLINGS):
    # dd.concat is the documented public entry point; the dd.multi module
    # path is an internal location and is not guaranteed to be available.
    birds_daskx4096 = dd.concat([birds_daskx4096, birds_daskx4096])
# Scaled copy of the 'distance' column (values multiplied by 100).
birds_daskx4096['distancex100'] = birds_daskx4096['distance'] * 100
end_time = time.perf_counter()
print(f"It took {end_time-start_time} seconds")
# %%