-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathresearch.py
More file actions
135 lines (106 loc) · 3.94 KB
/
research.py
File metadata and controls
135 lines (106 loc) · 3.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import json
import csv
import time
import psutil
import os
from memory_profiler import memory_usage
import pandas as pd
# Constants
ITERATIONS = 10  # number of measurement rounds per dataset size
DATASET_SIZES = ['small', 'medium', 'large']  # expects {size}.csv / {size}.json files in DATA_DIR
DATA_DIR = 'Datasets'  # directory holding the benchmark input files
def load_csv(file_path):
    """Read the CSV file at *file_path* into a pandas DataFrame."""
    return pd.read_csv(file_path)
def load_json(file_path):
    """Load a JSON file of records into a pandas DataFrame.

    Tolerates double-encoded files (JSON that parses to a string on the
    first pass) and rejects any payload that is not a list of records.
    """
    with open(file_path, 'r') as fh:
        data = json.load(fh)
    if isinstance(data, str):
        # First decode produced a string -> the file was double-encoded.
        data = json.loads(data)
    if not isinstance(data, list):
        raise ValueError(f"Expected list of dicts in JSON, got {type(data)}")
    return pd.DataFrame(data)
def process_data(df):
    """Return *df* sorted by the 'Name' column in ascending order.

    Args:
        df: a pandas DataFrame with a 'Name' column.

    Returns:
        A new, sorted DataFrame.  The input is left unmodified — the
        previous implementation used ``inplace=True``, which mutated the
        caller's DataFrame as a hidden side effect (and can raise
        warnings when *df* is a slice of another frame) while also
        returning it; every caller here only uses the return value.

    Raises:
        KeyError: if *df* has no 'Name' column (unchanged behavior).
    """
    return df.sort_values('Name', ascending=True)
def measure_execution_time(func, *args):
    """Time a single call of ``func(*args)``.

    Args:
        func: callable to benchmark.
        *args: positional arguments forwarded to *func*.

    Returns:
        (elapsed_seconds, result) — elapsed time of the call and the
        call's return value.

    Uses ``time.perf_counter()``, a monotonic high-resolution clock, in
    place of ``time.time()``: the wall clock can jump (NTP adjustment,
    DST) mid-call and yield wrong or even negative durations.
    """
    start = time.perf_counter()
    result = func(*args)
    elapsed = time.perf_counter() - start
    return elapsed, result
def measure_memory_usage(func, *args):
    """Return the peak memory (MB) sampled while running ``func(*args)`` once."""
    samples = memory_usage((func, args), max_iterations=1)
    return max(samples)
def measure_cpu_usage(func, *args):
    """Run ``func(*args)`` and report this process's average CPU utilization.

    Args:
        func: callable to benchmark.
        *args: positional arguments forwarded to *func*.

    Returns:
        (avg_cpu_percent, result) — average CPU% of the current process
        over the duration of the call, and the call's return value.

    ``Process.cpu_percent(interval=None)`` compares CPU time between two
    calls: the first call resets the baseline (its own return value is
    meaningless), the second yields the utilization since the first.
    The previous implementation only started polling AFTER *func* had
    returned, so it averaged post-call idle readings instead of the work
    itself, and its ``while True`` loop never terminated unless a
    reading happened to be exactly 0.0.
    """
    process = psutil.Process(os.getpid())
    process.cpu_percent(interval=None)  # prime the baseline; discard first reading
    result = func(*args)
    avg_cpu_percent = process.cpu_percent(interval=None)  # utilization over the call
    return avg_cpu_percent, result
def run_experiment(dataset_size):
    """Benchmark CSV vs JSON loading and processing for one dataset size.

    For ITERATIONS rounds, measures load time, load memory usage and
    processing CPU usage for both ``{DATA_DIR}/{dataset_size}.csv`` and
    ``{DATA_DIR}/{dataset_size}.json``, then prints the averages.
    Returns early (with a message) if either input file is missing.
    """
    csv_path = f'{DATA_DIR}/{dataset_size}.csv'
    json_path = f'{DATA_DIR}/{dataset_size}.json'

    # Guard clauses: nothing to benchmark without both input files.
    if not os.path.exists(csv_path):
        print(f"❌ CSV file not found: {csv_path}")
        return
    if not os.path.exists(json_path):
        print(f"❌ JSON file not found: {json_path}")
        return

    # One sample list per (format, metric) pair.
    metrics = {key: [] for key in
               ('csv_time', 'csv_mem', 'csv_cpu',
                'json_time', 'json_mem', 'json_cpu')}

    for _ in range(ITERATIONS):
        # CSV: time the load, profile its memory, then CPU for processing.
        elapsed, frame = measure_execution_time(load_csv, csv_path)
        metrics['csv_time'].append(elapsed)
        metrics['csv_mem'].append(measure_memory_usage(load_csv, csv_path))
        cpu, _processed = measure_cpu_usage(process_data, frame)
        metrics['csv_cpu'].append(cpu)

        # JSON: identical sequence of measurements.
        elapsed, frame = measure_execution_time(load_json, json_path)
        metrics['json_time'].append(elapsed)
        metrics['json_mem'].append(measure_memory_usage(load_json, json_path))
        cpu, _processed = measure_cpu_usage(process_data, frame)
        metrics['json_cpu'].append(cpu)

    # Per-metric averages across all iterations.
    avg = {key: sum(vals) / ITERATIONS for key, vals in metrics.items()}

    print(f"📊 Results for Dataset Size: {dataset_size.upper()}")
    print(f"CSV - Avg Execution Time: {avg['csv_time']:.3f} seconds")
    print(f"CSV - Avg Memory Usage: {avg['csv_mem']:.2f} MB")
    print(f"CSV - Avg CPU Usage: {avg['csv_cpu']:.2f}%")
    print()
    print(f"JSON - Avg Execution Time: {avg['json_time']:.3f} seconds")
    print(f"JSON - Avg Memory Usage: {avg['json_mem']:.2f} MB")
    print(f"JSON - Avg CPU Usage: {avg['json_cpu']:.2f}%")
    print()
    print(f"✅ Finished {dataset_size.upper()} experiment in {ITERATIONS} iteration(s)")
    print("-" * 50)
if __name__ == '__main__':
    # Benchmark every configured dataset size in order.
    for dataset_size in DATASET_SIZES:
        run_experiment(dataset_size)