-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathechonest_dictionary_functions.py
More file actions
212 lines (178 loc) · 7.21 KB
/
echonest_dictionary_functions.py
File metadata and controls
212 lines (178 loc) · 7.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
import json
import urllib2
import math
# Loads in database of echonest attributes for each track id
def loadEchonestAttributes():
# Filename of database
database_file = 'track_ids70_1_attributes.txt'
# Dictionary structure to be filled and returned
echonest_attributes = {}
database = open(database_file, 'r')
for line in database:
row = line.rstrip().split(',') # Removes \n character and splits on ','
values = [] # List that will contain all 8 attributes for a given track_id
for i in range (1, 14):
# If echonest has a valid attribute
if row[i] != 'None':
values.append(float(row[i]))
# If no parameter received from echnoest, set to inf
else:
values.append(float('inf'))
echonest_attributes[row[0]] = values # Add track_id : attributes to dictionary
database.close()
return echonest_attributes
# Ensures given track_id has corresponding attributes in dictionary
def fetchEchonestAttributes(track_id):
# First check if track_id is already in dictionary of attributes
if track_id in echonest_attributes:
return True
# If it is not, then we need to query echonest to get the attributes for track_id
else:
URL = 'http://developer.echonest.com/api/v4/track/profile?api_key=' + 'VBFP0ICNRRIKKKQO6' + '&id=spotify:track:' + track_id + '&bucket=audio_summary'
data = urllib2.urlopen(URL)
trackSummary = json.loads(data.read())
# Too many requests to echonest
if trackSummary['response']['status']['code'] == 3:
return False
# Deal with other errors in echonest db
elif 'track' not in trackSummary['response'].keys():
return False
# If track_id couldn't be found
elif 'audio_summary' not in trackSummary['response']['track'].keys():
return False
# Some other echonest error
elif trackSummary['response']['status']['code'] != 0:
return False
# If no errors so far, try to get relevant info
try:
summary = trackSummary['response']['track']['audio_summary']
except KeyError, e:
return False
values = []
keys = summary.keys()
# If all keys exist then create list to be returned
if keys == ['key','tempo','energy','liveness','analysis_url','speechiness','acousticness','instrumentalness','mode','time_signature','duration','loudness','valence','danceability']:
for param in keys:
# Ignore url
if param == 'analysis_url':
continue
# Check if value is none
if summary[param]:
values.append(float(summary[param]))
# If no parameter received from echnoest, set to inf
else:
values.append(float('inf'))
# Add attributes to dictionary
echonest_attributes[track_id] = values
return True
else:
return False
# Computes hamRadio distance between two tracks.
# Ensure that tracks are in dictionary before calling this function.
def getHamRadioDistance(seed, track):
seed_vals = echonest_attributes[seed]
track_vals = echonest_attributes[track]
hamRadio_dist = 0
for i in range (0, len(seed_vals)):
current_term = (track_vals[i] - seed_vals[i]) ** 2
if current_term == float('inf') or math.isnan(current_term):
continue
else:
hamRadio_dist += current_term
hamRadio_dist = hamRadio_dist ** 0.5
return hamRadio_dist
# Sorts a given list of lists by hamRadio_dist. Input list should be of the form:
# [[track_id1, hamRadio_dist1], [track_id2, hamRadio_dist2], [track_id3, hamRadio_dist3]]
def sortByDist(distances):
return sorted(distances, key = lambda x: float(x[1]))
def computeLevDist(track_1, track_2):
if len(track_1) < len(track_2):
return computeLevDist(track_2, track_1)
# At this point, len(track_1) >= len(track_2)
if len(track_2) == 0:
return len(track_1)
previous_row = range(len(track_2) + 1)
for i, c1 in enumerate(track_1):
current_row = [i + 1]
for j, c2 in enumerate(track_2):
insertions = previous_row[j + 1] + 1 # j+1 instead of j since previous_row and current_row are one character longer
deletions = current_row[j] + 1 # than track_2
substitutions = previous_row[j] + (c1 != c2)
current_row.append(min(insertions, deletions, substitutions))
previous_row = current_row
return previous_row[-1]
# Computes the longest common substring
def longest_common_substring(s1, s2):
m = [[0] * (1 + len(s2)) for i in xrange(1 + len(s1))]
longest, x_longest = 0, 0
for x in xrange(1, 1 + len(s1)):
for y in xrange(1, 1 + len(s2)):
if s1[x - 1] == s2[y - 1]:
m[x][y] = m[x - 1][y - 1] + 1
if m[x][y] > longest:
longest = m[x][y]
x_longest = x
else:
m[x][y] = 0
return s1[x_longest - longest: x_longest]
# Optimized similarity testing. Returns a value between 0 and 1.
def computeSimilarity(track_1, track_2):
track_1 = track_1.rstrip()
track_2 = track_2.rstrip()
track_1_list = track_1.replace('-', ' ').split()
track_2_list = track_2.replace('-', ' ').split()
# Checking for invalid inputs
if len(track_1) <= 0 or len(track_2) <= 0:
return 1
# Checking if a string is entirely contained in another and is longer than two words
elif (track_1 in track_2 and len(track_1_list) > 2) or (track_2 in track_1 and len(track_2_list) > 2):
return 1
# Counts number of common words across 2 strings
num_common_words = 0
for word in track_1_list:
if word in track_2_list:
num_common_words += 1
track_1_list.remove(word)
track_2_list.remove(word)
continue
# Resets track_1_list and track_2_list
track_1_list = track_1.replace('-', ' ').split()
track_2_list = track_2.replace('-', ' ').split()
common_word_ratio = 2.0 * num_common_words / (len(track_1_list) + len(track_2_list))
# lcs = longest_common_substring
lcs = longest_common_substring(track_1, track_2)
lcs_ratio = 2.0 * len(lcs) / (len(track_1) + len(track_2))
# Check how many starting words are the same
pos = 0
while pos < len(track_1_list) and pos < len(track_2_list) and track_1_list[pos] == track_2_list[pos]:
# Check if first word is followed by hyphen
if (track_1_list[pos] + ' -') in track_2 or (track_2_list[pos] + ' -') in track_1:
return 1
pos += 1
if pos == 0:
return max(common_word_ratio, lcs_ratio)
else:
return max(common_word_ratio, lcs_ratio) ** (1/pos)
def filterCandidateList(playlist, candidate_list):
for playlist_track in playlist:
for candidate_track in candidate_list:
if 'Commentary' in candidate_track or ' - Live' in candidate_track:
candidate_list.remove(candidate_track)
continue
elif computeSimilarity(playlist_track[1], candidate_track[1]) > 0.7:
candidate_list.remove(candidate_track)
continue
return candidate_list
def main():
# global echonest_attributes
# echonest_attributes = loadEchonestAttributes()
# fetchEchonestAttributes('4lwGyv3tbahmN1Z25wdCxa')
# fetchEchonestAttributes('4qikXelSRKvoCqFcHLB2H2')
print filterCandidateList([(1, 'help'), (2, 'lol')], [(0, 'helper'), (3, 'this is')])
# print computeLevDist('apple', 'apple bottom')
# print getHamRadioDistance('4lwGyv3tbahmN1Z25wdCxa', '4qikXelSRKvoCqFcHLB2H2')
# print sortByDist([['A', float('inf')], ['B', -4.219], ['C', 0]])
# print computeSimilarity('Years - Vocal Extended Mix', 'Years of Life - Extended Instrumental Mix')
print computeSimilarity('Love Me', 'Love Me Again')
if __name__ == '__main__':
main()