Skip to content
This repository was archived by the owner on May 1, 2024. It is now read-only.

Commit de61f86

Browse files
author
Jesse Shapiro
committed
Implementation of report download API endpoint
1 parent 8cb6a33 commit de61f86

11 files changed

Lines changed: 392 additions & 3 deletions

File tree

analytics_data_api/utils.py

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,36 @@
11
import datetime
22
from importlib import import_module
3+
import re
34

45
from django.db.models import Q
6+
from django.conf import settings
7+
from django.core.files.storage import default_storage
8+
from django.core.exceptions import SuspiciousFileOperation, SuspiciousOperation
59
from rest_framework.authtoken.models import Token
10+
from opaque_keys.edx.locator import CourseKey
11+
from opaque_keys import InvalidKeyError
12+
13+
from analytics_data_api.v0.exceptions import (
14+
ReportFileNotFoundError,
15+
CannotCreateReportDownloadLinkError
16+
)
17+
18+
19+
def get_filename_safe_course_id(course_id, replacement_char='_'):
    """
    Create a representation of a course_id that can be used safely in a filepath.

    Arguments:
        course_id: The course ID string. If it parses as a course key, its org,
            course and run are joined with `replacement_char`; otherwise the raw
            string is used as the starting point.
        replacement_char: The character substituted for every character that is
            not considered filename-safe (and used to join the course key parts).

    Returns:
        The course ID with every character other than A-Z, a-z, 0-9, underscore,
        period and hyphen replaced by `replacement_char`.
    """
    # `unicode` only exists on Python 2; fall back to `str` so the function also
    # works on Python 3, where `str` is already the text type.
    try:
        text_type = unicode  # pylint: disable=undefined-variable
    except NameError:
        text_type = str
    replacement = text_type(replacement_char)

    try:
        course_key = CourseKey.from_string(course_id)
        filename = replacement.join([course_key.org, course_key.course, course_key.run])
    except InvalidKeyError:
        # If the course_id doesn't parse, we will still return a value here.
        filename = course_id

    # The safest characters are A-Z, a-z, 0-9, <underscore>, <period> and <hyphen>.
    # They are listed explicitly rather than via \w: on Python 3, \w also matches
    # non-ASCII word characters for text patterns, which would let them through.
    # TODO: Once we support courses with unicode characters, we will need to revisit this.
    return re.sub(r'[^A-Za-z0-9_.\-]', replacement, filename)
634

735

836
def delete_user_auth_token(username):
@@ -84,3 +112,121 @@ def date_range(start_date, end_date, delta=datetime.timedelta(days=1)):
84112
while cur_date < end_date:
85113
yield cur_date
86114
cur_date += delta
115+
116+
117+
def get_course_report_download_details(course_id, report_name):
    """
    Work out where a course report lives in the default storage and return
    the metadata a client needs in order to download it.

    The storage path is built from the COURSE_REPORT_FILE_LOCATION_TEMPLATE
    setting (default '{course_id}_{report_name}.csv'), using the filename-safe
    form of the course ID.

    Returns a dict that always contains 'course_id' (the filename-safe form),
    'report_name' and 'download_url'; 'last_modified', 'expiration_date' and
    'file_size' are included only when they could be determined.

    Raises ReportFileNotFoundError when the storage says the file does not exist,
    and CannotCreateReportDownloadLinkError when existence cannot be checked.
    """
    location_template = getattr(
        settings, 'COURSE_REPORT_FILE_LOCATION_TEMPLATE', '{course_id}_{report_name}.csv'
    )
    # Course IDs may contain characters that some filesystems reject, so the
    # sanitized form is used both to locate the file and to name the download.
    safe_course_id = get_filename_safe_course_id(course_id)
    storage_path = location_template.format(course_id=safe_course_id, report_name=report_name)

    try:
        if not default_storage.exists(storage_path):
            raise ReportFileNotFoundError(course_id=safe_course_id, report_name=report_name)
    except (
            SuspiciousOperation,
            SuspiciousFileOperation,
            ImportError,
            NotImplementedError,
            AttributeError,
    ):
        # Give up when the storage has no way of checking existence, the check
        # isn't implemented, the storage class can't be imported, or we aren't
        # permitted to look at the requested location.
        raise CannotCreateReportDownloadLinkError

    # Modification time and size are optional extras; storages that can't
    # provide them simply leave the corresponding value unset.
    try:
        modified_at = default_storage.modified_time(storage_path)
    except (NotImplementedError, AttributeError):
        modified_at = None

    try:
        file_size = default_storage.size(storage_path)
    except (NotImplementedError, AttributeError):
        file_size = None

    # The download filename needs a timestamp; when the modification time is
    # unknown, stamp it with the current UTC date and time instead.
    stamp_source = modified_at if modified_at else datetime.datetime.utcnow()
    download_filename = '{}-{}-{}.csv'.format(
        safe_course_id, report_name, stamp_source.strftime('%Y%m%dT%H%M%SZ')
    )
    download_url, expires_at = get_file_object_url(storage_path, download_filename)

    details = {
        'course_id': safe_course_id,
        'report_name': report_name,
        'download_url': download_url,
    }
    # Only report the optional values that we actually managed to obtain.
    if modified_at is not None:
        details['last_modified'] = modified_at.strftime(settings.DATETIME_FORMAT)
    if expires_at is not None:
        details['expiration_date'] = expires_at.strftime(settings.DATETIME_FORMAT)
    if file_size is not None:
        details['file_size'] = file_size
    return details
185+
186+
187+
def get_file_object_url(filename, download_filename):
    """
    Build a download URL for the stored file and report when that URL expires.

    The storage's `.url()` is first called with extra arguments (response
    headers and an expiry, as supported by S3 storage) so that the link
    expires and is served as a CSV attachment named `download_filename`.
    If the storage rejects those arguments with a TypeError, `.url()` is
    retried with just the file name and no expiration is reported.

    Returns a `(url, expires_at)` tuple; `expires_at` is None when the plain
    fallback URL was used.

    Raises CannotCreateReportDownloadLinkError when no URL can be produced.
    """
    # Links live for two minutes unless the setting says otherwise.
    link_lifetime = getattr(settings, 'COURSE_REPORT_DOWNLOAD_EXPIRY_TIME', 120)
    expiration = get_expiration_date(link_lifetime)
    try:
        response_headers = {
            'response-content-disposition': 'attachment; filename={}'.format(download_filename),
            'response-content-type': 'text/csv',
            # The Expires header requires a very particular timestamp format
            'response-expires': expiration.strftime('%a, %d %b %Y %H:%M:%S GMT'),
        }
        url = default_storage.url(
            name=filename,
            response_headers=response_headers,
            expire=link_lifetime
        )
    except (AttributeError, NotImplementedError):
        # There is no usable .url() method on this storage at all.
        raise CannotCreateReportDownloadLinkError
    except TypeError:
        # A TypeError usually means the extra arguments aren't accepted by this
        # storage, so try once more with the bare file name.
        try:
            url = default_storage.url(name=filename)
        except (AttributeError, TypeError, NotImplementedError):
            # Still failing for reasons we can't work around; fail fast.
            raise CannotCreateReportDownloadLinkError
        expiration = None
    return url, expiration
226+
227+
228+
def get_expiration_date(seconds):
    """
    Return the naive UTC datetime at which a link created right now,
    with a lifetime of `seconds` seconds, stops being valid.
    """
    created_at = datetime.datetime.utcnow()
    lifetime = datetime.timedelta(seconds=seconds)
    return created_at + lifetime

analytics_data_api/v0/exceptions.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,3 +72,26 @@ class ParameterValueError(BaseError):
7272
def __init__(self, message, *args, **kwargs):
7373
super(ParameterValueError, self).__init__(*args, **kwargs)
7474
self.message = message
75+
76+
77+
class ReportFileNotFoundError(BaseError):
    """
    Raised when the file backing a requested course report cannot be found.

    Must be constructed with `course_id` and `report_name` keyword arguments;
    both are interpolated into the error message.
    """
    def __init__(self, *args, **kwargs):
        # Remove our own keyword arguments before delegating to the base class.
        template_values = {
            'course_id': kwargs.pop('course_id'),
            'report_name': kwargs.pop('report_name'),
        }
        super(ReportFileNotFoundError, self).__init__(*args, **kwargs)
        self.message = self.message_template.format(**template_values)

    @property
    def message_template(self):
        return 'Could not find report \'{report_name}\' for course {course_id}.'
90+
91+
92+
class CannotCreateReportDownloadLinkError(BaseError):
    """
    Raised when no download link can be generated for a report file.
    """

    # Fixed message shared by every instance of this error.
    message = 'Could not create a downloadable link to the report.'

analytics_data_api/v0/middleware.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
LearnerEngagementTimelineNotFoundError,
99
LearnerNotFoundError,
1010
ParameterValueError,
11+
ReportFileNotFoundError,
12+
CannotCreateReportDownloadLinkError,
1113
)
1214

1315

@@ -129,3 +131,39 @@ def error_code(self):
129131
@property
130132
def status_code(self):
131133
return status.HTTP_400_BAD_REQUEST
134+
135+
136+
class ReportFileNotFoundErrorMiddleware(BaseProcessErrorMiddleware):
    """
    Associates ReportFileNotFoundError with a 404 status and the
    'report_file_not_found' error code.
    """

    @property
    def status_code(self):
        return status.HTTP_404_NOT_FOUND

    @property
    def error_code(self):
        return 'report_file_not_found'

    @property
    def error(self):
        return ReportFileNotFoundError
152+
153+
154+
class CannotCreateDownloadLinkErrorMiddleware(BaseProcessErrorMiddleware):
    """
    Associates CannotCreateReportDownloadLinkError with a 501 status and the
    'cannot_create_report_download_link' error code, for storage backends
    that don't support creating download links.
    """

    @property
    def status_code(self):
        return status.HTTP_501_NOT_IMPLEMENTED

    @property
    def error_code(self):
        return 'cannot_create_report_download_link'

    @property
    def error(self):
        return CannotCreateReportDownloadLinkError

analytics_data_api/v0/tests/views/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33
from opaque_keys.edx.keys import CourseKey
44
from rest_framework import status
55

6+
from analytics_data_api.utils import get_filename_safe_course_id
7+
68
DEMO_COURSE_ID = u'course-v1:edX+DemoX+Demo_2014'
9+
# Filename-safe form of DEMO_COURSE_ID, as produced by get_filename_safe_course_id.
SANITIZED_DEMO_COURSE_ID = get_filename_safe_course_id(DEMO_COURSE_ID)
710

811

912
class DemoCourseMixin(object):

analytics_data_api/v0/tests/views/test_courses.py

Lines changed: 118 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,14 @@
1212
from django.conf import settings
1313
from django_dynamic_fixture import G
1414
import pytz
15+
from mock import patch, Mock
1516

1617
from analytics_data_api.constants.country import get_country
1718
from analytics_data_api.v0 import models
1819
from analytics_data_api.constants import country, enrollment_modes, genders
1920
from analytics_data_api.v0.models import CourseActivityWeekly
2021
from analytics_data_api.v0.tests.utils import flatten
21-
from analytics_data_api.v0.tests.views import DemoCourseMixin, DEMO_COURSE_ID
22+
from analytics_data_api.v0.tests.views import DemoCourseMixin, DEMO_COURSE_ID, SANITIZED_DEMO_COURSE_ID
2223
from analyticsdataserver.tests import TestCaseWithAuthentication
2324

2425

@@ -785,3 +786,119 @@ def test_get(self):
785786
def test_get_404(self):
786787
response = self._get_data('foo/bar/course')
787788
self.assertEquals(response.status_code, 404)
789+
790+
791+
class CourseReportDownloadViewTests(DemoCourseMixin, TestCaseWithAuthentication):
    """
    Tests for the course report download endpoint, with the storage backend mocked out.
    """

    path = '/api/v0/courses/{course_id}/reports/{report_name}'

    # Fixed point in time returned by the mocked modification/expiration lookups.
    FIXED_DATETIME = datetime.datetime(2014, 1, 1, tzinfo=pytz.utc)

    def _request_demo_course_report(self, report_name):
        """
        Issue an authenticated GET for the given report of the demo course.
        """
        url = self.path.format(course_id=DEMO_COURSE_ID, report_name=report_name)
        return self.authenticated_get(url)

    @patch('django.core.files.storage.default_storage.exists', Mock(return_value=False))
    def test_report_file_not_found(self):
        response = self._request_demo_course_report('problem_response')
        self.assertEqual(response.status_code, 404)

    def test_report_not_supported(self):
        response = self._request_demo_course_report('fake_problem_that_we_dont_support')
        self.assertEqual(response.status_code, 404)

    @patch('analytics_data_api.utils.default_storage', object())
    def test_incompatible_storage_provider(self):
        response = self._request_demo_course_report('problem_response')
        self.assertEqual(response.status_code, 501)

    @patch('django.core.files.storage.default_storage.exists', Mock(return_value=True))
    @patch('django.core.files.storage.default_storage.url', Mock(return_value='http://fake'))
    @patch('django.core.files.storage.default_storage.modified_time', Mock(return_value=FIXED_DATETIME))
    @patch('django.core.files.storage.default_storage.size', Mock(return_value=1000))
    @patch('analytics_data_api.utils.get_expiration_date', Mock(return_value=FIXED_DATETIME))
    def test_make_working_link(self):
        response = self._request_demo_course_report('problem_response')
        self.assertEqual(response.status_code, 200)
        formatted_time = self.FIXED_DATETIME.strftime(settings.DATETIME_FORMAT)
        self.assertEqual(response.data, {
            'course_id': SANITIZED_DEMO_COURSE_ID,
            'report_name': 'problem_response',
            'download_url': 'http://fake',
            'last_modified': formatted_time,
            'expiration_date': formatted_time,
            'file_size': 1000,
        })

    @patch('django.core.files.storage.default_storage.exists', Mock(return_value=True))
    @patch('django.core.files.storage.default_storage.url', Mock(return_value='http://fake'))
    @patch('django.core.files.storage.default_storage.modified_time', Mock(return_value=FIXED_DATETIME))
    @patch('django.core.files.storage.default_storage.size', Mock(side_effect=NotImplementedError()))
    @patch('analytics_data_api.utils.get_expiration_date', Mock(return_value=FIXED_DATETIME))
    def test_make_working_link_with_missing_size(self):
        response = self._request_demo_course_report('problem_response')
        self.assertEqual(response.status_code, 200)
        formatted_time = self.FIXED_DATETIME.strftime(settings.DATETIME_FORMAT)
        # No 'file_size' entry is expected when the storage can't report a size.
        self.assertEqual(response.data, {
            'course_id': SANITIZED_DEMO_COURSE_ID,
            'report_name': 'problem_response',
            'download_url': 'http://fake',
            'last_modified': formatted_time,
            'expiration_date': formatted_time,
        })

    @patch('django.core.files.storage.default_storage.exists', Mock(return_value=True))
    @patch('django.core.files.storage.default_storage.url', Mock(return_value='http://fake'))
    @patch('django.core.files.storage.default_storage.modified_time', Mock(side_effect=NotImplementedError()))
    @patch('django.core.files.storage.default_storage.size', Mock(return_value=1000))
    @patch('analytics_data_api.utils.get_expiration_date', Mock(return_value=FIXED_DATETIME))
    def test_make_working_link_with_missing_last_modified_date(self):
        response = self._request_demo_course_report('problem_response')
        self.assertEqual(response.status_code, 200)
        # No 'last_modified' entry is expected when the storage can't report one.
        self.assertEqual(response.data, {
            'course_id': SANITIZED_DEMO_COURSE_ID,
            'report_name': 'problem_response',
            'download_url': 'http://fake',
            'file_size': 1000,
            'expiration_date': self.FIXED_DATETIME.strftime(settings.DATETIME_FORMAT),
        })

analytics_data_api/v0/urls/courses.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414
('enrollment/location', views.CourseEnrollmentByLocationView, 'enrollment_by_location'),
1515
('problems', views.ProblemsListView, 'problems'),
1616
('problems_and_tags', views.ProblemsAndTagsListView, 'problems_and_tags'),
17-
('videos', views.VideosListView, 'videos')
17+
('videos', views.VideosListView, 'videos'),
18+
('reports/(?P<report_name>[a-zA-Z0-9_]+)', views.ReportDownloadView, 'reports'),
1819
]
1920

2021
urlpatterns = []

0 commit comments

Comments
 (0)