Skip to content
This repository was archived by the owner on May 1, 2024. It is now read-only.

Commit b38ecd3

Browse files
authored
Merge pull request #150 from edx/thallada/local-video-data
AN-7775 Generate local video data
2 parents d90a368 + 66d2d3a commit b38ecd3

8 files changed

Lines changed: 308 additions & 21 deletions

File tree

README.rst

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,83 @@ database.
7171

7272
$ make loaddata
7373

74+
Loading Video Data
75+
~~~~~~~~~~~~~~~~~~
76+
77+
The above command should work fine on its own, but you may see warnings about
78+
video ids:
79+
80+
::
81+
82+
WARNING:analyticsdataserver.clients:Course Blocks API failed to return
83+
video ids (401). See README for instructions on how to authenticate the
84+
API with your local LMS.
85+
86+
In order to generate video data, the API has to be authenticated with
87+
your local LMS so that it can access the video ids for each course. Instead of
88+
adding a whole OAuth client to the API for this one procedure, we will piggyback
89+
off of the Insights OAuth client by taking the OAuth token it generates and
90+
using it here.
91+
92+
1. Start your local LMS server. (e.g. in devstack, run `paver devstack --fast lms`).
93+
94+
2. If your local LMS server is running on any address other than the default of
95+
`http://localhost:8000/`, make sure to add this setting to
96+
`analyticsdataserver/settings/local.py` with the correct URL. (you will
97+
likely not need to do this):
98+
99+
::
100+
101+
# Don't forget to add the trailing forward slash
102+
LMS_BASE_URL = 'http://example.com:8000/'
103+
104+
3. Sign into your local Insights server making sure to use your local LMS for
105+
authentication. This will generate a new OAuth access token if you do not
106+
already have one that isn't expired.
107+
108+
The user you sign in with must have staff access to the courses for which you
109+
want generated video data.
110+
111+
4. Visit your local LMS server's admin site (by default, this is at
112+
`http://localhost:8000/admin`).
113+
114+
5. Sign in with a superuser account. Don't have one? Make one with this command
115+
in your devstack as the `edxapp` user:
116+
117+
::
118+
119+
edxapp@precise64:~/edx-platform$ ./manage.py lms createsuperuser
120+
121+
Enter a username and password that you will remember.
122+
123+
6. On the admin site, find the "Oauth2" section and click the link "Access
124+
tokens". The breadcrumbs should show "Home > Oauth2 > Access tokens".
125+
126+
Copy the string in the "Token" column for the first row in the table. Also,
127+
make sure the "User" of the first row is the same user that you signed in
128+
with in step 3.
129+
130+
7. Paste the string as a new setting in `analyticsdataserver/settings/local.py`:
131+
132+
::
133+
134+
COURSE_BLOCK_API_AUTH_TOKEN = '<paste access token here>'
135+
136+
8. Run `make loaddata` again and ensure that you see the following log message
137+
in the output:
138+
139+
::
140+
141+
INFO:analyticsdataserver.clients:Successfully authenticated with the
142+
Course Blocks API.
143+
144+
9. Check if you now have video data in the API, either by querying the API in
145+
the swagger docs at `/docs/#!/api/Videos_List_GET`, or by visiting the Insights
146+
`engagement/videos/` page for a course.
147+
148+
Note: the access tokens expire in one year so you should only have to follow the
149+
above steps once a year.
150+
74151
Running Tests
75152
-------------
76153

analytics_data_api/management/commands/generate_fake_course_data.py

Lines changed: 43 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,16 @@
44
import logging
55
import math
66
import random
7+
78
from tqdm import tqdm
89

10+
from django.conf import settings
911
from django.core.management.base import BaseCommand
1012
from django.utils import timezone
11-
from analytics_data_api.v0 import models
13+
1214
from analytics_data_api.constants import engagement_events
15+
from analytics_data_api.v0 import models
16+
from analyticsdataserver.clients import CourseBlocksApiClient
1317

1418
logging.basicConfig(level=logging.INFO)
1519
logger = logging.getLogger(__name__)
@@ -44,7 +48,7 @@ def add_arguments(self, parser):
4448
'--course_id',
4549
action='store',
4650
dest='course_id',
47-
default='edX/DemoX/Demo_Course',
51+
default='course-v1:edX+DemoX+Demo_Courset',
4852
help='Course ID for which to generate fake data',
4953
)
5054
parser.add_argument(
@@ -175,7 +179,7 @@ def generate_weekly_data(self, course_id, start_date, end_date):
175179

176180
logger.info("Generating new weekly course activity data...")
177181

178-
progress = tqdm(total=math.ceil((end_date - start).days / 7.0) + 1)
182+
progress = tqdm(total=math.ceil((end_date - start).days / 7.0))
179183
while start < end_date:
180184
active_students = random.randint(100, 4000)
181185
# End date should occur on Saturday at 23:59:59
@@ -198,23 +202,13 @@ def generate_weekly_data(self, course_id, start_date, end_date):
198202
logger.info("Done!")
199203

200204
def generate_video_timeline_data(self, video_id):
201-
logger.info("Deleting video timeline data...")
202-
models.VideoTimeline.objects.all().delete()
203-
204-
logger.info("Generating new video timeline...")
205205
for segment in range(100):
206206
active_students = random.randint(100, 4000)
207207
counts = constrained_sum_sample_pos(2, active_students)
208208
models.VideoTimeline.objects.create(pipeline_video_id=video_id, segment=segment,
209209
num_users=counts[0], num_views=counts[1])
210210

211-
logger.info("Done!")
212-
213211
def generate_video_data(self, course_id, video_id, module_id):
214-
logger.info("Deleting course video data...")
215-
models.Video.objects.all().delete()
216-
217-
logger.info("Generating new course videos...")
218212
users_at_start = 1234
219213
models.Video.objects.create(course_id=course_id, pipeline_video_id=video_id,
220214
encoded_module_id=module_id, duration=500, segment_length=5,
@@ -288,11 +282,44 @@ def generate_tags_distribution_data(self, course_id):
288282
total_submissions=total_submissions, correct_submissions=correct_submissions
289283
)
290284

285+
def fetch_videos_from_course_blocks(self, course_id):
    """
    Ask the LMS Course Blocks API for the videos of a course.

    Reads ``LMS_BASE_URL`` and ``COURSE_BLOCK_API_AUTH_TOKEN`` from the Django
    settings. Both are optional: when either one is missing a warning is
    logged and ``None`` is returned so the caller can fall back to fake ids.

    Arguments:
        course_id: ID of the course whose video blocks are requested.

    Returns:
        The result of ``CourseBlocksApiClient.all_videos(course_id)``, or
        ``None`` when a required setting is not configured.
    """
    logger.info("Fetching video ids from Course Blocks API...")
    try:
        # Tolerate a base URL configured without its trailing slash.
        api_base_url = settings.LMS_BASE_URL.rstrip('/') + '/api/courses/v1/'
    except AttributeError:
        logger.warning("LMS_BASE_URL is not configured! Cannot get video ids.")
        return None

    try:
        auth_token = settings.COURSE_BLOCK_API_AUTH_TOKEN
    except AttributeError:
        # Treat a missing token like a missing URL: warn and let the caller fall back.
        logger.warning("COURSE_BLOCK_API_AUTH_TOKEN is not configured! Cannot get video ids.")
        return None

    logger.info("Assuming the Course Blocks API is hosted at: %s", api_base_url)

    blocks_api = CourseBlocksApiClient(api_base_url, auth_token, timeout=5)
    return blocks_api.all_videos(course_id)
296+
297+
def generate_all_video_data(self, course_id, videos):
    """
    Replace all stored video and video timeline rows with freshly generated ones.

    Arguments:
        course_id: ID of the course the generated videos are attached to.
        videos: iterable of dicts, each with a 'video_id' and a
            'video_module_id' entry.
    """
    # Clear out both tables before generating anything new.
    stale_tables = (
        ("Deleting course video data...", models.Video),
        ("Deleting video timeline data...", models.VideoTimeline),
    )
    for notice, model in stale_tables:
        logger.info(notice)
        model.objects.all().delete()

    logger.info("Generating new course videos and video timeline data...")
    for entry in tqdm(videos):
        pipeline_video_id = entry['video_id']
        self.generate_video_data(course_id, pipeline_video_id, entry['video_module_id'])
        self.generate_video_timeline_data(pipeline_video_id)

    logger.info("Done!")
310+
291311
def handle(self, *args, **options):
292312
course_id = options['course_id']
293313
username = options['username']
294-
video_id = '0fac49ba'
295-
video_module_id = 'i4x-edX-DemoX-video-5c90cffecd9b48b188cbfea176bf7fe9'
314+
video_ids = self.fetch_videos_from_course_blocks(course_id)
315+
if not video_ids:
316+
logger.warning("Falling back to fake video id due to Course Blocks API failure...")
317+
video_ids = [
318+
{
319+
'video_id': '0fac49ba',
320+
'video_module_id': 'i4x-edX-DemoX-video-5c90cffecd9b48b188cbfea176bf7fe9'
321+
}
322+
]
296323
start_date = timezone.now() - datetime.timedelta(weeks=10)
297324

298325
num_weeks = options['num_weeks']
@@ -304,8 +331,7 @@ def handle(self, *args, **options):
304331
logger.info("Generating data for %s...", course_id)
305332
self.generate_weekly_data(course_id, start_date, end_date)
306333
self.generate_daily_data(course_id, start_date, end_date)
307-
self.generate_video_data(course_id, video_id, video_module_id)
308-
self.generate_video_timeline_data(video_id)
334+
self.generate_all_video_data(course_id, video_ids)
309335
self.generate_learner_engagement_data(course_id, username, start_date, end_date)
310336
self.generate_learner_engagement_range_data(course_id, start_date.date(), end_date.date())
311337
self.generate_tags_distribution_data(course_id)

analyticsdataserver/clients.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import logging
2+
3+
from edx_rest_api_client.client import EdxRestApiClient
4+
from edx_rest_api_client.exceptions import HttpClientError
5+
from opaque_keys.edx.keys import UsageKey
6+
from opaque_keys import InvalidKeyError
7+
8+
from analyticsdataserver.utils import temp_log_level
9+
10+
logger = logging.getLogger(__name__)
11+
12+
13+
class CourseBlocksApiClient(EdxRestApiClient):
    """
    Client for the LMS Course Blocks API, built on the edX Rest API Client
    (https://github.com/edx/edx-rest-api-client).

    Background on the API is on the wiki at
    https://openedx.atlassian.net/wiki/display/AN/Course+Structure+API.

    For now the only consumer is the local developer script generate_fake_course_data.
    """
    def __init__(self, url, access_token, timeout):
        super(CourseBlocksApiClient, self).__init__(url, oauth_access_token=access_token, timeout=timeout)

    def all_videos(self, course_id):
        """
        Fetch every video block of a course.

        Arguments:
            course_id: ID of the course to query.

        Returns:
            A list of dicts with 'video_id' and 'video_module_id' keys, or
            ``None`` when the API answers with an HTTP client error (a
            warning is logged in that case).
        """
        logger.debug('Retrieving course video blocks for course_id: %s', course_id)
        try:
            response = self.blocks.get(course_id=course_id, all_blocks=True, depth='all', block_types_filter='video')
        except HttpClientError as e:
            status = e.response.status_code
            # Statuses with a known likely cause get a hint appended to the warning.
            hinted_warnings = {
                401: ("Course Blocks API failed to return video ids (%s). "
                      "See README for instructions on how to authenticate the API with your local LMS."),
                404: ("Course Blocks API failed to return video ids (%s). "
                      "Does the course exist in the LMS?"),
            }
            warning = hinted_warnings.get(status, "Course Blocks API failed to return video ids (%s).")
            logger.warning(warning, status)
            return None
        else:
            logger.info("Successfully authenticated with the Course Blocks API.")

        def encode(block_id):
            """Return the html id for a usage key string; unparseable ids pass through unchanged."""
            try:
                return UsageKey.from_string(block_id).html_id()
            except InvalidKeyError:
                return block_id

        # HACK: stevedore (used inside edx-opaque-keys) logs a flood of ImportErrors while
        # keys are parsed. UsageKey still works regardless, so the logger is muted meanwhile.
        with temp_log_level('stevedore', log_level=logging.CRITICAL):
            found = []
            for block in response['blocks'].values():
                module_id = encode(block['id'])
                found.append({'video_id': course_id + '|' + module_id,
                              'video_module_id': module_id})

        return found

analyticsdataserver/settings/local.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,15 @@
5656
ANALYTICS_DATABASE = 'analytics'
5757
ENABLE_ADMIN_SITE = True
5858

59-
########## END ANALYTICS DATA API CONFIGURATION
60-
6159
TEST_RUNNER = 'django_nose.NoseTestSuiteRunner'
6260

6361
SWAGGER_SETTINGS = {
6462
'api_key': 'edx'
6563
}
64+
65+
# These two settings are used in generate_fake_course_data.py.
66+
# Replace with correct values to generate local fake video data.
67+
LMS_BASE_URL = 'http://localhost:8000/' # the base URL for your running local LMS instance
68+
COURSE_BLOCK_API_AUTH_TOKEN = 'paste auth token here' # see README for instructions on how to configure this value
69+
70+
########## END ANALYTICS DATA API CONFIGURATION

analyticsdataserver/tests.py

Lines changed: 103 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,21 @@
1+
import json
2+
import logging
13
from contextlib import contextmanager
24

5+
import mock
6+
import responses
7+
38
from django.conf import settings
49
from django.contrib.auth.models import User
510
from django.db.utils import ConnectionHandler, DatabaseError
611
from django.test import TestCase
712
from django.test.utils import override_settings
8-
9-
import mock
1013
from rest_framework.authtoken.models import Token
14+
1115
from analytics_data_api.v0.models import CourseEnrollmentDaily, CourseEnrollmentByBirthYear
16+
from analyticsdataserver.clients import CourseBlocksApiClient
1217
from analyticsdataserver.router import AnalyticsApiRouter
18+
from analyticsdataserver.utils import temp_log_level
1319

1420

1521
class TestCaseWithAuthentication(TestCase):
@@ -97,3 +103,98 @@ def test_allow_relation(self):
97103
"""
98104
self.assertFalse(self.router.allow_relation(CourseEnrollmentDaily, User))
99105
self.assertTrue(self.router.allow_relation(CourseEnrollmentDaily, CourseEnrollmentByBirthYear))
106+
107+
108+
class UtilsTests(TestCase):
    """Tests for the helpers in analyticsdataserver.utils."""

    def setUp(self):
        self.logger = logging.getLogger('test_logger')

    def test_temp_log_level(self):
        """The log level changes inside the context manager and is restored on exit."""
        level_before = self.logger.getEffectiveLevel()
        scenarios = (
            ({}, logging.CRITICAL),  # NOTE: log_level defaults to logging.CRITICAL
            ({'log_level': logging.DEBUG}, logging.DEBUG),  # explicit log_level option
        )
        for options, level_inside in scenarios:
            with temp_log_level('test_logger', **options):
                self.assertEqual(self.logger.getEffectiveLevel(), level_inside)
            self.assertEqual(self.logger.getEffectiveLevel(), level_before)
123+
124+
125+
class ClientTests(TestCase):
    """Tests for CourseBlocksApiClient.all_videos against mocked HTTP responses."""

    @mock.patch('analyticsdataserver.clients.EdxRestApiClient')
    def setUp(self, *args, **kwargs):  # pylint: disable=unused-argument
        # Kept under its own name so Django's built-in ``self.client`` test client is not overwritten.
        self.blocks_client = CourseBlocksApiClient('http://example.com/', 'token', 5)

    @responses.activate
    def test_all_videos(self):
        """Well-formed usage keys are converted into video ids and module ids."""
        responses.add(responses.GET, 'http://example.com/blocks/', body=json.dumps({'blocks': {
            'block-v1:edX+DemoX+Demo_Course+type@video+block@5c90cffecd9b48b188cbfea176bf7fe9': {
                'id': 'block-v1:edX+DemoX+Demo_Course+type@video+block@5c90cffecd9b48b188cbfea176bf7fe9'
            },
            'block-v1:edX+DemoX+Demo_Course+type@video+block@7e9b434e6de3435ab99bd3fb25bde807': {
                'id': 'block-v1:edX+DemoX+Demo_Course+type@video+block@7e9b434e6de3435ab99bd3fb25bde807'
            }
        }}), status=200, content_type='application/json')
        videos = self.blocks_client.all_videos('course_id')
        # The client builds its list by iterating a dict, so sort before comparing
        # instead of relying on dict ordering.
        self.assertListEqual(sorted(videos, key=lambda video: video['video_id']), [
            {
                'video_id': 'course_id|5c90cffecd9b48b188cbfea176bf7fe9',
                'video_module_id': '5c90cffecd9b48b188cbfea176bf7fe9'
            },
            {
                'video_id': 'course_id|7e9b434e6de3435ab99bd3fb25bde807',
                'video_module_id': '7e9b434e6de3435ab99bd3fb25bde807'
            }
        ])

    @responses.activate
    @mock.patch('analyticsdataserver.clients.logger')
    def test_all_videos_401(self, logger):
        """A 401 logs the authentication hint and yields None."""
        responses.add(responses.GET, 'http://example.com/blocks/', status=401, content_type='application/json')
        videos = self.blocks_client.all_videos('course_id')
        logger.warning.assert_called_with(
            'Course Blocks API failed to return video ids (%s). ' +
            'See README for instructions on how to authenticate the API with your local LMS.', 401)
        self.assertEqual(videos, None)

    @responses.activate
    @mock.patch('analyticsdataserver.clients.logger')
    def test_all_videos_404(self, logger):
        """A 404 logs the missing-course hint and yields None."""
        responses.add(responses.GET, 'http://example.com/blocks/', status=404, content_type='application/json')
        videos = self.blocks_client.all_videos('course_id')
        logger.warning.assert_called_with('Course Blocks API failed to return video ids (%s). ' +
                                          'Does the course exist in the LMS?', 404)
        self.assertEqual(videos, None)

    @responses.activate
    @mock.patch('analyticsdataserver.clients.logger')
    def test_all_videos_500(self, logger):
        """A status other than 401/404 (418 is used here) logs the generic warning and yields None."""
        responses.add(responses.GET, 'http://example.com/blocks/', status=418, content_type='application/json')
        videos = self.blocks_client.all_videos('course_id')
        logger.warning.assert_called_with('Course Blocks API failed to return video ids (%s).', 418)
        self.assertEqual(videos, None)

    @responses.activate
    def test_all_videos_pass_through_bad_id(self):
        """Block ids that cannot be parsed as usage keys are passed through unchanged."""
        responses.add(responses.GET, 'http://example.com/blocks/', body=json.dumps({'blocks': {
            'block-v1:edX+DemoX+Demo_Course+type@video+block@5c90cffecd9b48b188cbfea176bf7fe9': {
                'id': 'bad_key'
            },
            'block-v1:edX+DemoX+Demo_Course+type@video+block@7e9b434e6de3435ab99bd3fb25bde807': {
                'id': 'bad_key'
            }
        }}), status=200, content_type='application/json')
        videos = self.blocks_client.all_videos('course_id')
        # Both entries are identical, so ordering cannot affect this comparison.
        self.assertListEqual(videos, [
            {
                'video_id': 'course_id|bad_key',
                'video_module_id': 'bad_key'
            },
            {
                'video_id': 'course_id|bad_key',
                'video_module_id': 'bad_key'
            }
        ])

0 commit comments

Comments
 (0)