Skip to content
This repository was archived by the owner on May 1, 2024. It is now read-only.

Commit a4fd62a

Browse files
authored
Merge pull request #145 from edx/dsjen/course-list-metadata
Adds the course metadata enrollment summary endpoint.
2 parents d2fcb7b + 8103ad0 commit a4fd62a

11 files changed

Lines changed: 487 additions & 42 deletions

File tree

analytics_data_api/management/commands/generate_fake_course_data.py

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@
22

33
import datetime
44
import logging
5-
from optparse import make_option
5+
import math
66
import random
7+
from optparse import make_option
8+
from tqdm import tqdm
79

810
from django.core.management.base import BaseCommand
911
from django.utils import timezone
@@ -87,7 +89,8 @@ def generate_daily_data(self, course_id, start_date, end_date):
8789
models.CourseEnrollmentByGender,
8890
models.CourseEnrollmentByEducation,
8991
models.CourseEnrollmentByBirthYear,
90-
models.CourseEnrollmentByCountry]:
92+
models.CourseEnrollmentByCountry,
93+
models.CourseMetaSummaryEnrollment]:
9194
model.objects.all().delete()
9295

9396
logger.info("Deleted all daily course enrollment data.")
@@ -98,6 +101,7 @@ def generate_daily_data(self, course_id, start_date, end_date):
98101
date = start_date
99102
cumulative_count = 0
100103

104+
progress = tqdm(total=(end_date - date).days + 2)
101105
while date <= end_date:
102106
daily_total = get_count(daily_total)
103107
models.CourseEnrollmentDaily.objects.create(course_id=course_id, date=date, count=daily_total)
@@ -128,8 +132,21 @@ def generate_daily_data(self, course_id, start_date, end_date):
128132
models.CourseEnrollmentByBirthYear.objects.create(course_id=course_id, date=date, count=count,
129133
birth_year=birth_year)
130134

135+
progress.update(1)
131136
date = date + datetime.timedelta(days=1)
132137

138+
for mode, ratio in enrollment_mode_ratios.iteritems():
139+
count = int(ratio * daily_total)
140+
cumulative_count = count + random.randint(0, 100)
141+
models.CourseMetaSummaryEnrollment.objects.create(
142+
course_id=course_id, catalog_course_title='Demo Course', catalog_course='Demo_Course',
143+
start_date=timezone.now() - datetime.timedelta(weeks=6),
144+
end_date=timezone.now() + datetime.timedelta(weeks=10),
145+
pacing_type='self_paced', availability='Current', mode=mode, count=count,
146+
cumulative_count=cumulative_count, count_change_7_days=random.randint(-50, 50))
147+
148+
progress.update(1)
149+
progress.close()
133150
logger.info("Done!")
134151

135152
def generate_weekly_data(self, course_id, start_date, end_date):
@@ -144,6 +161,7 @@ def generate_weekly_data(self, course_id, start_date, end_date):
144161

145162
logger.info("Generating new weekly course activity data...")
146163

164+
progress = tqdm(total=math.ceil((end_date - start).days / 7.0) + 1)
147165
while start < end_date:
148166
active_students = random.randint(100, 4000)
149167
# End date should occur on Saturday at 23:59:59
@@ -159,8 +177,10 @@ def generate_weekly_data(self, course_id, start_date, end_date):
159177
count=active_students,
160178
interval_start=start, interval_end=end)
161179

180+
progress.update(1)
162181
start = end
163182

183+
progress.close()
164184
logger.info("Done!")
165185

166186
def generate_video_timeline_data(self, video_id):
@@ -193,6 +213,7 @@ def generate_learner_engagement_data(self, course_id, username, start_date, end_
193213

194214
logger.info("Generating learner engagement module data...")
195215
current = start_date
216+
progress = tqdm(total=(end_date - start_date).days + 1)
196217
while current < end_date:
197218
current = current + datetime.timedelta(days=1)
198219
for metric in engagement_events.INDIVIDUAL_EVENTS:
@@ -206,7 +227,9 @@ def generate_learner_engagement_data(self, course_id, username, start_date, end_
206227
models.ModuleEngagement.objects.create(
207228
course_id=course_id, username=username, date=current,
208229
entity_type=entity_type, entity_id=entity_id, event=event, count=count)
209-
logger.info("Done!")
230+
progress.update(1)
231+
progress.close()
232+
logger.info("Done!")
210233

211234
def generate_learner_engagement_range_data(self, course_id, start_date, end_date, max_value=100):
212235
logger.info("Deleting engagement range data...")
@@ -256,7 +279,7 @@ def handle(self, *args, **options):
256279
username = options['username']
257280
video_id = '0fac49ba'
258281
video_module_id = 'i4x-edX-DemoX-video-5c90cffecd9b48b188cbfea176bf7fe9'
259-
start_date = datetime.datetime(year=2016, month=1, day=1, tzinfo=timezone.utc)
282+
start_date = timezone.now() - datetime.timedelta(weeks=10)
260283

261284
num_weeks = options['num_weeks']
262285
if num_weeks:

analytics_data_api/v0/models.py

Lines changed: 42 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -12,35 +12,39 @@
1212
from analytics_data_api.utils import date_range
1313

1414

15-
class CourseActivityWeekly(models.Model):
16-
"""A count of unique users who performed a particular action during a week."""
15+
class BaseCourseModel(models.Model):
    """
    Abstract base for models whose rows belong to a course.

    Declares the two columns the concrete course models would otherwise each
    repeat: the indexed ``course_id`` and the ``created`` timestamp.
    """

    course_id = models.CharField(max_length=255, db_index=True)
    # auto_now_add: the timestamp is filled in when the row is first saved.
    created = models.DateTimeField(auto_now_add=True)

    class Meta(object):
        # Abstract model: this class only contributes fields to subclasses.
        abstract = True
21+
22+
23+
class CourseActivityWeekly(BaseCourseModel):
24+
"""A count of unique users who performed a particular action during a week."""
25+
26+
class Meta(BaseCourseModel.Meta):
1927
db_table = 'course_activity'
2028
index_together = [['course_id', 'activity_type']]
2129
ordering = ('interval_end', 'interval_start', 'course_id')
2230
get_latest_by = 'interval_end'
2331

24-
course_id = models.CharField(db_index=True, max_length=255)
2532
interval_start = models.DateTimeField()
2633
interval_end = models.DateTimeField(db_index=True)
2734
activity_type = models.CharField(db_index=True, max_length=255, db_column='label')
2835
count = models.IntegerField()
29-
created = models.DateTimeField(auto_now_add=True)
3036

3137
@classmethod
3238
def get_most_recent(cls, course_id, activity_type):
3339
"""Activity for the week that was most recently computed."""
3440
return cls.objects.filter(course_id=course_id, activity_type=activity_type).latest('interval_end')
3541

3642

37-
class BaseCourseEnrollment(models.Model):
38-
course_id = models.CharField(max_length=255)
43+
class BaseCourseEnrollment(BaseCourseModel):
3944
date = models.DateField(null=False, db_index=True)
4045
count = models.IntegerField(null=False)
41-
created = models.DateTimeField(auto_now_add=True)
4246

43-
class Meta(object):
47+
class Meta(BaseCourseModel.Meta):
4448
abstract = True
4549
get_latest_by = 'date'
4650
index_together = [('course_id', 'date',)]
@@ -63,6 +67,24 @@ class Meta(BaseCourseEnrollment.Meta):
6367
unique_together = [('course_id', 'date', 'mode')]
6468

6569

70+
class CourseMetaSummaryEnrollment(BaseCourseModel):
    """
    Course metadata together with enrollment counts for a single enrollment mode.

    Rows live in the ``course_meta_summary_enrollment`` table and are unique
    per ``(course_id, mode)`` pair.
    """

    # Catalog-level description of the course.
    catalog_course_title = models.CharField(max_length=255, db_index=True)
    catalog_course = models.CharField(max_length=255, db_index=True)

    # Course run window and scheduling attributes.
    start_date = models.DateTimeField()
    end_date = models.DateTimeField()
    pacing_type = models.CharField(max_length=255, db_index=True)
    availability = models.CharField(max_length=255, db_index=True)

    # Enrollment figures for this mode.
    mode = models.CharField(max_length=255)
    count = models.IntegerField(null=False)
    cumulative_count = models.IntegerField(null=False)
    # NOTE(review): presumably the change in ``count`` over the last 7 days -- confirm with the data pipeline.
    count_change_7_days = models.IntegerField(default=0)

    class Meta(BaseCourseModel.Meta):
        db_table = 'course_meta_summary_enrollment'
        ordering = ('course_id',)
        unique_together = [('course_id', 'mode',)]
86+
87+
6688
class CourseEnrollmentByBirthYear(BaseCourseEnrollment):
6789
birth_year = models.IntegerField(null=False)
6890

@@ -103,14 +125,13 @@ class Meta(BaseCourseEnrollment.Meta):
103125
unique_together = [('course_id', 'date', 'gender')]
104126

105127

106-
class BaseProblemResponseAnswerDistribution(models.Model):
128+
class BaseProblemResponseAnswerDistribution(BaseCourseModel):
107129
""" Base model for the answer_distribution table. """
108130

109-
class Meta(object):
131+
class Meta(BaseCourseModel.Meta):
110132
db_table = 'answer_distribution'
111133
abstract = True
112134

113-
course_id = models.CharField(db_index=True, max_length=255)
114135
module_id = models.CharField(db_index=True, max_length=255)
115136
part_id = models.CharField(db_index=True, max_length=255)
116137
correct = models.NullBooleanField()
@@ -119,7 +140,6 @@ class Meta(object):
119140
variant = models.IntegerField(null=True)
120141
problem_display_name = models.TextField(null=True)
121142
question_text = models.TextField(null=True)
122-
created = models.DateTimeField(auto_now_add=True)
123143

124144

125145
class ProblemResponseAnswerDistribution(BaseProblemResponseAnswerDistribution):
@@ -131,19 +151,17 @@ class Meta(BaseProblemResponseAnswerDistribution.Meta):
131151
count = models.IntegerField()
132152

133153

134-
class ProblemsAndTags(models.Model):
154+
class ProblemsAndTags(BaseCourseModel):
135155
""" Model for the tags_distribution table """
136156

137-
class Meta(object):
157+
class Meta(BaseCourseModel.Meta):
138158
db_table = 'tags_distribution'
139159

140-
course_id = models.CharField(db_index=True, max_length=255)
141160
module_id = models.CharField(db_index=True, max_length=255)
142161
tag_name = models.CharField(max_length=255)
143162
tag_value = models.CharField(max_length=255)
144163
total_submissions = models.IntegerField(default=0)
145164
correct_submissions = models.IntegerField(default=0)
146-
created = models.DateTimeField(auto_now_add=True)
147165

148166

149167
class ProblemFirstLastResponseAnswerDistribution(BaseProblemResponseAnswerDistribution):
@@ -172,30 +190,26 @@ class Meta(BaseCourseEnrollment.Meta):
172190
unique_together = [('course_id', 'date', 'country_code')]
173191

174192

175-
class GradeDistribution(models.Model):
193+
class GradeDistribution(BaseCourseModel):
176194
""" Each row stores the count of a particular grade on a module for a given course. """
177195

178-
class Meta(object):
196+
class Meta(BaseCourseModel.Meta):
179197
db_table = 'grade_distribution'
180198

181199
module_id = models.CharField(db_index=True, max_length=255)
182-
course_id = models.CharField(db_index=True, max_length=255)
183200
grade = models.IntegerField()
184201
max_grade = models.IntegerField()
185202
count = models.IntegerField()
186-
created = models.DateTimeField(auto_now_add=True)
187203

188204

189-
class SequentialOpenDistribution(models.Model):
205+
class SequentialOpenDistribution(BaseCourseModel):
190206
""" Each row stores the count of views a particular module has had in a given course. """
191207

192-
class Meta(object):
208+
class Meta(BaseCourseModel.Meta):
193209
db_table = 'sequential_open_distribution'
194210

195211
module_id = models.CharField(db_index=True, max_length=255)
196-
course_id = models.CharField(db_index=True, max_length=255)
197212
count = models.IntegerField()
198-
created = models.DateTimeField(auto_now_add=True)
199213

200214

201215
class BaseVideo(models.Model):
@@ -465,10 +479,9 @@ def get_timeline(self, course_id, username):
465479
return full_timeline
466480

467481

468-
class ModuleEngagement(models.Model):
482+
class ModuleEngagement(BaseCourseModel):
469483
"""User interactions with entities within the courseware."""
470484

471-
course_id = models.CharField(db_index=True, max_length=255)
472485
username = models.CharField(max_length=255)
473486
date = models.DateField()
474487
# This will be one of "problem", "video" or "discussion"
@@ -483,18 +496,17 @@ class ModuleEngagement(models.Model):
483496

484497
objects = ModuleEngagementTimelineManager()
485498

486-
class Meta(object):
499+
class Meta(BaseCourseModel.Meta):
487500
db_table = 'module_engagement'
488501

489502

490-
class ModuleEngagementMetricRanges(models.Model):
503+
class ModuleEngagementMetricRanges(BaseCourseModel):
491504
"""
492505
Represents the low and high values for a module engagement entity and event
493506
pair, known as the metric. The range_type will either be low, normal, or
494507
high, bounded by low_value and high_value.
495508
"""
496509

497-
course_id = models.CharField(db_index=True, max_length=255)
498510
start_date = models.DateField()
499511
# This is a left-closed interval. No data from the end_date is included in the analysis.
500512
end_date = models.DateField()
@@ -505,5 +517,5 @@ class ModuleEngagementMetricRanges(models.Model):
505517
high_value = models.FloatField()
506518
low_value = models.FloatField()
507519

508-
class Meta(object):
520+
class Meta(BaseCourseModel.Meta):
509521
db_table = 'module_engagement_metric_ranges'

analytics_data_api/v0/serializers.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -507,3 +507,50 @@ def get_engagement_ranges(self, obj):
507507
})
508508

509509
return engagement_ranges
510+
511+
512+
class DynamicFieldsModelSerializer(serializers.ModelSerializer):
    """
    A ModelSerializer whose constructor accepts an optional ``fields`` keyword
    argument naming the fields that should be displayed; every other field is
    removed from the serializer instance.

    Based on http://www.django-rest-framework.org/api-guide/serializers/#dynamically-modifying-fields
    """

    def __init__(self, *args, **kwargs):
        # ``fields`` is consumed here; the superclass must not receive it.
        requested = kwargs.pop('fields', None)

        super(DynamicFieldsModelSerializer, self).__init__(*args, **kwargs)

        # Without an explicit ``fields`` argument the serializer is left untouched.
        if requested is None:
            return

        keep = set(requested)
        # Snapshot the names to remove first so self.fields is not mutated while iterating it.
        unwanted = [name for name in self.fields.keys() if name not in keep]
        for name in unwanted:
            self.fields.pop(name)
533+
534+
535+
class CourseMetaSummaryEnrollmentSerializer(ModelSerializerWithCreatedField, DynamicFieldsModelSerializer):
    """
    Serializer for course metadata and enrollment counts, with a ``modes``
    entry read from the serialized object.
    """

    # Course identity and catalog metadata.
    course_id = serializers.CharField()
    catalog_course_title = serializers.CharField()
    catalog_course = serializers.CharField()

    # Both datetimes are rendered with the project-wide DATETIME_FORMAT setting.
    start_date = serializers.DateTimeField(format=settings.DATETIME_FORMAT)
    end_date = serializers.DateTimeField(format=settings.DATETIME_FORMAT)

    pacing_type = serializers.CharField()
    availability = serializers.CharField()

    # Enrollment counts.
    count = serializers.IntegerField(default=0)
    cumulative_count = serializers.IntegerField(default=0)
    count_change_7_days = serializers.IntegerField(default=0)

    modes = serializers.SerializerMethodField()

    def get_modes(self, obj):
        """Return the ``modes`` entry of ``obj``, or None when it has none."""
        # ``obj`` is accessed through ``.get``, i.e. it is expected to be dict-like here.
        modes = obj.get('modes', None)
        return modes

    class Meta(object):
        model = models.CourseMetaSummaryEnrollment
        exclude = ('id', 'mode')

0 commit comments

Comments
 (0)