Commit b5f8752

I1544 backend dependencies (#1554)
1 parent 9fb4601 commit b5f8752

17 files changed: +132 -94 lines changed

Dockerfile (+1 -1)

@@ -39,7 +39,7 @@ WORKDIR /code
 COPY requirements.txt .
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
-    build-essential curl apt-transport-https libpq-dev netcat-traditional jq python3-dev xmlsec1 cron git && \
+    build-essential curl apt-transport-https libpq-dev netcat-traditional default-libmysqlclient-dev pkg-config jq python3-dev xmlsec1 cron git && \
     apt-get upgrade -y

 # Install MariaDB from the mariadb repository rather than using Debians

config/cron_udp.hjson (+16 -13)

@@ -41,8 +41,8 @@
 left join entity.course_grade cg
 on cse.course_section_id = cg.course_section_id and cse.person_id = cg.person_id
 where
-co.lms_int_id in %(course_ids)s
-and cse.role in ('Student', 'TeachingAssistant', 'Teacher')
+co.lms_int_id = ANY(%(course_ids)s)
+and cse.role = ANY(ARRAY['Student', 'Teacher', 'TeachingAssistant']::text[])
 and cse.role_status = 'Enrolled'
 and cse.enrollment_status = 'Active'
 order by user_id
@@ -56,14 +56,14 @@
 la.visibility = 'everyone'
 and la.status = 'published'
 and la.course_offering_id = co.id
-and co.lms_int_id in %(course_ids)s
+and co.lms_int_id = ANY(%(course_ids)s)
 ), assignment_grp as (
 select lg.*
 from entity.learner_activity_group lg, keymap.course_offering co
 where
 lg.status = 'available'
 and lg.course_offering_id = co.id
-and co.lms_int_id in %(course_ids)s
+and co.lms_int_id = ANY(%(course_ids)s)
 ), assign_more as (
 select distinct(a.learner_activity_group_id), da.group_points
 from assignment_details a
@@ -125,7 +125,7 @@
 la.visibility = 'everyone'
 and la.status = 'published'
 and la.course_offering_id = co.id
-and co.lms_int_id in %(course_ids)s
+and co.lms_int_id = ANY(%(course_ids)s)
 and la.learner_activity_id = la_km.id
 and la.learner_activity_group_id = lag_km.id
 )
@@ -147,7 +147,7 @@
 keymap.course_offering co_km
 where
 lag.course_offering_id = co_km.id
-and co_km.lms_int_id in %(course_ids)s
+and co_km.lms_int_id = ANY(%(course_ids)s)
 group by co_km.lms_int_id
 ''',
 "term":
@@ -182,7 +182,7 @@
 LEFT OUTER JOIN entity.academic_term at1 on (co.academic_term_id = at1.academic_term_id),
 keymap.course_offering co2,
 keymap.academic_term at2
-WHERE co2.lms_int_id in %(course_ids)s
+WHERE co2.lms_int_id = ANY(%(course_ids)s)
 and co.course_offering_id = co2.id
 and at1.academic_term_id = at2.id
 ''',
@@ -196,7 +196,7 @@
 where
 f.course_offering_id = co_km.id
 and f.file_id = f_km.id
-and co_km.lms_int_id in %(course_ids)s
+and co_km.lms_int_id = ANY(%(course_ids)s)
 order by id
 ''',
 "submission":
@@ -212,7 +212,7 @@
 left join keymap.course_offering co
 on cs.le_current_course_offering_id = co.id
 where
-co.lms_int_id in %(course_ids)s
+co.lms_int_id = ANY(:course_ids)
 and cse.role_status ='Enrolled'
 and cse."role" = 'Student'
 and cse.enrollment_status = 'Active'
@@ -228,13 +228,13 @@
 lar.published_score as published_score,
 lar.response_date as submitted_at,
 lar.graded_date as graded_at,
-timezone(%(time_zone)s, lar.posted_at AT TIME ZONE 'UTC') as grade_posted_local_date,
+timezone(:time_zone, lar.posted_at AT TIME ZONE 'UTC') as grade_posted_local_date,
 lar.grading_status as submission_workflow_state,
 la.title as title,
 lar.learner_activity_result_id as learner_activity_result_id,
 lar.person_id as short_user_id,
 cast(lar2.lms_int_id as BIGINT) as submission_id,
-(cast(%(canvas_data_id_increment)s as bigint) + cast(p.lms_ext_id as bigint)) as canvas_user_id
+(cast(:canvas_data_id_increment as bigint) + cast(p.lms_ext_id as bigint)) as canvas_user_id
 from entity.learner_activity_result lar
 join enrollment on lar.person_id= enrollment.user_id
 join enrollment e on lar.person_id = e.user_id
@@ -244,7 +244,7 @@
 left join keymap.course_offering co on co.id = la.course_offering_id
 join keymap.person p on p.id = lar.person_id
 where
-co.lms_int_id in %(course_ids)s
+co.lms_int_id = ANY(:course_ids)
 and la.status = 'published'
 )
 select
@@ -267,7 +267,10 @@
 grade_posted_local_date
 from
 submission
-);
+)
+''',
+"submission_with_avg_score":
+'''
 select
 f.id::bigint,
 f.assignment_id::bigint assignment_id,
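Note on the placeholder changes above: the queries that pandas executes keep psycopg-style %(name)s parameters, while the two submission queries switch to SQLAlchemy :name binds because cron.py now runs them through text(). The move from "in %(course_ids)s" to "= ANY(%(course_ids)s)" matches the psycopg 3 migration guidance: passing a Python tuple to an IN clause was a psycopg2 idiom, whereas psycopg 3 adapts a Python list to a PostgreSQL array that ANY() can compare against. A minimal sketch of the pattern, assuming a psycopg 3 / SQLAlchemy stack and an illustrative connection string:

import pandas as pd
from sqlalchemy import create_engine

# Assumed DSN; the real one comes from the app's settings.
engine = create_engine("postgresql+psycopg://user:pass@warehouse.example.edu/unizin")

course_ids = ["17700000000012345", "17700000000067890"]  # illustrative lms_int_id values
sql = "select id, lms_int_id from keymap.course_offering where lms_int_id = ANY(%(course_ids)s)"

# psycopg 3 adapts the Python list to a PostgreSQL array, so ANY(...) matches each element;
# the old "in %(course_ids)s" form relied on psycopg2 expanding a tuple.
df = pd.read_sql(sql, engine, params={"course_ids": course_ids})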

config/env_sample.hjson (+1 -1)

@@ -53,7 +53,7 @@
     # Enable secure cookies, also set your trusted origin (example of instructure.com)
     # This needs to be true for deployments or when testing LTI with ngrok or loophole.
     "CSRF_COOKIE_SECURE": false,
-    "CSRF_TRUSTED_ORIGINS": ["instructure.com"],
+    "CSRF_TRUSTED_ORIGINS": ["https://*.instructure.com", "https://*.umich.edu"],
     # If you have a proxy that sets this header then set this to true. Default is false
     "USE_X_FORWARDED_HOST": false,
     # SameSite settings for Session and CSRF (defaults in settings.py should work), if you do want non-string None set to null.
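The added scheme and wildcard matter here: since Django 4.0, entries in CSRF_TRUSTED_ORIGINS must include the scheme, and a leading "https://*." is how subdomains are matched, so the bare "instructure.com" form from the old sample would be rejected on newer Django versions. A minimal sketch of the equivalent settings value (assumed Django 4.x settings module):

# Minimal settings sketch (assumes Django 4.x, where origins must carry a scheme).
CSRF_TRUSTED_ORIGINS = [
    "https://*.instructure.com",  # Canvas-hosted LTI launches
    "https://*.umich.edu",        # institution domains, as in the sample config
]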

dashboard/admin.py (+5 -3)

@@ -68,6 +68,7 @@ def clean(self):
         return self.cleaned_data


+@admin.register(AcademicTerms)
 class TermAdmin(admin.ModelAdmin):
     exclude = ('id',)
     list_display = ('canvas_id', 'name', 'date_start', 'date_end')
@@ -77,6 +78,7 @@ def has_add_permission(self, request):
         return False


+@admin.register(Course)
 class CourseAdmin(admin.ModelAdmin):
     inlines = [CourseViewOptionInline, ]
     form = CourseForm
@@ -95,11 +97,13 @@ def clear_course_updated_dates(self, request, queryset):
         self.message_user(request, "All selected last updated values cleared.")

     # Need this method to correctly display the line breaks
+    @admin.display(
+        description="Course View Option(s)"
+    )
     def _courseviewoption(self, obj):
         return mark_safe(linebreaksbr(obj.courseviewoption))

     # Known mypy issue: https://github.com/python/mypy/issues/708
-    _courseviewoption.short_description = "Course View Option(s)"  # type: ignore[attr-defined]

     def course_link(self, obj):
         return format_html('<a href="{}">Link</a>', obj.absolute_url)
@@ -160,8 +164,6 @@ def has_change_permission(request, obj=None):
     def has_delete_permission(request, obj=None):
         return False

-admin.site.register(AcademicTerms, TermAdmin)
-admin.site.register(Course, CourseAdmin)

 # Remove the pinax LogAdmin and add ours
 admin.site.unregister(Log)
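The admin changes above swap the module-level admin.site.register(...) calls for the @admin.register(...) decorator and replace the short_description attribute assignment with @admin.display(description=...), which also sidesteps the mypy attr-defined complaint the old comment referenced. A minimal sketch of the same pattern with hypothetical model and admin names (not the project's classes):

from django.contrib import admin
from django.db import models


class Widget(models.Model):
    name = models.CharField(max_length=100)

    class Meta:
        app_label = "demo"  # assumed app label so the sketch stands alone


@admin.register(Widget)  # replaces: admin.site.register(Widget, WidgetAdmin)
class WidgetAdmin(admin.ModelAdmin):
    list_display = ("name", "pretty_name")

    @admin.display(description="Pretty name")  # replaces: pretty_name.short_description = "Pretty name"
    def pretty_name(self, obj):
        return obj.name.title()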

dashboard/common/db_util.py (+1 -1)

@@ -37,7 +37,7 @@ def create_sqlalchemy_engine(db_params: DjangoDBParams) -> Engine:
     if new_db_params['ENGINE'] == (BACKENDS_PATH + 'mysql'):
         return create_engine(f'mysql+mysqldb://{core_string}?charset=utf8mb4')
     else:
-        return create_engine('postgresql://' + core_string)
+        return create_engine('postgresql+psycopg://' + core_string)


 def canvas_id_to_incremented_id(canvas_id):
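SQLAlchemy picks the DBAPI from the dialect prefix of the URL: a bare postgresql:// still defaults to psycopg2, so postgresql+psycopg:// is what actually selects the psycopg 3 driver. A minimal sketch with assumed credentials:

from sqlalchemy import create_engine, text

# "+psycopg" selects psycopg 3; "postgresql://" alone would still load psycopg2.
engine = create_engine("postgresql+psycopg://dash:secret@localhost:5432/student_dashboard")

with engine.connect() as conn:
    print(conn.execute(text("select version()")).scalar_one())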

dashboard/cron.py (+49 -28)

@@ -2,21 +2,22 @@
 import logging
 from collections import namedtuple
 from typing import Any, Dict, List, Union
+from zoneinfo import ZoneInfo

 import hjson
 import pandas as pd
-import pytz
 import pangres

 from django.conf import settings
 from django.db import connections as conns, models
 from django.db.models import QuerySet
 from django_cron import CronJobBase, Schedule
 from google.cloud import bigquery
-from sqlalchemy import types
+from sqlalchemy import types, text
 from sqlalchemy.engine import ResultProxy
+from sqlalchemy.orm import sessionmaker

-from dashboard.common import db_util, utils
+from dashboard.common import db_util
 from dashboard.models import Course, Resource, AcademicTerms, User

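The import changes trade pytz for the standard-library zoneinfo module (available since Python 3.9); for plain UTC the two behave the same, with ZoneInfo('UTC') standing in for pytz.UTC and no localize() step needed when the zone is attached directly. A small sketch of the swap:

from datetime import datetime
from zoneinfo import ZoneInfo

run_start = datetime.now(ZoneInfo("UTC"))              # was: datetime.now(pytz.UTC)
fetched = datetime(2024, 1, 15, 12, 30)                # naive timestamp, e.g. from the warehouse
fetched_utc = fetched.replace(tzinfo=ZoneInfo("UTC"))  # was: .replace(tzinfo=pytz.UTC)
print(run_start.isoformat(), fetched_utc.isoformat())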
@@ -67,17 +68,17 @@ def util_function(sql_string, mysql_table, param_object=None, table_identifier=N


 # execute database query
-def execute_db_query(query: str, params: List = None) -> ResultProxy:
-    with engine.connect() as connection:
+def execute_db_query(query: str, params: Dict = None) -> ResultProxy:
+    with engine.begin() as connection:
         connection.detach()
         if params:
-            return connection.execute(query, params)
+            return connection.execute(text(query), params)
         else:
-            return connection.execute(query)
+            return connection.execute(text(query))


 # remove all records inside the specified table
-def delete_all_records_in_table(table_name: str, where_clause: str = "", where_params: List = None):
+def delete_all_records_in_table(table_name: str, where_clause: str = "", where_params: Dict = None):
     # delete all records in the table first, can have an optional where clause
     result_proxy = execute_db_query(f"delete from {table_name} {where_clause}", where_params)
     return(f"\n{result_proxy.rowcount} rows deleted from {table_name}\n")
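This hunk lines up with two SQLAlchemy 2.x conventions: plain SQL strings are no longer directly executable, so raw queries are wrapped in text() with named binds (:name) supplied as a dict, and engine.begin() opens a transaction that commits on exit, which the DELETE statements rely on now that connection-level autocommit is gone. A minimal sketch of the pattern under those assumptions (table and column names are illustrative):

from sqlalchemy import create_engine, text

engine = create_engine("postgresql+psycopg://dash:secret@localhost/student_dashboard")  # assumed DSN

def delete_rows_after(table_name: str, cutoff) -> int:
    # text() turns ":cutoff" into a bound parameter; begin() commits when the block exits.
    with engine.begin() as conn:
        result = conn.execute(
            text(f"delete from {table_name} where access_time > :cutoff"),
            {"cutoff": cutoff},
        )
        return result.rowcount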
@@ -99,7 +100,7 @@ def soft_update_datetime_field(
             f'Skipped update of {field_name} for {model_name} instance ({model_inst.id}); existing value was found')
     else:
         if warehouse_field_value:
-            warehouse_field_value = warehouse_field_value.replace(tzinfo=pytz.UTC)
+            warehouse_field_value = warehouse_field_value.replace(tzinfo=ZoneInfo('UTC'))
             setattr(model_inst, field_name, warehouse_field_value)
             logger.info(f'Updated {field_name} for {model_name} instance ({model_inst.id})')
             return [field_name]
@@ -124,7 +125,7 @@ def verify_course_ids(self):
         logger.debug("in checking course")
         supported_courses = Course.objects.get_supported_courses()
         course_ids = [str(x) for x in supported_courses.values_list('id', flat=True)]
-        courses_data = pd.read_sql(queries['course'], data_warehouse_engine, params={'course_ids': tuple(course_ids)})
+        courses_data = pd.read_sql(queries['course'], data_warehouse_engine, params={'course_ids': course_ids})
         # error out when course id is invalid, otherwise add DataFrame to list
         for course_id, data_last_updated in supported_courses:
             if course_id not in list(courses_data['id']):
@@ -151,7 +152,7 @@ def update_user(self):
         # cron status
         status = ""

-        logger.debug("in update with data warehouse user")
+        logger.info("in update with data warehouse user")

         # delete all records in the table first
         status += delete_all_records_in_table("user")
@@ -160,7 +161,7 @@ def update_user(self):
         status += util_function(
             queries['user'],
             'user',
-            {'course_ids': tuple(self.valid_locked_course_ids),
+            {'course_ids': self.valid_locked_course_ids,
              'canvas_data_id_increment': settings.CANVAS_DATA_ID_INCREMENT
             })

@@ -193,13 +194,13 @@ def update_canvas_resource(self):
         # cron status
         status = ""

-        logger.debug("in update canvas resource")
+        logger.info("in update canvas resource")

         # Select all the files for these courses
         # convert int array to str array
         df_attach = pd.read_sql(queries['resource'],
                                 data_warehouse_engine,
-                                params={'course_ids': tuple(self.valid_locked_course_ids)})
+                                params={'course_ids': self.valid_locked_course_ids})
         logger.debug(df_attach)
         # Update these back again based on the dataframe
         # Remove any rows where file_state is not available!
@@ -217,6 +218,8 @@ def update_resource_access(self):
         # cron status
         status = ""

+        logger.info("in update resource access")
+
         # return string with concatenated SQL insert result
         return_string = ""

@@ -231,7 +234,7 @@

         logger.info(f"Deleting all records in resource_access after {data_last_updated}")

-        status += delete_all_records_in_table("resource_access", f"WHERE access_time > %s", [data_last_updated, ])
+        status += delete_all_records_in_table("resource_access", f"WHERE access_time > :data_last_updated", {'data_last_updated': data_last_updated})

         # loop through multiple course ids, 20 at a time
         # (This is set by the CRON_BQ_IN_LIMIT from settings)
@@ -393,7 +396,7 @@
         student_enrollment_type = User.EnrollmentType.STUDENT
         student_enrollment_df = pd.read_sql(
             'select user_id, course_id from user where enrollment_type= %s',
-            engine, params={student_enrollment_type})
+            engine, params=[(str(student_enrollment_type),)])
         resource_access_df = pd.merge(
             resource_access_df, student_enrollment_df,
             on=['user_id', 'course_id'],
@@ -437,6 +440,8 @@ def update_groups(self):
         # cron status
         status = ""

+        logger.info("update_groups(): ")
+
         # delete all records in assignment_group table
         status += delete_all_records_in_table("assignment_groups")

@@ -447,7 +452,7 @@
         # loop through multiple course ids
         status += util_function(queries['assignment_groups'],
                                 'assignment_groups',
-                                {'course_ids': tuple(self.valid_locked_course_ids)})
+                                {'course_ids': self.valid_locked_course_ids})

         return status

@@ -463,7 +468,7 @@ def update_assignment(self):
         # loop through multiple course ids
         status += util_function(queries['assignment'],
                                 'assignment',
-                                {'course_ids': tuple(self.valid_locked_course_ids),
+                                {'course_ids': self.valid_locked_course_ids,
                                  'time_zone': settings.TIME_ZONE})

         return status
@@ -480,14 +485,30 @@ def submission(self):

         # loop through multiple course ids
         # filter out not released grades (submission_dim.posted_at date is not null) and partial grades (submission_dim.workflow_state != 'graded')
-        status += util_function(queries['submission'],
-                                'submission',
-                                {
-                                    'course_ids': tuple(self.valid_locked_course_ids),
-                                    'canvas_data_id_increment': settings.CANVAS_DATA_ID_INCREMENT,
-                                    'time_zone': settings.TIME_ZONE
-                                })
+        query_params = {
+            'course_ids': self.valid_locked_course_ids,
+            'time_zone': settings.TIME_ZONE,
+            'canvas_data_id_increment': settings.CANVAS_DATA_ID_INCREMENT,
+        }
+        Session = sessionmaker(bind=data_warehouse_engine)
+        try:
+            # Create a session
+            with Session() as session:
+                # Execute the first query to create the temporary table
+                session.execute(text(queries['submission']).bindparams(**query_params))
+
+                # Execute the second query using the temporary table
+                result = session.execute(text(queries['submission_with_avg_score']))
+                df = pd.DataFrame(result.fetchall(), columns=result.keys())
+                df = df.drop_duplicates(keep='first')
+                df.to_sql(con=engine, name='submission', if_exists='append', index=False)
+
+        except Exception as e:
+            logger.exception('Error running sql on table submission', str(e))
+            raise
+        status += f"{str(df.shape[0])} submission: {query_params}\n"

+        # returns the row size of dataframe
         return status

     def weight_consideration(self):
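The rewritten submission step runs both statements on one ORM session because the first query (per the comment in the hunk) creates a temporary table that submission_with_avg_score then reads; PostgreSQL temporary tables are visible only to the session/connection that created them, so issuing the two queries on separate connections would not work. A small illustrative sketch of that constraint (the SQL and names here are placeholders, not the project's queries):

import pandas as pd
from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker

engine = create_engine("postgresql+psycopg://user:pass@warehouse.example.edu/unizin")  # assumed DSN
Session = sessionmaker(bind=engine)

with Session() as session:
    # The temp table lives only as long as this session, so both statements must share it.
    session.execute(text("create temporary table tmp_scores (score numeric)"))
    session.execute(
        text("insert into tmp_scores (score) values (:a), (:b)"),
        {"a": 90, "b": 75},
    )
    result = session.execute(text("select score from tmp_scores"))
    df = pd.DataFrame(result.fetchall(), columns=result.keys())
    print(df)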
@@ -503,7 +524,7 @@ def weight_consideration(self):
         # loop through multiple course ids
         status += util_function(queries['assignment_weight'],
                                 'assignment_weight_consideration',
-                                {'course_ids': tuple(self.valid_locked_course_ids)},
+                                {'course_ids': self.valid_locked_course_ids},
                                 'weight')

         logger.debug(status + "\n\n")
@@ -543,7 +564,7 @@ def update_course(self, warehouse_courses_data: pd.DataFrame) -> str:
         Updates course records with data returned from verify_course_ids, only making changes when necessary.
         """
         status: str = ''
-        logger.debug('update_course()')
+        logger.info('update_course()')

         logger.debug(warehouse_courses_data.to_json(orient='records'))
         courses: QuerySet = Course.objects.filter(id__in=self.valid_locked_course_ids)
@@ -588,7 +609,7 @@ def do(self) -> str:

         status = ""

-        run_start = datetime.now(pytz.UTC)
+        run_start = datetime.now(ZoneInfo('UTC'))
         status += f"Start cron: {str(run_start)} UTC\n"
         course_verification = self.verify_course_ids()
         invalid_course_id_list = course_verification.invalid_course_ids
