  import logging
  from collections import namedtuple
  from typing import Any, Dict, List, Union
+ from zoneinfo import ZoneInfo

  import hjson
  import pandas as pd
- import pytz
  import pangres

  from django.conf import settings
  from django.db import connections as conns, models
  from django.db.models import QuerySet
  from django_cron import CronJobBase, Schedule
  from google.cloud import bigquery
- from sqlalchemy import types
+ from sqlalchemy import types, text
  from sqlalchemy.engine import ResultProxy
+ from sqlalchemy.orm import sessionmaker

- from dashboard.common import db_util, utils
+ from dashboard.common import db_util
  from dashboard.models import Course, Resource, AcademicTerms, User
@@ -67,17 +68,17 @@ def util_function(sql_string, mysql_table, param_object=None, table_identifier=N
  # execute database query
- def execute_db_query(query: str, params: List = None) -> ResultProxy:
-     with engine.connect() as connection:
+ def execute_db_query(query: str, params: Dict = None) -> ResultProxy:
+     with engine.begin() as connection:
          connection.detach()
          if params:
-             return connection.execute(query, params)
+             return connection.execute(text(query), params)
          else:
-             return connection.execute(query)
+             return connection.execute(text(query))


  # remove all records inside the specified table
- def delete_all_records_in_table(table_name: str, where_clause: str = "", where_params: List = None):
+ def delete_all_records_in_table(table_name: str, where_clause: str = "", where_params: Dict = None):
      # delete all records in the table first, can have an optional where clause
      result_proxy = execute_db_query(f"delete from {table_name} {where_clause}", where_params)
      return (f"\n{result_proxy.rowcount} rows deleted from {table_name}\n")
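Under SQLAlchemy 1.4+, plain string SQL must be wrapped in text(), named placeholders (:name) are bound from a dict rather than a positional list, and engine.begin() opens a transaction that commits on success and rolls back on error. A minimal sketch of the pattern; the URL, table, and parameter name are illustrative:

    from sqlalchemy import create_engine, text

    engine = create_engine("mysql+mysqldb://user:pass@localhost/student_dashboard")  # hypothetical URL

    # begin() yields a connection inside a transaction; it commits when the
    # block exits normally and rolls back if an exception escapes
    with engine.begin() as connection:
        result = connection.execute(
            text("delete from resource_access where access_time > :cutoff"),
            {"cutoff": "2023-01-01"},
        )
        print(f"{result.rowcount} rows deleted")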
@@ -99,7 +100,7 @@ def soft_update_datetime_field(
              f'Skipped update of {field_name} for {model_name} instance ({model_inst.id}); existing value was found')
      else:
          if warehouse_field_value:
-             warehouse_field_value = warehouse_field_value.replace(tzinfo=pytz.UTC)
+             warehouse_field_value = warehouse_field_value.replace(tzinfo=ZoneInfo('UTC'))
              setattr(model_inst, field_name, warehouse_field_value)
              logger.info(f'Updated {field_name} for {model_name} instance ({model_inst.id})')
              return [field_name]
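Calling replace(tzinfo=...) attaches a zone label without shifting the wall-clock value, which is the right move for timestamps the warehouse already stores in UTC; ZoneInfo ships in the standard library from Python 3.9, so the pytz dependency can be dropped. A quick equivalence check:

    from datetime import datetime
    from zoneinfo import ZoneInfo

    naive = datetime(2023, 5, 1, 12, 0, 0)         # naive timestamp from the warehouse
    aware = naive.replace(tzinfo=ZoneInfo('UTC'))  # labeled as UTC, wall clock unchanged
    print(aware.isoformat())                       # 2023-05-01T12:00:00+00:00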
@@ -124,7 +125,7 @@ def verify_course_ids(self):
          logger.debug("in checking course")
          supported_courses = Course.objects.get_supported_courses()
          course_ids = [str(x) for x in supported_courses.values_list('id', flat=True)]
-         courses_data = pd.read_sql(queries['course'], data_warehouse_engine, params={'course_ids': tuple(course_ids)})
+         courses_data = pd.read_sql(queries['course'], data_warehouse_engine, params={'course_ids': course_ids})
          # error out when course id is invalid, otherwise add DataFrame to list
          for course_id, data_last_updated in supported_courses:
              if course_id not in list(courses_data['id']):
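Passing the ids as a list matters because the warehouse queries here appear to run against BigQuery (note the google.cloud.bigquery import and the CRON_BQ_IN_LIMIT setting mentioned later): BigQuery binds a Python list as an ARRAY parameter, which the SQL consumes with UNNEST, whereas the old tuple form relied on a MySQL driver expanding an IN (...) clause. A sketch of the same binding with the google-cloud-bigquery client directly; the table and ids are illustrative, and this assumes the course query filters with UNNEST:

    from google.cloud import bigquery

    client = bigquery.Client()  # assumes default credentials
    job_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ArrayQueryParameter('course_ids', 'STRING', ['17700000000000001']),
        ]
    )
    rows = client.query(
        'select id, name from course_dim where id in unnest(@course_ids)',
        job_config=job_config,
    ).result()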
@@ -151,7 +152,7 @@ def update_user(self):
          # cron status
          status = ""

-         logger.debug("in update with data warehouse user")
+         logger.info("in update with data warehouse user")

          # delete all records in the table first
          status += delete_all_records_in_table("user")
@@ -160,7 +161,7 @@ def update_user(self):
          status += util_function(
              queries['user'],
              'user',
-             {'course_ids': tuple(self.valid_locked_course_ids),
+             {'course_ids': self.valid_locked_course_ids,
               'canvas_data_id_increment': settings.CANVAS_DATA_ID_INCREMENT
              })
@@ -193,13 +194,13 @@ def update_canvas_resource(self):
          # cron status
          status = ""

-         logger.debug("in update canvas resource")
+         logger.info("in update canvas resource")

          # Select all the files for these courses
          # convert int array to str array
          df_attach = pd.read_sql(queries['resource'],
                                  data_warehouse_engine,
-                                 params={'course_ids': tuple(self.valid_locked_course_ids)})
+                                 params={'course_ids': self.valid_locked_course_ids})
          logger.debug(df_attach)
          # Update these back again based on the dataframe
          # Remove any rows where file_state is not available!
@@ -217,6 +218,8 @@ def update_resource_access(self):
          # cron status
          status = ""

+         logger.info("in update resource access")
+
          # return string with concatenated SQL insert result
          return_string = ""
@@ -231,7 +234,7 @@ def update_resource_access(self):
          logger.info(f"Deleting all records in resource_access after {data_last_updated}")

-         status += delete_all_records_in_table("resource_access", f"WHERE access_time > %s", [data_last_updated, ])
+         status += delete_all_records_in_table("resource_access", f"WHERE access_time > :data_last_updated", {'data_last_updated': data_last_updated})

          # loop through multiple course ids, 20 at a time
          # (This is set by the CRON_BQ_IN_LIMIT from settings)
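Since execute_db_query now wraps its SQL in text(), the driver-level %s placeholder is no longer interpreted; SQLAlchemy parses :name markers out of the string and binds them from matching dict keys. Usage of the helper above, with an illustrative cutoff value:

    from datetime import datetime
    from zoneinfo import ZoneInfo

    status = ""
    data_last_updated = datetime(2023, 4, 30, tzinfo=ZoneInfo("UTC"))  # illustrative cutoff
    status += delete_all_records_in_table(
        "resource_access",
        "WHERE access_time > :data_last_updated",
        {"data_last_updated": data_last_updated},
    )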
@@ -393,7 +396,7 @@ def update_resource_access(self):
          student_enrollment_type = User.EnrollmentType.STUDENT
          student_enrollment_df = pd.read_sql(
              'select user_id, course_id from user where enrollment_type=%s',
-             engine, params={student_enrollment_type})
+             engine, params=[(str(student_enrollment_type),)])
          resource_access_df = pd.merge(
              resource_access_df, student_enrollment_df,
              on=['user_id', 'course_id'],
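The old call passed a set literal, which is unordered and hands the raw enum object to the driver; the replacement is a proper sequence holding one parameter tuple, with the enum cast to str so MySQL receives a plain value. A named-style equivalent, as a sketch only (the %(name)s form is the MySQL driver's paramstyle, not what the code above uses):

    import pandas as pd

    student_enrollment_df = pd.read_sql(
        'select user_id, course_id from user where enrollment_type=%(enrollment_type)s',
        engine,
        params={'enrollment_type': str(User.EnrollmentType.STUDENT)},
    )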
@@ -437,6 +440,8 @@ def update_groups(self):
          # cron status
          status = ""

+         logger.info("update_groups(): ")
+
          # delete all records in assignment_group table
          status += delete_all_records_in_table("assignment_groups")
@@ -447,7 +452,7 @@ def update_groups(self):
          # loop through multiple course ids
          status += util_function(queries['assignment_groups'],
                                  'assignment_groups',
-                                 {'course_ids': tuple(self.valid_locked_course_ids)})
+                                 {'course_ids': self.valid_locked_course_ids})

          return status
@@ -463,7 +468,7 @@ def update_assignment(self):
          # loop through multiple course ids
          status += util_function(queries['assignment'],
                                  'assignment',
-                                 {'course_ids': tuple(self.valid_locked_course_ids),
+                                 {'course_ids': self.valid_locked_course_ids,
                                   'time_zone': settings.TIME_ZONE})

          return status
@@ -480,14 +485,30 @@ def submission(self):
          # loop through multiple course ids
          # filter out not released grades (submission_dim.posted_at date is not null) and partial grades (submission_dim.workflow_state != 'graded')
-         status += util_function(queries['submission'],
-                                 'submission',
-                                 {
-                                     'course_ids': tuple(self.valid_locked_course_ids),
-                                     'canvas_data_id_increment': settings.CANVAS_DATA_ID_INCREMENT,
-                                     'time_zone': settings.TIME_ZONE
-                                 })
+         query_params = {
+             'course_ids': self.valid_locked_course_ids,
+             'time_zone': settings.TIME_ZONE,
+             'canvas_data_id_increment': settings.CANVAS_DATA_ID_INCREMENT,
+         }
+         Session = sessionmaker(bind=data_warehouse_engine)
+         try:
+             # Create a session
+             with Session() as session:
+                 # Execute the first query to create the temporary table
+                 session.execute(text(queries['submission']).bindparams(**query_params))
+
+                 # Execute the second query using the temporary table
+                 result = session.execute(text(queries['submission_with_avg_score']))
+                 df = pd.DataFrame(result.fetchall(), columns=result.keys())
+                 df = df.drop_duplicates(keep='first')
+                 df.to_sql(con=engine, name='submission', if_exists='append', index=False)
+
+         except Exception as e:
+             logger.exception(f'Error running sql on table submission: {str(e)}')
+             raise
+         status += f"{str(df.shape[0])} submission: {query_params}\n"

+         # returns the row size of dataframe
          return status

      def weight_consideration(self):
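The two-step pattern works because temporary tables are scoped to the connection (or database session) that creates them: a sessionmaker Session holds one pooled connection for the duration of its transaction, so the second statement is guaranteed to see the table the first one built, which two independent engine.connect() calls would not guarantee. A minimal sketch of the idea, with a hypothetical URL and MySQL-flavored illustrative SQL:

    from sqlalchemy import create_engine, text
    from sqlalchemy.orm import sessionmaker

    data_warehouse_engine = create_engine('mysql+mysqldb://user:pass@warehouse/udw')  # hypothetical

    Session = sessionmaker(bind=data_warehouse_engine)
    with Session() as session:
        # statement 1: the temp table lives on this session's connection
        session.execute(text(
            'create temporary table recent_submissions as '
            'select * from submission_dim where course_id = :course_id'
        ).bindparams(course_id=17700000000000001))

        # statement 2: same connection, so the temp table is still visible
        result = session.execute(text('select count(*) from recent_submissions'))
        print(result.scalar())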
@@ -503,7 +524,7 @@ def weight_consideration(self):
          # loop through multiple course ids
          status += util_function(queries['assignment_weight'],
                                  'assignment_weight_consideration',
-                                 {'course_ids': tuple(self.valid_locked_course_ids)},
+                                 {'course_ids': self.valid_locked_course_ids},
                                  'weight')

          logger.debug(status + "\n\n")
@@ -543,7 +564,7 @@ def update_course(self, warehouse_courses_data: pd.DataFrame) -> str:
          Updates course records with data returned from verify_course_ids, only making changes when necessary.
          """
          status: str = ''
-         logger.debug('update_course()')
+         logger.info('update_course()')

          logger.debug(warehouse_courses_data.to_json(orient='records'))
          courses: QuerySet = Course.objects.filter(id__in=self.valid_locked_course_ids)
@@ -588,7 +609,7 @@ def do(self) -> str:
          status = ""

-         run_start = datetime.now(pytz.UTC)
+         run_start = datetime.now(ZoneInfo('UTC'))
          status += f"Start cron: {str(run_start)} UTC\n"
          course_verification = self.verify_course_ids()
          invalid_course_id_list = course_verification.invalid_course_ids