@@ -761,7 +761,7 @@ def _load_from_checkpoint(
761
761
Step 2: Loop through each channel. For each channel:
762
762
Step 2.1: Get messages within the time range.
763
763
Step 2.2: Process messages in parallel, yield back docs.
764
- Step 2.3: Update checkpoint with new_latest, seen_thread_ts, and current_channel.
764
+ Step 2.3: Update checkpoint with new_oldest, seen_thread_ts, and current_channel.
765
765
Slack returns messages from newest to oldest, so we need to keep track of
766
766
the latest message we've seen in each channel.
767
767
Step 2.4: If there are no more messages in the channel, switch the current
@@ -837,6 +837,7 @@ def _load_from_checkpoint(
837
837
838
838
channel_message_ts = checkpoint .channel_completion_map .get (channel_id )
839
839
if channel_message_ts :
840
+ # Set oldest to the checkpoint timestamp to resume from where we left off
840
841
oldest = channel_message_ts
841
842
842
843
logger .debug (
@@ -855,7 +856,8 @@ def _load_from_checkpoint(
855
856
f"{ latest = } "
856
857
)
857
858
858
- new_latest = message_batch [0 ]["ts" ] if message_batch else latest
859
+ # message_batch[0] is the newest message (Slack returns newest to oldest)
860
+ new_oldest = message_batch [0 ]["ts" ] if message_batch else latest
859
861
860
862
num_threads_start = len (seen_thread_ts )
861
863
@@ -906,15 +908,14 @@ def _load_from_checkpoint(
906
908
num_threads_processed = len (seen_thread_ts ) - num_threads_start
907
909
908
910
# calculate a percentage progress for the current channel by determining
909
- # our viable range start and end, and the latest timestamp we are querying
910
- # up to
911
- new_latest_seconds_epoch = SecondsSinceUnixEpoch ( new_latest )
912
- if new_latest_seconds_epoch > end :
911
+ # how much of the time range we've processed so far
912
+ new_oldest_seconds_epoch = SecondsSinceUnixEpoch ( new_oldest )
913
+ range_start = start if start else max ( 0 , channel_created )
914
+ if new_oldest_seconds_epoch < range_start :
913
915
range_complete = 0.0
914
916
else :
915
- range_complete = end - new_latest_seconds_epoch
917
+ range_complete = new_oldest_seconds_epoch - range_start
916
918
917
- range_start = max (0 , channel_created )
918
919
range_total = end - range_start
919
920
if range_total <= 0 :
920
921
range_total = 1
@@ -935,7 +936,7 @@ def _load_from_checkpoint(
935
936
)
936
937
937
938
checkpoint .seen_thread_ts = list (seen_thread_ts )
938
- checkpoint .channel_completion_map [channel ["id" ]] = new_latest
939
+ checkpoint .channel_completion_map [channel ["id" ]] = new_oldest
939
940
940
941
# bypass channels where the first set of messages seen are all bots
941
942
# check at least MIN_BOT_MESSAGE_THRESHOLD messages are in the batch
0 commit comments