Skip to content
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,15 @@
import com.google.common.flogger.FluentLogger;
import com.vladsch.flexmark.util.misc.Pair;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.*;
import java.util.function.Function;
import java.util.stream.Stream;
import org.mobilitydata.gtfsvalidator.performance.MemoryUsage;
import org.mobilitydata.gtfsvalidator.performance.MemoryUsageRegister;
import org.mobilitydata.gtfsvalidator.reportsummary.AgencyMetadata;
import org.mobilitydata.gtfsvalidator.reportsummary.JsonReportCounts;
import org.mobilitydata.gtfsvalidator.reportsummary.JsonReportFeedInfo;
import org.mobilitydata.gtfsvalidator.table.*;
import org.mobilitydata.gtfsvalidator.util.CalendarUtil;
import org.mobilitydata.gtfsvalidator.util.ServicePeriod;

public class FeedMetadata {

Expand Down Expand Up @@ -87,16 +85,28 @@ public static FeedMetadata from(GtfsFeedContainer feedContainer, ImmutableSet<St
feedMetadata.loadAgencyData(agencyTableOptional.get());
}

if (feedContainer.getTableForFilename(GtfsTrip.FILENAME).isPresent()
&& (feedContainer.getTableForFilename(GtfsCalendar.FILENAME).isPresent()
|| feedContainer.getTableForFilename(GtfsCalendarDate.FILENAME).isPresent())) {
Optional<GtfsTripTableContainer> tripTableContainer =
feedContainer
.getTableForFilename(GtfsTrip.FILENAME)
.filter(GtfsEntityContainer::isParsedSuccessfully)
.map(c -> (GtfsTripTableContainer) c);

Optional<GtfsCalendarTableContainer> calendarTableContainer =
feedContainer
.getTableForFilename(GtfsCalendar.FILENAME)
.filter(GtfsEntityContainer::isParsedSuccessfully)
.map(c -> (GtfsCalendarTableContainer) c);

Optional<GtfsCalendarDateTableContainer> calendarDateTableContainer =
feedContainer
.getTableForFilename(GtfsCalendarDate.FILENAME)
.filter(GtfsEntityContainer::isParsedSuccessfully)
.map(c -> (GtfsCalendarDateTableContainer) c);

if (tripTableContainer.isPresent()
&& (calendarTableContainer.isPresent() || calendarDateTableContainer.isPresent())) {
feedMetadata.loadServiceWindow(
(GtfsTableContainer<GtfsTrip, ?>)
feedContainer.getTableForFilename(GtfsTrip.FILENAME).get(),
(GtfsTableContainer<GtfsCalendar, ?>)
feedContainer.getTableForFilename(GtfsCalendar.FILENAME).get(),
(GtfsTableContainer<GtfsCalendarDate, ?>)
feedContainer.getTableForFilename(GtfsCalendarDate.FILENAME).get());
tripTableContainer.get(), calendarTableContainer, calendarDateTableContainer);
}

feedMetadata.loadSpecFeatures(feedContainer);
Expand Down Expand Up @@ -467,129 +477,47 @@ private String checkLocalDate(LocalDate localDate) {
/**
* Loads the service date range by determining the earliest start date and the latest end date for
* all services referenced with a trip\_id in `trips.txt`. It handles three cases: 1. When only
* `calendars.txt` is used. 2. When only `calendar\_dates.txt` is used. 3. When both
* `calendars.txt` and `calendar\_dates.txt` are used.
* `calendar.txt` is used. 2. When only `calendar\_dates.txt` is used. 3. When both `calendar.txt`
* and `calendar\_dates.txt` are used.
*
* @param tripContainer the container for `trips.txt` data
* @param calendarTable the container for `calendars.txt` data
* @param calendarTable the container for `calendar.txt` data
* @param calendarDateTable the container for `calendar\_dates.txt` data
*/
public void loadServiceWindow(
GtfsTableContainer<GtfsTrip, ?> tripContainer,
GtfsTableContainer<GtfsCalendar, ?> calendarTable,
GtfsTableContainer<GtfsCalendarDate, ?> calendarDateTable) {
List<GtfsTrip> trips = tripContainer.getEntities();

GtfsTripTableContainer tripContainer,
Optional<GtfsCalendarTableContainer> calendarTable,
Optional<GtfsCalendarDateTableContainer> calendarDateTable) {
LocalDate earliestStartDate = null;
LocalDate latestEndDate = null;
try {
if ((calendarDateTable == null) && (calendarTable != null)) {
// When only calendars.txt is used
List<GtfsCalendar> calendars = calendarTable.getEntities();
for (GtfsTrip trip : trips) {
String serviceId = trip.serviceId();
for (GtfsCalendar calendar : calendars) {
if (calendar.serviceId().equals(serviceId)) {
LocalDate startDate = calendar.startDate().getLocalDate();
LocalDate endDate = calendar.endDate().getLocalDate();
if (startDate != null || endDate != null) {
if (startDate.toString().equals(LocalDate.EPOCH.toString())
|| endDate.toString().equals(LocalDate.EPOCH.toString())) {
continue;
}
if (earliestStartDate == null || startDate.isBefore(earliestStartDate)) {
earliestStartDate = startDate;
}
if (latestEndDate == null || endDate.isAfter(latestEndDate)) {
latestEndDate = endDate;
}
}
}
}
}
} else if ((calendarDateTable != null) && (calendarTable == null)) {
// When only calendar_dates.txt is used
List<GtfsCalendarDate> calendarDates = calendarDateTable.getEntities();
for (GtfsTrip trip : trips) {
String serviceId = trip.serviceId();
for (GtfsCalendarDate calendarDate : calendarDates) {
if (calendarDate.serviceId().equals(serviceId)) {
LocalDate date = calendarDate.date().getLocalDate();
if (date != null && !date.toString().equals(LocalDate.EPOCH.toString())) {
if (earliestStartDate == null || date.isBefore(earliestStartDate)) {
earliestStartDate = date;
}
if (latestEndDate == null || date.isAfter(latestEndDate)) {
latestEndDate = date;
}
}
}
}
}
} else if ((calendarTable != null) && (calendarDateTable != null)) {
// When both calendars.txt and calendar_dates.txt are used
Map<String, ServicePeriod> servicePeriods =
CalendarUtil.buildServicePeriodMap(
(GtfsCalendarTableContainer) calendarTable,
(GtfsCalendarDateTableContainer) calendarDateTable);
List<LocalDate> removedDates = new ArrayList<>();
for (GtfsTrip trip : trips) {
String serviceId = trip.serviceId();
ServicePeriod servicePeriod = servicePeriods.get(serviceId);
LocalDate startDate = servicePeriod.getServiceStart();
LocalDate endDate = servicePeriod.getServiceEnd();
if (startDate != null && endDate != null) {
if (startDate.toString().equals(LocalDate.EPOCH.toString())
|| endDate.toString().equals(LocalDate.EPOCH.toString())) {
continue;
}
if (earliestStartDate == null || startDate.isBefore(earliestStartDate)) {
earliestStartDate = startDate;
}
if (latestEndDate == null || endDate.isAfter(latestEndDate)) {
latestEndDate = endDate;
}
}
removedDates.addAll(servicePeriod.getRemovedDays());
}

for (LocalDate date : removedDates) {
if (date.isEqual(earliestStartDate)) {
earliestStartDate = date.plusDays(1);
}
if (date.isEqual(latestEndDate)) {
latestEndDate = date.minusDays(1);
}
}
Optional<ServiceWindow> serviceWindow =
ServiceWindow.get(
tripContainer.getEntities(),
calendarTable.map(GtfsCalendarTableContainer::getEntities),
calendarDateTable.map(GtfsCalendarDateTableContainer::getEntities));
if (serviceWindow.isEmpty()) {
logger.atWarning().log(
"Could not compute service window. Check that `calendar.txt` and `calendar_dates.txt` contain data if they are present.");
}
earliestStartDate = serviceWindow.map(ServiceWindow::startDate).orElse(null);
latestEndDate = serviceWindow.map(ServiceWindow::endDate).orElse(null);
} catch (Exception e) {
logger.atSevere().withCause(e).log("Error while loading Service Window");
} finally {
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("MMMM d, yyyy");
if ((earliestStartDate == null) && (latestEndDate == null)) {
feedInfo.put(JsonReportFeedInfo.FEED_INFO_SERVICE_WINDOW, "");
} else if (earliestStartDate == null && latestEndDate != null) {
feedInfo.put(JsonReportFeedInfo.FEED_INFO_SERVICE_WINDOW, latestEndDate.format(formatter));
} else if (latestEndDate == null && earliestStartDate != null) {
if (earliestStartDate.isAfter(latestEndDate)) {
feedInfo.put(JsonReportFeedInfo.FEED_INFO_SERVICE_WINDOW, "");
} else {
feedInfo.put(
JsonReportFeedInfo.FEED_INFO_SERVICE_WINDOW, earliestStartDate.format(formatter));
}
} else {
StringBuilder serviceWindow = new StringBuilder();
serviceWindow.append(earliestStartDate);
serviceWindow.append(" to ");
serviceWindow.append(latestEndDate);
feedInfo.put(JsonReportFeedInfo.FEED_INFO_SERVICE_WINDOW, serviceWindow.toString());
}
String serviceWindowStr =
String.join(
" to ",
Stream.of(earliestStartDate, latestEndDate)
.filter(Objects::nonNull)
.map(LocalDate::toString)
.toList());
feedInfo.put(JsonReportFeedInfo.FEED_INFO_SERVICE_WINDOW, serviceWindowStr);
feedInfo.put(
JsonReportFeedInfo.FEED_INFO_SERVICE_WINDOW_START,
earliestStartDate == null ? "" : earliestStartDate.toString());
Objects.toString(earliestStartDate, ""));
feedInfo.put(
JsonReportFeedInfo.FEED_INFO_SERVICE_WINDOW_END,
latestEndDate == null ? "" : latestEndDate.toString());
JsonReportFeedInfo.FEED_INFO_SERVICE_WINDOW_END, Objects.toString(latestEndDate, ""));
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
package org.mobilitydata.gtfsvalidator.reportsummary.model;

import static java.util.stream.Collectors.*;

import java.time.LocalDate;
import java.util.*;
import org.mobilitydata.gtfsvalidator.table.GtfsCalendar;
import org.mobilitydata.gtfsvalidator.table.GtfsCalendarDate;
import org.mobilitydata.gtfsvalidator.table.GtfsCalendarDateExceptionType;
import org.mobilitydata.gtfsvalidator.table.GtfsTrip;
import org.mobilitydata.gtfsvalidator.util.FuncUtil;

record ServiceWindow(LocalDate startDate, LocalDate endDate) {
/**
 * Derives the service window from {@code calendar.txt} entries only.
 *
 * <p>Only calendars whose {@code service_id} is referenced by at least one trip are considered.
 * The window runs from the earliest start date to the latest end date among them.
 *
 * @param trips the trips whose service ids select the relevant calendars
 * @param allCalendars every calendar entry, referenced by a trip or not
 * @return the computed window, or an empty Optional when no calendar is referenced by a trip
 */
static Optional<ServiceWindow> fromCalendars(
    List<GtfsTrip> trips, List<GtfsCalendar> allCalendars) {
  Set<String> tripServiceIds =
      trips.stream().map(GtfsTrip::serviceId).collect(toCollection(HashSet::new));

  List<GtfsCalendar> referencedCalendars =
      allCalendars.stream().filter(c -> tripServiceIds.contains(c.serviceId())).toList();

  // Without any referenced calendar there is nothing to build a window from.
  if (referencedCalendars.isEmpty()) {
    return Optional.empty();
  }

  LocalDate earliestStart =
      referencedCalendars.stream()
          .map(c -> c.startDate().getLocalDate())
          .min(LocalDate::compareTo)
          .orElseThrow();
  LocalDate latestEnd =
      referencedCalendars.stream()
          .map(c -> c.endDate().getLocalDate())
          .max(LocalDate::compareTo)
          .orElseThrow();
  return Optional.of(new ServiceWindow(earliestStart, latestEnd));
}

/**
 * Derives the service window from {@code calendar_dates.txt} entries only.
 *
 * <p>Only entries of type {@code SERVICE_ADDED} whose {@code service_id} is referenced by at
 * least one trip are considered. The window runs from the earliest to the latest such date.
 *
 * @param trips the trips whose service ids select the relevant calendar dates
 * @param allCalendarDates every calendar date entry, referenced by a trip or not
 * @return the computed window, or an empty Optional when no added date is referenced by a trip
 */
static Optional<ServiceWindow> fromCalendarDates(
    List<GtfsTrip> trips, List<GtfsCalendarDate> allCalendarDates) {
  Set<String> tripServiceIds =
      trips.stream().map(GtfsTrip::serviceId).collect(toCollection(HashSet::new));

  List<LocalDate> addedDates =
      allCalendarDates.stream()
          .filter(cd -> tripServiceIds.contains(cd.serviceId()))
          .filter(cd -> cd.exceptionType() == GtfsCalendarDateExceptionType.SERVICE_ADDED)
          .map(cd -> cd.date().getLocalDate())
          .toList();

  // Without any added date there is nothing to build a window from.
  if (addedDates.isEmpty()) {
    return Optional.empty();
  }

  LocalDate firstDate = addedDates.stream().min(LocalDate::compareTo).orElseThrow();
  LocalDate lastDate = addedDates.stream().max(LocalDate::compareTo).orElseThrow();
  return Optional.of(new ServiceWindow(firstDate, lastDate));
}

static Optional<ServiceWindow> fromCalendarsAndCalendarDates(
List<GtfsTrip> trips, List<GtfsCalendar> calendars, List<GtfsCalendarDate> calendarDates) {
Optional<ServiceWindow> serviceWindowFromCalendars =
ServiceWindow.fromCalendars(trips, calendars);
if (serviceWindowFromCalendars.isEmpty()) {
return Optional.empty();
}

Set<String> serviceIds = new HashSet<>(trips.stream().map(GtfsTrip::serviceId).toList());

Map<String, Set<LocalDate>> removedDaysByServiceId =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sylvansson @davidgamez one question, the function seems to create removedDaysByServiceId by using only GtfsCalendarDate (calendar_dates.txt). However, there might be services in calendar.txt that don't have a removed date in calendar_dates.txt.

So I think the way to check this is to make sure that each removed date is removed for every service_id that appears either in calendar.txt or in calendar_dates.txt (as ADDED SERVICE: exception_type=1).

Eg:
calendar.txt: service_id s_1 from 1-5-2025 to 30-5-2025.

calendar_dates.txt:
service_id s_1 removed on 1-5-2025
service_id s_3 added on 24-5-2025
service_id s_3 removed on 31-5-2025

The service window is 2-5-2025 to 30-5-2025 because:

  • May 1st is removed for all services that exist on May 1st (s_1 only)
  • s_3 was added for May 24th and removed on May 31st. So no service operates on May 31st. If s_1 were to end on May 31st then the service window would include that date.

I suggest that for each date, we count the total unique removed service_ids, then compare it with the count of unique service_ids that operate on that date (both from calendar.txt and the added services in calendar_dates.txt), but there might be better ways to do that.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, @skalexch. You are right. The spec doesn't force all service IDs to be defined in the calendar.txt and calendar_dates.txt when both files are defined. In this case, we can have service IDs from calendar.txt that are not present in the calendar_dates and will have a service on a day removed by a calendar_date entry.

Following this logic, I agree that we need to keep a total of "all unique service IDs" per day, so that a date is marked as "removed" only when none of those services is present on it. The "all unique service IDs" can be computed by looking at the trips.txt file.
In addition, we need to ensure that the added service "expands" the start and end dates of the service window. So, we must iterate over all additions and set the start and end dates accordingly. This should be done after computing the start and end dates from all calendar ranges.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch, I'll try to get this fixed sometime next week.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch, I'll try to get this fixed sometime next week.

Thanks!

calendarDates.stream()
.filter(
d ->
d.exceptionType() == GtfsCalendarDateExceptionType.SERVICE_REMOVED
&& serviceIds.contains(d.serviceId()))
.collect(
groupingBy(
GtfsCalendarDate::serviceId, mapping(d -> d.date().getLocalDate(), toSet())));

// We compute the set of days that are removed across all services in
// order to shift the start and end dates.
Set<LocalDate> removedDays = FuncUtil.intersectAll(removedDaysByServiceId.values());

LocalDate startDate = serviceWindowFromCalendars.get().startDate();
LocalDate endDate = serviceWindowFromCalendars.get().endDate();

while (removedDays.contains(startDate)) {
startDate = startDate.plusDays(1);
}
while (removedDays.contains(endDate)) {
endDate = endDate.minusDays(1);
}
return Optional.of(new ServiceWindow(startDate, endDate));
}

/**
 * Computes the service window using whichever calendar sources are available.
 *
 * <p>Dispatches to {@code fromCalendarsAndCalendarDates} when both sources are present, to
 * {@code fromCalendars} or {@code fromCalendarDates} when only one is, and yields an empty
 * Optional when neither is.
 *
 * @param trips the trips whose service ids select the relevant entries
 * @param calendars the {@code calendar.txt} entries, if that source is available
 * @param calendarDates the {@code calendar_dates.txt} entries, if that source is available
 * @return the window computed by the selected strategy, or an empty Optional
 */
static Optional<ServiceWindow> get(
    List<GtfsTrip> trips,
    Optional<List<GtfsCalendar>> calendars,
    Optional<List<GtfsCalendarDate>> calendarDates) {
  boolean hasCalendars = calendars.isPresent();
  boolean hasCalendarDates = calendarDates.isPresent();

  if (hasCalendars && hasCalendarDates) {
    return ServiceWindow.fromCalendarsAndCalendarDates(
        trips, calendars.get(), calendarDates.get());
  }
  if (hasCalendars) {
    return ServiceWindow.fromCalendars(trips, calendars.get());
  }
  if (hasCalendarDates) {
    return ServiceWindow.fromCalendarDates(trips, calendarDates.get());
  }
  return Optional.empty();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package org.mobilitydata.gtfsvalidator.util;

import static java.util.Collections.emptySet;

import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

public class FuncUtil {
public static <T> Set<T> intersectAll(Collection<Set<T>> sets) {
if (sets.isEmpty()) {
return emptySet();
}

Set<T> intersection = null;
for (Set<T> set : sets) {
if (intersection == null) {
intersection = new HashSet<>(set);
} else {
intersection.retainAll(set);
}
}
return intersection;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import java.io.IOException;
import java.time.LocalDate;
import java.util.List;
import java.util.Optional;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
Expand Down Expand Up @@ -126,7 +127,10 @@ public void testLoadServiceWindow() {
List.of(calendarDate1, calendarDate2), noticeContainer);

// Call the method
feedMetadata.loadServiceWindow(tripContainer, calendarTable, calendarDateTable);
feedMetadata.loadServiceWindow(
(GtfsTripTableContainer) tripContainer,
Optional.of((GtfsCalendarTableContainer) calendarTable),
Optional.of((GtfsCalendarDateTableContainer) calendarDateTable));

// Verify the result
String expectedServiceWindow = "2024-01-02 to 2024-12-31";
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package org.mobilitydata.gtfsvalidator.util;

import static com.google.common.truth.Truth.assertThat;
import static java.util.Collections.emptyList;
import static java.util.Collections.emptySet;

import java.util.List;
import java.util.Set;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

/** Unit tests for {@link FuncUtil}. */
@RunWith(JUnit4.class)
public class FuncUtilTest {
  /** Checks the intersection of several overlapping sets and of an empty collection of sets. */
  @Test
  public void testIntersectAll() {
    // Only the even numbers up to 6 appear in all three sets.
    Set<Integer> oneToSix = Set.of(1, 2, 3, 4, 5, 6);
    Set<Integer> oneToTen = Set.of(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
    Set<Integer> evensToTen = Set.of(2, 4, 6, 8, 10);
    assertThat(FuncUtil.intersectAll(List.of(oneToSix, oneToTen, evensToTen)))
        .isEqualTo(Set.of(2, 4, 6));

    // With no sets to intersect the result is empty.
    assertThat(FuncUtil.intersectAll(emptyList())).isEqualTo(emptySet());
  }
}
Loading