diff --git a/src/main/java/com/conveyal/gtfs/GTFS.java b/src/main/java/com/conveyal/gtfs/GTFS.java index 769154445..59e5ec83d 100644 --- a/src/main/java/com/conveyal/gtfs/GTFS.java +++ b/src/main/java/com/conveyal/gtfs/GTFS.java @@ -7,6 +7,7 @@ import com.conveyal.gtfs.loader.JdbcGtfsSnapshotter; import com.conveyal.gtfs.loader.SnapshotResult; import com.conveyal.gtfs.util.InvalidNamespaceException; +import com.conveyal.gtfs.validator.FeedValidatorCreator; import com.conveyal.gtfs.validator.ValidationResult; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.io.Files; @@ -15,10 +16,10 @@ import org.apache.commons.dbcp2.DriverManagerConnectionFactory; import org.apache.commons.dbcp2.PoolableConnectionFactory; import org.apache.commons.dbcp2.PoolingDataSource; -import org.apache.commons.dbutils.DbUtils; import org.apache.commons.pool2.impl.GenericObjectPool; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import javax.sql.DataSource; import java.io.File; import java.io.IOException; @@ -28,7 +29,8 @@ import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; -import java.sql.Statement; +import java.util.List; +import java.util.function.BiFunction; import static com.conveyal.gtfs.util.Util.ensureValidNamespace; @@ -94,9 +96,9 @@ public static SnapshotResult makeSnapshot (String feedId, DataSource dataSource) /** * Once a feed has been loaded into the database, examine its contents looking for various problems and errors. */ - public static ValidationResult validate (String feedId, DataSource dataSource) { + public static ValidationResult validate (String feedId, DataSource dataSource, FeedValidatorCreator... additionalValidators) { Feed feed = new Feed(dataSource, feedId); - ValidationResult result = feed.validate(); + ValidationResult result = feed.validate(additionalValidators); return result; } diff --git a/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java b/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java index 88911627d..eb12337c2 100644 --- a/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java +++ b/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java @@ -3,76 +3,81 @@ import com.conveyal.gtfs.validator.model.Priority; public enum NewGTFSErrorType { - + // Standard errors. + BOOLEAN_FORMAT(Priority.MEDIUM, "A GTFS boolean field must contain the value 1 or 0."), + COLOR_FORMAT(Priority.MEDIUM, "A color should be specified with six-characters (three two-digit hexadecimal numbers)."), + COLUMN_NAME_UNSAFE(Priority.HIGH, "Column header contains characters not safe in SQL, it was renamed."), + CURRENCY_UNKNOWN(Priority.MEDIUM, "The currency code was not recognized."), DATE_FORMAT(Priority.MEDIUM, "Date format should be YYYYMMDD."), - DATE_RANGE(Priority.MEDIUM, "Date should is extremely far in the future or past."), DATE_NO_SERVICE(Priority.MEDIUM, "No service_ids were active on a date within the range of dates with defined service."), - TIME_FORMAT(Priority.MEDIUM, "Time format should be HH:MM:SS."), - URL_FORMAT(Priority.MEDIUM, "URL format should be ://?#"), - LANGUAGE_FORMAT(Priority.LOW, "Language should be specified with a valid BCP47 tag."), - ILLEGAL_FIELD_VALUE(Priority.MEDIUM, "Fields may not contain tabs, carriage returns or new lines."), - INTEGER_FORMAT(Priority.MEDIUM, "Incorrect integer format."), - FARE_TRANSFER_MISMATCH(Priority.MEDIUM, "A fare that does not permit transfers has a non-zero transfer duration."), - FREQUENCY_PERIOD_OVERLAP(Priority.MEDIUM, "A frequency for a trip overlaps with another frequency defined for the same trip."), - FLOATING_FORMAT(Priority.MEDIUM, "Incorrect floating point number format."), - COLUMN_NAME_UNSAFE(Priority.HIGH, "Column header contains characters not safe in SQL, it was renamed."), - NUMBER_PARSING(Priority.MEDIUM, "Unable to parse number from value."), - NUMBER_NEGATIVE(Priority.MEDIUM, "Number was expected to be non-negative."), - NUMBER_TOO_SMALL(Priority.MEDIUM, "Number was below the allowed range."), - NUMBER_TOO_LARGE(Priority.MEDIUM, "Number was above the allowed range."), + DATE_RANGE(Priority.MEDIUM, "Date should is extremely far in the future or past."), + DEPARTURE_BEFORE_ARRIVAL(Priority.MEDIUM, "The vehicle departs from this stop before it arrives."), + DUPLICATE_HEADER(Priority.MEDIUM, "More than one column in a table had the same name in the header row."), DUPLICATE_ID(Priority.MEDIUM, "More than one entity in a table had the same ID."), - DUPLICATE_TRIP(Priority.MEDIUM, "More than one trip had an identical schedule and stops."), DUPLICATE_STOP(Priority.MEDIUM, "More than one stop was located in exactly the same place."), - DUPLICATE_HEADER(Priority.MEDIUM, "More than one column in a table had the same name in the header row."), - MISSING_TABLE(Priority.MEDIUM, "This table is required by the GTFS specification but is missing."), + DUPLICATE_TRIP(Priority.MEDIUM, "More than one trip had an identical schedule and stops."), + FARE_TRANSFER_MISMATCH(Priority.MEDIUM, "A fare that does not permit transfers has a non-zero transfer duration."), + FEED_TRAVEL_TIMES_ROUNDED(Priority.LOW, "All travel times in the feed are rounded to the minute, which may cause unexpected results in routing applications where travel times are zero."), + FLOATING_FORMAT(Priority.MEDIUM, "Incorrect floating point number format."), + FREQUENCY_PERIOD_OVERLAP(Priority.MEDIUM, "A frequency for a trip overlaps with another frequency defined for the same trip."), + ILLEGAL_FIELD_VALUE(Priority.MEDIUM, "Fields may not contain tabs, carriage returns or new lines."), + INTEGER_FORMAT(Priority.MEDIUM, "Incorrect integer format."), + LANGUAGE_FORMAT(Priority.LOW, "Language should be specified with a valid BCP47 tag."), + MISSING_ARRIVAL_OR_DEPARTURE(Priority.MEDIUM, "First and last stop times are required to have both an arrival and departure time."), MISSING_COLUMN(Priority.MEDIUM, "A required column was missing from a table."), - MISSING_SHAPE(Priority.MEDIUM, "???"), MISSING_FIELD(Priority.MEDIUM, "A required field was missing or empty in a particular row."), + MISSING_SHAPE(Priority.MEDIUM, "???"), + MISSING_TABLE(Priority.MEDIUM, "This table is required by the GTFS specification but is missing."), MULTIPLE_SHAPES_FOR_PATTERN(Priority.MEDIUM, "Multiple shapes found for a single unique sequence of stops (i.e, trip pattern)."), - WRONG_NUMBER_OF_FIELDS(Priority.MEDIUM, "A row did not have the same number of fields as there are headers in its table."), NO_SERVICE(Priority.HIGH, "There is no service defined on any day in this feed."), + NUMBER_NEGATIVE(Priority.MEDIUM, "Number was expected to be non-negative."), + NUMBER_PARSING(Priority.MEDIUM, "Unable to parse number from value."), + NUMBER_TOO_LARGE(Priority.MEDIUM, "Number was above the allowed range."), + NUMBER_TOO_SMALL(Priority.MEDIUM, "Number was below the allowed range."), OVERLAPPING_TRIP(Priority.MEDIUM, "Blocks?"), - SHAPE_REVERSED(Priority.MEDIUM, "A shape appears to be intended for vehicles running the opposite direction on the route."), - SHAPE_MISSING_COORDINATE(Priority.MEDIUM, "???"), - TABLE_IN_SUBDIRECTORY(Priority.HIGH, "Rather than being at the root of the zip file, a table was nested in a subdirectory."), - TABLE_MISSING_COLUMN_HEADERS(Priority.HIGH, "Table is missing column headers."), - TABLE_TOO_LONG(Priority.MEDIUM, "Table is too long to record line numbers with a 32-bit integer, overflow will occur."), - TIME_ZONE_FORMAT(Priority.MEDIUM, "Time zone format should match value from the Time Zone Database https://en.wikipedia.org/wiki/List_of_tz_database_time_zones."), + REFERENTIAL_INTEGRITY(Priority.HIGH, "This line references an ID that does not exist in the target table."), REQUIRED_TABLE_EMPTY(Priority.MEDIUM, "This table is required by the GTFS specification but is empty."), - FEED_TRAVEL_TIMES_ROUNDED(Priority.LOW, "All travel times in the feed are rounded to the minute, which may cause unexpected results in routing applications where travel times are zero."), ROUTE_DESCRIPTION_SAME_AS_NAME(Priority.LOW, "The description of a route is identical to its name, so does not add any information."), ROUTE_LONG_NAME_CONTAINS_SHORT_NAME(Priority.LOW, "The long name of a route should complement the short name, not include it."), ROUTE_SHORT_AND_LONG_NAME_MISSING(Priority.MEDIUM, "A route has neither a long nor a short name."), ROUTE_SHORT_NAME_TOO_LONG(Priority.MEDIUM, "The short name of a route is too long for display in standard GTFS consumer applications."), + ROUTE_UNUSED(Priority.HIGH, "This route is defined but has no trips."), SERVICE_NEVER_ACTIVE(Priority.MEDIUM, "A service code was defined, but is never active on any date."), SERVICE_UNUSED(Priority.MEDIUM, "A service code was defined, but is never referenced by any trips."), SHAPE_DIST_TRAVELED_NOT_INCREASING(Priority.MEDIUM, "Shape distance traveled must increase with stop times."), + SHAPE_MISSING_COORDINATE(Priority.MEDIUM, "???"), + SHAPE_REVERSED(Priority.MEDIUM, "A shape appears to be intended for vehicles running the opposite direction on the route."), STOP_DESCRIPTION_SAME_AS_NAME(Priority.LOW, "The description of a stop is identical to its name, so does not add any information."), + STOP_GEOGRAPHIC_OUTLIER(Priority.HIGH, "This stop is located very far from the middle 90% of stops in this feed."), STOP_LOW_POPULATION_DENSITY(Priority.HIGH, "A stop is located in a geographic area with very low human population density."), STOP_NAME_MISSING(Priority.MEDIUM, "A stop does not have a name."), - STOP_GEOGRAPHIC_OUTLIER(Priority.HIGH, "This stop is located very far from the middle 90% of stops in this feed."), STOP_TIME_UNUSED(Priority.LOW, "This stop time allows neither pickup nor drop off and is not a timepoint, so it serves no purpose and should be removed from trip."), STOP_UNUSED(Priority.MEDIUM, "This stop is not referenced by any trips."), + TABLE_IN_SUBDIRECTORY(Priority.HIGH, "Rather than being at the root of the zip file, a table was nested in a subdirectory."), + TABLE_MISSING_COLUMN_HEADERS(Priority.HIGH, "Table is missing column headers."), + TABLE_TOO_LONG(Priority.MEDIUM, "Table is too long to record line numbers with a 32-bit integer, overflow will occur."), + TIME_FORMAT(Priority.MEDIUM, "Time format should be HH:MM:SS."), + TIME_ZONE_FORMAT(Priority.MEDIUM, "Time zone format should match value from the Time Zone Database https://en.wikipedia.org/wiki/List_of_tz_database_time_zones."), TIMEPOINT_MISSING_TIMES(Priority.MEDIUM, "This stop time is marked as a timepoint, but is missing both arrival and departure times."), + TRAVEL_DISTANCE_ZERO(Priority.MEDIUM, "The vehicle does not cover any distance between the last stop and this one."), + TRAVEL_TIME_NEGATIVE(Priority.HIGH, "The vehicle arrives at this stop before it departs from the previous one."), + TRAVEL_TIME_ZERO(Priority.HIGH, "The vehicle arrives at this stop at the same time it departs from the previous stop."), + TRAVEL_TOO_FAST(Priority.MEDIUM, "The vehicle travels extremely fast to reach this stop from the previous one."), + TRAVEL_TOO_SLOW(Priority.MEDIUM, "The vehicle is traveling very slowly to reach this stop from the previous one."), TRIP_EMPTY(Priority.HIGH, "This trip is defined but has no stop times."), TRIP_HEADSIGN_CONTAINS_ROUTE_NAME(Priority.LOW, "A trip headsign contains the route name, but should only contain information to distinguish it from other trips for the route."), TRIP_HEADSIGN_SHOULD_DESCRIBE_DESTINATION_OR_WAYPOINTS(Priority.LOW, "A trip headsign begins with 'to' or 'towards', but should begin with destination or direction and optionally include waypoints with 'via'"), TRIP_NEVER_ACTIVE(Priority.MEDIUM, "A trip is defined, but its service is never running on any date."), - ROUTE_UNUSED(Priority.HIGH, "This route is defined but has no trips."), - TRAVEL_DISTANCE_ZERO(Priority.MEDIUM, "The vehicle does not cover any distance between the last stop and this one."), - TRAVEL_TIME_NEGATIVE(Priority.HIGH, "The vehicle arrives at this stop before it departs from the previous one."), - TRAVEL_TIME_ZERO(Priority.HIGH, "The vehicle arrives at this stop at the same time it departs from the previous stop."), - MISSING_ARRIVAL_OR_DEPARTURE(Priority.MEDIUM, "First and last stop times are required to have both an arrival and departure time."), - TRIP_TOO_FEW_STOP_TIMES(Priority.MEDIUM, "A trip must have at least two stop times to represent travel."), TRIP_OVERLAP_IN_BLOCK(Priority.MEDIUM, "A trip overlaps another trip and shares the same block_id."), - TRAVEL_TOO_SLOW(Priority.MEDIUM, "The vehicle is traveling very slowly to reach this stop from the previous one."), - TRAVEL_TOO_FAST(Priority.MEDIUM, "The vehicle travels extremely fast to reach this stop from the previous one."), + TRIP_TOO_FEW_STOP_TIMES(Priority.MEDIUM, "A trip must have at least two stop times to represent travel."), + URL_FORMAT(Priority.MEDIUM, "URL format should be ://?#"), VALIDATOR_FAILED(Priority.HIGH, "The specified validation stage failed due to an error encountered during loading. This is likely due to an error encountered during loading (e.g., a date or number field is formatted incorrectly.)."), - DEPARTURE_BEFORE_ARRIVAL(Priority.MEDIUM, "The vehicle departs from this stop before it arrives."), - REFERENTIAL_INTEGRITY(Priority.HIGH, "This line references an ID that does not exist in the target table."), - BOOLEAN_FORMAT(Priority.MEDIUM, "A GTFS boolean field must contain the value 1 or 0."), - COLOR_FORMAT(Priority.MEDIUM, "A color should be specified with six-characters (three two-digit hexadecimal numbers)."), - CURRENCY_UNKNOWN(Priority.MEDIUM, "The currency code was not recognized."), + WRONG_NUMBER_OF_FIELDS(Priority.MEDIUM, "A row did not have the same number of fields as there are headers in its table."), + + // MTC-specific errors. + FIELD_VALUE_TOO_LONG(Priority.MEDIUM, "Field value has too many characters."), + + // Unknown errors. OTHER(Priority.LOW, "Other errors."); public final Priority priority; diff --git a/src/main/java/com/conveyal/gtfs/loader/Feed.java b/src/main/java/com/conveyal/gtfs/loader/Feed.java index 475f1e707..46b294b40 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Feed.java +++ b/src/main/java/com/conveyal/gtfs/loader/Feed.java @@ -1,20 +1,18 @@ package com.conveyal.gtfs.loader; -import com.conveyal.gtfs.error.GTFSError; import com.conveyal.gtfs.error.NewGTFSError; import com.conveyal.gtfs.error.SQLErrorStorage; import com.conveyal.gtfs.model.*; import com.conveyal.gtfs.storage.StorageException; import com.conveyal.gtfs.util.InvalidNamespaceException; import com.conveyal.gtfs.validator.*; +import com.google.common.collect.Lists; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.sql.DataSource; import java.sql.Connection; import java.sql.SQLException; -import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import static com.conveyal.gtfs.error.NewGTFSErrorType.VALIDATOR_FAILED; @@ -32,20 +30,15 @@ public class Feed { // This may be the empty string if the feed is stored in the root ("public") schema. public final String tablePrefix; - public final TableReader agencies; - public final TableReader calendars; - public final TableReader calendarDates; + public final TableReader agencies; + public final TableReader calendars; + public final TableReader calendarDates; public final TableReader fareAttributes; - public final TableReader frequencies; - public final TableReader routes; - public final TableReader stops; - public final TableReader trips; -// public final TableReader shapePoints; - public final TableReader stopTimes; - - /* A place to accumulate errors while the feed is loaded. Tolerate as many errors as possible and keep on loading. */ - // TODO remove this and use only NewGTFSErrors in Validators, loaded into a JDBC table - public final List errors = new ArrayList<>(); + public final TableReader frequencies; + public final TableReader routes; + public final TableReader stops; + public final TableReader trips; + public final TableReader stopTimes; /** * Create a feed that reads tables over a JDBC connection. The connection should already be set to the right @@ -65,44 +58,51 @@ public Feed (DataSource dataSource, String tablePrefix) { routes = new JDBCTableReader(Table.ROUTES, dataSource, tablePrefix, EntityPopulator.ROUTE); stops = new JDBCTableReader(Table.STOPS, dataSource, tablePrefix, EntityPopulator.STOP); trips = new JDBCTableReader(Table.TRIPS, dataSource, tablePrefix, EntityPopulator.TRIP); -// shapePoints = new JDBCTableReader(Table.SHAPES, dataSource, tablePrefix, EntityPopulator.SHAPE_POINT); stopTimes = new JDBCTableReader(Table.STOP_TIMES, dataSource, tablePrefix, EntityPopulator.STOP_TIME); } /** + * Run the standard validation checks for this feed and store the validation errors in the database. Optionally, + * takes one or more {@link FeedValidatorCreator} in the form of lambda method refs (e.g., {@code MTCValidator::new}), + * which this method will instantiate and run after the standard validation checks have been completed. + * * TODO check whether validation has already occurred, overwrite results. - * TODO allow validation within feed loading process, so the same connection can be used, and we're certain loaded data is 100% visible. - * That would also avoid having to reconnect the error storage to the DB. + * TODO allow validation within feed loading process, so the same connection can be used, and we're certain loaded + * data is 100% visible. That would also avoid having to reconnect the error storage to the DB. */ - public ValidationResult validate () { + public ValidationResult validate (FeedValidatorCreator... additionalValidators) { long validationStartTime = System.currentTimeMillis(); // Create an empty validation result that will have its fields populated by certain validators. ValidationResult validationResult = new ValidationResult(); // Error tables should already be present from the initial load. // Reconnect to the existing error tables. - SQLErrorStorage errorStorage = null; + SQLErrorStorage errorStorage; try { errorStorage = new SQLErrorStorage(dataSource.getConnection(), tablePrefix, false); } catch (SQLException | InvalidNamespaceException ex) { throw new StorageException(ex); } int errorCountBeforeValidation = errorStorage.getErrorCount(); - - List feedValidators = Arrays.asList( - new MisplacedStopValidator(this, errorStorage, validationResult), - new DuplicateStopsValidator(this, errorStorage), - new FaresValidator(this, errorStorage), - new FrequencyValidator(this, errorStorage), - new TimeZoneValidator(this, errorStorage), - new NewTripTimesValidator(this, errorStorage), - new NamesValidator(this, errorStorage)); + // Create list of standard validators to run on every feed. + List feedValidators = Lists.newArrayList( + new MisplacedStopValidator(this, errorStorage, validationResult), + new DuplicateStopsValidator(this, errorStorage), + new FaresValidator(this, errorStorage), + new FrequencyValidator(this, errorStorage), + new TimeZoneValidator(this, errorStorage), + new NewTripTimesValidator(this, errorStorage), + new NamesValidator(this, errorStorage) + ); + // Create additional validators specified in this method's args and add to list of feed validators to run. + for (FeedValidatorCreator creator : additionalValidators) { + if (creator != null) feedValidators.add(creator.create(this, errorStorage)); + } for (FeedValidator feedValidator : feedValidators) { String validatorName = feedValidator.getClass().getSimpleName(); try { LOG.info("Running {}.", validatorName); int errorCountBefore = errorStorage.getErrorCount(); - // todo why not just pass the feed and errorstorage in here? feedValidator.validate(); LOG.info("{} found {} errors.", validatorName, errorStorage.getErrorCount() - errorCountBefore); } catch (Exception e) { diff --git a/src/main/java/com/conveyal/gtfs/validator/FeedValidatorCreator.java b/src/main/java/com/conveyal/gtfs/validator/FeedValidatorCreator.java new file mode 100644 index 000000000..39fbc67e4 --- /dev/null +++ b/src/main/java/com/conveyal/gtfs/validator/FeedValidatorCreator.java @@ -0,0 +1,24 @@ +package com.conveyal.gtfs.validator; + +import com.conveyal.gtfs.error.SQLErrorStorage; +import com.conveyal.gtfs.loader.Feed; + +/** + * A functional interface used to create {@link FeedValidator} instances. This interface supports the ability to pass + * validators by lambda method references ({@code SomeValidator::new}) to the {@link Feed#validate(FeedValidatorCreator...)} + * method in order to run special validation checks on specific feeds (e.g., {@link MTCValidator} should be run only on + * GTFS files loaded for those MTC operators). To instantiate the FeedValidator, simply call the + * {@link #create(Feed, SQLErrorStorage)} method, passing in the feed and errorStorage arguments. This is in lieu of + * instantiating the FeedValidator with those arguments in the constructor because these objects are not available before + * the validate method is invoked. + */ +@FunctionalInterface +public interface FeedValidatorCreator { + /** + * The callback that instantiates and returns instances of custom FeedValidator objects + * constructed using the provided feed and error storage objects. + * @param feed The feed being validated. + * @param errorStorage The object that handles error storage. + */ + FeedValidator create(Feed feed, SQLErrorStorage errorStorage); +} diff --git a/src/main/java/com/conveyal/gtfs/validator/MTCValidator.java b/src/main/java/com/conveyal/gtfs/validator/MTCValidator.java new file mode 100644 index 000000000..0453c32e0 --- /dev/null +++ b/src/main/java/com/conveyal/gtfs/validator/MTCValidator.java @@ -0,0 +1,63 @@ +package com.conveyal.gtfs.validator; + +import com.conveyal.gtfs.error.SQLErrorStorage; +import com.conveyal.gtfs.loader.Feed; +import com.conveyal.gtfs.model.*; + +import java.net.URL; + +import static com.conveyal.gtfs.error.NewGTFSErrorType.FIELD_VALUE_TOO_LONG; + +/** + * MTCValidator runs a set of custom validation checks for GTFS feeds managed by MTC in Data Tools. + * The checks consist of validating field lengths at this time per the 511 MTC guidelines at + * https://github.com/ibi-group/datatools-ui/files/4438625/511.Transit_Data.Guidelines_V2.0_3-27-2020.pdf. + * For specific field lengths, search the guidelines for the word 'character'. + * + * Note that other validations, e.g. on GTFS+ files, are discussed in + * https://github.com/ibi-group/datatools-ui/issues/544. + */ +public class MTCValidator extends FeedValidator { + + public MTCValidator(Feed feed, SQLErrorStorage errorStorage) { + super(feed, errorStorage); + } + + @Override + public void validate() { + for (Agency agency : feed.agencies) { + validateFieldLength(agency, agency.agency_id, 50); + validateFieldLength(agency, agency.agency_name, 50); + validateFieldLength(agency, agency.agency_url, 500); + } + + for (Stop stop : feed.stops) { + validateFieldLength(stop, stop.stop_name, 100); + } + + for (Trip trip : feed.trips) { + validateFieldLength(trip, trip.trip_headsign, 120); + validateFieldLength(trip, trip.trip_short_name, 50); + } + } + + /** + * Checks that the length of a string does not exceed a certain length. + * Reports an error if the length is exceeded. + * @param entity The containing GTFS entity (for error reporting purposes). + * @param value The String value to check. + * @param maxLength The length to check, should be positive or zero. + * @return true if value.length() is maxLength or less, or if value is null; false otherwise. + */ + public boolean validateFieldLength(Entity entity, String value, int maxLength) { + if (value != null && value.length() > maxLength) { + if (errorStorage != null) registerError(entity, FIELD_VALUE_TOO_LONG, "[over " + maxLength + " characters] " + value); + return false; + } + return true; + } + + public boolean validateFieldLength(Entity entity, URL url, int maxLength) { + return validateFieldLength(entity, url != null ? url.toString() : "", maxLength); + } +} diff --git a/src/main/java/com/conveyal/gtfs/validator/ReversedTripValidator.java b/src/main/java/com/conveyal/gtfs/validator/ReversedTripValidator.java index ebe1c1d1c..b2a4c7f90 100644 --- a/src/main/java/com/conveyal/gtfs/validator/ReversedTripValidator.java +++ b/src/main/java/com/conveyal/gtfs/validator/ReversedTripValidator.java @@ -50,7 +50,8 @@ public boolean validate(Feed feed, boolean repair) { if (trip.shape_id == null) { isValid = false; if (missingShapeErrorCount < errorLimit) { - feed.errors.add(new MissingShapeError(trip)); + // FIXME store MissingShape errors + // feed.errors.add(new MissingShapeError(trip)); } missingShapeErrorCount++; continue; @@ -111,7 +112,8 @@ public boolean validate(Feed feed, boolean repair) { // check if first stop is x times closer to end of shape than the beginning or last stop is x times closer to start than the end if (distanceFirstStopToStart > (distanceFirstStopToEnd * distanceMultiplier) && distanceLastStopToEnd > (distanceLastStopToStart * distanceMultiplier)) { if (reversedTripShapeErrorCount < errorLimit) { - feed.errors.add(new ReversedTripShapeError(trip)); + // FIXME store ReversedTripShape errors + // feed.errors.add(new ReversedTripShapeError(trip)); } reversedTripShapeErrorCount++; isValid = false; @@ -120,7 +122,8 @@ public boolean validate(Feed feed, boolean repair) { if (missingCoordinatesErrorCount > 0) { for (Map.Entry> shapeError : missingShapesMap.entrySet()) { String[] tripIdList = shapeError.getValue().toArray(new String[shapeError.getValue().size()]); - feed.errors.add(new ShapeMissingCoordinatesError(shapeError.getKey(), tripIdList)); + // FIXME store ShapeMissingCoordinates errors + // feed.errors.add(new ShapeMissingCoordinatesError(shapeError.getKey(), tripIdList)); } } return isValid; diff --git a/src/test/java/com/conveyal/gtfs/GTFSTest.java b/src/test/java/com/conveyal/gtfs/GTFSTest.java index 25be50ac8..a3da82bf1 100644 --- a/src/test/java/com/conveyal/gtfs/GTFSTest.java +++ b/src/test/java/com/conveyal/gtfs/GTFSTest.java @@ -9,12 +9,13 @@ import com.conveyal.gtfs.storage.PersistenceExpectation; import com.conveyal.gtfs.storage.RecordExpectation; import com.conveyal.gtfs.util.InvalidNamespaceException; +import com.conveyal.gtfs.validator.FeedValidatorCreator; +import com.conveyal.gtfs.validator.MTCValidator; import com.conveyal.gtfs.validator.ValidationResult; import com.csvreader.CsvReader; import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.Multimap; import com.google.common.io.Files; -import org.apache.commons.dbutils.DbUtils; import org.apache.commons.io.FileUtils; import org.apache.commons.io.input.BOMInputStream; import org.hamcrest.Matcher; @@ -32,16 +33,11 @@ import java.io.PrintStream; import java.nio.charset.Charset; import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; -import java.util.Date; import java.util.Iterator; -import java.util.List; import java.util.zip.ZipEntry; import java.util.zip.ZipFile; @@ -306,6 +302,34 @@ public void canLoadAndExportSimpleAgencyWithOnlyCalendarDates() { ); } + /** + * Tests that a GTFS feed with long field values generates corresponding + * validation errors per MTC guidelines. + */ + @Test + public void canLoadFeedWithLongFieldValues () { + PersistenceExpectation[] expectations = PersistenceExpectation.list(); + ErrorExpectation[] errorExpectations = ErrorExpectation.list( + new ErrorExpectation(NewGTFSErrorType.FIELD_VALUE_TOO_LONG), + new ErrorExpectation(NewGTFSErrorType.FIELD_VALUE_TOO_LONG), + new ErrorExpectation(NewGTFSErrorType.FIELD_VALUE_TOO_LONG), + new ErrorExpectation(NewGTFSErrorType.FIELD_VALUE_TOO_LONG), + new ErrorExpectation(NewGTFSErrorType.FIELD_VALUE_TOO_LONG), + new ErrorExpectation(NewGTFSErrorType.FIELD_VALUE_TOO_LONG), + new ErrorExpectation(NewGTFSErrorType.FEED_TRAVEL_TIMES_ROUNDED) // Not related, not worrying about this one. + ); + assertThat( + "Long-field-value test passes", + runIntegrationTestOnFolder( + "fake-agency-mtc-long-fields", + nullValue(), + expectations, + errorExpectations, + MTCValidator::new + ), + equalTo(true) + ); + } /** * A helper method that will zip a specified folder in test/main/resources and call @@ -315,7 +339,8 @@ private boolean runIntegrationTestOnFolder( String folderName, Matcher fatalExceptionExpectation, PersistenceExpectation[] persistenceExpectations, - ErrorExpectation[] errorExpectations + ErrorExpectation[] errorExpectations, + FeedValidatorCreator... customValidators ) { LOG.info("Running integration test on folder {}", folderName); // zip up test folder into temp zip file @@ -326,7 +351,13 @@ private boolean runIntegrationTestOnFolder( e.printStackTrace(); return false; } - return runIntegrationTestOnZipFile(zipFileName, fatalExceptionExpectation, persistenceExpectations, errorExpectations); + return runIntegrationTestOnZipFile( + zipFileName, + fatalExceptionExpectation, + persistenceExpectations, + errorExpectations, + customValidators + ); } /** @@ -342,7 +373,8 @@ private boolean runIntegrationTestOnZipFile( String zipFileName, Matcher fatalExceptionExpectation, PersistenceExpectation[] persistenceExpectations, - ErrorExpectation[] errorExpectations + ErrorExpectation[] errorExpectations, + FeedValidatorCreator... customValidators ) { String testDBName = TestUtils.generateNewDB(); String dbConnectionUrl = String.join("/", JDBC_URL, testDBName); @@ -359,7 +391,7 @@ private boolean runIntegrationTestOnZipFile( // load and validate feed LOG.info("load and validate GTFS file {}", zipFileName); FeedLoadResult loadResult = GTFS.load(zipFileName, dataSource); - ValidationResult validationResult = GTFS.validate(loadResult.uniqueIdentifier, dataSource); + ValidationResult validationResult = GTFS.validate(loadResult.uniqueIdentifier, dataSource, customValidators); assertThat(validationResult.fatalException, is(fatalExceptionExpectation)); namespace = loadResult.uniqueIdentifier; diff --git a/src/test/java/com/conveyal/gtfs/validator/MTCValidatorTest.java b/src/test/java/com/conveyal/gtfs/validator/MTCValidatorTest.java new file mode 100644 index 000000000..4cd33521c --- /dev/null +++ b/src/test/java/com/conveyal/gtfs/validator/MTCValidatorTest.java @@ -0,0 +1,28 @@ +package com.conveyal.gtfs.validator; + +import org.junit.Test; + +import java.net.MalformedURLException; +import java.net.URL; + +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.MatcherAssert.assertThat; + +public class MTCValidatorTest { + @Test + public void fieldlLengthShouldNotExceed() { + MTCValidator validator = new MTCValidator(null, null); + assertThat(validator.validateFieldLength(null, "abcdefghijklmnopqrstwxyz1234567890", 20), is(false)); + assertThat(validator.validateFieldLength(null, "abcdef", 20), is(true)); + } + + @Test + public void fieldlLengthShouldNotExceed_usingObject() throws MalformedURLException { + MTCValidator validator = new MTCValidator(null, null); + + // You can also pass objects, in that case it will use toString(). + URL url = new URL("http://www.gtfs.org"); + assertThat(validator.validateFieldLength(null, url, 10), is(false)); + assertThat(validator.validateFieldLength(null, url, 30), is(true)); + } +} diff --git a/src/test/resources/fake-agency-mtc-long-fields/agency.txt b/src/test/resources/fake-agency-mtc-long-fields/agency.txt new file mode 100755 index 000000000..ecd839d0b --- /dev/null +++ b/src/test/resources/fake-agency-mtc-long-fields/agency.txt @@ -0,0 +1,5 @@ +agency_id,agency_name,agency_url,agency_lang,agency_phone,agency_email,agency_timezone,agency_fare_url,agency_branding_url +1,Fake Transit,http://www.example.com,,,,America/Los_Angeles,, +2,Agency name with more than fifty 50 characters that does not meet MTC guidelines,http://www.example.com,,,,America/Los_Angeles,, +Agency_id_with_more_than_50_characters_that_does_not_meet_MTC_guidelines,Fake Transit,http://www.example.com,,,,America/Los_Angeles,, +4,Fake Transit,http://www.agency.com/long/url/____________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________,,,,America/Los_Angeles,, diff --git a/src/test/resources/fake-agency-mtc-long-fields/calendar.txt b/src/test/resources/fake-agency-mtc-long-fields/calendar.txt new file mode 100755 index 000000000..4c9b7fd13 --- /dev/null +++ b/src/test/resources/fake-agency-mtc-long-fields/calendar.txt @@ -0,0 +1,2 @@ +service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date +04100312-8fe1-46a5-a9f2-556f39478f57,1,1,1,1,1,1,1,20170915,20170917 diff --git a/src/test/resources/fake-agency-mtc-long-fields/calendar_dates.txt b/src/test/resources/fake-agency-mtc-long-fields/calendar_dates.txt new file mode 100755 index 000000000..74c1ef632 --- /dev/null +++ b/src/test/resources/fake-agency-mtc-long-fields/calendar_dates.txt @@ -0,0 +1 @@ +service_id,date,exception_type diff --git a/src/test/resources/fake-agency-mtc-long-fields/fare_attributes.txt b/src/test/resources/fake-agency-mtc-long-fields/fare_attributes.txt new file mode 100755 index 000000000..3173d016d --- /dev/null +++ b/src/test/resources/fake-agency-mtc-long-fields/fare_attributes.txt @@ -0,0 +1,2 @@ +fare_id,price,currency_type,payment_method,transfers,transfer_duration +route_based_fare,1.23,USD,0,0,0 \ No newline at end of file diff --git a/src/test/resources/fake-agency-mtc-long-fields/fare_rules.txt b/src/test/resources/fake-agency-mtc-long-fields/fare_rules.txt new file mode 100755 index 000000000..05d2aadf8 --- /dev/null +++ b/src/test/resources/fake-agency-mtc-long-fields/fare_rules.txt @@ -0,0 +1,2 @@ +fare_id,route_id,origin_id,destination_id,contains_id +route_based_fare,1,,, \ No newline at end of file diff --git a/src/test/resources/fake-agency-mtc-long-fields/feed_info.txt b/src/test/resources/fake-agency-mtc-long-fields/feed_info.txt new file mode 100644 index 000000000..ceac60810 --- /dev/null +++ b/src/test/resources/fake-agency-mtc-long-fields/feed_info.txt @@ -0,0 +1,2 @@ +feed_id,feed_publisher_name,feed_publisher_url,feed_lang,feed_version +fake_transit,Conveyal,http://www.conveyal.com,en,1.0 \ No newline at end of file diff --git a/src/test/resources/fake-agency-mtc-long-fields/frequencies.txt b/src/test/resources/fake-agency-mtc-long-fields/frequencies.txt new file mode 100755 index 000000000..9baceff3a --- /dev/null +++ b/src/test/resources/fake-agency-mtc-long-fields/frequencies.txt @@ -0,0 +1,2 @@ +trip_id,start_time,end_time,headway_secs,exact_times +frequency-trip,08:00:00,09:00:00,1800,0 \ No newline at end of file diff --git a/src/test/resources/fake-agency-mtc-long-fields/routes.txt b/src/test/resources/fake-agency-mtc-long-fields/routes.txt new file mode 100755 index 000000000..10f39224c --- /dev/null +++ b/src/test/resources/fake-agency-mtc-long-fields/routes.txt @@ -0,0 +1,2 @@ +agency_id,route_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color,route_branding_url +1,1,101A,Example Route,,3,,7CE6E7,FFFFFF, diff --git a/src/test/resources/fake-agency-mtc-long-fields/shapes.txt b/src/test/resources/fake-agency-mtc-long-fields/shapes.txt new file mode 100755 index 000000000..3f2e3fd13 --- /dev/null +++ b/src/test/resources/fake-agency-mtc-long-fields/shapes.txt @@ -0,0 +1,8 @@ +shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence,shape_dist_traveled +5820f377-f947-4728-ac29-ac0102cbc34e,37.0612132,-122.0074332,1,0.0000000 +5820f377-f947-4728-ac29-ac0102cbc34e,37.0611720,-122.0075000,2,7.4997067 +5820f377-f947-4728-ac29-ac0102cbc34e,37.0613590,-122.0076830,3,33.8739075 +5820f377-f947-4728-ac29-ac0102cbc34e,37.0608780,-122.0082780,4,109.0402932 +5820f377-f947-4728-ac29-ac0102cbc34e,37.0603590,-122.0088280,5,184.6078298 +5820f377-f947-4728-ac29-ac0102cbc34e,37.0597610,-122.0093540,6,265.8053023 +5820f377-f947-4728-ac29-ac0102cbc34e,37.0590660,-122.0099190,7,357.8617018 diff --git a/src/test/resources/fake-agency-mtc-long-fields/stop_times.txt b/src/test/resources/fake-agency-mtc-long-fields/stop_times.txt new file mode 100755 index 000000000..33ab446ac --- /dev/null +++ b/src/test/resources/fake-agency-mtc-long-fields/stop_times.txt @@ -0,0 +1,7 @@ +trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,timepoint +a30277f8-e50a-4a85-9141-b1e0da9d429d,07:00:00,07:00:00,4u6g,1,,0,0,0.0000000, +a30277f8-e50a-4a85-9141-b1e0da9d429d,07:01:00,07:01:00,johv,2,,0,0,341.4491961, +frequency-trip,08:00:00,08:00:00,4u6g,1,,0,0,0.0000000, +frequency-trip,08:29:00,08:29:00,1234,2,,0,0,341.4491961, +trip-with-long-shortname,08:00:00,08:00:00,4u6g,1,,0,0,0.0000000, +trip-with-long-shortname,08:29:00,08:29:00,1234,2,,0,0,341.4491961, diff --git a/src/test/resources/fake-agency-mtc-long-fields/stops.txt b/src/test/resources/fake-agency-mtc-long-fields/stops.txt new file mode 100755 index 000000000..9ac372a00 --- /dev/null +++ b/src/test/resources/fake-agency-mtc-long-fields/stops.txt @@ -0,0 +1,5 @@ +stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding +4u6g,,Butler Ln,,37.0612132,-122.0074332,,,0,,, +johv,,Scotts Valley Dr & Victor Sq,,37.0590172,-122.0096058,,,0,,, +123,,Parent Station,,37.0666,-122.0777,,,1,,, +1234,,Long Stop Name of more than 100 characters that exceeds MTC guidelines regarding the length of this field,,37.06662,-122.07772,,,0,123,, diff --git a/src/test/resources/fake-agency-mtc-long-fields/transfers.txt b/src/test/resources/fake-agency-mtc-long-fields/transfers.txt new file mode 100755 index 000000000..357103c47 --- /dev/null +++ b/src/test/resources/fake-agency-mtc-long-fields/transfers.txt @@ -0,0 +1 @@ +from_stop_id,to_stop_id,transfer_type,min_transfer_time diff --git a/src/test/resources/fake-agency-mtc-long-fields/trips.txt b/src/test/resources/fake-agency-mtc-long-fields/trips.txt new file mode 100755 index 000000000..9b6feaa36 --- /dev/null +++ b/src/test/resources/fake-agency-mtc-long-fields/trips.txt @@ -0,0 +1,4 @@ +route_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,bikes_allowed,wheelchair_accessible,service_id +1,a30277f8-e50a-4a85-9141-b1e0da9d429d,,,0,,5820f377-f947-4728-ac29-ac0102cbc34e,0,0,04100312-8fe1-46a5-a9f2-556f39478f57 +1,frequency-trip,This is a trip headsign of more than 120 characters that exceeds the MTC guidelines on the number of characters for the trip headsign,trip-short-name,0,,5820f377-f947-4728-ac29-ac0102cbc34e,0,0,04100312-8fe1-46a5-a9f2-556f39478f57 +1,trip-with-long-shortname,trip headsign,This is a trip short name of more than 50 characters beyond MTC guidelines,0,,5820f377-f947-4728-ac29-ac0102cbc34e,0,0,04100312-8fe1-46a5-a9f2-556f39478f57 \ No newline at end of file