diff --git a/src/data/invalid/GeoLocationNameValue-no_country_or_sea.yaml b/src/data/invalid/GeoLocationNameValue-no_country_or_sea.yaml new file mode 100644 index 0000000000..e2eb30bdcc --- /dev/null +++ b/src/data/invalid/GeoLocationNameValue-no_country_or_sea.yaml @@ -0,0 +1,4 @@ +has_raw_value: "USA: Maryland: Bethesda" +subnational_1_or_ocean_region: Maryland +locality: Bethesda +type: nmdc:GeoLocationNameValue diff --git a/src/data/invalid/GeoLocationNameValue-no_raw_value.yaml b/src/data/invalid/GeoLocationNameValue-no_raw_value.yaml new file mode 100644 index 0000000000..07c89bb026 --- /dev/null +++ b/src/data/invalid/GeoLocationNameValue-no_raw_value.yaml @@ -0,0 +1,4 @@ +country_or_sea: USA +subnational_1_or_ocean_region: Maryland +locality: Bethesda +type: nmdc:GeoLocationNameValue diff --git a/src/data/valid/GeoLocationNameValue-1.yaml b/src/data/valid/GeoLocationNameValue-1.yaml new file mode 100644 index 0000000000..c12f084b8a --- /dev/null +++ b/src/data/valid/GeoLocationNameValue-1.yaml @@ -0,0 +1,5 @@ +has_raw_value: "USA: Maryland: Bethesda" +country_or_sea: USA +subnational_1_or_ocean_region: Maryland +locality: Bethesda +type: nmdc:GeoLocationNameValue diff --git a/src/data/valid/GeoLocationNameValue-2.yaml b/src/data/valid/GeoLocationNameValue-2.yaml new file mode 100644 index 0000000000..4d70f94f65 --- /dev/null +++ b/src/data/valid/GeoLocationNameValue-2.yaml @@ -0,0 +1,5 @@ +has_raw_value: "USA: Bethesda, Maryland" +country_or_sea: USA +subnational_1_or_ocean_region: Maryland +locality: Bethesda +type: nmdc:GeoLocationNameValue diff --git a/src/data/valid/GeoLocationNameValue-3.yaml b/src/data/valid/GeoLocationNameValue-3.yaml new file mode 100644 index 0000000000..52dfd3fc1a --- /dev/null +++ b/src/data/valid/GeoLocationNameValue-3.yaml @@ -0,0 +1,4 @@ +has_raw_value: "Atlantic Ocean: Charlie Gibbs Fracture Zone" +country_or_sea: Atlantic Ocean +subnational_1_or_ocean_region: Charlie Gibbs Fracture Zone +type: nmdc:GeoLocationNameValue diff 
--git a/src/schema/attribute_values.yaml b/src/schema/attribute_values.yaml index 4e55073ecf..ad9fe12d72 100644 --- a/src/schema/attribute_values.yaml +++ b/src/schema/attribute_values.yaml @@ -15,11 +15,31 @@ classes: class_uri: 'nmdc:AttributeValue' description: >- The value for any value of a attribute for a sample. This object can hold both the un-normalized atomic - value and the structured value + value and the structured value. slots: - has_raw_value - type + GeoLocationNameValue: + class_uri: nmdc:GeoLocationNameValue + description: >- + A structured address-like record of where something is located, came from, or took place + comments: + - When an instance of this class is populated via an ETL pipeline, the raw value from the source should go into this class's has_raw_value slot. + The ETL pipeline can parse that raw value and insert the results into this class's other slots. + - "This class is more useful than string-only representations when data may be ingested from multiple sources, some of which may use a style like 'Nation: State, Locality' and others might use 'Nation: Locality, State'." + - country_or_sea is required. Instances of this class should have either a subnational_1_or_ocean_region or a locality. Preferably both. + is_a: AttributeValue + narrow_mappings: + - mixs:0000010 + slots: + - country_or_sea + - subnational_1_or_ocean_region + - locality + slot_usage: + has_raw_value: + required: true + QuantityValue: class_uri: nmdc:QuantityValue is_a: AttributeValue diff --git a/src/schema/basic_slots.yaml b/src/schema/basic_slots.yaml index 065ce5d2ac..ecf37c1ebe 100644 --- a/src/schema/basic_slots.yaml +++ b/src/schema/basic_slots.yaml @@ -22,6 +22,56 @@ default_range: string slots: + country_or_sea: + range: string + description: A maximally coarse-grained, geo-political description of where something is, came from, took place, etc. 
+ comments: + - MIxS states that country or sea names should be chosen from the INSDC country list aka the geo_loc_name-qualifier-vocabulary + - https://www.insdc.org/submitting-standards/geo_loc_name-qualifier-vocabulary/ + - https://www.cia.gov/the-world-factbook/ and http://unstats.un.org/unsd/methods/m49/m49.htm + todos: + - "ValueError: https://www.cia.gov/the-world-factbook/ and http://unstats.un.org/unsd/methods/m49/m49.htm is not a valid URI or CURIE" + - the ValueError above is raised when populating see_also + examples: + - value: "Canada" + - value: "Atlantic Ocean" + - value: "USA" + narrow_mappings: + - mixs:0000010 + rank: 1 + required: true + + subnational_1_or_ocean_region: + range: string + description: A geo-political description of a region that is a direct, unambiguous subdivision of a paired country_or_sea value, + OR an unambiguous region or feature within an ocean or "sea". + comments: + - "INSDC's guidance could be taken to imply that Vancouver or Cote d’Azur are appropriate values for this slot, but NMDC would place those in the locality slot." + aliases: + - state + examples: + - value: "British Columbia" + - value: "Charlie Gibbs Fracture Zone" + - value: "Maryland" + narrow_mappings: + - mixs:0000010 + rank: 2 + recommended: true + + locality: + range: string + description: A geo-political description of a region or feature that is within or overlaps with a paired subnational_1_or_ocean_region value. + aliases: + - state + examples: + - value: "Vancouver" + - value: "Cote d’Azur" + - value: "Bethesda" + narrow_mappings: + - mixs:0000010 + rank: 3 + recommended: true + qc_comment: range: string description: >- @@ -598,27 +648,27 @@ enums: Virus Summary: description: Tab separated file listing the viruses found by geNomad. - see_also: + see_also: - https://portal.nersc.gov/genomad/ annotations: file_name_pattern: '^_virus_summary\.tsv?$' Plasmid Summary: description: Tab separated file listing the plasmids found be geNomad. 
- see_also: + see_also: - https://portal.nersc.gov/genomad/ annotations: file_name_pattern: '^_plasmid_summary\.tsv?$' - + GeNomad Aggregated Classification: description: >- Tab separated file which combines the results from neural network-based classification and marker-based classification for virus and plasmid detection with geNomad. - see_also: + see_also: - https://portal.nersc.gov/genomad/ annotations: - file_name_pattern: '^_aggregated_classification\.tsv?$' - + file_name_pattern: '^_aggregated_classification\.tsv?$' + Reference Calibration File: description: A file that contains data used to calibrate a natural organic matter or metabalomics analysis. @@ -1090,12 +1140,12 @@ enums: title: Metagenome metatranscriptome: aliases: - - metaT + - metaT title: Metatranscriptome amplicon_sequencing_assay: meaning: OBI:0002767 title: Amplicon - + MassSpectrometryEnum: permissible_values: metaproteome: @@ -1106,13 +1156,13 @@ enums: aliases: - metaB title: Metabolome - lipidome: + lipidome: title: Lipidome nom: aliases: - natural organic matter title: Natural Organic Matter - + ExtractionTargetEnum: permissible_values: DNA: { }