from hestia_earth.schema import NodeType, SiteSiteType, TermTermType, CycleFunctionalUnit
from hestia_earth.utils.tools import flatten, list_sum
from hestia_earth.utils.date import diff_in_days, is_in_days
from hestia_earth.utils.model import find_term_match, filter_list_term_type
from hestia_earth.utils.lookup import get_table_value, download_lookup, column_name
from hestia_earth.validation.utils import (
_filter_list_errors, _find_linked_node, _value_average, _list_sum_terms, is_same_product
)
from hestia_earth.validation.terms import get_crop_residue_terms
from .aggregated_shared import (
validate_quality_score_min
)
from .shared import (
validate_dates, validate_list_dates, validate_list_dates_after, validate_date_lt_today, validate_list_min_below_max,
validate_list_term_percent, validate_linked_source_privacy, validate_list_dates_length, validate_list_date_lt_today,
validate_list_model, validate_list_model_config, validate_list_dates_format,
validate_list_duplicate_values, validate_private_has_source, validate_list_value_between_min_max,
validate_duplicated_term_units
)
from .animal import validate_has_animals
from .emission import validate_linked_terms, validate_method_not_relevant, validate_methodTier_not_relevant
from .input import (
validate_must_include_id, validate_input_country, validate_related_impacts, validate_input_distribution_value,
validate_animalFeed_requires_isAnimalFeed
)
from .practice import (
validate_cropResidueManagement, validate_longFallowPeriod, validate_excretaManagement, validate_no_tillage,
validate_tillage_site_type, validate_tillage_values, validate_liveAnimal_system, validate_pastureGrass_key_units,
validate_has_pastureGrass, validate_pastureGrass_key_value, validate_waterRegime, validate_defaultValue
)
from .product import (
validate_economicValueShare, validate_value_empty, validate_value_0, validate_excreta,
validate_primary as validate_product_primary, validate_product_ha_functional_unit_ha, validate_product_yield,
validate_liveAnimal_requires_excreta
)
from .completeness import validate_completeness
from .transformation import (
validate_previous_transformation, validate_transformation_excretaManagement, validate_linked_emission
)
from .property import (
validate_all as validate_properties,
validate_volatileSolidsContent
)
# Site types for which the crop residue fate checks are applied
# (read by validate_crop_residue_complete and validate_crop_residue_incomplete).
SITE_TYPES_CROP_RESIDUE = [
SiteSiteType.CROPLAND.value,
SiteSiteType.GLASS_OR_HIGH_ACCESSIBLE_COVER.value
]
# Site types on which the `1 ha` functional unit is rejected
# (read by validate_functionalUnit_not_1_ha).
SITE_TYPES_NOT_1_HA = [
SiteSiteType.AGRI_FOOD_PROCESSOR.value
]
# Per-term configuration handed to validate_list_model_config for `products` in validate_cycle.
# NOTE(review): `delta` is presumably the tolerated relative difference with the model value
# and `resetDataCompleteness` a flag for the model run - confirm in validate_list_model_config.
PRODUCTS_MODEL_CONFIG = {
'aboveGroundCropResidueTotal': {
'level': 'warning',
'model': 'ipcc2006',
'delta': 0.5,
'resetDataCompleteness': True
}
}
# Term types handed to validate_duplicated_term_units for `inputs` and `products` in validate_cycle.
DUPLICATED_TERM_UNITS_TERM_TYPES = [
TermTermType.ANIMALPRODUCT,
TermTermType.ORGANICFERTILISER
]
def validate_functionalUnit_not_1_ha(cycle: dict, site: dict):
    """
    Reject the `1 ha` functional unit on site types listed in `SITE_TYPES_NOT_1_HA`.

    Parameters
    ----------
    cycle : dict
        The `Cycle`; its `functionalUnit` is checked.
    site : dict
        The linked `Site`; its `siteType` decides whether the restriction applies.

    Returns
    -------
    bool or dict
        `True` when valid, otherwise an error on `.functionalUnit`.
    """
    site_type = site.get('siteType')
    functional_unit = cycle.get('functionalUnit')
    forbidden = CycleFunctionalUnit._1_HA.value

    # Only the combination "restricted site type" + "forbidden unit" is an error.
    if site_type not in SITE_TYPES_NOT_1_HA:
        return True
    if functional_unit != forbidden:
        return True

    return {
        'level': 'error',
        'dataPath': '.functionalUnit',
        'message': f"must not be equal to {forbidden}",
        'params': {
            'siteType': site_type
        }
    }
def validate_cycle_dates(cycle: dict):
    """
    Check the `Cycle` dates with `validate_dates`.

    Returns the (truthy) result of `validate_dates`, otherwise an error on `.endDate`.
    """
    dates_result = validate_dates(cycle)
    if dates_result:
        return dates_result

    return {
        'level': 'error',
        'dataPath': '.endDate',
        'message': 'must be greater than startDate'
    }
def _should_validate_cycleDuration(cycle: dict):
    """
    Tell whether `cycleDuration` can be compared with the dates: the field must be set
    and both `startDate` and `endDate` must pass `is_in_days`.
    """
    if 'cycleDuration' not in cycle:
        return False
    return is_in_days(cycle.get('startDate')) and is_in_days(cycle.get('endDate'))
def validate_cycleDuration(cycle: dict):
    """
    Check that `cycleDuration` (rounded to 1 decimal) equals the number of days
    between `startDate` and `endDate` as computed by `diff_in_days`.

    Callers are expected to guard this with `_should_validate_cycleDuration`.

    Returns `True` when valid, otherwise an error on `.cycleDuration`.
    """
    expected_days = diff_in_days(cycle.get('startDate'), cycle.get('endDate'))
    declared_days = round(cycle.get('cycleDuration'), 1)

    if not (expected_days == declared_days):
        return {
            'level': 'error',
            'dataPath': '.cycleDuration',
            'message': f"must equal to endDate - startDate in days (~{expected_days})"
        }
    return True
def validate_sum_aboveGroundCropResidue(products: list):
    """
    Check that the `aboveGroundCropResidue...Total` product is not smaller than the sum
    of the other `aboveGroundCropResidue...` products (with a 1% tolerance on the total).

    Products without a `term` or without a string `term.@id` are ignored instead of
    raising, so partially filled nodes do not crash the validation.

    Parameters
    ----------
    products : list
        The `Cycle` products.

    Returns
    -------
    bool or dict
        `True` when there is no total, no other residue, or the total is large enough;
        otherwise an error on the `value` of the total product.
    """
    prefix = 'aboveGroundCropResidue'

    def _term_id(product: dict):
        # `term` / `@id` can be missing or None: fall back to an empty id that matches nothing.
        term_id = (product.get('term') or {}).get('@id')
        return term_id if isinstance(term_id, str) else ''

    residues = [
        (index, product) for index, product in enumerate(products)
        if _term_id(product).startswith(prefix)
    ]
    total_residue_index = next(
        (index for index, product in residues if 'Total' in _term_id(product)), None
    )
    other_residues = [product for _index, product in residues if 'Total' not in _term_id(product)]

    # Nothing to compare: skip averaging values altogether.
    if total_residue_index is None or not other_residues:
        return True

    total_residue = _value_average(products[total_residue_index])
    other_sum = sum(_value_average(node) for node in other_residues)
    other_residues_ids = [_term_id(node) for node in other_residues]

    # 1% tolerance on the total to absorb rounding differences.
    return (total_residue * 1.01) >= other_sum or {
        'level': 'error',
        'dataPath': f".products[{total_residue_index}].value",
        'message': f"must be more than or equal to ({' + '.join(other_residues_ids)})"
    }
def _crop_residue_fate(cycle: dict):
    """
    Collect what describes the fate of crop residue in a `Cycle`.

    Returns a tuple `(practices, sum_above_ground, sum_below_ground)`: the
    `cropResidueManagement` practices, and the `_list_sum_terms` totals of the
    `cropResidue` products over the crop residue term ids starting with `above` / `below`.
    """
    management_practices = filter_list_term_type(
        cycle.get('practices', []), TermTermType.CROPRESIDUEMANAGEMENT
    )
    residue_products = filter_list_term_type(cycle.get('products', []), TermTermType.CROPRESIDUE)
    residue_term_ids = get_crop_residue_terms()

    above_ids = [term_id for term_id in residue_term_ids if term_id.startswith('above')]
    above_total = _list_sum_terms(residue_products, above_ids)

    below_ids = [term_id for term_id in residue_term_ids if term_id.startswith('below')]
    below_total = _list_sum_terms(residue_products, below_ids)

    return (management_practices, above_total, below_total)
def validate_crop_residue_complete(cycle: dict, site: dict):
    """
    When `completeness.cropResidue` is set and the site type is in
    `SITE_TYPES_CROP_RESIDUE`, require practices AND above ground AND below ground
    crop residue to be specified.

    Returns `True` when valid or not applicable, otherwise an error.
    """
    is_complete = cycle.get('completeness', {}).get(TermTermType.CROPRESIDUE.value, False)
    site_type = site.get('siteType')

    # The check only applies to complete data on the relevant site types.
    if not is_complete:
        return True
    if site_type not in SITE_TYPES_CROP_RESIDUE:
        return True

    practices, above_total, below_total = _crop_residue_fate(cycle)
    has_practices = len(practices) > 0
    has_above_ground = above_total > 0
    has_below_ground = below_total > 0
    if has_practices and has_above_ground and has_below_ground:
        return True

    return {
        'level': 'error',
        'dataPath': '',
        'message': 'must specify the fate of cropResidue',
        'params': {
            'siteType': SITE_TYPES_CROP_RESIDUE
        }
    }
def validate_crop_residue_incomplete(cycle: dict, site: dict):
    """
    When `completeness.cropResidue` is NOT set and the site type is in
    `SITE_TYPES_CROP_RESIDUE`, warn unless at least one of practices, above ground or
    below ground crop residue is specified.

    Returns a truthy value when valid or not applicable, otherwise a warning.
    """
    is_complete = cycle.get('completeness', {}).get(TermTermType.CROPRESIDUE.value, False)
    site_type = site.get('siteType')

    # Complete data is handled by `validate_crop_residue_complete`.
    if is_complete:
        return is_complete
    if site_type not in SITE_TYPES_CROP_RESIDUE:
        return True

    practices, above_total, below_total = _crop_residue_fate(cycle)
    has_practices = len(practices) > 0
    has_above_ground = above_total > 0
    has_below_ground = below_total > 0
    if has_practices or has_above_ground or has_below_ground:
        return True

    return {
        'level': 'warning',
        'dataPath': '',
        'message': 'should specify the fate of cropResidue',
        'params': {
            'siteType': SITE_TYPES_CROP_RESIDUE
        }
    }
def validate_siteDuration(cycle: dict):
    """
    Compare `siteDuration` with `cycleDuration`.

    - with `otherSites`: the two durations must differ;
    - without `otherSites`: the two durations must be equal.

    The check is skipped when either duration is missing.

    Returns `True` when valid or skipped, otherwise an error on `.siteDuration`.
    """
    cycle_duration = cycle.get('cycleDuration')
    site_duration = cycle.get('siteDuration')
    uses_other_sites = len(cycle.get('otherSites', [])) > 0

    if cycle_duration is None or site_duration is None:
        return True

    if uses_other_sites:
        # The cycle is split over several sites: a single site cannot cover all of it.
        if cycle_duration != site_duration:
            return True
        return {
            'level': 'error',
            'dataPath': '.siteDuration',
            'message': 'must not be equal to cycleDuration'
        }

    if cycle_duration == site_duration:
        return True
    return {
        'level': 'error',
        'dataPath': '.siteDuration',
        'message': 'must be equal to cycleDuration'
    }
def validate_otherSites_cycleDuration(cycle: dict):
    """
    Check that `cycleDuration` equals `siteDuration` plus all `otherSitesDuration`.

    The check is skipped when `siteDuration` is not set.

    Returns `True` when valid or skipped, otherwise an error on `.cycleDuration`.
    """
    cycle_duration = cycle.get('cycleDuration', 0)
    site_duration = cycle.get('siteDuration')
    other_durations = cycle.get('otherSitesDuration', [0])
    all_sites_duration = list_sum([site_duration or 0] + other_durations)

    if site_duration is None:
        return True
    if cycle_duration == all_sites_duration:
        return True

    return {
        'level': 'error',
        'dataPath': '.cycleDuration',
        'message': 'must be equal to the sum of siteDuration and otherSitesDuration'
    }
def _product_cover_crop(product: dict):
    """
    Tell whether the product term is flagged `possibleCoverCrop` in the lookup table
    of its `termType`.
    """
    term = product.get('term', {})
    lookup = download_lookup(f"{term.get('termType')}.csv")
    flag = get_table_value(lookup, 'termid', term.get('@id'), column_name('possibleCoverCrop'))
    # The lookup can return a numpy boolean: normalise to a plain `bool`.
    return bool(flag)
def validate_possibleCoverCrop(cycle: dict):
    """
    When the `Cycle` has an active `coverCrop` practice, check that every product can be
    a cover crop (see `_product_cover_crop`).

    The practice is considered active when it has no value, or when its first value is
    neither `0` nor `'false'`. Product lookups are only performed when the practice is
    active, so cycles without a cover crop never trigger them.

    Returns `True` when valid or not applicable, otherwise an error.
    """
    cover_crop = find_term_match(cycle.get('practices', []), 'coverCrop', None)
    cover_crop_value = cover_crop.get('value', []) if cover_crop else None
    has_cover_crop = cover_crop_value is not None and (
        len(cover_crop_value) == 0 or (cover_crop_value[0] != 0 and cover_crop_value[0] != 'false')
    )
    if not has_cover_crop:
        return True

    # `any` stops at the first product that is not a possible cover crop.
    has_invalid_product = any(not _product_cover_crop(p) for p in cycle.get('products', []))
    return not has_invalid_product or {
        'level': 'error',
        'dataPath': '',
        'message': 'cover crop cycle contains non cover crop product'
    }
def validate_set_treatment(cycle: dict, source: dict):
    """
    Warn when the `Source` specifies an `experimentDesign` but the `Cycle` has no
    `treatment`.

    Returns `True` when valid, otherwise a warning on `.treatment`.
    """
    field = 'treatment'
    if 'experimentDesign' not in source:
        return True
    if field in cycle:
        return True

    return {
        'level': 'warning',
        'dataPath': f".{field}",
        'message': f"should specify a {field} when experimentDesign is specified"
    }
def validate_products_animals(cycle: dict):
    """
    Warn when the `Cycle` products contain both `liveAnimal` and `animalProduct` terms.

    Returns `True` when valid, otherwise a warning on `.products`.
    """
    products = cycle.get('products', [])
    live_animals = filter_list_term_type(products, TermTermType.LIVEANIMAL)
    animal_products = filter_list_term_type(products, TermTermType.ANIMALPRODUCT)

    if len(live_animals) > 0 and len(animal_products) > 0:
        return {
            'level': 'warning',
            'dataPath': '.products',
            'message': 'should not specify both liveAnimal and animalProduct'
        }
    return True
def _filter_same_cycle(cycle: dict):
def filter(impact_assessment: dict):
ia_cycle = impact_assessment.get('cycle', {})
return any([
ia_cycle.get('id') and ia_cycle.get('id') == cycle.get('id'),
ia_cycle.get('@id') and ia_cycle.get('@id') == cycle.get('@id')
])
return filter
def validate_single_linked_impact_assessment(cycle: dict, node_map: dict = None):
    """
    Check that each product of the `Cycle` is linked to at most one uploaded
    `ImpactAssessment`.

    Parameters
    ----------
    cycle : dict
        The `Cycle` to validate.
    node_map : dict
        The uploaded nodes; the `ImpactAssessment` entry is read and its values are
        matched against the `Cycle`. Defaults to no nodes.

    Returns
    -------
    The result of `_filter_list_errors` over one result per product: `True` when valid,
    otherwise an error on `.products[index].term`.
    """
    # Avoid a shared mutable default: build a fresh mapping per call.
    node_map = {} if node_map is None else node_map
    uploaded_impact_assessments = node_map.get(NodeType.IMPACTASSESSMENT.value, {}).values()
    related_impact_assessments = list(filter(_filter_same_cycle(cycle), uploaded_impact_assessments))

    def validate(index: int, product: dict):
        same_products = [
            v for v in related_impact_assessments if is_same_product(product, v.get('product', {}))
        ]
        return len(same_products) <= 1 or {
            'level': 'error',
            'dataPath': f".products[{index}].term",
            'message': 'multiple ImpactAssessment are associated with this Product',
            'params': {
                'product': product,
                'node': {
                    'type': 'Cycle',
                    'id': cycle.get('id', cycle.get('@id'))
                }
            }
        }

    results = [validate(index, product) for index, product in enumerate(cycle.get('products', []))]
    return _filter_list_errors(flatten(results))
def validate_cycle(cycle: dict, node_map: dict = None):
    """
    Validates a single `Cycle`.

    Cycle-level checks always run; the checks on `emissions`, `inputs`, `products`,
    `practices`, `animals` and `transformations` only run when the matching list is
    not empty. Checks that need the linked `Site` or default `Source` are skipped
    (replaced by `True`) when that node is not found in `node_map`.

    Parameters
    ----------
    cycle : dict
        The `Cycle` to validate.
    node_map : dict
        The list of all nodes to do cross-validation, grouped by `type` and `id`.
        Defaults to no nodes.

    Returns
    -------
    List
        The flattened results of every check that ran. The validators visible in this
        module return `True` when a check passes and an error `dict` otherwise.
    """
    # Avoid a shared mutable default: the map is handed to several other validators.
    node_map = {} if node_map is None else node_map
    site = _find_linked_node(node_map, cycle.get('site', {}))
    source = _find_linked_node(node_map, cycle.get('defaultSource', {}))
    return flatten([
        validate_cycle_dates(cycle),
        validate_date_lt_today(cycle, 'startDate'),
        validate_date_lt_today(cycle, 'endDate'),
        validate_linked_source_privacy(cycle, 'defaultSource', node_map),
        validate_private_has_source(cycle, 'defaultSource'),
        validate_cycleDuration(cycle) if _should_validate_cycleDuration(cycle) else True,
        validate_completeness(cycle, site) if 'completeness' in cycle else True,
        validate_siteDuration(cycle),
        validate_otherSites_cycleDuration(cycle),
        validate_possibleCoverCrop(cycle),
        validate_products_animals(cycle),
        validate_set_treatment(cycle, source) if source else True,
        validate_functionalUnit_not_1_ha(cycle, site) if site else True
    ]) + flatten(
        ([
            validate_list_model(cycle, 'emissions'),
            validate_list_dates(cycle, 'emissions'),
            validate_list_dates_after(cycle, 'startDate', 'emissions', ['startDate', 'endDate']),
            validate_list_dates_format(cycle, 'emissions'),
            validate_list_min_below_max(cycle, 'emissions'),
            validate_list_value_between_min_max(cycle, 'emissions'),
            validate_list_term_percent(cycle, 'emissions'),
            validate_list_dates_length(cycle, 'emissions'),
            validate_list_date_lt_today(cycle, 'emissions', ['startDate', 'endDate']),
            validate_properties(cycle, 'emissions'),
            validate_linked_terms(cycle, 'emissions', 'inputs', 'inputs', True),
            validate_linked_terms(cycle, 'emissions', 'transformation', 'transformations', True),
            validate_method_not_relevant(cycle, 'emissions'),
            validate_methodTier_not_relevant(cycle, 'emissions')
        ] if len(cycle.get('emissions', [])) > 0 else []) +
        ([
            validate_list_dates(cycle, 'inputs'),
            validate_list_dates_after(cycle, 'startDate', 'inputs', ['startDate', 'endDate', 'dates']),
            validate_list_dates_format(cycle, 'inputs'),
            validate_list_dates_length(cycle, 'inputs'),
            validate_list_date_lt_today(cycle, 'inputs', ['startDate', 'endDate']),
            validate_list_min_below_max(cycle, 'inputs'),
            validate_list_value_between_min_max(cycle, 'inputs'),
            validate_list_term_percent(cycle, 'inputs'),
            validate_properties(cycle, 'inputs'),
            validate_volatileSolidsContent(cycle, 'inputs'),
            validate_must_include_id(cycle['inputs']),
            validate_input_country(cycle, 'inputs'),
            validate_related_impacts(cycle, 'inputs', node_map),
            validate_input_distribution_value(cycle, site, 'inputs') if site else True,
            validate_animalFeed_requires_isAnimalFeed(cycle, site, 'inputs') if site else True,
            validate_duplicated_term_units(cycle, 'inputs', DUPLICATED_TERM_UNITS_TERM_TYPES)
        ] if len(cycle.get('inputs', [])) > 0 else []) +
        ([
            validate_single_linked_impact_assessment(cycle, node_map),
            validate_list_dates(cycle, 'products'),
            validate_list_dates_after(cycle, 'startDate', 'products', ['startDate', 'endDate', 'dates']),
            validate_list_dates_format(cycle, 'products'),
            validate_list_dates_length(cycle, 'products'),
            validate_list_date_lt_today(cycle, 'products', ['startDate', 'endDate']),
            validate_list_min_below_max(cycle, 'products'),
            validate_list_value_between_min_max(cycle, 'products'),
            validate_list_term_percent(cycle, 'products'),
            validate_economicValueShare(cycle.get('products')),
            validate_sum_aboveGroundCropResidue(cycle.get('products')),
            validate_value_empty(cycle.get('products')),
            validate_value_0(cycle.get('products')),
            validate_product_primary(cycle.get('products')),
            # Called once only: a second identical call would report every error twice.
            # NOTE(review): unlike `emissions` and `inputs`, `products` has no
            # `validate_properties` call - confirm whether the removed duplicate was
            # meant to be `validate_properties(cycle, 'products')`.
            validate_volatileSolidsContent(cycle, 'products'),
            validate_crop_residue_complete(cycle, site) if site else True,
            validate_crop_residue_incomplete(cycle, site) if site else True,
            validate_list_model_config(cycle, 'products', PRODUCTS_MODEL_CONFIG),
            validate_liveAnimal_requires_excreta(cycle, 'products'),
            validate_excreta(cycle, 'products'),
            validate_product_ha_functional_unit_ha(cycle, 'products'),
            validate_product_yield(cycle, site, 'products') if site else True,
            validate_has_animals(cycle),
            validate_duplicated_term_units(cycle, 'products', DUPLICATED_TERM_UNITS_TERM_TYPES)
        ] if len(cycle.get('products', [])) > 0 else []) +
        ([
            validate_list_dates(cycle, 'practices'),
            validate_list_dates_after(cycle, 'startDate', 'practices', ['startDate', 'endDate', 'dates']),
            validate_list_dates_format(cycle, 'practices'),
            validate_list_date_lt_today(cycle, 'practices', ['startDate', 'endDate']),
            validate_list_min_below_max(cycle, 'practices'),
            validate_list_value_between_min_max(cycle, 'practices'),
            validate_list_term_percent(cycle, 'practices'),
            validate_defaultValue(cycle, 'practices'),
            validate_cropResidueManagement(cycle.get('practices')),
            validate_waterRegime(cycle.get('practices')),
            validate_longFallowPeriod(cycle.get('practices')),
            validate_volatileSolidsContent(cycle, 'practices'),
            validate_list_duplicate_values(cycle, 'practices', 'term.termType', TermTermType.EXCRETAMANAGEMENT.value),
            validate_excretaManagement(cycle, cycle.get('practices')),
            validate_no_tillage(cycle.get('practices')),
            validate_tillage_site_type(cycle.get('practices'), site) if site else True,
            validate_tillage_values(cycle.get('practices')),
            validate_liveAnimal_system(cycle),
            validate_pastureGrass_key_units(cycle, 'practices'),
            validate_pastureGrass_key_value(cycle, 'practices'),
            validate_has_pastureGrass(cycle, site, 'practices') if site else True
        ] if len(cycle.get('practices', [])) > 0 else []) +
        ([
            validate_volatileSolidsContent(cycle, 'animals')
        ] if len(cycle.get('animals', [])) > 0 else []) +
        ([
            validate_list_dates(cycle, 'transformations'),
            validate_list_dates_after(cycle, 'startDate', 'transformations', ['startDate', 'endDate']),
            validate_list_dates_format(cycle, 'transformations'),
            validate_list_date_lt_today(cycle, 'transformations', ['startDate', 'endDate']),
            validate_previous_transformation(cycle, 'transformations'),
            validate_transformation_excretaManagement(cycle, 'transformations'),
            validate_linked_emission(cycle, 'transformations')
        ] if len(cycle.get('transformations', [])) > 0 else [])
    )
def validate_cycle_aggregated(cycle: dict, node_map: dict = None):
    """
    Validates a single aggregated `Cycle`.

    Parameters
    ----------
    cycle : dict
        The aggregated `Cycle` to validate.
    node_map : dict
        Accepted for signature parity with `validate_cycle`; not used by this function.

    Returns
    -------
    List
        The flattened result of `validate_quality_score_min`.
    """
    return flatten([
        validate_quality_score_min(cycle)
    ])