This notebook shows how to generate OSC Projects, Products and Workflows using pystac. EarthCODE provides a GUI editor that offers this and more functionality through a user interface. However, if you decide to create items manually, a library like pystac can save some time. The code described here does not carry out all the steps required to pass the automated OSC validation. For example, you still have to generate all return links as described in the manual PR tutorial. You’ll also have to open the PR manually at the end.
NOTE: Before you run the notebook you’ll need a fork of the open-science-catalog-metadata repository. See the Manual PR Tutorial for how to create one.
Import libraries
from datetime import datetime, timezone

import pystac
from pystac.extensions.projection import ProjectionExtension
Get all entries from the Open Science Catalog
# Read the catalog root from the local clone of the metadata repository
# (the path is relative to the notebook's working directory).
catalog = pystac.Catalog.from_file('../../open-science-catalog-metadata/catalog.json')
# Access the 'themes' child catalog and collect the ids of its children;
# the helper functions check requested themes against this list.
themes = catalog.get_child('themes')
allowed_themes = [child.id for child in themes.get_children()]
# Access the 'eo-missions' child catalog and collect the ids of the available missions.
missions = catalog.get_child('eo-missions')
allowed_missions = [child.id for child in missions.get_children()]
# Access the 'variables' child catalog and collect the ids of the available variables.
variables = catalog.get_child('variables')
allowed_variables = [child.id for child in variables.get_children()]
# Access the catalogs holding the existing projects, products and workflows.
products = catalog.get_child('products')
projects = catalog.get_child('projects')
workflows = catalog.get_child('workflows')
Define helper functions | Add new variables, themes and EO missions
def add_product_variables(collection, variables_to_add):
    '''Add variables to the collection custom fields and add links to the variables catalog.

    For every id in ``variables_to_add`` a ``related`` link titled
    ``Variable: <title>`` is added, pointing at the ``self`` href of the
    matching child of the module-level ``variables`` catalog. The ids are then
    stored in the ``osc:variables`` custom field.

    Args:
        collection: pystac collection to modify in place.
        variables_to_add: list of variable ids; each must be in ``allowed_variables``.

    Raises:
        ValueError: if an id is not listed in ``allowed_variables``.
    '''
    for variable in variables_to_add:
        # Explicit check instead of ``assert`` so validation survives ``python -O``.
        if variable not in allowed_variables:
            raise ValueError(
                f"Unknown variable {variable!r}: not found in the Open Science Catalog variables"
            )

        # Look the entry up once and reuse it for both the href and the title.
        variable_entry = variables.get_child(variable)
        collection.add_link(
            pystac.Link(rel="related",
                        target=variable_entry.get_links('self')[0].href,
                        media_type="application/json",
                        title=f"Variable: {variable_entry.title}")
        )

    # Add variables to the custom fields
    collection.extra_fields.update({
        "osc:variables": variables_to_add
    })
def add_themes(collection, themes_to_add):
    '''Add themes to the collection custom fields and add links to the themes catalog.

    For every id in ``themes_to_add`` a ``related`` link titled
    ``Theme: <title>`` is added, pointing at the ``self`` href of the matching
    child of the module-level ``themes`` catalog. The ``themes`` custom field
    is then set to a single entry for the OSC theme scheme that lists every
    theme as a concept (an empty list when no themes are given).

    Args:
        collection: pystac collection to modify in place.
        themes_to_add: iterable of theme ids; each must be in ``allowed_themes``.

    Raises:
        ValueError: if an id is not listed in ``allowed_themes``.
    '''
    concepts = []
    for theme in themes_to_add:
        # Explicit check instead of ``assert`` so validation survives ``python -O``.
        if theme not in allowed_themes:
            raise ValueError(
                f"Unknown theme {theme!r}: not found in the Open Science Catalog themes"
            )

        # Look the entry up once and reuse it for both the href and the title.
        theme_entry = themes.get_child(theme)
        collection.add_link(
            pystac.Link(rel="related",
                        target=theme_entry.get_links('self')[0].href,
                        media_type="application/json",
                        title=f"Theme: {theme_entry.title}")
        )
        concepts.append({"id": theme})

    # All concepts share one scheme, so they are grouped under a single entry
    # rather than repeating the scheme once per theme.
    themes_list = []
    if concepts:
        themes_list.append({
            "scheme": "https://github.com/stac-extensions/osc#theme",
            "concepts": concepts,
        })

    # Add themes to the custom fields
    collection.extra_fields.update({
        "themes": themes_list
    })
def add_links(collection, relations, targets, titles):
    '''Add links from the collection to outside websites.

    The three sequences are consumed in lockstep: the i-th relation, target and
    title form one link. ``zip`` stops at the shortest sequence, so surplus
    entries in a longer one are ignored.

    Args:
        collection: pystac collection to modify in place.
        relations: link relation types (``rel``), one per link.
        targets: link targets (hrefs), one per link.
        titles: link titles, one per link.
    '''
    collection.add_links([
        pystac.Link(rel=rel, target=target, title=title)
        for rel, target, title in zip(relations, targets, titles)
    ])
def create_contract(name, roles, emails):
    '''Create a contact entry for the ``contacts`` custom field.

    Args:
        name: name of the person or organisation.
        roles: iterable of role names; copied into a new list.
        emails: iterable of e-mail addresses, or ``None``/empty when there are none.

    Returns:
        A dict with ``name`` and ``roles``; an ``emails`` list of
        ``{"value": <address>}`` dicts is included only when ``emails`` is non-empty.
    '''
    contact = {
        "name": name,
        "roles": list(roles),
    }

    # The "emails" key is omitted entirely when there is nothing to list.
    if emails:
        contact['emails'] = [{"value": email} for email in emails]
    return contact
def add_product_missions(collection, missions_to_add):
    '''Add missions to the collection custom fields and add links to the missions catalog.

    For every id in ``missions_to_add`` a ``related`` link titled
    ``EO Mission: <title>`` is added, pointing at the ``self`` href of the
    matching child of the module-level ``missions`` catalog. The ids are then
    stored in the ``osc:missions`` custom field.

    Args:
        collection: pystac collection to modify in place.
        missions_to_add: list of mission ids; each must be in ``allowed_missions``.

    Raises:
        ValueError: if an id is not listed in ``allowed_missions``.
    '''
    for mission in missions_to_add:
        # Explicit check instead of ``assert`` so validation survives ``python -O``.
        if mission not in allowed_missions:
            raise ValueError(
                f"Unknown mission {mission!r}: not found in the Open Science Catalog EO missions"
            )

        # Look the entry up once and reuse it for both the href and the title.
        mission_entry = missions.get_child(mission)
        collection.add_link(
            pystac.Link(rel="related",
                        target=mission_entry.get_links('self')[0].href,
                        media_type="application/json",
                        title=f"EO Mission: {mission_entry.title}")
        )

    # Add missions to the custom fields
    collection.extra_fields.update({
        "osc:missions": missions_to_add
    })
Define helper functions | Create new project collection
def create_project_collection(project_id, project_title, project_description,
                              project_status, extent, project_license):
    '''Create project collection template from the provided information.

    Args:
        project_id: id of the new collection.
        project_title: human-readable title.
        project_description: description text.
        project_status: value stored in ``osc:status``.
        extent: ``pystac.Extent`` with the spatial and temporal extent.
        project_license: license identifier.

    Returns:
        A ``pystac.Collection`` with ``osc:type`` set to ``"project"``, an
        ``updated`` timestamp, the OSC/themes/contacts extensions declared and
        ``root``/``parent`` links added.
    '''
    # The trailing "Z" declares the timestamp as UTC, so the clock has to be
    # read in UTC rather than in the machine's local timezone.
    updated = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Create the collection
    collection = pystac.Collection(
        id=project_id,
        description=project_description,
        extent=extent,
        license=project_license,
        title=project_title,
        extra_fields={
            "osc:status": project_status,
            "osc:type": "project",
            "updated": updated,
        },
        stac_extensions=[
            "https://stac-extensions.github.io/osc/v1.0.0/schema.json",
            "https://stac-extensions.github.io/themes/v1.0.0/schema.json",
            "https://stac-extensions.github.io/contacts/v0.1.1/schema.json"
        ]
    )

    # Add pre-determined links
    collection.add_links([
        pystac.Link(rel="root", target="../../catalog.json", media_type="application/json", title="Open Science Catalog"),
        pystac.Link(rel="parent", target="../catalog.json", media_type="application/json", title="Projects"),
        # pystac.Link(rel="self", target=f"https://esa-earthcode.github.io/open-science-catalog-metadata/projects/{project_id}/collection.json", media_type="application/json"),
    ])

    return collection
Define helper functions | Create new product collection
def create_product_collection(product_id, product_title, product_description, product_extent, product_license,
                              product_keywords, product_status, product_region, product_project_id, product_project_title,
                              product_parameters=None, product_doi=None):
    '''Create product collection template from the provided information.

    Args:
        product_id: id of the new collection.
        product_title: human-readable title.
        product_description: description text.
        product_extent: ``pystac.Extent`` with the spatial and temporal extent.
        product_license: license identifier.
        product_keywords: list of keywords.
        product_status: value stored in ``osc:status``.
        product_region: value stored in ``osc:region``.
        product_project_id: id of the owning project; stored in ``osc:project``
            and used to build the ``related`` link to the project collection.
        product_project_title: project title used in that link's title.
        product_parameters: optional iterable of parameter names; stored as
            ``cf:parameter`` entries when non-empty.
        product_doi: optional DOI; stored in ``sci:doi`` when not ``None``.

    Returns:
        A ``pystac.Collection`` with ``osc:type`` set to ``"product"``.
    '''
    collection = pystac.Collection(
        id=product_id,
        title=product_title,
        description=product_description,
        extent=product_extent,
        license=product_license,
        keywords=product_keywords,
        stac_extensions=[
            "https://stac-extensions.github.io/osc/v1.0.0/schema.json",
            "https://stac-extensions.github.io/themes/v1.0.0/schema.json",
            "https://stac-extensions.github.io/cf/v0.2.0/schema.json"
        ],
    )

    # Add pre-determined links
    collection.add_links([
        pystac.Link(rel="root", target="../../catalog.json", media_type="application/json", title="Open Science Catalog"),
        pystac.Link(rel="parent", target="../catalog.json", media_type="application/json", title="Products"),
        # pystac.Link(rel="self", target=f"https://esa-earthcode.github.io/open-science-catalog-metadata/products/{product_id}/collection.json", media_type="application/json"),
        pystac.Link(rel="related", target=f"../../projects/{product_project_id}/collection.json", media_type="application/json", title=f"Project: {product_project_title}"),
    ])

    # The trailing "Z" declares the timestamp as UTC, so the clock has to be
    # read in UTC. It is read once so "created" and "updated" always agree.
    now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Add extra properties
    collection.extra_fields.update({
        "osc:project": product_project_id,
        "osc:status": product_status,
        "osc:region": product_region,
        "osc:type": "product",
        "created": now,
        "updated": now,
    })

    if product_doi is not None:
        collection.extra_fields["sci:doi"] = product_doi

    if product_parameters:
        collection.extra_fields["cf:parameter"] = [{"name": p} for p in product_parameters]

    return collection
Define helper functions | Create new workflow record
def create_workflow_collection(workflow_id, workflow_title,
                               workflow_description, workflow_license, workflow_extent,
                               workflow_keywords, workflow_formats, workflow_project, workflow_project_title):
    '''Create a workflow record template from the provided information.

    The record is a plain dict (not a pystac object) declaring conformance to
    the OGC API Records record-core requirement class.

    Args:
        workflow_id: id of the record.
        workflow_title: human-readable title.
        workflow_description: description text.
        workflow_license: license identifier.
        workflow_extent: accepted for signature compatibility; not used in the record.
        workflow_keywords: list of keywords.
        workflow_formats: iterable of format names; each becomes ``{"name": <format>}``.
        workflow_project: id of the owning project; stored in ``osc:project``
            and used to build the ``related`` link to the project collection.
        workflow_project_title: project title used in that link's title.

    Returns:
        A dict with ``osc:type`` set to ``"workflow"``, ``osc:status`` fixed to
        ``"completed"``, version ``"1"`` and ``root``/``parent``/``related`` links.
    '''
    # The trailing "Z" declares the timestamp as UTC, so the clock has to be
    # read in UTC. It is read once so "created" and "updated" always agree.
    now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Create the record
    collection = {
        'id': workflow_id,
        'type': 'Feature',
        'geometry': None,
        "conformsTo": ["http://www.opengis.net/spec/ogcapi-records-1/1.0/req/record-core"],
        "properties": {
            "title": workflow_title,
            "description": workflow_description,
            "osc:type": "workflow",
            "osc:project": workflow_project,
            "osc:status": "completed",
            "formats": [{"name": f} for f in workflow_formats],
            "updated": now,
            "created": now,
            "keywords": workflow_keywords,
            "license": workflow_license,
            "version": "1"
        },
        "linkTemplates": [],
        "links": [
            {
                "rel": "root",
                "href": "../../catalog.json",
                "type": "application/json",
                "title": "Open Science Catalog"
            },
            {
                "rel": "parent",
                "href": "../catalog.json",
                "type": "application/json",
                "title": "Workflows"
            },
            {
                "rel": "related",
                "href": f"../../projects/{workflow_project}/collection.json",
                "type": "application/json",
                "title": f"Project: {workflow_project_title}"
            },
        ]
    }

    return collection
Create a metadata collection for new project
# Define id, title, description, project status, license
project_id = "worldcereal2"
project_title = "WorldCereal2"
project_description = "WorldCereal is an ESA initiative that provides global cropland and crop type maps at 10-meter resolution, offering seasonally updated data on temporary crops, croptypes (maize, winter cereals and spring cereals), and irrigation."
project_status = "completed"
project_license = 'proprietary'
# Define spatial and temporal extent (global bounding box, calendar year 2021)
spatial_extent = pystac.SpatialExtent([[-180.0, -90.0, 180.0, 90.0]])
temporal_extent = pystac.TemporalExtent([[datetime(2021, 1, 1), datetime(2021, 12, 31, 23, 59, 59)]])
extent = pystac.Extent(spatial=spatial_extent, temporal=temporal_extent)
# Define link targets, relations and titles; the three lists are matched by position
project_link_targets = ["https://esa-worldcereal.org/en",
"https://eo4society.esa.int/projects/worldcereal-global-crop-monitoring-at-field-scale/"]
project_link_relations = ["via", "via"]
project_link_titles = ["Website", "EO4Society Link"]
# Define project themes
project_themes = ["land"]
# Define contacts as (name, roles, emails) tuples; emails is None when there are none
project_contracts_info = [
("Zoltan Szantoi", ["technical_officer"], ["Zoltan.Szantoi@esa.int"]),
("VITO Remote Sensing", ["consortium_member"], None)
]
# Create the project collection from the values defined above
collection = create_project_collection(project_id, project_title, project_description,
project_status, extent, project_license)
# add links
add_links(collection, project_link_relations, project_link_targets, project_link_titles)
# add themes
add_themes(collection, project_themes)
# Add contacts
collection.extra_fields.update({
"contacts": [create_contract(*info) for info in project_contracts_info]
})
# Validate the collection with pystac
collection.validate()
collection
# save this file and copy it to the catalog/projects/{project}/collection.json
collection.save_object(dest_href='project_collection.json')
# Optionally run these shell commands to copy the generated file into the OSC folder, ready to be committed.
# NOTE(review): the destination is a '-staging' directory one level up, while the catalog is
# read from '../../open-science-catalog-metadata' - confirm the intended target path.
!mkdir -p ../open-science-catalog-metadata-staging/projects/worldcereal2/
!cp project_collection.json ../open-science-catalog-metadata-staging/projects/worldcereal2/collection.json
Create a metadata collection for new product
# Define id, title, description, keywords, status and license
product_id = "worldcereal-crop-extent-belgium2"
product_title = "WorldCereal Crop Extent - Belgium2"
product_description = "WorldCereal is an ESA initiative that provides global cropland and crop type maps at 10-meter resolution, offering seasonally updated data on temporary crops, croptypes (maize, winter cereals and spring cereals), and irrigation. This dataset provides the outputs for Belgium."
product_keywords = [
"Crops",
"Cereal"
]
product_status = "ongoing"
product_license = "proprietary"
# Define spatial and temporal extent
product_spatial_extent = pystac.SpatialExtent([[2.5135, 49.529, 6.156, 51.475]])
product_temporal_extent = pystac.TemporalExtent([[datetime(2021, 1, 1), datetime(2021, 12, 31, 23, 59, 59)]])
product_extent = pystac.Extent(spatial=product_spatial_extent, temporal=product_temporal_extent)
# Define region, themes, missions, variables and parameters of the product
product_region = "Belgium"
product_themes = ["land"]
product_missions = [ "sentinel-2"]
product_variables = [ "crop-yield-forecast" ]
product_parameters = [ "crop-yield-forecast" ]
# Define the project the product belongs to, and the product DOI
product_project_id = "worldcereal2"
product_project_title = "WorldCereal2"
product_doi = "https://doi.org/10.57780/s3d-83ad619"
# Define links to add; relations, targets and titles are matched by position
# NOTE(review): these URLs reference 'sentinel3-ampli-ice-sheet-elevation', not a
# WorldCereal resource - presumably placeholders; replace with the product's own links.
product_target_relations = ['child', 'via', 'via']
product_target_links = ['https://eoresults.esa.int/stac/collections/sentinel3-ampli-ice-sheet-elevation',
'https://eoresults.esa.int/browser/#/external/eoresults.esa.int/stac/collections/sentinel3-ampli-ice-sheet-elevation',
'https://eoresults.esa.int/d/sentinel3-ampli-ice-sheet-elevation/2025/05/07/sentinel-3-ampli-user-handbook/S3_AMPLI_User_Handbook.pdf']
product_target_titles = ['PRR link', 'Access', 'Documentation']
# Create the product collection from the values defined above
product_collection = create_product_collection(
product_id, product_title, product_description, product_extent,
product_license, product_keywords, product_status, product_region,
product_project_id, product_project_title, product_parameters, product_doi)
# add themes, missions and variables
add_themes(product_collection, product_themes)
add_product_missions(product_collection, product_missions)
add_product_variables(product_collection, product_variables)
# add links
add_links(product_collection,
product_target_relations,
product_target_links,
product_target_titles
)
# Validate the collection with pystac
product_collection.validate()
product_collection
# save this file and copy it to the catalog/products/{product_id}/collection.json
product_collection.save_object(dest_href='product_collection.json')
# Optionally run these shell commands to copy the generated file into the OSC folder, ready to be committed.
# NOTE(review): the destination is a '-staging' directory one level up, while the catalog is
# read from '../../open-science-catalog-metadata' - confirm the intended target path.
!mkdir -p ../open-science-catalog-metadata-staging/products/worldcereal-crop-extent-belgium2/
!cp product_collection.json ../open-science-catalog-metadata-staging/products/worldcereal-crop-extent-belgium2/collection.json
Create a metadata collection for new workflow
# Define id, title, description, license, keywords, formats and owning project
workflow_id = "worldcereal-workflow2"
workflow_title = "ESA worldcereal global crop extent detector2"
workflow_description = "Detects crop land at 10m resolution, trained for global use. Based on Sentinel-1 and 2 data..."
workflow_license = "proprietary"
workflow_keywords = ["agriculture", "crops"]
workflow_formats = ["GeoTIFF"]
workflow_project = "worldcereal2"
workflow_project_title = "WorldCereal2"
workflow_themes = ['land']

# Define spatial and temporal extent
spatial_extent = pystac.SpatialExtent([[-180.0, -90.0, 180.0, 90.0]])
temporal_extent = pystac.TemporalExtent([[datetime(2022, 2, 1), datetime(2026, 1, 31, 23, 59, 59)]])
workflow_extent = pystac.Extent(spatial=spatial_extent, temporal=temporal_extent)

# Define contacts as (name, roles, emails) tuples; emails is None when there are none
workflow_contracts_info = [
    ("Marie-Helene Rio", ["technical_officer"], ["marie-helene.rio@esa.int"]),
    ("CNR-INSTITUTE OF MARINE SCIENCES-ISMAR (IT)", ["consortium_member"], None),
    ("+ATLANTIC – Association for an Atla (PT)", ["consortium_member"], None),
]

# Create the workflow record (a plain dict) from the values defined above
workflow_collection = create_workflow_collection(workflow_id, workflow_title,
                                                 workflow_description, workflow_license, workflow_extent,
                                                 workflow_keywords, workflow_formats, workflow_project, workflow_project_title)

# add contacts
workflow_collection['properties'].update({
    "contacts": [create_contract(*info) for info in workflow_contracts_info]
})

# add themes: one entry for the OSC theme scheme listing every theme as a concept
workflow_collection['properties']['themes'] = [
    {
        "scheme": "https://github.com/stac-extensions/osc#theme",
        "concepts": [{"id": t} for t in workflow_themes]
    }
]

# add one 'related' link per theme
for t in workflow_themes:
    workflow_collection['links'].append(
        {
            "rel": 'related',
            # Theme catalogs are children of the top-level 'themes' catalog, so
            # the path is themes/<theme id>/catalog.json relative to the catalog root.
            "href": f"../../themes/{t}/catalog.json",
            "type": "application/json",
            "title": f'Theme: {t.capitalize()}'
        }
    )

# Define external links; relations, targets and titles are matched by position
workflow_target_relations = ['openeo-process', 'git', 'service']
workflow_target_links = ['https://raw.githubusercontent.com/WorldCereal/worldcereal-classification/refs/tags/worldcereal_crop_extent_v1.0.1/src/worldcereal/udp/worldcereal_crop_extent.json',
                         'https://github.com/WorldCereal/worldcereal-classification.git',
                         'https://openeofed.dataspace.copernicus.eu']
workflow_target_titles = ['openEO Process Definition', 'Git source repository', 'CDSE openEO federation']

# NOTE(review): every external link is typed "application/json", including the
# git repository and the service endpoint - confirm this is what OSC expects.
for rel, link, title in zip(workflow_target_relations, workflow_target_links, workflow_target_titles):
    workflow_collection['links'].append(
        {
            "rel": rel,
            "href": link,
            "type": "application/json",
            "title": title
        }
    )

# Write the record to disk as JSON
import json
with open('record.json', 'w') as f:
    json.dump(workflow_collection, f)
# Optionally run these shell commands to copy the generated file into the OSC folder, ready to be committed.
# NOTE(review): the destination is a '-staging' directory one level up, while the catalog is
# read from '../../open-science-catalog-metadata' - confirm the intended target path.
!mkdir -p ../open-science-catalog-metadata-staging/workflows/worldcereal-workflow2/
!cp record.json ../open-science-catalog-metadata-staging/workflows/worldcereal-workflow2/record.json