This notebook shows how to:
- combine a number of .nc files into a single .zarr store;
- generate a valid STAC collection, which is a requirement to upload research outcomes to the ESA Project Results Repository (PRR).
The code below demonstrates how to perform the necessary steps using real data from the ESA project SMART-CH4. The focus of SMART-CH4 is to build upon previous experience and projects in satellite-based methane quantification, aiming to enhance emission products derived from satellites.
Check the EarthCODE documentation and the PRR STAC introduction example for a more general introduction to STAC and the ESA PRR.
🔗 Check the project website: SMART-CH4 – Website
🔗 Check the eo4society page: SMART-CH4 – eo4society
1. Combine the data into a single .zarr file¶
# import libraries
import glob
import json
import os
from datetime import datetime

import geopandas as gpd
import numpy as np
import pandas as pd
import pystac
import scipy
import shapely
import xarray as xr
from pystac import Collection, Item
from shapely.geometry import box, mapping
from xstac import xarray_to_stac
# Locate the unzipped SMART-CH4 delivery. The absolute paths below are
# specific to the author's machine; point them at your own copy of the data.
datadir = '/mnt/c/Users/krase/Downloads/smart_CH4_d13c_ch4/5_zipped/data/*'
# glob returns entries in arbitrary, filesystem-dependent order, so every
# listing is sorted to keep the notebook reproducible.
subdirectories = sorted(glob.glob(datadir))
subdirectories
['/home/etapin/Téléchargements/5_zipped/data',
 '/home/etapin/Téléchargements/5_zipped/metadata']
# One folder per emission sector, one file per year
# (d13c.<SECTOR>.<YYYY>.1x1.nc). Sorting the names puts the years in
# chronological order, which keeps the time axis ordered when the files are
# concatenated and makes positional indexing into these lists stable.
agw_files = sorted(glob.glob('/mnt/c/Users/krase/Downloads/smart_CH4_d13c_ch4/5_zipped/data/smart_CH4_d13c_ch4-agw' + '/*'))
bb_files = sorted(glob.glob('/mnt/c/Users/krase/Downloads/smart_CH4_d13c_ch4/5_zipped/data/smart_CH4_d13c_ch4-bb' + '/*'))
ff_files = sorted(glob.glob('/mnt/c/Users/krase/Downloads/smart_CH4_d13c_ch4/5_zipped/data/smart_CH4_d13c_ch4-ff' + '/*'))
nat_files = sorted(glob.glob('/mnt/c/Users/krase/Downloads/smart_CH4_d13c_ch4/5_zipped/data/smart_CH4_d13c_ch4-nat' + '/*'))
wet_files = sorted(glob.glob('/mnt/c/Users/krase/Downloads/smart_CH4_d13c_ch4/5_zipped/data/smart_CH4_d13c_ch4-wet' + '/*'))
# Sector acronyms (used as variable-name suffixes) and their human-readable
# names; both lists follow the same order as the file lists above.
short_names = ["agw", "bb", "ffg", "nat", "wet"]
long_names = [
    "agricultural and waste",
    "biomass burning",
    "fossil fuel and geological",
    "natural non-wetland",
    "wetlands"
]
# Sub-sector files; sorted so that the processing order (and therefore the
# variable order in the final dataset) does not depend on the filesystem.
subsector_files = sorted(glob.glob('/mnt/c/Users/krase/Downloads/smart_CH4_d13c_ch4/5_zipped/data/smart_CH4_d13c_ch4-subsectors' + '/*'))
# One dataset per sector is collected here.
dss = []
for sname,lname,files in zip(short_names, long_names, [agw_files, bb_files, ff_files, nat_files, wet_files]):
    # combine="nested" concatenates the files in exactly the order given and
    # glob returns them in arbitrary order; sorting the yearly file names
    # (d13c.<SECTOR>.<YYYY>.1x1.nc) yields a chronological time axis.
    files = sorted(files)
    print(files)
    ds = xr.open_mfdataset(
        files,
        concat_dim="time",
        combine="nested",
        compat="equals",
        join="exact"
    )
    # replace acronym with long name
    # NOTE(review): the fossil-fuel files are named with "FF" while the
    # acronym replaced here is "FFG"; str.replace silently does nothing when
    # the acronym is not found in long_name - confirm against the files.
    ds.sign_d13c.attrs['long_name'] = ds.sign_d13c.long_name.replace(sname.upper(), lname)
    # assign all the sigma values to each variable (must happen before the
    # dataset-level attributes are overwritten below)
    ds.sign_d13c.attrs.update({k:v for k,v in ds.attrs.items() if k.startswith('sigma_')})
    # rename sign_d13c to have the acronym as suffix
    ds = ds.rename_vars({'sign_d13c': f'sign_d13c_{sname}'})
    # Replace the per-file global attributes with one harmonised set.
    ds.attrs = {
        "title": "d13C(CH4) isotopic signature",
        "history": "Sub-sectors aggregated into sectors (flux-weighted). NaN values filled with flux-weighted global mean.",
        "time_period": "Monthly data for 1998-2022",
        "fluxes_source_for_flux_weighted": "AGRICULTURAL_WASTE, BIOFUELS, BIOMASS, COAL, LANDFILLS, LIVESTOCK, OILGASIND, RICE, WASTE_WATER: EDGARv8 (Crippa et al. 2023), GEOLOGICAL, OCEANIC, TERMITES, WETLAND: Prior fluxes provided with the Global Methane budget inversion protocol (Martinez et al. 2024)",
        "signature_source" : "LIVESTOCK, BIOFUELS, BIOMASS: LAN ET. AL (2021), WASTE_WATER, LANDFILLS, AGRICULTURAL_WASTE, RICE: MENOUD ET AL. (2022), COAL, OILGASIND: LAN ET. AL (2021), MENOUD ET AL. (2022), WETLAND: OH ET AL. (2022), OCEANIC: SANSONE ET AL. (2001), TERMITES: THOMPSON ET AL. (2018), GEOLOGICAL: ETIOPE ET AL. (2019)",
        "created_by": "Emeline Tapin on 29 September 2025",
        "creator_email": "emeline.tapin@lsce.ipsl.fr",
        "institution": "LSCE (Laboratoire des Sciences du Climat et de l’Environnement)",
        # The CF conventions define this global attribute with a capital C.
        "Conventions": "CF-1.9"
    }
    
    dss.append(ds)['/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2022.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2021.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2020.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2002.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2016.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2011.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2017.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2013.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2001.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2008.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2009.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2012.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2000.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.1998.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2006.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2018.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2004.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2005.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2010.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2003.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2007.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2014.1x1.nc', 
'/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2019.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.1999.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2015.1x1.nc']
['/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2020.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2016.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2010.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2003.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2019.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2014.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2007.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2021.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2005.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2002.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2000.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2017.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2009.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2008.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2011.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.1999.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2001.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2013.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2004.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2022.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.1998.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2015.1x1.nc', 
'/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2012.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2006.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2018.1x1.nc']
['/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2004.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2005.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2001.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2015.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2006.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2021.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2009.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2020.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2008.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2014.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2016.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2010.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2019.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.1999.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2017.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2022.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.1998.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2012.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2013.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2003.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2000.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2002.1x1.nc', 
'/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2018.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2011.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2007.1x1.nc']
['/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2012.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.1998.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2013.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2015.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2008.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2009.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2004.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2001.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2021.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2014.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2016.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2006.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2019.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2018.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2010.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2003.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2000.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2022.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2017.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2007.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.1999.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2011.1x1.nc', 
'/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2020.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2002.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2005.1x1.nc']
['/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2015.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2003.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2018.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2000.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2008.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2006.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2010.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2002.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2014.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2022.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2020.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2004.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.1998.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2001.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2021.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2009.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2019.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2011.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.1999.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2007.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2013.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2016.1x1.nc', 
'/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2005.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2012.1x1.nc', '/home/etapin/Téléchargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2017.1x1.nc']
# Combine the per-sector datasets into a single dataset.
ds = xr.merge(dss)
ds
### Process subsectors
# Load and process all subsector datasets
subsector_datasets = []
for subsector_file in subsector_files:
    # Extract filename and subsector name (second dot-separated field of the
    # file name, e.g. "coal").
    filename = os.path.basename(subsector_file)
    subsector_name = filename.split('.')[1]
    print(f"Processing subsector: {subsector_name}")
    subsector_ds = xr.open_dataset(
        subsector_file,
        chunks={"time": 12, "latitude": 180, "longitude": 360}
    )
    # Check and align the time dimension
    if 'time' in subsector_ds.dims:
        # NOTE(review): exactly 24 time steps is taken as the signature of
        # annual data, while the main dataset spans 25 years (1998-2022) -
        # confirm this threshold against the sub-sector files.
        if len(subsector_ds.time) == 24: # for annual data
            # Put the annual values on the monthly axis, then carry each value
            # forward; months before the first annual time stamp stay NaN.
            subsector_ds = subsector_ds.reindex(
                time=ds.time, method=None, fill_value=np.nan
            )
            subsector_ds = subsector_ds.ffill(dim='time')
        else:
            subsector_ds = subsector_ds.interp(time=ds.time, method='linear')
    else:
        # Replicate the data along the main dataset time dimension
        subsector_ds_list = [subsector_ds for _ in range(len(ds.time))]
        subsector_ds = xr.concat(subsector_ds_list, dim='time')
        subsector_ds = subsector_ds.assign_coords(time=ds.time)
    # Reorder dimensions to match (time, latitude, longitude)
    subsector_ds = subsector_ds.transpose("time", "latitude", "longitude")
    # Define standardized variable name
    var_name = f'sign_d13c_sub_sector_{subsector_name}'
    subsector_ds = subsector_ds.rename({'sign_d13c': var_name})
    # Harmonize variable attributes
    subsector_ds[var_name].attrs['long_name'] = f"Monthly δ13C(CH4) isotopic signature for {subsector_name.replace('_', ' ')}"
    # Copy the file-level std_* attributes onto the variable itself.
    subsector_ds[var_name].attrs.update({k:v for k,v in subsector_ds.attrs.items() if k.startswith('std_')})
    subsector_datasets.append(subsector_ds)
# Clean and consistent merge with main dataset
print("\nMerging subsector data with main dataset...")
# A single merge call replaces re-merging the growing dataset once per
# sub-sector. `ds` comes first, so with compat='override' its variables and
# attributes take precedence; join='exact' still requires identical
# coordinates on every input.
ds = xr.merge([ds, *subsector_datasets], compat='override', join='exact')

# Rechunk all variables to the same chunk size
ds = ds.chunk({"time": 12, "latitude": 90, "longitude": 180})
# Final verification
# List every variable of the merged dataset with its dimensions, dtype and
# chunk layout; `chunks` prints as None when the underlying array has no
# `chunks` attribute.
print(f"\n Final dataset now contains {len(ds.data_vars)} variables:")
for var in ds.data_vars:
    v = ds[var]
    print(f"  - {var}: {v.dims}, dtype={v.dtype}, chunks={getattr(v.data, 'chunks', None)}")Processing subsector: geological
Processing subsector: coal
Processing subsector: termites
Processing subsector: oceanic
Processing subsector: rice
Processing subsector: landfills
Processing subsector: agricultural_waste
Processing subsector: livestock
Processing subsector: wetlands
Processing subsector: biomassbiofuels
Processing subsector: waste_water
Processing subsector: oilandgas
Merging subsector data with main dataset...
 Final dataset now contains 17 variables:
  - sign_d13c_agw: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))
  - sign_d13c_bb: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))
  - sign_d13c_ffg: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))
  - sign_d13c_nat: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))
  - sign_d13c_wet: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))
  - sign_d13c_sub_sector_geological: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))
  - sign_d13c_sub_sector_coal: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))
  - sign_d13c_sub_sector_termites: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))
  - sign_d13c_sub_sector_oceanic: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))
  - sign_d13c_sub_sector_rice: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))
  - sign_d13c_sub_sector_landfills: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))
  - sign_d13c_sub_sector_agricultural_waste: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))
  - sign_d13c_sub_sector_livestock: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))
  - sign_d13c_sub_sector_wetlands: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))
  - sign_d13c_sub_sector_biomassbiofuels: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))
  - sign_d13c_sub_sector_waste_water: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))
  - sign_d13c_sub_sector_oilandgas: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))
2. Generate metadata¶
The root STAC Collection provides a general description of all project outputs which will be stored on the PRR. The PRR STAC Collection template enforces some required fields that you need to provide in order to build its valid description. Most of these metadata fields should already be available and can be extracted from your data.
# Build the root STAC Collection that describes the whole dataset.
collectionid = "d13c-ch4-signatures-smart-ch4"
collection = Collection.from_dict(
    dict(
        type="Collection",
        id=collectionid,
        stac_version="1.1.0",
        title="Global delta-13C(CH4) Source Signatures",
        description="This dataset provides monthly gridded δ13C(CH₄) isotopic signatures (‰ vs V-PDB) for five methane emission sectors: Agricultural & Waste (AGW), Fossil Fuel & Geological (FFG), Biomass Burning (BB), Natural non-wetland (NAT), and Wetlands (WET), and their subsequent sub-sectors, at 1°×1° spatial resolution for the period 1998–2022. Each sector file contains monthly flux-weighted δ13C(CH₄) signatures computed from prior CH₄ emissions used in the Global Methane Budget inversion protocol (Martinez et al., 2024), EDGARv8 (Crippa et al., 2023), and GFED4s (van Wees et al., 2022). Sub-sector δ13C-CH₄ source values are derived from recent literature (Lan et al., 2021; Menoud et al., 2022; Oh et al., 2022; Thompson et al., 2018; Etiope et al., 2019; Sansone et al., 2001). The dataset also includes uncertainty components: propagated (σprop), aggregation (σagg), and total (σtotal), expressed in ‰ vs V-PDB. Data are suitable for use in atmospheric inversion and isotope modeling frameworks.",
        # Whole globe, January 1998 through December 2022.
        extent=dict(
            spatial=dict(bbox=[[-180, -90, 180, 90]]),
            temporal=dict(
                interval=[["1998-01-01T00:00:00Z", "2022-12-31T23:59:59Z"]]
            ),
        ),
        license="CC-BY-4.0",
        # Links are filled in when items are added and the collection is saved.
        links=[],
    )
)
collection # visualise the metadata of your collection
Create STAC Items and STAC Assets from original dataset¶
The second step is to describe the different files as STAC Items and Assets. Take your time to decide how your data should be categorised to improve usability of the data, and ensure intuitive navigation through different items in the collections. There are multiple strategies for doing this, and this tutorial demonstrates one possible way. Examples of how other ESA projects are doing this are available in the EarthCODE documentation.
# Global footprint of the dataset, in the same order as the collection bbox.
bbox = [-180, -90, 180, 90]
# The JSON round trip presumably turns the footprint into plain JSON-style
# containers before it goes into the item - TODO confirm.
geometry = json.loads(json.dumps(shapely.box(*bbox).__geo_interface__))
# Skeleton of the STAC Item; descriptive properties are taken from the global
# attributes of the merged dataset where available.
template = dict(
    id=f"{collectionid}-zarr",
    type="Feature",
    stac_version="1.0.0",
    properties=dict(
        title='d13C(CH4) isotopic signatures',
        history=ds.attrs['history'],
        time_period="Monthly data from 1998 to 2022",
        fluxes_source_for_flux_weighted=ds.attrs['fluxes_source_for_flux_weighted'],
        signature_source=ds.attrs['signature_source'],
        start_datetime="1998-01-01T00:00:00Z",
        end_datetime="2022-12-31T23:59:59Z",
        license="CC-BY-4.0",
        Conventions='CF-1.9',
        references=[
            "Lan et al. (2021)",
            "Menoud et al. (2022)",
            "Oh et al. (2022)",
            "Thompson et al. (2018)",
            "Etiope et al. (2019)",
            "Sansone et al. (2001)",
            "Crippa et al. (2023)",
            "Martinez et al. (2024)",
        ],
    ),
    geometry=geometry,
    bbox=bbox,
    assets=dict(
        data=dict(
            # or local path
            # NOTE(review): this href differs from the path the notebook
            # writes the zarr store to - confirm which location is intended.
            href=f"./{collectionid}/ds.zarr",
            type="application/vnd+zarr",
            roles=["data"],
            title="Zarr Store of d13C(CH4) signatures",
        )
    ),
)
# 3. Generate the STAC Item
# The template is completed with information derived from the dataset itself;
# the temporal dimension is only declared when `ds` has a time coordinate.
item = xarray_to_stac(
    ds,
    template,
    temporal_dimension="time" if 'time' in ds.coords else False,
    x_dimension='longitude',
    y_dimension='latitude',
    reference_system=False,
)
# validate and add the STAC Item to the collection
item.validate()
collection.add_item(item)collectionitemds# save the full self-contained collection
collection.normalize_and_save(
    root_href=f'../../prr_preview/{collectionid}',
    catalog_type=pystac.CatalogType.SELF_CONTAINED
)collection# verify data is correctly alignedassert np.isclose(
    #original data
    xr.open_dataset(ff_files[3]).sign_d13c.sel(time='2001-05-01').values,
    # data from the zarr store
    ds.sign_d13c_ffg.sel(time='2001-05-01').values
).all()ds.to_zarr('../../data/d13c-ch4-signatures-smart-ch4.zarr', mode='w')Acknowledgments¶
We gratefully acknowledge the SMART-CH4 project team for providing access to the data used in this example, as well as for their support in creating it.