WAPOSAL - Wave power density, zero-crossing wave period, and significant wave height product
This notebook shows how to
Combine a number of .nc files into a .zarr
Generate a valid STAC collection, which is a requirement to upload research outcomes to the ESA Project Results Repository (PRR).
The code below demonstrates how to perform the necessary steps using real data from the ESA project WAPOSAL. The focus of WAPOSAL is to analyse wave power density, zero-crossing wave period, and significant wave height product obtained along track based on Sentinel-3 A/B and Cryosat-2 altimeter data processed by SAMOSA+ retracker.
Check the EarthCODE documentation, and PRR STAC introduction example for a more general introduction to STAC and the ESA PRR.
🔗 Check the project website: WAPOSAL – Website
🔗 Check the eo4society page: WAPOSAL – eo4society
1. Combine the multiple trajectory files into zarr stores
# Imports: xarray + joblib for parallel NetCDF handling, pickle for caching
# the per-store dimension scan.
# NOTE(review): `re` appears unused in the visible code — confirm before removing.
from pathlib import Path
import numpy as np
import xarray as xr
import re
import os
from joblib import Parallel, delayed
import pickle
# define region information
# Bounding box per region code: [min_lon, min_lat, max_lon, max_lat], EPSG:4326.
bbox_ = {
    'UK': [-11.0, 47.5, 10.0, 60.0],
    'BN': [3.0, 53.0, 30.35, 72.0],
    'FF': [-10.0, 43.0, -0.5, 49.0],
    'NS': [-10.0, 42.0, -1.0, 45.0],
    'FP': [-155.0, -27.0, -134.0, -7.0],
    'MT': [-6.0, 30.1, 41.8, 47.4],
    'FG': [-54.0, 4.1, -51.5, 6.0],
    'CN': [-19.0, 26.0, -10.0, 31.0],
    'PT': [-10.0, 37.0, -8.0, 42.0],
    'MD': [-18.5, 27.0, -13.0, 30.0],
    'AZ': [-32.0, 36.5, -24.0, 40.0],
}

# Mission metadata keyed by the short satellite code used in the file tree.
satellite_names = {'S3A': 'Sentinel-3', 'S3B': 'Sentinel-3', 'CS2': 'CryoSat-2'}
platform_id = {'S3A': 'A', 'S3B': 'B', 'CS2': ''}
sensor = {'S3A': 'SRAL', 'S3B': 'SRAL', 'CS2': 'SIRAL'}

# Human-readable name for each region code.
zone = {
    'AZ': 'Azores',
    'FG': 'French Guiana',
    'FF': 'French Facade',
    'UK': 'UK',
    'FP': 'French Polinesia',
    'CN': 'Canaries',
    'PT': 'Portugal',
    'NS': 'North Spain',
    'BN': 'Baltic and Norway',
    'MT': 'Mediterranean',
    'MD': 'Madeira',
}
# Reference epoch for the TAI_Time_20Hz values (seconds since 2000-01-01 TAI).
epoch_2000 = np.datetime64('2000-01-01T00:00:00')


def return_temporal_size(f):
    """Return the number of samples along the 'time' dimension of NetCDF file *f*.

    Opens the file as a context manager so the handle is released immediately
    (thousands of files are scanned in parallel).
    """
    with xr.open_dataset(f, decode_timedelta=False) as ds:
        return ds.time.shape[0]
def combine_to_trajectory_zarr(region, satellite, files):
    """Concatenate per-pass NetCDF trajectory files into one (trajectory, obs) dataset.

    Parameters
    ----------
    region : str
        Two-letter region code (a key of ``bbox_`` / ``zone``).
    satellite : str
        Mission code: 'S3A', 'S3B' or 'CS2'.
    files : sequence of path-like
        Sorted NetCDF files, one trajectory each.

    Returns
    -------
    xarray.Dataset
        Dims ``(trajectory, obs)``, chunked along ``trajectory``, with CF-style
        global attributes describing the region and time span.
    """
    # Read the precomputed coordinate shapes (see the dimension-scan cell below).
    _, obs_len = dimensions[f'{region}-{satellite}']
    # Every file is padded to obs_len, so an 'exact' join is safe.
    join_strategy = 'exact' if obs_len is not None else 'outer'
    datasets_list = []
    for filepath in files:
        # Open the file and drop the singleton 'dim' dimension.
        ds = xr.open_dataset(filepath, decode_times=False)
        ds = ds.sel(dim=0)
        # Rename the 'time' dimension to 'obs'.
        ds = ds.rename_dims({'time': 'obs'})
        # Pad to the maximum number of observations for this region/satellite.
        ds = ds.pad(obs=(0, obs_len - ds.obs.size), mode='constant', constant_values=np.nan)
        # Expand the trajectory dimension.
        ds = ds.expand_dims('trajectory')
        # Record the source filename for traceability.
        filename = os.path.basename(filepath)
        ds['trajectory_info'] = xr.DataArray(
            [filename],                               # data is the filename
            dims=['trajectory'],                      # depends on the new dimension
            attrs={'long_name': 'trajectory info'}    # add attributes
        )
        datasets_list.append(ds)
    # Combine the datasets along the new trajectory dimension.
    combined_ds = xr.concat(
        datasets_list,
        dim='trajectory',
        join=join_strategy,
        fill_value=np.nan
    )
    del datasets_list
    # Chunk for zarr output and update global metadata.
    combined_ds = combined_ds.chunk(trajectory=1000)
    # Files are sorted, so the first/last trajectories bound the time span.
    start_time = epoch_2000 + np.timedelta64(int(combined_ds.TAI_Time_20Hz.isel(trajectory=0).min(skipna=True).values), 's')
    end_time = epoch_2000 + np.timedelta64(int(combined_ds.TAI_Time_20Hz.isel(trajectory=-1).max(skipna=True).values), 's')
    combined_ds.attrs.update({
        "Conventions": "CF-1.6/CF-1.7",
        "feature_type": "trajectory",
        "start_datetime": str(start_time),
        "end_datetime": str(end_time),
        "created": '2025-11-14T00:00:00Z',
        "description": f"Wave power density, zero-crossing wave period, and significant wave height product obtained along-track from {satellite_names[satellite]} altimeter data processed by SAMOSA+ retracker over the {zone[region]} region.",
        "zone": zone[region],  # region, for example 'Azores'
        "platform_name": satellite,
        "institution": 'CENTEC-IST-ID',
        "license": "CC-BY-SA-4.0",
        "min_lon": bbox_[region][0],
        "min_lat": bbox_[region][1],
        "max_lon": bbox_[region][2],
        "max_lat": bbox_[region][3],
        "crs": 'epsg:4326',
        # NOTE(review): 'referencfe' is a typo, kept deliberately because the
        # STAC-item step below reads this exact attribute key from the stores.
        "referencfe": "https://opensciencedata.esa.int/products/waposal-waves/collection"
    })
    return combined_ds


# Root directory holding the extracted per-region NetCDF files.
rootpath = "/mnt/d/data/waposal/extracted/"
# Region and satellite codes to process; the dimensions of each zarr store are
# computed in the next cell, since alignment requires knowing them ahead of time.
regions = ['AZ', 'BN', 'CN', 'FF', 'FG', 'FP', 'MD', 'MT', 'NS', 'PT', 'UK']
satellites = ['S3A', 'S3B', 'CS2']
# We need to know the dimensions ahead of time to align all the observations.
import gc
from joblib import Parallel, delayed


def return_temporal_size(f):
    """Return the number of samples along 'time' in NetCDF file *f*."""
    # Close the handle promptly; thousands of files are scanned in parallel.
    with xr.open_dataset(f, decode_timedelta=False) as ds:
        return ds.time.shape[0]


dimensions = {}
# NOTE(review): `regions[3:]` skips AZ/BN/CN — this looks like a resumed
# partial run; use `regions` for a full scan.
for region in regions[3:]:
    for satellite in satellites:
        print(f'{region}-{satellite}')
        files = sorted(Path(f'{rootpath}/{region}/{region}/{satellite}/').glob("./*/**/*.nc"))
        if not files:
            continue
        sizes = Parallel(n_jobs=-1)(delayed(return_temporal_size)(f) for f in files)
        # (number of trajectories, longest observation series)
        dimensions[f'{region}-{satellite}'] = (len(files), max(sizes))
        del sizes
        gc.collect()

# Persist the scan so the expensive pass does not have to be repeated.
with open('dimensions.pkl', 'wb') as f:
    pickle.dump(dimensions, f)

with open('dimensions.pkl', 'rb') as f:
    dimensions = pickle.load(f)

dimensions
# Notebook output — (trajectories, max_obs) per region-satellite pair:
# {'AZ-S3A': (1711, 1218), 'AZ-S3B': (1019, 1227), 'AZ-CS2': (1554, 573),
#  'BN-S3A': (8719, 6882), 'BN-S3B': (5302, 6927), 'BN-CS2': (8632, 5309),
#  'CN-S3A': (1998, 1735), 'CN-S3B': (1259, 1750),
#  'FF-S3A': (2274, 2095), 'FF-S3B': (1608, 2112), 'FF-CS2': (4533, 2245),
#  'FG-S3A': (527, 658), 'FG-S3B': (327, 658),
#  'FP-S3A': (4864, 6885), 'FP-S3B': (3046, 6956), 'FP-CS2': (1668, 7019),
#  'MD-S3A': (1248, 1041), 'MD-S3B': (778, 1050), 'MD-CS2': (4406, 2929),
#  'MT-S3A': (10339, 6263), 'MT-S3B': (6344, 6072), 'MT-CS2': (21047, 5495),
#  'NS-S3A': (1867, 1046), 'NS-S3B': (1210, 1061), 'NS-CS2': (3453, 1123),
#  'PT-S3A': (625, 1740), 'PT-S3B': (445, 1740), 'PT-CS2': (4208, 3932),
#  'UK-S3A': (5322, 4488), 'UK-S3B': (3273, 4434), 'UK-CS2': (9520, 4671)}
import gc
import gc

# Combine each region/satellite set of NetCDF trajectories into a zarr store.
for region in regions:
    for satellite in satellites:
        print(f'{region}-{satellite}')
        files = sorted(Path(f'{rootpath}/{region}/{region}/{satellite}/').glob("./*/**/*.nc"))
        if not files:
            continue
        ds = combine_to_trajectory_zarr(region, satellite, files)
        ds.to_zarr(f'{rootpath}combined/{region}-{satellite}.zarr', mode='w', write_empty_chunks=False, zarr_format=2)
        del ds
        gc.collect()

# Assert values are not changed between the originals and the zarr store.
# NOTE(review): the original referenced `ds` after `del ds`; reopen the last
# written store instead. `files`/`region`/`satellite` still refer to the last
# loop iteration, so only that store is spot-checked here.
ds = xr.open_zarr(f'{rootpath}combined/{region}-{satellite}.zarr', decode_times=False)
for i, f in enumerate(files):
    true_values = xr.open_dataset(f, decode_times=False).Sigma0_20Hz.values.flatten()
    # Check that all stored values match the original file.
    assert np.isclose(
        true_values,
        ds.sel(trajectory=i).Sigma0_20Hz.values.flatten()[:true_values.shape[0]],
        equal_nan=True
    ).all()
    # Check that all padding beyond the original length is NaN.
    assert np.isnan(ds.sel(trajectory=i).Sigma0_20Hz.values.flatten()[true_values.shape[0]:]).all()

# 2. Generate STAC collection for the new files
from pystac import Collection
import pystac
import xarray as xr
import shapely
import json
from datetime import datetime
from xstac import xarray_to_stac
from xstac._xstac import build_horizontal_dimension
# Define the collection id once, since it is reused for the items below.
collectionid = "waposal"

# Create the root collection using pystac.Collection. The first spatial bbox
# is the overall extent; the remaining ones cover the individual regions.
collection = Collection.from_dict(
    {
        "type": "Collection",
        "id": "waposal",
        "stac_version": "1.1.0",
        "description": "Wave power density, zero-crossing wave period, and significant wave height product obtained along track based on Sentinel-3 A/B and Cryosat-2 altimeter data processed by SAMOSA+ retracker. The products cover the Atlantic coast of Europe, Madeira, the French Polynesian archipelagos, the Azores, the Canary Islands, the Mediterranean and Baltic Seas, and the coastal zone of French Guiana.",
        "links": [],
        "stac_extensions": [
            "https://stac-extensions.github.io/osc/v1.0.0/schema.json",
            "https://stac-extensions.github.io/themes/v1.0.0/schema.json",
            "https://stac-extensions.github.io/cf/v0.2.0/schema.json"
        ],
        "osc:project": "waposal",
        "osc:status": "completed",
        "osc:region": "several",  # fixed typo: was "severeal"
        "osc:type": "product",
        "created": "2025-10-29T11:40:41Z",
        "updated": "2025-10-29T11:40:41Z",
        "sci:doi": "https://doi.org/10.57780/s3d-83ad619",
        "cf:parameter": [
            {"name": "wave-period"},
            {"name": "wave-height"}
        ],
        "themes": [
            {
                "scheme": "https://github.com/stac-extensions/osc#theme",
                "concepts": [
                    {"id": "oceans"}
                ]
            }
        ],
        "osc:missions": [
            "sentinel-3",
            "cryosat-2"
        ],
        "osc:variables": [
            "wave-period",
            "wave-height"
        ],
        "title": "High-resolution WAPOSAL wave period and wave power density from Sentinel-3 A/B, CryoSat-2, and SAMOSA+ retracker",
        "extent": {
            "spatial": {
                # [min_lon, min_lat, max_lon, max_lat] per STAC convention.
                "bbox": [
                    [-155, -27, 41.8, 72],
                    [-32, 36.499999999999986, -24, 40],
                    [-10, 42.00000000000003, -1, 45],
                    [-10, 43, -0.5, 49],
                    [3, 53, 30.35, 72],
                    [-11, 47.5, 10, 60.00000000000003],
                    [-155, -27, -134, -6.999999999999986],
                    [-54, 4.1000000000000085, -51.5, 6],
                    [-19, 26, -10, 31],
                    [-6, 30.09999999999998, 41.79999999999999, 47.400000000000006],
                    [-19, 27, -5, 39],
                    [-13.000000000000002, 34, -5.5, 44.5]
                ]
            },
            "temporal": {
                "interval": [
                    ["2011-01-01T00:00:00Z", "2023-12-31T23:59:59Z"]
                ]
            }
        },
        "license": "CC-BY-SA-4.0",
        "keywords": [
            "altimeter data",
            "wave energy",
            "wave power",
            "wave frequency",
            "wave"
        ]
    }
)
# Display the collection, then build one STAC item per combined zarr store.
collection

# NOTE(review): stores were written to f'{rootpath}combined/' above but are
# read from './combined' here — confirm the stores were moved/symlinked.
files = sorted(Path('./combined').glob("*.zarr"))
for f in files:
    ds = xr.open_zarr(f, decode_times=False)
    # Describe the store following the datacube STAC extension: all data is
    # extracted from the metadata already present in the file; we only specify
    # the template and what information is extracted.
    bbox = [ds.attrs['min_lon'], ds.attrs['min_lat'], ds.attrs['max_lon'], ds.attrs['max_lat']]
    # Round-trip through JSON so the geo-interface tuples become plain lists.
    geometry = json.loads(json.dumps(shapely.box(*bbox).__geo_interface__))
    template = {
        "id": f"{collectionid}-{f.stem.lower()}",
        "type": "Feature",
        "stac_version": "1.1.0",
        "description": ds.attrs['description'],
        "title": f.stem,
        "properties": {
            "feature_type": "trajectory",
            "start_datetime": ds.attrs['start_datetime'] + "Z",
            "end_datetime": ds.attrs['end_datetime'] + "Z",
            "zone": ds.attrs['zone'],
            "platform_name": ds.attrs['platform_name'],
            "institution": ds.attrs['institution'],
            "license": ds.attrs['license'],
            # 'referencfe' is a known typo in the attrs written by the
            # combine step; it must be read with that exact spelling.
            "reference": ds.attrs['referencfe']
        },
        "geometry": geometry,
        "bbox": bbox,
        "assets": {
            "data": {
                "href": f"https://s3.waw4-1.cloudferro.com/EarthCODE/OSCAssets/waposal/{f.stem}.zarr",  # or local path
                "type": "application/vnd+zarr",
                "roles": ["data"],
                "title": f.stem
            }
        }
    }
    # Generate the STAC item; spatial/temporal dims are added manually below,
    # so xstac's automatic dimension handling is disabled.
    item = xarray_to_stac(
        ds,
        template,
        temporal_dimension=False,
        x_dimension=False,
        y_dimension=False,
        reference_system=False
    )
    # Add the trajectory/observation dimensions to cube:dimensions.
    dims = item.properties['cube:dimensions']
    for d in list(ds.dims):
        r = build_horizontal_dimension(ds, d, None, None, None, None, False).to_dict()
        r['type'] = "trajectory" if d == 'trajectory' else "observation"
        # Drop keys the builder left unset.
        for k, v in r.copy().items():
            if v is None:
                del r[k]
        dims[d] = r
    item.validate()
    collection.add_item(item)

# Write the self-contained collection (items + catalog) next to this notebook.
collection.normalize_and_save(
    root_href=f'./{collectionid}',
    catalog_type=pystac.CatalogType.SELF_CONTAINED
)

# Acknowledgments
We gratefully acknowledge the WAPOSAL team for providing access to the data used in this example, as well as their support in creating it.