Dynamic Top N: window rank + filter
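Rank the categories with a window transform, then filter on that rank — a clean pattern for keeping only the top N categories, whatever the dataset. Note that the `rank` op gives tied rows the same rank, so ties can let more than N categories through; use `row_number` when exactly N rows are required.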
Prerequisites
Vega-Lite v5
Vega-Lite
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"data": {
"values": [
{"message": "timeout DB"},
{"message": "timeout DB"},
{"message": "timeout DB"},
{"message": "timeout DB"},
{"message": "timeout DB"},
{"message": "timeout DB"},
{"message": "connexion refusée"},
{"message": "connexion refusée"},
{"message": "connexion refusée"},
{"message": "connexion refusée"},
{"message": "connexion refusée"},
{"message": "OOM worker"},
{"message": "OOM worker"},
{"message": "OOM worker"},
{"message": "OOM worker"},
{"message": "certificat expiré"},
{"message": "certificat expiré"},
{"message": "certificat expiré"},
{"message": "quota dépassé"},
{"message": "quota dépassé"},
{"message": "schéma invalide"},
{"message": "schéma invalide"},
{"message": "disque plein"},
{"message": "DNS introuvable"},
{"message": "rate limit atteint"},
{"message": "deadlock SQL"}
]
},
"transform": [
{
  "aggregate": [{ "op": "count", "as": "n" }],
  "groupby": ["message"]
},
{
  "window": [{ "op": "row_number", "as": "rang" }],
  "sort": [
    { "field": "n", "order": "descending" },
    { "field": "message", "order": "ascending" }
  ]
},
{ "filter": "datum.rang <= 8" }
],
"mark": "bar",
"encoding": {
  "x": {
    "field": "n",
    "type": "quantitative",
    "title": "Occurrences"
  },
  "y": {
    "field": "message",
    "type": "nominal",
    "sort": "-x",
    "title": null
  }
}
}
Vega-Lite · Top N · rank · window