topK返回一个大多数 frequent 值的数组,因此在这里无济于事。
似乎需要使用这样的简单方式:
SELECT
page,groupArray((city,metric)) AS cityMetricArray,/* Assign each City the numeric unique ID.
If your dataset contains CityId then use it instead of this artificial key. */
arrayMap((x,id) -> (x.1,x.2,id),cityMetricArray,arrayEnumerateDense(arrayMap(x -> (x.1),cityMetricArray))) AS cityMetricCityIdArray,/* Calculate the sum of metrics for each city.
Unfortunately sumMap-function accepted only numeric array as key-array,otherwise,passing an array with city names as keys would make code more simple. */
arrayReduce('sumMap',[arrayMap(x -> x.3,cityMetricCityIdArray)],[arrayMap(x -> x.2,cityMetricCityIdArray)]) AS cityMetricSumArray,/* Take 5-top cities Ids. */
arrayReverseSort((cityId,sumMetric) -> sumMetric,cityMetricSumArray.1,cityMetricSumArray.2) AS cityIds,arraySlice(cityIds,1,5) AS topNCityIds,/* Map cityIds to city names. */
arrayMap(cityId -> arrayFirst(x -> x.3 = cityId,cityMetricCityIdArray).1,topNCityIds) AS topCities
FROM
( /* test data */
SELECT
data.1 AS city,data.2 AS metric,'page' AS page
FROM
(
SELECT arrayJoin([
('city1',11),('city2',('city3',('city4',22),('city5',5),('city6',('city7',10)]) AS data
)
)
GROUP BY page
FORMAT Vertical
/* Result:
page: page
cityMetricArray: [('city1',10)]
cityMetricCityIdArray: [('city1',11,1),2),3),4),22,5,6),10,7)]
cityMetricSumArray: ([1,2,3,4,6,7],[11,33,10])
cityIds: [4,7,5]
topNCityIds: [4,3]
topCities: ['city4','city2','city6','city1','city3']
*/
本文链接:https://www.f2er.com/2976999.html