通过多段排序+取模来拆分数据
sqlWITH base_data AS (
SELECT
a.*,
ROW_NUMBER() OVER(PARTITION BY bucket_no) AS rk,
COUNT(1) OVER (PARTITION BY bucket_no ) AS bucket_size
FROM
(
SELECT
*,
hash(rand()) % 100000 AS bucket_no
FROM
tmp_gumu_data_html_score_low_reward_hash_1030_4
)a
),
bucket_data AS (
SELECT
bucket_no,
SUM(bucket_size) OVER() - bucket_size AS bucket_base,
SUM(bucket_size) OVER() AS total_cnt
FROM (SELECT DISTINCT bucket_no,bucket_size FROM base_data)
)
SELECT
a.* except(rk,bucket_size,bucket_no),
LPAD((b.bucket_base+a.rk - 1) % 12, 2, '0') AS part_num
FROM
base_data a
INNER JOIN
bucket_data b
ON a.bucket_no = b.bucket_no
;
本文作者:ender
本文链接:
版权声明:本博客所有文章除特别声明外,均采用 BY-NC-SA 许可协议。转载请注明出处!