考虑以下...
-- Rebuild the demo table from scratch so the example can be re-run.
DROP TABLE IF EXISTS my_table;

-- The composite primary key makes every (category_id, val) pair unique.
CREATE TABLE my_table (
    category_id INT NOT NULL,
    val         INT NOT NULL,
    PRIMARY KEY (category_id, val)
);

-- Name the target columns so the insert does not depend on column order.
INSERT INTO my_table (category_id, val) VALUES
    (1,101),(1,102),(1,103),(1,104),
    (2,103),(2,104),(2,105),
    (3,104),(3,105),
    (4,106);

-- Explicit ORDER BY so the order of the listing is guaranteed.
SELECT category_id, val
  FROM my_table
 ORDER BY category_id, val;
+-------------+-----+
| category_id | val |
+-------------+-----+
| 1 | 101 |
| 1 | 102 |
| 1 | 103 |
| 1 | 104 |
| 2 | 103 |
| 2 | 104 |
| 2 | 105 |
| 3 | 104 |
| 3 | 105 |
| 4 | 106 |
+-------------+-----+
-- Rank every row inside its category by a pseudo-random key derived from val.
-- For each row x, the self-join pairs it with every row y of the same
-- category whose RAND(val) key is <= x's key (x itself included), so
-- COUNT(*) is x's 1-based position in that ordering.
-- Per the MySQL documentation, RAND(N) with a column argument is re-seeded
-- from that value on each call, so the key is repeatable for a given val.
-- `rank` is quoted because RANK is a reserved word from MySQL 8.0.2 onward.
SELECT x.category_id
     , x.val
     , COUNT(*) AS `rank`
  FROM my_table AS x
 INNER JOIN my_table AS y
    ON y.category_id = x.category_id
   AND RAND(y.val) <= RAND(x.val)
 GROUP
    BY x.category_id
     , x.val
 ORDER
    BY x.category_id
     , `rank`;
+-------------+-----+------+
| category_id | val | rank |
+-------------+-----+------+
| 1 | 104 | 1 |
| 1 | 101 | 2 |
| 1 | 102 | 3 |
| 1 | 103 | 4 |
| 2 | 104 | 1 |
| 2 | 105 | 2 |
| 2 | 103 | 3 |
| 3 | 104 | 1 |
| 3 | 105 | 2 |
| 4 | 106 | 1 |
+-------------+-----+------+
因此,假设我们要找出每个类别中 rank 值大于 2 的所有行(即排在前两名之后的行),那么这个查询可以改写如下……
-- Rows whose within-category rank exceeds 2.
-- The rank of a row is the number of rows in the same category whose
-- RAND(val) key is <= its own key; HAVING keeps only ranks above 2.
SELECT ranked.category_id,
       ranked.val
FROM my_table AS ranked
INNER JOIN my_table AS peer
    ON  peer.category_id = ranked.category_id
    AND RAND(peer.val) <= RAND(ranked.val)
GROUP BY ranked.category_id,
         ranked.val
HAVING COUNT(*) > 2;
……然后只需再套上一层 JOIN,就可以把它改写成 DELETE 语句:
-- Delete every row whose within-category rank exceeds 2.
-- The derived table lists those rows (rank = number of same-category rows
-- whose RAND(val) key is <= the row's own key); joining it back to my_table
-- on both primary-key columns selects exactly those rows for deletion.
DELETE target
FROM my_table AS target
INNER JOIN (
    SELECT ranked.category_id,
           ranked.val
    FROM my_table AS ranked
    INNER JOIN my_table AS peer
        ON  peer.category_id = ranked.category_id
        AND RAND(peer.val) <= RAND(ranked.val)
    GROUP BY ranked.category_id,
             ranked.val
    HAVING COUNT(*) > 2
) AS excess
    ON  excess.category_id = target.category_id
    AND excess.val = target.val;
Query OK, 3 rows affected (0.08 sec)
-- List the rows that remain; explicit columns and ORDER BY make the
-- listing order guaranteed.
SELECT category_id, val
  FROM my_table
 ORDER BY category_id, val;
+-------------+-----+
| category_id | val |
+-------------+-----+
| 1 | 101 |
| 1 | 104 |
| 2 | 104 |
| 2 | 105 |
| 3 | 104 |
| 3 | 105 |
| 4 | 106 |
+-------------+-----+
7 rows in set (0.00 sec)