我有一个包含大约 1M 条记录的数据库表。我需要在此表中找到所有重复的名称并使它们唯一。
例如...
Id Name
-----------
1 A
2 A
3 B
4 C
5 C
应该改成...
Id Name
-----------
1 A-1
2 A-2
3 B
4 C-1
5 C-2
有没有一种有效的方法可以通过 mysql 查询或过程来做到这一点?
提前致谢!
这个问题有点棘手。我在本地(localhost)测试过下面的语句,它可以满足您的需求;如果有任何问题,请告诉我。(附:SQL Fiddle 在线演示)
-- Make duplicated names in `temp` unique by appending "-<n>", where n is the
-- row's position among the rows sharing that name, in ascending id order
-- (A, A -> A-1, A-2). Names that occur only once are left untouched.
--
-- ROW_NUMBER() replaces the @ROW/@COUNT session-variable counter: that counter
-- had no ORDER BY, so it produced correct numbers only when equal names were
-- read back to back, and MySQL does not define the evaluation order of user
-- variables assigned inside a SELECT.
-- Requires window-function support (MySQL 8.0+ / MariaDB 10.2+).
UPDATE temp AS t1
INNER JOIN (
    SELECT
        id AS unique_id,
        -- Position of the row inside its name group, ordered by id.
        ROW_NUMBER() OVER (PARTITION BY Name ORDER BY id) AS seq,
        -- Size of the name group; 1 means the name is already unique.
        COUNT(*) OVER (PARTITION BY Name) AS name_count
    FROM temp
) AS testing
    ON testing.unique_id = t1.id
-- '-' (no surrounding spaces) yields the requested "A-1" form.
SET t1.Name = CONCAT(t1.Name, '-', testing.seq)
WHERE testing.name_count > 1;
最终输出如下所示:图片
编辑: 为了性能,这可能会更好
1.首先运行这个查询
-- Raise the GROUP_CONCAT length cap for this session so the list built below
-- is not cut short.
SET SESSION group_concat_max_len = 1000000; -- longer if needed
-- Store every name that occurs more than once as one comma-separated string
-- in @query1 (NULL when there are no duplicates).
-- The list is aggregated straight from the grouped subquery. The earlier form
-- joined `temp` to that subquery without an ON clause, pairing every table row
-- with every duplicated name and then relying on DISTINCT to collapse the result.
-- NOTE(review): a name that itself contains a comma cannot be represented in
-- this list; confirm the data has none before relying on FIND_IN_SET.
SET @query1 := (
    SELECT GROUP_CONCAT(t.unique_name)
    FROM (
        SELECT Name AS unique_name
        FROM temp
        GROUP BY Name
        HAVING COUNT(Name) > 1
    ) AS t
);
2.然后运行此更新
-- Rename every row whose name is listed in @query1 by appending "-<n>", where
-- n is the row's position among the rows sharing that name, in ascending id
-- order. @query1 must have been filled in this same session beforehand; when
-- it is NULL, FIND_IN_SET matches nothing and no row is updated.
--
-- ROW_NUMBER() replaces the @ROW/@COUNT session-variable counter: that counter
-- had no ORDER BY, so it produced correct numbers only when equal names were
-- read back to back, and MySQL does not define the evaluation order of user
-- variables assigned inside a SELECT.
-- Requires window-function support (MySQL 8.0+ / MariaDB 10.2+).
UPDATE temp AS t1
INNER JOIN (
    SELECT
        id AS unique_id,
        -- Position of the row inside its name group, ordered by id.
        ROW_NUMBER() OVER (PARTITION BY Name ORDER BY id) AS seq
    FROM temp
    WHERE FIND_IN_SET(Name, @query1) > 0
) AS testing
    ON testing.unique_id = t1.id
-- '-' (no surrounding spaces) yields the requested "A-1" form.
SET t1.Name = CONCAT(t1.Name, '-', testing.seq);
我在我的本地测试了它并且它有效,所以你应该能够让它运行:)
-- Append "-<Id>" to the Name of every row whose Name occurs more than once in
-- table_x; rows with a unique Name are left untouched.
--
-- The duplicated names come from a derived table joined to the UPDATE target.
-- The earlier predicate `upd.id IN (SELECT sel.id ... WHERE sel.Id != upd.Id)`
-- could never be true (the subquery excludes the very id being tested), and
-- MySQL rejects a subquery that reads the table being updated (error 1093).
UPDATE table_x AS upd
INNER JOIN (
    SELECT Name
    FROM table_x
    GROUP BY Name
    HAVING COUNT(*) > 1
) AS dup
    ON dup.Name = upd.Name
SET upd.Name = CONCAT(upd.Name, '-', upd.Id);
首先,您应该将重复的 Id 存储在临时表中。
-- WARNING: this REMOVES rows instead of renaming them. Each run deletes the
-- row with the highest id from every Name group in table_x that still holds
-- more than one row, so a group of k duplicates needs k-1 runs.
--
-- Start from a clean slate so the script can be re-run in the same session
-- (IF EXISTS is the valid guard for DROP; "IF NOT EXIST" does not parse).
DROP TEMPORARY TABLE IF EXISTS temp;
-- One id per duplicated Name: the highest id in that group.
CREATE TEMPORARY TABLE temp AS
SELECT MAX(id) AS id
FROM table_x
GROUP BY Name
HAVING COUNT(*) > 1;
-- Multi-table DELETE: name the table to delete from (x) before FROM and
-- express the match as a join.
DELETE x
FROM table_x AS x
INNER JOIN temp AS t
    ON x.id = t.id;
重复执行上述语句,直到不再有重复的名称为止(每次执行会从每组重复项中删除一行)。之后再为 Name 字段添加唯一键,即可保证每一行的名称都是唯一的。
一个可读的解决方案
-- Snapshot of every name that appears on more than one row of records_table.
CREATE TEMPORARY TABLE duplicate_names AS
    SELECT rt.name
    FROM records_table AS rt
    GROUP BY rt.name
    HAVING COUNT(rt.name) > 1;
-- Session variables used by the SELECT below: @row_number is the running
-- counter, @name remembers the name seen on the previously processed row.
SET @row_number = 1;
SET @name = '';
-- Build `duplicates` with columns (identifier, name, id): one row for every
-- records_table row whose name is listed in duplicate_names.
-- NOTE(review): the numbering assumes rows are processed in ORDER BY order and
-- that the CASE expression is evaluated before the `@name:=name` assignment on
-- each row; MySQL documents the evaluation order of user variables within a
-- statement as undefined -- confirm on the target server version.
CREATE TEMPORARY TABLE duplicates
SELECT
-- Same name as the previous row: bump the counter; otherwise restart at 1.
CASE
WHEN @name = name THEN @row_number:=@row_number + 1
ELSE
@row_number:=1
END AS identifier,
-- Remember this row's name for the comparison on the next row.
@name:=name AS name,id
FROM
records_table WHERE name IN (SELECT name FROM duplicate_names)
-- Sort by name so that rows sharing a name are adjacent.
ORDER BY records_table.name;
-- Rewrite the name of every record listed in `duplicates` as
-- "<name>-<identifier>", matching rows by id.
UPDATE records_table AS rec
INNER JOIN duplicates AS dup
    ON dup.id = rec.id
SET rec.name = CONCAT(dup.name, '-', dup.identifier);