如何找出資料表中重複的資料

Designed by Freepik
如何找出資料表中重複的資料?
圖片來源Designed by Freepik

  1. 只看「哪些值重複 + 重複次數」
SELECT
  YourColumn,
  COUNT(*) AS DuplicateCount
FROM YourTable
GROUP BY YourColumn
HAVING COUNT(*) > 1
ORDER BY DuplicateCount DESC;

  1. 取得「所有重複的完整資料列」
SELECT t.*
FROM YourTable t
JOIN (
  SELECT YourColumn
  FROM YourTable
  GROUP BY YourColumn
  HAVING COUNT(*) > 1
) d ON t.YourColumn = d.YourColumn
ORDER BY t.YourColumn;

  1. 如果資料量大,建議用這種
SELECT *
FROM (
    SELECT *,
        COUNT(*) OVER (PARTITION BY YourColumn) AS Cnt
    FROM YourTable
) t
WHERE t.Cnt > 1
ORDER BY YourColumn;

  1. 刪除表中多餘的重複記錄,留下最舊CreateDate那筆的紀錄
WITH cte AS (
  SELECT *,
    ROW_NUMBER() OVER (
      PARTITION BY YourColumn
      ORDER BY CreateDate ASC
    ) AS rn
  FROM YourTable
)
--先查看
SELECT *
FROM cte
WHERE rn > 1;
--確認後刪除
DELETE FROM cte
WHERE rn > 1;

  1. 查閱資料表中多餘的重複記錄(多個欄位)
WITH cte AS (
  SELECT *,
    ROW_NUMBER() OVER (
      PARTITION BY CustomerId, ProductId
      ORDER BY CreateDate ASC
    ) AS rn
  FROM Orders
)
SELECT *
FROM cte
ORDER BY CustomerId, ProductId, rn;

  1. 只列出「哪些組合有重複」
SELECT
  CustomerId,
  ProductId,
  COUNT(*) AS Cnt
FROM Orders
GROUP BY CustomerId, ProductId
HAVING COUNT(*) > 1
ORDER BY Cnt DESC;

  1. 表中多餘的多欄位重複記錄,留下最舊CreateDate那筆的紀錄
WITH cte AS (
  SELECT *,
     ROW_NUMBER() OVER (
       PARTITION BY CustomerId, ProductId
       ORDER BY CreateDate ASC, Id ASC
     ) AS rn
  FROM Orders
)
--先查看
SELECT *
FROM cte
WHERE rn > 1;
--確認後刪除
DELETE FROM cte
WHERE rn > 1;

沒有留言:

張貼留言

技術提供:Blogger.