DECLARE @sourcetab TABLE ( id1 int, id2 int, update_on smalldatetime, val varchar(50) ) DECLARE @targettab TABLE ( id1 int, id2 int, update_on smalldatetime, val varchar(50) ) insert into @sourcetab (id1, id2, update_on, val) SELECT 10, 1, '20120110', 'testrec1' UNION SELECT 10, 1, '20120201', 'testrec2' UNION SELECT 5, 2, '20120201', 'testrec3' UNION SELECT 5, 1, '20120201', 'testrec4' UNION SELECT 5, 1, '20120205', 'testrec5' UNION SELECT 12, 18, '20120201', 'testrec6' UNION SELECT 12, 18, '20120205', 'testre7' UNION SELECT 12, 5, '20120201', 'testrec8' UNION SELECT 17, 3, '20120201', 'testrec9' UNION SELECT 18, 4, '20120201', 'testrec10' insert into @targettab (id1, id2, update_on, val) SELECT 10, 1, '20120101', 'testrec01' UNION SELECT 12, 5, '20120101', 'testrec02' UNION SELECT 20, 19, '20120101', 'testrec03' --not deduped SELECT A.id1, A.id2, A.update_on, A.val FROM @sourcetab A ORDER BY A.id1, A.id2, A.update_on --deduped with most current wins SELECT A.id1, A.id2, A.update_on, A.val FROM @sourcetab A WHERE A.update_on = ( SELECT max(A1.update_on) FROM @sourcetab A1 WHERE A1.id1 = A.id1 AND A1.id2 = A.id2 ) --update with most recent UPDATE A SET A.val = B.val , A.update_on = B.update_on FROM @targettab A INNER JOIN @sourcetab B ON A.id1 = B.id1 AND A.id2 = B.id2 WHERE B.update_on > A.update_on AND B.update_on = ( SELECT max(B1.update_on) FROM @sourcetab B1 WHERE B1.id1 = B.id1 AND B1.id2 = B.id2 ) --insert new recs INSERT INTO @targettab ( id1, id2, update_on, val ) SELECT A.id1, A.id2, A.update_on, A.val FROM @sourcetab A WHERE A.update_on = ( SELECT max(A1.update_on) FROM @sourcetab A1 WHERE A1.id1 = A.id1 AND A1.id2 = A.id2 ) AND NOT EXISTS ( SELECT TOP 1 1 FROM @targettab B WHERE B.id1 = A.id1 AND B.id2 = A.id2 ) --final result SELECT A.* FROM @targettab A ORDER BY A.id1, A.id2, A.update_on
Friday, August 17, 2012
Dedupe based on compound key and timestamp
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment