Hi
A little complex one
What i really want is that my table contains duplicate
firstname,lastname and dob.
what I need to do is check the two fields datecreated and
datelastupdated and retain the latest record based on
date of these two columns and update the other records
with OLD.
so the result I want is retain row 1 as datelastupdated
is latest for John Li as it is and update the 2 John Li
as John (OLD) Li
and also retain record 5 as datecreated is the latest so
retain this one and update the other one as OLD.
I have about 700 records to do and some are 2 duplicates
and some 3.
How can i write a query/cursor to solve this one.
DDL and DML are posted
CREATE TABLE Employees (
[EmployeeId] [int] IDENTITY (1, 1) NOT NULL ,
[FirstName] [varchar] (45) COLLATE
SQL_Latin1_General_CP1_CI_AS NOT NULL ,
[LastName] [varchar] (30) COLLATE
SQL_Latin1_General_CP1_CI_AS NOT NULL ,
[Dob] [datetime] NOT NULL ,
[DateCreated] [datetime] NOT NULL ,
[DateLastUpdated] [datetime] NULL ,
[EmployerId] [int] NOT NULL
) ON [PRIMARY]
GO
insert into employees
values('John','Li','1971-02-09 00:00:00','2004-12-06
12:20:48.577','2005-02-01 17:21:33.047',111)
insert into employees
values('John','Li','1971-02-09 00:00:00','2004-03-26
12:20:48.577','2004-02-01 17:21:33.047',111)
insert into employees
values('John','Li','1971-02-09 00:00:00','2004-12-10
12:20:48.577','2003-02-01 17:21:33.047',111)
insert into employees
values('Tom','Lan','1979-05-07 00:00:00','2005-03-22
14:00:15.207','2004-04-21 13:20:20.273',111)
insert into employees
values('Tom','Lan','1979-05-07 00:00:00','2004-10-22
14:00:15.207','2004-09-25 13:20:20.273',111)
--list the duplicates
select firstname,lastname,dob,count(*) as count
from employees e
group by firstname,lastname,dob
having count(*)>1
order by count(*) desc
--list the table
select * from employeesHi
Adding a status column...
CREATE TABLE Employees (
[EmployeeId] [int] IDENTITY (1, 1) NOT NULL ,
[FirstName] [varchar] (45) COLLATE
SQL_Latin1_General_CP1_CI_AS NOT NULL ,
[LastName] [varchar] (30) COLLATE
SQL_Latin1_General_CP1_CI_AS NOT NULL ,
[Dob] [datetime] NOT NULL ,
[DateCreated] [datetime] NOT NULL ,
[DateLastUpdated] [datetime] NULL ,
[EmployerId] [int] NOT NULL,
[Status] CHAR(3) NULL
) ON [PRIMARY]
GO
insert into employees( [FirstName],
[LastName],
[Dob],
[DateCreated],
[DateLastUpdated],
[EmployerId] )
values('John','Li','19710209 00:00:00','20041206 12:20:48.577','20050201
17:21:33.047',111)
insert into employees( [FirstName],
[LastName],
[Dob],
[DateCreated],
[DateLastUpdated],
[EmployerId] )
values('John','Li','19710209 00:00:00','20040326 12:20:48.577','20040201
17:21:33.047',111)
insert into employees( [FirstName],
[LastName],
[Dob],
[DateCreated],
[DateLastUpdated],
[EmployerId] )
values('John','Li','19710209 00:00:00','20041210 12:20:48.577','20030201
17:21:33.047',111)
insert into employees( [FirstName],
[LastName],
[Dob],
[DateCreated],
[DateLastUpdated],
[EmployerId] )
values('Tom','Lan','19790507 00:00:00','20050322 14:00:15.207','20040421
13:20:20.273',111)
insert into employees( [FirstName],
[LastName],
[Dob],
[DateCreated],
[DateLastUpdated],
[EmployerId] )
values('Tom','Lan','19790507 00:00:00','20041022 14:00:15.207','20040925
13:20:20.273',111)
--list the duplicates
select firstname,lastname,dob,count(*) as count
from employees e
group by firstname,lastname,dob
having count(*)>1
order by count(*) desc
--list the table
UPDATE e
SET status = 'Old'
FROM employees e
WHERE EXISTS ( SELECT 1 FROM Employees p where p.firstname = e.firstname and
p.lastname = e.lastname and p.dob = e.dob and
e.[DateLastUpdated] < p.[DateLastUpdated] )
select * from employees
This is ignoring datecreated as it seems to be obsolete once it has been on
older record has been updated!
John
"Apok" wrote:
> Hi
> A little complex one
> What i really want is that my table contains duplicate
> firstname,lastname and dob.
> what I need to do is check the two fields datecreated and
> datelastupdated and retain the latest record based on
> date of these two columns and update the other records
> with OLD.
> so the result I want is retain row 1 as datelastupdated
> is latest for John Li as it is and update the 2 John Li
> as John (OLD) Li
> and also retain record 5 as datecreated is the latest so
> retain this one and update the other one as OLD.
> I have about 700 records to do and some are 2 duplicates
> and some 3.
> How can i write a query/cursor to solve this one.
> DDL and DML are posted
> CREATE TABLE Employees (
> [EmployeeId] [int] IDENTITY (1, 1) NOT NULL ,
> [FirstName] [varchar] (45) COLLATE
> SQL_Latin1_General_CP1_CI_AS NOT NULL ,
> [LastName] [varchar] (30) COLLATE
> SQL_Latin1_General_CP1_CI_AS NOT NULL ,
> [Dob] [datetime] NOT NULL ,
> [DateCreated] [datetime] NOT NULL ,
> [DateLastUpdated] [datetime] NULL ,
> [EmployerId] [int] NOT NULL
> ) ON [PRIMARY]
> GO
>
> insert into employees
> values('John','Li','1971-02-09 00:00:00','2004-12-06
> 12:20:48.577','2005-02-01 17:21:33.047',111)
> insert into employees
> values('John','Li','1971-02-09 00:00:00','2004-03-26
> 12:20:48.577','2004-02-01 17:21:33.047',111)
> insert into employees
> values('John','Li','1971-02-09 00:00:00','2004-12-10
> 12:20:48.577','2003-02-01 17:21:33.047',111)
>
> insert into employees
> values('Tom','Lan','1979-05-07 00:00:00','2005-03-22
> 14:00:15.207','2004-04-21 13:20:20.273',111)
>
> insert into employees
> values('Tom','Lan','1979-05-07 00:00:00','2004-10-22
> 14:00:15.207','2004-09-25 13:20:20.273',111)
> --list the duplicates
> select firstname,lastname,dob,count(*) as count
> from employees e
> group by firstname,lastname,dob
> having count(*)>1
> order by count(*) desc
> --list the table
> select * from employees
>|||No But there is record where datecreated is the latest
one
your query does not cover that
I need to compare both the dates and find the latest one
leave that one and update the rest
>--Original Message--
>Hi
>Adding a status column...
>CREATE TABLE Employees (
> [EmployeeId] [int] IDENTITY (1, 1) NOT NULL ,
> [FirstName] [varchar] (45) COLLATE
>SQL_Latin1_General_CP1_CI_AS NOT NULL ,
> [LastName] [varchar] (30) COLLATE
>SQL_Latin1_General_CP1_CI_AS NOT NULL ,
> [Dob] [datetime] NOT NULL ,
> [DateCreated] [datetime] NOT NULL ,
> [DateLastUpdated] [datetime] NULL ,
> [EmployerId] [int] NOT NULL,
> [Status] CHAR(3) NULL
> ) ON [PRIMARY]
>GO
>
>insert into employees( [FirstName],
>[LastName],
>[Dob],
>[DateCreated],
>[DateLastUpdated],
>[EmployerId] )
>values('John','Li','19710209 00:00:00','20041206
12:20:48.577','20050201
>17:21:33.047',111)
>insert into employees( [FirstName],
>[LastName],
>[Dob],
>[DateCreated],
>[DateLastUpdated],
>[EmployerId] )
>values('John','Li','19710209 00:00:00','20040326
12:20:48.577','20040201
>17:21:33.047',111)
>insert into employees( [FirstName],
>[LastName],
>[Dob],
>[DateCreated],
>[DateLastUpdated],
>[EmployerId] )
>values('John','Li','19710209 00:00:00','20041210
12:20:48.577','20030201
>17:21:33.047',111)
>
>insert into employees( [FirstName],
>[LastName],
>[Dob],
>[DateCreated],
>[DateLastUpdated],
>[EmployerId] )
>values('Tom','Lan','19790507 00:00:00','20050322
14:00:15.207','20040421
>13:20:20.273',111)
>
>insert into employees( [FirstName],
>[LastName],
>[Dob],
>[DateCreated],
>[DateLastUpdated],
>[EmployerId] )
>values('Tom','Lan','19790507 00:00:00','20041022
14:00:15.207','20040925
>13:20:20.273',111)
>--list the duplicates
>select firstname,lastname,dob,count(*) as count
>from employees e
>group by firstname,lastname,dob
>having count(*)>1
>order by count(*) desc
>--list the table
>UPDATE e
>SET status = 'Old'
>FROM employees e
>WHERE EXISTS ( SELECT 1 FROM Employees p where
p.firstname = e.firstname and
>p.lastname = e.lastname and p.dob = e.dob and
>e.[DateLastUpdated] < p.[DateLastUpdated] )
>select * from employees
>This is ignoring datecreated as it seems to be obsolete
once it has been on
>older record has been updated!
>John
>"Apok" wrote:
>
and
datelastupdated
Li
so
duplicates
>.
>|||Apok,
It's not clear what you mean by "latest record based on
date of these two columns." But here's a try:
update employees set
Status = 'Old'
where exists (
select * from employees as Ecopy
where Ecopy.FirstName = employees.FirstName
and Ecopy.LastName = employees.LastName
and (
(Ecopy.DateCreated > employees.DateCreated
and Ecopy.DateCreated > employees.DateLastUpdated)
or
(Ecopy.DateLastUpdated > employees.DateCreated
and Ecopy.DateLastUpdated > employees.DateLastUpdated)
)
)
Steve Kass
Drew University
Apok wrote:
>Hi
>A little complex one
>What i really want is that my table contains duplicate
>firstname,lastname and dob.
>what I need to do is check the two fields datecreated and
>datelastupdated and retain the latest record based on
>date of these two columns and update the other records
>with OLD.
>so the result I want is retain row 1 as datelastupdated
>is latest for John Li as it is and update the 2 John Li
>as John (OLD) Li
>and also retain record 5 as datecreated is the latest so
>retain this one and update the other one as OLD.
>I have about 700 records to do and some are 2 duplicates
>and some 3.
>How can i write a query/cursor to solve this one.
>DDL and DML are posted
>CREATE TABLE Employees (
> [EmployeeId] [int] IDENTITY (1, 1) NOT NULL ,
> [FirstName] [varchar] (45) COLLATE
>SQL_Latin1_General_CP1_CI_AS NOT NULL ,
> [LastName] [varchar] (30) COLLATE
>SQL_Latin1_General_CP1_CI_AS NOT NULL ,
> [Dob] [datetime] NOT NULL ,
> [DateCreated] [datetime] NOT NULL ,
> [DateLastUpdated] [datetime] NULL ,
> [EmployerId] [int] NOT NULL
> ) ON [PRIMARY]
>GO
>
>insert into employees
>values('John','Li','1971-02-09 00:00:00','2004-12-06
>12:20:48.577','2005-02-01 17:21:33.047',111)
>insert into employees
>values('John','Li','1971-02-09 00:00:00','2004-03-26
>12:20:48.577','2004-02-01 17:21:33.047',111)
>insert into employees
>values('John','Li','1971-02-09 00:00:00','2004-12-10
>12:20:48.577','2003-02-01 17:21:33.047',111)
>
>insert into employees
>values('Tom','Lan','1979-05-07 00:00:00','2005-03-22
>14:00:15.207','2004-04-21 13:20:20.273',111)
>
>insert into employees
>values('Tom','Lan','1979-05-07 00:00:00','2004-10-22
>14:00:15.207','2004-09-25 13:20:20.273',111)
>--list the duplicates
>select firstname,lastname,dob,count(*) as count
>from employees e
>group by firstname,lastname,dob
>having count(*)>1
>order by count(*) desc
>--list the table
>select * from employees
>
>
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment