SQL Server update group by multiple table names?

advertisements

I have two tales Recipe and Parameter. Recipes table contains multiple versions with the same recipe name. Parameter table contains multiple parameter names for one recipe. How can I update a constantguid to the same recipe and same parameter group by recipe name and parameter name?

CREATE TABLE [dbo].[Recipe](
    [VersionGUID] [varchar](36) NULL,
    [Name] [nvarchar](50) NULL
) ON [PRIMARY]

GO

CREATE TABLE [dbo].[Parameter](
    [ParameterGUID] [varchar](36) NULL,
    [VersionGUID] [varchar](36) NULL,
    [ParameterName] [nvarchar](50) NULL,
    [ConstantGUID] [varchar](50) NULL
) ON [PRIMARY]

GO

-- Add 5 rows to [dbo].[Parameter]
INSERT INTO [dbo].[Parameter] ([ParameterGUID], [VersionGUID], [ParameterName], [ConstantGUID]) VALUES ('1B00E5ED-25A5-4FEA-AE73-14CDC7871951', 'AB00E5ED-25A5-4FEA-AE73-14CDC787195A', N'Parameter1', '26976642-12B6-462A-982B-74448DDA33B6')
INSERT INTO [dbo].[Parameter] ([ParameterGUID], [VersionGUID], [ParameterName], [ConstantGUID]) VALUES ('5AD70E77-E377-4661-A525-6711A1992217', '8B00E5ED-25A5-4FEA-AE73-14CDC7871953', N'Parameter1', '55D724C5-51A6-48B7-9161-2364F004BB7F')
INSERT INTO [dbo].[Parameter] ([ParameterGUID], [VersionGUID], [ParameterName], [ConstantGUID]) VALUES ('A0142B6A-52F6-4A49-ADBA-DC5D4C2BEF7A', '20142B6A-52F6-4A49-ADBA-DC5D4C2BEF71', N'Parameter2', 'B5903A3D-B606-49DD-A75C-A1B29740EBEB')
INSERT INTO [dbo].[Parameter] ([ParameterGUID], [VersionGUID], [ParameterName], [ConstantGUID]) VALUES ('A96113F9-1046-4E35-B320-BE3126D522CF', '20142B6A-52F6-4A49-ADBA-DC5D4C2BEF71', N'Parameter1', '108197E8-47CA-477C-B97F-4BF0321C1D91')
INSERT INTO [dbo].[Parameter] ([ParameterGUID], [VersionGUID], [ParameterName], [ConstantGUID]) VALUES ('AB00E5ED-25A5-4FEA-AE73-14CDC787195A', '8B00E5ED-25A5-4FEA-AE73-14CDC7871953', N'Parameter2', 'B3AC6268-6257-4E3B-B0C9-4418E4A09E40')

-- Add 3 rows to [dbo].[Recipe] with non-unique comparison key
SET ROWCOUNT 1
INSERT INTO [dbo].[Recipe] ([VersionGUID], [Name]) VALUES ('20142B6A-52F6-4A49-ADBA-DC5D4C2BEF71', N'Recipe1')
INSERT INTO [dbo].[Recipe] ([VersionGUID], [Name]) VALUES ('8B00E5ED-25A5-4FEA-AE73-14CDC7871953', N'Recipe1')
INSERT INTO [dbo].[Recipe] ([VersionGUID], [Name]) VALUES ('AB00E5ED-25A5-4FEA-AE73-14CDC787195A', N'Recipe2')
SET ROWCOUNT 0

If you run the following query, I want 108197E8-47CA-477C-B97F-4BF0321C1D91 and 55D724C5-51A6-48B7-9161-2364F004BB7F be the same. B3AC6268-6257-4E3B-B0C9-4418E4A09E40 and B5903A3D-B606-49DD-A75C-A1B29740EBEB be the same and so forth. I have 5 mil records to update.

select name+parametername as name_parametername,name as RecipeName,parametername,constantguid,Parameter.parameterguid
from dbo.Recipe,dbo.Parameter
where dbo.Recipe.versionguid=dbo.Parameter.versionguid
order by name_parametername

Thanks in advance.


Note 1: Before updating the production database take a backup. Restore this backup and do some tests, first with few rows and then with more rows.

Note 2: Check if you have enough free space for database log files.

Note 3: You could see how many rows were updated with new id GUIDs if you uncomment EXEC sp_trace_generateevent lines and then create a trace to catch UserConfigurable0 event (server traces have a less overhead than SQL Profiler traces).

Note 4: Execute just one step at time.

Note 5: Before updating the production database take another backup. This note is intentionally duplicated.

-- dbo.NewGUIDs is used to generate the new ConstantGUIDs
-- AnotherDB.dbo.NewGUIDs: this table could be created in another DB to minimize DB log increase for current DB
-- Note: This script assumes that Parameter.ParameterGUID column has an unique index (ex. is PRIMARY KEY or UNIQUE [KEY])

-- Step 1: It creates the NewGUIDs table, it insert current ParameterGUID guids and
-- it generates some kind of group ID (column Rnk) for every (Receipt.Name and Parameter.ParameterName) pair
    /*
    CREATE AnotherDB;
    GO
    USE AnotherDB;
    GO
    */
    CREATE TABLE /*AnotherDB.*/dbo.NewGUIDs(
        ParameterGUID UNIQUEIDENTIFIER PRIMARY KEY,
        Rnk INT NOT NULL,
        NewConstantGUID UNIQUEIDENTIFIER NULL
    );

    INSERT INTO /*AnotherDB.*/dbo.NewGUIDs(ParameterGUID,Rnk) -- Ar  INSERT INTO AnotherDB.dbo.NewGUIDs
    SELECT  p.ParameterGUID,
            -- r.Name,p.ParameterName,
            DENSE_RANK() OVER(ORDER BY r.Name,p.ParameterName) AS Rnk
    FROM    dbo.Recipe r JOIN dbo.Parameter p ON r.VersionGUID=p.VersionGUID;
    GO

    -- This index is useful for the next queries
    CREATE INDEX IN_NewGUIDs_Rnk
    ON /*AnotherDB.*/dbo.NewGUIDs(Rnk);
    GO
-- End of Step 1

-- Step 2: It generates the new GUIDs for ConstantGUID column
    DECLARE @GUIDs TABLE(
        Rnk INT PRIMARY KEY,
        NewGUID UNIQUEIDENTIFIER DEFAULT NEWSEQUENTIALID() -- or NEWID()
    );
    INSERT  @GUIDs(Rnk)
    SELECT  Rnk
    FROM    (SELECT DISTINCT Rnk FROM /*AnotherDB.*/dbo.NewGUIDs)a;

    UPDATE  x
    SET     NewConstantGUID=y.NewGUID
    FROM    /*AnotherDB.*/dbo.NewGUIDs x
    INNER JOIN @GUIDs y ON x.Rnk=y.Rnk;

    -- Check: this query should return 0 rows
    SELECT * FROM dbo.NewGUIDs n WHERE NewConstantGUID IS NULL;
    GO
-- End of Step 2

-- Step 3: It adds a new column (IsUpdated) to dbo.Parameter.
-- This column will be updated (SET IsUpdated=1) when ConstantGUID is updated with the new value
ALTER TABLE dbo.Parameter
ADD IsUpdated BIT NULL;
GO
CREATE INDEX IN_Parameter_IsUpdated
ON dbo.Parameter(IsUpdated);
GO
-- End of Step 3

-- Step 4 (final step): It updates ConstantGUID with new values from /*AnotherDB.*/dbo.NewGUIDs
SET XACT_ABORT ON;
DECLARE @AffectedRowsCount INT,
    @TotalAffectedRowsCount INT,
    @EventClass INT,
    @UserInfo NVARCHAR(128);

SELECT @EventClass=82,
    @AffectedRowsCount=0,
    @TotalAffectedRowsCount=0;

WHILE 1=1
BEGIN
    BEGIN TRANSACTION;
    UPDATE  TOP(4000) p -- TOP(4000) to prevent lock escalation; See http://msdn.microsoft.com/en-us/library/ms184286(v=sql.105).aspx
    SET     ConstantGUID=n.NewConstantGUID,
            IsUpdated=1 -- It marks current so I know that it was updated and I don't have to update the same row again
    FROM    dbo.Parameter p INNER JOIN /*AnotherDB.*/dbo.NewGUIDs n ON p.ParameterGUID=n.ParameterGUID
    WHERE   p.IsUpdated IS NULL -- Wasn't updated
    SET @AffectedRowsCount=@@ROWCOUNT;
    SET @TotalAffectedRowsCount=@TotalAffectedRowsCount+@AffectedRowsCount;
    IF @AffectedRowsCount=0
    BEGIN
        SET @UserInfo='Finish! > '+CONVERT(VARCHAR(11),@TotalAffectedRowsCount);
        -- EXEC sp_trace_generateevent @EventClass,@UserInfo;
        BREAK;
    END
    ELSE
    BEGIN
        SET @UserInfo='Processing > '+CONVERT(VARCHAR(11),@TotalAffectedRowsCount);
        -- It generates a custom event (82). You may use SQL Profiler to catch this event (UserConfigurable0)
        -- EXEC sp_trace_generateevent @EventClass,@UserInfo;
    END

    COMMIT;
END;
GO
-- End of Step 4

-- Cleanup
/*
ALTER TABLE dbo.Parameter
DROP COLUMN IsUpdated;
GO

DROP INDEX IN_Parameter_IsUpdated
ON dbo.Parameter

DROP TABLE /*AnotherDB.*/dbo.NewGUIDs
-- DROP DATABASE AnotherDB
*/