โซลูชันสำหรับการกำหนดค่าที่ไม่ซ้ำกันให้กับแถวที่มีระยะการทำงานร่วมกัน (collaboration distance) แบบจำกัด

ฉันมีตารางที่สามารถสร้างและเติมด้วยรหัสต่อไปนี้:

-- Sample table from the question: one row per (GroupKey, RecordKey) pair,
-- with that pair as the clustered primary key.
CREATE TABLE dbo.Example
(
    GroupKey  int         NOT NULL,
    RecordKey varchar(12) NOT NULL,
    CONSTRAINT iExample PRIMARY KEY CLUSTERED (GroupKey ASC, RecordKey ASC)
);

-- Sample rows. A RecordKey that appears under two GroupKeys
-- (Euler, Gauss, Grothendieck) is what links those groups together.
INSERT INTO dbo.Example (GroupKey, RecordKey)
VALUES
    (1, 'Archimedes'),
    (1, 'Newton'),
    (1, 'Euler'),
    (2, 'Euler'),
    (2, 'Gauss'),
    (3, 'Gauss'),
    (3, 'Poincaré'),
    (4, 'Ramanujan'),
    (5, 'Neumann'),
    (5, 'Grothendieck'),
    (6, 'Grothendieck'),
    (6, 'Tao');

สำหรับทุกแถวที่มีระยะการทำงานร่วมกันแบบจำกัด (finite collaboration distance) กับแถวอื่นโดยอิงตาม RecordKey ฉันต้องการกำหนดค่าที่ไม่ซ้ำกันให้ — ฉันไม่สนใจว่าค่าที่ไม่ซ้ำกันนั้นจะได้มาอย่างไรหรือเป็นชนิดข้อมูลใด

ชุดผลลัพธ์ที่ถูกต้องที่ตรงกับสิ่งที่ฉันขอสามารถสร้างขึ้นด้วยแบบสอบถามต่อไปนี้:

-- Hand-built expected result for the sample data: each known GroupKey is
-- mapped to its SupergroupKey through an inline lookup table.
-- GroupKeys missing from the lookup are not returned.
SELECT
    M.SupergroupKey,
    E.GroupKey,
    E.RecordKey
FROM dbo.Example AS E
INNER JOIN
(
    VALUES
        (1, 1),
        (2, 1),
        (3, 1),
        (4, 2),
        (5, 3),
        (6, 3)
) AS M (GroupKey, SupergroupKey)
    ON M.GroupKey = E.GroupKey
ORDER BY
    M.SupergroupKey ASC,
    E.GroupKey ASC,
    E.RecordKey ASC;

เพื่อให้เข้าใจสิ่งที่ฉันต้องการได้ดีขึ้น ฉันจะอธิบายว่าทำไม GroupKey 1–3 จึงมี SupergroupKey เดียวกัน:

GroupKey 1 มี RecordKey ชื่อ Euler ซึ่งก็มีอยู่ใน GroupKey 2 ด้วย ดังนั้น GroupKey 1 และ 2 จึงต้องมี SupergroupKey เดียวกัน
เนื่องจาก Gauss มีอยู่ทั้งใน GroupKey 2 และ 3 ทั้งสองกลุ่มจึงต้องมี SupergroupKey เดียวกันเช่นกัน ส่งผลให้ GroupKey 1–3 มี SupergroupKey เดียวกัน
เนื่องจาก GroupKey 1–3 ไม่มี RecordKey ใดร่วมกับ GroupKey ที่เหลือ จึงมีเพียงกลุ่มเหล่านี้เท่านั้นที่ได้รับ SupergroupKey ค่า 1

ฉันควรเพิ่มว่าโซลูชันต้องเป็นแบบทั่วไป ตารางด้านบนและชุดผลลัพธ์เป็นเพียงตัวอย่างเท่านั้น

ภาคผนวก

ฉันได้ลบข้อกำหนดที่ว่าโซลูชันต้องไม่เป็นแบบวนซ้ำออกแล้ว แม้ว่าฉันจะชอบโซลูชันแบบนั้นมากกว่า แต่ฉันเชื่อว่าเป็นข้อจำกัดที่ไม่สมเหตุสมผล น่าเสียดายที่ฉันไม่สามารถใช้โซลูชันที่อาศัย CLR ได้ แต่หากคุณต้องการเสนอโซลูชันดังกล่าวก็เชิญได้เลย เพียงแต่ฉันคงจะไม่เลือกเป็นคำตอบที่ยอมรับ

จำนวนแถวในตารางจริงของฉันมีมากถึง 5 ล้านแถว แต่ก็มีบางวันที่จำนวนแถวจะมี "เพียง" ประมาณหนึ่งหมื่นแถว โดยเฉลี่ยมี 8 RecordKey ต่อ GroupKey และ 4 GroupKey ต่อ RecordKey ฉันคาดว่าโซลูชันจะมีความซับซ้อนเชิงเวลาแบบเอกซ์โพเนนเชียล แต่ถึงอย่างนั้นฉันก็ยังสนใจโซลูชันอยู่ดี

sql-server sql-server-2016

— basketballfan22
แหล่งที่มา

คำตอบ:

นี่เป็นโซลูชัน T-SQL แบบวนซ้ำสำหรับการเปรียบเทียบประสิทธิภาพ

สันนิษฐานว่าสามารถเพิ่มคอลัมน์พิเศษลงในตารางเพื่อจัดเก็บคีย์กลุ่มพิเศษและสามารถเปลี่ยนดัชนีได้:

ติดตั้ง

-- Start from a clean slate so the script can be re-run.
DROP TABLE IF EXISTS dbo.Example;

-- Same shape as the question's table, plus a SupergroupKey column and two
-- extra indexes used by the iterative solution.
CREATE TABLE dbo.Example
(
    -- 0 is the "not yet assigned" marker used by the processing script
    SupergroupKey int         NOT NULL DEFAULT 0,
    GroupKey      int         NOT NULL,
    RecordKey     varchar(12) NOT NULL,

    CONSTRAINT iExample
        PRIMARY KEY CLUSTERED (GroupKey ASC, RecordKey ASC),

    -- Same key columns as the primary key, in reverse order
    CONSTRAINT [IX dbo.Example RecordKey, GroupKey]
        UNIQUE NONCLUSTERED (RecordKey, GroupKey),

    INDEX [IX dbo.Example SupergroupKey, GroupKey]
        (SupergroupKey ASC, GroupKey ASC)
);

-- Sample rows from the question; SupergroupKey takes its default of 0.
INSERT INTO dbo.Example (GroupKey, RecordKey)
VALUES
    (1, 'Archimedes'), (1, 'Newton'), (1, 'Euler'),
    (2, 'Euler'), (2, 'Gauss'),
    (3, 'Gauss'), (3, 'Poincaré'),
    (4, 'Ramanujan'),
    (5, 'Neumann'), (5, 'Grothendieck'),
    (6, 'Grothendieck'), (6, 'Tao');

หากคุณสามารถกลับลำดับของคีย์ของคีย์หลักปัจจุบันได้ดัชนีที่ไม่ซ้ำกันจะไม่จำเป็น

เค้าโครง

วิธีการแก้ปัญหานี้คือ:

1. ตั้งค่า id ของซูเปอร์กรุ๊ปเป็น 1
2. ค้นหาคีย์กลุ่มที่ยังไม่ได้ประมวลผลที่มีค่าต่ำที่สุด
3. หากไม่พบ ให้ออก
4. กำหนดซูเปอร์กรุ๊ปให้ทุกแถวที่มีคีย์กลุ่มปัจจุบัน
5. กำหนดซูเปอร์กรุ๊ปให้ทุกแถวที่เกี่ยวข้องกับแถวในกลุ่มปัจจุบัน
6. ทำซ้ำขั้นตอนที่ 5 จนกว่าจะไม่มีแถวใดถูกอัปเดต
7. เพิ่มค่า id ของซูเปอร์กรุ๊ปปัจจุบัน
8. ไปที่ขั้นตอนที่ 2

การดำเนินงาน

ความคิดเห็นแบบอินไลน์:

-- Iterative supergroup assignment.
-- Repeatedly picks the lowest GroupKey that is still unassigned
-- (SupergroupKey = 0), stamps it with the next supergroup number, then keeps
-- pulling in every group that shares a RecordKey with the supergroup until an
-- update touches no rows. Results are written to dbo.Example.SupergroupKey.

-- No execution plans or rows affected messages
SET NOCOUNT ON;
SET STATISTICS XML OFF;

-- Reset all supergroups (0 = unassigned) so the script can be re-run
UPDATE E
SET SupergroupKey = 0
FROM dbo.Example AS E
    WITH (TABLOCKX)
WHERE 
    SupergroupKey != 0;

DECLARE 
    @CurrentSupergroup integer = 0,
    @CurrentGroup integer = 0;

WHILE 1 = 1
BEGIN
    -- Next super group
    SET @CurrentSupergroup += 1;

    -- Find the lowest unprocessed group key.
    -- MIN() over an empty set yields NULL, which is the exit signal below.
    SELECT 
        @CurrentGroup = MIN(E.GroupKey)
    FROM dbo.Example AS E
    WHERE 
        E.SupergroupKey = 0;

    -- Exit when no more unprocessed groups
    IF @CurrentGroup IS NULL BREAK;

    -- Seed the super group with all records in the current group
    UPDATE E
    SET E.SupergroupKey = @CurrentSupergroup
    FROM dbo.Example AS E 
    WHERE 
        E.GroupKey = @CurrentGroup;

    -- Iteratively find all groups for the super group
    WHILE 1 = 1
    BEGIN
        WITH 
            -- Every RecordKey already in the current super group
            RecordKeys AS
            (
                SELECT DISTINCT
                    E.RecordKey
                FROM dbo.Example AS E
                WHERE
                    E.SupergroupKey = @CurrentSupergroup
            ),
            -- Every group containing at least one of those RecordKeys
            GroupKeys AS
            (
                SELECT DISTINCT
                    E.GroupKey
                FROM RecordKeys AS RK
                JOIN dbo.Example AS E
                    WITH (FORCESEEK)
                    ON E.RecordKey = RK.RecordKey
            )
        -- Claim the still-unassigned rows of those groups
        UPDATE E WITH (TABLOCKX)
        SET SupergroupKey = @CurrentSupergroup
        FROM GroupKeys AS GK
        JOIN dbo.Example AS E
            ON E.GroupKey = GK.GroupKey
        WHERE
            E.SupergroupKey = 0
        OPTION (RECOMPILE, QUERYTRACEON 9481); -- The original CE does better

        -- Break when no more related groups found.
        -- Must directly follow the UPDATE: @@ROWCOUNT is reset by the next statement.
        IF @@ROWCOUNT = 0 BREAK;
    END;
END;

-- Final state of the table. The explicit ORDER BY makes the row order
-- deterministic instead of depending on which index the optimizer scans.
SELECT
    E.SupergroupKey,
    E.GroupKey,
    E.RecordKey
FROM dbo.Example AS E
ORDER BY
    E.SupergroupKey ASC,
    E.GroupKey ASC,
    E.RecordKey ASC;

แผนการดำเนินการ

สำหรับการอัปเดตที่สำคัญ:

ผลลัพธ์

สถานะสุดท้ายของตารางคือ:

╔═══════════════╦══════════╦══════════════╗
║ SupergroupKey ║ GroupKey ║  RecordKey   ║
╠═══════════════╬══════════╬══════════════╣
║             1 ║        1 ║ Archimedes   ║
║             1 ║        1 ║ Euler        ║
║             1 ║        1 ║ Newton       ║
║             1 ║        2 ║ Euler        ║
║             1 ║        2 ║ Gauss        ║
║             1 ║        3 ║ Gauss        ║
║             1 ║        3 ║ Poincaré     ║
║             2 ║        4 ║ Ramanujan    ║
║             3 ║        5 ║ Grothendieck ║
║             3 ║        5 ║ Neumann      ║
║             3 ║        6 ║ Grothendieck ║
║             3 ║        6 ║ Tao          ║
╚═══════════════╩══════════╩══════════════╝

การสาธิต: db <> ซอ

การทดสอบประสิทธิภาพ

เมื่อใช้ชุดข้อมูลทดสอบขนาดใหญ่ขึ้นจากคำตอบของ Michael Green เวลาที่วัดได้บนแล็ปท็อปของฉัน* คือ:

╔═════════════╦════════╗
║ Record Keys ║  Time  ║
╠═════════════╬════════╣
║ 10k         ║ 2s     ║
║ 100k        ║ 12s    ║
║ 1M          ║ 2m 30s ║
╚═════════════╩════════╝

* Microsoft SQL Server 2017 (RTM-CU13), Developer Edition (64 บิต), Windows 10 Pro, RAM 16GB, SSD, i7 แบบ 4 คอร์พร้อมไฮเปอร์เธรด, ความเร็วที่ระบุ 2.4GHz

— พอลไวท์ 9
แหล่งที่มา

นี่เป็นคำตอบที่ยอดเยี่ยม ตามที่คาดไว้ในคำถามของฉัน มันช้าเกินไปสำหรับ "วันใหญ่" แต่ก็ยอดเยี่ยมสำหรับวันที่ข้อมูลน้อยของฉัน ใช้เวลาประมาณ 5 ชั่วโมงในการรันบนตารางของฉันที่มีประมาณ 2.5 ล้านแถว

— basketballfan22

ปัญหานี้เกี่ยวกับการติดตามลิงก์ระหว่างรายการ สิ่งนี้ทำให้มันอยู่ในขอบเขตของกราฟและการประมวลผลกราฟ โดยเฉพาะชุดข้อมูลทั้งหมดจะสร้างกราฟและเรากำลังมองหาส่วนประกอบของกราฟนั้น สิ่งนี้สามารถแสดงได้โดยพล็อตของข้อมูลตัวอย่างจากคำถาม

คำถามบอกว่าเราสามารถตาม GroupKey หรือ RecordKey เพื่อค้นหาแถวอื่น ๆ ที่ใช้ค่านั้นร่วมกันได้ ดังนั้นเราจึงสามารถมองทั้งสองอย่างเป็นจุดยอด (vertex) ในกราฟ คำถามยังอธิบายต่อว่า GroupKey 1–3 มี SupergroupKey เดียวกันได้อย่างไร ซึ่งเห็นได้จากคลัสเตอร์ด้านซ้ายที่เชื่อมกันด้วยเส้นบาง ๆ รูปภาพยังแสดงส่วนประกอบอีกสองส่วน (SupergroupKey) ที่เกิดจากข้อมูลต้นฉบับด้วย

SQL Server มีความสามารถในการประมวลผลกราฟใน T-SQL ในเวลานี้มันค่อนข้างน้อยและไม่เป็นประโยชน์กับปัญหานี้ SQL Server ยังมีความสามารถในการโทรออกไปยัง R และ Python และชุดของแพ็คเกจที่สมบูรณ์และมีประสิทธิภาพ หนึ่งเป็นเช่นigraph มันถูกเขียนขึ้นเพื่อ "การจัดการกราฟขนาดใหญ่อย่างรวดเร็วพร้อมจุดยอดและขอบนับล้าน ( ลิงก์ )"

เมื่อใช้ R และ igraph ฉันสามารถประมวลผลหนึ่งล้านแถวได้ใน 2 นาที 22 วินาทีจากการทดสอบบนเครื่องของฉัน¹ นี่คือการเปรียบเทียบกับโซลูชันที่ดีที่สุดในปัจจุบัน:

Record Keys     Paul White  R               
------------    ----------  --------
Per question    15ms        ~220ms
100             80ms        ~270ms
1,000           250ms       430ms
10,000          1.4s        1.7s
100,000         14s         14s
1M              2m29        2m22s
1M              n/a         1m40    process only, no display

The first column is the number of distinct RecordKey values. The number of rows
in the table will be 8 x this number.

เมื่อประมวลผลแถว 1M จะมีการใช้ 1m40s เพื่อโหลดและประมวลผลกราฟและเพื่อปรับปรุงตาราง 42s ถูกต้องการเพื่อเติมตารางผลลัพธ์ SSMS ด้วยเอาต์พุต

การสังเกต Task Manager ในขณะที่ประมวลผลแถว 1M ขอแนะนำให้ใช้หน่วยความจำการทำงานประมาณ 3GB สิ่งนี้มีอยู่ในระบบนี้โดยไม่มีการเพจ

ฉันสามารถยืนยันการประเมิน Ypercube ของวิธีการเรียกซ้ำ CTE ด้วยคีย์เรคคอร์ดสองสามร้อยตัวมันใช้ CPU 100% และ RAM ที่มีอยู่ทั้งหมด ในที่สุด tempdb ก็เพิ่มขึ้นเป็นมากกว่า 80GB และ SPID ล้มเหลว

ฉันใช้ตารางของ Paul กับคอลัมน์ SupergroupKey ดังนั้นจึงมีการเปรียบเทียบที่เป็นธรรมระหว่างโซลูชัน

ด้วยเหตุผลบางอย่าง R คัดค้านสำเนียงPoincaré การเปลี่ยนเป็น "e" แบบธรรมดาอนุญาตให้เรียกใช้ ฉันไม่ได้ตรวจสอบเนื่องจากไม่ใช่ปัญหาที่ใกล้เคียง ฉันแน่ใจว่ามีทางออก

นี่คือรหัส

-- Finds connected components with R/igraph and writes the component number
-- of each RecordKey back to dbo.Example.SupergroupKey.

-- This captures the output from R so the base table can be updated.
drop table if exists #Results;

create table #Results
(
    Component   int         not NULL,
    Vertex      varchar(12) not NULL primary key
);


truncate table #Results;    -- facilitates re-execution

-- For a 'total elapsed' calculation. datetime2 rather than time, so the
-- difference stays correct when a run crosses midnight.
declare @Start datetime2 = sysdatetime();

-- GroupKey and RecordKey values become vertex names in one shared namespace,
-- so a RecordKey such as '1' would otherwise be merged with GroupKey 1.
-- Prefixing them with 'G' / 'R' keeps the two kinds of vertex apart; the
-- script returns only the 'R' vertices, with the prefix stripped again.
insert #Results(Component, Vertex)
exec sp_execute_external_script   
    @language = N'R',
    @input_data_1 = N'
select
    GroupKey  = concat(''G'', GroupKey),
    RecordKey = concat(''R'', RecordKey)
from dbo.Example',
    @script = N'
library(igraph)
df.g <- graph.data.frame(d = InputDataSet, directed = FALSE)
cpts <- components(df.g, mode = c("weak"))
OutputDataSet <- data.frame(cpts$membership)
OutputDataSet$VertexName <- V(df.g)$name
OutputDataSet <- OutputDataSet[substr(OutputDataSet$VertexName, 1, 1) == "R", ]
OutputDataSet$VertexName <- substring(OutputDataSet$VertexName, 2)
';

-- Write SuperGroupKey to the base table, as other solutions do
update e
set
    SupergroupKey = r.Component
from dbo.Example as e
inner join #Results as r
    on r.Vertex = e.RecordKey;

-- Return all rows, as other solutions do
select
    e.SupergroupKey,
    e.GroupKey,
    e.RecordKey
from dbo.Example as e;

-- Calculate the elapsed
declare @End datetime2 = sysdatetime();
select Elapse_ms = DATEDIFF(MILLISECOND, @Start, @End);

นี่คือสิ่งที่รหัส R ทำ

@input_data_1 คือวิธีที่ SQL Server ถ่ายโอนข้อมูลจากตารางไปยังรหัส R และแปลเป็น R dataframe ชื่อ InputDataSet
library(igraph) อิมพอร์ตไลบรารีลงในสภาวะแวดล้อมการเรียกใช้งาน R
df.g <- graph.data.frame(d = InputDataSet, directed = FALSE) — โหลดข้อมูลลงในวัตถุ igraph นี่เป็นกราฟแบบไม่มีทิศทาง เนื่องจากเราสามารถตามลิงก์จากกลุ่มไปยังเรคคอร์ด หรือจากเรคคอร์ดไปยังกลุ่มก็ได้ InputDataSet เป็นชื่อเริ่มต้นของ SQL Server สำหรับชุดข้อมูลที่ส่งไปยัง R
cpts <- components(df.g, mode = c("weak")) — ประมวลผลกราฟเพื่อค้นหากราฟย่อยที่แยกจากกัน (ส่วนประกอบ) และค่าวัดอื่น ๆ
OutputDataSet <- data.frame(cpts$membership) — SQL Server คาดหวังให้ R ส่งเฟรมข้อมูลกลับมา โดยชื่อเริ่มต้นคือ OutputDataSet ส่วนประกอบจะถูกเก็บไว้ในเวกเตอร์ชื่อ "membership" คำสั่งนี้แปลงเวกเตอร์ดังกล่าวเป็นเฟรมข้อมูล
OutputDataSet$VertexName <- V(df.g)$name — V() คือเวกเตอร์ของจุดยอดในกราฟ ซึ่งก็คือรายการ GroupKey และ RecordKey คำสั่งนี้คัดลอกชื่อจุดยอดเหล่านั้นลงในเฟรมข้อมูลผลลัพธ์ (output) โดยสร้างคอลัมน์ใหม่ชื่อ VertexName ซึ่งเป็นคีย์ที่ใช้จับคู่กับตารางต้นทางเพื่ออัปเดต SupergroupKey

ฉันไม่ใช่ผู้เชี่ยวชาญ R น่าจะเป็นสิ่งนี้สามารถปรับให้เหมาะสม

ทดสอบข้อมูล

ข้อมูลของ OP ใช้สำหรับตรวจสอบความถูกต้อง สำหรับการทดสอบระดับฉันใช้สคริปต์ต่อไปนี้

-- Scratch tables for the test-data generator: the candidate GroupKey values
-- and the generated RecordKey values. Dropped first so the script is re-runnable.
DROP TABLE IF EXISTS Records, Groups;

CREATE TABLE Groups
(
    GroupKey int NOT NULL PRIMARY KEY
);

CREATE TABLE Records
(
    RecordKey varchar(12) NOT NULL PRIMARY KEY
);
go

SET NOCOUNT ON;

-- How many distinct RecordKey values to generate.
-- dbo.Example will end up with about 8 rows for each of them.
DECLARE @RecordCount int = 1000000;

-- Groups-per-record factor, found by experiment. It reproduces the OP's
-- "8 RecordKeys per GroupKey and 4 GroupKeys per RecordKey" and leaves room
-- for clashes between the random values chosen.
DECLARE @Multiplier numeric(4, 2) = 2.7;

-- Number of groups needed to reproduce the OP's distribution.
DECLARE @GroupCount int = FLOOR(@RecordCount * @Multiplier);

-- Poor man's numbers table: sequential keys starting at 1, at most
-- @GroupCount of them. The cross join only supplies rows to number; add a
-- third "CROSS JOIN sys.objects" if two do not yield enough rows.
INSERT INTO Groups (GroupKey)
SELECT TOP (@GroupCount)
    ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM sys.objects AS o1
CROSS JOIN sys.objects AS o2;

-- One single-row insert per record. A set-based insert is not usable here
-- because RAND() would give the same value for every row of the statement.
-- There are better ways to do this, but it works well enough.
-- Each RecordKey is 10 random letters, a-z.
DECLARE @RecordsAttempted int = 0;

WHILE @RecordsAttempted < @RecordCount
BEGIN
    INSERT INTO Records (RecordKey)
    SELECT
          CHAR(97 + (26 * RAND()))
        + CHAR(97 + (26 * RAND()))
        + CHAR(97 + (26 * RAND()))
        + CHAR(97 + (26 * RAND()))
        + CHAR(97 + (26 * RAND()))
        + CHAR(97 + (26 * RAND()))
        + CHAR(97 + (26 * RAND()))
        + CHAR(97 + (26 * RAND()))
        + CHAR(97 + (26 * RAND()))
        + CHAR(97 + (26 * RAND()));

    SET @RecordsAttempted += 1;
END;


-- Process each RecordKey in alphabetical order.
-- For each choose 8 GroupKeys to pair with it.
-- Relies on @GroupCount, declared earlier in this batch.
declare @RecordKey varchar(12) = '';
declare @Groups table (GroupKey int not null);

truncate table dbo.Example;

-- Fetch the first RecordKey. When no row qualifies the variable is left
-- unchanged and @@ROWCOUNT is 0, which ends the loop below.
select top(1) @RecordKey = RecordKey 
from Records 
where RecordKey > @RecordKey 
order by RecordKey;

-- @@ROWCOUNT here always reflects the "next RecordKey" select: the one above
-- on entry, the one at the bottom of the loop body afterwards.
while @@ROWCOUNT > 0
begin
    -- Progress output. Variable name now matches its declaration exactly,
    -- so it also resolves under a case-sensitive collation.
    print @RecordKey;

    delete @Groups;

    -- Eight random GroupKeys in 1..@GroupCount, the same range the Groups
    -- table holds (previously 0..@GroupCount-1). DISTINCT drops clashes, so
    -- a record can occasionally get fewer than eight groups.
    insert @Groups(GroupKey)
    select distinct C
    from
    (
        -- Hard-code * from OP's statistics
        select 1 + FLOOR(RAND() * @GroupCount)
        union all
        select 1 + FLOOR(RAND() * @GroupCount)
        union all
        select 1 + FLOOR(RAND() * @GroupCount)
        union all
        select 1 + FLOOR(RAND() * @GroupCount)
        union all
        select 1 + FLOOR(RAND() * @GroupCount)
        union all
        select 1 + FLOOR(RAND() * @GroupCount)
        union all
        select 1 + FLOOR(RAND() * @GroupCount)
        union all
        select 1 + FLOOR(RAND() * @GroupCount)
    ) as T(C);

    insert dbo.Example(GroupKey, RecordKey)
    select
        GroupKey, @RecordKey
    from @Groups;

    -- Advance to the next RecordKey; must stay the last statement in the
    -- loop body because the while condition reads its @@ROWCOUNT.
    select top(1) @RecordKey = RecordKey 
    from Records 
    where RecordKey > @RecordKey 
    order by RecordKey;
end

-- Rebuild the iExample index after the bulk load so every test run starts
-- from a consistent environment.
ALTER INDEX iExample ON dbo.Example
REBUILD PARTITION = ALL
WITH
(
    PAD_INDEX = OFF,
    STATISTICS_NORECOMPUTE = OFF,
    SORT_IN_TEMPDB = OFF,
    ONLINE = OFF,
    ALLOW_ROW_LOCKS = ON,
    ALLOW_PAGE_LOCKS = ON
);


-- Check what we ended up with.
-- Total rows: should be @RecordCount * 8, often a little less due to random clashes.
SELECT COUNT(*) FROM dbo.Example;

-- For every row, the number of rows sharing its GroupKey, averaged over all
-- rows. Because the average is taken per row, larger groups weigh more.
WITH RowsPerGroup (C) AS
(
    SELECT CONVERT(float, COUNT(*) OVER (PARTITION BY GroupKey))
    FROM dbo.Example
)
SELECT
    AVG(C) AS ByGroup
FROM RowsPerGroup;

-- Same measure, partitioned by RecordKey instead.
WITH RowsPerRecord (C) AS
(
    SELECT CONVERT(float, COUNT(*) OVER (PARTITION BY RecordKey))
    FROM dbo.Example
)
SELECT
    AVG(C) AS ByRecord
FROM RowsPerRecord;

ตอนนี้ฉันเพิ่งรู้ว่าฉันได้อัตราส่วนที่ผิดจากคำจำกัดความของ OP ฉันไม่เชื่อว่าสิ่งนี้จะส่งผลกระทบต่อการกำหนดเวลา บันทึกและกลุ่มมีความสมมาตรกับกระบวนการนี้ สำหรับอัลกอริทึมพวกมันทั้งหมดเป็นเพียงแค่โหนดในกราฟ

ในการทดสอบข้อมูลที่เกิดขึ้นเป็นองค์ประกอบเดียว ฉันเชื่อว่านี่เป็นเพราะการกระจายข้อมูลที่สม่ำเสมอ หากแทนที่จะใช้อัตราส่วนคงที่ 1: 8 ฮาร์ดโค้ดลงในรูทีนการสร้างฉันได้อนุญาตให้อัตราส่วนแตกต่างกันไปมีแนวโน้มที่จะเป็นองค์ประกอบเพิ่มเติม

¹ ข้อมูลจำเพาะของเครื่อง: Microsoft SQL Server 2017 (RTM-CU12), Developer Edition (64 บิต), Windows 10 Home, หน่วยความจำ 16GB, SSD, i7 แบบ 4 คอร์พร้อมไฮเปอร์เธรด, ความเร็วที่ระบุ 2.8GHz การทดสอบเป็นสิ่งเดียวที่ทำงานอยู่ในขณะนั้น นอกเหนือจากกิจกรรมปกติของระบบ (CPU ประมาณ 4%)

— ไมเคิลกรีน
แหล่งที่มา

วิธี CTE แบบเรียกซ้ำ - ที่มีแนวโน้มว่าจะไม่มีประสิทธิภาพอย่างน่ากลัวในตารางขนาดใหญ่:

-- Recursive CTE approach.
-- Each rCTE row carries '|'-delimited lists of the GroupKeys and RecordKeys
-- visited so far. A recursive step moves to a row of Example that either
-- shares the RecordKey and has a GroupKey not yet in the list, or shares the
-- GroupKey and has a RecordKey not yet in the list.
--
-- Changes from the earlier version:
--   * RecordKey is concatenated as-is (it was cast to VARCHAR(10), which
--     silently cut 12-character keys such as 'Grothendieck').
--   * GroupKey is cast to VARCHAR(11), wide enough for any int incl. sign.
--   * The lists are VARCHAR(MAX) instead of VARCHAR(100), so they are never
--     silently truncated. A truncated list loses keys already visited, which
--     defeats the NOT LIKE cycle guard while MAXRECURSION 0 is in effect.
--
-- NOTE(review): keys are matched with LIKE, so a RecordKey containing
-- '%', '_', '[' or '|' would be misinterpreted - confirm such keys cannot occur.
WITH rCTE AS 
(
    -- Anchor: every row starts a path containing only its own keys
    SELECT 
        GroupKey, RecordKey, 
        CAST('|' + CAST(GroupKey AS VARCHAR(11)) + '|' AS VARCHAR(MAX)) AS GroupKeys,
        CAST('|' + RecordKey + '|' AS VARCHAR(MAX)) AS RecordKeys,
        1 AS lvl
    FROM Example

    UNION ALL

    -- Recursive: extend the path by one row, appending any key not yet listed
    SELECT
        e.GroupKey, e.RecordKey, 
        CASE WHEN r.GroupKeys NOT LIKE '%|' + CAST(e.GroupKey AS VARCHAR(11)) + '|%'
            THEN CAST(r.GroupKeys + CAST(e.GroupKey AS VARCHAR(11)) + '|' AS VARCHAR(MAX))
            ELSE r.GroupKeys
        END,
        CASE WHEN r.RecordKeys NOT LIKE '%|' + e.RecordKey + '|%'
            THEN CAST(r.RecordKeys + e.RecordKey + '|' AS VARCHAR(MAX))
            ELSE r.RecordKeys
        END,
        r.lvl + 1
    FROM rCTE AS r
         JOIN Example AS e
         ON  (     e.RecordKey = r.RecordKey
               AND r.GroupKeys NOT LIKE '%|' + CAST(e.GroupKey AS VARCHAR(11)) + '|%'
             )
         -- 
         OR  (     e.GroupKey = r.GroupKey
               AND r.RecordKeys NOT LIKE '%|' + e.RecordKey + '|%'
             )
)
-- Keep a row only when no other row supersedes it: none at a deeper level
-- whose list contains this row's GroupKey, and none at the same level that
-- sorts after it while the two rows list each other's GroupKey.
SELECT 
    ROW_NUMBER() OVER (ORDER BY GroupKeys) AS SuperGroupKey,
    GroupKeys, RecordKeys
FROM rCTE AS c
WHERE NOT EXISTS
      ( SELECT 1
        FROM rCTE AS m
        WHERE (     m.lvl > c.lvl
                AND m.GroupKeys LIKE '%|' + CAST(c.GroupKey AS VARCHAR(11)) + '|%'
              )
        OR    (     m.lvl = c.lvl
                AND ( m.GroupKey > c.GroupKey
                   OR (     m.GroupKey = c.GroupKey
                        AND m.RecordKeys > c.RecordKeys
                      )
                    )
                AND m.GroupKeys LIKE '%|' + CAST(c.GroupKey AS VARCHAR(11)) + '|%'
                AND c.GroupKeys LIKE '%|' + CAST(m.GroupKey AS VARCHAR(11)) + '|%'
              )
      ) 
OPTION (MAXRECURSION 0) ;

ทดสอบในdbfiddle.uk

— ypercubeᵀᴹ
แหล่งที่มา