ดัชนีเซิร์ฟเวอร์ SQL เทียบกับสถิติ

CREATE INDEX กับ CREATE STATISTICS แตกต่างกันอย่างไร และควรใช้แต่ละอย่างเมื่อใด?

sql-server index statistics

ดัชนีจะจัดเก็บข้อมูลจริง (หน้าข้อมูลหรือหน้าดัชนี ขึ้นอยู่กับประเภทของดัชนีที่เรากำลังพูดถึง) ส่วนสถิติจะจัดเก็บการกระจายตัวของข้อมูล ดังนั้น CREATE INDEX จึงเป็น DDL สำหรับสร้างดัชนี (แบบคลัสเตอร์, แบบไม่ใช่คลัสเตอร์ ฯลฯ) และ CREATE STATISTICS เป็น DDL สำหรับสร้างสถิติบนคอลัมน์ภายในตาราง

ฉันขอแนะนำให้คุณอ่านเพิ่มเติมเกี่ยวกับแง่มุมเหล่านี้ของข้อมูลเชิงสัมพันธ์ ด้านล่างนี้เป็นบทความเบื้องต้นสำหรับผู้เริ่มต้น หัวข้อเหล่านี้กว้างมาก ข้อมูลเกี่ยวกับหัวข้อเหล่านี้จึงลงลึกและครอบคลุมได้มาก ลองอ่านแนวคิดทั่วไปจากลิงก์ด้านล่าง แล้วถามคำถามที่เฉพาะเจาะจงมากขึ้นเมื่อมีข้อสงสัย

การอ้างอิง BOL ในองค์กรตารางและดัชนี
การอ้างอิง BOL ในโครงสร้างดัชนีแบบคลัสเตอร์
การอ้างอิง BOL ในโครงสร้างดัชนีแบบไม่ใช่คลัสเตอร์
SQL Server Central บนบทนำสู่ดัชนี
การอ้างอิง BOL ในสถิติ

นี่คือตัวอย่างที่ใช้งานได้จริงเพื่อดูการทำงานของทั้งสองส่วน (มีคอมเมนต์อธิบายประกอบ):

-- Switch to the testdb database for the rest of the demo.
use testdb;
go

-- Demo table: an identity column plus one plain integer column.
create table MyTable1
(
    id         int identity(1, 1) not null,
    my_int_col int                not null
);
go

-- Each execution of this batch inserts one row; the count after
-- "go" tells the client tool (SSMS / sqlcmd) to run the batch
-- 100 times, so the table ends up with 100 rows.
insert into MyTable1 (my_int_col)
values (1);
go 100

-- Build a clustered index on MyTable1 keyed on the id column.
-- Although id is the only key column, a clustered index holds
-- every column of the table, because it is the table data.
create clustered index MyTable1_CI
    on MyTable1 (id);
go


-- Creating an index also creates a statistics object for it.
-- To prove it, list every statistics object on MyTable1 with
-- its column(s): expect one named after the index, whose
-- column is the index key column.
select
    st.name  as stats_name,
    col.name as column_name
from sys.stats as st
inner join sys.stats_columns as stc
    on  stc.object_id = st.object_id
    and stc.stats_id  = st.stats_id
inner join sys.columns as col
    on  col.object_id = stc.object_id
    and col.column_id = stc.column_id
where st.object_id = object_id('MyTable1');


-- A statistics object can also be created on its own,
-- independent of any index -- here on a single column.
create statistics MyTable1_MyIntCol
    on MyTable1 (my_int_col);
go

-- List the statistics objects on MyTable1 once more. The
-- result now includes the standalone statistics object,
-- which does not necessarily correspond to any index.
select
    st.name  as stats_name,
    col.name as column_name
from sys.stats as st
inner join sys.stats_columns as stc
    on  stc.object_id = st.object_id
    and stc.stats_id  = st.stats_id
inner join sys.columns as col
    on  col.object_id = stc.object_id
    and col.column_id = stc.column_id
where st.object_id = object_id('MyTable1');


-- What does a statistics object look like? DBCC SHOW_STATISTICS
-- displays what is stored for the given table and statistics name.
dbcc show_statistics
(
    'MyTable1',
    'MyTable1_CI'
);
go

นี่คือตัวอย่างทดสอบสถิติที่มีลักษณะดังนี้:

ป้อนคำอธิบายรูปภาพที่นี่

ขอให้สังเกตว่าสถิติคือสิ่งที่เก็บการกระจายตัวของข้อมูล ซึ่งช่วยให้ SQL Server กำหนดแผนการทำงานที่เหมาะสมที่สุด ตัวอย่างเปรียบเทียบที่ดีคือ ลองจินตนาการว่าคุณกำลังจะยกของที่มีน้ำหนักมาก หากคุณรู้ว่าของนั้นหนักเท่าไรเพราะมีป้ายระบุน้ำหนักติดอยู่ คุณก็จะเลือกวิธียกที่ดีที่สุดและรู้ว่าควรใช้กล้ามเนื้อส่วนใด นั่นคือสิ่งที่ SQL Server ทำกับสถิติโดยคร่าว ๆ

-- Add a nonclustered index whose only key column is my_int_col.
create nonclustered index IX_MyTable1_MyIntCol
    on MyTable1 (my_int_col);
go

-- Look up the catalog metadata for the nonclustered index.
-- Index names are only unique within a table, so filter on the
-- owning object as well as the name; filtering on the name alone
-- would also return same-named indexes on other tables in this
-- database.
select
    object_name(object_id) as object_name,
    name as index_name,
    index_id,
    type_desc,
    is_unique,
    fill_factor
from sys.indexes
where object_id = object_id('MyTable1')
and name = 'IX_MyTable1_MyIntCol';

-- Now look at some physical aspects of this particular index.
--
-- The index_id is resolved by name rather than hard-coded: the id
-- assigned to an index depends on which other indexes and
-- statistics already exist on the table, so a literal copied from
-- one run can point at the wrong index (or none) on another.
-- NOTE: if the index does not exist, @index_id stays NULL and the
-- function then reports on all indexes of the table.
declare @index_id int;

select @index_id = index_id
from sys.indexes
where object_id = object_id('MyTable1')
and name = 'IX_MyTable1_MyIntCol';

-- db_id() (the current database) is used so the database id always
-- matches the database in which object_id('MyTable1') is resolved.
-- select * is deliberate here: this is an exploratory look at
-- everything the DMV reports in 'detailed' mode.
select *
from sys.dm_db_index_physical_stats
(
    db_id(),
    object_id('MyTable1'),
    @index_id,
    null,
    'detailed'
);

เราสามารถเห็นได้จากตัวอย่างข้างต้นว่าดัชนีมีข้อมูลอยู่จริง (ขึ้นอยู่กับประเภทของดัชนีหน้าใบไม้จะแตกต่างกัน)

โพสต์นี้แสดงเพียงภาพรวมโดยย่อของสองแง่มุมที่ใหญ่มากของ SQL Server ทั้งสองเรื่องนี้สามารถเขียนได้เป็นบท ๆ หรือเป็นหนังสือทั้งเล่ม ลองอ่านข้อมูลอ้างอิงแล้วคุณจะเข้าใจได้ดีขึ้น

— Thomas Stringer
แหล่งที่มา

ฉันรู้ว่านี่เป็นโพสต์เก่า แต่คิดว่าควรกล่าวไว้ว่าการสร้างดัชนีจะสร้างสถิติสำหรับดัชนีนั้นโดยอัตโนมัติ (ในกรณีส่วนใหญ่) แต่จะกล่าวแบบเดียวกันกับการสร้างสถิติไม่ได้ (การสร้างสถิติไม่ได้สร้างดัชนีให้)

— Steve Mangiameli