我在Azure数据工厂中有两个SQL插入过程。第一个是插入更改,第二个是插入新行,以防发现新CSV文件中不存在新行。尤其是第二次插入过程太慢了。
作为背景,dbo.reservations2表中每天大约有50-100万行。插入的表在20万到200万行之间变化。当前,第一个过程大约需要2.5分钟,这当然是可以的。但是第二个似乎要花几个小时。第二个所需的速度将低于一小时。
要进一步指定,将出现3种时间范围,一种为7天,第二种为14天,最后为30天。 30天更新仅一天一次,每小时14天一次,每15分钟更新7天。在30天的更新中,有一个时隙,每天早晨约1.5个小时什么都没有发生
我尝试了以下索引,但不确定它们是否有帮助。至少第二个程序实际上在测试时减慢了第一个程序的插入速度(即使Azure数据工作室的Explain功能确实建议这样做)。
CREATE NONCLUSTERED INDEX reservations_date_custom on dbo.reservations2 (datetime) include (dv_id,dv_datahash,id,unit,room,duration,specialist,specialisation1,specialisation2,specialisation3,timetype)
CREATE NONCLUSTERED INDEX reservations_date_custom2 on dbo.reservations2 (datetime) include (dv_datahash,dv_load_time,id)
CREATE NONCLUSTERED INDEX staging_id on staging.reservations (id)
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE PROCEDURE [dbo].[dv_insert]
AS
INSERT INTO
dbo.reservations2 (
dv_id,dv_expir_time,datetime,timetype,reserved
)
SELECT
dv_id,reserved
FROM (
SELECT
CONVERT(VARCHAR(32),HashBytes('MD5',UPPER(IsnULL(id,'-1'))),2) AS dv_id,CONVERT(VARCHAR(32),UPPER(IsnULL(CAST(id AS nvarchar(max)),'-1') + '~' + IsnULL(CAST(unit AS nvarchar(max)),'-1') + '~' + IsnULL(CAST(room AS nvarchar(max)),'-1') + '~' + IsnULL(CAST(datetime AS nvarchar(max)),'-1') + '~' + IsnULL(CAST(duration AS nvarchar(max)),'-1') + '~' + IsnULL(CAST(specialist AS nvarchar(max)),'-1') + '~' + IsnULL(CAST(specialisation1 AS nvarchar(max)),'-1') + '~' + IsnULL(CAST(specialisation2 AS nvarchar(max)),'-1') + '~' + IsnULL(CAST(specialisation3 AS nvarchar(max)),'-1') + '~' + IsnULL(CAST(timetype AS nvarchar(max)),'-1') + '~' + IsnULL(CAST(reserved AS nvarchar(max)),2) AS dv_datahash,GETDATE() AS dv_load_time,null as dv_expir_time,id AS id,unit AS unit,room AS room,datetime AS datetime,duration AS duration,specialist AS specialist,timetype AS timetype,reserved AS reserved
FROM staging.reservations2) t1
WHERE t1.dv_datahash NOT IN (
Select dv_datahash
From (Select dv_datahash,row_number() over(partition by id order by dv_load_time desc) as ranking
from dbo.reservations2
Where datetime >= (Select min([datetime]) from staging.reservations2)) as t2
Where t2.ranking = '1'
)
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE PROCEDURE [dbo].[dv_insert_deleted]
AS
INSERT INTO
dbo.reservations2 (
dv_id,reserved
FROM (
SELECT
dv_id,Getdate() AS dv_load_time,2 AS reserved
FROM dbo.reservations2) t3
WHERE t3.[datetime] >= (Select min([datetime]) from staging.reservations2)
AND t3.[datetime] <= (Select max([datetime]) from staging.reservations2)
AND t3.id NOT in(
SELECT distinct id
FROM staging.reservations2)
AND t3.id Not in(
Select id
From (Select reserved,row_number() over(partition by id order by dv_load_time desc) as ranking
from dbo.reservations2
Where datetime >= (Select min([datetime]) from staging.reservations2)
AND datetime <= (Select max([datetime]) from staging.reservations2)) as t2
Where t2.ranking = '1'
AND reserved = '2')