-- ***************************************************************************** -- Don McMunn's FREE Data Profiling Script for SQL Server - Column Summary tables -- http://www.ipcdesigns.com/dim_date -- Last release: 16-Sep-2007 -- ***************************************************************************** if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[FKPROFILE_COL_SUM_METAKEY]') and OBJECTPROPERTY(id, N'IsForeignKey') = 1) ALTER TABLE [dbo].[PROFILE_COL_SUM] DROP CONSTRAINT FKPROFILE_COL_SUM_METAKEY GO if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[PKPROFILE_META]') and OBJECTPROPERTY(id, N'IsPrimaryKey') = 1) ALTER TABLE [dbo].[PROFILE_META] DROP CONSTRAINT PKPROFILE_META GO if exists (select * from dbo.sysobjects where id = object_id(N'dbo.PROFILE_META') and OBJECTPROPERTY(id, N'IsUserTable') = 1) drop table dbo.PROFILE_META GO CREATE TABLE dbo.PROFILE_META ( PROFILE_META_KEY int IDENTITY(1,1) , PROFILE_DATE datetime NOT NULL CONSTRAINT DF_ETL_META_COL_SUM_CDTE DEFAULT (GETDATE()) , PROFILE_BY varchar(128) NOT NULL CONSTRAINT DF_ETL_META_COL_SUM_CBY DEFAULT (USER) , PROFILE_VERSION decimal(10, 5) NOT NULL , PROFILE_REASON varchar(255) NOT NULL , PROFILE_DESCRIPTION varchar(2048) NOT NULL ); ALTER TABLE dbo.PROFILE_META ADD CONSTRAINT PKProfile_Meta PRIMARY KEY CLUSTERED (PROFILE_META_KEY); if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[FKPROFILE_COL_SUM_METAKEY]') and OBJECTPROPERTY(id, N'IsForeignKey') = 1) ALTER TABLE [dbo].[PROFILE_COL_SUM] DROP CONSTRAINT FKPROFILE_COL_SUM_METAKEY GO if exists (select * from dbo.sysobjects where id = object_id(N'dbo.PROFILE_COL_SUM') and OBJECTPROPERTY(id, N'IsUserTable') = 1) drop table dbo.PROFILE_COL_SUM GO if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[PKPROFILE_COL_SUM]') and OBJECTPROPERTY(id, N'IsPrimaryKey') = 1) ALTER TABLE [dbo].[PROFILE_COL_SUM] DROP CONSTRAINT PKPROFILE_COL_SUM GO if exists (select * from dbo.sysobjects where id = object_id(N'dbo.PROFILE_META') and OBJECTPROPERTY(id, N'IsUserTable') = 1) drop table dbo.PROFILE_META GO CREATE TABLE dbo.PROFILE_META ( PROFILE_META_KEY int IDENTITY(1,1) , PROFILE_DATE datetime NOT NULL CONSTRAINT DF_ETL_META_COL_SUM_CDTE DEFAULT (GETDATE()) , PROFILE_BY varchar(128) NOT NULL CONSTRAINT DF_ETL_META_COL_SUM_CBY DEFAULT (USER) , PROFILE_VERSION decimal(10, 5) NOT NULL , PROFILE_REASON varchar(255) NOT NULL , PROFILE_DESCRIPTION varchar(2048) NOT NULL ); ALTER TABLE dbo.PROFILE_META ADD CONSTRAINT PKProfile_Meta PRIMARY KEY CLUSTERED (PROFILE_META_KEY); exec sp_addextendedproperty N'MS_Description', N'Captures metadata about each pass of data profiling; copyright c. 2007 IPC Designs, LLC; Author: Don McMunn;', N'user', N'dbo', N'table', N'PROFILE_META' -- , N'column', N'date_key' GO if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[FKPROFILE_COL_SUM_METAKEY]') and OBJECTPROPERTY(id, N'IsForeignKey') = 1) ALTER TABLE [dbo].[PROFILE_COL_SUM] DROP CONSTRAINT FKPROFILE_COL_SUM_METAKEY GO if exists (select * from dbo.sysobjects where id = object_id(N'dbo.PROFILE_COL_SUM') and OBJECTPROPERTY(id, N'IsUserTable') = 1) drop table dbo.PROFILE_COL_SUM GO CREATE TABLE dbo.PROFILE_COL_SUM ( PROFILE_COL_SUM_KEY INT IDENTITY(1,1) , TNAME varchar (255) NOT NULL , CNAME varchar (255) NOT NULL , DATA_TYPE varchar (50) NOT NULL , DATA_LENGTH int NULL , DATA_PRECISION int NULL , DATA_SCALE int NULL , NULLABLE varchar (3) NULL , NUM_TABLE_ROWS decimal(18, 0) NULL , NUM_NULLS decimal(18, 0) NULL , NUM_DISTINCT_VALUES decimal(18, 0) NULL , DENSITY decimal(18, 5) NULL , MIN_DATA_LENGTH int NULL , MAX_DATA_LENGTH int NULL , AVG_DATA_LENGTH int NULL , MIN_DATE_VALUE datetime NULL , MAX_DATE_VALUE datetime NULL , MIN_NUMERIC_VALUE decimal(18, 5) NULL , MAX_NUMERIC_VALUE decimal(18, 5) NULL , MIN_ALPHANUM_VALUE varchar (2048) NULL , MAX_ALPHANUM_VALUE varchar (2048) NULL , NUMERIC_COUNT decimal(18) NULL , DATE_COUNT decimal(18) NULL , ALPHANUM_COUNT decimal(18) NULL , NOPRINT_COUNT decimal(18) NULL , TABLE_COMMENT varchar (512) NULL , COLUMN_COMMENT varchar (512) NULL , PROFILE_META_KEY int NOT NULL ) ON [PRIMARY] GO ALTER TABLE dbo.PROFILE_COL_SUM ADD CONSTRAINT PKProfile_Col_Sum PRIMARY KEY CLUSTERED (PROFILE_COL_SUM_KEY) GO ALTER TABLE dbo.PROFILE_COL_SUM ADD CONSTRAINT FKPROFILE_COL_SUM_METAKEY FOREIGN KEY (PROFILE_META_KEY) REFERENCES dbo.PROFILE_META (PROFILE_META_KEY) GO exec sp_addextendedproperty N'MS_Description', N'Captures metadata about each table/column in a data profiling pass; copyright c. 2007 IPC Designs, LLC; Author: Don McMunn;', N'user', N'dbo', N'table', N'PROFILE_COL_SUM' -- , N'column', N'date_key' GO exec sp_addextendedproperty N'MS_Description', N'The database user who started the data profiling pass;', N'user', N'dbo', N'table', N'PROFILE_META', N'column', N'PROFILE_BY' GO exec sp_addextendedproperty N'MS_Description', N'Date and time when the data profiling pass was begun; defaults to current server date and time;', N'user', N'dbo', N'table', N'PROFILE_META', N'column', N'PROFILE_DATE' GO exec sp_addextendedproperty N'MS_Description', N'A description of what this data profiling pass is to address with its results;', N'user', N'dbo', N'table', N'PROFILE_META', N'column', N'PROFILE_DESCRIPTION' GO exec sp_addextendedproperty N'MS_Description', N'Integer identity; surrogate primary key; uniquely identifies each row in the data profile metadata table', N'user', N'dbo', N'table', N'PROFILE_META', N'column', N'PROFILE_META_KEY' GO exec sp_addextendedproperty N'MS_Description', N'The reason for performing this data profiling pass', N'user', N'dbo', N'table', N'PROFILE_META', N'column', N'PROFILE_REASON' GO exec sp_addextendedproperty N'MS_Description', N'A numeric value to be used in tagging this data profile pass;', N'user', N'dbo', N'table', N'PROFILE_META', N'column', N'PROFILE_VERSION' GO exec sp_addextendedproperty N'MS_Description', N'Number of rows containing ONLY an alphanumeric value that is not either a date or a numeric only value', N'user', N'dbo', N'table', N'PROFILE_COL_SUM', N'column', N'ALPHANUM_COUNT' GO exec sp_addextendedproperty N'MS_Description', N'Average length of data values found in this column; mainly useful for string data types', N'user', N'dbo', N'table', N'PROFILE_COL_SUM', N'column', N'AVG_DATA_LENGTH' GO exec sp_addextendedproperty N'MS_Description', N'Name of column in table being profiled', N'user', N'dbo', N'table', N'PROFILE_COL_SUM', N'column', N'CNAME' GO exec sp_addextendedproperty N'MS_Description', N'Any comment associated with the current column;', N'user', N'dbo', N'table', N'PROFILE_COL_SUM', N'column', N'COLUMN_COMMENT' GO exec sp_addextendedproperty N'MS_Description', N'Maximum length of the column in bytes', N'user', N'dbo', N'table', N'PROFILE_COL_SUM', N'column', N'DATA_LENGTH' GO exec sp_addextendedproperty N'MS_Description', N'Numeric magnitude / vector length; for numeric columns only', N'user', N'dbo', N'table', N'PROFILE_COL_SUM', N'column', N'DATA_PRECISION' GO exec sp_addextendedproperty N'MS_Description', N'Number of decimal places for numeric columns;', N'user', N'dbo', N'table', N'PROFILE_COL_SUM', N'column', N'DATA_SCALE' GO exec sp_addextendedproperty N'MS_Description', N'Name of the data type of the column', N'user', N'dbo', N'table', N'PROFILE_COL_SUM', N'column', N'DATA_TYPE' GO exec sp_addextendedproperty N'MS_Description', N'Number of rows containing ONLY a valid date value in this column', N'user', N'dbo', N'table', N'PROFILE_COL_SUM', N'column', N'DATE_COUNT' GO exec sp_addextendedproperty N'MS_Description', N'A ratio of non-null values to total rows in the table', N'user', N'dbo', N'table', N'PROFILE_COL_SUM', N'column', N'DENSITY' GO exec sp_addextendedproperty N'MS_Description', N'maximum alphanumeric value found in a string column; based on default page', N'user', N'dbo', N'table', N'PROFILE_COL_SUM', N'column', N'MAX_ALPHANUM_VALUE' GO exec sp_addextendedproperty N'MS_Description', N'Longest length of data value found in this column; mainly useful for string data types', N'user', N'dbo', N'table', N'PROFILE_COL_SUM', N'column', N'MAX_DATA_LENGTH' GO exec sp_addextendedproperty N'MS_Description', N'most recent data found in this date column;', N'user', N'dbo', N'table', N'PROFILE_COL_SUM', N'column', N'MAX_DATE_VALUE' GO exec sp_addextendedproperty N'MS_Description', N'largest numeric value found in a numeric column', N'user', N'dbo', N'table', N'PROFILE_COL_SUM', N'column', N'MAX_NUMERIC_VALUE' GO exec sp_addextendedproperty N'MS_Description', N'minimum alphanumeric value found in a string column; based on default page', N'user', N'dbo', N'table', N'PROFILE_COL_SUM', N'column', N'MIN_ALPHANUM_VALUE' GO exec sp_addextendedproperty N'MS_Description', N'Shortest length of data value found in this column; mainly useful for string data types', N'user', N'dbo', N'table', N'PROFILE_COL_SUM', N'column', N'MIN_DATA_LENGTH' GO exec sp_addextendedproperty N'MS_Description', N'oldest date value found in this column', N'user', N'dbo', N'table', N'PROFILE_COL_SUM', N'column', N'MIN_DATE_VALUE' GO exec sp_addextendedproperty N'MS_Description', N'smallest numeric value found in a numeric column', N'user', N'dbo', N'table', N'PROFILE_COL_SUM', N'column', N'MIN_NUMERIC_VALUE' GO exec sp_addextendedproperty N'MS_Description', N'Number of columns containing non-printable ASCII values; useful for identifying UNICODE or potential data quality issues', N'user', N'dbo', N'table', N'PROFILE_COL_SUM', N'column', N'NOPRINT_COUNT' GO exec sp_addextendedproperty N'MS_Description', N'Yes if the column allows NULL values; No otherwise', N'user', N'dbo', N'table', N'PROFILE_COL_SUM', N'column', N'NULLABLE' GO exec sp_addextendedproperty N'MS_Description', N'Total number of unique values found in the column at the time of the data profiling pass', N'user', N'dbo', N'table', N'PROFILE_COL_SUM', N'column', N'NUM_DISTINCT_VALUES' GO exec sp_addextendedproperty N'MS_Description', N'Total number of NULL values found in the column at the time of the data profiling pass', N'user', N'dbo', N'table', N'PROFILE_COL_SUM', N'column', N'NUM_NULLS' GO exec sp_addextendedproperty N'MS_Description', N'Total number of rows contained in the table at the time of the data profiling pass', N'user', N'dbo', N'table', N'PROFILE_COL_SUM', N'column', N'NUM_TABLE_ROWS' GO exec sp_addextendedproperty N'MS_Description', N'Number of rows containing ONLY numeric values in this column regardless of data type', N'user', N'dbo', N'table', N'PROFILE_COL_SUM', N'column', N'NUMERIC_COUNT' GO exec sp_addextendedproperty N'MS_Description', N'Integer identity; surrogate primary key; uniquely identifies each row in the column data profile summary table', N'user', N'dbo', N'table', N'PROFILE_COL_SUM', N'column', N'PROFILE_COL_SUM_KEY' GO exec sp_addextendedproperty N'MS_Description', N'A foreign key referencing an single row in the PROFILE_META table;', N'user', N'dbo', N'table', N'PROFILE_COL_SUM', N'column', N'PROFILE_META_KEY' GO exec sp_addextendedproperty N'MS_Description', N'Any comment associated with the current table;', N'user', N'dbo', N'table', N'PROFILE_COL_SUM', N'column', N'TABLE_COMMENT' GO exec sp_addextendedproperty N'MS_Description', N'Name of table containing column being profiled', N'user', N'dbo', N'table', N'PROFILE_COL_SUM', N'column', N'TNAME' GO -- ***************************************************************************** -- Don McMunn's FREE Data Profiling Script for SQL Server - Column Summary tables -- http://www.ipcdesigns.com/dim_date -- Last release: 16-Sep-2007 -- ***************************************************************************** Don McMunn's Data Profiling Script generator (Crude Software can be helpful) -- -- 1. configure your query tool (SQL Query Analyzer) to output to text -- 2. Plug in the table name you want -- 3. Run this query -- 4. Capture the output of this query and execute it in another query window -- 5. Voila! very crude, summary level data profiling for all date-related columns in the table -- 6. Use this with the DDL script and you have a simple data profiling summary -- -- //////////////////////////////////////////////////////////// -- Data Profiling for: -- String data types (char, varchar, excludes UNICODE) -- Date data types: smalldatetime, datetime, timestamp -- Numeric data types: tinyint, smallint, int, integer, bigint, currency, smallmoney, money, numeric, decimal) -- //////////////////////////////////////////////////////////// select cast('SELECT ''' + o.name + ''' as TNAME, ''' + c.name + ''' as CNAME, ''' + t.name + ''' as DATA_TYPE,' as varchar(2048)) as p1 , cast( cast(c.length as varchar(10)) + ' as DATA_LENGTH, ' + cast(c.xprec as varchar(10)) + ' as DATA_PRECISION, ' + cast(c.xscale as varchar(10)) + ' as DATA_SCALE, ''' + case when c.isnullable = 1 then 'YES' else 'NO' end + ''' as NULLABLE ' as varchar(2048)) as P2 , cast( ', count(*) as NUM_TABLE_ROWS, ' + ' sum(case when ' + c.name + ' is null then 1 else 0 end) as NUM_NULLS,' + ' count(distinct ' + c.name + ') as NUM_DISTINCT_VALUES, ' as varchar(2048)) as P3 , cast( '(sum(case when ' + c.name + ' is not null then 1 else 0 end) * 1.0) / (count(*) * 1.0) as DENSITY, ' as varchar(2048)) as P4 , cast( ' min(datalength(' + c.name + ')) as MIN_DATA_LENGTH, max(datalength(' + c.name + ')) as MAX_DATA_LENGTH,' as varchar(2048)) as P5 , cast( ' (sum(case when ' + c.name + ' is not null then datalength(' + c.name + ') else 0 end) / coalesce(sum(case when ' + c.name + ' is not null then 1 else 0 end),1)) as AVG_DATA_LENGTH, ' as varchar(2048)) as P6 , cast( ' NULL as MIN_DATE_VALUE, NULL as MAX_DATE_VALUE, ' as varchar(2048)) as P7 , cast( ' NULL AS MIN_NUMERIC_VALUE, NULL AS MAX_NUMERIC_VALUE,' as varchar(2048)) as P8 , cast( ' min(' + c.name + ') as MIN_ALPHANUM_VALUE, max( ' + c.name + ') as MAX_ALPHANUM_VALUE, ' as varchar(2048)) as P9 , cast(+ ' sum(isnumeric(' + c.name + ')) as NUMERIC_COUNT, ' + ' sum(isdate(' + c.name + ')) as DATE_COUNT, ' as varchar(2048)) as P10 , cast(+ ' sum(case when (isdate('+ c.name + ') + isnumeric(' + c.name + ')) > 0 then 0 else 1 end) as ALPHANUM_COUNT ' + ' from ' + o.name + char(13) + char(10) + ' union all ' + char(13) + char(10) as varchar(2048)) as P11 from syscolumns c inner join sysobjects o on c.[id] = o.[id] inner join systypes t on c.xtype = t.xtype where o.xtype = 'U' and t.name in ('char', 'character', 'varchar') -- and o.[name] = 'dim_day' -- <== put your tablename here if you want union all select cast('SELECT ''' + o.name + ''' as TNAME, ''' + c.name + ''' as CNAME, ''' + t.name + ''' as DATA_TYPE,' as varchar(2048)) as p1 , cast( cast(c.length as varchar(10)) + ' as DATA_LENGTH, ' + cast(c.xprec as varchar(10)) + ' as DATA_PRECISION, ' + cast(c.xscale as varchar(10)) + ' as DATA_SCALE, ''' + case when c.isnullable = 1 then 'YES' else 'NO' end + ''' as NULLABLE ' as varchar(2048)) as P2 , cast( ', count(*) as NUM_TABLE_ROWS, ' + ' sum(case when ' + c.name + ' is null then 1 else 0 end) as NUM_NULLS,' + ' count(distinct ' + c.name + ') as NUM_DISTINCT_VALUES, ' as varchar(2048)) as P3 , cast( '(sum(case when ' + c.name + ' is not null then 1 else 0 end) * 1.0) / (coalesce(nullif(count(*),0),1) * 1.0) as DENSITY, ' as varchar(2048)) as P4 , cast( ' min(datalength(' + c.name + ')) as MIN_DATA_LENGTH, max(datalength(' + c.name + ')) as MAX_DATA_LENGTH,' as varchar(2048)) as P5 , cast( ' (sum(case when ' + c.name + ' is not null then datalength(' + c.name + ') else 0 end) / coalesce(nullif(sum(case when ' + c.name + ' is not null then 1 else 0 end),0),1)) as AVG_DATA_LENGTH, ' as varchar(2048)) as P6 , cast( 'min( ' + c.name + ') as MIN_DATE_VALUE, max( ' + c.name + ') as MAX_DATE_VALUE, ' as varchar(2048)) as P7 , cast( ' NULL AS MIN_NUMERIC_VALUE, NULL AS MAX_NUMERIC_VALUE,' as varchar(2048)) as P8 , cast( ' NULL as MIN_ALPHANUM_VALUE, NULL as MAX_ALPHANUM_VALUE, ' as varchar(2048)) as P9 , cast(+ ' 0 as NUMERIC_COUNT, ' + ' sum(case when ' + c.name + ' is not null then 1 else 0 end) as DATE_COUNT, ' as varchar(2048)) as P10 , cast(+ ' 0 as ALPHANUM_COUNT ' + ' from ' + o.name + char(13) + char(10) + ' union all ' + char(13) + char(10) as varchar(2048)) as P11 from syscolumns c inner join sysobjects o on c.[id] = o.[id] inner join systypes t on c.xtype = t.xtype where o.xtype = 'U' and t.name in ('smalldatetime', 'datetime', 'timestamp') -- and o.[name] = 'dim_day' -- <== put your tablename here if you want union all select cast('SELECT ''' + o.name + ''' as TNAME, ''' + c.name + ''' as CNAME, ''' + t.name + ''' as DATA_TYPE,' as varchar(2048)) as p1 , cast( cast(c.length as varchar(10)) + ' as DATA_LENGTH, ' + cast(c.xprec as varchar(10)) + ' as DATA_PRECISION, ' + cast(c.xscale as varchar(10)) + ' as DATA_SCALE, ''' + case when c.isnullable = 1 then 'YES' else 'NO' end + ''' as NULLABLE ' as varchar(2048)) as P2 , cast( ', count(*) as NUM_TABLE_ROWS, ' + ' sum(case when ' + c.name + ' is null then 1 else 0 end) as NUM_NULLS,' + ' count(distinct ' + c.name + ') as NUM_DISTINCT_VALUES, ' as varchar(2048)) as P3 , cast( '(sum(case when ' + c.name + ' is not null then 1 else 0 end) * 1.0) / (coalesce(nullif(count(*),0),1) * 1.0) as DENSITY, ' as varchar(2048)) as P4 , cast( ' min(datalength(' + c.name + ')) as MIN_DATA_LENGTH, max(datalength(' + c.name + ')) as MAX_DATA_LENGTH,' as varchar(2048)) as P5 , cast( ' (sum(case when ' + c.name + ' is not null then datalength(' + c.name + ') else 0 end) / coalesce(nullif(sum(case when ' + c.name + ' is not null then 1 else 0 end),0),1)) as AVG_DATA_LENGTH, ' as varchar(2048)) as P6 , cast( 'NULL as MIN_DATE_VALUE, NULL as MAX_DATE_VALUE, ' as varchar(2048)) as P7 , cast( ' min( ' + c.name + ') as MIN_NUMERIC_VALUE, max( ' + c.name + ') as MAX_NUMERIC_VALUE,' as varchar(2048)) as P8 , cast( ' NULL as MIN_ALPHANUM_VALUE, NULL as MAX_ALPHANUM_VALUE, ' as varchar(2048)) as P9 , cast(+ 'sum(case when ' + c.name + ' is not null then 1 else 0 end) as NUMERIC_COUNT, ' + ' 0 as DATE_COUNT, ' as varchar(2048)) as P10 , cast(+ ' 0 as ALPHANUM_COUNT ' + ' from ' + o.name + char(13) + char(10) + ' union all ' + char(13) + char(10) as varchar(2048)) as P11 from syscolumns c inner join sysobjects o on c.[id] = o.[id] inner join systypes t on c.xtype = t.xtype where o.xtype = 'U' and t.name in ('tinyint', 'smallint', 'int', 'integer', 'bigint', 'currency', 'smallmoney', 'money', 'numeric', 'decimal') -- and o.[name] = 'dim_day' -- <== put your tablename here if you want /* Don McMunn is a professional, business-focused consultant specializing in data warehousing and business intelligence design, development and delivery for over a decade with the Fortune 500 and SMB segments. He may be reached at: dmcmunn at ipcdesigns dot com */