-- Use this to generate an Oracle table/column summary data profiling set of sql DROP TABLE DATA_PROFILE_COL_SUM CASCADE CONSTRAINTS; CREATE TABLE DATA_PROFILE_COL_SUM ( TNAME VARCHAR2(50 BYTE), CNAME VARCHAR2(50 BYTE), DATA_TYPE VARCHAR2(50 BYTE), DATA_LENGTH INTEGER, DATA_PRECISION INTEGER, DATA_SCALE INTEGER, NULLABLE CHAR(1 BYTE), NUM_DISTINCT NUMBER(20), DENSITY NUMBER(20,8), NUM_NULLS NUMBER(20), COL_LENGTH INTEGER, AVG_COL_LENGTH NUMBER(20,2), CHAR_LENGTH INTEGER, TABLE_COMMENTS VARCHAR2(2048 BYTE), COLUMN_COMMENTS VARCHAR2(2048 BYTE), RCOUNT NUMBER(20), MIN_DATE DATE, MAX_DATE DATE, MIN_NUMBER NUMBER(30,10), MAX_NUMBER NUMBER(30,10), MIN_INTEGER INTEGER, MAX_INTEGER INTEGER, MIN_VARCHAR2 VARCHAR2(2048 BYTE), MAX_VARCHAR2 VARCHAR2(2048 BYTE), MIN_LEN INTEGER, MAX_LEN INTEGER, LAST_UPDATED DATE DEFAULT SYSDATE ) TABLESPACE USERS PCTUSED 0 PCTFREE 10 INITRANS 1 MAXTRANS 255 STORAGE ( INITIAL 64K MINEXTENTS 1 MAXEXTENTS UNLIMITED PCTINCREASE 0 BUFFER_POOL DEFAULT ) LOGGING NOCACHE NOPARALLEL; --prefix all the resulting queries from below with this... INSERT INTO DATA_PROFILE_COL_SUM ( tname , cname , data_type , data_length , data_precision , data_scale , nullable , num_distinct , density , num_nulls , col_length , avg_col_length , char_length , table_comments , column_comments , rcount , min_date , max_date , min_number , max_number , min_integer , max_integer , min_varchar2 , max_varchar2 , min_len , max_len , last_updated ) select 'select ' || '''' || c.table_name || '''' || ' as tname, ' || '''' || c.column_name || '''' || ' as cname, ' || '''' || c.data_type || '''' || ' as data_type, ' || '''' || nvl(c.data_length, 0) || '''' || ' as data_length, ' || '''' || nvl(c.data_precision, 0) || '''' || ' as data_precision, ' || '''' || nvl(c.data_scale, 0) || '''' || ' as data_scale, ' || '''' || c.nullable || '''' || ' as nullable, ' || '''' || nvl(c.num_distinct,0) || '''' || ' as num_distinct, ' || '''' || nvl(c.density,0.0) || '''' || ' as density, ' || '''' || nvl(c.num_nulls,0) || '''' || ' as num_nulls, ' || '''' || nvl(c.char_col_decl_length,0) || '''' || ' as col_length, ' || '''' || nvl(c.avg_col_len,0) || '''' || ' as avg_col_length, ' || '''' || nvl(c.char_length,0) || '''' || ' as char_length, ' || '''' || nvl(tblc.comments, 'NONE') || '''' || ' as table_comments, ' || '''' || nvl(colc.comments, 'NONE') || '''' || ' as column_comments, ' || ' count(*) as rcount, min(' || c.column_name || ') as min_date, max(' || c.column_name || ') as max_date, ' || ' null as min_integer, null as max_integer, ' || ' null as min_number, null as max_number, ' || ' null as min_varchar2, null as max_varchar2, min(length(' || c.column_name || ')) as min_len, max(length(' || c.column_name || ')) as max_len, sysdate' || ' from ' || t.owner || '.' || c.table_name || ' ' || CHR(13) || chr(10) || ' UNION ALL ' || chr(13) || chr(10) -- --------------------------------- from sys.all_tab_columns c -- --------------------------------- inner join sys.all_tables t on c.table_name = t.table_name and c.owner = t.owner -- --------------------------------- left outer join sys.all_tab_comments tblc on t.owner = tblc.owner and t.table_name = tblc.table_name -- --------------------------------- left outer join sys.all_col_comments colc on c.owner = colc.owner and c.table_name = colc.table_name and c.column_name = colc.column_name -- --------------------------------- where c.owner in ('YOUR_SCHEMA_OWNER_HERE') and c.data_type = 'DATE' and c.table_name in ( 'YOUR' , 'TABLES_TO_PROFILE' , 'LIST' , 'HERE' ) -- --------------------------------- union all -- --------------------------------- select 'select ' || '''' || c.table_name || '''' || ' as tname, ' || '''' || c.column_name || '''' || ' as cname, ' || '''' || c.data_type || '''' || ' as data_type, ' || '''' || nvl(c.data_length, 0) || '''' || ' as data_length, ' || '''' || nvl(c.data_precision, 0) || '''' || ' as data_precision, ' || '''' || nvl(c.data_scale, 0) || '''' || ' as data_scale, ' || '''' || c.nullable || '''' || ' as nullable, ' || '''' || nvl(c.num_distinct,0) || '''' || ' as num_distinct, ' || '''' || nvl(c.density,0.0) || '''' || ' as density, ' || '''' || nvl(c.num_nulls,0) || '''' || ' as num_nulls, ' || '''' || nvl(c.char_col_decl_length,0) || '''' || ' as col_length, ' || '''' || nvl(c.avg_col_len,0) || '''' || ' as avg_col_length, ' || '''' || nvl(c.char_length,0) || '''' || ' as char_length, ' || '''' || nvl(tblc.comments, 'NONE') || '''' || ' as table_comments, ' || '''' || nvl(colc.comments, 'NONE') || '''' || ' as column_comments, ' || ' count(*) as rcount, null as min_date, null as max_date, ' || ' null as min_integer, null as max_integer, min(' || c.column_name || ') as min_number, max(' || c.column_name || ') as max_number, null as min_varchar2, null as max_varchar2, min(length(' || c.column_name || ')) as min_len, max(length(' || c.column_name || ')) as max_len, sysdate ' || ' from ' || t.owner || '.' || c.table_name || ' ' || CHR(13) || chr(10) || ' UNION ALL ' || chr(13) || chr(10) -- --------------------------------- from sys.all_tab_columns c -- --------------------------------- inner join sys.all_tables t on c.table_name = t.table_name and c.owner = t.owner -- --------------------------------- left outer join sys.all_tab_comments tblc on t.owner = tblc.owner and t.table_name = tblc.table_name -- --------------------------------- left outer join sys.all_col_comments colc on c.owner = colc.owner and c.table_name = colc.table_name and c.column_name = colc.column_name -- --------------------------------- where c.owner in ('YOUR_SCHEMA_OWNER_HERE') and c.data_type = 'NUMBER' and c.table_name in ( 'YOUR' , 'TABLES_TO_PROFILE' , 'LIST' , 'HERE' ) -- --------------------------------- union all -- --------------------------------- select 'select ' || '''' || c.table_name || '''' || ' as tname, ' || '''' || c.column_name || '''' || ' as cname, ' || '''' || c.data_type || '''' || ' as data_type, ' || '''' || nvl(c.data_length, 0) || '''' || ' as data_length, ' || '''' || nvl(c.data_precision, 0) || '''' || ' as data_precision, ' || '''' || nvl(c.data_scale, 0) || '''' || ' as data_scale, ' || '''' || c.nullable || '''' || ' as nullable, ' || '''' || nvl(c.num_distinct,0) || '''' || ' as num_distinct, ' || '''' || nvl(c.density,0.0) || '''' || ' as density, ' || '''' || nvl(c.num_nulls,0) || '''' || ' as num_nulls, ' || '''' || nvl(c.char_col_decl_length,0) || '''' || ' as col_length, ' || '''' || nvl(c.avg_col_len,0) || '''' || ' as avg_col_length, ' || '''' || nvl(c.char_length,0) || '''' || ' as char_length, ' || '''' || nvl(tblc.comments, 'NONE') || '''' || ' as table_comments, ' || '''' || nvl(colc.comments, 'NONE') || '''' || ' as column_comments, ' || ' count(*) as rcount, null as min_date, null as max_date, min(' || c.column_name || ') as min_integer, max(' || c.column_name || ') as max_integer, ' || ' null as min_number, null as max_number, ' || ' null as min_varchar2, null as max_varchar2, min(length(' || c.column_name || ')) as min_len, max(length(' || c.column_name || ')) as max_len, sysdate ' || ' from ' || t.owner || '.' || c.table_name || ' ' || CHR(13) || chr(10) || ' UNION ALL ' || chr(13) || chr(10) -- --------------------------------- from sys.all_tab_columns c -- --------------------------------- inner join sys.all_tables t on c.table_name = t.table_name and c.owner = t.owner -- --------------------------------- left outer join sys.all_tab_comments tblc on t.owner = tblc.owner and t.table_name = tblc.table_name -- --------------------------------- left outer join sys.all_col_comments colc on c.owner = colc.owner and c.table_name = colc.table_name and c.column_name = colc.column_name -- --------------------------------- where c.owner in ('YOUR_SCHEMA_OWNER_HERE') and c.data_type = 'INTEGER' and c.table_name in ( 'YOUR' , 'TABLES_TO_PROFILE' , 'LIST' , 'HERE' ) -- --------------------------------- union all -- --------------------------------- select 'select ' || '''' || c.table_name || '''' || ' as tname, ' || '''' || c.column_name || '''' || ' as cname, ' || '''' || c.data_type || '''' || ' as data_type, ' || '''' || nvl(c.data_length, 0) || '''' || ' as data_length, ' || '''' || nvl(c.data_precision, 0) || '''' || ' as data_precision, ' || '''' || nvl(c.data_scale, 0) || '''' || ' as data_scale, ' || '''' || c.nullable || '''' || ' as nullable, ' || '''' || nvl(c.num_distinct,0) || '''' || ' as num_distinct, ' || '''' || nvl(c.density,0.0) || '''' || ' as density, ' || '''' || nvl(c.num_nulls,0) || '''' || ' as num_nulls, ' || '''' || nvl(c.char_col_decl_length,0) || '''' || ' as col_length, ' || '''' || nvl(c.avg_col_len,0) || '''' || ' as avg_col_length, ' || '''' || nvl(c.char_length,0) || '''' || ' as char_length, ' || '''' || nvl(tblc.comments, 'NONE') || '''' || ' as table_comments, ' || '''' || nvl(colc.comments, 'NONE') || '''' || ' as column_comments, ' || ' count(*) as rcount, null as min_date, null as max_date, ' || ' null as min_integer, null as max_integer, ' || ' null as min_number, null as max_number, min(' || c.column_name || ') as min_varchar2, max(' || c.column_name || ') as max_varchar2, min(length(' || c.column_name || ')) as min_len, max(length(' || c.column_name || ')) as max_len, sysdate' || ' from ' || t.owner || '.' || c.table_name || ' ' || CHR(13) || chr(10) || ' UNION ALL ' || chr(13) || chr(10) -- --------------------------------- from sys.all_tab_columns c -- --------------------------------- inner join sys.all_tables t on c.table_name = t.table_name and c.owner = t.owner -- --------------------------------- left outer join sys.all_tab_comments tblc on t.owner = tblc.owner and t.table_name = tblc.table_name -- --------------------------------- left outer join sys.all_col_comments colc on c.owner = colc.owner and c.table_name = colc.table_name and c.column_name = colc.column_name -- --------------------------------- where c.owner in ('YOUR_SCHEMA_OWNER_HERE') and c.data_type in ('VARCHAR2', 'CHAR') and c.table_name in ( 'YOUR' , 'TABLES_TO_PROFILE' , 'LIST' , 'HERE' ) order by 1 ; -- drop any existing data profiling metadata detail table drop table DATA_PROFILE_COL_DTL CASCADE CONSTRAINTS; -- create new data profiling metadata detail table create table DATA_PROFILE_COL_DTL ( tname varchar(255) NOT NULL , cname varchar(255) NOT NULL , freq_rank integer NOT NULL , date_column_value date , integer_column_value integer , number_column_value number , varchar2_column_value varchar2(2048) , occurs integer , total_rows decimal(18,0) , occurs_percent decimal(18,5) , capture_date date ); -- whats there ? select * from DATA_PROFILE_COL_DTL -- remove them delete DATA_PROFILE_COL_DTL -- put this in front of the SQL statements generated below. insert INTO DATA_PROFILE_COL_DTL ( tname , cname , freq_rank , date_column_value , integer_column_value , number_column_value , varchar2_column_value , occurs , total_rows , occurs_percent , capture_date ) select ' select a.tname as table_name, a.cname as column_name, rownum as freq_rank, ' || 'a.column_value as date_column_value, ' || 'null as integer_column_value, null as number_column_value, null as varchar2_column_value, ' || ' a.occurs, t.total_rows, round((((a.occurs * 1.0) / (t.total_rows * 1.0)) * 100), 5) as occurs_percent, ' || ' a.capture_date' || ' from ( select ' || '''' || c.table_name || '''' || ' as tname, ' || '''' || c.column_name || '''' || ' as cname, ' || c.column_name || ' as column_value, ' || ' count(*) as occurs, sysdate as capture_date' || ' from ' || t.owner || '.' || c.table_name || ' group by ' || c.column_name || ' order by count(*) desc )a inner join ' || ' ( select ''' || c.table_name || ''' as tname, count(*) as total_rows from ' || c.table_name || ') t ' || ' on a.tname = t.tname where rownum <= 100' || CHR(13) || chr(10) || ' UNION ALL ' -- --------------------------------- from sys.all_tab_columns c -- --------------------------------- inner join sys.all_tables t on c.table_name = t.table_name and c.owner = t.owner -- --------------------------------- where c.owner in ('YOUR_SCHEMA_OWNER_HERE') and c.data_type IN ('DATE') and c.table_name in ( 'YOUR' , 'TABLES_TO_PROFILE' , 'LIST' , 'HERE' ) -- --------------------------------- UNION ALL -- --------------------------------- select ' select a.tname as table_name, a.cname as column_name, rownum as freq_rank, ' || 'null as date_column_value, ' || 'a.column_value as integer_column_value, null as number_column_value, null as varchar2_column_value, ' || ' a.occurs, t.total_rows, round((((a.occurs * 1.0) / (t.total_rows * 1.0)) * 100), 5) as occurs_percent, ' || ' a.capture_date' || ' from ( select ' || '''' || c.table_name || '''' || ' as tname, ' || '''' || c.column_name || '''' || ' as cname, ' || c.column_name || ' as column_value, ' || ' count(*) as occurs, sysdate as capture_date' || ' from ' || t.owner || '.' || c.table_name || ' group by ' || c.column_name || ' order by count(*) desc )a inner join ' || ' ( select ''' || c.table_name || ''' as tname, count(*) as total_rows from ' || c.table_name || ') t ' || ' on a.tname = t.tname where rownum <= 100' || CHR(13) || chr(10) || ' UNION ALL ' -- --------------------------------- from sys.all_tab_columns c -- --------------------------------- inner join sys.all_tables t on c.table_name = t.table_name and c.owner = t.owner -- --------------------------------- where c.owner in ('YOUR_SCHEMA_OWNER_HERE') and c.data_type = 'INTEGER' and c.table_name in ( 'YOUR' , 'TABLES_TO_PROFILE' , 'LIST' , 'HERE' ) -- --------------------------------- UNION ALL -- --------------------------------- select ' select a.tname as table_name, a.cname as column_name, rownum as freq_rank, ' || 'null as date_column_value, ' || 'null as integer_column_value, a.column_value as number_column_value, null as varchar2_column_value, ' || ' a.occurs, t.total_rows, round((((a.occurs * 1.0) / (t.total_rows * 1.0)) * 100), 5) as occurs_percent, ' || ' a.capture_date' || ' from ( select ' || '''' || c.table_name || '''' || ' as tname, ' || '''' || c.column_name || '''' || ' as cname, ' || c.column_name || ' as column_value, ' || ' count(*) as occurs, sysdate as capture_date' || ' from ' || t.owner || '.' || c.table_name || ' group by ' || c.column_name || ' order by count(*) desc )a inner join ' || ' ( select ''' || c.table_name || ''' as tname, count(*) as total_rows from ' || c.table_name || ') t ' || ' on a.tname = t.tname where rownum <= 100' || CHR(13) || chr(10) || ' UNION ALL ' -- --------------------------------- from sys.all_tab_columns c -- --------------------------------- inner join sys.all_tables t on c.table_name = t.table_name and c.owner = t.owner -- --------------------------------- where c.owner in ('YOUR_SCHEMA_OWNER_HERE') and c.data_type in ('NUMBER', 'DECIMAL') and c.table_name in ( 'YOUR' , 'TABLES_TO_PROFILE' , 'LIST' , 'HERE' ) -- --------------------------------- UNION ALL -- --------------------------------- select ' select a.tname as table_name, a.cname as column_name, rownum as freq_rank, ' || 'null as date_column_value, ' || 'null as integer_column_value, null as number_column_value, a.column_value as varchar2_column_value, ' || ' a.occurs, t.total_rows, round((((a.occurs * 1.0) / (t.total_rows * 1.0)) * 100), 5) as occurs_percent, ' || ' a.capture_date' || ' from ( select ' || '''' || c.table_name || '''' || ' as tname, ' || '''' || c.column_name || '''' || ' as cname, ' || c.column_name || ' as column_value, ' || ' count(*) as occurs, sysdate as capture_date' || ' from ' || t.owner || '.' || c.table_name || ' group by ' || c.column_name || ' order by count(*) desc )a inner join ' || ' ( select ''' || c.table_name || ''' as tname, count(*) as total_rows from ' || c.table_name || ') t ' || ' on a.tname = t.tname where rownum <= 100' || CHR(13) || chr(10) || ' UNION ALL ' -- --------------------------------- from sys.all_tab_columns c -- --------------------------------- inner join sys.all_tables t on c.table_name = t.table_name and c.owner = t.owner -- --------------------------------- where c.owner in ('YOUR_SCHEMA_OWNER_HERE') and c.data_type in ('VARCHAR2', 'CHAR') and c.table_name in ( 'YOUR' , 'TABLES_TO_PROFILE' , 'LIST' , 'HERE' ) /* Don McMunn is a professional, business-focused consultant specializing in data warehousing and business intelligence design, development and delivery for over a decade with the Fortune 500 and SMB segments. He may be reached at: dmcmunn at ipcdesigns dot com */