One thing that every data warehouse needs is an ability to handle unknown data. This is done by creating a record in the dimension table to handle the unknowns. Then when we load a fact table we map any values we cannot find a match for to this unknown record.
One of the things I do is populate the Unknown record in the ETL so I know it is always there. One of the things I dislike is creating the SQL INSERT statement for every dimension. Writing this statement once isn’t a huge burden but typically dimension tables evolve and updating these statements multiple times becomes tedious.
Recently I have been working on a project with a lot of dynamic SQL and it got me thinking that it was time to construct the INSERT statement for the Unknown record on the fly.
I created an SSIS Script Task in C# and used SMO to access the table that I was loading. I then gathered up the columns in the table along with their corresponding data type. As I iterated over the columns I populated a prefix string to hold the column definition and a suffix string to hold the values to be inserted. I used a method to lookup the columns data type and return the desired value.
The result is the script below:
public void Main() { Boolean bProcessLive = Convert.ToBoolean(Dts.Variables["etlProcessLive"].Value); int ErrorNumber = 9000; try { string sSQLServer = this.Dts.Connections["OLEDB_Connection"].Properties["ServerName"].GetValue(Dts.Connections["OLEDB_Connection"]).ToString(); if (bProcessLive == false) MessageBox.Show("Server Name: " + sSQLServer, "SQL Objects"); string sSQLDatabase = this.Dts.Connections["OLEDB_Connection"].Properties["InitialCatalog"].GetValue(Dts.Connections["OLEDB_Connection"]).ToString(); if (bProcessLive == false) MessageBox.Show("Database Name: " + sSQLDatabase, "SQL Objects"); // Connect to the instance of SQL Server. Server oSQLServer = new Server(sSQLServer); // Reference the database. Database db = oSQLServer.Databases[sSQLDatabase]; // Reference the table String sDestTable = Dts.Variables["etlDestinationTable"].Value.ToString(); // Bug in SMO doesn't return schema data. ConnectID:696411 // The following line won't work // Table table = db.Tables["dimDate","dbo"]; // Iterate through the tables in database and script each one. Display the script. foreach (Table tb in db.Tables) { Boolean bResult = tb.ToString() == sDestTable.ToString(); if (bProcessLive == false) MessageBox.Show("Enumerated Table: " + tb.ToString() + "Destination Table: " + sDestTable + " Result: " + bResult.ToString(), "SQL Objects"); // Check if the table is our destination table and that it is not a system table if (bResult == true && tb.IsSystemObject == false) { string sql; if (bProcessLive == false) MessageBox.Show("Scripting columns for table: " + tb.Name, "SQL Objects"); // Count the columns on the table int colCount = tb.Columns.Count; if (bProcessLive == false) MessageBox.Show("Column Count: " + colCount.ToString(), "Column Count"); string InsertColumns = ""; string ValuesColumns = ""; // Script out each column while (colCount > 0) { colCount--; MessageBox.Show("Column: " + tb.Columns[colCount].Name + "; Data Type: " + tb.Columns[colCount].DataType, "Column Information"); // Create Columns portion of Insert statement if (InsertColumns == "") InsertColumns = "[" + tb.Columns[colCount].Name + "]"; else InsertColumns = InsertColumns + ",[" + tb.Columns[colCount].Name + "]"; // Create Values portion of Insert statement if (ValuesColumns == "") ValuesColumns = "'" + GetValue(tb.Columns[colCount].DataType.ToString(), 0, 0) + "'"; else ValuesColumns = ValuesColumns + ",'" + GetValue(tb.Columns[colCount].DataType.ToString(), 0, 0) + "'"; } sql = "INSERT INTO " + sDestTable + " (" + InsertColumns + " ) VALUES (" + ValuesColumns + ")"; MessageBox.Show(sql, "SQL Statement"); db.ExecuteNonQuery(sql); } } } catch (Exception ex) { if (bProcessLive == false) MessageBox.Show(ex.Message, "Script Task Failure"); Dts.Events.FireError(ErrorNumber, "Script Task Failure", ex.Message, ex.HelpLink, 0); Dts.TaskResult = (int)ScriptResults.Failure; } Dts.TaskResult = (int)ScriptResults.Success; } private string GetValue(string datatype, int length, int precision) { string colvalue; switch (datatype) { case "bit": colvalue = "0"; break; case "tinyint": colvalue = "0"; break; case "smallint": colvalue = "-1"; break; case "int": colvalue = "-1"; break; case "bigint": colvalue = "-1"; break; case "varchar": colvalue = "Unknown"; break; case "nvarchar": colvalue = "Unknown"; break; case "date": colvalue = "1/1/1900"; break; case "time": colvalue = "00:00:00.0000000"; break; case "datetime": colvalue = "1900-01-01 00:00:00.000"; break; case "datetime2": colvalue = "1900-01-01 00:00:00.0000000"; break; default: colvalue = "-1"; break; } return colvalue; }