How to handle variable column count with the SSIS 2008 Flat File Source.

If you want to import flat files (e.g. *.csv) that contain rows with dynamic column count, the best way is to use this codeplex component: http://ssisdfs.codeplex.com/

 

If you don’t want to use that component, here is a script task that alters the import file, before the flat file source executes.

 

 

public void Main()
{
Dts.TaskResult = (int)ScriptResults.Success;
// Files should contain 46 columns
int maxColumnCount = 46;
// Determines if an event can be fired more than once for the duration of the current execution.
Boolean fireAgain = true;
Dts.Events.FireInformation(0, "Info: ", "Update file process started (update file so it contains the correct column count per row).", string.Empty, 0, ref fireAgain);
// Get file path.
string importFile = Dts.Variables[@"User::FileToImport"].Value.ToString();
// Check if file exists.
if (System.IO.File.Exists(importFile))
{
// Will contain the new file content with corrected column count.
var newFileContents = new System.Text.StringBuilder(string.Empty);
using (var streamReader = new System.IO.StreamReader(spiFile))
{
// Continue reading until end of file.
while (!streamReader.EndOfStream)
{
// Read a line.
string line = streamReader.ReadLine();
// Ignore empty lines (empty lines will not be added to the new file contents).
if (!string.IsNullOrEmpty(line))
{
// Split on ",", so we can count columns. Use string[] for performance reasons (later the array is used to join).
string[] lineColumns = line.Split(',');
// Check if current row contains less columns, then max column count.
bool lessColumns = (lineColumns.Length < maxColumnCount);
if (lessColumns)
{
// Add column until maxColumnCount.
Array.Resize(ref lineColumns, maxColumnCount);
}
// Store only max column count in new file contents (ignoring extra columns).
string newLine = string.Join(",", lineColumns, 0, maxColumnCount);
newFileContents.AppendLine(newLine);
}
}
}
// Save new file contents.
System.IO.File.WriteAllText(importFile, newFileContents.ToString());
}
else
{
Dts.Events.FireInformation(0, "Info: ", string.Format("File: {0} not found.", importFile), string.Empty, 0, ref fireAgain);
Dts.TaskResult = (int)ScriptResults.Failure;
}
Dts.Events.FireInformation(0, "Info: ", "Update file process completed.", string.Empty, 0, ref fireAgain);
}

3 Comments

  1. Anthony

    This is not a great solution. If there is a comma inside a column (meaning wrapped with quotes), which is correct csv, this will mistake it for a column and mess things up. Basically, it is not handling csv format correctly.

    I have adjusted this and using MS VB lib to properly get the columns:

    using Microsoft.VisualBasic;
    using System.Text;

    public void Main()
    {
    Dts.TaskResult = (int)ScriptResults.Success;
    // Files should contain 11 columns
    int maxColumnCount = 11;
    // Determines if an event can be fired more than once for the duration of the current execution.
    Boolean fireAgain = true;
    Dts.Events.FireInformation(0, “Info: “, “Update file process started (update file so it contains the correct column count per row).”, string.Empty, 0, ref fireAgain);
    // Get file path.
    string importFile = Dts.Variables[@”User::InputPathFile”].Value.ToString();
    // Check if file exists.
    if (System.IO.File.Exists(importFile))
    {
    // Will contain the new file content with corrected column count.
    var newFileContents = new System.Text.StringBuilder(string.Empty);

    using (Microsoft.VisualBasic.FileIO.TextFieldParser MyReader = new Microsoft.VisualBasic.FileIO.TextFieldParser(importFile))
    {
    MyReader.TextFieldType = Microsoft.VisualBasic.FileIO.FieldType.Delimited;
    MyReader.SetDelimiters(“,”);
    string[] currentRow = null;
    while (!MyReader.EndOfData)
    {
    try
    {
    currentRow = MyReader.ReadFields();
    string newLine = null;
    for (int i = 0; i < maxColumnCount; i++)
    {
    newLine += StringToCSVCell(currentRow[i]);
    if (i != 10) newLine += ",";
    }
    newFileContents.AppendLine(newLine);
    }
    catch (Microsoft.VisualBasic.FileIO.MalformedLineException ex)
    {
    Dts.Events.FireInformation(0, "Info: ", "Line " + ex.Message + "is not valid and will be skipped.", string.Empty, 0, ref fireAgain);
    }
    }
    }
    // Save new file contents.
    System.IO.File.WriteAllText(importFile, newFileContents.ToString());
    }
    else
    {
    Dts.Events.FireInformation(0, "Info: ", string.Format("File: {0} not found.", importFile), string.Empty, 0, ref fireAgain);
    Dts.TaskResult = (int)ScriptResults.Failure;
    }
    Dts.Events.FireInformation(0, "Info: ", "Update file process completed.", string.Empty, 0, ref fireAgain);
    }

    ///
    /// Turn a string into a CSV cell output
    ///
    /// String to output
    /// The CSV cell formatted string
    public static string StringToCSVCell(string str)
    {
    bool mustQuote = (str.Contains(“,”) || str.Contains(“\””) || str.Contains(“\r”) || str.Contains(“\n”));
    if (mustQuote)
    {
    StringBuilder sb = new StringBuilder();
    sb.Append(“\””);
    foreach (char nextChar in str)
    {
    sb.Append(nextChar);
    if (nextChar == ‘”‘)
    sb.Append(“\””);
    }
    sb.Append(“\””);
    return sb.ToString();
    }

    return str;
    }

Leave a Reply

Your email address will not be published. Required fields are marked *