28 August, 2013
3 Comments
1 category
If you want to import flat files (e.g. *.csv) that contain rows with dynamic column count, the best way is to use this codeplex component: http://ssisdfs.codeplex.com/
If you don’t want to use that component, here is a script task that alters the import file, before the flat file source executes.
public void Main() { Dts.TaskResult = (int)ScriptResults.Success; // Files should contain 46 columns int maxColumnCount = 46; // Determines if an event can be fired more than once for the duration of the current execution. Boolean fireAgain = true; Dts.Events.FireInformation(0, "Info: ", "Update file process started (update file so it contains the correct column count per row).", string.Empty, 0, ref fireAgain); // Get file path. string importFile = Dts.Variables[@"User::FileToImport"].Value.ToString(); // Check if file exists. if (System.IO.File.Exists(importFile)) { // Will contain the new file content with corrected column count. var newFileContents = new System.Text.StringBuilder(string.Empty); using (var streamReader = new System.IO.StreamReader(spiFile)) { // Continue reading until end of file. while (!streamReader.EndOfStream) { // Read a line. string line = streamReader.ReadLine(); // Ignore empty lines (empty lines will not be added to the new file contents). if (!string.IsNullOrEmpty(line)) { // Split on ",", so we can count columns. Use string[] for performance reasons (later the array is used to join). string[] lineColumns = line.Split(','); // Check if current row contains less columns, then max column count. bool lessColumns = (lineColumns.Length < maxColumnCount); if (lessColumns) { // Add column until maxColumnCount. Array.Resize(ref lineColumns, maxColumnCount); } // Store only max column count in new file contents (ignoring extra columns). string newLine = string.Join(",", lineColumns, 0, maxColumnCount); newFileContents.AppendLine(newLine); } } } // Save new file contents. System.IO.File.WriteAllText(importFile, newFileContents.ToString()); } else { Dts.Events.FireInformation(0, "Info: ", string.Format("File: {0} not found.", importFile), string.Empty, 0, ref fireAgain); Dts.TaskResult = (int)ScriptResults.Failure; } Dts.Events.FireInformation(0, "Info: ", "Update file process completed.", string.Empty, 0, ref fireAgain); }
Tags: SSIS
Category: Uncategorized
using (var streamReader = new System.IO.StreamReader(spiFile))
–where does this spiFile come in? I’m not sure what it is referencing
@Errol I believe that should be
using (var streamReader = new System.IO.StreamReader(importFile))
instead.
This is not a great solution. If there is a comma inside a column (meaning wrapped with quotes), which is correct csv, this will mistake it for a column and mess things up. Basically, it is not handling csv format correctly.
I have adjusted this and using MS VB lib to properly get the columns:
using Microsoft.VisualBasic;
using System.Text;
public void Main()
{
Dts.TaskResult = (int)ScriptResults.Success;
// Files should contain 11 columns
int maxColumnCount = 11;
// Determines if an event can be fired more than once for the duration of the current execution.
Boolean fireAgain = true;
Dts.Events.FireInformation(0, “Info: “, “Update file process started (update file so it contains the correct column count per row).”, string.Empty, 0, ref fireAgain);
// Get file path.
string importFile = Dts.Variables[@”User::InputPathFile”].Value.ToString();
// Check if file exists.
if (System.IO.File.Exists(importFile))
{
// Will contain the new file content with corrected column count.
var newFileContents = new System.Text.StringBuilder(string.Empty);
using (Microsoft.VisualBasic.FileIO.TextFieldParser MyReader = new Microsoft.VisualBasic.FileIO.TextFieldParser(importFile))
{
MyReader.TextFieldType = Microsoft.VisualBasic.FileIO.FieldType.Delimited;
MyReader.SetDelimiters(“,”);
string[] currentRow = null;
while (!MyReader.EndOfData)
{
try
{
currentRow = MyReader.ReadFields();
string newLine = null;
for (int i = 0; i < maxColumnCount; i++)
{
newLine += StringToCSVCell(currentRow[i]);
if (i != 10) newLine += ",";
}
newFileContents.AppendLine(newLine);
}
catch (Microsoft.VisualBasic.FileIO.MalformedLineException ex)
{
Dts.Events.FireInformation(0, "Info: ", "Line " + ex.Message + "is not valid and will be skipped.", string.Empty, 0, ref fireAgain);
}
}
}
// Save new file contents.
System.IO.File.WriteAllText(importFile, newFileContents.ToString());
}
else
{
Dts.Events.FireInformation(0, "Info: ", string.Format("File: {0} not found.", importFile), string.Empty, 0, ref fireAgain);
Dts.TaskResult = (int)ScriptResults.Failure;
}
Dts.Events.FireInformation(0, "Info: ", "Update file process completed.", string.Empty, 0, ref fireAgain);
}
///
/// Turn a string into a CSV cell output
///
/// String to output
/// The CSV cell formatted string
public static string StringToCSVCell(string str)
{
bool mustQuote = (str.Contains(“,”) || str.Contains(“\””) || str.Contains(“\r”) || str.Contains(“\n”));
if (mustQuote)
{
StringBuilder sb = new StringBuilder();
sb.Append(“\””);
foreach (char nextChar in str)
{
sb.Append(nextChar);
if (nextChar == ‘”‘)
sb.Append(“\””);
}
sb.Append(“\””);
return sb.ToString();
}
return str;
}