No edit summary
No edit summary
Line 13: Line 13:
Using GemBox.Spreadsheet for Excel file processing:
Using GemBox.Spreadsheet for Excel file processing:


<syntaxhighlight lang="csharp">
 
using GemBox.Spreadsheet;
using GemBox.Spreadsheet;


Line 43: Line 43:
     return results;
     return results;
}
}
</syntaxhighlight>
 


=== CSV Files ===
=== CSV Files ===
Line 49: Line 49:
Using CsvHelper for CSV processing with flexible date handling:
Using CsvHelper for CSV processing with flexible date handling:


<syntaxhighlight lang="csharp">
 
using CsvHelper;
using CsvHelper;
using CsvHelper.Configuration;
using CsvHelper.Configuration;
Line 93: Line 93:
     }
     }
}
}
</syntaxhighlight>
 


=== XML Files ===
=== XML Files ===


<syntaxhighlight lang="csharp">
 
using System.Xml.Linq;
using System.Xml.Linq;
using System.Xml.Serialization;
using System.Xml.Serialization;
Line 124: Line 124:
     return serializer.Deserialize(stream) as T;
     return serializer.Deserialize(stream) as T;
}
}
</syntaxhighlight>
 


=== JSON Files ===
=== JSON Files ===


<syntaxhighlight lang="csharp">
 
using Newtonsoft.Json;
using Newtonsoft.Json;


Line 142: Line 142:
     return JsonConvert.DeserializeObject<List<T>>(json) ?? new List<T>();
     return JsonConvert.DeserializeObject<List<T>>(json) ?? new List<T>();
}
}
</syntaxhighlight>
 


=== Tab-Delimited and Fixed-Width Files ===
=== Tab-Delimited and Fixed-Width Files ===


<syntaxhighlight lang="csharp">
 
// Tab-delimited files
// Tab-delimited files
public List<Dictionary<string, string>> ImportTabDelimited(string filePath)
public List<Dictionary<string, string>> ImportTabDelimited(string filePath)
Line 204: Line 204:
     return results;
     return results;
}
}
</syntaxhighlight>
 


== Generic Import Script Template ==
== Generic Import Script Template ==


<syntaxhighlight lang="csharp">
 
using System;
using System;
using System.IO;
using System.IO;
Line 376: Line 376:
     }
     }
}
}
</syntaxhighlight>
 


== Import Mapping Configuration ==
== Import Mapping Configuration ==


<syntaxhighlight lang="csharp">
 
public class ImportMapping
public class ImportMapping
{
{
Line 403: Line 403:
     public int MaxErrors { get; set; } = 100;
     public int MaxErrors { get; set; } = 100;
}
}
</syntaxhighlight>
 


== Common Import Patterns ==
== Common Import Patterns ==
Line 409: Line 409:
=== Batch Processing ===
=== Batch Processing ===


<syntaxhighlight lang="csharp">
 
public async Task<ImportResult> BatchImportAsync<T>(
public async Task<ImportResult> BatchImportAsync<T>(
     IEnumerable<T> data,  
     IEnumerable<T> data,  
Line 446: Line 446:
     return result;
     return result;
}
}
</syntaxhighlight>
 


=== Duplicate Detection ===
=== Duplicate Detection ===


<syntaxhighlight lang="csharp">
 
public class DuplicateDetector<T>
public class DuplicateDetector<T>
{
{
Line 481: Line 481:
     }
     }
}
}
</syntaxhighlight>
 


=== Data Transformation Pipeline ===
=== Data Transformation Pipeline ===


<syntaxhighlight lang="csharp">
 
public class TransformPipeline<TSource, TTarget>
public class TransformPipeline<TSource, TTarget>
{
{
Line 520: Line 520:
     }
     }
}
}
</syntaxhighlight>
 


== Error Handling and Logging ==
== Error Handling and Logging ==


<syntaxhighlight lang="csharp">
 
public class ImportLogger
public class ImportLogger
{
{
Line 577: Line 577:
     public DateTime Timestamp { get; set; }
     public DateTime Timestamp { get; set; }
}
}
</syntaxhighlight>
 


== File Upload and Validation ==
== File Upload and Validation ==


<syntaxhighlight lang="csharp">
 
public class FileImportValidator
public class FileImportValidator
{
{
Line 697: Line 697:
     public void AddWarning(string message) => Warnings.Add(message);
     public void AddWarning(string message) => Warnings.Add(message);
}
}
</syntaxhighlight>
 


== Performance Optimization ==
== Performance Optimization ==
Line 703: Line 703:
=== Memory-Efficient Large File Processing ===
=== Memory-Efficient Large File Processing ===


<syntaxhighlight lang="csharp">
 
public class LargeFileProcessor
public class LargeFileProcessor
{
{
Line 727: Line 727:
     }
     }
}
}
</syntaxhighlight>
 


=== Parallel Processing ===
=== Parallel Processing ===


<syntaxhighlight lang="csharp">
 
public class ParallelImporter<T>
public class ParallelImporter<T>
{
{
Line 776: Line 776:
     public double SuccessRate => TotalCount > 0 ? (double)SuccessCount / TotalCount * 100 : 0;
     public double SuccessRate => TotalCount > 0 ? (double)SuccessCount / TotalCount * 100 : 0;
}
}
</syntaxhighlight>
 


== Best Practices ==
== Best Practices ==

Revision as of 03:06, 10 November 2025

File Import System Documentation

This page documents the file import infrastructure in ZinformAccounts, providing templates and patterns for implementing various file format imports including CSV, XML, Excel, JSON, and other data sources.

Overview

The import system provides a flexible framework for processing external data files into the ZinformAccounts database. All imports follow consistent patterns for error handling, validation, and transaction management.

Supported File Formats

Excel Files (.xlsx, .xls)

Using GemBox.Spreadsheet for Excel file processing:


using GemBox.Spreadsheet;

/// <summary>
/// Loads the first worksheet of an Excel file and maps each data row to a T.
/// Row 0 is treated as the header row; rows whose mapping yields null are skipped.
/// </summary>
/// <typeparam name="T">Target record type; must have a parameterless constructor.</typeparam>
/// <param name="filePath">Path to the .xlsx/.xls file.</param>
/// <returns>The mapped records (never null).</returns>
public List<T> ImportExcel<T>(string filePath) where T : new()
{
    var workbook = ExcelFile.Load(filePath);
    var worksheet = workbook.Worksheets[0];
    var results = new List<T>();

    // Build a column-index -> header-name map from the header row.
    const int headerRowIndex = 0;
    var headers = new Dictionary<int, string>();

    // Hoisted: CalculateMaxUsedColumns() scans the sheet, so don't re-evaluate it
    // in the loop condition on every iteration (the original did).
    int usedColumns = worksheet.CalculateMaxUsedColumns();
    for (int col = 0; col < usedColumns; col++)
    {
        var headerValue = worksheet.Cells[headerRowIndex, col].Value?.ToString();
        if (!string.IsNullOrEmpty(headerValue))
            headers[col] = headerValue;
    }

    // Map every row below the header; MapRowToObject decides validity (null = skip).
    for (int row = headerRowIndex + 1; row < worksheet.Rows.Count; row++)
    {
        var item = MapRowToObject<T>(worksheet.Rows[row], headers);
        if (item != null)
            results.Add(item);
    }

    return results;
}


CSV Files

Using CsvHelper for CSV processing with flexible date handling:


using CsvHelper; using CsvHelper.Configuration; using System.Globalization;

/// <summary>
/// Reads all records of type T from a CSV file using CsvHelper.
/// Missing fields and bad data are tolerated, values are trimmed, and DateTime
/// columns are parsed through FlexibleDateConverter.
/// </summary>
/// <param name="filePath">Path to the CSV file.</param>
/// <param name="hasHeader">Whether the first line contains column names.</param>
public List<T> ImportCsv<T>(string filePath, bool hasHeader = true)
{
    var csvConfig = new CsvConfiguration(CultureInfo.InvariantCulture)
    {
        HasHeaderRecord = hasHeader,
        MissingFieldFound = null,   // tolerate short rows
        BadDataFound = null,        // tolerate malformed fields
        TrimOptions = TrimOptions.Trim
    };

    using var fileReader = new StreamReader(filePath);
    using var csvReader = new CsvReader(fileReader, csvConfig);

    // Dates may arrive in several regional formats; use the shared converter.
    csvReader.Context.TypeConverterCache.AddConverter<DateTime>(new FlexibleDateConverter());

    var records = csvReader.GetRecords<T>();
    return records.ToList();
}

/// <summary>
/// CsvHelper type converter that accepts dates in several common day-first and
/// ISO formats. Registered by ImportCsv for DateTime columns.
/// </summary>
public class FlexibleDateConverter : ITypeConverter
{
    // Tried in order; the first exact match wins.
    private readonly string[] _dateFormats = new[]
    {
        "dd/MM/yyyy",
        "d/M/yyyy",
        "yyyy-MM-dd",
        "dd-MM-yyyy",
        "dd.MM.yyyy"
    };

    /// <summary>Parses <paramref name="text"/> against the supported formats.</summary>
    /// <exception cref="InvalidOperationException">No supported format matched.</exception>
    public object ConvertFromString(string text, IReaderRow row, MemberMapData memberMapData)
    {
        if (DateTime.TryParseExact(text, _dateFormats,
            CultureInfo.InvariantCulture, DateTimeStyles.None, out var date))
            return date;

        throw new InvalidOperationException($"Cannot parse date: {text}");
    }

    /// <summary>
    /// Required by CsvHelper's ITypeConverter; the original class omitted it and
    /// therefore did not satisfy the interface. Writes dates back using the
    /// primary (day-first) format.
    /// </summary>
    public string ConvertToString(object value, IWriterRow row, MemberMapData memberMapData)
    {
        return value is DateTime date
            ? date.ToString(_dateFormats[0], CultureInfo.InvariantCulture)
            : value?.ToString();
    }
}


XML Files

using System.Xml.Linq; using System.Xml.Serialization;

// Method 1: flexible parsing with XDocument — caller supplies the element-to-object mapper.
/// <summary>
/// Loads an XML file and maps every &lt;Record&gt; element (at any depth) to a T
/// via <paramref name="mapper"/>; mappings that return null are skipped.
/// </summary>
public List<T> ImportXmlFlexible<T>(string filePath, Func<XElement, T> mapper)
{
    var document = XDocument.Load(filePath);
    var imported = new List<T>();

    foreach (var recordElement in document.Descendants("Record"))
    {
        var mapped = mapper(recordElement);
        if (mapped != null)
            imported.Add(mapped);
    }

    return imported;
}

// Method 2: strongly-typed deserialization via XmlSerializer.
/// <summary>
/// Deserializes an entire XML file into a single object of type T.
/// Returns null when the deserialized root is not a T.
/// </summary>
public T ImportXmlSerialized<T>(string filePath) where T : class
{
    using var stream = File.OpenRead(filePath);
    var serializer = new XmlSerializer(typeof(T));
    return serializer.Deserialize(stream) as T;
}


JSON Files

using Newtonsoft.Json;

/// <summary>Deserializes a whole JSON file into a single object of type T.</summary>
public T ImportJson<T>(string filePath)
{
    return JsonConvert.DeserializeObject<T>(File.ReadAllText(filePath));
}

/// <summary>
/// Deserializes a JSON array file into a list of T; a null payload yields an
/// empty list rather than null.
/// </summary>
public List<T> ImportJsonArray<T>(string filePath)
{
    var payload = File.ReadAllText(filePath);
    var parsed = JsonConvert.DeserializeObject<List<T>>(payload);
    return parsed ?? new List<T>();
}


Tab-Delimited and Fixed-Width Files

// Tab-delimited files
/// <summary>
/// Reads a tab-delimited file whose first line holds column names and returns
/// one header-to-value dictionary per data line. Rows shorter or longer than
/// the header keep only the columns both sides share.
/// </summary>
public List<Dictionary<string, string>> ImportTabDelimited(string filePath)
{
    var results = new List<Dictionary<string, string>>();
    var lines = File.ReadAllLines(filePath);

    if (lines.Length == 0)
        return results;

    var headers = lines[0].Split('\t');

    foreach (var dataLine in lines.Skip(1))
    {
        var values = dataLine.Split('\t');
        var record = new Dictionary<string, string>();
        var columns = Math.Min(headers.Length, values.Length);

        for (int col = 0; col < columns; col++)
            record[headers[col]] = values[col];

        results.Add(record);
    }

    return results;
}

// Fixed-width files
/// <summary>
/// Describes one column of a fixed-width text file: where it starts, how wide
/// it is, and the CLR type its trimmed text should be converted to.
/// </summary>
public class FixedWidthField
{
    /// <summary>Key under which the parsed value is stored.</summary>
    public string Name { get; set; }

    /// <summary>Zero-based character offset of the field within a line.</summary>
    public int StartPosition { get; set; }

    /// <summary>Width of the field in characters.</summary>
    public int Length { get; set; }

    /// <summary>Target CLR type for the parsed value.</summary>
    public Type DataType { get; set; }
}

/// <summary>
/// Parses every line of a fixed-width file according to <paramref name="fields"/>.
/// A field is extracted only when the line is long enough to contain it fully;
/// extracted text is trimmed and converted via ConvertToType (defined elsewhere).
/// </summary>
public List<Dictionary<string, object>> ImportFixedWidth(string filePath, List<FixedWidthField> fields)
{
    var results = new List<Dictionary<string, object>>();

    foreach (var line in File.ReadAllLines(filePath))
    {
        var record = new Dictionary<string, object>();

        foreach (var field in fields)
        {
            // Skip fields that would run past the end of a short line.
            if (line.Length < field.StartPosition + field.Length)
                continue;

            var rawText = line.Substring(field.StartPosition, field.Length).Trim();
            record[field.Name] = ConvertToType(rawText, field.DataType);
        }

        results.Add(record);
    }

    return results;
}


Generic Import Script Template

using System; using System.IO; using System.Data; using System.Text; using System.Windows; using System.Linq; using System.Collections.Generic; using ZinBusinessLayer; using ZinDATA; using ZinBusinessLayer.Models;

/// <summary>
/// Generic entry point for file imports: dispatches on file extension, validates
/// the parsed data, then processes records inside a single database transaction.
/// Driven through BusinessLayerMain script parameters.
/// </summary>
public class GenericImportScripter
{
    // Per-record failures beyond this threshold abort and roll back the import.
    private const int MaxRecordFailures = 10;

    private BusinessLayerMain zinBL;
    private readonly List<string> errors = new List<string>();
    private readonly List<string> warnings = new List<string>();

    /// <summary>
    /// Script entry point. Expected parameters: "FileName" (path), "ImportType"
    /// (Invoice/Payment/Customer/Item), optional "ValidateOnly" (bool).
    /// Returns true on success; failures are reported via returnParameters
    /// ("Error" for exceptions, "Errors" for validation failures).
    /// </summary>
    public bool Main(BusinessLayerMain bl, ZinParameters parameters, ZinParameters returnParameters)
    {
        zinBL = bl;

        try
        {
            // Get parameters
            var filename = parameters.GetItemString("FileName");
            var importType = parameters.GetItemString("ImportType");
            var validateOnly = parameters.GetItemBool("ValidateOnly") ?? false;

            // Determine file type and import
            var extension = Path.GetExtension(filename).ToLower();
            dynamic data = null;

            switch (extension)
            {
                case ".xlsx":
                case ".xls":
                    data = ImportExcel(filename);
                    break;
                case ".csv":
                    data = ImportCsv(filename);
                    break;
                case ".xml":
                    data = ImportXml(filename);
                    break;
                case ".json":
                    data = ImportJson(filename);
                    break;
                case ".txt":
                    data = DetermineAndImportText(filename);
                    break;
                default:
                    throw new NotSupportedException($"File type {extension} is not supported");
            }

            // Validate data
            if (!ValidateData(data))
            {
                returnParameters.SetItem("Errors", string.Join("\n", errors));
                return false;
            }

            // Process data if not validation only
            if (!validateOnly)
            {
                return ProcessData(data, importType);
            }

            returnParameters.SetItem("ValidationResult", "Data validated successfully");
            return true;
        }
        catch (Exception ex)
        {
            returnParameters.SetItem("Error", $"Import failed: {ex.Message}");
            LogError($"Import failed: {ex.ToString()}");
            return false;
        }
    }

    /// <summary>Basic sanity check: the import must yield a non-empty data set.</summary>
    private bool ValidateData(dynamic data)
    {
        // Fix: fully qualify the non-generic ICollection — it lives in
        // System.Collections, which is not among this file's using directives,
        // so the unqualified name did not compile.
        if (data == null || (data is System.Collections.ICollection coll && coll.Count == 0))
        {
            errors.Add("No data found in import file");
            return false;
        }

        // Add custom validation rules here
        return errors.Count == 0;
    }

    /// <summary>
    /// Processes every record inside one transaction. Individual record failures
    /// are logged and tolerated up to MaxRecordFailures; beyond that the whole
    /// transaction is rolled back. Returns true only when no record failed.
    /// </summary>
    private bool ProcessData(dynamic data, string importType)
    {
        using (var transaction = zinBL.BeginTransaction())
        {
            try
            {
                int recordsProcessed = 0;
                int recordsFailed = 0;

                foreach (var record in data)
                {
                    try
                    {
                        ProcessRecord(record, importType);
                        recordsProcessed++;
                    }
                    catch (Exception ex)
                    {
                        recordsFailed++;
                        LogError($"Failed to process record: {ex.Message}");

                        // Too many failures: give up and undo everything.
                        if (recordsFailed > MaxRecordFailures)
                        {
                            transaction.Rollback();
                            return false;
                        }
                    }
                }

                transaction.Commit();
                LogInfo($"Import completed: {recordsProcessed} records processed, {recordsFailed} failed");
                return recordsFailed == 0;
            }
            catch (Exception ex)
            {
                transaction.Rollback();
                LogError($"Transaction failed: {ex.Message}");
                return false;
            }
        }
    }

    /// <summary>Dispatches one record to the type-specific handler.</summary>
    private void ProcessRecord(dynamic record, string importType)
    {
        switch (importType)
        {
            case "Invoice":
                ProcessInvoiceRecord(record);
                break;
            case "Payment":
                ProcessPaymentRecord(record);
                break;
            case "Customer":
                ProcessCustomerRecord(record);
                break;
            case "Item":
                ProcessItemRecord(record);
                break;
            default:
                throw new NotSupportedException($"Import type {importType} is not supported");
        }
    }

    /// <summary>Records an error locally and forwards it to the business-layer log.</summary>
    private void LogError(string message)
    {
        errors.Add(message);
        zinBL.LoggingFunctions?.LogError(message);
    }

    /// <summary>Forwards an informational message to the business-layer log.</summary>
    private void LogInfo(string message)
    {
        zinBL.LoggingFunctions?.LogInfo(message);
    }
}


Import Mapping Configuration

/// <summary>
/// Declarative description of how a single source column maps onto a target
/// field: type, requiredness, default, and optional transform/validation hooks.
/// </summary>
public class ImportMapping
{
    /// <summary>Column/field name in the incoming file.</summary>
    public string SourceField { get; set; }

    /// <summary>Destination field name in the target model.</summary>
    public string TargetField { get; set; }

    /// <summary>CLR type the value should be converted to.</summary>
    public Type DataType { get; set; }

    /// <summary>When true, an empty source value is an error.</summary>
    public bool Required { get; set; }

    /// <summary>Value substituted when the source field is empty.</summary>
    public object DefaultValue { get; set; }

    /// <summary>Optional conversion applied to the raw value before storing.</summary>
    public Func<object, object> Transform { get; set; }

    /// <summary>Optional predicate; false marks the value as invalid.</summary>
    public Func<object, bool> Validate { get; set; }
}

/// <summary>
/// Options controlling an import run: field mappings, row layout, parsing
/// defaults, and error-tolerance limits.
/// </summary>
public class ImportConfiguration
{
    /// <summary>Per-column mapping rules.</summary>
    public List<ImportMapping> Mappings { get; set; } = new List<ImportMapping>();

    /// <summary>Skip rows that contain no data at all (default true).</summary>
    public bool SkipEmptyRows { get; set; } = true;

    /// <summary>Trim whitespace around every value (default true).</summary>
    public bool TrimValues { get; set; } = true;

    /// <summary>Zero-based index of the header row.</summary>
    public int HeaderRow { get; set; } = 0;

    /// <summary>Zero-based index of the first data row.</summary>
    public int DataStartRow { get; set; } = 1;

    /// <summary>Expected layout for textual dates (day-first by default).</summary>
    public string DateFormat { get; set; } = "dd/MM/yyyy";

    /// <summary>Abort on the first error instead of collecting (default false).</summary>
    public bool StopOnError { get; set; } = false;

    /// <summary>Maximum number of errors collected before giving up.</summary>
    public int MaxErrors { get; set; } = 100;
}


Common Import Patterns

Batch Processing

/// <summary>
/// Imports items in fixed-size batches through <paramref name="processBatch"/>,
/// counting each batch's items as successes or failures based on its result.
/// The final partial batch is flushed at the end.
/// </summary>
/// <param name="data">Items to import.</param>
/// <param name="batchSize">Number of items sent per callback invocation.</param>
/// <param name="processBatch">Callback that persists one batch; required.</param>
/// <returns>Aggregated success/failure counts.</returns>
/// <exception cref="ArgumentNullException">
/// data or processBatch is null. The original signature defaulted processBatch
/// to null and then invoked it unconditionally, guaranteeing a
/// NullReferenceException on the first full batch; fail fast instead.
/// </exception>
public async Task<ImportResult> BatchImportAsync<T>(
    IEnumerable<T> data,
    int batchSize = 100,
    Func<List<T>, Task<bool>> processBatch = null)
{
    if (data == null)
        throw new ArgumentNullException(nameof(data));
    if (processBatch == null)
        throw new ArgumentNullException(nameof(processBatch));

    var result = new ImportResult();
    var batch = new List<T>(batchSize);

    // Shared flush logic for full batches and the trailing partial batch.
    async Task FlushAsync()
    {
        var success = await processBatch(batch);
        if (success)
            result.SuccessCount += batch.Count;
        else
            result.FailureCount += batch.Count;

        batch.Clear();
    }

    foreach (var item in data)
    {
        batch.Add(item);

        if (batch.Count >= batchSize)
            await FlushAsync();
    }

    // Process remaining items
    if (batch.Count > 0)
        await FlushAsync();

    return result;
}


Duplicate Detection

/// <summary>
/// Tracks previously-seen keys (derived via a caller-supplied selector) so that
/// repeat items can be detected or filtered out while preserving order.
/// </summary>
public class DuplicateDetector<T>
{
    private readonly Func<T, string> _keySelector;
    private readonly HashSet<string> _existingKeys;

    /// <summary>Creates a detector keyed by <paramref name="keySelector"/>.</summary>
    public DuplicateDetector(Func<T, string> keySelector)
    {
        _keySelector = keySelector;
        _existingKeys = new HashSet<string>();
    }

    /// <summary>
    /// Returns true when this item's key has been seen before; otherwise records
    /// the key and returns false. (HashSet.Add reports whether the key was new.)
    /// </summary>
    public bool IsDuplicate(T item) => !_existingKeys.Add(_keySelector(item));

    /// <summary>
    /// Returns the items whose keys appear for the first time, in original order.
    /// Keys already recorded by earlier calls also count as duplicates.
    /// </summary>
    public List<T> RemoveDuplicates(IEnumerable<T> items)
    {
        var unique = new List<T>();

        foreach (var candidate in items)
        {
            if (!IsDuplicate(candidate))
                unique.Add(candidate);
        }

        return unique;
    }
}


Data Transformation Pipeline

/// <summary>
/// Composable mapper: applies a sequence of TSource-to-TSource transformations
/// in registration order, then a final TSource-to-TTarget mapping.
/// </summary>
public class TransformPipeline<TSource, TTarget>
{
    private readonly List<Func<TSource, TSource>> _transformations = new();
    private Func<TSource, TTarget> _finalMapping;

    /// <summary>Appends a transformation step; returns this for chaining.</summary>
    public TransformPipeline<TSource, TTarget> AddTransformation(Func<TSource, TSource> transform)
    {
        _transformations.Add(transform);
        return this;
    }

    /// <summary>Sets the terminal mapping; returns this for chaining.</summary>
    public TransformPipeline<TSource, TTarget> SetMapping(Func<TSource, TTarget> mapping)
    {
        _finalMapping = mapping;
        return this;
    }

    /// <summary>
    /// Runs one value through every transformation, then the final mapping.
    /// As before, throws if SetMapping was never called.
    /// </summary>
    public TTarget Process(TSource source)
    {
        var value = _transformations.Aggregate(source, (current, step) => step(current));
        return _finalMapping(value);
    }

    /// <summary>Runs Process over every source value, preserving order.</summary>
    public List<TTarget> ProcessBatch(IEnumerable<TSource> sources)
    {
        return sources.Select(Process).ToList();
    }
}


Error Handling and Logging

/// <summary>
/// Collects structured row/field-level import errors, mirrors them to the
/// business-layer log, and can export them as a CSV error report.
/// </summary>
public class ImportLogger
{
    private readonly BusinessLayerMain _bl;
    private readonly List<ImportError> _errors = new();
    private readonly StringBuilder _log = new();

    public ImportLogger(BusinessLayerMain bl)
    {
        _bl = bl;
    }

    /// <summary>
    /// Records one error (row, field, message, offending value) and forwards a
    /// formatted line to the business-layer error log.
    /// </summary>
    public void LogError(int rowNumber, string field, string message, object value = null)
    {
        var error = new ImportError
        {
            RowNumber = rowNumber,
            Field = field,
            Message = message,
            Value = value?.ToString(),
            Timestamp = DateTime.Now
        };

        _errors.Add(error);

        var logMessage = $"Row {rowNumber}, Field '{field}': {message}";
        if (value != null)
            logMessage += $" (Value: {value})";

        _log.AppendLine(logMessage);
        _bl.LoggingFunctions?.LogError(logMessage);
    }

    /// <summary>
    /// Writes all collected errors to <paramref name="filePath"/> as CSV.
    /// Fix: embedded double quotes inside Message/Value are now doubled per
    /// RFC 4180; previously they produced a malformed quoted field.
    /// </summary>
    public void ExportErrorReport(string filePath)
    {
        using var writer = new StreamWriter(filePath);
        writer.WriteLine("Row,Field,Error,Value,Time");

        foreach (var error in _errors)
        {
            var message = error.Message?.Replace("\"", "\"\"");
            var value = error.Value?.Replace("\"", "\"\"");
            writer.WriteLine($"{error.RowNumber},{error.Field},\"{message}\",\"{value}\",{error.Timestamp:yyyy-MM-dd HH:mm:ss}");
        }
    }
}

/// <summary>One recorded import failure: where it happened, what went wrong, and when.</summary>
public class ImportError
{
    /// <summary>Row number in the source file.</summary>
    public int RowNumber { get; set; }

    /// <summary>Name of the offending field/column.</summary>
    public string Field { get; set; }

    /// <summary>Human-readable description of the problem.</summary>
    public string Message { get; set; }

    /// <summary>The raw value that failed, if available.</summary>
    public string Value { get; set; }

    /// <summary>When the error was recorded.</summary>
    public DateTime Timestamp { get; set; }
}


File Upload and Validation

/// <summary>
/// Validates an uploaded import file before processing: existence, extension
/// whitelist, size limit, readability, and a format-specific structure check.
/// </summary>
public class FileImportValidator
{
    private readonly long _maxFileSize = 50 * 1024 * 1024; // 50MB
    private readonly string[] _allowedExtensions = { ".csv", ".xlsx", ".xls", ".xml", ".json", ".txt" };

    /// <summary>
    /// Runs all checks on <paramref name="filePath"/>. Structural validation
    /// only runs when the basic checks pass.
    /// </summary>
    public ValidationResult ValidateFile(string filePath)
    {
        var result = new ValidationResult();

        // Check file exists
        if (!File.Exists(filePath))
        {
            result.AddError("File does not exist");
            return result;
        }

        // Check file extension
        var extension = Path.GetExtension(filePath).ToLower();
        if (!_allowedExtensions.Contains(extension))
        {
            result.AddError($"File type {extension} is not allowed");
        }

        // Check file size
        var fileInfo = new FileInfo(filePath);
        if (fileInfo.Length > _maxFileSize)
        {
            result.AddError($"File size exceeds maximum allowed size of {_maxFileSize / (1024 * 1024)}MB");
        }

        // Check file is readable
        try
        {
            using var stream = File.OpenRead(filePath);
            // Attempt to read first byte to ensure file is accessible
            stream.ReadByte();
        }
        catch (Exception ex)
        {
            result.AddError($"Cannot read file: {ex.Message}");
        }

        // Validate file content structure.
        // Fix: merge the content-check findings into the existing result instead
        // of replacing the whole object, so anything already recorded on
        // `result` cannot be silently discarded.
        if (result.IsValid)
        {
            var contentResult = ValidateFileContent(filePath, extension);
            foreach (var error in contentResult.Errors)
                result.AddError(error);
            foreach (var warning in contentResult.Warnings)
                result.AddWarning(warning);
        }

        return result;
    }

    /// <summary>Dispatches to the structure validator for the given extension.</summary>
    private ValidationResult ValidateFileContent(string filePath, string extension)
    {
        var result = new ValidationResult();

        switch (extension)
        {
            case ".csv":
                ValidateCsvStructure(filePath, result);
                break;
            case ".xlsx":
            case ".xls":
                // NOTE(review): ValidateExcelStructure/ValidateXmlStructure/
                // ValidateJsonStructure are referenced but not shown on this
                // page — presumably defined elsewhere; confirm.
                ValidateExcelStructure(filePath, result);
                break;
            case ".xml":
                ValidateXmlStructure(filePath, result);
                break;
            case ".json":
                ValidateJsonStructure(filePath, result);
                break;
        }

        return result;
    }

    /// <summary>
    /// Cheap CSV sanity check on the first 10 lines: non-empty, plus a warning
    /// for any line whose comma count differs from the header's. (Naive split —
    /// commas inside quoted fields are not handled; this check only warns.)
    /// </summary>
    private void ValidateCsvStructure(string filePath, ValidationResult result)
    {
        try
        {
            var lines = File.ReadAllLines(filePath).Take(10).ToList();

            if (lines.Count == 0)
            {
                result.AddError("CSV file is empty");
                return;
            }

            // Check for consistent column count
            var firstLineColumns = lines[0].Split(',').Length;
            for (int i = 1; i < lines.Count; i++)
            {
                var columnCount = lines[i].Split(',').Length;
                if (columnCount != firstLineColumns)
                {
                    result.AddWarning($"Inconsistent column count at line {i + 1}");
                }
            }
        }
        catch (Exception ex)
        {
            result.AddError($"Failed to validate CSV structure: {ex.Message}");
        }
    }
}

/// <summary>
/// Accumulates validation findings; errors make the result invalid, warnings do not.
/// </summary>
public class ValidationResult
{
    /// <summary>Fatal findings; any entry makes IsValid false.</summary>
    public List<string> Errors { get; } = new List<string>();

    /// <summary>Non-fatal findings for the user's attention.</summary>
    public List<string> Warnings { get; } = new List<string>();

    /// <summary>True while no errors have been recorded.</summary>
    public bool IsValid => Errors.Count == 0;

    /// <summary>Records a fatal finding.</summary>
    public void AddError(string message)
    {
        Errors.Add(message);
    }

    /// <summary>Records a non-fatal finding.</summary>
    public void AddWarning(string message)
    {
        Warnings.Add(message);
    }
}


Performance Optimization

Memory-Efficient Large File Processing

/// <summary>
/// Streams a text file line-by-line with async I/O and a large read buffer so
/// arbitrarily big files never have to fit in memory.
/// </summary>
public class LargeFileProcessor
{
    /// <summary>
    /// Feeds each line of <paramref name="filePath"/> to <paramref name="processLine"/>;
    /// a false return aborts the run with an ImportException naming the 1-based line.
    /// </summary>
    public async Task ProcessLargeFileAsync(string filePath, Func<string, Task<bool>> processLine)
    {
        const int bufferSize = 65536; // 64KB buffer

        using var fileStream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read, bufferSize, useAsync: true);
        using var reader = new StreamReader(fileStream, Encoding.UTF8, detectEncodingFromByteOrderMarks: true, bufferSize);

        int lineNumber = 0;

        for (var line = await reader.ReadLineAsync(); line != null; line = await reader.ReadLineAsync())
        {
            lineNumber++;

            if (!await processLine(line))
            {
                throw new ImportException($"Failed to process line {lineNumber}");
            }
        }
    }
}


Parallel Processing

/// <summary>
/// Imports items concurrently, bounded by a semaphore, and tallies per-item
/// success/failure once all work completes.
/// </summary>
public class ParallelImporter<T>
{
    /// <summary>
    /// Runs <paramref name="processItem"/> over every item with at most
    /// <paramref name="maxDegreeOfParallelism"/> invocations in flight.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">
    /// maxDegreeOfParallelism is less than 1 — the original code would hang
    /// forever on 0 because the semaphore could never be acquired.
    /// </exception>
    public async Task<ImportResult> ImportParallelAsync(
        IEnumerable<T> data,
        Func<T, Task<bool>> processItem,
        int maxDegreeOfParallelism = 4)
    {
        if (maxDegreeOfParallelism < 1)
            throw new ArgumentOutOfRangeException(nameof(maxDegreeOfParallelism));

        var result = new ImportResult();

        // Fix: SemaphoreSlim is IDisposable and was previously leaked. All tasks
        // have completed by the time WhenAll returns, so disposal here is safe.
        using var semaphore = new SemaphoreSlim(maxDegreeOfParallelism);
        var tasks = new List<Task<bool>>();

        foreach (var item in data)
        {
            // Throttle before scheduling so no more than the allowed number run at once.
            await semaphore.WaitAsync();

            tasks.Add(Task.Run(async () =>
            {
                try
                {
                    return await processItem(item);
                }
                finally
                {
                    semaphore.Release();
                }
            }));
        }

        var results = await Task.WhenAll(tasks);

        result.SuccessCount = results.Count(r => r);
        result.FailureCount = results.Count(r => !r);

        return result;
    }
}

/// <summary>Aggregate outcome of an import run.</summary>
public class ImportResult
{
    /// <summary>Number of records imported successfully.</summary>
    public int SuccessCount { get; set; }

    /// <summary>Number of records that failed to import.</summary>
    public int FailureCount { get; set; }

    /// <summary>Total records attempted.</summary>
    public int TotalCount => SuccessCount + FailureCount;

    /// <summary>Percentage of successes; 0 when nothing was attempted.</summary>
    public double SuccessRate => TotalCount > 0 ? (double)SuccessCount / TotalCount * 100 : 0;
}


Best Practices

  1. Always validate before import
    • Check file format and structure
    • Validate required fields
    • Check data types and ranges
    • Identify duplicates
  2. Use transactions
    • Wrap imports in database transactions
    • Implement rollback on failure
    • Consider savepoints for partial commits
  3. Implement proper error handling
    • Log all errors with context
    • Provide detailed error reports
    • Allow partial success where appropriate
  4. Optimize for performance
    • Use batch processing for large datasets
    • Implement async/await patterns
    • Consider parallel processing where safe
    • Use streaming for large files
  5. Maintain data integrity
    • Check foreign key relationships
    • Validate business rules
    • Ensure referential integrity
    • Handle nullable fields properly
  6. Provide user feedback
    • Show progress indicators
    • Report success/failure counts
    • Export error logs
    • Allow preview before commit
  7. Security considerations
    • Validate file types and sizes
    • Scan for malicious content
    • Sanitize input data
    • Implement access controls
  8. Make imports resumable
    • Track processed records
    • Allow restart from failure point
    • Implement idempotent operations

See Also