Monday, April 20, 2020

Split CSV files in C#

Logic for splitting a CSV file into n smaller files, each with a .csv extension

Code Snippet:
// Walks AllFins in noOfFiles consecutive chunks. The first chunk takes
// finsPerCsv + mod entries; every later chunk takes finsPerCsv entries.
for (int i = 1; i <= noOfFiles; i++)
{
    take = i == 1 ? finsPerCsv + mod : finsPerCsv;
    var fins = AllFins.GetRange(beginIndex, take);
    groupByfins.Add(fins);

    // Raw-line variant: keep the lines whose first field (quotes removed) matches a col1Data entry.
    // The key is extracted once per line rather than once per col1Data entry.
    // Where() is lazy and returns IEnumerable<T>, so ToList() is required to get a List<T>.
    List<string> listw = masterData_raw
        .Where(item =>
        {
            var firstField = CSVParser2.Split(item)[0].Replace("\"", "");
            return col1Data.Any(category => category.col1.Equals(firstField));
        })
        .ToList();

    // Typed variant of the same filter, used to build the output rows below.
    // NOTE(review): the filter uses col1Data, not the fins chunk taken above - confirm this is intended.
    List<Items> list = masterData_cls
        .Where(item => col1Data.Any(category => category.col1.Equals(item.col1)))
        .ToList();

    // Quote every field; the closing quote after col5 keeps the last field balanced.
    var csvFormattedData = list.Select(str =>
        "\"" + str.col1
        + "\",\"" + str.col2
        + "\",\"" + str.col3
        + "\",\"" + str.col4
        + "\",\"" + str.col5
        + "\"")
        .ToArray();

    // CopyTo writes the header over the leading element(s) of the row array.
    // NOTE(review): this is only lossless if those leading rows are the header rows read
    // from the source file; it also throws when there are fewer rows than header lines - confirm.
    csvHeader.CopyTo(csvFormattedData, 0);

    // filepath is not changed inside this loop, so each iteration overwrites the file
    // written by the previous one.
    // NOTE(review): presumably a distinct path per chunk is intended - confirm.
    File.WriteAllLines(filepath, csvFormattedData);

    beginIndex = beginIndex + take;
}


Read a CSV file in C#

File class - provides methods for creating, copying, deleting, moving, and opening a single file.

Code Snippet:
// Load every line of the CSV file into memory.
List<string> masterDataList = new List<string>(File.ReadLines(filepath));

// Splits on a comma only when an even number of double quotes follows it on the line,
// so commas inside quoted fields are left alone.
System.Text.RegularExpressions.Regex CSVParser = new System.Text.RegularExpressions.Regex(",(?=(?:[^\"]*\"[^\"]*\")*(?![^\"]*\"))");

// Collect the field at index 6 of every line, with double quotes removed.
List<string> columnData = new List<string>(masterDataList.Count);
foreach (string line in masterDataList)
{
    string[] fields = CSVParser.Split(line);
    columnData.Add(fields[6].Replace("\"", ""));
}




Scenarios

// Read the file once; the parsed items below are built from this in-memory list.
var masterList = File.ReadLines(filePath).ToList();

// Splits on a comma only when an even number of double quotes follows it on the line,
// so commas inside quoted fields are left alone. Built once and reused for every line.
System.Text.RegularExpressions.Regex CSVParser = new System.Text.RegularExpressions.Regex(",(?=(?:[^\"]*\"[^\"]*\")*(?![^\"]*\"))");

// Turn each line into an Items instance holding its first two fields with double quotes removed.
// A line with fewer than two fields makes split[1] throw IndexOutOfRangeException.
List<Items> items_MasterData = masterList.Select(line =>
{
    String[] split = CSVParser.Split(line);
    return new Items
    {
        col1 = split[0].Replace("\"", ""),
        col2 = split[1].Replace("\"", "")
    };
}).ToList<Items>();

// The CSV can contain several rows with the same col1 value.
// Copy col1/col2 of every row into a new Items, group the copies by col1,
// and keep the first entry of each group (evaluated lazily).
var group = items_MasterData
    .Select(d => new Items { col1 = d.col1, col2 = d.col2 })
    .GroupBy(n => n.col1)
    .Select(g => g.FirstOrDefault());

// Alternative when only the number of distinct col1 values is needed:
//int countCol1Data = items_MasterData.Select(x => x.col1).Distinct().Count();

// Same de-duplication on the original instances, materialized as a list.
List<Items> col1Data = (from row in items_MasterData
                        group row by row.col1 into sameCol1
                        select sameCol1.FirstOrDefault()).ToList();