Saturday, July 18, 2009

Outer join data tables using LINQ

Hi,

i had a problem last time to outer join data from two data tables.

the functionality to just join just using SQL on the database is easy, but i had to do this on the data that comes not from the database, but from the sharepoiont lists!

In this case SQL can not help, but LINQ is great. Below my code to perform outer join on twa datables:

   1:  /// 
   2:  /// Performs outer join on two DataTables.
   3:  /// 
   4:  /// Return one DataTable that are created based on two DataTables. Outer join.
   5:  private static DataTable OuterJoin(DataTable table1, DataTable table2, string table1_key_column, string table2_key_column)
   6:  {
   7:      var dtBase = table2.AsEnumerable();
   8:   
   9:      //dynamic LINQ - query from left to right
  10:      var query = dtBase.GroupJoin(table1.AsEnumerable(),
  11:      br => new
  12:      {
  13:          id = br.Field(table2_key_column)
  14:      },    
  15:   
  16:      jr => new
  17:      {
  18:          id = jr.Field(table1_key_column)
  19:   
  20:      },
  21:   
  22:      (baseRow, joinRow) => joinRow.DefaultIfEmpty()
  23:      .Select(row => new
  24:      {
  25:          flatRow = baseRow.ItemArray.Concat((row == null) ? new object[table1.Columns.Count] : row.ItemArray).ToArray()
  26:      })
  27:      ).SelectMany(s => s);
  28:   
  29:      var dtBase2 = table1.AsEnumerable();
  30:   
  31:      //dynamic LINQ - query from right to left
  32:      var query2 = dtBase2.GroupJoin(table2.Select(),
  33:      br => new
  34:      {
  35:          id = br.Field(table1_key_column)
  36:      },
  37:      jr => new
  38:      {
  39:          id = jr.Field(table2_key_column)
  40:      },
  41:      (baseRow, joinRow) => joinRow.DefaultIfEmpty()
  42:      .Select(row => new
  43:      {
  44:          flatRow = (row == null) ? new object[table2.Columns.Count].ToArray().Concat(baseRow.ItemArray).ToArray() : row.ItemArray.Concat(baseRow.ItemArray).ToArray()
  45:      })
  46:      ).SelectMany(s => s);
  47:   
  48:   
  49:      //do UNION operation on two join results - create one result
  50:      query = query.Union(query2);
  51:   
  52:      //create new data table with all columns for LINQ query results
  53:      DataTable flatDataTable = new DataTable();
  54:   
  55:   
  56:      //add column from first joined table
  57:      foreach (DataColumn column in table2.Columns)
  58:      {
  59:          flatDataTable.Columns.Add(new DataColumn(column.ToString()));
  60:      }
  61:   
  62:      //add column from second joined table
  63:      foreach (DataColumn column in table1.Columns)
  64:      {
  65:          flatDataTable.Columns.Add(new DataColumn(column.ToString()));
  66:      }
  67:   
  68:   
  69:      //load results from LINQ IEnumerable to DataTable
  70:      foreach (var result in query)
  71:      {
  72:          flatDataTable.LoadDataRow(result.flatRow, false);
  73:      }
  74:   
  75:      //do DISTINCT operation on results
  76:      IEnumerable rows = flatDataTable.Select().Distinct(DataRowComparer.Default);
  77:   
  78:   
  79:      //TODO: why Distinct return Rows[] not DataTable??? :(
  80:      //create one datatable for final results and populate with data
  81:   
  82:      DataTable distinctFlatDataTable = flatDataTable.Clone();
  83:      distinctFlatDataTable.Rows.Clear();
  84:   
  85:      foreach (DataRow row in rows)
  86:      {
  87:          distinctFlatDataTable.ImportRow(row);
  88:      }
  89:   
  90:   
  91:      //return flat DataTable
  92:      return distinctFlatDataTable;
  93:  }