Exception when Dispose() method is called on SpreadsheetDocument object with large dataset
SDNA-ALP opened this issue · comments
Describe the bug
When i generate an Excel file with many rows (more than 8000 i'd say) and many columns (more than 17) i got an exception when i want to save the document with the Dispose() method of the Spreadsheetdocument object. Up to now i got two different exceptions (seen in different computers) : One exception is about the Isolated Storage and the other is about a wrong hexadecimal value.
It should be noted that all works well with small dataset. If my document is generated with 6000 rows for example there will be no exceptions.
To Reproduce
Here is a simplified view of my code :
- I call the zInitializeClass() method
- Then the CreateWorkbook() method
- Then the SetRangeValue2() method
- And then the Save method that will call the Dispose() method on the Spreadsheetdocument object
/// <summary></summary>
private void zInitializeClass()
{
// Declare the variables
try
{
// Create the Spreadsheet document
oExcelApp = SpreadsheetDocument.Create(PathFileName, SpreadsheetDocumentType.Workbook, true);
}
catch
{
}
}
/// <summary></summary>
public void CreateWorkbook()
{
// Add a WorkbookPart to the document if necessary
oWorkbookPart ??= GetWorkbookPart();
// Add a Workbook if necessary
oWorkbookPart.Workbook ??= new Workbook();
// Add a WorksheetPart to the WorkbookPart
oWorksheetPart = oWorkbookPart.AddNewPart<WorksheetPart>();
// Create new SheetData
oSheetData = new();
// Add a Worksheet into the the WorksheetPart
oWorksheetPart.Worksheet ??= new Worksheet();
// Append SheetData
oWorksheetPart.Worksheet.Append(oSheetData);
// Add Sheets to the Workbook
oSheets = oExcelApp.WorkbookPart.Workbook.AppendChild(new Sheets());
// Append a new worksheet and associate it with the workbook.
oSheet = new()
{
Id = oExcelApp.WorkbookPart.GetIdOfPart(oWorksheetPart),
SheetId = 1,
Name = "Sheet1"
};
// Create the Cell
oCell = new(ref oWorkbookPart, ref oWorksheetPart);
}
/// <summary></summary>
public WorkbookPart GetWorkbookPart()
{
// Declare the variables
WorkbookPart mResult;
try
{
// Get the WorkwookPart
mResult = oExcelApp?.WorkbookPart;
// Create a new WorkbookPart if needed
mResult ??= oExcelApp.AddWorkbookPart();
// Return the result
return mResult;
}
catch
{
// As error occurred, return the default value
return null;
}
}
/// <summary></summary>
public void SetRangeValue2(string pStartCell, string pEndCell, object[,] Data)
{
// Declare the variables
Cell mCell;
Row mRow;
StringBuilder mCellText;
int mStartRowIndex;
int mStartColIndex;
int mEndRowIndex;
int mEndColIndex;
try
{
// Get the start indexes
mStartRowIndex = clsOpenXMLHelper.GetRowIndex(pStartCell);
mStartColIndex = clsOpenXMLHelper.GetColIndex(pStartCell);
// Get the end indexes
mEndRowIndex = clsOpenXMLHelper.GetRowIndex(pEndCell);
mEndColIndex = clsOpenXMLHelper.GetColIndex(pEndCell);
// Go through the rows
for (int mRowIndex = mStartRowIndex; mRowIndex <= mEndRowIndex; mRowIndex++)
{
// Get the Row
mRow = clsOpenXMLHelper.GetRow(oSheetData, mRowIndex);
// Go through the cols
for (int mColIndex = mStartColIndex; mColIndex <= mEndColIndex; mColIndex++)
{
// Get the cell reference
mCell = clsOpenXMLHelper.GetCell(mRow, mRowIndex, mColIndex);
// Set the CellValue
mCellText = new(Data[mRowIndex - 1, mColIndex - 1].ToString());
// Add the value
mCell.CellValue = new CellValue(mCellText.ToString());
// Apply the regular style
mCell.StyleIndex = Convert.ToUInt32(clsOpenXMLHelper.eSTYLEINDEX.CALIBRI_11_REGULAR);
// Set the DataType
mCell.DataType = new EnumValue<CellValues>(CellValues.String);
}
}
}
catch
{
}
}
/// <summary></summary>
public void Save(bool pShowExcel)
{
// Declare the variables
try
{
// Check if the object is null
if (oSheets == null || oWorkbookPart == null || oExcelApp == null) return;
// Add the Sheet
oSheets.Append(oSheet);
// Ignore all format Excel Warning
oWorksheetPart.Worksheet.Append(mIgnoredErrors);
// Save the Workbook
oWorkbookPart.Workbook.Save();
// Dispose the object
oExcelApp.Dispose();
// Check if the Excel document has to be displayed
if (pShowExcel)
{
// Display the Excel document
Process.Start(PathFileName);
}
}
catch
{
}
}
/// <summary></summary>
public static Row GetRow(SheetData SheetData, int pRow)
{
// Declare the variables
Row mRow;
try
{
// Find the row or create it if it doesn't exist
mRow = SheetData?.Elements<Row>().FirstOrDefault(r => r.RowIndex == pRow);
// Check if the object is null
if (mRow == null)
{
// Create the Row
mRow = new Row { RowIndex = (uint)pRow };
// Add a new child
SheetData.AppendChild(mRow);
}
// Return the result
return mRow;
}
catch
{
// As error occurred, return the default value
return null;
}
}
/// <summary></summary>
public static Cell GetCell(Row Row, int pRow, int pCol)
{
// Declare the variables
Cell mCell;
string mCellReference;
try
{
// Get the CellReference
mCellReference = GetCellReference(pRow, pCol);
// Find the cell in the row or create it if it doesn't exist
mCell = Row?.Elements<Cell>().FirstOrDefault(c => c.CellReference.Value == mCellReference);
// Check if the object is null
if (mCell == null)
{
// Create new object instance
mCell = new Cell
{
CellReference = mCellReference,
DataType = CellValues.String
};
// Add a new cell to the row
Row.InsertAt(mCell, GetCellIndex(Row, pCol));
}
// Return the result
return mCell;
}
catch
{
// As error occurred, return the default value
return null;
}
}
Steps to reproduce the behavior:
The Data object in SetRangeValue2() method has to be big, at least [8000, 17].
Observed behavior
In some case the Excel document will be readable in despite of the error and in other cases the document will be corrupted (Excel is not able to open the file). It seems that the behavior is not the same on all PCs i tested (powerful computers, virtual computers and on slow computers).
Expected behavior
The Excel file has to be readable, not corrupted and the Dispose() method of the Spreadsheetdocument object has to save the document properly without throwing errors.
Desktop (please complete the following information):
- OS: Windows 11
- Office version : Microsoft 365 MSO (Version 2311 Build 16.0.17029.20028) 64-bit
- .NET Target: 4.8
- DocumentFormat.OpenXml Version: 3.0.0
Additional context
I noticed that the Excel generation is very low compared to Microsoft.Interrop API. While we have a fast SetRange method in Interrop, in OpenXML it seems that we have to loop on the rows and then the columns in order to fill all the cells.
I have created a Test project in order to reproduce these issues and i have succeeded to solve one of the two issues i had.
The issue was because of multiple saves in the document :
/// <summary></summary>
public void SetValueAddress(int pRow, int pCol, string pValue)
{
// Declare the variables
Cell mCell;
// Get the Cell
mCell = clsOpenXMLHelper.GetCell(oSheetData, pRow, pCol);
// Set the value
mCell.CellValue = new CellValue(pValue);
// Source of error ↓
//// Save the changes into the WorksheetPart
//oWorksheetPart.Worksheet.Save();
//// Save the changes into the Workbook part
//oWorkbookPart.Workbook.Save();
// Source of error ↑
}
I was looping on each Header column in order to create formated cells and each time i was saving the Worksheet and the Workbook. Without these savings the document has no Isolate Storage errors when the Dispose method is called.
Now the only error i still have is an error related to the data. If the text of one of my cells is filled with this value "\u001f" it will throws an hexadecimal error :
System.ArgumentException HResult=0x80070057 Message='�', hexadecimal value 0x1F, is an invalid character. Source=System.Xml StackTrace: at System.Xml.XmlUtf8RawTextWriter.InvalidXmlChar(Int32 ch, Byte* pDst, Boolean entitize) in System.Xml\XmlUtf8RawTextWriter.cs:line 1187
In order to fix this issue i'm going to try to sanitize the data before i put them in the cells.
If you want to reproduce this hexadecimal issue, here is my complete test project :
/// <summary></summary>
private void cmdRun_Click(object sender, EventArgs e)
{
// Declare the variables
SpreadsheetDocument mSpreadsheetDocument = null;
WorkbookPart mWorkbookPart;
WorksheetPart mWorksheetPart;
SheetData mSheetData;
Sheets mSheets;
Sheet mSheet;
Row mRow;
Cell mCell;
Columns mColumns;
Column mColumn;
StringBuilder mCellText;
WorkbookStylesPart mWorkbookStylesPart;
string[,] mData;
string mPathFileName;
try
{
// Get the path
mPathFileName = txtBoxFilePath.Text;
// Get the Rows and Columns
int.TryParse(txtBoxRows.Text, out int mRows);
int.TryParse(txtBoxColumns.Text, out int mCols);
// Create new object instances
mSpreadsheetDocument = SpreadsheetDocument.Create(mPathFileName, SpreadsheetDocumentType.Workbook, true);
mData = new string[mRows, mCols];
mColumns = new Columns();
// Add the WorkbookPart
mWorkbookPart = mSpreadsheetDocument.AddWorkbookPart();
// Add a new Workbook
mWorkbookPart.Workbook = new Workbook();
// Add a new WorksheetPart to the WorkbookPart
mWorksheetPart = mWorkbookPart.AddNewPart<WorksheetPart>();
// Create a new SheetData
mSheetData = new SheetData();
// Add a new Worksheet into the WorksheetPart
mWorksheetPart.Worksheet = new Worksheet();
// Go through the columns
for (int mCounter = 0; mCounter < mCols; mCounter++)
{
// Create new object instance
mColumn = new Column() { Min = Convert.ToUInt32(mCounter + 1), Max = Convert.ToUInt32(mCounter + 1), Width = 20, CustomWidth = true };
// Append the Column
mColumns.Append(mColumn);
}
// Append the columns
mWorksheetPart.Worksheet.Append(mColumns);
// Add the SheetData to the tree
mWorksheetPart.Worksheet.Append(mSheetData);
// Create and add Sheets to the Workbook
mSheets = mSpreadsheetDocument.WorkbookPart.Workbook.AppendChild(new Sheets());
// Add a Sheet to the Workbook
mSheet = new Sheet()
{
Id = mSpreadsheetDocument.WorkbookPart.GetIdOfPart(mWorksheetPart),
SheetId = 1,
Name = "Sheet1",
};
// Create the Style sheet
mWorkbookStylesPart = mWorkbookPart.AddNewPart<WorkbookStylesPart>();
mWorkbookPart.WorkbookStylesPart.Stylesheet = new Stylesheet();
// Create the WorksheetStyle
zCreateWorksheetStyle(mWorkbookStylesPart);
// Add the headers
// Generate the Data
zGenerateData(mRows, mCols, mData);
// Initialize the ProgressBar
Pgb.Minimum = 0;
Pgb.Maximum = mRows;
Pgb.Step = 1;
// Go through the rows
for (int mRowIndex = 0; mRowIndex < mRows; mRowIndex++)
{
// Get the current Row or create it
mRow = zGetRow(mSheetData, mRowIndex + 1);
// Go through the cols
for (int mColIndex = 0; mColIndex < mCols; mColIndex++)
{
// Get the current Cell or create it
mCell = zGetCell(mRow, mRowIndex + 1, mColIndex + 1);
// Get the cell text
mCellText = new StringBuilder(mData[mRowIndex, mColIndex].ToString());
// Set the cell value
mCell.CellValue = new CellValue(mCellText.ToString());
// Set the style
mCell.StyleIndex = 6;
// Set the cell type
mCell.DataType = new EnumValue<CellValues>(CellValues.String);
}
// Increment the ProgressBar
Pgb.PerformStep();
}
// Reset the ProgressBar
Pgb.Value = Pgb.Minimum;
// Add the Sheet to the tree
mSheets.Append(mSheet);
// Save the changes
mSpreadsheetDocument.Dispose();
// Display the document
Process.Start(mPathFileName);
}
catch (Exception ex)
{
// Check if the object is null
if (mSpreadsheetDocument != null)
{
// Dispose the object
mSpreadsheetDocument.Dispose();
}
// Display the error
MessageBox.Show(ex.Message);
}
}
/// <summary></summary>
private void zGenerateData(int pRows, int pColumns, string[,] Data)
{
// Go through the rows
for (int mRowIndex = 0; mRowIndex < pRows; mRowIndex++)
{
// Go through the columns
for (int mColIndex = 0; mColIndex < pColumns; mColIndex++)
{
// Fill each cell with the value "Value {row + 1} {col + 1}".
Data[mRowIndex, mColIndex] = $"\u001fValue {mRowIndex + 1} {mColIndex + 1}";
}
}
}
/// <summary></summary>
private Row zGetRow(SheetData SheetData, int pRow)
{
// Declare the variables
Row mRow;
try
{
// Find the row or create it if it doesn't exist
mRow = SheetData?.Elements<Row>().FirstOrDefault(r => r.RowIndex == pRow);
// Check if the object is null
if (mRow == null)
{
// Create the Row
mRow = new Row { RowIndex = (uint)pRow };
// Add a new child
SheetData.AppendChild(mRow);
}
// Return the result
return mRow;
}
catch
{
// As error occurred, return the default value
return null;
}
}
/// <summary></summary>
private Cell zGetCell(Row Row, int pRow, int pCol)
{
// Declare the variables
Cell mCell;
string mCellReference;
try
{
// Get the CellReference
mCellReference = zGetCellReference(pRow, pCol);
// Find the cell in the row or create it if it doesn't exist
mCell = Row?.Elements<Cell>().FirstOrDefault(c => c.CellReference.Value == mCellReference);
// Check if the object is null
if (mCell == null)
{
// Create new object instance
mCell = new Cell
{
CellReference = mCellReference,
DataType = CellValues.String
};
// Add a new cell to the row
Row.InsertAt(mCell, zGetCellIndex(Row, pCol));
}
// Return the result
return mCell;
}
catch
{
// As error occurred, return the default value
return null;
}
}
/// <summary>Helper method to get the cell reference (e.g., "A1", "B2")</summary>
private string zGetCellReference(int pRow, int pCol)
{
// Return the Column name
return $"{zGetColumnName(pCol)}{pRow}";
}
/// <summary>Helper method to get the column name based on the column index</summary>
private string zGetColumnName(int pCol)
{
// Declare the variables
int mDividend;
string mColumnName;
int mModulo;
// Initialize the variables
mDividend = pCol;
mColumnName = string.Empty;
// While the dividend is greater than 0
while (mDividend > 0)
{
// Get the dividend
mModulo = (mDividend - 1) % 26;
mColumnName = Convert.ToChar(65 + mModulo) + mColumnName;
mDividend = (mDividend - mModulo) / 26;
}
// Return the result
return mColumnName;
}
/// <summary>Helper method to get the cell index within the row</summary>
private int zGetCellIndex(Row row, int pCol)
{
// Declare the variables
int mIndex;
int mCurrentCol;
// Initialize the variables
mIndex = 0;
// Go thourh all the Cells
foreach (Cell cell in row.Elements<Cell>())
{
// Get the current Col
mCurrentCol = zGetColumnIndex(cell.CellReference.Value);
// Check if the current col is higher then the Col
if (mCurrentCol > pCol)
{
// Return the index
return mIndex;
}
// Increment the index
mIndex++;
}
// Return the index
return mIndex;
}
/// <summary>Helper method to get the column index based on the cell reference</summary>
private int zGetColumnIndex(string pCellReference)
{
// Declare the variables
string columnName;
int columnIndex;
// Initialize the variables
columnName = Regex.Replace(pCellReference, @"\d", "");
columnIndex = 0;
// Go through the letters
for (int i = 0; i < columnName.Length; i++)
{
// Get the column index
columnIndex = columnIndex * 26 + (columnName[i] - 'A' + 1);
}
// Return the column index
return columnIndex;
}
private WorkbookStylesPart zCreateWorksheetStyle(WorkbookStylesPart WorkbookStylesPart)
{
// Declare the variables
Stylesheet workbookstylesheet;
Font mFont0;
FontName mArialFont;
FontSize mSizeArial;
Font mFont1;
Bold mBold;
Font mFont2;
FontName mCalibriFont;
FontSize mSizeCalibri;
Fonts mFonts;
Fill mFill0;
Fills mFills;
Border mBorder0;
Borders mBorders;
CellFormats mCellFormats;
CellFormat mCellFormat0_Regular_Arial_11;
CellFormat mCellFormat1_Bold_Arial_11;
CellFormat mCellFormat2_Regular_Calibri_11;
CellFormat mCellFormat3_DateFormat_Calibri_11;
CellFormat mCellFormat4_DoubleFormat_Calibri_11;
CellFormat mCellFormat5_IntegerFormat_Calibri_11;
CellFormat mCellFormat6_TextFormat_Calibri_11;
try
{
// Create new object instance
workbookstylesheet = new Stylesheet();
// Add the regular font
mFont0 = new Font(); // Default font
mArialFont = new FontName() { Val = "Arial" };
mSizeArial = new FontSize() { Val = 11 };
mFont0.Append(mArialFont);
mFont0.Append(mSizeArial);
// Add a Bolded font
mFont1 = new Font();
mBold = new Bold();
mFont1.Append(mBold);
// Add a Calibri font
mFont2 = new Font();
mCalibriFont = new FontName() { Val = "Calibri" };
mSizeCalibri = new FontSize() { Val = 11 };
mFont2.Append(mCalibriFont);
mFont2.Append(mSizeCalibri);
// Append all fonts
mFonts = new Fonts();
mFonts.Append(mFont0);
mFonts.Append(mFont1);
mFonts.Append(mFont2);
//Append fills - a must, in my case just default
mFill0 = new Fill();
mFills = new Fills();
mFills.Append(mFill0);
// Append borders - a must, in my case just default
mBorder0 = new Border();
mBorders = new Borders();
mBorders.Append(mBorder0);
// Create new object instance
mCellFormats = new CellFormats();
// Create the different styles (the order is not important)
mCellFormat0_Regular_Arial_11 = new CellFormat() { FontId = 0, FillId = 0, BorderId = 0 };
mCellFormat1_Bold_Arial_11 = new CellFormat() { FontId = 1 };
mCellFormat2_Regular_Calibri_11 = new CellFormat() { FontId = 2, FillId = 0, BorderId = 0 };
mCellFormat3_DateFormat_Calibri_11 = new CellFormat() { BorderId = 0, FillId = 0, FontId = 2, NumberFormatId = 14, FormatId = 0, ApplyNumberFormat = true };
mCellFormat4_DoubleFormat_Calibri_11 = new CellFormat() { BorderId = 0, FillId = 0, FontId = 2, NumberFormatId = 4, FormatId = 0, ApplyNumberFormat = true };
mCellFormat5_IntegerFormat_Calibri_11 = new CellFormat() { BorderId = 0, FillId = 0, FontId = 2, NumberFormatId = 3, FormatId = 0, ApplyNumberFormat = true };
mCellFormat6_TextFormat_Calibri_11 = new CellFormat() { BorderId = 0, FillId = 0, FontId = 2, NumberFormatId = 49, FormatId = 0, ApplyNumberFormat = true };
// Append the CellFormats (the order is important and related to the eSTYLE_INDEX enumeration !)
mCellFormats.Append(mCellFormat0_Regular_Arial_11); // ARIAL_11_REGULAR
mCellFormats.Append(mCellFormat1_Bold_Arial_11); // ARIAL_11_BOLD
mCellFormats.Append(mCellFormat2_Regular_Calibri_11); // CALIBRI_11_REGULAR
mCellFormats.Append(mCellFormat3_DateFormat_Calibri_11); // CALIBRI_11_DATE_FORMAT
mCellFormats.Append(mCellFormat4_DoubleFormat_Calibri_11); // CALIBRI_11_DOUBLE_FORMAT
mCellFormats.Append(mCellFormat5_IntegerFormat_Calibri_11); // CALIBRI_11_INTEGER_FORMAT
mCellFormats.Append(mCellFormat6_TextFormat_Calibri_11); // CALIBRI_11_TEXT_FORMAT
// Append everyting to stylesheet - Preserve the ORDER !
workbookstylesheet.Append(mFonts);
workbookstylesheet.Append(mFills);
workbookstylesheet.Append(mBorders);
workbookstylesheet.Append(mCellFormats);
// Save style for finish
WorkbookStylesPart.Stylesheet = workbookstylesheet;
WorkbookStylesPart.Stylesheet.Save();
// Return the value
return WorkbookStylesPart;
}
catch
{
// As error occurred, return the default value
return null;
}
}
This issue is closed.