dotnet / Open-XML-SDK

Open XML SDK by Microsoft

Home Page:https://www.nuget.org/packages/DocumentFormat.OpenXml/

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

Exception when Dispose() method is called on SpreadsheetDocument object with large dataset

SDNA-ALP opened this issue · comments

Describe the bug
When I generate an Excel file with many rows (more than about 8000) and many columns (more than 17), I get an exception when I save the document by calling the Dispose() method of the SpreadsheetDocument object. So far I have seen two different exceptions (on different computers): one is about Isolated Storage and the other is about an invalid hexadecimal value.

Note that everything works well with a small dataset. For example, if my document is generated with 6000 rows, no exception is thrown.

Screenshots
IsolatedStorageException
HexadecimalException

To Reproduce
Here is a simplified view of my code :

  1. I call the zInitializeClass() method
  2. Then the CreateWorkbook() method
  3. Then the SetRangeValue2() method
  4. And then the Save method that will call the Dispose() method on the Spreadsheetdocument object
/// <summary>
/// Creates the spreadsheet document at <c>PathFileName</c> and stores it in <c>oExcelApp</c>.
/// </summary>
/// <remarks>
/// A creation failure is not rethrown, so existing callers keep working unchanged, but it is
/// written to the trace output instead of being silently discarded. When creation fails,
/// this method does not assign <c>oExcelApp</c>.
/// </remarks>
private void zInitializeClass()
{
    try
    {
        // Create the Spreadsheet document
        oExcelApp = SpreadsheetDocument.Create(PathFileName, SpreadsheetDocumentType.Workbook, true);
    }
    catch (System.Exception ex)
    {
        // Keep the original non-throwing contract, but leave a diagnostic trail
        System.Diagnostics.Trace.TraceError("zInitializeClass: unable to create '" + PathFileName + "': " + ex);
    }
}

/// <summary>
/// Builds the workbook skeleton: WorkbookPart, Workbook, one WorksheetPart with a Worksheet
/// containing an empty SheetData, the Sheets collection, and the Sheet entry "Sheet1".
/// </summary>
/// <remarks>
/// The Sheet object is only created here; it is not appended to <c>oSheets</c> in this method.
/// NOTE(review): <c>GetWorkbookPart()</c> can return null, in which case the next statement
/// would throw a NullReferenceException - confirm callers guarantee a created document.
/// </remarks>
public void CreateWorkbook()
{
    // Add a WorkbookPart to the document if necessary
    oWorkbookPart ??= GetWorkbookPart();

    // Add a Workbook if necessary
    oWorkbookPart.Workbook ??= new Workbook();

    // Add a WorksheetPart to the WorkbookPart
    oWorksheetPart = oWorkbookPart.AddNewPart<WorksheetPart>();

    // Create new SheetData (the container that receives the rows)
    oSheetData = new();

    // Add a Worksheet into the WorksheetPart
    oWorksheetPart.Worksheet ??= new Worksheet();

    // Append SheetData to the Worksheet
    oWorksheetPart.Worksheet.Append(oSheetData);

    // Add Sheets to the Workbook
    // NOTE(review): a new Sheets element is appended on every call - presumably this method is called once per document; verify
    oSheets = oExcelApp.WorkbookPart.Workbook.AppendChild(new Sheets());

    // Create the Sheet entry that references the WorksheetPart through its relationship id
    oSheet = new()
    {
        Id = oExcelApp.WorkbookPart.GetIdOfPart(oWorksheetPart),
        SheetId = 1,
        Name = "Sheet1"
    };

    // Create the cell helper object, passing it the workbook and worksheet parts by reference
    oCell = new(ref oWorkbookPart, ref oWorksheetPart);
}

/// <summary>Returns the WorkbookPart of the current document, adding one when the document has none.</summary>
/// <returns>The WorkbookPart, or <c>null</c> when any exception occurs while obtaining it.</returns>
public WorkbookPart GetWorkbookPart()
{
    try
    {
        // Reuse the existing part when there is one; otherwise add a new part to the document
        return oExcelApp?.WorkbookPart ?? oExcelApp.AddWorkbookPart();
    }
    catch
    {
        // Any failure is reported to the caller as "no part"
        return null;
    }
}

/// <summary>
/// Writes the values of <paramref name="Data"/> as string cells into the range delimited by
/// <paramref name="pStartCell"/> and <paramref name="pEndCell"/>.
/// </summary>
/// <param name="pStartCell">Reference of the first cell of the range; its row and column indexes are resolved by clsOpenXMLHelper.</param>
/// <param name="pEndCell">Reference of the last cell of the range; its row and column indexes are resolved by clsOpenXMLHelper.</param>
/// <param name="Data">Values to write. The value for sheet row r and column c is read from <c>Data[r - 1, c - 1]</c>; a null element is written as an empty string.</param>
/// <remarks>
/// Characters that are not legal in XML 1.0 (for example U+001F) are escaped as <c>_xHHHH_</c>
/// before being stored, because the XML writer used when the document is saved rejects them.
/// Failures are not rethrown (same contract as before) but are written to the trace output.
/// </remarks>
public void SetRangeValue2(string pStartCell, string pEndCell, object[,] Data)
{
    try
    {
        // Get the start indexes
        int mStartRowIndex = clsOpenXMLHelper.GetRowIndex(pStartCell);
        int mStartColIndex = clsOpenXMLHelper.GetColIndex(pStartCell);

        // Get the end indexes
        int mEndRowIndex = clsOpenXMLHelper.GetRowIndex(pEndCell);
        int mEndColIndex = clsOpenXMLHelper.GetColIndex(pEndCell);

        // The style index is the same for every cell, so compute it once
        uint mStyleIndex = Convert.ToUInt32(clsOpenXMLHelper.eSTYLEINDEX.CALIBRI_11_REGULAR);

        // Go through the rows
        for (int mRowIndex = mStartRowIndex; mRowIndex <= mEndRowIndex; mRowIndex++)
        {
            // Get the Row
            Row mRow = clsOpenXMLHelper.GetRow(oSheetData, mRowIndex);

            // Go through the cols
            for (int mColIndex = mStartColIndex; mColIndex <= mEndColIndex; mColIndex++)
            {
                // Get the cell
                Cell mCell = clsOpenXMLHelper.GetCell(mRow, mRowIndex, mColIndex);

                // A null element becomes an empty string; illegal XML characters are escaped
                string mCellText = zEscapeInvalidXmlChars(Data[mRowIndex - 1, mColIndex - 1]?.ToString());

                // Add the value
                mCell.CellValue = new CellValue(mCellText);

                // Apply the regular style
                mCell.StyleIndex = mStyleIndex;

                // Set the DataType
                mCell.DataType = new EnumValue<CellValues>(CellValues.String);
            }
        }
    }
    catch (System.Exception ex)
    {
        // Keep the original non-throwing contract, but do not hide the failure
        System.Diagnostics.Trace.TraceError("SetRangeValue2: unable to fill range " + pStartCell + ":" + pEndCell + ": " + ex);
    }
}

/// <summary>Replaces every character that XML 1.0 does not allow with its <c>_xHHHH_</c> escape.</summary>
/// <param name="pText">Text to sanitize; null is treated as empty.</param>
/// <returns>The text with illegal characters escaped; the same instance when nothing had to be escaped.</returns>
private static string zEscapeInvalidXmlChars(string pText)
{
    if (string.IsNullOrEmpty(pText))
    {
        return string.Empty;
    }

    // Allocated lazily: most strings contain nothing to escape
    StringBuilder mEscaped = null;

    for (int mIndex = 0; mIndex < pText.Length; mIndex++)
    {
        char mChar = pText[mIndex];

        // Legal: TAB, LF, CR and U+0020..U+FFFD. Surrogate code units fall inside that
        // range and are passed through unchanged so that valid pairs stay intact.
        bool mIsLegal = mChar == '\t' || mChar == '\n' || mChar == '\r'
                        || (mChar >= '\u0020' && mChar <= '\uFFFD');

        if (mIsLegal)
        {
            mEscaped?.Append(mChar);
            continue;
        }

        // First illegal character found: start the copy with everything seen so far
        mEscaped ??= new StringBuilder(pText, 0, mIndex, pText.Length + 8);
        mEscaped.Append("_x").Append(((int)mChar).ToString("X4")).Append('_');
    }

    return mEscaped == null ? pText : mEscaped.ToString();
}

/// <summary>
/// Appends the pending Sheet and the ignored-errors element, saves the Workbook, disposes the
/// document and optionally opens the resulting file.
/// </summary>
/// <param name="pShowExcel">When true and the save succeeded, the file at <c>PathFileName</c> is opened with <c>Process.Start</c>.</param>
/// <remarks>
/// Does nothing when the workbook objects were never created. Failures are not rethrown (same
/// contract as before) but are written to the trace output. The document is always disposed,
/// even when saving fails, so the file is not left open.
/// </remarks>
public void Save(bool pShowExcel)
{
    // Nothing to save when the workbook was never created
    if (oSheets == null || oWorkbookPart == null || oExcelApp == null)
    {
        return;
    }

    bool mSaved = false;

    try
    {
        // Add the Sheet
        oSheets.Append(oSheet);

        // Append the ignored-errors element to the worksheet
        oWorksheetPart.Worksheet.Append(mIgnoredErrors);

        // Save the Workbook
        oWorkbookPart.Workbook.Save();

        mSaved = true;
    }
    catch (System.Exception ex)
    {
        System.Diagnostics.Trace.TraceError("Save: unable to save '" + PathFileName + "': " + ex);
    }
    finally
    {
        try
        {
            // Always release the document, whether or not the save succeeded
            oExcelApp.Dispose();
        }
        catch (System.Exception ex)
        {
            // Dispose can itself fail while writing the package; the file is then not usable
            mSaved = false;
            System.Diagnostics.Trace.TraceError("Save: unable to dispose '" + PathFileName + "': " + ex);
        }
    }

    // Only display the document when it was written successfully
    if (mSaved && pShowExcel)
    {
        try
        {
            Process.Start(PathFileName);
        }
        catch (System.Exception ex)
        {
            System.Diagnostics.Trace.TraceError("Save: unable to open '" + PathFileName + "': " + ex);
        }
    }
}

/// <summary>Returns the row with index <paramref name="pRow"/>, appending a new one to the sheet data when none exists.</summary>
/// <param name="SheetData">Sheet data searched for the row and extended when the row is missing.</param>
/// <param name="pRow">Row index to look up.</param>
/// <returns>The existing or newly appended row, or <c>null</c> when any exception occurs.</returns>
public static Row GetRow(SheetData SheetData, int pRow)
{
    try
    {
        // Look for a row that already carries the requested index
        Row existing = SheetData?.Elements<Row>().FirstOrDefault(r => r.RowIndex == pRow);
        if (existing != null)
        {
            return existing;
        }

        // Not found: create the row and add it at the end of the sheet data
        Row created = new Row { RowIndex = (uint)pRow };
        SheetData.AppendChild(created);
        return created;
    }
    catch
    {
        // Any failure is reported to the caller as "no row"
        return null;
    }
}

/// <summary>Returns the cell of <paramref name="Row"/> at the given position, inserting a new string cell when none exists.</summary>
/// <param name="Row">Row searched for the cell and extended when the cell is missing.</param>
/// <param name="pRow">Row index used to build the cell reference.</param>
/// <param name="pCol">Column index used to build the cell reference and to pick the insert position.</param>
/// <returns>The existing or newly inserted cell, or <c>null</c> when any exception occurs.</returns>
public static Cell GetCell(Row Row, int pRow, int pCol)
{
    try
    {
        // Build the reference the cell is identified by
        string reference = GetCellReference(pRow, pCol);

        // Look for a cell that already carries this reference
        Cell existing = Row?.Elements<Cell>().FirstOrDefault(c => c.CellReference.Value == reference);
        if (existing != null)
        {
            return existing;
        }

        // Not found: create the cell and insert it at the position computed by GetCellIndex
        Cell created = new Cell
        {
            CellReference = reference,
            DataType = CellValues.String
        };
        Row.InsertAt(created, GetCellIndex(Row, pCol));
        return created;
    }
    catch
    {
        // Any failure is reported to the caller as "no cell"
        return null;
    }
}

Steps to reproduce the behavior:
The Data object in SetRangeValue2() method has to be big, at least [8000, 17].

Observed behavior
In some cases the Excel document is readable despite the error, and in other cases the document is corrupted (Excel is not able to open the file). The behavior does not seem to be the same on all the PCs I tested (powerful computers, virtual machines and slow computers).

Expected behavior
The Excel file should be readable and not corrupted, and the Dispose() method of the SpreadsheetDocument object should save the document properly without throwing errors.

Desktop (please complete the following information):

  • OS: Windows 11
  • Office version : Microsoft 365 MSO (Version 2311 Build 16.0.17029.20028) 64-bit
  • .NET Target: 4.8
  • DocumentFormat.OpenXml Version: 3.0.0

Additional context
I noticed that Excel generation is very slow compared to the Microsoft Office Interop API. While Interop has a fast SetRange method, in Open XML it seems that we have to loop over the rows and then the columns in order to fill all the cells.

I have created a test project in order to reproduce these issues, and I have succeeded in solving one of the two issues I had.

The issue was caused by multiple saves of the document:

 /// <summary>Sets the value of the cell at the given row and column to <paramref name="pValue"/>.</summary>
 /// <param name="pRow">Row index passed to clsOpenXMLHelper.GetCell.</param>
 /// <param name="pCol">Column index passed to clsOpenXMLHelper.GetCell.</param>
 /// <param name="pValue">Text stored as the cell value.</param>
public void SetValueAddress(int pRow, int pCol, string pValue)
{
    // Declare the variables
    Cell mCell;

    // Get the Cell
    // NOTE(review): GetCell can return null in the overload shown elsewhere in this report - confirm this overload behaves differently
    mCell = clsOpenXMLHelper.GetCell(oSheetData, pRow, pCol);

    // Set the value
    mCell.CellValue = new CellValue(pValue);

    // The two saves below were executed on every call; they are disabled because
    // the reporter identified them as the cause of the Isolated Storage error.
    // Source of error ↓
    //// Save the changes into the WorksheetPart
    //oWorksheetPart.Worksheet.Save();

    //// Save the changes into the Workbook part
    //oWorkbookPart.Workbook.Save();
   // Source of error ↑
}

I was looping over each header column in order to create formatted cells, and each time I was saving the Worksheet and the Workbook. Without these saves, the document no longer has Isolated Storage errors when the Dispose method is called.

Now the only error I still have is related to the data. If the text of one of my cells contains the value "\u001f", it throws a hexadecimal error:
System.ArgumentException HResult=0x80070057 Message='�', hexadecimal value 0x1F, is an invalid character. Source=System.Xml StackTrace: at System.Xml.XmlUtf8RawTextWriter.InvalidXmlChar(Int32 ch, Byte* pDst, Boolean entitize) in System.Xml\XmlUtf8RawTextWriter.cs:line 1187

In order to fix this issue, I'm going to try to sanitize the data before I put it in the cells.

If you want to reproduce this hexadecimal issue, here is my complete test project :

/// <summary>
/// Click handler that builds a test workbook: it creates the document at the path typed in
/// txtBoxFilePath, adds one sheet with the number of rows and columns typed in txtBoxRows and
/// txtBoxColumns, fills every cell with generated text, disposes the document and opens the file.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
/// <remarks>
/// Any exception is shown in a message box after an attempt to dispose the document.
/// </remarks>
private void cmdRun_Click(object sender, EventArgs e)
{
    // Declare the variables
    SpreadsheetDocument mSpreadsheetDocument = null;
    WorkbookPart mWorkbookPart;
    WorksheetPart mWorksheetPart;
    SheetData mSheetData;
    Sheets mSheets;
    Sheet mSheet;
    Row mRow;
    Cell mCell;
    Columns mColumns;
    Column mColumn;
    StringBuilder mCellText;
    WorkbookStylesPart mWorkbookStylesPart;
    string[,] mData;
    string mPathFileName;

    try
    {
        // Get the path
        mPathFileName = txtBoxFilePath.Text;

        // Get the Rows and Columns
        // The TryParse results are ignored: unparsable text leaves the count at 0
        int.TryParse(txtBoxRows.Text, out int mRows);
        int.TryParse(txtBoxColumns.Text, out int mCols);

        // Create new object instances
        mSpreadsheetDocument = SpreadsheetDocument.Create(mPathFileName, SpreadsheetDocumentType.Workbook, true);
        mData = new string[mRows, mCols];
        mColumns = new Columns();

        // Add the WorkbookPart
        mWorkbookPart = mSpreadsheetDocument.AddWorkbookPart();

        // Add a new Workbook
        mWorkbookPart.Workbook = new Workbook();

        // Add a new WorksheetPart to the WorkbookPart
        mWorksheetPart = mWorkbookPart.AddNewPart<WorksheetPart>();

        // Create a new SheetData
        mSheetData = new SheetData();

        // Add a new Worksheet into the WorksheetPart
        mWorksheetPart.Worksheet = new Worksheet();

        // Go through the columns
        for (int mCounter = 0; mCounter < mCols; mCounter++)
        {
            // One Column definition per column (Min == Max), with a fixed custom width of 20
            mColumn = new Column() { Min = Convert.ToUInt32(mCounter + 1), Max = Convert.ToUInt32(mCounter + 1), Width = 20, CustomWidth = true };

            // Append the Column
            mColumns.Append(mColumn);
        }

        // Append the columns (before the SheetData)
        mWorksheetPart.Worksheet.Append(mColumns);

        // Add the SheetData to the tree
        mWorksheetPart.Worksheet.Append(mSheetData);

        // Create and add Sheets to the Workbook
        mSheets = mSpreadsheetDocument.WorkbookPart.Workbook.AppendChild(new Sheets());

        // Create the Sheet entry; it is appended to mSheets only after the data has been written
        mSheet = new Sheet()
        {
            Id = mSpreadsheetDocument.WorkbookPart.GetIdOfPart(mWorksheetPart),
            SheetId = 1,
            Name = "Sheet1",
        };

        // Create the Style sheet
        mWorkbookStylesPart = mWorkbookPart.AddNewPart<WorkbookStylesPart>();
        mWorkbookPart.WorkbookStylesPart.Stylesheet = new Stylesheet();

        // Create the WorksheetStyle (replaces the Stylesheet assigned just above)
        zCreateWorksheetStyle(mWorkbookStylesPart);

        // Add the headers


        // Generate the Data
        zGenerateData(mRows, mCols, mData);

        // Initialize the ProgressBar
        Pgb.Minimum = 0;
        Pgb.Maximum = mRows;
        Pgb.Step = 1;

        // Go through the rows
        for (int mRowIndex = 0; mRowIndex < mRows; mRowIndex++)
        {
            // Get the current Row or create it (row indexes are 1-based)
            mRow = zGetRow(mSheetData, mRowIndex + 1);

            // Go through the cols
            for (int mColIndex = 0; mColIndex < mCols; mColIndex++)
            {
                // Get the current Cell or create it (column indexes are 1-based)
                mCell = zGetCell(mRow, mRowIndex + 1, mColIndex + 1);

                // Get the cell text
                mCellText = new StringBuilder(mData[mRowIndex, mColIndex].ToString());

                // Set the cell value
                mCell.CellValue = new CellValue(mCellText.ToString());

                // Set the style (index 6 is the last CellFormat appended by zCreateWorksheetStyle)
                mCell.StyleIndex = 6;

                // Set the cell type
                mCell.DataType = new EnumValue<CellValues>(CellValues.String);
            }

            // Increment the ProgressBar
            Pgb.PerformStep();
        }

        // Reset the ProgressBar
        Pgb.Value = Pgb.Minimum;

        // Add the Sheet to the tree
        mSheets.Append(mSheet);

        // Save the changes
        mSpreadsheetDocument.Dispose();

        // Display the document
        Process.Start(mPathFileName);
    }
    catch (Exception ex)
    {
        // Check if the object is null
        if (mSpreadsheetDocument != null)
        {
            // Dispose the object
            // NOTE(review): when the exception came from the Dispose() call above, this disposes a second time - confirm that is safe
            mSpreadsheetDocument.Dispose();
        }

        // Display the error
        MessageBox.Show(ex.Message);
    }
}

/// <summary>
/// Fills <paramref name="Data"/> with test text: each element receives the character U+001F
/// followed by "Value", the 1-based row number and the 1-based column number.
/// </summary>
/// <param name="pRows">Number of rows to fill.</param>
/// <param name="pColumns">Number of columns to fill.</param>
/// <param name="Data">Array that receives the generated values.</param>
private void zGenerateData(int pRows, int pColumns, string[,] Data)
{
    for (int r = 0; r < pRows; r++)
    {
        // The numbers shown in the text are 1-based
        int rowNumber = r + 1;

        for (int c = 0; c < pColumns; c++)
        {
            int colNumber = c + 1;
            Data[r, c] = $"\u001fValue {rowNumber} {colNumber}";
        }
    }
}

/// <summary>Returns the row with index <paramref name="pRow"/>, appending a new one to the sheet data when none exists.</summary>
/// <param name="SheetData">Sheet data searched for the row and extended when the row is missing.</param>
/// <param name="pRow">Row index to look up.</param>
/// <returns>The existing or newly appended row, or <c>null</c> when any exception occurs.</returns>
private Row zGetRow(SheetData SheetData, int pRow)
{
    try
    {
        // Look for a row that already carries the requested index
        Row match = SheetData?.Elements<Row>().FirstOrDefault(r => r.RowIndex == pRow);
        if (match != null)
        {
            return match;
        }

        // Not found: create the row and add it at the end of the sheet data
        Row added = new Row { RowIndex = (uint)pRow };
        SheetData.AppendChild(added);
        return added;
    }
    catch
    {
        // Any failure is reported to the caller as "no row"
        return null;
    }
}

/// <summary>Returns the cell of <paramref name="Row"/> at the given position, inserting a new string cell when none exists.</summary>
/// <param name="Row">Row searched for the cell and extended when the cell is missing.</param>
/// <param name="pRow">Row index used to build the cell reference.</param>
/// <param name="pCol">Column index used to build the cell reference and to pick the insert position.</param>
/// <returns>The existing or newly inserted cell, or <c>null</c> when any exception occurs.</returns>
private Cell zGetCell(Row Row, int pRow, int pCol)
{
    try
    {
        // Build the reference the cell is identified by
        string reference = zGetCellReference(pRow, pCol);

        // Look for a cell that already carries this reference
        Cell match = Row?.Elements<Cell>().FirstOrDefault(c => c.CellReference.Value == reference);
        if (match != null)
        {
            return match;
        }

        // Not found: create the cell and insert it at the position computed by zGetCellIndex
        Cell added = new Cell
        {
            CellReference = reference,
            DataType = CellValues.String
        };
        Row.InsertAt(added, zGetCellIndex(Row, pCol));
        return added;
    }
    catch
    {
        // Any failure is reported to the caller as "no cell"
        return null;
    }
}

/// <summary>Builds a cell reference such as "A1" or "B2" from a row number and a column index.</summary>
/// <param name="pRow">Row number placed after the column letters.</param>
/// <param name="pCol">Column index converted to letters by zGetColumnName.</param>
/// <returns>The column name immediately followed by the row number.</returns>
private string zGetCellReference(int pRow, int pCol)
{
    string columnLetters = zGetColumnName(pCol);
    return columnLetters + pRow.ToString();
}

/// <summary>Converts a 1-based column index to its letter name (1 = "A", 26 = "Z", 27 = "AA").</summary>
/// <param name="pCol">Column index; values below 1 produce an empty string.</param>
/// <returns>The column name made of the letters A to Z.</returns>
private string zGetColumnName(int pCol)
{
    string name = string.Empty;

    // Peel off one base-26 "digit" per iteration, least significant letter first
    for (int remaining = pCol; remaining > 0; remaining = (remaining - 1) / 26)
    {
        int letterOffset = (remaining - 1) % 26;
        name = (char)('A' + letterOffset) + name;
    }

    return name;
}

/// <summary>Returns the position inside the row at which a cell for column <paramref name="pCol"/> should be inserted.</summary>
/// <param name="row">Row whose existing cells are inspected in order.</param>
/// <param name="pCol">Column index of the cell to insert.</param>
/// <returns>The number of leading cells whose column index is not greater than <paramref name="pCol"/>.</returns>
private int zGetCellIndex(Row row, int pCol)
{
    // Count cells until the first one that lies to the right of the requested column
    return row.Elements<Cell>()
              .TakeWhile(cell => zGetColumnIndex(cell.CellReference.Value) <= pCol)
              .Count();
}

/// <summary>Extracts the column index from a cell reference ("A1" gives 1, "AA3" gives 27).</summary>
/// <param name="pCellReference">Cell reference; its digits are removed and the remaining characters are read as letters.</param>
/// <returns>The column index computed from the remaining characters as a base-26 number with 'A' = 1.</returns>
private int zGetColumnIndex(string pCellReference)
{
    // Drop the row number so that only the column letters remain
    string letters = Regex.Replace(pCellReference, @"\d", "");

    int index = 0;
    foreach (char letter in letters)
    {
        index = (index * 26) + (letter - 'A' + 1);
    }

    return index;
}

/// <summary>
/// Builds a stylesheet with three fonts, one fill, one border and seven cell formats, assigns it
/// to <paramref name="WorkbookStylesPart"/> and saves it.
/// </summary>
/// <param name="WorkbookStylesPart">Styles part that receives the stylesheet.</param>
/// <returns>The same styles part, or <c>null</c> when any exception occurs.</returns>
private WorkbookStylesPart zCreateWorksheetStyle(WorkbookStylesPart WorkbookStylesPart)
{
    try
    {
        // Font 0: Arial 11 (default font)
        var arialFont = new Font();
        arialFont.Append(new FontName() { Val = "Arial" }, new FontSize() { Val = 11 });

        // Font 1: bold only
        var boldFont = new Font();
        boldFont.Append(new Bold());

        // Font 2: Calibri 11
        var calibriFont = new Font();
        calibriFont.Append(new FontName() { Val = "Calibri" }, new FontSize() { Val = 11 });

        // The position of a font in this collection is the FontId used by the cell formats
        var fonts = new Fonts();
        fonts.Append(arialFont, boldFont, calibriFont);

        // A single default fill
        var fills = new Fills();
        fills.Append(new Fill());

        // A single default border
        var borders = new Borders();
        borders.Append(new Border());

        // The append order defines the style index and must match the eSTYLE_INDEX enumeration
        var cellFormats = new CellFormats();
        cellFormats.Append(
            new CellFormat() { FontId = 0, FillId = 0, BorderId = 0 },                                                                   // ARIAL_11_REGULAR
            new CellFormat() { FontId = 1 },                                                                                             // ARIAL_11_BOLD
            new CellFormat() { FontId = 2, FillId = 0, BorderId = 0 },                                                                   // CALIBRI_11_REGULAR
            new CellFormat() { BorderId = 0, FillId = 0, FontId = 2, NumberFormatId = 14, FormatId = 0, ApplyNumberFormat = true },      // CALIBRI_11_DATE_FORMAT
            new CellFormat() { BorderId = 0, FillId = 0, FontId = 2, NumberFormatId = 4, FormatId = 0, ApplyNumberFormat = true },       // CALIBRI_11_DOUBLE_FORMAT
            new CellFormat() { BorderId = 0, FillId = 0, FontId = 2, NumberFormatId = 3, FormatId = 0, ApplyNumberFormat = true },       // CALIBRI_11_INTEGER_FORMAT
            new CellFormat() { BorderId = 0, FillId = 0, FontId = 2, NumberFormatId = 49, FormatId = 0, ApplyNumberFormat = true });     // CALIBRI_11_TEXT_FORMAT

        // Children are appended in this fixed order: fonts, fills, borders, cell formats
        var stylesheet = new Stylesheet();
        stylesheet.Append(fonts, fills, borders, cellFormats);

        // Attach the stylesheet to the part and write it
        WorkbookStylesPart.Stylesheet = stylesheet;
        WorkbookStylesPart.Stylesheet.Save();

        return WorkbookStylesPart;
    }
    catch
    {
        // Any failure is reported to the caller as "no styles part"
        return null;
    }
}

This issue is closed.