Skip to content

Commit 30040f5

Browse files
authored
bugfix: multiple empty rows in query result due to empty first row (#811)
1 parent 9c70b5d commit 30040f5

File tree

3 files changed

+137
-113
lines changed

3 files changed

+137
-113
lines changed

samples/xlsx/TestIssue809.xlsx

8.4 KB
Binary file not shown.

src/MiniExcel/OpenXml/ExcelOpenXmlSheetReader.cs

Lines changed: 96 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ internal partial class ExcelOpenXmlSheetReader : IExcelReader
2222
private static readonly string[] _relationshiopNs = { Config.SpreadsheetmlXmlRelationshipns, Config.SpreadsheetmlXmlStrictRelationshipns };
2323
private List<SheetRecord> _sheetRecords;
2424
internal IDictionary<int, string> _sharedStrings;
25-
private MergeCells _mergeCells;
2625
private ExcelOpenXmlStyles _style;
2726
internal readonly ExcelOpenXmlZip _archive;
2827
private readonly OpenXmlConfiguration _config;
@@ -140,7 +139,6 @@ public IAsyncEnumerable<IDictionary<string, object>> QueryRangeAsync(bool useHea
140139
return QueryImplAsync<T>(QueryRangeAsync(false, sheetName, startRowIndex, startColumnIndex, endRowIndex, endColumnIndex, cancellationToken), ReferenceHelper.ConvertXyToCell(startColumnIndex, startRowIndex), hasHeader, _config, cancellationToken);
141140
}
142141

143-
144142
[Zomp.SyncMethodGenerator.CreateSyncVersion]
145143
internal async IAsyncEnumerable<IDictionary<string, object>> InternalQueryRangeAsync(bool useHeaderRow, string sheetName, int startRowIndex, int startColumnIndex, int? endRowIndex, int? endColumnIndex, [EnumeratorCancellation] CancellationToken cancellationToken = default)
146144
{
@@ -159,15 +157,12 @@ internal async IAsyncEnumerable<IDictionary<string, object>> InternalQueryRangeA
159157
// TODO: need to optimize performance
160158
// Q. why need 3 times openstream merge one open read? A. no, zipstream can't use position = 0
161159

162-
var mergeCellsContext = new MergeCellsContext { };
163-
160+
var mergeCellsContext = new MergeCellsContext();
164161
if (_config.FillMergedCells && !await TryGetMergeCellsAsync(sheetEntry, mergeCellsContext, cancellationToken).ConfigureAwait(false))
165162
{
166163
yield break;
167164
}
168165

169-
_mergeCells = mergeCellsContext.MergeCells;
170-
171166
var maxRowColumnIndexResult = await TryGetMaxRowColumnIndexAsync(sheetEntry, cancellationToken).ConfigureAwait(false);
172167
if (!maxRowColumnIndexResult.IsSuccess)
173168
{
@@ -223,89 +218,16 @@ internal async IAsyncEnumerable<IDictionary<string, object>> InternalQueryRangeA
223218
break;
224219
}
225220

226-
// fill empty rows
227-
if (!_config.IgnoreEmptyRows)
228-
{
229-
var expectedRowIndex = isFirstRow ? startRowIndex : nextRowIndex;
230-
if (startRowIndex <= expectedRowIndex && expectedRowIndex < rowIndex)
231-
{
232-
for (int i = expectedRowIndex; i < rowIndex; i++)
233-
{
234-
yield return GetCell(useHeaderRow, maxColumnIndex, headRows, startColumnIndex);
235-
}
236-
}
237-
}
238-
239-
// row -> c, must after `if (nextRowIndex < rowIndex)` condition code, eg. The first empty row has no xml element,and the second row xml element is <row r="2"/>
240-
if (!await XmlReaderHelper.ReadFirstContentAsync(reader, cancellationToken).ConfigureAwait(false) && !_config.IgnoreEmptyRows)
221+
await foreach (var row in QueryRowAsync(reader, isFirstRow, startRowIndex, nextRowIndex, rowIndex, startColumnIndex, endColumnIndex, maxColumnIndex, withoutCR, useHeaderRow, headRows, mergeCellsContext.MergeCells, cancellationToken).ConfigureAwait(false))
241222
{
242-
//Fill in case of self closed empty row tag eg. <row r="1"/>
243-
yield return GetCell(useHeaderRow, maxColumnIndex, headRows, startColumnIndex);
244-
continue;
245-
}
246-
247-
#region Set Cells
248-
249-
var cell = GetCell(useHeaderRow, maxColumnIndex, headRows, startColumnIndex);
250-
var columnIndex = withoutCR ? -1 : 0;
251-
while (!reader.EOF)
252-
{
253-
if (XmlReaderHelper.IsStartElement(reader, "c", _ns))
223+
if (isFirstRow)
254224
{
255-
var aS = reader.GetAttribute("s");
256-
var aR = reader.GetAttribute("r");
257-
var aT = reader.GetAttribute("t");
258-
var cellAndColumn = await ReadCellAndSetColumnIndexAsync(reader, columnIndex, withoutCR,
259-
startColumnIndex, aR, aT, cancellationToken).ConfigureAwait(false);
260-
261-
var cellValue = cellAndColumn.CellValue;
262-
columnIndex = cellAndColumn.ColumnIndex;
263-
264-
if (_config.FillMergedCells)
265-
{
266-
if (_mergeCells.MergesValues.ContainsKey(aR))
267-
{
268-
_mergeCells.MergesValues[aR] = cellValue;
269-
}
270-
else if (_mergeCells.MergesMap.TryGetValue(aR, out var mergeKey))
271-
{
272-
_mergeCells.MergesValues.TryGetValue(mergeKey, out cellValue);
273-
}
274-
}
275-
276-
if (columnIndex < startColumnIndex || (endColumnIndex.HasValue && columnIndex > endColumnIndex.Value))
225+
isFirstRow = false; // for startcell logic
226+
if (useHeaderRow)
277227
continue;
278-
279-
if (!string.IsNullOrEmpty(aS)) // if c with s meaning is custom style need to check type by xl/style.xml
280-
{
281-
int xfIndex = -1;
282-
if (int.TryParse(aS, NumberStyles.Any, CultureInfo.InvariantCulture,
283-
out var styleIndex))
284-
xfIndex = styleIndex;
285-
286-
// only when have s attribute then load styles xml data
287-
if (_style == null)
288-
_style = new ExcelOpenXmlStyles(_archive);
289-
290-
cellValue = _style.ConvertValueByStyleFormat(xfIndex, cellValue);
291-
}
292-
293-
SetCellsValueAndHeaders(cellValue, useHeaderRow, ref headRows, ref isFirstRow, ref cell, columnIndex);
294228
}
295-
else if (!await XmlReaderHelper.SkipContentAsync(reader, cancellationToken).ConfigureAwait(false))
296-
break;
229+
yield return row;
297230
}
298-
299-
#endregion
300-
301-
if (isFirstRow)
302-
{
303-
isFirstRow = false; // for startcell logic
304-
if (useHeaderRow)
305-
continue;
306-
}
307-
308-
yield return cell;
309231
}
310232
else if (!await XmlReaderHelper.SkipContentAsync(reader, cancellationToken).ConfigureAwait(false))
311233
{
@@ -321,6 +243,94 @@ internal async IAsyncEnumerable<IDictionary<string, object>> InternalQueryRangeA
321243
}
322244
}
323245

246+
/// <summary>
/// Reads the cells of a single worksheet row from <paramref name="reader"/> and yields the
/// resulting row, preceded by one placeholder row for every missing row index between the
/// expected row and <paramref name="rowIndex"/> (unless empty rows are ignored by configuration).
/// </summary>
/// <param name="reader">XML reader for the sheet; presumably positioned on the current <c>row</c> element by the caller.</param>
/// <param name="isFirstRow">Whether no row has been processed yet; selects the expected row index and is forwarded to header handling.</param>
/// <param name="startRowIndex">First row index of the requested range.</param>
/// <param name="nextRowIndex">Row index expected next when this is not the first row.</param>
/// <param name="rowIndex">Index of the row actually being read.</param>
/// <param name="startColumnIndex">First column index of the requested range; cells before it are skipped.</param>
/// <param name="endColumnIndex">Optional last column index of the requested range; cells after it are skipped.</param>
/// <param name="maxColumnIndex">Maximum column index, used to build empty rows when no header row is used.</param>
/// <param name="withoutCR">Whether cells carry no <c>r</c> (cell reference) attribute, in which case column positions are counted instead.</param>
/// <param name="useHeaderRow">Whether row values are keyed by header names instead of column letters.</param>
/// <param name="headRows">Header names by column index.</param>
/// <param name="mergeCells">Merged-cell lookup tables; only consulted when <c>_config.FillMergedCells</c> is set.</param>
/// <param name="cancellationToken">Token used to cancel the asynchronous reads.</param>
/// <returns>Zero or more placeholder rows followed by the row that was read.</returns>
[Zomp.SyncMethodGenerator.CreateSyncVersion]
private async IAsyncEnumerable<IDictionary<string, object>> QueryRowAsync(
    XmlReader reader,
    bool isFirstRow,
    int startRowIndex,
    int nextRowIndex,
    int rowIndex,
    int startColumnIndex,
    int? endColumnIndex,
    int maxColumnIndex,
    bool withoutCR,
    bool useHeaderRow,
    Dictionary<int, string> headRows,
    MergeCells mergeCells,
    [EnumeratorCancellation] CancellationToken cancellationToken = default)
{
    // Fill empty rows: rows that have no XML element at all leave a gap between the expected
    // index and the index of the row being read; emit one empty row per missing index.
    if (!_config.IgnoreEmptyRows)
    {
        var expectedRowIndex = isFirstRow ? startRowIndex : nextRowIndex;
        if (startRowIndex <= expectedRowIndex && expectedRowIndex < rowIndex)
        {
            for (int i = expectedRowIndex; i < rowIndex; i++)
            {
                yield return GetCell(useHeaderRow, maxColumnIndex, headRows, startColumnIndex);
            }
        }
    }

    // row -> c. This must run after the gap filling above, e.g. when the first empty row has
    // no XML element and the second row's XML element is <row r="2"/>.
    if (!await XmlReaderHelper.ReadFirstContentAsync(reader, cancellationToken).ConfigureAwait(false) && !_config.IgnoreEmptyRows)
    {
        // Self-closed empty row tag, e.g. <row r="1"/>: still report it as an empty row.
        yield return GetCell(useHeaderRow, maxColumnIndex, headRows, startColumnIndex);
        yield break;
    }

    var cell = GetCell(useHeaderRow, maxColumnIndex, headRows, startColumnIndex);
    var columnIndex = withoutCR ? -1 : 0;
    while (!reader.EOF)
    {
        if (XmlReaderHelper.IsStartElement(reader, "c", _ns))
        {
            // Attributes must be captured before the cell content is consumed below.
            var aS = reader.GetAttribute("s");
            var aR = reader.GetAttribute("r");
            var aT = reader.GetAttribute("t");
            var cellAndColumn = await ReadCellAndSetColumnIndexAsync(reader, columnIndex, withoutCR, startColumnIndex, aR, aT, cancellationToken).ConfigureAwait(false);

            var cellValue = cellAndColumn.CellValue;
            columnIndex = cellAndColumn.ColumnIndex;

            // Merged-cell bookkeeping is keyed by the cell reference. A cell without an "r"
            // attribute (GetAttribute returns null) cannot take part in a merge lookup, and a
            // null key would make the dictionary lookup throw, so such cells are skipped here.
            if (_config.FillMergedCells && aR != null)
            {
                if (mergeCells.MergesValues.ContainsKey(aR))
                {
                    // Record the value so that it can later be copied into the other cells of the merge.
                    mergeCells.MergesValues[aR] = cellValue;
                }
                else if (mergeCells.MergesMap.TryGetValue(aR, out var mergeKey))
                {
                    // Replace the value with the one recorded under the mapped merge key.
                    mergeCells.MergesValues.TryGetValue(mergeKey, out cellValue);
                }
            }

            // Checked only after the merge bookkeeping so that values of cells outside the
            // requested column range are still recorded for merged cells.
            if (columnIndex < startColumnIndex || (endColumnIndex.HasValue && columnIndex > endColumnIndex.Value))
            {
                continue;
            }

            if (!string.IsNullOrEmpty(aS)) // a "c" element with "s" has a custom style; its type must be checked via xl/style.xml
            {
                int xfIndex = int.TryParse(aS, NumberStyles.Any, CultureInfo.InvariantCulture, out var styleIndex)
                    ? styleIndex
                    : -1;

                // Load the styles XML data lazily, only once a cell actually has an "s" attribute.
                if (_style == null)
                {
                    _style = new ExcelOpenXmlStyles(_archive);
                }

                cellValue = _style.ConvertValueByStyleFormat(xfIndex, cellValue);
            }

            SetCellsValueAndHeaders(cellValue, useHeaderRow, headRows, isFirstRow, cell, columnIndex);
        }
        else if (!await XmlReaderHelper.SkipContentAsync(reader, cancellationToken).ConfigureAwait(false))
        {
            break;
        }
    }

    yield return cell;
}
333+
324334
[Zomp.SyncMethodGenerator.CreateSyncVersion]
325335
public static async IAsyncEnumerable<T> QueryImplAsync<T>(IAsyncEnumerable<IDictionary<string, object>> values, string startCell, bool hasHeader, Configuration configuration, [EnumeratorCancellation] CancellationToken cancellationToken = default) where T : class, new()
326336
{
@@ -434,7 +444,7 @@ private static IDictionary<string, object> GetCell(bool useHeaderRow, int maxCol
434444
return useHeaderRow ? CustomPropertyHelper.GetEmptyExpandoObject(headRows) : CustomPropertyHelper.GetEmptyExpandoObject(maxColumnIndex, startColumnIndex);
435445
}
436446

437-
private static void SetCellsValueAndHeaders(object cellValue, bool useHeaderRow, ref Dictionary<int, string> headRows, ref bool isFirstRow, ref IDictionary<string, object> cell, int columnIndex)
447+
private static void SetCellsValueAndHeaders(object cellValue, bool useHeaderRow, Dictionary<int, string> headRows, bool isFirstRow, IDictionary<string, object> cell, int columnIndex)
438448
{
439449
if (!useHeaderRow)
440450
{
@@ -1093,7 +1103,7 @@ internal static async Task<bool> TryGetMergeCellsAsync(ZipArchiveEntry sheetEntr
10931103
#else
10941104
true
10951105
#endif
1096-
);
1106+
);
10971107
var mergeCells = new MergeCells();
10981108
using (var sheetStream = sheetEntry.Open())
10991109
using (XmlReader reader = XmlReader.Create(sheetStream, xmlSettings))

0 commit comments

Comments
 (0)