Can't use readNumEvents with zipped LHE-files
SundeMarius opened this issue · comments
Matthew Feickert commented
I can reproduce this in the tests with
diff --git a/tests/test_lhe_reader.py b/tests/test_lhe_reader.py
index aff4674..eae3019 100644
--- a/tests/test_lhe_reader.py
+++ b/tests/test_lhe_reader.py
@@ -46,8 +46,9 @@ def test_gzip_open(tmpdir, testdata_gzip_file):
assert pylhe.readLHEInit(TEST_FILE) == pylhe.readLHEInit(testdata_gzip_file)
-def test_event_count():
+def test_event_count(testdata_gzip_file):
assert pylhe.readNumEvents(TEST_FILE) == 791
+ assert pylhe.readNumEvents(testdata_gzip_file) == 791
def test_lhe_init():
then
$ pytest -sx tests/test_lhe_reader.py -k test_event_count
=========================================================================================== test session starts ===========================================================================================
platform linux -- Python 3.8.6, pytest-6.2.1, py-1.10.0, pluggy-0.13.1
rootdir: /home/feickert/Code/GitHub/Scikit-HEP/pylhe, configfile: pytest.ini
plugins: cov-2.11.1
collected 4 items / 3 deselected / 1 selected
tests/test_lhe_reader.py F
================================================================================================ FAILURES =================================================================================================
____________________________________________________________________________________________ test_event_count _____________________________________________________________________________________________
testdata_gzip_file = PosixPath('/tmp/tmp8un5qya1')
def test_event_count(testdata_gzip_file):
assert pylhe.readNumEvents(TEST_FILE) == 791
> assert pylhe.readNumEvents(testdata_gzip_file) == 791
tests/test_lhe_reader.py:51:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
src/pylhe/__init__.py:254: in readNumEvents
return sum(
src/pylhe/__init__.py:254: in <genexpr>
return sum(
../../../../.pyenv/versions/3.8.6/lib/python3.8/xml/etree/ElementTree.py:1227: in iterator
yield from pullparser.read_events()
../../../../.pyenv/versions/3.8.6/lib/python3.8/xml/etree/ElementTree.py:1302: in read_events
raise event
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <xml.etree.ElementTree.XMLPullParser object at 0x7f72dbaeec10>
data = b'\x1f\x8b\x08\x08VSg`\x02\xfftmp8un5qya1\x00\xb4\xbdK\xb3$Iv\xdf\xb7\xe7\xa7(QKX\xdd\xf1\xf7C\x06q!3PZPK\x9aq;\x04\x9...xb1\xea\x89m\t\x8d\xa1(\xfd\xb0\xed\xb9\x89h0\x08\xde\x12\xf7\x1d\x0c\xe8\xe0\xf1[\x8c\xcf\xf5\x14\xe3\x11\xee\xda\x97'
def feed(self, data):
"""Feed encoded data to parser."""
if self._parser is None:
raise ValueError("feed() called after end of stream")
if data:
try:
> self._parser.feed(data)
E File "<string>", line None
E xml.etree.ElementTree.ParseError: not well-formed (invalid token): line 1, column 0
../../../../.pyenv/versions/3.8.6/lib/python3.8/xml/etree/ElementTree.py:1274: ParseError
============================================================================================ warnings summary =============================================================================================
../../../../.pyenv/versions/3.8.6/envs/pylhe-dev/lib/python3.8/site-packages/_pytest/config/__init__.py:1233
/home/feickert/.pyenv/versions/3.8.6/envs/pylhe-dev/lib/python3.8/site-packages/_pytest/config/__init__.py:1233: PytestConfigWarning: Unknown config option: docstyle_convention
self._warn_or_fail_if_strict(f"Unknown config option: {key}\n")
-- Docs: https://docs.pytest.org/en/stable/warnings.html
----------- coverage: platform linux, python 3.8.6-final-0 -----------
Name Stmts Miss Branch BrPart Cover Missing
-------------------------------------------------------------------
src/pylhe/__init__.py 159 118 60 0 20% 13, 20-26, 29, 36-39, 43, 64-67, 71, 74-76, 98, 102, 111, 115, 119, 135-139, 155-190, 194-205, 213-247, 265-288
Coverage XML written to file coverage.xml
========================================================================================= short test summary info =========================================================================================
FAILED tests/test_lhe_reader.py::test_event_count - File "<string>", line None
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! stopping after 1 failures !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
=============================================================================== 1 failed, 3 deselected, 1 warning in 0.70s ================================================================================
Matthew Feickert commented
And with
git diff src/
diff --git a/src/pylhe/__init__.py b/src/pylhe/__init__.py
index bacd65d..4049113 100644
--- a/src/pylhe/__init__.py
+++ b/src/pylhe/__init__.py
@@ -247,13 +247,15 @@ def readLHEWithAttributes(filepath):
return
-def readNumEvents(file):
+def readNumEvents(filepath):
"""
Moderately efficent way to get the number of events stored in file.
"""
- return sum(
- element.tag == "event" for event, element in ET.iterparse(file, events=["end"])
- )
+ with _extract_fileobj(filepath) as fileobj:
+ return sum(
+ element.tag == "event"
+ for event, element in ET.iterparse(fileobj, events=["end"])
+ )
def visualize(event, outputname):
things pass, so this should be an easy fix. Thanks for reporting, @SundeMarius.
Marius commented
Yep, that fixed it :)
Thanks!
Matthew Feickert commented
@SundeMarius I'm going to leave this open until we actually fix this with a PR.
Matthew Feickert commented
@SundeMarius If you need this sooner than the next patch release, you can get the TestPyPI dev release: https://test.pypi.org/project/pylhe/0.2.2.dev3/ (though we only support the releases on PyPI).