diff --git a/.cspell/custom-dictionary.txt b/.cspell/custom-dictionary.txt index 406f9f0f..5fe97cab 100644 --- a/.cspell/custom-dictionary.txt +++ b/.cspell/custom-dictionary.txt @@ -126,6 +126,7 @@ ftype fwhm genindex getgid +getgrgid getmtime gpfs griddata @@ -290,6 +291,7 @@ ptargs pullrequest pval pyarrow +pydantic pyenv pygments pynxtools diff --git a/poetry.lock b/poetry.lock index 9d92d6f7..af2a8893 100644 --- a/poetry.lock +++ b/poetry.lock @@ -40,6 +40,17 @@ files = [ {file = "alabaster-0.7.16.tar.gz", hash = "sha256:75a8b99c28a5dad50dd7f8ccdd447a121ddb3892da9e53d1ca5cca3106d58d65"}, ] +[[package]] +name = "annotated-types" +version = "0.7.0" +description = "Reusable constraint types to use with typing.Annotated" +optional = false +python-versions = ">=3.8" +files = [ + {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, + {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, +] + [[package]] name = "anyio" version = "3.7.1" @@ -3489,6 +3500,127 @@ files = [ {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, ] +[[package]] +name = "pydantic" +version = "2.9.2" +description = "Data validation using Python type hints" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pydantic-2.9.2-py3-none-any.whl", hash = "sha256:f048cec7b26778210e28a0459867920654d48e5e62db0958433636cde4254f12"}, + {file = "pydantic-2.9.2.tar.gz", hash = "sha256:d155cef71265d1e9807ed1c32b4c8deec042a44a50a4188b25ac67ecd81a9c0f"}, +] + +[package.dependencies] +annotated-types = ">=0.6.0" +pydantic-core = "2.23.4" +typing-extensions = {version = ">=4.6.1", markers = "python_version < \"3.13\""} + +[package.extras] +email = ["email-validator (>=2.0.0)"] +timezone = ["tzdata"] + +[[package]] +name = "pydantic-core" +version = "2.23.4" +description = "Core functionality for Pydantic validation and serialization" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pydantic_core-2.23.4-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:b10bd51f823d891193d4717448fab065733958bdb6a6b351967bd349d48d5c9b"}, + {file = "pydantic_core-2.23.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4fc714bdbfb534f94034efaa6eadd74e5b93c8fa6315565a222f7b6f42ca1166"}, + {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63e46b3169866bd62849936de036f901a9356e36376079b05efa83caeaa02ceb"}, + {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed1a53de42fbe34853ba90513cea21673481cd81ed1be739f7f2efb931b24916"}, + {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cfdd16ab5e59fc31b5e906d1a3f666571abc367598e3e02c83403acabc092e07"}, + {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:255a8ef062cbf6674450e668482456abac99a5583bbafb73f9ad469540a3a232"}, + {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a7cd62e831afe623fbb7aabbb4fe583212115b3ef38a9f6b71869ba644624a2"}, + {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f09e2ff1f17c2b51f2bc76d1cc33da96298f0a036a137f5440ab3ec5360b624f"}, + {file = "pydantic_core-2.23.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = 
"sha256:e38e63e6f3d1cec5a27e0afe90a085af8b6806ee208b33030e65b6516353f1a3"}, + {file = "pydantic_core-2.23.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0dbd8dbed2085ed23b5c04afa29d8fd2771674223135dc9bc937f3c09284d071"}, + {file = "pydantic_core-2.23.4-cp310-none-win32.whl", hash = "sha256:6531b7ca5f951d663c339002e91aaebda765ec7d61b7d1e3991051906ddde119"}, + {file = "pydantic_core-2.23.4-cp310-none-win_amd64.whl", hash = "sha256:7c9129eb40958b3d4500fa2467e6a83356b3b61bfff1b414c7361d9220f9ae8f"}, + {file = "pydantic_core-2.23.4-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:77733e3892bb0a7fa797826361ce8a9184d25c8dffaec60b7ffe928153680ba8"}, + {file = "pydantic_core-2.23.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b84d168f6c48fabd1f2027a3d1bdfe62f92cade1fb273a5d68e621da0e44e6d"}, + {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df49e7a0861a8c36d089c1ed57d308623d60416dab2647a4a17fe050ba85de0e"}, + {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ff02b6d461a6de369f07ec15e465a88895f3223eb75073ffea56b84d9331f607"}, + {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:996a38a83508c54c78a5f41456b0103c30508fed9abcad0a59b876d7398f25fd"}, + {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d97683ddee4723ae8c95d1eddac7c192e8c552da0c73a925a89fa8649bf13eea"}, + {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:216f9b2d7713eb98cb83c80b9c794de1f6b7e3145eef40400c62e86cee5f4e1e"}, + {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6f783e0ec4803c787bcea93e13e9932edab72068f68ecffdf86a99fd5918878b"}, + {file = "pydantic_core-2.23.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d0776dea117cf5272382634bd2a5c1b6eb16767c223c6a5317cd3e2a757c61a0"}, + {file = "pydantic_core-2.23.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d5f7a395a8cf1621939692dba2a6b6a830efa6b3cee787d82c7de1ad2930de64"}, + {file = "pydantic_core-2.23.4-cp311-none-win32.whl", hash = "sha256:74b9127ffea03643e998e0c5ad9bd3811d3dac8c676e47db17b0ee7c3c3bf35f"}, + {file = "pydantic_core-2.23.4-cp311-none-win_amd64.whl", hash = "sha256:98d134c954828488b153d88ba1f34e14259284f256180ce659e8d83e9c05eaa3"}, + {file = "pydantic_core-2.23.4-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f3e0da4ebaef65158d4dfd7d3678aad692f7666877df0002b8a522cdf088f231"}, + {file = "pydantic_core-2.23.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f69a8e0b033b747bb3e36a44e7732f0c99f7edd5cea723d45bc0d6e95377ffee"}, + {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:723314c1d51722ab28bfcd5240d858512ffd3116449c557a1336cbe3919beb87"}, + {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bb2802e667b7051a1bebbfe93684841cc9351004e2badbd6411bf357ab8d5ac8"}, + {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d18ca8148bebe1b0a382a27a8ee60350091a6ddaf475fa05ef50dc35b5df6327"}, + {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:33e3d65a85a2a4a0dc3b092b938a4062b1a05f3a9abde65ea93b233bca0e03f2"}, + {file = 
"pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:128585782e5bfa515c590ccee4b727fb76925dd04a98864182b22e89a4e6ed36"}, + {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:68665f4c17edcceecc112dfed5dbe6f92261fb9d6054b47d01bf6371a6196126"}, + {file = "pydantic_core-2.23.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:20152074317d9bed6b7a95ade3b7d6054845d70584216160860425f4fbd5ee9e"}, + {file = "pydantic_core-2.23.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9261d3ce84fa1d38ed649c3638feefeae23d32ba9182963e465d58d62203bd24"}, + {file = "pydantic_core-2.23.4-cp312-none-win32.whl", hash = "sha256:4ba762ed58e8d68657fc1281e9bb72e1c3e79cc5d464be146e260c541ec12d84"}, + {file = "pydantic_core-2.23.4-cp312-none-win_amd64.whl", hash = "sha256:97df63000f4fea395b2824da80e169731088656d1818a11b95f3b173747b6cd9"}, + {file = "pydantic_core-2.23.4-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:7530e201d10d7d14abce4fb54cfe5b94a0aefc87da539d0346a484ead376c3cc"}, + {file = "pydantic_core-2.23.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:df933278128ea1cd77772673c73954e53a1c95a4fdf41eef97c2b779271bd0bd"}, + {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cb3da3fd1b6a5d0279a01877713dbda118a2a4fc6f0d821a57da2e464793f05"}, + {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:42c6dcb030aefb668a2b7009c85b27f90e51e6a3b4d5c9bc4c57631292015b0d"}, + {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:696dd8d674d6ce621ab9d45b205df149399e4bb9aa34102c970b721554828510"}, + {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2971bb5ffe72cc0f555c13e19b23c85b654dd2a8f7ab493c262071377bfce9f6"}, + {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8394d940e5d400d04cad4f75c0598665cbb81aecefaca82ca85bd28264af7f9b"}, + {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0dff76e0602ca7d4cdaacc1ac4c005e0ce0dcfe095d5b5259163a80d3a10d327"}, + {file = "pydantic_core-2.23.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:7d32706badfe136888bdea71c0def994644e09fff0bfe47441deaed8e96fdbc6"}, + {file = "pydantic_core-2.23.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ed541d70698978a20eb63d8c5d72f2cc6d7079d9d90f6b50bad07826f1320f5f"}, + {file = "pydantic_core-2.23.4-cp313-none-win32.whl", hash = "sha256:3d5639516376dce1940ea36edf408c554475369f5da2abd45d44621cb616f769"}, + {file = "pydantic_core-2.23.4-cp313-none-win_amd64.whl", hash = "sha256:5a1504ad17ba4210df3a045132a7baeeba5a200e930f57512ee02909fc5c4cb5"}, + {file = "pydantic_core-2.23.4-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:d4488a93b071c04dc20f5cecc3631fc78b9789dd72483ba15d423b5b3689b555"}, + {file = "pydantic_core-2.23.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:81965a16b675b35e1d09dd14df53f190f9129c0202356ed44ab2728b1c905658"}, + {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ffa2ebd4c8530079140dd2d7f794a9d9a73cbb8e9d59ffe24c63436efa8f271"}, + {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:61817945f2fe7d166e75fbfb28004034b48e44878177fc54d81688e7b85a3665"}, + {file = 
"pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:29d2c342c4bc01b88402d60189f3df065fb0dda3654744d5a165a5288a657368"}, + {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5e11661ce0fd30a6790e8bcdf263b9ec5988e95e63cf901972107efc49218b13"}, + {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d18368b137c6295db49ce7218b1a9ba15c5bc254c96d7c9f9e924a9bc7825ad"}, + {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ec4e55f79b1c4ffb2eecd8a0cfba9955a2588497d96851f4c8f99aa4a1d39b12"}, + {file = "pydantic_core-2.23.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:374a5e5049eda9e0a44c696c7ade3ff355f06b1fe0bb945ea3cac2bc336478a2"}, + {file = "pydantic_core-2.23.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5c364564d17da23db1106787675fc7af45f2f7b58b4173bfdd105564e132e6fb"}, + {file = "pydantic_core-2.23.4-cp38-none-win32.whl", hash = "sha256:d7a80d21d613eec45e3d41eb22f8f94ddc758a6c4720842dc74c0581f54993d6"}, + {file = "pydantic_core-2.23.4-cp38-none-win_amd64.whl", hash = "sha256:5f5ff8d839f4566a474a969508fe1c5e59c31c80d9e140566f9a37bba7b8d556"}, + {file = "pydantic_core-2.23.4-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:a4fa4fc04dff799089689f4fd502ce7d59de529fc2f40a2c8836886c03e0175a"}, + {file = "pydantic_core-2.23.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0a7df63886be5e270da67e0966cf4afbae86069501d35c8c1b3b6c168f42cb36"}, + {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dcedcd19a557e182628afa1d553c3895a9f825b936415d0dbd3cd0bbcfd29b4b"}, + {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f54b118ce5de9ac21c363d9b3caa6c800341e8c47a508787e5868c6b79c9323"}, + {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86d2f57d3e1379a9525c5ab067b27dbb8a0642fb5d454e17a9ac434f9ce523e3"}, + {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:de6d1d1b9e5101508cb37ab0d972357cac5235f5c6533d1071964c47139257df"}, + {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1278e0d324f6908e872730c9102b0112477a7f7cf88b308e4fc36ce1bdb6d58c"}, + {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9a6b5099eeec78827553827f4c6b8615978bb4b6a88e5d9b93eddf8bb6790f55"}, + {file = "pydantic_core-2.23.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:e55541f756f9b3ee346b840103f32779c695a19826a4c442b7954550a0972040"}, + {file = "pydantic_core-2.23.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a5c7ba8ffb6d6f8f2ab08743be203654bb1aaa8c9dcb09f82ddd34eadb695605"}, + {file = "pydantic_core-2.23.4-cp39-none-win32.whl", hash = "sha256:37b0fe330e4a58d3c58b24d91d1eb102aeec675a3db4c292ec3928ecd892a9a6"}, + {file = "pydantic_core-2.23.4-cp39-none-win_amd64.whl", hash = "sha256:1498bec4c05c9c787bde9125cfdcc63a41004ff167f495063191b863399b1a29"}, + {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f455ee30a9d61d3e1a15abd5068827773d6e4dc513e795f380cdd59932c782d5"}, + {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:1e90d2e3bd2c3863d48525d297cd143fe541be8bbf6f579504b9712cb6b643ec"}, + {file = 
"pydantic_core-2.23.4-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e203fdf807ac7e12ab59ca2bfcabb38c7cf0b33c41efeb00f8e5da1d86af480"}, + {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e08277a400de01bc72436a0ccd02bdf596631411f592ad985dcee21445bd0068"}, + {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f220b0eea5965dec25480b6333c788fb72ce5f9129e8759ef876a1d805d00801"}, + {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:d06b0c8da4f16d1d1e352134427cb194a0a6e19ad5db9161bf32b2113409e728"}, + {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:ba1a0996f6c2773bd83e63f18914c1de3c9dd26d55f4ac302a7efe93fb8e7433"}, + {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:9a5bce9d23aac8f0cf0836ecfc033896aa8443b501c58d0602dbfd5bd5b37753"}, + {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:78ddaaa81421a29574a682b3179d4cf9e6d405a09b99d93ddcf7e5239c742e21"}, + {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:883a91b5dd7d26492ff2f04f40fbb652de40fcc0afe07e8129e8ae779c2110eb"}, + {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88ad334a15b32a791ea935af224b9de1bf99bcd62fabf745d5f3442199d86d59"}, + {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:233710f069d251feb12a56da21e14cca67994eab08362207785cf8c598e74577"}, + {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:19442362866a753485ba5e4be408964644dd6a09123d9416c54cd49171f50744"}, + {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:624e278a7d29b6445e4e813af92af37820fafb6dcc55c012c834f9e26f9aaaef"}, + {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f5ef8f42bec47f21d07668a043f077d507e5bf4e668d5c6dfe6aaba89de1a5b8"}, + {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:aea443fffa9fbe3af1a9ba721a87f926fe548d32cab71d188a6ede77d0ff244e"}, + {file = "pydantic_core-2.23.4.tar.gz", hash = "sha256:2584f7cf844ac4d970fba483a717dbe10c1c1c96a969bf65d61ffe94df1b2863"}, +] + +[package.dependencies] +typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" + [[package]] name = "pyerfa" version = "2.0.1.4" @@ -5089,4 +5221,4 @@ notebook = ["ipykernel", "jupyter", "jupyterlab", "jupyterlab-h5web"] [metadata] lock-version = "2.0" python-versions = ">=3.9, <3.13" -content-hash = "a98574d0e71b520a728922e9494f6948468f589f1fd023996911dcc2eddba7fc" +content-hash = "c2633c759cb833df4706cde9489249d5987be07180f9967ae1164b329f616224" diff --git a/pyproject.toml b/pyproject.toml index 6e525864..9bdf216e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ tqdm = ">=4.62.3" xarray = ">=0.20.2" joblib = ">=1.2.0" pyarrow = ">=14.0.1, <17.0" +pydantic = ">=2.8.2" jupyter = {version = ">=1.0.0", optional = true} ipykernel = {version = ">=6.9.1", optional = true} jupyterlab = {version = "^3.4.0", optional = true} diff --git a/sed/calibrator/delay.py b/sed/calibrator/delay.py index 000fe9cf..14758b41 100644 --- a/sed/calibrator/delay.py +++ b/sed/calibrator/delay.py @@ -51,10 +51,10 @@ def __init__( self._verbose = verbose set_verbosity(logger, self._verbose) - 
self.adc_column: str = self._config["dataframe"].get("adc_column", None) - self.delay_column: str = self._config["dataframe"]["delay_column"] - self.corrected_delay_column = self._config["dataframe"].get( - "corrected_delay_column", + self.adc_column: str = config["dataframe"]["columns"]["adc"] + self.delay_column: str = config["dataframe"]["columns"]["delay"] + self.corrected_delay_column = self._config["dataframe"]["columns"].get( + "corrected_delay", self.delay_column, ) self.calibration: dict[str, Any] = self._config["delay"].get("calibration", {}) @@ -102,9 +102,9 @@ def append_delay_axis( df (pd.DataFrame | dask.dataframe.DataFrame): The dataframe where to apply the delay calibration to. adc_column (str, optional): Source column for delay calibration. - Defaults to config["dataframe"]["adc_column"]. + Defaults to config["dataframe"]["columns"]["adc"]. delay_column (str, optional): Destination column for delay calibration. - Defaults to config["dataframe"]["delay_column"]. + Defaults to config["dataframe"]["columns"]["delay"]. calibration (dict, optional): Calibration dictionary with parameters for delay calibration. adc_range (tuple | list | np.ndarray, optional): The range of used @@ -146,7 +146,7 @@ def append_delay_axis( or datafile is not None ): calibration = {} - calibration["creation_date"] = datetime.now().timestamp() + calibration["creation_date"] = datetime.now() if adc_range is not None: calibration["adc_range"] = adc_range if delay_range is not None: @@ -158,9 +158,7 @@ def append_delay_axis( else: # report usage of loaded parameters if "creation_date" in calibration and not suppress_output: - datestring = datetime.fromtimestamp(calibration["creation_date"]).strftime( - "%m/%d/%Y, %H:%M:%S", - ) + datestring = calibration["creation_date"].strftime("%m/%d/%Y, %H:%M:%S") logger.info(f"Using delay calibration parameters generated on {datestring}") if adc_column is None: @@ -212,7 +210,7 @@ def append_delay_axis( ) if not suppress_output: logger.info(f"Converted delay_range (ps) = {calibration['delay_range']}") - calibration["creation_date"] = datetime.now().timestamp() + calibration["creation_date"] = datetime.now() if "delay_range" in calibration.keys(): df[delay_column] = calibration["delay_range"][0] + ( @@ -285,9 +283,10 @@ def add_offsets( # pylint:disable=duplicate-code # use passed parameters, overwrite config offsets = {} - offsets["creation_date"] = datetime.now().timestamp() + offsets["creation_date"] = datetime.now() # column-based offsets if columns is not None: + offsets["columns"] = {} if weights is None: weights = 1 if isinstance(weights, (int, float, np.integer, np.floating)): @@ -314,7 +313,7 @@ def add_offsets( # store in offsets dictionary for col, weight, pmean, red in zip(columns, weights, preserve_mean, reductions): - offsets[col] = { + offsets["columns"][col] = { "weight": weight, "preserve_mean": pmean, "reduction": red, @@ -330,9 +329,7 @@ def add_offsets( offsets["flip_delay_axis"] = flip_delay_axis elif "creation_date" in offsets and not suppress_output: - datestring = datetime.fromtimestamp(offsets["creation_date"]).strftime( - "%m/%d/%Y, %H:%M:%S", - ) + datestring = offsets["creation_date"].strftime("%m/%d/%Y, %H:%M:%S") logger.info(f"Using delay offset parameters generated on {datestring}") if len(offsets) > 0: @@ -359,21 +356,23 @@ def add_offsets( f"Invalid value for flip_delay_axis in config: {flip_delay_axis}.", ) log_str += f"\n Flip delay axis: {flip_delay_axis}" - else: - columns.append(k) - try: - weight = v["weight"] - except KeyError: 
- weight = 1 - weights.append(weight) - pm = v.get("preserve_mean", False) - preserve_mean.append(pm) - red = v.get("reduction", None) - reductions.append(red) - log_str += ( - f"\n Column[{k}]: Weight={weight}, Preserve Mean: {pm}, " - f"Reductions: {red}." - ) + elif k == "columns": + for column_name, column_dict in offsets["columns"].items(): + columns.append(column_name) + weight = column_dict.get("weight", 1) + if not isinstance(weight, (int, float, np.integer, np.floating)): + raise TypeError( + f"Invalid type for weight of column {column_name}: {type(weight)}", + ) + weights.append(weight) + pm = column_dict.get("preserve_mean", False) + preserve_mean.append(pm) + red = column_dict.get("reduction", None) + reductions.append(red) + log_str += ( + f"\n Column[{column_name}]: Weight={weight}, Preserve Mean: {pm}, " + f"Reductions: {red}." + ) if not suppress_output: logger.info(log_str) diff --git a/sed/calibrator/energy.py b/sed/calibrator/energy.py index e3e7d1ca..65493f66 100644 --- a/sed/calibrator/energy.py +++ b/sed/calibrator/energy.py @@ -107,12 +107,12 @@ def __init__( self.peaks: np.ndarray = np.asarray([]) self.calibration: dict[str, Any] = self._config["energy"].get("calibration", {}) - self.tof_column = self._config["dataframe"]["tof_column"] - self.tof_ns_column = self._config["dataframe"].get("tof_ns_column", None) - self.corrected_tof_column = self._config["dataframe"]["corrected_tof_column"] - self.energy_column = self._config["dataframe"]["energy_column"] - self.x_column = self._config["dataframe"]["x_column"] - self.y_column = self._config["dataframe"]["y_column"] + self.tof_column = self._config["dataframe"]["columns"]["tof"] + self.tof_ns_column = self._config["dataframe"]["columns"].get("tof_ns", None) + self.corrected_tof_column = self._config["dataframe"]["columns"]["corrected_tof"] + self.energy_column = self._config["dataframe"]["columns"]["energy"] + self.x_column = self._config["dataframe"]["columns"]["x"] + self.y_column = self._config["dataframe"]["columns"]["y"] self.binwidth: float = self._config["dataframe"]["tof_binwidth"] self.binning: int = self._config["dataframe"]["tof_binning"] self.x_width = self._config["energy"]["x_width"] @@ -121,7 +121,7 @@ def __init__( self.tof_fermi = self._config["energy"]["tof_fermi"] / self.binning self.color_clip = self._config["energy"]["color_clip"] self.sector_delays = self._config["dataframe"].get("sector_delays", None) - self.sector_id_column = self._config["dataframe"].get("sector_id_column", None) + self.sector_id_column = self._config["dataframe"]["columns"].get("sector_id", None) self.offsets: dict[str, Any] = self._config["energy"].get("offsets", {}) self.correction: dict[str, Any] = self._config["energy"].get("correction", {}) @@ -217,7 +217,7 @@ def bin_data( Args: data_files (list[str]): list of file names to bin axes (list[str], optional): bin axes. Defaults to - config["dataframe"]["tof_column"]. + config["dataframe"]["columns"]["tof"]. bins (list[int], optional): number of bins. Defaults to config["energy"]["bins"]. ranges (Sequence[tuple[float, float]], optional): bin ranges. @@ -612,7 +612,7 @@ def calibrate( else: raise NotImplementedError() - self.calibration["creation_date"] = datetime.now().timestamp() + self.calibration["creation_date"] = datetime.now() return self.calibration def view( @@ -802,9 +802,9 @@ def append_energy_axis( df (pd.DataFrame | dask.dataframe.DataFrame): Dataframe to apply the energy axis calibration to. tof_column (str, optional): Label of the source column. 
- Defaults to config["dataframe"]["tof_column"]. + Defaults to config["dataframe"]["columns"]["tof"]. energy_column (str, optional): Label of the destination column. - Defaults to config["dataframe"]["energy_column"]. + Defaults to config["dataframe"]["columns"]["energy"]. calibration (dict, optional): Calibration dictionary. If provided, overrides calibration from class or config. Defaults to self.calibration or config["energy"]["calibration"]. @@ -843,12 +843,10 @@ def append_energy_axis( if len(kwds) > 0: for key, value in kwds.items(): calibration[key] = value - calibration["creation_date"] = datetime.now().timestamp() + calibration["creation_date"] = datetime.now() elif "creation_date" in calibration and not suppress_output: - datestring = datetime.fromtimestamp(calibration["creation_date"]).strftime( - "%m/%d/%Y, %H:%M:%S", - ) + datestring = calibration["creation_date"].strftime("%m/%d/%Y, %H:%M:%S") logger.info(f"Using energy calibration parameters generated on {datestring}") # try to determine calibration type if not provided @@ -915,17 +913,17 @@ def append_energy_axis( df[energy_column] = df[energy_column] + scale_sign * bias_voltage if not suppress_output: logger.debug(f"Shifted energy column by constant bias value: {bias_voltage}.") - elif self._config["dataframe"]["bias_column"] in df.columns: + elif self._config["dataframe"]["columns"]["bias"] in df.columns: df = dfops.offset_by_other_columns( df=df, target_column=energy_column, - offset_columns=self._config["dataframe"]["bias_column"], + offset_columns=self._config["dataframe"]["columns"]["bias"], weights=scale_sign, ) if not suppress_output: logger.debug( "Shifted energy column by bias column: " - f"{self._config['dataframe']['bias_column']}.", + f"{self._config['dataframe']['columns']['bias']}.", ) else: logger.warning( @@ -948,9 +946,9 @@ def append_tof_ns_axis( Args: df (pd.DataFrame | dask.dataframe.DataFrame): Dataframe to convert. tof_column (str, optional): Name of the column containing the - time-of-flight steps. Defaults to config["dataframe"]["tof_column"]. + time-of-flight steps. Defaults to config["dataframe"]["columns"]["tof"]. tof_ns_column (str, optional): Name of the column to store the - time-of-flight in nanoseconds. Defaults to config["dataframe"]["tof_ns_column"]. + time-of-flight in nanoseconds. Defaults to config["dataframe"]["columns"]["tof_ns"]. binwidth (float, optional): Time-of-flight binwidth in ns. Defaults to config["energy"]["tof_binwidth"]. binning (int, optional): Time-of-flight binning factor. @@ -1202,7 +1200,7 @@ def common_apply_func(apply: bool): # noqa: ARG001 self.correction["amplitude"] = correction["amplitude"] self.correction["center"] = correction["center"] self.correction["correction_type"] = correction["correction_type"] - self.correction["creation_date"] = datetime.now().timestamp() + self.correction["creation_date"] = datetime.now() amplitude_slider.close() x_center_slider.close() y_center_slider.close() @@ -1381,9 +1379,9 @@ def apply_energy_correction( df (pd.DataFrame | dask.dataframe.DataFrame): The dataframe where to apply the energy correction to. tof_column (str, optional): Name of the source column to convert. - Defaults to config["dataframe"]["tof_column"]. + Defaults to config["dataframe"]["columns"]["tof"]. new_tof_column (str, optional): Name of the destination column to convert. - Defaults to config["dataframe"]["corrected_tof_column"]. + Defaults to config["dataframe"]["columns"]["corrected_tof"]. 
correction_type (str, optional): Type of correction to apply to the TOF axis. Valid values are: @@ -1440,12 +1438,10 @@ def apply_energy_correction( for key, value in kwds.items(): correction[key] = value - correction["creation_date"] = datetime.now().timestamp() + correction["creation_date"] = datetime.now() elif "creation_date" in correction and not suppress_output: - datestring = datetime.fromtimestamp(correction["creation_date"]).strftime( - "%m/%d/%Y, %H:%M:%S", - ) + datestring = correction["creation_date"].strftime("%m/%d/%Y, %H:%M:%S") logger.info(f"Using energy correction parameters generated on {datestring}") missing_keys = {"correction_type", "center", "amplitude"} - set(correction.keys()) @@ -1494,9 +1490,9 @@ def align_dld_sectors( Args: df (dask.dataframe.DataFrame): Dataframe to use. tof_column (str, optional): Name of the column containing the time-of-flight values. - Defaults to config["dataframe"]["tof_column"]. + Defaults to config["dataframe"]["columns"]["tof"]. sector_id_column (str, optional): Name of the column containing the sector id values. - Defaults to config["dataframe"]["sector_id_column"]. + Defaults to config["dataframe"]["columns"]["sector_id"]. sector_delays (np.ndarray, optional): Array containing the sector delays. Defaults to config["dataframe"]["sector_delays"]. @@ -1592,9 +1588,10 @@ def add_offsets( # pylint:disable=duplicate-code # use passed parameters, overwrite config offsets = {} - offsets["creation_date"] = datetime.now().timestamp() + offsets["creation_date"] = datetime.now() # column-based offsets if columns is not None: + offsets["columns"] = {} if isinstance(columns, str): columns = [columns] @@ -1623,7 +1620,7 @@ def add_offsets( # store in offsets dictionary for col, weight, pmean, red in zip(columns, weights, preserve_mean, reductions): - offsets[col] = { + offsets["columns"][col] = { "weight": weight, "preserve_mean": pmean, "reduction": red, @@ -1636,9 +1633,7 @@ def add_offsets( raise TypeError(f"Invalid type for constant: {type(constant)}") elif "creation_date" in offsets and not suppress_output: - datestring = datetime.fromtimestamp(offsets["creation_date"]).strftime( - "%m/%d/%Y, %H:%M:%S", - ) + datestring = offsets["creation_date"].strftime("%m/%d/%Y, %H:%M:%S") logger.info(f"Using energy offset parameters generated on {datestring}") if len(offsets) > 0: @@ -1652,35 +1647,31 @@ def add_offsets( for k, v in offsets.items(): if k == "creation_date": continue - if k == "constant": + elif k == "constant": # flip sign if binding energy scale constant = v * scale_sign log_str += f"\n Constant: {constant}" - else: - columns.append(k) - try: - weight = v["weight"] - except KeyError: - weight = 1 - if not isinstance(weight, (int, float, np.integer, np.floating)): - raise TypeError(f"Invalid type for weight of column {k}: {type(weight)}") - # flip sign if binding energy scale - weight = weight * scale_sign - weights.append(weight) - pm = v.get("preserve_mean", False) - if str(pm).lower() in ["false", "0", "no"]: - pm = False - elif str(pm).lower() in ["true", "1", "yes"]: - pm = True - preserve_mean.append(pm) - red = v.get("reduction", None) - if str(red).lower() in ["none", "null"]: - red = None - reductions.append(red) - log_str += ( - f"\n Column[{k}]: Weight={weight}, Preserve Mean: {pm}, " - f"Reductions: {red}." 
- ) + elif k == "columns": + for column_name, column_dict in offsets["columns"].items(): + columns.append(column_name) + weight = column_dict.get("weight", 1) + if not isinstance(weight, (int, float, np.integer, np.floating)): + raise TypeError( + f"Invalid type for weight of column {column_name}: {type(weight)}", + ) + # flip sign if binding energy scale + weight = weight * scale_sign + weights.append(weight) + pm = column_dict.get("preserve_mean", False) + preserve_mean.append(pm) + red = column_dict.get("reduction", None) + if str(red).lower() in ["none", "null"]: + red = None + reductions.append(red) + log_str += ( + f"\n Column[{column_name}]: Weight={weight}, Preserve Mean: {pm}, " + f"Reductions: {red}." + ) if not suppress_output: logger.info(log_str) diff --git a/sed/calibrator/momentum.py b/sed/calibrator/momentum.py index fed52473..e2a40e38 100644 --- a/sed/calibrator/momentum.py +++ b/sed/calibrator/momentum.py @@ -123,12 +123,12 @@ def __init__( self.adjust_params: dict[str, Any] = {} self.calibration: dict[str, Any] = self._config["momentum"].get("calibration", {}) - self.x_column = self._config["dataframe"]["x_column"] - self.y_column = self._config["dataframe"]["y_column"] - self.corrected_x_column = self._config["dataframe"]["corrected_x_column"] - self.corrected_y_column = self._config["dataframe"]["corrected_y_column"] - self.kx_column = self._config["dataframe"]["kx_column"] - self.ky_column = self._config["dataframe"]["ky_column"] + self.x_column = self._config["dataframe"]["columns"]["x"] + self.y_column = self._config["dataframe"]["columns"]["y"] + self.corrected_x_column = self._config["dataframe"]["columns"]["corrected_x"] + self.corrected_y_column = self._config["dataframe"]["columns"]["corrected_y"] + self.kx_column = self._config["dataframe"]["columns"]["kx"] + self.ky_column = self._config["dataframe"]["columns"]["ky"] self._state: int = 0 @@ -678,7 +678,7 @@ def spline_warp_estimate( if self.pouter_ord is None: if self.pouter is not None: self.pouter_ord = po.pointset_order(self.pouter) - self.correction["creation_date"] = datetime.now().timestamp() + self.correction["creation_date"] = datetime.now() else: try: features = np.asarray( @@ -693,11 +693,7 @@ def spline_warp_estimate( ascale = np.asarray(ascale) if "creation_date" in self.correction: - datestring = datetime.fromtimestamp( - self.correction["creation_date"], - ).strftime( - "%m/%d/%Y, %H:%M:%S", - ) + datestring = self.correction["creation_date"].strftime("%m/%d/%Y, %H:%M:%S") logger.info( "No landmarks defined, using momentum correction parameters " f"generated on {datestring}", @@ -715,7 +711,7 @@ def spline_warp_estimate( self.add_features(features=features, rotsym=rotsym) else: - self.correction["creation_date"] = datetime.now().timestamp() + self.correction["creation_date"] = datetime.now() if ascale is not None: if isinstance(ascale, (int, float, np.floating, np.integer)): @@ -1135,9 +1131,7 @@ def pose_adjustment( ) elif "creation_date" in transformations: - datestring = datetime.fromtimestamp(transformations["creation_date"]).strftime( - "%m/%d/%Y, %H:%M:%S", - ) + datestring = transformations["creation_date"].strftime("%m/%d/%Y, %H:%M:%S") logger.info(f"Using transformation parameters generated on {datestring}") def update(scale: float, xtrans: float, ytrans: float, angle: float): @@ -1257,7 +1251,7 @@ def apply_func(apply: bool): # noqa: ARG001 fig.canvas.draw_idle() if transformations != self.transformations: - transformations["creation_date"] = datetime.now().timestamp() + 
transformations["creation_date"] = datetime.now() self.transformations = transformations if self._verbose: @@ -1714,7 +1708,7 @@ def calibrate( # Assemble into return dictionary self.calibration = {} - self.calibration["creation_date"] = datetime.now().timestamp() + self.calibration["creation_date"] = datetime.now() self.calibration["kx_axis"] = k_row self.calibration["ky_axis"] = k_col self.calibration["grid"] = (k_rowgrid, k_colgrid) @@ -1749,15 +1743,15 @@ def apply_corrections( df (pd.DataFrame | dask.dataframe.DataFrame): Dataframe to apply the distortion correction to. x_column (str, optional): Label of the 'X' column before momentum - distortion correction. Defaults to config["momentum"]["x_column"]. + distortion correction. Defaults to config["dataframe"]["columns"]["x"]. y_column (str, optional): Label of the 'Y' column before momentum - distortion correction. Defaults to config["momentum"]["y_column"]. + distortion correction. Defaults to config["dataframe"]["columns"]["y"]. new_x_column (str, optional): Label of the 'X' column after momentum distortion correction. - Defaults to config["momentum"]["corrected_x_column"]. + Defaults to config["dataframe"]["columns"]["corrected_x"]. new_y_column (str, optional): Label of the 'Y' column after momentum distortion correction. - Defaults to config["momentum"]["corrected_y_column"]. + Defaults to config["dataframe"]["columns"]["corrected_y"]. Returns: tuple[pd.DataFrame | dask.dataframe.DataFrame, dict]: Dataframe with @@ -1825,7 +1819,7 @@ def gather_correction_metadata(self) -> dict: pass if len(self.adjust_params) > 0: metadata["registration"] = self.adjust_params - metadata["registration"]["creation_date"] = datetime.now().timestamp() + metadata["registration"]["creation_date"] = datetime.now() metadata["registration"]["applied"] = True metadata["registration"]["depends_on"] = ( "/entry/process/registration/transformations/rot_z" @@ -1898,15 +1892,15 @@ def append_k_axis( df (pd.DataFrame | dask.dataframe.DataFrame): Dataframe to apply the distortion correction to. x_column (str, optional): Label of the source 'X' column. - Defaults to config["momentum"]["corrected_x_column"] or - config["momentum"]["x_column"] (whichever is present). + Defaults to config["dataframe"]["columns"]["corrected_x"] or + config["dataframe"]["columns"]["x"] (whichever is present). y_column (str, optional): Label of the source 'Y' column. - Defaults to config["momentum"]["corrected_y_column"] or - config["momentum"]["y_column"] (whichever is present). + Defaults to config["dataframe"]["columns"]["corrected_y"] or + config["dataframe"]["columns"]["y"] (whichever is present). new_x_column (str, optional): Label of the destination 'X' column after - momentum calibration. Defaults to config["momentum"]["kx_column"]. + momentum calibration. Defaults to config["dataframe"]["columns"]["kx"]. new_y_column (str, optional): Label of the destination 'Y' column after - momentum calibration. Defaults to config["momentum"]["ky_column"]. + momentum calibration. Defaults to config["dataframe"]["columns"]["ky"]. calibration (dict, optional): Dictionary containing calibration parameters. Defaults to 'self.calibration' or config["momentum"]["calibration"]. suppress_output (bool, optional): Option to suppress output of diagnostic information. 
@@ -1952,15 +1946,13 @@ def append_k_axis( ]: if key in kwds: calibration[key] = kwds.pop(key) - calibration["creation_date"] = datetime.now().timestamp() + calibration["creation_date"] = datetime.now() if len(kwds) > 0: raise TypeError(f"append_k_axis() got unexpected keyword arguments {kwds.keys()}.") if "creation_date" in calibration and not suppress_output: - datestring = datetime.fromtimestamp(calibration["creation_date"]).strftime( - "%m/%d/%Y, %H:%M:%S", - ) + datestring = calibration["creation_date"].strftime("%m/%d/%Y, %H:%M:%S") logger.info(f"Using momentum calibration parameters generated on {datestring}") try: diff --git a/sed/config/config_model.py b/sed/config/config_model.py new file mode 100644 index 00000000..9bdcdec1 --- /dev/null +++ b/sed/config/config_model.py @@ -0,0 +1,354 @@ +"""Pydantic model to validate the config for SED package.""" +import grp +from collections.abc import Sequence +from datetime import datetime +from typing import Literal +from typing import Optional +from typing import Union + +from pydantic import BaseModel +from pydantic import ConfigDict +from pydantic import DirectoryPath +from pydantic import field_validator +from pydantic import FilePath +from pydantic import HttpUrl +from pydantic import NewPath +from pydantic import PositiveInt +from pydantic import SecretStr + +from sed.loader.loader_interface import get_names_of_all_loaders + +## Best to not use futures annotations with pydantic models +## https://github.com/astral-sh/ruff/issues/5434 + + +class PathsModel(BaseModel): + model_config = ConfigDict(extra="forbid") + + raw: DirectoryPath + processed: Optional[Union[DirectoryPath, NewPath]] = None + + +class CopyToolModel(BaseModel): + model_config = ConfigDict(extra="forbid") + + source: DirectoryPath + dest: DirectoryPath + safety_margin: Optional[float] = None + gid: Optional[int] = None + scheduler: Optional[str] = None + + @field_validator("gid") + @classmethod + def validate_gid(cls, v: int) -> int: + """Checks if the gid is valid on the system""" + try: + grp.getgrgid(v) + except KeyError: + raise ValueError(f"Invalid value {v} for gid. Group not found.") + return v + + +class CoreModel(BaseModel): + model_config = ConfigDict(extra="forbid") + + loader: str + verbose: Optional[bool] = None + paths: Optional[PathsModel] = None + num_cores: Optional[PositiveInt] = None + year: Optional[int] = None + beamtime_id: Optional[Union[int, str]] = None + instrument: Optional[str] = None + beamline: Optional[str] = None + copy_tool: Optional[CopyToolModel] = None + stream_name_prefixes: Optional[dict] = None + stream_name_postfixes: Optional[dict] = None + beamtime_dir: Optional[dict] = None + + @field_validator("loader") + @classmethod + def validate_loader(cls, v: str) -> str: + """Checks if the loader is one of the valid ones""" + names = get_names_of_all_loaders() + if v not in names: + raise ValueError(f"Invalid loader {v}. 
Available loaders are: {names}") + return v + + +class ColumnsModel(BaseModel): + model_config = ConfigDict(extra="forbid") + + x: str + y: str + tof: str + tof_ns: str + kx: str + ky: str + energy: str + delay: str + adc: str + bias: str + timestamp: str + corrected_x: str + corrected_y: str + corrected_tof: str + corrected_delay: Optional[str] = None + sector_id: Optional[str] = None + auxiliary: Optional[str] = None + + +class ChannelModel(BaseModel): + model_config = ConfigDict(extra="forbid") + + format: Literal["per_train", "per_electron", "per_pulse", "per_file"] + dataset_key: str + index_key: Optional[str] = None + slice: Optional[int] = None + dtype: Optional[str] = None + max_hits: Optional[int] = None + scale: Optional[float] = None + + class subChannel(BaseModel): + model_config = ConfigDict(extra="forbid") + + slice: int + dtype: Optional[str] = None + + sub_channels: Optional[dict[str, subChannel]] = None + + +class DataframeModel(BaseModel): + model_config = ConfigDict(extra="forbid") + + columns: ColumnsModel + units: Optional[dict[str, str]] = None + channels: Optional[dict[str, ChannelModel]] = None + # other settings + tof_binwidth: float + tof_binning: int + adc_binning: int + jitter_cols: Sequence[str] + jitter_amps: Union[float, Sequence[float]] + timed_dataframe_unit_time: float + # mpes specific settings + first_event_time_stamp_key: Optional[str] = None + ms_markers_key: Optional[str] = None + # flash specific settings + forward_fill_iterations: Optional[int] = None + ubid_offset: Optional[int] = None + split_sector_id_from_dld_time: Optional[bool] = None + sector_id_reserved_bits: Optional[int] = None + sector_delays: Optional[Sequence[float]] = None + daq: Optional[str] = None + # SXP specific settings + num_trains: Optional[PositiveInt] = None + + +class BinningModel(BaseModel): + model_config = ConfigDict(extra="forbid") + + hist_mode: Literal["numpy", "numba"] + mode: Literal["fast", "lean", "legacy"] + pbar: bool + threads_per_worker: PositiveInt + threadpool_API: Literal["blas", "openmp"] + + +class HistogramModel(BaseModel): + model_config = ConfigDict(extra="forbid") + + bins: Sequence[PositiveInt] + axes: Sequence[str] + ranges: Sequence[tuple[float, float]] + + +class EnergyModel(BaseModel): + model_config = ConfigDict(extra="forbid") + + bins: PositiveInt + ranges: tuple[int, int] + normalize: bool + normalize_span: int + normalize_order: int + fastdtw_radius: int + peak_window: int + calibration_method: Literal["lmfit", "lstsq", "lsq"] + energy_scale: Literal["binding", "kinetic"] + tof_fermi: int + tof_width: tuple[int, int] + x_width: tuple[int, int] + y_width: tuple[int, int] + color_clip: int + bias_key: Optional[str] = None + + class EnergyCalibrationModel(BaseModel): + model_config = ConfigDict(extra="forbid") + + creation_date: Optional[datetime] = None + d: Optional[float] = None + t0: Optional[float] = None + E0: Optional[float] = None + coeffs: Optional[Sequence[float]] = None + offset: Optional[float] = None + energy_scale: Literal["binding", "kinetic"] + + calibration: Optional[EnergyCalibrationModel] = None + + class EnergyOffsets(BaseModel): + model_config = ConfigDict(extra="forbid") + + creation_date: Optional[datetime] = None + constant: Optional[float] = None + + class OffsetColumn(BaseModel): + weight: float + preserve_mean: bool + reduction: Optional[str] = None + + columns: Optional[dict[str, OffsetColumn]] = None + + offsets: Optional[EnergyOffsets] = None + + class EnergyCorrectionModel(BaseModel): + model_config = 
ConfigDict(extra="forbid") + + creation_date: Optional[datetime] = None + correction_type: Literal["Gaussian", "Lorentzian", "spherical", "Lorentzian_asymmetric"] + amplitude: float + center: tuple[float, float] + gamma: Optional[float] = None + sigma: Optional[float] = None + diameter: Optional[float] = None + sigma2: Optional[float] = None + amplitude2: Optional[float] = None + + correction: Optional[EnergyCorrectionModel] = None + + +class MomentumModel(BaseModel): + model_config = ConfigDict(extra="forbid") + + axes: Sequence[str] + bins: Sequence[PositiveInt] + ranges: Sequence[tuple[int, int]] + detector_ranges: Sequence[tuple[int, int]] + center_pixel: tuple[int, int] + sigma: int + fwhm: int + sigma_radius: int + + class MomentumCalibrationModel(BaseModel): + model_config = ConfigDict(extra="forbid") + + creation_date: Optional[datetime] = None + kx_scale: float + ky_scale: float + x_center: float + y_center: float + rstart: float + cstart: float + rstep: float + cstep: float + + calibration: Optional[MomentumCalibrationModel] = None + + class MomentumCorrectionModel(BaseModel): + model_config = ConfigDict(extra="forbid") + + creation_date: Optional[datetime] = None + feature_points: Sequence[tuple[float, float]] + rotation_symmetry: PositiveInt + include_center: bool + use_center: bool + ascale: Optional[Sequence[float]] = None + center_point: Optional[tuple[float, float]] = None + outer_points: Optional[Sequence[tuple[float, float]]] = None + + correction: Optional[MomentumCorrectionModel] = None + + class MomentumTransformationsModel(BaseModel): + model_config = ConfigDict(extra="forbid") + + creation_date: Optional[datetime] = None + scale: Optional[float] = None + angle: Optional[float] = None + xtrans: Optional[float] = None + ytrans: Optional[float] = None + + transformations: Optional[MomentumTransformationsModel] = None + + +class DelayModel(BaseModel): + model_config = ConfigDict(extra="forbid") + + adc_range: tuple[int, int] + flip_time_axis: bool + # Group keys in the datafile + p1_key: Optional[str] = None + p2_key: Optional[str] = None + t0_key: Optional[str] = None + + class DelayCalibration(BaseModel): + model_config = ConfigDict(extra="forbid") + + creation_date: Optional[datetime] = None + adc_range: Optional[tuple[int, int]] = None + delay_range: Optional[tuple[float, float]] = None + time0: Optional[float] = None + delay_range_mm: Optional[tuple[float, float]] = None + datafile: Optional[FilePath] # .h5 extension in filepath + + calibration: Optional[DelayCalibration] = None + + class DelayOffsets(BaseModel): + model_config = ConfigDict(extra="forbid") + + creation_date: Optional[datetime] = None + constant: Optional[float] = None + flip_delay_axis: Optional[bool] = False + + class OffsetColumn(BaseModel): + weight: float + preserve_mean: bool + reduction: Optional[str] = None + + columns: Optional[dict[str, OffsetColumn]] = None + + offsets: Optional[DelayOffsets] = None + + +class MetadataModel(BaseModel): + model_config = ConfigDict(extra="forbid") + + archiver_url: Optional[HttpUrl] = None + token: Optional[SecretStr] = None + epics_pvs: Optional[Sequence[str]] = None + fa_in_channel: Optional[str] = None + fa_hor_channel: Optional[str] = None + ca_in_channel: Optional[str] = None + aperture_config: Optional[dict[datetime, dict]] = None + lens_mode_config: Optional[dict[str, dict]] = None + + +class NexusModel(BaseModel): + model_config = ConfigDict(extra="forbid") + + # Currently only mpes reader is supported + reader: Literal["mpes"] + # Currently only 
NXmpes definition is supported + definition: Literal["NXmpes"] + input_files: Sequence[FilePath] + + +class ConfigModel(BaseModel): + model_config = ConfigDict(extra="forbid") + + core: CoreModel + dataframe: DataframeModel + energy: EnergyModel + momentum: MomentumModel + delay: DelayModel + binning: BinningModel + histogram: HistogramModel + metadata: Optional[MetadataModel] = None + nexus: Optional[NexusModel] = None diff --git a/sed/config/default.yaml b/sed/config/default.yaml index b047d8a8..28b9be3b 100644 --- a/sed/config/default.yaml +++ b/sed/config/default.yaml @@ -3,32 +3,23 @@ core: loader: generic dataframe: - # dataframe column containing x coordinates - x_column: "X" - # dataframe column containing y coordinates - y_column: "Y" - # dataframe column containing time-of-flight data - tof_column: "t" - # dataframe column containing time-of-flight data in nanoseconds - tof_ns_column: "t_ns" - # dataframe column containing analog-to-digital data - adc_column: "ADC" - # dataframe column containing bias voltage data - bias_column: "sampleBias" - # dataframe column containing corrected x coordinates - corrected_x_column: "Xm" - # dataframe column containing corrected y coordinates - corrected_y_column: "Ym" - # dataframe column containing corrected time-of-flight data - corrected_tof_column: "tm" - # dataframe column containing kx coordinates - kx_column: "kx" - # dataframe column containing ky coordinates - ky_column: "ky" - # dataframe column containing energy data - energy_column: "energy" - # dataframe column containing delay data - delay_column: "delay" + # Column settings + columns: + x: X # dataframe column containing x coordinates + y: Y # dataframe column containing y coordinates + tof: t # dataframe column containing time-of-flight data + tof_ns: t_ns # dataframe column containing time-of-flight data in nanoseconds + corrected_x: Xm # dataframe column containing corrected x coordinates + corrected_y: Ym # dataframe column containing corrected y coordinates + corrected_tof: tm # dataframe column containing corrected time-of-flight data + kx: kx # dataframe column containing kx coordinates + ky: ky # dataframe column containing ky coordinates + energy: energy # dataframe column containing energy data + delay: delay # dataframe column containing delay data + adc: ADC # dataframe column containing analog-to-digital data + bias: sampleBias # dataframe column containing bias voltage data + timestamp: timeStamp # dataframe column containing timestamp data + # time length of a base time-of-flight bin in s tof_binwidth: 4.125e-12 # Binning factor of the tof_column-data compared to tof_binwidth @@ -36,7 +27,7 @@ dataframe: # binning factor used for the adc coordinate adc_binning: 1 # list of columns to apply jitter to. - jitter_cols: ["@x_column", "@y_column", "@tof_column"] + jitter_cols: ["@x", "@y", "@tof"] # Jitter amplitude or list of jitter amplitudes. Should equal half the digital step size of each jitter_column jitter_amps: 0.5 # Time stepping in seconds of the successive events in the timed dataframe @@ -77,7 +68,7 @@ energy: momentum: # binning axes to use for momentum correction/calibration. 
# Axes names starting with "@" refer to keys in the "dataframe" section - axes: ["@x_column", "@y_column", "@tof_column"] + axes: ["@x", "@y", "@tof"] # Bin numbers used for the respective axes bins: [512, 512, 300] # bin ranges to use (in unbinned detector coordinates) @@ -97,8 +88,6 @@ delay: # value ranges of the analog-to-digital converter axes used for encoding the delay stage position # (in unbinned coordinates) adc_range: [1900, 25600] - # pump probe time overlap in ps - time0: 0 # if to flip the time axis flip_time_axis: False @@ -119,6 +108,6 @@ histogram: bins: [80, 80, 80] # default axes to use for histogram visualization. # Axes names starting with "@" refer to keys in the "dataframe" section - axes: ["@x_column", "@y_column", "@tof_column"] + axes: ["@x", "@y", "@tof"] # default ranges to use for histogram visualization (in unbinned detector coordinates) ranges: [[0, 1800], [0, 1800], [0, 150000]] diff --git a/sed/config/flash_example_config.yaml b/sed/config/flash_example_config.yaml index ccc7dcac..823a53fe 100644 --- a/sed/config/flash_example_config.yaml +++ b/sed/config/flash_example_config.yaml @@ -1,5 +1,4 @@ # This file contains the default configuration for the flash loader. - core: # defines the loader loader: flash @@ -13,14 +12,26 @@ core: year: 2023 # the instrument used instrument: hextof # hextof, wespe, etc - # The paths to the raw and parquet data directories. If these are not # provided, the loader will try to find the data based on year beamtimeID etc - paths: - # location of the raw data. - raw: "" - # location of the intermediate parquet files. - processed: "" + # paths: + # # location of the raw data. + # raw: "" + # # location of the intermediate parquet files. + # processed: "" + # The prefixes of the stream names for different DAQ systems for parsing filenames + stream_name_prefixes: + pbd: "GMD_DATA_gmd_data" + pbd2: "FL2PhotDiag_pbd2_gmd_data" + fl1user1: "FLASH1_USER1_stream_2" + fl1user2: "FLASH1_USER2_stream_2" + fl1user3: "FLASH1_USER3_stream_2" + fl2user1: "FLASH2_USER1_stream_2" + fl2user2: "FLASH2_USER2_stream_2" + # The beamtime directories for different DAQ systems. + # (Not to be changed by user) + beamtime_dir: + pg2: "/asap3/flash/gpfs/pg2/" binning: # Histogram computation mode to use. @@ -35,57 +46,39 @@ binning: threadpool_API: "blas" dataframe: - # The name of the DAQ system to use. Necessary to resolve the filenames/paths. - daq: fl1user3 - # The offset correction to the pulseId - ubid_offset: 5 - - # the number of iterations to fill the pulseId forward. 
- forward_fill_iterations: 2 - # if true, removes the 3 bits reserved for dldSectorID from the dldTimeSteps column - split_sector_id_from_dld_time: True - # bits reserved for dldSectorID in the dldTimeSteps column - sector_id_reserved_bits: 3 - # dataframe column containing x coordinates - x_column: dldPosX - # dataframe column containing corrected x coordinates - corrected_x_column: "X" - # dataframe column containing kx coordinates - kx_column: "kx" - # dataframe column containing y coordinates - y_column: dldPosY - # dataframe column containing corrected y coordinates - corrected_y_column: "Y" - # dataframe column containing kx coordinates - ky_column: "ky" - # dataframe column containing time-of-flight data - tof_column: dldTimeSteps - # dataframe column containing time-of-flight data in ns - tof_ns_column: dldTime - # dataframe column containing corrected time-of-flight data - corrected_tof_column: "tm" - # the time stamp column - time_stamp_alias: timeStamp - # auxiliary channel alias - aux_alias: dldAux - # aux subchannels alias - aux_subchannels_alias: dldAuxChannels - # time length of a base time-of-flight bin in seconds - tof_binwidth: 2.0576131995767355E-11 - # binning parameter for time-of-flight data. - tof_binning: 8 - # dataframe column containing sector ID. obtained from dldTimeSteps column - sector_id_column: dldSectorID - sector_delays: [0., 0., 0., 0., 0., 0., 0., 0.] - # the delay stage column - delay_column: delayStage - # the corrected pump-probe time axis - corrected_delay_column: pumpProbeTime - # the columns to be used for jitter correction - jitter_cols: ["dldPosX", "dldPosY", "dldTimeSteps"] - + daq: fl1user3 # DAQ system name to resolve filenames/paths + ubid_offset: 5 # Offset correction to the pulseId + forward_fill_iterations: 2 # Number of iterations to fill the pulseId forward + split_sector_id_from_dld_time: True # Remove reserved bits for dldSectorID from dldTimeSteps column + sector_id_reserved_bits: 3 # Bits reserved for dldSectorID in the dldTimeSteps column + sector_delays: [0., 0., 0., 0., 0., 0., 0., 0.] # Sector delays + + # Time and binning settings + tof_binwidth: 2.0576131995767355E-11 # Base time-of-flight bin width in seconds + tof_binning: 8 # Binning parameter for time-of-flight data + + # Columns used for jitter correction + jitter_cols: [dldPosX, dldPosY, dldTimeSteps] + + # Column settings + columns: + x: dldPosX + corrected_x: X + kx: kx + y: dldPosY + corrected_y: Y + ky: ky + tof: dldTimeSteps + tof_ns: dldTime + corrected_tof: tm + timestamp: timeStamp + auxiliary: dldAux + sector_id: dldSectorID + delay: delayStage + corrected_delay: pumpProbeTime + + # These are the units of the columns units: - # These are the units of the columns dldPosX: 'step' dldPosY: 'step' dldTimeSteps: 'step' @@ -102,7 +95,7 @@ dataframe: kx: '1/A' ky: '1/A' - # The channels to load. + # The channels to load from the raw data. 
The channels have the following structure: # channels have the following structure: # : # format: per_pulse/per_electron/per_train @@ -160,11 +153,11 @@ dataframe: # The auxiliary channel has a special structure where the group further contains # a multidimensional structure so further aliases are defined below dldAux: - format: per_pulse + format: per_train index_key: "/uncategorised/FLASH.EXP/HEXTOF.DAQ/DLD1/index" dataset_key: "/uncategorised/FLASH.EXP/HEXTOF.DAQ/DLD1/value" slice: 4 - subChannels: + sub_channels: sampleBias: slice: 0 dtype: float32 @@ -215,22 +208,6 @@ dataframe: index_key: "/zraw/FLASH.SYNC/LASER.LOCK.EXP/F1.PG.OSC/FMC0.MD22.1.ENCODER_POSITION.RD/dGroup/index" dataset_key: "/zraw/FLASH.SYNC/LASER.LOCK.EXP/F1.PG.OSC/FMC0.MD22.1.ENCODER_POSITION.RD/dGroup/value" - # The prefixes of the stream names for different DAQ systems for parsing filenames - # (Not to be changed by user) - stream_name_prefixes: - pbd: "GMD_DATA_gmd_data" - pbd2: "FL2PhotDiag_pbd2_gmd_data" - fl1user1: "FLASH1_USER1_stream_2" - fl1user2: "FLASH1_USER2_stream_2" - fl1user3: "FLASH1_USER3_stream_2" - fl2user1: "FLASH2_USER1_stream_2" - fl2user2: "FLASH2_USER2_stream_2" - - # The beamtime directories for different DAQ systems. - # (Not to be changed by user) - beamtime_dir: - pg2: "/asap3/flash/gpfs/pg2/" - # metadata collection from scicat # metadata: # scicat_url: @@ -238,6 +215,6 @@ dataframe: # The nexus collection routine shall be finalized soon for both instruments # nexus: -# reader: "flash" +# reader: "mpes" # definition: "NXmpes" -# input_files: ["NXmpes_config_HEXTOF_light.json"] +# input_files: ["NXmpes_config_HEXTOF_light.json"] \ No newline at end of file diff --git a/sed/config/mpes_example_config.yaml b/sed/config/mpes_example_config.yaml index 4848945f..c45ca865 100644 --- a/sed/config/mpes_example_config.yaml +++ b/sed/config/mpes_example_config.yaml @@ -4,19 +4,15 @@ core: # Number of parallel threads to use for parallelized jobs (e.g. binning, data conversion, copy, ...) num_cores: 20 # Option to use the copy tool to mirror data to a local storage location before processing. 
- use_copy_tool: False - # path to the root of the source data directory - copy_tool_source: "/path/to/data/" - # path to the root or the local data storage - copy_tool_dest: "/path/to/localDataStore/" - # optional keywords for the copy tool: - copy_tool_kwds: - # group id to set for copied files and folders - gid: 1001 + # copy_tool: + # # path to the root of the source data directory + # source: "/path/to/data/" + # # path to the root or the local data storage + # dest: "/path/to/localDataStore/" + # # group id to set for copied files and folders + # gid: 1000 dataframe: - # dataframe column name for the time stamp column - time_stamp_alias: "timeStamps" # hdf5 group name containing eventIDs occurring at every millisecond (used to calculate timestamps) ms_markers_key: "msMarkers" # hdf5 attribute containing the timestamp of the first event in a file @@ -25,30 +21,20 @@ dataframe: timed_dataframe_unit_time: 0.001 # list of columns to apply jitter to jitter_cols: ["X", "Y", "t", "ADC"] - # dataframe column containing x coordinates - x_column: "X" - # dataframe column containing y coordinates - y_column: "Y" - # dataframe column containing time-of-flight data - tof_column: "t" - # dataframe column containing analog-to-digital data - adc_column: "ADC" - # dataframe column containing bias voltage data - bias_column: "sampleBias" - # dataframe column containing corrected x coordinates - corrected_x_column: "Xm" - # dataframe column containing corrected y coordinates - corrected_y_column: "Ym" - # dataframe column containing corrected time-of-flight data - corrected_tof_column: "tm" - # dataframe column containing kx coordinates - kx_column: "kx" - # dataframe column containing ky coordinates - ky_column: "ky" - # dataframe column containing energy data - energy_column: "energy" - # dataframe column containing delay data - delay_column: "delay" + columns: + x: X # dataframe column containing x coordinates + y: Y # dataframe column containing y coordinates + tof: t # dataframe column containing time-of-flight data + adc: ADC # dataframe column containing analog-to-digital data + bias: sampleBias # dataframe column containing bias voltage data + corrected_x: Xm # dataframe column containing corrected x coordinates + corrected_y: Ym # dataframe column containing corrected y coordinates + corrected_tof: tm # dataframe column containing corrected time-of-flight data + kx: kx # dataframe column containing kx coordinates + ky: ky # dataframe column containing ky coordinates + energy: energy # dataframe column containing energy data + delay: delay # dataframe column containing delay data + timestamp: timeStamps # dataframe column containing timestamp data # time length of a base time-of-flight bin in ns tof_binwidth: 4.125e-12 # Binning factor of the tof_column-data compared to tof_binwidth @@ -155,7 +141,7 @@ energy: momentum: # binning axes to use for momentum correction/calibration. # Axes names starting with "@" refer to keys in the "dataframe" section - axes: ["@x_column", "@y_column", "@tof_column"] + axes: ["@x", "@y", "@tof"] # Bin numbers used for the respective axes bins: [512, 512, 300] # bin ranges to use (in unbinned detector coordinates) @@ -226,7 +212,7 @@ histogram: bins: [80, 80, 80, 80] # default axes to use for histogram visualization. 
# Axes names starting with "@" refer to keys in the "dataframe" section - axes: ["@x_column", "@y_column", "@tof_column", "@adc_column"] + axes: ["@x", "@y", "@tof", "@adc"] # default ranges to use for histogram visualization (in unbinned detector coordinates) ranges: [[0, 1800], [0, 1800], [256000, 276000], [0, 32000]] diff --git a/sed/config/sxp_example_config.yaml b/sed/config/sxp_example_config.yaml index c0757fa5..9b48c28c 100644 --- a/sed/config/sxp_example_config.yaml +++ b/sed/config/sxp_example_config.yaml @@ -6,10 +6,16 @@ core: year: 202302 beamline: sxp instrument: sxp + stream_name_prefixes: + DA03: "RAW-R" + stream_name_postfixes: + DA03: "-DA03-" + beamtime_dir: + sxp: "/gpfs/exfel/exp/SXP/" paths: - data_raw_dir: "/path/to/data" + raw: "/path/to/data" # change this to a local directory where you want to store the parquet files - data_parquet_dir: "/path/to/parquet" + processed: "/path/to/parquet" binning: # Histogram computation mode to use. @@ -28,20 +34,27 @@ dataframe: daq: DA03 forward_fill_iterations: 2 num_trains: 10 - x_column: dldPosX - corrected_x_column: "X" - kx_column: "kx" - y_column: dldPosY - corrected_y_column: "Y" - ky_column: "ky" - tof_column: dldTimeSteps - tof_ns_column: dldTime - corrected_tof_column: "tm" - bias_column: "sampleBias" tof_binwidth: 6.875E-12 # in seconds tof_binning: 1 jitter_cols: ["dldPosX", "dldPosY", "dldTimeSteps"] + # Column settings + columns: + x: dldPosX + corrected_x: X + kx: kx + y: dldPosY + corrected_y: Y + ky: ky + tof: dldTimeSteps + tof_ns: dldTime + corrected_tof: tm + timestamp: timeStamp + auxiliary: dldAux + sector_id: dldSectorID + delay: delayStage + corrected_delay: pumpProbeTime + units: dldPosX: 'step' dldPosY: 'step' @@ -95,11 +108,3 @@ dataframe: format: per_train dataset_key: "/CONTROL/SCS_ILH_LAS/MDL/OPTICALDELAY_PP800/actualPosition/value" index_key: "/INDEX/trainId" - - stream_name_prefixes: - DA03: "RAW-R" - stream_name_postfixes: - DA03: "-DA03-" - - beamtime_dir: - sxp: "/gpfs/exfel/exp/SXP/" diff --git a/sed/core/config.py b/sed/core/config.py index 22e60fe5..ca29285c 100644 --- a/sed/core/config.py +++ b/sed/core/config.py @@ -12,6 +12,7 @@ import yaml from platformdirs import user_config_path +from sed.config.config_model import ConfigModel from sed.core.logging import setup_logging package_dir = os.path.dirname(find_spec("sed").origin) @@ -29,6 +30,7 @@ def parse_config( system_config: dict | str = None, default_config: (dict | str) = f"{package_dir}/config/default.yaml", verbose: bool = True, + verify_config: bool = True, ) -> dict: """Load the config dictionary from a file, or pass the provided config dictionary. The content of the loaded config dictionary is then completed from a set of pre-configured @@ -55,12 +57,13 @@ def parse_config( or file path. The loaded dictionary is completed with the default values. Defaults to *package_dir*/config/default.yaml". verbose (bool, optional): Option to report loaded config files. Defaults to True. + verify_config (bool, optional): Option to verify config file. Defaults to True. Raises: TypeError: Raised if the provided file is neither *json* nor *yaml*. FileNotFoundError: Raised if the provided file is not found. Returns: - dict: Loaded and possibly completed config dictionary. + dict: Loaded and completed config dict, possibly verified by pydantic config model. 
""" if config is None: config = {} @@ -141,7 +144,11 @@ def parse_config( base_dictionary=default_dict, ) - return config_dict + if not verify_config: + return config_dict + # Run the config through the ConfigModel to ensure it is valid + config_model = ConfigModel(**config_dict) + return config_model.model_dump(exclude_unset=True, exclude_none=True) def load_config(config_path: str) -> dict: diff --git a/sed/core/processor.py b/sed/core/processor.py index e931d0a9..1a7781f7 100644 --- a/sed/core/processor.py +++ b/sed/core/processor.py @@ -5,6 +5,7 @@ import pathlib from collections.abc import Sequence +from copy import deepcopy from datetime import datetime from typing import Any from typing import cast @@ -161,22 +162,17 @@ def __init__( verbose=self._verbose, ) - self.use_copy_tool = self._config.get("core", {}).get( - "use_copy_tool", - False, - ) + self.use_copy_tool = "copy_tool" in self._config["core"] if self.use_copy_tool: try: self.ct = CopyTool( - source=self._config["core"]["copy_tool_source"], - dest=self._config["core"]["copy_tool_dest"], num_cores=self._config["core"]["num_cores"], - **self._config["core"].get("copy_tool_kwds", {}), + **self._config["core"]["copy_tool"], ) logger.debug( - f"Initialized copy tool: Copy file from " - f"'{self._config['core']['copy_tool_source']}' " - f"to '{self._config['core']['copy_tool_dest']}'.", + f"Initialized copy tool: Copy files from " + f"'{self._config['core']['copy_tool']['source']}' " + f"to '{self._config['core']['copy_tool']['dest']}'.", ) except KeyError: self.use_copy_tool = False @@ -703,11 +699,13 @@ def save_splinewarp( correction[key] = [] for point in value: correction[key].append([float(i) for i in point]) + elif key == "creation_date": + correction[key] = value.isoformat() else: correction[key] = float(value) if "creation_date" not in correction: - correction["creation_date"] = datetime.now().timestamp() + correction["creation_date"] = datetime.now().isoformat() config = { "momentum": { @@ -790,10 +788,13 @@ def save_transformations( raise ValueError("No momentum transformation parameters to save!") transformations = {} for key, value in self.mc.transformations.items(): - transformations[key] = float(value) + if key == "creation_date": + transformations[key] = value.isoformat() + else: + transformations[key] = float(value) if "creation_date" not in transformations: - transformations["creation_date"] = datetime.now().timestamp() + transformations["creation_date"] = datetime.now().isoformat() config = { "momentum": { @@ -823,8 +824,8 @@ def apply_momentum_correction( - **inv_dfield** (np.ndarray, optional): Inverse deformation field. 
""" - x_column = self._config["dataframe"]["x_column"] - y_column = self._config["dataframe"]["y_column"] + x_column = self._config["dataframe"]["columns"]["x"] + y_column = self._config["dataframe"]["columns"]["y"] if self._dataframe is not None: logger.info("Adding corrected X/Y columns to dataframe:") @@ -938,11 +939,13 @@ def save_momentum_calibration( for key, value in self.mc.calibration.items(): if key in ["kx_axis", "ky_axis", "grid", "extent"]: continue - - calibration[key] = float(value) + elif key == "creation_date": + calibration[key] = value.isoformat() + else: + calibration[key] = float(value) if "creation_date" not in calibration: - calibration["creation_date"] = datetime.now().timestamp() + calibration["creation_date"] = datetime.now().isoformat() config = {"momentum": {"calibration": calibration}} save_config(config, filename, overwrite) @@ -967,8 +970,8 @@ def apply_momentum_calibration( Defaults to False. **kwds: Keyword args passed to ``MomentumCalibrator.append_k_axis``. """ - x_column = self._config["dataframe"]["x_column"] - y_column = self._config["dataframe"]["y_column"] + x_column = self._config["dataframe"]["columns"]["x"] + y_column = self._config["dataframe"]["columns"]["y"] if self._dataframe is not None: logger.info("Adding kx/ky columns to dataframe:") @@ -1074,16 +1077,18 @@ def save_energy_correction( if len(self.ec.correction) == 0: raise ValueError("No energy correction parameters to save!") correction = {} - for key, val in self.ec.correction.items(): + for key, value in self.ec.correction.items(): if key == "correction_type": - correction[key] = val + correction[key] = value elif key == "center": - correction[key] = [float(i) for i in val] + correction[key] = [float(i) for i in value] + elif key == "creation_date": + correction[key] = value.isoformat() else: - correction[key] = float(val) + correction[key] = float(value) if "creation_date" not in correction: - correction["creation_date"] = datetime.now().timestamp() + correction["creation_date"] = datetime.now().isoformat() config = {"energy": {"correction": correction}} save_config(config, filename, overwrite) @@ -1108,7 +1113,7 @@ def apply_energy_correction( **kwds: Keyword args passed to ``EnergyCalibrator.apply_energy_correction()``. """ - tof_column = self._config["dataframe"]["tof_column"] + tof_column = self._config["dataframe"]["columns"]["tof"] if self._dataframe is not None: logger.info("Applying energy correction to dataframe...") @@ -1162,11 +1167,11 @@ def load_bias_series( Args: binned_data (xr.DataArray | tuple[np.ndarray, np.ndarray, np.ndarray], optional): Binned data If provided as DataArray, Needs to contain dimensions - config["dataframe"]["tof_column"] and config["dataframe"]["bias_column"]. If - provided as tuple, needs to contain elements tof, biases, traces. + config["dataframe"]["columns"]["tof"] and config["dataframe"]["columns"]["bias"]. + If provided as tuple, needs to contain elements tof, biases, traces. data_files (list[str], optional): list of file paths to bin axes (list[str], optional): bin axes. - Defaults to config["dataframe"]["tof_column"]. + Defaults to config["dataframe"]["columns"]["tof"]. bins (list, optional): number of bins. Defaults to config["energy"]["bins"]. ranges (Sequence[tuple[float, float]], optional): bin ranges. 
@@ -1187,16 +1192,16 @@ def load_bias_series( if binned_data is not None: if isinstance(binned_data, xr.DataArray): if ( - self._config["dataframe"]["tof_column"] not in binned_data.dims - or self._config["dataframe"]["bias_column"] not in binned_data.dims + self._config["dataframe"]["columns"]["tof"] not in binned_data.dims + or self._config["dataframe"]["columns"]["bias"] not in binned_data.dims ): raise ValueError( "If binned_data is provided as an xarray, it needs to contain dimensions " - f"'{self._config['dataframe']['tof_column']}' and " - f"'{self._config['dataframe']['bias_column']}'!.", + f"'{self._config['dataframe']['columns']['tof']}' and " + f"'{self._config['dataframe']['columns']['bias']}'!.", ) - tof = binned_data.coords[self._config["dataframe"]["tof_column"]].values - biases = binned_data.coords[self._config["dataframe"]["bias_column"]].values + tof = binned_data.coords[self._config["dataframe"]["columns"]["tof"]].values + biases = binned_data.coords[self._config["dataframe"]["columns"]["bias"]].values traces = binned_data.values[:, :] else: try: @@ -1435,11 +1440,13 @@ def save_energy_calibration( calibration[key] = value elif key == "coeffs": calibration[key] = [float(i) for i in value] + elif key == "creation_date": + calibration[key] = value.isoformat() else: calibration[key] = float(value) if "creation_date" not in calibration: - calibration["creation_date"] = datetime.now().timestamp() + calibration["creation_date"] = datetime.now().isoformat() config = {"energy": {"calibration": calibration}} save_config(config, filename, overwrite) @@ -1470,7 +1477,7 @@ def append_energy_axis( **kwds: Keyword args passed to ``EnergyCalibrator.append_energy_axis()``. """ - tof_column = self._config["dataframe"]["tof_column"] + tof_column = self._config["dataframe"]["columns"]["tof"] if self._dataframe is not None: logger.info("Adding energy column to dataframe:") @@ -1536,7 +1543,7 @@ def add_energy_offset( Raises: ValueError: If the energy column is not in the dataframe. """ - energy_column = self._config["dataframe"]["energy_column"] + energy_column = self._config["dataframe"]["columns"]["energy"] if energy_column not in self._dataframe.columns: raise ValueError( f"Energy column {energy_column} not found in dataframe! " @@ -1600,10 +1607,14 @@ def save_energy_offset( if len(self.ec.offsets) == 0: raise ValueError("No energy offset parameters to save!") - if "creation_date" not in self.ec.offsets.keys(): - self.ec.offsets["creation_date"] = datetime.now().timestamp() + offsets = deepcopy(self.ec.offsets) - config = {"energy": {"offsets": self.ec.offsets}} + if "creation_date" not in offsets.keys(): + offsets["creation_date"] = datetime.now() + + offsets["creation_date"] = offsets["creation_date"].isoformat() + + config = {"energy": {"offsets": offsets}} save_config(config, filename, overwrite) logger.info(f'Saved energy offset parameters to "{filename}".') @@ -1618,13 +1629,13 @@ def append_tof_ns_axis( Args: tof_ns_column (str, optional): Name of the generated column containing the time-of-flight in nanosecond. - Defaults to config["dataframe"]["tof_ns_column"]. + Defaults to config["dataframe"]["columns"]["tof_ns"]. preview (bool, optional): Option to preview the first elements of the data frame. Defaults to False. **kwds: additional arguments are passed to ``EnergyCalibrator.append_tof_ns_axis()``. 
""" - tof_column = self._config["dataframe"]["tof_column"] + tof_column = self._config["dataframe"]["columns"]["tof"] if self._dataframe is not None: logger.info("Adding time-of-flight column in nanoseconds to dataframe.") @@ -1671,7 +1682,7 @@ def align_dld_sectors( Defaults to False. **kwds: additional arguments are passed to ``EnergyCalibrator.align_dld_sectors()``. """ - tof_column = self._config["dataframe"]["tof_column"] + tof_column = self._config["dataframe"]["columns"]["tof"] if self._dataframe is not None: logger.info("Aligning 8s sectors of dataframe") @@ -1725,7 +1736,7 @@ def calibrate_delay_axis( Defaults to False. **kwds: Keyword args passed to ``DelayCalibrator.append_delay_axis``. """ - adc_column = self._config["dataframe"]["adc_column"] + adc_column = self._config["dataframe"]["columns"]["adc"] if adc_column not in self._dataframe.columns: raise ValueError(f"ADC column {adc_column} not found in dataframe, cannot calibrate!") @@ -1796,11 +1807,13 @@ def save_delay_calibration( calibration[key] = value elif key in ["adc_range", "delay_range", "delay_range_mm"]: calibration[key] = [float(i) for i in value] + elif key == "creation_date": + calibration[key] = value.isoformat() else: calibration[key] = float(value) if "creation_date" not in calibration: - calibration["creation_date"] = datetime.now().timestamp() + calibration["creation_date"] = datetime.now().isoformat() config = { "delay": { @@ -1841,7 +1854,7 @@ def add_delay_offset( Raises: ValueError: If the delay column is not in the dataframe. """ - delay_column = self._config["dataframe"]["delay_column"] + delay_column = self._config["dataframe"]["columns"]["delay"] if delay_column not in self._dataframe.columns: raise ValueError(f"Delay column {delay_column} not found in dataframe! 
") @@ -1903,14 +1916,14 @@ def save_delay_offsets( if len(self.dc.offsets) == 0: raise ValueError("No delay offset parameters to save!") - if "creation_date" not in self.ec.offsets.keys(): - self.ec.offsets["creation_date"] = datetime.now().timestamp() + offsets = deepcopy(self.dc.offsets) - config = { - "delay": { - "offsets": self.dc.offsets, - }, - } + if "creation_date" not in offsets.keys(): + offsets["creation_date"] = datetime.now() + + offsets["creation_date"] = offsets["creation_date"].isoformat() + + config = {"delay": {"offsets": offsets}} save_config(config, filename, overwrite) logger.info(f'Saved delay offset parameters to "{filename}".') @@ -1964,7 +1977,7 @@ def add_jitter( cols = self._config["dataframe"]["jitter_cols"] for loc, col in enumerate(cols): if col.startswith("@"): - cols[loc] = self._config["dataframe"].get(col.strip("@")) + cols[loc] = self._config["dataframe"]["columns"].get(col.strip("@")) if amps is None: amps = self._config["dataframe"]["jitter_amps"] @@ -2024,7 +2037,7 @@ def add_time_stamped_data( """ time_stamp_column = kwds.pop( "time_stamp_column", - self._config["dataframe"].get("time_stamp_alias", ""), + self._config["dataframe"]["columns"].get("timestamp", ""), ) if time_stamps is None and data is None: @@ -2099,7 +2112,7 @@ def pre_binning( axes = self._config["momentum"]["axes"] for loc, axis in enumerate(axes): if axis.startswith("@"): - axes[loc] = self._config["dataframe"].get(axis.strip("@")) + axes[loc] = self._config["dataframe"]["columns"].get(axis.strip("@")) if bins is None: bins = self._config["momentum"]["bins"] @@ -2333,14 +2346,14 @@ def get_normalization_histogram( self._dataframe.partitions[df_partitions], axis, self._binned.coords[axis].values, - self._config["dataframe"]["time_stamp_alias"], + self._config["dataframe"]["columns"]["timestamp"], ) else: self._normalization_histogram = normalization_histogram_from_timestamps( self._dataframe, axis, self._binned.coords[axis].values, - self._config["dataframe"]["time_stamp_alias"], + self._config["dataframe"]["columns"]["timestamp"], ) else: if df_partitions is not None: @@ -2406,13 +2419,13 @@ def view_event_histogram( axes = list(axes) for loc, axis in enumerate(axes): if axis.startswith("@"): - axes[loc] = self._config["dataframe"].get(axis.strip("@")) + axes[loc] = self._config["dataframe"]["columns"].get(axis.strip("@")) if ranges is None: ranges = list(self._config["histogram"]["ranges"]) for loc, axis in enumerate(axes): - if axis == self._config["dataframe"]["tof_column"]: + if axis == self._config["dataframe"]["columns"]["tof"]: ranges[loc] = np.asarray(ranges[loc]) / self._config["dataframe"]["tof_binning"] - elif axis == self._config["dataframe"]["adc_column"]: + elif axis == self._config["dataframe"]["columns"]["adc"]: ranges[loc] = np.asarray(ranges[loc]) / self._config["dataframe"]["adc_binning"] input_types = map(type, [axes, bins, ranges]) @@ -2515,7 +2528,7 @@ def save( ) input_files = kwds.pop( "input_files", - self._config["nexus"]["input_files"], + [str(path) for path in self._config["nexus"]["input_files"]], ) except KeyError as exc: raise ValueError( diff --git a/sed/dataset/dataset.py b/sed/dataset/dataset.py index 15bf8b4f..fca7fc83 100644 --- a/sed/dataset/dataset.py +++ b/sed/dataset/dataset.py @@ -55,6 +55,7 @@ def load_datasets_dict() -> dict: system_config={}, default_config=DatasetsManager.json_path["module"], verbose=False, + verify_config=False, ) @staticmethod diff --git a/sed/loader/flash/dataframe.py b/sed/loader/flash/dataframe.py index 
887cb9dd..6501c82a 100644 --- a/sed/loader/flash/dataframe.py +++ b/sed/loader/flash/dataframe.py @@ -248,7 +248,7 @@ def df_train(self) -> pd.DataFrame: aux_alias = self._config.get("aux_alias", "dldAux") if channel == aux_alias: try: - sub_channels = self._config["channels"][aux_alias]["subChannels"] + sub_channels = self._config["channels"][aux_alias]["sub_channels"] except KeyError: raise KeyError( f"Provide 'subChannels' for auxiliary channel '{aux_alias}'.", diff --git a/sed/loader/flash/loader.py b/sed/loader/flash/loader.py index 9b3524bc..6c84fc34 100644 --- a/sed/loader/flash/loader.py +++ b/sed/loader/flash/loader.py @@ -110,7 +110,7 @@ def _initialize_dirs(self) -> None: ) from exc beamtime_dir = Path( - self._config["dataframe"]["beamtime_dir"][self._config["core"]["beamline"]], + self._config["core"]["beamtime_dir"][self._config["core"]["beamline"]], ) beamtime_dir = beamtime_dir.joinpath(f"{year}/data/{beamtime_id}/") @@ -175,7 +175,7 @@ def get_files_from_run_id( # type: ignore[override] FileNotFoundError: If no files are found for the given run in the directory. """ # Define the stream name prefixes based on the data acquisition identifier - stream_name_prefixes = self._config["dataframe"]["stream_name_prefixes"] + stream_name_prefixes = self._config["core"]["stream_name_prefixes"] if folders is None: folders = self._config["core"]["base_folder"] @@ -183,7 +183,7 @@ def get_files_from_run_id( # type: ignore[override] if isinstance(folders, str): folders = [folders] - daq = self._config["dataframe"].get("daq") + daq = self._config["dataframe"]["daq"] # Generate the file patterns to search for in the directory file_pattern = f"{stream_name_prefixes[daq]}_run{run_id}_*." + extension diff --git a/sed/loader/flash/utils.py b/sed/loader/flash/utils.py index 6eb2ac30..85bca9a4 100644 --- a/sed/loader/flash/utils.py +++ b/sed/loader/flash/utils.py @@ -78,7 +78,7 @@ def get_channels( if format_ == FORMATS[2] and aux_alias in available_channels: if extend_aux: channels.extend( - channel_dict[aux_alias]["subChannels"].keys(), + channel_dict[aux_alias]["sub_channels"].keys(), ) else: channels.extend([aux_alias]) diff --git a/sed/loader/mpes/loader.py b/sed/loader/mpes/loader.py index ebc9a08c..a7cb82d0 100644 --- a/sed/loader/mpes/loader.py +++ b/sed/loader/mpes/loader.py @@ -72,7 +72,6 @@ def hdf5_to_dataframe( electron_channels = [] column_names = [] - for name, channel in channels.items(): if channel["format"] == "per_electron": if channel["dataset_key"] in test_proc: @@ -749,7 +748,7 @@ def get_files_from_run_id( ) if folders is None: - folders = self._config["core"]["paths"]["data_raw_dir"] + folders = str(self._config["core"]["paths"]["raw"]) if isinstance(folders, str): folders = [folders] @@ -854,7 +853,7 @@ def gather_metadata( # Get metadata from Epics archive if not present already epics_channels = self._config["metadata"]["epics_pvs"] - start = datetime.datetime.utcfromtimestamp(ts_from).isoformat() + start = datetime.datetime.utcfromtimestamp(ts_from) channels_missing = set(epics_channels) - set( metadata["file"].keys(), @@ -862,7 +861,7 @@ def gather_metadata( for channel in channels_missing: try: _, vals = get_archiver_data( - archiver_url=self._config["metadata"].get("archiver_url"), + archiver_url=str(self._config["metadata"].get("archiver_url")), archiver_channel=channel, ts_from=ts_from, ts_to=ts_to, @@ -892,7 +891,7 @@ def gather_metadata( # Determine the correct aperture_config stamps = sorted( - list(self._config["metadata"]["aperture_config"]) + [start], + 
list(self._config["metadata"]["aperture_config"].keys()) + [start], ) current_index = stamps.index(start) timestamp = stamps[current_index - 1] # pick last configuration before file date diff --git a/sed/loader/sxp/loader.py b/sed/loader/sxp/loader.py index a77e8ed6..9ff0ac9a 100644 --- a/sed/loader/sxp/loader.py +++ b/sed/loader/sxp/loader.py @@ -116,7 +116,7 @@ def _initialize_dirs(self): ) from exc beamtime_dir = Path( - self._config["dataframe"]["beamtime_dir"][self._config["core"]["beamline"]], + self._config["core"]["beamtime_dir"][self._config["core"]["beamline"]], ) beamtime_dir = beamtime_dir.joinpath(f"{year}/{beamtime_id}/") @@ -158,8 +158,8 @@ def get_files_from_run_id( FileNotFoundError: If no files are found for the given run in the directory. """ # Define the stream name prefixes based on the data acquisition identifier - stream_name_prefixes = self._config["dataframe"]["stream_name_prefixes"] - stream_name_postfixes = self._config["dataframe"].get("stream_name_postfixes", {}) + stream_name_prefixes = self._config["core"]["stream_name_prefixes"] + stream_name_postfixes = self._config["core"].get("stream_name_postfixes", {}) if isinstance(run_id, (int, np.integer)): run_id = str(run_id).zfill(4) @@ -686,7 +686,7 @@ def create_dataframe_per_file( with h5py.File(file_path, "r") as h5_file: self.reset_multi_index() # Reset MultiIndexes for next file df = self.concatenate_channels(h5_file) - df = df.dropna(subset=self._config["dataframe"].get("tof_column", "dldTimeSteps")) + df = df.dropna(subset=self._config["dataframe"]["columns"].get("tof", "dldTimeSteps")) # correct the 3 bit shift which encodes the detector ID in the 8s time if self._config["dataframe"].get("split_sector_id_from_dld_time", False): df, _ = split_dld_time_from_sector_id(df, config=self._config) @@ -763,7 +763,7 @@ def buffer_file_handler( parquet_schemas = [pq.read_schema(file) for file in existing_parquet_filenames] config_schema = set(self.get_channels(formats="all", index=True)) if self._config["dataframe"].get("split_sector_id_from_dld_time", False): - config_schema.add(self._config["dataframe"].get("sector_id_column", False)) + config_schema.add(self._config["dataframe"]["columns"].get("sector_id", False)) for i, schema in enumerate(parquet_schemas): schema_set = set(schema.names) diff --git a/sed/loader/utils.py b/sed/loader/utils.py index 6bcce9f8..4f18cf0f 100644 --- a/sed/loader/utils.py +++ b/sed/loader/utils.py @@ -160,9 +160,9 @@ def split_dld_time_from_sector_id( Args: df (pd.DataFrame | dask.dataframe.DataFrame): Dataframe to use. tof_column (str, optional): Name of the column containing the - time-of-flight steps. Defaults to config["dataframe"]["tof_column"]. + time-of-flight steps. Defaults to config["dataframe"]["columns"]["tof"]. sector_id_column (str, optional): Name of the column containing the - sectorID. Defaults to config["dataframe"]["sector_id_column"]. + sectorID. Defaults to config["dataframe"]["columns"]["sector_id"]. sector_id_reserved_bits (int, optional): Number of bits reserved for the config (dict, optional): Dataframe configuration dictionary. Defaults to None. 
@@ -172,11 +172,11 @@ def split_dld_time_from_sector_id( if tof_column is None: if config is None: raise ValueError("Either tof_column or config must be given.") - tof_column = config["tof_column"] + tof_column = config["columns"]["tof"] if sector_id_column is None: if config is None: raise ValueError("Either sector_id_column or config must be given.") - sector_id_column = config["sector_id_column"] + sector_id_column = config["columns"]["sector_id"] if sector_id_reserved_bits is None: if config is None: raise ValueError("Either sector_id_reserved_bits or config must be given.") diff --git a/tests/calibrator/test_delay.py b/tests/calibrator/test_delay.py index d7677296..b5bbe49c 100644 --- a/tests/calibrator/test_delay.py +++ b/tests/calibrator/test_delay.py @@ -131,14 +131,13 @@ def test_delay_parameters_from_delay_range_mm() -> None: delay_stage_vals = np.linspace(0, 99, 100) cfg = { "core": {"loader": "flash"}, - "dataframe": {"delay_column": "delay"}, + "dataframe": {"columns": {"delay": "delay"}}, "delay": { "offsets": { "constant": 1, "flip_delay_axis": True, - "bam": { - "weight": 0.001, - "preserve_mean": False, + "columns": { + "bam": {"weight": 0.001, "preserve_mean": False}, }, }, }, @@ -168,7 +167,7 @@ def test_add_offset_from_config(df=test_dataframe) -> None: dc = DelayCalibrator(config=config) df, _ = dc.add_offsets(df.copy()) assert "delay" in df.columns - assert "bam" in dc.offsets.keys() + assert "bam" in dc.offsets["columns"].keys() np.testing.assert_allclose(expected, df["delay"]) @@ -190,7 +189,7 @@ def test_add_offset_from_args(df=test_dataframe) -> None: columns="bam", ) assert "delay" in df.columns - assert "bam" in dc.offsets.keys() + assert "bam" in dc.offsets["columns"].keys() expected = -np.array( delay_stage_vals + bam_vals * 1 + 1, ) @@ -201,7 +200,7 @@ def test_add_offset_from_dict(df=test_dataframe) -> None: """test that the timing offset is corrected for correctly from config""" cfg_ = cfg.copy() offsets = cfg["delay"]["offsets"] # type:ignore - offsets["bam"].pop("weight") + offsets["columns"]["bam"].pop("weight") offsets["flip_delay_axis"] = False cfg_.pop("delay") config = parse_config( @@ -216,5 +215,5 @@ def test_add_offset_from_dict(df=test_dataframe) -> None: dc = DelayCalibrator(config=config) df, _ = dc.add_offsets(df.copy(), offsets=offsets) assert "delay" in df.columns - assert "bam" in dc.offsets.keys() + assert "bam" in dc.offsets["columns"].keys() np.testing.assert_allclose(expected, df["delay"]) diff --git a/tests/calibrator/test_energy.py b/tests/calibrator/test_energy.py index 32dd1a4b..21bac331 100644 --- a/tests/calibrator/test_energy.py +++ b/tests/calibrator/test_energy.py @@ -210,7 +210,7 @@ def test_calibrate_append(energy_scale: str, calibration_method: str) -> None: method=calibration_method, ) df, metadata = ec.append_energy_axis(df) - assert config["dataframe"]["energy_column"] in df.columns + assert config["dataframe"]["columns"]["energy"] in df.columns axis = calibdict["axis"] diff = np.diff(axis) if energy_scale == "kinetic": @@ -256,7 +256,7 @@ def test_append_energy_axis_from_dict_kwds(calib_type: str, calib_dict: dict) -> df, _, _ = loader.read_dataframe(folders=df_folder, collect_metadata=False) ec = EnergyCalibrator(config=config, loader=loader) df, metadata = ec.append_energy_axis(df, calibration=calib_dict) - assert config["dataframe"]["energy_column"] in df.columns + assert config["dataframe"]["columns"]["energy"] in df.columns for key in calib_dict: np.testing.assert_equal(metadata["calibration"][key], calib_dict[key]) 
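The cfg dictionaries in these delay and energy calibrator tests show the matching change in the offsets blocks: per-column entries now sit under an explicit "columns" key, next to scalar options such as "constant" and "flip_delay_axis". Roughly, mirroring the test_delay.py fixture above:

    offsets = {
        "constant": 1,
        "flip_delay_axis": True,
        "columns": {
            "bam": {"weight": 0.001, "preserve_mean": False},
        },
    }
    # consumers now look entries up one level deeper, e.g. offsets["columns"]["bam"],
    # which is exactly what the updated assertions check.
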
@@ -266,7 +266,7 @@ def test_append_energy_axis_from_dict_kwds(calib_type: str, calib_dict: dict) -> df, _, _ = loader.read_dataframe(folders=df_folder, collect_metadata=False) ec = EnergyCalibrator(config=config, loader=loader) df, metadata = ec.append_energy_axis(df, **calib_dict) - assert config["dataframe"]["energy_column"] in df.columns + assert config["dataframe"]["columns"]["energy"] in df.columns for key in calib_dict: np.testing.assert_equal(metadata["calibration"][key], calib_dict[key]) @@ -294,8 +294,10 @@ def test_append_tof_ns_axis() -> None: """ cfg = { "dataframe": { - "tof_column": "t", - "tof_ns_column": "t_ns", + "columns": { + "tof": "t", + "tof_ns": "t_ns", + }, "tof_binning": 2, "tof_binwidth": 1e-9, }, @@ -307,14 +309,14 @@ def test_append_tof_ns_axis() -> None: df, _, _ = loader.read_dataframe(folders=df_folder, collect_metadata=False) ec = EnergyCalibrator(config=config, loader=loader) df, _ = ec.append_tof_ns_axis(df, binwidth=2e-9, binning=2) - assert config["dataframe"]["tof_ns_column"] in df.columns + assert config["dataframe"]["columns"]["tof_ns"] in df.columns np.testing.assert_allclose(df[ec.tof_column], df[ec.tof_ns_column] / 4) # from config df, _, _ = loader.read_dataframe(folders=df_folder, collect_metadata=False) ec = EnergyCalibrator(config=config, loader=loader) df, _ = ec.append_tof_ns_axis(df) - assert config["dataframe"]["tof_ns_column"] in df.columns + assert config["dataframe"]["columns"]["tof_ns"] in df.columns np.testing.assert_allclose(df[ec.tof_column], df[ec.tof_ns_column] / 2) # illegal keywords: @@ -390,7 +392,7 @@ def test_energy_correction(correction_type: str, correction_kwd: dict) -> None: **correction_kwd, ) df, metadata = ec.apply_energy_correction(sample_df) - t = df[config["dataframe"]["corrected_tof_column"]] + t = df[config["dataframe"]["columns"]["corrected_tof"]] assert t[0] == t[2] assert t[0] < t[1] assert t[3] == t[5] @@ -426,7 +428,7 @@ def test_energy_correction(correction_type: str, correction_kwd: dict) -> None: **correction, ) df, metadata = ec.apply_energy_correction(sample_df) - t = df[config["dataframe"]["corrected_tof_column"]] + t = df[config["dataframe"]["columns"]["corrected_tof"]] assert t[0] == t[2] assert t[0] < t[1] assert t[3] == t[5] @@ -514,7 +516,7 @@ def test_energy_correction_from_dict_kwds(correction_type: str, correction_kwd: sample_df, correction=correction_dict, ) - t = df[config["dataframe"]["corrected_tof_column"]] + t = df[config["dataframe"]["columns"]["corrected_tof"]] assert t[0] == t[2] assert t[0] < t[1] assert t[3] == t[5] @@ -534,7 +536,7 @@ def test_energy_correction_from_dict_kwds(correction_type: str, correction_kwd: loader=get_loader("mpes", config=config), ) df, metadata = ec.apply_energy_correction(sample_df, **correction_dict) - t = df[config["dataframe"]["corrected_tof_column"]] + t = df[config["dataframe"]["columns"]["corrected_tof"]] assert t[0] == t[2] assert t[0] < t[1] assert t[3] == t[5] @@ -585,7 +587,7 @@ def test_apply_energy_correction_raises(correction_type: str) -> None: sample_df, correction=correction_dict, ) - assert config["dataframe"]["corrected_tof_column"] in df.columns + assert config["dataframe"]["columns"]["corrected_tof"] in df.columns @pytest.mark.parametrize( @@ -603,12 +605,14 @@ def test_add_offsets_functionality(energy_scale: str) -> None: }, "offsets": { "constant": 1, - "off1": { - "weight": 1, - "preserve_mean": True, + "columns": { + "off1": { + "weight": 1, + "preserve_mean": True, + }, + "off2": {"weight": -1, "preserve_mean": False}, + "off3": 
{"weight": 1, "preserve_mean": False, "reduction": "mean"}, }, - "off2": {"weight": -1, "preserve_mean": False}, - "off3": {"weight": 1, "preserve_mean": False, "reduction": "mean"}, }, }, }, @@ -684,9 +688,11 @@ def test_add_offset_raises() -> None: }, "offsets": { "constant": 1, - "off1": {"weight": -1, "preserve_mean": True}, - "off2": {"weight": -1, "preserve_mean": False}, - "off3": {"weight": 1, "preserve_mean": False, "reduction": "mean"}, + "columns": { + "off1": {"weight": -1, "preserve_mean": True}, + "off2": {"weight": -1, "preserve_mean": False}, + "off3": {"weight": 1, "preserve_mean": False, "reduction": "mean"}, + }, }, }, } @@ -719,17 +725,15 @@ def test_add_offset_raises() -> None: # invalid sign with pytest.raises(TypeError): - cfg = deepcopy(cfg_dict) - cfg["energy"]["offsets"]["off1"]["weight"] = "wrong_type" - config = parse_config(config=cfg, folder_config={}, user_config={}, system_config={}) + config = parse_config(config=cfg_dict, folder_config={}, user_config={}, system_config={}) + config["energy"]["offsets"]["columns"]["off1"]["weight"] = "wrong_type" ec = EnergyCalibrator(config=config, loader=get_loader("flash", config=config)) _ = ec.add_offsets(t_df) # invalid constant with pytest.raises(TypeError): - cfg = deepcopy(cfg_dict) - cfg["energy"]["offsets"]["constant"] = "wrong_type" - config = parse_config(config=cfg, folder_config={}, user_config={}, system_config={}) + config = parse_config(config=cfg_dict, folder_config={}, user_config={}, system_config={}) + config["energy"]["offsets"]["constant"] = "wrong_type" ec = EnergyCalibrator(config=config, loader=get_loader("flash", config=config)) _ = ec.add_offsets(t_df) @@ -738,8 +742,10 @@ def test_align_dld_sectors() -> None: """test functionality and error handling of align_dld_sectors""" cfg_dict: dict[str, Any] = { "dataframe": { - "tof_column": "dldTimeSteps", - "sector_id_column": "dldSectorId", + "columns": { + "tof": "dldTimeSteps", + "sector_id": "dldSectorId", + }, "sector_delays": [-0.35, -0.25, -0.15, -0.05, 0.05, 0.15, 0.25, 0.35], }, } @@ -767,7 +773,7 @@ def test_align_dld_sectors() -> None: t_df = dask.dataframe.from_pandas(df.copy(), npartitions=2) res, meta = ec.align_dld_sectors( t_df, - tof_column=cfg_dict["dataframe"]["tof_column"], + tof_column=cfg_dict["dataframe"]["columns"]["tof"], sector_delays=cfg_dict["dataframe"]["sector_delays"], sector_id_column="dldSectorId", ) diff --git a/tests/data/loader/flash/config.yaml b/tests/data/loader/flash/config.yaml index 19e01a2a..fbbcba25 100644 --- a/tests/data/loader/flash/config.yaml +++ b/tests/data/loader/flash/config.yaml @@ -17,6 +17,21 @@ core: # beamtime_id: xxxxxxxx # year: 20xx + # The prefixes of the stream names for different DAQ systems for parsing filenames + stream_name_prefixes: + pbd: "GMD_DATA_gmd_data" + pbd2: "FL2PhotDiag_pbd2_gmd_data" + fl1user1: "FLASH1_USER1_stream_2" + fl1user2: "FLASH1_USER2_stream_2" + fl1user3: "FLASH1_USER3_stream_2" + fl2user1: "FLASH2_USER1_stream_2" + fl2user2: "FLASH2_USER2_stream_2" + + # The beamtime directories for different DAQ systems. + # (Not to be changed by user) + beamtime_dir: + pg2: "/asap3/flash/gpfs/pg2/" + dataframe: # The name of the DAQ system to use. Necessary to resolve the filenames/paths. 
@@ -29,39 +44,29 @@ dataframe: # if true, removes the 3 bits reserved for dldSectorID from the dldTimeSteps column split_sector_id_from_dld_time: True sector_id_reserved_bits: 3 - # dataframe column containing x coordinates - x_column: dldPosX - # dataframe column containing corrected x coordinates - corrected_x_column: "X" - # dataframe column containing kx coordinates - kx_column: "kx" - # dataframe column containing y coordinates - - y_column: dldPosY - # dataframe column containing corrected y coordinates - corrected_y_column: "Y" - # dataframe column containing kx coordinates - ky_column: "ky" - # dataframe column containing time-of-flight data - - tof_column: dldTimeSteps - # dataframe column containing time-of-flight data in ns - tof_ns_column: dldTime - # dataframe column containing corrected time-of-flight data - corrected_tof_column: "tm" - # the time stamp column - time_stamp_alias: timeStamp # time length of a base time-of-flight bin in seconds tof_binwidth: 2.0576131995767355E-11 # binning parameter for time-of-flight data. 2**tof_binning bins per base bin tof_binning: 3 # power of 2, 4 means 8 bins per step - # dataframe column containing sector ID. obtained from dldTimeSteps column - sector_id_column: dldSectorID sector_delays: [0., 0., 0., 0., 0., 0., 0., 0.] jitter_cols: ["dldPosX", "dldPosY", "dldTimeSteps"] - + columns: + x: dldPosX + corrected_x: X + kx: kx + y: dldPosY + corrected_y: Y + ky: ky + tof: dldTimeSteps + tof_ns: dldTime + corrected_tof: tm + timestamp: timeStamp + auxiliary: dldAux + sector_id: dldSectorID + delay: delayStage + corrected_delay: pumpProbeTime units: dldPosX: 'step' dldPosY: 'step' @@ -115,28 +120,21 @@ dataframe: index_key: "/uncategorised/FLASH.EXP/HEXTOF.DAQ/DLD1/index" dataset_key: "/uncategorised/FLASH.EXP/HEXTOF.DAQ/DLD1/value" slice: 4 - subChannels: + sub_channels: sampleBias: slice: 0 - dtype: float64 tofVoltage: slice: 1 - dtype: float64 extractorVoltage: slice: 2 - dtype: float64 extractorCurrent: slice: 3 - dtype: float64 cryoTemperature: slice: 4 - dtype: float64 sampleTemperature: slice: 5 - dtype: float64 dldTimeBinSize: slice: 15 - dtype: float64 timeStamp: format: per_train @@ -158,31 +156,3 @@ dataframe: index_key: "/FL1/Photon Diagnostic/GMD/Pulse resolved energy/energy tunnel/index" dataset_key: "/FL1/Photon Diagnostic/GMD/Pulse resolved energy/energy tunnel/value" slice: 0 - - # The prefixes of the stream names for different DAQ systems for parsing filenames - # (Not to be changed by user) - stream_name_prefixes: - pbd: "GMD_DATA_gmd_data" - pbd2: "FL2PhotDiag_pbd2_gmd_data" - fl1user1: "FLASH1_USER1_stream_2" - fl1user2: "FLASH1_USER2_stream_2" - fl1user3: "FLASH1_USER3_stream_2" - fl2user1: "FLASH2_USER1_stream_2" - fl2user2: "FLASH2_USER2_stream_2" - - # The beamtime directories for different DAQ systems. 
- # (Not to be changed by user) - beamtime_dir: - pg2: "/asap3/flash/gpfs/pg2/" - -# metadata collection from scicat -# metadata: -# scicat_url: -# scicat_username: -# scicat_password: - -# The nexus collection routine shall be finalized soon for both instruments -# nexus: -# reader: "flash" -# definition: "NXmpes" -# input_files: ["NXmpes_config_HEXTOF_light.json"] diff --git a/tests/data/loader/generic/config.yaml b/tests/data/loader/generic/config.yaml index e901b4d3..e1c6163f 100644 --- a/tests/data/loader/generic/config.yaml +++ b/tests/data/loader/generic/config.yaml @@ -1 +1,2 @@ -test: +core: + loader: generic diff --git a/tests/data/loader/mpes/config.yaml b/tests/data/loader/mpes/config.yaml index 1a411be2..62e46ebc 100644 --- a/tests/data/loader/mpes/config.yaml +++ b/tests/data/loader/mpes/config.yaml @@ -1,10 +1,8 @@ core: paths: - data_raw_dir: "tests/data/loader/mpes/" + raw: "tests/data/loader/mpes/" dataframe: - # dataframe column name for the time stamp column - time_stamp_alias: "timeStamps" # hdf5 group name containing eventIDs occuring at every millisecond (used to calculate timestamps) ms_markers_key: "msMarkers" # hdf5 attribute containing the timestamp of the first event in a file @@ -13,30 +11,6 @@ dataframe: timed_dataframe_unit_time: 0.001 # list of columns to apply jitter to jitter_cols: ["X", "Y", "t", "ADC"] - # dataframe column containing x coordinates - x_column: "X" - # dataframe column containing y coordinates - y_column: "Y" - # dataframe column containing time-of-flight data - tof_column: "t" - # dataframe column containing analog-to-digital data - adc_column: "ADC" - # dataframe column containing bias voltage data - bias_column: "sampleBias" - # dataframe column containing corrected x coordinates - corrected_x_column: "Xm" - # dataframe column containing corrected y coordinates - corrected_y_column: "Ym" - # dataframe column containing corrected time-of-flight data - corrected_tof_column: "tm" - # dataframe column containing kx coordinates - kx_column: "kx" - # dataframe column containing ky coordinates - ky_column: "ky" - # dataframe column containing energy data - energy_column: "energy" - # dataframe column containing delay data - delay_column: "delay" # time length of a base time-of-flight bin in ns tof_binwidth: 4.125e-12 # Binning factor of the tof_column-data compared to tof_binwidth (2^(tof_binning-1)) @@ -44,6 +18,22 @@ dataframe: # binning factor used for the adc coordinate (2^(adc_binning-1)) adc_binning: 3 # Default units for dataframe entries + + columns: + x: X # dataframe column containing x coordinates + y: Y # dataframe column containing y coordinates + tof: t # dataframe column containing time-of-flight data + adc: ADC # dataframe column containing analog-to-digital data + bias: sampleBias # dataframe column containing bias voltage data + corrected_x: Xm # dataframe column containing corrected x coordinates + corrected_y: Ym # dataframe column containing corrected y coordinates + corrected_tof: tm # dataframe column containing corrected time-of-flight data + kx: kx # dataframe column containing kx coordinates + ky: ky # dataframe column containing ky coordinates + energy: energy # dataframe column containing energy data + delay: delay # dataframe column containing delay data + timestamp: timeStamps # dataframe column containing timestamp data + units: X: 'step' Y: 'step' diff --git a/tests/data/loader/sxp/config.yaml b/tests/data/loader/sxp/config.yaml index 095178ff..2f88bc50 100644 --- a/tests/data/loader/sxp/config.yaml +++ 
b/tests/data/loader/sxp/config.yaml @@ -4,28 +4,40 @@ core: paths: raw: "tests/data/loader/sxp/" processed: "tests/data/loader/sxp/parquet" - -binning: num_cores: 10 + stream_name_prefixes: + DA03: "RAW-R" + stream_name_postfixes: + DA03: "-DA03-" + + beamtime_dir: + sxp: "/GPFS/exfel/exp/SXP/" dataframe: ubid_offset: 0 daq: DA03 forward_fill_iterations: 2 - x_column: dldPosX - corrected_x_column: "X" - kx_column: "kx" - y_column: dldPosY - corrected_y_column: "Y" - ky_column: "ky" - tof_column: dldTimeSteps - tof_ns_column: dldTime - corrected_tof_column: "tm" - bias_column: "sampleBias" tof_binwidth: 2.0576131995767355E-11 # in seconds tof_binning: 3 jitter_cols: ["dldPosX", "dldPosY", "dldTimeSteps"] + # Column settings + columns: + x: dldPosX + corrected_x: X + kx: kx + y: dldPosY + corrected_y: Y + ky: ky + tof: dldTimeSteps + tof_ns: dldTime + corrected_tof: tm + timestamp: timeStamp + auxiliary: dldAux + sector_id: dldSectorID + delay: delayStage + corrected_delay: pumpProbeTime + units: dldPosX: 'step' dldPosY: 'step' @@ -77,11 +89,3 @@ dataframe: format: per_train dataset_key: "/CONTROL/SCS_ILH_LAS/MDL/OPTICALDELAY_PP800/actualPosition/value" index_key: "/INDEX/trainId" - - stream_name_prefixes: - DA03: "RAW-R" - stream_name_postfixes: - DA03: "-DA03-" - - beamtime_dir: - sxp: "/GPFS/exfel/exp/SXP/" diff --git a/tests/loader/flash/test_dataframe_creator.py b/tests/loader/flash/test_dataframe_creator.py index 64e7712c..fe1c8f79 100644 --- a/tests/loader/flash/test_dataframe_creator.py +++ b/tests/loader/flash/test_dataframe_creator.py @@ -234,7 +234,7 @@ def test_create_dataframe_per_train(config_dataframe: dict, h5_paths: list[Path] # The subchannels are stored in the second dimension # Only index amount of values are stored in the first dimension, the rest are NaNs # hence the slicing - subchannels = config_dataframe["channels"]["dldAux"]["subChannels"] + subchannels = config_dataframe["channels"]["dldAux"]["sub_channels"] for subchannel, values in subchannels.items(): assert np.all(df.df_train[subchannel].dropna().values == data[: key.size, values["slice"]]) diff --git a/tests/loader/flash/test_flash_loader.py b/tests/loader/flash/test_flash_loader.py index a34a9977..de0bdf35 100644 --- a/tests/loader/flash/test_flash_loader.py +++ b/tests/loader/flash/test_flash_loader.py @@ -33,7 +33,7 @@ def test_initialize_dirs( config_["core"]["year"] = "2000" # Find base path of beamline from config. Here, we use pg2 - base_path = config_["dataframe"]["beamtime_dir"]["pg2"] + base_path = config_["core"]["beamtime_dir"]["pg2"] expected_path = ( Path(base_path) / config_["core"]["year"] / "data" / config_["core"]["beamtime_id"] ) diff --git a/tests/loader/sxp/test_sxp_loader.py b/tests/loader/sxp/test_sxp_loader.py index 09588152..1ee06d41 100644 --- a/tests/loader/sxp/test_sxp_loader.py +++ b/tests/loader/sxp/test_sxp_loader.py @@ -87,7 +87,7 @@ def test_initialize_dirs(config_file: dict, fs) -> None: config["core"]["year"] = "2000" # Find base path of beamline from config. 
- base_path = config["dataframe"]["beamtime_dir"]["sxp"] + base_path = config["core"]["beamtime_dir"]["sxp"] expected_path = Path(base_path) / config["core"]["year"] / config["core"]["beamtime_id"] # Create expected paths expected_raw_path = expected_path / "raw" @@ -150,7 +150,7 @@ def test_data_keys_not_in_h5(config_file: dict, key_type: str): sl = SXPLoader(config=config) with pytest.raises(ValueError) as e: - sl.create_dataframe_per_file(config["core"]["paths"]["raw"] + H5_PATH) + sl.create_dataframe_per_file(Path(config["core"]["paths"]["raw"], H5_PATH)) assert str(e.value.args[0]) == f"The {key_type} for channel dldPosX does not exist." diff --git a/tests/loader/test_loaders.py b/tests/loader/test_loaders.py index 734e7b44..f3e0bf84 100644 --- a/tests/loader/test_loaders.py +++ b/tests/loader/test_loaders.py @@ -70,6 +70,9 @@ def get_all_loaders() -> list[ParameterSet]: loader_name, "config.yaml", ), + folder_config={}, + user_config={}, + system_config={}, ), ) for loader_name in get_names_of_all_loaders() @@ -95,8 +98,9 @@ def test_has_correct_read_dataframe_func(loader: BaseLoader, read_type: str) -> # Fix for race condition during parallel testing if loader.__name__ in {"flash", "sxp"}: config = deepcopy(loader._config) # pylint: disable=protected-access - config["core"]["paths"]["processed"] = ( - config["core"]["paths"]["processed"] + f"_{read_type}" + config["core"]["paths"]["processed"] = Path( + config["core"]["paths"]["processed"], + f"_{read_type}", ) loader = get_loader(loader_name=loader.__name__, config=config) @@ -183,8 +187,9 @@ def test_timed_dataframe(loader: BaseLoader) -> None: # Fix for race condition during parallel testing if loader.__name__ in {"flash", "sxp"}: config = deepcopy(loader._config) # pylint: disable=protected-access - config["core"]["paths"]["processed"] = ( - config["core"]["paths"]["processed"] + "_timed_dataframe" + config["core"]["paths"]["processed"] = Path( + config["core"]["paths"]["processed"], + "_timed_dataframe", ) loader = get_loader(loader_name=loader.__name__, config=config) @@ -226,7 +231,10 @@ def test_get_count_rate(loader: BaseLoader) -> None: # Fix for race condition during parallel testing if loader.__name__ in {"flash", "sxp"}: config = deepcopy(loader._config) # pylint: disable=protected-access - config["core"]["paths"]["processed"] = config["core"]["paths"]["processed"] + "_count_rate" + config["core"]["paths"]["processed"] = Path( + config["core"]["paths"]["processed"], + "_count_rate", + ) loader = get_loader(loader_name=loader.__name__, config=config) if loader.__name__ != "BaseLoader": @@ -273,8 +281,9 @@ def test_get_elapsed_time(loader: BaseLoader) -> None: # Fix for race condition during parallel testing if loader.__name__ in {"flash", "sxp"}: config = deepcopy(loader._config) # pylint: disable=protected-access - config["core"]["paths"]["processed"] = ( - config["core"]["paths"]["processed"] + "_elapsed_time" + config["core"]["paths"]["processed"] = Path( + config["core"]["paths"]["processed"], + "_elapsed_time", ) loader = get_loader(loader_name=loader.__name__, config=config) diff --git a/tests/test_config.py b/tests/test_config.py index df875b45..bdbdd9c8 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -9,6 +9,7 @@ from pathlib import Path import pytest +from pydantic import ValidationError from sed.core.config import complete_dictionary from sed.core.config import load_config @@ -16,6 +17,10 @@ from sed.core.config import save_config package_dir = os.path.dirname(find_spec("sed").origin) + 
+test_config_dir = Path(package_dir).joinpath("../tests/data/loader/") +config_paths = test_config_dir.glob("*/*.yaml") + default_config_keys = [ "binning", "histogram", @@ -36,7 +41,7 @@ def test_default_config() -> None: """Test the config loader for the default config.""" - config = parse_config() + config = parse_config(config={}, folder_config={}, user_config={}, system_config={}) assert isinstance(config, dict) for key in default_config_keys: assert key in config.keys() @@ -49,7 +54,7 @@ def test_default_config() -> None: def test_load_dict() -> None: """Test the config loader for a dict.""" config_dict = {"test_entry": True} - config = parse_config(config_dict) + config = parse_config(config_dict, verify_config=False) assert isinstance(config, dict) for key in default_config_keys: assert key in config.keys() @@ -69,7 +74,14 @@ def test_load_does_not_modify() -> None: default_dict = {"a": 1, "b": {"c": 13}, "c": {"e": 11}} default_copy = copy.deepcopy(default_dict) - parse_config(config_dict, folder_dict, user_dict, system_dict, default_dict) + parse_config( + config_dict, + folder_dict, + user_dict, + system_dict, + default_dict, + verify_config=False, + ) assert config_dict == config_copy assert folder_dict == folder_copy assert user_dict == user_copy @@ -133,3 +145,91 @@ def test_save_dict() -> None: save_config(config_dict, filename, overwrite=True) config = load_config(filename) assert "test_entry" not in config.keys() + + +@pytest.mark.parametrize("config_path", config_paths) +def test_config_model_valid(config_path) -> None: + """Test the config model for a valid config.""" + config = parse_config( + config_path, + folder_config={}, + user_config={}, + system_config={}, + verify_config=True, + ) + assert config is not None + + +def test_invalid_config_extra_field(): + """Test that an invalid config with an extra field fails validation.""" + default_config = parse_config( + folder_config={}, + user_config={}, + system_config={}, + verify_config=True, + ) + invalid_config = default_config.copy() + invalid_config["extra_field"] = "extra_value" + with pytest.raises(ValidationError): + parse_config( + invalid_config, + folder_config={}, + user_config={}, + system_config={}, + verify_config=True, + ) + + +def test_invalid_config_missing_field(): + """Test that an invalid config with a missing required field fails validation.""" + default_config = parse_config( + folder_config={}, + user_config={}, + system_config={}, + verify_config=True, + ) + invalid_config = default_config.copy() + del invalid_config["core"]["loader"] + with pytest.raises(ValidationError): + parse_config( + folder_config={}, + user_config={}, + system_config={}, + default_config=invalid_config, + verify_config=True, + ) + + +def test_invalid_config_wrong_values(): + """Test that the validators for certain fields fails validation if not fulfilled.""" + default_config = parse_config( + folder_config={}, + user_config={}, + system_config={}, + verify_config=True, + ) + invalid_config = default_config.copy() + invalid_config["core"]["loader"] = "nonexistent" + with pytest.raises(ValidationError) as e: + parse_config( + folder_config={}, + user_config={}, + system_config={}, + default_config=invalid_config, + verify_config=True, + ) + assert "Invalid loader nonexistent. 
Available loaders are:" in str(e.value) + invalid_config = default_config.copy() + invalid_config["core"]["copy_tool"] = {} + invalid_config["core"]["copy_tool"]["source"] = "./" + invalid_config["core"]["copy_tool"]["dest"] = "./" + invalid_config["core"]["copy_tool"]["gid"] = 9999 + with pytest.raises(ValidationError) as e: + parse_config( + folder_config={}, + user_config={}, + system_config={}, + default_config=invalid_config, + verify_config=True, + ) + assert "Invalid value 9999 for gid. Group not found." in str(e.value) diff --git a/tests/test_diagnostics.py b/tests/test_diagnostics.py index 2614e69d..cb5bcce6 100644 --- a/tests/test_diagnostics.py +++ b/tests/test_diagnostics.py @@ -18,7 +18,12 @@ df_folder = package_dir + "/../tests/data/loader/mpes/" folder = package_dir + "/../tests/data/calibrator/" files = glob.glob(df_folder + "*.h5") -config = parse_config(package_dir + "/../tests/data/config/config.yaml") +config = parse_config( + package_dir + "/../tests/data/loader/mpes/config.yaml", + folder_config={}, + user_config={}, + system_config={}, +) loader = get_loader("mpes", config=config) @@ -39,7 +44,7 @@ def test_plot_histogram(ncols: int, backend: str) -> None: bins = config["histogram"]["bins"] for loc, axis in enumerate(axes): if axis.startswith("@"): - axes[loc] = config["dataframe"].get(axis.strip("@")) + axes[loc] = config["dataframe"]["columns"].get(axis.strip("@")) values = {axis: dataframe[axis].compute() for axis in axes} grid_histogram(values, ncols, axes, bins, ranges, backend) diff --git a/tests/test_processor.py b/tests/test_processor.py index 21e4df86..709fc048 100644 --- a/tests/test_processor.py +++ b/tests/test_processor.py @@ -219,7 +219,7 @@ def test_attributes_setters() -> None: def test_copy_tool() -> None: """Test the copy tool functionality in the processor""" - config = {"core": {"loader": "mpes", "use_copy_tool": True}} + config: dict[str, dict[str, Any]] = {"core": {"loader": "mpes"}} processor = SedProcessor( config=config, folder_config={}, @@ -231,10 +231,7 @@ def test_copy_tool() -> None: config = { "core": { "loader": "mpes", - "use_copy_tool": True, - "copy_tool_source": source_folder, - "copy_tool_dest": dest_folder, - "copy_tool_kwds": {"gid": os.getgid()}, + "copy_tool": {"source": source_folder, "dest": dest_folder, "gid": os.getgid()}, }, } processor = SedProcessor( @@ -248,17 +245,6 @@ def test_copy_tool() -> None: processor.load(files=files) assert processor.files[0].find(dest_folder) > -1 - # test illegal keywords: - config["core"]["copy_tool_kwds"] = {"gid": os.getgid(), "illegal_keyword": True} - with pytest.raises(TypeError): - processor = SedProcessor( - config=config, - folder_config={}, - user_config={}, - system_config={}, - verbose=True, - ) - feature4 = np.array([[203.2, 341.96], [299.16, 345.32], [304.38, 149.88], [199.52, 152.48]]) feature5 = np.array( @@ -660,8 +646,9 @@ def test_align_dld_sectors() -> None: user_config={}, system_config={}, ) - config["core"]["paths"]["processed"] = ( - config["core"]["paths"]["processed"] + "_align_dld_sectors" + config["core"]["paths"]["processed"] = Path( + config["core"]["paths"]["processed"], + "_align_dld_sectors", ) processor = SedProcessor( folder=df_folder + "../flash/", @@ -719,7 +706,7 @@ def test_append_tof_ns_axis() -> None: verbose=True, ) processor.append_tof_ns_axis() - assert processor.config["dataframe"]["tof_ns_column"] in processor.dataframe + assert processor.config["dataframe"]["columns"]["tof_ns"] in processor.dataframe def test_delay_calibration_workflow() -> 
None: @@ -842,6 +829,7 @@ def test_add_time_stamped_data() -> None: system_config={}, time_stamps=True, verbose=True, + verify_config=False, ) df_ts = processor.dataframe.timeStamps.compute().values data = np.linspace(0, 1, 20) @@ -998,7 +986,7 @@ def test_compute_with_normalization() -> None: def test_get_normalization_histogram() -> None: """Test the generation function for the normalization histogram""" - config = {"core": {"loader": "mpes"}, "dataframe": {"time_stamp_alias": "timeStamps"}} + config = {"core": {"loader": "mpes"}, "dataframe": {"columns": {"timestamp": "timeStamps"}}} processor = SedProcessor( folder=df_folder, config=config, @@ -1068,12 +1056,14 @@ def test_save(caplog) -> None: folder_config={}, user_config=package_dir + "/../sed/config/mpes_example_config.yaml", system_config={}, + verify_config=False, ) config["metadata"]["lens_mode_config"]["6kV_kmodem4.0_30VTOF_453ns_focus.sav"][ "MCPfront" ] = 21.0 config["metadata"]["lens_mode_config"]["6kV_kmodem4.0_30VTOF_453ns_focus.sav"]["Z1"] = 2450 config["metadata"]["lens_mode_config"]["6kV_kmodem4.0_30VTOF_453ns_focus.sav"]["F"] = 69.23 + config["nexus"]["input_files"] = [package_dir + "/../sed/config/NXmpes_config.json"] processor = SedProcessor( folder=df_folder, config=config, @@ -1105,7 +1095,6 @@ def test_save(caplog) -> None: # and error if any validation problems occur. processor.save( "output.nxs", - input_files=df_folder + "../../../../sed/config/NXmpes_config.json", fail=True, ) assert os.path.isfile("output.nxs") @@ -1114,7 +1103,6 @@ def test_save(caplog) -> None: with pytest.raises(ValidationFailed): processor.save( "result.nxs", - input_files=df_folder + "../../../../sed/config/NXmpes_config.json", fail=True, ) # Check that the issues are raised as warnings per default: @@ -1123,7 +1111,7 @@ def test_save(caplog) -> None: yaml.dump({"Instrument": {"undocumented_field": "undocumented entry"}}, f) with open("temp_config.json", "w") as f: with open( - df_folder + "../../../../sed/config/NXmpes_config.json", + package_dir + "/../sed/config/NXmpes_config.json", encoding="utf-8", ) as stream: config_dict = json.load(stream) diff --git a/tutorial/2_conversion_pipeline_for_example_time-resolved_ARPES_data.ipynb b/tutorial/2_conversion_pipeline_for_example_time-resolved_ARPES_data.ipynb index 82798d55..28865853 100644 --- a/tutorial/2_conversion_pipeline_for_example_time-resolved_ARPES_data.ipynb +++ b/tutorial/2_conversion_pipeline_for_example_time-resolved_ARPES_data.ipynb @@ -59,7 +59,7 @@ "outputs": [], "source": [ "# create sed processor using the config file:\n", - "sp = sed.SedProcessor(folder=scandir, config=\"../sed/config/mpes_example_config.yaml\", verbose=True)" + "sp = sed.SedProcessor(folder=scandir, config=\"../sed/config/mpes_example_config.yaml\", system_config={}, verbose=True)" ] }, { @@ -649,9 +649,6 @@ } ], "metadata": { - "interpreter": { - "hash": "728003ee06929e5fa5ff815d1b96bf487266025e4b7440930c6bf4536d02d243" - }, "kernelspec": { "display_name": "python3", "language": "python", diff --git a/tutorial/3_metadata_collection_and_export_to_NeXus.ipynb b/tutorial/3_metadata_collection_and_export_to_NeXus.ipynb index 98c1258c..7ac5d39f 100644 --- a/tutorial/3_metadata_collection_and_export_to_NeXus.ipynb +++ b/tutorial/3_metadata_collection_and_export_to_NeXus.ipynb @@ -143,7 +143,7 @@ "outputs": [], "source": [ "# create sed processor using the config file, and collect the meta data from the files:\n", - "sp = sed.SedProcessor(folder=scandir, config=\"../sed/config/mpes_example_config.yaml\", 
metadata=metadata, collect_metadata=True)" + "sp = sed.SedProcessor(folder=scandir, config=\"../sed/config/mpes_example_config.yaml\", system_config={}, metadata=metadata, collect_metadata=True)" ] }, { @@ -290,9 +290,6 @@ } ], "metadata": { - "interpreter": { - "hash": "728003ee06929e5fa5ff815d1b96bf487266025e4b7440930c6bf4536d02d243" - }, "kernelspec": { "display_name": "python3", "language": "python", @@ -308,7 +305,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.12" + "version": "3.9.19" } }, "nbformat": 4, diff --git a/tutorial/4_hextof_workflow.ipynb b/tutorial/4_hextof_workflow.ipynb index c9b36c04..e6484602 100644 --- a/tutorial/4_hextof_workflow.ipynb +++ b/tutorial/4_hextof_workflow.ipynb @@ -31,6 +31,8 @@ }, "outputs": [], "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", "from typing import List\n", "from pathlib import Path\n", "import os\n", @@ -964,7 +966,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.9", + "display_name": "python3", "language": "python", "name": "python3" }, diff --git a/tutorial/5_sxp_workflow.ipynb b/tutorial/5_sxp_workflow.ipynb index bb7c6a7b..7e3d2dbc 100644 --- a/tutorial/5_sxp_workflow.ipynb +++ b/tutorial/5_sxp_workflow.ipynb @@ -53,8 +53,8 @@ "config = {\n", " \"core\": {\n", " \"paths\": {\n", - " \"data_raw_dir\": \"/gpfs/exfel/exp/SXP/202302/p004316/raw/\",\n", - " \"data_parquet_dir\": os.path.expanduser(\"~\") + \"/sxp_parquet/\",\n", + " \"raw\": \"/gpfs/exfel/exp/SXP/202302/p004316/raw/\",\n", + " \"processed\": os.path.expanduser(\"~\") + \"/sxp_parquet/\",\n", " }\n", " }\n", "}\n", @@ -297,7 +297,6 @@ "outputs": [], "source": [ "sp.calibrate_energy_axis(\n", - " ref_id=5,\n", " ref_energy=-33,\n", " method=\"lmfit\",\n", " energy_scale='kinetic',\n", @@ -316,15 +315,6 @@ "sp.save_energy_calibration()" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sp.append_energy_axis()" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -341,7 +331,7 @@ "sp.load(runs=np.arange(58, 62))\n", "sp.add_jitter()\n", "sp.filter_column(\"pulseId\", max_value=756)\n", - "sp.append_energy_axis()" + "sp.append_energy_axis(bias_voltage=957)" ] }, { @@ -394,7 +384,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.12" + "version": "3.9.19" } }, "nbformat": 4, diff --git a/tutorial/6_binning_with_time-stamped_data.ipynb b/tutorial/6_binning_with_time-stamped_data.ipynb index 93080372..7978568f 100644 --- a/tutorial/6_binning_with_time-stamped_data.ipynb +++ b/tutorial/6_binning_with_time-stamped_data.ipynb @@ -68,7 +68,7 @@ "outputs": [], "source": [ "# create sed processor using the config file with time-stamps:\n", - "sp = sed.SedProcessor(folder=scandir, user_config=\"../sed/config/mpes_example_config.yaml\", time_stamps=True, verbose=True)" + "sp = sed.SedProcessor(folder=scandir, user_config=\"../sed/config/mpes_example_config.yaml\", system_config={}, time_stamps=True, verbose=True)" ] }, { @@ -162,7 +162,7 @@ "sp.load_bias_series(data_files=files, normalize=True, biases=voltages, ranges=[(64000, 76000)])\n", "rg = (65500, 66000)\n", "sp.find_bias_peaks(ranges=rg, ref_id=5, infer_others=True, apply=True)\n", - "sp.calibrate_energy_axis(ref_energy=-0.5, ref_id=4, energy_scale=\"kinetic\", method=\"lmfit\")" + "sp.calibrate_energy_axis(ref_energy=-0.5, energy_scale=\"kinetic\", method=\"lmfit\")" ] }, { diff --git 
a/tutorial/7_correcting_orthorhombic_symmetry.ipynb b/tutorial/7_correcting_orthorhombic_symmetry.ipynb index a385b3d1..faba30b6 100644 --- a/tutorial/7_correcting_orthorhombic_symmetry.ipynb +++ b/tutorial/7_correcting_orthorhombic_symmetry.ipynb @@ -60,7 +60,7 @@ "outputs": [], "source": [ "# create sed processor using the config file with time-stamps:\n", - "sp = sed.SedProcessor(folder=scandir, user_config=\"../sed/config/mpes_example_config.yaml\", time_stamps=True, verbose=True)\n", + "sp = sed.SedProcessor(folder=scandir, user_config=\"../sed/config/mpes_example_config.yaml\", system_config={}, time_stamps=True, verbose=True)\n", "sp.add_jitter()" ] }, @@ -216,11 +216,8 @@ } ], "metadata": { - "interpreter": { - "hash": "728003ee06929e5fa5ff815d1b96bf487266025e4b7440930c6bf4536d02d243" - }, "kernelspec": { - "display_name": "python3", + "display_name": ".pyenv", "language": "python", "name": "python3" }, diff --git a/tutorial/8_jittering_tutorial.ipynb b/tutorial/8_jittering_tutorial.ipynb index ef11af7a..d98a19b2 100644 --- a/tutorial/8_jittering_tutorial.ipynb +++ b/tutorial/8_jittering_tutorial.ipynb @@ -58,7 +58,7 @@ "outputs": [], "source": [ "# create sed processor using the config file:\n", - "sp = sed.SedProcessor(folder=scandir, config=\"../sed/config/mpes_example_config.yaml\")" + "sp = sed.SedProcessor(folder=scandir, config=\"../sed/config/mpes_example_config.yaml\", system_config={})" ] }, { @@ -358,11 +358,8 @@ } ], "metadata": { - "interpreter": { - "hash": "728003ee06929e5fa5ff815d1b96bf487266025e4b7440930c6bf4536d02d243" - }, "kernelspec": { - "display_name": "python3", + "display_name": ".pyenv", "language": "python", "name": "python3" },
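Note (not part of the diff above): the hunks in tests/test_processor.py, tests/test_diagnostics.py, and the tutorial notebooks migrate from flat config keys to a nested layout. A minimal sketch of the new shape, restricted to keys visible in the hunks and with placeholder paths, could look like this; any field not shown in the diff is an assumption.

import os

# Hedged sketch of the nested config layout implied by the hunks above.
# Only keys that appear in the diff are shown; path values are placeholders.
config = {
    "core": {
        "loader": "mpes",
        # "copy_tool" replaces the former flat keys use_copy_tool,
        # copy_tool_source, copy_tool_dest, and copy_tool_kwds:
        "copy_tool": {"source": "./source", "dest": "./dest", "gid": os.getgid()},
        # the paths keys were renamed from data_raw_dir/data_parquet_dir:
        "paths": {"raw": "/path/to/raw", "processed": "/path/to/processed"},
    },
    "dataframe": {
        # column aliases now live under a "columns" sub-dict, e.g. the
        # former time_stamp_alias and tof_ns_column entries:
        "columns": {"timestamp": "timeStamps", "tof_ns": "dldTime_ns"},
    },
}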
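The new tests in tests/test_config.py exercise parse_config(..., verify_config=True) and expect pydantic.ValidationError for extra fields, a missing core.loader, an unknown loader name, and a non-existent gid. The project's actual config model is not shown in this diff, so the following is a purely hypothetical pydantic v2 sketch of the kind of model that produces those errors; the loader list and validator messages are assumptions.

from pydantic import BaseModel, ConfigDict, ValidationError, field_validator

AVAILABLE_LOADERS = ["mpes", "flash", "sxp", "generic"]  # assumed list

class CoreModel(BaseModel):
    model_config = ConfigDict(extra="forbid")  # unknown fields fail validation
    loader: str  # required: removing it raises ValidationError

    @field_validator("loader")
    @classmethod
    def check_loader(cls, value: str) -> str:
        if value not in AVAILABLE_LOADERS:
            raise ValueError(
                f"Invalid loader {value}. Available loaders are: {AVAILABLE_LOADERS}",
            )
        return value

class ConfigModel(BaseModel):
    model_config = ConfigDict(extra="forbid")
    core: CoreModel

# Mirrors the behaviour asserted in test_invalid_config_wrong_values:
try:
    ConfigModel(core={"loader": "nonexistent"})
except ValidationError as err:
    print(err)  # contains "Invalid loader nonexistent. Available loaders are: ..."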
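The tutorial hunks also show the updated call signatures after this change set: SedProcessor is now given system_config={} so the examples stay independent of a machine-wide config, ref_id is dropped from calibrate_energy_axis, and append_energy_axis takes the bias voltage explicitly. A condensed recap of those calls, drawn from different notebooks in this diff (scandir is a placeholder for the scan folder defined earlier in each notebook):

import sed

scandir = "path/to/scan"  # placeholder; set by the data-download cells in the notebooks

sp = sed.SedProcessor(
    folder=scandir,
    config="../sed/config/mpes_example_config.yaml",
    system_config={},
    verbose=True,
)

# ref_id is no longer passed to calibrate_energy_axis in the updated notebooks,
# and append_energy_axis now receives the bias voltage as an argument:
sp.calibrate_energy_axis(ref_energy=-0.5, energy_scale="kinetic", method="lmfit")
sp.append_energy_axis(bias_voltage=957)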