From 35dfa4020653c468636b70664b3f0051c8dd0547 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Mon, 4 May 2020 12:57:06 -0400 Subject: [PATCH 01/28] Add tests and implementation for ZoneInfo --- Lib/test/test_zoneinfo/__init__.py | 1 + Lib/test/test_zoneinfo/__main__.py | 3 + Lib/test/test_zoneinfo/_support.py | 54 + .../test_zoneinfo/data/update_test_data.py | 122 + .../test_zoneinfo/data/zoneinfo_data.json | 190 ++ Lib/test/test_zoneinfo/test_zoneinfo.py | 1970 ++++++++++++ Lib/zoneinfo/__init__.py | 29 + Lib/zoneinfo/_common.py | 160 + Lib/zoneinfo/_tzpath.py | 116 + Lib/zoneinfo/_zoneinfo.py | 756 +++++ Modules/Setup | 1 + Modules/zoneinfomodule.c | 2686 +++++++++++++++++ setup.py | 2 + 13 files changed, 6090 insertions(+) create mode 100644 Lib/test/test_zoneinfo/__init__.py create mode 100644 Lib/test/test_zoneinfo/__main__.py create mode 100644 Lib/test/test_zoneinfo/_support.py create mode 100644 Lib/test/test_zoneinfo/data/update_test_data.py create mode 100644 Lib/test/test_zoneinfo/data/zoneinfo_data.json create mode 100644 Lib/test/test_zoneinfo/test_zoneinfo.py create mode 100644 Lib/zoneinfo/__init__.py create mode 100644 Lib/zoneinfo/_common.py create mode 100644 Lib/zoneinfo/_tzpath.py create mode 100644 Lib/zoneinfo/_zoneinfo.py create mode 100644 Modules/zoneinfomodule.c diff --git a/Lib/test/test_zoneinfo/__init__.py b/Lib/test/test_zoneinfo/__init__.py new file mode 100644 index 00000000000000..98cc4412ae16c2 --- /dev/null +++ b/Lib/test/test_zoneinfo/__init__.py @@ -0,0 +1 @@ +from .test_zoneinfo import * diff --git a/Lib/test/test_zoneinfo/__main__.py b/Lib/test/test_zoneinfo/__main__.py new file mode 100644 index 00000000000000..5cc4e055d5e660 --- /dev/null +++ b/Lib/test/test_zoneinfo/__main__.py @@ -0,0 +1,3 @@ +import unittest + +unittest.main('test.test_zoneinfo') diff --git a/Lib/test/test_zoneinfo/_support.py b/Lib/test/test_zoneinfo/_support.py new file mode 100644 index 00000000000000..b41ae25bd8a272 --- /dev/null +++ 
b/Lib/test/test_zoneinfo/_support.py @@ -0,0 +1,54 @@ +import contextlib +import functools +import sys +import threading +import unittest +from test.support import import_fresh_module + +OS_ENV_LOCK = threading.Lock() +TZPATH_LOCK = threading.Lock() +TZPATH_TEST_LOCK = threading.Lock() + + +@functools.lru_cache(1) +def get_modules(): + import zoneinfo as c_module + py_module = import_fresh_module("zoneinfo", blocked=["_czoneinfo"]) + + return py_module, c_module + + +@contextlib.contextmanager +def set_zoneinfo_module(module): + """Make sure sys.modules["zoneinfo"] refers to `module`. + + This is necessary because `pickle` will refuse to serialize + an type calling itself `zoneinfo.ZoneInfo` unless `zoneinfo.ZoneInfo` + refers to the same object. + """ + + NOT_PRESENT = object() + old_zoneinfo = sys.modules.get("zoneinfo", NOT_PRESENT) + sys.modules["zoneinfo"] = module + yield + if old_zoneinfo is not NOT_PRESENT: + sys.modules["zoneinfo"] = old_zoneinfo + else: # pragma: nocover + sys.modules.pop("zoneinfo") + + +class ZoneInfoTestBase(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.klass = cls.module.ZoneInfo + super().setUpClass() + + @contextlib.contextmanager + def tzpath_context(self, tzpath, lock=TZPATH_LOCK): + with lock: + old_path = self.module.TZPATH + try: + self.module.reset_tzpath(tzpath) + yield + finally: + self.module.reset_tzpath(old_path) diff --git a/Lib/test/test_zoneinfo/data/update_test_data.py b/Lib/test/test_zoneinfo/data/update_test_data.py new file mode 100644 index 00000000000000..f531ab316a1f21 --- /dev/null +++ b/Lib/test/test_zoneinfo/data/update_test_data.py @@ -0,0 +1,122 @@ +""" +Script to automatically generate a JSON file containing time zone information. + +This is done to allow "pinning" a small subset of the tzdata in the tests, +since we are testing properties of a file that may be subject to change. 
For +example, the behavior in the far future of any given zone is likely to change, +but "does this give the right answer for this file in 2040" is still an +important property to test. + +This must be run from a computer with zoneinfo data installed. +""" +from __future__ import annotations + +import base64 +import functools +import json +import lzma +import pathlib +import textwrap +import typing + +import zoneinfo + +KEYS = [ + "Africa/Abidjan", + "Africa/Casablanca", + "America/Los_Angeles", + "America/Santiago", + "Asia/Tokyo", + "Australia/Sydney", + "Europe/Dublin", + "Europe/Lisbon", + "Europe/London", + "Pacific/Kiritimati", + "UTC", +] + +TEST_DATA_LOC = pathlib.Path(__file__).parent + + +@functools.lru_cache(maxsize=None) +def get_zoneinfo_path() -> pathlib.Path: + """Get the first zoneinfo directory on TZPATH containing the "UTC" zone.""" + key = "UTC" + for path in map(pathlib.Path, zoneinfo.TZPATH): + if (path / key).exists(): + return path + else: + raise OSError("Cannot find time zone data.") + + +def get_zoneinfo_metadata() -> typing.Dict[str, str]: + path = get_zoneinfo_path() + + tzdata_zi = path / "tzdata.zi" + if not tzdata_zi.exists(): + # tzdata.zi is necessary to get the version information + raise OSError("Time zone data does not include tzdata.zi.") + + with open(tzdata_zi, "r") as f: + version_line = next(f) + + _, version = version_line.strip().rsplit(" ", 1) + + if ( + not version[0:4].isdigit() + or len(version) < 5 + or not version[4:].isalpha() + ): + raise ValueError( + "Version string should be YYYYx, " + + "where YYYY is the year and x is a letter; " + + f"found: {version}" + ) + + return {"version": version} + + +def get_zoneinfo(key: str) -> bytes: + path = get_zoneinfo_path() + + with open(path / key, "rb") as f: + return f.read() + + +def encode_compressed(data: bytes) -> typing.List[str]: + compressed_zone = lzma.compress(data) + raw = base64.b85encode(compressed_zone) + + raw_data_str = raw.decode("utf-8") + + data_str = 
textwrap.wrap(raw_data_str, width=70) + return data_str + + +def load_compressed_keys() -> typing.Dict[str, typing.List[str]]: + output = {key: encode_compressed(get_zoneinfo(key)) for key in KEYS} + + return output + + +def update_test_data(fname: str = "zoneinfo_data.json") -> None: + TEST_DATA_LOC.mkdir(exist_ok=True, parents=True) + + # Annotation required: https://github.com/python/mypy/issues/8772 + json_kwargs: typing.Dict[str, typing.Any] = dict( + indent=2, sort_keys=True, + ) + + compressed_keys = load_compressed_keys() + metadata = get_zoneinfo_metadata() + output = { + "metadata": metadata, + "data": compressed_keys, + } + + with open(TEST_DATA_LOC / fname, "w") as f: + json.dump(output, f, **json_kwargs) + + +if __name__ == "__main__": + update_test_data() diff --git a/Lib/test/test_zoneinfo/data/zoneinfo_data.json b/Lib/test/test_zoneinfo/data/zoneinfo_data.json new file mode 100644 index 00000000000000..ec4414a0cdedbe --- /dev/null +++ b/Lib/test/test_zoneinfo/data/zoneinfo_data.json @@ -0,0 +1,190 @@ +{ + "data": { + "Africa/Abidjan": [ + "{Wp48S^xk9=GL@E0stWa761SMbT8$j-~f{VGF<>F7KxBg5R*{Ksocg8-YYVul=v7vZzaHN", + "uC=da5UI2rH18c!OnjV{y4u(+A!!VBKmY&$ORw>7UO^(500B;v0RR91bXh%WvBYQl0ssI2", + "00dcD" + ], + "Africa/Casablanca": [ + "{Wp48S^xk9=GL@E0stWa761SMbT8$j;0b&Kz+C_;7KxBg5R*{N&yjMUR~;C-fDaSOU;q-~", + "FqW+4{YBjbcw}`a!dW>b)R2-0a+uwf`P3{_Y@HuCz}S$J$ZJ>R_V<~|Fk>sgX4=%0vUrh-", + "lt@YP^Wrus;j?`Th#xRPzf<<~Hp4DH^gZX>d{+WOp~HNu8!{uWu}&XphAd{j1;rB4|9?R!", + "pqruAFUMt8#*WcrVS{;kLlY(cJRV$w?d2car%Rs>q9BgTU4", + "Ht-tQKZ7Z`9QqOb?R#b%z?rk>!CkH7jy3wja4NG2q)H}fNRKg8v{);Em;K3Cncf4C6&Oaj", + "V+DbX%o4+)CV3+e!Lm6dutu(0BQpH1T?W(~cQtKV*^_Pdx!LirjpTs?Bmt@vktjLq4;)O!", + "rrly=c*rwTwMJFd0I57`hgkc?=nyI4RZf9W$6DCWugmf&)wk^tWH17owj=#PGH7Xv-?9$j", + "njwDlkOE+BFNR9YXEmBpO;rqEw=e2IR-8^(W;8ma?M3JVd($2T>IW+0tk|Gm8>ftukRQ9J", + "8k3brzqMnVyjsLI-CKneFa)Lxvp_aq40f}0J3VVoWL5rox", + "`Kptivcp}o5xA^@>qNI%?zo=Yj4AMV?kbAA)j(1%)+Pp)bSn+7Yk`M{oE}L-Z!G6OMr5G+h", + 
"p)$3Lg{ono{4cN>Vr&>L4kXH;_VnBL5U!LgzqE%P7QQ*tue}O`3(TZ0`aKn&~8trOQ-rBXCp)f@P6RMO4l0+;b|5-pk9_ryNh}Zc*v%mvz_#", + "yd6fjB0g9{MmMnu8bG%#C~ugXK^S^k@?ab#", + "O|aE>dDTt4s4n69(~@t~!wniV%g7khFx~I*4>Y|V$4j5%KPF*-FyKIi@!Ho&", + "x8QQsksYt8)D+W)Ni!=G`ogSu^vLL-l#7A7=iIAKL2SuZk9F}NfNk86VI)9WZE?%2wC-ya", + "F~z#Qsq)LH0|_D8^5fU8X%GeQ4TB>R-dlziA&tZe&1ada208!$nk`7bOFO2S00G`Z@1A~t&lyL{p{eM{5)QGf7Mo5FW9==mlyXJt2", + "UwpntR7H0eSq!(aYq#aqUz&RM*tvuMI)AsM?K3-dV3-TT{t)!Iy#JTo=tXkzAM9~j2YbiO", + "ls3(H8Dc>Y|D1aqL51vjLbpYG;GvGTQB4bXuJ%mA;(B4eUpu$$@zv2vVcq-Y)VKbzp^tei", + "uzy}R{LuvDjpuVb`79O+CBmg{Wx!bvx$eu4zRE&", + "PehMb=&G<9$>iZ|bFE)0=4I?KLFGBC0I(0_svgw0%FiMsT%koo*!nEYc6GY@QnU}&4Isg;", + "l=|khi(!VaiSE2=Ny`&&tpi~~;{$uN}%f|7mBhAy;s3YT^sy!$eG~?`9mNJC9@4Bac_p^BZh)Yd_rWW5qh-?tKY(>5VHO", + "L*iT8P@wCavLj^yYbnDR+4ukhS+xPrpl)iqB?u)bj9a2aW==g6G3lCJd>(+Blfr)~^40F4f>cRZ^UF;RibfZ>0m73hR", + "C{$vTfC(STN`g7(B<=Z2556{}0`?p&|Akkst!4Xy4OT;A@c$XTUI3FRRjy*KA7uC56FD)z", + "^X{WV*sr(w!c$W357o!&eLO2wTDNOyw@gf(&R<t;=-Tu1TV{>%8ZVATC9tjD8|(&`$9YHvZ9bVe#>w", + "|8c;Tg|xE&)`*}LwM*E}q}q8^Qja%p`_U)*5DdLI9O@!e=3jFjOCrCq28b_bb;s>%D#iJB", + "CWJi{JH!Js;6nfayos$kq^OEX00HO-lokL0!mqm{vBYQl0ssI200dcD" + ], + "America/Santiago": [ + "{Wp48S^xk9=GL@E0stWa761SMbT8$j;0fRZ<6QtM7KxBg84(fsEAUJ$J{f-TXlPEUec5Ee", + "n+hsD4lC(QYax=JdSpoyje8%VM`GW}{bJ8@y$A8O&*$pw{(f~Os#}2w", + "eX6^Rgi$IT%n^V^85L>$_c7{cB^#ogV=rHBJGiz-RQNFGK?gdPi|q)j`&8)}KJ{qo6dixa", + "9@yYyVg+%lo0nO+Tw0-w2hJ%mafyWL)|", + ")?W6Bi%FWuGPA1Dru$XR4SZANsAthU2EoKHF6oEtKq`rwP", + "(VNegnI_NI%;ma$)wj{k!@KFB30Yo)IOrl>)$)D|+(5h&+%2vuwGuy^@S8FT^s21V5};>VA9Iu;?8bHz#r<;JtfZDI1(FT@edh0#", + "MYW$A1qkMGIwTZqqdYNE3gl#zp&NbL9Mp=voqN|;?gqR&4$)1`znddtEyuKS*^nMMD=0^>", + "7^z6-C4P67UWOXuMBubP>j6i~03aR@jD^-Y`JSYu#Yp0P8dLLJ0QOPE8=BoiuRX59YW7xg", + "WiexjHX%&0?`ZQCdxCdL^qd1v@kOjQKaWo2Y1++~LcA%FTq?5o%}fX1-RIvlB)1#iTNomGnUL=nM!>Ix|AGtON7!F1O?53kqlC2o-`ZGw*+s", + "NM$^9znsIJMwlgscE`|O3|;BRgsQMYm~`uv+nvuv`nigRa}X=BX=A5Sw$)WEklF7&c>_~$", + 
"zJ(m--bqXgiN^w-U=BJH9C0Qro(x90zo@rK;&TJ$nI@&k$ORgOb2s%gWbc}ok_27)Eoku~Fq|B-Ps+4J_", + "HPJMLJ2^_)cOU$p&3kNAlrV!)%~6r$BJ>OOi~=-<6byle{?zd4J{NG}o8tw|+#ZNLcpNwk", + "TuPE~sbJB8_RZb2DopStO+Wwux~F#S59zm%00I98;S&G=b(j+6vBYQl0ssI200dcD" + ], + "Asia/Tokyo": [ + "{Wp48S^xk9=GL@E0stWa761SMbT8$j-~luMgIxeB7KxBg5R*;y?l4Rl4neXH3cv!OtfK@h", + "KZzauI)S!FSDREPhhBS6Fb$&Vv#7%;?Te|>pF^0HBr&z_Tk<%vMW_QqjevRZOp8XVFgP<8", + "TkT#`9H&0Ua;gT1#rZLV0HqbAKK;_z@nO;6t0L}hOdk<>TdUa07R(LPI6@!GU$ty4=mwqHG-XVe*n(Yvgdlr+FqIU18!osi)48t~eWX8)&L", + "G)Ud^0zz@*AF+2r7E}Nf9Y72K~o-T%}D&z%}#7g2br?oH6ZiYH^%>J3D)TPKV(JY*bwjuw5=DsPB@~CrROZeN", + "x>A*H&CHrWt0`EP`m!F%waepl#|w#&`XgVc?~2M3uw$fGX~tf_Il!q#Aa<*8xlzQ2+7r6Z", + "^;Laa9F(WB_O&Dy2r>~@kSi16W{=6+i5GV=Uq~KX*~&HUN4oz7*O(gXIr}sDVcD`Ikgw#|", + "50ssal8s)Qy;?YGCf;*UKKKN!T4!Kqy_G;7PfQapugqvVBKy12v3TVH^L2", + "0?#5*VP~MOYfe$h`*L!7@tiW|_^X1N%<}`7YahiUYtMu5XwmOf3?dr+@zXHwW`z}ZDqZlT", + "<2Cs(<1%M!i6o&VK89BY0J7HPIo;O62s=|IbV^@y$N&#=>i^F00FcHoDl#3", + "Mdv&xvBYQl0ssI200dcD" + ], + "Europe/Dublin": [ + "{Wp48S^xk9=GL@E0stWa761SMbT8$j;0>b$_+0=h7KxBg5R*;&J77#T_U2R5sleVWFDmK~", + "Kzj5oh@`QKHvW^6V{jU-w>qg1tSt0c^vh;?qAqA0%t?;#S~6U8Qi", + "v&f1s9IH#g$m1k1a#3+lylw4mwT4QnEUUQdwg+xnEcBlgu31bAVabn41OMZVLGz6NDwG%X", + "uQar!b>GI{qSahE`AG}$kRWbuI~JCt;38)Xwbb~Qggs55t+MAHIxgDxzTJ;2xXx99+qCy4", + "45kC#v_l8fx|G&jlVvaciR<-wwf22l%4(t@S6tnX39#_K(4S0fu$FUs$isud9IKzCXB78NkARYq@9Dc0TGkhz);NtM_SSzEffN", + "l{2^*CKGdp52h!52A)6q9fUSltXF{T*Ehc9Q7u8!W7pE(Fv$D$cKUAt6wY=DA1mGgxC*VX", + "q_If3G#FY6-Voj`fIKk`0}Cc72_SD{v>468LV{pyBI33^p0E?}RwDA6Pkq--C~0jF&Z@Pv", + "!dx_1SN_)jwz@P$(oK%P!Tk9?fRjK88yxhxlcFtTjjZ$DYssSsa#ufYrR+}}nKS+r384o~", + "!Uw$nwTbF~qgRsgr0N#d@KIinx%hQB(SJyjJtDtIy(%mDm}ZBGN}dV6K~om|=U", + "VGkbciQ=^$_14|gT21!YQ)@y*Rd0i_lS6gtPBE9+ah%WIJPwzUTjIr+J1XckkmA!6WE16%", + "CVAl{Dn&-)=G$Bjh?bh0$Xt1UDcgXJjXzzojuw0>paV~?Sa`VN3FysqFxTzfKVAu*ucq#+m=|KSSMvp_#@-lwd+q*ue", + "FQ^5<|<0R-u4qYMbRqzSn&", + "Q7jSuvc%b+EZc%>nI(+&0Tl1Y>a6v4`uNFD-7$QrhHgS7Wnv~rDgfH;rQw3+m`LJxoM4v#", + 
"gK@?|B{RHJ*VxZgk#!p<_&-sjxOda0YaiJ1UnG41VPv(Et%ElzKRMcO$AfgU+Xnwg5p2_+", + "NrnZ1WfEj^fmHd^sx@%JWKkh#zaK0ox%rdP)zUmGZZnqmZ_9L=%6R8ibJH0bOT$AGhDo6{", + "fJ?;_U;D|^>5by2ul@i4Zf()InfFN}00EQ=q#FPL>RM>svBYQl0ssI200dcD" + ], + "Europe/Lisbon": [ + "{Wp48S^xk9=GL@E0stWa761SMbT8$j;0=rf*IfWA7KxBg5R*;*X|PN+G3LqthM?xgkNUN_", + ")gCt1Sc%YT6^TTomk4yVHXeyvQj8}l<;q&s7K}#Vnc8lII1?)AHh$*>OKUU4S;*h>v*ep0", + "xTi1cK2{aY*|2D*-~K<;-{_W+r@NvZ7-|NZv($ek_C%VfP0xjWeZP#CPXD`IKkakjh(kUd", + "&H)m;^Q(jGjIyiyrcUMtOP)u3A>sw6ux;Bmp3x$4QvQKMx5TrCx_!$srWQuXNs&`9=^IY1", + "yc&C31!sQh7P=Mk*#6x8Z@5^%ehR8UW$OWw0KMw}P1ycI^", + "4eh12oBUOV?S>n*d!+EM@>x#9PZD12iD=zaC;7`8dTfkU_6d}OZvSFSbGgXeKw}XyX@D=(", + ")D0!^DBGr8pXWBT$S-yhLP>Z3ys^VW3}RQ6{NGGVJG6vf*MH93vvNW6yLjie1;{4tVhg-KnSf|G`!", + "Z;j$7gJ1ows~RD=@n7I6aFd8rOR_7Y?E-$clI%1o5gA@O!KPa^(8^iFFeFykI-+z>E$mvp", + "E_h`vbHPjqkLs`Dn-0FV`R@z|h!S(Lb;M&|Exr!biY`%bfp$6`hK;GDhdP|^Q", + "*Ty*}1d41K>H2B{jrjE9aFK>yAQJBX9CD%-384S;0fw`PlprHGS`^b$oS-`I4VH7ji8ou-", + "g|060jfb1XcxiInT0oOoeR7#%e5Ug5#KW)nVSRvLHNe$SQHM@2)`S9L7>RL@Qx%fmm7?3u7P5TywFQ}C@S(pq}|", + "eLPT{C^{<0Q?uU&kSVd%!~8q3;Z0s3OqzF`$HRkePL5Ywgiwn{R(zi+jmOBFrVpW;)@UsU#%$8BcV#h@}m$#!Fglo&bwb78aYqOG_W7h{eb(+39&-mk4EIXq_", + "_`30=8sfA3=!3TO_TyS5X22~?6nKngZ|bq=grdq=9X)3xAkA42L!~rmS)n3w-~;lgz%Fhn", + "(?rXdp2ho~9?wmVs2JwVt~?@FVD%`tN69{(i3oQa;O0$E$lF&~Y#_H6bu6(BiwblJ>;-Fs", + "gA$Y$*?=X)n1pFkKn}F~`>=4)+LLQk?L*P!bhAm0;`N~z3QbUIyVrm%kOZ(n1JJsm0pyb8", + "!GV{d*C!9KXv;4vD4Q>-k#+x(!V5L@w5M>v2V5a`B>t(|B", + "|Fqr4^-{S*%Ep~ojUtx_CRbSQ(uFwu2=KH)Q@EBs@ZqRXn4mU;B!68;;IQs3Ub=n&UU%*m", + "k&zwD36&JSwsN(%k&x?H+tN^6)23c`I0=5^N_R0~1>tsFZ`^`3z~rXSXT&qcwa#n!%+Z#P", + "PG}(D^_CCILXnF|GKwabBh*xFS?4rwGo2vtJUwzrbv_$5PO+`?$l{H-jGB@X%S!OAhw;D4", + "XFycN3!XqQ&EorJOD3>~^U%Luw!jF<;6_q-f-S|6{cQDfZ2(4Xf1MMLr1=SA=MwVf2%Pp%VP;jn)|5Tf!-DbUGn%I-rkYaH7?$$O!t)wwClAisr3eUoeB^~T=U*_P~Y2*KdnO87>B!19sV=xZ5", + "yApq26RxgqA|*tmsvtL#OhcF(C<0EGWHP)BFl?h)_*7!{LoJiv%RsOs!q->n+DcV%9~B@RbC_1G_1g6`Yd~8|%-=2l~oGN!~TVv2Bnk>7wW8L@^?vX$f3AiT)(4nrCuTm9%(XC6Nai", 
+ "E(;}7&=YZagjAN$O-cN;1u{dTkElmB0GT$|Wa)QMmKrx<|LCJ9qlUoFsUbD^H^6_8(w<0{", + "ftj&O1~p_%lh5z;zNV&sP+", + "NF2>iK{8KMUf+)<-)VxXbLxD(alL}N$AT-ogNbJSMMYeX+Z{jS)b8TK^PB=FxyBxzfmFto", + "eo0R`a(%NO?#aEH9|?Cv00000NIsFh6BW2800DjO0RR918Pu^`vBYQl0ssI200dcD" + ], + "UTC": [ + "{Wp48S^xk9=GL@E0stWa761SMbT8$j-~e#|9bEt_7KxBg5R*|3h1|xhHLji!C57qW6L*|H", + "pEErm00000ygu;I+>V)?00B92fhY-(AGY&-0RR9100dcD" + ] + }, + "metadata": { + "version": "2020a" + } +} \ No newline at end of file diff --git a/Lib/test/test_zoneinfo/test_zoneinfo.py b/Lib/test/test_zoneinfo/test_zoneinfo.py new file mode 100644 index 00000000000000..578eea5cfce7b1 --- /dev/null +++ b/Lib/test/test_zoneinfo/test_zoneinfo.py @@ -0,0 +1,1970 @@ +from __future__ import annotations + +import base64 +import contextlib +import dataclasses +import importlib.metadata +import io +import json +import lzma +import os +import pathlib +import pickle +import re +import shutil +import struct +import tempfile +import unittest +from datetime import date, datetime, time, timedelta, timezone + +from . 
import _support as test_support +from ._support import ( + OS_ENV_LOCK, + TZPATH_LOCK, + TZPATH_TEST_LOCK, + ZoneInfoTestBase, +) + +py_zoneinfo, c_zoneinfo = test_support.get_modules() + +try: + importlib.metadata.metadata("tzdata") + HAS_TZDATA_PKG = True +except importlib.metadata.PackageNotFoundError: + HAS_TZDATA_PKG = False + +ZONEINFO_DATA = None +ZONEINFO_DATA_V1 = None +TEMP_DIR = None +DATA_DIR = pathlib.Path(__file__).parent / "data" +ZONEINFO_JSON = DATA_DIR / "zoneinfo_data.json" + +# Useful constants +ZERO = timedelta(0) +ONE_H = timedelta(hours=1) + + +def setUpModule(): + global TEMP_DIR + global ZONEINFO_DATA + global ZONEINFO_DATA_V1 + + TEMP_DIR = pathlib.Path(tempfile.mkdtemp(prefix="zoneinfo")) + ZONEINFO_DATA = ZoneInfoData(ZONEINFO_JSON, TEMP_DIR / "v2") + ZONEINFO_DATA_V1 = ZoneInfoData(ZONEINFO_JSON, TEMP_DIR / "v1", v1=True) + + +def tearDownModule(): + shutil.rmtree(TEMP_DIR) + + +class TzPathUserMixin: + """ + Adds a setUp() and tearDown() to make TZPATH manipulations thread-safe. + + Any tests that require manipulation of the TZPATH global are necessarily + thread unsafe, so we will acquire a lock and reset the TZPATH variable + to the default state before each test and release the lock after the test + is through. + """ + + @property + def tzpath(self): # pragma: nocover + return None + + def setUp(self): + with contextlib.ExitStack() as stack: + stack.enter_context( + self.tzpath_context(self.tzpath, lock=TZPATH_TEST_LOCK) + ) + self.addCleanup(stack.pop_all().close) + + super().setUp() + + +class DatetimeSubclassMixin: + """ + Replaces all ZoneTransition transition dates with a datetime subclass. 
+ """ + + class DatetimeSubclass(datetime): + @classmethod + def from_datetime(cls, dt): + return cls( + dt.year, + dt.month, + dt.day, + dt.hour, + dt.minute, + dt.second, + dt.microsecond, + tzinfo=dt.tzinfo, + fold=dt.fold, + ) + + def load_transition_examples(self, key): + transition_examples = super().load_transition_examples(key) + for zt in transition_examples: + dt = zt.transition + new_dt = self.DatetimeSubclass.from_datetime(dt) + new_zt = dataclasses.replace(zt, transition=new_dt) + yield new_zt + + +class ZoneInfoTest(TzPathUserMixin, ZoneInfoTestBase): + module = py_zoneinfo + class_name = "ZoneInfo" + + def setUp(self): + super().setUp() + + # This is necessary because various subclasses pull from different + # data sources (e.g. tzdata, V1 files, etc). + self.klass.clear_cache() + + @property + def zoneinfo_data(self): + return ZONEINFO_DATA + + @property + def tzpath(self): + return [self.zoneinfo_data.tzpath] + + def zone_from_key(self, key): + return self.klass(key) + + def zones(self): + return ZoneDumpData.transition_keys() + + def fixed_offset_zones(self): + return ZoneDumpData.fixed_offset_zones() + + def load_transition_examples(self, key): + return ZoneDumpData.load_transition_examples(key) + + def test_str(self): + # Zones constructed with a key must have str(zone) == key + for key in self.zones(): + with self.subTest(key): + zi = self.zone_from_key(key) + + self.assertEqual(str(zi), key) + + # Zones with no key constructed should have str(zone) == repr(zone) + file_key = self.zoneinfo_data.keys[0] + file_path = self.zoneinfo_data.path_from_key(file_key) + + with open(file_path, "rb") as f: + with self.subTest(test_name="Repr test", path=file_path): + zi_ff = self.klass.from_file(f) + self.assertEqual(str(zi_ff), repr(zi_ff)) + + def test_repr(self): + # The repr is not guaranteed, but I think we can insist that it at + # least contain the name of the class. 
+ key = next(iter(self.zones())) + + zi = self.klass(key) + class_name = self.class_name + with self.subTest(name="from key"): + self.assertRegex(repr(zi), class_name) + + file_key = self.zoneinfo_data.keys[0] + file_path = self.zoneinfo_data.path_from_key(file_key) + with open(file_path, "rb") as f: + zi_ff = self.klass.from_file(f, key=file_key) + + with self.subTest(name="from file with key"): + self.assertRegex(repr(zi_ff), class_name) + + with open(file_path, "rb") as f: + zi_ff_nk = self.klass.from_file(f) + + with self.subTest(name="from file without key"): + self.assertRegex(repr(zi_ff_nk), class_name) + + def test_key_attribute(self): + key = next(iter(self.zones())) + + def from_file_nokey(key): + with open(self.zoneinfo_data.path_from_key(key), "rb") as f: + return self.klass.from_file(f) + + constructors = ( + ("Primary constructor", self.klass, key), + ("no_cache", self.klass.no_cache, key), + ("from_file", from_file_nokey, None), + ) + + for msg, constructor, expected in constructors: + zi = constructor(key) + + # Ensure that the key attribute is set to the input to ``key`` + with self.subTest(msg): + self.assertEqual(zi.key, expected) + + # Ensure that the key attribute is read-only + with self.subTest(f"{msg}: readonly"): + with self.assertRaises(AttributeError): + zi.key = "Some/Value" + + def test_bad_keys(self): + bad_keys = [ + "Eurasia/Badzone", # Plausible but does not exist + "BZQ", + "America.Los_Angeles", + "🇨🇦", # Non-ascii + "America/New\ud800York", # Contains surrogate character + ] + + for bad_key in bad_keys: + with self.assertRaises(self.module.ZoneInfoNotFoundError): + self.klass(bad_key) + + def test_bad_keys_paths(self): + bad_keys = [ + "/America/Los_Angeles", # Absolute path + "America/Los_Angeles/", # Trailing slash - not normalized + "../zoneinfo/America/Los_Angeles", # Traverses above TZPATH + "America/../America/Los_Angeles", # Not normalized + "America/./Los_Angeles", + ] + + for bad_key in bad_keys: + with 
self.assertRaises(ValueError): + self.klass(bad_key) + + def test_bad_zones(self): + bad_zones = [ + b"", # Empty file + b"AAAA3" + b" " * 15, # Bad magic + ] + + for bad_zone in bad_zones: + fobj = io.BytesIO(bad_zone) + with self.assertRaises(ValueError): + self.klass.from_file(fobj) + + def test_fromutc_errors(self): + key = next(iter(self.zones())) + zone = self.zone_from_key(key) + + bad_values = [ + (datetime(2019, 1, 1, tzinfo=timezone.utc), ValueError), + (datetime(2019, 1, 1), ValueError), + (date(2019, 1, 1), TypeError), + (time(0), TypeError), + (0, TypeError), + ("2019-01-01", TypeError), + ] + + for val, exc_type in bad_values: + with self.subTest(val=val): + with self.assertRaises(exc_type): + zone.fromutc(val) + + def test_utc(self): + zi = self.klass("UTC") + dt = datetime(2020, 1, 1, tzinfo=zi) + + self.assertEqual(dt.utcoffset(), ZERO) + self.assertEqual(dt.dst(), ZERO) + self.assertEqual(dt.tzname(), "UTC") + + def test_unambiguous(self): + test_cases = [] + for key in self.zones(): + for zone_transition in self.load_transition_examples(key): + test_cases.append( + ( + key, + zone_transition.transition - timedelta(days=2), + zone_transition.offset_before, + ) + ) + + test_cases.append( + ( + key, + zone_transition.transition + timedelta(days=2), + zone_transition.offset_after, + ) + ) + + for key, dt, offset in test_cases: + with self.subTest(key=key, dt=dt, offset=offset): + tzi = self.zone_from_key(key) + dt = dt.replace(tzinfo=tzi) + + self.assertEqual(dt.tzname(), offset.tzname, dt) + self.assertEqual(dt.utcoffset(), offset.utcoffset, dt) + self.assertEqual(dt.dst(), offset.dst, dt) + + def test_folds_and_gaps(self): + test_cases = [] + for key in self.zones(): + tests = {"folds": [], "gaps": []} + for zt in self.load_transition_examples(key): + if zt.fold: + test_group = tests["folds"] + elif zt.gap: + test_group = tests["gaps"] + else: + # Assign a random variable here to disable the peephole + # optimizer so that coverage can see this 
line. + # See bpo-2506 for more information. + no_peephole_opt = None + continue + + # Cases are of the form key, dt, fold, offset + dt = zt.anomaly_start - timedelta(seconds=1) + test_group.append((dt, 0, zt.offset_before)) + test_group.append((dt, 1, zt.offset_before)) + + dt = zt.anomaly_start + test_group.append((dt, 0, zt.offset_before)) + test_group.append((dt, 1, zt.offset_after)) + + dt = zt.anomaly_start + timedelta(seconds=1) + test_group.append((dt, 0, zt.offset_before)) + test_group.append((dt, 1, zt.offset_after)) + + dt = zt.anomaly_end - timedelta(seconds=1) + test_group.append((dt, 0, zt.offset_before)) + test_group.append((dt, 1, zt.offset_after)) + + dt = zt.anomaly_end + test_group.append((dt, 0, zt.offset_after)) + test_group.append((dt, 1, zt.offset_after)) + + dt = zt.anomaly_end + timedelta(seconds=1) + test_group.append((dt, 0, zt.offset_after)) + test_group.append((dt, 1, zt.offset_after)) + + for grp, test_group in tests.items(): + test_cases.append(((key, grp), test_group)) + + for (key, grp), tests in test_cases: + with self.subTest(key=key, grp=grp): + tzi = self.zone_from_key(key) + + for dt, fold, offset in tests: + dt = dt.replace(fold=fold, tzinfo=tzi) + + self.assertEqual(dt.tzname(), offset.tzname, dt) + self.assertEqual(dt.utcoffset(), offset.utcoffset, dt) + self.assertEqual(dt.dst(), offset.dst, dt) + + def test_folds_from_utc(self): + tests = [] + for key in self.zones(): + zi = self.zone_from_key(key) + with self.subTest(key=key): + for zt in self.load_transition_examples(key): + if not zt.fold: + continue + + dt_utc = zt.transition_utc + dt_before_utc = dt_utc - timedelta(seconds=1) + dt_after_utc = dt_utc + timedelta(seconds=1) + + dt_before = dt_before_utc.astimezone(zi) + self.assertEqual(dt_before.fold, 0, (dt_before, dt_utc)) + + dt_after = dt_after_utc.astimezone(zi) + self.assertEqual(dt_after.fold, 1, (dt_after, dt_utc)) + + def test_time_variable_offset(self): + # self.zones() only ever returns variable-offset zones 
+ for key in self.zones(): + zi = self.zone_from_key(key) + t = time(11, 15, 1, 34471, tzinfo=zi) + + with self.subTest(key=key): + self.assertIs(t.tzname(), None) + self.assertIs(t.utcoffset(), None) + self.assertIs(t.dst(), None) + + def test_time_fixed_offset(self): + for key, offset in self.fixed_offset_zones(): + zi = self.zone_from_key(key) + + t = time(11, 15, 1, 34471, tzinfo=zi) + + with self.subTest(key=key): + self.assertEqual(t.tzname(), offset.tzname) + self.assertEqual(t.utcoffset(), offset.utcoffset) + self.assertEqual(t.dst(), offset.dst) + + +class CZoneInfoTest(ZoneInfoTest): + module = c_zoneinfo + + def test_fold_mutate(self): + """Test that fold isn't mutated when no change is necessary. + + The underlying C API is capable of mutating datetime objects, and + may rely on the fact that addition of a datetime object returns a + new datetime; this test ensures that the input datetime to fromutc + is not mutated. + """ + + def to_subclass(dt): + class SameAddSubclass(type(dt)): + def __add__(self, other): + if other == timedelta(0): + return self + + return super().__add__(other) # pragma: nocover + + return SameAddSubclass( + dt.year, + dt.month, + dt.day, + dt.hour, + dt.minute, + dt.second, + dt.microsecond, + fold=dt.fold, + tzinfo=dt.tzinfo, + ) + + subclass = [False, True] + + key = "Europe/London" + zi = self.zone_from_key("Europe/London") + for zt in self.load_transition_examples(key): + if zt.fold and zt.offset_after.utcoffset == ZERO: + example = zt.transition_utc.replace(tzinfo=zi) + break + + for subclass in [False, True]: + if subclass: + dt = to_subclass(example) + else: + dt = example + + with self.subTest(subclass=subclass): + dt_fromutc = zi.fromutc(dt) + + self.assertEqual(dt_fromutc.fold, 1) + self.assertEqual(dt.fold, 0) + + +class ZoneInfoDatetimeSubclassTest(DatetimeSubclassMixin, ZoneInfoTest): + pass + + +class CZoneInfoDatetimeSubclassTest(DatetimeSubclassMixin, CZoneInfoTest): + pass + + +class 
ZoneInfoTestSubclass(ZoneInfoTest): + @classmethod + def setUpClass(cls): + super().setUpClass() + + class ZISubclass(cls.klass): + pass + + cls.class_name = "ZISubclass" + cls.parent_klass = cls.klass + cls.klass = ZISubclass + + def test_subclass_own_cache(self): + base_obj = self.parent_klass("Europe/London") + sub_obj = self.klass("Europe/London") + + self.assertIsNot(base_obj, sub_obj) + self.assertIsInstance(base_obj, self.parent_klass) + self.assertIsInstance(sub_obj, self.klass) + + +class CZoneInfoTestSubclass(ZoneInfoTest): + module = c_zoneinfo + + +class ZoneInfoV1Test(ZoneInfoTest): + @property + def zoneinfo_data(self): + return ZONEINFO_DATA_V1 + + def load_transition_examples(self, key): + # We will discard zdump examples outside the range epoch +/- 2**31, + # because they are not well-supported in Version 1 files. + epoch = datetime(1970, 1, 1) + max_offset_32 = timedelta(seconds=2 ** 31) + min_dt = epoch - max_offset_32 + max_dt = epoch + max_offset_32 + + for zt in ZoneDumpData.load_transition_examples(key): + if min_dt <= zt.transition <= max_dt: + yield zt + + +class CZoneInfoV1Test(ZoneInfoV1Test): + module = c_zoneinfo + + +@unittest.skipIf( + not HAS_TZDATA_PKG, "Skipping tzdata-specific tests: tzdata not installed" +) +class TZDataTests(ZoneInfoTest): + """ + Runs all the ZoneInfoTest tests, but against the tzdata package + + NOTE: The ZoneDumpData has frozen test data, but tzdata will update, so + some of the tests (particularly those related to the far future) may break + in the event that the time zone policies in the relevant time zones change. 
+ """ + + @property + def tzpath(self): + return [] + + def zone_from_key(self, key): + return self.klass(key=key) + + +@unittest.skipIf( + not HAS_TZDATA_PKG, "Skipping tzdata-specific tests: tzdata not installed" +) +class CTZDataTests(TZDataTests): + module = c_zoneinfo + + +class WeirdZoneTest(ZoneInfoTestBase): + module = py_zoneinfo + + def test_one_transition(self): + LMT = ZoneOffset("LMT", -timedelta(hours=6, minutes=31, seconds=2)) + STD = ZoneOffset("STD", -timedelta(hours=6)) + + transitions = [ + ZoneTransition(datetime(1883, 6, 9, 14), LMT, STD), + ] + + after = "STD6" + + zf = self.construct_zone(transitions, after) + zi = self.klass.from_file(zf) + + dt0 = datetime(1883, 6, 9, 1, tzinfo=zi) + dt1 = datetime(1883, 6, 10, 1, tzinfo=zi) + + for dt, offset in [(dt0, LMT), (dt1, STD)]: + with self.subTest(name="local", dt=dt): + self.assertEqual(dt.tzname(), offset.tzname) + self.assertEqual(dt.utcoffset(), offset.utcoffset) + self.assertEqual(dt.dst(), offset.dst) + + dts = [ + ( + datetime(1883, 6, 9, 1, tzinfo=zi), + datetime(1883, 6, 9, 7, 31, 2, tzinfo=timezone.utc), + ), + ( + datetime(2010, 4, 1, 12, tzinfo=zi), + datetime(2010, 4, 1, 18, tzinfo=timezone.utc), + ), + ] + + for dt_local, dt_utc in dts: + with self.subTest(name="fromutc", dt=dt_local): + dt_actual = dt_utc.astimezone(zi) + self.assertEqual(dt_actual, dt_local) + + dt_utc_actual = dt_local.astimezone(timezone.utc) + self.assertEqual(dt_utc_actual, dt_utc) + + def test_one_zone_dst(self): + DST = ZoneOffset("DST", ONE_H, ONE_H) + transitions = [ + ZoneTransition(datetime(1970, 1, 1), DST, DST), + ] + + after = "STD0DST-1,0/0,J365/25" + + zf = self.construct_zone(transitions, after) + zi = self.klass.from_file(zf) + + dts = [ + datetime(1900, 3, 1), + datetime(1965, 9, 12), + datetime(1970, 1, 1), + datetime(2010, 11, 3), + datetime(2040, 1, 1), + ] + + for dt in dts: + dt = dt.replace(tzinfo=zi) + with self.subTest(dt=dt): + self.assertEqual(dt.tzname(), DST.tzname) + 
self.assertEqual(dt.utcoffset(), DST.utcoffset) + self.assertEqual(dt.dst(), DST.dst) + + def test_no_tz_str(self): + STD = ZoneOffset("STD", ONE_H, ZERO) + DST = ZoneOffset("DST", 2 * ONE_H, ONE_H) + + transitions = [] + for year in range(1996, 2000): + transitions.append( + ZoneTransition(datetime(year, 3, 1, 2), STD, DST) + ) + transitions.append( + ZoneTransition(datetime(year, 11, 1, 2), DST, STD) + ) + + after = "" + + zf = self.construct_zone(transitions, after) + + # According to RFC 8536, local times after the last transition time + # with an empty TZ string are unspecified. We will go with "hold the + # last transition", but the most we should promise is "doesn't crash." + zi = self.klass.from_file(zf) + + cases = [ + (datetime(1995, 1, 1), STD), + (datetime(1996, 4, 1), DST), + (datetime(1996, 11, 2), STD), + (datetime(2001, 1, 1), STD), + ] + + for dt, offset in cases: + dt = dt.replace(tzinfo=zi) + with self.subTest(dt=dt): + self.assertEqual(dt.tzname(), offset.tzname) + self.assertEqual(dt.utcoffset(), offset.utcoffset) + self.assertEqual(dt.dst(), offset.dst) + + # Test that offsets return None when using a datetime.time + t = time(0, tzinfo=zi) + with self.subTest("Testing datetime.time"): + self.assertIs(t.tzname(), None) + self.assertIs(t.utcoffset(), None) + self.assertIs(t.dst(), None) + + def test_tz_before_only(self): + # From RFC 8536 Section 3.2: + # + # If there are no transitions, local time for all timestamps is + # specified by the TZ string in the footer if present and nonempty; + # otherwise, it is specified by time type 0. + + offsets = [ + ZoneOffset("STD", ZERO, ZERO), + ZoneOffset("DST", ONE_H, ONE_H), + ] + + for offset in offsets: + # Phantom transition to set time type 0. 
+ transitions = [ + ZoneTransition(None, offset, offset), + ] + + after = "" + + zf = self.construct_zone(transitions, after) + zi = self.klass.from_file(zf) + + dts = [ + datetime(1900, 1, 1), + datetime(1970, 1, 1), + datetime(2000, 1, 1), + ] + + for dt in dts: + dt = dt.replace(tzinfo=zi) + with self.subTest(offset=offset, dt=dt): + self.assertEqual(dt.tzname(), offset.tzname) + self.assertEqual(dt.utcoffset(), offset.utcoffset) + self.assertEqual(dt.dst(), offset.dst) + + def test_empty_zone(self): + zf = self.construct_zone([], "") + + with self.assertRaises(ValueError): + self.klass.from_file(zf) + + def test_zone_very_large_timestamp(self): + """Test when a transition is in the far past or future. + + Particularly, this is a concern if something: + + 1. Attempts to call ``datetime.timestamp`` for a datetime outside + of ``[datetime.min, datetime.max]``. + 2. Attempts to construct a timedelta outside of + ``[timedelta.min, timedelta.max]``. + + This actually occurs "in the wild", as some time zones on Ubuntu (at + least as of 2020) have an initial transition added at ``-2**58``. 
+ """ + + LMT = ZoneOffset("LMT", timedelta(seconds=-968)) + GMT = ZoneOffset("GMT", ZERO) + + transitions = [ + (-(1 << 62), LMT, LMT), + ZoneTransition(datetime(1912, 1, 1), LMT, GMT), + ((1 << 62), GMT, GMT), + ] + + after = "GMT0" + + zf = self.construct_zone(transitions, after) + zi = self.klass.from_file(zf, key="Africa/Abidjan") + + offset_cases = [ + (datetime.min, LMT), + (datetime.max, GMT), + (datetime(1911, 12, 31), LMT), + (datetime(1912, 1, 2), GMT), + ] + + for dt_naive, offset in offset_cases: + dt = dt_naive.replace(tzinfo=zi) + with self.subTest(name="offset", dt=dt, offset=offset): + self.assertEqual(dt.tzname(), offset.tzname) + self.assertEqual(dt.utcoffset(), offset.utcoffset) + self.assertEqual(dt.dst(), offset.dst) + + utc_cases = [ + (datetime.min, datetime.min + timedelta(seconds=968)), + (datetime(1898, 12, 31, 23, 43, 52), datetime(1899, 1, 1)), + ( + datetime(1911, 12, 31, 23, 59, 59, 999999), + datetime(1912, 1, 1, 0, 16, 7, 999999), + ), + (datetime(1912, 1, 1, 0, 16, 8), datetime(1912, 1, 1, 0, 16, 8)), + (datetime(1970, 1, 1), datetime(1970, 1, 1)), + (datetime.max, datetime.max), + ] + + for naive_dt, naive_dt_utc in utc_cases: + dt = naive_dt.replace(tzinfo=zi) + dt_utc = naive_dt_utc.replace(tzinfo=timezone.utc) + + self.assertEqual(dt_utc.astimezone(zi), dt) + self.assertEqual(dt, dt_utc) + + def test_fixed_offset_phantom_transition(self): + UTC = ZoneOffset("UTC", ZERO, ZERO) + + transitions = [ZoneTransition(datetime(1970, 1, 1), UTC, UTC)] + + after = "UTC0" + zf = self.construct_zone(transitions, after) + zi = self.klass.from_file(zf, key="UTC") + + dt = datetime(2020, 1, 1, tzinfo=zi) + with self.subTest("datetime.datetime"): + self.assertEqual(dt.tzname(), UTC.tzname) + self.assertEqual(dt.utcoffset(), UTC.utcoffset) + self.assertEqual(dt.dst(), UTC.dst) + + t = time(0, tzinfo=zi) + with self.subTest("datetime.time"): + self.assertEqual(t.tzname(), UTC.tzname) + self.assertEqual(t.utcoffset(), UTC.utcoffset) + 
self.assertEqual(t.dst(), UTC.dst) + + def construct_zone(self, transitions, after=None, version=3): + # These are not used for anything, so we're not going to include + # them for now. + isutc = [] + isstd = [] + leap_seconds = [] + + offset_lists = [[], []] + trans_times_lists = [[], []] + trans_idx_lists = [[], []] + + v1_range = (-(2 ** 31), 2 ** 31) + v2_range = (-(2 ** 63), 2 ** 63) + ranges = [v1_range, v2_range] + + def zt_as_tuple(zt): + # zt may be a tuple (timestamp, offset_before, offset_after) or + # a ZoneTransition object — this is to allow the timestamp to be + # values that are outside the valid range for datetimes but still + # valid 64-bit timestamps. + if isinstance(zt, tuple): + return zt + + if zt.transition: + trans_time = int(zt.transition_utc.timestamp()) + else: + trans_time = None + + return (trans_time, zt.offset_before, zt.offset_after) + + transitions = sorted(map(zt_as_tuple, transitions), key=lambda x: x[0]) + + for zt in transitions: + trans_time, offset_before, offset_after = zt + + for v, (dt_min, dt_max) in enumerate(ranges): + offsets = offset_lists[v] + trans_times = trans_times_lists[v] + trans_idx = trans_idx_lists[v] + + if trans_time is not None and not ( + dt_min <= trans_time <= dt_max + ): + continue + + if offset_before not in offsets: + offsets.append(offset_before) + + if offset_after not in offsets: + offsets.append(offset_after) + + if trans_time is not None: + trans_times.append(trans_time) + trans_idx.append(offsets.index(offset_after)) + + isutcnt = len(isutc) + isstdcnt = len(isstd) + leapcnt = len(leap_seconds) + + zonefile = io.BytesIO() + + time_types = ("l", "q") + for v in range(min((version, 2))): + offsets = offset_lists[v] + trans_times = trans_times_lists[v] + trans_idx = trans_idx_lists[v] + time_type = time_types[v] + + # Translate the offsets into something closer to the C values + abbrstr = bytearray() + ttinfos = [] + + for offset in offsets: + utcoff = int(offset.utcoffset.total_seconds()) + isdst 
= bool(offset.dst) + abbrind = len(abbrstr) + + ttinfos.append((utcoff, isdst, abbrind)) + abbrstr += offset.tzname.encode("ascii") + b"\x00" + abbrstr = bytes(abbrstr) + + typecnt = len(offsets) + timecnt = len(trans_times) + charcnt = len(abbrstr) + + # Write the header + zonefile.write(b"TZif") + zonefile.write(b"%d" % version) + zonefile.write(b" " * 15) + zonefile.write( + struct.pack( + ">6l", isutcnt, isstdcnt, leapcnt, timecnt, typecnt, charcnt + ) + ) + + # Now the transition data + zonefile.write(struct.pack(f">{timecnt}{time_type}", *trans_times)) + zonefile.write(struct.pack(f">{timecnt}B", *trans_idx)) + + for ttinfo in ttinfos: + zonefile.write(struct.pack(">lbb", *ttinfo)) + + zonefile.write(bytes(abbrstr)) + + # Now the metadata and leap seconds + zonefile.write(struct.pack(f"{isutcnt}b", *isutc)) + zonefile.write(struct.pack(f"{isstdcnt}b", *isstd)) + zonefile.write(struct.pack(f">{leapcnt}l", *leap_seconds)) + + # Finally we write the TZ string if we're writing a Version 2+ file + if v > 0: + zonefile.write(b"\x0A") + zonefile.write(after.encode("ascii")) + zonefile.write(b"\x0A") + + zonefile.seek(0) + return zonefile + + +class CWeirdZoneTest(WeirdZoneTest): + module = c_zoneinfo + + +class TZStrTest(ZoneInfoTestBase): + module = py_zoneinfo + + NORMAL = 0 + FOLD = 1 + GAP = 2 + + @classmethod + def setUpClass(cls): + super().setUpClass() + + cls._populate_test_cases() + cls.populate_tzstr_header() + + @classmethod + def populate_tzstr_header(cls): + out = bytearray() + # The TZif format always starts with a Version 1 file followed by + # the Version 2+ file. In this case, we have no transitions, just + # the tzstr in the footer, so up to the footer, the files are + # identical and we can just write the same file twice in a row. 
+ for i in range(2): + out += b"TZif" # Magic value + out += b"3" # Version + out += b" " * 15 # Reserved + + # We will not write any of the manual transition parts + out += struct.pack(">6l", 0, 0, 0, 0, 0, 0) + + cls._tzif_header = bytes(out) + + def zone_from_tzstr(self, tzstr): + """Creates a zoneinfo file following a POSIX rule.""" + zonefile = io.BytesIO(self._tzif_header) + zonefile.seek(0, 2) + + # Write the footer + zonefile.write(b"\x0A") + zonefile.write(tzstr.encode("ascii")) + zonefile.write(b"\x0A") + + zonefile.seek(0) + + return self.klass.from_file(zonefile, key=tzstr) + + def test_tzstr_localized(self): + i = 0 + for tzstr, cases in self.test_cases.items(): + with self.subTest(tzstr=tzstr): + zi = self.zone_from_tzstr(tzstr) + + for dt_naive, offset, _ in cases: + dt = dt_naive.replace(tzinfo=zi) + + with self.subTest(tzstr=tzstr, dt=dt, offset=offset): + self.assertEqual(dt.tzname(), offset.tzname) + self.assertEqual(dt.utcoffset(), offset.utcoffset) + self.assertEqual(dt.dst(), offset.dst) + + def test_tzstr_from_utc(self): + for tzstr, cases in self.test_cases.items(): + with self.subTest(tzstr=tzstr): + zi = self.zone_from_tzstr(tzstr) + + for dt_naive, offset, dt_type in cases: + if dt_type == self.GAP: + continue # Cannot create a gap from UTC + + dt_utc = (dt_naive - offset.utcoffset).replace( + tzinfo=timezone.utc + ) + + # Check that we can go UTC -> Our zone + dt_act = dt_utc.astimezone(zi) + dt_exp = dt_naive.replace(tzinfo=zi) + + self.assertEqual(dt_act, dt_exp) + + if dt_type == self.FOLD: + self.assertEqual(dt_act.fold, dt_naive.fold, dt_naive) + else: + self.assertEqual(dt_act.fold, 0) + + # Now check that we can go our zone -> UTC + dt_act = dt_exp.astimezone(timezone.utc) + + self.assertEqual(dt_act, dt_utc) + + def test_invalid_tzstr(self): + invalid_tzstrs = [ + "PST8PDT", # DST but no transition specified + "+11", # Unquoted alphanumeric + "GMT,M3.2.0/2,M11.1.0/3", # Transition rule but no DST + "GMT0+11,M3.2.0/2,M11.1.0/3", # 
Unquoted alphanumeric in DST + "PST8PDT,M3.2.0/2", # Only one transition rule + # Invalid offsets + "STD+25", + "STD-25", + "STD+374", + "STD+374DST,M3.2.0/2,M11.1.0/3", + "STD+23DST+25,M3.2.0/2,M11.1.0/3", + "STD-23DST-25,M3.2.0/2,M11.1.0/3", + # Completely invalid dates + "AAA4BBB,M1443339,M11.1.0/3", + "AAA4BBB,M3.2.0/2,0349309483959c", + # Invalid months + "AAA4BBB,M13.1.1/2,M1.1.1/2", + "AAA4BBB,M1.1.1/2,M13.1.1/2", + "AAA4BBB,M0.1.1/2,M1.1.1/2", + "AAA4BBB,M1.1.1/2,M0.1.1/2", + # Invalid weeks + "AAA4BBB,M1.6.1/2,M1.1.1/2", + "AAA4BBB,M1.1.1/2,M1.6.1/2", + # Invalid weekday + "AAA4BBB,M1.1.7/2,M2.1.1/2", + "AAA4BBB,M1.1.1/2,M2.1.7/2", + # Invalid numeric offset + "AAA4BBB,-1/2,20/2", + "AAA4BBB,1/2,-1/2", + "AAA4BBB,367,20/2", + "AAA4BBB,1/2,367/2", + # Invalid julian offset + "AAA4BBB,J0/2,J20/2", + "AAA4BBB,J20/2,J366/2", + ] + + for invalid_tzstr in invalid_tzstrs: + with self.subTest(tzstr=invalid_tzstr): + # Not necessarily a guaranteed property, but we should show + # the problematic TZ string if that's the cause of failure. + tzstr_regex = re.escape(invalid_tzstr) + with self.assertRaisesRegex(ValueError, tzstr_regex): + self.zone_from_tzstr(invalid_tzstr) + + @classmethod + def _populate_test_cases(cls): + # This method uses a somewhat unusual style in that it populates the + # test cases for each tzstr by using a decorator to automatically call + # a function that mutates the current dictionary of test cases. + # + # The population of the test cases is done in individual functions to + # give each set of test cases its own namespace in which to define + # its offsets (this way we don't have to worry about variable reuse + # causing problems if someone makes a typo). + # + # The decorator for calling is used to make it more obvious that each + # function is actually called (if it's not decorated, it's not called). + def call(f): + """Decorator to call the addition methods. 
+ + This will call a function which adds at least one new entry into + the `cases` dictionary. The decorator will also assert that + something was added to the dictionary. + """ + prev_len = len(cases) + f() + assert len(cases) > prev_len, "Function did not add a test case!" + + NORMAL = cls.NORMAL + FOLD = cls.FOLD + GAP = cls.GAP + + cases = {} + + @call + def _add(): + # Transition to EDT on the 2nd Sunday in March at 4 AM, and + # transition back on the first Sunday in November at 3AM + tzstr = "EST5EDT,M3.2.0/4:00,M11.1.0/3:00" + + EST = ZoneOffset("EST", timedelta(hours=-5), ZERO) + EDT = ZoneOffset("EDT", timedelta(hours=-4), ONE_H) + + cases[tzstr] = ( + (datetime(2019, 3, 9), EST, NORMAL), + (datetime(2019, 3, 10, 3, 59), EST, NORMAL), + (datetime(2019, 3, 10, 4, 0, fold=0), EST, GAP), + (datetime(2019, 3, 10, 4, 0, fold=1), EDT, GAP), + (datetime(2019, 3, 10, 4, 1, fold=0), EST, GAP), + (datetime(2019, 3, 10, 4, 1, fold=1), EDT, GAP), + (datetime(2019, 11, 2), EDT, NORMAL), + (datetime(2019, 11, 3, 1, 59, fold=1), EDT, NORMAL), + (datetime(2019, 11, 3, 2, 0, fold=0), EDT, FOLD), + (datetime(2019, 11, 3, 2, 0, fold=1), EST, FOLD), + (datetime(2020, 3, 8, 3, 59), EST, NORMAL), + (datetime(2020, 3, 8, 4, 0, fold=0), EST, GAP), + (datetime(2020, 3, 8, 4, 0, fold=1), EDT, GAP), + (datetime(2020, 11, 1, 1, 59, fold=1), EDT, NORMAL), + (datetime(2020, 11, 1, 2, 0, fold=0), EDT, FOLD), + (datetime(2020, 11, 1, 2, 0, fold=1), EST, FOLD), + ) + + @call + def _add(): + # Transition to BST happens on the last Sunday in March at 1 AM GMT + # and the transition back happens the last Sunday in October at 2AM BST + tzstr = "GMT0BST-1,M3.5.0/1:00,M10.5.0/2:00" + + GMT = ZoneOffset("GMT", ZERO, ZERO) + BST = ZoneOffset("BST", ONE_H, ONE_H) + + cases[tzstr] = ( + (datetime(2019, 3, 30), GMT, NORMAL), + (datetime(2019, 3, 31, 0, 59), GMT, NORMAL), + (datetime(2019, 3, 31, 2, 0), BST, NORMAL), + (datetime(2019, 10, 26), BST, NORMAL), + (datetime(2019, 10, 27, 0, 59, fold=1), 
BST, NORMAL), + (datetime(2019, 10, 27, 1, 0, fold=0), BST, GAP), + (datetime(2019, 10, 27, 2, 0, fold=1), GMT, GAP), + (datetime(2020, 3, 29, 0, 59), GMT, NORMAL), + (datetime(2020, 3, 29, 2, 0), BST, NORMAL), + (datetime(2020, 10, 25, 0, 59, fold=1), BST, NORMAL), + (datetime(2020, 10, 25, 1, 0, fold=0), BST, FOLD), + (datetime(2020, 10, 25, 2, 0, fold=1), GMT, NORMAL), + ) + + @call + def _add(): + # Austrialian time zone - DST start is chronologically first + tzstr = "AEST-10AEDT,M10.1.0/2,M4.1.0/3" + + AEST = ZoneOffset("AEST", timedelta(hours=10), ZERO) + AEDT = ZoneOffset("AEDT", timedelta(hours=11), ONE_H) + + cases[tzstr] = ( + (datetime(2019, 4, 6), AEDT, NORMAL), + (datetime(2019, 4, 7, 1, 59), AEDT, NORMAL), + (datetime(2019, 4, 7, 1, 59, fold=1), AEDT, NORMAL), + (datetime(2019, 4, 7, 2, 0, fold=0), AEDT, FOLD), + (datetime(2019, 4, 7, 2, 1, fold=0), AEDT, FOLD), + (datetime(2019, 4, 7, 2, 0, fold=1), AEST, FOLD), + (datetime(2019, 4, 7, 2, 1, fold=1), AEST, FOLD), + (datetime(2019, 4, 7, 3, 0, fold=0), AEST, NORMAL), + (datetime(2019, 4, 7, 3, 0, fold=1), AEST, NORMAL), + (datetime(2019, 10, 5, 0), AEST, NORMAL), + (datetime(2019, 10, 6, 1, 59), AEST, NORMAL), + (datetime(2019, 10, 6, 2, 0, fold=0), AEST, GAP), + (datetime(2019, 10, 6, 2, 0, fold=1), AEDT, GAP), + (datetime(2019, 10, 6, 3, 0), AEDT, NORMAL), + ) + + @call + def _add(): + # Irish time zone - negative DST + tzstr = "IST-1GMT0,M10.5.0,M3.5.0/1" + + GMT = ZoneOffset("GMT", ZERO, -ONE_H) + IST = ZoneOffset("IST", ONE_H, ZERO) + + cases[tzstr] = ( + (datetime(2019, 3, 30), GMT, NORMAL), + (datetime(2019, 3, 31, 0, 59), GMT, NORMAL), + (datetime(2019, 3, 31, 2, 0), IST, NORMAL), + (datetime(2019, 10, 26), IST, NORMAL), + (datetime(2019, 10, 27, 0, 59, fold=1), IST, NORMAL), + (datetime(2019, 10, 27, 1, 0, fold=0), IST, FOLD), + (datetime(2019, 10, 27, 1, 0, fold=1), GMT, FOLD), + (datetime(2019, 10, 27, 2, 0, fold=1), GMT, NORMAL), + (datetime(2020, 3, 29, 0, 59), GMT, NORMAL), + 
(datetime(2020, 3, 29, 2, 0), IST, NORMAL), + (datetime(2020, 10, 25, 0, 59, fold=1), IST, NORMAL), + (datetime(2020, 10, 25, 1, 0, fold=0), IST, FOLD), + (datetime(2020, 10, 25, 2, 0, fold=1), GMT, NORMAL), + ) + + @call + def _add(): + # Pacific/Kosrae: Fixed offset zone with a quoted numerical tzname + tzstr = "<+11>-11" + + cases[tzstr] = ( + ( + datetime(2020, 1, 1), + ZoneOffset("+11", timedelta(hours=11)), + NORMAL, + ), + ) + + @call + def _add(): + # Quoted STD and DST, transitions at 24:00 + tzstr = "<-04>4<-03>,M9.1.6/24,M4.1.6/24" + + M04 = ZoneOffset("-04", timedelta(hours=-4)) + M03 = ZoneOffset("-03", timedelta(hours=-3), ONE_H) + + cases[tzstr] = ( + (datetime(2020, 5, 1), M04, NORMAL), + (datetime(2020, 11, 1), M03, NORMAL), + ) + + @call + def _add(): + # Permanent daylight saving time is modeled with transitions at 0/0 + # and J365/25, as mentioned in RFC 8536 Section 3.3.1 + tzstr = "EST5EDT,0/0,J365/25" + + EDT = ZoneOffset("EDT", timedelta(hours=-4), ONE_H) + + cases[tzstr] = ( + (datetime(2019, 1, 1), EDT, NORMAL), + (datetime(2019, 6, 1), EDT, NORMAL), + (datetime(2019, 12, 31, 23, 59, 59, 999999), EDT, NORMAL), + (datetime(2020, 1, 1), EDT, NORMAL), + (datetime(2020, 3, 1), EDT, NORMAL), + (datetime(2020, 6, 1), EDT, NORMAL), + (datetime(2020, 12, 31, 23, 59, 59, 999999), EDT, NORMAL), + (datetime(2400, 1, 1), EDT, NORMAL), + (datetime(2400, 3, 1), EDT, NORMAL), + (datetime(2400, 12, 31, 23, 59, 59, 999999), EDT, NORMAL), + ) + + @call + def _add(): + # Transitions on March 1st and November 1st of each year + tzstr = "AAA3BBB,J60/12,J305/12" + + AAA = ZoneOffset("AAA", timedelta(hours=-3)) + BBB = ZoneOffset("BBB", timedelta(hours=-2), ONE_H) + + cases[tzstr] = ( + (datetime(2019, 1, 1), AAA, NORMAL), + (datetime(2019, 2, 28), AAA, NORMAL), + (datetime(2019, 3, 1, 11, 59), AAA, NORMAL), + (datetime(2019, 3, 1, 12, fold=0), AAA, GAP), + (datetime(2019, 3, 1, 12, fold=1), BBB, GAP), + (datetime(2019, 3, 1, 13), BBB, NORMAL), + (datetime(2019, 
11, 1, 10, 59), BBB, NORMAL), + (datetime(2019, 11, 1, 11, fold=0), BBB, FOLD), + (datetime(2019, 11, 1, 11, fold=1), AAA, FOLD), + (datetime(2019, 11, 1, 12), AAA, NORMAL), + (datetime(2019, 12, 31, 23, 59, 59, 999999), AAA, NORMAL), + (datetime(2020, 1, 1), AAA, NORMAL), + (datetime(2020, 2, 29), AAA, NORMAL), + (datetime(2020, 3, 1, 11, 59), AAA, NORMAL), + (datetime(2020, 3, 1, 12, fold=0), AAA, GAP), + (datetime(2020, 3, 1, 12, fold=1), BBB, GAP), + (datetime(2020, 3, 1, 13), BBB, NORMAL), + (datetime(2020, 11, 1, 10, 59), BBB, NORMAL), + (datetime(2020, 11, 1, 11, fold=0), BBB, FOLD), + (datetime(2020, 11, 1, 11, fold=1), AAA, FOLD), + (datetime(2020, 11, 1, 12), AAA, NORMAL), + (datetime(2020, 12, 31, 23, 59, 59, 999999), AAA, NORMAL), + ) + + @call + def _add(): + # Taken from America/Godthab, this rule has a transition on the + # Saturday before the last Sunday of March and October, at 22:00 + # and 23:00, respectively. This is encoded with negative start + # and end transition times. 
+ tzstr = "<-03>3<-02>,M3.5.0/-2,M10.5.0/-1" + + N03 = ZoneOffset("-03", timedelta(hours=-3)) + N02 = ZoneOffset("-02", timedelta(hours=-2), ONE_H) + + cases[tzstr] = ( + (datetime(2020, 3, 27), N03, NORMAL), + (datetime(2020, 3, 28, 21, 59, 59), N03, NORMAL), + (datetime(2020, 3, 28, 22, fold=0), N03, GAP), + (datetime(2020, 3, 28, 22, fold=1), N02, GAP), + (datetime(2020, 3, 28, 23), N02, NORMAL), + (datetime(2020, 10, 24, 21), N02, NORMAL), + (datetime(2020, 10, 24, 22, fold=0), N02, FOLD), + (datetime(2020, 10, 24, 22, fold=1), N03, FOLD), + (datetime(2020, 10, 24, 23), N03, NORMAL), + ) + + @call + def _add(): + # Transition times with minutes and seconds + tzstr = "AAA3BBB,M3.2.0/01:30,M11.1.0/02:15:45" + + AAA = ZoneOffset("AAA", timedelta(hours=-3)) + BBB = ZoneOffset("BBB", timedelta(hours=-2), ONE_H) + + cases[tzstr] = ( + (datetime(2012, 3, 11, 1, 0), AAA, NORMAL), + (datetime(2012, 3, 11, 1, 30, fold=0), AAA, GAP), + (datetime(2012, 3, 11, 1, 30, fold=1), BBB, GAP), + (datetime(2012, 3, 11, 2, 30), BBB, NORMAL), + (datetime(2012, 11, 4, 1, 15, 44, 999999), BBB, NORMAL), + (datetime(2012, 11, 4, 1, 15, 45, fold=0), BBB, FOLD), + (datetime(2012, 11, 4, 1, 15, 45, fold=1), AAA, FOLD), + (datetime(2012, 11, 4, 2, 15, 45), AAA, NORMAL), + ) + + cls.test_cases = cases + + +class CTZStrTest(TZStrTest): + module = c_zoneinfo + + +class ZoneInfoCacheTest(TzPathUserMixin, ZoneInfoTestBase): + module = py_zoneinfo + + def setUp(self): + self.klass.clear_cache() + super().setUp() + + @property + def zoneinfo_data(self): + return ZONEINFO_DATA + + @property + def tzpath(self): + return [self.zoneinfo_data.tzpath] + + def test_ephemeral_zones(self): + self.assertIs( + self.klass("America/Los_Angeles"), self.klass("America/Los_Angeles") + ) + + def test_strong_refs(self): + tz0 = self.klass("Australia/Sydney") + tz1 = self.klass("Australia/Sydney") + + self.assertIs(tz0, tz1) + + def test_no_cache(self): + + tz0 = self.klass("Europe/Lisbon") + tz1 = 
self.klass.no_cache("Europe/Lisbon") + + self.assertIsNot(tz0, tz1) + + def test_cache_reset_tzpath(self): + """Test that the cache persists when tzpath has been changed. + + The PEP specifies that as long as a reference exists to one zone + with a given key, the primary constructor must continue to return + the same object. + """ + zi0 = self.klass("America/Los_Angeles") + with self.tzpath_context([]): + zi1 = self.klass("America/Los_Angeles") + + self.assertIs(zi0, zi1) + + def test_clear_cache_explicit_none(self): + la0 = self.klass("America/Los_Angeles") + self.klass.clear_cache(only_keys=None) + la1 = self.klass("America/Los_Angeles") + + self.assertIsNot(la0, la1) + + def test_clear_cache_one_key(self): + """Tests that you can clear a single key from the cache.""" + la0 = self.klass("America/Los_Angeles") + dub0 = self.klass("Europe/Dublin") + + self.klass.clear_cache(only_keys=["America/Los_Angeles"]) + + la1 = self.klass("America/Los_Angeles") + dub1 = self.klass("Europe/Dublin") + + self.assertIsNot(la0, la1) + self.assertIs(dub0, dub1) + + def test_clear_cache_two_keys(self): + la0 = self.klass("America/Los_Angeles") + dub0 = self.klass("Europe/Dublin") + tok0 = self.klass("Asia/Tokyo") + + self.klass.clear_cache( + only_keys=["America/Los_Angeles", "Europe/Dublin"] + ) + + la1 = self.klass("America/Los_Angeles") + dub1 = self.klass("Europe/Dublin") + tok1 = self.klass("Asia/Tokyo") + + self.assertIsNot(la0, la1) + self.assertIsNot(dub0, dub1) + self.assertIs(tok0, tok1) + + +class CZoneInfoCacheTest(ZoneInfoCacheTest): + module = c_zoneinfo + + +class ZoneInfoPickleTest(TzPathUserMixin, ZoneInfoTestBase): + module = py_zoneinfo + + def setUp(self): + self.klass.clear_cache() + + with contextlib.ExitStack() as stack: + stack.enter_context(test_support.set_zoneinfo_module(self.module)) + self.addCleanup(stack.pop_all().close) + + super().setUp() + + @property + def zoneinfo_data(self): + return ZONEINFO_DATA + + @property + def tzpath(self): + return 
[self.zoneinfo_data.tzpath] + + def test_cache_hit(self): + zi_in = self.klass("Europe/Dublin") + pkl = pickle.dumps(zi_in) + zi_rt = pickle.loads(pkl) + + with self.subTest(test="Is non-pickled ZoneInfo"): + self.assertIs(zi_in, zi_rt) + + zi_rt2 = pickle.loads(pkl) + with self.subTest(test="Is unpickled ZoneInfo"): + self.assertIs(zi_rt, zi_rt2) + + def test_cache_miss(self): + zi_in = self.klass("Europe/Dublin") + pkl = pickle.dumps(zi_in) + + del zi_in + self.klass.clear_cache() # Induce a cache miss + zi_rt = pickle.loads(pkl) + zi_rt2 = pickle.loads(pkl) + + self.assertIs(zi_rt, zi_rt2) + + def test_no_cache(self): + zi_no_cache = self.klass.no_cache("Europe/Dublin") + + pkl = pickle.dumps(zi_no_cache) + zi_rt = pickle.loads(pkl) + + with self.subTest(test="Not the pickled object"): + self.assertIsNot(zi_rt, zi_no_cache) + + zi_rt2 = pickle.loads(pkl) + with self.subTest(test="Not a second unpickled object"): + self.assertIsNot(zi_rt, zi_rt2) + + zi_cache = self.klass("Europe/Dublin") + with self.subTest(test="Not a cached object"): + self.assertIsNot(zi_rt, zi_cache) + + def test_from_file(self): + key = "Europe/Dublin" + with open(self.zoneinfo_data.path_from_key(key), "rb") as f: + zi_nokey = self.klass.from_file(f) + + f.seek(0) + zi_key = self.klass.from_file(f, key=key) + + test_cases = [ + (zi_key, "ZoneInfo with key"), + (zi_nokey, "ZoneInfo without key"), + ] + + for zi, test_name in test_cases: + with self.subTest(test_name=test_name): + with self.assertRaises(pickle.PicklingError): + pickle.dumps(zi) + + def test_pickle_after_from_file(self): + # This may be a bit of paranoia, but this test is to ensure that no + # global state is maintained in order to handle the pickle cache and + # from_file behavior, and that it is possible to interweave the + # constructors of each of these and pickling/unpickling without issues. 
+ key = "Europe/Dublin" + zi = self.klass(key) + + pkl_0 = pickle.dumps(zi) + zi_rt_0 = pickle.loads(pkl_0) + self.assertIs(zi, zi_rt_0) + + with open(self.zoneinfo_data.path_from_key(key), "rb") as f: + zi_ff = self.klass.from_file(f, key=key) + + pkl_1 = pickle.dumps(zi) + zi_rt_1 = pickle.loads(pkl_1) + self.assertIs(zi, zi_rt_1) + + with self.assertRaises(pickle.PicklingError): + pickle.dumps(zi_ff) + + pkl_2 = pickle.dumps(zi) + zi_rt_2 = pickle.loads(pkl_2) + self.assertIs(zi, zi_rt_2) + + +class CZoneInfoPickleTest(ZoneInfoPickleTest): + module = c_zoneinfo + + +class CallingConventionTest(ZoneInfoTestBase): + """Tests for functions with restricted calling conventions.""" + + module = py_zoneinfo + + @property + def zoneinfo_data(self): + return ZONEINFO_DATA + + def test_from_file(self): + with open(self.zoneinfo_data.path_from_key("UTC"), "rb") as f: + with self.assertRaises(TypeError): + self.klass.from_file(fobj=f) + + def test_clear_cache(self): + with self.assertRaises(TypeError): + self.klass.clear_cache(["UTC"]) + + +class CCallingConventionTest(CallingConventionTest): + module = c_zoneinfo + + +class TzPathTest(TzPathUserMixin, ZoneInfoTestBase): + module = py_zoneinfo + + @staticmethod + @contextlib.contextmanager + def python_tzpath_context(value): + path_var = "PYTHONTZPATH" + try: + with OS_ENV_LOCK: + old_env = os.environ.get(path_var, None) + os.environ[path_var] = value + yield + finally: + if old_env is None: + del os.environ[path_var] + else: + os.environ[path_var] = old_env # pragma: nocover + + def test_env_variable(self): + """Tests that the environment variable works with reset_tzpath.""" + new_paths = [ + ("", []), + ("/etc/zoneinfo", ["/etc/zoneinfo"]), + (f"/a/b/c{os.pathsep}/d/e/f", ["/a/b/c", "/d/e/f"]), + ] + + for new_path_var, expected_result in new_paths: + with self.python_tzpath_context(new_path_var): + with self.subTest(tzpath=new_path_var): + self.module.reset_tzpath() + tzpath = self.module.TZPATH + 
self.assertSequenceEqual(tzpath, expected_result) + + def test_env_variable_relative_paths(self): + test_cases = [ + [("path/to/somewhere",), ()], + [ + ("/usr/share/zoneinfo", "path/to/somewhere",), + ("/usr/share/zoneinfo",), + ], + [("../relative/path",), ()], + [ + ("/usr/share/zoneinfo", "../relative/path",), + ("/usr/share/zoneinfo",), + ], + [("path/to/somewhere", "../relative/path",), ()], + [ + ( + "/usr/share/zoneinfo", + "path/to/somewhere", + "../relative/path", + ), + ("/usr/share/zoneinfo",), + ], + ] + + for input_paths, expected_paths in test_cases: + path_var = os.pathsep.join(input_paths) + with self.python_tzpath_context(path_var): + with self.subTest("warning", path_var=path_var): + # Note: Per PEP 615 the warning is implementation-defined + # behavior, other implementations need not warn. + with self.assertWarns(self.module.InvalidTZPathWarning): + self.module.reset_tzpath() + + tzpath = self.module.TZPATH + with self.subTest("filtered", path_var=path_var): + self.assertSequenceEqual(tzpath, expected_paths) + + def test_reset_tzpath_kwarg(self): + self.module.reset_tzpath(to=["/a/b/c"]) + + self.assertSequenceEqual(self.module.TZPATH, ("/a/b/c",)) + + def test_reset_tzpath_relative_paths(self): + bad_values = [ + ("path/to/somewhere",), + ("/usr/share/zoneinfo", "path/to/somewhere",), + ("../relative/path",), + ("/usr/share/zoneinfo", "../relative/path",), + ("path/to/somewhere", "../relative/path",), + ("/usr/share/zoneinfo", "path/to/somewhere", "../relative/path",), + ] + for input_paths in bad_values: + with self.subTest(input_paths=input_paths): + with self.assertRaises(ValueError): + self.module.reset_tzpath(to=input_paths) + + def test_tzpath_type_error(self): + bad_values = [ + "/etc/zoneinfo:/usr/share/zoneinfo", + b"/etc/zoneinfo:/usr/share/zoneinfo", + 0, + ] + + for bad_value in bad_values: + with self.subTest(value=bad_value): + with self.assertRaises(TypeError): + self.module.reset_tzpath(bad_value) + + def 
test_tzpath_attribute(self): + tzpath_0 = ["/one", "/two"] + tzpath_1 = ["/three"] + + with self.tzpath_context(tzpath_0): + query_0 = self.module.TZPATH + + with self.tzpath_context(tzpath_1): + query_1 = self.module.TZPATH + + self.assertSequenceEqual(tzpath_0, query_0) + self.assertSequenceEqual(tzpath_1, query_1) + + +class CTzPathTest(TzPathTest): + module = c_zoneinfo + + +class TestModule(ZoneInfoTestBase): + module = py_zoneinfo + + def test_getattr_error(self): + with self.assertRaises(AttributeError): + self.module.NOATTRIBUTE + + def test_dir_contains_all(self): + """dir(self.module) should at least contain everything in __all__.""" + module_all_set = set(self.module.__all__) + module_dir_set = set(dir(self.module)) + + difference = module_all_set - module_dir_set + + self.assertFalse(difference) + + def test_dir_unique(self): + """Test that there are no duplicates in dir(self.module)""" + module_dir = dir(self.module) + module_unique = set(module_dir) + + self.assertCountEqual(module_dir, module_unique) + + +class CTestModule(TestModule): + module = c_zoneinfo + + +@dataclasses.dataclass(frozen=True) +class ZoneOffset: + tzname: str + utcoffset: timedelta + dst: timedelta = ZERO + + +@dataclasses.dataclass(frozen=True) +class ZoneTransition: + transition: datetime + offset_before: ZoneOffset + offset_after: ZoneOffset + + @property + def transition_utc(self): + return (self.transition - self.offset_before.utcoffset).replace( + tzinfo=timezone.utc + ) + + @property + def fold(self): + """Whether this introduces a fold""" + return self.offset_before.utcoffset > self.offset_after.utcoffset + + @property + def gap(self): + """Whether this introduces a gap""" + return self.offset_before.utcoffset < self.offset_after.utcoffset + + @property + def delta(self): + return self.offset_after.utcoffset - self.offset_before.utcoffset + + @property + def anomaly_start(self): + if self.fold: + return self.transition + self.delta + else: + return self.transition + + 
@property + def anomaly_end(self): + if not self.fold: + return self.transition + self.delta + else: + return self.transition + + +class ZoneInfoData: + def __init__(self, source_json, tzpath, v1=False): + self.tzpath = pathlib.Path(tzpath) + self.keys = [] + self.v1 = v1 + self._populate_tzpath(source_json) + + def path_from_key(self, key): + return self.tzpath / key + + def _populate_tzpath(self, source_json): + with open(source_json, "rb") as f: + zoneinfo_dict = json.load(f) + + zoneinfo_data = zoneinfo_dict["data"] + + for key, value in zoneinfo_data.items(): + self.keys.append(key) + raw_data = self._decode_text(value) + + if self.v1: + data = self._convert_to_v1(raw_data) + else: + data = raw_data + + destination = self.path_from_key(key) + destination.parent.mkdir(exist_ok=True, parents=True) + with open(destination, "wb") as f: + f.write(data) + + def _decode_text(self, contents): + raw_data = b"".join(map(str.encode, contents)) + decoded = base64.b85decode(raw_data) + + return lzma.decompress(decoded) + + def _convert_to_v1(self, contents): + assert contents[0:4] == b"TZif", "Invalid TZif data found!" 
+ version = int(contents[4:5]) + + header_start = 4 + 16 + header_end = header_start + 24 # 6l == 24 bytes + assert version >= 2, "Version 1 file found: no conversion necessary" + isutcnt, isstdcnt, leapcnt, timecnt, typecnt, charcnt = struct.unpack( + ">6l", contents[header_start:header_end] + ) + + file_size = ( + timecnt * 5 + + typecnt * 6 + + charcnt + + leapcnt * 8 + + isstdcnt + + isutcnt + ) + file_size += header_end + out = b"TZif" + b"\x00" + contents[5:file_size] + + assert ( + contents[file_size : (file_size + 4)] == b"TZif" + ), "Version 2 file not truncated at Version 2 header" + + return out + + +class ZoneDumpData: + @classmethod + def transition_keys(cls): + return cls._get_zonedump().keys() + + @classmethod + def load_transition_examples(cls, key): + return cls._get_zonedump()[key] + + @classmethod + def fixed_offset_zones(cls): + if not cls._FIXED_OFFSET_ZONES: + cls._populate_fixed_offsets() + + return cls._FIXED_OFFSET_ZONES.items() + + @classmethod + def _get_zonedump(cls): + if not cls._ZONEDUMP_DATA: + cls._populate_zonedump_data() + return cls._ZONEDUMP_DATA + + @classmethod + def _populate_fixed_offsets(cls): + cls._FIXED_OFFSET_ZONES = { + "UTC": ZoneOffset("UTC", ZERO, ZERO), + } + + @classmethod + def _populate_zonedump_data(cls): + def _Africa_Abidjan(): + LMT = ZoneOffset("LMT", timedelta(seconds=-968)) + GMT = ZoneOffset("GMT", ZERO) + + return [ + ZoneTransition(datetime(1912, 1, 1), LMT, GMT), + ] + + def _Africa_Casablanca(): + P00_s = ZoneOffset("+00", ZERO, ZERO) + P01_d = ZoneOffset("+01", ONE_H, ONE_H) + P00_d = ZoneOffset("+00", ZERO, -ONE_H) + P01_s = ZoneOffset("+01", ONE_H, ZERO) + + return [ + # Morocco sometimes pauses DST during Ramadan + ZoneTransition(datetime(2018, 3, 25, 2), P00_s, P01_d), + ZoneTransition(datetime(2018, 5, 13, 3), P01_d, P00_s), + ZoneTransition(datetime(2018, 6, 17, 2), P00_s, P01_d), + # On October 28th Morocco set standard time to +01, + # with negative DST only during Ramadan + 
ZoneTransition(datetime(2018, 10, 28, 3), P01_d, P01_s), + ZoneTransition(datetime(2019, 5, 5, 3), P01_s, P00_d), + ZoneTransition(datetime(2019, 6, 9, 2), P00_d, P01_s), + ] + + def _America_Los_Angeles(): + LMT = ZoneOffset("LMT", timedelta(seconds=-28378), ZERO) + PST = ZoneOffset("PST", timedelta(hours=-8), ZERO) + PDT = ZoneOffset("PDT", timedelta(hours=-7), ONE_H) + PWT = ZoneOffset("PWT", timedelta(hours=-7), ONE_H) + PPT = ZoneOffset("PPT", timedelta(hours=-7), ONE_H) + + return [ + ZoneTransition(datetime(1883, 11, 18, 12, 7, 2), LMT, PST), + ZoneTransition(datetime(1918, 3, 31, 2), PST, PDT), + ZoneTransition(datetime(1918, 3, 31, 2), PST, PDT), + ZoneTransition(datetime(1918, 10, 27, 2), PDT, PST), + # Transition to Pacific War Time + ZoneTransition(datetime(1942, 2, 9, 2), PST, PWT), + # Transition from Pacific War Time to Pacific Peace Time + ZoneTransition(datetime(1945, 8, 14, 16), PWT, PPT), + ZoneTransition(datetime(1945, 9, 30, 2), PPT, PST), + ZoneTransition(datetime(2015, 3, 8, 2), PST, PDT), + ZoneTransition(datetime(2015, 11, 1, 2), PDT, PST), + # After 2038: Rules continue indefinitely + ZoneTransition(datetime(2450, 3, 13, 2), PST, PDT), + ZoneTransition(datetime(2450, 11, 6, 2), PDT, PST), + ] + + def _America_Santiago(): + LMT = ZoneOffset("LMT", timedelta(seconds=-16966), ZERO) + SMT = ZoneOffset("SMT", timedelta(seconds=-16966), ZERO) + N05 = ZoneOffset("-05", timedelta(seconds=-18000), ZERO) + N04 = ZoneOffset("-04", timedelta(seconds=-14400), ZERO) + N03 = ZoneOffset("-03", timedelta(seconds=-10800), ONE_H) + + return [ + ZoneTransition(datetime(1890, 1, 1), LMT, SMT), + ZoneTransition(datetime(1910, 1, 10), SMT, N05), + ZoneTransition(datetime(1916, 7, 1), N05, SMT), + ZoneTransition(datetime(2008, 3, 30), N03, N04), + ZoneTransition(datetime(2008, 10, 12), N04, N03), + ZoneTransition(datetime(2040, 4, 8), N03, N04), + ZoneTransition(datetime(2040, 9, 2), N04, N03), + ] + + def _Asia_Tokyo(): + JST = ZoneOffset("JST", 
timedelta(seconds=32400), ZERO) + JDT = ZoneOffset("JDT", timedelta(seconds=36000), ONE_H) + + # Japan had DST from 1948 to 1951, and it was unusual in that + # the transition from DST to STD occurred at 25:00, and is + # denominated as such in the time zone database + return [ + ZoneTransition(datetime(1948, 5, 2), JST, JDT), + ZoneTransition(datetime(1948, 9, 12, 1), JDT, JST), + ZoneTransition(datetime(1951, 9, 9, 1), JDT, JST), + ] + + def _Australia_Sydney(): + LMT = ZoneOffset("LMT", timedelta(seconds=36292), ZERO) + AEST = ZoneOffset("AEST", timedelta(seconds=36000), ZERO) + AEDT = ZoneOffset("AEDT", timedelta(seconds=39600), ONE_H) + + return [ + ZoneTransition(datetime(1895, 2, 1), LMT, AEST), + ZoneTransition(datetime(1917, 1, 1, 0, 1), AEST, AEDT), + ZoneTransition(datetime(1917, 3, 25, 2), AEDT, AEST), + ZoneTransition(datetime(2012, 4, 1, 3), AEDT, AEST), + ZoneTransition(datetime(2012, 10, 7, 2), AEST, AEDT), + ZoneTransition(datetime(2040, 4, 1, 3), AEDT, AEST), + ZoneTransition(datetime(2040, 10, 7, 2), AEST, AEDT), + ] + + def _Europe_Dublin(): + LMT = ZoneOffset("LMT", timedelta(seconds=-1500), ZERO) + DMT = ZoneOffset("DMT", timedelta(seconds=-1521), ZERO) + IST_0 = ZoneOffset("IST", timedelta(seconds=2079), ONE_H) + GMT_0 = ZoneOffset("GMT", ZERO, ZERO) + BST = ZoneOffset("BST", ONE_H, ONE_H) + GMT_1 = ZoneOffset("GMT", ZERO, -ONE_H) + IST_1 = ZoneOffset("IST", ONE_H, ZERO) + + return [ + ZoneTransition(datetime(1880, 8, 2, 0), LMT, DMT), + ZoneTransition(datetime(1916, 5, 21, 2), DMT, IST_0), + ZoneTransition(datetime(1916, 10, 1, 3), IST_0, GMT_0), + ZoneTransition(datetime(1917, 4, 8, 2), GMT_0, BST), + ZoneTransition(datetime(2016, 3, 27, 1), GMT_1, IST_1), + ZoneTransition(datetime(2016, 10, 30, 2), IST_1, GMT_1), + ZoneTransition(datetime(2487, 3, 30, 1), GMT_1, IST_1), + ZoneTransition(datetime(2487, 10, 26, 2), IST_1, GMT_1), + ] + + def _Europe_Lisbon(): + WET = ZoneOffset("WET", ZERO, ZERO) + WEST = ZoneOffset("WEST", ONE_H, ONE_H) + 
CET = ZoneOffset("CET", ONE_H, ZERO) + CEST = ZoneOffset("CEST", timedelta(seconds=7200), ONE_H) + + return [ + ZoneTransition(datetime(1992, 3, 29, 1), WET, WEST), + ZoneTransition(datetime(1992, 9, 27, 2), WEST, CET), + ZoneTransition(datetime(1993, 3, 28, 2), CET, CEST), + ZoneTransition(datetime(1993, 9, 26, 3), CEST, CET), + ZoneTransition(datetime(1996, 3, 31, 2), CET, WEST), + ZoneTransition(datetime(1996, 10, 27, 2), WEST, WET), + ] + + def _Europe_London(): + LMT = ZoneOffset("LMT", timedelta(seconds=-75), ZERO) + GMT = ZoneOffset("GMT", ZERO, ZERO) + BST = ZoneOffset("BST", ONE_H, ONE_H) + + return [ + ZoneTransition(datetime(1847, 12, 1), LMT, GMT), + ZoneTransition(datetime(2005, 3, 27, 1), GMT, BST), + ZoneTransition(datetime(2005, 10, 30, 2), BST, GMT), + ZoneTransition(datetime(2043, 3, 29, 1), GMT, BST), + ZoneTransition(datetime(2043, 10, 25, 2), BST, GMT), + ] + + def _Pacific_Kiritimati(): + LMT = ZoneOffset("LMT", timedelta(seconds=-37760), ZERO) + N1040 = ZoneOffset("-1040", timedelta(seconds=-38400), ZERO) + N10 = ZoneOffset("-10", timedelta(seconds=-36000), ZERO) + P14 = ZoneOffset("+14", timedelta(seconds=50400), ZERO) + + # This is literally every transition in Christmas Island history + return [ + ZoneTransition(datetime(1901, 1, 1), LMT, N1040), + ZoneTransition(datetime(1979, 10, 1), N1040, N10), + # They skipped December 31, 1994 + ZoneTransition(datetime(1994, 12, 31), N10, P14), + ] + + cls._ZONEDUMP_DATA = { + "Africa/Abidjan": _Africa_Abidjan(), + "Africa/Casablanca": _Africa_Casablanca(), + "America/Los_Angeles": _America_Los_Angeles(), + "America/Santiago": _America_Santiago(), + "Australia/Sydney": _Australia_Sydney(), + "Asia/Tokyo": _Asia_Tokyo(), + "Europe/Dublin": _Europe_Dublin(), + "Europe/Lisbon": _Europe_Lisbon(), + "Europe/London": _Europe_London(), + "Pacific/Kiritimati": _Pacific_Kiritimati(), + } + + _ZONEDUMP_DATA = None + _FIXED_OFFSET_ZONES = None diff --git a/Lib/zoneinfo/__init__.py b/Lib/zoneinfo/__init__.py new 
file mode 100644 index 00000000000000..7fa7a572805a2a --- /dev/null +++ b/Lib/zoneinfo/__init__.py @@ -0,0 +1,29 @@ +__all__ = [ + "ZoneInfo", + "reset_tzpath", + "TZPATH", + "ZoneInfoNotFoundError", + "InvalidTZPathWarning", +] + +from . import _tzpath +from ._common import ZoneInfoNotFoundError + +try: + from _czoneinfo import ZoneInfo +except ImportError: # pragma: nocover + from ._zoneinfo import ZoneInfo + +reset_tzpath = _tzpath.reset_tzpath +InvalidTZPathWarning = _tzpath.InvalidTZPathWarning + + +def __getattr__(name): + if name == "TZPATH": + return _tzpath.TZPATH + else: + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + + +def __dir__(): + return sorted(__all__ + ["__version__"]) diff --git a/Lib/zoneinfo/_common.py b/Lib/zoneinfo/_common.py new file mode 100644 index 00000000000000..568a4873c1fecc --- /dev/null +++ b/Lib/zoneinfo/_common.py @@ -0,0 +1,160 @@ +import struct + + +def load_tzdata(key): + import importlib.resources + + components = key.split("/") + package_name = ".".join(["tzdata.zoneinfo"] + components[:-1]) + resource_name = components[-1] + + try: + return importlib.resources.open_binary(package_name, resource_name) + except (ImportError, FileNotFoundError, UnicodeEncodeError) as e: + raise ZoneInfoNotFoundError(f"No time zone found with key {key}") from e + + +def load_data(fobj): + header = _TZifHeader.from_file(fobj) + + if header.version == 1: + time_size = 4 + time_type = "l" + else: + # Version 2+ has 64-bit integer transition times + time_size = 8 + time_type = "q" + + # Version 2+ also starts with a Version 1 header and data, which + # we need to skip now + skip_bytes = ( + header.timecnt * 5 # Transition times and types + + header.typecnt * 6 # Local time type records + + header.charcnt # Time zone designations + + header.leapcnt * 8 # Leap second records + + header.isstdcnt # Standard/wall indicators + + header.isutcnt # UT/local indicators + ) + + fobj.seek(skip_bytes, 1) + + # Now we need to read the 
second header, which is not the same + # as the first + header = _TZifHeader.from_file(fobj) + + typecnt = header.typecnt + timecnt = header.timecnt + charcnt = header.charcnt + + # The data portion starts with timecnt transitions and indices + if timecnt: + trans_list_utc = struct.unpack( + f">{timecnt}{time_type}", fobj.read(timecnt * time_size) + ) + trans_idx = struct.unpack(f">{timecnt}B", fobj.read(timecnt)) + else: + trans_list_utc = () + trans_idx = () + + # Read the ttinfo struct, (utoff, isdst, abbrind) + if typecnt: + utcoff, isdst, abbrind = zip( + *(struct.unpack(">lbb", fobj.read(6)) for i in range(typecnt)) + ) + else: + utcoff = () + isdst = () + abbrind = () + + # Now read the abbreviations. They are null-terminated strings, indexed + # not by position in the array but by position in the unsplit + # abbreviation string. I suppose this makes more sense in C, which uses + # null to terminate the strings, but it's inconvenient here... + char_total = 0 + abbr_vals = {} + abbr_chars = fobj.read(charcnt) + + def get_abbr(idx): + # Gets a string starting at idx and running until the next \x00 + # + # We cannot pre-populate abbr_vals by splitting on \x00 because there + # are some zones that use subsets of longer abbreviations, like so: + # + # LMT\x00AHST\x00HDT\x00 + # + # Where the idx to abbr mapping should be: + # + # {0: "LMT", 4: "AHST", 5: "HST", 9: "HDT"} + if idx not in abbr_vals: + span_end = abbr_chars.find(b"\x00", idx) + abbr_vals[idx] = abbr_chars[idx:span_end].decode() + + return abbr_vals[idx] + + abbr = tuple(get_abbr(idx) for idx in abbrind) + + # The remainder of the file consists of leap seconds (currently unused) and + # the standard/wall and ut/local indicators, which are metadata we don't need. 
+ # In version 2 files, we need to skip the unnecessary data to get at the TZ string: + if header.version >= 2: + # Each leap second record has size (time_size + 4) + skip_bytes = header.isutcnt + header.isstdcnt + header.leapcnt * 12 + fobj.seek(skip_bytes, 1) + + c = fobj.read(1) # Should be \n + assert c == b"\n", c + + tz_bytes = b"" + # TODO: Walrus operator + while True: + c = fobj.read(1) + if c == b"\n": + break + tz_bytes += c + + tz_str = tz_bytes + else: + tz_str = None + + return trans_idx, trans_list_utc, utcoff, isdst, abbr, tz_str + + +class _TZifHeader: + __slots__ = [ + "version", + "isutcnt", + "isstdcnt", + "leapcnt", + "timecnt", + "typecnt", + "charcnt", + ] + + def __init__(self, *args): + assert len(self.__slots__) == len(args) + for attr, val in zip(self.__slots__, args): + setattr(self, attr, val) + + @classmethod + def from_file(cls, stream): + # The header starts with a 4-byte "magic" value + if stream.read(4) != b"TZif": + raise ValueError("Invalid TZif file: magic not found") + + _version = stream.read(1) + if _version == b"\x00": + version = 1 + else: + version = int(_version) + stream.read(15) + + args = (version,) + + # Slots are defined in the order that the bytes are arranged + args = args + struct.unpack(">6l", stream.read(24)) + + return cls(*args) + + +class ZoneInfoNotFoundError(KeyError): + """Exception raised when a ZoneInfo key is not found.""" diff --git a/Lib/zoneinfo/_tzpath.py b/Lib/zoneinfo/_tzpath.py new file mode 100644 index 00000000000000..ee20672deff633 --- /dev/null +++ b/Lib/zoneinfo/_tzpath.py @@ -0,0 +1,116 @@ +import os +import sys + + +def reset_tzpath(to=None): + global TZPATH + + tzpaths = to + if tzpaths is not None: + if isinstance(tzpaths, (str, bytes)): + raise TypeError( + f"tzpaths must be a list or tuple, " + + f"not {type(tzpaths)}: {tzpaths!r}" + ) + elif not all(map(os.path.isabs, tzpaths)): + raise ValueError(_get_invalid_paths_message(tzpaths)) + base_tzpath = tzpaths + else: + env_var = 
os.environ.get("PYTHONTZPATH", None) + if env_var is not None: + base_tzpath = _parse_python_tzpath(env_var) + elif sys.platform != "win32": + base_tzpath = [ + "/usr/share/zoneinfo", + "/usr/lib/zoneinfo", + "/usr/share/lib/zoneinfo", + "/etc/zoneinfo", + ] + + base_tzpath.sort(key=lambda x: not os.path.exists(x)) + else: + base_tzpath = () + + TZPATH = tuple(base_tzpath) + + +def _parse_python_tzpath(env_var): + if not env_var: + return () + else: + raw_tzpath = env_var.split(os.pathsep) + new_tzpath = tuple(filter(os.path.isabs, raw_tzpath)) + + # If anything has been filtered out, we will warn about it + if len(new_tzpath) != len(raw_tzpath): + import warnings + + msg = _get_invalid_paths_message(raw_tzpath) + + warnings.warn( + "Invalid paths specified in PYTHONTZPATH environment variable." + + msg, + InvalidTZPathWarning, + ) + + return new_tzpath + + +def _get_invalid_paths_message(tzpaths): + invalid_paths = (path for path in tzpaths if not os.path.isabs(path)) + + prefix = "\n " + indented_str = prefix + prefix.join(invalid_paths) + + return ( + "Paths should be absolute but found the following relative paths:" + + indented_str + ) + + +def find_tzfile(key): + """Retrieve the path to a TZif file from a key.""" + _validate_tzfile_path(key) + for search_path in TZPATH: + filepath = os.path.join(search_path, key) + if os.path.isfile(filepath): + return filepath + + return None + + +_TEST_PATH = os.path.normpath(os.path.join("_", "_"))[:-1] + + +def _validate_tzfile_path(path, _base=_TEST_PATH): + if os.path.isabs(path): + raise ValueError( + f"ZoneInfo keys may not be absolute paths, got: {path}" + ) + + # We only care about the kinds of path normalizations that would change the + # length of the key - e.g. a/../b -> a/b, or a/b/ -> a/b. On Windows, + # normpath will also change from a/b to a\b, but that would still preserve + # the length. 
+ new_path = os.path.normpath(path) + if len(new_path) != len(path): + raise ValueError( + f"ZoneInfo keys must be normalized relative paths, got: {path}" + ) + + resolved = os.path.normpath(os.path.join(_base, new_path)) + if not resolved.startswith(_base): + raise ValueError( + f"ZoneInfo keys must refer to subdirectories of TZPATH, got: {path}" + ) + + +del _TEST_PATH + + +class InvalidTZPathWarning(RuntimeWarning): + """Warning raised if an invalid path is specified in PYTHONTZPATH.""" + + +TZPATH = () +reset_tzpath() diff --git a/Lib/zoneinfo/_zoneinfo.py b/Lib/zoneinfo/_zoneinfo.py new file mode 100644 index 00000000000000..d392017e2a1b44 --- /dev/null +++ b/Lib/zoneinfo/_zoneinfo.py @@ -0,0 +1,756 @@ +import bisect +import calendar +import collections +import functools +import os +import re +import struct +import sys +import weakref +from datetime import datetime, timedelta, timezone, tzinfo + +from . import _common, _tzpath + +EPOCH = datetime(1970, 1, 1) +EPOCHORDINAL = datetime(1970, 1, 1).toordinal() + +# It is relatively expensive to construct new timedelta objects, and in most +# cases we're looking at the same deltas, like integer numbers of hours, etc. +# To improve speed and memory use, we'll keep a dictionary with references +# to the ones we've already used so far. +# +# Loading every time zone in the 2020a version of the time zone database +# requires 447 timedeltas, which requires approximately the amount of space +# that ZoneInfo("America/New_York") with 236 transitions takes up, so we will +# set the cache size to 512 so that in the common case we always get cache +# hits, but specifically crafted ZoneInfo objects don't leak arbitrary amounts +# of memory. 
+@functools.lru_cache(maxsize=512) +def _load_timedelta(seconds): + return timedelta(seconds=seconds) + + +class ZoneInfo(tzinfo): + __module__ = "zoneinfo" + _strong_cache_size = 8 + _strong_cache = collections.OrderedDict() + _weak_cache = weakref.WeakValueDictionary() + + def __init_subclass__(cls): + cls._strong_cache = collections.OrderedDict() + cls._weak_cache = weakref.WeakValueDictionary() + + def __new__(cls, key): + instance = cls._weak_cache.get(key, None) + if instance is None: + instance = cls._weak_cache.setdefault(key, cls._new_instance(key)) + instance._from_cache = True + + # Update the "strong" cache + cls._strong_cache[key] = cls._strong_cache.pop(key, instance) + + if len(cls._strong_cache) > cls._strong_cache_size: + cls._strong_cache.popitem(last=False) + + return instance + + @classmethod + def no_cache(cls, key): + obj = cls._new_instance(key) + obj._from_cache = False + + return obj + + @classmethod + def _new_instance(cls, key): + obj = super().__new__(cls) + obj._key = key + obj._file_path = obj._find_tzfile(key) + + if obj._file_path is not None: + file_obj = open(obj._file_path, "rb") + else: + file_obj = _common.load_tzdata(key) + + with file_obj as f: + obj._load_file(f) + + return obj + + @classmethod + def from_file(cls, fobj, /, key=None): + obj = super().__new__(cls) + obj._key = key + obj._file_path = None + obj._load_file(fobj) + obj._file_repr = repr(fobj) + + # Disable pickling for objects created from files + obj.__reduce__ = obj._file_reduce + + return obj + + @classmethod + def clear_cache(cls, *, only_keys=None): + if only_keys is not None: + for key in only_keys: + cls._weak_cache.pop(key, None) + cls._strong_cache.pop(key, None) + + else: + cls._weak_cache.clear() + cls._strong_cache.clear() + + @property + def key(self): + return self._key + + def utcoffset(self, dt): + return self._find_trans(dt).utcoff + + def dst(self, dt): + return self._find_trans(dt).dstoff + + def tzname(self, dt): + return 
self._find_trans(dt).tzname + + def fromutc(self, dt): + """Convert from datetime in UTC to datetime in local time""" + + if not isinstance(dt, datetime): + raise TypeError("fromutc() requires a datetime argument") + if dt.tzinfo is not self: + raise ValueError("dt.tzinfo is not self") + + timestamp = self._get_local_timestamp(dt) + num_trans = len(self._trans_utc) + + if num_trans >= 1 and timestamp < self._trans_utc[0]: + tti = self._tti_before + fold = 0 + elif ( + num_trans == 0 or timestamp > self._trans_utc[-1] + ) and not isinstance(self._tz_after, _ttinfo): + tti, fold = self._tz_after.get_trans_info_fromutc( + timestamp, dt.year + ) + elif num_trans == 0: + tti = self._tz_after + fold = 0 + else: + idx = bisect.bisect_right(self._trans_utc, timestamp) + + if num_trans > 1 and timestamp >= self._trans_utc[1]: + tti_prev, tti = self._ttinfos[idx - 2 : idx] + elif timestamp > self._trans_utc[-1]: + tti_prev = self._ttinfos[-1] + tti = self._tz_after + else: + tti_prev = self._tti_before + tti = self._ttinfos[0] + + # Detect fold + shift = tti_prev.utcoff - tti.utcoff + fold = shift.total_seconds() > timestamp - self._trans_utc[idx - 1] + dt += tti.utcoff + if fold: + return dt.replace(fold=1) + else: + return dt + + def _find_trans(self, dt): + if dt is None: + if self._fixed_offset: + return self._tz_after + else: + return _NO_TTINFO + + ts = self._get_local_timestamp(dt) + + lt = self._trans_local[dt.fold] + + num_trans = len(lt) + + if num_trans and ts < lt[0]: + return self._tti_before + elif not num_trans or ts > lt[-1]: + if isinstance(self._tz_after, _TZStr): + return self._tz_after.get_trans_info(ts, dt.year, dt.fold) + else: + return self._tz_after + else: + # idx is the transition that occurs after this timestamp, so we + # subtract off 1 to get the current ttinfo + idx = bisect.bisect_right(lt, ts) - 1 + assert idx >= 0 + return self._ttinfos[idx] + + def _get_local_timestamp(self, dt): + return ( + (dt.toordinal() - EPOCHORDINAL) * 86400 + + 
dt.hour * 3600 + + dt.minute * 60 + + dt.second + ) + + def __str__(self): + if self._key is not None: + return f"{self._key}" + else: + return repr(self) + + def __repr__(self): + if self._key is not None: + return f"{self.__class__.__name__}(key={self._key!r})" + else: + return f"{self.__class__.__name__}.from_file({self._file_repr})" + + def __reduce__(self): + return (self.__class__._unpickle, (self._key, self._from_cache)) + + def _file_reduce(self): + import pickle + + raise pickle.PicklingError( + "Cannot pickle a ZoneInfo file created from a file stream." + ) + + @classmethod + def _unpickle(cls, key, from_cache, /): + if from_cache: + return cls(key) + else: + return cls.no_cache(key) + + def _find_tzfile(self, key): + return _tzpath.find_tzfile(key) + + def _load_file(self, fobj): + # Retrieve all the data as it exists in the zoneinfo file + trans_idx, trans_utc, utcoff, isdst, abbr, tz_str = _common.load_data( + fobj + ) + + # Infer the DST offsets (needed for .dst()) from the data + dstoff = self._utcoff_to_dstoff(trans_idx, utcoff, isdst) + + # Convert all the transition times (UTC) into "seconds since 1970-01-01 local time" + trans_local = self._ts_to_local(trans_idx, trans_utc, utcoff) + + # Construct `_ttinfo` objects for each transition in the file + _ttinfo_list = [ + _ttinfo( + _load_timedelta(utcoffset), _load_timedelta(dstoffset), tzname + ) + for utcoffset, dstoffset, tzname in zip(utcoff, dstoff, abbr) + ] + + self._trans_utc = trans_utc + self._trans_local = trans_local + self._ttinfos = [_ttinfo_list[idx] for idx in trans_idx] + + # Find the first non-DST transition + for i in range(len(isdst)): + if not isdst[i]: + self._tti_before = _ttinfo_list[i] + break + else: + if self._ttinfos: + self._tti_before = self._ttinfos[0] + else: + self._tti_before = None + + # Set the "fallback" time zone + if tz_str is not None and tz_str != b"": + self._tz_after = _parse_tz_str(tz_str.decode()) + else: + if not self._ttinfos and not _ttinfo_list: + 
raise ValueError("No time zone information found.") + + if self._ttinfos: + self._tz_after = self._ttinfos[-1] + else: + self._tz_after = _ttinfo_list[-1] + + # Determine if this is a "fixed offset" zone, meaning that the output + # of the utcoffset, dst and tzname functions does not depend on the + # specific datetime passed. + # + # We make three simplifying assumptions here: + # + # 1. If _tz_after is not a _ttinfo, it has transitions that might + # actually occur (it is possible to construct TZ strings that + # specify STD and DST but no transitions ever occur, such as + # AAA0BBB,0/0,J365/25). + # 2. If _ttinfo_list contains more than one _ttinfo object, the objects + # represent different offsets. + # 3. _ttinfo_list contains no unused _ttinfos (in which case an + # otherwise fixed-offset zone with extra _ttinfos defined may + # appear to *not* be a fixed offset zone). + # + # Violations to these assumptions would be fairly exotic, and exotic + # zones should almost certainly not be used with datetime.time (the + # only thing that would be affected by this). + if len(_ttinfo_list) > 1 or not isinstance(self._tz_after, _ttinfo): + self._fixed_offset = False + elif not _ttinfo_list: + self._fixed_offset = True + else: + self._fixed_offset = _ttinfo_list[0] == self._tz_after + + @staticmethod + def _utcoff_to_dstoff(trans_idx, utcoffsets, isdsts): + # Now we must transform our ttis and abbrs into `_ttinfo` objects, + # but there is an issue: .dst() must return a timedelta with the + # difference between utcoffset() and the "standard" offset, but + # the "base offset" and "DST offset" are not encoded in the file; + # we can infer what they are from the isdst flag, but it is not + # sufficient to to just look at the last standard offset, because + # occasionally countries will shift both DST offset and base offset. + + typecnt = len(isdsts) + dstoffs = [0] * typecnt # Provisionally assign all to 0. 
+ dst_cnt = sum(isdsts) + dst_found = 0 + + for i in range(1, len(trans_idx)): + if dst_cnt == dst_found: + break + + idx = trans_idx[i] + + dst = isdsts[idx] + + # We're only going to look at daylight saving time + if not dst: + continue + + # Skip any offsets that have already been assigned + if dstoffs[idx] != 0: + continue + + dstoff = 0 + utcoff = utcoffsets[idx] + + comp_idx = trans_idx[i - 1] + + if not isdsts[comp_idx]: + dstoff = utcoff - utcoffsets[comp_idx] + + if not dstoff and idx < (typecnt - 1): + comp_idx = trans_idx[i + 1] + + # If the following transition is also DST and we couldn't + # find the DST offset by this point, we're going ot have to + # skip it and hope this transition gets assigned later + if isdsts[comp_idx]: + continue + + dstoff = utcoff - utcoffsets[comp_idx] + + if dstoff: + dst_found += 1 + dstoffs[idx] = dstoff + else: + # If we didn't find a valid value for a given index, we'll end up + # with dstoff = 0 for something where `isdst=1`. This is obviously + # wrong - one hour will be a much better guess than 0 + for idx in range(typecnt): + if not dstoffs[idx] and isdsts[idx]: + dstoffs[idx] = 3600 + + return dstoffs + + @staticmethod + def _ts_to_local(trans_idx, trans_list_utc, utcoffsets): + """Generate number of seconds since 1970 *in the local time*. 
+ + This is necessary to easily find the transition times in local time""" + if not trans_list_utc: + return [[], []] + + # Start with the timestamps and modify in-place + trans_list_wall = [list(trans_list_utc), list(trans_list_utc)] + + if len(utcoffsets) > 1: + offset_0 = utcoffsets[0] + offset_1 = utcoffsets[trans_idx[0]] + if offset_1 > offset_0: + offset_1, offset_0 = offset_0, offset_1 + else: + offset_0 = offset_1 = utcoffsets[0] + + trans_list_wall[0][0] += offset_0 + trans_list_wall[1][0] += offset_1 + + for i in range(1, len(trans_idx)): + offset_0 = utcoffsets[trans_idx[i - 1]] + offset_1 = utcoffsets[trans_idx[i]] + + if offset_1 > offset_0: + offset_1, offset_0 = offset_0, offset_1 + + trans_list_wall[0][i] += offset_0 + trans_list_wall[1][i] += offset_1 + + return trans_list_wall + + +class _ttinfo: + __slots__ = ["utcoff", "dstoff", "tzname"] + + def __init__(self, utcoff, dstoff, tzname): + self.utcoff = utcoff + self.dstoff = dstoff + self.tzname = tzname + + def __eq__(self, other): + return ( + self.utcoff == other.utcoff + and self.dstoff == other.dstoff + and self.tzname == other.tzname + ) + + def __repr__(self): # pragma: nocover + return ( + f"{self.__class__.__name__}" + + f"({self.utcoff}, {self.dstoff}, {self.tzname})" + ) + + +_NO_TTINFO = _ttinfo(None, None, None) + + +class _TZStr: + __slots__ = ( + "std", + "dst", + "start", + "end", + "get_trans_info", + "get_trans_info_fromutc", + "dst_diff", + ) + + def __init__( + self, std_abbr, std_offset, dst_abbr, dst_offset, start=None, end=None + ): + self.dst_diff = dst_offset - std_offset + std_offset = _load_timedelta(std_offset) + self.std = _ttinfo( + utcoff=std_offset, dstoff=_load_timedelta(0), tzname=std_abbr + ) + + self.start = start + self.end = end + + dst_offset = _load_timedelta(dst_offset) + delta = _load_timedelta(self.dst_diff) + self.dst = _ttinfo(utcoff=dst_offset, dstoff=delta, tzname=dst_abbr) + + # These are assertions because the constructor should only be called + # 
by functions that would fail before passing start or end + assert start is not None, "No transition start specified" + assert end is not None, "No transition end specified" + + self.get_trans_info = self._get_trans_info + self.get_trans_info_fromutc = self._get_trans_info_fromutc + + def transitions(self, year): + start = self.start.year_to_epoch(year) + end = self.end.year_to_epoch(year) + return start, end + + def _get_trans_info(self, ts, year, fold): + """Get the information about the current transition - tti""" + start, end = self.transitions(year) + + # With fold = 0, the period (denominated in local time) with the + # smaller offset starts at the end of the gap and ends at the end of + # the fold; with fold = 1, it runs from the start of the gap to the + # beginning of the fold. + # + # So in order to determine the DST boundaries we need to know both + # the fold and whether DST is positive or negative (rare), and it + # turns out that this boils down to fold XOR is_positive. + if fold == (self.dst_diff >= 0): + end -= self.dst_diff + else: + start += self.dst_diff + + if start < end: + isdst = start <= ts < end + else: + isdst = not (end <= ts < start) + + return self.dst if isdst else self.std + + def _get_trans_info_fromutc(self, ts, year): + start, end = self.transitions(year) + start -= self.std.utcoff.total_seconds() + end -= self.dst.utcoff.total_seconds() + + if start < end: + isdst = start <= ts < end + else: + isdst = not (end <= ts < start) + + # For positive DST, the ambiguous period is one dst_diff after the end + # of DST; for negative DST, the ambiguous period is one dst_diff before + # the start of DST. 
+ if self.dst_diff > 0: + ambig_start = end + ambig_end = end + self.dst_diff + else: + ambig_start = start + ambig_end = start - self.dst_diff + + fold = ambig_start <= ts < ambig_end + + return (self.dst if isdst else self.std, fold) + + +def _post_epoch_days_before_year(year): + """Get the number of days between 1970-01-01 and YEAR-01-01""" + y = year - 1 + return y * 365 + y // 4 - y // 100 + y // 400 - EPOCHORDINAL + + +class _DayOffset: + __slots__ = ["d", "julian", "hour", "minute", "second"] + + def __init__(self, d, julian, hour=2, minute=0, second=0): + if not (0 + julian) <= d <= 365: + min_day = 0 + julian + raise ValueError(f"d must be in [{min_day}, 365], not: {d}") + + self.d = d + self.julian = julian + self.hour = hour + self.minute = minute + self.second = second + + def year_to_epoch(self, year): + days_before_year = _post_epoch_days_before_year(year) + + d = self.d + if self.julian and d >= 59 and calendar.isleap(year): + d += 1 + + epoch = (days_before_year + d) * 86400 + epoch += self.hour * 3600 + self.minute * 60 + self.second + + return epoch + + +class _CalendarOffset: + __slots__ = ["m", "w", "d", "hour", "minute", "second"] + + _DAYS_BEFORE_MONTH = ( + -1, + 0, + 31, + 59, + 90, + 120, + 151, + 181, + 212, + 243, + 273, + 304, + 334, + ) + + def __init__(self, m, w, d, hour=2, minute=0, second=0): + if not 0 < m <= 12: + raise ValueError("m must be in (0, 12]") + + if not 0 < w <= 5: + raise ValueError("w must be in (0, 5]") + + if not 0 <= d <= 6: + raise ValueError("d must be in [0, 6]") + + self.m = m + self.w = w + self.d = d + self.hour = hour + self.minute = minute + self.second = second + + @classmethod + def _ymd2ord(cls, year, month, day): + return ( + _post_epoch_days_before_year(year) + + cls._DAYS_BEFORE_MONTH[month] + + (month > 2 and calendar.isleap(year)) + + day + ) + + # TODO: These are not actually epoch dates as they are expressed in local time + def year_to_epoch(self, year): + """Calculates the datetime of the 
occurrence from the year""" + # We know year and month, we need to convert w, d into day of month + # + # Week 1 is the first week in which day `d` (where 0 = Sunday) appears. + # Week 5 represents the last occurrence of day `d`, so we need to know + # the range of the month. + first_day, days_in_month = calendar.monthrange(year, self.m) + + # This equation seems magical, so I'll break it down: + # 1. calendar says 0 = Monday, POSIX says 0 = Sunday + # so we need first_day + 1 to get 1 = Monday -> 7 = Sunday, + # which is still equivalent because this math is mod 7 + # 2. Get first day - desired day mod 7: -1 % 7 = 6, so we don't need + # to do anything to adjust negative numbers. + # 3. Add 1 because month days are a 1-based index. + month_day = (self.d - (first_day + 1)) % 7 + 1 + + # Now use a 0-based index version of `w` to calculate the w-th + # occurrence of `d` + month_day += (self.w - 1) * 7 + + # month_day will only be > days_in_month if w was 5, and `w` means + # "last occurrence of `d`", so now we just check if we over-shot the + # end of the month and if so knock off 1 week. + if month_day > days_in_month: + month_day -= 7 + + ordinal = self._ymd2ord(year, self.m, month_day) + epoch = ordinal * 86400 + epoch += self.hour * 3600 + self.minute * 60 + self.second + return epoch + + +def _parse_tz_str(tz_str): + # The tz string has the format: + # + # std[offset[dst[offset],start[/time],end[/time]]] + # + # std and dst must be 3 or more characters long and must not contain + # a leading colon, embedded digits, commas, nor a plus or minus signs; + # The spaces between "std" and "offset" are only for display and are + # not actually present in the string. 
+ # + # The format of the offset is ``[+|-]hh[:mm[:ss]]`` + + offset_str, *start_end_str = tz_str.split(",", 1) + + # fmt: off + parser_re = re.compile( + r"(?P<std>[^<0-9:.+-]+|<[a-zA-Z0-9+\-]+>)" + + r"((?P<stdoff>[+-]?\d{1,2}(:\d{2}(:\d{2})?)?)" + + r"((?P<dst>[^0-9:.+-]+|<[a-zA-Z0-9+\-]+>)" + + r"((?P<dstoff>[+-]?\d{1,2}(:\d{2}(:\d{2})?)?))?" + + r")?" + # dst + r")?$" # stdoff + ) + # fmt: on + + m = parser_re.match(offset_str) + + if m is None: + raise ValueError(f"{tz_str} is not a valid TZ string") + + std_abbr = m.group("std") + dst_abbr = m.group("dst") + dst_offset = None + + if std_abbr: + std_abbr = std_abbr.strip("<>") + + if dst_abbr: + dst_abbr = dst_abbr.strip("<>") + + if std_offset := m.group("stdoff"): + try: + std_offset = _parse_tz_delta(std_offset) + except ValueError as e: + raise ValueError(f"Invalid STD offset in {tz_str}") from e + else: + std_offset = 0 + + if dst_abbr is not None: + if dst_offset := m.group("dstoff"): + try: + dst_offset = _parse_tz_delta(dst_offset) + except ValueError as e: + raise ValueError(f"Invalid DST offset in {tz_str}") from e + else: + dst_offset = std_offset + 3600 + + if not start_end_str: + raise ValueError(f"Missing transition rules: {tz_str}") + + start_end_strs = start_end_str[0].split(",", 1) + try: + start, end = (_parse_dst_start_end(x) for x in start_end_strs) + except ValueError as e: + raise ValueError(f"Invalid TZ string: {tz_str}") from e + + return _TZStr(std_abbr, std_offset, dst_abbr, dst_offset, start, end) + elif start_end_str: + raise ValueError(f"Transition rule present without DST: {tz_str}") + else: + # This is a static ttinfo, don't return _TZStr + return _ttinfo( + _load_timedelta(std_offset), _load_timedelta(0), std_abbr + ) + + +def _parse_dst_start_end(dststr): + date, *time = dststr.split("/") + if date[0] == "M": + n_is_julian = False + m = re.match(r"M(\d{1,2})\.(\d).(\d)$", date) + if m is None: + raise ValueError(f"Invalid dst start/end date: {dststr}") + date_offset = tuple(map(int, m.groups())) + 
offset = _CalendarOffset(*date_offset) + else: + if date[0] == "J": + n_is_julian = True + date = date[1:] + else: + n_is_julian = False + + doy = int(date) + offset = _DayOffset(doy, n_is_julian) + + if time: + time_components = list(map(int, time[0].split(":"))) + n_components = len(time_components) + if n_components < 3: + time_components.extend([0] * (3 - n_components)) + offset.hour, offset.minute, offset.second = time_components + + return offset + + +def _parse_tz_delta(tz_delta): + match = re.match( + r"(?P<sign>[+-])?(?P<h>\d{1,2})(:(?P<m>\d{2})(:(?P<s>\d{2}))?)?", + tz_delta, + ) + # Anything passed to this function should already have hit an equivalent + # regular expression to find the section to parse. + assert match is not None, tz_delta + + h, m, s = ( + int(v) if v is not None else 0 + for v in map(match.group, ("h", "m", "s")) + ) + + total = h * 3600 + m * 60 + s + + if not -86400 < total < 86400: + raise ValueError( + "Offset must be strictly between -24h and +24h:" + tz_delta + ) + + # Yes, +5 maps to an offset of -5h + if match.group("sign") != "-": + total *= -1 + + return total diff --git a/Modules/Setup b/Modules/Setup index 6bf142419de3d9..e993bf8854b2cf 100644 --- a/Modules/Setup +++ b/Modules/Setup @@ -181,6 +181,7 @@ _symtable symtablemodule.c #_elementtree -I$(srcdir)/Modules/expat -DHAVE_EXPAT_CONFIG_H -DUSE_PYEXPAT_CAPI _elementtree.c # elementtree accelerator #_pickle _pickle.c # pickle accelerator #_datetime _datetimemodule.c # datetime accelerator +#_czoneinfo zoneinfomodule.c # zoneinfo accelerator #_bisect _bisectmodule.c # Bisection algorithms #_heapq _heapqmodule.c # Heap queue algorithm #_asyncio _asynciomodule.c # Fast asyncio Future diff --git a/Modules/zoneinfomodule.c b/Modules/zoneinfomodule.c new file mode 100644 index 00000000000000..86233c131a0321 --- /dev/null +++ b/Modules/zoneinfomodule.c @@ -0,0 +1,2686 @@ +#include "Python.h" +#include "structmember.h" + +#include <ctype.h> +#include <stddef.h> +#include <stdint.h> + +#include "datetime.h" + +// Imports
+PyObject *io_open = NULL; +PyObject *_tzpath_find_tzfile = NULL; +PyObject *_common_mod = NULL; + +typedef struct TransitionRuleType TransitionRuleType; +typedef struct StrongCacheNode StrongCacheNode; + +typedef struct { + PyObject *utcoff; + PyObject *dstoff; + PyObject *tzname; + long utcoff_seconds; +} _ttinfo; + +typedef struct { + _ttinfo std; + _ttinfo dst; + int dst_diff; + TransitionRuleType *start; + TransitionRuleType *end; + unsigned char std_only; +} _tzrule; + +typedef struct { + PyDateTime_TZInfo base; + PyObject *key; + PyObject *file_repr; + PyObject *weakreflist; + unsigned int num_transitions; + unsigned int num_ttinfos; + int64_t *trans_list_utc; + int64_t *trans_list_wall[2]; + _ttinfo **trans_ttinfos; // References to the ttinfo for each transition + _ttinfo *ttinfo_before; + _tzrule tzrule_after; + _ttinfo *_ttinfos; // Unique array of ttinfos for ease of deallocation + unsigned char fixed_offset; + unsigned char source; +} PyZoneInfo_ZoneInfo; + +struct TransitionRuleType { + int64_t (*year_to_timestamp)(TransitionRuleType *, int); +}; + +typedef struct { + TransitionRuleType base; + uint8_t month; + uint8_t week; + uint8_t day; + int8_t hour; + int8_t minute; + int8_t second; +} CalendarRule; + +typedef struct { + TransitionRuleType base; + uint8_t julian; + unsigned int day; + int8_t hour; + int8_t minute; + int8_t second; +} DayRule; + +struct StrongCacheNode { + StrongCacheNode *next; + StrongCacheNode *prev; + PyObject *key; + PyObject *zone; +}; + +static PyTypeObject PyZoneInfo_ZoneInfoType; + +// Globals +static PyObject *TIMEDELTA_CACHE = NULL; +static PyObject *ZONEINFO_WEAK_CACHE = NULL; +static StrongCacheNode *ZONEINFO_STRONG_CACHE = NULL; +static size_t ZONEINFO_STRONG_CACHE_MAX_SIZE = 8; + +static _ttinfo NO_TTINFO = {NULL, NULL, NULL, 0}; + +// Constants +static const int EPOCHORDINAL = 719163; +static int DAYS_IN_MONTH[] = { + -1, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, +}; + +static int DAYS_BEFORE_MONTH[] = { + 
-1, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, +}; + +static const int SOURCE_NOCACHE = 0; +static const int SOURCE_CACHE = 1; +static const int SOURCE_FILE = 2; + +// Forward declarations +static int +load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj); +static void +utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs, + unsigned char *isdsts, size_t num_transitions, + size_t num_ttinfos); +static int +ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff, + int64_t *trans_local[2], size_t num_ttinfos, + size_t num_transitions); + +static int +parse_tz_str(PyObject *tz_str_obj, _tzrule *out); + +static ssize_t +parse_abbr(const char *const p, PyObject **abbr); +static ssize_t +parse_tz_delta(const char *const p, long *total_seconds); +static ssize_t +parse_transition_time(const char *const p, int8_t *hour, int8_t *minute, + int8_t *second); +static ssize_t +parse_transition_rule(const char *const p, TransitionRuleType **out); + +static _ttinfo * +find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year); +static _ttinfo * +find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year, + unsigned char *fold); + +static int +build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out); +static void +xdecref_ttinfo(_ttinfo *ttinfo); +static int +ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1); + +static int +build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset, + long dst_offset, TransitionRuleType *start, + TransitionRuleType *end, _tzrule *out); +static void +free_tzrule(_tzrule *tzrule); + +static PyObject * +load_timedelta(long seconds); + +static int +get_local_timestamp(PyObject *dt, int64_t *local_ts); +static _ttinfo * +find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt); + +static int +ymd_to_ord(int y, int m, int d); +static int +is_leap_year(int year); + +static size_t +_bisect(const int64_t value, const int64_t *arr, size_t size); + +static void 
+eject_from_strong_cache(const PyTypeObject *const type, PyObject *key); +static void +clear_strong_cache(const PyTypeObject *const type); +static void +update_strong_cache(const PyTypeObject *const type, PyObject *key, + PyObject *zone); +static PyObject * +zone_from_strong_cache(const PyTypeObject *const type, PyObject *key); + +static PyObject * +zoneinfo_new_instance(PyTypeObject *type, PyObject *key) +{ + PyObject *file_obj = NULL; + PyObject *file_path = NULL; + + file_path = PyObject_CallFunctionObjArgs(_tzpath_find_tzfile, key, NULL); + if (file_path == NULL) { + return NULL; + } + else if (file_path == Py_None) { + file_obj = PyObject_CallMethod(_common_mod, "load_tzdata", "O", key); + if (file_obj == NULL) { + return NULL; + } + } + + PyObject *self = (PyObject *)(type->tp_alloc(type, 0)); + if (self == NULL) { + goto error; + } + + if (file_obj == NULL) { + file_obj = PyObject_CallFunction(io_open, "Os", file_path, "rb"); + if (file_obj == NULL) { + goto error; + } + } + + if (load_data((PyZoneInfo_ZoneInfo *)self, file_obj)) { + goto error; + } + + PyObject *rv = PyObject_CallMethod(file_obj, "close", NULL); + Py_DECREF(file_obj); + file_obj = NULL; + if (rv == NULL) { + goto error; + } + Py_DECREF(rv); + + ((PyZoneInfo_ZoneInfo *)self)->key = key; + Py_INCREF(key); + + goto cleanup; +error: + Py_XDECREF(self); + self = NULL; +cleanup: + if (file_obj != NULL) { + PyObject_CallMethod(file_obj, "close", NULL); + Py_DECREF(file_obj); + } + Py_DECREF(file_path); + return self; +} + +static PyObject * +get_weak_cache(PyTypeObject *type) +{ + if (type == &PyZoneInfo_ZoneInfoType) { + return ZONEINFO_WEAK_CACHE; + } + else { + PyObject *cache = + PyObject_GetAttrString((PyObject *)type, "_weak_cache"); + // We are assuming that the type lives at least as long as the function + // that calls get_weak_cache, and that it holds a reference to the + // cache, so we'll return a "borrowed reference". 
+ Py_XDECREF(cache); + return cache; + } +} + +static PyObject * +zoneinfo_new(PyTypeObject *type, PyObject *args, PyObject *kw) +{ + PyObject *key = NULL; + static char *kwlist[] = {"key", NULL}; + if (PyArg_ParseTupleAndKeywords(args, kw, "O", kwlist, &key) == 0) { + return NULL; + } + + PyObject *instance = zone_from_strong_cache(type, key); + if (instance != NULL) { + return instance; + } + + PyObject *weak_cache = get_weak_cache(type); + instance = PyObject_CallMethod(weak_cache, "get", "O", key, Py_None); + if (instance == NULL) { + return NULL; + } + + if (instance == Py_None) { + PyObject *tmp = zoneinfo_new_instance(type, key); + if (tmp == NULL) { + return NULL; + } + + instance = + PyObject_CallMethod(weak_cache, "setdefault", "OO", key, tmp); + ((PyZoneInfo_ZoneInfo *)instance)->source = SOURCE_CACHE; + + Py_DECREF(tmp); + + if (instance == NULL) { + return NULL; + } + } + + update_strong_cache(type, key, instance); + return instance; +} + +static void +zoneinfo_dealloc(PyObject *obj_self) +{ + PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self; + + if (self->weakreflist != NULL) { + PyObject_ClearWeakRefs(obj_self); + } + + if (self->trans_list_utc != NULL) { + PyMem_Free(self->trans_list_utc); + } + + for (size_t i = 0; i < 2; i++) { + if (self->trans_list_wall[i] != NULL) { + PyMem_Free(self->trans_list_wall[i]); + } + } + + if (self->_ttinfos != NULL) { + for (size_t i = 0; i < self->num_ttinfos; ++i) { + xdecref_ttinfo(&(self->_ttinfos[i])); + } + PyMem_Free(self->_ttinfos); + } + + if (self->trans_ttinfos != NULL) { + PyMem_Free(self->trans_ttinfos); + } + + free_tzrule(&(self->tzrule_after)); + + Py_XDECREF(self->key); + Py_XDECREF(self->file_repr); +} + +static PyObject * +zoneinfo_from_file(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + PyObject *file_obj = NULL; + PyObject *file_repr = NULL; + PyObject *key = Py_None; + PyZoneInfo_ZoneInfo *self = NULL; + + static char *kwlist[] = {"", "key", NULL}; + if 
(!PyArg_ParseTupleAndKeywords(args, kwargs, "O|O", kwlist, &file_obj, + &key)) { + return NULL; + } + + PyObject *obj_self = (PyObject *)(type->tp_alloc(type, 0)); + self = (PyZoneInfo_ZoneInfo *)obj_self; + if (self == NULL) { + return NULL; + } + + file_repr = PyUnicode_FromFormat("%R", file_obj); + if (file_repr == NULL) { + goto error; + } + + if (load_data(self, file_obj)) { + goto error; + } + + self->source = SOURCE_FILE; + self->file_repr = file_repr; + self->key = key; + Py_INCREF(key); + + return obj_self; +error: + Py_XDECREF(file_repr); + Py_XDECREF(self); + return NULL; +} + +static PyObject * +zoneinfo_no_cache(PyTypeObject *cls, PyObject *args, PyObject *kwargs) +{ + static char *kwlist[] = {"key", NULL}; + PyObject *key = NULL; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", kwlist, &key)) { + return NULL; + } + + PyObject *out = zoneinfo_new_instance(cls, key); + if (out != NULL) { + ((PyZoneInfo_ZoneInfo *)out)->source = SOURCE_NOCACHE; + } + + return out; +} + +static PyObject * +zoneinfo_clear_cache(PyObject *cls, PyObject *args, PyObject *kwargs) +{ + PyObject *only_keys = NULL; + static char *kwlist[] = {"only_keys", NULL}; + + if (!(PyArg_ParseTupleAndKeywords(args, kwargs, "|$O", kwlist, + &only_keys))) { + return NULL; + } + + PyTypeObject *type = (PyTypeObject *)cls; + PyObject *weak_cache = get_weak_cache(type); + + if (only_keys == NULL || only_keys == Py_None) { + PyObject *rv = PyObject_CallMethod(weak_cache, "clear", NULL); + if (rv != NULL) { + Py_DECREF(rv); + } + + clear_strong_cache(type); + ZONEINFO_STRONG_CACHE = NULL; + } + else { + PyObject *item = NULL; + PyObject *pop = PyUnicode_FromString("pop"); + if (pop == NULL) { + return NULL; + } + + PyObject *iter = PyObject_GetIter(only_keys); + if (iter == NULL) { + Py_DECREF(pop); + return NULL; + } + + while ((item = PyIter_Next(iter))) { + // Remove from strong cache + eject_from_strong_cache(type, item); + + // Remove from weak cache + PyObject *tmp = 
PyObject_CallMethodObjArgs(weak_cache, pop, item, + Py_None, NULL); + + Py_DECREF(item); + if (tmp == NULL) { + break; + } + Py_DECREF(tmp); + } + Py_DECREF(iter); + Py_DECREF(pop); + } + + if (PyErr_Occurred()) { + return NULL; + } + + Py_RETURN_NONE; +} + +static PyObject * +zoneinfo_utcoffset(PyObject *self, PyObject *dt) +{ + _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt); + if (tti == NULL) { + return NULL; + } + Py_INCREF(tti->utcoff); + return tti->utcoff; +} + +static PyObject * +zoneinfo_dst(PyObject *self, PyObject *dt) +{ + _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt); + if (tti == NULL) { + return NULL; + } + Py_INCREF(tti->dstoff); + return tti->dstoff; +} + +static PyObject * +zoneinfo_tzname(PyObject *self, PyObject *dt) +{ + _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt); + if (tti == NULL) { + return NULL; + } + Py_INCREF(tti->tzname); + return tti->tzname; +} + +#define HASTZINFO(p) (((_PyDateTime_BaseTZInfo *)(p))->hastzinfo) +#define GET_DT_TZINFO(p) \ + (HASTZINFO(p) ? 
((PyDateTime_DateTime *)(p))->tzinfo : Py_None) + +static PyObject * +zoneinfo_fromutc(PyObject *obj_self, PyObject *dt) +{ + if (!PyDateTime_Check(dt)) { + PyErr_SetString(PyExc_TypeError, + "fromutc: argument must be a datetime"); + return NULL; + } + if (GET_DT_TZINFO(dt) != obj_self) { + PyErr_SetString(PyExc_ValueError, + "fromutc: dt.tzinfo " + "is not self"); + return NULL; + } + + PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self; + + int64_t timestamp; + if (get_local_timestamp(dt, &timestamp)) { + return NULL; + } + size_t num_trans = self->num_transitions; + + _ttinfo *tti = NULL; + unsigned char fold = 0; + + if (num_trans >= 1 && timestamp < self->trans_list_utc[0]) { + tti = self->ttinfo_before; + } + else if (num_trans == 0 || + timestamp > self->trans_list_utc[num_trans - 1]) { + tti = find_tzrule_ttinfo_fromutc(&(self->tzrule_after), timestamp, + PyDateTime_GET_YEAR(dt), &fold); + + // Immediately after the last manual transition, the fold/gap is + // between self->trans_ttinfos[num_transitions - 1] and whatever + // ttinfo applies immediately after the last transition, not between + // the STD and DST rules in the tzrule_after, so we may need to + // adjust the fold value.
+ if (num_trans) { + _ttinfo *tti_prev = NULL; + if (num_trans == 1) { + tti_prev = self->ttinfo_before; + } + else { + tti_prev = self->trans_ttinfos[num_trans - 2]; + } + int64_t diff = tti_prev->utcoff_seconds - tti->utcoff_seconds; + if (diff > 0 && + timestamp < (self->trans_list_utc[num_trans - 1] + diff)) { + fold = 1; + } + } + } + else { + size_t idx = _bisect(timestamp, self->trans_list_utc, num_trans); + _ttinfo *tti_prev = NULL; + + if (idx >= 2) { + tti_prev = self->trans_ttinfos[idx - 2]; + tti = self->trans_ttinfos[idx - 1]; + } + else { + tti_prev = self->ttinfo_before; + tti = self->trans_ttinfos[0]; + } + + // Detect fold + int64_t shift = + (int64_t)(tti_prev->utcoff_seconds - tti->utcoff_seconds); + if (shift > (timestamp - self->trans_list_utc[idx - 1])) { + fold = 1; + } + } + + PyObject *tmp = PyNumber_Add(dt, tti->utcoff); + if (tmp == NULL) { + return NULL; + } + + if (fold) { + if (PyDateTime_CheckExact(tmp)) { + ((PyDateTime_DateTime *)tmp)->fold = 1; + dt = tmp; + } + else { + PyObject *replace = PyObject_GetAttrString(tmp, "replace"); + PyObject *args = PyTuple_New(0); + PyObject *kwargs = PyDict_New(); + PyObject *one = PyLong_FromLong(1); + + Py_DECREF(tmp); + if (args == NULL || kwargs == NULL || replace == NULL || + one == NULL) { + Py_XDECREF(args); + Py_XDECREF(kwargs); + Py_XDECREF(replace); + Py_XDECREF(one); + return NULL; + } + + dt = NULL; + if (!PyDict_SetItemString(kwargs, "fold", one)) { + dt = PyObject_Call(replace, args, kwargs); + } + + Py_DECREF(args); + Py_DECREF(kwargs); + Py_DECREF(replace); + + if (dt == NULL) { + return NULL; + } + } + } + else { + dt = tmp; + } + return dt; +} + +static PyObject * +zoneinfo_repr(PyZoneInfo_ZoneInfo *self) +{ + PyObject *rv = NULL; + const char *type_name = Py_TYPE((PyObject *)self)->tp_name; + if (!(self->key == Py_None)) { + rv = PyUnicode_FromFormat("%s(key=%R)", type_name, self->key); + } + else { + assert(PyUnicode_Check(self->file_repr)); + rv = 
PyUnicode_FromFormat("%s.from_file(%U)", type_name, + self->file_repr); + } + + return rv; +} + +static PyObject * +zoneinfo_str(PyZoneInfo_ZoneInfo *self) +{ + if (!(self->key == Py_None)) { + Py_INCREF(self->key); + return self->key; + } + else { + return zoneinfo_repr(self); + } +} + +/* Pickles the ZoneInfo object by key and source. + * + * ZoneInfo objects are pickled by reference to the TZif file that they came + * from, which means that the exact transitions may be different or the file + * may not un-pickle if the data has changed on disk in the interim. + * + * It is necessary to include a bit indicating whether or not the object + * was constructed from the cache, because from-cache objects will hit the + * unpickling process's cache, whereas no-cache objects will bypass it. + * + * Objects constructed from ZoneInfo.from_file cannot be pickled. + */ +static PyObject * +zoneinfo_reduce(PyObject *obj_self, PyObject *unused) +{ + PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self; + if (self->source == SOURCE_FILE) { + // Objects constructed from files cannot be pickled. + PyObject *pickle = PyImport_ImportModule("pickle"); + if (pickle == NULL) { + return NULL; + } + + PyObject *pickle_error = + PyObject_GetAttrString(pickle, "PicklingError"); + Py_DECREF(pickle); + if (pickle_error == NULL) { + return NULL; + } + + PyErr_Format(pickle_error, + "Cannot pickle a ZoneInfo file from a file stream."); + Py_DECREF(pickle_error); + return NULL; + } + + unsigned char from_cache = self->source == SOURCE_CACHE ? 
1 : 0; + PyObject *constructor = PyObject_GetAttrString(obj_self, "_unpickle"); + + if (constructor == NULL) { + return NULL; + } + + PyObject *rv = Py_BuildValue("O(OB)", constructor, self->key, from_cache); + Py_DECREF(constructor); + return rv; +} + +static PyObject * +zoneinfo__unpickle(PyTypeObject *cls, PyObject *args) +{ + PyObject *key; + unsigned char from_cache; + if (!PyArg_ParseTuple(args, "OB", &key, &from_cache)) { + return NULL; + } + + if (from_cache) { + PyObject *val_args = Py_BuildValue("(O)", key); + if (val_args == NULL) { + return NULL; + } + + PyObject *rv = zoneinfo_new(cls, val_args, NULL); + + Py_DECREF(val_args); + return rv; + } + else { + return zoneinfo_new_instance(cls, key); + } +} + +/* It is relatively expensive to construct new timedelta objects, and in most + * cases we're looking at a relatively small number of timedeltas, such as + * integer number of hours, etc. We will keep a cache so that we construct + * a minimal number of these. + * + * Possibly this should be replaced with an LRU cache so that it's not possible + * for the memory usage to explode from this, but in order for this to be a + * serious problem, one would need to deliberately craft a malicious time zone + * file with many distinct offsets. As of tzdb 2019c, loading every single zone + * fills the cache with ~450 timedeltas for a total size of ~12kB. + * + * This returns a new reference to the timedelta. 
+ */ +static PyObject * +load_timedelta(long seconds) +{ + PyObject *rv = NULL; + PyObject *pyoffset = PyLong_FromLong(seconds); + if (pyoffset == NULL) { + return NULL; + } + int contains = PyDict_Contains(TIMEDELTA_CACHE, pyoffset); + if (contains == -1) { + goto error; + } + + if (!contains) { + PyObject *tmp = PyDateTimeAPI->Delta_FromDelta( + 0, seconds, 0, 1, PyDateTimeAPI->DeltaType); + + if (tmp == NULL) { + goto error; + } + + rv = PyDict_SetDefault(TIMEDELTA_CACHE, pyoffset, tmp); + Py_DECREF(tmp); + } + else { + rv = PyDict_GetItem(TIMEDELTA_CACHE, pyoffset); + } + + Py_DECREF(pyoffset); + Py_INCREF(rv); + return rv; +error: + Py_DECREF(pyoffset); + return NULL; +} + +/* Constructor for _ttinfo object - this starts by initializing the _ttinfo + * to { NULL, NULL, NULL }, so that Py_XDECREF will work on partially + * initialized _ttinfo objects. + */ +static int +build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out) +{ + out->utcoff = NULL; + out->dstoff = NULL; + out->tzname = NULL; + + out->utcoff_seconds = utcoffset; + out->utcoff = load_timedelta(utcoffset); + if (out->utcoff == NULL) { + return -1; + } + + out->dstoff = load_timedelta(dstoffset); + if (out->dstoff == NULL) { + return -1; + } + + out->tzname = tzname; + Py_INCREF(tzname); + + return 0; +} + +/* Decrease reference count on any non-NULL members of a _ttinfo */ +static void +xdecref_ttinfo(_ttinfo *ttinfo) +{ + if (ttinfo != NULL) { + Py_XDECREF(ttinfo->utcoff); + Py_XDECREF(ttinfo->dstoff); + Py_XDECREF(ttinfo->tzname); + } +} + +/* Equality function for _ttinfo. 
*/ +static int +ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1) +{ + int rv; + if ((rv = PyObject_RichCompareBool(tti0->utcoff, tti1->utcoff, Py_EQ)) < + 1) { + goto end; + } + + if ((rv = PyObject_RichCompareBool(tti0->dstoff, tti1->dstoff, Py_EQ)) < + 1) { + goto end; + } + + if ((rv = PyObject_RichCompareBool(tti0->tzname, tti1->tzname, Py_EQ)) < + 1) { + goto end; + } +end: + return rv; +} + +/* Given a file-like object, this populates a ZoneInfo object + * + * The current version calls into a Python function to read the data from + * file into Python objects, and this translates those Python objects into + * C values and calculates derived values (e.g. dstoff) in C. + * + * This returns 0 on success and -1 on failure. + * + * The function will never return while `self` is partially initialized — + * the object only needs to be freed / deallocated if this succeeds. + */ +static int +load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj) +{ + PyObject *data_tuple = NULL; + + long *utcoff = NULL; + long *dstoff = NULL; + size_t *trans_idx = NULL; + unsigned char *isdst = NULL; + + self->trans_list_utc = NULL; + self->trans_list_wall[0] = NULL; + self->trans_list_wall[1] = NULL; + self->trans_ttinfos = NULL; + self->_ttinfos = NULL; + self->file_repr = NULL; + + size_t ttinfos_allocated = 0; + + data_tuple = PyObject_CallMethod(_common_mod, "load_data", "O", file_obj); + if (data_tuple == NULL) { + goto error; + } + + // Unpack the data tuple + PyObject *trans_idx_list = PyTuple_GetItem(data_tuple, 0); + if (trans_idx_list == NULL) { + goto error; + } + + PyObject *trans_utc = PyTuple_GetItem(data_tuple, 1); + if (trans_utc == NULL) { + goto error; + } + + PyObject *utcoff_list = PyTuple_GetItem(data_tuple, 2); + if (utcoff_list == NULL) { + goto error; + } + + PyObject *isdst_list = PyTuple_GetItem(data_tuple, 3); + if (isdst_list == NULL) { + goto error; + } + + PyObject *abbr = PyTuple_GetItem(data_tuple, 4); + if (abbr == NULL) { + goto 
error; + } + + PyObject *tz_str = PyTuple_GetItem(data_tuple, 5); + if (tz_str == NULL) { + goto error; + } + + // Load the relevant sizes + Py_ssize_t num_transitions = PyTuple_Size(trans_utc); + if (num_transitions == -1) { + goto error; + } + + Py_ssize_t num_ttinfos = PyTuple_Size(utcoff_list); + if (num_ttinfos == -1) { + goto error; + } + + self->num_transitions = (size_t)num_transitions; + self->num_ttinfos = (size_t)num_ttinfos; + + // Load the transition indices and list + self->trans_list_utc = + PyMem_Malloc(self->num_transitions * sizeof(int64_t)); + trans_idx = PyMem_Malloc(self->num_transitions * sizeof(Py_ssize_t)); + + for (Py_ssize_t i = 0; i < self->num_transitions; ++i) { + PyObject *num = PyTuple_GetItem(trans_utc, i); + if (num == NULL) { + goto error; + } + self->trans_list_utc[i] = PyLong_AsLongLong(num); + if (self->trans_list_utc[i] == -1 && PyErr_Occurred()) { + goto error; + } + + num = PyTuple_GetItem(trans_idx_list, i); + if (num == NULL) { + goto error; + } + + Py_ssize_t cur_trans_idx = PyLong_AsSsize_t(num); + if (cur_trans_idx == -1) { + goto error; + } + + trans_idx[i] = (size_t)cur_trans_idx; + if (trans_idx[i] > self->num_ttinfos) { + PyErr_Format( + PyExc_ValueError, + "Invalid transition index found while reading TZif: %zd", + cur_trans_idx); + + goto error; + } + } + + // Load UTC offsets and isdst (size num_ttinfos) + utcoff = PyMem_Malloc(self->num_ttinfos * sizeof(long)); + isdst = PyMem_Malloc(self->num_ttinfos * sizeof(unsigned char)); + + if (utcoff == NULL || isdst == NULL) { + goto error; + } + for (Py_ssize_t i = 0; i < self->num_ttinfos; ++i) { + PyObject *num = PyTuple_GetItem(utcoff_list, i); + if (num == NULL) { + goto error; + } + + utcoff[i] = PyLong_AsLong(num); + if (utcoff[i] == -1 && PyErr_Occurred()) { + goto error; + } + + num = PyTuple_GetItem(isdst_list, i); + if (num == NULL) { + goto error; + } + + int isdst_with_error = PyObject_IsTrue(num); + if (isdst_with_error == -1) { + goto error; + } + else { + 
isdst[i] = (unsigned char)isdst_with_error; + } + } + + dstoff = PyMem_Calloc(self->num_ttinfos, sizeof(long)); + if (dstoff == NULL) { + goto error; + } + + // Derive dstoff and trans_list_wall from the information we've loaded + utcoff_to_dstoff(trans_idx, utcoff, dstoff, isdst, self->num_transitions, + self->num_ttinfos); + + if (ts_to_local(trans_idx, self->trans_list_utc, utcoff, + self->trans_list_wall, self->num_ttinfos, + self->num_transitions)) { + goto error; + } + + // Build _ttinfo objects from utcoff, dstoff and abbr + self->_ttinfos = PyMem_Malloc(self->num_ttinfos * sizeof(_ttinfo)); + for (size_t i = 0; i < self->num_ttinfos; ++i) { + PyObject *tzname = PyTuple_GetItem(abbr, i); + if (tzname == NULL) { + goto error; + } + + ttinfos_allocated++; + if (build_ttinfo(utcoff[i], dstoff[i], tzname, &(self->_ttinfos[i]))) { + goto error; + } + } + + // Build our mapping from transition to the ttinfo that applies + self->trans_ttinfos = + PyMem_Calloc(self->num_transitions, sizeof(_ttinfo *)); + for (size_t i = 0; i < self->num_transitions; ++i) { + size_t ttinfo_idx = trans_idx[i]; + assert(ttinfo_idx < self->num_ttinfos); + self->trans_ttinfos[i] = &(self->_ttinfos[ttinfo_idx]); + } + + // Set ttinfo_before to the first non-DST transition + for (size_t i = 0; i < self->num_ttinfos; ++i) { + if (!isdst[i]) { + self->ttinfo_before = &(self->_ttinfos[i]); + break; + } + } + + // If there are only DST ttinfos, pick the first one, if there are no + // ttinfos at all, set ttinfo_before to NULL + if (self->ttinfo_before == NULL && self->num_ttinfos > 0) { + self->ttinfo_before = &(self->_ttinfos[0]); + } + + if (tz_str != Py_None && PyObject_IsTrue(tz_str)) { + if (parse_tz_str(tz_str, &(self->tzrule_after))) { + goto error; + } + } + else { + if (!self->num_ttinfos) { + PyErr_Format(PyExc_ValueError, "No time zone information found."); + goto error; + } + + size_t idx; + if (!self->num_transitions) { + idx = self->num_ttinfos - 1; + } + else { + idx = 
trans_idx[self->num_transitions - 1]; + } + + _ttinfo *tti = &(self->_ttinfos[idx]); + build_tzrule(tti->tzname, NULL, tti->utcoff_seconds, 0, NULL, NULL, + &(self->tzrule_after)); + + // We've abused the build_tzrule constructor to construct an STD-only + // rule mimicking whatever ttinfo we've picked up, but it's possible + // that the one we've picked up is a DST zone, so we need to make sure + // that the dstoff is set correctly in that case. + if (PyObject_IsTrue(tti->dstoff)) { + _ttinfo *tti_after = &(self->tzrule_after.std); + Py_DECREF(tti_after->dstoff); + tti_after->dstoff = tti->dstoff; + Py_INCREF(tti_after->dstoff); + } + } + + // Determine if this is a "fixed offset" zone, meaning that the output of + // the utcoffset, dst and tzname functions does not depend on the specific + // datetime passed. + // + // We make three simplifying assumptions here: + // + // 1. If tzrule_after is not std_only, it has transitions that might occur + // (it is possible to construct TZ strings that specify STD and DST but + // no transitions ever occur, such as AAA0BBB,0/0,J365/25). + // 2. If self->_ttinfos contains more than one _ttinfo object, the objects + // represent different offsets. + // 3. self->ttinfos contains no unused _ttinfos (in which case an otherwise + // fixed-offset zone with extra _ttinfos defined may appear to *not* be + // a fixed offset zone). + // + // Violations to these assumptions would be fairly exotic, and exotic + // zones should almost certainly not be used with datetime.time (the + // only thing that would be affected by this). 
+ if (self->num_ttinfos > 1 || !self->tzrule_after.std_only) { + self->fixed_offset = 0; + } + else if (self->num_ttinfos == 0) { + self->fixed_offset = 1; + } + else { + int constant_offset = + ttinfo_eq(&(self->_ttinfos[0]), &self->tzrule_after.std); + if (constant_offset < 0) { + goto error; + } + else { + self->fixed_offset = constant_offset; + } + } + + int rv = 0; + goto cleanup; +error: + // These resources only need to be freed if we have failed, if we succeed + // in initializing a PyZoneInfo_ZoneInfo object, we can rely on its dealloc + // method to free the relevant resources. + if (self->trans_list_utc != NULL) { + PyMem_Free(self->trans_list_utc); + self->trans_list_utc = NULL; + } + + for (size_t i = 0; i < 2; ++i) { + if (self->trans_list_wall[i] != NULL) { + PyMem_Free(self->trans_list_wall[i]); + self->trans_list_wall[i] = NULL; + } + } + + if (self->_ttinfos != NULL) { + for (size_t i = 0; i < ttinfos_allocated; ++i) { + xdecref_ttinfo(&(self->_ttinfos[i])); + } + PyMem_Free(self->_ttinfos); + self->_ttinfos = NULL; + } + + if (self->trans_ttinfos != NULL) { + PyMem_Free(self->trans_ttinfos); + self->trans_ttinfos = NULL; + } + + rv = -1; +cleanup: + Py_XDECREF(data_tuple); + + if (utcoff != NULL) { + PyMem_Free(utcoff); + } + + if (dstoff != NULL) { + PyMem_Free(dstoff); + } + + if (isdst != NULL) { + PyMem_Free(isdst); + } + + if (trans_idx != NULL) { + PyMem_Free(trans_idx); + } + + return rv; +} + +/* Function to calculate the local timestamp of a transition from the year. */ +int64_t +calendarrule_year_to_timestamp(TransitionRuleType *base_self, int year) +{ + CalendarRule *self = (CalendarRule *)base_self; + + // We want (year, month, day of month); we have year and month, but we + // need to turn (week, day-of-week) into day-of-month + // + // Week 1 is the first week in which day `day` (where 0 = Sunday) appears. 
+ // Week 5 represents the last occurrence of day `day`, so we need to know + // the first weekday of the month and the number of days in the month. + int8_t first_day = (ymd_to_ord(year, self->month, 1) + 6) % 7; + uint8_t days_in_month = DAYS_IN_MONTH[self->month]; + if (self->month == 2 && is_leap_year(year)) { + days_in_month += 1; + } + + // This equation seems magical, so I'll break it down: + // 1. calendar says 0 = Monday, POSIX says 0 = Sunday so we need first_day + // + 1 to get 1 = Monday -> 7 = Sunday, which is still equivalent + // because this math is mod 7 + // 2. Get first day - desired day mod 7 (adjusting by 7 for negative + // numbers so that -1 % 7 = 6). + // 3. Add 1 because month days are a 1-based index. + int8_t month_day = ((int8_t)(self->day) - (first_day + 1)) % 7; + if (month_day < 0) { + month_day += 7; + } + month_day += 1; + + // Now use a 0-based index version of `week` to calculate the w-th + // occurrence of `day` + month_day += ((int8_t)(self->week) - 1) * 7; + + // month_day will only be > days_in_month if w was 5, and `w` means "last + // occurrence of `d`", so now we just check if we over-shot the end of the + // month and if so knock off 1 week. + if (month_day > days_in_month) { + month_day -= 7; + } + + int64_t ordinal = ymd_to_ord(year, self->month, month_day) - EPOCHORDINAL; + return ((ordinal * 86400) + (int64_t)(self->hour * 3600) + + (int64_t)(self->minute * 60) + (int64_t)(self->second)); +} + +/* Constructor for CalendarRule. */ +int +calendarrule_new(uint8_t month, uint8_t week, uint8_t day, int8_t hour, + int8_t minute, int8_t second, CalendarRule *out) +{ + // These bounds come from the POSIX standard, which describes an Mm.n.d + // rule as: + // + // The d'th day (0 <= d <= 6) of week n of month m of the year (1 <= n <= + // 5, 1 <= m <= 12, where week 5 means "the last d day in month m" which + // may occur in either the fourth or the fifth week). Week 1 is the first + // week in which the d'th day occurs. 
Day zero is Sunday. + if (month <= 0 || month > 12) { + PyErr_Format(PyExc_ValueError, "Month must be in (0, 12]"); + return -1; + } + + if (week <= 0 || week > 5) { + PyErr_Format(PyExc_ValueError, "Week must be in (0, 5]"); + return -1; + } + + // day is an unsigned integer, so day < 0 should always return false, but + // if day's type changes to a signed integer *without* changing this value, + // it may create a bug. Considering that the compiler should be able to + // optimize out the first comparison if day is an unsigned integer anyway, + // we will leave this comparison in place and disable the compiler warning. +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wtype-limits" + if (day < 0 || day > 6) { +#pragma GCC diagnostic pop + PyErr_Format(PyExc_ValueError, "Day must be in [0, 6]"); + return -1; + } + + TransitionRuleType base = {&calendarrule_year_to_timestamp}; + + CalendarRule new_offset = { + .base = base, + .month = month, + .week = week, + .day = day, + .hour = hour, + .minute = minute, + .second = second, + }; + + *out = new_offset; + return 0; +} + +/* Function to calculate the local timestamp of a transition from the year. + * + * This translates the day of the year into a local timestamp — either a + * 1-based Julian day, not including leap days, or the 0-based year-day, + * including leap days. + * */ +int64_t +dayrule_year_to_timestamp(TransitionRuleType *base_self, int year) +{ + // The function signature requires a TransitionRuleType pointer, but this + // function is only applicable to DayRule* objects. + DayRule *self = (DayRule *)base_self; + + // ymd_to_ord calculates the number of days since 0001-01-01, but we want + // to know the number of days since 1970-01-01, so we must subtract off + // the equivalent of ymd_to_ord(1970, 1, 1). 
+ // + // We subtract off an additional 1 day to account for January 1st (we want + // the number of full days *before* the date of the transition - partial + // days are accounted for in the hour, minute and second portions. + int64_t days_before_year = ymd_to_ord(year, 1, 1) - EPOCHORDINAL - 1; + + // The Julian day specification skips over February 29th in leap years, + // from the POSIX standard: + // + // Leap days shall not be counted. That is, in all years-including leap + // years-February 28 is day 59 and March 1 is day 60. It is impossible to + // refer explicitly to the occasional February 29. + // + // This is actually more useful than you'd think — if you want a rule that + // always transitions on a given calendar day (other than February 29th), + // you would use a Julian day, e.g. J91 always refers to April 1st and J365 + // always refers to December 31st. + unsigned int day = self->day; + if (self->julian && day >= 59 && is_leap_year(year)) { + day += 1; + } + + return ((days_before_year + day) * 86400) + (self->hour * 3600) + + (self->minute * 60) + self->second; +} + +/* Constructor for DayRule. */ +static int +dayrule_new(uint8_t julian, unsigned int day, int8_t hour, int8_t minute, + int8_t second, DayRule *out) +{ + // The POSIX standard specifies that Julian days must be in the range (1 <= + // n <= 365) and that non-Julian (they call it "0-based Julian") days must + // be in the range (0 <= n <= 365). + if (day < julian || day > 365) { + PyErr_Format(PyExc_ValueError, "day must be in [%u, 365], not: %u", + julian, day); + return -1; + } + + TransitionRuleType base = { + &dayrule_year_to_timestamp, + }; + + DayRule tmp = { + .base = base, + .julian = julian, + .day = day, + .hour = hour, + .minute = minute, + .second = second, + }; + + *out = tmp; + + return 0; +} + +/* Calculate the start and end rules for a _tzrule in the given year. 
*/ +static void +tzrule_transitions(_tzrule *rule, int year, int64_t *start, int64_t *end) +{ + assert(rule->start != NULL); + assert(rule->end != NULL); + *start = rule->start->year_to_timestamp(rule->start, year); + *end = rule->end->year_to_timestamp(rule->end, year); +} + +/* Calculate the _ttinfo that applies at a given local time from a _tzrule. + * + * This takes a local timestamp and fold for disambiguation purposes; the year + * could technically be calculated from the timestamp, but given that the + * callers of this function already have the year information accessible from + * the datetime struct, it is taken as an additional parameter to reduce + * unncessary calculation. + * */ +static _ttinfo * +find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year) +{ + if (rule->std_only) { + return &(rule->std); + } + + int64_t start, end; + uint8_t isdst; + + tzrule_transitions(rule, year, &start, &end); + + // With fold = 0, the period (denominated in local time) with the smaller + // offset starts at the end of the gap and ends at the end of the fold; + // with fold = 1, it runs from the start of the gap to the beginning of the + // fold. + // + // So in order to determine the DST boundaries we need to know both the + // fold and whether DST is positive or negative (rare), and it turns out + // that this boils down to fold XOR is_positive. + if (fold == (rule->dst_diff >= 0)) { + end -= rule->dst_diff; + } + else { + start += rule->dst_diff; + } + + if (start < end) { + isdst = (ts >= start) && (ts < end); + } + else { + isdst = (ts < end) || (ts >= start); + } + + if (isdst) { + return &(rule->dst); + } + else { + return &(rule->std); + } +} + +/* Calculate the ttinfo and fold that applies for a _tzrule at an epoch time. + * + * This function can determine the _ttinfo that applies at a given epoch time, + * (analogous to trans_list_utc), and whether or not the datetime is in a fold. + * This is to be used in the .fromutc() function. 
+ * + * The year is technically a redundant parameter, because it can be calculated + * from the timestamp, but all callers of this function should have the year + * in the datetime struct anyway, so taking it as a parameter saves unnecessary + * calculation. + **/ +static _ttinfo * +find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year, + unsigned char *fold) +{ + if (rule->std_only) { + *fold = 0; + return &(rule->std); + } + + int64_t start, end; + uint8_t isdst; + tzrule_transitions(rule, year, &start, &end); + start -= rule->std.utcoff_seconds; + end -= rule->dst.utcoff_seconds; + + if (start < end) { + isdst = (ts >= start) && (ts < end); + } + else { + isdst = (ts < end) || (ts >= start); + } + + // For positive DST, the ambiguous period is one dst_diff after the end of + // DST; for negative DST, the ambiguous period is one dst_diff before the + // start of DST. + int64_t ambig_start, ambig_end; + if (rule->dst_diff > 0) { + ambig_start = end; + ambig_end = end + rule->dst_diff; + } + else { + ambig_start = start; + ambig_end = start - rule->dst_diff; + } + + *fold = (ts >= ambig_start) && (ts < ambig_end); + + if (isdst) { + return &(rule->dst); + } + else { + return &(rule->std); + } +} + +/* Parse a TZ string in the format specified by the POSIX standard: + * + * std offset[dst[offset],start[/time],end[/time]] + * + * std and dst must be 3 or more characters long and must not contain a + * leading colon, embedded digits, commas, nor a plus or minus signs; The + * spaces between "std" and "offset" are only for display and are not actually + * present in the string. 
+ * + * The format of the offset is ``[+|-]hh[:mm[:ss]]`` + * + * See the POSIX.1 spec: IEE Std 1003.1-2018 §8.3: + * + * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html + */ +static int +parse_tz_str(PyObject *tz_str_obj, _tzrule *out) +{ + PyObject *std_abbr = NULL; + PyObject *dst_abbr = NULL; + TransitionRuleType *start = NULL; + TransitionRuleType *end = NULL; + long std_offset, dst_offset; + + char *tz_str = PyBytes_AsString(tz_str_obj); + if (tz_str == NULL) { + return -1; + } + char *p = tz_str; + + // Read the `std` abbreviation, which must be at least 3 characters long. + ssize_t num_chars = parse_abbr(p, &std_abbr); + if (num_chars < 1) { + PyErr_Format(PyExc_ValueError, "Invalid STD format in %R", tz_str_obj); + goto error; + } + + p += num_chars; + + // Now read the STD offset, which is required + num_chars = parse_tz_delta(p, &std_offset); + if (num_chars < 0) { + PyErr_Format(PyExc_ValueError, "Invalid STD offset in %R", tz_str_obj); + goto error; + } + p += num_chars; + + // If the string ends here, there is no DST, otherwise we must parse the + // DST abbreviation and start and end dates and times. + if (*p == '\0') { + goto complete; + } + + num_chars = parse_abbr(p, &dst_abbr); + if (num_chars < 1) { + PyErr_Format(PyExc_ValueError, "Invalid DST format in %R", tz_str_obj); + goto error; + } + p += num_chars; + + if (*p == ',') { + // From the POSIX standard: + // + // If no offset follows dst, the alternative time is assumed to be one + // hour ahead of standard time. 
+ dst_offset = std_offset + 3600; + } + else { + num_chars = parse_tz_delta(p, &dst_offset); + if (num_chars < 0) { + PyErr_Format(PyExc_ValueError, "Invalid DST offset in %R", + tz_str_obj); + goto error; + } + + p += num_chars; + } + + TransitionRuleType **transitions[2] = {&start, &end}; + for (size_t i = 0; i < 2; ++i) { + if (*p != ',') { + PyErr_Format(PyExc_ValueError, + "Missing transition rules in TZ string: %R", + tz_str_obj); + goto error; + } + p++; + + num_chars = parse_transition_rule(p, transitions[i]); + if (num_chars < 0) { + PyErr_Format(PyExc_ValueError, + "Malformed transition rule in TZ string: %R", + tz_str_obj); + goto error; + } + p += num_chars; + } + + if (*p != '\0') { + PyErr_Format(PyExc_ValueError, + "Extraneous characters at end of TZ string: %R", + tz_str_obj); + goto error; + } + +complete: + build_tzrule(std_abbr, dst_abbr, std_offset, dst_offset, start, end, out); + Py_DECREF(std_abbr); + Py_XDECREF(dst_abbr); + + return 0; +error: + Py_XDECREF(std_abbr); + if (dst_abbr != NULL && dst_abbr != Py_None) { + Py_DECREF(dst_abbr); + } + + if (start != NULL) { + PyMem_Free(start); + } + + if (end != NULL) { + PyMem_Free(end); + } + + return -1; +} + +static ssize_t +parse_uint(const char *const p) +{ + if (!isdigit(*p)) { + return -1; + } + + return (*p) - '0'; +} + +/* Parse the STD and DST abbreviations from a TZ string. */ +static ssize_t +parse_abbr(const char *const p, PyObject **abbr) +{ + const char *ptr = p; + char buff = *ptr; + const char *str_start; + const char *str_end; + + if (*ptr == '<') { + ptr++; + str_start = ptr; + while ((buff = *ptr) != '>') { + // From the POSIX standard: + // + // In the quoted form, the first character shall be the less-than + // ( '<' ) character and the last character shall be the + // greater-than ( '>' ) character. 
All characters between these + // quoting characters shall be alphanumeric characters from the + // portable character set in the current locale, the plus-sign ( + // '+' ) character, or the minus-sign ( '-' ) character. The std + // and dst fields in this case shall not include the quoting + // characters. + if (!isalpha(buff) && !isdigit(buff) && buff != '+' && + buff != '-') { + return -1; + } + ptr++; + } + str_end = ptr; + ptr++; + } + else { + str_start = p; + // From the POSIX standard: + // + // In the unquoted form, all characters in these fields shall be + // alphabetic characters from the portable character set in the + // current locale. + while (isalpha(*ptr)) { + ptr++; + } + str_end = ptr; + } + + *abbr = PyUnicode_FromStringAndSize(str_start, str_end - str_start); + if (abbr == NULL) { + return -1; + } + + return ptr - p; +} + +/* Parse a UTC offset from a TZ str. */ +static ssize_t +parse_tz_delta(const char *const p, long *total_seconds) +{ + // From the POSIX spec: + // + // Indicates the value added to the local time to arrive at Coordinated + // Universal Time. The offset has the form: + // + // hh[:mm[:ss]] + // + // One or more digits may be used; the value is always interpreted as a + // decimal number. + // + // The POSIX spec says that the values for `hour` must be between 0 and 24 + // hours, but RFC 8536 §3.3.1 specifies that the hours part of the + // transition times may be signed and range from -167 to 167. + long sign = -1; + long hours = 0; + long minutes = 0; + long seconds = 0; + + const char *ptr = p; + char buff = *ptr; + if (buff == '-' || buff == '+') { + // Negative numbers correspond to *positive* offsets, from the spec: + // + // If preceded by a '-', the timezone shall be east of the Prime + // Meridian; otherwise, it shall be west (which may be indicated by + // an optional preceding '+' ). 
        if (buff == '-') {
            sign = 1;
        }

        ptr++;
    }

    // The hour can be 1 or 2 numeric characters
    for (size_t i = 0; i < 2; ++i) {
        buff = *ptr;
        if (!isdigit(buff)) {
            if (i == 0) {
                // At least one hour digit is required.
                return -1;
            }
            else {
                break;
            }
        }

        hours *= 10;
        hours += buff - '0';
        ptr++;
    }

    if (hours > 24 || hours < 0) {
        return -1;
    }

    // Minutes and seconds always of the format ":dd"
    long *outputs[2] = {&minutes, &seconds};
    for (size_t i = 0; i < 2; ++i) {
        if (*ptr != ':') {
            // Minutes (and then seconds) are optional; stop at the first
            // component that is not introduced by a ':'.
            goto complete;
        }
        ptr++;

        for (size_t j = 0; j < 2; ++j) {
            buff = *ptr;
            if (!isdigit(buff)) {
                return -1;
            }
            *(outputs[i]) *= 10;
            *(outputs[i]) += buff - '0';
            ptr++;
        }
    }

complete:
    // sign is -1 unless the string began with '-': POSIX inverts the sign
    // (a leading '-' means an offset east of the Prime Meridian).
    *total_seconds = sign * ((hours * 3600) + (minutes * 60) + seconds);

    return ptr - p;
}

/* Parse the date portion of a transition rule.
 *
 * Returns the number of characters consumed and stores a newly allocated
 * rule (PyMem) in *out; returns -1 on failure with nothing allocated. */
static ssize_t
parse_transition_rule(const char *const p, TransitionRuleType **out)
{
    // The full transition rule indicates when to change back and forth between
    // STD and DST, and has the form:
    //
    //   date[/time],date[/time]
    //
    // This function parses an individual date[/time] section, and returns
    // the number of characters that contributed to the transition rule. This
    // does not include the ',' at the end of the first rule.
    //
    // The POSIX spec states that if *time* is not given, the default is 02:00.
    const char *ptr = p;
    int8_t hour = 2;
    int8_t minute = 0;
    int8_t second = 0;

    // Rules come in one of three flavors:
    //
    //   1. Jn: Julian day n, with no leap days.
    //   2. n: Day of year (0-based, with leap days)
    //   3. Mm.n.d: Specifying by month, week and day-of-week.

    if (*ptr == 'M') {
        uint8_t month, week, day;
        ptr++;
        ssize_t tmp = parse_uint(ptr);
        if (tmp < 0) {
            return -1;
        }
        month = (uint8_t)tmp;
        ptr++;
        if (*ptr != '.') {
            // The month may be one or two digits.
            tmp = parse_uint(ptr);
            if (tmp < 0) {
                return -1;
            }

            month *= 10;
            month += (uint8_t)tmp;
            ptr++;
        }

        // Week and day-of-week are each exactly one digit, '.'-separated.
        uint8_t *values[2] = {&week, &day};
        for (size_t i = 0; i < 2; ++i) {
            if (*ptr != '.') {
                return -1;
            }
            ptr++;

            tmp = parse_uint(ptr);
            if (tmp < 0) {
                return -1;
            }
            ptr++;

            *(values[i]) = tmp;
        }

        if (*ptr == '/') {
            ptr++;
            ssize_t num_chars =
                parse_transition_time(ptr, &hour, &minute, &second);
            if (num_chars < 0) {
                return -1;
            }
            ptr += num_chars;
        }

        CalendarRule *rv = PyMem_Calloc(1, sizeof(CalendarRule));
        if (rv == NULL) {
            return -1;
        }

        // calendarrule_new validates the month/week/day ranges.
        if (calendarrule_new(month, week, day, hour, minute, second, rv)) {
            PyMem_Free(rv);
            return -1;
        }

        *out = (TransitionRuleType *)rv;
    }
    else {
        uint8_t julian = 0;
        unsigned int day = 0;
        if (*ptr == 'J') {
            julian = 1;
            ptr++;
        }

        // The day number may be one to three digits.
        for (size_t i = 0; i < 3; ++i) {
            if (!isdigit(*ptr)) {
                if (i == 0) {
                    return -1;
                }
                break;
            }
            day *= 10;
            day += (*ptr) - '0';
            ptr++;
        }

        if (*ptr == '/') {
            ptr++;
            ssize_t num_chars =
                parse_transition_time(ptr, &hour, &minute, &second);
            if (num_chars < 0) {
                return -1;
            }
            ptr += num_chars;
        }

        DayRule *rv = PyMem_Calloc(1, sizeof(DayRule));
        if (rv == NULL) {
            return -1;
        }

        // dayrule_new validates the day range ([1, 365] Julian, [0, 365]
        // otherwise).
        if (dayrule_new(julian, day, hour, minute, second, rv)) {
            PyMem_Free(rv);
            return -1;
        }
        *out = (TransitionRuleType *)rv;
    }

    return ptr - p;
}

/* Parse the time portion of a transition rule (e.g. following an /)
 *
 * Returns the number of characters consumed, or -1 on failure. */
static ssize_t
parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
                      int8_t *second)
{
    // From the spec:
    //
    //   The time has the same format as offset except that no leading sign
    //   ( '-' or '+' ) is allowed.
    //
    // The format for the offset is:
    //
    //   h[h][:mm[:ss]]
    //
    // RFC 8536 also allows transition times to be signed and to range from
    // -167 to +167, but the current version only supports [0, 99].
    //
    // TODO: Support the full range of transition hours.
    int8_t *components[3] = {hour, minute, second};
    const char *ptr = p;
    int8_t sign = 1;

    if (*ptr == '-' || *ptr == '+') {
        if (*ptr == '-') {
            sign = -1;
        }
        ptr++;
    }

    for (size_t i = 0; i < 3; ++i) {
        if (i > 0) {
            if (*ptr != ':') {
                // Minutes/seconds are optional.
                break;
            }
            ptr++;
        }

        uint8_t buff = 0;
        for (size_t j = 0; j < 2; j++) {
            if (!isdigit(*ptr)) {
                if (i == 0 && j > 0) {
                    // The hour may be a single digit; minutes and seconds
                    // must be exactly two.
                    break;
                }
                return -1;
            }

            buff *= 10;
            buff += (*ptr) - '0';
            ptr++;
        }

        // The sign applies to every component, not just the hour.
        *(components[i]) = sign * buff;
    }

    return ptr - p;
}

/* Constructor for a _tzrule.
 *
 * If `dst_abbr` is NULL, this will construct an "STD-only" _tzrule, in which
 * case `dst_offset` will be ignored and `start` and `end` are expected to be
 * NULL as well.
 *
 * Takes ownership of `start` and `end` (stored in the output rule).
 * Returns 0 on success.
 */
static int
build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset,
             long dst_offset, TransitionRuleType *start,
             TransitionRuleType *end, _tzrule *out)
{
    _tzrule rv = {0};

    rv.start = start;
    rv.end = end;

    if (build_ttinfo(std_offset, 0, std_abbr, &rv.std)) {
        goto error;
    }

    if (dst_abbr != NULL) {
        rv.dst_diff = dst_offset - std_offset;
        if (build_ttinfo(dst_offset, rv.dst_diff, dst_abbr, &rv.dst)) {
            goto error;
        }
    }
    else {
        rv.std_only = 1;
    }

    *out = rv;

    return 0;
error:
    // rv was zero-initialized, so xdecref_ttinfo is safe on the un-built
    // member as well.
    xdecref_ttinfo(&rv.std);
    xdecref_ttinfo(&rv.dst);
    return -1;
}

/* Destructor for _tzrule.
*/ +static void +free_tzrule(_tzrule *tzrule) +{ + xdecref_ttinfo(&(tzrule->std)); + if (!tzrule->std_only) { + xdecref_ttinfo(&(tzrule->dst)); + } + + if (tzrule->start != NULL) { + PyMem_Free(tzrule->start); + } + + if (tzrule->end != NULL) { + PyMem_Free(tzrule->end); + } +} + +/* Calculate DST offsets from transitions and UTC offsets + * + * This is necessary because each C `ttinfo` only contains the UTC offset, + * time zone abbreviation and an isdst boolean - it does not include the + * amount of the DST offset, but we need the amount for the dst() function. + * + * Thus function uses heuristics to infer what the offset should be, so it + * is not guaranteed that this will work for all zones. If we cannot assign + * a value for a given DST offset, we'll assume it's 1H rather than 0H, so + * bool(dt.dst()) will always match ttinfo.isdst. + */ +static void +utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs, + unsigned char *isdsts, size_t num_transitions, + size_t num_ttinfos) +{ + size_t dst_count = 0; + size_t dst_found = 0; + for (size_t i = 0; i < num_ttinfos; ++i) { + dst_count++; + } + + for (size_t i = 1; i < num_transitions; ++i) { + if (dst_count == dst_found) { + break; + } + + size_t idx = trans_idx[i]; + size_t comp_idx = trans_idx[i - 1]; + + // Only look at DST offsets that have nto been assigned already + if (!isdsts[idx] || dstoffs[idx] != 0) { + continue; + } + + long dstoff = 0; + long utcoff = utcoffs[idx]; + + if (!isdsts[comp_idx]) { + dstoff = utcoff - utcoffs[comp_idx]; + } + + if (!dstoff && idx < (num_ttinfos - 1)) { + comp_idx = trans_idx[i + 1]; + + // If the following transition is also DST and we couldn't find + // the DST offset by this point, we're going to have to skip it + // and hope this transition gets assigned later + if (isdsts[comp_idx]) { + continue; + } + + dstoff = utcoff - utcoffs[comp_idx]; + } + + if (dstoff) { + dst_found++; + dstoffs[idx] = dstoff; + } + } + + if (dst_found < dst_count) { + // If 
there are time zones we didn't find a value for, we'll end up + // with dstoff = 0 for something where isdst=1. This is obviously + // wrong — one hour will be a much better guess than 0. + for (size_t idx = 0; idx < num_ttinfos; ++idx) { + if (isdsts[idx] && !dstoffs[idx]) { + dstoffs[idx] = 3600; + } + } + } +} + +#define _swap(x, y, buffer) \ + buffer = x; \ + x = y; \ + y = buffer; + +/* Calculate transitions in local time from UTC time and offsets. + * + * We want to know when each transition occurs, denominated in the number of + * nominal wall-time seconds between 1970-01-01T00:00:00 and the transition in + * *local time* (note: this is *not* equivalent to the output of + * datetime.timestamp, which is the total number of seconds actual elapsed + * since 1970-01-01T00:00:00Z in UTC). + * + * This is an ambiguous question because "local time" can be ambiguous — but it + * is disambiguated by the `fold` parameter, so we allocate two arrays: + * + * trans_local[0]: The wall-time transitions for fold=0 + * trans_local[1]: The wall-time transitions for fold=1 + * + * This returns 0 on success and a negative number of failure. The trans_local + * arrays must be freed if they are not NULL. 
 */
static int
ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff,
            int64_t *trans_local[2], size_t num_ttinfos,
            size_t num_transitions)
{
    if (num_transitions == 0) {
        return 0;
    }

    // Copy the UTC transitions into each array to be modified in place later
    for (size_t i = 0; i < 2; ++i) {
        trans_local[i] = PyMem_Malloc(num_transitions * sizeof(int64_t));
        if (trans_local[i] == NULL) {
            return -1;
        }

        memcpy(trans_local[i], trans_utc, num_transitions * sizeof(int64_t));
    }

    int64_t offset_0, offset_1, buff;
    if (num_ttinfos > 1) {
        offset_0 = utcoff[0];
        offset_1 = utcoff[trans_idx[0]];

        // Order the pair so that offset_0 holds the larger of the two
        // offsets adjacent to the transition: the fold=0 array is shifted by
        // the larger offset and the fold=1 array by the smaller one.
        if (offset_1 > offset_0) {
            _swap(offset_0, offset_1, buff);
        }
    }
    else {
        offset_0 = utcoff[0];
        offset_1 = utcoff[0];
    }

    trans_local[0][0] += offset_0;
    trans_local[1][0] += offset_1;

    for (size_t i = 1; i < num_transitions; ++i) {
        offset_0 = utcoff[trans_idx[i - 1]];
        offset_1 = utcoff[trans_idx[i]];

        // Same larger/smaller ordering as above; the argument order differs
        // from the first call, but _swap is a symmetric exchange so the
        // effect is identical.
        if (offset_1 > offset_0) {
            _swap(offset_1, offset_0, buff);
        }

        trans_local[0][i] += offset_0;
        trans_local[1][i] += offset_1;
    }

    return 0;
}

/* Simple bisect_right binary search implementation
 *
 * Returns the index of the first element of arr strictly greater than value
 * (== size when every element is <= value). arr must be sorted ascending. */
static size_t
_bisect(const int64_t value, const int64_t *arr, size_t size)
{
    size_t lo = 0;
    size_t hi = size;
    size_t m;

    while (lo < hi) {
        m = (lo + hi) / 2;
        if (arr[m] > value) {
            hi = m;
        }
        else {
            lo = m + 1;
        }
    }

    return hi;
}

/* Find the ttinfo rules that apply at a given local datetime.
 *
 * Returns a borrowed pointer into self (or &NO_TTINFO), or NULL with an
 * exception set if the local timestamp could not be computed. */
static _ttinfo *
find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt)
{
    // datetime.time has a .tzinfo attribute that passes None as the dt
    // argument; it only really has meaning for fixed-offset zones.
    if (dt == Py_None) {
        if (self->fixed_offset) {
            return &(self->tzrule_after.std);
        }
        else {
            // Non-fixed zones cannot answer without a datetime; NO_TTINFO
            // stands in for "unknown".
            return &NO_TTINFO;
        }
    }

    int64_t ts;
    if (get_local_timestamp(dt, &ts)) {
        return NULL;
    }

    unsigned char fold = PyDateTime_DATE_GET_FOLD(dt);
    assert(fold < 2);
    // Pick the wall-time transition list matching the datetime's fold.
    int64_t *local_transitions = self->trans_list_wall[fold];
    size_t num_trans = self->num_transitions;

    if (num_trans && ts < local_transitions[0]) {
        // Before the first transition: the zone's initial ttinfo applies.
        return self->ttinfo_before;
    }
    else if (!num_trans || ts > local_transitions[self->num_transitions - 1]) {
        // After the last transition (or no transitions at all): defer to the
        // TZ-string rule.
        return find_tzrule_ttinfo(&(self->tzrule_after), ts, fold,
                                  PyDateTime_GET_YEAR(dt));
    }
    else {
        // bisect_right - 1 gives the index of the transition at or before ts.
        size_t idx = _bisect(ts, local_transitions, self->num_transitions) - 1;
        assert(idx < self->num_transitions);
        return self->trans_ttinfos[idx];
    }
}

/* Gregorian leap-year test; the unsigned cast keeps the modulo arithmetic
 * well-defined for the full int range. */
static int
is_leap_year(int year)
{
    const unsigned int ayear = (unsigned int)year;
    return ayear % 4 == 0 && (ayear % 100 != 0 || ayear % 400 == 0);
}

/* Calculates ordinal datetime from year, month and day.
 *
 * Proleptic Gregorian ordinal with 0001-01-01 == day 1 (same convention as
 * datetime.date.toordinal). m and d are 1-based; no range validation is
 * performed. */
static int
ymd_to_ord(int y, int m, int d)
{
    y -= 1;
    int days_before_year = (y * 365) + (y / 4) - (y / 100) + (y / 400);
    int yearday = DAYS_BEFORE_MONTH[m];
    if (m > 2 && is_leap_year(y + 1)) {
        yearday += 1;
    }

    return days_before_year + yearday + d;
}

/* Calculate the number of seconds since 1970-01-01 in local time.
 *
 * This gets a datetime in the same "units" as self->trans_list_wall so that we
 * can easily determine which transitions a datetime falls between. See the
 * comment above ts_to_local for more information.
 *
 */
static int
get_local_timestamp(PyObject *dt, int64_t *local_ts)
{
    assert(local_ts != NULL);

    int hour, minute, second;
    int ord;
    if (PyDateTime_CheckExact(dt)) {
        // Fast path: exact datetime instances can be read via the C macros
        // without any Python-level attribute access.
        int y = PyDateTime_GET_YEAR(dt);
        int m = PyDateTime_GET_MONTH(dt);
        int d = PyDateTime_GET_DAY(dt);
        hour = PyDateTime_DATE_GET_HOUR(dt);
        minute = PyDateTime_DATE_GET_MINUTE(dt);
        second = PyDateTime_DATE_GET_SECOND(dt);

        ord = ymd_to_ord(y, m, d);
    }
    else {
        // Slow path for datetime subclasses: go through the Python API so
        // overridden attributes/methods are respected.
        PyObject *num = PyObject_CallMethod(dt, "toordinal", NULL);
        if (num == NULL) {
            return -1;
        }

        ord = PyLong_AsLong(num);
        Py_DECREF(num);
        if (ord == -1 && PyErr_Occurred()) {
            return -1;
        }

        num = PyObject_GetAttrString(dt, "hour");
        if (num == NULL) {
            return -1;
        }
        hour = PyLong_AsLong(num);
        Py_DECREF(num);
        // NOTE: no PyErr_Occurred() check here — this assumes -1 can only be
        // an error, which holds for datetime-like objects whose hour/minute/
        // second are non-negative.
        if (hour == -1) {
            return -1;
        }

        num = PyObject_GetAttrString(dt, "minute");
        if (num == NULL) {
            return -1;
        }
        minute = PyLong_AsLong(num);
        Py_DECREF(num);
        if (minute == -1) {
            return -1;
        }

        num = PyObject_GetAttrString(dt, "second");
        if (num == NULL) {
            return -1;
        }
        second = PyLong_AsLong(num);
        Py_DECREF(num);
        if (second == -1) {
            return -1;
        }
    }

    // Widen before multiplying so dates far from the epoch don't overflow
    // int arithmetic.
    *local_ts = (int64_t)(ord - EPOCHORDINAL) * 86400 +
                (int64_t)(hour * 3600 + minute * 60 + second);

    return 0;
}

/////
// Functions for cache handling

/* Constructor for StrongCacheNode
 *
 * Takes new strong references to both key and zone; returns NULL on
 * allocation failure (no exception is set — PyMem_Malloc does not set one). */
static StrongCacheNode *
strong_cache_node_new(PyObject *key, PyObject *zone)
{
    StrongCacheNode *node = PyMem_Malloc(sizeof(StrongCacheNode));
    if (node == NULL) {
        return NULL;
    }

    Py_INCREF(key);
    Py_INCREF(zone);

    node->next = NULL;
    node->prev = NULL;
    node->key = key;
    node->zone = zone;

    return node;
}

/* Destructor for StrongCacheNode
 *
 * Releases the node's references and the node itself; does not touch the
 * node's neighbors. */
void
strong_cache_node_free(StrongCacheNode *node)
{
    Py_XDECREF(node->key);
    Py_XDECREF(node->zone);

    PyMem_Free(node);
}

/* Frees all nodes at or after a specified root in the strong cache.
 *
 * This can be used on the root node to free the entire cache or it can be used
 * to clear all nodes that have been expired (which, if everything is going
 * right, will actually only be 1 node at a time).
 */
void
strong_cache_free(StrongCacheNode *root)
{
    StrongCacheNode *node = root;
    StrongCacheNode *next_node;
    while (node != NULL) {
        // Save the link before freeing the node it lives in.
        next_node = node->next;
        strong_cache_node_free(node);

        node = next_node;
    }
}

/* Removes a node from the cache and updates its neighbors.
 *
 * This is used both when ejecting a node from the cache and when moving it to
 * the front of the cache.
 */
static void
remove_from_strong_cache(StrongCacheNode *node)
{
    // If the node is the head of the list, advance the global head pointer.
    if (ZONEINFO_STRONG_CACHE == node) {
        ZONEINFO_STRONG_CACHE = node->next;
    }

    if (node->prev != NULL) {
        node->prev->next = node->next;
    }

    if (node->next != NULL) {
        node->next->prev = node->prev;
    }

    node->next = NULL;
    node->prev = NULL;
}

/* Retrieves the node associated with a key, if it exists.
 *
 * This traverses the strong cache until it finds a matching key and returns a
 * pointer to the relevant node if found. Returns NULL if no node is found.
 *
 * root may be NULL, indicating an empty cache.
 */
static StrongCacheNode *
find_in_strong_cache(const StrongCacheNode *const root, PyObject *const key)
{
    const StrongCacheNode *node = root;
    while (node != NULL) {
        // NOTE(review): PyObject_RichCompareBool returns -1 on comparison
        // error, which this truthiness test treats as a match (with the
        // exception left pending). Presumably keys are always str, so the
        // comparison cannot fail — confirm against the callers.
        if (PyObject_RichCompareBool(key, node->key, Py_EQ)) {
            return (StrongCacheNode *)node;
        }

        node = node->next;
    }

    return NULL;
}

/* Ejects a given key from the class's strong cache, if applicable.
 *
 * This function is used to enable the per-key functionality in clear_cache.
+ */ +static void +eject_from_strong_cache(const PyTypeObject *const type, PyObject *key) +{ + if (type != &PyZoneInfo_ZoneInfoType) { + return; + } + + StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key); + if (node != NULL) { + remove_from_strong_cache(node); + + strong_cache_node_free(node); + } +} + +/* Moves a node to the front of the LRU cache. + * + * The strong cache is an LRU cache, so whenever a given node is accessed, if + * it is not at the front of the cache, it needs to be moved there. + */ +static void +move_strong_cache_node_to_front(StrongCacheNode **root, StrongCacheNode *node) +{ + StrongCacheNode *root_p = *root; + if (root_p == node) { + return; + } + + remove_from_strong_cache(node); + + node->prev = NULL; + node->next = root_p; + + if (root_p != NULL) { + root_p->prev = node; + } + + *root = node; +} + +/* Retrieves a ZoneInfo from the strong cache if it's present. + * + * This function finds the ZoneInfo by key and if found will move the node to + * the front of the LRU cache and return a new reference to it. It returns NULL + * if the key is not in the cache. + * + * The strong cache is currently only implemented for the base class, so this + * always returns a cache miss for subclasses. + */ +static PyObject * +zone_from_strong_cache(const PyTypeObject *const type, PyObject *const key) +{ + if (type != &PyZoneInfo_ZoneInfoType) { + return NULL; // Strong cache currently only implemented for base class + } + + StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key); + + if (node != NULL) { + move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, node); + Py_INCREF(node->zone); + return node->zone; + } + + return NULL; // Cache miss +} + +/* Inserts a new key into the strong LRU cache. + * + * This function is only to be used after a cache miss — it creates a new node + * at the front of the cache and ejects any stale entries (keeping the size of + * the cache to at most ZONEINFO_STRONG_CACHE_MAX_SIZE). 
+ */ +static void +update_strong_cache(const PyTypeObject *const type, PyObject *key, + PyObject *zone) +{ + if (type != &PyZoneInfo_ZoneInfoType) { + return; + } + + StrongCacheNode *new_node = strong_cache_node_new(key, zone); + + move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, new_node); + + StrongCacheNode *node = new_node->next; + for (size_t i = 1; i < ZONEINFO_STRONG_CACHE_MAX_SIZE; ++i) { + if (node == NULL) { + return; + } + node = node->next; + } + + // Everything beyond this point needs to be freed + if (node != NULL) { + if (node->prev != NULL) { + node->prev->next = NULL; + } + strong_cache_free(node); + } +} + +/* Clears all entries into a type's strong cache. + * + * Because the strong cache is not implemented for subclasses, this is a no-op + * for everything except the base class. + */ +void +clear_strong_cache(const PyTypeObject *const type) +{ + if (type != &PyZoneInfo_ZoneInfoType) { + return; + } + + strong_cache_free(ZONEINFO_STRONG_CACHE); +} + +static PyObject * +new_weak_cache() +{ + PyObject *weakref_module = PyImport_ImportModule("weakref"); + if (weakref_module == NULL) { + return NULL; + } + + PyObject *weak_cache = + PyObject_CallMethod(weakref_module, "WeakValueDictionary", ""); + Py_DECREF(weakref_module); + return weak_cache; +} + +static int +initialize_caches() +{ + if (TIMEDELTA_CACHE == NULL) { + TIMEDELTA_CACHE = PyDict_New(); + } + else { + Py_INCREF(TIMEDELTA_CACHE); + } + + if (TIMEDELTA_CACHE == NULL) { + return -1; + } + + if (ZONEINFO_WEAK_CACHE == NULL) { + ZONEINFO_WEAK_CACHE = new_weak_cache(); + } + else { + Py_INCREF(ZONEINFO_WEAK_CACHE); + } + + if (ZONEINFO_WEAK_CACHE == NULL) { + return -1; + } + + return 0; +} + +static PyObject * +zoneinfo_init_subclass(PyTypeObject *cls, PyObject *args, PyObject **kwargs) +{ + PyObject *weak_cache = new_weak_cache(); + if (weak_cache == NULL) { + return NULL; + } + + PyObject_SetAttrString((PyObject *)cls, "_weak_cache", weak_cache); + Py_RETURN_NONE; +} + +///// +// 
Specify the ZoneInfo type +static PyMethodDef zoneinfo_methods[] = { + {"clear_cache", (PyCFunction)(void (*)(void))zoneinfo_clear_cache, + METH_VARARGS | METH_KEYWORDS | METH_CLASS, + PyDoc_STR("Clear the ZoneInfo cache.")}, + {"no_cache", (PyCFunction)(void (*)(void))zoneinfo_no_cache, + METH_VARARGS | METH_KEYWORDS | METH_CLASS, + PyDoc_STR("Get a new instance of ZoneInfo, bypassing the cache.")}, + {"from_file", (PyCFunction)(void (*)(void))zoneinfo_from_file, + METH_VARARGS | METH_KEYWORDS | METH_CLASS, + PyDoc_STR("Create a ZoneInfo file from a file object.")}, + {"utcoffset", (PyCFunction)zoneinfo_utcoffset, METH_O, + PyDoc_STR("Retrieve a timedelta representing the UTC offset in a zone at " + "the given datetime.")}, + {"dst", (PyCFunction)zoneinfo_dst, METH_O, + PyDoc_STR("Retrieve a timedelta representing the amount of DST applied " + "in a zone at the given datetime.")}, + {"tzname", (PyCFunction)zoneinfo_tzname, METH_O, + PyDoc_STR("Retrieve a string containing the abbreviation for the time " + "zone that applies in a zone at a given datetime.")}, + {"fromutc", (PyCFunction)zoneinfo_fromutc, METH_O, + PyDoc_STR("Given a datetime with local time in UTC, retrieve an adjusted " + "datetime in local time.")}, + {"__reduce__", (PyCFunction)zoneinfo_reduce, METH_NOARGS, + PyDoc_STR("Function for serialization with the pickle protocol.")}, + {"_unpickle", (PyCFunction)zoneinfo__unpickle, METH_VARARGS | METH_CLASS, + PyDoc_STR("Private method used in unpickling.")}, + {"__init_subclass__", (PyCFunction)(void (*)(void))zoneinfo_init_subclass, + METH_VARARGS | METH_KEYWORDS, + PyDoc_STR("Function to initialize subclasses.")}, + {NULL} /* Sentinel */ +}; + +static PyMemberDef zoneinfo_members[] = { + {"key", /* name */ + offsetof(PyZoneInfo_ZoneInfo, key), /* offset */ + T_OBJECT_EX, /* type */ + READONLY, /* flags */ + NULL /* docstring */}, + {NULL}, /* Sentinel */ +}; + +static PyTypeObject PyZoneInfo_ZoneInfoType = { + PyVarObject_HEAD_INIT(NULL, 0) // + 
.tp_name = "zoneinfo.ZoneInfo", + .tp_basicsize = sizeof(PyZoneInfo_ZoneInfo), + .tp_weaklistoffset = offsetof(PyZoneInfo_ZoneInfo, weakreflist), + .tp_repr = (reprfunc)zoneinfo_repr, + .tp_str = (reprfunc)zoneinfo_str, + .tp_getattro = PyObject_GenericGetAttr, + .tp_flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE), + /* .tp_doc = zoneinfo_doc, */ + .tp_methods = zoneinfo_methods, + .tp_members = zoneinfo_members, + .tp_new = zoneinfo_new, + .tp_dealloc = zoneinfo_dealloc, +}; + +///// +// Specify the zoneinfo._czoneinfo module +static PyMethodDef module_methods[] = {{NULL, NULL}}; +static void +module_free() +{ + Py_XDECREF(_tzpath_find_tzfile); + _tzpath_find_tzfile = NULL; + + Py_XDECREF(_common_mod); + _common_mod = NULL; + + Py_XDECREF(io_open); + io_open = NULL; + + xdecref_ttinfo(&NO_TTINFO); + + Py_XDECREF(TIMEDELTA_CACHE); + if (!Py_REFCNT(TIMEDELTA_CACHE)) { + TIMEDELTA_CACHE = NULL; + } + + Py_XDECREF(ZONEINFO_WEAK_CACHE); + if (!Py_REFCNT(ZONEINFO_WEAK_CACHE)) { + ZONEINFO_WEAK_CACHE = NULL; + } + + strong_cache_free(ZONEINFO_STRONG_CACHE); + ZONEINFO_STRONG_CACHE = NULL; +} + +static int +zoneinfomodule_exec(PyObject *m) +{ + PyDateTime_IMPORT; + PyZoneInfo_ZoneInfoType.tp_base = PyDateTimeAPI->TZInfoType; + if (PyType_Ready(&PyZoneInfo_ZoneInfoType) < 0) { + goto error; + } + + Py_INCREF(&PyZoneInfo_ZoneInfoType); + PyModule_AddObject(m, "ZoneInfo", (PyObject *)&PyZoneInfo_ZoneInfoType); + + /* Populate imports */ + PyObject *_tzpath_module = PyImport_ImportModule("zoneinfo._tzpath"); + if (_tzpath_module == NULL) { + goto error; + } + + _tzpath_find_tzfile = + PyObject_GetAttrString(_tzpath_module, "find_tzfile"); + Py_DECREF(_tzpath_module); + if (_tzpath_find_tzfile == NULL) { + goto error; + } + + PyObject *io_module = PyImport_ImportModule("io"); + if (io_module == NULL) { + goto error; + } + + io_open = PyObject_GetAttrString(io_module, "open"); + Py_DECREF(io_module); + if (io_open == NULL) { + goto error; + } + + _common_mod = 
PyImport_ImportModule("zoneinfo._common"); + if (_common_mod == NULL) { + goto error; + } + + if (NO_TTINFO.utcoff == NULL) { + NO_TTINFO.utcoff = Py_None; + NO_TTINFO.dstoff = Py_None; + NO_TTINFO.tzname = Py_None; + + for (size_t i = 0; i < 3; ++i) { + Py_INCREF(Py_None); + } + } + + if (initialize_caches()) { + goto error; + } + + return 0; + +error: + return -1; +} + +static PyModuleDef_Slot zoneinfomodule_slots[] = { + {Py_mod_exec, zoneinfomodule_exec}, {0, NULL}}; + +static struct PyModuleDef zoneinfomodule = { + PyModuleDef_HEAD_INIT, + .m_name = "zoneinfo._czoneinfo", + .m_doc = "C implementation of the zoneinfo module", + .m_size = 0, + .m_methods = module_methods, + .m_slots = zoneinfomodule_slots, + .m_free = (freefunc)module_free}; + +PyMODINIT_FUNC +PyInit__czoneinfo(void) +{ + return PyModuleDef_Init(&zoneinfomodule); +} diff --git a/setup.py b/setup.py index 878372154d411a..05d42688cea9fc 100644 --- a/setup.py +++ b/setup.py @@ -807,6 +807,8 @@ def detect_simple_extensions(self): # uses modf(). self.add(Extension('_datetime', ['_datetimemodule.c'], libraries=['m'])) + # zoneinfo module + self.add(Extension('_czoneinfo', ['zoneinfomodule.c'])), # random number generator implemented in C self.add(Extension("_random", ["_randommodule.c"], extra_compile_args=['-DPy_BUILD_CORE_MODULE'])) From 93c03aa965b55a978c1b7b0c00f018d43c833ad8 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Fri, 8 May 2020 11:17:45 -0400 Subject: [PATCH 02/28] Add tzdata to Travis requirements --- .travis.yml | 3 +++ Misc/requirements-test.txt | 1 + 2 files changed, 4 insertions(+) create mode 100644 Misc/requirements-test.txt diff --git a/.travis.yml b/.travis.yml index 3c2fb4bdc78755..fa2c8c8397b0d5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -87,6 +87,7 @@ matrix: # Need a venv that can parse covered code. 
- ./python -m venv venv - ./venv/bin/python -m pip install -U coverage + - ./venv/bin/python -m pip install -r Misc/requirements-test.txt - ./venv/bin/python -m test.pythoninfo script: # Skip tests that re-run the entire test suite. @@ -171,6 +172,8 @@ before_script: fi - make -j4 - make pythoninfo + - ./python -m ensurepip --user + - ./python -m pip install --user -r Misc/requirements-test.txt script: # Using the built Python as patchcheck.py is built around the idea of using diff --git a/Misc/requirements-test.txt b/Misc/requirements-test.txt new file mode 100644 index 00000000000000..6e46c12e4f9d13 --- /dev/null +++ b/Misc/requirements-test.txt @@ -0,0 +1 @@ +tzdata==2020.1rc0 From 45146f1a12a4fb393fa5ea100be01e1cb6d45d30 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Fri, 8 May 2020 12:13:59 -0400 Subject: [PATCH 03/28] Deliberately break TZData tests to see who is skipping them --- Lib/test/test_zoneinfo/test_zoneinfo.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Lib/test/test_zoneinfo/test_zoneinfo.py b/Lib/test/test_zoneinfo/test_zoneinfo.py index 578eea5cfce7b1..69ce941d7b31d2 100644 --- a/Lib/test/test_zoneinfo/test_zoneinfo.py +++ b/Lib/test/test_zoneinfo/test_zoneinfo.py @@ -525,6 +525,9 @@ def tzpath(self): def zone_from_key(self, key): return self.klass(key=key) + def test_if_skipped(self): + self.assertTrue(False) + @unittest.skipIf( not HAS_TZDATA_PKG, "Skipping tzdata-specific tests: tzdata not installed" From ea7afeaa8181cb44f24cb97f0fb70c1656e596cd Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Fri, 8 May 2020 14:27:39 -0400 Subject: [PATCH 04/28] Add test requirements to Github Actions builds --- .github/workflows/build.yml | 16 ++++++++++++++++ .github/workflows/coverage.yml | 1 + 2 files changed, 17 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6e6a6d2b789d34..8b2ff1d2cf3f9c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -25,6 +25,10 @@ jobs: run: 
.\PCbuild\build.bat -e -p Win32 - name: Display build info run: .\python.bat -m test.pythoninfo + - name: Install test dependencies + run: | + .\python.bat -m ensurepip --user + .\python.bat -m pip install --user -r Misc/requirements-test.txt - name: Tests run: .\PCbuild\rt.bat -q -uall -u-cpu -rwW --slowest --timeout=1200 -j0 @@ -37,6 +41,10 @@ jobs: run: .\PCbuild\build.bat -e -p x64 - name: Display build info run: .\python.bat -m test.pythoninfo + - name: Install test dependencies + run: | + .\python.bat -m ensurepip --user + .\python.bat -m pip install --user -r Misc/requirements-test.txt - name: Tests run: .\PCbuild\rt.bat -x64 -q -uall -u-cpu -rwW --slowest --timeout=1200 -j0 @@ -51,6 +59,10 @@ jobs: run: make -j4 - name: Display build info run: make pythoninfo + - name: Install test dependencies + run: | + ./python.exe -m ensurepip --user + ./python.exe -m pip install --user -r Misc/requirements-test.txt - name: Tests run: make buildbottest TESTOPTS="-j4 -uall,-cpu" @@ -78,5 +90,9 @@ jobs: run: make -j4 - name: Display build info run: make pythoninfo + - name: Install test dependencies + run: | + ./python -m ensurepip --user + ./python -m pip install --user -r Misc/requirements-test.txt - name: Tests run: xvfb-run make buildbottest TESTOPTS="-j4 -uall,-cpu" diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 75bdf83f6c5dbb..6dd973bf8e4ad7 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -48,6 +48,7 @@ jobs: ./python -m venv .venv source ./.venv/bin/activate python -m pip install -U coverage + python -m pip install -r Misc/requirements-test.txt python -m test.pythoninfo - name: 'Tests with coverage' run: > From b5de7784fa7e3e1d1fc2de97932f52bafbacf8d2 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Fri, 8 May 2020 14:28:21 -0400 Subject: [PATCH 05/28] Add test dependencies to windows AP builds --- .azure-pipelines/windows-steps.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff 
--git a/.azure-pipelines/windows-steps.yml b/.azure-pipelines/windows-steps.yml index f502c40637c310..a814b802c116f1 100644 --- a/.azure-pipelines/windows-steps.yml +++ b/.azure-pipelines/windows-steps.yml @@ -21,7 +21,10 @@ steps: displayName: 'Display build info' condition: and(succeeded(), variables['testRunPlatform']) -- script: PCbuild\rt.bat -q -uall -u-cpu -rwW --slowest --timeout=1200 -j0 --junit-xml="$(Build.BinariesDirectory)\test-results.xml" --tempdir="$(Build.BinariesDirectory)\test" +- script: | + python.bat -m ensurepip --user + python.bat -m pip install --user -r Misc/requirements-tests.txt + PCbuild\rt.bat -q -uall -u-cpu -rwW --slowest --timeout=1200 -j0 --junit-xml="$(Build.BinariesDirectory)\test-results.xml" --tempdir="$(Build.BinariesDirectory)\test" displayName: 'Tests' condition: and(succeeded(), variables['testRunPlatform']) env: From 6802178a3af97bb77bbfb0c0c47694daeac52c5d Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Fri, 8 May 2020 16:27:17 -0400 Subject: [PATCH 06/28] Revert "Add test dependencies to windows AP builds" This reverts commit b5de7784fa7e3e1d1fc2de97932f52bafbacf8d2. 
--- .azure-pipelines/windows-steps.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.azure-pipelines/windows-steps.yml b/.azure-pipelines/windows-steps.yml index a814b802c116f1..f502c40637c310 100644 --- a/.azure-pipelines/windows-steps.yml +++ b/.azure-pipelines/windows-steps.yml @@ -21,10 +21,7 @@ steps: displayName: 'Display build info' condition: and(succeeded(), variables['testRunPlatform']) -- script: | - python.bat -m ensurepip --user - python.bat -m pip install --user -r Misc/requirements-tests.txt - PCbuild\rt.bat -q -uall -u-cpu -rwW --slowest --timeout=1200 -j0 --junit-xml="$(Build.BinariesDirectory)\test-results.xml" --tempdir="$(Build.BinariesDirectory)\test" +- script: PCbuild\rt.bat -q -uall -u-cpu -rwW --slowest --timeout=1200 -j0 --junit-xml="$(Build.BinariesDirectory)\test-results.xml" --tempdir="$(Build.BinariesDirectory)\test" displayName: 'Tests' condition: and(succeeded(), variables['testRunPlatform']) env: From 9292b3ce738668edd6c1b111d29a3d2aa02069a4 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Fri, 8 May 2020 16:28:49 -0400 Subject: [PATCH 07/28] Revert "Deliberately break TZData tests to see who is skipping them" This reverts commit 45146f1a12a4fb393fa5ea100be01e1cb6d45d30. --- Lib/test/test_zoneinfo/test_zoneinfo.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/Lib/test/test_zoneinfo/test_zoneinfo.py b/Lib/test/test_zoneinfo/test_zoneinfo.py index 69ce941d7b31d2..578eea5cfce7b1 100644 --- a/Lib/test/test_zoneinfo/test_zoneinfo.py +++ b/Lib/test/test_zoneinfo/test_zoneinfo.py @@ -525,9 +525,6 @@ def tzpath(self): def zone_from_key(self, key): return self.klass(key=key) - def test_if_skipped(self): - self.assertTrue(False) - @unittest.skipIf( not HAS_TZDATA_PKG, "Skipping tzdata-specific tests: tzdata not installed" From 0cad1510df3fb005a31a9d23422bc17d22680e14 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Sat, 9 May 2020 10:54:43 -0400 Subject: [PATCH 08/28] fixup! 
Add tests and implementation for ZoneInfo --- Lib/test/test_zoneinfo/_support.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_zoneinfo/_support.py b/Lib/test/test_zoneinfo/_support.py index b41ae25bd8a272..9fd5e5d52704d4 100644 --- a/Lib/test/test_zoneinfo/_support.py +++ b/Lib/test/test_zoneinfo/_support.py @@ -10,7 +10,20 @@ TZPATH_TEST_LOCK = threading.Lock() -@functools.lru_cache(1) +def call_once(f): + """Decorator that ensures a function is only ever called once.""" + lock = threading.Lock() + cached = functools.lru_cache(None)(f) + + @functools.wraps(f) + def inner(): + with lock: + return cached() + + return inner + + +@call_once def get_modules(): import zoneinfo as c_module py_module = import_fresh_module("zoneinfo", blocked=["_czoneinfo"]) From c12439bb03fbb14d3397357d1a8d3130c554aa53 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Sat, 9 May 2020 10:54:43 -0400 Subject: [PATCH 09/28] fixup! Add tests and implementation for ZoneInfo --- Lib/test/test_zoneinfo/_support.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Lib/test/test_zoneinfo/_support.py b/Lib/test/test_zoneinfo/_support.py index 9fd5e5d52704d4..a8befbbb6901b6 100644 --- a/Lib/test/test_zoneinfo/_support.py +++ b/Lib/test/test_zoneinfo/_support.py @@ -25,6 +25,14 @@ def inner(): @call_once def get_modules(): + """Retrieve two copies of zoneinfo: pure Python and C accelerated. + + Because this function manipulates the import system in a way that might + be fragile or do unexpected things if it is run many times, it uses a + `call_once` decorator to ensure that this is only ever called exactly + one time — in other words, when using this function you will only ever + get one copy of each module rather than a fresh import each time. 
+ """ import zoneinfo as c_module py_module = import_fresh_module("zoneinfo", blocked=["_czoneinfo"]) From 7fa32f27f744038a5f70d7ab7d98a17a284582cb Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Sat, 9 May 2020 10:54:43 -0400 Subject: [PATCH 10/28] fixup! Add tests and implementation for ZoneInfo --- Lib/test/test_zoneinfo/_support.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/test_zoneinfo/_support.py b/Lib/test/test_zoneinfo/_support.py index a8befbbb6901b6..1284221681e79f 100644 --- a/Lib/test/test_zoneinfo/_support.py +++ b/Lib/test/test_zoneinfo/_support.py @@ -34,6 +34,7 @@ def get_modules(): get one copy of each module rather than a fresh import each time. """ import zoneinfo as c_module + py_module = import_fresh_module("zoneinfo", blocked=["_czoneinfo"]) return py_module, c_module From 3e4d81d466c4a7abda69dd57570994df2fbe3e44 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Sat, 9 May 2020 10:54:43 -0400 Subject: [PATCH 11/28] fixup! Add tests and implementation for ZoneInfo --- Lib/test/test_zoneinfo/_support.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_zoneinfo/_support.py b/Lib/test/test_zoneinfo/_support.py index 1284221681e79f..6b92832ac35821 100644 --- a/Lib/test/test_zoneinfo/_support.py +++ b/Lib/test/test_zoneinfo/_support.py @@ -55,7 +55,7 @@ def set_zoneinfo_module(module): yield if old_zoneinfo is not NOT_PRESENT: sys.modules["zoneinfo"] = old_zoneinfo - else: # pragma: nocover + else: # pragma: nocover sys.modules.pop("zoneinfo") From 87d8c518542a303c7bf85e1498dd26bf6022fd6e Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Sat, 9 May 2020 11:00:41 -0400 Subject: [PATCH 12/28] fixup! fixup! 
Add tests and implementation for ZoneInfo --- Lib/zoneinfo/_common.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Lib/zoneinfo/_common.py b/Lib/zoneinfo/_common.py index 568a4873c1fecc..d6f1c4b5e65ccf 100644 --- a/Lib/zoneinfo/_common.py +++ b/Lib/zoneinfo/_common.py @@ -105,11 +105,7 @@ def get_abbr(idx): assert c == b"\n", c tz_bytes = b"" - # TODO: Walrus operator - while True: - c = fobj.read(1) - if c == b"\n": - break + while (c := fobj.read(1)) != b"\n": tz_bytes += c tz_str = tz_bytes From 2324a3017e76dab9793cbfb7bed3ae619ed722d6 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Mon, 11 May 2020 15:45:56 -0400 Subject: [PATCH 13/28] Fix member def for key --- Modules/zoneinfomodule.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Modules/zoneinfomodule.c b/Modules/zoneinfomodule.c index 86233c131a0321..84bf28b19317c0 100644 --- a/Modules/zoneinfomodule.c +++ b/Modules/zoneinfomodule.c @@ -2551,11 +2551,11 @@ static PyMethodDef zoneinfo_methods[] = { }; static PyMemberDef zoneinfo_members[] = { - {"key", /* name */ - offsetof(PyZoneInfo_ZoneInfo, key), /* offset */ - T_OBJECT_EX, /* type */ - READONLY, /* flags */ - NULL /* docstring */}, + {.name = "key", + .offset = offsetof(PyZoneInfo_ZoneInfo, key), + .type = T_OBJECT_EX, + .flags = READONLY, + .doc = NULL}, {NULL}, /* Sentinel */ }; From 3c8427b0623331f5072c7a1fd8b65d2bbe123284 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Mon, 11 May 2020 15:46:18 -0400 Subject: [PATCH 14/28] Fix refleak in error code --- Modules/zoneinfomodule.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Modules/zoneinfomodule.c b/Modules/zoneinfomodule.c index 84bf28b19317c0..78ec4f0e6a87bf 100644 --- a/Modules/zoneinfomodule.c +++ b/Modules/zoneinfomodule.c @@ -223,7 +223,8 @@ zoneinfo_new_instance(PyTypeObject *type, PyObject *key) self = NULL; cleanup: if (file_obj != NULL) { - PyObject_CallMethod(file_obj, "close", NULL); + PyObject *tmp = 
PyObject_CallMethod(file_obj, "close", NULL); + Py_DECREF(tmp); Py_DECREF(file_obj); } Py_DECREF(file_path); From 4f1262936bb4290e9c5edba80ae39cef668bc06c Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Mon, 11 May 2020 16:07:33 -0400 Subject: [PATCH 15/28] Fix refleak on cache miss --- Modules/zoneinfomodule.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Modules/zoneinfomodule.c b/Modules/zoneinfomodule.c index 78ec4f0e6a87bf..d5e90fdda2c37e 100644 --- a/Modules/zoneinfomodule.c +++ b/Modules/zoneinfomodule.c @@ -274,6 +274,7 @@ zoneinfo_new(PyTypeObject *type, PyObject *args, PyObject *kw) return NULL; } + Py_DECREF(instance); instance = PyObject_CallMethod(weak_cache, "setdefault", "OO", key, tmp); ((PyZoneInfo_ZoneInfo *)instance)->source = SOURCE_CACHE; From 88ae10215d13729859e6783ad94b3459eee1320c Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Mon, 11 May 2020 17:14:52 -0400 Subject: [PATCH 16/28] fixup! Add tests and implementation for ZoneInfo --- Modules/zoneinfomodule.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Modules/zoneinfomodule.c b/Modules/zoneinfomodule.c index d5e90fdda2c37e..2dc53053689640 100644 --- a/Modules/zoneinfomodule.c +++ b/Modules/zoneinfomodule.c @@ -847,6 +847,13 @@ load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj) size_t ttinfos_allocated = 0; data_tuple = PyObject_CallMethod(_common_mod, "load_data", "O", file_obj); + + if (!PyTuple_CheckExact(data_tuple)) { + PyErr_Format(PyExc_TypeError, "Invalid data result type: %r", + data_tuple); + goto error; + } + if (data_tuple == NULL) { goto error; } From 9cc8073b22d451b2a4deb5aaed1b4a877b76a1b6 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Mon, 11 May 2020 17:46:05 -0400 Subject: [PATCH 17/28] Fix memory leak in zoneinfo_dealloc --- Modules/zoneinfomodule.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Modules/zoneinfomodule.c b/Modules/zoneinfomodule.c index 2dc53053689640..4e2070eaf0a5e1 100644 --- a/Modules/zoneinfomodule.c +++ 
b/Modules/zoneinfomodule.c @@ -324,6 +324,8 @@ zoneinfo_dealloc(PyObject *obj_self) Py_XDECREF(self->key); Py_XDECREF(self->file_repr); + + Py_TYPE(self)->tp_free((PyObject *)self); } static PyObject * From 3d30dcb5d72bc9cca6b96af3b31aa7efd8ae1456 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Mon, 11 May 2020 18:02:06 -0400 Subject: [PATCH 18/28] Fix refleak in fromutc --- Modules/zoneinfomodule.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/Modules/zoneinfomodule.c b/Modules/zoneinfomodule.c index 4e2070eaf0a5e1..687f32e69f92d4 100644 --- a/Modules/zoneinfomodule.c +++ b/Modules/zoneinfomodule.c @@ -186,6 +186,7 @@ zoneinfo_new_instance(PyTypeObject *type, PyObject *key) else if (file_path == Py_None) { file_obj = PyObject_CallMethod(_common_mod, "load_tzdata", "O", key); if (file_obj == NULL) { + Py_DECREF(file_path); return NULL; } } @@ -269,12 +270,12 @@ zoneinfo_new(PyTypeObject *type, PyObject *args, PyObject *kw) } if (instance == Py_None) { + Py_DECREF(instance); PyObject *tmp = zoneinfo_new_instance(type, key); if (tmp == NULL) { return NULL; } - Py_DECREF(instance); instance = PyObject_CallMethod(weak_cache, "setdefault", "OO", key, tmp); ((PyZoneInfo_ZoneInfo *)instance)->source = SOURCE_CACHE; @@ -573,20 +574,17 @@ zoneinfo_fromutc(PyObject *obj_self, PyObject *dt) PyObject *replace = PyObject_GetAttrString(tmp, "replace"); PyObject *args = PyTuple_New(0); PyObject *kwargs = PyDict_New(); - PyObject *one = PyLong_FromLong(1); Py_DECREF(tmp); - if (args == NULL || kwargs == NULL || replace == NULL || - one == NULL) { + if (args == NULL || kwargs == NULL || replace == NULL) { Py_XDECREF(args); Py_XDECREF(kwargs); Py_XDECREF(replace); - Py_XDECREF(one); return NULL; } dt = NULL; - if (!PyDict_SetItemString(kwargs, "fold", one)) { + if (!PyDict_SetItemString(kwargs, "fold", _PyLong_One)) { dt = PyObject_Call(replace, args, kwargs); } @@ -850,13 +848,13 @@ load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj) 
data_tuple = PyObject_CallMethod(_common_mod, "load_data", "O", file_obj); - if (!PyTuple_CheckExact(data_tuple)) { - PyErr_Format(PyExc_TypeError, "Invalid data result type: %r", - data_tuple); + if (data_tuple == NULL) { goto error; } - if (data_tuple == NULL) { + if (!PyTuple_CheckExact(data_tuple)) { + PyErr_Format(PyExc_TypeError, "Invalid data result type: %r", + data_tuple); goto error; } From 8159b90aa2c8000e7110182eb592b18ac38603e2 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Tue, 12 May 2020 10:09:37 -0400 Subject: [PATCH 19/28] Add smoke test that C and Python modules built The import machinery can be somewhat fragile, and the "seamlessly falls back to pure Python" nature of this module makes it so that a problem building the C extension or a failure to import the pure Python version might easily go unnoticed. This adds some "smoke tests" that rely on implementation details of each module to ensure that we're building the ones we think we are. --- Lib/test/test_zoneinfo/test_zoneinfo.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/Lib/test/test_zoneinfo/test_zoneinfo.py b/Lib/test/test_zoneinfo/test_zoneinfo.py index 578eea5cfce7b1..b7c5193c5194d0 100644 --- a/Lib/test/test_zoneinfo/test_zoneinfo.py +++ b/Lib/test/test_zoneinfo/test_zoneinfo.py @@ -1653,6 +1653,30 @@ class CTestModule(TestModule): module = c_zoneinfo +class ExtensionBuiltTest(unittest.TestCase): + """Smoke test to ensure that the C and Python extensions are both tested. + + Because the intention is for the Python and C versions of ZoneInfo to + behave identically, these tests necessarily rely on implementation details, + so the tests may need to be adjusted if the implementations change. Do not + rely on these tests as an indication of stable properties of these classes. 
+ """ + + def test_cache_location(self): + # The pure Python version stores caches on attributes, but the C + # extension stores them in C globals (at least for now) + self.assertFalse(hasattr(c_zoneinfo.ZoneInfo, "_weak_cache")) + self.assertTrue(hasattr(py_zoneinfo.ZoneInfo, "_weak_cache")) + + def test_gc_tracked(self): + # The pure Python version is tracked by the GC but (for now) the C + # version is not. + import gc + + self.assertTrue(gc.is_tracked(py_zoneinfo.ZoneInfo)) + self.assertFalse(gc.is_tracked(c_zoneinfo.ZoneInfo)) + + @dataclasses.dataclass(frozen=True) class ZoneOffset: tzname: str From 507f5c2e417bca46cf34f16cd2c48bf2eb0ea7f7 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Tue, 12 May 2020 10:35:54 -0400 Subject: [PATCH 20/28] Add Windows build support This should improve support on Windows, including actually building the C extension. --- PCbuild/lib.pyproj | 8 ++++++++ PCbuild/pythoncore.vcxproj | 1 + PCbuild/pythoncore.vcxproj.filters | 3 +++ 3 files changed, 12 insertions(+) diff --git a/PCbuild/lib.pyproj b/PCbuild/lib.pyproj index ee01d109f162d7..7ce88e5690b454 100644 --- a/PCbuild/lib.pyproj +++ b/PCbuild/lib.pyproj @@ -1396,6 +1396,10 @@ + + + + @@ -1563,6 +1567,10 @@ + + + + diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 73274ac9acf557..e3a84d74f758c9 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -357,6 +357,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 254c8fbbea5fb8..ceac1d631e176f 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -743,6 +743,9 @@ Modules + + Modules + Modules\_io From 874dca0a79c76b668cf68f665c8490af53306c85 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Tue, 12 May 2020 11:06:42 -0400 Subject: [PATCH 21/28] fixup! 
Add Windows build support --- PC/config.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/PC/config.c b/PC/config.c index 32af2a81aeb41e..02bc1818330eb5 100644 --- a/PC/config.c +++ b/PC/config.c @@ -49,6 +49,7 @@ extern PyObject* PyInit_parser(void); extern PyObject* PyInit_winreg(void); extern PyObject* PyInit__struct(void); extern PyObject* PyInit__datetime(void); +extern PyObject* PyInit__czoneinfo(void); extern PyObject* PyInit__functools(void); extern PyObject* PyInit__json(void); #ifdef _Py_HAVE_ZLIB @@ -131,6 +132,7 @@ struct _inittab _PyImport_Inittab[] = { {"winreg", PyInit_winreg}, {"_struct", PyInit__struct}, {"_datetime", PyInit__datetime}, + {"_czoneinfo", PyInit__czoneinfo}, {"_functools", PyInit__functools}, {"_json", PyInit__json}, From 44b3135d45398a491397ea8e700817e226fd1980 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Tue, 12 May 2020 11:15:06 -0400 Subject: [PATCH 22/28] Revert "fixup! Add Windows build support" This reverts commit 874dca0a79c76b668cf68f665c8490af53306c85. --- PC/config.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/PC/config.c b/PC/config.c index 02bc1818330eb5..32af2a81aeb41e 100644 --- a/PC/config.c +++ b/PC/config.c @@ -49,7 +49,6 @@ extern PyObject* PyInit_parser(void); extern PyObject* PyInit_winreg(void); extern PyObject* PyInit__struct(void); extern PyObject* PyInit__datetime(void); -extern PyObject* PyInit__czoneinfo(void); extern PyObject* PyInit__functools(void); extern PyObject* PyInit__json(void); #ifdef _Py_HAVE_ZLIB @@ -132,7 +131,6 @@ struct _inittab _PyImport_Inittab[] = { {"winreg", PyInit_winreg}, {"_struct", PyInit__struct}, {"_datetime", PyInit__datetime}, - {"_czoneinfo", PyInit__czoneinfo}, {"_functools", PyInit__functools}, {"_json", PyInit__json}, From d490586163045e14854e7ddc7d42ba7b5c1e36ea Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Tue, 12 May 2020 11:18:09 -0400 Subject: [PATCH 23/28] fixup! 
Add Windows build support --- PCbuild/_czoneinfo.vcxproj | 109 +++++++++++++++++++++++++++++ PCbuild/_czoneinfo.vcxproj.filters | 16 +++++ PCbuild/pcbuild.proj | 2 +- PCbuild/pcbuild.sln | 2 + PCbuild/pythoncore.vcxproj | 1 - PCbuild/pythoncore.vcxproj.filters | 3 - PCbuild/readme.txt | 1 + Tools/msi/lib/lib_files.wxs | 2 +- 8 files changed, 130 insertions(+), 6 deletions(-) create mode 100644 PCbuild/_czoneinfo.vcxproj create mode 100644 PCbuild/_czoneinfo.vcxproj.filters diff --git a/PCbuild/_czoneinfo.vcxproj b/PCbuild/_czoneinfo.vcxproj new file mode 100644 index 00000000000000..17942a1e2c3528 --- /dev/null +++ b/PCbuild/_czoneinfo.vcxproj @@ -0,0 +1,109 @@ + + + + + Debug + ARM + + + Debug + ARM64 + + + Debug + Win32 + + + Debug + x64 + + + PGInstrument + ARM + + + PGInstrument + ARM64 + + + PGInstrument + Win32 + + + PGInstrument + x64 + + + PGUpdate + ARM + + + PGUpdate + ARM64 + + + PGUpdate + Win32 + + + PGUpdate + x64 + + + Release + ARM + + + Release + ARM64 + + + Release + Win32 + + + Release + x64 + + + + {384C224A-7474-476E-A01B-750EA7DE918C} + _czoneinfo + Win32Proj + + + + + DynamicLibrary + NotSet + + + + .pyd + + + + + + + + + + <_ProjectFileVersion>10.0.30319.1 + + + + + + + + + + {cf7ac3d1-e2df-41d2-bea6-1e2556cdea26} + false + + + + + + diff --git a/PCbuild/_czoneinfo.vcxproj.filters b/PCbuild/_czoneinfo.vcxproj.filters new file mode 100644 index 00000000000000..57b0b7f1719062 --- /dev/null +++ b/PCbuild/_czoneinfo.vcxproj.filters @@ -0,0 +1,16 @@ + + + + + + + + {2422278e-eeeb-4241-8182-433e2bc5a7fc} + + + + + Source Files + + + diff --git a/PCbuild/pcbuild.proj b/PCbuild/pcbuild.proj index 22a9eed18d42bb..d95441f9494b2e 100644 --- a/PCbuild/pcbuild.proj +++ b/PCbuild/pcbuild.proj @@ -51,7 +51,7 @@ - + diff --git a/PCbuild/pcbuild.sln b/PCbuild/pcbuild.sln index 6dc0139bc42af4..38a25abe429f70 100644 --- a/PCbuild/pcbuild.sln +++ b/PCbuild/pcbuild.sln @@ -91,6 +91,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "_testconsole", 
"_testconsol EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "_asyncio", "_asyncio.vcxproj", "{384C224A-7474-476E-A01B-750EA7DE918C}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "_czoneinfo", "_czoneinfo.vcxproj", "{384C224A-7474-476E-A01B-750EA7DE918C}" +EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "_queue", "_queue.vcxproj", "{78D80A15-BD8C-44E2-B49E-1F05B0A0A687}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "liblzma", "liblzma.vcxproj", "{12728250-16EC-4DC6-94D7-E21DD88947F8}" diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index e3a84d74f758c9..73274ac9acf557 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -357,7 +357,6 @@ - diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index ceac1d631e176f..254c8fbbea5fb8 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -743,9 +743,6 @@ Modules - - Modules - Modules\_io diff --git a/PCbuild/readme.txt b/PCbuild/readme.txt index 5fe3e8c36ecf53..75dfe704045327 100644 --- a/PCbuild/readme.txt +++ b/PCbuild/readme.txt @@ -132,6 +132,7 @@ library which are implemented in C; each one builds a DLL (renamed to _asyncio _ctypes _ctypes_test +_czoneinfo _decimal _elementtree _hashlib diff --git a/Tools/msi/lib/lib_files.wxs b/Tools/msi/lib/lib_files.wxs index b462372512f6de..f6c9b9d67852b6 100644 --- a/Tools/msi/lib/lib_files.wxs +++ b/Tools/msi/lib/lib_files.wxs @@ -1,6 +1,6 @@  - + From d9fc16f263ec74a4f5fa98dd2094b0e29f7a9892 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Tue, 12 May 2020 12:42:54 -0400 Subject: [PATCH 24/28] Remove __version__ from __dir__ --- Lib/zoneinfo/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/zoneinfo/__init__.py b/Lib/zoneinfo/__init__.py index 7fa7a572805a2a..30f26d51e38dba 100644 --- a/Lib/zoneinfo/__init__.py +++ b/Lib/zoneinfo/__init__.py @@ -26,4 +26,4 @@ def 
__getattr__(name): def __dir__(): - return sorted(__all__ + ["__version__"]) + return sorted(__all__) From 07fc66d25ff3e3c63774ea460fa87c0a4674beee Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Tue, 12 May 2020 12:48:26 -0400 Subject: [PATCH 25/28] Include all globals in __dir__ Suggestion by Petr Viktorin. --- Lib/zoneinfo/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/zoneinfo/__init__.py b/Lib/zoneinfo/__init__.py index 30f26d51e38dba..7c0b626fd679d1 100644 --- a/Lib/zoneinfo/__init__.py +++ b/Lib/zoneinfo/__init__.py @@ -26,4 +26,4 @@ def __getattr__(name): def __dir__(): - return sorted(__all__) + return sorted(list(globals()) + ["TZPATH"]) From d55872cb18be019077b6102d20a836a5f0703202 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Tue, 12 May 2020 18:03:28 -0400 Subject: [PATCH 26/28] fixup! Add Windows build support --- PCbuild/_czoneinfo.vcxproj | 2 +- PCbuild/pcbuild.sln | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/PCbuild/_czoneinfo.vcxproj b/PCbuild/_czoneinfo.vcxproj index 17942a1e2c3528..668ccd6abbbafb 100644 --- a/PCbuild/_czoneinfo.vcxproj +++ b/PCbuild/_czoneinfo.vcxproj @@ -67,7 +67,7 @@ - {384C224A-7474-476E-A01B-750EA7DE918C} + {FCBE1EF2-E0F0-40B1-88B5-00A35D378742} _czoneinfo Win32Proj diff --git a/PCbuild/pcbuild.sln b/PCbuild/pcbuild.sln index 38a25abe429f70..d7a85c45cec154 100644 --- a/PCbuild/pcbuild.sln +++ b/PCbuild/pcbuild.sln @@ -91,7 +91,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "_testconsole", "_testconsol EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "_asyncio", "_asyncio.vcxproj", "{384C224A-7474-476E-A01B-750EA7DE918C}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "_czoneinfo", "_czoneinfo.vcxproj", "{384C224A-7474-476E-A01B-750EA7DE918C}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "_czoneinfo", "_czoneinfo.vcxproj", "{FCBE1EF2-E0F0-40B1-88B5-00A35D378742}" EndProject 
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "_queue", "_queue.vcxproj", "{78D80A15-BD8C-44E2-B49E-1F05B0A0A687}" EndProject From d7996a6a76846d068dfeba5024f11f198f9318cf Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Tue, 12 May 2020 18:06:08 -0400 Subject: [PATCH 27/28] Reduce indentation in _parse_python_tzpath --- Lib/zoneinfo/_tzpath.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/Lib/zoneinfo/_tzpath.py b/Lib/zoneinfo/_tzpath.py index ee20672deff633..a99b510a47f753 100644 --- a/Lib/zoneinfo/_tzpath.py +++ b/Lib/zoneinfo/_tzpath.py @@ -37,23 +37,23 @@ def reset_tzpath(to=None): def _parse_python_tzpath(env_var): if not env_var: return () - else: - raw_tzpath = env_var.split(os.pathsep) - new_tzpath = tuple(filter(os.path.isabs, raw_tzpath)) - # If anything has been filtered out, we will warn about it - if len(new_tzpath) != len(raw_tzpath): - import warnings + raw_tzpath = env_var.split(os.pathsep) + new_tzpath = tuple(filter(os.path.isabs, raw_tzpath)) - msg = _get_invalid_paths_message(raw_tzpath) + # If anything has been filtered out, we will warn about it + if len(new_tzpath) != len(raw_tzpath): + import warnings - warnings.warn( - "Invalid paths specified in PYTHONTZPATH environment variable." - + msg, - InvalidTZPathWarning, - ) + msg = _get_invalid_paths_message(raw_tzpath) + + warnings.warn( + "Invalid paths specified in PYTHONTZPATH environment variable." + + msg, + InvalidTZPathWarning, + ) - return new_tzpath + return new_tzpath def _get_invalid_paths_message(tzpaths): From ddb824459bca4aa6018440ed8ed5a586960223be Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Mon, 11 May 2020 11:18:05 -0400 Subject: [PATCH 28/28] Add --with-tzpath to autoconf This is configurable only on POSIX systems at the moment and TZPATH is initialized to an empty string on Windows. 
--- Lib/sysconfig.py | 1 + Lib/zoneinfo/_tzpath.py | 14 ++++--------- Makefile.pre.in | 3 +++ configure | 46 +++++++++++++++++++++++++++++++++++++++++ configure.ac | 36 ++++++++++++++++++++++++++++++++ setup.py | 12 +++++++++++ 6 files changed, 102 insertions(+), 10 deletions(-) diff --git a/Lib/sysconfig.py b/Lib/sysconfig.py index eaee837f10e330..bf04ac541e6b02 100644 --- a/Lib/sysconfig.py +++ b/Lib/sysconfig.py @@ -546,6 +546,7 @@ def get_config_vars(*args): if os.name == 'nt': _init_non_posix(_CONFIG_VARS) + _CONFIG_VARS['TZPATH'] = '' if os.name == 'posix': _init_posix(_CONFIG_VARS) # For backward compatibility, see issue19555 diff --git a/Lib/zoneinfo/_tzpath.py b/Lib/zoneinfo/_tzpath.py index a99b510a47f753..8cff0b171bf32f 100644 --- a/Lib/zoneinfo/_tzpath.py +++ b/Lib/zoneinfo/_tzpath.py @@ -1,5 +1,6 @@ import os import sys +import sysconfig def reset_tzpath(to=None): @@ -19,17 +20,10 @@ def reset_tzpath(to=None): env_var = os.environ.get("PYTHONTZPATH", None) if env_var is not None: base_tzpath = _parse_python_tzpath(env_var) - elif sys.platform != "win32": - base_tzpath = [ - "/usr/share/zoneinfo", - "/usr/lib/zoneinfo", - "/usr/share/lib/zoneinfo", - "/etc/zoneinfo", - ] - - base_tzpath.sort(key=lambda x: not os.path.exists(x)) else: - base_tzpath = () + base_tzpath = _parse_python_tzpath( + sysconfig.get_config_var("TZPATH") + ) TZPATH = tuple(base_tzpath) diff --git a/Makefile.pre.in b/Makefile.pre.in index 0d616d304484ce..cf068b921ba463 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -197,6 +197,9 @@ OPENSSL_INCLUDES=@OPENSSL_INCLUDES@ OPENSSL_LIBS=@OPENSSL_LIBS@ OPENSSL_LDFLAGS=@OPENSSL_LDFLAGS@ +# Default zoneinfo.TZPATH. Added here to expose it in sysconfig.get_config_var +TZPATH=@TZPATH@ + # Modes for directories, executables and data files created by the # install process. Default to user-only-writable for all file types. 
DIRMODE= 755 diff --git a/configure b/configure index 26e9aa9fe454e2..5d290eca0c3039 100755 --- a/configure +++ b/configure @@ -658,6 +658,7 @@ LIBFFI_INCLUDEDIR PKG_CONFIG_LIBDIR PKG_CONFIG_PATH PKG_CONFIG +TZPATH SHLIBS CFLAGSFORSHARED LINKFORSHARED @@ -819,6 +820,7 @@ with_assertions enable_optimizations with_lto with_hash_algorithm +with_tzpath with_address_sanitizer with_memory_sanitizer with_undefined_behavior_sanitizer @@ -1524,6 +1526,9 @@ Optional Packages: --with-hash-algorithm=[fnv|siphash24] select hash algorithm for use in Python/pyhash.c (default is SipHash24) + --with-tzpath= + Select the default time zone search path for zoneinfo.TZPATH + --with-address-sanitizer enable AddressSanitizer memory error detector, 'asan' (default is no) @@ -10150,6 +10155,47 @@ $as_echo "default" >&6; } fi +validate_tzpath() { + # Checks that each element of the path is an absolute path + if test -z "$1"; then + # Empty string is allowed: it indicates no system TZPATH + return 0 + fi + + # Bad paths are those that don't start with / + if ( echo $1 | grep -qE '(^|:)([^/]|$)' ); then + as_fn_error $? "--with-tzpath must contain only absolute paths, not $1" "$LINENO" 5 + return 1; + fi +} + +TZPATH="/usr/share/zoneinfo:/usr/lib/zoneinfo:/usr/share/lib/zoneinfo:/etc/zoneinfo" +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for --with-tzpath" >&5 +$as_echo_n "checking for --with-tzpath... " >&6; } + +# Check whether --with-tzpath was given. +if test "${with_tzpath+set}" = set; then : + withval=$with_tzpath; +case "$withval" in + yes) + as_fn_error $? 
"--with-tzpath requires a value" "$LINENO" 5 + ;; + *) + validate_tzpath "$withval" + TZPATH="$withval" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: \"$withval\"" >&5 +$as_echo "\"$withval\"" >&6; } + ;; +esac + +else + validate_tzpath "$TZPATH" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: \"$TZPATH\"" >&5 +$as_echo "\"$TZPATH\"" >&6; } +fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for --with-address-sanitizer" >&5 $as_echo_n "checking for --with-address-sanitizer... " >&6; } diff --git a/configure.ac b/configure.ac index acb6d4bfa8da10..0b5ca88c7f5556 100644 --- a/configure.ac +++ b/configure.ac @@ -2946,6 +2946,42 @@ esac ], [AC_MSG_RESULT(default)]) +validate_tzpath() { + # Checks that each element of hte path is an absolute path + if test -z "$1"; then + # Empty string is allowed: it indicates no system TZPATH + return 0 + fi + + # Bad paths are those that don't start with / + dnl quadrigraphs "@<:@" and "@:>@" produce "[" and "]" in the output + if ( echo $1 | grep -qE '(^|:)(@<:@^/@:>@|$)' ); then + AC_MSG_ERROR([--with-tzpath must contain only absolute paths, not $1]) + return 1; + fi +} + +TZPATH="/usr/share/zoneinfo:/usr/lib/zoneinfo:/usr/share/lib/zoneinfo:/etc/zoneinfo" +AC_MSG_CHECKING(for --with-tzpath) +AC_ARG_WITH(tzpath, + AS_HELP_STRING([--with-tzpath=] + [Select the default time zone search path for zoneinfo.TZPATH]), +[ +case "$withval" in + yes) + AC_MSG_ERROR([--with-tzpath requires a value]) + ;; + *) + validate_tzpath "$withval" + TZPATH="$withval" + AC_MSG_RESULT("$withval") + ;; +esac +], +[validate_tzpath "$TZPATH" + AC_MSG_RESULT("$TZPATH")]) +AC_SUBST(TZPATH) + AC_MSG_CHECKING(for --with-address-sanitizer) AC_ARG_WITH(address_sanitizer, AS_HELP_STRING([--with-address-sanitizer], diff --git a/setup.py b/setup.py index 05d42688cea9fc..28c74649a8e329 100644 --- a/setup.py +++ b/setup.py @@ -304,6 +304,17 @@ def find_library_file(compiler, libname, std_dirs, paths): else: assert False, "Internal error: Path not 
found in std_dirs or paths" +def validate_tzpath(): + base_tzpath = sysconfig.get_config_var('TZPATH') + if not base_tzpath: + return + + tzpaths = base_tzpath.split(os.pathsep) + bad_paths = [tzpath for tzpath in tzpaths if not os.path.isabs(tzpath)] + if bad_paths: + raise ValueError('TZPATH must contain only absolute paths, ' + + f'found:\n{tzpaths!r}\nwith invalid paths:\n' + + f'{bad_paths!r}') def find_module_file(module, dirlist): """Find a module in a set of possible folders. If it is not found @@ -2451,6 +2462,7 @@ class DummyProcess: ProcessPoolExecutor = None sys.modules['concurrent.futures.process'] = DummyProcess + validate_tzpath() # turn off warnings when deprecated modules are imported import warnings