commit 1ca65cf16ca227bb8264ecea8be412b009ee8667 Author: passthem Date: Thu Oct 9 18:59:33 2025 +0800 测试哦1 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8bc49f0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +__pycache__ +.pytest_cache +/dist \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..e69de29 diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 0000000..20692a0 --- /dev/null +++ b/poetry.lock @@ -0,0 +1,195 @@ +# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +groups = ["dev"] +markers = "sys_platform == \"win32\"" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "exceptiongroup" +version = "1.3.0" +description = "Backport of PEP 654 (exception groups)" +optional = false +python-versions = ">=3.7" +groups = ["dev"] +markers = "python_version < \"3.11\"" +files = [ + {file = "exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10"}, + {file = "exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88"}, +] + +[package.dependencies] +typing-extensions = {version = ">=4.6.0", markers = "python_version < \"3.13\""} + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "iniconfig" +version = "2.1.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760"}, + {file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"}, +] + +[[package]] +name = "packaging" +version = "25.0" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484"}, + {file = "packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f"}, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746"}, + {file = "pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["coverage", "pytest", "pytest-benchmark"] + +[[package]] +name = "ply" +version = "3.11" +description = "Python Lex & Yacc" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce"}, + {file = "ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3"}, +] + +[[package]] +name = "pygments" +version = "2.19.2" +description = "Pygments is a syntax highlighting package written in Python." +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b"}, + {file = "pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887"}, +] + +[package.extras] +windows-terminal = ["colorama (>=0.4.6)"] + +[[package]] +name = "pytest" +version = "8.4.2" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79"}, + {file = "pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01"}, +] + +[package.dependencies] +colorama = {version = ">=0.4", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1", markers = "python_version < \"3.11\""} +iniconfig = ">=1" +packaging = ">=20" +pluggy = ">=1.5,<2" +pygments = ">=2.7.2" +tomli = {version = ">=1", markers = "python_version < \"3.11\""} + +[package.extras] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "tomli" +version = "2.3.0" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version < \"3.11\"" +files = [ + {file = "tomli-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:88bd15eb972f3664f5ed4b57c1634a97153b4bac4479dcb6a495f41921eb7f45"}, + {file = "tomli-2.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:883b1c0d6398a6a9d29b508c331fa56adbcdff647f6ace4dfca0f50e90dfd0ba"}, + {file = "tomli-2.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d1381caf13ab9f300e30dd8feadb3de072aeb86f1d34a8569453ff32a7dea4bf"}, + {file = "tomli-2.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0e285d2649b78c0d9027570d4da3425bdb49830a6156121360b3f8511ea3441"}, + {file = "tomli-2.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0a154a9ae14bfcf5d8917a59b51ffd5a3ac1fd149b71b47a3a104ca4edcfa845"}, + {file = "tomli-2.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:74bf8464ff93e413514fefd2be591c3b0b23231a77f901db1eb30d6f712fc42c"}, + {file = "tomli-2.3.0-cp311-cp311-win32.whl", hash = "sha256:00b5f5d95bbfc7d12f91ad8c593a1659b6387b43f054104cda404be6bda62456"}, + {file = "tomli-2.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:4dc4ce8483a5d429ab602f111a93a6ab1ed425eae3122032db7e9acf449451be"}, + {file = "tomli-2.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d7d86942e56ded512a594786a5ba0a5e521d02529b3826e7761a05138341a2ac"}, + {file = "tomli-2.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:73ee0b47d4dad1c5e996e3cd33b8a76a50167ae5f96a2607cbe8cc773506ab22"}, + {file = "tomli-2.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:792262b94d5d0a466afb5bc63c7daa9d75520110971ee269152083270998316f"}, + {file = "tomli-2.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f195fe57ecceac95a66a75ac24d9d5fbc98ef0962e09b2eddec5d39375aae52"}, + {file = "tomli-2.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e31d432427dcbf4d86958c184b9bfd1e96b5b71f8eb17e6d02531f434fd335b8"}, + {file = "tomli-2.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b0882799624980785240ab732537fcfc372601015c00f7fc367c55308c186f6"}, + {file = "tomli-2.3.0-cp312-cp312-win32.whl", hash = "sha256:ff72b71b5d10d22ecb084d345fc26f42b5143c5533db5e2eaba7d2d335358876"}, + {file = "tomli-2.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:1cb4ed918939151a03f33d4242ccd0aa5f11b3547d0cf30f7c74a408a5b99878"}, + {file = "tomli-2.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5192f562738228945d7b13d4930baffda67b69425a7f0da96d360b0a3888136b"}, + {file = "tomli-2.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:be71c93a63d738597996be9528f4abe628d1adf5e6eb11607bc8fe1a510b5dae"}, + {file = "tomli-2.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c4665508bcbac83a31ff8ab08f424b665200c0e1e645d2bd9ab3d3e557b6185b"}, + {file = "tomli-2.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4021923f97266babc6ccab9f5068642a0095faa0a51a246a6a02fccbb3514eaf"}, + {file = "tomli-2.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4ea38c40145a357d513bffad0ed869f13c1773716cf71ccaa83b0fa0cc4e42f"}, + {file = "tomli-2.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ad805ea85eda330dbad64c7ea7a4556259665bdf9d2672f5dccc740eb9d3ca05"}, + {file = "tomli-2.3.0-cp313-cp313-win32.whl", hash = "sha256:97d5eec30149fd3294270e889b4234023f2c69747e555a27bd708828353ab606"}, + {file = "tomli-2.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:0c95ca56fbe89e065c6ead5b593ee64b84a26fca063b5d71a1122bf26e533999"}, + {file = "tomli-2.3.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:cebc6fe843e0733ee827a282aca4999b596241195f43b4cc371d64fc6639da9e"}, + {file = "tomli-2.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4c2ef0244c75aba9355561272009d934953817c49f47d768070c3c94355c2aa3"}, + {file = "tomli-2.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c22a8bf253bacc0cf11f35ad9808b6cb75ada2631c2d97c971122583b129afbc"}, + {file = "tomli-2.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0eea8cc5c5e9f89c9b90c4896a8deefc74f518db5927d0e0e8d4a80953d774d0"}, + {file = "tomli-2.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b74a0e59ec5d15127acdabd75ea17726ac4c5178ae51b85bfe39c4f8a278e879"}, + {file = "tomli-2.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b5870b50c9db823c595983571d1296a6ff3e1b88f734a4c8f6fc6188397de005"}, + {file = "tomli-2.3.0-cp314-cp314-win32.whl", hash = "sha256:feb0dacc61170ed7ab602d3d972a58f14ee3ee60494292d384649a3dc38ef463"}, + {file = "tomli-2.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:b273fcbd7fc64dc3600c098e39136522650c49bca95df2d11cf3b626422392c8"}, + {file = "tomli-2.3.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:940d56ee0410fa17ee1f12b817b37a4d4e4dc4d27340863cc67236c74f582e77"}, + {file = "tomli-2.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f85209946d1fe94416debbb88d00eb92ce9cd5266775424ff81bc959e001acaf"}, + {file = "tomli-2.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a56212bdcce682e56b0aaf79e869ba5d15a6163f88d5451cbde388d48b13f530"}, + {file = "tomli-2.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c5f3ffd1e098dfc032d4d3af5c0ac64f6d286d98bc148698356847b80fa4de1b"}, + {file = "tomli-2.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5e01decd096b1530d97d5d85cb4dff4af2d8347bd35686654a004f8dea20fc67"}, + {file = "tomli-2.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8a35dd0e643bb2610f156cca8db95d213a90015c11fee76c946aa62b7ae7e02f"}, + {file = "tomli-2.3.0-cp314-cp314t-win32.whl", hash = "sha256:a1f7f282fe248311650081faafa5f4732bdbfef5d45fe3f2e702fbc6f2d496e0"}, + {file = "tomli-2.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:70a251f8d4ba2d9ac2542eecf008b3c8a9fc5c3f9f02c56a9d7952612be2fdba"}, + {file = "tomli-2.3.0-py3-none-any.whl", hash = "sha256:e95b1af3c5b07d9e643909b5abbec77cd9f1217e6d0bca72b0234736b9fb1f1b"}, + {file = "tomli-2.3.0.tar.gz", hash = "sha256:64be704a875d2a59753d80ee8a533c3fe183e3f06807ff7dc2232938ccb01549"}, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +description = "Backported and Experimental Type Hints for Python 3.9+" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +markers = "python_version < \"3.11\"" +files = [ + {file = "typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"}, + {file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"}, +] + +[metadata] +lock-version = "2.1" +python-versions = ">=3.9" +content-hash = "602caa09d07b5f25d69c7720418aba4aea38d751ea55df0fd54bd2eed033003d" diff --git a/ptimeparse/__init__.py b/ptimeparse/__init__.py new file mode 100644 index 0000000..82f0f42 --- /dev/null +++ b/ptimeparse/__init__.py @@ -0,0 +1,635 @@ +import datetime +import re +from dataclasses import dataclass +from typing import Literal + +from ptimeparse.err import (MultipleSpecificationException, + TokenUnhandledException) + + +@dataclass +class Parser: + now: datetime.datetime + + timedelta: datetime.timedelta + + hour_delta_triggered: bool = False + minute_delta_triggered: bool = False + second_delta_triggered: bool = False + + ampm_specification: Literal["AM", "PM", None, "ABSOLUTE"] = None + ampm_ismid: bool = False + hour_specification: int | None = None + minute_specification: int | None = None + time_spec_day_delta: int = 0 + + @property + def time_delta_triggered(self): + return self.hour_delta_triggered or self.minute_delta_triggered or self.second_delta_triggered + + def __init__(self, now: datetime.datetime | None = None): + self.now = datetime.datetime.now() if now is None else now + + self.CN_NUM = { + "零": 0, + "一": 1, + "二": 2, + "两": 2, + "三": 3, + "四": 4, + "五": 5, + "六": 6, + "七": 7, + "八": 8, + "九": 9, + "十": 10, + "百": 100, + "千": 1000, + } + self.CN_UNIT = {"万": 1_0000, "亿": 1_0000_0000, "兆": 1_0000_0000_0000} + + def clear_state(self): + self.timedelta = datetime.timedelta() + self.hour_delta_triggered = False + self.minute_delta_triggered = False + self.second_delta_triggered = False + + self.ampm_specification = None + self.ampm_ismid = False + self.hour_specification = None + self.minute_specification = None + self.time_spec_day_delta = 0 + + def clean(self, content: str) -> str: + return re.sub(r"[ \t的]", "", content) + + def parse(self, content: str) -> datetime.datetime: + self.clear_state() + content = self.clean(content) + + content = self.digest_relative_date(content) + content = self.digest_weekday_relative(content) + content = self.digest_delta(content) + + content = self.digest_date(content) + + content = self.digest_early_late_hour(content) + content = self.digest_ampm_specific(content) + content = self.digest_time(content) + content = self.digest_ke(content) + if len(content) != 0: + raise TokenUnhandledException(content) + return self.build() + + def digest_relative_date(self, content: str) -> str: + """ + 处理明天、昨天、今天、后天、大后天、前天、大前天 等相对日期。 + 返回剩余未处理字符串。 + """ + # 注意:这些词必须完整匹配开头,避免误匹配(如“明天”不能匹配“明天早上”中的“明”) + relative_days = { + "今天": 0, + "明天": 1, + "后天": 2, + "大后天": 3, + "昨日": -1, + "昨天": -1, + "前天": -2, + "大前天": -3, + } + + for word, delta_days in relative_days.items(): + if content.startswith(word): + # 如果已经设置了时间偏移(如 3 小时后),则冲突 + if self.time_delta_triggered: + raise MultipleSpecificationException() + # 如果已经通过其他方式设置了 day delta(如 digest_date 中),这里也应检查 + # 为简化,我们直接设置 + self.time_spec_day_delta = delta_days + return content[len(word):] + + return content + + def digest_timedelta(self, content: str) -> str: + """ + 解析形如 "3天", "2小时", "1星期", "5个月" 等时间增量。 + 支持中文数字和阿拉伯数字。 + 返回未处理的剩余字符串。 + """ + + if content.startswith("半"): + # "半"通常指"半小时" + remaining = content[1:] + # 检查是否有"小时"、"时"等 + if remaining.startswith(("小时", "时")): + if self.hour_delta_triggered: + raise MultipleSpecificationException() + self.hour_delta_triggered = True + self.timedelta = datetime.timedelta(minutes=30) + return remaining[len("小时") if remaining.startswith("小时") else len("时"):] + elif remaining.startswith(("分钟", "分")): + if self.minute_delta_triggered: + raise MultipleSpecificationException() + self.minute_delta_triggered = True + self.timedelta = datetime.timedelta(minutes=30) + return remaining[len("分钟") if remaining.startswith("分钟") else len("分"):] + else: + # 默认为半小时 + if self.hour_delta_triggered: + raise MultipleSpecificationException() + self.hour_delta_triggered = True + self.timedelta = datetime.timedelta(minutes=30) + return remaining + + # 定义时间单位映射(注意:月需特殊处理) + unit_patterns = [ + (r"(秒钟|秒)", "second"), + (r"(分钟|分)", "minute"), + (r"(时|小时|点)", "hour"), + (r"半(时|小时|点)", "hour+30"), + (r"(天|日)", "day"), + (r"(星期|周)", "week"), + (r"(月)", "month"), # 特殊:按30天处理 + ] + + remaining = content + delta_kwargs = { + "days": 0, + "seconds": 0, + "minutes": 0, + "hours": 0, + "weeks": 0, + } + month_count = 0 # 单独记录月,最后转为天 + + while True: + matched = False + for pattern, unit_type in unit_patterns: + m = re.match(rf"^([零一二两三四五六七八九十百千万亿兆]*|\d+)?个?({pattern})", remaining) + if m: + num_str = m.group(1) + if num_str is None or num_str == "": + num = 1 # 默认为1,如“明天”实际是“1天后” + else: + # 尝试解析数字(中文或阿拉伯) + _, num = self.digest_chinese_number(num_str) + if num is None: + try: + num = int(num_str) + except ValueError: + continue # 无效数字,跳过 + + # 设置标志位,防止后续时间规格冲突 + if unit_type == "hour": + if self.hour_delta_triggered: + raise MultipleSpecificationException() + self.hour_delta_triggered = True + elif unit_type == "hour+30": + if self.hour_delta_triggered: + raise MultipleSpecificationException() + self.hour_delta_triggered = True + if self.minute_delta_triggered: + raise MultipleSpecificationException() + self.minute_delta_triggered = True + elif unit_type == "minute": + if self.minute_delta_triggered: + raise MultipleSpecificationException() + self.minute_delta_triggered = True + elif unit_type == "second": + if self.second_delta_triggered: + raise MultipleSpecificationException() + self.second_delta_triggered = True + + # 累加到对应单位 + if unit_type == "second": + delta_kwargs["seconds"] += num + elif unit_type == "minute": + delta_kwargs["minutes"] += num + elif unit_type == "hour": + delta_kwargs["hours"] += num + elif unit_type == "day": + delta_kwargs["days"] += num + elif unit_type == "week": + delta_kwargs["weeks"] += num + elif unit_type == "month": + month_count += num + elif unit_type == "hour+30": + delta_kwargs["hours"] += num + delta_kwargs["minutes"] += 30 + + # 更新剩余字符串 + remaining = remaining[len(m.group(0)):] + matched = True + break + + if not matched: + break + + # 处理“月” → 按30天/月估算(简单处理) + if month_count > 0: + delta_kwargs["days"] += month_count * 30 + + # 构建 timedelta + self.timedelta = datetime.timedelta( + days=delta_kwargs["days"], + seconds=delta_kwargs["seconds"], + minutes=delta_kwargs["minutes"], + hours=delta_kwargs["hours"], + weeks=delta_kwargs["weeks"] + ) + + return remaining + + def digest_delta(self, content: str) -> str: + if "后" in content: + c1, _ = content.split("后", 1) + c1 = self.digest_timedelta(c1) + if c1 != "": + raise TokenUnhandledException(c1) + return c1 + if "前" in content: + c1, _ = content.split("前", 1) + c1 = self.digest_timedelta(c1) + self.timedelta = -self.timedelta + if c1 != "": + raise TokenUnhandledException(c1) + return c1 + return content + + def digest_date(self, content: str) -> str: + # 1. 尝试 ISO 格式: 2025-10-09T15:30 或 2025-10-09 + iso_match = re.match(r"^(\d{4})-(\d{1,2})-(\d{1,2})(?:T(\d{1,2}):(\d{1,2}))?", content) + if iso_match: + year, month, day = int(iso_match.group(1)), int(iso_match.group(2)), int(iso_match.group(3)) + try: + target_date = datetime.date(year, month, day) + self.time_spec_day_delta = (target_date - self.now.date()).days + remaining = content[len(iso_match.group(0)):] + if iso_match.group(4): # 有时间 + hour = int(iso_match.group(4)) + minute = int(iso_match.group(5)) if iso_match.group(5) else 0 + self.hour_specification = hour + self.minute_specification = minute + return remaining + except ValueError: + pass + + # 2. 尝试 YYYY年MM月DD日 + full_date_match = re.match(r"^(\d{4})年(\d{1,2})月(\d{1,2})日", content) + if full_date_match: + year, month, day = map(int, full_date_match.groups()) + try: + target_date = datetime.date(year, month, day) + self.time_spec_day_delta = (target_date - self.now.date()).days + return content[len(full_date_match.group(0)):] + except ValueError: + pass + + # 3. 尝试 MM月DD日(默认今年) + md_match = re.match(r"^(\d{1,2})月(\d{1,2})日", content) + if md_match: + month, day = map(int, md_match.groups()) + year = self.now.year + try: + target_date = datetime.date(year, month, day) + self.time_spec_day_delta = (target_date - self.now.date()).days + return content[len(md_match.group(0)):] + except ValueError: + pass + + # 4. 尝试 YYYY/MM/DD + slash_full = re.match(r"^(\d{4})/(\d{1,2})/(\d{1,2})", content) + if slash_full: + year, month, day = map(int, slash_full.groups()) + try: + target_date = datetime.date(year, month, day) + self.time_spec_day_delta = (target_date - self.now.date()).days + return content[len(slash_full.group(0)):] + except ValueError: + pass + + # 5. 尝试 MM/DD + slash_md = re.match(r"^(\d{1,2})/(\d{1,2})", content) + if slash_md: + month, day = map(int, slash_md.groups()) + year = self.now.year + try: + target_date = datetime.date(year, month, day) + self.time_spec_day_delta = (target_date - self.now.date()).days + return content[len(slash_md.group(0)):] + except ValueError: + pass + + # 6. 中文月日:十月九日 + cn_md_match = re.match(r"^([一二三四五六七八九十]+)月([一二三四五六七八九十]+)日", content) + if cn_md_match: + month_str, day_str = cn_md_match.groups() + _, month_num = self.digest_chinese_number(month_str + "日") + _, day_num = self.digest_chinese_number(day_str + "日") + if month_num is not None and day_num is not None: + year = self.now.year + try: + target_date = datetime.date(year, month_num, day_num) + self.time_spec_day_delta = (target_date - self.now.date()).days + return content[len(cn_md_match.group(0)):] + except ValueError: + pass + + return content + + def digest_time(self, content: str) -> str: + content = self.digest_single_hour(content) + return content + + def _chinese_to_int_final(self, cn_str: str) -> int: + result = 0 + num_section = 0 + + for char in cn_str: + if char in self.CN_NUM: + val = self.CN_NUM[char] + if val <= 9: + num_section = val + elif val >= 10: + if num_section == 0 and val == 10: + num_section = 1 + + result += num_section * val + num_section = 0 + + result += num_section + + return result + + def digest_chinese_number(self, content: str) -> tuple[str, int | None]: + """ + 识别字符串开头的中文数字并将其转换为整数。 + 处理范围:零到九千九百九十九万九千九百九十九亿九千九百九十九万九千九百九十九(约10^16) + """ + CN_CHARS = "".join(self.CN_NUM.keys()) + "".join(self.CN_UNIT.keys()) + m = re.match(f"^([{CN_CHARS}]+)", content) + + if m is None: + return content, None + + cn_num_str = m.group(1) + if not cn_num_str: + return content, None + + remaining_content = content[len(cn_num_str) :] + + if cn_num_str == "零": + return remaining_content, 0 + if cn_num_str == "一": + return remaining_content, 1 + if cn_num_str in self.CN_NUM and self.CN_NUM[cn_num_str] <= 9: + return remaining_content, self.CN_NUM[cn_num_str] + + pattern = re.compile(r"([^万亿兆]*)([万亿兆]?)") + + parts = pattern.findall(cn_num_str) + parts.reverse() + + current_unit = 1 + total_num = 0 + + for num_str, unit_char in parts: + if not num_str and not unit_char: + continue + + if unit_char in self.CN_UNIT: + current_unit = self.CN_UNIT[unit_char] + + if num_str: + section_num = self._chinese_to_int_final(num_str) + total_num += section_num * current_unit + + if unit_char in self.CN_UNIT: + pass + elif not unit_char: + current_unit = 1 + + return remaining_content, total_num + + def digest_number(self, content: str) -> tuple[str, int | None]: + c1, num = self.digest_chinese_number(content) + if num is not None: + return c1, num + m = re.match(r"^(\d+)(.+)$", content) + if m is not None: + return m.group(2), int(m.group(1)) + return content, None + + def digest_ampm_specific(self, content: str) -> str: + am_patterns = ["凌晨", "早上", "上午", "早晨", "早"] + pm_patterns = ["中午", "下午", "晚上", "傍晚", "晚"] + + for pat in am_patterns: + if content.startswith(pat): + self.ampm_specification = "AM" + return content[len(pat):] + + for pat in pm_patterns: + if content.startswith(pat): + self.ampm_specification = "PM" + if pat == '中午': + self.ampm_ismid = True + return content[len(pat):] + + return content + + def digest_single_hour(self, content: str) -> str: + c1, num = self.digest_number(content) + if num is None: + return content + if self.time_delta_triggered: + raise MultipleSpecificationException() + self.hour_specification = num + if c1.startswith("点"): + c1 = c1[1:] + elif c1.startswith("时"): + c1 = c1[1:] + if self.ampm_specification is None: + self.ampm_specification = "ABSOLUTE" + else: + return content + if c1.startswith('钟'): + c1 = c1[1:] + if c1.startswith('整'): + c1 = c1[1:] + self.minute_specification = 0 + elif c1.startswith('半'): + c1 = c1[1:] + self.minute_specification = 30 + if c1.startswith('钟'): + c1 = c1[1:] + return c1 + + def digest_ke(self, content: str) -> str: + for pat in ("一刻", "过一刻"): + if content.startswith(pat): + if self.minute_specification is not None: + raise MultipleSpecificationException() + self.minute_specification = 15 + return content[len(pat):] + for pat in ("两刻", "过两刻"): + if content.startswith(pat): + if self.minute_specification is not None: + raise MultipleSpecificationException() + self.minute_specification = 30 + return content[len(pat):] + for pat in ("三刻", "过三刻"): + if content.startswith(pat): + if self.minute_specification is not None: + raise MultipleSpecificationException() + self.minute_specification = 45 + return content[len(pat):] + return content + + def digest_early_late_hour(self, content: str) -> str: + if not (content.startswith("早") or content.startswith("晚")): + return content + + if self.time_delta_triggered: + raise MultipleSpecificationException() + + if self.hour_specification is not None: + raise MultipleSpecificationException() + if self.ampm_specification not in (None, "ABSOLUTE"): + raise MultipleSpecificationException() + + prefix = "早" if content.startswith("早") else "晚" + rest = content[1:] + + remaining, num = self.digest_number(rest) + if num is None: + return content + + if not (0 <= num <= 12): + return content + + if prefix == "早": + self.ampm_specification = "AM" + hour = num + if hour == 12: + hour = 0 + else: + self.ampm_specification = "PM" + if num == 12: + hour = 0 + else: + hour = num + + self.hour_specification = hour + self.minute_specification = 0 + + return remaining + + def _find_weekday(self, week_offset: int, target_weekday: int) -> datetime.datetime: + """ + 计算相对周的目标星期几。 + :param week_offset: 0=本周, 1=下周, -1=上周 + :param target_weekday: Monday=0, Sunday=6 (与 datetime.weekday() 一致) + :return: 对应的 datetime(时间部分设为 00:00:00) + """ + # 本周一的日期(假设周一为每周开始) + today = self.now.date() + days_since_monday = today.weekday() # Monday is 0 + this_monday = today - datetime.timedelta(days=days_since_monday) + + # 目标周一 + target_monday = this_monday + datetime.timedelta(weeks=week_offset) + + # 目标星期几 + target_date = target_monday + datetime.timedelta(days=target_weekday) + + # 返回 datetime,时间归零(与“明天”行为一致) + return datetime.datetime.combine(target_date, datetime.time.min) + + def digest_weekday_relative(self, content: str) -> str: + """ + 支持:本周五、下周三、上周一、这周五、下周一 等 + 返回剩余字符串。 + """ + # 星期映射(支持:星期一、周1、周五 等) + weekday_map = { + "一": 0, "1": 0, + "二": 1, "2": 1, + "三": 2, "3": 2, + "四": 3, "4": 3, + "五": 4, "5": 4, + "六": 5, "6": 5, + "日": 6, "天": 6, "7": 6, + } + + # 周偏移映射 + week_offset_map = { + "本周": 0, + "这周": 0, + "下周": 1, + "下下周": 2, # 可选扩展 + "上周": -1, + "上上周": -2, # 可选扩展 + } + + # 尝试匹配 [周标识][星期限定] + for week_key, week_offset in week_offset_map.items(): + if content.startswith(week_key): + rest = content[len(week_key):] + + if rest.startswith("星期"): + rest = rest[2:] + elif rest.startswith("周"): + rest = rest[1:] + + if rest and (c := rest[0]) in weekday_map: + target_wd = weekday_map[c] + rest = rest[1:] + else: + continue + + if self.time_delta_triggered or self.time_spec_day_delta != 0: + raise MultipleSpecificationException() + + target_dt = self._find_weekday(week_offset, target_wd) + + # 设置 day delta 相对于 now 的 00:00 + base_date = self.now.replace(hour=0, minute=0, second=0, microsecond=0).date() + delta_days = (target_dt.date() - base_date).days + self.time_spec_day_delta = delta_days + + return rest + + return content + + def build(self) -> datetime.datetime: + t = self.now + if not self.time_delta_triggered: + t = t.replace(hour=0, minute=0, second=0, microsecond=0) + if self.hour_specification is not None: + hour = self.hour_specification + if self.ampm_specification == "AM": + if hour == 12: + hour = 0 + elif self.ampm_specification == "PM": + if hour != 12: + hour += 12 + elif self.ampm_ismid: + hour = 12 + else: + hour = 0 + self.time_spec_day_delta += 1 + elif self.ampm_specification is None: + if hour < self.now.hour and hour < 13: + hour += 12 + t = t.replace(hour=hour) + if self.minute_specification is not None: + t = t.replace(minute=self.minute_specification) + + t += datetime.timedelta(days=self.time_spec_day_delta) + t += self.timedelta + return t + + +def parse(content: str) -> datetime.datetime: + return Parser().parse(content) diff --git a/ptimeparse/err.py b/ptimeparse/err.py new file mode 100644 index 0000000..f727d09 --- /dev/null +++ b/ptimeparse/err.py @@ -0,0 +1,11 @@ +class PTimeParseException(Exception): + ... + +class TokenUnhandledException(PTimeParseException): + ... + +class MultipleSpecificationException(PTimeParseException): + ... + +class OutOfRangeSpecificationException(PTimeParseException): + ... diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..82e3c69 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,29 @@ +[project] +name = "ptimeparse" +version = "0.1.0" +description = "一个用于解析中文的时间表达的库" +authors = [ + {name = "passthem", email = "Passthem183@gmail.com"} +] +readme = "README.md" +requires-python = ">=3.9" +dependencies = [ + "ply (>=3.11,<4.0)" +] +license = "MIT" + +[tool.poetry] +packages = [{include = "ptimeparse" }] + +[build-system] +requires = ["poetry-core>=2.0.0,<3.0.0"] +build-backend = "poetry.core.masonry.api" + +[[tool.poetry.source]] +name = "gitea-private" +url = "https://gitea.service.jazzwhom.top/api/packages/Passthem/pypi" + +[dependency-groups] +dev = [ + "pytest (>=8.4.2,<9.0.0)" +] diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..9a113d7 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,188 @@ +import datetime + +import pytest + +from ptimeparse import Parser +from ptimeparse.err import MultipleSpecificationException, TokenUnhandledException + + +def test_chinese_number(): + parser = Parser() + + assert parser.digest_chinese_number("零") == ('', 0) + assert parser.digest_chinese_number("零喵") == ('喵', 0) + assert parser.digest_chinese_number("一喵") == ('喵', 1) + assert parser.digest_chinese_number("十喵") == ('喵', 10) + assert parser.digest_chinese_number("三千万喵") == ('喵', 3000_0000) + assert parser.digest_chinese_number("三千三百万喵") == ('喵', 3300_0000) + assert parser.digest_chinese_number("三千零三万喵") == ('喵', 3003_0000) + assert parser.digest_chinese_number("三千零三十万喵") == ('喵', 3030_0000) + assert parser.digest_chinese_number("五千四百零三万喵") == ('喵', 5403_0000) + assert parser.digest_chinese_number("五百万喵") == ('喵', 500_0000) + assert parser.digest_chinese_number("五万五千喵") == ('喵', 5_5000) + assert parser.digest_chinese_number("五万零五百喵") == ('喵', 5_0500) + assert parser.digest_chinese_number("五亿喵") == ('喵', 5_0000_0000) + assert parser.digest_chinese_number("五百亿喵") == ('喵', 500_0000_0000) + assert parser.digest_chinese_number("五百亿零五十喵") == ('喵', 500_0000_0050) + assert parser.digest_chinese_number("五百亿五十万喵") == ('喵', 500_0050_0000) + + +def test_hour_specification_pm(): + parser = Parser(now=datetime.datetime(2025, 10, 9, 16, 34, 1, 114)) + + assert parser.parse("五点") == datetime.datetime(2025, 10, 9, 17, 0, 0, 0) + assert parser.parse("5点") == datetime.datetime(2025, 10, 9, 17, 0, 0, 0) + assert parser.parse("5 点") == datetime.datetime(2025, 10, 9, 17, 0, 0, 0) + + assert parser.parse("六点") == datetime.datetime(2025, 10, 9, 18, 0, 0, 0) + assert parser.parse("六点整") == datetime.datetime(2025, 10, 9, 18, 0, 0, 0) + assert parser.parse("六点钟") == datetime.datetime(2025, 10, 9, 18, 0, 0, 0) + + assert parser.parse("10 时") == datetime.datetime(2025, 10, 9, 10, 0, 0, 0) + assert parser.parse("10 时整") == datetime.datetime(2025, 10, 9, 10, 0, 0, 0) + assert parser.parse("10点") == datetime.datetime(2025, 10, 9, 22, 0, 0, 0) + assert parser.parse("10点整") == datetime.datetime(2025, 10, 9, 22, 0, 0, 0) + + assert parser.parse("13点") == datetime.datetime(2025, 10, 9, 13, 0, 0, 0) + assert parser.parse("15点") == datetime.datetime(2025, 10, 9, 15, 0, 0, 0) + assert parser.parse("13 时") == datetime.datetime(2025, 10, 9, 13, 0, 0, 0) + assert parser.parse("15 时") == datetime.datetime(2025, 10, 9, 15, 0, 0, 0) + + assert parser.parse("四点") == datetime.datetime(2025, 10, 9, 16, 0, 0, 0) + + assert parser.parse("上午十点") == datetime.datetime(2025, 10, 9, 10, 0, 0, 0) + assert parser.parse("早晨十点") == datetime.datetime(2025, 10, 9, 10, 0, 0, 0) + assert parser.parse("早上十点") == datetime.datetime(2025, 10, 9, 10, 0, 0, 0) + assert parser.parse("早十") == datetime.datetime(2025, 10, 9, 10, 0, 0, 0) + assert parser.parse("早八") == datetime.datetime(2025, 10, 9, 8, 0, 0, 0) + assert parser.parse("晚六") == datetime.datetime(2025, 10, 9, 18, 0, 0, 0) + assert parser.parse("下午三点") == datetime.datetime(2025, 10, 9, 15, 0, 0, 0) + + assert parser.parse("晚上十二点") == datetime.datetime(2025, 10, 10, 0, 0, 0, 0) + assert parser.parse("晚上八点") == datetime.datetime(2025, 10, 9, 20, 0, 0, 0) + assert parser.parse("凌晨零点") == datetime.datetime(2025, 10, 9, 0, 0, 0, 0) + assert parser.parse("中午十二点") == datetime.datetime(2025, 10, 9, 12, 0, 0, 0) + +def test_hour_specification_am(): + parser = Parser(now=datetime.datetime(2025, 10, 9, 2, 34, 1, 114)) + + assert parser.parse("五点") == datetime.datetime(2025, 10, 9, 5, 0, 0, 0) + assert parser.parse("5点") == datetime.datetime(2025, 10, 9, 5, 0, 0, 0) + assert parser.parse("5 点") == datetime.datetime(2025, 10, 9, 5, 0, 0, 0) + + assert parser.parse("六点") == datetime.datetime(2025, 10, 9, 6, 0, 0, 0) + assert parser.parse("六点整") == datetime.datetime(2025, 10, 9, 6, 0, 0, 0) + assert parser.parse("六点钟") == datetime.datetime(2025, 10, 9, 6, 0, 0, 0) + + assert parser.parse("10 时") == datetime.datetime(2025, 10, 9, 10, 0, 0, 0) + assert parser.parse("10 时整") == datetime.datetime(2025, 10, 9, 10, 0, 0, 0) + assert parser.parse("10点") == datetime.datetime(2025, 10, 9, 10, 0, 0, 0) + assert parser.parse("10点整") == datetime.datetime(2025, 10, 9, 10, 0, 0, 0) + + assert parser.parse("四点") == datetime.datetime(2025, 10, 9, 4, 0, 0, 0) + assert parser.parse("一点钟") == datetime.datetime(2025, 10, 9, 13, 0, 0, 0) + + +def test_hour_with_minute(): + parser = Parser(now=datetime.datetime(2025, 10, 9, 16, 34, 1, 114)) + + assert parser.parse("六点半") == datetime.datetime(2025, 10, 9, 18, 30, 0, 0) + assert parser.parse("六点半钟") == datetime.datetime(2025, 10, 9, 18, 30, 0, 0) + assert parser.parse("六点一刻") == datetime.datetime(2025, 10, 9, 18, 15, 0, 0) + assert parser.parse("六点过一刻") == datetime.datetime(2025, 10, 9, 18, 15, 0, 0) + + +def test_error(): + parser = Parser(now=datetime.datetime(2025, 10, 9, 16, 34, 1, 114)) + + with pytest.raises(TokenUnhandledException): + parser.parse("六点半整") + + with pytest.raises(MultipleSpecificationException): + parser.parse("六点半一刻") + + +def test_absolute_date(): + now = datetime.datetime(2025, 10, 9, 16, 34, 1, 114) + parser = Parser(now=now) + # 完整年月日 + assert parser.parse("2025年10月9日") == datetime.datetime(2025, 10, 9, 0, 0, 0, 0) + assert parser.parse("2025-10-09") == datetime.datetime(2025, 10, 9, 0, 0, 0, 0) + assert parser.parse("2025/10/09") == datetime.datetime(2025, 10, 9, 0, 0, 0, 0) + # 仅月日(默认今年) + assert parser.parse("10月9日") == datetime.datetime(2025, 10, 9, 0, 0, 0, 0) + assert parser.parse("十月九日") == datetime.datetime(2025, 10, 9, 0, 0, 0, 0) + # 年月日 + 时间 + assert parser.parse("2025年10月9日 15点") == datetime.datetime(2025, 10, 9, 15, 0, 0, 0) + assert parser.parse("10月9日 下午3点") == datetime.datetime(2025, 10, 9, 15, 0, 0, 0) + assert parser.parse("十月九日 晚上八点") == datetime.datetime(2025, 10, 9, 20, 0, 0, 0) + # ISO 格式(如果支持) + assert parser.parse("2025-10-09T15:30") == datetime.datetime(2025, 10, 9, 15, 30, 0, 0) + + +def test_relative_date(): + now = datetime.datetime(2025, 10, 9, 10, 0, 0) + parser = Parser(now=now) + assert parser.parse("明天") == datetime.datetime(2025, 10, 10, 0, 0, 0, 0) + assert parser.parse("后天") == datetime.datetime(2025, 10, 11, 0, 0, 0, 0) + assert parser.parse("昨天") == datetime.datetime(2025, 10, 8, 0, 0, 0, 0) + assert parser.parse("大前天") == datetime.datetime(2025, 10, 6, 0, 0, 0, 0) + assert parser.parse("大后天") == datetime.datetime(2025, 10, 12, 0, 0, 0, 0) + + assert parser.parse("三天后") == datetime.datetime(2025, 10, 12, 0, 0, 0, 0) + assert parser.parse("五天前") == datetime.datetime(2025, 10, 4, 0, 0, 0, 0) + + assert parser.parse("下周一") == datetime.datetime(2025, 10, 13, 0, 0, 0, 0) + assert parser.parse("上周五") == datetime.datetime(2025, 10, 3, 0, 0, 0, 0) + assert parser.parse("本周五") == datetime.datetime(2025, 10, 10, 0, 0, 0, 0) + + end_of_month = datetime.datetime(2025, 10, 31, 10, 0, 0) + parser2 = Parser(now=end_of_month) + assert parser2.parse("两天后") == datetime.datetime(2025, 11, 2, 0, 0, 0, 0) + + +def test_relative_time(): + now = datetime.datetime(2025, 10, 9, 16, 30, 0, 0) + parser = Parser(now=now) + + assert parser.parse("五分钟后") == datetime.datetime(2025, 10, 9, 16, 35, 0, 0) + assert parser.parse("十分钟前") == datetime.datetime(2025, 10, 9, 16, 20, 0, 0) + assert parser.parse("半小时后") == datetime.datetime(2025, 10, 9, 17, 0, 0, 0) + assert parser.parse("一个半小时后") == datetime.datetime(2025, 10, 9, 18, 0, 0, 0) + + assert parser.parse("两小时后") == datetime.datetime(2025, 10, 9, 18, 30, 0, 0) + assert parser.parse("一小时前") == datetime.datetime(2025, 10, 9, 15, 30, 0, 0) + + late_night = datetime.datetime(2025, 10, 9, 23, 50, 0, 0) + parser3 = Parser(now=late_night) + assert parser3.parse("二十分钟后") == datetime.datetime(2025, 10, 10, 0, 10, 0, 0) + + assert parser.parse("5分钟后") == datetime.datetime(2025, 10, 9, 16, 35, 0, 0) + assert parser.parse("三十分钟前") == datetime.datetime(2025, 10, 9, 16, 0, 0, 0) + + + +def test_robustness_edge_cases(): + parser = Parser(now=datetime.datetime(2025, 2, 28, 10, 0, 0)) + + assert parser.parse("明天") == datetime.datetime(2025, 3, 1, 0, 0, 0, 0) + + parser_leap = Parser(now=datetime.datetime(2024, 2, 28, 10, 0, 0)) + assert parser_leap.parse("两天后") == datetime.datetime(2024, 3, 1, 0, 0, 0, 0) + + with pytest.raises(TokenUnhandledException): + parser.parse("随便乱写") + parser.parse(" 明天 ") + +def test_mixed_expressions(): + now = datetime.datetime(2025, 10, 9, 14, 0, 0) + parser = Parser(now=now) + # 如“明天下午三点” + assert parser.parse("明天下午三点") == datetime.datetime(2025, 10, 10, 15, 0, 0, 0) + assert parser.parse("后天早上八点") == datetime.datetime(2025, 10, 11, 8, 0, 0, 0) + assert parser.parse("大后天晚上十点") == datetime.datetime(2025, 10, 12, 22, 0, 0, 0) + # “下周三上午” + # 2025-10-09 是周四,下周三是 2025-10-15 + assert parser.parse("下周三") == datetime.datetime(2025, 10, 15, 0, 0, 0, 0) + assert parser.parse("下周三早八") == datetime.datetime(2025, 10, 15, 8, 0, 0, 0) # 默认0点?或上午9点?需根据实现 + # 若实现中“上午”不指定小时则设为9点,可调整;这里假设设为0点以简化