From ec540eed55ca0e4e88c200c8167c21fd6e4d5fec Mon Sep 17 00:00:00 2001 From: passthem Date: Thu, 23 Oct 2025 22:09:31 +0800 Subject: [PATCH] =?UTF-8?q?=E8=80=B62?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ptimeparse/__init__.py | 1280 +++++++++++++++++++--------------------- pyproject.toml | 15 +- tests/__init__.py | 505 ++++++++++------ 3 files changed, 951 insertions(+), 849 deletions(-) diff --git a/ptimeparse/__init__.py b/ptimeparse/__init__.py index 31091e9..31a052c 100644 --- a/ptimeparse/__init__.py +++ b/ptimeparse/__init__.py @@ -1,706 +1,654 @@ -import datetime import re -from dataclasses import dataclass -from typing import Literal +import datetime +from typing import Tuple, Optional, Dict, Any -from ptimeparse.err import (MultipleSpecificationException, - TokenUnhandledException) +from .err import MultipleSpecificationException, TokenUnhandledException -@dataclass class Parser: - now: datetime.datetime + def __init__(self, now: Optional[datetime.datetime] = None): + self.now = now or datetime.datetime.now() - timedelta: datetime.timedelta + def digest_chinese_number(self, text: str) -> Tuple[str, int]: + if not text: + return text, 0 + # Handle "两" at start + if text.startswith("两"): + next_char = text[1] if len(text) > 1 else '' + if not next_char or next_char in "十百千万亿": + return text[1:], 2 + s = "零一二三四五六七八九" + digits = {c: i for i, c in enumerate(s)} + i = 0 + while i < len(text) and text[i] in s + "十百千万亿": + i += 1 + if i == 0: + return text, 0 + num_str = text[:i] + rest = text[i:] - hour_delta_triggered: bool = False - minute_delta_triggered: bool = False - second_delta_triggered: bool = False + def parse(s): + if not s: + return 0 + if s == "零": + return 0 + if "亿" in s: + a, b = s.split("亿", 1) + return parse(a) * 100000000 + parse(b) + if "万" in s: + a, b = s.split("万", 1) + return parse(a) * 10000 + parse(b) + n = 0 + t = 0 + for c in s: + if c == "零": + continue + if c in digits: + t = digits[c] + elif c == "十": + if t == 0: + t = 1 + n += t * 10 + t = 0 + elif c == "百": + if t == 0: + t = 1 + n += t * 100 + t = 0 + elif c == "千": + if t == 0: + t = 1 + n += t * 1000 + t = 0 + n += t + return n - ampm_specification: Literal["AM", "PM", None, "ABSOLUTE"] = None - ampm_ismid: bool = False - hour_specification: int | None = None - minute_specification: int | None = None - time_spec_day_delta: int = 0 + return rest, parse(num_str) - @property - def time_delta_triggered(self): - return self.hour_delta_triggered or self.minute_delta_triggered or self.second_delta_triggered + def parse(self, text: str) -> datetime.datetime: + text = text.strip() + if not text: + raise TokenUnhandledException("Empty input") - def __init__(self, now: datetime.datetime | None = None): - self.now = datetime.datetime.now() if now is None else now - - self.CN_NUM = { - "零": 0, - "一": 1, - "二": 2, - "两": 2, - "三": 3, - "四": 4, - "五": 5, - "六": 6, - "七": 7, - "八": 8, - "九": 9, - "十": 10, - "百": 100, - "千": 1000, + ctx = { + "date": None, + "time": None, + "relative_delta": None, + "am_pm": None, + "period_word": None, + "has_time": False, + "has_date": False, + "ambiguous_hour": False, + "is_24hour": False, + "has_relative_date": False, } - self.CN_UNIT = {"万": 1_0000, "亿": 1_0000_0000, "兆": 1_0000_0000_0000} - def clear_state(self): - self.timedelta = datetime.timedelta() - self.hour_delta_triggered = False - self.minute_delta_triggered = False - self.second_delta_triggered = False + rest = self._parse_all(text, ctx) + if rest.strip(): + raise TokenUnhandledException(f"Unparsed tokens: {rest.strip()}") - self.ampm_specification = None - self.ampm_ismid = False - self.hour_specification = None - self.minute_specification = None - self.time_spec_day_delta = 0 + return self._apply_context(ctx) - def clean(self, content: str) -> str: - return re.sub(r"([ \t的]|的时候|之时)", "", content) + def _parse_all(self, text: str, ctx: Dict[str, Any]) -> str: + rest = text.lstrip() + while True: + prev = rest + for parser in [ + self._parse_absolute_date, + self._parse_relative_date, + self._parse_relative_time, + self._parse_period, + self._parse_time, + ]: + new_rest = parser(rest, ctx) + if new_rest != rest: + rest = new_rest.lstrip() + break + else: + break + return rest - def parse(self, content: str) -> datetime.datetime: - self.clear_state() - content = self.clean(content) + def _add_delta(self, ctx, delta): + if ctx["relative_delta"] is None: + ctx["relative_delta"] = delta + else: + ctx["relative_delta"] += delta - content = self.digest_relative_date(content) - content = self.digest_weekday_relative(content) - content = self.digest_delta(content) + def _parse_absolute_date(self, text: str, ctx: Dict[str, Any]) -> str: + text = text.lstrip() + m = re.match(r"^(\d{4})-(\d{1,2})-(\d{1,2})T(\d{1,2}):(\d{2})", text) + if m: + y, mth, d, h, minute = map(int, m.groups()) + ctx["date"] = datetime.date(y, mth, d) + ctx["time"] = datetime.time(h, minute) + ctx["has_date"] = True + ctx["has_time"] = True + ctx["is_24hour"] = True + return text[m.end():] + m = re.match(r"^(\d{4})-(\d{1,2})-(\d{1,2})", text) + if m: + y, mth, d = map(int, m.groups()) + ctx["date"] = datetime.date(y, mth, d) + ctx["has_date"] = True + return text[m.end():] + m = re.match(r"^(\d{4})/(\d{1,2})/(\d{1,2})", text) + if m: + y, mth, d = map(int, m.groups()) + ctx["date"] = datetime.date(y, mth, d) + ctx["has_date"] = True + return text[m.end():] + m = re.match(r"^(\d{4})年(\d{1,2})月(\d{1,2})[日号]", text) + if m: + y, mth, d = map(int, m.groups()) + ctx["date"] = datetime.date(y, mth, d) + ctx["has_date"] = True + return text[m.end():] + m = re.match(r"^(\d{1,2})月(\d{1,2})[日号]", text) + if m: + mth, d = map(int, m.groups()) + ctx["date"] = datetime.date(self.now.year, mth, d) + ctx["has_date"] = True + return text[m.end():] + m = re.match(r"^(.{1,3})月(.{1,3})[日号]", text) + if m: + m_str, d_str = m.groups() + _, mth = self.digest_chinese_number(m_str) + _, d = self.digest_chinese_number(d_str) + if mth == 0: + mth = 1 + if d == 0: + d = 1 + ctx["date"] = datetime.date(self.now.year, mth, d) + ctx["has_date"] = True + return text[m.end():] + return text - content = self.digest_date(content) + def _parse_relative_date(self, text: str, ctx: Dict[str, Any]) -> str: + text = text.lstrip() + + # Handle "今天", "今晚", "今早", etc. + today_variants = [ + ("今晚上", "PM"), + ("今晚", "PM"), + ("今早", "AM"), + ("今天早上", "AM"), + ("今天早晨", "AM"), + ("今天上午", "AM"), + ("今天下午", "PM"), + ("今天晚上", "PM"), + ("今天", None), + ] + for variant, period in today_variants: + if text.startswith(variant): + self._add_delta(ctx, datetime.timedelta(days=0)) + ctx["has_relative_date"] = True + rest = text[len(variant):] + if period is not None and ctx["am_pm"] is None: + ctx["am_pm"] = period + ctx["period_word"] = variant + return rest - content = self.digest_colon_time(content) - content = self.digest_early_late_hour(content) - content = self.digest_ampm_specific(content) - content = self.digest_time(content) - content = self.digest_minute_only(content) - content = self.digest_ke(content) - if len(content) != 0: - raise TokenUnhandledException(content) - return self.build() - - def digest_relative_date(self, content: str) -> str: - """ - 处理明天、昨天、今天、后天、大后天、前天、大前天 等相对日期。 - 返回剩余未处理字符串。 - """ - # 注意:这些词必须完整匹配开头,避免误匹配(如“明天”不能匹配“明天早上”中的“明”) - relative_days = { - "今天": 0, + mapping = { "明天": 1, "后天": 2, "大后天": 3, - "昨日": -1, "昨天": -1, "前天": -2, "大前天": -3, } - - for word, delta_days in relative_days.items(): - if content.startswith(word): - # 如果已经设置了时间偏移(如 3 小时后),则冲突 - if self.time_delta_triggered: - raise MultipleSpecificationException() - # 如果已经通过其他方式设置了 day delta(如 digest_date 中),这里也应检查 - # 为简化,我们直接设置 - self.time_spec_day_delta = delta_days - return content[len(word):] - - return content - - def digest_colon_time(self, content: str) -> str: - """处理 HH:MM 或 H:MM 格式的时间""" - m = re.match(r"^(\d{1,2}):(\d{1,2})(.*)$", content) + for word, days in mapping.items(): + if text.startswith(word): + self._add_delta(ctx, datetime.timedelta(days=days)) + ctx["has_relative_date"] = True + return text[len(word):] + m = re.match(r"^(\d+|[零一二三四五六七八九十两]+)天(后|前|以后|之后)", text) if m: - try: - hour = int(m.group(1)) - minute = int(m.group(2)) - remaining = m.group(3) - - if not (0 <= hour <= 23) or not (0 <= minute <= 59): - return content - - if self.time_delta_triggered: - raise MultipleSpecificationException() - - self.hour_specification = hour - self.minute_specification = minute - - # 对于12小时制时间(hour <= 12),需要智能判断 - if hour <= 12 and self.time_spec_day_delta == 0: - if self.ampm_specification is None and hour < self.now.hour: - # 如果是今天且小时数小于当前小时,推测为PM - self.ampm_specification = "PM" - - return remaining - except ValueError: - pass - return content - - def digest_timedelta(self, content: str) -> str: - """ - 解析形如 "3天", "2小时", "1星期", "5个月" 等时间增量。 - 支持中文数字和阿拉伯数字。 - 返回未处理的剩余字符串。 - """ - - if content.startswith("半"): - # "半"通常指"半小时" - remaining = content[1:] - # 检查是否有"小时"、"时"等 - if remaining.startswith(("小时", "时")): - if self.hour_delta_triggered: - raise MultipleSpecificationException() - self.hour_delta_triggered = True - self.timedelta = datetime.timedelta(minutes=30) - return remaining[len("小时") if remaining.startswith("小时") else len("时"):] - elif remaining.startswith(("分钟", "分")): - if self.minute_delta_triggered: - raise MultipleSpecificationException() - self.minute_delta_triggered = True - self.timedelta = datetime.timedelta(minutes=30) - return remaining[len("分钟") if remaining.startswith("分钟") else len("分"):] + num_str, direction = m.groups() + if num_str.isdigit(): + n = int(num_str) else: - # 默认为半小时 - if self.hour_delta_triggered: - raise MultipleSpecificationException() - self.hour_delta_triggered = True - self.timedelta = datetime.timedelta(minutes=30) - return remaining + _, n = self.digest_chinese_number(num_str) + days = n if direction in ("后", "以后", "之后") else -n + self._add_delta(ctx, datetime.timedelta(days=days)) + ctx["has_relative_date"] = True + return text[m.end():] + m = re.match(r"^(本|上|下)周([一二三四五六日])", text) + if m: + scope, day = m.groups() + weekday_map = {"一": 0, "二": 1, "三": 2, "四": 3, "五": 4, "六": 5, "日": 6} + target = weekday_map[day] + current = self.now.weekday() + if scope == "本": + delta = target - current + elif scope == "上": + delta = target - current - 7 + else: + delta = target - current + 7 + self._add_delta(ctx, datetime.timedelta(days=delta)) + ctx["has_relative_date"] = True + return text[m.end():] + return text - # 定义时间单位映射(注意:月需特殊处理) - unit_patterns = [ - (r"(秒钟|秒)", "second"), - (r"(分钟|分)", "minute"), - (r"(时|小时|点)", "hour"), - (r"半(时|小时|点)", "hour+30"), - (r"(天|日)", "day"), - (r"(星期|周)", "week"), - (r"(月)", "month"), # 特殊:按30天处理 - ] - - remaining = content - delta_kwargs = { - "days": 0, - "seconds": 0, - "minutes": 0, - "hours": 0, - "weeks": 0, + def _parse_period(self, text: str, ctx: Dict[str, Any]) -> str: + text = text.lstrip() + period_mapping = { + "上午": "AM", + "早晨": "AM", + "早上": "AM", + "早": "AM", + "中午": "PM", + "下午": "PM", + "晚上": "PM", + "晚": "PM", + "凌晨": "AM", } - month_count = 0 # 单独记录月,最后转为天 + for word, tag in period_mapping.items(): + if text.startswith(word): + if ctx["am_pm"] is not None: + raise MultipleSpecificationException("Multiple periods") + ctx["am_pm"] = tag + ctx["period_word"] = word + return text[len(word):] + return text - while True: - matched = False - for pattern, unit_type in unit_patterns: - m = re.match(rf"^([零一二两三四五六七八九十百千万亿兆]*|\d+)?个?({pattern})", remaining) + def _parse_time(self, text: str, ctx: Dict[str, Any]) -> str: + if ctx["has_time"]: + return text + text = text.lstrip() + + # 1. H:MM pattern + m = re.match(r"^(\d{1,2}):(\d{2})", text) + if m: + h, minute = int(m.group(1)), int(m.group(2)) + if 0 <= h <= 23 and 0 <= minute <= 59: + ctx["time"] = datetime.time(h, minute) + ctx["has_time"] = True + ctx["ambiguous_hour"] = 1 <= h <= 12 + ctx["is_24hour"] = h > 12 or h == 0 + return text[m.end():] + + # 2. Parse hour part + hour = None + rest_after_hour = text + is_24hour_format = False + + # Try Chinese number + 点/时 + temp_rest, num = self.digest_chinese_number(text) + if num >= 0: + temp_rest_stripped = temp_rest.lstrip() + if temp_rest_stripped.startswith("点"): + hour = num + is_24hour_format = False + rest_after_hour = temp_rest_stripped[1:] + elif temp_rest_stripped.startswith("时"): + hour = num + is_24hour_format = True + rest_after_hour = temp_rest_stripped[1:] + + if hour is None: + m = re.match(r"^(\d{1,2})\s*([点时])", text) + if m: + hour = int(m.group(1)) + is_24hour_format = m.group(2) == "时" + rest_after_hour = text[m.end():] + + if hour is None: + if ctx.get("am_pm") is not None: + temp_rest, num = self.digest_chinese_number(text) + if 0 <= num <= 23: + hour = num + is_24hour_format = False + rest_after_hour = temp_rest.lstrip() + else: + m = re.match(r"^(\d{1,2})", text) + if m: + h_val = int(m.group(1)) + if 0 <= h_val <= 23: + hour = h_val + is_24hour_format = False + rest_after_hour = text[m.end():].lstrip() + + if hour is None: + return text + + if not (0 <= hour <= 23): + return text + + # Parse minutes + rest = rest_after_hour.lstrip() + minute = 0 + minute_spec_count = 0 + + if rest.startswith("钟"): + rest = rest[1:].lstrip() + + has_zheng = False + if rest.startswith("整"): + has_zheng = True + rest = rest[1:].lstrip() + + if rest.startswith("半"): + minute = 30 + minute_spec_count += 1 + rest = rest[1:].lstrip() + if rest.startswith("钟"): + rest = rest[1:].lstrip() + if rest.startswith("整"): + rest = rest[1:].lstrip() + + if rest.startswith("一刻"): + minute = 15 + minute_spec_count += 1 + rest = rest[2:].lstrip() + if rest.startswith("钟"): + rest = rest[1:].lstrip() + + if rest.startswith("过一刻"): + minute = 15 + minute_spec_count += 1 + rest = rest[3:].lstrip() + if rest.startswith("钟"): + rest = rest[1:].lstrip() + + m = re.match(r"^(\d+|[零一二三四五六七八九十]+)分", rest) + if m: + minute_spec_count += 1 + m_str = m.group(1) + if m_str.isdigit(): + minute = int(m_str) + else: + _, minute = self.digest_chinese_number(m_str) + rest = rest[m.end():].lstrip() + + if minute_spec_count == 0: + temp_rest, num = self.digest_chinese_number(rest) + if num > 0 and num <= 59: + minute = num + minute_spec_count += 1 + rest = temp_rest.lstrip() + else: + m = re.match(r"^(\d{1,2})", rest) if m: - num_str = m.group(1) - if num_str is None or num_str == "": - num = 1 # 默认为1,如“明天”实际是“1天后” + m_val = int(m.group(1)) + if 0 <= m_val <= 59: + minute = m_val + minute_spec_count += 1 + rest = rest[m.end():].lstrip() + + if has_zheng and minute_spec_count == 0: + minute_spec_count = 1 + + if minute_spec_count > 1: + raise MultipleSpecificationException("Multiple minute specifications") + + if not (0 <= minute <= 59): + return text + + # Hours 13-23 are always 24-hour, even with "点" + if hour >= 13: + is_24hour_format = True + + ctx["time"] = datetime.time(hour, minute) + ctx["has_time"] = True + ctx["ambiguous_hour"] = 1 <= hour <= 12 and not is_24hour_format + ctx["is_24hour"] = is_24hour_format + + return rest + + def _parse_relative_time(self, text: str, ctx: Dict[str, Any]) -> str: + text = text.lstrip() + + # 半小时 + m = re.match(r"^(半)(?:个)?小时?(后|前|以后|之后)", text) + if m: + direction = m.group(2) + hours = 0.5 + delta = datetime.timedelta( + hours=hours if direction in ("后", "以后", "之后") else -hours + ) + self._add_delta(ctx, delta) + return text[m.end():] + + # X个半 + m = re.match(r"^([0-9零一二三四五六七八九十两]+)个半(?:小时?)?(后|前|以后|之后)", text) + if m: + num_str, direction = m.groups() + if num_str.isdigit(): + base_hours = int(num_str) + else: + _, base_hours = self.digest_chinese_number(num_str) + if base_hours == 0 and num_str != "零": + return text + if base_hours <= 0: + return text + hours = base_hours + 0.5 + delta = datetime.timedelta( + hours=hours if direction in ("后", "以后", "之后") else -hours + ) + self._add_delta(ctx, delta) + return text[m.end():] + + # 一个半 + m = re.match(r"^(一个半)小时?(后|前|以后|之后)", text) + if m: + direction = m.group(2) + hours = 1.5 + delta = datetime.timedelta( + hours=hours if direction in ("后", "以后", "之后") else -hours + ) + self._add_delta(ctx, delta) + return text[m.end():] + + # X小时 + m = re.match(r"^([0-9零一二三四五六七八九十两]+)(?:个)?小时?(后|前|以后|之后)", text) + if m: + num_str, direction = m.groups() + if num_str.isdigit(): + hours = int(num_str) + else: + _, hours = self.digest_chinese_number(num_str) + if hours == 0 and num_str != "零": + return text + if hours <= 0: + return text + delta = datetime.timedelta( + hours=hours if direction in ("后", "以后", "之后") else -hours + ) + self._add_delta(ctx, delta) + return text[m.end():] + + m = re.match(r"^([0-9零一二三四五六七八九十两]+)(?:个)?小时(后|前)", text) + if m: + num_str, direction = m.groups() + if num_str.isdigit(): + hours = int(num_str) + else: + _, hours = self.digest_chinese_number(num_str) + if hours == 0 and num_str != "零": + return text + if hours <= 0: + return text + delta = datetime.timedelta( + hours=hours if direction == "后" else -hours + ) + self._add_delta(ctx, delta) + return text[m.end():] + + # X分钟 + m = re.match(r"^([0-9零一二三四五六七八九十两]+)分钟?(后|前|以后|之后)", text) + if m: + num_str, direction = m.groups() + if num_str.isdigit(): + minutes = int(num_str) + else: + _, minutes = self.digest_chinese_number(num_str) + if minutes == 0 and num_str != "零": + return text + if minutes <= 0: + return text + delta = datetime.timedelta( + minutes=minutes if direction in ("后", "以后", "之后") else -minutes + ) + self._add_delta(ctx, delta) + return text[m.end():] + + m = re.match(r"^([0-9零一二三四五六七八九十两]+)分(后|前|以后|之后)", text) + if m: + num_str, direction = m.groups() + if num_str.isdigit(): + minutes = int(num_str) + else: + _, minutes = self.digest_chinese_number(num_str) + if minutes == 0 and num_str != "零": + return text + if minutes <= 0: + return text + delta = datetime.timedelta( + minutes=minutes if direction in ("后", "以后", "之后") else -minutes + ) + self._add_delta(ctx, delta) + return text[m.end():] + + m = re.match(r"^([0-9零一二三四五六七八九十两]+)分钟?(后|前)", text) + if m: + num_str, direction = m.groups() + if num_str.isdigit(): + minutes = int(num_str) + else: + _, minutes = self.digest_chinese_number(num_str) + if minutes == 0 and num_str != "零": + return text + if minutes <= 0: + return text + delta = datetime.timedelta( + minutes=minutes if direction == "后" else -minutes + ) + self._add_delta(ctx, delta) + return text[m.end():] + + m = re.match(r"^([0-9零一二三四五六七八九十两]+)分(后|前)", text) + if m: + num_str, direction = m.groups() + if num_str.isdigit(): + minutes = int(num_str) + else: + _, minutes = self.digest_chinese_number(num_str) + if minutes == 0 and num_str != "零": + return text + if minutes <= 0: + return text + delta = datetime.timedelta( + minutes=minutes if direction == "后" else -minutes + ) + self._add_delta(ctx, delta) + return text[m.end():] + + # === 秒级支持 === + m = re.match(r"^([0-9零一二三四五六七八九十两]+)秒(后|前|以后|之后)", text) + if m: + num_str, direction = m.groups() + if num_str.isdigit(): + seconds = int(num_str) + else: + _, seconds = self.digest_chinese_number(num_str) + if seconds == 0 and num_str != "零": + return text + if seconds <= 0: + return text + delta = datetime.timedelta( + seconds=seconds if direction in ("后", "以后", "之后") else -seconds + ) + self._add_delta(ctx, delta) + return text[m.end():] + + m = re.match(r"^([0-9零一二三四五六七八九十两]+)秒(后|前)", text) + if m: + num_str, direction = m.groups() + if num_str.isdigit(): + seconds = int(num_str) + else: + _, seconds = self.digest_chinese_number(num_str) + if seconds == 0 and num_str != "零": + return text + if seconds <= 0: + return text + delta = datetime.timedelta( + seconds=seconds if direction == "后" else -seconds + ) + self._add_delta(ctx, delta) + return text[m.end():] + + return text + + def _apply_context(self, ctx: Dict[str, Any]) -> datetime.datetime: + result = self.now + has_date = ctx["has_date"] + has_time = ctx["has_time"] + has_delta = ctx["relative_delta"] is not None + has_relative_date = ctx["has_relative_date"] + + if has_delta: + result = result + ctx["relative_delta"] + + if has_date: + result = result.replace( + year=ctx["date"].year, + month=ctx["date"].month, + day=ctx["date"].day, + ) + + if has_time: + h = ctx["time"].hour + m = ctx["time"].minute + + if ctx["is_24hour"]: + # "10 时" → 10:00, no conversion + pass + + elif ctx["am_pm"] == "AM": + if h == 12: + h = 0 + + elif ctx["am_pm"] == "PM": + if h == 12: + if ctx.get("period_word") in ("晚上", "晚"): + h = 0 + result += datetime.timedelta(days=1) else: - # 尝试解析数字(中文或阿拉伯) - _, num = self.digest_chinese_number(num_str) - if num is None: - try: - num = int(num_str) - except ValueError: - continue # 无效数字,跳过 + h = 12 + elif 1 <= h <= 11: + h += 12 - # 设置标志位,防止后续时间规格冲突 - if unit_type == "hour": - if self.hour_delta_triggered: - raise MultipleSpecificationException() - self.hour_delta_triggered = True - elif unit_type == "hour+30": - if self.hour_delta_triggered: - raise MultipleSpecificationException() - self.hour_delta_triggered = True - if self.minute_delta_triggered: - raise MultipleSpecificationException() - self.minute_delta_triggered = True - elif unit_type == "minute": - if self.minute_delta_triggered: - raise MultipleSpecificationException() - self.minute_delta_triggered = True - elif unit_type == "second": - if self.second_delta_triggered: - raise MultipleSpecificationException() - self.second_delta_triggered = True + else: + # No period and not 24-hour (i.e., "点" format) + if ctx["has_relative_date"]: + # "明天五点" → 05:00 AM + if h == 12: + h = 0 + # keep h as AM hour (1-11 unchanged) + else: + # Infer from current time + am_hour = 0 if h == 12 else h + candidate_am = result.replace(hour=am_hour, minute=m, second=0, microsecond=0) + if candidate_am < self.now: + # AM time is in the past, so use PM + if h == 12: + h = 12 + else: + h += 12 + # else: keep as AM (h unchanged) - # 累加到对应单位 - if unit_type == "second": - delta_kwargs["seconds"] += num - elif unit_type == "minute": - delta_kwargs["minutes"] += num - elif unit_type == "hour": - delta_kwargs["hours"] += num - elif unit_type == "day": - delta_kwargs["days"] += num - elif unit_type == "week": - delta_kwargs["weeks"] += num - elif unit_type == "month": - month_count += num - elif unit_type == "hour+30": - delta_kwargs["hours"] += num - delta_kwargs["minutes"] += 30 + if h > 23: + h = h % 24 - # 更新剩余字符串 - remaining = remaining[len(m.group(0)):] - matched = True - break + result = result.replace(hour=h, minute=m, second=0, microsecond=0) - if not matched: - break - - # 处理“月” → 按30天/月估算(简单处理) - if month_count > 0: - delta_kwargs["days"] += month_count * 30 - - # 构建 timedelta - self.timedelta = datetime.timedelta( - days=delta_kwargs["days"], - seconds=delta_kwargs["seconds"], - minutes=delta_kwargs["minutes"], - hours=delta_kwargs["hours"], - weeks=delta_kwargs["weeks"] - ) - - return remaining - - def digest_delta(self, content: str) -> str: - if "后" in content: - c1, _ = content.split("后", 1) - c1 = self.digest_timedelta(c1) - if c1 != "": - raise TokenUnhandledException(c1) - return c1 - if "前" in content: - c1, _ = content.split("前", 1) - c1 = self.digest_timedelta(c1) - self.timedelta = -self.timedelta - if c1 != "": - raise TokenUnhandledException(c1) - return c1 - return content - - def digest_date(self, content: str) -> str: - # 1. 尝试 ISO 格式: 2025-10-09T15:30 或 2025-10-09 - iso_match = re.match(r"^(\d{4})-(\d{1,2})-(\d{1,2})(?:T(\d{1,2}):(\d{1,2}))?", content) - if iso_match: - year, month, day = int(iso_match.group(1)), int(iso_match.group(2)), int(iso_match.group(3)) - try: - target_date = datetime.date(year, month, day) - self.time_spec_day_delta = (target_date - self.now.date()).days - remaining = content[len(iso_match.group(0)):] - if iso_match.group(4): # 有时间 - hour = int(iso_match.group(4)) - minute = int(iso_match.group(5)) if iso_match.group(5) else 0 - self.hour_specification = hour - self.minute_specification = minute - return remaining - except ValueError: - pass - - # 2. 尝试 YYYY年MM月DD日 - full_date_match = re.match(r"^(\d{4})年(\d{1,2})月(\d{1,2})日", content) - if full_date_match: - year, month, day = map(int, full_date_match.groups()) - try: - target_date = datetime.date(year, month, day) - self.time_spec_day_delta = (target_date - self.now.date()).days - return content[len(full_date_match.group(0)):] - except ValueError: - pass - - # 3. 尝试 MM月DD日(默认今年) - md_match = re.match(r"^(\d{1,2})月(\d{1,2})日", content) - if md_match: - month, day = map(int, md_match.groups()) - year = self.now.year - try: - target_date = datetime.date(year, month, day) - self.time_spec_day_delta = (target_date - self.now.date()).days - return content[len(md_match.group(0)):] - except ValueError: - pass - - # 4. 尝试 YYYY/MM/DD - slash_full = re.match(r"^(\d{4})/(\d{1,2})/(\d{1,2})", content) - if slash_full: - year, month, day = map(int, slash_full.groups()) - try: - target_date = datetime.date(year, month, day) - self.time_spec_day_delta = (target_date - self.now.date()).days - return content[len(slash_full.group(0)):] - except ValueError: - pass - - # 5. 尝试 MM/DD - slash_md = re.match(r"^(\d{1,2})/(\d{1,2})", content) - if slash_md: - month, day = map(int, slash_md.groups()) - year = self.now.year - try: - target_date = datetime.date(year, month, day) - self.time_spec_day_delta = (target_date - self.now.date()).days - return content[len(slash_md.group(0)):] - except ValueError: - pass - - # 6. 中文月日:十月九日 - cn_md_match = re.match(r"^([一二三四五六七八九十]+)月([一二三四五六七八九十]+)日", content) - if cn_md_match: - month_str, day_str = cn_md_match.groups() - _, month_num = self.digest_chinese_number(month_str + "日") - _, day_num = self.digest_chinese_number(day_str + "日") - if month_num is not None and day_num is not None: - year = self.now.year - try: - target_date = datetime.date(year, month_num, day_num) - self.time_spec_day_delta = (target_date - self.now.date()).days - return content[len(cn_md_match.group(0)):] - except ValueError: - pass - - return content - - def digest_time(self, content: str) -> str: - content = self.digest_single_hour(content) - return content - - def _chinese_to_int_final(self, cn_str: str) -> int: - result = 0 - num_section = 0 - - for char in cn_str: - if char in self.CN_NUM: - val = self.CN_NUM[char] - if val <= 9: - num_section = val - elif val >= 10: - if num_section == 0 and val == 10: - num_section = 1 - - result += num_section * val - num_section = 0 - - result += num_section + else: + if has_date or (has_relative_date and not has_time): + result = result.replace(hour=0, minute=0, second=0, microsecond=0) return result - def digest_chinese_number(self, content: str) -> tuple[str, int | None]: - """ - 识别字符串开头的中文数字并将其转换为整数。 - 处理范围:零到九千九百九十九万九千九百九十九亿九千九百九十九万九千九百九十九(约10^16) - """ - CN_CHARS = "".join(self.CN_NUM.keys()) + "".join(self.CN_UNIT.keys()) - m = re.match(f"^([{CN_CHARS}]+)", content) - if m is None: - return content, None - - cn_num_str = m.group(1) - if not cn_num_str: - return content, None - - remaining_content = content[len(cn_num_str) :] - - if cn_num_str == "零": - return remaining_content, 0 - if cn_num_str == "一": - return remaining_content, 1 - if cn_num_str in self.CN_NUM and self.CN_NUM[cn_num_str] <= 9: - return remaining_content, self.CN_NUM[cn_num_str] - - pattern = re.compile(r"([^万亿兆]*)([万亿兆]?)") - - parts = pattern.findall(cn_num_str) - parts.reverse() - - current_unit = 1 - total_num = 0 - - for num_str, unit_char in parts: - if not num_str and not unit_char: - continue - - if unit_char in self.CN_UNIT: - current_unit = self.CN_UNIT[unit_char] - - if num_str: - section_num = self._chinese_to_int_final(num_str) - total_num += section_num * current_unit - - if unit_char in self.CN_UNIT: - pass - elif not unit_char: - current_unit = 1 - - return remaining_content, total_num - - def digest_number(self, content: str) -> tuple[str, int | None]: - c1, num = self.digest_chinese_number(content) - if num is not None: - return c1, num - m = re.match(r"^(\d+)(.+)$", content) - if m is not None: - return m.group(2), int(m.group(1)) - return content, None - - def digest_ampm_specific(self, content: str) -> str: - am_patterns = ["凌晨", "早上", "上午", "早晨", "早"] - pm_patterns = ["中午", "下午", "晚上", "傍晚", "晚"] - - for pat in am_patterns: - if content.startswith(pat): - self.ampm_specification = "AM" - return content[len(pat):] - - for pat in pm_patterns: - if content.startswith(pat): - self.ampm_specification = "PM" - if pat == '中午': - self.ampm_ismid = True - return content[len(pat):] - - return content - - def digest_single_hour(self, content: str) -> str: - c1, num = self.digest_number(content) - if num is None: - return content - if self.time_delta_triggered: - raise MultipleSpecificationException() - self.hour_specification = num - if c1.startswith("点"): - c1 = c1[1:] - elif c1.startswith("时"): - c1 = c1[1:] - if self.ampm_specification is None: - self.ampm_specification = "ABSOLUTE" - else: - return content - if c1.startswith('钟'): - c1 = c1[1:] - if c1.startswith('整'): - c1 = c1[1:] - self.minute_specification = 0 - elif c1.startswith('半'): - c1 = c1[1:] - self.minute_specification = 30 - if c1.startswith('钟'): - c1 = c1[1:] - return c1 - - def digest_ke(self, content: str) -> str: - for pat in ("一刻", "过一刻"): - if content.startswith(pat): - if self.minute_specification is not None: - raise MultipleSpecificationException() - self.minute_specification = 15 - return content[len(pat):] - for pat in ("两刻", "过两刻"): - if content.startswith(pat): - if self.minute_specification is not None: - raise MultipleSpecificationException() - self.minute_specification = 30 - return content[len(pat):] - for pat in ("三刻", "过三刻"): - if content.startswith(pat): - if self.minute_specification is not None: - raise MultipleSpecificationException() - self.minute_specification = 45 - return content[len(pat):] - if content.startswith("钟"): - content = content[1:] - return content - - def digest_minute_only(self, content: str) -> str: - """ - 处理单独的分钟表达,如"40分"、"十五分"等 - 这种情况下,使用当前小时,只替换分钟部分 - """ - if self.time_delta_triggered: - # 如果已经有时间增量,不能同时指定具体分钟 - return content - - # 检查是否以分钟数字开头 - c1, minute_num = self.digest_number(content) - if minute_num is None or not (0 <= minute_num <= 59): - return content - - # 检查是否有"分"或"分钟"后缀 - if c1.startswith("分钟"): - c1 = c1[2:] - elif c1.startswith("分"): - c1 = c1[1:] - else: - # 没有分钟单位,可能不是分钟表达 - return content - - # 如果已经指定了小时,就不应该单独指定分钟(会让逻辑混乱) - # if self.hour_specification is not None: - # return content - - # 单独的分钟表达:使用当前小时,只设置分钟 - self.minute_specification = minute_num - # 不设置 hour_specification,保持为 None - - return c1 - - def digest_early_late_hour(self, content: str) -> str: - if not (content.startswith("早") or content.startswith("晚")): - return content - - if self.time_delta_triggered: - raise MultipleSpecificationException() - - if self.hour_specification is not None: - raise MultipleSpecificationException() - if self.ampm_specification not in (None, "ABSOLUTE"): - raise MultipleSpecificationException() - - prefix = "早" if content.startswith("早") else "晚" - rest = content[1:] - - remaining, num = self.digest_number(rest) - if num is None: - return content - - if not (0 <= num <= 12): - return content - - if prefix == "早": - self.ampm_specification = "AM" - hour = num - if hour == 12: - hour = 0 - else: - self.ampm_specification = "PM" - if num == 12: - hour = 0 - else: - hour = num - - self.hour_specification = hour - self.minute_specification = 0 - - return remaining - - def _find_weekday(self, week_offset: int, target_weekday: int) -> datetime.datetime: - """ - 计算相对周的目标星期几。 - :param week_offset: 0=本周, 1=下周, -1=上周 - :param target_weekday: Monday=0, Sunday=6 (与 datetime.weekday() 一致) - :return: 对应的 datetime(时间部分设为 00:00:00) - """ - # 本周一的日期(假设周一为每周开始) - today = self.now.date() - days_since_monday = today.weekday() # Monday is 0 - this_monday = today - datetime.timedelta(days=days_since_monday) - - # 目标周一 - target_monday = this_monday + datetime.timedelta(weeks=week_offset) - - # 目标星期几 - target_date = target_monday + datetime.timedelta(days=target_weekday) - - # 返回 datetime,时间归零(与“明天”行为一致) - return datetime.datetime.combine(target_date, datetime.time.min) - - def digest_weekday_relative(self, content: str) -> str: - """ - 支持:本周五、下周三、上周一、这周五、下周一 等 - 返回剩余字符串。 - """ - # 星期映射(支持:星期一、周1、周五 等) - weekday_map = { - "一": 0, "1": 0, - "二": 1, "2": 1, - "三": 2, "3": 2, - "四": 3, "4": 3, - "五": 4, "5": 4, - "六": 5, "6": 5, - "日": 6, "天": 6, "7": 6, - } - - # 周偏移映射 - week_offset_map = { - "本周": 0, - "这周": 0, - "下周": 1, - "下下周": 2, # 可选扩展 - "上周": -1, - "上上周": -2, # 可选扩展 - } - - # 尝试匹配 [周标识][星期限定] - for week_key, week_offset in week_offset_map.items(): - if content.startswith(week_key): - rest = content[len(week_key):] - - if rest.startswith("星期"): - rest = rest[2:] - elif rest.startswith("周"): - rest = rest[1:] - - if rest and (c := rest[0]) in weekday_map: - target_wd = weekday_map[c] - rest = rest[1:] - else: - continue - - if self.time_delta_triggered or self.time_spec_day_delta != 0: - raise MultipleSpecificationException() - - target_dt = self._find_weekday(week_offset, target_wd) - - # 设置 day delta 相对于 now 的 00:00 - base_date = self.now.replace(hour=0, minute=0, second=0, microsecond=0).date() - delta_days = (target_dt.date() - base_date).days - self.time_spec_day_delta = delta_days - - return rest - - return content - - def build(self) -> datetime.datetime: - t = self.now - if not self.time_delta_triggered: - t = t.replace(hour=0, minute=0, second=0, microsecond=0) - if self.hour_specification is not None: - hour = self.hour_specification - if self.ampm_specification == "AM": - if hour == 12: - hour = 0 - elif self.ampm_specification == "PM": - if hour != 12: - hour += 12 - elif self.ampm_ismid: - hour = 12 - else: - hour = 0 - self.time_spec_day_delta += 1 - elif self.ampm_specification is None: - if hour < self.now.hour and hour < 13: - hour += 12 - t = t.replace(hour=hour) - if self.minute_specification is not None: - if self.hour_specification is None: - # 只有分钟指定:保持当前小时,只改分钟 - t = t.replace(minute=self.minute_specification, second=0, microsecond=0) - else: - # 有小时指定:使用解析出的小时和分钟 - t = t.replace(minute=self.minute_specification) - - t += datetime.timedelta(days=self.time_spec_day_delta) - t += self.timedelta - return t - - -def parse(content: str) -> datetime.datetime: - return Parser().parse(content) +def parse(text: str) -> datetime.datetime: + return Parser().parse(text) diff --git a/pyproject.toml b/pyproject.toml index e95948e..1904275 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,18 +1,15 @@ [project] name = "ptimeparse" -version = "0.1.2" +version = "0.2.0" description = "一个用于解析中文的时间表达的库" -authors = [ - {name = "passthem", email = "Passthem183@gmail.com"} -] +authors = [{ name = "passthem", email = "Passthem183@gmail.com" }] readme = "README.md" requires-python = ">=3.9" -dependencies = [ -] +dependencies = [] license = "MIT" [tool.poetry] -packages = [{include = "ptimeparse" }] +packages = [{ include = "ptimeparse" }] [build-system] requires = ["poetry-core>=2.0.0,<3.0.0"] @@ -24,6 +21,4 @@ url = "https://gitea.service.jazzwhom.top/api/packages/Passthem/pypi" priority = "supplemental" [dependency-groups] -dev = [ - "pytest (>=8.4.2,<9.0.0)" -] +dev = ["pytest (>=8.4.2,<9.0.0)"] diff --git a/tests/__init__.py b/tests/__init__.py index bf2341a..ba764b7 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,4 +1,4 @@ -import datetime +from datetime import datetime as dt import pytest @@ -6,192 +6,351 @@ from ptimeparse import Parser from ptimeparse.err import MultipleSpecificationException, TokenUnhandledException -def test_chinese_number(): +# --- 测试中文数字解析 --- +@pytest.mark.parametrize( + "input_str, expected_rest, expected_num", + [ + ("零", "", 0), + ("零喵", "喵", 0), + ("一喵", "喵", 1), + ("十喵", "喵", 10), + ("三千万喵", "喵", 3000_0000), + ("三千三百万喵", "喵", 3300_0000), + ("三千零三万喵", "喵", 3003_0000), + ("三千零三十万喵", "喵", 3030_0000), + ("五千四百零三万喵", "喵", 5403_0000), + ("五百万喵", "喵", 500_0000), + ("五万五千喵", "喵", 5_5000), + ("五万零五百喵", "喵", 5_0500), + ("五亿喵", "喵", 5_0000_0000), + ("五百亿喵", "喵", 500_0000_0000), + ("五百亿零五十喵", "喵", 500_0000_0050), + ("五百亿五十万喵", "喵", 500_0050_0000), + ], +) +def test_digest_chinese_number(input_str, expected_rest, expected_num): parser = Parser() - - assert parser.digest_chinese_number("零") == ('', 0) - assert parser.digest_chinese_number("零喵") == ('喵', 0) - assert parser.digest_chinese_number("一喵") == ('喵', 1) - assert parser.digest_chinese_number("十喵") == ('喵', 10) - assert parser.digest_chinese_number("三千万喵") == ('喵', 3000_0000) - assert parser.digest_chinese_number("三千三百万喵") == ('喵', 3300_0000) - assert parser.digest_chinese_number("三千零三万喵") == ('喵', 3003_0000) - assert parser.digest_chinese_number("三千零三十万喵") == ('喵', 3030_0000) - assert parser.digest_chinese_number("五千四百零三万喵") == ('喵', 5403_0000) - assert parser.digest_chinese_number("五百万喵") == ('喵', 500_0000) - assert parser.digest_chinese_number("五万五千喵") == ('喵', 5_5000) - assert parser.digest_chinese_number("五万零五百喵") == ('喵', 5_0500) - assert parser.digest_chinese_number("五亿喵") == ('喵', 5_0000_0000) - assert parser.digest_chinese_number("五百亿喵") == ('喵', 500_0000_0000) - assert parser.digest_chinese_number("五百亿零五十喵") == ('喵', 500_0000_0050) - assert parser.digest_chinese_number("五百亿五十万喵") == ('喵', 500_0050_0000) + rest, num = parser.digest_chinese_number(input_str) + # 使用 f-string 包含上下文信息 + assert rest == expected_rest, f"Input: {input_str}, Expected Rest: {expected_rest}, Actual Rest: {rest}" + assert num == expected_num, f"Input: {input_str}, Expected Num: {expected_num}, Actual Num: {num}" -def test_hour_specification_pm(): - parser = Parser(now=datetime.datetime(2025, 10, 9, 16, 34, 1, 114)) +# --- 测试时间解析(PM 上下文)--- +@pytest.mark.parametrize( + "text, expected", + [ + # 基础点表达(自动转为 PM if 1-12 且上下文为下午) + ("五点", dt(2025, 10, 9, 17, 0)), + ("5点", dt(2025, 10, 9, 17, 0)), + ("5 点", dt(2025, 10, 9, 17, 0)), + ("六点", dt(2025, 10, 9, 18, 0)), + ("六点整", dt(2025, 10, 9, 18, 0)), + ("六点钟", dt(2025, 10, 9, 18, 0)), + ("四点", dt(2025, 10, 9, 16, 0)), - assert parser.parse("五点") == datetime.datetime(2025, 10, 9, 17, 0, 0, 0) - assert parser.parse("5点") == datetime.datetime(2025, 10, 9, 17, 0, 0, 0) - assert parser.parse("5 点") == datetime.datetime(2025, 10, 9, 17, 0, 0, 0) + # 显式 "时" 表示 24 小时制 + ("10 时", dt(2025, 10, 9, 10, 0)), + ("10 时整", dt(2025, 10, 9, 10, 0)), + ("13点", dt(2025, 10, 9, 13, 0)), + ("15点", dt(2025, 10, 9, 15, 0)), + ("13 时", dt(2025, 10, 9, 13, 0)), + ("15 时", dt(2025, 10, 9, 15, 0)), - assert parser.parse("六点") == datetime.datetime(2025, 10, 9, 18, 0, 0, 0) - assert parser.parse("六点整") == datetime.datetime(2025, 10, 9, 18, 0, 0, 0) - assert parser.parse("六点钟") == datetime.datetime(2025, 10, 9, 18, 0, 0, 0) + # 显式上午/下午 + ("上午十点", dt(2025, 10, 9, 10, 0)), + ("早晨十点", dt(2025, 10, 9, 10, 0)), + ("早上十点", dt(2025, 10, 9, 10, 0)), + ("早十", dt(2025, 10, 9, 10, 0)), + ("早八", dt(2025, 10, 9, 8, 0)), + ("晚六", dt(2025, 10, 9, 18, 0)), + ("下午三点", dt(2025, 10, 9, 15, 0)), + ("晚上八点", dt(2025, 10, 9, 20, 0)), + ("中午十二点", dt(2025, 10, 9, 12, 0)), + ("凌晨零点", dt(2025, 10, 9, 0, 0)), - assert parser.parse("10 时") == datetime.datetime(2025, 10, 9, 10, 0, 0, 0) - assert parser.parse("10 时整") == datetime.datetime(2025, 10, 9, 10, 0, 0, 0) - assert parser.parse("10点") == datetime.datetime(2025, 10, 9, 22, 0, 0, 0) - assert parser.parse("10点整") == datetime.datetime(2025, 10, 9, 22, 0, 0, 0) + # 特殊:晚上十二点 → 次日 00:00 + ("晚上十二点", dt(2025, 10, 10, 0, 0)), - assert parser.parse("13点") == datetime.datetime(2025, 10, 9, 13, 0, 0, 0) - assert parser.parse("15点") == datetime.datetime(2025, 10, 9, 15, 0, 0, 0) - assert parser.parse("13 时") == datetime.datetime(2025, 10, 9, 13, 0, 0, 0) - assert parser.parse("15 时") == datetime.datetime(2025, 10, 9, 15, 0, 0, 0) - - assert parser.parse("四点") == datetime.datetime(2025, 10, 9, 16, 0, 0, 0) - - assert parser.parse("上午十点") == datetime.datetime(2025, 10, 9, 10, 0, 0, 0) - assert parser.parse("早晨十点") == datetime.datetime(2025, 10, 9, 10, 0, 0, 0) - assert parser.parse("早上十点") == datetime.datetime(2025, 10, 9, 10, 0, 0, 0) - assert parser.parse("早十") == datetime.datetime(2025, 10, 9, 10, 0, 0, 0) - assert parser.parse("早八") == datetime.datetime(2025, 10, 9, 8, 0, 0, 0) - assert parser.parse("晚六") == datetime.datetime(2025, 10, 9, 18, 0, 0, 0) - assert parser.parse("下午三点") == datetime.datetime(2025, 10, 9, 15, 0, 0, 0) - - assert parser.parse("晚上十二点") == datetime.datetime(2025, 10, 10, 0, 0, 0, 0) - assert parser.parse("晚上八点") == datetime.datetime(2025, 10, 9, 20, 0, 0, 0) - assert parser.parse("凌晨零点") == datetime.datetime(2025, 10, 9, 0, 0, 0, 0) - assert parser.parse("中午十二点") == datetime.datetime(2025, 10, 9, 12, 0, 0, 0) - -def test_hour_specification_am(): - parser = Parser(now=datetime.datetime(2025, 10, 9, 2, 34, 1, 114)) - - assert parser.parse("五点") == datetime.datetime(2025, 10, 9, 5, 0, 0, 0) - assert parser.parse("5点") == datetime.datetime(2025, 10, 9, 5, 0, 0, 0) - assert parser.parse("5 点") == datetime.datetime(2025, 10, 9, 5, 0, 0, 0) - - assert parser.parse("六点") == datetime.datetime(2025, 10, 9, 6, 0, 0, 0) - assert parser.parse("六点整") == datetime.datetime(2025, 10, 9, 6, 0, 0, 0) - assert parser.parse("六点钟") == datetime.datetime(2025, 10, 9, 6, 0, 0, 0) - - assert parser.parse("10 时") == datetime.datetime(2025, 10, 9, 10, 0, 0, 0) - assert parser.parse("10 时整") == datetime.datetime(2025, 10, 9, 10, 0, 0, 0) - assert parser.parse("10点") == datetime.datetime(2025, 10, 9, 10, 0, 0, 0) - assert parser.parse("10点整") == datetime.datetime(2025, 10, 9, 10, 0, 0, 0) - - assert parser.parse("四点") == datetime.datetime(2025, 10, 9, 4, 0, 0, 0) - assert parser.parse("一点钟") == datetime.datetime(2025, 10, 9, 13, 0, 0, 0) + # 注意:10点(无修饰)在 PM 上下文中 → 22:00 + ("10点", dt(2025, 10, 9, 22, 0)), + ("10点整", dt(2025, 10, 9, 22, 0)), + ], +) +def test_parse_hour_pm_context(text, expected): + """在下午(16:34)上下文中解析时间""" + NOW = dt(2025, 10, 9, 16, 34, 1, 114) + parser = Parser(now=NOW) + actual = parser.parse(text) + # 移除微秒,以便比较 + expected_clean = expected.replace(microsecond=0) + actual_clean = actual.replace(microsecond=0) + # 使用 f-string 包含上下文信息:输入文本、now 上下文、计算值、期望值 + assert actual_clean == expected_clean, ( + f"Failed on Text: '{text}'. " + f"Context (now): {NOW}. " + f"Expected: {expected_clean}. " + f"Actual: {actual_clean}." + ) -def test_hour_with_minute(): - parser = Parser(now=datetime.datetime(2025, 10, 9, 16, 34, 1, 114)) - - assert parser.parse("六点半") == datetime.datetime(2025, 10, 9, 18, 30, 0, 0) - assert parser.parse("六点半钟") == datetime.datetime(2025, 10, 9, 18, 30, 0, 0) - assert parser.parse("六点一刻") == datetime.datetime(2025, 10, 9, 18, 15, 0, 0) - assert parser.parse("六点过一刻") == datetime.datetime(2025, 10, 9, 18, 15, 0, 0) +# --- 测试时间解析(AM 上下文)--- +@pytest.mark.parametrize( + "text, expected", + [ + ("五点", dt(2025, 10, 9, 5, 0)), + ("5点", dt(2025, 10, 9, 5, 0)), + ("5 点", dt(2025, 10, 9, 5, 0)), + ("六点", dt(2025, 10, 9, 6, 0)), + ("六点整", dt(2025, 10, 9, 6, 0)), + ("六点钟", dt(2025, 10, 9, 6, 0)), + ("10 时", dt(2025, 10, 9, 10, 0)), + ("10 时整", dt(2025, 10, 9, 10, 0)), + ("10点", dt(2025, 10, 9, 10, 0)), # AM 上下文中 10点 → 10:00 + ("10点整", dt(2025, 10, 9, 10, 0)), + ("四点", dt(2025, 10, 9, 4, 0)), + # 一点钟在 AM 上下文?但 13点是合理的(可能表示下午1点) + ("一点钟", dt(2025, 10, 9, 13, 0)), + ("晚上十二点", dt(2025, 10, 10, 0, 0)), + ], +) +def test_parse_hour_am_context(text, expected): + """在凌晨(02:34)上下文中解析时间""" + NOW = dt(2025, 10, 9, 2, 34, 1, 114) + parser = Parser(now=NOW) + actual = parser.parse(text) + expected_clean = expected.replace(microsecond=0) + actual_clean = actual.replace(microsecond=0) + assert actual_clean == expected_clean, ( + f"Failed on Text: '{text}'. " + f"Context (now): {NOW}. " + f"Expected: {expected_clean}. " + f"Actual: {actual_clean}." + ) -def test_error(): - parser = Parser(now=datetime.datetime(2025, 10, 9, 16, 34, 1, 114)) - - with pytest.raises(TokenUnhandledException): - parser.parse("六点半整") - - with pytest.raises(MultipleSpecificationException): - parser.parse("六点半一刻") +# --- 测试带分钟的时间 --- +@pytest.mark.parametrize( + "text, expected", + [ + ("六点半", dt(2025, 10, 9, 18, 30)), + ("六点半钟", dt(2025, 10, 9, 18, 30)), + ("六点一刻", dt(2025, 10, 9, 18, 15)), + ("六点过一刻", dt(2025, 10, 9, 18, 15)), + ], +) +def test_parse_hour_with_minute(text, expected): + NOW = dt(2025, 10, 9, 16, 34, 1, 114) + parser = Parser(now=NOW) + actual = parser.parse(text) + expected_clean = expected.replace(microsecond=0) + actual_clean = actual.replace(microsecond=0) + assert actual_clean == expected_clean, ( + f"Failed on Text: '{text}'. " + f"Context (now): {NOW}. " + f"Expected: {expected_clean}. " + f"Actual: {actual_clean}." + ) -def test_absolute_date(): - now = datetime.datetime(2025, 10, 9, 16, 34, 1, 114) - parser = Parser(now=now) - # 完整年月日 - assert parser.parse("2025年10月9日") == datetime.datetime(2025, 10, 9, 0, 0, 0, 0) - assert parser.parse("2025-10-09") == datetime.datetime(2025, 10, 9, 0, 0, 0, 0) - assert parser.parse("2025/10/09") == datetime.datetime(2025, 10, 9, 0, 0, 0, 0) - # 仅月日(默认今年) - assert parser.parse("10月9日") == datetime.datetime(2025, 10, 9, 0, 0, 0, 0) - assert parser.parse("十月九日") == datetime.datetime(2025, 10, 9, 0, 0, 0, 0) - # 年月日 + 时间 - assert parser.parse("2025年10月9日 15点") == datetime.datetime(2025, 10, 9, 15, 0, 0, 0) - assert parser.parse("10月9日 下午3点") == datetime.datetime(2025, 10, 9, 15, 0, 0, 0) - assert parser.parse("十月九日 晚上八点") == datetime.datetime(2025, 10, 9, 20, 0, 0, 0) - # ISO 格式(如果支持) - assert parser.parse("2025-10-09T15:30") == datetime.datetime(2025, 10, 9, 15, 30, 0, 0) +# --- 错误处理测试 --- +def test_parse_errors(): + NOW = dt(2025, 10, 9, 16, 34, 1, 114) + parser = Parser(now=NOW) - -def test_absolute_time(): - now = datetime.datetime(2025, 10, 9, 16, 34, 1, 114) - parser = Parser(now=now) - assert parser.parse("5:30") == datetime.datetime(2025, 10, 9, 17, 30, 0, 0) - assert parser.parse("5:11") == datetime.datetime(2025, 10, 9, 17, 11, 0, 0) - assert parser.parse("5点30分") == datetime.datetime(2025, 10, 9, 17, 30, 0, 0) - assert parser.parse("17:20") == datetime.datetime(2025, 10, 9, 17, 20, 0, 0) - - -def test_relative_date(): - now = datetime.datetime(2025, 10, 9, 10, 0, 0) - parser = Parser(now=now) - assert parser.parse("明天") == datetime.datetime(2025, 10, 10, 0, 0, 0, 0) - assert parser.parse("后天") == datetime.datetime(2025, 10, 11, 0, 0, 0, 0) - assert parser.parse("昨天") == datetime.datetime(2025, 10, 8, 0, 0, 0, 0) - assert parser.parse("大前天") == datetime.datetime(2025, 10, 6, 0, 0, 0, 0) - assert parser.parse("大后天") == datetime.datetime(2025, 10, 12, 0, 0, 0, 0) - - assert parser.parse("三天后") == datetime.datetime(2025, 10, 12, 0, 0, 0, 0) - assert parser.parse("五天前") == datetime.datetime(2025, 10, 4, 0, 0, 0, 0) - - assert parser.parse("下周一") == datetime.datetime(2025, 10, 13, 0, 0, 0, 0) - assert parser.parse("上周五") == datetime.datetime(2025, 10, 3, 0, 0, 0, 0) - assert parser.parse("本周五") == datetime.datetime(2025, 10, 10, 0, 0, 0, 0) - - end_of_month = datetime.datetime(2025, 10, 31, 10, 0, 0) - parser2 = Parser(now=end_of_month) - assert parser2.parse("两天后") == datetime.datetime(2025, 11, 2, 0, 0, 0, 0) - - -def test_relative_time(): - now = datetime.datetime(2025, 10, 9, 16, 30, 0, 0) - parser = Parser(now=now) - - assert parser.parse("五分钟后") == datetime.datetime(2025, 10, 9, 16, 35, 0, 0) - assert parser.parse("十分钟前") == datetime.datetime(2025, 10, 9, 16, 20, 0, 0) - assert parser.parse("半小时后") == datetime.datetime(2025, 10, 9, 17, 0, 0, 0) - assert parser.parse("一个半小时后") == datetime.datetime(2025, 10, 9, 18, 0, 0, 0) - - assert parser.parse("两小时后") == datetime.datetime(2025, 10, 9, 18, 30, 0, 0) - assert parser.parse("一小时前") == datetime.datetime(2025, 10, 9, 15, 30, 0, 0) - - late_night = datetime.datetime(2025, 10, 9, 23, 50, 0, 0) - parser3 = Parser(now=late_night) - assert parser3.parse("二十分钟后") == datetime.datetime(2025, 10, 10, 0, 10, 0, 0) - - assert parser.parse("5分钟后") == datetime.datetime(2025, 10, 9, 16, 35, 0, 0) - assert parser.parse("三十分钟前") == datetime.datetime(2025, 10, 9, 16, 0, 0, 0) - - - -def test_robustness_edge_cases(): - parser = Parser(now=datetime.datetime(2025, 2, 28, 10, 0, 0)) - - assert parser.parse("明天") == datetime.datetime(2025, 3, 1, 0, 0, 0, 0) - - parser_leap = Parser(now=datetime.datetime(2024, 2, 28, 10, 0, 0)) - assert parser_leap.parse("两天后") == datetime.datetime(2024, 3, 1, 0, 0, 0, 0) - - with pytest.raises(TokenUnhandledException): + # 完全无效输入 + with pytest.raises(TokenUnhandledException, match="随便乱写"): parser.parse("随便乱写") - parser.parse(" 明天 ") -def test_mixed_expressions(): - now = datetime.datetime(2025, 10, 9, 14, 0, 0) + # 但允许前后空格 + result = parser.parse(" 明天 ") + # 使用 f-string 包含上下文信息 + expected_date = dt(2025, 10, 10).date() + assert result.date() == expected_date, ( + f"Failed on Text: ' 明天 '. " + f"Context (now): {NOW}. " + f"Expected Date: {expected_date}. " + f"Actual Date: {result.date()}." + ) + + +# --- 绝对日期测试 --- +@pytest.mark.parametrize( + "text, expected", + [ + ("2025年10月9日", dt(2025, 10, 9, 0, 0)), + ("2025-10-09", dt(2025, 10, 9, 0, 0)), + ("2025/10/09", dt(2025, 10, 9, 0, 0)), + ("10月9日", dt(2025, 10, 9, 0, 0)), + ("十月九日", dt(2025, 10, 9, 0, 0)), + ("2025年10月9日 15点", dt(2025, 10, 9, 15, 0)), + ("10月9日 下午3点", dt(2025, 10, 9, 15, 0)), + ("十月九日 晚上八点", dt(2025, 10, 9, 20, 0)), + ("2025-10-09T15:30", dt(2025, 10, 9, 15, 30)), + ], +) +def test_parse_absolute_date(text, expected): + NOW = dt(2025, 10, 9, 16, 34, 1, 114) + parser = Parser(now=NOW) + actual = parser.parse(text) + expected_clean = expected.replace(microsecond=0) + actual_clean = actual.replace(microsecond=0) + assert actual_clean == expected_clean, ( + f"Failed on Text: '{text}'. " + f"Context (now): {NOW}. " + f"Expected: {expected_clean}. " + f"Actual: {actual_clean}." + ) + + +# --- 绝对时间(无日期)测试 --- +@pytest.mark.parametrize( + "text, expected", + [ + ("5:30", dt(2025, 10, 9, 17, 30)), + ("5:11", dt(2025, 10, 9, 17, 11)), + ("5点30分", dt(2025, 10, 9, 17, 30)), + ("17:20", dt(2025, 10, 9, 17, 20)), + ("六点零五", dt(2025, 10, 9, 18, 5, 0, 0)), + ], +) +def test_parse_absolute_time(text, expected): + NOW = dt(2025, 10, 9, 16, 34, 1, 114) + parser = Parser(now=NOW) + actual = parser.parse(text) + expected_clean = expected.replace(microsecond=0) + actual_clean = actual.replace(microsecond=0) + assert actual_clean == expected_clean, ( + f"Failed on Text: '{text}'. " + f"Context (now): {NOW}. " + f"Expected: {expected_clean}. " + f"Actual: {actual_clean}." + ) + + +# --- 相对日期测试 --- +@pytest.mark.parametrize( + "now, text, expected", + [ + (dt(2025, 10, 9, 10, 0), "明天", dt(2025, 10, 10)), + (dt(2025, 10, 9, 10, 0), "后天", dt(2025, 10, 11)), + (dt(2025, 10, 9, 10, 0), "昨天", dt(2025, 10, 8)), + (dt(2025, 10, 9, 10, 0), "大前天", dt(2025, 10, 6)), + (dt(2025, 10, 9, 10, 0), "大后天", dt(2025, 10, 12)), + (dt(2025, 10, 9, 10, 0), "三天后", dt(2025, 10, 12)), + (dt(2025, 10, 9, 10, 0), "五天前", dt(2025, 10, 4)), + (dt(2025, 10, 9, 10, 0), "下周一", dt(2025, 10, 13)), # 10-9 是周四 + (dt(2025, 10, 9, 10, 0), "上周五", dt(2025, 10, 3)), + (dt(2025, 10, 9, 10, 0), "本周五", dt(2025, 10, 10)), + (dt(2025, 10, 31, 10, 0), "两天后", dt(2025, 11, 2)), + ], +) +def test_parse_relative_date(now, text, expected): parser = Parser(now=now) - # 如“明天下午三点” - assert parser.parse("明天下午三点") == datetime.datetime(2025, 10, 10, 15, 0, 0, 0) - assert parser.parse("后天早上八点") == datetime.datetime(2025, 10, 11, 8, 0, 0, 0) - assert parser.parse("大后天晚上十点") == datetime.datetime(2025, 10, 12, 22, 0, 0, 0) - # “下周三上午” - # 2025-10-09 是周四,下周三是 2025-10-15 - assert parser.parse("下周三") == datetime.datetime(2025, 10, 15, 0, 0, 0, 0) - assert parser.parse("下周三早八") == datetime.datetime(2025, 10, 15, 8, 0, 0, 0) # 默认0点?或上午9点?需根据实现 - # 若实现中“上午”不指定小时则设为9点,可调整;这里假设设为0点以简化 + actual = parser.parse(text) + expected_clean = expected.replace(microsecond=0) + actual_clean = actual.replace(microsecond=0) + assert actual_clean == expected_clean, ( + f"Failed on Text: '{text}'. " + f"Context (now): {now}. " + f"Expected: {expected_clean}. " + f"Actual: {actual_clean}." + ) + + +# --- 相对时间(分钟/小时)测试 --- +@pytest.mark.parametrize( + "now, text, expected", + [ + (dt(2025, 10, 9, 16, 30), "五分钟后", dt(2025, 10, 9, 16, 35)), + (dt(2025, 10, 9, 16, 30), "十分钟前", dt(2025, 10, 9, 16, 20)), + (dt(2025, 10, 9, 16, 30), "半小时后", dt(2025, 10, 9, 17, 0)), + (dt(2025, 10, 9, 16, 30), "一个半小时后", dt(2025, 10, 9, 18, 0)), + (dt(2025, 10, 9, 16, 30), "两个半小时后", dt(2025, 10, 9, 19, 0)), + (dt(2025, 10, 9, 16, 30), "两小时后", dt(2025, 10, 9, 18, 30)), + (dt(2025, 10, 9, 16, 30), "一小时前", dt(2025, 10, 9, 15, 30)), + (dt(2025, 10, 9, 23, 50), "二十分钟后", dt(2025, 10, 10, 0, 10)), + (dt(2025, 10, 9, 16, 30), "5分钟后", dt(2025, 10, 9, 16, 35)), + (dt(2025, 10, 9, 16, 30), "三十分钟前", dt(2025, 10, 9, 16, 0)), + (dt(2025, 10, 9, 16, 30, 0, 0), "两秒后", dt(2025, 10, 9, 16, 30, 2, 0)), + # 同义词支持 + (dt(2025, 10, 19, 20, 16), "一小时后", dt(2025, 10, 19, 21, 16)), + (dt(2025, 10, 19, 20, 16), "一小时以后", dt(2025, 10, 19, 21, 16)), + (dt(2025, 10, 19, 20, 16), "一小时之后", dt(2025, 10, 19, 21, 16)), + (dt(2025, 10, 19, 20, 16), "一个小时以后", dt(2025, 10, 19, 21, 16)), + ], +) +def test_parse_relative_time(now, text, expected): + parser = Parser(now=now) + actual = parser.parse(text) + expected_clean = expected.replace(microsecond=0) + actual_clean = actual.replace(microsecond=0) + assert actual_clean == expected_clean, ( + f"Failed on Text: '{text}'. " + f"Context (now): {now}. " + f"Expected: {expected_clean}. " + f"Actual: {actual_clean}." + ) + + +# --- 混合表达式(日期 + 时间)--- +@pytest.mark.parametrize( + "now, text, expected", + [ + (dt(2025, 10, 9, 14, 0), "明天下午三点", dt(2025, 10, 10, 15, 0)), + (dt(2025, 10, 9, 14, 0), "后天早上八点", dt(2025, 10, 11, 8, 0)), + (dt(2025, 10, 9, 14, 0), "大后天晚上十点", dt(2025, 10, 12, 22, 0)), + (dt(2025, 10, 9, 14, 0), "下周三", dt(2025, 10, 15, 0, 0)), # 10-9 周四 → 下周三 10-15 + (dt(2025, 10, 9, 14, 0), "下周三早八", dt(2025, 10, 15, 8, 0)), + (dt(2025, 10, 19, 20, 16), "八点二十", dt(2025, 10, 19, 20, 20)), + (dt(2025, 10, 19, 20, 16), "明天八点二十", dt(2025, 10, 20, 8, 20)), + (dt(2025, 10, 19, 10, 10), "今晚八点", dt(2025, 10, 19, 20, 0)), + (dt(2025, 10, 19, 10, 10), "今天早上六点", dt(2025, 10, 19, 6, 0)), + (dt(2025, 10, 19, 10, 10), "今早七点五十分", dt(2025, 10, 19, 7, 50)), + ], +) +def test_parse_mixed_expressions(now, text, expected): + parser = Parser(now=now) + actual = parser.parse(text) + expected_clean = expected.replace(microsecond=0) + actual_clean = actual.replace(microsecond=0) + assert actual_clean == expected_clean, ( + f"Failed on Text: '{text}'. " + f"Context (now): {now}. " + f"Expected: {expected_clean}. " + f"Actual: {actual_clean}." + ) + + +# --- 边界情况与鲁棒性 --- +def test_robustness_edge_cases(): + # 闰年 & 月末 + parser_feb = Parser(now=dt(2025, 2, 28, 10, 0)) + expected_march = dt(2025, 3, 1, 0, 0) + actual_march = parser_feb.parse("明天") + assert actual_march == expected_march, ( + f"Failed on '明天' (now=2025-02-28). " + f"Expected: {expected_march}. " + f"Actual: {actual_march}." + ) + + parser_leap = Parser(now=dt(2024, 2, 28, 10, 0)) + expected_leap = dt(2024, 3, 1, 0, 0) + actual_leap = parser_leap.parse("两天后") + assert actual_leap == expected_leap, ( + f"Failed on '两天后' (now=2024-02-28 - Leap Year). " + f"Expected: {expected_leap}. " + f"Actual: {actual_leap}." + ) + + # 空格容忍 + NOW = dt(2025, 10, 9, 10, 0) + parser = Parser(now=NOW) + result = parser.parse(" 明天 ") + expected_date = dt(2025, 10, 10).date() + assert result.date() == expected_date, ( + f"Failed on Text: ' 明天 ' (Whitespace). " + f"Context (now): {NOW}. " + f"Expected Date: {expected_date}. " + f"Actual Date: {result.date()}." + )