This commit is contained in:
2025-10-09 19:22:18 +08:00
parent c7c1c68ae5
commit d26fabe5d1
3 changed files with 83 additions and 3 deletions

View File

@ -61,7 +61,7 @@ class Parser:
self.time_spec_day_delta = 0
def clean(self, content: str) -> str:
    """Strip filler text that carries no timing information.

    Removes spaces, tabs, the particle "的" and the phrases "的时候" and
    "之时" wherever they occur.

    Args:
        content: Raw expression to normalise.

    Returns:
        ``content`` with every filler occurrence deleted.
    """
    # Regex alternation is ordered: the multi-character phrases must be tried
    # before the single-character class. Otherwise "的" is consumed on its own,
    # "的时候" can never match, and a stray "时候" is left in the output.
    return re.sub(r"的时候|之时|[ \t的]", "", content)
def parse(self, content: str) -> datetime.datetime:
self.clear_state()
@ -73,9 +73,11 @@ class Parser:
content = self.digest_date(content)
content = self.digest_colon_time(content)
content = self.digest_early_late_hour(content)
content = self.digest_ampm_specific(content)
content = self.digest_time(content)
content = self.digest_minute_only(content)
content = self.digest_ke(content)
if len(content) != 0:
raise TokenUnhandledException(content)
@ -110,6 +112,35 @@ class Parser:
return content
def digest_colon_time(self, content: str) -> str:
    """Consume a leading ``HH:MM`` / ``H:MM`` clock time.

    Both the ASCII colon and the full-width colon ":" are accepted as the
    separator. On success the hour and minute are stored in
    ``hour_specification`` / ``minute_specification``. When no day offset is
    set, no AM/PM marker has been recorded, and the hour is a 12-hour-clock
    value (1-11) smaller than the current hour, the time is assumed to be PM.

    Args:
        content: Remaining unparsed text.

    Returns:
        The text after the clock time, or ``content`` unchanged when it does
        not start with a valid ``HH:MM`` value.

    Raises:
        MultipleSpecificationException: If a relative time delta has already
            been parsed from the same expression.
    """
    m = re.match(r"^(\d{1,2})[::](\d{1,2})(.*)$", content)
    if m is None:
        return content

    # The pattern only admits one or two decimal digits, so int() cannot fail
    # here and no ValueError guard is needed. Dropping the guard also keeps
    # the deliberate raise below from being swallowed by accident.
    hour = int(m.group(1))
    minute = int(m.group(2))
    if not (0 <= hour <= 23 and 0 <= minute <= 59):
        return content

    if self.time_delta_triggered:
        raise MultipleSpecificationException()

    self.hour_specification = hour
    self.minute_specification = minute

    # Guess PM only for genuine 12-hour-clock hours. Hour 0 exists only on the
    # 24-hour clock and must stay midnight; hour 12 gains nothing from a PM
    # flag, so it is left unmarked as well.
    if (
        1 <= hour <= 11
        and self.time_spec_day_delta == 0
        and self.ampm_specification is None
        and hour < self.now.hour
    ):
        # Same day and already past that hour: assume the afternoon reading.
        self.ampm_specification = "PM"

    return m.group(3)
def digest_timedelta(self, content: str) -> str:
"""
解析形如 "3天", "2小时", "1星期", "5个月" 等时间增量。
@ -484,8 +515,43 @@ class Parser:
raise MultipleSpecificationException()
self.minute_specification = 45
return content[len(pat):]
if content.startswith(""):
content = content[1:]
return content
def digest_minute_only(self, content: str) -> str:
    """Consume a bare minute expression such as "40分" or "十五分".

    Only ``minute_specification`` is written; ``hour_specification`` is left
    untouched, so a minute given on its own does not imply any hour.

    Args:
        content: Remaining unparsed text.

    Returns:
        The text after the minute expression, or ``content`` unchanged when
        it does not start with a number in 0-59 followed by "分" or "分钟",
        or when a relative time delta has already been parsed.
    """
    if self.time_delta_triggered:
        # A relative delta was already given; do not also pin a minute.
        return content

    rest, minute_num = self.digest_number(content)
    if minute_num is None or not (0 <= minute_num <= 59):
        return content

    # The number must carry an explicit minute unit. Test the longer unit
    # first so "分钟" is not half-consumed as "分". An empty prefix here would
    # match everything and swallow whatever character follows the number.
    if rest.startswith("分钟"):
        rest = rest[2:]
    elif rest.startswith("分"):
        rest = rest[1:]
    else:
        # No minute unit: probably not a minute expression at all.
        return content

    # NOTE(review): a guard that rejected a bare minute when an hour is
    # already specified used to be sketched here but was disabled; the minute
    # is currently accepted regardless of hour_specification.
    self.minute_specification = minute_num
    return rest
def digest_early_late_hour(self, content: str) -> str:
if not (content.startswith("") or content.startswith("")):
return content
@ -624,6 +690,11 @@ class Parser:
hour += 12
t = t.replace(hour=hour)
if self.minute_specification is not None:
if self.hour_specification is None:
# 只有分钟指定:保持当前小时,只改分钟
t = t.replace(minute=self.minute_specification, second=0, microsecond=0)
else:
# 有小时指定:使用解析出的小时和分钟
t = t.replace(minute=self.minute_specification)
t += datetime.timedelta(days=self.time_spec_day_delta)

View File

@ -1,6 +1,6 @@
[project]
name = "ptimeparse"
version = "0.1.1"
version = "0.1.2"
description = "一个用于解析中文的时间表达的库"
authors = [
{name = "passthem", email = "Passthem183@gmail.com"}

View File

@ -120,6 +120,15 @@ def test_absolute_date():
assert parser.parse("2025-10-09T15:30") == datetime.datetime(2025, 10, 9, 15, 30, 0, 0)
def test_absolute_time():
    """Check clock-time parsing against a fixed reference of 2025-10-09 16:34."""
    reference = datetime.datetime(2025, 10, 9, 16, 34, 1, 114)
    parser = Parser(now=reference)

    # (input expression, expected hour, expected minute) on the reference day.
    cases = (
        ("5:30", 17, 30),
        ("5:11", 17, 11),
        ("5点30分", 17, 30),
        ("17:20", 17, 20),
    )
    for text, hour, minute in cases:
        wanted = datetime.datetime(2025, 10, 9, hour, minute, 0, 0)
        assert parser.parse(text) == wanted
def test_relative_date():
now = datetime.datetime(2025, 10, 9, 10, 0, 0)
parser = Parser(now=now)