Files
konabot/konabot/common/ptimeparse/parser.py
passthem 3e5c1941c8
All checks were successful
continuous-integration/drone/push Build is passing
continuous-integration/drone/tag Build is passing
重构 ptimeparse 模块
2025-11-21 06:03:28 +08:00

846 lines
39 KiB
Python

"""
Parser for time expressions that builds an Abstract Syntax Tree (AST).
"""
from typing import Iterator, Optional, List
import datetime
from .ptime_token import Token, TokenType
from .ptime_ast import (
ASTNode, NumberNode, DateNode, TimeNode,
RelativeDateNode, RelativeTimeNode, WeekdayNode, TimeExpressionNode
)
from .lexer import Lexer
class ParserError(Exception):
    """Signals that the token stream does not match the expected grammar."""
class Parser:
"""Parser for time expressions that builds an AST."""
def __init__(self, text: str, now: Optional[datetime.datetime] = None):
    """Tokenize ``text`` eagerly and remember the reference time.

    Args:
        text: The expression to parse.
        now: Reference datetime. It is handed to the lexer unchanged; when
            it is falsy, ``self.now`` falls back to ``datetime.datetime.now()``.
    """
    lexer = Lexer(text, now)
    self.lexer = lexer
    # Materialise the whole token stream so look-ahead is plain indexing.
    self.tokens: List[Token] = [*lexer.tokenize()]
    self.pos = 0
    self.now = now if now else datetime.datetime.now()
@property
def current_token(self) -> Token:
    """Token under the cursor, or a synthetic EOF token once the cursor is past the end."""
    token_count = len(self.tokens)
    if self.pos >= token_count:
        # The synthetic EOF carries the token count as its position.
        return Token(TokenType.EOF, None, token_count)
    return self.tokens[self.pos]
def eat(self, token_type: TokenType) -> Token:
    """Advance past the current token if it has the expected type.

    Args:
        token_type: The type the current token must have.

    Returns:
        The token that was consumed.

    Raises:
        ParserError: If the current token has a different type; the cursor
            is left where it was.
    """
    if self.current_token.type != token_type:
        raise ParserError(
            f"Expected token {token_type}, got {self.current_token.type} "
            f"at position {self.current_token.position}"
        )
    consumed = self.current_token
    self.pos += 1
    return consumed
def peek(self, offset: int = 1) -> Token:
    """Return the token ``offset`` places ahead of the cursor without consuming it.

    A synthetic EOF token is returned when the requested index lies past
    the end of the token list.
    """
    target = self.pos + offset
    if target >= len(self.tokens):
        return Token(TokenType.EOF, None, len(self.tokens))
    return self.tokens[target]
def parse_number(self) -> NumberNode:
    """Consume one INTEGER or CHINESE_NUMBER token and wrap its value.

    Returns:
        A ``NumberNode`` holding the token's value.

    Raises:
        ParserError: If the current token is neither kind of number.
    """
    candidate = self.current_token
    if candidate.type not in (TokenType.INTEGER, TokenType.CHINESE_NUMBER):
        raise ParserError(
            f"Expected number, got {candidate.type} at position {candidate.position}"
        )
    self.eat(candidate.type)
    return NumberNode(value=candidate.value)
def parse_date(self) -> DateNode:
    """Parse an absolute date at the cursor.

    Three layouts are tried in order:

    1. ``YYYY-MM-DD`` / ``YYYY/MM/DD`` (integers only);
    2. ``YYYY年MM月DD`` with an optional trailing 日/号 (integers only);
    3. ``MM月DD`` with an optional trailing 日/号 (integers or Chinese
       numbers), which yields a node without a year.

    Returns:
        A ``DateNode``; ``year`` is ``None`` for the year-less layout.

    Raises:
        ParserError: If no layout matches. Nothing is consumed in that case
            because every layout is confirmed by look-ahead first.
    """
    number_types = (TokenType.INTEGER, TokenType.CHINESE_NUMBER)
    day_suffix_types = (TokenType.DATE_SEPARATOR, TokenType.DAY)

    # peek() never raises (it returns a synthetic EOF), so the whole
    # five-token window can be fetched up front.
    first = self.current_token
    sep1, second, sep2, third = (
        self.peek(), self.peek(2), self.peek(3), self.peek(4)
    )

    # Layout 1: YYYY-MM-DD or YYYY/MM/DD.
    if (first.type == TokenType.INTEGER
            and sep1.type == TokenType.DATE_SEPARATOR
            and sep1.value in ('-', '/')
            and second.type == TokenType.INTEGER
            and sep2.type == TokenType.DATE_SEPARATOR
            and sep2.value in ('-', '/')
            and third.type == TokenType.INTEGER):
        self.pos += 5
        return DateNode(
            year=NumberNode(value=first.value),
            month=NumberNode(value=second.value),
            day=NumberNode(value=third.value),
        )

    # Layout 2: YYYY年MM月DD[日号]. The unit may be lexed either as a
    # generic DATE_SEPARATOR or as a dedicated YEAR / MONTH token.
    if (first.type == TokenType.INTEGER
            and sep1.type in (TokenType.DATE_SEPARATOR, TokenType.YEAR)
            and second.type == TokenType.INTEGER
            and sep2.type in (TokenType.DATE_SEPARATOR, TokenType.MONTH)
            and third.type == TokenType.INTEGER):
        self.pos += 5
        if self.current_token.type in day_suffix_types:
            self.eat(self.current_token.type)  # optional 日 / 号
        return DateNode(
            year=NumberNode(value=first.value),
            month=NumberNode(value=second.value),
            day=NumberNode(value=third.value),
        )

    # Layout 3: MM月DD[日号] without a year. This also covers the
    # all-Chinese-number spelling, so no separate branch is needed for it.
    if (first.type in number_types
            and sep1.type in (TokenType.DATE_SEPARATOR, TokenType.MONTH)
            and sep1.value == '月'
            and second.type in number_types):
        self.pos += 3
        if self.current_token.type in day_suffix_types:
            self.eat(self.current_token.type)  # optional 日 / 号
        return DateNode(
            year=None,
            month=NumberNode(value=first.value),
            day=NumberNode(value=second.value),
        )

    raise ParserError(
        f"Unable to parse date at position {self.current_token.position}"
    )
def parse_time(self) -> TimeNode:
    """Parse a clock time at the cursor.

    Two layouts are recognised:

    * ``HH:MM`` with an optional ``:SS`` (integers only, always 24-hour);
    * the Chinese layout: an optional AM/PM period token, then either an
      early-morning / late-night marker followed by an hour, or an hour
      optionally followed by 点/时, minutes (with optional 分), one of the
      half / quarter / 整 markers, and an optional 钟.

    Returns:
        A ``TimeNode``. ``second`` is only ever set by the ``HH:MM:SS``
        layout.

    Raises:
        ParserError: If no layout matches. Tokens may already have been
            consumed at that point, so callers that want to retry must
            restore ``self.pos`` themselves.
    """
    number_types = (TokenType.INTEGER, TokenType.CHINESE_NUMBER)

    # Layout 1: HH:MM[:SS].
    if (self.current_token.type == TokenType.INTEGER
            and self.peek().type == TokenType.TIME_SEPARATOR
            and self.peek().value == ':'):
        hour_token = self.eat(TokenType.INTEGER)
        self.eat(TokenType.TIME_SEPARATOR)
        minute_token = self.eat(TokenType.INTEGER)
        second_node = None
        if (self.current_token.type == TokenType.TIME_SEPARATOR
                and self.peek().type == TokenType.INTEGER):
            self.eat(TokenType.TIME_SEPARATOR)
            second_node = NumberNode(value=self.eat(TokenType.INTEGER).value)
        return TimeNode(
            hour=NumberNode(value=hour_token.value),
            minute=NumberNode(value=minute_token.value),
            second=second_node,
            is_24hour=True,  # HH:MM is always interpreted as 24-hour
            period=None,
        )

    # Layout 2: Chinese time. An explicit AM/PM period may come first.
    period = None
    if self.current_token.type == TokenType.PERIOD_AM:
        self.eat(TokenType.PERIOD_AM)
        period = "AM"
    elif self.current_token.type == TokenType.PERIOD_PM:
        self.eat(TokenType.PERIOD_PM)
        period = "PM"

    hour_node = None
    minute_node = None
    is_24hour = False
    lead_type = self.current_token.type

    if lead_type == TokenType.EARLY_MORNING:
        # "早八" is read as 08:00.
        self.eat(TokenType.EARLY_MORNING)
        if self.current_token.type not in number_types:
            raise ParserError(
                f"Expected number after '早', got {self.current_token.type} "
                f"at position {self.current_token.position}"
            )
        hour_node = NumberNode(value=self.eat(self.current_token.type).value)
        is_24hour = True
        # An hour above 12 is already unambiguous, so drop the period.
        period = None if hour_node.value > 12 else "AM"

    elif lead_type == TokenType.LATE_NIGHT:
        # "晚十" is read as 22:00.
        self.eat(TokenType.LATE_NIGHT)
        if self.current_token.type not in number_types:
            raise ParserError(
                f"Expected number after '晚', got {self.current_token.type} "
                f"at position {self.current_token.position}"
            )
        hour_node = NumberNode(value=self.eat(self.current_token.type).value)
        is_24hour = True
        period = "PM"
        if hour_node.value <= 12:
            # NOTE(review): an hour of exactly 12 becomes 24 here, which is
            # outside 0-23 -- confirm how semantic analysis treats it.
            hour_node.value += 12
            period = None

    elif lead_type in number_types:
        hour_node = NumberNode(value=self.eat(lead_type).value)
        if self.current_token.type == TokenType.TIME_SEPARATOR:
            separator = self.eat(TokenType.TIME_SEPARATOR).value
            # 时 selects 24-hour reading; 点 (and anything else) does not.
            is_24hour = separator == '时'

            # Optional minutes, with an optional trailing 分.
            if self.current_token.type in number_types:
                minute_token = self.eat(self.current_token.type)
                if (self.current_token.type == TokenType.TIME_SEPARATOR
                        and self.current_token.value == '分'):
                    self.eat(TokenType.TIME_SEPARATOR)
                minute_node = NumberNode(value=minute_token.value)

            # Special minute markers: half (30), quarter (15), 整 (0).
            if self.current_token.type == TokenType.HALF:
                self.eat(TokenType.HALF)
                minute_node = NumberNode(value=30)
            elif self.current_token.type == TokenType.QUARTER:
                self.eat(TokenType.QUARTER)
                minute_node = NumberNode(value=15)
            elif self.current_token.type == TokenType.ZHENG:
                self.eat(TokenType.ZHENG)
                if minute_node is None:
                    minute_node = NumberNode(value=0)

            # Optional 钟.
            if self.current_token.type == TokenType.ZHONG:
                self.eat(TokenType.ZHONG)
        # Without a separator the bare number is kept as an hour-only,
        # non-24-hour time.

    else:
        raise ParserError(
            f"Unable to parse time at position {self.current_token.position}"
        )

    return TimeNode(
        hour=hour_node,
        minute=minute_node,
        second=None,
        is_24hour=is_24hour,
        period=period,
    )
def parse_relative_date(self) -> RelativeDateNode:
    """Parse a date expressed relative to the reference time.

    Recognised forms:

    * single day words (today, tomorrow, yesterday, ... up to +/-3 days);
    * this / next / last followed by a YEAR, MONTH or WEEK token
      (there is no "this week" form);
    * the fused words 明年 / 去年 / 今年;
    * ``<number><unit>`` with an optional 前/后 direction, where the unit
      is a YEAR, MONTH, WEEK or DAY token.

    Two encodings are shared with semantic analysis and are kept as-is:

    * "明年N月" stores the absolute month as ``months = N - 100``;
    * "下个月N号" stores the target day of month in ``weeks``.

    Returns:
        A ``RelativeDateNode`` with signed offsets.

    Raises:
        ParserError: If the input looks like an absolute date or matches no
            relative pattern. The cursor is restored before raising.
    """
    number_types = (TokenType.INTEGER, TokenType.CHINESE_NUMBER)

    day_words = {
        TokenType.RELATIVE_TODAY: 0,
        TokenType.RELATIVE_TOMORROW: 1,
        TokenType.RELATIVE_DAY_AFTER_TOMORROW: 2,
        TokenType.RELATIVE_THREE_DAYS_AFTER_TOMORROW: 3,
        TokenType.RELATIVE_YESTERDAY: -1,
        TokenType.RELATIVE_DAY_BEFORE_YESTERDAY: -2,
        TokenType.RELATIVE_THREE_DAYS_BEFORE_YESTERDAY: -3,
    }
    first = self.current_token
    if first.type in day_words:
        self.eat(first.type)
        return RelativeDateNode(
            years=0, months=0, weeks=0, days=day_words[first.type]
        )

    # "6月20日" style input is an absolute date; leave it to parse_date.
    second = self.peek()
    if (first.type in number_types
            and second.type in (TokenType.DATE_SEPARATOR, TokenType.MONTH)
            and second.value == '月'
            and self.peek(2).type in number_types):
        raise ParserError("Looks like absolute date format")

    scope_offsets = {
        TokenType.RELATIVE_THIS: 0,
        TokenType.RELATIVE_NEXT: 1,
        TokenType.RELATIVE_LAST: -1,
    }
    years = months = weeks = days = 0
    start = self.pos
    try:
        if first.type in scope_offsets and second.type == TokenType.YEAR:
            # this / next / last + year
            self.pos += 2
            years = scope_offsets[first.type]

        elif first.type == TokenType.RELATIVE_NEXT and first.value == "明年":
            self.eat(TokenType.RELATIVE_NEXT)
            years = 1
            # Optional month after "明年".
            if (self.current_token.type in number_types
                    and self.peek().type == TokenType.MONTH):
                month_value = self.parse_number().value
                self.eat(TokenType.MONTH)
                # HACK: an absolute month is encoded as (month - 100) so
                # semantic analysis can tell it apart from an offset.
                months = month_value - 100

        elif first.type == TokenType.RELATIVE_LAST and first.value == "去年":
            self.eat(TokenType.RELATIVE_LAST)
            years = -1

        elif first.type == TokenType.RELATIVE_THIS and first.value == "今年":
            self.eat(TokenType.RELATIVE_THIS)
            years = 0

        elif first.type in scope_offsets and second.type == TokenType.MONTH:
            # this / next / last + month
            self.pos += 2
            months = scope_offsets[first.type]
            # "下个月五号": a specific day inside next month.
            if (first.type == TokenType.RELATIVE_NEXT
                    and self.current_token.type in number_types
                    and self.peek().type == TokenType.DAY):
                day_value = self.parse_number().value
                self.eat(TokenType.DAY)
                # HACK: the target day of month travels in ``weeks``;
                # semantic analysis is expected to interpret it.
                days = 0
                weeks = day_value

        elif (first.type in (TokenType.RELATIVE_NEXT, TokenType.RELATIVE_LAST)
                and second.type == TokenType.WEEK):
            # next / last + week
            self.pos += 2
            weeks = scope_offsets[first.type]

        elif first.type in number_types:
            # "2025-11-21" / "2025年11月21日" style input is an absolute
            # date. The bound requires a fifth token (the day) to exist.
            if (self.pos + 4 < len(self.tokens)
                    and second.type in (TokenType.DATE_SEPARATOR, TokenType.YEAR)
                    and second.value in ('-', '/', '年')
                    and self.peek(2).type in number_types
                    and self.peek(3).type in (TokenType.DATE_SEPARATOR,
                                              TokenType.MONTH)
                    and self.peek(3).value in ('-', '/', '月')):
                raise ParserError("Looks like absolute date format")

            amount = self.parse_number().value
            unit = self.current_token
            if unit.type == TokenType.YEAR:
                years = amount
            elif unit.type == TokenType.MONTH:
                months = amount
            elif unit.type == TokenType.WEEK:
                weeks = amount
            elif unit.type == TokenType.DAY:
                days = amount
            else:
                raise ParserError(
                    f"Expected time unit, got {unit.type} "
                    f"at position {unit.position}"
                )
            self.eat(unit.type)

            # Optional direction: forward keeps the sign, backward flips it.
            direction = self.current_token.type
            if direction == TokenType.RELATIVE_DIRECTION_FORWARD:
                self.eat(direction)
            elif direction == TokenType.RELATIVE_DIRECTION_BACKWARD:
                self.eat(direction)
                years, months, weeks, days = -years, -months, -weeks, -days

        else:
            # Nothing matched: fail instead of returning an all-zero node
            # that consumed no input.
            raise ParserError("No relative date pattern matched")
    except ParserError:
        # Rewind so the caller can try another production.
        self.pos = start
        raise ParserError(
            f"Expected relative date, got {self.current_token.type} "
            f"at position {self.current_token.position}"
        )

    return RelativeDateNode(years=years, months=months, weeks=weeks, days=days)
def parse_weekday(self) -> WeekdayNode:
    """Parse a weekday with an optional week scope.

    The optional scope token selects the current, last or next week
    (default: current). The weekday itself is either a dedicated weekday
    token or a Chinese number from 1 to 7 (1 = Monday).

    Returns:
        A ``WeekdayNode`` whose ``weekday`` is 0-based (0 = Monday) and
        whose ``scope`` is ``"current"``, ``"last"`` or ``"next"``.

    Raises:
        ParserError: If no weekday follows, or the number is outside 1-7.
            A scope token may already have been consumed at that point.
    """
    scope_by_token = {
        TokenType.WEEK_SCOPE_CURRENT: "current",
        TokenType.WEEK_SCOPE_LAST: "last",
        TokenType.WEEK_SCOPE_NEXT: "next",
    }
    weekday_index = {
        TokenType.WEEKDAY_MONDAY: 0,
        TokenType.WEEKDAY_TUESDAY: 1,
        TokenType.WEEKDAY_WEDNESDAY: 2,
        TokenType.WEEKDAY_THURSDAY: 3,
        TokenType.WEEKDAY_FRIDAY: 4,
        TokenType.WEEKDAY_SATURDAY: 5,
        TokenType.WEEKDAY_SUNDAY: 6,
    }

    scope = "current"
    scope_type = self.current_token.type
    if scope_type in scope_by_token:
        scope = scope_by_token[scope_type]
        self.eat(scope_type)

    token = self.current_token
    if token.type == TokenType.CHINESE_NUMBER:
        # Numeric weekday: 1 = Monday ... 7 = Sunday.
        weekday_num = token.value
        if not 1 <= weekday_num <= 7:
            raise ParserError(
                f"Invalid weekday number: {weekday_num} "
                f"at position {self.current_token.position}"
            )
        self.eat(TokenType.CHINESE_NUMBER)
        return WeekdayNode(weekday=weekday_num - 1, scope=scope)

    if token.type in weekday_index:
        self.eat(token.type)
        return WeekdayNode(weekday=weekday_index[token.type], scope=scope)

    raise ParserError(
        f"Expected weekday, got {self.current_token.type} "
        f"at position {self.current_token.position}"
    )
def parse_relative_time(self) -> RelativeTimeNode:
    """Parse a run of relative time offsets and sum them.

    Each item is one of:

    * a HALF token (half an hour), optionally followed by 个 and an HOUR
      token;
    * a QUARTER token (15 minutes), optionally followed by 钟;
    * ``<number><unit>`` where the unit is an HOUR / MINUTE / SECOND token
      or a 点 / 分 / 秒 separator; a bare number directly followed by a
      direction is taken as hours.

    Every item may be followed by a direction token; backward makes the
    contribution negative. Note that for the HALF and QUARTER forms a
    backward direction negates the whole running total of that unit.

    Parsing stops at the first token that does not continue the run. A
    number that turns out not to be an offset (for example "3点" with no
    direction, which is an absolute time) is handed back by rewinding the
    cursor to it, so a following parser can consume it.

    Returns:
        A ``RelativeTimeNode``; all fields are zero when nothing matched.
    """
    number_types = (TokenType.INTEGER, TokenType.CHINESE_NUMBER)
    direction_types = (
        TokenType.RELATIVE_DIRECTION_FORWARD,
        TokenType.RELATIVE_DIRECTION_BACKWARD,
    )
    hours = 0.0
    minutes = 0.0
    seconds = 0.0

    while True:
        lead_type = self.current_token.type

        if lead_type == TokenType.HALF:
            # 半小时
            self.eat(TokenType.HALF)
            if (self.current_token.type == TokenType.INTEGER
                    and self.current_token.value == "个"):
                self.eat(TokenType.INTEGER)  # optional 个
            if self.current_token.type == TokenType.HOUR:
                self.eat(TokenType.HOUR)  # optional 小时
            hours += 0.5
            if self.current_token.type == TokenType.RELATIVE_DIRECTION_FORWARD:
                self.eat(TokenType.RELATIVE_DIRECTION_FORWARD)
            elif self.current_token.type == TokenType.RELATIVE_DIRECTION_BACKWARD:
                self.eat(TokenType.RELATIVE_DIRECTION_BACKWARD)
                hours = -hours
            continue

        if lead_type == TokenType.QUARTER:
            # 一刻钟 (15 minutes)
            self.eat(TokenType.QUARTER)
            if self.current_token.type == TokenType.ZHONG:
                self.eat(TokenType.ZHONG)  # optional 钟
            minutes += 15
            if self.current_token.type == TokenType.RELATIVE_DIRECTION_FORWARD:
                self.eat(TokenType.RELATIVE_DIRECTION_FORWARD)
            elif self.current_token.type == TokenType.RELATIVE_DIRECTION_BACKWARD:
                self.eat(TokenType.RELATIVE_DIRECTION_BACKWARD)
                minutes = -minutes
            continue

        if lead_type not in number_types:
            # Includes a leading direction token with nothing before it.
            break

        # Remember where this item starts so it can be handed back.
        item_start = self.pos
        amount = self.parse_number().value
        unit = None
        unit_type = self.current_token.type

        if unit_type == TokenType.HOUR:
            self.eat(TokenType.HOUR)
            if (self.current_token.type == TokenType.INTEGER
                    and self.current_token.value == "个"):
                self.eat(TokenType.INTEGER)  # optional 个
            unit = "hour"
        elif unit_type == TokenType.MINUTE:
            self.eat(TokenType.MINUTE)
            unit = "minute"
        elif unit_type == TokenType.SECOND:
            self.eat(TokenType.SECOND)
            unit = "second"
        elif unit_type == TokenType.TIME_SEPARATOR:
            # "X点", "X分", "X秒"
            separator = self.eat(TokenType.TIME_SEPARATOR).value
            if separator == "点":
                if self.current_token.type == TokenType.ZHONG:
                    self.eat(TokenType.ZHONG)  # optional 钟
                # "X点" without a direction is an absolute time.
                if self.current_token.type in direction_types:
                    unit = "hour"
            elif separator == "分":
                unit = "minute"
                if self.current_token.type == TokenType.ZHONG:
                    self.eat(TokenType.ZHONG)  # optional 钟
            elif separator == "秒":
                unit = "second"
        elif unit_type in direction_types:
            # A bare number followed by a direction is taken as hours.
            unit = "hour"

        if unit is None:
            # Not a relative offset after all: give the tokens back.
            self.pos = item_start
            break

        direction = 1  # forward by default
        if self.current_token.type == TokenType.RELATIVE_DIRECTION_FORWARD:
            self.eat(TokenType.RELATIVE_DIRECTION_FORWARD)
        elif self.current_token.type == TokenType.RELATIVE_DIRECTION_BACKWARD:
            self.eat(TokenType.RELATIVE_DIRECTION_BACKWARD)
            direction = -1

        if unit == "hour":
            hours += amount * direction
        elif unit == "minute":
            minutes += amount * direction
        else:
            seconds += amount * direction

    return RelativeTimeNode(hours=hours, minutes=minutes, seconds=seconds)
def _attempt(self, parse_fn):
    """Run ``parse_fn``; on ``ParserError`` rewind the cursor and return ``None``."""
    start = self.pos
    try:
        return parse_fn()
    except ParserError:
        self.pos = start
        return None

def parse_time_expression(self) -> TimeExpressionNode:
    """Parse a whole expression into its date / time / relative / weekday parts.

    The token stream is scanned left to right. At each position the
    sub-parsers are tried in a fixed order -- absolute date, relative date,
    relative time, absolute time, weekday -- and each kind of part is
    accepted at most once. A failed attempt rewinds the cursor; when
    nothing matches, the current token is skipped.

    Returns:
        A ``TimeExpressionNode`` whose fields are ``None`` for parts that
        were not found.
    """
    number_types = (TokenType.INTEGER, TokenType.CHINESE_NUMBER)
    relative_date_starts = (
        TokenType.RELATIVE_TODAY, TokenType.RELATIVE_TOMORROW,
        TokenType.RELATIVE_DAY_AFTER_TOMORROW,
        TokenType.RELATIVE_THREE_DAYS_AFTER_TOMORROW,
        TokenType.RELATIVE_YESTERDAY, TokenType.RELATIVE_DAY_BEFORE_YESTERDAY,
        TokenType.RELATIVE_THREE_DAYS_BEFORE_YESTERDAY,
        # Numbers start patterns like "X年后", "X个月后".
        TokenType.INTEGER, TokenType.CHINESE_NUMBER,
        TokenType.RELATIVE_NEXT, TokenType.RELATIVE_LAST,
        TokenType.RELATIVE_THIS,
    )
    relative_time_starts = (
        TokenType.INTEGER, TokenType.CHINESE_NUMBER,
        TokenType.HALF, TokenType.QUARTER,
        TokenType.RELATIVE_DIRECTION_FORWARD,
        TokenType.RELATIVE_DIRECTION_BACKWARD,
    )
    time_starts = (
        TokenType.INTEGER, TokenType.CHINESE_NUMBER, TokenType.TIME_SEPARATOR,
        TokenType.PERIOD_AM, TokenType.PERIOD_PM,
        # parse_time handles these markers, so they must reach it.
        TokenType.EARLY_MORNING, TokenType.LATE_NIGHT,
    )
    weekday_starts = (
        TokenType.WEEK_SCOPE_CURRENT, TokenType.WEEK_SCOPE_LAST,
        TokenType.WEEK_SCOPE_NEXT,
        TokenType.WEEKDAY_MONDAY, TokenType.WEEKDAY_TUESDAY,
        TokenType.WEEKDAY_WEDNESDAY, TokenType.WEEKDAY_THURSDAY,
        TokenType.WEEKDAY_FRIDAY, TokenType.WEEKDAY_SATURDAY,
        TokenType.WEEKDAY_SUNDAY,
    )

    date_node = None
    time_node = None
    relative_date_node = None
    relative_time_node = None
    weekday_node = None

    while self.current_token.type != TokenType.EOF:
        # Failed attempts rewind, so this type stays valid for the whole
        # iteration.
        token_type = self.current_token.type

        # Absolute dates take precedence over every other reading.
        if date_node is None and token_type in number_types:
            date_node = self._attempt(self.parse_date)
            if date_node is not None:
                continue

        if relative_date_node is None and token_type in relative_date_starts:
            relative_date_node = self._attempt(self.parse_relative_date)
            if relative_date_node is not None:
                continue

        if relative_time_node is None and token_type in relative_time_starts:
            start = self.pos
            candidate = self._attempt(self.parse_relative_time)
            if candidate is not None and (
                    candidate.hours != 0
                    or candidate.minutes != 0
                    or candidate.seconds != 0):
                relative_time_node = candidate
                continue
            # An all-zero result means nothing was parsed: rewind and keep
            # the slot free so a later offset can still be picked up.
            self.pos = start

        if time_node is None and token_type in time_starts:
            time_node = self._attempt(self.parse_time)
            if time_node is not None:
                continue

        if weekday_node is None and token_type in weekday_starts:
            weekday_node = self._attempt(self.parse_weekday)
            if weekday_node is not None:
                continue

        # Nothing could use this token; skip it so the loop always advances.
        self.pos += 1

    return TimeExpressionNode(
        date=date_node,
        time=time_node,
        relative_date=relative_date_node,
        relative_time=relative_time_node,
        weekday=weekday_node,
    )
def parse(self) -> TimeExpressionNode:
    """Public entry point: parse the full token stream and return the AST root."""
    expression = self.parse_time_expression()
    return expression