846 lines
39 KiB
Python
846 lines
39 KiB
Python
"""
|
|
Parser for time expressions that builds an Abstract Syntax Tree (AST).
|
|
"""
|
|
|
|
from typing import Iterator, Optional, List
|
|
import datetime
|
|
|
|
from .ptime_token import Token, TokenType
|
|
from .ptime_ast import (
|
|
ASTNode, NumberNode, DateNode, TimeNode,
|
|
RelativeDateNode, RelativeTimeNode, WeekdayNode, TimeExpressionNode
|
|
)
|
|
from .lexer import Lexer
|
|
|
|
|
|
class ParserError(Exception):
    """Signal that the token stream could not be parsed as expected."""
|
|
|
|
|
|
class Parser:
|
|
"""Parser for time expressions that builds an AST."""
|
|
|
|
def __init__(self, text: str, now: Optional[datetime.datetime] = None):
    """Tokenize *text* eagerly and place the cursor on the first token.

    Args:
        text: The raw expression handed to the lexer.
        now: Reference "current" datetime. It is forwarded to the lexer
            unchanged; when falsy, ``datetime.datetime.now()`` is stored
            on the parser instead.
    """
    lexer = Lexer(text, now)
    self.lexer = lexer
    # Materialize the whole token stream so look-ahead is a list index.
    self.tokens: List[Token] = list(lexer.tokenize())
    self.pos = 0
    self.now = now or datetime.datetime.now()
|
|
|
|
@property
def current_token(self) -> Token:
    """Return the token under the cursor.

    Once the cursor has moved past the last token, a fresh EOF token is
    returned whose position is the length of the token list.
    """
    token_count = len(self.tokens)
    if self.pos >= token_count:
        return Token(TokenType.EOF, None, token_count)
    return self.tokens[self.pos]
|
|
|
|
def eat(self, token_type: TokenType) -> Token:
    """Consume the current token if it has the expected type.

    Args:
        token_type: The type the current token must have.

    Returns:
        The token that was consumed; the cursor advances by one.

    Raises:
        ParserError: if the current token has a different type. The cursor
            is left where it was.
    """
    token = self.current_token
    if token.type != token_type:
        raise ParserError(
            f"Expected token {token_type}, got {token.type} "
            f"at position {token.position}"
        )
    self.pos += 1
    return token
|
|
|
|
def peek(self, offset: int = 1) -> Token:
    """Return the token *offset* places ahead of the cursor without consuming.

    Args:
        offset: How far ahead to look; defaults to the next token.

    Returns:
        The token at that index, or a fresh EOF token (positioned at the
        length of the token list) when the index is past the end.
    """
    target = self.pos + offset
    token_count = len(self.tokens)
    if target >= token_count:
        return Token(TokenType.EOF, None, token_count)
    return self.tokens[target]
|
|
|
|
def parse_number(self) -> NumberNode:
    """Consume one numeric token and wrap its value in a ``NumberNode``.

    Both ``INTEGER`` and ``CHINESE_NUMBER`` tokens are accepted; the token's
    ``value`` is passed through unchanged.

    Raises:
        ParserError: if the current token is neither kind of number.
    """
    token = self.current_token
    if token.type not in (TokenType.INTEGER, TokenType.CHINESE_NUMBER):
        raise ParserError(
            f"Expected number, got {token.type} at position {token.position}"
        )
    self.eat(token.type)
    return NumberNode(value=token.value)
|
|
|
|
def parse_date(self) -> DateNode:
    """Parse an absolute date starting at the cursor.

    Layouts are tried in this order:

    1. ``YYYY-MM-DD`` / ``YYYY/MM/DD``: three integers joined by
       ``DATE_SEPARATOR`` tokens whose value is ``-`` or ``/``.
    2. ``YYYY年MM月DD[日|号]``: three integers; the token after the year may
       be ``DATE_SEPARATOR`` or ``YEAR`` and the token after the month may
       be ``DATE_SEPARATOR`` or ``MONTH`` (separator values are not
       inspected). One trailing ``DATE_SEPARATOR`` / ``DAY`` token is
       consumed if present.
    3. ``MM月DD[日|号]``: month and day may each be an integer or a Chinese
       number; the token after the month must have the value ``月``.

    The full pattern is checked by look-ahead before anything is consumed,
    so a failed call leaves the cursor untouched.

    Returns:
        A ``DateNode``; ``year`` is ``None`` for layout 3.

    Raises:
        ParserError: if the upcoming tokens match none of the layouts.
    """
    number_types = [TokenType.INTEGER, TokenType.CHINESE_NUMBER]
    iso_separators = ['-', '/']
    trailing_day_markers = [TokenType.DATE_SEPARATOR, TokenType.DAY]

    first = self.current_token
    second, third, fourth, fifth = (self.peek(step) for step in range(1, 5))

    # Layout 1: YYYY-MM-DD or YYYY/MM/DD
    if (first.type == TokenType.INTEGER
            and second.type == TokenType.DATE_SEPARATOR
            and second.value in iso_separators
            and third.type == TokenType.INTEGER
            and fourth.type == TokenType.DATE_SEPARATOR
            and fourth.value in iso_separators
            and fifth.type == TokenType.INTEGER):
        year_token = self.eat(TokenType.INTEGER)
        self.eat(TokenType.DATE_SEPARATOR)
        month_token = self.eat(TokenType.INTEGER)
        self.eat(TokenType.DATE_SEPARATOR)
        day_token = self.eat(TokenType.INTEGER)
        return DateNode(
            year=NumberNode(value=year_token.value),
            month=NumberNode(value=month_token.value),
            day=NumberNode(value=day_token.value),
        )

    # Layout 2: YYYY年MM月DD[日号]
    if (first.type == TokenType.INTEGER
            and second.type in [TokenType.DATE_SEPARATOR, TokenType.YEAR]
            and third.type == TokenType.INTEGER
            and fourth.type in [TokenType.DATE_SEPARATOR, TokenType.MONTH]
            and fifth.type == TokenType.INTEGER):
        year_token = self.eat(TokenType.INTEGER)
        self.eat(second.type)   # 年 (DATE_SEPARATOR or YEAR)
        month_token = self.eat(TokenType.INTEGER)
        self.eat(fourth.type)   # 月 (DATE_SEPARATOR or MONTH)
        day_token = self.eat(TokenType.INTEGER)
        # Optional 日 or 号
        if self.current_token.type in trailing_day_markers:
            self.eat(self.current_token.type)
        return DateNode(
            year=NumberNode(value=year_token.value),
            month=NumberNode(value=month_token.value),
            day=NumberNode(value=day_token.value),
        )

    # Layout 3: MM月DD[日号] without a year. This also covers the
    # all-Chinese-number spelling, so no separate branch is needed for it.
    if (first.type in number_types
            and second.type in [TokenType.DATE_SEPARATOR, TokenType.MONTH]
            and second.value == '月'
            and third.type in number_types):
        month_token = self.eat(first.type)
        self.eat(second.type)   # 月 (DATE_SEPARATOR or MONTH)
        day_token = self.eat(third.type)
        # Optional 日 or 号
        if self.current_token.type in trailing_day_markers:
            self.eat(self.current_token.type)
        return DateNode(
            year=None,
            month=NumberNode(value=month_token.value),
            day=NumberNode(value=day_token.value),
        )

    raise ParserError(
        f"Unable to parse date at position {self.current_token.position}"
    )
|
|
|
|
def parse_time(self) -> TimeNode:
    """Parse a clock time starting at the cursor.

    Two families of input are recognised:

    * ``HH:MM[:SS]`` with integer fields; always flagged as 24-hour.
    * Chinese forms: an optional AM/PM period token, followed by either
      an early-morning / late-night marker plus a bare hour, or an hour
      optionally followed by a time separator (点 / 时), minutes
      (optionally followed by 分), a half / quarter / 整 marker, and 钟.

    Returns:
        A ``TimeNode``. ``second`` is only populated by the ``HH:MM:SS``
        form; ``period`` is ``"AM"``, ``"PM"`` or ``None``.

    Raises:
        ParserError: if no time can be read. The cursor may already have
            advanced when the error is raised; callers that want to
            backtrack must restore ``self.pos`` themselves.
    """
    number_types = [TokenType.INTEGER, TokenType.CHINESE_NUMBER]

    # ---- Numeric clock form: HH:MM[:SS] ----
    upcoming = self.peek()
    if (self.current_token.type == TokenType.INTEGER
            and upcoming.type == TokenType.TIME_SEPARATOR
            and upcoming.value == ':'):
        hour_token = self.eat(TokenType.INTEGER)
        self.eat(TokenType.TIME_SEPARATOR)  # :
        minute_token = self.eat(TokenType.INTEGER)

        seconds = None
        # Optional :SS
        if (self.current_token.type == TokenType.TIME_SEPARATOR
                and self.peek().type == TokenType.INTEGER):
            self.eat(TokenType.TIME_SEPARATOR)  # :
            seconds = NumberNode(value=self.eat(TokenType.INTEGER).value)

        return TimeNode(
            hour=NumberNode(value=hour_token.value),
            minute=NumberNode(value=minute_token.value),
            second=seconds,
            is_24hour=True,  # HH:MM is always interpreted as 24-hour
            period=None,
        )

    # ---- Chinese forms ----
    # Optional leading period indicator.
    period = None
    if self.current_token.type == TokenType.PERIOD_AM:
        period = "AM"
        self.eat(TokenType.PERIOD_AM)
    elif self.current_token.type == TokenType.PERIOD_PM:
        period = "PM"
        self.eat(TokenType.PERIOD_PM)

    lead = self.current_token.type
    if lead not in number_types + [TokenType.EARLY_MORNING, TokenType.LATE_NIGHT]:
        raise ParserError(
            f"Unable to parse time at position {self.current_token.position}"
        )

    minute_node = None
    is_24hour = False

    if lead == TokenType.EARLY_MORNING:
        self.eat(TokenType.EARLY_MORNING)
        if self.current_token.type not in number_types:
            raise ParserError(
                f"Expected number after '早', got {self.current_token.type} "
                f"at position {self.current_token.position}"
            )
        hour_token = self.eat(self.current_token.type)
        hour_node = NumberNode(value=hour_token.value)
        # "早八" is read as 08:00; an hour above 12 drops the AM tag.
        is_24hour = True
        period = None if hour_node.value > 12 else "AM"

    elif lead == TokenType.LATE_NIGHT:
        self.eat(TokenType.LATE_NIGHT)
        if self.current_token.type not in number_types:
            raise ParserError(
                f"Expected number after '晚', got {self.current_token.type} "
                f"at position {self.current_token.position}"
            )
        hour_token = self.eat(self.current_token.type)
        hour_node = NumberNode(value=hour_token.value)
        # "晚十" is read as 22:00: shift the hour onto the 24-hour clock.
        # NOTE(review): an hour of 12 becomes 24 here — confirm that the
        # consumer of TimeNode accepts that value.
        if hour_node.value <= 12:
            hour_node.value += 12
        # NOTE(review): the source's indentation was lost; these two
        # assignments are taken to apply unconditionally — confirm.
        is_24hour = True
        period = None

    else:
        # Plain hour, optionally followed by a separator and minutes.
        hour_token = self.eat(lead)
        hour_node = NumberNode(value=hour_token.value)

        if self.current_token.type == TokenType.TIME_SEPARATOR:
            separator = self.eat(TokenType.TIME_SEPARATOR).value
            # 点 marks a 12-hour reading, 时 a 24-hour one; any other
            # separator value leaves the flag at its default (False).
            if separator == '点':
                is_24hour = False
            elif separator == '时':
                is_24hour = True

            # Optional minutes, with an optional trailing 分.
            if self.current_token.type in number_types:
                minute_token = self.eat(self.current_token.type)
                trailing = self.current_token
                if (trailing.type == TokenType.TIME_SEPARATOR
                        and trailing.value == '分'):
                    self.eat(TokenType.TIME_SEPARATOR)
                minute_node = NumberNode(value=minute_token.value)

            # Special markers; half and quarter override explicit minutes.
            marker = self.current_token.type
            if marker == TokenType.HALF:
                self.eat(TokenType.HALF)
                minute_node = NumberNode(value=30)
            elif marker == TokenType.QUARTER:
                self.eat(TokenType.QUARTER)
                minute_node = NumberNode(value=15)
            elif marker == TokenType.ZHENG:
                self.eat(TokenType.ZHENG)
                if minute_node is None:
                    minute_node = NumberNode(value=0)

            # Optional 钟
            if self.current_token.type == TokenType.ZHONG:
                self.eat(TokenType.ZHONG)
        # With no separator the value is kept as an hour-only time and
        # is_24hour stays False.

    return TimeNode(
        hour=hour_node,
        minute=minute_node,
        second=None,
        is_24hour=is_24hour,
        period=period,
    )
|
|
|
|
def parse_relative_date(self) -> RelativeDateNode:
    """Parse a date expressed relative to "now".

    Recognised forms:

    * Single-token day keywords (today, tomorrow, ... three days before
      yesterday), mapped to a day offset from -3 to +3.
    * this / next / last combined with a ``YEAR``, ``MONTH`` or ``WEEK``
      token (``WEEK`` only for next / last), or carried in a single token
      whose value is ``今年`` / ``明年`` / ``去年``.
    * ``<number> <YEAR|MONTH|WEEK|DAY>`` optionally followed by a forward
      or backward direction token; backward negates the offset.

    Two values are smuggled through existing fields for a later stage to
    decode:

    * ``明年`` followed by ``<number> MONTH`` stores ``number - 100`` in
      ``months`` to mark an absolute month.
    * next-month followed by ``<number> DAY`` stores the day number in
      ``weeks``.

    Returns:
        A ``RelativeDateNode`` with the computed offsets.

    Raises:
        ParserError: if the tokens look like an absolute date, or if a
            number is not followed by a date unit. For failures inside the
            extended forms the cursor is restored before raising.
    """
    years = months = weeks = days = 0
    number_types = [TokenType.INTEGER, TokenType.CHINESE_NUMBER]

    head = self.current_token

    # Single-token keywords: today / tomorrow / yesterday and friends.
    day_keyword_offsets = {
        TokenType.RELATIVE_TODAY: 0,
        TokenType.RELATIVE_TOMORROW: 1,
        TokenType.RELATIVE_DAY_AFTER_TOMORROW: 2,
        TokenType.RELATIVE_THREE_DAYS_AFTER_TOMORROW: 3,
        TokenType.RELATIVE_YESTERDAY: -1,
        TokenType.RELATIVE_DAY_BEFORE_YESTERDAY: -2,
        TokenType.RELATIVE_THREE_DAYS_BEFORE_YESTERDAY: -3,
    }
    if head.type in day_keyword_offsets:
        self.eat(head.type)
        days = day_keyword_offsets[head.type]
        return RelativeDateNode(years=years, months=months, weeks=weeks, days=days)

    tokens = self.tokens
    start = self.pos

    # "6月20日"-style input is an absolute date; refuse it up front.
    if (start + 2 < len(tokens)
            and tokens[start].type in number_types
            and tokens[start + 1].type in [TokenType.DATE_SEPARATOR, TokenType.MONTH]
            and tokens[start + 1].value == '月'
            and tokens[start + 2].type in number_types):
        raise ParserError("Looks like absolute date format")

    modifier_offsets = {
        TokenType.RELATIVE_THIS: 0,
        TokenType.RELATIVE_NEXT: 1,
        TokenType.RELATIVE_LAST: -1,
    }
    kind = head.type
    following = self.peek().type

    try:
        # NOTE(review): if none of the branches below matches (for example
        # a this/next/last token followed by something unexpected), an
        # all-zero node is returned without consuming anything.
        if kind in modifier_offsets and following == TokenType.YEAR:
            # this / next / last + YEAR
            self.eat(kind)
            self.eat(TokenType.YEAR)
            years = modifier_offsets[kind]

        elif kind == TokenType.RELATIVE_NEXT and head.value == "明年":
            self.eat(TokenType.RELATIVE_NEXT)
            years = 1
            # Optional "<number> MONTH" right after 明年.
            if (self.current_token.type in number_types
                    and self.peek().type == TokenType.MONTH):
                month_node = self.parse_number()
                self.eat(TokenType.MONTH)
                # HACK: the -100 offset marks this as an absolute month.
                months = month_node.value - 100

        elif kind == TokenType.RELATIVE_LAST and head.value == "去年":
            self.eat(TokenType.RELATIVE_LAST)
            years = -1

        elif kind == TokenType.RELATIVE_THIS and head.value == "今年":
            self.eat(TokenType.RELATIVE_THIS)
            years = 0

        elif kind in modifier_offsets and following == TokenType.MONTH:
            # this / next / last + MONTH
            self.eat(kind)
            self.eat(TokenType.MONTH)
            months = modifier_offsets[kind]
            # "下个月五号": next month plus an explicit day of month.
            if (kind == TokenType.RELATIVE_NEXT
                    and self.current_token.type in number_types
                    and self.peek().type == TokenType.DAY):
                day_node = self.parse_number()
                self.eat(TokenType.DAY)
                days = 0
                # HACK: the target day of month travels in `weeks`.
                weeks = day_node.value

        elif (kind in (TokenType.RELATIVE_NEXT, TokenType.RELATIVE_LAST)
                and following == TokenType.WEEK):
            # next / last + WEEK
            self.eat(kind)
            self.eat(TokenType.WEEK)
            weeks = modifier_offsets[kind]

        elif kind in number_types:
            # "<number> <unit> [direction]", unless it is really an
            # absolute date such as 2025-11-21 or 2025年11月21日.
            if (start + 4 < len(tokens)
                    and tokens[start + 1].type in [TokenType.DATE_SEPARATOR, TokenType.YEAR]
                    and tokens[start + 1].value in ['-', '/', '年']
                    and tokens[start + 2].type in number_types
                    and tokens[start + 3].type in [TokenType.DATE_SEPARATOR, TokenType.MONTH]
                    and tokens[start + 3].value in ['-', '/', '月']):
                raise ParserError("Looks like absolute date format")

            amount = self.parse_number().value

            unit = self.current_token.type
            if unit == TokenType.YEAR:
                self.eat(TokenType.YEAR)
                years = amount
            elif unit == TokenType.MONTH:
                self.eat(TokenType.MONTH)
                months = amount
            elif unit == TokenType.WEEK:
                self.eat(TokenType.WEEK)
                weeks = amount
            elif unit == TokenType.DAY:
                self.eat(TokenType.DAY)
                days = amount
            else:
                raise ParserError(
                    f"Expected time unit, got {self.current_token.type} "
                    f"at position {self.current_token.position}"
                )

            # Direction (前/后): forward keeps the sign, backward flips it.
            if self.current_token.type == TokenType.RELATIVE_DIRECTION_FORWARD:
                self.eat(TokenType.RELATIVE_DIRECTION_FORWARD)
            elif self.current_token.type == TokenType.RELATIVE_DIRECTION_BACKWARD:
                self.eat(TokenType.RELATIVE_DIRECTION_BACKWARD)
                years = -years
                months = -months
                weeks = -weeks
                days = -days

    except ParserError as err:
        # Undo any partial consumption, then report at the original token.
        self.pos = start
        raise ParserError(
            f"Expected relative date, got {self.current_token.type} "
            f"at position {self.current_token.position}"
        ) from err

    return RelativeDateNode(years=years, months=months, weeks=weeks, days=days)
|
|
|
|
def parse_weekday(self) -> WeekdayNode:
    """Parse an optional week scope followed by a weekday.

    The scope token (current / last / next week) is optional and defaults
    to ``"current"``. The weekday is either one of the seven dedicated
    weekday tokens or a ``CHINESE_NUMBER`` token whose value is 1..7
    (1 = Monday).

    Returns:
        A ``WeekdayNode`` whose ``weekday`` is 0-based (0 = Monday) and
        whose ``scope`` is ``"current"``, ``"last"`` or ``"next"``.

    Raises:
        ParserError: if no weekday follows, or the numeric weekday is
            outside 1..7. A scope token that was already consumed is not
            put back.
    """
    scope_names = {
        TokenType.WEEK_SCOPE_CURRENT: "current",
        TokenType.WEEK_SCOPE_LAST: "last",
        TokenType.WEEK_SCOPE_NEXT: "next",
    }
    named_weekdays = {
        TokenType.WEEKDAY_MONDAY: 0,
        TokenType.WEEKDAY_TUESDAY: 1,
        TokenType.WEEKDAY_WEDNESDAY: 2,
        TokenType.WEEKDAY_THURSDAY: 3,
        TokenType.WEEKDAY_FRIDAY: 4,
        TokenType.WEEKDAY_SATURDAY: 5,
        TokenType.WEEKDAY_SUNDAY: 6,
    }

    # Optional week scope (本, 上, 下).
    scope = "current"
    scope_type = self.current_token.type
    if scope_type in scope_names:
        self.eat(scope_type)
        scope = scope_names[scope_type]

    token = self.current_token

    # Numeric weekday: 1 = Monday ... 7 = Sunday.
    if token.type == TokenType.CHINESE_NUMBER:
        weekday_num = token.value
        if not 1 <= weekday_num <= 7:
            raise ParserError(
                f"Invalid weekday number: {weekday_num} "
                f"at position {token.position}"
            )
        self.eat(TokenType.CHINESE_NUMBER)
        return WeekdayNode(weekday=weekday_num - 1, scope=scope)

    # Dedicated weekday token.
    if token.type in named_weekdays:
        self.eat(token.type)
        return WeekdayNode(weekday=named_weekdays[token.type], scope=scope)

    raise ParserError(
        f"Expected weekday, got {token.type} "
        f"at position {token.position}"
    )
|
|
|
|
def parse_relative_time(self) -> RelativeTimeNode:
    """Parse a run of relative time offsets such as "半小时后" or "3分钟前".

    Terms are accumulated in a loop. Each term is one of:

    * a half marker (adds 0.5 hours), optionally followed by 个 and an
      hour-unit token;
    * a quarter marker (adds 15 minutes), optionally followed by 钟;
    * a number followed by an hour / minute / second unit token, by a
      点 / 分 / 秒 separator, or directly by a direction token (in which
      case the unit defaults to hours).

    A term may end with a forward or backward direction token. For
    numeric terms, backward negates that term only; for the half and
    quarter forms, backward negates the whole running hour / minute
    total.

    When a number turns out not to start a relative term ("X点" with no
    direction, an unrecognised separator, or no unit at all) the cursor
    is moved back to that number, so terms parsed earlier are kept and
    the caller can try another rule on the remaining tokens.

    Returns:
        A ``RelativeTimeNode``. All three fields are zero when nothing
        relative was recognised.
    """
    hours = 0.0
    minutes = 0.0
    seconds = 0.0

    number_types = [TokenType.INTEGER, TokenType.CHINESE_NUMBER]
    direction_types = [
        TokenType.RELATIVE_DIRECTION_FORWARD,
        TokenType.RELATIVE_DIRECTION_BACKWARD,
    ]
    term_starts = number_types + [TokenType.HALF, TokenType.QUARTER] + direction_types

    while self.current_token.type in term_starts:
        kind = self.current_token.type

        # 半小时
        if kind == TokenType.HALF:
            self.eat(TokenType.HALF)
            # Optional 个
            if (self.current_token.type == TokenType.INTEGER
                    and self.current_token.value == "个"):
                self.eat(TokenType.INTEGER)
            # Optional 小时
            if self.current_token.type == TokenType.HOUR:
                self.eat(TokenType.HOUR)
            hours += 0.5
            if self.current_token.type == TokenType.RELATIVE_DIRECTION_FORWARD:
                self.eat(TokenType.RELATIVE_DIRECTION_FORWARD)
            elif self.current_token.type == TokenType.RELATIVE_DIRECTION_BACKWARD:
                self.eat(TokenType.RELATIVE_DIRECTION_BACKWARD)
                # Flips the whole running total, so "X小时半前" ends negative.
                hours = -hours
            continue

        # 一刻钟 (15 minutes)
        if kind == TokenType.QUARTER:
            self.eat(TokenType.QUARTER)
            # Optional 钟
            if self.current_token.type == TokenType.ZHONG:
                self.eat(TokenType.ZHONG)
            minutes += 15
            if self.current_token.type == TokenType.RELATIVE_DIRECTION_FORWARD:
                self.eat(TokenType.RELATIVE_DIRECTION_FORWARD)
            elif self.current_token.type == TokenType.RELATIVE_DIRECTION_BACKWARD:
                self.eat(TokenType.RELATIVE_DIRECTION_BACKWARD)
                minutes = -minutes
            continue

        # A bare direction token cannot start a term.
        if kind not in number_types:
            break

        # Remember where this term starts so it can be handed back.
        term_start = self.pos
        number_value = self.parse_number().value

        unit = None
        unit_type = self.current_token.type
        if unit_type == TokenType.HOUR:
            self.eat(TokenType.HOUR)
            # Optional 个
            if (self.current_token.type == TokenType.INTEGER
                    and self.current_token.value == "个"):
                self.eat(TokenType.INTEGER)
            unit = "hour"
        elif unit_type == TokenType.MINUTE:
            self.eat(TokenType.MINUTE)
            unit = "minute"
        elif unit_type == TokenType.SECOND:
            self.eat(TokenType.SECOND)
            unit = "second"
        elif unit_type == TokenType.TIME_SEPARATOR:
            # "X点", "X分", "X秒"
            sep_value = self.eat(TokenType.TIME_SEPARATOR).value
            if sep_value == "点":
                unit = "hour"
                # Optional 钟
                if self.current_token.type == TokenType.ZHONG:
                    self.eat(TokenType.ZHONG)
                # "X点" with no direction is an absolute time: hand it back.
                if self.current_token.type not in direction_types:
                    self.pos = term_start
                    break
            elif sep_value == "分":
                unit = "minute"
                # Optional 钟
                if self.current_token.type == TokenType.ZHONG:
                    self.eat(TokenType.ZHONG)
            elif sep_value == "秒":
                unit = "second"
            else:
                # Any other separator (e.g. ':') is not a relative unit.
                self.pos = term_start
                break
        elif unit_type in direction_types:
            # A number followed directly by a direction is taken as hours.
            unit = "hour"
        else:
            # No unit and no direction: not a relative time expression.
            self.pos = term_start
            break

        # Direction (后/前); forward is the default.
        direction = 1
        if self.current_token.type == TokenType.RELATIVE_DIRECTION_FORWARD:
            self.eat(TokenType.RELATIVE_DIRECTION_FORWARD)
        elif self.current_token.type == TokenType.RELATIVE_DIRECTION_BACKWARD:
            self.eat(TokenType.RELATIVE_DIRECTION_BACKWARD)
            direction = -1

        if unit == "hour":
            hours += number_value * direction
        elif unit == "minute":
            minutes += number_value * direction
        elif unit == "second":
            seconds += number_value * direction

    return RelativeTimeNode(hours=hours, minutes=minutes, seconds=seconds)
|
|
|
|
def parse_time_expression(self) -> TimeExpressionNode:
    """Parse the whole token stream into a ``TimeExpressionNode``.

    At each position the rules are tried in a fixed order: absolute date,
    relative date, relative time, clock time, weekday. A rule is only
    tried when the current token type can start it and that component has
    not been filled yet. A rule that raises ``ParserError`` has the cursor
    restored and the next rule is tried. If no rule applies, the current
    token is skipped.

    Returns:
        A ``TimeExpressionNode``; components that never matched are
        ``None``.
    """
    number_types = [TokenType.INTEGER, TokenType.CHINESE_NUMBER]
    relative_date_starts = [
        TokenType.RELATIVE_TODAY, TokenType.RELATIVE_TOMORROW,
        TokenType.RELATIVE_DAY_AFTER_TOMORROW,
        TokenType.RELATIVE_THREE_DAYS_AFTER_TOMORROW,
        TokenType.RELATIVE_YESTERDAY, TokenType.RELATIVE_DAY_BEFORE_YESTERDAY,
        TokenType.RELATIVE_THREE_DAYS_BEFORE_YESTERDAY,
        # Numbers start patterns like "X年后", "X个月后".
        TokenType.INTEGER, TokenType.CHINESE_NUMBER,
        TokenType.RELATIVE_NEXT, TokenType.RELATIVE_LAST, TokenType.RELATIVE_THIS,
    ]
    relative_time_starts = number_types + [
        TokenType.HALF, TokenType.QUARTER,
        TokenType.RELATIVE_DIRECTION_FORWARD,
        TokenType.RELATIVE_DIRECTION_BACKWARD,
    ]
    # parse_time handles the early-morning / late-night markers itself,
    # so they have to be routed to it here.
    time_starts = number_types + [
        TokenType.TIME_SEPARATOR, TokenType.PERIOD_AM, TokenType.PERIOD_PM,
        TokenType.EARLY_MORNING, TokenType.LATE_NIGHT,
    ]
    weekday_starts = [
        TokenType.WEEK_SCOPE_CURRENT, TokenType.WEEK_SCOPE_LAST,
        TokenType.WEEK_SCOPE_NEXT,
        TokenType.WEEKDAY_MONDAY, TokenType.WEEKDAY_TUESDAY,
        TokenType.WEEKDAY_WEDNESDAY, TokenType.WEEKDAY_THURSDAY,
        TokenType.WEEKDAY_FRIDAY, TokenType.WEEKDAY_SATURDAY,
        TokenType.WEEKDAY_SUNDAY,
    ]

    def attempt(parse_fn):
        """Run *parse_fn*; on ParserError rewind the cursor and return None."""
        saved = self.pos
        try:
            return parse_fn()
        except ParserError:
            self.pos = saved
            return None

    date_node = None
    time_node = None
    relative_date_node = None
    relative_time_node = None
    weekday_node = None

    while self.current_token.type != TokenType.EOF:
        # Every failed attempt rewinds the cursor, so the token type seen
        # here stays valid for all rule checks in this iteration.
        kind = self.current_token.type

        # Absolute dates take precedence over everything else.
        if kind in number_types and date_node is None:
            date_node = attempt(self.parse_date)
            if date_node is not None:
                continue

        if kind in relative_date_starts and relative_date_node is None:
            relative_date_node = attempt(self.parse_relative_date)
            if relative_date_node is not None:
                continue

        # Relative time is tried before clock time because both can
        # start with a number.
        if kind in relative_time_starts and relative_time_node is None:
            saved = self.pos
            relative_time_node = attempt(self.parse_relative_time)
            if relative_time_node is not None:
                if (relative_time_node.hours != 0
                        or relative_time_node.minutes != 0
                        or relative_time_node.seconds != 0):
                    continue
                # Nothing relative was found: rewind and try other rules.
                # NOTE(review): the all-zero node is kept, so relative
                # time is not attempted again later in the input.
                self.pos = saved

        if kind in time_starts and time_node is None:
            time_node = attempt(self.parse_time)
            if time_node is not None:
                continue

        if kind in weekday_starts and weekday_node is None:
            weekday_node = attempt(self.parse_weekday)
            if weekday_node is not None:
                continue

        # No rule consumed anything here; skip the token.
        self.pos += 1

    return TimeExpressionNode(
        date=date_node,
        time=time_node,
        relative_date=relative_date_node,
        relative_time=relative_time_node,
        weekday=weekday_node,
    )
|
|
|
|
def parse(self) -> TimeExpressionNode:
    """Entry point: parse the whole input and return its AST root."""
    expression = self.parse_time_expression()
    return expression