"""
Parser for time expressions that builds an Abstract Syntax Tree (AST).
"""

from typing import Iterator, Optional, List
import datetime
import logging

from .ptime_token import Token, TokenType
from .ptime_ast import (
    ASTNode,
    NumberNode,
    DateNode,
    TimeNode,
    RelativeDateNode,
    RelativeTimeNode,
    WeekdayNode,
    TimeExpressionNode
)
from .lexer import Lexer

logger = logging.getLogger(__name__)

# Token types that carry a numeric value.
_NUMBER_TYPES = (TokenType.INTEGER, TokenType.CHINESE_NUMBER)

# Token types for the direction suffixes (后 / 前).
_DIRECTION_TYPES = (
    TokenType.RELATIVE_DIRECTION_FORWARD,
    TokenType.RELATIVE_DIRECTION_BACKWARD,
)

# Single-token relative days mapped to their offset from today, in days.
_DAY_OFFSETS = {
    TokenType.RELATIVE_TODAY: 0,
    TokenType.RELATIVE_TOMORROW: 1,
    TokenType.RELATIVE_DAY_AFTER_TOMORROW: 2,
    TokenType.RELATIVE_THREE_DAYS_AFTER_TOMORROW: 3,
    TokenType.RELATIVE_YESTERDAY: -1,
    TokenType.RELATIVE_DAY_BEFORE_YESTERDAY: -2,
    TokenType.RELATIVE_THREE_DAYS_BEFORE_YESTERDAY: -3,
}

# Week-scope prefixes mapped to the scope string stored on WeekdayNode.
_WEEK_SCOPES = {
    TokenType.WEEK_SCOPE_CURRENT: "current",
    TokenType.WEEK_SCOPE_LAST: "last",
    TokenType.WEEK_SCOPE_NEXT: "next",
}

# Weekday tokens mapped to a 0-based index (Monday == 0).
_WEEKDAY_INDEX = {
    TokenType.WEEKDAY_MONDAY: 0,
    TokenType.WEEKDAY_TUESDAY: 1,
    TokenType.WEEKDAY_WEDNESDAY: 2,
    TokenType.WEEKDAY_THURSDAY: 3,
    TokenType.WEEKDAY_FRIDAY: 4,
    TokenType.WEEKDAY_SATURDAY: 5,
    TokenType.WEEKDAY_SUNDAY: 6,
}

# Token types on which parse_time_expression attempts each sub-parser.
_RELATIVE_DATE_STARTS = tuple(_DAY_OFFSETS) + _NUMBER_TYPES + (
    TokenType.RELATIVE_NEXT,
    TokenType.RELATIVE_LAST,
    TokenType.RELATIVE_THIS,
)
_RELATIVE_TIME_STARTS = _NUMBER_TYPES + (
    TokenType.HALF,
    TokenType.QUARTER,
) + _DIRECTION_TYPES
# EARLY_MORNING / LATE_NIGHT are included so that the "早八" / "晚十" handling
# in parse_time is reachable from the main loop.
_TIME_STARTS = _NUMBER_TYPES + (
    TokenType.TIME_SEPARATOR,
    TokenType.PERIOD_AM,
    TokenType.PERIOD_PM,
    TokenType.EARLY_MORNING,
    TokenType.LATE_NIGHT,
)
_WEEKDAY_STARTS = tuple(_WEEK_SCOPES) + tuple(_WEEKDAY_INDEX)


class ParserError(Exception):
    """Exception raised for parser errors."""


class Parser:
    """Parser for time expressions that builds an AST.

    The input text is tokenized eagerly in the constructor; the ``parse_*``
    methods then walk the token list through the cursor ``self.pos``.
    """

    def __init__(self, text: str, now: Optional[datetime.datetime] = None):
        """Tokenize ``text`` and position the cursor on the first token.

        Args:
            text: The time expression to parse.
            now: Reference time, forwarded unchanged to the lexer. When it is
                omitted, ``self.now`` falls back to ``datetime.datetime.now()``.
        """
        self.lexer = Lexer(text, now)
        self.tokens: List[Token] = list(self.lexer.tokenize())
        self.pos = 0
        self.now = now or datetime.datetime.now()

    @property
    def current_token(self) -> Token:
        """Return the token under the cursor, or a synthetic EOF token."""
        if self.pos < len(self.tokens):
            return self.tokens[self.pos]
        return Token(TokenType.EOF, None, len(self.tokens))

    def eat(self, token_type: TokenType) -> Token:
        """Consume and return the current token if it has ``token_type``.

        Raises:
            ParserError: If the current token has a different type. The cursor
                is not moved in that case.
        """
        token = self.current_token
        if token.type != token_type:
            raise ParserError(
                f"Expected token {token_type}, got {token.type} "
                f"at position {token.position}"
            )
        self.pos += 1
        return token

    def peek(self, offset: int = 1) -> Token:
        """Return the token ``offset`` places ahead without consuming it.

        A synthetic EOF token is returned when the lookahead runs past the end
        of the token list.
        """
        next_pos = self.pos + offset
        if next_pos < len(self.tokens):
            return self.tokens[next_pos]
        return Token(TokenType.EOF, None, len(self.tokens))

    def _try_parse(self, parse_fn):
        """Run ``parse_fn``; on ParserError restore the cursor and return None."""
        start = self.pos
        try:
            return parse_fn()
        except ParserError:
            self.pos = start
            return None

    def _eat_direction(self) -> int:
        """Consume an optional direction token and return its sign.

        Returns:
            -1 when a backward token was consumed, otherwise 1 (a forward token
            is consumed if present; no token at all also yields 1).
        """
        token_type = self.current_token.type
        if token_type == TokenType.RELATIVE_DIRECTION_FORWARD:
            self.eat(token_type)
        elif token_type == TokenType.RELATIVE_DIRECTION_BACKWARD:
            self.eat(token_type)
            return -1
        return 1

    def _looks_like_month_day(self) -> bool:
        """Return True if the cursor is on a ``MM月DD`` pattern (e.g. "6月20日")."""
        tokens = self.tokens
        i = self.pos
        if i + 2 >= len(tokens):
            return False
        return (
            tokens[i].type in _NUMBER_TYPES
            and tokens[i + 1].type in (TokenType.DATE_SEPARATOR, TokenType.MONTH)
            and tokens[i + 1].value == '月'
            and tokens[i + 2].type in _NUMBER_TYPES
        )

    def _looks_like_full_date(self) -> bool:
        """Return True if the cursor is on a ``YYYY-MM-``, ``YYYY/MM/`` or
        ``YYYY年MM月`` prefix with at least one further token after it."""
        tokens = self.tokens
        i = self.pos
        if i + 4 >= len(tokens):
            return False
        return (
            tokens[i].type in _NUMBER_TYPES
            and tokens[i + 1].type in (TokenType.DATE_SEPARATOR, TokenType.YEAR)
            and tokens[i + 1].value in ('-', '/', '年')
            and tokens[i + 2].type in _NUMBER_TYPES
            and tokens[i + 3].type in (TokenType.DATE_SEPARATOR, TokenType.MONTH)
            and tokens[i + 3].value in ('-', '/', '月')
        )

    def parse_number(self) -> NumberNode:
        """Parse a number (integer or Chinese number).

        Raises:
            ParserError: If the current token is not a number token.
        """
        token = self.current_token
        if token.type in _NUMBER_TYPES:
            self.eat(token.type)
            return NumberNode(value=token.value)
        raise ParserError(
            f"Expected number, got {token.type} at position {token.position}"
        )

    def parse_date(self) -> DateNode:
        """Parse an absolute date specification.

        Recognised shapes, tried in this order:

        * ``YYYY-MM-DD`` / ``YYYY/MM/DD``
        * ``YYYY年MM月DD`` with an optional trailing 日/号
        * ``MM月DD`` with an optional trailing 日/号 (``year`` is ``None``)

        Every shape is fully verified by lookahead before anything is
        consumed, so a failure leaves the cursor untouched.

        Raises:
            ParserError: If no shape matches at the cursor.
        """
        first = self.current_token
        second = self.peek()

        # YYYY-MM-DD or YYYY/MM/DD
        if (first.type == TokenType.INTEGER
                and second.type == TokenType.DATE_SEPARATOR
                and second.value in ('-', '/')
                and self.peek(2).type == TokenType.INTEGER
                and self.peek(3).type == TokenType.DATE_SEPARATOR
                and self.peek(3).value in ('-', '/')
                and self.peek(4).type == TokenType.INTEGER):
            year_token = self.eat(TokenType.INTEGER)
            self.eat(TokenType.DATE_SEPARATOR)
            month_token = self.eat(TokenType.INTEGER)
            self.eat(TokenType.DATE_SEPARATOR)
            day_token = self.eat(TokenType.INTEGER)
            return DateNode(
                year=NumberNode(value=year_token.value),
                month=NumberNode(value=month_token.value),
                day=NumberNode(value=day_token.value),
            )

        # YYYY年MM月DD[日号]
        if (first.type == TokenType.INTEGER
                and second.type in (TokenType.DATE_SEPARATOR, TokenType.YEAR)
                and self.peek(2).type == TokenType.INTEGER
                and self.peek(3).type in (TokenType.DATE_SEPARATOR, TokenType.MONTH)
                and self.peek(4).type == TokenType.INTEGER):
            year_token = self.eat(TokenType.INTEGER)
            self.eat(self.current_token.type)  # 年 (DATE_SEPARATOR or YEAR)
            month_token = self.eat(TokenType.INTEGER)
            self.eat(self.current_token.type)  # 月 (DATE_SEPARATOR or MONTH)
            day_token = self.eat(TokenType.INTEGER)
            # Optional 日 or 号
            if self.current_token.type in (TokenType.DATE_SEPARATOR, TokenType.DAY):
                self.eat(self.current_token.type)
            return DateNode(
                year=NumberNode(value=year_token.value),
                month=NumberNode(value=month_token.value),
                day=NumberNode(value=day_token.value),
            )

        # MM月DD[日号] (without year); covers both integer and Chinese numbers
        if self._looks_like_month_day():
            month_token = self.eat(first.type)
            self.eat(self.current_token.type)  # 月 (DATE_SEPARATOR or MONTH)
            day_token = self.eat(self.current_token.type)
            # Optional 日 or 号
            if self.current_token.type in (TokenType.DATE_SEPARATOR, TokenType.DAY):
                self.eat(self.current_token.type)
            return DateNode(
                year=None,
                month=NumberNode(value=month_token.value),
                day=NumberNode(value=day_token.value),
            )

        raise ParserError(
            f"Unable to parse date at position {self.current_token.position}"
        )

    def parse_time(self) -> TimeNode:
        """Parse an absolute time-of-day specification.

        Recognised shapes:

        * ``HH:MM`` with optional ``:SS`` -- always flagged as 24-hour.
        * An optional AM/PM period token followed by one of
          - 早 + number ("早八"),
          - 晚 + number ("晚十", hours <= 12 are shifted by 12),
          - number followed by 点/时 with optional minutes, 分, 半, 刻, 整, 钟,
          - a bare number (hour only).

        Raises:
            ParserError: If the tokens at the cursor do not form a time. The
                cursor may already have advanced; callers that want to
                backtrack must restore it themselves.
        """
        # HH:MM[:SS]
        if (self.current_token.type == TokenType.INTEGER
                and self.peek().type == TokenType.TIME_SEPARATOR
                and self.peek().value == ':'):
            hour_token = self.eat(TokenType.INTEGER)
            self.eat(TokenType.TIME_SEPARATOR)
            minute_token = self.eat(TokenType.INTEGER)
            second_node = None
            # Optional :SS
            if (self.current_token.type == TokenType.TIME_SEPARATOR
                    and self.peek().type == TokenType.INTEGER):
                self.eat(TokenType.TIME_SEPARATOR)
                second_node = NumberNode(value=self.eat(TokenType.INTEGER).value)
            return TimeNode(
                hour=NumberNode(value=hour_token.value),
                minute=NumberNode(value=minute_token.value),
                second=second_node,
                is_24hour=True,  # HH:MM is always interpreted as 24-hour
                period=None,
            )

        # Chinese time format (X点X分), optionally preceded by a period marker.
        period = None
        if self.current_token.type == TokenType.PERIOD_AM:
            self.eat(TokenType.PERIOD_AM)
            period = "AM"
        elif self.current_token.type == TokenType.PERIOD_PM:
            self.eat(TokenType.PERIOD_PM)
            period = "PM"

        hour_node = None
        minute_node = None
        is_24hour = False
        lead_type = self.current_token.type

        if lead_type == TokenType.EARLY_MORNING:
            self.eat(TokenType.EARLY_MORNING)
            if self.current_token.type not in _NUMBER_TYPES:
                raise ParserError(
                    f"Expected number after '早', got {self.current_token.type} "
                    f"at position {self.current_token.position}"
                )
            hour_token = self.eat(self.current_token.type)
            hour_node = NumberNode(value=hour_token.value)
            # "早八" should be interpreted as 08:00; an hour above 12 cannot
            # be a morning hour, so the period is dropped.
            is_24hour = True
            period = None if hour_node.value > 12 else "AM"
        elif lead_type == TokenType.LATE_NIGHT:
            self.eat(TokenType.LATE_NIGHT)
            if self.current_token.type not in _NUMBER_TYPES:
                raise ParserError(
                    f"Expected number after '晚', got {self.current_token.type} "
                    f"at position {self.current_token.position}"
                )
            hour_token = self.eat(self.current_token.type)
            hour_node = NumberNode(value=hour_token.value)
            # "晚十" should be interpreted as 22:00.
            # NOTE(review): the period reset is assumed to sit inside this
            # branch; the original nesting was ambiguous -- confirm.
            is_24hour = True
            if hour_node.value <= 12:
                hour_node.value += 12
                period = None
            else:
                period = "PM"
        elif lead_type in _NUMBER_TYPES:
            hour_token = self.eat(lead_type)
            hour_node = NumberNode(value=hour_token.value)
            if self.current_token.type == TokenType.TIME_SEPARATOR:
                separator = self.eat(TokenType.TIME_SEPARATOR).value
                # 点 keeps the 12-hour reading, 时 switches to 24-hour.
                if separator == '时':
                    is_24hour = True

                # Optional minutes, with an optional trailing 分
                if self.current_token.type in _NUMBER_TYPES:
                    minute_token = self.eat(self.current_token.type)
                    if (self.current_token.type == TokenType.TIME_SEPARATOR
                            and self.current_token.value == '分'):
                        self.eat(TokenType.TIME_SEPARATOR)
                    minute_node = NumberNode(value=minute_token.value)

                # Special minute markers override / supply the minutes.
                if self.current_token.type == TokenType.HALF:
                    self.eat(TokenType.HALF)
                    minute_node = NumberNode(value=30)
                elif self.current_token.type == TokenType.QUARTER:
                    self.eat(TokenType.QUARTER)
                    minute_node = NumberNode(value=15)
                elif self.current_token.type == TokenType.ZHENG:
                    self.eat(TokenType.ZHENG)
                    # NOTE(review): assumed to apply only after 整; the
                    # original nesting was ambiguous -- confirm.
                    if minute_node is None:
                        minute_node = NumberNode(value=0)

                # Optional 钟
                if self.current_token.type == TokenType.ZHONG:
                    self.eat(TokenType.ZHONG)
            # Without a separator the number is treated as an hour-only time.
        else:
            raise ParserError(
                f"Unable to parse time at position {self.current_token.position}"
            )

        return TimeNode(
            hour=hour_node,
            minute=minute_node,
            second=None,
            is_24hour=is_24hour,
            period=period,
        )

    def parse_relative_date(self) -> RelativeDateNode:
        """Parse a relative date specification.

        Handles the single-token days (today, tomorrow, ... ) and the extended
        forms such as "明年", "去年", "下个月", "上个月", "下周" and "X年/月/周/天"
        with an optional 前/后 direction.

        Two fields are overloaded as markers for later semantic analysis:

        * "明年" followed by ``N月`` stores ``N - 100`` in ``months``.
        * "next month" followed by ``N号`` stores ``N`` in ``weeks``.

        Raises:
            ParserError: If the cursor is on what looks like an absolute date,
                or no relative-date pattern matches. The cursor is restored to
                where it was on entry before the error propagates.
        """
        token_type = self.current_token.type
        if token_type in _DAY_OFFSETS:
            self.eat(token_type)
            return RelativeDateNode(
                years=0, months=0, weeks=0, days=_DAY_OFFSETS[token_type]
            )

        # "6月20日" and friends belong to parse_date, not to this method.
        if self._looks_like_month_day():
            raise ParserError("Looks like absolute date format")

        original_pos = self.pos
        try:
            years, months, weeks, days = self._parse_extended_relative_date()
        except ParserError:
            # Reset position if parsing failed
            self.pos = original_pos
            raise ParserError(
                f"Expected relative date, got {self.current_token.type} "
                f"at position {self.current_token.position}"
            )

        return RelativeDateNode(years=years, months=months, weeks=weeks, days=days)

    def _parse_extended_relative_date(self):
        """Parse the multi-token relative date forms.

        Returns:
            A ``(years, months, weeks, days)`` tuple.

        Raises:
            ParserError: If nothing matches. The cursor may have advanced;
                parse_relative_date restores it.
        """
        years = months = weeks = days = 0
        current = self.current_token
        next_type = self.peek().type

        # "今年", "明年", "去年" as <relative> + <year> token pairs
        if current.type == TokenType.RELATIVE_THIS and next_type == TokenType.YEAR:
            self.eat(TokenType.RELATIVE_THIS)
            self.eat(TokenType.YEAR)
            years = 0
        elif current.type == TokenType.RELATIVE_NEXT and next_type == TokenType.YEAR:
            self.eat(TokenType.RELATIVE_NEXT)
            self.eat(TokenType.YEAR)
            years = 1
        elif current.type == TokenType.RELATIVE_LAST and next_type == TokenType.YEAR:
            self.eat(TokenType.RELATIVE_LAST)
            self.eat(TokenType.YEAR)
            years = -1
        # The same words delivered as a single token
        elif current.type == TokenType.RELATIVE_NEXT and current.value == "明年":
            self.eat(TokenType.RELATIVE_NEXT)
            years = 1
            # Optional month after "明年"
            if (self.current_token.type in _NUMBER_TYPES
                    and self.peek().type == TokenType.MONTH):
                month_node = self.parse_number()
                self.eat(TokenType.MONTH)
                # Offset by -100 to mark an absolute month for the semantic
                # analysis stage.
                months = month_node.value - 100
        elif current.type == TokenType.RELATIVE_LAST and current.value == "去年":
            self.eat(TokenType.RELATIVE_LAST)
            years = -1
        elif current.type == TokenType.RELATIVE_THIS and current.value == "今年":
            self.eat(TokenType.RELATIVE_THIS)
            years = 0
        # "这个月", "下个月", "上个月"
        elif current.type == TokenType.RELATIVE_THIS and next_type == TokenType.MONTH:
            self.eat(TokenType.RELATIVE_THIS)
            self.eat(TokenType.MONTH)
            months = 0
        elif current.type == TokenType.RELATIVE_NEXT and next_type == TokenType.MONTH:
            self.eat(TokenType.RELATIVE_NEXT)
            self.eat(TokenType.MONTH)
            months = 1
            # Patterns like "下个月五号"
            if (self.current_token.type in _NUMBER_TYPES
                    and self.peek().type == TokenType.DAY):
                day_node = self.parse_number()
                self.eat(TokenType.DAY)
                # HACK: the target day of month travels in the weeks field
                # until semantic analysis picks it up.
                weeks = day_node.value
        elif current.type == TokenType.RELATIVE_LAST and next_type == TokenType.MONTH:
            self.eat(TokenType.RELATIVE_LAST)
            self.eat(TokenType.MONTH)
            months = -1
        # "下周", "上周"
        elif current.type == TokenType.RELATIVE_NEXT and next_type == TokenType.WEEK:
            self.eat(TokenType.RELATIVE_NEXT)
            self.eat(TokenType.WEEK)
            weeks = 1
        elif current.type == TokenType.RELATIVE_LAST and next_type == TokenType.WEEK:
            self.eat(TokenType.RELATIVE_LAST)
            self.eat(TokenType.WEEK)
            weeks = -1
        # "X年后", "X个月后", ...
        elif current.type in _NUMBER_TYPES:
            if self._looks_like_full_date():
                # e.g. "2025年11月21日": leave it to parse_date
                raise ParserError("Looks like absolute date format")

            amount = self.parse_number().value
            logger.debug("Relative date amount: %s", amount)

            unit_type = self.current_token.type
            if unit_type not in (TokenType.YEAR, TokenType.MONTH,
                                 TokenType.WEEK, TokenType.DAY):
                logger.debug("Unexpected token type: %s", unit_type)
                raise ParserError(
                    f"Expected time unit, got {self.current_token.type} "
                    f"at position {self.current_token.position}"
                )
            self.eat(unit_type)

            # Direction (前/后); forward and "no direction" both keep the sign.
            if self._eat_direction() < 0:
                amount = -amount

            if unit_type == TokenType.YEAR:
                years = amount
            elif unit_type == TokenType.MONTH:
                months = amount
            elif unit_type == TokenType.WEEK:
                weeks = amount
            else:
                days = amount
            logger.debug("Relative date unit %s set to %s", unit_type, amount)
        else:
            # Nothing matched and nothing was consumed. Reporting success here
            # would hand the caller an all-zero node that occupies the
            # relative-date slot and hides any later, real relative date.
            raise ParserError(
                f"No relative date pattern at position {current.position}"
            )

        return years, months, weeks, days

    def parse_weekday(self) -> WeekdayNode:
        """Parse a weekday with an optional week scope (本, 上, 下).

        The weekday is either a dedicated weekday token or a Chinese number in
        the range 1-7 (1 = Monday). The resulting index is 0-based.

        Raises:
            ParserError: If no weekday follows, or the number is outside 1-7.
                A scope token may already have been consumed at that point.
        """
        scope = "current"
        scope_type = self.current_token.type
        if scope_type in _WEEK_SCOPES:
            self.eat(scope_type)
            scope = _WEEK_SCOPES[scope_type]

        token = self.current_token
        if token.type == TokenType.CHINESE_NUMBER:
            weekday_num = token.value
            if 1 <= weekday_num <= 7:
                self.eat(TokenType.CHINESE_NUMBER)
                return WeekdayNode(weekday=weekday_num - 1, scope=scope)
            raise ParserError(
                f"Invalid weekday number: {weekday_num} "
                f"at position {token.position}"
            )

        if token.type in _WEEKDAY_INDEX:
            self.eat(token.type)
            return WeekdayNode(weekday=_WEEKDAY_INDEX[token.type], scope=scope)

        raise ParserError(
            f"Expected weekday, got {token.type} at position {token.position}"
        )

    def parse_relative_time(self) -> RelativeTimeNode:
        """Parse a sequence of relative time components.

        Components are accumulated until a token is met that cannot continue
        the sequence: 半[个][小时], 刻[钟], or a number followed by a unit
        (hour/minute/second token, or 点/分/秒) and an optional 前/后. A bare
        number directly followed by a direction counts as hours.

        When a number turns out not to belong to a relative time (no unit and
        no direction, "X点" without a direction, or an unrecognised separator
        such as ':'), the cursor is rewound to that number so that the caller
        can hand it to another sub-parser.

        Returns:
            A RelativeTimeNode; all fields are zero when nothing was parsed.
        """
        hours = 0.0
        minutes = 0.0
        seconds = 0.0

        while True:
            token_type = self.current_token.type

            # 半小时
            if token_type == TokenType.HALF:
                self.eat(TokenType.HALF)
                # Optional 个
                if (self.current_token.type == TokenType.INTEGER
                        and self.current_token.value == "个"):
                    self.eat(TokenType.INTEGER)
                # Optional 小时
                # NOTE(review): the half hour is counted whether or not 小时
                # follows; the original nesting was ambiguous -- confirm.
                if self.current_token.type == TokenType.HOUR:
                    self.eat(TokenType.HOUR)
                hours += 0.5
                if self._eat_direction() < 0:
                    hours = -hours
                continue

            # 一刻钟 (15 minutes)
            if token_type == TokenType.QUARTER:
                self.eat(TokenType.QUARTER)
                # Optional 钟
                if self.current_token.type == TokenType.ZHONG:
                    self.eat(TokenType.ZHONG)
                minutes += 15
                if self._eat_direction() < 0:
                    minutes = -minutes
                continue

            # Anything that is not a number (including a stray direction
            # token) ends the sequence.
            if token_type not in _NUMBER_TYPES:
                break

            number_start = self.pos
            amount = self.parse_number().value
            unit = None
            unit_type = self.current_token.type

            if unit_type == TokenType.HOUR:
                self.eat(TokenType.HOUR)
                # Optional 个
                if (self.current_token.type == TokenType.INTEGER
                        and self.current_token.value == "个"):
                    self.eat(TokenType.INTEGER)
                unit = "hour"
            elif unit_type == TokenType.MINUTE:
                self.eat(TokenType.MINUTE)
                unit = "minute"
            elif unit_type == TokenType.SECOND:
                self.eat(TokenType.SECOND)
                unit = "second"
            elif unit_type == TokenType.TIME_SEPARATOR:
                # "X点", "X分", "X秒"
                separator = self.eat(TokenType.TIME_SEPARATOR).value
                if separator == "点":
                    # Optional 钟
                    if self.current_token.type == TokenType.ZHONG:
                        self.eat(TokenType.ZHONG)
                    # "X点" without a direction is an absolute time; with one
                    # it is an hour offset.
                    if self.current_token.type in _DIRECTION_TYPES:
                        unit = "hour"
                elif separator == "分":
                    unit = "minute"
                    # Optional 钟
                    if self.current_token.type == TokenType.ZHONG:
                        self.eat(TokenType.ZHONG)
                elif separator == "秒":
                    unit = "second"
            elif unit_type in _DIRECTION_TYPES:
                # A number directly followed by a direction is taken as hours.
                unit = "hour"

            if unit is None:
                # Not a relative time component: give the number (and
                # whatever followed it) back to the caller.
                self.pos = number_start
                break

            signed_amount = amount * self._eat_direction()
            if unit == "hour":
                hours += signed_amount
            elif unit == "minute":
                minutes += signed_amount
            else:
                seconds += signed_amount

        return RelativeTimeNode(hours=hours, minutes=minutes, seconds=seconds)

    def parse_time_expression(self) -> TimeExpressionNode:
        """Parse a complete time expression.

        Walks the token list once. At each position the sub-parsers are tried
        in a fixed order (absolute date, relative date, relative time, time,
        weekday); each kind is accepted at most once. A failed attempt restores
        the cursor. A token no sub-parser accepts is skipped.
        """
        date_node = None
        time_node = None
        relative_date_node = None
        relative_time_node = None
        weekday_node = None

        while self.current_token.type != TokenType.EOF:
            # Failed attempts restore the cursor, so this stays valid for the
            # whole iteration.
            token_type = self.current_token.type

            # Absolute dates take precedence over everything else.
            if date_node is None and token_type in _NUMBER_TYPES:
                date_node = self._try_parse(self.parse_date)
                if date_node is not None:
                    continue

            if relative_date_node is None and token_type in _RELATIVE_DATE_STARTS:
                relative_date_node = self._try_parse(self.parse_relative_date)
                if relative_date_node is not None:
                    continue

            # Relative time is tried before absolute time because both may
            # start with a number.
            if relative_time_node is None and token_type in _RELATIVE_TIME_STARTS:
                start = self.pos
                candidate = self._try_parse(self.parse_relative_time)
                if candidate is not None and (
                        candidate.hours != 0
                        or candidate.minutes != 0
                        or candidate.seconds != 0):
                    relative_time_node = candidate
                    continue
                # Nothing was parsed: discard the empty node so that a later
                # relative time in the same expression is still accepted.
                self.pos = start

            if time_node is None and token_type in _TIME_STARTS:
                time_node = self._try_parse(self.parse_time)
                if time_node is not None:
                    continue

            if weekday_node is None and token_type in _WEEKDAY_STARTS:
                weekday_node = self._try_parse(self.parse_weekday)
                if weekday_node is not None:
                    continue

            # Nothing could be parsed here; skip the token.
            self.pos += 1

        return TimeExpressionNode(
            date=date_node,
            time=time_node,
            relative_date=relative_date_node,
            relative_time=relative_time_node,
            weekday=weekday_node
        )

    def parse(self) -> TimeExpressionNode:
        """Parse the complete time expression and return the AST."""
        return self.parse_time_expression()