"""
Token definitions for the time parser.
"""

from dataclasses import dataclass
from enum import Enum, unique
from typing import Union


@unique
class TokenType(Enum):
    """Types of tokens recognized by the lexer.

    Every member's value is its own name as a string, so ``TokenType("EOF")``
    looks a member up by name-like value. ``@unique`` makes an accidental
    duplicate value fail at import time instead of silently aliasing an
    existing member.

    The trailing comments list example surface forms for each token type.
    They are carried over from the original annotations; the authoritative
    mapping from text to token type lives in the lexer, not in this enum.
    """

    # Numbers
    INTEGER = "INTEGER"
    CHINESE_NUMBER = "CHINESE_NUMBER"

    # Time units
    YEAR = "YEAR"
    MONTH = "MONTH"
    DAY = "DAY"
    WEEK = "WEEK"
    HOUR = "HOUR"
    MINUTE = "MINUTE"
    SECOND = "SECOND"

    # Date separators
    DATE_SEPARATOR = "DATE_SEPARATOR"  # -, /, 年 (year), 月 (month), 日 / 号 (day)

    # Time separators
    TIME_SEPARATOR = "TIME_SEPARATOR"  # :, 点 (o'clock), 时 (hour), 分 (minute), 秒 (second)

    # Period indicators
    PERIOD_AM = "PERIOD_AM"  # 上午, 早上, 早晨 (morning), etc.
    PERIOD_PM = "PERIOD_PM"  # 下午 (afternoon), 晚上 (evening), 中午 (noon), etc.

    # Relative time
    RELATIVE_TODAY = "RELATIVE_TODAY"  # 今天 (today), 今晚 (tonight), 今早 (this morning), etc.
    RELATIVE_TOMORROW = "RELATIVE_TOMORROW"  # 明天 (tomorrow)
    RELATIVE_DAY_AFTER_TOMORROW = "RELATIVE_DAY_AFTER_TOMORROW"  # 后天 (day after tomorrow)
    # 大后天: the day after 后天, i.e. three days from today.
    RELATIVE_THREE_DAYS_AFTER_TOMORROW = "RELATIVE_THREE_DAYS_AFTER_TOMORROW"
    RELATIVE_YESTERDAY = "RELATIVE_YESTERDAY"  # 昨天 (yesterday)
    RELATIVE_DAY_BEFORE_YESTERDAY = "RELATIVE_DAY_BEFORE_YESTERDAY"  # 前天 (day before yesterday)
    # 大前天: the day before 前天, i.e. three days ago.
    RELATIVE_THREE_DAYS_BEFORE_YESTERDAY = "RELATIVE_THREE_DAYS_BEFORE_YESTERDAY"
    RELATIVE_DIRECTION_FORWARD = "RELATIVE_DIRECTION_FORWARD"  # 后, 以后, 之后 (after / later)
    RELATIVE_DIRECTION_BACKWARD = "RELATIVE_DIRECTION_BACKWARD"  # 前, 以前, 之前 (before / ago)

    # Extended relative time
    RELATIVE_NEXT = "RELATIVE_NEXT"  # 下 (next)
    RELATIVE_LAST = "RELATIVE_LAST"  # 上, 去 (last)
    RELATIVE_THIS = "RELATIVE_THIS"  # 这, 本 (this)

    # Week days
    WEEKDAY_MONDAY = "WEEKDAY_MONDAY"
    WEEKDAY_TUESDAY = "WEEKDAY_TUESDAY"
    WEEKDAY_WEDNESDAY = "WEEKDAY_WEDNESDAY"
    WEEKDAY_THURSDAY = "WEEKDAY_THURSDAY"
    WEEKDAY_FRIDAY = "WEEKDAY_FRIDAY"
    WEEKDAY_SATURDAY = "WEEKDAY_SATURDAY"
    WEEKDAY_SUNDAY = "WEEKDAY_SUNDAY"

    # Week scope
    # NOTE(review): these surface forms overlap with RELATIVE_THIS / RELATIVE_LAST /
    # RELATIVE_NEXT above; presumably the lexer disambiguates by context -- confirm there.
    WEEK_SCOPE_CURRENT = "WEEK_SCOPE_CURRENT"  # 本 (this)
    WEEK_SCOPE_LAST = "WEEK_SCOPE_LAST"  # 上 (last)
    WEEK_SCOPE_NEXT = "WEEK_SCOPE_NEXT"  # 下 (next)

    # Special time markers
    HALF = "HALF"  # 半 (half)
    QUARTER = "QUARTER"  # 一刻 (a quarter)
    ZHENG = "ZHENG"  # 整 (sharp / exactly)
    ZHONG = "ZHONG"  # 钟 (as in 点钟, "o'clock")

    # Student-friendly time expressions
    EARLY_MORNING = "EARLY_MORNING"  # 早X (X presumably stands for a number -- confirm in lexer)
    LATE_NIGHT = "LATE_NIGHT"  # 晚X (X presumably stands for a number -- confirm in lexer)

    # Whitespace
    WHITESPACE = "WHITESPACE"

    # End of input
    EOF = "EOF"
@dataclass
class Token:
    """Represents a single token from the lexer.

    ``str()`` and ``repr()`` both render as
    ``Token(<type value>, <repr of value>, <position>)``.
    """

    # Kind of token; only its ``.value`` is read here, for display.
    type: TokenType
    # Token payload, annotated as either a string or an integer.
    value: Union[str, int]
    # Integer position of the token (presumably an offset into the input
    # text -- confirm against the lexer that creates tokens).
    position: int

    def __str__(self) -> str:
        """Return the compact display form of this token."""
        return f"Token({self.type.value}, {self.value!r}, {self.position})"

    def __repr__(self) -> str:
        """Return the same text as ``__str__``.

        Defined explicitly so this form is used instead of the
        field-by-field repr that ``@dataclass`` would otherwise generate.
        """
        return self.__str__()