581 lines
13 KiB
Go
581 lines
13 KiB
Go
package config
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"strconv"
|
|
)
|
|
|
|
// TokenType identifies the lexical category of a Token.
type TokenType int

// Token categories, numbered consecutively from zero. TokenError is the zero
// value of TokenType, so an uninitialised Token carries TokenError.
const (
	// TokenError marks a token that could not be scanned.
	TokenError TokenType = iota
	// TokenEOF marks the end of the input.
	TokenEOF
	// TokenName is an identifier.
	TokenName
	// TokenString is a double-quoted string literal.
	TokenString
	// TokenNumber is an integer or decimal literal.
	TokenNumber
	// TokenBoolean is the literal true or false.
	TokenBoolean
	// TokenEquals is the '=' sign.
	TokenEquals
	// TokenOpenBrace is '{'.
	TokenOpenBrace
	// TokenCloseBrace is '}'.
	TokenCloseBrace
	// TokenComment is a "--" line comment or a "--[[ ... ]]" block comment.
	TokenComment
)
|
|
|
|
// Token represents a lexical token
|
|
type Token struct {
|
|
Type TokenType
|
|
Value []byte
|
|
Line int
|
|
Column int
|
|
}
|
|
|
|
// Scanner handles the low-level parsing of the configuration format
|
|
type Scanner struct {
|
|
reader *bufio.Reader
|
|
line int // Current line number
|
|
col int // Current column position
|
|
buffer []byte
|
|
token Token // Current token
|
|
}
|
|
|
|
// NewScanner creates a new scanner with the given reader
|
|
func NewScanner(r io.Reader) *Scanner {
|
|
return &Scanner{
|
|
reader: bufio.NewReader(r),
|
|
line: 1, // Start at line 1
|
|
col: 0,
|
|
buffer: make([]byte, 0, 128), // Pre-allocate with reasonable capacity
|
|
}
|
|
}
|
|
|
|
// ReadByte reads a single byte from the input
|
|
func (s *Scanner) ReadByte() (byte, error) {
|
|
b, err := s.reader.ReadByte()
|
|
if err == nil {
|
|
if b == '\n' {
|
|
s.line++
|
|
s.col = 0
|
|
} else {
|
|
s.col++
|
|
}
|
|
}
|
|
return b, err
|
|
}
|
|
|
|
// PeekByte looks at the next byte without consuming it
|
|
func (s *Scanner) PeekByte() (byte, error) {
|
|
b, err := s.reader.Peek(1)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
return b[0], nil
|
|
}
|
|
|
|
// PeekBytes looks at the next n bytes without consuming them
|
|
func (s *Scanner) PeekBytes(n int) ([]byte, error) {
|
|
return s.reader.Peek(n)
|
|
}
|
|
|
|
// UnreadByte pushes back a byte to the reader
|
|
func (s *Scanner) UnreadByte() error {
|
|
err := s.reader.UnreadByte()
|
|
if err == nil && s.col > 0 {
|
|
s.col--
|
|
}
|
|
return err
|
|
}
|
|
|
|
// Error creates an error with line and column information
|
|
func (s *Scanner) Error(msg string) error {
|
|
return fmt.Errorf("line %d, column %d: %s", s.line, s.col, msg)
|
|
}
|
|
|
|
// SkipWhitespace skips whitespace characters
|
|
func (s *Scanner) SkipWhitespace() error {
|
|
for {
|
|
b, err := s.PeekByte()
|
|
if err == io.EOF {
|
|
return nil
|
|
}
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Fast check for common whitespace bytes
|
|
if b != ' ' && b != '\t' && b != '\n' && b != '\r' {
|
|
return nil
|
|
}
|
|
|
|
_, err = s.ReadByte()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
|
|
// NextToken scans and returns the next token
|
|
func (s *Scanner) NextToken() (Token, error) {
|
|
if s.token.Type != TokenError {
|
|
// We have a stored token
|
|
token := s.token
|
|
s.token = Token{Type: TokenError} // Reset
|
|
return token, nil
|
|
}
|
|
|
|
// No stored token, scan a new one
|
|
// Skip whitespace
|
|
err := s.SkipWhitespace()
|
|
if err == io.EOF {
|
|
return Token{Type: TokenEOF}, nil
|
|
}
|
|
if err != nil {
|
|
return Token{Type: TokenError, Value: []byte(err.Error())}, err
|
|
}
|
|
|
|
b, err := s.PeekByte()
|
|
if err != nil {
|
|
if err == io.EOF {
|
|
return Token{Type: TokenEOF}, nil
|
|
}
|
|
return Token{Type: TokenError, Value: []byte(err.Error())}, err
|
|
}
|
|
|
|
// Record start position for error reporting
|
|
startLine, startColumn := s.line, s.col
|
|
|
|
// Process based on first character
|
|
switch {
|
|
case b == '=':
|
|
_, _ = s.ReadByte() // consume equals
|
|
return Token{Type: TokenEquals, Line: startLine, Column: startColumn}, nil
|
|
|
|
case b == '{':
|
|
_, _ = s.ReadByte() // consume open brace
|
|
return Token{Type: TokenOpenBrace, Line: startLine, Column: startColumn}, nil
|
|
|
|
case b == '}':
|
|
_, _ = s.ReadByte() // consume close brace
|
|
return Token{Type: TokenCloseBrace, Line: startLine, Column: startColumn}, nil
|
|
|
|
case b == '-':
|
|
// Could be a comment or a negative number
|
|
peekBytes, err := s.PeekBytes(2)
|
|
if err == nil && len(peekBytes) == 2 && peekBytes[1] == '-' {
|
|
err = s.scanComment()
|
|
if err != nil {
|
|
return Token{Type: TokenError, Value: []byte(err.Error())}, err
|
|
}
|
|
return Token{Type: TokenComment, Line: startLine, Column: startColumn}, nil
|
|
}
|
|
|
|
// Check if it's a negative number
|
|
if err == nil && len(peekBytes) == 2 && isDigit(peekBytes[1]) {
|
|
return s.scanNumber(startLine, startColumn)
|
|
}
|
|
|
|
// Just a single dash
|
|
_, _ = s.ReadByte() // consume dash
|
|
return Token{Type: TokenError, Value: []byte("unexpected '-'")},
|
|
fmt.Errorf("unexpected '-' at line %d, column %d", startLine, startColumn)
|
|
|
|
case b == '"':
|
|
return s.scanString(startLine, startColumn)
|
|
|
|
case isLetter(b):
|
|
return s.scanName(startLine, startColumn)
|
|
|
|
case isDigit(b):
|
|
return s.scanNumber(startLine, startColumn)
|
|
|
|
default:
|
|
_, _ = s.ReadByte() // consume the unexpected character
|
|
err := fmt.Errorf("unexpected character: %c", b)
|
|
return Token{Type: TokenError, Value: []byte(err.Error()), Line: startLine, Column: startColumn}, err
|
|
}
|
|
}
|
|
|
|
func (s *Scanner) UnreadToken(token Token) {
|
|
s.token = token // Store the token to be returned next
|
|
}
|
|
|
|
// scanComment processes a comment
|
|
func (s *Scanner) scanComment() error {
|
|
// Consume the first dash
|
|
_, err := s.ReadByte()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Check for second dash
|
|
b, err := s.ReadByte()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if b != '-' {
|
|
return s.Error("invalid comment")
|
|
}
|
|
|
|
// Check for block comment [[
|
|
b, err = s.PeekByte()
|
|
if err == nil && b == '[' {
|
|
_, _ = s.ReadByte() // consume first [
|
|
b, err = s.PeekByte()
|
|
if err == nil && b == '[' {
|
|
_, _ = s.ReadByte() // consume second [
|
|
return s.scanBlockComment()
|
|
}
|
|
}
|
|
|
|
// Line comment - consume until newline or EOF
|
|
for {
|
|
b, err := s.ReadByte()
|
|
if err == io.EOF {
|
|
return nil
|
|
}
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if b == '\n' {
|
|
return nil
|
|
}
|
|
}
|
|
}
|
|
|
|
// scanBlockComment processes a block comment
|
|
func (s *Scanner) scanBlockComment() error {
|
|
for {
|
|
b, err := s.ReadByte()
|
|
if err != nil {
|
|
return s.Error("unclosed block comment")
|
|
}
|
|
|
|
if b == ']' {
|
|
b, err = s.PeekByte()
|
|
if err == nil && b == ']' {
|
|
_, _ = s.ReadByte() // consume second ]
|
|
return nil
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// scanString scans a quoted string
|
|
func (s *Scanner) scanString(startLine, startColumn int) (Token, error) {
|
|
// Reset buffer
|
|
s.buffer = s.buffer[:0]
|
|
|
|
// Consume opening quote
|
|
_, err := s.ReadByte()
|
|
if err != nil {
|
|
return Token{Type: TokenError, Value: []byte(err.Error())}, err
|
|
}
|
|
|
|
for {
|
|
b, err := s.ReadByte()
|
|
if err != nil {
|
|
return Token{Type: TokenError, Value: []byte("unterminated string")}, errors.New("unterminated string")
|
|
}
|
|
|
|
if b == '"' {
|
|
break
|
|
}
|
|
|
|
// Handle escape sequences
|
|
if b == '\\' {
|
|
escaped, err := s.ReadByte()
|
|
if err != nil {
|
|
return Token{Type: TokenError, Value: []byte("unterminated escape sequence")}, errors.New("unterminated escape sequence")
|
|
}
|
|
switch escaped {
|
|
case '"':
|
|
s.buffer = append(s.buffer, '"')
|
|
case '\\':
|
|
s.buffer = append(s.buffer, '\\')
|
|
case 'n':
|
|
s.buffer = append(s.buffer, '\n')
|
|
case 't':
|
|
s.buffer = append(s.buffer, '\t')
|
|
default:
|
|
s.buffer = append(s.buffer, '\\')
|
|
s.buffer = append(s.buffer, escaped)
|
|
}
|
|
} else {
|
|
s.buffer = append(s.buffer, b)
|
|
}
|
|
}
|
|
|
|
return Token{
|
|
Type: TokenString,
|
|
Value: append([]byte(nil), s.buffer...), // Make a copy of the buffer
|
|
Line: startLine,
|
|
Column: startColumn,
|
|
}, nil
|
|
}
|
|
|
|
// scanName scans an identifier
|
|
func (s *Scanner) scanName(startLine, startColumn int) (Token, error) {
|
|
// Reset buffer
|
|
s.buffer = s.buffer[:0]
|
|
|
|
// Read first character
|
|
b, err := s.ReadByte()
|
|
if err != nil {
|
|
return Token{Type: TokenError, Value: []byte(err.Error())}, err
|
|
}
|
|
|
|
if !isLetter(b) {
|
|
return Token{Type: TokenError, Value: []byte("name must start with letter")}, s.Error("name must start with letter")
|
|
}
|
|
s.buffer = append(s.buffer, b)
|
|
|
|
// Read rest of name
|
|
for {
|
|
b, err := s.PeekByte()
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
if err != nil {
|
|
return Token{Type: TokenError, Value: []byte(err.Error())}, err
|
|
}
|
|
if !isLetter(b) && !isDigit(b) && b != '_' {
|
|
break
|
|
}
|
|
s.buffer = append(s.buffer, b)
|
|
_, _ = s.ReadByte()
|
|
}
|
|
|
|
// Check if it's a boolean
|
|
if bytes.Equal(s.buffer, []byte("true")) || bytes.Equal(s.buffer, []byte("false")) {
|
|
return Token{
|
|
Type: TokenBoolean,
|
|
Value: append([]byte(nil), s.buffer...), // Make a copy of the buffer
|
|
Line: startLine,
|
|
Column: startColumn,
|
|
}, nil
|
|
}
|
|
|
|
return Token{
|
|
Type: TokenName,
|
|
Value: append([]byte(nil), s.buffer...), // Make a copy of the buffer
|
|
Line: startLine,
|
|
Column: startColumn,
|
|
}, nil
|
|
}
|
|
|
|
// scanNumber scans a numeric value
|
|
func (s *Scanner) scanNumber(startLine, startColumn int) (Token, error) {
|
|
// Reset buffer
|
|
s.buffer = s.buffer[:0]
|
|
|
|
// Read first character (might be a minus sign or digit)
|
|
b, err := s.ReadByte()
|
|
if err != nil {
|
|
return Token{Type: TokenError, Value: []byte(err.Error())}, err
|
|
}
|
|
s.buffer = append(s.buffer, b)
|
|
|
|
// Scan the rest of the number
|
|
hasDot := false
|
|
for {
|
|
b, err := s.PeekByte()
|
|
if err != nil {
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
return Token{Type: TokenError, Value: []byte(err.Error())}, err
|
|
}
|
|
|
|
if b == '.' && !hasDot {
|
|
hasDot = true
|
|
_, _ = s.ReadByte()
|
|
s.buffer = append(s.buffer, b)
|
|
} else if isDigit(b) {
|
|
_, _ = s.ReadByte()
|
|
s.buffer = append(s.buffer, b)
|
|
} else {
|
|
break
|
|
}
|
|
}
|
|
|
|
return Token{
|
|
Type: TokenNumber,
|
|
Value: append([]byte(nil), s.buffer...), // Make a copy of the buffer
|
|
Line: startLine,
|
|
Column: startColumn,
|
|
}, nil
|
|
}
|
|
|
|
// ScanValue processes a value and returns its Go representation
|
|
func (s *Scanner) ScanValue() (any, error) {
|
|
token, err := s.NextToken()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
switch token.Type {
|
|
case TokenString:
|
|
return string(token.Value), nil
|
|
|
|
case TokenBoolean:
|
|
if bytes.Equal(token.Value, []byte("true")) {
|
|
return true, nil
|
|
}
|
|
return false, nil
|
|
|
|
case TokenNumber:
|
|
// Convert to number
|
|
value := string(token.Value)
|
|
if bytes.Contains(token.Value, []byte(".")) {
|
|
// Float
|
|
return strconv.ParseFloat(value, 64)
|
|
}
|
|
// Integer
|
|
return strconv.ParseInt(value, 10, 64)
|
|
|
|
case TokenOpenBrace:
|
|
// Object or array
|
|
return s.scanObjectOrArray()
|
|
|
|
case TokenName:
|
|
// Name identifier - could be a special value or just a string
|
|
return string(token.Value), nil
|
|
|
|
default:
|
|
return nil, fmt.Errorf("unexpected token type %v at line %d, column %d", token.Type, token.Line, token.Column)
|
|
}
|
|
}
|
|
|
|
// scanObjectOrArray processes a map or array enclosed in braces
|
|
func (s *Scanner) scanObjectOrArray() (any, error) {
|
|
// Initialize collections
|
|
contents := make(map[string]any)
|
|
var arrayElements []any
|
|
isArray := true
|
|
|
|
for {
|
|
err := s.SkipWhitespace()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
b, err := s.PeekByte()
|
|
if err == io.EOF {
|
|
return nil, errors.New("unclosed object/array")
|
|
}
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Check for closing brace
|
|
if b == '}' {
|
|
_, _ = s.ReadByte() // consume closing brace
|
|
if isArray && len(contents) == 0 {
|
|
return arrayElements, nil
|
|
}
|
|
return contents, nil
|
|
}
|
|
|
|
// Handle comments
|
|
if b == '-' {
|
|
peekBytes, err := s.PeekBytes(2)
|
|
if err == nil && len(peekBytes) == 2 && peekBytes[1] == '-' {
|
|
err = s.scanComment()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
continue
|
|
}
|
|
}
|
|
|
|
// Process key-value pair or array element
|
|
if isLetter(b) {
|
|
// Read name
|
|
nameToken, err := s.scanName(s.line, s.col)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
name := string(nameToken.Value)
|
|
|
|
// Skip whitespace
|
|
err = s.SkipWhitespace()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Check if it's followed by = or {
|
|
b, err = s.PeekByte()
|
|
if err != nil && err != io.EOF {
|
|
return nil, err
|
|
}
|
|
|
|
if b == '=' {
|
|
// It's a key-value pair
|
|
_, _ = s.ReadByte() // consume =
|
|
err = s.SkipWhitespace()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
value, err := s.ScanValue()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
isArray = false
|
|
contents[name] = value
|
|
} else if b == '{' {
|
|
// It's a nested object/array
|
|
_, _ = s.ReadByte() // consume {
|
|
value, err := s.scanObjectOrArray()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
isArray = false
|
|
contents[name] = value
|
|
} else {
|
|
// It's a simple name as an array element
|
|
// Try to convert to appropriate type first
|
|
var value any = name
|
|
// Try common conversions
|
|
if name == "true" {
|
|
value = true
|
|
} else if name == "false" {
|
|
value = false
|
|
} else if isDigit(name[0]) || (len(name) > 1 && name[0] == '-' && isDigit(name[1])) {
|
|
// Looks like a number, try to convert
|
|
if hasDot(name) {
|
|
if f, err := strconv.ParseFloat(name, 64); err == nil {
|
|
value = f
|
|
}
|
|
} else {
|
|
if i, err := strconv.ParseInt(name, 10, 64); err == nil {
|
|
value = i
|
|
}
|
|
}
|
|
}
|
|
|
|
arrayElements = append(arrayElements, value)
|
|
}
|
|
} else if b == '"' {
|
|
// String value - must be an array element
|
|
value, err := s.ScanValue()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
arrayElements = append(arrayElements, value)
|
|
} else {
|
|
// Other value type - must be an array element
|
|
value, err := s.ScanValue()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
arrayElements = append(arrayElements, value)
|
|
}
|
|
}
|
|
}
|