package config import ( "bufio" "bytes" "errors" "fmt" "io" "strconv" ) // TokenType represents the type of token type TokenType int const ( TokenError TokenType = iota TokenEOF TokenName TokenString TokenNumber TokenBoolean TokenEquals TokenOpenBrace TokenCloseBrace TokenComment ) // Token represents a lexical token type Token struct { Type TokenType Value []byte Line int Column int } // Scanner handles the low-level parsing of the configuration format type Scanner struct { reader *bufio.Reader line int // Current line number col int // Current column position buffer []byte token Token // Current token } // NewScanner creates a new scanner with the given reader func NewScanner(r io.Reader) *Scanner { return &Scanner{ reader: bufio.NewReader(r), line: 1, // Start at line 1 col: 0, buffer: make([]byte, 0, 128), // Pre-allocate with reasonable capacity } } // ReadByte reads a single byte from the input func (s *Scanner) ReadByte() (byte, error) { b, err := s.reader.ReadByte() if err == nil { if b == '\n' { s.line++ s.col = 0 } else { s.col++ } } return b, err } // PeekByte looks at the next byte without consuming it func (s *Scanner) PeekByte() (byte, error) { b, err := s.reader.Peek(1) if err != nil { return 0, err } return b[0], nil } // PeekBytes looks at the next n bytes without consuming them func (s *Scanner) PeekBytes(n int) ([]byte, error) { return s.reader.Peek(n) } // UnreadByte pushes back a byte to the reader func (s *Scanner) UnreadByte() error { err := s.reader.UnreadByte() if err == nil && s.col > 0 { s.col-- } return err } // Error creates an error with line and column information func (s *Scanner) Error(msg string) error { return fmt.Errorf("line %d, column %d: %s", s.line, s.col, msg) } // SkipWhitespace skips whitespace characters func (s *Scanner) SkipWhitespace() error { for { b, err := s.PeekByte() if err == io.EOF { return nil } if err != nil { return err } // Fast check for common whitespace bytes if b != ' ' && b != '\t' && b != '\n' && b != '\r' { return nil } _, err = s.ReadByte() if err != nil { return err } } } // NextToken scans and returns the next token func (s *Scanner) NextToken() (Token, error) { if s.token.Type != TokenError { // We have a stored token token := s.token s.token = Token{Type: TokenError} // Reset return token, nil } // No stored token, scan a new one // Skip whitespace err := s.SkipWhitespace() if err == io.EOF { return Token{Type: TokenEOF}, nil } if err != nil { return Token{Type: TokenError, Value: []byte(err.Error())}, err } b, err := s.PeekByte() if err != nil { if err == io.EOF { return Token{Type: TokenEOF}, nil } return Token{Type: TokenError, Value: []byte(err.Error())}, err } // Record start position for error reporting startLine, startColumn := s.line, s.col // Process based on first character switch { case b == '=': _, _ = s.ReadByte() // consume equals return Token{Type: TokenEquals, Line: startLine, Column: startColumn}, nil case b == '{': _, _ = s.ReadByte() // consume open brace return Token{Type: TokenOpenBrace, Line: startLine, Column: startColumn}, nil case b == '}': _, _ = s.ReadByte() // consume close brace return Token{Type: TokenCloseBrace, Line: startLine, Column: startColumn}, nil case b == '-': // Could be a comment or a negative number peekBytes, err := s.PeekBytes(2) if err == nil && len(peekBytes) == 2 && peekBytes[1] == '-' { err = s.scanComment() if err != nil { return Token{Type: TokenError, Value: []byte(err.Error())}, err } return Token{Type: TokenComment, Line: startLine, Column: startColumn}, nil } // Check if it's a negative number if err == nil && len(peekBytes) == 2 && isDigit(peekBytes[1]) { return s.scanNumber(startLine, startColumn) } // Just a single dash _, _ = s.ReadByte() // consume dash return Token{Type: TokenError, Value: []byte("unexpected '-'")}, fmt.Errorf("unexpected '-' at line %d, column %d", startLine, startColumn) case b == '"': return s.scanString(startLine, startColumn) case isLetter(b): return s.scanName(startLine, startColumn) case isDigit(b): return s.scanNumber(startLine, startColumn) default: _, _ = s.ReadByte() // consume the unexpected character err := fmt.Errorf("unexpected character: %c", b) return Token{Type: TokenError, Value: []byte(err.Error()), Line: startLine, Column: startColumn}, err } } func (s *Scanner) UnreadToken(token Token) { s.token = token // Store the token to be returned next } // scanComment processes a comment func (s *Scanner) scanComment() error { // Consume the first dash _, err := s.ReadByte() if err != nil { return err } // Check for second dash b, err := s.ReadByte() if err != nil { return err } if b != '-' { return s.Error("invalid comment") } // Check for block comment [[ b, err = s.PeekByte() if err == nil && b == '[' { _, _ = s.ReadByte() // consume first [ b, err = s.PeekByte() if err == nil && b == '[' { _, _ = s.ReadByte() // consume second [ return s.scanBlockComment() } } // Line comment - consume until newline or EOF for { b, err := s.ReadByte() if err == io.EOF { return nil } if err != nil { return err } if b == '\n' { return nil } } } // scanBlockComment processes a block comment func (s *Scanner) scanBlockComment() error { for { b, err := s.ReadByte() if err != nil { return s.Error("unclosed block comment") } if b == ']' { b, err = s.PeekByte() if err == nil && b == ']' { _, _ = s.ReadByte() // consume second ] return nil } } } } // scanString scans a quoted string func (s *Scanner) scanString(startLine, startColumn int) (Token, error) { // Reset buffer s.buffer = s.buffer[:0] // Consume opening quote _, err := s.ReadByte() if err != nil { return Token{Type: TokenError, Value: []byte(err.Error())}, err } for { b, err := s.ReadByte() if err != nil { return Token{Type: TokenError, Value: []byte("unterminated string")}, errors.New("unterminated string") } if b == '"' { break } // Handle escape sequences if b == '\\' { escaped, err := s.ReadByte() if err != nil { return Token{Type: TokenError, Value: []byte("unterminated escape sequence")}, errors.New("unterminated escape sequence") } switch escaped { case '"': s.buffer = append(s.buffer, '"') case '\\': s.buffer = append(s.buffer, '\\') case 'n': s.buffer = append(s.buffer, '\n') case 't': s.buffer = append(s.buffer, '\t') default: s.buffer = append(s.buffer, '\\') s.buffer = append(s.buffer, escaped) } } else { s.buffer = append(s.buffer, b) } } return Token{ Type: TokenString, Value: append([]byte(nil), s.buffer...), // Make a copy of the buffer Line: startLine, Column: startColumn, }, nil } // scanName scans an identifier func (s *Scanner) scanName(startLine, startColumn int) (Token, error) { // Reset buffer s.buffer = s.buffer[:0] // Read first character b, err := s.ReadByte() if err != nil { return Token{Type: TokenError, Value: []byte(err.Error())}, err } if !isLetter(b) { return Token{Type: TokenError, Value: []byte("name must start with letter")}, s.Error("name must start with letter") } s.buffer = append(s.buffer, b) // Read rest of name for { b, err := s.PeekByte() if err == io.EOF { break } if err != nil { return Token{Type: TokenError, Value: []byte(err.Error())}, err } if !isLetter(b) && !isDigit(b) && b != '_' { break } s.buffer = append(s.buffer, b) _, _ = s.ReadByte() } // Check if it's a boolean if bytes.Equal(s.buffer, []byte("true")) || bytes.Equal(s.buffer, []byte("false")) { return Token{ Type: TokenBoolean, Value: append([]byte(nil), s.buffer...), // Make a copy of the buffer Line: startLine, Column: startColumn, }, nil } return Token{ Type: TokenName, Value: append([]byte(nil), s.buffer...), // Make a copy of the buffer Line: startLine, Column: startColumn, }, nil } // scanNumber scans a numeric value func (s *Scanner) scanNumber(startLine, startColumn int) (Token, error) { // Reset buffer s.buffer = s.buffer[:0] // Read first character (might be a minus sign or digit) b, err := s.ReadByte() if err != nil { return Token{Type: TokenError, Value: []byte(err.Error())}, err } s.buffer = append(s.buffer, b) // Scan the rest of the number hasDot := false for { b, err := s.PeekByte() if err != nil { if err == io.EOF { break } return Token{Type: TokenError, Value: []byte(err.Error())}, err } if b == '.' && !hasDot { hasDot = true _, _ = s.ReadByte() s.buffer = append(s.buffer, b) } else if isDigit(b) { _, _ = s.ReadByte() s.buffer = append(s.buffer, b) } else { break } } return Token{ Type: TokenNumber, Value: append([]byte(nil), s.buffer...), // Make a copy of the buffer Line: startLine, Column: startColumn, }, nil } // ScanValue processes a value and returns its Go representation func (s *Scanner) ScanValue() (any, error) { token, err := s.NextToken() if err != nil { return nil, err } switch token.Type { case TokenString: return string(token.Value), nil case TokenBoolean: if bytes.Equal(token.Value, []byte("true")) { return true, nil } return false, nil case TokenNumber: // Convert to number value := string(token.Value) if bytes.Contains(token.Value, []byte(".")) { // Float return strconv.ParseFloat(value, 64) } // Integer return strconv.ParseInt(value, 10, 64) case TokenOpenBrace: // Object or array return s.scanObjectOrArray() case TokenName: // Name identifier - could be a special value or just a string return string(token.Value), nil default: return nil, fmt.Errorf("unexpected token type %v at line %d, column %d", token.Type, token.Line, token.Column) } } // scanObjectOrArray processes a map or array enclosed in braces func (s *Scanner) scanObjectOrArray() (any, error) { // Initialize collections contents := make(map[string]any) var arrayElements []any isArray := true for { err := s.SkipWhitespace() if err != nil { return nil, err } b, err := s.PeekByte() if err == io.EOF { return nil, errors.New("unclosed object/array") } if err != nil { return nil, err } // Check for closing brace if b == '}' { _, _ = s.ReadByte() // consume closing brace if isArray && len(contents) == 0 { return arrayElements, nil } return contents, nil } // Handle comments if b == '-' { peekBytes, err := s.PeekBytes(2) if err == nil && len(peekBytes) == 2 && peekBytes[1] == '-' { err = s.scanComment() if err != nil { return nil, err } continue } } // Process key-value pair or array element if isLetter(b) { // Read name nameToken, err := s.scanName(s.line, s.col) if err != nil { return nil, err } name := string(nameToken.Value) // Skip whitespace err = s.SkipWhitespace() if err != nil { return nil, err } // Check if it's followed by = or { b, err = s.PeekByte() if err != nil && err != io.EOF { return nil, err } if b == '=' { // It's a key-value pair _, _ = s.ReadByte() // consume = err = s.SkipWhitespace() if err != nil { return nil, err } value, err := s.ScanValue() if err != nil { return nil, err } isArray = false contents[name] = value } else if b == '{' { // It's a nested object/array _, _ = s.ReadByte() // consume { value, err := s.scanObjectOrArray() if err != nil { return nil, err } isArray = false contents[name] = value } else { // It's a simple name as an array element // Try to convert to appropriate type first var value any = name // Try common conversions if name == "true" { value = true } else if name == "false" { value = false } else if isDigit(name[0]) || (len(name) > 1 && name[0] == '-' && isDigit(name[1])) { // Looks like a number, try to convert if hasDot(name) { if f, err := strconv.ParseFloat(name, 64); err == nil { value = f } } else { if i, err := strconv.ParseInt(name, 10, 64); err == nil { value = i } } } arrayElements = append(arrayElements, value) } } else if b == '"' { // String value - must be an array element value, err := s.ScanValue() if err != nil { return nil, err } arrayElements = append(arrayElements, value) } else { // Other value type - must be an array element value, err := s.ScanValue() if err != nil { return nil, err } arrayElements = append(arrayElements, value) } } }