Working

2026-01-19 23:10:09 +01:00
parent c4c29a640d
commit 69d3360289
12 changed files with 1122 additions and 0 deletions
--- a/internal/parser/ast.go
+++ b/internal/parser/ast.go
@@ -0,0 +1,116 @@
+package parser
+
+// Node is implemented by AST elements that can report where they appeared
+// in the source text.
+type Node interface {
+	// Pos returns the source position recorded for the node.
+	Pos() Position
+}
+
+// Position is a line/column pair locating a token or AST node in the input.
+// The lexer in this package starts line numbering at 1 and derives Column
+// from byte offsets (not rune counts), adding 1 so columns are 1-based.
+type Position struct {
+	Line   int
+	Column int
+}
+
+// Configuration is the root of a parsed input.
+type Configuration struct {
+	// Definitions holds the top-level definitions in source order.
+	Definitions []Definition
+	// Package is the #package declaration, or nil if the input had none.
+	// If the input contains several, the parser keeps the last one.
+	Package *Package
+}
+
+// Definition is a Node that may appear at the top level of a Configuration
+// or inside a Subnode. In this file it is implemented by *Field and
+// *ObjectNode; the unexported marker method keeps the set of implementations
+// closed to this package.
+type Definition interface {
+	Node
+	isDefinition()
+}
+
+// Field is a "name = value" definition.
+type Field struct {
+	Position Position // position of the name token
+	Name     string   // identifier on the left of '='
+	Value    Value    // parsed right-hand side
+}
+
+// Pos returns the position stored in the field.
+func (f *Field) Pos() Position { return f.Position }
+
+// isDefinition marks *Field as a Definition.
+func (f *Field) isDefinition() {}
+
+// ObjectNode is a named definition whose right-hand side is a brace-delimited
+// Subnode rather than a single value.
+type ObjectNode struct {
+	Position Position // position of the name token
+	Name     string   // name as written in the source, including a leading + or $
+	Subnode  Subnode  // the "{ ... }" body
+}
+
+// Pos returns the position stored in the node.
+func (o *ObjectNode) Pos() Position { return o.Position }
+
+// isDefinition marks *ObjectNode as a Definition.
+func (o *ObjectNode) isDefinition() {}
+
+// Subnode is the "{ ... }" body of an ObjectNode.
+//
+// NOTE(review): Subnode carries a Position but defines no Pos method in this
+// file, so it does not satisfy Node.
+type Subnode struct {
+	Position    Position     // position of the opening '{'
+	Definitions []Definition // definitions inside the braces, in source order
+}
+
+// Value is a Node that can appear on the right-hand side of a Field or as an
+// element of an ArrayValue. The unexported marker method keeps the set of
+// implementations closed to this package.
+type Value interface {
+	Node
+	isValue()
+}
+
+// StringValue is a double-quoted string literal.
+type StringValue struct {
+	Position Position // position of the string token
+	Value    string   // text with the surrounding quotes removed by the parser
+}
+
+// Pos returns the position stored in the value.
+func (v *StringValue) Pos() Position { return v.Position }
+
+// isValue marks *StringValue as a Value.
+func (v *StringValue) isValue()      {}
+
+// IntValue is an integer literal.
+type IntValue struct {
+	Position Position // position of the number token
+	Value    int64    // numeric value as converted by the parser
+	Raw      string   // literal exactly as it appeared in the input
+}
+
+// Pos returns the position stored in the value.
+func (v *IntValue) Pos() Position { return v.Position }
+
+// isValue marks *IntValue as a Value.
+func (v *IntValue) isValue()      {}
+
+// FloatValue is a floating-point literal.
+type FloatValue struct {
+	Position Position // position of the number token
+	Value    float64  // numeric value as converted by the parser
+	Raw      string   // literal exactly as it appeared in the input
+}
+
+// Pos returns the position stored in the value.
+func (v *FloatValue) Pos() Position { return v.Position }
+
+// isValue marks *FloatValue as a Value.
+func (v *FloatValue) isValue()      {}
+
+// BoolValue is a "true" or "false" literal.
+type BoolValue struct {
+	Position Position // position of the literal
+	Value    bool     // true when the literal was "true"
+}
+
+// Pos returns the position stored in the value.
+func (v *BoolValue) Pos() Position { return v.Position }
+
+// isValue marks *BoolValue as a Value.
+func (v *BoolValue) isValue()      {}
+
+// ReferenceValue is a bare identifier used where a value is expected.
+type ReferenceValue struct {
+	Position Position // position of the identifier
+	Value    string   // identifier text
+}
+
+// Pos returns the position stored in the value.
+func (v *ReferenceValue) Pos() Position { return v.Position }
+
+// isValue marks *ReferenceValue as a Value.
+func (v *ReferenceValue) isValue()      {}
+
+// ArrayValue is a brace-delimited sequence of values, e.g. {1 2 3}.
+type ArrayValue struct {
+	Position Position // position of the opening '{'
+	Elements []Value  // elements in source order; nil for an empty array
+}
+
+// Pos returns the position stored in the value.
+func (v *ArrayValue) Pos() Position { return v.Position }
+
+// isValue marks *ArrayValue as a Value.
+func (v *ArrayValue) isValue()      {}
+
+// Package is a "#package <uri>" declaration.
+type Package struct {
+	Position Position // position of the #package token
+	URI      string   // text after "#package", with surrounding whitespace trimmed
+}
+
+// Pos returns the position stored in the declaration.
+func (p *Package) Pos() Position { return p.Position }
+
+// Comment describes a source comment.
+//
+// NOTE(review): the parser in this commit skips comment tokens and never
+// builds a Comment; the type also has no Pos method, so it does not satisfy
+// Node.
+type Comment struct {
+	Position Position
+	Text     string
+	Doc      bool // true if starts with //#
+}
+
+// Pragma describes a pragma line (lexed from a "//!" prefix).
+//
+// NOTE(review): the parser in this commit skips pragma tokens and never
+// builds a Pragma; the type also has no Pos method, so it does not satisfy
+// Node.
+type Pragma struct {
+	Position Position
+	Text     string
+}
--- a/internal/parser/lexer.go
+++ b/internal/parser/lexer.go
@@ -0,0 +1,233 @@
+package parser
+
+import (
+	"unicode"
+	"unicode/utf8"
+)
+
+// TokenType enumerates the kinds of token the Lexer produces.
+type TokenType int
+
+const (
+	TokenError            TokenType = iota // input the lexer could not classify, or an unterminated string
+	TokenEOF                               // end of input
+	TokenIdentifier                        // letter followed by letters, digits, '_' or '-'
+	TokenObjectIdentifier                  // identifier introduced by '+' or '$'
+	TokenEqual                             // '='
+	TokenLBrace                            // '{'
+	TokenRBrace                            // '}'
+	TokenString                            // double-quoted string, quotes included in the token text
+	TokenNumber                            // numeric literal starting with a digit or '-'
+	TokenBool                              // the identifiers "true" and "false"
+	TokenPackage                           // "#package" through the end of the line
+	TokenPragma                            // "//!" through the end of the line
+	TokenComment                           // "//" through the end of the line
+	TokenDocstring                         // "//#" through the end of the line
+)
+
+// Token is a single lexical element produced by Lexer.NextToken.
+type Token struct {
+	Type TokenType
+	// Value is the exact slice of the input covered by the token. String
+	// tokens keep their quotes, and line-oriented tokens (package, comment,
+	// docstring, pragma) keep the terminating newline when there is one.
+	Value    string
+	Position Position
+}
+
+// Lexer splits an input string into Tokens. It scans the input one rune at a
+// time and tracks byte offsets, so columns derived from it are byte-based.
+// Create one with NewLexer and call NextToken until it returns TokenEOF.
+type Lexer struct {
+	input     string // complete text being scanned
+	start     int    // byte offset where the token in progress begins
+	pos       int    // byte offset of the next rune to read
+	width     int    // byte width of the rune last read by next; 0 at end of input
+	line      int    // 1-based line number of pos
+	lineStart int    // byte offset just past the most recently consumed newline
+}
+
+// NewLexer returns a Lexer positioned at the start of input, with line
+// numbering beginning at 1.
+func NewLexer(input string) *Lexer {
+	lx := new(Lexer)
+	lx.input = input
+	lx.line = 1
+	return lx
+}
+
+// next consumes and returns the rune at the current offset and remembers its
+// byte width so that backup can undo the read. At end of input it returns -1
+// and records a width of 0. Consuming a newline bumps the line counter and
+// marks the following byte as the start of the new line.
+func (l *Lexer) next() rune {
+	if len(l.input) <= l.pos {
+		l.width = 0
+		return -1
+	}
+	ch, size := utf8.DecodeRuneInString(l.input[l.pos:])
+	l.width = size
+	l.pos += size
+	if ch != '\n' {
+		return ch
+	}
+	l.line++
+	l.lineStart = l.pos
+	return ch
+}
+
+// backup un-reads the rune most recently returned by next. It does nothing
+// after next reported end of input (width 0). Only one rune width is
+// remembered, so it must be called at most once per call to next.
+//
+// When the rune being un-read is a newline, both the line counter and the
+// line-start offset are rewound, so that columns computed afterwards refer to
+// the line that actually contains the current position.
+func (l *Lexer) backup() {
+	if l.width == 0 {
+		return
+	}
+	l.pos -= l.width
+	if r, _ := utf8.DecodeRuneInString(l.input[l.pos:]); r != '\n' {
+		return
+	}
+	l.line--
+	// next overwrote lineStart when it consumed the newline. Recover the
+	// previous value by scanning back to the byte after the preceding newline,
+	// or to the start of the input if there is none.
+	ls := l.pos
+	for ls > 0 && l.input[ls-1] != '\n' {
+		ls--
+	}
+	l.lineStart = ls
+}
+
+// peek reports the next rune without consuming it: it reads one rune with
+// next and immediately un-reads it with backup. At end of input it returns -1.
+func (l *Lexer) peek() rune {
+	defer l.backup()
+	return l.next()
+}
+
+// emit builds a token of type t from the input between the token start and
+// the current offset, then moves the token start up to the current offset.
+//
+// The reported Position is that of the token's FIRST byte. The lexer's line
+// counter describes the current offset, which lies past the token and, for
+// tokens that consume a newline (package lines, comments, multi-line
+// strings), already on a later line; the position is therefore recomputed
+// from the token start instead of being read from the running counters.
+// Column is a 1-based byte offset within the line.
+func (l *Lexer) emit(t TokenType) Token {
+	text := l.input[l.start:l.pos]
+
+	// Undo the line increments caused by newlines inside the token itself.
+	line := l.line
+	for i := 0; i < len(text); i++ {
+		if text[i] == '\n' {
+			line--
+		}
+	}
+
+	// Find the first byte of the line on which the token starts.
+	lineStart := l.start
+	for lineStart > 0 && l.input[lineStart-1] != '\n' {
+		lineStart--
+	}
+
+	tok := Token{
+		Type:  t,
+		Value: text,
+		Position: Position{
+			Line:   line,
+			Column: l.start - lineStart + 1,
+		},
+	}
+	l.start = l.pos
+	return tok
+}
+
+// NextToken skips leading whitespace and returns the next token. It returns
+// a TokenEOF token at end of input and a TokenError token for any character
+// that cannot start a token (including a lone '!', which is only meaningful
+// as part of a "//!" pragma).
+func (l *Lexer) NextToken() Token {
+	r := l.next()
+	for r != -1 && unicode.IsSpace(r) {
+		// Whitespace never belongs to a token: drop it from the pending text.
+		l.start = l.pos
+		r = l.next()
+	}
+
+	switch r {
+	case -1:
+		return l.emit(TokenEOF)
+	case '=':
+		return l.emit(TokenEqual)
+	case '{':
+		return l.emit(TokenLBrace)
+	case '}':
+		return l.emit(TokenRBrace)
+	case '"':
+		return l.lexString()
+	case '/':
+		return l.lexComment()
+	case '#':
+		return l.lexPackage()
+	case '+', '$':
+		return l.lexObjectIdentifier()
+	}
+
+	switch {
+	case unicode.IsLetter(r):
+		return l.lexIdentifier()
+	case unicode.IsDigit(r) || r == '-':
+		return l.lexNumber()
+	}
+	return l.emit(TokenError)
+}
+
+// lexIdentifier consumes the rest of an identifier whose first letter has
+// already been read: any run of letters, digits, '_' and '-'. The words
+// "true" and "false" are emitted as TokenBool, everything else as
+// TokenIdentifier.
+func (l *Lexer) lexIdentifier() Token {
+	r := l.next()
+	for unicode.IsLetter(r) || unicode.IsDigit(r) || r == '_' || r == '-' {
+		r = l.next()
+	}
+	l.backup() // the rune that ended the run is not part of the identifier
+
+	switch l.input[l.start:l.pos] {
+	case "true", "false":
+		return l.emit(TokenBool)
+	}
+	return l.emit(TokenIdentifier)
+}
+
+// lexObjectIdentifier consumes the name following an already-read '+' or '$'
+// prefix (letters, digits, '_' and '-') and emits the prefix and name
+// together as a TokenObjectIdentifier.
+func (l *Lexer) lexObjectIdentifier() Token {
+	r := l.next()
+	for unicode.IsLetter(r) || unicode.IsDigit(r) || r == '_' || r == '-' {
+		r = l.next()
+	}
+	l.backup() // the rune that ended the run is not part of the name
+	return l.emit(TokenObjectIdentifier)
+}
+
+// lexString consumes input up to and including the next double quote, the
+// opening quote having already been read, and emits a TokenString whose text
+// includes both quotes. No escape sequences are recognised. If the input ends
+// before a closing quote is found, a TokenError is emitted instead.
+func (l *Lexer) lexString() Token {
+	for {
+		switch l.next() {
+		case '"':
+			return l.emit(TokenString)
+		case -1:
+			return l.emit(TokenError)
+		}
+	}
+}
+
+// lexNumber consumes the rest of a numeric literal whose first rune (a digit
+// or '-') has already been read, and emits it as a TokenNumber.
+//
+// The scan is deliberately permissive: digits, '.', 'x', 'b', 'e' and '-'
+// are accepted in any order, and validation is left to whoever converts the
+// token text. Once the text read so far is exactly "0x" or "-0x", the hex
+// digit letters a-f and A-F are accepted as well, so that a literal such as
+// 0xFF is kept as one token rather than being cut off after "0x".
+func (l *Lexer) lexNumber() Token {
+	hex := false
+	for {
+		r := l.next()
+		switch {
+		case unicode.IsDigit(r), r == '.', r == 'b', r == 'e', r == '-':
+			continue
+		case r == 'x':
+			// Only a leading "0x" (optionally negated) switches to hex mode.
+			if s := l.input[l.start:l.pos]; s == "0x" || s == "-0x" {
+				hex = true
+			}
+			continue
+		case hex && (('a' <= r && r <= 'f') || ('A' <= r && r <= 'F')):
+			continue
+		}
+		l.backup() // the rune that ended the literal is not part of it
+		return l.emit(TokenNumber)
+	}
+}
+
+// lexComment handles input starting with '/', which has already been read.
+// A second '/' introduces a line token whose kind depends on the following
+// character: "//#" is a docstring, "//!" a pragma, and anything else a plain
+// comment. A '/' that is not followed by another '/' yields a TokenError
+// covering just that character.
+//
+// An empty comment, i.e. "//" directly followed by a newline or the end of
+// input, is emitted immediately. The character after "//" has to be read to
+// classify the token; if it is the newline, the line is already complete and
+// scanning on to the next newline would swallow the following line.
+func (l *Lexer) lexComment() Token {
+	if l.next() != '/' {
+		l.backup()
+		return l.emit(TokenError)
+	}
+	switch l.next() {
+	case '#':
+		return l.lexUntilNewline(TokenDocstring)
+	case '!':
+		return l.lexUntilNewline(TokenPragma)
+	case '\n', -1:
+		return l.emit(TokenComment)
+	default:
+		return l.lexUntilNewline(TokenComment)
+	}
+}
+
+// lexUntilNewline consumes the remainder of the current line, including the
+// terminating newline if there is one, and emits everything read since the
+// token start as a token of type t. End of input also terminates the line.
+func (l *Lexer) lexUntilNewline(t TokenType) Token {
+	r := l.next()
+	for r != '\n' && r != -1 {
+		r = l.next()
+	}
+	return l.emit(t)
+}
+
+// lexPackage handles input starting with '#', which has already been read.
+// If the '#' and the letters that follow it spell exactly "#package", the
+// rest of the line is consumed and emitted as a TokenPackage; otherwise the
+// '#' and those letters are emitted as a TokenError.
+func (l *Lexer) lexPackage() Token {
+	const keyword = "#package"
+
+	l.start = l.pos - 1 // make sure the token text begins at the '#'
+	for unicode.IsLetter(l.next()) {
+	}
+	l.backup() // the rune that ended the word is not part of it
+
+	if word := l.input[l.start:l.pos]; word != keyword {
+		return l.emit(TokenError)
+	}
+	return l.lexUntilNewline(TokenPackage)
+}
--- a/internal/parser/parser.go
+++ b/internal/parser/parser.go
@@ -0,0 +1,176 @@
+package parser
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+)
+
+// Parser turns the token stream of a Lexer into a Configuration. It keeps a
+// single token of lookahead.
+type Parser struct {
+	lexer  *Lexer // token source
+	tok    Token  // most recently read token
+	peeked bool   // true when tok was read by peek and not yet consumed by next
+}
+
+// NewParser returns a Parser that reads its tokens from a new Lexer over
+// input.
+func NewParser(input string) *Parser {
+	p := new(Parser)
+	p.lexer = NewLexer(input)
+	return p
+}
+
+// next consumes and returns the next token. If a token was already read by
+// peek, that token is returned and the lookahead is cleared; otherwise a new
+// token is pulled from the lexer.
+func (p *Parser) next() Token {
+	if !p.peeked {
+		p.tok = p.lexer.NextToken()
+		return p.tok
+	}
+	p.peeked = false
+	return p.tok
+}
+
+// peek returns the next token without consuming it. Repeated calls return
+// the same token until next is called.
+func (p *Parser) peek() Token {
+	if !p.peeked {
+		p.tok = p.lexer.NextToken()
+		p.peeked = true
+	}
+	return p.tok
+}
+
+// Parse reads the whole input and returns its Configuration. A #package
+// line sets Configuration.Package (a later one replaces an earlier one);
+// comments, docstrings and pragmas are skipped and do not appear in the
+// result; everything else must be a definition. The first definition that
+// fails to parse aborts parsing, and a nil Configuration is returned together
+// with the error.
+func (p *Parser) Parse() (*Configuration, error) {
+	cfg := new(Configuration)
+	for tok := p.peek(); tok.Type != TokenEOF; tok = p.peek() {
+		switch tok.Type {
+		case TokenPackage:
+			p.next()
+			uri := strings.TrimPrefix(tok.Value, "#package")
+			cfg.Package = &Package{
+				Position: tok.Position,
+				URI:      strings.TrimSpace(uri),
+			}
+		case TokenComment, TokenDocstring, TokenPragma:
+			p.next()
+		default:
+			def, err := p.parseDefinition()
+			if err != nil {
+				return nil, err
+			}
+			cfg.Definitions = append(cfg.Definitions, def)
+		}
+	}
+	return cfg, nil
+}
+
+// parseDefinition parses one "name = ..." definition and returns either a
+// *Field or an *ObjectNode.
+//
+// A name written with a '+' or '$' prefix always introduces an ObjectNode.
+// A plain identifier introduces an ObjectNode when its right-hand side is a
+// brace block that itself contains definitions (for example
+// "Node = { Class = X }"), and a Field otherwise. A brace block holding plain
+// values, such as "{1 2 3}", or nothing at all, is therefore still parsed as
+// an array value.
+//
+// Errors are prefixed with the "line:column" of the offending token.
+func (p *Parser) parseDefinition() (Definition, error) {
+	tok := p.next()
+	if tok.Type != TokenIdentifier && tok.Type != TokenObjectIdentifier {
+		return nil, fmt.Errorf("%d:%d: unexpected token %v", tok.Position.Line, tok.Position.Column, tok.Value)
+	}
+	if eq := p.next(); eq.Type != TokenEqual {
+		return nil, fmt.Errorf("%d:%d: expected =", eq.Position.Line, eq.Position.Column)
+	}
+
+	if tok.Type == TokenObjectIdentifier || p.subnodeFollows() {
+		sub, err := p.parseSubnode()
+		if err != nil {
+			return nil, err
+		}
+		return &ObjectNode{
+			Position: tok.Position,
+			Name:     tok.Value,
+			Subnode:  sub,
+		}, nil
+	}
+
+	val, err := p.parseValue()
+	if err != nil {
+		return nil, err
+	}
+	return &Field{
+		Position: tok.Position,
+		Name:     tok.Value,
+		Value:    val,
+	}, nil
+}
+
+// subnodeFollows reports whether the upcoming tokens open a brace block that
+// contains definitions rather than array elements. That is the case when the
+// first token after '{' that is not a comment, docstring or pragma is either
+// a '+'/'$' name or an identifier immediately followed by '='. No tokens are
+// consumed: the lexer state is snapshotted before looking ahead and restored
+// afterwards.
+func (p *Parser) subnodeFollows() bool {
+	if p.peek().Type != TokenLBrace {
+		return false
+	}
+	// The '{' now sits in the parser's one-token lookahead, so the lexer is
+	// positioned just past it. Lexer holds only a string and integers, which
+	// makes a plain struct copy a complete snapshot.
+	saved := *p.lexer
+	defer func() { *p.lexer = saved }()
+
+	for {
+		switch t := p.lexer.NextToken(); t.Type {
+		case TokenComment, TokenDocstring, TokenPragma:
+			continue
+		case TokenObjectIdentifier:
+			return true
+		case TokenIdentifier:
+			return p.lexer.NextToken().Type == TokenEqual
+		default:
+			return false
+		}
+	}
+}
+
+// parseSubnode parses a "{ ... }" block of definitions. Comments, docstrings
+// and pragmas inside the block are skipped. It fails if the next token is not
+// '{', if the input ends before the closing '}', or if a contained definition
+// fails to parse; in the latter two cases the Subnode built so far is
+// returned alongside the error.
+func (p *Parser) parseSubnode() (Subnode, error) {
+	open := p.next()
+	if open.Type != TokenLBrace {
+		return Subnode{}, fmt.Errorf("%d:%d: expected {", open.Position.Line, open.Position.Column)
+	}
+
+	sub := Subnode{Position: open.Position}
+	for {
+		switch t := p.peek(); t.Type {
+		case TokenRBrace:
+			p.next()
+			return sub, nil
+		case TokenEOF:
+			return sub, fmt.Errorf("%d:%d: unexpected EOF, expected }", t.Position.Line, t.Position.Column)
+		case TokenComment, TokenDocstring, TokenPragma:
+			p.next()
+		default:
+			def, err := p.parseDefinition()
+			if err != nil {
+				return sub, err
+			}
+			sub.Definitions = append(sub.Definitions, def)
+		}
+	}
+}
+
+// parseValue parses a single value: a string, number, boolean, bare
+// identifier (reference) or a brace-delimited array of values.
+//
+// Numbers containing '.' or 'e' are converted as floats, unless they also
+// contain 'x', because 'e' is an ordinary digit in a hex literal. All other
+// numbers are converted as integers with the base taken from the literal's
+// prefix. A literal that cannot be converted is reported as an error instead
+// of silently becoming zero.
+//
+// Inside an array, comments, docstrings and pragmas are skipped, and reaching
+// the end of input before the closing '}' is an error.
+//
+// Errors are prefixed with the "line:column" of the offending token.
+func (p *Parser) parseValue() (Value, error) {
+	tok := p.next()
+	switch tok.Type {
+	case TokenString:
+		return &StringValue{
+			Position: tok.Position,
+			Value:    strings.Trim(tok.Value, "\""),
+		}, nil
+	case TokenNumber:
+		raw := tok.Value
+		if strings.ContainsAny(raw, ".e") && !strings.Contains(raw, "x") {
+			f, err := strconv.ParseFloat(raw, 64)
+			if err != nil {
+				return nil, fmt.Errorf("%d:%d: invalid number: %w", tok.Position.Line, tok.Position.Column, err)
+			}
+			return &FloatValue{Position: tok.Position, Value: f, Raw: raw}, nil
+		}
+		i, err := strconv.ParseInt(raw, 0, 64)
+		if err != nil {
+			return nil, fmt.Errorf("%d:%d: invalid number: %w", tok.Position.Line, tok.Position.Column, err)
+		}
+		return &IntValue{Position: tok.Position, Value: i, Raw: raw}, nil
+	case TokenBool:
+		return &BoolValue{Position: tok.Position, Value: tok.Value == "true"}, nil
+	case TokenIdentifier:
+		// A bare identifier in value position is kept as a reference.
+		return &ReferenceValue{Position: tok.Position, Value: tok.Value}, nil
+	case TokenLBrace:
+		arr := &ArrayValue{Position: tok.Position}
+		for {
+			switch t := p.peek(); t.Type {
+			case TokenRBrace:
+				p.next()
+				return arr, nil
+			case TokenEOF:
+				return nil, fmt.Errorf("%d:%d: unexpected EOF, expected }", t.Position.Line, t.Position.Column)
+			case TokenComment, TokenDocstring, TokenPragma:
+				p.next()
+				continue
+			}
+			elem, err := p.parseValue()
+			if err != nil {
+				return nil, err
+			}
+			arr.Elements = append(arr.Elements, elem)
+		}
+	default:
+		return nil, fmt.Errorf("%d:%d: unexpected value token %v", tok.Position.Line, tok.Position.Column, tok.Value)
+	}
+}
--- a/internal/parser/parser_test.go
+++ b/internal/parser/parser_test.go
@@ -0,0 +1,38 @@
+package parser
+
+import (
+	"testing"
+)
+
+// TestParseBasic parses a small configuration containing a package line, a
+// comment, nested nodes, scalar fields and an array, then checks the package
+// URI and the number of top-level definitions.
+func TestParseBasic(t *testing.T) {
+	const input = `
+#package PROJECT.SUB
+// comment
+Node1 = {
+    Class = MyClass
+    Field1 = "value"
+    Field2 = 123
+    Field3 = true
+    +SubNode = {
+        Class = OtherClass
+    }
+}
+$Node2 = {
+    Class = AppClass
+    Array = {1 2 3}
+}
+`
+	config, err := NewParser(input).Parse()
+	if err != nil {
+		t.Fatalf("Parse error: %v", err)
+	}
+
+	if pkg := config.Package; pkg == nil || pkg.URI != "PROJECT.SUB" {
+		t.Errorf("Expected package PROJECT.SUB, got %v", pkg)
+	}
+
+	if got := len(config.Definitions); got != 2 {
+		t.Errorf("Expected 2 definitions, got %d", got)
+	}
+}