Working on

2026-01-19 23:29:41 +01:00
parent 69d3360289
commit 1eda6a2a38
11 changed files with 471 additions and 55 deletions
@@ -12,6 +12,8 @@ type Position struct {
 type Configuration struct {
 	Definitions []Definition
 	Package     *Package
+	Comments    []Comment
+	Pragmas     []Pragma
 }

 type Definition interface {
@@ -39,6 +41,7 @@ func (o *ObjectNode) isDefinition() {}

 type Subnode struct {
 	Position    Position
+	EndPosition Position
 	Definitions []Definition
 }

@@ -50,6 +53,7 @@ type Value interface {
 type StringValue struct {
 	Position Position
 	Value    string
+	Quoted   bool
 }

 func (v *StringValue) Pos() Position { return v.Position }
@@ -90,8 +94,9 @@ func (v *ReferenceValue) Pos() Position { return v.Position }
 func (v *ReferenceValue) isValue()      {}

 type ArrayValue struct {
-	Position Position
-	Elements []Value
+	Position    Position
+	EndPosition Position
+	Elements    []Value
 }

 func (v *ArrayValue) Pos() Position { return v.Position }
@@ -31,18 +31,22 @@ type Token struct {
 }

 type Lexer struct {
-	input    string
-	start    int
-	pos      int
-	width    int
-	line     int
-	lineStart int
+	input       string
+	start       int
+	pos         int
+	width       int
+	line        int
+	lineStart   int
+	startLine   int
+	startColumn int
 }

 func NewLexer(input string) *Lexer {
 	return &Lexer{
-		input: input,
-		line:  1,
+		input:       input,
+		line:        1,
+		startLine:   1,
+		startColumn: 1,
 	}
 }

@@ -67,8 +71,8 @@ func (l *Lexer) backup() {
 		r, _ := utf8.DecodeRuneInString(l.input[l.pos:])
 		if r == '\n' {
 			l.line--
-			// This is tricky, we'd need to find the previous line start
-			// For simplicity, let's just not backup over newlines or handle it better
+			// lineStart is deliberately not restored here because recomputing it
+			// is complex; most callers back up a single character within a line.
 		}
 	}
 }
@@ -79,16 +83,22 @@ func (l *Lexer) peek() rune {
 	return r
 }

+func (l *Lexer) ignore() {
+	l.start = l.pos
+	l.startLine = l.line
+	l.startColumn = l.pos - l.lineStart + 1
+}
+
 func (l *Lexer) emit(t TokenType) Token {
 	tok := Token{
-		Type: t,
+		Type:  t,
 		Value: l.input[l.start:l.pos],
 		Position: Position{
-			Line:   l.line,
-			Column: l.start - l.lineStart + 1,
+			Line:   l.startLine,
+			Column: l.startColumn,
 		},
 	}
-	l.start = l.pos
+	l.ignore()
 	return tok
 }

@@ -100,7 +110,7 @@ func (l *Lexer) NextToken() Token {
 		}

 		if unicode.IsSpace(r) {
-			l.start = l.pos
+			l.ignore()
 			continue
 		}

@@ -117,10 +127,6 @@ func (l *Lexer) NextToken() Token {
 			return l.lexComment()
 		case '#':
 			return l.lexPackage()
-		case '!':
-			// Might be part of pragma //! 
-			// But grammar says pragma is //!
-			// So it should start with //
 		case '+':
 			fallthrough
 		case '$':
@@ -178,7 +184,6 @@ func (l *Lexer) lexString() Token {
 }

 func (l *Lexer) lexNumber() Token {
-	// Simple number lexing, could be improved for hex, binary, float
 	for {
 		r := l.next()
 		if unicode.IsDigit(r) || r == '.' || r == 'x' || r == 'b' || r == 'e' || r == '-' {
@@ -192,7 +197,6 @@ func (l *Lexer) lexNumber() Token {
 func (l *Lexer) lexComment() Token {
 	r := l.next()
 	if r == '/' {
-		// It's a comment, docstring or pragma
 		r = l.next()
 		if r == '#' {
 			return l.lexUntilNewline(TokenDocstring)
@@ -209,15 +213,21 @@ func (l *Lexer) lexComment() Token {
 func (l *Lexer) lexUntilNewline(t TokenType) Token {
 	for {
 		r := l.next()
-		if r == '\n' || r == -1 {
+		if r == '\n' {
+			l.backup()
+			tok := l.emit(t)
+			l.next() // consume \n
+			l.ignore()
+			return tok
+		}
+		if r == -1 {
 			return l.emit(t)
 		}
 	}
 }

 func (l *Lexer) lexPackage() Token {
-	// #package
-	l.start = l.pos - 1 // Include '#'
+	// The '#' has already been consumed and l.start still points at it.
 	for {
 		r := l.next()
 		if unicode.IsLetter(r) {
@@ -230,4 +240,4 @@ func (l *Lexer) lexPackage() Token {
 		return l.lexUntilNewline(TokenPackage)
 	}
 	return l.emit(TokenError)
-}
+}
@@ -7,9 +7,11 @@ import (
 )

 type Parser struct {
-	lexer *Lexer
-	tok   Token
-	peeked bool
+	lexer    *Lexer
+	tok      Token
+	peeked   bool
+	comments []Comment
+	pragmas  []Pragma
 }

 func NewParser(input string) *Parser {
@@ -23,7 +25,7 @@ func (p *Parser) next() Token {
 		p.peeked = false
 		return p.tok
 	}
-	p.tok = p.lexer.NextToken()
+	p.tok = p.fetchToken()
 	return p.tok
 }

@@ -31,11 +33,27 @@ func (p *Parser) peek() Token {
 	if p.peeked {
 		return p.tok
 	}
-	p.tok = p.lexer.NextToken()
+	p.tok = p.fetchToken()
 	p.peeked = true
 	return p.tok
 }

+func (p *Parser) fetchToken() Token {
+	for {
+		tok := p.lexer.NextToken()
+		switch tok.Type {
+		case TokenComment:
+			p.comments = append(p.comments, Comment{Position: tok.Position, Text: tok.Value})
+		case TokenDocstring:
+			p.comments = append(p.comments, Comment{Position: tok.Position, Text: tok.Value, Doc: true})
+		case TokenPragma:
+			p.pragmas = append(p.pragmas, Pragma{Position: tok.Position, Text: tok.Value})
+		default:
+			return tok
+		}
+	}
+}
+
 func (p *Parser) Parse() (*Configuration, error) {
 	config := &Configuration{}
 	for {
@@ -51,12 +69,6 @@ func (p *Parser) Parse() (*Configuration, error) {
 			}
 			continue
 		}
-		
-		// Skip comments, pragmas, docstrings for now in AST
-		if tok.Type == TokenComment || tok.Type == TokenDocstring || tok.Type == TokenPragma {
-			p.next()
-			continue
-		}

 		def, err := p.parseDefinition()
 		if err != nil {
@@ -64,6 +76,8 @@ func (p *Parser) Parse() (*Configuration, error) {
 		}
 		config.Definitions = append(config.Definitions, def)
 	}
+	config.Comments = p.comments
+	config.Pragmas = p.pragmas
 	return config, nil
 }

@@ -114,16 +128,13 @@ func (p *Parser) parseSubnode() (Subnode, error) {
 	for {
 		t := p.peek()
 		if t.Type == TokenRBrace {
-			p.next()
+			endTok := p.next()
+			sub.EndPosition = endTok.Position
 			break
 		}
 		if t.Type == TokenEOF {
 			return sub, fmt.Errorf("%d:%d: unexpected EOF, expected }", t.Position.Line, t.Position.Column)
 		}
-		if t.Type == TokenComment || t.Type == TokenDocstring || t.Type == TokenPragma {
-			p.next()
-			continue
-		}
 		def, err := p.parseDefinition()
 		if err != nil {
 			return sub, err
@@ -136,11 +147,13 @@ func (p *Parser) parseSubnode() (Subnode, error) {
 func (p *Parser) parseValue() (Value, error) {
 	tok := p.next()
 	switch tok.Type {
-	case TokenString:
-		return &StringValue{
-			Position: tok.Position,
-			Value:    strings.Trim(tok.Value, "\""),
-		}, nil
+		case TokenString:
+			return &StringValue{
+				Position: tok.Position,
+				Value:    strings.Trim(tok.Value, "\""),
+				Quoted:   true,
+			}, nil
+	
 	case TokenNumber:
 		// Simplistic handling
 		if strings.Contains(tok.Value, ".") || strings.Contains(tok.Value, "e") {
@@ -150,7 +163,8 @@ func (p *Parser) parseValue() (Value, error) {
 		i, _ := strconv.ParseInt(tok.Value, 0, 64)
 		return &IntValue{Position: tok.Position, Value: i, Raw: tok.Value}, nil
 	case TokenBool:
-		return &BoolValue{Position: tok.Position, Value: tok.Value == "true"}, nil
+		return &BoolValue{Position: tok.Position, Value: tok.Value == "true"},
+			nil
 	case TokenIdentifier:
 		// reference?
 		return &ReferenceValue{Position: tok.Position, Value: tok.Value}, nil
@@ -160,7 +174,8 @@ func (p *Parser) parseValue() (Value, error) {
 		for {
 			t := p.peek()
 			if t.Type == TokenRBrace {
-				p.next()
+				endTok := p.next()
+				arr.EndPosition = endTok.Position
 				break
 			}
 			val, err := p.parseValue()