Working on
This commit is contained in:
211
internal/formatter/formatter.go
Normal file
211
internal/formatter/formatter.go
Normal file
@@ -0,0 +1,211 @@
|
||||
package formatter
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/marte-dev/marte-dev-tools/internal/parser"
|
||||
)
|
||||
|
||||
// Insertable is a comment or pragma lifted out of the parse tree, queued
// for re-insertion into the formatted output at its original position.
type Insertable struct {
	// Position is the line/column the text occupied in the source.
	Position parser.Position
	// Text is the normalized comment/pragma text, including its marker.
	Text string
	// IsDoc marks entries that came from doc comments. NOTE(review): it is
	// only ever set, never read, in this file — confirm it is still needed.
	IsDoc bool
}
|
||||
|
||||
// Formatter writes a parsed configuration back out, merging the queued
// insertables (comments and pragmas) into the stream in source order.
type Formatter struct {
	// insertables holds comments/pragmas sorted by source position.
	insertables []Insertable
	// cursor indexes the next insertable not yet written.
	cursor int
	// writer receives the formatted output.
	writer io.Writer
}
|
||||
|
||||
func Format(config *parser.Configuration, w io.Writer) {
|
||||
ins := []Insertable{}
|
||||
for _, c := range config.Comments {
|
||||
ins = append(ins, Insertable{Position: c.Position, Text: fixComment(c.Text), IsDoc: c.Doc})
|
||||
}
|
||||
for _, p := range config.Pragmas {
|
||||
ins = append(ins, Insertable{Position: p.Position, Text: fixComment(p.Text)})
|
||||
}
|
||||
// Sort
|
||||
sort.Slice(ins, func(i, j int) bool {
|
||||
if ins[i].Position.Line != ins[j].Position.Line {
|
||||
return ins[i].Position.Line < ins[j].Position.Line
|
||||
}
|
||||
return ins[i].Position.Column < ins[j].Position.Column
|
||||
})
|
||||
|
||||
f := &Formatter{
|
||||
insertables: ins,
|
||||
writer: w,
|
||||
}
|
||||
f.formatConfig(config)
|
||||
}
|
||||
|
||||
// fixComment normalizes the spacing after a comment marker: "//x" becomes
// "// x", "//!x" becomes "//! x", and "//#x" becomes "//# x". Text that
// already has a space after the marker, is only the marker itself, or is
// not a comment at all is returned unchanged.
func fixComment(text string) string {
	// Longer markers first so "//!" and "//#" are not treated as plain "//".
	for _, marker := range []string{"//!", "//#", "//"} {
		if !strings.HasPrefix(text, marker) {
			continue
		}
		rest := text[len(marker):]
		if rest != "" && rest[0] != ' ' {
			return marker + " " + rest
		}
		break
	}
	return text
}
|
||||
|
||||
// formatConfig emits the whole configuration: the optional #package line
// first (followed by a blank line), then each top-level definition, each
// with any comments that preceded it and any same-line trailing comment,
// and finally any comments left over at the end of the file.
func (f *Formatter) formatConfig(config *parser.Configuration) {
	lastLine := 0
	if config.Package != nil {
		// Comments above the package line are printed detached from it.
		f.flushCommentsBefore(config.Package.Position, 0, false) // Package comments usually detached unless specifically doc
		fmt.Fprintf(f.writer, "#package %s", config.Package.URI)
		lastLine = config.Package.Position.Line
		// A comment on the same source line stays on the same output line.
		if f.hasTrailingComment(lastLine) {
			fmt.Fprintf(f.writer, " %s", f.popComment())
		}
		fmt.Fprintln(f.writer)
		// Blank line separating the package declaration from the body.
		fmt.Fprintln(f.writer)
	}

	for _, def := range config.Definitions {
		// Comments above a definition stick directly on top of it.
		f.flushCommentsBefore(def.Pos(), 0, true) // Stick to definition
		lastLine = f.formatDefinition(def, 0)
		if f.hasTrailingComment(lastLine) {
			fmt.Fprintf(f.writer, " %s", f.popComment())
		}
		fmt.Fprintln(f.writer)
	}

	// Anything still queued belongs after the last definition.
	f.flushRemainingComments(0)
}
|
||||
|
||||
func (f *Formatter) formatDefinition(def parser.Definition, indent int) int {
|
||||
indentStr := strings.Repeat(" ", indent)
|
||||
switch d := def.(type) {
|
||||
case *parser.Field:
|
||||
fmt.Fprintf(f.writer, "%s%s = ", indentStr, d.Name)
|
||||
endLine := f.formatValue(d.Value, indent)
|
||||
return endLine
|
||||
case *parser.ObjectNode:
|
||||
fmt.Fprintf(f.writer, "%s%s = {", indentStr, d.Name)
|
||||
if f.hasTrailingComment(d.Position.Line) {
|
||||
fmt.Fprintf(f.writer, " %s", f.popComment())
|
||||
}
|
||||
fmt.Fprintln(f.writer)
|
||||
|
||||
f.formatSubnode(d.Subnode, indent+1)
|
||||
|
||||
fmt.Fprintf(f.writer, "%s}", indentStr)
|
||||
return d.Subnode.EndPosition.Line
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (f *Formatter) formatSubnode(sub parser.Subnode, indent int) {
|
||||
for _, def := range sub.Definitions {
|
||||
f.flushCommentsBefore(def.Pos(), indent, true) // Stick to definition
|
||||
lastLine := f.formatDefinition(def, indent)
|
||||
if f.hasTrailingComment(lastLine) {
|
||||
fmt.Fprintf(f.writer, " %s", f.popComment())
|
||||
}
|
||||
fmt.Fprintln(f.writer)
|
||||
}
|
||||
f.flushCommentsBefore(sub.EndPosition, indent, false)
|
||||
}
|
||||
|
||||
// formatValue emits a single value and returns the source line on which it
// ended, so callers can attach same-line trailing comments. Scalars end on
// their own position line; arrays end on their closing-brace line when the
// parser recorded one. Unknown value kinds emit nothing and return 0.
func (f *Formatter) formatValue(val parser.Value, indent int) int {
	switch v := val.(type) {
	case *parser.StringValue:
		if v.Quoted {
			fmt.Fprintf(f.writer, "\"%s\"", v.Value)
		} else {
			// Should strictly parse unquoted as ReferenceValue or identifiers, but fallback here
			fmt.Fprint(f.writer, v.Value)
		}
		return v.Position.Line
	case *parser.IntValue:
		// Raw preserves the original spelling (hex, leading zeros, etc.).
		fmt.Fprint(f.writer, v.Raw)
		return v.Position.Line
	case *parser.FloatValue:
		fmt.Fprint(f.writer, v.Raw)
		return v.Position.Line
	case *parser.BoolValue:
		fmt.Fprintf(f.writer, "%v", v.Value)
		return v.Position.Line
	case *parser.ReferenceValue:
		fmt.Fprint(f.writer, v.Value)
		return v.Position.Line
	case *parser.ArrayValue:
		// Arrays render single-line, space-separated: "{ a b c }".
		fmt.Fprint(f.writer, "{ ")
		for i, e := range v.Elements {
			if i > 0 {
				fmt.Fprint(f.writer, " ")
			}
			f.formatValue(e, indent)
		}
		fmt.Fprint(f.writer, " }")
		if v.EndPosition.Line > 0 {
			return v.EndPosition.Line
		}
		// Fallback if EndPosition not set (shouldn't happen with new parser)
		if len(v.Elements) > 0 {
			return v.Elements[len(v.Elements)-1].Pos().Line
		}
		return v.Position.Line
	default:
		return 0
	}
}
|
||||
|
||||
// flushCommentsBefore writes, at the given indent, every queued insertable
// whose source position is strictly before pos, advancing the cursor past
// them. The caller prints its own content immediately afterwards, so these
// comments end up directly on top of it; blank lines that separated them
// in the source are squashed.
//
// NOTE(review): stick is currently unused — every call behaves the same
// regardless of its value. Confirm whether it was meant to control
// preserving a blank line between detached comments and the code below.
func (f *Formatter) flushCommentsBefore(pos parser.Position, indent int, stick bool) {
	indentStr := strings.Repeat(" ", indent)
	for f.cursor < len(f.insertables) {
		c := f.insertables[f.cursor]
		// Strictly-before check: earlier line, or same line but earlier column.
		if c.Position.Line < pos.Line || (c.Position.Line == pos.Line && c.Position.Column < pos.Column) {
			fmt.Fprintf(f.writer, "%s%s\n", indentStr, c.Text)
			f.cursor++
		} else {
			break
		}
	}
}
|
||||
|
||||
func (f *Formatter) flushRemainingComments(indent int) {
|
||||
indentStr := strings.Repeat(" ", indent)
|
||||
for f.cursor < len(f.insertables) {
|
||||
c := f.insertables[f.cursor]
|
||||
fmt.Fprintf(f.writer, "%s%s\n", indentStr, c.Text)
|
||||
f.cursor++
|
||||
}
|
||||
}
|
||||
|
||||
func (f *Formatter) hasTrailingComment(line int) bool {
|
||||
if f.cursor >= len(f.insertables) {
|
||||
return false
|
||||
}
|
||||
c := f.insertables[f.cursor]
|
||||
return c.Position.Line == line
|
||||
}
|
||||
|
||||
func (f *Formatter) popComment() string {
|
||||
if f.cursor >= len(f.insertables) {
|
||||
return ""
|
||||
}
|
||||
c := f.insertables[f.cursor]
|
||||
f.cursor++
|
||||
return c.Text
|
||||
}
|
||||
@@ -12,6 +12,8 @@ type Position struct {
|
||||
// Configuration is the root of a parsed document: the ordered definitions,
// the optional #package declaration, and the comments and pragmas lifted
// out of the token stream during parsing.
type Configuration struct {
	Definitions []Definition
	Package     *Package
	Comments    []Comment
	Pragmas     []Pragma
}
|
||||
|
||||
type Definition interface {
|
||||
@@ -39,6 +41,7 @@ func (o *ObjectNode) isDefinition() {}
|
||||
|
||||
// Subnode is the braced body of an object definition: the definitions it
// contains plus the source positions of its opening and closing braces.
type Subnode struct {
	Position    Position
	EndPosition Position
	Definitions []Definition
}
|
||||
|
||||
@@ -50,6 +53,7 @@ type Value interface {
|
||||
// StringValue is a textual value. Quoted records whether the source wrote
// it in double quotes, so the formatter can reproduce the original form.
type StringValue struct {
	Position Position
	Value    string
	Quoted   bool
}
|
||||
|
||||
func (v *StringValue) Pos() Position { return v.Position }
|
||||
@@ -90,8 +94,9 @@ func (v *ReferenceValue) Pos() Position { return v.Position }
|
||||
func (v *ReferenceValue) isValue() {}
|
||||
|
||||
type ArrayValue struct {
|
||||
Position Position
|
||||
Elements []Value
|
||||
Position Position
|
||||
EndPosition Position
|
||||
Elements []Value
|
||||
}
|
||||
|
||||
func (v *ArrayValue) Pos() Position { return v.Position }
|
||||
|
||||
@@ -31,18 +31,22 @@ type Token struct {
|
||||
}
|
||||
|
||||
type Lexer struct {
|
||||
input string
|
||||
start int
|
||||
pos int
|
||||
width int
|
||||
line int
|
||||
lineStart int
|
||||
input string
|
||||
start int
|
||||
pos int
|
||||
width int
|
||||
line int
|
||||
lineStart int
|
||||
startLine int
|
||||
startColumn int
|
||||
}
|
||||
|
||||
func NewLexer(input string) *Lexer {
|
||||
return &Lexer{
|
||||
input: input,
|
||||
line: 1,
|
||||
input: input,
|
||||
line: 1,
|
||||
startLine: 1,
|
||||
startColumn: 1,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -67,8 +71,8 @@ func (l *Lexer) backup() {
|
||||
r, _ := utf8.DecodeRuneInString(l.input[l.pos:])
|
||||
if r == '\n' {
|
||||
l.line--
|
||||
// This is tricky, we'd need to find the previous line start
|
||||
// For simplicity, let's just not backup over newlines or handle it better
|
||||
// We don't perfectly restore lineStart here as it's complex,
|
||||
// but we mostly backup single characters within a line.
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -79,16 +83,22 @@ func (l *Lexer) peek() rune {
|
||||
return r
|
||||
}
|
||||
|
||||
func (l *Lexer) ignore() {
|
||||
l.start = l.pos
|
||||
l.startLine = l.line
|
||||
l.startColumn = l.pos - l.lineStart + 1
|
||||
}
|
||||
|
||||
func (l *Lexer) emit(t TokenType) Token {
|
||||
tok := Token{
|
||||
Type: t,
|
||||
Type: t,
|
||||
Value: l.input[l.start:l.pos],
|
||||
Position: Position{
|
||||
Line: l.line,
|
||||
Column: l.start - l.lineStart + 1,
|
||||
Line: l.startLine,
|
||||
Column: l.startColumn,
|
||||
},
|
||||
}
|
||||
l.start = l.pos
|
||||
l.ignore()
|
||||
return tok
|
||||
}
|
||||
|
||||
@@ -100,7 +110,7 @@ func (l *Lexer) NextToken() Token {
|
||||
}
|
||||
|
||||
if unicode.IsSpace(r) {
|
||||
l.start = l.pos
|
||||
l.ignore()
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -117,10 +127,6 @@ func (l *Lexer) NextToken() Token {
|
||||
return l.lexComment()
|
||||
case '#':
|
||||
return l.lexPackage()
|
||||
case '!':
|
||||
// Might be part of pragma //!
|
||||
// But grammar says pragma is //!
|
||||
// So it should start with //
|
||||
case '+':
|
||||
fallthrough
|
||||
case '$':
|
||||
@@ -178,7 +184,6 @@ func (l *Lexer) lexString() Token {
|
||||
}
|
||||
|
||||
func (l *Lexer) lexNumber() Token {
|
||||
// Simple number lexing, could be improved for hex, binary, float
|
||||
for {
|
||||
r := l.next()
|
||||
if unicode.IsDigit(r) || r == '.' || r == 'x' || r == 'b' || r == 'e' || r == '-' {
|
||||
@@ -192,7 +197,6 @@ func (l *Lexer) lexNumber() Token {
|
||||
func (l *Lexer) lexComment() Token {
|
||||
r := l.next()
|
||||
if r == '/' {
|
||||
// It's a comment, docstring or pragma
|
||||
r = l.next()
|
||||
if r == '#' {
|
||||
return l.lexUntilNewline(TokenDocstring)
|
||||
@@ -209,15 +213,21 @@ func (l *Lexer) lexComment() Token {
|
||||
func (l *Lexer) lexUntilNewline(t TokenType) Token {
|
||||
for {
|
||||
r := l.next()
|
||||
if r == '\n' || r == -1 {
|
||||
if r == '\n' {
|
||||
l.backup()
|
||||
tok := l.emit(t)
|
||||
l.next() // consume \n
|
||||
l.ignore()
|
||||
return tok
|
||||
}
|
||||
if r == -1 {
|
||||
return l.emit(t)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (l *Lexer) lexPackage() Token {
|
||||
// #package
|
||||
l.start = l.pos - 1 // Include '#'
|
||||
// We are at '#', l.start is just before it
|
||||
for {
|
||||
r := l.next()
|
||||
if unicode.IsLetter(r) {
|
||||
@@ -230,4 +240,4 @@ func (l *Lexer) lexPackage() Token {
|
||||
return l.lexUntilNewline(TokenPackage)
|
||||
}
|
||||
return l.emit(TokenError)
|
||||
}
|
||||
}
|
||||
@@ -7,9 +7,11 @@ import (
|
||||
)
|
||||
|
||||
type Parser struct {
|
||||
lexer *Lexer
|
||||
tok Token
|
||||
peeked bool
|
||||
lexer *Lexer
|
||||
tok Token
|
||||
peeked bool
|
||||
comments []Comment
|
||||
pragmas []Pragma
|
||||
}
|
||||
|
||||
func NewParser(input string) *Parser {
|
||||
@@ -23,7 +25,7 @@ func (p *Parser) next() Token {
|
||||
p.peeked = false
|
||||
return p.tok
|
||||
}
|
||||
p.tok = p.lexer.NextToken()
|
||||
p.tok = p.fetchToken()
|
||||
return p.tok
|
||||
}
|
||||
|
||||
@@ -31,11 +33,27 @@ func (p *Parser) peek() Token {
|
||||
if p.peeked {
|
||||
return p.tok
|
||||
}
|
||||
p.tok = p.lexer.NextToken()
|
||||
p.tok = p.fetchToken()
|
||||
p.peeked = true
|
||||
return p.tok
|
||||
}
|
||||
|
||||
// fetchToken pulls tokens from the lexer, siphoning comment, docstring,
// and pragma tokens into p.comments / p.pragmas as a side effect, and
// returns the first token of any other type. Docstrings are stored as
// comments with Doc set so the formatter can distinguish them later.
func (p *Parser) fetchToken() Token {
	for {
		tok := p.lexer.NextToken()
		switch tok.Type {
		case TokenComment:
			p.comments = append(p.comments, Comment{Position: tok.Position, Text: tok.Value})
		case TokenDocstring:
			p.comments = append(p.comments, Comment{Position: tok.Position, Text: tok.Value, Doc: true})
		case TokenPragma:
			p.pragmas = append(p.pragmas, Pragma{Position: tok.Position, Text: tok.Value})
		default:
			return tok
		}
	}
}
|
||||
|
||||
func (p *Parser) Parse() (*Configuration, error) {
|
||||
config := &Configuration{}
|
||||
for {
|
||||
@@ -51,12 +69,6 @@ func (p *Parser) Parse() (*Configuration, error) {
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Skip comments, pragmas, docstrings for now in AST
|
||||
if tok.Type == TokenComment || tok.Type == TokenDocstring || tok.Type == TokenPragma {
|
||||
p.next()
|
||||
continue
|
||||
}
|
||||
|
||||
def, err := p.parseDefinition()
|
||||
if err != nil {
|
||||
@@ -64,6 +76,8 @@ func (p *Parser) Parse() (*Configuration, error) {
|
||||
}
|
||||
config.Definitions = append(config.Definitions, def)
|
||||
}
|
||||
config.Comments = p.comments
|
||||
config.Pragmas = p.pragmas
|
||||
return config, nil
|
||||
}
|
||||
|
||||
@@ -114,16 +128,13 @@ func (p *Parser) parseSubnode() (Subnode, error) {
|
||||
for {
|
||||
t := p.peek()
|
||||
if t.Type == TokenRBrace {
|
||||
p.next()
|
||||
endTok := p.next()
|
||||
sub.EndPosition = endTok.Position
|
||||
break
|
||||
}
|
||||
if t.Type == TokenEOF {
|
||||
return sub, fmt.Errorf("%d:%d: unexpected EOF, expected }", t.Position.Line, t.Position.Column)
|
||||
}
|
||||
if t.Type == TokenComment || t.Type == TokenDocstring || t.Type == TokenPragma {
|
||||
p.next()
|
||||
continue
|
||||
}
|
||||
def, err := p.parseDefinition()
|
||||
if err != nil {
|
||||
return sub, err
|
||||
@@ -136,11 +147,13 @@ func (p *Parser) parseSubnode() (Subnode, error) {
|
||||
func (p *Parser) parseValue() (Value, error) {
|
||||
tok := p.next()
|
||||
switch tok.Type {
|
||||
case TokenString:
|
||||
return &StringValue{
|
||||
Position: tok.Position,
|
||||
Value: strings.Trim(tok.Value, "\""),
|
||||
}, nil
|
||||
case TokenString:
|
||||
return &StringValue{
|
||||
Position: tok.Position,
|
||||
Value: strings.Trim(tok.Value, "\""),
|
||||
Quoted: true,
|
||||
}, nil
|
||||
|
||||
case TokenNumber:
|
||||
// Simplistic handling
|
||||
if strings.Contains(tok.Value, ".") || strings.Contains(tok.Value, "e") {
|
||||
@@ -150,7 +163,8 @@ func (p *Parser) parseValue() (Value, error) {
|
||||
i, _ := strconv.ParseInt(tok.Value, 0, 64)
|
||||
return &IntValue{Position: tok.Position, Value: i, Raw: tok.Value}, nil
|
||||
case TokenBool:
|
||||
return &BoolValue{Position: tok.Position, Value: tok.Value == "true"}, nil
|
||||
return &BoolValue{Position: tok.Position, Value: tok.Value == "true"},
|
||||
nil
|
||||
case TokenIdentifier:
|
||||
// reference?
|
||||
return &ReferenceValue{Position: tok.Position, Value: tok.Value}, nil
|
||||
@@ -160,7 +174,8 @@ func (p *Parser) parseValue() (Value, error) {
|
||||
for {
|
||||
t := p.peek()
|
||||
if t.Type == TokenRBrace {
|
||||
p.next()
|
||||
endTok := p.next()
|
||||
arr.EndPosition = endTok.Position
|
||||
break
|
||||
}
|
||||
val, err := p.parseValue()
|
||||
|
||||
Reference in New Issue
Block a user