Working on

This commit is contained in:
Martino Ferrari
2026-01-19 23:29:41 +01:00
parent 69d3360289
commit 1eda6a2a38
11 changed files with 471 additions and 55 deletions

View File

@@ -1,11 +1,13 @@
package main package main
import ( import (
"bytes"
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"os" "os"
"github.com/marte-dev/marte-dev-tools/internal/builder" "github.com/marte-dev/marte-dev-tools/internal/builder"
"github.com/marte-dev/marte-dev-tools/internal/formatter"
"github.com/marte-dev/marte-dev-tools/internal/index" "github.com/marte-dev/marte-dev-tools/internal/index"
"github.com/marte-dev/marte-dev-tools/internal/lsp" "github.com/marte-dev/marte-dev-tools/internal/lsp"
"github.com/marte-dev/marte-dev-tools/internal/parser" "github.com/marte-dev/marte-dev-tools/internal/parser"
@@ -28,7 +30,7 @@ func main() {
case "check": case "check":
runCheck(os.Args[2:]) runCheck(os.Args[2:])
case "fmt": case "fmt":
runFmt() runFmt(os.Args[2:])
default: default:
fmt.Printf("Unknown command: %s\n", command) fmt.Printf("Unknown command: %s\n", command)
os.Exit(1) os.Exit(1)
@@ -107,7 +109,34 @@ func runCheck(args []string) {
} }
} }
func runFmt() { func runFmt(args []string) {
fmt.Println("Formatting files...") if len(args) < 1 {
// TODO: Implement fmt fmt.Println("Usage: mdt fmt <input_files...>")
os.Exit(1)
}
for _, file := range args {
content, err := ioutil.ReadFile(file)
if err != nil {
fmt.Printf("Error reading %s: %v\n", file, err)
continue
}
p := parser.NewParser(string(content))
config, err := p.Parse()
if err != nil {
fmt.Printf("Error parsing %s: %v\n", file, err)
continue
}
var buf bytes.Buffer
formatter.Format(config, &buf)
err = ioutil.WriteFile(file, buf.Bytes(), 0644)
if err != nil {
fmt.Printf("Error writing %s: %v\n", file, err)
continue
}
fmt.Printf("Formatted %s\n", file)
}
} }

View File

@@ -0,0 +1,211 @@
package formatter
import (
"fmt"
"io"
"sort"
"strings"
"github.com/marte-dev/marte-dev-tools/internal/parser"
)
// Insertable is a comment or pragma lifted out of the parsed configuration,
// queued for re-insertion at its original source position while formatting.
type Insertable struct {
	Position parser.Position // original source location (line, column)
	Text     string          // comment text, spacing normalized by fixComment
	IsDoc    bool            // true for docstring comments ("//#")
}

// Formatter walks a configuration and writes its formatted form, draining
// the position-sorted insertables queue as output passes their locations.
type Formatter struct {
	insertables []Insertable // comments/pragmas sorted by (line, column)
	cursor      int          // index of the next insertable not yet emitted
	writer      io.Writer    // destination for formatted output
}
// Format writes a formatted rendering of config to w, re-inserting the
// comments and pragmas collected by the parser at their original source
// positions (normalizing their spacing along the way).
func Format(config *parser.Configuration, w io.Writer) {
	var queue []Insertable
	for _, c := range config.Comments {
		queue = append(queue, Insertable{Position: c.Position, Text: fixComment(c.Text), IsDoc: c.Doc})
	}
	for _, p := range config.Pragmas {
		queue = append(queue, Insertable{Position: p.Position, Text: fixComment(p.Text)})
	}
	// Order by source position so the queue can be consumed sequentially
	// while the formatter walks the configuration top to bottom.
	sort.Slice(queue, func(a, b int) bool {
		pa, pb := queue[a].Position, queue[b].Position
		if pa.Line == pb.Line {
			return pa.Column < pb.Column
		}
		return pa.Line < pb.Line
	})
	f := &Formatter{insertables: queue, writer: w}
	f.formatConfig(config)
}
// fixComment normalizes comment spacing: when content starts immediately
// after a comment marker ("//!", "//#", or plain "//"), exactly one space
// is inserted after the marker. Already-spaced comments, bare markers, and
// non-comment text are returned unchanged.
func fixComment(text string) string {
	switch {
	case strings.HasPrefix(text, "//!"):
		if rest := text[3:]; rest != "" && rest[0] != ' ' {
			return "//! " + rest
		}
	case strings.HasPrefix(text, "//#"):
		if rest := text[3:]; rest != "" && rest[0] != ' ' {
			return "//# " + rest
		}
	case strings.HasPrefix(text, "//"):
		// The '#' / '!' exclusions mirror the original logic, though any
		// such text would already have matched the earlier cases.
		if rest := text[2:]; rest != "" && rest[0] != ' ' && rest[0] != '#' && rest[0] != '!' {
			return "// " + rest
		}
	}
	return text
}
// formatConfig writes the whole configuration: the optional #package line
// (followed by a blank separator line), each top-level definition with its
// preceding comments, then any comments remaining after the last definition.
// Comments found on the same source line as a definition are re-attached
// inline after it.
func (f *Formatter) formatConfig(config *parser.Configuration) {
	lastLine := 0
	if config.Package != nil {
		f.flushCommentsBefore(config.Package.Position, 0, false) // Package comments usually detached unless specifically doc
		fmt.Fprintf(f.writer, "#package %s", config.Package.URI)
		lastLine = config.Package.Position.Line
		if f.hasTrailingComment(lastLine) {
			// Inline comment on the #package line itself.
			fmt.Fprintf(f.writer, " %s", f.popComment())
		}
		fmt.Fprintln(f.writer)
		fmt.Fprintln(f.writer) // blank line separating the package header
	}
	for _, def := range config.Definitions {
		f.flushCommentsBefore(def.Pos(), 0, true) // Stick to definition
		lastLine = f.formatDefinition(def, 0)
		if f.hasTrailingComment(lastLine) {
			// Comment on the definition's final source line stays inline.
			fmt.Fprintf(f.writer, " %s", f.popComment())
		}
		fmt.Fprintln(f.writer)
	}
	// Emit whatever comments trail the last definition in the file.
	f.flushRemainingComments(0)
}
// formatDefinition writes one definition at the given indent level and
// returns the source line on which the definition ends, so the caller can
// attach an inline trailing comment. Unknown definition types yield 0.
func (f *Formatter) formatDefinition(def parser.Definition, indent int) int {
	indentStr := strings.Repeat(" ", indent)
	switch d := def.(type) {
	case *parser.Field:
		// Simple assignment: "name = " followed by the (possibly nested) value.
		fmt.Fprintf(f.writer, "%s%s = ", indentStr, d.Name)
		endLine := f.formatValue(d.Value, indent)
		return endLine
	case *parser.ObjectNode:
		fmt.Fprintf(f.writer, "%s%s = {", indentStr, d.Name)
		if f.hasTrailingComment(d.Position.Line) {
			// Comment sitting right after the opening brace stays inline.
			fmt.Fprintf(f.writer, " %s", f.popComment())
		}
		fmt.Fprintln(f.writer)
		f.formatSubnode(d.Subnode, indent+1)
		fmt.Fprintf(f.writer, "%s}", indentStr)
		// The object ends on the subnode's closing-brace line.
		return d.Subnode.EndPosition.Line
	}
	return 0
}
// formatSubnode writes the body of a braced block at the given indent:
// each nested definition preceded by its "sticky" comments, then any
// comments remaining before the closing brace.
func (f *Formatter) formatSubnode(sub parser.Subnode, indent int) {
	for _, def := range sub.Definitions {
		f.flushCommentsBefore(def.Pos(), indent, true) // Stick to definition
		lastLine := f.formatDefinition(def, indent)
		if f.hasTrailingComment(lastLine) {
			// Inline comment on the definition's final line.
			fmt.Fprintf(f.writer, " %s", f.popComment())
		}
		fmt.Fprintln(f.writer)
	}
	// Comments between the last definition and the closing brace.
	f.flushCommentsBefore(sub.EndPosition, indent, false)
}
// formatValue writes a single value and returns the source line on which
// the value ends (callers use this to attach inline trailing comments).
// indent is passed through for nested values; scalar cases ignore it.
// Unknown value types write nothing and return 0.
func (f *Formatter) formatValue(val parser.Value, indent int) int {
	switch v := val.(type) {
	case *parser.StringValue:
		if v.Quoted {
			// Quoted strings keep their quotes when formatted.
			fmt.Fprintf(f.writer, "\"%s\"", v.Value)
		} else {
			// Should strictly parse unquoted as ReferenceValue or identifiers, but fallback here
			fmt.Fprint(f.writer, v.Value)
		}
		return v.Position.Line
	case *parser.IntValue:
		// Raw is the original literal text, preserving its written form.
		fmt.Fprint(f.writer, v.Raw)
		return v.Position.Line
	case *parser.FloatValue:
		fmt.Fprint(f.writer, v.Raw)
		return v.Position.Line
	case *parser.BoolValue:
		fmt.Fprintf(f.writer, "%v", v.Value)
		return v.Position.Line
	case *parser.ReferenceValue:
		fmt.Fprint(f.writer, v.Value)
		return v.Position.Line
	case *parser.ArrayValue:
		// Arrays format as "{ e1 e2 ... }": one space inside each brace,
		// single spaces between elements.
		fmt.Fprint(f.writer, "{ ")
		for i, e := range v.Elements {
			if i > 0 {
				fmt.Fprint(f.writer, " ")
			}
			f.formatValue(e, indent)
		}
		fmt.Fprint(f.writer, " }")
		if v.EndPosition.Line > 0 {
			return v.EndPosition.Line
		}
		// Fallback if EndPosition not set (shouldn't happen with new parser)
		if len(v.Elements) > 0 {
			return v.Elements[len(v.Elements)-1].Pos().Line
		}
		return v.Position.Line
	default:
		return 0
	}
}
// flushCommentsBefore emits, one per line at the given indent, every queued
// insertable positioned strictly before pos, advancing the cursor past each.
// Blank lines from the source are never reproduced, so flushed comments end
// up directly above whatever the caller prints next — this is what makes
// comments "stick" to the following definition. The stick flag is not read
// by the body; it only documents the caller's intent at each call site.
func (f *Formatter) flushCommentsBefore(pos parser.Position, indent int, stick bool) {
	indentStr := strings.Repeat(" ", indent)
	for f.cursor < len(f.insertables) {
		c := f.insertables[f.cursor]
		if c.Position.Line < pos.Line || (c.Position.Line == pos.Line && c.Position.Column < pos.Column) {
			fmt.Fprintf(f.writer, "%s%s\n", indentStr, c.Text)
			f.cursor++
		} else {
			break
		}
	}
}
// flushRemainingComments drains every insertable still in the queue,
// writing each on its own line at the given indent level.
func (f *Formatter) flushRemainingComments(indent int) {
	prefix := strings.Repeat(" ", indent)
	for ; f.cursor < len(f.insertables); f.cursor++ {
		fmt.Fprintf(f.writer, "%s%s\n", prefix, f.insertables[f.cursor].Text)
	}
}
// hasTrailingComment reports whether the next queued insertable sits on
// the given source line, i.e. it is an inline comment for that line.
func (f *Formatter) hasTrailingComment(line int) bool {
	if f.cursor < len(f.insertables) {
		return f.insertables[f.cursor].Position.Line == line
	}
	return false
}
// popComment consumes the next queued insertable and returns its text,
// or the empty string when the queue is exhausted.
func (f *Formatter) popComment() string {
	if f.cursor >= len(f.insertables) {
		return ""
	}
	text := f.insertables[f.cursor].Text
	f.cursor++
	return text
}

View File

@@ -12,6 +12,8 @@ type Position struct {
type Configuration struct { type Configuration struct {
Definitions []Definition Definitions []Definition
Package *Package Package *Package
Comments []Comment
Pragmas []Pragma
} }
type Definition interface { type Definition interface {
@@ -39,6 +41,7 @@ func (o *ObjectNode) isDefinition() {}
type Subnode struct { type Subnode struct {
Position Position Position Position
EndPosition Position
Definitions []Definition Definitions []Definition
} }
@@ -50,6 +53,7 @@ type Value interface {
type StringValue struct { type StringValue struct {
Position Position Position Position
Value string Value string
Quoted bool
} }
func (v *StringValue) Pos() Position { return v.Position } func (v *StringValue) Pos() Position { return v.Position }
@@ -90,8 +94,9 @@ func (v *ReferenceValue) Pos() Position { return v.Position }
func (v *ReferenceValue) isValue() {} func (v *ReferenceValue) isValue() {}
type ArrayValue struct { type ArrayValue struct {
Position Position Position Position
Elements []Value EndPosition Position
Elements []Value
} }
func (v *ArrayValue) Pos() Position { return v.Position } func (v *ArrayValue) Pos() Position { return v.Position }

View File

@@ -31,18 +31,22 @@ type Token struct {
} }
type Lexer struct { type Lexer struct {
input string input string
start int start int
pos int pos int
width int width int
line int line int
lineStart int lineStart int
startLine int
startColumn int
} }
func NewLexer(input string) *Lexer { func NewLexer(input string) *Lexer {
return &Lexer{ return &Lexer{
input: input, input: input,
line: 1, line: 1,
startLine: 1,
startColumn: 1,
} }
} }
@@ -67,8 +71,8 @@ func (l *Lexer) backup() {
r, _ := utf8.DecodeRuneInString(l.input[l.pos:]) r, _ := utf8.DecodeRuneInString(l.input[l.pos:])
if r == '\n' { if r == '\n' {
l.line-- l.line--
// This is tricky, we'd need to find the previous line start // We don't perfectly restore lineStart here as it's complex,
// For simplicity, let's just not backup over newlines or handle it better // but we mostly backup single characters within a line.
} }
} }
} }
@@ -79,16 +83,22 @@ func (l *Lexer) peek() rune {
return r return r
} }
func (l *Lexer) ignore() {
l.start = l.pos
l.startLine = l.line
l.startColumn = l.pos - l.lineStart + 1
}
func (l *Lexer) emit(t TokenType) Token { func (l *Lexer) emit(t TokenType) Token {
tok := Token{ tok := Token{
Type: t, Type: t,
Value: l.input[l.start:l.pos], Value: l.input[l.start:l.pos],
Position: Position{ Position: Position{
Line: l.line, Line: l.startLine,
Column: l.start - l.lineStart + 1, Column: l.startColumn,
}, },
} }
l.start = l.pos l.ignore()
return tok return tok
} }
@@ -100,7 +110,7 @@ func (l *Lexer) NextToken() Token {
} }
if unicode.IsSpace(r) { if unicode.IsSpace(r) {
l.start = l.pos l.ignore()
continue continue
} }
@@ -117,10 +127,6 @@ func (l *Lexer) NextToken() Token {
return l.lexComment() return l.lexComment()
case '#': case '#':
return l.lexPackage() return l.lexPackage()
case '!':
// Might be part of pragma //!
// But grammar says pragma is //!
// So it should start with //
case '+': case '+':
fallthrough fallthrough
case '$': case '$':
@@ -178,7 +184,6 @@ func (l *Lexer) lexString() Token {
} }
func (l *Lexer) lexNumber() Token { func (l *Lexer) lexNumber() Token {
// Simple number lexing, could be improved for hex, binary, float
for { for {
r := l.next() r := l.next()
if unicode.IsDigit(r) || r == '.' || r == 'x' || r == 'b' || r == 'e' || r == '-' { if unicode.IsDigit(r) || r == '.' || r == 'x' || r == 'b' || r == 'e' || r == '-' {
@@ -192,7 +197,6 @@ func (l *Lexer) lexNumber() Token {
func (l *Lexer) lexComment() Token { func (l *Lexer) lexComment() Token {
r := l.next() r := l.next()
if r == '/' { if r == '/' {
// It's a comment, docstring or pragma
r = l.next() r = l.next()
if r == '#' { if r == '#' {
return l.lexUntilNewline(TokenDocstring) return l.lexUntilNewline(TokenDocstring)
@@ -209,15 +213,21 @@ func (l *Lexer) lexComment() Token {
func (l *Lexer) lexUntilNewline(t TokenType) Token { func (l *Lexer) lexUntilNewline(t TokenType) Token {
for { for {
r := l.next() r := l.next()
if r == '\n' || r == -1 { if r == '\n' {
l.backup()
tok := l.emit(t)
l.next() // consume \n
l.ignore()
return tok
}
if r == -1 {
return l.emit(t) return l.emit(t)
} }
} }
} }
func (l *Lexer) lexPackage() Token { func (l *Lexer) lexPackage() Token {
// #package // We are at '#', l.start is just before it
l.start = l.pos - 1 // Include '#'
for { for {
r := l.next() r := l.next()
if unicode.IsLetter(r) { if unicode.IsLetter(r) {

View File

@@ -7,9 +7,11 @@ import (
) )
type Parser struct { type Parser struct {
lexer *Lexer lexer *Lexer
tok Token tok Token
peeked bool peeked bool
comments []Comment
pragmas []Pragma
} }
func NewParser(input string) *Parser { func NewParser(input string) *Parser {
@@ -23,7 +25,7 @@ func (p *Parser) next() Token {
p.peeked = false p.peeked = false
return p.tok return p.tok
} }
p.tok = p.lexer.NextToken() p.tok = p.fetchToken()
return p.tok return p.tok
} }
@@ -31,11 +33,27 @@ func (p *Parser) peek() Token {
if p.peeked { if p.peeked {
return p.tok return p.tok
} }
p.tok = p.lexer.NextToken() p.tok = p.fetchToken()
p.peeked = true p.peeked = true
return p.tok return p.tok
} }
func (p *Parser) fetchToken() Token {
for {
tok := p.lexer.NextToken()
switch tok.Type {
case TokenComment:
p.comments = append(p.comments, Comment{Position: tok.Position, Text: tok.Value})
case TokenDocstring:
p.comments = append(p.comments, Comment{Position: tok.Position, Text: tok.Value, Doc: true})
case TokenPragma:
p.pragmas = append(p.pragmas, Pragma{Position: tok.Position, Text: tok.Value})
default:
return tok
}
}
}
func (p *Parser) Parse() (*Configuration, error) { func (p *Parser) Parse() (*Configuration, error) {
config := &Configuration{} config := &Configuration{}
for { for {
@@ -52,18 +70,14 @@ func (p *Parser) Parse() (*Configuration, error) {
continue continue
} }
// Skip comments, pragmas, docstrings for now in AST
if tok.Type == TokenComment || tok.Type == TokenDocstring || tok.Type == TokenPragma {
p.next()
continue
}
def, err := p.parseDefinition() def, err := p.parseDefinition()
if err != nil { if err != nil {
return nil, err return nil, err
} }
config.Definitions = append(config.Definitions, def) config.Definitions = append(config.Definitions, def)
} }
config.Comments = p.comments
config.Pragmas = p.pragmas
return config, nil return config, nil
} }
@@ -114,16 +128,13 @@ func (p *Parser) parseSubnode() (Subnode, error) {
for { for {
t := p.peek() t := p.peek()
if t.Type == TokenRBrace { if t.Type == TokenRBrace {
p.next() endTok := p.next()
sub.EndPosition = endTok.Position
break break
} }
if t.Type == TokenEOF { if t.Type == TokenEOF {
return sub, fmt.Errorf("%d:%d: unexpected EOF, expected }", t.Position.Line, t.Position.Column) return sub, fmt.Errorf("%d:%d: unexpected EOF, expected }", t.Position.Line, t.Position.Column)
} }
if t.Type == TokenComment || t.Type == TokenDocstring || t.Type == TokenPragma {
p.next()
continue
}
def, err := p.parseDefinition() def, err := p.parseDefinition()
if err != nil { if err != nil {
return sub, err return sub, err
@@ -136,11 +147,13 @@ func (p *Parser) parseSubnode() (Subnode, error) {
func (p *Parser) parseValue() (Value, error) { func (p *Parser) parseValue() (Value, error) {
tok := p.next() tok := p.next()
switch tok.Type { switch tok.Type {
case TokenString: case TokenString:
return &StringValue{ return &StringValue{
Position: tok.Position, Position: tok.Position,
Value: strings.Trim(tok.Value, "\""), Value: strings.Trim(tok.Value, "\""),
}, nil Quoted: true,
}, nil
case TokenNumber: case TokenNumber:
// Simplistic handling // Simplistic handling
if strings.Contains(tok.Value, ".") || strings.Contains(tok.Value, "e") { if strings.Contains(tok.Value, ".") || strings.Contains(tok.Value, "e") {
@@ -150,7 +163,8 @@ func (p *Parser) parseValue() (Value, error) {
i, _ := strconv.ParseInt(tok.Value, 0, 64) i, _ := strconv.ParseInt(tok.Value, 0, 64)
return &IntValue{Position: tok.Position, Value: i, Raw: tok.Value}, nil return &IntValue{Position: tok.Position, Value: i, Raw: tok.Value}, nil
case TokenBool: case TokenBool:
return &BoolValue{Position: tok.Position, Value: tok.Value == "true"}, nil return &BoolValue{Position: tok.Position, Value: tok.Value == "true"},
nil
case TokenIdentifier: case TokenIdentifier:
// reference? // reference?
return &ReferenceValue{Position: tok.Position, Value: tok.Value}, nil return &ReferenceValue{Position: tok.Position, Value: tok.Value}, nil
@@ -160,7 +174,8 @@ func (p *Parser) parseValue() (Value, error) {
for { for {
t := p.peek() t := p.peek()
if t.Type == TokenRBrace { if t.Type == TokenRBrace {
p.next() endTok := p.next()
arr.EndPosition = endTok.Position
break break
} }
val, err := p.parseValue() val, err := p.parseValue()

BIN
mdt

Binary file not shown.

View File

@@ -34,8 +34,10 @@ The LSP server should provide the following capabilities:
- **File Extension**: `.marte` - **File Extension**: `.marte`
- **Project Structure**: Files can be distributed across sub-folders. - **Project Structure**: Files can be distributed across sub-folders.
- **Namespaces**: The `#package` macro defines the namespace for the file. - **Namespaces**: The `#package` macro defines the namespace for the file.
- **Semantic**: `#package PROJECT.NODE` implies that all definitions within the file are treated as children/fields of the node `NODE`.
- **Build Process**: - **Build Process**:
- The build tool merges all files sharing the same base namespace. - The build tool merges all files sharing the same base namespace.
- **Multi-File Nodes**: Nodes can be defined across multiple files. The build tool and validator must merge these definitions before processing.
- The LSP indexes only files belonging to the same project/namespace scope. - The LSP indexes only files belonging to the same project/namespace scope.
- **Output**: The output format is the same as the input configuration but without the `#package` macro. - **Output**: The output format is the same as the input configuration but without the `#package` macro.
@@ -133,6 +135,23 @@ The tool must build an index of the configuration to support LSP features and va
- **Schema Definition**: - **Schema Definition**:
- Class validation rules must be defined in a separate schema file. - Class validation rules must be defined in a separate schema file.
- **Project-Specific Classes**: Developers can define their own project-specific classes and corresponding validation rules, expanding the validation capabilities for their specific needs. - **Project-Specific Classes**: Developers can define their own project-specific classes and corresponding validation rules, expanding the validation capabilities for their specific needs.
- **Duplicate Fields**:
- **Constraint**: A field must not be defined more than once within the same object/node scope.
- **Multi-File Consideration**: Validation must account for nodes being defined across multiple files (merged) when checking for duplicates.
### Formatting Rules
The `fmt` command must format the code according to the following rules:
- **Indentation**: 2 spaces per indentation level.
- **Assignment**: 1 space before and after the `=` operator (e.g., `Field = Value`).
- **Comments**:
- 1 space after `//`, `//#`, or `//!`.
- Comments should "stick" to the next definition (no empty lines between the comment and the code it documents).
- **Placement**:
- Comments can be placed inline after a definition (e.g., `field = value // comment`).
- Comments can be placed after a subnode opening bracket (e.g., `node = { // comment`) or after an object definition.
- **Arrays**: 1 space after the opening bracket `{` and 1 space before the closing bracket `}` (e.g., `{ 1 2 3 }`).
- **Strings**: Quoted strings must preserve their quotes during formatting.
### Diagnostic Messages ### Diagnostic Messages

View File

@@ -0,0 +1,11 @@
#package TEST.FMT
// Detached comment
+Node = {
Class = "MyClass"
// Sticky comment
Field = 123
Array = {1 2 3}
}

View File

@@ -0,0 +1,9 @@
#package TEST.INLINE
+Node = { // Comment after open brace
Field1 = "Value" // Comment after value
Field2 = 123 // Another comment
FieldArr = { 1 2 3 } // Comment after array
+Sub = {
} // Comment after object
}

107
test/integration_test.go Normal file
View File

@@ -0,0 +1,107 @@
package integration
import (
"bytes"
"io/ioutil"
"strings"
"testing"
"github.com/marte-dev/marte-dev-tools/internal/formatter"
"github.com/marte-dev/marte-dev-tools/internal/index"
"github.com/marte-dev/marte-dev-tools/internal/parser"
"github.com/marte-dev/marte-dev-tools/internal/validator"
)
// TestCheckCommand runs the full check pipeline (parse -> index -> resolve
// -> validate) on a fixture known to be missing a 'Class' field and asserts
// that the validator reports the corresponding diagnostic.
func TestCheckCommand(t *testing.T) {
	inputFile := "integration/error.marte"
	content, err := ioutil.ReadFile(inputFile)
	if err != nil {
		t.Fatalf("Failed to read %s: %v", inputFile, err)
	}
	p := parser.NewParser(string(content))
	config, err := p.Parse()
	if err != nil {
		t.Fatalf("Parse failed: %v", err)
	}
	// Build the cross-reference index before validating.
	idx := index.NewIndex()
	idx.IndexConfig(inputFile, config)
	idx.ResolveReferences()
	v := validator.NewValidator(idx)
	v.Validate(inputFile, config)
	v.CheckUnused()
	// The fixture must produce at least one missing-'Class' diagnostic.
	foundError := false
	for _, diag := range v.Diagnostics {
		if strings.Contains(diag.Message, "must contain a 'Class' field") {
			foundError = true
			break
		}
	}
	if !foundError {
		t.Errorf("Expected 'Class' field error in %s, but found none", inputFile)
	}
}
// formatMarteFile reads and parses the .marte file at path, runs the
// formatter on it, and returns the formatted text. Any read or parse
// failure aborts the test via t.Fatalf.
func formatMarteFile(t *testing.T, path string) string {
	t.Helper()
	content, err := ioutil.ReadFile(path)
	if err != nil {
		t.Fatalf("Failed to read %s: %v", path, err)
	}
	p := parser.NewParser(string(content))
	config, err := p.Parse()
	if err != nil {
		t.Fatalf("Parse failed: %v", err)
	}
	var buf bytes.Buffer
	formatter.Format(config, &buf)
	return buf.String()
}

// TestFmtCommand exercises the formatter end to end on two fixtures,
// checking indentation, sticky comments, array spacing, and inline
// comment placement. The read/parse/format pipeline is shared via
// formatMarteFile instead of being duplicated per fixture.
func TestFmtCommand(t *testing.T) {
	output := formatMarteFile(t, "integration/fmt.marte")
	// Check for indentation
	if !strings.Contains(output, " Class = \"MyClass\"") {
		t.Error("Expected 2-space indentation for Class field")
	}
	// Check for sticky comments (no blank line between comment and field)
	// We expect:
	//   // Sticky comment
	//   Field = 123
	if !strings.Contains(output, " // Sticky comment\n Field = 123") {
		t.Errorf("Expected sticky comment to be immediately followed by field, got:\n%s", output)
	}
	if !strings.Contains(output, "Array = { 1 2 3 }") {
		t.Errorf("Expected formatted array '{ 1 2 3 }', got: %s", output)
	}

	// Check for inline comments on the second fixture.
	output2 := formatMarteFile(t, "integration/fmt_inline.marte")
	if !strings.Contains(output2, "+Node = { // Comment after open brace") {
		t.Error("Expected inline comment after open brace")
	}
	if !strings.Contains(output2, "Field1 = \"Value\" // Comment after value") {
		t.Error("Expected inline comment after field value")
	}
}