diff --git a/cmd/mdt/main.go b/cmd/mdt/main.go index 4d1b2d5..9862930 100644 --- a/cmd/mdt/main.go +++ b/cmd/mdt/main.go @@ -1,11 +1,13 @@ package main import ( + "bytes" "fmt" "io/ioutil" "os" "github.com/marte-dev/marte-dev-tools/internal/builder" + "github.com/marte-dev/marte-dev-tools/internal/formatter" "github.com/marte-dev/marte-dev-tools/internal/index" "github.com/marte-dev/marte-dev-tools/internal/lsp" "github.com/marte-dev/marte-dev-tools/internal/parser" @@ -28,7 +30,7 @@ func main() { case "check": runCheck(os.Args[2:]) case "fmt": - runFmt() + runFmt(os.Args[2:]) default: fmt.Printf("Unknown command: %s\n", command) os.Exit(1) @@ -107,7 +109,34 @@ func runCheck(args []string) { } } -func runFmt() { - fmt.Println("Formatting files...") - // TODO: Implement fmt -} +func runFmt(args []string) { + if len(args) < 1 { + fmt.Println("Usage: mdt fmt ") + os.Exit(1) + } + + for _, file := range args { + content, err := ioutil.ReadFile(file) + if err != nil { + fmt.Printf("Error reading %s: %v\n", file, err) + continue + } + + p := parser.NewParser(string(content)) + config, err := p.Parse() + if err != nil { + fmt.Printf("Error parsing %s: %v\n", file, err) + continue + } + + var buf bytes.Buffer + formatter.Format(config, &buf) + + err = ioutil.WriteFile(file, buf.Bytes(), 0644) + if err != nil { + fmt.Printf("Error writing %s: %v\n", file, err) + continue + } + fmt.Printf("Formatted %s\n", file) + } +} \ No newline at end of file diff --git a/internal/formatter/formatter.go b/internal/formatter/formatter.go new file mode 100644 index 0000000..45602fc --- /dev/null +++ b/internal/formatter/formatter.go @@ -0,0 +1,211 @@ +package formatter + +import ( + "fmt" + "io" + "sort" + "strings" + + "github.com/marte-dev/marte-dev-tools/internal/parser" +) + +type Insertable struct { + Position parser.Position + Text string + IsDoc bool +} + +type Formatter struct { + insertables []Insertable + cursor int + writer io.Writer +} + +func Format(config *parser.Configuration, w io.Writer) { + ins := []Insertable{} + for _, c := range config.Comments { + ins = append(ins, Insertable{Position: c.Position, Text: fixComment(c.Text), IsDoc: c.Doc}) + } + for _, p := range config.Pragmas { + ins = append(ins, Insertable{Position: p.Position, Text: fixComment(p.Text)}) + } + // Sort + sort.Slice(ins, func(i, j int) bool { + if ins[i].Position.Line != ins[j].Position.Line { + return ins[i].Position.Line < ins[j].Position.Line + } + return ins[i].Position.Column < ins[j].Position.Column + }) + + f := &Formatter{ + insertables: ins, + writer: w, + } + f.formatConfig(config) +} + +func fixComment(text string) string { + if strings.HasPrefix(text, "//!") { + if len(text) > 3 && text[3] != ' ' { + return "//! " + text[3:] + } + } else if strings.HasPrefix(text, "//#") { + if len(text) > 3 && text[3] != ' ' { + return "//# " + text[3:] + } + } else if strings.HasPrefix(text, "//") { + if len(text) > 2 && text[2] != ' ' && text[2] != '#' && text[2] != '!' { + return "// " + text[2:] + } + } + return text +} + +func (f *Formatter) formatConfig(config *parser.Configuration) { + lastLine := 0 + if config.Package != nil { + f.flushCommentsBefore(config.Package.Position, 0, false) // Package comments usually detached unless specifically doc + fmt.Fprintf(f.writer, "#package %s", config.Package.URI) + lastLine = config.Package.Position.Line + if f.hasTrailingComment(lastLine) { + fmt.Fprintf(f.writer, " %s", f.popComment()) + } + fmt.Fprintln(f.writer) + fmt.Fprintln(f.writer) + } + + for _, def := range config.Definitions { + f.flushCommentsBefore(def.Pos(), 0, true) // Stick to definition + lastLine = f.formatDefinition(def, 0) + if f.hasTrailingComment(lastLine) { + fmt.Fprintf(f.writer, " %s", f.popComment()) + } + fmt.Fprintln(f.writer) + } + + f.flushRemainingComments(0) +} + +func (f *Formatter) formatDefinition(def parser.Definition, indent int) int { + indentStr := strings.Repeat(" ", indent) + switch d := def.(type) { + case *parser.Field: + fmt.Fprintf(f.writer, "%s%s = ", indentStr, d.Name) + endLine := f.formatValue(d.Value, indent) + return endLine + case *parser.ObjectNode: + fmt.Fprintf(f.writer, "%s%s = {", indentStr, d.Name) + if f.hasTrailingComment(d.Position.Line) { + fmt.Fprintf(f.writer, " %s", f.popComment()) + } + fmt.Fprintln(f.writer) + + f.formatSubnode(d.Subnode, indent+1) + + fmt.Fprintf(f.writer, "%s}", indentStr) + return d.Subnode.EndPosition.Line + } + return 0 +} + +func (f *Formatter) formatSubnode(sub parser.Subnode, indent int) { + for _, def := range sub.Definitions { + f.flushCommentsBefore(def.Pos(), indent, true) // Stick to definition + lastLine := f.formatDefinition(def, indent) + if f.hasTrailingComment(lastLine) { + fmt.Fprintf(f.writer, " %s", f.popComment()) + } + fmt.Fprintln(f.writer) + } + f.flushCommentsBefore(sub.EndPosition, indent, false) +} + +func (f *Formatter) formatValue(val parser.Value, indent int) int { + switch v := val.(type) { + case *parser.StringValue: + if v.Quoted { + fmt.Fprintf(f.writer, "\"%s\"", v.Value) + } else { + // Should strictly parse unquoted as ReferenceValue or identifiers, but fallback here + fmt.Fprint(f.writer, v.Value) + } + return v.Position.Line + case *parser.IntValue: + fmt.Fprint(f.writer, v.Raw) + return v.Position.Line + case *parser.FloatValue: + fmt.Fprint(f.writer, v.Raw) + return v.Position.Line + case *parser.BoolValue: + fmt.Fprintf(f.writer, "%v", v.Value) + return v.Position.Line + case *parser.ReferenceValue: + fmt.Fprint(f.writer, v.Value) + return v.Position.Line + case *parser.ArrayValue: + fmt.Fprint(f.writer, "{ ") + for i, e := range v.Elements { + if i > 0 { + fmt.Fprint(f.writer, " ") + } + f.formatValue(e, indent) + } + fmt.Fprint(f.writer, " }") + if v.EndPosition.Line > 0 { + return v.EndPosition.Line + } + // Fallback if EndPosition not set (shouldn't happen with new parser) + if len(v.Elements) > 0 { + return v.Elements[len(v.Elements)-1].Pos().Line + } + return v.Position.Line + default: + return 0 + } +} + +func (f *Formatter) flushCommentsBefore(pos parser.Position, indent int, stick bool) { + indentStr := strings.Repeat(" ", indent) + for f.cursor < len(f.insertables) { + c := f.insertables[f.cursor] + if c.Position.Line < pos.Line || (c.Position.Line == pos.Line && c.Position.Column < pos.Column) { + fmt.Fprintf(f.writer, "%s%s\n", indentStr, c.Text) + f.cursor++ + } else { + break + } + } + // If stick is true, we don't print extra newline. + // The caller will print the definition immediately after this function returns. + // If stick is false (e.g. end of block comments), we act normally. + // But actually, the previous implementation didn't print extra newlines between comments and code + // explicitly, it relied on the loop in formatConfig/formatSubnode to print newline AFTER definition. + // So comments naturally sat on top. + // The issue is if there WAS a blank line in source, we ignore it and squash. This implements "stick". +} + +func (f *Formatter) flushRemainingComments(indent int) { + indentStr := strings.Repeat(" ", indent) + for f.cursor < len(f.insertables) { + c := f.insertables[f.cursor] + fmt.Fprintf(f.writer, "%s%s\n", indentStr, c.Text) + f.cursor++ + } +} + +func (f *Formatter) hasTrailingComment(line int) bool { + if f.cursor >= len(f.insertables) { + return false + } + c := f.insertables[f.cursor] + return c.Position.Line == line +} + +func (f *Formatter) popComment() string { + if f.cursor >= len(f.insertables) { + return "" + } + c := f.insertables[f.cursor] + f.cursor++ + return c.Text +} \ No newline at end of file diff --git a/internal/parser/ast.go b/internal/parser/ast.go index ac0e766..ff51c7d 100644 --- a/internal/parser/ast.go +++ b/internal/parser/ast.go @@ -12,6 +12,8 @@ type Position struct { type Configuration struct { Definitions []Definition Package *Package + Comments []Comment + Pragmas []Pragma } type Definition interface { @@ -39,6 +41,7 @@ func (o *ObjectNode) isDefinition() {} type Subnode struct { Position Position + EndPosition Position Definitions []Definition } @@ -50,6 +53,7 @@ type Value interface { type StringValue struct { Position Position Value string + Quoted bool } func (v *StringValue) Pos() Position { return v.Position } @@ -90,8 +94,9 @@ func (v *ReferenceValue) Pos() Position { return v.Position } func (v *ReferenceValue) isValue() {} type ArrayValue struct { - Position Position - Elements []Value + Position Position + EndPosition Position + Elements []Value } func (v *ArrayValue) Pos() Position { return v.Position } diff --git a/internal/parser/lexer.go b/internal/parser/lexer.go index 300a1ee..82a9dba 100644 --- a/internal/parser/lexer.go +++ b/internal/parser/lexer.go @@ -31,18 +31,22 @@ type Token struct { } type Lexer struct { - input string - start int - pos int - width int - line int - lineStart int + input string + start int + pos int + width int + line int + lineStart int + startLine int + startColumn int } func NewLexer(input string) *Lexer { return &Lexer{ - input: input, - line: 1, + input: input, + line: 1, + startLine: 1, + startColumn: 1, } } @@ -67,8 +71,8 @@ func (l *Lexer) backup() { r, _ := utf8.DecodeRuneInString(l.input[l.pos:]) if r == '\n' { l.line-- - // This is tricky, we'd need to find the previous line start - // For simplicity, let's just not backup over newlines or handle it better + // We don't perfectly restore lineStart here as it's complex, + // but we mostly backup single characters within a line. } } } @@ -79,16 +83,22 @@ func (l *Lexer) peek() rune { return r } +func (l *Lexer) ignore() { + l.start = l.pos + l.startLine = l.line + l.startColumn = l.pos - l.lineStart + 1 +} + func (l *Lexer) emit(t TokenType) Token { tok := Token{ - Type: t, + Type: t, Value: l.input[l.start:l.pos], Position: Position{ - Line: l.line, - Column: l.start - l.lineStart + 1, + Line: l.startLine, + Column: l.startColumn, }, } - l.start = l.pos + l.ignore() return tok } @@ -100,7 +110,7 @@ func (l *Lexer) NextToken() Token { } if unicode.IsSpace(r) { - l.start = l.pos + l.ignore() continue } @@ -117,10 +127,6 @@ func (l *Lexer) NextToken() Token { return l.lexComment() case '#': return l.lexPackage() - case '!': - // Might be part of pragma //! - // But grammar says pragma is //! - // So it should start with // case '+': fallthrough case '$': @@ -178,7 +184,6 @@ func (l *Lexer) lexString() Token { } func (l *Lexer) lexNumber() Token { - // Simple number lexing, could be improved for hex, binary, float for { r := l.next() if unicode.IsDigit(r) || r == '.' || r == 'x' || r == 'b' || r == 'e' || r == '-' { @@ -192,7 +197,6 @@ func (l *Lexer) lexNumber() Token { func (l *Lexer) lexComment() Token { r := l.next() if r == '/' { - // It's a comment, docstring or pragma r = l.next() if r == '#' { return l.lexUntilNewline(TokenDocstring) @@ -209,15 +213,21 @@ func (l *Lexer) lexComment() Token { func (l *Lexer) lexUntilNewline(t TokenType) Token { for { r := l.next() - if r == '\n' || r == -1 { + if r == '\n' { + l.backup() + tok := l.emit(t) + l.next() // consume \n + l.ignore() + return tok + } + if r == -1 { return l.emit(t) } } } func (l *Lexer) lexPackage() Token { - // #package - l.start = l.pos - 1 // Include '#' + // We are at '#', l.start is just before it for { r := l.next() if unicode.IsLetter(r) { @@ -230,4 +240,4 @@ func (l *Lexer) lexPackage() Token { return l.lexUntilNewline(TokenPackage) } return l.emit(TokenError) -} +} \ No newline at end of file diff --git a/internal/parser/parser.go b/internal/parser/parser.go index 35e0bef..cc29d3b 100644 --- a/internal/parser/parser.go +++ b/internal/parser/parser.go @@ -7,9 +7,11 @@ import ( ) type Parser struct { - lexer *Lexer - tok Token - peeked bool + lexer *Lexer + tok Token + peeked bool + comments []Comment + pragmas []Pragma } func NewParser(input string) *Parser { @@ -23,7 +25,7 @@ func (p *Parser) next() Token { p.peeked = false return p.tok } - p.tok = p.lexer.NextToken() + p.tok = p.fetchToken() return p.tok } @@ -31,11 +33,27 @@ func (p *Parser) peek() Token { if p.peeked { return p.tok } - p.tok = p.lexer.NextToken() + p.tok = p.fetchToken() p.peeked = true return p.tok } +func (p *Parser) fetchToken() Token { + for { + tok := p.lexer.NextToken() + switch tok.Type { + case TokenComment: + p.comments = append(p.comments, Comment{Position: tok.Position, Text: tok.Value}) + case TokenDocstring: + p.comments = append(p.comments, Comment{Position: tok.Position, Text: tok.Value, Doc: true}) + case TokenPragma: + p.pragmas = append(p.pragmas, Pragma{Position: tok.Position, Text: tok.Value}) + default: + return tok + } + } +} + func (p *Parser) Parse() (*Configuration, error) { config := &Configuration{} for { @@ -51,12 +69,6 @@ func (p *Parser) Parse() (*Configuration, error) { } continue } - - // Skip comments, pragmas, docstrings for now in AST - if tok.Type == TokenComment || tok.Type == TokenDocstring || tok.Type == TokenPragma { - p.next() - continue - } def, err := p.parseDefinition() if err != nil { @@ -64,6 +76,8 @@ func (p *Parser) Parse() (*Configuration, error) { } config.Definitions = append(config.Definitions, def) } + config.Comments = p.comments + config.Pragmas = p.pragmas return config, nil } @@ -114,16 +128,13 @@ func (p *Parser) parseSubnode() (Subnode, error) { for { t := p.peek() if t.Type == TokenRBrace { - p.next() + endTok := p.next() + sub.EndPosition = endTok.Position break } if t.Type == TokenEOF { return sub, fmt.Errorf("%d:%d: unexpected EOF, expected }", t.Position.Line, t.Position.Column) } - if t.Type == TokenComment || t.Type == TokenDocstring || t.Type == TokenPragma { - p.next() - continue - } def, err := p.parseDefinition() if err != nil { return sub, err @@ -136,11 +147,13 @@ func (p *Parser) parseSubnode() (Subnode, error) { func (p *Parser) parseValue() (Value, error) { tok := p.next() switch tok.Type { - case TokenString: - return &StringValue{ - Position: tok.Position, - Value: strings.Trim(tok.Value, "\""), - }, nil + case TokenString: + return &StringValue{ + Position: tok.Position, + Value: strings.Trim(tok.Value, "\""), + Quoted: true, + }, nil + case TokenNumber: // Simplistic handling if strings.Contains(tok.Value, ".") || strings.Contains(tok.Value, "e") { @@ -150,7 +163,8 @@ func (p *Parser) parseValue() (Value, error) { i, _ := strconv.ParseInt(tok.Value, 0, 64) return &IntValue{Position: tok.Position, Value: i, Raw: tok.Value}, nil case TokenBool: - return &BoolValue{Position: tok.Position, Value: tok.Value == "true"}, nil + return &BoolValue{Position: tok.Position, Value: tok.Value == "true"}, + nil case TokenIdentifier: // reference? return &ReferenceValue{Position: tok.Position, Value: tok.Value}, nil @@ -160,7 +174,8 @@ func (p *Parser) parseValue() (Value, error) { for { t := p.peek() if t.Type == TokenRBrace { - p.next() + endTok := p.next() + arr.EndPosition = endTok.Position break } val, err := p.parseValue() diff --git a/mdt b/mdt index 9b4d6ab..21fa64c 100755 Binary files a/mdt and b/mdt differ diff --git a/specification.md b/specification.md index 2a93cce..3d163c8 100644 --- a/specification.md +++ b/specification.md @@ -34,8 +34,10 @@ The LSP server should provide the following capabilities: - **File Extension**: `.marte` - **Project Structure**: Files can be distributed across sub-folders. - **Namespaces**: The `#package` macro defines the namespace for the file. + - **Semantic**: `#package PROJECT.NODE` implies that all definitions within the file are treated as children/fields of the node `NODE`. - **Build Process**: - The build tool merges all files sharing the same base namespace. + - **Multi-File Nodes**: Nodes can be defined across multiple files. The build tool and validator must merge these definitions before processing. - The LSP indexes only files belonging to the same project/namespace scope. - **Output**: The output format is the same as the input configuration but without the `#package` macro. @@ -133,6 +135,23 @@ The tool must build an index of the configuration to support LSP features and va - **Schema Definition**: - Class validation rules must be defined in a separate schema file. - **Project-Specific Classes**: Developers can define their own project-specific classes and corresponding validation rules, expanding the validation capabilities for their specific needs. +- **Duplicate Fields**: + - **Constraint**: A field must not be defined more than once within the same object/node scope. + - **Multi-File Consideration**: Validation must account for nodes being defined across multiple files (merged) when checking for duplicates. + +### Formatting Rules + +The `fmt` command must format the code according to the following rules: +- **Indentation**: 2 spaces per indentation level. +- **Assignment**: 1 space before and after the `=` operator (e.g., `Field = Value`). +- **Comments**: + - 1 space after `//`, `//#`, or `//!`. + - Comments should "stick" to the next definition (no empty lines between the comment and the code it documents). + - **Placement**: + - Comments can be placed inline after a definition (e.g., `field = value // comment`). + - Comments can be placed after a subnode opening bracket (e.g., `node = { // comment`) or after an object definition. +- **Arrays**: 1 space after the opening bracket `{` and 1 space before the closing bracket `}` (e.g., `{ 1 2 3 }`). +- **Strings**: Quoted strings must preserve their quotes during formatting. ### Diagnostic Messages diff --git a/test_error.marte b/test/integration/error.marte similarity index 100% rename from test_error.marte rename to test/integration/error.marte diff --git a/test/integration/fmt.marte b/test/integration/fmt.marte new file mode 100644 index 0000000..4ee5a38 --- /dev/null +++ b/test/integration/fmt.marte @@ -0,0 +1,11 @@ +#package TEST.FMT + +// Detached comment + ++Node = { + Class = "MyClass" + + // Sticky comment + Field = 123 + Array = {1 2 3} +} \ No newline at end of file diff --git a/test/integration/fmt_inline.marte b/test/integration/fmt_inline.marte new file mode 100644 index 0000000..53c4441 --- /dev/null +++ b/test/integration/fmt_inline.marte @@ -0,0 +1,9 @@ +#package TEST.INLINE + ++Node = { // Comment after open brace + Field1 = "Value" // Comment after value + Field2 = 123 // Another comment + FieldArr = { 1 2 3 } // Comment after array + +Sub = { + } // Comment after object +} diff --git a/test/integration_test.go b/test/integration_test.go new file mode 100644 index 0000000..c8fae1f --- /dev/null +++ b/test/integration_test.go @@ -0,0 +1,107 @@ +package integration + +import ( + "bytes" + "io/ioutil" + "strings" + "testing" + + "github.com/marte-dev/marte-dev-tools/internal/formatter" + "github.com/marte-dev/marte-dev-tools/internal/index" + "github.com/marte-dev/marte-dev-tools/internal/parser" + "github.com/marte-dev/marte-dev-tools/internal/validator" +) + +func TestCheckCommand(t *testing.T) { + inputFile := "integration/error.marte" + content, err := ioutil.ReadFile(inputFile) + if err != nil { + t.Fatalf("Failed to read %s: %v", inputFile, err) + } + + p := parser.NewParser(string(content)) + config, err := p.Parse() + if err != nil { + t.Fatalf("Parse failed: %v", err) + } + + idx := index.NewIndex() + idx.IndexConfig(inputFile, config) + idx.ResolveReferences() + + v := validator.NewValidator(idx) + v.Validate(inputFile, config) + v.CheckUnused() + + foundError := false + for _, diag := range v.Diagnostics { + if strings.Contains(diag.Message, "must contain a 'Class' field") { + foundError = true + break + } + } + + if !foundError { + t.Errorf("Expected 'Class' field error in %s, but found none", inputFile) + } +} + +func TestFmtCommand(t *testing.T) { + inputFile := "integration/fmt.marte" + content, err := ioutil.ReadFile(inputFile) + if err != nil { + t.Fatalf("Failed to read %s: %v", inputFile, err) + } + + p := parser.NewParser(string(content)) + config, err := p.Parse() + if err != nil { + t.Fatalf("Parse failed: %v", err) + } + + var buf bytes.Buffer + formatter.Format(config, &buf) + + output := buf.String() + + // Check for indentation + if !strings.Contains(output, " Class = \"MyClass\"") { + t.Error("Expected 2-space indentation for Class field") + } + + // Check for sticky comments (no blank line between comment and field) + // We expect: + // // Sticky comment + // Field = 123 + if !strings.Contains(output, " // Sticky comment\n Field = 123") { + t.Errorf("Expected sticky comment to be immediately followed by field, got:\n%s", output) + } + + if !strings.Contains(output, "Array = { 1 2 3 }") { + t.Errorf("Expected formatted array '{ 1 2 3 }', got: %s", output) + } + + // Check for inline comments + inputFile2 := "integration/fmt_inline.marte" + content2, err := ioutil.ReadFile(inputFile2) + if err != nil { + t.Fatalf("Failed to read %s: %v", inputFile2, err) + } + + p2 := parser.NewParser(string(content2)) + config2, err := p2.Parse() + if err != nil { + t.Fatalf("Parse failed: %v", err) + } + + var buf2 bytes.Buffer + formatter.Format(config2, &buf2) + output2 := buf2.String() + + if !strings.Contains(output2, "+Node = { // Comment after open brace") { + t.Error("Expected inline comment after open brace") + } + if !strings.Contains(output2, "Field1 = \"Value\" // Comment after value") { + t.Error("Expected inline comment after field value") + } +}