Working on
This commit is contained in:
211
internal/formatter/formatter.go
Normal file
211
internal/formatter/formatter.go
Normal file
@@ -0,0 +1,211 @@
|
||||
package formatter
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/marte-dev/marte-dev-tools/internal/parser"
|
||||
)
|
||||
|
||||
// Insertable is a comment or pragma lifted out of the parse tree, queued
// for re-insertion into the formatted output at its original position.
type Insertable struct {
	// Position is the line/column the text occupied in the source.
	Position parser.Position
	// Text is the normalized comment/pragma text, including its marker.
	Text string
	// IsDoc marks entries that came from doc comments. NOTE(review): it is
	// only ever set, never read, in this file — confirm it is still needed.
	IsDoc bool
}
|
||||
|
||||
// Formatter writes a parsed configuration back out, merging the queued
// insertables (comments and pragmas) into the stream in source order.
type Formatter struct {
	// insertables holds comments/pragmas sorted by source position.
	insertables []Insertable
	// cursor indexes the next insertable not yet written.
	cursor int
	// writer receives the formatted output.
	writer io.Writer
}
|
||||
|
||||
func Format(config *parser.Configuration, w io.Writer) {
|
||||
ins := []Insertable{}
|
||||
for _, c := range config.Comments {
|
||||
ins = append(ins, Insertable{Position: c.Position, Text: fixComment(c.Text), IsDoc: c.Doc})
|
||||
}
|
||||
for _, p := range config.Pragmas {
|
||||
ins = append(ins, Insertable{Position: p.Position, Text: fixComment(p.Text)})
|
||||
}
|
||||
// Sort
|
||||
sort.Slice(ins, func(i, j int) bool {
|
||||
if ins[i].Position.Line != ins[j].Position.Line {
|
||||
return ins[i].Position.Line < ins[j].Position.Line
|
||||
}
|
||||
return ins[i].Position.Column < ins[j].Position.Column
|
||||
})
|
||||
|
||||
f := &Formatter{
|
||||
insertables: ins,
|
||||
writer: w,
|
||||
}
|
||||
f.formatConfig(config)
|
||||
}
|
||||
|
||||
// fixComment normalizes the spacing after a comment marker: "//x" becomes
// "// x", "//!x" becomes "//! x", and "//#x" becomes "//# x". Text that
// already has a space after the marker, is only the marker itself, or is
// not a comment at all is returned unchanged.
func fixComment(text string) string {
	// Longer markers first so "//!" and "//#" are not treated as plain "//".
	for _, marker := range []string{"//!", "//#", "//"} {
		if !strings.HasPrefix(text, marker) {
			continue
		}
		rest := text[len(marker):]
		if rest != "" && rest[0] != ' ' {
			return marker + " " + rest
		}
		break
	}
	return text
}
|
||||
|
||||
// formatConfig emits the whole configuration: the optional #package line
// first (followed by a blank line), then each top-level definition, each
// with any comments that preceded it and any same-line trailing comment,
// and finally any comments left over at the end of the file.
func (f *Formatter) formatConfig(config *parser.Configuration) {
	lastLine := 0
	if config.Package != nil {
		// Comments above the package line are printed detached from it.
		f.flushCommentsBefore(config.Package.Position, 0, false) // Package comments usually detached unless specifically doc
		fmt.Fprintf(f.writer, "#package %s", config.Package.URI)
		lastLine = config.Package.Position.Line
		// A comment on the same source line stays on the same output line.
		if f.hasTrailingComment(lastLine) {
			fmt.Fprintf(f.writer, " %s", f.popComment())
		}
		fmt.Fprintln(f.writer)
		// Blank line separating the package declaration from the body.
		fmt.Fprintln(f.writer)
	}

	for _, def := range config.Definitions {
		// Comments above a definition stick directly on top of it.
		f.flushCommentsBefore(def.Pos(), 0, true) // Stick to definition
		lastLine = f.formatDefinition(def, 0)
		if f.hasTrailingComment(lastLine) {
			fmt.Fprintf(f.writer, " %s", f.popComment())
		}
		fmt.Fprintln(f.writer)
	}

	// Anything still queued belongs after the last definition.
	f.flushRemainingComments(0)
}
|
||||
|
||||
func (f *Formatter) formatDefinition(def parser.Definition, indent int) int {
|
||||
indentStr := strings.Repeat(" ", indent)
|
||||
switch d := def.(type) {
|
||||
case *parser.Field:
|
||||
fmt.Fprintf(f.writer, "%s%s = ", indentStr, d.Name)
|
||||
endLine := f.formatValue(d.Value, indent)
|
||||
return endLine
|
||||
case *parser.ObjectNode:
|
||||
fmt.Fprintf(f.writer, "%s%s = {", indentStr, d.Name)
|
||||
if f.hasTrailingComment(d.Position.Line) {
|
||||
fmt.Fprintf(f.writer, " %s", f.popComment())
|
||||
}
|
||||
fmt.Fprintln(f.writer)
|
||||
|
||||
f.formatSubnode(d.Subnode, indent+1)
|
||||
|
||||
fmt.Fprintf(f.writer, "%s}", indentStr)
|
||||
return d.Subnode.EndPosition.Line
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (f *Formatter) formatSubnode(sub parser.Subnode, indent int) {
|
||||
for _, def := range sub.Definitions {
|
||||
f.flushCommentsBefore(def.Pos(), indent, true) // Stick to definition
|
||||
lastLine := f.formatDefinition(def, indent)
|
||||
if f.hasTrailingComment(lastLine) {
|
||||
fmt.Fprintf(f.writer, " %s", f.popComment())
|
||||
}
|
||||
fmt.Fprintln(f.writer)
|
||||
}
|
||||
f.flushCommentsBefore(sub.EndPosition, indent, false)
|
||||
}
|
||||
|
||||
// formatValue emits a single value and returns the source line on which it
// ended, so callers can attach same-line trailing comments. Scalars end on
// their own position line; arrays end on their closing-brace line when the
// parser recorded one. Unknown value kinds emit nothing and return 0.
func (f *Formatter) formatValue(val parser.Value, indent int) int {
	switch v := val.(type) {
	case *parser.StringValue:
		if v.Quoted {
			fmt.Fprintf(f.writer, "\"%s\"", v.Value)
		} else {
			// Should strictly parse unquoted as ReferenceValue or identifiers, but fallback here
			fmt.Fprint(f.writer, v.Value)
		}
		return v.Position.Line
	case *parser.IntValue:
		// Raw preserves the original spelling (hex, leading zeros, etc.).
		fmt.Fprint(f.writer, v.Raw)
		return v.Position.Line
	case *parser.FloatValue:
		fmt.Fprint(f.writer, v.Raw)
		return v.Position.Line
	case *parser.BoolValue:
		fmt.Fprintf(f.writer, "%v", v.Value)
		return v.Position.Line
	case *parser.ReferenceValue:
		fmt.Fprint(f.writer, v.Value)
		return v.Position.Line
	case *parser.ArrayValue:
		// Arrays render single-line, space-separated: "{ a b c }".
		fmt.Fprint(f.writer, "{ ")
		for i, e := range v.Elements {
			if i > 0 {
				fmt.Fprint(f.writer, " ")
			}
			f.formatValue(e, indent)
		}
		fmt.Fprint(f.writer, " }")
		if v.EndPosition.Line > 0 {
			return v.EndPosition.Line
		}
		// Fallback if EndPosition not set (shouldn't happen with new parser)
		if len(v.Elements) > 0 {
			return v.Elements[len(v.Elements)-1].Pos().Line
		}
		return v.Position.Line
	default:
		return 0
	}
}
|
||||
|
||||
// flushCommentsBefore writes, at the given indent, every queued insertable
// whose source position is strictly before pos, advancing the cursor past
// them. The caller prints its own content immediately afterwards, so these
// comments end up directly on top of it; blank lines that separated them
// in the source are squashed.
//
// NOTE(review): stick is currently unused — every call behaves the same
// regardless of its value. Confirm whether it was meant to control
// preserving a blank line between detached comments and the code below.
func (f *Formatter) flushCommentsBefore(pos parser.Position, indent int, stick bool) {
	indentStr := strings.Repeat(" ", indent)
	for f.cursor < len(f.insertables) {
		c := f.insertables[f.cursor]
		// Strictly-before check: earlier line, or same line but earlier column.
		if c.Position.Line < pos.Line || (c.Position.Line == pos.Line && c.Position.Column < pos.Column) {
			fmt.Fprintf(f.writer, "%s%s\n", indentStr, c.Text)
			f.cursor++
		} else {
			break
		}
	}
}
|
||||
|
||||
func (f *Formatter) flushRemainingComments(indent int) {
|
||||
indentStr := strings.Repeat(" ", indent)
|
||||
for f.cursor < len(f.insertables) {
|
||||
c := f.insertables[f.cursor]
|
||||
fmt.Fprintf(f.writer, "%s%s\n", indentStr, c.Text)
|
||||
f.cursor++
|
||||
}
|
||||
}
|
||||
|
||||
func (f *Formatter) hasTrailingComment(line int) bool {
|
||||
if f.cursor >= len(f.insertables) {
|
||||
return false
|
||||
}
|
||||
c := f.insertables[f.cursor]
|
||||
return c.Position.Line == line
|
||||
}
|
||||
|
||||
func (f *Formatter) popComment() string {
|
||||
if f.cursor >= len(f.insertables) {
|
||||
return ""
|
||||
}
|
||||
c := f.insertables[f.cursor]
|
||||
f.cursor++
|
||||
return c.Text
|
||||
}
|
||||
@@ -12,6 +12,8 @@ type Position struct {
|
||||
// Configuration is the root of a parsed document: the ordered definitions,
// the optional #package declaration, and the comments and pragmas lifted
// out of the token stream during parsing.
type Configuration struct {
	Definitions []Definition
	Package     *Package
	Comments    []Comment
	Pragmas     []Pragma
}
|
||||
|
||||
type Definition interface {
|
||||
@@ -39,6 +41,7 @@ func (o *ObjectNode) isDefinition() {}
|
||||
|
||||
// Subnode is the braced body of an object definition: the definitions it
// contains plus the source positions of its opening and closing braces.
type Subnode struct {
	Position    Position
	EndPosition Position
	Definitions []Definition
}
|
||||
|
||||
@@ -50,6 +53,7 @@ type Value interface {
|
||||
// StringValue is a textual value. Quoted records whether the source wrote
// it in double quotes, so the formatter can reproduce the original form.
type StringValue struct {
	Position Position
	Value    string
	Quoted   bool
}
|
||||
|
||||
func (v *StringValue) Pos() Position { return v.Position }
|
||||
@@ -90,8 +94,9 @@ func (v *ReferenceValue) Pos() Position { return v.Position }
|
||||
func (v *ReferenceValue) isValue() {}
|
||||
|
||||
type ArrayValue struct {
|
||||
Position Position
|
||||
Elements []Value
|
||||
Position Position
|
||||
EndPosition Position
|
||||
Elements []Value
|
||||
}
|
||||
|
||||
func (v *ArrayValue) Pos() Position { return v.Position }
|
||||
|
||||
@@ -31,18 +31,22 @@ type Token struct {
|
||||
}
|
||||
|
||||
type Lexer struct {
|
||||
input string
|
||||
start int
|
||||
pos int
|
||||
width int
|
||||
line int
|
||||
lineStart int
|
||||
input string
|
||||
start int
|
||||
pos int
|
||||
width int
|
||||
line int
|
||||
lineStart int
|
||||
startLine int
|
||||
startColumn int
|
||||
}
|
||||
|
||||
func NewLexer(input string) *Lexer {
|
||||
return &Lexer{
|
||||
input: input,
|
||||
line: 1,
|
||||
input: input,
|
||||
line: 1,
|
||||
startLine: 1,
|
||||
startColumn: 1,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -67,8 +71,8 @@ func (l *Lexer) backup() {
|
||||
r, _ := utf8.DecodeRuneInString(l.input[l.pos:])
|
||||
if r == '\n' {
|
||||
l.line--
|
||||
// This is tricky, we'd need to find the previous line start
|
||||
// For simplicity, let's just not backup over newlines or handle it better
|
||||
// We don't perfectly restore lineStart here as it's complex,
|
||||
// but we mostly backup single characters within a line.
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -79,16 +83,22 @@ func (l *Lexer) peek() rune {
|
||||
return r
|
||||
}
|
||||
|
||||
func (l *Lexer) ignore() {
|
||||
l.start = l.pos
|
||||
l.startLine = l.line
|
||||
l.startColumn = l.pos - l.lineStart + 1
|
||||
}
|
||||
|
||||
func (l *Lexer) emit(t TokenType) Token {
|
||||
tok := Token{
|
||||
Type: t,
|
||||
Type: t,
|
||||
Value: l.input[l.start:l.pos],
|
||||
Position: Position{
|
||||
Line: l.line,
|
||||
Column: l.start - l.lineStart + 1,
|
||||
Line: l.startLine,
|
||||
Column: l.startColumn,
|
||||
},
|
||||
}
|
||||
l.start = l.pos
|
||||
l.ignore()
|
||||
return tok
|
||||
}
|
||||
|
||||
@@ -100,7 +110,7 @@ func (l *Lexer) NextToken() Token {
|
||||
}
|
||||
|
||||
if unicode.IsSpace(r) {
|
||||
l.start = l.pos
|
||||
l.ignore()
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -117,10 +127,6 @@ func (l *Lexer) NextToken() Token {
|
||||
return l.lexComment()
|
||||
case '#':
|
||||
return l.lexPackage()
|
||||
case '!':
|
||||
// Might be part of pragma //!
|
||||
// But grammar says pragma is //!
|
||||
// So it should start with //
|
||||
case '+':
|
||||
fallthrough
|
||||
case '$':
|
||||
@@ -178,7 +184,6 @@ func (l *Lexer) lexString() Token {
|
||||
}
|
||||
|
||||
func (l *Lexer) lexNumber() Token {
|
||||
// Simple number lexing, could be improved for hex, binary, float
|
||||
for {
|
||||
r := l.next()
|
||||
if unicode.IsDigit(r) || r == '.' || r == 'x' || r == 'b' || r == 'e' || r == '-' {
|
||||
@@ -192,7 +197,6 @@ func (l *Lexer) lexNumber() Token {
|
||||
func (l *Lexer) lexComment() Token {
|
||||
r := l.next()
|
||||
if r == '/' {
|
||||
// It's a comment, docstring or pragma
|
||||
r = l.next()
|
||||
if r == '#' {
|
||||
return l.lexUntilNewline(TokenDocstring)
|
||||
@@ -209,15 +213,21 @@ func (l *Lexer) lexComment() Token {
|
||||
func (l *Lexer) lexUntilNewline(t TokenType) Token {
|
||||
for {
|
||||
r := l.next()
|
||||
if r == '\n' || r == -1 {
|
||||
if r == '\n' {
|
||||
l.backup()
|
||||
tok := l.emit(t)
|
||||
l.next() // consume \n
|
||||
l.ignore()
|
||||
return tok
|
||||
}
|
||||
if r == -1 {
|
||||
return l.emit(t)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (l *Lexer) lexPackage() Token {
|
||||
// #package
|
||||
l.start = l.pos - 1 // Include '#'
|
||||
// We are at '#', l.start is just before it
|
||||
for {
|
||||
r := l.next()
|
||||
if unicode.IsLetter(r) {
|
||||
@@ -230,4 +240,4 @@ func (l *Lexer) lexPackage() Token {
|
||||
return l.lexUntilNewline(TokenPackage)
|
||||
}
|
||||
return l.emit(TokenError)
|
||||
}
|
||||
}
|
||||
@@ -7,9 +7,11 @@ import (
|
||||
)
|
||||
|
||||
type Parser struct {
|
||||
lexer *Lexer
|
||||
tok Token
|
||||
peeked bool
|
||||
lexer *Lexer
|
||||
tok Token
|
||||
peeked bool
|
||||
comments []Comment
|
||||
pragmas []Pragma
|
||||
}
|
||||
|
||||
func NewParser(input string) *Parser {
|
||||
@@ -23,7 +25,7 @@ func (p *Parser) next() Token {
|
||||
p.peeked = false
|
||||
return p.tok
|
||||
}
|
||||
p.tok = p.lexer.NextToken()
|
||||
p.tok = p.fetchToken()
|
||||
return p.tok
|
||||
}
|
||||
|
||||
@@ -31,11 +33,27 @@ func (p *Parser) peek() Token {
|
||||
if p.peeked {
|
||||
return p.tok
|
||||
}
|
||||
p.tok = p.lexer.NextToken()
|
||||
p.tok = p.fetchToken()
|
||||
p.peeked = true
|
||||
return p.tok
|
||||
}
|
||||
|
||||
// fetchToken pulls tokens from the lexer, siphoning comment, docstring,
// and pragma tokens into p.comments / p.pragmas as a side effect, and
// returns the first token of any other type. Docstrings are stored as
// comments with Doc set so the formatter can distinguish them later.
func (p *Parser) fetchToken() Token {
	for {
		tok := p.lexer.NextToken()
		switch tok.Type {
		case TokenComment:
			p.comments = append(p.comments, Comment{Position: tok.Position, Text: tok.Value})
		case TokenDocstring:
			p.comments = append(p.comments, Comment{Position: tok.Position, Text: tok.Value, Doc: true})
		case TokenPragma:
			p.pragmas = append(p.pragmas, Pragma{Position: tok.Position, Text: tok.Value})
		default:
			return tok
		}
	}
}
|
||||
|
||||
func (p *Parser) Parse() (*Configuration, error) {
|
||||
config := &Configuration{}
|
||||
for {
|
||||
@@ -51,12 +69,6 @@ func (p *Parser) Parse() (*Configuration, error) {
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Skip comments, pragmas, docstrings for now in AST
|
||||
if tok.Type == TokenComment || tok.Type == TokenDocstring || tok.Type == TokenPragma {
|
||||
p.next()
|
||||
continue
|
||||
}
|
||||
|
||||
def, err := p.parseDefinition()
|
||||
if err != nil {
|
||||
@@ -64,6 +76,8 @@ func (p *Parser) Parse() (*Configuration, error) {
|
||||
}
|
||||
config.Definitions = append(config.Definitions, def)
|
||||
}
|
||||
config.Comments = p.comments
|
||||
config.Pragmas = p.pragmas
|
||||
return config, nil
|
||||
}
|
||||
|
||||
@@ -114,16 +128,13 @@ func (p *Parser) parseSubnode() (Subnode, error) {
|
||||
for {
|
||||
t := p.peek()
|
||||
if t.Type == TokenRBrace {
|
||||
p.next()
|
||||
endTok := p.next()
|
||||
sub.EndPosition = endTok.Position
|
||||
break
|
||||
}
|
||||
if t.Type == TokenEOF {
|
||||
return sub, fmt.Errorf("%d:%d: unexpected EOF, expected }", t.Position.Line, t.Position.Column)
|
||||
}
|
||||
if t.Type == TokenComment || t.Type == TokenDocstring || t.Type == TokenPragma {
|
||||
p.next()
|
||||
continue
|
||||
}
|
||||
def, err := p.parseDefinition()
|
||||
if err != nil {
|
||||
return sub, err
|
||||
@@ -136,11 +147,13 @@ func (p *Parser) parseSubnode() (Subnode, error) {
|
||||
func (p *Parser) parseValue() (Value, error) {
|
||||
tok := p.next()
|
||||
switch tok.Type {
|
||||
case TokenString:
|
||||
return &StringValue{
|
||||
Position: tok.Position,
|
||||
Value: strings.Trim(tok.Value, "\""),
|
||||
}, nil
|
||||
case TokenString:
|
||||
return &StringValue{
|
||||
Position: tok.Position,
|
||||
Value: strings.Trim(tok.Value, "\""),
|
||||
Quoted: true,
|
||||
}, nil
|
||||
|
||||
case TokenNumber:
|
||||
// Simplistic handling
|
||||
if strings.Contains(tok.Value, ".") || strings.Contains(tok.Value, "e") {
|
||||
@@ -150,7 +163,8 @@ func (p *Parser) parseValue() (Value, error) {
|
||||
i, _ := strconv.ParseInt(tok.Value, 0, 64)
|
||||
return &IntValue{Position: tok.Position, Value: i, Raw: tok.Value}, nil
|
||||
case TokenBool:
|
||||
return &BoolValue{Position: tok.Position, Value: tok.Value == "true"}, nil
|
||||
return &BoolValue{Position: tok.Position, Value: tok.Value == "true"},
|
||||
nil
|
||||
case TokenIdentifier:
|
||||
// reference?
|
||||
return &ReferenceValue{Position: tok.Position, Value: tok.Value}, nil
|
||||
@@ -160,7 +174,8 @@ func (p *Parser) parseValue() (Value, error) {
|
||||
for {
|
||||
t := p.peek()
|
||||
if t.Type == TokenRBrace {
|
||||
p.next()
|
||||
endTok := p.next()
|
||||
arr.EndPosition = endTok.Position
|
||||
break
|
||||
}
|
||||
val, err := p.parseValue()
|
||||
|
||||
Reference in New Issue
Block a user