package hclwrite

import (
    "Havoc/pkg/profile/yaotl/hclsyntax"
)

var inKeyword = hclsyntax.Keyword([]byte{'i', 'n'})

// placeholder token used when we don't have a token but we don't want
// to pass a real "nil" and complicate things with nil pointer checks
var nilToken = &Token{
    Type:         hclsyntax.TokenNil,
    Bytes:        []byte{},
    SpacesBefore: 0,
}

// format rewrites tokens within the given sequence, in-place, to adjust the
// whitespace around their content to achieve canonical formatting.
func format(tokens Tokens) {
    // Formatting is a multi-pass process. More details on the passes below,
    // but this is the overview:
    // - adjust the leading space on each line to create appropriate
    //   indentation
    // - adjust spaces between tokens in a single cell using a set of rules
    // - adjust the leading space in the "assign" and "comment" cells on each
    //   line to vertically align with neighboring lines.
    // All of these steps operate in-place on the given tokens, so a caller
    // may collect a flat sequence of all of the tokens underlying an AST
    // and pass it here and we will then indirectly modify the AST itself.
    // Formatting must change only whitespace. Specifically, that means
    // changing the SpacesBefore attribute on a token while leaving the
    // other token attributes unchanged.

    lines := linesForFormat(tokens)
    formatIndent(lines)
    formatSpaces(lines)
    formatCells(lines)
}

func formatIndent(lines []formatLine) {
    // Our methodology for indents is to take the input one line at a time
    // and count the bracketing delimiters on each line. If a line has a net
    // increase in open brackets, we increase the indent level by one and
    // remember how many new openers we had. If the line has a net _decrease_,
    // we'll compare it to the most recent number of openers and decrease the
    // dedent level by one each time we pass an indent level remembered
    // earlier.
    // The "indent stack" used here allows for us to recognize degenerate
    // input where brackets are not symmetrical within lines and avoid
    // pushing things too far left or right, creating confusion.

    // We'll start our indent stack at a reasonable capacity to minimize the
    // chance of us needing to grow it; 10 here means 10 levels of indent,
    // which should be more than enough for reasonable HCL uses.
    indents := make([]int, 0, 10)

    for i := range lines {
        line := &lines[i]
        if len(line.lead) == 0 {
            continue
        }

        if line.lead[0].Type == hclsyntax.TokenNewline {
            // Never place spaces before a newline
            line.lead[0].SpacesBefore = 0
            continue
        }

        netBrackets := 0
        for _, token := range line.lead {
            netBrackets += tokenBracketChange(token)
            if token.Type == hclsyntax.TokenOHeredoc {
                break
            }
        }

        for _, token := range line.assign {
            netBrackets += tokenBracketChange(token)
        }

        switch {
        case netBrackets > 0:
            line.lead[0].SpacesBefore = 2 * len(indents)
            indents = append(indents, netBrackets)
        case netBrackets < 0:
            closed := -netBrackets
            for closed > 0 && len(indents) > 0 {
                switch {

                case closed > indents[len(indents)-1]:
                    closed -= indents[len(indents)-1]
                    indents = indents[:len(indents)-1]

                case closed < indents[len(indents)-1]:
                    indents[len(indents)-1] -= closed
                    closed = 0

                default:
                    indents = indents[:len(indents)-1]
                    closed = 0
                }
            }
            line.lead[0].SpacesBefore = 2 * len(indents)
        default:
            line.lead[0].SpacesBefore = 2 * len(indents)
        }
    }
}

func formatSpaces(lines []formatLine) {
    for _, line := range lines {
        for i, token := range line.lead {
            var before, after *Token
            if i > 0 {
                before = line.lead[i-1]
            } else {
                before = nilToken
            }
            if i < (len(line.lead) - 1) {
                after = line.lead[i+1]
            } else {
                after = nilToken
            }
            if spaceAfterToken(token, before, after) {
                after.SpacesBefore = 1
            } else {
                after.SpacesBefore = 0
            }
        }
        for i, token := range line.assign {
            if i == 0 {
                // first token in "assign" always has one space before to
                // separate the equals sign from what it's assigning.
                token.SpacesBefore = 1
            }

            var before, after *Token
            if i > 0 {
                before = line.assign[i-1]
            } else {
                before = nilToken
            }
            if i < (len(line.assign) - 1) {
                after = line.assign[i+1]
            } else {
                after = nilToken
            }
            if spaceAfterToken(token, before, after) {
                after.SpacesBefore = 1
            } else {
                after.SpacesBefore = 0
            }
        }

    }
}

func formatCells(lines []formatLine) {

    chainStart := -1
    maxColumns := 0

    // We'll deal with the "assign" cell first, since moving that will
    // also impact the "comment" cell.
    closeAssignChain := func(i int) {
        for _, chainLine := range lines[chainStart:i] {
            columns := chainLine.lead.Columns()
            spaces := (maxColumns - columns) + 1
            chainLine.assign[0].SpacesBefore = spaces
        }
        chainStart = -1
        maxColumns = 0
    }
    for i, line := range lines {
        if line.assign == nil {
            if chainStart != -1 {
                closeAssignChain(i)
            }
        } else {
            if chainStart == -1 {
                chainStart = i
            }
            columns := line.lead.Columns()
            if columns > maxColumns {
                maxColumns = columns
            }
        }
    }
    if chainStart != -1 {
        closeAssignChain(len(lines))
    }

    // Now we'll deal with the comments
    closeCommentChain := func(i int) {
        for _, chainLine := range lines[chainStart:i] {
            columns := chainLine.lead.Columns() + chainLine.assign.Columns()
            spaces := (maxColumns - columns) + 1
            chainLine.comment[0].SpacesBefore = spaces
        }
        chainStart = -1
        maxColumns = 0
    }
    for i, line := range lines {
        if line.comment == nil {
            if chainStart != -1 {
                closeCommentChain(i)
            }
        } else {
            if chainStart == -1 {
                chainStart = i
            }
            columns := line.lead.Columns() + line.assign.Columns()
            if columns > maxColumns {
                maxColumns = columns
            }
        }
    }
    if chainStart != -1 {
        closeCommentChain(len(lines))
    }

}

// spaceAfterToken decides whether a particular subject token should have a
// space after it when surrounded by the given before and after tokens.
// "before" can be TokenNil, if the subject token is at the start of a sequence.
func spaceAfterToken(subject, before, after *Token) bool {
    switch {

    case after.Type == hclsyntax.TokenNewline || after.Type == hclsyntax.TokenNil:
        // Never add spaces before a newline
        return false

    case subject.Type == hclsyntax.TokenIdent && after.Type == hclsyntax.TokenOParen:
        // Don't split a function name from open paren in a call
        return false

    case subject.Type == hclsyntax.TokenDot || after.Type == hclsyntax.TokenDot:
        // Don't use spaces around attribute access dots
        return false

    case after.Type == hclsyntax.TokenComma || after.Type == hclsyntax.TokenEllipsis:
        // No space right before a comma or ... in an argument list
        return false

    case subject.Type == hclsyntax.TokenComma:
        // Always a space after a comma
        return true

    case subject.Type == hclsyntax.TokenQuotedLit || subject.Type == hclsyntax.TokenStringLit || subject.Type == hclsyntax.TokenOQuote || subject.Type == hclsyntax.TokenOHeredoc || after.Type == hclsyntax.TokenQuotedLit || after.Type == hclsyntax.TokenStringLit || after.Type == hclsyntax.TokenCQuote || after.Type == hclsyntax.TokenCHeredoc:
        // No extra spaces within templates
        return false

    case inKeyword.TokenMatches(subject.asHCLSyntax()) && before.Type == hclsyntax.TokenIdent:
        // This is a special case for inside for expressions where a user
        // might want to use a literal tuple constructor:
        // [for x in [foo]: x]
        // ... in that case, we would normally produce in[foo] thinking that
        // in is a reference, but we'll recognize it as a keyword here instead
        // to make the result less confusing.
        return true

    case after.Type == hclsyntax.TokenOBrack && (subject.Type == hclsyntax.TokenIdent || subject.Type == hclsyntax.TokenNumberLit || tokenBracketChange(subject) < 0):
        return false

    case subject.Type == hclsyntax.TokenBang:
        // No space after a bang
        return false

    case subject.Type == hclsyntax.TokenMinus:
        // Since a minus can either be subtraction or negation, and the latter
        // should _not_ have a space after it, we need to use some heuristics
        // to decide which case this is.
        // We guess that we have a negation if the token before doesn't look
        // like it could be the end of an expression.

        switch before.Type {

        case hclsyntax.TokenNil:
            // Minus at the start of input must be a negation
            return false

        case hclsyntax.TokenOParen, hclsyntax.TokenOBrace, hclsyntax.TokenOBrack, hclsyntax.TokenEqual, hclsyntax.TokenColon, hclsyntax.TokenComma, hclsyntax.TokenQuestion:
            // Minus immediately after an opening bracket or separator must be a negation.
            return false

        case hclsyntax.TokenPlus, hclsyntax.TokenStar, hclsyntax.TokenSlash, hclsyntax.TokenPercent, hclsyntax.TokenMinus:
            // Minus immediately after another arithmetic operator must be negation.
            return false

        case hclsyntax.TokenEqualOp, hclsyntax.TokenNotEqual, hclsyntax.TokenGreaterThan, hclsyntax.TokenGreaterThanEq, hclsyntax.TokenLessThan, hclsyntax.TokenLessThanEq:
            // Minus immediately after another comparison operator must be negation.
            return false

        case hclsyntax.TokenAnd, hclsyntax.TokenOr, hclsyntax.TokenBang:
            // Minus immediately after logical operator doesn't make sense but probably intended as negation.
            return false

        default:
            return true
        }

    case subject.Type == hclsyntax.TokenOBrace || after.Type == hclsyntax.TokenCBrace:
        // Unlike other bracket types, braces have spaces on both sides of them,
        // both in single-line nested blocks foo { bar = baz } and in object
        // constructor expressions foo = { bar = baz }.
        if subject.Type == hclsyntax.TokenOBrace && after.Type == hclsyntax.TokenCBrace {
            // An open brace followed by a close brace is an exception, however.
            // e.g. foo {} rather than foo { }
            return false
        }
        return true

    // In the unlikely event that an interpolation expression is just
    // a single object constructor, we'll put a space between the ${ and
    // the following { to make this more obvious, and then the same
    // thing for the two braces at the end.
    case (subject.Type == hclsyntax.TokenTemplateInterp || subject.Type == hclsyntax.TokenTemplateControl) && after.Type == hclsyntax.TokenOBrace:
        return true
    case subject.Type == hclsyntax.TokenCBrace && after.Type == hclsyntax.TokenTemplateSeqEnd:
        return true

    // Don't add spaces between interpolated items
    case subject.Type == hclsyntax.TokenTemplateSeqEnd && (after.Type == hclsyntax.TokenTemplateInterp || after.Type == hclsyntax.TokenTemplateControl):
        return false

    case tokenBracketChange(subject) > 0:
        // No spaces after open brackets
        return false

    case tokenBracketChange(after) < 0:
        // No spaces before close brackets
        return false

    default:
        // Most tokens are space-separated
        return true

    }
}

func linesForFormat(tokens Tokens) []formatLine {
    if len(tokens) == 0 {
        return make([]formatLine, 0)
    }

    // first we'll count our lines, so we can allocate the array for them in
    // a single block. (We want to minimize memory pressure in this codepath,
    // so it can be run somewhat-frequently by editor integrations.)
    lineCount := 1 // if there are zero newlines then there is one line
    for _, tok := range tokens {
        if tokenIsNewline(tok) {
            lineCount++
        }
    }

    // To start, we'll just put everything in the "lead" cell on each line,
    // and then do another pass over the lines afterwards to adjust.
    lines := make([]formatLine, lineCount)
    li := 0
    lineStart := 0
    for i, tok := range tokens {
        if tok.Type == hclsyntax.TokenEOF {
            // The EOF token doesn't belong to any line, and terminates the
            // token sequence.
            lines[li].lead = tokens[lineStart:i]
            break
        }

        if tokenIsNewline(tok) {
            lines[li].lead = tokens[lineStart : i+1]
            lineStart = i + 1
            li++
        }
    }

    // If a set of tokens doesn't end in TokenEOF (e.g. because it's a
    // fragment of tokens from the middle of a file) then we might fall
    // out here with a line still pending.
    if lineStart < len(tokens) {
        lines[li].lead = tokens[lineStart:]
        if lines[li].lead[len(lines[li].lead)-1].Type == hclsyntax.TokenEOF {
            lines[li].lead = lines[li].lead[:len(lines[li].lead)-1]
        }
    }

    // Now we'll pick off any trailing comments and attribute assignments
    // to shuffle off into the "comment" and "assign" cells.
    for i := range lines {
        line := &lines[i]

        if len(line.lead) == 0 {
            // if the line is empty then there's nothing for us to do
            // (this should happen only for the final line, because all other
            // lines would have a newline token of some kind)
            continue
        }

        if len(line.lead) > 1 && line.lead[len(line.lead)-1].Type == hclsyntax.TokenComment {
            line.comment = line.lead[len(line.lead)-1:]
            line.lead = line.lead[:len(line.lead)-1]
        }

        for i, tok := range line.lead {
            if i > 0 && tok.Type == hclsyntax.TokenEqual {
                // We only move the tokens into "assign" if the RHS seems to
                // be a whole expression, which we determine by counting
                // brackets. If there's a net positive number of brackets
                // then that suggests we're introducing a multi-line expression.
                netBrackets := 0
                for _, token := range line.lead[i:] {
                    netBrackets += tokenBracketChange(token)
                }

                if netBrackets == 0 {
                    line.assign = line.lead[i:]
                    line.lead = line.lead[:i]
                }
                break
            }
        }
    }

    return lines
}

func tokenIsNewline(tok *Token) bool {
    if tok.Type == hclsyntax.TokenNewline {
        return true
    } else if tok.Type == hclsyntax.TokenComment {
        // Single line tokens (# and //) consume their terminating newline,
        // so we need to treat them as newline tokens as well.
        if len(tok.Bytes) > 0 && tok.Bytes[len(tok.Bytes)-1] == '\n' {
            return true
        }
    }
    return false
}

func tokenBracketChange(tok *Token) int {
    switch tok.Type {
    case hclsyntax.TokenOBrace, hclsyntax.TokenOBrack, hclsyntax.TokenOParen, hclsyntax.TokenTemplateControl, hclsyntax.TokenTemplateInterp:
        return 1
    case hclsyntax.TokenCBrace, hclsyntax.TokenCBrack, hclsyntax.TokenCParen, hclsyntax.TokenTemplateSeqEnd:
        return -1
    default:
        return 0
    }
}

// formatLine represents a single line of source code for formatting purposes,
// splitting its tokens into up to three "cells":
//
// lead: always present, representing everything up to one of the others
// assign: if line contains an attribute assignment, represents the tokens
//    starting at (and including) the equals symbol
// comment: if line contains any non-comment tokens and ends with a
//    single-line comment token, represents the comment.
//
// When formatting, the leading spaces of the first tokens in each of these
// cells is adjusted to align vertically their occurrences on consecutive
// rows.
type formatLine struct {
    lead    Tokens
    assign  Tokens
    comment Tokens
}
