package codec

import (
	"fmt"
	"math"
	"strings"

	"github.com/zricethezav/gitleaks/v8/regexp"
)

var (
	// encodingsRe is a single regex that joins every encoding pattern as a
	// named capture group, so that one pass over the text can detect
	// multiple encodings. It is compiled in init.
	encodingsRe *regexp.Regexp
	// encodings contains all the encoding configurations for the detector.
	// The order is significant: init derives each entry's precedence from
	// its position (earlier entries get a higher precedence), and
	// findEncodingMatches maps capture group positions back onto this slice.
	// Put more specific encodings first, and put encodings that only
	// partially encode a value (e.g. percent) ahead of encodings that fully
	// encode the string (e.g. base64). When two encoding matches overlap,
	// this order determines which one is dropped from the current pass and
	// has to wait for a later one.
	encodings = []*encoding{
		// A %XX escape, optionally running through to a later %XX escape.
		{
			kind:    percentKind,
			pattern: `%[0-9A-Fa-f]{2}(?:.*%[0-9A-Fa-f]{2})?`,
			decode:  decodePercent,
		},
		// Either one or more U+XXXX code points, each followed by whitespace
		// or the end of the text, or one or more \uXXXX / \\uXXXX escapes
		// (matched case-insensitively).
		{
			kind:    unicodeKind,
			pattern: `(?:(?:U\+[a-fA-F0-9]{4}(?:\s|$))+|(?i)(?:\\{1,2}u[a-fA-F0-9]{4})+)`,
			decode:  decodeUnicode,
		},
		// A run of 32 or more hex digits.
		{
			kind:    hexKind,
			pattern: `[0-9A-Fa-f]{32,}`,
			decode:  decodeHex,
		},
		// A run of 16 or more word, '/', '+' or '-' characters followed by
		// up to two '=' padding characters.
		{
			kind:    base64Kind,
			pattern: `[\w\/+-]{16,}={0,2}`,
			decode:  decodeBase64,
		},
	}
)

// encodingNames maps each encodingKind to its name. The entry at index i is
// the name of the kind whose value is 2^i, so the order here must match the
// encodingKind values.
var encodingNames = []string{
	"percent",
	"unicode",
	"hex",
	"base64",
}

// encodingKind identifies an encoding as a bit flag. Flags can be or'd
// together to capture all of the unique encodings that were present in a
// segment; the kinds method splits a combined value back into its flags.
type encodingKind int

// The individual encoding kinds. Every kind occupies its own bit so that
// kinds can be or'd together; iota keeps the values at successive powers of
// two (1, 2, 4, 8). The order must match encodingNames, whose index i names
// the kind with the value 1<<i.
const (
	percentKind encodingKind = 1 << iota
	unicodeKind
	hexKind
	base64Kind
)

// String returns the name of the encoding kind as listed in encodingNames.
// It returns an empty string when e is zero or negative, or when its bit
// position has no entry in encodingNames. For a value with several bits set,
// the name of the highest set bit is returned.
func (e encodingKind) String() string {
	// Log2 is only defined for positive values: zero yields -Inf and a
	// negative value yields NaN, both of which would turn into an invalid
	// index below and panic.
	if e <= 0 {
		return ""
	}
	i := int(math.Log2(float64(e)))
	if i >= len(encodingNames) {
		return ""
	}
	return encodingNames[i]
}

// kinds returns the individual encodingKinds that are combined in e, ordered
// from the lowest bit to the highest. Only bits that have an entry in
// encodingNames are considered. The result is never nil; it is empty when
// none of the known bits are set.
func (e encodingKind) kinds() []encodingKind {
	kinds := make([]encodingKind, 0, len(encodingNames))

	for i := range encodingNames {
		// Test each known flag with an integer shift instead of going
		// through floating-point math.
		if bit := encodingKind(1) << uint(i); e&bit != 0 {
			kinds = append(kinds, bit)
		}
	}

	return kinds
}

// encodingMatch represents a match of an encoding in the text: which
// encoding matched and where.
type encodingMatch struct {
	// the encoding whose pattern produced this match
	encoding *encoding
	// embedded span of the match; findEncodingMatches fills its start and
	// end from the regex submatch indexes
	startEnd
}

// encoding represents a type of encoding supported by the decoder.
type encoding struct {
	// the kind of decoding (e.g. base64, etc)
	kind encodingKind
	// the regex pattern that matches the encoding format
	pattern string
	// take the match and return the decoded value
	decode func(string) string
	// determine which encoding should win out when two overlap; a higher
	// value wins. It is not set in the encodings literal: init assigns it
	// from the entry's position in that slice.
	precedence int
}

// init assigns a precedence to every entry in encodings (the first entry gets
// the highest value) and compiles encodingsRe as an alternation of the
// patterns, each wrapped in a capture group named after its kind.
func init() {
	total := len(encodings)
	groups := make([]string, 0, total)
	for idx := range encodings {
		enc := encodings[idx]
		// Earlier entries outrank later ones.
		enc.precedence = total - idx
		group := fmt.Sprintf("(?P<%s>%s)", enc.kind, enc.pattern)
		groups = append(groups, group)
	}
	combined := strings.Join(groups, "|")
	encodingsRe = regexp.MustCompile(combined)
}

// findEncodingMatches runs the combined encodings regex over data and returns
// the encoded spans it finds in this pass. A match that overlaps a
// neighboring match of higher precedence is left out of the result.
func findEncodingMatches(data string) []encodingMatch {
	var found []encodingMatch
	for _, loc := range encodingsRe.FindAllStringSubmatchIndex(data, -1) {
		// loc[0:2] covers the whole match; every following pair belongs to
		// one capture group, and group g lines up with encodings[g-1].
		for g := 1; 2*g < len(loc); g++ {
			if loc[2*g] < 0 {
				continue // this group did not take part in the match
			}
			found = append(found, encodingMatch{
				encoding: encodings[g-1],
				startEnd: startEnd{
					start: loc[2*g],
					end:   loc[2*g+1],
				},
			})
		}
	}

	total := len(found)
	if total == 1 {
		// A lone match has no neighbors to compete with.
		return found
	}

	// outranked reports whether other overlaps m and has the higher
	// precedence.
	outranked := func(m, other encodingMatch) bool {
		return m.overlaps(other.startEnd) && other.encoding.precedence > m.encoding.precedence
	}

	// Drop lower precedence matches that overlap one of their neighbors.
	kept := make([]encodingMatch, 0, total)
	for idx, cur := range found {
		if idx > 0 && outranked(cur, found[idx-1]) {
			continue
		}
		if idx+1 < total && outranked(cur, found[idx+1]) {
			continue
		}
		kept = append(kept, cur)
	}

	return kept
}
