# This implementation of glob matching for `File.match?` is a port from the Rust
# crate https://github.com/devongovett/glob-match, which is adapted from the
# linear-time algorithm described in https://research.swtch.com/glob

class File < IO::FileDescriptor
  class BadPatternError < Exception
  end

  # Matches *path* against *pattern*.
  #
  # The pattern syntax is similar to shell filename globbing. It may contain the following metacharacters:
  #
  # * `*`: Wildcard matches zero or more characters, except for directory separators.
  # * `**`: Globstar matches zero or more characters, including directory separators.
  #   It must match a complete path segment, i.e. it must be wrapped in `/`
  #   except for the beginning and end of the pattern.
  # * `?`: Matches a single Unicode character, except for directory separators.
  # * Character classes:
  #   * `[abc]`: Character set matches one of the Unicode characters contained in the brackets.
  #   * `[^abc]`: Negated character set matches any Unicode character _except_ those contained in the brackets.
  #   * `[a-z]`: Character range matches one Unicode character contained in the character range.
  #   * `[^a-z]`: Negated character range matches one Unicode character _except_ those contained in the character range.
  # * `{a,b}`: Branches matches one of the subpatterns contained in the braces. Subpatterns
  #   may contain any other pattern feature, including nested branches (max nesting depth is 10 levels deep).
  # * `\\`: Backslash escapes the next character.
  #
  # Multiple character pattern can be combined in the same brackets to define a
  # character class (for example: `[0-9a-f]`).
  #
  # If *path* is a `Path`, all directory separators supported by *path* are
  # recognized, according to the path's kind. If *path* is a `String`, only `/`
  # is considered a directory separator.
  #
  # NOTE: Only `/` in *pattern* matches directory separators in *path*.
  def self.match?(pattern : String, path : Path | String) : Bool
    if path.is_a?(Path)
      separators = Path.separators(path.@kind)
      path = path.to_s
    else
      separators = Path.separators(Path::Kind::POSIX)
    end

    match_internal(pattern, path, separators)
  end

  private def self.match_internal(glob_str, path_str, separators)
    state = State.new(separators: separators.to_static_array.to_slice)

    # Store the state when we see an opening '{' brace in a stack.
    # Up to 10 nested braces are supported.
    brace_stack_data = uninitialized StaticArray({UInt32, UInt32}, 10)
    brace_stack = BraceStack.new(brace_stack_data.to_slice)

    # First, check if the pattern is negated with a leading '!' character.
    # Multiple negations can occur.
    negated = false

    # TODO: Enable negation
    # while state.glob_index < glob.size && glob[state.glob_index] === '!'
    #   negated = !negated
    #   state.glob_index += 1
    # end

    matched = state.match_from(glob_str, path_str, 0, brace_stack)

    matched != negated
  end

  private record State,
    separators : Slice(Char),
    path_index = 0_u64,
    glob_index = 0_u64,
    brace_depth = 0_u64,
    wildcard = Wildcard.new,
    globstar = Wildcard.new

  private record Wildcard,
    glob_index = 0_u32,
    path_index = 0_u32,
    brace_depth = 0_u32 do
    setter path_index
  end

  struct BraceStack(T)
    def initialize(@slice : Slice(T), @size = 0)
    end

    getter size

    @[AlwaysInline]
    def push(item : T)
      if @size >= @slice.size
        raise BadPatternError.new("Brace nesting too deep: must not exceed 10 levels")
      end
      @slice.unsafe_put(@size, item)
      @size += 1
    end

    @[AlwaysInline]
    def pop : T
      @size -= 1
      @slice[@size]
    end

    def to_slice
      @slice[0, @size]
    end
  end

  struct State
    @[AlwaysInline]
    def backtrack
      @glob_index = @wildcard.glob_index.to_u64
      @path_index = @wildcard.path_index.to_u64
      @brace_depth = @wildcard.brace_depth.to_u64
    end

    # Coalesce multiple ** segments into one.
    @[AlwaysInline]
    private def skip_globstars(glob)
      glob_index = @glob_index + 2
      while glob_index + 4 <= glob.size && glob[glob_index, 4] == "/**/".to_slice
        glob_index += 3
      end

      if glob[glob_index..] == "/**".to_slice
        glob_index += 3
      end

      @glob_index = glob_index - 2
    end

    @[AlwaysInline]
    def skip_to_separator(path, is_end_invalid)
      if @path_index == path.size
        @wildcard.path_index += 1
        return
      end

      path_index = @path_index
      while path_index < path.size && !separators.includes?(path[path_index].unsafe_chr)
        path_index += 1
      end

      if is_end_invalid || path_index != path.size
        path_index += 1
      end

      @wildcard.path_index = path_index.to_u32!
      @globstar = @wildcard
    end

    @[AlwaysInline]
    def skip_branch(glob)
      in_brackets = false
      end_brace_depth = @brace_depth - 1

      while @glob_index < glob.size
        c = glob[@glob_index]
        # Skip nested braces.
        if c === '{' && !in_brackets
          @brace_depth += 1
        elsif c === '}' && !in_brackets
          @brace_depth -= 1
          if @brace_depth == end_brace_depth
            @glob_index += 1
            return
          end
        elsif c === '[' && !in_brackets
          in_brackets = true
        elsif c === ']'
          in_brackets = false
        elsif c === '\\'
          @glob_index += 1
        end
        @glob_index += 1
      end
    end

    def match_brace_branch(
      glob : String,
      path : String,
      open_brace_index,
      branch_index,
      brace_stack,
    )
      brace_stack.push({open_brace_index.to_u32!, branch_index})

      branch_state = self.copy_with(
        glob_index: branch_index.to_u64,
        brace_depth: brace_stack.size.to_u64
      )

      matched = branch_state.match_from(glob, path, branch_index, brace_stack)

      brace_stack.pop

      matched
    end

    def match_brace(glob : String, path : String, brace_stack)
      brace_depth = 0
      in_brackets = false
      open_brace_index = @glob_index
      branch_index = 0_u32

      while @glob_index < glob.bytesize
        c = glob.to_slice[@glob_index]
        # Skip nested braces.
        if c === '{' && !in_brackets
          brace_depth += 1
          if brace_depth == 1
            branch_index = (@glob_index + 1).to_u32!
          end
        elsif c === '}' && !in_brackets
          brace_depth -= 1
          if brace_depth == 0
            return true if match_brace_branch(glob, path, open_brace_index, branch_index, brace_stack)
            break
          end
        elsif c === ',' && brace_depth == 1
          return true if match_brace_branch(glob, path, open_brace_index, branch_index, brace_stack)
          branch_index = (@glob_index + 1).to_u32!
        elsif c === '[' && !in_brackets
          in_brackets = true
        elsif c === ']'
          in_brackets = false
        elsif c === '\\'
          @glob_index += 1
        end
        @glob_index += 1
      end
      false
    end

    def match_from(glob_str, path_str, match_start, brace_stack)
      glob = glob_str.to_slice
      path = path_str.to_slice

      while @glob_index < glob.size || @path_index < path.size
        if @glob_index < glob.size
          g = glob[@glob_index]
          if '*' === g
            is_globstar = @glob_index + 1 < glob.size && glob[@glob_index + 1] === '*'

            if is_globstar
              skip_globstars(glob)
            end

            @wildcard = Wildcard.new(
              @glob_index.to_u32!,
              @path_index.to_u32! + 1,
              @brace_depth.to_u32!
            )

            in_globstar = false
            # `**` allows path separators, whereas `*` does not.
            # However, `**` must be a full path component, i.e. `a/**/b` not `a**b`.
            if is_globstar
              @glob_index += 2
              is_end_invalid = @glob_index != glob.size

              if (@glob_index.to_i64 - match_start < 3 || glob[@glob_index - 3] === '/') && (!is_end_invalid || glob[@glob_index] === '/')
                if is_end_invalid
                  @glob_index += 1
                end

                skip_to_separator(path, is_end_invalid)
                in_globstar = true
              end
            else
              @glob_index += 1
            end

            if !in_globstar && @path_index < path.size && separators.includes?(path[@path_index].unsafe_chr)
              @wildcard = @globstar
            end

            next
          elsif '?' === g && @path_index < path.size
            if !separators.includes?(path[@path_index].unsafe_chr)
              @glob_index += 1
              _, @path_index = consume_unicode_character(path_str, @path_index)
              next
            end
          elsif '[' === g && @path_index < path.size
            @glob_index += 1

            # Check if the character class is negated.
            negated_class = false
            if @glob_index < glob.size && glob[@glob_index] === '^'
              negated_class = true
              @glob_index += 1
            end

            # Try each range.
            first = true
            is_match = false

            c, new_path_index = consume_unicode_glob_character(path_str, @path_index)

            while @glob_index < glob.size && (first || !(']' === glob[@glob_index]))
              low, @glob_index = consume_unicode_glob_character(glob_str, @glob_index)

              # If there is a - and the following character is not ], read the range end character.
              if @glob_index + 1 < glob.size &&
                 glob[@glob_index] === '-' &&
                 !(glob[@glob_index + 1] === ']')
                @glob_index += 1
                high, @glob_index = consume_unicode_glob_character(glob_str, @glob_index)
              else
                high = low
              end

              if low <= c <= high
                is_match = true
              end

              first = false
            end
            if @glob_index >= glob.size
              raise BadPatternError.new "unterminated character set"
            end
            @glob_index += 1
            if is_match != negated_class
              @path_index = new_path_index
              next
            end
          elsif g === '{'
            if brace_stack_entry = brace_stack.to_slice.find { |open_brace_index, _| open_brace_index == @glob_index }
              _, branch_index = brace_stack_entry
              @glob_index = branch_index.to_u64
              @brace_depth += 1
              next
            end

            return match_brace(glob_str, path_str, brace_stack)
          elsif (g === '}' || g === ',') && @brace_depth > 0
            skip_branch(glob)
            next
          elsif @path_index < path.size
            c = g
            # Match escaped characters as literals.
            if !unescape(pointerof(c), glob, pointerof(@glob_index))
              raise BadPatternError.new "Empty escape character"
            end

            is_match = if c === '/'
                         separators.includes?(path[@path_index].unsafe_chr)
                       else
                         path[@path_index] == c
                       end

            if is_match
              @glob_index += 1
              @path_index += 1

              if c === '/'
                @wildcard = @globstar
              end

              next
            end
          end
        end

        # If we didn't match, restore state to the previous star pattern.
        if @wildcard.path_index > 0 && @wildcard.path_index.to_u32! <= path.size
          backtrack
          next
        end

        return false
      end

      true
    end
  end
end

@[AlwaysInline]
private def unescape(c, glob, glob_index) : Bool
  if '\\' === c.value
    glob_index.value += 1
    if glob_index.value >= glob.size
      # Invalid pattern!
      return false
    end
    c.value = case escaped_char = glob[glob_index.value]
              when 'a' then 0x61_u8
              when 'b' then 0x08_u8
              when 'n' then '\n'.ord.to_u8!
              when 'r' then '\r'.ord.to_u8!
              when 't' then '\t'.ord.to_u8!
              else          escaped_char
              end
  end

  true
end

@[AlwaysInline]
private def consume_unicode_character(string, index) : {Char, UInt64}
  reader = Char::Reader.new(string, index)
  {reader.current_char, index + reader.current_char_width}
end

@[AlwaysInline]
private def consume_unicode_glob_character(string, index) : {Char, UInt64}
  c = string.to_unsafe[index]

  if !unescape(pointerof(c), string.to_slice, pointerof(index))
    raise File::BadPatternError.new("Invalid pattern")
  end

  if c < 0x80
    {c.unsafe_chr, index + 1}
  else
    consume_unicode_character(string, index)
  end
end