# The `Base64` module provides for the encoding (`encode`, `strict_encode`,
# `urlsafe_encode`) and decoding (`decode`)
# of binary data using a base64 representation.
#
# ### Example
#
# A simple encoding and decoding.
#
# ```
# require "base64"
#
# enc = Base64.encode("Send reinforcements") # => "U2VuZCByZWluZm9yY2VtZW50cw==\n"
# plain = Base64.decode_string(enc)          # => "Send reinforcements"
# ```
#
# The purpose of using base64 to encode data is that it translates any binary
# data into purely printable characters.
module Base64
  extend self

  class Error < Exception; end

  private CHARS_STD  = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
  private CHARS_SAFE = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
  private LINE_SIZE  = 60
  private PAD        = '='.ord.to_u8
  private NL         = '\n'.ord.to_u8
  private NR         = '\r'.ord.to_u8

  # Returns the base64-encoded version of *data*.
  # This method complies with [RFC 2045](https://tools.ietf.org/html/rfc2045).
  # Line feeds are added to every 60 encoded characters.
  #
  # ```
  # puts Base64.encode("Now is the time for all good coders\nto learn Crystal")
  # ```
  #
  # Generates:
  #
  # ```text
  # Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g
  # Q3J5c3RhbA==
  # ```
  def encode(data) : String
    slice = data.to_slice
    String.new(encode_size(slice.size, new_lines: true)) do |buf|
      appender = buf.appender
      encode_with_new_lines(slice) { |byte| appender << byte }
      size = appender.size
      {size, size}
    end
  end

  # Writes the base64-encoded version of *data* to *io*.
  # This method complies with [RFC 2045](https://tools.ietf.org/html/rfc2045).
  # Line feeds are added to every 60 encoded characters.
  #
  # ```
  # Base64.encode("Now is the time for all good coders\nto learn Crystal", STDOUT)
  # ```
  def encode(data, io : IO)
    count = 0
    encode_with_new_lines(data.to_slice) do |byte|
      io << byte.unsafe_chr
      count += 1
    end
    io.flush
    count
  end

  private def encode_with_new_lines(data, &)
    inc = 0
    to_base64(data.to_slice, CHARS_STD, pad: true) do |byte|
      yield byte
      inc += 1
      if inc >= LINE_SIZE
        yield NL
        inc = 0
      end
    end
    if inc > 0
      yield NL
    end
  end

  # Returns the base64-encoded version of *data* with no newlines.
  # This method complies with [RFC 4648](https://tools.ietf.org/html/rfc4648).
  #
  # ```
  # puts Base64.strict_encode("Now is the time for all good coders\nto learn Crystal")
  # ```
  #
  # Generates:
  #
  # ```text
  # Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4gQ3J5c3RhbA==
  # ```
  def strict_encode(data) : String
    strict_encode data, CHARS_STD, pad: true
  end

  private def strict_encode(data, alphabet, pad = false)
    slice = data.to_slice
    String.new(encode_size(slice.size)) do |buf|
      appender = buf.appender
      to_base64(slice, alphabet, pad: pad) { |byte| appender << byte }
      size = appender.size
      {size, size}
    end
  end

  # Writes the base64-encoded version of *data* with no newlines to *io*.
  # This method complies with [RFC 4648](https://tools.ietf.org/html/rfc4648).
  #
  # ```
  # Base64.strict_encode("Now is the time for all good coders\nto learn Crystal", STDOUT)
  # ```
  def strict_encode(data, io : IO)
    strict_encode_to_io_internal(data, io, CHARS_STD, pad: true)
  end

  private def strict_encode_to_io_internal(data, io, alphabet, pad)
    count = 0
    to_base64(data.to_slice, alphabet, pad: pad) do |byte|
      count += 1
      io << byte.unsafe_chr
    end
    io.flush
    count
  end

  # Returns the base64-encoded version of *data* using a urlsafe alphabet.
  # This method complies with "Base 64 Encoding with URL and Filename Safe
  # Alphabet" in [RFC 4648](https://tools.ietf.org/html/rfc4648).
  #
  # The alphabet uses `'-'` instead of `'+'` and `'_'` instead of `'/'`.
  #
  # The *padding* parameter defaults to `true`. When `false`, enough `=` characters
  # are not added to make the output divisible by 4.
  def urlsafe_encode(data, padding = true) : String
    slice = data.to_slice
    String.new(encode_size(slice.size)) do |buf|
      appender = buf.appender
      to_base64(slice, CHARS_SAFE, pad: padding) { |byte| appender << byte }
      size = appender.size
      {size, size}
    end
  end

  # Writes the base64-encoded version of *data* using a urlsafe alphabet to *io*.
  # This method complies with "Base 64 Encoding with URL and Filename Safe
  # Alphabet" in [RFC 4648](https://tools.ietf.org/html/rfc4648).
  #
  # The alphabet uses `'-'` instead of `'+'` and `'_'` instead of `'/'`.
  def urlsafe_encode(data, io : IO)
    strict_encode_to_io_internal(data, io, CHARS_SAFE, pad: true)
  end

  # Returns the base64-decoded version of *data* as a `Bytes`.
  # This will decode either the normal or urlsafe alphabets.
  def decode(data) : Bytes
    slice = data.to_slice
    buf = Pointer(UInt8).malloc(decode_size(slice.size))
    appender = buf.appender
    from_base64(slice) { |byte| appender << byte }
    appender.to_slice
  end

  # Writes the base64-decoded version of *data* to *io*.
  # This will decode either the normal or urlsafe alphabets.
  def decode(data, io : IO)
    count = 0
    from_base64(data.to_slice) do |byte|
      io.write_byte byte
      count += 1
    end
    io.flush
    count
  end

  # Returns the base64-decoded version of *data* as a string.
  # This will decode either the normal or urlsafe alphabets.
  def decode_string(data) : String
    slice = data.to_slice
    String.new(decode_size(slice.size)) do |buf|
      appender = buf.appender
      from_base64(slice) { |byte| appender << byte }
      {appender.size, 0}
    end
  end

  private def encode_size(str_size, new_lines = false)
    size = (str_size * 4 / 3.0).to_i + 4
    size += size // LINE_SIZE if new_lines
    size
  end

  private def decode_size(str_size)
    (str_size * 3 / 4.0).to_i + 4
  end

  private def to_base64(data, chars, pad = false, &)
    bytes = chars.to_unsafe
    size = data.size
    cstr = data.to_unsafe
    return if cstr.null? || size == 0
    endcstr = cstr + size - size % 3 - 3

    # process bunch of full triples
    while cstr < endcstr
      n = cstr.as(UInt32*).value.byte_swap
      yield bytes[(n >> 26) & 63]
      yield bytes[(n >> 20) & 63]
      yield bytes[(n >> 14) & 63]
      yield bytes[(n >> 8) & 63]
      cstr += 3
    end

    # process last full triple manually, because reading UInt32 not correct for guarded memory
    if size >= 3
      n = (cstr.value.to_u32 << 16) | ((cstr + 1).value.to_u32 << 8) | (cstr + 2).value
      yield bytes[(n >> 18) & 63]
      yield bytes[(n >> 12) & 63]
      yield bytes[(n >> 6) & 63]
      yield bytes[(n) & 63]
      cstr += 3
    end

    # process last partial triple
    pd = size % 3
    if pd == 1
      n = (cstr.value.to_u32 << 16)
      yield bytes[(n >> 18) & 63]
      yield bytes[(n >> 12) & 63]
      if pad
        yield PAD
        yield PAD
      end
    elsif pd == 2
      n = (cstr.value.to_u32 << 16) | ((cstr + 1).value.to_u32 << 8)
      yield bytes[(n >> 18) & 63]
      yield bytes[(n >> 12) & 63]
      yield bytes[(n >> 6) & 63]
      yield PAD if pad
    end
  end

  # Processes the given data and yields each byte.
  private def from_base64(data : Bytes, &block : UInt8 -> Nil)
    size = data.size
    bytes = data.to_unsafe
    bytes_begin = bytes

    # Get the position of the last valid base64 character (rstrip '\n', '\r' and '=')
    while (size > 0) && (sym = bytes[size - 1]) && sym.in?(NL, NR, PAD)
      size -= 1
    end

    # Process combinations of four characters until there aren't any left
    fin = bytes + size - 4
    while true
      break if bytes > fin

      # Move the pointer by one byte until there is a valid base64 character
      while bytes.value.in?(NL, NR)
        bytes += 1
      end
      break if bytes > fin

      yield_decoded_chunk_bytes(bytes[0], bytes[1], bytes[2], bytes[3], chunk_pos: bytes - bytes_begin)
      bytes += 4
    end

    # Move the pointer by one byte until there is a valid base64 character or the end of `bytes` was reached
    while (bytes < fin + 4) && bytes.value.in?(NL, NR)
      bytes += 1
    end

    # If the amount of base64 characters is not divisible by 4, the remainder of the previous loop is handled here
    unread_bytes = (fin - bytes) % 4
    case unread_bytes
    when 1
      raise Base64::Error.new("Wrong size")
    when 2
      yield_decoded_chunk_bytes(bytes[0], bytes[1], chunk_pos: bytes - bytes_begin)
    when 3
      yield_decoded_chunk_bytes(bytes[0], bytes[1], bytes[2], chunk_pos: bytes - bytes_begin)
    end
  end

  # This macro decodes the given chunk of (2-4) base64 characters.
  # The argument chunk_pos is only used for the resulting error message.
  # The resulting bytes are then each yielded.
  private macro yield_decoded_chunk_bytes(*bytes, chunk_pos)
    %buffer = 0_u32
    {% for byte, i in bytes %}
      %decoded = DECODE_TABLE.unsafe_fetch({{byte}})
      %buffer = (%buffer << 6) + %decoded
      raise Base64::Error.new("Unexpected byte 0x#{{{byte}}.to_s(16)} at #{{{chunk_pos}} + {{i}}}") if %decoded == 255_u8
    {% end %}

    # Each byte in the buffer is shifted to rightmost position of the buffer, then casted to a UInt8
    {% for i in 2..(bytes.size) %}
      yield (%buffer >> {{ (4 - bytes.size) * 2 + (8 * (bytes.size - i)) }}).to_u8!
    {% end %}
  end

  private DECODE_TABLE = Array(UInt8).new(size: 256) do |i|
    case i.unsafe_chr
    when 'A'..'Z' then (i - 0x41).to_u8!
    when 'a'..'z' then (i - 0x47).to_u8!
    when '0'..'9' then (i + 0x04).to_u8!
    when '+', '-' then 0x3E_u8
    when '/', '_' then 0x3F_u8
    else               255_u8
    end
  end
end