require "./float_common"
require "./fast_table"

module Float::FastFloat
  # This will compute or rather approximate w * 5**q and return a pair of 64-bit
  # words approximating the result, with the "high" part corresponding to the
  # most significant bits and the low part corresponding to the least significant
  # bits.
  def self.compute_product_approximation(q : Int64, w : UInt64, bit_precision : Int) : Value128
    power_of_five_128 = Powers::POWER_OF_FIVE_128.to_unsafe

    index = 2 &* (q &- Powers::SMALLEST_POWER_OF_FIVE)
    # For small values of q, e.g., q in [0,27], the answer is always exact
    # because The line value128 firstproduct = full_multiplication(w,
    # power_of_five_128[index]); gives the exact answer.
    firstproduct = w.to_u128! &* power_of_five_128[index]

    precision_mask = bit_precision < 64 ? 0xFFFFFFFFFFFFFFFF_u64.unsafe_shr(bit_precision) : 0xFFFFFFFFFFFFFFFF_u64
    if firstproduct.unsafe_shr(64).bits_set?(precision_mask) # could further guard with  (lower + w < lower)
      # regarding the second product, we only need secondproduct.high, but our
      # expectation is that the compiler will optimize this extra work away if
      # needed.
      secondproduct = w.to_u128! &* power_of_five_128[index &+ 1]
      firstproduct &+= secondproduct.unsafe_shr(64)
    end
    Value128.new(firstproduct)
  end

  module Detail
    # For q in (0,350), we have that
    #  f = (((152170 + 65536) * q ) >> 16);
    # is equal to
    #   floor(p) + q
    # where
    #   p = log(5**q)/log(2) = q * log(5)/log(2)
    #
    # For negative values of q in (-400,0), we have that
    #  f = (((152170 + 65536) * q ) >> 16);
    # is equal to
    #   -ceil(p) + q
    # where
    #   p = log(5**-q)/log(2) = -q * log(5)/log(2)
    def self.power(q : Int32) : Int32
      ((152170 &+ 65536) &* q).unsafe_shr(16) &+ 63
    end
  end

  module BinaryFormat(T, EquivUint)
    # create an adjusted mantissa, biased by the invalid power2
    # for significant digits already multiplied by 10 ** q.
    def compute_error_scaled(q : Int64, w : UInt64, lz : Int) : AdjustedMantissa
      hilz = w.unsafe_shr(63).to_i32! ^ 1
      bias = mantissa_explicit_bits &- minimum_exponent

      AdjustedMantissa.new(
        mantissa: w.unsafe_shl(hilz),
        power2: Detail.power(q.to_i32!) &+ bias &- hilz &- lz &- 62 &+ INVALID_AM_BIAS,
      )
    end

    # w * 10 ** q, without rounding the representation up.
    # the power2 in the exponent will be adjusted by invalid_am_bias.
    def compute_error(q : Int64, w : UInt64) : AdjustedMantissa
      lz = w.leading_zeros_count.to_i32!
      w = w.unsafe_shl(lz)
      product = FastFloat.compute_product_approximation(q, w, mantissa_explicit_bits &+ 3)
      compute_error_scaled(q, product.high, lz)
    end

    # w * 10 ** q
    # The returned value should be a valid ieee64 number that simply need to be
    # packed. However, in some very rare cases, the computation will fail. In such
    # cases, we return an adjusted_mantissa with a negative power of 2: the caller
    # should recompute in such cases.
    def compute_float(q : Int64, w : UInt64) : AdjustedMantissa
      if w == 0 || q < smallest_power_of_ten
        # result should be zero
        return AdjustedMantissa.new(
          power2: 0,
          mantissa: 0,
        )
      end
      if q > largest_power_of_ten
        # we want to get infinity:
        return AdjustedMantissa.new(
          power2: infinite_power,
          mantissa: 0,
        )
      end
      # At this point in time q is in [powers::smallest_power_of_five,
      # powers::largest_power_of_five].

      # We want the most significant bit of i to be 1. Shift if needed.
      lz = w.leading_zeros_count
      w = w.unsafe_shl(lz)

      # The required precision is binary::mantissa_explicit_bits() + 3 because
      # 1. We need the implicit bit
      # 2. We need an extra bit for rounding purposes
      # 3. We might lose a bit due to the "upperbit" routine (result too small,
      # requiring a shift)

      product = FastFloat.compute_product_approximation(q, w, mantissa_explicit_bits &+ 3)
      # The computed 'product' is always sufficient.
      # Mathematical proof:
      # Noble Mushtak and Daniel Lemire, Fast Number Parsing Without Fallback (to
      # appear) See script/mushtak_lemire.py

      # The "compute_product_approximation" function can be slightly slower than a
      # branchless approach: value128 product = compute_product(q, w); but in
      # practice, we can win big with the compute_product_approximation if its
      # additional branch is easily predicted. Which is best is data specific.
      upperbit = product.high.unsafe_shr(63).to_i32!
      shift = upperbit &+ 64 &- mantissa_explicit_bits &- 3

      mantissa = product.high.unsafe_shr(shift)

      power2 = (Detail.power(q.to_i32!) &+ upperbit &- lz &- minimum_exponent).to_i32!
      if power2 <= 0 # we have a subnormal?
        # Here have that answer.power2 <= 0 so -answer.power2 >= 0
        if 1 &- power2 >= 64 # if we have more than 64 bits below the minimum exponent, you have a zero for sure.
          # result should be zero
          return AdjustedMantissa.new(
            power2: 0,
            mantissa: 0,
          )
        end
        # next line is safe because -answer.power2 + 1 < 64
        mantissa = mantissa.unsafe_shr(1 &- power2)
        # Thankfully, we can't have both "round-to-even" and subnormals because
        # "round-to-even" only occurs for powers close to 0.
        mantissa &+= mantissa & 1
        mantissa = mantissa.unsafe_shr(1)
        # There is a weird scenario where we don't have a subnormal but just.
        # Suppose we start with 2.2250738585072013e-308, we end up
        # with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal
        # whereas 0x40000000000000 x 2^-1023-53  is normal. Now, we need to round
        # up 0x3fffffffffffff x 2^-1023-53  and once we do, we are no longer
        # subnormal, but we can only know this after rounding.
        # So we only declare a subnormal if we are smaller than the threshold.
        power2 = mantissa < 1_u64.unsafe_shl(mantissa_explicit_bits) ? 0 : 1
        return AdjustedMantissa.new(power2: power2, mantissa: mantissa)
      end

      # usually, we round *up*, but if we fall right in between and and we have an
      # even basis, we need to round down
      # We are only concerned with the cases where 5**q fits in single 64-bit word.
      if product.low <= 1 && q >= min_exponent_round_to_even && q <= max_exponent_round_to_even && mantissa & 3 == 1
        # we may fall between two floats!
        # To be in-between two floats we need that in doing
        #   answer.mantissa = product.high >> (upperbit + 64 -
        #   binary::mantissa_explicit_bits() - 3);
        # ... we dropped out only zeroes. But if this happened, then we can go
        # back!!!
        if mantissa.unsafe_shl(shift) == product.high
          mantissa &= ~1_u64 # flip it so that we do not round up
        end
      end

      mantissa &+= mantissa & 1 # round up
      mantissa = mantissa.unsafe_shr(1)
      if mantissa >= 2_u64.unsafe_shl(mantissa_explicit_bits)
        mantissa = 1_u64.unsafe_shl(mantissa_explicit_bits)
        power2 &+= 1 # undo previous addition
      end

      mantissa &= ~(1_u64.unsafe_shl(mantissa_explicit_bits))
      if power2 >= infinite_power # infinity
        return AdjustedMantissa.new(
          power2: infinite_power,
          mantissa: 0,
        )
      end
      AdjustedMantissa.new(power2: power2, mantissa: mantissa)
    end
  end
end