Jump to content
Sign in to follow this  
JimKueneman

Float 16

Recommended Posts

Sorry guys but as I try to understand JavaScript and SMS I will be asking some dumb questions in the near future....  How would I handle this code in SMS?  It uses some pointer tricks to force the bits calculated in one type into another type without the compiler doing a "conversion" in the background..... Thanks Jim 

unit lcc_math_float16;

//{$IFDEF FPC}
//{$mode objfpc}{$H+}
//{$ENDIF}


interface

//{$I lcc_compilers.inc}

{$IFDEF DWSCRIPT}
uses
  System.Types, System.Types.Convert, SmartCL.System, SmartCL.Components;
{$ELSE}
uses
  Classes, SysUtils;
{$ENDIF}

{$IFDEF DWSCRIPT}
type
  // 16 bit container for a half-precision bit pattern (plain alias of Word here)
  THalfFloat = Word;
{$ELSE}
type
  // 16 bit container for a half-precision bit pattern, declared as a distinct type
  THalfFloat = type Word;
{$ENDIF}

const
  // The Single constants below are numeric values; the THalfFloat constants
  // are raw 16 bit patterns (sign: bit 15, exponent: bits 14..10, mantissa: bits 9..0).
  HalfMin:     Single = 5.96046448e-08; // Smallest positive half
  HalfMinNorm: Single = 6.10351562e-05; // Smallest positive normalized half
  HalfMax:     Single = 65504.0;        // Largest positive half
  // Smallest positive e for which half (1.0 + e) != half (1.0)
  HalfEpsilon: Single = 0.00097656;
  HalfNaN:     THalfFloat = 65535; // $FFFF - exponent all ones, mantissa non-zero
  HalfPosInf:  THalfFloat = 31744; // $7C00 - exponent all ones, mantissa zero, sign clear
  HalfNegInf:  THalfFloat = 64512; // $FC00 - exponent all ones, mantissa zero, sign set

  // Single -> half bit pattern
  function FloatToHalf(Float: Single): THalfFloat;
  // Half bit pattern -> Single
  function HalfToFloat(Half: THalfFloat): Single;

implementation

{$IFDEF DWSCRIPT}
// DWSCRIPT build only: declare LongInt as an alias of Integer so the
// conversion routines below can use the same type name in both builds.
type
  LongInt = Integer;
{$ENDIF}

// Converts a 16 bit half-precision bit pattern into a Single.
//
// Half: raw half bits (sign: bit 15, exponent: bits 14..10, mantissa: bits 9..0).
// Returns the Single whose value equals the half; zero keeps its sign,
// denormalized halves are renormalized, infinities and NaNs are carried over
// with their sign (and, for NaN, the mantissa bits).
//
// The routine first builds the 32 bit pattern of the result in Dst and then
// reinterprets those bits as a Single without any numeric conversion.
function HalfToFloat(Half: THalfFloat): Single;
var
  Dst, Sign, Mantissa: LongWord;
  Exp: LongInt;
begin
  // Extract sign, exponent, and mantissa from half number
  Sign := Half shr 15;
  Exp := (Half and $7C00) shr 10;
  Mantissa := Half and 1023;

  if (Exp > 0) and (Exp < 31) then
  begin
    // Common normalized number: rebias the exponent (15 -> 127) and widen
    // the 10 bit mantissa to 23 bits.
    Exp := Exp + (127 - 15);
    Mantissa := Mantissa shl 13;
    Dst := (Sign shl 31) or (LongWord(Exp) shl 23) or Mantissa;
    // Value = Power(-1, Sign) * Power(2, Exp - 15) * (1 + Mantissa / 1024)
  end
  else if (Exp = 0) and (Mantissa = 0) then
  begin
    // Zero - preserve sign
    Dst := Sign shl 31;
  end
  else if (Exp = 0) and (Mantissa <> 0) then
  begin
    // Denormalized number - renormalize it: shift left until the implicit
    // leading one (bit 10) appears, lowering the exponent for every shift.
    while (Mantissa and $00000400) = 0 do
    begin
      Mantissa := Mantissa shl 1;
      Dec(Exp);
    end;
    Inc(Exp);
    // Drop the now explicit leading one; a Single stores it implicitly
    Mantissa := Mantissa and not $00000400;
    // Now assemble normalized number
    Exp := Exp + (127 - 15);
    Mantissa := Mantissa shl 13;
    Dst := (Sign shl 31) or (LongWord(Exp) shl 23) or Mantissa;
    // Value = Power(-1, Sign) * Power(2, -14) * (Mantissa / 1024)
  end
  else if (Exp = 31) and (Mantissa = 0) then
  begin
    // +/- infinity
    Dst := (Sign shl 31) or $7F800000;
  end
  else // (Exp = 31) and (Mantissa <> 0)
  begin
    // Not a number - preserve sign and mantissa
    Dst := (Sign shl 31) or $7F800000 or (Mantissa shl 13);
  end;

  // Reinterpret the 32 bits in Dst as a Single
{$IFDEF DWSCRIPT}
  // Smart Pascal has no pointers: serialize the integer to its 4 bytes and
  // read the same bytes back as a 32 bit float.
  // NOTE(review): requires System.Types.Convert in the uses clause.
  Result := TDataType.BytesToFloat32(TDataType.Int32ToBytes(Dst));
{$ELSE}
  Result := PSingle(@Dst)^;
{$ENDIF}
end;

// Converts a Single into the nearest 16 bit half-precision bit pattern.
//
// Float: value to convert.
// Returns the half bits (sign: bit 15, exponent: bits 14..10, mantissa: bits 9..0).
//   - magnitudes too small for a denormalized half become zero (sign kept)
//   - magnitudes too large for a half become infinity (sign kept)
//   - infinity stays infinity, NaN stays NaN (sign and upper mantissa bits kept)
//   - everything else is rounded to nearest on the 10 bit mantissa
function FloatToHalf(Float: Single): THalfFloat;
var
  Src: LongWord;
  Sign, Exp, Mantissa: LongInt;
begin
  // Reinterpret the Single as its raw 32 bit pattern (no numeric conversion)
{$IFDEF DWSCRIPT}
  // Smart Pascal has no pointers: serialize the float to its 4 bytes and
  // read the same bytes back as a 32 bit integer.
  // NOTE(review): requires System.Types.Convert in the uses clause; confirm
  // that TDataType.Float32ToBytes exists in the RTL version in use.
  Src := TDataType.BytesToInt32(TDataType.Float32ToBytes(Float));
{$ELSE}
  Src := PLongWord(@Float)^;
{$ENDIF}

  // Extract sign, exponent, and mantissa from Single number.
  // Masking after the shift keeps the fields correct even if Src is held as
  // a negative signed value (possible in the DWSCRIPT build).
  Sign := (Src shr 31) and 1;
  Exp := LongInt((Src shr 23) and $FF) - 127 + 15; // rebias 127 -> 15
  Mantissa := Src and $007FFFFF;

  if (Exp > 0) and (Exp < 30) then
  begin
    // Simple case - round the significand and combine it with the sign and
    // exponent. The rounded mantissa is ADDED to the exponent field so that a
    // rounding carry (mantissa overflowing to $400) bumps the exponent; an
    // "or" would lose the carry whenever the exponent's low bit is already set.
    // Exp <= 29 here, so the carry can at most produce exponent 30 (finite).
    Result := (Sign shl 15) or ((Exp shl 10) + ((Mantissa + $00001000) shr 13));
  end
  else if (Exp = 15 - 127) and (Mantissa = 0) then
  begin
    // Input float is zero (exponent field and mantissa both zero) - return
    // zero with the original sign. Testing the mantissa alone is not enough:
    // powers of two and infinity also have a zero mantissa.
    Result := Sign shl 15;
  end
  else
  begin
    // Difficult case - lengthy conversion
    if Exp <= 0 then
    begin
      if Exp < -10 then
      begin
        // Input float's magnitude is less than HalfMin, return signed zero
        Result := Sign shl 15;
      end
      else
      begin
        // Float is a normalized Single whose magnitude is less than HalfMinNorm.
        // We convert it to denormalized half: make the implicit leading one
        // explicit and shift the significand right by the missing exponent.
        Mantissa := (Mantissa or $00800000) shr (1 - Exp);
        // Round to nearest
        if (Mantissa and $00001000) > 0 then
          Mantissa := Mantissa + $00002000;
        // Assemble Sign and Mantissa (Exp is zero to get denormalized number).
        // A rounding carry lands in bit 10, yielding the smallest normal half.
        Result := (Sign shl 15) or (Mantissa shr 13);
      end;
    end
    else if Exp = 255 - 127 + 15 then
    begin
      if Mantissa = 0 then
      begin
        // Input float is infinity, create infinity half with original sign
        Result := (Sign shl 15) or $7C00;
      end
      else
      begin
        // Input float is NaN, create half NaN with original sign and mantissa.
        // If only the low 13 mantissa bits were set the shifted mantissa is
        // zero, which would encode infinity - force one bit so it stays NaN.
        Mantissa := Mantissa shr 13;
        if Mantissa = 0 then
          Mantissa := 1;
        Result := (Sign shl 15) or $7C00 or Mantissa;
      end;
    end
    else
    begin
      // Exp is >= 30 so input float is a normalized Single at or beyond the
      // top of the half range

      // Round to nearest
      if (Mantissa and $00001000) > 0 then
      begin
        Mantissa := Mantissa + $00002000;
        if (Mantissa and $00800000) > 0 then
        begin
          // Significand overflowed - carry into the exponent
          Mantissa := 0;
          Exp := Exp + 1;
        end;
      end;

      if Exp > 30 then
      begin
        // Exponent overflow - return infinity half
        Result := (Sign shl 15) or $7C00;
      end
      else
        // Assemble normalized half
        Result := (Sign shl 15) or (Exp shl 10) or (Mantissa shr 13);
    end;
  end;
end;

end.

 

Share this post


Link to post
Share on other sites

Well, I did some tests to see how I could convert a 4 byte integer to memory and back to a 4 byte integer and 4 byte float:

uses
  ... System.Types.Convert, System.Memory, System.Memory.Buffer;

...

  //Demo: store a 32 bit integer as 4 bytes, then read those same bytes back
  //both as an integer and as a 32 bit float.
  var Number: Integer:=1024;
  var MHandle:=TDataType.Int32ToTypedArray(Number);
  //At this point MHandle is a Uint8Array of 4 bytes

  var Address:=TAddress.Create(MHandle,0);
  WriteLn(Address.Size); //Yep, 4 bytes

  //Convert back to a 32 bit integer.
  var Number2:=TDataType.BytesToInt32(TMarshal.ReadMemory(Address,4));
  WriteLn(Number2); //Yep, same 1024

  //Convert to a 32 bit float
  //The same 4 bytes are read as a float, so the bits are reinterpreted
  //rather than the value 1024 being converted numerically.
  var Test:=TDataType.BytesToFloat32(TMarshal.ReadMemory(Address,4));
  WriteLn(Test); //Returns 1.4349296274686127e-42 (= 1024 * 2^-149, a denormal)

 

Share this post


Link to post
Share on other sites

And here's a test of allocating an 8-byte buffer, changing byte values in it, and converting it to a 64-bit float:

  //Allocate 8 bytes
  var Addr8:=TMarshal.Allocmem(8);
  Writeln(Addr8.Size); //8 bytes
  //Set each of the 8 bytes individually
  Addr8.Segment[0]:=255;
  Addr8.Segment[1]:=56;
  Addr8.Segment[2]:=111;
  Addr8.Segment[3]:=10;
  Addr8.Segment[4]:=20;
  Addr8.Segment[5]:=30;
  Addr8.Segment[6]:=40;
  Addr8.Segment[7]:=50;
  //Marker line: easy to search for in the generated source when setting a breakpoint
  WriteLn('Manipulation done');
  //Read all 8 bytes back and interpret them as one 64 bit float
  WriteLn(TDataType.BytesToFloat64(TMarshal.ReadMemory(Addr8,8)));
  //Release the buffer allocated above
  TMarshal.FreeMem(Addr8);

If you want to see the memory contents in Chrome's developer tools, here's a neat trick:

  • Compile and Execute
  • Open DevTools, view source and search for "Manipulation"
  • Put a breakpoint on the WriteLn-line
  • Run again

image.png

Share this post


Link to post
Share on other sites

And here's some testing by using TMemoryStream:

  //Demo: write an integer and two single bytes to a memory stream, then
  //read individual values back by repositioning the stream.
  var MemStream:=TMemoryStream.Create;
  //Write 4 byte integer and two bytes
  MemStream.Write(TDataType.Int32ToBytes(1024));
  MemStream.Write([Byte(255)]);
  MemStream.Write([Byte(128)]);
  WriteLn('Stream_written');
  WriteLn(MemStream.Size); //Should be 6 (4 + 1 + 1)

  //Read the byte after the first long (offset 4 = first byte past the integer)
  MemStream.Position:=4;
  var Bytes:=MemStream.Read(1); //Read a byte to TByteArray
  var Value:=Bytes[0];
  WriteLn(IntToStr(Value)+' == 255');

  //Read the first 4 byte integer
  MemStream.Position:=0;
  Bytes:=MemStream.Read(4);
  Value:=TDataType.BytesToInt32(Bytes);
  WriteLn(IntToStr(Value)+' == 1024');

 

Share this post


Link to post
Share on other sites

Create an account or sign in to comment

You need to be a member in order to leave a comment

Create an account

Sign up for a new account in our community. It's easy!

Register a new account

Sign in

Already have an account? Sign in here.

Sign In Now
Sign in to follow this  

×