RGB Matrix Overhaul (#5372)

* RGB Matrix overhaul Breakout of animations to separate files Integration of optimized int based math lib Overhaul of rgb_matrix.c and animations for performance * Updating effect function api for future extensions * Combined the keypresses || keyreleases define checks into a single define so I stop forgetting it where necessary * Moving define RGB_MATRIX_KEYREACTIVE_ENABLED earlier in the include chain
author: XScorpion2 <rcalt2vt@gmail.com> 2019-04-02 19:24:14 -0500
committer: Drashna Jaelre <drashna@live.com> 2019-04-02 17:24:14 -0700
commit: c98247e3dd2958bd2d8969dc75170e7e2757b895 (patch)
tree: a566de223a9501809e1059c522b52adf7d37fe74 /lib
parent: 68d8bb2b3fb8a35fda164539d27754b3f74e0819 (diff)
7 files changed, 2643 insertions, 0 deletions
diff --git a/lib/lib8tion/LICENSE b/lib/lib8tion/LICENSE
new file mode 100644
index 0000000000..ebe476330b
--- /dev/null
+++ b/lib/lib8tion/LICENSE
@@ -0,0 +1,20 @@
+The MIT License (MIT)
+
+Copyright (c) 2013 FastLED
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/lib/lib8tion/lib8tion.c b/lib/lib8tion/lib8tion.c
new file mode 100644
index 0000000000..84b3e9c61c
--- /dev/null
+++ b/lib/lib8tion/lib8tion.c
@@ -0,0 +1,242 @@
+#define FASTLED_INTERNAL
+#include <stdint.h>
+
+#define RAND16_SEED  1337
+uint16_t rand16seed = RAND16_SEED;
+
+
+// memset8, memcpy8, memmove8:
+//  optimized avr replacements for the standard "C" library
+//  routines memset, memcpy, and memmove.
+//
+//  There are two techniques that make these routines
+//  faster than the standard avr-libc routines.
+//  First, the loops are unrolled 2X, meaning that
+//  the average loop overhead is cut in half.
+//  And second, the compare-and-branch at the bottom
+//  of each loop decrements the low byte of the
+//  counter, and if the carry is clear, it branches
+//  back up immediately.  Only if the low byte math
+//  causes carry do we bother to decrement the high
+//  byte and check that result for carry as well.
+//  Results for a 100-byte buffer are 20-40% faster
+//  than standard avr-libc, at a cost of a few extra
+//  bytes of code.
+
+#if defined(__AVR__)
+// memset8: store val into each of the num bytes starting at ptr; returns ptr.
+void * memset8 ( void * ptr, uint8_t val, uint16_t num )
+{
+    asm volatile(
+         "  movw r26, %[ptr]        \n\t"
+         "  sbrs %A[num], 0         \n\t"
+         "  rjmp Lseteven_%=        \n\t"
+         "  rjmp Lsetodd_%=         \n\t"
+         "Lsetloop_%=:              \n\t"
+         "  st X+, %[val]           \n\t"
+         "Lsetodd_%=:               \n\t"
+         "  st X+, %[val]           \n\t"
+         "Lseteven_%=:              \n\t"
+         "  subi %A[num], 2         \n\t"
+         "  brcc Lsetloop_%=        \n\t"
+         "  sbci %B[num], 0         \n\t"
+         "  brcc Lsetloop_%=        \n\t"
+         : [num] "+d" (num)         // subi/sbci only accept r16..r31
+         : [ptr]  "r" (ptr),
+           [val]  "r" (val)
+         : "r26", "r27", "memory"   // X is overwritten; keep operands out of it
+         );
+    return ptr;
+}
+
+
+
+// memcpy8: forward copy of num bytes from src to dst; returns dst.
+void * memcpy8 ( void * dst, const void* src, uint16_t num )
+{
+    asm volatile(
+         "  movw r30, %[src]        \n\t"
+         "  movw r26, %[dst]        \n\t"
+         "  sbrs %A[num], 0         \n\t"
+         "  rjmp Lcpyeven_%=        \n\t"
+         "  rjmp Lcpyodd_%=         \n\t"
+         "Lcpyloop_%=:              \n\t"
+         "  ld __tmp_reg__, Z+      \n\t"
+         "  st X+, __tmp_reg__      \n\t"
+         "Lcpyodd_%=:               \n\t"
+         "  ld __tmp_reg__, Z+      \n\t"
+         "  st X+, __tmp_reg__      \n\t"
+         "Lcpyeven_%=:              \n\t"
+         "  subi %A[num], 2         \n\t"
+         "  brcc Lcpyloop_%=        \n\t"
+         "  sbci %B[num], 0         \n\t"
+         "  brcc Lcpyloop_%=        \n\t"
+         : [num] "+d" (num)         // subi/sbci only accept r16..r31
+         : [src] "r" (src),
+           [dst] "r" (dst)
+         : "r26", "r27", "r30", "r31", "memory"   // X and Z are overwritten
+         );
+    return dst;
+}
+
+// memmove8: overlap-safe copy of num bytes from src to dst; returns dst.
+void * memmove8 ( void * dst, const void* src, uint16_t num )
+{
+    if( src > dst) {
+        // if src > dst then we can use the forward-stepping memcpy8
+        return memcpy8( dst, src, num);
+    } else {
+        // if src <= dst then we have to step backward, starting one
+        // past the end of each buffer; dst itself is left unmodified
+        // so that the original destination pointer is what we return.
+        asm volatile(
+             "  movw r30, %[src]        \n\t"
+             "  movw r26, %[dst]        \n\t"
+             "  sbrs %A[num], 0         \n\t"
+             "  rjmp Lmoveven_%=        \n\t"
+             "  rjmp Lmovodd_%=         \n\t"
+             "Lmovloop_%=:              \n\t"
+             "  ld __tmp_reg__, -Z      \n\t"
+             "  st -X, __tmp_reg__      \n\t"
+             "Lmovodd_%=:               \n\t"
+             "  ld __tmp_reg__, -Z      \n\t"
+             "  st -X, __tmp_reg__      \n\t"
+             "Lmoveven_%=:              \n\t"
+             "  subi %A[num], 2         \n\t"
+             "  brcc Lmovloop_%=        \n\t"
+             "  sbci %B[num], 0         \n\t"
+             "  brcc Lmovloop_%=        \n\t"
+             : [num] "+d" (num)         // subi/sbci only accept r16..r31
+             : [src] "r" ((const char*)src + num),
+               [dst] "r" ((char*)dst + num)
+             : "r26", "r27", "r30", "r31", "memory"   // X and Z are overwritten
+             );
+        return dst;
+    }
+}
+
+#endif /* AVR */
+
+
+
+
+#if 0
+// TEST / VERIFICATION CODE ONLY BELOW THIS POINT
+#include <Arduino.h>
+#include "lib8tion.h"
+
+void test1abs( int8_t i)
+{
+    Serial.print("abs("); Serial.print(i); Serial.print(") = ");
+    int8_t j = abs8(i);
+    Serial.print(j); Serial.println(" ");
+}
+
+void testabs()
+{
+    delay(5000);
+    for( int8_t q = -128; q != 127; q++) {
+        test1abs(q);
+    }
+    for(;;){};
+}
+
+
+void testmul8()
+{
+    delay(5000);
+    byte r, c;
+
+    Serial.println("mul8:");
+    for( r = 0; r <= 20; r += 1) {
+        Serial.print(r); Serial.print(" : ");
+        for( c = 0; c <= 20; c += 1) {
+            byte t;
+            t = mul8( r, c);
+            Serial.print(t); Serial.print(' ');
+        }
+        Serial.println(' ');
+    }
+    Serial.println("done.");
+    for(;;){};
+}
+
+
+void testscale8()
+{
+    delay(5000);
+    byte r, c;
+
+    Serial.println("scale8:");
+    for( r = 0; r <= 240; r += 10) {
+        Serial.print(r); Serial.print(" : ");
+        for( c = 0; c <= 240; c += 10) {
+            byte t;
+            t = scale8( r, c);
+            Serial.print(t); Serial.print(' ');
+        }
+        Serial.println(' ');
+    }
+
+    Serial.println(' ');
+    Serial.println("scale8_video:");
+
+    for( r = 0; r <= 100; r += 4) {
+        Serial.print(r); Serial.print(" : ");
+        for( c = 0; c <= 100; c += 4) {
+            byte t;
+            t = scale8_video( r, c);
+            Serial.print(t); Serial.print(' ');
+        }
+        Serial.println(' ');
+    }
+
+    Serial.println("done.");
+    for(;;){};
+}
+
+
+
+void testqadd8()
+{
+    delay(5000);
+    byte r, c;
+    for( r = 0; r <= 240; r += 10) {
+        Serial.print(r); Serial.print(" : ");
+        for( c = 0; c <= 240; c += 10) {
+            byte t;
+            t = qadd8( r, c);
+            Serial.print(t); Serial.print(' ');
+        }
+        Serial.println(' ');
+    }
+    Serial.println("done.");
+    for(;;){};
+}
+
+void testnscale8x3()
+{
+    delay(5000);
+    byte r, g, b, sc;
+    for( byte z = 0; z < 10; z++) {
+        r = random8(); g = random8(); b = random8(); sc = random8();
+
+        Serial.print("nscale8x3_video( ");
+        Serial.print(r); Serial.print(", ");
+        Serial.print(g); Serial.print(", ");
+        Serial.print(b); Serial.print(", ");
+        Serial.print(sc); Serial.print(") = [ ");
+
+        nscale8x3_video( r, g, b, sc);
+
+        Serial.print(r); Serial.print(", ");
+        Serial.print(g); Serial.print(", ");
+        Serial.print(b); Serial.print("]");
+
+        Serial.println(' ');
+    }
+    Serial.println("done.");
+    for(;;){};
+}
+
+#endif
diff --git a/lib/lib8tion/lib8tion.h b/lib/lib8tion/lib8tion.h
new file mode 100644
index 0000000000..d93c748e6a
--- /dev/null
+++ b/lib/lib8tion/lib8tion.h
@@ -0,0 +1,934 @@
+#ifndef __INC_LIB8TION_H
+#define __INC_LIB8TION_H
+
+/*
+
+ Fast, efficient 8-bit math functions specifically
+ designed for high-performance LED programming.
+
+ Because of the AVR(Arduino) and ARM assembly language
+ implementations provided, using these functions often
+ results in smaller and faster code than the equivalent
+ program using plain "C" arithmetic and logic.
+
+
+ Included are:
+
+
+ - Saturating unsigned 8-bit add and subtract.
+   Instead of wrapping around if an overflow occurs,
+   these routines just 'clamp' the output at a maximum
+   of 255, or a minimum of 0.  Useful for adding pixel
+   values.  E.g., qadd8( 200, 100) = 255.
+
+     qadd8( i, j) == MIN( (i + j), 0xFF )
+     qsub8( i, j) == MAX( (i - j), 0 )
+
+ - Saturating signed 8-bit ("7-bit") add.
+     qadd7( i, j) == MIN( (i + j), 0x7F)
+
+
+ - Scaling (down) of unsigned 8- and 16- bit values.
+   Scaledown value is specified in 1/256ths.
+     scale8( i, sc) == (i * sc) / 256
+     scale16by8( i, sc) == (i * sc) / 256
+
+   Example: scaling a 0-255 value down into a
+   range from 0-99:
+     downscaled = scale8( originalnumber, 100);
+
+   A special version of scale8 is provided for scaling
+   LED brightness values, to make sure that they don't
+   accidentally scale down to total black at low
+   dimming levels, since that would look wrong:
+     scale8_video( i, sc) = ((i * sc) / 256) +? 1
+
+   Example: reducing an LED brightness by a
+   dimming factor:
+     new_bright = scale8_video( orig_bright, dimming);
+
+
+ - Fast 8- and 16- bit unsigned random numbers.
+   Significantly faster than Arduino random(), but
+   also somewhat less random.  You can add entropy.
+     random8()       == random from 0..255
+     random8( n)     == random from 0..(N-1)
+     random8( n, m)  == random from N..(M-1)
+
+     random16()      == random from 0..65535
+     random16( n)    == random from 0..(N-1)
+     random16( n, m) == random from N..(M-1)
+
+     random16_set_seed( k)    ==  seed = k
+     random16_add_entropy( k) ==  seed += k
+
+
+ - Absolute value of a signed 8-bit value.
+     abs8( i)     == abs( i)
+
+
+ - 8-bit math operations which return 8-bit values.
+   These are provided mostly for completeness,
+   not particularly for performance.
+     mul8( i, j)  == (i * j) & 0xFF
+     add8( i, j)  == (i + j) & 0xFF
+     sub8( i, j)  == (i - j) & 0xFF
+
+
+ - Fast 16-bit approximations of sin and cos.
+   Input angle is a uint16_t from 0-65535.
+   Output is a signed int16_t from -32767 to 32767.
+      sin16( x)  == sin( (x/32768.0) * pi) * 32767
+      cos16( x)  == cos( (x/32768.0) * pi) * 32767
+   Accurate to more than 99% in all cases.
+
+ - Fast 8-bit approximations of sin and cos.
+   Input angle is a uint8_t from 0-255.
+   Output is an UNsigned uint8_t from 0 to 255.
+       sin8( x)  == (sin( (x/128.0) * pi) * 128) + 128
+       cos8( x)  == (cos( (x/128.0) * pi) * 128) + 128
+   Accurate to within about 2%.
+
+
+ - Fast 8-bit "easing in/out" function.
+     ease8InOutCubic(x) == 3(x^2) - 2(x^3)
+     ease8InOutApprox(x) ==
+       faster, rougher, approximation of cubic easing
+     ease8InOutQuad(x) == quadratic (vs cubic) easing
+
+ - Cubic, Quadratic, and Triangle wave functions.
+   Input is a uint8_t representing phase within the wave,
+     similar to how sin8 takes an angle 'theta'.
+   Output is a uint8_t representing the amplitude of
+     the wave at that point.
+       cubicwave8( x)
+       quadwave8( x)
+       triwave8( x)
+
+ - Square root for 16-bit integers.  About three times
+   faster and five times smaller than Arduino's built-in
+   generic 32-bit sqrt routine.
+     sqrt16( uint16_t x ) == sqrt( x)
+
+ - Dimming and brightening functions for 8-bit
+   light values.
+     dim8_video( x)  == scale8_video( x, x)
+     dim8_raw( x)    == scale8( x, x)
+     dim8_lin( x)    == (x<128) ? ((x+1)/2) : scale8(x,x)
+     brighten8_video( x) == 255 - dim8_video( 255 - x)
+     brighten8_raw( x) == 255 - dim8_raw( 255 - x)
+     brighten8_lin( x) == 255 - dim8_lin( 255 - x)
+   The dimming functions in particular are suitable
+   for making LED light output appear more 'linear'.
+
+
+ - Linear interpolation between two values, with the
+   fraction between them expressed as an 8- or 16-bit
+   fixed point fraction (fract8 or fract16).
+     lerp8by8(   fromU8, toU8, fract8 )
+     lerp16by8(  fromU16, toU16, fract8 )
+     lerp15by8(  fromS16, toS16, fract8 )
+       == from + ((( to - from ) * fract8) / 256)
+     lerp16by16( fromU16, toU16, fract16 )
+       == from + ((( to - from ) * fract16) / 65536)
+     map8( in, rangeStart, rangeEnd)
+       == map( in, 0, 255, rangeStart, rangeEnd);
+
+ - Optimized memmove, memcpy, and memset, that are
+   faster than standard avr-libc 1.8.
+      memmove8( dest, src,  bytecount)
+      memcpy8(  dest, src,  bytecount)
+      memset8(  buf, value, bytecount)
+
+ - Beat generators which return sine or sawtooth
+   waves in a specified number of Beats Per Minute.
+   Sine wave beat generators can specify a low and
+   high range for the output.  Sawtooth wave beat
+   generators always range 0-255 or 0-65535.
+     beatsin8( BPM, low8, high8)
+         = (sine(beatphase) * (high8-low8)) + low8
+     beatsin16( BPM, low16, high16)
+         = (sine(beatphase) * (high16-low16)) + low16
+     beatsin88( BPM88, low16, high16)
+         = (sine(beatphase) * (high16-low16)) + low16
+     beat8( BPM)  = 8-bit repeating sawtooth wave
+     beat16( BPM) = 16-bit repeating sawtooth wave
+     beat88( BPM88) = 16-bit repeating sawtooth wave
+   BPM is beats per minute in either simple form
+   e.g. 120, or Q8.8 fixed-point form.
+   BPM88 is beats per minute in ONLY Q8.8 fixed-point
+   form.
+
+Lib8tion is pronounced like 'libation': lie-BAY-shun
+
+*/
+
+
+
+#include <stdint.h>
+
+#define LIB8STATIC __attribute__ ((unused)) static inline
+#define LIB8STATIC_ALWAYS_INLINE __attribute__ ((always_inline)) static inline
+
+#if !defined(__AVR__)
+#include <string.h>
+// for memmove, memcpy, and memset if not defined here
+#endif
+
+#if defined(__arm__)
+
+#if defined(FASTLED_TEENSY3)
+// Can use Cortex M4 DSP instructions
+#define QADD8_C 0
+#define QADD7_C 0
+#define QADD8_ARM_DSP_ASM 1
+#define QADD7_ARM_DSP_ASM 1
+#else
+// Generic ARM
+#define QADD8_C 1
+#define QADD7_C 1
+#endif
+
+#define QSUB8_C 1
+#define SCALE8_C 1
+#define SCALE16BY8_C 1
+#define SCALE16_C 1
+#define ABS8_C 1
+#define MUL8_C 1
+#define QMUL8_C 1
+#define ADD8_C 1
+#define SUB8_C 1
+#define EASE8_C 1
+#define AVG8_C 1
+#define AVG7_C 1
+#define AVG16_C 1
+#define AVG15_C 1
+#define BLEND8_C 1
+
+
+#elif defined(__AVR__)
+
+// AVR ATmega and friends Arduino
+
+#define QADD8_C 0
+#define QADD7_C 0
+#define QSUB8_C 0
+#define ABS8_C 0
+#define ADD8_C 0
+#define SUB8_C 0
+#define AVG8_C 0
+#define AVG7_C 0
+#define AVG16_C 0
+#define AVG15_C 0
+
+#define QADD8_AVRASM 1
+#define QADD7_AVRASM 1
+#define QSUB8_AVRASM 1
+#define ABS8_AVRASM 1
+#define ADD8_AVRASM 1
+#define SUB8_AVRASM 1
+#define AVG8_AVRASM 1
+#define AVG7_AVRASM 1
+#define AVG16_AVRASM 1
+#define AVG15_AVRASM 1
+
+// Note: these require hardware MUL instruction
+//       -- sorry, ATtiny!
+#if !defined(LIB8_ATTINY)
+#define SCALE8_C 0
+#define SCALE16BY8_C 0
+#define SCALE16_C 0
+#define MUL8_C 0
+#define QMUL8_C 0
+#define EASE8_C 0
+#define BLEND8_C 0
+#define SCALE8_AVRASM 1
+#define SCALE16BY8_AVRASM 1
+#define SCALE16_AVRASM 1
+#define MUL8_AVRASM 1
+#define QMUL8_AVRASM 1
+#define EASE8_AVRASM 1
+#define CLEANUP_R1_AVRASM 1
+#define BLEND8_AVRASM 1
+#else
+// On ATtiny, we just use C implementations
+#define SCALE8_C 1
+#define SCALE16BY8_C 1
+#define SCALE16_C 1
+#define MUL8_C 1
+#define QMUL8_C 1
+#define EASE8_C 1
+#define BLEND8_C 1
+#define SCALE8_AVRASM 0
+#define SCALE16BY8_AVRASM 0
+#define SCALE16_AVRASM 0
+#define MUL8_AVRASM 0
+#define QMUL8_AVRASM 0
+#define EASE8_AVRASM 0
+#define BLEND8_AVRASM 0
+#endif
+
+#else
+
+// unspecified architecture, so
+// no ASM, everything in C
+#define QADD8_C 1
+#define QADD7_C 1
+#define QSUB8_C 1
+#define SCALE8_C 1
+#define SCALE16BY8_C 1
+#define SCALE16_C 1
+#define ABS8_C 1
+#define MUL8_C 1
+#define QMUL8_C 1
+#define ADD8_C 1
+#define SUB8_C 1
+#define EASE8_C 1
+#define AVG8_C 1
+#define AVG7_C 1
+#define AVG16_C 1
+#define AVG15_C 1
+#define BLEND8_C 1
+
+#endif
+
+///@defgroup lib8tion Fast math functions
+///A variety of functions for working with numbers.
+///@{
+
+
+///////////////////////////////////////////////////////////////////////
+//
+// typedefs for fixed-point fractional types.
+//
+// sfract7 should be interpreted as signed 128ths.
+// fract8 should be interpreted as unsigned 256ths.
+// sfract15 should be interpreted as signed 32768ths.
+// fract16 should be interpreted as unsigned 65536ths.
+//
+// Example: if a fract8 has the value "64", that should be interpreted
+//          as 64/256ths, or one-quarter.
+//
+//
+//  fract8   range is 0 to 0.99609375
+//                 in steps of 0.00390625
+//
+//  sfract7  range is -0.9921875 to 0.9921875
+//                 in steps of 0.0078125
+//
+//  fract16  range is 0 to 0.99998474121
+//                 in steps of 0.00001525878
+//
+//  sfract15 range is -0.99996948242 to 0.99996948242
+//                 in steps of 0.00003051757
+//
+
+/// ANSI unsigned short _Fract.  range is 0 to 0.99609375
+///                 in steps of 0.00390625
+typedef uint8_t   fract8;   ///< ANSI: unsigned short _Fract
+
+///  ANSI: signed short _Fract.  range is -0.9921875 to 0.9921875
+///                 in steps of 0.0078125
+typedef int8_t    sfract7;  ///< ANSI: signed   short _Fract
+
+///  ANSI: unsigned _Fract.  range is 0 to 0.99998474121
+///                 in steps of 0.00001525878
+typedef uint16_t  fract16;  ///< ANSI: unsigned       _Fract
+
+///  ANSI: signed _Fract.  range is -0.99996948242 to 0.99996948242
+///                 in steps of 0.00003051757
+typedef int16_t   sfract15; ///< ANSI: signed         _Fract
+
+
+// accumXY types should be interpreted as X bits of integer,
+//         and Y bits of fraction.
+//         E.g., accum88 has 8 bits of int, 8 bits of fraction
+
+typedef uint16_t  accum88;  ///< ANSI: unsigned short _Accum.  8 bits int, 8 bits fraction
+typedef int16_t   saccum78; ///< ANSI: signed   short _Accum.  7 bits int, 8 bits fraction
+typedef uint32_t  accum1616;///< ANSI: unsigned       _Accum. 16 bits int, 16 bits fraction
+typedef int32_t   saccum1516;///< ANSI: signed         _Accum. 15 bits int, 16 bits fraction
+typedef uint16_t  accum124; ///< no direct ANSI counterpart. 12 bits int, 4 bits fraction
+typedef int32_t   saccum114;///< no direct ANSI counterpart. 1 bit int, 14 bits fraction
+
+
+
+#include "math8.h"
+#include "scale8.h"
+#include "random8.h"
+#include "trig8.h"
+
+///////////////////////////////////////////////////////////////////////
+
+
+
+
+
+
+
+///////////////////////////////////////////////////////////////////////
+//
+// float-to-fixed and fixed-to-float conversions
+//
+// Note that anything involving a 'float' on AVR will be slower.
+
+/// sfract15ToFloat: conversion from sfract15 fixed point to
+///                  IEEE754 32-bit float.
+LIB8STATIC float sfract15ToFloat( sfract15 y)
+{
+    return y / 32768.0;
+}
+
+/// conversion from IEEE754 float in the range (-1,1)
+///                  to 16-bit fixed point.  Note that the extremes of
+///                  one and negative one are NOT representable.  The
+///                  representable range is -0.99996948242 to 0.99996948242.
+LIB8STATIC sfract15 floatToSfract15( float f)
+{
+    return f * 32768.0;
+}
+
+
+
+///////////////////////////////////////////////////////////////////////
+//
+// memmove8, memcpy8, and memset8:
+//   alternatives to memmove, memcpy, and memset that are
+//   faster on AVR than standard avr-libc 1.8
+
+#if defined(__AVR__)
+void * memmove8( void * dst, const void * src, uint16_t num );
+void * memcpy8 ( void * dst, const void * src, uint16_t num )  __attribute__ ((noinline));
+void * memset8 ( void * ptr, uint8_t value, uint16_t num ) __attribute__ ((noinline)) ;
+#else
+// on non-AVR platforms, these names just call standard libc.
+#define memmove8 memmove
+#define memcpy8 memcpy
+#define memset8 memset
+#endif
+
+
+///////////////////////////////////////////////////////////////////////
+//
+// linear interpolation, such as could be used for Perlin noise, etc.
+//
+
+// A note on the structure of the lerp functions:
+// The cases for b>a and b<=a are handled separately for
+// speed: without knowing the relative order of a and b,
+// the value (a-b) might overflow the width of a or b,
+// and have to be promoted to a wider, slower type.
+// To avoid that, we separate the two cases, and are able
+// to do all the math in the same width as the arguments,
+// which is much faster and smaller on AVR.
+
+/// linear interpolation between two unsigned 8-bit values,
+/// with 8-bit fraction
+LIB8STATIC uint8_t lerp8by8( uint8_t a, uint8_t b, fract8 frac)
+{
+    uint8_t result;
+    if( b > a) {
+        uint8_t delta = b - a;
+        uint8_t scaled = scale8( delta, frac);
+        result = a + scaled;
+    } else {
+        uint8_t delta = a - b;
+        uint8_t scaled = scale8( delta, frac);
+        result = a - scaled;
+    }
+    return result;
+}
+
+/// linear interpolation between two unsigned 16-bit values,
+/// with 16-bit fraction
+LIB8STATIC uint16_t lerp16by16( uint16_t a, uint16_t b, fract16 frac)
+{
+    uint16_t result;
+    if( b > a ) {
+        uint16_t delta = b - a;
+        uint16_t scaled = scale16(delta, frac);
+        result = a + scaled;
+    } else {
+        uint16_t delta = a - b;
+        uint16_t scaled = scale16( delta, frac);
+        result = a - scaled;
+    }
+    return result;
+}
+
+/// linear interpolation between two unsigned 16-bit values,
+/// with 8-bit fraction
+LIB8STATIC uint16_t lerp16by8( uint16_t a, uint16_t b, fract8 frac)
+{
+    uint16_t result;
+    if( b > a) {
+        uint16_t delta = b - a;
+        uint16_t scaled = scale16by8( delta, frac);
+        result = a + scaled;
+    } else {
+        uint16_t delta = a - b;
+        uint16_t scaled = scale16by8( delta, frac);
+        result = a - scaled;
+    }
+    return result;
+}
+
+/// linear interpolation between two signed 15-bit values,
+/// with 8-bit fraction
+LIB8STATIC int16_t lerp15by8( int16_t a, int16_t b, fract8 frac)
+{
+    int16_t result;
+    if( b > a) {
+        uint16_t delta = b - a;
+        uint16_t scaled = scale16by8( delta, frac);
+        result = a + scaled;
+    } else {
+        uint16_t delta = a - b;
+        uint16_t scaled = scale16by8( delta, frac);
+        result = a - scaled;
+    }
+    return result;
+}
+
+/// linear interpolation between two signed 15-bit values,
+/// with 16-bit fraction
+LIB8STATIC int16_t lerp15by16( int16_t a, int16_t b, fract16 frac)
+{
+    int16_t result;
+    if( b > a) {
+        uint16_t delta = b - a;
+        uint16_t scaled = scale16( delta, frac);
+        result = a + scaled;
+    } else {
+        uint16_t delta = a - b;
+        uint16_t scaled = scale16( delta, frac);
+        result = a - scaled;
+    }
+    return result;
+}
+
+///  map8: map from one full-range 8-bit value into a narrower
+/// range of 8-bit values, possibly a range of hues.
+///
+/// E.g. map myValue into a hue in the range blue..purple..pink..red
+/// hue = map8( myValue, HUE_BLUE, HUE_RED);
+///
+/// Combines nicely with the waveform functions (like sin8, etc)
+/// to produce continuous hue gradients back and forth:
+///
+///          hue = map8( sin8( myValue), HUE_BLUE, HUE_RED);
+///
+/// Mathematically similar to lerp8by8, but arguments are more
+/// like Arduino's "map"; this function is similar to
+///
+///          map( in, 0, 255, rangeStart, rangeEnd)
+///
+/// but faster and specifically designed for 8-bit values.
+LIB8STATIC uint8_t map8( uint8_t in, uint8_t rangeStart, uint8_t rangeEnd)
+{
+    uint8_t rangeWidth = rangeEnd - rangeStart;
+    uint8_t out = scale8( in, rangeWidth);
+    out += rangeStart;
+    return out;
+}
+
+
+///////////////////////////////////////////////////////////////////////
+//
+// easing functions; see http://easings.net
+//
+
+/// ease8InOutQuad: 8-bit quadratic ease-in / ease-out function
+///                Takes around 13 cycles on AVR
+#if EASE8_C == 1
+LIB8STATIC uint8_t ease8InOutQuad( uint8_t i)
+{
+    uint8_t j = i;
+    if( j & 0x80 ) {
+        j = 255 - j;
+    }
+    uint8_t jj  = scale8(  j, j);
+    uint8_t jj2 = jj << 1;
+    if( i & 0x80 ) {
+        jj2 = 255 - jj2;
+    }
+    return jj2;
+}
+
+#elif EASE8_AVRASM == 1
+// This AVR asm version of ease8InOutQuad preserves one more
+// low-bit of precision than the C version, and is also slightly
+// smaller and faster.
+LIB8STATIC uint8_t ease8InOutQuad(uint8_t val) {
+    uint8_t j=val;
+    asm volatile (
+      "sbrc %[val], 7 \n"
+      "com %[j]       \n"
+      "mul %[j], %[j] \n"
+      "add r0, %[j]   \n"
+      "ldi %[j], 0    \n"
+      "adc %[j], r1   \n"
+      "lsl r0         \n" // carry = high bit of low byte of mul product
+      "rol %[j]       \n" // j = (j * 2) + carry // preserve add'l bit of precision
+      "sbrc %[val], 7 \n"
+      "com %[j]       \n"
+      "clr __zero_reg__   \n"
+      : [j] "+&a" (j)
+      : [val] "a" (val)
+      : "r0", "r1"
+      );
+    return j;
+}
+
+#else
+#error "No implementation for ease8InOutQuad available."
+#endif
+
+/// ease16InOutQuad: 16-bit quadratic ease-in / ease-out function
+// C implementation at this point
+LIB8STATIC uint16_t ease16InOutQuad( uint16_t i)
+{
+    uint16_t j = i;
+    if( j & 0x8000 ) {
+        j = 65535 - j;
+    }
+    uint16_t jj  = scale16( j, j);
+    uint16_t jj2 = jj << 1;
+    if( i & 0x8000 ) {
+        jj2 = 65535 - jj2;
+    }
+    return jj2;
+}
+
+
+/// ease8InOutCubic: 8-bit cubic ease-in / ease-out function
+///                 Takes around 18 cycles on AVR
+LIB8STATIC fract8 ease8InOutCubic( fract8 i)
+{
+    uint8_t ii  = scale8_LEAVING_R1_DIRTY(  i, i);
+    uint8_t iii = scale8_LEAVING_R1_DIRTY( ii, i);
+
+    uint16_t r1 = (3 * (uint16_t)(ii)) - ( 2 * (uint16_t)(iii));
+
+    /* the code generated for the above *'s automatically
+       cleans up R1, so there's no need to explicitly call
+       cleanup_R1(); */
+
+    uint8_t result = r1;
+
+    // if we got "256", return 255:
+    if( r1 & 0x100 ) {
+        result = 255;
+    }
+    return result;
+}
+
+/// ease8InOutApprox: fast, rough 8-bit ease-in/ease-out function
+///                   shaped approximately like 'ease8InOutCubic',
+///                   it's never off by more than a couple of percent
+///                   from the actual cubic S-curve, and it executes
+///                   more than twice as fast.  Use when the cycles
+///                   are more important than visual smoothness.
+///                   Asm version takes around 7 cycles on AVR.
+
+#if EASE8_C == 1
+LIB8STATIC fract8 ease8InOutApprox( fract8 i)
+{
+    if( i < 64) {
+        // start with slope 0.5
+        i /= 2;
+    } else if( i > (255 - 64)) {
+        // end with slope 0.5
+        i = 255 - i;
+        i /= 2;
+        i = 255 - i;
+    } else {
+        // in the middle, use slope 192/128 = 1.5
+        i -= 64;
+        i += (i / 2);
+        i += 32;
+    }
+
+    return i;
+}
+
+#elif EASE8_AVRASM == 1
+LIB8STATIC uint8_t ease8InOutApprox( fract8 i)
+{
+    // takes around 7 cycles on AVR
+    asm volatile (
+        "  subi %[i], 64         \n\t"
+        "  cpi  %[i], 128        \n\t"
+        "  brcc Lshift_%=        \n\t"
+
+        // middle case
+        "  mov __tmp_reg__, %[i] \n\t"
+        "  lsr __tmp_reg__       \n\t"
+        "  add %[i], __tmp_reg__ \n\t"
+        "  subi %[i], 224        \n\t"
+        "  rjmp Ldone_%=         \n\t"
+
+        // start or end case
+        "Lshift_%=:              \n\t"
+        "  lsr %[i]              \n\t"
+        "  subi %[i], 96         \n\t"
+
+        "Ldone_%=:               \n\t"
+
+        : [i] "+&a" (i)
+        :
+        : "r0", "r1"
+        );
+    return i;
+}
+#else
+#error "No implementation for ease8 available."
+#endif
+
+
+
+/// triwave8: triangle (sawtooth) wave generator.  Useful for
+///           turning a one-byte ever-increasing value into a
+///           one-byte value that oscillates up and down.
+///
+///           input         output
+///           0..127        0..254 (positive slope)
+///           128..255      254..0 (negative slope)
+///
+/// On AVR this function takes just three cycles.
+///
+LIB8STATIC uint8_t triwave8(uint8_t in)
+{
+    if( in & 0x80) {
+        in = 255 - in;
+    }
+    uint8_t out = in << 1;
+    return out;
+}
+
+
+// quadwave8 and cubicwave8: S-shaped wave generators (like 'sine').
+//           Useful for turning a one-byte 'counter' value into a
+//           one-byte oscillating value that moves smoothly up and down,
+//           with an 'acceleration' and 'deceleration' curve.
+//
+//           These are even faster than 'sin8', and have
+//           slightly different curve shapes.
+//
+
+/// quadwave8: quadratic waveform generator.  Spends just a little more
+///            time at the limits than 'sine' does.
+LIB8STATIC uint8_t quadwave8(uint8_t in)
+{
+    return ease8InOutQuad( triwave8( in));
+}
+
+/// cubicwave8: cubic waveform generator.  Spends visibly more time
+///             at the limits than 'sine' does.
+LIB8STATIC uint8_t cubicwave8(uint8_t in)
+{
+    return ease8InOutCubic( triwave8( in));
+}
+
+/// squarewave8: square wave generator.  Useful for
+///           turning a one-byte ever-increasing value
+///           into a one-byte value that is either 0 or 255.
+///           The width of the output 'pulse' is
+///           determined by the pulsewidth argument:
+///
+///~~~
+///           If pulsewidth is 255, output is always 255.
+///           If pulsewidth < 255, then
+///             if input < pulsewidth  then output is 255
+///             if input >= pulsewidth then output is 0
+///~~~
+///
+/// the output looking like:
+///
+///~~~
+///     255   +--pulsewidth--+
+///      .    |              |
+///      0    0              +--------(256-pulsewidth)--------
+///~~~
+///
+/// @param in
+/// @param pulsewidth
+/// @returns square wave output
+LIB8STATIC uint8_t squarewave8( uint8_t in, uint8_t pulsewidth)
+{
+    if( in < pulsewidth || (pulsewidth == 255)) {
+        return 255;
+    } else {
+        return 0;
+    }
+}
+
+
+// Beat generators - These functions produce waves at a given
+//                   number of 'beats per minute'.  Internally, they use
+//                   the Arduino function 'millis' to track elapsed time.
+//                   Accuracy is a bit better than one part in a thousand.
+//
+//       beat8( BPM ) returns an 8-bit value that cycles 'BPM' times
+//                    per minute, rising from 0 to 255, resetting to zero,
+//                    rising up again, etc..  The output of this function
+//                    is suitable for feeding directly into sin8, and cos8,
+//                    triwave8, quadwave8, and cubicwave8.
+//       beat16( BPM ) returns a 16-bit value that cycles 'BPM' times
+//                    per minute, rising from 0 to 65535, resetting to zero,
+//                    rising up again, etc.  The output of this function is
+//                    suitable for feeding directly into sin16 and cos16.
+//       beat88( BPM88) is the same as beat16, except that the BPM88 argument
+//                    MUST be in Q8.8 fixed point format, e.g. 120BPM must
+//                    be specified as 120*256 = 30720.
+//       beatsin8( BPM, uint8_t low, uint8_t high) returns an 8-bit value that
+//                    rises and falls in a sine wave, 'BPM' times per minute,
+//                    between the values of 'low' and 'high'.
+//       beatsin16( BPM, uint16_t low, uint16_t high) returns a 16-bit value
+//                    that rises and falls in a sine wave, 'BPM' times per
+//                    minute, between the values of 'low' and 'high'.
+//       beatsin88( BPM88, ...) is the same as beatsin16, except that the
+//                    BPM88 argument MUST be in Q8.8 fixed point format,
+//                    e.g. 120BPM must be specified as 120*256 = 30720.
+//
+//  BPM can be supplied two ways.  The simpler way of specifying BPM is as
+//  a simple 8-bit integer from 1-255, (e.g., "120").
+//  The more sophisticated way of specifying BPM allows for fractional
+//  "Q8.8" f
author	XScorpion2 <rcalt2vt@gmail.com>	2019-04-02 19:24:14 -0500
committer	Drashna Jaelre <drashna@live.com>	2019-04-02 17:24:14 -0700
commit	c98247e3dd2958bd2d8969dc75170e7e2757b895 (patch)
tree	a566de223a9501809e1059c522b52adf7d37fe74 /lib
parent	68d8bb2b3fb8a35fda164539d27754b3f74e0819 (diff)