You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

552 lines
15 KiB

  1. #ifndef __INC_LIB8TION_MATH_H
  2. #define __INC_LIB8TION_MATH_H
  3. #include "scale8.h"
  4. ///@ingroup lib8tion
  5. ///@defgroup Math Basic math operations
  6. /// Fast, efficient 8-bit math functions specifically
  7. /// designed for high-performance LED programming.
  8. ///
  9. /// Because of the AVR(Arduino) and ARM assembly language
  10. /// implementations provided, using these functions often
  11. /// results in smaller and faster code than the equivalent
  12. /// program using plain "C" arithmetic and logic.
  13. ///@{
/// add one byte to another, saturating at 0xFF
/// @param i - first byte to add
/// @param j - second byte to add
/// @returns the sum of i & j, capped at 0xFF
LIB8STATIC_ALWAYS_INLINE uint8_t qadd8( uint8_t i, uint8_t j)
{
#if QADD8_C == 1
    // Widen to 16 bits so the sum cannot wrap, then clamp.
    uint16_t t = i + j;
    if (t > 255) t = 255;
    return t;
#elif QADD8_AVRASM == 1
    asm volatile(
        /* First, add j to i, conditioning the C flag */
        "add %0, %1 \n\t"
        /* Now test the C flag.
        If C is clear, we branch around a load of 0xFF into i.
        If C is set, we go ahead and load 0xFF into i.
        */
        "brcc L_%= \n\t"
        "ldi %0, 0xFF \n\t"
        "L_%=: "
        : "+a" (i)
        : "a" (j) );
    return i;
#elif QADD8_ARM_DSP_ASM == 1
    /* ARM DSP extension: UQADD8 does a saturating unsigned byte add. */
    asm volatile( "uqadd8 %0, %0, %1" : "+r" (i) : "r" (j));
    return i;
#else
#error "No implementation for qadd8 available."
#endif
}
  45. /// Add one byte to another, saturating at 0x7F
  46. /// @param i - first byte to add
  47. /// @param j - second byte to add
  48. /// @returns the sum of i & j, capped at 0xFF
  49. LIB8STATIC_ALWAYS_INLINE int8_t qadd7( int8_t i, int8_t j)
  50. {
  51. #if QADD7_C == 1
  52. int16_t t = i + j;
  53. if (t > 127) t = 127;
  54. return t;
  55. #elif QADD7_AVRASM == 1
  56. asm volatile(
  57. /* First, add j to i, conditioning the V flag */
  58. "add %0, %1 \n\t"
  59. /* Now test the V flag.
  60. If V is clear, we branch around a load of 0x7F into i.
  61. If V is set, we go ahead and load 0x7F into i.
  62. */
  63. "brvc L_%= \n\t"
  64. "ldi %0, 0x7F \n\t"
  65. "L_%=: "
  66. : "+a" (i)
  67. : "a" (j) );
  68. return i;
  69. #elif QADD7_ARM_DSP_ASM == 1
  70. asm volatile( "qadd8 %0, %0, %1" : "+r" (i) : "r" (j));
  71. return i;
  72. #else
  73. #error "No implementation for qadd7 available."
  74. #endif
  75. }
/// subtract one byte from another, saturating at 0x00
/// @param i - the byte to subtract from
/// @param j - the byte to subtract
/// @returns i - j with a floor of 0
LIB8STATIC_ALWAYS_INLINE uint8_t qsub8( uint8_t i, uint8_t j)
{
#if QSUB8_C == 1
    // Use a signed 16-bit intermediate so underflow is detectable.
    int16_t t = i - j;
    if (t < 0) t = 0;
    return t;
#elif QSUB8_AVRASM == 1
    asm volatile(
        /* First, subtract j from i, conditioning the C flag */
        "sub %0, %1 \n\t"
        /* Now test the C flag.
        If C is clear, we branch around a load of 0x00 into i.
        If C is set (a borrow occurred, i.e. j > i), we go ahead
        and load 0x00 into i.
        */
        "brcc L_%= \n\t"
        "ldi %0, 0x00 \n\t"
        "L_%=: "
        : "+a" (i)
        : "a" (j) );
    return i;
#else
#error "No implementation for qsub8 available."
#endif
}
  102. /// add one byte to another, with one byte result
  103. LIB8STATIC_ALWAYS_INLINE uint8_t add8( uint8_t i, uint8_t j)
  104. {
  105. #if ADD8_C == 1
  106. uint16_t t = i + j;
  107. return t;
  108. #elif ADD8_AVRASM == 1
  109. // Add j to i, period.
  110. asm volatile( "add %0, %1" : "+a" (i) : "a" (j));
  111. return i;
  112. #else
  113. #error "No implementation for add8 available."
  114. #endif
  115. }
  116. /// add one byte to another, with one byte result
  117. LIB8STATIC_ALWAYS_INLINE uint16_t add8to16( uint8_t i, uint16_t j)
  118. {
  119. #if ADD8_C == 1
  120. uint16_t t = i + j;
  121. return t;
  122. #elif ADD8_AVRASM == 1
  123. // Add i(one byte) to j(two bytes)
  124. asm volatile( "add %A[j], %[i] \n\t"
  125. "adc %B[j], __zero_reg__ \n\t"
  126. : [j] "+a" (j)
  127. : [i] "a" (i)
  128. );
  129. return i;
  130. #else
  131. #error "No implementation for add8to16 available."
  132. #endif
  133. }
  134. /// subtract one byte from another, 8-bit result
  135. LIB8STATIC_ALWAYS_INLINE uint8_t sub8( uint8_t i, uint8_t j)
  136. {
  137. #if SUB8_C == 1
  138. int16_t t = i - j;
  139. return t;
  140. #elif SUB8_AVRASM == 1
  141. // Subtract j from i, period.
  142. asm volatile( "sub %0, %1" : "+a" (i) : "a" (j));
  143. return i;
  144. #else
  145. #error "No implementation for sub8 available."
  146. #endif
  147. }
/// Calculate an integer average of two unsigned
/// 8-bit integer values (uint8_t).
/// Fractional results are rounded down, e.g. avg8(20,41) = 30
/// @param i - first value
/// @param j - second value
/// @returns (i + j) / 2, rounded down
LIB8STATIC_ALWAYS_INLINE uint8_t avg8( uint8_t i, uint8_t j)
{
#if AVG8_C == 1
    // i + j promotes to int, so the 9th bit is preserved before
    // the shift; no overflow is possible here.
    return (i + j) >> 1;
#elif AVG8_AVRASM == 1
    asm volatile(
        /* First, add j to i, 9th bit overflows into C flag */
        "add %0, %1 \n\t"
        /* Divide by two, moving C flag into high 8th bit */
        "ror %0 \n\t"
        : "+a" (i)
        : "a" (j) );
    return i;
#else
#error "No implementation for avg8 available."
#endif
}
  168. /// Calculate an integer average of two unsigned
  169. /// 16-bit integer values (uint16_t).
  170. /// Fractional results are rounded down, e.g. avg16(20,41) = 30
  171. LIB8STATIC_ALWAYS_INLINE uint16_t avg16( uint16_t i, uint16_t j)
  172. {
  173. #if AVG16_C == 1
  174. return (uint32_t)((uint32_t)(i) + (uint32_t)(j)) >> 1;
  175. #elif AVG16_AVRASM == 1
  176. asm volatile(
  177. /* First, add jLo (heh) to iLo, 9th bit overflows into C flag */
  178. "add %A[i], %A[j] \n\t"
  179. /* Now, add C + jHi to iHi, 17th bit overflows into C flag */
  180. "adc %B[i], %B[j] \n\t"
  181. /* Divide iHi by two, moving C flag into high 16th bit, old 9th bit now in C */
  182. "ror %B[i] \n\t"
  183. /* Divide iLo by two, moving C flag into high 8th bit */
  184. "ror %A[i] \n\t"
  185. : [i] "+a" (i)
  186. : [j] "a" (j) );
  187. return i;
  188. #else
  189. #error "No implementation for avg16 available."
  190. #endif
  191. }
  192. /// Calculate an integer average of two signed 7-bit
  193. /// integers (int8_t)
  194. /// If the first argument is even, result is rounded down.
  195. /// If the first argument is odd, result is result up.
  196. LIB8STATIC_ALWAYS_INLINE int8_t avg7( int8_t i, int8_t j)
  197. {
  198. #if AVG7_C == 1
  199. return ((i + j) >> 1) + (i & 0x1);
  200. #elif AVG7_AVRASM == 1
  201. asm volatile(
  202. "asr %1 \n\t"
  203. "asr %0 \n\t"
  204. "adc %0, %1 \n\t"
  205. : "+a" (i)
  206. : "a" (j) );
  207. return i;
  208. #else
  209. #error "No implementation for avg7 available."
  210. #endif
  211. }
  212. /// Calculate an integer average of two signed 15-bit
  213. /// integers (int16_t)
  214. /// If the first argument is even, result is rounded down.
  215. /// If the first argument is odd, result is result up.
  216. LIB8STATIC_ALWAYS_INLINE int16_t avg15( int16_t i, int16_t j)
  217. {
  218. #if AVG15_C == 1
  219. return ((int32_t)((int32_t)(i) + (int32_t)(j)) >> 1) + (i & 0x1);
  220. #elif AVG15_AVRASM == 1
  221. asm volatile(
  222. /* first divide j by 2, throwing away lowest bit */
  223. "asr %B[j] \n\t"
  224. "ror %A[j] \n\t"
  225. /* now divide i by 2, with lowest bit going into C */
  226. "asr %B[i] \n\t"
  227. "ror %A[i] \n\t"
  228. /* add j + C to i */
  229. "adc %A[i], %A[j] \n\t"
  230. "adc %B[i], %B[j] \n\t"
  231. : [i] "+a" (i)
  232. : [j] "a" (j) );
  233. return i;
  234. #else
  235. #error "No implementation for avg15 available."
  236. #endif
  237. }
/// Calculate the remainder of one unsigned 8-bit
/// value divided by another, aka A % M.
/// Implemented by repeated subtraction, which is
/// very compact, and very fast if A is 'probably'
/// less than M. If A is a large multiple of M,
/// the loop has to execute multiple times. However,
/// even in that case, the loop is only two
/// instructions long on AVR, i.e., quick.
/// NOTE(review): if m == 0, both the AVR loop and the
/// C while-loop never terminate -- callers must pass m >= 1.
/// @param a - dividend
/// @param m - divisor (modulus)
/// @returns a % m
LIB8STATIC_ALWAYS_INLINE uint8_t mod8( uint8_t a, uint8_t m)
{
#if defined(__AVR__)
    asm volatile (
        /* Subtract m until a borrow occurs (C set), then add one m back */
        "L_%=: sub %[a],%[m] \n\t"
        " brcc L_%= \n\t"
        " add %[a],%[m] \n\t"
        : [a] "+r" (a)
        : [m] "r" (m)
        );
#else
    while( a >= m) a -= m;
#endif
    return a;
}
/// Add two numbers, and calculate the modulo
/// of the sum and a third number, M.
/// In other words, it returns (A+B) % M.
/// It is designed as a compact mechanism for
/// incrementing a 'mode' switch and wrapping
/// around back to 'mode 0' when the switch
/// goes past the end of the available range.
/// e.g. if you have seven modes, this switches
/// to the next one and wraps around if needed:
/// mode = addmod8( mode, 1, 7);
/// See 'mod8' for notes on performance
/// (including the m == 0 hazard).
/// @param a - first addend
/// @param b - second addend
/// @param m - modulus
/// @returns (a + b) % m, where the sum wraps at 256 first
LIB8STATIC uint8_t addmod8( uint8_t a, uint8_t b, uint8_t m)
{
#if defined(__AVR__)
    asm volatile (
        /* a += b (wraps at 256), then reduce by repeated subtraction */
        " add %[a],%[b] \n\t"
        "L_%=: sub %[a],%[m] \n\t"
        " brcc L_%= \n\t"
        " add %[a],%[m] \n\t"
        : [a] "+r" (a)
        : [b] "r" (b), [m] "r" (m)
        );
#else
    a += b;
    while( a >= m) a -= m;
#endif
    return a;
}
/// Subtract two numbers, and calculate the modulo
/// of the difference and a third number, M.
/// In other words, it returns (A-B) % M.
/// It is designed as a compact mechanism for
/// decrementing a 'mode' switch and wrapping
/// around back to the end of the range when the
/// switch goes below 'mode 0'.
/// e.g. if you have seven modes, this switches
/// to the previous one and wraps around if needed:
/// mode = submod8( mode, 1, 7);
/// See 'mod8' for notes on performance
/// (including the m == 0 hazard).
/// @param a - the minuend
/// @param b - the subtrahend
/// @param m - modulus
/// @returns (a - b) % m, where the difference wraps at 256 first
LIB8STATIC uint8_t submod8( uint8_t a, uint8_t b, uint8_t m)
{
#if defined(__AVR__)
    asm volatile (
        /* a -= b (wraps at 256), then reduce by repeated subtraction */
        " sub %[a],%[b] \n\t"
        "L_%=: sub %[a],%[m] \n\t"
        " brcc L_%= \n\t"
        " add %[a],%[m] \n\t"
        : [a] "+r" (a)
        : [b] "r" (b), [m] "r" (m)
        );
#else
    a -= b;
    while( a >= m) a -= m;
#endif
    return a;
}
/// 8x8 bit multiplication, with 8 bit result
/// @param i - first factor
/// @param j - second factor
/// @returns the low 8 bits of i * j
LIB8STATIC_ALWAYS_INLINE uint8_t mul8( uint8_t i, uint8_t j)
{
#if MUL8_C == 1
    // Multiply in 16 bits, keep only the low byte.
    return ((uint16_t)i * (uint16_t)(j) ) & 0xFF;
#elif MUL8_AVRASM == 1
    asm volatile(
        /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */
        "mul %0, %1 \n\t"
        /* Extract the LOW 8-bits (r0) */
        "mov %0, r0 \n\t"
        /* Restore r1 to "0"; it's expected to always be that */
        "clr __zero_reg__ \n\t"
        : "+a" (i)
        : "a" (j)
        : "r0", "r1");
    return i;
#else
#error "No implementation for mul8 available."
#endif
}
/// saturating 8x8 bit multiplication, with 8 bit result
/// @param i - first factor
/// @param j - second factor
/// @returns the product of i * j, capping at 0xFF
LIB8STATIC_ALWAYS_INLINE uint8_t qmul8( uint8_t i, uint8_t j)
{
#if QMUL8_C == 1
    // Multiply in 16 bits (max 255*255 = 65025 fits easily), then clamp.
    int p = ((uint16_t)i * (uint16_t)(j) );
    if( p > 255) p = 255;
    return p;
#elif QMUL8_AVRASM == 1
    asm volatile(
        /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */
        " mul %0, %1 \n\t"
        /* If high byte of result is zero, all is well. */
        " tst r1 \n\t"
        " breq Lnospill_%= \n\t"
        /* If high byte of result > 0, saturate low byte to 0xFF */
        " ldi %0,0xFF \n\t"
        " rjmp Ldone_%= \n\t"
        "Lnospill_%=: \n\t"
        /* Extract the LOW 8-bits (r0) */
        " mov %0, r0 \n\t"
        "Ldone_%=: \n\t"
        /* Restore r1 to "0"; it's expected to always be that */
        " clr __zero_reg__ \n\t"
        : "+a" (i)
        : "a" (j)
        : "r0", "r1");
    return i;
#else
#error "No implementation for qmul8 available."
#endif
}
/// take abs() of a signed 8-bit int8_t
/// @param i - the value
/// @returns the absolute value of i
/// NOTE(review): abs8(-128) has no representable positive result;
/// on typical two's-complement targets it comes back as -128.
LIB8STATIC_ALWAYS_INLINE int8_t abs8( int8_t i)
{
#if ABS8_C == 1
    if( i < 0) i = -i;
    return i;
#elif ABS8_AVRASM == 1
    asm volatile(
        /* First, check the high bit, and prepare to skip if it's clear */
        "sbrc %0, 7 \n"
        /* Negate the value */
        "neg %0 \n"
        /* NOTE(review): the separate "r"(i) input operand below is
        redundant with the "+r"(i) read-write operand. */
        : "+r" (i) : "r" (i) );
    return i;
#else
#error "No implementation for abs8 available."
#endif
}
  388. /// square root for 16-bit integers
  389. /// About three times faster and five times smaller
  390. /// than Arduino's general sqrt on AVR.
  391. LIB8STATIC uint8_t sqrt16(uint16_t x)
  392. {
  393. if( x <= 1) {
  394. return x;
  395. }
  396. uint8_t low = 1; // lower bound
  397. uint8_t hi, mid;
  398. if( x > 7904) {
  399. hi = 255;
  400. } else {
  401. hi = (x >> 5) + 8; // initial estimate for upper bound
  402. }
  403. do {
  404. mid = (low + hi) >> 1;
  405. if ((uint16_t)(mid * mid) > x) {
  406. hi = mid - 1;
  407. } else {
  408. if( mid == 255) {
  409. return 255;
  410. }
  411. low = mid + 1;
  412. }
  413. } while (hi >= low);
  414. return low - 1;
  415. }
/// blend a variable proportion (0-255) of one byte to another
/// @param a - the starting byte value
/// @param b - the byte value to blend toward
/// @param amountOfB - the proportion (0-255) of b to blend
/// @returns a byte value between a and b, inclusive
#if (FASTLED_BLEND_FIXED == 1)
LIB8STATIC uint8_t blend8( uint8_t a, uint8_t b, uint8_t amountOfB)
{
#if BLEND8_C == 1
    uint16_t partial;
    uint8_t result;

    // Weighted 16-bit sum: a*(255-amountOfB) + b*amountOfB,
    // whose high byte is the blended value.
    uint8_t amountOfA = 255 - amountOfB;

    partial = (a * amountOfA);
#if (FASTLED_SCALE8_FIXED == 1)
    // "Fixed" scaling adds each base value back in, so that
    // blending with amountOfB == 255 yields exactly b (and 0 yields a).
    partial += a;
    //partial = add8to16( a, partial);
#endif

    partial += (b * amountOfB);
#if (FASTLED_SCALE8_FIXED == 1)
    partial += b;
    //partial = add8to16( b, partial);
#endif

    result = partial >> 8;

    return result;

#elif BLEND8_AVRASM == 1
    uint16_t partial;
    uint8_t result;

    // NOTE(review): [partial] is written (movw) before the inputs are
    // all consumed; a "=&r" early-clobber constraint would be safer --
    // confirm the register allocator never overlaps it with a or b.
    asm volatile (
        /* partial = b * amountOfB */
        " mul %[b], %[amountOfB] \n\t"
        " movw %A[partial], r0 \n\t"

        /* amountOfB (aka amountOfA) = 255 - amountOfB */
        " com %[amountOfB] \n\t"

        /* partial += a * amountOfB (aka amountOfA) */
        " mul %[a], %[amountOfB] \n\t"

        " add %A[partial], r0 \n\t"
        " adc %B[partial], r1 \n\t"

        " clr __zero_reg__ \n\t"

#if (FASTLED_SCALE8_FIXED == 1)
        /* partial += a */
        " add %A[partial], %[a] \n\t"
        " adc %B[partial], __zero_reg__ \n\t"

        // partial += b
        " add %A[partial], %[b] \n\t"
        " adc %B[partial], __zero_reg__ \n\t"
#endif

        : [partial] "=r" (partial),
          [amountOfB] "+a" (amountOfB)
        : [a] "a" (a),
          [b] "a" (b)
        : "r0", "r1"
        );

    result = partial >> 8;

    return result;

#else
#error "No implementation for blend8 available."
#endif
}
#else
LIB8STATIC uint8_t blend8( uint8_t a, uint8_t b, uint8_t amountOfB)
{
    // This version loses precision in the integer math
    // and can actually return results outside of the range
    // from a to b. Its use is not recommended.
    uint8_t result;
    uint8_t amountOfA = 255 - amountOfB;
    // Sum of the two scaled halves; cleanup_R1() restores the AVR
    // zero register dirtied by the scale8_LEAVING_R1_DIRTY calls.
    result = scale8_LEAVING_R1_DIRTY( a, amountOfA)
           + scale8_LEAVING_R1_DIRTY( b, amountOfB);
    cleanup_R1();
    return result;
}
#endif
  488. ///@}
  489. #endif