You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

542 lines
18 KiB

  1. #ifndef __INC_LIB8TION_SCALE_H
  2. #define __INC_LIB8TION_SCALE_H
  3. ///@ingroup lib8tion
  4. ///@defgroup Scaling Scaling functions
  5. /// Fast, efficient 8-bit scaling functions specifically
  6. /// designed for high-performance LED programming.
  7. ///
  8. /// Because of the AVR(Arduino) and ARM assembly language
  9. /// implementations provided, using these functions often
  10. /// results in smaller and faster code than the equivalent
  11. /// program using plain "C" arithmetic and logic.
  12. ///@{
  13. /// scale one byte by a second one, which is treated as
  14. /// the numerator of a fraction whose denominator is 256
  15. /// In other words, it computes i * (scale / 256)
  16. /// 4 clocks AVR with MUL, 2 clocks ARM
  17. LIB8STATIC_ALWAYS_INLINE uint8_t scale8( uint8_t i, fract8 scale)
  18. {
  19. #if SCALE8_C == 1
  20. #if (FASTLED_SCALE8_FIXED == 1)
  21. return (((uint16_t)i) * (1+(uint16_t)(scale))) >> 8;
  22. #else
  23. return ((uint16_t)i * (uint16_t)(scale) ) >> 8;
  24. #endif
  25. #elif SCALE8_AVRASM == 1
  26. #if defined(LIB8_ATTINY)
  27. #if (FASTLED_SCALE8_FIXED == 1)
  28. uint8_t work=i;
  29. #else
  30. uint8_t work=0;
  31. #endif
  32. uint8_t cnt=0x80;
  33. asm volatile(
  34. #if (FASTLED_SCALE8_FIXED == 1)
  35. " inc %[scale] \n\t"
  36. " breq DONE_%= \n\t"
  37. " clr %[work] \n\t"
  38. #endif
  39. "LOOP_%=: \n\t"
  40. /*" sbrc %[scale], 0 \n\t"
  41. " add %[work], %[i] \n\t"
  42. " ror %[work] \n\t"
  43. " lsr %[scale] \n\t"
  44. " clc \n\t"*/
  45. " sbrc %[scale], 0 \n\t"
  46. " add %[work], %[i] \n\t"
  47. " ror %[work] \n\t"
  48. " lsr %[scale] \n\t"
  49. " lsr %[cnt] \n\t"
  50. "brcc LOOP_%= \n\t"
  51. "DONE_%=: \n\t"
  52. : [work] "+r" (work), [cnt] "+r" (cnt)
  53. : [scale] "r" (scale), [i] "r" (i)
  54. :
  55. );
  56. return work;
  57. #else
  58. asm volatile(
  59. #if (FASTLED_SCALE8_FIXED==1)
  60. // Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0
  61. "mul %0, %1 \n\t"
  62. // Add i to r0, possibly setting the carry flag
  63. "add r0, %0 \n\t"
  64. // load the immediate 0 into i (note, this does _not_ touch any flags)
  65. "ldi %0, 0x00 \n\t"
  66. // walk and chew gum at the same time
  67. "adc %0, r1 \n\t"
  68. #else
  69. /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */
  70. "mul %0, %1 \n\t"
  71. /* Move the high 8-bits of the product (r1) back to i */
  72. "mov %0, r1 \n\t"
  73. /* Restore r1 to "0"; it's expected to always be that */
  74. #endif
  75. "clr __zero_reg__ \n\t"
  76. : "+a" (i) /* writes to i */
  77. : "a" (scale) /* uses scale */
  78. : "r0", "r1" /* clobbers r0, r1 */ );
  79. /* Return the result */
  80. return i;
  81. #endif
  82. #else
  83. #error "No implementation for scale8 available."
  84. #endif
  85. }
  86. /// The "video" version of scale8 guarantees that the output will
  87. /// be only be zero if one or both of the inputs are zero. If both
  88. /// inputs are non-zero, the output is guaranteed to be non-zero.
  89. /// This makes for better 'video'/LED dimming, at the cost of
  90. /// several additional cycles.
  91. LIB8STATIC_ALWAYS_INLINE uint8_t scale8_video( uint8_t i, fract8 scale)
  92. {
  93. #if SCALE8_C == 1 || defined(LIB8_ATTINY)
  94. uint8_t j = (((int)i * (int)scale) >> 8) + ((i&&scale)?1:0);
  95. // uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
  96. // uint8_t j = (i == 0) ? 0 : (((int)i * (int)(scale) ) >> 8) + nonzeroscale;
  97. return j;
  98. #elif SCALE8_AVRASM == 1
  99. uint8_t j=0;
  100. asm volatile(
  101. " tst %[i]\n\t"
  102. " breq L_%=\n\t"
  103. " mul %[i], %[scale]\n\t"
  104. " mov %[j], r1\n\t"
  105. " clr __zero_reg__\n\t"
  106. " cpse %[scale], r1\n\t"
  107. " subi %[j], 0xFF\n\t"
  108. "L_%=: \n\t"
  109. : [j] "+a" (j)
  110. : [i] "a" (i), [scale] "a" (scale)
  111. : "r0", "r1");
  112. return j;
  113. // uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
  114. // asm volatile(
  115. // " tst %0 \n"
  116. // " breq L_%= \n"
  117. // " mul %0, %1 \n"
  118. // " mov %0, r1 \n"
  119. // " add %0, %2 \n"
  120. // " clr __zero_reg__ \n"
  121. // "L_%=: \n"
  122. // : "+a" (i)
  123. // : "a" (scale), "a" (nonzeroscale)
  124. // : "r0", "r1");
  125. // // Return the result
  126. // return i;
  127. #else
  128. #error "No implementation for scale8_video available."
  129. #endif
  130. }
  131. /// This version of scale8 does not clean up the R1 register on AVR
  132. /// If you are doing several 'scale8's in a row, use this, and
  133. /// then explicitly call cleanup_R1.
  134. LIB8STATIC_ALWAYS_INLINE uint8_t scale8_LEAVING_R1_DIRTY( uint8_t i, fract8 scale)
  135. {
  136. #if SCALE8_C == 1
  137. #if (FASTLED_SCALE8_FIXED == 1)
  138. return (((uint16_t)i) * ((uint16_t)(scale)+1)) >> 8;
  139. #else
  140. return ((int)i * (int)(scale) ) >> 8;
  141. #endif
  142. #elif SCALE8_AVRASM == 1
  143. asm volatile(
  144. #if (FASTLED_SCALE8_FIXED==1)
  145. // Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0
  146. "mul %0, %1 \n\t"
  147. // Add i to r0, possibly setting the carry flag
  148. "add r0, %0 \n\t"
  149. // load the immediate 0 into i (note, this does _not_ touch any flags)
  150. "ldi %0, 0x00 \n\t"
  151. // walk and chew gum at the same time
  152. "adc %0, r1 \n\t"
  153. #else
  154. /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */
  155. "mul %0, %1 \n\t"
  156. /* Move the high 8-bits of the product (r1) back to i */
  157. "mov %0, r1 \n\t"
  158. #endif
  159. /* R1 IS LEFT DIRTY HERE; YOU MUST ZERO IT OUT YOURSELF */
  160. /* "clr __zero_reg__ \n\t" */
  161. : "+a" (i) /* writes to i */
  162. : "a" (scale) /* uses scale */
  163. : "r0", "r1" /* clobbers r0, r1 */ );
  164. // Return the result
  165. return i;
  166. #else
  167. #error "No implementation for scale8_LEAVING_R1_DIRTY available."
  168. #endif
  169. }
  170. /// This version of scale8_video does not clean up the R1 register on AVR
  171. /// If you are doing several 'scale8_video's in a row, use this, and
  172. /// then explicitly call cleanup_R1.
  173. LIB8STATIC_ALWAYS_INLINE uint8_t scale8_video_LEAVING_R1_DIRTY( uint8_t i, fract8 scale)
  174. {
  175. #if SCALE8_C == 1 || defined(LIB8_ATTINY)
  176. uint8_t j = (((int)i * (int)scale) >> 8) + ((i&&scale)?1:0);
  177. // uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
  178. // uint8_t j = (i == 0) ? 0 : (((int)i * (int)(scale) ) >> 8) + nonzeroscale;
  179. return j;
  180. #elif SCALE8_AVRASM == 1
  181. uint8_t j=0;
  182. asm volatile(
  183. " tst %[i]\n\t"
  184. " breq L_%=\n\t"
  185. " mul %[i], %[scale]\n\t"
  186. " mov %[j], r1\n\t"
  187. " breq L_%=\n\t"
  188. " subi %[j], 0xFF\n\t"
  189. "L_%=: \n\t"
  190. : [j] "+a" (j)
  191. : [i] "a" (i), [scale] "a" (scale)
  192. : "r0", "r1");
  193. return j;
  194. // uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
  195. // asm volatile(
  196. // " tst %0 \n"
  197. // " breq L_%= \n"
  198. // " mul %0, %1 \n"
  199. // " mov %0, r1 \n"
  200. // " add %0, %2 \n"
  201. // " clr __zero_reg__ \n"
  202. // "L_%=: \n"
  203. // : "+a" (i)
  204. // : "a" (scale), "a" (nonzeroscale)
  205. // : "r0", "r1");
  206. // // Return the result
  207. // return i;
  208. #else
  209. #error "No implementation for scale8_video_LEAVING_R1_DIRTY available."
  210. #endif
  211. }
  212. /// Clean up the r1 register after a series of *LEAVING_R1_DIRTY calls
  213. LIB8STATIC_ALWAYS_INLINE void cleanup_R1(void)
  214. {
  215. #if CLEANUP_R1_AVRASM == 1
  216. // Restore r1 to "0"; it's expected to always be that
  217. asm volatile( "clr __zero_reg__ \n\t" : : : "r1" );
  218. #endif
  219. }
  220. /// scale a 16-bit unsigned value by an 8-bit value,
  221. /// considered as numerator of a fraction whose denominator
  222. /// is 256. In other words, it computes i * (scale / 256)
  223. LIB8STATIC_ALWAYS_INLINE uint16_t scale16by8( uint16_t i, fract8 scale )
  224. {
  225. #if SCALE16BY8_C == 1
  226. uint16_t result;
  227. #if FASTLED_SCALE8_FIXED == 1
  228. result = (i * (1+((uint16_t)scale))) >> 8;
  229. #else
  230. result = (i * scale) / 256;
  231. #endif
  232. return result;
  233. #elif SCALE16BY8_AVRASM == 1
  234. #if FASTLED_SCALE8_FIXED == 1
  235. uint16_t result = 0;
  236. asm volatile(
  237. // result.A = HighByte( (i.A x scale) + i.A )
  238. " mul %A[i], %[scale] \n\t"
  239. " add r0, %A[i] \n\t"
  240. // " adc r1, [zero] \n\t"
  241. // " mov %A[result], r1 \n\t"
  242. " adc %A[result], r1 \n\t"
  243. // result.A-B += i.B x scale
  244. " mul %B[i], %[scale] \n\t"
  245. " add %A[result], r0 \n\t"
  246. " adc %B[result], r1 \n\t"
  247. // cleanup r1
  248. " clr __zero_reg__ \n\t"
  249. // result.A-B += i.B
  250. " add %A[result], %B[i] \n\t"
  251. " adc %B[result], __zero_reg__ \n\t"
  252. : [result] "+r" (result)
  253. : [i] "r" (i), [scale] "r" (scale)
  254. : "r0", "r1"
  255. );
  256. return result;
  257. #else
  258. uint16_t result = 0;
  259. asm volatile(
  260. // result.A = HighByte(i.A x j )
  261. " mul %A[i], %[scale] \n\t"
  262. " mov %A[result], r1 \n\t"
  263. //" clr %B[result] \n\t"
  264. // result.A-B += i.B x j
  265. " mul %B[i], %[scale] \n\t"
  266. " add %A[result], r0 \n\t"
  267. " adc %B[result], r1 \n\t"
  268. // cleanup r1
  269. " clr __zero_reg__ \n\t"
  270. : [result] "+r" (result)
  271. : [i] "r" (i), [scale] "r" (scale)
  272. : "r0", "r1"
  273. );
  274. return result;
  275. #endif
  276. #else
  277. #error "No implementation for scale16by8 available."
  278. #endif
  279. }
  280. /// scale a 16-bit unsigned value by a 16-bit value,
  281. /// considered as numerator of a fraction whose denominator
  282. /// is 65536. In other words, it computes i * (scale / 65536)
  283. LIB8STATIC uint16_t scale16( uint16_t i, fract16 scale )
  284. {
  285. #if SCALE16_C == 1
  286. uint16_t result;
  287. #if FASTLED_SCALE8_FIXED == 1
  288. result = ((uint32_t)(i) * (1+(uint32_t)(scale))) / 65536;
  289. #else
  290. result = ((uint32_t)(i) * (uint32_t)(scale)) / 65536;
  291. #endif
  292. return result;
  293. #elif SCALE16_AVRASM == 1
  294. #if FASTLED_SCALE8_FIXED == 1
  295. // implemented sort of like
  296. // result = ((i * scale) + i ) / 65536
  297. //
  298. // why not like this, you may ask?
  299. // result = (i * (scale+1)) / 65536
  300. // the answer is that if scale is 65535, then scale+1
  301. // will be zero, which is not what we want.
  302. uint32_t result;
  303. asm volatile(
  304. // result.A-B = i.A x scale.A
  305. " mul %A[i], %A[scale] \n\t"
  306. // save results...
  307. // basic idea:
  308. //" mov %A[result], r0 \n\t"
  309. //" mov %B[result], r1 \n\t"
  310. // which can be written as...
  311. " movw %A[result], r0 \n\t"
  312. // Because we're going to add i.A-B to
  313. // result.A-D, we DO need to keep both
  314. // the r0 and r1 portions of the product
  315. // UNlike in the 'unfixed scale8' version.
  316. // So the movw here is needed.
  317. : [result] "=r" (result)
  318. : [i] "r" (i),
  319. [scale] "r" (scale)
  320. : "r0", "r1"
  321. );
  322. asm volatile(
  323. // result.C-D = i.B x scale.B
  324. " mul %B[i], %B[scale] \n\t"
  325. //" mov %C[result], r0 \n\t"
  326. //" mov %D[result], r1 \n\t"
  327. " movw %C[result], r0 \n\t"
  328. : [result] "+r" (result)
  329. : [i] "r" (i),
  330. [scale] "r" (scale)
  331. : "r0", "r1"
  332. );
  333. const uint8_t zero = 0;
  334. asm volatile(
  335. // result.B-D += i.B x scale.A
  336. " mul %B[i], %A[scale] \n\t"
  337. " add %B[result], r0 \n\t"
  338. " adc %C[result], r1 \n\t"
  339. " adc %D[result], %[zero] \n\t"
  340. // result.B-D += i.A x scale.B
  341. " mul %A[i], %B[scale] \n\t"
  342. " add %B[result], r0 \n\t"
  343. " adc %C[result], r1 \n\t"
  344. " adc %D[result], %[zero] \n\t"
  345. // cleanup r1
  346. " clr r1 \n\t"
  347. : [result] "+r" (result)
  348. : [i] "r" (i),
  349. [scale] "r" (scale),
  350. [zero] "r" (zero)
  351. : "r0", "r1"
  352. );
  353. asm volatile(
  354. // result.A-D += i.A-B
  355. " add %A[result], %A[i] \n\t"
  356. " adc %B[result], %B[i] \n\t"
  357. " adc %C[result], %[zero] \n\t"
  358. " adc %D[result], %[zero] \n\t"
  359. : [result] "+r" (result)
  360. : [i] "r" (i),
  361. [zero] "r" (zero)
  362. );
  363. result = result >> 16;
  364. return result;
  365. #else
  366. uint32_t result;
  367. asm volatile(
  368. // result.A-B = i.A x scale.A
  369. " mul %A[i], %A[scale] \n\t"
  370. // save results...
  371. // basic idea:
  372. //" mov %A[result], r0 \n\t"
  373. //" mov %B[result], r1 \n\t"
  374. // which can be written as...
  375. " movw %A[result], r0 \n\t"
  376. // We actually don't need to do anything with r0,
  377. // as result.A is never used again here, so we
  378. // could just move the high byte, but movw is
  379. // one clock cycle, just like mov, so might as
  380. // well, in case we want to use this code for
  381. // a generic 16x16 multiply somewhere.
  382. : [result] "=r" (result)
  383. : [i] "r" (i),
  384. [scale] "r" (scale)
  385. : "r0", "r1"
  386. );
  387. asm volatile(
  388. // result.C-D = i.B x scale.B
  389. " mul %B[i], %B[scale] \n\t"
  390. //" mov %C[result], r0 \n\t"
  391. //" mov %D[result], r1 \n\t"
  392. " movw %C[result], r0 \n\t"
  393. : [result] "+r" (result)
  394. : [i] "r" (i),
  395. [scale] "r" (scale)
  396. : "r0", "r1"
  397. );
  398. const uint8_t zero = 0;
  399. asm volatile(
  400. // result.B-D += i.B x scale.A
  401. " mul %B[i], %A[scale] \n\t"
  402. " add %B[result], r0 \n\t"
  403. " adc %C[result], r1 \n\t"
  404. " adc %D[result], %[zero] \n\t"
  405. // result.B-D += i.A x scale.B
  406. " mul %A[i], %B[scale] \n\t"
  407. " add %B[result], r0 \n\t"
  408. " adc %C[result], r1 \n\t"
  409. " adc %D[result], %[zero] \n\t"
  410. // cleanup r1
  411. " clr r1 \n\t"
  412. : [result] "+r" (result)
  413. : [i] "r" (i),
  414. [scale] "r" (scale),
  415. [zero] "r" (zero)
  416. : "r0", "r1"
  417. );
  418. result = result >> 16;
  419. return result;
  420. #endif
  421. #else
  422. #error "No implementation for scale16 available."
  423. #endif
  424. }
  425. ///@}
  426. ///@defgroup Dimming Dimming and brightening functions
  427. ///
  428. /// Dimming and brightening functions
  429. ///
  430. /// The eye does not respond in a linear way to light.
  431. /// High speed PWM'd LEDs at 50% duty cycle appear far
  432. /// brighter than the 'half as bright' you might expect.
  433. ///
  434. /// If you want your midpoint brightness level (128) to
  435. /// appear half as bright as 'full' brightness (255), you
  436. /// have to apply a 'dimming function'.
  437. ///@{
  438. /// Adjust a scaling value for dimming
  439. LIB8STATIC uint8_t dim8_raw( uint8_t x)
  440. {
  441. return scale8( x, x);
  442. }
  443. /// Adjust a scaling value for dimming for video (value will never go below 1)
  444. LIB8STATIC uint8_t dim8_video( uint8_t x)
  445. {
  446. return scale8_video( x, x);
  447. }
  448. /// Linear version of the dimming function that halves for values < 128
  449. LIB8STATIC uint8_t dim8_lin( uint8_t x )
  450. {
  451. if( x & 0x80 ) {
  452. x = scale8( x, x);
  453. } else {
  454. x += 1;
  455. x /= 2;
  456. }
  457. return x;
  458. }
  459. /// inverse of the dimming function, brighten a value
  460. LIB8STATIC uint8_t brighten8_raw( uint8_t x)
  461. {
  462. uint8_t ix = 255 - x;
  463. return 255 - scale8( ix, ix);
  464. }
  465. /// inverse of the dimming function, brighten a value
  466. LIB8STATIC uint8_t brighten8_video( uint8_t x)
  467. {
  468. uint8_t ix = 255 - x;
  469. return 255 - scale8_video( ix, ix);
  470. }
  471. /// inverse of the dimming function, brighten a value
  472. LIB8STATIC uint8_t brighten8_lin( uint8_t x )
  473. {
  474. uint8_t ix = 255 - x;
  475. if( ix & 0x80 ) {
  476. ix = scale8( ix, ix);
  477. } else {
  478. ix += 1;
  479. ix /= 2;
  480. }
  481. return 255 - ix;
  482. }
  483. ///@}
  484. #endif