#ifndef __CORE_CM4_SIMD_H
#define __CORE_CM4_SIMD_H
#if defined ( __CC_ARM ) /*------------------ RealView Compiler -----------------*/
/* ARM armcc specific functions */
#define __SADD8                           __sadd8
#define __QADD8                           __qadd8
#define __SHADD8                          __shadd8
#define __UADD8                           __uadd8
#define __UQADD8                          __uqadd8
#define __UHADD8                          __uhadd8
#define __SSUB8                           __ssub8
#define __QSUB8                           __qsub8
#define __SHSUB8                          __shsub8
#define __USUB8                           __usub8
#define __UQSUB8                          __uqsub8
#define __UHSUB8                          __uhsub8
#define __SADD16                          __sadd16
#define __QADD16                          __qadd16
#define __SHADD16                         __shadd16
#define __UADD16                          __uadd16
#define __UQADD16                         __uqadd16
#define __UHADD16                         __uhadd16
#define __SSUB16                          __ssub16
#define __QSUB16                          __qsub16
#define __SHSUB16                         __shsub16
#define __USUB16                          __usub16
#define __UQSUB16                         __uqsub16
#define __UHSUB16                         __uhsub16
#define __SASX                            __sasx
#define __QASX                            __qasx
#define __SHASX                           __shasx
#define __UASX                            __uasx
#define __UQASX                           __uqasx
#define __UHASX                           __uhasx
#define __SSAX                            __ssax
#define __QSAX                            __qsax
#define __SHSAX                           __shsax
#define __USAX                            __usax
#define __UQSAX                           __uqsax
#define __UHSAX                           __uhsax
#define __USAD8                           __usad8
#define __USADA8                          __usada8
#define __SSAT16                          __ssat16
#define __USAT16                          __usat16
#define __UXTB16                          __uxtb16
#define __UXTAB16                         __uxtab16
#define __SXTB16                          __sxtb16
#define __SXTAB16                         __sxtab16
#define __SMUAD                           __smuad
#define __SMUADX                          __smuadx
#define __SMLAD                           __smlad
#define __SMLADX                          __smladx
#define __SMLALD                          __smlald
#define __SMLALDX                         __smlaldx
#define __SMUSD                           __smusd
#define __SMUSDX                          __smusdx
#define __SMLSD                           __smlsd
#define __SMLSDX                          __smlsdx
#define __SMLSLD                          __smlsld
#define __SMLSLDX                         __smlsldx
#define __SEL                             __sel
#define __QADD                            __qadd
#define __QSUB                            __qsub
#define __PKHBT(ARG1,ARG2,ARG3)          ( ((((uint32_t)(ARG1))          ) & 0x0000FFFFUL) |  \
                                           ((((uint32_t)(ARG2)) << (ARG3)) & 0xFFFF0000UL)  )

#define __PKHTB(ARG1,ARG2,ARG3)          ( ((((uint32_t)(ARG1))          ) & 0xFFFF0000UL) |  \
                                           ((((uint32_t)(ARG2)) >> (ARG3)) & 0x0000FFFFUL)  )

#define __SMMLA(ARG1,ARG2,ARG3)          ( (int32_t)((((int64_t)(ARG1) * (ARG2)) + \
                                                      ((int64_t)(ARG3) << 32)      ) >> 32))
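
/* Usage sketch (illustrative comment, not part of the original header): __PKHBT
   packs the low halfword of ARG1 with the low halfword of ARG2 shifted left by
   ARG3 into the top half of the result:

     uint32_t packed = __PKHBT(0x1234UL, 0x5678UL, 16);   yields 0x56781234UL

   __PKHTB is the mirror-image pack, keeping the top halfword of ARG1. */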
#elif defined ( __ICCARM__ ) /*------------------ ICC Compiler -------------------*/
/* IAR iccarm specific functions */
#include <cmsis_iar.h>


#elif defined ( __TMS470__ ) /*---------------- TI CCS Compiler ------------------*/
/* TI CCS specific functions */
#include <cmsis_ccs.h>


#elif defined ( __GNUC__ ) /*------------------ GNU Compiler ---------------------*/
/* GNU gcc specific functions */
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SADD8(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("sadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QADD8(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("qadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHADD8(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("shadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UADD8(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQADD8(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uqadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHADD8(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uhadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SSUB8(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("ssub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QSUB8(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("qsub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHSUB8(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("shsub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __USUB8(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("usub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQSUB8(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uqsub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHSUB8(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uhsub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SADD16(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("sadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QADD16(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("qadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHADD16(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("shadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UADD16(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQADD16(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uqadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHADD16(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uhadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SSUB16(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("ssub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QSUB16(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("qsub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHSUB16(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("shsub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __USUB16(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("usub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQSUB16(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uqsub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHSUB16(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uhsub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SASX(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("sasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QASX(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("qasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHASX(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("shasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UASX(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQASX(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uqasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHASX(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uhasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SSAX(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("ssax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QSAX(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("qsax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHSAX(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("shsax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __USAX(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("usax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQSAX(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uqsax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHSAX(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uhsax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __USAD8(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("usad8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __USADA8(uint32_t op1, uint32_t op2, uint32_t op3)
{
  uint32_t result;

  __ASM volatile ("usada8 %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) );
  return(result);
}
#define __SSAT16(ARG1,ARG2) \
({                          \
  uint32_t __RES, __ARG1 = (ARG1); \
  __ASM ("ssat16 %0, %1, %2" : "=r" (__RES) :  "I" (ARG2), "r" (__ARG1) ); \
  __RES; \
 })
#define __USAT16(ARG1,ARG2) \
({                          \
  uint32_t __RES, __ARG1 = (ARG1); \
  __ASM ("usat16 %0, %1, %2" : "=r" (__RES) :  "I" (ARG2), "r" (__ARG1) ); \
  __RES; \
 })
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UXTB16(uint32_t op1)
{
  uint32_t result;

  __ASM volatile ("uxtb16 %0, %1" : "=r" (result) : "r" (op1));
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UXTAB16(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("uxtab16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SXTB16(uint32_t op1)
{
  uint32_t result;

  __ASM volatile ("sxtb16 %0, %1" : "=r" (result) : "r" (op1));
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SXTAB16(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("sxtab16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMUAD (uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("smuad %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMUADX (uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("smuadx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMLAD (uint32_t op1, uint32_t op2, uint32_t op3)
{
  uint32_t result;

  __ASM volatile ("smlad %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMLADX (uint32_t op1, uint32_t op2, uint32_t op3)
{
  uint32_t result;

  __ASM volatile ("smladx %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) );
  return(result);
}
#define __SMLALD(ARG1,ARG2,ARG3) \
({ \
  uint32_t __ARG1 = (ARG1), __ARG2 = (ARG2), __ARG3_H = (uint32_t)((uint64_t)(ARG3) >> 32), __ARG3_L = (uint32_t)((uint64_t)(ARG3) & 0xFFFFFFFFUL); \
  __ASM volatile ("smlald %0, %1, %2, %3" : "=r" (__ARG3_L), "=r" (__ARG3_H) : "r" (__ARG1), "r" (__ARG2), "0" (__ARG3_L), "1" (__ARG3_H) ); \
  (uint64_t)(((uint64_t)__ARG3_H << 32) | __ARG3_L); \
 })
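
/* Usage sketch (illustrative, not part of the original header): __SMLALD does
   two 16x16 multiplies and accumulates both into a 64-bit sum, so a packed
   int16 dot product consumes two samples per call.  x32 and y32 are
   hypothetical pointers to words each holding a pair of packed int16 samples:

     uint64_t acc = 0;
     for (uint32_t i = 0; i < n; i++)
       acc = __SMLALD(x32[i], y32[i], acc);   accumulates x[2i]*y[2i] + x[2i+1]*y[2i+1]
*/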
#define __SMLALDX(ARG1,ARG2,ARG3) \
({ \
  uint32_t __ARG1 = (ARG1), __ARG2 = (ARG2), __ARG3_H = (uint32_t)((uint64_t)(ARG3) >> 32), __ARG3_L = (uint32_t)((uint64_t)(ARG3) & 0xFFFFFFFFUL); \
  __ASM volatile ("smlaldx %0, %1, %2, %3" : "=r" (__ARG3_L), "=r" (__ARG3_H) : "r" (__ARG1), "r" (__ARG2), "0" (__ARG3_L), "1" (__ARG3_H) ); \
  (uint64_t)(((uint64_t)__ARG3_H << 32) | __ARG3_L); \
 })
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMUSD (uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("smusd %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMUSDX (uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("smusdx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMLSD (uint32_t op1, uint32_t op2, uint32_t op3)
{
  uint32_t result;

  __ASM volatile ("smlsd %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMLSDX (uint32_t op1, uint32_t op2, uint32_t op3)
{
  uint32_t result;

  __ASM volatile ("smlsdx %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) );
  return(result);
}
#define __SMLSLD(ARG1,ARG2,ARG3) \
({ \
  uint32_t __ARG1 = (ARG1), __ARG2 = (ARG2), __ARG3_H = (uint32_t)((uint64_t)(ARG3) >> 32), __ARG3_L = (uint32_t)((uint64_t)(ARG3) & 0xFFFFFFFFUL); \
  __ASM volatile ("smlsld %0, %1, %2, %3" : "=r" (__ARG3_L), "=r" (__ARG3_H) : "r" (__ARG1), "r" (__ARG2), "0" (__ARG3_L), "1" (__ARG3_H) ); \
  (uint64_t)(((uint64_t)__ARG3_H << 32) | __ARG3_L); \
 })

#define __SMLSLDX(ARG1,ARG2,ARG3) \
({ \
  uint32_t __ARG1 = (ARG1), __ARG2 = (ARG2), __ARG3_H = (uint32_t)((uint64_t)(ARG3) >> 32), __ARG3_L = (uint32_t)((uint64_t)(ARG3) & 0xFFFFFFFFUL); \
  __ASM volatile ("smlsldx %0, %1, %2, %3" : "=r" (__ARG3_L), "=r" (__ARG3_H) : "r" (__ARG1), "r" (__ARG2), "0" (__ARG3_L), "1" (__ARG3_H) ); \
  (uint64_t)(((uint64_t)__ARG3_H << 32) | __ARG3_L); \
 })
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SEL (uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("sel %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QADD(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("qadd %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QSUB(uint32_t op1, uint32_t op2)
{
  uint32_t result;

  __ASM volatile ("qsub %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}
#define __PKHBT(ARG1,ARG2,ARG3) \
({                          \
  uint32_t __RES, __ARG1 = (ARG1), __ARG2 = (ARG2); \
  __ASM ("pkhbt %0, %1, %2, lsl %3" : "=r" (__RES) :  "r" (__ARG1), "r" (__ARG2), "I" (ARG3)  ); \
  __RES; \
 })
#define __PKHTB(ARG1,ARG2,ARG3) \
({                          \
  uint32_t __RES, __ARG1 = (ARG1), __ARG2 = (ARG2); \
  if (ARG3 == 0) \
    __ASM ("pkhtb %0, %1, %2" : "=r" (__RES) :  "r" (__ARG1), "r" (__ARG2)  ); \
  else \
    __ASM ("pkhtb %0, %1, %2, asr %3" : "=r" (__RES) :  "r" (__ARG1), "r" (__ARG2), "I" (ARG3)  ); \
  __RES; \
 })
__attribute__( ( always_inline ) ) __STATIC_INLINE int32_t __SMMLA (int32_t op1, int32_t op2, int32_t op3)
{
  int32_t result;

  __ASM volatile ("smmla %0, %1, %2, %3" : "=r" (result): "r" (op1), "r" (op2), "r" (op3) );
  return(result);
}
#elif defined ( __TASKING__ ) /*------------------ TASKING Compiler --------------*/