• R/O
  • HTTP
  • SSH
  • HTTPS

common_source_project-fm7: Commit

Common Source Code Project for Qt (a.k.a for FM-7).


Commit MetaInfo

Révision: bd69099c7c3486b13e11aecc425870b4b1987dc3 (tree)
l'heure: 2017-05-27 20:10:56
Auteur: K.Ohta <whatisthis.sowhat@gmai...>
Commiter: K.Ohta

Message de Log

[COMMON] my_memcpy: With some host CPUs (e.g. x86), GCC uses unaligned SIMD instructions to read/write memory.

Change Summary

Modification

--- a/source/src/common.cpp
+++ b/source/src/common.cpp
@@ -195,8 +195,10 @@ int DLL_PREFIX my_vstprintf_s(_TCHAR *buffer, size_t numberOfElements, const _TC
195195
196196 void DLL_PREFIX *my_memcpy(void *dst, void *src, size_t len)
197197 {
198- register size_t len1;
198+ size_t len1;
199199 register size_t len2;
200+ register uint32_t s_align = (uint32_t)(((size_t)src) & 0x1f);
201+ register uint32_t d_align = (uint32_t)(((size_t)dst) & 0x1f);
200202 int i;
201203
202204 if(len == 0) return dst;
@@ -204,9 +206,9 @@ void DLL_PREFIX *my_memcpy(void *dst, void *src, size_t len)
204206 return memcpy(dst, src, len);
205207 }
206208 len1 = len;
207- size_t s_align = ((size_t)src) & 0x1f;
208- size_t d_align = ((size_t)dst) & 0x1f;
209-#if 1
209+
210+#if defined(WITHOUT_UNALIGNED_SIMD)
211+// Using SIMD without un-aligned instructions.
210212 switch(s_align) {
211213 case 0: // Align 256
212214 {
@@ -526,59 +528,56 @@ void DLL_PREFIX *my_memcpy(void *dst, void *src, size_t len)
526528 break;
527529 }
528530
529-#else
530- // Check align(preamble)
531- if(((size_t)s & 0x0f) != 0) { // Src not align 16
532- if(((size_t)s & 0x07) != 0) { // Src not Align 8
533- return memcpy(d, s, len1);
534- } else { // Align 8 (at least src)
535- if(((size_t)d & 0x07) != 0) { // Dst not align 8
536- return memcpy(d, s, len1);
537- }
538-__src_dst_align_8:
539- uint32_t b64[2];
540- register uint32_t *s64 = (uint32_t *)s;
541- register uint32_t *d64 = (uint32_t *)d;
542-
543- // Src and Dst align 8 (at least)
544- len2 = len1 >> 3;
545- i = 0;
546- while(len2 > 0) {
547- for(i = 0; i < 2; i++) b64[i] = s64[i];
548- for(i = 0; i < 2; i++) d64[i] = b64[i];
549- s64 += 2;
550- d64 += 2;
551- --len2;
552- }
553- len1 = len1 & 7;
554- if(len1 != 0) return memcpy((uint8_t *)d64, (uint8_t *)s64, len1);
555- return dst;
556- }
557- } else { // Src align 16
558- if(((size_t)d & 0x0f) != 0) { // Dst not align 16
559- if(((size_t)d & 0x07) != 0) { // Dst not align 8
560- return memcpy(d, s, len1);
561- }
562- // Dst align 8
563- goto __src_dst_align_8;
564- } else { // Src and Dst align 16
565-__src_dst_align_16:
566- len2 = len1 >> 4;
567- uint32_t b128[4];
568- register uint32_t *s128 = (uint32_t *)s;
569- register uint32_t *d128 = (uint32_t *)d;
570- while(len2 > 0) {
571- for(i = 0; i < 4; i++) b128[i] = s128[i];
572- for(i = 0; i < 4; i++) d128[i] = b128[i];
573- s128 += 4;
574- d128 += 4;
575- --len2;
576- }
577- len1 = len1 & 0x0f;
578- if(len1 != 0) return memcpy((uint8_t *)d128, (uint8_t *)s128, len1);
579- return dst;
531+#else
532+// Using SIMD *with* un-aligned instructions.
533+ register uint32_t *s32 = (uint32_t *)src;
534+ register uint32_t *d32 = (uint32_t *)dst;
535+ if(((s_align & 0x07) != 0x0) && ((d_align & 0x07) != 0x0)) { // None align.
536+ return memcpy(dst, src, len);
537+ }
538+ if((s_align == 0x0) || (d_align == 0x0)) { // Align to 256bit
539+ uint32_t b256[8];
540+ len2 = len1 >> 5;
541+ while(len2 > 0) {
542+ for(i = 0; i < 8; i++) b256[i] = s32[i];
543+ for(i = 0; i < 8; i++) d32[i] = b256[i];
544+ s32 += 8;
545+ d32 += 8;
546+ --len2;
580547 }
548+ len1 = len1 & 0x1f;
549+ if(len1 != 0) return memcpy(d32, s32, len1);
550+ return dst;
581551 }
552+ if(((s_align & 0x0f) == 0x0) || ((d_align & 0x0f) == 0x0)) { // Align to 128bit
553+ uint32_t b128[4];
554+ len2 = len1 >> 4;
555+ while(len2 > 0) {
556+ for(i = 0; i < 4; i++) b128[i] = s32[i];
557+ for(i = 0; i < 4; i++) d32[i] = b128[i];
558+ s32 += 4;
559+ d32 += 4;
560+ --len2;
561+ }
562+ len1 = len1 & 0x0f;
563+ if(len1 != 0) return memcpy(d32, s32, len1);
564+ return dst;
565+ }
566+ if(((s_align & 0x07) == 0x0) || ((d_align & 0x07) == 0x0)) { // Align to 64bit
567+ uint32_t b64[2];
568+ len2 = len1 >> 3;
569+ while(len2 > 0) {
570+ for(i = 0; i < 2; i++) b64[i] = s32[i];
571+ for(i = 0; i < 2; i++) d32[i] = b64[i];
572+ s32 += 2;
573+ d32 += 2;
574+ --len2;
575+ }
576+ len1 = len1 & 0x07;
577+ if(len1 != 0) return memcpy(d32, s32, len1);
578+ return dst;
579+ }
580+ //if(len1 != 0) return memcpy(dst, src, len1);
582581 #endif
583582 // Trap
584583 return dst;
Afficher sur ancien navigateur de dépôt.