ZenLib
|
00001 /* Copyright (c) MediaArea.net SARL. All Rights Reserved. 00002 * 00003 * Use of this source code is governed by a zlib-style license that can 00004 * be found in the License.txt file in the root of the source tree. 00005 */ 00006 00007 //--------------------------------------------------------------------------- 00008 #ifndef ZenLib_MemoryUtilsH 00009 #define ZenLib_MemoryUtilsH 00010 //--------------------------------------------------------------------------- 00011 00012 //--------------------------------------------------------------------------- 00013 #include "ZenLib/Conf.h" 00014 #include "ZenLib/Conf.h" 00015 //--------------------------------------------------------------------------- 00016 00017 #include <cstring> 00018 #ifdef ZENLIB_MEMUTILS_SSE2 00019 #include <emmintrin.h> 00020 #endif //ZENLIB_MEMUTILS_SSE2 00021 00022 namespace ZenLib 00023 { 00024 00025 #ifndef ZENLIB_MEMUTILS_SSE2 00026 //----------------------------------------------------------------------- 00027 // Memory alloc/free 00028 #define malloc_Aligned128 (size) \ 00029 malloc (size) 00030 #define free_Aligned128 (ptr) \ 00031 free (ptr) 00032 00033 //----------------------------------------------------------------------- 00034 // Arbitrary size - To Unaligned 00035 #define memcpy_Unaligned_Unaligned memcpy 00036 #define memcpy_Aligned128_Unaligned memcpy 00037 00038 //----------------------------------------------------------------------- 00039 // Arbitrary size - To Aligned 128 bits (16 bytes) 00040 #define memcpy_Unaligned_Aligned128 memcpy 00041 #define memcpy_Aligned128_Aligned128 memcpy 00042 00043 //----------------------------------------------------------------------- 00044 // 128 bits - To Unaligned 00045 #define memcpy_Unaligned_Unaligned_Once128 memcpy 00046 00047 //----------------------------------------------------------------------- 00048 // 128 bits - To Aligned 128 bits (16 bytes) 00049 #define memcpy_Aligned128_Aligned128_Once128 memcpy 00050 00051 //----------------------------------------------------------------------- 00052 // 1024 bits - To Unaligned 00053 #define memcpy_Unaligned_Unaligned_Once1024 memcpy 00054 00055 //----------------------------------------------------------------------- 00056 // 1024 bits - To Aligned 128 bits (16 bytes) 00057 #define memcpy_Aligned128_Aligned128_Once1024 memcpy 00058 00059 //----------------------------------------------------------------------- 00060 // 128-bit multiple - To Aligned 128 bits (16 bytes) 00061 #define memcpy_Unaligned_Aligned128_Size128 memcpy 00062 #define memcpy_Aligned128_Aligned128_Size128 memcpy 00063 00064 #else // ZENLIB_MEMUTILS_SSE2 00065 00066 //----------------------------------------------------------------------- 00067 // Memory alloc/free 00068 00069 inline void* malloc_Aligned128 (size_t size) 00070 { 00071 return _aligned_malloc (size, 16); //aligned_alloc in C11 00072 } 00073 00074 inline void free_Aligned128 ( void *ptr ) 00075 { 00076 _aligned_free (ptr); //free in C11 00077 } 00078 00079 //----------------------------------------------------------------------- 00080 // Arbitrary size - To Unaligned 00081 00082 inline void memcpy_Unaligned_Unaligned (void* destination, const void* source, size_t num) 00083 { 00084 size_t extra=num&0xF; 00085 __m128i* destination16=(__m128i*)destination; 00086 const __m128i* source16=(const __m128i*)source; 00087 00088 num>>=4; 00089 while (num--) 00090 _mm_storeu_si128 (destination16++, _mm_loadu_si128(source16++)); 00091 00092 char* destination1=(char*)destination16; 00093 char* source1=(char*)source16; 00094 while (extra--) 00095 *destination1++=*source1++; 00096 } 00097 00098 inline void memcpy_Aligned128_Unaligned (void* destination, const void* source, size_t num) 00099 { 00100 size_t extra=num&0xF; 00101 __m128i* destination16=(__m128i*)destination; 00102 const __m128i* source16=(const __m128i*)source; 00103 00104 num>>=4; 00105 while (num--) 00106 _mm_storeu_si128 (destination16++, _mm_load_si128(source16++)); 00107 00108 char* destination1=(char*)destination16; 00109 char* source1=(char*)source16; 00110 while (extra--) 00111 *destination1++=*source1++; 00112 } 00113 00114 //----------------------------------------------------------------------- 00115 // Arbitrary size - To Aligned 128 bits (16 bytes) 00116 00117 inline void memcpy_Unaligned_Aligned128 (void* destination, const void* source, size_t num) 00118 { 00119 size_t extra=num&0xF; 00120 __m128i* destination16=(__m128i*)destination; 00121 const __m128i* source16=(const __m128i*)source; 00122 00123 num>>=4; 00124 while (num--) 00125 _mm_stream_si128 (destination16++, _mm_loadu_si128(source16++)); 00126 00127 char* destination1=(char*)destination16; 00128 char* source1=(char*)source16; 00129 while (extra--) 00130 *destination1++=*source1++; 00131 } 00132 00133 //----------------------------------------------------------------------- 00134 // 128 bits - To Unaligned 00135 00136 inline void memcpy_Unaligned_Unaligned_Once128 (void* destination, const void* source) 00137 { 00138 _mm_storeu_si128 ((__m128i*)destination, _mm_loadu_si128((const __m128i*)source)); 00139 } 00140 00141 //----------------------------------------------------------------------- 00142 // 128 bits - To Aligned 128 bits (16 bytes) 00143 00144 inline void memcpy_Aligned128_Aligned128 (void* destination, const void* source, size_t num) 00145 { 00146 size_t extra=num&0xF; 00147 __m128i* destination16=(__m128i*)destination; 00148 const __m128i* source16=(const __m128i*)source; 00149 00150 num>>=4; 00151 while (num--) 00152 _mm_stream_si128 (destination16++, _mm_load_si128(source16++)); 00153 00154 char* destination1=(char*)destination16; 00155 char* source1=(char*)source16; 00156 while (extra--) 00157 *destination1++=*source1++; 00158 } 00159 00160 inline void memcpy_Aligned128_Aligned128_Size128 (void* destination, const void* source, size_t num) 00161 { 00162 __m128i* destination16=(__m128i*)destination; 00163 const __m128i* source16=(__m128i*)source; 00164 00165 num>>=4; 00166 while (num--) 00167 _mm_stream_si128 (destination16++, _mm_load_si128(source16++)); 00168 } 00169 00170 //----------------------------------------------------------------------- 00171 // 1024 bits - To Unaligned 00172 00173 inline void memcpy_Unaligned_Unaligned_Once1024 (void* destination, const void* source, size_t) 00174 { 00175 __m128i* destination16=(__m128i*)destination; 00176 const __m128i* source16=(__m128i*)source; 00177 00178 size_t num=8; 00179 while (num--) 00180 _mm_storeu_si128 (destination16++, _mm_loadu_si128(source16++)); 00181 } 00182 00183 //----------------------------------------------------------------------- 00184 // 1024 bits - To Aligned 128 bits (16 bytes) 00185 00186 inline void memcpy_Aligned128_Aligned128_Once128 (void* destination, const void* source) 00187 { 00188 _mm_stream_si128 ((__m128i*)destination, _mm_load_si128((const __m128i*)source)); 00189 } 00190 00191 //----------------------------------------------------------------------- 00192 // 128-bit multiple - To Unaligned (16 bytes) 00193 00194 inline void memcpy_Unaligned_Unaligned_Size128 (void* destination, const void* source, size_t num) 00195 { 00196 __m128i* destination16=(__m128i*)destination; 00197 const __m128i* source16=(const __m128i*)source; 00198 00199 num>>=4; 00200 while (num--) 00201 _mm_storeu_si128 (destination16++, _mm_loadu_si128(source16++)); 00202 } 00203 00204 inline void memcpy_Aligned128_Unaligned_Size128 (void* destination, const void* source, size_t num) 00205 { 00206 __m128i* destination16=(__m128i*)destination; 00207 const __m128i* source16=(__m128i*)source; 00208 00209 num>>=4; 00210 while (num--) 00211 _mm_storeu_si128 (destination16++, _mm_load_si128(source16++)); 00212 } 00213 00214 //----------------------------------------------------------------------- 00215 // 128-bit multiple - To Aligned 128 bits (16 bytes) 00216 00217 inline void memcpy_Unaligned_Aligned128_Size128 (void* destination, const void* source, size_t num) 00218 { 00219 __m128i* destination16=(__m128i*)destination; 00220 const __m128i* source16=(__m128i*)source; 00221 00222 num>>=4; 00223 while (num--) 00224 _mm_stream_si128 (destination16++, _mm_loadu_si128(source16++)); 00225 } 00226 00227 00228 /* Slower 00229 inline void memcpy_Aligned128_Aligned128_Once1024 (void* destination, const void* source) 00230 { 00231 __m128i* destination16=(__m128i*)destination; 00232 const __m128i* source16=(__m128i*)source; 00233 00234 size_t num=8; 00235 while (num--) 00236 _mm_stream_si128 (destination16++, _mm_load_si128(source16++)); 00237 } 00238 */ 00239 00240 /* 00241 inline void memcpy_Aligned256_Aligned256 (void* destination, const void* source, size_t num) //with AVX, actually slower 00242 { 00243 size_t extra=num&0x1F; 00244 __m256i* destination16=(__m256i*)destination; 00245 const __m256i* source16=(const __m256i*)source; 00246 00247 num>>=5; 00248 while (num--) 00249 _mm256_storeu_si256 (destination16++, _mm256_loadu_si256(source16++)); 00250 00251 char* destination1=(char*)destination16; 00252 char* source1=(char*)source16; 00253 while (extra--) 00254 *destination1++=*source1++; 00255 } 00256 */ 00257 00258 #endif // ZENLIB_MEMUTILS_SSE2 00259 00260 } //NameSpace 00261 00262 #endif