source: pacpusframework/branches/2.0-beta1/include/extlib/EStereo/processingSSE2.inl@ 89

Last change on this file since 89 was 89, checked in by morasjul, 11 years ago

PACPUS 2.0 Beta deployed in new branch

Major changes:
-Add communication interface between components
-Add examples for communications interface (TestComponents)
-Move to Qt5 support

  • Property svn:executable set to *
File size: 9.1 KB
Line 
1/***************************************************************************
2*
3* Copyright 2000 by David Demirdjian. All rights reserved.
4*
5* Developed by David Demirdjian
6*
7* Permission to use, copy, or modify this software and its documentation
8* for educational and research purposes only and without fee is hereby
9* granted, provided that this copyright notice and the original authors's
10* names appear on all copies and supporting documentation. If individual
11* files are separated from this distribution directory structure, this
12* copyright notice must be included. For any other uses of this software,
13* in original or modified form, including but not limited to distribution
14* in whole or in part, specific prior permission must be obtained from
15* MIT. These programs shall not be used, rewritten, or adapted as the
16* basis of a commercial software or hardware product without first
17* obtaining appropriate licenses from David Demirdjian. The author makes
18* no representations about the suitability of this software for any purpose.
19* It is provided "as is" without express or implied warranty.
20*
21**************************************************************************/
22#include "stereoMatching.h"
23#include "processingmmx.h"
24
25// ************************************************************
26// ************************************************************
27// *** List of functions (SSE2) for image processing
28// ************************************************************
29// ************************************************************
30
31// Src1, Src2 and Dest suppose to point on 16-bytes memory block
32inline int ImgSubandAdd_sse2(const unsigned char *Src1, const unsigned char *Src2,
33 const unsigned char *Src3, unsigned char *Dest, int l)
34{
35
36 if (l < 8) return 0; // image size must be at least 8 bytes
37
38 __asm
39 {
40 mov eax, Src1
41 mov ebx, Src2
42 mov edx, Src3
43 mov edi, Dest
44 mov ecx, l
45 shr ecx, 4
46
47align 16
48inner_loop:
49 movdqa xmm1,[eax] // xmm1=src1
50 movdqa xmm2,[ebx] // mm2=src2
51
52 movdqa xmm4,xmm1 // mm4=mm1
53
54 psubusb xmm4,xmm2 // mm4 = src1 - src2
55
56 movdqu xmm3,[edx] // mm3=src3
57 psubusb xmm2,xmm1 // mm2 = src2 - src1
58
59 movdqa xmm5,xmm1 // mm5=src1
60 por xmm2,xmm4 // mm2=|src1-src2|
61
62 psubusb xmm5,xmm3 // mm4=src1-src3
63
64 psubusb xmm3,xmm1 // mm3=src3-src1
65
66 por xmm3,xmm5 // mm3=|src1-src3|
67
68 paddusb xmm2,xmm3 // mm2 = |src1-src2|+|src1-src3|
69
70 movdqa [edi], xmm2
71 add eax,16
72 add ebx,16
73 add edx,16
74 add edi,16
75 dec ecx
76 jnz inner_loop
77 emms
78 }
79
80 return 1;
81}
82
83
84
85
86
87
88
// Accumulates one more row into the running column sums and steps to the
// next row. Implicit register contract (must be set up by the user):
//   edx        : address of the row to add (16 words are read from it)
//   edi        : byte distance from one row to the next
//   xmm3, xmm2 : running sums of words 0-7 and 8-15 (saturating adds)
#define macro_add_sse2 __asm            \
{                                       \
    __asm paddusw xmm3, [edx]           \
    __asm paddusw xmm2, [edx + 16]      \
    __asm add     edx, edi              \
}
95
96
97inline void avg_Col_5_sse2(ushort* im, uchar* im_out, int dataSize, int width)
98{
99 __asm {
100
101 mov edi, width
102 shl edi, 1 // edi = 2*width
103
104 mov eax, dataSize
105 mov ecx, im_out
106
107 mov ebx, im
108 sub ebx, edi
109 sub ebx, edi // ebx = ebx-4*width
110
111 test eax, eax // Is there anything to do?"
112 jz end_sum_loop // Jump out if necessary
113
114 row_sum_loop:
115
116 test eax, eax // Is there anything to do?
117 jz end_sum_loop // Jump out if necessary
118
119 mov edx, ebx
120 add ebx, 32
121
122 // 1
123 movdqa xmm3, [edx] // xmm3 = 8 words of im
124 movdqa xmm2, [edx+16] // xmm3 = 8 words of im
125 add edx, edi
126
127 macro_add_sse2
128 macro_add_sse2
129 macro_add_sse2
130 macro_add_sse2
131
132 // divide results by ...
133 psrlw xmm3, 3
134 psrlw xmm2, 3
135
136 // convert [xmm2 xmm3] as 8 words
137 packuswb xmm3,xmm2
138 movdqa [ecx], xmm3
139
140 sub eax, 16 // Update the number of points left
141 add ecx, 16 // Update output pointer
142
143 jmp row_sum_loop // Loop
144
145 //Cleanup
146 end_sum_loop:
147 emms
148 }
149}
150
151inline void avg_Col_7_sse2(ushort* im, uchar* im_out, int dataSize, int width)
152{
153 __asm {
154
155 mov edi, width
156 shl edi, 1 // edi = 2*width
157
158 mov eax, dataSize
159 mov ecx, im_out
160
161 mov ebx, im
162 sub ebx, edi
163 sub ebx, edi
164 sub ebx, edi // ebx = ebx-4*width
165
166 test eax, eax // Is there anything to do?"
167 jz end_sum_loop // Jump out if necessary
168
169 row_sum_loop:
170
171 test eax, eax // Is there anything to do?
172 jz end_sum_loop // Jump out if necessary
173
174 mov edx, ebx
175 add ebx, 32
176
177 // 1
178 movdqa xmm3, [edx] // xmm3 = 8 words of im
179 movdqa xmm2, [edx+16] // xmm3 = 8 words of im
180 add edx, edi
181
182 macro_add_sse2
183 macro_add_sse2
184 macro_add_sse2
185 macro_add_sse2
186 macro_add_sse2
187 macro_add_sse2
188
189 // divide results by ...
190 psrlw xmm3, 3
191 psrlw xmm2, 3
192
193 // convert [xmm2 xmm3] as 8 words
194 packuswb xmm3,xmm2
195 movdqa [ecx], xmm3
196
197 sub eax, 16 // Update the number of points left
198 add ecx, 16 // Update output pointer
199
200 jmp row_sum_loop // Loop
201
202 //Cleanup
203 end_sum_loop:
204 emms
205 }
206}
207
208inline void avg_Col_9_sse2(ushort* im, uchar* im_out, int dataSize, int width)
209{
210 __asm {
211
212 mov edi, width
213 shl edi, 1 // edi = 2*width
214
215 mov eax, dataSize
216 mov ecx, im_out
217
218 mov ebx, im
219 sub ebx, edi
220 sub ebx, edi
221 sub ebx, edi
222 sub ebx, edi // ebx = ebx-4*width
223
224 test eax, eax // Is there anything to do?"
225 jz end_sum_loop // Jump out if necessary
226
227 row_sum_loop:
228
229 test eax, eax // Is there anything to do?
230 jz end_sum_loop // Jump out if necessary
231
232 mov edx, ebx
233 add ebx, 32
234
235 // 1
236 movdqa xmm3, [edx] // xmm3 = 8 words of im
237 movdqa xmm2, [edx+16] // xmm3 = 8 words of im
238 add edx, edi
239
240 macro_add_sse2
241 macro_add_sse2
242 macro_add_sse2
243 macro_add_sse2
244 macro_add_sse2
245 macro_add_sse2
246 macro_add_sse2
247 macro_add_sse2
248
249 // divide results by ...
250 psrlw xmm3, 3
251 psrlw xmm2, 3
252
253 // convert [xmm2 xmm3] as 8 words
254 packuswb xmm3,xmm2
255 movdqa [ecx], xmm3
256
257 sub eax, 16 // Update the number of points left
258 add ecx, 16 // Update output pointer
259
260 jmp row_sum_loop // Loop
261
262 //Cleanup
263 end_sum_loop:
264 emms
265 }
266}
267
268inline void avg_Col_11_sse2(ushort* im, uchar* im_out, int dataSize, int width)
269{
270 __asm {
271
272 mov edi, width
273 shl edi, 1 // edi = 2*width
274
275 mov eax, dataSize
276 mov ecx, im_out
277
278 mov ebx, im
279 sub ebx, edi
280 sub ebx, edi
281 sub ebx, edi
282 sub ebx, edi
283 sub ebx, edi // ebx = ebx-4*width
284
285 test eax, eax // Is there anything to do?"
286 jz end_sum_loop // Jump out if necessary
287
288 row_sum_loop:
289
290 test eax, eax // Is there anything to do?
291 jz end_sum_loop // Jump out if necessary
292
293 mov edx, ebx
294 add ebx, 32
295
296 // 1
297 movdqa xmm3, [edx] // xmm3 = 8 words of im
298 movdqa xmm2, [edx+16] // xmm3 = 8 words of im
299 add edx, edi
300
301 macro_add_sse2
302 macro_add_sse2
303 macro_add_sse2
304 macro_add_sse2
305 macro_add_sse2
306 macro_add_sse2
307 macro_add_sse2
308 macro_add_sse2
309 macro_add_sse2
310 macro_add_sse2
311
312 // divide results by ...
313 psrlw xmm3, 3
314 psrlw xmm2, 3
315
316 // convert [xmm2 xmm3] as 8 words
317 packuswb xmm3,xmm2
318 movdqa [ecx], xmm3
319
320 sub eax, 16 // Update the number of points left
321 add ecx, 16 // Update output pointer
322
323 jmp row_sum_loop // Loop
324
325 //Cleanup
326 end_sum_loop:
327 emms
328 }
329}
330
331inline void avg_Col_13_sse2(ushort* im, uchar* im_out, int dataSize, int width)
332{
333 __asm {
334
335 mov edi, width
336 shl edi, 1 // edi = 2*width
337
338 mov eax, dataSize
339 mov ecx, im_out
340
341 mov ebx, im
342 sub ebx, edi
343 sub ebx, edi
344 sub ebx, edi
345 sub ebx, edi
346 sub ebx, edi
347 sub ebx, edi // ebx = ebx-4*width
348
349 test eax, eax // Is there anything to do?"
350 jz end_sum_loop // Jump out if necessary
351
352 row_sum_loop:
353
354 test eax, eax // Is there anything to do?
355 jz end_sum_loop // Jump out if necessary
356
357 mov edx, ebx
358 add ebx, 32
359
360 // 1
361 movdqa xmm3, [edx] // xmm3 = 8 words of im
362 movdqa xmm2, [edx+16] // xmm3 = 8 words of im
363 add edx, edi
364
365 macro_add_sse2
366 macro_add_sse2
367 macro_add_sse2
368 macro_add_sse2
369 macro_add_sse2
370 macro_add_sse2
371 macro_add_sse2
372 macro_add_sse2
373 macro_add_sse2
374 macro_add_sse2
375 macro_add_sse2
376 macro_add_sse2
377
378 // divide results by ...
379 psrlw xmm3, 3
380 psrlw xmm2, 3
381
382 // convert [xmm2 xmm3] as 8 words
383 packuswb xmm3,xmm2
384 movdqa [ecx], xmm3
385
386 sub eax, 16 // Update the number of points left
387 add ecx, 16 // Update output pointer
388
389 jmp row_sum_loop // Loop
390
391 //Cleanup
392 end_sum_loop:
393 emms
394 }
395}
396
397// apply vertical mask 1/16*[1 1 1 ... 1]^T to 'im'
398// result in 'im_out'
399inline void avg_Col_sse2(ushort* im, uchar* im_out, int dataSize, int width, int sizeMask)
400{
401 switch (sizeMask)
402 {
403 case 5: avg_Col_5_sse2(im,im_out,dataSize,width);
404 break;
405 case 7: avg_Col_7_sse2(im,im_out,dataSize,width);
406 break;
407 case 9: avg_Col_9_sse2(im,im_out,dataSize,width);
408 break;
409 case 11: avg_Col_11_sse2(im,im_out,dataSize,width);
410 break;
411 case 13: avg_Col_13_sse2(im,im_out,dataSize,width);
412 break;
413 case 15: avg_Col_15(im,im_out,dataSize,width);
414 break;
415 case 17: avg_Col_17(im,im_out,dataSize,width);
416 break;
417
418 default: if (sizeMask<5) avg_Col_5_sse2(im,im_out,dataSize,width);
419 else if (sizeMask>17) avg_Col_17(im,im_out,dataSize,width);
420 }
421}
Note: See TracBrowser for help on using the repository browser.