Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

processingSSE2.inl@ 89

Last change on this file since 89 was 89, checked in by morasjul, 11 years ago

PACPUS 2.0 Beta deployed in new branch

Major changes:
-Add communication interface between components
-Add examples for communications interface (TestComponents)
-Move to Qt5 support

Property svn:executable set to *

File size: 9.1 KB

Line
1	/***************************************************************************
2	*
3	* Copyright 2000 by David Demirdjian. All rights reserved.
4	*
5	* Developed by David Demirdjian
6	*
7	* Permission to use, copy, or modify this software and its documentation
8	* for educational and research purposes only and without fee is hereby
9	* granted, provided that this copyright notice and the original authors's
10	* names appear on all copies and supporting documentation. If individual
11	* files are separated from this distribution directory structure, this
12	* copyright notice must be included. For any other uses of this software,
13	* in original or modified form, including but not limited to distribution
14	* in whole or in part, specific prior permission must be obtained from
15	* MIT. These programs shall not be used, rewritten, or adapted as the
16	* basis of a commercial software or hardware product without first
17	* obtaining appropriate licenses from David Demirdjian. The author makes
18	* no representations about the suitability of this software for any purpose.
19	* It is provided "as is" without express or implied warranty.
20	*
21	**************************************************************************/
22	#include "stereoMatching.h"
23	#include "processingmmx.h"
24
25	// ************************************************************
26	// ************************************************************
27	// *** List of functions (SSE2) for image processing
28	// ************************************************************
29	// ************************************************************
30
31	// Src1, Src2 and Dest suppose to point on 16-bytes memory block
32	inline int ImgSubandAdd_sse2(const unsigned char Src1, const unsigned char Src2,
33	const unsigned char Src3, unsigned char Dest, int l)
34	{
35
36	if (l < 8) return 0; // image size must be at least 8 bytes
37
38	__asm
39	{
40	mov eax, Src1
41	mov ebx, Src2
42	mov edx, Src3
43	mov edi, Dest
44	mov ecx, l
45	shr ecx, 4
46
47	align 16
48	inner_loop:
49	movdqa xmm1,[eax] // xmm1=src1
50	movdqa xmm2,[ebx] // mm2=src2
51
52	movdqa xmm4,xmm1 // mm4=mm1
53
54	psubusb xmm4,xmm2 // mm4 = src1 - src2
55
56	movdqu xmm3,[edx] // mm3=src3
57	psubusb xmm2,xmm1 // mm2 = src2 - src1
58
59	movdqa xmm5,xmm1 // mm5=src1
60	por xmm2,xmm4 // mm2=\|src1-src2\|
61
62	psubusb xmm5,xmm3 // mm4=src1-src3
63
64	psubusb xmm3,xmm1 // mm3=src3-src1
65
66	por xmm3,xmm5 // mm3=\|src1-src3\|
67
68	paddusb xmm2,xmm3 // mm2 = \|src1-src2\|+\|src1-src3\|
69
70	movdqa [edi], xmm2
71	add eax,16
72	add ebx,16
73	add edx,16
74	add edi,16
75	dec ecx
76	jnz inner_loop
77	emms
78	}
79
80	return 1;
81	}
82
83
84
85
86
87
88
// Adds one more image row to the running column sums, then steps to the next row.
//   reads  : edx = address of the current row, edi = row stride in bytes
//   updates: xmm3 += 8 words at [edx]      (unsigned saturating add)
//            xmm2 += 8 words at [edx+16]   (unsigned saturating add)
//            edx  += edi
// Meant to be expanded inside an enclosing __asm block. No comments may be
// placed on the continuation lines themselves.
#define macro_add_sse2                  \
    __asm {                             \
        __asm paddusw xmm3, [edx]       \
        __asm paddusw xmm2, [edx+16]    \
        __asm add     edx, edi          \
    }
95
96
97	inline void avg_Col_5_sse2(ushort* im, uchar* im_out, int dataSize, int width)
98	{
99	__asm {
100
101	mov edi, width
102	shl edi, 1 // edi = 2*width
103
104	mov eax, dataSize
105	mov ecx, im_out
106
107	mov ebx, im
108	sub ebx, edi
109	sub ebx, edi // ebx = ebx-4*width
110
111	test eax, eax // Is there anything to do?"
112	jz end_sum_loop // Jump out if necessary
113
114	row_sum_loop:
115
116	test eax, eax // Is there anything to do?
117	jz end_sum_loop // Jump out if necessary
118
119	mov edx, ebx
120	add ebx, 32
121
122	// 1
123	movdqa xmm3, [edx] // xmm3 = 8 words of im
124	movdqa xmm2, [edx+16] // xmm3 = 8 words of im
125	add edx, edi
126
127	macro_add_sse2
128	macro_add_sse2
129	macro_add_sse2
130	macro_add_sse2
131
132	// divide results by ...
133	psrlw xmm3, 3
134	psrlw xmm2, 3
135
136	// convert [xmm2 xmm3] as 8 words
137	packuswb xmm3,xmm2
138	movdqa [ecx], xmm3
139
140	sub eax, 16 // Update the number of points left
141	add ecx, 16 // Update output pointer
142
143	jmp row_sum_loop // Loop
144
145	//Cleanup
146	end_sum_loop:
147	emms
148	}
149	}
150
151	inline void avg_Col_7_sse2(ushort* im, uchar* im_out, int dataSize, int width)
152	{
153	__asm {
154
155	mov edi, width
156	shl edi, 1 // edi = 2*width
157
158	mov eax, dataSize
159	mov ecx, im_out
160
161	mov ebx, im
162	sub ebx, edi
163	sub ebx, edi
164	sub ebx, edi // ebx = ebx-4*width
165
166	test eax, eax // Is there anything to do?"
167	jz end_sum_loop // Jump out if necessary
168
169	row_sum_loop:
170
171	test eax, eax // Is there anything to do?
172	jz end_sum_loop // Jump out if necessary
173
174	mov edx, ebx
175	add ebx, 32
176
177	// 1
178	movdqa xmm3, [edx] // xmm3 = 8 words of im
179	movdqa xmm2, [edx+16] // xmm3 = 8 words of im
180	add edx, edi
181
182	macro_add_sse2
183	macro_add_sse2
184	macro_add_sse2
185	macro_add_sse2
186	macro_add_sse2
187	macro_add_sse2
188
189	// divide results by ...
190	psrlw xmm3, 3
191	psrlw xmm2, 3
192
193	// convert [xmm2 xmm3] as 8 words
194	packuswb xmm3,xmm2
195	movdqa [ecx], xmm3
196
197	sub eax, 16 // Update the number of points left
198	add ecx, 16 // Update output pointer
199
200	jmp row_sum_loop // Loop
201
202	//Cleanup
203	end_sum_loop:
204	emms
205	}
206	}
207
208	inline void avg_Col_9_sse2(ushort* im, uchar* im_out, int dataSize, int width)
209	{
210	__asm {
211
212	mov edi, width
213	shl edi, 1 // edi = 2*width
214
215	mov eax, dataSize
216	mov ecx, im_out
217
218	mov ebx, im
219	sub ebx, edi
220	sub ebx, edi
221	sub ebx, edi
222	sub ebx, edi // ebx = ebx-4*width
223
224	test eax, eax // Is there anything to do?"
225	jz end_sum_loop // Jump out if necessary
226
227	row_sum_loop:
228
229	test eax, eax // Is there anything to do?
230	jz end_sum_loop // Jump out if necessary
231
232	mov edx, ebx
233	add ebx, 32
234
235	// 1
236	movdqa xmm3, [edx] // xmm3 = 8 words of im
237	movdqa xmm2, [edx+16] // xmm3 = 8 words of im
238	add edx, edi
239
240	macro_add_sse2
241	macro_add_sse2
242	macro_add_sse2
243	macro_add_sse2
244	macro_add_sse2
245	macro_add_sse2
246	macro_add_sse2
247	macro_add_sse2
248
249	// divide results by ...
250	psrlw xmm3, 3
251	psrlw xmm2, 3
252
253	// convert [xmm2 xmm3] as 8 words
254	packuswb xmm3,xmm2
255	movdqa [ecx], xmm3
256
257	sub eax, 16 // Update the number of points left
258	add ecx, 16 // Update output pointer
259
260	jmp row_sum_loop // Loop
261
262	//Cleanup
263	end_sum_loop:
264	emms
265	}
266	}
267
268	inline void avg_Col_11_sse2(ushort* im, uchar* im_out, int dataSize, int width)
269	{
270	__asm {
271
272	mov edi, width
273	shl edi, 1 // edi = 2*width
274
275	mov eax, dataSize
276	mov ecx, im_out
277
278	mov ebx, im
279	sub ebx, edi
280	sub ebx, edi
281	sub ebx, edi
282	sub ebx, edi
283	sub ebx, edi // ebx = ebx-4*width
284
285	test eax, eax // Is there anything to do?"
286	jz end_sum_loop // Jump out if necessary
287
288	row_sum_loop:
289
290	test eax, eax // Is there anything to do?
291	jz end_sum_loop // Jump out if necessary
292
293	mov edx, ebx
294	add ebx, 32
295
296	// 1
297	movdqa xmm3, [edx] // xmm3 = 8 words of im
298	movdqa xmm2, [edx+16] // xmm3 = 8 words of im
299	add edx, edi
300
301	macro_add_sse2
302	macro_add_sse2
303	macro_add_sse2
304	macro_add_sse2
305	macro_add_sse2
306	macro_add_sse2
307	macro_add_sse2
308	macro_add_sse2
309	macro_add_sse2
310	macro_add_sse2
311
312	// divide results by ...
313	psrlw xmm3, 3
314	psrlw xmm2, 3
315
316	// convert [xmm2 xmm3] as 8 words
317	packuswb xmm3,xmm2
318	movdqa [ecx], xmm3
319
320	sub eax, 16 // Update the number of points left
321	add ecx, 16 // Update output pointer
322
323	jmp row_sum_loop // Loop
324
325	//Cleanup
326	end_sum_loop:
327	emms
328	}
329	}
330
331	inline void avg_Col_13_sse2(ushort* im, uchar* im_out, int dataSize, int width)
332	{
333	__asm {
334
335	mov edi, width
336	shl edi, 1 // edi = 2*width
337
338	mov eax, dataSize
339	mov ecx, im_out
340
341	mov ebx, im
342	sub ebx, edi
343	sub ebx, edi
344	sub ebx, edi
345	sub ebx, edi
346	sub ebx, edi
347	sub ebx, edi // ebx = ebx-4*width
348
349	test eax, eax // Is there anything to do?"
350	jz end_sum_loop // Jump out if necessary
351
352	row_sum_loop:
353
354	test eax, eax // Is there anything to do?
355	jz end_sum_loop // Jump out if necessary
356
357	mov edx, ebx
358	add ebx, 32
359
360	// 1
361	movdqa xmm3, [edx] // xmm3 = 8 words of im
362	movdqa xmm2, [edx+16] // xmm3 = 8 words of im
363	add edx, edi
364
365	macro_add_sse2
366	macro_add_sse2
367	macro_add_sse2
368	macro_add_sse2
369	macro_add_sse2
370	macro_add_sse2
371	macro_add_sse2
372	macro_add_sse2
373	macro_add_sse2
374	macro_add_sse2
375	macro_add_sse2
376	macro_add_sse2
377
378	// divide results by ...
379	psrlw xmm3, 3
380	psrlw xmm2, 3
381
382	// convert [xmm2 xmm3] as 8 words
383	packuswb xmm3,xmm2
384	movdqa [ecx], xmm3
385
386	sub eax, 16 // Update the number of points left
387	add ecx, 16 // Update output pointer
388
389	jmp row_sum_loop // Loop
390
391	//Cleanup
392	end_sum_loop:
393	emms
394	}
395	}
396
397	// apply vertical mask 1/16*[1 1 1 ... 1]^T to 'im'
398	// result in 'im_out'
399	inline void avg_Col_sse2(ushort* im, uchar* im_out, int dataSize, int width, int sizeMask)
400	{
401	switch (sizeMask)
402	{
403	case 5: avg_Col_5_sse2(im,im_out,dataSize,width);
404	break;
405	case 7: avg_Col_7_sse2(im,im_out,dataSize,width);
406	break;
407	case 9: avg_Col_9_sse2(im,im_out,dataSize,width);
408	break;
409	case 11: avg_Col_11_sse2(im,im_out,dataSize,width);
410	break;
411	case 13: avg_Col_13_sse2(im,im_out,dataSize,width);
412	break;
413	case 15: avg_Col_15(im,im_out,dataSize,width);
414	break;
415	case 17: avg_Col_17(im,im_out,dataSize,width);
416	break;
417
418	default: if (sizeMask<5) avg_Col_5_sse2(im,im_out,dataSize,width);
419	else if (sizeMask>17) avg_Col_17(im,im_out,dataSize,width);
420	}
421	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format