Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

processingSSE2.inl@ 89

Last change on this file since 89 was 89, checked in by morasjul, 11 years ago

PACPUS 2.0 Beta deployed in new branch

Major changes:
-Add communication interface between components
-Add examples for communications interface (TestComponents)
-Move to Qt5 support

Property svn:executable set to *

File size: 9.1 KB

Rev	Line
[89]	1	/***************************************************************************
	2	*
	3	* Copyright 2000 by David Demirdjian. All rights reserved.
	4	*
	5	* Developed by David Demirdjian
	6	*
	7	* Permission to use, copy, or modify this software and its documentation
	8	* for educational and research purposes only and without fee is hereby
	9	* granted, provided that this copyright notice and the original authors's
	10	* names appear on all copies and supporting documentation. If individual
	11	* files are separated from this distribution directory structure, this
	12	* copyright notice must be included. For any other uses of this software,
	13	* in original or modified form, including but not limited to distribution
	14	* in whole or in part, specific prior permission must be obtained from
	15	* MIT. These programs shall not be used, rewritten, or adapted as the
	16	* basis of a commercial software or hardware product without first
	17	* obtaining appropriate licenses from David Demirdjian. The author makes
	18	* no representations about the suitability of this software for any purpose.
	19	* It is provided "as is" without express or implied warranty.
	20	*
	21	**************************************************************************/
	22	#include "stereoMatching.h"
	23	#include "processingmmx.h"
	24
	25	// ************************************************************
	26	// ************************************************************
	27	// *** List of functions (SSE2) for image processing
	28	// ************************************************************
	29	// ************************************************************
	30
	31	// Src1, Src2 and Dest suppose to point on 16-bytes memory block
	32	inline int ImgSubandAdd_sse2(const unsigned char Src1, const unsigned char Src2,
	33	const unsigned char Src3, unsigned char Dest, int l)
	34	{
	35
	36	if (l < 8) return 0; // image size must be at least 8 bytes
	37
	38	__asm
	39	{
	40	mov eax, Src1
	41	mov ebx, Src2
	42	mov edx, Src3
	43	mov edi, Dest
	44	mov ecx, l
	45	shr ecx, 4
	46
	47	align 16
	48	inner_loop:
	49	movdqa xmm1,[eax] // xmm1=src1
	50	movdqa xmm2,[ebx] // mm2=src2
	51
	52	movdqa xmm4,xmm1 // mm4=mm1
	53
	54	psubusb xmm4,xmm2 // mm4 = src1 - src2
	55
	56	movdqu xmm3,[edx] // mm3=src3
	57	psubusb xmm2,xmm1 // mm2 = src2 - src1
	58
	59	movdqa xmm5,xmm1 // mm5=src1
	60	por xmm2,xmm4 // mm2=\|src1-src2\|
	61
	62	psubusb xmm5,xmm3 // mm4=src1-src3
	63
	64	psubusb xmm3,xmm1 // mm3=src3-src1
	65
	66	por xmm3,xmm5 // mm3=\|src1-src3\|
	67
	68	paddusb xmm2,xmm3 // mm2 = \|src1-src2\|+\|src1-src3\|
	69
	70	movdqa [edi], xmm2
	71	add eax,16
	72	add ebx,16
	73	add edx,16
	74	add edi,16
	75	dec ecx
	76	jnz inner_loop
	77	emms
	78	}
	79
	80	return 1;
	81	}
	82
	83
	84
	85
	86
	87
	88
	// macro_add_sse2 - one accumulation step used by the avg_Col_*_sse2 kernels.
	//
	// Adds, word-wise with unsigned saturation, the 16 words at [edx] into the
	// running sums held in xmm3 (words 0..7) and xmm2 (words 8..15), then
	// advances edx by edi bytes.
	//
	// Expects edx to hold a 16-byte aligned address (paddusw with a memory
	// operand) and edi to hold the byte step to the next source line.
	//
	// The expansion is a single line, so the block is wrapped in braces, every
	// instruction carries its own __asm keyword, and no comment may appear
	// inside the macro body.
	#define macro_add_sse2 __asm {    \
		__asm paddusw xmm3, [edx]     \
		__asm paddusw xmm2, [edx+16]  \
		__asm add     edx, edi        \
	}
	95
	96
	97	inline void avg_Col_5_sse2(ushort* im, uchar* im_out, int dataSize, int width)
	98	{
	99	__asm {
	100
	101	mov edi, width
	102	shl edi, 1 // edi = 2*width
	103
	104	mov eax, dataSize
	105	mov ecx, im_out
	106
	107	mov ebx, im
	108	sub ebx, edi
	109	sub ebx, edi // ebx = ebx-4*width
	110
	111	test eax, eax // Is there anything to do?"
	112	jz end_sum_loop // Jump out if necessary
	113
	114	row_sum_loop:
	115
	116	test eax, eax // Is there anything to do?
	117	jz end_sum_loop // Jump out if necessary
	118
	119	mov edx, ebx
	120	add ebx, 32
	121
	122	// 1
	123	movdqa xmm3, [edx] // xmm3 = 8 words of im
	124	movdqa xmm2, [edx+16] // xmm3 = 8 words of im
	125	add edx, edi
	126
	127	macro_add_sse2
	128	macro_add_sse2
	129	macro_add_sse2
	130	macro_add_sse2
	131
	132	// divide results by ...
	133	psrlw xmm3, 3
	134	psrlw xmm2, 3
	135
	136	// convert [xmm2 xmm3] as 8 words
	137	packuswb xmm3,xmm2
	138	movdqa [ecx], xmm3
	139
	140	sub eax, 16 // Update the number of points left
	141	add ecx, 16 // Update output pointer
	142
	143	jmp row_sum_loop // Loop
	144
	145	//Cleanup
	146	end_sum_loop:
	147	emms
	148	}
	149	}
	150
	151	inline void avg_Col_7_sse2(ushort* im, uchar* im_out, int dataSize, int width)
	152	{
	153	__asm {
	154
	155	mov edi, width
	156	shl edi, 1 // edi = 2*width
	157
	158	mov eax, dataSize
	159	mov ecx, im_out
	160
	161	mov ebx, im
	162	sub ebx, edi
	163	sub ebx, edi
	164	sub ebx, edi // ebx = ebx-4*width
	165
	166	test eax, eax // Is there anything to do?"
	167	jz end_sum_loop // Jump out if necessary
	168
	169	row_sum_loop:
	170
	171	test eax, eax // Is there anything to do?
	172	jz end_sum_loop // Jump out if necessary
	173
	174	mov edx, ebx
	175	add ebx, 32
	176
	177	// 1
	178	movdqa xmm3, [edx] // xmm3 = 8 words of im
	179	movdqa xmm2, [edx+16] // xmm3 = 8 words of im
	180	add edx, edi
	181
	182	macro_add_sse2
	183	macro_add_sse2
	184	macro_add_sse2
	185	macro_add_sse2
	186	macro_add_sse2
	187	macro_add_sse2
	188
	189	// divide results by ...
	190	psrlw xmm3, 3
	191	psrlw xmm2, 3
	192
	193	// convert [xmm2 xmm3] as 8 words
	194	packuswb xmm3,xmm2
	195	movdqa [ecx], xmm3
	196
	197	sub eax, 16 // Update the number of points left
	198	add ecx, 16 // Update output pointer
	199
	200	jmp row_sum_loop // Loop
	201
	202	//Cleanup
	203	end_sum_loop:
	204	emms
	205	}
	206	}
	207
	208	inline void avg_Col_9_sse2(ushort* im, uchar* im_out, int dataSize, int width)
	209	{
	210	__asm {
	211
	212	mov edi, width
	213	shl edi, 1 // edi = 2*width
	214
	215	mov eax, dataSize
	216	mov ecx, im_out
	217
	218	mov ebx, im
	219	sub ebx, edi
	220	sub ebx, edi
	221	sub ebx, edi
	222	sub ebx, edi // ebx = ebx-4*width
	223
	224	test eax, eax // Is there anything to do?"
	225	jz end_sum_loop // Jump out if necessary
	226
	227	row_sum_loop:
	228
	229	test eax, eax // Is there anything to do?
	230	jz end_sum_loop // Jump out if necessary
	231
	232	mov edx, ebx
	233	add ebx, 32
	234
	235	// 1
	236	movdqa xmm3, [edx] // xmm3 = 8 words of im
	237	movdqa xmm2, [edx+16] // xmm3 = 8 words of im
	238	add edx, edi
	239
	240	macro_add_sse2
	241	macro_add_sse2
	242	macro_add_sse2
	243	macro_add_sse2
	244	macro_add_sse2
	245	macro_add_sse2
	246	macro_add_sse2
	247	macro_add_sse2
	248
	249	// divide results by ...
	250	psrlw xmm3, 3
	251	psrlw xmm2, 3
	252
	253	// convert [xmm2 xmm3] as 8 words
	254	packuswb xmm3,xmm2
	255	movdqa [ecx], xmm3
	256
	257	sub eax, 16 // Update the number of points left
	258	add ecx, 16 // Update output pointer
	259
	260	jmp row_sum_loop // Loop
	261
	262	//Cleanup
	263	end_sum_loop:
	264	emms
	265	}
	266	}
	267
	268	inline void avg_Col_11_sse2(ushort* im, uchar* im_out, int dataSize, int width)
	269	{
	270	__asm {
	271
	272	mov edi, width
	273	shl edi, 1 // edi = 2*width
	274
	275	mov eax, dataSize
	276	mov ecx, im_out
	277
	278	mov ebx, im
	279	sub ebx, edi
	280	sub ebx, edi
	281	sub ebx, edi
	282	sub ebx, edi
	283	sub ebx, edi // ebx = ebx-4*width
	284
	285	test eax, eax // Is there anything to do?"
	286	jz end_sum_loop // Jump out if necessary
	287
	288	row_sum_loop:
	289
	290	test eax, eax // Is there anything to do?
	291	jz end_sum_loop // Jump out if necessary
	292
	293	mov edx, ebx
	294	add ebx, 32
	295
	296	// 1
	297	movdqa xmm3, [edx] // xmm3 = 8 words of im
	298	movdqa xmm2, [edx+16] // xmm3 = 8 words of im
	299	add edx, edi
	300
	301	macro_add_sse2
	302	macro_add_sse2
	303	macro_add_sse2
	304	macro_add_sse2
	305	macro_add_sse2
	306	macro_add_sse2
	307	macro_add_sse2
	308	macro_add_sse2
	309	macro_add_sse2
	310	macro_add_sse2
	311
	312	// divide results by ...
	313	psrlw xmm3, 3
	314	psrlw xmm2, 3
	315
	316	// convert [xmm2 xmm3] as 8 words
	317	packuswb xmm3,xmm2
	318	movdqa [ecx], xmm3
	319
	320	sub eax, 16 // Update the number of points left
	321	add ecx, 16 // Update output pointer
	322
	323	jmp row_sum_loop // Loop
	324
	325	//Cleanup
	326	end_sum_loop:
	327	emms
	328	}
	329	}
	330
	331	inline void avg_Col_13_sse2(ushort* im, uchar* im_out, int dataSize, int width)
	332	{
	333	__asm {
	334
	335	mov edi, width
	336	shl edi, 1 // edi = 2*width
	337
	338	mov eax, dataSize
	339	mov ecx, im_out
	340
	341	mov ebx, im
	342	sub ebx, edi
	343	sub ebx, edi
	344	sub ebx, edi
	345	sub ebx, edi
	346	sub ebx, edi
	347	sub ebx, edi // ebx = ebx-4*width
	348
	349	test eax, eax // Is there anything to do?"
	350	jz end_sum_loop // Jump out if necessary
	351
	352	row_sum_loop:
	353
	354	test eax, eax // Is there anything to do?
	355	jz end_sum_loop // Jump out if necessary
	356
	357	mov edx, ebx
	358	add ebx, 32
	359
	360	// 1
	361	movdqa xmm3, [edx] // xmm3 = 8 words of im
	362	movdqa xmm2, [edx+16] // xmm3 = 8 words of im
	363	add edx, edi
	364
	365	macro_add_sse2
	366	macro_add_sse2
	367	macro_add_sse2
	368	macro_add_sse2
	369	macro_add_sse2
	370	macro_add_sse2
	371	macro_add_sse2
	372	macro_add_sse2
	373	macro_add_sse2
	374	macro_add_sse2
	375	macro_add_sse2
	376	macro_add_sse2
	377
	378	// divide results by ...
	379	psrlw xmm3, 3
	380	psrlw xmm2, 3
	381
	382	// convert [xmm2 xmm3] as 8 words
	383	packuswb xmm3,xmm2
	384	movdqa [ecx], xmm3
	385
	386	sub eax, 16 // Update the number of points left
	387	add ecx, 16 // Update output pointer
	388
	389	jmp row_sum_loop // Loop
	390
	391	//Cleanup
	392	end_sum_loop:
	393	emms
	394	}
	395	}
	396
	397	// apply vertical mask 1/16*[1 1 1 ... 1]^T to 'im'
	398	// result in 'im_out'
	399	inline void avg_Col_sse2(ushort* im, uchar* im_out, int dataSize, int width, int sizeMask)
	400	{
	401	switch (sizeMask)
	402	{
	403	case 5: avg_Col_5_sse2(im,im_out,dataSize,width);
	404	break;
	405	case 7: avg_Col_7_sse2(im,im_out,dataSize,width);
	406	break;
	407	case 9: avg_Col_9_sse2(im,im_out,dataSize,width);
	408	break;
	409	case 11: avg_Col_11_sse2(im,im_out,dataSize,width);
	410	break;
	411	case 13: avg_Col_13_sse2(im,im_out,dataSize,width);
	412	break;
	413	case 15: avg_Col_15(im,im_out,dataSize,width);
	414	break;
	415	case 17: avg_Col_17(im,im_out,dataSize,width);
	416	break;
	417
	418	default: if (sizeMask<5) avg_Col_5_sse2(im,im_out,dataSize,width);
	419	else if (sizeMask>17) avg_Col_17(im,im_out,dataSize,width);
	420	}
	421	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format