Extract three adjacent ushorts from an arbitrary array location.
(Would do a lot better unrolled, I think)
/*
 * For every group of three consecutive 16-bit indexes, pull the three values
 * out of an arbitrarily aligned position in indexes[], widen them to one
 * value per 32-bit word, and compute
 *
 *     vertices + index * vertex_size
 *
 * for all three at once. The resulting vector is passed to func().
 *
 * NOTE(review): each pass reads indexes[j], [j+1] and [j+2]; this presumes
 * num_indexes is a multiple of 3 - confirm against the caller.
 */
for (j = 0; j < num_indexes; j += 3) {
    // Address of indexes[j] in the preferred slot. The quadword loads below
    // ignore the low four address bits, so they fetch the aligned quadword
    // containing indexes[j] and the quadword immediately after it.
    qword lower_qword = si_from_ptr(&indexes[j]);
    qword first  = si_lqd(lower_qword, 0);
    qword second = si_lqd(lower_qword, 16);

    // Byte offset of indexes[j] within its aligned quadword (address & 15).
    qword offset = si_andi(lower_qword, 15);

    // Build a byte-select mask from (1 << offset) - 1: si_sf(a, b) is b - a,
    // and si_fsmb expands the low 16 bits into one mask byte per bit, so the
    // last `offset` bytes of the mask are set.
    qword one  = si_from_uint(1);
    qword mask = si_fsmb(si_sf(one, si_shl(one, offset)));

    // Move both halves into place:
    //  - first is shifted left by `offset` bytes, so indexes[j] lands in
    //    byte 0 and the vacated tail bytes are zero;
    //  - second is shifted right by 16 - offset bytes. si_rotqmby shifts
    //    right by the negated count modulo 32, and (offset | 16) negated
    //    modulo 32 is exactly 16 - offset (a full 16-byte shift, i.e. all
    //    zeroes, when offset is 0).
    first  = si_shlqby(first, offset);
    second = si_rotqmby(second, si_ori(offset, 16));

    // Merge: take the tail bytes from second where the mask is set, the
    // rest from first. indexes[j], [j+1], [j+2] are now the first three
    // halfwords of `is`.
    qword is = si_selb(first, second, mask);

    // Spread the three halfwords out so each sits in the low half of its
    // own 32-bit word (upper halves zero).
    is = si_shufb(is, is, SHUFB8(0,A,0,B,0,C,0,0));

    // vs = vertices + index * vertex_size for each of the three words.
    // si_mpya would multiply the halfwords as *signed* values, turning any
    // index >= 0x8000 into a negative offset; the indexes are unsigned
    // shorts, so use the unsigned multiply and add the base separately.
    qword scaled = si_mpyu(is, (qword)spu_splats(vertex_size));
    qword vs = si_a(scaled, (qword)spu_splats((unsigned)vertices));

    func(vs);
}