--- a/sdl/sdl.cpp
+++ b/sdl/sdl.cpp
@@ -2216,6 +2217,134 @@
 	}
 }
 
+#define THREADS_STRETCH_CNT (3)
+/* Job state shared by filter_stretch_4_master() and its worker threads. */
+struct {
+	struct filter_data *in; /* input buffer the workers read from */
+	struct filter_data *out; /* output buffer the workers write to */
+	struct {
+		unsigned int src_y_start; /* first source row of the slice */
+		unsigned int src_y_end; /* one past the last source row */
+		unsigned int dst_off; /* byte offset of the slice in out->buf */
+		SDL_sem *sem_in; /* posted to start a job or to request exit */
+		SDL_sem *sem_out; /* posted by the worker after each job */
+	} thread[THREADS_STRETCH_CNT];
+	volatile int exit; /* non-zero makes the workers return */
+} threads_stretch_data;
+
+/**
+ * Worker thread: stretches source rows [src_y_start, src_y_end) of its slot
+ * into the output buffer each time sem_in is posted, then posts sem_out.
+ * @param num Slot index in threads_stretch_data.thread[], cast to void *.
+ * @return Always 0, once threads_stretch_data.exit is set.
+ */
+static int filter_stretch_thread(void *num) {
+	/* Convert the value back; reading an int through &num is not portable. */
+	int thread_num = (int)(intptr_t)num;
+	while (!threads_stretch_data.exit) {
+		SDL_SemWait(threads_stretch_data.thread[thread_num].sem_in);
+		if (threads_stretch_data.exit)
+			break; /* woken up only to terminate */
+
+		struct filter_data *in = threads_stretch_data.in;
+		struct filter_data *out = threads_stretch_data.out;
+		struct filter_stretch_data *data =
+			(struct filter_stretch_data *)out->data;
+		uint8_t *h_table = data->h_table;
+		uint8_t *v_table = data->v_table;
+		unsigned int src_y_start = threads_stretch_data.thread[thread_num].src_y_start;
+		unsigned int src_y_end = threads_stretch_data.thread[thread_num].src_y_end;
+		/* Pitches are in bytes; the row pointers below step in pixels. */
+		unsigned int dst_pitch = out->pitch / sizeof(uint32_t);
+		unsigned int src_pitch = in->pitch / sizeof(uint32_t);
+		unsigned int src_w = in->width;
+		unsigned int dst_w = out->width;
+		uint32_t *dst = (uint32_t *)(out->buf.u8 + threads_stretch_data.thread[thread_num].dst_off);
+		uint32_t *src = (uint32_t *)(in->buf.u8 + in->pitch * src_y_start);
+
+		for (unsigned int src_y = src_y_start; src_y != src_y_end;
+		     src_y++, src += src_pitch) {
+			uint8_t v_repeat = v_table[src_y];
+			unsigned int dst_x = 0;
+			if (!v_repeat)
+				continue; /* this source row produces no output */
+			/* Emit each source pixel h_table[src_x] times. */
+			for (unsigned int src_x = 0; src_x != src_w; ++src_x) {
+				uint8_t h_repeat = h_table[src_x];
+				while (h_repeat--)
+					dst[dst_x++] = src[src_x];
+			}
+			dst += dst_pitch;
+			/* Duplicate the finished row for the remaining repeats. */
+			while (--v_repeat) {
+				memcpy(dst, dst - dst_pitch, dst_w * sizeof(*dst));
+				dst += dst_pitch;
+			}
+		}
+		SDL_SemPost(threads_stretch_data.thread[thread_num].sem_out);
+	}
+	return 0;
+}
+
+/**
+ * Create the semaphores and start the worker threads used by
+ * filter_stretch_4_master().
+ * NOTE(review): every call creates new semaphores and threads without
+ * releasing earlier ones - confirm this is only called once.
+ */
+static void threads_stretch_data_init() {
+	threads_stretch_data.in = NULL;
+	threads_stretch_data.out = NULL;
+	/* Clear the flag before any worker can test it. */
+	threads_stretch_data.exit = 0;
+	for (int i = 0; i < THREADS_STRETCH_CNT; i++) {
+		threads_stretch_data.thread[i].sem_in = SDL_CreateSemaphore(0);
+		threads_stretch_data.thread[i].sem_out = SDL_CreateSemaphore(0);
+		/* The slot index travels in the pointer value itself. */
+		SDL_CreateThread(filter_stretch_thread, (void *)(intptr_t)i);
+	}
+}
+
+/**
+ * Stretch filter that splits the source rows into THREADS_STRETCH_CNT
+ * slices, has one worker thread stretch each, and returns when all are done.
+ * @param in Input buffer data.
+ * @param out Output buffer data.
+ */
+static void filter_stretch_4_master(const struct filter_data *in,
+				    struct filter_data *out)
+{
+	struct filter_stretch_data *data =
+		(struct filter_stretch_data *)out->data;
+	uint8_t *v_table = data->v_table;
+	unsigned int dst_pitch = out->pitch;
+	unsigned int src_h = in->height;
+	unsigned int src_y = 0;
+	unsigned int dst_off = 0;
+
+	/* The workers only read the input; the cast drops const for storage. */
+	threads_stretch_data.in = const_cast<struct filter_data *>(in);
+	threads_stretch_data.out = out;
+
+	/* Slice i covers source rows [src_h * i / CNT, src_h * (i + 1) / CNT). */
+	for (int i = 0; i < THREADS_STRETCH_CNT; i++) {
+		unsigned int src_y_end = (src_h * (i + 1)) / THREADS_STRETCH_CNT;
+
+		threads_stretch_data.thread[i].src_y_start = src_y;
+		threads_stretch_data.thread[i].src_y_end = src_y_end;
+		threads_stretch_data.thread[i].dst_off = dst_off;
+		/* Advance by the output bytes this slice emits (rows * pitch). */
+		for (; src_y != src_y_end; src_y++)
+			dst_off += v_table[src_y] * dst_pitch;
+	}
+
+	/* Start every worker, then block until each reports completion. */
+	for (int i = 0; i < THREADS_STRETCH_CNT; i++)
+		SDL_SemPost(threads_stretch_data.thread[i].sem_in);
+	for (int i = 0; i < THREADS_STRETCH_CNT; i++)
+		SDL_SemWait(threads_stretch_data.thread[i].sem_out);
+}
+ /** * This filter stretches the input buffer to fill the entire output. * @param in Input buffer data. @@ -2231,7 +2360,7 @@ { 1, filter_stretch_X }, { 2, filter_stretch_X }, { 3, filter_stretch_3 }, - { 4, filter_stretch_X }, + { 4, filter_stretch_4_master }, }; struct filter_stretch_data *data; unsigned int dst_w; @@ -3872,6 +4002,7 @@ break; } } + threads_stretch_data_init(); return 1; fail: fprintf(stderr, "sdl: can't initialize graphics.\n"); @@ -6906,6 +7037,11 @@ #ifdef WITH_THREADS screen_update_thread_stop(); + threads_stretch_data.exit = 1; + for (int i = 0; i < THREADS_STRETCH_CNT; i++) { + SDL_SemPost(threads_stretch_data.thread[i].sem_in); + SDL_SemPost(threads_stretch_data.thread[i].sem_out); + } #endif if (mdscr.data) { free((void*)mdscr.data);