Created
July 6, 2012 06:59
-
-
Save nwmcsween/3058516 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| From 722c69528e86c9c85469aa00908c206551e981ed Mon Sep 17 00:00:00 2001 | |
| From: Nathan McSween <[email protected]> | |
| Date: Fri, 6 Jul 2012 00:01:19 +0000 | |
| Subject: [PATCH] aio_readwrite - restructure; using a switch removes a register | |
| spill in the hot path | |
| --- | |
| src/aio/aio_readwrite.c | 65 ++++++++++++++++++++++++++--------------------- | |
| 1 file changed, 36 insertions(+), 29 deletions(-) | |
| diff --git a/src/aio/aio_readwrite.c b/src/aio/aio_readwrite.c | |
| index 584ccb3..254d01b 100644 | |
| --- a/src/aio/aio_readwrite.c | |
| +++ b/src/aio/aio_readwrite.c | |
| @@ -11,41 +11,43 @@ weak_alias(dummy, __aio_wake); | |
| static void notify_signal(struct sigevent *sev) | |
| { | |
| siginfo_t si = { | |
| - .si_signo = sev->sigev_signo, | |
| - .si_value = sev->sigev_value, | |
| - .si_code = SI_ASYNCIO, | |
| - .si_pid = __pthread_self()->pid, | |
| - .si_uid = getuid() | |
| + .si_signo = sev->sigev_signo, | |
| + .si_value = sev->sigev_value, | |
| + .si_code = SI_ASYNCIO, | |
| + .si_pid = __pthread_self()->pid, | |
| + .si_uid = getuid() | |
| }; | |
| + | |
| __syscall(SYS_rt_sigqueueinfo, si.si_pid, si.si_signo, &si); | |
| } | |
| static void *io_thread(void *p) | |
| { | |
| - struct aiocb *cb = p; | |
| - int fd = cb->aio_fildes; | |
| - void *buf = (void *)cb->aio_buf; | |
| - size_t len = cb->aio_nbytes; | |
| - off_t off = cb->aio_offset; | |
| - int op = cb->aio_lio_opcode; | |
| - struct sigevent sev = cb->aio_sigevent; | |
| - ssize_t ret; | |
| - | |
| - if (op == LIO_WRITE) { | |
| - if ( (fcntl(fd, F_GETFL) & O_APPEND) | |
| - ||((ret = pwrite(fd, buf, len, off))<0 && errno==ESPIPE) ) | |
| - ret = write(fd, buf, len); | |
| - } else if (op == LIO_READ) { | |
| - if ( (ret = pread(fd, buf, len, off))<0 && errno==ESPIPE ) | |
| - ret = read(fd, buf, len); | |
| - } else { | |
| - ret = 0; | |
| - } | |
| - cb->__ret = ret; | |
| + struct aiocb *cb = p; | |
| + struct sigevent sev = cb->aio_sigevent; | |
| + void *buf = (void *)cb->aio_buf; | |
| + int fd = cb->aio_fildes; | |
| + int op = cb->aio_lio_opcode; | |
| + off_t off = cb->aio_offset; | |
| + ssize_t len = cb->aio_nbytes; | |
| - if (ret < 0) a_store(&cb->__err, errno); | |
| - else a_store(&cb->__err, 0); | |
| + switch (op) { | |
| + case LIO_WRITE: | |
| + if ((fcntl(fd, F_GETFL) & O_APPEND) || | |
| + ((pwrite(fd, buf, len, off)) < 0 && | |
| + errno == ESPIPE)) | |
| + cb->__ret = write(fd, buf, len); | |
| + break; | |
| + case LIO_READ: | |
| + if (pread(fd, buf, len, off) < 0 && errno == ESPIPE) | |
| + cb->__ret = read(fd, buf, len); | |
| + break; | |
| + default: | |
| + cb->__ret = 0; | |
| + } | |
| + | |
| + cb->__ret < 0 ? a_store(&cb->__err, errno) : a_store(&cb->__err, 0); | |
| __aio_wake(); | |
| switch (cb->aio_sigevent.sigev_notify) { | |
| @@ -77,14 +79,17 @@ static int new_req(struct aiocb *cb) | |
| pthread_attr_setstacksize(&a, PAGE_SIZE); | |
| pthread_attr_setguardsize(&a, 0); | |
| } | |
| + | |
| pthread_attr_setdetachstate(&a, PTHREAD_CREATE_DETACHED); | |
| sigfillset(&set); | |
| pthread_sigmask(SIG_BLOCK, &set, &set); | |
| cb->__err = EINPROGRESS; | |
| + | |
| if (pthread_create(&td, &a, io_thread, cb)) { | |
| - errno = EAGAIN; | |
| - ret = -1; | |
| + errno = EAGAIN; | |
| + ret = -1; | |
| } | |
| + | |
| pthread_sigmask(SIG_SETMASK, &set, 0); | |
| cb->__td = td; | |
| @@ -94,11 +99,13 @@ static int new_req(struct aiocb *cb) | |
| int aio_read(struct aiocb *cb) | |
| { | |
| cb->aio_lio_opcode = LIO_READ; | |
| + | |
| return new_req(cb); | |
| } | |
| int aio_write(struct aiocb *cb) | |
| { | |
| cb->aio_lio_opcode = LIO_WRITE; | |
| + | |
| return new_req(cb); | |
| } | |
| -- | |
| 1.7.10.4 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| --- aio_readwrite.s 2012-07-05 23:55:35.585294065 +0000 | |
| +++ aio_readwrite_opt.s 2012-07-05 23:55:24.698627910 +0000 | |
| @@ -1,4 +1,4 @@ | |
| - .file "../m/src/aio/aio_readwrite.c" | |
| + .file "./src/aio/aio_readwrite.c" | |
| .text | |
| .align 16, 0x90 | |
| .type dummy,@function | |
| @@ -165,9 +165,9 @@ | |
| pushq %rbx | |
| .Ltmp30: | |
| .cfi_def_cfa_offset 56 | |
| - subq $200, %rsp | |
| + subq $184, %rsp | |
| .Ltmp31: | |
| - .cfi_def_cfa_offset 256 | |
| + .cfi_def_cfa_offset 240 | |
| .Ltmp32: | |
| .cfi_offset %rbx, -56 | |
| .Ltmp33: | |
| @@ -182,25 +182,24 @@ | |
| .cfi_offset %rbp, -16 | |
| movq %rdi, %rbx | |
| movq 112(%rbx), %rax | |
| - movq %rax, 8(%rsp) # 8-byte Spill | |
| + movq %rax, (%rsp) # 8-byte Spill | |
| movq 96(%rbx), %rax | |
| - movq %rax, 24(%rsp) # 8-byte Spill | |
| - movq 72(%rbx), %r14 | |
| + movq %rax, 8(%rsp) # 8-byte Spill | |
| + movl 104(%rbx), %eax | |
| + movq 152(%rbx), %rcx | |
| + movq %rcx, 40(%rsp) | |
| + movq 144(%rbx), %rcx | |
| + movq %rcx, 32(%rsp) | |
| + movq 128(%rbx), %rcx | |
| + movq 136(%rbx), %rdx | |
| + movq %rdx, 24(%rsp) | |
| + movq %rcx, 16(%rsp) | |
| + movslq %eax, %r15 | |
| movq 80(%rbx), %r12 | |
| - movq 24(%rbx), %r13 | |
| + movq 72(%rbx), %r14 | |
| movl 48(%rbx), %ebp | |
| + movq 24(%rbx), %r13 | |
| movl 52(%rbx), %eax | |
| - movl 104(%rbx), %r8d | |
| - movq 128(%rbx), %rdx | |
| - movq 136(%rbx), %rsi | |
| - movq 144(%rbx), %rdi | |
| - movq 152(%rbx), %rcx | |
| - movq %rcx, 56(%rsp) | |
| - movq %rdi, 48(%rsp) | |
| - movq %rsi, 40(%rsp) | |
| - movq %rdx, 32(%rsp) | |
| - movslq %r8d, %rcx | |
| - movq %rcx, 16(%rsp) # 8-byte Spill | |
| testl %eax, %eax | |
| jne .LBB4_1 | |
| # BB#6: | |
| @@ -209,23 +208,22 @@ | |
| movq %r12, %rdx | |
| movq %r14, %rcx | |
| callq pread | |
| - movq %rax, %r15 | |
| - testq %r15, %r15 | |
| - jns .LBB4_11 | |
| + testq %rax, %rax | |
| + jns .LBB4_10 | |
| # BB#7: | |
| callq __errno_location | |
| cmpl $29, (%rax) | |
| - jne .LBB4_18 | |
| + jne .LBB4_10 | |
| # BB#8: | |
| movl %ebp, %edi | |
| movq %r13, %rsi | |
| movq %r12, %rdx | |
| callq read | |
| - jmp .LBB4_9 | |
| + movq %rax, 88(%rbx) | |
| + jmp .LBB4_10 | |
| .LBB4_1: | |
| - xorl %r15d, %r15d | |
| cmpl $1, %eax | |
| - jne .LBB4_11 | |
| + jne .LBB4_9 | |
| # BB#2: | |
| movl %ebp, %edi | |
| movl $3, %esi | |
| @@ -239,41 +237,35 @@ | |
| movq %r12, %rdx | |
| movq %r14, %rcx | |
| callq pwrite | |
| - movq %rax, %r15 | |
| - testq %r15, %r15 | |
| - js .LBB4_4 | |
| -.LBB4_11: # %.thread | |
| - movq %r15, 88(%rbx) | |
| - leaq 68(%rbx), %r14 | |
| - jmp .LBB4_12 | |
| -.LBB4_4: | |
| + testq %rax, %rax | |
| + jns .LBB4_10 | |
| +# BB#4: | |
| callq __errno_location | |
| cmpl $29, (%rax) | |
| - jne .LBB4_18 | |
| + jne .LBB4_10 | |
| .LBB4_5: | |
| movl %ebp, %edi | |
| movq %r13, %rsi | |
| movq %r12, %rdx | |
| callq write | |
| -.LBB4_9: | |
| movq %rax, 88(%rbx) | |
| - leaq 68(%rbx), %r14 | |
| - testq %rax, %rax | |
| - js .LBB4_10 | |
| -.LBB4_12: | |
| + jmp .LBB4_10 | |
| +.LBB4_9: | |
| + movq $0, 88(%rbx) | |
| +.LBB4_10: # %._crit_edge8 | |
| + cmpq $0, 88(%rbx) | |
| + js .LBB4_11 | |
| +# BB#12: | |
| xorl %eax, %eax | |
| #APP | |
| - movl %eax, (%r14) | |
| + movl %eax, 68(%rbx) | |
| #NO_APP | |
| jmp .LBB4_13 | |
| -.LBB4_18: # %.thread9 | |
| - movq %r15, 88(%rbx) | |
| - leaq 68(%rbx), %r14 | |
| -.LBB4_10: | |
| +.LBB4_11: | |
| callq __errno_location | |
| movl (%rax), %eax | |
| #APP | |
| - movl %eax, (%r14) | |
| + movl %eax, 68(%rbx) | |
| #NO_APP | |
| .LBB4_13: | |
| callq __aio_wake | |
| @@ -281,15 +273,14 @@ | |
| cmpl $2, %eax | |
| jne .LBB4_14 | |
| # BB#16: | |
| - movq 24(%rsp), %rdi # 8-byte Reload | |
| - callq *8(%rsp) # 8-byte Folded Reload | |
| + movq 8(%rsp), %rdi # 8-byte Reload | |
| + callq *(%rsp) # 8-byte Folded Reload | |
| jmp .LBB4_17 | |
| .LBB4_14: | |
| testl %eax, %eax | |
| jne .LBB4_17 | |
| # BB#15: | |
| xorps %xmm0, %xmm0 | |
| - movaps %xmm0, 176(%rsp) | |
| movaps %xmm0, 160(%rsp) | |
| movaps %xmm0, 144(%rsp) | |
| movaps %xmm0, 128(%rsp) | |
| @@ -297,20 +288,20 @@ | |
| movaps %xmm0, 96(%rsp) | |
| movaps %xmm0, 80(%rsp) | |
| movaps %xmm0, 64(%rsp) | |
| - movq $0, 192(%rsp) | |
| - movq 16(%rsp), %r15 # 8-byte Reload | |
| - movl %r15d, 64(%rsp) | |
| - movl $-4, 72(%rsp) | |
| + movaps %xmm0, 48(%rsp) | |
| + movq $0, 176(%rsp) | |
| + movl %r15d, 48(%rsp) | |
| + movl $-4, 56(%rsp) | |
| #APP | |
| movq %fs:0,%rax | |
| #NO_APP | |
| movslq 52(%rax), %rbx | |
| - leaq 64(%rsp), %r14 | |
| - movl %ebx, 80(%rsp) | |
| + leaq 48(%rsp), %r14 | |
| + movl %ebx, 64(%rsp) | |
| callq getuid | |
| - movl %eax, 84(%rsp) | |
| - movq 24(%rsp), %rax # 8-byte Reload | |
| - movq %rax, 88(%rsp) | |
| + movl %eax, 68(%rsp) | |
| + movq 8(%rsp), %rax # 8-byte Reload | |
| + movq %rax, 72(%rsp) | |
| movl $129, %eax | |
| movq %rbx, %rdi | |
| movq %r15, %rsi | |
| @@ -320,7 +311,7 @@ | |
| #NO_APP | |
| .LBB4_17: | |
| xorl %eax, %eax | |
| - addq $200, %rsp | |
| + addq $184, %rsp | |
| popq %rbx | |
| popq %r12 | |
| popq %r13 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment