Skip to content

Instantly share code, notes, and snippets.

@nwmcsween
Created July 6, 2012 06:59
Show Gist options
  • Select an option

  • Save nwmcsween/3058516 to your computer and use it in GitHub Desktop.

Select an option

Save nwmcsween/3058516 to your computer and use it in GitHub Desktop.
From 722c69528e86c9c85469aa00908c206551e981ed Mon Sep 17 00:00:00 2001
From: Nathan McSween <[email protected]>
Date: Fri, 6 Jul 2012 00:01:19 +0000
Subject: [PATCH] aio_readwrite - restructure; using a switch removes a
register spill in the hot path
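---
NOTE(review): this is not a pure restructure. The old io_thread() kept the
result of pwrite()/pread() in ret and then stored it with cb->__ret = ret.
The new code discards those return values and assigns cb->__ret only on the
write()/read() fallback and default paths, so after a successful
pwrite()/pread() (or a failure other than ESPIPE) cb->__ret is never written
by io_thread() before it is tested for < 0. The assembly diff below shows the
same thing: the store to 88(%rbx) after pread/pwrite is gone. The patch also
changes len from size_t to ssize_t.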
src/aio/aio_readwrite.c | 65 ++++++++++++++++++++++++++---------------------
1 file changed, 36 insertions(+), 29 deletions(-)
diff --git a/src/aio/aio_readwrite.c b/src/aio/aio_readwrite.c
index 584ccb3..254d01b 100644
--- a/src/aio/aio_readwrite.c
+++ b/src/aio/aio_readwrite.c
@@ -11,41 +11,43 @@ weak_alias(dummy, __aio_wake);
static void notify_signal(struct sigevent *sev)
{
siginfo_t si = {
- .si_signo = sev->sigev_signo,
- .si_value = sev->sigev_value,
- .si_code = SI_ASYNCIO,
- .si_pid = __pthread_self()->pid,
- .si_uid = getuid()
+ .si_signo = sev->sigev_signo,
+ .si_value = sev->sigev_value,
+ .si_code = SI_ASYNCIO,
+ .si_pid = __pthread_self()->pid,
+ .si_uid = getuid()
};
+
__syscall(SYS_rt_sigqueueinfo, si.si_pid, si.si_signo, &si);
}
static void *io_thread(void *p)
{
- struct aiocb *cb = p;
- int fd = cb->aio_fildes;
- void *buf = (void *)cb->aio_buf;
- size_t len = cb->aio_nbytes;
- off_t off = cb->aio_offset;
- int op = cb->aio_lio_opcode;
- struct sigevent sev = cb->aio_sigevent;
- ssize_t ret;
-
- if (op == LIO_WRITE) {
- if ( (fcntl(fd, F_GETFL) & O_APPEND)
- ||((ret = pwrite(fd, buf, len, off))<0 && errno==ESPIPE) )
- ret = write(fd, buf, len);
- } else if (op == LIO_READ) {
- if ( (ret = pread(fd, buf, len, off))<0 && errno==ESPIPE )
- ret = read(fd, buf, len);
- } else {
- ret = 0;
- }
- cb->__ret = ret;
+ struct aiocb *cb = p;
+ struct sigevent sev = cb->aio_sigevent;
+ void *buf = (void *)cb->aio_buf;
+ int fd = cb->aio_fildes;
+ int op = cb->aio_lio_opcode;
+ off_t off = cb->aio_offset;
+ ssize_t len = cb->aio_nbytes;
- if (ret < 0) a_store(&cb->__err, errno);
- else a_store(&cb->__err, 0);
+ switch (op) {
+ case LIO_WRITE:
+ if ((fcntl(fd, F_GETFL) & O_APPEND) ||
+ ((pwrite(fd, buf, len, off)) < 0 &&
+ errno == ESPIPE))
+ cb->__ret = write(fd, buf, len);
+ break;
+ case LIO_READ:
+ if (pread(fd, buf, len, off) < 0 && errno == ESPIPE)
+ cb->__ret = read(fd, buf, len);
+ break;
+ default:
+ cb->__ret = 0;
+ }
+
+ cb->__ret < 0 ? a_store(&cb->__err, errno) : a_store(&cb->__err, 0);
__aio_wake();
switch (cb->aio_sigevent.sigev_notify) {
@@ -77,14 +79,17 @@ static int new_req(struct aiocb *cb)
pthread_attr_setstacksize(&a, PAGE_SIZE);
pthread_attr_setguardsize(&a, 0);
}
+
pthread_attr_setdetachstate(&a, PTHREAD_CREATE_DETACHED);
sigfillset(&set);
pthread_sigmask(SIG_BLOCK, &set, &set);
cb->__err = EINPROGRESS;
+
if (pthread_create(&td, &a, io_thread, cb)) {
- errno = EAGAIN;
- ret = -1;
+ errno = EAGAIN;
+ ret = -1;
}
+
pthread_sigmask(SIG_SETMASK, &set, 0);
cb->__td = td;
@@ -94,11 +99,13 @@ static int new_req(struct aiocb *cb)
int aio_read(struct aiocb *cb)
{
cb->aio_lio_opcode = LIO_READ;
+
return new_req(cb);
}
int aio_write(struct aiocb *cb)
{
cb->aio_lio_opcode = LIO_WRITE;
+
return new_req(cb);
}
--
1.7.10.4
--- aio_readwrite.s 2012-07-05 23:55:35.585294065 +0000
+++ aio_readwrite_opt.s 2012-07-05 23:55:24.698627910 +0000
@@ -1,4 +1,4 @@
- .file "../m/src/aio/aio_readwrite.c"
+ .file "./src/aio/aio_readwrite.c"
.text
.align 16, 0x90
.type dummy,@function
@@ -165,9 +165,9 @@
pushq %rbx
.Ltmp30:
.cfi_def_cfa_offset 56
- subq $200, %rsp
+ subq $184, %rsp
.Ltmp31:
- .cfi_def_cfa_offset 256
+ .cfi_def_cfa_offset 240
.Ltmp32:
.cfi_offset %rbx, -56
.Ltmp33:
@@ -182,25 +182,24 @@
.cfi_offset %rbp, -16
movq %rdi, %rbx
movq 112(%rbx), %rax
- movq %rax, 8(%rsp) # 8-byte Spill
+ movq %rax, (%rsp) # 8-byte Spill
movq 96(%rbx), %rax
- movq %rax, 24(%rsp) # 8-byte Spill
- movq 72(%rbx), %r14
+ movq %rax, 8(%rsp) # 8-byte Spill
+ movl 104(%rbx), %eax
+ movq 152(%rbx), %rcx
+ movq %rcx, 40(%rsp)
+ movq 144(%rbx), %rcx
+ movq %rcx, 32(%rsp)
+ movq 128(%rbx), %rcx
+ movq 136(%rbx), %rdx
+ movq %rdx, 24(%rsp)
+ movq %rcx, 16(%rsp)
+ movslq %eax, %r15
movq 80(%rbx), %r12
- movq 24(%rbx), %r13
+ movq 72(%rbx), %r14
movl 48(%rbx), %ebp
+ movq 24(%rbx), %r13
movl 52(%rbx), %eax
- movl 104(%rbx), %r8d
- movq 128(%rbx), %rdx
- movq 136(%rbx), %rsi
- movq 144(%rbx), %rdi
- movq 152(%rbx), %rcx
- movq %rcx, 56(%rsp)
- movq %rdi, 48(%rsp)
- movq %rsi, 40(%rsp)
- movq %rdx, 32(%rsp)
- movslq %r8d, %rcx
- movq %rcx, 16(%rsp) # 8-byte Spill
testl %eax, %eax
jne .LBB4_1
# BB#6:
@@ -209,23 +208,22 @@
movq %r12, %rdx
movq %r14, %rcx
callq pread
- movq %rax, %r15
- testq %r15, %r15
- jns .LBB4_11
+ testq %rax, %rax
+ jns .LBB4_10
# BB#7:
callq __errno_location
cmpl $29, (%rax)
- jne .LBB4_18
+ jne .LBB4_10
# BB#8:
movl %ebp, %edi
movq %r13, %rsi
movq %r12, %rdx
callq read
- jmp .LBB4_9
+ movq %rax, 88(%rbx)
+ jmp .LBB4_10
.LBB4_1:
- xorl %r15d, %r15d
cmpl $1, %eax
- jne .LBB4_11
+ jne .LBB4_9
# BB#2:
movl %ebp, %edi
movl $3, %esi
@@ -239,41 +237,35 @@
movq %r12, %rdx
movq %r14, %rcx
callq pwrite
- movq %rax, %r15
- testq %r15, %r15
- js .LBB4_4
-.LBB4_11: # %.thread
- movq %r15, 88(%rbx)
- leaq 68(%rbx), %r14
- jmp .LBB4_12
-.LBB4_4:
+ testq %rax, %rax
+ jns .LBB4_10
+# BB#4:
callq __errno_location
cmpl $29, (%rax)
- jne .LBB4_18
+ jne .LBB4_10
.LBB4_5:
movl %ebp, %edi
movq %r13, %rsi
movq %r12, %rdx
callq write
-.LBB4_9:
movq %rax, 88(%rbx)
- leaq 68(%rbx), %r14
- testq %rax, %rax
- js .LBB4_10
-.LBB4_12:
+ jmp .LBB4_10
+.LBB4_9:
+ movq $0, 88(%rbx)
+.LBB4_10: # %._crit_edge8
+ cmpq $0, 88(%rbx)
+ js .LBB4_11
+# BB#12:
xorl %eax, %eax
#APP
- movl %eax, (%r14)
+ movl %eax, 68(%rbx)
#NO_APP
jmp .LBB4_13
-.LBB4_18: # %.thread9
- movq %r15, 88(%rbx)
- leaq 68(%rbx), %r14
-.LBB4_10:
+.LBB4_11:
callq __errno_location
movl (%rax), %eax
#APP
- movl %eax, (%r14)
+ movl %eax, 68(%rbx)
#NO_APP
.LBB4_13:
callq __aio_wake
@@ -281,15 +273,14 @@
cmpl $2, %eax
jne .LBB4_14
# BB#16:
- movq 24(%rsp), %rdi # 8-byte Reload
- callq *8(%rsp) # 8-byte Folded Reload
+ movq 8(%rsp), %rdi # 8-byte Reload
+ callq *(%rsp) # 8-byte Folded Reload
jmp .LBB4_17
.LBB4_14:
testl %eax, %eax
jne .LBB4_17
# BB#15:
xorps %xmm0, %xmm0
- movaps %xmm0, 176(%rsp)
movaps %xmm0, 160(%rsp)
movaps %xmm0, 144(%rsp)
movaps %xmm0, 128(%rsp)
@@ -297,20 +288,20 @@
movaps %xmm0, 96(%rsp)
movaps %xmm0, 80(%rsp)
movaps %xmm0, 64(%rsp)
- movq $0, 192(%rsp)
- movq 16(%rsp), %r15 # 8-byte Reload
- movl %r15d, 64(%rsp)
- movl $-4, 72(%rsp)
+ movaps %xmm0, 48(%rsp)
+ movq $0, 176(%rsp)
+ movl %r15d, 48(%rsp)
+ movl $-4, 56(%rsp)
#APP
movq %fs:0,%rax
#NO_APP
movslq 52(%rax), %rbx
- leaq 64(%rsp), %r14
- movl %ebx, 80(%rsp)
+ leaq 48(%rsp), %r14
+ movl %ebx, 64(%rsp)
callq getuid
- movl %eax, 84(%rsp)
- movq 24(%rsp), %rax # 8-byte Reload
- movq %rax, 88(%rsp)
+ movl %eax, 68(%rsp)
+ movq 8(%rsp), %rax # 8-byte Reload
+ movq %rax, 72(%rsp)
movl $129, %eax
movq %rbx, %rdi
movq %r15, %rsi
@@ -320,7 +311,7 @@
#NO_APP
.LBB4_17:
xorl %eax, %eax
- addq $200, %rsp
+ addq $184, %rsp
popq %rbx
popq %r12
popq %r13
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment