Commit 30fe27bf authored 4 years ago by Florian Schmaus
[IO] Use io_uring_peek_batch_cqe()
parent 196e2ec0
1 merge request: !120 Reduce the critical section of io_uring CQ
Showing 1 changed file: emper/io/IoContext.cpp (+44 additions, −33 deletions)
View file @ 30fe27bf
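Note on the API change: the old code peeked a single CQE with io_uring_peek_cqe() and then iterated with io_uring_for_each_cqe(); this commit fetches up to CQE_BATCH_COUNT completions in one io_uring_peek_batch_cqe() call into a caller-provided array and releases them with a single io_uring_cq_advance(). The following standalone sketch (not EMPER code; the queue depth, the NOP requests and the file name in the build comment are arbitrary choices for the example) shows the liburing calls involved in that pattern.

// Minimal standalone sketch of batch reaping with liburing (not EMPER code).
// Build (assuming liburing is installed): g++ -O2 peek_batch_demo.cpp -luring
#include <liburing.h>

#include <array>
#include <cstdint>
#include <cstdio>
#include <cstdlib>

int main() {
	constexpr unsigned ENTRIES = 8;  // arbitrary queue depth for the demo
	struct io_uring ring;
	if (io_uring_queue_init(ENTRIES, &ring, 0) < 0) {
		std::perror("io_uring_queue_init");
		return EXIT_FAILURE;
	}

	// Submit a few NOP requests so there is something to reap; NOPs complete
	// immediately, so their CQEs are available right after io_uring_submit().
	for (unsigned i = 0; i < ENTRIES; ++i) {
		struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
		io_uring_prep_nop(sqe);
		io_uring_sqe_set_data(sqe, reinterpret_cast<void *>(static_cast<uintptr_t>(i + 1)));
	}
	io_uring_submit(&ring);

	// Fetch all currently available CQEs in one call instead of peeking one by one.
	std::array<struct io_uring_cqe *, ENTRIES> cqes;
	unsigned count = io_uring_peek_batch_cqe(&ring, cqes.data(), ENTRIES);

	for (unsigned i = 0; i < count; ++i) {
		auto tag = reinterpret_cast<uintptr_t>(io_uring_cqe_get_data(cqes[i]));
		std::printf("cqe %u: res=%d user_data=%zu\n", i, cqes[i]->res, static_cast<size_t>(tag));
	}

	// peek_batch does not consume the CQEs; advance the CQ head once for the batch.
	io_uring_cq_advance(&ring, count);

	io_uring_queue_exit(&ring);
	return EXIT_SUCCESS;
}

Unlike io_uring_wait_cqe(), io_uring_peek_batch_cqe() never blocks; it simply returns 0 when no completions are pending.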
@@ -7,6 +7,7 @@
 #include <sys/eventfd.h>  // for eventfd
 #include <unistd.h>       // for close
+#include <array>
 #include <atomic>   // for atomic, __atomic_base
 #include <cassert>  // for assert
 #include <cerrno>   // for errno, ECANCELED, EBUSY, EAGAIN, EINTR
@@ -162,16 +163,33 @@ template void IoContext::submit<CallerEnvironment::ANYWHERE>(Future &future);
 template <CallerEnvironment callerEnvironment>
 auto IoContext::reapCompletions() -> std::vector<Fiber *> {
-	// vector returned containing all reaped completions
-	std::vector<Fiber *> continuationFibers;
+	// Should not be more than the uring_entries count.
+	const unsigned CQE_BATCH_COUNT = EMPER_IO_WORKER_URING_ENTRIES;
 
 	uint32_t maxRaceFreeCompleterAttempts = 1;
 
+	using Completion = std::pair<uint32_t, TaggedPtr>;
+	// vector to store seen cqes to make the critical section
+	// where cq_lock is held as small as possible
+	std::vector<Completion> reapedCompletions;
+	reapedCompletions.reserve(CQE_BATCH_COUNT);
+
+	// Vector returned containing all reaped completions in form of fibers.
+	std::vector<Fiber *> continuationFibers;
+	// We reserve capacity in contiunationFibers as soon as we know the
+	// amount of reaped completions.
+
 	// this label is not used for callerEnvironment::ANYWHERE and thus has to be
 	// annotated with ATTR_UNUSED
 reap_cqes:
 	ATTR_UNUSED;
 
+	// never reap completions on the global IoContext
+	assert(this != runtime.globalIo);
+
+	LOGD("Reaping completions");
+
+	std::array<struct io_uring_cqe *, CQE_BATCH_COUNT> cqes;
+
 	// Someone else is currently reaping completions
 	if constexpr (callerEnvironment == CallerEnvironment::EMPER) {
 		if (unlikely(!cq_lock.try_lock())) {
@@ -185,32 +203,12 @@ reap_cqes:
 		}
 	}
 
-	// never reap completions on the global IoContext
-	assert(this != runtime.globalIo);
-
-	LOGD("Reaping completions");
-
-	unsigned head;
-	struct io_uring_cqe *cqe;
-	unsigned count = 0;
-
-	using Completion = std::pair<uint32_t, TaggedPtr>;
-	// vector to store seen cqes to make the critical section
-	// where cq_lock is held as small as possible
-	std::vector<Completion> reapedCompletions;
-
-	int err = io_uring_peek_cqe(&ring, &cqe);
-	if (err) {
-		if (err == -EAGAIN) {
-			goto unlock;
-		}
-		errno = -err;
-		DIE_MSG_ERRNO("io_uring_peek_cqe failed");
-	}
-
-	io_uring_for_each_cqe(&ring, head, cqe) {
-		count++;
+	unsigned count = io_uring_peek_batch_cqe(&ring, cqes.data(), CQE_BATCH_COUNT);
 
-		TaggedPtr tptr(io_uring_cqe_get_data(cqe));
+	for (unsigned i = 0; i < count; ++i) {
+		struct io_uring_cqe *cqe = cqes[i];
+		void *cqe_data = io_uring_cqe_get_data(cqe);
+		TaggedPtr tptr(cqe_data);
 
 		// Got a CQE for a forgotten Future
 		if (!tptr) {
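The hunk above is the core of the merge request: while cq_lock is held, the worker only copies each CQE's result and user data into reapedCompletions and advances the completion queue; constructing and scheduling the continuation fibers happens after the lock is dropped. The sketch below illustrates that "copy under the lock, process outside it" shape with a plain std::mutex standing in for EMPER's cq_lock (whose unlock() additionally reports completer attempts, as seen in the next hunk) and a generic (res, user_data) pair standing in for the Completion alias; it is an illustration of the pattern, not EMPER's implementation.

// Sketch of the "shrink the critical section" pattern the hunk above implements:
// copy only (res, user_data) pairs while the CQ lock is held, then release the
// lock before doing any per-completion work. std::mutex and the constants are
// assumptions made for this example.
#include <liburing.h>

#include <array>
#include <cstdint>
#include <mutex>
#include <utility>
#include <vector>

static constexpr unsigned CQE_BATCH_COUNT = 64;  // assumed batch size

using Completion = std::pair<int32_t, void *>;  // (cqe->res, user data)

std::vector<Completion> reapUnderLock(struct io_uring &ring, std::mutex &cqLock) {
	std::vector<Completion> reaped;
	reaped.reserve(CQE_BATCH_COUNT);

	std::array<struct io_uring_cqe *, CQE_BATCH_COUNT> cqes;

	{
		// Critical section: only peek, copy and advance -- no user-level work.
		std::lock_guard<std::mutex> guard(cqLock);
		unsigned count = io_uring_peek_batch_cqe(&ring, cqes.data(), CQE_BATCH_COUNT);
		for (unsigned i = 0; i < count; ++i) {
			reaped.emplace_back(cqes[i]->res, io_uring_cqe_get_data(cqes[i]));
		}
		io_uring_cq_advance(&ring, count);
	}

	// Any expensive handling (building fibers, scheduling, logging) happens
	// after the lock has been released.
	return reaped;
}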
@@ -220,17 +218,17 @@ reap_cqes:
 		reapedCompletions.emplace_back(cqe->res, tptr);
 	}
 
-	LOGD("got " << count << " cqes from the io_uring");
+	io_uring_cq_advance(&ring, count);
+
+	uint32_t globalCompleterAttempts = cq_lock.unlock();
+
+	LOGD("got " << count << " cqes from the io_uring");
 
 	if constexpr (emper::DEBUG) {
 		assert(count <= reqs_in_uring);
 		reqs_in_uring -= count;
 	}
 
-unlock:
-	uint32_t globalCompleterAttempts = cq_lock.unlock();
-
 	// A naive try lock protecting a worker's IoContext's cq is racy.
 	// While a worker is holding the lock additional completions could arrive
 	// which the worker does not observe because it could be already finished iterating.
@@ -253,6 +251,8 @@ unlock:
 	stats.record_reaps<callerEnvironment>(count);
 
+	continuationFibers.reserve(reapedCompletions.size());
+
 	for (auto &completion : reapedCompletions) {
 		auto res = completion.first;
 		auto tptr = completion.second;
@@ -290,13 +290,24 @@ unlock:
 	// check if lost wakeup was possible
 	if constexpr (callerEnvironment == CallerEnvironment::EMPER) {
+		bool reReap = false;
+
 		// TODO: How sure are we that this is unlikely?
 		if (unlikely(globalCompleterAttempts > maxRaceFreeCompleterAttempts)) {
+			// In all CQ iteration after the first we expect no further globalCompleter attempts
+			maxRaceFreeCompleterAttempts = 0;
+			reReap = true;
+		} else if (count == CQE_BATCH_COUNT) {
+			// We reaped a full batch, this means there could be potentially
+			// more CQEs in the completion queue.
+			reReap = true;
+		}
+
+		if (reReap) {
 			// schedule all already collected continuation fibers
 			runtime.schedule(continuationFibers.begin(), continuationFibers.end());
 			reapedCompletions.clear();
 			continuationFibers.clear();
 
-			// In all CQ iteration after the first we expect no further globalCompleter attempts
-			maxRaceFreeCompleterAttempts = 0;
 			goto reap_cqes;
 		}
 	}
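The last hunk decides whether to jump back to reap_cqes: the worker re-reaps either when the global completer tried to acquire cq_lock more often than expected (a wakeup may have been lost) or when a full batch of CQE_BATCH_COUNT entries was reaped, in which case the completion queue may still hold more entries. A loop-based sketch of that control flow is given below; ReapResult, reapOnce() and scheduleAll() are hypothetical stand-ins invented for this example, representing one locked peek_batch pass and runtime.schedule() respectively.

#include <cstdint>

// Hypothetical result of one locked peek_batch pass (invented for this sketch).
struct ReapResult {
	unsigned count;                    // CQEs reaped in this pass
	uint32_t globalCompleterAttempts;  // lock attempts observed while cq_lock was held
};

// Hypothetical stand-ins; the real code does the work shown in the hunks above.
static ReapResult reapOnce() { return {0, 0}; }
static void scheduleAll() {}

void reapLoop(unsigned cqeBatchCount) {
	// A single completer attempt is tolerated on the first pass only.
	uint32_t maxRaceFreeCompleterAttempts = 1;

	for (;;) {
		ReapResult result = reapOnce();

		bool reReap = false;
		if (result.globalCompleterAttempts > maxRaceFreeCompleterAttempts) {
			// Someone tried to reap while we held the lock: a wakeup may have been
			// lost, so look at the CQ again. Later passes tolerate no attempts.
			maxRaceFreeCompleterAttempts = 0;
			reReap = true;
		} else if (result.count == cqeBatchCount) {
			// A full batch means the completion queue may still hold more CQEs.
			reReap = true;
		}

		if (!reReap) break;

		// Hand the continuation fibers collected so far to the runtime before the
		// next pass, mirroring runtime.schedule(...) in the hunk above.
		scheduleAll();
	}
}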