diff --git a/llkd/Android.bp b/llkd/Android.bp index a6edd26ceb5664943e5a4578df018ea34e30002b..62a637dcdb9bbb23996727826d63c65f3e1f8299 100644 --- a/llkd/Android.bp +++ b/llkd/Android.bp @@ -20,6 +20,12 @@ cc_library_static { export_include_dirs: ["include"], cflags: ["-Werror"], + + product_variables: { + debuggable: { + cppflags: ["-D__PTRACE_ENABLED__"], + }, + }, } cc_binary { @@ -39,4 +45,9 @@ cc_binary { cflags: ["-Werror"], init_rc: ["llkd.rc"], + product_variables: { + debuggable: { + init_rc: ["llkd-debuggable.rc"], + }, + }, } diff --git a/llkd/README.md b/llkd/README.md index 2314583656b6281eeee71c8af776ea9514ca84d1..1f69718a1edebb61c19e8bbbc3a6219cfbb27487 100644 --- a/llkd/README.md +++ b/llkd/README.md @@ -23,6 +23,9 @@ the period of time before the next expected call to this handler. Operations ---------- +There are two detection scenarios. Persistent D or Z state, and persistent +stack signature. + If a thread is in D or Z state with no forward progress for longer than ro.llk.timeout_ms, or ro.llk.[D|Z].timeout_ms, kill the process or parent process respectively. If another scan shows the same process continues to @@ -32,6 +35,26 @@ condition. Add a alarm self watchdog should llkd ever get locked up that is double the expected time to flow through the mainloop. Sampling is every ro.llk_sample_ms. +For userdebug releases only, persistent stack signature checking is enabled. +If a thread in any state but Z, has a persistent listed ro.llk.stack kernel +symbol always being reported, even if there is forward scheduling progress, for +longer than ro.llk.timeout_ms, or ro.llk.stack.timeout_ms, then issue a kill +to the process. If another scan shows the same process continues to exist, +then have a confirmed live-lock condition and need to panic. There is no +ABA detection since forward scheduling progress is allowed, thus the condition +for the symbols are: + +- Check is looking for " " + __symbol__ + "+0x" in /proc/<pid>/stack.
+- The __symbol__ should be rare and short lived enough that on a typical + system the function is seen at most only once in a sample over the timeout + period of ro.llk.stack.timeout_ms, samples occur every ro.llk.check_ms. This + can be the only way to prevent a false trigger as there is no ABA protection. +- Persistent continuously when the live lock condition exists. +- Should be just below the function that is calling the lock that could + contend, because if the lock is below or in the symbol function, the + symbol will show in all affected processes, not just the one that + caused the lockup. + Default will not monitor init, or [kthreadd] and all that [kthreadd] spawns. This reduces the effectiveness of llkd by limiting its coverage. If there is value in covering [kthreadd] spawned threads, the requirement will be that @@ -40,7 +63,9 @@ to recover the thread should it be killed externally (this is good driver coding hygiene, a common request to add such to publicly reviewed kernel.org maintained drivers). For instance use wait_event_interruptible() instead of wait_event(). The blacklists can be adjusted accordingly if these -conditions are met to cover kernel components. +conditions are met to cover kernel components. For the stack symbol checking, +there is an additional process blacklist so that we do not incite sepolicy +violations on services that block ptrace operations. An accompanying gTest set have been added, and will setup a persistent D or Z process, with and without forward progress, but not in a live-lock state @@ -93,14 +118,31 @@ default ro.llk.timeout_ms, D maximum timelimit. #### ro.llk.Z.timeout_ms default ro.llk.timeout_ms, Z maximum timelimit. +#### ro.llk.stack.timeout_ms +default ro.llk.timeout_ms, +checking for persistent stack symbols maximum timelimit. +Only active on userdebug and eng builds. + #### ro.llk.check_ms default 2 minutes samples of threads for D or Z.
+#### ro.llk.stack +default *empty* or false, comma separated list of kernel symbols. +The string "*false*" is the equivalent to an *empty* list. +Look for kernel stack symbols that if ever persistently present can +indicate a subsystem is locked up. +Beware, the check deliberately does no forward scheduling ABA detection except by polling +every ro.llk.check_ms over the period ro.llk.stack.timeout_ms, so stack symbol +should be exceptionally rare and fleeting. +One must be convinced that it is virtually *impossible* for symbol to show up +persistently in all samples of the stack. +Only active on userdebug and eng builds. + #### ro.llk.blacklist.process default 0,1,2 (kernel, init and [kthreadd]) plus process names init,[kthreadd],[khungtaskd],lmkd,lmkd.llkd,llkd,watchdogd, [watchdogd],[watchdogd/0],...,[watchdogd/***get_nprocs**-1*]. -The string false is the equivalent to an empty list. +The string "*false*" is the equivalent to an *empty* list. Do not watch these processes. A process can be comm, cmdline or pid reference. NB: automated default here can be larger than the current maximum property size of 92. @@ -108,15 +150,23 @@ NB: false is a very very very unlikely process to want to blacklist. #### ro.llk.blacklist.parent default 0,2 (kernel and [kthreadd]). -The string false is the equivalent to an empty list. +The string "*false*" is the equivalent to an *empty* list. Do not watch processes that have this parent. A parent process can be comm, cmdline or pid reference. #### ro.llk.blacklist.uid default *empty* or false, comma separated list of uid numbers or names. -The string false is the equivalent to an empty list. +The string "*false*" is the equivalent to an *empty* list. Do not watch processes that match this uid. +#### ro.llk.blacklist.process.stack +default process names init,lmkd,lmkd.llkd,llkd,keystore,logd. +The string "*false*" is the equivalent to an *empty* list. +This subset of processes are not monitored for live lock stack signatures.
+Also prevents the sepolicy violation associated with processes that block +ptrace, as these can not be checked anyways. +Only active on userdebug and eng builds. + Architectural Concerns ---------------------- diff --git a/llkd/include/llkd.h b/llkd/include/llkd.h index e3ae4bbd8b76166afc7027c5546a03fa5d113a81..d0188ec79f83e55e9b0e16e9693b095b6b22607b 100644 --- a/llkd/include/llkd.h +++ b/llkd/include/llkd.h @@ -43,9 +43,12 @@ unsigned llkCheckMilliseconds(void); #define KHT_TIMEOUT_PROPERTY "ro.khungtask.timeout" #define LLK_D_TIMEOUT_MS_PROPERTY "ro.llk.D.timeout_ms" #define LLK_Z_TIMEOUT_MS_PROPERTY "ro.llk.Z.timeout_ms" +#define LLK_STACK_TIMEOUT_MS_PROPERTY "ro.llk.stack.timeout_ms" #define LLK_CHECK_MS_PROPERTY "ro.llk.check_ms" /* LLK_CHECK_MS_DEFAULT = actual timeout_ms / LLK_CHECKS_PER_TIMEOUT_DEFAULT */ #define LLK_CHECKS_PER_TIMEOUT_DEFAULT 5 +#define LLK_CHECK_STACK_PROPERTY "ro.llk.stack" +#define LLK_CHECK_STACK_DEFAULT "" #define LLK_BLACKLIST_PROCESS_PROPERTY "ro.llk.blacklist.process" #define LLK_BLACKLIST_PROCESS_DEFAULT \ "0,1,2,init,[kthreadd],[khungtaskd],lmkd,lmkd.llkd,llkd,watchdogd,[watchdogd],[watchdogd/0]" @@ -53,6 +56,8 @@ unsigned llkCheckMilliseconds(void); #define LLK_BLACKLIST_PARENT_DEFAULT "0,2,[kthreadd]" #define LLK_BLACKLIST_UID_PROPERTY "ro.llk.blacklist.uid" #define LLK_BLACKLIST_UID_DEFAULT "" +#define LLK_BLACKLIST_STACK_PROPERTY "ro.llk.blacklist.process.stack" +#define LLK_BLACKLIST_STACK_DEFAULT "init,lmkd.llkd,llkd,keystore,/system/bin/keystore" /* clang-format on */ __END_DECLS diff --git a/llkd/libllkd.cpp b/llkd/libllkd.cpp index bb55d1fee5575258edb6ab29d2f084ca5e0fed26..58c2ba8d23166df57e9acbb71cf7be55faf02e5c 100644 --- a/llkd/libllkd.cpp +++ b/llkd/libllkd.cpp @@ -73,7 +73,14 @@ bool llkRunning = false; // thread is running bool llkMlockall = LLK_MLOCKALL_DEFAULT; // run mlocked bool llkTestWithKill = LLK_KILLTEST_DEFAULT; // issue test kills milliseconds llkTimeoutMs = LLK_TIMEOUT_MS_DEFAULT; // default timeout 
-enum { llkStateD, llkStateZ, llkNumStates }; // state indexes +enum { // enum of state indexes + llkStateD, // Persistent 'D' state + llkStateZ, // Persistent 'Z' state +#ifdef __PTRACE_ENABLED__ // Extra privileged states + llkStateStack, // stack signature +#endif // End of extra privilege + llkNumStates, // Maximum number of states +}; // state indexes milliseconds llkStateTimeoutMs[llkNumStates]; // timeout override for each detection state milliseconds llkCheckMs; // checking interval to inspect any // persistent live-locked states @@ -83,6 +90,10 @@ bool khtEnable = LLK_ENABLE_DEFAULT; // [khungtaskd] panic // Provides a wide angle of margin b/c khtTimeout is also its granularity. seconds khtTimeout = duration_cast<seconds>(llkTimeoutMs * (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT) / LLK_CHECKS_PER_TIMEOUT_DEFAULT); +#ifdef __PTRACE_ENABLED__ +// list of stack symbols to search for persistence. +std::unordered_set<std::string> llkCheckStackSymbols; +#endif // Blacklist variables, initialized with comma separated lists of high false // positive and/or dangerous references, e.g. without self restart, for pid, @@ -97,6 +108,11 @@ std::unordered_set<std::string> llkBlacklistProcess; std::unordered_set<std::string> llkBlacklistParent; // list of uids, and uid names, to skip, default nothing std::unordered_set<std::string> llkBlacklistUid; +#ifdef __PTRACE_ENABLED__ +// list of names to skip stack checking. "init", "lmkd", "llkd", "keystore" or +// "logd" (if not userdebug). +std::unordered_set<std::string> llkBlacklistStack; +#endif class dir { public: @@ -263,6 +279,9 @@ struct proc { // forward scheduling progress. milliseconds update; // llkUpdate millisecond signature of last. milliseconds count; // duration in state. +#ifdef __PTRACE_ENABLED__ // Privileged state checking + milliseconds count_stack; // duration where stack is stagnant. +#endif // End privilege pid_t pid; // /proc/<pid> before iterating through // /proc/<pid>/task/<tid> for threads.
pid_t ppid; // /proc/<tid>/stat field 4 parent pid. @@ -272,6 +291,9 @@ struct proc { std::string cmdline; // cached /cmdline content char state; // /proc/<tid>/stat field 3: Z or D // (others we do not monitor: S, R, T or ?) +#ifdef __PTRACE_ENABLED__ // Privileged state checking + char stack; // index in llkCheckStackSymbols for matches +#endif // and with maximum index PROP_VALUE_MAX/2. char comm[TASK_COMM_LEN + 3]; // space for adding '[' and ']' bool exeMissingValid; // exeMissing has been cached bool cmdlineValid; // cmdline has been cached @@ -286,11 +308,17 @@ struct proc { nrSwitches(0), update(llkUpdate), count(0ms), +#ifdef __PTRACE_ENABLED__ + count_stack(0ms), +#endif pid(pid), ppid(ppid), uid(-1), time(time), state(state), +#ifdef __PTRACE_ENABLED__ + stack(-1), +#endif exeMissingValid(false), cmdlineValid(false), updated(true), @@ -343,6 +371,10 @@ struct proc { void reset(void) { // reset cache, if we detected pid rollover uid = -1; state = '?'; +#ifdef __PTRACE_ENABLED__ + count_stack = 0ms; + stack = -1; +#endif cmdline = ""; comm[0] = '\0'; exeMissingValid = false; @@ -667,6 +699,48 @@ long long getSchedValue(const std::string& schedString, const char* key) { return ret; } +#ifdef __PTRACE_ENABLED__ +bool llkCheckStack(proc* procp, const std::string& piddir) { + if (llkCheckStackSymbols.empty()) return false; + if (procp->state == 'Z') { // No brains for Zombies + procp->stack = -1; + procp->count_stack = 0ms; + return false; + } + + // Don't check process that are known to block ptrace, save sepolicy noise. 
+ if (llkSkipName(std::to_string(procp->pid), llkBlacklistStack)) return false; + if (llkSkipName(procp->getComm(), llkBlacklistStack)) return false; + if (llkSkipName(procp->getCmdline(), llkBlacklistStack)) return false; + + auto kernel_stack = ReadFile(piddir + "/stack"); + if (kernel_stack.empty()) { + LOG(INFO) << piddir << "/stack empty comm=" << procp->getComm() + << " cmdline=" << procp->getCmdline(); + return false; + } + // A scheduling incident that should not reset count_stack + if (kernel_stack.find(" cpu_worker_pools+0x") != std::string::npos) return false; + char idx = -1; + char match = -1; + for (const auto& stack : llkCheckStackSymbols) { + if (++idx < 0) break; + if (kernel_stack.find(" "s + stack + "+0x") != std::string::npos) { + match = idx; + break; + } + } + if (procp->stack != match) { + procp->stack = match; + procp->count_stack = 0ms; + return false; + } + if (match == char(-1)) return false; + procp->count_stack += llkCycle; + return procp->count_stack >= llkStateTimeoutMs[llkStateStack]; +} +#endif + // Primary ABA mitigation watching last time schedule activity happened void llkCheckSchedUpdate(proc* procp, const std::string& piddir) { // Audit finds /proc/<tid>/sched is just over 1K, and @@ -731,7 +805,15 @@ void llkLogConfig(void) { << LLK_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkTimeoutMs) << "\n" << LLK_D_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateD]) << "\n" << LLK_Z_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateZ]) << "\n" +#ifdef __PTRACE_ENABLED__ + << LLK_STACK_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateStack]) + << "\n" +#endif << LLK_CHECK_MS_PROPERTY "=" << llkFormat(llkCheckMs) << "\n" +#ifdef __PTRACE_ENABLED__ + << LLK_CHECK_STACK_PROPERTY "=" << llkFormat(llkCheckStackSymbols) << "\n" + << LLK_BLACKLIST_STACK_PROPERTY "=" << llkFormat(llkBlacklistStack) << "\n" +#endif << LLK_BLACKLIST_PROCESS_PROPERTY "=" << llkFormat(llkBlacklistProcess) << "\n" << 
LLK_BLACKLIST_PARENT_PROPERTY "=" << llkFormat(llkBlacklistParent) << "\n" << LLK_BLACKLIST_UID_PROPERTY "=" << llkFormat(llkBlacklistUid); @@ -892,9 +974,14 @@ milliseconds llkCheck(bool checkRunning) { if (pid == myPid) { break; } - if (!llkIsMonitorState(state)) { +#ifdef __PTRACE_ENABLED__ + // if no stack monitoring, we can quickly exit here + if (!llkIsMonitorState(state) && llkCheckStackSymbols.empty()) { continue; } +#else + if (!llkIsMonitorState(state)) continue; +#endif if ((tid == myTid) || llkSkipPid(tid)) { continue; } @@ -925,12 +1012,26 @@ milliseconds llkCheck(bool checkRunning) { // ABA mitigation watching last time schedule activity happened llkCheckSchedUpdate(procp, piddir); - // Can only fall through to here if registered D or Z state !!! - if (procp->count < llkStateTimeoutMs[(state == 'Z') ? llkStateZ : llkStateD]) { - LOG(VERBOSE) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->" - << pid << "->" << tid << ' ' << procp->getComm(); +#ifdef __PTRACE_ENABLED__ + auto stuck = llkCheckStack(procp, piddir); + if (llkIsMonitorState(state)) { + if (procp->count >= llkStateTimeoutMs[(state == 'Z') ? llkStateZ : llkStateD]) { + stuck = true; + } else if (procp->count != 0ms) { + LOG(VERBOSE) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->" + << pid << "->" << tid << ' ' << procp->getComm(); + } + } + if (!stuck) continue; +#else + if (procp->count < llkStateTimeoutMs[(state == 'Z') ? llkStateZ : llkStateD]) { /* not timed out yet: skip the kill/panic path */ + if (procp->count != 0ms) { + LOG(VERBOSE) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->" + << pid << "->" << tid << ' ' << procp->getComm(); + } continue; } +#endif // We have to kill it to determine difference between live lock // and persistent state blocked on a resource. Is there something @@ -969,12 +1070,13 @@ milliseconds llkCheck(bool checkRunning) { // not working is we kill a process that likes to // stay in 'D' state, instead of panicing the // kernel (worse).
- LOG(WARNING) << "D " << llkFormat(procp->count) << ' ' << pid << "->" << tid - << ' ' << procp->getComm() << " [kill]"; + default: + LOG(WARNING) << state << ' ' << llkFormat(procp->count) << ' ' << pid + << "->" << tid << ' ' << procp->getComm() << " [kill]"; if ((llkKillOneProcess(llkTidLookup(pid), procp) >= 0) || - (llkKillOneProcess(pid, 'D', tid) >= 0) || + (llkKillOneProcess(pid, state, tid) >= 0) || (llkKillOneProcess(procp, procp) >= 0) || - (llkKillOneProcess(tid, 'D', tid) >= 0)) { + (llkKillOneProcess(tid, state, tid) >= 0)) { continue; } break; @@ -983,7 +1085,8 @@ milliseconds llkCheck(bool checkRunning) { // We are here because we have confirmed kernel live-lock LOG(ERROR) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->" << pid << "->" << tid << ' ' << procp->getComm() << " [panic]"; - llkPanicKernel(true, tid, (state == 'Z') ? "zombie" : "driver"); + llkPanicKernel(true, tid, + (state == 'Z') ? "zombie" : (state == 'D') ? "driver" : "sleeping"); } LOG(VERBOSE) << "+closedir()"; } @@ -1041,8 +1144,9 @@ unsigned llkCheckMilliseconds() { } bool llkInit(const char* threadname) { + auto debuggable = android::base::GetBoolProperty("ro.debuggable", false); llkLowRam = android::base::GetBoolProperty("ro.config.low_ram", false); - if (!LLK_ENABLE_DEFAULT && android::base::GetBoolProperty("ro.debuggable", false)) { + if (!LLK_ENABLE_DEFAULT && debuggable) { llkEnable = android::base::GetProperty(LLK_ENABLE_PROPERTY, "eng") == "eng"; khtEnable = android::base::GetProperty(KHT_ENABLE_PROPERTY, "eng") == "eng"; } @@ -1069,8 +1173,21 @@ bool llkInit(const char* threadname) { llkValidate(); // validate llkTimeoutMs, llkCheckMs and llkCycle llkStateTimeoutMs[llkStateD] = GetUintProperty(LLK_D_TIMEOUT_MS_PROPERTY, llkTimeoutMs); llkStateTimeoutMs[llkStateZ] = GetUintProperty(LLK_Z_TIMEOUT_MS_PROPERTY, llkTimeoutMs); +#ifdef __PTRACE_ENABLED__ + llkStateTimeoutMs[llkStateStack] = GetUintProperty(LLK_STACK_TIMEOUT_MS_PROPERTY, llkTimeoutMs); +#endif 
llkCheckMs = GetUintProperty(LLK_CHECK_MS_PROPERTY, llkCheckMs); llkValidate(); // validate all (effectively minus llkTimeoutMs) +#ifdef __PTRACE_ENABLED__ + if (debuggable) { + llkCheckStackSymbols = llkSplit( + android::base::GetProperty(LLK_CHECK_STACK_PROPERTY, LLK_CHECK_STACK_DEFAULT)); + } + std::string defaultBlacklistStack(LLK_BLACKLIST_STACK_DEFAULT); + if (!debuggable) defaultBlacklistStack += ",logd,/system/bin/logd"; + llkBlacklistStack = llkSplit( + android::base::GetProperty(LLK_BLACKLIST_STACK_PROPERTY, defaultBlacklistStack)); +#endif std::string defaultBlacklistProcess( std::to_string(kernelPid) + "," + std::to_string(initPid) + "," + std::to_string(kthreaddPid) + "," + std::to_string(::getpid()) + "," + diff --git a/llkd/llkd-debuggable.rc b/llkd/llkd-debuggable.rc new file mode 100644 index 0000000000000000000000000000000000000000..724cb5ee0c3b8a0bb6b36b9cb81ef8813cc01480 --- /dev/null +++ b/llkd/llkd-debuggable.rc @@ -0,0 +1,19 @@ +on property:ro.debuggable=1 + setprop llk.enable ${ro.llk.enable:-1} + setprop khungtask.enable ${ro.khungtask.enable:-1} + +on property:ro.llk.enable=eng + setprop llk.enable ${ro.debuggable:-0} + +on property:ro.khungtask.enable=eng + setprop khungtask.enable ${ro.debuggable:-0} + +service llkd-1 /system/bin/llkd + class late_start + disabled + user llkd + group llkd readproc + capabilities KILL IPC_LOCK SYS_PTRACE DAC_OVERRIDE + file /dev/kmsg w + file /proc/sysrq-trigger w + writepid /dev/cpuset/system-background/tasks diff --git a/llkd/llkd.rc b/llkd/llkd.rc index e538cdb919003590293abe36f9442ae5373295ef..b1f96a8f3e38f7a0d7f7384d4dd6561a6485d857 100644 --- a/llkd/llkd.rc +++ b/llkd/llkd.rc @@ -3,15 +3,8 @@ on property:ro.debuggable=* setprop llk.enable ${ro.llk.enable:-0} setprop khungtask.enable ${ro.khungtask.enable:-0} -on property:ro.debuggable=1 - setprop llk.enable ${ro.llk.enable:-1} - setprop khungtask.enable ${ro.khungtask.enable:-1} - -on property:ro.llk.enable=eng - setprop llk.enable 
${ro.debuggable:-0} - -on property:ro.khungtask.enable=eng - setprop khungtask.enable ${ro.debuggable:-0} +on property:ro.llk.enable=true + setprop llk.enable true on property:llk.enable=1 setprop llk.enable true @@ -19,6 +12,9 @@ on property:llk.enable=1 on property:llk.enable=0 setprop llk.enable false +on property:ro.khungtask.enable=true + setprop khungtask.enable true + on property:khungtask.enable=1 setprop khungtask.enable true @@ -36,9 +32,9 @@ on property:khungtask.enable=false write /proc/sys/kernel/hung_task_panic 0 on property:llk.enable=true - start llkd + start llkd-${ro.debuggable:-0} -service llkd /system/bin/llkd +service llkd-0 /system/bin/llkd class late_start disabled user llkd diff --git a/llkd/tests/llkd_test.cpp b/llkd/tests/llkd_test.cpp index 3a15ff1e3a5db7f33836ace93aff51af49853f74..f54932bab5857c3658bfbe1b0e923fc468d15c90 100644 --- a/llkd/tests/llkd_test.cpp +++ b/llkd/tests/llkd_test.cpp @@ -14,8 +14,10 @@ * limitations under the License. */ +#include <fcntl.h> #include <signal.h> #include <stdint.h> +#include <sys/stat.h> #include <sys/types.h> #include <sys/wait.h> #include <unistd.h> @@ -81,9 +83,12 @@ seconds llkdSleepPeriod(char state) { (GetUintProperty(LLK_CHECK_MS_PROPERTY, LLK_TIMEOUT_MS_DEFAULT / LLK_CHECKS_PER_TIMEOUT_DEFAULT) != duration_cast<milliseconds>(10s))) { - execute("stop llkd"); + execute("stop llkd-0"); + execute("stop llkd-1"); rest(); std::string setprop("setprop "); + execute((setprop + LLK_CHECK_STACK_PROPERTY + " SyS_openat").c_str()); + rest(); execute((setprop + LLK_ENABLE_WRITEABLE_PROPERTY + " false").c_str()); rest(); execute((setprop + LLK_TIMEOUT_MS_PROPERTY + " 120000").c_str()); @@ -92,8 +97,10 @@ seconds llkdSleepPeriod(char state) { rest(); execute((setprop + LLK_CHECK_MS_PROPERTY + " 10000").c_str()); rest(); - execute((setprop + LLK_ENABLE_PROPERTY + " true").c_str()); - rest(); + if (!default_enable) { + execute((setprop + LLK_ENABLE_PROPERTY + " true").c_str()); + rest(); + } execute((setprop + 
LLK_ENABLE_WRITEABLE_PROPERTY + " true").c_str()); rest(); } @@ -104,7 +111,7 @@ seconds llkdSleepPeriod(char state) { } default_enable = android::base::GetBoolProperty(LLK_ENABLE_PROPERTY, default_enable); if (default_enable) { - execute("start llkd"); + execute("start llkd-1"); rest(); GTEST_LOG_INFO << "llkd enabled\n"; } else { @@ -123,8 +130,10 @@ seconds llkdSleepPeriod(char state) { llkTimeoutMs = LLK_TIMEOUT_MS_MINIMUM; } milliseconds llkCheckMs = llkTimeoutMs / LLK_CHECKS_PER_TIMEOUT_DEFAULT; - auto timeout = GetUintProperty( - (state == 'Z') ? LLK_Z_TIMEOUT_MS_PROPERTY : LLK_D_TIMEOUT_MS_PROPERTY, llkTimeoutMs); + auto timeout = GetUintProperty((state == 'Z') ? LLK_Z_TIMEOUT_MS_PROPERTY + : (state == 'S') ? LLK_STACK_TIMEOUT_MS_PROPERTY + : LLK_D_TIMEOUT_MS_PROPERTY, + llkTimeoutMs); if (timeout < LLK_TIMEOUT_MS_MINIMUM) { timeout = LLK_TIMEOUT_MS_MINIMUM; } @@ -285,3 +294,41 @@ TEST(llkd, driver) { waitForPid(child_pid); } + +TEST(llkd, sleep) { + if (checkKill("kernel_panic,sysrq,livelock,sleeping")) { + return; + } + if (!android::base::GetBoolProperty("ro.debuggable", false)) { + GTEST_LOG_WARNING << "Features not available on user builds\n"; + } + + const auto period = llkdSleepPeriod('S'); + + /* Create a Persistent SyS_openat for single-ended pipe */ + static constexpr char stack_pipe_file[] = "/dev/stack_pipe_file"; + unlink(stack_pipe_file); + auto pipe_ret = mknod(stack_pipe_file, S_IFIFO | 0666, 0); + ASSERT_LE(0, pipe_ret); + + auto child_pid = fork(); + ASSERT_LE(0, child_pid); + if (!child_pid) { + child_pid = fork(); + ASSERT_LE(0, child_pid); + if (!child_pid) { + sleep(period.count()); + auto fd = open(stack_pipe_file, O_RDONLY | O_CLOEXEC); + close(fd); + exit(0); + } else { + auto fd = open(stack_pipe_file, O_WRONLY | O_CLOEXEC); + close(fd); + exit(42); + } + } + + waitForPid(child_pid); + + unlink(stack_pipe_file); +}