网站做百度推广成都网站建设
文章目录
- 1. 前言
- 2. TASK_INTERRUPTIBLE 和 TASK_UNINTERRUPTIBLE
- 2.1 语义
- 2.2 实现
- 2.2.1 TASK_INTERRUPTIBLE 实现
- 2.2.1.1 等待的条件成立时 唤醒
- 2.2.1.2 信号 唤醒
- 2.2.1.3 中断 唤醒
- 2.2.1.3.1 内核态的处理过程
- 2.2.1.3.2 用户态的处理过程
- 2.2.2 TASK_UNINTERRUPTIBLE 实现
- 2.3 小结
- 3. 参考资料
1. 前言
限于作者能力水平,本文可能存在谬误,因此而给读者带来的损失,作者不做任何承诺。
2. TASK_INTERRUPTIBLE 和 TASK_UNINTERRUPTIBLE
2.1 语义
以下是从文章 Process Scheduling in the Kernel 摘录的对进程状态 TASK_INTERRUPTIBLE
和 TASK_UNINTERRUPTIBLE
的说明:
TASK_INTERRUPTIBLE identifies a process that is suspended (sleeping) until some condition becomes true.
Raising an interrupt, releasing a system resource the process is waiting for, or
delivering a signal are examples of conditions that might wake up the process,
that is, put its state back to TASK_RUNNING.
TASK_UNINTERRUPTIBLE identifies a process that is suspended like in the TASK_INTERRUPTIBLE state, except that
in this case delivering a signal will not wake up the process. This process state is
seldom used.
简单翻译一下:
. TASK_INTERRUPTIBLE:进程进入睡眠,直到某个条件成立。中断、信号、或进程所等待的系统资源被释放,都可以唤醒进程,即将进程重新置为运行态(TASK_RUNNING)。
. TASK_UNINTERRUPTIBLE:类似于 TASK_INTERRUPTIBLE,但无法通过信号唤醒进程。该状态很少使用。
2.2 实现
从 2.1 小节了解了 TASK_INTERRUPTIBLE 和 TASK_UNINTERRUPTIBLE 的语义,本小节从代码层面看内核是如何实现它们的。
2.2.1 TASK_INTERRUPTIBLE 实现
以 socket
通信 TCP 三次握手 过程中的 accept()
调用为例,来说明 TASK_INTERRUPTIBLE
的语义实现。
服务端调用 accept()
等待 TCP 连接的三次握手完成:
sys_accept()sys_accept4()sock->ops->accept() = inet_accept()sk1->sk_prot->accept() = inet_csk_accept()if (reqsk_queue_empty(queue)) {/* 阻塞模式下,永不超时,即 timeout 为 MAX_SCHEDULE_TIMEOUT */long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);.../* 等待 TCP 连接的三次握手完成 */error = inet_csk_wait_for_connect(sk, timeo);...}static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
{struct inet_connection_sock *icsk = inet_csk(sk);DEFINE_WAIT(wait);int err;for (;;) {/* 将进程添加到等待队列 sk_sleep(sk) */prepare_to_wait_exclusive(sk_sleep(sk), &wait,TASK_INTERRUPTIBLE);release_sock(sk);if (reqsk_queue_empty(&icsk->icsk_accept_queue))timeo = schedule_timeout(timeo); /* (1) 主动调度:进入 TASK_INTERRUPTIBLE 睡眠等待 */...lock_sock(sk);err = 0;if (!reqsk_queue_empty(&icsk->icsk_accept_queue)) /* 有连接完成三次握手, */break; /* 正常结束等待 */err = -EINVAL;if (sk->sk_state != TCP_LISTEN)break;err = sock_intr_errno(timeo);if (signal_pending(current)) /* 进程有信号挂起, */break; /* 终止等待过程,处理信号 */err = -EAGAIN; /* 非阻塞方式下等待超时错误码 EAGAIN */if (!timeo) /* 非阻塞方式下等待超时, */break; /* 终止等待过程,用户收到错误码 EAGAIN,提示可以重试 */}/* 将进程从等待队列 sk_sleep(sk) 移除,重新进入 TASK_RUNNING 状态 */finish_wait(sk_sleep(sk), &wait);return err;
}void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{unsigned long flags;__set_current_state(TASK_RUNNING); /* 进程重新进入 TASK_RUNNING 状态 *//** We can check for list emptiness outside the lock* IFF:* - we use the "careful" check that verifies both* the next and prev pointers, so that there cannot* be any half-pending updates in progress on other* CPU's that we haven't seen yet (and that might* still change the stack area.* and* - all other users take the lock (ie we can only* have _one_ other CPU that looks at or modifies* the list).*/if (!list_empty_careful(&wq_entry->entry)) {spin_lock_irqsave(&wq_head->lock, flags);list_del_init(&wq_entry->entry);spin_unlock_irqrestore(&wq_head->lock, flags);}
}
看下调度细节:
/* kernel/time/timer.c */signed long __sched schedule_timeout(signed long timeout)
{...schedule();...
}
/* kernel/sched/core.c */asmlinkage __visible void __sched schedule(void)
{struct task_struct *tsk = current;sched_submit_work(tsk);do {preempt_disable(); /* 关闭抢占 */__schedule(false); /* 主动调度 */sched_preempt_enable_no_resched(); /* 开启抢占 */} while (need_resched());
}static void __sched notrace __schedule(bool preempt)
{struct task_struct *prev, *next;...struct rq *rq;int cpu;cpu = smp_processor_id();rq = cpu_rq(cpu);prev = rq->curr;...local_irq_disable();...if (!preempt && prev->state) {/** 如果进程 @prev 当前有信号挂起,不进入睡眠,* 而是继续保持 可运行 状态,以备后续被调度时处理信号。*/if (unlikely(signal_pending_state(prev->state, prev))) {prev->state = TASK_RUNNING;} else {/* 从可运行队列移除 */deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK);prev->on_rq = 0;...}}next = pick_next_task(rq, prev, &rf); /* 挑选要执行的进程 */...if (likely(prev != next)) { /* 切换到不同进程 */rq->nr_switches++;rq->curr = next;.../* Also unlocks the rq: */rq = context_switch(rq, prev, next, &rf); /* 进程上下文切换 */} else {...}
}
从上面分析看到,服务端在 accept() 调用中以 TASK_INTERRUPTIBLE 状态睡眠等待。接着看在 3 种不同场景下唤醒进程的过程。
2.2.1.1 等待的条件成立时 唤醒
正常唤醒过程:在连接的三次握手完成时,唤醒在 accept() 中等待的进程:
tcp_child_process(sk, nsk, skb)...parent->sk_data_ready(parent) = sock_def_readable()wq = rcu_dereference(sk->sk_wq);if (skwq_has_sleeper(wq))/* 唤醒在 accept() 中等待连接的进程 */wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND);......
2.2.1.2 信号 唤醒
异常唤醒过程,通过信号唤醒进程
:
static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,int group, int from_ancestor_ns)
{...complete_signal(sig, t, group);
}static void complete_signal(int sig, struct task_struct *p, int group)
{...signal_wake_up(t, sig == SIGKILL);...
}static inline void signal_wake_up(struct task_struct *t, bool resume)
{signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0);
}void signal_wake_up_state(struct task_struct *t, unsigned int state)
{set_tsk_thread_flag(t, TIF_SIGPENDING); /* 标记进程 @t 有信号挂起 *//** TASK_WAKEKILL also means wake it up in the stopped/traced/killable* case. We don't check t->state here because there is a race with it* executing another processor and just now entering stopped state.* By using wake_up_state, we ensure the process will wake up and* handle its death signal.*//* * 唤醒 TASK_INTERRUPTIBLE 状态的进程处理信号: * . 将进程设置为 TASK_RUNNING 状态* . 选择运行的 CPU* . 设置 TIF_NEED_RESCHED 标志* . 其它 ......*/if (!wake_up_state(t, state | TASK_INTERRUPTIBLE))kick_process(t);
}
2.2.1.3 中断 唤醒
异常唤醒过程,中断唤醒
。如果被中断的进程因信号投递而被唤醒(设置了 TIF_NEED_RESCHED
标志),将发生中断处理结束时的抢占。由于进程被中断时,可能处于 内核态 和 用户态,所以有两种不同的执行路径。本文以 ARMv7
架构中断处理过程为例分别加以说明。
2.2.1.3.1 内核态的处理过程
/* arch/arm/kernel/entry-armv.S */.align 5
__irq_svc:svc_entryirq_handler /* 处理 内核态 中断 */#ifdef CONFIG_PREEMPT/* 开启了 内核态抢占 的情形 */ldr r8, [tsk, #TI_PREEMPT] @ get preempt countldr r0, [tsk, #TI_FLAGS] @ get flagsteq r8, #0 @ if preempt count != 0movne r0, #0 @ force flags to 0tst r0, #_TIF_NEED_RESCHED /* 在本文场景,检查因信号投递而设置的 TIF_NEED_RESCHED 标志 */blne svc_preempt /* 中断处理结束后,发起 内核态 抢占 */
#endifsvc_exit r5, irq = 1 @ return from exceptionUNWIND(.fnend )
ENDPROC(__irq_svc)#ifdef CONFIG_PREEMPT
svc_preempt:mov r8, lr/* 发起内核态抢占 */
1: bl preempt_schedule_irq @ irq en/disable is done insideldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGStst r0, #_TIF_NEED_RESCHEDreteq r8 @ go againb 1b
#endif
asmlinkage __visible void __sched preempt_schedule_irq(void)
{enum ctx_state prev_state;/* Catch callers which need to be fixed */BUG_ON(preempt_count() || !irqs_disabled());prev_state = exception_enter();do {preempt_disable();local_irq_enable();__schedule(true); /* 抢占调度 */local_irq_disable();sched_preempt_enable_no_resched();} while (need_resched());exception_exit(prev_state);
}
2.2.1.3.2 用户态的处理过程
/* arch/arm/kernel/entry-armv.S */.align 5
__irq_usr:usr_entrykuser_cmpxchg_checkirq_handler /* 处理 用户态 中断 */get_thread_info tskmov why, #0b ret_to_user_from_irqUNWIND(.fnend )
ENDPROC(__irq_usr)
/* arch/arm/include/asm/thread_info.h *//** Change these and you break ASM code in entry-common.S*/
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \_TIF_NOTIFY_RESUME | _TIF_UPROBE)
/* arch/arm/kernel/entry-common.S */ENTRY(ret_to_user_from_irq)...ldr r1, [tsk, #TI_FLAGS]tst r1, #_TIF_WORK_MASK /* 检查 是否需要调度、是否有信号要处理 等等 */bne slow_work_pending /* 处理 调度、信号 等等 */
no_work_pending:...restore_user_regs fast = 0, offset = 0
ENDPROC(ret_to_user_from_irq)/* 处理 调度、信号 等等 */
slow_work_pending:...bl do_work_pending...
/* arch/arm/kernel/signal.c */asmlinkage int
do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
{...do {if (likely(thread_flags & _TIF_NEED_RESCHED)) {schedule(); /* 执行调度 */} else {...}...thread_flags = current_thread_info()->flags;} while (thread_flags & _TIF_WORK_MASK);...
}
2.2.2 TASK_UNINTERRUPTIBLE 实现
TASK_UNINTERRUPTIBLE
状态的典型场景是 msleep()
调用:
void msleep(unsigned int msecs)
{unsigned long timeout = msecs_to_jiffies(msecs) + 1;while (timeout)timeout = schedule_timeout_uninterruptible(timeout);
}signed long __sched schedule_timeout_uninterruptible(signed long timeout)
{__set_current_state(TASK_UNINTERRUPTIBLE);return schedule_timeout(timeout);
}
msleep()
将进程设置为 TASK_UNINTERRUPTIBLE
状态,不会让人觉得意外。毕竟,msleep()
的本意就是让进程睡够指定时间才被唤醒,睡眠过程不可被中断(即 UNINTERRUPTIBLE
)。如果时间没睡够中途就被唤醒,这不符合 msleep()
的语义。
再看一个驱动代码片段示例:
/* drivers/hwmon/abituguru.c */static int abituguru_send_address(struct abituguru_data *data,u8 bank_addr, u8 sensor_addr, int retries)
{...for (;;) {...if (abituguru_wait(data, ABIT_UGURU_STATUS_INPUT)) {if (retries) {/* 进入 TASK_UNINTERRUPTIBLE 等待超时时间 ABIT_UGURU_RETRY_DELAY 到达 */set_current_state(TASK_UNINTERRUPTIBLE);schedule_timeout(ABIT_UGURU_RETRY_DELAY);retries--;continue;}...}...}
}
/* kernel/time/timer.c */signed long __sched schedule_timeout(signed long timeout)
{struct timer_list timer;unsigned long expire;switch (timeout){case MAX_SCHEDULE_TIMEOUT:schedule();goto out;default:...}expire = timeout + jiffies;setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);__mod_timer(&timer, expire, false);schedule(); /* 主动调度出去,等待超时时间、或等待的事件 到达 */...timeout = expire - jiffies; /* 剩余的超时时间: 有可能 等待的事件到达 而被 提前唤醒 */out:return timeout < 0 ? 0 : timeout;
}/* 超时后唤醒进程 */
static void process_timeout(unsigned long __data)
{wake_up_process((struct task_struct *)__data);
}/* Convenience macros for the sake of wake_up(): */
#define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)int wake_up_process(struct task_struct *p)
{return try_to_wake_up(p, TASK_NORMAL, 0);
}
为什么 TASK_UNINTERRUPTIBLE
进程不会因中断或信号而唤醒?从前面的 signal_wake_up_state()
分析已经有了答案,这里再重复一下:
void signal_wake_up_state(struct task_struct *t, unsigned int state)
{...if (!wake_up_state(t, state | TASK_INTERRUPTIBLE))kick_process(t);
}
可见,信号只会唤醒 TASK_INTERRUPTIBLE
状态的进程。既然信号不唤醒 TASK_UNINTERRUPTIBLE
状态进程,自然也不会进一步设置进程的 TIF_NEED_RESCHED
标志,因此中断也无法唤醒它:中断处理结束时的抢占调度依赖于 TIF_NEED_RESCHED
标志。
在本小节驱动例子中,TASK_UNINTERRUPTIBLE
状态的进程等到超时时间 ABIT_UGURU_RETRY_DELAY
到达时被唤醒。
2.3 小结
TASK_INTERRUPTIBLE
在主动调度出去时,如果当前没有信号挂起
,就会从 CPU 的运行队列中移除
,但如果当前有信号挂起
,会继续保持 TASK_RUNNING
状态,且不会从 CPU 的运行队列中移除;TASK_UNINTERRUPTIBLE
在主动调度出去时,直接从 CPU 的运行队列中移除
。TASK_INTERRUPTIBLE
可看作浅度睡眠
。
TASK_INTERRUPTIBLE
睡眠期间,可能被等待的事件、信号、中断唤醒;TASK_UNINTERRUPTIBLE
睡眠期间,无法被 信号、中断唤醒,只能被等待的事件唤醒,如前面例子中的超时时间到达。TASK_UNINTERRUPTIBLE
可看作深度睡眠
。
处于 TASK_INTERRUPTIBLE 状态的进程,用 ps、top 等工具观察为 S 态;处于 TASK_UNINTERRUPTIBLE 状态的进程,用同样的工具观察为 D 态。
Linux 内核提供对长时间处于 TASK_UNINTERRUPTIBLE
态进程的监测机制,细节可参考博文 Linux: hung task 检测机制简析 。
3. 参考资料
[1] Process Scheduling in the Kernel