博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
Linux创建进程的源码分析
阅读量:2135 次
发布时间:2019-04-30

本文共 17055 字,大约阅读时间需要 56 分钟。

进程的创建

  1. 进程创建流程

    在这里插入图片描述

    a. do_fork 主要处理 clone、fork、vfork 系统调用
        1. 先检查父进程的 ptrace 字段,如果父进程被跟踪了,则根据 clone_flags 的信息,对子进程进行相关操作
        2. 调用 copy_process() 函数将 fork() 之前的信息复制一份给子进程。这里包含了出现异常 nr=0 的情况
        3. 如果设置了 CLONE_VFORK(即 vfork),则初始化 vfork 完成量(completion)
        4. 用 wake_up_new_task() 函数将新创建的进程加入到调度器中,为其分配 CPU
        5. 如果是 vfork(),父进程会等待子进程结束或者子进程调用 exec 函数族
        6. 最后返回子进程的 pid
    b. copy_process 创建进程描述符以及子进程执行需要的其他数据结构
        1. 进行一些检查
        2. 创建一些进程需要的结构体
        3. 复制父进程的 task_struct
        4. 初始化进程状态,为进程分配 CPU
        5. 初始化进程内核栈
        6. 返回创建进程的进程描述符的地址
    c. dup_task_struct
        1. 用 alloc_task_struct_node 分配一个 task_struct 节点
        2. 用 alloc_thread_info_node 分配一个 thread_info 节点,其实是分配了一个 thread_union 联合体,将栈底返回给 ti
    d. copy_thread(解释如下的问题)
        1. 为什么 fork 在子进程中返回 0:原因是 childregs->ax = 0; 这段代码将子进程的 eax 赋值为 0
        2. p->thread.ip = (unsigned long) ret_from_fork; 将子进程的 ip 设置为 ret_from_fork 的首地址,因此子进程是从 ret_from_fork 开始执行的
    e. sched_fork
        1. 设置子进程的状态为 TASK_RUNNING
        2. 为子进程分配 CPU
  2. sys_clone

    /*
     * sys_clone - clone entry point whose arguments arrive packed in a
     * user-space array.
     *
     * @args: user pointer to four unsigned long slots:
     *        args[0] clone flags, args[1] new stack pointer,
     *        args[2] parent TID pointer, args[3] child TID pointer.
     *
     * Returns whatever do_fork() returns, or -EFAULT when any of the four
     * slots cannot be read from user space.
     */
    asmlinkage int sys_clone(unsigned long __user *args)
    {
        /*
         * Low byte: signal sent to the parent when the child exits (usually
         * SIGCHLD); the remaining three bytes encode the CLONE_* flag group.
         */
        unsigned long clone_flags;
        unsigned long newsp;        /* stack address for the new process (do_fork's stack_start) */
        uintptr_t parent_tidptr;    /* address of a user-space variable in the parent */
        uintptr_t child_tidptr;     /* address of a user-space variable in the new lightweight process */

        /*
         * get_user() returns non-zero when the user access faults. Bail out
         * instead of forwarding uninitialised locals to do_fork().
         */
        if (get_user(clone_flags, &args[0]) ||
            get_user(newsp, &args[1]) ||
            get_user(parent_tidptr, &args[2]) ||
            get_user(child_tidptr, &args[3]))
            return -EFAULT;

        /* stack_size is passed as 0. */
        return do_fork(clone_flags, newsp, 0,
                       (int __user *)parent_tidptr,
                       (int __user *)child_tidptr);
    }
  3. do_fork

    long do_fork(unsigned long clone_flags,	      unsigned long stack_start,	      unsigned long stack_size, //默认为0,未使用 其余参数都是clone传进来的	      int __user *parent_tidptr,	      int __user *child_tidptr){
    return _do_fork(clone_flags, stack_start, stack_size, parent_tidptr, child_tidptr, 0);}
  4. _do_fork

    /*
     * _do_fork - create a new task and start it running.
     *
     * Flow:
     *   1. Unless CLONE_UNTRACED is set, pick the ptrace event that matches
     *      the kind of fork and drop it if the tracer has not enabled it.
     *   2. Call copy_process() to build the child from the current task.
     *   3. On failure, return the error encoded in the returned pointer.
     *   4. For CLONE_VFORK, set up the vfork completion before the child runs.
     *   5. Hand the child to the scheduler with wake_up_new_task().
     *   6. For CLONE_VFORK, wait on the completion.
     *   7. Return the child's pid number as given by pid_vnr().
     */
    long _do_fork(unsigned long clone_flags,
                  unsigned long stack_start,
                  unsigned long stack_size,
                  int __user *parent_tidptr,
                  int __user *child_tidptr,
                  unsigned long tls)
    {
        const int is_vfork = (clone_flags & CLONE_VFORK) != 0;
        struct completion vfork_done;
        struct task_struct *child;
        struct pid *child_pid;
        long child_nr;
        int event = 0;

        /*
         * Determine whether and which event to report to ptracer. When
         * called from kernel_thread or CLONE_UNTRACED is explicitly
         * requested, no event is reported; otherwise, report if the event
         * for the type of forking is enabled.
         */
        if (!(clone_flags & CLONE_UNTRACED)) {
            if (is_vfork)
                event = PTRACE_EVENT_VFORK;
            else if ((clone_flags & CSIGNAL) == SIGCHLD)
                event = PTRACE_EVENT_FORK;
            else
                event = PTRACE_EVENT_CLONE;

            if (likely(!ptrace_event_enabled(current, event)))
                event = 0;
        }

        /* Build the child; an ERR_PTR value comes back on failure. */
        child = copy_process(clone_flags, stack_start, stack_size,
                             child_tidptr, NULL, event, tls, NUMA_NO_NODE);
        add_latent_entropy();

        if (IS_ERR(child))
            return PTR_ERR(child);

        /*
         * Do this prior waking up the new thread - the thread pointer
         * might get invalid after that point, if the thread exits quickly.
         */
        trace_sched_process_fork(current, child);

        child_pid = get_task_pid(child, PIDTYPE_PID);
        child_nr = pid_vnr(child_pid);

        if (clone_flags & CLONE_PARENT_SETTID)
            put_user(child_nr, parent_tidptr);

        /*
         * Point the child's vfork_done at the on-stack completion and take
         * a task reference before the child is allowed to run.
         */
        if (is_vfork) {
            child->vfork_done = &vfork_done;
            init_completion(&vfork_done);
            get_task_struct(child);
        }

        /* Add the child to the scheduler so it can be given a CPU. */
        wake_up_new_task(child);

        /* forking complete and child started to run, tell ptracer */
        if (unlikely(event))
            ptrace_event_pid(event, child_pid);

        /* With CLONE_VFORK the parent waits here on the completion. */
        if (is_vfork && !wait_for_vfork_done(child, &vfork_done))
            ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, child_pid);

        put_pid(child_pid);
        return child_nr;
    }
  5. copy_process

    /*
     * This creates a new process as a copy of the old one,
     * but does not actually start it yet.
     *
     * It copies the registers, and all the appropriate
     * parts of the process environment (as per the clone
     * flags). The actual kick-off is left to the caller.
     *
     * @clone_flags:  CLONE_* flags; validated for consistency first.
     * @stack_start:  forwarded to copy_thread_tls().
     * @stack_size:   forwarded to copy_thread_tls().
     * @child_tidptr: stored in set_child_tid / clear_child_tid when
     *                CLONE_CHILD_SETTID / CLONE_CHILD_CLEARTID are set.
     * @pid:          when this is not &init_struct_pid, a new pid is
     *                allocated with alloc_pid().
     * @trace:        OR-ed with CLONE_PTRACE and passed to ptrace_init_task().
     * @tls:          forwarded to copy_thread_tls().
     * @node:         forwarded to dup_task_struct().
     *
     * Returns the new task_struct on success, or ERR_PTR(retval) on failure.
     * Each failure jumps into the bad_fork_* label chain at the bottom, which
     * releases what was set up so far, in reverse order.
     */
    static __latent_entropy struct task_struct *copy_process(
                        unsigned long clone_flags,
                        unsigned long stack_start,
                        unsigned long stack_size,
                        int __user *child_tidptr,
                        struct pid *pid,
                        int trace,
                        unsigned long tls,
                        int node)
    {
        int retval;
        struct task_struct *p;

        /* Check that the flags passed in clone_flags are consistent. */
        if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
            return ERR_PTR(-EINVAL);

        if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
            return ERR_PTR(-EINVAL);

        /*
         * Thread groups must share signals as well, and detached threads
         * can only be started up within the thread group.
         */
        if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
            return ERR_PTR(-EINVAL);

        /*
         * Shared signal handlers imply shared VM. By way of the above,
         * thread groups also imply shared VM. Blocking this case allows
         * for various simplifications in other code.
         */
        if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
            return ERR_PTR(-EINVAL);

        /*
         * Siblings of global init remain as zombies on exit since they are
         * not reaped by their parent (swapper). To solve this and to avoid
         * multi-rooted process trees, prevent global and container-inits
         * from creating siblings.
         */
        if ((clone_flags & CLONE_PARENT) && current->signal->flags & SIGNAL_UNKILLABLE)
            return ERR_PTR(-EINVAL);

        /*
         * If the new process will be in a different pid or user namespace
         * do not allow it to share a thread group with the forking task.
         */
        if (clone_flags & CLONE_THREAD) {
            if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) || (task_active_pid_ns(current) != current->nsproxy->pid_ns_for_children))
                return ERR_PTR(-EINVAL);
        }

        retval = -ENOMEM;
        /*
         * NOTE (from the original annotation): an SELinux security hook is
         * presumably expected at this point; none appears in this listing.
         */
        /* Call dup_task_struct() to copy the current task's task_struct for the child. */
        p = dup_task_struct(current, node);
        if (!p)
            goto fork_out;

        /*
         * This _must_ happen before we call free_task(), i.e. before we jump
         * to any of the bad_fork_* labels. This is to avoid freeing
         * p->set_child_tid which is (ab)used as a kthread's data pointer for
         * kernel threads (PF_KTHREAD).
         */
        p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
        /*
         * Clear TID on mm_release()?
         */
        p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;

        ftrace_graph_init_task(p);

        rt_mutex_init_task(p);

    #ifdef CONFIG_PROVE_LOCKING
        DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
        DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
    #endif
        retval = -EAGAIN;
        /*
         * Check whether the owning user's process count has reached
         * RLIMIT_NPROC. Users other than INIT_USER that hold neither
         * CAP_SYS_RESOURCE nor CAP_SYS_ADMIN fail here.
         */
        if (atomic_read(&p->real_cred->user->processes) >= task_rlimit(p, RLIMIT_NPROC)) {
            if (p->real_cred->user != INIT_USER && !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
                goto bad_fork_free;
        }
        current->flags &= ~PF_NPROC_EXCEEDED;

        retval = copy_creds(p, clone_flags);
        if (retval < 0)
            goto bad_fork_free;

        /*
         * If multiple threads are within copy_process(), then this check
         * triggers too late. This doesn't hurt, the check is only there
         * to stop root fork bombs.
         */
        retval = -EAGAIN;
        if (nr_threads >= max_threads)
            goto bad_fork_cleanup_count;

        delayacct_tsk_init(p); /* Must remain after dup_task_struct() */
        p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER | PF_IDLE);
        p->flags |= PF_FORKNOEXEC;
        INIT_LIST_HEAD(&p->children);
        INIT_LIST_HEAD(&p->sibling);
        rcu_copy_process(p);
        p->vfork_done = NULL;
        spin_lock_init(&p->alloc_lock);

        init_sigpending(&p->pending);

        /* Reset the child's CPU time accounting fields. */
        p->utime = p->stime = p->gtime = 0;
    #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
        p->utimescaled = p->stimescaled = 0;
    #endif
        prev_cputime_init(&p->prev_cputime);

    #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
        seqcount_init(&p->vtime.seqcount);
        p->vtime.starttime = 0;
        p->vtime.state = VTIME_INACTIVE;
    #endif

    #if defined(SPLIT_RSS_COUNTING)
        memset(&p->rss_stat, 0, sizeof(p->rss_stat));
    #endif

        p->default_timer_slack_ns = current->timer_slack_ns;

        task_io_accounting_init(&p->ioac);
        acct_clear_integrals(p);

        posix_cpu_timers_init(p);

        p->io_context = NULL;
        p->audit_context = NULL;
        cgroup_fork(p);
    #ifdef CONFIG_NUMA
        p->mempolicy = mpol_dup(p->mempolicy);
        if (IS_ERR(p->mempolicy)) {
            retval = PTR_ERR(p->mempolicy);
            p->mempolicy = NULL;
            goto bad_fork_cleanup_threadgroup_lock;
        }
    #endif
    #ifdef CONFIG_CPUSETS
        p->cpuset_mem_spread_rotor = NUMA_NO_NODE;
        p->cpuset_slab_spread_rotor = NUMA_NO_NODE;
        seqcount_init(&p->mems_allowed_seq);
    #endif
    #ifdef CONFIG_TRACE_IRQFLAGS
        p->irq_events = 0;
        p->hardirqs_enabled = 0;
        p->hardirq_enable_ip = 0;
        p->hardirq_enable_event = 0;
        p->hardirq_disable_ip = _THIS_IP_;
        p->hardirq_disable_event = 0;
        p->softirqs_enabled = 1;
        p->softirq_enable_ip = _THIS_IP_;
        p->softirq_enable_event = 0;
        p->softirq_disable_ip = 0;
        p->softirq_disable_event = 0;
        p->hardirq_context = 0;
        p->softirq_context = 0;
    #endif

        p->pagefault_disabled = 0;

    #ifdef CONFIG_LOCKDEP
        p->lockdep_depth = 0; /* no locks held yet */
        p->curr_chain_key = 0;
        p->lockdep_recursion = 0;
        lockdep_init_task(p);
    #endif

    #ifdef CONFIG_DEBUG_MUTEXES
        p->blocked_on = NULL; /* not blocked yet */
    #endif
    #ifdef CONFIG_BCACHE
        p->sequential_io = 0;
        p->sequential_io_avg = 0;
    #endif
        /* Set the child's scheduling state and assign it a CPU. */
        /* Perform scheduler related setup. Assign this task to a CPU. */
        retval = sched_fork(clone_flags, p);
        if (retval)
            goto bad_fork_cleanup_policy;

        retval = perf_event_init_task(p);
        if (retval)
            goto bad_fork_cleanup_policy;
        retval = audit_alloc(p);
        if (retval)
            goto bad_fork_cleanup_perf;
        /* copy all the process information */
        shm_init_task(p);
        retval = security_task_alloc(p, clone_flags);
        if (retval)
            goto bad_fork_cleanup_audit;
        retval = copy_semundo(clone_flags, p);
        if (retval)
            goto bad_fork_cleanup_security;
        retval = copy_files(clone_flags, p);
        if (retval)
            goto bad_fork_cleanup_semundo;
        retval = copy_fs(clone_flags, p);
        if (retval)
            goto bad_fork_cleanup_files;
        retval = copy_sighand(clone_flags, p);
        if (retval)
            goto bad_fork_cleanup_fs;
        retval = copy_signal(clone_flags, p);
        if (retval)
            goto bad_fork_cleanup_sighand;
        retval = copy_mm(clone_flags, p);
        if (retval)
            goto bad_fork_cleanup_signal;
        retval = copy_namespaces(clone_flags, p);
        if (retval)
            goto bad_fork_cleanup_mm;
        retval = copy_io(clone_flags, p);
        if (retval)
            goto bad_fork_cleanup_namespaces;
        /* Initialise the child's kernel stack. */
        retval = copy_thread_tls(clone_flags, stack_start, stack_size, p, tls);
        if (retval)
            goto bad_fork_cleanup_io;

        /* Allocate a pid for the new process unless the caller passed init_struct_pid. */
        if (pid != &init_struct_pid) {
            pid = alloc_pid(p->nsproxy->pid_ns_for_children);
            if (IS_ERR(pid)) {
                retval = PTR_ERR(pid);
                goto bad_fork_cleanup_thread;
            }
        }

    #ifdef CONFIG_BLOCK
        p->plug = NULL;
    #endif
        futex_init_task(p);

        /*
         * sigaltstack should be cleared when sharing the same VM
         */
        if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
            sas_ss_reset(p);

        /*
         * Syscall tracing and stepping should be turned off in the
         * child regardless of CLONE_PTRACE.
         */
        user_disable_single_step(p);
        clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
    #ifdef TIF_SYSCALL_EMU
        clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
    #endif
        clear_all_latency_tracing(p);

        /* ok, now we should be set up.. */
        /* Set the child's pid; a thread inherits the caller's group leader and tgid. */
        p->pid = pid_nr(pid);
        if (clone_flags & CLONE_THREAD) {
            p->group_leader = current->group_leader;
            p->tgid = current->tgid;
        } else {
            p->group_leader = p;
            p->tgid = p->pid;
        }

        p->nr_dirtied = 0;
        p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);
        p->dirty_paused_when = 0;

        p->pdeath_signal = 0;
        INIT_LIST_HEAD(&p->thread_group);
        p->task_works = NULL;

        cgroup_threadgroup_change_begin(current);
        /*
         * Ensure that the cgroup subsystem policies allow the new process to be
         * forked. It should be noted that the new process's css_set can be changed
         * between here and cgroup_post_fork() if an organisation operation is in
         * progress.
         */
        retval = cgroup_can_fork(p);
        if (retval)
            goto bad_fork_free_pid;

        /*
         * From this point on we must avoid any synchronous user-space
         * communication until we take the tasklist-lock. In particular, we do
         * not want user-space to be able to predict the process start-time by
         * stalling fork(2) after we recorded the start_time but before it is
         * visible to the system.
         */

        p->start_time = ktime_get_ns();
        p->real_start_time = ktime_get_boot_ns();

        /*
         * Make it visible to the rest of the system, but dont wake it up yet.
         * Need tasklist lock for parent etc handling!
         */
        write_lock_irq(&tasklist_lock);

        /* CLONE_PARENT re-uses the old parent */
        if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
            p->real_parent = current->real_parent;
            p->parent_exec_id = current->parent_exec_id;
            if (clone_flags & CLONE_THREAD)
                p->exit_signal = -1;
            else
                p->exit_signal = current->group_leader->exit_signal;
        } else {
            p->real_parent = current;
            p->parent_exec_id = current->self_exec_id;
            p->exit_signal = (clone_flags & CSIGNAL);
        }

        klp_copy_process(p);

        spin_lock(&current->sighand->siglock);

        /*
         * Copy seccomp details explicitly here, in case they were changed
         * before holding sighand lock.
         */
        copy_seccomp(p);

        /*
         * Process group and session signals need to be delivered to just the
         * parent before the fork or both the parent and the child after the
         * fork. Restart if a signal comes in before we add the new process to
         * its process group.
         * A fatal signal pending means that current will exit, so the new
         * thread can't slip out of an OOM kill (or normal SIGKILL).
         */
        recalc_sigpending();
        if (signal_pending(current)) {
            retval = -ERESTARTNOINTR;
            goto bad_fork_cancel_cgroup;
        }
        if (unlikely(!(ns_of_pid(pid)->nr_hashed & PIDNS_HASH_ADDING))) {
            retval = -ENOMEM;
            goto bad_fork_cancel_cgroup;
        }

        if (likely(p->pid)) {
            ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);

            init_task_pid(p, PIDTYPE_PID, pid);
            if (thread_group_leader(p)) {
                /* New thread-group leader: link it under its parent and into the task list. */
                init_task_pid(p, PIDTYPE_PGID, task_pgrp(current));
                init_task_pid(p, PIDTYPE_SID, task_session(current));

                if (is_child_reaper(pid)) {
                    ns_of_pid(pid)->child_reaper = p;
                    p->signal->flags |= SIGNAL_UNKILLABLE;
                }

                p->signal->leader_pid = pid;
                p->signal->tty = tty_kref_get(current->signal->tty);
                /*
                 * Inherit has_child_subreaper flag under the same
                 * tasklist_lock with adding child to the process tree
                 * for propagate_has_child_subreaper optimization.
                 */
                p->signal->has_child_subreaper = p->real_parent->signal->has_child_subreaper || p->real_parent->signal->is_child_subreaper;
                list_add_tail(&p->sibling, &p->real_parent->children);
                list_add_tail_rcu(&p->tasks, &init_task.tasks);
                attach_pid(p, PIDTYPE_PGID);
                attach_pid(p, PIDTYPE_SID);
                __this_cpu_inc(process_counts);
            } else {
                /* Additional thread: bump the shared signal counters and join the group's thread lists. */
                current->signal->nr_threads++;
                atomic_inc(&current->signal->live);
                atomic_inc(&current->signal->sigcnt);
                list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
                list_add_tail_rcu(&p->thread_node, &p->signal->thread_head);
            }
            attach_pid(p, PIDTYPE_PID);
            nr_threads++;
        }

        total_forks++;
        spin_unlock(&current->sighand->siglock);
        syscall_tracepoint_update(p);
        write_unlock_irq(&tasklist_lock);

        proc_fork_connector(p);
        cgroup_post_fork(p);
        cgroup_threadgroup_change_end(current);
        perf_event_fork(p);

        trace_task_newtask(p, clone_flags);
        uprobe_copy_process(p, clone_flags);

        copy_oom_score_adj(clone_flags, p);

        /* Success: return the new task_struct. */
        return p;

    /*
     * Error unwinding. Each label releases what was set up before the
     * corresponding failure point and falls through to the labels below it.
     */
    bad_fork_cancel_cgroup:
        spin_unlock(&current->sighand->siglock);
        write_unlock_irq(&tasklist_lock);
        cgroup_cancel_fork(p);
    bad_fork_free_pid:
        cgroup_threadgroup_change_end(current);
        if (pid != &init_struct_pid)
            free_pid(pid);
    bad_fork_cleanup_thread:
        exit_thread(p);
    bad_fork_cleanup_io:
        if (p->io_context)
            exit_io_context(p);
    bad_fork_cleanup_namespaces:
        exit_task_namespaces(p);
    bad_fork_cleanup_mm:
        if (p->mm) {
            mm_clear_owner(p->mm, p);
            mmput(p->mm);
        }
    bad_fork_cleanup_signal:
        if (!(clone_flags & CLONE_THREAD))
            free_signal_struct(p->signal);
    bad_fork_cleanup_sighand:
        __cleanup_sighand(p->sighand);
    bad_fork_cleanup_fs:
        exit_fs(p); /* blocking */
    bad_fork_cleanup_files:
        exit_files(p); /* blocking */
    bad_fork_cleanup_semundo:
        exit_sem(p);
    bad_fork_cleanup_security:
        security_task_free(p);
    bad_fork_cleanup_audit:
        audit_free(p);
    bad_fork_cleanup_perf:
        perf_event_free_task(p);
    bad_fork_cleanup_policy:
        lockdep_free_task(p);
    #ifdef CONFIG_NUMA
        mpol_put(p->mempolicy);
    bad_fork_cleanup_threadgroup_lock:
    #endif
        delayacct_tsk_free(p);
    bad_fork_cleanup_count:
        atomic_dec(&p->cred->user->processes);
        exit_creds(p);
    bad_fork_free:
        p->state = TASK_DEAD;
        put_task_stack(p);
        delayed_free_task(p);
    fork_out:
        return ERR_PTR(retval);
    }
  6. 参考链接

1. https://blog.csdn.net/weixin_42250655/article/details/102533280
2. http://blog.luoyuanhang.com/2015/07/27/%E5%88%86%E6%9E%90Linux%E5%86%85%E6%A0%B8%E5%88%9B%E5%BB%BA%E4%B8%80%E4%B8%AA%E6%96%B0%E8%BF%9B%E7%A8%8B%E7%9A%84%E8%BF%87%E7%A8%8B/
你可能感兴趣的文章
"NetworkError: 400 Bad Request - http://172.16.47.117:8088/rhip/**/####t/approval?date=976
查看>>
mybatis 根据 数据库表 自动生成 实体
查看>>
win10将IE11兼容ie10
查看>>
checkbox设置字体颜色
查看>>
第一篇 HelloWorld.java重新学起
查看>>
ORACLE表空间扩张
查看>>
orcal 循环执行sql
查看>>
web.xml配置监听器,加载数据库信息配置文件ServletContextListener
查看>>
结构型模式之桥接模式(Bridge)
查看>>
行为型模式之状态模式(State)
查看>>
行为型模式之策略模式(Strategy)
查看>>
行为型模式之模板方法模式(TemplateMethod)
查看>>
行为型模式之访问者模式(Visitor)
查看>>
大小端详解
查看>>
source insight使用方法简介
查看>>
<stdarg.h>头文件的使用
查看>>
C++/C 宏定义(define)中# ## 的含义 宏拼接
查看>>
Git安装配置
查看>>
linux中fork()函数详解
查看>>
C语言字符、字符串操作偏僻函数总结
查看>>