GCD类型解密

GCD是一套强大的多线程方案,提供了多种任务队列来提高开发效率,通过阅读libdispatch的源码可以更好的理解GCD的工作流程,帮助我们设计更好的代码

结构类型

模板结构

libdispatch使用宏定义实现了大量的模板结构类型,除此之外还使用了union与enum结合的方式实现动态参数类型的灵活性:

  • queue_type:队列类型,例如全局队列
  • source_type:资源统称,queue或者function都可以看做是一个资源
  • semaphore_type:信号量类型,信号量可以保证资源在多线程同时竞争下的安全
  • continuation_type:派发任务会被封装成dispatch_continuation_t,然后被压入队列中

      /*
       * Type tags stored in a vtable's do_type field.
       *
       * The family ("meta-type": continuation, queue, source, semaphore) is
       * encoded from bit 0x10000 upward; the low bits number the concrete type
       * inside that family (e.g. 1/2/3 for the plain, global and manager queue).
       * _DISPATCH_ATTR_TYPE is OR'ed onto a family to tag its attribute structure.
       */
      enum {  
          _DISPATCH_CONTINUATION_TYPE     =    0x00000, // meta-type for continuations  
          _DISPATCH_QUEUE_TYPE            =    0x10000, // meta-type for queues  
          _DISPATCH_SOURCE_TYPE           =    0x20000, // meta-type for sources  
          _DISPATCH_SEMAPHORE_TYPE        =    0x30000, // meta-type for semaphores  
          _DISPATCH_ATTR_TYPE             = 0x10000000, // meta-type for attribute structures  
      
          DISPATCH_CONTINUATION_TYPE      = _DISPATCH_CONTINUATION_TYPE,  
    		 
          // queue attribute structure: queue family + attribute flag
          DISPATCH_QUEUE_ATTR_TYPE        = _DISPATCH_QUEUE_TYPE | _DISPATCH_ATTR_TYPE,  
    		  
          // concrete queue types, numbered within the queue family
          DISPATCH_QUEUE_TYPE             = 1 | _DISPATCH_QUEUE_TYPE,  
          DISPATCH_QUEUE_GLOBAL_TYPE      = 2 | _DISPATCH_QUEUE_TYPE,  
          DISPATCH_QUEUE_MGR_TYPE         = 3 | _DISPATCH_QUEUE_TYPE,  
    		  
          DISPATCH_SEMAPHORE_TYPE         = _DISPATCH_SEMAPHORE_TYPE,  
    		 
          // source attribute structure: source family + attribute flag
          DISPATCH_SOURCE_ATTR_TYPE       = _DISPATCH_SOURCE_TYPE | _DISPATCH_ATTR_TYPE,  
     
          DISPATCH_SOURCE_KEVENT_TYPE     = 1 | _DISPATCH_SOURCE_TYPE,  
      };
    

对于libdispatch的结构体类型来说,都存在DISPATCH_STRUCT_HEADER(x)类型的变量。通过##拼接变量名的方式对不同的类型生成动态的参数变量

/*
 * Common leading fields of a dispatch object struct.
 *
 * `x` is token-pasted into the type names, so DISPATCH_STRUCT_HEADER(queue)
 * declares `const struct dispatch_queue_vtable_s *do_vtable` and
 * `struct dispatch_queue_s *volatile do_next`.
 *
 *   do_vtable       pointer to the per-type vtable
 *   do_ref_cnt,
 *   do_xref_cnt     names handed to _OS_OBJECT_HEADER (defined elsewhere);
 *                   presumably the two reference counters -- TODO confirm
 *   do_next         volatile link to another object of the same struct type
 *   do_targetq      target queue of this object
 *   do_ctxt         untyped context pointer
 *   do_finalizer    untyped pointer; presumably the finalizer callback -- confirm
 *   do_suspend_cnt  suspend counter
 */
#define DISPATCH_STRUCT_HEADER(x) \
	_OS_OBJECT_HEADER( \
	const struct dispatch_##x##_vtable_s *do_vtable, \
	do_ref_cnt, \
	do_xref_cnt); \
	struct dispatch_##x##_s *volatile do_next; \
	struct dispatch_queue_s *do_targetq; \
	void *do_ctxt; \
	void *do_finalizer; \
	unsigned int do_suspend_cnt;

联合体

联合体union保证了各变量享用同一个内存地址,这也意味着各变量是互斥的。通过联合体结构,libdispatch实现了类型强制转换的效果。另外,通过宏定义DISPATCH_DECL(name)来保证所有dispatch_xxx_t变量实际上是一个指向dispatch_xxx_s的指针:

/*
 * Union holding a pointer to any of the dispatch object structs.
 *
 * All members are pointers, so they share the same storage. The
 * __transparent_union__ attribute (GCC/Clang) lets a function whose parameter
 * is dispatch_object_t be called with any of the member pointer types
 * directly, without an explicit cast.
 */
typedef union {
    struct _os_object_s *_os_obj;
    struct dispatch_object_s *_do;
    struct dispatch_continuation_s *_dc;
    struct dispatch_queue_s *_dq;
    struct dispatch_queue_attr_s *_dqa;
    struct dispatch_group_s *_dg;
    struct dispatch_source_s *_ds;
    struct dispatch_source_attr_s *_dsa;
    struct dispatch_semaphore_s *_dsema;
    struct dispatch_data_s *_ddata;
    struct dispatch_io_s *_dchannel;
    struct dispatch_operation_s *_doperation;
    struct dispatch_disk_s *_ddisk;
} dispatch_object_t __attribute__((__transparent_union__));

/*
 * Declares `name_t` as a pointer to `struct name_s`; for example
 * DISPATCH_DECL(dispatch_queue) expands to
 * `typedef struct dispatch_queue_s *dispatch_queue_t`.
 */
#define DISPATCH_DECL(name) typedef struct name##_s *name##_t

一方面,union在使用时会分配一块足够大的内存(能够容纳其中任意一种类型),这意味着可以随时更换存储的数据,但同样可能造成数据的破坏;另一方面,它让C函数拥有了返回参数多样化的灵活性。但是要记住,不同的数据类型在生成自身的do_vtable时也有不同的表现:

/// Vtable initialisation for the GCD object types.
/// Each instance fills in the type tag (do_type), a human-readable name
/// (do_kind) and whichever of the do_dispose / do_invoke / do_probe /
/// do_debug callbacks that type provides.

/// Semaphore vtable.
DISPATCH_VTABLE_INSTANCE(semaphore,
	.do_type = DISPATCH_SEMAPHORE_TYPE,
	.do_kind = "semaphore",
	.do_dispose = _dispatch_semaphore_dispose,
	.do_debug = _dispatch_semaphore_debug,
);

/// Group vtable; note that it reuses the semaphore dispose and debug callbacks.
DISPATCH_VTABLE_INSTANCE(group,
	.do_type = DISPATCH_GROUP_TYPE,
	.do_kind = "group",
	.do_dispose = _dispatch_semaphore_dispose,
	.do_debug = _dispatch_semaphore_debug,
);

/// Plain queue vtable: no invoke callback, and a dummy function as probe.
DISPATCH_VTABLE_INSTANCE(queue,
	.do_type = DISPATCH_QUEUE_TYPE,
	.do_kind = "queue",
	.do_dispose = _dispatch_queue_dispose,
	.do_invoke = NULL,
	.do_probe = (void *)dummy_function_r0,
	.do_debug = dispatch_queue_debug,
);

/// Root (global) queue vtable, declared through the SUBCLASS macro with
/// `queue` as its second argument; sets no do_dispose or do_invoke.
DISPATCH_VTABLE_SUBCLASS_INSTANCE(queue_root, queue,
	.do_type = DISPATCH_QUEUE_GLOBAL_TYPE,
	.do_kind = "global-queue",
	.do_debug = dispatch_queue_debug,
	.do_probe = _dispatch_queue_probe_root,
);

/// Manager queue vtable, also declared through the SUBCLASS macro with
/// `queue` as its second argument; invoked through _dispatch_mgr_thread.
DISPATCH_VTABLE_SUBCLASS_INSTANCE(queue_mgr, queue,
	.do_type = DISPATCH_QUEUE_MGR_TYPE,
	.do_kind = "mgr-queue",
	.do_invoke = _dispatch_mgr_thread,
	.do_debug = dispatch_queue_debug,
	.do_probe = _dispatch_mgr_wakeup,
);

/// Vtable for the queue that backs queue-specific ("queue-context") data.
DISPATCH_VTABLE_INSTANCE(queue_specific_queue,
	.do_type = DISPATCH_QUEUE_SPECIFIC_TYPE,
	.do_kind = "queue-context",
	.do_dispose = _dispatch_queue_specific_queue_dispose,
	.do_invoke = NULL,
	.do_probe = (void *)dummy_function_r0,
	.do_debug = (void *)dispatch_queue_debug,
);

/// Queue attribute vtable: only the type tag and name are set.
DISPATCH_VTABLE_INSTANCE(queue_attr,
	.do_type = DISPATCH_QUEUE_ATTR_TYPE,
	.do_kind = "queue-attr",
);

/// Kevent source vtable: provides all four callbacks.
DISPATCH_VTABLE_INSTANCE(source,
	.do_type = DISPATCH_SOURCE_KEVENT_TYPE,
	.do_kind = "kevent-source",
	.do_invoke = _dispatch_source_invoke,
	.do_dispose = _dispatch_source_dispose,
	.do_probe = _dispatch_source_probe,
	.do_debug = _dispatch_source_debug,
);

dispatch_queue_s

以queue为例,其结构类型为dispatch_queue_s,鉴于源码中使用了大量的宏定义增加属性,下面的结构是替换一部分宏定义后的结构:

/*
 * A dispatch queue, shown with part of its macros already expanded.
 */
struct dispatch_queue_s {
    /* Common object header (do_vtable, do_next, do_targetq, do_ctxt, ...). */
    DISPATCH_STRUCT_HEADER(queue);

    /* NOTE(review): presumably the number of items currently running -- confirm. */
    uint32_t volatile dq_running; 
    /* Maximum concurrency; 1 for serial queues and the main queue. */
    uint32_t dq_width;
    /* NOTE(review): presumably tail and head of the pending item list -- confirm. */
    struct dispatch_object_s *volatile dq_items_tail;
    struct dispatch_object_s *volatile dq_items_head; 
    /* NOTE(review): presumably a per-queue serial number -- confirm. */
    unsigned long dq_serialnum; 
    /* NOTE(review): presumably the queue holding queue-specific data -- confirm. */
    dispatch_queue_t dq_specific_q;

    /* Fixed-size label buffer of DISPATCH_QUEUE_MIN_LABEL_SIZE bytes. */
    char dq_label[DISPATCH_QUEUE_MIN_LABEL_SIZE];
    /* Padding of DISPATCH_QUEUE_CACHELINE_PAD bytes. */
    char _dq_pad[DISPATCH_QUEUE_CACHELINE_PAD];
};

一个线程任务队列有这么几个重要的属性:

  • do_vtable

    虚拟表,采用类似于C++的模板方式定义了一个结构体vtable,主要存储了结构类型相关的描述信息

  • do_targetq

    目标队列,GCD允许我们将某个任务队列指派到另外的任务队列中执行。当do_targetq不为空时,async的实现会有所不同

  • dq_width

    最大并发数,串行/主线程的这个值是1

  • do_ctxt

    线程上下文,用来存储线程池相关数据,比如用于线程挂起和唤醒的信号量、线程池尺寸等

    /* Layout of the context that do_ctxt points to: a pending counter, the thread-mediator semaphore and the thread pool size, overlaid with padding of DISPATCH_CACHELINE_SIZE bytes. */
    struct dispatch_root_queue_context_s { union { struct { unsigned int volatile dgq_pending; dispatch_semaphore_t dgq_thread_mediator; uint32_t dgq_thread_pool_size; }; char _dgq_pad[DISPATCH_CACHELINE_SIZE]; }; };
  • do_suspend_cnt

    用作暂停标记,当大于等于2时表示任务为延时任务。在任务达到时会修改标记,然后唤醒队列调度任务

dispatch_continuation_s

派发任务会被包装成dispatch_continuation_s结构体对象,同样通过宏定义来声明其头部属性:

/*
 * Flag bits for a continuation. Each is a distinct single bit, so they can be
 * combined. According to the accompanying text, a task dispatched to the main
 * queue or a serial queue is tagged with DISPATCH_OBJ_BARRIER_BIT.
 */
#define DISPATCH_OBJ_ASYNC_BIT		0x1
#define DISPATCH_OBJ_BARRIER_BIT  	0x2
#define DISPATCH_OBJ_GROUP_BIT		0x4
#define DISPATCH_OBJ_SYNC_SLOW_BIT	0x8

/*
 * Leading fields of a continuation. `x` is pasted into the do_next type.
 *
 *   do_vtable  untyped here (const void *), unlike DISPATCH_STRUCT_HEADER
 *   do_next    volatile link to another struct dispatch_<x>_s
 *   dc_func    the function that performs the task; called as dc_func(dc_ctxt)
 *   dc_ctxt    context pointer passed to dc_func
 *   dc_data,
 *   dc_other   additional untyped slots; purpose not shown here
 */
#define DISPATCH_CONTINUATION_HEADER(x) \
    _OS_OBJECT_HEADER( \
    const void *do_vtable, \
    do_ref_cnt, \
    do_xref_cnt); \
    struct dispatch_##x##_s *volatile do_next; \
    dispatch_function_t dc_func; \
    void *dc_ctxt; \
    void *dc_data; \
    void *dc_other;

/* A dispatched task: consists solely of the continuation header fields. */
struct dispatch_continuation_s {
    DISPATCH_CONTINUATION_HEADER(continuation);
};

一个dispatch_continuation_s变量不总是只包装了单个任务,它被设置成复用机制,通过TSD的方式保证每个线程可以拥有一定数量的复用continuation,以此来减少不必要的内存分配开销。

  • dc_func

    承担执行任务的对象,宏定义_dispatch_client_callout最终会以dc_func(dc_ctxt)的方式回调

  • dc_ctxt

    存储了continuation对象的上下文数据,同样用于执行任务

  • do_vtable

    只有当do_vtable的值小于127时才表示变量是一个continuation,派发到主/串行队列的任务会被标记DISPATCH_OBJ_BARRIER_BIT屏障标记

do_vtable

libdispatch的结构对象都拥有自己的一张do_vtable虚拟表,同样采用模板式的方式生成,每一个具体的结构类型会生成一张对应类型的虚拟表,但是属性基本是统一的

/*
 * Fields shared by every per-type vtable. `x` is pasted into the parameter
 * types, so each callback takes a pointer to its own struct dispatch_<x>_s.
 *
 *   do_type     concrete type tag (one of the DISPATCH_*_TYPE values)
 *   do_kind     human-readable type name, e.g. "global-queue"
 *   do_debug    writes debug information into a caller-supplied buffer;
 *               returns size_t
 *   do_invoke   invokes the object; returns a struct dispatch_queue_s *
 *   do_probe    boolean check on the object
 *   do_dispose  disposes of the object
 *
 * Note: the expansion has no trailing semicolon; the user supplies it.
 */
#define DISPATCH_VTABLE_HEADER(x) \
	unsigned long const do_type; \
	const char *const do_kind; \
	size_t (*const do_debug)(struct dispatch_##x##_s *, char *, size_t); \
	struct dispatch_queue_s *(*const do_invoke)(struct dispatch_##x##_s *); \
	bool (*const do_probe)(struct dispatch_##x##_s *); \
	void (*const do_dispose)(struct dispatch_##x##_s *)

虚拟表采用类型名拼接的方式生成不同类型的重载函数,由于libdispatch类型采用union结构,这两者结合极大的保证了执行的灵活性

  • do_type

    数据的具体类型,详见上文中的枚举值

  • do_kind

    数据的类型描述字符串,比如全局队列为global-queue

  • do_debug

    debug方法,用来获取调试时需要的变量信息字符串

  • do_probe

    用于检测传入对象中的一些值是否满足条件

  • do_invoke

    唤醒队列的方法,全局队列和主队列此项为NULL

dispatch_root_queue_context_s

dispatch_root_queue_context_s存储了线程运行过程中的上下文数据,默认情况下已经创建了多个全局的上下文对象

/*
 * Per-root-queue context. The anonymous union overlays the real fields with
 * a DISPATCH_CACHELINE_SIZE-byte pad, so the struct is at least that large.
 */
struct dispatch_root_queue_context_s {
	union {
		struct {
			/* Pending counter. */
			unsigned int volatile dgq_pending;
#if DISPATCH_USE_PTHREAD_POOL
			/* Semaphore used to check whether a thread is available. */
			dispatch_semaphore_t dgq_thread_mediator;
			/* Remaining number of threads the pool may still create. */
			uint32_t dgq_thread_pool_size;
#endif
		};
		/* Padding member that fixes the minimum size of the union. */
		char _dgq_pad[DISPATCH_CACHELINE_SIZE];
	};
};

上下文对象在执行任务的过程中存储了线程信号信息以及可用线程池数量。

  • dgq_pending、_dgq_pad

    当前版本源码中未使用

  • dgq_thread_mediator

    用于判断是否存在可用线程资源,如果存在返回1,否则后续将创建新线程执行任务

  • dgq_thread_pool_size

    线程池剩余可用数量,只有dgq_thread_mediator的查询返回0并且此项大于0时会尝试创建新线程用以执行任务

GCD总共创建了八个全局上下文对象,对应四种优先级(每种优先级各有普通和overcommit两个),每个上下文最多可容纳255个线程

#define MAX_THREAD_COUNT 255
DISPATCH_CACHELINE_ALIGN
static struct dispatch_root_queue_context_s _dispatch_root_queue_contexts[] = {
	[DISPATCH_ROOT_QUEUE_IDX_LOW_PRIORITY] = },
	[DISPATCH_ROOT_QUEUE_IDX_LOW_OVERCOMMIT_PRIORITY] = },
	[DISPATCH_ROOT_QUEUE_IDX_DEFAULT_PRIORITY] = },
	[DISPATCH_ROOT_QUEUE_IDX_DEFAULT_OVERCOMMIT_PRIORITY] = },
	[DISPATCH_ROOT_QUEUE_IDX_HIGH_PRIORITY] = },
	[DISPATCH_ROOT_QUEUE_IDX_HIGH_OVERCOMMIT_PRIORITY] = },
	[DISPATCH_ROOT_QUEUE_IDX_BACKGROUND_PRIORITY] = },
	[DISPATCH_ROOT_QUEUE_IDX_BACKGROUND_OVERCOMMIT_PRIORITY] = },
};

dispatch_semaphore_s

dispatch_semaphore_s是性能稍次于自旋锁的信号量对象,用来保证资源使用的安全性。

/*
 * Semaphore object. Only the first three fields after the header are
 * discussed in the accompanying text.
 */
struct dispatch_semaphore_s {
	/* Common object header (do_vtable, do_next, do_targetq, ...). */
	DISPATCH_STRUCT_HEADER(semaphore);
	/* Current value; while it is below 0 the guarded resource cannot be accessed. */
	long dsema_value;
	/* Initial value; limits how many threads may access the resource at once. */
	long dsema_orig;
	/* Used, via atomic operations, to guard against spurious mach wake-ups. */
	size_t dsema_sent_ksignals;

	/* NOTE(review): presumably the number of group waiters -- confirm. */
	size_t dsema_group_waiters;
	/* NOTE(review): presumably head/tail of a notification list -- confirm. */
	struct dispatch_sema_notify_s *dsema_notify_head;
	struct dispatch_sema_notify_s *dsema_notify_tail;
};

相比其他的结构,信号量的内部要简洁的多,主要使用的就三个属性:

  • dsema_value

    当前信号值,当这个值小于0时无法访问加锁资源

  • dsema_orig

    初始化信号值,限制了同时访问资源的线程数量

  • dsema_sent_ksignals

    由于mach信号可能会被意外唤醒,通过原子操作来避免虚假信号

总结

通过一张图表示上面提及的四种数据类型的关系:

线程私有变量

进程中的全局变量和静态变量是所有线程都能访问的共享变量,这意味着访问这样的数据需要昂贵的同步开销,Thread-Specific Data(TSD,线程私有数据)机制让每一个线程拥有私有的全局变量。libdispatch使用多个pthread_key来存取这些数据(下面的代码中创建了五个,开启DISPATCH_PERF_MON时再多一个),并在初始化阶段完成创建:

/*
 * Library initialisation (abridged excerpt: the `......` lines mark code
 * omitted from the original function).
 *
 * Sanity-checks the queue configuration constants, then creates the
 * thread-specific-data keys used by the library.
 */
DISPATCH_EXPORT DISPATCH_NOTHROW
void
libdispatch_init(void)
{
	/* Four priorities and eight root queues are expected. */
	dispatch_assert(DISPATCH_QUEUE_PRIORITY_COUNT == 4);
	dispatch_assert(DISPATCH_ROOT_QUEUE_COUNT == 8);

	/* LOW must be the arithmetic negation of HIGH. */
	dispatch_assert(DISPATCH_QUEUE_PRIORITY_LOW ==
			-DISPATCH_QUEUE_PRIORITY_HIGH);
	/* Both root queue tables must have exactly one entry per root queue. */
	dispatch_assert(countof(_dispatch_root_queues) ==
			DISPATCH_ROOT_QUEUE_COUNT);
	dispatch_assert(countof(_dispatch_root_queue_contexts) ==
			DISPATCH_ROOT_QUEUE_COUNT);

    ......

	/*
	 * Create the per-thread keys. The second argument is presumably the
	 * destructor run on the thread's value at thread exit, as with
	 * pthread_key_create -- confirm against _dispatch_thread_key_create.
	 */
	_dispatch_thread_key_create(&dispatch_queue_key, _dispatch_queue_cleanup);
	_dispatch_thread_key_create(&dispatch_sema4_key,
			(void (*)(void *))_dispatch_thread_semaphore_dispose);
	_dispatch_thread_key_create(&dispatch_cache_key, _dispatch_cache_cleanup);
	_dispatch_thread_key_create(&dispatch_io_key, NULL);
	_dispatch_thread_key_create(&dispatch_apply_key, NULL);
#if DISPATCH_PERF_MON
	/* Extra key, only when performance monitoring is compiled in. */
	_dispatch_thread_key_create(&dispatch_bcounter_key, NULL);
#endif
    ......
}

基于大概率使用的派发任务,libdispatch缓存了dispatch_continuation_s,采用复用模式的做法在每次async中尝试去获取空闲的continuation变量,通过两个函数存取数据:

/* Returns the calling thread's value for the given key, or NULL if none is set. */
void* _Nullable pthread_getspecific(pthread_key_t);
/* Binds a value to the given key for the calling thread; returns 0 on success. */
int pthread_setspecific(pthread_key_t , const void * _Nullable);