admin 管理员组文章数量: 1184232
dma
dma-fence是Linux中用于不同内核模块之间DMA同步操作的原语,常用于GPU Rendering、display buffer等之间的同步。使用dma-fence可以减少在用户态的等待,让数据的同步在内核中进行。它是内核中一种比较常用的同步机制,本身的实现和使用并不复杂,只有两种状态:signaled和unsignaled。可能正是因为其本身的精简,在融入其他概念时,不同的环境赋予了dma-fence不同的含义,所以通常需要根据dma-fence的具体使用情况来理解其含义。
dma-fence是内核中的同步原语,本身只能表示两种状态,这点上就和completion(完成量)有点类似了。
但是dma-fence是可以跨设备,跨进程的。
具体来说:
1.就是A设备驱动程序中创建的dma-fence可以被B驱动程序使用。
2.dma-fence是由内核创建,但是可以在进程间传递,并且可以在用户层获取fence的当前状态。
而常规的内核中的同步方法,则不具备对上述两点的支持。
基本原理:
一个被初始化的dma-fence,使用wait函数后,会将当前进程换出,即当前进程会sleep,而当调用signal函数时会唤醒被wait函数换出的进程。
dma-fence的使用还可以通过向dma-fence添加一个或多个callback函数,当dma-fence调用signal操作时,会依次遍历callback list,并调用每个callback函数。当调用wait函数时,会把默认的一个callback函数加入到dma-fence中,而这个函数就起到唤醒的作用。
dma-fence在内核中被创建,可以通过导出一个文件描述符fd到user层,然后用户层可以对该fd做常规的文件操作,也可以把该fence传递给其他进程。这个fd给到内核中后,又可以还原出dma-fence的内核数据结构。所以在user层看到的dma-fence是一个文件描述符。
其中提到的几个操作对应的函数如下:
- init:dma_fence_init()
- wait:dma_fence_wait()
- signal:dma_fence_signal()
- callback:dma_fence_add_callback()
dma-fence demo
demo的流程如下,本DEMO使用了两种唤醒机制,分别为POLL和fence唤醒,前者并没有使用FENCE机制同步,而是使用了驱动自己的就绪队列,后者使用了FENCE机制进行了同步,使用了FENCE对象自身的唤醒队列。
源码如下:
#include <linux/module.h>
#include <linux/miscdevice.h>
#include <linux/types.h>
#include <asm/ioctl.h>
#include <asm/fcntl.h>
#include <linux/uaccess.h>
#include <linux/dma-fence.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/sync_file.h>
#include <linux/fs.h>
#include <linux/poll.h>#define DMA_FENCE_WAIT_CMD _IOWR('f', 0, int)
#define DMA_FENCE_EXPORT_CMD _IOWR('f', 1, int)
#define DMA_FENCE_SIGNAL_CMD _IO('f', 2)static int in_fence_fd = -1;
static int out_fence_fd = -1;
static int poll_signaled = 0;static struct dma_fence_cb cb;
static wait_queue_head_t poll_wait_head;static DEFINE_SPINLOCK(fence_lock);static void dma_fence_cb(struct dma_fence *f, struct dma_fence_cb *cb)
{//dump_stack();printk("dma-fence callback !.\n");
}static const char *dma_fence_get_name(struct dma_fence *fence)
{return "dma-fence-example";
}static const struct dma_fence_ops fence_ops = {.get_driver_name = dma_fence_get_name,.get_timeline_name = dma_fence_get_name,
};static void iter_fence_callbac(struct dma_fence *fence)
{unsigned long flags;struct dma_fence_cb *cur, *tmp;spin_lock_irqsave(fence->lock, flags);list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {printk("%s line %d cur->func = 0x%px, 0x%pS.\n", __func__, __LINE__, cur->func, cur->func);}spin_unlock_irqrestore(fence->lock, flags);return;
}static long fence_ioctl(struct file *filp,unsigned int cmd, unsigned long arg)
{struct sync_file *sync_file;struct dma_fence *in_fence;struct dma_fence *out_fence;out_fence = (struct dma_fence *)filp->private_data;if (out_fence == NULL) {pr_err("%s line %d. fence is null.\n", __func__, __LINE__);return -1;}switch (cmd) {case DMA_FENCE_SIGNAL_CMD:if (out_fence) {printk("signal fence, seqno %lld.\n", out_fence->seqno);iter_fence_callbac(out_fence);dma_fence_signal(out_fence);wake_up_interruptible(&poll_wait_head);poll_signaled = 1;}break;case DMA_FENCE_WAIT_CMD:if (copy_from_user(&in_fence_fd, (void __user *)arg, sizeof(int)) != 0)return -EFAULT;in_fence = sync_file_get_fence(in_fence_fd);if (!in_fence)return -EINVAL;printk("Get in-fence from fd = %d, in_fence 0x%px.\n", in_fence_fd, in_fence);/* add a callback func */dma_fence_add_callback(in_fence, &cb, dma_fence_cb);printk("waiting in-fence seqno %lld to be signaled, process is blocking ...\n", in_fence->seqno);dma_fence_wait(in_fence, true);printk("in-fence signaled, process exit\n");dma_fence_put(in_fence);break;case DMA_FENCE_EXPORT_CMD:if (!out_fence)return -EINVAL;sync_file = sync_file_create(out_fence);out_fence_fd = get_unused_fd_flags(O_CLOEXEC);fd_install(out_fence_fd, sync_file->file);set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &out_fence->flags);if (copy_to_user((void __user *)arg, &out_fence_fd, sizeof(int)) != 0)return -EFAULT;printk("Created an out-fence fd = %d, out_fence = 0x%px,seqno %lld.\n", out_fence_fd, out_fence, out_fence->seqno);dma_fence_put(out_fence);break;default:printk("bad cmd.\n");break;}return 0;
}static struct dma_fence *create_fence(void)
{struct dma_fence *fence;fence = kzalloc(sizeof(*fence), GFP_KERNEL);if (!fence)return NULL;dma_fence_init(fence, &fence_ops, &fence_lock, 0, 90);dma_fence_get(fence);return fence;
}static int fence_open(struct inode *inode, struct file *filp)
{struct dma_fence *out_fence;/* create an new fence */out_fence = create_fence();if (!out_fence)return -ENOMEM;filp->private_data = out_fence;init_waitqueue_head(&poll_wait_head);return 0;
}static int fence_close(struct inode *inode, struct file *filp)
{struct dma_fence *out_fence = NULL;out_fence = (struct dma_fence *)filp->private_data;if (out_fence == NULL) {pr_err("%s line %d.fatal error. fence is null.\n", __func__, __LINE__);return -1;}dma_fence_put(out_fence);return 0;
}static __poll_t fence_poll(struct file *filp, struct poll_table_struct *wait)
{__poll_t mask = 0;poll_wait(filp, &poll_wait_head, wait);if (poll_signaled) {mask = EPOLLIN | EPOLLRDNORM;poll_signaled = 0;printk("%s line %d, poll signaled.\n", __func__, __LINE__);}return mask;
}static struct file_operations fence_fops = {.owner = THIS_MODULE,.unlocked_ioctl = fence_ioctl,.open = fence_open,.poll = fence_poll,.release = fence_close,
};static struct miscdevice mdev = {.minor = MISC_DYNAMIC_MINOR,.name = "dma-fence",.fops = &fence_fops,
};static int __init dma_fence_demo_init(void)
{return misc_register(&mdev);
}static void __exit dma_fence_demo_unint(void)
{misc_deregister(&mdev);
}module_init(dma_fence_demo_init);
module_exit(dma_fence_demo_unint);MODULE_AUTHOR("czl");
MODULE_LICENSE("GPL v2");
测试代码:
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/ioctl.h>
#include <poll.h>#define DMA_FENCE_WAIT_CMD _IOWR('f', 0, int)
#define DMA_FENCE_EXPORT_CMD _IOWR('f', 1, int)
#define DMA_FENCE_SIGNAL_CMD _IO('f', 2)
#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)//#define BLOCKING_IN_KERNELstatic int fd = -1;static inline int sync_wait(int fd, int timeout)
{struct pollfd fds = {0};int ret;fds.fd = fd;fds.events = POLLIN;do {ret = poll(&fds, 1, timeout);if (ret > 0) {if (fds.revents & (POLLERR | POLLNVAL)) {errno = EINVAL;return -1;}printf("%s line %d, DEFAULT_POLLMASK = 0x%x, fds_revents = 0x%x.\n", \__func__, __LINE__, DEFAULT_POLLMASK, fds.revents);return 0;} else if (ret == 0) {errno = ETIME;return -1;}} while (ret == -1 && (errno == EINTR || errno == EAGAIN));return ret;
}static void *signal_pthread(void *arg)
{sleep(10);if (ioctl(fd, DMA_FENCE_SIGNAL_CMD) < 0) {perror("get out fence fd fail\n");}return NULL;
}int main(void)
{int out_fence_fd;pthread_t tidp;fd = open("/dev/dma-fence", O_RDWR | O_NONBLOCK, 0);if (-1 == fd) {printf("Cannot open dma-fence dev\n");exit(1);}if (ioctl(fd, DMA_FENCE_EXPORT_CMD, &out_fence_fd) < 0) {perror("get out fence fd fail\n");close(fd);return -1;}printf("Get an out-fence fd = %d\n", out_fence_fd);if ((pthread_create(&tidp, NULL, signal_pthread, NULL)) == -1) {printf("create error!\n");close(out_fence_fd);close(fd);return -1;}#ifdef BLOCKING_IN_KERNELprintf("waiting out-fence to be signaled on kernel side ...\n");if (ioctl(fd, DMA_FENCE_WAIT_CMD, &out_fence_fd) < 0) {perror("get out fence fd fail\n");close(out_fence_fd);close(fd);return -1;}
#elseprintf("Waiting out-fence to be signaled on USER side ...\n");sync_wait(out_fence_fd, -1);
#endifprintf("out-fence is signaled\n");if (pthread_join(tidp, NULL)) {printf("thread is not exit...\n");return -1;}close(out_fence_fd);close(fd);return 0;
}
测试过程,安装内核模块后,运行用例,程序运行卡10秒钟后,signal线程发出信号,主线程等到信号后退出。
fence中挂接了多个callback.
同步方式
通过signal/wait进行同步,wait阻塞线程,阻塞的线程通过signal触发唤醒。
另一种方式使用signal/poll机制:内核驱动通过调用sync_file接口将fence包装成一个sync_file对象,并和一个匿名文件建立关联,再将匿名文件的FD导出到其它模块;其它模块通过FD得到匿名文件,进而调用poll进入阻塞状态,直到dma_fence_signal将其唤醒。
匿名文件fops定义如下。
其POLL函数会注册一个FENCE唤醒的CALLBACK 回调函数,然后将当前线程加入等待队列头,此回调函数在FENCE SIGNAL被触发时调用,唤醒在sync_file队列头上睡眠的线程。
一般以上两种同步情况一方会调用dma_fence_signal,但是另一方不必调用poll或者dma_fence_wait,这种情况下实际上只有外部定义的CALLBACK会被调用到,内部的wake up callback没有被注册,自然也不会被调用。
sync_file
内核中可以通过sync_file_create->sync_file_alloc 生成了一个anon_inode_inode对应的匿名文件,之后,用户驱动就可以从当前进程的文件表中获取一个未占用的FD,将后者同FILE绑定起来,之后将FD返回给用户态,之后,用户态就可以通过标准的文件系统调用操作FENCE了。典型的比如FENCE 对POLL 调用的支持。
关于匿名文件的创建流程,可以参考内核源码中KVM虚拟机匿名文件的创建过程。
sync_file poll的流程如下:
安卓中sync_wait fence阻塞原理,这里的FD对应的就是内核中的sync_file.
/+/master/libsync/sync.c
等待队列的结构:
动态FENCE同步:
动态创建FENCE并同步,注意其中KREF引用计数的用法:通过引用计数控制FENCE的生命期,FENCE对象可以独立于某个驱动模块存在。
#include <linux/module.h>
#include <linux/miscdevice.h>
#include <linux/types.h>
#include <asm/ioctl.h>
#include <asm/fcntl.h>
#include <linux/uaccess.h>
#include <linux/dma-fence.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/sync_file.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/syscalls.h>
#include <linux/fdtable.h>
#include <linux/version.h>#define DMA_FENCE_WAIT_CMD _IOWR('f', 0, int)
#define DMA_FENCE_EXPORT_CMD _IOWR('f', 1, int)
#define DMA_FENCE_SIGNAL_CMD _IO('f', 2)static int in_fence_fd = -1;
static int out_fence_fd = -1;
static int poll_signaled = 0;static struct dma_fence_cb cb;
static wait_queue_head_t poll_wait_head;static DEFINE_SPINLOCK(fence_lock);static __maybe_unused struct sync_file *sync_file_fdget(int fd)
{struct file *file = fget(fd);if (file == NULL) {return NULL;}return file->private_data;
}static void dma_fence_cb(struct dma_fence *f, struct dma_fence_cb *cb)
{//dump_stack();printk("dma-fence callback!, seqno %lld.\n", f->seqno);
}static const char *dma_fence_get_name(struct dma_fence *fence)
{return "dma-fence-example";
}static const struct dma_fence_ops fence_ops = {.get_driver_name = dma_fence_get_name,.get_timeline_name = dma_fence_get_name,
};static void iter_fence_callbac(struct dma_fence *fence)
{unsigned long flags;struct dma_fence_cb *cur, *tmp;spin_lock_irqsave(fence->lock, flags);list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {printk("%s line %d cur->func = 0x%px, 0x%pS.\n", __func__, __LINE__, cur->func, cur->func);}spin_unlock_irqrestore(fence->lock, flags);return;
}static long fence_ioctl(struct file *filp,unsigned int cmd, unsigned long arg)
{struct sync_file *sync_file;struct dma_fence *in_fence;struct dma_fence *fence;int signal_fd;static unsigned long seqno = 0;switch (cmd) {case DMA_FENCE_SIGNAL_CMD:if (copy_from_user(&signal_fd, (void __user *)arg, sizeof(int)) != 0)return -EFAULT;in_fence = sync_file_get_fence(signal_fd);if (!in_fence)return -EINVAL;iter_fence_callbac(in_fence);printk("signal fence, seqno %lld.\n", in_fence->seqno);dma_fence_signal(in_fence);dma_fence_put(in_fence);//wake_up_interruptible(&poll_wait_head);//poll_signaled = 1;break;case DMA_FENCE_WAIT_CMD:if (copy_from_user(&in_fence_fd, (void __user *)arg, sizeof(int)) != 0)return -EFAULT;// will increase the fence kref refcount.in_fence = sync_file_get_fence(in_fence_fd);if (!in_fence)return -EINVAL;printk("get in-fence from fd = %d, in_fence 0x%px.\n", in_fence_fd, in_fence);// must be held before goto wait for loop scenario.clear_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &in_fence->flags);//INIT_LIST_HEAD(&in_fence->cb_list);if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &in_fence->flags)) {printk("%s line %d, fence has been signaled.\n", __func__, __LINE__);} else {printk("%s line %d, fence has not been signaled.\n", __func__, __LINE__);}/* add a callback func */dma_fence_add_callback(in_fence, &cb, dma_fence_cb);printk("waiting in-fence seqno %lld to be signaled, process is blocking ...\n",in_fence->seqno);dma_fence_wait(in_fence, true);printk("in-fence signo %lld signaled, fence refcount %d.\n", in_fence->seqno, kref_read(&in_fence->refcount));// now relese the fence corrspont sync file get.dma_fence_put(in_fence);//release the first create refcount.dma_fence_put(in_fence);#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 6, 0)__close_fd(current->files, in_fence_fd);
#elseclose_fd(in_fence_fd);
#endifprintk("in-fence signo %lld signaled, fence refcount %d.\n", in_fence->seqno, kref_read(&in_fence->refcount));break;case DMA_FENCE_EXPORT_CMD:fence = kzalloc(sizeof(*fence), GFP_KERNEL);if (!fence)return -1;// dma_fence_init will call kref_init set the fence krefcount to 1.dma_fence_init(fence, &fence_ops, &fence_lock, 0, seqno ++);// get the fence ,now refcount to 2.dma_fence_get(fence);// get the fence ,now refcount to 3.sync_file = sync_file_create(fence);out_fence_fd = get_unused_fd_flags(O_CLOEXEC);fd_install(out_fence_fd, sync_file->file);set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags);if (copy_to_user((void __user *)arg, &out_fence_fd, sizeof(int)) != 0)return -EFAULT;printk("Created an out-fence fd = %d, fence = 0x%px,seqno %lld, outfd %d.\n",out_fence_fd, fence, fence->seqno, out_fence_fd);// put the fence corrspontd sync_file_get, now refcount to 2.dma_fence_put(fence);// put the fence, now refcount to 1, now we use it.dma_fence_put(fence);printk("out-fence signo %lld signaled, fence refcount %d.\n", fence->seqno, kref_read(&fence->refcount));break;default:printk("bad cmd.\n");break;}return 0;
}static __maybe_unused struct dma_fence *create_fence(void)
{struct dma_fence *fence;fence = kzalloc(sizeof(*fence), GFP_KERNEL);if (!fence)return NULL;dma_fence_init(fence, &fence_ops, &fence_lock, 0, 90);dma_fence_get(fence);return fence;
}static int fence_open(struct inode *inode, struct file *filp)
{
#if 0struct dma_fence *out_fence;/* create an new fence */out_fence = create_fence();if (!out_fence)return -ENOMEM;filp->private_data = out_fence;init_waitqueue_head(&poll_wait_head);
#endifreturn 0;
}static int fence_close(struct inode *inode, struct file *filp)
{
#if 0struct dma_fence *out_fence = NULL;out_fence = (struct dma_fence *)filp->private_data;if (out_fence == NULL) {pr_err("%s line %d.fatal error. fence is null.\n", __func__, __LINE__);return -1;}dma_fence_put(out_fence);
#endifreturn 0;
}static __poll_t fence_poll(struct file *filp, struct poll_table_struct *wait)
{__poll_t mask = 0;poll_wait(filp, &poll_wait_head, wait);if (poll_signaled) {mask = EPOLLIN | EPOLLRDNORM;poll_signaled = 0;printk("%s line %d, poll signaled.\n", __func__, __LINE__);}return mask;
}static struct file_operations fence_fops = {.owner = THIS_MODULE,.unlocked_ioctl = fence_ioctl,.open = fence_open,.poll = fence_poll,.release = fence_close,
};static struct miscdevice mdev = {.minor = MISC_DYNAMIC_MINOR,.name = "dma-fence",.fops = &fence_fops,
};static int __init dma_fence_demo_init(void)
{return misc_register(&mdev);
}static void __exit dma_fence_demo_unint(void)
{misc_deregister(&mdev);
}module_init(dma_fence_demo_init);
module_exit(dma_fence_demo_unint);MODULE_AUTHOR("czl");
MODULE_LICENSE("GPL v2");
测试用例
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/ioctl.h>
#include <poll.h>#define DMA_FENCE_WAIT_CMD _IOWR('f', 0, int)
#define DMA_FENCE_EXPORT_CMD _IOWR('f', 1, int)
#define DMA_FENCE_SIGNAL_CMD _IO('f', 2)
#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)#define BLOCKING_IN_KERNELstatic int fd = -1;static inline int sync_wait(int fd, int timeout)
{struct pollfd fds = {0};int ret;fds.fd = fd;fds.events = POLLIN;do {ret = poll(&fds, 1, timeout);if (ret > 0) {if (fds.revents & (POLLERR | POLLNVAL)) {errno = EINVAL;return -1;}printf("%s line %d, DEFAULT_POLLMASK = 0x%x, fds_revents = 0x%x.\n", \__func__, __LINE__, DEFAULT_POLLMASK, fds.revents);return 0;} else if (ret == 0) {errno = ETIME;return -1;}} while (ret == -1 && (errno == EINTR || errno == EAGAIN));return ret;
}static void *signal_pthread(void *arg)
{while (1) {sleep(1);if (ioctl(fd, DMA_FENCE_SIGNAL_CMD, arg) < 0) {perror("get out fence fd fail\n");}}return NULL;
}int main(void)
{int out_fence_fd;pthread_t tidp;fd = open("/dev/dma-fence", O_RDWR | O_NONBLOCK, 0);if (-1 == fd) {printf("Cannot open dma-fence dev\n");exit(1);}if (ioctl(fd, DMA_FENCE_EXPORT_CMD, &out_fence_fd) < 0) {perror("get out fence fd fail\n");close(fd);return -1;}printf("Get an out-fence fd = %d\n", out_fence_fd);if ((pthread_create(&tidp, NULL, signal_pthread, &out_fence_fd)) == -1) {printf("create error!\n");close(out_fence_fd);close(fd);return -1;}#ifdef BLOCKING_IN_KERNELwhile (1) {printf("waiting out-fence to be signaled on kernel side ...\n");if (ioctl(fd, DMA_FENCE_WAIT_CMD, &out_fence_fd) < 0) {perror("get out fence fd fail\n");close(out_fence_fd);close(fd);return -1;}if (ioctl(fd, DMA_FENCE_EXPORT_CMD, &out_fence_fd) < 0) {perror("get out fence fd fail\n");close(fd);return -1;}}
#elseprintf("Waiting out-fence to be signaled on USER side ...\n");sync_wait(out_fence_fd, -1);
#endifprintf("out-fence is signaled\n");if (pthread_join(tidp, NULL)) {printf("thread is not exit...\n");return -1;}close(out_fence_fd);close(fd);return 0;
}
在DRM GPU调度器中的使用模拟
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/sched/signal.h>
#include <linux/dma-fence.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#define assert(expr) \if (!(expr)) { \printk( "Assertion failed! %s,%s,%s,line=%d\n",\#expr,__FILE__,__func__,__LINE__); \BUG(); \}#define num_hw_submission 128
struct fence_driver {uint64_t gpu_addr;volatile uint32_t *cpu_addr;uint32_t sync_seq;atomic_t last_seq;bool initialized;unsigned num_fences_mask;spinlock_t lock;struct dma_fence **fences;struct mutex mutex;struct dma_fence *last_fence;struct timer_list timer;
};static struct fence_driver *ring;
static uint32_t fence_seq;
static struct task_struct *fence_emit_task;
static struct task_struct *fence_recv_task;
static struct kmem_cache *fence_slab;
static const char *dma_fence_get_name(struct dma_fence *fence)
{return "dma-fence-drv";
}static const struct dma_fence_ops fence_ops = {.get_driver_name = dma_fence_get_name,.get_timeline_name = dma_fence_get_name,
};static int fence_recv_task_thread(void *data)
{uint32_t seqno_next = 0;//mutex_lock(&ring->mutex);while (ring->initialized == false) {set_current_state(TASK_UNINTERRUPTIBLE);if (ring->initialized == true) {break;}//mutex_unlock(&ring->mutex);schedule();//mutex_lock(&ring->mutex);}set_current_state(TASK_RUNNING);//mutex_unlock(&ring->mutex);while (!kthread_should_stop()) {
#if 0msleep(1000);printk("%s line %d.\n", __func__, __LINE__);
#elseuint32_t seq, last_seq;int r;do {last_seq = atomic_read(&ring->last_seq);seq = *ring->cpu_addr;if (kthread_should_stop())return 0;} while (atomic_cmpxchg(&ring->last_seq, last_seq, seq) != last_seq);//printk("%s line %d, last_seq %d, seq %d, sync_seq %d.\n", __func__, __LINE__, last_seq, seq, ring->sync_seq);if (unlikely(seq == last_seq)) {msleep(10);continue;}assert(seq > last_seq);last_seq &= ring->num_fences_mask;seq &= ring->num_fences_mask;//printk("%s line %d, last_seq %d, seq %d, sync_seq %d.\n", __func__, __LINE__, last_seq, seq, ring->sync_seq);do {struct dma_fence *fence, **ptr;++last_seq;last_seq &= ring->num_fences_mask;ptr = &ring->fences[last_seq];fence = rcu_dereference_protected(*ptr, 1);RCU_INIT_POINTER(*ptr, NULL);if (!fence) {continue;}if (seqno_next == 0 || seqno_next == fence->seqno) {seqno_next = fence->seqno + 1;} else { /*if (seqno_next != 0 && seqno_next != fence->seqno)*/pr_err("%s line %d, seqno is not continue, exptect %d, actual %lld.\n",__func__, __LINE__, seqno_next, fence->seqno);}#if 1printk("%s line %d, last_seq %d, seq %d, signal %lld, slot %d.\n",__func__, __LINE__, last_seq, seq, fence->seqno, last_seq);
#endifr = dma_fence_signal(fence);if (kthread_should_stop())return 0;if (r) {BUG();}dma_fence_put(fence);} while (last_seq != seq);//msleep(1000);
#endif}set_current_state(TASK_RUNNING);return 0;
}static int fence_emit_task_thread(void *data)
{int r;//mutex_lock(&ring->mutex);while (ring->initialized == false) {set_current_state(TASK_UNINTERRUPTIBLE);if (ring->initialized == true) {break;}//mutex_unlock(&ring->mutex);schedule();//mutex_lock(&ring->mutex);}set_current_state(TASK_RUNNING);//mutex_unlock(&ring->mutex);while (!kthread_should_stop()) {
#if 0msleep(1000);printk("%s line %d.\n", __func__, __LINE__);
#elsestruct dma_fence __rcu **ptr;struct dma_fence *fence;uint32_t seq;fence = kmem_cache_alloc(fence_slab, GFP_KERNEL);if (fence == NULL) {pr_err("%s line %d, alloc fence from fence slab failure.\n",__func__, __LINE__);return -1;}seq = ++ring->sync_seq;dma_fence_init(fence, &fence_ops, &ring->lock, 0, seq);ptr = &ring->fences[seq & ring->num_fences_mask];//printk("%s line %d, seq = %d.\n", __func__, __LINE__, seq);if (unlikely(rcu_dereference_protected(*ptr, 1))) {struct dma_fence *old;int diff;rcu_read_lock();old = dma_fence_get_rcu_safe(ptr);rcu_read_unlock();if (old) {dma_fence_get(old);ring->last_fence = old;r = dma_fence_wait(old, false);ring->last_fence = NULL;dma_fence_put(old);if (kthread_should_stop())continue;if (r)return r;diff = seq - old->seqno;printk("%s line %d, fence wokenup, seqno %lld, seq %d, slot %d, diff %d.\n",__func__, __LINE__, old->seqno, seq, seq & ring->num_fences_mask, diff);if (diff != num_hw_submission * 2) {pr_err("%s line %d, fatal error, diff not match totoal ring.\n",__func__, __LINE__);}}}/**printk("%s line %d, fence add, seqno %lld, seq %d, slot %d.\n",* __func__, __LINE__, fence->seqno, seq, seq & ring->num_fences_mask);*///printk("%s line %d, fence emit.\n", __func__, __LINE__);rcu_assign_pointer(*ptr, dma_fence_get(fence));
#endif}set_current_state(TASK_RUNNING);return 0;
}void gpu_job_consume_interrupt(struct timer_list *timer)
{uint32_t seq, oldseq;seq = ring->sync_seq;oldseq = fence_seq;// trigger a job done on device.if (fence_seq == 0) {if (seq > 6)fence_seq = seq - 4;} else if ((seq - fence_seq) > 100) {fence_seq += (seq - fence_seq) / 2;assert(fence_seq > oldseq);}printk("%s line %d, timer trigger job.\n", __func__, __LINE__);mod_timer(timer, jiffies + HZ / 2);
}static int __init fencedrv_init(void)
{if ((num_hw_submission & (num_hw_submission - 1)) != 0) {pr_err("%s line %d, num_hw_submission must be power of two.\n",__func__, __LINE__);return -1;}ring = kzalloc(sizeof(*ring), GFP_KERNEL);if (ring == NULL) {pr_err("%s line %d, alloc fence driver failure.\n",__func__, __LINE__);return -ENOMEM;}ring->cpu_addr = &fence_seq;ring->gpu_addr = (uint64_t)&fence_seq;ring->sync_seq = 0;atomic_set(&ring->last_seq, 0);ring->initialized = false;ring->last_fence = NULL;ring->num_fences_mask = num_hw_submission * 2 - 1;spin_lock_init(&ring->lock);ring->fences = kcalloc(num_hw_submission * 2, sizeof(void *), GFP_KERNEL);if (!ring->fences) {pr_err("%s line %d, alloc fence buffer failure.\n",__func__, __LINE__);return -ENOMEM;}fence_slab = kmem_cache_create("fence_slab", sizeof(struct dma_fence), 0,SLAB_HWCACHE_ALIGN, NULL);if (!fence_slab) {pr_err("%s line %d, alloc fence_slab falure.\n",__func__, __LINE__);return -ENOMEM;}mutex_init(&ring->mutex);fence_emit_task = kthread_run(fence_emit_task_thread, NULL, "fence_emit");if (IS_ERR(fence_emit_task)) {pr_err("%s line %d, create fence emit tsk failure.\n",__func__, __LINE__);return -1;}fence_recv_task = kthread_run(fence_recv_task_thread, NULL, "fence_recv");if (IS_ERR(fence_recv_task)) {pr_err("%s line %d, create fence recv tsk failure.\n",__func__, __LINE__);return -1;}timer_setup(&ring->timer, gpu_job_consume_interrupt, TIMER_IRQSAFE);add_timer(&ring->timer);mod_timer(&ring->timer, jiffies + HZ / 2);printk("%s line %d, module init.\n", __func__, __LINE__);ring->initialized = true;wake_up_process(fence_emit_task);wake_up_process(fence_recv_task);return 0;
}static void __exit fencedrv_exit(void)
{printk("%s line %d, module unload task begin.\n", __func__, __LINE__);if ((ring->last_fence != NULL) &&(!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &ring->last_fence->flags))) {dma_fence_signal(ring->last_fence);dma_fence_put(ring->last_fence);}del_timer(&ring->timer);kthread_stop(fence_emit_task);printk("%s line %d, module unload task mid.\n", __func__, __LINE__);kthread_stop(fence_recv_task);printk("%s line %d, module unload task end.\n", __func__, __LINE__);rcu_barrier();kmem_cache_destroy(fence_slab);kfree(ring->fences);kfree(ring);printk("%s line %d, module unload.\n", __func__, __LINE__);
}module_init(fencedrv_init);
module_exit(fencedrv_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("czl");
makefile
# Kbuild part: evaluated when invoked from the kernel build system.
ifneq ($(KERNELRELEASE),)
# NOTE(review): this flag targets seqfile.o, which this module does not
# build -- presumably a leftover from another example; confirm before removing.
CFLAGS_seqfile.o:=-I$(src)
obj-m:=fencedrv.o
else
# Stand-alone part: build against the headers of the running kernel.
KERNELDIR:=/lib/modules/$(shell uname -r)/build
PWD:=$(shell pwd)

.PHONY: all clean format

all:
	$(MAKE) -C $(KERNELDIR) M=$(PWD) modules

clean:
	rm -rf *.o *.mod.c *.mod.o *.ko *.symvers *.mod .*.cmd *.order

format:
	astyle --options=linux.astyle *.[ch]
endif
FENCE的历史演进
在4.9版附近,进行了一次API重命名,从fence_xxxx变为现在我们看到的样子:
首次提交发生在v3.16,大概是2014年中
变更点提交分别为:
分析
深入分析——Linux DMA Fence_Bystander_J的博客-CSDN博客
fence生命期
dma_fence的生命期由dma_fence_init/dma_fence_get/dma_fence_put三个接口控制kref refcount实现:
dma_fence_init将refcount置1。
dma_fence_put递减引用计数,如果引用计数归0,则调用release callback释放fence占用的内存。
dma_fence_get增加引用计数。
由于fence初始化时必须调用dma_fence_init首先将引用计数置为1,所以如果一个fence在使用过程中没有调用dma_fence_get, 则只需要调用一次dma_fence_put,即可将fence释放。
参考资料
Android 重学系列 fence原理 - 简书
.html
AMD GPU任务调度(3) —— fence机制_享乐主的博客-CSDN博客
结束
本文标签: dma
版权声明:本文标题:dma 内容由网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:http://www.roclinux.cn/p/1700360290a413052.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
发表评论