记一次云盘热拔失败

虚机中有个盘拔失败了

这里主要记录一下怎么找到vring相关信息的

crash> set ffff95b2b9078000
    PID: 1697
COMMAND: "kworker/u416:1"
   TASK: ffff95b2b9078000  [THREAD_INFO: ffff95b2b9078000]
    CPU: 181
  STATE: TASK_UNINTERRUPTIBLE
crash> bt
PID: 1697   TASK: ffff95b2b9078000  CPU: 181  COMMAND: "kworker/u416:1"
 #0 [ffffa2835c82bb10] __schedule at ffffffff863092d2
 #1 [ffffa2835c82bba0] schedule at ffffffff86309906
 #2 [ffffa2835c82bbb8] blk_mq_freeze_queue_wait at ffffffff85e322d6
 #3 [ffffa2835c82bc00] blk_cleanup_queue at ffffffff85e27508
 #4 [ffffa2835c82bc18] virtblk_remove at ffffffffc02e3307 [virtio_blk]
 #5 [ffffa2835c82bc30] virtio_dev_remove at ffffffffc0297499 [virtio]
 #6 [ffffa2835c82bc50] __device_release_driver at ffffffff8605da7b
 #7 [ffffa2835c82bc78] device_release_driver at ffffffff8605db74
 #8 [ffffa2835c82bc90] bus_remove_device at ffffffff8605cf4b
 #9 [ffffa2835c82bcb0] device_del at ffffffff86057c0b
#10 [ffffa2835c82bcf8] device_unregister at ffffffff86057ec6
#11 [ffffa2835c82bd08] unregister_virtio_device at ffffffffc02972b1 [virtio]
#12 [ffffa2835c82bd18] virtio_pci_remove at ffffffffc02d84cd [virtio_pci]
#13 [ffffa2835c82bd38] pci_device_remove at ffffffff85f2cc48
#14 [ffffa2835c82bd58] __device_release_driver at ffffffff8605da7b
#15 [ffffa2835c82bd80] device_release_driver at ffffffff8605db74
#16 [ffffa2835c82bd98] pci_stop_bus_device at ffffffff85f225cc
#17 [ffffa2835c82bdb8] pci_stop_and_remove_bus_device at ffffffff85f227ee
#18 [ffffa2835c82bdc8] disable_slot at ffffffff85f49509
#19 [ffffa2835c82bde8] acpiphp_disable_and_eject_slot at ffffffff85f49775
#20 [ffffa2835c82be00] acpiphp_hotplug_notify at ffffffff85f4a702
#21 [ffffa2835c82be38] acpi_device_hotplug at ffffffff85f74afc
#22 [ffffa2835c82be80] acpi_hotplug_work_fn at ffffffff85f6a09d
#23 [ffffa2835c82be90] process_one_work at ffffffff85aaf7c3
#24 [ffffa2835c82bed0] worker_thread at ffffffff85aaf9b3
#25 [ffffa2835c82bf10] kthread at ffffffff85ab5b48
#26 [ffffa2835c82bf50] ret_from_fork at ffffffff85a0451f

##可以看到virtblk_remove的参数是virtio_device结构体指针,所以先找它
crash> bt
PID: 1697   TASK: ffff95b2b9078000  CPU: 181  COMMAND: "kworker/u416:1"
 #0 [ffffa2835c82bb10] __schedule at ffffffff863092d2
 #1 [ffffa2835c82bba0] schedule at ffffffff86309906
 #2 [ffffa2835c82bbb8] blk_mq_freeze_queue_wait at ffffffff85e322d6
 #3 [ffffa2835c82bc00] blk_cleanup_queue at ffffffff85e27508
 #4 [ffffa2835c82bc18] virtblk_remove at ffffffffc02e3307 [virtio_blk]
 #5 [ffffa2835c82bc30] virtio_dev_remove at ffffffffc0297499 [virtio]
 #6 [ffffa2835c82bc50] __device_release_driver at ffffffff8605da7b
 #7 [ffffa2835c82bc78] device_release_driver at ffffffff8605db74
 #8 [ffffa2835c82bc90] bus_remove_device at ffffffff8605cf4b

 static void virtblk_remove(struct virtio_device *vdev)
{

##看一下virtio_dev_remove的汇编,在调用remove回调(callq *%rax)之前,rbp和rdi都是virtio_device的地址(由入参rdi减0x10得到)
crash> dis -lr ffffffffc0297499
0xffffffffc0297470 <virtio_dev_remove>: nopl   0x0(%rax,%rax,1) [FTRACE NOP]
0xffffffffc0297475 <virtio_dev_remove+5>:       push   %r12
0xffffffffc0297477 <virtio_dev_remove+7>:       push   %rbp
0xffffffffc0297478 <virtio_dev_remove+8>:       lea    -0x10(%rdi),%rbp
0xffffffffc029747c <virtio_dev_remove+12>:      push   %rbx
0xffffffffc029747d <virtio_dev_remove+13>:      mov    0x68(%rdi),%r12
0xffffffffc0297481 <virtio_dev_remove+17>:      mov    %rdi,%rbx
0xffffffffc0297484 <virtio_dev_remove+20>:      mov    %rbp,%rdi
0xffffffffc0297487 <virtio_dev_remove+23>:      callq  0xffffffffc02973e0 <virtio_config_disable>
0xffffffffc029748c <virtio_dev_remove+28>:      mov    0xd0(%r12),%rax
0xffffffffc0297494 <virtio_dev_remove+36>:      mov    %rbp,%rdi
0xffffffffc0297497 <virtio_dev_remove+39>:      callq  *%rax
0xffffffffc0297499 <virtio_dev_remove+41>:      nopl   (%rax)

##看一下virtblk_remove的汇编,第二个push把rbp给push了(此时rbp还是调用者virtio_dev_remove中保存的virtio_device地址),所以该栈帧中第二个被压栈的值就是virtio_device的地址
crash> dis -lr ffffffffc02e3307
0xffffffffc02e32d0 <virtblk_remove>:    nopl   0x0(%rax,%rax,1) [FTRACE NOP]
0xffffffffc02e32d5 <virtblk_remove+5>:  push   %r12
0xffffffffc02e32d7 <virtblk_remove+7>:  mov    %rdi,%r12
0xffffffffc02e32da <virtblk_remove+10>: push   %rbp
0xffffffffc02e32db <virtblk_remove+11>: mov    0x330(%rdi),%rbp
0xffffffffc02e32e2 <virtblk_remove+18>: lea    0x158(%rbp),%rdi
0xffffffffc02e32e9 <virtblk_remove+25>: callq  0xffffffff85aaef70 <flush_work>
0xffffffffc02e32ee <virtblk_remove+30>: mov    0x28(%rbp),%rdi
0xffffffffc02e32f2 <virtblk_remove+34>: callq  0xffffffff85e3f8f0 <del_gendisk>
0xffffffffc02e32f7 <virtblk_remove+39>: mov    0x28(%rbp),%rax
0xffffffffc02e32fb <virtblk_remove+43>: mov    0x3b0(%rax),%rdi
0xffffffffc02e3302 <virtblk_remove+50>: callq  0xffffffff85e274d0 <blk_cleanup_queue>
0xffffffffc02e3307 <virtblk_remove+55>: lea    0x30(%rbp),%rdi

##也就是ffff96b1d16c2400是virtio_device的地址
crash> bt -f
PID: 1697   TASK: ffff95b2b9078000  CPU: 181  COMMAND: "kworker/u416:1"
 #0 [ffffa2835c82bb10] __schedule at ffffffff863092d2
    ffffa2835c82bb18: ffff95b2b9078000 ffff97a8d3773c80
    ffffa2835c82bb28: ffff96b1cc775b80 ffffffff86b77b80
    ffffa2835c82bb38: ffffa2835c82bb98 ffffffff863092d2
    ffffa2835c82bb48: ffff95b2b90789d8 00000000b8c1c520
    ffffa2835c82bb58: ffffa2835c82bba0 0000000000000004
    ffffa2835c82bb68: f4bb75f19a2e0000 ffff95b2b9078000
    ffffa2835c82bb78: ffff96b1d16c2400 ffffffffc02e6060
    ffffa2835c82bb88: ffff95b29072a9c0 0000000000000000
    ffffa2835c82bb98: ffff95b2b9078000 ffffffff86309906
 #1 [ffffa2835c82bba0] schedule at ffffffff86309906
    ffffa2835c82bba8: ffff95b2b8c1bf50 ffff95b2b8c1c518
    ffffa2835c82bbb8: ffffffff85e322d6
 #2 [ffffa2835c82bbb8] blk_mq_freeze_queue_wait at ffffffff85e322d6
    ffffa2835c82bbc0: ffff95b200000000 ffff95b2b9078000
    ffffa2835c82bbd0: ffffffff85ae3f70 ffff95b2b8c1c520
    ffffa2835c82bbe0: ffff95b2b8c1c520 f4bb75f19a2e0000
    ffffa2835c82bbf0: ffff95b2b8c1bf40 ffff95b3004a2e00
    ffffa2835c82bc00: ffffffff85e27508
 #3 [ffffa2835c82bc00] blk_cleanup_queue at ffffffff85e27508
    ffffa2835c82bc08: ffff96b1d16c2410 ffff95b3004a2e00
    ffffa2835c82bc18: ffffffffc02e3307
 #4 [ffffa2835c82bc18] virtblk_remove at ffffffffc02e3307 [virtio_blk]
    ffffa2835c82bc20: ffff96b1d16c2400 ffffffffc02e6060
    ffffa2835c82bc30: ffffffffc0297499
 #5 [ffffa2835c82bc30] virtio_dev_remove at ffffffffc0297499 [virtio]

##priv就是virtio_blk的地址
##不知道是因为符号没导出还是什么原因,virtio_blk结构体查不到,mod -s加载模块也不行,那就只能手动去算了
 crash> virtio_device ffff96b1d16c2400 |grep priv
  priv = 0xffff95b3004a2e00

crash> virtio_blk 0xffff95b3004a2e00
crash: command not found: virtio_blk

##当前的virtio_blk结构体定义如下
##我们需要根据这些偏移来计算出vqs的地址
##首先需要明确的是refcount_t/unsigned int/int这些类型是占4个字节,指针占8个字节
struct virtio_blk {
	struct mutex vdev_mutex;
	struct virtio_device *vdev;
	struct gendisk *disk;
	struct blk_mq_tag_set tag_set;
	struct work_struct config_work;
	refcount_t refs;
	unsigned int sg_elems;
	int index;
	int num_vqs;
	struct virtio_blk_vq *vqs;
};
##首先计算出三个结构体(mutex、blk_mq_tag_set、work_struct)占的字节数
##然后就可以计算整体偏移了,结果是392个字节
crash> mutex |grep -i size
SIZE: 32
crash> blk_mq_tag_set |grep -i size
    unsigned int cmd_size;
SIZE: 296
crash> work_struct |grep -i size
SIZE: 32
crash> p 32+8+8+296+32+4+4+4+4
$22 = 392
##然后计算virtio_blk地址偏移392的地址,注意这时候这个是算出来的virtio_blk_vq指针所在的地址
##得需要读一下这个值才知道这个virtio_blk_vq的地址
crash> p/x 0xffff95b3004a2e00+392
$23 = 0xffff95b3004a2f88
crash> rd 0xffff95b3004a2f88
ffff95b3004a2f88:  ffff95b2b80d6600                    .f......

##然后看一下virtio_blk_vq结构体
struct virtio_blk_vq {
	struct virtqueue *vq;
	spinlock_t lock;
	char name[VQ_NAME_LEN];
} ____cacheline_aligned_in_smp;
##所以这时候读到的这个结构体的内容的第一个值就是virtqueue的指针
crash> rd ffff95b2b80d6600
ffff95b2b80d6600:  ffff95b28e6ae0c0                    ..j.....

##所以virtqueue就是这样子了
crash> virtqueue ffff95b28e6ae0c0
struct virtqueue {
  list = {
    next = 0xffff95b28e6ae6c0,
    prev = 0xffff96b1d16c2718
  },
  callback = 0xffffffffc02e3000 <virtblk_done>,
  name = 0xffff95b2b80d660c "req.0",
  vdev = 0xffff96b1d16c2400,
  index = 0,
  num_free = 253,
  priv = 0xffffa2835902d000
}

##那么从virtqueue怎么获取到vring呢?通过vring_virtqueue结构体
##而看这个结构体可以看到,vring_virtqueue的第一个成员就是virtqueue结构体,两者的首地址相同
##所以,ffff95b28e6ae0c0也是这个virtqueue所属的vring_virtqueue的地址
##所以接下来继续根据偏移来计算这个vring的所在
struct vring_virtqueue {
	struct virtqueue vq;
	bool packed_ring;
	bool use_dma_api;
	bool weak_barriers;
	bool broken;
	bool indirect;
	bool event;
	unsigned int free_head;
	unsigned int num_added;
	u16 last_used_idx;
	union {
		struct {
			struct vring vring;
			u16 avail_flags_shadow;
			u16 avail_idx_shadow;
			struct vring_desc_state_split *desc_state;
			dma_addr_t queue_dma_addr;
			size_t queue_size_in_bytes;
		} split;
		struct {
			struct {
				unsigned int num;
				struct vring_packed_desc *desc;
				struct vring_packed_desc_event *driver;
				struct vring_packed_desc_event *device;
			} vring;
			bool avail_wrap_counter;
......
};

##所以把上面的偏移计算一下,然后根据这个偏移得出地址
##注意这时候不用再rd了,因为这就是这个vring的地址,不是指针(看一下vring在vring_virtqueue中不是指针)
crash> virtqueue |grep -i size
SIZE: 56
##然后剩下的字段按8字节对齐后大概占3个8字节(共24字节)?6个bool凑一个,两个unsigned int凑一个,u16加上对齐填充占一个
##那地址就是0xffff95b28e6ae110
crash> p/x 0xffff95b28e6ae0c0+56+3*8
$32 = 0xffff95b28e6ae110

##看一下vring的内容
struct vring {
	unsigned int num;
	vring_desc_t *desc;
	vring_avail_t *avail;
	vring_used_t *used;
};

##所以读出来的第一个值是num(0x100,即256),后面这三个内容就是desc avail used了
crash> rd  0xffff95b28e6ae110 4
ffff95b28e6ae110:  0000000000000100 ffff95b31c1c6000   .........`......
ffff95b28e6ae120:  ffff95b31c1c7000 ffff95b31c1c7240   .p......@r......

##然后再根据vring_desc_t vring_avail_t vring_used_t结构体的内容去读值
crash> rd ffff95b31c1c6000 2
ffff95b31c1c6000:  0000010061f16a18 0001000100000010   .j.a............
crash> rd ffff95b31c1c7000
ffff95b31c1c7000:  0000000000080000                    ........
crash> rd ffff95b31c1c7240
ffff95b31c1c7240:  0000000000070000                    ........
##avail和used的前2个字节是flags,紧接着的2个字节是idx(小端序),所以这其实就能读出来avail idx是8,used idx是7了

转载请注明来源,欢迎对文章中的引用来源进行考证,欢迎指出任何有错误或不够清晰的表达。可以在下面评论区评论,也可以邮件至 857879363@qq.com