OpenStack之虚机热迁移的代码详细解析_服务器知识

话说虚机迁移分为冷迁移以及热迁移，所谓热迁移用度娘的话说即是：热迁移（Live Migration，又叫动态迁移、实时迁移），即虚机保存/恢复(Save/Restore)：将整个虚拟机的运行状态完整保存下来，同时可以快速的恢复到原有硬件平台甚至是不同硬件平台上。恢复以后，虚机仍旧平滑运行，用户不会察觉到任何差异。OpenStack的虚机迁移是基于Libvirt实现的，下面来看看OpenStack虚机热迁移的具体代码实现。

首先，由API入口进入到nova/api/openstack/compute/contrib/admin_actions.py

									@wsgi.action('os-migrateLive')

									  def _migrate_live(self, req, id, body):

									    """Permit admins to (live) migrate a server to a new host."""

									    context = req.environ["nova.context"]

									    authorize(context, 'migrateLive')

									    # All three keys are read unconditionally; a missing key (KeyError)
									    # or a body that cannot be indexed (TypeError) is answered with 400.
									    try:

									      block_migration = body["os-migrateLive"]["block_migration"]

									      disk_over_commit = body["os-migrateLive"]["disk_over_commit"]

									      host = body["os-migrateLive"]["host"]

									    except (TypeError, KeyError):

									      msg = _("host, block_migration and disk_over_commit must "

									          "be specified for live migration.")

									      raise exc.HTTPBadRequest(explanation=msg)

									    # Both flags are parsed with strict=True; a ValueError raised by the
									    # parsing is reported to the client as 400 with the error text.
									    try:

									      block_migration = strutils.bool_from_string(block_migration,

									                            strict=True)

									      disk_over_commit = strutils.bool_from_string(disk_over_commit,

									                             strict=True)

									    except ValueError as err:

									      raise exc.HTTPBadRequest(explanation=str(err))

									    try:

									      instance = self.compute_api.get(context, id, want_objects=True)

									      self.compute_api.live_migrate(context, instance, block_migration,

									                     disk_over_commit, host)

									    # The failures listed here become 400 carrying the exception's own
									    # formatted message.
									    except (exception.ComputeServiceUnavailable,

									        exception.InvalidHypervisorType,

									        exception.UnableToMigrateToSelf,

									        exception.DestinationHypervisorTooOld,

									        exception.NoValidHost,

									        exception.InvalidLocalStorage,

									        exception.InvalidSharedStorage,

									        exception.MigrationPreCheckError) as ex:

									      raise exc.HTTPBadRequest(explanation=ex.format_message())

									    except exception.InstanceNotFound as e:

									      raise exc.HTTPNotFound(explanation=e.format_message())

									    except exception.InstanceInvalidState as state_error:

									      common.raise_http_conflict_for_instance_invalid_state(state_error,

									          'os-migrateLive')

									    # Any other exception is logged through LOG.exception and returned as
									    # a 400 whose text only says which instance (and host, if given) failed.
									    except Exception:

									      if host is None:

									        msg = _("Live migration of instance %s to another host "

									            "failed") % id

									      else:

									        msg = _("Live migration of instance %(id)s to host %(host)s "

									            "failed") % {'id': id, 'host': host}

									      LOG.exception(msg)

									      # Return messages from scheduler

									      raise exc.HTTPBadRequest(explanation=msg)

									    # Reached only when live_migrate() raised nothing: reply with 202.
									    return webob.Response(status_int=202)

这里第一行的装饰器@wsgi.action('os-migrateLive')与API文档中请求体的键"os-migrateLive"相对应：

									{

									  "os-migrateLive": {

									    "host": "0443e9a1254044d8b99f35eace132080",

									    "block_migration": false,

									    "disk_over_commit": false

									  }

									}

好了，源码中真正执行迁移工作的，是第三个try块中调用self.compute_api.live_migrate的这条语句：

`self.compute_api.live_migrate(context, instance, block_migration, disk_over_commit, host)`

由这句进入到nova/compute/api.py中，源码如下：

									@check_instance_cell

									  @check_instance_state(vm_state=[vm_states.ACTIVE])

									  def live_migrate(self, context, instance, block_migration,

									           disk_over_commit, host_name):

									    """Migrate a server lively to a new host."""

									    # host_name may be empty; the log message then reads "another host".
									    LOG.debug(_("Going to try to live migrate instance to %s"),

									         host_name or "another host", instance=instance)

									    # Record the MIGRATING task state on the instance before delegating.
									    instance.task_state = task_states.MIGRATING

									    # NOTE(review): expected_task_state=[None] presumably makes the save
									    # fail if another task is already recorded - confirm in Instance.save.
									    instance.save(expected_task_state=[None])

									    # The migration itself is delegated to the compute task API; host_name
									    # is passed through unchanged.
									    self.compute_task_api.live_migrate_instance(context, instance,

									        host_name, block_migration=block_migration,

									        disk_over_commit=disk_over_commit)

@check_instance_state是一个装饰器，用于在进入API方法之前，检测虚拟机和/或任务的状态，如果实例处于错误的状态，将会引发异常；接下来把虚机的任务状态（task_state）置为“migrating”并保存，然后通过self.compute_task_api.live_migrate_instance进入nova/conductor/api.py：

									def live_migrate_instance(self, context, instance, host_name,

									                block_migration, disk_over_commit):

									     """Request a live migration through the manager's migrate_server.

									     The requested host name is wrapped in a scheduler hint dict under
									     the key 'host'. The return value of migrate_server is not returned.

									     NOTE(review): the positional True, False, None presumably map to
									     migrate_server's live, rebuild and flavor parameters, and the
									     trailing None to reservations - confirm against the manager.
									     """

									     scheduler_hint = {'host': host_name}

									     self._manager.migrate_server(

									       context, instance, scheduler_hint, True, False, None,

									       block_migration, disk_over_commit, None)

将主机名存入字典scheduler_hint中，然后调用nova/conductor/manager.py中的方法migrate_server：

									def migrate_server(self, context, instance, scheduler_hint, live, rebuild,

									      flavor, block_migration, disk_over_commit, reservations=None):

									    """Dispatch a migration request to the live or the cold path.

									    The combination of live, rebuild and flavor selects the path:
									    live, no rebuild and no flavor runs _live_migrate; not live, no
									    rebuild and a flavor runs _cold_migrate inside an EventReporter
									    context. Any other combination raises NotImplementedError.
									    """

									    if instance and not isinstance(instance, instance_obj.Instance):

									      # NOTE(danms): Until v2 of the RPC API, we need to tolerate

									      # old-world instance objects here

									      attrs = ['metadata', 'system_metadata', 'info_cache',

									           'security_groups']

									      instance = instance_obj.Instance._from_db_object(

									        context, instance_obj.Instance(), instance,

									        expected_attrs=attrs)

									    if live and not rebuild and not flavor:

									      self._live_migrate(context, instance, scheduler_hint,

									                block_migration, disk_over_commit)

									    elif not live and not rebuild and flavor:

									      instance_uuid = instance['uuid']

									      with compute_utils.EventReporter(context, self.db,

									                     'cold_migrate', instance_uuid):

									        # The cold path indexes scheduler_hint['filter_properties']
									        # directly, so that key must be present in the hint.
									        self._cold_migrate(context, instance, flavor,

									                  scheduler_hint['filter_properties'],

									                  reservations)

									    else:

									      raise NotImplementedError()

由于在nova/conductor/api.py中传过来的参数是

									self._manager.migrate_server(

									       context, instance, scheduler_hint, True, False, None,

									       block_migration, disk_over_commit, None)

因此live是True，rebuild是False，flavor是None，执行下面这个if分支中的代码：

									if live and not rebuild and not flavor:

									       self._live_migrate(context, instance, scheduler_hint,

									                block_migration, disk_over_commit) 

_live_migrate代码如下：

									def _live_migrate(self, context, instance, scheduler_hint,

									           block_migration, disk_over_commit):

									    """Run the live-migration task and handle its failures.

									    The destination is read with scheduler_hint.get("host"), so it is
									    None when the hint has no 'host' key. The exceptions listed in the
									    first handler trigger set_vm_state_and_notify with task_state=None
									    inside save_and_reraise_exception; any other exception is logged
									    and replaced by MigrationError.
									    """

									    destination = scheduler_hint.get("host")

									    try:

									      live_migrate.execute(context, instance, destination,

									               block_migration, disk_over_commit)

									    except (exception.NoValidHost,

									        exception.ComputeServiceUnavailable,

									        exception.InvalidHypervisorType,

									        exception.InvalidCPUInfo,

									        exception.UnableToMigrateToSelf,

									        exception.DestinationHypervisorTooOld,

									        exception.InvalidLocalStorage,

									        exception.InvalidSharedStorage,

									        exception.HypervisorUnavailable,

									        exception.MigrationPreCheckError) as ex:

									      with excutils.save_and_reraise_exception():

									        #TODO(johngarbutt) - eventually need instance actions here

									        request_spec = {'instance_properties': {

									          'uuid': instance['uuid'], },

									        }

									        # The update keeps the instance's current vm_state and sets
									        # task_state to None, expecting it to be MIGRATING.
									        scheduler_utils.set_vm_state_and_notify(context,

									            'compute_task', 'migrate_server',

									            dict(vm_state=instance['vm_state'],

									               task_state=None,

									               expected_task_state=task_states.MIGRATING,),

									            ex, request_spec, self.db)

									    # Anything not listed above is treated as unexpected: it is logged with
									    # exc_info=True and re-raised as MigrationError carrying the original.
									    except Exception as ex:

									      LOG.error(_('Migration of instance %(instance_id)s to host'

									            ' %(dest)s unexpectedly failed.'),

									            {'instance_id': instance['uuid'], 'dest': destination},

									            exc_info=True)

									      raise exception.MigrationError(reason=ex)

首先，通过scheduler_hint.get("host")取得主机名并赋给destination，然后调用live_migrate.execute执行迁移，后面的都是异常的捕捉。执行迁移的代码分为两部分，先看第一部分，在nova/conductor/tasks/live_migrate.py的184行左右：

									def execute(context, instance, destination,

									      block_migration, disk_over_commit):

									  """Build a LiveMigrationTask from the arguments and return its execute() result."""

									  task = LiveMigrationTask(context, instance,

									               destination,

									               block_migration,

									               disk_over_commit)

									  #TODO(johngarbutt) create a superclass that contains a safe_execute call

									  return task.execute()

先创建LiveMigrationTask任务对象（代码中的TODO注释只是说明今后打算创建一个包含safe_execute调用的超类，并不是当前代码的行为），然后返回task.execute()的结果，也即执行迁移的第二部分代码，在54行左右：

									def execute(self):

									    """Run the pre-checks, settle the destination and request the migration.

									    Calls _check_instance_is_running() and _check_host_is_up() on the
									    source. When no destination is set, one is taken from
									    _find_destination(); otherwise _check_requested_destination() is
									    called. Returns whatever compute_rpcapi.live_migration returns.
									    """

									    self._check_instance_is_running()

									    self._check_host_is_up(self.source)

									    if not self.destination:

									      self.destination = self._find_destination()

									    else:

									      self._check_requested_destination()

									    #TODO(johngarbutt) need to move complexity out of compute manager

									    # The request is addressed to the source host (host=self.source);
									    # disk_over_commit is not among the arguments passed on.
									    return self.compute_rpcapi.live_migration(self.context,

									        host=self.source,

									        instance=self.instance,

									        dest=self.destination,

									        block_migration=self.block_migration,

									        migrate_data=self.migrate_data)

									        #TODO(johngarbutt) disk_over_commit?

这里有三部分内容：

如果目标主机不存在（即没有指定目标主机），则由调度算法选取一个目标主机，并且进行相关的检测，确保能够进行实时迁移操作；

如果目标主机存在，则直接进行相关的检测操作，确保能够进行实时迁移操作；

执行迁移操作。

前两部分不再赘述，直接看第三部分代码，在nova/compute/rpcapi.py中：

									def live_migration(self, ctxt, instance, dest, block_migration, host,

									            migrate_data=None):

									    """Cast a 'live_migration' message to the server named by host.

									    The instance is converted with jsonutils.to_primitive before it is
									    sent. The message is sent with cast() and nothing is returned, so
									    callers receive None.
									    """

									    # NOTE(russellb) Havana compat

									    version = self._get_compat_version('3.0', '2.0')

									    instance_p = jsonutils.to_primitive(instance)

									    cctxt = self.client.prepare(server=host, version=version)

									    cctxt.cast(ctxt, 'live_migration', instance=instance_p,

									          dest=dest, block_migration=block_migration,

									          migrate_data=migrate_data)

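源主机上的计算服务（nova/compute/manager.py）收到该RPC消息后，会调用虚拟化驱动（这里是libvirt驱动，nova/virt/libvirt/driver.py）的live_migration方法，热迁移开始执行：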

									def live_migration(self, context, instance, dest,

									            post_method, recover_method, block_migration=False,

									            migrate_data=None):

									    """Spawning live_migration operation for distributing high-load.

									    :param context: security context

									    :param instance:

									      nova.db.sqlalchemy.models.Instance object

									      instance object that is migrated.

									    :param dest: destination host

									    :param post_method:

									      post operation method.

									      expected nova.compute.manager.post_live_migration.

									    :param recover_method:

									      recovery method when any exception occurs.

									      expected nova.compute.manager.recover_live_migration.

									    :param block_migration: if true, do block migration.

									    :param migrate_data: implementation specific params

									    """

									    # All arguments are handed unchanged to _live_migration via
									    # greenthread.spawn; the value spawn returns is discarded, so this
									    # method itself returns None.
									    greenthread.spawn(self._live_migration, context, instance, dest,

									             post_method, recover_method, block_migration,

									             migrate_data)

这个方法中建立一个绿色线程来运行方法_live_migration，来执行实时迁移；主要是调用libvirt python接口方法virDomainMigrateToURI，来实现从当前主机迁移domain对象到给定的目标主机；

spawn：建立一个绿色线程来运行方法“func(*args, **kwargs)”，这里就是来运行方法_live_migration；

_live_migration：执行实时迁移；主要是调用libvirt python接口方法virDomainMigrateToURI，来实现从当前主机迁移domain对象到给定的目标主机；

接着在绿色线程中调用_live_migration方法：

									def _live_migration(self, context, instance, dest, post_method,

									            recover_method, block_migration=False,

									            migrate_data=None):

									    """Do live migration.

									    :param context: security context

									    :param instance:

									      nova.db.sqlalchemy.models.Instance object

									      instance object that is migrated.

									    :param dest: destination host

									    :param post_method:

									      post operation method.

									      expected nova.compute.manager.post_live_migration.

									    :param recover_method:

									      recovery method when any exception occurs.

									      expected nova.compute.manager.recover_live_migration.

									    :param block_migration: if true, do block migration.

									    :param migrate_data: implementation specific params

									    """

									    # Do live migration.

									    try:

									      # The flag names come from configuration as one comma-separated
									      # string; which option is read depends on block_migration.
									      if block_migration:

									        flaglist = CONF.libvirt.block_migration_flag.split(',')

									      else:

									        flaglist = CONF.libvirt.live_migration_flag.split(',')

									      # Each (stripped) name is looked up as an attribute of the libvirt
									      # module to obtain its value.
									      flagvals = [getattr(libvirt, x.strip()) for x in flaglist]

									      # Combine all flag values into a single value with bitwise OR.
									      logical_sum = reduce(lambda x, y: x | y, flagvals)

									      dom = self._lookup_by_name(instance["name"])

									      # The target URI is built by %-formatting live_migration_uri with dest.
									      dom.migrateToURI(CONF.libvirt.live_migration_uri % dest,

									               logical_sum,

									               None,

									               CONF.libvirt.live_migration_bandwidth)

									    # On any failure: log it and call recover_method inside
									    # save_and_reraise_exception.
									    except Exception as e:

									      with excutils.save_and_reraise_exception():

									        LOG.error(_("Live Migration failure: %s"), e,

									             instance=instance)

									        recover_method(context, instance, dest, block_migration)

									    # Waiting for completion of live_migration.

									    # NOTE(review): this listing stops here; the timer is created with
									    # f=None and is neither started nor otherwise used in the visible code.
									    timer = loopingcall.FixedIntervalLoopingCall(f=None)

									if block_migration:

									    flaglist = CONF.libvirt.block_migration_flag.split(',')

这里获取块迁移标志列表；block_migration_flag这个配置项定义了块迁移时使用的迁移标志。

									else:

									         flaglist = CONF.libvirt.live_migration_flag.split(',')

									       flagvals = [getattr(libvirt, x.strip()) for x in flaglist]

									      logical_sum = reduce(lambda x, y: x | y, flagvals)