#!/usr/bin/env python3
#
# Test bitmap-sync backups (incremental, differential, and partials)
#
# Copyright (c) 2019 John Snow for Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# owner=jsnow@redhat.com

import math
import os

import iotests
from iotests import log, qemu_img

# Size of the test image, and the granularity used both for the dirty
# bitmap added to the VM and for the emulated bitmap kept by this test.
SIZE = 64 * 1024 * 1024
GRANULARITY = 64 * 1024


class Pattern:
    """One write: `size` bytes of pattern `byte` starting at `offset`.

    `byte` is kept as a string (e.g. '0x49') because it is substituted
    verbatim into the qemu-io "write -P" command by perform_writes().
    """

    def __init__(self, byte, offset, size=GRANULARITY):
        self.byte = byte
        self.offset = offset
        self.size = size

    def bits(self, granularity):
        """Return the set of bit indices this write touches.

        A bit index is `position // granularity`; every index from the
        one holding the first byte through the one holding the last byte
        is included.
        """
        lower = self.offset // granularity
        upper = (self.offset + self.size - 1) // granularity
        return set(range(lower, upper + 1))


class PatternGroup:
    """Grouping of Pattern objects. Initialize with an iterable of Patterns."""

    def __init__(self, patterns):
        # Materialize the iterable: the group is walked more than once
        # (bits() and perform_writes()), which would silently yield
        # nothing the second time if a one-shot iterator were stored.
        self.patterns = list(patterns)

    def bits(self, granularity):
        """Calculate the unique bits dirtied by this pattern grouping"""
        res = set()
        for pattern in self.patterns:
            res |= pattern.bits(granularity)
        return res


GROUPS = [
    PatternGroup([
        # Batch 0: 4 clusters
        Pattern('0x49', 0x0000000),
        Pattern('0x6c', 0x0100000),   # 1M
        Pattern('0x6f', 0x2000000),   # 32M
        Pattern('0x76', 0x3ff0000)]), # 64M - 64K
    PatternGroup([
        # Batch 1: 6 clusters (3 new)
        Pattern('0x65', 0x0000000),   # Full overwrite
        Pattern('0x77', 0x00f8000),   # Partial-left (1M-32K)
        Pattern('0x72', 0x2008000),   # Partial-right (32M+32K)
        Pattern('0x69', 0x3fe0000)]), # Adjacent-left (64M - 128K)
    PatternGroup([
        # Batch 2: 7 clusters (3 new)
        Pattern('0x74', 0x0010000),   # Adjacent-right
        Pattern('0x69', 0x00e8000),   # Partial-left (1M-96K)
        Pattern('0x6e', 0x2018000),   # Partial-right (32M+96K)
        Pattern('0x67', 0x3fe0000,
                2*GRANULARITY)]),     # Overwrite [(64M-128K)-64M)
    PatternGroup([
        # Batch 3: 8 clusters (5 new)
        # Carefully chosen such that nothing re-dirties the one cluster
        # that copies out successfully before failure in Group #1.
        Pattern('0xaa', 0x0010000,
                3*GRANULARITY),       # Overwrite and 2x Adjacent-right
        Pattern('0xbb', 0x00d8000),   # Partial-left (1M-160K)
        Pattern('0xcc', 0x2028000),   # Partial-right (32M+160K)
        Pattern('0xdd', 0x3fc0000)]), # New; leaving a gap to the right
]


class EmulatedBitmap:
    """Expected state of a dirty bitmap, kept as a set of dirty bit indices.

    The test mutates this model in step with the operations it performs
    on the VM and then checks it against what QMP reports via compare().
    """

    def __init__(self, granularity=GRANULARITY):
        self._bits = set()
        self.granularity = granularity

    def dirty_bits(self, bits):
        """Mark every index in the iterable `bits` as dirty."""
        self._bits |= set(bits)

    def dirty_group(self, n):
        """Mark every bit touched by the writes of GROUPS[n] as dirty."""
        self.dirty_bits(GROUPS[n].bits(self.granularity))

    def clear(self):
        """Mark every bit as clean."""
        self._bits = set()

    def clear_bits(self, bits):
        """Mark every index in the iterable `bits` as clean."""
        self._bits -= set(bits)

    def clear_bit(self, bit):
        """Mark the single index `bit` as clean."""
        self.clear_bits({bit})

    def clear_group(self, n):
        """Mark every bit touched by the writes of GROUPS[n] as clean."""
        self.clear_bits(GROUPS[n].bits(self.granularity))

    @property
    def first_bit(self):
        """Lowest dirty bit index; raises ValueError if nothing is dirty."""
        return min(self.bits)

    @property
    def bits(self):
        """The set of dirty bit indices (the live set, not a copy)."""
        return self._bits

    @property
    def count(self):
        """Number of dirty bits."""
        return len(self.bits)

    def compare(self, qmp_bitmap):
        """
        Print a nice human-readable message checking that a bitmap as
        reported by the QMP interface has as many bits set as we expect it to.
        """
        name = qmp_bitmap.get('name', '(anonymous)')
        log("= Checking Bitmap {:s} =".format(name))

        want = self.count
        # Convert the reported count into a number of bits using the
        # bitmap's own reported granularity.
        have = qmp_bitmap['count'] // qmp_bitmap['granularity']

        log("expecting {:d} dirty sectors; have {:d}. {:s}".format(
            want, have, "OK!" if want == have else "ERROR!"))
        log('')


class Drive:
    """Represents, vaguely, a drive attached to a VM.
    Includes format, graph, and device information."""

    def __init__(self, path, vm=None):
        self.path = path
        self.vm = vm
        # Filled in by img_create() / create_target(), or by the caller.
        self.fmt = None
        self.size = None
        self.node = None

    def img_create(self, fmt, size):
        """Create the image at self.path and record its format and size."""
        self.fmt = fmt
        self.size = size
        iotests.qemu_img_create('-f', self.fmt, self.path, str(self.size))

    def create_target(self, name, fmt, size):
        """Create and attach this drive as a target through the VM.

        Issues blockdev-create and blockdev-add first for the protocol
        ('file') layer and then for the `fmt` layer, logging each reply.
        The format node is named `name`; the file node is named after
        the basename of self.path.
        """
        basename = os.path.basename(self.path)
        file_node_name = "file_{}".format(basename)
        vm = self.vm

        log(vm.command('blockdev-create', job_id='bdc-file-job',
                       options={
                           'driver': 'file',
                           'filename': self.path,
                           'size': 0,
                       }))
        vm.run_job('bdc-file-job')
        log(vm.command('blockdev-add', driver='file',
                       node_name=file_node_name, filename=self.path))

        log(vm.command('blockdev-create', job_id='bdc-fmt-job',
                       options={
                           'driver': fmt,
                           'file': file_node_name,
                           'size': size,
                       }))
        vm.run_job('bdc-fmt-job')
        log(vm.command('blockdev-add', driver=fmt,
                       node_name=name,
                       file=file_node_name))
        self.fmt = fmt
        self.size = size
        self.node = name


def blockdev_backup(vm, device, target, sync, **kwargs):
    """Issue a logged blockdev-backup command and return its result.

    Keyword arguments whose value is None are dropped rather than sent,
    so callers can iterate over "argument absent" as just another case.
    The filter node is always named 'backup-top'.
    """
    # Strip any arguments explicitly nulled by the caller:
    kwargs = {key: val for key, val in kwargs.items() if val is not None}
    return vm.qmp_log('blockdev-backup',
                      device=device,
                      target=target,
                      sync=sync,
                      filter_node_name='backup-top',
                      **kwargs)


def blockdev_backup_mktarget(drive, target_id, filepath, sync, **kwargs):
    """Create a target at `filepath` matching `drive`, then back up to it.

    The target is created with the source drive's format and size and
    attached under the node name `target_id`.
    """
    target_drive = Drive(filepath, vm=drive.vm)
    target_drive.create_target(target_id, drive.fmt, drive.size)
    blockdev_backup(drive.vm, drive.node, target_id, sync, **kwargs)


def reference_backup(drive, n, filepath):
    """Run full backup number `n` of `drive` to `filepath` to completion."""
    log("--- Reference Backup #{:d} ---\n".format(n))
    target_id = "ref_target_{:d}".format(n)
    job_id = "ref_backup_{:d}".format(n)
    blockdev_backup_mktarget(drive, target_id, filepath, "full",
                             job_id=job_id)
    drive.vm.run_job(job_id, auto_dismiss=True)
    log('')


def backup(drive, n, filepath, sync, **kwargs):
    """Start test backup number `n` of `drive` and return its job id.

    The job is only started, not run: 'auto-finalize' defaults to False
    so that the caller decides when (and whether) it is finalized.
    """
    log("--- Test Backup #{:d} ---\n".format(n))
    target_id = "backup_target_{:d}".format(n)
    job_id = "backup_{:d}".format(n)
    kwargs.setdefault('auto-finalize', False)
    blockdev_backup_mktarget(drive, target_id, filepath, sync,
                             job_id=job_id, **kwargs)
    return job_id


def perform_writes(drive, n, filter_node_name=None):
    """Write the patterns of GROUPS[n] to the drive via HMP qemu-io.

    Writes go to `filter_node_name` when given, otherwise to the drive's
    own node. Returns the result of querying the VM's bitmaps afterwards.
    """
    log("--- Write #{:d} ---\n".format(n))
    for pattern in GROUPS[n].patterns:
        cmd = "write -P{:s} 0x{:07x} 0x{:x}".format(
            pattern.byte,
            pattern.offset,
            pattern.size)
        log(cmd)
        log(drive.vm.hmp_qemu_io(filter_node_name or drive.node, cmd))
    bitmaps = drive.vm.query_bitmaps()
    log({'bitmaps': bitmaps}, indent=2)
    log('')
    return bitmaps


def compare_images(image, reference, baseimg=None, expected_match=True):
    """
    Print a nice human-readable message comparing these images.

    :param image: The image under test.
    :param reference: The image it is compared against.
    :param baseimg: If given, `image` is first rebased (qemu-img rebase -u)
                    onto this backing file.
    :param expected_match: Whether the two images are expected to be
                           identical; decides between "OK!" and "ERROR!".
    :raises AssertionError: If the rebase returns non-zero.
    """
    expected_ret = 0 if expected_match else 1
    if baseimg:
        # Run the rebase as its own statement: placing the call inside an
        # `assert` would remove it entirely under `python -O`, and the
        # comparison below would then run against an un-rebased image.
        rebase_ret = qemu_img("rebase", "-u", "-b", baseimg, image)
        if rebase_ret != 0:
            raise AssertionError(
                "qemu_img rebase of '{}' onto '{}' returned {}".format(
                    image, baseimg, rebase_ret))
    ret = qemu_img("compare", image, reference)
    log('qemu_img compare "{:s}" "{:s}" ==> {:s}, {:s}'.format(
        image, reference,
        "Identical" if ret == 0 else "Mismatch",
        "OK!" if ret == expected_ret else "ERROR!"),
        filters=[iotests.filter_testfiles])


def test_bitmap_sync(bsync_mode, msync_mode='bitmap', failure=None):
    """
    Test bitmap backup routines.

    :param bsync_mode: Is the Bitmap Sync mode, and can be any of:
        - on-success: This is the "incremental" style mode. Bitmaps are
                      synchronized to what was copied out only on success.
                      (Partial images must be discarded.)
        - never:      This is the "differential" style mode.
                      Bitmaps are never synchronized.
        - always:     This is a "best effort" style mode.
                      Bitmaps are always synchronized, regardless of failure.
                      (Partial images must be kept.)

    :param msync_mode: The mirror sync mode to use for the first backup.
                       Can be any one of:
        - bitmap: Backups based on bitmap manifest.
        - full:   Full backups.
        - top:    Full backups of the top layer only.

    :param failure: Is the (optional) failure mode, and can be any of:
        - None:         No failure. Test the normative path. Default.
        - simulated:    Cancel the job right before it completes.
                        This also tests writes "during" the job.
        - intermediate: This tests a job that fails mid-process and produces
                        an incomplete backup. Testing limitations prevent
                        testing competing writes.
    """
    with iotests.FilePaths(['img', 'bsync1', 'bsync2',
                            'fbackup0', 'fbackup1', 'fbackup2']) as \
                            (img_path, bsync1, bsync2,
                             fbackup0, fbackup1, fbackup2), \
         iotests.VM() as vm:

        mode = "Mode {:s}; Bitmap Sync {:s}".format(msync_mode, bsync_mode)
        preposition = "with" if failure else "without"
        cond = "{:s} {:s}".format(preposition,
                                  "{:s} failure".format(failure) if failure
                                  else "failure")
        log("\n=== {:s} {:s} ===\n".format(mode, cond))

        log('--- Preparing image & VM ---\n')
        drive0 = Drive(img_path, vm=vm)
        drive0.img_create(iotests.imgfmt, SIZE)
        vm.add_device("{},id=scsi0".format(iotests.get_virtio_scsi_device()))
        vm.launch()

        file_config = {
            'driver': 'file',
            'filename': drive0.path
        }

        if failure == 'intermediate':
            # Wrap the file node in blkdebug. The rules below move through
            # states 1 -> 2 (on flush_to_disk) -> 3 (on read_aio) and inject
            # errno 5 once on a read_aio in state 3.
            file_config = {
                'driver': 'blkdebug',
                'image': file_config,
                'set-state': [{
                    'event': 'flush_to_disk',
                    'state': 1,
                    'new_state': 2
                }, {
                    'event': 'read_aio',
                    'state': 2,
                    'new_state': 3
                }],
                'inject-error': [{
                    'event': 'read_aio',
                    'errno': 5,
                    'state': 3,
                    'immediately': False,
                    'once': True
                }]
            }

        drive0.node = 'drive0'
        vm.qmp_log('blockdev-add',
                   filters=[iotests.filter_qmp_testfiles],
                   node_name=drive0.node,
                   driver=drive0.fmt,
                   file=file_config)
        log('')

        # 0 - Writes and Reference Backup
        perform_writes(drive0, 0)
        reference_backup(drive0, 0, fbackup0)
        log('--- Add Bitmap ---\n')
        vm.qmp_log("block-dirty-bitmap-add", node=drive0.node,
                   name="bitmap0", granularity=GRANULARITY)
        log('')
        ebitmap = EmulatedBitmap()

        # 1 - Writes and Reference Backup
        bitmaps = perform_writes(drive0, 1)
        ebitmap.dirty_group(1)
        bitmap = vm.get_bitmap(drive0.node, 'bitmap0', bitmaps=bitmaps)
        ebitmap.compare(bitmap)
        reference_backup(drive0, 1, fbackup1)

        # 1 - Test Backup (w/ Optional induced failure)
        if failure == 'intermediate':
            # Activate blkdebug induced failure for second-to-next read
            log(vm.hmp_qemu_io(drive0.node, 'flush'))
            log('')
        job = backup(drive0, 1, bsync1, msync_mode,
                     bitmap="bitmap0", bitmap_mode=bsync_mode)

        def _callback():
            """Issue writes while the job is open to test bitmap divergence."""
            # Note: when `failure` is 'intermediate', this isn't called.
            log('')
            bitmaps = perform_writes(drive0, 2, filter_node_name='backup-top')
            # Named bitmap (static, should be unchanged)
            ebitmap.compare(vm.get_bitmap(drive0.node, 'bitmap0',
                                          bitmaps=bitmaps))
            # Anonymous bitmap (dynamic, shows new writes)
            anonymous = EmulatedBitmap()
            anonymous.dirty_group(2)
            anonymous.compare(vm.get_bitmap(drive0.node, '', recording=True,
                                            bitmaps=bitmaps))

            # Simulate the order in which this will happen:
            # group 1 gets cleared first, then group two gets written.
            if ((bsync_mode == 'on-success' and not failure) or
                    (bsync_mode == 'always')):
                ebitmap.clear()
            ebitmap.dirty_group(2)

        vm.run_job(job, auto_dismiss=True, auto_finalize=False,
                   pre_finalize=_callback,
                   cancel=(failure == 'simulated'))
        bitmaps = vm.query_bitmaps()
        log({'bitmaps': bitmaps}, indent=2)
        log('')

        if bsync_mode == 'always' and failure == 'intermediate':
            # TOP treats anything allocated as dirty, expect to see:
            if msync_mode == 'top':
                ebitmap.dirty_group(0)

            # We manage to copy one sector (one bit) before the error.
            ebitmap.clear_bit(ebitmap.first_bit)

            # Full returns all bits set except what was copied/skipped
            if msync_mode == 'full':
                fail_bit = ebitmap.first_bit
                ebitmap.clear()
                ebitmap.dirty_bits(range(fail_bit, SIZE // GRANULARITY))

        ebitmap.compare(vm.get_bitmap(drive0.node, 'bitmap0', bitmaps=bitmaps))

        # 2 - Writes and Reference Backup
        bitmaps = perform_writes(drive0, 3)
        ebitmap.dirty_group(3)
        ebitmap.compare(vm.get_bitmap(drive0.node, 'bitmap0', bitmaps=bitmaps))
        reference_backup(drive0, 2, fbackup2)

        # 2 - Bitmap Backup (In failure modes, this is a recovery.)
        job = backup(drive0, 2, bsync2, "bitmap",
                     bitmap="bitmap0", bitmap_mode=bsync_mode)
        vm.run_job(job, auto_dismiss=True, auto_finalize=False)
        bitmaps = vm.query_bitmaps()
        log({'bitmaps': bitmaps}, indent=2)
        log('')
        if bsync_mode != 'never':
            ebitmap.clear()
        ebitmap.compare(vm.get_bitmap(drive0.node, 'bitmap0', bitmaps=bitmaps))

        log('--- Cleanup ---\n')
        vm.qmp_log("block-dirty-bitmap-remove",
                   node=drive0.node, name="bitmap0")
        bitmaps = vm.query_bitmaps()
        log({'bitmaps': bitmaps}, indent=2)
        vm.shutdown()
        log('')

        log('--- Verification ---\n')
        # 'simulated' failures will actually all pass here because we canceled
        # while "pending". This is actually undefined behavior,
        # don't rely on this to be true!
        compare_images(bsync1, fbackup1, baseimg=fbackup0,
                       expected_match=failure != 'intermediate')
        if not failure or bsync_mode == 'always':
            # Always keep the last backup on success or when using 'always'
            base = bsync1
        else:
            base = fbackup0
        compare_images(bsync2, fbackup2, baseimg=base)
        compare_images(img_path, fbackup2)
        log('')


def test_backup_api():
    """
    Test malformed and prohibited invocations of the backup API.
    """
    with iotests.FilePaths(['img', 'bsync1']) as \
         (img_path, backup_path), \
         iotests.VM() as vm:

        log("\n=== API failure tests ===\n")
        log('--- Preparing image & VM ---\n')
        drive0 = Drive(img_path, vm=vm)
        drive0.img_create(iotests.imgfmt, SIZE)
        vm.add_device("{},id=scsi0".format(iotests.get_virtio_scsi_device()))
        vm.launch()

        file_config = {
            'driver': 'file',
            'filename': drive0.path
        }

        drive0.node = 'drive0'
        vm.qmp_log('blockdev-add',
                   filters=[iotests.filter_qmp_testfiles],
                   node_name=drive0.node,
                   driver=drive0.fmt,
                   file=file_config)
        log('')

        target0 = Drive(backup_path, vm=vm)
        target0.create_target("backup_target", drive0.fmt, drive0.size)
        log('')

        vm.qmp_log("block-dirty-bitmap-add", node=drive0.node,
                   name="bitmap0", granularity=GRANULARITY)
        log('')

        log('-- Testing invalid QMP commands --\n')

        # error_cases[sync mode][bitmap name] lists the bitmap sync
        # policies to try for that combination. A key or policy of None
        # means the argument is omitted (blockdev_backup() strips Nones).
        error_cases = {
            'incremental': {
                None:        ['on-success', 'always', 'never', None],
                'bitmap404': ['on-success', 'always', 'never', None],
                'bitmap0':   ['always', 'never']
            },
            'bitmap': {
                None:        ['on-success', 'always', 'never', None],
                'bitmap404': ['on-success', 'always', 'never', None],
                'bitmap0':   [None],
            },
            'full': {
                None:        ['on-success', 'always', 'never'],
                'bitmap404': ['on-success', 'always', 'never', None],
                'bitmap0':   ['never', None],
            },
            'top': {
                None:        ['on-success', 'always', 'never'],
                'bitmap404': ['on-success', 'always', 'never', None],
                'bitmap0':   ['never', None],
            },
            'none': {
                None:        ['on-success', 'always', 'never'],
                'bitmap404': ['on-success', 'always', 'never', None],
                'bitmap0':   ['on-success', 'always', 'never', None],
            }
        }

        # Dicts, as always, are not stably-ordered prior to 3.7, so use tuples:
        for sync_mode in ('incremental', 'bitmap', 'full', 'top', 'none'):
            log("-- Sync mode {:s} tests --\n".format(sync_mode))
            for bitmap in (None, 'bitmap404', 'bitmap0'):
                for policy in error_cases[sync_mode][bitmap]:
                    blockdev_backup(drive0.vm, drive0.node, "backup_target",
                                    sync_mode, job_id='api_job',
                                    bitmap=bitmap, bitmap_mode=policy)
                    log('')


def main():
    """Run every bitmap-sync scenario, then the API failure tests."""
    # 'bitmap' sync mode: all three bitmap sync policies.
    for bsync_mode in ("never", "on-success", "always"):
        for failure in ("simulated", "intermediate", None):
            test_bitmap_sync(bsync_mode, "bitmap", failure)

    # 'full' and 'top' sync modes: 'never' is not exercised here.
    for sync_mode in ('full', 'top'):
        for bsync_mode in ('on-success', 'always'):
            for failure in ('simulated', 'intermediate', None):
                test_bitmap_sync(bsync_mode, sync_mode, failure)

    test_backup_api()


if __name__ == '__main__':
    iotests.script_main(main, supported_fmts=['qcow2'],
                        supported_protocols=['file'])