enhance replica state logging
Carthaca opened this issue · comments
Maurice Escher commented
Each situation that leads to the replica state not transitioning to in_sync
should be logged, so operators can diagnose why a replica stays out of sync.
manila/manila/share/drivers/netapp/dataontap/cluster_mode/lib_base.py
Lines 2899 to 3021 in 79078c2
def update_replica_state(self, context, replica_list, replica,
                         access_rules, share_snapshots, share_server=None,
                         replication=True):
    """Returns the status of the given replica on this backend.

    Every code path that reports the replica as anything other than
    in_sync emits a log message explaining why, so operators can tell
    from the logs which condition is keeping a replica out of sync.

    :param context: request context (unused here, kept for interface).
    :param replica_list: all replicas of the share, used to locate the
        active (source) replica.
    :param replica: the replica being checked on this backend.
    :param access_rules: access rules for the share (unused here, kept
        for interface).
    :param share_snapshots: snapshots expected to exist on the replica.
    :param share_server: optional share server hosting the replica.
    :param replication: whether this call is for a replication workflow
        (affects readable-replica handling).
    :returns: one of the constants.REPLICA_STATE_* values,
        constants.STATUS_ERROR, or None when a revert is in progress.
    :raises exception.ShareResourceNotFound: if the backing volume does
        not exist on the vserver.
    """
    active_replica = self.find_active_replica(replica_list)
    share_name = self._get_backend_share_name(replica['id'])
    vserver, vserver_client = self._get_vserver(share_server=share_server)

    if not vserver_client.volume_exists(share_name):
        msg = _("Volume %(share_name)s does not exist on vserver "
                "%(vserver)s.")
        msg_args = {'share_name': share_name, 'vserver': vserver}
        raise exception.ShareResourceNotFound(msg % msg_args)

    # NOTE(cknight): The SnapMirror may have been intentionally broken by
    # a revert-to-snapshot operation, in which case this method should not
    # attempt to change anything.
    if active_replica['status'] == constants.STATUS_REVERTING:
        return None

    dm_session = data_motion.DataMotionSession()

    try:
        snapmirrors = dm_session.get_snapmirrors(active_replica, replica)
    except netapp_api.NaApiError:
        LOG.exception("Could not get snapmirrors for replica %s.",
                      replica['id'])
        return constants.STATUS_ERROR

    is_readable = replication and self._is_readable_replica(replica)
    if not snapmirrors:
        # No relationship exists yet; (re)create it unless the replica is
        # still being created, and report out-of-sync either way.
        if replica['status'] != constants.STATUS_CREATING:
            try:
                pool_name = share_utils.extract_host(replica['host'],
                                                     level='pool')
                relationship_type = na_utils.get_relationship_type(
                    self._is_flexgroup_pool(pool_name))
                dm_session.create_snapmirror(active_replica, replica,
                                             relationship_type,
                                             mount=is_readable)
            except netapp_api.NaApiError:
                LOG.exception("Could not create snapmirror for "
                              "replica %s.", replica['id'])
                return constants.STATUS_ERROR
        LOG.debug('No snapmirror relationship was found for replica '
                  '%(replica)s; reporting it as out of sync.',
                  {'replica': replica['id']})
        return constants.REPLICA_STATE_OUT_OF_SYNC

    snapmirror = snapmirrors[0]

    # NOTE(dviroel): Don't try to resume or resync a SnapMirror that has
    # one of the in progress transfer states, because the storage will
    # answer with an error.
    in_progress_status = ['preparing', 'transferring', 'finalizing']
    if (snapmirror.get('mirror-state') != 'snapmirrored' and
            snapmirror.get('relationship-status') in in_progress_status):
        LOG.debug('Snapmirror for replica %(replica)s is in progress '
                  'with relationship-status %(status)s; reporting it as '
                  'out of sync.',
                  {'replica': replica['id'],
                   'status': snapmirror.get('relationship-status')})
        return constants.REPLICA_STATE_OUT_OF_SYNC

    if snapmirror.get('mirror-state') != 'snapmirrored':
        # The relationship is broken (e.g. after a failover); attempt to
        # resume and resync it so it can catch up again.
        try:
            vserver_client.resume_snapmirror_vol(
                snapmirror['source-vserver'],
                snapmirror['source-volume'],
                vserver,
                share_name)
            vserver_client.resync_snapmirror_vol(
                snapmirror['source-vserver'],
                snapmirror['source-volume'],
                vserver,
                share_name)
            LOG.debug('Snapmirror for replica %(replica)s was in '
                      'mirror-state %(state)s and was resynced; '
                      'reporting it as out of sync.',
                      {'replica': replica['id'],
                       'state': snapmirror.get('mirror-state')})
            return constants.REPLICA_STATE_OUT_OF_SYNC
        except netapp_api.NaApiError:
            LOG.exception("Could not resync snapmirror.")
            return constants.STATUS_ERROR

    last_update_timestamp = float(
        snapmirror.get('last-transfer-end-timestamp', 0))
    # TODO(ameade): Have a configurable RPO for replicas, for now it is
    # one hour.
    # NOTE(review): the threshold below is 1200 seconds (20 minutes),
    # which does not match the "one hour" TODO above — confirm intent.
    if (last_update_timestamp and
            (timeutils.is_older_than(
                datetime.datetime.utcfromtimestamp(last_update_timestamp)
                .isoformat(), 1200))):
        current_schedule = snapmirror.get('schedule')
        new_schedule = self.configuration.netapp_snapmirror_schedule
        if current_schedule == new_schedule:
            LOG.debug('Last transfer for replica %(replica)s ended at '
                      '%(timestamp)s, which is too long ago; reporting '
                      'it as out of sync.',
                      {'replica': replica['id'],
                       'timestamp': last_update_timestamp})
            return constants.REPLICA_STATE_OUT_OF_SYNC
        else:
            # The stale transfer may simply be due to an outdated
            # schedule; update it instead of reporting out-of-sync.
            LOG.debug('Modify snapmirror schedule for replica:'
                      '%(replica)s from %(from)s to %(to)s',
                      {'replica': replica['id'],
                       'from': current_schedule,
                       'to': new_schedule})
            dm_session.modify_snapmirror(active_replica, replica,
                                         schedule=new_schedule)

    replica_backend = share_utils.extract_host(replica['host'],
                                               level='backend_name')
    config = data_motion.get_backend_configuration(replica_backend)
    config_size = (int(config.safe_get(
        'netapp_snapmirror_last_transfer_size_limit')) * units.Ki)
    last_transfer_size = int(snapmirror.get('last-transfer-size', 0))
    if last_transfer_size > config_size:
        LOG.debug('Found last-transfer-size %(size)d for replica: '
                  '%(replica)s.', {'replica': replica['id'],
                                   'size': last_transfer_size})
        return constants.REPLICA_STATE_OUT_OF_SYNC

    last_transfer_error = snapmirror.get('last-transfer-error', None)
    if last_transfer_error:
        LOG.debug('Found last-transfer-error: %(error)s for replica: '
                  '%(replica)s.', {'replica': replica['id'],
                                   'error': last_transfer_error})
        return constants.REPLICA_STATE_OUT_OF_SYNC

    # Check all snapshots exist
    snapshots = [snap['share_replica_snapshot']
                 for snap in share_snapshots]
    for snap in snapshots:
        snapshot_name = snap.get('provider_location')
        if (not snapshot_name or
                not vserver_client.snapshot_exists(snapshot_name,
                                                   share_name)):
            LOG.debug('Snapshot %(snapshot)s was not found on replica '
                      '%(replica)s; reporting it as out of sync.',
                      {'replica': replica['id'],
                       'snapshot': snapshot_name})
            return constants.REPLICA_STATE_OUT_OF_SYNC

    return constants.REPLICA_STATE_IN_SYNC