commit 3313998ed12557ed63ae4c55bc6b5d9c07dbbf58 Author: Daniel Alley Date: Mon Jun 21 19:16:00 2021 -0400 Provide an option to fall back to on_demand if artifact download fails closes: #5286 https://pulp.plan.io/issues/5286 diff --git a/CHANGES/5286.feature b/CHANGES/5286.feature new file mode 100644 index 000000000..bf6641dd9 --- /dev/null +++ b/CHANGES/5286.feature @@ -0,0 +1 @@ +Files which are unable to be downloaded will fall back to "on_demand" and log a warning rather than causing a sync to fail. diff --git a/pulpcore/app/migrations/0066_download_concurrency_and_retry_changes.py b/pulpcore/app/migrations/0066_remote_field_changes.py similarity index 80% rename from pulpcore/app/migrations/0066_download_concurrency_and_retry_changes.py rename to pulpcore/app/migrations/0066_remote_field_changes.py index 836cd5e51..08c468ee1 100644 --- a/pulpcore/app/migrations/0066_download_concurrency_and_retry_changes.py +++ b/pulpcore/app/migrations/0066_remote_field_changes.py @@ -21,4 +21,9 @@ class Migration(migrations.Migration): name='max_retries', field=models.PositiveIntegerField(null=True), ), + migrations.AddField( + model_name='remote', + name='continue_on_error', + field=models.BooleanField(default=False), + ), ] diff --git a/pulpcore/app/models/repository.py b/pulpcore/app/models/repository.py index e1dbd28a5..cea59a573 100644 --- a/pulpcore/app/models/repository.py +++ b/pulpcore/app/models/repository.py @@ -295,6 +295,7 @@ class Remote(MasterModel): null=True, validators=[MinValueValidator(1, "Download concurrency must be at least 1")] ) max_retries = models.PositiveIntegerField(null=True) + continue_on_error = models.BooleanField(default=False) policy = models.TextField(choices=POLICY_CHOICES, default=IMMEDIATE) total_timeout = models.FloatField( diff --git a/pulpcore/app/serializers/repository.py b/pulpcore/app/serializers/repository.py index 4504b353b..2c2d5e422 100644 --- a/pulpcore/app/serializers/repository.py +++ b/pulpcore/app/serializers/repository.py @@ -152,6 +152,15 @@ class RemoteSerializer(ModelSerializer): required=False, allow_null=True, ) + continue_on_error = serializers.BooleanField( + help_text=( + "In the event that a sync encounters a recoverable error such as a 404 while " + "downloading an artifact, attempt to recover and continue. Caution: use of this " + "option may result in 'successful' syncs which do not fully conform to the " + "expected sync settings, and defer failure to the time of content consumption." + ), + required=False, + ) policy = serializers.ChoiceField( help_text="The policy to use when downloading content.", choices=((models.Remote.IMMEDIATE, "When syncing, download all metadata and content now.")), diff --git a/pulpcore/plugin/stages/artifact_stages.py b/pulpcore/plugin/stages/artifact_stages.py index 60d50d14e..34a6eded7 100644 --- a/pulpcore/plugin/stages/artifact_stages.py +++ b/pulpcore/plugin/stages/artifact_stages.py @@ -124,13 +124,16 @@ class ArtifactDownloader(Stage): max_concurrent_content (int): The maximum number of :class:`~pulpcore.plugin.stages.DeclarativeContent` instances to handle simultaneously. Default is 200. + continue_on_error (bool): Whether to continue if an artifact fails to be downloaded. + In this case, the artifact will fall back to 'deferred_download'. args: unused positional arguments passed along to :class:`~pulpcore.plugin.stages.Stage`. kwargs: unused keyword arguments passed along to :class:`~pulpcore.plugin.stages.Stage`. """ - def __init__(self, max_concurrent_content=200, *args, **kwargs): + def __init__(self, max_concurrent_content=200, continue_on_error=False, *args, **kwargs): super().__init__(*args, **kwargs) self.max_concurrent_content = max_concurrent_content + self.continue_on_error = continue_on_error async def run(self): """ @@ -196,10 +199,22 @@ class ArtifactDownloader(Stage): and not d_artifact.deferred_download and not d_artifact.artifact.file ] + downloaded_count = 0 if downloaders_for_content: - await asyncio.gather(*downloaders_for_content) + results = await asyncio.gather(*downloaders_for_content, return_exceptions=True) + for d_artifact, result in zip(d_content.d_artifacts, results): + if isinstance(result, Exception): + if self.continue_on_error: + log.warn( + "Download of {} failed due to error: {}.".format(d_artifact.url, result) + ) + d_artifact.deferred_download = True + else: + raise result + else: + downloaded_count += 1 await self.put(d_content) - return len(downloaders_for_content) + return downloaded_count class ArtifactSaver(Stage):