Issue #5964
Updated by gmbnomis about 5 years ago
h1. Problem
@pulpcore.plugin.repo_version_utils.remove_duplicates@ does not handle the case where @base_version != None@ correctly, as it operates on the wrong content sets.
h1. Example
Assume that there are two artifacts:
- a1 sha256: 4355...
- a2 sha256: 53c2...
and two file content units:
- c1 "relative_path": "test_upload.txt", artifact is a1
- c2 "relative_path": "test_upload.txt", artifact is a2
These two collide w.r.t. the repo_key and @pulpcore.plugin.repo_version_utils.remove_duplicates@ must remove duplicates.
Now create the following repo versions:
0: empty initial repo version
1: Post to /modify adding "c1". Expected content "c1"
2: Post to /modify adding "c2". Expected content "c2" (c1 has to be removed because we are adding newer conflicting content)
3: Post to /modify adding "c2" to base_version 1. Note that, semantically, this is exactly the same operation as the one in version 2, i.e. add "c2" to a repo version containing "c1". Expected content "c2" (c1 has to be removed because we are adding newer conflicting content)
However, version 3 contains c1!
h1. Reproducer
Run the following script on a Pulp3 install with an empty DB.
<pre><code class="text">
#!/usr/bin/env bash
# Reproducer preamble: exit on the first unhandled command failure and make
# the API / Content app base URLs available to every command below.
set -e
echo "Setting environment variables for default hostname/port for the API and the Content app"
# Values already present in the environment win; otherwise use the local defaults.
: "${BASE_ADDR:=http://localhost:24817}"
: "${CONTENT_ADDR:=http://localhost:24816}"
export BASE_ADDR CONTENT_ADDR
# Necessary for `django-admin`
# NOTE(review): the statement this comment refers to is not visible here - confirm.
#######################################
# Poll a Pulp task until it has reached a final state.
# Arguments: $1 - full URL of the task to poll (fetched with `http`)
# Outputs:   every polled response (pretty-printed by `jq`) plus progress
#            messages on stdout
# Returns:   0 once the task state is "completed"; terminates the whole
#            script with status 1 when the task ends up failed or canceled
# NOTE(review): the loop/case skeleton and the state names below were
# reconstructed from the surviving messages - confirm against the Pulp task
# states in use.
#######################################
wait_until_task_finished() {
  echo "Polling the task until it has reached a final state."
  local task_url=$1
  # Declared separately so a failing `http`/`jq` is not masked by `local`.
  local response state
  while true; do
    response=$(http "$task_url")
    state=$(jq -r .state <<< "${response}")
    jq . <<< "${response}"
    case ${state} in
      failed|canceled)
        echo "Task in final state: ${state}"
        exit 1
        ;;
      completed)
        echo "$task_url complete."
        break
        ;;
      *)
        # Any other state is treated as "not finished yet".
        echo "Still waiting..."
        sleep 1
        ;;
    esac
  done
}
# Step: upload a file containing "1" as an artifact, then create a file
# content unit with relative_path "test_upload.txt" from it (CONTENT1_HREF).
# Assignments are kept separate from `export` so that a failing pipeline is
# not hidden from `set -e`; expansions are quoted against word splitting.
echo "Creating a file "1" at path FILE_CONTENT to upload."
export FILE_CONTENT="1"
echo "$FILE_CONTENT" > test_upload.txt
echo "Uploading the file to Pulp, creating an artifact, storing ARTIFACT1_HREF."
ARTIFACT1_HREF=$(http --form POST "$BASE_ADDR/pulp/api/v3/artifacts/" \
  file@./test_upload.txt \
  | jq -r '.pulp_href')
export ARTIFACT1_HREF
echo "Inspecting new artifact."
echo 'Create File Content from the artifact and save as environment variable'
TASK_URL=$(http POST "$BASE_ADDR/pulp/api/v3/content/file/files/" \
  relative_path="test_upload.txt" \
  artifact="$ARTIFACT1_HREF" \
  | jq -r '.task')
export TASK_URL
# Content creation is asynchronous: wait for the task before reading its result.
wait_until_task_finished "$BASE_ADDR$TASK_URL"
CONTENT1_HREF=$(http "$BASE_ADDR$TASK_URL" | jq -r '.created_resources | first')
export CONTENT1_HREF
echo "Inspecting new file content 1"
# Step: overwrite test_upload.txt with "2", upload it as a second artifact and
# create a second file content unit with the same relative_path (CONTENT2_HREF).
# Assignments are kept separate from `export` so that a failing pipeline is
# not hidden from `set -e`; expansions are quoted against word splitting.
echo "Creating a file "2" at path FILE_CONTENT to upload."
export FILE_CONTENT="2"
echo "$FILE_CONTENT" > test_upload.txt
# The message previously named ARTIFACT1_HREF although ARTIFACT2_HREF is stored.
echo "Uploading the file to Pulp, creating an artifact, storing ARTIFACT2_HREF."
ARTIFACT2_HREF=$(http --form POST "$BASE_ADDR/pulp/api/v3/artifacts/" \
  file@./test_upload.txt \
  | jq -r '.pulp_href')
export ARTIFACT2_HREF
echo "Inspecting new artifact."
echo 'Create File Content from the artifact and save as environment variable'
TASK_URL=$(http POST "$BASE_ADDR/pulp/api/v3/content/file/files/" \
  relative_path="test_upload.txt" \
  artifact="$ARTIFACT2_HREF" \
  | jq -r '.task')
export TASK_URL
# Content creation is asynchronous: wait for the task before reading its result.
wait_until_task_finished "$BASE_ADDR$TASK_URL"
CONTENT2_HREF=$(http "$BASE_ADDR$TASK_URL" | jq -r '.created_resources | first')
export CONTENT2_HREF
echo "Inspecting new file content 2"
# Step: create the file repository used by the reproducer and keep its href.
export REPO_NAME=dup
echo "Creating a new repository named $REPO_NAME."
# Assigned before exporting so a failing pipeline is not hidden from `set -e`.
REPO_HREF=$(http POST "$BASE_ADDR/pulp/api/v3/repositories/file/file/" name="$REPO_NAME" \
  | jq -r '.pulp_href')
export REPO_HREF
echo "Inspecting repository."
# Step: add content 1 to the repository and record the resulting repository
# version.
echo "Kick off a task to add content 1 to a repository, storing TASK_URL env variable"
TASK_URL=$(http POST "$BASE_ADDR$REPO_HREF"'modify/' \
  add_content_units:="[\"$CONTENT1_HREF\"]" \
  | jq -r '.task')
export TASK_URL
# Poll the task until it reaches a final state (uses wait_until_task_finished).
wait_until_task_finished "$BASE_ADDR$TASK_URL"
echo "Retrieving REPOVERSION_HREF from task"
REPOVERSION_HREF=$(http "$BASE_ADDR$TASK_URL" | jq -r '.created_resources | first')
export REPOVERSION_HREF
# REPOVERSION_HREF is overwritten by the following modify steps, but the last
# step needs the version that contains only content 1 as its base_version.
export REPOVERSION_WITH_1_HREF=$REPOVERSION_HREF
echo "Inspecting repository version."
echo "Inspecting the content of the repository version"
FILE_CONTENT_REPO_VERSION_HREF=$(http "$BASE_ADDR$REPOVERSION_HREF" | jq -r '.content_summary.present["file.file"].href')
export FILE_CONTENT_REPO_VERSION_HREF
# Step: add content 2 on top of the latest repository version and record the
# resulting repository version.
echo "Kick off a task to add content 2 to a repository, storing TASK_URL env variable"
# Assigned before exporting so a failing pipeline is not hidden from `set -e`.
TASK_URL=$(http POST "$BASE_ADDR$REPO_HREF"'modify/' \
  add_content_units:="[\"$CONTENT2_HREF\"]" \
  | jq -r '.task')
export TASK_URL
# Poll the task until it reaches a final state (uses wait_until_task_finished).
wait_until_task_finished "$BASE_ADDR$TASK_URL"
echo "Retrieving REPOVERSION_HREF from task"
REPOVERSION_HREF=$(http "$BASE_ADDR$TASK_URL" | jq -r '.created_resources | first')
export REPOVERSION_HREF
echo "Inspecting repository version."
echo "Inspecting the content of the repository version"
FILE_CONTENT_REPO_VERSION_HREF=$(http "$BASE_ADDR$REPOVERSION_HREF" | jq -r '.content_summary.present["file.file"].href')
export FILE_CONTENT_REPO_VERSION_HREF
# Step: add content 2 again, this time on top of an explicit base_version
# (the version that contains content 1), and record the resulting version.
echo "Kick off a task to add content 2 to the version containing 1, storing TASK_URL env variable"
# Fail fast instead of silently posting an empty base_version, which would
# make this step test something other than the base_version code path.
: "${REPOVERSION_WITH_1_HREF:?must be set to the href of the repository version that contains content 1}"
TASK_URL=$(http POST "$BASE_ADDR$REPO_HREF"'modify/' \
  add_content_units:="[\"$CONTENT2_HREF\"]" \
  base_version="$REPOVERSION_WITH_1_HREF" \
  | jq -r '.task')
export TASK_URL
# Poll the task until it reaches a final state (uses wait_until_task_finished).
wait_until_task_finished "$BASE_ADDR$TASK_URL"
echo "Retrieving REPOVERSION_HREF from task"
REPOVERSION_HREF=$(http "$BASE_ADDR$TASK_URL" | jq -r '.created_resources | first')
export REPOVERSION_HREF
echo "Inspecting repository version."
echo "Inspecting the content of the repository version"
FILE_CONTENT_REPO_VERSION_HREF=$(http "$BASE_ADDR$REPOVERSION_HREF" | jq -r '.content_summary.present["file.file"].href')
export FILE_CONTENT_REPO_VERSION_HREF