66
77from django .db .models import Prefetch
88from django .db .models .query import QuerySet
9+ from pulpcore .app .models import Artifact , Domain
910from pulp_python .app .models import PythonPackageContent , PythonRepository
1011from pulp_python .app .utils import (
12+ artifact_to_metadata_artifact ,
1113 artifact_to_python_content_data ,
1214 fetch_json_release_metadata ,
1315 parse_metadata ,
@@ -41,16 +43,25 @@ def repair(repository_pk: UUID) -> None:
4143 content_set = repository .latest_version ().content .values_list ("pk" , flat = True )
4244 content = PythonPackageContent .objects .filter (pk__in = content_set )
4345
44- num_repaired , pkgs_not_repaired = repair_metadata (content )
46+ num_repaired , pkgs_not_repaired , num_metadata_repaired , pkgs_metadata_not_repaired = (
47+ repair_metadata (content )
48+ )
49+ # Convert set() to 0
50+ if not pkgs_not_repaired :
51+ pkgs_not_repaired = 0
52+ if not pkgs_metadata_not_repaired :
53+ pkgs_metadata_not_repaired = 0
54+
4555 log .info (
4656 _ (
4757 "{} packages' metadata repaired. Not repaired packages due to either "
48- "inaccessible URL or mismatched sha256: {}."
49- ).format (num_repaired , pkgs_not_repaired )
58+ "inaccessible URL or mismatched sha256: {}. "
59+ "{} metadata files repaired. Packages whose metadata files could not be repaired: {}."
60+ ).format (num_repaired , pkgs_not_repaired , num_metadata_repaired , pkgs_metadata_not_repaired )
5061 )
5162
5263
53- def repair_metadata (content : QuerySet [PythonPackageContent ]) -> tuple [int , set [str ]]:
64+ def repair_metadata (content : QuerySet [PythonPackageContent ]) -> tuple [int , set [str ], int , set [ str ] ]:
5465 """
5566 Repairs metadata for a queryset of PythonPackageContent objects
5667 and updates the progress report.
@@ -59,9 +70,11 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> tuple[int, set[s
5970 content (QuerySet[PythonPackageContent]): The queryset of items to repair.
6071
6172 Returns:
62- tuple[int, set[str]]: A tuple containing:
73+ tuple[int, set[str], int, set[str] ]: A tuple containing:
6374 - The number of packages that were repaired.
6475 - A set of packages' PKs that were not repaired.
76+ - The number of metadata files that were repaired.
77+ - A set of packages' PKs without repaired metadata artifacts.
6578 """
6679 immediate_content = (
6780 content .filter (contentartifact__artifact__isnull = False )
@@ -87,6 +100,11 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> tuple[int, set[s
87100 # Keep track of on-demand packages that were not repaired
88101 pkgs_not_repaired = set ()
89102
103+ # Metadata artifacts and content artifacts
104+ metadata_batch = []
105+ total_metadata_repaired = 0
106+ pkgs_metadata_not_repaired = set ()
107+
90108 progress_report = ProgressReport (
91109 message = "Repairing packages' metadata" ,
92110 code = "repair.metadata" ,
@@ -102,6 +120,14 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> tuple[int, set[s
102120 .artifact
103121 )
104122 new_data = artifact_to_python_content_data (package .filename , main_artifact , domain )
123+ total_metadata_repaired += update_metadata_artifact_if_needed (
124+ package ,
125+ new_data .get ("metadata_sha256" ),
126+ main_artifact ,
127+ domain ,
128+ metadata_batch ,
129+ pkgs_metadata_not_repaired ,
130+ )
105131 total_repaired += update_package_if_needed (
106132 package , new_data , batch , set_of_update_fields
107133 )
@@ -163,7 +189,12 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> tuple[int, set[s
163189 total_repaired += len (batch )
164190 PythonPackageContent .objects .bulk_update (batch , set_of_update_fields )
165191
166- return total_repaired , pkgs_not_repaired
192+ if metadata_batch :
193+ not_repaired = _process_metadata_batch (metadata_batch )
194+ pkgs_metadata_not_repaired .update (not_repaired )
195+ total_metadata_repaired += len (metadata_batch ) - len (not_repaired )
196+
197+ return total_repaired , pkgs_not_repaired , total_metadata_repaired , pkgs_metadata_not_repaired
167198
168199
169200def update_package_if_needed (
@@ -202,3 +233,95 @@ def update_package_if_needed(
202233 set_of_update_fields .clear ()
203234
204235 return total_repaired
236+
237+
def update_metadata_artifact_if_needed(
    package: PythonPackageContent,
    new_metadata_sha256: str | None,
    main_artifact: Artifact,
    domain: Domain,
    metadata_batch: list[tuple],
    pkgs_metadata_not_repaired: set[str],
) -> int:
    """
    Queue a metadata-artifact repair for a wheel package when one is needed.

    A repair is queued when the package has no ContentArtifact whose relative path
    ends with ".metadata" ("create"), or when the package's stored metadata_sha256
    differs from new_metadata_sha256 and the existing metadata artifact is missing
    or has a different sha256 ("update"). Packages whose filename does not end with
    ".whl", or for which new_metadata_sha256 is empty/None, are ignored.

    Queued operations are processed once the batch reaches BULK_SIZE entries.

    Args:
        package: Package to check for metadata changes.
        new_metadata_sha256: The correct metadata_sha256 extracted from the main artifact, or None.
        main_artifact: The main package artifact used to generate metadata.
        domain: The domain in which the metadata artifact will be created.
        metadata_batch: List of (action, package, main_artifact, content_artifact, domain)
            tuples for batch processing (updated in-place).
        pkgs_metadata_not_repaired: Set of package PKs that failed repair (updated in-place).

    Returns:
        Number of metadata artifacts repaired by this call. Non-zero only when the
        call caused the batch to be flushed; otherwise 0.
    """
    if not package.filename.endswith(".whl") or not new_metadata_sha256:
        return 0

    # A single first() call answers both "does a metadata ContentArtifact exist?"
    # and "which one?". Truth-testing the queryset and then calling first() would
    # hit the database twice for every package.
    ca = package.contentartifact_set.filter(relative_path__endswith=".metadata").first()

    action = None
    if ca is None:
        # Create missing
        action = "create"
    elif new_metadata_sha256 != package.metadata_sha256:
        # Fix existing
        metadata_artifact = ca.artifact
        if metadata_artifact is None or metadata_artifact.sha256 != new_metadata_sha256:
            action = "update"

    if action is None:
        # Nothing was queued, so the batch size did not change.
        return 0

    # For "create" there is no ContentArtifact yet, so `ca` is None here.
    metadata_batch.append((action, package, main_artifact, ca, domain))

    if len(metadata_batch) < BULK_SIZE:
        return 0

    # Count with the real batch length so the result stays correct even if the
    # caller handed over a batch that already exceeded BULK_SIZE.
    queued = len(metadata_batch)
    not_repaired = _process_metadata_batch(metadata_batch)
    pkgs_metadata_not_repaired.update(not_repaired)
    metadata_batch.clear()

    return queued - len(not_repaired)
287+
288+
def _process_metadata_batch(metadata_batch: list[tuple]) -> set[str]:
    """
    Apply every queued metadata repair operation in the batch.

    For each entry a metadata artifact is built from the package's main artifact,
    assigned to the entry's domain and saved. A "create" entry gets a new
    ContentArtifact pointing at it; an "update" entry has its existing
    ContentArtifact re-pointed. ContentArtifact writes are done in bulk after
    the loop.

    Args:
        metadata_batch: List of (action, package, main_artifact, content_artifact, domain) tuples.

    Returns:
        Set of package PKs for which no metadata artifact could be produced.
    """
    failed_pks = set()
    new_content_artifacts = []
    changed_content_artifacts = []

    for action, package, main_artifact, content_artifact, domain in metadata_batch:
        metadata_artifact = artifact_to_metadata_artifact(package.filename, main_artifact)
        if not metadata_artifact:
            # No metadata artifact could be built for this package; report it.
            failed_pks.add(package.pk)
            continue

        metadata_artifact.pulp_domain = domain
        metadata_artifact.save()

        if action == "create":
            new_content_artifacts.append(
                ContentArtifact(
                    artifact=metadata_artifact,
                    content=package,
                    relative_path=f"{package.filename}.metadata",
                )
            )
        elif action == "update":
            content_artifact.artifact = metadata_artifact
            changed_content_artifacts.append(content_artifact)

    # Skip the database round-trips when there is nothing to write.
    if new_content_artifacts:
        ContentArtifact.objects.bulk_create(new_content_artifacts)
    if changed_content_artifacts:
        ContentArtifact.objects.bulk_update(changed_content_artifacts, ["artifact"])

    return failed_pks
0 commit comments