Commit 9b2a6046 authored by Boris Baldassari
Browse files

Fix clearlydefined script to prevent duplicates in curations file.

parent 0c266016
......@@ -16,6 +16,10 @@ import json
import requests
import getopt
import sys
import yaml
from datetime import datetime
from os.path import isfile
# Parse command line arguments
......@@ -70,22 +74,41 @@ for package in packages:
licenses[pkg] = answer['licensed']['declared']
urls[pkg] = '' + coordinates
print("Not found on clearlydefined.")
print("No declared license found on clearlydefined.")
print("Not found on clearlydefined.")
print("Not found on clearlydefined (status_code).")
print("Package type not recognised.")
print(f"\nFound {len(licenses)} curations.")
# Now write the yaml curation file
curations = []
for p in licenses:
f'- id: "{p}"\n'
f' curations:\n'
f' comment: "Curated from clearlydefined, see {urls[p]}"\n'
f' concluded_license: "{licenses[p]}"'))
print(f"Write curation file to {file_out}.")
# Now update the yaml curations file
curations_in = []
ids = []
if isfile(file_out):
print(f"Read curations from {file_out}.")
with open(file_out) as f:
curations_in = yaml.load(f, Loader=yaml.loader.SafeLoader)
ids = [ a['id'] for a in curations_in ]
now =
for pkg in licenses:
if pkg not in ids:
new = {}
new['id'] = pkg
new['curations'] = {
'comment': f'Curated from clearlydefined on {now}, see {urls[pkg]}',
'concluded_license': f'{licenses[pkg]}'
print(f"Write curations to {file_out}.")
with open(file_out, 'w') as f:
yaml.dump(curations_in, f, sort_keys=False) #, default_flow_style=False)
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment