Skip to content

[Quality Evaluator] Wrong response: all columns are joined into a single column

With the following payload:

{
  "object_storage_url": "http://91.235.109.231:9000/default/85a6410d-8c9a-48b4-b7f9-7891adf01396/tmpqlgaho2i.csv?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20250403T150341Z&X-Amz-SignedHeaders=host&X-Amz-Credential=minioadmin%2F20250403%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Expires=600&X-Amz-Signature=086cd2d44be599efbfde190b9a12030103225835a91dfcd5d9f7065a4609590b",
  "resource_name": "E-REDES-NetworkConnections-DS.csv",
  "dataset_id": "b743a885-18e5-403e-b5cc-ff7e476430f0",
  "column_types": {
    "Year": "text",
    "Semester": "text",
    "Date": "text",
    "Municipality": "text",
    "Executed Network Connection Requests": "text",
    "CodConcelho": "text"
  },
  "csv_options": {
    "field_delimiter": ";",
    "decimal_delimiter": "."
  },
  "charset": "UTF_8",
  "mime_type": "text/csv"
}

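We got the following response, in which the six columns are reported as a single column (`general.num_columns` is 1) whose name is all six column names joined with commas: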

[
    {
        "column_name": "Year,Semester,Date,Municipality,Executed Network Connection Requests,CodConcelho",
        "dataset_id": "b743a885-18e5-403e-b5cc-ff7e476430f0",
        "profiling": [
            {
                "ddqv_hasParameters": [],
                "dqv_computedOn": "DATASET",
                "dqv_isMeasurementOf": "general.num_rows",
                "dqv_value": "2229",
                "rdf_datatype": "Integer"
            },
            {
                "ddqv_hasParameters": [],
                "dqv_computedOn": "DATASET",
                "dqv_isMeasurementOf": "general.num_columns",
                "dqv_value": 1,
                "rdf_datatype": "Integer"
            },
            {
                "ddqv_hasParameters": [],
                "dqv_computedOn": "Year,Semester,Date,Municipality,Executed Network Connection Requests,CodConcelho",
                "dqv_isMeasurementOf": "general.count",
                "dqv_value": "2229",
                "rdf_datatype": "Integer"
            },
            {
                "ddqv_hasParameters": [],
                "dqv_computedOn": "Year,Semester,Date,Municipality,Executed Network Connection Requests,CodConcelho",
                "dqv_isMeasurementOf": "general.position",
                "dqv_value": "0",
                "rdf_datatype": "Integer"
            },
            {
                "ddqv_hasParameters": [],
                "dqv_computedOn": "DATASET",
                "dqv_isMeasurementOf": "general.duplicated_entries",
                "dqv_value": "0",
                "rdf_datatype": "Integer"
            },
            {
                "ddqv_hasParameters": [],
                "dqv_computedOn": "DATASET",
                "dqv_isMeasurementOf": "general.duplicated_entries_percent",
                "dqv_value": "0.0",
                "rdf_datatype": "Float"
            },
            {
                "ddqv_hasParameters": [],
                "dqv_computedOn": "Year,Semester,Date,Municipality,Executed Network Connection Requests,CodConcelho",
                "dqv_isMeasurementOf": "general.completeness",
                "dqv_value": "0",
                "rdf_datatype": "Integer"
            },
            {
                "ddqv_hasParameters": [],
                "dqv_computedOn": "Year,Semester,Date,Municipality,Executed Network Connection Requests,CodConcelho",
                "dqv_isMeasurementOf": "general.completeness_percent",
                "dqv_value": "0.0",
                "rdf_datatype": "Float"
            },
            {
                "ddqv_hasParameters": [],
                "dqv_computedOn": "Year,Semester,Date,Municipality,Executed Network Connection Requests,CodConcelho",
                "dqv_isMeasurementOf": "general.unique_entries",
                "dqv_value": "2229",
                "rdf_datatype": "Integer"
            },
            {
                "ddqv_hasParameters": [],
                "dqv_computedOn": "Year,Semester,Date,Municipality,Executed Network Connection Requests,CodConcelho",
                "dqv_isMeasurementOf": "general.unique_entries_percent",
                "dqv_value": "100.0",
                "rdf_datatype": "Float"
            },
            {
                "ddqv_hasParameters": [],
                "dqv_computedOn": "Year,Semester,Date,Municipality,Executed Network Connection Requests,CodConcelho",
                "dqv_isMeasurementOf": "general.data_types",
                "dqv_value": "String",
                "rdf_datatype": "String"
            },
            {
                "ddqv_hasParameters": [],
                "dqv_computedOn": "Index",
                "dqv_isMeasurementOf": "general.memory_usage_bytes",
                "dqv_value": 128,
                "rdf_datatype": "Integer"
            },
            {
                "ddqv_hasParameters": [],
                "dqv_computedOn": "Year,Semester,Date,Municipality,Executed Network Connection Requests,CodConcelho",
                "dqv_isMeasurementOf": "general.memory_usage_bytes",
                "dqv_value": 17832,
                "rdf_datatype": "Integer"
            }
        ],
        "timestamp": "2025-04-03 15:30:07"
    }
]

Input file: E-REDES-NetworkConnections-DS.csv

/cc @jpla, @tasosnik

Edited by Antoni Gimeno