Skip to content
Snippets Groups Projects

a docker-compose to build two containers to extract, transform and insert into...

Merged Francisco Perez requested to merge otterdog_database into main
20 files
+ 822
0
Compare changes
  • Side-by-side
  • Inline
Files
20
+ 68
0
import pymongo
import json
import urllib.parse
import os
# Runtime configuration, read from the process environment when the module is
# imported. A missing variable raises KeyError immediately.

# Credentials embedded in the MongoDB connection URI.
USERNAME = os.environ["MONGODB_CLOUD_USERNAME"]
PASSWD = os.environ["MONGODB_CLOUD_PASSWORD"]

# Names of the database and collection the documents are written to.
DB = os.environ["MONGODB_CLOUD_DB"]
COLLECTION = os.environ["MONGODB_CLOUD_COLLECTION"]

# Directory that is scanned for the *.json files to load.
DATA_PATH = os.environ["DATA_PATH"]
### TODO
## Add an environment file to populate the variables above
## Integrate this script into the ETL script
def insert_json_file(json_file_path):
    """Insert or update the document stored in a JSON file.

    The file is expected to hold a single JSON object with a "github_id"
    key. If the collection already contains a document with the same
    "github_id", its fields are overwritten via ``$set``; otherwise the
    object is inserted as a new document. The outcome is reported on stdout.

    Args:
        json_file_path: Path of the JSON file to load.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If the loaded object has no "github_id" key.

    Errors raised by pymongo (connection, authentication, write failures)
    propagate to the caller unchanged.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform locale.
    with open(json_file_path, "r", encoding="utf-8") as json_file:
        json_data = json.load(json_file)

    # Build the lookup key first so a malformed file fails before any
    # network connection is attempted.
    query = {"github_id": json_data["github_id"]}

    # Escape the username and password using quote_plus so that reserved
    # characters do not break the connection URI.
    username = urllib.parse.quote_plus(USERNAME)
    password = urllib.parse.quote_plus(PASSWD)
    # This is a specific connection to the current free MongoDB instance.
    host = "cluster0.t3vfep0.mongodb.net"
    uri = f"mongodb+srv://{username}:{password}@{host}/?retryWrites=true&w=majority"

    # Report the host only: the full URI embeds the password and must never
    # be written to stdout/logs. MongoClient connects lazily, so at this
    # point the connection is being set up rather than verified.
    print(f"Connecting to server: {host}")

    # Using the client as a context manager guarantees it is closed even
    # when one of the database operations below raises.
    with pymongo.MongoClient(uri) as client:
        collection = client[DB][COLLECTION]

        existing_document = collection.find_one(query)
        if existing_document is not None:
            # Update the existing document
            collection.update_one(query, {"$set": json_data})
            print("Document updated successfully with ID:", existing_document["_id"])
            return

        # Insert the new document
        insert_result = collection.insert_one(json_data)
        # Compare with None so that a falsy but valid _id (e.g. 0) is not
        # reported as a failure.
        if insert_result.inserted_id is not None:
            print("Document inserted successfully with ID:", insert_result.inserted_id)
        else:
            print("Failed to insert document")
def main():
    """Load every ``.json`` file found directly inside DATA_PATH.

    Each matching file is passed to insert_json_file() and a confirmation
    line is printed once it has been processed. Entries whose name does not
    end in ".json" are ignored.
    """
    json_names = [name for name in os.listdir(DATA_PATH) if name.endswith(".json")]
    for name in json_names:
        insert_json_file(os.path.join(DATA_PATH, name))
        print(f"Processed file: {name}")


# Run the loader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
\ No newline at end of file
Loading