Skip to content

Commit 1153e29

Browse files
committed
implement script to populate projects with users
1 parent 14e549e commit 1153e29

File tree

2 files changed

+320
-0
lines changed

2 files changed

+320
-0
lines changed

backend/scripts/.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Created by venv; see https://docs.python.org/3/library/venv.html
2+
adjustedreqs.txt
3+
venv/
Lines changed: 317 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,317 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "b9b6c5e5-4b20-4407-9542-3bea81ab742e",
6+
"metadata": {},
7+
"source": [
8+
"# Setup\n",
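9+
"For dev, you must have the backend API running on your computer. For prod, please change USER_API_URL to reflect the production URL. The cells below read `DATABASE_URL` and `CUSTOM_REQUEST_HEADER` from a `.env` file via `load_dotenv()`."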
10+
]
11+
},
12+
{
13+
"cell_type": "code",
14+
"execution_count": 393,
15+
"id": "d04b046c-ad92-4f9b-a7d1-c900c1ff4581",
16+
"metadata": {},
17+
"outputs": [],
18+
"source": [
19+
"import requests\n",
20+
"import json\n",
21+
"import os\n",
22+
"import re\n",
23+
"import pprint as pp\n",
24+
"from dotenv import load_dotenv\n",
25+
"from bson.objectid import ObjectId\n",
26+
"from datetime import datetime\n",
27+
"from functools import reduce\n",
28+
"from pymongo import MongoClient, ReturnDocument, UpdateOne\n",
29+
"from pymongo.errors import BulkWriteError\n",
30+
"\n",
31+
"load_dotenv()\n",
32+
"custom_request_header = os.getenv(\"CUSTOM_REQUEST_HEADER\")\n",
33+
"DATABASE_URL = os.getenv(\"DATABASE_URL\")"
34+
]
35+
},
36+
{
37+
"cell_type": "markdown",
38+
"id": "852bea67-8354-49df-b6fb-c766f305ee8a",
39+
"metadata": {},
40+
"source": [
41+
"# Connect to database and check current list of DBs"
42+
]
43+
},
44+
{
45+
"cell_type": "code",
46+
"execution_count": 395,
47+
"id": "33d48fca-a40d-4619-b97b-46b598258967",
48+
"metadata": {},
49+
"outputs": [
50+
{
51+
"name": "stdout",
52+
"output_type": "stream",
53+
"text": [
54+
"['backup_db', 'testdb', 'vrms-populate-projects-test', 'vrms-slack-dev', 'vrms-slack-main', 'vrms-slack-staging', 'vrms-test', 'vrms-test-2', 'vrms-test-3', 'vrms-test-4', 'vrms-test-5', 'vrms-test-6', 'vrms-test-clone-project-sync', 'vrms-test-copy', 'vrms-test-sync', 'vrms-user-migration-test', 'admin', 'local']\n"
55+
]
56+
}
57+
],
58+
"source": [
59+
"# Connect to MongoDB\n",
60+
"client = MongoClient(DATABASE_URL)\n",
61+
"print(client.list_database_names())"
62+
]
63+
},
64+
{
65+
"cell_type": "markdown",
66+
"id": "0e4d3414-f130-4e76-9506-efd468d401df",
67+
"metadata": {},
68+
"source": [
69+
"# Create a new test database\n",
70+
"\n",
71+
"Define the source database and the test database that the source collections are copied into.\n"
72+
]
73+
},
74+
{
75+
"cell_type": "code",
76+
"execution_count": 396,
77+
"id": "68a7e8a9-e3f3-4231-8424-8b8dd44f522f",
78+
"metadata": {},
79+
"outputs": [],
80+
"source": [
81+
"db_source = client['vrms-test']\n",
82+
"db_copy = client['vrms-populate-projects-test']"
83+
]
84+
},
85+
{
86+
"cell_type": "markdown",
87+
"id": "6565ea84-e799-40d0-a56b-7859620db461",
88+
"metadata": {},
89+
"source": [
90+
"# Drop all collections in test database (ONLY IF NECESSARY!)\n"
91+
]
92+
},
93+
{
94+
"cell_type": "code",
95+
"execution_count": 405,
96+
"id": "a4cb07f2-3e55-4a2e-8358-96bf67ebf354",
97+
"metadata": {},
98+
"outputs": [],
99+
"source": [
100+
"# for collection_name in db_copy.list_collection_names():\n",
101+
"# db_copy.drop_collection(collection_name)\n",
102+
"# print(f\"Dropped collection: {collection_name}\")"
103+
]
104+
},
105+
{
106+
"cell_type": "markdown",
107+
"id": "141b69ae-a407-4c41-a551-33f547244eb0",
108+
"metadata": {},
109+
"source": [
110+
"# Copy Users and Projects collections from source -> test databases\n"
111+
]
112+
},
113+
{
114+
"cell_type": "code",
115+
"execution_count": 398,
116+
"id": "fd46eb06-d246-455e-8f48-a4e5df0efc9a",
117+
"metadata": {},
118+
"outputs": [],
119+
"source": [
120+
"users_collection = db_source['users']\n",
121+
"users = list(users_collection.find())\n",
122+
"projects_collection = db_source['projects']\n",
123+
"projects = list(projects_collection.find())\n",
124+
"\n",
125+
"users_copy = db_copy['users']\n",
126+
"projects_copy = db_copy['projects']\n",
127+
"\n",
128+
"for target, docs in ((users_copy, users), (projects_copy, projects)):\n",
129+
"    try:\n",
130+
"        target.insert_many(docs, ordered=False)  # Copy each source collection on its own so one failure does not skip the other\n",
131+
"    except BulkWriteError as bwe:\n",
132+
"        print(\"BulkWriteError details:\")\n",
133+
"        print(bwe.details) # This contains info on which documents failed and why"
134+
]
135+
},
136+
{
137+
"cell_type": "markdown",
138+
"id": "0c8b8712-7654-4f42-96c2-3809d33d214a",
139+
"metadata": {},
140+
"source": [
141+
"# Get Users with at least one entry in `managedProjects`\n",
142+
"\n",
143+
"Retrieve a list of all users whose `managedProjects` array has at least one entry.\n"
144+
]
145+
},
146+
{
147+
"cell_type": "code",
148+
"execution_count": 399,
149+
"id": "d4f52891-72c0-440c-8ef1-0f2102cebdb1",
150+
"metadata": {},
151+
"outputs": [],
152+
"source": [
153+
"# `managedProjects.0` matches arrays that have a first element, so null, missing and empty values are skipped\n",
154+
"query = {\n",
155+
"    \"managedProjects.0\": {\n",
156+
"        \"$exists\": True\n",
157+
"    }\n",
158+
"}\n",
159+
"\n",
160+
"target_users = list(users_copy.find(query))"
161+
]
162+
},
163+
{
164+
"cell_type": "markdown",
165+
"id": "de61c365-ec09-4acf-b863-221067f988db",
166+
"metadata": {},
167+
"source": [
168+
"# Create a dictionary called `projects_users`\n",
169+
"\n",
170+
"The dict has project IDs as keys and arrays of user IDs as values\n"
171+
]
172+
},
173+
{
174+
"cell_type": "code",
175+
"execution_count": 400,
176+
"id": "dd384405-c9bc-4b00-bb9b-8dcd4be0e9ba",
177+
"metadata": {},
178+
"outputs": [
179+
{
180+
"name": "stdout",
181+
"output_type": "stream",
182+
"text": [
183+
"{'68a3e64ee2653c001fe3ff3b': [ObjectId('6481155fab091f001e30925b'),\n",
184+
" ObjectId('66024c13e6a0050028e07948'),\n",
185+
" ObjectId('670dd397cace6a002abb20ce')],\n",
186+
" '68a3e75ea19d60385b3938f8': [ObjectId('670dd397cace6a002abb20ce')]}\n"
187+
]
188+
}
189+
],
190+
"source": [
191+
"projects_users = {}\n",
192+
"\n",
193+
"# Function to filter only projects with valid mongoose IDs\n",
194+
"def filter_valid_mongoose_ids(id_list):\n",
195+
" return [x for x in id_list if ObjectId.is_valid(x)]\n",
196+
"\n",
197+
"for user in target_users:\n",
198+
"    # Destructure id and managed projects from user\n",
199+
"    _id, managed_projects = user['_id'], user['managedProjects']\n",
200+
"\n",
201+
"    # Filter projects\n",
202+
"    filtered_projects = filter_valid_mongoose_ids(managed_projects)\n",
203+
"\n",
204+
"    for proj_id in filtered_projects:\n",
205+
"        # Dict keys are strings, so normalise before the lookup: an ObjectId\n",
206+
"        # proj_id would never match a string key and would overwrite the list.\n",
207+
"        proj_key = str(proj_id)\n",
208+
"        projects_users.setdefault(proj_key, []).append(_id)\n",
209+
"\n",
210+
"pp.pprint(projects_users)"
211+
]
212+
},
213+
{
214+
"cell_type": "markdown",
215+
"id": "a34d198a-ce32-41af-b4e2-2be590a6f5a6",
216+
"metadata": {},
217+
"source": [
218+
"# Update `managedByUsers` field in Projects \n",
219+
"\n",
220+
"Update each project's `managedByUsers` array using a bulk write"
221+
]
222+
},
223+
{
224+
"cell_type": "code",
225+
"execution_count": 404,
226+
"id": "f280d029-47ed-46ef-a8d1-731071600a49",
227+
"metadata": {},
228+
"outputs": [
229+
{
230+
"name": "stdout",
231+
"output_type": "stream",
232+
"text": [
233+
"Project before update:\n",
234+
"{'__v': 0,\n",
235+
" '_id': ObjectId('68a3e64ee2653c001fe3ff3b'),\n",
236+
" 'createdDate': datetime.datetime(2025, 8, 19, 2, 49, 50, 843000),\n",
237+
" 'description': 'Testing...',\n",
238+
" 'githubIdentifier': 'lkjlkj',\n",
239+
" 'githubUrl': 'lkjlk',\n",
240+
" 'googleDriveUrl': 'https://drive.google.com/drive/folders/1hAq0wyZKOaZLujqOYiaFv5PYgooISger?usp=drive_link',\n",
241+
" 'hflaWebsiteUrl': 'lkjlkj',\n",
242+
" 'managedByUsers': [ObjectId('6481155fab091f001e30925b'),\n",
243+
" ObjectId('66024c13e6a0050028e07948'),\n",
244+
" ObjectId('670dd397cace6a002abb20ce')],\n",
245+
" 'name': 'Jacks Test Project',\n",
246+
" 'partners': [],\n",
247+
" 'projectStatus': 'Active',\n",
248+
" 'recruitingCategories': [],\n",
249+
" 'slackUrl': 'lkjlkj'}\n",
250+
"Project before update:\n",
251+
"{'__v': 0,\n",
252+
" '_id': ObjectId('68a3e75ea19d60385b3938f8'),\n",
253+
" 'createdDate': datetime.datetime(2025, 8, 19, 2, 54, 22, 871000),\n",
254+
" 'description': 'afk',\n",
255+
" 'githubIdentifier': 'afk',\n",
256+
" 'githubUrl': 'afk',\n",
257+
" 'googleDriveUrl': 'https://drive.google.com/test',\n",
258+
" 'hflaWebsiteUrl': 'afk',\n",
259+
" 'managedByUsers': [ObjectId('670dd397cace6a002abb20ce')],\n",
260+
" 'name': 'VRMS Test Project',\n",
261+
" 'partners': [],\n",
262+
" 'projectStatus': 'Active',\n",
263+
" 'recruitingCategories': [],\n",
264+
" 'slackUrl': 'afk'}\n",
265+
"Result: BulkWriteResult({'writeErrors': [], 'writeConcernErrors': [], 'nInserted': 0, 'nUpserted': 0, 'nMatched': 2, 'nModified': 0, 'nRemoved': 0, 'upserted': []}, acknowledged=True)\n"
266+
]
267+
}
268+
],
269+
"source": [
270+
"operations = []\n",
271+
"\n",
272+
"for proj_id, user_ids in projects_users.items():\n",
273+
" valid_user_ids = [uid for uid in user_ids if ObjectId.is_valid(uid)] \n",
274+
"\n",
275+
" proj = projects_copy.find_one({\"_id\": ObjectId(proj_id)})\n",
276+
"\n",
277+
" if proj:\n",
278+
" print('Project before update:')\n",
279+
" pp.pprint(proj)\n",
280+
" \n",
281+
" # Compile individual updates in operations \n",
282+
" operations.append(UpdateOne(\n",
283+
" {\"_id\": ObjectId(proj_id)}, # Filter\n",
284+
" {\"$set\": {\"managedByUsers\": valid_user_ids}}, # Update\n",
285+
" ))\n",
286+
" else:\n",
287+
" print(f\"No project with {proj_id} found\")\n",
288+
"\n",
289+
"# Execute the bulk write; PyMongo raises InvalidOperation on an empty operations list, so skip it then\n",
290+
"result = projects_copy.bulk_write(operations) if operations else None\n",
291+
"\n",
292+
"print(\"Result: \", result)"
293+
]
294+
}
295+
],
296+
"metadata": {
297+
"kernelspec": {
298+
"display_name": "Python 3 (ipykernel)",
299+
"language": "python",
300+
"name": "python3"
301+
},
302+
"language_info": {
303+
"codemirror_mode": {
304+
"name": "ipython",
305+
"version": 3
306+
},
307+
"file_extension": ".py",
308+
"mimetype": "text/x-python",
309+
"name": "python",
310+
"nbconvert_exporter": "python",
311+
"pygments_lexer": "ipython3",
312+
"version": "3.8.10"
313+
}
314+
},
315+
"nbformat": 4,
316+
"nbformat_minor": 5
317+
}

0 commit comments

Comments
 (0)