Skip to content
Snippets Groups Projects
Commit 2162543b authored by Ankit Izardar's avatar Ankit Izardar
Browse files

add code for checking duplicates and a function to create a box of given size

parent 0b7f4576
No related branches found
No related tags found
No related merge requests found
%% Cell type:markdown id:f154826c tags: %% Cell type:markdown id:f154826c tags:
## Import Libraries ## Import Libraries
%% Cell type:code id:e650569d tags: %% Cell type:code id:e650569d tags:
``` python ``` python
import labkey import labkey
from labkey.api_wrapper import APIWrapper from labkey.api_wrapper import APIWrapper
import pandas as pd import pandas as pd
import json import json
import urllib3 import urllib3
import urllib import urllib
import os import os
``` ```
%% Cell type:markdown id:df672b72 tags: %% Cell type:markdown id:df672b72 tags:
## Project Configuration and LabKey API Initialization ## Project Configuration and LabKey API Initialization
%% Cell type:code id:2f599db6 tags: %% Cell type:code id:2f599db6 tags:
``` python ``` python
# Define project and LabKey server details # Define project and LabKey server details
PROJECT = 'sciCORE-dev/Ankit/Sample_Manager_Test_Dataset' PROJECT = 'sciCORE-dev/Ankit/Sample_Manager_Test_Dataset'
LABKEY_SERVER = "labkey-pro-dev.scicore.unibas.ch" LABKEY_SERVER = "labkey-pro-dev.scicore.unibas.ch"
CONTEXT_PATH = '' # Use 'labkey' for main server CONTEXT_PATH = '' # Use 'labkey' for main server
# Initialize LabKey API Wrapper # Initialize LabKey API Wrapper
api = APIWrapper(LABKEY_SERVER, PROJECT, CONTEXT_PATH, use_ssl=True) api = APIWrapper(LABKEY_SERVER, PROJECT, CONTEXT_PATH, use_ssl=True)
``` ```
%% Cell type:markdown id:3def1a57 tags: %% Cell type:markdown id:3def1a57 tags:
## Authentication Setup ## Authentication Setup
%% Cell type:code id:78ec9c0a tags: %% Cell type:code id:78ec9c0a tags:
``` python ``` python
# Path to .netrc file for authentication # Path to .netrc file for authentication
NETRC_FILE = os.path.join(os.path.expanduser('~'), '.netrc') NETRC_FILE = os.path.join(os.path.expanduser('~'), '.netrc')
# Verify and read .netrc file # Verify and read .netrc file
if not os.path.isfile(NETRC_FILE): if not os.path.isfile(NETRC_FILE):
raise FileNotFoundError(f'.netrc file not found: {NETRC_FILE}') raise FileNotFoundError(f'.netrc file not found: {NETRC_FILE}')
# Extract login credentials # Extract login credentials
netrc_df = pd.read_csv(NETRC_FILE, sep=' ', header=None, names=['key', 'value']) netrc_df = pd.read_csv(NETRC_FILE, sep=' ', header=None, names=['key', 'value'])
login = netrc_df.loc[netrc_df['key'] == 'login', 'value'].iloc[0] login = netrc_df.loc[netrc_df['key'] == 'login', 'value'].iloc[0]
password = netrc_df.loc[netrc_df['key'] == 'password', 'value'].iloc[0] password = netrc_df.loc[netrc_df['key'] == 'password', 'value'].iloc[0]
# Authentication headers # Authentication headers
headers = urllib3.util.make_headers(basic_auth=f'{login}:{password}') headers = urllib3.util.make_headers(basic_auth=f'{login}:{password}')
``` ```
%% Cell type:markdown id:f5f58e20 tags: %% Cell type:markdown id:f5f58e20 tags:
## Verify Project Directory ## Verify Project Directory
%% Cell type:code id:cef2e4e8 tags: %% Cell type:code id:cef2e4e8 tags:
``` python ``` python
params = {"includeSubfolders": True, "depth": 1} params = {"includeSubfolders": True, "depth": 1}
url = api.server_context.build_url("project", "getContainers.view", container_path=PROJECT.replace(' ', '%20')) url = api.server_context.build_url("project", "getContainers.view", container_path=PROJECT.replace(' ', '%20'))
resp = api.server_context.make_request(url, urllib.parse.urlencode(params).encode(), headers=headers, non_json_response=True) resp = api.server_context.make_request(url, urllib.parse.urlencode(params).encode(), headers=headers, non_json_response=True)
if resp.status_code == 404: if resp.status_code == 404:
raise Exception(f'Project not found: {PROJECT}. Please create it first.') raise Exception(f'Project not found: {PROJECT}. Please create it first.')
``` ```
%% Cell type:markdown id:9203b282 tags: %% Cell type:markdown id:9203b282 tags:
## Create and Populate Source Type 'Study' ## Create and Populate Source Type 'Study'
%% Cell type:code id:0d422897 tags: %% Cell type:code id:0d422897 tags:
``` python ``` python
# Define the source Excel file for study data # Define the source Excel file for study data
SOURCE_STUDY = 'Study.xlsx' SOURCE_STUDY = 'Study.xlsx'
# Read data from the Excel file # Read data from the Excel file
try: try:
df = pd.read_excel(SOURCE_STUDY) df = pd.read_excel(SOURCE_STUDY)
except Exception as e: except Exception as e:
print(f'Error reading Excel file {SOURCE_STUDY}: {e}') print(f'Error reading Excel file {SOURCE_STUDY}: {e}')
exit(1) exit(1)
# Extract column names # Extract column names
columns = df.columns[1:].tolist() columns = df.columns[1:].tolist()
# Define LabKey fields for the DataClass # Define LabKey fields for the DataClass
labkey_fields = [{'name': 'Name', 'rangeURI': 'string'}] labkey_fields = [{'name': 'Name', 'rangeURI': 'string'}]
labkey_fields = [{"name": col, "rangeURI": "string"} for col in columns] labkey_fields = [{"name": col, "rangeURI": "string"} for col in columns]
# Define DataClass domain # Define DataClass domain
study_domain_definition = { study_domain_definition = {
"kind": "DataClass", "kind": "DataClass",
"domainDesign": { "domainDesign": {
"name": "Study", "name": "Study",
"fields": labkey_fields "fields": labkey_fields
}, },
"options": { "options": {
"category": "sources" "category": "sources"
} }
} }
# Create the DataClass domain in LabKey # Create the DataClass domain in LabKey
try: try:
created_dataclass_domain = api.domain.create(study_domain_definition) created_dataclass_domain = api.domain.create(study_domain_definition)
print("Success: Domain created for sample source: Study") print("Success: Domain created for sample source: Study")
except Exception as e: except Exception as e:
print(f'Error creating domain: {e}') print(f'Error creating domain: {e}')
exit(1) exit(1)
# Insert data into the DataClass 'Study' # Insert data into the DataClass 'Study'
sources_rows = [] sources_rows = []
# Add Samples of the Sample Type # Add Samples of the Sample Type
for i, row in df.iterrows(): for i, row in df.iterrows():
sources_rows.append(row[columns].fillna('').to_dict()) sources_rows.append(row[columns].fillna('').to_dict())
sources_rows[-1]['Name'] = row['SourceID'] sources_rows[-1]['Name'] = row['SourceID']
# Insert data into the DataClass 'Study' # Insert data into the DataClass 'Study'
try: try:
insert_result = api.query.insert_rows("exp.data", "Study", sources_rows) insert_result = api.query.insert_rows("exp.data", "Study", sources_rows)
print("Success: Data inserted into the DataClass: Study") print("Success: Data inserted into the DataClass: Study")
except Exception as e: except Exception as e:
print(f'Error inserting data: {e}') print(f'Error inserting data: {e}')
exit(1) exit(1)
``` ```
%% Output %% Output
/Users/izarda0000/miniconda3/lib/python3.12/site-packages/openpyxl/styles/stylesheet.py:237: UserWarning: Workbook contains no default style, apply openpyxl's default /Users/izarda0000/miniconda3/lib/python3.12/site-packages/openpyxl/styles/stylesheet.py:237: UserWarning: Workbook contains no default style, apply openpyxl's default
warn("Workbook contains no default style, apply openpyxl's default") warn("Workbook contains no default style, apply openpyxl's default")
Success: Domain created for sample source: Study Success: Domain created for sample source: Study
Success: Data inserted into the DataClass: Study Success: Data inserted into the DataClass: Study
%% Cell type:markdown id:cee50632 tags: %% Cell type:markdown id:cee50632 tags:
## Create and Populate Source Type 'Patient' (Linked to Study) ## Create and Populate Source Type 'Patient' (Linked to Study)
%% Cell type:code id:dfb86ebf tags: %% Cell type:code id:dfb86ebf tags:
``` python ``` python
# Define the source Excel file for study data # Define the source Excel file for study data
SOURCE_PATIENT = 'Patient.xlsx' SOURCE_PATIENT = 'Patient.xlsx'
# Read data from the Excel file # Read data from the Excel file
try: try:
df = pd.read_excel(SOURCE_PATIENT) df = pd.read_excel(SOURCE_PATIENT)
except Exception as e: except Exception as e:
print(f'Error reading Excel file {SOURCE_PATIENT}: {e}') print(f'Error reading Excel file {SOURCE_PATIENT}: {e}')
exit(1) exit(1)
# Extract column names except for the last column SourceStudy and SourceID # Extract column names except for the last column SourceStudy and SourceID
columns = df.columns.tolist()[1:-1] columns = df.columns.tolist()[1:-1]
# Define LabKey fields for the DataClass # Define LabKey fields for the DataClass
labkey_fields = [{'name': 'Name', 'rangeURI': 'string'}] labkey_fields = [{'name': 'Name', 'rangeURI': 'string'}]
labkey_fields = [{"name": col, "rangeURI": "string"} for col in columns] labkey_fields = [{"name": col, "rangeURI": "string"} for col in columns]
# Define DataClass domain # Define DataClass domain
patient_domain_definition = { patient_domain_definition = {
"kind": "DataClass", "kind": "DataClass",
"domainDesign": { "domainDesign": {
"name": "Patient", "name": "Patient",
"fields": labkey_fields "fields": labkey_fields
}, },
"options": { "options": {
"category": "sources", "category": "sources",
"name": "Patient", "name": "Patient",
"importAliases": { "importAliases": {
"SourceStudy": { "SourceStudy": {
"inputType": "dataInputs/Study" "inputType": "dataInputs/Study"
} }
} }
} }
} }
# Create the DataClass domain in LabKey # Create the DataClass domain in LabKey
try: try:
created_dataclass_domain = api.domain.create(patient_domain_definition) created_dataclass_domain = api.domain.create(patient_domain_definition)
print("Success: Domain created for sample source: Patient") print("Success: Domain created for sample source: Patient")
except Exception as e: except Exception as e:
print(f'Error creating domain: {e}') print(f'Error creating domain: {e}')
exit(1) exit(1)
# Insert data into the DataClass 'Patient' # Insert data into the DataClass 'Patient'
sources_rows = [] sources_rows = []
# Add Source of the Source Type 'Patient'. Also now include 'SourceStudy' column to add lineage # Add Source of the Source Type 'Patient'. Also now include 'SourceStudy' column to add lineage
for i, row in df.iterrows(): for i, row in df.iterrows():
sources_rows.append(row[columns + ['SourceStudy']].fillna('').to_dict()) sources_rows.append(row[columns + ['SourceStudy']].fillna('').to_dict())
sources_rows[-1]['Name'] = row['SourceID'] sources_rows[-1]['Name'] = row['SourceID']
# Insert data into the DataClass 'Study' # Insert data into the DataClass 'Study'
try: try:
insert_result = api.query.insert_rows("exp.data", "Patient", sources_rows) insert_result = api.query.insert_rows("exp.data", "Patient", sources_rows)
print("Success: Data inserted into the DataClass: Patient") print("Success: Data inserted into the DataClass: Patient")
except Exception as e: except Exception as e:
print(f'Error inserting data: {e}') print(f'Error inserting data: {e}')
exit(1) exit(1)
``` ```
%% Output %% Output
Success: Domain created for sample source: Patient Success: Domain created for sample source: Patient
Success: Data inserted into the DataClass: Patient Success: Data inserted into the DataClass: Patient
%% Cell type:markdown id:36d27f01 tags: %% Cell type:markdown id:36d27f01 tags:
## Process Samples and Create Storage Hierarchy ## Process Samples and Create Storage Hierarchy
### By default: Box sizes of 10x10 are created! Please make sure to check the Box size with the user and correct them in the UI after boxes are created before populating with Samples!!! ### By default: Box sizes of 10x10 are created! Please make sure to check the Box size with the user and correct them in the UI after boxes are created before populating with Samples!!!
%% Cell type:markdown id:e87c18f0 tags:
### First check for duplicate locations
%% Cell type:code id:8d9568dd tags:
``` python
# Get a list of duplicates for Location column in df
duplicates = df[df.duplicated(['Location'], keep=False)]
# print duplicates locations
if not duplicates.empty:
print("Duplicates found in 'Location' column:")
for index, row in duplicates.iterrows():
print(f"Row {index}: {row['Location']}")
```
%% Cell type:code id:8a5bf323 tags: %% Cell type:code id:8a5bf323 tags:
``` python ``` python
SOURCE_SAMPLES = 'Samples.xlsx' SOURCE_SAMPLES = 'Samples.xlsx'
# Read data from the Excel file # Read data from the Excel file
try: try:
df = pd.read_excel(SOURCE_SAMPLES) df = pd.read_excel(SOURCE_SAMPLES)
except Exception as e: except Exception as e:
print(f'Error reading Excel file {SOURCE_SAMPLES}: {e}') print(f'Error reading Excel file {SOURCE_SAMPLES}: {e}')
exit(1) exit(1)
# Sample ID is a reserved field in LabKey, so we need to rename it to SampleIdentifier. # Sample ID is a reserved field in LabKey, so we need to rename it to SampleIdentifier.
df.rename(columns={'Sample ID': 'SampleIdentifier'}, inplace=True) df.rename(columns={'Sample ID': 'SampleIdentifier'}, inplace=True)
# Get columns for samples table but do not include the 'SourcePatient'. It is only used for mapping to the Patient DataClass # Get columns for samples table but do not include the 'SourcePatient'. It is only used for mapping to the Patient DataClass
# Also exclude Sample ID column. It will be renamed to "Name" column. # Also exclude Sample ID column. It will be renamed to "Name" column.
columns = df.columns[1:-1].tolist() columns = df.columns[1:-1].tolist()
columns columns
# ### Get Building, Floor, Freezer, Shelf, Rack, Box, and Coordinates from Location # ### Get Building, Floor, Freezer, Shelf, Rack, Box, and Coordinates from Location
# Note: Change the heirarchy according to the data in excel file # Note: Change the heirarchy according to the data in excel file
df['Building'] = df['Location'].str.split('/').str[0] df['Building'] = df['Location'].str.split('/').str[0]
df['Floor'] = df['Location'].str.split('/').str[1] df['Floor'] = df['Location'].str.split('/').str[1]
df['Freezer'] = df['Location'].str.split('/').str[2] df['Freezer'] = df['Location'].str.split('/').str[2]
df['Freezer full'] = df['Building'] + '/' + df['Floor'] + '/' + df['Freezer'] df['Freezer full'] = df['Building'] + '/' + df['Floor'] + '/' + df['Freezer']
df['Shelf'] = df['Location'].str.split('/').str[3] df['Shelf'] = df['Location'].str.split('/').str[3]
df['Rack'] = df['Location'].str.split('/').str[4] df['Rack'] = df['Location'].str.split('/').str[4]
df['Box'] = df['Location'].str.split('/').str[5] + '/' + df['Location'].str.split('/').str[6] df['Box'] = df['Location'].str.split('/').str[5] + '/' + df['Location'].str.split('/').str[6]
df['Coordinates'] = df['Box'].str.split(':').str[-1] df['Coordinates'] = df['Box'].str.split(':').str[-1]
df['Box'] = df['Box'].str.split(':').str[0] df['Box'] = df['Box'].str.split(':').str[0]
# Convert the 'Coordinates' column to numeric, invalid parsing will be set as NaN # Convert the 'Coordinates' column to numeric, invalid parsing will be set as NaN
df['StorageCol'] = pd.to_numeric(df['Coordinates'].str.split('/').str[-1]) df['StorageCol'] = pd.to_numeric(df['Coordinates'].str.split('/').str[-1])
df['StorageCol'] = None df['StorageCol'] = None
df['StorageRow'] = None df['StorageRow'] = None
df.loc[~df['Coordinates'].isna(), 'StorageCol'] = df.loc[~df['Coordinates'].isna(), 'Coordinates'].str.split('/').str[-1].astype(int) df.loc[~df['Coordinates'].isna(), 'StorageCol'] = df.loc[~df['Coordinates'].isna(), 'Coordinates'].str.split('/').str[-1].astype(int)
df.loc[~df['Coordinates'].isna(), 'StorageRow'] = df.loc[~df['Coordinates'].isna(), 'Coordinates'].str.split('/').str[0] df.loc[~df['Coordinates'].isna(), 'StorageRow'] = df.loc[~df['Coordinates'].isna(), 'Coordinates'].str.split('/').str[0]
# ### Create storage heierarchy in LabKey # ### Create storage heierarchy in LabKey
# Create Unit Type Box # Create Unit Type Box
try:
api.storage.delete_storage_item('Storage Unit Type', {'name': 'Box 10x10'})
except:
pass
result = api.storage.create_storage_item('Storage Unit Type', {'name': 'Box 10x10', 'description': 'Box 10x10', 'UnitType': 'Box', 'cols': 10, 'rows': 10}) def create_storage_unit_type(api, unit_type_name, description, cols, rows):
box_type_id = result['data']['rowId'] try:
result = api.storage.create_storage_item('Storage Unit Type', {'name': unit_type_name, 'description': description,'UnitType': 'Box', 'cols': cols, 'rows': rows})
return result['data']['rowId']
except Exception as e:
print(f"Error creating storage unit type {unit_type_name}: {e}")
return None
box_type_id_10_10 = create_storage_unit_type(api, 'Box 10x10', '10x10 Box for storing samples', 10, 10)
df['box_id'] = '' df['box_id'] = ''
# Physical Locations # Physical Locations
for building in df['Building'].dropna().unique(): for building in df['Building'].dropna().unique():
# 'Physical Location' -> # 'Physical Location' ->
result_building = api.storage.create_storage_item("Physical Location", {"name": building, "description": "Building"}) result_building = api.storage.create_storage_item("Physical Location", {"name": building, "description": "Building"})
building_id = result_building['data']['rowId'] building_id = result_building['data']['rowId']
building_df = df.loc[df['Building'] == building] building_df = df.loc[df['Building'] == building]
for floor in building_df['Floor'].dropna().unique(): for floor in building_df['Floor'].dropna().unique():
result_floor = api.storage.create_storage_item('Physical Location', {'name': floor, 'description': 'Floor', 'locationId': building_id}) result_floor = api.storage.create_storage_item('Physical Location', {'name': floor, 'description': 'Floor', 'locationId': building_id})
floor_id = result_floor['data']['rowId'] floor_id = result_floor['data']['rowId']
floor_df = building_df.loc[building_df['Floor'] == floor] floor_df = building_df.loc[building_df['Floor'] == floor]
# Labkey Terminology = Freezer. Our Dataset = Freezer / Liquid Nitrogen Storage # Labkey Terminology = Freezer. Our Dataset = Freezer / Liquid Nitrogen Storage
for freezer in floor_df['Freezer'].dropna().unique()[:1]: for freezer in floor_df['Freezer'].dropna().unique()[:1]:
freezer_full = '{}-{}-{}'.format(building, floor, freezer) freezer_full = '{}-{}-{}'.format(building, floor, freezer)
freezer_desc = 'Liquid Nitrogen Room' freezer_desc = 'Liquid Nitrogen Room'
if 'Freezer' in freezer: if 'Freezer' in freezer:
freezer_desc = 'Freezer' freezer_desc = 'Freezer'
result_freezer = api.storage.create_storage_item('Freezer', {'name': freezer_full, 'description': freezer_desc, 'locationId': floor_id}) result_freezer = api.storage.create_storage_item('Freezer', {'name': freezer_full, 'description': freezer_desc, 'locationId': floor_id})
freezer_id = result_freezer['data']['rowId'] freezer_id = result_freezer['data']['rowId']
freezer_df = floor_df.loc[floor_df['Freezer'] == freezer] freezer_df = floor_df.loc[floor_df['Freezer'] == freezer]
# Labkey Terminologoy = Shelf. Our Dataset = Shelf / Tank # Labkey Terminologoy = Shelf. Our Dataset = Shelf / Tank
for shelf in freezer_df['Shelf'].dropna().unique(): for shelf in freezer_df['Shelf'].dropna().unique():
shelf_desc = 'Shelf' shelf_desc = 'Shelf'
if 'tank' in shelf.lower(): if 'tank' in shelf.lower():
shelf_desc = 'Tank' shelf_desc = 'Tank'
result_shelf = api.storage.create_storage_item('Shelf', {'name': shelf, 'description': shelf_desc, 'locationId': freezer_id}) result_shelf = api.storage.create_storage_item('Shelf', {'name': shelf, 'description': shelf_desc, 'locationId': freezer_id})
shelf_id = result_shelf['data']['rowId'] shelf_id = result_shelf['data']['rowId']
shelf_df = freezer_df.loc[freezer_df['Shelf'] == shelf] shelf_df = freezer_df.loc[freezer_df['Shelf'] == shelf]
# Labkey Terminology = Rack. Our Dataset = Rack / Tower # Labkey Terminology = Rack. Our Dataset = Rack / Tower
for rack in shelf_df['Rack'].dropna().unique(): for rack in shelf_df['Rack'].dropna().unique():
rack_desc = 'Rack' rack_desc = 'Rack'
if 'tower' in rack.lower(): if 'tower' in rack.lower():
rack_desc = 'Tower' rack_desc = 'Tower'
result_rack = api.storage.create_storage_item('Rack', {'name': rack, 'description': rack_desc, 'locationId': shelf_id}) result_rack = api.storage.create_storage_item('Rack', {'name': rack, 'description': rack_desc, 'locationId': shelf_id})
rack_id = result_rack['data']['rowId'] rack_id = result_rack['data']['rowId']
rack_df = shelf_df.loc[shelf_df['Rack'] == rack] rack_df = shelf_df.loc[shelf_df['Rack'] == rack]
for box in rack_df['Box'].dropna().unique(): for box in rack_df['Box'].dropna().unique():
result_box = api.storage.create_storage_item('Terminal Storage Location', {'name': box, 'typeId': box_type_id, 'locationId': rack_id}) result_box = api.storage.create_storage_item('Terminal Storage Location', {'name': box, 'typeId': box_type_id_10_10, 'locationId': rack_id})
box_id = result_box['data']['rowId'] box_id = result_box['data']['rowId']
box_df = rack_df.loc[rack_df['Box'] == box] box_df = rack_df.loc[rack_df['Box'] == box]
df.loc[box_df.index, 'box_id'] = box_id df.loc[box_df.index, 'box_id'] = box_id
df.loc[box_df.index, 'StorageLocation'] = '{}/{}/{}/{}'.format(freezer_full, shelf, rack, box) df.loc[box_df.index, 'StorageLocation'] = '{}/{}/{}/{}'.format(freezer_full, shelf, rack, box)
print('Created box: {}'.format(box_id)) print('Created box: {}'.format(box_id))
``` ```
%% Output %% Output
Created box: 6522 Created box: 6522
Created box: 6523 Created box: 6523
%% Cell type:markdown id:530f62a9 tags: %% Cell type:markdown id:530f62a9 tags:
## Create Sample Types and Insert Samples into LabKey ## Create Sample Types and Insert Samples into LabKey
%% Cell type:code id:105001ab tags: %% Cell type:code id:105001ab tags:
``` python ``` python
sample_types = df['SampleType'].unique().tolist() sample_types = df['SampleType'].unique().tolist()
print(sample_types) print(sample_types)
# Loop over each sample type and create the domain # Loop over each sample type and create the domain
for sample_type in sample_types: for sample_type in sample_types:
labkey_fields = [{'name': 'Name', 'rangeURI': 'string'}] labkey_fields = [{'name': 'Name', 'rangeURI': 'string'}]
for col in columns: for col in columns:
rangeURI = 'string' rangeURI = 'string'
labkey_fields.append({'name': col, 'rangeURI': rangeURI}) labkey_fields.append({'name': col, 'rangeURI': rangeURI})
sample_params = { sample_params = {
'kind': 'SampleSet', 'kind': 'SampleSet',
'domainDesign': { 'domainDesign': {
'name': sample_type, 'name': sample_type,
'fields': labkey_fields, 'fields': labkey_fields,
"domainKindName": "SampleSet", "domainKindName": "SampleSet",
}, },
"options": { "options": {
"name": sample_type, "name": sample_type,
"nameExpression": "S-${genId}", "nameExpression": "S-${genId}",
"aliquotNameExpression": "${${AliquotedFrom}-:withCounter}", "aliquotNameExpression": "${${AliquotedFrom}-:withCounter}",
"importAliases": { "importAliases": {
"SourcePatient": { "SourcePatient": {
"inputType": "dataInputs/Patient" "inputType": "dataInputs/Patient"
} }
} }
} }
} }
# Create domain using API call # Create domain using API call
sample_domain = api.domain.create(sample_params) sample_domain = api.domain.create(sample_params)
print(f"Domain created for sample type: {sample_type}") print(f"Domain created for sample type: {sample_type}")
# Step 1: Prepare Sample Rows # Step 1: Prepare Sample Rows
sample_rows = [] sample_rows = []
# Add Samples of the Sample Type # Add Samples of the Sample Type
for i, row in df.iterrows(): for i, row in df.iterrows():
sample_rows.append(row[columns + ['StorageLocation', 'StorageRow', 'StorageCol', 'SourcePatient']].fillna('').to_dict()) sample_rows.append(row[columns + ['StorageLocation', 'StorageRow', 'StorageCol', 'SourcePatient']].fillna('').to_dict())
sample_rows[-1]['Name'] = row['SampleIdentifier'] sample_rows[-1]['Name'] = row['SampleIdentifier']
if not row['StorageLocation'] or not row['StorageRow'] or not row['StorageCol']: if not row['StorageLocation'] or not row['StorageRow'] or not row['StorageCol']:
del sample_rows[-1]['StorageLocation'] del sample_rows[-1]['StorageLocation']
del sample_rows[-1]['StorageRow'] del sample_rows[-1]['StorageRow']
del sample_rows[-1]['StorageCol'] del sample_rows[-1]['StorageCol']
# Step 2: Insert Rows # Step 2: Insert Rows
for sample_type in sample_types: for sample_type in sample_types:
# Filter rows that match the current sample type # Filter rows that match the current sample type
filtered_rows = [row for row in sample_rows if row.get("SampleType") == sample_type] filtered_rows = [row for row in sample_rows if row.get("SampleType") == sample_type]
# Remove 'Type' column from each row before inserting # Remove 'Type' column from each row before inserting
rows_to_insert = [{k: v for k, v in row.items() if k != "SampleType"} for row in filtered_rows] rows_to_insert = [{k: v for k, v in row.items() if k != "SampleType"} for row in filtered_rows]
# Insert rows using the API # Insert rows using the API
if rows_to_insert: if rows_to_insert:
api.query.insert_rows("samples", sample_type, rows_to_insert) api.query.insert_rows("samples", sample_type, rows_to_insert)
print(f"Inserted {len(rows_to_insert)} rows into {sample_type} domain.") print(f"Inserted {len(rows_to_insert)} rows into {sample_type} domain.")
``` ```
%% Output %% Output
['Blood', 'Saliva'] ['Blood', 'Saliva']
Domain created for sample type: Blood Domain created for sample type: Blood
Domain created for sample type: Saliva Domain created for sample type: Saliva
Inserted 40 rows into Blood domain. Inserted 40 rows into Blood domain.
Inserted 15 rows into Saliva domain. Inserted 15 rows into Saliva domain.
%% Cell type:markdown id:b525f127 tags: %% Cell type:markdown id:b525f127 tags:
## Optional Cleanup (Commented) ## Optional Cleanup (Commented)
%% Cell type:code id:a92c91e9 tags: %% Cell type:code id:a92c91e9 tags:
``` python ``` python
# Delete all sample types and their data # Delete all sample types and their data
''' '''
for sample_type in sample_types: for sample_type in sample_types:
drop_response = api.domain.drop("samples", sample_type) drop_response = api.domain.drop("samples", sample_type)
if "success" in drop_response: if "success" in drop_response:
print("The dataset domain was deleted.")' print("The dataset domain was deleted.")'
''' '''
``` ```
%% Output %% Output
'\nfor sample_type in sample_types:\n\n drop_response = api.domain.drop("samples", sample_type)\n if "success" in drop_response:\n print("The dataset domain was deleted.")\'\n' '\nfor sample_type in sample_types:\n\n drop_response = api.domain.drop("samples", sample_type)\n if "success" in drop_response:\n print("The dataset domain was deleted.")\'\n'
... ...
......
# %% [markdown]
# ## Import Libraries
# %%
import json
import netrc
import os
import sys
import urllib

import labkey
import pandas as pd
import urllib3
from labkey.api_wrapper import APIWrapper
# %% [markdown]
# ## Project Configuration and LabKey API Initialization
# %%
# LabKey connection settings: target project path, server host, and the
# server context path ('' here; use 'labkey' for the main server install).
PROJECT = 'sciCORE-dev/Ankit/Sample_Manager_Test_Dataset'
LABKEY_SERVER = "labkey-pro-dev.scicore.unibas.ch"
CONTEXT_PATH = ''  # Use 'labkey' for main server

# Single API wrapper instance used by every request below (HTTPS enforced).
api = APIWrapper(LABKEY_SERVER, PROJECT, CONTEXT_PATH, use_ssl=True)
# %% [markdown]
# ## Authentication Setup
# %%
# Path to the .netrc file holding the LabKey credentials
NETRC_FILE = os.path.join(os.path.expanduser('~'), '.netrc')
# Fail fast with a clear message when the credentials file is missing
if not os.path.isfile(NETRC_FILE):
    raise FileNotFoundError(f'.netrc file not found: {NETRC_FILE}')
# Parse credentials with the stdlib netrc parser instead of ad-hoc
# whitespace splitting: it understands the standard .netrc grammar
# (multi-entry files, tokens on one line) and selects the entry for
# the machine we actually talk to.
auth = netrc.netrc(NETRC_FILE).authenticators(LABKEY_SERVER)
if auth is None:
    raise ValueError(f'No .netrc entry for machine {LABKEY_SERVER} in {NETRC_FILE}')
login, _, password = auth  # authenticators() returns (login, account, password)
# Basic-auth headers reused by every raw request below
headers = urllib3.util.make_headers(basic_auth=f'{login}:{password}')
# %% [markdown]
# ## Verify Project Directory
# %%
# Verify the LabKey project container exists before doing any further setup.
escaped_path = PROJECT.replace(' ', '%20')  # build_url does not escape spaces itself
url = api.server_context.build_url("project", "getContainers.view", container_path=escaped_path)
params = {"includeSubfolders": True, "depth": 1}
body = urllib.parse.urlencode(params).encode()
resp = api.server_context.make_request(url, body, headers=headers, non_json_response=True)
if resp.status_code == 404:
    raise Exception(f'Project not found: {PROJECT}. Please create it first.')
# %% [markdown]
# ## Create and Populate Source Type 'Study'
# %%
# Define the source Excel file for study data
SOURCE_STUDY = 'Study.xlsx'
# Read data from the Excel file
try:
    df = pd.read_excel(SOURCE_STUDY)
except Exception as e:
    print(f'Error reading Excel file {SOURCE_STUDY}: {e}')
    sys.exit(1)
# First column is SourceID (used as each row's Name); the rest become domain fields.
columns = df.columns[1:].tolist()
# One string field per data column. 'Name' is a built-in DataClass field and is
# supplied per row below, so it is not declared here. (The previous version
# first built a Name-only field list and then immediately overwrote it.)
labkey_fields = [{"name": col, "rangeURI": "string"} for col in columns]
# Domain definition for the 'Study' source type (a DataClass in category 'sources')
study_domain_definition = {
    "kind": "DataClass",
    "domainDesign": {
        "name": "Study",
        "fields": labkey_fields
    },
    "options": {
        "category": "sources"
    }
}
# Create the DataClass domain in LabKey
try:
    created_dataclass_domain = api.domain.create(study_domain_definition)
    print("Success: Domain created for sample source: Study")
except Exception as e:
    print(f'Error creating domain: {e}')
    sys.exit(1)
# Build one insert row per spreadsheet line: NaNs become empty strings and the
# SourceID column supplies the required 'Name' value.
sources_rows = []
for _, row in df.iterrows():
    source_row = row[columns].fillna('').to_dict()
    source_row['Name'] = row['SourceID']
    sources_rows.append(source_row)
# Insert data into the DataClass 'Study'
try:
    insert_result = api.query.insert_rows("exp.data", "Study", sources_rows)
    print("Success: Data inserted into the DataClass: Study")
except Exception as e:
    print(f'Error inserting data: {e}')
    sys.exit(1)
# %% [markdown]
# ## Create and Populate Source Type 'Patient' (Linked to Study)
# %%
# Define the source Excel file for patient data
SOURCE_PATIENT = 'Patient.xlsx'
# Read data from the Excel file
try:
    df = pd.read_excel(SOURCE_PATIENT)
except Exception as e:
    print(f'Error reading Excel file {SOURCE_PATIENT}: {e}')
    sys.exit(1)
# Exclude the first column (SourceID, becomes 'Name') and the last (SourceStudy,
# used only for lineage mapping below).
columns = df.columns.tolist()[1:-1]
# One string field per data column. 'Name' is a built-in DataClass field and is
# supplied per row below, so it is not declared here. (The previous version
# first built a Name-only field list and then immediately overwrote it.)
labkey_fields = [{"name": col, "rangeURI": "string"} for col in columns]
# Domain definition for the 'Patient' source type, linked to 'Study' via the
# SourceStudy import alias (establishes parent lineage).
patient_domain_definition = {
    "kind": "DataClass",
    "domainDesign": {
        "name": "Patient",
        "fields": labkey_fields
    },
    "options": {
        "category": "sources",
        "name": "Patient",
        "importAliases": {
            "SourceStudy": {
                "inputType": "dataInputs/Study"
            }
        }
    }
}
# Create the DataClass domain in LabKey
try:
    created_dataclass_domain = api.domain.create(patient_domain_definition)
    print("Success: Domain created for sample source: Patient")
except Exception as e:
    print(f'Error creating domain: {e}')
    sys.exit(1)
# Build one insert row per patient; include 'SourceStudy' so LabKey records
# the Study -> Patient lineage, and use SourceID as the row Name.
sources_rows = []
for _, row in df.iterrows():
    source_row = row[columns + ['SourceStudy']].fillna('').to_dict()
    source_row['Name'] = row['SourceID']
    sources_rows.append(source_row)
# Insert data into the DataClass 'Patient'
try:
    insert_result = api.query.insert_rows("exp.data", "Patient", sources_rows)
    print("Success: Data inserted into the DataClass: Patient")
except Exception as e:
    print(f'Error inserting data: {e}')
    sys.exit(1)
# %% [markdown]
# ## Process Samples and Create Storage Hierarchy
# ### By default: Box sizes of 10x10 are created! Please make sure to check the Box size with the user and correct them in the UI after boxes are created before populating with Samples!!!
# %%
SOURCE_SAMPLES = 'Samples.xlsx'
# Read data from the Excel file
try:
df = pd.read_excel(SOURCE_SAMPLES)
except Exception as e:
print(f'Error reading Excel file {SOURCE_SAMPLES}: {e}')
exit(1)
# Sample ID is a reserved field in LabKey, so we need to rename it to SampleIdentifier.
df.rename(columns={'Sample ID': 'SampleIdentifier'}, inplace=True)
# Get columns for samples table but do not include the 'SourcePatient'. It is only used for mapping to the Patient DataClass
# Also exclude Sample ID column. It will be renamed to "Name" column.
columns = df.columns[1:-1].tolist()
columns
# ### Derive Building/Floor/Freezer/Shelf/Rack/Box/Coordinates from 'Location'
# Expected shape: Building/Floor/Freezer/Shelf/Rack/BoxName:Row/Col
# Note: adjust this hierarchy if the layout in the Excel file changes.
location_parts = df['Location'].str.split('/')
df['Building'] = location_parts.str[0]
df['Floor'] = location_parts.str[1]
df['Freezer'] = location_parts.str[2]
df['Freezer full'] = df['Building'] + '/' + df['Floor'] + '/' + df['Freezer']
df['Shelf'] = location_parts.str[3]
df['Rack'] = location_parts.str[4]
# The box segment optionally carries coordinates after a colon: "Box:Row/Col".
df['Box'] = location_parts.str[5] + '/' + location_parts.str[6]
df['Coordinates'] = df['Box'].str.split(':').str[-1]
df['Box'] = df['Box'].str.split(':').str[0]
# Storage position: default both fields to None (no placement), then fill
# row/column only for entries that do have coordinates.
# BUG FIX: the original first assigned pd.to_numeric(...) over the whole
# column — that value was overwritten on the very next line (dead code) and,
# lacking errors='coerce', would raise on any non-numeric entry. Removed.
df['StorageCol'] = None
df['StorageRow'] = None
has_coords = df['Coordinates'].notna()
df.loc[has_coords, 'StorageCol'] = df.loc[has_coords, 'Coordinates'].str.split('/').str[-1].astype(int)
df.loc[has_coords, 'StorageRow'] = df.loc[has_coords, 'Coordinates'].str.split('/').str[0]
# ### Create storage hierarchy in LabKey
def create_box_type(api, cols=10, rows=10):
    """Create the 'Box {cols}x{rows}' storage unit type and return the API result.

    If a unit type with that name already exists it is deleted first so the
    script can be re-run. The delete is best-effort: it fails harmlessly when
    the type does not exist yet.
    """
    name = 'Box {}x{}'.format(cols, rows)
    try:
        api.storage.delete_storage_item('Storage Unit Type', {'name': name})
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; a failed delete (type absent) is expected.
        pass
    return api.storage.create_storage_item(
        'Storage Unit Type',
        {'name': name, 'description': name, 'UnitType': 'Box', 'cols': cols, 'rows': rows},
    )

# Default 10x10 boxes — correct real sizes in the UI afterwards if needed.
result = create_box_type(api)
box_type_id = result['data']['rowId']
# Will hold the LabKey rowId of the box each sample is stored in.
df['box_id'] = ''
# Mirror the physical storage hierarchy in LabKey:
# Building -> Floor -> Freezer -> Shelf -> Rack -> Box.
# Each create_storage_item call returns the new item's rowId, which is then
# passed as 'locationId' to nest the next level underneath it.
# Physical Locations
for building in df['Building'].dropna().unique():
    # Top level: buildings are plain 'Physical Location' items (no parent).
    result_building = api.storage.create_storage_item("Physical Location", {"name": building, "description": "Building"})
    building_id = result_building['data']['rowId']
    building_df = df.loc[df['Building'] == building]
    for floor in building_df['Floor'].dropna().unique():
        # Floors are also 'Physical Location' items, nested under the building.
        result_floor = api.storage.create_storage_item('Physical Location', {'name': floor, 'description': 'Floor', 'locationId': building_id})
        floor_id = result_floor['data']['rowId']
        floor_df = building_df.loc[building_df['Floor'] == floor]
        # Labkey Terminology = Freezer. Our Dataset = Freezer / Liquid Nitrogen Storage
        # NOTE(review): the [:1] slice processes only the FIRST freezer on
        # each floor — this looks like a debugging leftover; confirm and drop
        # it if every freezer should be created. Rows under skipped freezers
        # never receive a StorageLocation.
        for freezer in floor_df['Freezer'].dropna().unique()[:1]:
            # Freezer names are made site-unique by prefixing building and
            # floor (dash-separated; the unused 'Freezer full' df column uses
            # slashes instead — presumably a leftover, verify).
            freezer_full = '{}-{}-{}'.format(building, floor, freezer)
            # Description defaults to LN2 room unless the name says 'Freezer'.
            freezer_desc = 'Liquid Nitrogen Room'
            if 'Freezer' in freezer:
                freezer_desc = 'Freezer'
            result_freezer = api.storage.create_storage_item('Freezer', {'name': freezer_full, 'description': freezer_desc, 'locationId': floor_id})
            freezer_id = result_freezer['data']['rowId']
            freezer_df = floor_df.loc[floor_df['Freezer'] == freezer]
            # Labkey Terminology = Shelf. Our Dataset = Shelf / Tank
            for shelf in freezer_df['Shelf'].dropna().unique():
                shelf_desc = 'Shelf'
                if 'tank' in shelf.lower():
                    shelf_desc = 'Tank'
                result_shelf = api.storage.create_storage_item('Shelf', {'name': shelf, 'description': shelf_desc, 'locationId': freezer_id})
                shelf_id = result_shelf['data']['rowId']
                shelf_df = freezer_df.loc[freezer_df['Shelf'] == shelf]
                # Labkey Terminology = Rack. Our Dataset = Rack / Tower
                for rack in shelf_df['Rack'].dropna().unique():
                    rack_desc = 'Rack'
                    if 'tower' in rack.lower():
                        rack_desc = 'Tower'
                    result_rack = api.storage.create_storage_item('Rack', {'name': rack, 'description': rack_desc, 'locationId': shelf_id})
                    rack_id = result_rack['data']['rowId']
                    rack_df = shelf_df.loc[shelf_df['Rack'] == rack]
                    # Boxes are terminal storage locations typed by the
                    # 10x10 unit type created above (box_type_id).
                    for box in rack_df['Box'].dropna().unique():
                        result_box = api.storage.create_storage_item('Terminal Storage Location', {'name': box, 'typeId': box_type_id, 'locationId': rack_id})
                        box_id = result_box['data']['rowId']
                        box_df = rack_df.loc[rack_df['Box'] == box]
                        # Record, on every sample row stored in this box, the
                        # box's rowId and the slash-joined storage path used
                        # later when inserting samples.
                        df.loc[box_df.index, 'box_id'] = box_id
                        df.loc[box_df.index, 'StorageLocation'] = '{}/{}/{}/{}'.format(freezer_full, shelf, rack, box)
                        print('Created box: {}'.format(box_id))
# %% [markdown]
# ## Create Sample Types and Insert Samples into LabKey
# %%
# One LabKey Sample Type is created per distinct value of the SampleType column.
sample_types = df['SampleType'].unique().tolist()
print(sample_types)
for sample_type in sample_types:
    # Field list: the mandatory 'Name' field followed by one string field per
    # spreadsheet data column.
    labkey_fields = [{'name': 'Name', 'rangeURI': 'string'}]
    labkey_fields.extend({'name': col, 'rangeURI': 'string'} for col in columns)
    # Domain definition: samples are named S-<generated id>, aliquots get the
    # parent sample's name plus a counter, and a 'SourcePatient' input column
    # links each sample to its Patient source for lineage.
    sample_params = {
        'kind': 'SampleSet',
        'domainDesign': {
            'name': sample_type,
            'fields': labkey_fields,
            'domainKindName': 'SampleSet',
        },
        'options': {
            'name': sample_type,
            'nameExpression': 'S-${genId}',
            'aliquotNameExpression': '${${AliquotedFrom}-:withCounter}',
            'importAliases': {
                'SourcePatient': {'inputType': 'dataInputs/Patient'},
            },
        },
    }
    sample_domain = api.domain.create(sample_params)
    print(f"Domain created for sample type: {sample_type}")
# Step 1: prepare one insert payload per sample row.
sample_rows = []
for _, row in df.iterrows():
    payload = row[columns + ['StorageLocation', 'StorageRow', 'StorageCol', 'SourcePatient']].fillna('').to_dict()
    payload['Name'] = row['SampleIdentifier']
    # Only keep the storage fields when the row has a complete placement.
    # BUG FIX: check the payload values (missing values normalised to '' by
    # fillna above) rather than the raw row — NaN is truthy in Python, so
    # `not row['StorageLocation']` was False for unplaced rows and empty
    # storage fields leaked into the insert payload.
    if not payload['StorageLocation'] or not payload['StorageRow'] or not payload['StorageCol']:
        del payload['StorageLocation']
        del payload['StorageRow']
        del payload['StorageCol']
    sample_rows.append(payload)
# Step 2: insert the prepared rows, one batch per sample type.
for sample_type in sample_types:
    # Collect this type's rows, dropping the 'SampleType' discriminator field
    # (it only selects the target sample type and is not a domain column).
    rows_to_insert = []
    for row in sample_rows:
        if row.get("SampleType") == sample_type:
            rows_to_insert.append({k: v for k, v in row.items() if k != "SampleType"})
    # Skip empty batches; report how many rows each sample type received.
    if rows_to_insert:
        api.query.insert_rows("samples", sample_type, rows_to_insert)
        print(f"Inserted {len(rows_to_insert)} rows into {sample_type} domain.")
# %% [markdown]
# ## Optional Cleanup (Commented)
# %%
# Delete all sample types and their data — disabled by default. Uncomment to
# wipe every sample type created above. (Was a no-op triple-quoted string
# with a stray quote inside; kept as plain comments instead.)
# for sample_type in sample_types:
#     drop_response = api.domain.drop("samples", sample_type)
#     if "success" in drop_response:
#         print("The dataset domain was deleted.")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment