Enhance spreadsheet data handling

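- Added `convert_mixed_columns`, a function that converts columns containing mixed value types.
- Updated the main reading function, `read_rules_from_spreadsheet`, to pass the loaded DataFrame through this new conversion.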
- Improved error handling for reading TSV files.
This commit is contained in:
Jonathon Broughton
2025-02-17 23:41:55 +00:00
parent c92a751516
commit 897850197f
+20 -1
View File
@@ -13,7 +13,26 @@ def read_rules_from_spreadsheet(url: str) -> DataFrame | None:
"""
try:
# Since the output is a TSV, we use `pd.read_csv` with `sep='\t'` to specify tab-separated values.
return pd.read_csv(url, sep="\t")
df = pd.read_csv(url, sep="\t")
df = convert_mixed_columns(df)
# Convert columns to appropriate types based on their content.
return df
except Exception as e:
print(f"Failed to read the TSV from the URL: {e}")
return None
def _looks_numeric(value):
    """Return True when *value* is a finite number or text spelled like one."""
    # Local import: the module's top-level import block is outside this block.
    import math

    # bool is a subclass of int, but "True"/"False" were never treated as
    # numeric, so keep excluding them.
    if isinstance(value, bool):
        return False
    if isinstance(value, int):
        return True
    if isinstance(value, float):
        # NaN and +/-inf are not treated as numbers, so a column that holds
        # only missing values still counts as non-numeric.
        return math.isfinite(value)

    text = str(value)
    # Allow a single leading sign so "-3.5" is recognised like "3.5".
    if text[:1] in ("+", "-"):
        text = text[1:]
    # Dropping at most one "." lets plain decimals pass the digit check.
    return text.replace(".", "", 1).isdigit()


def convert_mixed_columns(df):
    """Convert each column of a DataFrame based on whether it holds numbers.

    A column in which at least one value is a number, or text spelled like a
    plain number (for example ``"42"`` or ``"-3.5"``), is cast to ``object``
    so its values are kept as they are. Every other column is cast to ``str``.

    Args:
        df (DataFrame): The DataFrame whose columns are to be converted.

    Returns:
        DataFrame: The DataFrame with columns converted to appropriate types.
    """

    def _convert(column):
        # One numeric-looking value is enough to keep the column as objects.
        if any(_looks_numeric(value) for value in column):
            return column.astype(object)
        return column.astype(str)

    return df.apply(_convert)