import netCDF4 import pandas as pd import numpy as np import requests from os import path, remove from sqlalchemy import create_engine import pymysql def mysql_netcdf(database, start, end, Level0, Level1A): db_connection_str = 'mysql+pymysql://'+database["user"]+':'+database["password"]+'@'+database["host"]+'/'+database["db"] db_connection = create_engine(db_connection_str) df = pd.read_sql("SELECT * FROM ctd WHERE timestamp BETWEEN '"+start+"' AND '"+end+"';", con=db_connection) df.to_csv(Level0, index=False) timestamps = df.groupby('profile_id').first()["timestamp"] df = df.drop(['timestamp'], axis=1) df = pd.merge(df, timestamps, how='left', on='profile_id') profile_ids = list(df["profile_id"].sort_values(ascending=True).unique()) press = df.press.sort_values(ascending=True).unique() press_temp = pd.DataFrame(press, columns=["press"]) profiles = [] if len(profile_ids) > 0: for index, value in enumerate(profile_ids): profile = df.loc[df['profile_id'] == value] profiles.append(pd.merge(press_temp, profile.drop_duplicates(subset=['press']), how='left', on='press')) data = {} data["pressure"] = list(press) data["time"] = list(df.timestamp.sort_values(ascending=True).unique().astype(int) / 10 ** 9) att = { "institution": "EAWAG", "source": "Lake Greifen CTD Data", "history": "See history on Renku", "conventions": "CF 1.7", "comment": "Data from CTD profiler on platform in Lake Greifen", "title": "Lake Greifen CTD Data" } dim_dict = { 'time': {'dim_name': 'time', 'dim_size': None}, 'pressure': {'dim_name': 'pressure', 'dim_size': None} } dim_var_dict = { 'time': {'var_name': 'time', 'dim': ('time',), 'unit': 'seconds since 1970-01-01 00:00:00', 'longname': 'time'}, 'pressure': {'var_name': 'pressure', 'dim': ('pressure',), 'unit': 'm', 'longname': 'pressure'} } var_dict = { 'temp': {'var_name': 'temperature', 'dim': ('pressure', 'time',), 'unit': 'degC', 'longname': 'Water Temperature'}, 'cond': {'var_name': 'conductivity', 'dim': ('pressure', 'time',), 'unit': 'mS/cm', 'longname': 'Conductivity'}, 'sal': {'var_name': 'conductivity_20', 'dim': ('pressure', 'time',), 'unit': 'mS/cm', 'longname': 'Conductivity 20deg'}, 'o2_percent': {'var_name': 'o2_percent', 'dim': ('pressure', 'time',), 'unit': '%', 'longname': 'Percentage Oxygen Concentration'}, 'o2_ppm': {'var_name': 'o2_ppm', 'dim': ('pressure', 'time',), 'unit': 'mg/L', 'longname': 'Oxygen Concentration'}, 'ph': {'var_name': 'ph', 'dim': ('pressure', 'time',), 'unit': '', 'longname': 'pH'}, 'chl': {'var_name': 'chla', 'dim': ('pressure', 'time',), 'unit': 'μg/L', 'longname': 'Chlorophyll A'}, 'turb': {'var_name': 'turb', 'dim': ('pressure', 'time',), 'unit': 'NTU', 'longname': 'Turbidity'}, 'phy': {'var_name': 'phycocyanin', 'dim': ('pressure', 'time',), 'unit': 'μg/L', 'longname': 'Phycocyanin'}, 'par': {'var_name': 'par', 'dim': ('pressure', 'time',), 'unit': 'μE', 'longname': 'Photosynthetically Active Radiation'}, } # Create new NetCDF file ncfile = netCDF4.Dataset(Level1A, mode='w', format='NETCDF4') # Add global attributes for key in att: setattr(ncfile, key, att[key]) # Create Dimensions for key, values in dim_dict.items(): ncfile.createDimension(values['dim_name'], values['dim_size']) # Create Dimensions Variables for key, values in dim_var_dict.items(): var = ncfile.createVariable(values["var_name"], np.float64, values["dim"]) var.units = values["unit"] var.long_name = values["longname"] var[:] = data[key] # Create Dimensions Variables for key, values in var_dict.items(): var = ncfile.createVariable(values["var_name"], np.float64, values["dim"]) var.units = values["unit"] var.long_name = values["longname"] for index, value in enumerate(data["time"]): var[:, index] = list(profiles[index][key]) # Close NetCDF file ncfile.close()