import numpy as np
from netCDF4 import Dataset
import os

# --- 1. Describe the layout of the binary data ---
# Every value in this section is an example; edit it to match your file.
data_type = np.float32  # element type stored in the file (np.float64, np.int16, ...)
# Grid points along x (e.g. longitude), along y (e.g. latitude), and the
# number of time steps (if applicable).
nx, ny, nt = 223, 174, 2

# Coordinate axes: evenly spaced values with both endpoints included.
# These must agree with the grid the binary data was produced on.
lons = np.linspace(start=-94.63206, stop=-5.367915, num=nx)
lats = np.linspace(start=-52.48462, stop=17.25804, num=ny)

# Example time values: arbitrary numbers here; datetime objects are also
# possible if they are converted to numbers before being written.
# NOTE(review): this list has three entries while nt is 2, and nothing in the
# visible script reads it (the time axis is written from np.arange(nt)) — confirm.
times = [1, 20, 30]

# Optional: if 2D coordinate arrays are needed instead of 1D axes, np.meshgrid
# can build them, e.g.:
# lons, lats = np.meshgrid(np.linspace(-180, 180, nx), np.linspace(-90, 90, ny))

# --- 2. Read the binary file ---
# Loads every value of the file as `data_type` and views it as a
# (nt, ny, nx) array. On failure a diagnostic is printed and the script
# terminates with exit status 1 so callers (shell scripts, schedulers) can
# detect that no conversion happened.
input_bin_file = 'Eta40_C00_2025070400_FF.bin'
try:
    # np.fromfile returns a flat 1-D array; reshape raises ValueError when the
    # number of values in the file is not exactly nt * ny * nx.
    # The default (C) memory order is assumed here: the last axis (nx) varies
    # fastest. If the file was written in a different order, adjust the
    # reshape accordingly, e.g. (nt, nx, ny).
    data = np.fromfile(input_bin_file, dtype=data_type).reshape(nt, ny, nx)
except FileNotFoundError:
    print(f"Error: {input_bin_file} not found. Please check the path.")
    # `exit()` is a convenience injected by the `site` module and, called
    # without an argument, reports success (status 0). Raise SystemExit with
    # a non-zero status instead.
    raise SystemExit(1) from None
except ValueError as e:
    print(f"Error reading binary file: {e}")
    print(f"Check data_type, nx, ny, nt, and the actual size of {input_bin_file}.")
    raise SystemExit(1) from None

# --- 3. Create a NetCDF file ---
# Writes `data` as a (time, lat, lon) variable together with its coordinate
# variables and descriptive attributes.
output_nc_file = 'output_from_bin.nc'

with Dataset(output_nc_file, 'w', format='NETCDF4') as nc_file:
    # Create dimensions. Passing None makes 'time' unlimited, which is done
    # only when there are multiple time steps.
    nc_file.createDimension('time', None if nt > 1 else nt)
    nc_file.createDimension('lat', ny)
    nc_file.createDimension('lon', nx)

    # Create coordinate variables
    time_var = nc_file.createVariable('time', np.float64, ('time',))
    lat_var = nc_file.createVariable('lat', np.float32, ('lat',))
    lon_var = nc_file.createVariable('lon', np.float32, ('lon',))

    # Example variable 'my_variable' - adjust name, dtype, dimensions
    # For a 2D variable (lat, lon) without time:
    # data_var = nc_file.createVariable('my_variable', data_type, ('lat', 'lon'))
    # For a 3D variable (time, lat, lon):
    data_var = nc_file.createVariable('my_variable', data_type, ('time', 'lat', 'lon'), fill_value=-999.0)

    # Add attributes (very important for CDO and other tools to understand your data)
    lat_var.units = 'degrees_north'
    lon_var.units = 'degrees_east'
    time_var.units = 'days since 2000-01-01 00:00:00'  # Or whatever is appropriate
    time_var.calendar = 'standard'  # Or 'gregorian', 'noleap', etc.

    data_var.long_name = 'My Binary Data Variable'
    data_var.units = 'units_of_your_data'  # e.g., 'm/s', 'K', 'mm/day'
    # A bare Python float would be stored as a float64 attribute; write the
    # value with the variable's own type so the attribute matches the data
    # (use the value your binary data actually uses for missing points).
    data_var.missing_value = np.array(-999.0, dtype=data_type)

    # Write coordinate values
    lat_var[:] = lats
    lon_var[:] = lons
    # If your `times` are `datetime` objects, you'll need to convert them to numbers
    # e.g., `netCDF4.date2num(times, time_var.units, time_var.calendar)`
    time_var[:] = np.arange(nt)  # Placeholder, replace with actual time values

    # Write the main data
    data_var[:] = data

# Report success only after the `with` block has closed the file.
print(f"Successfully converted {input_bin_file} to {output_nc_file}")

# --- 4. Next step: run CDO on the new NetCDF file ---
# Once this script has finished, the generated file can be passed to CDO, e.g.:
# cdo -f nc -setgrid,grid.txt output_from_bin.nc final_output.nc
cdo_example = f"cdo -f nc -setgrid,grid.txt {output_nc_file} final_output.nc"
print(f"You can now use CDO with '{output_nc_file}'. Example:")
print(cdo_example)

