Lab 4-3: Snowpack temperature and density profiles - snowpit data#

Written by Daniel Hogan - April, 2023.

Modified by Jessica Lundquist - April, 2023.

Modified by Eli Schwat - January 2024.

import xarray as xr
import numpy as np
import os 
import urllib
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt

SOS Data#

sos_file = "../data/sos_full_dataset_30min.nc"
sos_dataset = xr.open_dataset(sos_file)

Replicate the steps from Lab 4-1 to create a dataset of in-snow temperature measurements (including snow surface temperatures)#

tsnow_vars = [v for v in sos_dataset if 'Tsnow_' in v and v.endswith('_d')]
snow_depth_vars = ['SnowDepth_d']
print(snow_depth_vars, tsnow_vars)
['SnowDepth_d'] ['Tsnow_0_4m_d', 'Tsnow_0_5m_d', 'Tsnow_0_6m_d', 'Tsnow_0_7m_d', 'Tsnow_0_8m_d', 'Tsnow_0_9m_d', 'Tsnow_1_0m_d', 'Tsnow_1_1m_d', 'Tsnow_1_2m_d', 'Tsnow_1_3m_d', 'Tsnow_1_4m_d', 'Tsnow_1_5m_d']
# Transform the NetCDF dataset to be a "tidy" dataset of snow depths
snow_temp_dataset = sos_dataset[
    tsnow_vars + snow_depth_vars
].to_dataframe().reset_index().set_index(['time', 'SnowDepth_d']).melt(ignore_index=False)

# Calculate the depth of the snow sensor (relative to the snow surface)
# using the snow depth measurements and the known above-ground height
# of the snow sensors
snow_temp_dataset['height_agl'] = snow_temp_dataset['variable'].str[6:9].str.replace('_', '.').astype(float)
snow_temp_dataset = snow_temp_dataset.reset_index().set_index('time')
snow_temp_dataset['depth'] = snow_temp_dataset['height_agl'] - snow_temp_dataset['SnowDepth_d']
# Add surface temperature data (depth=0)
surface_temps_dataset = sos_dataset['Tsurf_d'].to_dataframe()
surface_temps_dataset = surface_temps_dataset.rename(columns={'Tsurf_d': 'value'})
surface_temps_dataset['depth'] = 0
surface_temps_dataset
value depth
time
2022-11-01 00:00:00 NaN 0
2022-11-01 00:30:00 NaN 0
2022-11-01 01:00:00 NaN 0
2022-11-01 01:30:00 NaN 0
2022-11-01 02:00:00 NaN 0
... ... ...
2023-06-19 15:30:00 23.838074 0
2023-06-19 16:00:00 22.595795 0
2023-06-19 16:30:00 21.681244 0
2023-06-19 17:00:00 20.645264 0
2023-06-19 17:30:00 NaN 0

11074 rows × 2 columns

snow_temp_dataset = pd.concat([snow_temp_dataset, surface_temps_dataset])

Make sure we don’t have any measurements from above the snow surface

snow_temp_dataset = snow_temp_dataset.query("depth <= 0")

Snowpit Measurements - a first look#

Open up the snowpit data and look at data from just two snowpits - on February 6 and 7, 2023

snowpit_ds = xr.open_dataset("../data/KettlePondsSnowPits.nc")

snowpit_ds = snowpit_ds.sel(time = slice('20230206', '20230207'))
snowpit_df = snowpit_ds.to_dataframe().reset_index()

# The timestamps in this dataset are in UTC. Let's convert to US/Mountain time, which our other dataset is conveniently already in.
# After we convert, we remove the timezone info by calling `tz_localize(None)`. 
snowpit_df.time = snowpit_df.time.dt.tz_localize('UTC').dt.tz_convert('US/Mountain').dt.tz_localize(None)

# convert depth variables from cm to m
snowpit_df['depth'] = snowpit_df['depth']/100
snowpit_df['pit_total_snow_depth'] = snowpit_df['pit_total_snow_depth']/100
snowpit_df
time depth temperature density ir_surface_temp thermometer_surface_temp pit_total_snow_depth id
0 2023-02-06 09:12:00 0.00 -0.6 NaN -8.1 -9.5 1.04 KP21
1 2023-02-06 09:12:00 0.02 NaN NaN -8.1 -9.5 1.04 KP21
2 2023-02-06 09:12:00 0.03 NaN NaN -8.1 -9.5 1.04 KP21
3 2023-02-06 09:12:00 0.04 NaN NaN -8.1 -9.5 1.04 KP21
4 2023-02-06 09:12:00 0.05 NaN NaN -8.1 -9.5 1.04 KP21
... ... ... ... ... ... ... ... ...
287 2023-02-07 10:01:00 1.53 NaN NaN -7.3 -9.3 1.03 KP22
288 2023-02-07 10:01:00 1.55 NaN NaN -7.3 -9.3 1.03 KP22
289 2023-02-07 10:01:00 1.59 NaN NaN -7.3 -9.3 1.03 KP22
290 2023-02-07 10:01:00 1.61 NaN NaN -7.3 -9.3 1.03 KP22
291 2023-02-07 10:01:00 1.65 NaN NaN -7.3 -9.3 1.03 KP22

292 rows × 8 columns

Look at the times the snowpit data was collected

snowpit_df.time.unique()
<DatetimeArray>
['2023-02-06 09:12:00', '2023-02-07 10:01:00']
Length: 2, dtype: datetime64[ns]
import altair as alt

Plot density and temperature profiles from the two days#

density_charts = (
    alt.Chart(snowpit_df).mark_circle().encode(
        alt.X('density:Q').title('Density (kg/m^3)'),
        alt.Y('depth:Q').title('Depth (m)'),
        
    ) + alt.Chart(snowpit_df).mark_rule().encode(
        alt.Y('pit_total_snow_depth:Q')
    ).properties(width=150, height=150)
).facet(
    alt.Facet('time:T')
)

temp_charts = (
    alt.Chart(snowpit_df).mark_circle().encode(
        alt.X('temperature:Q').title('Temperature (˚C)'),
        alt.Y('depth:Q').title('Depth (m)'),
        
    ) + alt.Chart(snowpit_df).mark_rule().encode(
        alt.Y('pit_total_snow_depth:Q')
    ).properties(width=150, height=150)
).facet(
    alt.Facet('time:T')
)

density_charts & temp_charts

Combining temperature measurements from snowpit and in-situ thermistor measurements#

Let’s compare the manual snowpit temperature data to the in-situ in-snow temperature data we have analyzed previously.

This will involve some data wrangling, because notice that the manual snowpit data measures Depth relative to the soil-snow interface. With the in-snow temperature data used previously, we calculated depth relative to the snow-air interface.

First, isolate the in-situ snow temperature measurements to the 30-minute period overlapping when the snowpits were taken.

We saw above that the snowpits were dug during:

  • 2023-02-06 0900 - 0930

  • 2023-02-07 1000 - 1030

Let’s grab those two 30minute averages from the snow temp dataset and combine them into one small dataset.

snow_temp_dataset_4_comparison = pd.concat([
    snow_temp_dataset.loc['20230206 0900'], # this represents the average from 0900-0930
    snow_temp_dataset.loc['20230207 1000']  # this represents the average from 1000-1030
]).reset_index()
snow_temp_dataset_4_comparison
time SnowDepth_d variable value height_agl depth
0 2023-02-06 09:00:00 0.912619 Tsnow_0_4m_d -3.447297 0.4 -0.512619
1 2023-02-06 09:00:00 0.912619 Tsnow_0_5m_d -4.041552 0.5 -0.412619
2 2023-02-06 09:00:00 0.912619 Tsnow_0_6m_d -4.684970 0.6 -0.312619
3 2023-02-06 09:00:00 0.912619 Tsnow_0_7m_d -4.825782 0.7 -0.212619
4 2023-02-06 09:00:00 0.912619 Tsnow_0_8m_d -4.882523 0.8 -0.112619
5 2023-02-06 09:00:00 0.912619 Tsnow_0_9m_d -5.438139 0.9 -0.012619
6 2023-02-06 09:00:00 NaN NaN -11.606720 NaN 0.000000
7 2023-02-07 10:00:00 0.890080 Tsnow_0_4m_d -3.188019 0.4 -0.490080
8 2023-02-07 10:00:00 0.890080 Tsnow_0_5m_d -3.727944 0.5 -0.390080
9 2023-02-07 10:00:00 0.890080 Tsnow_0_6m_d -4.572083 0.6 -0.290080
10 2023-02-07 10:00:00 0.890080 Tsnow_0_7m_d -5.429698 0.7 -0.190080
11 2023-02-07 10:00:00 0.890080 Tsnow_0_8m_d -7.319589 0.8 -0.090080
12 2023-02-07 10:00:00 NaN NaN -15.121002 NaN 0.000000

Check those measurements out quickly to make sure we grabbed the right stuff

(
    alt.Chart(snow_temp_dataset_4_comparison).mark_line().encode(
        alt.X('value:Q').title('Temperature (˚C)'),
        alt.Y('depth:Q').title('Depth (m)'),
        
    ).properties(width=150, height=150)
).facet(
    alt.Facet('time:T')
)

OK looks good.

The depth of the snow on the two days, according to the snowpits measurements, was

snowpit_df[['time', 'pit_total_snow_depth']].drop_duplicates()
time pit_total_snow_depth
0 2023-02-06 09:12:00 1.04
146 2023-02-07 10:01:00 1.03

We can adjust the in-site snow temperature measurements so that depth means the same thing across the datasets. We do this by adding the depth from the dataset snow_temp_dataset_4_comparison to the snow depth values according to the snow pit measurements (in the snowpit_df dataset).

snow_temp_dataset_4_comparison.loc[
    snow_temp_dataset_4_comparison['time'] == '2023-02-06 09:00:00',
    'depth'
] = snow_temp_dataset_4_comparison.loc[
    snow_temp_dataset_4_comparison['time'] == '2023-02-06 09:00:00',
    'depth'
] + 1.04 # in meters, the dataset had 104 (centimeters)

snow_temp_dataset_4_comparison.loc[
    snow_temp_dataset_4_comparison['time'] == '2023-02-07 10:00:00',
    'depth'
] = snow_temp_dataset_4_comparison.loc[
    snow_temp_dataset_4_comparison['time'] == '2023-02-07 10:00:00',
    'depth'
] + 1.03 # in meters, the dataset had 104 (centimeters)

Now we can plot the datasets together and see if they line up.

To plot the datasets together, we combine the datasets into one dataset. We follow the following steps to do this:

  1. Make sure the variable names across the two datasets are the same

  2. Add a variable to each of the separate datasets indicating their measurement type (i.e. snow pit or in-situ)

  3. Combine

# Modifying the in situ measurements
## Isolate the variables we want (timestemps, depth, and temperature)
in_situ_data = snow_temp_dataset_4_comparison[['time', 'depth', 'value']]
## Rename the "value" column, which has temperatures, to "temperature"
in_situ_data = in_situ_data.rename(columns={'value': 'temperature'})
## Create a new column to clarify which data this is
in_situ_data['data_type'] = 'thermistor array'
in_situ_data
time depth temperature data_type
0 2023-02-06 09:00:00 0.527381 -3.447297 thermistor array
1 2023-02-06 09:00:00 0.627381 -4.041552 thermistor array
2 2023-02-06 09:00:00 0.727381 -4.684970 thermistor array
3 2023-02-06 09:00:00 0.827381 -4.825782 thermistor array
4 2023-02-06 09:00:00 0.927381 -4.882523 thermistor array
5 2023-02-06 09:00:00 1.027381 -5.438139 thermistor array
6 2023-02-06 09:00:00 1.040000 -11.606720 thermistor array
7 2023-02-07 10:00:00 0.539920 -3.188019 thermistor array
8 2023-02-07 10:00:00 0.639920 -3.727944 thermistor array
9 2023-02-07 10:00:00 0.739920 -4.572083 thermistor array
10 2023-02-07 10:00:00 0.839920 -5.429698 thermistor array
11 2023-02-07 10:00:00 0.939920 -7.319589 thermistor array
12 2023-02-07 10:00:00 1.030000 -15.121002 thermistor array
# Modifying the snowpit measurements
## Isolate the variables we want - this is the only step we need to do for this one
snowpit_data = snowpit_df[['time', 'depth', 'temperature']].dropna()
## Create a new column to clarify which data this is
snowpit_data['data_type'] = 'snow pit'
snowpit_data
time depth temperature data_type
0 2023-02-06 09:12:00 0.00 -0.6 snow pit
13 2023-02-06 09:12:00 0.14 -1.4 snow pit
23 2023-02-06 09:12:00 0.24 -2.9 snow pit
33 2023-02-06 09:12:00 0.34 -3.2 snow pit
43 2023-02-06 09:12:00 0.44 -3.6 snow pit
53 2023-02-06 09:12:00 0.54 -4.2 snow pit
63 2023-02-06 09:12:00 0.64 -4.9 snow pit
73 2023-02-06 09:12:00 0.74 -5.4 snow pit
83 2023-02-06 09:12:00 0.84 -6.0 snow pit
93 2023-02-06 09:12:00 0.94 -6.4 snow pit
104 2023-02-06 09:12:00 1.04 -8.1 snow pit
146 2023-02-07 10:01:00 0.00 -0.6 snow pit
158 2023-02-07 10:01:00 0.13 -1.5 snow pit
168 2023-02-07 10:01:00 0.23 -2.1 snow pit
178 2023-02-07 10:01:00 0.33 -2.6 snow pit
188 2023-02-07 10:01:00 0.43 -3.1 snow pit
198 2023-02-07 10:01:00 0.53 -3.6 snow pit
208 2023-02-07 10:01:00 0.63 -4.4 snow pit
218 2023-02-07 10:01:00 0.73 -5.7 snow pit
228 2023-02-07 10:01:00 0.83 -7.6 snow pit
238 2023-02-07 10:01:00 0.93 -11.7 snow pit
249 2023-02-07 10:01:00 1.03 -9.3 snow pit
# Combine the datasets!
temp_profile_comparison_dataset = pd.concat([snowpit_data, in_situ_data])
temp_profile_comparison_dataset
time depth temperature data_type
0 2023-02-06 09:12:00 0.000000 -0.600000 snow pit
13 2023-02-06 09:12:00 0.140000 -1.400000 snow pit
23 2023-02-06 09:12:00 0.240000 -2.900000 snow pit
33 2023-02-06 09:12:00 0.340000 -3.200000 snow pit
43 2023-02-06 09:12:00 0.440000 -3.600000 snow pit
53 2023-02-06 09:12:00 0.540000 -4.200000 snow pit
63 2023-02-06 09:12:00 0.640000 -4.900000 snow pit
73 2023-02-06 09:12:00 0.740000 -5.400000 snow pit
83 2023-02-06 09:12:00 0.840000 -6.000000 snow pit
93 2023-02-06 09:12:00 0.940000 -6.400000 snow pit
104 2023-02-06 09:12:00 1.040000 -8.100000 snow pit
146 2023-02-07 10:01:00 0.000000 -0.600000 snow pit
158 2023-02-07 10:01:00 0.130000 -1.500000 snow pit
168 2023-02-07 10:01:00 0.230000 -2.100000 snow pit
178 2023-02-07 10:01:00 0.330000 -2.600000 snow pit
188 2023-02-07 10:01:00 0.430000 -3.100000 snow pit
198 2023-02-07 10:01:00 0.530000 -3.600000 snow pit
208 2023-02-07 10:01:00 0.630000 -4.400000 snow pit
218 2023-02-07 10:01:00 0.730000 -5.700000 snow pit
228 2023-02-07 10:01:00 0.830000 -7.600000 snow pit
238 2023-02-07 10:01:00 0.930000 -11.700000 snow pit
249 2023-02-07 10:01:00 1.030000 -9.300000 snow pit
0 2023-02-06 09:00:00 0.527381 -3.447297 thermistor array
1 2023-02-06 09:00:00 0.627381 -4.041552 thermistor array
2 2023-02-06 09:00:00 0.727381 -4.684970 thermistor array
3 2023-02-06 09:00:00 0.827381 -4.825782 thermistor array
4 2023-02-06 09:00:00 0.927381 -4.882523 thermistor array
5 2023-02-06 09:00:00 1.027381 -5.438139 thermistor array
6 2023-02-06 09:00:00 1.040000 -11.606720 thermistor array
7 2023-02-07 10:00:00 0.539920 -3.188019 thermistor array
8 2023-02-07 10:00:00 0.639920 -3.727944 thermistor array
9 2023-02-07 10:00:00 0.739920 -4.572083 thermistor array
10 2023-02-07 10:00:00 0.839920 -5.429698 thermistor array
11 2023-02-07 10:00:00 0.939920 -7.319589 thermistor array
12 2023-02-07 10:00:00 1.030000 -15.121002 thermistor array
(
    alt.Chart(temp_profile_comparison_dataset.dropna()).mark_line(
        point=True
    ).encode(
        alt.X('temperature:Q').title('Temperature (˚C)'),
        alt.Y('depth:Q').title('Depth (m)'),
        alt.Color('data_type:N'),
        alt.Order('depth:Q') # this makes sure the lines are drawn, connecting dots vertically, in order of depth
    ).properties(width=150, height=150)
).facet(
    alt.Facet('day(time):T')
)

OK so the temperature profiles have some differences! The surface temperatures disagree pretty strongly. Their basic behavior underneath the snow surface is pretty similar though. Why do you think the surface temperature measurements might differ? Why might the temperature measurements within the snowpack (beneath the surface) differ?

snowpit_ds = xr.open_dataset("../data/KettlePondsSnowPits.nc")
snowpit_df = snowpit_ds.to_dataframe().reset_index()

# The timestamps in this dataset are in UTC. Let's convert to US/Mountain time, which our other dataset is conveniently already in.
# After we convert, we remove the timezone info by calling `tz_localize(None)`. 
snowpit_df.time = snowpit_df.time.dt.tz_localize('UTC').dt.tz_convert('US/Mountain').dt.tz_localize(None)

# convert depth variables from cm to m
snowpit_df['pit_total_snow_depth'] = snowpit_df['pit_total_snow_depth']/100
snowpit_df
time depth temperature density ir_surface_temp thermometer_surface_temp pit_total_snow_depth id
0 2023-01-06 10:48:00 0.0 -0.5 NaN -4.3 -4.3 1.02 KP01
1 2023-01-06 10:48:00 2.0 NaN NaN -4.3 -4.3 1.02 KP01
2 2023-01-06 10:48:00 3.0 NaN NaN -4.3 -4.3 1.02 KP01
3 2023-01-06 10:48:00 4.0 NaN NaN -4.3 -4.3 1.02 KP01
4 2023-01-06 10:48:00 5.0 NaN NaN -4.3 -4.3 1.02 KP01
... ... ... ... ... ... ... ... ...
7003 2023-03-16 08:12:00 153.0 NaN NaN -8.9 -6.6 1.65 KP48
7004 2023-03-16 08:12:00 155.0 -2.6 152.0 -8.9 -6.6 1.65 KP48
7005 2023-03-16 08:12:00 159.0 NaN NaN -8.9 -6.6 1.65 KP48
7006 2023-03-16 08:12:00 161.0 NaN NaN -8.9 -6.6 1.65 KP48
7007 2023-03-16 08:12:00 165.0 -6.6 91.0 -8.9 -6.6 1.65 KP48

7008 rows × 8 columns

snowpit_df = snowpit_df.query("depth <= 30")
snowpit_df = snowpit_df.sort_values('time')[['time', 'depth', 'temperature']].dropna()
snowpit_df.groupby('time').apply(
    lambda df: 
    (df.sort_values('depth').temperature.values[-1] - df.sort_values('depth').temperature.values[0])
    /
    (df.sort_values('depth').depth.values[-1] - df.sort_values('depth').depth.values[0])
).min()
/tmp/ipykernel_2783/3371135051.py:1: FutureWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.
  snowpit_df.groupby('time').apply(
np.float64(-0.1)