Data Visualization

Tracking Stress at University Part 2 - A Visual Summary

During my winter break, I reflected a lot on my previous two semesters at university. I had been stressed constantly, and I frequently overworked myself, which led to even more stress. Before I started Spring 2019, I devised a project that I could use to track and hopefully mitigate my stress while also flexing my data science muscles a bit. In a blog post I wrote on the first day of the semester, I detailed the variables that I would be tracking and the analyses I thought might be interesting.

However, I believe I might have inadvertently become a victim of Goodhart’s law, best phrased by Marilyn Strathern: “When a measure becomes a target, it ceases to be a good measure.” Throughout the semester, my stress levels were significantly lower, my classes required less work, and overall, I felt as though I was in a much better place. Now, make no mistake, this is great news for me personally. However, the visualizations aren’t as cool as I had hoped.

Either way, here are the visualizations of the data I tracked during my semester in order to keep my mental health in check.

import datetime as dt
import pytz

import numpy as np
import pandas as pd
import json
from ics import Calendar
from lib.python_fitbit import fitbit
from lib.python_fitbit import gather_keys_oauth2 as Oauth2

import matplotlib.pyplot as plt
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
from plotly import tools
import cufflinks as cf
from wordcloud import WordCloud

from lib import custom_utils

# Switch plotly into offline notebook mode so `iplot` renders figures inline.
init_notebook_mode(connected=True)
# Configure cufflinks globally with offline=True and world_readable=True.
cf.set_config_file(world_readable=True, offline=True)

Constants

# First and last day of the tracked semester, as naive datetimes at midnight.
START_DATE = dt.datetime(2019, 1, 22)
END_DATE = dt.datetime(2019, 5, 10)

# Timezone in which the naive dates above are interpreted.
local = pytz.timezone("America/New_York")

# The same two instants as timezone-aware UTC datetimes.
START_DATE_UTC, END_DATE_UTC = (
    local.localize(naive, is_dst=None).astimezone(pytz.utc)
    for naive in (START_DATE, END_DATE)
)

Load Daily Log Data

# Load the daily self-report form responses. Columns 0 and 6 ('Timestamp' and
# 'Date' in the preview below) are parsed as datetimes.
daily_log_df = pd.read_csv(
    'data/Daily Log (Responses) - Form Responses 1.csv',
    parse_dates=[0, 6],
)

# Keep only responses submitted during the semester. END_DATE is midnight at
# the *start* of the last day, so the upper bound is moved one day forward;
# otherwise the response submitted on the final day would be dropped, even
# though every per-day loop in this notebook treats END_DATE as inclusive.
semester_end_exclusive = END_DATE + dt.timedelta(days=1)
submitted_at = daily_log_df['Timestamp']
daily_log_df = daily_log_df[
    (submitted_at > START_DATE) & (submitted_at < semester_end_exclusive)
]

daily_log_df.head()
Timestamp Stress Happiness Energy Motivation Notes Date Food Coma
21 2019-01-22 20:07:08 2 8 8 8 First day of classes. I think it's gonna be go... 2019-01-22 1
22 2019-01-23 21:51:14 3 7 8 8 Had all classes now. I'm going to have to work... 2019-01-23 2
23 2019-01-24 20:13:42 3 7 8 7 Everything is still going well for keeping up ... 2019-01-24 1
24 2019-01-25 20:49:29 2 8 7 8 Chilling at the SASA party right now. It's pre... 2019-01-25 1
25 2019-01-26 20:07:14 2 7 7 6 Chilling all day. Went to the gym to start the... 2019-01-26 1

Simple Stress Timeseries

# Filled line chart of the self-reported daily stress score. `stress_trace`
# is reused as the baseline series in the comparison charts further down.
stress_trace = go.Scatter(
    name='Stress Level',
    x=daily_log_df['Date'],
    y=daily_log_df['Stress'],
    fill='tozeroy',
)

layout = go.Layout(
    title='Stress Level',
    yaxis=dict(title='Stress Level', range=[1, 8]),
)
data = [stress_trace]

fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='stress')

Work

How much does what I need to complete correlate with my stress?

Load Asana Data

# Asana task export. The listed column positions are parsed as datetimes
# (Created At, Completed At, Last Modified, Start Date and Due Date in the
# preview below).
asana_df = pd.read_csv(
    'data/asana-umass-s19.csv',
    parse_dates=[1, 2, 3, 8, 9],
)
asana_df.head()
Task ID Created At Completed At Last Modified Name Column Assignee Assignee Email Start Date Due Date Tags Notes Projects Parent Task
0 1120800788569533 2019-05-01 2019-05-04 2019-05-04 final report Spanish 306 NaN NaN NaT 2019-05-04 NaN NaN UMass NaN
1 1120292396473715 2019-04-28 2019-04-30 2019-04-30 final submission CS 326 NaN NaN NaT 2019-04-30 NaN NaN UMass NaN
2 1120002043175716 2019-04-25 2019-04-25 2019-04-25 go to wikipedia page button CS 326 NaN NaN NaT 2019-04-25 NaN NaN UMass NaN
3 884979504873319 2019-04-24 2019-05-02 2019-05-02 Final cheat sheet Stats 516 NaN NaN NaT 2019-05-01 NaN NaN UMass NaN
4 1119773356459372 2019-04-23 2019-04-30 2019-04-30 hw 5 CS 589 NaN NaN NaT 2019-05-02 NaN NaN UMass NaN

Due Dates vs Stress

# Number of Asana tasks due on each calendar day of the semester.
#
# Due dates are normalised to midnight so they line up with the daily index,
# and the counts are reindexed over the full semester so that days without a
# deadline get an explicit 0. This avoids looking dates up by string, which
# only works when the lookup happens to return a Series rather than a scalar.
semester_days = pd.date_range(START_DATE, END_DATE, freq='D')
due_date_val_counts = (
    asana_df['Due Date'].dropna().dt.normalize().value_counts()
)
due_date_freqs_df = pd.DataFrame({
    'date': semester_days.strftime('%Y-%m-%d'),
    'num': due_date_val_counts.reindex(semester_days, fill_value=0).values,
})

# Bars go on the secondary (right-hand) axis, stress stays on the primary one.
due_date_trace = go.Bar(
    x=due_date_freqs_df.date,
    y=due_date_freqs_df.num,
    name='Due Dates',
    yaxis='y2',
)

data = [stress_trace, due_date_trace]
layout = go.Layout(
    title='Stress Level vs Number of Due Dates',
    yaxis=dict(
        title='Stress Level',
        overlaying='y2',  # draw the stress line on top of the bars
        range=[0, 8],
    ),
    yaxis2=dict(
        title='Due Dates',
        side='right',
        range=[0, 8],
    ),
)

fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='stress-vs-due-dates')

Wordcloud of Due Dates on Stressful vs Non-stressful Days

# Split the logged days at a stress score of 3: anything above counts as a
# stressful day, everything else as non-stressful.
stressful_dates = daily_log_df.loc[daily_log_df['Stress'] > 3, 'Date']
nonstressful_dates = daily_log_df.loc[daily_log_df['Stress'] <= 3, 'Date']

# Tasks whose due date falls on a stressful / non-stressful day.
stress_tasks = asana_df[asana_df['Due Date'].isin(stressful_dates)]
nonstress_tasks = asana_df[asana_df['Due Date'].isin(nonstressful_dates)]

# One space-separated string of task names per group (missing names skipped).
stress_text = ' '.join(stress_tasks['Name'].dropna())
nonstress_text = ' '.join(nonstress_tasks['Name'].dropna())

# Build the word clouds with the project helper.
stress_wordcloud = custom_utils.generate_wordcloud(stress_text)
nonstress_wordcloud = custom_utils.generate_wordcloud(nonstress_text)

# Show both clouds side by side with matplotlib.
f, axes = plt.subplots(1, 2, sharex=True)
f.set_size_inches(18, 10)
panels = (
    (axes[0], stress_wordcloud, 'Tasks on Stressful Days'),
    (axes[1], nonstress_wordcloud, 'Tasks on Non-stressful Days'),
)
for ax, cloud, title in panels:
    ax.imshow(cloud, interpolation="bilinear")
    ax.set_title(title, fontsize=36)
axes[0].axis("off")
axes[1].axis("off")
(-0.5, 399.5, 199.5, -0.5)

png

Number of Incomplete Tasks vs Stress

# For every day of the semester, count the tasks that already existed and
# were not yet finished: created on or before that day, and either completed
# on/after it or never completed at all.
created_at = asana_df['Created At']
completed_at = asana_df['Completed At']
semester_days = [
    START_DATE + dt.timedelta(days=offset)
    for offset in range((END_DATE - START_DATE).days + 1)
]
incomplete_task_counts = pd.DataFrame({
    'date': semester_days,
    'num': [
        int(((created_at <= day)
             & ((completed_at >= day) | completed_at.isnull())).sum())
        for day in semester_days
    ],
})

incomplete_tasks_trace = go.Bar(
    x=incomplete_task_counts.date,
    y=incomplete_task_counts.num,
    name='Incomplete Tasks',
    yaxis='y2',
)

data = [stress_trace, incomplete_tasks_trace]
layout = go.Layout(
    title='Stress Level vs Number of Incomplete Tasks',
    yaxis=dict(
        title='Stress Level',
        overlaying='y2',  # draw the stress line on top of the bars
        range=[0, 8],
    ),
    yaxis2=dict(
        title='Incomplete Tasks',
        side='right',
        range=[0, 16],
    ),
)

fig = go.Figure(data=data, layout=layout)
# The filename previously read 'rhr-vs-due-dates', copied from another chart.
iplot(fig, filename='stress-vs-incomplete-tasks')

Exams vs Stress

# Count the exams that start on each local calendar day of the semester.
with open('data/Exams_2alvmakoou6sa9ks0roaq79nic@group.calendar.google.com.ics', 'r') as f:
    exams_cal = Calendar(f.readlines())

exam_counts = []
# Iterate over *local* calendar days and localise each one individually.
# Stepping through the UTC timestamps instead loses the final day: the spring
# DST change makes END_DATE_UTC - START_DATE_UTC one hour short of a whole
# number of days, so `.days` rounds down.
for offset in range((END_DATE - START_DATE).days + 1):
    day = START_DATE + dt.timedelta(days=offset)
    day_start = local.localize(day, is_dst=None)
    # An event belongs to this day when it begins within 24h of local midnight.
    num = sum(
        1 for event in exams_cal.events
        if (event.begin - day_start).days == 0
    )
    exam_counts.append({
        'date': day.strftime('%Y-%m-%d'),
        'num': num,
    })

exam_counts = pd.DataFrame(exam_counts)
exams_trace = go.Bar(
    x=exam_counts.date,
    y=exam_counts.num,
    name='Exams',
    yaxis='y2',
)

data = [stress_trace, exams_trace]
layout = go.Layout(
    title='Stress Level vs Exams',
    yaxis=dict(
        title='Stress Level',
        range=[0, 8],
    ),
    yaxis2=dict(
        title='Exams',
        overlaying='y',
        side='right',
        range=[0, 2],
    ),
)

fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='stress-vs-exams')

Body

How does my body respond to stress?

Setup Fitbit API Client

# Read the Fitbit application credentials from the local key file.
with open('keys.json', 'r') as f:
    keys = json.load(f)

# Run the browser-based OAuth2 flow, then pull the tokens off the session.
server = Oauth2.OAuth2Server(keys['fitbit_client_id'], keys['fitbit_client_secret'])
server.browser_authorize()
ACCESS_TOKEN = str(server.fitbit.client.session.token['access_token'])
REFRESH_TOKEN = str(server.fitbit.client.session.token['refresh_token'])

# Authenticated client used by all Fitbit queries below.
fitbit_client = fitbit.Fitbit(
    keys['fitbit_client_id'],
    keys['fitbit_client_secret'],
    oauth2=True,
    access_token=ACCESS_TOKEN,
    refresh_token=REFRESH_TOKEN,
)
[12/Jun/2019:19:22:39] ENGINE Listening for SIGTERM.
[12/Jun/2019:19:22:39] ENGINE Listening for SIGHUP.
[12/Jun/2019:19:22:39] ENGINE Listening for SIGUSR1.
[12/Jun/2019:19:22:39] ENGINE Bus STARTING
CherryPy Checker:
The Application mounted at '' has an empty config.

[12/Jun/2019:19:22:39] ENGINE Started monitor thread 'Autoreloader'.
[12/Jun/2019:19:22:39] ENGINE Serving on http://127.0.0.1:8080
[12/Jun/2019:19:22:39] ENGINE Bus STARTED


127.0.0.1 - - [12/Jun/2019:19:22:41] "GET /?code=6ca26bec02cd3e194d7a3d292acd43579d406bdf&state=yWAKtBkwQV1kPMpOPi1dePViirgWh3 HTTP/1.1" 200 122 "" "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/74.0.3729.169 Chrome/74.0.3729.169 Safari/537.36"


[12/Jun/2019:19:22:42] ENGINE Bus STOPPING
[12/Jun/2019:19:22:47] ENGINE HTTP Server cherrypy._cpwsgi_server.CPWSGIServer(('127.0.0.1', 8080)) shut down
[12/Jun/2019:19:22:47] ENGINE Stopped thread 'Autoreloader'.
[12/Jun/2019:19:22:47] ENGINE Bus STOPPED
[12/Jun/2019:19:22:47] ENGINE Bus EXITING
[12/Jun/2019:19:22:47] ENGINE Bus EXITED
[12/Jun/2019:19:22:47] ENGINE Waiting for child threads to terminate...

Resting Heart Rate vs Stress

# Daily resting heart rate over the semester, plotted against stress.
heart_ts = fitbit_client.time_series(
    'activities/heart',
    base_date=START_DATE.strftime('%Y-%m-%d'),
    end_date=END_DATE.strftime('%Y-%m-%d'),
)

rhr_data = []
# Days without a 'restingHeartRate' entry reuse the most recent known value.
# Starting from None means a gap at the very beginning becomes a missing
# point instead of a NameError, and no unrelated exception is swallowed.
last_known_rhr = None
for row in heart_ts['activities-heart']:
    last_known_rhr = row['value'].get('restingHeartRate', last_known_rhr)
    rhr_data.append({
        'date': row['dateTime'],
        'rhr': last_known_rhr,
    })
rhr_df = pd.DataFrame(rhr_data)

rhr_trace = go.Scatter(
    x=rhr_df.date,
    y=rhr_df.rhr,
    name='Resting Heart Rate',
    yaxis='y2',
    fill='tozeroy',
)
data = [stress_trace, rhr_trace]
layout = go.Layout(
    title='Stress Level vs Resting Heart Rate',
    yaxis=dict(
        title='Stress Level',
        range=[0, 8],
    ),
    yaxis2=dict(
        title='Resting Heart Rate',
        overlaying='y',
        side='right',
        range=[45, 70],
    ),
)

fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='stress-vs-rhr')

Sleep vs Stress

The missing data in mid-March is because my Fitbit ran out of battery over spring break and I forgot to bring my charger.

# Fetch one sleep summary per day of the semester and convert the per-stage
# minutes to hours.
sleep_logs = []
for offset in range((END_DATE - START_DATE).days + 1):
    day = START_DATE + dt.timedelta(days=offset)
    sleep_log = fitbit_client.get_sleep(date=day)
    try:
        summary = sleep_log['summary']
        stages = summary['stages']
        sleep_logs.append({
            'date': day,
            'deep': stages['deep'] / 60,
            'light': stages['light'] / 60,
            'rem': stages['rem'] / 60,
            'wake': stages['wake'] / 60,
            'total': summary['totalMinutesAsleep'] / 60,
        })
    except KeyError as e:
        # Days with no stage breakdown are skipped and the missing key is
        # printed. Only KeyError is caught, so API failures are no longer
        # silently turned into missing days.
        print(e)
sleep_df = pd.DataFrame(sleep_logs)
'stages'
'stages'
'stages'
'stages'
'stages'
'stages'
'stages'
# Total hours asleep per night as bars on the secondary axis, with the stress
# line drawn over them on the primary axis.
sleep_trace = go.Bar(
    name='Sleep',
    x=sleep_df['date'],
    y=sleep_df['total'],
    yaxis='y2',
)

layout = go.Layout(
    title='Stress Level vs Sleep',
    barmode='stack',
    yaxis=dict(title='Stress Level', overlaying='y2', range=[0, 8]),
    yaxis2=dict(title='Sleep (Hrs)', side='right', range=[0, 10]),
)
data = [stress_trace, sleep_trace]

fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='stress-vs-sleep')

Sleep Stages before Stressful vs Non-stressful Days

# Compare how sleep time splits across stages for nights logged on stressful
# vs non-stressful days.
stress_sleep_logs = sleep_df[sleep_df['date'].isin(stressful_dates)]
nonstress_sleep_logs = sleep_df[sleep_df['date'].isin(nonstressful_dates)]

# Sum only the individual stage columns. Summing the whole frame would also
# add 'total' as its own slice (double counting the sleep time) and would try
# to sum the datetime 'date' column.
sleep_stage_columns = ['deep', 'light', 'rem', 'wake']
stress_sleep_sums = stress_sleep_logs[sleep_stage_columns].sum()
nonstress_sleep_sums = nonstress_sleep_logs[sleep_stage_columns].sum()

fig = {
    "data": [
        {
            "labels": list(stress_sleep_sums.index),
            "values": list(stress_sleep_sums.values),
            "domain": {"x": [0, .48]},  # left half of the figure
            "name": "Stressful Sleep Stages",
            "hoverinfo": "label+percent+name",
            "hole": .4,
            "type": "pie"
        },
        {
            "labels": list(nonstress_sleep_sums.index),
            "values": list(nonstress_sleep_sums.values),
            "domain": {"x": [.52, 1]},  # right half of the figure
            "name": "Non-stressful Sleep Stages",
            "hoverinfo": "label+percent+name",
            "hole": .4,
            "type": "pie"
        }
    ],
    "layout": {
        "title": "Sleep Stages before Stressful vs Non-stressful Days",
        # Captions placed in the hole of each donut.
        "annotations": [
            {
                "font": {
                    "size": 20
                },
                "showarrow": False,
                "text": "Stress",
                "x": 0.20,
                "y": 0.5
            },
            {
                "font": {
                    "size": 20
                },
                "showarrow": False,
                "text": "Non-stress",
                "x": 0.83,
                "y": 0.5
            }
        ]
    }
}
iplot(fig, filename='donut')

Habits

How do my actions change when I’m stressed?

Caloric Intake vs Stress

# Lookup of Fitbit meal-type ids to labels.
meal_type = {
    1: 'Breakfast',
    2: 'Morning Snack',
    3: 'Lunch',
    4: 'Afternoon Snack',
    5: 'Dinner',
    7: 'Anytime',
}
# Multipliers used to turn each macronutrient amount into calories.
cals_per_nutrient = {'carbs': 4, 'fat': 9, 'protein': 4}

# One food-log request per day of the semester: calories per macronutrient,
# total calories and the names of the logged foods.
macronutrient_logs = []
n_days = (END_DATE - START_DATE).days + 1
for offset in range(n_days):
    date = START_DATE + dt.timedelta(days=offset)
    food_log = fitbit_client.foods_log(date=date)
    summary = food_log['summary']
    macronutrient_logs.append({
        'date': date,
        'cals_from_carbs': summary['carbs'] * cals_per_nutrient['carbs'],
        'cals_from_fat': summary['fat'] * cals_per_nutrient['fat'],
        'cals_from_protein': summary['protein'] * cals_per_nutrient['protein'],
        'total_cals': summary['calories'],
        'foods_eaten': [entry['loggedFood']['name'] for entry in food_log['foods']],
    })
macronutrient_df = pd.DataFrame(macronutrient_logs)

# Total calories as bars on the secondary axis, stress line drawn on top.
cals_trace = go.Bar(
    name='Calories',
    x=macronutrient_df['date'],
    y=macronutrient_df['total_cals'],
    yaxis='y2',
)

layout = go.Layout(
    title='Stress Level vs Caloric Intake',
    barmode='stack',
    yaxis=dict(title='Stress Level', overlaying='y2', range=[0, 8]),
    yaxis2=dict(title='Calories', side='right', range=[0, 4000]),
)
data = [stress_trace, cals_trace]

fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='stacked-bar')

Carbs, Fat, Protein Avg on Stressful vs Non-stressful Days

# Compare where calories come from (carbs / fat / protein) on stressful vs
# non-stressful days.
stress_food_logs = macronutrient_df[macronutrient_df['date'].isin(stressful_dates)]
nonstress_food_logs = macronutrient_df[macronutrient_df['date'].isin(nonstressful_dates)]

# Sum only the per-macronutrient columns. Summing the whole frame would add
# 'total_cals' as its own slice (double counting every calorie) and would also
# try to reduce the datetime 'date' and list-valued 'foods_eaten' columns.
macro_columns = ['cals_from_carbs', 'cals_from_fat', 'cals_from_protein']
stress_food_sums = stress_food_logs[macro_columns].sum()
nonstress_food_sums = nonstress_food_logs[macro_columns].sum()

fig = {
    "data": [
        {
            "labels": list(stress_food_sums.index),
            "values": list(stress_food_sums.values),
            "domain": {"x": [0, .48]},  # left half of the figure
            "name": "Stressful Caloric Intake",
            "hoverinfo": "label+percent",
            "hole": .4,
            "type": "pie"
        },
        {
            "labels": list(nonstress_food_sums.index),
            "values": list(nonstress_food_sums.values),
            "domain": {"x": [.52, 1]},  # right half of the figure
            "name": "Non-stressful Caloric Intake",
            "hoverinfo": "label+percent",
            "hole": .4,
            "type": "pie"
        }
    ],
    "layout": {
        "title": "Caloric Intake on Stressful vs Non-stressful Days",
        # Captions placed in the hole of each donut.
        "annotations": [
            {
                "font": {
                    "size": 20
                },
                "showarrow": False,
                "text": "Stress",
                "x": 0.20,
                "y": 0.5
            },
            {
                "font": {
                    "size": 20
                },
                "showarrow": False,
                "text": "Non-stress",
                "x": 0.83,
                "y": 0.5
            }
        ]
    }
}
iplot(fig, filename='donut')

Wordcloud of Foods Eaten on Stressful vs Non-stressful Days

# Join each day's food names with spaces, then join the days the same way.
stress_food_str = ' '.join(map(' '.join, stress_food_logs['foods_eaten'].values))
nonstress_food_str = ' '.join(map(' '.join, nonstress_food_logs['foods_eaten'].values))

# Build the word clouds with the project helper.
stress_wordcloud = custom_utils.generate_wordcloud(stress_food_str)
nonstress_wordcloud = custom_utils.generate_wordcloud(nonstress_food_str)

# Show both clouds side by side with matplotlib.
f, axes = plt.subplots(1, 2, sharex=True)
f.set_size_inches(18, 10)
panels = (
    (axes[0], stress_wordcloud, 'Stressed'),
    (axes[1], nonstress_wordcloud, 'Not Stressed'),
)
for ax, cloud, title in panels:
    ax.imshow(cloud, interpolation="bilinear")
    ax.set_title(title, fontsize=36)
axes[0].axis("off")
axes[1].axis("off")
(-0.5, 399.5, 199.5, -0.5)

png

Top 10 Website Usage on Stressful vs Non-stressful Days

# Web-time tracker export, read with its first column as the index and then
# transposed so that each row is a date and each column a domain.
webtracker_df = pd.read_csv('data/webtime-tracker.csv', index_col=0).T
webtracker_df.head()
Domain 0.0.0.0 1.1.1.1 127.0.0.1 192.168.1.1 192.168.123.1 2.bp.blogspot.com 2ality.com 326webprojectslack.slack.com 4.bp.blogspot.com 66.media.tumblr.com ... www8.garmin.com xarray.pydata.org yalebooks.yale.edu yourbasic.org youthful-sage.glitch.me youtu.be yutsumura.com z-table.com zellwk.com zulko.github.io
2018-11-22 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2018-11-22.1 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2018-11-23 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2018-11-24 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2018-11-25 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

5 rows × 1183 columns

# Total time per domain on stressful vs non-stressful days, shown as the top
# ten domains plus an 'Other' bucket.
stressful_date_strs = stressful_dates.dt.strftime('%Y-%m-%d').tolist()
nonstressful_date_strs = nonstressful_dates.dt.strftime('%Y-%m-%d').tolist()

# Select rows by membership rather than `.loc[list]`, so that a logged day
# missing from the tracker export is simply skipped instead of raising
# KeyError.
stress_rows = webtracker_df[webtracker_df.index.isin(stressful_date_strs)]
nonstress_rows = webtracker_df[webtracker_df.index.isin(nonstressful_date_strs)]
stress_domains = stress_rows.sum(axis=0).sort_values(ascending=False)
nonstress_domains = nonstress_rows.sum(axis=0).sort_values(ascending=False)

fig = {
    "data": [
        {
            "labels": list(stress_domains.index[:10]) + ['Other'],
            "values": list(stress_domains.iloc[:10].values) + [stress_domains.iloc[10:].sum()],
            "domain": {"x": [0, .48]},  # left half of the figure
            "name": "Stressful Surfing",
            "hoverinfo": "label+percent",
            "hole": .4,
            "type": "pie"
        },
        {
            "labels": list(nonstress_domains.index[:10]) + ['Other'],
            "values": list(nonstress_domains.iloc[:10].values) + [nonstress_domains.iloc[10:].sum()],
            "domain": {"x": [.52, 1]},  # right half of the figure
            "name": "Non-stressful Surfing",
            "hoverinfo": "label+percent",
            "hole": .4,
            "type": "pie"
        }
    ],
    "layout": {
        "title": "Website Usage on Stressful vs Non-stressful Days",
        # Captions placed in the hole of each donut.
        "annotations": [
            {
                "font": {
                    "size": 20
                },
                "showarrow": False,
                "text": "Stress",
                "x": 0.19,
                "y": 0.5
            },
            {
                "font": {
                    "size": 20
                },
                "showarrow": False,
                "text": "Non-stress",
                "x": 0.85,
                "y": 0.5
            }
        ]
    }
}
iplot(fig, filename='donut')

Stress Level vs Minutes Sedentary

# Daily sedentary minutes over the semester, plotted against stress.
sed_ts = fitbit_client.time_series(
    'activities/minutesSedentary',
    base_date=START_DATE.strftime('%Y-%m-%d'),
    end_date=END_DATE.strftime('%Y-%m-%d'),
)

sed_data = [
    {'date': row['dateTime'], 'minutes': row['value']}
    for row in sed_ts['activities-minutesSedentary']
]
sed_df = pd.DataFrame(sed_data, columns=['date', 'minutes'])
# NOTE(review): the Fitbit time-series API appears to return values as
# strings; coerce to numbers so the axis is numeric. Confirm against the API
# reference.
sed_df['minutes'] = pd.to_numeric(sed_df['minutes'], errors='coerce')

sedentary_trace = go.Scatter(
    x=sed_df.date,
    y=sed_df.minutes,
    name='Minutes Sedentary',
    yaxis='y2',
    fill='tozeroy',
)
data = [stress_trace, sedentary_trace]
layout = go.Layout(
    title='Stress Level vs Minutes Sedentary',
    yaxis=dict(
        title='Stress Level',
        range=[0, 8],
    ),
    yaxis2=dict(
        title='Minutes Sedentary',
        overlaying='y',
        side='right',  # a stray `s` after this argument was a syntax error
        range=[0, 1600],
    ),
)

fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='stress-vs-min-sedentary')