import pandas as pd
dete_df = pd.read_csv('data/dete_survey.csv')
dete_df.head()


dete_df = pd.read_csv('data/dete_survey.csv', na_values = 'Not Stated')
dete_df.head()


for col in dete_df.columns:
    print(col)

ID
SeparationType
Cease Date
DETE Start Date
Role Start Date
Position
Classification
Region
Business Unit
Employment Status
Career move to public sector
Career move to private sector
Interpersonal conflicts
Job dissatisfaction
Dissatisfaction with the department
Physical work environment
Lack of recognition
Lack of job security
Work location
Employment conditions
Maternity/family
Relocation
Study/Travel
Ill Health
Traumatic incident
Work life balance
Workload
None of the above
Professional Development
Opportunities for promotion
Staff morale
Workplace issue
Physical environment
Worklife balance
Stress and pressure support
Performance of supervisor
Peer support
Initiative
Skills
Coach
Career Aspirations
Feedback
Further PD
Communication
My say
Information
Kept informed
Wellness programs
Health & Safety
Gender
Age
Aboriginal
Torres Strait
South Sea
Disability
NESB


dete_df_updated = dete_df.drop(dete_df.columns[28:49], axis = 1)


dete_df_updated.columns = dete_df_updated.columns.str.lower().str.strip().str.replace(' ','_')
dete_df_updated.columns

Index(['id', 'separationtype', 'cease_date', 'dete_start_date',
       'role_start_date', 'position', 'classification', 'region',
       'business_unit', 'employment_status', 'career_move_to_public_sector',
       'career_move_to_private_sector', 'interpersonal_conflicts',
       'job_dissatisfaction', 'dissatisfaction_with_the_department',
       'physical_work_environment', 'lack_of_recognition',
       'lack_of_job_security', 'work_location', 'employment_conditions',
       'maternity/family', 'relocation', 'study/travel', 'ill_health',
       'traumatic_incident', 'work_life_balance', 'workload',
       'none_of_the_above', 'gender', 'age', 'aboriginal', 'torres_strait',
       'south_sea', 'disability', 'nesb'],
      dtype='object')


dete_df_updated['separationtype'].value_counts()

Age Retirement                          285
Resignation-Other reasons               150
Resignation-Other employer               91
Resignation-Move overseas/interstate     70
Voluntary Early Retirement (VER)         67
Ill Health Retirement                    61
Other                                    49
Contract Expired                         34
Termination                              15
Name: separationtype, dtype: int64


dete_resignations = dete_df_updated.loc[dete_df_updated.loc[:,'separationtype'].str.contains('Resignation'), :].copy()


dete_resignations.loc[:,'cease_date'].value_counts()

2012       126
2013        74
01/2014     22
12/2013     17
06/2013     14
09/2013     11
07/2013      9
11/2013      9
10/2013      6
08/2013      4
05/2012      2
05/2013      2
07/2012      1
2010         1
09/2010      1
07/2006      1
Name: cease_date, dtype: int64


dete_resignations.loc[:,'dete_start_date'].value_counts()

2011.0    24
2008.0    22
2012.0    21
2007.0    21
2010.0    17
2005.0    15
2004.0    14
2006.0    13
2009.0    13
2013.0    10
2000.0     9
1999.0     8
1994.0     6
2003.0     6
1992.0     6
1996.0     6
1998.0     6
2002.0     6
1990.0     5
1997.0     5
1980.0     5
1993.0     5
1989.0     4
1991.0     4
1988.0     4
1995.0     4
1986.0     3
2001.0     3
1985.0     3
1983.0     2
1976.0     2
1974.0     2
1975.0     1
1984.0     1
1971.0     1
1982.0     1
1972.0     1
1963.0     1
1977.0     1
1973.0     1
1987.0     1
Name: dete_start_date, dtype: int64


dete_resignations.loc[:,'cease_date'] = dete_resignations.loc[:,'cease_date'].str[-4:].astype(float)


dete_resignations.loc[:,'institute_service'] = dete_resignations['cease_date'] - dete_resignations['dete_start_date']
dete_resignations.loc[dete_resignations['institute_service'] <= 0.0,'institute_service'].value_counts()

0.0    20
Name: institute_service, dtype: int64


dete_disatisfied_cols = ['job_dissatisfaction', 'dissatisfaction_with_the_department',
                    'physical_work_environment', 'lack_of_recognition', 'lack_of_job_security',
                    'work_location', 'employment_conditions', 'work_life_balance', 'workload']
dete_resignations['dissatisfied'] = dete_resignations.loc[:, dete_disatisfied_cols].any(axis = 1)
dete_resignations['dissatisfied'].value_counts()

False    162
True     149
Name: dissatisfied, dtype: int64


dete_resignations['institute'] = 'DETE'


tafe_df = pd.read_csv('data/tafe_survey.csv')
tafe_df.columns

Index(['Record ID', 'Institute', 'WorkArea', 'CESSATION YEAR',
       'Reason for ceasing employment',
       'Contributing Factors. Career Move - Public Sector ',
       'Contributing Factors. Career Move - Private Sector ',
       'Contributing Factors. Career Move - Self-employment',
       'Contributing Factors. Ill Health',
       'Contributing Factors. Maternity/Family',
       'Contributing Factors. Dissatisfaction',
       'Contributing Factors. Job Dissatisfaction',
       'Contributing Factors. Interpersonal Conflict',
       'Contributing Factors. Study', 'Contributing Factors. Travel',
       'Contributing Factors. Other', 'Contributing Factors. NONE',
       'Main Factor. Which of these was the main factor for leaving?',
       'InstituteViews. Topic:1. I feel the senior leadership had a clear vision and direction',
       'InstituteViews. Topic:2. I was given access to skills training to help me do my job better',
       'InstituteViews. Topic:3. I was given adequate opportunities for personal development',
       'InstituteViews. Topic:4. I was given adequate opportunities for promotion within %Institute]Q25LBL%',
       'InstituteViews. Topic:5. I felt the salary for the job was right for the responsibilities I had',
       'InstituteViews. Topic:6. The organisation recognised when staff did good work',
       'InstituteViews. Topic:7. Management was generally supportive of me',
       'InstituteViews. Topic:8. Management was generally supportive of my team',
       'InstituteViews. Topic:9. I was kept informed of the changes in the organisation which would affect me',
       'InstituteViews. Topic:10. Staff morale was positive within the Institute',
       'InstituteViews. Topic:11. If I had a workplace issue it was dealt with quickly',
       'InstituteViews. Topic:12. If I had a workplace issue it was dealt with efficiently',
       'InstituteViews. Topic:13. If I had a workplace issue it was dealt with discreetly',
       'WorkUnitViews. Topic:14. I was satisfied with the quality of the management and supervision within my work unit',
       'WorkUnitViews. Topic:15. I worked well with my colleagues',
       'WorkUnitViews. Topic:16. My job was challenging and interesting',
       'WorkUnitViews. Topic:17. I was encouraged to use my initiative in the course of my work',
       'WorkUnitViews. Topic:18. I had sufficient contact with other people in my job',
       'WorkUnitViews. Topic:19. I was given adequate support and co-operation by my peers to enable me to do my job',
       'WorkUnitViews. Topic:20. I was able to use the full range of my skills in my job',
       'WorkUnitViews. Topic:21. I was able to use the full range of my abilities in my job. ; Category:Level of Agreement; Question:YOUR VIEWS ABOUT YOUR WORK UNIT]',
       'WorkUnitViews. Topic:22. I was able to use the full range of my knowledge in my job',
       'WorkUnitViews. Topic:23. My job provided sufficient variety',
       'WorkUnitViews. Topic:24. I was able to cope with the level of stress and pressure in my job',
       'WorkUnitViews. Topic:25. My job allowed me to balance the demands of work and family to my satisfaction',
       'WorkUnitViews. Topic:26. My supervisor gave me adequate personal recognition and feedback on my performance',
       'WorkUnitViews. Topic:27. My working environment was satisfactory e.g. sufficient space, good lighting, suitable seating and working area',
       'WorkUnitViews. Topic:28. I was given the opportunity to mentor and coach others in order for me to pass on my skills and knowledge prior to my cessation date',
       'WorkUnitViews. Topic:29. There was adequate communication between staff in my unit',
       'WorkUnitViews. Topic:30. Staff morale was positive within my work unit',
       'Induction. Did you undertake Workplace Induction?',
       'InductionInfo. Topic:Did you undertake a Corporate Induction?',
       'InductionInfo. Topic:Did you undertake a Institute Induction?',
       'InductionInfo. Topic: Did you undertake Team Induction?',
       'InductionInfo. Face to Face Topic:Did you undertake a Corporate Induction; Category:How it was conducted?',
       'InductionInfo. On-line Topic:Did you undertake a Corporate Induction; Category:How it was conducted?',
       'InductionInfo. Induction Manual Topic:Did you undertake a Corporate Induction?',
       'InductionInfo. Face to Face Topic:Did you undertake a Institute Induction?',
       'InductionInfo. On-line Topic:Did you undertake a Institute Induction?',
       'InductionInfo. Induction Manual Topic:Did you undertake a Institute Induction?',
       'InductionInfo. Face to Face Topic: Did you undertake Team Induction; Category?',
       'InductionInfo. On-line Topic: Did you undertake Team Induction?process you undertook and how it was conducted.]',
       'InductionInfo. Induction Manual Topic: Did you undertake Team Induction?',
       'Workplace. Topic:Did you and your Manager develop a Performance and Professional Development Plan (PPDP)?',
       'Workplace. Topic:Does your workplace promote a work culture free from all forms of unlawful discrimination?',
       'Workplace. Topic:Does your workplace promote and practice the principles of employment equity?',
       'Workplace. Topic:Does your workplace value the diversity of its employees?',
       'Workplace. Topic:Would you recommend the Institute as an employer to others?',
       'Gender. What is your Gender?', 'CurrentAge. Current Age',
       'Employment Type. Employment Type', 'Classification. Classification',
       'LengthofServiceOverall. Overall Length of Service at Institute (in years)',
       'LengthofServiceCurrent. Length of Service at current workplace (in years)'],
      dtype='object')


tafe_df_updated = tafe_df.drop(tafe_df.columns[17:66], axis = 1)


tafe_col_name_map = {'Record ID': 'id',
                    'CESSATION YEAR': 'cease_date',
                    'Reason for ceasing employment': 'separationtype',
                    'Gender. What is your Gender?': 'gender',
                    'CurrentAge. Current Age': 'age',
                    'Employment Type. Employment Type': 'employment_status',
                    'Classification. Classification': 'position',
                    'LengthofServiceOverall. Overall Length of Service at Institute (in years)': 'institute_service',
                    'LengthofServiceCurrent. Length of Service at current workplace (in years)': 'role_service'}
tafe_df_updated.rename(mapper=tafe_col_name_map,
                        axis = 1, inplace = True)
tafe_df_updated.head()


tafe_df_updated.loc[:,'separationtype'].value_counts()

Resignation                 340
Contract Expired            127
Retrenchment/ Redundancy    104
Retirement                   82
Transfer                     25
Termination                  23
Name: separationtype, dtype: int64


tafe_resignations = tafe_df_updated.loc[tafe_df_updated.loc[:,'separationtype'] == 'Resignation',:].copy()
tafe_resignations.head()


tafe_resignations.loc[:,'cease_date'].value_counts()

2011.0    116
2012.0     94
2010.0     68
2013.0     55
2009.0      2
Name: cease_date, dtype: int64


print(tafe_resignations['Contributing Factors. Dissatisfaction'].value_counts())
print(tafe_resignations['Contributing Factors. Job Dissatisfaction'].value_counts())

-                                         277
Contributing Factors. Dissatisfaction      55
Name: Contributing Factors. Dissatisfaction, dtype: int64
-                      270
Job Dissatisfaction     62
Name: Contributing Factors. Job Dissatisfaction, dtype: int64


import numpy as np
def updated_vals(val):
    if val == '-':
        return False
    elif pd.isnull(val) == True:
        return np.nan
    else:
        return True
tafe_disat_cols = ['Contributing Factors. Dissatisfaction','Contributing Factors. Job Dissatisfaction']

tafe_resignations.loc[:, 'dissatisfied'] = tafe_resignations.loc[
                                            :,  tafe_disat_cols].applymap(
                                                updated_vals).any(
                                                    axis = 1)
tafe_resignations.head()


tafe_resignations['institute'] = 'TAFE'


combined = pd.concat([dete_resignations, tafe_resignations])
combined.dropna(axis = 1, thresh = 500, inplace = True)
combined.head()


combined = combined.drop('id', axis = 1)


combined.loc[:,'separationtype'].value_counts(dropna = False)

Resignation                             340
Resignation-Other reasons               150
Resignation-Other employer               91
Resignation-Move overseas/interstate     70
Name: separationtype, dtype: int64


combined.loc[:,'separationtype'] = combined.loc[:,'separationtype'].str.replace(
    'Resignation-','').str.replace(
        'Resignation','Unknown').str.replace(
            'Other reasons','Unknown').copy()


combined.loc[:,'cease_date'].value_counts(dropna = False)

2012.0    223
2013.0    201
2011.0    116
2010.0     70
2014.0     22
NaN        16
2009.0      2
2006.0      1
Name: cease_date, dtype: int64


combined[combined.loc[:,'cease_date'].isna()]


combined.drop([113,135,513], axis = 0, inplace = True)


combined.loc[:,'position'].value_counts(dropna = False)

Administration (AO)                                        148
Teacher                                                    129
Teacher (including LVT)                                     95
Teacher Aide                                                62
NaN                                                         50
Cleaner                                                     39
Public Servant                                              30
Professional Officer (PO)                                   16
Operational (OO)                                            13
Head of Curriculum/Head of Special Education                10
School Administrative Staff                                  8
Technical Officer                                            8
Schools Officer                                              7
Workplace Training Officer                                   6
Technical Officer (TO)                                       5
School Based Professional Staff (Therapist, nurse, etc)      5
Executive (SES/SO)                                           4
Other                                                        3
Tutor                                                        3
Guidance Officer                                             3
Professional Officer                                         2
Business Service Manager                                     1
Name: position, dtype: int64


combined.loc[:,'position'] = combined.loc[:,'position'].str.replace('\([A-Z][A-Z]\)','').str.strip().str.lower()

/tmp/ipykernel_801/1694396809.py:1: FutureWarning: The default value of regex will change from True to False in a future version.
  combined.loc[:,'position'] = combined.loc[:,'position'].str.replace('\([A-Z][A-Z]\)','').str.strip().str.lower()


teaching = ['teacher','teacher (including lvt)','teacher aide','tutor']
                                                                                       
officers = ['professional officer','guidance officer','technical officer',
            'workplace training officer','schools officer','technical officer', 
            'professional officer']

administration = ['administration','school administrative staff', 'head of curriculum/head of special education']

combined.loc[:,'position'] = combined.loc[:,'position'].apply(
    lambda x: 'teaching staff' if x in teaching else (
        'administration' if x in administration else (
            'officer' if x in officers else 'other')))


combined.loc[:,'employment_status'].value_counts(dropna = False)

Permanent Full-time    256
Permanent Part-time    149
Temporary Full-time    120
NaN                     51
Temporary Part-time     37
Contract/casual         29
Casual                   5
Name: employment_status, dtype: int64


combined.loc[combined.loc[:,'employment_status'].isna(),:]


combined = combined.loc[
    (combined.isna().sum( axis = 1)) < 5,
         :].copy()
combined.loc[:,'employment_status'].value_counts(dropna = False)

Permanent Full-time    256
Permanent Part-time    149
Temporary Full-time    120
NaN                     51
Temporary Part-time     37
Contract/casual         29
Casual                   5
Name: employment_status, dtype: int64


combined.loc[:,'employment_status'] = combined.loc[:,'employment_status'].apply(lambda x: 'Contract/casual' if x == 'Casual' else x)
combined.loc[:,'employment_status'].value_counts(dropna = False)

Permanent Full-time    256
Permanent Part-time    149
Temporary Full-time    120
NaN                     51
Temporary Part-time     37
Contract/casual         34
Name: employment_status, dtype: int64


combined.loc[:,'gender'].value_counts(dropna = False)

Female    423
Male      168
NaN        56
Name: gender, dtype: int64


combined.loc[:,'age'].value_counts(dropna = False)

51-55            71
NaN              52
41-45            48
41  45           45
46-50            42
36-40            41
46  50           39
26-30            35
21  25           33
36  40           32
31  35           32
26  30           32
21-25            29
31-35            29
56 or older      29
56-60            25
61 or older      23
20 or younger    10
Name: age, dtype: int64


combined.loc[:,'age'] = combined.loc[:,'age'].str.replace(
                            '\s+','-').str.replace(
                                '[1-9][1-9]-or-older', '56+').str.replace(
                                    '56-60','56+').str.replace(
                                        '20-or-younger','<=20')

/tmp/ipykernel_801/1250315822.py:1: FutureWarning: The default value of regex will change from True to False in a future version.
  combined.loc[:,'age'] = combined.loc[:,'age'].str.replace(


exploration = ['<=20','21-25']
establishment = ['26-30','31-35']
mid_career = ['36-40','41-45']
late_career = ['46-50','51-55']

combined.loc[:,'age'] = combined.loc[:,'age'].apply(
    lambda x: 'less than 26' if x in exploration else(
        '26-35' if x in establishment else(
            '36-45' if x in mid_career else (
                '46-55' if x in late_career else '56+'))))


combined.loc[:,'institute_service'].value_counts()

Less than 1 year      73
1-2                   64
3-4                   63
5-6                   33
11-20                 26
5.0                   23
1.0                   22
7-10                  21
3.0                   20
0.0                   20
6.0                   17
4.0                   16
9.0                   14
2.0                   14
7.0                   13
More than 20 years    10
13.0                   8
8.0                    8
15.0                   7
20.0                   7
10.0                   6
14.0                   6
12.0                   6
17.0                   6
22.0                   6
16.0                   5
11.0                   4
23.0                   4
18.0                   4
24.0                   4
32.0                   3
39.0                   3
19.0                   3
21.0                   3
36.0                   2
25.0                   2
30.0                   2
26.0                   2
28.0                   2
49.0                   1
41.0                   1
27.0                   1
42.0                   1
29.0                   1
34.0                   1
31.0                   1
33.0                   1
35.0                   1
38.0                   1
Name: institute_service, dtype: int64


combined.loc[:,'institute_service'] = combined.loc[:,'institute_service'].astype(str).replace(
                                            'Less than 1 year','1').replace(
                                            'More than 20 years','20').replace(
                                            '11-20','11').replace(
                                            '7-10','8').replace(
                                            '5-6','5.5').replace(
                                            '3-4','3.5').replace(
                                            '1-2','1.5').astype(float)


def year_bin(year):
    if pd.isnull(year):
        return np.nan
    elif year < 3:
        return 'New'
    elif  year < 7:
        return 'Experienced'
    elif year < 11:
        return 'Established'
    else:
        return 'Veteran'

combined.loc[:,'service_cat'] = combined.loc[:,'institute_service'].apply(year_bin)
combined.loc[:,'service_cat'].value_counts(dropna = False)

New            193
Experienced    172
Veteran        135
NaN             85
Established     62
Name: service_cat, dtype: int64


combined.drop('institute_service', axis = 1, inplace = True)


combined.isnull().sum(axis = 0)

separationtype        0
cease_date           13
position              0
employment_status    51
gender               56
age                   0
dissatisfied          0
institute             0
service_cat          85
dtype: int64


fill_vals = combined.loc[:,'service_cat'].value_counts(normalize = True).keys().to_list()
probs = combined.loc[:,'service_cat'].value_counts(normalize = True).to_list()

combined.loc[:,'service_cat'] = combined.loc[:,'service_cat'].fillna(pd.Series(np.random.choice(fill_vals, 
                                                      p=probs, size=len(combined))))

#fill any remaining values to be filled with 'New'
combined.loc[:,'service_cat'] = combined.loc[:,'service_cat'].fillna('New')


combined.isnull().sum(axis = 0)

separationtype        0
cease_date           13
position              0
employment_status    51
gender               56
age                   0
dissatisfied          0
institute             0
service_cat           0
dtype: int64


combined = combined.apply(lambda x: x.fillna(x.value_counts().index[0]))
combined.isnull().sum(axis = 0)

separationtype       0
cease_date           0
position             0
employment_status    0
gender               0
age                  0
dissatisfied         0
institute            0
service_cat          0
dtype: int64


import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
dissatisfied = {}
for age in combined['age'].value_counts().index.to_list():
    dissatisfied[age] = combined.loc[combined.loc[:,'age'] == age,'dissatisfied'].mean()
ages = ['less than 26', '26-35','36-45','46-55','56+']
dissat_ordered = [dissatisfied[age] for age in ages]

fig, ax = plt.subplots(figsize = (12,8))
ax.barh(ages, dissat_ordered, alpha = .85)
#__________________________________________________________________________

ax.grid(visible = False)  
ax.tick_params(colors = 'grey', which = 'both')                  #remove grid and yticklabels
ax.set_xticklabels([])            #create legend
#ax.yaxis.set_tick_params(rotation = 30)
#__________________________________________________________________________
#Generate title and subtext
ax.text(0,5, 'Dissatisfaction Similar Among All Ages', weight = 'bold', size = 26, alpha = .75)
ax.text(0,4.7, 'Percentage (by age) of employees who are dissatisfied', size = 19, alpha = .85)
ax.axhline(4.6, color='grey', linewidth=1, alpha=0.9,
          xmin=-.5, xmax=1)
#___________________________________________________________________________
#Place proportion values above bars
ax.text(x = -.05, y = -1,
    s = '   J. Wilson Peoples                                                                          Source: https://data.gov.au',
    fontsize = 19, color = '#f0f0f0', backgroundcolor = 'grey')
for i, v in enumerate(dissat_ordered):
    ax.text(v+.01,i, str(round(100*v,2)) + '%', color='black', fontweight='bold', fontsize=14, ha='left', va='center', alpha = .75)


combined_pv = combined.groupby(['separationtype', 'service_cat'])['dissatisfied'].mean()
service_order = ['New', 'Experienced', 'Established', 'Veteran']
sep_types = combined.loc[:,'separationtype'].value_counts().keys().to_list()
plot = {}
for septype in sep_types:
    plot[septype] = [combined_pv[septype][service] for service in service_order]

fig, ax = plt.subplots(figsize = (12,8))


for septype in sep_types:
    ax.plot(service_order,plot[septype], 'o-', linewidth = 2, label = septype)

#__________________________________________________________________________
ax.tick_params(colors = 'grey', which = 'both') 
                   #remove grid and yticklabels
ax.set_yticks([0,.35,.5])
ax.set_yticklabels([])          #create legend
#ax.set_yticks([0,.25,.5,.75])
#ax.legend(fontsize = 10, loc = 'upper left')
#ax.legend(fontsize = 8, bbox_to_anchor=(0, .15), loc='upper left', borderaxespad=0)
#ax.yaxis.set_tick_params(rotation = 30)
#__________________________________________________________________________
#Generate title and subtext
ax.text(-0.1,.715, 'Other Employers: Hiring Experienced', weight = 'bold', size = 26, alpha = .75)
ax.text(-0.1,.685, 'Dissatisfied Workers', weight = 'bold', size = 26, alpha = .75)
ax.text(-0.1,.66, 'Percentage (by years of service) of employees who are', size = 19, alpha = .85)
ax.text(-0.1,.64, 'dissatisfied: grouped by separation type', size = 19, alpha = .85)
ax.text(.31,.30, 'Unknown', size = 19, rotation = 17, c ='#30a2da', alpha = .85)
ax.text(.1,.41, 'Other Employer', size = 19, rotation = 41, c ='#fc4f30', alpha = .85)
ax.text(.40,.24, 'Moving', size = 19, rotation = -10, c ='#e5ae38', alpha = .85)
ax.axhline(.63, color='grey', linewidth=1, alpha=0.9,
          xmin=-.7, xmax=2)
#___________________________________________________________________________
#Place proportion values above bars
ax.text(x = -.15, y = .12,
    s = '   J. Wilson Peoples                                                                                            Source: https://data.gov.au',
    fontsize = 15, color = '#f0f0f0', backgroundcolor = 'grey')

for i, v in enumerate(plot['Unknown']):
    if i == 0:
        pass
    else:
        ax.text(i,v+.015, str(round(100*v,2)) + '%', color='black', fontweight='bold', fontsize=14, ha='left', va='center', alpha = .75)
for i, v in enumerate(plot['Other employer']):
    if i == 0:
        pass
    else:
        ax.text(i,v+.015, str(round(100*v,2)) + '%', color='black', fontweight='bold', fontsize=14, ha='left', va='center', alpha = .75)
for i, v in enumerate(plot['Move overseas/interstate']):
    if i == 0:
        pass
    else:
        ax.text(i,v+.015, str(round(100*v,2)) + '%', color='black', fontweight='bold', fontsize=14, ha='left', va='center', alpha = .75)


combined_pv = combined.groupby(['service_cat'])['dissatisfied'].mean()
service_order = ['New','Experienced', 'Established', 'Veteran']
sep_types = combined.loc[:,'separationtype'].value_counts().keys().to_list()
plot = [combined_pv[service] for service in service_order]
fig, ax = plt.subplots(figsize = (12,8))
ax.plot(service_order,plot);

#__________________________________________________________________________
ax.tick_params(color = 'grey', which = 'both');
ax.grid(visible = True);   
ax.set_yticklabels(['','30%','35%','40%','45%','50%','55%']);              #remove grid and yticklabels           #create legend
ax.axhline(.60, color='grey', linewidth=1, alpha=0.9,
          xmin=-.7, xmax=2);
#__________________________________________________________________________
#Generate title and subtext
ax.text(0,.67, 'Dissatisfaction Grows with Time of Service', weight = 'bold', size = 26, alpha = .75)
ax.text(0,.635, 'Percentage (by years of service) of employees who are dissatisfied', size = 19, alpha = .85)
#___________________________________________________________________________
ax.fill_between(service_order,plot, alpha = .6)


ax.text(x = -.29, y = -.13,
    s = '   J. Wilson Peoples                                                                                            Source: https://data.gov.au',
    fontsize = 15, color = '#f0f0f0', backgroundcolor = 'grey')

/tmp/ipykernel_801/2742531742.py:11: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(['','30%','35%','40%','45%','50%','55%']);              #remove grid and yticklabels           #create legend

Text(-0.29, -0.13, '   J. Wilson Peoples                                                                                            Source: https://data.gov.au')


combined_pv = combined.groupby(['cease_date'])['dissatisfied'].sum()
combined_pv.index.to_list()
combined_pv = combined.groupby(['cease_date'])['dissatisfied'].sum()
cease_years = combined_pv.index.to_list()
plot = [combined_pv[year] for year in cease_years]

combined_count = combined.groupby(['cease_date'])['dissatisfied'].count()
plot2 = [combined_count[year] for year in cease_years]
fig, ax = plt.subplots(figsize = (12,8))
x_axis = cease_years    #create numeric axis for purpose of shifting

#create bar plots
ax.tick_params(color = 'grey', which = 'both')
ax.plot([year for year in cease_years], plot2, .4, label = 'Total worker resignations')
ax.plot([year for year in cease_years], plot, .2, label = 'Dissatisfied worker resignations')
ax.set_xlim([2008.9,2014.05])
ax.set_ylim([-10,256])

#__________________________________________________________________________

ax.grid(visible = False)                #remove grid and yticklabels           #create legend
ax.axhline(.60, color='grey', linewidth=1, alpha=0.9,
          xmin=-.7, xmax=2)
#__________________________________________________________________________
#Generate title and subtext
ax.fill_between(cease_years,plot,plot2, alpha = .6)
ax.fill_between(cease_years,plot, alpha = .6)
ax.text(2008.4,307, 'Proportional Dissatisfaction: Steady Over Time', weight = 'bold', size = 26, alpha = .75)
ax.text(2008.4,292, 'number of resignations (blue) and number of dissatisfied', size = 19, alpha = .85)
ax.text(2008.4,280, 'resignations (red) vs. year of separation', size = 19, alpha = .85)
ax.text(2008.4, 270, "_______________________________________________________________________________________", size = 19, color =  'grey')

#___________________________________________________________________________
#generate text on graph
ax.text(2010.5,100, 'Total', size = 19, fontweight = 'bold', rotation = 30, c ='#30a2da')
ax.text(2011,135, 'Resignations', size = 19, fontweight = 'bold', rotation = 55.5, c ='#30a2da')
ax.text(2010,26, 'Disatisfied', size = 19, fontweight = 'bold', rotation = 6, c ='#fc4f30') 
ax.text(2011,41, 'Resignations', size = 19, fontweight = 'bold', rotation = 39, c ='#fc4f30') 

ax.tick_params(color = 'grey', which = 'both')

ax.text(x = 2008.4, y = -53,
    s = '   J. Wilson Peoples                                                                                                                                  Source: https://data.gov.au',
    fontsize = 13, color = '#f0f0f0', backgroundcolor = 'grey')

Text(2008.4, -53, '   J. Wilson Peoples                                                                                                                                  Source: https://data.gov.au')


dissat_props = combined.groupby(['institute'])['dissatisfied'].sum()/combined.groupby(['institute'])['dissatisfied'].count()
fig, ax = plt.subplots(figsize = (12,8))


x_axis = dissat_props.index.to_list()
y_axis = [100*dissat_props[institute] for institute in x_axis]
ax.bar(x_axis, y_axis)
ax.set_yticklabels([])
ax.grid(visible = False)
ax.text(-.5, .5, '_____________________________________________________________________________________________________________', color = 'grey')
ax.set_ylim([0,56])
ax.text(-0.5,65, 'DETE Employees: Twice as Dissatisfied', weight = 'bold', size = 26, alpha = .75)
ax.text(-0.5,61.5, 'as their TAFE Counterparts', weight = 'bold', size = 26, alpha = .75)
ax.text(-0.5,58, 'percentage of dissatisfied employees vs. institution', size = 19, alpha = .85)
ax.text(-.5, 55, '_____________________________________________________________________________________________________________', color = 'grey')
ax.tick_params(color = 'grey', which = 'both')

for i, v in enumerate(y_axis):
    ax.text(i-.1,v+1.5, str(round(v,2)) + '%', color='black', fontweight='bold', fontsize=19, ha='left', va='center', alpha = .75)


ax.text(x = -.5, y = -7,
    s = '   J. Wilson Peoples                                                                                                                  Source: https://data.gov.au',
    fontsize = 13, color = '#f0f0f0', backgroundcolor = 'grey')

Text(-0.5, -7, '   J. Wilson Peoples                                                                                                                  Source: https://data.gov.au')


data = combined.groupby(['service_cat','institute'])['dissatisfied'].sum()/combined.groupby(['service_cat','institute'])['dissatisfied'].count()
Dete_data = [100*data[service, 'DETE'] for service in service_order]
Tafe_data = [100*data[service,'TAFE'] for service in service_order]
fig,ax = plt.subplots(figsize = (12,8))
x_axis = np.arange(len(service_order))
ax.bar(x_axis + .2, Dete_data,.4, label = 'DETE', alpha = .85)
ax.bar(x_axis - .2,Tafe_data,.4,label = 'TAFE', alpha = .85)
ax.set_xticks(x_axis,service_order)
ax.set_yticklabels([])
ax.grid(visible=False)
ax.text(-0.5,82, 'TAFE Employees Remain Satisfied', weight = 'bold', size = 26, alpha = .75)
ax.text(-0.5,77, 'Throughout Career', weight = 'bold', size = 26, alpha = .75)
ax.text(-0.5,72, 'percentage of dissatisfied          and          employees vs. years of service', size = 19, alpha = .85)
ax.axhline(70, color='grey', linewidth=1, alpha=1,
          xmin=0, xmax=1)
for i, v in enumerate(Dete_data):
    ax.text(i+.04,v+1, str(round(v,2)) + '%', color='black', fontweight='bold', fontsize=14, ha='left', va='center', alpha = .75)
for i, v in enumerate(Tafe_data):
    ax.text(i-.38,v+1, str(round(v,2)) + '%', color='black', fontweight='bold', fontsize=14, ha='left', va='center', alpha = .75)

ax.text(x= .89, y = 72, s = "DETE", color = 'dodgerblue', size = 19)

ax.text(x= 1.44, y = 72, s = "TAFE", color = 'orangered', size = 19)

ax.text(-.6, .5, '_____________________________________________________________________________________________________________', color = 'grey')


ax.text(x = -.6, y = -7,
    s = '   J. Wilson Peoples                                                                                                                     Source: https://data.gov.au',
    fontsize = 13, color = '#f0f0f0', backgroundcolor = 'grey')

Text(-0.6, -7, '   J. Wilson Peoples                                                                                                                     Source: https://data.gov.au')


data = combined.groupby(['position','institute'])['dissatisfied'].sum()/combined.groupby(['position','institute'])['dissatisfied'].count()
position_order = combined['position'].value_counts().index.to_list()
Dete_data = [100*data[position, 'DETE'] for position in position_order]
Tafe_data = [100*data[position, 'TAFE'] for position in position_order]
fig,ax = plt.subplots(figsize = (12,8))
x_axis = np.arange(len(position_order))
ax.bar(x_axis + .2, Dete_data,.4, label = 'DETE')
ax.bar(x_axis - .2,Tafe_data,.4,label = 'TAFE')
ax.set_xticks(x_axis,position_order)
ax.set_yticklabels([])
ax.legend(bbox_to_anchor=(.5, .5, 0.5, 0.5))
ax.grid(visible=False)
ax.text(-0.5,63.5, 'Teaching Staff: Dissatisfied Across Institutions', weight = 'bold', size = 26, alpha = .75)
ax.text(-0.5,61, 'percentage of dissatisfied employees vs. position', size = 19, alpha = .85)
ax.text(-.6, 60, '_____________________________________________________________________________________________________________', color = 'grey')
for i, v in enumerate(Dete_data):
    ax.text(i+.04,v+1, str(round(v,2)) + '%', color='black', fontweight='bold', fontsize=14, ha='left', va='center', alpha = .75)
for i, v in enumerate(Tafe_data):
    ax.text(i-.38,v+1, str(round(v,2)) + '%', color='black', fontweight='bold', fontsize=14, ha='left', va='center', alpha = .75)


ax.text(-.6, .5, '_____________________________________________________________________________________________________________', color = 'grey')


ax.text(x = -.6, y = -7,
    s = '   J. Wilson Peoples                                                                                                                     Source: https://data.gov.au',
    fontsize = 13, color = '#f0f0f0', backgroundcolor = 'grey')

Text(-0.6, -7, '   J. Wilson Peoples                                                                                                                     Source: https://data.gov.au')


DETE_data = combined[(combined['dissatisfied'] == True) &(combined['institute'] == 'DETE')]
DETE_data = 100*DETE_data['gender'].value_counts(normalize = True)
gender_list = combined['gender'].value_counts().index.to_list()
TAFE_data = combined[(combined['dissatisfied'] == True) &(combined['institute'] == 'TAFE')]
TAFE_data = 100*TAFE_data['gender'].value_counts(normalize = True)
fig,ax = plt.subplots(nrows = 1, ncols = 2, figsize = (12,8))
fig.set_facecolor('white')
explode = (0, 0.1)
ax[0].pie(DETE_data, explode = explode, labels = gender_list, autopct='%1.1f%%')
ax[1].pie(TAFE_data, explode = explode, labels = gender_list, autopct='%1.1f%%')
ax[0].text(-1.25,2.3, 'Dissatisfaction Among Genders:', weight = 'bold', size = 26, alpha = .75)
ax[0].text(-1.25,2.10, 'Consistent Across Institutions', weight = 'bold', size = 26, alpha = .75)
ax[0].set_ylim([0,2])
ax[1].set_ylim([0,2])
ax[0].axhline(1.6, color='grey', linewidth=1, alpha=1,
          xmin=0, xmax=20)
ax[1].axhline(1.6, color='grey', linewidth=1, alpha=1,
          xmin=-10, xmax=10)
ax[0].text(-1.25,1.85, 'Proportion of dissatisfaction by', size = 19, alpha = .85)
ax[0].text(-1.25,1.7, 'gender (DETE employees)', size = 19, alpha = .85)
ax[1].text(-1.25,1.85, 'Proportion of dissatisfaction by', size = 19, alpha = .85)
ax[1].text(-1.25,1.7, 'gender (TAFE employees)', size = 19, alpha = .85)

ax[0].text(x = -1.3, y = -1.5,
    s = '   J. Wilson Peoples                                                                                                                     Source: https://data.gov.au',
    fontsize = 13, color = '#f0f0f0', backgroundcolor = 'grey')

Text(-1.3, -1.5, '   J. Wilson Peoples                                                                                                                     Source: https://data.gov.au')

Introduction¶

Initial Data Exploration¶

Exploring and Cleaning the Dete Survey¶

Exploring the TAFE Survey¶

Combining the Data¶

Final Data Cleansing¶

id, separationtype, and cease_date¶

position, employement status, and gender¶

age and insitute_serivice columns¶

Filling remaining missing values¶

Data Analysis¶

Time and Satisfaction¶

Insitutional Analysis¶

Conclusion¶

	ID	SeparationType	Cease Date	DETE Start Date	Role Start Date	Position	Classification	Region	Business Unit	Employment Status	...	Kept informed	Wellness programs	Health & Safety	Gender	Age	Aboriginal	Torres Strait	South Sea	Disability	NESB
0	1	Ill Health Retirement	08/2012	1984	2004	Public Servant	A01-A04	Central Office	Corporate Strategy and Peformance	Permanent Full-time	...	N	N	N	Male	56-60	NaN	NaN	NaN	NaN	Yes
1	2	Voluntary Early Retirement (VER)	08/2012	Not Stated	Not Stated	Public Servant	AO5-AO7	Central Office	Corporate Strategy and Peformance	Permanent Full-time	...	N	N	N	Male	56-60	NaN	NaN	NaN	NaN	NaN
2	3	Voluntary Early Retirement (VER)	05/2012	2011	2011	Schools Officer	NaN	Central Office	Education Queensland	Permanent Full-time	...	N	N	N	Male	61 or older	NaN	NaN	NaN	NaN	NaN
3	4	Resignation-Other reasons	05/2012	2005	2006	Teacher	Primary	Central Queensland	NaN	Permanent Full-time	...	A	N	A	Female	36-40	NaN	NaN	NaN	NaN	NaN
4	5	Age Retirement	05/2012	1970	1989	Head of Curriculum/Head of Special Education	NaN	South East	NaN	Permanent Full-time	...	N	A	M	Female	61 or older	NaN	NaN	NaN	NaN	NaN

	ID	SeparationType	Cease Date	DETE Start Date	Role Start Date	Position	Classification	Region	Business Unit	Employment Status	...	Kept informed	Wellness programs	Health & Safety	Gender	Age	Aboriginal	Torres Strait	South Sea	Disability	NESB
0	1	Ill Health Retirement	08/2012	1984.0	2004.0	Public Servant	A01-A04	Central Office	Corporate Strategy and Peformance	Permanent Full-time	...	N	N	N	Male	56-60	NaN	NaN	NaN	NaN	Yes
1	2	Voluntary Early Retirement (VER)	08/2012	NaN	NaN	Public Servant	AO5-AO7	Central Office	Corporate Strategy and Peformance	Permanent Full-time	...	N	N	N	Male	56-60	NaN	NaN	NaN	NaN	NaN
2	3	Voluntary Early Retirement (VER)	05/2012	2011.0	2011.0	Schools Officer	NaN	Central Office	Education Queensland	Permanent Full-time	...	N	N	N	Male	61 or older	NaN	NaN	NaN	NaN	NaN
3	4	Resignation-Other reasons	05/2012	2005.0	2006.0	Teacher	Primary	Central Queensland	NaN	Permanent Full-time	...	A	N	A	Female	36-40	NaN	NaN	NaN	NaN	NaN
4	5	Age Retirement	05/2012	1970.0	1989.0	Head of Curriculum/Head of Special Education	NaN	South East	NaN	Permanent Full-time	...	N	A	M	Female	61 or older	NaN	NaN	NaN	NaN	NaN

	id	Institute	WorkArea	cease_date	separationtype	Contributing Factors. Career Move - Public Sector	Contributing Factors. Career Move - Private Sector	Contributing Factors. Career Move - Self-employment	Contributing Factors. Ill Health	Contributing Factors. Maternity/Family	...	Contributing Factors. Study	Contributing Factors. Travel	Contributing Factors. Other	Contributing Factors. NONE	gender	age	employment_status	position	institute_service	role_service
0	6.341330e+17	Southern Queensland Institute of TAFE	Non-Delivery (corporate)	2010.0	Contract Expired	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	Female	26 30	Temporary Full-time	Administration (AO)	1-2	1-2
1	6.341337e+17	Mount Isa Institute of TAFE	Non-Delivery (corporate)	2010.0	Retirement	-	-	-	-	-	...	-	Travel	-	-	NaN	NaN	NaN	NaN	NaN	NaN
2	6.341388e+17	Mount Isa Institute of TAFE	Delivery (teaching)	2010.0	Retirement	-	-	-	-	-	...	-	-	-	NONE	NaN	NaN	NaN	NaN	NaN	NaN
3	6.341399e+17	Mount Isa Institute of TAFE	Non-Delivery (corporate)	2010.0	Resignation	-	-	-	-	-	...	-	Travel	-	-	NaN	NaN	NaN	NaN	NaN	NaN
4	6.341466e+17	Southern Queensland Institute of TAFE	Delivery (teaching)	2010.0	Resignation	-	Career Move - Private Sector	-	-	-	...	-	-	-	-	Male	41 45	Permanent Full-time	Teacher (including LVT)	3-4	3-4

	id	separationtype	cease_date	position	employment_status	gender	age	institute_service	dissatisfied	institute
3	4.0	Resignation-Other reasons	2012.0	Teacher	Permanent Full-time	Female	36-40	7.0	False	DETE
5	6.0	Resignation-Other reasons	2012.0	Guidance Officer	Permanent Full-time	Female	41-45	18.0	True	DETE
8	9.0	Resignation-Other reasons	2012.0	Teacher	Permanent Full-time	Female	31-35	3.0	False	DETE
9	10.0	Resignation-Other employer	2012.0	Teacher Aide	Permanent Part-time	Female	46-50	15.0	True	DETE
11	12.0	Resignation-Move overseas/interstate	2012.0	Teacher	Permanent Full-time	Male	31-35	3.0	False	DETE

	separationtype	cease_date	position	employment_status	gender	age	institute_service	dissatisfied	institute
683	Other employer	NaN	Teacher	Permanent Full-time	Male	21-25	NaN	True	DETE
694	Unknown	NaN	Teacher Aide	Casual	Female	46-50	NaN	False	DETE
704	Unknown	NaN	Teacher Aide	Permanent Full-time	Female	41-45	NaN	False	DETE
709	Other employer	NaN	Teacher	Permanent Full-time	Female	51-55	NaN	True	DETE
724	Unknown	NaN	Teacher	Permanent Full-time	Female	46-50	NaN	False	DETE
770	Unknown	NaN	Cleaner	Permanent Part-time	Female	61 or older	NaN	False	DETE
774	Other employer	NaN	Teacher Aide	Permanent Part-time	Female	41-45	NaN	False	DETE
788	Other employer	NaN	Teacher	Permanent Full-time	Female	41-45	NaN	False	DETE
791	Unknown	NaN	Public Servant	Permanent Part-time	Female	46-50	NaN	True	DETE
797	Move overseas/interstate	NaN	Public Servant	Permanent Part-time	Female	36-40	NaN	False	DETE
798	Move overseas/interstate	NaN	Teacher Aide	Permanent Part-time	Female	36-40	NaN	False	DETE
93	Unknown	NaN	Teacher (including LVT)	Permanent Full-time	Male	56 or older	11-20	False	TAFE
104	Unknown	NaN	Administration (AO)	Permanent Full-time	Female	36 40	11-20	False	TAFE
113	Unknown	NaN	NaN	NaN	NaN	NaN	NaN	True	TAFE
135	Unknown	NaN	NaN	NaN	NaN	NaN	NaN	True	TAFE
513	Unknown	NaN	NaN	NaN	NaN	NaN	NaN	False	TAFE

	separationtype	cease_date	position	employment_status	gender	age	institute_service	dissatisfied	institute
399	Other employer	2013.0	other	NaN	Male	56-60	NaN	True	DETE
405	Unknown	2012.0	teaching staff	NaN	NaN	NaN	NaN	False	DETE
489	Unknown	2012.0	other	NaN	Female	46-50	NaN	False	DETE
821	Move overseas/interstate	2013.0	teaching staff	NaN	NaN	NaN	NaN	False	DETE
3	Unknown	2010.0	other	NaN	NaN	NaN	NaN	False	TAFE
16	Unknown	2010.0	other	NaN	NaN	NaN	NaN	False	TAFE
18	Unknown	2010.0	other	NaN	NaN	NaN	NaN	False	TAFE
19	Unknown	2010.0	other	NaN	NaN	NaN	NaN	False	TAFE
20	Unknown	2010.0	other	NaN	NaN	NaN	NaN	True	TAFE
21	Unknown	2010.0	other	NaN	NaN	NaN	NaN	False	TAFE
26	Unknown	2010.0	other	NaN	NaN	NaN	NaN	True	TAFE
36	Unknown	2010.0	other	NaN	NaN	NaN	NaN	False	TAFE
37	Unknown	2010.0	other	NaN	NaN	NaN	NaN	False	TAFE
39	Unknown	2010.0	other	NaN	NaN	NaN	NaN	False	TAFE
51	Unknown	2010.0	other	NaN	NaN	NaN	NaN	False	TAFE
53	Unknown	2010.0	other	NaN	NaN	NaN	NaN	False	TAFE
54	Unknown	2010.0	other	NaN	NaN	NaN	NaN	True	TAFE
87	Unknown	2010.0	other	NaN	NaN	NaN	NaN	False	TAFE
91	Unknown	2010.0	other	NaN	NaN	NaN	NaN	False	TAFE
94	Unknown	2011.0	other	NaN	NaN	NaN	NaN	False	TAFE
97	Unknown	2011.0	other	NaN	NaN	NaN	NaN	True	TAFE
101	Unknown	2010.0	other	NaN	NaN	NaN	NaN	True	TAFE
102	Unknown	2010.0	other	NaN	NaN	NaN	NaN	True	TAFE
130	Unknown	2011.0	other	NaN	NaN	NaN	NaN	False	TAFE
138	Unknown	2011.0	other	NaN	NaN	NaN	NaN	False	TAFE
169	Unknown	2011.0	other	NaN	NaN	NaN	NaN	True	TAFE
204	Unknown	2010.0	other	NaN	NaN	NaN	NaN	True	TAFE
234	Unknown	2010.0	other	NaN	NaN	NaN	NaN	True	TAFE
243	Unknown	2011.0	other	NaN	NaN	NaN	NaN	False	TAFE
258	Unknown	2011.0	other	NaN	NaN	NaN	NaN	False	TAFE
276	Unknown	2011.0	other	NaN	NaN	NaN	NaN	False	TAFE
279	Unknown	2011.0	other	NaN	NaN	NaN	NaN	False	TAFE
287	Unknown	2011.0	other	NaN	NaN	NaN	NaN	False	TAFE
373	Unknown	2011.0	other	NaN	NaN	NaN	NaN	True	TAFE
412	Unknown	2012.0	other	NaN	NaN	NaN	NaN	True	TAFE
423	Unknown	2012.0	other	NaN	NaN	NaN	NaN	False	TAFE
437	Unknown	2012.0	other	NaN	NaN	NaN	NaN	False	TAFE
530	Unknown	2012.0	other	NaN	NaN	NaN	NaN	False	TAFE
533	Unknown	2012.0	other	NaN	NaN	NaN	NaN	False	TAFE
535	Unknown	2012.0	other	NaN	NaN	NaN	NaN	False	TAFE
539	Unknown	2012.0	other	NaN	NaN	NaN	NaN	False	TAFE
579	Unknown	2012.0	other	NaN	NaN	NaN	NaN	False	TAFE
621	Unknown	2013.0	other	NaN	NaN	NaN	NaN	False	TAFE
625	Unknown	2013.0	other	NaN	NaN	NaN	NaN	True	TAFE
628	Unknown	2013.0	other	NaN	NaN	NaN	NaN	False	TAFE
665	Unknown	2013.0	other	NaN	NaN	NaN	NaN	False	TAFE
666	Unknown	2013.0	other	NaN	NaN	NaN	NaN	False	TAFE
670	Unknown	2013.0	other	NaN	NaN	NaN	NaN	False	TAFE
690	Unknown	2013.0	other	NaN	NaN	NaN	NaN	False	TAFE
694	Unknown	2013.0	other	NaN	NaN	NaN	NaN	False	TAFE
698	Unknown	2013.0	other	NaN	NaN	NaN	NaN	False	TAFE