#!/usr/bin/python3
# Released in public domain under CC0 license:
#

'''Analysis of GUADEC accommodation survey results.'''

import numpy
import pandas


IN_FILE = 'GUADEC 2017 accommodation survey.csv'

# Short column labels for each accommodation choice, in the same order as
# ACCOMMODATION_OPTION_STRINGS.
ACCOMMODATION_OPTION_LABELS = [
    'hostel',
    'uni',
    'hotel_sharing',
    'hotel_single',
    'own',
]

# Exact answer texts used by the survey for the accommodation question.
ACCOMMODATION_OPTION_STRINGS = [
    'Youth hostel (4-bed dorm, £33 per night)',
    'University Townhouse (single room, £43 or more per night)',
    'Jury\'s Inn Hotel (sharing a twin room, ~£51 per night)',
    'Jury\'s Inn Hotel (single room, ~£94 per night)',
    'I would organise my own accommodation',
]

# Short column labels for each night, in the same order as
# NIGHT_OPTION_STRINGS.
NIGHT_OPTION_LABELS = [
    'thu27',
    'fri28',
    'sat29',
    'sun30',
    'mon31',
    'tue1',
    'wed2',
]

# Exact answer texts used by the survey for the nights question.
NIGHT_OPTION_STRINGS = [
    'Thursday 27th July 2017',
    'Friday 28th July 2017 (core day #1)',
    'Saturday 29th July 2017 (core day #2)',
    'Sunday 30th July 2017 (core day #3)',
    'Monday 31st July 2017 (BOF/workshop day #1)',
    'Tuesday 1st August 2017 (BOF/workshop day #2)',
    'Wednesday 2nd August 2017 (BOF/workshop day #3)',
]

COLUMN_NAMES = [
    'TIMESTAMP',      # Timestamp
    'ACCOMMODATION',  # Which accommodation option(s) would you choose for GUADEC 2017?
    'REGISTRATION',   # How early could you register for the conference & pay for accommodation?
    'NIGHTS',         # Which nights would you expect to stay at GUADEC 2017?
    'BREAKFAST',      # Would you pay £8.50 extra per day for a buffet breakfast?
]

# Respondents who picked at least one hotel option and neither the hostel nor
# the Townhouses. Like the hostel-only query in main(), this does not look at
# the 'own' column.
HOTEL_ONLY_QUERY = ('(hotel_single == True or hotel_sharing == True) '
                    'and hostel == False and uni == False')


def expand_multiple_choice_column(data, source_column, option_strings,
                                  option_labels):
    '''Parse string of option names, turn into additional true/false columns.

    Each response in `source_column` is a ';'-separated list of the options
    the respondent ticked. For every entry of `option_strings` a boolean
    column is appended to `data`, named after the entry of `option_labels`
    at the same position.

    Responses that are not strings (a blank answer is read from CSV as NaN)
    are treated as "no option chosen".

    Returns a new DataFrame; `data` itself is not modified.
    '''
    # Split every response once rather than once per option.
    chosen_per_response = [
        response.split(';') if isinstance(response, str) else []
        for response in source_column
    ]

    to_append = []
    for label, full_option_string in zip(option_labels, option_strings):
        # Reuse the source index so that concat() lines the new columns up
        # with the existing rows even when `data` is not indexed 0..n-1.
        to_append.append(pandas.Series(
            [full_option_string in chosen for chosen in chosen_per_response],
            index=source_column.index,
            name=label))

    return pandas.concat([data] + to_append, axis=1)


def expand_accommodation_column(data):
    '''Add one true/false column per accommodation option to `data`.'''
    return expand_multiple_choice_column(
        data,
        source_column=data.ACCOMMODATION,
        option_strings=ACCOMMODATION_OPTION_STRINGS,
        option_labels=ACCOMMODATION_OPTION_LABELS)


def expand_nights_column(data):
    '''Add one true/false column per conference night to `data`.'''
    return expand_multiple_choice_column(
        data,
        source_column=data.NIGHTS,
        option_strings=NIGHT_OPTION_STRINGS,
        option_labels=NIGHT_OPTION_LABELS)


def percentage(number, total):
    '''Return `number` as a percentage of `total`, or 0.0 if `total` is 0.'''
    if not total:
        return 0.0
    return (number / total) * 100.0


def number_and_percentage(number, total):
    '''Format a count as "N of TOTAL (P%)"; an empty total reports 0%.'''
    return '%i of %i (%i%%)' % (number, total, percentage(number, total))


def main():
    '''Read the survey CSV named by IN_FILE and print summary statistics.'''
    everyone = pandas.read_csv(IN_FILE, names=COLUMN_NAMES, header=0)
    print("Total responses: %i" % len(everyone))

    everyone = expand_accommodation_column(everyone)
    everyone = expand_nights_column(everyone)

    print("How many people would consider staying at the MMU Townhouses?")
    townhouse_guests = everyone.query('uni == True')
    print(number_and_percentage(len(townhouse_guests), len(everyone)))

    print("How many people would only consider staying at the MMU Townhouses?")
    answer = len(townhouse_guests.query(
        'hostel == False and hotel_single == False '
        'and hotel_sharing == False and own == False'))
    print(number_and_percentage(answer, len(everyone)))

    print("How many people who would stay at the Townhouses could register "
          "before March 2017?")
    answer = len(townhouse_guests.query('REGISTRATION == "By 1st March 2017"'))
    print(number_and_percentage(answer, len(townhouse_guests)))

    print("How many people who would stay at the Townhouses would pay "
          "£8.50 extra for breakfast")
    answer = len(townhouse_guests.query('BREAKFAST == "Yes"'))
    print(number_and_percentage(answer, len(townhouse_guests)))

    print("What nights would Townhouses guests stay?")
    for date in NIGHT_OPTION_LABELS:
        answer = len(townhouse_guests.query('%s == True' % date))
        print(" %s: %s, or %i%% of total attendees" % (
            date,
            number_and_percentage(answer, len(townhouse_guests)),
            percentage(answer, len(everyone))))

    print("How many people would only stay in the youth hostel?")
    answer = len(everyone.query(
        'hostel == True and uni == False '
        'and hotel_single == False and hotel_sharing == False'))
    print(number_and_percentage(answer, len(everyone)))

    print("How many people would only stay the hotel?")
    # Require at least one hotel option; excluding hostel and Townhouses alone
    # would also count respondents who never picked the hotel.
    answer = len(everyone.query(HOTEL_ONLY_QUERY))
    print(number_and_percentage(answer, len(everyone)))


# Only run the analysis when executed as a script, so the helpers above can
# be imported without needing the survey CSV.
if __name__ == '__main__':
    main()