This code reads in the responses from the Google Forms Survey below. The survey responses are exported in CSV format.

The numeric_survey function below takes the responses and converts them to numeric values. This allows us to perform calculations on the responses to find correlations and create a GLM.

Google Forms Survey¶

title

# dependencies
import pandas as pd
import numpy as np

# import googlespreadsheet from survey

googlecsv = pd.read_csv('../data/survey041718.csv')

# numeric scale legends

# gender numeric legend
gender_scale = pd.DataFrame({
    'gender':['Male','Female','Other',''],
    'gender_values':['1','0','2','3']
})
gender_scale
#gender_scale.to_csv('gender_scale.csv',index=False)

# race numeric legend
race_scale = pd.DataFrame({
    'race':['Asian / Pacific Islander',
    'Black or African American',
    'Hispanic or Latino',
    'White',
    'Other',''
    ],
    'race_values':[1,2,3,4,5,0]
})
race_scale
#race_scale.to_csv('race_scale.csv',index=False)

#income numeric legend
income_scale = pd.DataFrame({
    'income':['','Less than $25,000',
    '$25,000 to $34,999',
    '$35,000 to $49,999',
    '$50,000 to $74,999',
    '$75,000 to $99,999',
    '$100,000 to $149,999',
    '$150,000 to $199,999',
    '$200,000 or more'
    ], 'income_values':[0,1,2,3,4,5,6,7,8]})
income_scale
#income_scale.to_csv('income_scale.csv',index=False)

# days since last haircut numeric legend
days_last_scale = pd.DataFrame({
    'days_last_haircut':['','1 week',
    '2 weeks',
    '3 weeks',
    '1 month',
    '2 months ago',
    '6 weeks',
    '3+ months',
    'other'
    ],'days_last_values':[0,1,2,3,4,5,6,7,8]
})
days_last_scale
#days_last_scale.to_csv('days_last_scale.csv',index=False)


# time between haircuts numeric legend
time_between_scale = pd.DataFrame({
    'time_between_haircut':['','1 week',
    '2 weeks',
    '3 weeks',
    '1 month',
    '2 months ago',
    '6 weeks',
    '3+ months',
    'other'
    ],'time_between_values':[0,1,2,3,4,5,6,7,8]
})
time_between_scale
#time_between_scale.to_csv('time_between_scale.csv',index=False)


# purchased products numeric legend
products_scale = pd.DataFrame({
    'products':['Yes','No'],
    'products_values':[1,0]
})
products_scale
#products_scale.to_csv('products_scale.csv',index=False)


# how much spent numeric scale
spend_scale = pd.DataFrame({
    'did_spend':['','10',
    '11 - 20',
    '21 - 30',
    '31 - 40',
    '40 +',
    'other',
    ],
    'spend_values':[0,1,2,3,4,5,6]
})
spend_scale
#spend_scale.to_csv('spend_scale.csv',index=False)


# max spend numeric scale
max_spend_scale = pd.DataFrame({
    'max_spend':['','10',
    '11 - 20',
    '21 - 30',
    '31 - 40',
    '41 - 50',
    '51 - 60',
    '61 - 70',
    'other'
    ],
    'max_spend_values':[0,1,2,3,4,5,6,7,8]
})
max_spend_scale
#max_spend_scale.to_csv('max_spend_scale.csv',index=False)


# how find shop numeric scale
how_find_scale = pd.DataFrame({
    'how_find':['','Walk in',
    'Referral',
    'Yelp',
    'Google',
    'Facebook',
    'other'
               ],
    'how_find_values':[0,1,2,3,4,5,6]
})
how_find_scale
#how_find_scale.to_csv('how_find_scale.csv',index=False)


# leave a review numeric scale
review_scale = pd.DataFrame({
    'leave_review':['Yes','No','Sometimes'],
    'review_values':[1,2,3]
})
#review_scale.to_csv('review_scale.csv',index=False)

gender_scale

race_scale

income_scale

days_last_scale

time_between_scale

products_scale

spend_scale

max_spend_scale

how_find_scale

review_scale

def numeric_survey(csv):
    
    surveyimport = csv
    # resave dataframe with relative columns
    googledf = surveyimport[['What is your work zip code?','What is your home zip code?','Gender','Age','Race','Income','How long since your last haircut?','How long between typical haircuts?','Did you buy any products after your haircut?','How much did you spend on your haircut?','What is the most you would spend on a haircut?','How did you find your current barber?','Do you leave reviews on Yelp, Google, Etc? ','How important is Price ( 1 = not important, 5 = very important)','How important is Convenience( 1 = not important, 5 = very important)','How important is Atmosphere( 1 = not important, 5 = very important)','How important are Additional Services ( 1 = not important, 5 = very important)']]
    
    # rename columns to match numeric scale legends keys
    googledf.columns = ['work_zip','home_zip','gender','age','race','income','days_last_haircut','time_between_haircut','products','did_spend','max_spend','how_find','leave_review','price','convenient','atmosphere','amenities']
    
    # merge legend tables with current dataframe to have all items in numeric terms for prediction model
    legendDF = googledf.merge(gender_scale, on='gender',how='left')
    legendDF2 = legendDF.merge(race_scale, on='race',how='left')
    legendDF3 = legendDF2.merge(income_scale, on='income', how='left')
    legendDF4 = legendDF3.merge(days_last_scale, on='days_last_haircut',how='left')
    legendDF5 = legendDF4.merge(time_between_scale, on='time_between_haircut',how='left')
    legendDF6 = legendDF5.merge(products_scale, on='products',how='left')
    legendDF7 = legendDF6.merge(spend_scale, on='did_spend',how='left')
    legendDF8 = legendDF7.merge(max_spend_scale, on='max_spend',how='left')
    legendDF9 = legendDF8.merge(how_find_scale, on='how_find',how='left')
    legendDF_final = legendDF9.merge(review_scale, on='leave_review',how='left')
    
    # create dataframe with numeric scale values
    legendDF_values = legendDF_final[['work_zip','home_zip','gender_values','age','race_values','income_values','days_last_values','time_between_values','products_values','spend_values','max_spend_values','how_find_values','review_values','price','convenient','atmosphere','amenities']]
    
    # fill nan values with 0 for the scale/legend on non-legend responses
    surveyFinal = legendDF_values.fillna(0)
    
    # import time dependency fro today's date timestamp
    import datetime as dt
    date = dt.datetime.today().strftime("%m%d%Y")

    surveyFinal.to_csv(f'../data/survey{date}.csv', index=False)
    return surveyFinal.head()

numeric_survey(googlecsv)

	work_zip	home_zip	gender_values	age	race_values	income_values	days_last_values	time_between_values	products_values	spend_values	max_spend_values	how_find_values	review_values	price	convenient	atmosphere	amenities
0	92614	92614	1	38.0	1.0	1.0	1.0	3.0	0.0	5.0	5.0	3.0	1.0	4	4	4	1
1	92660	92677	1	34.0	4.0	5.0	6.0	7.0	0.0	5.0	7.0	2.0	2.0	5	3	3	1
2	92612	92602	0	35.0	4.0	4.0	0.0	7.0	1.0	5.0	0.0	3.0	2.0	5	5	4	4
3	92620	92780	1	35.0	1.0	4.0	1.0	3.0	1.0	5.0	6.0	3.0	3.0	3	5	4	4
4	97205	97205	1	38.0	4.0	4.0	1.0	6.0	0.0	5.0	5.0	4.0	3.0	4	2	3	2

	race	race_values
0	Asian / Pacific Islander	1
1	Black or African American	2
2	Hispanic or Latino	3
3	White	4
4	Other	5
5		0

	income	income_values
0		0
1	Less than $25,000	1
2	$25,000 to $34,999	2
3	$35,000 to $49,999	3
4	$50,000 to $74,999	4
5	$75,000 to $99,999	5
6	$100,000 to $149,999	6
7	$150,000 to $199,999	7
8	$200,000 or more	8

	days_last_haircut	days_last_values
0		0
1	1 week	1
2	2 weeks	2
3	3 weeks	3
4	1 month	4
5	2 months ago	5
6	6 weeks	6
7	3+ months	7
8	other	8

	max_spend	max_spend_values
0		0
1	10	1
2	11 - 20	2
3	21 - 30	3
4	31 - 40	4
5	41 - 50	5
6	51 - 60	6
7	61 - 70	7
8	other	8

	how_find	how_find_values
0		0
1	Walk in	1
2	Referral	2
3	Yelp	3
4	Google	4
5	Facebook	5
6	other	6

	leave_review	review_values
0	Yes	1
1	No	2
2	Sometimes	3