Predictive Model
Based on results of the survey – we used a GLM to see what factors determined a client’s price point. Our initial analysis suggests using 3 factors:
- Importance of price
- Importance of atmosphere
- Method of finding your salon.
# read in the csv
data <- read.csv("Survey.csv")
data <- subset(data,gender == "Male")
#price.importance
data2 <- data
boxplot(most ~ price.importance, data=data)
#Make 3 groups: 1-3: "Low" ; 4: "Medium" ; 5: "High"
data2$price.importance2 <- ifelse(data2$price.importance < 4, "Low",
ifelse(data2$price.importance==4,"Medium","High"))
boxplot(most ~ price.importance2, data=data2)
#Make 2 groups: 1-4: "Low" ; 5: "High"
data2$atmosphere.importance2 <- ifelse(data2$atmosphere.importance < 5, "Low",
ifelse(data2$atmosphere.importance==5,"High","Error"))
data2$find3 <- ifelse(data2$find2 == "Google", "other",
ifelse(data2$find2 == "Referral", "other",
ifelse(data2$find2 == "other","other",
ifelse(data2$find2 == "Walk in", "Walk in",
ifelse(data2$find2 == "Yelp","Yelp","Fucked Up")))))
summary(model)
Call:
glm(formula = most ~ price.importance2 + atmosphere.importance2 +
find3, family = poisson(link = "log"), data = data2)
Deviance Residuals:
Min 1Q Median 3Q Max
-6.091 -2.247 -0.041 1.824 8.374
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 3.69787 0.06002 61.610 < 2e-16 ***
price.importance2Low 1.00662 0.05746 17.520 < 2e-16 ***
price.importance2Medium 0.51592 0.05746 8.979 < 2e-16 ***
atmosphere.importance2Low -0.77750 0.05447 -14.274 < 2e-16 ***
find3Walk in -0.27006 0.04883 -5.530 3.20e-08 ***
find3Yelp -0.20597 0.04984 -4.132 3.59e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for poisson family taken to be 1)
Null deviance: 1054.81 on 79 degrees of freedom
Residual deviance: 568.42 on 74 degrees of freedom
AIC: 989.37
Number of Fisher Scoring iterations: 5
# Summary shows everything is statistically significant (***)
library(ggplot2)
library(GGally)
data3 <- data2
data3$predicted <- predict(model, type = "response", scientific = FALSE)
# Lets do an xy scatterplot comparing actual and predicted. A good model should show most of the points on the x=y (actual = predicted) line
g <- ggplot(data3, aes(x=most, y=predicted))
LS0tCnRpdGxlOiAiQmFyYmVyIFNob3AgQ2FzZSBTdHVkeSAtIEdMTSIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKKioqCiMjIFByZWRpY3RpdmUgTW9kZWwKCkJhc2VkIG9uIHJlc3VsdHMgb2YgdGhlIHN1cnZleSDigJMgd2UgdXNlZCBhICoqR0xNKiogdG8gc2VlIHdoYXQgZmFjdG9ycyBkZXRlcm1pbmVkIGEgY2xpZW504oCZcyBwcmljZSBwb2ludC4gIE91ciBpbml0aWFsIGFuYWx5c2lzIHN1Z2dlc3RzIHVzaW5nIDMgZmFjdG9yczogIAoKMS4gSW1wb3J0YW5jZSBvZiBwcmljZQoyLiBJbXBvcnRhbmNlIG9mIGF0bW9zcGhlcmUKMy4gTWV0aG9kIG9mIGZpbmRpbmcgeW91ciBzYWxvbi4KICAgIAoKCmBgYHtyfQojIHJlYWQgaW4gdGhlIGNzdgpkYXRhIDwtIHJlYWQuY3N2KCJTdXJ2ZXkuY3N2IikKZGF0YSA8LSBzdWJzZXQoZGF0YSxnZW5kZXIgPT0gIk1hbGUiKQpgYGAKCgpgYGB7cn0KI3ByaWNlLmltcG9ydGFuY2UKZGF0YTIgPC0gZGF0YQpib3hwbG90KG1vc3QgfiBwcmljZS5pbXBvcnRhbmNlLCBkYXRhPWRhdGEpCmBgYApgYGB7cn0KI01ha2UgMyBncm91cHM6ICAxLTM6ICJMb3ciIDsgNDogIk1lZGl1bSIgOyA1OiAiSGlnaCIKZGF0YTIkcHJpY2UuaW1wb3J0YW5jZTIgPC0gaWZlbHNlKGRhdGEyJHByaWNlLmltcG9ydGFuY2UgPCA0LCAiTG93IiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGlmZWxzZShkYXRhMiRwcmljZS5pbXBvcnRhbmNlPT00LCJNZWRpdW0iLCJIaWdoIikpCmBgYAoKYGBge3J9CmJveHBsb3QobW9zdCB+IHByaWNlLmltcG9ydGFuY2UyLCBkYXRhPWRhdGEyKQpgYGAKYGBge3J9CmJveHBsb3QobW9zdCB+IGF0bW9zcGhlcmUuaW1wb3J0YW5jZSwgZGF0YT1kYXRhKQpgYGAKYGBge3J9CiNNYWtlIDIgZ3JvdXBzOiAxLTQ6ICJMb3ciIDsgNTogIkhpZ2giCmRhdGEyJGF0bW9zcGhlcmUuaW1wb3J0YW5jZTIgPC0gaWZlbHNlKGRhdGEyJGF0bW9zcGhlcmUuaW1wb3J0YW5jZSA8IDUsICJMb3ciLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBpZmVsc2UoZGF0YTIkYXRtb3NwaGVyZS5pbXBvcnRhbmNlPT01LCJIaWdoIiwiRXJyb3IiKSkKYGBgCgpgYGB7cn0KYm94cGxvdChtb3N0IH4gYXRtb3NwaGVyZS5pbXBvcnRhbmNlMiwgZGF0YT1kYXRhMikKYGBgCgpgYGB7cn0KYm94cGxvdChtb3N0IH4gZmluZDIsIGRhdGE9ZGF0YSkKYGBgCgpgYGB7cn0KZGF0YTIkZmluZDMgPC0gaWZlbHNlKGRhdGEyJGZpbmQyID09ICJHb29nbGUiLCAib3RoZXIiLAogICAgICAgICAgICAgICAgICAgICAgaWZlbHNlKGRhdGEyJGZpbmQyID09ICJSZWZlcnJhbCIsICJvdGhlciIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgaWZlbHNlKGRhdGEyJGZpbmQyID09ICJvdGhlciIsIm90aGVyIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgaWZlbHNlKGRhdGEyJGZpbmQyID09ICJXYWxrIGluIiwgIldhbGsgaW4iLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgaWZlbHNlKGRhdGEyJGZpbmQyID09ICJZZWxwIiwiWWVscCIsIkZ1Y2tlZCBVcCIpKSkpKQpgYGAKCmBgYHtyfQpib3hwbG90KG1vc3QgfiBmaW5kMywgZGF0YSA9IGRhdGEyKQpgYGAKCmBgYHtyfQojR0xNIG1vZGVsIHdpdGggMyBmYWN0b3JzOgptb2RlbCA8LSBnbG0obW9zdCB+IHByaWNlLmltcG9ydGFuY2UyICsgYXRtb3NwaGVyZS5pbXBvcnRhbmNlMiArIGZpbmQzLCBmYW1pbHkgPSBwb2lzc29uKGxpbmsgPSAibG9nIiksIGRhdGEgPSBkYXRhMikKc3VtbWFyeShtb2RlbCkKYGBgCgpgYGB7cn0KIyBTdW1tYXJ5IHNob3dzIGV2ZXJ5dGhpbmcgaXMgc3RhdGlzdGljYWxseSBzaWduaWZpY2FudCAoKioqKQoKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KEdHYWxseSkKCmRhdGEzIDwtIGRhdGEyCmRhdGEzJHByZWRpY3RlZCA8LSBwcmVkaWN0KG1vZGVsLCB0eXBlID0gInJlc3BvbnNlIiwgc2NpZW50aWZpYyA9IEZBTFNFKQpgYGAKCmBgYHtyfQojIExldHMgZG8gYW4geHkgc2NhdHRlcnBsb3QgY29tcGFyaW5nIGFjdHVhbCBhbmQgcHJlZGljdGVkLiAgQSBnb29kIG1vZGVsIHNob3VsZCBzaG93IG1vc3Qgb2YgdGhlIHBvaW50cyBvbiB0aGUgeD15IChhY3R1YWwgPSBwcmVkaWN0ZWQpIGxpbmUKZyA8LSBnZ3Bsb3QoZGF0YTMsIGFlcyh4PW1vc3QsIHk9cHJlZGljdGVkKSkKYGBgCgoKCiMjIFByZWRpY3RpdmUgTW9kZWwgR29vZG5lc3Mgb2YgRml0CgoqIFhZIHNjYXR0ZXJwbG90OiBYID0gQWN0dWFsOyBZID0gUHJlZGljdGVkCiogQSBnb29kIG1vZGVsIHdpbGwgc2hvdyBwb2ludHMgb24gdGhlIHg9eSBsaW5lCiogU3RhcnRpbmcgdG8gc2VlIHRoaXMgc2hhcGUg4oCTIGJ1dCBub3Qgc3VwZXIgdGlnaHQKKiBMaW1pdGF0aW9ucyBvZiBzbWFsbGVyIGRhdGFzZXQKKiBJbXBvcnRhbmNlIG9mIHRoaXMgbW9kZWwgaXNu4oCZdCBuZWNlc3NhcmlseSBnZXR0aW5nIGEgcGVyZmVjdCBwcmVkaWN0aW9uIOKAkyBpdCBpcyB0aGUgaW5zaWdodHMgZnJvbSB0aGUgZmFjdG9ycyAoc2VlIGJlbG93KS4KCmBgYHtyfQpnICsgZ2VvbV9wb2ludChhZXMoY29sb3I9YWdlKSxzaXplID0gNSkKYGBgCioqKgo8Y2VudGVyPgoKICFbXShpbWcvcHJlZF9tb2RlbF9pbnQucG5nKSAKIAogPC9jZW50ZXI+CiAKIDxjZW50ZXI+CiAKICFbXShpbWcvcHJlZF9tb2RlbF9iaXoucG5nKSAKIAo8L2NlbnRlcj4=