# R scripts from paper
# Baseball Data at Season, Play-by-Play, and Pitch-by-Pitch Levels
# by Jim Albert
#
# Each statement is on its own line: R treats everything after a `#` as a
# comment up to the end of the line, so code must never share a line with a
# preceding section header.

# Section 2 Lahman Baseball Database ----

# Section 2.1 Description
batting <- read.table("mlb_batting.dat", header = TRUE)

# Season lines for one player, selected by first and last name.
mg <- subset(batting,
             batting$first.name == "Mark" & batting$last.name == "McGwire")
mg[, c("year", "ab", "h", "hr")]

# Section 2.2 A Sample Analysis

# Per-year totals of `h` and `ab`; missing values are dropped from the sums.
hits.ab <- aggregate(batting[, c("h", "ab")],
                     by = list(year = batting$year),
                     FUN = sum, na.rm = TRUE)
avg <- hits.ab$h / hits.ab$ab
plot(hits.ab$year, avg, xlab = "Year", ylab = "Batting Average")
# Lowess smooth with span f = 1/8 drawn over the yearly points.
lines(lowess(hits.ab$year, avg, f = 1/8))

# Same per-year aggregation for `so` relative to `ab`.
so.ab <- aggregate(batting[, c("so", "ab")],
                   by = list(year = batting$year),
                   FUN = sum, na.rm = TRUE)
so.rate <- so.ab$so / so.ab$ab
plot(so.ab$year, so.rate, xlab = "Year", ylab = "Strikeout Rate")
lines(lowess(so.ab$year, so.rate, f = 1/8))

# Section 3 Retrosheet Data ----

# Section 3.1 Description
pbp <- read.table("playbyplay2008.dat", header = TRUE)
roster <- read.table("roster2008.dat", header = TRUE)

# One batter's second-inning plate appearance(s) in a single game.
# NOTE(review): the encoding of the `date` value "5100" is not visible in this
# script -- confirm against the data file.
howard.hr <- subset(pbp,
                    pbp$date == "5100" & pbp$v_team == "PHI" &
                      pbp$batter == "howar001" & pbp$inning == 2)
howard.hr[, c("inning", "outs", "balls", "strikes",
              "b1_runner", "b2_runner", "b3_runner", "event")]

# Section 3.2 A Sample Analysis

# Look up the roster abbreviation for one player by name.
with(roster, abbrev[first.name == "Derek" & last.name == "Jeter"])

# Rows for batter id "jeted001" where `ab_flag` is TRUE.
jeter <- subset(pbp, pbp$batter == "jeted001" & pbp$ab_flag == TRUE)
# 1 when hit_value is positive, 0 otherwise.
y <- ifelse(jeter$hit_value > 0, 1, 0)
# 20-term equal-weight moving average. The call is namespace-qualified because
# packages such as dplyr mask `filter` with an unrelated function.
m.avg <- stats::filter(y, rep(1/20, 20))
plot(m.avg, type = "l", ylab = "Moving Batting Average")

# Section 4 PITCHf/x Database ----

# Section 4.1 Description
pitchdata <- read.table("pitchfx.dat", header = TRUE)

# Rows with pitcher "halladay", game 1, num 1.
halladay1 <- subset(pitchdata,
                    pitchdata$pitcher == "halladay" &
                      pitchdata$game == 1 & pitchdata$num == 1)
halladay1[, c("des", "des2", "pitch_type", "count", "new.count")]

# Section 4.2 A Sample Analysis
tim <- subset(pitchdata, pitchdata$pitcher == "lincecum")
table(tim$pitch_type)
# Tabulate `brief_event` over rows whose new.count.type is "PA event".
with(subset(tim, tim$new.count.type == "PA event"),
     table(brief_event))

########### figure showing movement of three types of pitches

# 25-step gray palette: index 1 is black, index 25 is gray level 0.9.
palette(gray(seq(0, .9, length.out = 25)))

# Map a speed value to an index into the 25-colour palette set above.
#
# The linear map sends 53 to 25 (lightest) and 93 to 0. The result is clamped
# to [1, 25] because R draws colour index 0 in the background colour (the
# point would be invisible) and rejects negative indices with an error.
#
# x: numeric vector of speeds (the script passes the `end_speed` column).
# Returns a numeric vector the same length as `x`; NA inputs stay NA.
scale.speed <- function(x) {
  idx <- 25 - 25 * (x - 53) / (93 - 53)
  pmin(25, pmax(1, idx))
}
# This section expects `tim` (a pitch-level data frame) and `scale.speed`
# (speed -> palette index) to have been defined earlier in the script.

# Split the pitches by pitch_type code.
tim.FF <- subset(tim, tim$pitch_type == "FF")
tim.CH <- subset(tim, tim$pitch_type == "CH")
tim.CU <- subset(tim, tim$pitch_type == "CU")

# Movement figure: pfx_x against pfx_z, one plotting symbol per pitch type
# (1 = FF, 2 = CU, 3 = CH), coloured by scale.speed(end_speed).
with(tim.FF,
     plot(pfx_x, pfx_z,
          xlim = c(-13, 13), ylim = c(-15, 18),
          col = scale.speed(end_speed), pch = 1,
          xlab = "Horizontal Break", ylab = "Vertical Break"))
with(tim.CU,
     points(pfx_x, pfx_z, col = scale.speed(end_speed), pch = 2))
with(tim.CH,
     points(pfx_x, pfx_z, col = scale.speed(end_speed), pch = 3))
# Hand-placed labels for the three point clouds.
text(c(-5, -12, -12), c(-7, 2, 15),
     c("Curveball", "Changeup", "Fastball"))

######## figure showing location of fastballs and curveballs

library(KernSmooth)

# Draw contours of a 2-D kernel density estimate of pitch locations.
#
# loc.pitches: two-column numeric matrix of locations (the script passes
#              cbind(px, pz)).
# ...:         further arguments forwarded to contour().
#
# Uses bkde2D() with bandwidth 0.3 in each direction, draws contour levels
# 0.05, 0.10, ..., 0.30 on fixed axes, then overlays a thick rectangle spanning
# x in [-1, 1] and y in [1.67, 3.5].
# NOTE(review): the rectangle is presumably the strike zone -- confirm.
# Returns the value of the final lines() call.
plot2D <- function(loc.pitches, ...) {
  # bkde2D() derives its default grid range from min/max of each column, so
  # rows with a missing coordinate are dropped before estimating the density.
  loc.pitches <- loc.pitches[stats::complete.cases(loc.pitches), , drop = FALSE]
  est <- bkde2D(loc.pitches, bandwidth = c(0.3, 0.3))
  contour(est$x1, est$x2, est$fhat,
          xlim = c(-2, 2), ylim = c(0, 5),
          levels = seq(.05, .3, by = .05),
          xlab = "Horizontal Location",
          ylab = "Vertical Location", ...)
  lines(c(-1, 1, 1, -1, -1),
        c(1.67, 1.67, 3.5, 3.5, 1.67), lwd = 3)
}

# Location of FF pitches where `stand` is "R".
tim.FF.R <- subset(tim.FF, tim.FF$stand == "R")
plot2D(with(tim.FF.R, cbind(px, pz)))

# Location of CU pitches where `stand` is "R".
tim.CU.R <- subset(tim.CU, tim.CU$stand == "R")
plot2D(with(tim.CU.R, cbind(px, pz)))

######################################################