R软件期末考试复习提纲教学总结.docx
- 文档编号:30499582
- 上传时间:2023-08-16
- 格式:DOCX
- 页数:16
- 大小:17.55KB
R软件期末考试复习提纲教学总结.docx
《R软件期末考试复习提纲教学总结.docx》由会员分享,可在线阅读,更多相关《R软件期末考试复习提纲教学总结.docx(16页珍藏版)》请在冰豆网上搜索。
R软件期末考试复习提纲教学总结
# Final-exam review
# Part 1: matrices and data frames
# 1. Building a specific matrix / data frame
# Matrices
# Approach 1: array() first, then attach the names afterwards
a <- array(1:10, dim = c(2, 5))
rownames(a) <- 1:2
colnames(a) <- c("one", "two", "three", "four", "five")
a
# Both sets of names can also be assigned in a single step
dimnames(a) <- list(1:2, c("one", "two", "three", "four", "five"))
# Keep the dimensions in variables (these names mirror the base functions)
nrow <- nrow(a)
ncol <- ncol(a)
dim(a)
# Approach 2: matrix(), filled column by column
a <- matrix(1:10, nrow = 2, byrow = FALSE)
rownames(a) <- 1:2
colnames(a) <- c("one", "two", "three", "four", "five")
# ... or pass the names directly at construction time
a <- matrix(
  1:10,
  nrow = 2,
  byrow = FALSE,
  dimnames = list(1:2, c("one", "two", "three", "four", "five"))
)
# Building a data frame directly from column vectors
df <- data.frame(
  Name = c("Alice", "Becka", "James", "Jeffrey", "John"),
  Sex = c("F", "F", "M", "M", "M"),
  Age = c(13, 13, 12, 13, 12),
  Height = c(56.5, 65.3, 57.3, 62.5, 59.0),
  Weight = c(84.0, 98.0, 83.0, 84.0, 99.5)
)
df

# The same columns stored as a plain list
Lst <- list(
  Name = c("Alice", "Becka", "James", "Jeffrey", "John"),
  Sex = c("F", "F", "M", "M", "M"),
  Age = c(13, 13, 12, 13, 12),
  Height = c(56.5, 65.3, 57.3, 62.5, 59.0),
  Weight = c(84.0, 98.0, 83.0, 84.0, 99.5)
)
Lst
Lst[["Name"]]  # [[ ]] extracts the element itself
Lst["Name"]    # [ ] keeps the list wrapper
Lst[1]
Lst[[1]]
Lst$Name

# A list of equal-length vectors converts to a data frame
df <- as.data.frame(Lst)
df

# A matrix converts as well
x <- array(1:6, dim = c(2, 3))
as.data.frame(x)

# Indexing a data frame
df[1:2, 3:5]
df[["Height"]]
df$Weight
names(df)  # this attribute is never empty
rownames(df) <- c("one", "two", "three", "four", "five")
df

# Height/Weight ratio stored as a new column.
# with() evaluates the expression inside df without touching the search path.
# The original used attach()/detach() and then evaluated Height/Weight once
# more after detach(), which stops the script ("object 'Height' not found").
r <- with(df, Height / Weight)
r
df$r <- r
names(df)
# 2. Matrix operations
a <- diag(1:3)
a[2, 1] <- 1  # row 2, column 1 (the original a[2][1] hit the same element)
a
# (1) Transpose
t(a)
# (2) Determinant
det(a)
# (3) Inner product of two vectors
x <- 1:5
y <- 2 * 1:5
x %*% y
t(x) %*% y
crossprod(x, y)
# (4) Outer product of two vectors
x %*% t(y)
tcrossprod(x, y)
outer(x, y)
x %o% y
# Matrix multiplication
a <- array(1:9, dim = c(3, 3))
b <- array(9:1, dim = c(3, 3))
x <- 1:3
a * b            # element-wise product
a %*% b          # matrix product
x %*% a %*% x    # quadratic form
crossprod(a, b)   # t(a) %*% b
tcrossprod(a, b)  # a %*% t(b)
# Matrix inverse
# array(1:9, dim = c(3, 3)) is singular, so solve() would abort the script.
# Changing one entry gives a matrix with determinant -3.
a[3, 3] <- 10
solve(a)
b <- 1:3
solve(a, b)  # solution of a %*% x = b
# Eigenvalues and eigenvectors
sm <- eigen(a)
sm
# Singular value decomposition; rebuild e from its factors
e <- diag(1:3)
svde <- svd(e)
svde
# with() instead of attach(): nothing is left on the search path afterwards
with(svde, u %*% diag(d) %*% t(v))
# Functions related to matrix computations
# Dimensions
a <- diag(1:4)
nrow(a)
ncol(a)
# Combining matrices
x1 <- rbind(c(1, 2), c(3, 4))
x2 <- x1 + 10
x3 <- cbind(x1, x2)
x3
x4 <- rbind(x1, x2)
x4
cbind(1, x1)
# Flattening a matrix into a vector (column-major order)
a <- matrix(
  1:6,
  ncol = 2,
  dimnames = list(c("one", "two", "three"), c("first", "second")),
  byrow = TRUE
)
as.vector(a)
# apply family
apply(a, 1, mean)  # row means
apply(a, 2, sum)   # column sums
tapply(1:5, factor(c("f", "f", "m", "m", "m")), mean)
# Question 2
# Draw a random sample
x <- rnorm(100, 0, 1)
x
# Histogram of the sample on the density scale
hist(x, freq = FALSE)
# Kernel density estimate, printed and then overlaid
density(x)
lines(density(x), col = "blue")
# Overlay the fitted normal density (dnorm is a density, not a CDF)
y <- seq(-4, 3, 0.2)
lines(y, dnorm(y, mean(x), sd(x)), col = "red")
# Empirical distribution function of a sample, with the fitted normal CDF
z <- rnorm(50, 0, 1)
plot(ecdf(z), do.p = FALSE, verticals = TRUE)
d <- seq(-3, 2, 0.2)
lines(d, pnorm(d, mean(z), sd(z)), col = "red")
# Same comparison for a Poisson sample
y <- rpois(100, 2)
plot(ecdf(y), col = "red", verticals = TRUE, do.p = FALSE)
x <- 0:8
lines(x, ppois(x, mean(y)), col = "blue")
# Fixed data set of 15 values.
# The original was missing the comma between 69.0 and 56.9 (parse error).
w <- c(75, 64, 47.4, 66.9, 62.2, 62.2, 58.7, 63.5, 66.6, 64.0, 57.0, 69.0,
       56.9, 50.0, 72.0)
hist(w, freq = FALSE)
lines(density(w), col = "blue")
x <- 44:76
lines(x, dnorm(x, mean(w), sd(w)), col = "red")
plot(ecdf(w), do.p = FALSE, verticals = TRUE)
lines(x, pnorm(x, mean(w), sd(w)), col = "red")
# Descriptive statistics of a numeric sample.
#
# Args:
#   x: numeric vector. The skewness and kurtosis formulas divide by
#      (n - 2) and (n - 3), so at least 4 observations are needed for
#      finite values.
#
# Returns:
#   A one-row data.frame with columns N, Mean, Var, std_dev, Median,
#   std_mean (standard error of the mean), CV (coefficient of variation,
#   in percent), CSS (corrected sum of squares), USS (uncorrected sum of
#   squares), R (range), R1 (interquartile range), Skewness, Kurtosis.
data_outline <- function(x) {
  n <- length(x)
  m <- mean(x)
  v <- var(x)
  s <- sd(x)
  me <- median(x)
  cv <- 100 * s / m
  css <- sum((x - m)^2)
  uss <- sum(x^2)
  R <- max(x) - min(x)                           # sample range
  R1 <- quantile(x, 3 / 4) - quantile(x, 1 / 4)  # interquartile range
  sm <- s / sqrt(n)                              # standard error of the mean
  g1 <- n / (n - 1) / (n - 2) * sum((x - m)^3) / s^3
  # The trailing "-" keeps the expression open across the line break. In the
  # original the correction term sat on its own line, so it was evaluated as a
  # separate statement and never subtracted from g2.
  g2 <- n * (n + 1) / (n - 1) / (n - 2) / (n - 3) * sum((x - m)^4) / s^4 -
    3 * (n - 1)^2 / (n - 2) / (n - 3)
  # row.names = 1 is kept from the original; presumably it labels the single
  # row "1" rather than inheriting the "75%" name carried by R1.
  data.frame(
    N = n, Mean = m, Var = v, std_dev = s,
    Median = me, std_mean = sm, CV = cv, CSS = css, USS = uss,
    R = R, R1 = R1, Skewness = g1, Kurtosis = g2, row.names = 1
  )
}
x <- rnorm(100)
data_outline(x)
# Question 3
# The r / p / q / d prefixes: random draws, CDF, quantile function, density
# Normal
rnorm(100, mean = 0, sd = 1)
pnorm(1:5, mean = 0, sd = 1)
dnorm(-3:3, mean = 0, sd = 1)
qnorm(seq(0, 1, 0.25), mean = 0, sd = 1)
# Beta
rbeta(100, shape1 = 2, shape2 = 2)
# Binomial
rbinom(100, size = 100, prob = 0.5)
pbinom(1:100, size = 100, prob = 0.5)
dbinom(1:5, size = 100, prob = 0.5)
qbinom(seq(0, 1, 0.1), size = 100, prob = 0.5)
# Chi-squared
rchisq(100, df = 1)
qchisq(seq(0, 1, 0.2), df = 10)
pchisq(1:10, df = 10)
dchisq(1:10, df = 10)
# Exponential
rexp(100, rate = 0.5)
# Poisson
rpois(100, lambda = 2)
ppois(1:1000, lambda = 2)
dpois(1:100, lambda = 2)
# Uniform
runif(100, min = 0, max = 1)
qunif(c(0, 0.2, 0.8), min = 0, max = 1)
punif(seq(0, 1, 0.2), min = 0, max = 1)
dunif(seq(0, 1, 0.01), min = 0, max = 1)
# Student t
rt(100, df = 2)
qt(0.8, df = 2)
pt(-3:3, df = 2)
dt(-3:3, df = 2)
# F
rf(100, df1 = 1, df2 = 2)
qf(0.8, df1 = 1, df2 = 2)
#四置信区间
#1
#(1)sigma已知
# Confidence interval for a normal mean when sigma is known.
#
# Args:
#   x:     numeric sample.
#   side:  < 0 gives a one-sided interval (-Inf, upper];
#          > 0 gives a one-sided interval [lower, Inf);
#          0 (default) gives the two-sided interval.
#   sigma: known population standard deviation.
#   alpha: 1 - confidence level.
#
# Returns:
#   A one-row data.frame with the sample mean and the bounds a, b.
interval_estimate1 <- function(x, side = 0, sigma = 1, alpha = 0.05) {
  xb <- mean(x)
  n <- length(x)
  # "else if" must be two words; the original "elseif" does not parse.
  if (side < 0) {
    tmp <- sigma / sqrt(n) * qnorm(1 - alpha)
    a <- -Inf
    b <- xb + tmp
  } else if (side > 0) {
    tmp <- sigma / sqrt(n) * qnorm(1 - alpha)
    a <- xb - tmp
    b <- Inf
  } else {
    tmp <- sigma / sqrt(n) * qnorm(1 - alpha / 2)
    a <- xb - tmp
    b <- xb + tmp
  }
  data.frame(mean = xb, a = a, b = b)
}
x <- rnorm(100, 0, 4)
interval_estimate1(x, sigma = 4, side = 0)
interval_estimate1(x, sigma = 4, side = -1)
interval_estimate1(x, sigma = 4, side = 1)
#(2)sigma未知
# Confidence interval for a normal mean when sigma is unknown (t interval).
#
# Args:
#   x:     numeric sample.
#   side:  < 0 gives a one-sided interval (-Inf, upper];
#          > 0 gives a one-sided interval [lower, Inf);
#          0 (default) gives the two-sided interval.
#   alpha: 1 - confidence level.
#
# Returns:
#   A one-row data.frame with the sample mean and the bounds a, b.
interval_estimate2 <- function(x, side = 0, alpha = 0.05) {
  xb <- mean(x)
  n <- length(x)
  # "else if" must be two words; the original "elseif" does not parse.
  if (side < 0) {
    tmp <- sd(x) / sqrt(n) * qt(1 - alpha, n - 1)
    a <- -Inf
    b <- xb + tmp
  } else if (side > 0) {
    tmp <- sd(x) / sqrt(n) * qt(1 - alpha, n - 1)
    a <- xb - tmp
    b <- Inf
  } else {
    tmp <- sd(x) / sqrt(n) * qt(1 - alpha / 2, n - 1)
    a <- xb - tmp
    b <- xb + tmp
  }
  data.frame(mean = xb, a = a, b = b)
}
x <- rnorm(100, 0, 1)
interval_estimate2(x, side = -1)
interval_estimate2(x, side = 0)
interval_estimate2(x, side = 1)
# Cross-check against t.test(). t.test() has no `side` argument, so the
# original calls silently produced three identical two-sided intervals.
# The matching option is `alternative`.
t.test(x, alternative = "less")       # same interval as side = -1
t.test(x, alternative = "two.sided")  # same interval as side = 0
t.test(x, alternative = "greater")    # same interval as side = 1
# Two populations with equal but unknown variance (sigma1 = sigma2).
#
# Two-sided confidence interval for mu1 - mu2 using the pooled variance.
#
# Args:
#   x, y:  numeric samples from the two populations.
#   alpha: 1 - confidence level.
#
# Returns:
#   A one-row data.frame with the difference of sample means and the
#   bounds a, b.
interval_estimate3 <- function(x, y, alpha = 0.05) {
  xb <- mean(x)
  yb <- mean(y)
  n1 <- length(x)
  n2 <- length(y)
  # Pooled variance: the divisor is n1 + n2 - 2. The original wrote
  # n1 + n1 - 2, which is only right when both samples have the same size.
  sw <- ((n1 - 1) * var(x) + (n2 - 1) * var(y)) / (n1 + n2 - 2)
  tmp <- sqrt((1 / n1 + 1 / n2) * sw) * qt(1 - alpha / 2, n1 + n2 - 2)
  a <- xb - yb - tmp
  b <- xb - yb + tmp
  data.frame(mean = xb - yb, a = a, b = b)
}
x <- rnorm(100, 0, 1)
y <- rnorm(100, 1, 1)
interval_estimate3(x, y)
# var.equal = TRUE makes t.test() use the same pooled-variance interval;
# the default is the Welch interval, which does not match.
t.test(x, y, var.equal = TRUE)
# Stray numbers from the original notes (they look like pasted output), kept
# as a comment so they are not evaluated: -0.03643479 -0.98699097
#第五题假设检验
#(1)sigma已知,双侧,检验mu=mu0
# Two-sided z test of H0: mean = mu for a sample with known sigma.
#
# Args:
#   x:     numeric sample.
#   mu:    hypothesised mean.
#   sigma: known population standard deviation.
#
# Returns:
#   A one-row data.frame with the sample mean, the statistic Z and the
#   two-sided p value.
mean.test1 <- function(x, mu = 0, sigma = 1) {
  n_obs <- length(x)
  x_bar <- mean(x)
  z_stat <- (x_bar - mu) / sigma * sqrt(n_obs)
  lower_tail <- pnorm(z_stat)
  # Double whichever tail the statistic falls in.
  p_two_sided <- if (lower_tail <= 1 / 2) {
    2 * lower_tail
  } else {
    2 * (1 - lower_tail)
  }
  data.frame(mean = x_bar, Z = z_stat, p_value = p_two_sided)
}
x <- rnorm(100, 0, 2)
mean.test1(x, mu = 0, sigma = 2)
#(2)sigma未知,双侧,检验mu=mu0
# Two-sided one-sample t test of H0: mean = mu (sigma unknown).
#
# Args:
#   x:  numeric sample.
#   mu: hypothesised mean.
#
# Returns:
#   A one-row data.frame with the sample mean, the statistic (reported in
#   column Z, computed with the sample standard deviation) and the two-sided
#   p value from the t distribution with n - 1 degrees of freedom.
mean.test2 <- function(x, mu = 0) {
  n_obs <- length(x)
  x_bar <- mean(x)
  t_stat <- (x_bar - mu) / sd(x) * sqrt(n_obs)
  lower_tail <- pt(t_stat, n_obs - 1)
  # Double whichever tail the statistic falls in.
  p_two_sided <- if (lower_tail <= 1 / 2) {
    2 * lower_tail
  } else {
    2 * (1 - lower_tail)
  }
  data.frame(mean = x_bar, Z = t_stat, p_value = p_two_sided)
}
x <- rnorm(100)
mean.test2(x, mu = 0)
# Built-in equivalent for comparison
t.test(x, mu = 0, alternative = "two.sided")
# Two populations with equal but unknown variance (sigma1 = sigma2):
# two-sided test of H0: mu1 - mu2 = mu.
#
# Args:
#   x, y: numeric samples from the two populations.
#   mu:   hypothesised difference mu1 - mu2.
#
# Returns:
#   A one-row data.frame with the difference of sample means, the pooled
#   t statistic (column T) and the two-sided p value.
mean.test3 <- function(x, y, mu = 0) {
  xb <- mean(x)
  yb <- mean(y)
  n1 <- length(x)
  n2 <- length(y)
  sw <- ((n1 - 1) * var(x) + (n2 - 1) * var(y)) / (n1 + n2 - 2)
  t_stat <- (xb - yb - mu) / sqrt(sw * (1 / n1 + 1 / n2))
  # The pooled t statistic has n1 + n2 - 2 degrees of freedom; the original
  # used n1 + n2 - 1, which gives p values that disagree with t.test().
  p <- pt(t_stat, n1 + n2 - 2)
  P <- if (p <= 1 / 2) 2 * p else 2 * (1 - p)
  data.frame(mean = xb - yb, T = t_stat, p_value = P)
}
x <- rnorm(100, 0, 1)
y <- rnorm(100, 2, 1)
mean.test3(x, y, mu = -2)
t.test(x, y, var.equal = TRUE, mu = -2)
x <- rnorm(100, 0, 1)
y <- rnorm(100, 0, 2)
mean.test3(x, y)
t.test(x, y, var.equal = TRUE)
# Question 6: calling built-in R tests
# Kolmogorov-Smirnov test: do two samples share a distribution?
x <- rnorm(100, 0, 1)
y <- rt(100, 5)
z <- rnorm(100, 0, 1)
# The original wrote ks.test(x,y),alt="l" with the argument outside the
# call, which does not parse; it belongs inside as `alternative`.
ks.test(x, y, alternative = "less")
ks.test(x, z)
# Does one sample follow a given distribution?
ks.test(x, "pnorm", 0, 2)
ks.test(x, "pt", 1)
# Sign test: do two samples differ?
x <- rbinom(100, 100, 0.5)
binom.test(sum(x >= 50), 100)
y <- rbinom(100, 100, 0.4)
# NOTE(review): the source text is cut off after "binom.test(sum(x" (a "<"
# and what followed were presumably lost during extraction). Reconstructed
# as a sign test on x < y; confirm against the original notes.
binom.test(sum(x < y), length(x))

# Wilcoxon signed-rank / rank-sum test (exact or large-sample approximation)
# wilcox.test(x, y, alt, mu, paired = F, exact = NULL, correct = T,
#             conf.int = F, conf.level = 0.95)
r <- runif(100, 136, 145)
wilcox.test(r, mu = 140, alternative = "less", exact = FALSE,
            conf.int = TRUE, correct = FALSE)
x <- rnorm(100)
y <- rnorm(100)
wilcox.test(x, y, paired = TRUE, alternative = "greater")
wilcox.test(x - y, alternative = "greater")
binom.test(sum(x > y), length(x), alternative = "greater")

# Question 7
# Correlation tests
x <- 1:6
y <- 6:1
z <- 2:7
cor.test(x, y, alternative = "greater", method = "spearman")
cor.test(x, z, alternative = "greater", method = "spearman")

# Without ties
x <- c(2, 3, 1, 4, 5, 8, 6)
y <- 1:7
# cor.test() has no `correct` argument (the original `correct=T` was
# silently ignored); the continuity-correction switch is `continuity`.
cor.test(x, y, alternative = "greater", method = "spearman",
         continuity = TRUE)
n <- length(x)
r <- rank(x)
r
R <- rank(y)
R
s <- sum((r - R)^2)
rho <- 1 - 6 * s / n / (n^2 - 1)
rho

# With ties
x <- c(2, 3, 4, 4, 5, 8, 6)
y <- 1:7
# The original line was garbled ("...correct=T)exact=F,"); exact = FALSE
# belongs inside the call.
cor.test(x, y, alternative = "greater", method = "spearman",
         exact = FALSE, continuity = TRUE)
n <- length(x)
r <- rank(x)
r
R <- rank(y)
R
sxy <- sum(r * R)
sx <- sum(r^2)
sy <- sum(R^2)  # ranks of y; the original used y, equal here only because y is 1:7
# n * (mean rank)^2, the centring term; renamed from `t` so that the base
# function t() is not shadowed.
rank_centre <- n * ((n + 1) / 2)^2
rho <- (sxy - rank_centre) / sqrt(sx - rank_centre) / sqrt(sy - rank_centre)
rho

# Question 8: regression
x <- c(seq(0.1, 0.18, 0.01), 0.20, 0.21, 0.23)
y <- c(42, 43.5, 45, 45.5, 45, 47.5, 49, 53, 50, 55, 55, 60)
# Scatter plot
plot(x, y)
# Fit the regression (both formulas describe the same model)
lm.sol <- lm(y ~ x)
lm.sol <- lm(y ~ 1 + x)
# Summary statistics
summary(lm.sol)
# Draw the regression line
abline(lm.sol)

# Interval estimates for the regression coefficients.
#
# Args:
#   lm.sol: a fitted lm object.
#   alpha:  1 - confidence level.
#
# Returns:
#   A matrix with one row per coefficient and the columns
#   "estimate", "left", "right".
beta.int <- function(lm.sol, alpha = 0.05) {
  A <- summary(lm.sol)$coefficients  # column 1: estimate, column 2: std. error
  df <- lm.sol$df.residual
  left <- A[, 1] - A[, 2] * qt(1 - alpha / 2, df)
  right <- A[, 1] + A[, 2] * qt(1 - alpha / 2, df)
  rowname <- dimnames(A)[[1]]  # first element of the dimnames list
  colname <- c("estimate", "left", "right")
  matrix(c(A[, 1], left, right), ncol = 3, dimnames = list(rowname, colname))
}
beta.int(lm.sol)

# Predicted values and intervals for new values of the regressor
# NOTE(review): 1.20 lies far outside the observed x range (0.10-0.23) and
# may be a typo for 0.20; value kept as in the original.
new <- data.frame(x = c(0.16, 0.19, 1.20))
lm.predict <- predict(lm.sol, new)
lm.predict
lm.predict <- predict(lm.sol, new, interval = "confidence", level = 0.95)
lm.predict <- predict(lm.sol, new, interval = "prediction", level = 0.95)

# Residual plots
resid <- lm.sol$residuals
plot(resid)
y.res <- resid(lm.sol)
y.fit <- predict(lm.sol)
plot(y.res ~ y.fit)
plot(y.res ~ x)
plot(lm.sol, 1)
plot(lm.sol, 2)
plot(lm.sol, 3)
plot(lm.sol, 4)
- 配套讲稿:
如PPT文件的首页显示word图标,表示该PPT已包含配套word讲稿。双击word图标可打开word文档。
- 特殊限制:
部分文档作品中含有的国旗、国徽等图片,仅作为作品整体效果示例展示,禁止商用。设计者仅对作品中独创性部分享有著作权。
- 关 键 词:
- 软件 期末考试 复习 提纲 教学 总结