R语言数据分析报告美国天气事件对人员伤亡和经济损失的影响 附代码数据Word文件下载.docx
- 文档编号:19411868
- 上传时间:2023-01-06
- 格式:DOCX
- 页数:15
- 大小:150.44KB
R语言数据分析报告美国天气事件对人员伤亡和经济损失的影响 附代码数据Word文件下载.docx
《R语言数据分析报告美国天气事件对人员伤亡和经济损失的影响 附代码数据Word文件下载.docx》由会员分享,可在线阅读,更多相关《R语言数据分析报告美国天气事件对人员伤亡和经济损失的影响 附代码数据Word文件下载.docx(15页珍藏版)》请在冰豆网上搜索。
library(dplyr)
library(ggplot2)
library(lubridate)
library(knitr)
# ---------------------- Constants Definition ---------------------- #
# Width, in years, of the "last X years" window used for the
# top-events-by-frequency, by-geographic-area analysis.
RECENCY_SPAN_IN_YEARS <- 10
# Placeholder written into geography name columns that are NA.
# NOTE(review): whitespace was stripped from this listing; the literal may
# originally have been 'NOT DEFINED' -- confirm against the source document.
C_NOT_DEFINED_STR <- 'NOTDEFINED'
# Placeholder written into geography id columns that are NA.
C_NOT_DEFINED_INT <- -1
# ---------------------- Stage 1: Load source data ---------------------- #
# NOTE(review): setwd() to a user-specific absolute path ties the script to
# one machine; it is kept because the relative file names below depend on it.
setwd('/Users/prosales/Documents/Capacitaciones/Certificaciones/CourseraDSCertificate-Course5-ReproducibleResearch/FinalProject/')
# Storm event records (bzip2-compressed CSV).
natural_events_df <- read.csv('repdata%2Fdata%2FStormData.csv.bz2')
# Geography code table; it is split into regions, divisions and states in
# the next stage.
state_geocodes_df <- read.csv('state-geocodes-v2015.csv')
## ---------------------- Stage 2: Data preparation: enrich and restructure ---------------------- ##
# Rows with division == 0 and state_fips == 0 are taken as region rows.
regions_df <- state_geocodes_df %>%
  filter(division == 0 & state_fips == 0) %>%
  select(region, name)
colnames(regions_df) <- c('region_id', 'region_name')

# Rows with a non-zero division and state_fips == 0 are taken as division
# rows.
divisions_df <- state_geocodes_df %>%
  filter(division != 0 & state_fips == 0) %>%
  select(division, name)
colnames(divisions_df) <- c('division_id', 'division_name')

# Rows with a non-zero state_fips are taken as state rows; the region and
# division codes are kept so the three tables can be joined later.
states_df <- state_geocodes_df %>%
  filter(state_fips != 0) %>%
  select(region, division, state_fips, name)
colnames(states_df) <- c('region_id', 'division_id', 'state_id', 'state_name')
# One row per state, carrying its region and division ids and names.
complete_geography_df <- merge(states_df, regions_df, by = 'region_id')
complete_geography_df <- merge(complete_geography_df, divisions_df, by = 'division_id')
# NOTE(review): the column list of this select() was lost in extraction.
# These six columns are the ones produced by the two merges and used below;
# their original order is an assumption.
complete_geography_df <- complete_geography_df %>%
  select('region_id', 'region_name', 'division_id', 'division_name', 'state_id', 'state_name')

# Attach geography to every event. all.x = TRUE keeps events whose STATE__
# code has no match; their geography columns come back as NA.
geography_structured_events_df <- merge(
  natural_events_df, complete_geography_df,
  by.x = 'STATE__', by.y = 'state_id', all.x = TRUE
)

# Convert the name columns to character, replace missing geography with the
# "not defined" placeholders, and parse BGN_DATE into a Date.
geography_structured_events_df <- geography_structured_events_df %>%
  mutate(region_name = as.character(region_name)) %>%
  mutate(division_name = as.character(division_name)) %>%
  mutate(state_name = as.character(state_name)) %>%
  mutate(region_name = replace(region_name, is.na(region_name), C_NOT_DEFINED_STR)) %>%
  mutate(division_name = replace(division_name, is.na(division_name), C_NOT_DEFINED_STR)) %>%
  mutate(state_name = replace(state_name, is.na(state_name), C_NOT_DEFINED_STR)) %>%
  mutate(region_id = replace(region_id, is.na(region_id), C_NOT_DEFINED_INT)) %>%
  mutate(division_id = replace(division_id, is.na(division_id), C_NOT_DEFINED_INT)) %>%
  mutate(BGN_DATE = as.Date(BGN_DATE, format = '%m/%d/%Y'))
## ---------------------- Stage 3: Top events by frequency, by geographic area (all-time and last X years) ---------------------- ##
# Number of events of each type per region/state over the whole history.
events_frequency_by_geography_df <- geography_structured_events_df %>%
  count(region_name, state_name, EVTYPE)

# Per region/state, keep the event types whose frequency rank is 3 or better.
# rank() uses average ranks for ties, so tied types can share a fractional
# rank (e.g. 3.5) and fall outside the cut-off.
top_events_by_geography_df <- events_frequency_by_geography_df %>%
  group_by(region_name, state_name) %>%
  mutate(my_rank = rank(desc(n))) %>%
  filter(my_rank <= 3)
top_events_by_geography_df <- top_events_by_geography_df[
  with(top_events_by_geography_df, order(region_name, state_name, my_rank)),
]

# Latest event date per region/state and the start of its recency window.
# %m-% is used instead of `-` so that a Feb 29 max_date rolls back to the
# last valid day of the month instead of producing NA.
max_dates_by_geography_df <- geography_structured_events_df %>%
  filter(!is.na(BGN_DATE)) %>%
  filter(is.Date(BGN_DATE)) %>%
  group_by(region_name, state_name) %>%
  summarise(max_date = max(BGN_DATE)) %>%
  mutate(event_date_lower_bound = max_date %m-% years(RECENCY_SPAN_IN_YEARS))

# Events that fall strictly after each region/state's lower bound.
last_X_years_events_by_geography_df <- merge(
  geography_structured_events_df, max_dates_by_geography_df,
  by.x = c('region_name', 'state_name'),
  by.y = c('region_name', 'state_name')
) %>%
  filter(BGN_DATE > event_date_lower_bound) %>%
  select(region_name, state_name, EVTYPE, BGN_DATE, event_date_lower_bound)

# Same frequency count and top-3 ranking, restricted to the recency window.
last_X_years_events_frequency_by_geography_df <- last_X_years_events_by_geography_df %>%
  count(region_name, state_name, EVTYPE)

top_events_in_last_X_years_events_frequency_by_geography_df <- last_X_years_events_frequency_by_geography_df %>%
  group_by(region_name, state_name) %>%
  mutate(my_rank = rank(desc(n))) %>%
  filter(my_rank <= 3)
top_events_in_last_X_years_events_frequency_by_geography_df <- top_events_in_last_X_years_events_frequency_by_geography_df[
  with(top_events_in_last_X_years_events_frequency_by_geography_df, order(region_name, state_name, my_rank)),
]
## ---------------------- Stage 4: Deadliest events by geographic area ---------------------- ##
# Total fatalities per region/state/event type, ignoring rows with NA counts.
fatalities_by_event_type_by_geography_df <- geography_structured_events_df %>%
  filter(!is.na(FATALITIES)) %>%
  group_by(region_name, state_name, EVTYPE) %>%
  summarise(fatalities_sum = sum(FATALITIES))

# Per region/state, keep the event types ranked 3 or better by fatalities.
top_deadliest_events_types_by_geography_df <- fatalities_by_event_type_by_geography_df %>%
  group_by(region_name, state_name) %>%
  mutate(my_rank = rank(desc(fatalities_sum))) %>%
  filter(my_rank <= 3)
top_deadliest_events_types_by_geography_df <- top_deadliest_events_types_by_geography_df[
  with(top_deadliest_events_types_by_geography_df, order(region_name, state_name, my_rank)),
]
# ---------------------- Stage 5: Event types that cause most property losses by geographic area ---------------------- #
# Sum of PROPDMG per region/state/event type, ignoring rows with NA values.
# NOTE(review): PROPDMG is summed as-is; if the dataset stores a separate
# magnitude/exponent column for damages, it is not applied here -- confirm.
property_losses_by_event_type_by_geography_df <- geography_structured_events_df %>%
  filter(!is.na(PROPDMG)) %>%
  group_by(region_name, state_name, EVTYPE) %>%
  summarise(property_losses_sum = sum(PROPDMG))

# Per region/state, keep the event types ranked 3 or better by property loss.
top_property_costly_events_types_by_geography_df <- property_losses_by_event_type_by_geography_df %>%
  group_by(region_name, state_name) %>%
  mutate(my_rank = rank(desc(property_losses_sum))) %>%
  filter(my_rank <= 3)
top_property_costly_events_types_by_geography_df <- top_property_costly_events_types_by_geography_df[
  with(top_property_costly_events_types_by_geography_df, order(region_name, state_name, my_rank)),
]
# Scale the sums down by a factor of 1000.
top_property_costly_events_types_by_geography_df <- top_property_costly_events_types_by_geography_df %>%
  mutate(property_losses_sum = property_losses_sum / 1000)
# ---------------------- Stage 6: Event types that cause most crop losses by geographic area ---------------------- #
# Sum of CROPDMG per region/state/event type, ignoring rows with NA values.
# NOTE(review): CROPDMG is summed as-is; if the dataset stores a separate
# magnitude/exponent column for damages, it is not applied here -- confirm.
crop_losses_by_event_type_by_geography_df <- geography_structured_events_df %>%
  filter(!is.na(CROPDMG)) %>%
  group_by(region_name, state_name, EVTYPE) %>%
  summarise(crop_losses_sum = sum(CROPDMG))

# Per region/state, keep the event types ranked 3 or better by crop loss.
top_crop_costly_events_types_by_geography_df <- crop_losses_by_event_type_by_geography_df %>%
  group_by(region_name, state_name) %>%
  mutate(my_rank = rank(desc(crop_losses_sum))) %>%
  filter(my_rank <= 3)
top_crop_costly_events_types_by_geography_df <- top_crop_costly_events_types_by_geography_df[
  with(top_crop_costly_events_types_by_geography_df, order(region_name, state_name, my_rank)),
]
# Scale the sums down by a factor of 1000.
top_crop_costly_events_types_by_geography_df <- top_crop_costly_events_types_by_geography_df %>%
  mutate(crop_losses_sum = crop_losses_sum / 1000)
# ---------------------- Stage 7: Event occurrence by geographic area by month, during the last X years recorded ---------------------- #
# Monthly event counts per region/state/event type within the recency window.
last_X_years_events_frequency_by_month_by_geography_df <-
  mutate(last_X_years_events_by_geography_df, event_month = month(BGN_DATE)) %>%
  count(region_name, state_name, EVTYPE, event_month)

# Restrict the monthly counts to the top event types of the recency window.
# NOTE(review): the merge keys were lost in extraction. state_name and EVTYPE
# are inferred from the suffixed columns (region_name.y, n.x) selected below.
top_events_in_last_X_years_by_month_by_geography_df <- merge(
  last_X_years_events_frequency_by_month_by_geography_df,
  top_events_in_last_X_years_events_frequency_by_geography_df,
  by.x = c('state_name', 'EVTYPE'),
  by.y = c('state_name', 'EVTYPE')
)
# Keep the monthly count (n.x) and drop the merge suffixes from the names.
top_events_in_last_X_years_by_month_by_geography_df <- top_events_in_last_X_years_by_month_by_geography_df %>%
  select(region_name.y, state_name, EVTYPE, event_month, n.x)
colnames(top_events_in_last_X_years_by_month_by_geography_df) <-
  c('region_name', 'state_name', 'EVTYPE', 'event_month', 'n')
# ---------------------- Stage 8: Deadliest events by geographic area by month ---------------------- #
# Total fatalities per region/state/event type/month.
# NOTE(review): most of this stage was elided in extraction. The NA filter
# and the summarise() are assumed to mirror Stage 4 -- confirm.
fatalities_by_geography_by_event_type_by_month_df <- geography_structured_events_df %>%
  filter(!is.na(FATALITIES)) %>%
  mutate(event_month = month(BGN_DATE)) %>%
  group_by(region_name, state_name, EVTYPE, event_month) %>%
  summarise(fatalities_sum = sum(FATALITIES))

# Restrict the monthly sums to the deadliest event types per region/state.
# NOTE(review): the merge keys are inferred from the suffixed columns
# (region_name.x, fatalities_sum.x) selected below.
top_fatalities_by_geography_by_event_type_by_month_df <- merge(
  fatalities_by_geography_by_event_type_by_month_df,
  top_deadliest_events_types_by_geography_df,
  by.x = c('state_name', 'EVTYPE'),
  by.y = c('state_name', 'EVTYPE')
)
# Keep the monthly sum (fatalities_sum.x) and drop the merge suffixes.
top_fatalities_by_geography_by_event_type_by_month_df <- top_fatalities_by_geography_by_event_type_by_month_df %>%
  select('region_name.x', 'state_name', 'EVTYPE', 'event_month', 'fatalities_sum.x')
colnames(top_fatalities_by_geography_by_event_type_by_month_df) <-
  c('region_name', 'state_name', 'EVTYPE', 'event_month', 'fatalities_sum')
#----------------------Stage9:
EventstypesthatcausemostPROPERTYlossesbygeographicarea,bymonth----------------------#
property_losses_by_geography_by_event_type_by_month_df<
colnames(property_losses_by_geography_by_event_type_by_month_df)<
region_name
- 配套讲稿:
如PPT文件的首页显示word图标,表示该PPT已包含配套word讲稿。双击word图标可打开word文档。
- 特殊限制:
部分文档作品中含有的国旗、国徽等图片,仅作为作品整体效果示例展示,禁止商用。设计者仅对作品中独创性部分享有著作权。
- 关 键 词:
- R语言数据分析报告美国天气事件对人员伤亡和经济损失的影响 附代码数据 语言 数据 分析 报告 美国 天气 事件 人员伤亡 经济损失 影响 代码