Révision | 6d251fb28c86ef80ecea7ffcc452a676b4544509 |
---|---|
Taille | 10,265 octets |
l'heure | 2013-10-29 22:23:56 |
Auteur | Lorenzo Isella |
Message de Log | I finalized the code for the generation of the first part of the country fiche.
|
# Start from an empty workspace: every object in the global environment is
# removed before anything else runs.
# NOTE(review): when the script is source()d from an interactive session this
# also deletes the caller's own objects -- confirm that is intended.
rm(list=ls())
# reshape2 provides melt(), used further down to reshape the renewable-energy
# table from wide to long format.
library(reshape2)
# Insert a row at a given position of a data frame.
# See http://bit.ly/19Afqzy
#
# Arguments:
#   existingDF  data frame to extend.
#   newrow      the row to insert (one-row data frame, list or vector that can
#               be assigned to a row of existingDF).
#   r           1-based position the new row will occupy. Rows currently at
#               positions r..nrow(existingDF) are shifted down by one.
#               r == nrow(existingDF) + 1 appends the row at the end.
# Returns:
#   existingDF with newrow stored at position r.
insertRow <- function(existingDF, newrow, r) {
  n <- nrow(existingDF)
  # Only shift when there is something at or below position r. Without this
  # guard seq(r + 1, n + 1) and seq(r, n) both count backwards for
  # r == n + 1 and an extra all-NA row is created.
  if (r <= n) {
    existingDF[seq(r + 1, n + 1), ] <- existingDF[seq(r, n), ]
  }
  existingDF[r, ] <- newrow
  existingDF
}
# ---- Run configuration ----

# Highest number used to label an indicator file.
n_ind <- 17
# How many extra indicators (calculated by Alexander) are read in addition.
n_extra <- 2
# How many indicator numbers are missing from the 1..n_ind sequence.
n_missing <- 0

# Numbers of the indicator files to read. When an indicator number is
# missing, drop it from this sequence, e.g.
## n_ind_seq <- n_ind_seq[-6]
n_ind_seq <- seq_len(n_ind)

# Reference period: text prefix used to match quarterly TIME labels, and the
# reference year itself.
sel_quarter <- "2012Q"
sel_year <- 2012
# Some indicators also need a forecast year.
extra_year <- 2013
# Years over which the cumulative GDP growth is computed.
year_seq <- 2008:2013
# Read every indicator file ./script_data/<number>.csv into data_list.
# For each file:
#   * Value is turned into a number after removing thousands separators (",");
#   * every GEO entry containing "Germany" is renamed to plain "Germany".
data_list <- list()
for (i in seq_len(n_ind - n_missing)) {
  fn <- paste0("./script_data/", n_ind_seq[i], ".csv")
  print("fn is, ")
  print(fn)
  indicator <- read.csv(fn, header = TRUE)
  indicator$Value <- as.numeric(gsub(",", "", indicator$Value))
  sel <- grep("Germany", indicator$GEO)
  if (is.factor(indicator$GEO)) {
    # Factor column: the new label must exist as a level before it can be
    # assigned; labels that are no longer used are dropped afterwards.
    levels(indicator$GEO) <- c(levels(indicator$GEO), "Germany")
    indicator$GEO[sel] <- "Germany"
    indicator$GEO <- droplevels(indicator)$GEO
  } else {
    # Character column (read.csv default since R 4.0): assign directly.
    # Calling `levels<-` here would leave a stray "levels" attribute on a
    # character vector, which later code may mistake for factor levels.
    indicator$GEO[sel] <- "Germany"
  }
  data_list[[i]] <- indicator
}
# Row labels for the country tables; the file is read without a header line.
labels <- read.csv("./script_data/labels.csv", header = FALSE)
#############################################
## #Some data manipulation on the population structure (second indicator)
## data_to_fix <- data_list[[2]]
## sel_14 <- which(data_to_fix$INDIC_DE=="Proportion of population aged 0-14 years")
## sel_24 <- which(data_to_fix$INDIC_DE=="Proportion of population aged 15-24 years")
############################################
# Countries to process: the distinct GEO entries of the first indicator, in
# order of appearance, without the first one.
country_list <- unique(as.character(data_list[[1]]$GEO))[-1L]
# Accumulators filled by the sections that follow.
country_data <- list()
res <- NULL
res_extra <- NULL
# Stack the TIME/GEO/Value rows of every indicator into one long table `res`.
# Every indicator contributes its rows for the reference year (sel_year).
# Three indicators contribute additional rows:
#   * indicator 5 also adds its rows for extra_year;
#   * indicator 13 adds every row whose TIME contains the reference year as
#     text (presumably quarterly labels -- confirm against the data file);
#   * indicator 17 adds the rows of its most recent TIME.
# The numbers 5, 13 and 17 are positions in data_list and will need to be
# changed if the set of indicator files changes.
keep_cols <- c("TIME", "GEO", "Value")
for (j in seq_along(data_list)) {
  indicator <- data_list[[j]]

  sel <- which(indicator$TIME == sel_year)
  res <- rbind(res, indicator[sel, keep_cols, drop = FALSE])

  if (j == 5) {
    sel <- which(indicator$TIME == extra_year)
    res <- rbind(res, indicator[sel, keep_cols, drop = FALSE])
  }

  if (j == 13) {
    sel <- grep(as.character(sel_year), as.character(indicator$TIME))
    res <- rbind(res, indicator[sel, keep_cols, drop = FALSE])
  }

  if (j == 17) {
    year_temp <- max(indicator$TIME)
    sel <- which(indicator$TIME == year_temp)
    res <- rbind(res, indicator[sel, keep_cols, drop = FALSE])
  }
}
# Extra indicators by Alexander: read ./script_data/extra<number>.csv into
# extra_list, cleaning each file the same way as the main indicators
# (numeric Value without "," separators, GEO entries containing "Germany"
# renamed to plain "Germany").
extra_list <- list()
for (i in seq_len(n_extra)) {
  fn <- paste0("./script_data/extra", i, ".csv")
  indic_extra <- read.csv(fn, header = TRUE)
  indic_extra$Value <- as.numeric(gsub(",", "", indic_extra$Value))
  sel <- grep("Germany", indic_extra$GEO)
  if (is.factor(indic_extra$GEO)) {
    # Factor column: add the level first, assign, then drop unused levels.
    levels(indic_extra$GEO) <- c(levels(indic_extra$GEO), "Germany")
    indic_extra$GEO[sel] <- "Germany"
    indic_extra$GEO <- droplevels(indic_extra)$GEO
  } else {
    # Character column (read.csv default since R 4.0): assign directly
    # instead of attaching a "levels" attribute to a character vector.
    indic_extra$GEO[sel] <- "Germany"
  }
  extra_list[[i]] <- indic_extra
}
#######################
# For every extra indicator and every country, compute the share of the
# TOTAL value (reference year only) that falls into four groups of NACE_R2
# codes, and append the four shares to res_extra as TIME/GEO/Value rows in
# the order: agriculture, industry, manufacturing, services.
# Rows are appended indicator by indicator and, within an indicator, country
# by country; the labelling of res_extra further down relies on this order.
industry_codes <- c("B-E", "F")
services_codes <- c("G-I", "J", "K", "L", "M_N", "O-Q", "R-U")
for (m in seq_len(n_extra)) {
  indic_extra <- extra_list[[m]]
  for (i in seq_along(country_list)) {
    # Rows of this country for the reference year.
    sel <- which(indic_extra$GEO == country_list[i])
    country_extra <- indic_extra[sel, ]
    sel <- which(country_extra$TIME == sel_year)
    country_extra <- country_extra[sel, ]

    nace <- as.character(country_extra$NACE_R2)
    total <- country_extra$Value[which(nace == "TOTAL")]

    agri <- country_extra$Value[which(nace == "A")] / total
    industry <- sum(country_extra$Value[which(nace %in% industry_codes)]) /
      total
    manufacturing <- country_extra$Value[which(nace == "C")] / total
    services <- sum(country_extra$Value[which(nace %in% services_codes)]) /
      total

    temp <- data.frame(
      TIME = rep(sel_year, 4),
      GEO = rep(country_list[i], 4),
      Value = c(agri, industry, manufacturing, services)
    )
    res_extra <- rbind(res_extra, temp)
  }
}
# Express the shares as percentages with one decimal.
res_extra$Value <- round(100 * res_extra$Value, 1)

# Attach two descriptive columns to res_extra.
# n_seg is the number of 8-row segments in res_extra (4 rows for each of the
# two extra indicators).
n_seg <- nrow(res_extra) / 8

# V1: the four sector labels, repeated over the whole table.
sector_labels <- c(
  "Agriculture weight in the economy",
  "Industry & construction weight in the economy",
  "Industry & construction weight in the economy [manufacturing]",
  "Services weight in the economy"
)
ex1 <- rep(sector_labels, times = n_seg * 2)
res_extra$V1 <- ex1

# V2: the unit -- first half of the table is "% of GVA", second half is
# "% of employment".
ex2 <- rep(c("% of GVA", "% of employment"), each = n_seg * 4)
res_extra$V2 <- ex2
# This completes the calculation of the indicators by Alexander.
#
# Renewable energies: read the table, reshape it to long format (one row per
# GEO/TARGET/column pair) and turn the text columns into numbers.
data_renewable <- read.csv("./script_data/renewable.csv", header = TRUE)
data_renewable <- melt(data_renewable, c("GEO", "TARGET"))
data_renewable$value <- as.numeric(gsub(",", "", data_renewable$value))
data_renewable$TARGET <- as.numeric(gsub(",", "", data_renewable$TARGET))
names(data_renewable) <- c("GEO", "TARGET", "TIME", "Value")
# TIME holds the former column names. Their first character is stripped
# before the conversion to a number (presumably the "X" that read.csv puts
# in front of numeric column headers -- confirm against the file).
# substring() is vectorised, so the whole column is handled in one call,
# which also works when the table has no rows.
data_renewable$TIME <- as.numeric(
  substring(as.character(data_renewable$TIME), 2)
)
# Build one table per country and write it to <country>.csv in the working
# directory. Each table starts from the country's rows of `res`, gets the
# labels attached, and is then extended with derived rows at fixed positions.
# The row positions used below are hard-coded, so the statement order matters.
for (i in seq(length(country_list))){
# All rows collected for this country, in the order they were stacked in res.
sel <- which(res$GEO==country_list[i])
country_end <- res[sel,]
# Column 3 is Value: row 3 absorbs row 4, row 4 becomes the sum of rows 5 and
# 6, then rows 2, 5 and 6 are dropped.
country_end[3,3] <- country_end[3,3]+country_end[4,3]
country_end[4,3] <- country_end[5,3]+country_end[6,3]
country_end <- country_end[-c(2,5,6),]
## country_end[5:6,3] <- country_end[6:5,3]
# Rows whose TIME matches sel_quarter are collapsed into one: their values
# are summed into the first matching row, which is relabelled with sel_year,
# and the remaining matching rows are removed.
# NOTE(review): if exactly one row matches, sel[2:(length(sel))] evaluates to
# c(NA, sel[1]) -- confirm this case cannot occur.
sel <- grep(sel_quarter,country_end$TIME)
val <- sum(country_end$Value[sel])
country_end$Value[sel[1]] <- val
country_end$TIME[sel[1]] <- sel_year
country_end <- country_end[-(sel[2:(length(sel))]),]
# Attach the label columns. labels was read without a header; the code below
# uses its columns as V1 and V2 (assumes labels has one row per remaining
# row of country_end -- TODO confirm).
country_end <- cbind(country_end, labels)
country_end$V1 <- as.character(country_end$V1)
country_end$V2 <- as.character(country_end$V2)
###################################
# Cumulative growth over year_seq from indicator 5: each yearly value v is
# turned into a factor 1 + v/100, the factors are multiplied, and the result
# is expressed again as a percentage rounded to two decimals.
data <- data_list[[5]]
sel <- which(data$GEO==country_list[i])
# Rows of the extra indicators for this country (used further down).
sel2 <- which(res_extra$GEO==country_list[i])
data <- data[sel, ]
gdp_vec <- seq(length(year_seq))
for (m in seq(length(year_seq))){
sel <- which(data$TIME==year_seq[m])
gdp_vec[m] <- data$Value[sel]
}
gdp_vec <- 1+gdp_vec/100.
final_res <- round(((prod(gdp_vec)-1)*100),2)
# Row 9 is copied as a template, inserted at position 10, and its label, unit
# and value are then overwritten with the cumulative growth figure.
newrow <- country_end[9, ]
country_end <- insertRow(country_end, newrow, 10)
country_end$V1[10] <- "Real GDP growth rate 2008-2013"
country_end$V2[10] <- "%"
country_end$Value[10] <- final_res
extra_data <- res_extra[sel2, ]
## country_end <- insertRow(country_end, extra_data[1, ], 11)
## country_end <- insertRow(country_end, extra_data[4, ], 12)
## country_end <- insertRow(country_end, extra_data[2, ], 13)
## country_end <- insertRow(country_end, extra_data[5, ], 14)
## country_end <- insertRow(country_end, extra_data[3, ], 15)
## country_end <- insertRow(country_end, extra_data[6, ], 16)
# Insert the eight extra-indicator rows starting at position 11.
# After these calls positions 11..18 hold extra_data rows 1, 5, 2, 6, 3, 4,
# 8, 7 (the last two insertions reuse positions 16 and 17 and push row 7
# down to 18).
# NOTE(review): the first six calls pair row k with row k+4; positions 17 and
# 18 for the last two calls would continue that pattern -- confirm whether the
# current order is intended.
country_end <- insertRow(country_end, extra_data[1, ], 11)
country_end <- insertRow(country_end, extra_data[5, ], 12)
country_end <- insertRow(country_end, extra_data[2, ], 13)
country_end <- insertRow(country_end, extra_data[6, ], 14)
country_end <- insertRow(country_end, extra_data[3, ], 15)
country_end <- insertRow(country_end, extra_data[7, ], 16)
country_end <- insertRow(country_end, extra_data[4, ], 16)
country_end <- insertRow(country_end, extra_data[8, ], 17)
##############################
# Total government expenditure as a percentage of GDP (one decimal), inserted
# as a new row right after the expenditure row.
# The V1 strings must match the label file exactly, including the spelling
# "goverment" (presumably that is how it is written in labels.csv -- verify).
sel_gov <- which(country_end$V1=="Total goverment expenditure" )
sel_gdp <- which(country_end$V1=="GDP" )
gov_over_gdp <-round((country_end$Value[sel_gov]/
country_end$Value[sel_gdp]*100),1)
newrow <- country_end[sel_gov, ]
newrow$Value <- gov_over_gdp
newrow$V2 <- "% GDP"
country_end <- insertRow(country_end, newrow, (sel_gov+1))
# Last indicator: the renewable-energy value of this country for the most
# recent TIME in data_renewable, appended as the final row.
year_temp <- max(data_renewable$TIME)
sel <- which(data_renewable$TIME==year_temp)
data_temp <- data_renewable[sel, ]
sel <- which(data_temp$GEO==country_list[i])
data_temp <- data_temp[sel, ]
# final_row is a list of five one-column pieces; it is given the column names
# of country_end so rbind() can match it (assumes country_end has exactly
# five columns at this point -- TODO confirm).
# NOTE(review): the label says "gross financial energy consumption"; check
# whether "gross final energy consumption" was meant.
final_row <- c( as.data.frame(year_temp),
as.data.frame(country_list[i]),
as.data.frame(data_temp$Value),
as.data.frame("Share of renewable energy in gross financial energy consumption should be increased to 20%"),
as.data.frame("%") )
names(final_row) <- names(country_end)
country_end <- rbind(country_end, final_row)
# The first value is rescaled by 1e6 and rounded to two decimals.
country_end$Value[1] <- round((country_end$Value[1]/1e6),2)
###################################
# Write the table as comma-separated text without header or row names.
fn <- paste(country_list[i],".csv",sep="")
write.table(country_end,
fn,
row.names=FALSE, col.names=FALSE, sep=",")
}
# Progress marker printed once all country files have been written.
print("So far so good")