Révision | 81b18823357e48c6d28668eff86d0a85385886b7 |
---|---|
Taille | 13,718 octets |
l'heure | 2009-10-30 23:26:42 |
Auteur | lorenzo |
Message de Log | New modifications in the way in which I calculate the number of infected
|
#!/usr/bin/env python
import scipy as s
import pylab as p
import numpy as n
import sys
import string
def remove_self_loops(clean_data):
sel=s.where(clean_data[:,1]!=clean_data[:,2])[0]
clean_data_no_int=clean_data[sel,:]
return (clean_data_no_int.astype("int64"))
#NB: the following function only keeps track of the interaction between infected
#individuals giving rise to new infected.
#If initially I introduce two infected individuals in the system which are in contact, then I will
#not see their contact as it does not increment the # of infected
def time_infection(data_no_loops, infected):
before_and_after=\
network_after_infection_introduction(infected,data_no_loops)
net_infected=before_and_after[0]
infected_list_overall=s.zeros(0).astype("int64")
infected_times_overall=s.zeros(0).astype("int64")
set_infected=set(infected)
print "set_infected is, ", set_infected
# set_inf_old=set_infected
time_unique=s.unique1d(net_infected[:,0])
cumulative_new_infected=s.zeros(len(time_unique)).astype("int64")
#print "time_unique is, ", time_unique
for i in xrange(len(time_unique)):
time_sel=s.where(net_infected[:,0]==time_unique[i])[0]
contacts_in_frame=net_infected[time_sel,:] #all contacts in a time
#frame
for m in xrange(len(contacts_in_frame)):
#print "contacts_in_frame[m,1], contacts_in_frame[m,2] are, ", contacts_in_frame[m,1],\
#contacts_in_frame[m,2]
# if ((contacts_in_frame[m,1] in set_infected) ^ \
# (contacts_in_frame[m,2]) in set_infected) :
if (len(set(contacts_in_frame[m,1:3]) & set_infected) ==1):
#I am imposing that only one of the individuals involved in the
#contact is infected.
#that I am NOT considering the case of two infected individuals
#meeting up as a new infection.
#Now I have a genuine new infected individual
#print "set(contacts_in_frame[m,1:3]) is", set(contacts_in_frame[m,1:3])
set_infected_temp= set_infected | set(contacts_in_frame[m,1:3])
#set_infected= set_infected | set(contacts_in_frame[m,1:3])
#print "len(set_infected) is, ", len(set_infected)
infected_list_overall=s.hstack((infected_list_overall,\
contacts_in_frame[m,1:3]))
infected_times_overall=s.hstack((infected_times_overall,\
contacts_in_frame[m,0],\
contacts_in_frame[m,0]))
set_infected=set_infected_temp #It is a matter of being consistent
#here: I update the list of infected individuals only after I have
#finished scanning a time frame. Otherwise, within the same time frame
#the spreading of the infection may depend on which node gets processed
#first.
cumulative_new_infected[i]=len(set_infected)
return [infected_list_overall,infected_times_overall,\
cumulative_new_infected, time_unique]
def read_and_clean_data(file_number):
prefix="raw_time_edge_list_"
prefix=prefix+file_number
postfix="_.dat"
filename=prefix+postfix
print "filename is, ", filename
f = open(filename)
raw_data = [map(int, string.split(line)) for line in f.readlines()]
f.close()
raw_data = s.array(raw_data, dtype="int64")
print "s.shape(raw_data) is, ", s.shape(raw_data)
prefix="blacklist_"
prefix=prefix+file_number
postfix="_.dat"
filename=prefix+postfix
f = open(filename)
blacklist = [map(int, string.split(line)) for line in f.readlines()]
f.close()
blacklist = s.array(blacklist, dtype="int64")
print "blacklist is, ", blacklist
#removal_list=s.zeros(0,dtype="int64")
if (len(blacklist)>0):
for i in xrange(len(blacklist)):
keep_list=s.where((raw_data[:,1]!=blacklist[i])\
& (raw_data[:,2]!=blacklist[i]) )[0]
# print "keep_list is, ", keep_list
raw_data=raw_data[keep_list, :]
return (raw_data.astype("int64"))
def read_dirty_data(file_number):
prefix="raw_time_edge_list_"
prefix=prefix+file_number
postfix="_.dat"
filename=prefix+postfix
print "filename is, ", filename
f = open(filename)
raw_data = [map(int, string.split(line)) for line in f.readlines()]
f.close()
raw_data = s.array(raw_data, dtype="int64")
return (raw_data.astype("int64"))
#Causality: I can modify the dataset only after the first appearance of any
#initially_infected_individual
def network_after_infection_introduction(ini_infected,clean_data):
earliest_infected=len(clean_data)
infected_times=s.zeros(0).astype("int64")
for i in xrange(len(ini_infected)):
infected=s.where((clean_data[:,1]==ini_infected[i])\
| (clean_data[:,2]==ini_infected[i]) )[0]
infected_times=s.hstack((infected_times,infected))
#temp_first_infected=min(infected_times)
#earliest_infected=min(earliest_infected,temp_first_infected)
earliest_infected=min(infected_times)
print "ealiest_infected is, ", earliest_infected
print "clean_data[earliest_infected] is, ", clean_data[earliest_infected]
#Now get rid of the array before that time (I do not need it since I cannot)
#modify it (no infection going backward in time!)
network_section=clean_data[earliest_infected :, :]
network_section_before=clean_data[0:earliest_infected, :]
return [network_section.astype("int64"), network_section_before.astype("int64")]
def spread_infection(network_infected,infected):
infection_position_all=s.zeros(0)
for i in xrange(len(infected)):
infected_pos=s.where((network_infected[:,1]==infected[i])\
| (network_infected[:,2]==infected[i]) )[0]
infection_position_all=s.hstack((infection_position_all,infected_pos))
infection_position_all=s.unique1d(infection_position_all)
infection_position_all=s.sort(infection_position_all).astype("int64")
infected=network_infected[infection_position_all,1:3].\
reshape(2*len(infection_position_all))
infected=s.unique1d(infected)
return [infection_position_all.astype("int64"),infected.astype("int64")]
################################################################
################################################################
################################################################
################################################################
################################################################
file_number=sys.argv[1]
data_laundry=0 #this specifies whether I need to clean the data via blacklists
#or not
static_infection=0# do not use the old functions that know nothing about time
if (data_laundry==1):
clean_data=read_and_clean_data(file_number)
else:
clean_data=read_dirty_data(file_number)
print "s.shape(clean_data) is, ", s.shape(clean_data)
p.save("clean_data.dat", clean_data, fmt="%d")
ini_infected=s.array([1084], dtype="int64")
# ini_infected=s.array([83558456,73400387], dtype="int64")
ini_infected_bis=s.copy(ini_infected)
prefix="initial_infected_"
prefix=prefix+file_number
postfix="_.dat"
filename=prefix+postfix
p.save(filename, ini_infected,fmt="%d")
if (static_infection==1):
net_before_and_after=network_after_infection_introduction(ini_infected,clean_data)
network_infected=net_before_and_after[0]
prefix="network_after_infection_"
prefix=prefix+file_number
postfix="_.dat"
filename=prefix+postfix
p.save(filename, network_infected,fmt="%d")
network_healthy=net_before_and_after[1]
print "s.shape(network_infected) is, ", s.shape(network_infected)
#infection_list_old=s.zeros(1).astype("int64")
infection_list_new=s.ones(1).astype("int64")
#count=0
len_old=len(ini_infected)
len_new=0
flag=0
count=0
while (len_old!=len_new):
prefix="infected_at_generation_"
count_name="%01d"%(count)
prefix=prefix+count_name
prefix=prefix+"_"
prefix=prefix+file_number
postfix="_.dat"
filename=prefix+postfix
print "filename is, ", filename
p.save(filename, ini_infected,fmt="%d")
print "ini_infected is, ", ini_infected
if (flag!=0):
len_old=len(ini_infected)
#Careful! The next function also changes the value of ini_infected!!!
infected_pos_and_individuals=spread_infection(network_infected,ini_infected)
count+=1
infection_generation=infected_pos_and_individuals[0]
ini_infected=infected_pos_and_individuals[1]
len_new=len(ini_infected)
infection_list_new=infection_generation
flag=1
len_iter=s.zeros(1).astype("int64")
len_iter[:]=count
prefix="number_of_generations_"
prefix=prefix+file_number
postfix="_.dat"
filename=prefix+postfix
p.save(filename, len_iter,fmt="%d")
prefix="final_infected_"
prefix=prefix+file_number
postfix="_.dat"
filename=prefix+postfix
p.save(filename, ini_infected,fmt="%d")
######################################################################
######################################################################
################################################################
################################################################
################################################################
################################################################
################################################################
#First I save the network as created as soon as an infected individual is introduced.
net_before_and_after=network_after_infection_introduction(ini_infected,clean_data)
network_infected=net_before_and_after[0]
prefix="network_after_infection_"
prefix=prefix+file_number
postfix="_.dat"
filename=prefix+postfix
p.save(filename, network_infected,fmt="%d")
data_no_loops=remove_self_loops(clean_data)
print "ini_infected_bis is, ", ini_infected_bis
#Now I propagate the infection
dynamic_infection=time_infection(data_no_loops, ini_infected_bis)
#At this point I am done with the infection and it is time to
#save some data in a suitable form.
prefix="dynamic_infectitious_contacts_"
prefix=prefix+file_number
postfix="_.dat"
filename=prefix+postfix
p.save(filename, dynamic_infection[0],fmt="%d")
prefix="dynamic_infectitious_contacts_binary_"
prefix=prefix+file_number
postfix="_.dat"
filename=prefix+postfix
p.save(filename, dynamic_infection[0].reshape((-1,2)),fmt="%d")
prefix="dynamic_infectitious_contacts_binary_and_time_"
prefix=prefix+file_number
postfix="_.dat"
filename=prefix+postfix
p.save(filename, s.hstack((dynamic_infection[0].reshape((-1,2)),\
dynamic_infection[1].reshape((-1,2)) ))[:,0:3],fmt="%d")
prefix="dynamic_infected_individuals_"
prefix=prefix+file_number
postfix="_.dat"
filename=prefix+postfix
p.save(filename, s.unique1d(dynamic_infection[0]),fmt="%d")
prefix="dynamic_infectitious_times_"
prefix=prefix+file_number
postfix="_.dat"
filename=prefix+postfix
p.save(filename, dynamic_infection[1],fmt="%d")
####################################
####################################
#Now I am going to calculate some statistics.
time_unique=dynamic_infection[3]
print "time_unique is, ", time_unique
count_infections=s.ones(len(time_unique)).astype("int64")
# for i in xrange(len(time_unique)):
# count_infections[i]=len(s.where(dynamic_infection[1]==time_unique[i])[0])/2
# #NB: I have to divide by 2 since every contact time is reported twice
count_infections=dynamic_infection[2]
print "count_infections is, ", count_infections
#print "s.sum(count_infections) is, ", s.sum(count_infections)
cum_number_new_infections=count_infections
prefix="cumulative_number_infected_individuals_"
prefix=prefix+file_number
postfix="_.dat"
filename=prefix+postfix
p.save(filename, cum_number_new_infections,fmt="%d")
time_infection_scaled=(time_unique-time_unique[0])/3600.
#i.e. time in minutes counted from the new infection
prefix="rescaled_time_occurrence_new_infections_"
prefix=prefix+file_number
postfix="_.dat"
filename=prefix+postfix
p.save(filename, time_infection_scaled)
fig = p.figure()
axes = fig.gca()
axes.plot(time_infection_scaled,cum_number_new_infections/ \
(1.*cum_number_new_infections[-1]),"ro",\
linewidth=2.)
# axes.plot(time,phi_mean,"k+",\
# label="<phi>")
# # axes.plot(time,pos_results[:,2],"mx",\
# # label="<z> ",linewidth=2.)
p.ylim(0,1.05)
p.xlim(-0.05,)
# axes.legend(loc='lower right')
p.xlabel('Time [hours] from first new infection')
p.ylabel('Normalized number of infections')
p.savefig("Infection_vs_time_normalized.pdf")
p.clf()
fig = p.figure()
axes = fig.gca()
axes.plot(time_infection_scaled,cum_number_new_infections,"ro",\
linewidth=2.)
# axes.plot(time,phi_mean,"k+",\
# label="<phi>")
# # axes.plot(time,pos_results[:,2],"mx",\
# # label="<z> ",linewidth=2.)
# #p.ylim(1.4,2.6)
# axes.legend(loc='lower right')
p.xlim(-0.05,)
p.xlabel('Time [hours] from first new infection')
p.ylabel('Number of infections')
p.savefig("Infection_vs_time.pdf")
p.clf()
print "So far so good"