OSDN > Developer > larry77 > Chambre > myprojects-hg-reborn > File Details
myprojects-hg-reborn
Fork

Dépôt original, Pas de origine de fork
File Info

Révision	221dfa4b3d3e7d79d49af50aa18e80ca15a6d8d8
Taille	10,424 octets
l'heure	2009-08-31 21:21:14
Auteur	lorenzo
Message de Log	Minor modifications to a couple of codes. I simply changed the names of the input files they require to work.
Content

Export as raw format
#! /usr/bin/env python
from sociopatterns.loader import Loader
import scipy as s
import numpy as n
import pylab as p


#Some notes on usage: since I did not build the library provided by Ciro,
#I need to copy the whole sociopattern directory in  the same folder where
#I have this code.




XXTEA_CRYPTO_KEY = (0xd2e3fd73, 0xfa70ca9c, 0xe2575826, 0x0fe09946)


# loader = Loader(["log_1240843289.pcap"], decode=1,
# xxtea_crypto_key=XXTEA_CRYPTO_KEY, load_contacts=0, unique_contacts=1,
# load_sightings=1)




# def iter_reading(read_contact=1,read_sighting=1,dumpfile,\
#                  time_begin,time_end, len_data ):

    
    


##############################################################
##############################################################
##############################################################

# The aim of this code is to gain info about the duration of the average
# visit in the museum. There are many subtleties, but we will start
# out with a very naive approach: the change in the bootcount number
#means that a visit has ended. The bootcount number is broadcast in the
#sighting protocol. This info needs to be complemented by the ID of the tag
#changing its bootcount and by the times when this happens.



#Now the aim is to extract (timestap, tag_id,bootcount) and store them into
#an array consisting of 3 columns. Rather than attaching the data every time we
#read them, we will allocate the array to start with (otherwise the whole
#process would become slower and slower as time progresses)

#initialize an array I will use to load the data

read=0

data_arr=s.zeros(2).astype("int") #just an initialization, it will be
#overwritten anyhow

if (read==1):

    time_begin=1240843289

    #delta_time=20000

    #time_end=time_begin+delta_time

    time_end= 1241181886 #time_begin+3000

    #n_row=delta_time

    len_data=3613311+10

    data_arr=s.zeros((len_data,3)).astype("int")-1 #let us initialize the
    #array with an impossible value

    print "s.shape(data_arr) is, ", s.shape(data_arr)


    loader = Loader(["log_1240843289.pcap"],start_time=time_begin,
                    stop_time=time_end, decode=1,
    xxtea_crypto_key=XXTEA_CRYPTO_KEY, load_contacts=0, unique_contacts=1,
    load_sightings=1,unique_sightings=1, sighting_time_delta=10)




    i=0
    for c in loader:
        #print c.t, c.id, c.boot_count
        #print "c.protocol is, ", c.protocol
        data_arr[i,:]=s.array([c.t,c.id,c.boot_count]).astype("int")
        #print "data_arr[i, :] is, ", data_arr[i, :]

        if (s.remainder(i,100000)==0):
            print "i is, ", i

        i+=1

    sel=s.where(data_arr[:,0]!=-1)

    data_arr=data_arr[sel[0],:]

    p.save("data_arr.dat", data_arr , fmt='%d')

else:
    data_arr=p.load("boot_count_cut.dat")
    data_arr=data_arr.astype("int")
    print "I finished reading the data"


print "id_list is, ", s.unique1d(data_arr[:,1])

print "s.shape(data_arr) is, ", s.shape(data_arr)

def time_interval_and_bootcount_single_tag(data_arr,tag_id):
    find_id_by_tag=s.where(data_arr[:,1]==tag_id)
    #the following may look odd but it retrieves the 1st and
    #3rd column (counted from 1, or the oth and 2nd counted from zero)
    #of the matrix, that is to day the timestamp and bootcount of the
    #sighting reports of a given tag
    retrive_time_bootcount=data_arr[find_id_by_tag[0],:][:,0:3:2]
    #now calculate an array giving the gaps in the bootcounts and the
    #corresponding gaps in the timestamps

    p.save("time_and_bootcount.dat",retrive_time_bootcount , fmt='%d')
    
    boot_diff=s.diff(retrive_time_bootcount[:,1])
    time_diff=s.diff(retrive_time_bootcount[:,0])

    boot_gap_sel=s.where(boot_diff!=0)

    time_gaps=time_diff[boot_gap_sel]

    return time_gaps


def time_interval_and_bootcount_all_tags(data_arr):
    tag_id_list=s.unique1d(data_arr[:,1])
    res=s.zeros(0).astype("int")

    for track_tag in tag_id_list:
        single_tag_gap=time_interval_and_bootcount_single_tag(data_arr\
                                                              ,track_tag)
        res=s.hstack((res,single_tag_gap))


    return (res)




def visit_duration_single_tag(data_arr,tag_id):
    find_id_by_tag=s.where(data_arr[:,1]==tag_id)
    #the following may look odd but it retrieves the 1st and
    #3rd column (counted from 1, or the oth and 2nd counted from zero)
    #of the matrix, that is to day the timestamp and bootcount of the
    #sighting reports of a given tag
    retrive_time_bootcount=data_arr[find_id_by_tag[0],:][:,0:3:2]
    #now calculate an array giving the gaps in the bootcounts and the
    #corresponding gaps in the timestamps

    bootcount_list=retrive_time_bootcount[:,1]
    
    bootcount_unique=s.unique1d(bootcount_list)
    time_list=retrive_time_bootcount[:,0]

    duration_arr=s.zeros(len(bootcount_unique)).astype("int") -1 #it
    #will be overwritten

    interval_arr=s.zeros(len(bootcount_unique)-1).astype("int") -1 #it
    #will be overwritten

    #i=0
    #for my_bootcount in bootcount_unique:
        #sel=s.where(bootcount_list==my_bootcount)
        #duration_arr[i]=max(time_list[sel])-min(time_list[sel])
        #i+=1


    for i  in xrange(len(bootcount_unique)):
        my_bootcount=bootcount_unique[i]
        sel=s.where(bootcount_list==my_bootcount)
        duration_arr[i]=max(time_list[sel])-min(time_list[sel])

        if (i>0):
            lower=bootcount_unique[i-1]
            upper=bootcount_unique[i]
            sel_lower=s.where(bootcount_list==lower)[0][-1]
            sel_upper=s.where(bootcount_list==upper)[0][0]
            interval_arr[i-1]=time_list[sel_upper]-time_list[sel_lower]


    if (tag_id==503):
        p.save("503_duration_arr.dat",duration_arr,fmt='%d'  )
        p.save("503_bootcount_list.dat",bootcount_list,fmt='%d'  )
        p.save("503_bootcount_unique.dat",bootcount_unique,fmt='%d'  )
        

    #NB: the number of reboots for a tag is given by the number of unique
    #bootcounts minus 1 !!!
    return [duration_arr, len(bootcount_unique)-1, interval_arr]


def visit_duration_many_tags(data_arr):
    
    tag_id_list=s.unique1d(data_arr[:,1])

    p.save("all_visit_tag_ids.dat", tag_id_list,fmt='%d' )
    res=s.zeros(0).astype("int")
    count_boot=s.zeros(0).astype("int")
    interval_between_boots=s.zeros(0).astype("int")

    for track_tag in tag_id_list:
        duration_and_count=visit_duration_single_tag(data_arr,track_tag)
        single_tag_duration=duration_and_count[0]

        single_tag_count_boot=duration_and_count[1]

        single_tag_boot_interval=duration_and_count[2]
        
        res=s.hstack((res,single_tag_duration))
        count_boot=s.hstack((count_boot,single_tag_count_boot))
        interval_between_boots=s.hstack((interval_between_boots,\
                                         single_tag_boot_interval))

    return [res, count_boot,interval_between_boots]



def contact_duration_and_interval_single_tag(single_tag_no_rep, delta_slice):

    #the following if condition is useful only when I am really tracking a particular
    #tag whose ID is given a priory but which may not exist at all (in the sense that
    #it would not estabilish any contact) in the time window during which I am studying
    #the system.


    if (single_tag_no_rep==None):
        print "The chosen tag does not exist hence no analysis can be performed on it"
        return



    delta_slice=int(delta_slice) #I do not need floating point arithmetic

    single_tag_no_rep=(single_tag_no_rep-single_tag_no_rep[0])/delta_slice
    gaps=s.diff(single_tag_no_rep) #a bit more efficient than the line above

    #print "gaps is, ", gaps

    #gaps is now an array of integers. It either has a list of consecutive 1`s
    # (which means a contact duration of delta_slice times the number of consecutive ones)
    # of an entry higher than one which expresses (in units of delta_slice) the time during
    #which the tag underwent no contact
    

    #p.save("gaps.dat",gaps, fmt='%d')

    find_gap=s.where(gaps != 1)

    gap_distr=(gaps[find_gap]-1)*delta_slice  #so, this is really the list of the
    #time interval between two contacts for my tag. After the discussion with Ciro,
    #I modified slightly the definition (now there is a -1) in the definition.
    #It probably does not matter much for the calculated distribution.

    #print "gap_distr is, ", gap_distr
    #NB: the procedure above does NOT break down is gap_distr is empty


    #Now I calculate the duration of the contacts of my tag. I changed this bit since
    #I had new discussions with Ciro

    #single_tag_no_rep=s.hstack((0,single_tag_no_rep))

    #print "single_tag_no_rep is, ", single_tag_no_rep, "and its length is, ", len(single_tag_no_rep)
    
    e2=s.diff(single_tag_no_rep)

    #print "e2 is, ", e2
    
    sel=s.where(e2!=1)[0]
    #print "sel is, ", sel

    res=0 #this will contain the results and will be overwritten

    #What is here needs to be tested very carefully! There may be some bugs


    sol=s.hstack((0,sel,len(e2)))
    #print "sol is, ", sol

        
    
    res=s.diff(sol)
    #print "res initially is, ", res


    res[0]=res[0]+1 #to account for troubles I normally have at the beginning of the array

    #print "res is, ", res
        
        


    res=res*delta_slice


    #print "the sum of all the durations is, ", res.sum()

    return res,gap_distr 



    

#tag_id=509

# my_gaps=time_interval_and_bootcount_single_tag(data_arr,tag_id)

# p.save("single_tag_gaps.dat", my_gaps , fmt='%d')

# all_gaps=time_interval_and_bootcount_all_tags(data_arr)

# p.save("all_tag_gaps.dat",all_gaps , fmt='%d')

all_duration_and_all_counts=visit_duration_many_tags(data_arr)

all_durations=all_duration_and_all_counts[0]
p.save("all_visit_duration.dat",all_durations , fmt='%d')

all_counts=all_duration_and_all_counts[1]
p.save("all_counts.dat",all_counts , fmt='%d')

all_boot_intervals=all_duration_and_all_counts[2]
p.save("all_boot_intervals.dat",all_boot_intervals , fmt='%d')


fig = p.figure()
axes = fig.gca()

n_bins=50
#print "the length of df_distr is, ", len(df_distr)
axes.hist(all_durations/3600.,bins=n_bins, normed=0)

p.ylabel('frequency')
p.xlabel("Duration (hours)")
p.title('Analysis of log_1240843289.pcap')

cluster_name="histogram_visit_duration_from_bootcount.pdf"
p.savefig(cluster_name)

p.clf()    




print "So far so good"
myprojects-hg-reborn Fork

Tags

Frequently used words (click to add to your profile)

File Info

Content

myprojects-hg-reborn
Fork