Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Christian Dietrich
clang-hash
Commits
f4144178
Commit
f4144178
authored
Jan 29, 2017
by
Ludwig Fueracker
Browse files
renamed stuff, added outputs
parent
25f84c14
Changes
1
Show whitespace changes
Inline
Side-by-side
evaluate_data_with_stop.py
View file @
f4144178
...
...
@@ -16,36 +16,31 @@ PATH_TO_RECORDS = '' # gets set from command line parameter
# data/record filenames
INFO_EXTENSION
=
'.info'
FULL_RECORD_FILENAME
=
'full
R
ecord'
+
INFO_EXTENSION
FULL_RECORD_FILENAME
=
'full
_r
ecord'
+
INFO_EXTENSION
COMMIT_INFO_FILENAME
=
'buildInfo_musl_with_stop'
+
INFO_EXTENSION
BUILD_TIME_DATA_FILENAME
=
'total
B
uild
T
imes.csv'
BUILD_TIME_FILENAME
=
'total
B
uild
T
imes.pdf'
BUILD_TIME_DATA_HEADER
=
[
'total
P
arse
T
imes'
,
'total
H
ash
T
imes'
,
'total
C
ompile
T
imes'
,
'diff
ToB
uild
T
imes'
,
'total
B
uild
T
imes'
]
BUILD_TIME_DATA_FILENAME
=
'total
_b
uild
_t
imes.csv'
BUILD_TIME_FILENAME
=
'total
_b
uild
_t
imes.pdf'
BUILD_TIME_DATA_HEADER
=
[
'total
_p
arse
_t
imes'
,
'total
_h
ash
_t
imes'
,
'total
_c
ompile
_t
imes'
,
'diff
_to_b
uild
_t
imes'
,
'total
_b
uild
_t
imes'
]
def
abs_path
(
filename
):
"""Prepends the absolute path to the filename.
"""
"""Prepends the absolute path to the filename."""
return
PATH_TO_RECORDS
+
'/../'
+
filename
def
get
L
ist
OfF
iles
(
directory
):
def
get
_l
ist
_of_f
iles
(
directory
):
for
root
,
dirnames
,
filenames
in
os
.
walk
(
directory
):
for
filename
in
fnmatch
.
filter
(
filenames
,
'*'
+
INFO_EXTENSION
):
yield
os
.
path
.
join
(
root
,
filename
)
errorCount
=
0
astDifferObjSameCount
=
0
missingCount
=
0
################################################################################
#
#
################################################################################
key
T
ranslation
ToN
r
=
{
def
build_key_translation_dict
():
key
_t
ranslation
_to_n
r
=
{
'start-time'
:
0
,
'hash-start-time'
:
1
,
'object-hash'
:
2
,
...
...
@@ -67,61 +62,65 @@ keyTranslationToNr = {
'insertions'
:
18
,
'deletions'
:
19
,
'run_id'
:
20
}
keyTranslationFromNr
=
{
v
:
k
for
k
,
v
in
keyTranslationToNr
.
items
()}
}
key_translation_from_nr
=
{
v
:
k
for
k
,
v
in
key_translation_to_nr
.
items
()}
key_translation_dict
=
key_translation_to_nr
.
copy
()
key_translation_dict
.
update
(
key_translation_from_nr
)
keyTranslation
=
keyTranslationToNr
.
copy
()
keyTranslation
.
update
(
keyTranslationFromNr
)
return
key_translation_dict
key_translation
=
build_key_translation_dict
()
def
tr
(
key
):
"""lookup key translation (both directions)"""
return
key
T
ranslation
[
key
]
return
key
_t
ranslation
[
key
]
def
build
F
ull
R
ecord
T
o
(
path
ToF
ull
R
ecord
F
ile
):
def
build
_f
ull
_r
ecord
_t
o
(
path
_to_f
ull
_r
ecord
_f
ile
):
"""structure of full record:
{commitID: {'build-time': time, files: {filename: {record}, filename: {record}}}}
"""
full
R
ecord
=
build
F
ull
R
ecord
()
full
_r
ecord
=
build
_f
ull
_r
ecord
()
if
DO_PRINT_RECORDS
:
f
=
open
(
path
ToF
ull
R
ecord
F
ile
,
'w'
)
f
=
open
(
path
_to_f
ull
_r
ecord
_f
ile
,
'w'
)
try
:
f
.
write
(
repr
(
full
R
ecord
)
+
"
\n
"
)
f
.
write
(
repr
(
full
_r
ecord
)
+
"
\n
"
)
except
MemoryError
as
me
:
print
me
raise
finally
:
print
time
.
ctime
()
f
.
close
()
print
"built full record, wrote to "
+
path
ToF
ull
R
ecord
F
ile
print
"built full record, wrote to "
+
path
_to_f
ull
_r
ecord
_f
ile
return
full
R
ecord
return
full
_r
ecord
def
build
F
ull
R
ecord
():
def
build
_f
ull
_r
ecord
():
"""Builds a complete record from all the single hash records.
The records are grouped by the commitIDs
"""
full
R
ecord
=
{}
with
open
(
abs_path
(
COMMIT_INFO_FILENAME
),
'r'
)
as
commit
I
nfoFile
:
commit
I
nfo
=
eval
(
commit
I
nfoFile
.
read
())
for
run_id
in
commit
I
nfo
:
full
_r
ecord
=
{}
with
open
(
abs_path
(
COMMIT_INFO_FILENAME
),
'r'
)
as
commit
_i
nfoFile
:
commit
_i
nfo
=
eval
(
commit
_i
nfoFile
.
read
())
for
run_id
in
commit
_i
nfo
:
if
not
isinstance
(
run_id
,
int
):
# dict also contains key 'commit-hash'
continue
;
current
R
ecord
=
{}
current
R
ecord
[
tr
(
'filename'
)]
=
commit
I
nfo
[
run_id
][
'filename'
]
current
R
ecord
[
tr
(
'build-time'
)]
=
commit
I
nfo
[
run_id
][
'build-time'
]
current
R
ecord
[
tr
(
'files'
)]
=
{}
full
R
ecord
[
run_id
]
=
current
R
ecord
for
record
F
ilename
in
get
L
ist
OfF
iles
(
PATH_TO_RECORDS
):
for
line
in
open
(
record
F
ilename
):
current
_r
ecord
=
{}
current
_r
ecord
[
tr
(
'filename'
)]
=
commit
_i
nfo
[
run_id
][
'filename'
]
current
_r
ecord
[
tr
(
'build-time'
)]
=
commit
_i
nfo
[
run_id
][
'build-time'
]
current
_r
ecord
[
tr
(
'files'
)]
=
{}
full
_r
ecord
[
run_id
]
=
current
_r
ecord
for
record
_f
ilename
in
get
_l
ist
_of_f
iles
(
PATH_TO_RECORDS
):
for
line
in
open
(
record
_f
ilename
):
data
=
eval
(
line
)
# commitID = data['commit-hash']
# del data['commit-hash']
obj
F
ilename
=
data
[
'obj-file'
]
obj
_f
ilename
=
data
[
'obj-file'
]
del
data
[
'obj-file'
]
...
...
@@ -134,115 +133,124 @@ def buildFullRecord():
run_id
=
data
[
'run_id'
]
data
NewK
eys
=
{
tr
(
k
):
v
for
k
,
v
in
data
.
items
()}
full
R
ecord
[
run_id
][
tr
(
'files'
)][
obj
F
ilename
]
=
data
NewK
eys
data
_new_k
eys
=
{
tr
(
k
):
v
for
k
,
v
in
data
.
items
()}
full
_r
ecord
[
run_id
][
tr
(
'files'
)][
obj
_f
ilename
]
=
data
_new_k
eys
return
full
R
ecord
return
full
_r
ecord
################################################################################
def
write_to_csv
(
data
,
column
N
ames
,
filename
):
with
open
(
filename
,
"w"
)
as
csv
F
ile
:
writer
=
csv
.
writer
(
csv
F
ile
)
writer
.
writerow
(
column
N
ames
)
def
write_to_csv
(
data
,
column
_n
ames
,
filename
):
with
open
(
filename
,
"w"
)
as
csv
_f
ile
:
writer
=
csv
.
writer
(
csv
_f
ile
)
writer
.
writerow
(
column
_n
ames
)
for
line
in
data
:
writer
.
writerow
(
line
)
def
print
A
vg
(
data
,
name
):
def
print
_a
vg
(
data
,
name
):
print
'avg %s: %f'
%
(
name
,
sum
(
data
)
/
float
(
len
(
data
)))
################################################################################
parse
C
olor
,
hash
C
olor
,
compile
C
olor
,
remain
C
olor
=
(
'#FFFF66'
,
'#FF0000'
,
'#3399FF'
,
'#008800'
)
parse
_c
olor
,
hash
_c
olor
,
compile
_c
olor
,
remain
_c
olor
=
(
'#FFFF66'
,
'#FF0000'
,
'#3399FF'
,
'#008800'
)
def
plot_build_time_graph1
(
data
):
plot
B
uild
T
ime
C
omposition
G
raph
(
data
[
0
],
data
[
1
],
data
[
2
],
data
[
3
])
plot
_b
uild
_t
ime
_c
omposition
_g
raph
(
data
[
0
],
data
[
1
],
data
[
2
],
data
[
3
])
def
plot
B
uild
T
ime
C
omposition
G
raph
(
parse
T
imes
,
hash
T
imes
,
compile
T
imes
,
diff
ToB
uild
T
ime
):
# times in ms
def
plot
_b
uild
_t
ime
_c
omposition
_g
raph
(
parse
_t
imes
,
hash
_t
imes
,
compile
_t
imes
,
diff
_to_b
uild
_t
ime
):
# times in ms
fig
,
ax
=
plt
.
subplots
()
ax
.
stackplot
(
np
.
arange
(
1
,
len
(
parse
T
imes
)
+
1
),
# x axis
[
parse
T
imes
,
hash
T
imes
,
compile
T
imes
,
#diff
ToB
uild
T
ime
],
colors
=
[
parse
C
olor
,
hash
C
olor
,
compile
C
olor
,
# remain
C
olor
ax
.
stackplot
(
np
.
arange
(
1
,
len
(
parse
_t
imes
)
+
1
),
# x axis
[
parse
_t
imes
,
hash
_t
imes
,
compile
_t
imes
,
#diff
_to_b
uild
_t
ime
],
colors
=
[
parse
_c
olor
,
hash
_c
olor
,
compile
_c
olor
,
# remain
_c
olor
],
edgecolor
=
'none'
)
plt
.
xlim
(
1
,
len
(
parse
T
imes
))
plt
.
xlim
(
1
,
len
(
parse
_t
imes
))
plt
.
xlabel
(
'commits'
)
plt
.
ylabel
(
'time [s]'
)
ax
.
set_yscale
(
'log'
)
lgd
=
ax
.
legend
([
#mpatches.Patch(color=remain
C
olor),
mpatches
.
Patch
(
color
=
compile
C
olor
),
mpatches
.
Patch
(
color
=
hash
C
olor
),
mpatches
.
Patch
(
color
=
parse
C
olor
)],
lgd
=
ax
.
legend
([
#mpatches.Patch(color=remain
_c
olor),
mpatches
.
Patch
(
color
=
compile
_c
olor
),
mpatches
.
Patch
(
color
=
hash
_c
olor
),
mpatches
.
Patch
(
color
=
parse
_c
olor
)],
[
#'remaining build time',
'compile time'
,
'hash time'
,
'parse time'
],
loc
=
'center left'
,
bbox_to_anchor
=
(
1
,
0.5
))
fig
.
savefig
(
abs_path
(
BUILD_TIME_FILENAME
),
bbox_extra_artists
=
(
lgd
,),
bbox_inches
=
'tight'
)
printAvg
(
parseTimes
,
'parse'
)
printAvg
(
hashTimes
,
'hash'
)
printAvg
(
compileTimes
,
'compile'
)
printAvg
(
diffToBuildTime
,
'remainder'
)
print
"
\n
-----------------"
print
"average total times per build:"
print_avg
(
parse_times
,
'parse'
)
print_avg
(
hash_times
,
'hash'
)
print_avg
(
compile_times
,
'compile'
)
print_avg
(
diff_to_build_time
,
'remainder'
)
print
""
print
"average times if header/source file touched"
print
"-----------------
\n
"
################################################################################
def
make
G
raphs
(
full
R
ecord
):
def
make
_g
raphs
(
full
_r
ecord
):
# data for build time graphs
totalParseTimes
=
[]
totalHashTimes
=
[]
totalCompileTimes
=
[]
totalBuildTimes
=
[]
diffToBuildTimes
=
[]
# freshBuildRecord = fullRecord[0]
for
run_id
in
fullRecord
:
total_parse_times
=
[]
total_hash_times
=
[]
total_compile_times
=
[]
total_build_times
=
[]
diff_to_build_times
=
[]
parse_times_header_touched
=
[]
parse_times_source_touched
=
[]
# freshBuildRecord = full_record[0]
for
run_id
in
full_record
:
if
run_id
<
2
:
# skip fresh build (and also 1st, seems to be buggy...)
continue
current
R
ecord
=
full
R
ecord
[
run_id
]
current
F
iles
=
current
R
ecord
[
tr
(
'files'
)]
files
C
hanged
=
len
(
current
F
iles
)
# count changed files per run #TODO!
current
_r
ecord
=
full
_r
ecord
[
run_id
]
current
_f
iles
=
current
_r
ecord
[
tr
(
'files'
)]
files
_c
hanged
=
len
(
current
_f
iles
)
# count changed files per run #TODO!
print
current
R
ecord
[
tr
(
'filename'
)]
print
current
_r
ecord
[
tr
(
'filename'
)]
total
P
arse
D
uration
=
0
total
H
ash
D
uration
=
0
total
C
ompile
D
uration
=
0
for
filename
in
current
F
iles
:
# deal with first commit
# if tr('ast-hash') not in current
F
iles[filename].keys():
total
_p
arse
_d
uration
=
0
total
_h
ash
_d
uration
=
0
total
_c
ompile
_d
uration
=
0
for
filename
in
current
_f
iles
:
# deal with first commit
# if tr('ast-hash') not in current
_f
iles[filename].keys():
# print "error: missing AST hash for file %s" % filename
# continue
current
F
ile
R
ecord
=
current
F
iles
[
filename
]
total
P
arse
D
uration
+=
current
F
ile
R
ecord
[
tr
(
'parse-duration'
)]
total
H
ash
D
uration
+=
current
F
ile
R
ecord
[
tr
(
'hash-duration'
)]
total
C
ompile
D
uration
+=
current
F
ile
R
ecord
[
tr
(
'compile-duration'
)]
current
_f
ile
_r
ecord
=
current
_f
iles
[
filename
]
total
_p
arse
_d
uration
+=
current
_f
ile
_r
ecord
[
tr
(
'parse-duration'
)]
total
_h
ash
_d
uration
+=
current
_f
ile
_r
ecord
[
tr
(
'hash-duration'
)]
total
_c
ompile
_d
uration
+=
current
_f
ile
_r
ecord
[
tr
(
'compile-duration'
)]
# if total
P
arse
D
uration == 0:# or (total
C
ompile
D
uration/1e6) > 500000:
# if total
_p
arse
_d
uration == 0:# or (total
_c
ompile
_d
uration/1e6) > 500000:
# continue
total
P
arse
T
imes
.
append
(
total
P
arse
D
uration
/
1e6
)
# nano to milli
total
H
ash
T
imes
.
append
(
total
H
ash
D
uration
/
1e6
)
total
C
ompile
T
imes
.
append
(
total
C
ompile
D
uration
/
1e6
)
build
T
ime
=
current
R
ecord
[
tr
(
'build-time'
)]
total
B
uild
T
imes
.
append
(
build
T
ime
/
1e6
)
diff
ToB
uild
T
imes
.
append
((
build
T
ime
-
total
P
arse
D
uration
-
total
H
ash
D
uration
-
total
C
ompile
D
uration
)
/
1e6
)
total
_p
arse
_t
imes
.
append
(
total
_p
arse
_d
uration
/
1e6
)
# nano to milli
total
_h
ash
_t
imes
.
append
(
total
_h
ash
_d
uration
/
1e6
)
total
_c
ompile
_t
imes
.
append
(
total
_c
ompile
_d
uration
/
1e6
)
build
_t
ime
=
current
_r
ecord
[
tr
(
'build-time'
)]
total
_b
uild
_t
imes
.
append
(
build
_t
ime
/
1e6
)
diff
_to_b
uild
_t
imes
.
append
((
build
_t
ime
-
total
_p
arse
_d
uration
-
total
_h
ash
_d
uration
-
total
_c
ompile
_d
uration
)
/
1e6
)
print
'run_id %d, #files
C
hanged: %d'
%
(
run_id
,
files
C
hanged
)
print
'run_id %d, #files
_c
hanged: %d'
%
(
run_id
,
files
_c
hanged
)
print
A
vg
(
total
B
uild
T
imes
,
'total'
)
print
_a
vg
(
total
_b
uild
_t
imes
,
'total'
)
# save data to csv files
build
T
ime
D
ata
=
np
.
column_stack
((
total
P
arse
T
imes
,
total
H
ash
T
imes
,
total
C
ompile
T
imes
,
diff
ToB
uild
T
imes
,
total
B
uild
T
imes
))
write_to_csv
(
build
T
ime
D
ata
,
BUILD_TIME_DATA_HEADER
,
abs_path
(
BUILD_TIME_DATA_FILENAME
))
build
_t
ime
_d
ata
=
np
.
column_stack
((
total
_p
arse
_t
imes
,
total
_h
ash
_t
imes
,
total
_c
ompile
_t
imes
,
diff
_to_b
uild
_t
imes
,
total
_b
uild
_t
imes
))
write_to_csv
(
build
_t
ime
_d
ata
,
BUILD_TIME_DATA_HEADER
,
abs_path
(
BUILD_TIME_DATA_FILENAME
))
plot
B
uild
T
ime
C
omposition
G
raph
(
total
P
arse
T
imes
,
total
H
ash
T
imes
,
total
C
ompile
T
imes
,
diff
ToB
uild
T
imes
)
plot
_b
uild
_t
ime
_c
omposition
_g
raph
(
total
_p
arse
_t
imes
,
total
_h
ash
_t
imes
,
total
_c
ompile
_t
imes
,
diff
_to_b
uild
_t
imes
)
################################################################################
"""functions for reading data from the csv files to skip full record building"""
...
...
@@ -250,7 +258,7 @@ def makeGraphs(fullRecord):
def
csv_files_are_existing
():
return
os
.
path
.
isfile
(
abs_path
(
BUILD_TIME_DATA_FILENAME
))
def
read_from_csv
(
filename
,
column
N
ames
):
def
read_from_csv
(
filename
,
column
_n
ames
):
data
=
[]
with
open
(
filename
)
as
csv_file
:
reader
=
csv
.
reader
(
csv_file
)
...
...
@@ -291,10 +299,10 @@ if (len(sys.argv) > 1):
# read_csv_data_and_plot_graphs()
# print "finished graphs at %s" % time.ctime()
# else:
full_record
=
build
F
ull
R
ecord
T
o
(
path_to_full_record_file
)
full_record
=
build
_f
ull
_r
ecord
_t
o
(
path_to_full_record_file
)
print
"finished building/loading full record at %s"
%
time
.
ctime
()
make
G
raphs
(
full_record
)
make
_g
raphs
(
full_record
)
print
"finished graphs at %s"
%
time
.
ctime
()
print
"Finished at %s"
%
time
.
ctime
()
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment