Tue, 28 Aug 2012 16:44:32 +0200
changed version output slightly
20
43725438ac50
Changed author comments + added signatures for upcomming bfile heuristics
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
1 | /* |
43725438ac50
Changed author comments + added signatures for upcomming bfile heuristics
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
2 | * bfile_heuristics.c |
43725438ac50
Changed author comments + added signatures for upcomming bfile heuristics
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
3 | * |
43725438ac50
Changed author comments + added signatures for upcomming bfile heuristics
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
4 | * Created on: 20.10.2011 |
43725438ac50
Changed author comments + added signatures for upcomming bfile heuristics
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
5 | * Author: Mike |
43725438ac50
Changed author comments + added signatures for upcomming bfile heuristics
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
6 | */ |
43725438ac50
Changed author comments + added signatures for upcomming bfile heuristics
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
7 | |
43725438ac50
Changed author comments + added signatures for upcomming bfile heuristics
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
8 | #include "bfile_heuristics.h" |
22
4508da679ffb
completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents:
21
diff
changeset
|
9 | #include <ctype.h> |
20
43725438ac50
Changed author comments + added signatures for upcomming bfile heuristics
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
10 | |
21
91e0890464b0
implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents:
20
diff
changeset
|
11 | bfile_heuristics_t *new_bfile_heuristics_t() { |
91e0890464b0
implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents:
20
diff
changeset
|
12 | bfile_heuristics_t *ret = malloc(sizeof(bfile_heuristics_t)); |
91e0890464b0
implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents:
20
diff
changeset
|
13 | ret->level = BFILE_MEDIUM_ACCURACY; |
22
4508da679ffb
completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents:
21
diff
changeset
|
14 | bfile_reset(ret); |
21
91e0890464b0
implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents:
20
diff
changeset
|
15 | return ret; |
20
43725438ac50
Changed author comments + added signatures for upcomming bfile heuristics
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
16 | } |
43725438ac50
Changed author comments + added signatures for upcomming bfile heuristics
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
17 | |
21
91e0890464b0
implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents:
20
diff
changeset
|
18 | void destroy_bfile_heuristics_t(bfile_heuristics_t *def) { |
91e0890464b0
implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents:
20
diff
changeset
|
19 | free(def); |
20
43725438ac50
Changed author comments + added signatures for upcomming bfile heuristics
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
20 | } |
21
91e0890464b0
implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents:
20
diff
changeset
|
21 | |
22
4508da679ffb
completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents:
21
diff
changeset
|
22 | void bfile_reset(bfile_heuristics_t *def) { |
4508da679ffb
completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents:
21
diff
changeset
|
23 | def->bcount = 0; |
4508da679ffb
completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents:
21
diff
changeset
|
24 | def->tcount = 0; |
4508da679ffb
completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents:
21
diff
changeset
|
25 | } |
4508da679ffb
completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents:
21
diff
changeset
|
26 | |
21
91e0890464b0
implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents:
20
diff
changeset
|
27 | bool bfile_check(bfile_heuristics_t *def, int next_char) { |
91e0890464b0
implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents:
20
diff
changeset
|
28 | bool ret = false; |
22
4508da679ffb
completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents:
21
diff
changeset
|
29 | if (def->level != BFILE_IGNORE) { |
4508da679ffb
completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents:
21
diff
changeset
|
30 | def->tcount++; |
4508da679ffb
completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents:
21
diff
changeset
|
31 | if (!isprint(next_char) && !isspace(next_char)) { |
4508da679ffb
completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents:
21
diff
changeset
|
32 | def->bcount++; |
4508da679ffb
completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents:
21
diff
changeset
|
33 | } |
4508da679ffb
completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents:
21
diff
changeset
|
34 | |
23
778388400f7b
encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents:
22
diff
changeset
|
35 | if (def->tcount > 1) { /* empty files are text files */ |
778388400f7b
encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents:
22
diff
changeset
|
36 | switch (def->level) { |
778388400f7b
encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents:
22
diff
changeset
|
37 | case BFILE_LOW_ACCURACY: |
778388400f7b
encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents:
22
diff
changeset
|
38 | if (def->tcount > 15 || next_char == EOF) { |
778388400f7b
encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents:
22
diff
changeset
|
39 | ret = (1.0*def->bcount)/def->tcount > 0.32; |
778388400f7b
encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents:
22
diff
changeset
|
40 | } |
778388400f7b
encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents:
22
diff
changeset
|
41 | break; |
778388400f7b
encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents:
22
diff
changeset
|
42 | case BFILE_HIGH_ACCURACY: |
778388400f7b
encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents:
22
diff
changeset
|
43 | if (def->tcount > 500 || next_char == EOF) { |
778388400f7b
encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents:
22
diff
changeset
|
44 | ret = (1.0*def->bcount)/def->tcount > 0.1; |
778388400f7b
encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents:
22
diff
changeset
|
45 | } |
778388400f7b
encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents:
22
diff
changeset
|
46 | break; |
778388400f7b
encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents:
22
diff
changeset
|
47 | default: /* BFILE_MEDIUM_ACCURACY */ |
778388400f7b
encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents:
22
diff
changeset
|
48 | if (def->tcount > 100 || next_char == EOF) { |
778388400f7b
encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents:
22
diff
changeset
|
49 | ret = (1.0*def->bcount)/def->tcount > 0.1; |
778388400f7b
encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents:
22
diff
changeset
|
50 | } |
778388400f7b
encapsulated scanner arguments + enabled optimizer + empty file is no bfile
Mike Becker <universe@uap-core.de>
parents:
22
diff
changeset
|
51 | break; |
22
4508da679ffb
completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents:
21
diff
changeset
|
52 | } |
4508da679ffb
completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents:
21
diff
changeset
|
53 | } |
4508da679ffb
completed binary file heuristics
Mike Becker <universe@uap-core.de>
parents:
21
diff
changeset
|
54 | } |
21
91e0890464b0
implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents:
20
diff
changeset
|
55 | |
91e0890464b0
implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents:
20
diff
changeset
|
56 | return ret; |
91e0890464b0
implemented bfile heuristics option + TODO: implement algorithm
Mike Becker <universe@uap-core.de>
parents:
20
diff
changeset
|
57 | } |