Skip to content

Commit 5316527

Browse files
committed
wip
1 parent 05483e0 commit 5316527

1 file changed

Lines changed: 111 additions & 99 deletions

File tree

scripts/checkCopyrightPresent.sh

Lines changed: 111 additions & 99 deletions
Original file line number | Diff line number | Diff line change
@@ -26,52 +26,56 @@ expectedHeader=(
2626
" *******************************************************************************/"
2727
)
2828

29-
normalize_line_for_compare() {
30-
local line="$1"
31-
line="${line//\*/}"
32-
line=$(printf '%s' "$line" | tr -s '[:space:]' ' ')
33-
line="${line#"${line%%[! ]*}"}"
34-
line="${line%"${line##*[! ]}"}"
35-
printf '%s' "$line"
36-
}
37-
3829
count_asterisks() {
3930
local line="$1"
4031
local only
4132
only="${line//[^*]/}"
4233
printf '%s' "${#only}"
4334
}
4435

45-
compare_header_line() {
36+
line_matches_expected() {
4637
local expected="$1"
4738
local actual="$2"
48-
local expected_text
49-
local actual_text
5039
local expected_count
51-
local actual_count
52-
local diff
40+
local len
41+
local i
42+
local candidate
43+
local prev
44+
local curr
5345

54-
expected_text="$(normalize_line_for_compare "$expected")"
55-
actual_text="$(normalize_line_for_compare "$actual")"
56-
if [ "$expected_text" != "$actual_text" ]; then
57-
return 1
46+
if [ "$expected" = "$actual" ]; then
47+
return 0
5848
fi
5949

6050
expected_count="$(count_asterisks "$expected")"
61-
actual_count="$(count_asterisks "$actual")"
62-
63-
if [ "$expected_count" -gt 10 ]; then
64-
diff=$((expected_count - actual_count))
65-
diff="${diff#-}"
66-
if [ "$diff" -le 1 ]; then
67-
return 0
68-
fi
51+
if [ "$expected_count" -le 10 ]; then
6952
return 1
7053
fi
7154

72-
if [ "$expected_count" -eq "$actual_count" ]; then
73-
return 0
74-
fi
55+
len=${#expected}
56+
for ((i=0; i<len; i++)); do
57+
if [ "${expected:i:1}" = "*" ]; then
58+
candidate="${expected:0:i}${expected:i+1}"
59+
if [ "$candidate" = "$actual" ]; then
60+
return 0
61+
fi
62+
fi
63+
done
64+
65+
for ((i=0; i<=len; i++)); do
66+
prev=""
67+
curr=""
68+
if [ "$i" -gt 0 ]; then
69+
prev="${expected:i-1:1}"
70+
fi
71+
curr="${expected:i:1}"
72+
if [ "$prev" = "*" ] || [ "$curr" = "*" ]; then
73+
candidate="${expected:0:i}*${expected:i}"
74+
if [ "$candidate" = "$actual" ]; then
75+
return 0
76+
fi
77+
fi
78+
done
7579
return 1
7680
}
7781

@@ -110,96 +114,104 @@ read_header_lines() {
110114
done < <(head -n 10 "$file")
111115
}
112116

113-
compute_header_stats() {
114-
local year_line_normalized
115-
local year_asterisks
116-
local idx
117-
match_count=0
118-
has_valid_year_line=false
119-
aduna_year=""
120-
121-
year_line_normalized="$(normalize_line_for_compare "${header_lines[1]}")"
122-
year_asterisks="$(count_asterisks "${header_lines[1]}")"
123-
if [ "$year_asterisks" -eq 1 ]; then
124-
if [[ "$year_line_normalized" =~ ^Copyright\ \(c\)\ ([0-9]{4})\ Eclipse\ RDF4J\ contributors\.$ ]]; then
125-
has_valid_year_line=true
126-
elif [[ "$year_line_normalized" =~ ^Copyright\ \(c\)\ ([0-9]{4})\ Eclipse\ RDF4J\ contributors,\ Aduna,\ and\ others\.$ ]]; then
127-
has_valid_year_line=true
128-
aduna_year="${BASH_REMATCH[1]}"
129-
fi
130-
fi
131117

132-
for idx in 0 2 3 4 5 6 7 8 9; do
133-
if compare_header_line "${expectedHeader[$idx]}" "${header_lines[$idx]}"; then
134-
match_count=$((match_count + 1))
135-
fi
136-
done
137-
}
138-
139-
for i in $(find "${repo_root}" -name pom.xml); do
118+
while IFS= read -r -d '' i; do
140119
if [[ "$i" == "${repo_root}/core/queryparser/sparql/JavaCC/"* ]]; then
141120
continue
142121
fi
143-
if [[ "$i" == *"/target/"* ]]; then
144-
target_parent="${i%%/target/*}"
145-
if [[ -f "${target_parent}/pom.xml" ]]; then
146-
echo "Skipping target dir pom: $i"
147-
continue
148-
fi
149-
fi
150-
echo $i;
151122
# only look in non test files but make sure src/main exists.
152123
# and not in package-info
153124
dir="${i/pom.xml/}src/"
154125
if [ -d "$dir" ]; then
155-
for c in $(find "${dir}" -name *.java -not -name package-info.java); do
156-
if grep -q 'Generated By:' "$c"; then
157-
continue
158-
fi
159-
126+
while IFS= read -r -d '' c; do
127+
echo "$c"
160128
read_header_lines "$c"
161129

162-
has_copyright=false
163-
if grep -q "Copyright" "$c"; then
164-
has_copyright=true
130+
generated=false
131+
if grep -q 'Generated By:' "$c"; then
132+
generated=true
165133
fi
166134

167-
if [ ${#header_lines[@]} -lt 10 ]; then
168-
if [ "$has_copyright" = true ]; then
169-
filesWithInvalidHeader+=("$c")
170-
else
171-
filesWithOutCopyright+=("$c")
135+
if [ "$generated" = false ]; then
136+
has_copyright=false
137+
if [ ${#header_lines[@]} -ge 2 ] && [[ "${header_lines[1]}" == *Copyright* ]]; then
138+
has_copyright=true
139+
elif grep -q "Copyright" "$c"; then
140+
has_copyright=true
172141
fi
173-
continue
174-
fi
175-
176-
compute_header_stats
177142

178-
if [ "$rewrite_headers" = true ] && [ "$match_count" -ge 4 ] && [ "$match_count" -lt 9 ]; then
179-
rewrite_header "$c" "${header_lines[1]}"
180-
read_header_lines "$c"
181-
compute_header_stats
143+
if [ ${#header_lines[@]} -lt 10 ]; then
144+
if [ "$has_copyright" = true ]; then
145+
filesWithInvalidHeader+=("$c")
146+
else
147+
filesWithOutCopyright+=("$c")
148+
fi
149+
else
150+
match_count=0
151+
has_valid_year_line=false
152+
aduna_year=""
153+
154+
if [[ "${header_lines[1]}" =~ ^[[:space:]]*\*\ Copyright\ \(c\)\ ([0-9]{4})\ Eclipse\ RDF4J\ contributors\.$ ]]; then
155+
has_valid_year_line=true
156+
elif [[ "${header_lines[1]}" =~ ^[[:space:]]*\*\ Copyright\ \(c\)\ ([0-9]{4})\ Eclipse\ RDF4J\ contributors,\ Aduna,\ and\ others\.$ ]]; then
157+
has_valid_year_line=true
158+
aduna_year="${BASH_REMATCH[1]}"
159+
fi
160+
161+
for idx in 0 2 3 4 5 6 7 8 9; do
162+
if line_matches_expected "${expectedHeader[$idx]}" "${header_lines[$idx]}"; then
163+
match_count=$((match_count + 1))
164+
fi
165+
done
166+
167+
if [ "$rewrite_headers" = true ] && [ "$match_count" -ge 4 ] && [ "$match_count" -lt 9 ]; then
168+
rewrite_header "$c" "${header_lines[1]}"
169+
read_header_lines "$c"
170+
match_count=0
171+
has_valid_year_line=false
172+
aduna_year=""
173+
if [[ "${header_lines[1]}" =~ ^[[:space:]]*\*\ Copyright\ \(c\)\ ([0-9]{4})\ Eclipse\ RDF4J\ contributors\.$ ]]; then
174+
has_valid_year_line=true
175+
elif [[ "${header_lines[1]}" =~ ^[[:space:]]*\*\ Copyright\ \(c\)\ ([0-9]{4})\ Eclipse\ RDF4J\ contributors,\ Aduna,\ and\ others\.$ ]]; then
176+
has_valid_year_line=true
177+
aduna_year="${BASH_REMATCH[1]}"
178+
fi
179+
for idx in 0 2 3 4 5 6 7 8 9; do
180+
if line_matches_expected "${expectedHeader[$idx]}" "${header_lines[$idx]}"; then
181+
match_count=$((match_count + 1))
182+
fi
183+
done
184+
fi
185+
186+
if [ -n "$aduna_year" ] && [ "$aduna_year" -gt 2020 ]; then
187+
filesWithAdunaYearAfter2020+=("$c")
188+
fi
189+
190+
if [ "$has_valid_year_line" = false ]; then
191+
if [ "$has_copyright" = true ]; then
192+
filesWithInvalidHeader+=("$c")
193+
else
194+
filesWithOutCopyright+=("$c")
195+
fi
196+
elif [ "$match_count" -lt 9 ]; then
197+
filesWithInvalidHeader+=("$c")
198+
fi
199+
fi
182200
fi
183201

184-
if [ -n "$aduna_year" ] && [ "$aduna_year" -gt 2020 ]; then
185-
filesWithAdunaYearAfter2020+=("$c")
202+
has_spdx=false
203+
if [ ${#header_lines[@]} -ge 9 ] && [[ "${header_lines[8]}" == *"SPDX-License-Identifier: BSD-3-Clause"* ]]; then
204+
has_spdx=true
205+
elif grep -q 'SPDX-License-Identifier: BSD-3-Clause' "$c"; then
206+
has_spdx=true
186207
fi
187208

188-
if [ "$has_valid_year_line" = false ]; then
189-
if [ "$has_copyright" = true ]; then
190-
filesWithInvalidHeader+=("$c")
191-
else
192-
filesWithOutCopyright+=("$c")
193-
fi
194-
elif [ "$match_count" -lt 9 ]; then
195-
filesWithInvalidHeader+=("$c")
196-
fi
197-
done
198-
for c in $(find "${dir}" -name *.java -not -name package-info.java -exec grep -L 'SPDX-License-Identifier: BSD-3-Clause' {} \;); do
209+
if [ "$has_spdx" = false ]; then
199210
filesWithOutSPDX+=("$c")
200-
done
211+
fi
212+
done < <(find "${dir}" -name "*.java" -not -name "package-info.java" -print0)
201213
fi
202-
done
214+
done < <(find "${repo_root}" -type d -name target -prune -o -type f -name "pom.xml" -print0)
203215

204216
for f in "${filesWithOutCopyright[@]}"; do
205217
echo "Missing copyright: $f"

0 commit comments

Comments (0)