Bug 2137: Improve license check 49/12449/2
author Mark Beierl <mark.beierl@canonical.com>
Fri, 12 Aug 2022 01:59:11 +0000 (21:59 -0400)
committer beierlm <mark.beierl@canonical.com>
Fri, 12 Aug 2022 13:59:44 +0000 (15:59 +0200)
Rather than relying on a case-sensitive file extension (.jpg, etc.),
this change introduces the use of the file's MIME type to better
determine whether a file requires a license.

Change-Id: I24492c584c0d124c47d542cb8f8019ae56642682
Signed-off-by: Mark Beierl <mark.beierl@canonical.com>
tools/license_scan.sh

index b20bb0a..a794636 100755 (executable)
@@ -22,15 +22,31 @@ exception_list="':(exclude)*.pdf' ':(exclude)*.png' ':(exclude)*.jpeg' ':(exclud
 git fetch
 
 for file in $(echo ${exception_list} | xargs git diff --name-only origin/$GERRIT_BRANCH -- . ); do
-    license="No Apache license found"
-    if [ -f $file ]; then
-        if [ -s $file ]; then
-            if [ $(grep -c "http://www.apache.org/licenses/LICENSE-2.0" $file) -ge 1 ] ; then
-                license="Apache-2.0"
+
+    file_type=$(file -b --mime-type $file | sed 's|/.*||')
+    echo $file is $file_type
+    case "$file_type" in
+        text)
+            binary=false
+            ;;
+        *)
+            binary=true
+            ;;
+    esac
+
+    if $binary ; then
+        license=Binary
+    else
+        license="No Apache license found"
+        if [ -f $file ]; then
+            if [ -s $file ]; then
+                if [ $(grep -c "http://www.apache.org/licenses/LICENSE-2.0" $file) -ge 1 ] ; then
+                    license="Apache-2.0"
+                fi
             fi
+        else
+            license="DELETED"
         fi
-    else
-        license="DELETED"
     fi
     echo "$file $license"
     case "$license" in
@@ -42,6 +58,8 @@ for file in $(echo ${exception_list} | xargs git diff --name-only origin/$GERRIT
             ;;
         "DELETED")
             ;;
+        "Binary")
+            ;;
         *)
             echo "BAD LICENSE ON FILE $file"
             other=$((other + 1))