#!/bin/bash
# Run ImageMagick 6.x `identify -verbose` on a file and convert the report
# to JSON (pretty-printed through jq).
#
# Usage: sequencelogic-image-analyzer.sh file
#
# Output shape:
#   {"jobType": "LOAN", "fileType": "...", "name": "<basename>",
#    "images": [ { "<property>": <value>, ... }, ... ]}
#
# Exit status is always 0, as in earlier versions of this script; failures of
# identify/jq are only visible on stderr or as missing output.

#######################################
# Convert `identify -verbose` text into (unformatted) JSON.
# Arguments: $1 - display name stored in the top-level "name" member
# Inputs:    identify -verbose report on stdin
# Outputs:   JSON document on stdout
#######################################
identify_to_json() {
  # The name travels through the environment rather than `awk -v`, because
  # -v would interpret backslash escapes inside the file name.
  SL_IMAGE_NAME="${1-}" awk -Wposix '
  # Strip leading and trailing whitespace.
  function trim(s) {
    sub(/^[ \t\r\n]+/, "", s)
    sub(/[ \t\r\n]+$/, "", s)
    return s
  }

  # Escape a string for use inside a JSON string literal. Handles backslash,
  # double quote, tab, newline and carriage return.
  function jsonEscape(s) {
    gsub(/\\/, "&&", s)     # "&&" doubles the match; portable across awks
    gsub(/["]/, "\\\"", s)
    gsub(/\t/, "\\t", s)
    gsub(/\n/, "\\n", s)
    gsub(/\r/, "\\r", s)
    return s
  }

  # Parse a hexadecimal digit string without relying on the awk
  # implementation converting "0x..." strings. Returns 0 on a bad digit.
  function hexToInt(h,    i, d, n) {
    n = 0
    h = toupper(h)
    for (i = 1; i <= length(h); i++) {
      d = index("0123456789ABCDEF", substr(h, i, 1))
      if (d == 0) return 0
      n = n * 16 + d - 1
    }
    return n
  }

  # Emit one JSON member of the current image object.
  #   isInt > 0 : emit val as a bare number; a trailing K or M is expanded.
  #               Anything that is not a valid JSON number becomes null so
  #               the document always stays parseable.
  #   otherwise : emit val as an escaped JSON string.
  # NOTE(review): ImageMagick may print these counts with decimal
  # (1000-based) prefixes; the 1024 multipliers are kept from the previous
  # implementation - confirm against the consumers before changing.
  function emitProp(prop, val, isInt,    mult, out) {
    if (isInt > 0) {
      mult = 1
      if (val ~ /K$/) mult = 1024
      else if (val ~ /M$/) mult = 1024 * 1024
      if (mult > 1) val = substr(val, 1, length(val) - 1)
      if (val !~ /^-?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?$/) out = "null"
      else if (mult > 1) out = (val * mult) ""
      else out = val ""
    } else {
      out = "\"" jsonEscape(val) "\""
    }
    printf "%s\"%s\":%s\n", prop_comma, prop, out
    # Every member after the first one of an image needs a separator.
    prop_comma = ","
  }

  # Split a "WxH..." value (for example "612x792+0+0") into <prop>_x and
  # <prop>_y numeric members.
  function deriveXY(prop, val,    x, y) {
    x = val
    y = val
    sub(/x.*$/, "", x)
    sub(/^[0-9.]+x/, "", y)
    sub(/[^0-9.].*$/, "", y)
    emitProp(prop "_x", x, 1)
    emitProp(prop "_y", y, 1)
  }

  # Account for one histogram row such as
  #   60417: (    0,    0,    0,65535) #000000000000 graya(0,1)
  # The pixel count is added to the black bucket when the mean of the first
  # three channels (scaled to 0..255) is below 128, else to the white bucket.
  function addHistogramRow(line,    count, hex, width, r, g, b) {
    match(line, /[0-9]+:/)
    count = substr(line, RSTART, RLENGTH - 1) + 0
    # Rows without a hex colour cannot be classified.
    if (match(line, /#[0-9A-Fa-f]+/) == 0) return
    hex = substr(line, RSTART + 1, RLENGTH - 1)
    # 6 or 8 digits: 8-bit channels (RGB / RGBA); longer: 16-bit channels.
    width = (length(hex) > 8) ? 4 : 2
    r = hexToInt(substr(hex, 1, width))
    g = hexToInt(substr(hex, 1 + width, width))
    b = hexToInt(substr(hex, 1 + 2 * width, width))
    if (width == 4) {
      r /= 256
      g /= 256
      b /= 256
    }
    if ((r + g + b) / 3 < 128) black_count += count
    else white_count += count
  }

  # Emit the accumulated histogram totals, if a histogram is open.
  function flushHistogram() {
    if (!in_histogram) return
    in_histogram = 0
    emitProp("black_pixel_count", black_count, 1)
    emitProp("white_pixel_count", white_count, 1)
  }

  BEGIN {
    # The fileType spelling is a runtime identifier and is kept verbatim.
    printf "{\"jobType\": \"LOAN\", \"fileType\": \"IDENTIFY_VERBOSE_ANALYIS\""
    printf ",\"name\": \"%s\"\n", jsonEscape(ENVIRON["SL_IMAGE_NAME"])
    printf ",\"images\":[\n"
    in_image = 0
    in_histogram = 0
    img_comma = ""
    prop_comma = ""
  }

  # Start of a new image report: close the previous object, open a new one.
  /^Image:/ {
    flushHistogram()
    if (in_image) printf "}\n"
    printf "%s{\n", img_comma
    in_image = 1
    prop_comma = ""
    img_comma = ","
    next
  }

  /^[ ]+Histogram:/ {
    flushHistogram()
    in_histogram = 1
    black_count = 0
    white_count = 0
    next
  }

  # Inside a histogram: rows look like "<count>: (...) #HEX name". The first
  # other line ends the histogram and is then processed by the rules below.
  in_histogram {
    if ($0 ~ /^[ \t]*[0-9]+:/) {
      addHistogramRow($0)
      next
    }
    flushHistogram()
  }

  /^[ ]+(Format|Number pixels|Colors|Geometry|Resolution|Print size|Type|Colorspace|Depth|date:create|date:modify|pdf:Version): / {
    line = trim($0)
    # Keys that contain a colon themselves: join the two parts with "_".
    if (line ~ /^(date:create|date:modify|pdf:Version):/) sub(/:/, "_", line)
    sep = index(line, ":")
    prop = tolower(trim(substr(line, 1, sep - 1)))
    gsub(/[ ]+/, "_", prop)
    val = trim(substr(line, sep + 1))
    if (prop ~ /^(resolution|geometry|print_size)$/) {
      # e.g. "resolution": "72x72" becomes resolution_x / resolution_y
      deriveXY(prop, val)
    } else {
      emitProp(prop, val, (prop ~ /^(number_pixels|colors)$/))
    }
  }

  END {
    flushHistogram()
    if (in_image) printf "}\n"
    printf "]}\n"
  }
  '
}

#######################################
# Entry point: analyze one file and print formatted JSON.
# Arguments: $1 - path of the file to analyze
# Returns:   always 0 (kept for compatibility with existing callers)
#######################################
main() {
  local file="${1-}"
  local name
  name=$(basename -- "${file}")

  identify -verbose "${file}" \
    | identify_to_json "${name}" \
    | jq '.'

  return 0
}

main "$@"