|
|
|
|
@ -24,3 +24,20 @@ ocrmypdf --optimize 3 --skip-text input.pdf output.pdf
|
|
|
|
|
# OCR input.pdf into output.pdf: OCR is forced (--force-ocr), Tesseract page
# segmentation mode 1 is requested, and the result is written as plain "pdf"
# output at optimization level 3.
# NOTE(review): the ocrmypdf docs describe --image-dpi as applying to image
# inputs; confirm it has any effect when the input is already a PDF.
ocrmypdf \
  --force-ocr \
  --tesseract-pagesegmode 1 \
  --image-dpi 300 \
  --output-type pdf \
  --optimize 3 \
  input.pdf output.pdf
|
|
|
|
|
|
|
|
|
# Downsample PDFs to 72 dpi
|
|
|
|
(single file) |
|
|
|
|
# Rewrite input.pdf as output.pdf through Ghostscript's pdfwrite device using
# the /screen preset. -dNOPAUSE/-dBATCH keep the run non-interactive and
# -dQUIET suppresses the startup chatter.
gs \
  -dQUIET -dBATCH -dNOPAUSE \
  -sDEVICE=pdfwrite \
  -dCompatibilityLevel=1.4 \
  -dPDFSETTINGS=/screen \
  -sOutputFile=output.pdf \
  input.pdf
|
|
|
|
|
|
|
|
|
(batch: every *.PDF in the current directory, written to output_pdfs/)
|
|
|
|
|
|
|
|
|
# Batch-downsample every *.PDF in the current directory to 72 dpi (color,
# gray and mono images), writing each result to output_pdfs/ under the same
# file name.
#
# The output directory is created first because gs cannot write into a
# directory that does not exist; the loop only runs if that succeeds.
mkdir -p output_pdfs &&
for f in *.PDF; do
  # With no matching files the glob stays as the literal "*.PDF"; skip it
  # instead of handing a nonexistent file name to gs.
  [ -e "$f" ] || continue
  gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 \
    -dNOPAUSE -dBATCH \
    -dDownsampleColorImages=true -dColorImageResolution=72 \
    -dDownsampleGrayImages=true -dGrayImageResolution=72 \
    -dDownsampleMonoImages=true -dMonoImageResolution=72 \
    -sOutputFile="output_pdfs/$f" "$f"
done
|
|
|
|
|