diff --git a/cli_commands.md b/cli_commands.md index ea89792..fa986b1 100644 --- a/cli_commands.md +++ b/cli_commands.md @@ -1,78 +1,112 @@ -# cli commands -#cli - -## display list of content types and # of associated nodes: -drush sqlq 'select count(node.nid) as node_count, node_type.type from node inner join node_type on node.type = node_type.type group by node_type.type' - -## And then if you want filter by a specific type, just use grep like this: -drush sqlq 'select count(node.nid) as node_count, node_type.type from node inner join node_type on node.type = node_type.type group by node_type.type' | grep 2014 - -## search replace in text mulitle files -perl -pi -w -e 's/SEARCH_FOR/REPLACE_WITH/g;' *.txt -perl -pi -w -e 's/thex/robertsonlibrary/g;' **/*.* - -## search replace in file names -rename 's/livero/lives/g' **/*.* -v - -## torrent download -aria2c -d ~/Downloads "magnetlink" - - -ocrmypdf --optimize 3 --skip-text input.pdf output.pdf - - -ocrmypdf --optimize 3 --image-dpi 300 --output-type pdf \ - --force-ocr --tesseract-pagesegmode 1 input.pdf output.pdf - -# down sample pdfs to 72dpi -(single file) -gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 \ - -dPDFSETTINGS=/screen \ - -dNOPAUSE -dQUIET -dBATCH \ - -sOutputFile=output.pdf input.pdf - -(batch) - -# In the folder with your PDFs -mkdir downsampled - -for f in *.pdf *.PDF; do - [ -f "$f" ] || continue - gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/screen \ - -dNOPAUSE -dBATCH \ - -sOutputFile="downsampled/${f%.pdf}_72dpi.pdf" \ - "$f" -done -# In the folder that contains your original PDFs - -mkdir -p downsampled - -for f in *.pdf *.PDF; do - [ -f "$f" ] || continue - gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/screen \ - -dNOPAUSE -dBATCH -dQUIET \ - -sOutputFile="downsampled/$f" \ - "$f" -done - -# check current dpi -for f in *.pdf *.PDF; do - echo "=== Images in: $f ===" - pdfimages -list "$f" - echo "" -done -# Creates (or overwrites) images_list.txt in the current 
directory -for f in *.pdf *.PDF; do - if [ -f "$f" ]; then - echo "=== Images in: $f ===" >> images_list.txt - pdfimages -list "$f" >> images_list.txt - echo "" >> images_list.txt - fi -done -# scan for ccitt encoding -for f in *.pdf *.PDF; do - [ -f "$f" ] || continue - if pdfimages -list "$f" 2>/dev/null | grep -q " ccitt "; then - echo "$f uses CCITT" - fi -done +# CLI Cheatsheet + +## Contents +- [Drupal / Drush](#drupal--drush) +- [Text Processing](#text-processing) +- [Torrents](#torrents) +- [PDF Tools](#pdf-tools) + +--- + +## Drupal / Drush + +> Note: These SQL queries target the Drupal 7 schema (`node_type` table). They won't work as-is on Drupal 8+. + +### List content types with node counts + + drush sqlq 'select count(node.nid) as node_count, node_type.type + from node + inner join node_type on node.type = node_type.type + group by node_type.type' + +### Filter results by keyword (e.g. "2014") + + drush sqlq 'select count(node.nid) as node_count, node_type.type + from node + inner join node_type on node.type = node_type.type + group by node_type.type' | grep 2014 + +--- + +## Text Processing + +### Find and replace inside files (perl) + + # Single file type in current directory + perl -pi -w -e 's/SEARCH_FOR/REPLACE_WITH/g;' *.txt + + # Recursively across all file types (** recurses in zsh; bash needs: shopt -s globstar) + perl -pi -w -e 's/thex/robertsonlibrary/g;' **/*.* + +### Find and replace in file names + + # Rename files matching a pattern, verbose output shows what changed + rename 's/livero/lives/g' **/*.* -v + +--- + +## Torrents + +### Download via magnet link (aria2c) + + # aria2c is a lightweight multi-protocol download utility + aria2c -d ~/Downloads "magnetlink" + +--- + +## PDF Tools + +### OCR a PDF (ocrmypdf) + + # Standard: optimize output, skip pages that already have a text layer + ocrmypdf --optimize 3 --skip-text input.pdf output.pdf + + # Aggressive: force re-OCR even if a text layer exists (useful for corrupt/bad layers), + # set DPI manually, use page segmentation mode 1 
(automatic with OSD) + ocrmypdf --optimize 3 --image-dpi 300 --output-type pdf \ + --force-ocr --tesseract-pagesegmode 1 input.pdf output.pdf + +### Downsample a PDF to 72dpi (Ghostscript) + + # Single file + gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 \ + -dPDFSETTINGS=/screen \ + -dNOPAUSE -dQUIET -dBATCH \ + -sOutputFile=output.pdf input.pdf + + # Batch - processes all PDFs in current folder, preserves original filenames + mkdir -p downsampled + for f in *.pdf *.PDF; do + [ -f "$f" ] || continue + gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/screen \ + -dNOPAUSE -dBATCH -dQUIET \ + -sOutputFile="downsampled/$f" \ + "$f" + done + +### Check image DPI in PDFs (pdfimages) + + # Print image info to terminal + for f in *.pdf *.PDF; do + echo "=== $f ===" + pdfimages -list "$f" + echo "" + done + + # Append output to images_list.txt instead (>> adds to any existing file; delete it first for a fresh list) + for f in *.pdf *.PDF; do + [ -f "$f" ] || continue + echo "=== $f ===" >> images_list.txt + pdfimages -list "$f" >> images_list.txt + echo "" >> images_list.txt + done + +### Scan for CCITT encoding + + # CCITT is a fax-era compression format - flags PDFs that may cause compatibility issues + for f in *.pdf *.PDF; do + [ -f "$f" ] || continue + if pdfimages -list "$f" 2>/dev/null | grep -q " ccitt "; then + echo "$f uses CCITT" + fi + done