[nb] Edit: cli_commands.md

4 months ago · b057b0b94e
1 changed file with 112 additions and 78 deletions
--- a/cli_commands.md
+++ b/cli_commands.md
@@ -1,78 +1,112 @@
-# cli commands
+# CLI Cheatsheet
 #cli
-## display list of content types and # of associated nodes:
+## Contents
-drush sqlq 'select count(node.nid) as node_count, node_type.type from node inner join node_type on node.type = node_type.type group by node_type.type'
+- [Drupal / Drush](#drupal--drush)
 - [Text Processing](#text-processing)
 - [Torrents](#torrents)
 - [PDF Tools](#pdf-tools)
-## And then if you want filter by a specific type, just use grep like this:
+---
 drush sqlq 'select count(node.nid) as node_count, node_type.type from node inner join node_type on node.type = node_type.type group by node_type.type' | grep 2014
-## search replace in text mulitle files
+## Drupal / Drush
 perl -pi -w -e 's/SEARCH_FOR/REPLACE_WITH/g;' *.txt
 perl -pi -w -e 's/thex/robertsonlibrary/g;' **/*.*
-## search replace in file names
+> Note: These SQL queries target the Drupal 7 schema (`node_type` table). They won't work as-is on Drupal 8+.
 rename 's/livero/lives/g' **/*.* -v
-## torrent download
+### List content types with node counts
 aria2c -d ~/Downloads "magnetlink" 
    drush sqlq 'select count(node.nid) as node_count, node_type.type
                from node
                inner join node_type on node.type = node_type.type
                group by node_type.type'
-ocrmypdf --optimize 3 --skip-text input.pdf output.pdf
+### Filter results by keyword (e.g. "2014")
    drush sqlq 'select count(node.nid) as node_count, node_type.type
                from node
                inner join node_type on node.type = node_type.type
                group by node_type.type' | grep 2014
-ocrmypdf --optimize 3 --image-dpi 300 --output-type pdf \
+---
         --force-ocr --tesseract-pagesegmode 1 input.pdf output.pdf
-# down sample pdfs to 72dpi
+## Text Processing
 (single file)
 gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 \
   -dPDFSETTINGS=/screen \
   -dNOPAUSE -dQUIET -dBATCH \
   -sOutputFile=output.pdf input.pdf
-(batch)
+### Find and replace inside files (perl)
-# In the folder with your PDFs
+    # Single file type in current directory
-mkdir downsampled
+    perl -pi -w -e 's/SEARCH_FOR/REPLACE_WITH/g;' *.txt
-for f in *.pdf *.PDF; do
+    # Recursively, across every file whose name contains a dot (in bash, run `shopt -s globstar` first; otherwise ** behaves like *)
-    [ -f "$f" ] || continue
+    perl -pi -w -e 's/thex/robertsonlibrary/g;' **/*.*
-    gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/screen \
+
-       -dNOPAUSE -dBATCH \
+### Find and replace in file names
-       -sOutputFile="downsampled/${f%.pdf}_72dpi.pdf" \
+
-       "$f"
+    # Rename files matching a pattern; -v prints each rename. Uses Perl-expression syntax, so this needs the Perl `rename`, not the util-linux one.
-done
+    rename 's/livero/lives/g' **/*.* -v
-# In the folder that contains your original PDFs
+
 ---
 ## Torrents
 ### Download via magnet link (aria2c)
    # aria2c is a lightweight multi-protocol download utility
    aria2c -d ~/Downloads "magnetlink"
 ---
 ## PDF Tools
-mkdir -p downsampled
+### OCR a PDF (ocrmypdf)
-for f in *.pdf *.PDF; do
+    # Standard: optimize output, skip pages that already have a text layer
    ocrmypdf --optimize 3 --skip-text input.pdf output.pdf
    # Aggressive: force re-OCR even if a text layer exists (useful for corrupt/bad layers),
    # set DPI manually, use page segmentation mode 1 (automatic with OSD)
    ocrmypdf --optimize 3 --image-dpi 300 --output-type pdf \
             --force-ocr --tesseract-pagesegmode 1 input.pdf output.pdf
 ### Downsample a PDF to 72dpi (Ghostscript)
    # Single file
    gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 \
       -dPDFSETTINGS=/screen \
       -dNOPAUSE -dQUIET -dBATCH \
       -sOutputFile=output.pdf input.pdf
    # Batch - processes all PDFs in current folder, preserves original filenames
    mkdir -p downsampled
    for f in *.pdf *.PDF; do
        [ -f "$f" ] || continue
        gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/screen \
           -dNOPAUSE -dBATCH -dQUIET \
           -sOutputFile="downsampled/$f" \
           "$f"
-done
+    done
 ### Check image DPI in PDFs (pdfimages)
-# check current dpi
+    # Print image info to terminal
-for f in *.pdf *.PDF; do
+    for f in *.pdf *.PDF; do
-    echo "=== Images in: $f ==="
+        echo "=== $f ==="
        pdfimages -list "$f"
        echo ""
-done
+    done
-# Creates (or overwrites) images_list.txt in the current directory
+
-for f in *.pdf *.PDF; do
+    # Save output to images_list.txt instead (appends; delete the file first for a fresh list)
-    if [ -f "$f" ]; then
+    for f in *.pdf *.PDF; do
-        echo "=== Images in: $f ===" >> images_list.txt
+        [ -f "$f" ] || continue
        echo "=== $f ===" >> images_list.txt
        pdfimages -list "$f" >> images_list.txt
        echo "" >> images_list.txt
-    fi
+    done
-done
+
-# scan for ccitt encoding
+### Scan for CCITT encoding
-for f in *.pdf *.PDF; do
+
    # CCITT is a fax-era compression format - flags PDFs that may cause compatibility issues
    for f in *.pdf *.PDF; do
        [ -f "$f" ] || continue
        if pdfimages -list "$f" 2>/dev/null | grep -q " ccitt "; then
            echo "$f uses CCITT"
        fi
-done
+    done