[nb] Edit: cli_commands.md

4 months ago · b057b0b94e
1 changed files with 112 additions and 78 deletions
--- a/cli_commands.md
+++ b/cli_commands.md
@ -1,52 +1,81 @@
-# cli commands
+# CLI Cheatsheet
 #cli
-## display list of content types and # of associated nodes:
+## Contents
-drush sqlq 'select count(node.nid) as node_count, node_type.type from node inner join node_type on node.type = node_type.type group by node_type.type'
+- [Drupal / Drush](#drupal--drush)
 - [Text Processing](#text-processing)
 - [Torrents](#torrents)
 - [PDF Tools](#pdf-tools)
-## And then if you want filter by a specific type, just use grep like this:
+---
 drush sqlq 'select count(node.nid) as node_count, node_type.type from node inner join node_type on node.type = node_type.type group by node_type.type' | grep 2014
-## search replace in text mulitle files
+## Drupal / Drush
 > Note: These SQL queries target the Drupal 7 schema (`node_type` table). They won't work as-is on Drupal 8+.
 ### List content types with node counts
    drush sqlq 'select count(node.nid) as node_count, node_type.type
                from node
                inner join node_type on node.type = node_type.type
                group by node_type.type'
 ### Filter results by keyword (e.g. "2014")
    drush sqlq 'select count(node.nid) as node_count, node_type.type
                from node
                inner join node_type on node.type = node_type.type
                group by node_type.type' | grep 2014
 ---
 ## Text Processing
 ### Find and replace inside files (perl)
    # Single file type in current directory
    perl -pi -w -e 's/SEARCH_FOR/REPLACE_WITH/g;' *.txt
    # Recursively across all file types (in bash, run `shopt -s globstar` first; otherwise ** behaves like * and does not recurse)
    perl -pi -w -e 's/thex/robertsonlibrary/g;' **/*.*
-## search replace in file names
+### Find and replace in file names
    # Rename files matching a pattern, verbose output shows what changed
    rename 's/livero/lives/g' **/*.* -v
-## torrent download
+---
 ## Torrents
 ### Download via magnet link (aria2c)
    # aria2c is a lightweight multi-protocol download utility
    aria2c -d ~/Downloads "magnetlink"
 ---
-ocrmypdf --optimize 3 --skip-text input.pdf output.pdf
+## PDF Tools
 ### OCR a PDF (ocrmypdf)
    # Standard: optimize output, skip pages that already have a text layer
    ocrmypdf --optimize 3 --skip-text input.pdf output.pdf
    # Aggressive: force re-OCR even if a text layer exists (useful for corrupt/bad layers),
    # set DPI manually, use page segmentation mode 1 (automatic with OSD)
    ocrmypdf --optimize 3 --image-dpi 300 --output-type pdf \
             --force-ocr --tesseract-pagesegmode 1 input.pdf output.pdf
-# down sample pdfs to 72dpi
+### Downsample a PDF to 72dpi (Ghostscript)
-(single file)
+
    # Single file
    gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 \
       -dPDFSETTINGS=/screen \
       -dNOPAUSE -dQUIET -dBATCH \
       -sOutputFile=output.pdf input.pdf
-(batch)
+    # Batch - processes all PDFs in the current folder; writes copies to downsampled/ with a _72dpi suffix added to each original name
 # In the folder with your PDFs
 mkdir downsampled
 for f in *.pdf *.PDF; do
    [ -f "$f" ] || continue
    gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/screen \
       -dNOPAUSE -dBATCH \
       -sOutputFile="downsampled/${f%.pdf}_72dpi.pdf" \
       "$f"
 done
 # In the folder that contains your original PDFs
    mkdir -p downsampled
    for f in *.pdf *.PDF; do
        [ -f "$f" ] || continue
        gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/screen \
@ -55,21 +84,26 @@ for f in *.pdf *.PDF; do
           "$f"
    done
-# check current dpi
+### Check image DPI in PDFs (pdfimages)
    # Print image info to terminal
    for f in *.pdf *.PDF; do
-    echo "=== Images in: $f ==="
+        echo "=== $f ==="
        pdfimages -list "$f"
        echo ""
    done
-# Creates (or overwrites) images_list.txt in the current directory
+
    # Save output to images_list.txt instead
    for f in *.pdf *.PDF; do
-    if [ -f "$f" ]; then
+        [ -f "$f" ] || continue
-        echo "=== Images in: $f ===" >> images_list.txt
+        echo "=== $f ===" >> images_list.txt
        pdfimages -list "$f" >> images_list.txt
        echo "" >> images_list.txt
-    fi
    done
-# scan for ccitt encoding
+
 ### Scan for CCITT encoding
    # CCITT is a fax-era compression format; this loop flags PDFs containing CCITT-encoded images, which may cause compatibility issues
    for f in *.pdf *.PDF; do
        [ -f "$f" ] || continue
        if pdfimages -list "$f" 2>/dev/null | grep -q " ccitt "; then