1 changed files with 112 additions and 78 deletions
@ -1,78 +1,112 @@
|
||||
# cli commands |
||||
#cli |
||||
# CLI Cheatsheet |
||||
|
||||
## display list of content types and # of associated nodes: |
||||
drush sqlq 'select count(node.nid) as node_count, node_type.type from node inner join node_type on node.type = node_type.type group by node_type.type' |
||||
## Contents |
||||
- [Drupal / Drush](#drupal--drush) |
||||
- [Text Processing](#text-processing) |
||||
- [Torrents](#torrents) |
||||
- [PDF Tools](#pdf-tools) |
||||
|
||||
## And then, if you want to filter by a specific type, just use grep like this: |
||||
drush sqlq 'select count(node.nid) as node_count, node_type.type from node inner join node_type on node.type = node_type.type group by node_type.type' | grep 2014 |
||||
--- |
||||
|
||||
## search and replace text in multiple files |
||||
perl -pi -w -e 's/SEARCH_FOR/REPLACE_WITH/g;' *.txt |
||||
perl -pi -w -e 's/thex/robertsonlibrary/g;' **/*.* |
||||
## Drupal / Drush |
||||
|
||||
## search replace in file names |
||||
rename 's/livero/lives/g' **/*.* -v |
||||
> Note: These SQL queries target the Drupal 7 schema (`node_type` table). They won't work as-is on Drupal 8+. |
||||
|
||||
## torrent download |
||||
aria2c -d ~/Downloads "magnetlink" |
||||
### List content types with node counts |
||||
|
||||
drush sqlq 'select count(node.nid) as node_count, node_type.type |
||||
from node |
||||
inner join node_type on node.type = node_type.type |
||||
group by node_type.type' |
||||
|
||||
ocrmypdf --optimize 3 --skip-text input.pdf output.pdf |
||||
### Filter results by keyword (e.g. "2014") |
||||
|
||||
drush sqlq 'select count(node.nid) as node_count, node_type.type |
||||
from node |
||||
inner join node_type on node.type = node_type.type |
||||
group by node_type.type' | grep 2014 |
||||
|
||||
ocrmypdf --optimize 3 --image-dpi 300 --output-type pdf \ |
||||
--force-ocr --tesseract-pagesegmode 1 input.pdf output.pdf |
||||
--- |
||||
|
||||
# downsample PDFs to 72 dpi |
||||
(single file) |
||||
gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 \ |
||||
-dPDFSETTINGS=/screen \ |
||||
-dNOPAUSE -dQUIET -dBATCH \ |
||||
-sOutputFile=output.pdf input.pdf |
||||
## Text Processing |
||||
|
||||
(batch) |
||||
### Find and replace inside files (perl) |
||||
|
||||
# In the folder with your PDFs |
||||
mkdir downsampled |
||||
# Single file type in current directory |
||||
perl -pi -w -e 's/SEARCH_FOR/REPLACE_WITH/g;' *.txt |
||||
|
||||
for f in *.pdf *.PDF; do |
||||
[ -f "$f" ] || continue |
||||
gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/screen \ |
||||
-dNOPAUSE -dBATCH \ |
||||
-sOutputFile="downsampled/${f%.pdf}_72dpi.pdf" \ |
||||
"$f" |
||||
done |
||||
# In the folder that contains your original PDFs |
||||
# Recursively across all file types (in bash, run `shopt -s globstar` first so that ** recurses into subdirectories) |
||||
perl -pi -w -e 's/thex/robertsonlibrary/g;' **/*.* |
||||
|
||||
### Find and replace in file names |
||||
|
||||
# Rename files matching a pattern, verbose output shows what changed |
||||
rename 's/livero/lives/g' **/*.* -v |
||||
|
||||
--- |
||||
|
||||
## Torrents |
||||
|
||||
### Download via magnet link (aria2c) |
||||
|
||||
# aria2c is a lightweight multi-protocol download utility |
||||
aria2c -d ~/Downloads "magnetlink" |
||||
|
||||
--- |
||||
|
||||
## PDF Tools |
||||
|
||||
mkdir -p downsampled |
||||
### OCR a PDF (ocrmypdf) |
||||
|
||||
for f in *.pdf *.PDF; do |
||||
# Standard: optimize output, skip pages that already have a text layer |
||||
ocrmypdf --optimize 3 --skip-text input.pdf output.pdf |
||||
|
||||
# Aggressive: force re-OCR even if a text layer exists (useful for corrupt/bad layers), |
||||
# set DPI manually, use page segmentation mode 1 (automatic with OSD) |
||||
ocrmypdf --optimize 3 --image-dpi 300 --output-type pdf \ |
||||
--force-ocr --tesseract-pagesegmode 1 input.pdf output.pdf |
||||
|
||||
### Downsample a PDF to 72dpi (Ghostscript) |
||||
|
||||
# Single file |
||||
gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 \ |
||||
-dPDFSETTINGS=/screen \ |
||||
-dNOPAUSE -dQUIET -dBATCH \ |
||||
-sOutputFile=output.pdf input.pdf |
||||
|
||||
# Batch - processes all PDFs in current folder, preserves original filenames |
||||
mkdir -p downsampled |
||||
for f in *.pdf *.PDF; do |
||||
[ -f "$f" ] || continue |
||||
gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/screen \ |
||||
-dNOPAUSE -dBATCH -dQUIET \ |
||||
-sOutputFile="downsampled/$f" \ |
||||
"$f" |
||||
done |
||||
done |
||||
|
||||
### Check image DPI in PDFs (pdfimages) |
||||
|
||||
# check current dpi |
||||
for f in *.pdf *.PDF; do |
||||
echo "=== Images in: $f ===" |
||||
# Print image info to terminal |
||||
for f in *.pdf *.PDF; do |
||||
echo "=== $f ===" |
||||
pdfimages -list "$f" |
||||
echo "" |
||||
done |
||||
# Creates (or appends to) images_list.txt in the current directory |
||||
for f in *.pdf *.PDF; do |
||||
if [ -f "$f" ]; then |
||||
echo "=== Images in: $f ===" >> images_list.txt |
||||
done |
||||
|
||||
# Save output to images_list.txt instead |
||||
for f in *.pdf *.PDF; do |
||||
[ -f "$f" ] || continue |
||||
echo "=== $f ===" >> images_list.txt |
||||
pdfimages -list "$f" >> images_list.txt |
||||
echo "" >> images_list.txt |
||||
fi |
||||
done |
||||
# scan for ccitt encoding |
||||
for f in *.pdf *.PDF; do |
||||
done |
||||
|
||||
### Scan for CCITT encoding |
||||
|
||||
# CCITT is a fax-era compression format - flags PDFs that may cause compatibility issues |
||||
for f in *.pdf *.PDF; do |
||||
[ -f "$f" ] || continue |
||||
if pdfimages -list "$f" 2>/dev/null | grep -q " ccitt "; then |
||||
echo "$f uses CCITT" |
||||
fi |
||||
done |
||||
done |
||||
|
||||
Loading…
Reference in new issue