#!/usr/bin/perl -W
#
# This script will extract the texts from a Adobe Photoshop (PSD) file.
# The texts need to be still in text mode, on their own layer.
#
# 2004 Salagir - GPL licence.
#
# Tested on Photoshop 7 and Image Ready "the one which goes with Photoshop 7"
#
# 2007: Update, now works with Photoshop CS 2 too.
#
# Note that i think i work in latin-1(5?), so for specials character and such,
# this may be waaaay different for non-french photoshop users, but that's your problem :)
#
use strict qw(subs vars refs);
##### config #####
#
# Do you want to get the exact text, with a little specials characters ? => 0
# Or do you want to export in a simple text ? => 1
# Exemple of adaptations: the weird opening and closing quotes or double quotes will be replaced by good old ' or "
my $adapt = 1;
# A size limit (in megabytes) because memory doesn't grow on trees
my $PsdSizeLimit = 80;
#############################
#### actual script begin ####
die(" Syntax: ./PsdTextExtractor.pl file.psd\n") if (!$ARGV[0]);
open (BUFFY, $ARGV[0]) or die (" Can't find file '$ARGV[0]', stopped.\n");
binmode(BUFFY);
my $allf;
read(BUFFY, $allf, $PsdSizeLimit*1024*1024) or die ("Couldn't read from '$ARGV[0]'. (Note that I limit the size to $PsdSizeLimit MB)\n");
close(BUFFY);
my $zero = chr(0);
# non-latin-1 chars that we usually find... and their latin-1 translation
my %aliens = (
" ".chr(24), $adapt?"'":chr(145), # apostrophe ouvrante (' ^X' => '~Q')
" ".chr(25), $adapt?"'":chr(146), # apostrophe fermante (' ^Y' => '~R')
" ".chr(28), $adapt?'"':'«', # doubles quotes ouvrantes (' ^\' => ?))
" ".chr(29), $adapt?'"':'»', # doubles quotes fermantes (' ^]' => ?)
$zero.chr(160), $adapt?" ":chr(160) # espace insécable ('^@| ' => '| ')
);
# "header" des textes
# Si le fichier est sauvé avec /Adobe Photoshop 7/, les textes sont présents deux fois:
# (1) et (2). Si le fichier est sauvé avec /Adobe Image Ready 7/, les textes sont
# présent qu'avec (2), c'est donc ca qu'on va utiliser.
#
# Format (1):
# m/Txt TEXT(.{4})(.{$a})/
# with $1 an int of the numbers of char minus 1
# and $2 the string in utf-16 or sort (of course $a should be equal to two times the value found with $1)
#
# Format (2):
# m,/Editor << /Text \(..(.*?)\).\n) >>,
# The 4 dots being: chr(254), chr(255), a sequel of utf-16 chars or sort, and chr(0) (it's "\n" in utf-16)
#
my $begin = '/Editor(?: |\n\t\t)<<(?: |\n\t\t\t)/Text \('.chr(254).chr(255);
my $end = chr(0).chr(13).'\)(?: |\n\t\t)>>';
# c'est parti et c'est imbitable !!!
my ($tmp, $tmp2, @phrases);
#while ($allf =~ s/$begin((?:$zero[\w\s'",;:!?.\/$chars()\[\]-]$aliensstr)+)//s) { #old version
while ($allf =~ m,$begin(.*?)$end,gs) {
$tmp = $1;
# utf-16 to latin-1 converter (sort of)
$tmp2 = '';
while ($tmp =~ s/^(.)(.)(.?)/$3/s) {
if ($& eq $zero.'\\(' or $& eq $zero.'\\)') { # les parenthèses sont escapées
$tmp = "$zero$tmp"; # comme ca il sera pris normalement la prochaine fois
} elsif (defined($aliens{"$1$2"})) {
$tmp2 .= $aliens{"$1$2"};
} else {
$tmp2 .= $2;
}
}
$tmp2 =~ s/\s+/ /g if ($adapt);
push(@phrases, $tmp2);
}
foreach (@phrases) {
print "$_\n";
}