#!/usr/bin/perl -W
 
# 
# This script will extract the texts from a Adobe Photoshop (PSD) file.
# The texts need to be still in text mode, on their own layer.
#
# 2004 Salagir - GPL licence.
# 
# Tested on Photoshop 7 and Image Ready "the one which goes with Photoshop 7"
#
# 2007: Update, now works with Photoshop CS 2 too.
#
# Note that i think i work in latin-1(5?), so for specials character and such,
# this may be waaaay different for non-french photoshop users, but that's your problem :)
# 
 
use strict qw(subs vars refs);
 
##### config #####
#
# Do you want to get the exact text, with a little specials characters ? => 0
# Or do you want to export in a simple text ? => 1
# Exemple of adaptations: the weird opening and closing quotes or double quotes will be replaced by good old ' or "
my $adapt = 1;
 
# A size limit (in megabytes) because memory doesn't grow on trees
my $PsdSizeLimit = 80;
 
 
 
#############################
#### actual script begin ####
 
die(" Syntax:   ./PsdTextExtractor.pl file.psd\n") if (!$ARGV[0]);
 
open (BUFFY, $ARGV[0]) or die (" Can't find file '$ARGV[0]', stopped.\n");
binmode(BUFFY);
my $allf;
read(BUFFY, $allf, $PsdSizeLimit*1024*1024) or die ("Couldn't read from '$ARGV[0]'. (Note that I limit the size to $PsdSizeLimit MB)\n");
close(BUFFY);
 
my $zero = chr(0);
 
# non-latin-1 chars that we usually find... and their latin-1 translation
my %aliens = (
		" ".chr(24), $adapt?"'":chr(145), # apostrophe ouvrante (' ^X' => '~Q')
		" ".chr(25), $adapt?"'":chr(146), # apostrophe fermante (' ^Y' => '~R')
		" ".chr(28), $adapt?'"':'«', # doubles quotes ouvrantes (' ^\' => ?))
		" ".chr(29), $adapt?'"':'»', # doubles quotes fermantes (' ^]' => ?)
		$zero.chr(160), $adapt?" ":chr(160) # espace insécable ('^@| ' => '| ')
		);
 
 
# "header" des textes
# Si le fichier est sauvé avec /Adobe Photoshop 7/, les textes sont présents deux fois:
# (1) et (2). Si le fichier est sauvé avec /Adobe Image Ready 7/, les textes sont
# présent qu'avec (2), c'est donc ca qu'on va utiliser.
#
# Format (1):
# m/Txt TEXT(.{4})(.{$a})/
# with $1 an int of the numbers of char minus 1
# and $2 the string in utf-16 or sort  (of course $a should be equal to two times the value found with $1)
#
# Format (2):
# m,/Editor  <<  /Text \(..(.*?)\).\n)  >>,
# The 4 dots being: chr(254), chr(255), a sequel of utf-16 chars or sort, and chr(0) (it's "\n" in utf-16)
#
my $begin = '/Editor(?:  |\n\t\t)<<(?:  |\n\t\t\t)/Text \('.chr(254).chr(255);
my $end = chr(0).chr(13).'\)(?:  |\n\t\t)>>';
 
# c'est parti et c'est imbitable !!!
my ($tmp, $tmp2, @phrases);
#while ($allf =~ s/$begin((?:$zero[\w\s'",;:!?.\/$chars()\[\]-]$aliensstr)+)//s) { #old version
while ($allf =~ m,$begin(.*?)$end,gs) {
	$tmp = $1;
	# utf-16 to latin-1 converter (sort of)
	$tmp2 = '';
	while ($tmp =~ s/^(.)(.)(.?)/$3/s) {
		if ($& eq $zero.'\\('  or  $& eq $zero.'\\)') { # les parenthèses sont escapées
			$tmp = "$zero$tmp"; # comme ca il sera pris normalement la prochaine fois
		} elsif (defined($aliens{"$1$2"})) {
			$tmp2 .= $aliens{"$1$2"};
		} else {
			$tmp2 .= $2;
		}
	}
	$tmp2 =~ s/\s+/ /g  if ($adapt);
	push(@phrases, $tmp2);
}
 
 
foreach (@phrases) {
	print "$_\n";
}