<ScriptableScraper>
  <details>
    <!-- These details are for display purposes only. -->
    <name>imdb.com</name>
    <author>John Conrad (fforde), Armand Pondman (armandp), Ron (RoChess)</author>
    <description>This script pulls data from IMDb.</description>

    <!-- 
    These fields uniquely identify the script. The ID should never 
    change and should be unique (no other script should use this ID). With
    new versions of the script the version numbers should change. EVEN FOR 
    MINOR CHANGES, you should change the version number if you are going to
    distribute it to anyone but internal testers. The point number 
    is for minor bug fix releases. Use it.
    -->
    <id>974902</id>
    <version major="1" minor="6" point="1" />
    <published month="09" day="20" year="2010" />
    
    <!--
    These fields are used for categorization purposes. Separate multiple 
    types with a pipe | character.
    -->
    <type>MovieDetailsFetcher|MovieCoverFetcher</type>
    <language>en</language>

  </details>
  
  
  <action name="search">

    <!--
    Builds the candidate list movie[n] for a lookup. Reads ${search.imdb_id},
    ${search.title} and ${search.year}. For every candidate it sets title,
    year, imdb_id, site_id and popularity, plus alternate_titles (a
    pipe-delimited list) where AKA data is available.
    -->

    <!-- Index of the next free movie[n] slot. It is advanced whenever a result
         is stored outside the result loops, so that later results are appended
         after it instead of overwriting it. -->
    <set name="offset" value="0" />

    <!-- Regular Expressions -->

    <!-- Matches only when the page title ends in a "(year)" suffix, which is how
         a title details page is told apart from a search result list. -->
    <set name="rx_details_page_block">
      <![CDATA[
      <title>.+?\(\d{4}[\/IVX]*\).*?</title>.+</body>
      ]]>
    </set>

    <!-- Group 0: title (without the whitespace before the year), group 1: year. -->
    <set name="rx_title_year">
      <![CDATA[
      <title>(.+?)\s*\((\d{4})[\/IVX]*\).*?</title>
      ]]>
    </set>

    <!-- First entry of the "Titles (Exact Matches)" section, but only when a
         "Popular Titles" section precedes it. Groups: id, title, year. -->
    <set name="rx_title_exact_match">
      <![CDATA[
      <p><b>Popular Titles</b> \(Displaying \d{2} Results\).*?<p><b>Titles \(Exact Matches\)</b>.*?>\d+\.<.*?><a href="/title/(tt\d{7})/".*?>(.+?)</a> \((\d{4})\)
      ]]>
    </set>

    <!-- One row of the result list. Groups: id, title, year, trailing cell text
         (searched for aka entries below). Rows whose title starts with an
         encoded quote (&#34;) or that are tagged (VG) are skipped. -->
    <set name="rx_search_results">
      <![CDATA[
      ><a href="/title/(tt\d{7})/"[^>]+>(?!&#34;)([^<]+)</a> \((\d{4})[\/IVX]*\)(?! \(VG\))(?:[^<].*?>(?=aka ")?(.+?)?</td>)?
      ]]>
    </set>

    <!-- Two-cell table rows of the release info page; the first cell is used
         as the alternate title. -->
    <set name="rx_aka_details">
      <![CDATA[
      <tr>\s+?<td>(.+?)</td>\s+?<td>(.+?)</td>.*?</tr>
      ]]>
    </set>

    <!-- Quoted title following the word "aka" in a search result row. -->
    <set name="rx_aka">
      <![CDATA[
      aka "(.+?)"
      ]]>
    </set>

    <!-- Retrieve result using IMDB id (if provided) -->
    <if test='${search.imdb_id}!='>
      <retrieve name="imdb_page" url="http://akas.imdb.com/find?s=tt;q=${search.imdb_id}" />
      <!-- Uses its own variable names so this lookup and the title search
           below never share parse results or loop variables. -->
      <parse name="id_details_page_block" input="${imdb_page}" regex="${rx_details_page_block}" />
      <loop name="id_details_page" on="id_details_page_block">
        <parse name="title_year" input="${id_details_page}" regex="${rx_title_year}" />
        <set name="movie[0].title" value="${title_year[0][0]:htmldecode}" />
        <set name="movie[0].year" value="${title_year[0][1]:htmldecode}" />
        <parse name="imdb" input="${id_details_page}" regex="(tt\d{7})/fullcredits" />
        <set name="movie[0].imdb_id" value="${imdb[0][0]}" />
        <set name="movie[0].site_id" value="${imdb[0][0]}" />
        <set name="movie[0].popularity" value="100" />
        <!-- Slot 0 is taken; everything found by title goes after it. -->
        <set name="offset" value="1" />
      </loop>
    </if>

    <!-- Retrieve other results using Title -->

    <!-- The year is appended to the query only when it is greater than 1800. -->
    <set name="query_year" value='' />
    <if test='${search.year}>1800'>
      <set name="query_year" value="+(${search.year})" />
    </if>
    <retrieve name="search_page" url="http://akas.imdb.com/find?s=tt;q=${search.title:safe(ISO-8859-1)}${query_year}" />

    <!-- if we got a details page, this is used. if not, regex does not match so we dont process the loop-->
    <parse name="details_page_block" input="${search_page}" regex="${rx_details_page_block}" />
    <loop name="details_page_verified" on="details_page_block">
      <!-- ${count} is expected to be supplied by the loop (iteration index); TODO confirm against the engine. -->
      <add name="counter" value1="${count}" value2="${offset}" />
      <parse name="title_year" input="${details_page_verified}" regex="${rx_title_year}" />
      <set name="movie[${counter}].title" value="${title_year[0][0]:htmldecode}" />
      <set name="movie[${counter}].year" value="${title_year[0][1]:htmldecode}" />
      <parse name="imdb" input="${details_page_verified}" regex="(tt\d{7})/fullcredits" />
      <set name="movie[${counter}].imdb_id" value="${imdb[0][0]}" />
      <set name="movie[${counter}].site_id" value="${imdb[0][0]}" />

      <!-- AKA page -->
      <retrieve name="aka_page" url="http://www.imdb.com/title/${imdb[0][0]}/releaseinfo" />
      <parse name="akas" input="${aka_page}" regex="${rx_aka_details}" />
      <set name="movie[${counter}].alternate_titles" value="" />
      <loop name="currAka" on="akas" limit="50">
        <set name="movie[${counter}].alternate_titles" value="${movie[${counter}].alternate_titles}|${currAka[0]:htmldecode}" />
      </loop>

      <!-- Popularity decreases with the position in the list. -->
      <subtract name="movie[${counter}].popularity" value1="100" value2="${counter}" />
    </loop>


    <!-- if we got a search result page, this is used. if not, regex does not match so we dont process the outer loop. -->
    <parse name="search_results_block" input="${search_page}" regex='&lt;meta name="title" content="IMDb Title Search"&gt;.+&lt;/body&gt;' />
    <if test="${search_results_block}!=">

      <!-- to prevent an exact title match from being buried by popular matches, lets grab the first exact title match result -->
      <parse name="title_exact_match" input="${search_results_block}" regex="${rx_title_exact_match}" />
      <if test="${title_exact_match}!=">
        <!-- Stored at the next free slot rather than always at movie[0], so a
             result already found through the IMDb id is not overwritten. With
             no id result, offset is 0 and this is slot 0 with popularity 100. -->
        <set name="movie[${offset}].imdb_id" value="${title_exact_match[0][0]:htmldecode}" />
        <set name="movie[${offset}].site_id" value="${title_exact_match[0][0]:htmldecode}" />
        <set name="movie[${offset}].title" value="${title_exact_match[0][1]:htmldecode}" />
        <set name="movie[${offset}].year" value="${title_exact_match[0][2]:htmldecode}" />
        <subtract name="movie[${offset}].popularity" value1="100" value2="${offset}" />
        <add name="offset" value1="${offset}" value2="1" />
      </if>

      <!-- process the rest of the search page and obtain all the popular results -->
      <loop name="search_results_verified" on="search_results_block">
        <parse name="movie_details" input="${search_results_verified}" regex="${rx_search_results}" />
        <loop name='curr_details' on='movie_details'>
          <add name="counter" value1="${count}" value2="${offset}" />
          <set name="movie[${counter}].imdb_id" value="${curr_details[0]:htmldecode}" />
          <set name="movie[${counter}].site_id" value="${curr_details[0]:htmldecode}" />
          <set name="movie[${counter}].title" value="${curr_details[1]:htmldecode}" />

          <!-- Alternate titles are taken from the text that follows the year in the same row. -->
          <parse name="akas" input="${curr_details[3]:htmldecode}" regex="${rx_aka}" />
          <set name="movie[${counter}].alternate_titles" value="" />
          <loop name="currAka" on="akas" limit="50">
            <set name="movie[${counter}].alternate_titles" value="${movie[${counter}].alternate_titles}|${currAka[0]}" />
          </loop>

          <set name="movie[${counter}].year" value="${curr_details[2]:htmldecode}" />
          <subtract name="movie[${counter}].popularity" value1="100" value2="${counter}" />
        </loop>
      </loop>
    </if>
  </action>


  <action name="get_details">

    <!--
    Fills in the movie.* fields for one title from its IMDb pages. Reads
    ${movie.site_id} (falling back to ${movie.imdb_id}) and does nothing when
    neither is set. Multi-valued fields (directors, writers, actors, genres,
    alternate_titles) are built as pipe-delimited lists with a leading pipe.
    -->

    <!-- if the site id is not set try to set it using the imdb id -->
    <if test="${movie.site_id}=">
      <if test="${movie.imdb_id}!=">
        <set name="movie.site_id" value="${movie.imdb_id}" />
      </if>
    </if>

    <!-- if we have a site id (imdb id) we can continue -->
    <if test="${movie.site_id}!=">

      <set name="site" value="http://www.imdb.com/title/${movie.site_id}" />

      <retrieve name="details_page" url="${site}" />

      <!-- Regular Expressions. Several patterns contain two alternatives,
           each written for a different page markup (h5 based and h4/h2 based). -->

      <!-- Title and year from the page title. The title part may contain
           parentheses itself, e.g. "(500) Days of Summer (2009)", so it is
           matched as "anything but a tag" instead of "anything but (". -->
      <set name="rx_title_year">
        <![CDATA[
        <title>(?<movieTitle>[^<]+?)\s*\((?<movieYear>\d{4})[\/IVX]*\).*?</title>
        ]]>
      </set>

      <set name="rx_directors_block">
        <![CDATA[
        <h5>Director[s]?:</h5>(?<directors_block>.*?)</div>|Director[s]?:\s*</h4>[^<]*(?<directors_block>.*?)</div>
        ]]>
      </set>

      <set name="rx_directors">
        <![CDATA[
        <a\s+href="/name/nm\d{7}/"[^>]*>(?<movieDirectors>[^<]+)</a>
        ]]>
      </set>

      <set name="rx_writers_block">
        <![CDATA[
        <h5>Writer[s]?.*?<div class="info-content">.*?(?<writers_block>.*?)</div>|Writer[s]?:\s*</h4>[^<]*(?<writers_block>.*?)</div>
        ]]>
      </set>

      <set name="rx_writers">
        <![CDATA[
        <a\s+href="/name/nm\d{7}/"[^>]*>(?<movieWriters>[^<]+)</a>
        ]]>
      </set>

      <set name="rx_actors_block">
        <![CDATA[
        <div class="headerinline"><h3>Cast</h3>(?<cast_block>.+?)<h3>Additional Details</h3>|<h2>Cast</h2>(?<cast_block>.+?)<h2>Details</h2>
        ]]>
      </set>

      <set name="rx_actors">
        <![CDATA[
        <td[^<]*<a\s+href="/name/nm\d{7}/"[^>]*>(?<movieActors>[^<]*)</a>
        ]]>
      </set>

      <set name="rx_genres_block">
        <![CDATA[
        <h5>Genre:</h5>(?<genres_block>.+?)</div>|Genre[s]?:[^<]*</h4>(?<genres_block>.+?)</div>
        ]]>
      </set>

      <set name="rx_genres">
        <![CDATA[
        <a href="(?:/Sections)?/[Gg]enre[s]?/[^/]+/?">(?<movieGenres>.+?)</a>
        ]]>
      </set>

      <!-- Only the listed values (U, PG, 12, 12A, 15, 18, R18) are accepted,
           either after "UK:" or after the word "Rated". -->
      <set name="rx_certification">
        <![CDATA[
        (?:>\s*UK:|Rated\s*?)(?<movieCertification>(?:U)|(?:PG)|(?:12)|(?:12A)|(?:15)|(?:18)|(?:R18))(?:</a>|\s*?for)
        ]]>
      </set>

      <set name="rx_runtime">
        <![CDATA[
        <h5>Runtime:</h5>.*?(?<movieRuntime>\d+) min\s+|(?<movieRuntime>\d+) min&nbsp;&nbsp;-&nbsp;&nbsp;
        ]]>
      </set>

      <set name="rx_tagline">
        <![CDATA[
        <h5>Tagline.</h5>[^>]+>[^\r]?(?<movieTagline>[^<]+)|Tagline[s]?:</h4>[\s\f\r]*(?<movieTagline>[^<]+)
        ]]>
      </set>

      <!-- Score out of 10. The separator must be a real decimal point or
           comma; an unescaped "." would accept any character there. -->
      <set name="rx_score">
        <![CDATA[
        <b>(?<movieScore>\d+[.,]\d+)/10</b>|<span[^<]*>(?<movieScore>\d+[.,]\d+)<span>/10</span>
        ]]>
      </set>

      <!-- Vote count; it still contains thousands separators at this point. -->
      <set name="rx_popularity">
        <![CDATA[
        "ratings"[^>]+>(?<moviePopularity>[^\s]+) votes</a>
        ]]>
      </set>

      <set name="rx_language">
        <![CDATA[
        <a\shref=./Sections/Languages/[^/]+/?">\s*(?<movieLanguage>[^<]+)</a>
        ]]>
      </set>

      <!-- Long plot, matched against the plot summary page. -->
      <set name="rx_plot">
        <![CDATA[
        <p class="plotpar">\s*(?<moviePlotLong>.*?)<i>.*?</i>.*?</p>|<h5>Plot Synopsis:</h5>[^>]+>\s*(?<moviePlotLong>.*?)\s*<a
        ]]>
      </set>

      <!-- Short plot, matched against the details page as a fallback. -->
      <set name="rx_plot2">
        <![CDATA[
        <h5>Plot:</h5>[^>]+>\s+?(?<moviePlotShort>.*?)(?: \|)?\s+?<a|Users:.*?<p>(?<moviePlotShort>.*?)</p>
        ]]>
      </set>

      <!-- Two-cell table rows of the release info page; the first cell is used
           as the alternate title. -->
      <set name="rx_aka">
        <![CDATA[
        <tr>\s+?<td>(.+?)</td>\s+?<td>(.+?)</td>.*?</tr>
        ]]>
      </set>

      <set name="movie.imdb_id" value="${movie.site_id}" />

      <!-- Title and Year -->
      <parse name="title_year" input="${details_page}" regex="${rx_title_year}" />
      <set name="movie.title" value="${title_year[0][0]:htmldecode}" />
      <set name="movie.year" value="${title_year[0][1]:htmldecode}" />

      <!-- Directors: first isolate the block, then collect every name link in it -->
      <parse name="directors_block" input="${details_page}" regex="${rx_directors_block}" />
      <parse name="directors" input="${directors_block}" regex="${rx_directors}" />
      <set name='movie.directors' value='' />
      <loop name='currDirector' on='directors'>
        <set name="movie.directors" value="${movie.directors}|${currDirector[0]:htmldecode}" />
      </loop>

      <!-- Writers -->
      <parse name="writers_block" input="${details_page}" regex="${rx_writers_block}" />
      <parse name='writers' input="${writers_block}" regex="${rx_writers}" />
      <set name='movie.writers' value='' />
      <loop name='currWriter' on='writers'>
        <set name='movie.writers' value='${movie.writers}|${currWriter[0]:htmldecode}' />
      </loop>

      <!-- Actors -->
      <parse name="actors_block" input="${details_page}" regex="${rx_actors_block}" />
      <parse name='actors' input='${actors_block}' regex="${rx_actors}" />
      <set name='movie.actors' value='' />
      <loop name='currActor' on='actors'>
        <set name='movie.actors' value='${movie.actors}|${currActor[0]:htmldecode}' />
      </loop>

      <!-- Genres -->
      <parse name="genres_block" input="${details_page}" regex="${rx_genres_block}" />
      <parse name="genres" input="${genres_block}" regex="${rx_genres}" />
      <set name='movie.genres' value='' />
      <loop name='currGenre' on='genres'>
        <set name='movie.genres' value='${movie.genres}|${currGenre[0]:htmldecode}' />
      </loop>

      <!-- Certification: taken from the details page, otherwise from the parental guide page -->
      <parse name="certification" input="${details_page}" regex="${rx_certification}" />
      <set name='movie.certification' value='${certification[0][0]:htmldecode}' />
      <if test="${movie.certification}=">
        <retrieve name='parentalguide_page' url='${site}/parentalguide' />
        <parse name="certification" input="${parentalguide_page}" regex="${rx_certification}" />
        <set name='movie.certification' value='${certification[0][0]:htmldecode}' />
      </if>

      <!-- Runtime -->
      <parse name="runtime" input="${details_page}" regex="${rx_runtime}" />
      <set name='movie.runtime' value='${runtime[0][0]:htmldecode}' />

      <!-- Tagline -->
      <parse name="tagline" input="${details_page}" regex="${rx_tagline}" />
      <set name='movie.tagline' value='${tagline[0][0]:htmldecode}' />

      <!-- Score -->
      <parse name="score" input="${details_page}" regex="${rx_score}" />
      <set name='movie.score' value='${score[0][0]:htmldecode}' />

      <!-- Popularity: the vote count with "," and "." separators removed -->
      <parse name="votes" input="${details_page}" regex="${rx_popularity}" />
      <replace name='votes_clean1' input='${votes[0][0]:htmldecode}' pattern='\,' with='' />
      <replace name='votes_clean2' input='${votes_clean1}' pattern='\.' with='' />
      <set name='movie.popularity' value='${votes_clean2}' />

      <!-- Language -->
      <parse name="language" input="${details_page}" regex='${rx_language}' />
      <set name='movie.language' value='${language[0][0]:htmldecode}' />

      <!-- Plot Summary -->
      <retrieve name='summary_page' url='${site}/plotsummary' />
      <parse name="summary" input="${summary_page}" regex="${rx_plot}" />
      <set name="summary_clean" value="${summary[0][0]:striptags}" />
      <set name="movie.summary" value="${summary_clean:htmldecode}" />

      <!-- Plot Summary (if first method fails) -->
      <if test="${movie.summary}=">
        <parse name="summary2" input="${details_page}" regex="${rx_plot2}" />
        <set name="summary_clean" value="${summary2[0][0]:striptags}" />
        <set name="movie.summary" value="${summary_clean:htmldecode}" />
      </if>

      <!-- AKA page -->
      <retrieve name="aka_page" url='${site}/releaseinfo' />
      <parse name="akas" input="${aka_page}" regex="${rx_aka}" />
      <set name="movie.alternate_titles" value="" />
      <loop name="currAka" on="akas" limit="50">
        <set name="movie.alternate_titles" value="${movie.alternate_titles}|${currAka[0]:htmldecode}" />
      </loop>

      <!-- Details URL -->
      <set name="movie.details_url" value="${site}" />

    </if>

  </action>

  <!-- Covers -->
  
  <action name='get_cover_art'>

    <!--
    Looks up the poster for one title and stores its address in
    cover_art[0].url. Works from ${movie.site_id}; when that is empty the
    value of ${movie.imdb_id} is used in its place.
    -->

    <!-- Fall back to the imdb id when no site id has been provided. -->
    <if test='${movie.site_id}='>
      <if test='${movie.imdb_id}!='>
        <set name='movie.site_id' value='${movie.imdb_id}' />
      </if>
    </if>

    <!-- Nothing can be fetched without an id. -->
    <if test='${movie.site_id}!='>

      <!-- Link on the title page that leads to the poster page. -->
      <set name='rx_poster_link'><![CDATA[<a name="poster" href="([^"]+)"]]></set>

      <!-- Image source of the primary image on the poster page. -->
      <set name='rx_poster_image'><![CDATA[primary-img.+?src="([^"]+)"]]></set>

      <!-- Step 1: the title page tells us where the poster page is. -->
      <retrieve name='title_page' url='http://www.imdb.com/title/${movie.site_id}' />
      <parse name='poster_link' input='${title_page}' regex='${rx_poster_link}' />

      <if test='${poster_link}!='>

        <!-- Step 2: the poster page carries the actual image address. -->
        <retrieve name='poster_page' url='http://www.imdb.com${poster_link[0][0]}' />
        <parse name='poster_image' input='${poster_page}' regex='${rx_poster_image}' />

        <!-- Step 3: publish the image address as the first cover. -->
        <set name='cover_art[0].url' value='${poster_image[0][0]}' />

      </if>

    </if>

  </action>
  
</ScriptableScraper>