Normal
I prototyped the following C# code in IMDB.cs to support "aliases". Should I change the script instead?[CODE] /* Fetches the search-result page at strURL and adds one IMDBUrl to elements per title row, plus one per "aka" alias of that row (same URL, alias as title). iLimit is not referenced by this body. */ private void FindIMDB(string strURL, int iLimit) { string absoluteUri; string httpPage = GetPage(strURL, "utf-8", out absoluteUri); string httpString = System.Web.HttpUtility.HtmlDecode(httpPage); Match imdbEntry = Regex.Match( httpString, @"<tr>[\s]* <td[^>]*>.*?</td>[\s]* <td[^>]*>.*?</td>[\s]* <td[^>]*>.*? <a\shref=""(?<tt>/title/tt[0-9]*/)[^>]*>(?<title>.*?)</a> (?<options>.*?)[\s]* (?(?=<br>)(?<aka><br>.*?)) </td>[\s]* </tr>", RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled); while (imdbEntry.Success) { string imdbTitle = imdbEntry.Groups["title"].Value.Replace("\"", ""); string imdbURL = imdbEntry.Groups["tt"].Value; string imdbOptions = Regex.Replace(imdbEntry.Groups["options"].Value, @"<(.|\n)*?>", String.Empty, RegexOptions.IgnoreCase | RegexOptions.Compiled); IMDBUrl url = new IMDBUrl(@"http://us.imdb.com" + imdbURL, imdbTitle + @" " + imdbOptions + @" (imdb)", @"IMDB"); elements.Add(url); /* Read the alias block of THIS row before advancing, so aliases stay paired with their own URL. */ string imdbAka = imdbEntry.Groups["aka"].Value; Match imdbAlias = Regex.Match(imdbAka, @"<br>\saka\s<em>""(?<alias>.*?)""</em>", RegexOptions.IgnoreCase | RegexOptions.Compiled); while (imdbAlias.Success) { imdbTitle = imdbAlias.Groups["alias"].Value; url = new IMDBUrl(@"http://us.imdb.com" + imdbURL, imdbTitle + @" " + imdbOptions + @" (imdb)", @"IMDB"); elements.Add(url); imdbAlias = imdbAlias.NextMatch(); } /* Advance to the next result row only after its predecessor is fully processed. */ imdbEntry = imdbEntry.NextMatch(); } }[/CODE]My favorite western is "The Good, The Bad, and the Ugly". Without this change I have to re-name the file to "Buono, il brutto, il cattivo., Il" to get an IMDB match. The above code adds aliases to the list of results so I can use the US english title instead of the published Italian name.Rick
I prototyped the following C# code in IMDB.cs to support "aliases". Should I change the script instead?
[CODE]
/// <summary>
/// Fetches an IMDB search-result page and appends one <c>IMDBUrl</c> to
/// <c>elements</c> for every title row found, followed by one extra entry for
/// each "aka" alias listed in that same row. Alias entries reuse the row's URL
/// and options text and use the alias as the display title.
/// </summary>
/// <param name="strURL">Address of the search-result page to download.</param>
/// <param name="iLimit">Not referenced by this method body.</param>
private void FindIMDB(string strURL, int iLimit)
{
    string absoluteUri;
    string httpPage = GetPage(strURL, "utf-8", out absoluteUri);
    string httpString = System.Web.HttpUtility.HtmlDecode(httpPage);

    // One result row: two leading cells, then a third cell holding the title
    // link, the trailing "options" text and an optional <br>-introduced alias
    // block. IgnorePatternWhitespace allows the pattern to be laid out over
    // several lines; Singleline lets ".*?" cross line breaks in the page.
    Match imdbEntry = Regex.Match(
        httpString,
        @"<tr>[\s]*
            <td[^>]*>.*?</td>[\s]*
            <td[^>]*>.*?</td>[\s]*
            <td[^>]*>.*?
                <a\shref=""(?<tt>/title/tt[0-9]*/)[^>]*>(?<title>.*?)</a>
                (?<options>.*?)[\s]*
                (?(?=<br>)(?<aka><br>.*?))
            </td>[\s]*
        </tr>",
        RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);

    while (imdbEntry.Success)
    {
        string imdbTitle = imdbEntry.Groups["title"].Value.Replace("\"", "");
        string imdbURL = imdbEntry.Groups["tt"].Value;
        // Strip any markup from the text that follows the title link.
        string imdbOptions = Regex.Replace(imdbEntry.Groups["options"].Value, @"<(.|\n)*?>", String.Empty,
            RegexOptions.IgnoreCase | RegexOptions.Compiled);

        IMDBUrl url = new IMDBUrl(@"http://us.imdb.com" + imdbURL, imdbTitle + @" " + imdbOptions + @" (imdb)", @"IMDB");
        elements.Add(url);

        // Read the alias block of THIS row before advancing; reading it after
        // NextMatch() would pair the next row's aliases with this row's URL.
        string imdbAka = imdbEntry.Groups["aka"].Value;
        Match imdbAlias = Regex.Match(imdbAka, @"<br>\saka\s<em>""(?<alias>.*?)""</em>", RegexOptions.IgnoreCase | RegexOptions.Compiled);
        while (imdbAlias.Success)
        {
            imdbTitle = imdbAlias.Groups["alias"].Value;
            url = new IMDBUrl(@"http://us.imdb.com" + imdbURL, imdbTitle + @" " + imdbOptions + @" (imdb)", @"IMDB");
            elements.Add(url);
            imdbAlias = imdbAlias.NextMatch();
        }

        imdbEntry = imdbEntry.NextMatch();
    }
}
[/CODE]
My favorite western is "The Good, The Bad, and the Ugly". Without this change I have to rename the file to "Buono, il brutto, il cattivo., Il" to get an IMDB match. The above code adds aliases to the list of results so I can use the US English title instead of the published Italian name.
Rick