**------------------------------------------------------------------------------------------------
* @header_start
* WebGrab+Plus ini for grabbing EPG data from TvGuide websites
* @Site: tvguide.com
* @MinSWversion: V1.53.15/0.16
*   none
* @Revision 8 - [02/05/2014] Francis De Paemeleere
*  - adjust to the new timezone implementation of WG++
* @Revision 7 - [05/03/2014] Francis De Paemeleere
*  - total rewrite of the code
* @Remarks:
* this is the base version
* timezone is set to +00:00, because the site uses UTC timings
* @header_end
**------------------------------------------------------------------------------------------------

* Global site settings. The listing times are handled as UTC (see the header remarks).
* NOTE(review): keeptabs presumably preserves tab characters in downloaded pages - the detail page scrubs
* further down split their input on tabs, so confirm before removing it.
site {url=tvguide.com|timezone=UTC|maxdays=16.1|cultureinfo=en-US|charset=UTF-8|titlematchfactor=90|firstshow=2|keeptabs}
site {ratingsystem=MPAA|episodesystem=xmltv_ns}

* NOTE(review): presumably renders the urldate part of url_index as a UNIX timestamp - confirm against the WG++ documentation.
urldate.format{datenumber|UNIX|0}

* Fill the ##...## placeholders of url_index from the channel site_id and the configured timespan.
* The three regexes expect the site_id to contain (srvID<id>), SourceId:<id> and Number:<nr>.
* index_temp_1 is only a scratch element here and is overwritten by every step.
scope.range {(urlindex)|end}
index_temp_1.modify {substring(type=regex)|'config_site_id' "\(srvID([^)]*)\)"}
url_index.modify {replace|##svrID##|'index_temp_1'}
index_temp_1.modify {substring(type=regex)|'config_site_id' "SourceId:([^,]*)"}
url_index.modify {replace|##SourceId##|'index_temp_1'}
index_temp_1.modify {substring(type=regex)|'config_site_id' "Number:([^,]*)"}
url_index.modify {replace|##Number##|'index_temp_1'}
* postfix notation: (config_timespan_days + 1) * 1440 minutes
index_temp_1.modify {calculate(format=D0)|'config_timespan_days' 1 + 1440 *}  * calculate how many minutes we want to get (so we only download the days we want to see)
url_index.modify {replace|##Minutes##|'index_temp_1'}
end_scope

* One request per channel: airings of the channel from the start date for ##Minutes## minutes, returned as JSON.
url_index{url|http://mobilelistings.tvguide.com/Listingsweb/ws/rest/airings/##svrID##/start/|urldate|/duration/##Minutes##?channelsourceids=##SourceId##%7C##Number##&formattype=json}
url_index.headers 	  {customheader=Accept-Encoding=gzip,deflate}     * to speed up the downloading of the index pages

* Split the JSON index page into one part per show: each part starts at "ProgramSchedule": and runs up to the next }}
index_showsplit.scrub {regex||"ProgramSchedule":.*?}}||}

* Per show values taken from the JSON fields of that part.
index_start.scrub {regex||"StartTime"\s*:\s*((?:[^\s,])*)\s*,||}
index_stop.scrub  {regex||"EndTime"\s*:\s*((?:[^\s,])*)\s*,||}
* Title and EpisodeTitle: the string value, allowing backslash escaped characters inside it
index_title.scrub {regex||"Title"\s*:\s*"([^"\\]*(?:\\.[^"\\]*)*)"||}
index_subtitle.scrub {regex()||"EpisodeTitle"\s*:\s*"([^"\\]*(?:\\.[^"\\]*)*)"||}
* ProgramId: used further down to build the detail page url (index_urlshow)
index_temp_3.scrub {regex||"ProgramId"\s*:\s*((?:[^\s,])*)\s*,||}
index_temp_4.scrub {regex||"AiringAttrib"\s*:\s*(\d*)||}	* binary flags, as interpreted by the flag code in the indexshowdetails scope: 1=live, 2=repeat, 4=new, 8=cc
* grab season and episode number
index_temp_5.scrub {regex||"SeasonNumber":(\d+)||}
* NOTE(review): unlike SeasonNumber, this pattern requires a double quote in front of the digits - confirm that the site really quotes EpisodeNumber
index_temp_6.scrub {regex||"EpisodeNumber":"(\d+)||}

* Post-processing of the values scrubbed from the index page, done per show.
scope.range {(indexshowdetails)|end}
index_title.modify {cleanup(style=jsondecode)}
index_subtitle.modify {cleanup(style=jsondecode)}
* remove the subtitle (preceded by a colon) from the title
index_title.modify {remove(notnull type=string)|:'index_subtitle'}
* NOTE(review): presumably converts the scrubbed start/stop values into UTC HH:mm times - confirm against the WG++ documentation
index_start.modify {calculate(format=utctime,HH:mm)}
index_stop.modify  {calculate(format=utctime,HH:mm)}

* generate season.episode
* index_temp_5 = season, index_temp_6 = episode; 1 is subtracted from every non-empty value
* (the site line declares episodesystem=xmltv_ns). The result has the form season.episode.
index_episode.modify {clear}
index_temp_5.modify  {calculate(not="" format=F0)|1 -}
index_episode.modify {addend('index_temp_5' not="")|'index_temp_5'}
index_episode.modify {addend|.}
index_temp_6.modify  {calculate(not="" format=F0)|1 -}
index_episode.modify {addend('index_temp_6' not="")|'index_temp_6'}
index_episode.modify {addend|.}
* drop the result when neither number was found, or when it contains -1 (the site delivered a 0)
index_episode.modify {clear(="..")}
index_episode.modify {clear(~"-1")}
* index_temp_6 is reused from here on to collect the flag texts
index_temp_6.modify  {clear}

* add flags according to airing attributes
* index_temp_5 is reused as scratch: it holds AiringAttrib (index_temp_4) masked with one bit at a time.
* The collected texts in index_temp_6 are appended to the description in the showdetails scope.
index_temp_5.modify {calculate(format=D0)|'index_temp_4' 1 and}
index_temp_6.modify {addend('index_temp_5'=="1")| (live)}
index_temp_5.modify {calculate(format=D0)|'index_temp_4' 2 and}
index_temp_6.modify {addend('index_temp_5'=="2")| (repeat)}
index_temp_5.modify {calculate(format=D0)|'index_temp_4' 4 and}
index_temp_6.modify {addend('index_temp_5'=="4")| (new)}
* next line adds * to new shows, Title * = new shows, Title = "not new", 2 places
index_title.modify {addend('index_temp_5'=="4")| *}
index_temp_5.modify {calculate(format=D0)|'index_temp_4' 8 and}
index_temp_6.modify {addend('index_temp_5'=="8")| (cc)}
end_scope

* Detail page url of a show, built from the ProgramId scrubbed into index_temp_3.
* The url is only set when a ProgramId was found; the condition uses the same not="" form as the other emptiness tests in this file.
index_urlshow.modify {addstart('index_temp_3' not="")|http://www.tvguide.com/listings/data/detailcache.aspx?Qr='index_temp_3'}
index_urlshow.headers {customheader=Accept-Encoding=gzip,deflate}     * to speed up the downloading of the detail pages

* The detail page is read as one line of tab separated fields.
* Each regex skips N complete fields (the number between the curly brackets) and captures the next one, so N is the zero based field index.
* field 0: used further down as the program parameter of the subdetail url
temp_1.scrub 			{regex||^(?:[^\t]*\t){0}([^\t]*)||}
title.scrub 			{regex||^(?:[^\t]*\t){1}([^\t]*)||}
* field 2 (subtitle) is not scrubbed from the detail page
*subtitle.scrub			{regex||^(?:[^\t]*\t){2}([^\t]*)||}
description.scrub		{regex||^(?:[^\t]*\t){3}([^\t]*)||}
rating.scrub			{regex||^(?:[^\t]*\t){7}([^\t]*)||}
productiondate.scrub	{regex||^(?:[^\t]*\t){9}([^\t]*)||}
temp_2.scrub			{regex||^(?:[^\t]*\t){10}([^\t]*)||}		* genre
temp_3.scrub			{regex||^(?:[^\t]*\t){11}([^\t]*)||}		* subgenre
director.scrub			{regex||^(?:[^\t]*\t){13}([^\t]*)||}
actor.scrub				{regex||^(?:[^\t]*\t){14}([^\t]*)||}
temp_4.scrub			{regex||^(?:[^\t]*\t){19}([^\t]*)||}		* genre ID


* Post-processing of the values scrubbed from the detail page.
scope.range {(showdetails)|end}
* remove subtitle from title
title.modify {remove(notnull type=string)|:'index_subtitle'}

* temp_5 = AiringAttrib (index_temp_4) masked with bit 4, the "new" flag
temp_5.modify {calculate(format=D0)|'index_temp_4' 4 and}
* next line adds * to new shows, Title * = new shows, Title = "not new", 2 places
title.modify {addend('temp_5'=="4")| *}

actor.modify 		{replace|,|\|} * make actor a multi value element (comma separated list becomes | separated)
rating.modify   	{clear("None")}
* append the flag texts collected in index_temp_6 by the indexshowdetails scope
description.modify  {addend|'index_temp_6'}

* Build the category list: a fixed name for genre ID 1 to 4, followed by genre and subgenre.
* ##_## is a temporary separator that is turned into the multi value separator | afterwards.
category.modify {addend('temp_4' == "1")|##_##movie}
category.modify {addend('temp_4' == "2")|##_##sports}
category.modify {addend('temp_4' == "3")|##_##family}
category.modify {addend('temp_4' == "4")|##_##news}
category.modify {addend|##_##'temp_2'}
category.modify {addend|##_##'temp_3'}
category.modify {replace|##_##|\|}
* drop the text other from the category list
category.modify {remove|other}
end_scope

* Subdetail page url, built from field 0 of the detail page (temp_1).
* No subdetail page is requested when temp_1 is empty or equal to 0.
urlsubdetail.modify {clear}
urlsubdetail.modify {addstart('temp_1' not="")|http://mobileapi.tvguide.com/listings/details?ChannelFields=Name%2CFullName%2CNumber%2CSourceId&ScheduleFields=ProgramId%2CEndTime%2CStartTime%2CTitle%2CAiringAttrib%2CCatId&program='temp_1'}
urlsubdetail.modify {clear('temp_1' =="0")}
urlsubdetail.headers {customheader=Accept-Encoding=gzip,deflate}     * to speed up the downloading of the subdetail pages

*subdetail_title.scrub     {regex||"Title"\s*:\s*"([^"\\]*(?:\\.[^"\\]*)*)"||}
* show icon: take the "photos" array, extract every "url" value from it and keep the first one
subdetail_showicon.scrub  {regex||"photos":\[[^]]*?\]||}
subdetail_showicon.modify {substring(type=regex)|"url":"([^"]*)"} * get all the photos for this show
subdetail_showicon.modify {substring(type=element)|0 1}  * only get the first image (multi images are not yet supported by WG++)
subdetail_showicon.modify {cleanup(style=jsondecode)}

* NOTE(review): in the visible part of this file the three values below are only used by the disabled lines that follow
subdetail_temp_1.scrub {regex||"season"\s*:\s*(\d*)||}
subdetail_temp_2.scrub {regex||"episode"\s*:\s*(\d*)||}
subdetail_temp_6.scrub {regex||"seo_url"\s*:\s*"([^"\\]*(?:\\.[^"\\]*)*)"||}

* subdetail_temp_1 = season
* subdetail_temp_2 = episode
* disabled: build the episode string from the subdetail page (same steps as for index_episode in the indexshowdetails scope)
*subdetail_temp_1.modify  {calculate(not="" format=F0)|1 -}
*subdetail_temp_2.modify  {calculate(not="" format=F0)|1 -}
*subdetail_episode.modify {clear}
*subdetail_episode.modify {addend('subdetail_temp_1' not="")|'subdetail_temp_1'}
*subdetail_episode.modify {addend|.}
*subdetail_episode.modify {addend('subdetail_temp_2' not="")|'subdetail_temp_2'}
*subdetail_episode.modify {addend|.}
*subdetail_episode.modify {clear(="..")}


***  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _
**      #####  TIMEZONE FILE CREATION (only to create the xxx-channel.xml file)
**  this will create a .channels.xml file (channels by timezone) that will contain all the channels for all the supported timezones
**  this can be used to generate a new tvguide.com_channels_byTimezone.xml
** @auto_xml_timezone_start
** NOTE: while this section is disabled, every statement between the two markers, including the closing end_scope,
** must carry a leading * ; an active end_scope without its scope.range would be left behind otherwise.
*url_index{url|http://mobilelistings.tvguide.com/Listingsweb/ws/rest/schedules/|subpage|/start/|urldate|/duration/1?ChannelFields=Name%2CFullName%2CNumber%2CSourceId&ScheduleFields=&formattype=json&disableChannels=}
*subpage.format {list|80001|80002|80003|80004|80005|80006}
*index_site_id.scrub {single||||}
*scope.range {(channellist)|end}
** ! remark: we loop from the back to the front!
** index_temp_4 holds the server id of the part that is being processed, starting with the last one
*index_temp_4.modify {clear}
*index_temp_4.modify {addstart|80006}
*
*index_site_id.modify {replace|[\{"Channel":|\n(srvIDNNNN)} * "split" the full listing into separate parts per timezone
*
*index_site_channel.modify {substring(type=regex)|'index_site_id' "(srvIDNNNN)\|"FullName":"([^"]*)"[^}]*"SourceId":\d*"}
*index_site_id.modify {substring(type=regex)|'index_site_id' "(srvIDNNNN)\|"FullName":"[^"]*"[^}]*"SourceId":(\d*)"}
*
*index_temp_1.modify {calculate(type=element format=F0)|'index_site_channel' #}     * how many elements? Used as loop counter.
*index_temp_6.modify {addstart|'index_site_channel'}                                * copy original elements in temp
*index_temp_5.modify {addstart|'index_site_id'}                                    	* copy original elements in temp
*index_site_channel.modify {clear}                                                	* to be refilled later
*index_site_id.modify {clear}                                                		* to be refilled later
*loop {('index_temp_1' > "0" max=5000)|end}
*index_temp_1.modify {calculate(format=F0)|1 -}                            			* decrease the loop counter
*index_temp_2.modify {substring(type=element)|'index_temp_6' 'index_temp_1' 1}      * element to work on
*index_temp_3.modify {substring(type=element)|'index_temp_5' 'index_temp_1' 1}      * element to work on
*
*index_site_channel.modify	{addstart('index_temp_2' not== "srvIDNNNN")|##_####TIMEZONE##'index_temp_2'}                	* fill elements again , add a placeholder ##_## as separator
*index_site_id.modify		{addstart('index_temp_2' not== "srvIDNNNN")|##_####SERVERID##Number:*,SourceId:'index_temp_3'}  * fill elements again , add a placeholder ##_## as separator
*
*index_site_id.modify 		{replace('index_temp_2' == "srvIDNNNN")|##SERVERID##|(srvID'index_temp_4')}				* after all channels are added, change the server id to the correct value
*index_site_channel.modify	{replace('index_temp_2' == "srvIDNNNN")|##TIMEZONE##|##TIMEZONE'index_temp_4'##}		* after all channels are added, tag the timezone placeholder with the server id
*index_temp_4.modify 	  {calculate('index_temp_2' == "srvIDNNNN" format=F0)|'index_temp_4' 1 -}					* decrease the server id for the next time (80006, 80005, 80004, ...)
*
*end_loop
** translate the server id tagged placeholders into the timezone names
*index_site_channel.modify	{replace|##TIMEZONE80001##|(Eastern)}
*index_site_channel.modify	{replace|##TIMEZONE80002##|(Central)}
*index_site_channel.modify	{replace|##TIMEZONE80003##|(Mountain)}
*index_site_channel.modify	{replace|##TIMEZONE80004##|(Pacific)}
*index_site_channel.modify	{replace|##TIMEZONE80005##|(Alaskan)}
*index_site_channel.modify	{replace|##TIMEZONE80006##|(Hawaiian)}
*index_site_channel.modify	{replace|##_##|\|}
*index_site_id.modify		{replace|##_##|\|}
*index_site_id.modify {cleanup(removeduplicates=equal,100 link="index_site_channel")}
** replace ( ) in channels with [ ]
*index_site_channel.modify {replace|)|]}
*index_site_channel.modify {replace|(|[}
*end_scope
** @auto_xml_timezone_end

**  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _
**      #####  COUNTRY INTERNATIONAL FILE CREATION (only to create the xxx-channel.xml file)
**  this will create a .channels.xml file (providers file, NO CHANNELS) that will contain all the providers for all the supported countries
**  this can be used to generate a new tvguide.com_providers_byCountry.xml
** @auto_xml_country_start
*url_index{url|http://www.tvguide.com/listings/setup/localizeInt.aspx?RedirectUrl=%2flistings}
*subpage.format {list|Anguilla|Antigua+%26+Barbuda|Argentina|Aruba|Bahamas|Barbados|Bolivia|BR+Virgin+Islands|Brazil|Canada|Chile|Colombia|Costa+Rica|Cuba|Curaçao|Dominican+Republic|Ecuador|El+Salvador|Grenada|Guatemala|Guyana|Honduras|Jamaica|Mexico|Montserrat|Netherlands|Nicaragua|Panama|Peru|Saint-Martin|St.+Kitts+%26+Nevis|St.+Lucia|St.+Vincent|Trinidad+%26+Tobago|Turks+%26+Caicos|USA|Venezuela}
**subpage.format {list|Anguilla|Antigua+%26+Barbuda|Argentina|Aruba}
*url_index.headers {method=POST|contenttype=application/x-www-form-urlencoded}
*url_index.headers {postdata=__EVENTTARGET=ctl00%24ArticleContent1%24ImgBtnGo&__EVENTARGUMENT=&ctl00%24ArticleContent1%24drpCountry='subpage'}
*
*index_site_channel.scrub {regex||<h2 class="section-header">(.*?)</h2>\|id="ctl00_ArticleContent1_drpProvider"[^>]*>(?:\s*<option [^>]*value="[^"]*"[^>]*>([^<]*)</option>)*||}
*index_site_id.scrub {regex||<h2 class="section-header">(.*?)</h2>\|id="ctl00_ArticleContent1_drpProvider"[^>]*>(?:\s*<option [^>]*value="([^"]*)"[^>]*>[^<]*</option>)*||}
*scope.range {(channellist)|end}
** ! remark: we loop from the back to the front!
*index_temp_1.modify {calculate(type=element format=F0)|'index_site_channel' #}      * how many elements? Used as loop counter.
*index_temp_6.modify {addstart|'index_site_channel'}                                 * copy original elements in temp
*index_temp_5.modify {addstart|'index_site_id'}                                    	* copy original elements in temp
*index_site_channel.modify {clear}                                                	* to be refilled later
*index_site_id.modify {clear}                                                	 	* to be refilled later
*loop {('index_temp_1' > "0" max=5000)|end}
*index_temp_1.modify {calculate(format=F0)|1 -}                            			* decrease the loop counter
*index_temp_2.modify {substring(type=element)|'index_temp_6' 'index_temp_1' 1}       * element to work on
*index_temp_3.modify {substring(type=element)|'index_temp_5' 'index_temp_1' 1}       * element to work on
*
*
*index_site_channel.modify {addstart('index_temp_2' not~ "/span")|##_####COUNTRY##'index_temp_2'}     * fill elements again , add a placeholder ##_## as separator
*index_site_id.modify {addstart('index_temp_2' not~ "/span")|##_##'index_temp_3'}                     * fill elements again , add a placeholder ##_## as separator
*
*index_temp_4.modify {clear('index_temp_2' ~ "/span")}
*index_temp_4.modify {addstart('index_temp_2' ~ "/span")|'index_temp_2'}	* save the country name
*index_temp_4.modify {remove('index_temp_2' ~ "/span")|INTERNATIONAL}	* strip the INTERNATIONAL label from the saved country name
*index_temp_4.modify {cleanup}	* remove remaining HTML tags
*index_site_channel.modify {replace('index_temp_2' ~ "/span")|##COUNTRY##|('index_temp_4') } * replace the ##COUNTRY## with the correct country name
*end_loop
*index_site_channel.modify {replace|##_##|\|}
*index_site_id.modify {replace|##_##|\|}
*
*index_site_id.modify {cleanup(removeduplicates=equal,100 link="index_site_channel")}
*end_scope
** @auto_xml_country_end


***  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _
**      #####  PROVIDER FILE CREATION (only to create the xxx-channel.xml file)
**
** @auto_xml_provider_start
*url_index{url|http://www.tvguide.com/listings/data/localizebyzip.ashx?zipcode=|channel}
*url_index.headers 	  {customheader=Accept-Encoding=gzip,deflate}    		* to speedup the downloading of the index pages
*index_site_id.scrub {regex||([^\t]*\t[^\t]*)\t(?:[^\t]*\t){2}[^\n]*\n*||}
*index_site_channel.scrub {regex||(?:[^\t]*\t[^\t]*)\t((?:[^\t]*\t){2}[^\n]*)\n*||}
*scope.range {(channellist)|end}
*index_site_id.modify {replace|\t|.}
*index_site_channel.modify {replace|\t| - }
*end_scope
** @auto_xml_provider_end


***  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _
**      #####  CHANNEL FILE CREATION (only to create the xxx-channel.xml file)
**
** @auto_xml_channel_start
*url_index{url|http://mobilelistings.tvguide.com/Listingsweb/ws/rest/schedules/|channel|/start/|urldate|/duration/1?ChannelFields=Name%2CFullName%2CNumber%2CSourceId&ScheduleFields=&formattype=json&disableChannels=}
*index_site_channel.scrub {regex||"FullName":"([^"]*)"[^}]*"SourceId":\d*||}
*index_site_id.scrub {regex||"FullName":"[^"]*"[^}]*("Number":"[^"]*"[^}]*"SourceId":\d*)||}
*scope.range {(channellist)|end}
*index_site_id.modify {remove(type=regex)|("Sort"\s*:\s*\d*\s*,*)}
*index_site_id.modify {remove|"}
*index_site_id.modify {addstart|(srvID'config_site_id')} *add the config site_id, so, we can keep this siteini generic (no need for the user to edit it)
*index_site_id.modify {cleanup(removeduplicates=equal,100 link="index_site_channel")}
** replace ( ) in channels with [ ]
*index_site_channel.modify {replace|)|]}
*index_site_channel.modify {replace|(|[}
*end_scope
** @auto_xml_channel_end