**------------------------------------------------------------------------------------------------
* @header_start
* WebGrab+Plus ini for grabbing EPG data from TvGuide websites
* @Site: tvguide.com
* @MinSWversion: V1.53.15/0.16
*   none
* @Revision 8 - [02/05/2014] Francis De Paemeleere
*   - adjust to the new timezone implementation of WG++
* @Revision 7 - [05/03/2014] Francis De Paemeleere
*   - total rewrite of the code
* @Remarks:
*   this is the base version
*   timezone is set to UTC (+00:00), because the site uses UTC timings
* @header_end
**------------------------------------------------------------------------------------------------

site {url=tvguide.com|timezone=UTC|maxdays=16.1|cultureinfo=en-US|charset=UTF-8|titlematchfactor=90|firstshow=2|keeptabs}
site {ratingsystem=MPAA|episodesystem=xmltv_ns}
urldate.format{datenumber|UNIX|0}

** ##### INDEX PAGE URL
** the ##...## placeholders in url_index are filled from the channel's site_id
** (expected to contain "(srvID<n>)", "SourceId:<n>" and "Number:<n>")
scope.range {(urlindex)|end}
index_temp_1.modify {substring(type=regex)|'config_site_id' "\(srvID([^)]*)\)"}
url_index.modify {replace|##svrID##|'index_temp_1'}
index_temp_1.modify {substring(type=regex)|'config_site_id' "SourceId:([^,]*)"}
url_index.modify {replace|##SourceId##|'index_temp_1'}
index_temp_1.modify {substring(type=regex)|'config_site_id' "Number:([^,]*)"}
url_index.modify {replace|##Number##|'index_temp_1'}
index_temp_1.modify {calculate(format=D0)|'config_timespan_days' 1 + 1440 *}     * calculate how many minutes we want to get (so we only download the days we want to see)
url_index.modify {replace|##Minutes##|'index_temp_1'}
end_scope
url_index{url|http://mobilelistings.tvguide.com/Listingsweb/ws/rest/airings/##svrID##/start/|urldate|/duration/##Minutes##?channelsourceids=##SourceId##%7C##Number##&formattype=json}
url_index.headers {customheader=Accept-Encoding=gzip,deflate}     * to speed up the downloading of the index pages

** ##### INDEX PAGE SCRUBS (json)
index_showsplit.scrub {regex||"ProgramSchedule":.*?}}||}
index_start.scrub {regex||"StartTime"\s*:\s*((?:[^\s,])*)\s*,||}
index_stop.scrub {regex||"EndTime"\s*:\s*((?:[^\s,])*)\s*,||}
index_title.scrub {regex||"Title"\s*:\s*"([^"\\]*(?:\\.[^"\\]*)*)"||}
index_subtitle.scrub {regex()||"EpisodeTitle"\s*:\s*"([^"\\]*(?:\\.[^"\\]*)*)"||}
index_temp_3.scrub {regex||"ProgramId"\s*:\s*((?:[^\s,])*)\s*,||}
index_temp_4.scrub {regex||"AiringAttrib"\s*:\s*(\d*)||}     * binary flags: 1 = live, 4 = new; 2 and 8 are labelled (repeat) and (cc) below, meaning unconfirmed
* grab season and episode number
index_temp_5.scrub {regex||"SeasonNumber":(\d+)||}
index_temp_6.scrub {regex||"EpisodeNumber":"(\d+)||}

scope.range {(indexshowdetails)|end}
index_title.modify {cleanup(style=jsondecode)}
index_subtitle.modify {cleanup(style=jsondecode)}
* remove subtitle from title
index_title.modify {remove(notnull type=string)|:'index_subtitle'}
index_start.modify {calculate(format=utctime,HH:mm)}
index_stop.modify {calculate(format=utctime,HH:mm)}
* generate season.episode (1 is subtracted from each number; episodesystem is xmltv_ns)
index_episode.modify {clear}
index_temp_5.modify {calculate(not="" format=F0)|1 -}
index_episode.modify {addend('index_temp_5' not="")|'index_temp_5'}
index_episode.modify {addend|.}
index_temp_6.modify {calculate(not="" format=F0)|1 -}
index_episode.modify {addend('index_temp_6' not="")|'index_temp_6'}
index_episode.modify {addend|.}
index_episode.modify {clear(="..")}
index_episode.modify {clear(~"-1")}
index_temp_6.modify {clear}
* add flags according to airing attributes (index_temp_6 collects the flag text, index_temp_5 holds the masked bit)
index_temp_5.modify {calculate(format=D0)|'index_temp_4' 1 and}
index_temp_6.modify {addend('index_temp_5'=="1")| (live)}
index_temp_5.modify {calculate(format=D0)|'index_temp_4' 2 and}
index_temp_6.modify {addend('index_temp_5'=="2")| (repeat)}
index_temp_5.modify {calculate(format=D0)|'index_temp_4' 4 and}
index_temp_6.modify {addend('index_temp_5'=="4")| (new)}
* next line adds * to new shows, Title * = new shows, Title = "not new", 2 places
index_title.modify {addend('index_temp_5'=="4")| *}
index_temp_5.modify {calculate(format=D0)|'index_temp_4' 8 and}
index_temp_6.modify {addend('index_temp_5'=="8")| (cc)}
end_scope

** ##### DETAIL PAGE (tab separated fields, selected by field position)
index_urlshow.modify {addstart('index_temp_3' not "")|http://www.tvguide.com/listings/data/detailcache.aspx?Qr='index_temp_3'}
index_urlshow.headers {customheader=Accept-Encoding=gzip,deflate}     * to speed up the downloading of the detail pages
temp_1.scrub {regex||^(?:[^\t]*\t){0}([^\t]*)||}
title.scrub {regex||^(?:[^\t]*\t){1}([^\t]*)||}
*subtitle.scrub {regex||^(?:[^\t]*\t){2}([^\t]*)||}
description.scrub {regex||^(?:[^\t]*\t){3}([^\t]*)||}
rating.scrub {regex||^(?:[^\t]*\t){7}([^\t]*)||}
productiondate.scrub {regex||^(?:[^\t]*\t){9}([^\t]*)||}
temp_2.scrub {regex||^(?:[^\t]*\t){10}([^\t]*)||}     * genre
temp_3.scrub {regex||^(?:[^\t]*\t){11}([^\t]*)||}     * subgenre
director.scrub {regex||^(?:[^\t]*\t){13}([^\t]*)||}
actor.scrub {regex||^(?:[^\t]*\t){14}([^\t]*)||}
temp_4.scrub {regex||^(?:[^\t]*\t){19}([^\t]*)||}     * genre ID

scope.range {(showdetails)|end}
* remove subtitle from title
title.modify {remove(notnull type=string)|:'index_subtitle'}
temp_5.modify {calculate(format=D0)|'index_temp_4' 4 and}
* next line adds * to new shows, Title * = new shows, Title = "not new", 2 places
title.modify {addend('temp_5'=="4")| *}
actor.modify {replace|,|\|}     * make actor a multi element
rating.modify {clear("None")}
description.modify {addend|'index_temp_6'}
category.modify {addend('temp_4' == "1")|##_##movie}
category.modify {addend('temp_4' == "2")|##_##sports}
category.modify {addend('temp_4' == "3")|##_##family}
category.modify {addend('temp_4' == "4")|##_##news}
category.modify {addend|##_##'temp_2'}
category.modify {addend|##_##'temp_3'}
category.modify {replace|##_##|\|}
category.modify {remove|other}
end_scope

** ##### SUBDETAIL PAGE (json)
urlsubdetail.modify {clear}
urlsubdetail.modify {addstart('temp_1' not="")|http://mobileapi.tvguide.com/listings/details?ChannelFields=Name%2CFullName%2CNumber%2CSourceId&ScheduleFields=ProgramId%2CEndTime%2CStartTime%2CTitle%2CAiringAttrib%2CCatId&program='temp_1'}
urlsubdetail.modify {clear('temp_1' =="0")}
urlsubdetail.headers {customheader=Accept-Encoding=gzip,deflate}     * to speed up the downloading of the detail pages
*subdetail_title.scrub {regex||"Title"\s*:\s*"([^"\\]*(?:\\.[^"\\]*)*)"||}
subdetail_showicon.scrub {regex||"photos":\[[^]]*?\]||}
subdetail_showicon.modify {substring(type=regex)|"url":"([^"]*)"}     * get all the photos for this show
subdetail_showicon.modify {substring(type=element)|0 1}     * only get the first image (multi images are not yet supported by WG++)
subdetail_showicon.modify {cleanup(style=jsondecode)}
subdetail_temp_1.scrub {regex||"season"\s*:\s*(\d*)||}
subdetail_temp_2.scrub {regex||"episode"\s*:\s*(\d*)||}
subdetail_temp_6.scrub {regex||"seo_url"\s*:\s*"([^"\\]*(?:\\.[^"\\]*)*)"||}
* subdetail_temp_1 = season
* subdetail_temp_2 = episode
*subdetail_temp_1.modify {calculate(not="" format=F0)|1 -}
*subdetail_temp_2.modify {calculate(not="" format=F0)|1 -}
*subdetail_episode.modify {clear}
*subdetail_episode.modify {addend('subdetail_temp_1' not="")|'subdetail_temp_1'}
*subdetail_episode.modify {addend|.}
*subdetail_episode.modify {addend('subdetail_temp_2' not="")|'subdetail_temp_2'}
*subdetail_episode.modify {addend|.}
*subdetail_episode.modify {clear(="..")}

*** _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
** ##### TIMEZONE FILE CREATION (only to create the xxx-channel.xml file)
** this will create a .channels.xml file (channels by timezone) that will contain all the channels for all the supported timezones
** this can be used to generate a new tvguide.com_channels_byTimezone.xml
** @auto_xml_timezone_start
*url_index{url|http://mobilelistings.tvguide.com/Listingsweb/ws/rest/schedules/|subpage|/start/|urldate|/duration/1?ChannelFields=Name%2CFullName%2CNumber%2CSourceId&ScheduleFields=&formattype=json&disableChannels=}
*subpage.format {list|80001|80002|80003|80004|80005|80006}
*index_site_id.scrub {single||||}
*scope.range {(channellist)|end}
** ! remark: we loop from the back to the front!
*index_temp_4.modify {clear}
*index_temp_4.modify {addstart|80006}
*
*index_site_id.modify {replace|[\{"Channel":|\n(srvIDNNNN)}     * "split" the full listing into timezone separate parts
*
*index_site_channel.modify {substring(type=regex)|'index_site_id' "(srvIDNNNN)\|"FullName":"([^"]*)"[^}]*"SourceId":\d*"}
*index_site_id.modify {substring(type=regex)|'index_site_id' "(srvIDNNNN)\|"FullName":"[^"]*"[^}]*"SourceId":(\d*)"}
*
*index_temp_1.modify {calculate(type=element format=F0)|'index_site_channel' #}     * how many elements? Used as loop counter.
*index_temp_6.modify {addstart|'index_site_channel'}     * copy original elements in temp
*index_temp_5.modify {addstart|'index_site_id'}     * copy original elements in temp
*index_site_channel.modify {clear}     * to be refilled later
*index_site_id.modify {clear}     * to be refilled later
*loop {('index_temp_1' > "0" max=5000)|end}
*index_temp_1.modify {calculate(format=F0)|1 -}     * decrease the loop counter
*index_temp_2.modify {substring(type=element)|'index_temp_6' 'index_temp_1' 1}     * element to work on
*index_temp_3.modify {substring(type=element)|'index_temp_5' 'index_temp_1' 1}     * element to work on
*
*index_site_channel.modify {addstart('index_temp_2' not== "srvIDNNNN")|##_####TIMEZONE##'index_temp_2'}     * fill elements again, add a placeholder ##_## as separator
*index_site_id.modify {addstart('index_temp_2' not== "srvIDNNNN")|##_####SERVERID##Number:*,SourceId:'index_temp_3'}     * fill elements again, add a placeholder ##_## as separator
*
*index_site_id.modify {replace('index_temp_2' == "srvIDNNNN")|##SERVERID##|(srvID'index_temp_4')}     * after all channels are added, change the server id to the correct value
*index_site_channel.modify {replace('index_temp_2' == "srvIDNNNN")|##TIMEZONE##|##TIMEZONE'index_temp_4'##}     * after all channels are added, tag the timezone placeholder with the server id
*index_temp_4.modify {calculate('index_temp_2' == "srvIDNNNN" format=F0)|'index_temp_4' 1 -}     * decrease the server id for the next time (80006, 80005, 80004, ...)
*
*end_loop
*index_site_channel.modify {replace|##TIMEZONE80001##|(Eastern)}
*index_site_channel.modify {replace|##TIMEZONE80002##|(Central)}
*index_site_channel.modify {replace|##TIMEZONE80003##|(Mountain)}
*index_site_channel.modify {replace|##TIMEZONE80004##|(Pacific)}
*index_site_channel.modify {replace|##TIMEZONE80005##|(Alaskan)}
*index_site_channel.modify {replace|##TIMEZONE80006##|(Hawaiian)}
*index_site_channel.modify {replace|##_##|\|}
*index_site_id.modify {replace|##_##|\|}
*index_site_id.modify {cleanup(removeduplicates=equal,100 link="index_site_channel")}
** replace ( ) in channels with [ ]
*index_site_channel.modify {replace|)|]}
*index_site_channel.modify {replace|(|[}
** end_scope is kept commented out together with the scope.range (channellist) line it closes
*end_scope
** @auto_xml_timezone_end

** _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
** ##### COUNTRY INTERNATIONAL FILE CREATION (only to create the xxx-channel.xml file)
** this will create a .channels.xml file (providers file, NO CHANNELS) that will contain all the providers for all the supported countries
** this can be used to generate a new tvguide.com_providers_byCountry.xml
** @auto_xml_country_start
*url_index{url|http://www.tvguide.com/listings/setup/localizeInt.aspx?RedirectUrl=%2flistings}
*subpage.format {list|Anguilla|Antigua+%26+Barbuda|Argentina|Aruba|Bahamas|Barbados|Bolivia|BR+Virgin+Islands|Brazil|Canada|Chile|Colombia|Costa+Rica|Cuba|Curaçao|Dominican+Republic|Ecuador|El+Salvador|Grenada|Guatemala|Guyana|Honduras|Jamaica|Mexico|Montserrat|Netherlands|Nicaragua|Panama|Peru|Saint-Martin|St.+Kitts+%26+Nevis|St.+Lucia|St.+Vincent|Trinidad+%26+Tobago|Turks+%26+Caicos|USA|Venezuela}
**subpage.format {list|Anguilla|Antigua+%26+Barbuda|Argentina|Aruba}
*url_index.headers {method=POST|contenttype=application/x-www-form-urlencoded}
*url_index.headers {postdata=__EVENTTARGET=ctl00%24ArticleContent1%24ImgBtnGo&__EVENTARGUMENT=&ctl00%24ArticleContent1%24drpCountry='subpage'}
*
*index_site_channel.scrub {regex||