After downloading it import into a table dim_int_mktg_tld which you can join with the staging data.
CREATE TABLE `dim_int_mktg_tld` (
`tld` varchar(16) NOT NULL,
`type` varchar(32) default 'country-code', #-- Can be country-code or generic
`info` varchar(500) default NULL,
PRIMARY KEY (`tld`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1
#-- Join with yesterdays staging data and get TLD
SELECT WL.url, TLD.tld
FROM stg_weblog_date WL
LEFT OUTER JOIN dim_int_mktg_tld TLD
ON substring_index(
substring_index(
substring_index(url, '://', -1),
'/',1),
'.',-1) = TLD.tld
WHERE WL.date = DATE_ADD(CURRENT_DATE, interval -1 day)
#-- Example: http://www.abc.co.au/in/test
#-- Innermost substring_index returns everything to the right of '://'
#-- i.e, www.abc.co.au/in/test,
#-- then the middle one gets everything to the left of first '/'
#-- i.e, www.abc.co.au
#-- then the outer most substring_index returns everything to the right of last '.'
#-- i.e, au
#-- which is then joined with TLD table
HTH,
Shiva
No comments:
Post a Comment