# Repository URL to install this package: (not captured in this copy)
# Version: 1.1.1
require "cgi"
require "open-uri"
require "uri"
require 'url_expander'
module JApiAgent::ContentParser
class ParserBase
# Builds an instance whose state is copied from +args+.
#
# Each key/value pair yielded by +args+ becomes an instance variable named
# after the key, e.g. a "title" key sets @title.
def initialize(args)
  args.each { |name, value| instance_variable_set("@#{name}", value) }
end
# Fetches +url+ and returns the +src+ attribute of the first
# <tt>img.photo</tt> element on the page.
#
# The page is opened through the parsed URI object (open-uri) instead of
# Kernel#open: Kernel#open spawns a subprocess for strings starting with "|"
# and stopped accepting URLs in Ruby 3.0. The block form also closes the
# downloaded stream once the document has been parsed.
#
# Returns "" when the page cannot be fetched or parsed, or when no matching
# image is present.
def self.instagram_image(url)
  doc = begin
    URI(url).open { |page| Nokogiri::HTML(page) }
  rescue StandardError
    # Best effort: any fetch/parse failure is reported as "no image".
    nil
  end
  return "" if doc.nil?

  photo = doc.css("img.photo").first
  photo ? photo['src'] : ""
end
# Fetches +url+ over https and returns the +src+ attribute of the first
# <tt>img.photo-image</tt> element, or "" when the page has none.
#
# The page is opened through the parsed URI object (open-uri) instead of
# Kernel#open, which no longer accepts URLs as of Ruby 3.0. The block form
# closes the downloaded stream once the document has been parsed.
#
# Fetch and parse errors are not rescued here; they propagate to the caller.
def self.path_image(url)
  secure_url = change_url_scheme(url, "https")
  doc = URI(secure_url).open { |page| Nokogiri::HTML(page) }
  photo = doc.css("img.photo-image").first
  photo ? photo['src'] : ""
end
# Returns +url+ rewritten to use +scheme+ (e.g. "https").
#
# The scheme argument is honoured; it used to be ignored in favour of a
# hard-coded "https". Symbols are accepted as well as strings.
#
# Raises whatever URI() raises for an unparsable +url+.
def self.change_url_scheme(url, scheme)
  uri = URI(url)
  uri.scheme = scheme.to_s
  uri.to_s
end
# Extracts the video id from a YouTube URL.
#
# youtu.be short links are handed to youtu_be_id; for every other host the
# id is the value of the "v" query parameter. Returns "" when the URL has no
# query string or no "v" parameter.
def self.youtube_id(url)
  uri = URI(url)
  return youtu_be_id(uri) if uri.host == "youtu.be"
  return "" unless uri.query

  param = uri.query.split("&").find { |pair| pair.start_with?("v=") }
  # Drop only the leading "v=". String#delete("v=") would strip every "v"
  # and "=" character, mangling ids that contain a "v".
  param ? param[2..-1] : ""
end
# Returns the path of +url+ (an object responding to #path, such as a parsed
# URI) without its first character, i.e. "/abc" becomes "abc".
def self.youtu_be_id(url)
  path = url.path
  path.slice(1..-1)
end
# Fetches +url+ and returns the result of get_first_image for that page.
#
# The page is opened through the parsed URI object (open-uri) instead of
# Kernel#open: Kernel#open spawns a subprocess for strings starting with "|"
# and stopped accepting URLs in Ruby 3.0. The block form closes the
# downloaded stream once it has been processed.
#
# Returns "" when anything goes wrong while fetching or parsing.
def self.first_image(url)
  URI(url).open { |page| get_first_image(page) }
rescue StandardError
  # Best effort: an unreachable or unparsable page simply has no image.
  ""
end
# Returns the page's Open Graph image URL (the +content+ attribute of the
# first <tt>meta[property="og:image"]</tt> element) parsed from +content+.
#
# Returns "" when there is no such element, when the value does not start
# with "http", or when it points at one of the blocked hosts (photo CDNs and
# tracking endpoints listed in the pattern below).
def self.get_first_image(content)
  doc = Nokogiri::HTML(content)
  return "" if doc.nil?

  og_image = doc.xpath("//meta[@property='og:image']").first
  image_src = og_image && og_image['content']

  # \A anchors to the start of the string; ^ would accept "http" at the
  # start of any line of a multi-line value.
  return "" unless image_src =~ /\Ahttp/
  # Dots are escaped so that only the literal host names are rejected.
  return "" if image_src =~ /(flickr|yimg|stats\.wordpress\.com|b\.scorecardresearch\.com|statse\.webtrendslive\.com)/

  image_src
end
# Parses +content+ as HTML, passes every <img> element through
# facebook_image_tag_processing, and returns the inner HTML of the resulting
# <body>. When the parser yields no document, +content+ is returned as is.
def self.facebook_image_processing(content)
  document = Nokogiri::HTML(content)
  return content unless document

  images = document.xpath('//img')
  if images
    images.each do |img|
      facebook_image_tag_processing(img)
    end
  end

  document.css('body').inner_html
end
# Normalises a single <img> element (+tag+) in place.
#
# * Images whose URL mentions yimg or flickr are removed from the document.
# * Otherwise the style/width/height attributes are dropped and +src+ is
#   rewritten: when the URL carries a "url=" or "src=" query parameter, its
#   unescaped value becomes the new +src+; if not, the URL is passed through
#   facebook_image_expander.
#
# Tags with no +src+ attribute, or with a +src+ that is not a parsable URI,
# are left untouched rather than raising and aborting the whole document.
def self.facebook_image_tag_processing(tag)
  src = tag['src']
  return if src.nil?

  begin
    url = URI(src)
  rescue URI::InvalidURIError
    # e.g. a src containing spaces; nothing sensible to rewrite.
    return
  end

  if url.to_s =~ /(yimg|flickr)/
    tag.remove
    return
  end

  source_url = url.query.split('&').find { |pair| pair =~ /\A(url|src)=/ } if url.query
  %w[style width height].each { |attribute| tag.delete(attribute) }

  if source_url
    # Both "url=" and "src=" are four characters long.
    tag['src'] = CGI.unescape(source_url[4..-1])
    return
  else
    tag['src'] = facebook_image_expander(url.to_s)
  end
end
# Parses +content+ as HTML, replaces links that go through a
# "/l.php?u=<target>&..." redirect with their target, and returns the inner
# HTML of the resulting <body>. When the parser yields no document,
# +content+ is returned as is.
#
# Only redirect links are rewritten. Ordinary links keep their full query
# string (they used to be cut at the first "&"), and the extracted target is
# URL-unescaped so the resulting href is usable as a link.
def self.facebook_url_processing(content)
  doc = Nokogiri::HTML(content)
  return content unless doc

  doc.xpath('//a').each do |link|
    href = link['href']
    next if href.nil? || href.empty? || !href.include?('/l.php?u=')

    # Limit -1 keeps a trailing empty piece, so an empty "u=" yields ""
    # instead of nil.
    target = href.split('/l.php?u=', -1).last.split('&').first
    link['href'] = CGI.unescape(target) if target && !target.empty?
  end

  doc.css('body').inner_html
end
# Passes +url+ to UrlExpander::Client.expand and returns whatever it returns.
# NOTE(review): presumably resolves shortened links to their final target;
# confirm against the url_expander gem.
def self.url_expander(url)
UrlExpander::Client.expand(url)
end
# Rewrites an fbcdn.net image URL ending in "_s", "_t" or "_n" followed by
# ".jpg" or ".png" so that it ends in "_o" with the same extension.
# Any other URL, including nil, is returned unchanged.
#
# The dots in the patterns are escaped so they match only a literal ".",
# and one pattern now handles both the check and the replacement.
def self.facebook_image_expander(url)
  return url unless url =~ /fbcdn\.net/

  url.sub(/_[stn]\.(jpg|png)\z/, '_o.\1')
end
# Shortens +string+ to at most +length+ characters, appending "..." when
# characters were cut off.
#
# A string that already fits is returned (as a copy) with no ellipsis; an
# ellipsis used to be appended even when nothing had been removed.
# Returns nil when +string+ is nil.
def self.simple_truncate(string, length)
  return if string.nil?

  head = string.slice(0, length)
  string.length > length ? head + "..." : head
end
end
end