<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	>

<channel>
	<title>ANT IL</title>
	<atom:link href="http://big.anythingilike.net/feed/" rel="self" type="application/rss+xml" />
	<link>http://big.anythingilike.net</link>
	<description>AnyThingILike.net</description>
	<lastBuildDate>Mon, 06 Sep 2010 03:39:13 +0000</lastBuildDate>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>http://wordpress.org/?v=3.3.1</generator>
		<item>
		<title>iMacro, Click email to activate pligg</title>
		<link>http://big.anythingilike.net/articles/imacro-click-email-to-activate-pligg/</link>
		<comments>http://big.anythingilike.net/articles/imacro-click-email-to-activate-pligg/#comments</comments>
		<pubDate>Mon, 06 Sep 2010 03:39:13 +0000</pubDate>
		<dc:creator>admin</dc:creator>
				<category><![CDATA[Articles]]></category>
		<category><![CDATA[activate]]></category>
		<category><![CDATA[email]]></category>
		<category><![CDATA[imacro]]></category>
		<category><![CDATA[pligg]]></category>

		<guid isPermaLink="false">http://big.anythingilike.net/?p=325</guid>
		<description><![CDATA[iMacro, Click email to activate pligg VERSION BUILD=6700624 RECORDER=FX SET !ERRORIGNORE YES SET !ERRORCONTINUE YES TAB T=1 TAB CLOSEALLOTHERS URL GOTO=https://mail.google.com/ TAG POS=1 TYPE=A ATTR=HREF:*&#038;&#038;TXT:Thanks* TAG POS=1 TYPE=A ATTR=HREF:?&#038;&#038;TXT:Show TAG POS=1 TYPE=A ATTR=TXT:Administrator TAG POS=1 TYPE=A ATTR=TARGET:_blank&#038;&#038;HREF:http://*&#038;&#038;TXT:http://* TAB T=1 TAG POS=1 TYPE=A ATTR=HREF:*a=dm&#038;at=*&#038;m=*&#038;&#038;TXT:ลบ TAG POS=1 TYPE=A ATTR=HREF:?&#038;&#038;TXT:«&#60;SP&#62;Back&#60;SP&#62;to&#60;SP&#62;Inbox]]></description>
			<content:encoded><![CDATA[<p>iMacro, Click email to activate pligg<br />
<code>VERSION BUILD=6700624 RECORDER=FX<br />
SET !ERRORIGNORE YES<br />
SET !ERRORCONTINUE YES<br />
TAB T=1<br />
TAB CLOSEALLOTHERS<br />
URL GOTO=https://mail.google.com/<br />
TAG POS=1 TYPE=A ATTR=HREF:*&#038;&#038;TXT:Thanks*<br />
TAG POS=1 TYPE=A ATTR=HREF:?&#038;&#038;TXT:Show<br />
TAG POS=1 TYPE=A ATTR=TXT:Administrator<br />
TAG POS=1 TYPE=A ATTR=TARGET:_blank&#038;&#038;HREF:http://*&#038;&#038;TXT:http://*<br />
TAB T=1<br />
TAG POS=1 TYPE=A ATTR=HREF:*a=dm&#038;at=*&#038;m=*&#038;&#038;TXT:ลบ<br />
TAG POS=1 TYPE=A ATTR=HREF:?&#038;&#038;TXT:«&lt;SP&gt;Back&lt;SP&gt;to&lt;SP&gt;Inbox<br />
</code></p>
<iframe src="http://www.facebook.com/plugins/like.php?href=http%3A%2F%2Fbig.anythingilike.net%2Farticles%2Fimacro-click-email-to-activate-pligg%2F&amp;layout=standard&amp;show_faces=true&amp;width=450&amp;action=like&amp;colorscheme=light" scrolling="no" frameborder="0" allowTransparency="true" style="border:none; overflow:hidden; width:450px;margin-top:5px;"></iframe>]]></content:encoded>
			<wfw:commentRss>http://big.anythingilike.net/articles/imacro-click-email-to-activate-pligg/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>Google search update</title>
		<link>http://big.anythingilike.net/technology/google-search-update/</link>
		<comments>http://big.anythingilike.net/technology/google-search-update/#comments</comments>
		<pubDate>Fri, 07 May 2010 05:23:31 +0000</pubDate>
		<dc:creator>admin</dc:creator>
				<category><![CDATA[technology]]></category>
		<category><![CDATA[google]]></category>
		<category><![CDATA[update]]></category>

		<guid isPermaLink="false">http://big.anythingilike.net/?p=285</guid>
		<description><![CDATA[httpv://www.youtube.com/watch?v=C-rnxNFRAQA&#38;]]></description>
			<content:encoded><![CDATA[<p>httpv://www.youtube.com/watch?v=C-rnxNFRAQA&amp;</p>
<iframe src="http://www.facebook.com/plugins/like.php?href=http%3A%2F%2Fbig.anythingilike.net%2Ftechnology%2Fgoogle-search-update%2F&amp;layout=standard&amp;show_faces=true&amp;width=450&amp;action=like&amp;colorscheme=light" scrolling="no" frameborder="0" allowTransparency="true" style="border:none; overflow:hidden; width:450px;margin-top:5px;"></iframe>]]></content:encoded>
			<wfw:commentRss>http://big.anythingilike.net/technology/google-search-update/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>Touch screen technology (concept)</title>
		<link>http://big.anythingilike.net/technology/touch-screen-technology-concept/</link>
		<comments>http://big.anythingilike.net/technology/touch-screen-technology-concept/#comments</comments>
		<pubDate>Wed, 21 Apr 2010 18:41:39 +0000</pubDate>
		<dc:creator>admin</dc:creator>
				<category><![CDATA[technology]]></category>

		<guid isPermaLink="false">http://big.anythingilike.net/?p=280</guid>
		<description><![CDATA[httpv://www.youtube.com/watch?v=8weBw6M9HHY]]></description>
			<content:encoded><![CDATA[<p><a href="http://www.youtube.com/watch?v=8weBw6M9HHY">httpv://www.youtube.com/watch?v=8weBw6M9HHY</a></p>
<iframe src="http://www.facebook.com/plugins/like.php?href=http%3A%2F%2Fbig.anythingilike.net%2Ftechnology%2Ftouch-screen-technology-concept%2F&amp;layout=standard&amp;show_faces=true&amp;width=450&amp;action=like&amp;colorscheme=light" scrolling="no" frameborder="0" allowTransparency="true" style="border:none; overflow:hidden; width:450px;margin-top:5px;"></iframe>]]></content:encoded>
			<wfw:commentRss>http://big.anythingilike.net/technology/touch-screen-technology-concept/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>Google Analytic E-commerce tracking</title>
		<link>http://big.anythingilike.net/website/google-analytic-e-commerce-tracking/</link>
		<comments>http://big.anythingilike.net/website/google-analytic-e-commerce-tracking/#comments</comments>
		<pubDate>Tue, 20 Apr 2010 19:34:29 +0000</pubDate>
		<dc:creator>admin</dc:creator>
				<category><![CDATA[Website]]></category>
		<category><![CDATA[e commerce]]></category>
		<category><![CDATA[google analytics]]></category>

		<guid isPermaLink="false">http://big.anythingilike.net/?p=276</guid>
		<description><![CDATA[Google Analytic E-commerce tracking Useful tool, Modify it to track click (order), By E Commerce module. Noted: It could set product pricing, But I lazy to do -_-&#39; &#160; &#160; Good to know Where your customer came from Which country order / Sale &#60; can do if modify some script to send product pricing, I [...]]]></description>
			<content:encoded><![CDATA[<p style="float:right; margin:0 0 10px 15px; width:240px;">
		<img src="http://big.anythingilike.net/wp-content/uploads/googleanalyticEcom.png" width="240" />
		</p><p>Google Analytic E-commerce tracking </p>
<p>Useful tool, Modify it to track click (order), By E Commerce module.</p>
<p><img alt="google analytics ecommerce tracking order (sales)" height="487" src="http://big.anythingilike.net/wp-content/uploads/googleanalyticEcom.png" width="590" /></p>
<p>Noted: It could set product pricing, But I lazy to do -_-&#39;</p>
<p>&nbsp;</p>
<p>&nbsp;</p>
<p>Good to know Where your customer came from <img src='http://big.anythingilike.net/wp-includes/images/smilies/icon_biggrin.gif' alt=':D' class='wp-smiley' /> <br />
	Which country order / Sale &lt; can do if modify some script to send product pricing, I set all to 1 usd then Could track only order <img src='http://big.anythingilike.net/wp-includes/images/smilies/icon_razz.gif' alt=':P' class='wp-smiley' /> </p>
<iframe src="http://www.facebook.com/plugins/like.php?href=http%3A%2F%2Fbig.anythingilike.net%2Fwebsite%2Fgoogle-analytic-e-commerce-tracking%2F&amp;layout=standard&amp;show_faces=true&amp;width=450&amp;action=like&amp;colorscheme=light" scrolling="no" frameborder="0" allowTransparency="true" style="border:none; overflow:hidden; width:450px;margin-top:5px;"></iframe>]]></content:encoded>
			<wfw:commentRss>http://big.anythingilike.net/website/google-analytic-e-commerce-tracking/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>Scrape site basic</title>
		<link>http://big.anythingilike.net/articles/scrape-site-basic/</link>
		<comments>http://big.anythingilike.net/articles/scrape-site-basic/#comments</comments>
		<pubDate>Sun, 18 Apr 2010 11:11:56 +0000</pubDate>
		<dc:creator>admin</dc:creator>
				<category><![CDATA[Articles]]></category>
		<category><![CDATA[scrape]]></category>
		<category><![CDATA[web crawler]]></category>

		<guid isPermaLink="false">http://big.anythingilike.net/?p=273</guid>
		<description><![CDATA[&#160; From last articles about proxy SEO, I have some research more about this topic and found more keywords interesting, scrape,glype,web crawler perhap it could use with some of our site. Modify to white hat to make webmaster work easier for manage site. What is it ? &#160; Web scraping&#160;(also called&#160;Web harvesting&#160;or&#160;Web data extraction) is [...]]]></description>
			<content:encoded><![CDATA[<p>&nbsp;</p>
<p>From last articles about proxy SEO, I have some research more about this topic and found more keywords interesting, scrape,glype,web crawler perhap it could use with some of our site. Modify to white hat to make webmaster work easier for manage site.</p>
<p>What is it ?</p>
<p>&nbsp;</p>
<p style="margin-top: 0.4em;margin-right: 0px;margin-bottom: 0.5em;margin-left: 0px;line-height: 1.5em"><b>Web scraping</b>&nbsp;(also called&nbsp;<b>Web harvesting</b>&nbsp;or&nbsp;<b>Web data extraction</b>) is a computer software technique of extracting information from websites. Usually, such software programs simulate human exploration of the&nbsp;<a href="http://en.wikipedia.org/wiki/World_Wide_Web" title="World Wide Web">Web</a>&nbsp;by either implementing low-level<a href="http://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol" title="Hypertext Transfer Protocol">Hypertext Transfer Protocol</a>&nbsp;(HTTP), or embedding certain full-fledged Web browsers, such as the&nbsp;<a href="http://en.wikipedia.org/wiki/Internet_Explorer" title="Internet Explorer">Internet Explorer</a>&nbsp;(IE) and the&nbsp;<a href="http://en.wikipedia.org/wiki/Mozilla" title="Mozilla">Mozilla</a>&nbsp;Web browser. Web scraping is closely related to&nbsp;<a href="http://en.wikipedia.org/wiki/Web_indexing" title="Web indexing">Web indexing</a>, which indexes Web content using a&nbsp;<a href="http://en.wikipedia.org/wiki/Internet_bot" title="Internet bot">bot</a>&nbsp;and is a universal technique adopted by most search engines. In contrast, Web scraping focuses more on the transformation of unstructured Web content, typically in&nbsp;<a href="http://en.wikipedia.org/wiki/HTML" title="HTML">HTML</a>&nbsp;format, into structured data that can be stored and analyzed in a central local database or spreadsheet. Web scraping is also related to Web automation, which simulates human Web browsing using computer software. Exemplary uses of Web scraping include online price comparison, weather data monitoring, website change detection, Web research, Web content mashup and Web data integration.</p>
<p><span class="Apple-style-span" style="font-size: 13px">Web scraping is the process of automatically collecting Web information.&nbsp;Web scraping is a field with active developments sharing a common goal with the&nbsp;<a href="http://en.wikipedia.org/wiki/Semantic_Web" title="Semantic Web">semantic Web</a>&nbsp;vision, an ambitious initiative that still requires breakthroughs in text processing, semantic understanding, artificial intelligence and human-computer interactions. Web scraping, instead, favors practical solutions based on existing technologies even though some solutions are entirely ad hoc. Therefore, there are different levels of automations that existing Web-scraping technologies can provide:</span></p>
<h2><span class="mw-headline">Techniques for Web scraping</span></h2>
<ul>
<li>Human copy-and-paste: Sometimes even the best Web-scraping technology can not replace human&rsquo;s manual examination and copy-and-paste, and sometimes this may be the only workable solution when the websites for scraping explicitly setup barriers to prevent machine automation.</li>
<li>Text grepping and regular expression matching: A simple yet powerful approach to extract information from Web pages can be based on the UNIX&nbsp;<a href="http://en.wikipedia.org/wiki/Grep" title="Grep">grep</a>&nbsp;command or regular expression matching facilities of programming languages (for instance&nbsp;<a href="http://en.wikipedia.org/wiki/Perl" title="Perl">Perl</a>&nbsp;or&nbsp;<a href="http://en.wikipedia.org/wiki/Python_(programming_language)" title="Python (programming language)">Python</a>).</li>
<li>HTTP programming: Static and dynamic Web pages can be retrieved by posting HTTP requests to the remote Web server using&nbsp;<a class="mw-redirect" href="http://en.wikipedia.org/wiki/Socket_programming" title="Socket programming">socket programming</a>.</li>
<li>DOM parsing: By embedding a full-fledged Web browser, such as the&nbsp;<a href="http://en.wikipedia.org/wiki/Internet_Explorer" title="Internet Explorer">Internet Explorer</a>&nbsp;or the&nbsp;<a href="http://en.wikipedia.org/wiki/Mozilla" title="Mozilla">Mozilla</a>&nbsp;Web browser control, programs can retrieve the dynamic contents generated by client side scripts. These Web browser controls also parse Web pages into a DOM tree, based on which programs can retrieve parts of the Web pages.</li>
<li>HTML parsers: Some semi-structured data query languages, such as the&nbsp;<a class="mw-redirect" href="http://en.wikipedia.org/wiki/XML_query_language" title="XML query language">XML query language</a>&nbsp;(XQL) and the&nbsp;<a class="new" href="http://en.wikipedia.org/w/index.php?title=Hyper-text_query_language&amp;action=edit&amp;redlink=1" title="Hyper-text query language (page does not exist)">hyper-text query language</a>&nbsp;(HTQL), can be used to parse HTML pages and to retrieve and transform Web content.</li>
<li>Web-scraping software: There are many Web-scraping software available that can be used to customize Web-scraping solutions. These software may provide a Web recording interface that removes the necessity to manually write Web-scraping codes, or some scripting functions that can be used to extract and transform Web content, and database interfaces that can store the scraped data in local databases.</li>
<li>Semantic annotation recognizing: The Web pages may embrace metadata or semantic markups/annotations which can be made use of to locate specific data snippets. If the annotations are embedded in the pages, as&nbsp;<a href="http://en.wikipedia.org/wiki/Microformat" title="Microformat">Microformat</a>&nbsp;does, this technique can be viewed as a special case of DOM parsing. In another case, the annotations, organized into a semantic layer, are stored and managed separated to the Web pages, so the Web scrapers can retrieve data schema and instructions from this layer before scraping the pages.</li>
</ul>
<h2><span class="mw-headline">Legal issues</span></h2>
<p style="margin-top: 0.4em;margin-right: 0px;margin-bottom: 0.5em;margin-left: 0px;line-height: 1.5em">Web scraping may be against the&nbsp;<a class="mw-redirect" href="http://en.wikipedia.org/wiki/Terms_of_use" title="Terms of use">terms of use</a>&nbsp;of some websites. The enforceability of these terms is unclear.&nbsp;While outright duplication of original expression will in many cases be illegal, in the&nbsp;<a href="http://en.wikipedia.org/wiki/United_States" title="United States">United States</a>&nbsp;the courts ruled in&nbsp;<a href="http://en.wikipedia.org/wiki/Feist_Publications_v._Rural_Telephone_Service" title="Feist Publications v. Rural Telephone Service">Feist Publications v. Rural Telephone Service</a>&nbsp;that duplication of facts is allowable. Also, in a February, 2006 ruling, the Danish Maritime and Commercial Court (Copenhagen) found systematic crawling, indexing and&nbsp;<a href="http://en.wikipedia.org/wiki/Deep_linking" title="Deep linking">deep linking</a>&nbsp;by portal site ofir.dk of real estate site Home.dk not to conflict with Danish law or the database directive of the European Union.</p>
<p style="margin-top: 0.4em;margin-right: 0px;margin-bottom: 0.5em;margin-left: 0px;line-height: 1.5em">U.S. courts have acknowledged that users of &quot;scrapers&quot; or &quot;robots&quot; may be held liable for committing&nbsp;<a href="http://en.wikipedia.org/wiki/Trespass_to_chattels" title="Trespass to chattels">trespass to chattels</a>,&nbsp;which involves a computer system itself being considered personal property upon which the user of a scraper is trespassing. However, to succeed on a claim of trespass to chattels, the plaintiff must demonstrate that the defendant intentionally and without authorization interfered with the plaintiff&#39;s possessory interest in the computer system and that the defendant&#39;s unauthorized use caused damage to the plaintiff. Not all cases of web spidering brought before the courts have been considered trespass to chattels.</p>
<p style="margin-top: 0.4em;margin-right: 0px;margin-bottom: 0.5em;margin-left: 0px;line-height: 1.5em">In Australia, the&nbsp;<a href="http://en.wikipedia.org/wiki/Spam_Act_2003" title="Spam Act 2003">Spam Act 2003</a>&nbsp;outlaws some forms of web harvesting.</p>
<h2><span class="mw-headline">Technical measures to stop bots</span></h2>
<p style="margin-top: 0.4em;margin-right: 0px;margin-bottom: 0.5em;margin-left: 0px;line-height: 1.5em">The administrator of a website can use various measures to stop or slow a bot. Some techniques include:</p>
<ul>
<li>If the application is well behaved, adding entries to&nbsp;<a href="http://en.wikipedia.org/wiki/Robots_exclusion_standard" title="Robots exclusion standard">robots.txt</a>&nbsp;will be adhered to. Google and other well-behaved bots can be stopped this way.</li>
<li>Blocking an IP address. This will also block all browsing from that address.</li>
<li>Sometimes bots declare who they are. Well behaved ones do (for example &#39;<a href="http://en.wikipedia.org/wiki/Googlebot" title="Googlebot">googlebot</a>&#39;). They can be blocked on that basis. Unfortunately, malicious bots may declare they are a normal browser.</li>
<li>Bots can be blocked by excess traffic monitoring.</li>
<li>Bots can be blocked with tools to verify that it is a real person accessing the site, such as the&nbsp;<a href="http://en.wikipedia.org/wiki/CAPTCHA" title="CAPTCHA">CAPTCHA</a>&nbsp;project.</li>
<li>Sometimes bots can be blocked with carefully crafted Javascript.</li>
<li>Locating bots with a&nbsp;<a href="http://en.wikipedia.org/wiki/Honeypot_(computing)" title="Honeypot (computing)">honeypot</a>&nbsp;or other method to identify the IP addresses of automated crawlers.</li>
</ul>
<ul>
<li><a class="mw-redirect" href="http://en.wikipedia.org/wiki/Screen_scraping" title="Screen scraping">Screen scraping</a></li>
<li><a href="http://en.wikipedia.org/wiki/Scraper_site" title="Scraper site">Scraper site</a></li>
<li><a href="http://en.wikipedia.org/wiki/Mashup_(web_application_hybrid)" title="Mashup (web application hybrid)">Mashup (web application hybrid)</a></li>
<li><a href="http://en.wikipedia.org/wiki/Text_corpus" title="Text corpus">Text corpus</a></li>
<li><a href="http://en.wikipedia.org/wiki/Corpus_linguistics" title="Corpus linguistics">Corpus linguistics</a></li>
</ul>
<p>&nbsp;</p>
<p>Some interesting article about how to scrape site<br />
	from <a href="http://www.oooff.com">oooff</a></p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px"><span class="Apple-style-span" style="font-size: 18px;font-weight: bold">Most Basic Web Data Parsing Script</span></p>
<h3>Whole script -</h3>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px">The whole script minus the line numbers of course. Those are just there for our reference.</p>
<p class="code" style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px">1. &lt;?php&nbsp;<br />
	2. $data = file_get_contents(&#39;http://search.msn.com/results.aspx?q=site%3Afroogle.com&#39;);<br />
	3. $regex = &#39;/Page 1 of (.+?) results/&#39;;<br />
	4. preg_match($regex,$data,$match);<br />
	5. var_dump($match);&nbsp;<br />
	6. echo $match[1];<br />
	7. ?&gt;</p>
<h3 class="code">Script Explanation -</h3>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px">Ok here goes with the basic explanation&#8230;</p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px"><span class="explan_headings">Line 2.</span><br />
	<span class="code">$data = file_get_contents(&#39;http://search.msn.com/results.aspx?q=site%3Afroogle.com&#39;);<br />
	Now if you studied up on the first tutorial you&#39;ll know that we&#39;re pulling data from MSN search using the file_get_contents command and assigning the data to the $data variable.</span></p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px"><span class="code">However we&#39;re also passing some data in the url to get the specific page from MSN that we want to scrape. If you already know about passing variables in the url you can go to Line 3.</span></p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px">You might be asking what is all that stuff after the MSN url? I&#39;m sure you&#39;ve seen it a lot of times but might not have been sure what it was. Basically what all that stuff is, is just like passing a variable in a php script but you&#39;re doing it through a url. Let&#39;s take a peek at the url we&#39;re using here to get a better understanding.&nbsp;<span class="code">Our url if you don&#39;t remember is &quot;http://search.msn.com/results.aspx?q=site%3Afroogle.com</span>&quot;.</p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px">Let&#39;s break it into two parts split on the question mark. Why you ask? That&#39;s where the url ends and the data being passed begins. With it separated we have:</p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px">http://search.msn.com/results.aspx<br />
	and<br />
	<span class="code">q=site%3Afroogle.com</span></p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px">Now I hope I don&#39;t need to go into an explanation on the first part so I&#39;m really only going to talk about the second. Also I&#39;ll do some basic tutorials on accepting data later so you have an understand what happens to this url on the other side. When you look at the second part of the url you&#39;ll always see a field and a value for the field, although sometimes that value is blank. How do you know which is the field and which is the value you ask? The field is always going to come before the equal sign = and the value will come after. Basically think of it like assigning a variable a value. In this data being passed by the url our field is &quot;q&quot; if you didn&#39;t already guess and our value is site%3Afroogle.com. The field &#39;q&#39; that MSN takes stands for query. So passing data assigned to the &#39;q&#39; field is telling MSN, &quot;hey look this search/query up for me.&quot;</p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px">The value assigned to the field &#39;q&#39; is site%3Afroogle.com. First thing you&#39;re probably thinking is what in the world is that %3A, I didn&#39;t type that. Well to keep things very simplistic, there&#39;s certain variables that can&#39;t be passed through url&#39;s things like colon&#39;s, quotes, semi-colon&#39;s etc, because these are protected and mean certain things to a web server when they see them. So we need to use some other form of formatting. In this case we&#39;re converting the &#39;:&#39; in site:froogle.com to a encoded value (more on that later). So what we&#39;re asking for by the site: command in MSN is how many pages from site X are in your search engine. So specifically how many pages from froogle are indexed in MSN.</p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px"><a href="http://search.msn.com/results.aspx?q=site%3Afroogle.com">Click here to see the page we&#39;re scraping</a></p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px">// Some hosting providers treat file_get_contents as a dangerous command, so maybe we should try curl, or some class, instead.</p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px"><span class="explan_headings">Line 3.</span><br />
	<span class="code">$regex = &#39;/Page 1 of (.+?) results/&#39;;</span></p>
<p class="style2">&lt;div id=&quot;search_header&quot;&gt;&lt;h1&gt;site:froogle.com&lt;/h1&gt;&lt;h5&gt;Page 1 of 9,138 results&lt;/h5&gt;&amp;nbsp;&lt;b&gt;&amp;#01</p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px">Anytime you see a $varname = &#39;something here&#39;; or $varname = &quot;something here&quot;; you know it&#39;s just a value being assigned to a variable. Also note you can use single &#39; and double &quot; quotes interchangeably.</p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px"><span style="color:#006400">(.+?) is our best friend when it comes to regex, it basically means match everything starting from the text</span> (I&#39;ll call that text anchors too, so be prepared for me to use them interchangeably) in the beginning and stopping at our end text/anchor. Something like this:</p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px"><span style="color:#006400">opening anchor text here ( .+?) closing anchor text here</span></p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px">Pretty easy huh? Yeah I thought so. The only other thing to note in this is that there is the forward slashes in the &#39;/stuff/&#39;; that&#39;s a regex thing. Just know that in php you always need to let regex know what to match inside of forward slashes.</p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px">// Know more about regex here :&nbsp;<span class="Apple-style-span" style="font-size: medium"><a href="http://de.php.net/manual/en/reference.pcre.pattern.syntax.php">http://de.php.net/manual/en/reference.pcre.pattern.syntax.php</a></span></p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px"><span class="Apple-style-span">Line 4.</span></p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px"><span class="code">preg_match($regex,$data,$match);</span><br />
	Ah a new function&#39;s in town, preg_match(). Preg_match() is the PHP function to call regex for a single match. So anytime we want to match one thing in our data we&#39;re going to call the parsing function preg_match().</p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px">With preg match we&#39;re doing something called passing data to the function for it to work on. In this case we&#39;re passing $regex, $data, $match. We know what both $regex (parsing string we just made) and $data (scraped page from MSN) are but what is the $match variable? It&#39;s just the variable that our parsed data is going to be returned to. In plain english we&#39;re saying take $data and then apply the filter $regex to it. Then whatever comes through that filter dump out into $match. Make sense?</p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px">I sure hope you said yep, that&#39;s easy.</p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px"><span class="explan_headings">Line 5.</span><br />
	<span class="code">var_dump($match);&nbsp;</span><br />
	The function var_dump() is your best friend as a programmer. It says whatever is in this variable or array dump it out onto the screen so I can see what&#39;s happening. So this line will output this onto the screen.</p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px">array(2) {<br />
	[0]=&gt;<br />
	string(23) &quot;Page 1 of 9,138 results&quot;<br />
	[1]=&gt;<br />
	string(5) &quot;9,138&quot;<br />
	}</p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px">Array? What&#39;s that? Well this is as good a time as any to introduce what an array is. They&#39;re extremely useful tools for you to know. So let&#39;s back up a little: we know that a variable is something that holds 1 thing, right? Well an array is just like a variable except it holds multiple things. I like to think of it like this. Stop and imagine a train for a second: it has all these cars on it that hold things, right? Well, a variable is a single car and can only hold a single thing. Where an array is like a train that has multiple cars to hold things. In the output above we have a two (2) cell array, which is just like a 2 car train. In car 0 we have the string &#39;Page 1 of 9,138 results&#39; and in car 1 we have the string &#39;9,138&#39;, which is the result we want right? You might be asking why does preg_match return an array rather than just a simple string. It does this to give you two options on how to match things. You&#39;ll notice car/cell 0 has the anchors included as well as the matched text. Where car 1 only has the text inside the anchors.&nbsp;</p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px"><span class="explan_headings">Line 6.</span><br />
	<span class="code">echo $match[1];<br />
	What&#39;s with the new notation? If you hadn&#39;t already guessed that&#39;s how we access the cars in our train. We know if we have an array and what we want is in car 1 we access that&nbsp;</span>by &#39;referencing&#39; that car which is what the [1] means. We want to output only what&#39;s in the second cell because we don&#39;t want the anchors included. This will output to our screen:</p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px">9,138</p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px">Which is exactly what we aimed to do.</p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px"><span class="Apple-style-span">Other things to try -</span></p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px">So fun stuff to try using our new skills.</p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px">1. Use the link: command in MSN and see if you can get the number of links for a domain. Don&#39;t forget that : = %3A</p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px">2. See if you can get the title of any web page. Hint: anchors are going to be &lt;title&gt; and &lt;/title&gt;.</p>
<h3>Conclusion -</h3>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px">You can make some pretty cool tools with just the two very basic things I&#39;ve shared with you so far. Pulling data from somewhere using the file_get_contents() function and the data parsing preg_match() function. Have fun with it and I&#39;ll see you on the next data scraping tutorial.</p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px">wiki scrape</p>
<p style="font-size: 14px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 15px">&nbsp;</p>
<pre style="margin-top: 0px;margin-right: 0px;margin-bottom: 0px;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;width: auto;clear: none;font-size: 12px;line-height: 1.333;font-family: monospace"><span class="Apple-style-span"><span>&lt;?php</span>
<span>function</span> wikipedia<span>(</span><span>$article</span><span>)</span>	<span>{</span>
	<span>$pattern</span><span>[</span><span>0</span><span>]</span> <span>=</span> <span>&#39;/&lt;a href=&quot;(.*?)&quot;&gt;(.*?)&lt;\\/a&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>0</span><span>]</span> <span>=</span> <span>&#39;$2&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>1</span><span>]</span> <span>=</span> <span>&#39;/&lt;h3 id=\&quot;siteSub\&quot;&gt;From Wikipedia, the free encyclopedia&lt;\/h3&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>1</span><span>]</span> <span>=</span> <span>&#39;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>2</span><span>]</span> <span>=</span> <span>&#39;/&lt;div id=\&quot;contentSub\&quot;&gt;(.*?)&lt;\/div&gt;&lt;div id=\&quot;jump-to-nav\&quot;&gt;Jump to: navigation, search&lt;\/div&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>2</span><span>]</span> <span>=</span> <span>&#39;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>3</span><span>]</span> <span>=</span> <span>&#39;/&lt;div class=\&quot;messagebox cleanup metadata\&quot;&gt;(.*?)&lt;p&gt;&lt;br \/&gt;&lt;\/p&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>3</span><span>]</span> <span>=</span> <span>&#39;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>4</span><span>]</span> <span>=</span> <span>&#39;/&lt;table class=\&quot;messagebox\&quot; (.*?)&gt;(.*?)&lt;\/table&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>4</span><span>]</span> <span>=</span> <span>&#39;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>5</span><span>]</span> <span>=</span> <span>&#39;/&lt;dl&gt;(.*?)&lt;\/dl&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>5</span><span>]</span> <span>=</span> <span>&#39;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>6</span><span>]</span> <span>=</span> <span>&#39;/&lt;h1 class=\&quot;firstHeading&quot;\&gt;(.*?)&lt;\/h1&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>6</span><span>]</span> <span>=</span> <span>&#39;&lt;h3&gt;$1&lt;/h3&gt;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>7</span><span>]</span> <span>=</span> <span>&#39;/&lt;table class=\&quot;messagebox protected\&quot; style=\&quot;border: 1px solid #8888aa; padding: 0px; font-size:9pt;\&quot;&gt;(.*?)&lt;\/table&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>7</span><span>]</span> <span>=</span> <span>&#39;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>8</span><span>]</span> <span>=</span> <span>&#39;/&lt;div class=\&quot;infobox sisterproject\&quot;&gt;(.*?)&lt;\/div&gt;&lt;\/div&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>8</span><span>]</span> <span>=</span> <span>&#39;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>9</span><span>]</span> <span>=</span> <span>&#39;/&lt;sup (.*?)&gt;(.*?)&lt;\/sup&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>9</span><span>]</span> <span>=</span> <span>&#39;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>10</span><span>]</span> <span>=</span> <span>&#39;/&lt;table style=\&quot;background: transparent;\&quot; width=\&quot;0\&quot;&gt;(.*?)&lt;\/table&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>10</span><span>]</span> <span>=</span> <span>&#39;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>11</span><span>]</span> <span>=</span> <span>&#39;/&lt;table class=\&quot;messagebox current\&quot; style=\&quot;font-size:	normal;\&quot;&gt;(.*?)&lt;\/table&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>11</span><span>]</span> <span>=</span> <span>&#39;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>12</span><span>]</span> <span>=</span> <span>&#39;/&lt;table class=\&quot;toccolours\&quot; align=\&quot;center\&quot; width=\&quot;55%\&quot; cellpadding=\&quot;0\&quot; cellspacing=\&quot;0\&quot;&gt;(.*?)&lt;\/table&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>12</span><span>]</span> <span>=</span> <span>&#39;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>13</span><span>]</span> <span>=</span> <span>&#39;/&lt;div class=\&quot;editsection\&quot;(.*?)&gt;(.*?)&lt;\/div&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>13</span><span>]</span> <span>=</span> <span>&#39;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>14</span><span>]</span> <span>=</span> <span>&#39;/&lt;div id=\&quot;bodyContent\&quot;&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>14</span><span>]</span> <span>=</span> <span>&#39;&lt;div&gt;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>15</span><span>]</span> <span>=</span> <span>&#39;/&lt;dd&gt;(.*?)&lt;\/dd&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>15</span><span>]</span> <span>=</span> <span>&#39;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>16</span><span>]</span> <span>=</span> <span>&#39;/&lt;div class=\&quot;messagebox cleanup metadata\&quot;&gt;(.*?)&lt;\/div&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>16</span><span>]</span> <span>=</span> <span>&#39;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>17</span><span>]</span> <span>=</span> <span>&#39;/&lt;div class=\&quot;thumbcaption\&quot;&gt;(.*?)&lt;\/div&gt;&lt;\/div&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>17</span><span>]</span> <span>=</span> <span>&#39;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>18</span><span>]</span> <span>=</span> <span>&#39;/&lt;div class=\&quot;thumb tright\&quot;&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>18</span><span>]</span> <span>=</span> <span>&#39;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>19</span><span>]</span> <span>=</span> <span>&#39;/\[(.*?)\]/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>19</span><span>]</span> <span>=</span> <span>&#39;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>20</span><span>]</span> <span>=</span> <span>&#39;/&lt;table class=&quot;messagebox protected&quot; (.*?)&gt;(.*?)&lt;\/table&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>20</span><span>]</span> <span>=</span> <span>&#39;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>21</span><span>]</span> <span>=</span> <span>&#39;/&lt;div style=&quot;position:absolute; z-index:100; right:20px; top:10px; height:10px; width:300px;&quot;&gt;&lt;\/div&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>21</span><span>]</span> <span>=</span> <span>&#39;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>22</span><span>]</span> <span>=</span> <span>&#39;/&lt;div style=&quot;position:absolute; z-index:100; right:10px; top:10px;&quot; class=&quot;metadata&quot; id=&quot;administrator&quot;&gt;(.*?)&lt;\/div&gt;&lt;\/div&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>22</span><span>]</span> <span>=</span> <span>&#39;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>23</span><span>]</span> <span>=</span> <span>&#39;/&lt;table class=&quot;messagebox current&quot;(.*?)&gt;(.*?)&lt;\/table&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>23</span><span>]</span> <span>=</span> <span>&#39;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>24</span><span>]</span> <span>=</span> <span>&#39;/&lt;table class=&quot;messagebox current&quot; style=&quot;width: auto;&quot;&gt;(.*?)&lt;\/table&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>24</span><span>]</span> <span>=</span> <span>&#39;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>25</span><span>]</span> <span>=</span> <span>&#39;/&lt;div class=&quot;dablink&quot;&gt;(.*?)&lt;\/div&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>25</span><span>]</span> <span>=</span> <span>&#39;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>26</span><span>]</span> <span>=</span> <span>&#39;/&lt;b&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>26</span><span>]</span> <span>=</span> <span>&#39;&lt;strong&gt;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>27</span><span>]</span> <span>=</span> <span>&#39;/&lt;\/b&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>27</span><span>]</span> <span>=</span> <span>&#39;&lt;/strong&gt;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>28</span><span>]</span> <span>=</span> <span>&#39;/&lt;div(.*?)&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>28</span><span>]</span> <span>=</span> <span>&#39;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>29</span><span>]</span> <span>=</span> <span>&#39;/&lt;\/div&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>29</span><span>]</span> <span>=</span> <span>&#39;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>30</span><span>]</span> <span>=</span> <span>&#39;/&lt;map(.*?)&gt;(.*?)&lt;\/map&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>30</span><span>]</span> <span>=</span> <span>&#39;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>31</span><span>]</span> <span>=</span> <span>&#39;/&lt;img src=&quot;(.*?)&quot; alt=&quot;This page is semi-protected.&quot; width=&quot;18&quot; (.*?)\/&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>31</span><span>]</span> <span>=</span> <span>&#39;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>32</span><span>]</span> <span>=</span> <span>&#39;/&lt;table style=&quot;width:100%;background:none&quot;&gt;(.*?)&lt;\/table&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>32</span><span>]</span> <span>=</span> <span>&#39;&#39;</span><span>;</span>
	<span>$pattern</span><span>[</span><span>33</span><span>]</span> <span>=</span> <span>&#39;/&lt;div class=&quot;messagebox merge metadata&quot;&gt;(.*?)&lt;\/div&gt;/&#39;</span><span>;</span>
	<span>$replace</span><span>[</span><span>33</span><span>]</span> <span>=</span> <span>&#39;&#39;</span><span>;</span>
	<span>$wikipedia</span> <span>=</span> <span>file_get_contents</span><span>(</span><span>$article</span><span>)</span><span>;</span>
	<span>$wikipedia</span> <span>=</span> <span>preg_replace</span><span>(</span><span>$pattern</span><span>,</span> <span>$replace</span><span>,</span> <span>$wikipedia</span><span>)</span><span>;</span>
		<span>if</span> <span>(</span><span>preg_match</span><span>(</span><span>&quot;/&lt;\!-- start content --\&gt;(.*)&lt;table id=<span>\&quot;</span>toc<span>\&quot;</span> class=<span>\&quot;</span>toc<span>\&quot;</span> summary=<span>\&quot;</span>(.*)<span>\&quot;</span>&gt;/&quot;</span><span>,</span> <span>$wikipedia</span><span>,</span> <span>$w</span><span>)</span><span>)</span> <span>{</span>
			<span>$wikipedia</span> <span>=</span> <span>$w</span><span>[</span><span>1</span><span>]</span><span>;</span>
		<span>}</span> <span>elseif</span> <span>(</span><span>preg_match</span><span>(</span><span>&quot;/&lt;\!-- start content --\&gt;(.*)&lt;a name=<span>\&quot;</span>(.*)<span>\&quot;</span>&gt;/is&quot;</span><span>,</span> <span>$wikipedia</span><span>,</span> <span>$w</span><span>)</span><span>)</span> <span>{</span>
			<span>$wikipedia</span> <span>=</span> <span>$w</span><span>[</span><span>1</span><span>]</span><span>;</span>
		<span>}</span> <span>elseif</span> <span>(</span><span>preg_match</span><span>(</span><span>&quot;/&lt;\!-- start content --\&gt;(.*)&lt;div class=<span>\&quot;</span>boilerplate metadata<span>\&quot;</span> id=<span>\&quot;</span>stub<span>\&quot;</span>&gt;/is&quot;</span><span>,</span> <span>$wikipedia</span><span>,</span> <span>$w</span><span>)</span><span>)</span> <span>{</span>
			<span>$wikipedia</span> <span>=</span> <span>$w</span><span>[</span><span>1</span><span>]</span><span>;</span>
		<span>}</span> <span>elseif</span> <span>(</span><span>preg_match</span><span>(</span><span>&quot;/&lt;\!-- start content --\&gt;(.*)&lt;div class=<span>\&quot;</span>printfooter<span>\&quot;</span>&gt;/is&quot;</span><span>,</span> <span>$wikipedia</span><span>,</span> <span>$w</span><span>)</span><span>)</span> <span>{</span>
			<span>$wikipedia</span> <span>=</span> <span>$w</span><span>[</span><span>1</span><span>]</span><span>;</span>
		<span>}</span>
	<span>print</span> <span>$wikipedia</span><span>;</span>
<span>}</span>
<span>?&gt;</span></span></pre>
<iframe src="http://www.facebook.com/plugins/like.php?href=http%3A%2F%2Fbig.anythingilike.net%2Farticles%2Fscrape-site-basic%2F&amp;layout=standard&amp;show_faces=true&amp;width=450&amp;action=like&amp;colorscheme=light" scrolling="no" frameborder="0" allowTransparency="true" style="border:none; overflow:hidden; width:450px;margin-top:5px;"></iframe>]]></content:encoded>
			<wfw:commentRss>http://big.anythingilike.net/articles/scrape-site-basic/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>When an Italian Tries to Speak English</title>
		<link>http://big.anythingilike.net/life/clip/when-an-italian-tries-to-speak-english/</link>
		<comments>http://big.anythingilike.net/life/clip/when-an-italian-tries-to-speak-english/#comments</comments>
		<pubDate>Sat, 17 Apr 2010 19:05:38 +0000</pubDate>
		<dc:creator>admin</dc:creator>
				<category><![CDATA[Clip]]></category>

		<guid isPermaLink="false">http://big.anythingilike.net/?p=271</guid>
		<description><![CDATA[httpv://www.youtube.com/watch?v=eOPJwPDuurs&#38;feature=player_embedded]]></description>
			<content:encoded><![CDATA[<p>httpv://www.youtube.com/watch?v=eOPJwPDuurs&amp;feature=player_embedded</p>
<iframe src="http://www.facebook.com/plugins/like.php?href=http%3A%2F%2Fbig.anythingilike.net%2Flife%2Fclip%2Fwhen-an-italian-tries-to-speak-english%2F&amp;layout=standard&amp;show_faces=true&amp;width=450&amp;action=like&amp;colorscheme=light" scrolling="no" frameborder="0" allowTransparency="true" style="border:none; overflow:hidden; width:450px;margin-top:5px;"></iframe>]]></content:encoded>
			<wfw:commentRss>http://big.anythingilike.net/life/clip/when-an-italian-tries-to-speak-english/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>Webmaster knowledge : Protect site from proxy SEO technique</title>
		<link>http://big.anythingilike.net/articles/webmaster-knowledge-protect-site-from-proxy-seo-technique/</link>
		<comments>http://big.anythingilike.net/articles/webmaster-knowledge-protect-site-from-proxy-seo-technique/#comments</comments>
		<pubDate>Sat, 17 Apr 2010 16:08:15 +0000</pubDate>
		<dc:creator>admin</dc:creator>
				<category><![CDATA[Articles]]></category>
		<category><![CDATA[Featured]]></category>
		<category><![CDATA[proxy]]></category>
		<category><![CDATA[seo]]></category>

		<guid isPermaLink="false">http://big.anythingilike.net/?p=269</guid>
		<description><![CDATA[Webmaster knowledge : Protect site from proxy SEO technique very good article from seofaststart How To Tell If You&#39;ve Been Proxy Hacked The simplest test, if you are experiencing a problem, is to examine Google search results for a phrase (search term in quotes) that should be unique to your page. For example, if your [...]]]></description>
			<content:encoded><![CDATA[<h2>Webmaster knowledge : Protect site from proxy SEO technique</h2>
<p>very good article from <a href="http://www.seofaststart.com/blog/google-proxy-hacking">seofaststart</a></p>
<h2>How To Tell If You&#39;ve Been Proxy Hacked</h2>
<p>The simplest test, if you are experiencing a problem, is to examine Google search results for a phrase (search term in quotes) that should be unique to your page. For example, if your home page says &quot;Fred&#39;s Widget Factory sells the best down-home widgets on Earth&quot; then you can search for that phrase.<span id="more-269"></span></p>
<p>You want to use a phrase (or combination of phrases) that should only appear on your page, and nowhere else on the web&hellip; or very few places at least. Then you do the search &ndash; if there&#39;s more than one result (your page), then you need to examine the other URLs that are listed. If some of them are delivering an exact copy of the page, you just may be dealing with a proxy that has hijacked your content.</p>
<p><span><strong>A typical proxy link looks something like this:</strong></span><br />
	<span>www.example.com/nph-proxy.pl/011110A/http/www.mattcutts.com/blog/</span><br />
	It&#39;s<strong> easy to see what URL that would fetch</strong>, if example.com were a real proxy.<span><strong> Other proxy URLs encode the target URL</strong></span> so it&#39;s <strong>not always that easy </strong>to determine what they&#39;re going to fetch just by looking.</p>
<p><strong>The mere presence of proxies in the index<span> doesn&#39;t necessarily mean you&#39;ll be dropped or penalized</span>.</strong> The situation inside Google&#39;s systems is no doubt very complex. I have seen sites with multiple proxies indexed, and no ill effects. It&#39;s possible that there are certain factors<strong> (trust, authority, domain age, etc.) t</strong>hat make one site more susceptible than another. I have no idea how they make the decision on which copy of a page to keep.</p>
<h2>Why Is This Even Possible?</h2>
<p>In simple terms, it appears that the original (authentic) page gets dropped or penalized as duplicate content.</p>
<p>A couple years ago, Google deployed some software &amp; infrastructure changes collectively known as &quot;Big Daddy.&quot; This involved crawling from many different data centers, and changes to the crawler itself. It appears that the changes include moving some of the duplicate content detection down to the crawlers. The bug probably arises from the way the data centers are synchronized. Pure speculation here, but the picture I have of what happens looks like this:</p>
<ol>
<li><span>The original page exists in at least some of the data centers.</span></li>
<li><span>A copy (proxy) gets indexed in one data center, and that gets sync&#39;d across to the others.</span></li>
<li><span>A spider visits the original, checks to see if the content is duplicate, and erroneously decides that it is.</span></li>
<li><span>The original is dropped or penalized.</span></li>
</ol>
<p><strong>As far as whether &quot;any site&quot; could get hacked, I don&#39;t know.</strong> I&#39;m not a black hat. I don&#39;t have a <span>link farm. I don&#39;t have a botnet to spam blogs</span> with. So I<span> can&#39;t manufacture thousands of links to thousands of proxies,</span> in an attempt to knock sites off of SERPS. I wouldn&#39;t do that anyway &ndash; it&#39;s evil. So what I know is based mostly on sites reporting a problem, blocking the proxies, and seeing the problem disappear after the proxies are gone. Then repeating the exercise with the same results.</p>
<h2>How To Fight Back</h2>
<p>There are basically three main possibilities for your situation:</p>
<p><strong>Situation 1: You are running an Apache server. </strong>We have 2 solutions in this case, that were developed by <a class="external external_icon" href="http://www.seoegghead.com/blog/about/">Jaimie Sirovich</a> (co-author of <em>Professional Search Engine Optimization with PHP</em>). We&#39;ve worked some late nights on this.</p>
<p><strong>Solution #1 uses <span>mod_rewrite and .htaccess, to pass all spider requests through a PHP script that validates the request</span>. </strong>This will only<strong> defend against being hacked via &quot;normal&quot; anonymous proxies</strong> that pass along the user agent &ndash; it only inspects visits from the &quot;Big 4&quot; search engines (Ask, Google, MSN, and Yahoo). I call this the &quot;first tier&quot; defense &ndash; it won&#39;t stop every proxy that exists, but it will come close, and you can implement it without modifying any of your applications. It will even work if your web site is all static pages. This is what I&#39;m implementing. Jaimie doesn&#39;t like it because it&#39;s kind of a hack &ndash; and he would rather you didn&#39;t use it at all.</p>
<p><strong>Solution #2 is a PHP script that implements the<span> &quot;reverse cloaking&quot; defense,</span></strong><span> putting a &quot;noindex, nofollow&quot; robots meta tag into your pages </span>unless it&#39;s a spider that you have configured the script to recognize. This will only be possible if your site is built on PHP. It wouldn&#39;t be terribly difficult for a competent PHP user to implement this in an all-static site, you&#39;d just need to change .htaccess so that your .html files are parsed as PHP. A WordPress plug-in will follow soon. This is a more robust defense, against more proxies.</p>
<p><strong>How to get the code:</strong> An <a class="external external_icon" href="http://www.seoegghead.com/blog/seo/how-to-guide-prevent-google-proxy-hacking-p210.html">implementation guide</a> is provided on Jaimie&#39;s blog, along with a testing environment that you can use to <a class="external external_icon" href="http://www.seoegghead.com/tools/test-simple-cloak.php">check spider user agents &amp; IP addresses</a>, and of course the <a href="http://www.seoegghead.com/blog/simplecloak-v2-php-implementation/">source code for both solutions</a>. No warranty is given. This is hard core code for a hard core situation. Don&#39;t use it if you don&#39;t need it, and all code should really be deployed by professionals who can understand what it does, modify it to suit unique environments, etc.</p>
<p><strong>Situation 2: You are running a Microsoft (IIS) server.</strong> Jaimie is working on an IIS/ASP solution similar to the Apache/PHP solution, which should be available soon. Think days, not weeks, in other words. Much sooner than his new book (Professional SEO with ASP), which is also in the pipeline.</p>
<p><strong>Situation 3: You are on a hosted solution, aren&#39;t running PHP scripts that you can edit, don&#39;t control the web server, etc.</strong> This is a more complex situation. I will have another post tomorrow that will offer some possible solutions, including one that involves <strong><span>creating your own caching proxy on a separate server. </span></strong>In this case, I don&#39;t recommend doing anything unless you really believe that you have a problem with proxies.</p>
<p>There are other solutions available. Bill Atchison&#39;s <a class="external external_icon" href="http://www.crawlwall.com/">Crawlwall</a> is a professional (commercial)<strong><span> solution, that does a lot more to prevent content theft</span></strong>, etc. If you have the means, you may want to consider this instead, and move the burden of &quot;keeping up with the spiders&quot; onto Bill&#39;s shoulders. Jaimie is working on a more general proxy-blocking solution as well. Ekstreme has the beginnings of a spider validation solution in the <a class="external external_icon" href="http://ekstreme.com/phplabs/search-engine-authentication.php">PHP Search Engine Bot Authentication</a> code they published.</p>
<h2>If You Are Operating A Proxy &ndash; Don&#39;t Be Part of the Problem</h2>
<p>If you are <strong><span>operating a proxy server, and you don&#39;t want to be part of the problem, you can prevent your server from being used as a tool by adding a robots.txt</span></strong> file that prevents all search engine spiders from indexing proxied content through your server. For example, if all proxy URLs begin with /proxy/ then you can use:</p>
<pre>User-agent: *</pre>
<pre>Disallow: /proxy/</pre>
<p>Of course, not all proxies are being run by innocent people for innocent reasons. Some of them are actually designed to hijack content &ndash; to deliver ads, etc. Some people want to steal your content, and they want the search engines to index it. In fact, I would not be surprised if a large part of the overall problem isn&#39;t caused by such people firing links at their own proxies.</p>
<h2>Is It Just Google?</h2>
<p>You got me&hellip; I haven&#39;t seen any cases on other engines that looked like a proxy hack, but I&#39;d be surprised if it only affected Google. Google may simply be the only search engine that shows you enough search results to let you &quot;catch&quot; the proxies. Google may be more susceptible because they crawl more URLs more often, and use multiple data centers.</p>
<p>Assuming I am not completely wrong, it sure looks like less of a design flaw, and more of an &quot;emergent property&quot; of the very things that make Google the world&#39;s best search engine (just my opinion, apparently <a class="external external_icon" href="http://news.com.com/8301-10784_3-9759090-7.html">the average consumer no longer agrees</a>). I don&#39;t know that there is an easy solution, especially if the problem arises because of their multiple-data-center strategy.</p>
<p>Unfortunately, any countermeasures that we implement could be thwarted by someone willing to copy our content in other ways, or by constructing a proxy that spoofs user agents, uses intermediate proxies to hide its IP address, and strips out meta tags. This has always been possible, BTW. Anyone actually doing these things, of course, would likely be committing a crime&hellip; and would be a lot easier to find than some script kiddie using comment spam to fire links at someone else&#39;s proxies.</p>
<h1><span style="text-decoration: line-through">UPDATE: As of May 1, 2008, I have every reason to believe that Google has solved this problem, at least in the general case. At this point, the only sites I can see getting &quot;duped by proxy&quot; are spammier than the proxies themselves.</span></h1>
<p><span><strong>Update again: September 2009 </strong>- damned if this thing hasn&#39;t cropped up again &ndash; now it looks like Google&#39;s replacing the duped URL with the copy&#39;s URL &ndash; and even RANKING the duplicates&hellip; (similar to the already-known-and-passed-off-as-a-feature 302 redirect bug).</span></p>
<p>&nbsp;</p>
<p>My view on this: we could use this SEO tip to improve our own system, keeping one thing in mind: Googlebot is not stupid; perhaps it just needs a few hints to do its work.<br />
	For protection, the following article is great: <a href="http://www.seoimage.com/stardate/google-remove-url-tool/">SEO image</a></p>
<p>For those of you who do not know, SEO Image is one of the most plagiarized websites. Our content is stolen and rewritten every day by new and novice SEO companies throughout the world.</p>
<p>One issue we have is that these novice SEO companies not only copy word for word, but some cause the same effect that the proxy search portals do. That is a duplicate content filter. For those of you new to my blog, I have been very anti-duplicate content filter since its unleashing in 2005 as an overly aggressive filter.</p>
<p>So, to take this further the proxy sites are ways that searchers can try and mask the IP they are searching from, as the proxy server will allow someone to access a site that has banned regions from accessing it. I do not want to get too technical with the proxy servers; you can read more at <a href="http://en.wikipedia.org/wiki/Proxy_server" target="_blank" title="Wikipedia Proxy Server">Wikipedia</a>. The problem with proxy servers is that they cache websites that are searched and then allow search engines to spider them so that they can appear as larger websites (page spam) and rank better so that people can click on the paid ads.</p>
<p>The <a href="http://services.google.com:8882/urlconsole/controller" title="Google URL Removal Tool">Google URL Removal tool</a> is a sure way of removing proxy duplicates. Since we feel the duplicate content filter will remove most copies, the proxy search results concern us because they are used by Black Hat SEO&rsquo;s to try and hurt other websites rankings.</p>
<p>There is one easy way to remove the proxy servers with Google&rsquo;s Remove URL tool. That is, first you need to be able to deny IP ranges from accessing your website in either Windows IIS Administration, or htaccess for Linux Servers.<br />
	First Step:</p>
<ol>
<li>Find the proxy indexed in Google with your content</li>
<li>Find the Reverse DNS using <a href="http://www.dnsstuff.com/" target="_blank" title="DNS Stuff">DNS Stuff</a> to determine the IP. We generally block the Name services and the IP by C Class (XXX.XXX.C-Class.XXX). If the IP does not work, try our <a href="http://www.seoimage.com/seo-tools/check-redirect-and-server-header.html" title="Server Header Checker">Server Header Checker Tool</a>.</li>
<li>Using your .htaccess file or IIS Administration deny access to the IP ranges by the C Class of the IP.</li>
<li>Click the link in the Google Search Results and see if it returns a 403 Forbidden Code.</li>
</ol>
<p>This is where it gets tricky, if you get the 403 code, then the site will no longer be duplicating you, however, if the site uses a frameset or iframe, then you will NOT be able to use the Google URL Removal Tool as it will see a 200 &ldquo;Found&rdquo; header directive and assume the page still exists.</p>
<p>Use the URL Removal Tool and check off &ldquo;anything associated with this domain&rdquo;. If the site does not use frames then you will get it removed, if it does have frames then google gets a 200 code and will NOT remove the site despite the frame. You can try to access the frame and submit that page, but it generally will not help.</p>
<p>All in all, the ability of proxy servers to hurt rankings is unknown. We believe it will affect some of the sites&#39; rankings, but that may not be the full story. Another issue with proxy servers is that they can 302 hijack sites if they are set up poorly.</p>
<p>We have not found any code that can ban proxy servers even ones that use nph-proxy.cgi.</p>
<p>&nbsp;</p>
<h3>How To Fight Back &mdash; Code implementations</h3>
<p><a href="http://www.seoegghead.com/blog/simplecloak-v2-php-implementation">Seo-Egg<br />
	</a></p>
<p>Well that&#39;s where I come in. I have 2 implementations in beta (read: they work according to my tests, but I&#39;m going to be testing more) that address the problem based on the methods the search engines cite. Then, essentially, we&#39;re using a benign form of cloaking (yes, cloaking!) to make it more difficult for bad bots, proxies, etc. to exploit us.</p>
<h4>&nbsp;</h4>
<p>I&#39;ll expand the explanation in that documentation to make it easier to comprehend/install. <i>But if you know PHP, dive right in.</i></p>
<p>The code and concepts were primarily based on the book I coauthored, &quot;<a href="http://www.amazon.com/Professional-Search-Engine-Optimization-PHP/dp/0470100923/">Search Engine Optimization with PHP</a>.&quot; It is my sentiment that most SEOs have to be aware of technology more so than they think &mdash; hence the book authored by me and co-author <a href="http://www.cristiandarie.ro">Cristian Darie</a>. This is just one example.</p>
<blockquote>
<p>&nbsp;</p>
<h3>Note: I didn&#39;t realize WP changes quotes to curly quotes to look &quot;pretty&quot; since version 2.1. I turned that feature off. Cutting and pasting should work.</h3>
<p><b>Below is the main class necessary for the cloaking functionality, &quot;SimpleCloakV2:&quot;</b></p>
<p>
		<!-- BEGIN_PHP_HIGHLIGHT --><code><span><span>&lt;?php</span></span></code></p>
<p>$__metaRobotsExcludeProxiesCallbackHTML&nbsp;<span>=&nbsp;</span><span>&#39;&#39;</span><span>;</span></p>
<p><span>/*<br />
		//&nbsp;+<span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span>+<br />
		//&nbsp;|&nbsp;SimpleCloakV2&nbsp;Version&nbsp;2&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;|<br />
		//&nbsp;|&nbsp;Class&nbsp;for&nbsp;cloaking&nbsp;content&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;|<br />
		//&nbsp;|&nbsp;http://www.SEOEgghead.com&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;|<br />
		//&nbsp;+<span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span>+<br />
		//&nbsp;|&nbsp;Copyright&nbsp;(c)&nbsp;2005-2006&nbsp;Jaimie&nbsp;Sirovich&nbsp;and&nbsp;Cristian&nbsp;Darie&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;|<br />
		//&nbsp;+<span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span><span>-</span>+<br />
		*/</span></p>
<p>//&nbsp;load&nbsp;configuration&nbsp;file<br />
		<span>require_once(</span><span>&#39;config.inc.php&#39;</span><span>);</span></p>
<p>class&nbsp;<span>SimpleCloakV2<br />
		</span><span>{&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;function&nbsp;</span><span>_connect</span><span>()<br />
		&nbsp;&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;(</span><span>USE_CUSTOM_CONNECT_CODE</span><span>)&nbsp;return&nbsp;</span><span>true</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;Connect&nbsp;to&nbsp;MySQL&nbsp;server<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$dbLink&nbsp;</span><span>=&nbsp;</span><span>mysql_connect</span><span>(</span><span>DB_HOST</span><span>,&nbsp;</span><span>DB_USER</span><span>,&nbsp;</span><span>DB_PASSWORD</span><span>)<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;or&nbsp;die(</span><span>&quot;Could&nbsp;not&nbsp;connect:&nbsp;&quot;&nbsp;</span><span>.&nbsp;</span><span>mysql_error</span><span>());</span></p>
<p>&nbsp;&nbsp;&nbsp;&nbsp;<span>//&nbsp;Connect&nbsp;to&nbsp;the&nbsp;seophp&nbsp;database&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>mysql_select_db</span><span>(</span><span>DB_DATABASE</span><span>)&nbsp;or&nbsp;die(</span><span>&quot;Could&nbsp;not&nbsp;select&nbsp;database&quot;</span><span>);&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;return&nbsp;</span><span>$dbLink</span><span>;<br />
		&nbsp;&nbsp;}<br />
		&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;function&nbsp;</span><span>_close</span><span>(</span><span>$dbLink</span><span>)<br />
		&nbsp;&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;(</span><span>USE_CUSTOM_CONNECT_CODE</span><span>)&nbsp;return&nbsp;</span><span>true</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;close&nbsp;database&nbsp;connection&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>mysql_close</span><span>(</span><span>$dbLink</span><span>);<br />
		&nbsp;&nbsp;}<br />
		&nbsp;&nbsp;<br />
		&nbsp;&nbsp;</span><span>//&nbsp;returns&nbsp;the&nbsp;confidence&nbsp;level<br />
		&nbsp;&nbsp;</span><span>function&nbsp;</span><span>isSpider</span><span>(</span><span>$spider_name&nbsp;</span><span>=&nbsp;</span><span>&#39;&#39;</span><span>,&nbsp;</span><span>$check_uas&nbsp;</span><span>=&nbsp;</span><span>true</span><span>,&nbsp;</span><span>$check_ips&nbsp;</span><span>=&nbsp;</span><span>true</span><span>,&nbsp;</span><span>$use_user_defined_data&nbsp;</span><span>=&nbsp;</span><span>true</span><span>,&nbsp;</span><span>$ignore_bad_uas&nbsp;</span><span>=&nbsp;</span><span>true</span><span>)<br />
		&nbsp;&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;default&nbsp;confidence&nbsp;level&nbsp;to&nbsp;0<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$confidence&nbsp;</span><span>=&nbsp;</span><span>0</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;matching&nbsp;user&nbsp;agent?&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>if&nbsp;(</span><span>$check_uas</span><span>)<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;(</span><span>SimpleCloakV2</span><span>::</span><span>_get</span><span>(</span><span>0</span><span>,&nbsp;</span><span>$spider_name</span><span>,&nbsp;</span><span>&#39;UA&#39;</span><span>,&nbsp;</span><span>$_SERVER</span><span>[</span><span>&#39;HTTP_USER_AGENT&#39;</span><span>],&nbsp;</span><span>&#39;&#39;</span><span>,&nbsp;</span><span>$use_user_defined_data&nbsp;</span><span>?&nbsp;</span><span>&#39;&#39;&nbsp;</span><span>:&nbsp;</span><span>&#39;N&#39;</span><span>,&nbsp;</span><span>$ignore_bad_uas&nbsp;</span><span>?&nbsp;</span><span>&#39;bad&#39;&nbsp;</span><span>:&nbsp;</span><span>&#39;&#39;</span><span>))&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$confidence&nbsp;</span><span>+=&nbsp;</span><span>2</span><span>;<br />
		&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;matching&nbsp;IP?&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>if&nbsp;(</span><span>$check_ips</span><span>)<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;(</span><span>SimpleCloakV2</span><span>::</span><span>_get</span><span>(</span><span>0</span><span>,&nbsp;</span><span>$spider_name</span><span>,&nbsp;</span><span>&#39;IP&#39;</span><span>,&nbsp;</span><span>&#39;&#39;</span><span>,&nbsp;</span><span>$_SERVER</span><span>[</span><span>&#39;REMOTE_ADDR&#39;</span><span>],&nbsp;</span><span>$use_user_defined_data&nbsp;</span><span>?&nbsp;</span><span>&#39;&#39;&nbsp;</span><span>:&nbsp;</span><span>&#39;N&#39;</span><span>,&nbsp;</span><span>$ignore_bad_uas&nbsp;</span><span>?&nbsp;</span><span>&#39;bad&#39;&nbsp;</span><span>:&nbsp;</span><span>&#39;&#39;</span><span>))&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$confidence&nbsp;</span><span>+=&nbsp;</span><span>3</span><span>;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;return&nbsp;confidence&nbsp;level<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>return&nbsp;</span><span>$confidence</span><span>;<br />
		&nbsp;&nbsp;}<br />
		&nbsp;&nbsp;<br />
		&nbsp;&nbsp;</span><span>//&nbsp;retrieve&nbsp;cloaking&nbsp;data&nbsp;filtered&nbsp;by&nbsp;the&nbsp;supplied&nbsp;parameters<br />
		&nbsp;&nbsp;</span><span>function&nbsp;</span><span>_get</span><span>(</span><span>$id&nbsp;</span><span>=&nbsp;</span><span>0</span><span>,&nbsp;</span><span>$spider_name&nbsp;</span><span>=&nbsp;</span><span>&#39;&#39;</span><span>,&nbsp;</span><span>$record_type&nbsp;</span><span>=&nbsp;</span><span>&#39;&#39;</span><span>,&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$value&nbsp;</span><span>=&nbsp;</span><span>&#39;&#39;</span><span>,&nbsp;</span><span>$wildcard_value&nbsp;</span><span>=&nbsp;</span><span>&#39;&#39;</span><span>,&nbsp;</span><span>$is_user_defined_data&nbsp;</span><span>=&nbsp;</span><span>&#39;&#39;</span><span>,&nbsp;</span><span>$not_spider_name&nbsp;</span><span>=&nbsp;</span><span>&#39;&#39;</span><span>)<br />
		&nbsp;&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;by&nbsp;default,&nbsp;retrieve&nbsp;all&nbsp;records<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$q&nbsp;</span><span>=&nbsp;</span><span>&quot;&nbsp;SELECT&nbsp;cloak_data.*&nbsp;FROM&nbsp;cloak_data&nbsp;WHERE&nbsp;TRUE&nbsp;&quot;</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;add&nbsp;filters<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>if&nbsp;(</span><span>$id</span><span>)&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$id&nbsp;</span><span>=&nbsp;(int)&nbsp;</span><span>$id</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$q&nbsp;</span><span>.=&nbsp;</span><span>&quot;&nbsp;AND&nbsp;id&nbsp;=&nbsp;$id&nbsp;&quot;</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;}&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;(</span><span>$spider_name</span><span>)&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$spider_name&nbsp;</span><span>=&nbsp;</span><span>mysql_escape_string</span><span>(</span><span>$spider_name</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$q&nbsp;</span><span>.=&nbsp;</span><span>&quot;&nbsp;AND&nbsp;spider_name&nbsp;=&nbsp;&#39;$spider_name&#39;&nbsp;&quot;</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;}&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;(</span><span>$record_type</span><span>)&nbsp;{&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$record_type&nbsp;</span><span>=&nbsp;</span><span>mysql_escape_string</span><span>(</span><span>$record_type</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$q&nbsp;</span><span>.=&nbsp;</span><span>&quot;&nbsp;AND&nbsp;record_type&nbsp;=&nbsp;&#39;$record_type&#39;&nbsp;&quot;</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;}&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;(</span><span>$value</span><span>)&nbsp;{&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$value&nbsp;</span><span>=&nbsp;</span><span>mysql_escape_string</span><span>(</span><span>$value</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$q&nbsp;</span><span>.=&nbsp;</span><span>&quot;&nbsp;AND&nbsp;value&nbsp;=&nbsp;&#39;$value&#39;&nbsp;&quot;</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;}<br />
		&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;(</span><span>$wildcard_value</span><span>)&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$wildcard_value&nbsp;</span><span>=&nbsp;</span><span>mysql_escape_string</span><span>(</span><span>$wildcard_value</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$q&nbsp;</span><span>.=&nbsp;</span><span>&quot;&nbsp;AND&nbsp;(&nbsp;&#39;$wildcard_value&#39;&nbsp;=&nbsp;value&nbsp;OR&nbsp;&#39;$wildcard_value&#39;&nbsp;LIKE&nbsp;CONCAT(value,&nbsp;&#39;.%&#39;)&nbsp;)&nbsp;&quot;</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;}<br />
		&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;(</span><span>$is_user_defined_data</span><span>)&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$is_user_defined_data&nbsp;</span><span>=&nbsp;</span><span>mysql_escape_string</span><span>(</span><span>$is_user_defined_data</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$q&nbsp;</span><span>.=&nbsp;</span><span>&quot;&nbsp;AND&nbsp;is_user_defined_data&nbsp;=&nbsp;&#39;$is_user_defined_data&#39;&nbsp;&quot;</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;}<br />
		&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;(</span><span>$not_spider_name</span><span>)&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$not_spider_name&nbsp;</span><span>=&nbsp;</span><span>mysql_escape_string</span><span>(</span><span>$not_spider_name</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$q&nbsp;</span><span>.=&nbsp;</span><span>&quot;&nbsp;AND&nbsp;spider_name&nbsp;&lt;&gt;&nbsp;&#39;$not_spider_name&#39;&nbsp;&quot;</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;}&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$dbLink&nbsp;</span><span>=&nbsp;</span><span>SimpleCloakV2</span><span>::</span><span>_connect</span><span>();<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;execute&nbsp;the&nbsp;query<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$tmp&nbsp;</span><span>=&nbsp;</span><span>mysql_query</span><span>(</span><span>$q</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>SimpleCloakV2</span><span>::</span><span>_close</span><span>(</span><span>$dbLink</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;return&nbsp;the&nbsp;results&nbsp;as&nbsp;an&nbsp;associative&nbsp;array<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$rows&nbsp;</span><span>=&nbsp;array();<br />
		&nbsp;&nbsp;&nbsp;&nbsp;while&nbsp;(</span><span>$_x&nbsp;</span><span>=&nbsp;</span><span>mysql_fetch_assoc</span><span>(</span><span>$tmp</span><span>))&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$rows</span><span>[]&nbsp;=&nbsp;</span><span>$_x</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;}<br />
		&nbsp;&nbsp;&nbsp;&nbsp;return&nbsp;</span><span>$rows</span><span>;<br />
		&nbsp;&nbsp;}&nbsp;&nbsp;<br />
		&nbsp;&nbsp;<br />
		&nbsp;&nbsp;</span><span>//&nbsp;updates&nbsp;the&nbsp;entire&nbsp;database&nbsp;with&nbsp;fresh&nbsp;spider&nbsp;data,&nbsp;but&nbsp;only&nbsp;if&nbsp;our&nbsp;data&nbsp;is<br />
		&nbsp;&nbsp;//&nbsp;more&nbsp;than&nbsp;7&nbsp;days&nbsp;old,&nbsp;and&nbsp;if&nbsp;the&nbsp;online&nbsp;version&nbsp;from&nbsp;iplists.com&nbsp;has&nbsp;changed<br />
		&nbsp;&nbsp;</span><span>function&nbsp;</span><span>updateAll</span><span>(</span><span>$delete_user_defined_data&nbsp;</span><span>=&nbsp;</span><span>false</span><span>)<br />
		&nbsp;&nbsp;{&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span></p>
<p>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<span>$dbLink&nbsp;</span><span>=&nbsp;</span><span>SimpleCloakV2</span><span>::</span><span>_connect</span><span>();<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;retrieve&nbsp;last&nbsp;update&nbsp;information&nbsp;from&nbsp;database<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$q&nbsp;</span><span>=&nbsp;</span><span>&quot;SELECT&nbsp;cloak_update.*&nbsp;FROM&nbsp;cloak_update&quot;</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$tmp&nbsp;</span><span>=&nbsp;</span><span>mysql_query</span><span>(</span><span>$q</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$updated&nbsp;</span><span>=&nbsp;</span><span>mysql_fetch_assoc</span><span>(</span><span>$tmp</span><span>);&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$db_version&nbsp;</span><span>=&nbsp;</span><span>$updated</span><span>[</span><span>&#39;version&#39;</span><span>];&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$updated_on&nbsp;</span><span>=&nbsp;</span><span>$updated&nbsp;</span><span>[</span><span>&#39;updated_on&#39;</span><span>];<br />
		&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;if&nbsp;the&nbsp;latest&nbsp;update&nbsp;is&nbsp;more&nbsp;recent&nbsp;than&nbsp;7&nbsp;days,&nbsp;don&#39;t&nbsp;attempt&nbsp;an&nbsp;update&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>if&nbsp;(isset(</span><span>$updated_on</span><span>)&nbsp;&amp;&amp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;(</span><span>strtotime</span><span>(</span><span>$updated_on</span><span>)&nbsp;&gt;&nbsp;</span><span>strtotime</span><span>(</span><span>&quot;-604800&nbsp;seconds&quot;</span><span>)))&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;close&nbsp;database&nbsp;connection&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>SimpleCloakV2</span><span>::</span><span>_close</span><span>(</span><span>$dbLink</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;return&nbsp;false&nbsp;to&nbsp;indicate&nbsp;an&nbsp;update&nbsp;wasn&#39;t&nbsp;performed<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>return&nbsp;</span><span>false</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;}<br />
		&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;read&nbsp;the&nbsp;latest&nbsp;iplists&nbsp;version&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$version_url&nbsp;</span><span>=&nbsp;</span><span>&#39;http://www.iplists.com/nw/version.php&#39;</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$ch&nbsp;</span><span>=&nbsp;</span><span>curl_init</span><span>();&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>curl_setopt&nbsp;</span><span>(</span><span>$ch</span><span>,&nbsp;</span><span>CURLOPT_URL</span><span>,&nbsp;</span><span>$version_url</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>curl_setopt&nbsp;</span><span>(</span><span>$ch</span><span>,&nbsp;</span><span>CURLOPT_RETURNTRANSFER</span><span>,&nbsp;</span><span>1</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>curl_setopt&nbsp;</span><span>(</span><span>$ch</span><span>,&nbsp;</span><span>CURLOPT_TIMEOUT</span><span>,&nbsp;</span><span>60</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$latest_version&nbsp;</span><span>=&nbsp;</span><span>curl_exec</span><span>(</span><span>$ch</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>curl_close</span><span>(</span><span>$ch</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$latest_version&nbsp;</span><span>=&nbsp;</span><span>mysql_escape_string</span><span>(</span><span>$latest_version</span><span>);</span></p>
<p>&nbsp;&nbsp;&nbsp;&nbsp;<span>//&nbsp;if&nbsp;no&nbsp;updated&nbsp;version&nbsp;information&nbsp;was&nbsp;retrieved,&nbsp;abort<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>if&nbsp;(!</span><span>$latest_version</span><span>)&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;return&nbsp;false&nbsp;to&nbsp;indicate&nbsp;an&nbsp;update&nbsp;wasn&#39;t&nbsp;performed<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>return&nbsp;</span><span>false</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;}<br />
		&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;save&nbsp;the&nbsp;update&nbsp;data<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$q&nbsp;</span><span>=&nbsp;</span><span>&quot;DELETE&nbsp;FROM&nbsp;cloak_update&quot;</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>mysql_query</span><span>(</span><span>$q</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$q&nbsp;</span><span>=&nbsp;</span><span>&quot;INSERT&nbsp;INTO&nbsp;cloak_update&nbsp;(version,&nbsp;updated_on)&nbsp;&quot;&nbsp;</span><span>.&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>&quot;VALUES(&#39;$latest_version&#39;,&nbsp;NOW())&quot;</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>mysql_query</span><span>(</span><span>$q</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;if&nbsp;we&nbsp;already&nbsp;have&nbsp;the&nbsp;current&nbsp;data,&nbsp;don&#39;t&nbsp;attempt&nbsp;an&nbsp;update&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>if&nbsp;(</span><span>$latest_version&nbsp;</span><span>==&nbsp;</span><span>$db_version</span><span>)&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;close&nbsp;database&nbsp;connection&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>mysql_close</span><span>(</span><span>$dbLink</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;return&nbsp;false&nbsp;to&nbsp;indicate&nbsp;an&nbsp;update&nbsp;wasn&#39;t&nbsp;performed<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>return&nbsp;</span><span>false</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;}<br />
		&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;update&nbsp;the&nbsp;database<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>SimpleCloakV2</span><span>::</span><span>_updateCloakingDB</span><span>(</span><span>&#39;google&#39;</span><span>,&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>&#39;http://www.iplists.com/nw/google.txt&#39;</span><span>,&nbsp;</span><span>$delete_user_defined_data</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>SimpleCloakV2</span><span>::</span><span>_updateCloakingDB</span><span>(</span><span>&#39;yahoo&#39;</span><span>,&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>&#39;http://www.iplists.com/nw/inktomi.txt&#39;</span><span>,&nbsp;</span><span>$delete_user_defined_data</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>SimpleCloakV2</span><span>::</span><span>_updateCloakingDB</span><span>(</span><span>&#39;msn&#39;</span><span>,&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>&#39;http://www.iplists.com/nw/msn.txt&#39;</span><span>,&nbsp;</span><span>$delete_user_defined_data</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>SimpleCloakV2</span><span>::</span><span>_updateCloakingDB</span><span>(</span><span>&#39;ask&#39;</span><span>,&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>&#39;http://www.iplists.com/nw/askjeeves.txt&#39;</span><span>,&nbsp;</span><span>$delete_user_defined_data</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>SimpleCloakV2</span><span>::</span><span>_updateCloakingDB</span><span>(</span><span>&#39;altavista&#39;</span><span>,&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>&#39;http://www.iplists.com/nw/altavista.txt&#39;</span><span>,&nbsp;</span><span>$delete_user_defined_data</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>SimpleCloakV2</span><span>::</span><span>_updateCloakingDB</span><span>(</span><span>&#39;lycos&#39;</span><span>,&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>&#39;http://www.iplists.com/nw/lycos.txt&#39;</span><span>,&nbsp;</span><span>$delete_user_defined_data</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>SimpleCloakV2</span><span>::</span><span>_updateCloakingDB</span><span>(</span><span>&#39;wisenut&#39;</span><span>,&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>&#39;http://www.iplists.com/nw/wisenut.txt&#39;</span><span>,&nbsp;</span><span>$delete_user_defined_data</span><span>);</span></p>
<p>&nbsp;&nbsp;&nbsp;&nbsp;<span>//&nbsp;close&nbsp;connection&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>SimpleCloakV2</span><span>::</span><span>_close</span><span>(</span><span>$dbLink</span><span>);</span></p>
<p>&nbsp;&nbsp;&nbsp;&nbsp;<span>//&nbsp;return&nbsp;true&nbsp;to&nbsp;indicate&nbsp;a&nbsp;successful&nbsp;update<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>return&nbsp;</span><span>true</span><span>;<br />
		&nbsp;&nbsp;}<br />
		&nbsp;&nbsp;<br />
		&nbsp;&nbsp;</span><span>//&nbsp;update&nbsp;the&nbsp;database&nbsp;for&nbsp;the&nbsp;mentioned&nbsp;spider,&nbsp;by&nbsp;reading&nbsp;the&nbsp;provided&nbsp;URL<br />
		&nbsp;&nbsp;</span><span>function&nbsp;</span><span>_updateCloakingDB</span><span>(</span><span>$spider_name</span><span>,&nbsp;</span><span>$url</span><span>,&nbsp;</span><span>$delete_user_defined_data&nbsp;</span><span>=&nbsp;</span><span>false</span><span>)<br />
		&nbsp;&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$ua_regex&nbsp;</span><span>=&nbsp;</span><span>&#39;/^#&nbsp;UA&nbsp;&quot;(.*)&quot;$/m&#39;</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$ip_regex&nbsp;</span><span>=&nbsp;</span><span>&#39;/^([0-9.]+)$/m&#39;</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;use&nbsp;cURL&nbsp;to&nbsp;read&nbsp;the&nbsp;data&nbsp;from&nbsp;$url<br />
		&nbsp;&nbsp;&nbsp;&nbsp;//&nbsp;NOTE:&nbsp;additional&nbsp;settings&nbsp;are&nbsp;required&nbsp;when&nbsp;accessing&nbsp;the&nbsp;web&nbsp;through&nbsp;a&nbsp;proxy<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$ch&nbsp;</span><span>=&nbsp;</span><span>curl_init</span><span>();&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>curl_setopt&nbsp;</span><span>(</span><span>$ch</span><span>,&nbsp;</span><span>CURLOPT_URL</span><span>,&nbsp;</span><span>$url</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>curl_setopt&nbsp;</span><span>(</span><span>$ch</span><span>,&nbsp;</span><span>CURLOPT_HEADER</span><span>,&nbsp;</span><span>1</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>curl_setopt&nbsp;</span><span>(</span><span>$ch</span><span>,&nbsp;</span><span>CURLOPT_RETURNTRANSFER</span><span>,&nbsp;</span><span>1</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>curl_setopt&nbsp;</span><span>(</span><span>$ch</span><span>,&nbsp;</span><span>CURLOPT_FOLLOWLOCATION</span><span>,&nbsp;</span><span>1</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>curl_setopt&nbsp;</span><span>(</span><span>$ch</span><span>,&nbsp;</span><span>CURLOPT_TIMEOUT</span><span>,&nbsp;</span><span>60</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$result&nbsp;</span><span>=&nbsp;</span><span>curl_exec</span><span>(</span><span>$ch</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>curl_close</span><span>(</span><span>$ch</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;use&nbsp;_parseListURL&nbsp;to&nbsp;parse&nbsp;the&nbsp;list&nbsp;of&nbsp;IPs&nbsp;and&nbsp;user&nbsp;agents<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$lists&nbsp;</span><span>=&nbsp;</span><span>SimpleCloakV2</span><span>::</span><span>_parseListURL</span><span>(</span><span>$result</span><span>,&nbsp;</span><span>$ua_regex</span><span>,&nbsp;</span><span>$ip_regex</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;if&nbsp;the&nbsp;user&nbsp;agents&nbsp;and&nbsp;IPs&nbsp;weren&#39;t&nbsp;retrieved,&nbsp;we&nbsp;cancel&nbsp;the&nbsp;update&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>if&nbsp;(!</span><span>$lists</span><span>[</span><span>&#39;ua_list&#39;</span><span>]&nbsp;||&nbsp;!</span><span>$lists</span><span>[</span><span>&#39;ip_list&#39;</span><span>])&nbsp;return;</span></p>
<p>&nbsp;&nbsp;&nbsp;&nbsp;<span>//&nbsp;lock&nbsp;the&nbsp;cloak_data&nbsp;table&nbsp;to&nbsp;avoid&nbsp;concurrency&nbsp;problems<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>mysql_query</span><span>(</span><span>&#39;LOCK&nbsp;TABLES&nbsp;cloak_data&nbsp;WRITE&#39;</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;delete&nbsp;all&nbsp;the&nbsp;existing&nbsp;data&nbsp;for&nbsp;$spider_name<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>SimpleCloakV2</span><span>::</span><span>_deleteSpiderData</span><span>(</span><span>$spider_name</span><span>,&nbsp;</span><span>$delete_user_defined_data&nbsp;</span><span>?&nbsp;</span><span>&#39;&#39;&nbsp;</span><span>:&nbsp;</span><span>&#39;N&#39;</span><span>);</span></p>
<p>&nbsp;&nbsp;&nbsp;&nbsp;<span>//&nbsp;insert&nbsp;the&nbsp;list&nbsp;of&nbsp;user&nbsp;agents&nbsp;for&nbsp;the&nbsp;spider<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>foreach&nbsp;(</span><span>$lists</span><span>[</span><span>&#39;ua_list&#39;</span><span>]&nbsp;as&nbsp;</span><span>$ua</span><span>)&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>SimpleCloakV2</span><span>::</span><span>_insertSpiderData</span><span>(</span><span>$spider_name</span><span>,&nbsp;</span><span>&#39;UA&#39;</span><span>,&nbsp;</span><span>$ua</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;}</span></p>
<p>&nbsp;&nbsp;&nbsp;&nbsp;<span>//&nbsp;insert&nbsp;the&nbsp;list&nbsp;of&nbsp;IPs&nbsp;for&nbsp;the&nbsp;spider<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>foreach&nbsp;(</span><span>$lists</span><span>[</span><span>&#39;ip_list&#39;</span><span>]&nbsp;as&nbsp;</span><span>$ip</span><span>)&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>SimpleCloakV2</span><span>::</span><span>_insertSpiderData</span><span>(</span><span>$spider_name</span><span>,&nbsp;</span><span>&#39;IP&#39;</span><span>,&nbsp;</span><span>$ip</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;}<br />
		&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;release&nbsp;the&nbsp;table&nbsp;lock<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>mysql_query</span><span>(</span><span>&#39;UNLOCK&nbsp;TABLES&#39;</span><span>);<br />
		&nbsp;&nbsp;}<br />
		&nbsp;&nbsp;<br />
		&nbsp;&nbsp;</span><span>//&nbsp;helper&nbsp;function&nbsp;used&nbsp;to&nbsp;parse&nbsp;lists&nbsp;of&nbsp;user&nbsp;agents&nbsp;and&nbsp;IPs<br />
		&nbsp;&nbsp;</span><span>function&nbsp;</span><span>_parseListURL</span><span>(</span><span>$data</span><span>,&nbsp;</span><span>$ua_regex</span><span>,&nbsp;</span><span>$ip_regex</span><span>)<br />
		&nbsp;&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$ua_list_ret&nbsp;</span><span>=&nbsp;</span><span>preg_match_all</span><span>(</span><span>$ua_regex</span><span>,&nbsp;</span><span>$data</span><span>,&nbsp;</span><span>$ua_list</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$ip_list_ret&nbsp;</span><span>=&nbsp;</span><span>preg_match_all</span><span>(</span><span>$ip_regex</span><span>,&nbsp;</span><span>$data</span><span>,&nbsp;</span><span>$ip_list</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;return&nbsp;array(</span><span>&#39;ua_list&#39;&nbsp;</span><span>=&gt;&nbsp;</span><span>$ua_list</span><span>[</span><span>1</span><span>],&nbsp;</span><span>&#39;ip_list&#39;&nbsp;</span><span>=&gt;&nbsp;</span><span>$ip_list</span><span>[</span><span>1</span><span>]);&nbsp;&nbsp;<br />
		&nbsp;&nbsp;}</span></p>
<p>&nbsp;&nbsp;<span>//&nbsp;inserts&nbsp;a&nbsp;new&nbsp;row&nbsp;of&nbsp;data&nbsp;to&nbsp;the&nbsp;cloaking&nbsp;table<br />
		&nbsp;&nbsp;</span><span>function&nbsp;</span><span>_insertSpiderData</span><span>(</span><span>$spider_name</span><span>,&nbsp;</span><span>$record_type</span><span>,&nbsp;</span><span>$value</span><span>,&nbsp;</span><span>$is_user_defined&nbsp;</span><span>=&nbsp;</span><span>&#39;N&#39;</span><span>)<br />
		&nbsp;&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;escape&nbsp;input&nbsp;data<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$spider_name&nbsp;</span><span>=&nbsp;</span><span>mysql_escape_string</span><span>(</span><span>$spider_name</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$record_type&nbsp;</span><span>=&nbsp;</span><span>mysql_escape_string</span><span>(</span><span>$record_type</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$value&nbsp;</span><span>=&nbsp;</span><span>mysql_escape_string</span><span>(</span><span>$value</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$is_user_defined&nbsp;</span><span>=&nbsp;</span><span>mysql_escape_string</span><span>(</span><span>$is_user_defined</span><span>);</span></p>
<p>&nbsp;&nbsp;&nbsp;&nbsp;<span>//&nbsp;build&nbsp;and&nbsp;execute&nbsp;the&nbsp;INSERT&nbsp;query<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$q&nbsp;&nbsp;</span><span>=&nbsp;</span><span>&quot;INSERT&nbsp;INTO&nbsp;cloak_data&nbsp;(spider_name,&nbsp;record_type,&nbsp;value,&nbsp;is_user_defined)&nbsp;&quot;&nbsp;</span><span>.&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>&quot;VALUES&nbsp;(&#39;$spider_name&#39;,&nbsp;&#39;$record_type&#39;,&nbsp;&#39;$value&#39;,&nbsp;&#39;$is_user_defined&#39;)&quot;</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>mysql_query</span><span>(</span><span>$q</span><span>);<br />
		&nbsp;&nbsp;}<br />
		&nbsp;&nbsp;<br />
		&nbsp;&nbsp;</span><span>//&nbsp;delete&nbsp;the&nbsp;cloaking&nbsp;data&nbsp;for&nbsp;the&nbsp;mentioned&nbsp;spider<br />
		&nbsp;&nbsp;</span><span>function&nbsp;</span><span>_deleteSpiderData</span><span>(</span><span>$spider_name</span><span>,&nbsp;</span><span>$is_user_defined&nbsp;</span><span>=&nbsp;</span><span>&#39;&#39;</span><span>)<br />
		&nbsp;&nbsp;{&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;escape&nbsp;input&nbsp;data<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$spider_name&nbsp;</span><span>=&nbsp;</span><span>mysql_escape_string</span><span>(</span><span>$spider_name</span><span>);</span></p>
<p>&nbsp;&nbsp;&nbsp;&nbsp;<span>//&nbsp;build&nbsp;and&nbsp;execute&nbsp;the&nbsp;DELETE&nbsp;query<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$q&nbsp;</span><span>=&nbsp;</span><span>&quot;DELETE&nbsp;FROM&nbsp;cloak_data&nbsp;WHERE&nbsp;spider_name=&#39;$spider_name&#39;&quot;</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;(</span><span>$is_user_defined</span><span>)&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$is_user_defined&nbsp;</span><span>=&nbsp;</span><span>mysql_escape_string</span><span>(</span><span>$is_user_defined</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$q&nbsp;</span><span>.=&nbsp;</span><span>&quot;&nbsp;AND&nbsp;is_user_defined&nbsp;=&nbsp;&#39;$is_user_defined&#39;&nbsp;&quot;</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;}&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>mysql_query</span><span>(</span><span>$q</span><span>);&nbsp;&nbsp;<br />
		&nbsp;&nbsp;}<br />
		&nbsp;&nbsp;<br />
		&nbsp;&nbsp;</span><span>//&nbsp;only&nbsp;use&nbsp;if&nbsp;it&#39;s&nbsp;not&nbsp;found&nbsp;via&nbsp;the&nbsp;IPLists&nbsp;cloaking&nbsp;database<br />
		&nbsp;&nbsp;</span><span>function&nbsp;</span><span>botVerifyByDNS</span><span>(</span><span>$ua&nbsp;</span><span>=&nbsp;array(</span><span>&#39;google&#39;</span><span>,&nbsp;</span><span>&#39;#.*\.googlebot\.com$#&#39;</span><span>))<br />
		&nbsp;&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;check&nbsp;cache&nbsp;of&nbsp;bad&nbsp;bots<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>if&nbsp;(</span><span>SimpleCloakV2</span><span>::</span><span>isSpider</span><span>(</span><span>&#39;bad&#39;</span><span>,&nbsp;</span><span>false</span><span>,&nbsp;</span><span>true</span><span>,&nbsp;</span><span>true</span><span>,&nbsp;</span><span>false</span><span>))&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;return&nbsp;</span><span>false</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;}<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;check&nbsp;only&nbsp;UA&nbsp;since&nbsp;this&nbsp;function&nbsp;is&nbsp;only&nbsp;called&nbsp;if&nbsp;the&nbsp;cloaking&nbsp;DB&nbsp;doesn&#39;t&nbsp;handle&nbsp;it<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>if&nbsp;(</span><span>SimpleCloakV2</span><span>::</span><span>isSpider</span><span>(</span><span>$ua</span><span>[</span><span>0</span><span>],&nbsp;</span><span>true</span><span>,&nbsp;</span><span>false</span><span>))&nbsp;{&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;reverse&nbsp;lookup<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$host_name&nbsp;</span><span>=&nbsp;</span><span>gethostbyaddr</span><span>(</span><span>$_SERVER</span><span>[</span><span>&#39;REMOTE_ADDR&#39;</span><span>]);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;if&nbsp;it&nbsp;says&nbsp;it&#39;s&nbsp;a&nbsp;certain&nbsp;UA&nbsp;but&nbsp;gethostbyaddr&nbsp;does&nbsp;not&nbsp;match&nbsp;the&nbsp;corresponding&nbsp;domain&nbsp;regex,&nbsp;store&nbsp;it&nbsp;and&nbsp;then&nbsp;abort<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>if&nbsp;(!</span><span>preg_match</span><span>(</span><span>$ua</span><span>[</span><span>1</span><span>],&nbsp;</span><span>$host_name</span><span>))&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$dbLink&nbsp;</span><span>=&nbsp;</span><span>SimpleCloakV2</span><span>::</span><span>_connect</span><span>();<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>SimpleCloakV2</span><span>::</span><span>_insertSpiderData</span><span>(</span><span>&#39;bad&#39;</span><span>,&nbsp;</span><span>&#39;IP&#39;</span><span>,&nbsp;</span><span>$_SERVER</span><span>[</span><span>&#39;REMOTE_ADDR&#39;</span><span>],&nbsp;</span><span>&#39;Y&#39;</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>SimpleCloakV2</span><span>::</span><span>_close</span><span>(</span><span>$dbLink</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;return&nbsp;</span><span>false</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;}<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$connected_ip_address&nbsp;</span><span>=&nbsp;</span><span>$_SERVER</span><span>[</span><span>&#39;REMOTE_ADDR&#39;</span><span>];<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$host_name_ip_address&nbsp;</span><span>=&nbsp;</span><span>gethostbyname</span><span>(</span><span>$host_name</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;if&nbsp;the&nbsp;connected&nbsp;IP&nbsp;matches&nbsp;the&nbsp;authoritative&nbsp;IP,&nbsp;we&nbsp;have&nbsp;a&nbsp;match<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>if&nbsp;(</span><span>$connected_ip_address&nbsp;</span><span>==&nbsp;</span><span>$host_name_ip_address</span><span>)&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$dbLink&nbsp;</span><span>=&nbsp;</span><span>SimpleCloakV2</span><span>::</span><span>_connect</span><span>();<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>SimpleCloakV2</span><span>::</span><span>_insertSpiderData</span><span>(</span><span>$ua</span><span>[</span><span>0</span><span>],&nbsp;</span><span>&#39;IP&#39;</span><span>,&nbsp;</span><span>$_SERVER</span><span>[</span><span>&#39;REMOTE_ADDR&#39;</span><span>],&nbsp;</span><span>&#39;Y&#39;</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>SimpleCloakV2</span><span>::</span><span>_close</span><span>(</span><span>$dbLink</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;return&nbsp;</span><span>true</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;}&nbsp;else&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;if&nbsp;it&nbsp;says&nbsp;it&#39;s&nbsp;a&nbsp;certain&nbsp;UA,&nbsp;gethostbyaddr&nbsp;says&nbsp;the&nbsp;right&nbsp;thing,&nbsp;but&nbsp;gethostbyname&nbsp;does&nbsp;not<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$dbLink&nbsp;</span><span>=&nbsp;</span><span>SimpleCloakV2</span><span>::</span><span>_connect</span><span>();<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>SimpleCloakV2</span><span>::</span><span>_insertSpiderData</span><span>(</span><span>&#39;bad&#39;</span><span>,&nbsp;</span><span>&#39;IP&#39;</span><span>,&nbsp;</span><span>$_SERVER</span><span>[</span><span>&#39;REMOTE_ADDR&#39;</span><span>],&nbsp;</span><span>&#39;Y&#39;</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>SimpleCloakV2</span><span>::</span><span>_close</span><span>(</span><span>$dbLink</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;return&nbsp;</span><span>false</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;}<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;}<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;it&nbsp;does&nbsp;not&nbsp;even&nbsp;say&nbsp;it&#39;s&nbsp;a&nbsp;bot&nbsp;via&nbsp;UA<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>return&nbsp;</span><span>false</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;}<br />
		&nbsp;&nbsp;<br />
		&nbsp;&nbsp;function&nbsp;</span><span>_addMetaRobotsExcludeProxiesCallback</span><span>(</span><span>$buffer</span><span>)<br />
		&nbsp;&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;global&nbsp;</span><span>$__metaRobotsExcludeProxiesCallbackHTML</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;return&nbsp;</span><span>preg_replace</span><span>(</span><span>&#39;#&lt;/title&gt;#&#39;</span><span>,&nbsp;</span><span>&#39;&lt;/title&gt;&#39;&nbsp;</span><span>.&nbsp;</span><span>$__metaRobotsExcludeProxiesCallbackHTML</span><span>,&nbsp;</span><span>$buffer</span><span>);<br />
		&nbsp;&nbsp;}<br />
		&nbsp;&nbsp;<br />
		&nbsp;&nbsp;function&nbsp;</span><span>metaRobotsExcludeProxies</span><span>(</span><span>$auto_modify_content&nbsp;</span><span>=&nbsp;</span><span>true</span><span>,&nbsp;</span><span>$uas&nbsp;</span><span>=&nbsp;array(array(</span><span>&#39;google&#39;</span><span>,&nbsp;</span><span>&#39;#.*\.googlebot\.com$#&#39;</span><span>),&nbsp;array(</span><span>&#39;yahoo&#39;</span><span>,&nbsp;</span><span>&#39;#.*\.yahoo\.net$#&#39;</span><span>),&nbsp;array(</span><span>&#39;msn&#39;</span><span>,&nbsp;</span><span>&#39;#.*\.live\.com$#&#39;</span><span>),&nbsp;array(</span><span>&#39;ask&#39;</span><span>,&nbsp;</span><span>&#39;#.*\.ask.com$#&#39;</span><span>)&nbsp;),&nbsp;</span><span>$meta_tag&nbsp;</span><span>=&nbsp;</span><span>&#39;&lt;meta&nbsp;name=&quot;robots&quot;&nbsp;content=&quot;noindex,nofollow&quot;&nbsp;/&gt;&#39;</span><span>,&nbsp;</span><span>$passlist_regex&nbsp;</span><span>=&nbsp;</span><span>&#39;&#39;</span><span>)<br />
		&nbsp;&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;global&nbsp;</span><span>$__metaRobotsExcludeProxiesCallbackHTML</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;(</span><span>$meta_tag</span><span>)<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$__metaRobotsExcludeProxiesCallbackHTML&nbsp;</span><span>=&nbsp;</span><span>$meta_tag</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;if&nbsp;it&#39;s&nbsp;on&nbsp;our&nbsp;passlist&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;//&nbsp;ex:&nbsp;#become|lycos|somestupidbot#<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>if&nbsp;(</span><span>$passlist_regex</span><span>)&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;(</span><span>preg_match</span><span>(</span><span>$passlist_regex</span><span>,&nbsp;</span><span>$_SERVER</span><span>[</span><span>&#39;HTTP_USER_AGENT&#39;</span><span>]))&nbsp;return&nbsp;</span><span>false</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;}<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;foreach&nbsp;(</span><span>$uas&nbsp;</span><span>as&nbsp;</span><span>$u</span><span>)&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;if&nbsp;it&#39;s&nbsp;a&nbsp;bot&nbsp;according&nbsp;to&nbsp;UA,&nbsp;then&nbsp;start&nbsp;to&nbsp;investigate<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>if&nbsp;(</span><span>SimpleCloakV2</span><span>::</span><span>isSpider</span><span>(</span><span>$u</span><span>[</span><span>0</span><span>],&nbsp;</span><span>true</span><span>,&nbsp;</span><span>false</span><span>))&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;if&nbsp;it&#39;s&nbsp;a&nbsp;bot&nbsp;according&nbsp;to&nbsp;IPLists&nbsp;or&nbsp;our&nbsp;user-defined&nbsp;list<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>if&nbsp;(</span><span>SimpleCloakV2</span><span>::</span><span>isSpider</span><span>(</span><span>$u</span><span>[</span><span>0</span><span>],&nbsp;</span><span>false</span><span>,&nbsp;</span><span>true</span><span>))&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;return&nbsp;</span><span>false</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;if&nbsp;it&#39;s&nbsp;a&nbsp;bot&nbsp;according&nbsp;to&nbsp;DNS<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>}&nbsp;else&nbsp;if&nbsp;(</span><span>SimpleCloakV2</span><span>::</span><span>botVerifyByDNS</span><span>(</span><span>$u</span><span>))&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;return&nbsp;</span><span>false</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;if&nbsp;it&#39;s&nbsp;not<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>}&nbsp;else&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;(</span><span>$auto_modify_content</span><span>)&nbsp;</span><span>ob_start</span><span>(array(</span><span>&#39;SimpleCloakV2&#39;</span><span>,&nbsp;</span><span>&#39;_addMetaRobotsExcludeProxiesCallback&#39;</span><span>));<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;return&nbsp;</span><span>true</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;}<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;}<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;}<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;it&#39;s&nbsp;not&nbsp;a&nbsp;bot&nbsp;according&nbsp;to&nbsp;UA<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>if&nbsp;(</span><span>$auto_modify_content</span><span>)&nbsp;</span><span>ob_start</span><span>(array(</span><span>&#39;SimpleCloakV2&#39;</span><span>,&nbsp;</span><span>&#39;_addMetaRobotsExcludeProxiesCallback&#39;</span><span>));<br />
		&nbsp;&nbsp;&nbsp;&nbsp;return&nbsp;</span><span>true&nbsp;</span><span>+&nbsp;</span><span>1</span><span>;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
		&nbsp;&nbsp;}<br />
		&nbsp;&nbsp;&nbsp;&nbsp;<br />
		}<br />
		</span><span>?&gt;</span><!-- END_PHP_HIGHLIGHT --></p>
<p><b>Save this file as &quot;simple_cloak_v2.inc.php.&quot;</b></p>
<p><b>You will also need the configuration file (it is referenced in &quot;simple_cloak_v2.inc.php&quot;):</b></p>
<p><!-- BEGIN_PHP_HIGHLIGHT --><code><span><span>&lt;?php<br />
		</span><span>//&nbsp;defines&nbsp;database&nbsp;connection&nbsp;data<br />
		//&nbsp;set&nbsp;to&nbsp;&quot;1&quot;&nbsp;if&nbsp;you&nbsp;are&nbsp;already&nbsp;connected&nbsp;in&nbsp;your&nbsp;application.<br />
		</span><span>define</span><span>(</span><span>&quot;USE_CUSTOM_CONNECT_CODE&quot;</span><span>,&nbsp;</span><span>0</span><span>);<br />
		</span><span>//&nbsp;usually&nbsp;localhost<br />
		</span><span>define</span><span>(</span><span>&quot;DB_HOST&quot;</span><span>,&nbsp;</span><span>&quot;your_db_host&quot;</span><span>);<br />
		</span><span>//&nbsp;db&nbsp;user&nbsp;<br />
		</span><span>define</span><span>(</span><span>&quot;DB_USER&quot;</span><span>,&nbsp;</span><span>&quot;some_user&quot;</span><span>);<br />
		</span><span>//&nbsp;password<br />
		</span><span>define</span><span>(</span><span>&quot;DB_PASSWORD&quot;</span><span>,&nbsp;</span><span>&quot;secret&quot;</span><span>);<br />
		</span><span>//db&nbsp;name<br />
		</span><span>define</span><span>(</span><span>&quot;DB_DATABASE&quot;</span><span>,&nbsp;</span><span>&quot;your_db&quot;</span><span>);<br />
		</span><span>?&gt;</span></span></code><!-- END_PHP_HIGHLIGHT --></p>
<p><b>Save this as &quot;config.inc.php.&quot;</b></p>
<h2>Then, to implement:</h2>
<h3>Use this SQL to create the database tables needed for the SimpleCloakV2 class</h3>
<p>Run the following queries in your MySQL database (using the mysql binary or phpMyAdmin):</p>
<p>CREATE TABLE `cloak_data` (<br />
		`id` int(11) NOT NULL auto_increment,<br />
		`spider_name` varchar(255) NOT NULL default &#39;&#39;,<br />
		`record_type` enum(&#39;UA&#39;,&#39;IP&#39;) NOT NULL default &#39;UA&#39;,<br />
		`value` varchar(255) NOT NULL default &#39;&#39;,<br />
		`is_user_defined` enum(&#39;N&#39;,&#39;Y&#39;) NOT NULL default &#39;N&#39;,<br />
		PRIMARY KEY (`id`),<br />
		KEY `value` (`value`)<br />
		) ENGINE=MyISAM DEFAULT CHARSET=latin1;</p>
<p>CREATE TABLE `cloak_update` (<br />
		`version` varchar(255) NOT NULL default &#39;&#39;,<br />
		`updated_on` datetime NOT NULL default &#39;0000-00-00 00:00:00&#39;<br />
		) ENGINE=MyISAM DEFAULT CHARSET=latin1;</p>
<p><i><b>Only if</b></i> you already have a &quot;cloak_data&quot; table (from our book or a previous version of SimpleCloak on the blog), run this SQL:</p>
<p>ALTER TABLE cloak_data ADD `is_user_defined` ENUM(&#39;N&#39;,&#39;Y&#39;) NOT NULL;</p>
<h3>Populate the Cloaking database with the data from <a href="http://www.iplists.com">IPLists.com</a></h3>
<p><b>Note:</b> This should be run periodically from a cron job to keep the data updated. It will update only once a week regardless. However, you may also put it in the footer of an application.</p>
<p><!-- BEGIN_PHP_HIGHLIGHT --><code><span><span>&lt;?php</span></span></code></p>
<p><span>//&nbsp;load&nbsp;the&nbsp;SimpleCloakV2&nbsp;library<br />
		</span><span>require_once&nbsp;</span><span>&#39;simple_cloak_v2.inc.php&#39;</span><span>;</span></p>
<p><span>//&nbsp;update&nbsp;cloaking&nbsp;data&nbsp;and&nbsp;indicate&nbsp;the&nbsp;success&nbsp;status<br />
		</span><span>if&nbsp;(</span><span>SimpleCloakV2</span><span>::</span><span>updateAll</span><span>())<br />
		{<br />
		&nbsp;&nbsp;echo&nbsp;</span><span>&quot;Cloaking&nbsp;database&nbsp;updated!&quot;</span><span>;<br />
		}<br />
		else<br />
		{<br />
		&nbsp;&nbsp;echo&nbsp;</span><span>&quot;Cloaking&nbsp;database&nbsp;was&nbsp;already&nbsp;up&nbsp;to&nbsp;date,&nbsp;or&nbsp;the&nbsp;update&nbsp;failed.&quot;</span><span>;<br />
		}</span></p>
<p><span>?&gt;</span><!-- END_PHP_HIGHLIGHT --></p>
<h2>Then pick *1* of the following methods.</h2>
<p><b>Note:</b> Method #2 is a bit of a kludge, as the RewriteMap directive of Apache cannot be used in .htaccess. *It has <i>not</i> been tested extensively yet!*</p>
<h3>METHOD NUMBER 1 &mdash; PHP Implementation</h3>
<p>Place this code at the top of your application (or relevant parts thereof):</p>
<p><!-- BEGIN_PHP_HIGHLIGHT --><code><span><span>&lt;?<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>include_once(</span><span>&#39;simple_cloak_v2.inc.php&#39;</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$_x&nbsp;</span><span>=&nbsp;</span><span>SimpleCloakV2</span><span>::</span><span>metaRobotsExcludeProxies</span><span>();<br />
		</span><span>?&gt;</span></span></code><!-- END_PHP_HIGHLIGHT --></p>
<p>The code automatically inserts the meta tag using PHP output buffering. If you want a more custom/efficient solution, that is also possible. See the first parameter of function &quot;metaRobotsExcludeProxies.&quot; <b>Set to false, it will not use the output buffering, and you may use the result to effect changes in your application as desired.</b></p>
<h3>METHOD NUMBER 2 &mdash; .htaccess Implementation</h3>
<p><b>Place this in your .htaccess file</b></p>
<p>RewriteEngine On<br />
		RewriteCond %{HTTP_USER_AGENT} yahoo|slurp|msn|ask|google|gsa [NC]<br />
		RewriteRule (^.*$) proxy.php?orig_url=$1</p>
<p><b>And this is the code for proxy.php:</b></p>
<p><!-- BEGIN_PHP_HIGHLIGHT --><code><span><span>&lt;?</span></span></code></p>
<p>&nbsp;&nbsp;&nbsp;&nbsp;<span>include&nbsp;(</span><span>&#39;simple_cloak_v2.inc.php&#39;</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;should&nbsp;we&nbsp;deny&nbsp;access?<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>if&nbsp;(</span><span>SimpleCloakV2</span><span>::</span><span>metaRobotsExcludeProxies</span><span>(</span><span>false</span><span>))&nbsp;{<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>header</span><span>(</span><span>&quot;HTTP/1.0&nbsp;403&nbsp;Forbidden&quot;</span><span>);<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;echo&nbsp;</span><span>&#39;forbidden&nbsp;&hellip;&nbsp;&#39;</span><span>;<br />
		&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;exit();<br />
		&nbsp;&nbsp;&nbsp;&nbsp;}<br />
		&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;otherwise&nbsp;echo&nbsp;as&nbsp;it&nbsp;was&nbsp;&hellip;</span></p>
<p>	&nbsp;&nbsp;&nbsp;&nbsp;//&nbsp;construct&nbsp;the&nbsp;original&nbsp;URL<br />
	&nbsp;&nbsp;&nbsp;&nbsp;<span>$url&nbsp;</span><span>=&nbsp;</span><span>$_SERVER</span><span>[</span><span>&#39;SERVER_NAME&#39;</span><span>]&nbsp;.&nbsp;</span><span>&#39;/&#39;&nbsp;</span><span>.&nbsp;</span><span>$_SERVER</span><span>[</span><span>&#39;REQUEST_URI&#39;</span><span>];<br />
	&nbsp;&nbsp;&nbsp;&nbsp;<br />
	&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;get&nbsp;the&nbsp;contents<br />
	&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$ch&nbsp;</span><span>=&nbsp;</span><span>curl_init</span><span>();&nbsp;&nbsp;&nbsp;&nbsp;<br />
	&nbsp;&nbsp;&nbsp;&nbsp;</span><span>curl_setopt&nbsp;</span><span>(</span><span>$ch</span><span>,&nbsp;</span><span>CURLOPT_URL</span><span>,&nbsp;</span><span>$url</span><span>);<br />
	&nbsp;&nbsp;&nbsp;&nbsp;</span><span>curl_setopt&nbsp;</span><span>(</span><span>$ch</span><span>,&nbsp;</span><span>CURLOPT_HEADER</span><span>,&nbsp;</span><span>1</span><span>);<br />
	&nbsp;&nbsp;&nbsp;&nbsp;</span><span>curl_setopt&nbsp;</span><span>(</span><span>$ch</span><span>,&nbsp;</span><span>CURLOPT_RETURNTRANSFER</span><span>,&nbsp;</span><span>1</span><span>);<br />
	&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$result&nbsp;</span><span>=&nbsp;</span><span>curl_exec</span><span>(</span><span>$ch</span><span>);<br />
	&nbsp;&nbsp;&nbsp;&nbsp;</span><span>curl_close</span><span>(</span><span>$ch</span><span>);<br />
	&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br />
	&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;do&nbsp;some&nbsp;parsing<br />
	&nbsp;&nbsp;&nbsp;&nbsp;</span><span>preg_match</span><span>(</span><span>&quot;#(.*)\r\n\r(.*)#s&quot;</span><span>,&nbsp;</span><span>$result</span><span>,&nbsp;</span><span>$captures</span><span>);<br />
	&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$headers&nbsp;</span><span>=&nbsp;</span><span>$captures</span><span>[</span><span>1</span><span>];<br />
	&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$data&nbsp;</span><span>=&nbsp;</span><span>$captures</span><span>[</span><span>2</span><span>];<br />
	&nbsp;&nbsp;&nbsp;&nbsp;</span><span>preg_match_all</span><span>(</span><span>&#39;#(.*)\r#m&#39;</span><span>,&nbsp;</span><span>$headers</span><span>,&nbsp;</span><span>$captures</span><span>);<br />
	&nbsp;&nbsp;&nbsp;&nbsp;</span><span>$split_headers&nbsp;</span><span>=&nbsp;</span><span>$captures</span><span>[</span><span>1</span><span>];<br />
	&nbsp;&nbsp;&nbsp;&nbsp;<br />
	&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;we&nbsp;have&nbsp;to&nbsp;reissue&nbsp;the&nbsp;headers&nbsp;as&nbsp;is<br />
	&nbsp;&nbsp;&nbsp;&nbsp;</span><span>foreach&nbsp;(</span><span>$split_headers&nbsp;</span><span>as&nbsp;</span><span>$s</span><span>)&nbsp;{<br />
	&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span>header</span><span>(</span><span>$s</span><span>);<br />
	&nbsp;&nbsp;&nbsp;&nbsp;}<br />
	&nbsp;&nbsp;&nbsp;&nbsp;<br />
	&nbsp;&nbsp;&nbsp;&nbsp;</span><span>//&nbsp;echo&nbsp;the&nbsp;body.<br />
	&nbsp;&nbsp;&nbsp;&nbsp;</span><span>echo&nbsp;</span><span>$data</span><span>;<br />
	&nbsp;&nbsp;&nbsp;&nbsp;<br />
	</span><span>?&gt;</span></p></blockquote>
<p>More implementation :</p>
<p><font color="#000000" face="verdana" size="2">1. Add this to all of your headers: <br />
	</font></p>
<table border="0" cellpadding="0" cellspacing="5">
<tbody>
<tr>
<td bgcolor="#333333">
<table border="0" cellpadding="5" cellspacing="1" width="100%">
<tbody>
<tr>
<td bgcolor="#f2f2ff"><font color="#222222" face="arial" size="2">&lt;base href=&quot;http://www.yoursite.com/&quot; /&gt; </font></td>
</tr>
</tbody>
</table>
</td>
</tr>
</tbody>
</table>
<p><font color="#000000" face="verdana" size="2">and if you see an attempted hijack&#8230; </font></p>
<p><font color="#000000" face="verdana" size="2">2. Block the site via .htaccess: <br />
	</font></p>
<table border="0" cellpadding="0" cellspacing="5">
<tbody>
<tr>
<td bgcolor="#333333">
<table border="0" cellpadding="5" cellspacing="1" width="100%">
<tbody>
<tr>
<td bgcolor="#f2f2ff"><font color="#222222" face="arial" size="2">RewriteCond %{HTTP_REFERER} yourproblemproxy\.com </font></td>
</tr>
</tbody>
</table>
</td>
</tr>
</tbody>
</table>
<p><font color="#000000" face="verdana" size="2">3. Block the IP address of the proxy <br />
	</font></p>
<table border="0" cellpadding="0" cellspacing="5">
<tbody>
<tr>
<td bgcolor="#333333">
<table border="0" cellpadding="5" cellspacing="1" width="100%">
<tbody>
<tr>
<td bgcolor="#f2f2ff"><font color="#222222" face="arial" size="2">order allow,deny <br />
								deny from 11.22.33.44 <br />
								allow from all </font></td>
</tr>
</tbody>
</table>
</td>
</tr>
</tbody>
</table>
<p><font color="#000000" face="verdana" size="2">4. Do your research and file a spam report with Google. </font></p>
<p>&nbsp;</p>
<p>&nbsp;</p>
<iframe src="http://www.facebook.com/plugins/like.php?href=http%3A%2F%2Fbig.anythingilike.net%2Farticles%2Fwebmaster-knowledge-protect-site-from-proxy-seo-technique%2F&amp;layout=standard&amp;show_faces=true&amp;width=450&amp;action=like&amp;colorscheme=light" scrolling="no" frameborder="0" allowTransparency="true" style="border:none; overflow:hidden; width:450px;margin-top:5px;"></iframe>]]></content:encoded>
			<wfw:commentRss>http://big.anythingilike.net/articles/webmaster-knowledge-protect-site-from-proxy-seo-technique/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>CMS &#8211; SEO Compare , Joomla WordPress Drupal</title>
		<link>http://big.anythingilike.net/articles/cms-seo-compare-joomla-wordpress-drupal/</link>
		<comments>http://big.anythingilike.net/articles/cms-seo-compare-joomla-wordpress-drupal/#comments</comments>
		<pubDate>Thu, 15 Apr 2010 20:14:06 +0000</pubDate>
		<dc:creator>admin</dc:creator>
				<category><![CDATA[Articles]]></category>
		<category><![CDATA[drupal]]></category>
		<category><![CDATA[joomla]]></category>
		<category><![CDATA[wordpress]]></category>

		<guid isPermaLink="false">http://big.anythingilike.net/?p=267</guid>
		<description><![CDATA[&#160; CMS &#8211; SEO Compare , Joomla WordPress Drupal SEOMOZ have some experiment : here Within two weeks the WordPress site was so far ahead that it looked as if the competition was over. Slowly however, Drupal and Joomla have recovered until they now lead the pack. The Joomla site now has 5 top ten [...]]]></description>
			<content:encoded><![CDATA[<p style="float:right; margin:0 0 10px 15px; width:240px;">
		<img src="http://photos1.meetupstatic.com/photos/event/3/d/f/e/highres_9195870.jpeg" width="240" />
		</p><p>&nbsp;</p>
<h3>CMS &#8211; SEO Compare , Joomla WordPress Drupal</h3>
<p><img alt="" height="396" src="http://photos1.meetupstatic.com/photos/event/3/d/f/e/highres_9195870.jpeg" width="400" /></p>
<p>SEOMOZ has run an experiment: <a href="http://www.alledia.com/blog/seo-competition/march-update-to-search-engine-optimization-test/">here</a></p>
<blockquote>
<p><span class="Apple-style-span">Within two weeks the WordPress site was so far ahead that it looked as if the competition was over. Slowly however, Drupal and Joomla have recovered until they now lead the pack. The Joomla site now has 5 top ten rankings in every search engine that we&#39;re following and the Drupal site continues to climb, only in second place by a small amount.&nbsp;</span></p>
</blockquote>
<p>Some interesting points.</p>
<p>&nbsp;</p>
<ul style="background-color: transparent;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-size: 12px;margin-top: 0px;margin-right: 0px;margin-bottom: 15px;margin-left: 40px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px">
<li><span style="color:#006400"><strong>Is it a blog?</strong>&nbsp;If so, we&#39;ll expect it to be updated regularly</span></li>
<li><span style="color:#006400"><strong>Is it a corporate website?</strong>&nbsp;If so, we can expect it to remain relatively unchanged</span></li>
</ul>
<p style="background-color: transparent;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-size: 12px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px">&nbsp;</p>
<p style="background-color: transparent;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-size: 12px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px">Basically, Google is asking you to decide what kind of site you want to be and then to walk the walk. If you run a blog that used to be updated every day, but now hasn&#39;t seen fresh content in two months, why should Google rank you? Why should searchers have to wade through months-old blog posts?</p>
<p style="background-color: transparent;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-size: 12px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px">&nbsp;</p>
<p style="background-color: transparent;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-size: 12px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px"><strong>What is one of the easiest ways Google can determine what kind of site you&#39;re running? <span style="color:#006400">Your software.</span></strong><span style="color:#006400">&nbsp;If you install a WordPress site you are telling Google that you&#39;re starting a blog and that it will be updated regularly</span>. <span style="color:#006400">If you install Joomla or Drupal, Google has no such expectations. </span>Maybe that is why <em><strong><span style="color:#006400">WordPress started so strongly, but has since fallen behind</span></strong></em>. If you don&#39;t meet expectations, you will be penalised.&nbsp;</p>
<p style="background-color: transparent;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-size: 12px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px">&nbsp;</p>
<p style="background-color: transparent;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-size: 12px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px">Beyond SEO, there are a few other topics we should care about:</p>
<p style="background-color: transparent;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-size: 12px;margin-top: 0px;margin-right: 0px;margin-bottom: 10px;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px">&nbsp;</p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span"><strong>Usability</strong></span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span">The conventional wisdom says that the learning curve for Joomla is much greater than for WordPress.&nbsp; My opinion is that the conventional wisdom is wise in this case.</span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span">The two main things that made WordPress easy for me, someone who built his first website three months ago, are the html editor and the template editor.&nbsp; With WordPress, you can toggle back and forth between your WYSIWYG and html editors for pages and individual posts.&nbsp; Additionally, most templates break down the PHP coding into easy to use sections like &ldquo;header,&rdquo; &ldquo;footer,&rdquo; and &ldquo;main index.&rdquo;&nbsp; This made customizing our site&rsquo;s theme much easier.&nbsp;</span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span"><em>Bottom Line: Your site will look great in about 5 minutes using WordPress, but if you are willing to trade some extra learning time for a more advanced site, go with Joomla.</em></span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span"><strong>Versatility</strong></span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span">WordPress makes some effort to help you build the site you want, but Joomla is the clear winner here.</span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span">WordPress is built for bloggers, featuring comments, tagging, and virtually everything else your blog needs out of the box. Simple. Easy. Straightforward.</span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span">Joomla is the better choice if your site is almost anything but a blog. The tough learning curve is partly a function of the advanced features that Joomla provides. You&rsquo;ll have to spend some more time with it, and Joomla is obsessed with goofy looking contact forms and FAQ pages, but your non-blogging site will thank you.</span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span">For example, it&rsquo;s very easy once you get the hang of it to extend Joomla with e-commerce applications, banner advertising, and great form builders.</span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span"><em>Bottom Line: If your site is only a blog, use WordPress. The more cool-looking static content you need, the more Joomla will help you.</em></span></p>
<p>&nbsp;</p>
<h3>SEO Tools for CMS</h3>
<h3>Drupal Modules</h3>
<ol style="margin-top: 0px;margin-right: 0px;margin-bottom: 0px;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial">
<li><a href="http://drupal.org/project/blockcache">Block Cache</a>&nbsp;creates cached versions of each block displayed on your website. For each cached block only one SQL query is executed thus reducing server load resulting in better performance and faster delivery of content.</li>
<li><a href="http://drupal.org/project/find_path">Find URL Alias</a>&nbsp;is a utility module that lets you search for particular URL aliases. This module requires the core&nbsp;<em>path</em>&nbsp;module which enables search engine friendly URLs for your site. Find URL Alias is especially useful when there are many aliases stored in your database. But keep in mind that changing URLs is something you should avoid.</li>
<li><a href="http://drupal.org/node/49388">Google Analytics</a>&nbsp;integrates Google&#39;s powerful web statistics tracking system with your website. The module enables you to selectively track users by role.</li>
<li><a href="http://drupal.org/project/nodewords">Meta tags</a>&nbsp;allows you to define site wide meta tags and specific tags for each piece of content (node). If you use the core taxonomy module (you really should do) meta tags can be assigned automatically by using the terms (tags) you use to categorize your content.</li>
<li><a href="http://drupal.org/project/pathauto">Pathauto</a>&nbsp;automatically generates path aliases based on the module&#39;s settings. A very powerful module that lets you define different URL patterns based on content types. The latest version also allows filtering of common words. Requires the core&nbsp;<em>path</em>&nbsp;module.</li>
<li><a href="http://drupal.org/node/53579">RobotsTxt</a>&nbsp;is useful if you run multiple Drupal sites from a single code base and want to have different robots.txt files for each of them.</li>
<li><a href="http://drupal.org/project/service_links">Service links</a>&nbsp;automatically adds links to social bookmarking and blog search services to your content. You can select which services you want to link to, restrict the display based on content (node) types and whether to display links in teaser and/or full page view.</li>
<li><a href="http://drupal.org/project/urlify">URLify</a>&nbsp;automatically generates the path alias for a piece of content based on its title using JavaScript. Requires the core&nbsp;<em>path</em>&nbsp;module. A lightweight alternative to pathauto.</li>
<li><a href="http://drupal.org/project/gsitemap">XML Sitemap</a>&nbsp;generates an XML sitemap which complies with the sitemaps.org specification. The relative priority of each piece of content is calculated based on content type, number of comments, and promotion to front page. The values of each of these factors can be set in the admin section of the module.</li>
</ol>
<h3>Joomla Modules, Components, Plugins</h3>
<ol style="margin-top: 0px;margin-right: 0px;margin-bottom: 0px;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial">
<li><a href="http://extensions.joomla.org/component/option,com_mtree/task,viewlink/link_id,113/Itemid,35/">Advanced SEF Bot for Joomla 1.1.x</a>&nbsp;is a plugin that enables search engine friendly URLs. Since Joomla&#39;s standard URLs are not really meaningful this one is a must.</li>
<li><a href="http://extensions.joomla.org/component/option,com_mtree/task,viewlink/link_id,1584/Itemid,35/">Dynamic gSitemap</a>&nbsp;is a PHP script that dynamically creates an XML sitemap of your site when GoogleBot visits it.</li>
<li><a href="http://extensions.joomla.org/component/option,com_mtree/task,viewlink/link_id,1830/Itemid,35/">JoomSEO</a>&nbsp;is a plugin that dynamically creates meta tags, changes the title tag on the fly, adds heading tags to content titles and more. Some of the configurable features include: show, hide or override keywords, site name, and content title, adjust element order in the title and heading tag selection.</li>
<li><a href="http://extensions.joomla.org/component/option,com_mtree/task,viewlink/link_id,825/Itemid,35/">LinX</a>&nbsp;is a link exchange component that lets you manage a reciprocal link directory. Links that are submitted to your site are automatically checked and only added if there is a link back to your site. A large number of links to your website of course has an effect on the PageRank but what really matters are links from quality and trusted websites that have a similar target audience as your site.</li>
<li><a href="http://extensions.joomla.org/component/option,com_mtree/task,viewlink/link_id,1134/Itemid,35/">MetaTags NX</a>&nbsp;creates meta description and keyword tags for your content on the fly. Keywords are generated based on their frequency in the content and stopwords can be excluded.</li>
<li><a href="http://extensions.joomla.org/component/option,com_mtree/task,viewlink/link_id,904/Itemid,35/">Redirect component</a>&nbsp;lets you redirect old urls to new ones and set their status codes. Remember, changing URLs often is a bad idea.</li>
<li><a href="http://extensions.joomla.org/component/option,com_mtree/task,viewlink/link_id,1646/Itemid,35/">Simple SEO Plugin</a>&nbsp;uses Yahoo to analyze your content and gives immediate feedback on what Yahoo thinks the content is about. Also creates meta tags.</li>
<li><a href="http://extensions.joomla.org/component/option,com_mtree/task,viewlink/link_id,1431/Itemid,35/">Website Validators Tool</a>&nbsp;contains links to validation and site information services that help you make your website more standards compliant, see how you rank, who links to you, and more.</li>
</ol>
<h3>&nbsp;</h3>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size: 13px;font-weight: normal;line-height: 20px"><br />
	</span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size: 13px;font-weight: normal;line-height: 20px"><strong>Joomla SEO</strong></span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size: 13px;font-weight: normal;line-height: 20px">At the suggestion of&nbsp;<a href="http://seoroi.com/">Bookworm SEO</a>, I decided to write a post about SEO for Joomla sites. [Note: I&#39;m not here to suggest that Joomla is better than WordPress, Drupal, or any other CMS. My goal is simply to discuss some things that can be done with a Joomla site to make it better.] Also, I&#39;m still far from an expert in the SEO field, so take it easy on me. <img src='http://big.anythingilike.net/wp-includes/images/smilies/icon_smile.gif' alt=':-)' class='wp-smiley' /> </span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size: 13px;font-weight: normal;line-height: 20px">The big issue with Joomla is that its out-of-the-box settings aren&#39;t exactly search-engine friendly. It defaults to creating horrible URLs, its default page title system has issues, and it creates duplicate content in several ways. (Great CMS, no?)</span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size: 13px;font-weight: normal;line-height: 20px">&nbsp;</span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size: 13px;font-weight: normal;line-height: 20px"><strong>Turning Down the Crap-Factor on Joomla URLs</strong></span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size: 13px;font-weight: normal;line-height: 20px">The default URLs can be improved slightly by turning on &quot;Search Engine Friendly URLs&quot; in the site&#39;s Global Configuration menu. They&#39;ll go from looking like this:</span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size: 13px;font-weight: normal;line-height: 20px">http://www.yoursite.com/component/option,com_mtree/task,viewlink/link_id,2380/Itemid,35/</span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size: 13px;font-weight: normal;line-height: 20px">to this:</span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size: 13px;font-weight: normal;line-height: 20px">http://www.yoursite.com/content/view/30/55</span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size: 13px;font-weight: normal;line-height: 20px">Better, but still not great, as they&#39;re completely lacking in keywords.</span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size: 13px;font-weight: normal;line-height: 20px">The best solution is to simply download and install a plug-in. There are several that get the job done, but the one I use is&nbsp;<a href="http://extensions.joomla.org/component/option,com_mtree/task,viewlink/link_id,2380/Itemid,35/">sh404SEF</a>.</span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size: 13px;font-weight: normal;line-height: 20px">An example of a URL from my site with the above plug-in installed:</span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size: 13px;font-weight: normal;line-height: 20px">http://www.businesstaxbooks.com/index.php/Sole-Proprietor-Tax-Guide/Self-Employment-Tax-Explained.html</span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size: 13px;font-weight: normal;line-height: 20px">Looks better to me. [Note: Here&#39;s a chance for somebody who&nbsp;<em>actually</em>&nbsp;knows their stuff to show me up and explain how to get the &quot;/index.php&quot; out of there.]</span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size: 13px;font-weight: normal;line-height: 20px">&nbsp;</span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size: 13px;font-weight: normal;line-height: 20px"><strong>Solving Duplicate Content Issues with Joomla</strong></span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size: 13px;font-weight: normal;line-height: 20px">The above URL plug-in will fix a great deal of the problems that Joomla creates by pointing so many URLs at the same piece of content. Another important step to take [at the risk of stating the overly-obvious] is to be sure to turn off the &quot;email icon,&quot; &quot;print icon,&quot; and &quot;pdf icon&quot; in the Global Configuration menu.</span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size: 13px;font-weight: normal;line-height: 20px">&nbsp;</span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size: 13px;font-weight: normal;line-height: 20px"><strong>Improving Joomla Page Titles</strong></span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size: 13px;font-weight: normal;line-height: 20px">By default, Joomla page titles follow this structure: &quot;SiteName &#8211; PageName&quot;</span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size: 13px;font-weight: normal;line-height: 20px">There&#39;s been some&nbsp;<a href="http://www.seomoz.org/blog/rewriting-the-beginners-guide-part-iv-continued-titles-meta-data-url-structures#jtc44322">discussion</a>&nbsp;recently about whether or not to include the site name in the page title. Regardless, I think we can at least agree that it&#39;s a good thing to have control over whether or not to include the site title. (And further, to have control over whether it comes before or after the page&#39;s title.)</span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size: 13px;font-weight: normal;line-height: 20px">For instance, my site&#39;s name (Business Tax Books) is rather long. As such, my longer titles were getting cut off due to character limits. For example, my&nbsp;<a href="http://www.businesstaxbooks.com/index.php/LLC-/-S-Corp-/-C-Corp/LLC-S-Corporation-or-C-Corporation-The-3-Minute-Version.html">C-Corp vs S-Corp vs LLC page</a>&nbsp;only had approximately half of its title showing in the SERPS. (And it was the relevant keywords getting cut off, because they came after the site name.)</span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size: 13px;font-weight: normal;line-height: 20px">A quick change in the joomla.php file (located at /includes/joomla.php) will fix this.</span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size: 13px;font-weight: normal;line-height: 20px">Change this:</span></p>
<p><span class="Apple-style-span" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size: 13px;font-weight: normal;line-height: 20px">$this-&gt;_head[&#39;title&#39;] = $title ? $GLOBALS[&#39;mosConfig_sitename&#39;] . &#39; - &#39;. $title : $GLOBALS[&#39;mosConfig_sitename&#39;];</span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size: 13px;font-weight: normal;line-height: 20px">To this:</span></p>
<p><span class="Apple-style-span" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size: 13px;font-weight: normal;line-height: 20px">$this-&gt;_head[&#39;title&#39;] = $title &amp;&amp; $title != &quot;Home&quot; ? $title : $GLOBALS[&#39;mosConfig_sitename&#39;];</span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size: 13px;font-weight: normal;line-height: 20px">I take no credit for the above fix. I found it on the Joomla forums. <img src='http://big.anythingilike.net/wp-includes/images/smilies/icon_smile.gif' alt=':-)' class='wp-smiley' />  After I made the change, I noticed a substantial increase in search traffic to the page I mentioned above, as well as a few others with long titles. (And that&#39;s without even doing anything to rank better!)</span></p>
<p style="margin-top: 0px;margin-right: 0px;margin-bottom: 1.5em;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial;font-weight: inherit;font-style: inherit;font-size: 13px;font-family: inherit;vertical-align: baseline"><span class="Apple-style-span" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size: 13px;font-weight: normal;line-height: 20px"><span class="Apple-style-span" style="font-family: Arial, Verdana, sans-serif;line-height: normal;font-size: 15px;font-weight: bold">WordPress Plugins</span></span></p>
<ol style="margin-top: 0px;margin-right: 0px;margin-bottom: 0px;margin-left: 0px;padding-top: 0px;padding-right: 0px;padding-bottom: 0px;padding-left: 0px;border-top-width: 0px;border-right-width: 0px;border-bottom-width: 0px;border-left-width: 0px;border-style: initial;border-color: initial">
<li><a href="http://wordpress.org/extend/plugins/add-to-any/">Add to Any</a>&nbsp;adds links to a large number of social bookmarking sites to your posts.</li>
<li><a href="http://wordpress.org/extend/plugins/generalstats/">GeneralStats</a>&nbsp;is a statistics component that counts the number of users, categories, posts, comments, pages, words in posts, words in comments and words in pages. Useful for doing keyword research.</li>
<li><a href="http://wordpress.org/extend/plugins/google-sitemap-generator/">Google Sitemap Generator</a>&nbsp;creates an XML sitemap of your website. In the current version homepage, posts, static pages, categories and archives are supported. Priority is automatically assigned based on the number of comments.</li>
<li><a href="http://wordpress.org/extend/plugins/gregarious/">Gregarious</a>&nbsp;is a social bookmarking plugin for Digg, Reddit and Feedburner with update checks via AJAX.</li>
<li><a href="http://wordpress.org/extend/plugins/popularity-contest/">Popularity Contest</a>&nbsp;is a counter for posts, categories, archive views, comments, trackbacks, etc. to determine the most popular pages of your site.</li>
<li><a href="http://wordpress.org/extend/plugins/technorati-tagging/">Technorati Tagging Plugin</a>&nbsp;adds Technorati tags to your posts and enables you to display a tag cloud.</li>
<li><a href="http://wordpress.org/extend/plugins/wp-cache/">WP-Cache</a>&nbsp;is a page caching system to improve your website&#39;s performance. Cached pages are stored as static files, reducing server load, thus making your site faster and more responsive.</li>
<li><a href="http://wordpress.org/extend/plugins/x-valid/">X-Valid</a>&nbsp;attempts to convert posts and comments to valid XHTML. Read more on the benefits of&nbsp;<a href="http://www.seo-expert-blog.com/blog/web-standards-compliance">Web Standards Compliance</a>.</li>
<li>&nbsp;</li>
</ol>
<iframe src="http://www.facebook.com/plugins/like.php?href=http%3A%2F%2Fbig.anythingilike.net%2Farticles%2Fcms-seo-compare-joomla-wordpress-drupal%2F&amp;layout=standard&amp;show_faces=true&amp;width=450&amp;action=like&amp;colorscheme=light" scrolling="no" frameborder="0" allowTransparency="true" style="border:none; overflow:hidden; width:450px;margin-top:5px;"></iframe>]]></content:encoded>
			<wfw:commentRss>http://big.anythingilike.net/articles/cms-seo-compare-joomla-wordpress-drupal/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>Business Card Throwing</title>
		<link>http://big.anythingilike.net/life/clip/business-card-throwing/</link>
		<comments>http://big.anythingilike.net/life/clip/business-card-throwing/#comments</comments>
		<pubDate>Tue, 13 Apr 2010 16:39:16 +0000</pubDate>
		<dc:creator>admin</dc:creator>
				<category><![CDATA[Clip]]></category>

		<guid isPermaLink="false">http://big.anythingilike.net/?p=265</guid>
		<description><![CDATA[httpv://www.youtube.com/watch?v=FVq0HdiM-Ok]]></description>
			<content:encoded><![CDATA[<p>httpv://www.youtube.com/watch?v=FVq0HdiM-Ok</p>
<iframe src="http://www.facebook.com/plugins/like.php?href=http%3A%2F%2Fbig.anythingilike.net%2Flife%2Fclip%2Fbusiness-card-throwing%2F&amp;layout=standard&amp;show_faces=true&amp;width=450&amp;action=like&amp;colorscheme=light" scrolling="no" frameborder="0" allowTransparency="true" style="border:none; overflow:hidden; width:450px;margin-top:5px;"></iframe>]]></content:encoded>
			<wfw:commentRss>http://big.anythingilike.net/life/clip/business-card-throwing/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>Adobe photoshop cs5 is now available</title>
		<link>http://big.anythingilike.net/news/adobe-photoshop-cs5-is-now-available/</link>
		<comments>http://big.anythingilike.net/news/adobe-photoshop-cs5-is-now-available/#comments</comments>
		<pubDate>Mon, 12 Apr 2010 10:32:44 +0000</pubDate>
		<dc:creator>admin</dc:creator>
				<category><![CDATA[News]]></category>
		<category><![CDATA[adobe]]></category>
		<category><![CDATA[cs5]]></category>
		<category><![CDATA[photoshop]]></category>

		<guid isPermaLink="false">http://big.anythingilike.net/?p=260</guid>
		<description><![CDATA[httpv://www.youtube.com/watch?v=dgKjs8ZjQNg New Features Truer Edge technology in Photoshop CS5 Extended offers improved edge detecting technology and masking results in less time. Photoshop CS5 Extended also lets users remove an image element and immediately replace the missing pixels with Content-Aware Fill. InDesign CS5 powers the transition to digital publishing with new interactive documents and enhanced eReader [...]]]></description>
			<content:encoded><![CDATA[<p>httpv://www.youtube.com/watch?v=dgKjs8ZjQNg</p>
<p><b>New Features <br />
	</b></p>
<ul>
<li class="bwlistitemmarginbottom">Truer Edge technology in Photoshop CS5 Extended offers improved edge detecting technology and masking results in less time. Photoshop CS5 Extended also lets users remove an image element and immediately replace the missing pixels with Content-Aware Fill.</li>
<li class="bwlistitemmarginbottom">InDesign CS5 powers the transition to digital publishing with new interactive documents and enhanced eReader device support.</li>
<li class="bwlistitemmarginbottom">Native 64-bit support in Photoshop, Adobe Premiere Pro and After Effects enables customers to work more fluidly on high resolution projects.</li>
<li class="bwlistitemmarginbottom">New Text Layout Framework in Flash Professional CS5 provides professional-level typography capabilities with functions like kerning, ligatures, tracking, leading, threaded text block and multiple columns.</li>
<li class="bwlistitemmarginbottom">New stroke options in Illustrator CS5 allow users to create strokes of variable widths and precisely adjust the width at any point along the stroke.</li>
<li class="bwlistitemmarginbottom">The NVIDIA&reg; GPU accelerated Adobe Mercury Playback Engine allows Adobe Premiere Pro CS5 users to open projects faster, refine effects-rich HD sequences in real time and play back complex projects without rendering.</li>
<li class="bwlistitemmarginbottom">The new Roto Brush tool in After Effects helps users save time by isolating moving foreground elements in a fraction of the normal time.</li>
<li class="bwlistitemmarginbottom">Dreamweaver CS5 now supports popular content management systems Drupal, Joomla! and WordPress, allowing designers to get accurate views of dynamic Web content from within Dreamweaver.</li>
</ul>
<p><b>Pricing and Availability</b></p>
<p>Adobe Creative Suite 5 and its associated point products are scheduled to ship within 30 days, with availability through Adobe Authorized Resellers, the Adobe Store and Adobe Direct Sales.</p>
<p>Estimated street price for:</p>
<ul>
<li>Adobe Creative Suite 5 Master Collection is expected to be US$2599</li>
<li>Adobe Creative Suite 5 Design Premium is expected to be US$1899</li>
<li>Adobe Creative Suite 5 Design Standard is expected to be US$1299</li>
<li>Adobe Creative Suite 5 Web Premium is expected to be US$1799</li>
<li>Adobe Creative Suite 5 Production Premium is expected to be US$1699</li>
</ul>
<p>httpv://www.youtube.com/watch?v=v69S22ZBBqA</p>
<iframe src="http://www.facebook.com/plugins/like.php?href=http%3A%2F%2Fbig.anythingilike.net%2Fnews%2Fadobe-photoshop-cs5-is-now-available%2F&amp;layout=standard&amp;show_faces=true&amp;width=450&amp;action=like&amp;colorscheme=light" scrolling="no" frameborder="0" allowTransparency="true" style="border:none; overflow:hidden; width:450px;margin-top:5px;"></iframe>]]></content:encoded>
			<wfw:commentRss>http://big.anythingilike.net/news/adobe-photoshop-cs5-is-now-available/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
	</channel>
</rss>

<!-- Performance optimized by W3 Total Cache. Learn more: http://www.w3-edge.com/wordpress-plugins/

Served from: big.anythingilike.net @ 2012-02-23 05:48:07 -->
