链家房屋爬虫

使用xlwt库将爬取内容写入excel

#coding=UTF-8
import urllib2
from bs4 import BeautifulSoup
import sys
import xlwt
# Scrape rental listings from ty.fangjia.com (result pages 1-99) and write one
# spreadsheet row per listing with xlwt.
# Column layout: 0 = title, 1.. = address parts, 3 = attributes, 4 = price.
# Written for Python 2 (urllib2, reload); single-argument print(...) calls
# behave the same there as the print statement.

num = 0  # listings written so far; listing k goes to sheet row k + 1 (row 0 stays empty)
book = xlwt.Workbook(encoding="utf-8", style_compression=0)
sheet = book.add_sheet('test', cell_overwrite_ok=True)

# Python 2 workaround: make utf-8 the implicit str<->unicode codec so the
# unicode text taken from the page can be concatenated with the byte string
# "万" below. The setting is process-wide, so it is applied once instead of
# once per page.
reload(sys)
sys.setdefaultencoding('utf-8')

for page in range(1, 100):
    url = "http://ty.fangjia.com/zufang/--e-" + str(page) + "#pagelist"

    # Parse while the connection is open, then always release it.
    html = urllib2.urlopen(url)
    try:
        soup = BeautifulSoup(html, "html.parser")
    finally:
        html.close()

    # A page without the listing container is treated as having no listings
    # instead of failing with AttributeError on None.
    house = soup.find("div", class_="house")
    if house is None:
        homes = []
    else:
        homes = house.find_all("li", {"name": "__page_click_area"})

    for home in homes:
        row = num + 1
        for title in home.find_all("span", class_="tit"):
            for text in title.stripped_strings:
                sheet.write(row, 0, text)
        for address in home.find_all("span", class_="address"):
            # Each text fragment of the address gets its own column, from 1.
            for col, text in enumerate(address.stripped_strings, 1):
                sheet.write(row, col, text)
        for attribute in home.find_all("span", class_="attribute"):
            for text in attribute.stripped_strings:
                sheet.write(row, 3, text)
        for price in home.find_all("span", class_="xq_aprice xq_esf_width"):
            if price.em is None:
                # No <em> holding the amount: nothing to record for this span.
                continue
            for text in price.em.stripped_strings:
                sheet.write(row, 4, text + "万")
        num = num + 1

    print(str(page) + "页插入完成")
    # Save after every page so a failure on a later page keeps earlier rows.
    book.save('/Users/dubo/Desktop/typrice2.xls')

print("总计" + str(num) + "套房屋")

阅读全文

豆瓣读书Top250

#coding=UTF-8
import urllib2
from bs4 import BeautifulSoup
# https://book.douban.com/top250?start=
# Print the link text of every entry on Douban's "Top 250 books" list, fetched
# 25 entries at a time through the ?start=<offset> query parameter, followed by
# the number of entries seen.
# Written for Python 2 (urllib2); single-argument print(...) calls behave the
# same there as the print statement.

PAGE_SIZE = 25    # the offset advances by this much per request
book_count = 0    # entries (div.pl2 blocks) seen so far

for start in range(0, 250, PAGE_SIZE):
    url = "https://book.douban.com/top250?start=" + str(start)

    # Read the body, then always release the connection.
    req = urllib2.urlopen(url)
    try:
        content = req.read()
    finally:
        req.close()
    soup = BeautifulSoup(content, "html.parser")

    # The page number comes from the offset, so a page that yields fewer than
    # PAGE_SIZE entries cannot skew the numbering of the pages after it.
    print("----------page=" + str(start // PAGE_SIZE + 1) + "-----------")
    for link in soup.find_all('div', {"class": "pl2"}):
        for anchor in link.find_all("a"):
            for line in anchor.stripped_strings:
                print(line)
        book_count += 1

print(book_count)

阅读全文

Swift GET&POST请求 网络缓存的简单处理

参考该CSDN
CNBLOG

阅读全文

Swift同步请求获取网络数据demo

// Demo: fetch data from APIURL with a synchronous NSURLConnection request and
// present a "Network" alert if the call throws.
// NOTE(review): this excerpt uses `self.present`, so it presumably sits inside a
// view controller method whose header is not part of the excerpt — confirm.
let APIURL="http://www.mrdubo.com/api/api.php"
let url=URL(string: APIURL)
// Create the request objects (url is force-unwrapped).
let urlRequest:NSURLRequest = NSURLRequest(url: url!)
// NOTE(review): `ur` (ignore local cache, 5 s timeout) is never used below; the
// request actually sent is `urlRequest` — confirm which one was intended.
let ur:NSURLRequest=NSURLRequest(url: url!, cachePolicy: NSURLRequest.CachePolicy.reloadIgnoringLocalCacheData , timeoutInterval: 5)
// Response object, filled in by the call below.
var response:URLResponse?
do{
// Send the request.
// NOTE(review): the call is synchronous, so it presumably blocks the calling
// thread until it returns — confirm it is not run on the main thread.
let jsonData:NSData? = try NSURLConnection.sendSynchronousRequest(urlRequest as URLRequest,returning: &response) as NSData?
// No data came back: log and terminate the process.
if jsonData==nil {
print("error")
exit(-1)
}
}catch let error as NSError{
// The request threw: show an alert with a single cancel button.
// The caught `error` itself is not inspected.
var alterView = UIAlertController()
alterView.title="Network"
alterView.message="Network not allowed"
var cancelAction = UIAlertAction(title: "取消", style: UIAlertActionStyle.cancel, handler: nil)
alterView.addAction(cancelAction)
self.present(alterView,animated: true, completion: nil)
}
// Closes a scope opened outside this excerpt.
}

阅读全文

NSURLRequest各种缓存方式讲解

1.NSURLRequestUseProtocolCachePolicy

NSURLRequest默认的cache policy,使用Protocol协议定义。
2.NSURLRequestReloadIgnoringCacheData

忽略缓存直接从原始地址下载。
3.NSURLRequestReturnCacheDataDontLoad

阅读全文

Swift中三元运算符的低级错误

// Ternary conditional: str is "123" when a > b, otherwise "1234".
// NOTE(review): a and b are declared outside this excerpt; the post presumably
// concerns the whitespace around `?` — confirm against the full article.
var str = a>b ?"123" : "1234"

阅读全文

PD11下安装ubuntu虚拟机闪屏

一开始在PD11下安装的ubuntu虚拟机无法正常使用

改用PD自带的ubuntu下载功能下载安装后,可以正常使用

阅读全文

Ubuntu下配置jdk

安装jdk:

# Install the JDK by unpacking its archive under /opt.
# Make /opt writable by every user so the following steps work without sudo.
# NOTE(review): mode 777 leaves /opt world-writable afterwards — consider
# tightening the permissions once the install is done.
sudo chmod 777 /opt
# Copy the JDK archive from the current directory into /opt.
cp jdk-8u101-linux-x64.tar.gz /opt
cd /opt
# Unpack the archive in place (-x extract, -v list files, -f archive file).
tar -xvf jdk-8u101-linux-x64.tar.gz
# List /opt to check that the unpacked directory is there.
ls
# Run the unpacked java binary to confirm that it works.
/opt/jdk1.8.0_101/bin/java -version

阅读全文

一个免费的天气api

该天气API可免费使用

和风天气
可以方便地用于调试

阅读全文

Xcode 7 中解除HTTP访问限制的修改方法

在info.plist文件中添加

App Transport Security Settings

Allow Arbitrary Loads boolean类型 YES

阅读全文