1 Redis List 类型
1.1 案例1
package mylist;
import redis.clients.jedis.BinaryClient;
import redis.clients.jedis.Jedis;
import java.util.List;
/**
 * Walks through the basic Redis list commands (LPUSH, RPUSH, LINSERT, LPOP, RPOP, LTRIM, LRANGE)
 * using a small story about people queuing up.
 *
 * <p>The list is printed from head (left) to tail (right) after every step. In the story the
 * tail of the list is the front of the queue, so "cutting in front of X" is an insert AFTER X
 * and "joining behind X" is an insert BEFORE X.
 */
public class ListMain {
    /** Redis key of the list that models the queue. */
    private static final String QUEUE_KEY = "光明顶";

    /** Line printed after each step so the individual snapshots are easy to tell apart. */
    private static final String SEPARATOR = "===============================";

    public static void main(String[] args) {
        // try-with-resources closes the connection even if one of the commands fails.
        try (Jedis jedis = new Jedis("127.0.0.1", 6379)) {
            // Start from a clean list so repeated runs print the same result.
            jedis.del(QUEUE_KEY);
            jedis.lpush(QUEUE_KEY, "乔峰", "段誉", "虚竹", "鸠摩智");
            printQueue(jedis);
            System.out.println(SEPARATOR);

            // A newcomer, Wang Yuyan, jumps straight to the front of the queue (the tail of the list).
            jedis.rpush(QUEUE_KEY, "王语嫣");
            printQueue(jedis);
            System.out.println(SEPARATOR);

            // Story: Jiumozhi is unhappy; Murong Fu shows up and is invited to cut in right in front of him.
            jedis.linsert(QUEUE_KEY, BinaryClient.LIST_POSITION.AFTER, "鸠摩智", "慕容复");
            printQueue(jedis);
            System.out.println(SEPARATOR);

            // Story: everyone is angry about the queue-jumping; A Zi and You Tanzhi arrive and
            // line up, one after the other, behind Xuzhu.
            jedis.linsert(QUEUE_KEY, BinaryClient.LIST_POSITION.BEFORE, "虚竹", "阿紫");
            jedis.linsert(QUEUE_KEY, BinaryClient.LIST_POSITION.BEFORE, "阿紫", "游坦之");
            printQueue(jedis);
            System.out.println(SEPARATOR);

            // Story: to stop the queue-jumping a fight breaks out and Jiumozhi (head of the list) is driven off.
            jedis.lpop(QUEUE_KEY);
            printQueue(jedis);
            System.out.println(SEPARATOR);

            // Story: Murong Fu sees things going badly and injures Wang Yuyan, who leaves (tail of the list).
            jedis.rpop(QUEUE_KEY);
            printQueue(jedis);
            System.out.println(SEPARATOR);

            // Story: while everybody is fighting, a nameless stranger sneaks to the front.
            jedis.rpush(QUEUE_KEY, "无名氏");
            // Print the same list that was just modified (the original read an unrelated key here
            // and therefore printed nothing).
            printQueue(jedis);
            System.out.println(SEPARATOR);

            // Murong Fu is driven off.
            jedis.lpop(QUEUE_KEY);
            printQueue(jedis);
            System.out.println(SEPARATOR);

            // Story: the Xingxiu elder suddenly appears and takes A Zi and You Tanzhi away together.
            // LTRIM keeps only indexes 2..5, dropping the first two entries.
            String result = jedis.ltrim(QUEUE_KEY, 2, 5);
            if ("OK".equals(result)) {
                printQueue(jedis);
            }
            System.out.println(SEPARATOR);

            // Story: Qiao Feng and his two brothers notice the stranger; after a long fight he withdraws.
            // LTRIM keeps only indexes 0..2, dropping the last entry.
            String res = jedis.ltrim(QUEUE_KEY, 0, 2);
            if ("OK".equals(res)) {
                printQueue(jedis);
            }
        }
    }

    /** Prints every element of the queue list from head to tail, each followed by a space. */
    private static void printQueue(Jedis jedis) {
        for (String name : jedis.lrange(QUEUE_KEY, 0, -1)) {
            System.out.print(name + " ");
        }
    }
}
D:\programs\Java\jdk1.8.0_181\bin\java.exe "-javaagent:D:\programs\JetBrains\IntelliJ IDEA 2018.2.4\lib\idea_rt.jar=50417:D:\programs\JetBrains\IntelliJ IDEA 2018.2.4\bin" -Dfile.encoding=UTF-8 -classpath D:\programs\Java\jdk1.8.0_181\jre\lib\charsets.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\deploy.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\ext\access-bridge-64.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\ext\cldrdata.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\ext\dnsns.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\ext\jaccess.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\ext\jfxrt.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\ext\localedata.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\ext\nashorn.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\ext\sunec.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\ext\sunjce_provider.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\ext\sunmscapi.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\ext\sunpkcs11.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\ext\zipfs.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\javaws.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\jce.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\jfr.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\jfxswt.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\jsse.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\management-agent.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\plugin.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\resources.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\rt.jar;D:\Data\JavaProject\myredis\target\classes;C:\Users\tzb\.m2\repository\redis\clients\jedis\2.8.2\jedis-2.8.2.jar;C:\Users\tzb\.m2\repository\org\apache\commons\commons-pool2\2.4.2\commons-pool2-2.4.2.jar;C:\Users\tzb\.m2\repository\junit\junit\4.12\junit-4.12.jar;C:\Users\tzb\.m2\repository\org\hamcrest\hamcrest-core\1.3\hamcrest-core-1.3.jar;C:\Users\tzb\.m2\repository\com\google\code\gson\gson\2.3.1\gson-2.3.1.jar mylist.ListMain
鸠摩智 虚竹 段誉 乔峰 ===============================
鸠摩智 虚竹 段誉 乔峰 王语嫣 ===============================
鸠摩智 慕容复 虚竹 段誉 乔峰 王语嫣 ===============================
鸠摩智 慕容复 游坦之 阿紫 虚竹 段誉 乔峰 王语嫣 ===============================
慕容复 游坦之 阿紫 虚竹 段誉 乔峰 王语嫣 ===============================
慕容复 游坦之 阿紫 虚竹 段誉 乔峰 ===============================
===============================
游坦之 阿紫 虚竹 段誉 乔峰 无名氏 ===============================
虚竹 段誉 乔峰 无名氏 ===============================
虚竹 段誉 乔峰
Process finished with exit code 0
1.2 案例2
1.2.1 爬虫测试
package mylist;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import java.io.IOException;
/**
 * Fetches a single news article page and prints its title and body text.
 */
public class SingleArticle {
    public static void main(String[] args) throws IOException {
        final String articleUrl = "http://www.huxiu.com/article/102062/1.html";
        final Document page = Jsoup.connect(articleUrl).get();

        // Parse the page: the title is the text of the first <title> element, the body is the
        // combined text of everything matched by the content selector.
        final String title = page.getElementsByTag("title").get(0).text();
        final String content = page.select("div #article_content").text();

        System.out.println("title: " + title);
        System.out.println("content: " + content);
    }
}
1.2.2 爬取的内容放到 redis
package mylist;
import org.jsoup.Jsoup;
import org.jsoup.helper.StringUtil;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import redis.clients.jedis.Jedis;
import java.util.Date;
/**
 * Minimal crawler that uses a Redis list as its work queue: {@link #getUrls} pushes article URLs
 * found on a channel page onto the list, and {@link #parserUrl} pops them one by one, downloads
 * each article and prints it.
 */
public class Crawler {
    /** Redis list holding the article URLs that are still waiting to be crawled. */
    private static final String redisUrlsWillKey = "crawler:urls:will";

    /** Path segment that immediately precedes the numeric article id in an article URL. */
    private static final String ARTICLE_PATH_MARKER = "/article/";

    // If de-duplication is needed, a Redis set could be used to remember URLs already crawled.

    public static void main(String[] args) throws Exception {
        // Seed page and the site prefix used to turn relative links into absolute URLs.
        String startUrl = "https://www.huxiu.com/channel/22.html";
        String domain = "http://www.huxiu.com/";
        // Collect the article URLs into the Redis queue.
        getUrls(startUrl, domain);
        // Drain the queue: download every article and print it.
        parserUrl();
    }

    /**
     * Pops URLs from the right end of the queue until it is empty, printing each parsed article.
     *
     * <p>A URL that fails to download or parse is reported on stderr and dropped; it is not
     * pushed back, so a permanently broken page cannot cause an endless retry loop.
     */
    private static void parserUrl() throws Exception {
        try (Jedis jedis = new Jedis("127.0.0.1", 6379)) {
            String url;
            // rpop returns null once the list is empty; stop instead of spinning forever.
            while ((url = jedis.rpop(redisUrlsWillKey)) != null) {
                try {
                    Article article = parser(url);
                    System.out.println(article);
                } catch (Exception e) {
                    System.err.println("Failed to crawl " + url + ": " + e);
                }
            }
        }
    }

    /**
     * Downloads the page at {@code url} and extracts author, date, title, id and body text.
     *
     * @param url absolute URL of the article page
     * @return the populated article
     * @throws Exception if the page cannot be fetched or its date cannot be parsed
     */
    private static Article parser(String url) throws Exception {
        Document articleDocument = Jsoup.connect(url).get();
        Article article = new Article();

        // Author, falling back to a fixed placeholder when the page has none.
        String author = articleDocument.getElementsByClass("author-name").text();
        article.setAuthor(StringUtil.isBlank(author) ? "jingzhongyue" : author);

        // Publication date, falling back to the current time when the page has none.
        String date = articleDocument.getElementsByClass("article-time").text();
        article.setDate(StringUtil.isBlank(date) ? new Date() : DateUtil.getDate(date));

        // Title.
        article.setTitle(articleDocument.getElementsByTag("title").text());

        // Article id, e.g. 124698 for http://www.huxiu.com/article/124698/1.html
        article.setId(extractArticleId(url));

        // Body text.
        article.setContent(extractContent(articleDocument));
        return article;
    }

    /**
     * Returns the run of digits that follows {@code /article/} in {@code url}, or an empty
     * string when the URL has no such segment. Unlike a fixed character offset this works for
     * any scheme or host length and for both {@code /article/123/1.html} and
     * {@code /article/123.html}.
     */
    private static String extractArticleId(String url) {
        int markerAt = url.indexOf(ARTICLE_PATH_MARKER);
        if (markerAt < 0) {
            return "";
        }
        int start = markerAt + ARTICLE_PATH_MARKER.length();
        int end = start;
        while (end < url.length() && Character.isDigit(url.charAt(end))) {
            end++;
        }
        return url.substring(start, end);
    }

    /**
     * Concatenates the text of every {@code <p>} that is a direct child of an element whose
     * {@code id} attribute is {@code article_content}.
     */
    private static String extractContent(Document articleDocument) {
        StringBuilder text = new StringBuilder();
        for (Element element : articleDocument.getElementsByAttribute("id")) {
            if (!"article_content".equals(element.attr("id"))) {
                continue;
            }
            for (Element child : element.children()) {
                // Compare the tag name; Elements.tagName(String) would rename the elements
                // instead of filtering them.
                if ("p".equals(child.tagName())) {
                    text.append(child.text());
                }
            }
        }
        return text.toString();
    }

    /**
     * Fetches {@code startUrl}, and pushes every link whose href contains "article" onto the
     * left end of the Redis queue (also echoing it to stdout).
     *
     * @param startUrl page whose links are scanned
     * @param domain   site prefix prepended to relative links
     */
    private static void getUrls(String startUrl, String domain) throws Exception {
        Document document = Jsoup.connect(startUrl).get();
        try (Jedis jedis = new Jedis("127.0.0.1", 6379)) {
            for (Element element : document.getElementsByAttribute("href")) {
                String endUrl = element.attr("href");
                if (endUrl.contains("article")) {
                    String url = joinUrl(domain, endUrl);
                    System.out.println(url);
                    jedis.lpush(redisUrlsWillKey, url);
                }
            }
        }
    }

    /**
     * Joins a site prefix and a link with exactly one slash between them; a link that is
     * already an absolute http(s) URL is returned unchanged.
     */
    private static String joinUrl(String domain, String path) {
        if (path.startsWith("http://") || path.startsWith("https://")) {
            return path;
        }
        String base = domain.endsWith("/") ? domain.substring(0, domain.length() - 1) : domain;
        String tail = path.startsWith("/") ? path : "/" + path;
        return base + tail;
    }
}
package mylist;
import java.util.Date;
import org.jsoup.helper.StringUtil;
public class Article {
private String id;
private String title;
private String content;
private String author;
private Date date;
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getContent() {
return content;
}
public void setContent(String content) {
this.content = content;
}
public String getAuthor() {
return author;
}
public void setAuthor(String author) {
this.author = author;
}
public Date getDate() {
return date;
}
public void setDate(Date date) {
this.date = date;
}
@Override
public String toString() {
return "Article [id=" + id + ", title=" + title + ", content="
+ content + ", author=" + author + ", date=" + date + "]";
}
public boolean isNotNull() {
if (StringUtil.isBlank(content)) {
return false;
}
return true;
}
}
package mylist;
import java.text.SimpleDateFormat;
import java.util.Date;
/**
 * Converts between {@link Date} and text in the {@code yyyy-MM-dd HH:mm:ss} format.
 */
public class DateUtil {
    /** Pattern shared by parsing and formatting. */
    private static final String PATTERN = "yyyy-MM-dd HH:mm:ss";

    /**
     * Creates a formatter for {@link #PATTERN}. SimpleDateFormat keeps mutable state and is
     * not safe to share between threads, so every call works on its own instance.
     */
    private static SimpleDateFormat newFormat() {
        return new SimpleDateFormat(PATTERN);
    }

    /**
     * Parses text in the {@code yyyy-MM-dd HH:mm:ss} format.
     *
     * @param dString the text to parse
     * @return the parsed date
     * @throws Exception if {@code dString} does not match the format
     */
    public static Date getDate(String dString) throws Exception {
        return newFormat().parse(dString);
    }

    /**
     * Formats a date as {@code yyyy-MM-dd HH:mm:ss}.
     *
     * @param date the date to format
     * @return the formatted text
     */
    public static String getString(Date date) {
        return newFormat().format(date);
    }
}
D:\programs\Java\jdk1.8.0_181\bin\java.exe "-javaagent:D:\programs\JetBrains\IntelliJ IDEA 2018.2.4\lib\idea_rt.jar=60901:D:\programs\JetBrains\IntelliJ IDEA 2018.2.4\bin" -Dfile.encoding=UTF-8 -classpath D:\programs\Java\jdk1.8.0_181\jre\lib\charsets.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\deploy.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\ext\access-bridge-64.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\ext\cldrdata.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\ext\dnsns.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\ext\jaccess.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\ext\jfxrt.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\ext\localedata.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\ext\nashorn.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\ext\sunec.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\ext\sunjce_provider.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\ext\sunmscapi.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\ext\sunpkcs11.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\ext\zipfs.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\javaws.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\jce.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\jfr.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\jfxswt.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\jsse.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\management-agent.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\plugin.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\resources.jar;D:\programs\Java\jdk1.8.0_181\jre\lib\rt.jar;D:\Data\JavaProject\myredis\target\classes;C:\Users\tzb\.m2\repository\redis\clients\jedis\2.8.2\jedis-2.8.2.jar;C:\Users\tzb\.m2\repository\org\apache\commons\commons-pool2\2.4.2\commons-pool2-2.4.2.jar;C:\Users\tzb\.m2\repository\junit\junit\4.12\junit-4.12.jar;C:\Users\tzb\.m2\repository\org\hamcrest\hamcrest-core\1.3\hamcrest-core-1.3.jar;C:\Users\tzb\.m2\repository\com\google\code\gson\gson\2.3.1\gson-2.3.1.jar;C:\Users\tzb\.m2\repository\org\jsoup\jsoup\1.8.3\jsoup-1.8.3.jar mylist.Crawler
http://www.huxiu.com//article/265397.html
http://www.huxiu.com//article/265362.html
http://www.huxiu.com//article/265365.html
http://www.huxiu.com//article/265356.html
http://www.huxiu.com//article/265328.html
http://www.huxiu.com//article/265321.html
http://www.huxiu.com//article/265318.html
http://www.huxiu.com//article/265160.html
http://www.huxiu.com//article/265278.html
http://www.huxiu.com//article/265228.html
http://www.huxiu.com//article/265224.html
http://www.huxiu.com//article/265176.html
http://www.huxiu.com//article/265198.html
http://www.huxiu.com//article/265074.html
http://www.huxiu.com//article/264936.html
http://www.huxiu.com//article/264972.html
http://www.huxiu.com//article/264935.html
http://www.huxiu.com//article/264951.html
http://www.huxiu.com//article/264945.html
http://www.huxiu.com//article/264917.html
Article [id=, title=月活堪比QQ,版权碾压同行……年度最强文娱公司要上市了-虎嗅网, content=null, author=jingzhongyue, date=Wed Oct 03 10:24:41 CST 2018]
Article [id=, title=一下科技的十字路口-虎嗅网, content=null, author=jingzhongyue, date=Wed Oct 03 10:24:41 CST 2018]
Article [id=, title=《李茶的姑妈》遇冷,高速狂奔的开心麻花该“换挡”了?-虎嗅网, content=null, author=jingzhongyue, date=Wed Oct 03 10:24:42 CST 2018]
Article [id=, title=我们与那些没能出现在《影》中的人聊了聊-虎嗅网, content=null, author=jingzhongyue, date=Wed Oct 03 10:24:42 CST 2018]
Article [id=, title=抓娃娃,心理“陷阱”下的消费魔爪-虎嗅网, content=null, author=jingzhongyue, date=Wed Oct 03 10:24:43 CST 2018]
Article [id=, title=搏击 VS 拳击:嘴炮喧嚣背后的商业化“迷途”-虎嗅网, content=null, author=jingzhongyue, date=Wed Oct 03 10:24:43 CST 2018]
Article [id=, title=张艺谋的杂念与执念-虎嗅网, content=null, author=jingzhongyue, date=Wed Oct 03 10:24:44 CST 2018]
Article [id=, title=横店静悄悄-虎嗅网, content=null, author=jingzhongyue, date=Wed Oct 03 10:24:44 CST 2018]
Article [id=, title=清白做人,认真演戏,这才配叫“角儿”-虎嗅网, content=null, author=jingzhongyue, date=Wed Oct 03 10:24:45 CST 2018]
Article [id=, title=苹果:“More Than iPhones”之“影视内容”观察-虎嗅网, content=null, author=jingzhongyue, date=Wed Oct 03 10:24:45 CST 2018]
Article [id=, title=中国盗版DVD二十年:一段隐秘的地下史-虎嗅网, content=null, author=jingzhongyue, date=Wed Oct 03 10:24:46 CST 2018]
Article [id=, title=媒体人国庆节片单出炉:这7部电影映照传媒世界的魔幻现实-虎嗅网, content=null, author=jingzhongyue, date=Wed Oct 03 10:24:46 CST 2018]
Article [id=, title=抖音 VS 快手:从产品理念来看短视频的功能设计-虎嗅网, content=null, author=jingzhongyue, date=Wed Oct 03 10:24:47 CST 2018]
Article [id=, title=“桃色绯闻”未平又陷商业纠纷 《军师联盟》给吴秀波带来太多“惊喜”与“惊吓”-虎嗅网, content=null, author=jingzhongyue, date=Wed Oct 03 10:24:47 CST 2018]
Article [id=, title=歌手臧天朔的人生往事:摇滚老炮、江湖大哥、身陷囹圄-虎嗅网, content=null, author=jingzhongyue, date=Wed Oct 03 10:24:48 CST 2018]
Article [id=, title=怎样理解B站的社区生态与内容策略-虎嗅网, content=null, author=jingzhongyue, date=Wed Oct 03 10:24:48 CST 2018]
Article [id=, title=中国不配有好游戏?-虎嗅网, content=null, author=jingzhongyue, date=Wed Oct 03 10:24:49 CST 2018]
Article [id=, title=B站离不开游戏-虎嗅网, content=null, author=jingzhongyue, date=Wed Oct 03 10:24:50 CST 2018]
Article [id=, title=湖南卫视,“妖孽”二十年-虎嗅网, content=null, author=jingzhongyue, date=Wed Oct 03 10:24:50 CST 2018]
Article [id=, title=大数据分析全球电影市场类型,原来中国观众最爱的不是爱情喜剧-虎嗅网, content=null, author=jingzhongyue, date=Wed Oct 03 10:24:51 CST 2018]