极客时间-轻松学习，高效学习-极客邦

third

2019-03-20

from wordcloud import WordCloud
import matplotlib.pyplot as plt
import requests

# HTTP headers handed to getsong(), which forwards them with its playlist request.
headers = dict([
    ('Referer', 'http://music.163.com'),
    ('Host', 'music.163.com'),
    ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'),
    ('User-Agent', 'Chrome/10'),
])

def getsong(headers):
    """Return the names of all tracks in playlist 753776811.

    Sends a GET request to the playlist-detail endpoint with ``headers``
    and reads ``result -> tracks -> name`` from the decoded JSON reply.
    """
    response = requests.request(
        'GET',
        'http://music.163.com/api/playlist/detail?id=753776811',
        headers=headers,
    )
    payload = response.json()
    return [track["name"] for track in payload['result']['tracks']]

def create_WordCloud(list):
    """Render ``list`` as a word cloud, save it to "作业.jpg" and display it.

    ``list`` is handed unchanged to ``WordCloud.generate``; the script
    below passes a single space-separated string.
    """
    print("根源词频计算词云")

    cloud_options = {
        "font_path": "simhei.ttf",
        "max_words": 100,      # maximum number of words drawn
        "width": 2000,         # canvas width
        "height": 1200,        # canvas height
        "random_state": 100,
    }
    rendered = WordCloud(**cloud_options).generate(list)
    # Write the word-cloud image to disk.
    rendered.to_file("作业.jpg")
    # Show the image; "off" hides the X/Y axes.
    plt.imshow(rendered)
    plt.axis("off")
    plt.show()

# Remove the stop words and clean up the mixed Chinese/English titles
def remove_stop_words(f):
    """Return ``f`` with version tags removed and two bonus titles shortened.

    The tags "(伴奏)", "(Demo版)" and "(必胜客新春版)" are deleted, and the
    "Bonus Track：" prefix is dropped from the titles 一荤一素 and 给你给我.
    """
    for tag in ('(伴奏)', '(Demo版)', '(必胜客新春版)'):
        f = f.replace(tag, '')
    bonus_prefix = 'Bonus Track：'
    for title in ('一荤一素', '给你给我'):
        # Only these two prefixed titles are rewritten; other uses of the prefix stay.
        f = f.replace(bonus_prefix + title, title)
    return f

# Fetch the track names of the playlist.
gerlists = getsong(headers)
# getsong returns a list; join it into one space-separated str
lists_str = " ".join(gerlists)

# Strip the stop words, then render the word cloud from the cleaned text.
lists = remove_stop_words(lists_str)
create_WordCloud(lists)

展开

作者回复: Good Job



 1
王彬成

2019-03-13

（2）将歌单的歌曲对应的歌词作词云展示
import requests
import jieba
from wordcloud import WordCloud
import matplotlib.pyplot as plt

# Fetch the song ID and song name of every track in the given playlist
def get_songs(playlist_id):
    """Return ``(song_names, song_ids)`` for the playlist ``playlist_id``.

    The request uses the module-level ``headers`` dict. The track count and
    each ``name  id`` pair are printed as they are read.

    Args:
        playlist_id: Playlist ID; converted with ``str`` so an int also works.

    Returns:
        A tuple ``(song_names, song_ids)`` of two parallel lists.

    Raises:
        KeyError: If the reply has no ``result``/``tracks`` entry.
    """
    page_url = 'http://music.163.com/api/playlist/detail?id=' + str(playlist_id)
    res = requests.request('GET', page_url, headers=headers)
    # Decode the JSON body once instead of on every access inside the loop.
    tracks = res.json()['result']['tracks']
    # Print the number of songs in the playlist
    print(len(tracks))

    song_ids = []
    song_names = []
    for track in tracks:
        names = track['name']
        ids = track['id']
        song_names.append(names)
        song_ids.append(ids)
        print(names, ' ', ids)
    return song_names, song_ids

# Fetch the lyric of a single song
def get_song_lyric(headers,lyric_url):
    """Return the lyric text behind ``lyric_url`` with timestamp characters removed.

    Every digit, ':', '.', '[' and ']' is stripped from the lyric, which
    removes the ``[mm:ss.xx]`` markers (and any other digits in the text).

    Args:
        headers: HTTP headers sent with the request.
        lyric_url: Full URL of the lyric API call.

    Returns:
        The cleaned lyric, or ``''`` when the reply carries no lyric text.
        In that case the decoded reply is printed for diagnosis.
    """
    # Local import: the import list of this script does not include ``re``.
    import re

    res = requests.request('GET', lyric_url, headers=headers)
    payload = res.json()
    lrc = payload.get('lrc') if isinstance(payload, dict) else None
    lyric = lrc.get('lyric') if isinstance(lrc, dict) else None
    if not lyric:
        # Print before returning so the diagnostic output is actually reachable.
        print(payload)
        return ''
    return re.sub(r'[\d:.[\]]', '', lyric)

# Build the word cloud
def create_word_cloud(f):
    """Segment ``f`` with jieba and render the result as a word cloud.

    Stop words are removed with ``remove_stop_words`` first. The segmented
    text is printed, the image is saved to "wordcloud.jpg" and then shown.
    """
    print('根据词频生成词云')
    cleaned = remove_stop_words(f)
    tokens = jieba.cut(cleaned, cut_all=False, HMM=True)
    cut_text = ' '.join(tokens)
    settings = dict(
        font_path="./wc.ttf",
        max_words=100,
        width=2000,
        height=1200,
    )
    cloud = WordCloud(**settings)
    print(cut_text)
    image = cloud.generate(cut_text)
    # Write the word-cloud image to disk
    image.to_file("wordcloud.jpg")
    # Show the image with the axes hidden
    plt.imshow(image)
    plt.axis("off")
    plt.show()


# Playlist ID; per the original note, 753776811 is the playlist 【毛不易 | 不善言辞的深情】
playlist_id='753776811'
[song_names,song_ids]=get_songs(playlist_id)

# Accumulates the lyrics of all songs
all_word=''
# Fetch the lyric of every song
for (song_id, song_name) in zip(song_ids, song_names):
    # Lyric API URL
    lyric_url = 'http://music.163.com/api/song/lyric?os=pc&id=' + str(song_id) + '&lv=-1&kv=-1&tv=-1'
    lyric = get_song_lyric(headers, lyric_url)
    all_word = all_word + ' ' + lyric
    print(song_name)

# Render the word cloud from the collected lyrics
create_word_cloud(all_word)

展开

作者回复: Good Job



 1
跳跳

2019-03-13

#需要注意的有两点
#1.歌单返回的是json数据，get_songs需要参考get_song_lyric的写法来获取
#2.list_url中间是str类型，注意类型转换
#emmm，代码太长放不下了，删除了一部分和老师一样的函数
# -*- coding:utf-8 -*-
# 网易云音乐通过歌单ID，生成该歌单的词云
import requests
import sys
import re
import os
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import jieba
from PIL import Image
import numpy as np
from lxml import etree
def get_songs(songlist):
    """Return ``(song_ids, song_names)`` for the playlist with ID ``songlist``.

    The request uses the module-level ``headers`` dict. Each ``id  name``
    pair is printed as it is read.

    Args:
        songlist: Playlist ID; converted with ``str`` so an int also works.

    Returns:
        A tuple of two parallel lists. Both lists are empty when the reply
        has no ``result`` entry, so the caller's two-value unpacking still
        works; the decoded reply is printed in that case.
    """
    # Local import: the import list above this function does not include ``time``.
    import time

    list_url = 'https://music.163.com/api/playlist/detail?id=' + str(songlist)
    res = requests.request('GET', list_url, headers=headers)
    payload = res.json()
    # Local accumulators; nothing defines these names before the function is called.
    song_ids = []
    song_names = []
    if 'result' not in payload:
        # Print before returning so the diagnostic output is actually reachable.
        print(payload)
        return song_ids, song_names
    for item in payload['result']['tracks']:  # read each ID and name from the JSON data
        song_ids.append(item['id'])
        song_names.append(item['name'])
        print(item['id'], " ", item['name'])
        # Pause between tracks; the original note says this prevents "bad handshake".
        # NOTE(review): no request is issued inside this loop - confirm the pause is still needed.
        time.sleep(1)
    return song_ids, song_names
# Playlist ID
songlist = '753776811'
[song_ids, song_names] = get_songs(songlist)
# Accumulates the lyrics of all songs
all_word = ''
# Fetch the lyric of every song
for (song_id, song_name) in zip(song_ids, song_names):
    # Lyric API URL
    list_url = 'http://music.163.com/api/song/lyric?os=pc&id=' + str(song_id) + '&lv=-1&kv=-1&tv=-1'
    lyric = get_song_lyric(headers, list_url)
    all_word = all_word + ' ' + lyric
    print(song_name)
# Render the word cloud from the collected lyrics
create_word_cloud(all_word)

展开



 1
一语中的

2019-03-11

以http://music.163.com/api/playlist/detail?id=753776811中歌单为例做词云展示
#-*- coding:utf-8 -*-
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import requests

def getSonglists(url, headers):
    """Return the track names listed by the playlist API at ``url``.

    The reply is decoded as JSON so the names can be read with dictionary
    lookups along ``result -> tracks -> name``.
    """
    reply = requests.get(url, headers=headers).json()
    return [track["name"] for track in reply["result"]["tracks"]]

# Build the word cloud
def create_WordCloud(lists):
    """Render ``lists`` as a word cloud, save it to "wordcloud.jpg" and display it.

    ``lists`` is handed unchanged to ``WordCloud.generate``; the caller in
    this script passes a single space-separated string.
    """
    print("根源词频计算词云")

    wc = WordCloud(
        # Raw string: "\W", "\F" and "\s" are not valid escape sequences, so the
        # plain literal triggers an invalid-escape warning. The value is unchanged.
        font_path=r"C:\Windows\Fonts\simhei.ttf",  # Chinese-capable font
        max_words=100,  # maximum number of words drawn
        width=2000,  # canvas width
        height=1200,  # canvas height
        random_state=100,
    )
    wordcloud = wc.generate(lists)
    # Write the word-cloud image to disk
    wordcloud.to_file("wordcloud.jpg")
    # Show the image; "off" hides the X/Y axes
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()

# Remove the stop words and clean up the mixed Chinese/English titles
def remove_stop_words(f):
    """Return ``f`` after applying a fixed, ordered list of substitutions.

    Version tags are deleted first; then the two "Bonus Track：" titles are
    reduced to their Chinese part.
    """
    substitutions = {
        '(伴奏)': '',
        '(Demo版)': '',
        '(必胜客新春版)': '',
        'Bonus Track：一荤一素': '一荤一素',
        'Bonus Track：给你给我': '给你给我',
    }
    # dict preserves insertion order, so the tags are removed before the titles are rewritten.
    for old, new in substitutions.items():
        f = f.replace(old, new)
    return f

# Script entry point: fetch the playlist's track names and render them as a word cloud.
if __name__ == "__main__":

    # HTTP headers forwarded by getSonglists() with the playlist request
    headers = {
        'Referer': 'http://music.163.com',
        'Host': 'music.163.com',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'User-Agent': 'Chrome/10'
    }
    url = "http://music.163.com/api/playlist/detail?id=753776811"
    gerLists = getSonglists(url, headers)
    # The names come back as a list; join them into a str (author's note: jieba segmentation is then not needed)
    lists_str = " ".join(gerLists)

    # Strip the stop words, then render the word cloud
    lists = remove_stop_words(lists_str)
    create_WordCloud(lists)

展开

作者回复: Good Job



 1
GS

2019-12-15

https://github.com/leledada/jupyter/tree/master/wordcloud

作者回复: 赞认真做作业的GS同学




GS

2019-12-13

if 'lrc' in res.json():
        try:
            lyric = res.json()['lrc']['lyric']
            new_lyric = re.sub(r'[\d:.[\]]','',lyric)
            return new_lyric
        except:
            print('发生了异常-----------------------------------',lyric_url)
            return ''
    else:
        return ''

展开

作者回复: 有异常处理，很好




GS

2019-12-13

解析歌词的时候最好是用try except 包起来，不然遇到异常就不走了

作者回复: 对的很好的建议




#Yema

2019-12-12

lyric_url = 'http://music.163.com/api/song/lyric?os=pc&id=' + str(song_id) + '&lv=-1&kv=-1&tv=-1'
老师能解释一下这个url是从哪找到的吗？为什么在浏览器抓包里面没有找到这个api，我找到的api是一个异步需要向api传csrf_token才能拿到歌词

作者回复: 网易云音乐有一些API接口，比如歌词，歌曲信息，关键词搜索




Kyle

2019-09-22

# -*- coding:utf-8 -*-
# 网易云音乐通过歌单ID，生成该歌单的词云
import requests
import os
import re
import sys
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import jieba
from lxml import etree

# HTTP headers passed to get_song_list() for the playlist request.
headers = {}
headers['Referer'] = 'http://music.163.com'
headers['Host'] = 'music.163.com'
headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'
headers['User-Agent'] = 'Chrome/10'

# Fetch the playlist information
def get_song_list(headers,song_list_url):
    """Return all track names of a playlist concatenated into one string.

    The names are appended without a separator, and the running string is
    printed after each track. Returns ``''`` when the reply has no
    ``result`` entry.
    """
    res = requests.request("GET", song_list_url, headers=headers)
    payload = res.json()
    if 'result' not in payload:
        return ''
    joined_names = ""
    for track in payload['result']['tracks']:
        joined_names += track['name']
        # Prints the whole concatenation so far, not just the current name.
        print(joined_names)
    return joined_names

# Build and display the word cloud
def wordcloud(f):
    """Segment ``f`` with jieba and render it as a word cloud.

    The image is saved to "song_list_wordcloud.jpg" and then shown.
    """
    print("根据词频结果进行词云展示！")
    tokens = jieba.cut(f, cut_all=False, HMM=True)
    segmented = " ".join(tokens)
    settings = {
        "font_path": "./wc.ttf",
        "max_words": 100,
        "width": 2000,
        "height": 1200,
    }
    image = WordCloud(**settings).generate(segmented)
    image.to_file("song_list_wordcloud.jpg")
    # Show the word cloud with the axes hidden
    plt.imshow(image)
    plt.axis("off")
    plt.show()

# Fetch the playlist's concatenated track names and render them as a word cloud
song_list_id = '753776811'
song_list_url = 'http://music.163.com/api/playlist/detail?id=' + song_list_id
all_song_list_new = get_song_list(headers,song_list_url)
wordcloud(all_song_list_new)

展开

作者回复: Good Job




挠头侠

2019-05-23

老師這個歌曲頁面不是动态加载的吗，怎么可以直接用requests呀




挠头侠

2019-05-23

老师可以介绍一下jieba中HMM参数的作用吗




滢

2019-04-24

课后作业：语言Python3.6
import requests
import matplotlib.pyplot as plt
from wordcloud import WordCloud

# Request headers used by get_song_name() for the playlist request
headers = dict((
    ('Referer', 'http://music.163.com'),
    ('Host', 'music.163.com'),
    ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/Webp,image/apng,*/*;q=0.8'),
    ('User-Agent', 'Chrome/10'),
))

# Fetch the playlist
def get_song_name(req_url):
    """Return the list of track names from the playlist API at ``req_url``.

    The request uses the module-level ``headers`` dict.

    Returns:
        A list of track names, or ``''`` when the reply has no
        ``result``/``tracks`` entry (the original failure value, kept so
        ``" ".join(...)`` in the caller still works).
    """
    res = requests.request('GET', req_url, headers=headers)
    # Decode the JSON body once instead of on every access.
    payload = res.json()
    # A reply without 'result' is treated like one without 'tracks' instead of raising KeyError.
    result = payload.get('result') or {}
    if 'tracks' not in result:
        return ''
    return [track['name'] for track in result['tracks']]

# Filter out the stop words
def remove_stop_words(text):
    """Return ``text`` with "(伴奏)", "Bonus Track：" and "(Demo版)" removed."""
    return (
        text
        .replace('(伴奏)', '')
        .replace('Bonus Track：', '')
        .replace('(Demo版)', '')
    )

path = '/Users/apple/Desktop/GitHubProject/Read mark/数据分析/geekTime/data/'
# Build the word cloud
def create_wordcloud(text):
    """Render ``text`` as a word cloud, save it under ``path`` and display it.

    Stop words are removed with ``remove_stop_words`` first. The image is
    written to ``path + 'wordcloud_homework.jpg'``.
    """
    # Drop the filtered words
    filtered = remove_stop_words(text)
    settings = {
        'font_path': "/Library/Fonts/Arial Unicode.ttf",
        'max_words': 100,
        'width': 2000,
        'height': 1200,
    }
    image = WordCloud(**settings).generate(filtered)
    image.to_file(path + 'wordcloud_homework.jpg')
    plt.imshow(image)
    plt.axis('off')
    plt.show()

# Fetch the playlist's track names, join them with spaces, print them and render the word cloud
request_url = 'http://music.163.com/api/playlist/detail?id=753776811'
content_list = get_song_name(request_url)
content = " ".join(content_list)
print('歌单信息---',content)
create_wordcloud(content)

--------------
歌单信息--- 别再闹了在无风时那时的我们从无到有一江水借消愁不染盛夏哎哟无问一荤一素南一道街芬芳一生请记住我项羽虞姬给你给我想你想你意料之中平凡的一天像我这样的人感觉自己是巨星如果有一天我变得很有钱借 (伴奏) 消愁 (伴奏) 盛夏 (伴奏) 哎哟 (伴奏) 想你想你 (伴奏) 南一道街 (伴奏) 给你给我 (伴奏) 芬芳一生 (伴奏) 一荤一素 (伴奏) 平凡的一天 (伴奏) 像我这样的人 (伴奏) 如果有一天我变得很有钱 (伴奏) 如果有一天我变得很有钱 (必胜客新春版) Bonus Track：一荤一素 (Demo版) Bonus Track：给你给我 (Demo版)
---------------
图片无法展示，希望极客实践的PM能提出改进方案，前端和后台实现一下（😂😂），祝专栏越做越好

展开

作者回复: 滢同学不错啊~ 经常做作业，Good Job




上官

2019-03-25

Traceback (most recent call last):
File "/Users/shangguan/PycharmProjects/LicenseRecognition/geci.py", line 85, in <module>
[song_ids, song_names] = get_songs(artist_id)
TypeError: 'NoneType' object is not iterable

为什么报nonetype？

展开




王彬成

2019-03-13

题目理解（1）：将歌单的歌曲名称作词云展示
-------
import requests
import jieba
from wordcloud import WordCloud
import matplotlib.pyplot as plt

# HTTP headers read by get_songs() as a module-level name.
headers = dict([
    ('Referer', 'http://music.163.com'),
    ('Host', 'music.163.com'),
    ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'),
    ('User-Agent', 'Chrome/10'),
])

# Fetch the song ID and song name of every track in the given playlist
def get_songs(playlist_id):
    """Return ``(song_names, song_ids)`` for the playlist ``playlist_id``.

    The request uses the module-level ``headers`` dict. The track count and
    each ``name  id`` pair are printed as they are read.

    Args:
        playlist_id: Playlist ID; converted with ``str`` so an int also works.

    Returns:
        A tuple ``(song_names, song_ids)`` of two parallel lists.

    Raises:
        KeyError: If the reply has no ``result``/``tracks`` entry.
    """
    page_url = 'http://music.163.com/api/playlist/detail?id=' + str(playlist_id)
    res = requests.request('GET', page_url, headers=headers)
    # Decode the JSON body once instead of on every access inside the loop.
    tracks = res.json()['result']['tracks']
    # Print the number of songs in the playlist
    print(len(tracks))

    song_ids = []
    song_names = []
    for track in tracks:
        names = track['name']
        ids = track['id']
        song_names.append(names)
        song_ids.append(ids)
        print(names, ' ', ids)
    return song_names, song_ids

# Remove the stop words
def remove_stop_words(f):
    """Return ``f`` with every occurrence of the listed fragments deleted.

    The fragments are removed one after another in the listed order.
    """
    for fragment in ('Demo', '伴奏', '版', '必胜客', 'Bonus', 'Track'):
        # Splitting on the fragment and re-joining the pieces deletes every occurrence.
        f = ''.join(f.split(fragment))
    return f

# Build the word cloud
def create_word_cloud(f):
    """Segment ``f`` with jieba and render the result as a word cloud.

    Stop words are removed with ``remove_stop_words`` first. The segmented
    text is printed, the image is saved to "wordcloud.jpg" and then shown.
    """
    print('根据词频生成词云')
    stripped = remove_stop_words(f)
    segments = jieba.cut(stripped, cut_all=False, HMM=True)
    cut_text = ' '.join(segments)
    options = {
        "font_path": "./wc.ttf",
        "max_words": 100,
        "width": 2000,
        "height": 1200,
    }
    cloud = WordCloud(**options)
    print(cut_text)
    picture = cloud.generate(cut_text)
    # Write the word-cloud image to disk
    picture.to_file("wordcloud.jpg")
    # Show the image with the axes hidden
    plt.imshow(picture)
    plt.axis("off")
    plt.show()


# Playlist ID; per the original note, 753776811 is the playlist 【毛不易 | 不善言辞的深情】
playlist_id='753776811'
[song_names,song_ids]=get_songs(playlist_id)

# Join the song_names list into one space-separated string
song_names_text=" ".join(song_names)

# Render the word cloud from the song names
create_word_cloud(song_names_text)

展开




志

2019-03-11

思考题关键代码部分：

import time
# Module-level accumulators filled by get_song_list()
id_list = []
name_list = []
# Collect the ID and name of every song in a playlist
def get_song_list(headers,list_url):
    """Append every track's ID and name to ``id_list`` / ``name_list``.

    Args:
        headers: HTTP headers sent with the request.
        list_url: Full URL of the playlist-detail API call.

    Returns:
        ``(id_list, name_list)`` on success. When the reply has no
        ``result`` entry the decoded reply is printed and ``''`` is returned.
    """
    res = requests.request('GET', list_url, headers=headers)
    # Decode the JSON body once instead of on every access.
    payload = res.json()
    if 'result' not in payload:
        # Print before returning so the diagnostic output is actually reachable.
        print(payload)
        return ''
    for item in payload['result']['tracks']:  # read each ID and name from the JSON data
        id_list.append(item['id'])
        name_list.append(item['name'])
        print(item['id'], " ", item['name'])
        time.sleep(1)  # pause between tracks; the original note says this prevents "bad handshake"
    return id_list, name_list

# Playlist URL
list_url = 'https://music.163.com/api/playlist/detail?id=753776811'
# Fill id_list / name_list with the ID and name of every song in the playlist
get_song_list(headers,list_url)

all_word_list = ''
# Fetch the lyric of every song
for (song_id, song_name) in zip(id_list, name_list):
    # Lyric API URL
    lyric_url = 'http://music.163.com/api/song/lyric?os=pc&id=' + str(song_id) + '&lv=-1&kv=-1&tv=-1'
    lyric = get_song_lyric(headers, lyric_url)
    all_word_list = all_word_list + ' ' + lyric
    print(song_name)

# Remove the stop words. Strings are immutable, so the cleaned text has to be
# assigned back; calling the helper without using its result changes nothing.
all_word_list = remove_stop_words(all_word_list)

# Render the word cloud from the collected lyrics
create_word_cloud(all_word_list)

展开

作者回复: Good Job




上善若水

2019-03-11

Traceback (most recent call last):
  File "C:\Program Files\Python37\lib\site-packages\IPython\core\interactiveshell.py", line 3267, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-2-d883ef070907>", line 1, in <module>
运行报错，请问是什么原因？



