我费了不少功夫才找到解决这个问题的办法,觉得把它分享出来会很不错。问题是:
我有一段文本,其中可能包含任何类型的标点符号。我想把它分成两部分:前 x 个单词(连同其中的标点符号),以及其余的文本。
下面是一些例子:
// Sample sentence mixing words, numbers and free-standing punctuation.
// Declared with `var` so the example does not create an implicit global.
var str = "one two, three, quatro 5! : six sept ocho nine 10!"
// Each call returns [first N words, rest]; the rest keeps its leading whitespace.
splitAfterXWords(str, 2)
// ["one two,", " three, quatro 5! : six sept ocho nine 10!"]
splitAfterXWords(str, 5)
// ["one two, three, quatro 5!", " : six sept ocho nine 10!"]
// Fewer than 20 words are available, so the whole text is returned as the first part.
splitAfterXWords(str, 20)
// ["one two, three, quatro 5! : six sept ocho nine 10!", ""]
// The free-standing ":" is attached to the word that follows it ("six").
splitAfterXWords(str, 6)
// ["one two, three, quatro 5! : six", " sept ocho nine 10!"]
下面是一个执行此操作的函数:
/**
 * Splits a text after its first `words` words, leaving punctuation in place.
 *
 * A "word" is a run of non-whitespace characters together with any
 * whitespace and punctuation (; : ! , . ? " ' ’) in front of it, so a
 * free-standing punctuation token such as " : " is normally attached to the
 * word that follows. Free-standing punctuation tokens are only counted as
 * words of their own when that is required to reach the requested count.
 *
 * @param {string} to_split - Text to split.
 * @param {number|string} words - Number of leading words for the first part
 *     (a non-negative integer, or a string holding one).
 * @returns {Array} `[head, tail]`, where `head + tail` equals `to_split`.
 *     When the text holds fewer than `words` whitespace-separated tokens, or
 *     `words` is not a non-negative integer, the result is `[to_split, '']`.
 */
function splitAfterXWords(to_split, words) {
  // Validate the count up front instead of splicing it into a pattern.
  var count = typeof words === 'string' && words.trim() !== '' ? Number(words) : words;
  if (typeof count !== 'number' || !isFinite(count) || count < 0 || Math.floor(count) !== count) {
    return [to_split, ''];
  }

  // Index just past the last non-whitespace character: no word starts beyond it.
  var whitespace = /\s/;
  var limit = to_split.length;
  while (limit > 0 && whitespace.test(to_split.charAt(limit - 1))) {
    limit--;
  }

  // Consume one word per iteration. Each step always takes a whole word, so
  // the scan is linear and never splits a word into fragments.
  var wordPattern = /[\s;:!,.?"'’]*\S+/g;
  var end = 0;
  var found = 0;
  while (found < count && end < limit) {
    wordPattern.lastIndex = end;
    if (wordPattern.exec(to_split) === null) {
      break;
    }
    end = wordPattern.lastIndex;
    found++;
  }
  if (found === count) {
    return [to_split.slice(0, end), to_split.slice(end)];
  }

  // Attaching free-standing punctuation to the following word left too few
  // words. If counting those tokens separately reaches the requested number,
  // the first part is everything up to the end of the last token.
  var tokens = to_split.match(/\S+/g);
  var tokenCount = tokens ? tokens.length : 0;
  if (count <= tokenCount) {
    return [to_split.slice(0, limit), to_split.slice(limit)];
  }

  // Not enough words: the whole text is the first part.
  return [to_split, ''];
}
你可以在这个 fiddle(在线示例)中看到它的运行效果。
欢迎改进和评论!
下面是我从给定的句子中取出前 n 个单词的尝试:
// Matches the next word together with any whitespace in front of it.
var regexp = /\s*\S+/;

/**
 * Computes how many leading characters of `s` make up its first `n` words.
 *
 * A word is a run of non-whitespace characters; the whitespace in front of
 * each word is included in the count. Trailing whitespace after the last
 * counted word is not included.
 *
 * @param {string} s - Sentence to measure.
 * @param {number} n - Number of leading words wanted.
 * @returns {number} Length of the prefix of `s` holding the first `n` words
 *     (or all words when `s` has fewer); 0 when `s` is null/undefined or
 *     `n` is not positive.
 */
function truncateToNWords(s, n) {
  var l = 0;
  if (s == null || n <= 0) return l;
  // Work on a local copy so the parameter is not reassigned, and keep the
  // match result local instead of leaking it as an implicit global.
  var rest = s;
  var match;
  for (var i = 0; i < n && (match = regexp.exec(rest)) !== null; i++) {
    // Drop the word just counted and add its length to the running total.
    rest = rest.substring(match[0].length);
    l += match[0].length;
  }
  return l;
}
// Sample sentence used to demonstrate truncateToNWords.
var s = "one two, three, quatro 5!: six sept ocho nine 10!";
// Declared explicitly so the demo does not create an implicit global.
var l;
// For each requested word count, print [first n words, remainder of the sentence].
[2, 5, 6, 20].forEach(function (wordCount) {
  l = truncateToNWords(s, wordCount);
  console.log([s.substring(0, l), s.substring(l)]);
});
上面代码的输出:
["one two,", " three, quatro 5!: six sept ocho nine 10!"]
["one two, three, quatro 5!:", " six sept ocho nine 10!"]
["one two, three, quatro 5!: six", " sept ocho nine 10!"]
["one two, three, quatro 5!: six sept ocho nine 10!", ""]